#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""Generates a DAG from JSON spec in [config] and outputs nodes to
[filename]_nodes, and edges to [filename]_edges in format convertible to
Memgraph snapshot.

Here's an example JSON spec:

    {
        "layers": [
            {
                "name": "A",
                "sublayers": 1,
                "degree_lo": 1,
                "degree_hi": 3,
                "nodes": 4
            },
            {
                "name": "B",
                "sublayers": 3,
                "degree_lo": 2,
                "degree_hi": 3,
                "nodes": 10
            },
            {
                "name": "C",
                "sublayers": 1,
                "degree_lo": 1,
                "degree_hi": 1,
                "nodes": 5
            }
        ]
    }

Nodes from each layer will be randomly divided into sublayers. A node can only
have edges pointing to nodes in lower sublayers of the same layer, or to nodes
from the layer directly below it. Out-degree is chosen uniformly random from
[degree_lo, degree_hi] interval."""

import argparse
from itertools import accumulate
import json
import random


def _split_into_sum(n, k):
    """Randomly split the integer n into k positive parts that sum to n.

    Args:
        n: total to split; must be at least 1.
        k: number of parts; must satisfy 1 <= k <= n.

    Returns:
        A list of k positive integers whose sum is n.

    Raises:
        ValueError: if n < 1, k < 1 or k > n.
    """
    # Explicit checks instead of `assert`, which is stripped under `python -O`.
    if n < 1:
        raise ValueError("n should be at least 1")
    if k < 1:
        raise ValueError("k should be at least 1")
    if k > n:
        raise ValueError("k shouldn't be greater than n")

    # Pick k - 1 distinct cut points strictly inside (0, n); the gaps between
    # consecutive cut points are the part sizes, each of them at least 1.
    cuts = [0] + sorted(random.sample(range(1, n), k - 1)) + [n]
    return [hi - lo for lo, hi in zip(cuts, cuts[1:])]


def generate_dag(graph_config, seed=None):
    """Generate a random layered DAG.

    Node ids are consecutive integers starting at 1, assigned layer by layer
    in the order the layers appear in graph_config. Every generated edge
    (u, v) satisfies u < v, so the id order is a topological order.

    Args:
        graph_config: sequence of layer dicts, each with the keys 'name',
            'nodes', 'sublayers', 'degree_lo' and 'degree_hi'. The dicts are
            only read, never modified.
        seed: value passed to random.seed(); the same seed and config give
            the same graph.

    Returns:
        A (nodes, edges) tuple. nodes is a list of (node_id, layer_name)
        tuples; edges is a list of (source_id, target_id) tuples and may
        contain duplicates.

    Raises:
        ValueError: if a layer has fewer than 1 node, fewer than 1 sublayer,
            or more sublayers than nodes.
    """
    random.seed(seed)

    nodes = []
    # sublayer_ranges[i] holds the sublayer boundaries of layer i: sublayer j
    # covers ids [bounds[j], bounds[j + 1]). Kept locally so that the caller's
    # config is not mutated.
    sublayer_ranges = []

    layer_lo = 1
    for layer in graph_config:
        sizes = _split_into_sum(layer['nodes'], layer['sublayers'])
        sublayer_ranges.append(list(accumulate([layer_lo] + sizes)))
        layer_hi = layer_lo + layer['nodes']
        nodes.extend((u, layer['name']) for u in range(layer_lo, layer_hi))
        layer_lo = layer_hi

    edges = []

    # The last layer has no layer below it, so it is only ever a "next" layer.
    layer_pairs = zip(graph_config, sublayer_ranges, sublayer_ranges[1:])
    for layer, sub_range, sub_range_next in layer_pairs:
        degree_lo = layer['degree_lo']
        degree_hi = layer['degree_hi']

        layer_lo = sub_range[0]
        next_layer_hi = sub_range_next[-1]

        # Outgoing edges: each node picks its targets among the later
        # sublayers of its own layer and all nodes of the next layer.
        for sub_lo, sub_hi in zip(sub_range, sub_range[1:]):
            for u in range(sub_lo, sub_hi):
                num_edges = random.randint(degree_lo, degree_hi)
                for _ in range(num_edges):
                    v = random.randint(sub_hi, next_layer_hi - 1)
                    edges.append((u, v))

        # Give every node of the next layer at least one incoming edge, from
        # the current layer or from an earlier sublayer of the next layer.
        for sub_lo, sub_hi in zip(sub_range_next, sub_range_next[1:]):
            for u in range(sub_lo, sub_hi):
                v = random.randint(layer_lo, sub_lo - 1)
                edges.append((v, u))

    return nodes, edges


def main():
    """Parse the command line, generate the DAG and write both CSV files."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__)
    parser.add_argument('config', type=str, help='graph config JSON file')
    parser.add_argument('filename', type=str,
                        help='nodes will be stored to filename_nodes, '
                             'edges to filename_edges')
    parser.add_argument('--seed', type=int,
                        help='seed for the random generator (default = '
                             'current system time)')
    args = parser.parse_args()

    with open(args.config, 'r', encoding='utf-8') as f:
        graph_config = json.load(f)['layers']

    nodes, edges = generate_dag(graph_config, seed=args.seed)

    # print nodes into CSV file
    with open('{}_nodes'.format(args.filename), 'w', encoding='utf-8') as out:
        out.write('nodeId:ID(Node),name,topological_index:Int,:LABEL\n')
        out.writelines(
            '{0},{1}{0},{0},{1}\n'.format(node_id, layer)
            for node_id, layer in nodes)

    # print edges into CSV file
    with open('{}_edges'.format(args.filename), 'w', encoding='utf-8') as out:
        out.write(':START_ID(Node),:END_ID(Node),:TYPE\n')
        out.writelines('{},{},child\n'.format(u, v) for u, v in edges)


if __name__ == '__main__':
    main()