126 lines
3.9 KiB
Plaintext
126 lines
3.9 KiB
Plaintext
|
#!/usr/bin/env python3
|
||
|
# -*- coding: utf-8 -*-
|
||
|
"""Generates a DAG from JSON spec in [config] and outputs nodes to
|
||
|
[filename]_nodes, and edges to [filename]_edges in format convertible
|
||
|
to Memgraph snapshot.
|
||
|
|
||
|
Here's an example JSON spec:
|
||
|
|
||
|
{
|
||
|
"layers": [
|
||
|
{
|
||
|
"name": "A",
|
||
|
"sublayers": 1,
|
||
|
"degree_lo": 1,
|
||
|
"degree_hi": 3,
|
||
|
"nodes": 4
|
||
|
},
|
||
|
{
|
||
|
"name": "B",
|
||
|
"sublayers": 3,
|
||
|
"degree_lo": 2,
|
||
|
"degree_hi": 3,
|
||
|
"nodes": 10
|
||
|
},
|
||
|
{
|
||
|
"name": "C",
|
||
|
"sublayers": 1,
|
||
|
"degree_lo": 1,
|
||
|
"degree_hi": 1,
|
||
|
"nodes": 5
|
||
|
}
|
||
|
]
|
||
|
}
|
||
|
|
||
|
Nodes from each layer will be randomly divided into sublayers. A node can
only have edges pointing to nodes in lower sublayers of the same layer, or
to nodes from the layer directly below it. The out-degree is chosen uniformly
at random from the [degree_lo, degree_hi] interval."""
|
||
|
|
||
|
import argparse
|
||
|
from itertools import accumulate
|
||
|
import json
|
||
|
import random
|
||
|
|
||
|
|
||
|
def _split_into_sum(n, k):
    """Randomly split the integer n into k positive integer parts.

    Draws k - 1 distinct cut points from 1..n-1 and returns the gaps between
    consecutive cuts (with 0 and n as the outer cuts), so the parts always
    sum to n and every part is at least 1.

    Args:
        n: total to split; must be at least 1.
        k: number of parts; must satisfy 1 <= k <= n.

    Returns:
        A list of k positive integers whose sum is n.

    Raises:
        ValueError: if n < 1, k < 1 or k > n.
    """
    # Explicit checks rather than `assert`, which is stripped under
    # `python -O` and would let invalid input through silently.
    if n < 1:
        raise ValueError("n should be at least 1")
    if k < 1:
        raise ValueError("k should be at least 1")
    if k > n:
        raise ValueError("k shouldn't be greater than n")

    cuts = [0] + sorted(random.sample(range(1, n), k - 1)) + [n]
    return [hi - lo for lo, hi in zip(cuts, cuts[1:])]
|
||
|
|
||
|
|
||
|
def generate_dag(graph_config, seed=None):
    """Generate a random layered DAG described by graph_config.

    Node ids are assigned consecutively starting from 1, layer by layer in
    the order the layers appear in graph_config. Every generated edge (u, v)
    satisfies u < v, which is what makes the result acyclic.

    Args:
        graph_config: list of layer dicts, each providing the keys 'name',
            'nodes', 'sublayers', 'degree_lo' and 'degree_hi'. The dicts are
            only read, never modified.
        seed: value passed to random.seed(); with the same seed and config
            the same graph is produced.

    Returns:
        A (nodes, edges) tuple. nodes is a list of (node_id, layer_name)
        pairs; edges is a list of (source_id, target_id) pairs. The same
        edge may occur more than once because targets are drawn
        independently.
    """
    random.seed(seed)

    nodes = []
    # Sublayer boundaries per layer, kept in a local list so the caller's
    # config dicts are left untouched. For one layer the list holds the id
    # of the first node of each sublayer, followed by one past the id of the
    # layer's last node.
    boundaries = []

    first_id = 1
    for layer in graph_config:
        sizes = _split_into_sum(layer['nodes'], layer['sublayers'])
        boundaries.append(list(accumulate([first_id] + sizes)))
        nodes.extend(
            (u, layer['name'])
            for u in range(first_id, first_id + layer['nodes'])
        )
        first_id += layer['nodes']

    edges = []

    # zip() stops at the shortest input, so this walks every layer that has
    # a layer below it; the last layer gets no outgoing edges.
    for layer, sub_range, sub_range_next in zip(
            graph_config, boundaries, boundaries[1:]):
        degree_lo = layer['degree_lo']
        degree_hi = layer['degree_hi']

        layer_lo = sub_range[0]
        next_layer_hi = sub_range_next[-1]

        # Outgoing edges: targets lie strictly after the source's own
        # sublayer, up to the last node of the next layer.
        for sub_lo, sub_hi in zip(sub_range, sub_range[1:]):
            for u in range(sub_lo, sub_hi):
                num_edges = random.randint(degree_lo, degree_hi)
                for _ in range(num_edges):
                    v = random.randint(sub_hi, next_layer_hi - 1)
                    edges.append((u, v))

        # Give every node of the next layer one extra incoming edge from a
        # node of the current layer or of an earlier sublayer of its own
        # layer.
        for sub_lo, sub_hi in zip(sub_range_next, sub_range_next[1:]):
            for u in range(sub_lo, sub_hi):
                v = random.randint(layer_lo, sub_lo - 1)
                edges.append((v, u))

    return nodes, edges
|
||
|
|
||
|
|
||
|
if __name__ == '__main__':
    # Command-line entry point; the module docstring doubles as help text.
    cli = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli.add_argument('config', type=str, help='graph config JSON file')
    cli.add_argument(
        'filename',
        type=str,
        help='nodes will be stored to filename_nodes, '
             'edges to filename_edges',
    )
    cli.add_argument(
        '--seed',
        type=int,
        help='seed for the random generator (default = '
             'current system time)',
    )
    args = cli.parse_args()

    # Only the "layers" list of the JSON spec is used.
    with open(args.config, 'r') as config_file:
        graph_config = json.load(config_file)['layers']

    nodes, edges = generate_dag(graph_config, seed=args.seed)

    # Node table: header row, then one CSV row per node.
    nodes_path = '{}_nodes'.format(args.filename)
    with open(nodes_path, 'w') as out:
        out.write('nodeId:ID(Node),name,topological_index:Int,:LABEL\n')
        out.writelines(
            '{0},{1}{0},{0},{1}\n'.format(node_id, layer_name)
            for node_id, layer_name in nodes
        )

    # Edge table: header row, then one CSV row per edge.
    edges_path = '{}_edges'.format(args.filename)
    with open(edges_path, 'w') as out:
        out.write(':START_ID(Node),:END_ID(Node),:TYPE\n')
        out.writelines(
            '{},{},child\n'.format(src, dst)
            for src, dst in edges
        )
|