#!/usr/bin/python3

'''
Test the dynamic graph partitioner (DGP) on a Memgraph cluster with a
randomly generated graph (uniform distribution of nodes and edges). The
partitioning goal is to minimize the number of crossing edges while
keeping the cluster balanced.
'''

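# Example invocation (the script name is illustrative; the flags are the
# ones parsed in the __main__ block at the bottom of this file):
#
#   ./test_dynamic_graph_partitioner.py --machine-count 3 \
#       --node-count 100000 --edge-count 100000 \
#       --iteration-count 10 --iteration-interval 5
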
import argparse
import atexit
import logging
import os
import random
import subprocess
import sys
import time

try:
    # graphviz==0.9
    from graphviz import Digraph
except ImportError:
    print("graphviz module isn't available. "
          "Graph won't be generated but the checks will still work.")
from neo4j.v1 import GraphDatabase, basic_auth

SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
PROJECT_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "..", "..", ".."))

# Flags shared by the master and all workers; durability is disabled so
# every run starts from an empty database.
COMMON_ARGS = ["--durability-enabled=false",
               "--snapshot-on-exit=false",
               "--db-recover-on-startup=false"]
# Flags for the master only; the dynamic graph partitioner is enabled here.
MASTER_ARGS = ["--master",
               "--master-port", "10000",
               "--dynamic-graph-partitioner-enabled",
               "--durability-directory=durability_master"]

log = logging.getLogger(__name__)
memgraph_processes = []


def worker_args(worker_id):
    """Return the extra command line flags for the given worker id."""
    args = ["--worker",
            "--worker-id", str(worker_id),
            "--worker-port", str(10000 + worker_id),
            "--master-port", str(10000),
            "--durability-directory=durability_worker%s" % worker_id]
    return args


def wait_for_server(port, delay=0.01):
    """Block until something listens on 127.0.0.1:port (requires nc)."""
    cmd = ["nc", "-z", "-w", "1", "127.0.0.1", str(port)]
    while subprocess.call(cmd) != 0:
        time.sleep(delay)
    time.sleep(delay)


def run_memgraph_process(binary, args):
    """Start a single Memgraph process and register it for cleanup."""
    global memgraph_processes
    process = subprocess.Popen([binary] + args, cwd=os.path.dirname(binary))
    memgraph_processes.append(process)


def run_distributed_memgraph(args):
    """Start the master and machine_count - 1 workers and wait for Bolt."""
    run_memgraph_process(args.memgraph, COMMON_ARGS + MASTER_ARGS)
    wait_for_server("10000")
    for i in range(1, int(args.machine_count)):
        run_memgraph_process(args.memgraph, COMMON_ARGS + worker_args(i))
    wait_for_server("7687")


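# Note on the resulting cluster layout (as configured by the flags above):
# the master coordinates workers on port 10000, worker i listens on port
# 10000 + i, and clients reach the cluster through the Bolt endpoint on
# port 7687.
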
def terminate():
    """Gracefully stop all Memgraph processes and check their exit codes."""
    global memgraph_processes
    for process in memgraph_processes:
        process.terminate()
        status = process.wait()
        if status != 0:
            raise Exception(
                "Memgraph binary returned non-zero ({})!".format(status))


@atexit.register
def cleanup():
    """Kill any Memgraph process still alive at interpreter exit."""
    global memgraph_processes
    for proc in memgraph_processes:
        if proc.poll() is not None:
            continue
        proc.kill()
        proc.wait()


def run_test(args):
    """Generate a random graph and check the partitioning over time."""
    driver = GraphDatabase.driver(args.endpoint,
                                  auth=basic_auth(args.username,
                                                  args.password),
                                  encrypted=args.encrypted)
    session = driver.session()

    session.run("CREATE INDEX ON :Node(id)").consume()
    # Create node_count nodes with ids 0 .. node_count - 1 so that the random
    # edge endpoints below always refer to existing nodes.
    session.run(
        "UNWIND range(0, $num - 1) AS n CREATE (:Node {id: n})",
        num=args.node_count).consume()

    # Create edge_count edges between uniformly random pairs of nodes.
    created_edges = 0
    while created_edges < args.edge_count:
        # The retry block is here because DGP is running in the background
        # and serialization errors may occur.
        try:
            session.run("MATCH (n:Node {id: $id1}), (m:Node {id: $id2}) "
                        "CREATE (n)-[:Edge]->(m)",
                        id1=random.randint(0, args.node_count - 1),
                        id2=random.randint(0, args.node_count - 1)).consume()
            created_edges += 1
        except Exception:
            pass

    # Check cluster state periodically.
    crossing_edges_history = []
    load_history = []
    duration = 0
    for iteration in range(args.iteration_count):
        iteration_start_time = time.time()
        data = session.run(
            "MATCH (n)-[r]->(m) "
            "RETURN "
            "  id(n) AS v1, id(m) AS v2, "
            "  workerid(n) AS position_n, workerid(m) AS position_m").data()

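        # Each returned row describes one edge: the endpoint ids (v1, v2)
        # and the workers currently storing those endpoints (position_n,
        # position_m). For example, a row with position_n=0 and
        # position_m=2 counts as a crossing edge below and adds one to
        # worker 0's load.
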
        # Visualize cluster state.
        if args.visualize and 'graphviz' in sys.modules:
            output_dir = os.path.join(SCRIPT_DIR, "output")
            if not os.path.exists(output_dir):
                os.makedirs(output_dir)
            cluster = Digraph(name="memgraph_cluster", format="png")
            cluster.attr(splines="false", rank="TB")
            subgraphs = [Digraph(name="cluster_worker%s" % i)
                         for i in range(args.machine_count)]
            for index, subgraph in enumerate(subgraphs):
                subgraph.attr(label="worker%s" % index)
            edges = []
            for row in data:
                # (start_id, end_id, machine_id)
                edges.append((row["v1"], row["v2"], row["position_n"]))
            edges = sorted(edges, key=lambda x: x[0])
            for edge in edges:
                # Draw each edge inside the subgraph of its source machine.
                subgraphs[edge[2]].edge(str(edge[0]), str(edge[1]))
            for subgraph in subgraphs:
                cluster.subgraph(subgraph)
            cluster.render(
                os.path.join(output_dir, "iteration_%s" % iteration))

        # Collect data.
        num_of_crossing_edges = 0
        load = [0] * args.machine_count
        for edge in data:
            src_worker = int(edge["position_n"])
            dst_worker = int(edge["position_m"])
            if src_worker != dst_worker:
                num_of_crossing_edges += 1
            load[src_worker] += 1
        crossing_edges_history.append(num_of_crossing_edges)
        load_history.append(load)
        iteration_delta_time = time.time() - iteration_start_time
        log.info("Number of crossing edges %s" % num_of_crossing_edges)
        log.info("Cluster load %s" % load)

        # Wait for DGP a bit.
        if iteration_delta_time < args.iteration_interval:
            time.sleep(args.iteration_interval - iteration_delta_time)
            duration += args.iteration_interval
        else:
            duration += iteration_delta_time
    # TODO (buda): Somehow align with DGP turns. Replace runtime param with
    # the query.

    assert crossing_edges_history[-1] < crossing_edges_history[0], \
        "The number of crossing edges hasn't decreased."
    for machine in range(args.machine_count):
        assert load_history[-1][machine] > 0, "Machine %s is empty." % machine

    log.info("Config")
    log.info("  Machines: %s" % args.machine_count)
    log.info("  Nodes: %s" % args.node_count)
    log.info("  Edges: %s" % args.edge_count)
    log.info("Start")
    log.info("  Crossing Edges: %s" % crossing_edges_history[0])
    log.info("  Cluster Load: %s" % load_history[0])
    log.info("End")
    log.info("  Crossing Edges: %s" % crossing_edges_history[-1])
    log.info("  Cluster Load: %s" % load_history[-1])
    log.info("Stats")
    log.info("  Duration: %ss" % duration)

    session.close()
    driver.close()


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    memgraph_binary = os.path.join(PROJECT_DIR, "build", "memgraph")
    if not os.path.exists(memgraph_binary):
        memgraph_binary = os.path.join(PROJECT_DIR, "build_debug", "memgraph")

    parser = argparse.ArgumentParser()
    parser.add_argument("--memgraph", default=memgraph_binary)
    parser.add_argument("--endpoint", type=str,
                        default="bolt://localhost:7687")
    parser.add_argument("--username", type=str, default="")
    parser.add_argument("--password", type=str, default="")
    # argparse's type=bool treats any non-empty string as True, so plain
    # store_true flags are used for the boolean options instead.
    parser.add_argument("--encrypted", action="store_true", default=False)
    parser.add_argument("--visualize", action="store_true", default=False)
    parser.add_argument("--machine-count", type=int, default=3)
    parser.add_argument("--node-count", type=int, default=1000000)
    parser.add_argument("--edge-count", type=int, default=1000000)
    parser.add_argument("--iteration-count", type=int, default=10)
    parser.add_argument("--iteration-interval", type=float, default=5)
    args = parser.parse_args()

    run_distributed_memgraph(args)
    run_test(args)
    terminate()