memgraph/tests/integration/dgp/run.py
Marko Budiselic 2f4ca7d3f7 Improve error handling related to the dynamic graph partitioning
Summary:
Fixing bugs in error handling in the following cases (DGP ON):
  * SIGKILL master
  * SIGKILL worker
  * SIGTERM master

Reviewers: mferencevic, msantl, vkasljevic

Reviewed By: mferencevic

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D1896
2019-04-11 16:10:06 +02:00

225 lines
8.0 KiB
Python
Executable File

#!/usr/bin/python3
'''
Test the dynamic graph partitioner (DGP) on a Memgraph cluster
with a randomly generated graph (uniform distribution
of nodes and edges). The partitioning goal is to
minimize the number of crossing edges while keeping
the cluster balanced.
'''
import argparse
import atexit
import logging
import os
import random
import sys
import subprocess
import time
try:
# graphviz==0.9
from graphviz import Digraph
except ImportError:
print("graphviz module isn't available. "
"Graph won't be generated but the checks will still work.")
from neo4j.v1 import GraphDatabase, basic_auth
# Directory that contains this script, and the directory three levels above it.
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
PROJECT_DIR = os.path.normpath(
    os.path.join(SCRIPT_DIR, "..", "..", ".."))

# Flags passed to every Memgraph process started by this script.
COMMON_ARGS = [
    "--durability-enabled=false",
    "--snapshot-on-exit=false",
    "--db-recover-on-startup=false",
]

# Additional flags for the master process; it listens on port 10000 and is
# the only process started with the dynamic graph partitioner flag.
MASTER_ARGS = [
    "--master",
    "--master-port", "10000",
    "--dynamic-graph-partitioner-enabled",
    "--durability-directory=durability_master",
]

log = logging.getLogger(__name__)

# Handles of every Memgraph process spawned by run_memgraph_process().
memgraph_processes = []
def worker_args(worker_id):
    """Return the command line flags for the worker with id `worker_id`.

    The worker listens on port 10000 + `worker_id`, connects to the master
    on port 10000 and uses its own durability directory.
    """
    master_port = 10000
    flags = ["--worker"]
    flags += ["--worker-id", str(worker_id)]
    flags += ["--worker-port", str(master_port + worker_id)]
    flags += ["--master-port", str(master_port)]
    flags.append(
        "--durability-directory=durability_worker{}".format(worker_id))
    return flags
def wait_for_server(port, delay=0.01):
    """Block until a TCP connection to 127.0.0.1:`port` succeeds.

    The port is probed with `nc -z` (one second connect timeout) and the
    probe is repeated every `delay` seconds until it succeeds. There is no
    upper bound on the waiting time.

    Args:
        port: Port to probe; either a string or an integer.
        delay: Seconds to sleep between probes and once more after the
            first successful probe.
    """
    # subprocess only accepts string arguments, so convert explicitly to
    # allow callers to pass the port as an integer.
    cmd = ["nc", "-z", "-w", "1", "127.0.0.1", str(port)]
    while subprocess.call(cmd) != 0:
        time.sleep(delay)
    # One extra pause after the port opened; presumably this gives the
    # server a moment to finish starting up.
    time.sleep(delay)
def run_memgraph_process(binary, args):
    """Start `binary` with `args` and remember the process handle.

    The process runs with the binary's directory as its working directory
    and its handle is appended to the module-level `memgraph_processes`.
    """
    command = [binary] + args
    working_dir = os.path.dirname(binary)
    memgraph_processes.append(subprocess.Popen(command, cwd=working_dir))
def run_distributed_memgraph(args):
    """Start the master and `args.machine_count - 1` workers.

    The master is started first and its port (10000) is awaited before any
    worker is started. After all workers are spawned the function waits for
    port 7687 to accept connections.
    """
    master_flags = COMMON_ARGS + MASTER_ARGS
    run_memgraph_process(args.memgraph, master_flags)
    wait_for_server("10000")

    # Worker ids start at 1.
    worker_id = 1
    machine_count = int(args.machine_count)
    while worker_id < machine_count:
        worker_flags = COMMON_ARGS + worker_args(worker_id)
        run_memgraph_process(args.memgraph, worker_flags)
        worker_id += 1

    wait_for_server("7687")
def terminate():
    """Terminate all spawned Memgraph processes one by one.

    Each process gets `terminate()` and is waited for before the next one
    is handled.

    Raises:
        Exception: As soon as one process exits with a non-zero status; the
            remaining processes are not terminated by this function.
    """
    for memgraph in memgraph_processes:
        memgraph.terminate()
        exit_status = memgraph.wait()
        if exit_status == 0:
            continue
        message = "Memgraph binary returned non-zero ({})!".format(
            exit_status)
        raise Exception(message)
@atexit.register
def cleanup():
    """Kill every spawned Memgraph process that is still running.

    Registered with `atexit`, so it runs when the interpreter exits.
    """
    for memgraph in memgraph_processes:
        # poll() returns None only while the process is still alive.
        still_running = memgraph.poll() is None
        if still_running:
            memgraph.kill()
            memgraph.wait()
def _create_random_graph(session, node_count, edge_count):
    """Create `node_count` indexed nodes and `edge_count` random edges."""
    session.run("CREATE INDEX ON :Node(id)").consume()
    # Cypher's range() includes both bounds, so range(0, $num - 1) creates
    # the ids 0 .. node_count - 1. These are exactly the ids the edge
    # endpoints below are drawn from.
    session.run(
        "UNWIND range(0, $num - 1) AS n CREATE (:Node {id: n})",
        num=node_count).consume()
    created_edges = 0
    while created_edges < edge_count:
        # Draw the endpoints outside of the retry block so that invalid
        # input (e.g. node_count == 0) fails loudly instead of being retried
        # forever.
        id1 = random.randint(0, node_count - 1)
        id2 = random.randint(0, node_count - 1)
        # Retry block is here because DGP is running in background and
        # serialization errors may occur. A failed attempt is not counted,
        # so the loop simply tries again with new endpoints.
        try:
            session.run("MATCH (n:Node {id: $id1}), (m:Node {id:$id2}) "
                        "CREATE (n)-[:Edge]->(m)",
                        id1=id1, id2=id2).consume()
        except Exception as error:
            log.debug("Edge creation failed, retrying: %s", error)
        else:
            created_edges += 1


def _render_cluster(data, machine_count, iteration):
    """Render the cluster state into SCRIPT_DIR/output/iteration_<n>.png."""
    output_dir = os.path.join(SCRIPT_DIR, "output")
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    cluster = Digraph(name="memgraph_cluster", format="png")
    cluster.attr(splines="false", rank="TB")
    subgraphs = [Digraph(name="cluster_worker%s" % i)
                 for i in range(machine_count)]
    for index, subgraph in enumerate(subgraphs):
        subgraph.attr(label="worker%s" % index)
    # (start_id, end_id, machine_id) ordered by start_id.
    edges = sorted(
        ((row["v1"], row["v2"], int(row["position_n"])) for row in data),
        key=lambda edge: edge[0])
    for start_id, end_id, machine_id in edges:
        subgraphs[machine_id].edge(str(start_id), str(end_id))
    for subgraph in subgraphs:
        cluster.subgraph(subgraph)
    # Render into the directory created above instead of a path relative to
    # the current working directory.
    cluster.render(os.path.join(output_dir, "iteration_%s" % iteration))


def _cluster_stats(data, machine_count):
    """Return (number of crossing edges, per-machine outgoing edge count)."""
    num_of_crossing_edges = 0
    load = [0] * machine_count
    for edge in data:
        src_worker = int(edge["position_n"])
        dst_worker = int(edge["position_m"])
        if src_worker != dst_worker:
            num_of_crossing_edges += 1
        load[src_worker] += 1
    return num_of_crossing_edges, load


def run_test(args):
    """Generate a random graph and verify that DGP improves the partitioning.

    The cluster state is sampled `args.iteration_count` times, roughly every
    `args.iteration_interval` seconds. The test passes if the last sample
    has fewer crossing edges than the first one and every machine is the
    source of at least one edge in the last sample.

    Args:
        args: Parsed command line arguments; the attributes read here are
            endpoint, username, password, encrypted, node_count, edge_count,
            iteration_count, iteration_interval, machine_count and visualize.

    Raises:
        AssertionError: If the number of crossing edges did not decrease or
            a machine ended up without any outgoing edge.
    """
    driver = GraphDatabase.driver(args.endpoint,
                                  auth=basic_auth(args.username,
                                                  args.password),
                                  encrypted=args.encrypted)
    session = None
    # Release the session and the driver even if a query or a check fails.
    try:
        session = driver.session()
        _create_random_graph(session, args.node_count, args.edge_count)

        # Check cluster state periodically.
        crossing_edges_history = []
        load_history = []
        duration = 0
        for iteration in range(args.iteration_count):
            iteration_start_time = time.time()
            data = session.run(
                "MATCH (n)-[r]->(m) "
                "RETURN "
                " id(n) AS v1, id(m) AS v2, "
                " workerid(n) AS position_n, workerid(m) AS position_m"
            ).data()

            # Visualize cluster state.
            if args.visualize and 'graphviz' in sys.modules:
                _render_cluster(data, args.machine_count, iteration)

            # Collect data.
            num_of_crossing_edges, load = _cluster_stats(
                data, args.machine_count)
            crossing_edges_history.append(num_of_crossing_edges)
            load_history.append(load)
            iteration_delta_time = time.time() - iteration_start_time
            log.info("Number of crossing edges %s", num_of_crossing_edges)
            log.info("Cluster load %s", load)

            # Wait for DGP a bit. Every iteration contributes to the total
            # duration exactly once: the interval if the iteration was
            # shorter than the interval, its own time otherwise.
            if iteration_delta_time < args.iteration_interval:
                time.sleep(args.iteration_interval - iteration_delta_time)
                duration += args.iteration_interval
            else:
                duration += iteration_delta_time

        # TODO (buda): Somehow align with DGP turns. Replace runtime param
        # with the query.
        assert crossing_edges_history[-1] < crossing_edges_history[0], \
            "Number of crossing edges is equal or bigger."
        for machine in range(args.machine_count):
            assert load_history[-1][machine] > 0, \
                "Machine %s is empty." % machine

        log.info("Config")
        log.info(" Machines: %s", args.machine_count)
        log.info(" Nodes: %s", args.node_count)
        log.info(" Edges: %s", args.edge_count)
        log.info("Start")
        log.info(" Crossing Edges: %s", crossing_edges_history[0])
        log.info(" Cluster Load: %s", load_history[0])
        log.info("End")
        log.info(" Crossing Edges: %s", crossing_edges_history[-1])
        log.info(" Cluster Load: %s", load_history[-1])
        log.info("Stats")
        log.info(" Duration: %ss", duration)
    finally:
        if session is not None:
            session.close()
        driver.close()
def str_to_bool(value):
    """Convert a command line value to a bool.

    `type=bool` must not be used with argparse because bool() of any
    non-empty string, including "False", is True.

    Args:
        value: A bool, or a string such as "true"/"false", "yes"/"no",
            "on"/"off" or "1"/"0" (case insensitive). The empty string is
            treated as False.

    Returns:
        The parsed boolean.

    Raises:
        argparse.ArgumentTypeError: If the value is not recognized.
    """
    if isinstance(value, bool):
        return value
    normalized = value.strip().lower()
    if normalized in ("1", "true", "t", "yes", "y", "on"):
        return True
    if normalized in ("", "0", "false", "f", "no", "n", "off"):
        return False
    raise argparse.ArgumentTypeError(
        "Expected a boolean value, got %r." % value)


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # Prefer the release build and fall back to the debug build.
    memgraph_binary = os.path.join(PROJECT_DIR, "build",
                                   "memgraph_distributed")
    if not os.path.exists(memgraph_binary):
        memgraph_binary = os.path.join(PROJECT_DIR, "build_debug",
                                       "memgraph_distributed")

    parser = argparse.ArgumentParser()
    parser.add_argument("--memgraph", default=memgraph_binary)
    parser.add_argument("--endpoint", type=str,
                        default="bolt://localhost:7687")
    parser.add_argument("--username", type=str, default="")
    parser.add_argument("--password", type=str, default="")
    parser.add_argument("--encrypted", type=str_to_bool, default=False)
    parser.add_argument("--visualize", type=str_to_bool, default=False)
    parser.add_argument("--machine-count", type=int, default=3)
    parser.add_argument("--node-count", type=int, default=1000000)
    parser.add_argument("--edge-count", type=int, default=1000000)
    parser.add_argument("--iteration-count", type=int, default=10)
    parser.add_argument("--iteration-interval", type=float, default=5)
    args = parser.parse_args()

    run_distributed_memgraph(args)
    run_test(args)
    terminate()