memgraph/tests/distributed/card_fraud/card_fraud.py
Commit 0681040395 by Dominik Gleich: Durability distributed wal

Summary:
Recover only snapshot

Return recovery info on worker recovery

Update tests

Start wal recovery

Single node wal split

Keep track of wal possible to recover

Fix comment

Wal tx intersection

Merge branch 'master' into sync_wal_tx

Reviewers: buda, ipaljak, dgleich, vkasljevic, teon.banek

Reviewed By: teon.banek

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D1489
Date: 2018-07-24 08:55:13 +02:00

224 lines
7.8 KiB
Python

import json
import os
import time

# to change the size of the cluster, just change this parameter
NUM_MACHINES = 3

# test setup
SCENARIOS = ["point_lookup", "create_tx"]
DURATION = 300
WORKERS = 6

# constants
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
MEMGRAPH_BINARY = "memgraph"
CLIENT_BINARY = "tests/macro_benchmark/card_fraud_client"
BINARIES = [MEMGRAPH_BINARY, CLIENT_BINARY]
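
# Note: BINARIES is not referenced below; it is presumably consumed by the
# test harness to know which binaries to deploy into each jail.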

# wrappers
class WorkerWrapper:
    def __init__(self, address, worker):
        self._address = address
        self._worker = worker
        self._tid = worker.get_jail()

    def get_address(self):
        return self._address

    def __getattr__(self, name):
        if name in ["allocate_file", "read_file", "store_label"]:
            return getattr(self._worker, name)

        def func(*args, **kwargs):
            args = [self._tid] + list(args)
            return getattr(self._worker, name)(*args, **kwargs)
        return func
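
# Usage sketch: apart from the helpers whitelisted in __getattr__ above, every
# attribute access on a WorkerWrapper yields a shim that prepends the jail
# thread id before forwarding, so a call such as
#   wrapper.start(MEMGRAPH_BINARY, args)
# is assumed to reach the jail runner as
#   worker.start(tid, MEMGRAPH_BINARY, args)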

class MgCluster:
    def __init__(self, machine_ids, workers):
        # create wrappers
        self._master = WorkerWrapper(os.environ[machine_ids[0]],
                                     workers[machine_ids[0]])
        self._workers = []
        for machine_id in machine_ids[1:]:
            self._workers.append(WorkerWrapper(os.environ[machine_id],
                                               workers[machine_id]))

    def start(self):
        # start memgraph master
        self._master.start(MEMGRAPH_BINARY, [
            "--master",
            "--master-host", self._master.get_address(),
            "--master-port", "10000",
            "--durability-directory", os.path.join(SCRIPT_DIR, "snapshots",
                                                   "worker_0"),
            "--db-recover-on-startup",
            "--query-vertex-count-to-expand-existing", "-1",
            "--num-workers", str(WORKERS),
            "--rpc-num-workers", str(WORKERS),
            "--recovering-cluster-size", str(len(self._workers) + 1)
        ])

        # sleep to allow the master to startup
        time.sleep(5)

        # start memgraph workers
        for i, worker in enumerate(self._workers, start=1):
            worker.start(MEMGRAPH_BINARY, [
                "--worker", "--worker-id", str(i),
                "--worker-host", worker.get_address(),
                "--worker-port", str(10000 + i),
                "--master-host", self._master.get_address(),
                "--master-port", "10000",
                "--durability-directory", os.path.join(SCRIPT_DIR, "snapshots",
                                                       "worker_" + str(i)),
                "--db-recover-on-startup",
                "--num-workers", str(WORKERS),
                "--rpc-num-workers", str(WORKERS),
            ])

        # sleep to allow the workers to startup
        time.sleep(15)

        # store initial usage
        self._usage_start = [self._master.get_usage()]
        for worker in self._workers:
            self._usage_start.append(worker.get_usage())
        self._usage_start_time = time.time()

    def get_master_address(self):
        return self._master.get_address()

    def check_status(self):
        if not self._master.check_status():
            return False
        for worker in self._workers:
            if not worker.check_status():
                return False
        return True

    def stop(self):
        # store final usage
        self._usage_stop = [self._master.get_usage()]
        for worker in self._workers:
            self._usage_stop.append(worker.get_usage())
        self._usage_stop_time = time.time()

        # stop the master
        self._master.stop()

        # wait to allow the master and workers to die
        time.sleep(5)

        # stop the workers
        for worker in self._workers:
            worker.stop()

        # wait to allow the workers to die
        time.sleep(5)

    def get_usage(self):
        ret = []
        tdelta = self._usage_stop_time - self._usage_start_time
        for val_start, val_stop in zip(self._usage_start, self._usage_stop):
            data = {
                "cpu": (val_stop["cpu"] - val_start["cpu"]) / tdelta,
                "memory": val_stop["max_memory"] / 1024,
                "threads": val_stop["max_threads"],
                "network": {}
            }
            net_start = val_start["network"]["eth0"]
            net_stop = val_stop["network"]["eth0"]
            for i in ["bytes", "packets"]:
                data["network"][i] = {}
                for j in ["rx", "tx"]:
                    data["network"][i][j] = (net_stop[i][j] -
                                             net_start[i][j]) / tdelta
            ret.append(data)
        return ret

    def store_label(self, label):
        self._master.store_label(label)
        for worker in self._workers:
            worker.store_label(label)
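
# Shape of one MgCluster.get_usage() entry, master first (values illustrative):
#   {"cpu": 1.23,                # CPU seconds consumed per wall-clock second
#    "memory": 512.0,            # peak memory in MiB (max_memory / 1024)
#    "threads": 42,              # peak thread count
#    "network": {"bytes":   {"rx": ..., "tx": ...},   # per-second rates
#                "packets": {"rx": ..., "tx": ...}}}  # measured on eth0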

def write_scenario_summary(scenario, throughput, usage, output):
    output.write("Scenario **{}** throughput !!{:.2f}!! queries/s.\n\n".format(
        scenario, throughput))
    headers = ["Memgraph", "CPU", "Max memory", "Max threads",
               "Network RX", "Network TX"]
    output.write("<table>\n<tr>")
    for header in headers:
        output.write("<th>{}</th>".format(header))
    output.write("</tr>\n")
    for i, current in enumerate(usage):
        name = "master" if i == 0 else "worker" + str(i)
        output.write("<tr><td>{}</td>".format(name))
        for key, unit in [("cpu", "s/s"), ("memory", "MiB"), ("threads", "")]:
            fmt = ".2f" if key != "threads" else ""
            output.write(("<td>{:" + fmt + "} {}</td>").format(
                current[key], unit).strip())
        for key in ["rx", "tx"]:
            output.write("<td>{:.2f} packets/s</td>".format(
                current["network"]["packets"][key]))
        output.write("</tr>\n")
    output.write("</table>\n\n")

# main test function
def run(machine_ids, workers):
    # create output directory
    output_dir = os.path.join(SCRIPT_DIR, "output")
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    # create memgraph cluster and client
    mg_cluster = MgCluster(machine_ids, workers)
    mg_client = WorkerWrapper(os.environ[machine_ids[0]],
                              workers[machine_ids[0]])

    # execute the tests
    stats = {}
    for scenario in SCENARIOS:
        output_file = os.path.join(output_dir, scenario + ".json")

        print("Starting memgraph cluster")
        mg_cluster.store_label("Start: cluster")
        mg_cluster.start()

        print("Starting client scenario:", scenario)
        mg_cluster.store_label("Start: " + scenario)
        mg_client.start(CLIENT_BINARY, [
            "--address", mg_cluster.get_master_address(),
            "--group", "card_fraud",
            "--scenario", scenario,
            "--duration", str(DURATION),
            "--num-workers", str(WORKERS),
            "--output", output_file,
        ])

        # wait for the client to terminate and check the cluster status
        while mg_client.check_status():
            assert mg_cluster.check_status(), "The memgraph cluster has died!"
            time.sleep(2)

        # stop everything
        mg_client.wait()
        mg_cluster.store_label("Stop: " + scenario)
        mg_cluster.stop()
        mg_cluster.store_label("Stop: cluster")

        # process the stats
        data = json.loads(list(filter(lambda x: x.strip(),
                                      open(output_file).read().split("\n")))[-1])
        throughput = data["num_executed_queries"] / data["elapsed_time"]
        usage = mg_cluster.get_usage()
        stats[scenario] = (throughput, usage)

    # dump the stats
    stats_file = open(os.path.join(output_dir, ".card_fraud_summary"), "w")
    stats_file.write("==== Distributed card fraud summary: ====\n\n")
    for scenario in SCENARIOS:
        throughput, usage = stats[scenario]
        write_scenario_summary(scenario, throughput, usage, stats_file)
    stats_file.close()
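
# Minimal sketch of how the test harness is assumed to drive this module (the
# machine id names and the jail connection helper below are illustrative, not
# part of this file):
#
#   machine_ids = ["MACHINE0", "MACHINE1", "MACHINE2"]  # env vars -> addresses
#   workers = {mid: connect_to_jail(os.environ[mid]) for mid in machine_ids}
#   run(machine_ids, workers)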