2018-04-21 21:36:28 +08:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
'''
|
|
|
|
Durability Stress Test
|
|
|
|
'''
|
|
|
|
|
|
|
|
# TODO (mg_durability_stress_test): extend once we add full durability mode
|
|
|
|
|
|
|
|
import atexit
|
|
|
|
import multiprocessing
|
|
|
|
import os
|
|
|
|
import random
|
|
|
|
import shutil
|
|
|
|
import subprocess
|
|
|
|
import time
|
|
|
|
import threading
|
|
|
|
|
|
|
|
from common import connection_argument_parser, SessionCache
|
|
|
|
from multiprocessing import Pool, Manager
|
|
|
|
|
|
|
|
# Constants and args
|
|
|
|
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
|
|
|
|
BASE_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "..", ".."))
|
|
|
|
BUILD_DIR = os.path.join(BASE_DIR, "build")
|
2019-12-05 20:24:30 +08:00
|
|
|
DATA_DIR = os.path.join(BUILD_DIR, "mg_data")
|
2018-04-21 21:36:28 +08:00
|
|
|
if "THREADS" in os.environ:
|
|
|
|
DB_WORKERS = os.environ["THREADS"]
|
|
|
|
else:
|
|
|
|
DB_WORKERS = multiprocessing.cpu_count()
|
|
|
|
# The snapshot interval has to be in sync with Memgraph's parameter
|
|
|
|
# --snapshot-cycle-sec
|
|
|
|
SNAPSHOT_CYCLE_SEC = 5
|
|
|
|
|
|
|
|
# Parse command line arguments
|
|
|
|
parser = connection_argument_parser()
|
|
|
|
|
|
|
|
parser.add_argument("--memgraph", default=os.path.join(BUILD_DIR,
|
|
|
|
"memgraph"))
|
|
|
|
parser.add_argument("--log-file", default="")
|
|
|
|
parser.add_argument("--verbose", action="store_const",
|
|
|
|
const=True, default=False)
|
2019-12-05 20:24:30 +08:00
|
|
|
parser.add_argument("--data-directory", default=DATA_DIR)
|
2018-04-21 21:36:28 +08:00
|
|
|
parser.add_argument("--num-clients", default=multiprocessing.cpu_count())
|
|
|
|
parser.add_argument("--num-steps", type=int, default=5)
|
|
|
|
args = parser.parse_args()
|
|
|
|
|
|
|
|
# Find Memgraph directory (alternative location)
|
|
|
|
if not os.path.exists(args.memgraph):
|
|
|
|
args.memgraph = os.path.join(BASE_DIR, "build_release", "memgraph")
|
|
|
|
|
|
|
|
# Memgraph run command construction
|
|
|
|
cwd = os.path.dirname(args.memgraph)
|
2019-12-08 18:42:59 +08:00
|
|
|
cmd = [args.memgraph, "--bolt-num-workers=" + str(DB_WORKERS),
|
2019-12-05 20:24:30 +08:00
|
|
|
"--storage-properties-on-edges=true",
|
|
|
|
"--storage-snapshot-on-exit=false",
|
|
|
|
"--storage-snapshot-interval-sec=5",
|
|
|
|
"--storage-snapshot-retention-count=2",
|
|
|
|
"--storage-wal-enabled=true",
|
|
|
|
"--storage-recover-on-startup=true",
|
|
|
|
"--query-execution-timeout-sec=600"]
|
2018-04-21 21:36:28 +08:00
|
|
|
if not args.verbose:
|
|
|
|
cmd += ["--min-log-level", "1"]
|
|
|
|
if args.log_file:
|
|
|
|
cmd += ["--log-file", args.log_file]
|
2019-12-05 20:24:30 +08:00
|
|
|
if args.data_directory:
|
|
|
|
cmd += ["--data-directory", args.data_directory]
|
2018-04-21 21:36:28 +08:00
|
|
|
|
|
|
|
data_manager = Manager()
|
|
|
|
data = data_manager.dict()
|
|
|
|
|
|
|
|
# Pretest cleanup
|
2019-12-05 20:24:30 +08:00
|
|
|
if os.path.exists(DATA_DIR):
|
|
|
|
shutil.rmtree(DATA_DIR)
|
2018-04-21 21:36:28 +08:00
|
|
|
|
|
|
|
atexit.register(SessionCache.cleanup)
|
|
|
|
|
|
|
|
|
|
|
|
@atexit.register
|
|
|
|
def clean_memgraph():
|
|
|
|
global proc_mg
|
|
|
|
if proc_mg is None:
|
|
|
|
return
|
|
|
|
if proc_mg.poll() is not None:
|
|
|
|
return
|
|
|
|
proc_mg.kill()
|
|
|
|
proc_mg.wait()
|
|
|
|
|
|
|
|
|
|
|
|
def wait_for_server(port, delay=0.1):
|
|
|
|
cmd = ["nc", "-z", "-w", "1", "127.0.0.1", port]
|
|
|
|
while subprocess.call(cmd) != 0:
|
|
|
|
time.sleep(0.01)
|
|
|
|
time.sleep(delay)
|
|
|
|
|
|
|
|
|
|
|
|
def run_memgraph():
|
|
|
|
global proc_mg
|
2019-12-05 20:24:30 +08:00
|
|
|
proc_mg = subprocess.Popen(cmd, cwd=cwd)
|
2018-04-21 21:36:28 +08:00
|
|
|
# Wait for Memgraph to finish the recovery process
|
|
|
|
wait_for_server(args.endpoint.split(":")[1])
|
|
|
|
|
|
|
|
|
|
|
|
def run_client(id, data):
|
|
|
|
# init
|
|
|
|
session = SessionCache.argument_session(args)
|
|
|
|
data.setdefault(id, 0)
|
|
|
|
counter = data[id]
|
|
|
|
|
|
|
|
# Check recovery for this client
|
|
|
|
num_nodes_db = session.run(
|
|
|
|
"MATCH (n:%s) RETURN count(n) as cnt" %
|
|
|
|
("Client%d" % id)).data()[0]["cnt"]
|
|
|
|
print("Client%d DB: %d; ACTUAL: %d" % (id, num_nodes_db, data[id]))
|
|
|
|
|
|
|
|
# Execute a new set of write queries
|
|
|
|
while True:
|
|
|
|
try:
|
|
|
|
session.run("CREATE (n:%s {id:%s}) RETURN n;" %
|
|
|
|
("Client%d" % id, counter)).consume()
|
|
|
|
counter += 1
|
|
|
|
if counter % 100000 == 0:
|
|
|
|
print("Client %d executed %d" % (id, counter))
|
|
|
|
except Exception:
|
|
|
|
print("DB isn't reachable any more")
|
|
|
|
break
|
|
|
|
data[id] = counter
|
|
|
|
print("Client %d executed %d" % (id, counter))
|
|
|
|
|
|
|
|
|
|
|
|
def run_step():
|
|
|
|
with Pool(args.num_clients) as p:
|
|
|
|
p.starmap(run_client, [(id, data)
|
|
|
|
for id in range(1, args.num_clients + 1)])
|
|
|
|
|
|
|
|
|
|
|
|
def main():
|
|
|
|
for step in range(1, args.num_steps + 1):
|
|
|
|
print("#### Step %d" % step)
|
|
|
|
run_memgraph()
|
|
|
|
thread = threading.Thread(target=run_step)
|
|
|
|
thread.daemon = True
|
|
|
|
thread.start()
|
|
|
|
# Kill Memgraph at arbitrary point in time. It makse sense to ensure
|
|
|
|
# that at least one snapshot has to be generated beucase we want to
|
|
|
|
# test the entire recovery process (Snapshot + WAL).
|
|
|
|
# Also it makes sense to run this test for a longer period of time
|
|
|
|
# but with small --snapshot-cycle-sec to force that Memgraph is being
|
|
|
|
# killed in the middle of generating the snapshot.
|
|
|
|
time.sleep(
|
|
|
|
random.randint(2 * SNAPSHOT_CYCLE_SEC, 3 * SNAPSHOT_CYCLE_SEC))
|
|
|
|
clean_memgraph()
|
|
|
|
|
|
|
|
# Final check
|
|
|
|
run_memgraph()
|
|
|
|
session = SessionCache.argument_session(args)
|
|
|
|
num_nodes_db = \
|
|
|
|
session.run("MATCH (n) RETURN count(n) AS cnt").data()[0]["cnt"]
|
|
|
|
num_nodes = sum(data.values())
|
|
|
|
# Check that more than 99.9% of data is recoverable.
|
|
|
|
# NOTE: default WAL flush interval is 1ms.
|
|
|
|
# Once the full sync durability mode is introduced,
|
|
|
|
# this test has to be extended.
|
|
|
|
assert num_nodes_db > 0.999 * num_nodes, \
|
|
|
|
"Memgraph lost more than 0.001 of data!"
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
|
main()
|