2019-02-06 22:40:03 +08:00
|
|
|
#!/usr/bin/python3
|
|
|
|
|
2019-06-04 21:18:30 +08:00
|
|
|
"""
|
|
|
|
This test checks the basic functionality of HA Memgraph. It incorporates
|
|
|
|
both leader election and log replication processes.
|
|
|
|
|
|
|
|
The test proceeds as follows for clusters of size 3 and 5:
|
|
|
|
1) Start the whole cluster
|
|
|
|
2) Kill random workers but leave the majority alive
|
|
|
|
3) Create a single Node
|
|
|
|
4) Bring dead nodes back to life
|
|
|
|
5) Kill random workers but leave the majority alive
|
|
|
|
6) Check if everything is ok with DB state
|
|
|
|
7) GOTO 1) and repeat 20 times
|
|
|
|
"""
|
|
|
|
|
2019-02-06 22:40:03 +08:00
|
|
|
import argparse
|
|
|
|
import os
|
|
|
|
import time
|
|
|
|
import subprocess
|
|
|
|
import sys
|
|
|
|
import random
|
|
|
|
|
|
|
|
# Directory containing this script, with symlinks resolved.
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
# Four directory levels above SCRIPT_DIR; used below as the base for
# locating build outputs and test resources.
PROJECT_DIR = os.path.normpath(
    os.path.join(SCRIPT_DIR, *([os.pardir] * 4)))

# Make modules in the parent directory importable.
sys.path.append(os.path.join(SCRIPT_DIR, os.pardir))
|
|
|
|
|
|
|
|
from ha_test import HaTestBase
|
|
|
|
|
|
|
|
|
|
|
|
class HaBasicTest(HaTestBase):
    """Create and count nodes while minorities of workers are restarted.

    The scenario implemented by ``execute`` is:

    1. start the cluster and create one node,
    2. twenty times: kill a random minority of workers, create a node,
       restart the killed workers, kill another random minority, check the
       node count, restart the killed workers,
    3. check the node count once more with all workers running.

    Relies on ``self.tester_binary``, ``self.cluster_size``,
    ``self.start_cluster``, ``self.start_worker`` and ``self.kill_worker``,
    which are not defined in this class (presumably provided by
    ``HaTestBase`` -- confirm there).
    """

    def execute_step(self, step, node_count):
        """Run the tester binary for one step and report its exit code.

        Args:
            step: ``"create"`` or ``"count"``. Any other value starts no
                client.
            node_count: value forwarded to the tester as ``--node-count``.

        Returns:
            The tester's exit code, ``1`` if it did not finish within
            30 seconds, or ``0`` if ``step`` is not recognised.
        """
        if step == "create":
            print("Executing create query")
        elif step == "count":
            print("Executing count query")
        else:
            return 0

        # Both steps use the same dashed flag spelling.
        client = subprocess.Popen([self.tester_binary,
                                   "--step", step,
                                   "--cluster-size", str(self.cluster_size),
                                   "--node-count", str(node_count)])

        # Check what happened with query execution.
        try:
            return client.wait(timeout=30)
        except subprocess.TimeoutExpired:
            print("HA client timed out!")
            client.kill()
            # Reap the killed child so it does not linger as a zombie.
            client.wait()
            return 1

    def start_workers(self, worker_ids):
        """Start every worker in ``worker_ids`` (zero-based ids)."""
        for wid in worker_ids:
            # Ids are zero-based; the log message is one-based.
            print("Starting worker {}".format(wid + 1))
            self.start_worker(wid)

    def kill_workers(self, worker_ids):
        """Kill every worker in ``worker_ids`` (zero-based ids)."""
        for wid in worker_ids:
            # Ids are zero-based; the log message is one-based.
            print("Killing worker {}".format(wid + 1))
            self.kill_worker(wid)

    def _random_minority(self):
        """Return a random set of worker ids that leaves a majority alive."""
        return random.sample(
            range(self.cluster_size),
            random.randint(0, int((self.cluster_size - 1) / 2)))

    def _check_step(self, step, node_count):
        """Run ``step`` and raise ``AssertionError`` on a non-zero result.

        An explicit raise is used instead of ``assert`` so the step still
        runs (and is still checked) when Python is started with ``-O``.
        """
        if self.execute_step(step, node_count) != 0:
            raise AssertionError(
                "Error while executing {} query".format(step))

    def execute(self):
        """Run the whole scenario.

        Raises:
            AssertionError: if any create or count step fails or times out.
        """
        self.start_cluster()

        # Make sure at least one node exists.
        self._check_step("create", 0)
        expected_results = 1

        for _ in range(20):
            # Create step
            partition = self._random_minority()
            self.kill_workers(partition)
            self._check_step("create", expected_results)
            expected_results += 1
            self.start_workers(partition)

            # Check step
            partition = self._random_minority()
            self.kill_workers(partition)
            self._check_step("count", expected_results)
            self.start_workers(partition)

        # Check that no data was lost.
        self._check_step("count", expected_results)
|
|
|
|
|
|
|
|
|
|
|
|
def find_correct_path(path):
    """Resolve ``path`` against the project's build directories.

    Returns the location under ``PROJECT_DIR/build`` when it exists on disk;
    otherwise returns the location under ``PROJECT_DIR/build_debug`` without
    checking that it exists.
    """
    primary = os.path.join(PROJECT_DIR, "build", path)
    if os.path.exists(primary):
        return primary
    return os.path.join(PROJECT_DIR, "build_debug", path)
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Location of this test's resources, relative to a root directory.
    ha_basic_dir = os.path.join("tests", "integration", "ha", "basic")

    default_memgraph = find_correct_path("memgraph_ha")
    tester_binary = find_correct_path(os.path.join(ha_basic_dir, "tester"))
    default_raft_config = os.path.join(PROJECT_DIR, ha_basic_dir, "raft.json")

    # Both options fall back to the paths computed above.
    parser = argparse.ArgumentParser()
    parser.add_argument("--memgraph", default=default_memgraph)
    parser.add_argument("--raft_config_file", default=default_raft_config)
    args = parser.parse_args()

    banner = "\033[1;36m~~ Executing test with cluster size: %d~~\033[0m"
    for cluster_size in (3, 5):
        print(banner % cluster_size)
        # The instance is not kept; only construction is needed here.
        HaBasicTest(args.memgraph, tester_binary, args.raft_config_file,
                    cluster_size)
        print("\033[1;32m~~ The test finished successfully ~~\033[0m")

    sys.exit(0)
|