memgraph/tools/apollo/generate
florijan be16409da2 Harness MATCH tests refactored
Summary:
1. Test setup rewritten to take approximately 8 seconds. Note that edges are created by using:
`MATCH (a) WITH a MATCH (b) WITH b WHERE rand() < X CREATE (a)-[:ET]->(b)`
Where `X` is a threshold calculated so that the expected number of created edges equals the desired edge count. This seems to be the only feasible way of generating a large number of edges, since query execution does not depend on edge count, but on vertex count.

2. Using the new `assert` function to verify graph state. I recommend doing that in all the harness tests (I don't think we currently have something better).

3. All tests rewritten to take around 200ms per iteration.

4. Tests are using SKIP to avoid sending data to the client, but ensure that appropriate operations get executed. This currently seems like the best way of removing unwanted side-effects.

Harness will cost us our sanity. And it doesn't even provide good quality regression testing we really need :(

Reviewers: buda, mislav.bradac, mferencevic

Reviewed By: mferencevic

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D752
2017-09-07 12:05:05 +02:00

259 lines
10 KiB
Python
Executable File

#!/usr/bin/python3
import json
import os
import shutil
import subprocess
import sys
class UnbufferedFile:
    """File-like wrapper that flushes the wrapped stream after every write.

    Only the small part of the file protocol implemented below is
    provided: ``write``, ``flush`` and ``isatty``.
    """

    def __init__(self, f):
        """Wrap *f*, an object providing ``write`` and ``flush``."""
        self._file = f

    def write(self, data):
        """Write *data* to the wrapped file and flush it immediately.

        Returns whatever the wrapped ``write`` returns (the number of
        characters written for text streams), so callers that rely on the
        standard ``write`` contract keep working.
        """
        written = self._file.write(data)
        self.flush()
        return written

    def flush(self):
        """Flush the wrapped file."""
        self._file.flush()

    def isatty(self):
        """Report a terminal unconditionally, whatever the wrapped file is.

        NOTE(review): presumably this keeps terminal-style output enabled
        when the script's output is captured -- confirm before changing.
        """
        return True
# Wrap both standard streams so that every write is flushed immediately.
sys.stdout, sys.stderr = (UnbufferedFile(sys.stdout),
                          UnbufferedFile(sys.stderr))
# Filesystem layout; every path is derived from the location of this script.
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
# The project root is two levels above the script directory and the
# workspace is the directory that contains the project root.
BASE_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, os.pardir, os.pardir))
BASE_DIR_NAME = os.path.basename(BASE_DIR)
WORKSPACE_DIR = os.path.normpath(os.path.join(BASE_DIR, os.pardir))
# Directories inside the project root.
LIBS_DIR = os.path.join(BASE_DIR, "libs")
BUILD_DIR = os.path.join(BASE_DIR, "build")
# Directories inside the build directory.
TESTS_DIR = os.path.join(BUILD_DIR, "tests")
OUTPUT_DIR = os.path.join(BUILD_DIR, "apollo")
# Accumulators for the generated metadata. ARCHIVES is written to
# "archives.json"; RUNS followed by DATA_PROCESS is written to "runs.json".
ARCHIVES, RUNS, DATA_PROCESS = [], [], []
# Generation mode: the first command-line argument, "diff" when none is given.
mode = sys.argv[1] if len(sys.argv) >= 2 else "diff"
# helper functions
def run_cmd(cmd, cwd):
    """Run *cmd* inside directory *cwd* and return its standard output.

    The captured output is decoded as UTF-8. A non-zero exit status raises
    ``subprocess.CalledProcessError``.
    """
    raw_output = subprocess.check_output(cmd, cwd=cwd)
    return raw_output.decode("utf-8")
def find_memgraph_binary(loc):
    """Return the name of an executable regular file ``memgraph*`` in *loc*.

    Only *loc* itself is searched (``-maxdepth 1``). The first line printed
    by ``find`` is used; when nothing matches, the result is an empty string.
    """
    find_cmd = ["find", ".", "-maxdepth", "1", "-executable", "-type", "f",
                "-name", "memgraph*"]
    first_hit = run_cmd(find_cmd, loc).split("\n")[0]
    # find prints matches as "./<name>"; strip the leading "./".
    return first_hit[2:]
def generate_run(name, typ="run", supervisor="", commands="",
                 arguments="", enable_network=False,
                 outfile_paths="", infile=""):
    """Build the dictionary describing a single run.

    All arguments are stored unchanged under keys of the same name, except
    that *typ* is stored under ``"type"`` and *commands* is given a trailing
    newline when it does not already end with one.
    """
    if not commands.endswith("\n"):
        commands = commands + "\n"
    return {
        "name": name,
        "type": typ,
        "supervisor": supervisor,
        "commands": commands,
        "arguments": arguments,
        "enable_network": enable_network,
        "outfile_paths": outfile_paths,
        "infile": infile,
    }
def generate_archive(name, short_name, archive):
    """Build the dictionary describing a single archive.

    The three arguments are stored unchanged under keys of the same name.
    """
    return {"name": name, "short_name": short_name, "archive": archive}
def create_archive(name, files, cwd):
    """Pack *files* into ``<OUTPUT_DIR>/<name>.tar.gz`` using ``tar``.

    Every entry of *files* is converted to a path relative to *cwd* and tar
    is run with ``-C cwd``, so the archive members are stored relative to
    *cwd*. The caller's *files* list is left untouched.

    Returns the archive file name (``<name>.tar.gz``), not its full path.
    Raises ``subprocess.CalledProcessError`` when tar exits non-zero.
    """
    oname = name + ".tar.gz"
    ofile = os.path.join(OUTPUT_DIR, oname)
    print("Creating archive:", name)
    # Build a new list rather than rewriting ``files`` in place: callers
    # would otherwise find their absolute paths silently replaced.
    members = [os.path.relpath(path, cwd) for path in files]
    subprocess.run(["tar", "-cpzf", ofile, "-C", cwd] + members, check=True)
    return oname
def store_metadata(cwd, name, data):
    """Serialize *data* as JSON into the file ``<cwd>/<name>.json``.

    The file is opened through a context manager so that it is flushed and
    closed deterministically, including when serialization fails.
    """
    with open(os.path.join(cwd, name + ".json"), "w") as metadata_file:
        json.dump(data, metadata_file)
# Start from an empty output directory: anything left over from an earlier
# run is removed before the directory is created again.
if os.path.exists(OUTPUT_DIR):
    shutil.rmtree(OUTPUT_DIR)
os.makedirs(OUTPUT_DIR)
# Archive the memgraph binary together with a copy of the configuration.
config_path = os.path.join(BASE_DIR, "config")
config_copy_path = os.path.join(BUILD_DIR, "config")
binary_name = find_memgraph_binary(BUILD_DIR)
binary_path = os.path.join(BUILD_DIR, binary_name)
binary_link_path = os.path.join(BUILD_DIR, "memgraph")

# The configuration is copied into the build directory (replacing any copy
# left there earlier) so that it can be archived relative to BUILD_DIR.
if os.path.exists(config_copy_path):
    shutil.rmtree(config_copy_path)
shutil.copytree(config_path, config_copy_path)

binary_archive_members = [binary_path, config_copy_path]
archive = create_archive("binary", binary_archive_members, BUILD_DIR)
ARCHIVES.append(generate_archive("Binary", "binary", archive))
# Archive the doxygen HTML output. The archive is created relative to the
# documentation directory itself.
docs_path = os.path.join(BASE_DIR, "docs", "doxygen", "html")
archive = create_archive("doxygen_documentation", [docs_path], docs_path)
docs_archive_entry = generate_archive("Doxygen documentation",
                                      "doxygen_documentation", archive)
ARCHIVES.append(docs_archive_entry)

# TODO: store user documentation to archive
# cppcheck run
cppcheck = os.path.join(BASE_DIR, "tools", "apollo", "cppcheck")
# The archive holds the checked directories plus the cppcheck script itself.
check_dirs = [os.path.join(BASE_DIR, x)
              for x in ["src", "tests", "poc", ".git"]]
check_dirs.append(cppcheck)
archive = create_archive("cppcheck", check_dirs, WORKSPACE_DIR)
cmd = os.path.relpath(cppcheck, WORKSPACE_DIR)
# The output file is ".cppcheck_errors" next to the script. Dots are
# backslash-escaped (presumably outfile_paths is treated as a regular
# expression -- confirm against Apollo). Raw strings are used because "\."
# is not a valid escape sequence in a normal string literal.
outfile_paths = (r"\./"
                 + cmd.replace("cppcheck", ".cppcheck_errors")
                      .replace(".", r"\."))
RUNS.append(generate_run("cppcheck",
                         commands="TIMEOUT=1000 ./{} {}".format(cmd, mode),
                         infile=archive, outfile_paths=outfile_paths))
# ctest tests
ctest_output = run_cmd(["ctest", "-N"], TESTS_DIR)
tests = []

# test ordering: first unit, then concurrent, then everything else
CTEST_ORDER = {"unit": 0, "concurrent": 1}
CTEST_DELIMITER = "__"
# ctest prefixes all test names with BASE_DIR_NAME followed by the delimiter
CTEST_PREFIX = BASE_DIR_NAME + CTEST_DELIMITER

for row in ctest_output.split("\n"):
    # only rows that contain the prefix describe a test
    if CTEST_PREFIX not in row:
        continue
    # the test name is the field after the first ":", without the prefix
    name = row.split(":")[1].strip().replace(CTEST_PREFIX, "")
    # the first name component is the directory under TESTS_DIR
    path = os.path.join(TESTS_DIR, name.replace(CTEST_DELIMITER, "/", 1))
    order = CTEST_ORDER.get(name.split(CTEST_DELIMITER)[0], len(CTEST_ORDER))
    tests.append((order, name, path))

# tuples sort by order first, then by name
tests.sort()

# archives and commands are relative to the parent of the project root;
# this does not change per test, so it is computed once
cwd = os.path.dirname(BASE_DIR)

for order, name, path in tests:
    # TODO: integration_query_engine is currently ignored because it
    # doesn't include its compile dependencies properly
    if name == "integration__query_engine":
        continue

    dirname = os.path.dirname(path)
    cmakedir = os.path.join(dirname, "CMakeFiles",
                            CTEST_PREFIX + name + ".dir")
    files = [path, cmakedir]

    # extra files for specific tests (the integration__query_engine parts
    # are unreachable while that test is skipped above)
    if name in ["unit__fswatcher", "integration__query_engine"]:
        files.append(os.path.normpath(os.path.join(dirname, "..", "data")))
    if name == "integration__query_engine":
        files.append(os.path.normpath(os.path.join(dirname, "..", "compiled")))
        files.append(os.path.join(BUILD_DIR, "include"))
        for i in ["hardcoded_query", "stream", "template"]:
            files.append(os.path.join(dirname, i))

    # larger timeout for benchmark tests
    prefix = "TIMEOUT=600 " if name.startswith("benchmark") else ""

    infile = create_archive(name, files, cwd=cwd)

    # the test is started from the directory that contains its executable
    exepath = os.path.relpath(path, cwd)
    commands = "cd {}\n{}./{}\n".format(os.path.dirname(exepath),
                                        prefix, os.path.basename(exepath))

    # unit tests additionally collect everything below their CMake
    # directory; raw strings are used because "\." is not a valid escape
    # sequence in a normal string literal
    outfile_paths = ""
    if name.startswith("unit"):
        cmakedir_rel = os.path.relpath(cmakedir, WORKSPACE_DIR)
        outfile_paths = r"\./" + cmakedir_rel.replace(".", r"\.") + ".+\n"

    run = generate_run(name, commands=commands, infile=infile,
                       outfile_paths=outfile_paths)
    RUNS.append(run)
# quality assurance tests
qa_path = os.path.join(BASE_DIR, "tests", "qa")
infile = create_archive("quality_assurance",
                        [qa_path, binary_path, binary_link_path, config_path],
                        cwd=WORKSPACE_DIR)
commands = "cd {}/tests/qa\n./continuous_integration\n".format(
    BASE_DIR_NAME)
# The status file is ".quality_assurance_status" in the qa directory. A raw
# string is used because "\." is not a valid escape sequence in a normal
# string literal; the resulting value is unchanged.
outfile_paths = r"\./{}/tests/qa/\.quality_assurance_status".format(
    BASE_DIR_NAME)
RUNS.append(generate_run("quality_assurance", commands=commands,
                         infile=infile, outfile_paths=outfile_paths))
# build release paths
# In "release" mode the release binaries are looked up in "build", in every
# other mode in "build_release".
release_build_subdir = "build" if mode == "release" else "build_release"
BUILD_RELEASE_DIR = os.path.join(BASE_DIR, release_build_subdir)
binary_release_link_path = os.path.join(BUILD_RELEASE_DIR, "memgraph")
binary_release_name = find_memgraph_binary(BUILD_RELEASE_DIR)
binary_release_path = os.path.join(BUILD_RELEASE_DIR, binary_release_name)
# macro benchmark tests
MACRO_BENCHMARK_ARGS = (
    "QuerySuite MemgraphRunner "
    "--groups aggregation 1000_create unwind_create dense_expand match "
    "--no-strict")
macro_bench_path = os.path.join(BASE_DIR, "tests", "macro_benchmark")
harness_client_binaries = os.path.join(BUILD_RELEASE_DIR, "tests",
                                       "macro_benchmark")
postgresql_lib_dir = os.path.join(LIBS_DIR, "postgresql", "lib")
infile = create_archive("macro_benchmark",
                        [binary_release_path, binary_release_link_path,
                         macro_bench_path, config_path,
                         harness_client_binaries, postgresql_lib_dir],
                        cwd=WORKSPACE_DIR)
supervisor = "./memgraph/tests/macro_benchmark/harness/harness.py"
# A raw string is used because "\." is not a valid escape sequence in a
# normal string literal; the resulting value is unchanged.
outfile_paths = r"\./memgraph/tests/macro_benchmark/harness/\.harness_summary"
RUNS.append(generate_run("macro_benchmark", supervisor=supervisor,
                         arguments=MACRO_BENCHMARK_ARGS, infile=infile,
                         outfile_paths=outfile_paths))
# macro benchmark parent tests
if mode == "diff":
    # The parent checkout lives in "<workspace>/parent" and mirrors the
    # layout of the project root.
    PARENT_DIR = os.path.join(WORKSPACE_DIR, "parent")
    BUILD_PARENT_DIR = os.path.join(PARENT_DIR, "build")
    LIBS_PARENT_DIR = os.path.join(PARENT_DIR, "libs")
    binary_parent_name = find_memgraph_binary(BUILD_PARENT_DIR)
    binary_parent_path = os.path.join(BUILD_PARENT_DIR, binary_parent_name)
    binary_parent_link_path = os.path.join(BUILD_PARENT_DIR, "memgraph")
    parent_config_path = os.path.join(PARENT_DIR, "config")
    parent_macro_bench_path = os.path.join(PARENT_DIR, "tests",
                                           "macro_benchmark")
    parent_harness_client_binaries = os.path.join(BUILD_PARENT_DIR, "tests",
                                                  "macro_benchmark")
    parent_postgresql_lib_dir = os.path.join(LIBS_PARENT_DIR, "postgresql",
                                             "lib")
    infile = create_archive("macro_benchmark_parent",
                            [binary_parent_path, binary_parent_link_path,
                             parent_macro_bench_path, parent_config_path,
                             parent_harness_client_binaries,
                             parent_postgresql_lib_dir],
                            cwd=WORKSPACE_DIR)
    supervisor = "./parent/tests/macro_benchmark/harness/harness.py"
    # same benchmark arguments, but run against the parent binary
    args = MACRO_BENCHMARK_ARGS + " --RunnerBin " + binary_parent_path
    # A raw string is used because "\." is not a valid escape sequence in a
    # normal string literal; the resulting value is unchanged.
    outfile_paths = r"\./parent/tests/macro_benchmark/harness/\.harness_summary"
    RUNS.append(generate_run("macro_benchmark_parent", supervisor=supervisor,
                             arguments=args, infile=infile,
                             outfile_paths=outfile_paths))

    # macro benchmark comparison data process
    # The command reads the summaries of the "macro_benchmark" and
    # "macro_benchmark_parent" runs, so it belongs to diff mode only.
    script_path = os.path.join(BASE_DIR, "tools", "apollo",
                               "macro_benchmark_summary")
    infile = create_archive("macro_benchmark_summary", [script_path],
                            cwd=WORKSPACE_DIR)
    cmd = ("./memgraph/tools/apollo/macro_benchmark_summary "
           "macro_benchmark/memgraph/tests/macro_benchmark/harness/.harness_summary "
           "macro_benchmark_parent/parent/tests/macro_benchmark/harness/.harness_summary "
           ".harness_summary")
    # The dot of ".harness_summary" is escaped here as well, matching the
    # other harness summary patterns; unescaped it would match any character.
    outfile_paths = r"\./\.harness_summary"
    DATA_PROCESS.append(generate_run("macro_benchmark_summary",
                                     typ="data process", commands=cmd,
                                     infile=infile,
                                     outfile_paths=outfile_paths))
# stress tests
stress_path = os.path.join(BASE_DIR, "tests", "stress")
stress_members = [binary_release_path, binary_release_link_path,
                  stress_path, config_path]
infile = create_archive("stress", stress_members, cwd=WORKSPACE_DIR)
# The run changes into the stress test directory and starts its
# continuous_integration script, pointing it at the release binary link.
cmd_template = ("cd {}/tests/stress\n"
                "TIMEOUT=600 ./continuous_integration --memgraph {}")
cmd = cmd_template.format(BASE_DIR_NAME, binary_release_link_path)
RUNS.append(generate_run("stress", commands=cmd, infile=infile))
# Write "archives.json" and "runs.json" into the output directory; the runs
# file lists the regular runs first, followed by the data process steps.
for metadata_name, metadata in (("archives", ARCHIVES),
                                ("runs", RUNS + DATA_PROCESS)):
    store_metadata(OUTPUT_DIR, metadata_name, metadata)