memgraph/tools/apollo/generate

#!/usr/bin/python3
import json
import os
import shutil
import subprocess
import sys

class UnbufferedFile:
    """Wrapper around a file-like object that flushes after every write."""

    def __init__(self, f):
        # the wrapped stream; all output is forwarded to it
        self._file = f

    def write(self, data):
        """Forward `data` to the wrapped stream and flush immediately."""
        stream = self._file
        stream.write(data)
        self.flush()

    def flush(self):
        """Flush the wrapped stream."""
        self._file.flush()

    def isatty(self):
        """Always report a terminal, whatever the wrapped stream is."""
        return True

# Replace both standard streams with wrappers that flush on every write, so
# that no output is held back in a buffer
sys.stdout, sys.stderr = UnbufferedFile(sys.stdout), UnbufferedFile(sys.stderr)

# paths (everything is derived from the real location of this script)
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
# the base directory is two levels above the script, the workspace one more
BASE_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, os.pardir, os.pardir))
WORKSPACE_DIR = os.path.normpath(os.path.join(BASE_DIR, os.pardir))
BASE_DIR_NAME = os.path.split(BASE_DIR)[1]
# directories inside the base directory
LIBS_DIR = os.path.join(BASE_DIR, "libs")
BUILD_DIR = os.path.join(BASE_DIR, "build")
# directories inside the build directory
OUTPUT_DIR = os.path.join(BUILD_DIR, "apollo")
TESTS_DIR = os.path.join(BUILD_DIR, "tests")

# output lists: descriptions of the created archives, of the runs and of the
# data processing steps, filled in while the script executes
ARCHIVES, RUNS, DATA_PROCESS = [], [], []

# generation mode: the first command line argument, "diff" when none is given
mode = sys.argv[1] if len(sys.argv) >= 2 else "diff"

# helper functions
def run_cmd(cmd, cwd):
    """Run `cmd` inside the directory `cwd` and return its stdout as text.

    The output is decoded as UTF-8. subprocess.CalledProcessError is raised
    when the command exits with a non-zero status.
    """
    raw_output = subprocess.check_output(cmd, cwd = cwd)
    return raw_output.decode("utf-8")

def find_memgraph_binary(loc):
    """Return the name of an executable file "memgraph*" located in `loc`.

    `find` is limited to regular files directly inside `loc`. Only the first
    line of its output is used and the leading "./" is cut off; an empty
    string is returned when `find` prints nothing.
    """
    find_cmd = ["find", ".", "-maxdepth", "1", "-executable", "-type", "f",
            "-name", "memgraph*"]
    first_hit = run_cmd(find_cmd, loc).partition("\n")[0]
    return first_hit[2:]

def generate_run(name, typ = "run", supervisor = "", commands = "",
        arguments = "", enable_network = False,
        outfile_paths = "", infile = "", slave_group = "local"):
    """Build the dictionary that describes a single run.

    `commands` is terminated with a newline when it doesn't end with one
    already; all other arguments are stored unchanged (`typ` is stored under
    the key "type").
    """
    terminated = commands if commands.endswith("\n") else commands + "\n"
    return {
        "name": name,
        "type": typ,
        "supervisor": supervisor,
        "commands": terminated,
        "arguments": arguments,
        "enable_network": enable_network,
        "outfile_paths": outfile_paths,
        "infile": infile,
        "slave_group": slave_group,
    }

def generate_archive(name, short_name, archive):
    """Build the dictionary that describes a single archive."""
    return {"name": name, "short_name": short_name, "archive": archive}

def create_archive(name, files, cwd):
    """Pack `files` into OUTPUT_DIR/<name>.tar.gz and return the file name.

    All paths are stored in the archive relative to `cwd` (tar is executed
    with `-C cwd`). The list passed in as `files` is not modified.

    Raises subprocess.CalledProcessError when tar fails.
    """
    oname = name + ".tar.gz"
    ofile = os.path.join(OUTPUT_DIR, oname)
    print("Creating archive:", name)
    # build a new list instead of overwriting the caller's list in place
    relative_files = [os.path.relpath(path, cwd) for path in files]
    subprocess.run(["tar", "-cpzf", ofile, "-C", cwd] + relative_files,
            check = True)
    return oname

def store_metadata(cwd, name, data):
    """Serialize `data` as JSON into the file <cwd>/<name>.json.

    An already existing file is overwritten.
    """
    # the context manager closes (and thereby flushes) the file
    # deterministically, even when json.dump raises
    with open(os.path.join(cwd, name + ".json"), "w") as f:
        json.dump(data, f)

# create output directory
# (an existing directory is removed first, so every invocation starts with an
# empty OUTPUT_DIR)
if os.path.exists(OUTPUT_DIR):
    shutil.rmtree(OUTPUT_DIR)
os.makedirs(OUTPUT_DIR)

# store memgraph binary to archive
binary_name = find_memgraph_binary(BUILD_DIR)
binary_path = os.path.join(BUILD_DIR, binary_name)
# NOTE(review): presumably a link named "memgraph" that points to the binary
# found above - confirm against the build setup
binary_link_path = os.path.join(BUILD_DIR, "memgraph")
config_path = os.path.join(BASE_DIR, "config")
config_copy_path = os.path.join(BUILD_DIR, "config")
# replace any previous copy of the config directory inside BUILD_DIR with a
# fresh one
if os.path.exists(config_copy_path):
    shutil.rmtree(config_copy_path)
shutil.copytree(config_path, config_copy_path)
# the archive is created relative to BUILD_DIR, so it contains the binary and
# the config copy without any leading directories
archive = create_archive("binary", [binary_path, config_copy_path], BUILD_DIR)
ARCHIVES.append(generate_archive("Binary", "binary", archive))

# store documentation to archive
docs_path = os.path.join(BASE_DIR, "docs", "doxygen", "html")
# docs_path is both the archived path and the working directory, so the only
# entry handed to tar is "." (the contents of the html directory)
archive = create_archive("doxygen_documentation", [docs_path], docs_path)
ARCHIVES.append(generate_archive("Doxygen documentation", "doxygen_documentation", archive))

# TODO: store user documentation to archive

# cppcheck run
cppcheck = os.path.join(BASE_DIR, "tools", "apollo", "cppcheck")
# the sources to check, the repository metadata and the cppcheck script itself
check_dirs = [os.path.join(BASE_DIR, x) for x in ["src", "tests", "poc",
        ".git"]] + [cppcheck]
archive = create_archive("cppcheck", check_dirs, WORKSPACE_DIR)
# the script is started through its path relative to the workspace
cmd = os.path.relpath(cppcheck, WORKSPACE_DIR)
# the output file is ".cppcheck_errors" next to the script; every dot is
# escaped with a backslash (presumably because the path is interpreted as a
# regular expression by the consumer - confirm); raw strings are used because
# "\." is an invalid escape sequence in a normal string literal
outfile_paths = r"\./" + cmd.replace("cppcheck", ".cppcheck_errors").replace(".", r"\.")
RUNS.append(generate_run("cppcheck", commands = 'TIMEOUT=1000 ./{} {}'.format(cmd, mode),
        infile = archive, outfile_paths = outfile_paths))

# ctest tests (`ctest -N` only lists the tests, it doesn't run them)
ctest_output = run_cmd(["ctest", "-N"], TESTS_DIR)

# test ordering: first unit, then concurrent, then everything else
CTEST_ORDER = {"unit": 0, "concurrent": 1}
CTEST_DELIMITER = "__"
# ctest prefixes all test names with BASE_DIR_NAME
ctest_prefix = BASE_DIR_NAME + CTEST_DELIMITER

tests = []
for row in ctest_output.split("\n"):
    # rows that don't contain the prefix don't describe a test
    if ctest_prefix not in row:
        continue
    name = row.split(":")[1].strip().replace(ctest_prefix, "")
    # the first name component is the directory of the test binary
    path = os.path.join(TESTS_DIR, name.replace(CTEST_DELIMITER, "/", 1))
    group = name.partition(CTEST_DELIMITER)[0]
    order = CTEST_ORDER.get(group, len(CTEST_ORDER))
    tests.append((order, name, path))

# tuples compare element by element: by order first, then by name
tests = sorted(tests)

# all paths inside the test archives are relative to the parent of BASE_DIR
cwd = os.path.dirname(BASE_DIR)

for _order, name, path in tests:

    # TODO: integration_query_engine is currently ignored because it
    # doesn't include its compile dependencies properly
    # (the "integration__query_engine" handling below is unreachable until
    # this skip is removed)
    if name == "integration__query_engine":
        continue

    # every test is shipped with its binary and its CMake directory
    dirname = os.path.dirname(path)
    cmakedir = os.path.join(dirname, "CMakeFiles",
            BASE_DIR_NAME + CTEST_DELIMITER + name + ".dir")
    files = [path, cmakedir]

    # extra files for specific tests
    if name in ["unit__fswatcher", "integration__query_engine"]:
        files.append(os.path.normpath(os.path.join(dirname, "..", "data")))
    if name == "integration__query_engine":
        files.append(os.path.normpath(os.path.join(dirname, "..", "compiled")))
        files.append(os.path.join(BUILD_DIR, "include"))
        for i in ["hardcoded_query", "stream", "template"]:
            files.append(os.path.join(dirname, i))

    # larger timeout for benchmark tests
    prefix = ""
    if name.startswith("benchmark"):
        prefix = "TIMEOUT=600 "

    infile = create_archive(name, files, cwd = cwd)

    # the test is started from the directory that contains its binary
    exepath = os.path.relpath(path, cwd)
    commands = "cd {}\n{}./{}\n".format(os.path.dirname(exepath),
            prefix, os.path.basename(exepath))

    # unit tests additionally report everything inside their CMake directory;
    # dots are escaped with a backslash and raw strings are used because "\."
    # is an invalid escape sequence in a normal string literal
    outfile_paths = ""
    if name.startswith("unit"):
        cmakedir_rel = os.path.relpath(cmakedir, WORKSPACE_DIR)
        outfile_paths = r"\./" + cmakedir_rel.replace(".", r"\.") + ".+\n"
    run = generate_run(name, commands = commands, infile = infile,
            outfile_paths = outfile_paths)

    RUNS.append(run)

# quality assurance tests
qa_path = os.path.join(BASE_DIR, "tests", "qa")
# the tests are shipped together with the binary, its link and the config
infile = create_archive("quality_assurance", [qa_path, binary_path,
        binary_link_path, config_path], cwd = WORKSPACE_DIR)
commands = "cd {}/tests/qa\n./continuous_integration\n".format(
        BASE_DIR_NAME)
# a raw string is used for the output path because "\." is an invalid escape
# sequence in a normal string literal
RUNS.append(generate_run("quality_assurance", commands = commands,
        infile = infile,
        outfile_paths = r"\./{}/tests/qa/\.quality_assurance_status".format(
        BASE_DIR_NAME)))

# build release paths
# (in "release" mode the "build" directory is used, "build_release" otherwise)
release_dir_name = "build" if mode == "release" else "build_release"
BUILD_RELEASE_DIR = os.path.join(BASE_DIR, release_dir_name)
binary_release_name = find_memgraph_binary(BUILD_RELEASE_DIR)
binary_release_link_path = os.path.join(BUILD_RELEASE_DIR, "memgraph")
binary_release_path = os.path.join(BUILD_RELEASE_DIR, binary_release_name)

# macro benchmark tests
MACRO_BENCHMARK_ARGS = (
    "QuerySuite MemgraphRunner "
    "--groups aggregation 1000_create unwind_create dense_expand match "
    "--no-strict")
macro_bench_path = os.path.join(BASE_DIR, "tests", "macro_benchmark")
harness_client_binaries = os.path.join(BUILD_RELEASE_DIR, "tests",
        "macro_benchmark")
postgresql_lib_dir = os.path.join(LIBS_DIR, "postgresql", "lib")
# the benchmark runs against the release binary
infile = create_archive("macro_benchmark", [binary_release_path,
        binary_release_link_path, macro_bench_path, config_path,
        harness_client_binaries, postgresql_lib_dir], cwd = WORKSPACE_DIR)
supervisor = "./memgraph/tests/macro_benchmark/harness.py"
# a raw string is used because "\." is an invalid escape sequence in a normal
# string literal
outfile_paths = r"\./memgraph/tests/macro_benchmark/\.harness_summary"
RUNS.append(generate_run("macro_benchmark", supervisor = supervisor,
        arguments = MACRO_BENCHMARK_ARGS, infile = infile,
        outfile_paths = outfile_paths))

# macro benchmark parent tests
# (only in "diff" mode: the same benchmark is run for the checkout in
# WORKSPACE_DIR/parent and both summaries are handed to a data process)
if mode == "diff":
    PARENT_DIR = os.path.join(WORKSPACE_DIR, "parent")
    BUILD_PARENT_DIR = os.path.join(PARENT_DIR, "build")
    LIBS_PARENT_DIR = os.path.join(PARENT_DIR, "libs")
    binary_parent_name = find_memgraph_binary(BUILD_PARENT_DIR)
    binary_parent_path = os.path.join(BUILD_PARENT_DIR, binary_parent_name)
    binary_parent_link_path = os.path.join(BUILD_PARENT_DIR, "memgraph")
    parent_config_path = os.path.join(PARENT_DIR, "config")
    parent_macro_bench_path = os.path.join(PARENT_DIR, "tests", "macro_benchmark")
    parent_harness_client_binaries = os.path.join(BUILD_PARENT_DIR, "tests",
            "macro_benchmark")
    parent_postgresql_lib_dir = os.path.join(LIBS_PARENT_DIR, "postgresql", "lib")
    infile = create_archive("macro_benchmark_parent", [binary_parent_path,
            binary_parent_link_path, parent_macro_bench_path, parent_config_path,
            parent_harness_client_binaries, parent_postgresql_lib_dir],
            cwd = WORKSPACE_DIR)
    # NOTE(review): the parent harness is addressed inside a "harness"
    # subdirectory, unlike the harness of the current checkout - confirm that
    # this matches the layout of the parent
    supervisor = "./parent/tests/macro_benchmark/harness/harness.py"
    args = MACRO_BENCHMARK_ARGS + " --RunnerBin " + binary_parent_path
    # raw strings are used for the output paths because "\." is an invalid
    # escape sequence in a normal string literal
    outfile_paths = r"\./parent/tests/macro_benchmark/harness/\.harness_summary"
    RUNS.append(generate_run("macro_benchmark_parent", supervisor = supervisor,
            arguments = args, infile = infile, outfile_paths = outfile_paths))

    # macro benchmark comparison data process
    script_path = os.path.join(BASE_DIR, "tools", "apollo",
            "macro_benchmark_summary")
    infile = create_archive("macro_benchmark_summary", [script_path],
            cwd = WORKSPACE_DIR)
    # arguments of the script: the two summaries named by the output paths of
    # the benchmark runs, followed by ".harness_summary", which is also the
    # output path of this step
    cmd = ("./memgraph/tools/apollo/macro_benchmark_summary "
            "macro_benchmark/memgraph/tests/macro_benchmark/.harness_summary "
            "macro_benchmark_parent/parent/tests/macro_benchmark/harness/.harness_summary "
            ".harness_summary")
    # NOTE(review): the dot of ".harness_summary" is not escaped here, unlike
    # in the other output paths - confirm whether that is intended
    outfile_paths = r"\./.harness_summary"
    DATA_PROCESS.append(generate_run("macro_benchmark_summary", typ = "data process",
            commands = cmd, infile = infile, outfile_paths = outfile_paths))

# stress tests (run against the release binary)
stress_path = os.path.join(BASE_DIR, "tests", "stress")
stress_files = [binary_release_path, binary_release_link_path, stress_path,
        config_path]
infile = create_archive("stress", stress_files, cwd = WORKSPACE_DIR)
cmd = "cd memgraph/tests/stress\nTIMEOUT=600 ./continuous_integration"
stress_run = generate_run("stress", commands = cmd, infile = infile)
RUNS.append(stress_run)

# stress tests for daily release (large dataset), the archive is shared with
# the regular stress run
if mode == "release":
    cmd = ("cd memgraph/tests/stress\n"
            "TIMEOUT=43200 ./continuous_integration --large-dataset")
    stress_large_run = generate_run("stress_large", commands = cmd,
            infile = infile, slave_group = "remote_16c56g")
    RUNS.append(stress_large_run)

# store ARCHIVES and RUNS (the data process entries are appended to the runs)
for metadata_name, metadata in (("archives", ARCHIVES),
        ("runs", RUNS + DATA_PROCESS)):
    store_metadata(OUTPUT_DIR, metadata_name, metadata)