#!/usr/bin/python3
"""Generate the archives and run descriptions stored in ``build/apollo``.

The optional first command line argument selects the generation mode. It
defaults to "diff"; the script also checks for the value "release".
"""
import json
import os
import re
import shutil
import subprocess
import sys

# paths
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
BASE_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "..", ".."))
WORKSPACE_DIR = os.path.normpath(os.path.join(BASE_DIR, ".."))
BASE_DIR_NAME = os.path.basename(BASE_DIR)
BUILD_DIR = os.path.join(BASE_DIR, "build")
LIBS_DIR = os.path.join(BASE_DIR, "libs")
TESTS_DIR = os.path.join(BUILD_DIR, "tests")
TOOLS_BUILD_DIR = os.path.join(BUILD_DIR, "tools")
OUTPUT_DIR = os.path.join(BUILD_DIR, "apollo")

# output lists
ARCHIVES = []
RUNS = []
DATA_PROCESS = []

# generation mode
if len(sys.argv) >= 2:
    mode = sys.argv[1]
else:
    mode = "diff"


# helper functions
def run_cmd(cmd, cwd):
    """Run `cmd` (an argument list) in directory `cwd` and return its stdout.

    The output is decoded as UTF-8. Raises subprocess.CalledProcessError when
    the command exits with a non-zero status.
    """
    ret = subprocess.run(cmd, cwd=cwd, stdout=subprocess.PIPE, check=True)
    return ret.stdout.decode("utf-8")


def find_memgraph_binary(loc):
    """Return the name of the first executable `memgraph*` file in `loc`.

    Only `loc` itself is searched (find's `-maxdepth 1`). The first two
    characters of the first output line (the "./" that find prints in front
    of every match) are removed. An empty string is returned when find
    prints nothing.
    """
    output = run_cmd(["find", ".", "-maxdepth", "1", "-executable",
                      "-type", "f", "-name", "memgraph*"], loc)
    return output.split("\n")[0][2:]


def generate_run(name, typ="run", supervisor="", commands="", arguments="",
                 enable_network=False, outfile_paths="", infile="",
                 slave_group="local", link_to_run=""):
    """Return a dict describing a single run.

    All arguments are stored in the dict unchanged (`typ` under the key
    "type"), except `commands`, which always ends with a newline in the
    result.
    """
    if not commands.endswith("\n"):
        commands += "\n"
    return dict(name=name, type=typ, supervisor=supervisor,
                commands=commands, arguments=arguments,
                enable_network=enable_network, outfile_paths=outfile_paths,
                infile=infile, slave_group=slave_group,
                link_to_run=link_to_run)


def generate_archive(name, short_name, archive):
    """Return a dict describing a single archive."""
    return dict(name=name, short_name=short_name, archive=archive)


def create_archive(name, files, cwd):
    """Pack `files` into `OUTPUT_DIR/<name>.tar.gz` and return the file name.

    Paths in `files` are stored in the archive relative to `cwd`. The `files`
    list passed by the caller is left untouched. Raises
    subprocess.CalledProcessError when tar fails.
    """
    oname = name + ".tar.gz"
    ofile = os.path.join(OUTPUT_DIR, oname)
    print("Creating archive:", name)
    # Build a new list instead of rewriting the caller's list in place.
    relative_files = [os.path.relpath(path, cwd) for path in files]
    subprocess.run(["tar", "-cpzf", ofile, "-C", cwd] + relative_files,
                   check=True)
    return oname


def store_metadata(cwd, name, data):
    """Serialize `data` as JSON into the file `<cwd>/<name>.json`."""
    # The context manager guarantees the file is flushed and closed.
    with open(os.path.join(cwd, name + ".json"), "w") as metadata_file:
        json.dump(data, metadata_file)
# Matches the rows of `ctest -N` output that describe a test.
_CTEST_ROW_PATTERN = re.compile(r"^\s*Test\s+#")


def _list_ctest_tests(build_dir):
    """Return the test names printed by `ctest -N` run in `build_dir`."""
    names = []
    for row in run_cmd(["ctest", "-N"], build_dir).split("\n"):
        # Filter rows only containing tests.
        if not _CTEST_ROW_PATTERN.match(row):
            continue
        names.append(row.split(":")[1].strip())
    return names


# create output directory
if os.path.exists(OUTPUT_DIR):
    shutil.rmtree(OUTPUT_DIR)
os.makedirs(OUTPUT_DIR)

# store memgraph binary to archive
binary_name = find_memgraph_binary(BUILD_DIR)
binary_path = os.path.join(BUILD_DIR, binary_name)
binary_link_path = os.path.join(BUILD_DIR, "memgraph")
config_path = os.path.join(BASE_DIR, "config")
config_copy_path = os.path.join(BUILD_DIR, "config")
if os.path.exists(config_copy_path):
    shutil.rmtree(config_copy_path)
shutil.copytree(config_path, config_copy_path)
archive = create_archive("binary", [binary_path, config_copy_path], BUILD_DIR)
ARCHIVES.append(generate_archive("Binary", "binary", archive))

# store documentation to archive
docs_path = os.path.join(BASE_DIR, "docs", "doxygen", "html")
archive = create_archive("doxygen_documentation", [docs_path], docs_path)
ARCHIVES.append(generate_archive("Doxygen documentation",
                                 "doxygen_documentation", archive))

# store release deb and tarball to archive
if mode == "release":
    print("Copying release packages")
    build_output_dir = os.path.join(BUILD_DIR, "output")
    deb_name = run_cmd(["find", ".", "-maxdepth", "1", "-type", "f",
                        "-name", "memgraph*.deb"],
                       build_output_dir).split("\n")[0][2:]
    arch = run_cmd(["dpkg", "--print-architecture"],
                   build_output_dir).split("\n")[0]
    version = binary_name.split("-")[1]
    # Generate Debian package file name as expected by Debian Policy.
    standard_deb_name = "memgraph_{}-1_{}.deb".format(version, arch)
    tarball_name = run_cmd(["find", ".", "-maxdepth", "1", "-type", "f",
                            "-name", "memgraph*.tar.gz"],
                           build_output_dir).split("\n")[0][2:]
    shutil.copyfile(os.path.join(build_output_dir, deb_name),
                    os.path.join(OUTPUT_DIR, standard_deb_name))
    shutil.copyfile(os.path.join(build_output_dir, tarball_name),
                    os.path.join(OUTPUT_DIR, tarball_name))
    ARCHIVES.append(generate_archive("Release (deb package)",
                                     standard_deb_name, standard_deb_name))
    ARCHIVES.append(generate_archive("Release (tarball)",
                                     tarball_name, tarball_name))

# store user documentation to archive
if mode == "release":
    print("Copying release documentation")
    shutil.copyfile(os.path.join(BASE_DIR, "docs", "user_technical",
                                 "docs.tar.gz"),
                    os.path.join(OUTPUT_DIR, "release_user_docs.tar.gz"))
    ARCHIVES.append(generate_archive("Release (user docs)",
                                     "release_user_docs",
                                     "release_user_docs.tar.gz"))

# cppcheck run
cppcheck = os.path.join(BASE_DIR, "tools", "apollo", "cppcheck")
check_dirs = [os.path.join(BASE_DIR, x)
              for x in ["src", "tests", "poc", ".git"]] + [cppcheck]
archive = create_archive("cppcheck", check_dirs, WORKSPACE_DIR)
cmd = os.path.relpath(cppcheck, WORKSPACE_DIR)
# The output path is a pattern, so every "." in it is backslash-escaped.
outfile_paths = r"\./" + cmd.replace("cppcheck", ".cppcheck_errors").replace(
    ".", r"\.")
RUNS.append(generate_run("cppcheck",
                         commands='TIMEOUT=1000 ./{} {}'.format(cmd, mode),
                         infile=archive, outfile_paths=outfile_paths))

# TODO: Refactor apollo/generate to be a config file which specifies how
# each test is run and which files it depends on.

# ctest tests
tests = []

# test ordering: first unit, then concurrent, then everything else
CTEST_ORDER = {"unit": 0, "concurrent": 1}
CTEST_DELIMITER = "__"
for test_name in _list_ctest_tests(TESTS_DIR):
    # We prefix all test names with BASE_DIR_NAME
    name = test_name.replace(BASE_DIR_NAME + CTEST_DELIMITER, "")
    path = os.path.join(TESTS_DIR, name.replace(CTEST_DELIMITER, "/", 1))
    order = CTEST_ORDER.get(name.split(CTEST_DELIMITER)[0], len(CTEST_ORDER))
    tests.append((order, name, path))

tests.sort()

# Archived test paths are stored relative to the parent of BASE_DIR.
cwd = os.path.dirname(BASE_DIR)
for _order, name, path in tests:
    is_benchmark = name.startswith("benchmark")

    # skip benchmark tests on diffs
    if is_benchmark and mode == "diff":
        continue

    dirname = os.path.dirname(path)
    cmakedir = os.path.join(dirname, "CMakeFiles",
                            BASE_DIR_NAME + CTEST_DELIMITER + name + ".dir")
    files = [path, cmakedir]

    # extra files for specific tests
    if name == "unit__fswatcher":
        files.append(os.path.normpath(os.path.join(dirname, "..", "data")))

    # larger timeout for benchmark tests
    prefix = "TIMEOUT=600 " if is_benchmark else ""

    infile = create_archive(name, files, cwd=cwd)

    exepath = os.path.relpath(path, cwd)
    commands = "cd {}\n{}./{}\n".format(os.path.dirname(exepath), prefix,
                                        os.path.basename(exepath))

    outfile_paths = ""
    if name.startswith("unit"):
        cmakedir_rel = os.path.relpath(cmakedir, WORKSPACE_DIR)
        outfile_paths = r"\./" + cmakedir_rel.replace(".", r"\.") + ".+\n"
    run = generate_run(name, commands=commands, infile=infile,
                       outfile_paths=outfile_paths)

    RUNS.append(run)

# quality assurance tests
qa_path = os.path.join(BASE_DIR, "tests", "qa")
infile = create_archive("quality_assurance",
                        [qa_path, binary_path, binary_link_path, config_path],
                        cwd=WORKSPACE_DIR)
commands = "cd {}/tests/qa\n./continuous_integration\n".format(
    BASE_DIR_NAME)
RUNS.append(generate_run(
    "quality_assurance", commands=commands, infile=infile,
    outfile_paths=r"\./{}/tests/qa/\.quality_assurance_status".format(
        BASE_DIR_NAME)))

# build release paths
if mode == "release":
    BUILD_RELEASE_DIR = os.path.join(BASE_DIR, "build")
else:
    BUILD_RELEASE_DIR = os.path.join(BASE_DIR, "build_release")
binary_release_name = find_memgraph_binary(BUILD_RELEASE_DIR)
binary_release_path = os.path.join(BUILD_RELEASE_DIR, binary_release_name)
binary_release_link_path = os.path.join(BUILD_RELEASE_DIR, "memgraph")

# macro benchmark tests
MACRO_BENCHMARK_ARGS = (
    "QuerySuite MemgraphRunner "
    "--groups aggregation 1000_create unwind_create dense_expand match "
    "--no-strict --database-cpu-ids 1 --client-cpu-ids 2")
MACRO_PARALLEL_BENCHMARK_ARGS = (
    "QueryParallelSuite MemgraphRunner --groups aggregation_parallel "
    "create_parallel bfs_parallel --database-cpu-ids 1 2 3 4 5 6 7 8 9 "
    "--client-cpu-ids 10 11 12 13 14 15 16 17 18 19 "
    "--num-database-workers 9 --num-clients-workers 30 --no-strict")

macro_bench_path = os.path.join(BASE_DIR, "tests", "macro_benchmark")
harness_client_binaries = os.path.join(BUILD_RELEASE_DIR, "tests",
                                       "macro_benchmark")
postgresql_lib_dir = os.path.join(LIBS_DIR, "postgresql", "lib")
infile = create_archive("macro_benchmark",
                        [binary_release_path, binary_release_link_path,
                         macro_bench_path, config_path,
                         harness_client_binaries, postgresql_lib_dir],
                        cwd=WORKSPACE_DIR)
supervisor = "./memgraph/tests/macro_benchmark/harness"
outfile_paths = r"\./memgraph/tests/macro_benchmark/\.harness_summary"
RUNS.append(generate_run("macro_benchmark__query_suite",
                         supervisor=supervisor,
                         arguments=MACRO_BENCHMARK_ARGS,
                         infile=infile,
                         outfile_paths=outfile_paths))
RUNS.append(generate_run("macro_benchmark__query_parallel_suite",
                         supervisor=supervisor,
                         arguments=MACRO_PARALLEL_BENCHMARK_ARGS,
                         infile=infile,
                         outfile_paths=outfile_paths,
                         slave_group="remote_20c140g"))

# macro benchmark parent tests
if mode == "diff":
    PARENT_DIR = os.path.join(WORKSPACE_DIR, "parent")
    BUILD_PARENT_DIR = os.path.join(PARENT_DIR, "build")
    LIBS_PARENT_DIR = os.path.join(PARENT_DIR, "libs")
    binary_parent_name = find_memgraph_binary(BUILD_PARENT_DIR)
    binary_parent_path = os.path.join(BUILD_PARENT_DIR, binary_parent_name)
    binary_parent_link_path = os.path.join(BUILD_PARENT_DIR, "memgraph")
    parent_config_path = os.path.join(PARENT_DIR, "config")
    parent_macro_bench_path = os.path.join(PARENT_DIR, "tests",
                                           "macro_benchmark")
    parent_harness_client_binaries = os.path.join(BUILD_PARENT_DIR, "tests",
                                                  "macro_benchmark")
    parent_postgresql_lib_dir = os.path.join(LIBS_PARENT_DIR, "postgresql",
                                             "lib")
    infile = create_archive("macro_benchmark_parent",
                            [binary_parent_path, binary_parent_link_path,
                             parent_macro_bench_path, parent_config_path,
                             parent_harness_client_binaries,
                             parent_postgresql_lib_dir],
                            cwd=WORKSPACE_DIR)
    supervisor = "./parent/tests/macro_benchmark/harness"
    # Both parent runs are pointed at the parent binary.
    runner_bin_args = " --RunnerBin " + binary_parent_path
    args = MACRO_BENCHMARK_ARGS + runner_bin_args
    parallel_args = MACRO_PARALLEL_BENCHMARK_ARGS + runner_bin_args
    outfile_paths = r"\./parent/tests/macro_benchmark/\.harness_summary"
    RUNS.append(generate_run("macro_benchmark_parent__query_suite",
                             supervisor=supervisor,
                             arguments=args,
                             infile=infile,
                             outfile_paths=outfile_paths,
                             link_to_run="macro_benchmark__query_suite"))
    RUNS.append(generate_run(
        "macro_benchmark_parent__query_parallel_suite",
        supervisor=supervisor,
        arguments=parallel_args,
        infile=infile,
        outfile_paths=outfile_paths,
        slave_group="remote_20c140g",
        link_to_run="macro_benchmark__query_parallel_suite"))

    # macro benchmark comparison data process
    # The comparison reads the summaries of the parent runs defined above,
    # so it is generated only in diff mode.
    script_path = os.path.join(BASE_DIR, "tools", "apollo",
                               "macro_benchmark_summary")
    infile = create_archive("macro_benchmark_summary", [script_path],
                            cwd=WORKSPACE_DIR)
    cmd = ("./memgraph/tools/apollo/macro_benchmark_summary "
           "--current "
           "macro_benchmark__query_suite/memgraph/tests/macro_benchmark/.harness_summary "
           "macro_benchmark__query_parallel_suite/memgraph/tests/macro_benchmark/.harness_summary "
           "--previous "
           "macro_benchmark_parent__query_suite/parent/tests/macro_benchmark/.harness_summary "
           "macro_benchmark_parent__query_parallel_suite/parent/tests/macro_benchmark/.harness_summary "
           "--output .harness_summary")
    outfile_paths = r"\./.harness_summary"
    DATA_PROCESS.append(generate_run("macro_benchmark_summary",
                                     typ="data process",
                                     commands=cmd,
                                     infile=infile,
                                     outfile_paths=outfile_paths))

# stress tests
stress_path = os.path.join(BASE_DIR, "tests", "stress")
stress_binary_path = os.path.join(BUILD_RELEASE_DIR, "tests", "stress")
infile = create_archive("stress",
                        [binary_release_path, binary_release_link_path,
                         stress_path, stress_binary_path, config_path],
                        cwd=WORKSPACE_DIR)
cmd = "cd memgraph/tests/stress\nTIMEOUT=600 ./continuous_integration"
RUNS.append(generate_run("stress", commands=cmd, infile=infile))

# stress tests for daily release (large dataset)
if mode == "release":
    cmd = ("cd memgraph/tests/stress\nTIMEOUT=43200 ./continuous_integration"
           " --large-dataset")
    RUNS.append(generate_run("stress_large", commands=cmd, infile=infile,
                             slave_group="remote_16c56g"))

# public_benchmark/ldbc tests
if mode == "release":
    ldbc_path = os.path.join(BASE_DIR, "tests", "public_benchmark", "ldbc")
    neo4j_path = os.path.join(BASE_DIR, "libs", "neo4j")
    mg_import_csv_path = os.path.join(BASE_DIR, "tools", "mg_import_csv")
    plot_ldbc_latency_path = os.path.join(BASE_DIR, "tools",
                                          "plot_ldbc_latency")
    infile = create_archive("ldbc",
                            [binary_release_path, ldbc_path,
                             binary_release_link_path, neo4j_path,
                             config_path, mg_import_csv_path,
                             plot_ldbc_latency_path],
                            cwd=WORKSPACE_DIR)
    cmd = "cd memgraph/tests/public_benchmark/ldbc\n. continuous_integration\n"
    outfile_paths = ("\\./memgraph/tests/public_benchmark/ldbc/results/.+\n"
                     "\\./memgraph/tests/public_benchmark/ldbc/plots/.+\n")
    RUNS.append(generate_run("public_benchmark__ldbc", commands=cmd,
                             infile=infile, outfile_paths=outfile_paths,
                             slave_group="remote_20c140g",
                             enable_network=True))

# tools tests
tools_test_names = _list_ctest_tests(TOOLS_BUILD_DIR)
tools_infile = create_archive("tools_test", [TOOLS_BUILD_DIR],
                              cwd=WORKSPACE_DIR)
test_dir = os.path.relpath(TOOLS_BUILD_DIR, WORKSPACE_DIR)
for test_name in tools_test_names:
    commands = "cd {}\nctest --output-on-failure -R \"^{}$\"".format(
        test_dir, test_name)
    run = generate_run("tools_" + test_name, commands=commands,
                       infile=tools_infile)
    RUNS.append(run)

# store ARCHIVES and RUNS
store_metadata(OUTPUT_DIR, "archives", ARCHIVES)
store_metadata(OUTPUT_DIR, "runs", RUNS + DATA_PROCESS)