memgraph/tools/apollo/generate
Teon Banek a17261038c Include tools CMakeLists in the top level CMakeLists
Summary:
Split main CMakeLists into src/CMakeLists

The main CMakeLists' duty is to make all the required libraries and
variables visible to all of the other sub-CMakeLists. After doing that,
it should include those sub-CMakeLists according to configuration
options.

This should make global configurations easier to reuse without polluting
the global space with locally related configurations. It is a necessary
step for including other projects like 'tools' in the release
installation.

Building tools is disabled by default, but can be enabled by setting
the TOOLS option to ON when running cmake. This should allow on-demand
building as well as combined installation of Memgraph and its tools.

Reviewers: mferencevic, buda

Reviewed By: mferencevic

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D1018
2017-12-04 15:18:18 +01:00

330 lines
14 KiB
Python
Executable File

#!/usr/bin/python3
import json
import os
import re
import shutil
import subprocess
import sys
# Filesystem layout. Every path is derived from the location of this script;
# BASE_DIR is the directory two levels above it and WORKSPACE_DIR is the
# parent of BASE_DIR.
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
BASE_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "..", ".."))
WORKSPACE_DIR = os.path.normpath(os.path.join(BASE_DIR, ".."))
BASE_DIR_NAME = os.path.basename(BASE_DIR)

# Build tree locations.
BUILD_DIR = os.path.join(BASE_DIR, "build")
LIBS_DIR = os.path.join(BASE_DIR, "libs")
TESTS_DIR = os.path.join(BUILD_DIR, "tests")
TOOLS_BUILD_DIR = os.path.join(BUILD_DIR, "tools")
OUTPUT_DIR = os.path.join(BUILD_DIR, "apollo")

# Accumulators filled in by the rest of the script and written out as JSON
# at the very end.
ARCHIVES, RUNS, DATA_PROCESS = [], [], []
# Generation mode: the first command line argument when one is given,
# otherwise "diff".
mode = sys.argv[1] if len(sys.argv) >= 2 else "diff"
# helper functions
def run_cmd(cmd, cwd):
    """Run `cmd` inside directory `cwd` and return its standard output.

    `cmd` is an argument list handed to subprocess as-is. The captured
    stdout is decoded as UTF-8. A non-zero exit status raises
    subprocess.CalledProcessError.
    """
    raw_output = subprocess.check_output(cmd, cwd=cwd)
    return raw_output.decode("utf-8")
def find_memgraph_binary(loc):
    """Return the file name of a memgraph executable found directly in `loc`.

    `find` is asked for executable regular files named ``memgraph*`` at depth
    one. Only the first line of its output is used, with the leading "./"
    removed. When nothing matches, the result is an empty string.
    """
    find_cmd = ["find", ".", "-maxdepth", "1", "-executable", "-type", "f",
                "-name", "memgraph*"]
    first_line = run_cmd(find_cmd, loc).split("\n")[0]
    # find prints paths relative to "." -- drop that two character prefix
    return first_line[2:]
def generate_run(name, typ = "run", supervisor = "", commands = "",
                 arguments = "", enable_network = False,
                 outfile_paths = "", infile = "", slave_group = "local"):
    """Build the dictionary that describes a single run entry.

    All arguments are stored unchanged under their own key (`typ` is stored
    as "type"), except `commands`, which gets a trailing newline appended
    when it does not already end with one.
    """
    terminated = commands if commands.endswith("\n") else commands + "\n"
    return {
        "name": name,
        "type": typ,
        "supervisor": supervisor,
        "commands": terminated,
        "arguments": arguments,
        "enable_network": enable_network,
        "outfile_paths": outfile_paths,
        "infile": infile,
        "slave_group": slave_group,
    }
def generate_archive(name, short_name, archive):
    """Build the dictionary that describes a single archive entry."""
    return {"name": name, "short_name": short_name, "archive": archive}
def create_archive(name, files, cwd):
    """Pack `files` into OUTPUT_DIR/<name>.tar.gz and return the archive name.

    Each entry of `files` is made relative to `cwd`, and tar is run with
    ``-C cwd`` so that the archive members carry those relative paths.
    The caller's `files` list is left untouched.

    Returns the archive file name (without directory). Raises
    subprocess.CalledProcessError when tar exits with a non-zero status.
    """
    oname = name + ".tar.gz"
    ofile = os.path.join(OUTPUT_DIR, oname)
    print("Creating archive:", name)
    # Build a new list instead of rewriting the argument in place, so callers
    # can keep using the paths they passed in.
    members = [os.path.relpath(path, cwd) for path in files]
    subprocess.run(["tar", "-cpzf", ofile, "-C", cwd] + members, check = True)
    return oname
def store_metadata(cwd, name, data):
    """Serialize `data` as JSON into the file `<cwd>/<name>.json`.

    An existing file of that name is overwritten. The file is closed (and
    therefore flushed) before the function returns.
    """
    with open(os.path.join(cwd, name + ".json"), "w") as metadata_file:
        json.dump(data, metadata_file)
# Start from an empty output directory: anything already at OUTPUT_DIR is
# removed before the directory is created again.
if os.path.exists(OUTPUT_DIR):
    shutil.rmtree(OUTPUT_DIR)
os.makedirs(OUTPUT_DIR)
# Archive the memgraph binary together with a copy of the configuration.
binary_name = find_memgraph_binary(BUILD_DIR)
binary_path = os.path.join(BUILD_DIR, binary_name)
binary_link_path = os.path.join(BUILD_DIR, "memgraph")
config_path = os.path.join(BASE_DIR, "config")
config_copy_path = os.path.join(BUILD_DIR, "config")

# The configuration is copied into the build directory so that it can be
# archived relative to BUILD_DIR; a copy left by an earlier run is dropped
# first.
if os.path.exists(config_copy_path):
    shutil.rmtree(config_copy_path)
shutil.copytree(config_path, config_copy_path)

archive = create_archive("binary", [binary_path, config_copy_path],
                         BUILD_DIR)
ARCHIVES.append(generate_archive("Binary", "binary", archive))
# Archive the generated doxygen HTML. The archive is created relative to the
# html directory itself, so its members are not prefixed with the docs path.
docs_path = os.path.join(BASE_DIR, "docs", "doxygen", "html")
archive = create_archive("doxygen_documentation", [docs_path], docs_path)
doxygen_entry = generate_archive("Doxygen documentation",
                                 "doxygen_documentation", archive)
ARCHIVES.append(doxygen_entry)
# Release mode only: copy the deb package and the tarball next to the
# generated archives and register both.
if mode == "release":
    print("Copying release packages")
    build_output_dir = os.path.join(BUILD_DIR, "output")

    # First memgraph*.deb found directly in the output directory
    # ([2:] strips the "./" that find prints).
    find_deb_cmd = ["find", ".", "-maxdepth", "1", "-type", "f",
                    "-name", "memgraph*.deb"]
    deb_name = run_cmd(find_deb_cmd, build_output_dir).split("\n")[0][2:]

    dpkg_output = run_cmd(["dpkg", "--print-architecture"], build_output_dir)
    arch = dpkg_output.split("\n")[0]
    # The version is the second "-" separated component of the binary name.
    version = binary_name.split("-")[1]
    # Generate Debian package file name as expected by Debian Policy.
    standard_deb_name = "memgraph_{}-1_{}.deb".format(version, arch)

    find_tarball_cmd = ["find", ".", "-maxdepth", "1", "-type", "f",
                        "-name", "memgraph*.tar.gz"]
    tarball_name = run_cmd(find_tarball_cmd,
                           build_output_dir).split("\n")[0][2:]

    # The deb is renamed on copy, the tarball keeps its name.
    deb_source = os.path.join(build_output_dir, deb_name)
    deb_target = os.path.join(OUTPUT_DIR, standard_deb_name)
    shutil.copyfile(deb_source, deb_target)
    tarball_source = os.path.join(build_output_dir, tarball_name)
    tarball_target = os.path.join(OUTPUT_DIR, tarball_name)
    shutil.copyfile(tarball_source, tarball_target)

    ARCHIVES.append(generate_archive("Release (deb package)",
                                     standard_deb_name, standard_deb_name))
    ARCHIVES.append(generate_archive("Release (tarball)",
                                     tarball_name, tarball_name))
# Release mode only: copy the prebuilt user documentation tarball into the
# output directory and register it.
if mode == "release":
    print("Copying release documentation")
    user_docs_source = os.path.join(BASE_DIR, "docs", "user_technical",
                                    "docs.tar.gz")
    user_docs_target = os.path.join(OUTPUT_DIR, "release_user_docs.tar.gz")
    shutil.copyfile(user_docs_source, user_docs_target)
    ARCHIVES.append(generate_archive("Release (user docs)",
                                     "release_user_docs",
                                     "release_user_docs.tar.gz"))
# cppcheck run: archive the sources, the .git directory and the cppcheck
# wrapper script, then register a run that executes the wrapper.
cppcheck = os.path.join(BASE_DIR, "tools", "apollo", "cppcheck")
check_dirs = [os.path.join(BASE_DIR, x)
              for x in ("src", "tests", "poc", ".git")] + [cppcheck]
archive = create_archive("cppcheck", check_dirs, WORKSPACE_DIR)
cmd = os.path.relpath(cppcheck, WORKSPACE_DIR)
# The output file sits next to the script and is named ".cppcheck_errors".
# Dots are backslash-escaped (presumably because outfile_paths is treated as
# a regular expression by the consumer -- confirm). Raw strings keep the
# backslashes literal without relying on invalid escape sequences.
outfile_paths = r"\./" + cmd.replace("cppcheck", ".cppcheck_errors").replace(".", r"\.")
RUNS.append(generate_run("cppcheck",
                         commands = 'TIMEOUT=1000 ./{} {}'.format(cmd, mode),
                         infile = archive, outfile_paths = outfile_paths))
# TODO: Refactor apollo/generate to be a config file which specifies how
# each test is run and which files it depends on.

# ctest tests: list the tests known to ctest, then create one archive and
# one run per test.
ctest_output = run_cmd(["ctest", "-N"], TESTS_DIR)
tests = []

# test ordering: first unit, then concurrent, then everything else
CTEST_ORDER = {"unit": 0, "concurrent": 1}
CTEST_DELIMITER = "__"
# Rows of `ctest -N` that describe a test start with "Test #".
CTEST_ROW_RE = re.compile(r"^\s*Test\s+#")

for row in ctest_output.split("\n"):
    # Filter rows only containing tests.
    if not CTEST_ROW_RE.match(row):
        continue
    test_name = row.split(":")[1].strip()
    # We prefix all test names with BASE_DIR_NAME
    name = test_name.replace(BASE_DIR_NAME + CTEST_DELIMITER, "")
    # Only the first delimiter separates the test group directory from the
    # executable name.
    path = os.path.join(TESTS_DIR, name.replace(CTEST_DELIMITER, "/", 1))
    order = CTEST_ORDER.get(name.split(CTEST_DELIMITER)[0], len(CTEST_ORDER))
    tests.append((order, name, path))

tests.sort()

# Archives are created relative to the directory that contains BASE_DIR.
cwd = os.path.dirname(BASE_DIR)

for order, name, path in tests:
    is_benchmark = name.startswith("benchmark")

    # skip benchmark tests on diffs
    if is_benchmark and mode == "diff":
        continue

    dirname = os.path.dirname(path)
    cmakedir = os.path.join(dirname, "CMakeFiles",
                            BASE_DIR_NAME + CTEST_DELIMITER + name + ".dir")
    files = [path, cmakedir]

    # extra files for specific tests
    if name == "unit__fswatcher":
        files.append(os.path.normpath(os.path.join(dirname, "..", "data")))

    # larger timeout for benchmark tests
    prefix = "TIMEOUT=600 " if is_benchmark else ""

    infile = create_archive(name, files, cwd = cwd)

    exepath = os.path.relpath(path, cwd)
    commands = "cd {}\n{}./{}\n".format(os.path.dirname(exepath),
                                        prefix, os.path.basename(exepath))

    # Unit tests additionally collect everything below their CMakeFiles
    # directory; dots in the path are backslash-escaped.
    outfile_paths = ""
    if name.startswith("unit"):
        cmakedir_rel = os.path.relpath(cmakedir, WORKSPACE_DIR)
        outfile_paths = r"\./" + cmakedir_rel.replace(".", r"\.") + ".+\n"

    run = generate_run(name, commands = commands, infile = infile,
                       outfile_paths = outfile_paths)
    RUNS.append(run)
# quality assurance tests: archive the qa directory, the binary (plus the
# "memgraph" link) and the config, then run continuous_integration from
# inside the qa directory.
qa_path = os.path.join(BASE_DIR, "tests", "qa")
infile = create_archive("quality_assurance",
                        [qa_path, binary_path, binary_link_path, config_path],
                        cwd = WORKSPACE_DIR)
commands = "cd {}/tests/qa\n./continuous_integration\n".format(
    BASE_DIR_NAME)
# Raw string: the backslashes escaping the dots must reach the output
# verbatim and "\." is not a valid escape sequence in a normal literal.
qa_outfile_paths = r"\./{}/tests/qa/\.quality_assurance_status".format(
    BASE_DIR_NAME)
RUNS.append(generate_run("quality_assurance", commands = commands,
                         infile = infile, outfile_paths = qa_outfile_paths))
# Location of the release build: in release mode it is the regular build
# directory, otherwise a separate "build_release" directory.
release_build_subdir = "build" if mode == "release" else "build_release"
BUILD_RELEASE_DIR = os.path.join(BASE_DIR, release_build_subdir)

binary_release_name = find_memgraph_binary(BUILD_RELEASE_DIR)
binary_release_path = os.path.join(BUILD_RELEASE_DIR, binary_release_name)
binary_release_link_path = os.path.join(BUILD_RELEASE_DIR, "memgraph")
# macro benchmark tests: one archive shared by two runs (the query suite and
# the parallel query suite), both started through the harness supervisor.
MACRO_BENCHMARK_ARGS = (
    "QuerySuite MemgraphRunner "
    "--groups aggregation 1000_create unwind_create dense_expand match "
    "--no-strict --database-cpu-ids 1 --client-cpu-ids 2")
MACRO_PARALLEL_BENCHMARK_ARGS = (
    "QueryParallelSuite MemgraphRunner --groups aggregation_parallel "
    "create_parallel bfs_parallel --database-cpu-ids 1 2 3 4 5 6 7 8 9 "
    "--client-cpu-ids 10 11 12 13 14 15 16 17 18 19 "
    "--num-database-workers 9 --num-clients-workers 30 --no-strict")

macro_bench_path = os.path.join(BASE_DIR, "tests", "macro_benchmark")
harness_client_binaries = os.path.join(BUILD_RELEASE_DIR, "tests",
                                       "macro_benchmark")
postgresql_lib_dir = os.path.join(LIBS_DIR, "postgresql", "lib")
infile = create_archive("macro_benchmark",
                        [binary_release_path, binary_release_link_path,
                         macro_bench_path, config_path,
                         harness_client_binaries, postgresql_lib_dir],
                        cwd = WORKSPACE_DIR)

# NOTE(review): these paths hard-code "memgraph" as the repository directory
# name, whereas the archive members are prefixed with BASE_DIR_NAME -- they
# only agree when the checkout directory is called "memgraph".
supervisor = "./memgraph/tests/macro_benchmark/harness"
# Raw string keeps the dot-escaping backslashes literal.
outfile_paths = r"\./memgraph/tests/macro_benchmark/\.harness_summary"

RUNS.append(generate_run("macro_benchmark__query_suite",
                         supervisor = supervisor,
                         arguments = MACRO_BENCHMARK_ARGS,
                         infile = infile,
                         outfile_paths = outfile_paths))
RUNS.append(generate_run("macro_benchmark__query_parallel_suite",
                         supervisor = supervisor,
                         arguments = MACRO_PARALLEL_BENCHMARK_ARGS,
                         infile = infile,
                         outfile_paths = outfile_paths,
                         slave_group = "remote_20c140g"))
# macro benchmark parent tests: in diff mode the same two suites are also run
# against the build found in WORKSPACE_DIR/parent, and a data process step
# compares the summaries of the current and the parent runs.
if mode == "diff":
    PARENT_DIR = os.path.join(WORKSPACE_DIR, "parent")
    BUILD_PARENT_DIR = os.path.join(PARENT_DIR, "build")
    LIBS_PARENT_DIR = os.path.join(PARENT_DIR, "libs")
    binary_parent_name = find_memgraph_binary(BUILD_PARENT_DIR)
    binary_parent_path = os.path.join(BUILD_PARENT_DIR, binary_parent_name)
    binary_parent_link_path = os.path.join(BUILD_PARENT_DIR, "memgraph")
    parent_config_path = os.path.join(PARENT_DIR, "config")
    parent_macro_bench_path = os.path.join(PARENT_DIR, "tests",
                                           "macro_benchmark")
    parent_harness_client_binaries = os.path.join(BUILD_PARENT_DIR, "tests",
                                                  "macro_benchmark")
    parent_postgresql_lib_dir = os.path.join(LIBS_PARENT_DIR, "postgresql",
                                             "lib")
    infile = create_archive("macro_benchmark_parent",
                            [binary_parent_path, binary_parent_link_path,
                             parent_macro_bench_path, parent_config_path,
                             parent_harness_client_binaries,
                             parent_postgresql_lib_dir],
                            cwd = WORKSPACE_DIR)
    supervisor = "./parent/tests/macro_benchmark/harness"
    # Both parent runs point the runner at the parent binary.
    runner_bin_arg = " --RunnerBin " + binary_parent_path
    args = MACRO_BENCHMARK_ARGS + runner_bin_arg
    parallel_args = MACRO_PARALLEL_BENCHMARK_ARGS + runner_bin_arg
    # Raw string keeps the dot-escaping backslashes literal.
    outfile_paths = r"\./parent/tests/macro_benchmark/\.harness_summary"
    RUNS.append(generate_run("macro_benchmark_parent__query_suite",
                             supervisor = supervisor,
                             arguments = args,
                             infile = infile,
                             outfile_paths = outfile_paths))
    RUNS.append(generate_run("macro_benchmark_parent__query_parallel_suite",
                             supervisor = supervisor,
                             arguments = parallel_args,
                             infile = infile,
                             outfile_paths = outfile_paths,
                             slave_group = "remote_20c140g"))

    # macro benchmark comparison data process
    # NOTE(review): the original indentation was lost; this section is kept
    # inside the diff-only branch because it reads the parent run outputs
    # that are only produced above -- confirm against the repository.
    script_path = os.path.join(BASE_DIR, "tools", "apollo",
                               "macro_benchmark_summary")
    infile = create_archive("macro_benchmark_summary", [script_path],
                            cwd = WORKSPACE_DIR)
    cmd = (
        "./memgraph/tools/apollo/macro_benchmark_summary "
        "--current "
        "macro_benchmark__query_suite/memgraph/tests/macro_benchmark/.harness_summary "
        "macro_benchmark__query_parallel_suite/memgraph/tests/macro_benchmark/.harness_summary "
        "--previous "
        "macro_benchmark_parent__query_suite/parent/tests/macro_benchmark/.harness_summary "
        "macro_benchmark_parent__query_parallel_suite/parent/tests/macro_benchmark/.harness_summary "
        "--output .harness_summary")
    # The leading dot of the file name is escaped like in the other
    # .harness_summary patterns of this script, so it no longer matches an
    # arbitrary character.
    outfile_paths = r"\./\.harness_summary"
    DATA_PROCESS.append(generate_run("macro_benchmark_summary",
                                     typ = "data process",
                                     commands = cmd, infile = infile,
                                     outfile_paths = outfile_paths))
# Stress tests: the release binary (plus the "memgraph" link), the stress
# sources and binaries and the config go into one archive that both stress
# runs share.
stress_path = os.path.join(BASE_DIR, "tests", "stress")
stress_binary_path = os.path.join(BUILD_RELEASE_DIR, "tests", "stress")
stress_archive_members = [
    binary_release_path,
    binary_release_link_path,
    stress_path,
    stress_binary_path,
    config_path,
]
infile = create_archive("stress", stress_archive_members,
                        cwd = WORKSPACE_DIR)
cmd = "cd memgraph/tests/stress\nTIMEOUT=600 ./continuous_integration"
RUNS.append(generate_run("stress", commands = cmd, infile = infile))

# Release mode adds a second run on the same archive that passes
# --large-dataset and uses a much larger timeout.
if mode == "release":
    cmd = ("cd memgraph/tests/stress\nTIMEOUT=43200 ./continuous_integration"
           " --large-dataset")
    RUNS.append(generate_run("stress_large", commands = cmd, infile = infile,
                             slave_group = "remote_16c56g"))
# public_benchmark/ldbc tests: release mode only. The run is the only one in
# this script that sets enable_network.
if mode == "release":
    ldbc_path = os.path.join(BASE_DIR, "tests", "public_benchmark", "ldbc")
    neo4j_path = os.path.join(BASE_DIR, "libs", "neo4j")
    mg_import_csv_path = os.path.join(BASE_DIR, "tools", "mg_import_csv")
    plot_ldbc_latency_path = os.path.join(BASE_DIR, "tools",
                                          "plot_ldbc_latency")
    infile = create_archive("ldbc",
                            [binary_release_path, ldbc_path,
                             binary_release_link_path, neo4j_path,
                             config_path, mg_import_csv_path,
                             plot_ldbc_latency_path],
                            cwd = WORKSPACE_DIR)
    # continuous_integration is sourced with "." here, not executed.
    cmd = "cd memgraph/tests/public_benchmark/ldbc\n. continuous_integration\n"
    # These literals mix a literal backslash (escaping the dot) with a real
    # newline, so the backslash is written as "\\" instead of switching to a
    # raw string, which would turn "\n" into two characters.
    outfile_paths = "\\./memgraph/tests/public_benchmark/ldbc/results/.+\n" \
                    "\\./memgraph/tests/public_benchmark/ldbc/plots/.+\n"
    RUNS.append(generate_run("public_benchmark__ldbc", commands = cmd,
                             infile = infile, outfile_paths = outfile_paths,
                             slave_group = "remote_20c140g",
                             enable_network = True))
# tools tests: the whole tools build directory is archived once and every
# test listed by ctest becomes its own run selecting that test by name.
ctest_output = run_cmd(["ctest", "-N"], TOOLS_BUILD_DIR)
tools_infile = create_archive("tools_test", [TOOLS_BUILD_DIR],
                              cwd = WORKSPACE_DIR)
# Rows of `ctest -N` that describe a test start with "Test #".
tools_test_row_re = re.compile(r"^\s*Test\s+#")
# Directory the run changes into; identical for every test.
test_dir = os.path.relpath(TOOLS_BUILD_DIR, WORKSPACE_DIR)
for row in ctest_output.split("\n"):
    # Filter rows only containing tests.
    if not tools_test_row_re.match(row):
        continue
    test_name = row.split(":")[1].strip()
    # -R takes a regular expression; it is anchored so that only the test
    # with exactly this name is selected.
    commands = 'cd {}\nctest --output-on-failure -R "^{}$"'.format(
        test_dir, test_name)
    run = generate_run("tools_" + test_name, commands = commands,
                       infile = tools_infile)
    RUNS.append(run)
# Write the collected metadata: archives.json holds ARCHIVES, runs.json holds
# RUNS followed by the DATA_PROCESS entries.
store_metadata(OUTPUT_DIR, "archives", ARCHIVES)
all_runs = RUNS + DATA_PROCESS
store_metadata(OUTPUT_DIR, "runs", all_runs)