Initial version of macro benchmark summary script.
Summary:
Changed harness output to be compatible with script. Changed cppcheck message
formatting. Changed qa message formatting. Changed macro benchmark summary
icons. Fixed newlines in status generators. Added group name to harness
summary. Macro benchmark summary now handles different files. Added more
output options to summary script. Diff builds now build parent for performance
comparison.

Reviewers: buda, mislav.bradac

Reviewed By: mislav.bradac

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D706
Commit 9d1265f41f (parent fdc389f1eb)
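For orientation while reading the diff (this paragraph and sketch are editorial, not part of the patch): the harness now writes an aligned .harness_summary table, the diff build produces a second such file from the parent commit, and the new macro_benchmark_summary tool turns the pair into a Remarkup table for Phabricator. A minimal sketch of that last step, assuming both summary files already exist at the paths the Apollo generate script uses later in this diff:

    import subprocess

    # Illustration only: compare the current and parent harness summaries and
    # write the Remarkup result to .harness_summary in the working directory.
    subprocess.run(
        ["./memgraph/tools/apollo/macro_benchmark_summary",
         "macro_benchmark/memgraph/tests/macro_benchmark/harness/.harness_summary",
         "macro_benchmark_parent/parent/tests/macro_benchmark/harness/.harness_summary",
         ".harness_summary"],
        check=True)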
@@ -39,9 +39,11 @@ class _QuerySuite:
     # what the QuerySuite can work with
     KNOWN_KEYS = {"config", "setup", "itersetup", "run", "iterteardown",
                   "teardown", "common"}
-    summary = "Macro benchmark summary:\n" \
-              "{:>27}{:>27}{:>27}{:>27}{:>27}{:>27}\n".format(
-                  "scenario_name", "query_parsing_time",
+    FORMAT = ["{:>24}", "{:>28}", "{:>22}", "{:>24}", "{:>28}",
+              "{:>16}", "{:>16}"]
+    FULL_FORMAT = "".join(FORMAT) + "\n"
+    summary = FULL_FORMAT.format(
+                  "group_name", "scenario_name", "query_parsing_time",
                   "query_planning_time", "query_plan_execution_time",
                   WALL_TIME, CPU_TIME)
 
@@ -186,7 +188,7 @@ class _QuerySuite:
 
         return group_scenarios
 
-    def run(self, scenario, scenario_name, runner):
+    def run(self, scenario, group_name, scenario_name, runner):
        log.debug("QuerySuite.run() with scenario: %s", scenario)
        scenario_config = scenario.get("config")
        scenario_config = next(scenario_config()) if scenario_config else {}
@@ -252,20 +254,21 @@ class _QuerySuite:
             # TODO value outlier detection and warning across iterations
         execute("teardown")
         runner.stop()
-        self.append_scenario_summary(scenario_name, measurement_sums,
-                                     num_iterations)
+        self.append_scenario_summary(group_name, scenario_name,
+                                     measurement_sums, num_iterations)
         return measurements
 
-    def append_scenario_summary(self, scenario_name, measurement_sums,
-                                num_iterations):
-        self.summary += "{:>27}".format(scenario_name)
-        for key in ("query_parsing_time", "query_planning_time",
-                    "query_plan_execution_time", WALL_TIME, CPU_TIME):
+    def append_scenario_summary(self, group_name, scenario_name,
+                                measurement_sums, num_iterations):
+        self.summary += self.FORMAT[0].format(group_name)
+        self.summary += self.FORMAT[1].format(scenario_name)
+        for i, key in enumerate(("query_parsing_time", "query_planning_time",
+                                 "query_plan_execution_time", WALL_TIME, CPU_TIME)):
             if key not in measurement_sums:
                 time = "-"
             else:
                 time = "{:.10f}".format(measurement_sums[key] / num_iterations)
-            self.summary += "{:>27}".format(time)
+            self.summary += self.FORMAT[i + 2].format(time)
         self.summary += "\n"
 
     def runners(self):
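A sketch of the row the new append_scenario_summary logic produces, with made-up numbers, a made-up scenario name, and placeholder labels standing in for the WALL_TIME and CPU_TIME constants (their exact string values are not shown in this diff). Group and scenario come first, then one averaged value per measurement, with "-" where a measurement is missing:

    FORMAT = ["{:>24}", "{:>28}", "{:>22}", "{:>24}", "{:>28}", "{:>16}", "{:>16}"]
    measurement_sums = {"query_parsing_time": 0.02, "query_plan_execution_time": 1.5}
    num_iterations = 10
    row = FORMAT[0].format("aggregation") + FORMAT[1].format("some_scenario")
    for i, key in enumerate(("query_parsing_time", "query_planning_time",
                             "query_plan_execution_time", "wall_time", "cpu_time")):
        value = ("-" if key not in measurement_sums
                 else "{:.10f}".format(measurement_sums[key] / num_iterations))
        row += FORMAT[i + 2].format(value)
    print(row)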
@@ -523,7 +526,7 @@ def main():
     for (group, scenario_name), scenario in filtered_scenarios.items():
         log.info("Executing group.scenario '%s.%s' with elements %s",
                  group, scenario_name, list(scenario.keys()))
-        for iter_result in suite.run(scenario, scenario_name, runner):
+        for iter_result in suite.run(scenario, group, scenario_name, runner):
             iter_result["group"] = group
             iter_result["scenario"] = scenario_name
             results.append(iter_result)
@@ -534,7 +537,8 @@ def main():
         run.update(args.additional_run_fields)
     for result in results:
         jail.store_data(result)
-    print("\n\n{}\n".format(suite.summary))
+    print("\n\nMacro benchmark summary:")
+    print("{}\n".format(suite.summary))
     with open(os.path.join(DIR_PATH, ".harness_summary"), "w") as f:
         print(suite.summary, file=f)
 
@@ -8,7 +8,7 @@ everything which is needed for the CI environment.
 List of responsibilities:
     * execute default suites
     * terminate execution if any of internal scenarios fails
-    * creates the report file that is needed by the Jenkins plugin
+    * creates the report file that is needed by the Apollo plugin
       to post the status on Phabricator. (.quality_assurance_status)
 """
 
@@ -43,18 +43,46 @@ def get_newest_path(folder, suffix):
     return os.path.join(folder, name_list.pop())
 
 
-def generate_status(suite, f):
+def generate_status(suite, f, required = False):
+    """
+    :param suite: Test suite name.
+    :param f: Json file with status report.
+    :param required: Adds status ticks to the message if required.
+
+    :return: Status string.
+    """
     result = json.load(f)
     total = result["total"]
     passed = result["passed"]
-    return ("SUITE: %s, PASSED SCENARIOS: %s, TOTAL SCENARIOS: %s (%.2f%%)" %
-            (suite, passed, total, 100.0 * passed / total)), passed, total
+    ratio = passed / total
+    msg = "{} / {} //({:.2%})//".format(passed, total, ratio)
+    if required:
+        if passed == total:
+            msg += " {icon check color=green}"
+        else:
+            msg += " {icon times color=red}"
+    return (msg, passed, total)
+
+
+def generate_remarkup(data):
+    """
+    :param data: Tabular data to convert to remarkup.
+
+    :return: Remarkup formatted status string.
+    """
+    ret = "==== Quality assurance status: ====\n\n"
+    ret += "<table>\n"
+    for row in data:
+        ret += " <tr>\n"
+        for item in row:
+            if row == data[0]:
+                fmt = " <th>{}</th>\n"
+            else:
+                fmt = " <td>{}</td>\n"
+            ret += fmt.format(item)
+        ret += " </tr>\n"
+    ret += "</table>\n"
+    return ret
 
 
 if __name__ == "__main__":
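A toy usage example for the two functions above, assuming they are in scope; the suite name and counts are made up. It shows the Remarkup fragment that ends up in the status table posted to Phabricator:

    import io, json

    report = io.StringIO(json.dumps({"total": 10, "passed": 9}))
    msg, passed, total = generate_status("some_suite", report, required = True)
    # msg is "9 / 10 //(90.00%)// {icon times color=red}" since one scenario failed
    print(generate_remarkup([["Suite", "Scenarios"], ["some_suite", msg]]))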
@@ -73,13 +101,14 @@ if __name__ == "__main__":
                                        suite_suffix.format(memgraph_suite))
     log.info("Memgraph result path is {}".format(memgraph_result_path))
 
+    # status table headers
+    status_data = [["Suite", "Scenarios"]]
+
     # read internal scenarios
     with open(memgraph_result_path) as f:
         memgraph_status, memgraph_passed, memgraph_total \
-                = generate_status(memgraph_suite, f)
-
-    # create status message
-    qa_status_message = "Quality Assurance Status\n" + memgraph_status + "\n"
+                = generate_status(memgraph_suite, f, required = True)
+    status_data.append([memgraph_suite, memgraph_status])
 
     # read extra scenarios
     for suite in extra_suites:
@@ -87,7 +116,10 @@ if __name__ == "__main__":
         log.info("Extra suite '{}' result path is {}".format(suite, result_path))
         with open(result_path) as f:
             suite_status, _, _ = generate_status(suite, f)
-            qa_status_message += suite_status + "\n"
+        status_data.append([suite, suite_status])
+
+    # create status message
+    qa_status_message = generate_remarkup(status_data)
 
     # create the report file
     with open(qa_status_path, "w") as f:
@@ -3,7 +3,11 @@
 # From the manpage: "If the -j option is given without an argument, make will not limit the number of jobs that can run simultaneously."
 # That means that the whole build will be started simultaneously and IT WILL CRASH YOUR COMPUTER!
 
-cd ../..
+cd ../../..
+
+cp -r memgraph parent
+
+cd memgraph
 
 TIMEOUT=600 ./init
 bash -c "doxygen Doxyfile >/dev/null 2>/dev/null"
@@ -14,11 +18,20 @@ TIMEOUT=1000 make -j$THREADS
 
 cd ..
 mkdir build_release
-cd build_release
+
+cd build_release
 cmake -DCMAKE_BUILD_TYPE=release ..
 TIMEOUT=1000 make -j$THREADS memgraph_link_target
 
-cd ../tools/apollo
+cd ../../parent
+
+git checkout HEAD~1
+TIMEOUT=600 ./init
+
+cd build
+cmake -DCMAKE_BUILD_TYPE=release ..
+TIMEOUT=1000 make -j$THREADS memgraph_link_target
+
+cd ../../memgraph/tools/apollo
 
 ./generate diff
@@ -24,5 +24,5 @@ cat "$errfile" >&2
 
 len="$( cat "$errfile" | wc -l )"
 if [ $len -gt 0 ]; then
-    echo -e "Cppcheck errors:\n$( cat "$errfile" )" > "$errfile"
+    echo -e "==== Cppcheck errors: ====\n\n\`\`\`\n$( cat "$errfile" )\n\`\`\`" > "$errfile"
 fi
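For reference, the message the updated cppcheck wrapper now writes is a Remarkup heading followed by the errors fenced in backticks so Phabricator renders them verbatim; a sketch with a made-up error line, printed from Python rather than echo -e:

    errors = "[src/example.cpp:12]: (error) example cppcheck message"
    print("==== Cppcheck errors: ====\n\n```\n{}\n```".format(errors))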
@@ -35,6 +35,7 @@ OUTPUT_DIR = os.path.join(BUILD_DIR, "apollo")
 # output lists
 ARCHIVES = []
 RUNS = []
+DATA_PROCESS = []
 
 # generation mode
 if len(sys.argv) >= 2:
@@ -47,6 +48,10 @@ def run_cmd(cmd, cwd):
     ret = subprocess.run(cmd, cwd = cwd, stdout = subprocess.PIPE, check = True)
     return ret.stdout.decode("utf-8")
 
+def find_memgraph_binary(loc):
+    return run_cmd(["find", ".", "-maxdepth", "1", "-executable", "-type",
+                    "f", "-name", "memgraph*"], loc).split("\n")[0][2:]
+
 def generate_run(name, typ = "run", supervisor = "", commands = "",
                  arguments = "", enable_network = False,
                  outfile_paths = "", infile = ""):
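A small illustration (with a hypothetical binary name) of why the helper takes [0][2:]: find prints matches such as "./memgraph_<something>", so the first output line is kept and the leading "./" stripped, leaving a name relative to the searched directory:

    find_output = "./memgraph_123_0_abcdef\n"
    binary_name = find_output.split("\n")[0][2:]
    print(binary_name)   # memgraph_123_0_abcdef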
@@ -77,12 +82,9 @@ if os.path.exists(OUTPUT_DIR):
 os.makedirs(OUTPUT_DIR)
 
 # store memgraph binary to archive
-binary_name = run_cmd(["find", ".", "-maxdepth", "1", "-executable", "-type",
-        "f", "-name", "memgraph*"], BUILD_DIR).split("\n")[0][2:]
-binary_link_name = run_cmd(["find", ".", "-maxdepth", "1", "-executable", "-type",
-        "l", "-name", "memgraph*"], BUILD_DIR).split("\n")[0][2:]
+binary_name = find_memgraph_binary(BUILD_DIR)
 binary_path = os.path.join(BUILD_DIR, binary_name)
-binary_link_path = os.path.join(BUILD_DIR, binary_link_name)
+binary_link_path = os.path.join(BUILD_DIR, "memgraph")
 config_path = os.path.join(BASE_DIR, "config")
 config_copy_path = os.path.join(BUILD_DIR, "config")
 if os.path.exists(config_copy_path):
@@ -183,24 +185,55 @@ if mode == "release":
     BUILD_RELEASE_DIR = os.path.join(BASE_DIR, "build")
 else:
     BUILD_RELEASE_DIR = os.path.join(BASE_DIR, "build_release")
-binary_release_name = run_cmd(["find", ".", "-maxdepth", "1", "-executable",
-        "-type", "f", "-name", "memgraph*"], BUILD_RELEASE_DIR).split("\n")[0][2:]
+binary_release_name = find_memgraph_binary(BUILD_RELEASE_DIR)
 binary_release_path = os.path.join(BUILD_RELEASE_DIR, binary_release_name)
 binary_release_link_path = os.path.join(BUILD_RELEASE_DIR, "memgraph")
 
 # macro benchmark tests
+MACRO_BENCHMARK_ARGS = "QuerySuite MemgraphRunner --groups aggregation"
 macro_bench_path = os.path.join(BASE_DIR, "tests", "macro_benchmark")
 stress_common = os.path.join(BASE_DIR, "tests", "stress", "common.py")
 infile = create_archive("macro_benchmark", [binary_release_path,
         macro_bench_path, stress_common, config_path],
         cwd = WORKSPACE_DIR)
 supervisor = "./{}/tests/macro_benchmark/harness/harness.py".format(BASE_DIR_NAME)
-args = "QuerySuite MemgraphRunner --groups aggregation --RunnerBin " + binary_release_path
+args = MACRO_BENCHMARK_ARGS + " --RunnerBin " + binary_release_path
+outfile_paths = "\./{}/tests/macro_benchmark/harness/\.harness_summary".format(
+        BASE_DIR_NAME)
 RUNS.append(generate_run("macro_benchmark", supervisor = supervisor,
         arguments = args, infile = infile, outfile_paths = outfile_paths))
 
+# macro benchmark parent tests
+if mode == "diff":
+    PARENT_DIR = os.path.join(WORKSPACE_DIR, "parent")
+    BUILD_PARENT_DIR = os.path.join(PARENT_DIR, "build")
+    binary_parent_name = find_memgraph_binary(BUILD_PARENT_DIR)
+    binary_parent_path = os.path.join(BUILD_PARENT_DIR, binary_parent_name)
+    parent_config_path = os.path.join(PARENT_DIR, "config")
+    parent_macro_bench_path = os.path.join(PARENT_DIR, "tests", "macro_benchmark")
+    parent_stress_common = os.path.join(PARENT_DIR, "tests", "stress", "common.py")
+    infile = create_archive("macro_benchmark_parent", [binary_parent_path,
+            parent_macro_bench_path, parent_stress_common, parent_config_path],
+            cwd = WORKSPACE_DIR)
+    supervisor = "./parent/tests/macro_benchmark/harness/harness.py"
+    args = MACRO_BENCHMARK_ARGS + " --RunnerBin " + binary_parent_path
+    outfile_paths = "\./parent/tests/macro_benchmark/harness/\.harness_summary"
+    RUNS.append(generate_run("macro_benchmark_parent", supervisor = supervisor,
+            arguments = args, infile = infile, outfile_paths = outfile_paths))
+
+    # macro benchmark comparison data process
+    script_path = os.path.join(BASE_DIR, "tools", "apollo",
+            "macro_benchmark_summary")
+    infile = create_archive("macro_benchmark_summary", [script_path],
+            cwd = WORKSPACE_DIR)
+    cmd = "./memgraph/tools/apollo/macro_benchmark_summary " \
+            "macro_benchmark/memgraph/tests/macro_benchmark/harness/.harness_summary " \
+            "macro_benchmark_parent/parent/tests/macro_benchmark/harness/.harness_summary " \
+            ".harness_summary"
+    outfile_paths = "\./.harness_summary"
+    DATA_PROCESS.append(generate_run("macro_benchmark_summary", typ = "data process",
+            commands = cmd, infile = infile, outfile_paths = outfile_paths))
+
 # stress tests
 stress_path = os.path.join(BASE_DIR, "tests", "stress")
 infile = create_archive("stress", [binary_release_path,
@@ -212,4 +245,4 @@ RUNS.append(generate_run("stress", commands = cmd, infile = infile))
 
 # store ARCHIVES and RUNS
 store_metadata(OUTPUT_DIR, "archives", ARCHIVES)
-store_metadata(OUTPUT_DIR, "runs", RUNS)
+store_metadata(OUTPUT_DIR, "runs", RUNS + DATA_PROCESS)
tools/apollo/macro_benchmark_summary (new executable file, 111 lines)
@@ -0,0 +1,111 @@
+#!/usr/bin/python3
+import os
+import sys
+
+def convert2float(val):
+    try:
+        return float(val)
+    except:
+        return val
+
+def parse_file(fname):
+    with open(fname) as f:
+        data = f.readlines()
+    ret = []
+    for row in data:
+        row = row.strip()
+        if row == "": continue
+        ret.append(list(map(convert2float, row.split())))
+    return ret
+
+def strip_integers(row):
+    return list(filter(lambda x: type(x) == str, row))
+
+def find_item(data, header, row):
+    headers = data[0]
+    row = strip_integers(row)
+    pos_x = -1
+    for i in range(len(data)):
+        s = strip_integers(data[i])
+        if s != row: continue
+        pos_x = i
+        break
+    if pos_x == -1: return None
+    pos_y = -1
+    for j in range(len(headers)):
+        if headers[j] != header: continue
+        pos_y = j
+        break
+    if pos_y == -1: return None
+    return data[pos_x][pos_y]
+
+def compare_values(data_cur, data_prev):
+    ret = []
+    headers = data_cur[0]
+    for i in range(len(data_cur)):
+        ret.append([])
+        row_cur = data_cur[i]
+        for j in range(len(row_cur)):
+            item_cur = row_cur[j]
+            if type(item_cur) == str:
+                item = " ".join(item_cur.split("_")).capitalize()
+            else:
+                item_prev = find_item(data_prev, headers[j], row_cur)
+                if item_prev != None:
+                    if item_prev != 0.0:
+                        diff = (item_cur - item_prev) / item_prev
+                    else:
+                        diff = 0.0
+                    if diff < -0.05:
+                        sign = " {icon arrow-down color=green}"
+                    elif diff > 0.05:
+                        sign = " {icon arrow-up color=red}"
+                    else:
+                        sign = ""
+                    item = "{:.9f} //({:+.2%})//{}".format(item_cur, diff, sign)
+                else:
+                    item = "{:.9f} //(new)// {{icon plus color=blue}}".format(item_cur)
+            ret[i].append(item)
+    return ret
+
+def generate_remarkup(data):
+    ret = "==== Macro benchmark summary: ====\n\n"
+    ret += "<table>\n"
+    for row in data:
+        ret += " <tr>\n"
+        for item in row:
+            if row == data[0]:
+                fmt = " <th>{}</th>\n"
+            else:
+                fmt = " <td>{}</td>\n"
+            ret += fmt.format(item)
+        ret += " </tr>\n"
+    ret += "</table>\n"
+    return ret
+
+if len(sys.argv) > 4 or len(sys.argv) < 3:
+    print("usage: {} current_values previous_values output_file".format(sys.argv[0]))
+    print(" output_file is optional, if not specified the script outputs")
+    print(" to stdout, if set to '-' then it overwrites current_values")
+    sys.exit(1)
+
+if len(sys.argv) == 4:
+    infile_cur, infile_prev, outfile = sys.argv[1:]
+else:
+    infile_cur, infile_prev = sys.argv[1:]
+    outfile = ""
+
+data_cur = parse_file(infile_cur)
+data_prev = parse_file(infile_prev)
+
+markup = generate_remarkup(compare_values(data_cur, data_prev))
+
+if outfile == "":
+    sys.stdout.write(markup)
+    sys.exit(0)
+
+if outfile == "-":
+    outfile = infile_cur
+
+with open(outfile, "w") as f:
+    f.write(generate_remarkup(compare_values(data_cur, data_prev)))
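A worked example, with made-up timings, of how compare_values renders a single numeric cell: the relative change against the parent value decides the icon, and anything within the +/-5% band gets no icon at all.

    item_cur, item_prev = 0.950000000, 1.000000000
    diff = (item_cur - item_prev) / item_prev           # exactly -5.00%
    if diff < -0.05:
        sign = " {icon arrow-down color=green}"
    elif diff > 0.05:
        sign = " {icon arrow-up color=red}"
    else:
        sign = ""
    print("{:.9f} //({:+.2%})//{}".format(item_cur, diff, sign))
    # prints: 0.950000000 //(-5.00%)//   (no icon, -5% sits on the boundary of the band)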