From 9d1265f41f57b28a1f3c685ccb1823a7a374d35e Mon Sep 17 00:00:00 2001
From: Matej Ferencevic <matej.ferencevic@memgraph.io>
Date: Thu, 24 Aug 2017 15:03:21 +0200
Subject: [PATCH] Initial version of macro benchmark summary script.

Summary:
Changed harness output to be compatible with the summary script.

Changed cppcheck message formatting.

Changed QA message formatting.

Changed macro benchmark summary icons.

Fixed newlines in status generators.

Added the group name to the harness summary.

The macro benchmark summary script now handles differing input files.

Added more output options to the summary script (see the example invocation below).

Diff builds now also build the parent commit for performance comparison.
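
Example invocation of the new summary script (the paths here are illustrative;
the actual Apollo paths are wired up in tools/apollo/generate):

    # Compare the current harness summary against the parent's and write
    # the Remarkup table to .harness_summary. The third argument is
    # optional: omit it to print to stdout, or pass '-' to overwrite the
    # first input file in place.
    ./tools/apollo/macro_benchmark_summary \
        current/.harness_summary \
        parent/.harness_summary \
        .harness_summary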

Reviewers: buda, mislav.bradac

Reviewed By: mislav.bradac

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D706
---
 tests/macro_benchmark/harness/harness.py |  32 ++++---
 tests/qa/continuous_integration          |  50 ++++++++--
 tools/apollo/build_diff                  |  19 +++-
 tools/apollo/cppcheck                    |   2 +-
 tools/apollo/generate                    |  51 +++++++++--
 tools/apollo/macro_benchmark_summary     | 111 +++++++++++++++++++++++
 6 files changed, 229 insertions(+), 36 deletions(-)
 create mode 100755 tools/apollo/macro_benchmark_summary

diff --git a/tests/macro_benchmark/harness/harness.py b/tests/macro_benchmark/harness/harness.py
index c7db01301..40376a807 100755
--- a/tests/macro_benchmark/harness/harness.py
+++ b/tests/macro_benchmark/harness/harness.py
@@ -39,9 +39,11 @@ class _QuerySuite:
     # what the QuerySuite can work with
     KNOWN_KEYS = {"config", "setup", "itersetup", "run", "iterteardown",
                   "teardown", "common"}
-    summary = "Macro benchmark summary:\n" \
-              "{:>27}{:>27}{:>27}{:>27}{:>27}{:>27}\n".format(
-                      "scenario_name", "query_parsing_time",
+    FORMAT = ["{:>24}", "{:>28}", "{:>22}", "{:>24}", "{:>28}",
+              "{:>16}", "{:>16}"]
+    FULL_FORMAT = "".join(FORMAT) + "\n"
+    summary = FULL_FORMAT.format(
+                      "group_name", "scenario_name", "query_parsing_time",
                       "query_planning_time", "query_plan_execution_time",
                       WALL_TIME, CPU_TIME)
 
@@ -186,7 +188,7 @@ class _QuerySuite:
 
         return group_scenarios
 
-    def run(self, scenario, scenario_name, runner):
+    def run(self, scenario, group_name, scenario_name, runner):
         log.debug("QuerySuite.run() with scenario: %s", scenario)
         scenario_config = scenario.get("config")
         scenario_config = next(scenario_config()) if scenario_config else {}
@@ -252,20 +254,21 @@ class _QuerySuite:
         # TODO value outlier detection and warning across iterations
         execute("teardown")
         runner.stop()
-        self.append_scenario_summary(scenario_name, measurement_sums,
-                                     num_iterations)
+        self.append_scenario_summary(group_name, scenario_name,
+                                     measurement_sums, num_iterations)
         return measurements
 
-    def append_scenario_summary(self, scenario_name, measurement_sums,
-                                num_iterations):
-        self.summary += "{:>27}".format(scenario_name)
-        for key in ("query_parsing_time", "query_planning_time",
-                    "query_plan_execution_time", WALL_TIME, CPU_TIME):
+    def append_scenario_summary(self, group_name, scenario_name,
+                                measurement_sums, num_iterations):
+        self.summary += self.FORMAT[0].format(group_name)
+        self.summary += self.FORMAT[1].format(scenario_name)
+        for i, key in enumerate(("query_parsing_time", "query_planning_time",
+                    "query_plan_execution_time", WALL_TIME, CPU_TIME)):
             if key not in measurement_sums:
                 time = "-"
             else:
                 time = "{:.10f}".format(measurement_sums[key] / num_iterations)
-            self.summary += "{:>27}".format(time)
+            self.summary += self.FORMAT[i + 2].format(time)
         self.summary += "\n"
 
     def runners(self):
@@ -523,7 +526,7 @@ def main():
     for (group, scenario_name), scenario in filtered_scenarios.items():
         log.info("Executing group.scenario '%s.%s' with elements %s",
                  group, scenario_name, list(scenario.keys()))
-        for iter_result in suite.run(scenario, scenario_name, runner):
+        for iter_result in suite.run(scenario, group, scenario_name, runner):
             iter_result["group"] = group
             iter_result["scenario"] = scenario_name
             results.append(iter_result)
@@ -534,7 +537,8 @@ def main():
     run.update(args.additional_run_fields)
     for result in results:
         jail.store_data(result)
-    print("\n\n{}\n".format(suite.summary))
+    print("\n\nMacro benchmark summary:")
+    print("{}\n".format(suite.summary))
     with open(os.path.join(DIR_PATH, ".harness_summary"), "w") as f:
         print(suite.summary, file=f)
 
diff --git a/tests/qa/continuous_integration b/tests/qa/continuous_integration
index 37d3dc2c3..775840231 100755
--- a/tests/qa/continuous_integration
+++ b/tests/qa/continuous_integration
@@ -8,7 +8,7 @@ everything which is needed for the CI environment.
 List of responsibilities:
     * execute default suites
     * terminate execution if any of internal scenarios fails
-    * creates the report file that is needed by the Jenkins plugin
+    * creates the report file that is needed by the Apollo plugin
       to post the status on Phabricator. (.quality_assurance_status)
 """
 
@@ -43,18 +43,46 @@ def get_newest_path(folder, suffix):
     return os.path.join(folder, name_list.pop())
 
 
-def generate_status(suite, f):
+def generate_status(suite, f, required = False):
     """
     :param suite: Test suite name.
     :param f: Json file with status report.
+    :param required: Adds status ticks to the message if required.
 
     :return: Status string.
     """
     result = json.load(f)
     total = result["total"]
     passed = result["passed"]
-    return ("SUITE: %s, PASSED SCENARIOS: %s, TOTAL SCENARIOS: %s (%.2f%%)" %
-            (suite, passed, total, 100.0 * passed / total)), passed, total
+    ratio = passed / total
+    msg = "{} / {} //({:.2%})//".format(passed, total, ratio)
+    if required:
+        if passed == total:
+            msg += " {icon check color=green}"
+        else:
+            msg += " {icon times color=red}"
+    return (msg, passed, total)
+
+
+def generate_remarkup(data):
+    """
+    :param data: Tabular data to convert to remarkup.
+
+    :return: Remarkup formatted status string.
+    """
+    ret = "==== Quality assurance status: ====\n\n"
+    ret += "<table>\n"
+    for row in data:
+        ret += "  <tr>\n"
+        for item in row:
+            if row == data[0]:
+                fmt = "    <th>{}</th>\n"
+            else:
+                fmt = "    <td>{}</td>\n"
+            ret += fmt.format(item)
+        ret += "  </tr>\n"
+    ret += "</table>\n"
+    return ret
 
 
 if __name__ == "__main__":
@@ -73,13 +101,14 @@ if __name__ == "__main__":
             suite_suffix.format(memgraph_suite))
     log.info("Memgraph result path is {}".format(memgraph_result_path))
 
+    # status table headers
+    status_data = [["Suite", "Scenarios"]]
+
     # read internal scenarios
     with open(memgraph_result_path) as f:
         memgraph_status, memgraph_passed, memgraph_total \
-            = generate_status(memgraph_suite, f)
-
-    # create status message
-    qa_status_message = "Quality Assurance Status\n" + memgraph_status + "\n"
+            = generate_status(memgraph_suite, f, required = True)
+    status_data.append([memgraph_suite, memgraph_status])
 
     # read extra scenarios
     for suite in extra_suites:
@@ -87,7 +116,10 @@ if __name__ == "__main__":
         log.info("Extra suite '{}' result path is {}".format(suite, result_path))
         with open(result_path) as f:
             suite_status, _, _ = generate_status(suite, f)
-        qa_status_message += suite_status + "\n"
+        status_data.append([suite, suite_status])
+
+    # create status message
+    qa_status_message = generate_remarkup(status_data)
 
     # create the report file
     with open(qa_status_path, "w") as f:
diff --git a/tools/apollo/build_diff b/tools/apollo/build_diff
index 2ec14464a..26897d01d 100644
--- a/tools/apollo/build_diff
+++ b/tools/apollo/build_diff
@@ -3,7 +3,11 @@
 # From the manpage: "If the  -j  option is given without an argument, make will not limit the number of jobs that can run simultaneously."
 # That means that the whole build will be started simultaneously and IT WILL CRASH YOUR COMPUTER!
 
-cd ../..
+cd ../../..
+
+cp -r memgraph parent
+
+cd memgraph
 
 TIMEOUT=600 ./init
 bash -c "doxygen Doxyfile >/dev/null 2>/dev/null"
@@ -14,11 +18,20 @@ TIMEOUT=1000 make -j$THREADS
 
 cd ..
 mkdir build_release
-cd build_release
 
+cd build_release
 cmake -DCMAKE_BUILD_TYPE=release ..
 TIMEOUT=1000 make -j$THREADS memgraph_link_target
 
-cd ../tools/apollo
+cd ../../parent
+
+git checkout HEAD~1
+TIMEOUT=600 ./init
+
+cd build
+cmake -DCMAKE_BUILD_TYPE=release ..
+TIMEOUT=1000 make -j$THREADS memgraph_link_target
+
+cd ../../memgraph/tools/apollo
 
 ./generate diff
diff --git a/tools/apollo/cppcheck b/tools/apollo/cppcheck
index 4f504bb13..d2baf7982 100755
--- a/tools/apollo/cppcheck
+++ b/tools/apollo/cppcheck
@@ -24,5 +24,5 @@ cat "$errfile" >&2
 
 len="$( cat "$errfile" | wc -l )"
 if [ $len -gt 0 ]; then
-    echo -e "Cppcheck errors:\n$( cat "$errfile" )" > "$errfile"
+    echo -e "==== Cppcheck errors: ====\n\n\`\`\`\n$( cat "$errfile" )\n\`\`\`" > "$errfile"
 fi
diff --git a/tools/apollo/generate b/tools/apollo/generate
index 05c89ff1c..914fea54a 100755
--- a/tools/apollo/generate
+++ b/tools/apollo/generate
@@ -35,6 +35,7 @@ OUTPUT_DIR = os.path.join(BUILD_DIR, "apollo")
 # output lists
 ARCHIVES = []
 RUNS = []
+DATA_PROCESS = []
 
 # generation mode
 if len(sys.argv) >= 2:
@@ -47,6 +48,10 @@ def run_cmd(cmd, cwd):
     ret = subprocess.run(cmd, cwd = cwd, stdout = subprocess.PIPE, check = True)
     return ret.stdout.decode("utf-8")
 
+def find_memgraph_binary(loc):
+    return run_cmd(["find", ".", "-maxdepth", "1", "-executable", "-type",
+            "f", "-name", "memgraph*"], loc).split("\n")[0][2:]
+
 def generate_run(name, typ = "run", supervisor = "", commands = "",
         arguments = "", enable_network = False,
         outfile_paths = "", infile = ""):
@@ -77,12 +82,9 @@ if os.path.exists(OUTPUT_DIR):
 os.makedirs(OUTPUT_DIR)
 
 # store memgraph binary to archive
-binary_name = run_cmd(["find", ".", "-maxdepth", "1", "-executable", "-type",
-        "f", "-name", "memgraph*"], BUILD_DIR).split("\n")[0][2:]
-binary_link_name = run_cmd(["find", ".", "-maxdepth", "1", "-executable", "-type",
-        "l", "-name", "memgraph*"], BUILD_DIR).split("\n")[0][2:]
+binary_name = find_memgraph_binary(BUILD_DIR)
 binary_path = os.path.join(BUILD_DIR, binary_name)
-binary_link_path = os.path.join(BUILD_DIR, binary_link_name)
+binary_link_path = os.path.join(BUILD_DIR, "memgraph")
 config_path = os.path.join(BASE_DIR, "config")
 config_copy_path = os.path.join(BUILD_DIR, "config")
 if os.path.exists(config_copy_path):
@@ -183,24 +185,55 @@ if mode == "release":
     BUILD_RELEASE_DIR = os.path.join(BASE_DIR, "build")
 else:
     BUILD_RELEASE_DIR = os.path.join(BASE_DIR, "build_release")
-binary_release_name = run_cmd(["find", ".", "-maxdepth", "1", "-executable",
-        "-type", "f", "-name", "memgraph*"], BUILD_RELEASE_DIR).split("\n")[0][2:]
+binary_release_name = find_memgraph_binary(BUILD_RELEASE_DIR)
 binary_release_path = os.path.join(BUILD_RELEASE_DIR, binary_release_name)
 binary_release_link_path = os.path.join(BUILD_RELEASE_DIR, "memgraph")
 
 # macro benchmark tests
+MACRO_BENCHMARK_ARGS = "QuerySuite MemgraphRunner --groups aggregation"
 macro_bench_path = os.path.join(BASE_DIR, "tests", "macro_benchmark")
 stress_common = os.path.join(BASE_DIR, "tests", "stress", "common.py")
 infile = create_archive("macro_benchmark", [binary_release_path,
         macro_bench_path, stress_common, config_path],
         cwd = WORKSPACE_DIR)
 supervisor = "./{}/tests/macro_benchmark/harness/harness.py".format(BASE_DIR_NAME)
-args = "QuerySuite MemgraphRunner --groups aggregation --RunnerBin " + binary_release_path
+args = MACRO_BENCHMARK_ARGS + " --RunnerBin " + binary_release_path
 outfile_paths = "\./{}/tests/macro_benchmark/harness/\.harness_summary".format(
         BASE_DIR_NAME)
 RUNS.append(generate_run("macro_benchmark", supervisor = supervisor,
         arguments = args, infile = infile, outfile_paths = outfile_paths))
 
+# macro benchmark parent tests
+if mode == "diff":
+    PARENT_DIR = os.path.join(WORKSPACE_DIR, "parent")
+    BUILD_PARENT_DIR = os.path.join(PARENT_DIR, "build")
+    binary_parent_name = find_memgraph_binary(BUILD_PARENT_DIR)
+    binary_parent_path = os.path.join(BUILD_PARENT_DIR, binary_parent_name)
+    parent_config_path = os.path.join(PARENT_DIR, "config")
+    parent_macro_bench_path = os.path.join(PARENT_DIR, "tests", "macro_benchmark")
+    parent_stress_common = os.path.join(PARENT_DIR, "tests", "stress", "common.py")
+    infile = create_archive("macro_benchmark_parent", [binary_parent_path,
+            parent_macro_bench_path, parent_stress_common, parent_config_path],
+            cwd = WORKSPACE_DIR)
+    supervisor = "./parent/tests/macro_benchmark/harness/harness.py"
+    args = MACRO_BENCHMARK_ARGS + " --RunnerBin " + binary_parent_path
+    outfile_paths = "\./parent/tests/macro_benchmark/harness/\.harness_summary"
+    RUNS.append(generate_run("macro_benchmark_parent", supervisor = supervisor,
+            arguments = args, infile = infile, outfile_paths = outfile_paths))
+
+    # macro benchmark comparison data process
+    script_path = os.path.join(BASE_DIR, "tools", "apollo",
+            "macro_benchmark_summary")
+    infile = create_archive("macro_benchmark_summary", [script_path],
+            cwd = WORKSPACE_DIR)
+    cmd = "./memgraph/tools/apollo/macro_benchmark_summary " \
+            "macro_benchmark/memgraph/tests/macro_benchmark/harness/.harness_summary " \
+            "macro_benchmark_parent/parent/tests/macro_benchmark/harness/.harness_summary " \
+            ".harness_summary"
+    outfile_paths = "\./.harness_summary"
+    DATA_PROCESS.append(generate_run("macro_benchmark_summary", typ = "data process",
+            commands = cmd, infile = infile, outfile_paths = outfile_paths))
+
 # stress tests
 stress_path = os.path.join(BASE_DIR, "tests", "stress")
 infile = create_archive("stress", [binary_release_path,
@@ -212,4 +245,4 @@ RUNS.append(generate_run("stress", commands = cmd, infile = infile))
 
 # store ARCHIVES and RUNS
 store_metadata(OUTPUT_DIR, "archives", ARCHIVES)
-store_metadata(OUTPUT_DIR, "runs", RUNS)
+store_metadata(OUTPUT_DIR, "runs", RUNS + DATA_PROCESS)
diff --git a/tools/apollo/macro_benchmark_summary b/tools/apollo/macro_benchmark_summary
new file mode 100755
index 000000000..2a396f53b
--- /dev/null
+++ b/tools/apollo/macro_benchmark_summary
@@ -0,0 +1,111 @@
+#!/usr/bin/python3
+import os
+import sys
+
+def convert2float(val):
+    try:
+        return float(val)
+    except:
+        return val
+
+def parse_file(fname):
+    with open(fname) as f:
+        data = f.readlines()
+    ret = []
+    for row in data:
+        row = row.strip()
+        if row == "": continue
+        ret.append(list(map(convert2float, row.split())))
+    return ret
+
+def strip_integers(row):
+    return list(filter(lambda x: type(x) == str, row))
+
+def find_item(data, header, row):
+    headers = data[0]
+    row = strip_integers(row)
+    pos_x = -1
+    for i in range(len(data)):
+        s = strip_integers(data[i])
+        if s != row: continue
+        pos_x = i
+        break
+    if pos_x == -1: return None
+    pos_y = -1
+    for j in range(len(headers)):
+        if headers[j] != header: continue
+        pos_y = j
+        break
+    if pos_y == -1: return None
+    return data[pos_x][pos_y]
+
+def compare_values(data_cur, data_prev):
+    ret = []
+    headers = data_cur[0]
+    for i in range(len(data_cur)):
+        ret.append([])
+        row_cur = data_cur[i]
+        for j in range(len(row_cur)):
+            item_cur = row_cur[j]
+            if type(item_cur) == str:
+                item = " ".join(item_cur.split("_")).capitalize()
+            else:
+                item_prev = find_item(data_prev, headers[j], row_cur)
+                if item_prev != None:
+                    if item_prev != 0.0:
+                        diff = (item_cur - item_prev) / item_prev
+                    else:
+                        diff = 0.0
+                    if diff < -0.05:
+                        sign = " {icon arrow-down color=green}"
+                    elif diff > 0.05:
+                        sign = " {icon arrow-up color=red}"
+                    else:
+                        sign = ""
+                    item = "{:.9f} //({:+.2%})//{}".format(item_cur, diff, sign)
+                else:
+                    item = "{:.9f} //(new)// {{icon plus color=blue}}".format(item_cur)
+            ret[i].append(item)
+    return ret
+
+def generate_remarkup(data):
+    ret = "==== Macro benchmark summary: ====\n\n"
+    ret += "<table>\n"
+    for row in data:
+        ret += "  <tr>\n"
+        for item in row:
+            if row == data[0]:
+                fmt = "    <th>{}</th>\n"
+            else:
+                fmt = "    <td>{}</td>\n"
+            ret += fmt.format(item)
+        ret += "  </tr>\n"
+    ret += "</table>\n"
+    return ret
+
+if len(sys.argv) > 4 or len(sys.argv) < 3:
+    print("usage: {} current_values previous_values output_file".format(sys.argv[0]))
+    print("    output_file is optional, if not specified the script outputs")
+    print("    to stdout, if set to '-' then it overwrites current_values")
+    sys.exit(1)
+
+if len(sys.argv) == 4:
+    infile_cur, infile_prev, outfile = sys.argv[1:]
+else:
+    infile_cur, infile_prev = sys.argv[1:]
+    outfile = ""
+
+data_cur = parse_file(infile_cur)
+data_prev = parse_file(infile_prev)
+
+markup = generate_remarkup(compare_values(data_cur, data_prev))
+
+if outfile == "":
+    sys.stdout.write(markup)
+    sys.exit(0)
+
+if outfile == "-":
+    outfile = infile_cur
+
+with open(outfile, "w") as f:
+    f.write(generate_remarkup(compare_values(data_cur, data_prev)))