From b30e3252e2fddd2a8ef716acfbe12da625d924af Mon Sep 17 00:00:00 2001
From: Marko Budiselic <marko.budiselic@memgraph.io>
Date: Mon, 4 Sep 2017 15:33:52 +0200
Subject: [PATCH] A python script that visualizes latency results from LDBC.

Summary:
  * extension of run_benchmark script
  * tools/plot_ldbc_latency

Reviewers: teon.banek

Reviewed By: teon.banek

Subscribers: mislav.bradac, pullbot

Differential Revision: https://phabricator.memgraph.io/D743
---
 ...dbc-snb-impls-allshortandupdate.properties |  71 ++++++++++
 .../ldbc/ldbc-snb-impls-allshorts.properties  |   8 +-
 tests/public_benchmark/ldbc/plots/.gitignore  |   2 +
 .../public_benchmark/ldbc/results/.gitignore  |   2 +
 tests/public_benchmark/ldbc/run_benchmark     |  12 +-
 tools/plot_ldbc_latency                       | 132 ++++++++++++++++++
 6 files changed, 221 insertions(+), 6 deletions(-)
 create mode 100644 tests/public_benchmark/ldbc/ldbc-snb-impls-allshortandupdate.properties
 create mode 100644 tests/public_benchmark/ldbc/plots/.gitignore
 create mode 100644 tests/public_benchmark/ldbc/results/.gitignore
 create mode 100755 tools/plot_ldbc_latency

diff --git a/tests/public_benchmark/ldbc/ldbc-snb-impls-allshortandupdate.properties b/tests/public_benchmark/ldbc/ldbc-snb-impls-allshortandupdate.properties
new file mode 100644
index 000000000..5eb33259e
--- /dev/null
+++ b/tests/public_benchmark/ldbc/ldbc-snb-impls-allshortandupdate.properties
@@ -0,0 +1,71 @@
+# Linked Data Benchmark Council
+# Social Network Benchmark
+# Interactive Workload
+
+# *** workload-related driver properties ***
+workload=com.ldbc.driver.workloads.ldbc.snb.interactive.LdbcSnbInteractiveWorkload
+
+# *** vendor-related driver properties ***
+# database=
+
+# *** workload-specific properties ***
+
+# Directory containing query parameter files
+# ldbc.snb.interactive.parameters_dir=
+
+# Directory containing data update stream files
+# ldbc.snb.interactive.updates_dir=
+
+# Short reads random walk dissipation rate, in the interval [1.0-0.0]
+# Higher values translate to shorter walks and therefore fewer short reads
+ldbc.snb.interactive.short_read_dissipation=0.2
+
+## frequency of read queries (number of update queries per one read query)
+ldbc.snb.interactive.LdbcQuery1_freq=26
+ldbc.snb.interactive.LdbcQuery2_freq=37
+ldbc.snb.interactive.LdbcQuery3_freq=69
+ldbc.snb.interactive.LdbcQuery4_freq=36
+ldbc.snb.interactive.LdbcQuery5_freq=57
+ldbc.snb.interactive.LdbcQuery6_freq=129
+ldbc.snb.interactive.LdbcQuery7_freq=87
+ldbc.snb.interactive.LdbcQuery8_freq=45
+ldbc.snb.interactive.LdbcQuery9_freq=157
+ldbc.snb.interactive.LdbcQuery10_freq=30
+ldbc.snb.interactive.LdbcQuery11_freq=16
+ldbc.snb.interactive.LdbcQuery12_freq=44
+ldbc.snb.interactive.LdbcQuery13_freq=19
+ldbc.snb.interactive.LdbcQuery14_freq=49
+
+# *** For debugging purposes ***
+
+ldbc.snb.interactive.LdbcQuery1_enable=false
+ldbc.snb.interactive.LdbcQuery2_enable=false
+ldbc.snb.interactive.LdbcQuery3_enable=false
+ldbc.snb.interactive.LdbcQuery4_enable=false
+ldbc.snb.interactive.LdbcQuery5_enable=false
+ldbc.snb.interactive.LdbcQuery6_enable=false
+ldbc.snb.interactive.LdbcQuery7_enable=false
+ldbc.snb.interactive.LdbcQuery8_enable=true
+ldbc.snb.interactive.LdbcQuery9_enable=false
+ldbc.snb.interactive.LdbcQuery10_enable=false
+ldbc.snb.interactive.LdbcQuery11_enable=false
+ldbc.snb.interactive.LdbcQuery12_enable=false
+ldbc.snb.interactive.LdbcQuery13_enable=false
+ldbc.snb.interactive.LdbcQuery14_enable=false
+
+ldbc.snb.interactive.LdbcShortQuery1PersonProfile_enable=true
+ldbc.snb.interactive.LdbcShortQuery2PersonPosts_enable=true
+ldbc.snb.interactive.LdbcShortQuery3PersonFriends_enable=true
+ldbc.snb.interactive.LdbcShortQuery4MessageContent_enable=true
+ldbc.snb.interactive.LdbcShortQuery5MessageCreator_enable=true
+ldbc.snb.interactive.LdbcShortQuery6MessageForum_enable=true
+ldbc.snb.interactive.LdbcShortQuery7MessageReplies_enable=true
+
+ldbc.snb.interactive.LdbcUpdate1AddPerson_enable=true
+ldbc.snb.interactive.LdbcUpdate2AddPostLike_enable=true
+ldbc.snb.interactive.LdbcUpdate3AddCommentLike_enable=true
+ldbc.snb.interactive.LdbcUpdate4AddForum_enable=true
+ldbc.snb.interactive.LdbcUpdate5AddForumMembership_enable=true
+ldbc.snb.interactive.LdbcUpdate6AddPost_enable=true
+ldbc.snb.interactive.LdbcUpdate7AddComment_enable=true
+ldbc.snb.interactive.LdbcUpdate8AddFriendship_enable=true
diff --git a/tests/public_benchmark/ldbc/ldbc-snb-impls-allshorts.properties b/tests/public_benchmark/ldbc/ldbc-snb-impls-allshorts.properties
index f8e910c46..21c6ebe2a 100644
--- a/tests/public_benchmark/ldbc/ldbc-snb-impls-allshorts.properties
+++ b/tests/public_benchmark/ldbc/ldbc-snb-impls-allshorts.properties
@@ -62,10 +62,10 @@ ldbc.snb.interactive.LdbcShortQuery6MessageForum_enable=true
 ldbc.snb.interactive.LdbcShortQuery7MessageReplies_enable=true
 
 ldbc.snb.interactive.LdbcUpdate1AddPerson_enable=false
-ldbc.snb.interactive.LdbcUpdate2AddPostLike_enable=false
-ldbc.snb.interactive.LdbcUpdate3AddCommentLike_enable=false
+ldbc.snb.interactive.LdbcUpdate2AddPostLike_enable=true
+ldbc.snb.interactive.LdbcUpdate3AddCommentLike_enable=true
 ldbc.snb.interactive.LdbcUpdate4AddForum_enable=false
-ldbc.snb.interactive.LdbcUpdate5AddForumMembership_enable=false
+ldbc.snb.interactive.LdbcUpdate5AddForumMembership_enable=true
 ldbc.snb.interactive.LdbcUpdate6AddPost_enable=false
 ldbc.snb.interactive.LdbcUpdate7AddComment_enable=false
-ldbc.snb.interactive.LdbcUpdate8AddFriendship_enable=false
+ldbc.snb.interactive.LdbcUpdate8AddFriendship_enable=true
diff --git a/tests/public_benchmark/ldbc/plots/.gitignore b/tests/public_benchmark/ldbc/plots/.gitignore
new file mode 100644
index 000000000..d6b7ef32c
--- /dev/null
+++ b/tests/public_benchmark/ldbc/plots/.gitignore
@@ -0,0 +1,2 @@
+*
+!.gitignore
diff --git a/tests/public_benchmark/ldbc/results/.gitignore b/tests/public_benchmark/ldbc/results/.gitignore
new file mode 100644
index 000000000..d6b7ef32c
--- /dev/null
+++ b/tests/public_benchmark/ldbc/results/.gitignore
@@ -0,0 +1,2 @@
+*
+!.gitignore
diff --git a/tests/public_benchmark/ldbc/run_benchmark b/tests/public_benchmark/ldbc/run_benchmark
index 8f132dac2..6a17e42db 100755
--- a/tests/public_benchmark/ldbc/run_benchmark
+++ b/tests/public_benchmark/ldbc/run_benchmark
@@ -16,14 +16,16 @@ function print_help () {
     echo -e "  --time-compression-ratio |"
     echo -e "  --operation-count        | -> https://github.com/ldbc/ldbc_driver/wiki/Driver-Configuration"
     echo -e "  --thread-count           |"
+    echo -e "  --result-file-prefix -> Result file prefix."
 }
 
 # Default parameters.
 host=127.0.0.1
 port=7687
 time_compression_ratio=0.01
-operation_count=100
+operation_count=200
 thread_count=8
+result_file_prefix="undefined"
 
 # Read the arguments.
 while [[ $# -gt 0 ]]
@@ -41,6 +43,10 @@ do
         port=$2
         shift
         ;;
+        --result-file-prefix)
+        result_file_prefix=$2
+        shift
+        ;;
         --time-compression-ratio)
         time_compression_ratio=$2
         shift
@@ -64,4 +70,6 @@ cd ${script_dir}/ldbc-snb-impls
 mvn clean compile assembly:single
 
 cd ${script_dir}/ldbc_driver
-java -cp target/jeeves-0.3-SNAPSHOT.jar:${script_dir}/ldbc-snb-impls/snb-interactive-neo4j/target/snb-interactive-neo4j-1.0.0-jar-with-dependencies.jar com.ldbc.driver.Client -P ${script_dir}/ldbc_driver/configuration/ldbc_driver_default.properties -P ${script_dir}/ldbc-snb-impls-test.properties -P ${script_dir}/ldbc_snb_datagen/social_network/updateStream.properties -p host ${host} -p port ${port} -db net.ellitron.ldbcsnbimpls.interactive.neo4j.Neo4jDb -p ldbc.snb.interactive.parameters_dir ${script_dir}/ldbc_snb_datagen/substitution_parameters --time_compression_ratio ${time_compression_ratio} --operation_count ${operation_count} --thread_count ${thread_count}
+java -cp target/jeeves-0.3-SNAPSHOT.jar:${script_dir}/ldbc-snb-impls/snb-interactive-neo4j/target/snb-interactive-neo4j-1.0.0-jar-with-dependencies.jar com.ldbc.driver.Client -P ${script_dir}/ldbc_driver/configuration/ldbc_driver_default.properties -P ${script_dir}/ldbc-snb-impls-allshortandupdate.properties -P ${script_dir}/ldbc_snb_datagen/social_network/updateStream.properties -p host ${host} -p port ${port} -db net.ellitron.ldbcsnbimpls.interactive.neo4j.Neo4jDb -p ldbc.snb.interactive.parameters_dir ${script_dir}/ldbc_snb_datagen/substitution_parameters --time_compression_ratio ${time_compression_ratio} --operation_count ${operation_count} --thread_count ${thread_count}
+
+cp ${script_dir}/ldbc_driver/results/LDBC-results.json ${script_dir}/results/${result_file_prefix}-LDBC-results.json
diff --git a/tools/plot_ldbc_latency b/tools/plot_ldbc_latency
new file mode 100755
index 000000000..d832d9dc7
--- /dev/null
+++ b/tools/plot_ldbc_latency
@@ -0,0 +1,132 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+'''
+Latency Barchart (Based on LDBC JSON output).
+'''
+
+import json
+import os
+import numpy as np
+import matplotlib.pyplot as plt
+from matplotlib.cbook import get_sample_data
+from argparse import ArgumentParser
+
+
+SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))  # this script's dir
+COLORS = {  # bar color (hex RGB) looked up by vendor reference in main()
+    'memgraph': '#ff7300',
+    'neo4j': '#008cc2'
+}
+
+
+def parse_args():
+    """Parse command-line options; both vendor lists are required."""
+    argp = ArgumentParser(description=__doc__)
+    argp.add_argument("--vendor-references", nargs="+", required=True,
+                      help="Short references that represent all the "
+                           "vendors that are going to be "
+                           "visualized on the plot.")
+    argp.add_argument("--vendor-titles", nargs="+", required=True,
+                      help="Vendor titles that are going to appear "
+                           "on the plot, e.g. legend titles.")
+    argp.add_argument("--plot-title", default="{{Plot title placeholder}}",
+                      help="Plot title.")
+    argp.add_argument("--logo-path", default=None,
+                      help="Path to the logo that is going to be presented"
+                           " instead of title.")
+    argp.add_argument("--results-dir",
+                      default=os.path.join(
+                        SCRIPT_DIR, "../tests/public_benchmark/ldbc/results"),
+                      help="Path to the folder with result files in format "
+                           "{{vendor-reference}}-LDBC-results.json")
+    return argp.parse_args()
+
+
+def autolabel(ax, rects):
+    """
+    Write the integer-truncated height of every bar centered above it.
+    """
+    for bar in rects:
+        bar_top = bar.get_height()
+        x_center = bar.get_x() + bar.get_width()/2.
+        # TODO: adjust more vendors
+        ax.text(x_center, 1.00*bar_top, '%d' % int(bar_top),
+                ha='center', va='bottom')
+
+
+def main():
+    """Load two vendors' LDBC results and show a mean-latency bar chart."""
+    args = parse_args()
+
+    # Prepare the datastructure.
+    vendors = {}
+    for vendor_reference, vendor_title in zip(args.vendor_references,
+                                              args.vendor_titles):
+        vendors[vendor_reference] = {}
+        vendors[vendor_reference]['title'] = vendor_title
+        vendors[vendor_reference]['results_path'] = os.path.join(
+            args.results_dir, "%s-LDBC-results.json" % vendor_reference)
+        vendors[vendor_reference]['color'] = COLORS[vendor_reference]
+        vendors[vendor_reference]['latencies'] = []
+        vendors[vendor_reference]['query_names'] = []
+    if len(vendors) != 2:  # explicit raise: asserts vanish under -O
+        raise ValueError("The graph is tailored for only 2 vendors.")
+
+    # Collect the benchmark data.
+    print("LDBC Latency Data")
+    for vendor_reference, vendor_data in vendors.items():
+        print("Vendor: %s" % vendor_reference)
+        with open(vendor_data['results_path']) as results_file:
+            results_data = json.load(results_file)
+            for query_data in results_data["all_metrics"]:
+                mean_runtime = query_data["run_time"]["mean"]
+                query_name = query_data['name']
+                print("%s -> %sms" % (query_name, str(mean_runtime)))
+                vendor_data['latencies'].append(mean_runtime)
+                vendor_data['query_names'].append(query_name)
+
+    # Consistency check.
+    all_query_names = [tuple(vd['query_names']) for vd in vendors.values()]
+    if len(set(all_query_names)) != 1:
+        raise ValueError("Queries between different vendors are different!")
+    query_names = all_query_names[0]
+
+    # Plot.
+    ind = np.arange(len(query_names))   # the x locations for the groups
+    width = 0.40                        # the width of the bars
+    fig, ax = plt.subplots()            # figure setup
+    ax.set_ylabel('Mean Latency (ms)')  # YAxis title
+    ax.set_facecolor('#dcdcdc')         # plot bg color (light gray)
+    ax.set_xticks(ind + width / len(vendors))  # TODO: adjust (more vendors)
+    # IMPORTANT! Long query names on the XAxis don't look compelling.
+    ax.set_xticklabels(['Q%s' % x for x in range(len(query_names))])
+    # set only horizontal grid lines
+    for line in ax.get_xgridlines():
+        line.set_linestyle(' ')
+    for line in ax.get_ygridlines():
+        line.set_linestyle('--')
+    ax.set_axisbelow(True)              # put the grid below all other elements
+    plt.grid(True)                      # show grid
+    # Draw logo or plot title
+    if args.logo_path is None:
+        ax.set_title(args.plot_title)
+    else:
+        # TODO: improve the logo positioning
+        im = plt.imread(args.logo_path)  # plain path; cwd-relative works
+        plt.gcf().subplots_adjust(top=0.85)
+        newax = fig.add_axes([0.4, 0.75, 0.2, 0.25], anchor='N')
+        newax.imshow(im)
+        newax.axis('off')
+    # Draw bars
+    for index, vendor_data in enumerate(vendors.values()):
+        rects = ax.bar(ind + index * width, vendor_data['latencies'], width,
+                       color=vendor_data['color'])
+        vendor_data['rects'] = rects
+        autolabel(ax, rects)
+    ax.legend([vd['rects'][0] for vd in vendors.values()],
+              [vd['title'] for vd in vendors.values()])  # Draw the legend.
+    plt.show()
+
+if __name__ == '__main__':  # run the plot only when executed as a script
+    main()