memgraph/tools/plot/ldbc_latency
Marko Budiselic 142b1f42b1 Add run_pokec script and minimum refactor
Summary:
  * add run_pokec script because more than one step is required
  * refactor of plot_throughput script
  * move all plot scripts under tools/plot

Reviewers: mferencevic, teon.banek, mislav.bradac

Reviewed By: mferencevic

Subscribers: florijan, pullbot, buda

Differential Revision: https://phabricator.memgraph.io/D1106
2018-01-23 16:17:09 +01:00

194 lines
7.1 KiB
Python
Executable File

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'''
Latency Barchart (Based on LDBC JSON output).
'''
import json
import os
import numpy as np
from argparse import ArgumentParser
import string
import matplotlib
# Must set 'Agg' backend before importing pyplot
# This is so the script works on headless machines (without X11)
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from matplotlib.cbook import get_sample_data
# Directory that contains this script (symlinks resolved).
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))

# Bar colors, one per vendor, in the order the vendors are given.
COLORS = ['#ff7300', '#008cc2']

# Number of LDBC time units (the "unit" field of a results file) per second.
LDBC_TIME_FACTORS = {
    "SECONDS": 1.0,
    "MILLISECONDS": 1e3,
    "MICROSECONDS": 1e6,
    "NANOSECONDS": 1e9,
}

# Number of output time units (the --time-unit choices) per second.
TIME_FACTORS = {
    "s": 1.0,
    "ms": 10 ** 3,
    "us": 10 ** 6,
    "ns": 10 ** 9,
}
def parse_args():
    """
    Parse the command line arguments of the script (read from sys.argv).

    Returns:
        The argparse namespace with the attributes vendor_titles,
        plot_title, logo_path, results, time_unit, output, plot_width,
        plot_height and dpi.
    """
    argp = ArgumentParser(description=__doc__)
    argp.add_argument("--vendor-titles", nargs="+",
                      default=["Memgraph", "Market leader"],
                      help="Vendor titles that are going to appear "
                           "on the plot, e.g. legend titles.")
    argp.add_argument("--plot-title", default="",
                      help="Plot title.")
    argp.add_argument("--logo-path", default=None,
                      help="Path to the logo that is going to be presented"
                           " instead of title.")
    # The values are opened as files and paired positionally with the
    # vendor titles, so the help text describes files, not a folder.
    argp.add_argument("--results", nargs="+", required=True,
                      help="Paths to the result files, one per vendor and "
                           "in the same order as --vendor-titles, named in "
                           "the format {vendor-reference}-LDBC-results.json")
    argp.add_argument("--time-unit", choices=("s", "ms", "us", "ns"),
                      default="ms", help="The time unit that should be used.")
    argp.add_argument("--output", default="",
                      help="Save plot to file (instead of displaying it).")
    argp.add_argument("--plot-width", type=int, default=1920,
                      help="Pixel width of generated plots.")
    argp.add_argument("--plot-height", type=int, default=1080,
                      help="Pixel height of generated plots.")
    argp.add_argument("--dpi", type=int, default=96,
                      help="DPI of generated plots.")
    return argp.parse_args()
def autolabel(ax, rects):
    """
    Write the height of every bar as a text label just above that bar.

    ax: object whose text() method is used to draw the labels.
    rects: iterable of bars providing get_x(), get_width(), get_height().
    """
    # TODO: adjust more vendors
    for bar in rects:
        bar_height = bar.get_height()
        # Horizontal center of the bar; the label is centered on it.
        center_x = bar.get_x() + bar.get_width() / 2.
        label = '{:.1f}'.format(bar_height)
        ax.text(center_x, 1.00 * bar_height, label,
                ha='center', va='bottom')
def _split_query_name(name):
    """
    Split a query name around its first run of decimal digits.

    Returns a (prefix, digits) tuple: prefix is everything before the first
    digit and digits is the first contiguous run of digits. digits is the
    empty string (and prefix the whole name) when there is no digit.
    """
    start = 0
    while start < len(name) and name[start] not in string.digits:
        start += 1
    end = start
    while end < len(name) and name[end] in string.digits:
        end += 1
    return name[:start], name[start:end]


def _shorten_query_name(name):
    """
    Strip a leading "ldbc" (any case) and everything after the query number.

    Names without any digit are returned unchanged (apart from the stripped
    "ldbc" prefix) instead of raising.
    """
    if name.lower().startswith("ldbc"):
        name = name[4:]
    # Long query names on the x-axis don't look compelling.
    prefix, digits = _split_query_name(name)
    return prefix + digits


def _query_sort_key(result):
    """
    Sort key for (query_name, latency) tuples: prefix first, then number.

    The number is compared numerically, so e.g. Query2 sorts before Query10.
    Names without a number sort before numbered names with the same prefix.
    """
    prefix, digits = _split_query_name(result[0])
    return (prefix, int(digits) if digits else -1)


def main():
    """
    Plot the mean LDBC query latencies of two vendors as a grouped bar chart.

    Reads the command line via parse_args(), loads one JSON results file per
    vendor, converts every mean run time from the unit stored in the file to
    the requested time unit, prints the values and draws the chart. The
    chart is written to args.output when that is non-empty, otherwise
    plt.show() is called.

    Raises:
        ValueError: if the results / vendor titles do not pair up to exactly
            two vendors, or if the vendors did not run the same queries.
    """
    # Read the arguments.
    args = parse_args()

    # Prepare the datastructure. Results and titles are paired positionally
    # (surplus entries of the longer list are ignored). The count is checked
    # before COLORS is indexed, so a wrong count gives a clear error instead
    # of an IndexError.
    pairs = list(zip(args.results, args.vendor_titles))
    if len(pairs) != 2:
        raise ValueError("The graph is tailored for only 2 vendors.")
    vendors = []
    for i, (results_path, vendor_title) in enumerate(pairs):
        vendors.append({
            'title': vendor_title,
            'results_file': results_path,
            'color': COLORS[i],
            'results': [],
        })

    # Collect the benchmark data.
    print("LDBC Latency Data")
    for vendor in vendors:
        # JSON is UTF-8 by specification; don't depend on the locale.
        with open(vendor['results_file'], encoding='utf-8') as results_file:
            results_data = json.load(results_file)
        for query_data in results_data["all_metrics"]:
            # File unit -> seconds -> requested unit.
            mean_runtime = (query_data["run_time"]["mean"] /
                            LDBC_TIME_FACTORS[results_data["unit"]] *
                            TIME_FACTORS[args.time_unit])
            query_name = _shorten_query_name(query_data['name'])
            vendor['results'].append((query_name, mean_runtime))

    # Sort results.
    for vendor in vendors:
        vendor['results'].sort(key=_query_sort_key)

    # Print results.
    for vendor in vendors:
        print("Vendor:", vendor['title'])
        for query_name, latency in vendor['results']:
            print("{} -> {:.3f}{}".format(query_name, latency,
                                          args.time_unit))

    # Consistency check: the bars are grouped by position, so every vendor
    # has to provide the same queries in the same (sorted) order.
    all_query_names = [tuple(res[0] for res in vd['results'])
                       for vd in vendors]
    if len(set(all_query_names)) != 1:
        raise ValueError("Queries between different vendors are different!")
    query_names = all_query_names[0]

    # Font size.
    plt.rc('font', size=12)          # controls default text sizes
    plt.rc('axes', titlesize=24)     # fontsize of the axes title
    plt.rc('axes', labelsize=16)     # fontsize of the x and y labels
    plt.rc('xtick', labelsize=12)    # fontsize of the tick labels
    plt.rc('ytick', labelsize=12)    # fontsize of the tick labels
    plt.rc('legend', fontsize=16)    # legend fontsize
    plt.rc('figure', titlesize=24)   # fontsize of the figure title

    # Plot.
    ind = np.arange(len(query_names))  # the x locations for the groups
    width = 0.40                       # the width of the bars
    fig, ax = plt.subplots()           # figure setup
    # Figure size is given in inches: pixels / dpi.
    fig.set_size_inches(args.plot_width / args.dpi,
                        args.plot_height / args.dpi)
    ax.set_ylabel('Mean Latency (%s)' % (args.time_unit))  # YAxis title
    ax.set_facecolor('#dcdcdc')        # plot bg color (light gray)
    ax.set_xticks(ind + width / len(vendors))  # TODO: adjust (more vendors)
    ax.set_xticklabels(query_names, rotation=30)
    # set only horizontal grid lines
    for line in ax.get_xgridlines():
        line.set_linestyle(' ')
    for line in ax.get_ygridlines():
        line.set_linestyle('--')
    ax.set_axisbelow(True)             # put the grid below all other elements
    plt.grid(True)                     # show grid

    # Set plot title
    ax.set_title(args.plot_title)

    # Draw logo or plot title
    if args.logo_path is not None:
        # imread opens and closes the file itself when given a path, so no
        # file handle is left open.
        im = plt.imread(os.path.join(os.getcwd(), args.logo_path))
        plt.gcf().subplots_adjust(top=0.85)
        # magic numbers for logo size - DO NOT TOUCH!
        newax = fig.add_axes([0.46, 0.85, 0.126, 0.15], anchor='N')
        newax.imshow(im)
        newax.axis('off')

    # Draw bars
    for index, vendor in enumerate(vendors):
        latencies = [res[1] for res in vendor['results']]
        rects = ax.bar(ind + index * width, latencies, width,
                       color=vendor['color'])
        vendor['rects'] = rects
        autolabel(ax, rects)
    # One bar per vendor is enough as a legend handle.
    rects = [vd['rects'][0] for vd in vendors]
    titles = [vd['title'] for vd in vendors]
    ax.legend(rects, titles)           # Draw the legend.

    if args.output == "":
        # NOTE(review): the module selects the non-interactive 'Agg' backend
        # at import time, so this call most likely displays nothing; pass
        # --output to get the plot. TODO confirm and fix at module level.
        plt.show()
    else:
        plt.savefig(args.output, dpi=args.dpi)
# Generate the plot only when the file is executed as a script.
if __name__ == "__main__":
    main()