Add run_pokec script and minimal refactor

Summary:
  * add run_pokec script because running the pokec benchmark requires more than one step
  * refactor the plot_throughput script
  * move all plot scripts under tools/plot

Reviewers: mferencevic, teon.banek, mislav.bradac

Reviewed By: mferencevic

Subscribers: florijan, pullbot, buda

Differential Revision: https://phabricator.memgraph.io/D1106
This commit is contained in:
Marko Budiselic 2018-01-23 15:31:57 +01:00
parent ca32538f63
commit 142b1f42b1
11 changed files with 58 additions and 40 deletions

View File

@ -1,2 +1,3 @@
.storage/
.results/
.harness_summary

View File

@ -1,3 +1,3 @@
{
"duration": 30
"duration": 60
}

View File

@ -3,7 +3,4 @@
working_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
cd ${working_dir}
cd pokec
wget -nv -O pokec_small.setup.cypher http://deps.memgraph.io/pokec_small.setup.cypher
wget -nv -O pokec_medium.setup.cypher http://deps.memgraph.io/pokec_medium.setup.cypher
cd ..

View File

@ -259,6 +259,13 @@ def main():
with open(get_absolute_path(".harness_summary"), "w") as f:
json.dump({"results": results, "headers": suite.headers}, f)
# The if block is here because the results from all suites
# aren't compatible with the export below.
if type(suite) not in [QuerySuite, QueryParallelSuite]:
log.warning("The results from the suite "
"aren't compatible with the apollo measurements export.")
return
# Export data points.
with open(get_absolute_path(".apollo_measurements"), "w") as f:
headers = list(suite.headers)

View File

@ -1,11 +1,5 @@
import logging
import os
import time
import itertools
import json
from argparse import ArgumentParser
from collections import defaultdict
from statistics import median
from common import get_absolute_path, APOLLO
from databases import Memgraph, Neo
from clients import QueryClient, LongRunningClient
@ -15,6 +9,7 @@ log = logging.getLogger(__name__)
class LongRunningSuite:
KNOWN_KEYS = {"config", "setup", "run"}
headers = ["elapsed_time", "num_executed_queries"]
def __init__(self, args):
argp = ArgumentParser("LongRunningSuiteArgumentParser")
@ -45,17 +40,17 @@ class LongRunningSuite:
for result in results:
self.summary += summary_format.format(
result["elapsed_time"], result["num_executed_queries"])
# TODO: Revise this.
measurements.append({
"target": "throughput",
"value": result["num_executed_queries"] / result["elapsed_time"],
"unit": "queries per second",
"time": result["elapsed_time"],
"value": result["num_executed_queries"],
"unit": "number of executed queries",
"type": "throughput"})
self.summary += "\n\nThroughtput: " + str(measurements[-1]["value"])
return measurements
def runners(self):
return { "MemgraphRunner" : MemgraphRunner, "NeoRunner" : NeoRunner }
return {"MemgraphRunner": MemgraphRunner, "NeoRunner": NeoRunner}
def groups(self):
return ["pokec"]

22
tests/macro_benchmark/run_pokec Executable file
View File

@ -0,0 +1,22 @@
#!/bin/bash -e
# Run pokec bench (download dataset, run neo and memgraph, plot the results).
#
# Steps:
#   1. download the pokec dataset,
#   2. run the LongRunningSuite against Memgraph and keep its summary,
#   3. run the LongRunningSuite against Neo4j and keep its summary,
#   4. plot both summaries with tools/plot/pokec_throughput.

# Keep fail-fast behaviour even when the script is started as
# `bash run_pokec`, in which case the shebang options are ignored.
set -e

script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# All relative paths below (./harness, ../../tools) are resolved from the
# directory that contains this script. Every expansion of script_dir is
# quoted so that a checkout path with spaces does not get word-split.
cd "${script_dir}"
mkdir -p .results/pokec/
"${script_dir}/groups/pokec/download_dataset"

# Each harness run is expected to leave its summary in .harness_summary;
# move it aside before the next run so the two results do not collide.
./harness LongRunningSuite MemgraphRunner --groups pokec
mv .harness_summary "${script_dir}/.results/pokec/memgraph.summary"
./harness LongRunningSuite NeoRunner --groups pokec
mv .harness_summary "${script_dir}/.results/pokec/neo4j.summary"

# The order of --vendor-references, --vendor-titles and --results must match
# (neo4j first, memgraph second).
../../tools/plot/pokec_throughput \
--vendor-references neo4j memgraph \
--vendor-titles Neo4j Memgraph \
--results "${script_dir}/.results/pokec/neo4j.summary" "${script_dir}/.results/pokec/memgraph.summary" \
--plot-title "Pokec Small" --window-size 1

View File

@ -7,7 +7,7 @@
- ../../../config # directory with config files
- ../../../libs/neo4j # neo4j directory
- ../../../tools/mg_import_csv # memgraph csv import tool
- ../../../tools/plot_ldbc_latency # ldbc plot generation tool
- ../../../tools/plot/ldbc_latency # ldbc plot generation tool
outfile_paths: # TODO: maybe this should also accept relative paths?
- \./memgraph/tests/public_benchmark/ldbc/results/.+
- \./memgraph/tests/public_benchmark/ldbc/plots/.+

View File

@ -9,12 +9,12 @@ TIMEOUT=3600 ./build_dataset
# run read benchmarks
TIMEOUT=3600 ./run_benchmark --run-db memgraph --create-index --thread-count $THREADS --result-file-prefix read
TIMEOUT=3600 ./run_benchmark --run-db neo4j --create-index --thread-count $THREADS --result-file-prefix read
./ve3/bin/python3 ../../../tools/plot_ldbc_latency --results results/read-memgraph-scale_1-LDBC-results.json results/read-neo4j-scale_1-LDBC-results.json --logo-path plots/ldbc-logo.png --plot-title "Read queries, scale 1" --output plots/read-queries-scale_1.png
./ve3/bin/python3 ../../../tools/plot/ldbc_latency --results results/read-memgraph-scale_1-LDBC-results.json results/read-neo4j-scale_1-LDBC-results.json --logo-path plots/ldbc-logo.png --plot-title "Read queries, scale 1" --output plots/read-queries-scale_1.png
# run update benchmarks
TIMEOUT=3600 ./run_benchmark --run-db memgraph --create-index --thread-count $THREADS --result-file-prefix update --test-type updates --time-compression-ratio 1.5 --operation-count 200
TIMEOUT=3600 ./run_benchmark --run-db neo4j --create-index --thread-count $THREADS --result-file-prefix update --test-type updates --time-compression-ratio 1.5 --operation-count 200
./ve3/bin/python3 ../../../tools/plot_ldbc_latency --results results/update-memgraph-scale_1-LDBC-results.json results/update-neo4j-scale_1-LDBC-results.json --logo-path plots/ldbc-logo.png --plot-title "Update queries, scale 1" --output plots/update-queries-scale_1.png
./ve3/bin/python3 ../../../tools/plot/ldbc_latency --results results/update-memgraph-scale_1-LDBC-results.json results/update-neo4j-scale_1-LDBC-results.json --logo-path plots/ldbc-logo.png --plot-title "Update queries, scale 1" --output plots/update-queries-scale_1.png
# convert results to Apollo measurements
./convert_results

View File

@ -20,7 +20,7 @@ from matplotlib.cbook import get_sample_data
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
COLORS = ['#ff7300', '#008cc2'] # TODO: add more colors!
COLORS = ['#ff7300', '#008cc2']
LDBC_TIME_FACTORS = {
"SECONDS": 1.0,
"MILLISECONDS": 1000.0,

View File

@ -1,17 +1,17 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Example usage:
# ./plot_througput --vendor-references neo4j memgraph \
# --vendor-titles neo4j memgraph \
# --results neo_to_plot memgraph_to_plot \
# --plot-title "Pokec small" --window-size 10
'''
Example usage:
./pokec_throughput --vendor-references neo4j memgraph \
--vendor-titles Neo4j Memgraph \
--results neo_to_plot memgraph_to_plot \
--plot-title "Pokec small" --window-size 10
'''
import json
import os
import matplotlib.pyplot as plt
from matplotlib.cbook import get_sample_data
from argparse import ArgumentParser
COLORS = {
@ -57,29 +57,25 @@ def main():
ax.set_ylabel('Throughput (queries per second)')
ax.set_xlabel('Time (seconds)')
ax.set_title(args.plot_title)
ax.set_aspect(0.01)
# Collect the benchmark data and plot lines.
print("Pokec throughput")
for vendor_reference, vendor_data in vendors.items():
print("Vendor: %s" % vendor_reference)
with open(vendor_data['results_path']) as results_file:
results = json.load(results_file)['results'][0]
# Skip first line which contains titles.
prev_time, prev_num_queries = 0.0, 0
for line in results_file.readlines()[1:]:
data = line.split()
if data == []: break
assert len(data) == 2, "Invalid data"
vendor_data['t'].append(float(data[0]))
vendor_data['q'].append(int(data[1]))
for measurement in results:
vendor_data['t'].append(float(measurement['time']))
vendor_data['q'].append(int(measurement['value']))
for i in range(1, len(vendor_data['t'])):
j = max(0, i - args.window_size)
vendor_data['dq/dt'].append(
(vendor_data['q'][i] - vendor_data['q'][j]) /
(vendor_data['t'][i] - vendor_data['t'][j]))
line1, = ax.plot(vendor_data['t'], vendor_data['dq/dt'], '-', linewidth=2,
label=vendor_data['title'], color=vendor_data['color'])
line1, = ax.plot(vendor_data['t'], vendor_data['dq/dt'], '-',
linewidth=2, label=vendor_data['title'],
color=vendor_data['color'])
ax.legend(loc='lower right')
plt.grid()