Add run_pokec script and minimal refactor

Summary:
  * add run_pokec script because running the benchmark requires more than one step
  * refactor of plot_throughput script
  * move all plot scripts under tools/plot

Reviewers: mferencevic, teon.banek, mislav.bradac

Reviewed By: mferencevic

Subscribers: florijan, pullbot, buda

Differential Revision: https://phabricator.memgraph.io/D1106
This commit is contained in:
Marko Budiselic 2018-01-23 15:31:57 +01:00
parent ca32538f63
commit 142b1f42b1
11 changed files with 58 additions and 40 deletions

View File

@ -1,2 +1,3 @@
.storage/ .storage/
.results/
.harness_summary .harness_summary

View File

@ -1,3 +1,3 @@
{ {
"duration": 30 "duration": 60
} }

View File

@ -3,7 +3,4 @@
working_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" working_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
cd ${working_dir} cd ${working_dir}
cd pokec
wget -nv -O pokec_small.setup.cypher http://deps.memgraph.io/pokec_small.setup.cypher wget -nv -O pokec_small.setup.cypher http://deps.memgraph.io/pokec_small.setup.cypher
wget -nv -O pokec_medium.setup.cypher http://deps.memgraph.io/pokec_medium.setup.cypher
cd ..

View File

@ -259,6 +259,13 @@ def main():
with open(get_absolute_path(".harness_summary"), "w") as f: with open(get_absolute_path(".harness_summary"), "w") as f:
json.dump({"results": results, "headers": suite.headers}, f) json.dump({"results": results, "headers": suite.headers}, f)
# The if block is here because the results from all suites
# aren't compatible with the export below.
if type(suite) not in [QuerySuite, QueryParallelSuite]:
log.warning("The results from the suite "
"aren't compatible with the apollo measurements export.")
return
# Export data points. # Export data points.
with open(get_absolute_path(".apollo_measurements"), "w") as f: with open(get_absolute_path(".apollo_measurements"), "w") as f:
headers = list(suite.headers) headers = list(suite.headers)

View File

@ -1,11 +1,5 @@
import logging import logging
import os
import time
import itertools
import json
from argparse import ArgumentParser from argparse import ArgumentParser
from collections import defaultdict
from statistics import median
from common import get_absolute_path, APOLLO from common import get_absolute_path, APOLLO
from databases import Memgraph, Neo from databases import Memgraph, Neo
from clients import QueryClient, LongRunningClient from clients import QueryClient, LongRunningClient
@ -15,6 +9,7 @@ log = logging.getLogger(__name__)
class LongRunningSuite: class LongRunningSuite:
KNOWN_KEYS = {"config", "setup", "run"} KNOWN_KEYS = {"config", "setup", "run"}
headers = ["elapsed_time", "num_executed_queries"]
def __init__(self, args): def __init__(self, args):
argp = ArgumentParser("LongRunningSuiteArgumentParser") argp = ArgumentParser("LongRunningSuiteArgumentParser")
@ -45,11 +40,11 @@ class LongRunningSuite:
for result in results: for result in results:
self.summary += summary_format.format( self.summary += summary_format.format(
result["elapsed_time"], result["num_executed_queries"]) result["elapsed_time"], result["num_executed_queries"])
# TODO: Revise this.
measurements.append({ measurements.append({
"target": "throughput", "target": "throughput",
"value": result["num_executed_queries"] / result["elapsed_time"], "time": result["elapsed_time"],
"unit": "queries per second", "value": result["num_executed_queries"],
"unit": "number of executed queries",
"type": "throughput"}) "type": "throughput"})
self.summary += "\n\nThroughtput: " + str(measurements[-1]["value"]) self.summary += "\n\nThroughtput: " + str(measurements[-1]["value"])
return measurements return measurements

22
tests/macro_benchmark/run_pokec Executable file
View File

@ -0,0 +1,22 @@
#!/bin/bash -e
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# Run pokec bench (download dataset, run neo and memgraph, plot the results).
cd ${script_dir}
mkdir -p .results/pokec/
${script_dir}/groups/pokec/download_dataset
./harness LongRunningSuite MemgraphRunner --groups pokec
mv .harness_summary ${script_dir}/.results/pokec/memgraph.summary
./harness LongRunningSuite NeoRunner --groups pokec
mv .harness_summary ${script_dir}/.results/pokec/neo4j.summary
../../tools/plot/pokec_throughput \
--vendor-references neo4j memgraph \
--vendor-titles Neo4j Memgraph \
--results ${script_dir}/.results/pokec/neo4j.summary ${script_dir}/.results/pokec/memgraph.summary \
--plot-title "Pokec Small" --window-size 1

View File

@ -7,7 +7,7 @@
- ../../../config # directory with config files - ../../../config # directory with config files
- ../../../libs/neo4j # neo4j directory - ../../../libs/neo4j # neo4j directory
- ../../../tools/mg_import_csv # memgraph csv import tool - ../../../tools/mg_import_csv # memgraph csv import tool
- ../../../tools/plot_ldbc_latency # ldbc plot generation tool - ../../../tools/plot/ldbc_latency # ldbc plot generation tool
outfile_paths: # TODO: maybe this should also accept relative paths? outfile_paths: # TODO: maybe this should also accept relative paths?
- \./memgraph/tests/public_benchmark/ldbc/results/.+ - \./memgraph/tests/public_benchmark/ldbc/results/.+
- \./memgraph/tests/public_benchmark/ldbc/plots/.+ - \./memgraph/tests/public_benchmark/ldbc/plots/.+

View File

@ -9,12 +9,12 @@ TIMEOUT=3600 ./build_dataset
# run read benchmarks # run read benchmarks
TIMEOUT=3600 ./run_benchmark --run-db memgraph --create-index --thread-count $THREADS --result-file-prefix read TIMEOUT=3600 ./run_benchmark --run-db memgraph --create-index --thread-count $THREADS --result-file-prefix read
TIMEOUT=3600 ./run_benchmark --run-db neo4j --create-index --thread-count $THREADS --result-file-prefix read TIMEOUT=3600 ./run_benchmark --run-db neo4j --create-index --thread-count $THREADS --result-file-prefix read
./ve3/bin/python3 ../../../tools/plot_ldbc_latency --results results/read-memgraph-scale_1-LDBC-results.json results/read-neo4j-scale_1-LDBC-results.json --logo-path plots/ldbc-logo.png --plot-title "Read queries, scale 1" --output plots/read-queries-scale_1.png ./ve3/bin/python3 ../../../tools/plot/ldbc_latency --results results/read-memgraph-scale_1-LDBC-results.json results/read-neo4j-scale_1-LDBC-results.json --logo-path plots/ldbc-logo.png --plot-title "Read queries, scale 1" --output plots/read-queries-scale_1.png
# run update benchmarks # run update benchmarks
TIMEOUT=3600 ./run_benchmark --run-db memgraph --create-index --thread-count $THREADS --result-file-prefix update --test-type updates --time-compression-ratio 1.5 --operation-count 200 TIMEOUT=3600 ./run_benchmark --run-db memgraph --create-index --thread-count $THREADS --result-file-prefix update --test-type updates --time-compression-ratio 1.5 --operation-count 200
TIMEOUT=3600 ./run_benchmark --run-db neo4j --create-index --thread-count $THREADS --result-file-prefix update --test-type updates --time-compression-ratio 1.5 --operation-count 200 TIMEOUT=3600 ./run_benchmark --run-db neo4j --create-index --thread-count $THREADS --result-file-prefix update --test-type updates --time-compression-ratio 1.5 --operation-count 200
./ve3/bin/python3 ../../../tools/plot_ldbc_latency --results results/update-memgraph-scale_1-LDBC-results.json results/update-neo4j-scale_1-LDBC-results.json --logo-path plots/ldbc-logo.png --plot-title "Update queries, scale 1" --output plots/update-queries-scale_1.png ./ve3/bin/python3 ../../../tools/plot/ldbc_latency --results results/update-memgraph-scale_1-LDBC-results.json results/update-neo4j-scale_1-LDBC-results.json --logo-path plots/ldbc-logo.png --plot-title "Update queries, scale 1" --output plots/update-queries-scale_1.png
# convert results to Apollo measurements # convert results to Apollo measurements
./convert_results ./convert_results

View File

@ -20,7 +20,7 @@ from matplotlib.cbook import get_sample_data
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
COLORS = ['#ff7300', '#008cc2'] # TODO: add more colors! COLORS = ['#ff7300', '#008cc2']
LDBC_TIME_FACTORS = { LDBC_TIME_FACTORS = {
"SECONDS": 1.0, "SECONDS": 1.0,
"MILLISECONDS": 1000.0, "MILLISECONDS": 1000.0,

View File

@ -1,17 +1,17 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
#
# Example usage: '''
# ./plot_througput --vendor-references neo4j memgraph \ Example usage:
# --vendor-titles neo4j memgraph \ ./pokec_throughput --vendor-references neo4j memgraph \
# --results neo_to_plot memgraph_to_plot \ --vendor-titles Neo4j Memgraph \
# --plot-title "Pokec small" --window-size 10 --results neo_to_plot memgraph_to_plot \
--plot-title "Pokec small" --window-size 10
'''
import json import json
import os
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from matplotlib.cbook import get_sample_data
from argparse import ArgumentParser from argparse import ArgumentParser
COLORS = { COLORS = {
@ -57,29 +57,25 @@ def main():
ax.set_ylabel('Throughput (queries per second)') ax.set_ylabel('Throughput (queries per second)')
ax.set_xlabel('Time (seconds)') ax.set_xlabel('Time (seconds)')
ax.set_title(args.plot_title) ax.set_title(args.plot_title)
ax.set_aspect(0.01)
# Collect the benchmark data and plot lines. # Collect the benchmark data and plot lines.
print("Pokec throughput") print("Pokec throughput")
for vendor_reference, vendor_data in vendors.items(): for vendor_reference, vendor_data in vendors.items():
print("Vendor: %s" % vendor_reference) print("Vendor: %s" % vendor_reference)
with open(vendor_data['results_path']) as results_file: with open(vendor_data['results_path']) as results_file:
results = json.load(results_file)['results'][0]
# Skip first line which contains titles. # Skip first line which contains titles.
prev_time, prev_num_queries = 0.0, 0 for measurement in results:
for line in results_file.readlines()[1:]: vendor_data['t'].append(float(measurement['time']))
data = line.split() vendor_data['q'].append(int(measurement['value']))
if data == []: break
assert len(data) == 2, "Invalid data"
vendor_data['t'].append(float(data[0]))
vendor_data['q'].append(int(data[1]))
for i in range(1, len(vendor_data['t'])): for i in range(1, len(vendor_data['t'])):
j = max(0, i - args.window_size) j = max(0, i - args.window_size)
vendor_data['dq/dt'].append( vendor_data['dq/dt'].append(
(vendor_data['q'][i] - vendor_data['q'][j]) / (vendor_data['q'][i] - vendor_data['q'][j]) /
(vendor_data['t'][i] - vendor_data['t'][j])) (vendor_data['t'][i] - vendor_data['t'][j]))
line1, = ax.plot(vendor_data['t'], vendor_data['dq/dt'], '-',
line1, = ax.plot(vendor_data['t'], vendor_data['dq/dt'], '-', linewidth=2, linewidth=2, label=vendor_data['title'],
label=vendor_data['title'], color=vendor_data['color']) color=vendor_data['color'])
ax.legend(loc='lower right') ax.legend(loc='lower right')
plt.grid() plt.grid()