Add run_pokec script and minimal refactor
Summary: * add run_pokec script because more than one step is required; * refactor of plot_throughput script; * move all plot scripts under tools/plot. Reviewers: mferencevic, teon.banek, mislav.bradac. Reviewed By: mferencevic. Subscribers: florijan, pullbot, buda. Differential Revision: https://phabricator.memgraph.io/D1106
This commit is contained in:
parent
ca32538f63
commit
142b1f42b1
1
tests/macro_benchmark/.gitignore
vendored
1
tests/macro_benchmark/.gitignore
vendored
@ -1,2 +1,3 @@
|
||||
.storage/
|
||||
.results/
|
||||
.harness_summary
|
||||
|
@ -1,3 +1,3 @@
|
||||
{
|
||||
"duration": 30
|
||||
"duration": 60
|
||||
}
|
||||
|
@ -3,7 +3,4 @@
|
||||
working_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
||||
cd ${working_dir}
|
||||
|
||||
cd pokec
|
||||
wget -nv -O pokec_small.setup.cypher http://deps.memgraph.io/pokec_small.setup.cypher
|
||||
wget -nv -O pokec_medium.setup.cypher http://deps.memgraph.io/pokec_medium.setup.cypher
|
||||
cd ..
|
@ -259,6 +259,13 @@ def main():
|
||||
with open(get_absolute_path(".harness_summary"), "w") as f:
|
||||
json.dump({"results": results, "headers": suite.headers}, f)
|
||||
|
||||
# The if block is here because the results from all suites
|
||||
# aren't compatible with the export below.
|
||||
if type(suite) not in [QuerySuite, QueryParallelSuite]:
|
||||
log.warning("The results from the suite "
|
||||
"aren't compatible with the apollo measurements export.")
|
||||
return
|
||||
|
||||
# Export data points.
|
||||
with open(get_absolute_path(".apollo_measurements"), "w") as f:
|
||||
headers = list(suite.headers)
|
||||
|
@ -1,11 +1,5 @@
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import itertools
|
||||
import json
|
||||
from argparse import ArgumentParser
|
||||
from collections import defaultdict
|
||||
from statistics import median
|
||||
from common import get_absolute_path, APOLLO
|
||||
from databases import Memgraph, Neo
|
||||
from clients import QueryClient, LongRunningClient
|
||||
@ -15,6 +9,7 @@ log = logging.getLogger(__name__)
|
||||
|
||||
class LongRunningSuite:
|
||||
KNOWN_KEYS = {"config", "setup", "run"}
|
||||
headers = ["elapsed_time", "num_executed_queries"]
|
||||
|
||||
def __init__(self, args):
|
||||
argp = ArgumentParser("LongRunningSuiteArgumentParser")
|
||||
@ -45,11 +40,11 @@ class LongRunningSuite:
|
||||
for result in results:
|
||||
self.summary += summary_format.format(
|
||||
result["elapsed_time"], result["num_executed_queries"])
|
||||
# TODO: Revise this.
|
||||
measurements.append({
|
||||
"target": "throughput",
|
||||
"value": result["num_executed_queries"] / result["elapsed_time"],
|
||||
"unit": "queries per second",
|
||||
"time": result["elapsed_time"],
|
||||
"value": result["num_executed_queries"],
|
||||
"unit": "number of executed queries",
|
||||
"type": "throughput"})
|
||||
self.summary += "\n\nThroughtput: " + str(measurements[-1]["value"])
|
||||
return measurements
|
||||
|
22
tests/macro_benchmark/run_pokec
Executable file
22
tests/macro_benchmark/run_pokec
Executable file
@ -0,0 +1,22 @@
|
||||
#!/bin/bash -e

# Run pokec bench (download dataset, run neo and memgraph, plot the results).
#
# Several steps are chained here; the `-e` flag on the shebang makes the
# script stop at the first step that fails.

# Absolute directory of this script, so it can be invoked from any cwd.
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Every expansion of script_dir below is quoted so that a checkout path
# containing spaces or glob characters is not word-split by the shell.
cd "${script_dir}"
mkdir -p .results/pokec/

"${script_dir}/groups/pokec/download_dataset"

# Each harness run is expected to leave its summary in .harness_summary in
# this directory; move it aside so the next run does not overwrite it.
./harness LongRunningSuite MemgraphRunner --groups pokec
mv .harness_summary "${script_dir}/.results/pokec/memgraph.summary"

./harness LongRunningSuite NeoRunner --groups pokec
mv .harness_summary "${script_dir}/.results/pokec/neo4j.summary"

# The order of --results must match the order of --vendor-references.
../../tools/plot/pokec_throughput \
    --vendor-references neo4j memgraph \
    --vendor-titles Neo4j Memgraph \
    --results "${script_dir}/.results/pokec/neo4j.summary" "${script_dir}/.results/pokec/memgraph.summary" \
    --plot-title "Pokec Small" --window-size 1
|
@ -7,7 +7,7 @@
|
||||
- ../../../config # directory with config files
|
||||
- ../../../libs/neo4j # neo4j directory
|
||||
- ../../../tools/mg_import_csv # memgraph csv import tool
|
||||
- ../../../tools/plot_ldbc_latency # ldbc plot generation tool
|
||||
- ../../../tools/plot/ldbc_latency # ldbc plot generation tool
|
||||
outfile_paths: # TODO: maybe this should also accept relative paths?
|
||||
- \./memgraph/tests/public_benchmark/ldbc/results/.+
|
||||
- \./memgraph/tests/public_benchmark/ldbc/plots/.+
|
||||
|
@ -9,12 +9,12 @@ TIMEOUT=3600 ./build_dataset
|
||||
# run read benchmarks
|
||||
TIMEOUT=3600 ./run_benchmark --run-db memgraph --create-index --thread-count $THREADS --result-file-prefix read
|
||||
TIMEOUT=3600 ./run_benchmark --run-db neo4j --create-index --thread-count $THREADS --result-file-prefix read
|
||||
./ve3/bin/python3 ../../../tools/plot_ldbc_latency --results results/read-memgraph-scale_1-LDBC-results.json results/read-neo4j-scale_1-LDBC-results.json --logo-path plots/ldbc-logo.png --plot-title "Read queries, scale 1" --output plots/read-queries-scale_1.png
|
||||
./ve3/bin/python3 ../../../tools/plot/ldbc_latency --results results/read-memgraph-scale_1-LDBC-results.json results/read-neo4j-scale_1-LDBC-results.json --logo-path plots/ldbc-logo.png --plot-title "Read queries, scale 1" --output plots/read-queries-scale_1.png
|
||||
|
||||
# run update benchmarks
|
||||
TIMEOUT=3600 ./run_benchmark --run-db memgraph --create-index --thread-count $THREADS --result-file-prefix update --test-type updates --time-compression-ratio 1.5 --operation-count 200
|
||||
TIMEOUT=3600 ./run_benchmark --run-db neo4j --create-index --thread-count $THREADS --result-file-prefix update --test-type updates --time-compression-ratio 1.5 --operation-count 200
|
||||
./ve3/bin/python3 ../../../tools/plot_ldbc_latency --results results/update-memgraph-scale_1-LDBC-results.json results/update-neo4j-scale_1-LDBC-results.json --logo-path plots/ldbc-logo.png --plot-title "Update queries, scale 1" --output plots/update-queries-scale_1.png
|
||||
./ve3/bin/python3 ../../../tools/plot/ldbc_latency --results results/update-memgraph-scale_1-LDBC-results.json results/update-neo4j-scale_1-LDBC-results.json --logo-path plots/ldbc-logo.png --plot-title "Update queries, scale 1" --output plots/update-queries-scale_1.png
|
||||
|
||||
# convert results to Apollo measurements
|
||||
./convert_results
|
||||
|
@ -20,7 +20,7 @@ from matplotlib.cbook import get_sample_data
|
||||
|
||||
|
||||
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
|
||||
COLORS = ['#ff7300', '#008cc2'] # TODO: add more colors!
|
||||
COLORS = ['#ff7300', '#008cc2']
|
||||
LDBC_TIME_FACTORS = {
|
||||
"SECONDS": 1.0,
|
||||
"MILLISECONDS": 1000.0,
|
@ -1,17 +1,17 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Example usage:
|
||||
# ./plot_througput --vendor-references neo4j memgraph \
|
||||
# --vendor-titles neo4j memgraph \
|
||||
# --results neo_to_plot memgraph_to_plot \
|
||||
# --plot-title "Pokec small" --window-size 10
|
||||
|
||||
'''
|
||||
Example usage:
|
||||
./pokec_throughput --vendor-references neo4j memgraph \
|
||||
--vendor-titles Neo4j Memgraph \
|
||||
--results neo_to_plot memgraph_to_plot \
|
||||
--plot-title "Pokec small" --window-size 10
|
||||
'''
|
||||
|
||||
|
||||
import json
|
||||
import os
|
||||
import matplotlib.pyplot as plt
|
||||
from matplotlib.cbook import get_sample_data
|
||||
from argparse import ArgumentParser
|
||||
|
||||
COLORS = {
|
||||
@ -57,29 +57,25 @@ def main():
|
||||
ax.set_ylabel('Throughput (queries per second)')
|
||||
ax.set_xlabel('Time (seconds)')
|
||||
ax.set_title(args.plot_title)
|
||||
ax.set_aspect(0.01)
|
||||
|
||||
# Collect the benchmark data and plot lines.
|
||||
print("Pokec throughput")
|
||||
for vendor_reference, vendor_data in vendors.items():
|
||||
print("Vendor: %s" % vendor_reference)
|
||||
with open(vendor_data['results_path']) as results_file:
|
||||
results = json.load(results_file)['results'][0]
|
||||
# Skip first line which contains titles.
|
||||
prev_time, prev_num_queries = 0.0, 0
|
||||
for line in results_file.readlines()[1:]:
|
||||
data = line.split()
|
||||
if data == []: break
|
||||
assert len(data) == 2, "Invalid data"
|
||||
vendor_data['t'].append(float(data[0]))
|
||||
vendor_data['q'].append(int(data[1]))
|
||||
for measurement in results:
|
||||
vendor_data['t'].append(float(measurement['time']))
|
||||
vendor_data['q'].append(int(measurement['value']))
|
||||
for i in range(1, len(vendor_data['t'])):
|
||||
j = max(0, i - args.window_size)
|
||||
vendor_data['dq/dt'].append(
|
||||
(vendor_data['q'][i] - vendor_data['q'][j]) /
|
||||
(vendor_data['t'][i] - vendor_data['t'][j]))
|
||||
|
||||
line1, = ax.plot(vendor_data['t'], vendor_data['dq/dt'], '-', linewidth=2,
|
||||
label=vendor_data['title'], color=vendor_data['color'])
|
||||
line1, = ax.plot(vendor_data['t'], vendor_data['dq/dt'], '-',
|
||||
linewidth=2, label=vendor_data['title'],
|
||||
color=vendor_data['color'])
|
||||
|
||||
ax.legend(loc='lower right')
|
||||
plt.grid()
|
Loading…
Reference in New Issue
Block a user