From 142b1f42b1d81f5d9e3ba487956d8c427e4432b6 Mon Sep 17 00:00:00 2001 From: Marko Budiselic Date: Tue, 23 Jan 2018 15:31:57 +0100 Subject: [PATCH] Add run_pokec script and minimum refactor Summary: * add run_pokec script because more than one step is required * refactor of plot_throughput script * move all plot scripts under tools/plot Reviewers: mferencevic, teon.banek, mislav.bradac Reviewed By: mferencevic Subscribers: florijan, pullbot, buda Differential Revision: https://phabricator.memgraph.io/D1106 --- tests/macro_benchmark/.gitignore | 1 + .../macro_benchmark/groups/pokec/config.json | 2 +- .../download_dataset} | 3 -- tests/macro_benchmark/harness | 7 ++++ tests/macro_benchmark/long_running_suite.py | 21 +++++------- tests/macro_benchmark/run_pokec | 22 ++++++++++++ tests/public_benchmark/ldbc/apollo_runs.yaml | 2 +- .../ldbc/continuous_integration | 4 +-- tools/{plot_gbench_json => plot/gbench_json} | 0 .../{plot_ldbc_latency => plot/ldbc_latency} | 2 +- .../{plot_througput => plot/pokec_throughput} | 34 ++++++++----------- 11 files changed, 58 insertions(+), 40 deletions(-) rename tests/macro_benchmark/groups/{download_datasets => pokec/download_dataset} (64%) create mode 100755 tests/macro_benchmark/run_pokec rename tools/{plot_gbench_json => plot/gbench_json} (100%) rename tools/{plot_ldbc_latency => plot/ldbc_latency} (99%) rename tools/{plot_througput => plot/pokec_throughput} (76%) diff --git a/tests/macro_benchmark/.gitignore b/tests/macro_benchmark/.gitignore index 7855f07fe..d6570241d 100644 --- a/tests/macro_benchmark/.gitignore +++ b/tests/macro_benchmark/.gitignore @@ -1,2 +1,3 @@ .storage/ +.results/ .harness_summary diff --git a/tests/macro_benchmark/groups/pokec/config.json b/tests/macro_benchmark/groups/pokec/config.json index fcafa44ac..ec6cf94bd 100644 --- a/tests/macro_benchmark/groups/pokec/config.json +++ b/tests/macro_benchmark/groups/pokec/config.json @@ -1,3 +1,3 @@ { - "duration": 30 + "duration": 60 } diff --git 
a/tests/macro_benchmark/groups/download_datasets b/tests/macro_benchmark/groups/pokec/download_dataset similarity index 64% rename from tests/macro_benchmark/groups/download_datasets rename to tests/macro_benchmark/groups/pokec/download_dataset index 3f55c90ec..cd315c5f1 100755 --- a/tests/macro_benchmark/groups/download_datasets +++ b/tests/macro_benchmark/groups/pokec/download_dataset @@ -3,7 +3,4 @@ working_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" cd ${working_dir} -cd pokec wget -nv -O pokec_small.setup.cypher http://deps.memgraph.io/pokec_small.setup.cypher -wget -nv -O pokec_medium.setup.cypher http://deps.memgraph.io/pokec_medium.setup.cypher -cd .. diff --git a/tests/macro_benchmark/harness b/tests/macro_benchmark/harness index 523213396..0170f5300 100755 --- a/tests/macro_benchmark/harness +++ b/tests/macro_benchmark/harness @@ -259,6 +259,13 @@ def main(): with open(get_absolute_path(".harness_summary"), "w") as f: json.dump({"results": results, "headers": suite.headers}, f) + # Not every suite produces results in the format expected by the + # export below, so only QuerySuite and QueryParallelSuite continue. + if type(suite) not in [QuerySuite, QueryParallelSuite]: + log.warning("The results from the suite " + "aren't compatible with the apollo measurements export.") + return + # Export data points. 
with open(get_absolute_path(".apollo_measurements"), "w") as f: headers = list(suite.headers) diff --git a/tests/macro_benchmark/long_running_suite.py b/tests/macro_benchmark/long_running_suite.py index 7d71f8d94..dd57a5ce1 100644 --- a/tests/macro_benchmark/long_running_suite.py +++ b/tests/macro_benchmark/long_running_suite.py @@ -1,11 +1,5 @@ import logging -import os -import time -import itertools -import json from argparse import ArgumentParser -from collections import defaultdict -from statistics import median from common import get_absolute_path, APOLLO from databases import Memgraph, Neo from clients import QueryClient, LongRunningClient @@ -15,6 +9,7 @@ log = logging.getLogger(__name__) class LongRunningSuite: KNOWN_KEYS = {"config", "setup", "run"} + headers = ["elapsed_time", "num_executed_queries"] def __init__(self, args): argp = ArgumentParser("LongRunningSuiteArgumentParser") @@ -45,17 +40,17 @@ class LongRunningSuite: for result in results: self.summary += summary_format.format( result["elapsed_time"], result["num_executed_queries"]) - # TODO: Revise this. 
measurements.append({ "target": "throughput", - "value": result["num_executed_queries"] / result["elapsed_time"], - "unit": "queries per second", + "time": result["elapsed_time"], + "value": result["num_executed_queries"], + "unit": "number of executed queries", "type": "throughput"}) self.summary += "\n\nThroughtput: " + str(measurements[-1]["value"]) return measurements def runners(self): - return { "MemgraphRunner" : MemgraphRunner, "NeoRunner" : NeoRunner } + return {"MemgraphRunner": MemgraphRunner, "NeoRunner": NeoRunner} def groups(self): return ["pokec"] @@ -100,9 +95,9 @@ class MemgraphRunner(_LongRunningRunner): help="Number of clients") self.args, remaining_args = argp.parse_known_args(args) assert not APOLLO or self.args.num_database_workers, \ - "--num-database-workers is obligatory flag on apollo" + "--num-database-workers is obligatory flag on apollo" assert not APOLLO or self.args.num_client_workers, \ - "--num-client-workers is obligatory flag on apollo" + "--num-client-workers is obligatory flag on apollo" database = Memgraph(remaining_args, self.args.runner_config, self.args.num_database_workers) super(MemgraphRunner, self).__init__( @@ -122,7 +117,7 @@ class NeoRunner(_LongRunningRunner): help="Number of clients") self.args, remaining_args = argp.parse_known_args(args) assert not APOLLO or self.args.num_client_workers, \ - "--client-num-clients is obligatory flag on apollo" + "--client-num-clients is obligatory flag on apollo" database = Neo(remaining_args, self.args.runner_config) super(NeoRunner, self).__init__( remaining_args, database, self.args.num_client_workers) diff --git a/tests/macro_benchmark/run_pokec b/tests/macro_benchmark/run_pokec new file mode 100755 index 000000000..e4cb63de6 --- /dev/null +++ b/tests/macro_benchmark/run_pokec @@ -0,0 +1,22 @@ +#!/bin/bash -e + +script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +# Run pokec bench (download dataset, run neo and memgraph, plot the results). 
+ +cd ${script_dir} +mkdir -p .results/pokec/ + +${script_dir}/groups/pokec/download_dataset + +./harness LongRunningSuite MemgraphRunner --groups pokec +mv .harness_summary ${script_dir}/.results/pokec/memgraph.summary + +./harness LongRunningSuite NeoRunner --groups pokec +mv .harness_summary ${script_dir}/.results/pokec/neo4j.summary + +../../tools/plot/pokec_throughput \ + --vendor-references neo4j memgraph \ + --vendor-titles Neo4j Memgraph \ + --results ${script_dir}/.results/pokec/neo4j.summary ${script_dir}/.results/pokec/memgraph.summary \ + --plot-title "Pokec Small" --window-size 1 diff --git a/tests/public_benchmark/ldbc/apollo_runs.yaml b/tests/public_benchmark/ldbc/apollo_runs.yaml index 1f7cd3d3b..c98f375f0 100644 --- a/tests/public_benchmark/ldbc/apollo_runs.yaml +++ b/tests/public_benchmark/ldbc/apollo_runs.yaml @@ -7,7 +7,7 @@ - ../../../config # directory with config files - ../../../libs/neo4j # neo4j directory - ../../../tools/mg_import_csv # memgraph csv import tool - - ../../../tools/plot_ldbc_latency # ldbc plot generation tool + - ../../../tools/plot/ldbc_latency # ldbc plot generation tool outfile_paths: # TODO: maybe this should also accept relative paths? 
- \./memgraph/tests/public_benchmark/ldbc/results/.+ - \./memgraph/tests/public_benchmark/ldbc/plots/.+ diff --git a/tests/public_benchmark/ldbc/continuous_integration b/tests/public_benchmark/ldbc/continuous_integration index ad99aecf0..520aeb8bb 100644 --- a/tests/public_benchmark/ldbc/continuous_integration +++ b/tests/public_benchmark/ldbc/continuous_integration @@ -9,12 +9,12 @@ TIMEOUT=3600 ./build_dataset # run read benchmarks TIMEOUT=3600 ./run_benchmark --run-db memgraph --create-index --thread-count $THREADS --result-file-prefix read TIMEOUT=3600 ./run_benchmark --run-db neo4j --create-index --thread-count $THREADS --result-file-prefix read -./ve3/bin/python3 ../../../tools/plot_ldbc_latency --results results/read-memgraph-scale_1-LDBC-results.json results/read-neo4j-scale_1-LDBC-results.json --logo-path plots/ldbc-logo.png --plot-title "Read queries, scale 1" --output plots/read-queries-scale_1.png +./ve3/bin/python3 ../../../tools/plot/ldbc_latency --results results/read-memgraph-scale_1-LDBC-results.json results/read-neo4j-scale_1-LDBC-results.json --logo-path plots/ldbc-logo.png --plot-title "Read queries, scale 1" --output plots/read-queries-scale_1.png # run update benchmarks TIMEOUT=3600 ./run_benchmark --run-db memgraph --create-index --thread-count $THREADS --result-file-prefix update --test-type updates --time-compression-ratio 1.5 --operation-count 200 TIMEOUT=3600 ./run_benchmark --run-db neo4j --create-index --thread-count $THREADS --result-file-prefix update --test-type updates --time-compression-ratio 1.5 --operation-count 200 -./ve3/bin/python3 ../../../tools/plot_ldbc_latency --results results/update-memgraph-scale_1-LDBC-results.json results/update-neo4j-scale_1-LDBC-results.json --logo-path plots/ldbc-logo.png --plot-title "Update queries, scale 1" --output plots/update-queries-scale_1.png +./ve3/bin/python3 ../../../tools/plot/ldbc_latency --results results/update-memgraph-scale_1-LDBC-results.json 
results/update-neo4j-scale_1-LDBC-results.json --logo-path plots/ldbc-logo.png --plot-title "Update queries, scale 1" --output plots/update-queries-scale_1.png # convert results to Apollo measurements ./convert_results diff --git a/tools/plot_gbench_json b/tools/plot/gbench_json similarity index 100% rename from tools/plot_gbench_json rename to tools/plot/gbench_json diff --git a/tools/plot_ldbc_latency b/tools/plot/ldbc_latency similarity index 99% rename from tools/plot_ldbc_latency rename to tools/plot/ldbc_latency index c3c73f451..2850aa228 100755 --- a/tools/plot_ldbc_latency +++ b/tools/plot/ldbc_latency @@ -20,7 +20,7 @@ from matplotlib.cbook import get_sample_data SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) -COLORS = ['#ff7300', '#008cc2'] # TODO: add more colors! +COLORS = ['#ff7300', '#008cc2'] LDBC_TIME_FACTORS = { "SECONDS": 1.0, "MILLISECONDS": 1000.0, diff --git a/tools/plot_througput b/tools/plot/pokec_throughput similarity index 76% rename from tools/plot_througput rename to tools/plot/pokec_throughput index 5684cb431..b6eb5fee1 100755 --- a/tools/plot_througput +++ b/tools/plot/pokec_throughput @@ -1,17 +1,17 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -# -# Example usage: -# ./plot_througput --vendor-references neo4j memgraph \ -# --vendor-titles neo4j memgraph \ -# --results neo_to_plot memgraph_to_plot \ -# --plot-title "Pokec small" --window-size 10 + +''' +Example usage: +./pokec_throughput --vendor-references neo4j memgraph \ + --vendor-titles Neo4j Memgraph \ + --results neo_to_plot memgraph_to_plot \ + --plot-title "Pokec small" --window-size 10 +''' import json -import os import matplotlib.pyplot as plt -from matplotlib.cbook import get_sample_data from argparse import ArgumentParser COLORS = { @@ -57,29 +57,25 @@ def main(): ax.set_ylabel('Throughput (queries per second)') ax.set_xlabel('Time (seconds)') ax.set_title(args.plot_title) - ax.set_aspect(0.01) # Collect the benchmark data and plot lines. 
print("Pokec throughput") for vendor_reference, vendor_data in vendors.items(): print("Vendor: %s" % vendor_reference) with open(vendor_data['results_path']) as results_file: + results = json.load(results_file)['results'][0] # Skip first line which contains titles. - prev_time, prev_num_queries = 0.0, 0 - for line in results_file.readlines()[1:]: - data = line.split() - if data == []: break - assert len(data) == 2, "Invalid data" - vendor_data['t'].append(float(data[0])) - vendor_data['q'].append(int(data[1])) + for measurement in results: + vendor_data['t'].append(float(measurement['time'])) + vendor_data['q'].append(int(measurement['value'])) for i in range(1, len(vendor_data['t'])): j = max(0, i - args.window_size) vendor_data['dq/dt'].append( (vendor_data['q'][i] - vendor_data['q'][j]) / (vendor_data['t'][i] - vendor_data['t'][j])) - - line1, = ax.plot(vendor_data['t'], vendor_data['dq/dt'], '-', linewidth=2, - label=vendor_data['title'], color=vendor_data['color']) + line1, = ax.plot(vendor_data['t'], vendor_data['dq/dt'], '-', + linewidth=2, label=vendor_data['title'], + color=vendor_data['color']) ax.legend(loc='lower right') plt.grid()