From f194160d7ce755807c99eca3ad13de652c62a81c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Fri, 17 Feb 2023 17:33:24 +0100 Subject: [PATCH] Add detailed description about the query_performance binary --- tests/manual/query_performance.cpp | 72 +++++++++++++++++++++++-- tests/mgbench/dataset_creator.py | 20 +++++-- tests/mgbench/dataset_creator_unwind.py | 20 +++++-- 3 files changed, 100 insertions(+), 12 deletions(-) diff --git a/tests/manual/query_performance.cpp b/tests/manual/query_performance.cpp index 5fe6a247c..fe0308f82 100644 --- a/tests/manual/query_performance.cpp +++ b/tests/manual/query_performance.cpp @@ -9,6 +9,65 @@ // by the Apache License, Version 2.0, included in the file // licenses/APL.txt. +// This binary is meant to easily compare the performance of: +// - Memgraph v2 +// - Memgraph v3 +// - Memgraph v3 with MultiFrame +// This binary measures three things which provide a high level and easily understandable metric about the performance +// difference between the different versions: +// 1. Read time: how much time does it take to read the files. +// 2. Init time: how much time does it take to run the init queries, including the index creation. For details please +// check RunV2. +// 3. Benchmark time: how much time does it take to run the benchmark queries. +// To quickly compare performance of the different versions just change the query or queries in the benchmark queries +// file and you can see the difference by running this executable. This way we don't have to keep multiple binaries of Memgraph +// v2 and Memgraph v3 with/without MultiFrame, start Memgraph and connect to it with mgconsole and other hassles. As +// everything is run in this binary, it makes it easier to generate perf reports/flamegraphs from the query execution of +// different Memgraph versions compared to using the full-blown version of Memgraph. 
+// +// A few important notes: +// - All the input files are mandated to have an empty line at the end of the file as the reading logic expects that. +// - tests/mgbench/dataset_creator_unwind.py is recommended to generate the dataset because it generates queries with +// UNWIND that makes the import faster in Memgraph v3, thus we can compare the performance on non-trivial datasets +// also. To make it possible to use the generated dataset, you have to move the generated index queries into a +// separate file that can be supplied as index queries file for this binary when using Memgraph v2. The reason for +// this is Memgraph v3 cannot handle indices yet, thus it crashes. +// - Check the command line flags and their description defined in this file. +// - Also check out the --default-multi-frame-size command line flag if you want to play with that. +// - The log level is manually set to warning in the main function to avoid the overwhelming log messages from Memgraph +// v3. Apart from ease of use, the huge amount of logging can degrade the actual performance. 
+// +// Example usage with Memgraph v2: +// ./query_performance +// --index-queries-file indices.cypher +// --init-queries-file dataset.cypher +// --benchmark-queries-file benchmark_queries.txt +// --use-v3=false +// +// Example usage with Memgraph v3 without MultiFrame: +// ./query_performance +// --split-file split_file +// --init-queries-file dataset.cypher +// --benchmark-queries-file benchmark_queries.txt +// --use-v3=true +// --use-multi-frame=false +// +// Example usage with Memgraph v3 with MultiFrame: +// ./query_performance +// --split-file split_file +// --init-queries-file dataset.cypher +// --benchmark-queries-file benchmark_queries.txt +// --use-v3=true +// --use-multi-frame=true +// +// The examples are using only the necessary flags, however specifying all of them is not a problem, so if you specify +// --index-queries-file for Memgraph v3, then it will be safely ignored just as --split-file for Memgraph v2. +// +// To generate flamegraph you can use the following command: +// flamegraph --cmd "record -F 997 --call-graph fp -g" --root -o flamegraph.svg -- ./query_performance +// Using the default option (dwarf) for --call-graph when calling perf might result in too long runtime of flamegraph +// because of address resolution. See https://github.com/flamegraph-rs/flamegraph/issues/74. + #include #include #include @@ -33,15 +92,20 @@ // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) DEFINE_string(index_queries_file, "", - "Path to the file which contains the queries to create indices. Used only for v2."); + "Path to the file which contains the queries to create indices. Used only for v2. Must contain an empty " + "line at the end of the file after the queries."); // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) DEFINE_string(split_file, "", "Path to the split file which contains the predefined labels, properties, edge types and shard-ranges. " - "Used only for v3."); + "Used only for v3. 
Must contain an empty line at the end of the file."); // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) -DEFINE_string(init_queries_file, "", "Path to the file that is used to insert the initial dataset."); +DEFINE_string(init_queries_file, "", + "Path to the file that is used to insert the initial dataset, one query per line. Must contain an empty " + "line at the end of the file after the queries."); // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) -DEFINE_string(benchmark_queries_file, "", "Path to the file that contains the queries that we want to measure."); +DEFINE_string(benchmark_queries_file, "", + "Path to the file that contains the queries that we want to compare, one query per line. Must contain an " + "empty line at the end of the file after the queries."); // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) DEFINE_bool(use_v3, true, "If set to true, then Memgraph v3 will be used, otherwise Memgraph v2 will be used."); diff --git a/tests/mgbench/dataset_creator.py b/tests/mgbench/dataset_creator.py index 9ebeb8cd1..0b73eceed 100644 --- a/tests/mgbench/dataset_creator.py +++ b/tests/mgbench/dataset_creator.py @@ -51,10 +51,22 @@ import helpers def main(): parser = argparse.ArgumentParser() - parser.add_argument("--number_of_identities", type=int, default=10) - parser.add_argument("--number_of_files", type=int, default=10) - parser.add_argument("--percentage_of_permissions", type=float, default=1.0) - parser.add_argument("--filename", default="dataset.cypher") + parser.add_argument( + "--number_of_identities", + type=int, + default=10, + help="Determines how many :Identity nodes will the dataset contain.", + ) + parser.add_argument( + "--number_of_files", type=int, default=10, help="Determines how many :File nodes will the dataset contain." 
+ ) + parser.add_argument( + "--percentage_of_permissions", + type=float, + default=1.0, + help="Determines approximately what percentage of the all possible identity-permission-file connections will be created.", + ) + parser.add_argument("--filename", default="dataset.cypher", help="The name of the output file.") args = parser.parse_args() diff --git a/tests/mgbench/dataset_creator_unwind.py b/tests/mgbench/dataset_creator_unwind.py index 564a4d018..00de1c7bf 100644 --- a/tests/mgbench/dataset_creator_unwind.py +++ b/tests/mgbench/dataset_creator_unwind.py @@ -51,10 +51,22 @@ import helpers def main(): parser = argparse.ArgumentParser() - parser.add_argument("--number_of_identities", type=int, default=10) - parser.add_argument("--number_of_files", type=int, default=10) - parser.add_argument("--percentage_of_permissions", type=float, default=1.0) - parser.add_argument("--filename", default="dataset.cypher") + parser.add_argument( + "--number_of_identities", + type=int, + default=10, + help="Determines how many :Identity nodes will the dataset contain.", + ) + parser.add_argument( + "--number_of_files", type=int, default=10, help="Determines how many :File nodes will the dataset contain." + ) + parser.add_argument( + "--percentage_of_permissions", + type=float, + default=1.0, + help="Determines approximately what percentage of the all possible identity-permission-file connections will be created.", + ) + parser.add_argument("--filename", default="dataset.cypher", help="The name of the output file.") args = parser.parse_args()