Include both v2 and v3 in query performance test

János Benjamin Antal 2023-02-13 23:33:01 +01:00
parent 1bc93b64f4
commit fe14a8674c
2 changed files with 128 additions and 25 deletions

View File

@@ -9,6 +9,7 @@ function(add_manual_test test_cpp)
get_filename_component(exec_name ${test_cpp} NAME_WE)
set(target_name ${test_prefix}${exec_name})
add_executable(${target_name} ${test_cpp} ${ARGN})
# OUTPUT_NAME sets the real name of a target when it is built and can be
# used to help create two targets of the same name even though CMake
# requires unique logical target names
@@ -21,7 +22,7 @@ target_link_libraries(${test_prefix}antlr_parser antlr_opencypher_parser_lib)
add_manual_test(antlr_sigsegv.cpp)
target_link_libraries(${test_prefix}antlr_sigsegv gtest gtest_main
antlr_opencypher_parser_lib mg-utils)
antlr_opencypher_parser_lib mg-utils)
add_manual_test(antlr_tree_pretty_print.cpp)
target_link_libraries(${test_prefix}antlr_tree_pretty_print antlr_opencypher_parser_lib)
@@ -37,13 +38,15 @@ target_link_libraries(${test_prefix}query_hash mg-query)
add_manual_test(query_planner.cpp interactive/planning.cpp)
target_link_libraries(${test_prefix}query_planner mg-query)
if (READLINE_FOUND)
if(READLINE_FOUND)
target_link_libraries(${test_prefix}query_planner readline)
endif()
add_manual_test(query_execution_dummy.cpp)
target_link_libraries(${test_prefix}query_execution_dummy mg-query)
if (READLINE_FOUND)
if(READLINE_FOUND)
target_link_libraries(${test_prefix}query_execution_dummy readline)
endif()
@@ -63,4 +66,4 @@ add_manual_test(ssl_server.cpp)
target_link_libraries(${test_prefix}ssl_server mg-communication)
add_manual_test(query_performance.cpp)
target_link_libraries(${test_prefix}query_performance mg-communication mg-utils mg-io mg-io-simulator mg-coordinator mg-query-v2 mg-storage-v3)
target_link_libraries(${test_prefix}query_performance mg-communication mg-utils mg-io mg-io-simulator mg-coordinator mg-query-v2 mg-storage-v3 mg-query mg-storage-v2)

View File

@@ -17,34 +17,61 @@
#include <spdlog/cfg/env.h>
#include <spdlog/spdlog.h>
// v3 includes
#include "io/address.hpp"
#include "io/local_transport/local_system.hpp"
#include "io/message_histogram_collector.hpp"
#include "machine_manager/machine_manager.hpp"
#include "query/discard_value_stream.hpp"
#include "query/v2/discard_value_stream.hpp"
#include "query/v2/interpreter.hpp"
#include "query/v2/request_router.hpp"
// v2 includes
#include "query/interpreter.hpp"
#include "storage/v2/storage.hpp"
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
DEFINE_string(index_queries_file, "",
"Path to the file which contains the queries to create indices. Used only for v2.");
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
DEFINE_string(split_file, "",
"Path to the split file which contains the predefined labels, properties, edge types and shard-ranges.");
"Path to the split file which contains the predefined labels, properties, edge types and shard-ranges. "
"Used only for v3.");
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
DEFINE_string(init_queries_file, "",
"Path to the split file which contains the predefined labels, properties, edge types and shard-ranges.");
DEFINE_string(init_queries_file, "", "Path to the file that is used to insert the initial dataset.");
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
DEFINE_string(benchmark_queries_file, "",
"Path to the split file which contains the predefined labels, properties, edge types and shard-ranges.");
DEFINE_string(benchmark_queries_file, "", "Path to the file that contains the queries that we want to measure.");
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
DEFINE_bool(use_v3, true, "If set to true, then Memgraph v3 will be used, otherwise Memgraph v2 will be used.");
namespace memgraph::tests::manual {
using io::LatencyHistogramSummaries;
using query::v2::DiscardValueResultStream;
using query::v2::Interpreter;
using query::v2::InterpreterContext;
template <typename TInterpreterContext>
struct DependantTypes {};
void RunQueries(InterpreterContext &interpreter_context, const std::vector<std::string> &queries) {
Interpreter interpreter{&interpreter_context};
DiscardValueResultStream stream;
template <>
struct DependantTypes<query::InterpreterContext> {
using Interpreter = query::Interpreter;
using DiscardValueResultStream = query::DiscardValueResultStream;
};
template <>
struct DependantTypes<query::v2::InterpreterContext> {
using Interpreter = query::v2::Interpreter;
using DiscardValueResultStream = query::v2::DiscardValueResultStream;
};
template <typename TInterpreterContext>
using Interpreter = typename DependantTypes<TInterpreterContext>::Interpreter;
template <typename TInterpreterContext>
using DiscardValueResultStream = typename DependantTypes<TInterpreterContext>::DiscardValueResultStream;
template <typename TInterpreterContext>
void RunQueries(TInterpreterContext &interpreter_context, const std::vector<std::string> &queries) {
Interpreter<TInterpreterContext> interpreter{&interpreter_context};
DiscardValueResultStream<TInterpreterContext> stream;
for (const auto &query : queries) {
auto result = interpreter.Prepare(query, {}, nullptr);
@@ -52,15 +79,84 @@ void RunQueries(InterpreterContext &interpreter_context, const std::vector<std::
}
}
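The DependantTypes specializations above act as a small type trait: they map each interpreter-context type to the Interpreter and result-stream types that belong with it, which is what lets the single templated RunQueries drive both v2 and v3. A minimal, self-contained sketch of the same dispatch pattern, using hypothetical ContextA/ContextB and Runner names rather than the Memgraph types:

#include <iostream>
#include <string>
#include <vector>

// Hypothetical stand-ins for the two interpreter-context types.
struct ContextA {};
struct ContextB {};

// Runners that only know how to work with their own context type.
struct RunnerA {
  explicit RunnerA(ContextA * /*ctx*/) {}
  void Run(const std::string &query) { std::cout << "A runs: " << query << '\n'; }
};
struct RunnerB {
  explicit RunnerB(ContextB * /*ctx*/) {}
  void Run(const std::string &query) { std::cout << "B runs: " << query << '\n'; }
};

// Primary template stays empty; only the explicit specializations are usable.
template <typename TContext>
struct DependantTypes {};

template <>
struct DependantTypes<ContextA> {
  using Runner = RunnerA;
};
template <>
struct DependantTypes<ContextB> {
  using Runner = RunnerB;
};

// One generic driver, instantiated once per context type.
template <typename TContext>
void RunAll(TContext &context, const std::vector<std::string> &queries) {
  typename DependantTypes<TContext>::Runner runner{&context};
  for (const auto &query : queries) {
    runner.Run(query);
  }
}

int main() {
  ContextA a;
  ContextB b;
  RunAll(a, {"MATCH (n) RETURN n;"});
  RunAll(b, {"MATCH (n) RETURN n;"});
}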
void RunInitQueries(InterpreterContext &interpreter_context, const std::vector<std::string> &init_queries) {
template <typename TInterpreterContext>
void RunInitQueries(TInterpreterContext &interpreter_context, const std::vector<std::string> &init_queries) {
RunQueries(interpreter_context, init_queries);
}
void RunBenchmarkQueries(InterpreterContext &interpreter_context, const std::vector<std::string> &benchmark_queries) {
template <typename TInterpreterContext>
void RunBenchmarkQueries(TInterpreterContext &interpreter_context, const std::vector<std::string> &benchmark_queries) {
RunQueries(interpreter_context, benchmark_queries);
}
LatencyHistogramSummaries Run() {
void RunV2() {
const auto run_start = std::chrono::high_resolution_clock::now();
std::vector<std::string> init_queries{};
std::string buffer;
std::ifstream indices_file{FLAGS_index_queries_file, std::ios::in};
MG_ASSERT(indices_file.good(), "Cannot open index queries file to read: {}", FLAGS_index_queries_file);
while (indices_file.good()) {
std::getline(indices_file, buffer);
if (buffer.empty()) {
continue;
}
// Trim the trailing `;`
init_queries.push_back(buffer.substr(0, buffer.size() - 1));
}
std::ifstream init_file{FLAGS_init_queries_file, std::ios::in};
MG_ASSERT(init_file.good(), "Cannot open init queries file to read: {}", FLAGS_init_queries_file);
while (init_file.good()) {
std::getline(init_file, buffer);
if (buffer.empty()) {
continue;
}
// Trim the trailing `;`
init_queries.push_back(buffer.substr(0, buffer.size() - 1));
}
std::ifstream benchmark_file{FLAGS_benchmark_queries_file, std::ios::in};
MG_ASSERT(benchmark_file.good(), "Cannot open benchmark queries file to read: {}", FLAGS_benchmark_queries_file);
std::vector<std::string> benchmark_queries{};
while (benchmark_file.good()) {
std::getline(benchmark_file, buffer);
if (buffer.empty()) {
continue;
}
// Trim the trailing `;`
benchmark_queries.push_back(buffer.substr(0, buffer.size() - 1));
}
storage::Storage storage{
storage::Config{.durability{.snapshot_wal_mode = storage::Config::Durability::SnapshotWalMode::DISABLED}}};
memgraph::query::InterpreterContext interpreter_context{
&storage,
{.query = {.allow_load_csv = false},
.execution_timeout_sec = 0,
.replication_replica_check_frequency = std::chrono::seconds(0),
.default_kafka_bootstrap_servers = "",
.default_pulsar_service_url = "",
.stream_transaction_conflict_retries = 0,
.stream_transaction_retry_interval = std::chrono::milliseconds(0)},
"query_performance_data"};
const auto init_start = std::chrono::high_resolution_clock::now();
RunInitQueries(interpreter_context, init_queries);
const auto benchmark_start = std::chrono::high_resolution_clock::now();
spdlog::critical("Read: {}ms", std::chrono::duration_cast<std::chrono::milliseconds>(init_start - run_start).count());
RunBenchmarkQueries(interpreter_context, benchmark_queries);
const auto benchmark_end = std::chrono::high_resolution_clock::now();
spdlog::critical("Init: {}ms",
std::chrono::duration_cast<std::chrono::milliseconds>(benchmark_start - init_start).count());
spdlog::critical("Benchmark: {}ms",
std::chrono::duration_cast<std::chrono::milliseconds>(benchmark_end - benchmark_start).count());
}
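RunV2 above repeats the same read-one-query-per-line loop three times (index, init, and benchmark files), trimming the trailing `;` from each line. A hedged sketch of how that loop could be factored into one helper; ReadQueries is a hypothetical name and not part of this commit, and the plain exception stands in for the MG_ASSERT used in the real code:

#include <fstream>
#include <stdexcept>
#include <string>
#include <vector>

// Hypothetical helper: one query per line, each line terminated by `;`.
std::vector<std::string> ReadQueries(const std::string &path) {
  std::ifstream file{path, std::ios::in};
  if (!file.good()) {
    throw std::runtime_error("Cannot open queries file to read: " + path);
  }
  std::vector<std::string> queries;
  std::string buffer;
  while (std::getline(file, buffer)) {
    if (buffer.empty()) {
      continue;
    }
    // Trim the trailing `;`
    queries.push_back(buffer.substr(0, buffer.size() - 1));
  }
  return queries;
}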
io::LatencyHistogramSummaries RunV3() {
const auto run_start = std::chrono::high_resolution_clock::now();
std::ifstream sm_file{FLAGS_split_file, std::ios::in};
MG_ASSERT(sm_file.good(), "Cannot open split file to read: {}", FLAGS_split_file);
@@ -113,11 +209,11 @@ LatencyHistogramSummaries Run() {
auto rr_factory = std::make_unique<memgraph::query::v2::LocalRequestRouterFactory>(io);
InterpreterContext interpreter_context{(memgraph::storage::v3::Shard *)(nullptr),
{.execution_timeout_sec = 0},
"data",
std::move(rr_factory),
mm.CoordinatorAddress()};
query::v2::InterpreterContext interpreter_context{(memgraph::storage::v3::Shard *)(nullptr),
{.execution_timeout_sec = 0},
"data",
std::move(rr_factory),
mm.CoordinatorAddress()};
// without this it fails sometimes because the CreateVertices request might reach the shard worker faster than the
// ShardToInitialize
@@ -142,6 +238,10 @@ LatencyHistogramSummaries Run() {
int main(int argc, char **argv) {
spdlog::cfg::load_env_levels();
gflags::ParseCommandLineFlags(&argc, &argv, true);
memgraph::tests::manual::Run();
if (FLAGS_use_v3) {
memgraph::tests::manual::RunV3();
} else {
memgraph::tests::manual::RunV2();
}
return 0;
}