memgraph/tests/simulation/test_cluster.hpp

// Copyright 2023 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
// License, and you may not use this file except in compliance with the Business Source License.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
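
// Simulation-test harness: spins up a single machine that acts as both
// coordinator and storage node on top of the deterministic simulator
// transport, drives a generated workload against it through the
// RequestRouter, and checks the results against a simple in-memory
// correctness model.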
#include <chrono>
#include <iostream>
#include <limits>
#include <memory>
#include <set>
#include <thread>

#include <rapidcheck.h>
#include <spdlog/spdlog.h>

#include "cluster_config.hpp"
#include "coordinator/coordinator_client.hpp"
#include "coordinator/coordinator_rsm.hpp"
#include "coordinator/shard_map.hpp"
#include "generated_operations.hpp"
#include "io/address.hpp"
#include "io/message_histogram_collector.hpp"
#include "io/simulator/simulator.hpp"
#include "io/simulator/simulator_config.hpp"
#include "io/simulator/simulator_transport.hpp"
#include "machine_manager/machine_config.hpp"
#include "machine_manager/machine_manager.hpp"
#include "query/v2/request_router.hpp"
#include "query/v2/requests.hpp"
#include "testing_constants.hpp"
#include "utils/print_helpers.hpp"
#include "utils/variant_helpers.hpp"

namespace memgraph::tests::simulation {
using coordinator::Coordinator;
using coordinator::CoordinatorClient;
using coordinator::CoordinatorReadRequests;
using coordinator::CoordinatorWriteRequests;
using coordinator::CoordinatorWriteResponses;
using coordinator::GetShardMapRequest;
using coordinator::GetShardMapResponse;
using coordinator::Hlc;
using coordinator::HlcResponse;
using coordinator::ShardMap;
using coordinator::ShardMetadata;
using io::Address;
using io::Io;
using io::rsm::RsmClient;
using io::simulator::Simulator;
using io::simulator::SimulatorConfig;
using io::simulator::SimulatorStats;
using io::simulator::SimulatorTransport;
using machine_manager::MachineConfig;
using machine_manager::MachineManager;
using memgraph::io::LatencyHistogramSummaries;
using msgs::ReadRequests;
using msgs::ReadResponses;
using msgs::WriteRequests;
using msgs::WriteResponses;
using storage::v3::LabelId;
using storage::v3::SchemaProperty;
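
// Vertices in this test carry a two-integer primary key; the same pair type
// doubles as the entry type of the in-memory correctness model.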
using CompoundKey = std::pair<int, int>;
using ShardClient = RsmClient<SimulatorTransport, WriteRequests, WriteResponses, ReadRequests, ReadResponses>;
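
// Builds a MachineManager configured as both coordinator and storage node,
// with its Io registered on the simulator under the given address.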
MachineManager<SimulatorTransport> MkMm(Simulator &simulator, std::vector<Address> coordinator_addresses, Address addr,
ShardMap shard_map) {
MachineConfig config{
.coordinator_addresses = coordinator_addresses,
.is_storage = true,
.is_coordinator = true,
.listen_ip = addr.last_known_ip,
.listen_port = addr.last_known_port,
.shard_worker_threads = 4,
.sync_message_handling = true,
};
Io<SimulatorTransport> io = simulator.Register(addr);
Coordinator coordinator{shard_map};
return MachineManager{io, config, coordinator};
}
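
// Thread entry point: runs the MachineManager's main loop until the
// simulator is shut down.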
void RunMachine(MachineManager<SimulatorTransport> mm) { mm.Run(); }

void WaitForShardsToInitialize(CoordinatorClient<SimulatorTransport> &coordinator_client) {
// Poll the coordinator with GetShardMap read requests until the returned
// shard map reports proper replicas for every shard in the label space.
while (true) {
GetShardMapRequest req{};
CoordinatorReadRequests read_req = req;
auto read_res = coordinator_client.SendReadRequest(read_req);
if (read_res.HasError()) {
// timed out
continue;
}
auto response_result = read_res.GetValue();
auto response = std::get<GetShardMapResponse>(response_result);
auto shard_map = response.shard_map;
if (shard_map.ClusterInitialized()) {
spdlog::info("cluster stabilized - beginning workload");
return;
}
}
}
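
// Builds the ShardMap used to bootstrap the cluster: a single "test_label"
// label with a two-property INT/INT schema, pre-split into multiple shards.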
ShardMap TestShardMap(int n_splits, int replication_factor) {
ShardMap sm{};
const std::string label_name = std::string("test_label");
// register new properties
const std::vector<std::string> property_names = {"property_1", "property_2"};
const auto properties = sm.AllocatePropertyIds(property_names);
const auto property_id_1 = properties.at("property_1");
const auto property_id_2 = properties.at("property_2");
const auto type_1 = memgraph::common::SchemaType::INT;
const auto type_2 = memgraph::common::SchemaType::INT;
// register new label space
std::vector<SchemaProperty> schema = {
SchemaProperty{.property_id = property_id_1, .type = type_1},
SchemaProperty{.property_id = property_id_2, .type = type_2},
};
std::optional<LabelId> label_id = sm.InitializeNewLabel(label_name, schema, replication_factor, sm.shard_map_version);
RC_ASSERT(label_id.has_value());
// split the shard at N split points
for (int64_t i = 1; i < n_splits; ++i) {
const auto key1 = memgraph::storage::v3::PropertyValue(i);
const auto key2 = memgraph::storage::v3::PropertyValue(0);
const auto split_point = {key1, key2};
const bool split_success = sm.SplitShard(sm.shard_map_version, label_id.value(), split_point);
RC_ASSERT(split_success);
}
return sm;
}
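
// Applies a generated CreateVertex op through the RequestRouter and records
// it in the correctness model; keys that were already created are skipped
// (see the TODO below).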
void ExecuteOp(query::v2::RequestRouter<SimulatorTransport> &request_router, std::set<CompoundKey> &correctness_model,
CreateVertex create_vertex) {
const auto key1 = memgraph::storage::v3::PropertyValue(create_vertex.first);
const auto key2 = memgraph::storage::v3::PropertyValue(create_vertex.second);
std::vector<msgs::Value> primary_key = {msgs::Value(int64_t(create_vertex.first)),
msgs::Value(int64_t(create_vertex.second))};
if (correctness_model.contains(std::make_pair(create_vertex.first, create_vertex.second))) {
// TODO(tyler) remove this early-return when we have properly handled setting non-unique vertexes
return;
}
auto label_id = request_router.NameToLabel("test_label");
msgs::NewVertex nv{.primary_key = primary_key};
nv.label_ids.push_back({label_id});
std::vector<msgs::NewVertex> new_vertices;
new_vertices.push_back(std::move(nv));
auto result = request_router.CreateVertices(std::move(new_vertices));
RC_ASSERT(result.size() == 1);
RC_ASSERT(!result[0].error.has_value());
correctness_model.emplace(std::make_pair(create_vertex.first, create_vertex.second));
}
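
// Applies a generated ScanAll op: scans every "test_label" vertex through the
// RequestRouter and asserts that the result set matches the correctness model.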
void ExecuteOp(query::v2::RequestRouter<SimulatorTransport> &request_router, std::set<CompoundKey> &correctness_model,
ScanAll scan_all) {
auto results = request_router.ScanVertices("test_label", std::nullopt);
RC_ASSERT(results.size() == correctness_model.size());
for (const auto &vertex_accessor : results) {
const auto properties = vertex_accessor.Properties();
const auto primary_key = vertex_accessor.Id().second;
const CompoundKey model_key = std::make_pair(primary_key[0].int_v, primary_key[1].int_v);
RC_ASSERT(correctness_model.contains(model_key));
}
}

/// Detaches the machine-manager thread if something causes an uncaught
/// exception; that thread would otherwise never receive a ShutDown message
/// and the test would hang forever.
struct DetachIfDropped {
std::jthread &handle;
bool detach = true;
~DetachIfDropped() {
if (detach && handle.joinable()) {
handle.detach();
}
}
};
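
// Drives one full simulation run: boots the machine, waits for the shard map
// to stabilize, executes the generated ops through a RequestRouter, and
// returns the simulator statistics together with the observed response
// latency histograms.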
std::pair<SimulatorStats, LatencyHistogramSummaries> RunClusterSimulation(const SimulatorConfig &sim_config,
const ClusterConfig &cluster_config,
const std::vector<Op> &ops) {
spdlog::info("========================== NEW SIMULATION ==========================");
auto simulator = Simulator(sim_config);
auto machine_1_addr = Address::TestAddress(1);
auto cli_addr = Address::TestAddress(2);
auto cli_addr_2 = Address::TestAddress(3);
Io<SimulatorTransport> cli_io = simulator.Register(cli_addr);
Io<SimulatorTransport> cli_io_2 = simulator.Register(cli_addr_2);
auto coordinator_addresses = std::vector{
machine_1_addr,
};
ShardMap initialization_sm = TestShardMap(cluster_config.shards - 1, cluster_config.replication_factor);
auto mm_1 = MkMm(simulator, coordinator_addresses, machine_1_addr, initialization_sm);
Address coordinator_address = mm_1.CoordinatorAddress();
auto mm_thread_1 = std::jthread(RunMachine, std::move(mm_1));
simulator.IncrementServerCountAndWaitForQuiescentState(machine_1_addr);
auto detach_on_error = DetachIfDropped{.handle = mm_thread_1};
// TODO(tyler) clarify addresses of coordinator etc... as it's a mess
CoordinatorClient<SimulatorTransport> coordinator_client(cli_io, coordinator_address, {coordinator_address});
WaitForShardsToInitialize(coordinator_client);
query::v2::RequestRouter<SimulatorTransport> request_router(std::move(coordinator_client), std::move(cli_io));
std::function<bool()> tick_simulator = simulator.GetSimulatorTickClosure();
request_router.InstallSimulatorTicker(tick_simulator);
request_router.StartTransaction();
auto correctness_model = std::set<CompoundKey>{};
for (const Op &op : ops) {
std::visit([&](auto &o) { ExecuteOp(request_router, correctness_model, o); }, op.inner);
}
// We have now completed our workload without failing any assertions, so we can
// disable detaching the worker thread, which will cause the mm_thread_1 jthread
// to be joined when this function returns.
detach_on_error.detach = false;
simulator.ShutDown();
mm_thread_1.join();
SimulatorStats stats = simulator.Stats();
spdlog::info("total messages: {}", stats.total_messages);
spdlog::info("dropped messages: {}", stats.dropped_messages);
spdlog::info("timed out requests: {}", stats.timed_out_requests);
spdlog::info("total requests: {}", stats.total_requests);
spdlog::info("total responses: {}", stats.total_responses);
spdlog::info("simulator ticks: {}", stats.simulator_ticks);
auto histo = cli_io_2.ResponseLatencies();
spdlog::info("========================== SUCCESS :) ==========================");
return std::make_pair(stats, histo);
}
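
// A minimal usage sketch (hypothetical - the real drivers live in the
// simulation test binaries, not in this header). How the configs and the ops
// are constructed below is an assumption; only the RunClusterSimulation
// signature comes from this file:
//
//   ClusterConfig cluster_config{};  // pick shards / replication_factor
//   SimulatorConfig sim_config{};    // e.g. RNG seed, message-drop settings
//   std::vector<Op> ops = /* generated via rapidcheck, see generated_operations.hpp */;
//   auto [stats, latencies] = RunClusterSimulation(sim_config, cluster_config, ops);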
} // namespace memgraph::tests::simulation