From f6017697d6b1841c6330abd196878762c30250c4 Mon Sep 17 00:00:00 2001 From: Tyler Neely Date: Thu, 17 Nov 2022 21:32:55 +0000 Subject: [PATCH] Make raft tests fully deterministic for rng_seeds between 0 and 500 at 1% message loss --- src/io/simulator/simulator_handle.cpp | 15 +++++++++++++-- src/io/simulator/simulator_handle.hpp | 9 ++++++--- src/io/simulator/simulator_transport.hpp | 7 +------ tests/simulation/raft.cpp | 16 +++++----------- 4 files changed, 25 insertions(+), 22 deletions(-) diff --git a/src/io/simulator/simulator_handle.cpp b/src/io/simulator/simulator_handle.cpp index df7895dd7..2a5b0a247 100644 --- a/src/io/simulator/simulator_handle.cpp +++ b/src/io/simulator/simulator_handle.cpp @@ -75,12 +75,14 @@ bool SimulatorHandle::MaybeTickSimulator() { return false; } + spdlog::info("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ simulator tick ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"); stats_.simulator_ticks++; bool fired_cv = false; bool timed_anything_out = TimeoutPromisesPastDeadline(); if (timed_anything_out) { + spdlog::info("simulator progressing: timed out a request"); fired_cv = true; blocked_on_receive_.clear(); cv_.notify_all(); @@ -95,6 +97,7 @@ bool SimulatorHandle::MaybeTickSimulator() { cluster_wide_time_microseconds_ += clock_advance; if (!fired_cv) { + spdlog::info("simulator progressing: clock advanced by {}", clock_advance.count()); cv_.notify_all(); blocked_on_receive_.clear(); fired_cv = true; @@ -109,6 +112,9 @@ bool SimulatorHandle::MaybeTickSimulator() { "in an expected amount of time."); throw utils::BasicException{"Cluster has executed beyond its configured abort_time"}; } + + // if the clock is advanced, no messages should be delivered also. + // do that in a future tick. return true; } @@ -141,7 +147,7 @@ bool SimulatorHandle::MaybeTickSimulator() { if (should_drop || normal_timeout) { stats_.timed_out_requests++; dop.promise.TimeOut(); - spdlog::info("timing out request"); + spdlog::info("timing out request "); } else { stats_.total_responses++; Duration response_latency = cluster_wide_time_microseconds_ - dop.requested_at; @@ -153,15 +159,20 @@ bool SimulatorHandle::MaybeTickSimulator() { } else if (should_drop) { // don't add it anywhere, let it drop spdlog::info("silently dropping request"); + + // we don't want to reset the block list here + return true; } else { // add to can_receive_ if not - spdlog::info("adding message to can_receive_"); + spdlog::info("adding message to can_receive_ from {} to {}", opaque_message.from_address.last_known_port, + opaque_message.to_address.last_known_port); const auto &[om_vec, inserted] = can_receive_.try_emplace(to_address.ToPartialAddress(), std::vector()); om_vec->second.emplace_back(std::move(opaque_message)); } if (!fired_cv) { + spdlog::info("simulator progressing: handled a message"); cv_.notify_all(); blocked_on_receive_.clear(); fired_cv = true; diff --git a/src/io/simulator/simulator_handle.hpp b/src/io/simulator/simulator_handle.hpp index 00aeaee25..ce341a50b 100644 --- a/src/io/simulator/simulator_handle.hpp +++ b/src/io/simulator/simulator_handle.hpp @@ -171,12 +171,15 @@ class SimulatorHandle { } if (!should_shut_down_) { - blocked_on_receive_.emplace(receiver); - cv_.notify_all(); - spdlog::info("blocking receiver {}", receiver.ToPartialAddress().port); + if (!blocked_on_receive_.contains(receiver)) { + blocked_on_receive_.emplace(receiver); + spdlog::info("blocking receiver {}", receiver.ToPartialAddress().port); + cv_.notify_all(); + } cv_.wait(lock); } } + spdlog::info("timing out receiver {}", receiver.ToPartialAddress().port); return TimedOut{}; } diff --git a/src/io/simulator/simulator_transport.hpp b/src/io/simulator/simulator_transport.hpp index 6b6bc7c5c..5e5a24aa9 100644 --- a/src/io/simulator/simulator_transport.hpp +++ b/src/io/simulator/simulator_transport.hpp @@ -34,12 +34,7 @@ class SimulatorTransport { template ResponseFuture Request(Address to_address, Address from_address, RequestT request, Duration timeout) { - std::function maybe_tick_simulator = [this] { - spdlog::info("client calling MaybeTickSimulator"); - bool ret = simulator_handle_->MaybeTickSimulator(); - spdlog::info("client returned from MaybeTickSimulator"); - return ret; - }; + std::function maybe_tick_simulator = [this] { return simulator_handle_->MaybeTickSimulator(); }; return simulator_handle_->template SubmitRequest(to_address, from_address, std::move(request), timeout, std::move(maybe_tick_simulator)); diff --git a/tests/simulation/raft.cpp b/tests/simulation/raft.cpp index 16fcc9906..b69470a72 100644 --- a/tests/simulation/raft.cpp +++ b/tests/simulation/raft.cpp @@ -240,13 +240,6 @@ std::pair RunSimulation(SimulatorConf spdlog::info("========================== SUCCESS :) =========================="); return std::make_pair(simulator.Stats(), cli_io.ResponseLatencies()); - - /* - this is implicit in jthread's dtor - srv_thread_1.join(); - srv_thread_2.join(); - srv_thread_3.join(); - */ } int main() { @@ -264,25 +257,26 @@ int main() { }; for (int i = 0; i < n_tests; i++) { - spdlog::error("========================== NEW SIMULATION SEED {} ==========================", i); spdlog::info("\tTime\t\tTerm\tPort\tRole\t\tMessage\n"); // this is vital to cause tests to behave differently across runs config.rng_seed = i; + spdlog::info("========================== NEW SIMULATION SEED {} ==========================", i); auto [sim_stats_1, latency_stats_1] = RunSimulation(config); + spdlog::info("========================== NEW SIMULATION SEED {} ==========================", i); auto [sim_stats_2, latency_stats_2] = RunSimulation(config); if (sim_stats_1 != sim_stats_2 || latency_stats_1 != latency_stats_2) { - spdlog::error("simulator stats diverged across runs"); + spdlog::error("simulator stats diverged across runs for test rng_seed {}", i); spdlog::error("run 1 simulator stats: {}", sim_stats_1); spdlog::error("run 2 simulator stats: {}", sim_stats_2); spdlog::error("run 1 latency:\n{}", latency_stats_1.SummaryTable()); spdlog::error("run 2 latency:\n{}", latency_stats_2.SummaryTable()); std::terminate(); } - spdlog::error("run 1 simulator stats: {}", sim_stats_1); - spdlog::error("run 2 simulator stats: {}", sim_stats_2); + spdlog::info("run 1 simulator stats: {}", sim_stats_1); + spdlog::info("run 2 simulator stats: {}", sim_stats_2); } spdlog::info("passed {} tests!", n_tests);