Make raft tests fully deterministic for rng_seeds between 0 and 500 at 1% message loss

This commit is contained in:
Tyler Neely 2022-11-17 21:32:55 +00:00
parent 9c3d683942
commit f6017697d6
4 changed files with 25 additions and 22 deletions

View File

@ -75,12 +75,14 @@ bool SimulatorHandle::MaybeTickSimulator() {
return false;
}
spdlog::info("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ simulator tick ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");
stats_.simulator_ticks++;
bool fired_cv = false;
bool timed_anything_out = TimeoutPromisesPastDeadline();
if (timed_anything_out) {
spdlog::info("simulator progressing: timed out a request");
fired_cv = true;
blocked_on_receive_.clear();
cv_.notify_all();
@ -95,6 +97,7 @@ bool SimulatorHandle::MaybeTickSimulator() {
cluster_wide_time_microseconds_ += clock_advance;
if (!fired_cv) {
spdlog::info("simulator progressing: clock advanced by {}", clock_advance.count());
cv_.notify_all();
blocked_on_receive_.clear();
fired_cv = true;
@ -109,6 +112,9 @@ bool SimulatorHandle::MaybeTickSimulator() {
"in an expected amount of time.");
throw utils::BasicException{"Cluster has executed beyond its configured abort_time"};
}
// if the clock is advanced, no messages should be delivered also.
// do that in a future tick.
return true;
}
@ -153,15 +159,20 @@ bool SimulatorHandle::MaybeTickSimulator() {
} else if (should_drop) {
// don't add it anywhere, let it drop
spdlog::info("silently dropping request");
// we don't want to reset the block list here
return true;
} else {
// add to can_receive_ if not
spdlog::info("adding message to can_receive_");
spdlog::info("adding message to can_receive_ from {} to {}", opaque_message.from_address.last_known_port,
opaque_message.to_address.last_known_port);
const auto &[om_vec, inserted] =
can_receive_.try_emplace(to_address.ToPartialAddress(), std::vector<OpaqueMessage>());
om_vec->second.emplace_back(std::move(opaque_message));
}
if (!fired_cv) {
spdlog::info("simulator progressing: handled a message");
cv_.notify_all();
blocked_on_receive_.clear();
fired_cv = true;

View File

@ -171,12 +171,15 @@ class SimulatorHandle {
}
if (!should_shut_down_) {
if (!blocked_on_receive_.contains(receiver)) {
blocked_on_receive_.emplace(receiver);
cv_.notify_all();
spdlog::info("blocking receiver {}", receiver.ToPartialAddress().port);
cv_.notify_all();
}
cv_.wait(lock);
}
}
spdlog::info("timing out receiver {}", receiver.ToPartialAddress().port);
return TimedOut{};
}

View File

@ -34,12 +34,7 @@ class SimulatorTransport {
template <Message RequestT, Message ResponseT>
ResponseFuture<ResponseT> Request(Address to_address, Address from_address, RequestT request, Duration timeout) {
std::function<bool()> maybe_tick_simulator = [this] {
spdlog::info("client calling MaybeTickSimulator");
bool ret = simulator_handle_->MaybeTickSimulator();
spdlog::info("client returned from MaybeTickSimulator");
return ret;
};
std::function<bool()> maybe_tick_simulator = [this] { return simulator_handle_->MaybeTickSimulator(); };
return simulator_handle_->template SubmitRequest<RequestT, ResponseT>(to_address, from_address, std::move(request),
timeout, std::move(maybe_tick_simulator));

View File

@ -240,13 +240,6 @@ std::pair<SimulatorStats, LatencyHistogramSummaries> RunSimulation(SimulatorConf
spdlog::info("========================== SUCCESS :) ==========================");
return std::make_pair(simulator.Stats(), cli_io.ResponseLatencies());
/*
this is implicit in jthread's dtor
srv_thread_1.join();
srv_thread_2.join();
srv_thread_3.join();
*/
}
int main() {
@ -264,25 +257,26 @@ int main() {
};
for (int i = 0; i < n_tests; i++) {
spdlog::error("========================== NEW SIMULATION SEED {} ==========================", i);
spdlog::info("\tTime\t\tTerm\tPort\tRole\t\tMessage\n");
// this is vital to cause tests to behave differently across runs
config.rng_seed = i;
spdlog::info("========================== NEW SIMULATION SEED {} ==========================", i);
auto [sim_stats_1, latency_stats_1] = RunSimulation(config);
spdlog::info("========================== NEW SIMULATION SEED {} ==========================", i);
auto [sim_stats_2, latency_stats_2] = RunSimulation(config);
if (sim_stats_1 != sim_stats_2 || latency_stats_1 != latency_stats_2) {
spdlog::error("simulator stats diverged across runs");
spdlog::error("simulator stats diverged across runs for test rng_seed {}", i);
spdlog::error("run 1 simulator stats: {}", sim_stats_1);
spdlog::error("run 2 simulator stats: {}", sim_stats_2);
spdlog::error("run 1 latency:\n{}", latency_stats_1.SummaryTable());
spdlog::error("run 2 latency:\n{}", latency_stats_2.SummaryTable());
std::terminate();
}
spdlog::error("run 1 simulator stats: {}", sim_stats_1);
spdlog::error("run 2 simulator stats: {}", sim_stats_2);
spdlog::info("run 1 simulator stats: {}", sim_stats_1);
spdlog::info("run 2 simulator stats: {}", sim_stats_2);
}
spdlog::info("passed {} tests!", n_tests);