Make raft tests fully deterministic for rng_seeds between 0 and 500 at 1% message loss
This commit is contained in:
parent
9c3d683942
commit
f6017697d6
@ -75,12 +75,14 @@ bool SimulatorHandle::MaybeTickSimulator() {
|
||||
return false;
|
||||
}
|
||||
|
||||
spdlog::info("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ simulator tick ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");
|
||||
stats_.simulator_ticks++;
|
||||
|
||||
bool fired_cv = false;
|
||||
bool timed_anything_out = TimeoutPromisesPastDeadline();
|
||||
|
||||
if (timed_anything_out) {
|
||||
spdlog::info("simulator progressing: timed out a request");
|
||||
fired_cv = true;
|
||||
blocked_on_receive_.clear();
|
||||
cv_.notify_all();
|
||||
@ -95,6 +97,7 @@ bool SimulatorHandle::MaybeTickSimulator() {
|
||||
cluster_wide_time_microseconds_ += clock_advance;
|
||||
|
||||
if (!fired_cv) {
|
||||
spdlog::info("simulator progressing: clock advanced by {}", clock_advance.count());
|
||||
cv_.notify_all();
|
||||
blocked_on_receive_.clear();
|
||||
fired_cv = true;
|
||||
@ -109,6 +112,9 @@ bool SimulatorHandle::MaybeTickSimulator() {
|
||||
"in an expected amount of time.");
|
||||
throw utils::BasicException{"Cluster has executed beyond its configured abort_time"};
|
||||
}
|
||||
|
||||
// if the clock is advanced, no messages should be delivered also.
|
||||
// do that in a future tick.
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -141,7 +147,7 @@ bool SimulatorHandle::MaybeTickSimulator() {
|
||||
if (should_drop || normal_timeout) {
|
||||
stats_.timed_out_requests++;
|
||||
dop.promise.TimeOut();
|
||||
spdlog::info("timing out request");
|
||||
spdlog::info("timing out request ");
|
||||
} else {
|
||||
stats_.total_responses++;
|
||||
Duration response_latency = cluster_wide_time_microseconds_ - dop.requested_at;
|
||||
@ -153,15 +159,20 @@ bool SimulatorHandle::MaybeTickSimulator() {
|
||||
} else if (should_drop) {
|
||||
// don't add it anywhere, let it drop
|
||||
spdlog::info("silently dropping request");
|
||||
|
||||
// we don't want to reset the block list here
|
||||
return true;
|
||||
} else {
|
||||
// add to can_receive_ if not
|
||||
spdlog::info("adding message to can_receive_");
|
||||
spdlog::info("adding message to can_receive_ from {} to {}", opaque_message.from_address.last_known_port,
|
||||
opaque_message.to_address.last_known_port);
|
||||
const auto &[om_vec, inserted] =
|
||||
can_receive_.try_emplace(to_address.ToPartialAddress(), std::vector<OpaqueMessage>());
|
||||
om_vec->second.emplace_back(std::move(opaque_message));
|
||||
}
|
||||
|
||||
if (!fired_cv) {
|
||||
spdlog::info("simulator progressing: handled a message");
|
||||
cv_.notify_all();
|
||||
blocked_on_receive_.clear();
|
||||
fired_cv = true;
|
||||
|
@ -171,12 +171,15 @@ class SimulatorHandle {
|
||||
}
|
||||
|
||||
if (!should_shut_down_) {
|
||||
blocked_on_receive_.emplace(receiver);
|
||||
cv_.notify_all();
|
||||
spdlog::info("blocking receiver {}", receiver.ToPartialAddress().port);
|
||||
if (!blocked_on_receive_.contains(receiver)) {
|
||||
blocked_on_receive_.emplace(receiver);
|
||||
spdlog::info("blocking receiver {}", receiver.ToPartialAddress().port);
|
||||
cv_.notify_all();
|
||||
}
|
||||
cv_.wait(lock);
|
||||
}
|
||||
}
|
||||
spdlog::info("timing out receiver {}", receiver.ToPartialAddress().port);
|
||||
|
||||
return TimedOut{};
|
||||
}
|
||||
|
@ -34,12 +34,7 @@ class SimulatorTransport {
|
||||
|
||||
template <Message RequestT, Message ResponseT>
|
||||
ResponseFuture<ResponseT> Request(Address to_address, Address from_address, RequestT request, Duration timeout) {
|
||||
std::function<bool()> maybe_tick_simulator = [this] {
|
||||
spdlog::info("client calling MaybeTickSimulator");
|
||||
bool ret = simulator_handle_->MaybeTickSimulator();
|
||||
spdlog::info("client returned from MaybeTickSimulator");
|
||||
return ret;
|
||||
};
|
||||
std::function<bool()> maybe_tick_simulator = [this] { return simulator_handle_->MaybeTickSimulator(); };
|
||||
|
||||
return simulator_handle_->template SubmitRequest<RequestT, ResponseT>(to_address, from_address, std::move(request),
|
||||
timeout, std::move(maybe_tick_simulator));
|
||||
|
@ -240,13 +240,6 @@ std::pair<SimulatorStats, LatencyHistogramSummaries> RunSimulation(SimulatorConf
|
||||
spdlog::info("========================== SUCCESS :) ==========================");
|
||||
|
||||
return std::make_pair(simulator.Stats(), cli_io.ResponseLatencies());
|
||||
|
||||
/*
|
||||
this is implicit in jthread's dtor
|
||||
srv_thread_1.join();
|
||||
srv_thread_2.join();
|
||||
srv_thread_3.join();
|
||||
*/
|
||||
}
|
||||
|
||||
int main() {
|
||||
@ -264,25 +257,26 @@ int main() {
|
||||
};
|
||||
|
||||
for (int i = 0; i < n_tests; i++) {
|
||||
spdlog::error("========================== NEW SIMULATION SEED {} ==========================", i);
|
||||
spdlog::info("\tTime\t\tTerm\tPort\tRole\t\tMessage\n");
|
||||
|
||||
// this is vital to cause tests to behave differently across runs
|
||||
config.rng_seed = i;
|
||||
|
||||
spdlog::info("========================== NEW SIMULATION SEED {} ==========================", i);
|
||||
auto [sim_stats_1, latency_stats_1] = RunSimulation(config);
|
||||
spdlog::info("========================== NEW SIMULATION SEED {} ==========================", i);
|
||||
auto [sim_stats_2, latency_stats_2] = RunSimulation(config);
|
||||
|
||||
if (sim_stats_1 != sim_stats_2 || latency_stats_1 != latency_stats_2) {
|
||||
spdlog::error("simulator stats diverged across runs");
|
||||
spdlog::error("simulator stats diverged across runs for test rng_seed {}", i);
|
||||
spdlog::error("run 1 simulator stats: {}", sim_stats_1);
|
||||
spdlog::error("run 2 simulator stats: {}", sim_stats_2);
|
||||
spdlog::error("run 1 latency:\n{}", latency_stats_1.SummaryTable());
|
||||
spdlog::error("run 2 latency:\n{}", latency_stats_2.SummaryTable());
|
||||
std::terminate();
|
||||
}
|
||||
spdlog::error("run 1 simulator stats: {}", sim_stats_1);
|
||||
spdlog::error("run 2 simulator stats: {}", sim_stats_2);
|
||||
spdlog::info("run 1 simulator stats: {}", sim_stats_1);
|
||||
spdlog::info("run 2 simulator stats: {}", sim_stats_2);
|
||||
}
|
||||
|
||||
spdlog::info("passed {} tests!", n_tests);
|
||||
|
Loading…
Reference in New Issue
Block a user