Fix a race condition that happens when logging from a detached thread in the cluster property test. Improve the ShardManager dtor and log statements

This commit is contained in:
Tyler Neely 2022-11-04 11:14:39 +00:00
parent 9203616283
commit 8598f6edf4
4 changed files with 28 additions and 4 deletions

View File

@ -105,6 +105,8 @@ class ShardManager {
worker.Push(shard_worker::ShutDown{});
}
workers_.clear();
// The jthread handles for our shard worker threads will be
// blocked on implicitly when worker_handles_ is destroyed.
}

View File

@ -147,7 +147,7 @@ class ShardWorker {
}
Time Cron() {
spdlog::info("running ShardManager::Cron, address {}", io_.GetAddress().ToString());
spdlog::info("running ShardWorker::Cron, address {}", io_.GetAddress().ToString());
Time now = io_.Now();
while (!cron_schedule_.empty()) {

View File

@ -18,6 +18,7 @@
#include <gtest/gtest.h>
#include <rapidcheck.h>
#include <rapidcheck/gtest.h>
#include <spdlog/cfg/env.h>
#include "generated_operations.hpp"
#include "io/simulator/simulator_config.hpp"
@ -35,6 +36,8 @@ using storage::v3::kMaximumCronInterval;
RC_GTEST_PROP(RandomClusterConfig, HappyPath, (ClusterConfig cluster_config, NonEmptyOpVec ops)) {
// TODO(tyler) set abort_time to something more restrictive than Time::max()
spdlog::cfg::load_env_levels();
SimulatorConfig sim_config{
.drop_percent = 0,
.perform_timeouts = false,

View File

@ -194,6 +194,22 @@ void ExecuteOp(msgs::ShardRequestManager<SimulatorTransport> &shard_request_mana
}
}
/// Scope guard around a std::jthread handle.
///
/// If something throws an uncaught exception while the guard is
/// alive, the guarded thread would never receive a ShutDown message,
/// and joining it would make the test hang forever. To avoid that,
/// the guard detaches the thread on destruction unless the owner has
/// cleared the `detach` flag first.
struct DetachIfDropped {
  /// Thread handle being guarded. Held by reference, so the handle
  /// has to outlive this guard.
  std::jthread &handle;

  /// While true (the default), the destructor detaches `handle`.
  /// Set it to false to leave the handle untouched.
  bool detach{true};

  ~DetachIfDropped() {
    // Detaching was disabled by the owner: nothing to do.
    if (!detach) {
      return;
    }
    // A handle that is not joinable has no thread left to detach.
    if (!handle.joinable()) {
      return;
    }
    handle.detach();
  }
};
void RunClusterSimulation(const SimulatorConfig &sim_config, const ClusterConfig &cluster_config,
const std::vector<Op> &ops) {
spdlog::info("========================== NEW SIMULATION ==========================");
@ -217,9 +233,7 @@ void RunClusterSimulation(const SimulatorConfig &sim_config, const ClusterConfig
auto mm_thread_1 = std::jthread(RunMachine, std::move(mm_1));
// Need to detach this thread so that the destructor does not
// block before we can propagate assertion failures.
mm_thread_1.detach();
auto detach_on_error = DetachIfDropped{.handle = mm_thread_1};
// TODO(tyler) clarify addresses of coordinator etc... as it's a mess
@ -236,6 +250,11 @@ void RunClusterSimulation(const SimulatorConfig &sim_config, const ClusterConfig
std::visit([&](auto &o) { ExecuteOp(shard_request_manager, correctness_model, o); }, op.inner);
}
// We have now completed our workload without failing any assertions, so we can
// disable detaching the worker thread, which will cause the mm_thread_1 jthread
// to be joined when this function returns.
detach_on_error.detach = false;
simulator.ShutDown();
SimulatorStats stats = simulator.Stats();