From c6f3f0eb9cd68150371c0c45b84aeb0dc72114c9 Mon Sep 17 00:00:00 2001 From: Eric Date: Tue, 6 Sep 2016 02:28:35 -0600 Subject: [PATCH] Cleanup RunBenchmark code. (#289) * Cleanup the code for generating and running benchmarks * Rework calculation of real/manual time * Add back TSAN builder --- .travis.yml | 10 ++ src/benchmark.cc | 183 ++++++++++++++++++----------------- src/benchmark_api_internal.h | 3 +- src/benchmark_register.cc | 1 - 4 files changed, 104 insertions(+), 93 deletions(-) diff --git a/.travis.yml b/.travis.yml index c73413ab..19c68dda 100644 --- a/.travis.yml +++ b/.travis.yml @@ -65,6 +65,16 @@ matrix: - COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=Debug - LIBCXX_BUILD=1 LIBCXX_SANITIZER=MemoryWithOrigins - EXTRA_FLAGS="-stdlib=libc++ -g -O2 -fno-omit-frame-pointer -fsanitize=memory -fsanitize-memory-track-origins" + # Clang w/ libc++ and MSAN + - compiler: clang + addons: + apt: + packages: + clang-3.8 + env: + - COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=RelWithDebInfo + - LIBCXX_BUILD=1 LIBCXX_SANITIZER=Thread + - EXTRA_FLAGS="-stdlib=libc++ -g -O2 -fno-omit-frame-pointer -fsanitize=thread -fno-sanitize-recover=all" before_script: - if [ -n "${LIBCXX_BUILD}" ]; then diff --git a/src/benchmark.cc b/src/benchmark.cc index 2ab8e8b8..7bd5f3b6 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -131,15 +131,18 @@ class ThreadManager { } public: - GUARDED_BY(GetBenchmarkMutex()) double real_time_used = 0; - GUARDED_BY(GetBenchmarkMutex()) double cpu_time_used = 0; - GUARDED_BY(GetBenchmarkMutex()) double manual_time_used = 0; - GUARDED_BY(GetBenchmarkMutex()) int64_t bytes_processed = 0; - GUARDED_BY(GetBenchmarkMutex()) int64_t items_processed = 0; - GUARDED_BY(GetBenchmarkMutex()) int complexity_n = 0; - GUARDED_BY(GetBenchmarkMutex()) std::string report_label_; - GUARDED_BY(GetBenchmarkMutex()) std::string error_message_; - GUARDED_BY(GetBenchmarkMutex()) bool has_error_ = false; + struct Result { + double real_time_used = 0; + double cpu_time_used = 0; + double manual_time_used = 0; + int64_t bytes_processed = 0; + int64_t items_processed = 0; + int complexity_n = 0; + std::string report_label_; + std::string error_message_; + bool has_error_ = false; + }; + GUARDED_BY(GetBenchmarkMutex()) Result results; private: mutable Mutex benchmark_mutex_; @@ -211,6 +214,47 @@ class ThreadTimer { namespace { +BenchmarkReporter::Run +CreateRunReport(const benchmark::internal::Benchmark::Instance& b, + const internal::ThreadManager::Result& results, + size_t iters, double seconds) +{ + // Create report about this benchmark run. + BenchmarkReporter::Run report; + + report.benchmark_name = b.name; + report.error_occurred = results.has_error_; + report.error_message = results.error_message_; + report.report_label = results.report_label_; + // Report the total iterations across all threads. + report.iterations = static_cast(iters) * b.threads; + report.time_unit = b.time_unit; + + if (!report.error_occurred) { + double bytes_per_second = 0; + if (results.bytes_processed > 0 && seconds > 0.0) { + bytes_per_second = (results.bytes_processed / seconds); + } + double items_per_second = 0; + if (results.items_processed > 0 && seconds > 0.0) { + items_per_second = (results.items_processed / seconds); + } + + if (b.use_manual_time) { + report.real_accumulated_time = results.manual_time_used; + } else { + report.real_accumulated_time = results.real_time_used; + } + report.cpu_accumulated_time = results.cpu_time_used; + report.bytes_per_second = bytes_per_second; + report.items_per_second = items_per_second; + report.complexity_n = results.complexity_n; + report.complexity = b.complexity; + report.complexity_lambda = b.complexity_lambda; + } + return report; +} + // Execute one thread of benchmark b for the specified number of iterations. // Adds the stats collected for the thread into *total. void RunInThread(const benchmark::internal::Benchmark::Instance* b, @@ -223,12 +267,13 @@ void RunInThread(const benchmark::internal::Benchmark::Instance* b, "Benchmark returned before State::KeepRunning() returned false!"; { MutexLock l(manager->GetBenchmarkMutex()); - manager->cpu_time_used += timer.cpu_time_used(); - manager->real_time_used += timer.real_time_used(); - manager->manual_time_used += timer.manual_time_used(); - manager->bytes_processed += st.bytes_processed(); - manager->items_processed += st.items_processed(); - manager->complexity_n += st.complexity_length_n(); + internal::ThreadManager::Result& results = manager->results; + results.cpu_time_used += timer.cpu_time_used(); + results.real_time_used += timer.real_time_used(); + results.manual_time_used += timer.manual_time_used(); + results.bytes_processed += st.bytes_processed(); + results.items_processed += st.items_processed(); + results.complexity_n += st.complexity_length_n(); } manager->NotifyThreadComplete(); } @@ -239,10 +284,8 @@ std::vector RunBenchmark( std::vector reports; // return value size_t iters = 1; - const int num_threads = b.multithreaded ? b.threads : 1; - std::vector pool; - if (num_threads > 1) pool.resize(num_threads -1); - + std::unique_ptr manager; + std::vector pool(b.threads - 1); const int repeats = b.repetitions != 0 ? b.repetitions : FLAGS_benchmark_repetitions; const bool report_aggregates_only = repeats != 1 && @@ -250,85 +293,49 @@ std::vector RunBenchmark( ? FLAGS_benchmark_report_aggregates_only : b.report_mode == internal::RM_ReportAggregatesOnly); for (int i = 0; i < repeats; i++) { - std::string mem; for (;;) { // Try benchmark VLOG(2) << "Running " << b.name << " for " << iters << "\n"; - internal::ThreadManager manager(num_threads); - if (b.multithreaded) { - // If this is out first iteration of the while(true) loop then the - // threads haven't been started and can't be joined. Otherwise we need - // to join the thread before replacing them. - for (std::thread& thread : pool) { - if (thread.joinable()) - thread.join(); - } - for (std::size_t ti = 0; ti < pool.size(); ++ti) { - pool[ti] = std::thread(&RunInThread, &b, iters, - static_cast(ti + 1), &manager); - } + manager.reset(new internal::ThreadManager(b.threads)); + for (std::size_t ti = 0; ti < pool.size(); ++ti) { + pool[ti] = std::thread(&RunInThread, &b, iters, + static_cast(ti + 1), manager.get()); } - RunInThread(&b, iters, 0, &manager); - manager.WaitForAllThreads(); - MutexLock l(manager.GetBenchmarkMutex()); + RunInThread(&b, iters, 0, manager.get()); + manager->WaitForAllThreads(); + for (std::thread& thread : pool) + thread.join(); + internal::ThreadManager::Result results; + { + MutexLock l(manager->GetBenchmarkMutex()); + results = manager->results; + } + manager.reset(); + // Adjust real/manual time stats since they were reported per thread. + results.real_time_used /= b.threads; + results.manual_time_used /= b.threads; - const double cpu_accumulated_time = manager.cpu_time_used; - const double real_accumulated_time = manager.real_time_used / num_threads; - const double manual_accumulated_time = manager.manual_time_used / num_threads; - - VLOG(2) << "Ran in " << cpu_accumulated_time << "/" - << real_accumulated_time << "\n"; + VLOG(2) << "Ran in " << results.cpu_time_used << "/" + << results.real_time_used << "\n"; // Base decisions off of real time if requested by this benchmark. - double seconds = cpu_accumulated_time; + double seconds = results.cpu_time_used; if (b.use_manual_time) { - seconds = manual_accumulated_time; + seconds = results.manual_time_used; } else if (b.use_real_time) { - seconds = real_accumulated_time; + seconds = results.real_time_used; } const double min_time = !IsZero(b.min_time) ? b.min_time : FLAGS_benchmark_min_time; // If this was the first run, was elapsed time or cpu time large enough? // If this is not the first run, go with the current value of iter. - if ((i > 0) || manager.has_error_ || (iters >= kMaxIterations) || - (seconds >= min_time) || (real_accumulated_time >= 5 * min_time)) { - // Create report about this benchmark run. - BenchmarkReporter::Run report; - report.benchmark_name = b.name; - report.error_occurred = manager.has_error_; - report.error_message = manager.error_message_; - report.report_label = manager.report_label_; - // Report the total iterations across all threads. - report.iterations = static_cast(iters) * b.threads; - report.time_unit = b.time_unit; - - if (!report.error_occurred) { - double bytes_per_second = 0; - if (manager.bytes_processed > 0 && seconds > 0.0) { - bytes_per_second = (manager.bytes_processed / seconds); - } - double items_per_second = 0; - if (manager.items_processed > 0 && seconds > 0.0) { - items_per_second = (manager.items_processed / seconds); - } - - if (b.use_manual_time) { - report.real_accumulated_time = manual_accumulated_time; - } else { - report.real_accumulated_time = real_accumulated_time; - } - report.cpu_accumulated_time = cpu_accumulated_time; - report.bytes_per_second = bytes_per_second; - report.items_per_second = items_per_second; - report.complexity_n = manager.complexity_n; - report.complexity = b.complexity; - report.complexity_lambda = b.complexity_lambda; - if(report.complexity != oNone) - complexity_reports->push_back(report); - } - + if ((i > 0) || results.has_error_ || (iters >= kMaxIterations) || + (seconds >= min_time) || (results.real_time_used >= 5 * min_time)) { + BenchmarkReporter::Run report = CreateRunReport(b, results, iters, seconds); + if (!report.error_occurred && b.complexity != oNone) + complexity_reports->push_back(report); reports.push_back(report); break; } @@ -352,10 +359,6 @@ std::vector RunBenchmark( iters = static_cast(next_iters + 0.5); } } - if (b.multithreaded) { - for (std::thread& thread : pool) - thread.join(); - } // Calculate additional statistics auto stat_reports = ComputeStats(reports); if((b.complexity != oNone) && b.last_benchmark_instance) { @@ -409,9 +412,9 @@ void State::SkipWithError(const char* msg) { error_occurred_ = true; { MutexLock l(manager_->GetBenchmarkMutex()); - if (manager_->has_error_ == false) { - manager_->error_message_ = msg; - manager_->has_error_ = true; + if (manager_->results.has_error_ == false) { + manager_->results.error_message_ = msg; + manager_->results.has_error_ = true; } } total_iterations_ = max_iterations; @@ -425,7 +428,7 @@ void State::SetIterationTime(double seconds) void State::SetLabel(const char* label) { MutexLock l(manager_->GetBenchmarkMutex()); - manager_->report_label_ = label; + manager_->results.report_label_ = label; } void State::StartKeepRunning() { diff --git a/src/benchmark_api_internal.h b/src/benchmark_api_internal.h index a491f347..d412cfb4 100644 --- a/src/benchmark_api_internal.h +++ b/src/benchmark_api_internal.h @@ -27,8 +27,7 @@ struct Benchmark::Instance { bool last_benchmark_instance; int repetitions; double min_time; - int threads; // Number of concurrent threads to use - bool multithreaded; // Is benchmark multi-threaded? + int threads; // Number of concurrent threads to us }; bool FindBenchmarksInternal(const std::string& re, diff --git a/src/benchmark_register.cc b/src/benchmark_register.cc index 321c6a41..35e5f162 100644 --- a/src/benchmark_register.cc +++ b/src/benchmark_register.cc @@ -151,7 +151,6 @@ bool BenchmarkFamilies::FindBenchmarks( instance.complexity = family->complexity_; instance.complexity_lambda = family->complexity_lambda_; instance.threads = num_threads; - instance.multithreaded = !(family->thread_counts_.empty()); // Add arguments to instance name for (auto const& arg : args) {