From e253a284029c34f764fc26bb7859298d4ec0551b Mon Sep 17 00:00:00 2001 From: Jussi Knuuttila Date: Sat, 30 Apr 2016 16:23:58 +0300 Subject: [PATCH] Manual timing support. --- AUTHORS | 1 + CONTRIBUTORS | 1 + README.md | 39 +++++++++++++++++++ include/benchmark/benchmark_api.h | 18 +++++++++ src/benchmark.cc | 64 +++++++++++++++++++++++++++++-- test/benchmark_test.cc | 27 +++++++++++++ 6 files changed, 146 insertions(+), 4 deletions(-) diff --git a/AUTHORS b/AUTHORS index 5a4b3553..9da43c73 100644 --- a/AUTHORS +++ b/AUTHORS @@ -17,6 +17,7 @@ Evgeny Safronov Felix Homann Google Inc. JianXiong Zhou +Jussi Knuuttila Kaito Udagawa Lei Xu Matt Clarkson diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 2bbe19e0..fcc3f35e 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -31,6 +31,7 @@ Eugene Zhuk Evgeny Safronov Felix Homann JianXiong Zhou +Jussi Knuuttila Kaito Udagawa Kai Wolf Lei Xu diff --git a/README.md b/README.md index 2ccfd509..051b3011 100644 --- a/README.md +++ b/README.md @@ -175,6 +175,45 @@ BENCHMARK(BM_test)->Range(8, 8<<10)->UseRealTime(); Without `UseRealTime`, CPU time is used by default. + +## Manual timing +For benchmarking something for which neither CPU time nor real-time are +correct or accurate enough, completely manual timing is supported using +the `UseManualTime` function. + +When `UseManualTime` is used, the benchmarked code must call +`SetIterationTime` once per iteration of the `KeepRunning` loop to +report the manually measured time. + +An example use case for this is benchmarking GPU execution (e.g. OpenCL +or CUDA kernels, OpenGL or Vulkan or Direct3D draw calls), which cannot +be accurately measured using CPU time or real-time. Instead, they can be +measured accurately using a dedicated API, and these measurement results +can be reported back with `SetIterationTime`. + +```c++ +static void BM_ManualTiming(benchmark::State& state) { + int microseconds = state.range_x(); + std::chrono::duration sleep_duration { + static_cast(microseconds) + }; + + while (state.KeepRunning()) { + auto start = std::chrono::high_resolution_clock::now(); + // Simulate some useful workload with a sleep + std::this_thread::sleep_for(sleep_duration); + auto end = std::chrono::high_resolution_clock::now(); + + auto elapsed_seconds = + std::chrono::duration_cast>( + end - start); + + state.SetIterationTime(elapsed_seconds.count()); + } +} +BENCHMARK(BM_ManualTiming)->Range(1, 1<<17)->UseManualTime(); +``` + ### Preventing optimisation To prevent a value or expression from being optimized away by the compiler the `benchmark::DoNotOptimize(...)` function can be used. diff --git a/include/benchmark/benchmark_api.h b/include/benchmark/benchmark_api.h index 726e35d9..ef7eca53 100644 --- a/include/benchmark/benchmark_api.h +++ b/include/benchmark/benchmark_api.h @@ -283,6 +283,17 @@ public: // within each benchmark iteration, if possible. void ResumeTiming(); + // REQUIRES: called exactly once per iteration of the KeepRunning loop. + // Set the manually measured time for this benchmark iteration, which + // is used instead of automatically measured time if UseManualTime() was + // specified. + // + // For threaded benchmarks the SetIterationTime() function acts + // like a barrier. I.e., the ith call by a particular thread to this + // function will block until all threads have made their ith call. + // The time will be set by the last thread to call this function. + void SetIterationTime(double seconds); + // Set the number of bytes processed by the current benchmark // execution. This routine is typically called once at the end of a // throughput oriented benchmark. If this routine is called with a @@ -444,6 +455,13 @@ public: // called, the cpu time used by the benchmark will be used. Benchmark* UseRealTime(); + // If a benchmark must measure time manually (e.g. if GPU execution time is being + // measured), call this method. If called, each benchmark iteration should call + // SetIterationTime(seconds) to report the measured time, which will be used + // to control how many iterations are run, and in the printing of items/second + // or MB/second values. + Benchmark* UseManualTime(); + // Support for running multiple copies of the same benchmark concurrently // in multiple threads. This may be useful when measuring the scaling // of some piece of code. diff --git a/src/benchmark.cc b/src/benchmark.cc index 3ee9c251..1a836eb4 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -130,6 +130,7 @@ class TimerManager { running_(false), real_time_used_(0), cpu_time_used_(0), + manual_time_used_(0), num_finalized_(0), phase_number_(0), entered_(0) { @@ -169,6 +170,21 @@ class TimerManager { } } + // Called by each thread + void SetIterationTime(double seconds) EXCLUDES(lock_) { + bool last_thread = false; + { + MutexLock ml(lock_); + last_thread = Barrier(ml); + if (last_thread) { + manual_time_used_ += seconds; + } + } + if (last_thread) { + phase_condition_.notify_all(); + } + } + // Called by each thread void Finalize() EXCLUDES(lock_) { MutexLock l(lock_); @@ -194,6 +210,13 @@ class TimerManager { return cpu_time_used_; } + // REQUIRES: timer is not running + double manual_time_used() EXCLUDES(lock_) { + MutexLock l(lock_); + CHECK(!running_); + return manual_time_used_; + } + private: Mutex lock_; Condition phase_condition_; @@ -207,6 +230,8 @@ class TimerManager { // Accumulated time so far (does not contain current slice if running_) double real_time_used_; double cpu_time_used_; + // Manually set iteration time. User sets this with SetIterationTime(seconds). + double manual_time_used_; // How many threads have called Finalize() int num_finalized_; @@ -263,6 +288,7 @@ struct Benchmark::Instance { int arg2; TimeUnit time_unit; bool use_real_time; + bool use_manual_time; double min_time; int threads; // Number of concurrent threads to use bool multithreaded; // Is benchmark multi-threaded? @@ -302,6 +328,7 @@ public: void RangePair(int lo1, int hi1, int lo2, int hi2); void MinTime(double n); void UseRealTime(); + void UseManualTime(); void Threads(int t); void ThreadRange(int min_threads, int max_threads); void ThreadPerCpu(); @@ -318,6 +345,7 @@ private: TimeUnit time_unit_; double min_time_; bool use_real_time_; + bool use_manual_time_; std::vector thread_counts_; BenchmarkImp& operator=(BenchmarkImp const&); @@ -378,6 +406,7 @@ bool BenchmarkFamilies::FindBenchmarks( instance.time_unit = family->time_unit_; instance.min_time = family->min_time_; instance.use_real_time = family->use_real_time_; + instance.use_manual_time = family->use_manual_time_; instance.threads = num_threads; instance.multithreaded = !(family->thread_counts_.empty()); @@ -391,7 +420,9 @@ bool BenchmarkFamilies::FindBenchmarks( if (!IsZero(family->min_time_)) { instance.name += StringPrintF("/min_time:%0.3f", family->min_time_); } - if (family->use_real_time_) { + if (family->use_manual_time_) { + instance.name += "/manual_time"; + } else if (family->use_real_time_) { instance.name += "/real_time"; } @@ -411,7 +442,8 @@ bool BenchmarkFamilies::FindBenchmarks( BenchmarkImp::BenchmarkImp(const char* name) : name_(name), arg_count_(-1), time_unit_(kNanosecond), - min_time_(0.0), use_real_time_(false) { + min_time_(0.0), use_real_time_(false), + use_manual_time_(false) { } BenchmarkImp::~BenchmarkImp() { @@ -474,9 +506,15 @@ void BenchmarkImp::MinTime(double t) { } void BenchmarkImp::UseRealTime() { + CHECK(!use_manual_time_) << "Cannot set UseRealTime and UseManualTime simultaneously."; use_real_time_ = true; } +void BenchmarkImp::UseManualTime() { + CHECK(!use_real_time_) << "Cannot set UseRealTime and UseManualTime simultaneously."; + use_manual_time_ = true; +} + void BenchmarkImp::Threads(int t) { CHECK_GT(t, 0); thread_counts_.push_back(t); @@ -579,6 +617,11 @@ Benchmark* Benchmark::UseRealTime() { return this; } +Benchmark* Benchmark::UseManualTime() { + imp_->UseManualTime(); + return this; +} + Benchmark* Benchmark::Threads(int t) { imp_->Threads(t); return this; @@ -671,6 +714,7 @@ void RunBenchmark(const benchmark::internal::Benchmark::Instance& b, const double cpu_accumulated_time = timer_manager->cpu_time_used(); const double real_accumulated_time = timer_manager->real_time_used(); + const double manual_accumulated_time = timer_manager->manual_time_used(); timer_manager.reset(); VLOG(2) << "Ran in " << cpu_accumulated_time << "/" @@ -678,7 +722,9 @@ void RunBenchmark(const benchmark::internal::Benchmark::Instance& b, // Base decisions off of real time if requested by this benchmark. double seconds = cpu_accumulated_time; - if (b.use_real_time) { + if (b.use_manual_time) { + seconds = manual_accumulated_time; + } else if (b.use_real_time) { seconds = real_accumulated_time; } @@ -713,7 +759,11 @@ void RunBenchmark(const benchmark::internal::Benchmark::Instance& b, // Report the total iterations across all threads. report.iterations = static_cast(iters) * b.threads; report.time_unit = b.time_unit; - report.real_accumulated_time = real_accumulated_time; + if (b.use_manual_time) { + report.real_accumulated_time = manual_accumulated_time; + } else { + report.real_accumulated_time = real_accumulated_time; + } report.cpu_accumulated_time = cpu_accumulated_time; report.bytes_per_second = bytes_per_second; report.items_per_second = items_per_second; @@ -774,6 +824,12 @@ void State::ResumeTiming() { timer_manager->StartTimer(); } +void State::SetIterationTime(double seconds) +{ + CHECK(running_benchmark); + timer_manager->SetIterationTime(seconds); +} + void State::SetLabel(const char* label) { CHECK(running_benchmark); MutexLock l(GetBenchmarkLock()); diff --git a/test/benchmark_test.cc b/test/benchmark_test.cc index 97abb68f..fa995590 100644 --- a/test/benchmark_test.cc +++ b/test/benchmark_test.cc @@ -14,6 +14,8 @@ #include #include #include +#include +#include #if defined(__GNUC__) # define BENCHMARK_NOINLINE __attribute__((noinline)) @@ -174,5 +176,30 @@ static void BM_ParallelMemset(benchmark::State& state) { } BENCHMARK(BM_ParallelMemset)->Arg(10 << 20)->ThreadRange(1, 4); +static void BM_ManualTiming(benchmark::State& state) { + size_t slept_for = 0; + int microseconds = state.range_x(); + std::chrono::duration sleep_duration { + static_cast(microseconds) + }; + + while (state.KeepRunning()) { + auto start = std::chrono::high_resolution_clock::now(); + // Simulate some useful workload with a sleep + std::this_thread::sleep_for(sleep_duration); + auto end = std::chrono::high_resolution_clock::now(); + + auto elapsed = + std::chrono::duration_cast>( + end - start); + + state.SetIterationTime(elapsed.count()); + slept_for += microseconds; + } + state.SetItemsProcessed(slept_for); +} +BENCHMARK(BM_ManualTiming)->Range(1, 1 << 14)->UseRealTime(); +BENCHMARK(BM_ManualTiming)->Range(1, 1 << 14)->UseManualTime(); + BENCHMARK_MAIN()