Manual timing support.

This commit is contained in:
Jussi Knuuttila 2016-04-30 16:23:58 +03:00
parent dce2ebb403
commit e253a28402
6 changed files with 146 additions and 4 deletions

View File

@ -17,6 +17,7 @@ Evgeny Safronov <division494@gmail.com>
Felix Homann <linuxaudio@showlabor.de> Felix Homann <linuxaudio@showlabor.de>
Google Inc. Google Inc.
JianXiong Zhou <zhoujianxiong2@gmail.com> JianXiong Zhou <zhoujianxiong2@gmail.com>
Jussi Knuuttila <jussi.knuuttila@gmail.com>
Kaito Udagawa <umireon@gmail.com> Kaito Udagawa <umireon@gmail.com>
Lei Xu <eddyxu@gmail.com> Lei Xu <eddyxu@gmail.com>
Matt Clarkson <mattyclarkson@gmail.com> Matt Clarkson <mattyclarkson@gmail.com>

View File

@ -31,6 +31,7 @@ Eugene Zhuk <eugene.zhuk@gmail.com>
Evgeny Safronov <division494@gmail.com> Evgeny Safronov <division494@gmail.com>
Felix Homann <linuxaudio@showlabor.de> Felix Homann <linuxaudio@showlabor.de>
JianXiong Zhou <zhoujianxiong2@gmail.com> JianXiong Zhou <zhoujianxiong2@gmail.com>
Jussi Knuuttila <jussi.knuuttila@gmail.com>
Kaito Udagawa <umireon@gmail.com> Kaito Udagawa <umireon@gmail.com>
Kai Wolf <kai.wolf@gmail.com> Kai Wolf <kai.wolf@gmail.com>
Lei Xu <eddyxu@gmail.com> Lei Xu <eddyxu@gmail.com>

View File

@ -175,6 +175,45 @@ BENCHMARK(BM_test)->Range(8, 8<<10)->UseRealTime();
Without `UseRealTime`, CPU time is used by default. Without `UseRealTime`, CPU time is used by default.
## Manual timing
For benchmarking something for which neither CPU time nor real-time are
correct or accurate enough, completely manual timing is supported using
the `UseManualTime` function.
When `UseManualTime` is used, the benchmarked code must call
`SetIterationTime` once per iteration of the `KeepRunning` loop to
report the manually measured time.
An example use case for this is benchmarking GPU execution (e.g. OpenCL
or CUDA kernels, OpenGL or Vulkan or Direct3D draw calls), which cannot
be accurately measured using CPU time or real-time. Instead, they can be
measured accurately using a dedicated API, and these measurement results
can be reported back with `SetIterationTime`.
```c++
static void BM_ManualTiming(benchmark::State& state) {
int microseconds = state.range_x();
std::chrono::duration<double, std::micro> sleep_duration {
static_cast<double>(microseconds)
};
while (state.KeepRunning()) {
auto start = std::chrono::high_resolution_clock::now();
// Simulate some useful workload with a sleep
std::this_thread::sleep_for(sleep_duration);
auto end = std::chrono::high_resolution_clock::now();
auto elapsed_seconds =
std::chrono::duration_cast<std::chrono::duration<double>>(
end - start);
state.SetIterationTime(elapsed_seconds.count());
}
}
BENCHMARK(BM_ManualTiming)->Range(1, 1<<17)->UseManualTime();
```
### Preventing optimisation ### Preventing optimisation
To prevent a value or expression from being optimized away by the compiler To prevent a value or expression from being optimized away by the compiler
the `benchmark::DoNotOptimize(...)` function can be used. the `benchmark::DoNotOptimize(...)` function can be used.

View File

@ -283,6 +283,17 @@ public:
// within each benchmark iteration, if possible. // within each benchmark iteration, if possible.
void ResumeTiming(); void ResumeTiming();
// REQUIRES: called exactly once per iteration of the KeepRunning loop.
// Set the manually measured time for this benchmark iteration, which
// is used instead of automatically measured time if UseManualTime() was
// specified.
//
// For threaded benchmarks the SetIterationTime() function acts
// like a barrier. I.e., the ith call by a particular thread to this
// function will block until all threads have made their ith call.
// The time will be set by the last thread to call this function.
void SetIterationTime(double seconds);
// Set the number of bytes processed by the current benchmark // Set the number of bytes processed by the current benchmark
// execution. This routine is typically called once at the end of a // execution. This routine is typically called once at the end of a
// throughput oriented benchmark. If this routine is called with a // throughput oriented benchmark. If this routine is called with a
@ -444,6 +455,13 @@ public:
// called, the cpu time used by the benchmark will be used. // called, the cpu time used by the benchmark will be used.
Benchmark* UseRealTime(); Benchmark* UseRealTime();
// If a benchmark must measure time manually (e.g. if GPU execution time is being
// measured), call this method. If called, each benchmark iteration should call
// SetIterationTime(seconds) to report the measured time, which will be used
// to control how many iterations are run, and in the printing of items/second
// or MB/second values.
Benchmark* UseManualTime();
// Support for running multiple copies of the same benchmark concurrently // Support for running multiple copies of the same benchmark concurrently
// in multiple threads. This may be useful when measuring the scaling // in multiple threads. This may be useful when measuring the scaling
// of some piece of code. // of some piece of code.

View File

@ -130,6 +130,7 @@ class TimerManager {
running_(false), running_(false),
real_time_used_(0), real_time_used_(0),
cpu_time_used_(0), cpu_time_used_(0),
manual_time_used_(0),
num_finalized_(0), num_finalized_(0),
phase_number_(0), phase_number_(0),
entered_(0) { entered_(0) {
@ -169,6 +170,21 @@ class TimerManager {
} }
} }
// Called by each thread
void SetIterationTime(double seconds) EXCLUDES(lock_) {
bool last_thread = false;
{
MutexLock ml(lock_);
last_thread = Barrier(ml);
if (last_thread) {
manual_time_used_ += seconds;
}
}
if (last_thread) {
phase_condition_.notify_all();
}
}
// Called by each thread // Called by each thread
void Finalize() EXCLUDES(lock_) { void Finalize() EXCLUDES(lock_) {
MutexLock l(lock_); MutexLock l(lock_);
@ -194,6 +210,13 @@ class TimerManager {
return cpu_time_used_; return cpu_time_used_;
} }
// REQUIRES: timer is not running
double manual_time_used() EXCLUDES(lock_) {
MutexLock l(lock_);
CHECK(!running_);
return manual_time_used_;
}
private: private:
Mutex lock_; Mutex lock_;
Condition phase_condition_; Condition phase_condition_;
@ -207,6 +230,8 @@ class TimerManager {
// Accumulated time so far (does not contain current slice if running_) // Accumulated time so far (does not contain current slice if running_)
double real_time_used_; double real_time_used_;
double cpu_time_used_; double cpu_time_used_;
// Manually set iteration time. User sets this with SetIterationTime(seconds).
double manual_time_used_;
// How many threads have called Finalize() // How many threads have called Finalize()
int num_finalized_; int num_finalized_;
@ -263,6 +288,7 @@ struct Benchmark::Instance {
int arg2; int arg2;
TimeUnit time_unit; TimeUnit time_unit;
bool use_real_time; bool use_real_time;
bool use_manual_time;
double min_time; double min_time;
int threads; // Number of concurrent threads to use int threads; // Number of concurrent threads to use
bool multithreaded; // Is benchmark multi-threaded? bool multithreaded; // Is benchmark multi-threaded?
@ -302,6 +328,7 @@ public:
void RangePair(int lo1, int hi1, int lo2, int hi2); void RangePair(int lo1, int hi1, int lo2, int hi2);
void MinTime(double n); void MinTime(double n);
void UseRealTime(); void UseRealTime();
void UseManualTime();
void Threads(int t); void Threads(int t);
void ThreadRange(int min_threads, int max_threads); void ThreadRange(int min_threads, int max_threads);
void ThreadPerCpu(); void ThreadPerCpu();
@ -318,6 +345,7 @@ private:
TimeUnit time_unit_; TimeUnit time_unit_;
double min_time_; double min_time_;
bool use_real_time_; bool use_real_time_;
bool use_manual_time_;
std::vector<int> thread_counts_; std::vector<int> thread_counts_;
BenchmarkImp& operator=(BenchmarkImp const&); BenchmarkImp& operator=(BenchmarkImp const&);
@ -378,6 +406,7 @@ bool BenchmarkFamilies::FindBenchmarks(
instance.time_unit = family->time_unit_; instance.time_unit = family->time_unit_;
instance.min_time = family->min_time_; instance.min_time = family->min_time_;
instance.use_real_time = family->use_real_time_; instance.use_real_time = family->use_real_time_;
instance.use_manual_time = family->use_manual_time_;
instance.threads = num_threads; instance.threads = num_threads;
instance.multithreaded = !(family->thread_counts_.empty()); instance.multithreaded = !(family->thread_counts_.empty());
@ -391,7 +420,9 @@ bool BenchmarkFamilies::FindBenchmarks(
if (!IsZero(family->min_time_)) { if (!IsZero(family->min_time_)) {
instance.name += StringPrintF("/min_time:%0.3f", family->min_time_); instance.name += StringPrintF("/min_time:%0.3f", family->min_time_);
} }
if (family->use_real_time_) { if (family->use_manual_time_) {
instance.name += "/manual_time";
} else if (family->use_real_time_) {
instance.name += "/real_time"; instance.name += "/real_time";
} }
@ -411,7 +442,8 @@ bool BenchmarkFamilies::FindBenchmarks(
BenchmarkImp::BenchmarkImp(const char* name) BenchmarkImp::BenchmarkImp(const char* name)
: name_(name), arg_count_(-1), time_unit_(kNanosecond), : name_(name), arg_count_(-1), time_unit_(kNanosecond),
min_time_(0.0), use_real_time_(false) { min_time_(0.0), use_real_time_(false),
use_manual_time_(false) {
} }
BenchmarkImp::~BenchmarkImp() { BenchmarkImp::~BenchmarkImp() {
@ -474,9 +506,15 @@ void BenchmarkImp::MinTime(double t) {
} }
void BenchmarkImp::UseRealTime() { void BenchmarkImp::UseRealTime() {
CHECK(!use_manual_time_) << "Cannot set UseRealTime and UseManualTime simultaneously.";
use_real_time_ = true; use_real_time_ = true;
} }
void BenchmarkImp::UseManualTime() {
CHECK(!use_real_time_) << "Cannot set UseRealTime and UseManualTime simultaneously.";
use_manual_time_ = true;
}
void BenchmarkImp::Threads(int t) { void BenchmarkImp::Threads(int t) {
CHECK_GT(t, 0); CHECK_GT(t, 0);
thread_counts_.push_back(t); thread_counts_.push_back(t);
@ -579,6 +617,11 @@ Benchmark* Benchmark::UseRealTime() {
return this; return this;
} }
Benchmark* Benchmark::UseManualTime() {
imp_->UseManualTime();
return this;
}
Benchmark* Benchmark::Threads(int t) { Benchmark* Benchmark::Threads(int t) {
imp_->Threads(t); imp_->Threads(t);
return this; return this;
@ -671,6 +714,7 @@ void RunBenchmark(const benchmark::internal::Benchmark::Instance& b,
const double cpu_accumulated_time = timer_manager->cpu_time_used(); const double cpu_accumulated_time = timer_manager->cpu_time_used();
const double real_accumulated_time = timer_manager->real_time_used(); const double real_accumulated_time = timer_manager->real_time_used();
const double manual_accumulated_time = timer_manager->manual_time_used();
timer_manager.reset(); timer_manager.reset();
VLOG(2) << "Ran in " << cpu_accumulated_time << "/" VLOG(2) << "Ran in " << cpu_accumulated_time << "/"
@ -678,7 +722,9 @@ void RunBenchmark(const benchmark::internal::Benchmark::Instance& b,
// Base decisions off of real time if requested by this benchmark. // Base decisions off of real time if requested by this benchmark.
double seconds = cpu_accumulated_time; double seconds = cpu_accumulated_time;
if (b.use_real_time) { if (b.use_manual_time) {
seconds = manual_accumulated_time;
} else if (b.use_real_time) {
seconds = real_accumulated_time; seconds = real_accumulated_time;
} }
@ -713,7 +759,11 @@ void RunBenchmark(const benchmark::internal::Benchmark::Instance& b,
// Report the total iterations across all threads. // Report the total iterations across all threads.
report.iterations = static_cast<int64_t>(iters) * b.threads; report.iterations = static_cast<int64_t>(iters) * b.threads;
report.time_unit = b.time_unit; report.time_unit = b.time_unit;
report.real_accumulated_time = real_accumulated_time; if (b.use_manual_time) {
report.real_accumulated_time = manual_accumulated_time;
} else {
report.real_accumulated_time = real_accumulated_time;
}
report.cpu_accumulated_time = cpu_accumulated_time; report.cpu_accumulated_time = cpu_accumulated_time;
report.bytes_per_second = bytes_per_second; report.bytes_per_second = bytes_per_second;
report.items_per_second = items_per_second; report.items_per_second = items_per_second;
@ -774,6 +824,12 @@ void State::ResumeTiming() {
timer_manager->StartTimer(); timer_manager->StartTimer();
} }
void State::SetIterationTime(double seconds)
{
CHECK(running_benchmark);
timer_manager->SetIterationTime(seconds);
}
void State::SetLabel(const char* label) { void State::SetLabel(const char* label) {
CHECK(running_benchmark); CHECK(running_benchmark);
MutexLock l(GetBenchmarkLock()); MutexLock l(GetBenchmarkLock());

View File

@ -14,6 +14,8 @@
#include <sstream> #include <sstream>
#include <string> #include <string>
#include <vector> #include <vector>
#include <chrono>
#include <thread>
#if defined(__GNUC__) #if defined(__GNUC__)
# define BENCHMARK_NOINLINE __attribute__((noinline)) # define BENCHMARK_NOINLINE __attribute__((noinline))
@ -174,5 +176,30 @@ static void BM_ParallelMemset(benchmark::State& state) {
} }
BENCHMARK(BM_ParallelMemset)->Arg(10 << 20)->ThreadRange(1, 4); BENCHMARK(BM_ParallelMemset)->Arg(10 << 20)->ThreadRange(1, 4);
static void BM_ManualTiming(benchmark::State& state) {
size_t slept_for = 0;
int microseconds = state.range_x();
std::chrono::duration<double, std::micro> sleep_duration {
static_cast<double>(microseconds)
};
while (state.KeepRunning()) {
auto start = std::chrono::high_resolution_clock::now();
// Simulate some useful workload with a sleep
std::this_thread::sleep_for(sleep_duration);
auto end = std::chrono::high_resolution_clock::now();
auto elapsed =
std::chrono::duration_cast<std::chrono::duration<double>>(
end - start);
state.SetIterationTime(elapsed.count());
slept_for += microseconds;
}
state.SetItemsProcessed(slept_for);
}
BENCHMARK(BM_ManualTiming)->Range(1, 1 << 14)->UseRealTime();
BENCHMARK(BM_ManualTiming)->Range(1, 1 << 14)->UseManualTime();
BENCHMARK_MAIN() BENCHMARK_MAIN()