mirror of
https://github.com/google/benchmark.git
synced 2024-12-26 12:30:14 +08:00
Manual timing support.
This commit is contained in:
parent
dce2ebb403
commit
e253a28402
1
AUTHORS
1
AUTHORS
@ -17,6 +17,7 @@ Evgeny Safronov <division494@gmail.com>
|
||||
Felix Homann <linuxaudio@showlabor.de>
|
||||
Google Inc.
|
||||
JianXiong Zhou <zhoujianxiong2@gmail.com>
|
||||
Jussi Knuuttila <jussi.knuuttila@gmail.com>
|
||||
Kaito Udagawa <umireon@gmail.com>
|
||||
Lei Xu <eddyxu@gmail.com>
|
||||
Matt Clarkson <mattyclarkson@gmail.com>
|
||||
|
@ -31,6 +31,7 @@ Eugene Zhuk <eugene.zhuk@gmail.com>
|
||||
Evgeny Safronov <division494@gmail.com>
|
||||
Felix Homann <linuxaudio@showlabor.de>
|
||||
JianXiong Zhou <zhoujianxiong2@gmail.com>
|
||||
Jussi Knuuttila <jussi.knuuttila@gmail.com>
|
||||
Kaito Udagawa <umireon@gmail.com>
|
||||
Kai Wolf <kai.wolf@gmail.com>
|
||||
Lei Xu <eddyxu@gmail.com>
|
||||
|
39
README.md
39
README.md
@ -175,6 +175,45 @@ BENCHMARK(BM_test)->Range(8, 8<<10)->UseRealTime();
|
||||
|
||||
Without `UseRealTime`, CPU time is used by default.
|
||||
|
||||
|
||||
## Manual timing
|
||||
For benchmarking something for which neither CPU time nor real-time are
|
||||
correct or accurate enough, completely manual timing is supported using
|
||||
the `UseManualTime` function.
|
||||
|
||||
When `UseManualTime` is used, the benchmarked code must call
|
||||
`SetIterationTime` once per iteration of the `KeepRunning` loop to
|
||||
report the manually measured time.
|
||||
|
||||
An example use case for this is benchmarking GPU execution (e.g. OpenCL
|
||||
or CUDA kernels, OpenGL or Vulkan or Direct3D draw calls), which cannot
|
||||
be accurately measured using CPU time or real-time. Instead, they can be
|
||||
measured accurately using a dedicated API, and these measurement results
|
||||
can be reported back with `SetIterationTime`.
|
||||
|
||||
```c++
|
||||
static void BM_ManualTiming(benchmark::State& state) {
|
||||
int microseconds = state.range_x();
|
||||
std::chrono::duration<double, std::micro> sleep_duration {
|
||||
static_cast<double>(microseconds)
|
||||
};
|
||||
|
||||
while (state.KeepRunning()) {
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
// Simulate some useful workload with a sleep
|
||||
std::this_thread::sleep_for(sleep_duration);
|
||||
auto end = std::chrono::high_resolution_clock::now();
|
||||
|
||||
auto elapsed_seconds =
|
||||
std::chrono::duration_cast<std::chrono::duration<double>>(
|
||||
end - start);
|
||||
|
||||
state.SetIterationTime(elapsed_seconds.count());
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_ManualTiming)->Range(1, 1<<17)->UseManualTime();
|
||||
```
|
||||
|
||||
### Preventing optimisation
|
||||
To prevent a value or expression from being optimized away by the compiler
|
||||
the `benchmark::DoNotOptimize(...)` function can be used.
|
||||
|
@ -283,6 +283,17 @@ public:
|
||||
// within each benchmark iteration, if possible.
|
||||
void ResumeTiming();
|
||||
|
||||
// REQUIRES: called exactly once per iteration of the KeepRunning loop.
|
||||
// Set the manually measured time for this benchmark iteration, which
|
||||
// is used instead of automatically measured time if UseManualTime() was
|
||||
// specified.
|
||||
//
|
||||
// For threaded benchmarks the SetIterationTime() function acts
|
||||
// like a barrier. I.e., the ith call by a particular thread to this
|
||||
// function will block until all threads have made their ith call.
|
||||
// The time will be set by the last thread to call this function.
|
||||
void SetIterationTime(double seconds);
|
||||
|
||||
// Set the number of bytes processed by the current benchmark
|
||||
// execution. This routine is typically called once at the end of a
|
||||
// throughput oriented benchmark. If this routine is called with a
|
||||
@ -444,6 +455,13 @@ public:
|
||||
// called, the cpu time used by the benchmark will be used.
|
||||
Benchmark* UseRealTime();
|
||||
|
||||
// If a benchmark must measure time manually (e.g. if GPU execution time is being
|
||||
// measured), call this method. If called, each benchmark iteration should call
|
||||
// SetIterationTime(seconds) to report the measured time, which will be used
|
||||
// to control how many iterations are run, and in the printing of items/second
|
||||
// or MB/second values.
|
||||
Benchmark* UseManualTime();
|
||||
|
||||
// Support for running multiple copies of the same benchmark concurrently
|
||||
// in multiple threads. This may be useful when measuring the scaling
|
||||
// of some piece of code.
|
||||
|
@ -130,6 +130,7 @@ class TimerManager {
|
||||
running_(false),
|
||||
real_time_used_(0),
|
||||
cpu_time_used_(0),
|
||||
manual_time_used_(0),
|
||||
num_finalized_(0),
|
||||
phase_number_(0),
|
||||
entered_(0) {
|
||||
@ -169,6 +170,21 @@ class TimerManager {
|
||||
}
|
||||
}
|
||||
|
||||
// Called by each thread
|
||||
void SetIterationTime(double seconds) EXCLUDES(lock_) {
|
||||
bool last_thread = false;
|
||||
{
|
||||
MutexLock ml(lock_);
|
||||
last_thread = Barrier(ml);
|
||||
if (last_thread) {
|
||||
manual_time_used_ += seconds;
|
||||
}
|
||||
}
|
||||
if (last_thread) {
|
||||
phase_condition_.notify_all();
|
||||
}
|
||||
}
|
||||
|
||||
// Called by each thread
|
||||
void Finalize() EXCLUDES(lock_) {
|
||||
MutexLock l(lock_);
|
||||
@ -194,6 +210,13 @@ class TimerManager {
|
||||
return cpu_time_used_;
|
||||
}
|
||||
|
||||
// REQUIRES: timer is not running
|
||||
double manual_time_used() EXCLUDES(lock_) {
|
||||
MutexLock l(lock_);
|
||||
CHECK(!running_);
|
||||
return manual_time_used_;
|
||||
}
|
||||
|
||||
private:
|
||||
Mutex lock_;
|
||||
Condition phase_condition_;
|
||||
@ -207,6 +230,8 @@ class TimerManager {
|
||||
// Accumulated time so far (does not contain current slice if running_)
|
||||
double real_time_used_;
|
||||
double cpu_time_used_;
|
||||
// Manually set iteration time. User sets this with SetIterationTime(seconds).
|
||||
double manual_time_used_;
|
||||
|
||||
// How many threads have called Finalize()
|
||||
int num_finalized_;
|
||||
@ -263,6 +288,7 @@ struct Benchmark::Instance {
|
||||
int arg2;
|
||||
TimeUnit time_unit;
|
||||
bool use_real_time;
|
||||
bool use_manual_time;
|
||||
double min_time;
|
||||
int threads; // Number of concurrent threads to use
|
||||
bool multithreaded; // Is benchmark multi-threaded?
|
||||
@ -302,6 +328,7 @@ public:
|
||||
void RangePair(int lo1, int hi1, int lo2, int hi2);
|
||||
void MinTime(double n);
|
||||
void UseRealTime();
|
||||
void UseManualTime();
|
||||
void Threads(int t);
|
||||
void ThreadRange(int min_threads, int max_threads);
|
||||
void ThreadPerCpu();
|
||||
@ -318,6 +345,7 @@ private:
|
||||
TimeUnit time_unit_;
|
||||
double min_time_;
|
||||
bool use_real_time_;
|
||||
bool use_manual_time_;
|
||||
std::vector<int> thread_counts_;
|
||||
|
||||
BenchmarkImp& operator=(BenchmarkImp const&);
|
||||
@ -378,6 +406,7 @@ bool BenchmarkFamilies::FindBenchmarks(
|
||||
instance.time_unit = family->time_unit_;
|
||||
instance.min_time = family->min_time_;
|
||||
instance.use_real_time = family->use_real_time_;
|
||||
instance.use_manual_time = family->use_manual_time_;
|
||||
instance.threads = num_threads;
|
||||
instance.multithreaded = !(family->thread_counts_.empty());
|
||||
|
||||
@ -391,7 +420,9 @@ bool BenchmarkFamilies::FindBenchmarks(
|
||||
if (!IsZero(family->min_time_)) {
|
||||
instance.name += StringPrintF("/min_time:%0.3f", family->min_time_);
|
||||
}
|
||||
if (family->use_real_time_) {
|
||||
if (family->use_manual_time_) {
|
||||
instance.name += "/manual_time";
|
||||
} else if (family->use_real_time_) {
|
||||
instance.name += "/real_time";
|
||||
}
|
||||
|
||||
@ -411,7 +442,8 @@ bool BenchmarkFamilies::FindBenchmarks(
|
||||
|
||||
BenchmarkImp::BenchmarkImp(const char* name)
|
||||
: name_(name), arg_count_(-1), time_unit_(kNanosecond),
|
||||
min_time_(0.0), use_real_time_(false) {
|
||||
min_time_(0.0), use_real_time_(false),
|
||||
use_manual_time_(false) {
|
||||
}
|
||||
|
||||
BenchmarkImp::~BenchmarkImp() {
|
||||
@ -474,9 +506,15 @@ void BenchmarkImp::MinTime(double t) {
|
||||
}
|
||||
|
||||
void BenchmarkImp::UseRealTime() {
|
||||
CHECK(!use_manual_time_) << "Cannot set UseRealTime and UseManualTime simultaneously.";
|
||||
use_real_time_ = true;
|
||||
}
|
||||
|
||||
void BenchmarkImp::UseManualTime() {
|
||||
CHECK(!use_real_time_) << "Cannot set UseRealTime and UseManualTime simultaneously.";
|
||||
use_manual_time_ = true;
|
||||
}
|
||||
|
||||
void BenchmarkImp::Threads(int t) {
|
||||
CHECK_GT(t, 0);
|
||||
thread_counts_.push_back(t);
|
||||
@ -579,6 +617,11 @@ Benchmark* Benchmark::UseRealTime() {
|
||||
return this;
|
||||
}
|
||||
|
||||
Benchmark* Benchmark::UseManualTime() {
|
||||
imp_->UseManualTime();
|
||||
return this;
|
||||
}
|
||||
|
||||
Benchmark* Benchmark::Threads(int t) {
|
||||
imp_->Threads(t);
|
||||
return this;
|
||||
@ -671,6 +714,7 @@ void RunBenchmark(const benchmark::internal::Benchmark::Instance& b,
|
||||
|
||||
const double cpu_accumulated_time = timer_manager->cpu_time_used();
|
||||
const double real_accumulated_time = timer_manager->real_time_used();
|
||||
const double manual_accumulated_time = timer_manager->manual_time_used();
|
||||
timer_manager.reset();
|
||||
|
||||
VLOG(2) << "Ran in " << cpu_accumulated_time << "/"
|
||||
@ -678,7 +722,9 @@ void RunBenchmark(const benchmark::internal::Benchmark::Instance& b,
|
||||
|
||||
// Base decisions off of real time if requested by this benchmark.
|
||||
double seconds = cpu_accumulated_time;
|
||||
if (b.use_real_time) {
|
||||
if (b.use_manual_time) {
|
||||
seconds = manual_accumulated_time;
|
||||
} else if (b.use_real_time) {
|
||||
seconds = real_accumulated_time;
|
||||
}
|
||||
|
||||
@ -713,7 +759,11 @@ void RunBenchmark(const benchmark::internal::Benchmark::Instance& b,
|
||||
// Report the total iterations across all threads.
|
||||
report.iterations = static_cast<int64_t>(iters) * b.threads;
|
||||
report.time_unit = b.time_unit;
|
||||
report.real_accumulated_time = real_accumulated_time;
|
||||
if (b.use_manual_time) {
|
||||
report.real_accumulated_time = manual_accumulated_time;
|
||||
} else {
|
||||
report.real_accumulated_time = real_accumulated_time;
|
||||
}
|
||||
report.cpu_accumulated_time = cpu_accumulated_time;
|
||||
report.bytes_per_second = bytes_per_second;
|
||||
report.items_per_second = items_per_second;
|
||||
@ -774,6 +824,12 @@ void State::ResumeTiming() {
|
||||
timer_manager->StartTimer();
|
||||
}
|
||||
|
||||
void State::SetIterationTime(double seconds)
|
||||
{
|
||||
CHECK(running_benchmark);
|
||||
timer_manager->SetIterationTime(seconds);
|
||||
}
|
||||
|
||||
void State::SetLabel(const char* label) {
|
||||
CHECK(running_benchmark);
|
||||
MutexLock l(GetBenchmarkLock());
|
||||
|
@ -14,6 +14,8 @@
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
|
||||
#if defined(__GNUC__)
|
||||
# define BENCHMARK_NOINLINE __attribute__((noinline))
|
||||
@ -174,5 +176,30 @@ static void BM_ParallelMemset(benchmark::State& state) {
|
||||
}
|
||||
BENCHMARK(BM_ParallelMemset)->Arg(10 << 20)->ThreadRange(1, 4);
|
||||
|
||||
static void BM_ManualTiming(benchmark::State& state) {
|
||||
size_t slept_for = 0;
|
||||
int microseconds = state.range_x();
|
||||
std::chrono::duration<double, std::micro> sleep_duration {
|
||||
static_cast<double>(microseconds)
|
||||
};
|
||||
|
||||
while (state.KeepRunning()) {
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
// Simulate some useful workload with a sleep
|
||||
std::this_thread::sleep_for(sleep_duration);
|
||||
auto end = std::chrono::high_resolution_clock::now();
|
||||
|
||||
auto elapsed =
|
||||
std::chrono::duration_cast<std::chrono::duration<double>>(
|
||||
end - start);
|
||||
|
||||
state.SetIterationTime(elapsed.count());
|
||||
slept_for += microseconds;
|
||||
}
|
||||
state.SetItemsProcessed(slept_for);
|
||||
}
|
||||
BENCHMARK(BM_ManualTiming)->Range(1, 1 << 14)->UseRealTime();
|
||||
BENCHMARK(BM_ManualTiming)->Range(1, 1 << 14)->UseManualTime();
|
||||
|
||||
BENCHMARK_MAIN()
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user