mirror of
https://github.com/google/benchmark.git
synced 2025-04-03 16:10:58 +08:00
Manual timing support.
This commit is contained in:
parent
dce2ebb403
commit
e253a28402
1
AUTHORS
1
AUTHORS
@ -17,6 +17,7 @@ Evgeny Safronov <division494@gmail.com>
|
|||||||
Felix Homann <linuxaudio@showlabor.de>
|
Felix Homann <linuxaudio@showlabor.de>
|
||||||
Google Inc.
|
Google Inc.
|
||||||
JianXiong Zhou <zhoujianxiong2@gmail.com>
|
JianXiong Zhou <zhoujianxiong2@gmail.com>
|
||||||
|
Jussi Knuuttila <jussi.knuuttila@gmail.com>
|
||||||
Kaito Udagawa <umireon@gmail.com>
|
Kaito Udagawa <umireon@gmail.com>
|
||||||
Lei Xu <eddyxu@gmail.com>
|
Lei Xu <eddyxu@gmail.com>
|
||||||
Matt Clarkson <mattyclarkson@gmail.com>
|
Matt Clarkson <mattyclarkson@gmail.com>
|
||||||
|
@ -31,6 +31,7 @@ Eugene Zhuk <eugene.zhuk@gmail.com>
|
|||||||
Evgeny Safronov <division494@gmail.com>
|
Evgeny Safronov <division494@gmail.com>
|
||||||
Felix Homann <linuxaudio@showlabor.de>
|
Felix Homann <linuxaudio@showlabor.de>
|
||||||
JianXiong Zhou <zhoujianxiong2@gmail.com>
|
JianXiong Zhou <zhoujianxiong2@gmail.com>
|
||||||
|
Jussi Knuuttila <jussi.knuuttila@gmail.com>
|
||||||
Kaito Udagawa <umireon@gmail.com>
|
Kaito Udagawa <umireon@gmail.com>
|
||||||
Kai Wolf <kai.wolf@gmail.com>
|
Kai Wolf <kai.wolf@gmail.com>
|
||||||
Lei Xu <eddyxu@gmail.com>
|
Lei Xu <eddyxu@gmail.com>
|
||||||
|
39
README.md
39
README.md
@ -175,6 +175,45 @@ BENCHMARK(BM_test)->Range(8, 8<<10)->UseRealTime();
|
|||||||
|
|
||||||
Without `UseRealTime`, CPU time is used by default.
|
Without `UseRealTime`, CPU time is used by default.
|
||||||
|
|
||||||
|
|
||||||
|
## Manual timing
|
||||||
|
For benchmarking something for which neither CPU time nor real-time are
|
||||||
|
correct or accurate enough, completely manual timing is supported using
|
||||||
|
the `UseManualTime` function.
|
||||||
|
|
||||||
|
When `UseManualTime` is used, the benchmarked code must call
|
||||||
|
`SetIterationTime` once per iteration of the `KeepRunning` loop to
|
||||||
|
report the manually measured time.
|
||||||
|
|
||||||
|
An example use case for this is benchmarking GPU execution (e.g. OpenCL
|
||||||
|
or CUDA kernels, OpenGL or Vulkan or Direct3D draw calls), which cannot
|
||||||
|
be accurately measured using CPU time or real-time. Instead, they can be
|
||||||
|
measured accurately using a dedicated API, and these measurement results
|
||||||
|
can be reported back with `SetIterationTime`.
|
||||||
|
|
||||||
|
```c++
|
||||||
|
static void BM_ManualTiming(benchmark::State& state) {
|
||||||
|
int microseconds = state.range_x();
|
||||||
|
std::chrono::duration<double, std::micro> sleep_duration {
|
||||||
|
static_cast<double>(microseconds)
|
||||||
|
};
|
||||||
|
|
||||||
|
while (state.KeepRunning()) {
|
||||||
|
auto start = std::chrono::high_resolution_clock::now();
|
||||||
|
// Simulate some useful workload with a sleep
|
||||||
|
std::this_thread::sleep_for(sleep_duration);
|
||||||
|
auto end = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
|
auto elapsed_seconds =
|
||||||
|
std::chrono::duration_cast<std::chrono::duration<double>>(
|
||||||
|
end - start);
|
||||||
|
|
||||||
|
state.SetIterationTime(elapsed_seconds.count());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
BENCHMARK(BM_ManualTiming)->Range(1, 1<<17)->UseManualTime();
|
||||||
|
```
|
||||||
|
|
||||||
### Preventing optimisation
|
### Preventing optimisation
|
||||||
To prevent a value or expression from being optimized away by the compiler
|
To prevent a value or expression from being optimized away by the compiler
|
||||||
the `benchmark::DoNotOptimize(...)` function can be used.
|
the `benchmark::DoNotOptimize(...)` function can be used.
|
||||||
|
@ -283,6 +283,17 @@ public:
|
|||||||
// within each benchmark iteration, if possible.
|
// within each benchmark iteration, if possible.
|
||||||
void ResumeTiming();
|
void ResumeTiming();
|
||||||
|
|
||||||
|
// REQUIRES: called exactly once per iteration of the KeepRunning loop.
|
||||||
|
// Set the manually measured time for this benchmark iteration, which
|
||||||
|
// is used instead of automatically measured time if UseManualTime() was
|
||||||
|
// specified.
|
||||||
|
//
|
||||||
|
// For threaded benchmarks the SetIterationTime() function acts
|
||||||
|
// like a barrier. I.e., the ith call by a particular thread to this
|
||||||
|
// function will block until all threads have made their ith call.
|
||||||
|
// The time will be set by the last thread to call this function.
|
||||||
|
void SetIterationTime(double seconds);
|
||||||
|
|
||||||
// Set the number of bytes processed by the current benchmark
|
// Set the number of bytes processed by the current benchmark
|
||||||
// execution. This routine is typically called once at the end of a
|
// execution. This routine is typically called once at the end of a
|
||||||
// throughput oriented benchmark. If this routine is called with a
|
// throughput oriented benchmark. If this routine is called with a
|
||||||
@ -444,6 +455,13 @@ public:
|
|||||||
// called, the cpu time used by the benchmark will be used.
|
// called, the cpu time used by the benchmark will be used.
|
||||||
Benchmark* UseRealTime();
|
Benchmark* UseRealTime();
|
||||||
|
|
||||||
|
// If a benchmark must measure time manually (e.g. if GPU execution time is being
|
||||||
|
// measured), call this method. If called, each benchmark iteration should call
|
||||||
|
// SetIterationTime(seconds) to report the measured time, which will be used
|
||||||
|
// to control how many iterations are run, and in the printing of items/second
|
||||||
|
// or MB/second values.
|
||||||
|
Benchmark* UseManualTime();
|
||||||
|
|
||||||
// Support for running multiple copies of the same benchmark concurrently
|
// Support for running multiple copies of the same benchmark concurrently
|
||||||
// in multiple threads. This may be useful when measuring the scaling
|
// in multiple threads. This may be useful when measuring the scaling
|
||||||
// of some piece of code.
|
// of some piece of code.
|
||||||
|
@ -130,6 +130,7 @@ class TimerManager {
|
|||||||
running_(false),
|
running_(false),
|
||||||
real_time_used_(0),
|
real_time_used_(0),
|
||||||
cpu_time_used_(0),
|
cpu_time_used_(0),
|
||||||
|
manual_time_used_(0),
|
||||||
num_finalized_(0),
|
num_finalized_(0),
|
||||||
phase_number_(0),
|
phase_number_(0),
|
||||||
entered_(0) {
|
entered_(0) {
|
||||||
@ -169,6 +170,21 @@ class TimerManager {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Called by each thread
|
||||||
|
void SetIterationTime(double seconds) EXCLUDES(lock_) {
|
||||||
|
bool last_thread = false;
|
||||||
|
{
|
||||||
|
MutexLock ml(lock_);
|
||||||
|
last_thread = Barrier(ml);
|
||||||
|
if (last_thread) {
|
||||||
|
manual_time_used_ += seconds;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (last_thread) {
|
||||||
|
phase_condition_.notify_all();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Called by each thread
|
// Called by each thread
|
||||||
void Finalize() EXCLUDES(lock_) {
|
void Finalize() EXCLUDES(lock_) {
|
||||||
MutexLock l(lock_);
|
MutexLock l(lock_);
|
||||||
@ -194,6 +210,13 @@ class TimerManager {
|
|||||||
return cpu_time_used_;
|
return cpu_time_used_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// REQUIRES: timer is not running
|
||||||
|
double manual_time_used() EXCLUDES(lock_) {
|
||||||
|
MutexLock l(lock_);
|
||||||
|
CHECK(!running_);
|
||||||
|
return manual_time_used_;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Mutex lock_;
|
Mutex lock_;
|
||||||
Condition phase_condition_;
|
Condition phase_condition_;
|
||||||
@ -207,6 +230,8 @@ class TimerManager {
|
|||||||
// Accumulated time so far (does not contain current slice if running_)
|
// Accumulated time so far (does not contain current slice if running_)
|
||||||
double real_time_used_;
|
double real_time_used_;
|
||||||
double cpu_time_used_;
|
double cpu_time_used_;
|
||||||
|
// Manually set iteration time. User sets this with SetIterationTime(seconds).
|
||||||
|
double manual_time_used_;
|
||||||
|
|
||||||
// How many threads have called Finalize()
|
// How many threads have called Finalize()
|
||||||
int num_finalized_;
|
int num_finalized_;
|
||||||
@ -263,6 +288,7 @@ struct Benchmark::Instance {
|
|||||||
int arg2;
|
int arg2;
|
||||||
TimeUnit time_unit;
|
TimeUnit time_unit;
|
||||||
bool use_real_time;
|
bool use_real_time;
|
||||||
|
bool use_manual_time;
|
||||||
double min_time;
|
double min_time;
|
||||||
int threads; // Number of concurrent threads to use
|
int threads; // Number of concurrent threads to use
|
||||||
bool multithreaded; // Is benchmark multi-threaded?
|
bool multithreaded; // Is benchmark multi-threaded?
|
||||||
@ -302,6 +328,7 @@ public:
|
|||||||
void RangePair(int lo1, int hi1, int lo2, int hi2);
|
void RangePair(int lo1, int hi1, int lo2, int hi2);
|
||||||
void MinTime(double n);
|
void MinTime(double n);
|
||||||
void UseRealTime();
|
void UseRealTime();
|
||||||
|
void UseManualTime();
|
||||||
void Threads(int t);
|
void Threads(int t);
|
||||||
void ThreadRange(int min_threads, int max_threads);
|
void ThreadRange(int min_threads, int max_threads);
|
||||||
void ThreadPerCpu();
|
void ThreadPerCpu();
|
||||||
@ -318,6 +345,7 @@ private:
|
|||||||
TimeUnit time_unit_;
|
TimeUnit time_unit_;
|
||||||
double min_time_;
|
double min_time_;
|
||||||
bool use_real_time_;
|
bool use_real_time_;
|
||||||
|
bool use_manual_time_;
|
||||||
std::vector<int> thread_counts_;
|
std::vector<int> thread_counts_;
|
||||||
|
|
||||||
BenchmarkImp& operator=(BenchmarkImp const&);
|
BenchmarkImp& operator=(BenchmarkImp const&);
|
||||||
@ -378,6 +406,7 @@ bool BenchmarkFamilies::FindBenchmarks(
|
|||||||
instance.time_unit = family->time_unit_;
|
instance.time_unit = family->time_unit_;
|
||||||
instance.min_time = family->min_time_;
|
instance.min_time = family->min_time_;
|
||||||
instance.use_real_time = family->use_real_time_;
|
instance.use_real_time = family->use_real_time_;
|
||||||
|
instance.use_manual_time = family->use_manual_time_;
|
||||||
instance.threads = num_threads;
|
instance.threads = num_threads;
|
||||||
instance.multithreaded = !(family->thread_counts_.empty());
|
instance.multithreaded = !(family->thread_counts_.empty());
|
||||||
|
|
||||||
@ -391,7 +420,9 @@ bool BenchmarkFamilies::FindBenchmarks(
|
|||||||
if (!IsZero(family->min_time_)) {
|
if (!IsZero(family->min_time_)) {
|
||||||
instance.name += StringPrintF("/min_time:%0.3f", family->min_time_);
|
instance.name += StringPrintF("/min_time:%0.3f", family->min_time_);
|
||||||
}
|
}
|
||||||
if (family->use_real_time_) {
|
if (family->use_manual_time_) {
|
||||||
|
instance.name += "/manual_time";
|
||||||
|
} else if (family->use_real_time_) {
|
||||||
instance.name += "/real_time";
|
instance.name += "/real_time";
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -411,7 +442,8 @@ bool BenchmarkFamilies::FindBenchmarks(
|
|||||||
|
|
||||||
BenchmarkImp::BenchmarkImp(const char* name)
|
BenchmarkImp::BenchmarkImp(const char* name)
|
||||||
: name_(name), arg_count_(-1), time_unit_(kNanosecond),
|
: name_(name), arg_count_(-1), time_unit_(kNanosecond),
|
||||||
min_time_(0.0), use_real_time_(false) {
|
min_time_(0.0), use_real_time_(false),
|
||||||
|
use_manual_time_(false) {
|
||||||
}
|
}
|
||||||
|
|
||||||
BenchmarkImp::~BenchmarkImp() {
|
BenchmarkImp::~BenchmarkImp() {
|
||||||
@ -474,9 +506,15 @@ void BenchmarkImp::MinTime(double t) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void BenchmarkImp::UseRealTime() {
|
void BenchmarkImp::UseRealTime() {
|
||||||
|
CHECK(!use_manual_time_) << "Cannot set UseRealTime and UseManualTime simultaneously.";
|
||||||
use_real_time_ = true;
|
use_real_time_ = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void BenchmarkImp::UseManualTime() {
|
||||||
|
CHECK(!use_real_time_) << "Cannot set UseRealTime and UseManualTime simultaneously.";
|
||||||
|
use_manual_time_ = true;
|
||||||
|
}
|
||||||
|
|
||||||
void BenchmarkImp::Threads(int t) {
|
void BenchmarkImp::Threads(int t) {
|
||||||
CHECK_GT(t, 0);
|
CHECK_GT(t, 0);
|
||||||
thread_counts_.push_back(t);
|
thread_counts_.push_back(t);
|
||||||
@ -579,6 +617,11 @@ Benchmark* Benchmark::UseRealTime() {
|
|||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Benchmark* Benchmark::UseManualTime() {
|
||||||
|
imp_->UseManualTime();
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
Benchmark* Benchmark::Threads(int t) {
|
Benchmark* Benchmark::Threads(int t) {
|
||||||
imp_->Threads(t);
|
imp_->Threads(t);
|
||||||
return this;
|
return this;
|
||||||
@ -671,6 +714,7 @@ void RunBenchmark(const benchmark::internal::Benchmark::Instance& b,
|
|||||||
|
|
||||||
const double cpu_accumulated_time = timer_manager->cpu_time_used();
|
const double cpu_accumulated_time = timer_manager->cpu_time_used();
|
||||||
const double real_accumulated_time = timer_manager->real_time_used();
|
const double real_accumulated_time = timer_manager->real_time_used();
|
||||||
|
const double manual_accumulated_time = timer_manager->manual_time_used();
|
||||||
timer_manager.reset();
|
timer_manager.reset();
|
||||||
|
|
||||||
VLOG(2) << "Ran in " << cpu_accumulated_time << "/"
|
VLOG(2) << "Ran in " << cpu_accumulated_time << "/"
|
||||||
@ -678,7 +722,9 @@ void RunBenchmark(const benchmark::internal::Benchmark::Instance& b,
|
|||||||
|
|
||||||
// Base decisions off of real time if requested by this benchmark.
|
// Base decisions off of real time if requested by this benchmark.
|
||||||
double seconds = cpu_accumulated_time;
|
double seconds = cpu_accumulated_time;
|
||||||
if (b.use_real_time) {
|
if (b.use_manual_time) {
|
||||||
|
seconds = manual_accumulated_time;
|
||||||
|
} else if (b.use_real_time) {
|
||||||
seconds = real_accumulated_time;
|
seconds = real_accumulated_time;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -713,7 +759,11 @@ void RunBenchmark(const benchmark::internal::Benchmark::Instance& b,
|
|||||||
// Report the total iterations across all threads.
|
// Report the total iterations across all threads.
|
||||||
report.iterations = static_cast<int64_t>(iters) * b.threads;
|
report.iterations = static_cast<int64_t>(iters) * b.threads;
|
||||||
report.time_unit = b.time_unit;
|
report.time_unit = b.time_unit;
|
||||||
report.real_accumulated_time = real_accumulated_time;
|
if (b.use_manual_time) {
|
||||||
|
report.real_accumulated_time = manual_accumulated_time;
|
||||||
|
} else {
|
||||||
|
report.real_accumulated_time = real_accumulated_time;
|
||||||
|
}
|
||||||
report.cpu_accumulated_time = cpu_accumulated_time;
|
report.cpu_accumulated_time = cpu_accumulated_time;
|
||||||
report.bytes_per_second = bytes_per_second;
|
report.bytes_per_second = bytes_per_second;
|
||||||
report.items_per_second = items_per_second;
|
report.items_per_second = items_per_second;
|
||||||
@ -774,6 +824,12 @@ void State::ResumeTiming() {
|
|||||||
timer_manager->StartTimer();
|
timer_manager->StartTimer();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void State::SetIterationTime(double seconds)
|
||||||
|
{
|
||||||
|
CHECK(running_benchmark);
|
||||||
|
timer_manager->SetIterationTime(seconds);
|
||||||
|
}
|
||||||
|
|
||||||
void State::SetLabel(const char* label) {
|
void State::SetLabel(const char* label) {
|
||||||
CHECK(running_benchmark);
|
CHECK(running_benchmark);
|
||||||
MutexLock l(GetBenchmarkLock());
|
MutexLock l(GetBenchmarkLock());
|
||||||
|
@ -14,6 +14,8 @@
|
|||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <chrono>
|
||||||
|
#include <thread>
|
||||||
|
|
||||||
#if defined(__GNUC__)
|
#if defined(__GNUC__)
|
||||||
# define BENCHMARK_NOINLINE __attribute__((noinline))
|
# define BENCHMARK_NOINLINE __attribute__((noinline))
|
||||||
@ -174,5 +176,30 @@ static void BM_ParallelMemset(benchmark::State& state) {
|
|||||||
}
|
}
|
||||||
BENCHMARK(BM_ParallelMemset)->Arg(10 << 20)->ThreadRange(1, 4);
|
BENCHMARK(BM_ParallelMemset)->Arg(10 << 20)->ThreadRange(1, 4);
|
||||||
|
|
||||||
|
static void BM_ManualTiming(benchmark::State& state) {
|
||||||
|
size_t slept_for = 0;
|
||||||
|
int microseconds = state.range_x();
|
||||||
|
std::chrono::duration<double, std::micro> sleep_duration {
|
||||||
|
static_cast<double>(microseconds)
|
||||||
|
};
|
||||||
|
|
||||||
|
while (state.KeepRunning()) {
|
||||||
|
auto start = std::chrono::high_resolution_clock::now();
|
||||||
|
// Simulate some useful workload with a sleep
|
||||||
|
std::this_thread::sleep_for(sleep_duration);
|
||||||
|
auto end = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
|
auto elapsed =
|
||||||
|
std::chrono::duration_cast<std::chrono::duration<double>>(
|
||||||
|
end - start);
|
||||||
|
|
||||||
|
state.SetIterationTime(elapsed.count());
|
||||||
|
slept_for += microseconds;
|
||||||
|
}
|
||||||
|
state.SetItemsProcessed(slept_for);
|
||||||
|
}
|
||||||
|
BENCHMARK(BM_ManualTiming)->Range(1, 1 << 14)->UseRealTime();
|
||||||
|
BENCHMARK(BM_ManualTiming)->Range(1, 1 << 14)->UseManualTime();
|
||||||
|
|
||||||
BENCHMARK_MAIN()
|
BENCHMARK_MAIN()
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user