1
0
mirror of https://github.com/google/benchmark.git synced 2025-04-12 20:41:14 +08:00

finish merge

This commit is contained in:
Eric Fiselier 2015-02-18 16:36:59 -05:00
parent 4c786b1671
commit ed5145afea
6 changed files with 201 additions and 219 deletions

View File

@ -35,7 +35,7 @@ namespace benchmark {
class Benchmark {
public:
// The Benchmark takes ownership of the Callback pointed to by f.
Benchmark(const std::string& name, const Function& f);
Benchmark(const std::string& name, Function* f);
~Benchmark();
@ -97,23 +97,6 @@ class Benchmark {
// Equivalent to ThreadRange(NumCPUs(), NumCPUs())
Benchmark* ThreadPerCpu();
// Have "setup" and/or "teardown" invoked once for every benchmark run.
// If the benchmark is multi-threaded (will run in k threads concurrently),
// the setup callback will be invoked exactly once (not k times) before
// each run with k threads. Time allowing (e.g. for a short benchmark), there
// may be multiple such runs per benchmark, each run with its own
// "setup"/"teardown".
//
// If the benchmark uses different size groups of threads (e.g. via
// ThreadRange), the above will be true for each size group.
//
// The callback will be passed the number of threads for this benchmark run.
//
// The callback must not be self-deleting. The Benchmark
// object takes ownership of the callback object.
Benchmark* Setup(const Function& setup);
Benchmark* Teardown(const Function& teardown);
// TODO(sanjay): Control whether or not real-time is used for this benchmark
// TODO(sanjay): Control the default number of iterations
@ -130,10 +113,9 @@ class Benchmark {
private:
std::string name_;
Function function_;
Function setup_;
Function teardown_;
Function* function_;
int registration_index_;
int arg_count_;
std::vector< std::pair<int, int> > args_; // Args for all benchmark runs
std::vector<int> thread_counts_;

View File

@ -146,6 +146,7 @@ BENCHMARK(BM_memcpy)->Setup(NewPermanentCallback(MemcpySetup))
#ifndef BENCHMARK_MINIMAL_BENCHMARK_H_
#define BENCHMARK_MINIMAL_BENCHMARK_H_
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include "macros.h"
@ -252,38 +253,115 @@ extern void BenchmarkUseRealTime();
class Benchmark;
// A Function object wraps together a callback of one of several
// possible types and allows it to be invoked without the caller
// having to know which type is being invoked. The callback must be
// repeatable.
class Function {
private:
typedef void(F0Type)(int);
typedef void(F1Type)(int, int);
typedef void(F2Type)(int, int, int);
F0Type *f0_;
F1Type *f1_;
F2Type *f2_;
public:
Function() : f0_(NULL), f1_(NULL), f2_(NULL) {}
Function(F0Type* f)
: f0_(f), f1_(NULL), f2_(NULL) {
// State is passed to a running Benchmark and contains state for the
// benchmark to use.
class State {
public:
State(int max_iters, bool has_x, int x, bool has_y, int y, int thread_i)
: started_(false), total_iterations_(0), max_iterations_(max_iters),
has_range_x_(has_x), range_x_(x),
has_range_y_(has_y), range_y_(y),
thread_index(thread_i)
{}
// Returns true iff the benchmark should continue through another iteration.
bool KeepRunning() {
if (__builtin_expect(!started_, false)) {
StartBenchmarkTiming();
started_ = true;
}
Function(F1Type* f)
: f0_(NULL), f1_(f), f2_(NULL) {
// The post-increment counts this call even when it is the one that ends the
// loop, so on the terminating call the counter is one past the limit.
bool const res = total_iterations_++ < max_iterations_;
if (__builtin_expect(!res, false)) {
  assert(started_);
  StopBenchmarkTiming();
  // Undo the overshoot so iterations() reports the number of iterations
  // actually run instead of max_iterations_ + 1.
  total_iterations_ = max_iterations_;
}
Function(F2Type* f)
: f0_(NULL), f1_(NULL), f2_(f) {
}
void Run(int iters, int arg1, int arg2) const;
int args() const;
return res;
}
void PauseTiming() {
StopBenchmarkTiming();
}
void ResumeTiming() {
StartBenchmarkTiming();
}
// Set the number of bytes processed by the current benchmark
// execution. This routine is typically called once at the end of a
// throughput oriented benchmark. If this routine is called with a
// value > 0, the report is printed in MB/sec instead of nanoseconds
// per iteration.
//
// REQUIRES: a benchmark has exited its KeepRunning loop.
void SetBytesProcessed(int64_t bytes) {
SetBenchmarkBytesProcessed(bytes);
}
// If this routine is called with items > 0, then an items/s
// label is printed on the benchmark report line for the currently
// executing benchmark. It is typically called at the end of a processing
// benchmark where a processing items/second output is desired.
//
// REQUIRES: a benchmark has exited its KeepRunning loop.
void SetItemsProcessed(int64_t items) {
SetBenchmarkItemsProcessed(items);
}
// If this routine is called, the specified label is printed at the
// end of the benchmark report line for the currently executing
// benchmark. Example:
// static void BM_Compress(benchmark::State& state) {
// ...
// double compression = input_size / output_size;
// state.SetLabel(StringPrintf("compress:%.1f%%", 100.0*compression).c_str());
// }
// Produces output that looks like:
// BM_Compress 50 50 14115038 compress:27.3%
//
// REQUIRES: a benchmark has exited its KeepRunning loop.
void SetLabel(const char* label) {
SetBenchmarkLabel(label);
}
// Range arguments for this run. CHECKs if the argument has been set.
int range_x() const {
assert(has_range_x_);
return range_x_;
}
int range_y() const {
assert(has_range_y_);
return range_y_;
}
int iterations() const { return total_iterations_; }
int max_iterations() const { return max_iterations_; }
private:
bool started_;
unsigned total_iterations_, max_iterations_;
bool has_range_x_;
int range_x_;
bool has_range_y_;
int range_y_;
public:
const int thread_index;
private:
DISALLOW_COPY_AND_ASSIGN(State)
};
typedef void(Function)(State&);
class MinimalBenchmark
{
public:
MinimalBenchmark(const char* name, const Function& f);
MinimalBenchmark(const char* name, Function* ptr);
~MinimalBenchmark();
@ -345,23 +423,6 @@ public:
// Equivalent to ThreadRange(NumCPUs(), NumCPUs())
MinimalBenchmark& ThreadPerCpu();
// Have "setup" and/or "teardown" invoked once for every benchmark run.
// If the benchmark is multi-threaded (will run in k threads concurrently),
// the setup callback will be invoked exactly once (not k times) before
// each run with k threads. Time allowing (e.g. for a short benchmark), there
// may be multiple such runs per benchmark, each run with its own
// "setup"/"teardown".
//
// If the benchmark uses different size groups of threads (e.g. via
// ThreadRange), the above will be true for each size group.
//
// The callback will be passed the number of threads for this benchmark run.
//
// The callback must not be self-deleting. The Benchmark
// object takes ownership of the callback object.
MinimalBenchmark& Setup(const Function& setup);
MinimalBenchmark& Teardown(const Function& teardown);
MinimalBenchmark* operator->() {
return this;
}

View File

@ -261,56 +261,30 @@ const int Benchmark::kNumCpuMarker;
// Information kept per benchmark we may want to run
struct Benchmark::Instance {
std::string name;
Function function;
Function setup;
Function teardown;
Function* function;
bool has_arg1;
int arg1;
bool has_arg2;
int arg2;
int threads; // Number of concurrent threads to use
bool multithreaded; // Is benchmark multi-threaded?
void Run(int iters) const {
function.Run(iters, arg1, arg2);
void Run(int iters, int thread_id) const {
State st(iters, has_arg1, arg1, has_arg2, arg2, thread_id);
function(st);
}
};
void Function::Run(int iters, int arg1, int arg2) const {
if (f0_ != NULL) {
(*f0_)(iters);
} else if (f1_ != NULL) {
(*f1_)(iters, arg1);
} else if (f2_ != NULL) {
(*f2_)(iters, arg1, arg2);
} else {
// NULL function; do nothing.
}
}
int Function::args() const {
if (f0_ != NULL) {
return 0;
} else if (f1_ != NULL) {
return 1;
} else if (f2_ != NULL) {
return 2;
} else {
return -1;
}
}
Benchmark::Benchmark(const std::string& name,
const Function& f) EXCLUDES(GetBenchmarkLock())
: name_(name), function_(f) {
Function* f) EXCLUDES(GetBenchmarkLock())
: name_(name), function_(f), arg_count_(-1) {
MutexLock l(GetBenchmarkLock());
if (families == NULL) {
families = new std::vector<Benchmark*>;
}
registration_index_ = families->size();
families->push_back(this);
if (f.args() == 0) {
// Run it exactly once regardless of Arg/Range calls.
args_.emplace_back(-1, -1);
}
}
Benchmark::~Benchmark() EXCLUDES(GetBenchmarkLock()) {
@ -324,13 +298,18 @@ Benchmark::~Benchmark() EXCLUDES(GetBenchmarkLock()) {
}
Benchmark* Benchmark::Arg(int x) {
CHECK_EQ(function_.args(), 1) << "Wrong number of args for " << name_;
// TODO(remove)
//CHECK_EQ(function_.args(), 1) << "Wrong number of args for " << name_;
CHECK(arg_count_ == -1 || arg_count_ == 1);
arg_count_ = 1;
args_.emplace_back(x, -1);
return this;
}
Benchmark* Benchmark::Range(int start, int limit) {
CHECK_EQ(function_.args(), 1) << "Wrong number of args for " << name_;
//CHECK_EQ(function_.args(), 1) << "Wrong number of args for " << name_;
CHECK(arg_count_ == -1 || arg_count_ == 1);
arg_count_ = 1;
std::vector<int> arglist;
AddRange(&arglist, start, limit, kRangeMultiplier);
@ -341,7 +320,9 @@ Benchmark* Benchmark::Range(int start, int limit) {
}
Benchmark* Benchmark::DenseRange(int start, int limit) {
CHECK_EQ(function_.args(), 1) << "Wrong number of args for " << name_;
//CHECK_EQ(function_.args(), 1) << "Wrong number of args for " << name_;
CHECK(arg_count_ == -1 || arg_count_ == 1);
arg_count_ = 1;
CHECK_GE(start, 0);
CHECK_LE(start, limit);
for (int arg = start; arg <= limit; arg++) {
@ -351,13 +332,17 @@ Benchmark* Benchmark::DenseRange(int start, int limit) {
}
Benchmark* Benchmark::ArgPair(int x, int y) {
CHECK_EQ(function_.args(), 2) << "Wrong number of args for " << name_;
//CHECK_EQ(function_.args(), 2) << "Wrong number of args for " << name_;
CHECK(arg_count_ == -1 || arg_count_ == 2);
arg_count_ = 2;
args_.emplace_back(x, y);
return this;
}
Benchmark* Benchmark::RangePair(int lo1, int hi1, int lo2, int hi2) {
CHECK_EQ(function_.args(), 2) << "Wrong number of args for " << name_;
//CHECK_EQ(function_.args(), 2) << "Wrong number of args for " << name_;
CHECK(arg_count_ == -1 || arg_count_ == 2);
arg_count_ = 2;
std::vector<int> arglist1, arglist2;
AddRange(&arglist1, lo1, hi1, kRangeMultiplier);
AddRange(&arglist2, lo2, hi2, kRangeMultiplier);
@ -416,15 +401,6 @@ void Benchmark::AddRange(std::vector<int>* dst, int lo, int hi, int mult) {
}
}
Benchmark* Benchmark::Setup(const Function& setup) {
setup_ = setup;
return this;
}
Benchmark* Benchmark::Teardown(const Function& teardown) {
teardown_ = teardown;
return this;
}
// Extract the list of benchmark instances that match the specified
// regular expression.
@ -448,7 +424,10 @@ void Benchmark::FindBenchmarks(
for (Benchmark* family : *families) {
if (family == NULL) continue; // Family was deleted
const int num_args = family->function_.args();
if (family->arg_count_ == -1) {
family->arg_count_ = 0;
family->args_.emplace_back(-1, -1);
}
for (auto const& args : family->args_) {
const std::vector<int>* thread_counts =
(family->thread_counts_.empty()
@ -462,18 +441,18 @@ void Benchmark::FindBenchmarks(
Instance instance;
instance.name = family->name_;
instance.function = family->function_;
instance.has_arg1 = family->arg_count_ >= 1;
instance.arg1 = args.first;
instance.has_arg2 = family->arg_count_ == 2;
instance.arg2 = args.second;
instance.threads = num_threads;
instance.multithreaded = !(family->thread_counts_.empty());
instance.setup = family->setup_;
instance.teardown = family->teardown_;
// Add arguments to instance name
if (num_args >= 1) {
if (family->arg_count_ >= 1) {
AppendHumanReadable(instance.arg1, &instance.name);
}
if (num_args >= 2) {
if (family->arg_count_ >= 2) {
AppendHumanReadable(instance.arg2, &instance.name);
}
@ -536,19 +515,17 @@ static bool CpuScalingEnabled() {
// Execute one thread of benchmark b for the specified number of iterations.
// Adds the stats collected for the thread into *total.
void RunInThread(const benchmark::Benchmark::Instance* b,
int iters,
int iters, int thread_id,
ThreadStats* total) EXCLUDES(GetBenchmarkLock()) {
ThreadStats* my_stats = &thread_stats;
ResetThreadStats(my_stats);
timer_manager->StartTimer();
b->Run(iters);
b->Run(iters, thread_id);
timer_manager->Finalize();
{
MutexLock l(GetBenchmarkLock());
AddThreadStats(total, *my_stats);
}
timer_manager->Finalize();
}
void RunBenchmark(const benchmark::Benchmark::Instance& b,
@ -571,7 +548,6 @@ void RunBenchmark(const benchmark::Benchmark::Instance& b,
report_label.clear();
use_real_time = false;
}
b.setup.Run(b.threads, b.arg1, b.arg2);
Notification done;
timer_manager = new TimerManager(b.threads, &done);
@ -586,12 +562,12 @@ void RunBenchmark(const benchmark::Benchmark::Instance& b,
if (thread.joinable())
thread.join();
}
for (std::thread& thread : pool) {
thread = std::thread(&RunInThread, &b, iters, &total);
// Start one worker per pool slot. The slot is selected by this loop's own
// counter, and the same counter is handed to the worker as its thread id.
for (std::size_t ti = 0; ti < pool.size(); ++ti) {
  pool[ti] = std::thread(&RunInThread, &b, iters, static_cast<int>(ti), &total);
}
} else {
// Run directly in this thread
RunInThread(&b, iters, &total);
RunInThread(&b, iters, 0, &total);
}
done.WaitForNotification();
running_benchmark = false;
@ -600,7 +576,6 @@ void RunBenchmark(const benchmark::Benchmark::Instance& b,
const double real_accumulated_time = timer_manager->real_time_used();
delete timer_manager;
timer_manager = NULL;
b.teardown.Run(b.threads, b.arg1, b.arg2);
VLOG(1) << "Ran in " << cpu_accumulated_time << "/"
<< real_accumulated_time << "\n";

View File

@ -17,7 +17,7 @@
namespace benchmark {
MinimalBenchmark::MinimalBenchmark(const char* name, const Function& f)
MinimalBenchmark::MinimalBenchmark(const char* name, Function* f)
: imp_(new Benchmark(name, f))
{ }
@ -73,14 +73,4 @@ MinimalBenchmark& MinimalBenchmark::ThreadPerCpu() {
return *this;
}
MinimalBenchmark& MinimalBenchmark::Setup(const Function& setup) {
imp_->Setup(setup);
return *this;
}
MinimalBenchmark& MinimalBenchmark::Teardown(const Function& teardown) {
imp_->Teardown(teardown);
return *this;
}
} // end namespace benchmark

View File

@ -4,22 +4,20 @@
using benchmark::StartBenchmarkTiming;
using benchmark::StopBenchmarkTiming;
void BM_empty(int iters) {
while (iters-- > 0) { }
// Benchmark whose KeepRunning loop has an empty body.
void BM_empty(benchmark::State& state) {
  while (state.KeepRunning()) {
    // Intentionally empty.
  }
}
BENCHMARK(BM_empty);
BENCHMARK(BM_empty)->ThreadPerCpu();
void BM_empty2(int iters) {
StopBenchmarkTiming();
StartBenchmarkTiming();
while (iters-- > 0) { }
// Second benchmark with an empty KeepRunning loop body; it is registered
// only with ThreadPerCpu().
void BM_empty2(benchmark::State& state) {
  while (state.KeepRunning()) {
    // Intentionally empty.
  }
}
BENCHMARK(BM_empty2)->ThreadPerCpu();
void BM_spin(int iters, int xrange) {
while (iters-- > 0) {
for (int x = 0; x < xrange; ++x) {
void BM_spin(benchmark::State& state) {
while (state.KeepRunning()) {
for (int x = 0; x < state.range_x(); ++x) {
volatile int dummy = x;
((void)dummy);
}
@ -27,15 +25,13 @@ void BM_spin(int iters, int xrange) {
}
BENCHMARK(BM_spin)->Range(8, 8<<10);
void BM_spin_pause_before(int iters, int xrange) {
StopBenchmarkTiming();
for (int i = 0; i < xrange; ++i) {
void BM_spin_pause_before(benchmark::State& state) {
// Spin before entering the KeepRunning loop. This benchmark is registered
// with Range(), which supplies a single (x) argument, so the bound must come
// from range_x(); range_y() asserts when no second argument was set.
for (int i = 0; i < state.range_x(); ++i) {
  volatile int dummy = i;
  ((void)dummy);
}
StartBenchmarkTiming();
while(iters-- > 0) {
for (int i = 0; i < xrange; ++i) {
while(state.KeepRunning()) {
for (int i = 0; i < state.range_x(); ++i) {
volatile int dummy = i;
((void)dummy);
}
@ -45,15 +41,15 @@ BENCHMARK(BM_spin_pause_before)->Range(8, 8<<10);
BENCHMARK(BM_spin_pause_before)->Range(8, 8<<10)->ThreadPerCpu();
void BM_spin_pause_during(int iters, int xrange) {
while(iters-- > 0) {
void BM_spin_pause_during(benchmark::State& state) {
while(state.KeepRunning()) {
StopBenchmarkTiming();
for (int i = 0; i < xrange; ++i) {
for (int i = 0; i < state.range_x(); ++i) {
volatile int dummy = i;
((void)dummy);
}
StartBenchmarkTiming();
for (int i = 0; i < xrange; ++i) {
// Spin after timing is restarted, using the same range_x() bound as the
// paused spin earlier in this iteration. range_y() asserts unless a second
// range argument was registered for the benchmark.
for (int i = 0; i < state.range_x(); ++i) {
  volatile int dummy = i;
  ((void)dummy);
}

View File

@ -16,11 +16,6 @@
#include <gtest/gtest.h>
using benchmark::StartBenchmarkTiming;
using benchmark::StopBenchmarkTiming;
using benchmark::SetBenchmarkBytesProcessed;
using benchmark::SetBenchmarkItemsProcessed;
namespace {
#ifdef DEBUG
@ -48,14 +43,13 @@ std::set<int> ConstructRandomSet(int size) {
std::mutex test_vector_mu;
std::vector<int>* test_vector = nullptr;
static bool setup_called = false;
} // end namespace
#ifdef DEBUG
static void BM_Factorial(int iters) {
static void BM_Factorial(benchmark::State& state) {
int fac_42 = 0;
while (iters-- > 0)
while (state.KeepRunning())
fac_42 = Factorial(8);
// Prevent compiler optimizations
EXPECT_NE(fac_42, std::numeric_limits<int>::max());
@ -63,20 +57,20 @@ static void BM_Factorial(int iters) {
BENCHMARK(BM_Factorial);
#endif
static void BM_CalculatePiRange(int iters, int rangex) {
static void BM_CalculatePiRange(benchmark::State& state) {
double pi = 0.0;
while (iters-- > 0)
pi = CalculatePi(rangex);
while (state.KeepRunning())
pi = CalculatePi(state.range_x());
std::stringstream ss;
ss << pi;
//state.SetLabel(ss.str().c_str());
state.SetLabel(ss.str().c_str());
}
BENCHMARK_RANGE(BM_CalculatePiRange, 1, 1024 * 1024);
static void BM_CalculatePi(int iters) {
static void BM_CalculatePi(benchmark::State& state) {
static const int depth = 1024;
double pi ATTRIBUTE_UNUSED = 0.0;
while (iters-- > 0) {
while (state.KeepRunning()) {
pi = CalculatePi(depth);
}
}
@ -84,68 +78,53 @@ BENCHMARK(BM_CalculatePi)->Threads(8);
BENCHMARK(BM_CalculatePi)->ThreadRange(1, 32);
BENCHMARK(BM_CalculatePi)->ThreadPerCpu();
static void BM_SetInsert(int iters, int xrange, int yrange) {
const int total_iters = iters;
while (iters-->0) {
StopBenchmarkTiming();
std::set<int> data = ConstructRandomSet(xrange);
StartBenchmarkTiming();
for (int j = 0; j < yrange; ++j)
static void BM_SetInsert(benchmark::State& state) {
while (state.KeepRunning()) {
state.PauseTiming();
std::set<int> data = ConstructRandomSet(state.range_x());
state.ResumeTiming();
for (int j = 0; j < state.range_y(); ++j)
data.insert(rand());
}
SetBenchmarkItemsProcessed(total_iters * yrange);
SetBenchmarkBytesProcessed(total_iters * yrange * sizeof(int));
state.SetItemsProcessed(state.iterations() * state.range_y());
state.SetBytesProcessed(state.iterations() * state.range_y() * sizeof(int));
}
BENCHMARK(BM_SetInsert)->RangePair(1<<10,8<<10, 1,10);
template<typename Q>
static void BM_Sequential(int iters, int xrange) {
const int total_iters = iters;
static void BM_Sequential(benchmark::State& state) {
typename Q::value_type v = 42;
while (iters-->0) {
while (state.KeepRunning()) {
Q q;
for (int i = xrange; --i; )
for (int i = state.range_x(); --i; )
q.push_back(v);
}
const int64_t items_processed =
static_cast<int64_t>(total_iters) * xrange;
SetBenchmarkItemsProcessed(items_processed);
SetBenchmarkBytesProcessed(items_processed * sizeof(v));
static_cast<int64_t>(state.iterations()) * state.range_x();
state.SetItemsProcessed(items_processed);
state.SetBytesProcessed(items_processed * sizeof(v));
}
BENCHMARK_TEMPLATE(BM_Sequential, std::vector<int>)->Range(1 << 0, 1 << 10);
BENCHMARK_TEMPLATE(BM_Sequential, std::list<int>)->Range(1 << 0, 1 << 10);
static void BM_StringCompare(int iters, int xrange) {
StopBenchmarkTiming();
std::string s1(xrange, '-');
std::string s2(xrange, '-');
static void BM_StringCompare(benchmark::State& state) {
std::string s1(state.range_x(), '-');
std::string s2(state.range_x(), '-');
int r = 0;
StartBenchmarkTiming();
while (iters-->0)
while (state.KeepRunning())
r |= s1.compare(s2);
// Prevent compiler optimizations
assert(r != std::numeric_limits<int>::max());
}
BENCHMARK(BM_StringCompare)->Range(1, 1<<20);
static void BM_SetupTeardown_Setup(int) {
assert(setup_called == false);
setup_called = true;
test_vector = new std::vector<int>();
}
static void BM_SetupTeardown_Teardown(int) {
assert(setup_called);
setup_called = false;
delete test_vector;
}
static void BM_SetupTeardown(int iters) {
static void BM_SetupTeardown(benchmark::State& state) {
if (state.thread_index == 0) {
// No need to lock test_vector_mu here as this is running single-threaded.
test_vector = new std::vector<int>();
}
int i = 0;
while (iters-->0) {
while (state.KeepRunning()) {
std::lock_guard<std::mutex> l(test_vector_mu);
if (i%2 == 0)
test_vector->push_back(i);
@ -153,18 +132,17 @@ static void BM_SetupTeardown(int iters) {
test_vector->pop_back();
++i;
}
}
BENCHMARK(BM_SetupTeardown)->Setup(&BM_SetupTeardown_Setup)
->Teardown(&BM_SetupTeardown_Teardown)
->ThreadPerCpu();
static void BM_LongTest(int iters, int xrange) {
double tracker = 0.0;
while (iters-->0) {
for (int i = 0; i < xrange; ++i)
tracker += i;
if (state.thread_index == 0) {
delete test_vector;
}
}
BENCHMARK(BM_SetupTeardown)->ThreadPerCpu();
// Benchmark that, on every iteration, adds the integers 0 .. range_x()-1
// into a running double total.
static void BM_LongTest(benchmark::State& state) {
  double tracker = 0.0;
  while (state.KeepRunning()) {
    const int limit = state.range_x();
    for (int n = 0; n < limit; ++n) {
      tracker += n;
    }
  }
  // Check that the accumulated total is non-zero.
  assert(tracker != 0.0);
}
BENCHMARK(BM_LongTest)->Range(1<<16,1<<28);