Add number of threads to State.

Having access to the thread count from within a benchmark is useful
when one wants to distribute a workload dynamically among the
benchmarks running in parallel, e.g. when using ThreadRange() or
ThreadPerCpu().
This commit is contained in:
Jakob Buchgraber 2016-02-15 14:19:43 +01:00
parent bdb9f697d7
commit 007efee751
3 changed files with 32 additions and 3 deletions

View File

@ -221,7 +221,7 @@ inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
// benchmark to use. // benchmark to use.
class State { class State {
public: public:
State(size_t max_iters, bool has_x, int x, bool has_y, int y, int thread_i); State(size_t max_iters, bool has_x, int x, bool has_y, int y, int thread_i, int n_threads);
// Returns true iff the benchmark should continue through another iteration. // Returns true iff the benchmark should continue through another iteration.
// NOTE: A benchmark may not return from the test until KeepRunning() has // NOTE: A benchmark may not return from the test until KeepRunning() has
@ -358,7 +358,10 @@ private:
size_t items_processed_; size_t items_processed_;
public: public:
// Index of the executing thread. Values from [0, threads).
const int thread_index; const int thread_index;
// Number of threads concurrently executing the benchmark.
const int threads;
const size_t max_iterations; const size_t max_iterations;
private: private:

View File

@ -599,7 +599,7 @@ namespace {
void RunInThread(const benchmark::internal::Benchmark::Instance* b, void RunInThread(const benchmark::internal::Benchmark::Instance* b,
size_t iters, int thread_id, size_t iters, int thread_id,
ThreadStats* total) EXCLUDES(GetBenchmarkLock()) { ThreadStats* total) EXCLUDES(GetBenchmarkLock()) {
State st(iters, b->has_arg1, b->arg1, b->has_arg2, b->arg2, thread_id); State st(iters, b->has_arg1, b->arg1, b->has_arg2, b->arg2, thread_id, b->threads);
b->benchmark->Run(st); b->benchmark->Run(st);
CHECK(st.iterations() == st.max_iterations) << CHECK(st.iterations() == st.max_iterations) <<
"Benchmark returned before State::KeepRunning() returned false!"; "Benchmark returned before State::KeepRunning() returned false!";
@ -736,15 +736,17 @@ void RunBenchmark(const benchmark::internal::Benchmark::Instance& b,
} // namespace } // namespace
// Constructs the per-thread benchmark State: stores the range arguments, the
// index of the executing thread and (new in this change) the number of
// threads running the benchmark, zeroes the iteration and byte/item counters,
// and checks that at least one iteration was requested and that
// thread_index is smaller than threads.
State::State(size_t max_iters, bool has_x, int x, bool has_y, int y, State::State(size_t max_iters, bool has_x, int x, bool has_y, int y,
int thread_i, int n_threads)
: started_(false), total_iterations_(0), : started_(false), total_iterations_(0),
has_range_x_(has_x), range_x_(x), has_range_x_(has_x), range_x_(x),
has_range_y_(has_y), range_y_(y), has_range_y_(has_y), range_y_(y),
bytes_processed_(0), items_processed_(0), bytes_processed_(0), items_processed_(0),
thread_index(thread_i), thread_index(thread_i),
threads(n_threads),
max_iterations(max_iters) max_iterations(max_iters)
{ {
CHECK(max_iterations != 0) << "At least one iteration must be run"; CHECK(max_iterations != 0) << "At least one iteration must be run";
CHECK_LT(thread_index, threads) << "thread_index must be less than threads";
} }
void State::PauseTiming() { void State::PauseTiming() {

View File

@ -150,5 +150,29 @@ static void BM_LongTest(benchmark::State& state) {
} }
BENCHMARK(BM_LongTest)->Range(1<<16,1<<28); BENCHMARK(BM_LongTest)->Range(1<<16,1<<28);
// Multi-threaded benchmark: every thread writes 1 into its own contiguous
// slice of a shared vector. range_x() is divided by sizeof(int) to get the
// element count, and that count is split evenly across state.threads
// (integer division, so any remainder is left untouched).
// Thread 0 allocates the shared vector before the timed loop and frees it
// afterwards.
// NOTE(review): this presumably relies on KeepRunning() synchronizing the
// threads at the start and end of the loop so that the allocation/delete
// done by thread 0 is safe for the others -- confirm.
static void BM_ParallelMemset(benchmark::State& state) {
  const int element_count = state.range_x() / sizeof(int);
  const int slice_length = element_count / state.threads;
  const int slice_begin = slice_length * state.thread_index;
  const int slice_end = slice_begin + slice_length;
  const bool owns_vector = (state.thread_index == 0);

  if (owns_vector) {
    test_vector = new std::vector<int>(element_count);
  }

  while (state.KeepRunning()) {
    int idx = slice_begin;
    while (idx < slice_end) {
      // Slices of different threads are disjoint, so test_vector_mu
      // does not have to be held here.
      benchmark::DoNotOptimize(test_vector->at(idx) = 1);
      ++idx;
    }
  }

  if (owns_vector) {
    delete test_vector;
  }
}
BENCHMARK(BM_ParallelMemset)->Arg(10 << 20)->ThreadRange(1, 4);
BENCHMARK_MAIN() BENCHMARK_MAIN()