From 007efee75186d1aa54be7e83e80986e753a2ee44 Mon Sep 17 00:00:00 2001 From: Jakob Buchgraber Date: Mon, 15 Feb 2016 14:19:43 +0100 Subject: [PATCH] Add number of threads to State. Having access to the thread count from within a benchmark is useful when one wants to distribute a workload dynamically among the threads running a benchmark in parallel, e.g. when using ThreadRange() or ThreadPerCpu(). --- include/benchmark/benchmark_api.h | 5 ++++- src/benchmark.cc | 6 ++++-- test/benchmark_test.cc | 24 ++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/include/benchmark/benchmark_api.h b/include/benchmark/benchmark_api.h index 55235878..10063f63 100644 --- a/include/benchmark/benchmark_api.h +++ b/include/benchmark/benchmark_api.h @@ -221,7 +221,7 @@ inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { // benchmark to use. class State { public: - State(size_t max_iters, bool has_x, int x, bool has_y, int y, int thread_i); + State(size_t max_iters, bool has_x, int x, bool has_y, int y, int thread_i, int n_threads); // Returns true iff the benchmark should continue through another iteration. // NOTE: A benchmark may not return from the test until KeepRunning() has @@ -358,7 +358,10 @@ private: size_t items_processed_; public: + // Index of the executing thread. Values from [0, threads). const int thread_index; + // Number of threads concurrently executing the benchmark. 
+ const int threads; const size_t max_iterations; private: diff --git a/src/benchmark.cc b/src/benchmark.cc index 269b7978..08b180e3 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -599,7 +599,7 @@ namespace { void RunInThread(const benchmark::internal::Benchmark::Instance* b, size_t iters, int thread_id, ThreadStats* total) EXCLUDES(GetBenchmarkLock()) { - State st(iters, b->has_arg1, b->arg1, b->has_arg2, b->arg2, thread_id); + State st(iters, b->has_arg1, b->arg1, b->has_arg2, b->arg2, thread_id, b->threads); b->benchmark->Run(st); CHECK(st.iterations() == st.max_iterations) << "Benchmark returned before State::KeepRunning() returned false!"; @@ -736,15 +736,17 @@ void RunBenchmark(const benchmark::internal::Benchmark::Instance& b, } // namespace State::State(size_t max_iters, bool has_x, int x, bool has_y, int y, - int thread_i) + int thread_i, int n_threads) : started_(false), total_iterations_(0), has_range_x_(has_x), range_x_(x), has_range_y_(has_y), range_y_(y), bytes_processed_(0), items_processed_(0), thread_index(thread_i), + threads(n_threads), max_iterations(max_iters) { CHECK(max_iterations != 0) << "At least one iteration must be run"; + CHECK_LT(thread_index, threads) << "thread_index must be less than threads"; } void State::PauseTiming() { diff --git a/test/benchmark_test.cc b/test/benchmark_test.cc index 2d268ce4..97abb68f 100644 --- a/test/benchmark_test.cc +++ b/test/benchmark_test.cc @@ -150,5 +150,29 @@ static void BM_LongTest(benchmark::State& state) { } BENCHMARK(BM_LongTest)->Range(1<<16,1<<28); +static void BM_ParallelMemset(benchmark::State& state) { + int size = state.range_x() / sizeof(int); + int thread_size = size / state.threads; + int from = thread_size * state.thread_index; + int to = from + thread_size; + + if (state.thread_index == 0) { + test_vector = new std::vector<int>(size); + } + + while (state.KeepRunning()) { + for (int i = from; i < to; i++) { + // No need to lock test_vector_mu as ranges + // do not overlap between 
threads. + benchmark::DoNotOptimize(test_vector->at(i) = 1); + } + } + + if (state.thread_index == 0) { + delete test_vector; + } +} +BENCHMARK(BM_ParallelMemset)->Arg(10 << 20)->ThreadRange(1, 4); + BENCHMARK_MAIN()