From e428b9eec3291f564073cd4bc98e8c312cbfcafd Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Fri, 27 Mar 2015 16:35:46 -0400 Subject: [PATCH] Add 'benchmark::DoNotOptimize(...)' to help users prevent optimizations --- README.md | 13 ++++++++++++ include/benchmark/benchmark_api.h | 19 ++++++++++++++++++ src/benchmark.cc | 2 ++ test/basic_test.cc | 33 +++++++++++-------------------- test/benchmark_test.cc | 14 +++++-------- 5 files changed, 50 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index 553e34af..4c0d77ae 100644 --- a/README.md +++ b/README.md @@ -160,6 +160,19 @@ static void BM_MultiThreaded(benchmark::State& state) { } } BENCHMARK(BM_MultiThreaded)->Threads(2); + +To prevent a value or expression from being optimized away by the compiler +the `benchmark::DoNotOptimize(...)` function can be used. + +```c++ +static void BM_test(benchmark::State& state) { + while (state.KeepRunning()) { + int x = 0; + for (int i=0; i < 64; ++i) { + benchmark::DoNotOptimize(x += i); + } + } +} ``` diff --git a/include/benchmark/benchmark_api.h b/include/benchmark/benchmark_api.h index 340ef6f8..ed55c88a 100644 --- a/include/benchmark/benchmark_api.h +++ b/include/benchmark/benchmark_api.h @@ -174,8 +174,27 @@ struct EnableIfString::type> { typedef int type; }; +void UseCharPointer(char const volatile*); + } // end namespace internal +// The DoNotOptimize(...) function can be used to prevent a value or +// expression from being optimized away by the compiler. This function is +// intended to add little to no overhead. 
+// See: http://stackoverflow.com/questions/28287064 +#if defined(__GNUC__) +template +inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { + asm volatile("" : "+r" (const_cast(value))); +} +#else +template +inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { + internal::UseCharPointer(&reinterpret_cast(value)); +} +#endif + + // State is passed to a running Benchmark and contains state for the // benchmark to use. class State { diff --git a/src/benchmark.cc b/src/benchmark.cc index 5fc149bb..9643a2ab 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -73,6 +73,8 @@ namespace benchmark { namespace internal { +void UseCharPointer(char const volatile*) {} + // NOTE: This is a dummy "mutex" type used to denote the actual mutex // returned by GetBenchmarkLock(). This is only used to placate the thread // safety warnings by giving the return of GetBenchmarkLock() a name. diff --git a/test/basic_test.cc b/test/basic_test.cc index fedcf08b..f7b45be7 100644 --- a/test/basic_test.cc +++ b/test/basic_test.cc @@ -8,8 +8,7 @@ void BM_empty(benchmark::State& state) { while (state.KeepRunning()) { - volatile std::size_t x = state.iterations(); - ((void)x); + benchmark::DoNotOptimize(state.iterations()); } } BENCHMARK(BM_empty); @@ -18,8 +17,7 @@ BENCHMARK(BM_empty)->ThreadPerCpu(); void BM_spin_empty(benchmark::State& state) { while (state.KeepRunning()) { for (int x = 0; x < state.range_x(); ++x) { - volatile int dummy = x; - ((void)dummy); + benchmark::DoNotOptimize(x); } } } @@ -28,13 +26,11 @@ BASIC_BENCHMARK_TEST(BM_spin_empty)->ThreadPerCpu(); void BM_spin_pause_before(benchmark::State& state) { for (int i = 0; i < state.range_x(); ++i) { - volatile int dummy = i; - ((void)dummy); + benchmark::DoNotOptimize(i); } while(state.KeepRunning()) { for (int i = 0; i < state.range_x(); ++i) { - volatile int dummy = i; - ((void)dummy); + benchmark::DoNotOptimize(i); } } } @@ -46,13 +42,11 @@ void BM_spin_pause_during(benchmark::State& state) { 
while(state.KeepRunning()) { state.PauseTiming(); for (int i = 0; i < state.range_x(); ++i) { - volatile int dummy = i; - ((void)dummy); + benchmark::DoNotOptimize(i); } state.ResumeTiming(); for (int i = 0; i < state.range_x(); ++i) { - volatile int dummy = i; - ((void)dummy); + benchmark::DoNotOptimize(i); } } } @@ -81,13 +75,11 @@ BENCHMARK(BM_pause_during_realtime)->ThreadPerCpu(); void BM_spin_pause_after(benchmark::State& state) { while(state.KeepRunning()) { for (int i = 0; i < state.range_x(); ++i) { - volatile int dummy = i; - ((void)dummy); + benchmark::DoNotOptimize(i); } } for (int i = 0; i < state.range_x(); ++i) { - volatile int dummy = i; - ((void)dummy); + benchmark::DoNotOptimize(i); } } BASIC_BENCHMARK_TEST(BM_spin_pause_after); @@ -96,18 +88,15 @@ BASIC_BENCHMARK_TEST(BM_spin_pause_after)->ThreadPerCpu(); void BM_spin_pause_before_and_after(benchmark::State& state) { for (int i = 0; i < state.range_x(); ++i) { - volatile int dummy = i; - ((void)dummy); + benchmark::DoNotOptimize(i); } while(state.KeepRunning()) { for (int i = 0; i < state.range_x(); ++i) { - volatile int dummy = i; - ((void)dummy); + benchmark::DoNotOptimize(i); } } for (int i = 0; i < state.range_x(); ++i) { - volatile int dummy = i; - ((void)dummy); + benchmark::DoNotOptimize(i); } } BASIC_BENCHMARK_TEST(BM_spin_pause_before_and_after); diff --git a/test/benchmark_test.cc b/test/benchmark_test.cc index f5abe61b..2fb33558 100644 --- a/test/benchmark_test.cc +++ b/test/benchmark_test.cc @@ -84,9 +84,8 @@ BENCHMARK_RANGE(BM_CalculatePiRange, 1, 1024 * 1024); static void BM_CalculatePi(benchmark::State& state) { static const int depth = 1024; - double pi BENCHMARK_UNUSED = 0.0; while (state.KeepRunning()) { - pi = CalculatePi(depth); + benchmark::DoNotOptimize(CalculatePi(depth)); } } BENCHMARK(BM_CalculatePi)->Threads(8); @@ -129,11 +128,8 @@ BENCHMARK_TEMPLATE(BM_Sequential, std::vector, int)->Arg(512); static void BM_StringCompare(benchmark::State& state) { std::string 
s1(state.range_x(), '-'); std::string s2(state.range_x(), '-'); - int r = 0; while (state.KeepRunning()) - r |= s1.compare(s2); - // Prevent compiler optimizations - assert(r != std::numeric_limits::max()); + benchmark::DoNotOptimize(s1.compare(s2)); } BENCHMARK(BM_StringCompare)->Range(1, 1<<20); @@ -159,10 +155,10 @@ BENCHMARK(BM_SetupTeardown)->ThreadPerCpu(); static void BM_LongTest(benchmark::State& state) { double tracker = 0.0; - while (state.KeepRunning()) + while (state.KeepRunning()) { for (int i = 0; i < state.range_x(); ++i) - tracker += i; - assert(tracker > 1.0); + benchmark::DoNotOptimize(tracker += i); + } } BENCHMARK(BM_LongTest)->Range(1<<16,1<<28);