mirror of
https://github.com/google/benchmark.git
synced 2024-12-27 13:00:36 +08:00
Provide a better implementation of DoNotOptimize(...).
This implementation is less likely to trigger internal compiler errors (ICEs) and is more correct. It also acts as a memory barrier, which helps prevent writes to global memory from being optimized away.
This commit is contained in:
parent
2149577f89
commit
7e40ff9e35
45
README.md
45
README.md
@ -279,7 +279,8 @@ BENCHMARK(BM_ManualTiming)->Range(1, 1<<17)->UseManualTime();
|
|||||||
|
|
||||||
### Preventing optimisation
|
### Preventing optimisation
|
||||||
To prevent a value or expression from being optimized away by the compiler
|
To prevent a value or expression from being optimized away by the compiler
|
||||||
the `benchmark::DoNotOptimize(...)` function can be used.
|
the `benchmark::DoNotOptimize(...)` and `benchmark::ClobberMemory()`
|
||||||
|
functions can be used.
|
||||||
|
|
||||||
```c++
|
```c++
|
||||||
static void BM_test(benchmark::State& state) {
|
static void BM_test(benchmark::State& state) {
|
||||||
@ -292,6 +293,48 @@ static void BM_test(benchmark::State& state) {
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
`DoNotOptimize(<expr>)` forces the *result* of `<expr>` to be stored in either
|
||||||
|
memory or a register. For GNU based compilers it acts as a read/write barrier
|
||||||
|
for global memory. More specifically it forces the compiler to flush pending
|
||||||
|
writes to memory and reload any other values as necessary.
|
||||||
|
|
||||||
|
Note that `DoNotOptimize(<expr>)` does not prevent optimizations on `<expr>`
|
||||||
|
in any way. `<expr>` may even be removed entirely when the result is already
|
||||||
|
known. For example:
|
||||||
|
|
||||||
|
```c++
|
||||||
|
/* Example 1: `<expr>` is removed entirely. */
|
||||||
|
int foo(int x) { return x + 42; }
|
||||||
|
while (...) DoNotOptimize(foo(0)); // Optimized to DoNotOptimize(42);
|
||||||
|
|
||||||
|
/* Example 2: The result of `<expr>` is computed once and only reused. */
|
||||||
|
int bar(int) __attribute__((const));
|
||||||
|
while (...) DoNotOptimize(bar(0)); // Optimized to:
|
||||||
|
// int __result__ = bar(0);
|
||||||
|
// while (...) DoNotOptimize(__result__);
|
||||||
|
```
|
||||||
|
|
||||||
|
The second tool for preventing optimizations is `ClobberMemory()`. In essence
|
||||||
|
`ClobberMemory()` forces the compiler to perform all pending writes to global
|
||||||
|
memory. Memory managed by block scope objects must be "escaped" using
|
||||||
|
`DoNotOptimize(...)` before it can be clobbered. In the example below
|
||||||
|
`ClobberMemory()` prevents the call to `v.push_back(42)` from being optimized
|
||||||
|
away.
|
||||||
|
|
||||||
|
```c++
|
||||||
|
static void BM_vector_push_back(benchmark::State& state) {
|
||||||
|
while (state.KeepRunning()) {
|
||||||
|
std::vector<int> v;
|
||||||
|
v.reserve(1);
|
||||||
|
benchmark::DoNotOptimize(v.data()); // Allow v.data() to be clobbered.
|
||||||
|
v.push_back(42);
|
||||||
|
benchmark::ClobberMemory(); // Force 42 to be written to memory.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Note that `ClobberMemory()` is only available for GNU based compilers.
|
||||||
|
|
||||||
### Set time unit manually
|
### Set time unit manually
|
||||||
If a benchmark runs a few milliseconds it may be hard to visually compare the
|
If a benchmark runs a few milliseconds it may be hard to visually compare the
|
||||||
measured times, since the output data is given in nanoseconds by default. In
|
measured times, since the output data is given in nanoseconds by default. In
|
||||||
|
@ -207,25 +207,24 @@ Benchmark* RegisterBenchmarkInternal(Benchmark*);
|
|||||||
|
|
||||||
// The DoNotOptimize(...) function can be used to prevent a value or
|
// The DoNotOptimize(...) function can be used to prevent a value or
|
||||||
// expression from being optimized away by the compiler. This function is
|
// expression from being optimized away by the compiler. This function is
|
||||||
// intented to add little to no overhead.
|
// intended to add little to no overhead.
|
||||||
// See: http://stackoverflow.com/questions/28287064
|
// See: https://youtu.be/nXaxk27zwlk?t=2441
|
||||||
#if defined(__GNUC__)
|
#if defined(__GNUC__)
|
||||||
// TODO(ericwf): Clang has a bug where it tries to always use a register
|
|
||||||
// even if value must be stored in memory. This causes codegen to fail.
|
|
||||||
// To work around this we remove the "r" modifier so the operand is always
|
|
||||||
// loaded into memory.
|
|
||||||
// GCC also has a bug where it complains about inconsistent operand constraints
|
|
||||||
// when "+rm" is used for a type larger than can fit in a register or two.
|
|
||||||
// For now force the operand to memory for both GCC and Clang.
|
|
||||||
template <class Tp>
|
template <class Tp>
|
||||||
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
|
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
|
||||||
asm volatile("" : "+m" (const_cast<Tp&>(value)));
|
asm volatile("" : : "g"(value) : "memory");
|
||||||
|
}
|
||||||
|
// Force the compiler to flush pending writes to global memory. Acts as an
|
||||||
|
// effective read/write barrier
|
||||||
|
inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
|
||||||
|
asm volatile("" : : : "memory");
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
template <class Tp>
|
template <class Tp>
|
||||||
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
|
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
|
||||||
internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
|
internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
|
||||||
}
|
}
|
||||||
|
// FIXME Add ClobberMemory() for non-gnu compilers
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// TimeUnit is passed to a benchmark in order to specify the order of magnitude
|
// TimeUnit is passed to a benchmark in order to specify the order of magnitude
|
||||||
|
Loading…
Reference in New Issue
Block a user