mirror of
https://github.com/google/benchmark.git
synced 2025-01-17 07:10:18 +08:00
f92903cc53
This is a shameless rip-off of https://github.com/google/benchmark/pull/646
I did promise to look into why that proposed PR was producing
so much worse assembly, and so I finally did.
The reason is - that diff changes `size_t` (unsigned) to `int64_t` (signed).
There is this nice little `assert`:
7a1c370283/include/benchmark/benchmark.h (L744)
It ensures that we didn't magically decide to advance our iterator
when we should have finished benchmarking.
When `cached_` was unsigned, the `assert` was `cached_ UGT 0`.
But we only ever get to that `assert` if `cached_ NE 0`,
and naturally if `cached_` is not `0`, then it is bigger than `0`,
so the `assert` is tautological, and gets folded away.
But now that `cached_` became signed, the assert became `cached_ SGT 0`.
And we still only know that `cached_ NE 0`, so the assert can't be
optimized out, or at least it currently isn't.
Regardless of whether or not that is a bug in itself,
that particular diff would have regressed normal 64-bit systems
by halving the maximum iteration space (since we go from an unsigned counter
to a signed one of the same bit-width), which seems like a bug.
And it just so happens that fixing *this* bug also fixes the other bug.
This produces a fully (bit-by-bit) identical state_assembly_test.s.
The filecheck change is actually needed regardless of this patch,
else this test does not pass for me even without this diff.
137 lines
3.7 KiB
C++
137 lines
3.7 KiB
C++
|
|
#include "benchmark/benchmark.h"
|
|
|
|
// Shorthand used by the spin benchmarks in this file: expands to BENCHMARK(x)
// with Arg(8), Arg(512) and Arg(8192) chained onto it. Further modifiers such
// as ->ThreadPerCpu() can still be appended after the macro invocation.
#define BASIC_BENCHMARK_TEST(x) BENCHMARK(x)->Arg(8)->Arg(512)->Arg(8192)
|
|
|
|
void BM_empty(benchmark::State& state) {
|
|
for (auto _ : state) {
|
|
benchmark::DoNotOptimize(state.iterations());
|
|
}
|
|
}
|
|
BENCHMARK(BM_empty);
|
|
BENCHMARK(BM_empty)->ThreadPerCpu();
|
|
|
|
void BM_spin_empty(benchmark::State& state) {
|
|
for (auto _ : state) {
|
|
for (int x = 0; x < state.range(0); ++x) {
|
|
benchmark::DoNotOptimize(x);
|
|
}
|
|
}
|
|
}
|
|
BASIC_BENCHMARK_TEST(BM_spin_empty);
|
|
BASIC_BENCHMARK_TEST(BM_spin_empty)->ThreadPerCpu();
|
|
|
|
void BM_spin_pause_before(benchmark::State& state) {
|
|
for (int i = 0; i < state.range(0); ++i) {
|
|
benchmark::DoNotOptimize(i);
|
|
}
|
|
for (auto _ : state) {
|
|
for (int i = 0; i < state.range(0); ++i) {
|
|
benchmark::DoNotOptimize(i);
|
|
}
|
|
}
|
|
}
|
|
BASIC_BENCHMARK_TEST(BM_spin_pause_before);
|
|
BASIC_BENCHMARK_TEST(BM_spin_pause_before)->ThreadPerCpu();
|
|
|
|
void BM_spin_pause_during(benchmark::State& state) {
|
|
for (auto _ : state) {
|
|
state.PauseTiming();
|
|
for (int i = 0; i < state.range(0); ++i) {
|
|
benchmark::DoNotOptimize(i);
|
|
}
|
|
state.ResumeTiming();
|
|
for (int i = 0; i < state.range(0); ++i) {
|
|
benchmark::DoNotOptimize(i);
|
|
}
|
|
}
|
|
}
|
|
BASIC_BENCHMARK_TEST(BM_spin_pause_during);
|
|
BASIC_BENCHMARK_TEST(BM_spin_pause_during)->ThreadPerCpu();
|
|
|
|
void BM_pause_during(benchmark::State& state) {
|
|
for (auto _ : state) {
|
|
state.PauseTiming();
|
|
state.ResumeTiming();
|
|
}
|
|
}
|
|
BENCHMARK(BM_pause_during);
|
|
BENCHMARK(BM_pause_during)->ThreadPerCpu();
|
|
BENCHMARK(BM_pause_during)->UseRealTime();
|
|
BENCHMARK(BM_pause_during)->UseRealTime()->ThreadPerCpu();
|
|
|
|
void BM_spin_pause_after(benchmark::State& state) {
|
|
for (auto _ : state) {
|
|
for (int i = 0; i < state.range(0); ++i) {
|
|
benchmark::DoNotOptimize(i);
|
|
}
|
|
}
|
|
for (int i = 0; i < state.range(0); ++i) {
|
|
benchmark::DoNotOptimize(i);
|
|
}
|
|
}
|
|
BASIC_BENCHMARK_TEST(BM_spin_pause_after);
|
|
BASIC_BENCHMARK_TEST(BM_spin_pause_after)->ThreadPerCpu();
|
|
|
|
void BM_spin_pause_before_and_after(benchmark::State& state) {
|
|
for (int i = 0; i < state.range(0); ++i) {
|
|
benchmark::DoNotOptimize(i);
|
|
}
|
|
for (auto _ : state) {
|
|
for (int i = 0; i < state.range(0); ++i) {
|
|
benchmark::DoNotOptimize(i);
|
|
}
|
|
}
|
|
for (int i = 0; i < state.range(0); ++i) {
|
|
benchmark::DoNotOptimize(i);
|
|
}
|
|
}
|
|
BASIC_BENCHMARK_TEST(BM_spin_pause_before_and_after);
|
|
BASIC_BENCHMARK_TEST(BM_spin_pause_before_and_after)->ThreadPerCpu();
|
|
|
|
void BM_empty_stop_start(benchmark::State& state) {
|
|
for (auto _ : state) {
|
|
}
|
|
}
|
|
BENCHMARK(BM_empty_stop_start);
|
|
BENCHMARK(BM_empty_stop_start)->ThreadPerCpu();
|
|
|
|
|
|
void BM_KeepRunning(benchmark::State& state) {
|
|
benchmark::IterationCount iter_count = 0;
|
|
assert(iter_count == state.iterations());
|
|
while (state.KeepRunning()) {
|
|
++iter_count;
|
|
}
|
|
assert(iter_count == state.iterations());
|
|
}
|
|
BENCHMARK(BM_KeepRunning);
|
|
|
|
void BM_KeepRunningBatch(benchmark::State& state) {
|
|
// Choose a prime batch size to avoid evenly dividing max_iterations.
|
|
const benchmark::IterationCount batch_size = 101;
|
|
benchmark::IterationCount iter_count = 0;
|
|
while (state.KeepRunningBatch(batch_size)) {
|
|
iter_count += batch_size;
|
|
}
|
|
assert(state.iterations() == iter_count);
|
|
}
|
|
BENCHMARK(BM_KeepRunningBatch);
|
|
|
|
void BM_RangedFor(benchmark::State& state) {
|
|
benchmark::IterationCount iter_count = 0;
|
|
for (auto _ : state) {
|
|
++iter_count;
|
|
}
|
|
assert(iter_count == state.max_iterations);
|
|
}
|
|
BENCHMARK(BM_RangedFor);
|
|
|
|
// Ensure that StateIterator provides all the necessary typedefs required to
|
|
// instantiate std::iterator_traits.
|
|
static_assert(std::is_same<
|
|
typename std::iterator_traits<benchmark::State::StateIterator>::value_type,
|
|
typename benchmark::State::StateIterator::value_type>::value, "");
|
|
|
|
// No main() is written by hand in the visible part of this file;
// BENCHMARK_MAIN() presumably expands to the program entry point that runs
// the benchmarks registered above — confirm against benchmark/benchmark.h.
BENCHMARK_MAIN();
|