mirror of
https://github.com/google/benchmark.git
synced 2025-01-14 05:40:14 +08:00
f92903cc53
This is a shameless rip-off of https://github.com/google/benchmark/pull/646
I did promise to look into why that proposed PR was producing
so much worse assembly, and so I finally did.
The reason is that the diff changes `size_t` (unsigned) to `int64_t` (signed).
There is this nice little `assert`:
7a1c370283/include/benchmark/benchmark.h (L744)
It ensures that we didn't magically decide to advance our iterator
when we should have finished benchmarking.
When `cached_` was unsigned, the `assert` was `cached_ UGT 0`.
But we only ever get to that `assert` if `cached_ NE 0`,
and naturally if `cached_` is not `0`, then it is bigger than `0`,
so the `assert` is tautological, and gets folded away.
But now that `cached_` became signed, the assert became `cached_ SGT 0`.
And we still only know that `cached_ NE 0`, so the assert can't be
optimized out, or at least it doesn't currently.
Regardless of whether or not that is a bug in itself,
that particular diff would have regressed the normal 64-bit systems,
by halving the maximal iteration space (since we go from an unsigned counter
to a signed one of the same bit-width), which seems like a bug.
And it just so happens that fixing *this* bug also fixes the other bug.
This produces fully (bit-by-bit) identical state_assembly_test.s
The filecheck change is actually needed regardless of this patch,
else this test does not pass for me even without this diff.
69 lines
1.8 KiB
C++
69 lines
1.8 KiB
C++
#include <benchmark/benchmark.h>
|
|
|
|
// When compiling with Clang, silence the -Wreturn-type diagnostic from this
// point on. The guard keeps the Clang-specific pragma away from other compilers.
// NOTE(review): both functions visible in this file return a value on every
// path, so the reason for the suppression is not evident here -- confirm
// before removing it.
#ifdef __clang__
|
|
#pragma clang diagnostic ignored "-Wreturn-type"
|
|
#endif
|
|
|
|
// External symbols declared with C linkage. No definition for any of them is
// visible in this file, so they are presumably provided by another translation
// unit or left unresolved because only the emitted assembly is inspected --
// TODO confirm against the build rules.
// The surrounding markers switch clang-format off for this group only.
// clang-format off
|
|
extern "C" {
|
|
// Integer with external linkage; not referenced by the code visible here.
extern int ExternInt;
|
|
// Supplies the benchmark::State object that each test function iterates over.
benchmark::State& GetState();
|
|
// Function taking no arguments; not referenced by the code visible here.
void Fn();
|
|
}
|
|
// clang-format on
|
|
|
|
using benchmark::State;
|
|
|
|
// Assembly test for the range-based `for (auto _ : state)` loop form.
//
// The function body is intentionally minimal; what is actually verified is
// the generated assembly, which the FileCheck directives embedded in the
// comments below describe. Those directive comments are part of the test and
// must not be reworded. The expected shape is:
//   1. a call to benchmark::State::StartKeepRunning(),
//   2. immediately followed by a zero test of %rbx and a jump past the loop,
//   3. a loop consisting of a label, one counter update, and a `jne` back to
//      that label,
//   4. a call to benchmark::State::FinishKeepRunning(),
//   5. the constant 101 loaded into %eax and a `ret`.
// NOTE(review): the GNU- and CLANG-suffixed directives are presumably enabled
// per compiler by the test driver -- confirm in the build files.
// CHECK-LABEL: test_for_auto_loop:
|
|
extern "C" int test_for_auto_loop() {
|
|
State& S = GetState();
|
|
int x = 42;
|
|
// Expected prologue: start the run, then skip the loop if %rbx is zero.
// CHECK: [[CALL:call(q)*]] _ZN9benchmark5State16StartKeepRunningEv
|
|
// CHECK-NEXT: testq %rbx, %rbx
|
|
// CHECK-NEXT: je [[LOOP_END:.*]]
|
|
|
|
|
|
for (auto _ : S) {
|
|
// Expected loop: the label is directly followed by the counter update and
// the backward branch, i.e. no other instructions are allowed in between.
// CHECK: .L[[LOOP_HEAD:[a-zA-Z0-9_]+]]:
|
|
// CHECK-GNU-NEXT: subq $1, %rbx
|
|
// CHECK-CLANG-NEXT: {{(addq \$1, %rax|incq %rax|addq \$-1, %rbx)}}
|
|
// CHECK-NEXT: jne .L[[LOOP_HEAD]]
|
|
benchmark::DoNotOptimize(x);
|
|
}
|
|
// Expected epilogue: the loop-exit label, then the run is finished.
// CHECK: [[LOOP_END]]:
|
|
// CHECK: [[CALL]] _ZN9benchmark5State17FinishKeepRunningEv
|
|
|
|
// The literal 101 returned below is what the `movl $101, %eax` directive
// matches, so the two must be kept in sync.
// CHECK: movl $101, %eax
|
|
// CHECK: ret
|
|
return 101;
|
|
}
|
|
|
|
// Assembly test for the `while (state.KeepRunning())` loop form.
//
// As with the range-for test, the C++ body is minimal and the real
// expectations live in the FileCheck directives embedded in the comments;
// those directive comments are part of the test and must not be reworded.
// The expected shape is:
//   1. a jump (`je` or `jmp`) to the loop-header label, with the loop-body
//      label on the very next line,
//   2. in the body, a decrement of a counter register that is then stored
//      with `movq`,
//   3. in any order: a reload of that value, a zero test, a `jne` back to the
//      body, and the loop-header label itself,
//   4. a `cmpb $0` and `jne` to the loop-end label, then a call to
//      benchmark::State::StartKeepRunning(),
//   5. the loop-end label, a call to benchmark::State::FinishKeepRunning(),
//      the constant 101 loaded into %eax, and a `ret`.
// NOTE(review): the GNU- and CLANG-suffixed directives are presumably enabled
// per compiler by the test driver -- confirm in the build files.
// CHECK-LABEL: test_while_loop:
|
|
extern "C" int test_while_loop() {
|
|
State& S = GetState();
|
|
int x = 42;
|
|
|
|
// Expected entry: jump to the loop header; the body label follows directly.
// CHECK: j{{(e|mp)}} .L[[LOOP_HEADER:[a-zA-Z0-9_]+]]
|
|
// CHECK-NEXT: .L[[LOOP_BODY:[a-zA-Z0-9_]+]]:
|
|
while (S.KeepRunning()) {
|
|
// Expected body: decrement the counter register and store it to DEST.
// CHECK-GNU-NEXT: subq $1, %[[IREG:[a-z]+]]
|
|
// CHECK-CLANG-NEXT: {{(addq \$-1,|decq)}} %[[IREG:[a-z]+]]
|
|
// CHECK: movq %[[IREG]], [[DEST:.*]]
|
|
benchmark::DoNotOptimize(x);
|
|
}
|
|
// Expected loop header; the DAG form lets these four lines match in any
// relative order.
// CHECK-DAG: movq [[DEST]], %[[IREG]]
|
|
// CHECK-DAG: testq %[[IREG]], %[[IREG]]
|
|
// CHECK-DAG: jne .L[[LOOP_BODY]]
|
|
// CHECK-DAG: .L[[LOOP_HEADER]]:
|
|
|
|
// Expected path once the counter test falls through: a byte compare against
// zero that either branches to the loop end or reaches the start call.
// CHECK: cmpb $0
|
|
// CHECK-NEXT: jne .L[[LOOP_END:[a-zA-Z0-9_]+]]
|
|
// CHECK: [[CALL:call(q)*]] _ZN9benchmark5State16StartKeepRunningEv
|
|
|
|
// Expected epilogue: the loop-end label, then the run is finished.
// CHECK: .L[[LOOP_END]]:
|
|
// CHECK: [[CALL]] _ZN9benchmark5State17FinishKeepRunningEv
|
|
|
|
// The literal 101 returned below is what the `movl $101, %eax` directive
// matches, so the two must be kept in sync.
// CHECK: movl $101, %eax
|
|
// CHECK: ret
|
|
return 101;
|
|
}
|