benchmark/test/donotoptimize_assembly_test.cc
Alexander Popov 8545dfb3ea
Fix DoNotOptimize() GCC copy overhead (#1340) (#1410)
* Fix DoNotOptimize() GCC copy overhead (#1340)

The issue is that GCC DoNotOptimize() does a full copy of an argument
if it's not a pointer and it slows down a benchmark. If an argument is big
enough there is a memcpy() call for copying the argument. An argument
object can be a big object so DoNotOptimize() could add sufficient
overhead and affects benchmark results.

The cause is in GCC behavior with asm volatile constraints. Looks like GCC
trying to use r(register) constraint for all cases despite object size.
See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105519

The solution is the split DoNotOptimize() in two cases - value fits
in register and value doesn't fit in register. And use case specific
asm constraint. std::is_trivially_copyable trait is needed because
"+r" constraint doesn't work with non trivial copyable objects.

- Fix requires support C++11 feature std::is_trivially_copyable from GCC
  compiler. The feature has been supported since GCC 5
- Fallback for GCC version < 5 still exists but it uses "m" constraint
  which means a little bit more overhead in some cases
- Add assembly tests for issued cases

Fixes #1340

* Add supported compiler versions info for assembly tests

- Assembly tests are inherently non-portable. So explicitly add GCC
  and Clang versions required for reliable tests passed
- Write a warning message if the current compiler version isn't supported
2022-06-20 10:12:58 +01:00

201 lines
5.1 KiB
C++

#include <benchmark/benchmark.h>
#ifdef __clang__
#pragma clang diagnostic ignored "-Wreturn-type"
#endif
extern "C" {
extern int ExternInt;
extern int ExternInt2;
extern int ExternInt3;
extern int BigArray[2049];
const int ConstBigArray[2049]{};
inline int Add42(int x) { return x + 42; }
struct NotTriviallyCopyable {
NotTriviallyCopyable();
explicit NotTriviallyCopyable(int x) : value(x) {}
NotTriviallyCopyable(NotTriviallyCopyable const &);
int value;
};
struct Large {
int value;
int data[2];
};
struct ExtraLarge {
int arr[2049];
};
}
extern ExtraLarge ExtraLargeObj;
const ExtraLarge ConstExtraLargeObj{};
// CHECK-LABEL: test_with_rvalue:
extern "C" void test_with_rvalue() {
benchmark::DoNotOptimize(Add42(0));
// CHECK: movl $42, %eax
// CHECK: ret
}
// CHECK-LABEL: test_with_large_rvalue:
extern "C" void test_with_large_rvalue() {
benchmark::DoNotOptimize(Large{ExternInt, {ExternInt, ExternInt}});
// CHECK: ExternInt(%rip)
// CHECK: movl %eax, -{{[0-9]+}}(%[[REG:[a-z]+]]
// CHECK: movl %eax, -{{[0-9]+}}(%[[REG]])
// CHECK: movl %eax, -{{[0-9]+}}(%[[REG]])
// CHECK: ret
}
// CHECK-LABEL: test_with_non_trivial_rvalue:
extern "C" void test_with_non_trivial_rvalue() {
benchmark::DoNotOptimize(NotTriviallyCopyable(ExternInt));
// CHECK: mov{{l|q}} ExternInt(%rip)
// CHECK: ret
}
// CHECK-LABEL: test_with_lvalue:
extern "C" void test_with_lvalue() {
int x = 101;
benchmark::DoNotOptimize(x);
// CHECK-GNU: movl $101, %eax
// CHECK-CLANG: movl $101, -{{[0-9]+}}(%[[REG:[a-z]+]])
// CHECK: ret
}
// CHECK-LABEL: test_with_large_lvalue:
extern "C" void test_with_large_lvalue() {
Large L{ExternInt, {ExternInt, ExternInt}};
benchmark::DoNotOptimize(L);
// CHECK: ExternInt(%rip)
// CHECK: movl %eax, -{{[0-9]+}}(%[[REG:[a-z]+]])
// CHECK: movl %eax, -{{[0-9]+}}(%[[REG]])
// CHECK: movl %eax, -{{[0-9]+}}(%[[REG]])
// CHECK: ret
}
// CHECK-LABEL: test_with_extra_large_lvalue_with_op:
extern "C" void test_with_extra_large_lvalue_with_op() {
ExtraLargeObj.arr[16] = 42;
benchmark::DoNotOptimize(ExtraLargeObj);
// CHECK: movl $42, ExtraLargeObj+64(%rip)
// CHECK: ret
}
// CHECK-LABEL: test_with_big_array_with_op
extern "C" void test_with_big_array_with_op() {
BigArray[16] = 42;
benchmark::DoNotOptimize(BigArray);
// CHECK: movl $42, BigArray+64(%rip)
// CHECK: ret
}
// CHECK-LABEL: test_with_non_trivial_lvalue:
extern "C" void test_with_non_trivial_lvalue() {
NotTriviallyCopyable NTC(ExternInt);
benchmark::DoNotOptimize(NTC);
// CHECK: ExternInt(%rip)
// CHECK: movl %eax, -{{[0-9]+}}(%[[REG:[a-z]+]])
// CHECK: ret
}
// CHECK-LABEL: test_with_const_lvalue:
extern "C" void test_with_const_lvalue() {
const int x = 123;
benchmark::DoNotOptimize(x);
// CHECK: movl $123, %eax
// CHECK: ret
}
// CHECK-LABEL: test_with_large_const_lvalue:
extern "C" void test_with_large_const_lvalue() {
const Large L{ExternInt, {ExternInt, ExternInt}};
benchmark::DoNotOptimize(L);
// CHECK: ExternInt(%rip)
// CHECK: movl %eax, -{{[0-9]+}}(%[[REG:[a-z]+]])
// CHECK: movl %eax, -{{[0-9]+}}(%[[REG]])
// CHECK: movl %eax, -{{[0-9]+}}(%[[REG]])
// CHECK: ret
}
// CHECK-LABEL: test_with_const_extra_large_obj:
extern "C" void test_with_const_extra_large_obj() {
benchmark::DoNotOptimize(ConstExtraLargeObj);
// CHECK: ret
}
// CHECK-LABEL: test_with_const_big_array
extern "C" void test_with_const_big_array() {
benchmark::DoNotOptimize(ConstBigArray);
// CHECK: ret
}
// CHECK-LABEL: test_with_non_trivial_const_lvalue:
extern "C" void test_with_non_trivial_const_lvalue() {
const NotTriviallyCopyable Obj(ExternInt);
benchmark::DoNotOptimize(Obj);
// CHECK: mov{{q|l}} ExternInt(%rip)
// CHECK: ret
}
// CHECK-LABEL: test_div_by_two:
extern "C" int test_div_by_two(int input) {
int divisor = 2;
benchmark::DoNotOptimize(divisor);
return input / divisor;
// CHECK: movl $2, [[DEST:.*]]
// CHECK: idivl [[DEST]]
// CHECK: ret
}
// CHECK-LABEL: test_inc_integer:
extern "C" int test_inc_integer() {
int x = 0;
for (int i = 0; i < 5; ++i) benchmark::DoNotOptimize(++x);
// CHECK: movl $1, [[DEST:.*]]
// CHECK: {{(addl \$1,|incl)}} [[DEST]]
// CHECK: {{(addl \$1,|incl)}} [[DEST]]
// CHECK: {{(addl \$1,|incl)}} [[DEST]]
// CHECK: {{(addl \$1,|incl)}} [[DEST]]
// CHECK-CLANG: movl [[DEST]], %eax
// CHECK: ret
return x;
}
// CHECK-LABEL: test_pointer_rvalue
extern "C" void test_pointer_rvalue() {
// CHECK: movl $42, [[DEST:.*]]
// CHECK: leaq [[DEST]], %rax
// CHECK-CLANG: movq %rax, -{{[0-9]+}}(%[[REG:[a-z]+]])
// CHECK: ret
int x = 42;
benchmark::DoNotOptimize(&x);
}
// CHECK-LABEL: test_pointer_const_lvalue:
extern "C" void test_pointer_const_lvalue() {
// CHECK: movl $42, [[DEST:.*]]
// CHECK: leaq [[DEST]], %rax
// CHECK-CLANG: movq %rax, -{{[0-9]+}}(%[[REG:[a-z]+]])
// CHECK: ret
int x = 42;
int *const xp = &x;
benchmark::DoNotOptimize(xp);
}
// CHECK-LABEL: test_pointer_lvalue:
extern "C" void test_pointer_lvalue() {
// CHECK: movl $42, [[DEST:.*]]
// CHECK: leaq [[DEST]], %rax
// CHECK-CLANG: movq %rax, -{{[0-9]+}}(%[[REG:[a-z+]+]])
// CHECK: ret
int x = 42;
int *xp = &x;
benchmark::DoNotOptimize(xp);
}