mirror of
https://github.com/google/benchmark.git
synced 2024-12-26 12:30:14 +08:00
Add tests to verify assembler output -- Fix DoNotOptimize. (#530)
* Add tests to verify assembler output -- Fix DoNotOptimize. For things like `DoNotOptimize`, `ClobberMemory`, and even `KeepRunning()`, it is important exactly what assembly they generate. However, we currently have no way to test this. Instead it must be manually validated every time a change occurs -- including a change in compiler version. This patch attempts to introduce a way to test the assembled output automatically. It's mirrors how LLVM verifies compiler output, and it uses LLVM FileCheck to run the tests in a similar way. The tests function by generating the assembly for a test in CMake, and then using FileCheck to verify the // CHECK lines in the source file are found in the generated assembly. Currently, the tests only run on 64-bit x86 systems under GCC and Clang, and when FileCheck is found on the system. Additionally, this patch tries to improve the code gen from DoNotOptimize. This should probably be a separate change, but I needed something to test. * Disable assembly tests on Bazel for now * Link FIXME to github issue * Fix Tests on OS X * fix strip_asm.py to work on both Linux and OS X like targets
This commit is contained in:
parent
df60aeb266
commit
7b03df7ff7
1
.gitignore
vendored
1
.gitignore
vendored
@ -6,6 +6,7 @@
|
||||
*.dylib
|
||||
*.cmake
|
||||
!/cmake/*.cmake
|
||||
!/test/AssemblyTests.cmake
|
||||
*~
|
||||
*.pyc
|
||||
__pycache__
|
||||
|
24
.travis.yml
24
.travis.yml
@ -34,6 +34,7 @@ matrix:
|
||||
env:
|
||||
- INSTALL_GCC6_FROM_PPA=1
|
||||
- COMPILER=g++-6 C_COMPILER=gcc-6 BUILD_TYPE=Debug
|
||||
- ENABLE_SANITIZER=1
|
||||
- EXTRA_FLAGS="-fno-omit-frame-pointer -g -O2 -fsanitize=undefined,address -fuse-ld=gold"
|
||||
- compiler: clang
|
||||
env: COMPILER=clang++ C_COMPILER=clang BUILD_TYPE=Debug
|
||||
@ -91,6 +92,7 @@ matrix:
|
||||
env:
|
||||
- COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=Debug
|
||||
- LIBCXX_BUILD=1 LIBCXX_SANITIZER="Undefined;Address"
|
||||
- ENABLE_SANITIZER=1
|
||||
- EXTRA_FLAGS="-stdlib=libc++ -g -O2 -fno-omit-frame-pointer -fsanitize=undefined,address -fno-sanitize-recover=all"
|
||||
- UBSAN_OPTIONS=print_stacktrace=1
|
||||
# Clang w/ libc++ and MSAN
|
||||
@ -102,6 +104,7 @@ matrix:
|
||||
env:
|
||||
- COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=Debug
|
||||
- LIBCXX_BUILD=1 LIBCXX_SANITIZER=MemoryWithOrigins
|
||||
- ENABLE_SANITIZER=1
|
||||
- EXTRA_FLAGS="-stdlib=libc++ -g -O2 -fno-omit-frame-pointer -fsanitize=memory -fsanitize-memory-track-origins"
|
||||
# Clang w/ libc++ and MSAN
|
||||
- compiler: clang
|
||||
@ -112,8 +115,8 @@ matrix:
|
||||
env:
|
||||
- COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=RelWithDebInfo
|
||||
- LIBCXX_BUILD=1 LIBCXX_SANITIZER=Thread
|
||||
- ENABLE_SANITIZER=1
|
||||
- EXTRA_FLAGS="-stdlib=libc++ -g -O2 -fno-omit-frame-pointer -fsanitize=thread -fno-sanitize-recover=all"
|
||||
|
||||
- os: osx
|
||||
osx_image: xcode8.3
|
||||
compiler: clang
|
||||
@ -131,15 +134,20 @@ matrix:
|
||||
- COMPILER=g++-7 C_COMPILER=gcc-7 BUILD_TYPE=Debug
|
||||
|
||||
before_script:
|
||||
- if [ -z "$BUILD_32_BITS" ]; then
|
||||
export BUILD_32_BITS=OFF && echo disabling 32 bit build;
|
||||
fi
|
||||
- if [ -n "${LIBCXX_BUILD}" ]; then
|
||||
source .travis-libcxx-setup.sh;
|
||||
fi
|
||||
- if [ -n "${ENABLE_SANITIZER}" ]; then
|
||||
export EXTRA_OPTIONS="-DBENCHMARK_ENABLE_ASSEMBLY_TESTS=OFF";
|
||||
else
|
||||
export EXTRA_OPTIONS="";
|
||||
fi
|
||||
- mkdir -p build && cd build
|
||||
|
||||
before_install:
|
||||
- if [ -z "$BUILD_32_BITS" ]; then
|
||||
export BUILD_32_BITS=OFF && echo disabling 32 bit build;
|
||||
fi
|
||||
- if [ -n "${INSTALL_GCC6_FROM_PPA}" ]; then
|
||||
sudo add-apt-repository -y "ppa:ubuntu-toolchain-r/test";
|
||||
sudo apt-get update --option Acquire::Retries=100 --option Acquire::http::Timeout="60";
|
||||
@ -147,7 +155,11 @@ before_install:
|
||||
|
||||
install:
|
||||
- if [ -n "${INSTALL_GCC6_FROM_PPA}" ]; then
|
||||
sudo -E apt-get -yq --no-install-suggests --no-install-recommends --force-yes install g++-6;
|
||||
sudo -E apt-get -yq --no-install-suggests --no-install-recommends install g++-6;
|
||||
fi
|
||||
- if [ "${TRAVIS_OS_NAME}" == "linux" -a "${BUILD_32_BITS}" == "OFF" ]; then
|
||||
sudo -E apt-get -y --no-install-suggests --no-install-recommends install llvm-3.9-tools;
|
||||
sudo cp /usr/lib/llvm-3.9/bin/FileCheck /usr/local/bin/;
|
||||
fi
|
||||
- if [ "${BUILD_TYPE}" == "Coverage" -a "${TRAVIS_OS_NAME}" == "linux" ]; then
|
||||
PATH=~/.local/bin:${PATH};
|
||||
@ -171,7 +183,7 @@ install:
|
||||
fi
|
||||
|
||||
script:
|
||||
- cmake -DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${COMPILER} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DCMAKE_CXX_FLAGS="${EXTRA_FLAGS}" -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON -DBENCHMARK_BUILD_32_BITS=${BUILD_32_BITS} ..
|
||||
- cmake -DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${COMPILER} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DCMAKE_CXX_FLAGS="${EXTRA_FLAGS}" -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON -DBENCHMARK_BUILD_32_BITS=${BUILD_32_BITS} ${EXTRA_OPTIONS} ..
|
||||
- make
|
||||
- ctest -C ${BUILD_TYPE} --output-on-failure
|
||||
- bazel test -c dbg --define google_benchmark.have_regex=posix --announce_rc --verbose_failures --test_output=errors --keep_going //test/...
|
||||
|
@ -27,10 +27,48 @@ option(BENCHMARK_DOWNLOAD_DEPENDENCIES "Allow the downloading and in-tree buildi
|
||||
# in cases where it is not possible to build or find a valid version of gtest.
|
||||
option(BENCHMARK_ENABLE_GTEST_TESTS "Enable building the unit tests which depend on gtest" ON)
|
||||
|
||||
set(ENABLE_ASSEMBLY_TESTS_DEFAULT OFF)
|
||||
# should_enable_assembly_tests()
#
# Decides whether the assembly verification tests can be enabled by default.
# Sets ENABLE_ASSEMBLY_TESTS_DEFAULT to ON in the caller's scope only when
# none of the early-outs below apply and LLVM FileCheck is found on the
# system; otherwise the caller's value is left untouched.
function(should_enable_assembly_tests)
  if(CMAKE_BUILD_TYPE)
    # Quote the expansion so an unusual build type value cannot split into
    # several arguments.
    string(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LOWER)
    # Pass the variable name (not its expansion) so if() never re-dereferences
    # the value as if it were another variable's name.
    if(CMAKE_BUILD_TYPE_LOWER MATCHES "coverage")
      # FIXME: The --coverage flag needs to be removed when building assembly
      # tests for this to work.
      return()
    endif()
  endif()

  if(MSVC)
    return()
  elseif(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
    return()
  elseif(NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
    # FIXME: Make these work on 32 bit builds
    return()
  elseif(BENCHMARK_BUILD_32_BITS)
    # FIXME: Make these work on 32 bit builds
    return()
  endif()

  # find_program() stores its result in the cache on its own, so there is no
  # need to FORCE-write the entry again (which would also stomp its type).
  find_program(LLVM_FILECHECK_EXE FileCheck)
  if(LLVM_FILECHECK_EXE)
    message(STATUS "LLVM FileCheck Found: ${LLVM_FILECHECK_EXE}")
  else()
    message(STATUS "Failed to find LLVM FileCheck")
    return()
  endif()

  set(ENABLE_ASSEMBLY_TESTS_DEFAULT ON PARENT_SCOPE)
endfunction()
|
||||
should_enable_assembly_tests()
|
||||
|
||||
# This option disables the building and running of the assembly verification tests
|
||||
option(BENCHMARK_ENABLE_ASSEMBLY_TESTS "Enable building and running the assembly tests"
|
||||
${ENABLE_ASSEMBLY_TESTS_DEFAULT})
|
||||
|
||||
# Make sure we can import our CMake functions
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules")
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
|
||||
|
||||
|
||||
# Read the git tags to determine the project version
|
||||
include(GetGitVersion)
|
||||
get_git_version(GIT_VERSION)
|
||||
|
@ -14,6 +14,8 @@ IRC channel: https://freenode.net #googlebenchmark
|
||||
|
||||
[Additional Tooling Documentation](docs/tools.md)
|
||||
|
||||
[Assembly Testing Documentation](docs/AssemblyTests.md)
|
||||
|
||||
|
||||
## Building
|
||||
|
||||
|
147
docs/AssemblyTests.md
Normal file
147
docs/AssemblyTests.md
Normal file
@ -0,0 +1,147 @@
|
||||
# Assembly Tests
|
||||
|
||||
The Benchmark library provides a number of functions whose primary
|
||||
purpose is to affect assembly generation, including `DoNotOptimize`
|
||||
and `ClobberMemory`. In addition there are other functions,
|
||||
such as `KeepRunning`, for which generating good assembly is paramount.
|
||||
|
||||
For these functions it's important to have tests that verify the
|
||||
correctness and quality of the implementation. This requires testing
|
||||
the code generated by the compiler.
|
||||
|
||||
This document describes how the Benchmark library tests compiler output,
|
||||
as well as how to properly write new tests.
|
||||
|
||||
|
||||
## Anatomy of a Test
|
||||
|
||||
Writing a test has two steps:
|
||||
|
||||
* Write the code you want to generate assembly for.
|
||||
* Add `// CHECK` lines to match against the verified assembly.
|
||||
|
||||
Example:
|
||||
```c++
|
||||
|
||||
// CHECK-LABEL: test_add:
|
||||
extern "C" int test_add() {
|
||||
extern int ExternInt;
|
||||
return ExternInt + 1;
|
||||
|
||||
// CHECK: movl ExternInt(%rip), %eax
|
||||
// CHECK: addl %eax
|
||||
// CHECK: ret
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
#### LLVM Filecheck
|
||||
|
||||
[LLVM's Filecheck](https://llvm.org/docs/CommandGuide/FileCheck.html)
|
||||
is used to test the generated assembly against the `// CHECK` lines
|
||||
specified in the test's source file. Please see the documentation
|
||||
linked above for information on how to write `CHECK` directives.
|
||||
|
||||
#### Tips and Tricks:
|
||||
|
||||
* Tests should match the minimal amount of output required to establish
|
||||
correctness. `CHECK` directives don't have to match on the exact next line
|
||||
after the previous match, so tests should omit checks for unimportant
|
||||
bits of assembly. ([`CHECK-NEXT`](https://llvm.org/docs/CommandGuide/FileCheck.html#the-check-next-directive)
|
||||
can be used to ensure a match occurs exactly after the previous match).
|
||||
|
||||
* The tests are compiled with `-O3 -g0`. So we're only testing the
|
||||
optimized output.
|
||||
|
||||
* The assembly output is further cleaned up using `tools/strip_asm.py`.
|
||||
This removes comments, assembler directives, and unused labels before
|
||||
the test is run.
|
||||
|
||||
* The generated and stripped assembly file for a test is output under
|
||||
`<build-directory>/test/<test-name>.s`
|
||||
|
||||
* Filecheck supports using [`CHECK` prefixes](https://llvm.org/docs/CommandGuide/FileCheck.html#cmdoption-check-prefixes)
|
||||
to specify lines that should only match in certain situations.
|
||||
The Benchmark tests use `CHECK-CLANG` and `CHECK-GNU` for lines that
|
||||
are only expected to match Clang or GCC's output respectively. Normal
|
||||
`CHECK` lines match against all compilers. (Note: `CHECK-NOT` and
|
||||
`CHECK-LABEL` are NOT prefixes. They are versions of non-prefixed
|
||||
`CHECK` lines)
|
||||
|
||||
* Use `extern "C"` to disable name mangling for specific functions. This
|
||||
makes them easier to name in the `CHECK` lines.
|
||||
|
||||
|
||||
## Problems Writing Portable Tests
|
||||
|
||||
Tests which check the code generated by a compiler are
|
||||
inherently non-portable. Different compilers and even different compiler
|
||||
versions may generate entirely different code. The Benchmark tests
|
||||
must tolerate this.
|
||||
|
||||
LLVM Filecheck provides a number of mechanisms to help write
|
||||
"more portable" tests; including [matching using regular expressions](https://llvm.org/docs/CommandGuide/FileCheck.html#filecheck-pattern-matching-syntax),
|
||||
allowing the creation of [named variables](https://llvm.org/docs/CommandGuide/FileCheck.html#filecheck-variables)
|
||||
for later matching, and [checking non-sequential matches](https://llvm.org/docs/CommandGuide/FileCheck.html#the-check-dag-directive).
|
||||
|
||||
#### Capturing Variables
|
||||
|
||||
For example, say GCC stores a variable in a register but Clang stores
|
||||
it in memory. To write a test that tolerates both cases we "capture"
|
||||
the destination of the store, and then use the captured expression
|
||||
to write the remainder of the test.
|
||||
|
||||
```c++
|
||||
// CHECK-LABEL: test_div_no_op_into_shr:
|
||||
extern "C" int test_div_no_op_into_shr(int value) {
|
||||
int divisor = 2;
|
||||
benchmark::DoNotOptimize(divisor); // hide the value from the optimizer
|
||||
return value / divisor;
|
||||
|
||||
// CHECK: movl $2, [[DEST:.*]]
|
||||
// CHECK: idivl [[DEST]]
|
||||
// CHECK: ret
|
||||
}
|
||||
```
|
||||
|
||||
#### Using Regular Expressions to Match Differing Output
|
||||
|
||||
Often tests require testing assembly lines which may subtly differ
|
||||
between compilers or compiler versions. A common example of this
|
||||
is matching stack frame addresses. In this case regular expressions
|
||||
can be used to match the differing bits of output. For example:
|
||||
|
||||
```c++
|
||||
int ExternInt;
|
||||
struct Point { int x, y, z; };
|
||||
|
||||
// CHECK-LABEL: test_store_point:
|
||||
extern "C" void test_store_point() {
|
||||
Point p{ExternInt, ExternInt, ExternInt};
|
||||
benchmark::DoNotOptimize(p);
|
||||
|
||||
// CHECK: movl ExternInt(%rip), %eax
|
||||
// CHECK: movl %eax, -{{[0-9]+}}(%rsp)
|
||||
// CHECK: movl %eax, -{{[0-9]+}}(%rsp)
|
||||
// CHECK: movl %eax, -{{[0-9]+}}(%rsp)
|
||||
// CHECK: ret
|
||||
}
|
||||
```
|
||||
|
||||
## Current Requirements and Limitations
|
||||
|
||||
The tests require Filecheck to be installed on the `PATH` of the
|
||||
build machine. Otherwise the tests will be disabled.
|
||||
|
||||
Additionally, as mentioned in the previous section, codegen tests are
|
||||
inherently non-portable. Currently the tests are limited to:
|
||||
|
||||
* x86_64 targets.
|
||||
* Compiled with GCC or Clang
|
||||
|
||||
Further work could be done, at least on a limited basis, to extend the
|
||||
tests to other architectures and compilers (using `CHECK` prefixes).
|
||||
|
||||
Furthermore, the tests fail for builds which specify additional flags
|
||||
that modify code generation, including `--coverage` or `-fsanitize=`.
|
||||
|
@ -303,15 +303,20 @@ BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams();
|
||||
// See: https://youtu.be/nXaxk27zwlk?t=2441
|
||||
#ifndef BENCHMARK_HAS_NO_INLINE_ASSEMBLY
|
||||
template <class Tp>
|
||||
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
|
||||
// Clang doesn't like the 'X' constraint on `value` and certain GCC versions
|
||||
// don't like the 'g' constraint. Attempt to placate them both.
|
||||
inline BENCHMARK_ALWAYS_INLINE
|
||||
void DoNotOptimize(Tp const& value) {
|
||||
asm volatile("" : : "r,m"(value) : "memory");
|
||||
}
|
||||
|
||||
template <class Tp>
|
||||
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) {
|
||||
#if defined(__clang__)
|
||||
asm volatile("" : : "g"(value) : "memory");
|
||||
asm volatile("" : "+r,m"(value) : : "memory");
|
||||
#else
|
||||
asm volatile("" : : "i,r,m"(value) : "memory");
|
||||
asm volatile("" : "+m,r"(value) : : "memory");
|
||||
#endif
|
||||
}
|
||||
|
||||
// Force the compiler to flush pending writes to global memory. Acts as an
|
||||
// effective read/write barrier
|
||||
inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
|
||||
|
45
test/AssemblyTests.cmake
Normal file
45
test/AssemblyTests.cmake
Normal file
@ -0,0 +1,45 @@
|
||||
|
||||
|
||||
# Collect the compile flags used when generating assembly for the tests.
# Each flag is only added when the C++ compiler accepts it.
set(ASM_TEST_FLAGS "")

# Every entry is "<flag>=<result variable>"; the result variable names are
# kept as-is because they are the names check_cxx_compiler_flag reports under.
foreach(asm_flag_probe
    "-O3=BENCHMARK_HAS_O3_FLAG"
    "-g0=BENCHMARK_HAS_G0_FLAG"
    "-fno-stack-protector=BENCHMARK_HAS_FNO_STACK_PROTECTOR_FLAG")
  string(REGEX REPLACE "=.*$" "" asm_flag "${asm_flag_probe}")
  string(REGEX REPLACE "^.*=" "" asm_flag_var "${asm_flag_probe}")
  check_cxx_compiler_flag("${asm_flag}" "${asm_flag_var}")
  if(${asm_flag_var})
    list(APPEND ASM_TEST_FLAGS "${asm_flag}")
  endif()
endforeach()
unset(asm_flag)
unset(asm_flag_var)

# NOTE(review): split_list is defined elsewhere in the project; presumably it
# turns the ;-separated list into a space-separated string -- confirm.
split_list(ASM_TEST_FLAGS)

# Upper-cased compiler id, used to build the compiler-specific CHECK prefix.
string(TOUPPER "${CMAKE_CXX_COMPILER_ID}" ASM_TEST_COMPILER)
|
||||
|
||||
# add_filecheck_test(<name> [CHECK_PREFIXES <prefix>...])
#
# Compiles <name>.cc to assembly (object library built with -S), strips the
# result with tools/strip_asm.py into <binary-dir>/<name>.s, and registers one
# CTest test per check prefix that runs LLVM FileCheck against that file.
#
#   name            Base name of the test source (<name>.cc) and of the targets.
#   CHECK_PREFIXES  FileCheck prefixes to test with; defaults to "CHECK".
#                   For each prefix P, both P and P-<COMPILER_ID> are enabled.
#
# This stays a macro (not a function) so that ASM_OUTPUT_FILE and the ARG_*
# variables remain visible to the caller exactly as before.
macro(add_filecheck_test name)
  # Parse only the arguments after <name>; ARGV would feed the positional
  # name into the keyword parser as an unparsed argument.
  cmake_parse_arguments(ARG "" "" "CHECK_PREFIXES" ${ARGN})

  add_library(${name} OBJECT ${name}.cc)
  # -S makes the "object" produced for this target an assembly listing.
  set_target_properties(${name} PROPERTIES COMPILE_FLAGS "-S ${ASM_TEST_FLAGS}")

  set(ASM_OUTPUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/${name}.s")
  add_custom_target(copy_${name} ALL
      COMMAND ${PROJECT_SOURCE_DIR}/tools/strip_asm.py
        $<TARGET_OBJECTS:${name}>
        ${ASM_OUTPUT_FILE}
      BYPRODUCTS ${ASM_OUTPUT_FILE}
      VERBATIM)
  add_dependencies(copy_${name} ${name})

  if (NOT ARG_CHECK_PREFIXES)
    set(ARG_CHECK_PREFIXES "CHECK")
  endif()

  foreach(prefix ${ARG_CHECK_PREFIXES})
    # Use the loop's prefix so each registered test checks its own prefix;
    # with the default "CHECK" this is the same command line as before.
    add_test(NAME run_${name}_${prefix}
        COMMAND
          ${LLVM_FILECHECK_EXE} ${name}.cc
          --input-file=${ASM_OUTPUT_FILE}
          --check-prefixes=${prefix},${prefix}-${ASM_TEST_COMPILER}
        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
  endforeach()
endmacro()
|
||||
|
@ -42,4 +42,7 @@ cc_library(
|
||||
] + (
|
||||
["@com_google_googletest//:gtest_main"] if (test_src in NEEDS_GTEST_MAIN) else []
|
||||
),
|
||||
) for test_src in glob(["*_test.cc"])]
|
||||
# FIXME: Add support for assembly tests to bazel.
|
||||
# See Issue #556
|
||||
# https://github.com/google/benchmark/issues/556
|
||||
) for test_src in glob(["*_test.cc"], exclude = ["*_assembly_test.cc"])]
|
||||
|
@ -22,6 +22,12 @@ if( NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG" )
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
check_cxx_compiler_flag(-O3 BENCHMARK_HAS_O3_FLAG)
|
||||
set(BENCHMARK_O3_FLAG "")
|
||||
if (BENCHMARK_HAS_O3_FLAG)
|
||||
set(BENCHMARK_O3_FLAG "-O3")
|
||||
endif()
|
||||
|
||||
# NOTE: These flags must be added after find_package(Threads REQUIRED) otherwise
|
||||
# they will break the configuration check.
|
||||
if (DEFINED BENCHMARK_CXX_LINKER_FLAGS)
|
||||
@ -159,6 +165,25 @@ if (BENCHMARK_ENABLE_GTEST_TESTS)
|
||||
add_gtest(statistics_test)
|
||||
endif(BENCHMARK_ENABLE_GTEST_TESTS)
|
||||
|
||||
###############################################################################
|
||||
# Assembly Unit Tests
|
||||
###############################################################################
|
||||
|
||||
if (BENCHMARK_ENABLE_ASSEMBLY_TESTS)
|
||||
if (NOT LLVM_FILECHECK_EXE)
|
||||
message(FATAL_ERROR "LLVM FileCheck is required when including this file")
|
||||
endif()
|
||||
include(AssemblyTests.cmake)
|
||||
add_filecheck_test(donotoptimize_assembly_test)
|
||||
add_filecheck_test(state_assembly_test)
|
||||
add_filecheck_test(clobber_memory_assembly_test)
|
||||
endif()
|
||||
|
||||
|
||||
|
||||
###############################################################################
|
||||
# Code Coverage Configuration
|
||||
###############################################################################
|
||||
|
||||
# Add the coverage command(s)
|
||||
if(CMAKE_BUILD_TYPE)
|
||||
|
64
test/clobber_memory_assembly_test.cc
Normal file
64
test/clobber_memory_assembly_test.cc
Normal file
@ -0,0 +1,64 @@
|
||||
#include <benchmark/benchmark.h>
|
||||
|
||||
#ifdef __clang__
|
||||
#pragma clang diagnostic ignored "-Wreturn-type"
|
||||
#endif
|
||||
|
||||
extern "C" {
|
||||
|
||||
extern int ExternInt;
|
||||
extern int ExternInt2;
|
||||
extern int ExternInt3;
|
||||
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_basic:
|
||||
extern "C" void test_basic() {
|
||||
int x;
|
||||
benchmark::DoNotOptimize(&x);
|
||||
x = 101;
|
||||
benchmark::ClobberMemory();
|
||||
// CHECK: leaq [[DEST:[^,]+]], %rax
|
||||
// CHECK: movl $101, [[DEST]]
|
||||
// CHECK: ret
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_redundant_store:
|
||||
extern "C" void test_redundant_store() {
|
||||
ExternInt = 3;
|
||||
benchmark::ClobberMemory();
|
||||
ExternInt = 51;
|
||||
// CHECK-DAG: ExternInt
|
||||
// CHECK-DAG: movl $3
|
||||
// CHECK: movl $51
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_redundant_read:
|
||||
extern "C" void test_redundant_read() {
|
||||
int x;
|
||||
benchmark::DoNotOptimize(&x);
|
||||
x = ExternInt;
|
||||
benchmark::ClobberMemory();
|
||||
x = ExternInt2;
|
||||
// CHECK: leaq [[DEST:[^,]+]], %rax
|
||||
// CHECK: ExternInt(%rip)
|
||||
// CHECK: movl %eax, [[DEST]]
|
||||
// CHECK-NOT: ExternInt2
|
||||
// CHECK: ret
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_redundant_read2:
|
||||
extern "C" void test_redundant_read2() {
|
||||
int x;
|
||||
benchmark::DoNotOptimize(&x);
|
||||
x = ExternInt;
|
||||
benchmark::ClobberMemory();
|
||||
x = ExternInt2;
|
||||
benchmark::ClobberMemory();
|
||||
// CHECK: leaq [[DEST:[^,]+]], %rax
|
||||
// CHECK: ExternInt(%rip)
|
||||
// CHECK: movl %eax, [[DEST]]
|
||||
// CHECK: ExternInt2(%rip)
|
||||
// CHECK: movl %eax, [[DEST]]
|
||||
// CHECK: ret
|
||||
}
|
163
test/donotoptimize_assembly_test.cc
Normal file
163
test/donotoptimize_assembly_test.cc
Normal file
@ -0,0 +1,163 @@
|
||||
#include <benchmark/benchmark.h>
|
||||
|
||||
#ifdef __clang__
|
||||
#pragma clang diagnostic ignored "-Wreturn-type"
|
||||
#endif
|
||||
|
||||
extern "C" {
|
||||
|
||||
extern int ExternInt;
|
||||
extern int ExternInt2;
|
||||
extern int ExternInt3;
|
||||
|
||||
inline int Add42(int x) { return x + 42; }
|
||||
|
||||
struct NotTriviallyCopyable {
|
||||
NotTriviallyCopyable();
|
||||
explicit NotTriviallyCopyable(int x) : value(x) {}
|
||||
NotTriviallyCopyable(NotTriviallyCopyable const&);
|
||||
int value;
|
||||
};
|
||||
|
||||
struct Large {
|
||||
int value;
|
||||
int data[2];
|
||||
};
|
||||
|
||||
}
|
||||
// CHECK-LABEL: test_with_rvalue:
|
||||
extern "C" void test_with_rvalue() {
|
||||
benchmark::DoNotOptimize(Add42(0));
|
||||
// CHECK: movl $42, %eax
|
||||
// CHECK: ret
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_with_large_rvalue:
|
||||
extern "C" void test_with_large_rvalue() {
|
||||
benchmark::DoNotOptimize(Large{ExternInt, {ExternInt, ExternInt}});
|
||||
// CHECK: ExternInt(%rip)
|
||||
// CHECK: movl %eax, -{{[0-9]+}}(%[[REG:[a-z]+]])
|
||||
// CHECK: movl %eax, -{{[0-9]+}}(%[[REG]])
|
||||
// CHECK: movl %eax, -{{[0-9]+}}(%[[REG]])
|
||||
// CHECK: ret
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_with_non_trivial_rvalue:
|
||||
extern "C" void test_with_non_trivial_rvalue() {
|
||||
benchmark::DoNotOptimize(NotTriviallyCopyable(ExternInt));
|
||||
// CHECK: mov{{l|q}} ExternInt(%rip)
|
||||
// CHECK: ret
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_with_lvalue:
|
||||
extern "C" void test_with_lvalue() {
|
||||
int x = 101;
|
||||
benchmark::DoNotOptimize(x);
|
||||
// CHECK-GNU: movl $101, %eax
|
||||
// CHECK-CLANG: movl $101, -{{[0-9]+}}(%[[REG:[a-z]+]])
|
||||
// CHECK: ret
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_with_large_lvalue:
|
||||
extern "C" void test_with_large_lvalue() {
|
||||
Large L{ExternInt, {ExternInt, ExternInt}};
|
||||
benchmark::DoNotOptimize(L);
|
||||
// CHECK: ExternInt(%rip)
|
||||
// CHECK: movl %eax, -{{[0-9]+}}(%[[REG:[a-z]+]])
|
||||
// CHECK: movl %eax, -{{[0-9]+}}(%[[REG]])
|
||||
// CHECK: movl %eax, -{{[0-9]+}}(%[[REG]])
|
||||
// CHECK: ret
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_with_non_trivial_lvalue:
|
||||
extern "C" void test_with_non_trivial_lvalue() {
|
||||
NotTriviallyCopyable NTC(ExternInt);
|
||||
benchmark::DoNotOptimize(NTC);
|
||||
// CHECK: ExternInt(%rip)
|
||||
// CHECK: movl %eax, -{{[0-9]+}}(%[[REG:[a-z]+]])
|
||||
// CHECK: ret
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_with_const_lvalue:
|
||||
extern "C" void test_with_const_lvalue() {
|
||||
const int x = 123;
|
||||
benchmark::DoNotOptimize(x);
|
||||
// CHECK: movl $123, %eax
|
||||
// CHECK: ret
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_with_large_const_lvalue:
|
||||
extern "C" void test_with_large_const_lvalue() {
|
||||
const Large L{ExternInt, {ExternInt, ExternInt}};
|
||||
benchmark::DoNotOptimize(L);
|
||||
// CHECK: ExternInt(%rip)
|
||||
// CHECK: movl %eax, -{{[0-9]+}}(%[[REG:[a-z]+]])
|
||||
// CHECK: movl %eax, -{{[0-9]+}}(%[[REG]])
|
||||
// CHECK: movl %eax, -{{[0-9]+}}(%[[REG]])
|
||||
// CHECK: ret
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_with_non_trivial_const_lvalue:
|
||||
extern "C" void test_with_non_trivial_const_lvalue() {
|
||||
const NotTriviallyCopyable Obj(ExternInt);
|
||||
benchmark::DoNotOptimize(Obj);
|
||||
// CHECK: mov{{q|l}} ExternInt(%rip)
|
||||
// CHECK: ret
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_div_by_two:
|
||||
extern "C" int test_div_by_two(int input) {
|
||||
int divisor = 2;
|
||||
benchmark::DoNotOptimize(divisor);
|
||||
return input / divisor;
|
||||
// CHECK: movl $2, [[DEST:.*]]
|
||||
// CHECK: idivl [[DEST]]
|
||||
// CHECK: ret
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_inc_integer:
|
||||
extern "C" int test_inc_integer() {
|
||||
int x = 0;
|
||||
for (int i=0; i < 5; ++i)
|
||||
benchmark::DoNotOptimize(++x);
|
||||
// CHECK: movl $1, [[DEST:.*]]
|
||||
// CHECK: {{(addl \$1,|incl)}} [[DEST]]
|
||||
// CHECK: {{(addl \$1,|incl)}} [[DEST]]
|
||||
// CHECK: {{(addl \$1,|incl)}} [[DEST]]
|
||||
// CHECK: {{(addl \$1,|incl)}} [[DEST]]
|
||||
// CHECK-CLANG: movl [[DEST]], %eax
|
||||
// CHECK: ret
|
||||
return x;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_pointer_rvalue:
|
||||
extern "C" void test_pointer_rvalue() {
|
||||
// CHECK: movl $42, [[DEST:.*]]
|
||||
// CHECK: leaq [[DEST]], %rax
|
||||
// CHECK-CLANG: movq %rax, -{{[0-9]+}}(%[[REG:[a-z]+]])
|
||||
// CHECK: ret
|
||||
int x = 42;
|
||||
benchmark::DoNotOptimize(&x);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_pointer_const_lvalue:
|
||||
extern "C" void test_pointer_const_lvalue() {
|
||||
// CHECK: movl $42, [[DEST:.*]]
|
||||
// CHECK: leaq [[DEST]], %rax
|
||||
// CHECK-CLANG: movq %rax, -{{[0-9]+}}(%[[REG:[a-z]+]])
|
||||
// CHECK: ret
|
||||
int x = 42;
|
||||
int * const xp = &x;
|
||||
benchmark::DoNotOptimize(xp);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_pointer_lvalue:
|
||||
extern "C" void test_pointer_lvalue() {
|
||||
// CHECK: movl $42, [[DEST:.*]]
|
||||
// CHECK: leaq [[DEST]], %rax
|
||||
// CHECK-CLANG: movq %rax, -{{[0-9]+}}(%[[REG:[a-z]+]])
|
||||
// CHECK: ret
|
||||
int x = 42;
|
||||
int *xp = &x;
|
||||
benchmark::DoNotOptimize(xp);
|
||||
}
|
66
test/state_assembly_test.cc
Normal file
66
test/state_assembly_test.cc
Normal file
@ -0,0 +1,66 @@
|
||||
#include <benchmark/benchmark.h>
|
||||
|
||||
#ifdef __clang__
|
||||
#pragma clang diagnostic ignored "-Wreturn-type"
|
||||
#endif
|
||||
|
||||
extern "C" {
|
||||
extern int ExternInt;
|
||||
benchmark::State& GetState();
|
||||
void Fn();
|
||||
}
|
||||
|
||||
using benchmark::State;
|
||||
|
||||
// CHECK-LABEL: test_for_auto_loop:
|
||||
extern "C" int test_for_auto_loop() {
|
||||
State& S = GetState();
|
||||
int x = 42;
|
||||
// CHECK: [[CALL:call(q)*]] _ZN9benchmark5State16StartKeepRunningEv
|
||||
// CHECK-NEXT: testq %rbx, %rbx
|
||||
// CHECK-NEXT: je [[LOOP_END:.*]]
|
||||
|
||||
for (auto _ : S) {
|
||||
// CHECK: .L[[LOOP_HEAD:[a-zA-Z0-9_]+]]:
|
||||
// CHECK-GNU-NEXT: subq $1, %rbx
|
||||
// CHECK-CLANG-NEXT: {{(addq \$1,|incq)}} %rax
|
||||
// CHECK-NEXT: jne .L[[LOOP_HEAD]]
|
||||
benchmark::DoNotOptimize(x);
|
||||
}
|
||||
// CHECK: [[LOOP_END]]:
|
||||
// CHECK: [[CALL]] _ZN9benchmark5State17FinishKeepRunningEv
|
||||
|
||||
// CHECK: movl $101, %eax
|
||||
// CHECK: ret
|
||||
return 101;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_while_loop:
|
||||
extern "C" int test_while_loop() {
|
||||
State& S = GetState();
|
||||
int x = 42;
|
||||
|
||||
// CHECK: j{{(e|mp)}} .L[[LOOP_HEADER:[a-zA-Z0-9_]+]]
|
||||
// CHECK-NEXT: .L[[LOOP_BODY:[a-zA-Z0-9_]+]]:
|
||||
while (S.KeepRunning()) {
|
||||
// CHECK-GNU-NEXT: subq $1, %[[IREG:[a-z]+]]
|
||||
// CHECK-CLANG-NEXT: {{(addq \$-1,|decq)}} %[[IREG:[a-z]+]]
|
||||
// CHECK: movq %[[IREG]], [[DEST:.*]]
|
||||
benchmark::DoNotOptimize(x);
|
||||
}
|
||||
// CHECK-DAG: movq [[DEST]], %[[IREG]]
|
||||
// CHECK-DAG: testq %[[IREG]], %[[IREG]]
|
||||
// CHECK-DAG: jne .L[[LOOP_BODY]]
|
||||
// CHECK-DAG: .L[[LOOP_HEADER]]:
|
||||
|
||||
// CHECK: cmpb $0
|
||||
// CHECK-NEXT: jne .L[[LOOP_END:[a-zA-Z0-9_]+]]
|
||||
// CHECK: [[CALL:call(q)*]] _ZN9benchmark5State16StartKeepRunningEv
|
||||
|
||||
// CHECK: .L[[LOOP_END]]:
|
||||
// CHECK: [[CALL]] _ZN9benchmark5State17FinishKeepRunningEv
|
||||
|
||||
// CHECK: movl $101, %eax
|
||||
// CHECK: ret
|
||||
return 101;
|
||||
}
|
151
tools/strip_asm.py
Executable file
151
tools/strip_asm.py
Executable file
@ -0,0 +1,151 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
"""
|
||||
strip_asm.py - Cleanup ASM output for the specified file
|
||||
"""
|
||||
|
||||
from argparse import ArgumentParser
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
|
||||
def find_used_labels(asm):
    """
    Return the set of '.L<name>' labels that are the target of a jump.

    Only lines that begin (after optional whitespace) with a 'j<cond>'
    mnemonic followed by a '.L...' operand are considered.
    """
    # Raw string: '\s' and '\.' are regex escapes, not Python string escapes.
    jump_target = re.compile(r"\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)")
    found = set()
    for line in asm.splitlines():
        m = jump_target.match(line)
        if m:
            found.add('.L%s' % m.group(1))
    return found
|
||||
|
||||
|
||||
def normalize_labels(asm):
    """
    Rewrite local labels declared as 'L<name>:' so they read '.L<name>'.

    Labels that already carry the leading dot are left alone. Both the
    declaration and every whitespace-delimited use of an undotted label are
    rewritten. Input without any local label declarations is returned as-is.
    """
    # Raw strings keep '\s' as a regex escape rather than a string escape.
    label_decl = re.compile(r"^[.]{0,1}L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
    decls = set()
    for line in asm.splitlines():
        m = label_decl.match(line)
        if m:
            decls.add(m.group(0))

    # Decide per label instead of from one arbitrary set element, so the
    # result does not depend on set iteration order.
    undotted = sorted(d for d in decls if not d.startswith('.'))
    for ld in undotted:
        asm = re.sub(r"(^|\s+)" + re.escape(ld) + r"(?=:|\s)",
                     r"\1." + ld, asm)
    return asm
|
||||
|
||||
|
||||
def transform_labels(asm):
    """
    Normalize local labels, then drop declarations of labels nothing jumps to.

    Every line is kept except '.L<name>:' declarations whose label is not in
    the set returned by find_used_labels. Each kept line is terminated with a
    newline in the result.
    """
    asm = normalize_labels(asm)
    used_decls = find_used_labels(asm)
    # Raw string: '\.' is a regex escape, not a Python string escape.
    label_decl = re.compile(r"^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
    kept = []
    for line in asm.splitlines():
        m = label_decl.match(line)
        if not m or m.group(0) in used_decls:
            kept.append(line)
    # Join once instead of growing a string line by line.
    return ''.join(line + '\n' for line in kept)
|
||||
|
||||
|
||||
def is_identifier(tk):
    """
    Report whether `tk` looks like an identifier: a letter or underscore
    followed by any number of alphanumeric characters or underscores.
    The empty string is not an identifier.
    """
    if len(tk) == 0:
        return False
    head, tail = tk[0], tk[1:]
    if not (head.isalpha() or head == '_'):
        return False
    return all(ch.isalnum() or ch == '_' for ch in tail)
|
||||
|
||||
def process_identifiers(l):
    """
    process_identifiers - rewrite the identifiers on one line so that names
    are consistent across platforms; specifically across ELF and MachO.
    For example, MachO inserts an additional underscore at the beginning of
    names. This function removes that.
    """
    def strip_leading_underscore(tk):
        # Non-identifier pieces (punctuation, whitespace, numbers) pass through.
        if not is_identifier(tk):
            return tk
        # '__Z...' loses one underscore and becomes '_Z...'.
        if tk.startswith('__Z'):
            return tk[1:]
        # '_name' becomes 'name', but '_Z...' is left untouched.
        if tk.startswith('_') and len(tk) > 1 and \
                tk[1].isalpha() and tk[1] != 'Z':
            return tk[1:]
        return tk

    # The capturing group makes re.split keep the word tokens in the result.
    pieces = re.split(r'([a-zA-Z0-9_]+)', l)
    return ''.join(strip_leading_underscore(tk) for tk in pieces)
|
||||
|
||||
|
||||
def process_asm(asm):
    """
    Strip the ASM of unwanted directives and lines.

    Labels are first passed through transform_labels. Each remaining line
    has '@GOTPCREL' removed, is dropped if it matches one of the discard
    patterns (unless a keep pattern matches), and otherwise has its
    identifiers rewritten by process_identifiers. A blank line is inserted
    before every function label except when nothing has been emitted yet.
    """
    asm = transform_labels(asm)

    # TODO: Add more things we want to remove
    # Raw strings: '\s' and '\.' are regex escapes, not string escapes.
    discard_regexes = [
        re.compile(r"\s+\..*$"),  # directive
        re.compile(r"\s*#(NO_APP|APP)$"),  # inline ASM
        re.compile(r"\s*#.*$"),  # comment line
        re.compile(r"\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"),  # global directive
        re.compile(r"\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"),
    ]
    # Patterns that override a discard decision; currently none.
    keep_regexes = [
    ]
    fn_label_def = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_.]*:")

    pieces = []
    for l in asm.splitlines():
        # Remove Mach-O attribute
        l = l.replace('@GOTPCREL', '')
        add_line = not any(reg.match(l) is not None for reg in discard_regexes)
        if any(reg.match(l) is not None for reg in keep_regexes):
            add_line = True
        if not add_line:
            continue
        # Separate functions with a blank line, but never lead with one.
        if fn_label_def.match(l) and pieces:
            pieces.append('\n')
        pieces.append(process_identifiers(l))
        pieces.append('\n')
    # Join once instead of growing a string line by line.
    return ''.join(pieces)
|
||||
|
||||
def main():
    """
    Command-line entry point: read the input assembly file, strip it with
    process_asm, and write the result to the output file. Exits with status 1
    when the input file does not exist. Unrecognized arguments are ignored.
    """
    parser = ArgumentParser(
        description='generate a stripped assembly file')
    parser.add_argument(
        'input', metavar='input', type=str, nargs=1,
        help='An input assembly file')
    parser.add_argument(
        'out', metavar='output', type=str, nargs=1,
        help='The output file')
    args, _unknown = parser.parse_known_args()

    # nargs=1 yields one-element lists; unwrap them.
    src_path = args.input[0]
    dst_path = args.out[0]

    if not os.path.isfile(src_path):
        print(("ERROR: input file '%s' does not exist") % src_path)
        sys.exit(1)

    with open(src_path, 'r') as src:
        raw_asm = src.read()
    stripped_asm = process_asm(raw_asm)
    with open(dst_path, 'w') as dst:
        dst.write(stripped_asm)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
||||
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
|
||||
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
|
||||
# kate: indent-mode python; remove-trailing-spaces modified;
|
Loading…
Reference in New Issue
Block a user