Mirror of https://github.com/google/benchmark.git (synced 2025-04-29 22:40:33 +08:00)

Commit a75f6cfffc: Merge branch 'google:main' into master
Changed files in this commit:

.clang-tidy
.github/
.gitignore, .travis-libcxx-setup.sh, .travis.yml, AUTHORS, BUILD.bazel, CMakeLists.txt, CONTRIBUTORS, MODULE.bazel, README.md, WORKSPACE, WORKSPACE.bzlmod, _config.yml, bazel/
bindings/python/
cmake/: AddCXXCompilerFlag.cmake, CXXFeatureCheck.cmake, Config.cmake.in, GetGitVersion.cmake, GoogleTest.cmake, GoogleTest.cmake.in, Modules/, benchmark.pc.in, pthread_affinity.cpp
conan/, conanfile.py, dependencies.md
docs/: AssemblyTests.md, _config.yml, assets/images/, dependencies.md, index.md, perf_counters.md, platform_specific_build_instructions.md, python_bindings.md, random_interleaving.md, reducing_variance.md, releasing.md, tools.md, user_guide.md
include/benchmark/
pyproject.toml, setup.py
src/: CMakeLists.txt, benchmark.cc, benchmark_api_internal.cc, benchmark_api_internal.h, benchmark_main.cc, benchmark_name.cc, benchmark_register.cc, benchmark_register.h, benchmark_runner.cc, benchmark_runner.h, check.cc, check.h, colorprint.cc, commandlineflags.cc, commandlineflags.h, complexity.cc, complexity.h, console_reporter.cc, csv_reporter.cc, cycleclock.h, internal_macros.h, json_reporter.cc, log.h, mutex.h, perf_counters.cc, perf_counters.h, re.h, reporter.cc, sleep.cc, sleep.h
.clang-tidy (new file, 7 lines)
@@ -0,0 +1,7 @@
---
Checks: 'clang-analyzer-*,readability-redundant-*,performance-*'
WarningsAsErrors: 'clang-analyzer-*,readability-redundant-*,performance-*'
HeaderFilterRegex: '.*'
AnalyzeTemporaryDtors: false
FormatStyle: none
User: user
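Usage note: clang-tidy picks this configuration up automatically from the nearest .clang-tidy file. A minimal local invocation might look like the sketch below; it mirrors what the clang-tidy workflow later in this commit does, but the _build directory name is only an example and it assumes clang-tidy/run-clang-tidy are installed.

    # Export a compilation database, then let run-clang-tidy discover the
    # .clang-tidy file at the repository root.
    cmake -S . -B _build -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
    run-clang-tidy -p _build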
.github/install_bazel.sh (new vendored file, 13 lines)
@@ -0,0 +1,13 @@
if ! bazel version; then
  arch=$(uname -m)
  if [ "$arch" == "aarch64" ]; then
    arch="arm64"
  fi
  echo "Installing wget and downloading $arch Bazel binary from GitHub releases."
  yum install -y wget
  wget "https://github.com/bazelbuild/bazel/releases/download/6.3.0/bazel-6.3.0-linux-$arch" -O /usr/local/bin/bazel
  chmod +x /usr/local/bin/bazel
else
  # bazel is installed for the correct architecture
  exit 0
fi
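Usage note: this script is intended for the wheel-building containers; the wheels workflow later in this commit runs it via CIBW_BEFORE_ALL_LINUX. A direct invocation is only a sketch and assumes a yum-based environment such as a manylinux container, since the script falls back to yum when Bazel is missing.

    # Run from the repository root inside a manylinux-style container.
    bash .github/install_bazel.sh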
.github/libcxx-setup.sh (new executable file, 26 lines)
@@ -0,0 +1,26 @@
#!/usr/bin/env bash

set -e

# Checkout LLVM sources
git clone --depth=1 https://github.com/llvm/llvm-project.git llvm-project

## Setup libc++ options
if [ -z "$BUILD_32_BITS" ]; then
  export BUILD_32_BITS=OFF && echo disabling 32 bit build
fi

## Build and install libc++ (Use unstable ABI for better sanitizer coverage)
mkdir llvm-build && cd llvm-build
cmake -DCMAKE_C_COMPILER=${CC} \
      -DCMAKE_CXX_COMPILER=${CXX} \
      -DCMAKE_BUILD_TYPE=RelWithDebInfo \
      -DCMAKE_INSTALL_PREFIX=/usr \
      -DLIBCXX_ABI_UNSTABLE=OFF \
      -DLLVM_USE_SANITIZER=${LIBCXX_SANITIZER} \
      -DLLVM_BUILD_32_BITS=${BUILD_32_BITS} \
      -DLLVM_ENABLE_RUNTIMES='libcxx;libcxxabi;libunwind' \
      -G "Unix Makefiles" \
      ../llvm-project/runtimes/
make -j cxx cxxabi unwind
cd ..
.github/workflows/bazel.yml (new vendored file, 35 lines)
@@ -0,0 +1,35 @@
name: bazel

on:
  push: {}
  pull_request: {}

jobs:
  build_and_test_default:
    name: bazel.${{ matrix.os }}.${{ matrix.bzlmod && 'bzlmod' || 'no_bzlmod' }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]
        bzlmod: [false, true]
    steps:
      - uses: actions/checkout@v3

      - name: mount bazel cache
        uses: actions/cache@v3
        env:
          cache-name: bazel-cache
        with:
          path: "~/.cache/bazel"
          key: ${{ env.cache-name }}-${{ matrix.os }}-${{ github.ref }}
          restore-keys: |
            ${{ env.cache-name }}-${{ matrix.os }}-main

      - name: build
        run: |
          bazel build ${{ matrix.bzlmod && '--enable_bzlmod' || '--noenable_bzlmod' }} //:benchmark //:benchmark_main //test/...

      - name: test
        run: |
          bazel test ${{ matrix.bzlmod && '--enable_bzlmod' || '--noenable_bzlmod' }} --test_output=all //test/...
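The same build and test steps can be reproduced locally; a sketch of the two commands the workflow runs, here shown with bzlmod enabled (the flag choice corresponds to the bzlmod matrix dimension above):

    # Build the library targets and tests, then run the test suite,
    # mirroring the 'build' and 'test' steps of the workflow.
    bazel build --enable_bzlmod //:benchmark //:benchmark_main //test/...
    bazel test --enable_bzlmod --test_output=all //test/...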
.github/workflows/build-and-test-min-cmake.yml (new vendored file, 46 lines)
@@ -0,0 +1,46 @@
name: build-and-test-min-cmake

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  job:
    name: ${{ matrix.os }}.min-cmake
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest]

    steps:
      - uses: actions/checkout@v3

      - uses: lukka/get-cmake@latest
        with:
          cmakeVersion: 3.10.0

      - name: create build environment
        run: cmake -E make_directory ${{ runner.workspace }}/_build

      - name: setup cmake initial cache
        run: touch compiler-cache.cmake

      - name: configure cmake
        env:
          CXX: ${{ matrix.compiler }}
        shell: bash
        working-directory: ${{ runner.workspace }}/_build
        run: >
          cmake -C ${{ github.workspace }}/compiler-cache.cmake
          $GITHUB_WORKSPACE
          -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
          -DCMAKE_CXX_VISIBILITY_PRESET=hidden
          -DCMAKE_VISIBILITY_INLINES_HIDDEN=ON

      - name: build
        shell: bash
        working-directory: ${{ runner.workspace }}/_build
        run: cmake --build .
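Outside CI, a rough local equivalent of this job is the sequence below; it is only a sketch, with the _build directory and empty initial cache file chosen to mirror the workflow, and it assumes the installed CMake is at least the 3.10 minimum being exercised.

    # Empty initial cache, hidden-visibility flags, dependencies downloaded
    # at configure time, then a plain build.
    touch compiler-cache.cmake
    cmake -C compiler-cache.cmake -S . -B _build \
      -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON \
      -DCMAKE_CXX_VISIBILITY_PRESET=hidden \
      -DCMAKE_VISIBILITY_INLINES_HIDDEN=ON
    cmake --build _build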
.github/workflows/build-and-test-perfcounters.yml (new vendored file, 51 lines)
@@ -0,0 +1,51 @@
name: build-and-test-perfcounters

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  job:
    # TODO(dominic): Extend this to include compiler and set through env: CC/CXX.
    name: ${{ matrix.os }}.${{ matrix.build_type }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-22.04, ubuntu-20.04]
        build_type: ['Release', 'Debug']
    steps:
      - uses: actions/checkout@v3

      - name: install libpfm
        run: |
          sudo apt update
          sudo apt -y install libpfm4-dev

      - name: create build environment
        run: cmake -E make_directory ${{ runner.workspace }}/_build

      - name: configure cmake
        shell: bash
        working-directory: ${{ runner.workspace }}/_build
        run: >
          cmake $GITHUB_WORKSPACE
          -DBENCHMARK_ENABLE_LIBPFM=1
          -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
          -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}

      - name: build
        shell: bash
        working-directory: ${{ runner.workspace }}/_build
        run: cmake --build . --config ${{ matrix.build_type }}

      # Skip testing, for now. It seems perf_event_open does not succeed on the
      # hosting machine, very likely a permissions issue.
      # TODO(mtrofin): Enable test.
      # - name: test
      #   shell: bash
      #   working-directory: ${{ runner.workspace }}/_build
      #   run: ctest -C ${{ matrix.build_type }} --rerun-failed --output-on-failure
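Exercising the perf-counter path on a developer machine needs libpfm plus permission to call perf_event_open. The sketch below reuses the workflow's configure flags; the sysctl line is an assumption (lowering kernel.perf_event_paranoid may not be acceptable or sufficient on every system), and an apt-based distribution is assumed.

    # Install libpfm, build with counters enabled, relax perf_event_open
    # restrictions (assumption: acceptable on this machine), then run tests.
    sudo apt -y install libpfm4-dev
    cmake -S . -B _build -DBENCHMARK_ENABLE_LIBPFM=1 -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON -DCMAKE_BUILD_TYPE=Release
    cmake --build _build --config Release
    sudo sysctl -w kernel.perf_event_paranoid=1
    (cd _build && ctest -C Release --output-on-failure)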
124
.github/workflows/build-and-test.yml
vendored
124
.github/workflows/build-and-test.yml
vendored
@ -2,37 +2,113 @@ name: build-and-test
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
branches: [ main ]
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
branches: [ main ]
|
||||
|
||||
jobs:
|
||||
# TODO: add 32-bit builds (g++ and clang++) for ubuntu
|
||||
# (requires g++-multilib and libc6:i386)
|
||||
# TODO: add coverage build (requires lcov)
|
||||
# TODO: add clang + libc++ builds for ubuntu
|
||||
job:
|
||||
# TODO(dominic): Extend this to include compiler and set through env: CC/CXX.
|
||||
name: ${{ matrix.os }}.${{ matrix.build_type }}
|
||||
name: ${{ matrix.os }}.${{ matrix.build_type }}.${{ matrix.lib }}.${{ matrix.compiler }}
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-latest, ubuntu-16.04, ubuntu-20.04, macos-latest, windows-latest]
|
||||
os: [ubuntu-22.04, ubuntu-20.04, macos-latest]
|
||||
build_type: ['Release', 'Debug']
|
||||
compiler: ['g++', 'clang++']
|
||||
lib: ['shared', 'static']
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
|
||||
- name: create build environment
|
||||
run: cmake -E make_directory ${{ runner.workspace }}/_build
|
||||
|
||||
- name: configure cmake
|
||||
shell: bash
|
||||
working-directory: ${{ runner.workspace }}/_build
|
||||
run: cmake -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
|
||||
|
||||
- name: build
|
||||
shell: bash
|
||||
working-directory: ${{ runner.workspace }}/_build
|
||||
run: cmake --build . --config ${{ matrix.build_type }}
|
||||
|
||||
- name: test
|
||||
shell: bash
|
||||
working-directory: ${{ runner.workspace }}/_build
|
||||
run: ctest -C ${{ matrix.build_type }}
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- uses: lukka/get-cmake@latest
|
||||
|
||||
- name: create build environment
|
||||
run: cmake -E make_directory ${{ runner.workspace }}/_build
|
||||
|
||||
- name: setup cmake initial cache
|
||||
run: touch compiler-cache.cmake
|
||||
|
||||
- name: configure cmake
|
||||
env:
|
||||
CXX: ${{ matrix.compiler }}
|
||||
shell: bash
|
||||
working-directory: ${{ runner.workspace }}/_build
|
||||
run: >
|
||||
cmake -C ${{ github.workspace }}/compiler-cache.cmake
|
||||
$GITHUB_WORKSPACE
|
||||
-DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
|
||||
-DBUILD_SHARED_LIBS=${{ matrix.lib == 'shared' }}
|
||||
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
|
||||
-DCMAKE_CXX_COMPILER=${{ env.CXX }}
|
||||
-DCMAKE_CXX_VISIBILITY_PRESET=hidden
|
||||
-DCMAKE_VISIBILITY_INLINES_HIDDEN=ON
|
||||
|
||||
- name: build
|
||||
shell: bash
|
||||
working-directory: ${{ runner.workspace }}/_build
|
||||
run: cmake --build . --config ${{ matrix.build_type }}
|
||||
|
||||
- name: test
|
||||
shell: bash
|
||||
working-directory: ${{ runner.workspace }}/_build
|
||||
run: ctest -C ${{ matrix.build_type }} -VV
|
||||
|
||||
msvc:
|
||||
name: ${{ matrix.os }}.${{ matrix.build_type }}.${{ matrix.lib }}.${{ matrix.msvc }}
|
||||
runs-on: ${{ matrix.os }}
|
||||
defaults:
|
||||
run:
|
||||
shell: powershell
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
msvc:
|
||||
- VS-16-2019
|
||||
- VS-17-2022
|
||||
arch:
|
||||
- x64
|
||||
build_type:
|
||||
- Debug
|
||||
- Release
|
||||
lib:
|
||||
- shared
|
||||
- static
|
||||
include:
|
||||
- msvc: VS-16-2019
|
||||
os: windows-2019
|
||||
generator: 'Visual Studio 16 2019'
|
||||
- msvc: VS-17-2022
|
||||
os: windows-2022
|
||||
generator: 'Visual Studio 17 2022'
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
|
||||
- uses: lukka/get-cmake@latest
|
||||
|
||||
- name: configure cmake
|
||||
run: >
|
||||
cmake -S . -B _build/
|
||||
-A ${{ matrix.arch }}
|
||||
-G "${{ matrix.generator }}"
|
||||
-DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
|
||||
-DBUILD_SHARED_LIBS=${{ matrix.lib == 'shared' }}
|
||||
|
||||
- name: build
|
||||
run: cmake --build _build/ --config ${{ matrix.build_type }}
|
||||
|
||||
- name: setup test environment
|
||||
# Make sure gmock and benchmark DLLs can be found
|
||||
run: >
|
||||
echo "$((Get-Item .).FullName)/_build/bin/${{ matrix.build_type }}" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append;
|
||||
echo "$((Get-Item .).FullName)/_build/src/${{ matrix.build_type }}" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append;
|
||||
|
||||
- name: test
|
||||
run: ctest --test-dir _build/ -C ${{ matrix.build_type }} -VV
|
||||
|
||||
|
||||
|
.github/workflows/clang-format-lint.yml (new vendored file, 17 lines)
@@ -0,0 +1,17 @@
name: clang-format-lint
on:
  push: {}
  pull_request: {}

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
      - uses: DoozyX/clang-format-lint-action@v0.13
        with:
          source: './include/benchmark ./src ./test'
          extensions: 'h,cc'
          clangFormatVersion: 12
          style: Google
.github/workflows/clang-tidy.yml (new vendored file, 38 lines)
@@ -0,0 +1,38 @@
name: clang-tidy

on:
  push: {}
  pull_request: {}

jobs:
  job:
    name: run-clang-tidy
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
    steps:
      - uses: actions/checkout@v3

      - name: install clang-tidy
        run: sudo apt update && sudo apt -y install clang-tidy

      - name: create build environment
        run: cmake -E make_directory ${{ runner.workspace }}/_build

      - name: configure cmake
        shell: bash
        working-directory: ${{ runner.workspace }}/_build
        run: >
          cmake $GITHUB_WORKSPACE
          -DBENCHMARK_ENABLE_ASSEMBLY_TESTS=OFF
          -DBENCHMARK_ENABLE_LIBPFM=OFF
          -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
          -DCMAKE_C_COMPILER=clang
          -DCMAKE_CXX_COMPILER=clang++
          -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
          -DGTEST_COMPILE_COMMANDS=OFF

      - name: run
        shell: bash
        working-directory: ${{ runner.workspace }}/_build
        run: run-clang-tidy
.github/workflows/doxygen.yml (new vendored file, 28 lines)
@@ -0,0 +1,28 @@
name: doxygen

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

jobs:
  build-and-deploy:
    name: Build HTML documentation
    runs-on: ubuntu-latest
    steps:
      - name: Fetching sources
        uses: actions/checkout@v3

      - name: Installing build dependencies
        run: |
          sudo apt update
          sudo apt install doxygen gcc git

      - name: Creating build directory
        run: mkdir build

      - name: Building HTML documentation with Doxygen
        run: |
          cmake -S . -B build -DBENCHMARK_ENABLE_TESTING:BOOL=OFF -DBENCHMARK_ENABLE_DOXYGEN:BOOL=ON -DBENCHMARK_INSTALL_DOCS:BOOL=ON
          cmake --build build --target benchmark_doxygen
.github/workflows/pylint.yml (new vendored file, 28 lines)
@@ -0,0 +1,28 @@
name: pylint

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  pylint:

    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
      - name: Set up Python 3.8
        uses: actions/setup-python@v1
        with:
          python-version: 3.8

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pylint pylint-exit conan

      - name: Run pylint
        run: |
          pylint `find . -name '*.py'|xargs` || pylint-exit $?
.github/workflows/sanitizer.yml (new vendored file, 96 lines)
@@ -0,0 +1,96 @@
name: sanitizer

on:
  push: {}
  pull_request: {}

env:
  UBSAN_OPTIONS: "print_stacktrace=1"

jobs:
  job:
    name: ${{ matrix.sanitizer }}.${{ matrix.build_type }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        build_type: ['Debug', 'RelWithDebInfo']
        sanitizer: ['asan', 'ubsan', 'tsan', 'msan']

    steps:
      - uses: actions/checkout@v3

      - name: configure msan env
        if: matrix.sanitizer == 'msan'
        run: |
          echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=memory -fsanitize-memory-track-origins" >> $GITHUB_ENV
          echo "LIBCXX_SANITIZER=MemoryWithOrigins" >> $GITHUB_ENV

      - name: configure ubsan env
        if: matrix.sanitizer == 'ubsan'
        run: |
          echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=undefined -fno-sanitize-recover=all" >> $GITHUB_ENV
          echo "LIBCXX_SANITIZER=Undefined" >> $GITHUB_ENV

      - name: configure asan env
        if: matrix.sanitizer == 'asan'
        run: |
          echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=address -fno-sanitize-recover=all" >> $GITHUB_ENV
          echo "LIBCXX_SANITIZER=Address" >> $GITHUB_ENV

      - name: configure tsan env
        if: matrix.sanitizer == 'tsan'
        run: |
          echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=thread -fno-sanitize-recover=all" >> $GITHUB_ENV
          echo "LIBCXX_SANITIZER=Thread" >> $GITHUB_ENV

      - name: fine-tune asan options
        # in asan we get an error from std::regex. ignore it.
        if: matrix.sanitizer == 'asan'
        run: |
          echo "ASAN_OPTIONS=alloc_dealloc_mismatch=0" >> $GITHUB_ENV

      - name: setup clang
        uses: egor-tensin/setup-clang@v1
        with:
          version: latest
          platform: x64

      - name: configure clang
        run: |
          echo "CC=cc" >> $GITHUB_ENV
          echo "CXX=c++" >> $GITHUB_ENV

      - name: build libc++ (non-asan)
        if: matrix.sanitizer != 'asan'
        run: |
          "${GITHUB_WORKSPACE}/.github/libcxx-setup.sh"
          echo "EXTRA_CXX_FLAGS=-stdlib=libc++ -L ${GITHUB_WORKSPACE}/llvm-build/lib -lc++abi -Isystem${GITHUB_WORKSPACE}/llvm-build/include -Isystem${GITHUB_WORKSPACE}/llvm-build/include/c++/v1 -Wl,-rpath,${GITHUB_WORKSPACE}/llvm-build/lib" >> $GITHUB_ENV

      - name: create build environment
        run: cmake -E make_directory ${{ runner.workspace }}/_build

      - name: configure cmake
        shell: bash
        working-directory: ${{ runner.workspace }}/_build
        run: >
          VERBOSE=1
          cmake $GITHUB_WORKSPACE
          -DBENCHMARK_ENABLE_ASSEMBLY_TESTS=OFF
          -DBENCHMARK_ENABLE_LIBPFM=OFF
          -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
          -DCMAKE_C_COMPILER=${{ env.CC }}
          -DCMAKE_CXX_COMPILER=${{ env.CXX }}
          -DCMAKE_C_FLAGS="${{ env.EXTRA_FLAGS }}"
          -DCMAKE_CXX_FLAGS="${{ env.EXTRA_FLAGS }} ${{ env.EXTRA_CXX_FLAGS }}"
          -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}

      - name: build
        shell: bash
        working-directory: ${{ runner.workspace }}/_build
        run: cmake --build . --config ${{ matrix.build_type }}

      - name: test
        shell: bash
        working-directory: ${{ runner.workspace }}/_build
        run: ctest -C ${{ matrix.build_type }} -VV
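For a local ASan run, the flags this workflow assembles can be passed straight to CMake. The sketch below covers only the asan leg with clang already installed; the other sanitizers differ in the -fsanitize= flag and, for msan/tsan/ubsan, additionally rely on the custom libc++ built by libcxx-setup.sh.

    # Address-sanitized Debug build mirroring the 'asan' matrix entry.
    export ASAN_OPTIONS=alloc_dealloc_mismatch=0   # same std::regex workaround as the workflow
    cmake -S . -B _build \
      -DBENCHMARK_ENABLE_ASSEMBLY_TESTS=OFF \
      -DBENCHMARK_ENABLE_LIBPFM=OFF \
      -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON \
      -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \
      -DCMAKE_C_FLAGS="-g -O2 -fno-omit-frame-pointer -fsanitize=address -fno-sanitize-recover=all" \
      -DCMAKE_CXX_FLAGS="-g -O2 -fno-omit-frame-pointer -fsanitize=address -fno-sanitize-recover=all" \
      -DCMAKE_BUILD_TYPE=Debug
    cmake --build _build
    (cd _build && ctest -VV)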
.github/workflows/test_bindings.yml (new vendored file, 29 lines)
@@ -0,0 +1,29 @@
name: test-bindings

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

jobs:
  python_bindings:
    name: Test GBM Python bindings on ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ ubuntu-latest, macos-latest, windows-2019 ]

    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: 3.11
      - name: Install GBM Python bindings on ${{ matrix.os}}
        run:
          python -m pip install wheel .
      - name: Run bindings example on ${{ matrix.os }}
        run:
          python bindings/python/google_benchmark/example.py
.github/workflows/wheels.yml (new vendored file, 79 lines)
@@ -0,0 +1,79 @@
name: Build and upload Python wheels

on:
  workflow_dispatch:
  release:
    types:
      - published

jobs:
  build_sdist:
    name: Build source distribution
    runs-on: ubuntu-latest
    steps:
      - name: Check out repo
        uses: actions/checkout@v3

      - name: Install Python 3.11
        uses: actions/setup-python@v4
        with:
          python-version: 3.11

      - name: Build and check sdist
        run: |
          python setup.py sdist
      - name: Upload sdist
        uses: actions/upload-artifact@v3
        with:
          name: dist
          path: dist/*.tar.gz

  build_wheels:
    name: Build Google Benchmark wheels on ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest, windows-2019]

    steps:
      - name: Check out Google Benchmark
        uses: actions/checkout@v3

      - name: Set up QEMU
        if: runner.os == 'Linux'
        uses: docker/setup-qemu-action@v2
        with:
          platforms: all

      - name: Build wheels on ${{ matrix.os }} using cibuildwheel
        uses: pypa/cibuildwheel@v2.14.1
        env:
          CIBW_BUILD: 'cp38-* cp39-* cp310-* cp311-*'
          CIBW_SKIP: "*-musllinux_*"
          CIBW_TEST_SKIP: "*-macosx_arm64"
          CIBW_ARCHS_LINUX: x86_64 aarch64
          CIBW_ARCHS_MACOS: x86_64 arm64
          CIBW_ARCHS_WINDOWS: AMD64
          CIBW_BEFORE_ALL_LINUX: bash .github/install_bazel.sh
          CIBW_TEST_COMMAND: python {project}/bindings/python/google_benchmark/example.py

      - name: Upload Google Benchmark ${{ matrix.os }} wheels
        uses: actions/upload-artifact@v3
        with:
          name: dist
          path: ./wheelhouse/*.whl

  pypi_upload:
    name: Publish google-benchmark wheels to PyPI
    needs: [build_sdist, build_wheels]
    runs-on: ubuntu-latest
    steps:
      - uses: actions/download-artifact@v3
        with:
          name: dist
          path: dist

      - uses: pypa/gh-action-pypi-publish@v1.6.4
        with:
          user: __token__
          password: ${{ secrets.PYPI_PASSWORD }}
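A local smoke test of the wheel build is possible with cibuildwheel itself; the sketch below is an assumption about a reasonable local setup (it pins the same cibuildwheel version the workflow uses, is run from the repository root, and on Linux additionally needs Docker for the manylinux images).

    # Build Linux wheels locally the same way the workflow does.
    python -m pip install cibuildwheel==2.14.1
    cibuildwheel --platform linux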
1
.gitignore
vendored
1
.gitignore
vendored
@ -11,6 +11,7 @@
|
||||
*.swp
|
||||
*.pyc
|
||||
__pycache__
|
||||
.DS_Store
|
||||
|
||||
# lcov
|
||||
*.lcov
|
||||
|
@ -1,28 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Install a newer CMake version
|
||||
curl -sSL https://cmake.org/files/v3.6/cmake-3.6.1-Linux-x86_64.sh -o install-cmake.sh
|
||||
chmod +x install-cmake.sh
|
||||
sudo ./install-cmake.sh --prefix=/usr/local --skip-license
|
||||
|
||||
# Checkout LLVM sources
|
||||
git clone --depth=1 https://github.com/llvm-mirror/llvm.git llvm-source
|
||||
git clone --depth=1 https://github.com/llvm-mirror/libcxx.git llvm-source/projects/libcxx
|
||||
git clone --depth=1 https://github.com/llvm-mirror/libcxxabi.git llvm-source/projects/libcxxabi
|
||||
|
||||
# Setup libc++ options
|
||||
if [ -z "$BUILD_32_BITS" ]; then
|
||||
export BUILD_32_BITS=OFF && echo disabling 32 bit build
|
||||
fi
|
||||
|
||||
# Build and install libc++ (Use unstable ABI for better sanitizer coverage)
|
||||
mkdir llvm-build && cd llvm-build
|
||||
cmake -DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${COMPILER} \
|
||||
-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_INSTALL_PREFIX=/usr \
|
||||
-DLIBCXX_ABI_UNSTABLE=ON \
|
||||
-DLLVM_USE_SANITIZER=${LIBCXX_SANITIZER} \
|
||||
-DLLVM_BUILD_32_BITS=${BUILD_32_BITS} \
|
||||
../llvm-source
|
||||
make cxx -j2
|
||||
sudo make install-cxxabi install-cxx
|
||||
cd ../
|
25
.travis.yml
25
.travis.yml
@ -10,10 +10,6 @@ matrix:
|
||||
packages:
|
||||
- lcov
|
||||
env: COMPILER=g++ C_COMPILER=gcc BUILD_TYPE=Coverage
|
||||
- compiler: gcc
|
||||
env: COMPILER=g++ C_COMPILER=gcc BUILD_TYPE=Debug
|
||||
- compiler: gcc
|
||||
env: COMPILER=g++ C_COMPILER=gcc BUILD_TYPE=Release
|
||||
- compiler: gcc
|
||||
addons:
|
||||
apt:
|
||||
@ -44,10 +40,6 @@ matrix:
|
||||
- COMPILER=g++-6 C_COMPILER=gcc-6 BUILD_TYPE=Debug
|
||||
- ENABLE_SANITIZER=1
|
||||
- EXTRA_FLAGS="-fno-omit-frame-pointer -g -O2 -fsanitize=undefined,address -fuse-ld=gold"
|
||||
- compiler: clang
|
||||
env: COMPILER=clang++ C_COMPILER=clang BUILD_TYPE=Debug
|
||||
- compiler: clang
|
||||
env: COMPILER=clang++ C_COMPILER=clang BUILD_TYPE=Release
|
||||
# Clang w/ libc++
|
||||
- compiler: clang
|
||||
dist: xenial
|
||||
@ -146,16 +138,6 @@ matrix:
|
||||
- ENABLE_SANITIZER=1
|
||||
- EXTRA_FLAGS="-g -O2 -fno-omit-frame-pointer -fsanitize=thread -fno-sanitize-recover=all"
|
||||
- EXTRA_CXX_FLAGS="-stdlib=libc++"
|
||||
- os: osx
|
||||
osx_image: xcode8.3
|
||||
compiler: clang
|
||||
env:
|
||||
- COMPILER=clang++ BUILD_TYPE=Debug
|
||||
- os: osx
|
||||
osx_image: xcode8.3
|
||||
compiler: clang
|
||||
env:
|
||||
- COMPILER=clang++ BUILD_TYPE=Release
|
||||
- os: osx
|
||||
osx_image: xcode8.3
|
||||
compiler: clang
|
||||
@ -164,15 +146,10 @@ matrix:
|
||||
- BUILD_TYPE=Release
|
||||
- BUILD_32_BITS=ON
|
||||
- EXTRA_FLAGS="-m32"
|
||||
- os: osx
|
||||
osx_image: xcode9.4
|
||||
compiler: gcc
|
||||
env:
|
||||
- COMPILER=g++-7 C_COMPILER=gcc-7 BUILD_TYPE=Debug
|
||||
|
||||
before_script:
|
||||
- if [ -n "${LIBCXX_BUILD}" ]; then
|
||||
source .travis-libcxx-setup.sh;
|
||||
source .libcxx-setup.sh;
|
||||
fi
|
||||
- if [ -n "${ENABLE_SANITIZER}" ]; then
|
||||
export EXTRA_OPTIONS="-DBENCHMARK_ENABLE_ASSEMBLY_TESTS=OFF";
|
||||
|
15
AUTHORS
15
AUTHORS
@ -13,6 +13,8 @@ Alex Steele <steeleal123@gmail.com>
|
||||
Andriy Berestovskyy <berestovskyy@gmail.com>
|
||||
Arne Beer <arne@twobeer.de>
|
||||
Carto
|
||||
Cezary Skrzyński <czars1988@gmail.com>
|
||||
Christian Wassermann <christian_wassermann@web.de>
|
||||
Christopher Seymour <chris.j.seymour@hotmail.com>
|
||||
Colin Braley <braley.colin@gmail.com>
|
||||
Daniel Harvey <danielharvey458@gmail.com>
|
||||
@ -20,14 +22,18 @@ David Coeurjolly <david.coeurjolly@liris.cnrs.fr>
|
||||
Deniz Evrenci <denizevrenci@gmail.com>
|
||||
Dirac Research
|
||||
Dominik Czarnota <dominik.b.czarnota@gmail.com>
|
||||
Dominik Korman <kormandominik@gmail.com>
|
||||
Donald Aingworth <donalds_junk_mail@yahoo.com>
|
||||
Eric Backus <eric_backus@alum.mit.edu>
|
||||
Eric Fiselier <eric@efcs.ca>
|
||||
Eugene Zhuk <eugene.zhuk@gmail.com>
|
||||
Evgeny Safronov <division494@gmail.com>
|
||||
Fabien Pichot <pichot.fabien@gmail.com>
|
||||
Federico Ficarelli <federico.ficarelli@gmail.com>
|
||||
Felix Homann <linuxaudio@showlabor.de>
|
||||
Gergő Szitár <szitar.gergo@gmail.com>
|
||||
Google Inc.
|
||||
Henrique Bucher <hbucher@gmail.com>
|
||||
International Business Machines Corporation
|
||||
Ismael Jimenez Martinez <ismael.jimenez.martinez@gmail.com>
|
||||
Jern-Kuan Leong <jernkuan@gmail.com>
|
||||
@ -38,19 +44,28 @@ Jussi Knuuttila <jussi.knuuttila@gmail.com>
|
||||
Kaito Udagawa <umireon@gmail.com>
|
||||
Kishan Kumar <kumar.kishan@outlook.com>
|
||||
Lei Xu <eddyxu@gmail.com>
|
||||
Marcel Jacobse <mjacobse@uni-bremen.de>
|
||||
Matt Clarkson <mattyclarkson@gmail.com>
|
||||
Maxim Vafin <maxvafin@gmail.com>
|
||||
Mike Apodaca <gatorfax@gmail.com>
|
||||
Min-Yih Hsu <yihshyng223@gmail.com>
|
||||
MongoDB Inc.
|
||||
Nick Hutchinson <nshutchinson@gmail.com>
|
||||
Norman Heino <norman.heino@gmail.com>
|
||||
Oleksandr Sochka <sasha.sochka@gmail.com>
|
||||
Ori Livneh <ori.livneh@gmail.com>
|
||||
Paul Redmond <paul.redmond@gmail.com>
|
||||
Radoslav Yovchev <radoslav.tm@gmail.com>
|
||||
Raghu Raja <raghu@enfabrica.net>
|
||||
Rainer Orth <ro@cebitec.uni-bielefeld.de>
|
||||
Roman Lebedev <lebedev.ri@gmail.com>
|
||||
Sayan Bhattacharjee <aero.sayan@gmail.com>
|
||||
Shapr3D <google-contributors@shapr3d.com>
|
||||
Shuo Chen <chenshuo@chenshuo.com>
|
||||
Staffan Tjernstrom <staffantj@gmail.com>
|
||||
Steinar H. Gunderson <sgunderson@bigfoot.com>
|
||||
Stripe, Inc.
|
||||
Tobias Schmidt <tobias.schmidt@in.tum.de>
|
||||
Yixuan Qiu <yixuanq@gmail.com>
|
||||
Yusuke Suzuki <utatane.tea@gmail.com>
|
||||
Zbigniew Skowron <zbychs@gmail.com>
|
||||
|
49
BUILD.bazel
49
BUILD.bazel
@ -1,14 +1,36 @@
|
||||
licenses(["notice"])
|
||||
|
||||
config_setting(
|
||||
name = "qnx",
|
||||
constraint_values = ["@platforms//os:qnx"],
|
||||
values = {
|
||||
"cpu": "x64_qnx",
|
||||
},
|
||||
visibility = [":__subpackages__"],
|
||||
)
|
||||
|
||||
config_setting(
|
||||
name = "windows",
|
||||
constraint_values = ["@platforms//os:windows"],
|
||||
values = {
|
||||
"cpu": "x64_windows",
|
||||
},
|
||||
visibility = [":__subpackages__"],
|
||||
)
|
||||
|
||||
load("@rules_cc//cc:defs.bzl", "cc_library")
|
||||
config_setting(
|
||||
name = "macos",
|
||||
constraint_values = ["@platforms//os:macos"],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
config_setting(
|
||||
name = "perfcounters",
|
||||
define_values = {
|
||||
"pfm": "1",
|
||||
},
|
||||
visibility = [":__subpackages__"],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "benchmark",
|
||||
@ -19,19 +41,40 @@ cc_library(
|
||||
],
|
||||
exclude = ["src/benchmark_main.cc"],
|
||||
),
|
||||
hdrs = ["include/benchmark/benchmark.h"],
|
||||
hdrs = [
|
||||
"include/benchmark/benchmark.h",
|
||||
"include/benchmark/export.h",
|
||||
],
|
||||
linkopts = select({
|
||||
":windows": ["-DEFAULTLIB:shlwapi.lib"],
|
||||
"//conditions:default": ["-pthread"],
|
||||
}),
|
||||
copts = select({
|
||||
":windows": [],
|
||||
"//conditions:default": ["-Werror=old-style-cast"],
|
||||
}),
|
||||
strip_include_prefix = "include",
|
||||
visibility = ["//visibility:public"],
|
||||
# Only static linking is allowed; no .so will be produced.
|
||||
# Using `defines` (i.e. not `local_defines`) means that no
|
||||
# dependent rules need to bother about defining the macro.
|
||||
linkstatic = True,
|
||||
defines = [
|
||||
"BENCHMARK_STATIC_DEFINE",
|
||||
] + select({
|
||||
":perfcounters": ["HAVE_LIBPFM"],
|
||||
"//conditions:default": [],
|
||||
}),
|
||||
deps = select({
|
||||
":perfcounters": ["@libpfm//:libpfm"],
|
||||
"//conditions:default": [],
|
||||
}),
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "benchmark_main",
|
||||
srcs = ["src/benchmark_main.cc"],
|
||||
hdrs = ["include/benchmark/benchmark.h"],
|
||||
hdrs = ["include/benchmark/benchmark.h", "include/benchmark/export.h"],
|
||||
strip_include_prefix = "include",
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [":benchmark"],
|
||||
|
149
CMakeLists.txt
149
CMakeLists.txt
@ -1,30 +1,34 @@
|
||||
cmake_minimum_required (VERSION 3.5.1)
|
||||
# Require CMake 3.10. If available, use the policies up to CMake 3.22.
|
||||
cmake_minimum_required (VERSION 3.10...3.22)
|
||||
|
||||
foreach(p
|
||||
CMP0048 # OK to clear PROJECT_VERSION on project()
|
||||
CMP0054 # CMake 3.1
|
||||
CMP0056 # export EXE_LINKER_FLAGS to try_run
|
||||
CMP0057 # Support no if() IN_LIST operator
|
||||
CMP0063 # Honor visibility properties for all targets
|
||||
CMP0077 # Allow option() overrides in importing projects
|
||||
)
|
||||
if(POLICY ${p})
|
||||
cmake_policy(SET ${p} NEW)
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
project (benchmark CXX)
|
||||
project (benchmark VERSION 1.8.3 LANGUAGES CXX)
|
||||
|
||||
option(BENCHMARK_ENABLE_TESTING "Enable testing of the benchmark library." ON)
|
||||
option(BENCHMARK_ENABLE_EXCEPTIONS "Enable the use of exceptions in the benchmark library." ON)
|
||||
option(BENCHMARK_ENABLE_LTO "Enable link time optimisation of the benchmark library." OFF)
|
||||
option(BENCHMARK_USE_LIBCXX "Build and test using libc++ as the standard library." OFF)
|
||||
option(BENCHMARK_ENABLE_WERROR "Build Release candidates with -Werror." ON)
|
||||
option(BENCHMARK_FORCE_WERROR "Build Release candidates with -Werror regardless of compiler issues." OFF)
|
||||
|
||||
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "PGI")
|
||||
# PGC++ maybe reporting false positives.
|
||||
set(BENCHMARK_ENABLE_WERROR OFF)
|
||||
endif()
|
||||
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "NVHPC")
|
||||
set(BENCHMARK_ENABLE_WERROR OFF)
|
||||
endif()
|
||||
if(BENCHMARK_FORCE_WERROR)
|
||||
set(BENCHMARK_ENABLE_WERROR ON)
|
||||
endif(BENCHMARK_FORCE_WERROR)
|
||||
|
||||
if(NOT MSVC)
|
||||
option(BENCHMARK_BUILD_32_BITS "Build a 32 bit version of the library." OFF)
|
||||
else()
|
||||
set(BENCHMARK_BUILD_32_BITS OFF CACHE BOOL "Build a 32 bit version of the library - unsupported when using MSVC)" FORCE)
|
||||
endif()
|
||||
option(BENCHMARK_ENABLE_INSTALL "Enable installation of benchmark. (Projects embedding benchmark may want to turn this OFF.)" ON)
|
||||
option(BENCHMARK_ENABLE_DOXYGEN "Build documentation with Doxygen." OFF)
|
||||
option(BENCHMARK_INSTALL_DOCS "Enable installation of documentation." ON)
|
||||
|
||||
# Allow unmet dependencies to be met using CMake's ExternalProject mechanics, which
|
||||
# may require downloading the source code.
|
||||
@ -33,8 +37,25 @@ option(BENCHMARK_DOWNLOAD_DEPENDENCIES "Allow the downloading and in-tree buildi
|
||||
# This option can be used to disable building and running unit tests which depend on gtest
|
||||
# in cases where it is not possible to build or find a valid version of gtest.
|
||||
option(BENCHMARK_ENABLE_GTEST_TESTS "Enable building the unit tests which depend on gtest" ON)
|
||||
option(BENCHMARK_USE_BUNDLED_GTEST "Use bundled GoogleTest. If disabled, the find_package(GTest) will be used." ON)
|
||||
|
||||
option(BENCHMARK_ENABLE_LIBPFM "Enable performance counters provided by libpfm" OFF)
|
||||
|
||||
# Export only public symbols
|
||||
set(CMAKE_CXX_VISIBILITY_PRESET hidden)
|
||||
set(CMAKE_VISIBILITY_INLINES_HIDDEN ON)
|
||||
|
||||
if(MSVC)
|
||||
# As of CMake 3.18, CMAKE_SYSTEM_PROCESSOR is not set properly for MSVC and
|
||||
# cross-compilation (e.g. Host=x86_64, target=aarch64) requires using the
|
||||
# undocumented, but working variable.
|
||||
# See https://gitlab.kitware.com/cmake/cmake/-/issues/15170
|
||||
set(CMAKE_SYSTEM_PROCESSOR ${MSVC_CXX_ARCHITECTURE_ID})
|
||||
if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "ARM")
|
||||
set(CMAKE_CROSSCOMPILING TRUE)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
|
||||
set(ENABLE_ASSEMBLY_TESTS_DEFAULT OFF)
|
||||
function(should_enable_assembly_tests)
|
||||
if(CMAKE_BUILD_TYPE)
|
||||
@ -81,23 +102,42 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
|
||||
include(GetGitVersion)
|
||||
get_git_version(GIT_VERSION)
|
||||
|
||||
# If no git version can be determined, use the version
|
||||
# from the project() command
|
||||
if ("${GIT_VERSION}" STREQUAL "0.0.0")
|
||||
set(VERSION "${benchmark_VERSION}")
|
||||
else()
|
||||
set(VERSION "${GIT_VERSION}")
|
||||
endif()
|
||||
# Tell the user what versions we are using
|
||||
string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" VERSION ${GIT_VERSION})
|
||||
message(STATUS "Version: ${VERSION}")
|
||||
message(STATUS "Google Benchmark version: ${VERSION}")
|
||||
|
||||
# The version of the libraries
|
||||
set(GENERIC_LIB_VERSION ${VERSION})
|
||||
string(SUBSTRING ${VERSION} 0 1 GENERIC_LIB_SOVERSION)
|
||||
|
||||
# Import our CMake modules
|
||||
include(CheckCXXCompilerFlag)
|
||||
include(AddCXXCompilerFlag)
|
||||
include(CheckCXXCompilerFlag)
|
||||
include(CheckLibraryExists)
|
||||
include(CXXFeatureCheck)
|
||||
|
||||
check_library_exists(rt shm_open "" HAVE_LIB_RT)
|
||||
|
||||
if (BENCHMARK_BUILD_32_BITS)
|
||||
add_required_cxx_compiler_flag(-m32)
|
||||
endif()
|
||||
|
||||
if (MSVC)
|
||||
set(BENCHMARK_CXX_STANDARD 14)
|
||||
else()
|
||||
set(BENCHMARK_CXX_STANDARD 11)
|
||||
endif()
|
||||
|
||||
set(CMAKE_CXX_STANDARD ${BENCHMARK_CXX_STANDARD})
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED YES)
|
||||
set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
|
||||
if (MSVC)
|
||||
# Turn compiler warnings up to 11
|
||||
string(REGEX REPLACE "[-/]W[1-4]" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
||||
@ -130,44 +170,43 @@ if (MSVC)
|
||||
set(CMAKE_EXE_LINKER_FLAGS_MINSIZEREL "${CMAKE_EXE_LINKER_FLAGS_MINSIZEREL} /LTCG")
|
||||
endif()
|
||||
else()
|
||||
# Try and enable C++11. Don't use C++14 because it doesn't work in some
|
||||
# configurations.
|
||||
add_cxx_compiler_flag(-std=c++11)
|
||||
if (NOT HAVE_CXX_FLAG_STD_CXX11)
|
||||
add_cxx_compiler_flag(-std=c++0x)
|
||||
endif()
|
||||
|
||||
# Turn compiler warnings up to 11
|
||||
add_cxx_compiler_flag(-Wall)
|
||||
add_cxx_compiler_flag(-Wextra)
|
||||
add_cxx_compiler_flag(-Wshadow)
|
||||
add_cxx_compiler_flag(-Werror RELEASE)
|
||||
add_cxx_compiler_flag(-Werror RELWITHDEBINFO)
|
||||
add_cxx_compiler_flag(-Werror MINSIZEREL)
|
||||
# Disabled until googletest (gmock) stops emitting variadic macro warnings
|
||||
#add_cxx_compiler_flag(-pedantic)
|
||||
#add_cxx_compiler_flag(-pedantic-errors)
|
||||
add_cxx_compiler_flag(-Wfloat-equal)
|
||||
add_cxx_compiler_flag(-Wold-style-cast)
|
||||
if(BENCHMARK_ENABLE_WERROR)
|
||||
add_cxx_compiler_flag(-Werror)
|
||||
endif()
|
||||
if (NOT BENCHMARK_ENABLE_TESTING)
|
||||
# Disable warning when compiling tests as gtest does not use 'override'.
|
||||
add_cxx_compiler_flag(-Wsuggest-override)
|
||||
endif()
|
||||
add_cxx_compiler_flag(-pedantic)
|
||||
add_cxx_compiler_flag(-pedantic-errors)
|
||||
add_cxx_compiler_flag(-Wshorten-64-to-32)
|
||||
add_cxx_compiler_flag(-fstrict-aliasing)
|
||||
# Disable warnings regarding deprecated parts of the library while building
|
||||
# and testing those parts of the library.
|
||||
add_cxx_compiler_flag(-Wno-deprecated-declarations)
|
||||
if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
|
||||
if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel" OR CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM")
|
||||
# Intel silently ignores '-Wno-deprecated-declarations',
|
||||
# warning no. 1786 must be explicitly disabled.
|
||||
# See #631 for rationale.
|
||||
add_cxx_compiler_flag(-wd1786)
|
||||
add_cxx_compiler_flag(-fno-finite-math-only)
|
||||
endif()
|
||||
# Disable deprecation warnings for release builds (when -Werror is enabled).
|
||||
add_cxx_compiler_flag(-Wno-deprecated RELEASE)
|
||||
add_cxx_compiler_flag(-Wno-deprecated RELWITHDEBINFO)
|
||||
add_cxx_compiler_flag(-Wno-deprecated MINSIZEREL)
|
||||
if(BENCHMARK_ENABLE_WERROR)
|
||||
add_cxx_compiler_flag(-Wno-deprecated)
|
||||
endif()
|
||||
if (NOT BENCHMARK_ENABLE_EXCEPTIONS)
|
||||
add_cxx_compiler_flag(-fno-exceptions)
|
||||
endif()
|
||||
|
||||
if (HAVE_CXX_FLAG_FSTRICT_ALIASING)
|
||||
if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "Intel") #ICC17u2: Many false positives for Wstrict-aliasing
|
||||
if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "Intel" AND NOT CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM") #ICC17u2: Many false positives for Wstrict-aliasing
|
||||
add_cxx_compiler_flag(-Wstrict-aliasing)
|
||||
endif()
|
||||
endif()
|
||||
@ -176,12 +215,12 @@ else()
|
||||
add_cxx_compiler_flag(-wd654)
|
||||
add_cxx_compiler_flag(-Wthread-safety)
|
||||
if (HAVE_CXX_FLAG_WTHREAD_SAFETY)
|
||||
cxx_feature_check(THREAD_SAFETY_ATTRIBUTES)
|
||||
cxx_feature_check(THREAD_SAFETY_ATTRIBUTES "-DINCLUDE_DIRECTORIES=${PROJECT_SOURCE_DIR}/include")
|
||||
endif()
|
||||
|
||||
# On most UNIX like platforms g++ and clang++ define _GNU_SOURCE as a
|
||||
# predefined macro, which turns on all of the wonderful libc extensions.
|
||||
# However g++ doesn't do this in Cygwin so we have to define it ourselfs
|
||||
# However g++ doesn't do this in Cygwin so we have to define it ourselves
|
||||
# since we depend on GNU/POSIX/BSD extensions.
|
||||
if (CYGWIN)
|
||||
add_definitions(-D_GNU_SOURCE=1)
|
||||
@ -194,6 +233,7 @@ else()
|
||||
# Link time optimisation
|
||||
if (BENCHMARK_ENABLE_LTO)
|
||||
add_cxx_compiler_flag(-flto)
|
||||
add_cxx_compiler_flag(-Wno-lto-type-mismatch)
|
||||
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
|
||||
find_program(GCC_AR gcc-ar)
|
||||
if (GCC_AR)
|
||||
@ -231,7 +271,8 @@ if (BENCHMARK_USE_LIBCXX)
|
||||
if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
|
||||
add_cxx_compiler_flag(-stdlib=libc++)
|
||||
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR
|
||||
"${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
|
||||
"${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel" OR
|
||||
"${CMAKE_CXX_COMPILER_ID}" STREQUAL "IntelLLVM")
|
||||
add_cxx_compiler_flag(-nostdinc++)
|
||||
message(WARNING "libc++ header path must be manually specified using CMAKE_CXX_FLAGS")
|
||||
# Adding -nodefaultlibs directly to CMAKE_<TYPE>_LINKER_FLAGS will break
|
||||
@ -245,11 +286,17 @@ if (BENCHMARK_USE_LIBCXX)
|
||||
endif()
|
||||
endif(BENCHMARK_USE_LIBCXX)
|
||||
|
||||
set(EXTRA_CXX_FLAGS "")
|
||||
if (WIN32 AND "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
|
||||
# Clang on Windows fails to compile the regex feature check under C++11
|
||||
set(EXTRA_CXX_FLAGS "-DCMAKE_CXX_STANDARD=14")
|
||||
endif()
|
||||
|
||||
# C++ feature checks
|
||||
# Determine the correct regular expression engine to use
|
||||
cxx_feature_check(STD_REGEX)
|
||||
cxx_feature_check(GNU_POSIX_REGEX)
|
||||
cxx_feature_check(POSIX_REGEX)
|
||||
cxx_feature_check(STD_REGEX ${EXTRA_CXX_FLAGS})
|
||||
cxx_feature_check(GNU_POSIX_REGEX ${EXTRA_CXX_FLAGS})
|
||||
cxx_feature_check(POSIX_REGEX ${EXTRA_CXX_FLAGS})
|
||||
if(NOT HAVE_STD_REGEX AND NOT HAVE_GNU_POSIX_REGEX AND NOT HAVE_POSIX_REGEX)
|
||||
message(FATAL_ERROR "Failed to determine the source files for the regular expression backend")
|
||||
endif()
|
||||
@ -257,10 +304,16 @@ if (NOT BENCHMARK_ENABLE_EXCEPTIONS AND HAVE_STD_REGEX
|
||||
AND NOT HAVE_GNU_POSIX_REGEX AND NOT HAVE_POSIX_REGEX)
|
||||
message(WARNING "Using std::regex with exceptions disabled is not fully supported")
|
||||
endif()
|
||||
|
||||
cxx_feature_check(STEADY_CLOCK)
|
||||
# Ensure we have pthreads
|
||||
set(THREADS_PREFER_PTHREAD_FLAG ON)
|
||||
find_package(Threads REQUIRED)
|
||||
cxx_feature_check(PTHREAD_AFFINITY)
|
||||
|
||||
if (BENCHMARK_ENABLE_LIBPFM)
|
||||
find_package(PFM)
|
||||
endif()
|
||||
|
||||
# Set up directories
|
||||
include_directories(${PROJECT_SOURCE_DIR}/include)
|
||||
@ -273,7 +326,15 @@ if (BENCHMARK_ENABLE_TESTING)
|
||||
if (BENCHMARK_ENABLE_GTEST_TESTS AND
|
||||
NOT (TARGET gtest AND TARGET gtest_main AND
|
||||
TARGET gmock AND TARGET gmock_main))
|
||||
include(GoogleTest)
|
||||
if (BENCHMARK_USE_BUNDLED_GTEST)
|
||||
include(GoogleTest)
|
||||
else()
|
||||
find_package(GTest CONFIG REQUIRED)
|
||||
add_library(gtest ALIAS GTest::gtest)
|
||||
add_library(gtest_main ALIAS GTest::gtest_main)
|
||||
add_library(gmock ALIAS GTest::gmock)
|
||||
add_library(gmock_main ALIAS GTest::gmock_main)
|
||||
endif()
|
||||
endif()
|
||||
add_subdirectory(test)
|
||||
endif()
|
||||
|
19
CONTRIBUTORS
19
CONTRIBUTORS
@ -22,12 +22,16 @@
|
||||
#
|
||||
# Please keep the list sorted.
|
||||
|
||||
Abhina Sreeskantharajan <abhina.sreeskantharajan@ibm.com>
|
||||
Albert Pretorius <pretoalb@gmail.com>
|
||||
Alex Steele <steelal123@gmail.com>
|
||||
Andriy Berestovskyy <berestovskyy@gmail.com>
|
||||
Arne Beer <arne@twobeer.de>
|
||||
Bátor Tallér <bator.taller@shapr3d.com>
|
||||
Billy Robert O'Neal III <billy.oneal@gmail.com> <bion@microsoft.com>
|
||||
Cezary Skrzyński <czars1988@gmail.com>
|
||||
Chris Kennelly <ckennelly@google.com> <ckennelly@ckennelly.com>
|
||||
Christian Wassermann <christian_wassermann@web.de>
|
||||
Christopher Seymour <chris.j.seymour@hotmail.com>
|
||||
Colin Braley <braley.colin@gmail.com>
|
||||
Cyrille Faucheux <cyrille.faucheux@gmail.com>
|
||||
@ -36,15 +40,20 @@ David Coeurjolly <david.coeurjolly@liris.cnrs.fr>
|
||||
Deniz Evrenci <denizevrenci@gmail.com>
|
||||
Dominic Hamon <dma@stripysock.com> <dominic@google.com>
|
||||
Dominik Czarnota <dominik.b.czarnota@gmail.com>
|
||||
Dominik Korman <kormandominik@gmail.com>
|
||||
Donald Aingworth <donalds_junk_mail@yahoo.com>
|
||||
Eric Backus <eric_backus@alum.mit.edu>
|
||||
Eric Fiselier <eric@efcs.ca>
|
||||
Eugene Zhuk <eugene.zhuk@gmail.com>
|
||||
Evgeny Safronov <division494@gmail.com>
|
||||
Fabien Pichot <pichot.fabien@gmail.com>
|
||||
Fanbo Meng <fanbo.meng@ibm.com>
|
||||
Federico Ficarelli <federico.ficarelli@gmail.com>
|
||||
Felix Homann <linuxaudio@showlabor.de>
|
||||
Geoffrey Martin-Noble <gcmn@google.com> <gmngeoffrey@gmail.com>
|
||||
Gergő Szitár <szitar.gergo@gmail.com>
|
||||
Hannes Hauswedell <h2@fsfe.org>
|
||||
Henrique Bucher <hbucher@gmail.com>
|
||||
Ismael Jimenez Martinez <ismael.jimenez.martinez@gmail.com>
|
||||
Jern-Kuan Leong <jernkuan@gmail.com>
|
||||
JianXiong Zhou <zhoujianxiong2@gmail.com>
|
||||
@ -52,25 +61,33 @@ Joao Paulo Magalhaes <joaoppmagalhaes@gmail.com>
|
||||
John Millikin <jmillikin@stripe.com>
|
||||
Jordan Williams <jwillikers@protonmail.com>
|
||||
Jussi Knuuttila <jussi.knuuttila@gmail.com>
|
||||
Kai Wolf <kai.wolf@gmail.com>
|
||||
Kaito Udagawa <umireon@gmail.com>
|
||||
Kai Wolf <kai.wolf@gmail.com>
|
||||
Kishan Kumar <kumar.kishan@outlook.com>
|
||||
Lei Xu <eddyxu@gmail.com>
|
||||
Marcel Jacobse <mjacobse@uni-bremen.de>
|
||||
Matt Clarkson <mattyclarkson@gmail.com>
|
||||
Maxim Vafin <maxvafin@gmail.com>
|
||||
Mike Apodaca <gatorfax@gmail.com>
|
||||
Min-Yih Hsu <yihshyng223@gmail.com>
|
||||
Nick Hutchinson <nshutchinson@gmail.com>
|
||||
Norman Heino <norman.heino@gmail.com>
|
||||
Oleksandr Sochka <sasha.sochka@gmail.com>
|
||||
Ori Livneh <ori.livneh@gmail.com>
|
||||
Pascal Leroy <phl@google.com>
|
||||
Paul Redmond <paul.redmond@gmail.com>
|
||||
Pierre Phaneuf <pphaneuf@google.com>
|
||||
Radoslav Yovchev <radoslav.tm@gmail.com>
|
||||
Raghu Raja <raghu@enfabrica.net>
|
||||
Rainer Orth <ro@cebitec.uni-bielefeld.de>
|
||||
Raul Marin <rmrodriguez@cartodb.com>
|
||||
Ray Glover <ray.glover@uk.ibm.com>
|
||||
Robert Guo <robert.guo@mongodb.com>
|
||||
Roman Lebedev <lebedev.ri@gmail.com>
|
||||
Sayan Bhattacharjee <aero.sayan@gmail.com>
|
||||
Shuo Chen <chenshuo@chenshuo.com>
|
||||
Steven Wan <wan.yu@ibm.com>
|
||||
Tobias Schmidt <tobias.schmidt@in.tum.de>
|
||||
Tobias Ulvgård <tobias.ulvgard@dirac.se>
|
||||
Tom Madams <tom.ej.madams@gmail.com> <tmadams@google.com>
|
||||
Yixuan Qiu <yixuanq@gmail.com>
|
||||
|
24
MODULE.bazel
Normal file
24
MODULE.bazel
Normal file
@ -0,0 +1,24 @@
|
||||
module(name = "google_benchmark", version="1.8.3")
|
||||
|
||||
bazel_dep(name = "bazel_skylib", version = "1.4.1")
|
||||
bazel_dep(name = "platforms", version = "0.0.6")
|
||||
bazel_dep(name = "rules_foreign_cc", version = "0.9.0")
|
||||
bazel_dep(name = "rules_cc", version = "0.0.6")
|
||||
bazel_dep(name = "rules_python", version = "0.24.0", dev_dependency = True)
|
||||
bazel_dep(name = "googletest", version = "1.12.1", repo_name = "com_google_googletest", dev_dependency = True)
|
||||
bazel_dep(name = "libpfm", version = "4.11.0")
|
||||
|
||||
# Register a toolchain for Python 3.9 to be able to build numpy. Python
|
||||
# versions >=3.10 are problematic.
|
||||
# A second reason for this is to be able to build Python hermetically instead
|
||||
# of relying on the changing default version from rules_python.
|
||||
|
||||
python = use_extension("@rules_python//python/extensions:python.bzl", "python", dev_dependency = True)
|
||||
python.toolchain(python_version = "3.9")
|
||||
|
||||
pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip", dev_dependency = True)
|
||||
pip.parse(
|
||||
hub_name="tools_pip_deps",
|
||||
python_version = "3.9",
|
||||
requirements_lock="//tools:requirements.txt")
|
||||
use_repo(pip, "tools_pip_deps")
|
35
WORKSPACE
35
WORKSPACE
@ -1,37 +1,22 @@
|
||||
workspace(name = "com_github_google_benchmark")
|
||||
|
||||
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
|
||||
load("//:bazel/benchmark_deps.bzl", "benchmark_deps")
|
||||
|
||||
http_archive(
|
||||
name = "rules_cc",
|
||||
strip_prefix = "rules_cc-a508235df92e71d537fcbae0c7c952ea6957a912",
|
||||
urls = ["https://github.com/bazelbuild/rules_cc/archive/a508235df92e71d537fcbae0c7c952ea6957a912.zip"],
|
||||
)
|
||||
benchmark_deps()
|
||||
|
||||
http_archive(
|
||||
name = "com_google_absl",
|
||||
sha256 = "f41868f7a938605c92936230081175d1eae87f6ea2c248f41077c8f88316f111",
|
||||
strip_prefix = "abseil-cpp-20200225.2",
|
||||
urls = ["https://github.com/abseil/abseil-cpp/archive/20200225.2.tar.gz"],
|
||||
)
|
||||
load("@rules_foreign_cc//foreign_cc:repositories.bzl", "rules_foreign_cc_dependencies")
|
||||
|
||||
http_archive(
|
||||
name = "com_google_googletest",
|
||||
strip_prefix = "googletest-3f0cf6b62ad1eb50d8736538363d3580dd640c3e",
|
||||
urls = ["https://github.com/google/googletest/archive/3f0cf6b62ad1eb50d8736538363d3580dd640c3e.zip"],
|
||||
)
|
||||
rules_foreign_cc_dependencies()
|
||||
|
||||
http_archive(
|
||||
name = "pybind11",
|
||||
build_file = "@//bindings/python:pybind11.BUILD",
|
||||
sha256 = "1eed57bc6863190e35637290f97a20c81cfe4d9090ac0a24f3bbf08f265eb71d",
|
||||
strip_prefix = "pybind11-2.4.3",
|
||||
urls = ["https://github.com/pybind/pybind11/archive/v2.4.3.tar.gz"],
|
||||
load("@rules_python//python:pip.bzl", pip3_install="pip_install")
|
||||
|
||||
pip3_install(
|
||||
name = "tools_pip_deps",
|
||||
requirements = "//tools:requirements.txt",
|
||||
)
|
||||
|
||||
new_local_repository(
|
||||
name = "python_headers",
|
||||
build_file = "@//bindings/python:python_headers.BUILD",
|
||||
path = "/usr/include/python3.6", # May be overwritten by setup.py.
|
||||
path = "<PYTHON_INCLUDE_PATH>", # May be overwritten by setup.py.
|
||||
)
|
||||
|
||||
|
2
WORKSPACE.bzlmod
Normal file
2
WORKSPACE.bzlmod
Normal file
@ -0,0 +1,2 @@
|
||||
# This file marks the root of the Bazel workspace.
|
||||
# See MODULE.bazel for dependencies and setup.
|
@ -1 +1,2 @@
|
||||
theme: jekyll-theme-midnight
|
||||
theme: jekyll-theme-midnight
|
||||
markdown: GFM
|
||||
|
65
bazel/benchmark_deps.bzl
Normal file
65
bazel/benchmark_deps.bzl
Normal file
@ -0,0 +1,65 @@
|
||||
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
|
||||
load("@bazel_tools//tools/build_defs/repo:git.bzl", "new_git_repository")
|
||||
|
||||
def benchmark_deps():
|
||||
"""Loads dependencies required to build Google Benchmark."""
|
||||
|
||||
if "bazel_skylib" not in native.existing_rules():
|
||||
http_archive(
|
||||
name = "bazel_skylib",
|
||||
sha256 = "f7be3474d42aae265405a592bb7da8e171919d74c16f082a5457840f06054728",
|
||||
urls = [
|
||||
"https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.2.1/bazel-skylib-1.2.1.tar.gz",
|
||||
"https://github.com/bazelbuild/bazel-skylib/releases/download/1.2.1/bazel-skylib-1.2.1.tar.gz",
|
||||
],
|
||||
)
|
||||
|
||||
if "rules_foreign_cc" not in native.existing_rules():
|
||||
http_archive(
|
||||
name = "rules_foreign_cc",
|
||||
sha256 = "bcd0c5f46a49b85b384906daae41d277b3dc0ff27c7c752cc51e43048a58ec83",
|
||||
strip_prefix = "rules_foreign_cc-0.7.1",
|
||||
url = "https://github.com/bazelbuild/rules_foreign_cc/archive/0.7.1.tar.gz",
|
||||
)
|
||||
|
||||
if "rules_python" not in native.existing_rules():
|
||||
http_archive(
|
||||
name = "rules_python",
|
||||
url = "https://github.com/bazelbuild/rules_python/releases/download/0.1.0/rules_python-0.1.0.tar.gz",
|
||||
sha256 = "b6d46438523a3ec0f3cead544190ee13223a52f6a6765a29eae7b7cc24cc83a0",
|
||||
)
|
||||
|
||||
if "com_google_absl" not in native.existing_rules():
|
||||
http_archive(
|
||||
name = "com_google_absl",
|
||||
sha256 = "f41868f7a938605c92936230081175d1eae87f6ea2c248f41077c8f88316f111",
|
||||
strip_prefix = "abseil-cpp-20200225.2",
|
||||
urls = ["https://github.com/abseil/abseil-cpp/archive/20200225.2.tar.gz"],
|
||||
)
|
||||
|
||||
if "com_google_googletest" not in native.existing_rules():
|
||||
new_git_repository(
|
||||
name = "com_google_googletest",
|
||||
remote = "https://github.com/google/googletest.git",
|
||||
tag = "release-1.11.0",
|
||||
)
|
||||
|
||||
if "nanobind" not in native.existing_rules():
|
||||
new_git_repository(
|
||||
name = "nanobind",
|
||||
remote = "https://github.com/wjakob/nanobind.git",
|
||||
tag = "v1.4.0",
|
||||
build_file = "@//bindings/python:nanobind.BUILD",
|
||||
recursive_init_submodules = True,
|
||||
)
|
||||
|
||||
if "libpfm" not in native.existing_rules():
|
||||
# Downloaded from v4.9.0 tag at https://sourceforge.net/p/perfmon2/libpfm4/ref/master/tags/
|
||||
http_archive(
|
||||
name = "libpfm",
|
||||
build_file = str(Label("//tools:libpfm.BUILD.bazel")),
|
||||
sha256 = "5da5f8872bde14b3634c9688d980f68bda28b510268723cc12973eedbab9fecc",
|
||||
type = "tar.gz",
|
||||
strip_prefix = "libpfm-4.11.0",
|
||||
urls = ["https://sourceforge.net/projects/perfmon2/files/libpfm4/libpfm-4.11.0.tar.gz/download"],
|
||||
)
|
@ -8,8 +8,8 @@ def py_extension(name, srcs, hdrs = [], copts = [], features = [], deps = []):
|
||||
shared_lib_name = name + shared_lib_suffix
|
||||
native.cc_binary(
|
||||
name = shared_lib_name,
|
||||
linkshared = 1,
|
||||
linkstatic = 1,
|
||||
linkshared = True,
|
||||
linkstatic = True,
|
||||
srcs = srcs + hdrs,
|
||||
copts = copts,
|
||||
features = features,
|
||||
|
@ -6,7 +6,6 @@ py_library(
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
":_benchmark",
|
||||
# pip; absl:app
|
||||
],
|
||||
)
|
||||
|
||||
@ -17,10 +16,13 @@ py_extension(
|
||||
"-fexceptions",
|
||||
"-fno-strict-aliasing",
|
||||
],
|
||||
features = ["-use_header_modules"],
|
||||
features = [
|
||||
"-use_header_modules",
|
||||
"-parse_headers",
|
||||
],
|
||||
deps = [
|
||||
"//:benchmark",
|
||||
"@pybind11",
|
||||
"@nanobind",
|
||||
"@python_headers",
|
||||
],
|
||||
)
|
||||
|
@ -26,42 +26,137 @@ Example usage:
|
||||
if __name__ == '__main__':
|
||||
benchmark.main()
|
||||
"""
|
||||
import atexit
|
||||
|
||||
from absl import app
|
||||
from google_benchmark import _benchmark
|
||||
from google_benchmark._benchmark import (
|
||||
Counter,
|
||||
kNanosecond,
|
||||
kMicrosecond,
|
||||
kMillisecond,
|
||||
kSecond,
|
||||
oNone,
|
||||
o1,
|
||||
oN,
|
||||
oNSquared,
|
||||
oNCubed,
|
||||
oLogN,
|
||||
oNLogN,
|
||||
oAuto,
|
||||
oLambda,
|
||||
State,
|
||||
)
|
||||
|
||||
|
||||
__all__ = [
|
||||
"register",
|
||||
"main",
|
||||
"Counter",
|
||||
"kNanosecond",
|
||||
"kMicrosecond",
|
||||
"kMillisecond",
|
||||
"kSecond",
|
||||
"oNone",
|
||||
"o1",
|
||||
"oN",
|
||||
"oNSquared",
|
||||
"oNCubed",
|
||||
"oLogN",
|
||||
"oNLogN",
|
||||
"oAuto",
|
||||
"oLambda",
|
||||
"State",
|
||||
]
|
||||
|
||||
__version__ = "0.1.0"
|
||||
__version__ = "1.8.3"
|
||||
|
||||
|
||||
def register(f=None, *, name=None):
|
||||
if f is None:
|
||||
return lambda f: register(f, name=name)
|
||||
if name is None:
|
||||
name = f.__name__
|
||||
_benchmark.RegisterBenchmark(name, f)
|
||||
return f
|
||||
class __OptionMaker:
|
||||
"""A stateless class to collect benchmark options.
|
||||
|
||||
Collect all decorator calls like @option.range(start=0, limit=1<<5).
|
||||
"""
|
||||
|
||||
class Options:
|
||||
"""Pure data class to store options calls, along with the benchmarked function."""
|
||||
|
||||
def __init__(self, func):
|
||||
self.func = func
|
||||
self.builder_calls = []
|
||||
|
||||
@classmethod
|
||||
def make(cls, func_or_options):
|
||||
"""Make Options from Options or the benchmarked function."""
|
||||
if isinstance(func_or_options, cls.Options):
|
||||
return func_or_options
|
||||
return cls.Options(func_or_options)
|
||||
|
||||
def __getattr__(self, builder_name):
|
||||
"""Append option call in the Options."""
|
||||
|
||||
# The function that get returned on @option.range(start=0, limit=1<<5).
|
||||
def __builder_method(*args, **kwargs):
|
||||
|
||||
# The decorator that get called, either with the benchmared function
|
||||
# or the previous Options
|
||||
def __decorator(func_or_options):
|
||||
options = self.make(func_or_options)
|
||||
options.builder_calls.append((builder_name, args, kwargs))
|
||||
# The decorator returns Options so it is not technically a decorator
|
||||
# and needs a final call to @register
|
||||
return options
|
||||
|
||||
return __decorator
|
||||
|
||||
return __builder_method
|
||||
|
||||
|
||||
# Alias for nicer API.
|
||||
# We have to instantiate an object, even if stateless, to be able to use __getattr__
|
||||
# on option.range
|
||||
option = __OptionMaker()
|
||||
|
||||
|
||||
def register(undefined=None, *, name=None):
|
||||
"""Register function for benchmarking."""
|
||||
if undefined is None:
|
||||
# Decorator is called without parenthesis so we return a decorator
|
||||
return lambda f: register(f, name=name)
|
||||
|
||||
# We have either the function to benchmark (simple case) or an instance of Options
|
||||
# (@option._ case).
|
||||
options = __OptionMaker.make(undefined)
|
||||
|
||||
if name is None:
|
||||
name = options.func.__name__
|
||||
|
||||
# We register the benchmark and reproduce all the @option._ calls onto the
|
||||
# benchmark builder pattern
|
||||
benchmark = _benchmark.RegisterBenchmark(name, options.func)
|
||||
for name, args, kwargs in options.builder_calls[::-1]:
|
||||
getattr(benchmark, name)(*args, **kwargs)
|
||||
|
||||
# return the benchmarked function because the decorator does not modify it
|
||||
return options.func
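To see how the builder replay above composes in practice, here is a minimal sketch mirroring the bundled example (the benchmark body and names are illustrative):

```python
import google_benchmark as benchmark

@benchmark.register(name="sum_range")      # outermost: registers the function and replays the options
@benchmark.option.range_multiplier(2)      # each @option.* call is recorded on an Options instance ...
@benchmark.option.range(1 << 10, 1 << 18)  # ... and replayed onto the C++ builder in top-to-bottom order
def sum_range(state):
    while state:
        sum(range(state.range(0)))
```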
|
||||
|
||||
|
||||
def _flags_parser(argv):
|
||||
argv = _benchmark.Initialize(argv)
|
||||
return app.parse_flags_with_usage(argv)
|
||||
argv = _benchmark.Initialize(argv)
|
||||
return app.parse_flags_with_usage(argv)
|
||||
|
||||
|
||||
def _run_benchmarks(argv):
|
||||
if len(argv) > 1:
|
||||
raise app.UsageError('Too many command-line arguments.')
|
||||
return _benchmark.RunSpecifiedBenchmarks()
|
||||
if len(argv) > 1:
|
||||
raise app.UsageError("Too many command-line arguments.")
|
||||
return _benchmark.RunSpecifiedBenchmarks()
|
||||
|
||||
|
||||
def main(argv=None):
|
||||
return app.run(_run_benchmarks, argv=argv, flags_parser=_flags_parser)
|
||||
return app.run(_run_benchmarks, argv=argv, flags_parser=_flags_parser)
|
||||
|
||||
|
||||
# Methods for use with custom main function.
|
||||
initialize = _benchmark.Initialize
|
||||
run_benchmarks = _benchmark.RunSpecifiedBenchmarks
|
||||
atexit.register(_benchmark.ClearRegisteredBenchmarks)
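The aliases directly above exist for writers of custom entry points; a hedged sketch of such a main (the extra argument handling is purely illustrative):

```python
# Hypothetical custom entry point built on the exposed helpers.
import sys

import google_benchmark as benchmark

if __name__ == "__main__":
    # initialize() consumes benchmark-specific flags and returns the remaining argv.
    remaining_argv = benchmark.initialize(sys.argv)
    # ... handle any application-specific flags left in remaining_argv here ...
    benchmark.run_benchmarks()
```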
|
||||
|
@ -1,11 +1,17 @@
|
||||
// Benchmark for Python.
|
||||
|
||||
#include "benchmark/benchmark.h"
|
||||
#include "pybind11/pybind11.h"
|
||||
#include "pybind11/stl.h"
|
||||
|
||||
#include "nanobind/nanobind.h"
|
||||
#include "nanobind/operators.h"
|
||||
#include "nanobind/stl/bind_map.h"
|
||||
#include "nanobind/stl/string.h"
|
||||
#include "nanobind/stl/vector.h"
|
||||
|
||||
NB_MAKE_OPAQUE(benchmark::UserCounters);
|
||||
|
||||
namespace {
|
||||
namespace py = ::pybind11;
|
||||
namespace nb = nanobind;
|
||||
|
||||
std::vector<std::string> Initialize(const std::vector<std::string>& argv) {
|
||||
// The `argv` pointers here become invalid when this function returns, but
|
||||
@ -28,21 +34,151 @@ std::vector<std::string> Initialize(const std::vector<std::string>& argv) {
|
||||
return remaining_argv;
|
||||
}
|
||||
|
||||
void RegisterBenchmark(const char* name, py::function f) {
|
||||
benchmark::RegisterBenchmark(name, [f](benchmark::State& state) {
|
||||
f(&state);
|
||||
});
|
||||
benchmark::internal::Benchmark* RegisterBenchmark(const std::string& name,
|
||||
nb::callable f) {
|
||||
return benchmark::RegisterBenchmark(
|
||||
name, [f](benchmark::State& state) { f(&state); });
|
||||
}
|
||||
|
||||
PYBIND11_MODULE(_benchmark, m) {
|
||||
NB_MODULE(_benchmark, m) {
|
||||
|
||||
using benchmark::TimeUnit;
|
||||
nb::enum_<TimeUnit>(m, "TimeUnit")
|
||||
.value("kNanosecond", TimeUnit::kNanosecond)
|
||||
.value("kMicrosecond", TimeUnit::kMicrosecond)
|
||||
.value("kMillisecond", TimeUnit::kMillisecond)
|
||||
.value("kSecond", TimeUnit::kSecond)
|
||||
.export_values();
|
||||
|
||||
using benchmark::BigO;
|
||||
nb::enum_<BigO>(m, "BigO")
|
||||
.value("oNone", BigO::oNone)
|
||||
.value("o1", BigO::o1)
|
||||
.value("oN", BigO::oN)
|
||||
.value("oNSquared", BigO::oNSquared)
|
||||
.value("oNCubed", BigO::oNCubed)
|
||||
.value("oLogN", BigO::oLogN)
|
||||
.value("oNLogN", BigO::oNLogN)
|
||||
.value("oAuto", BigO::oAuto)
|
||||
.value("oLambda", BigO::oLambda)
|
||||
.export_values();
|
||||
|
||||
using benchmark::internal::Benchmark;
|
||||
nb::class_<Benchmark>(m, "Benchmark")
|
||||
// For methods returning a pointer to the current object, reference
|
||||
// return policy is used to ask nanobind not to take ownership of the
|
||||
// returned object and avoid calling delete on it.
|
||||
// https://pybind11.readthedocs.io/en/stable/advanced/functions.html#return-value-policies
|
||||
//
|
||||
// For methods taking a const std::vector<...>&, a copy is created
|
||||
// because it is bound to a Python list.
|
||||
// https://pybind11.readthedocs.io/en/stable/advanced/cast/stl.html
|
||||
.def("unit", &Benchmark::Unit, nb::rv_policy::reference)
|
||||
.def("arg", &Benchmark::Arg, nb::rv_policy::reference)
|
||||
.def("args", &Benchmark::Args, nb::rv_policy::reference)
|
||||
.def("range", &Benchmark::Range, nb::rv_policy::reference,
|
||||
nb::arg("start"), nb::arg("limit"))
|
||||
.def("dense_range", &Benchmark::DenseRange,
|
||||
nb::rv_policy::reference, nb::arg("start"),
|
||||
nb::arg("limit"), nb::arg("step") = 1)
|
||||
.def("ranges", &Benchmark::Ranges, nb::rv_policy::reference)
|
||||
.def("args_product", &Benchmark::ArgsProduct,
|
||||
nb::rv_policy::reference)
|
||||
.def("arg_name", &Benchmark::ArgName, nb::rv_policy::reference)
|
||||
.def("arg_names", &Benchmark::ArgNames,
|
||||
nb::rv_policy::reference)
|
||||
.def("range_pair", &Benchmark::RangePair,
|
||||
nb::rv_policy::reference, nb::arg("lo1"), nb::arg("hi1"),
|
||||
nb::arg("lo2"), nb::arg("hi2"))
|
||||
.def("range_multiplier", &Benchmark::RangeMultiplier,
|
||||
nb::rv_policy::reference)
|
||||
.def("min_time", &Benchmark::MinTime, nb::rv_policy::reference)
|
||||
.def("min_warmup_time", &Benchmark::MinWarmUpTime,
|
||||
nb::rv_policy::reference)
|
||||
.def("iterations", &Benchmark::Iterations,
|
||||
nb::rv_policy::reference)
|
||||
.def("repetitions", &Benchmark::Repetitions,
|
||||
nb::rv_policy::reference)
|
||||
.def("report_aggregates_only", &Benchmark::ReportAggregatesOnly,
|
||||
nb::rv_policy::reference, nb::arg("value") = true)
|
||||
.def("display_aggregates_only", &Benchmark::DisplayAggregatesOnly,
|
||||
nb::rv_policy::reference, nb::arg("value") = true)
|
||||
.def("measure_process_cpu_time", &Benchmark::MeasureProcessCPUTime,
|
||||
nb::rv_policy::reference)
|
||||
.def("use_real_time", &Benchmark::UseRealTime,
|
||||
nb::rv_policy::reference)
|
||||
.def("use_manual_time", &Benchmark::UseManualTime,
|
||||
nb::rv_policy::reference)
|
||||
.def(
|
||||
"complexity",
|
||||
(Benchmark * (Benchmark::*)(benchmark::BigO)) & Benchmark::Complexity,
|
||||
nb::rv_policy::reference,
|
||||
nb::arg("complexity") = benchmark::oAuto);
|
||||
|
||||
using benchmark::Counter;
|
||||
nb::class_<Counter> py_counter(m, "Counter");
|
||||
|
||||
nb::enum_<Counter::Flags>(py_counter, "Flags")
|
||||
.value("kDefaults", Counter::Flags::kDefaults)
|
||||
.value("kIsRate", Counter::Flags::kIsRate)
|
||||
.value("kAvgThreads", Counter::Flags::kAvgThreads)
|
||||
.value("kAvgThreadsRate", Counter::Flags::kAvgThreadsRate)
|
||||
.value("kIsIterationInvariant", Counter::Flags::kIsIterationInvariant)
|
||||
.value("kIsIterationInvariantRate",
|
||||
Counter::Flags::kIsIterationInvariantRate)
|
||||
.value("kAvgIterations", Counter::Flags::kAvgIterations)
|
||||
.value("kAvgIterationsRate", Counter::Flags::kAvgIterationsRate)
|
||||
.value("kInvert", Counter::Flags::kInvert)
|
||||
.export_values()
|
||||
.def(nb::self | nb::self);
|
||||
|
||||
nb::enum_<Counter::OneK>(py_counter, "OneK")
|
||||
.value("kIs1000", Counter::OneK::kIs1000)
|
||||
.value("kIs1024", Counter::OneK::kIs1024)
|
||||
.export_values();
|
||||
|
||||
py_counter
|
||||
.def(nb::init<double, Counter::Flags, Counter::OneK>(),
|
||||
nb::arg("value") = 0., nb::arg("flags") = Counter::kDefaults,
|
||||
nb::arg("k") = Counter::kIs1000)
|
||||
.def("__init__", ([](Counter *c, double value) { new (c) Counter(value); }))
|
||||
.def_rw("value", &Counter::value)
|
||||
.def_rw("flags", &Counter::flags)
|
||||
.def_rw("oneK", &Counter::oneK)
|
||||
.def(nb::init_implicit<double>());
|
||||
|
||||
nb::implicitly_convertible<nb::int_, Counter>();
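The `init_implicit<double>` constructor and the `int_` conversion registered above are what let plain Python numbers be stored in the counters map; a small sketch of the resulting Python-side behaviour (counter names are made up):

```python
from google_benchmark import Counter

def bench(state):
    while state:
        pass
    state.counters["plain"] = 42                            # implicit number -> Counter conversion
    state.counters["rate"] = Counter(42, Counter.kIsRate)   # explicit flags remain available
```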
|
||||
|
||||
nb::bind_map<benchmark::UserCounters>(m, "UserCounters");
|
||||
|
||||
using benchmark::State;
|
||||
nb::class_<State>(m, "State")
|
||||
.def("__bool__", &State::KeepRunning)
|
||||
.def_prop_ro("keep_running", &State::KeepRunning)
|
||||
.def("pause_timing", &State::PauseTiming)
|
||||
.def("resume_timing", &State::ResumeTiming)
|
||||
.def("skip_with_error", &State::SkipWithError)
|
||||
.def_prop_ro("error_occurred", &State::error_occurred)
|
||||
.def("set_iteration_time", &State::SetIterationTime)
|
||||
.def_prop_rw("bytes_processed", &State::bytes_processed,
|
||||
&State::SetBytesProcessed)
|
||||
.def_prop_rw("complexity_n", &State::complexity_length_n,
|
||||
&State::SetComplexityN)
|
||||
.def_prop_rw("items_processed", &State::items_processed,
|
||||
&State::SetItemsProcessed)
|
||||
.def("set_label", &State::SetLabel)
|
||||
.def("range", &State::range, nb::arg("pos") = 0)
|
||||
.def_prop_ro("iterations", &State::iterations)
|
||||
.def_prop_ro("name", &State::name)
|
||||
.def_rw("counters", &State::counters)
|
||||
.def_prop_ro("thread_index", &State::thread_index)
|
||||
.def_prop_ro("threads", &State::threads);
|
||||
|
||||
m.def("Initialize", Initialize);
|
||||
m.def("RegisterBenchmark", RegisterBenchmark);
|
||||
m.def("RegisterBenchmark", RegisterBenchmark,
|
||||
nb::rv_policy::reference);
|
||||
m.def("RunSpecifiedBenchmarks",
|
||||
[]() { benchmark::RunSpecifiedBenchmarks(); });
|
||||
|
||||
py::class_<benchmark::State>(m, "State")
|
||||
.def("__bool__", &benchmark::State::KeepRunning)
|
||||
.def_property_readonly("keep_running", &benchmark::State::KeepRunning)
|
||||
.def("skip_with_error", &benchmark::State::SkipWithError);
|
||||
m.def("ClearRegisteredBenchmarks", benchmark::ClearRegisteredBenchmarks);
|
||||
};
|
||||
} // namespace
|
||||
|
@ -20,29 +20,117 @@ In the extracted directory, execute:
|
||||
python setup.py install
|
||||
"""
|
||||
|
||||
import random
|
||||
import time
|
||||
|
||||
import google_benchmark as benchmark
|
||||
from google_benchmark import Counter
|
||||
|
||||
|
||||
@benchmark.register
|
||||
def empty(state):
|
||||
while state:
|
||||
pass
|
||||
while state:
|
||||
pass
|
||||
|
||||
|
||||
@benchmark.register
|
||||
def sum_million(state):
|
||||
while state:
|
||||
sum(range(1_000_000))
|
||||
while state:
|
||||
sum(range(1_000_000))
|
||||
|
||||
@benchmark.register
|
||||
def pause_timing(state):
|
||||
"""Pause timing every iteration."""
|
||||
while state:
|
||||
# Construct a list of random ints every iteration without timing it
|
||||
state.pause_timing()
|
||||
random_list = [random.randint(0, 100) for _ in range(100)]
|
||||
state.resume_timing()
|
||||
# Time the in place sorting algorithm
|
||||
random_list.sort()
|
||||
|
||||
|
||||
@benchmark.register
|
||||
def skipped(state):
|
||||
if True: # Test some predicate here.
|
||||
state.skip_with_error('some error')
|
||||
return # NOTE: You must explicitly return, or benchmark will continue.
|
||||
if True: # Test some predicate here.
|
||||
state.skip_with_error("some error")
|
||||
return # NOTE: You must explicitly return, or benchmark will continue.
|
||||
|
||||
... # Benchmark code would be here.
|
||||
... # Benchmark code would be here.
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
benchmark.main()
|
||||
@benchmark.register
|
||||
def manual_timing(state):
|
||||
while state:
|
||||
# Manually measure this iteration's wall-clock time
|
||||
start = time.perf_counter() # perf_counter_ns() in Python 3.7+
|
||||
# Something to benchmark
|
||||
time.sleep(0.01)
|
||||
end = time.perf_counter()
|
||||
state.set_iteration_time(end - start)
|
||||
|
||||
|
||||
@benchmark.register
|
||||
def custom_counters(state):
|
||||
"""Collect custom metric using benchmark.Counter."""
|
||||
num_foo = 0.0
|
||||
while state:
|
||||
# Benchmark some code here
|
||||
pass
|
||||
# Collect some custom metric named foo
|
||||
num_foo += 0.13
|
||||
|
||||
# Automatic Counter from numbers.
|
||||
state.counters["foo"] = num_foo
|
||||
# Set a counter as a rate.
|
||||
state.counters["foo_rate"] = Counter(num_foo, Counter.kIsRate)
|
||||
# Set a counter as an inverse of rate.
|
||||
state.counters["foo_inv_rate"] = Counter(num_foo, Counter.kIsRate | Counter.kInvert)
|
||||
# Set a counter as a thread-average quantity.
|
||||
state.counters["foo_avg"] = Counter(num_foo, Counter.kAvgThreads)
|
||||
# There's also a combined flag:
|
||||
state.counters["foo_avg_rate"] = Counter(num_foo, Counter.kAvgThreadsRate)
|
||||
|
||||
|
||||
@benchmark.register
|
||||
@benchmark.option.measure_process_cpu_time()
|
||||
@benchmark.option.use_real_time()
|
||||
def with_options(state):
|
||||
while state:
|
||||
sum(range(1_000_000))
|
||||
|
||||
|
||||
@benchmark.register(name="sum_million_microseconds")
|
||||
@benchmark.option.unit(benchmark.kMicrosecond)
|
||||
def with_options2(state):
|
||||
while state:
|
||||
sum(range(1_000_000))
|
||||
|
||||
|
||||
@benchmark.register
|
||||
@benchmark.option.arg(100)
|
||||
@benchmark.option.arg(1000)
|
||||
def passing_argument(state):
|
||||
while state:
|
||||
sum(range(state.range(0)))
|
||||
|
||||
|
||||
@benchmark.register
|
||||
@benchmark.option.range(8, limit=8 << 10)
|
||||
def using_range(state):
|
||||
while state:
|
||||
sum(range(state.range(0)))
|
||||
|
||||
|
||||
@benchmark.register
|
||||
@benchmark.option.range_multiplier(2)
|
||||
@benchmark.option.range(1 << 10, 1 << 18)
|
||||
@benchmark.option.complexity(benchmark.oN)
|
||||
def computing_complexity(state):
|
||||
while state:
|
||||
sum(range(state.range(0)))
|
||||
state.complexity_n = state.range(0)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
benchmark.main()
|
||||
|
17
bindings/python/nanobind.BUILD
Normal file
@ -0,0 +1,17 @@
|
||||
cc_library(
|
||||
name = "nanobind",
|
||||
srcs = glob([
|
||||
"src/*.cpp"
|
||||
]),
|
||||
copts = ["-fexceptions"],
|
||||
includes = ["include", "ext/robin_map/include"],
|
||||
textual_hdrs = glob(
|
||||
[
|
||||
"include/**/*.h",
|
||||
"src/*.h",
|
||||
"ext/robin_map/include/tsl/*.h",
|
||||
],
|
||||
),
|
||||
deps = ["@python_headers"],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
@ -1,20 +0,0 @@
|
||||
cc_library(
|
||||
name = "pybind11",
|
||||
hdrs = glob(
|
||||
include = [
|
||||
"include/pybind11/*.h",
|
||||
"include/pybind11/detail/*.h",
|
||||
],
|
||||
exclude = [
|
||||
"include/pybind11/common.h",
|
||||
"include/pybind11/eigen.h",
|
||||
],
|
||||
),
|
||||
copts = [
|
||||
"-fexceptions",
|
||||
"-Wno-undefined-inline",
|
||||
"-Wno-pragma-once-outside-header",
|
||||
],
|
||||
includes = ["include"],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
@ -1,2 +0,0 @@
|
||||
absl-py>=0.7.1
|
||||
|
@ -34,9 +34,11 @@ function(add_cxx_compiler_flag FLAG)
|
||||
check_cxx_compiler_flag("${FLAG}" ${MANGLED_FLAG})
|
||||
set(CMAKE_REQUIRED_FLAGS "${OLD_CMAKE_REQUIRED_FLAGS}")
|
||||
if(${MANGLED_FLAG})
|
||||
set(VARIANT ${ARGV1})
|
||||
if(ARGV1)
|
||||
if(ARGC GREATER 1)
|
||||
set(VARIANT ${ARGV1})
|
||||
string(TOUPPER "_${VARIANT}" VARIANT)
|
||||
else()
|
||||
set(VARIANT "")
|
||||
endif()
|
||||
set(CMAKE_CXX_FLAGS${VARIANT} "${CMAKE_CXX_FLAGS${VARIANT}} ${BENCHMARK_CXX_FLAGS${VARIANT}} ${FLAG}" PARENT_SCOPE)
|
||||
endif()
|
||||
@ -49,9 +51,11 @@ function(add_required_cxx_compiler_flag FLAG)
|
||||
check_cxx_compiler_flag("${FLAG}" ${MANGLED_FLAG})
|
||||
set(CMAKE_REQUIRED_FLAGS "${OLD_CMAKE_REQUIRED_FLAGS}")
|
||||
if(${MANGLED_FLAG})
|
||||
set(VARIANT ${ARGV1})
|
||||
if(ARGV1)
|
||||
if(ARGC GREATER 1)
|
||||
set(VARIANT ${ARGV1})
|
||||
string(TOUPPER "_${VARIANT}" VARIANT)
|
||||
else()
|
||||
set(VARIANT "")
|
||||
endif()
|
||||
set(CMAKE_CXX_FLAGS${VARIANT} "${CMAKE_CXX_FLAGS${VARIANT}} ${FLAG}" PARENT_SCOPE)
|
||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${FLAG}" PARENT_SCOPE)
|
||||
|
@ -17,6 +17,8 @@ if(__cxx_feature_check)
|
||||
endif()
|
||||
set(__cxx_feature_check INCLUDED)
|
||||
|
||||
option(CXXFEATURECHECK_DEBUG OFF)
|
||||
|
||||
function(cxx_feature_check FILE)
|
||||
string(TOLOWER ${FILE} FILE)
|
||||
string(TOUPPER ${FILE} VAR)
|
||||
@ -27,13 +29,22 @@ function(cxx_feature_check FILE)
|
||||
return()
|
||||
endif()
|
||||
|
||||
set(FEATURE_CHECK_CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS})
|
||||
if (ARGC GREATER 1)
|
||||
message(STATUS "Enabling additional flags: ${ARGV1}")
|
||||
list(APPEND FEATURE_CHECK_CMAKE_FLAGS ${ARGV1})
|
||||
endif()
|
||||
|
||||
if (NOT DEFINED COMPILE_${FEATURE})
|
||||
message(STATUS "Performing Test ${FEATURE}")
|
||||
if(CMAKE_CROSSCOMPILING)
|
||||
message(STATUS "Cross-compiling to test ${FEATURE}")
|
||||
try_compile(COMPILE_${FEATURE}
|
||||
${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp
|
||||
CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS}
|
||||
LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES})
|
||||
CXX_STANDARD 11
|
||||
CXX_STANDARD_REQUIRED ON
|
||||
CMAKE_FLAGS ${FEATURE_CHECK_CMAKE_FLAGS}
|
||||
LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES}
|
||||
OUTPUT_VARIABLE COMPILE_OUTPUT_VAR)
|
||||
if(COMPILE_${FEATURE})
|
||||
message(WARNING
|
||||
"If you see build failures due to cross compilation, try setting HAVE_${VAR} to 0")
|
||||
@ -42,11 +53,14 @@ function(cxx_feature_check FILE)
|
||||
set(RUN_${FEATURE} 1 CACHE INTERNAL "")
|
||||
endif()
|
||||
else()
|
||||
message(STATUS "Performing Test ${FEATURE}")
|
||||
message(STATUS "Compiling and running to test ${FEATURE}")
|
||||
try_run(RUN_${FEATURE} COMPILE_${FEATURE}
|
||||
${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp
|
||||
CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS}
|
||||
LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES})
|
||||
CXX_STANDARD 11
|
||||
CXX_STANDARD_REQUIRED ON
|
||||
CMAKE_FLAGS ${FEATURE_CHECK_CMAKE_FLAGS}
|
||||
LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES}
|
||||
COMPILE_OUTPUT_VARIABLE COMPILE_OUTPUT_VAR)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@ -56,7 +70,11 @@ function(cxx_feature_check FILE)
|
||||
add_definitions(-DHAVE_${VAR})
|
||||
else()
|
||||
if(NOT COMPILE_${FEATURE})
|
||||
message(STATUS "Performing Test ${FEATURE} -- failed to compile")
|
||||
if(CXXFEATURECHECK_DEBUG)
|
||||
message(STATUS "Performing Test ${FEATURE} -- failed to compile: ${COMPILE_OUTPUT_VAR}")
|
||||
else()
|
||||
message(STATUS "Performing Test ${FEATURE} -- failed to compile")
|
||||
endif()
|
||||
else()
|
||||
message(STATUS "Performing Test ${FEATURE} -- compiled but failed to run")
|
||||
endif()
|
||||
|
@ -1 +1,7 @@
|
||||
@PACKAGE_INIT@
|
||||
|
||||
include (CMakeFindDependencyMacro)
|
||||
|
||||
find_dependency (Threads)
|
||||
|
||||
include("${CMAKE_CURRENT_LIST_DIR}/@targets_export_name@.cmake")
|
||||
|
@ -20,16 +20,20 @@ set(__get_git_version INCLUDED)
|
||||
|
||||
function(get_git_version var)
|
||||
if(GIT_EXECUTABLE)
|
||||
execute_process(COMMAND ${GIT_EXECUTABLE} describe --match "v[0-9]*.[0-9]*.[0-9]*" --abbrev=8
|
||||
execute_process(COMMAND ${GIT_EXECUTABLE} describe --tags --match "v[0-9]*.[0-9]*.[0-9]*" --abbrev=8
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||
RESULT_VARIABLE status
|
||||
OUTPUT_VARIABLE GIT_VERSION
|
||||
OUTPUT_VARIABLE GIT_DESCRIBE_VERSION
|
||||
ERROR_QUIET)
|
||||
if(${status})
|
||||
set(GIT_VERSION "v0.0.0")
|
||||
if(status)
|
||||
set(GIT_DESCRIBE_VERSION "v0.0.0")
|
||||
endif()
|
||||
|
||||
string(STRIP ${GIT_DESCRIBE_VERSION} GIT_DESCRIBE_VERSION)
|
||||
if(GIT_DESCRIBE_VERSION MATCHES v[^-]*-)
|
||||
string(REGEX REPLACE "v([^-]*)-([0-9]+)-.*" "\\1.\\2" GIT_VERSION ${GIT_DESCRIBE_VERSION})
|
||||
else()
|
||||
string(STRIP ${GIT_VERSION} GIT_VERSION)
|
||||
string(REGEX REPLACE "-[0-9]+-g" "-" GIT_VERSION ${GIT_VERSION})
|
||||
string(REGEX REPLACE "v(.*)" "\\1" GIT_VERSION ${GIT_DESCRIBE_VERSION})
|
||||
endif()
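To illustrate the normalization these two branches perform, sketched in Python purely for readability (the authoritative logic is the CMake regex above): a described commit such as `v1.8.0-23-g1234abcd` becomes `1.8.0.23`, while an exact tag like `v1.8.0` becomes `1.8.0`.

```python
import re

described = "v1.8.0-23-g1234abcd"  # hypothetical `git describe` output
if re.match(r"v[^-]*-", described):
    # tag + commit count: keep both, drop the abbreviated hash
    version = re.sub(r"v([^-]*)-([0-9]+)-.*", r"\1.\2", described)  # -> "1.8.0.23"
else:
    # exact tag: just strip the leading "v"
    version = re.sub(r"v(.*)", r"\1", described)
```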
|
||||
|
||||
# Work out if the repository is dirty
|
||||
@ -43,12 +47,12 @@ function(get_git_version var)
|
||||
ERROR_QUIET)
|
||||
string(COMPARE NOTEQUAL "${GIT_DIFF_INDEX}" "" GIT_DIRTY)
|
||||
if (${GIT_DIRTY})
|
||||
set(GIT_VERSION "${GIT_VERSION}-dirty")
|
||||
set(GIT_DESCRIBE_VERSION "${GIT_DESCRIBE_VERSION}-dirty")
|
||||
endif()
|
||||
message(STATUS "git version: ${GIT_DESCRIBE_VERSION} normalized to ${GIT_VERSION}")
|
||||
else()
|
||||
set(GIT_VERSION "v0.0.0")
|
||||
set(GIT_VERSION "0.0.0")
|
||||
endif()
|
||||
|
||||
message(STATUS "git Version: ${GIT_VERSION}")
|
||||
set(${var} ${GIT_VERSION} PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
@ -35,7 +35,24 @@ add_subdirectory(${GOOGLETEST_SOURCE_DIR}
|
||||
${GOOGLETEST_BINARY_DIR}
|
||||
EXCLUDE_FROM_ALL)
|
||||
|
||||
set_target_properties(gtest PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gtest,INTERFACE_INCLUDE_DIRECTORIES>)
|
||||
set_target_properties(gtest_main PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gtest_main,INTERFACE_INCLUDE_DIRECTORIES>)
|
||||
set_target_properties(gmock PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gmock,INTERFACE_INCLUDE_DIRECTORIES>)
|
||||
set_target_properties(gmock_main PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gmock_main,INTERFACE_INCLUDE_DIRECTORIES>)
|
||||
# googletest doesn't seem to want to stay build warning clean so let's not hurt ourselves.
|
||||
if (MSVC)
|
||||
target_compile_options(gtest PRIVATE "/wd4244" "/wd4722")
|
||||
target_compile_options(gtest_main PRIVATE "/wd4244" "/wd4722")
|
||||
target_compile_options(gmock PRIVATE "/wd4244" "/wd4722")
|
||||
target_compile_options(gmock_main PRIVATE "/wd4244" "/wd4722")
|
||||
else()
|
||||
target_compile_options(gtest PRIVATE "-w")
|
||||
target_compile_options(gtest_main PRIVATE "-w")
|
||||
target_compile_options(gmock PRIVATE "-w")
|
||||
target_compile_options(gmock_main PRIVATE "-w")
|
||||
endif()
|
||||
|
||||
if(NOT DEFINED GTEST_COMPILE_COMMANDS)
|
||||
set(GTEST_COMPILE_COMMANDS ON)
|
||||
endif()
|
||||
|
||||
set_target_properties(gtest PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gtest,INTERFACE_INCLUDE_DIRECTORIES> EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS})
|
||||
set_target_properties(gtest_main PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gtest_main,INTERFACE_INCLUDE_DIRECTORIES> EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS})
|
||||
set_target_properties(gmock PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gmock,INTERFACE_INCLUDE_DIRECTORIES> EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS})
|
||||
set_target_properties(gmock_main PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gmock_main,INTERFACE_INCLUDE_DIRECTORIES> EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS})
|
||||
|
@ -31,13 +31,14 @@ if(EXISTS "${GOOGLETEST_PATH}" AND IS_DIRECTORY "${GOOGLETEST_PATH}"
|
||||
)
|
||||
else()
|
||||
if(NOT ALLOW_DOWNLOADING_GOOGLETEST)
|
||||
message(SEND_ERROR "Did not find Google Test sources! Either pass correct path in GOOGLETEST_PATH, or enable BENCHMARK_DOWNLOAD_DEPENDENCIES, or disable BENCHMARK_ENABLE_GTEST_TESTS / BENCHMARK_ENABLE_TESTING.")
|
||||
message(SEND_ERROR "Did not find Google Test sources! Either pass correct path in GOOGLETEST_PATH, or enable BENCHMARK_DOWNLOAD_DEPENDENCIES, or disable BENCHMARK_USE_BUNDLED_GTEST, or disable BENCHMARK_ENABLE_GTEST_TESTS / BENCHMARK_ENABLE_TESTING.")
|
||||
return()
|
||||
else()
|
||||
message(WARNING "Did not find Google Test sources! Fetching from web...")
|
||||
ExternalProject_Add(
|
||||
googletest
|
||||
GIT_REPOSITORY https://github.com/google/googletest.git
|
||||
GIT_TAG master
|
||||
GIT_TAG "release-1.11.0"
|
||||
PREFIX "${CMAKE_BINARY_DIR}"
|
||||
STAMP_DIR "${CMAKE_BINARY_DIR}/stamp"
|
||||
DOWNLOAD_DIR "${CMAKE_BINARY_DIR}/download"
|
||||
|
28
cmake/Modules/FindPFM.cmake
Normal file
@ -0,0 +1,28 @@
|
||||
# If successful, the following variables will be defined:
|
||||
# PFM_FOUND.
|
||||
# PFM_LIBRARIES
|
||||
# PFM_INCLUDE_DIRS
|
||||
# the following target will be defined:
|
||||
# PFM::libpfm
|
||||
|
||||
include(FeatureSummary)
|
||||
include(FindPackageHandleStandardArgs)
|
||||
|
||||
set_package_properties(PFM PROPERTIES
|
||||
URL http://perfmon2.sourceforge.net/
|
||||
DESCRIPTION "A helper library to develop monitoring tools"
|
||||
PURPOSE "Used to program specific performance monitoring events")
|
||||
|
||||
find_library(PFM_LIBRARY NAMES pfm)
|
||||
find_path(PFM_INCLUDE_DIR NAMES perfmon/pfmlib.h)
|
||||
|
||||
find_package_handle_standard_args(PFM REQUIRED_VARS PFM_LIBRARY PFM_INCLUDE_DIR)
|
||||
|
||||
if (PFM_FOUND AND NOT TARGET PFM::libpfm)
|
||||
add_library(PFM::libpfm UNKNOWN IMPORTED)
|
||||
set_target_properties(PFM::libpfm PROPERTIES
|
||||
IMPORTED_LOCATION "${PFM_LIBRARY}"
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${PFM_INCLUDE_DIR}")
|
||||
endif()
|
||||
|
||||
mark_as_advanced(PFM_LIBRARY PFM_INCLUDE_DIR)
|
@ -1,7 +1,7 @@
|
||||
prefix=@CMAKE_INSTALL_PREFIX@
|
||||
exec_prefix=${prefix}
|
||||
libdir=${prefix}/lib
|
||||
includedir=${prefix}/include
|
||||
libdir=@CMAKE_INSTALL_FULL_LIBDIR@
|
||||
includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@
|
||||
|
||||
Name: @PROJECT_NAME@
|
||||
Description: Google microbenchmark framework
|
||||
|
16
cmake/pthread_affinity.cpp
Normal file
@ -0,0 +1,16 @@
|
||||
#include <pthread.h>
|
||||
int main() {
|
||||
cpu_set_t set;
|
||||
CPU_ZERO(&set);
|
||||
for (int i = 0; i < CPU_SETSIZE; ++i) {
|
||||
CPU_SET(i, &set);
|
||||
CPU_CLR(i, &set);
|
||||
}
|
||||
pthread_t self = pthread_self();
|
||||
int ret;
|
||||
ret = pthread_getaffinity_np(self, sizeof(set), &set);
|
||||
if (ret != 0) return ret;
|
||||
ret = pthread_setaffinity_np(self, sizeof(set), &set);
|
||||
if (ret != 0) return ret;
|
||||
return 0;
|
||||
}
|
@ -1,7 +0,0 @@
|
||||
cmake_minimum_required(VERSION 2.8.11)
|
||||
project(cmake_wrapper)
|
||||
|
||||
include(conanbuildinfo.cmake)
|
||||
conan_basic_setup()
|
||||
|
||||
include(${CMAKE_SOURCE_DIR}/CMakeListsOriginal.txt)
|
@ -1,10 +0,0 @@
|
||||
cmake_minimum_required(VERSION 2.8.11)
|
||||
project(test_package)
|
||||
|
||||
set(CMAKE_VERBOSE_MAKEFILE TRUE)
|
||||
|
||||
include(${CMAKE_BINARY_DIR}/conanbuildinfo.cmake)
|
||||
conan_basic_setup()
|
||||
|
||||
add_executable(${PROJECT_NAME} test_package.cpp)
|
||||
target_link_libraries(${PROJECT_NAME} ${CONAN_LIBS})
|
@ -1,19 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from conans import ConanFile, CMake
|
||||
import os
|
||||
|
||||
|
||||
class TestPackageConan(ConanFile):
|
||||
settings = "os", "compiler", "build_type", "arch"
|
||||
generators = "cmake"
|
||||
|
||||
def build(self):
|
||||
cmake = CMake(self)
|
||||
cmake.configure()
|
||||
cmake.build()
|
||||
|
||||
def test(self):
|
||||
bin_path = os.path.join("bin", "test_package")
|
||||
self.run(bin_path, run_environment=True)
|
@ -1,18 +0,0 @@
|
||||
#include "benchmark/benchmark.h"
|
||||
|
||||
void BM_StringCreation(benchmark::State& state) {
|
||||
while (state.KeepRunning())
|
||||
std::string empty_string;
|
||||
}
|
||||
|
||||
BENCHMARK(BM_StringCreation);
|
||||
|
||||
void BM_StringCopy(benchmark::State& state) {
|
||||
std::string x = "hello";
|
||||
while (state.KeepRunning())
|
||||
std::string copy(x);
|
||||
}
|
||||
|
||||
BENCHMARK(BM_StringCopy);
|
||||
|
||||
BENCHMARK_MAIN();
|
79
conanfile.py
@ -1,79 +0,0 @@
|
||||
from conans import ConanFile, CMake, tools
|
||||
from conans.errors import ConanInvalidConfiguration
|
||||
import shutil
|
||||
import os
|
||||
|
||||
|
||||
class GoogleBenchmarkConan(ConanFile):
|
||||
name = "benchmark"
|
||||
description = "A microbenchmark support library."
|
||||
topics = ("conan", "benchmark", "google", "microbenchmark")
|
||||
url = "https://github.com/google/benchmark"
|
||||
homepage = "https://github.com/google/benchmark"
|
||||
author = "Google Inc."
|
||||
license = "Apache-2.0"
|
||||
exports_sources = ["*"]
|
||||
generators = "cmake"
|
||||
|
||||
settings = "arch", "build_type", "compiler", "os"
|
||||
options = {
|
||||
"shared": [True, False],
|
||||
"fPIC": [True, False],
|
||||
"enable_lto": [True, False],
|
||||
"enable_exceptions": [True, False]
|
||||
}
|
||||
default_options = {"shared": False, "fPIC": True, "enable_lto": False, "enable_exceptions": True}
|
||||
|
||||
_build_subfolder = "."
|
||||
|
||||
def source(self):
|
||||
# Wrap the original CMake file to call conan_basic_setup
|
||||
shutil.move("CMakeLists.txt", "CMakeListsOriginal.txt")
|
||||
shutil.move(os.path.join("conan", "CMakeLists.txt"), "CMakeLists.txt")
|
||||
|
||||
def config_options(self):
|
||||
if self.settings.os == "Windows":
|
||||
if self.settings.compiler == "Visual Studio" and float(self.settings.compiler.version.value) <= 12:
|
||||
raise ConanInvalidConfiguration("{} {} does not support Visual Studio <= 12".format(self.name, self.version))
|
||||
del self.options.fPIC
|
||||
|
||||
def configure(self):
|
||||
if self.settings.os == "Windows" and self.options.shared:
|
||||
raise ConanInvalidConfiguration("Windows shared builds are not supported right now, see issue #639")
|
||||
|
||||
def _configure_cmake(self):
|
||||
cmake = CMake(self)
|
||||
|
||||
cmake.definitions["BENCHMARK_ENABLE_TESTING"] = "OFF"
|
||||
cmake.definitions["BENCHMARK_ENABLE_GTEST_TESTS"] = "OFF"
|
||||
cmake.definitions["BENCHMARK_ENABLE_LTO"] = "ON" if self.options.enable_lto else "OFF"
|
||||
cmake.definitions["BENCHMARK_ENABLE_EXCEPTIONS"] = "ON" if self.options.enable_exceptions else "OFF"
|
||||
|
||||
# See https://github.com/google/benchmark/pull/638 for Windows 32 build explanation
|
||||
if self.settings.os != "Windows":
|
||||
cmake.definitions["BENCHMARK_BUILD_32_BITS"] = "ON" if "64" not in str(self.settings.arch) else "OFF"
|
||||
cmake.definitions["BENCHMARK_USE_LIBCXX"] = "ON" if (str(self.settings.compiler.libcxx) == "libc++") else "OFF"
|
||||
else:
|
||||
cmake.definitions["BENCHMARK_USE_LIBCXX"] = "OFF"
|
||||
|
||||
cmake.configure(build_folder=self._build_subfolder)
|
||||
return cmake
|
||||
|
||||
def build(self):
|
||||
cmake = self._configure_cmake()
|
||||
cmake.build()
|
||||
|
||||
def package(self):
|
||||
cmake = self._configure_cmake()
|
||||
cmake.install()
|
||||
|
||||
self.copy(pattern="LICENSE", dst="licenses")
|
||||
|
||||
def package_info(self):
|
||||
self.cpp_info.libs = tools.collect_libs(self)
|
||||
if self.settings.os == "Linux":
|
||||
self.cpp_info.libs.extend(["pthread", "rt"])
|
||||
elif self.settings.os == "Windows":
|
||||
self.cpp_info.libs.append("shlwapi")
|
||||
elif self.settings.os == "SunOS":
|
||||
self.cpp_info.libs.append("kstat")
|
@ -1,18 +0,0 @@
|
||||
# Build tool dependency policy
|
||||
|
||||
To ensure the broadest compatibility when building the benchmark library, but
|
||||
still allow forward progress, we require any build tooling to be available for:
|
||||
|
||||
* Debian stable AND
|
||||
* The last two Ubuntu LTS releases AND
|
||||
|
||||
Currently, this means using build tool versions that are available for Ubuntu
|
||||
16.04 (Xenial), Ubuntu 18.04 (Bionic), and Debian stretch.
|
||||
|
||||
_Note, [travis](.travis.yml) runs under Ubuntu 14.04 (Trusty) for linux builds._
|
||||
|
||||
## cmake
|
||||
The current supported version is cmake 3.5.1 as of 2018-06-06.
|
||||
|
||||
_Note, this version is also available for Ubuntu 14.04, the previous Ubuntu LTS
|
||||
release, as `cmake3`._
|
@ -111,6 +111,7 @@ between compilers or compiler versions. A common example of this
|
||||
is matching stack frame addresses. In this case regular expressions
|
||||
can be used to match the differing bits of output. For example:
|
||||
|
||||
<!-- {% raw %} -->
|
||||
```c++
|
||||
int ExternInt;
|
||||
struct Point { int x, y, z; };
|
||||
@ -127,6 +128,7 @@ extern "C" void test_store_point() {
|
||||
// CHECK: ret
|
||||
}
|
||||
```
|
||||
<!-- {% endraw %} -->
|
||||
|
||||
## Current Requirements and Limitations
|
||||
|
||||
|
@ -1 +1,3 @@
|
||||
theme: jekyll-theme-midnight
|
||||
theme: jekyll-theme-minimal
|
||||
logo: /assets/images/icon_black.png
|
||||
show_downloads: true
|
||||
|
BIN
docs/assets/images/icon.png
Normal file
Binary file not shown. Size: 11 KiB
BIN
docs/assets/images/icon.xcf
Normal file
Binary file not shown.
BIN
docs/assets/images/icon_black.png
Normal file
Binary file not shown. Size: 11 KiB
BIN
docs/assets/images/icon_black.xcf
Normal file
Binary file not shown.
13
docs/dependencies.md
Normal file
@ -0,0 +1,13 @@
|
||||
# Build tool dependency policy
|
||||
|
||||
We follow the [Foundational C++ support policy](https://opensource.google/documentation/policies/cplusplus-support) for our build tools. In
|
||||
particular the ["Build Systems" section](https://opensource.google/documentation/policies/cplusplus-support#build-systems).
|
||||
|
||||
## CMake
|
||||
|
||||
The current supported version is CMake 3.10 as of 2023-08-10. Most modern
|
||||
distributions include newer versions, for example:
|
||||
|
||||
* Ubuntu 20.04 provides CMake 3.16.3
|
||||
* Debian 11.4 provides CMake 3.18.4
|
||||
* Ubuntu 22.04 provides CMake 3.22.1
|
12
docs/index.md
Normal file
@ -0,0 +1,12 @@
|
||||
# Benchmark
|
||||
|
||||
* [Assembly Tests](AssemblyTests.md)
|
||||
* [Dependencies](dependencies.md)
|
||||
* [Perf Counters](perf_counters.md)
|
||||
* [Platform Specific Build Instructions](platform_specific_build_instructions.md)
|
||||
* [Python Bindings](python_bindings.md)
|
||||
* [Random Interleaving](random_interleaving.md)
|
||||
* [Reducing Variance](reducing_variance.md)
|
||||
* [Releasing](releasing.md)
|
||||
* [Tools](tools.md)
|
||||
* [User Guide](user_guide.md)
|
35
docs/perf_counters.md
Normal file
@ -0,0 +1,35 @@
|
||||
<a name="perf-counters" />
|
||||
|
||||
# User-Requested Performance Counters
|
||||
|
||||
When running benchmarks, the user may choose to request collection of
|
||||
performance counters. This may be useful in investigation scenarios - narrowing
|
||||
down the cause of a regression; or verifying that the underlying cause of a
|
||||
performance improvement matches expectations.
|
||||
|
||||
This feature is available if:
|
||||
|
||||
* The benchmark is run on an architecture featuring a Performance Monitoring
|
||||
Unit (PMU),
|
||||
* The benchmark is compiled with support for collecting counters. Currently,
|
||||
this requires [libpfm](http://perfmon2.sourceforge.net/), which is built as a
|
||||
dependency via Bazel.
|
||||
|
||||
The feature does not require modifying benchmark code. Counter collection is
|
||||
handled at the boundaries where timer collection is also handled.
|
||||
|
||||
To opt-in:
|
||||
* If using a Bazel build, add `--define pfm=1` to your build flags
|
||||
* If using CMake:
|
||||
* Install `libpfm4-dev`, e.g. `apt-get install libpfm4-dev`.
|
||||
* Enable the CMake flag `BENCHMARK_ENABLE_LIBPFM` in `CMakeLists.txt`.
|
||||
|
||||
To use, pass a comma-separated list of counter names through the
|
||||
`--benchmark_perf_counters` flag. The names are decoded through libpfm - meaning,
|
||||
they are platform specific, but some (e.g. `CYCLES` or `INSTRUCTIONS`) are
|
||||
mapped by libpfm to platform-specifics - see libpfm
|
||||
[documentation](http://perfmon2.sourceforge.net/docs.html) for more details.
|
||||
|
||||
The counter values are reported back through the [User Counters](../README.md#custom-counters)
|
||||
mechanism, meaning, they are available in all the formats (e.g. JSON) supported
|
||||
by User Counters.
|
48
docs/platform_specific_build_instructions.md
Normal file
@ -0,0 +1,48 @@
|
||||
# Platform Specific Build Instructions
|
||||
|
||||
## Building with GCC
|
||||
|
||||
When the library is built using GCC it is necessary to link with the pthread
|
||||
library due to how GCC implements `std::thread`. Failing to link to pthread will
|
||||
lead to runtime exceptions (unless you're using libc++), not linker errors. See
|
||||
[issue #67](https://github.com/google/benchmark/issues/67) for more details. You
|
||||
can link to pthread by adding `-pthread` to your linker command. Note, you can
|
||||
also use `-lpthread`, but there are potential issues with ordering of command
|
||||
line parameters if you use that.
|
||||
|
||||
On QNX, the pthread library is part of libc and usually included automatically
|
||||
(see
|
||||
[`pthread_create()`](https://www.qnx.com/developers/docs/7.1/index.html#com.qnx.doc.neutrino.lib_ref/topic/p/pthread_create.html)).
|
||||
There's no separate pthread library to link.
|
||||
|
||||
## Building with Visual Studio 2015 or 2017
|
||||
|
||||
The `shlwapi` library (`-lshlwapi`) is required to support a call to `CPUInfo` which reads the registry. Either add `shlwapi.lib` under `[ Configuration Properties > Linker > Input ]`, or use the following:
|
||||
|
||||
```
|
||||
// Alternatively, can add libraries using linker options.
|
||||
#ifdef _WIN32
|
||||
#pragma comment ( lib, "Shlwapi.lib" )
|
||||
#ifdef _DEBUG
|
||||
#pragma comment ( lib, "benchmarkd.lib" )
|
||||
#else
|
||||
#pragma comment ( lib, "benchmark.lib" )
|
||||
#endif
|
||||
#endif
|
||||
```
|
||||
|
||||
You can also use the graphical version of CMake:
|
||||
* Open `CMake GUI`.
|
||||
* Under `Where to build the binaries`, same path as source plus `build`.
|
||||
* Under `CMAKE_INSTALL_PREFIX`, same path as source plus `install`.
|
||||
* Click `Configure`, `Generate`, `Open Project`.
|
||||
* If the build fails, try deleting the entire directory and starting again, or unticking options to build less.
|
||||
|
||||
## Building with Intel 2015 Update 1 or Intel System Studio Update 4
|
||||
|
||||
See instructions for building with Visual Studio. Once built, right click on the solution and change the build to Intel.
|
||||
|
||||
## Building on Solaris
|
||||
|
||||
If you're running benchmarks on Solaris, you'll want the kstat library linked in
|
||||
too (`-lkstat`).
|
34
docs/python_bindings.md
Normal file
@ -0,0 +1,34 @@
|
||||
# Building and installing Python bindings
|
||||
|
||||
Python bindings are available as wheels on [PyPI](https://pypi.org/project/google-benchmark/) for importing and
|
||||
using Google Benchmark directly in Python.
|
||||
Currently, pre-built wheels exist for macOS (both ARM64 and Intel x86), Linux x86-64 and 64-bit Windows.
|
||||
Supported Python versions are Python 3.7 - 3.10.
|
||||
|
||||
To install Google Benchmark's Python bindings, run:
|
||||
|
||||
```bash
|
||||
python -m pip install --upgrade pip # for manylinux2014 support
|
||||
python -m pip install google-benchmark
|
||||
```
|
||||
|
||||
In order to keep your system Python interpreter clean, it is advisable to run these commands in a virtual
|
||||
environment. See the [official Python documentation](https://docs.python.org/3/library/venv.html)
|
||||
on how to create virtual environments.
|
||||
|
||||
To build a wheel directly from source, you can follow these steps:
|
||||
```bash
|
||||
git clone https://github.com/google/benchmark.git
|
||||
cd benchmark
|
||||
# create a virtual environment and activate it
|
||||
python3 -m venv venv --system-site-packages
|
||||
source venv/bin/activate # .\venv\Scripts\Activate.ps1 on Windows
|
||||
|
||||
# upgrade Python's system-wide packages
|
||||
python -m pip install --upgrade pip setuptools wheel
|
||||
# builds the wheel and stores it in the directory "wheelhouse".
|
||||
python -m pip wheel . -w wheelhouse
|
||||
```
|
||||
|
||||
NB: Building wheels from source requires Bazel. For platform-specific instructions on how to install Bazel,
|
||||
refer to the [Bazel installation docs](https://bazel.build/install).
|
13
docs/random_interleaving.md
Normal file
@ -0,0 +1,13 @@
|
||||
<a name="interleaving" />
|
||||
|
||||
# Random Interleaving
|
||||
|
||||
[Random Interleaving](https://github.com/google/benchmark/issues/1051) is a
|
||||
technique to lower run-to-run variance. It randomly interleaves repetitions of a
|
||||
microbenchmark with repetitions from other microbenchmarks in the same benchmark
|
||||
test. Data shows it is able to lower run-to-run variance by
|
||||
[40%](https://github.com/google/benchmark/issues/1051) on average.
|
||||
|
||||
To use, you mainly need to set `--benchmark_enable_random_interleaving=true`,
|
||||
and optionally specify non-zero repetition count `--benchmark_repetitions=9`
|
||||
and optionally decrease the per-repetition time `--benchmark_min_time=0.1`.
|
100
docs/reducing_variance.md
Normal file
@ -0,0 +1,100 @@
|
||||
# Reducing Variance
|
||||
|
||||
<a name="disabling-cpu-frequency-scaling" />
|
||||
|
||||
## Disabling CPU Frequency Scaling
|
||||
|
||||
If you see this error:
|
||||
|
||||
```
|
||||
***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
|
||||
```
|
||||
|
||||
you might want to disable the CPU frequency scaling while running the
|
||||
benchmark, as well as consider other ways to stabilize the performance of
|
||||
your system while benchmarking.
|
||||
|
||||
See [Reducing Variance](reducing_variance.md) for more information.
|
||||
|
||||
Exactly how to do this depends on the Linux distribution,
|
||||
desktop environment, and installed programs. Specific details are a moving
|
||||
target, so we will not attempt to exhaustively document them here.
|
||||
|
||||
One simple option is to use the `cpupower` program to change the
|
||||
performance governor to "performance". This tool is maintained along with
|
||||
the Linux kernel and provided by your distribution.
|
||||
|
||||
It must be run as root, like this:
|
||||
|
||||
```bash
|
||||
sudo cpupower frequency-set --governor performance
|
||||
```
|
||||
|
||||
After this you can verify that all CPUs are using the performance governor
|
||||
by running this command:
|
||||
|
||||
```bash
|
||||
cpupower frequency-info -o proc
|
||||
```
|
||||
|
||||
The benchmarks you subsequently run will have less variance.
|
||||
|
||||
<a name="reducing-variance" />
|
||||
|
||||
## Reducing Variance in Benchmarks
|
||||
|
||||
The Linux CPU frequency governor [discussed
|
||||
above](user_guide#disabling-cpu-frequency-scaling) is not the only source
|
||||
of noise in benchmarks. Some, but not all, of the sources of variance
|
||||
include:
|
||||
|
||||
1. On multi-core machines not all CPUs/CPU cores/CPU threads run the same
|
||||
speed, so running a benchmark one time and then again may give a
|
||||
different result depending on which CPU it ran on.
|
||||
2. CPU scaling features that run on the CPU, like Intel's Turbo Boost and
|
||||
AMD Turbo Core and Precision Boost, can temporarily change the CPU
|
||||
frequency even when using the "performance" governor on Linux.
|
||||
3. Context switching between CPUs, or scheduling competition on the CPU the
|
||||
benchmark is running on.
|
||||
4. Intel Hyperthreading or AMD SMT causing the same issue as above.
|
||||
5. Cache effects caused by code running on other CPUs.
|
||||
6. Non-uniform memory architectures (NUMA).
|
||||
|
||||
These can cause variance in benchmarks results within a single run
|
||||
(`--benchmark_repetitions=N`) or across multiple runs of the benchmark
|
||||
program.
|
||||
|
||||
Reducing sources of variance is OS and architecture dependent, which is one
|
||||
reason some companies maintain machines dedicated to performance testing.
|
||||
|
||||
Some of the easier and more effective ways of reducing variance on a typical
|
||||
Linux workstation are:
|
||||
|
||||
1. Use the performance governor as [discussed
|
||||
above](user_guide#disabling-cpu-frequency-scaling).
|
||||
1. Disable processor boosting by:
|
||||
```sh
|
||||
echo 0 | sudo tee /sys/devices/system/cpu/cpufreq/boost
|
||||
```
|
||||
See the Linux kernel's
|
||||
[boost.txt](https://www.kernel.org/doc/Documentation/cpu-freq/boost.txt)
|
||||
for more information.
|
||||
2. Set the benchmark program's task affinity to a fixed cpu. For example:
|
||||
```sh
|
||||
taskset -c 0 ./mybenchmark
|
||||
```
|
||||
3. Disabling Hyperthreading/SMT. This can be done in the Bios or using the
|
||||
`/sys` file system (see the LLVM project's [Benchmarking
|
||||
tips](https://llvm.org/docs/Benchmarking.html)).
|
||||
4. Close other programs that do non-trivial things based on timers, such as
|
||||
your web browser, desktop environment, etc.
|
||||
5. Reduce the working set of your benchmark to fit within the L1 cache, but
|
||||
do be aware that this may lead you to optimize for an unrealistic
|
||||
situation.
|
||||
|
||||
Further resources on this topic:
|
||||
|
||||
1. The LLVM project's [Benchmarking
|
||||
tips](https://llvm.org/docs/Benchmarking.html).
|
||||
1. The Arch Wiki [CPU frequency
|
||||
scaling](https://wiki.archlinux.org/title/CPU_frequency_scaling) page.
|
@ -1,16 +1,41 @@
|
||||
# How to release
|
||||
|
||||
* Make sure you're on master and synced to HEAD
|
||||
* Ensure the project builds and tests run (sanity check only, obviously)
|
||||
* Make sure you're on main and synced to HEAD
|
||||
* Ensure the project builds and tests run
|
||||
* `parallel -j0 exec ::: test/*_test` can help ensure everything at least
|
||||
passes
|
||||
* Prepare release notes
|
||||
* `git log $(git describe --abbrev=0 --tags)..HEAD` gives you the list of
|
||||
commits between the last annotated tag and HEAD
|
||||
* Pick the most interesting.
|
||||
* Create one last commit that updates the version saved in `CMakeLists.txt`, `MODULE.bazel`
|
||||
and the `__version__` variable in `bindings/python/google_benchmark/__init__.py` to the
|
||||
release version you're creating. (This version will be used if benchmark is installed
|
||||
from the archive you'll be creating in the next step.)
|
||||
|
||||
```
|
||||
project (benchmark VERSION 1.8.0 LANGUAGES CXX)
|
||||
```
|
||||
|
||||
```
|
||||
module(name = "com_github_google_benchmark", version="1.8.0")
|
||||
```
|
||||
|
||||
```python
|
||||
# bindings/python/google_benchmark/__init__.py
|
||||
|
||||
# ...
|
||||
|
||||
__version__ = "1.8.0" # <-- change this to the release version you are creating
|
||||
|
||||
# ...
|
||||
```
|
||||
|
||||
* Create a release through github's interface
|
||||
* Note this will create a lightweight tag.
|
||||
* Update this to an annotated tag:
|
||||
* `git pull --tags`
|
||||
* `git tag -a -f <tag> <tag>`
|
||||
* `git push --force origin`
|
||||
* `git push --force --tags origin`
|
||||
* Confirm that the "Build and upload Python wheels" action runs to completion
|
||||
* run it manually if it hasn't run
|
||||
|
140
docs/tools.md
@ -186,6 +186,146 @@ Benchmark Time CPU Time Old
|
||||
This is a mix of the previous two modes, two (potentially different) benchmark binaries are run, and a different filter is applied to each one.
|
||||
As you can note, the values in `Time` and `CPU` columns are calculated as `(new - old) / |old|`.
|
||||
|
||||
### Note: Interpreting the output
|
||||
|
||||
Performance measurements are an art, and performance comparisons are doubly so.
|
||||
Results are often noisy and don't necessarily have large absolute differences to
|
||||
them, so just by visual inspection, it is not at all apparent if two
|
||||
measurements are actually showing a performance change or not. It is even more
|
||||
confusing with multiple benchmark repetitions.
|
||||
|
||||
Thankfully, what we can do is use statistical tests on the results to determine
|
||||
whether the performance has statistically-significantly changed. `compare.py`
|
||||
uses [Mann–Whitney U
|
||||
test](https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test), with a null
|
||||
hypothesis being that there's no difference in performance.
|
||||
|
||||
**The below output is a summary of a benchmark comparison with statistics
|
||||
provided for a multi-threaded process.**
|
||||
```
|
||||
Benchmark Time CPU Time Old Time New CPU Old CPU New
|
||||
-----------------------------------------------------------------------------------------------------------------------------
|
||||
benchmark/threads:1/process_time/real_time_pvalue 0.0000 0.0000 U Test, Repetitions: 27 vs 27
|
||||
benchmark/threads:1/process_time/real_time_mean -0.1442 -0.1442 90 77 90 77
|
||||
benchmark/threads:1/process_time/real_time_median -0.1444 -0.1444 90 77 90 77
|
||||
benchmark/threads:1/process_time/real_time_stddev +0.3974 +0.3933 0 0 0 0
|
||||
benchmark/threads:1/process_time/real_time_cv +0.6329 +0.6280 0 0 0 0
|
||||
OVERALL_GEOMEAN -0.1442 -0.1442 0 0 0 0
|
||||
```
|
||||
--------------------------------------------
|
||||
Here's a breakdown of each row:
|
||||
|
||||
**benchmark/threads:1/process_time/real_time_pvalue**: This shows the _p-value_ for
|
||||
the statistical test comparing the performance of the process running with one
|
||||
thread. A value of 0.0000 suggests a statistically significant difference in
|
||||
performance. The comparison was conducted using the U Test (Mann-Whitney
|
||||
U Test) with 27 repetitions for each case.
|
||||
|
||||
**benchmark/threads:1/process_time/real_time_mean**: This shows the relative
|
||||
difference in mean execution time between two different cases. The negative
|
||||
value (-0.1442) implies that the new process is faster by about 14.42%. The old
|
||||
time was 90 units, while the new time is 77 units.
|
||||
|
||||
**benchmark/threads:1/process_time/real_time_median**: Similarly, this shows the
|
||||
relative difference in the median execution time. Again, the new process is
|
||||
faster by 14.44%.
|
||||
|
||||
**benchmark/threads:1/process_time/real_time_stddev**: This is the relative
|
||||
difference in the standard deviation of the execution time, which is a measure
|
||||
of how much variation or dispersion there is from the mean. A positive value
|
||||
(+0.3974) implies there is more variance in the execution time in the new
|
||||
process.
|
||||
|
||||
**benchmark/threads:1/process_time/real_time_cv**: CV stands for Coefficient of
|
||||
Variation. It is the ratio of the standard deviation to the mean. It provides a
|
||||
standardized measure of dispersion. An increase (+0.6329) indicates more
|
||||
relative variability in the new process.
|
||||
|
||||
**OVERALL_GEOMEAN**: Geomean stands for geometric mean, a type of average that is
|
||||
less influenced by outliers. The negative value indicates a general improvement
|
||||
in the new process. However, given the values are all zero for the old and new
|
||||
times, this seems to be a mistake or placeholder in the output.
|
||||
|
||||
-----------------------------------------
|
||||
|
||||
|
||||
|
||||
Let's first try to see what the different columns represent in the above
|
||||
`compare.py` benchmarking output:
|
||||
|
||||
1. **Benchmark:** The name of the function being benchmarked, along with the
|
||||
size of the input (after the slash).
|
||||
|
||||
2. **Time:** The average time per operation, across all iterations.
|
||||
|
||||
3. **CPU:** The average CPU time per operation, across all iterations.
|
||||
|
||||
4. **Iterations:** The number of iterations the benchmark was run to get a
|
||||
stable estimate.
|
||||
|
||||
5. **Time Old and Time New:** These represent the average time it takes for a
|
||||
function to run in two different scenarios or versions. For example, you
|
||||
might be comparing how fast a function runs before and after you make some
|
||||
changes to it.
|
||||
|
||||
6. **CPU Old and CPU New:** These show the average amount of CPU time that the
|
||||
function uses in two different scenarios or versions. This is similar to
|
||||
Time Old and Time New, but focuses on CPU usage instead of overall time.
|
||||
|
||||
In the comparison section, the relative differences in both time and CPU time
|
||||
are displayed for each input size.
|
||||
|
||||
|
||||
A statistically-significant difference is determined by a **p-value**, which is
|
||||
a measure of the probability that the observed difference could have occurred
|
||||
just by random chance. A smaller p-value indicates stronger evidence against the
|
||||
null hypothesis.
|
||||
|
||||
**Therefore:**
|
||||
1. If the p-value is less than the chosen significance level (alpha), we
|
||||
reject the null hypothesis and conclude the benchmarks are significantly
|
||||
different.
|
||||
2. If the p-value is greater than or equal to alpha, we fail to reject the
|
||||
null hypothesis and treat the two benchmarks as similar.
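As a tiny illustration of that decision rule (the significance level here is an assumption, not something the tooling fixes for you):

```python
alpha = 0.05      # assumed significance level
p_value = 0.0000  # e.g. the *_pvalue row from the summary above

if p_value < alpha:
    verdict = "statistically different"  # reject the null hypothesis
else:
    verdict = "statistically similar"    # fail to reject the null hypothesis
```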
|
||||
|
||||
|
||||
|
||||
The result of said statistical test is additionally communicated through color coding:
|
||||
```diff
|
||||
+ Green:
|
||||
```
|
||||
The benchmarks are _**statistically different**_. This could mean the
|
||||
performance has either **significantly improved** or **significantly
|
||||
deteriorated**. You should look at the actual performance numbers to see which
|
||||
is the case.
|
||||
```diff
|
||||
- Red:
|
||||
```
|
||||
The benchmarks are _**statistically similar**_. This means the performance
|
||||
**hasn't significantly changed**.
|
||||
|
||||
In statistical terms, **'green'** means we reject the null hypothesis that
|
||||
there's no difference in performance, and **'red'** means we fail to reject the
|
||||
null hypothesis. This might seem counter-intuitive if you're expecting 'green'
|
||||
to mean 'improved performance' and 'red' to mean 'worsened performance'.
|
||||
```bash
|
||||
But remember, in this context:
|
||||
|
||||
'Success' means 'successfully finding a difference'.
|
||||
'Failure' means 'failing to find a difference'.
|
||||
```
|
||||
|
||||
|
||||
Also, please note that **even if** we determine that there **is** a
|
||||
statistically-significant difference between the two measurements, it does not
|
||||
_necessarily_ mean that the actual benchmarks that were measured **are**
|
||||
different. And vice versa: even if we determine that there is **no**
|
||||
statistically-significant difference between the two measurements, it does not
|
||||
necessarily mean that the actual benchmarks that were measured **are not**
|
||||
different.
|
||||
|
||||
|
||||
|
||||
### U test
|
||||
|
||||
If there is a sufficient repetition count of the benchmarks, the tool can do
|
||||
|
1266
docs/user_guide.md
Normal file
File diff suppressed because it is too large
47
include/benchmark/export.h
Normal file
@ -0,0 +1,47 @@
|
||||
#ifndef BENCHMARK_EXPORT_H
|
||||
#define BENCHMARK_EXPORT_H
|
||||
|
||||
#if defined(_WIN32)
|
||||
#define EXPORT_ATTR __declspec(dllexport)
|
||||
#define IMPORT_ATTR __declspec(dllimport)
|
||||
#define NO_EXPORT_ATTR
|
||||
#define DEPRECATED_ATTR __declspec(deprecated)
|
||||
#else // _WIN32
|
||||
#define EXPORT_ATTR __attribute__((visibility("default")))
|
||||
#define IMPORT_ATTR __attribute__((visibility("default")))
|
||||
#define NO_EXPORT_ATTR __attribute__((visibility("hidden")))
|
||||
#define DEPRECATE_ATTR __attribute__((__deprecated__))
|
||||
#endif // _WIN32
|
||||
|
||||
#ifdef BENCHMARK_STATIC_DEFINE
|
||||
#define BENCHMARK_EXPORT
|
||||
#define BENCHMARK_NO_EXPORT
|
||||
#else // BENCHMARK_STATIC_DEFINE
|
||||
#ifndef BENCHMARK_EXPORT
|
||||
#ifdef benchmark_EXPORTS
|
||||
/* We are building this library */
|
||||
#define BENCHMARK_EXPORT EXPORT_ATTR
|
||||
#else // benchmark_EXPORTS
|
||||
/* We are using this library */
|
||||
#define BENCHMARK_EXPORT IMPORT_ATTR
|
||||
#endif // benchmark_EXPORTS
|
||||
#endif // !BENCHMARK_EXPORT
|
||||
|
||||
#ifndef BENCHMARK_NO_EXPORT
|
||||
#define BENCHMARK_NO_EXPORT NO_EXPORT_ATTR
|
||||
#endif // !BENCHMARK_NO_EXPORT
|
||||
#endif // BENCHMARK_STATIC_DEFINE
|
||||
|
||||
#ifndef BENCHMARK_DEPRECATED
|
||||
#define BENCHMARK_DEPRECATED DEPRECATE_ATTR
|
||||
#endif // BENCHMARK_DEPRECATED
|
||||
|
||||
#ifndef BENCHMARK_DEPRECATED_EXPORT
|
||||
#define BENCHMARK_DEPRECATED_EXPORT BENCHMARK_EXPORT BENCHMARK_DEPRECATED
|
||||
#endif // BENCHMARK_DEPRECATED_EXPORT
|
||||
|
||||
#ifndef BENCHMARK_DEPRECATED_NO_EXPORT
|
||||
#define BENCHMARK_DEPRECATED_NO_EXPORT BENCHMARK_NO_EXPORT BENCHMARK_DEPRECATED
|
||||
#endif // BENCHMARK_DEPRECATED_EXPORT
|
||||
|
||||
#endif /* BENCHMARK_EXPORT_H */
|
50
pyproject.toml
Normal file
@ -0,0 +1,50 @@
[build-system]
requires = ["setuptools", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "google_benchmark"
description = "A library to benchmark code snippets."
requires-python = ">=3.8"
license = {file = "LICENSE"}
keywords = ["benchmark"]

authors = [
    {name = "Google", email = "benchmark-discuss@googlegroups.com"},
]

classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "Intended Audience :: Science/Research",
    "License :: OSI Approved :: Apache Software License",
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Topic :: Software Development :: Testing",
    "Topic :: System :: Benchmark",
]

dynamic = ["readme", "version"]

dependencies = [
    "absl-py>=0.7.1",
]

[project.urls]
Homepage = "https://github.com/google/benchmark"
Documentation = "https://github.com/google/benchmark/tree/main/docs"
Repository = "https://github.com/google/benchmark.git"
Discord = "https://discord.gg/cz7UX7wKC2"

[tool.setuptools]
package-dir = {"" = "bindings/python"}
zip-safe = false

[tool.setuptools.packages.find]
where = ["bindings/python"]

[tool.setuptools.dynamic]
version = { attr = "google_benchmark.__version__" }
readme = { file = "README.md", content-type = "text/markdown" }
178
setup.py
@ -1,125 +1,113 @@
|
||||
import contextlib
|
||||
import os
|
||||
import posixpath
|
||||
import re
|
||||
import platform
|
||||
import shutil
|
||||
import sys
|
||||
import sysconfig
|
||||
from pathlib import Path
|
||||
|
||||
from distutils import sysconfig
|
||||
import setuptools
|
||||
from setuptools.command import build_ext
|
||||
|
||||
|
||||
here = os.path.dirname(os.path.abspath(__file__))
|
||||
PYTHON_INCLUDE_PATH_PLACEHOLDER = "<PYTHON_INCLUDE_PATH>"
|
||||
|
||||
IS_WINDOWS = platform.system() == "Windows"
|
||||
IS_MAC = platform.system() == "Darwin"
|
||||
|
||||
|
||||
IS_WINDOWS = sys.platform.startswith('win')
|
||||
|
||||
|
||||
def _get_version():
|
||||
"""Parse the version string from __init__.py."""
|
||||
with open(os.path.join(here, 'bindings', 'python', 'google_benchmark', '__init__.py')) as f:
|
||||
try:
|
||||
version_line = next(
|
||||
line for line in f if line.startswith('__version__'))
|
||||
except StopIteration:
|
||||
raise ValueError('__version__ not defined in __init__.py')
|
||||
else:
|
||||
ns = {}
|
||||
exec(version_line, ns) # pylint: disable=exec-used
|
||||
return ns['__version__']
|
||||
|
||||
|
||||
def _parse_requirements(path):
|
||||
with open(os.path.join(here, path)) as f:
|
||||
return [
|
||||
line.rstrip() for line in f
|
||||
if not (line.isspace() or line.startswith('#'))
|
||||
]
|
||||
@contextlib.contextmanager
|
||||
def temp_fill_include_path(fp: str):
|
||||
"""Temporarily set the Python include path in a file."""
|
||||
with open(fp, "r+") as f:
|
||||
try:
|
||||
content = f.read()
|
||||
replaced = content.replace(
|
||||
PYTHON_INCLUDE_PATH_PLACEHOLDER,
|
||||
Path(sysconfig.get_paths()['include']).as_posix(),
|
||||
)
|
||||
f.seek(0)
|
||||
f.write(replaced)
|
||||
f.truncate()
|
||||
yield
|
||||
finally:
|
||||
# revert to the original content after exit
|
||||
f.seek(0)
|
||||
f.write(content)
|
||||
f.truncate()
|
||||
|
||||
|
||||
class BazelExtension(setuptools.Extension):
|
||||
"""A C/C++ extension that is defined as a Bazel BUILD target."""
|
||||
"""A C/C++ extension that is defined as a Bazel BUILD target."""
|
||||
|
||||
def __init__(self, name, bazel_target):
|
||||
self.bazel_target = bazel_target
|
||||
self.relpath, self.target_name = (
|
||||
posixpath.relpath(bazel_target, '//').split(':'))
|
||||
setuptools.Extension.__init__(self, name, sources=[])
|
||||
def __init__(self, name: str, bazel_target: str):
|
||||
super().__init__(name=name, sources=[])
|
||||
|
||||
self.bazel_target = bazel_target
|
||||
stripped_target = bazel_target.split("//")[-1]
|
||||
self.relpath, self.target_name = stripped_target.split(":")
|
||||
|
||||
|
||||
class BuildBazelExtension(build_ext.build_ext):
|
||||
"""A command that runs Bazel to build a C/C++ extension."""
|
||||
"""A command that runs Bazel to build a C/C++ extension."""
|
||||
|
||||
def run(self):
|
||||
for ext in self.extensions:
|
||||
self.bazel_build(ext)
|
||||
build_ext.build_ext.run(self)
|
||||
def run(self):
|
||||
for ext in self.extensions:
|
||||
self.bazel_build(ext)
|
||||
build_ext.build_ext.run(self)
|
||||
|
||||
def bazel_build(self, ext):
|
||||
with open('WORKSPACE', 'r') as f:
|
||||
workspace_contents = f.read()
|
||||
def bazel_build(self, ext: BazelExtension):
|
||||
"""Runs the bazel build to create the package."""
|
||||
with temp_fill_include_path("WORKSPACE"):
|
||||
temp_path = Path(self.build_temp)
|
||||
|
||||
with open('WORKSPACE', 'w') as f:
|
||||
f.write(re.sub(
|
||||
r'(?<=path = ").*(?=", # May be overwritten by setup\.py\.)',
|
||||
sysconfig.get_python_inc().replace(os.path.sep, posixpath.sep),
|
||||
workspace_contents))
|
||||
bazel_argv = [
|
||||
"bazel",
|
||||
"build",
|
||||
ext.bazel_target,
|
||||
f"--symlink_prefix={temp_path / 'bazel-'}",
|
||||
f"--compilation_mode={'dbg' if self.debug else 'opt'}",
|
||||
# C++17 is required by nanobind
|
||||
f"--cxxopt={'/std:c++17' if IS_WINDOWS else '-std=c++17'}",
|
||||
]
|
||||
|
||||
if not os.path.exists(self.build_temp):
|
||||
os.makedirs(self.build_temp)
|
||||
if IS_WINDOWS:
|
||||
# Link with python*.lib.
|
||||
for library_dir in self.library_dirs:
|
||||
bazel_argv.append("--linkopt=/LIBPATH:" + library_dir)
|
||||
elif IS_MAC:
|
||||
if platform.machine() == "x86_64":
|
||||
# C++17 needs macOS 10.14 at minimum
|
||||
bazel_argv.append("--macos_minimum_os=10.14")
|
||||
|
||||
bazel_argv = [
|
||||
'bazel',
|
||||
'build',
|
||||
ext.bazel_target,
|
||||
'--symlink_prefix=' + os.path.join(self.build_temp, 'bazel-'),
|
||||
'--compilation_mode=' + ('dbg' if self.debug else 'opt'),
|
||||
]
|
||||
# cross-compilation for Mac ARM64 on GitHub Mac x86 runners.
|
||||
# ARCHFLAGS is set by cibuildwheel before macOS wheel builds.
|
||||
archflags = os.getenv("ARCHFLAGS", "")
|
||||
if "arm64" in archflags:
|
||||
bazel_argv.append("--cpu=darwin_arm64")
|
||||
bazel_argv.append("--macos_cpus=arm64")
|
||||
|
||||
if IS_WINDOWS:
|
||||
# Link with python*.lib.
|
||||
for library_dir in self.library_dirs:
|
||||
bazel_argv.append('--linkopt=/LIBPATH:' + library_dir)
|
||||
elif platform.machine() == "arm64":
|
||||
bazel_argv.append("--macos_minimum_os=11.0")
|
||||
|
||||
self.spawn(bazel_argv)
|
||||
self.spawn(bazel_argv)
|
||||
|
||||
shared_lib_suffix = '.dll' if IS_WINDOWS else '.so'
|
||||
ext_bazel_bin_path = os.path.join(
|
||||
self.build_temp, 'bazel-bin',
|
||||
ext.relpath, ext.target_name + shared_lib_suffix)
|
||||
ext_dest_path = self.get_ext_fullpath(ext.name)
|
||||
ext_dest_dir = os.path.dirname(ext_dest_path)
|
||||
if not os.path.exists(ext_dest_dir):
|
||||
os.makedirs(ext_dest_dir)
|
||||
shutil.copyfile(ext_bazel_bin_path, ext_dest_path)
|
||||
shared_lib_suffix = '.dll' if IS_WINDOWS else '.so'
|
||||
ext_name = ext.target_name + shared_lib_suffix
|
||||
ext_bazel_bin_path = temp_path / 'bazel-bin' / ext.relpath / ext_name
|
||||
|
||||
ext_dest_path = Path(self.get_ext_fullpath(ext.name))
|
||||
shutil.copyfile(ext_bazel_bin_path, ext_dest_path)
|
||||
|
||||
# explicitly call `bazel shutdown` for graceful exit
|
||||
self.spawn(["bazel", "shutdown"])
|
||||
|
||||
|
||||
setuptools.setup(
|
||||
name='google_benchmark',
|
||||
version=_get_version(),
|
||||
url='https://github.com/google/benchmark',
|
||||
description='A library to benchmark code snippets.',
|
||||
author='Google',
|
||||
author_email='benchmark-py@google.com',
|
||||
# Contained modules and scripts.
|
||||
package_dir={'': 'bindings/python'},
|
||||
packages=setuptools.find_packages('bindings/python'),
|
||||
install_requires=_parse_requirements('bindings/python/requirements.txt'),
|
||||
cmdclass=dict(build_ext=BuildBazelExtension),
|
||||
ext_modules=[BazelExtension('google_benchmark._benchmark', '//bindings/python/google_benchmark:_benchmark')],
|
||||
zip_safe=False,
|
||||
# PyPI package information.
|
||||
classifiers=[
|
||||
'Development Status :: 4 - Beta',
|
||||
'Intended Audience :: Developers',
|
||||
'Intended Audience :: Science/Research',
|
||||
'License :: OSI Approved :: Apache Software License',
|
||||
'Programming Language :: Python :: 3.6',
|
||||
'Programming Language :: Python :: 3.7',
|
||||
'Programming Language :: Python :: 3.8',
|
||||
'Topic :: Software Development :: Testing',
|
||||
'Topic :: System :: Benchmark',
|
||||
ext_modules=[
|
||||
BazelExtension(
|
||||
name="google_benchmark._benchmark",
|
||||
bazel_target="//bindings/python/google_benchmark:_benchmark",
|
||||
)
|
||||
],
|
||||
license='Apache 2.0',
|
||||
keywords='benchmark',
|
||||
)
|
||||
|
@ -25,32 +25,42 @@ set_target_properties(benchmark PROPERTIES
|
||||
SOVERSION ${GENERIC_LIB_SOVERSION}
|
||||
)
|
||||
target_include_directories(benchmark PUBLIC
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
|
||||
)
|
||||
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
|
||||
)
|
||||
|
||||
# libpfm, if available
|
||||
if (PFM_FOUND)
|
||||
target_link_libraries(benchmark PRIVATE PFM::libpfm)
|
||||
target_compile_definitions(benchmark PRIVATE -DHAVE_LIBPFM)
|
||||
endif()
|
||||
|
||||
# pthread affinity, if available
|
||||
if(HAVE_PTHREAD_AFFINITY)
|
||||
target_compile_definitions(benchmark PRIVATE -DBENCHMARK_HAS_PTHREAD_AFFINITY)
|
||||
endif()
|
||||
|
||||
# Link threads.
|
||||
target_link_libraries(benchmark ${BENCHMARK_CXX_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
|
||||
find_library(LIBRT rt)
|
||||
if(LIBRT)
|
||||
target_link_libraries(benchmark ${LIBRT})
|
||||
endif()
|
||||
target_link_libraries(benchmark PRIVATE Threads::Threads)
|
||||
|
||||
target_link_libraries(benchmark PRIVATE ${BENCHMARK_CXX_LIBRARIES})
|
||||
|
||||
if(HAVE_LIB_RT)
|
||||
target_link_libraries(benchmark PRIVATE rt)
|
||||
endif(HAVE_LIB_RT)
|
||||
|
||||
if(CMAKE_BUILD_TYPE)
|
||||
string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UPPER)
|
||||
endif()
|
||||
if(NOT CMAKE_THREAD_LIBS_INIT AND "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}" MATCHES ".*-fsanitize=[^ ]*address.*")
|
||||
message(WARNING "CMake's FindThreads.cmake did not fail, but CMAKE_THREAD_LIBS_INIT ended up being empty. This was fixed in https://github.com/Kitware/CMake/commit/d53317130e84898c5328c237186dbd995aaf1c12 Let's guess that -pthread is sufficient.")
|
||||
target_link_libraries(benchmark -pthread)
|
||||
endif()
|
||||
|
||||
# We need extra libraries on Windows
|
||||
if(${CMAKE_SYSTEM_NAME} MATCHES "Windows")
|
||||
target_link_libraries(benchmark shlwapi)
|
||||
target_link_libraries(benchmark PRIVATE shlwapi)
|
||||
endif()
|
||||
|
||||
# We need extra libraries on Solaris
|
||||
if(${CMAKE_SYSTEM_NAME} MATCHES "SunOS")
|
||||
target_link_libraries(benchmark kstat)
|
||||
target_link_libraries(benchmark PRIVATE kstat)
|
||||
endif()
|
||||
|
||||
if (NOT BUILD_SHARED_LIBS)
|
||||
target_compile_definitions(benchmark PUBLIC -DBENCHMARK_STATIC_DEFINE)
|
||||
endif()
|
||||
|
||||
# Benchmark main library
|
||||
@ -60,34 +70,45 @@ set_target_properties(benchmark_main PROPERTIES
|
||||
OUTPUT_NAME "benchmark_main"
|
||||
VERSION ${GENERIC_LIB_VERSION}
|
||||
SOVERSION ${GENERIC_LIB_SOVERSION}
|
||||
DEFINE_SYMBOL benchmark_EXPORTS
|
||||
)
|
||||
target_include_directories(benchmark PUBLIC
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
|
||||
)
|
||||
target_link_libraries(benchmark_main benchmark::benchmark)
|
||||
target_link_libraries(benchmark_main PUBLIC benchmark::benchmark)
|
||||
|
||||
|
||||
set(generated_dir "${CMAKE_CURRENT_BINARY_DIR}/generated")
|
||||
set(generated_dir "${PROJECT_BINARY_DIR}")
|
||||
|
||||
set(version_config "${generated_dir}/${PROJECT_NAME}ConfigVersion.cmake")
|
||||
set(project_config "${generated_dir}/${PROJECT_NAME}Config.cmake")
|
||||
set(pkg_config "${generated_dir}/${PROJECT_NAME}.pc")
|
||||
set(targets_to_export benchmark benchmark_main)
|
||||
set(targets_export_name "${PROJECT_NAME}Targets")
|
||||
|
||||
set(namespace "${PROJECT_NAME}::")
|
||||
|
||||
include(CMakePackageConfigHelpers)
|
||||
|
||||
configure_package_config_file (
|
||||
${PROJECT_SOURCE_DIR}/cmake/Config.cmake.in
|
||||
${project_config}
|
||||
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}
|
||||
NO_SET_AND_CHECK_MACRO
|
||||
NO_CHECK_REQUIRED_COMPONENTS_MACRO
|
||||
)
|
||||
write_basic_package_version_file(
|
||||
"${version_config}" VERSION ${GENERIC_LIB_VERSION} COMPATIBILITY SameMajorVersion
|
||||
)
|
||||
|
||||
configure_file("${PROJECT_SOURCE_DIR}/cmake/Config.cmake.in" "${project_config}" @ONLY)
|
||||
configure_file("${PROJECT_SOURCE_DIR}/cmake/benchmark.pc.in" "${pkg_config}" @ONLY)
|
||||
|
||||
export (
|
||||
TARGETS ${targets_to_export}
|
||||
NAMESPACE "${namespace}"
|
||||
FILE ${generated_dir}/${targets_export_name}.cmake
|
||||
)
|
||||
|
||||
if (BENCHMARK_ENABLE_INSTALL)
|
||||
# Install target (will install the library to specified CMAKE_INSTALL_PREFIX variable)
|
||||
install(
|
||||
TARGETS benchmark benchmark_main
|
||||
TARGETS ${targets_to_export}
|
||||
EXPORT ${targets_export_name}
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
@ -96,6 +117,7 @@ if (BENCHMARK_ENABLE_INSTALL)
|
||||
|
||||
install(
|
||||
DIRECTORY "${PROJECT_SOURCE_DIR}/include/benchmark"
|
||||
"${PROJECT_BINARY_DIR}/include/benchmark"
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
|
||||
FILES_MATCHING PATTERN "*.*h")
|
||||
|
||||
@ -112,3 +134,37 @@ if (BENCHMARK_ENABLE_INSTALL)
|
||||
NAMESPACE "${namespace}"
|
||||
DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}")
|
||||
endif()
|
||||
|
||||
if (BENCHMARK_ENABLE_DOXYGEN)
|
||||
find_package(Doxygen REQUIRED)
|
||||
set(DOXYGEN_QUIET YES)
|
||||
set(DOXYGEN_RECURSIVE YES)
|
||||
set(DOXYGEN_GENERATE_HTML YES)
|
||||
set(DOXYGEN_GENERATE_MAN NO)
|
||||
set(DOXYGEN_MARKDOWN_SUPPORT YES)
|
||||
set(DOXYGEN_BUILTIN_STL_SUPPORT YES)
|
||||
set(DOXYGEN_EXTRACT_PACKAGE YES)
|
||||
set(DOXYGEN_EXTRACT_STATIC YES)
|
||||
set(DOXYGEN_SHOW_INCLUDE_FILES YES)
|
||||
set(DOXYGEN_BINARY_TOC YES)
|
||||
set(DOXYGEN_TOC_EXPAND YES)
|
||||
set(DOXYGEN_USE_MDFILE_AS_MAINPAGE "index.md")
|
||||
doxygen_add_docs(benchmark_doxygen
|
||||
docs
|
||||
include
|
||||
src
|
||||
ALL
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||
COMMENT "Building documentation with Doxygen.")
|
||||
if (BENCHMARK_ENABLE_INSTALL AND BENCHMARK_INSTALL_DOCS)
|
||||
install(
|
||||
DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/html/"
|
||||
DESTINATION ${CMAKE_INSTALL_DOCDIR})
|
||||
endif()
|
||||
else()
|
||||
if (BENCHMARK_ENABLE_INSTALL AND BENCHMARK_INSTALL_DOCS)
|
||||
install(
|
||||
DIRECTORY "${PROJECT_SOURCE_DIR}/docs/"
|
||||
DESTINATION ${CMAKE_INSTALL_DOCDIR})
|
||||
endif()
|
||||
endif()
|
||||
|
520
src/benchmark.cc
@ -13,12 +13,13 @@
|
||||
// limitations under the License.
|
||||
|
||||
#include "benchmark/benchmark.h"
|
||||
|
||||
#include "benchmark_api_internal.h"
|
||||
#include "benchmark_runner.h"
|
||||
#include "internal_macros.h"
|
||||
|
||||
#ifndef BENCHMARK_OS_WINDOWS
|
||||
#ifndef BENCHMARK_OS_FUCHSIA
|
||||
#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT)
|
||||
#include <sys/resource.h>
|
||||
#endif
|
||||
#include <sys/time.h>
|
||||
@ -32,7 +33,10 @@
|
||||
#include <cstdlib>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <random>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <utility>
|
||||
@ -45,94 +49,146 @@
|
||||
#include "internal_macros.h"
|
||||
#include "log.h"
|
||||
#include "mutex.h"
|
||||
#include "perf_counters.h"
|
||||
#include "re.h"
|
||||
#include "statistics.h"
|
||||
#include "string_util.h"
|
||||
#include "thread_manager.h"
|
||||
#include "thread_timer.h"
|
||||
|
||||
namespace benchmark {
|
||||
// Print a list of benchmarks. This option overrides all other options.
|
||||
DEFINE_bool(benchmark_list_tests, false);
|
||||
BM_DEFINE_bool(benchmark_list_tests, false);
|
||||
|
||||
// A regular expression that specifies the set of benchmarks to execute. If
|
||||
// this flag is empty, or if this flag is the string \"all\", all benchmarks
|
||||
// linked into the binary are run.
|
||||
DEFINE_string(benchmark_filter, ".");
|
||||
BM_DEFINE_string(benchmark_filter, "");
|
||||
|
||||
// Minimum number of seconds we should run benchmark before results are
|
||||
// considered significant. For cpu-time based tests, this is the lower bound
|
||||
// Specification of how long to run the benchmark.
|
||||
//
|
||||
// It can be either an exact number of iterations (specified as `<integer>x`),
|
||||
// or a minimum number of seconds (specified as `<float>s`). If the latter
|
||||
// format (ie., min seconds) is used, the system may run the benchmark longer
|
||||
// until the results are considered significant.
|
||||
//
|
||||
// For backward compatibility, the `s` suffix may be omitted, in which case,
|
||||
// the specified number is interpreted as the number of seconds.
|
||||
//
|
||||
// For cpu-time based tests, this is the lower bound
|
||||
// on the total cpu time used by all threads that make up the test. For
|
||||
// real-time based tests, this is the lower bound on the elapsed time of the
|
||||
// benchmark execution, regardless of number of threads.
|
||||
DEFINE_double(benchmark_min_time, 0.5);
|
||||
BM_DEFINE_string(benchmark_min_time, kDefaultMinTimeStr);
|
||||
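
For orientation, the same time/iteration budget can also be requested per
benchmark through the public registration API. A minimal hedged sketch
(BM_Memcpy is a hypothetical benchmark and the values are illustrative):

```cpp
#include <cstring>
#include <vector>

#include <benchmark/benchmark.h>

static void BM_Memcpy(benchmark::State& state) {  // hypothetical benchmark
  std::vector<char> src(1024), dst(1024);
  for (auto _ : state) {
    std::memcpy(dst.data(), src.data(), src.size());
    benchmark::ClobberMemory();  // keep the copy from being optimized away
  }
}
// Run for at least 2.5 seconds, mirroring --benchmark_min_time=2.5s.
// To mirror the exact-count form (--benchmark_min_time=1000x), use
// ->Iterations(1000) instead of ->MinTime(...).
BENCHMARK(BM_Memcpy)->MinTime(2.5);
BENCHMARK_MAIN();
```
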
|
||||
// Minimum number of seconds a benchmark should be run before results should be
|
||||
// taken into account. This e.g can be necessary for benchmarks of code which
|
||||
// needs to fill some form of cache before performance is of interest.
|
||||
// Note: results gathered within this period are discarded and not used for
|
||||
// reported result.
|
||||
BM_DEFINE_double(benchmark_min_warmup_time, 0.0);
|
||||
|
||||
// The number of runs of each benchmark. If greater than 1, the mean and
|
||||
// standard deviation of the runs will be reported.
|
||||
DEFINE_int32(benchmark_repetitions, 1);
|
||||
BM_DEFINE_int32(benchmark_repetitions, 1);
|
||||
|
||||
// If set, enable random interleaving of repetitions of all benchmarks.
|
||||
// See http://github.com/google/benchmark/issues/1051 for details.
|
||||
BM_DEFINE_bool(benchmark_enable_random_interleaving, false);
|
||||
|
||||
// Report the result of each benchmark repetitions. When 'true' is specified
|
||||
// only the mean, standard deviation, and other statistics are reported for
|
||||
// repeated benchmarks. Affects all reporters.
|
||||
DEFINE_bool(benchmark_report_aggregates_only, false);
|
||||
BM_DEFINE_bool(benchmark_report_aggregates_only, false);
|
||||
|
||||
// Display the result of each benchmark repetitions. When 'true' is specified
|
||||
// only the mean, standard deviation, and other statistics are displayed for
|
||||
// repeated benchmarks. Unlike benchmark_report_aggregates_only, only affects
|
||||
// the display reporter, but *NOT* file reporter, which will still contain
|
||||
// all the output.
|
||||
DEFINE_bool(benchmark_display_aggregates_only, false);
|
||||
BM_DEFINE_bool(benchmark_display_aggregates_only, false);
|
||||
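
These repetition/aggregation flags also have per-benchmark counterparts in the
public API; a hedged sketch, reusing the hypothetical BM_Memcpy benchmark from
the earlier sketch:

```cpp
// Per-benchmark equivalents of the repetition/aggregation flags above.
BENCHMARK(BM_Memcpy)                  // BM_Memcpy: hypothetical benchmark
    ->Repetitions(10)                 // like --benchmark_repetitions=10
    ->ReportAggregatesOnly(true)      // like --benchmark_report_aggregates_only
    ->DisplayAggregatesOnly(true);    // like --benchmark_display_aggregates_only
```
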
|
||||
// The format to use for console output.
|
||||
// Valid values are 'console', 'json', or 'csv'.
|
||||
DEFINE_string(benchmark_format, "console");
|
||||
BM_DEFINE_string(benchmark_format, "console");
|
||||
|
||||
// The format to use for file output.
|
||||
// Valid values are 'console', 'json', or 'csv'.
|
||||
DEFINE_string(benchmark_out_format, "json");
|
||||
BM_DEFINE_string(benchmark_out_format, "json");
|
||||
|
||||
// The file to write additional output to.
|
||||
DEFINE_string(benchmark_out, "");
|
||||
BM_DEFINE_string(benchmark_out, "");
|
||||
|
||||
// Whether to use colors in the output. Valid values:
|
||||
// 'true'/'yes'/1, 'false'/'no'/0, and 'auto'. 'auto' means to use colors if
|
||||
// the output is being sent to a terminal and the TERM environment variable is
|
||||
// set to a terminal type that supports colors.
|
||||
DEFINE_string(benchmark_color, "auto");
|
||||
BM_DEFINE_string(benchmark_color, "auto");
|
||||
|
||||
// Whether to use tabular format when printing user counters to the console.
|
||||
// Valid values: 'true'/'yes'/1, 'false'/'no'/0. Defaults to false.
|
||||
DEFINE_bool(benchmark_counters_tabular, false);
|
||||
BM_DEFINE_bool(benchmark_counters_tabular, false);
|
||||
|
||||
// List of additional perf counters to collect, in libpfm format. For more
|
||||
// information about libpfm: https://man7.org/linux/man-pages/man3/libpfm.3.html
|
||||
BM_DEFINE_string(benchmark_perf_counters, "");
|
||||
|
||||
// Extra context to include in the output formatted as comma-separated key-value
|
||||
// pairs. Kept internal as it's only used for parsing from env/command line.
|
||||
BM_DEFINE_kvpairs(benchmark_context, {});
|
||||
|
||||
// Set the default time unit to use for reports
|
||||
// Valid values are 'ns', 'us', 'ms' or 's'
|
||||
BM_DEFINE_string(benchmark_time_unit, "");
|
||||
|
||||
// The level of verbose logging to output
|
||||
DEFINE_int32(v, 0);
|
||||
|
||||
namespace benchmark {
|
||||
BM_DEFINE_int32(v, 0);
|
||||
|
||||
namespace internal {
|
||||
|
||||
std::map<std::string, std::string>* global_context = nullptr;
|
||||
|
||||
BENCHMARK_EXPORT std::map<std::string, std::string>*& GetGlobalContext() {
|
||||
return global_context;
|
||||
}
|
||||
|
||||
// FIXME: wouldn't LTO mess this up?
|
||||
void UseCharPointer(char const volatile*) {}
|
||||
|
||||
} // namespace internal
|
||||
|
||||
State::State(IterationCount max_iters, const std::vector<int64_t>& ranges,
|
||||
int thread_i, int n_threads, internal::ThreadTimer* timer,
|
||||
internal::ThreadManager* manager)
|
||||
State::State(std::string name, IterationCount max_iters,
|
||||
const std::vector<int64_t>& ranges, int thread_i, int n_threads,
|
||||
internal::ThreadTimer* timer, internal::ThreadManager* manager,
|
||||
internal::PerfCountersMeasurement* perf_counters_measurement)
|
||||
: total_iterations_(0),
|
||||
batch_leftover_(0),
|
||||
max_iterations(max_iters),
|
||||
started_(false),
|
||||
finished_(false),
|
||||
error_occurred_(false),
|
||||
skipped_(internal::NotSkipped),
|
||||
range_(ranges),
|
||||
complexity_n_(0),
|
||||
counters(),
|
||||
thread_index(thread_i),
|
||||
threads(n_threads),
|
||||
name_(std::move(name)),
|
||||
thread_index_(thread_i),
|
||||
threads_(n_threads),
|
||||
timer_(timer),
|
||||
manager_(manager) {
|
||||
CHECK(max_iterations != 0) << "At least one iteration must be run";
|
||||
CHECK_LT(thread_index, threads) << "thread_index must be less than threads";
|
||||
manager_(manager),
|
||||
perf_counters_measurement_(perf_counters_measurement) {
|
||||
BM_CHECK(max_iterations != 0) << "At least one iteration must be run";
|
||||
BM_CHECK_LT(thread_index_, threads_)
|
||||
<< "thread_index must be less than threads";
|
||||
|
||||
// Add counters with correct flag now. If added with `counters[name]` in
|
||||
// `PauseTiming`, a new `Counter` will be inserted the first time, which
|
||||
// won't have the flag. Inserting them now also reduces the allocations
|
||||
// during the benchmark.
|
||||
if (perf_counters_measurement_) {
|
||||
for (const std::string& counter_name :
|
||||
perf_counters_measurement_->names()) {
|
||||
counters[counter_name] = Counter(0.0, Counter::kAvgIterations);
|
||||
}
|
||||
}
|
||||
|
||||
// Note: The use of offsetof below is technically undefined until C++17
|
||||
// because State is not a standard layout type. However, all compilers
|
||||
@ -146,38 +202,79 @@ State::State(IterationCount max_iters, const std::vector<int64_t>& ranges,
|
||||
#elif defined(__GNUC__)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Winvalid-offsetof"
|
||||
#endif
|
||||
#if defined(__NVCC__)
|
||||
#pragma nv_diagnostic push
|
||||
#pragma nv_diag_suppress 1427
|
||||
#endif
|
||||
#if defined(__NVCOMPILER)
|
||||
#pragma diagnostic push
|
||||
#pragma diag_suppress offset_in_non_POD_nonstandard
|
||||
#endif
|
||||
// Offset tests to ensure commonly accessed data is on the first cache line.
|
||||
const int cache_line_size = 64;
|
||||
static_assert(offsetof(State, error_occurred_) <=
|
||||
(cache_line_size - sizeof(error_occurred_)),
|
||||
"");
|
||||
static_assert(
|
||||
offsetof(State, skipped_) <= (cache_line_size - sizeof(skipped_)), "");
|
||||
#if defined(__INTEL_COMPILER)
|
||||
#pragma warning pop
|
||||
#elif defined(__GNUC__)
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
#if defined(__NVCC__)
|
||||
#pragma nv_diagnostic pop
|
||||
#endif
|
||||
#if defined(__NVCOMPILER)
|
||||
#pragma diagnostic pop
|
||||
#endif
|
||||
}
|
||||
|
||||
void State::PauseTiming() {
|
||||
// Add in time accumulated so far
|
||||
CHECK(started_ && !finished_ && !error_occurred_);
|
||||
BM_CHECK(started_ && !finished_ && !skipped());
|
||||
timer_->StopTimer();
|
||||
if (perf_counters_measurement_) {
|
||||
std::vector<std::pair<std::string, double>> measurements;
|
||||
if (!perf_counters_measurement_->Stop(measurements)) {
|
||||
BM_CHECK(false) << "Perf counters read the value failed.";
|
||||
}
|
||||
for (const auto& name_and_measurement : measurements) {
|
||||
const std::string& name = name_and_measurement.first;
|
||||
const double measurement = name_and_measurement.second;
|
||||
// Counter was inserted with `kAvgIterations` flag by the constructor.
|
||||
assert(counters.find(name) != counters.end());
|
||||
counters[name].value += measurement;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void State::ResumeTiming() {
|
||||
CHECK(started_ && !finished_ && !error_occurred_);
|
||||
BM_CHECK(started_ && !finished_ && !skipped());
|
||||
timer_->StartTimer();
|
||||
if (perf_counters_measurement_) {
|
||||
perf_counters_measurement_->Start();
|
||||
}
|
||||
}
|
||||
|
||||
void State::SkipWithError(const char* msg) {
|
||||
CHECK(msg);
|
||||
error_occurred_ = true;
|
||||
void State::SkipWithMessage(const std::string& msg) {
|
||||
skipped_ = internal::SkippedWithMessage;
|
||||
{
|
||||
MutexLock l(manager_->GetBenchmarkMutex());
|
||||
if (manager_->results.has_error_ == false) {
|
||||
manager_->results.error_message_ = msg;
|
||||
manager_->results.has_error_ = true;
|
||||
if (internal::NotSkipped == manager_->results.skipped_) {
|
||||
manager_->results.skip_message_ = msg;
|
||||
manager_->results.skipped_ = skipped_;
|
||||
}
|
||||
}
|
||||
total_iterations_ = 0;
|
||||
if (timer_->running()) timer_->StopTimer();
|
||||
}
|
||||
|
||||
void State::SkipWithError(const std::string& msg) {
|
||||
skipped_ = internal::SkippedWithError;
|
||||
{
|
||||
MutexLock l(manager_->GetBenchmarkMutex());
|
||||
if (internal::NotSkipped == manager_->results.skipped_) {
|
||||
manager_->results.skip_message_ = msg;
|
||||
manager_->results.skipped_ = skipped_;
|
||||
}
|
||||
}
|
||||
total_iterations_ = 0;
|
||||
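
A hedged sketch of how a benchmark author would reach these two code paths
(the benchmark and input file names are hypothetical):

```cpp
#include <fstream>

#include <benchmark/benchmark.h>

static void BM_ParseFile(benchmark::State& state) {  // hypothetical benchmark
  std::ifstream in("input.dat", std::ios::binary);   // hypothetical input file
  if (!in) {
    // Marks the run as errored; the early return skips the measurement loop.
    state.SkipWithError("could not open input.dat");
    return;
  }
  if (in.peek() == std::ifstream::traits_type::eof()) {
    // Marks the run as skipped without flagging it as an error.
    state.SkipWithMessage("input.dat is empty, nothing to parse");
    return;
  }
  for (auto _ : state) {
    // ... measured parsing work ...
  }
}
BENCHMARK(BM_ParseFile);
```
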
@ -188,22 +285,22 @@ void State::SetIterationTime(double seconds) {
|
||||
timer_->SetIterationTime(seconds);
|
||||
}
|
||||
|
||||
void State::SetLabel(const char* label) {
|
||||
void State::SetLabel(const std::string& label) {
|
||||
MutexLock l(manager_->GetBenchmarkMutex());
|
||||
manager_->results.report_label_ = label;
|
||||
}
|
||||
|
||||
void State::StartKeepRunning() {
|
||||
CHECK(!started_ && !finished_);
|
||||
BM_CHECK(!started_ && !finished_);
|
||||
started_ = true;
|
||||
total_iterations_ = error_occurred_ ? 0 : max_iterations;
|
||||
total_iterations_ = skipped() ? 0 : max_iterations;
|
||||
manager_->StartStopBarrier();
|
||||
if (!error_occurred_) ResumeTiming();
|
||||
if (!skipped()) ResumeTiming();
|
||||
}
|
||||
|
||||
void State::FinishKeepRunning() {
|
||||
CHECK(started_ && (!finished_ || error_occurred_));
|
||||
if (!error_occurred_) {
|
||||
BM_CHECK(started_ && (!finished_ || skipped()));
|
||||
if (!skipped()) {
|
||||
PauseTiming();
|
||||
}
|
||||
// Total iterations has now wrapped around past 0. Fix this.
|
||||
@ -215,11 +312,42 @@ void State::FinishKeepRunning() {
|
||||
namespace internal {
|
||||
namespace {
|
||||
|
||||
// Flushes streams after invoking reporter methods that write to them. This
|
||||
// ensures users get timely updates even when streams are not line-buffered.
|
||||
void FlushStreams(BenchmarkReporter* reporter) {
|
||||
if (!reporter) return;
|
||||
std::flush(reporter->GetOutputStream());
|
||||
std::flush(reporter->GetErrorStream());
|
||||
}
|
||||
|
||||
// Reports in both display and file reporters.
|
||||
void Report(BenchmarkReporter* display_reporter,
|
||||
BenchmarkReporter* file_reporter, const RunResults& run_results) {
|
||||
auto report_one = [](BenchmarkReporter* reporter, bool aggregates_only,
|
||||
const RunResults& results) {
|
||||
assert(reporter);
|
||||
// If there are no aggregates, do output non-aggregates.
|
||||
aggregates_only &= !results.aggregates_only.empty();
|
||||
if (!aggregates_only) reporter->ReportRuns(results.non_aggregates);
|
||||
if (!results.aggregates_only.empty())
|
||||
reporter->ReportRuns(results.aggregates_only);
|
||||
};
|
||||
|
||||
report_one(display_reporter, run_results.display_report_aggregates_only,
|
||||
run_results);
|
||||
if (file_reporter)
|
||||
report_one(file_reporter, run_results.file_report_aggregates_only,
|
||||
run_results);
|
||||
|
||||
FlushStreams(display_reporter);
|
||||
FlushStreams(file_reporter);
|
||||
}
|
||||
|
||||
void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
|
||||
BenchmarkReporter* display_reporter,
|
||||
BenchmarkReporter* file_reporter) {
|
||||
// Note the file_reporter can be null.
|
||||
CHECK(display_reporter != nullptr);
|
||||
BM_CHECK(display_reporter != nullptr);
|
||||
|
||||
// Determine the width of the name field using a minimum width of 10.
|
||||
bool might_have_aggregates = FLAGS_benchmark_repetitions > 1;
|
||||
@ -227,10 +355,10 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
|
||||
size_t stat_field_width = 0;
|
||||
for (const BenchmarkInstance& benchmark : benchmarks) {
|
||||
name_field_width =
|
||||
std::max<size_t>(name_field_width, benchmark.name.str().size());
|
||||
might_have_aggregates |= benchmark.repetitions > 1;
|
||||
std::max<size_t>(name_field_width, benchmark.name().str().size());
|
||||
might_have_aggregates |= benchmark.repetitions() > 1;
|
||||
|
||||
for (const auto& Stat : *benchmark.statistics)
|
||||
for (const auto& Stat : benchmark.statistics())
|
||||
stat_field_width = std::max<size_t>(stat_field_width, Stat.name_.size());
|
||||
}
|
||||
if (might_have_aggregates) name_field_width += 1 + stat_field_width;
|
||||
@ -239,75 +367,129 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
|
||||
BenchmarkReporter::Context context;
|
||||
context.name_field_width = name_field_width;
|
||||
|
||||
// Keep track of running times of all instances of current benchmark
|
||||
std::vector<BenchmarkReporter::Run> complexity_reports;
|
||||
|
||||
// We flush streams after invoking reporter methods that write to them. This
|
||||
// ensures users get timely updates even when streams are not line-buffered.
|
||||
auto flushStreams = [](BenchmarkReporter* reporter) {
|
||||
if (!reporter) return;
|
||||
std::flush(reporter->GetOutputStream());
|
||||
std::flush(reporter->GetErrorStream());
|
||||
};
|
||||
// Keep track of running times of all instances of each benchmark family.
|
||||
std::map<int /*family_index*/, BenchmarkReporter::PerFamilyRunReports>
|
||||
per_family_reports;
|
||||
|
||||
if (display_reporter->ReportContext(context) &&
|
||||
(!file_reporter || file_reporter->ReportContext(context))) {
|
||||
flushStreams(display_reporter);
|
||||
flushStreams(file_reporter);
|
||||
FlushStreams(display_reporter);
|
||||
FlushStreams(file_reporter);
|
||||
|
||||
for (const auto& benchmark : benchmarks) {
|
||||
RunResults run_results = RunBenchmark(benchmark, &complexity_reports);
|
||||
size_t num_repetitions_total = 0;
|
||||
|
||||
auto report = [&run_results](BenchmarkReporter* reporter,
|
||||
bool report_aggregates_only) {
|
||||
assert(reporter);
|
||||
// If there are no aggregates, do output non-aggregates.
|
||||
report_aggregates_only &= !run_results.aggregates_only.empty();
|
||||
if (!report_aggregates_only)
|
||||
reporter->ReportRuns(run_results.non_aggregates);
|
||||
if (!run_results.aggregates_only.empty())
|
||||
reporter->ReportRuns(run_results.aggregates_only);
|
||||
};
|
||||
// This perfcounters object needs to be created before the runners vector
|
||||
// below so it outlasts their lifetime.
|
||||
PerfCountersMeasurement perfcounters(
|
||||
StrSplit(FLAGS_benchmark_perf_counters, ','));
|
||||
|
||||
report(display_reporter, run_results.display_report_aggregates_only);
|
||||
// Vector of benchmarks to run
|
||||
std::vector<internal::BenchmarkRunner> runners;
|
||||
runners.reserve(benchmarks.size());
|
||||
|
||||
// Count the number of benchmarks with threads to warn the user in case
|
||||
// performance counters are used.
|
||||
int benchmarks_with_threads = 0;
|
||||
|
||||
// Loop through all benchmarks
|
||||
for (const BenchmarkInstance& benchmark : benchmarks) {
|
||||
BenchmarkReporter::PerFamilyRunReports* reports_for_family = nullptr;
|
||||
if (benchmark.complexity() != oNone)
|
||||
reports_for_family = &per_family_reports[benchmark.family_index()];
|
||||
benchmarks_with_threads += (benchmark.threads() > 1);
|
||||
runners.emplace_back(benchmark, &perfcounters, reports_for_family);
|
||||
int num_repeats_of_this_instance = runners.back().GetNumRepeats();
|
||||
num_repetitions_total += num_repeats_of_this_instance;
|
||||
if (reports_for_family)
|
||||
reports_for_family->num_runs_total += num_repeats_of_this_instance;
|
||||
}
|
||||
assert(runners.size() == benchmarks.size() && "Unexpected runner count.");
|
||||
|
||||
// The use of performance counters with threads would be unintuitive for
|
||||
// the average user so we need to warn them about this case
|
||||
if ((benchmarks_with_threads > 0) && (perfcounters.num_counters() > 0)) {
|
||||
GetErrorLogInstance()
|
||||
<< "***WARNING*** There are " << benchmarks_with_threads
|
||||
<< " benchmarks with threads and " << perfcounters.num_counters()
|
||||
<< " performance counters were requested. Beware counters will "
|
||||
"reflect the combined usage across all "
|
||||
"threads.\n";
|
||||
}
|
||||
|
||||
std::vector<size_t> repetition_indices;
|
||||
repetition_indices.reserve(num_repetitions_total);
|
||||
for (size_t runner_index = 0, num_runners = runners.size();
|
||||
runner_index != num_runners; ++runner_index) {
|
||||
const internal::BenchmarkRunner& runner = runners[runner_index];
|
||||
std::fill_n(std::back_inserter(repetition_indices),
|
||||
runner.GetNumRepeats(), runner_index);
|
||||
}
|
||||
assert(repetition_indices.size() == num_repetitions_total &&
|
||||
"Unexpected number of repetition indexes.");
|
||||
|
||||
if (FLAGS_benchmark_enable_random_interleaving) {
|
||||
std::random_device rd;
|
||||
std::mt19937 g(rd());
|
||||
std::shuffle(repetition_indices.begin(), repetition_indices.end(), g);
|
||||
}
|
||||
|
||||
for (size_t repetition_index : repetition_indices) {
|
||||
internal::BenchmarkRunner& runner = runners[repetition_index];
|
||||
runner.DoOneRepetition();
|
||||
if (runner.HasRepeatsRemaining()) continue;
|
||||
// FIXME: report each repetition separately, not all of them in bulk.
|
||||
|
||||
display_reporter->ReportRunsConfig(
|
||||
runner.GetMinTime(), runner.HasExplicitIters(), runner.GetIters());
|
||||
if (file_reporter)
|
||||
report(file_reporter, run_results.file_report_aggregates_only);
|
||||
file_reporter->ReportRunsConfig(
|
||||
runner.GetMinTime(), runner.HasExplicitIters(), runner.GetIters());
|
||||
|
||||
flushStreams(display_reporter);
|
||||
flushStreams(file_reporter);
|
||||
RunResults run_results = runner.GetResults();
|
||||
|
||||
// Maybe calculate complexity report
|
||||
if (const auto* reports_for_family = runner.GetReportsForFamily()) {
|
||||
if (reports_for_family->num_runs_done ==
|
||||
reports_for_family->num_runs_total) {
|
||||
auto additional_run_stats = ComputeBigO(reports_for_family->Runs);
|
||||
run_results.aggregates_only.insert(run_results.aggregates_only.end(),
|
||||
additional_run_stats.begin(),
|
||||
additional_run_stats.end());
|
||||
per_family_reports.erase(
|
||||
static_cast<int>(reports_for_family->Runs.front().family_index));
|
||||
}
|
||||
}
|
||||
|
||||
Report(display_reporter, file_reporter, run_results);
|
||||
}
|
||||
}
|
||||
display_reporter->Finalize();
|
||||
if (file_reporter) file_reporter->Finalize();
|
||||
flushStreams(display_reporter);
|
||||
flushStreams(file_reporter);
|
||||
FlushStreams(display_reporter);
|
||||
FlushStreams(file_reporter);
|
||||
}
|
||||
|
||||
// Disable deprecated warnings temporarily because we need to reference
|
||||
// CSVReporter but don't want to trigger -Werror=-Wdeprecated-declarations
|
||||
#ifdef __GNUC__
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
|
||||
#endif
|
||||
BENCHMARK_DISABLE_DEPRECATED_WARNING
|
||||
|
||||
std::unique_ptr<BenchmarkReporter> CreateReporter(
|
||||
std::string const& name, ConsoleReporter::OutputOptions output_opts) {
|
||||
typedef std::unique_ptr<BenchmarkReporter> PtrType;
|
||||
if (name == "console") {
|
||||
return PtrType(new ConsoleReporter(output_opts));
|
||||
} else if (name == "json") {
|
||||
return PtrType(new JSONReporter);
|
||||
} else if (name == "csv") {
|
||||
return PtrType(new CSVReporter);
|
||||
} else {
|
||||
std::cerr << "Unexpected format: '" << name << "'\n";
|
||||
std::exit(1);
|
||||
}
|
||||
if (name == "json") {
|
||||
return PtrType(new JSONReporter());
|
||||
}
|
||||
if (name == "csv") {
|
||||
return PtrType(new CSVReporter());
|
||||
}
|
||||
std::cerr << "Unexpected format: '" << name << "'\n";
|
||||
std::exit(1);
|
||||
}
|
||||
|
||||
#ifdef __GNUC__
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
BENCHMARK_RESTORE_DEPRECATED_WARNING
|
||||
|
||||
} // end namespace
|
||||
|
||||
@ -341,17 +523,41 @@ ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color) {
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
BenchmarkReporter* CreateDefaultDisplayReporter() {
|
||||
static auto default_display_reporter =
|
||||
internal::CreateReporter(FLAGS_benchmark_format,
|
||||
internal::GetOutputOptions())
|
||||
.release();
|
||||
return default_display_reporter;
|
||||
}
|
||||
|
||||
size_t RunSpecifiedBenchmarks() {
|
||||
return RunSpecifiedBenchmarks(nullptr, nullptr);
|
||||
return RunSpecifiedBenchmarks(nullptr, nullptr, FLAGS_benchmark_filter);
|
||||
}
|
||||
|
||||
size_t RunSpecifiedBenchmarks(std::string spec) {
|
||||
return RunSpecifiedBenchmarks(nullptr, nullptr, std::move(spec));
|
||||
}
|
||||
|
||||
size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter) {
|
||||
return RunSpecifiedBenchmarks(display_reporter, nullptr);
|
||||
return RunSpecifiedBenchmarks(display_reporter, nullptr,
|
||||
FLAGS_benchmark_filter);
|
||||
}
|
||||
|
||||
size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
|
||||
std::string spec) {
|
||||
return RunSpecifiedBenchmarks(display_reporter, nullptr, std::move(spec));
|
||||
}
|
||||
|
||||
size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
|
||||
BenchmarkReporter* file_reporter) {
|
||||
std::string spec = FLAGS_benchmark_filter;
|
||||
return RunSpecifiedBenchmarks(display_reporter, file_reporter,
|
||||
FLAGS_benchmark_filter);
|
||||
}
|
||||
|
||||
size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
|
||||
BenchmarkReporter* file_reporter,
|
||||
std::string spec) {
|
||||
if (spec.empty() || spec == "all")
|
||||
spec = "."; // Regexp that matches all benchmarks
|
||||
|
||||
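
Putting these overloads together, a custom main() might look like the
following hedged sketch (the filter regex is illustrative); it mirrors what
BENCHMARK_MAIN() does, plus an explicit filter:

```cpp
#include <benchmark/benchmark.h>

int main(int argc, char** argv) {
  benchmark::Initialize(&argc, argv);
  if (benchmark::ReportUnrecognizedArguments(argc, argv)) return 1;
  // Uses the overload taking an explicit spec; an empty string or "all"
  // falls back to running every registered benchmark.
  benchmark::RunSpecifiedBenchmarks("BM_.*");  // illustrative filter regex
  benchmark::Shutdown();
  return 0;
}
```
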
@ -360,8 +566,7 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
|
||||
std::unique_ptr<BenchmarkReporter> default_display_reporter;
|
||||
std::unique_ptr<BenchmarkReporter> default_file_reporter;
|
||||
if (!display_reporter) {
|
||||
default_display_reporter = internal::CreateReporter(
|
||||
FLAGS_benchmark_format, internal::GetOutputOptions());
|
||||
default_display_reporter.reset(CreateDefaultDisplayReporter());
|
||||
display_reporter = default_display_reporter.get();
|
||||
}
|
||||
auto& Out = display_reporter->GetOutputStream();
|
||||
@ -377,12 +582,14 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
|
||||
if (!fname.empty()) {
|
||||
output_file.open(fname);
|
||||
if (!output_file.is_open()) {
|
||||
Err << "invalid file name: '" << fname << std::endl;
|
||||
Err << "invalid file name: '" << fname << "'" << std::endl;
|
||||
std::exit(1);
|
||||
}
|
||||
if (!file_reporter) {
|
||||
default_file_reporter = internal::CreateReporter(
|
||||
FLAGS_benchmark_out_format, ConsoleReporter::OO_None);
|
||||
FLAGS_benchmark_out_format, FLAGS_benchmark_counters_tabular
|
||||
? ConsoleReporter::OO_Tabular
|
||||
: ConsoleReporter::OO_None);
|
||||
file_reporter = default_file_reporter.get();
|
||||
}
|
||||
file_reporter->SetOutputStream(&output_file);
|
||||
@ -399,7 +606,7 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
|
||||
|
||||
if (FLAGS_benchmark_list_tests) {
|
||||
for (auto const& benchmark : benchmarks)
|
||||
Out << benchmark.name.str() << "\n";
|
||||
Out << benchmark.name().str() << "\n";
|
||||
} else {
|
||||
internal::RunBenchmarks(benchmarks, display_reporter, file_reporter);
|
||||
}
|
||||
@ -407,30 +614,64 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
|
||||
return benchmarks.size();
|
||||
}
|
||||
|
||||
namespace {
|
||||
// stores the time unit benchmarks use by default
|
||||
TimeUnit default_time_unit = kNanosecond;
|
||||
} // namespace
|
||||
|
||||
TimeUnit GetDefaultTimeUnit() { return default_time_unit; }
|
||||
|
||||
void SetDefaultTimeUnit(TimeUnit unit) { default_time_unit = unit; }
|
||||
|
||||
std::string GetBenchmarkFilter() { return FLAGS_benchmark_filter; }
|
||||
|
||||
void SetBenchmarkFilter(std::string value) {
|
||||
FLAGS_benchmark_filter = std::move(value);
|
||||
}
|
||||
|
||||
int32_t GetBenchmarkVerbosity() { return FLAGS_v; }
|
||||
|
||||
void RegisterMemoryManager(MemoryManager* manager) {
|
||||
internal::memory_manager = manager;
|
||||
}
|
||||
|
||||
void AddCustomContext(const std::string& key, const std::string& value) {
|
||||
if (internal::global_context == nullptr) {
|
||||
internal::global_context = new std::map<std::string, std::string>();
|
||||
}
|
||||
if (!internal::global_context->emplace(key, value).second) {
|
||||
std::cerr << "Failed to add custom context \"" << key << "\" as it already "
|
||||
<< "exists with value \"" << value << "\"\n";
|
||||
}
|
||||
}
|
||||
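
For completeness, a hedged sketch of the programmatic configuration hooks
defined here, as a benchmark binary might call them before running (the
key/value strings and regex are illustrative):

```cpp
#include <benchmark/benchmark.h>

// Called from main() before RunSpecifiedBenchmarks().
void ConfigureBenchmarks() {
  benchmark::SetDefaultTimeUnit(benchmark::kMillisecond);  // report in ms
  benchmark::SetBenchmarkFilter("BM_memcpy/.*");           // illustrative regex
  benchmark::AddCustomContext("host", "ci-runner-01");     // extra output context
}
```
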
|
||||
namespace internal {
|
||||
|
||||
void (*HelperPrintf)();
|
||||
|
||||
void PrintUsageAndExit() {
|
||||
fprintf(stdout,
|
||||
"benchmark"
|
||||
" [--benchmark_list_tests={true|false}]\n"
|
||||
" [--benchmark_filter=<regex>]\n"
|
||||
" [--benchmark_min_time=<min_time>]\n"
|
||||
" [--benchmark_repetitions=<num_repetitions>]\n"
|
||||
" [--benchmark_report_aggregates_only={true|false}]\n"
|
||||
" [--benchmark_display_aggregates_only={true|false}]\n"
|
||||
" [--benchmark_format=<console|json|csv>]\n"
|
||||
" [--benchmark_out=<filename>]\n"
|
||||
" [--benchmark_out_format=<json|console|csv>]\n"
|
||||
" [--benchmark_color={auto|true|false}]\n"
|
||||
" [--benchmark_counters_tabular={true|false}]\n"
|
||||
" [--v=<verbosity>]\n");
|
||||
HelperPrintf();
|
||||
exit(0);
|
||||
}
|
||||
|
||||
void SetDefaultTimeUnitFromFlag(const std::string& time_unit_flag) {
|
||||
if (time_unit_flag == "s") {
|
||||
return SetDefaultTimeUnit(kSecond);
|
||||
}
|
||||
if (time_unit_flag == "ms") {
|
||||
return SetDefaultTimeUnit(kMillisecond);
|
||||
}
|
||||
if (time_unit_flag == "us") {
|
||||
return SetDefaultTimeUnit(kMicrosecond);
|
||||
}
|
||||
if (time_unit_flag == "ns") {
|
||||
return SetDefaultTimeUnit(kNanosecond);
|
||||
}
|
||||
if (!time_unit_flag.empty()) {
|
||||
PrintUsageAndExit();
|
||||
}
|
||||
}
|
||||
|
||||
void ParseCommandLineFlags(int* argc, char** argv) {
|
||||
using namespace benchmark;
|
||||
BenchmarkReporter::Context::executable_name =
|
||||
@ -439,10 +680,14 @@ void ParseCommandLineFlags(int* argc, char** argv) {
|
||||
if (ParseBoolFlag(argv[i], "benchmark_list_tests",
|
||||
&FLAGS_benchmark_list_tests) ||
|
||||
ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) ||
|
||||
ParseDoubleFlag(argv[i], "benchmark_min_time",
|
||||
ParseStringFlag(argv[i], "benchmark_min_time",
|
||||
&FLAGS_benchmark_min_time) ||
|
||||
ParseDoubleFlag(argv[i], "benchmark_min_warmup_time",
|
||||
&FLAGS_benchmark_min_warmup_time) ||
|
||||
ParseInt32Flag(argv[i], "benchmark_repetitions",
|
||||
&FLAGS_benchmark_repetitions) ||
|
||||
ParseBoolFlag(argv[i], "benchmark_enable_random_interleaving",
|
||||
&FLAGS_benchmark_enable_random_interleaving) ||
|
||||
ParseBoolFlag(argv[i], "benchmark_report_aggregates_only",
|
||||
&FLAGS_benchmark_report_aggregates_only) ||
|
||||
ParseBoolFlag(argv[i], "benchmark_display_aggregates_only",
|
||||
@ -452,11 +697,14 @@ void ParseCommandLineFlags(int* argc, char** argv) {
|
||||
ParseStringFlag(argv[i], "benchmark_out_format",
|
||||
&FLAGS_benchmark_out_format) ||
|
||||
ParseStringFlag(argv[i], "benchmark_color", &FLAGS_benchmark_color) ||
|
||||
// "color_print" is the deprecated name for "benchmark_color".
|
||||
// TODO: Remove this.
|
||||
ParseStringFlag(argv[i], "color_print", &FLAGS_benchmark_color) ||
|
||||
ParseBoolFlag(argv[i], "benchmark_counters_tabular",
|
||||
&FLAGS_benchmark_counters_tabular) ||
|
||||
ParseStringFlag(argv[i], "benchmark_perf_counters",
|
||||
&FLAGS_benchmark_perf_counters) ||
|
||||
ParseKeyValueFlag(argv[i], "benchmark_context",
|
||||
&FLAGS_benchmark_context) ||
|
||||
ParseStringFlag(argv[i], "benchmark_time_unit",
|
||||
&FLAGS_benchmark_time_unit) ||
|
||||
ParseInt32Flag(argv[i], "v", &FLAGS_v)) {
|
||||
for (int j = i; j != *argc - 1; ++j) argv[j] = argv[j + 1];
|
||||
|
||||
@ -467,13 +715,18 @@ void ParseCommandLineFlags(int* argc, char** argv) {
|
||||
}
|
||||
}
|
||||
for (auto const* flag :
|
||||
{&FLAGS_benchmark_format, &FLAGS_benchmark_out_format})
|
||||
{&FLAGS_benchmark_format, &FLAGS_benchmark_out_format}) {
|
||||
if (*flag != "console" && *flag != "json" && *flag != "csv") {
|
||||
PrintUsageAndExit();
|
||||
}
|
||||
}
|
||||
SetDefaultTimeUnitFromFlag(FLAGS_benchmark_time_unit);
|
||||
if (FLAGS_benchmark_color.empty()) {
|
||||
PrintUsageAndExit();
|
||||
}
|
||||
for (const auto& kv : FLAGS_benchmark_context) {
|
||||
AddCustomContext(kv.first, kv.second);
|
||||
}
|
||||
}
|
||||
|
||||
int InitializeStreams() {
|
||||
@ -483,11 +736,38 @@ int InitializeStreams() {
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
void Initialize(int* argc, char** argv) {
|
||||
void PrintDefaultHelp() {
|
||||
fprintf(stdout,
|
||||
"benchmark"
|
||||
" [--benchmark_list_tests={true|false}]\n"
|
||||
" [--benchmark_filter=<regex>]\n"
|
||||
" [--benchmark_min_time=`<integer>x` OR `<float>s` ]\n"
|
||||
" [--benchmark_min_warmup_time=<min_warmup_time>]\n"
|
||||
" [--benchmark_repetitions=<num_repetitions>]\n"
|
||||
" [--benchmark_enable_random_interleaving={true|false}]\n"
|
||||
" [--benchmark_report_aggregates_only={true|false}]\n"
|
||||
" [--benchmark_display_aggregates_only={true|false}]\n"
|
||||
" [--benchmark_format=<console|json|csv>]\n"
|
||||
" [--benchmark_out=<filename>]\n"
|
||||
" [--benchmark_out_format=<json|console|csv>]\n"
|
||||
" [--benchmark_color={auto|true|false}]\n"
|
||||
" [--benchmark_counters_tabular={true|false}]\n"
|
||||
#if defined HAVE_LIBPFM
|
||||
" [--benchmark_perf_counters=<counter>,...]\n"
|
||||
#endif
|
||||
" [--benchmark_context=<key>=<value>,...]\n"
|
||||
" [--benchmark_time_unit={ns|us|ms|s}]\n"
|
||||
" [--v=<verbosity>]\n");
|
||||
}
|
||||
|
||||
void Initialize(int* argc, char** argv, void (*HelperPrintf)()) {
|
||||
internal::HelperPrintf = HelperPrintf;
|
||||
internal::ParseCommandLineFlags(argc, argv);
|
||||
internal::LogLevel() = FLAGS_v;
|
||||
}
|
||||
|
||||
void Shutdown() { delete internal::global_context; }
|
||||
|
||||
bool ReportUnrecognizedArguments(int argc, char** argv) {
|
||||
for (int i = 1; i < argc; ++i) {
|
||||
fprintf(stderr, "%s: error: unrecognized command-line flag: %s\n", argv[0],
|
||||
|
@ -1,15 +1,118 @@
|
||||
#include "benchmark_api_internal.h"
|
||||
|
||||
#include <cinttypes>
|
||||
|
||||
#include "string_util.h"
|
||||
|
||||
namespace benchmark {
|
||||
namespace internal {
|
||||
|
||||
State BenchmarkInstance::Run(IterationCount iters, int thread_id,
|
||||
internal::ThreadTimer* timer,
|
||||
internal::ThreadManager* manager) const {
|
||||
State st(iters, arg, thread_id, threads, timer, manager);
|
||||
benchmark->Run(st);
|
||||
BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, int family_idx,
|
||||
int per_family_instance_idx,
|
||||
const std::vector<int64_t>& args,
|
||||
int thread_count)
|
||||
: benchmark_(*benchmark),
|
||||
family_index_(family_idx),
|
||||
per_family_instance_index_(per_family_instance_idx),
|
||||
aggregation_report_mode_(benchmark_.aggregation_report_mode_),
|
||||
args_(args),
|
||||
time_unit_(benchmark_.GetTimeUnit()),
|
||||
measure_process_cpu_time_(benchmark_.measure_process_cpu_time_),
|
||||
use_real_time_(benchmark_.use_real_time_),
|
||||
use_manual_time_(benchmark_.use_manual_time_),
|
||||
complexity_(benchmark_.complexity_),
|
||||
complexity_lambda_(benchmark_.complexity_lambda_),
|
||||
statistics_(benchmark_.statistics_),
|
||||
repetitions_(benchmark_.repetitions_),
|
||||
min_time_(benchmark_.min_time_),
|
||||
min_warmup_time_(benchmark_.min_warmup_time_),
|
||||
iterations_(benchmark_.iterations_),
|
||||
threads_(thread_count) {
|
||||
name_.function_name = benchmark_.name_;
|
||||
|
||||
size_t arg_i = 0;
|
||||
for (const auto& arg : args) {
|
||||
if (!name_.args.empty()) {
|
||||
name_.args += '/';
|
||||
}
|
||||
|
||||
if (arg_i < benchmark->arg_names_.size()) {
|
||||
const auto& arg_name = benchmark_.arg_names_[arg_i];
|
||||
if (!arg_name.empty()) {
|
||||
name_.args += StrFormat("%s:", arg_name.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
name_.args += StrFormat("%" PRId64, arg);
|
||||
++arg_i;
|
||||
}
|
||||
|
||||
if (!IsZero(benchmark->min_time_)) {
|
||||
name_.min_time = StrFormat("min_time:%0.3f", benchmark_.min_time_);
|
||||
}
|
||||
|
||||
if (!IsZero(benchmark->min_warmup_time_)) {
|
||||
name_.min_warmup_time =
|
||||
StrFormat("min_warmup_time:%0.3f", benchmark_.min_warmup_time_);
|
||||
}
|
||||
|
||||
if (benchmark_.iterations_ != 0) {
|
||||
name_.iterations = StrFormat(
|
||||
"iterations:%lu", static_cast<unsigned long>(benchmark_.iterations_));
|
||||
}
|
||||
|
||||
if (benchmark_.repetitions_ != 0) {
|
||||
name_.repetitions = StrFormat("repeats:%d", benchmark_.repetitions_);
|
||||
}
|
||||
|
||||
if (benchmark_.measure_process_cpu_time_) {
|
||||
name_.time_type = "process_time";
|
||||
}
|
||||
|
||||
if (benchmark_.use_manual_time_) {
|
||||
if (!name_.time_type.empty()) {
|
||||
name_.time_type += '/';
|
||||
}
|
||||
name_.time_type += "manual_time";
|
||||
} else if (benchmark_.use_real_time_) {
|
||||
if (!name_.time_type.empty()) {
|
||||
name_.time_type += '/';
|
||||
}
|
||||
name_.time_type += "real_time";
|
||||
}
|
||||
|
||||
if (!benchmark_.thread_counts_.empty()) {
|
||||
name_.threads = StrFormat("threads:%d", threads_);
|
||||
}
|
||||
|
||||
setup_ = benchmark_.setup_;
|
||||
teardown_ = benchmark_.teardown_;
|
||||
}
|
||||
|
||||
State BenchmarkInstance::Run(
|
||||
IterationCount iters, int thread_id, internal::ThreadTimer* timer,
|
||||
internal::ThreadManager* manager,
|
||||
internal::PerfCountersMeasurement* perf_counters_measurement) const {
|
||||
State st(name_.function_name, iters, args_, thread_id, threads_, timer,
|
||||
manager, perf_counters_measurement);
|
||||
benchmark_.Run(st);
|
||||
return st;
|
||||
}
|
||||
|
||||
} // internal
|
||||
} // benchmark
|
||||
void BenchmarkInstance::Setup() const {
|
||||
if (setup_) {
|
||||
State st(name_.function_name, /*iters*/ 1, args_, /*thread_id*/ 0, threads_,
|
||||
nullptr, nullptr, nullptr);
|
||||
setup_(st);
|
||||
}
|
||||
}
|
||||
|
||||
void BenchmarkInstance::Teardown() const {
|
||||
if (teardown_) {
|
||||
State st(name_.function_name, /*iters*/ 1, args_, /*thread_id*/ 0, threads_,
|
||||
nullptr, nullptr, nullptr);
|
||||
teardown_(st);
|
||||
}
|
||||
}
|
||||
} // namespace internal
|
||||
} // namespace benchmark
|
||||
|
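
These per-instance calls service the user-facing Setup/Teardown registration;
a hedged sketch of that usage (callback and benchmark names are hypothetical):

```cpp
#include <benchmark/benchmark.h>

static void DoSetup(const benchmark::State&) {
  // Acquire expensive resources here; runs around each benchmark run,
  // not inside the timed iteration loop.
}

static void DoTeardown(const benchmark::State&) {
  // Release whatever DoSetup acquired.
}

static void BM_WithFixture(benchmark::State& state) {  // hypothetical benchmark
  for (auto _ : state) {
    benchmark::DoNotOptimize(state.iterations());
  }
}
BENCHMARK(BM_WithFixture)->Setup(DoSetup)->Teardown(DoTeardown);
BENCHMARK_MAIN();
```
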
@ -1,9 +1,6 @@
|
||||
#ifndef BENCHMARK_API_INTERNAL_H
|
||||
#define BENCHMARK_API_INTERNAL_H
|
||||
|
||||
#include "benchmark/benchmark.h"
|
||||
#include "commandlineflags.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <iosfwd>
|
||||
#include <limits>
|
||||
@ -11,32 +8,68 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "benchmark/benchmark.h"
|
||||
#include "commandlineflags.h"
|
||||
|
||||
namespace benchmark {
|
||||
namespace internal {
|
||||
|
||||
// Information kept per benchmark we may want to run
|
||||
struct BenchmarkInstance {
|
||||
BenchmarkName name;
|
||||
Benchmark* benchmark;
|
||||
AggregationReportMode aggregation_report_mode;
|
||||
std::vector<int64_t> arg;
|
||||
TimeUnit time_unit;
|
||||
int range_multiplier;
|
||||
bool measure_process_cpu_time;
|
||||
bool use_real_time;
|
||||
bool use_manual_time;
|
||||
BigO complexity;
|
||||
BigOFunc* complexity_lambda;
|
||||
UserCounters counters;
|
||||
const std::vector<Statistics>* statistics;
|
||||
bool last_benchmark_instance;
|
||||
int repetitions;
|
||||
double min_time;
|
||||
IterationCount iterations;
|
||||
int threads; // Number of concurrent threads to use
|
||||
class BenchmarkInstance {
|
||||
public:
|
||||
BenchmarkInstance(Benchmark* benchmark, int family_index,
|
||||
int per_family_instance_index,
|
||||
const std::vector<int64_t>& args, int threads);
|
||||
|
||||
const BenchmarkName& name() const { return name_; }
|
||||
int family_index() const { return family_index_; }
|
||||
int per_family_instance_index() const { return per_family_instance_index_; }
|
||||
AggregationReportMode aggregation_report_mode() const {
|
||||
return aggregation_report_mode_;
|
||||
}
|
||||
TimeUnit time_unit() const { return time_unit_; }
|
||||
bool measure_process_cpu_time() const { return measure_process_cpu_time_; }
|
||||
bool use_real_time() const { return use_real_time_; }
|
||||
bool use_manual_time() const { return use_manual_time_; }
|
||||
BigO complexity() const { return complexity_; }
|
||||
BigOFunc* complexity_lambda() const { return complexity_lambda_; }
|
||||
const std::vector<Statistics>& statistics() const { return statistics_; }
|
||||
int repetitions() const { return repetitions_; }
|
||||
double min_time() const { return min_time_; }
|
||||
double min_warmup_time() const { return min_warmup_time_; }
|
||||
IterationCount iterations() const { return iterations_; }
|
||||
int threads() const { return threads_; }
|
||||
void Setup() const;
|
||||
void Teardown() const;
|
||||
|
||||
State Run(IterationCount iters, int thread_id, internal::ThreadTimer* timer,
|
||||
internal::ThreadManager* manager) const;
|
||||
internal::ThreadManager* manager,
|
||||
internal::PerfCountersMeasurement* perf_counters_measurement) const;
|
||||
|
||||
private:
|
||||
BenchmarkName name_;
|
||||
Benchmark& benchmark_;
|
||||
const int family_index_;
|
||||
const int per_family_instance_index_;
|
||||
AggregationReportMode aggregation_report_mode_;
|
||||
const std::vector<int64_t>& args_;
|
||||
TimeUnit time_unit_;
|
||||
bool measure_process_cpu_time_;
|
||||
bool use_real_time_;
|
||||
bool use_manual_time_;
|
||||
BigO complexity_;
|
||||
BigOFunc* complexity_lambda_;
|
||||
UserCounters counters_;
|
||||
const std::vector<Statistics>& statistics_;
|
||||
int repetitions_;
|
||||
double min_time_;
|
||||
double min_warmup_time_;
|
||||
IterationCount iterations_;
|
||||
int threads_; // Number of concurrent threads to use
|
||||
|
||||
typedef void (*callback_function)(const benchmark::State&);
|
||||
callback_function setup_ = nullptr;
|
||||
callback_function teardown_ = nullptr;
|
||||
};
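The setup_/teardown_ callbacks stored above are populated from the public Benchmark::Setup()/Teardown() builders added later in this diff. A minimal user-side sketch, assuming only the public benchmark.h API (the fixture data here is hypothetical):

#include <vector>
#include "benchmark/benchmark.h"

// Hypothetical fixture data, used only for illustration.
static std::vector<int>* g_data = nullptr;

static void DoSetup(const benchmark::State&) { g_data = new std::vector<int>(1 << 16, 1); }
static void DoTeardown(const benchmark::State&) { delete g_data; g_data = nullptr; }

static void BM_Sum(benchmark::State& state) {
  for (auto _ : state) {
    long sum = 0;
    for (int v : *g_data) sum += v;
    benchmark::DoNotOptimize(sum);
  }
}
// Setup/Teardown wrap each run of the benchmark, not each iteration.
BENCHMARK(BM_Sum)->Setup(DoSetup)->Teardown(DoTeardown);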
|
||||
|
||||
bool FindBenchmarksInternal(const std::string& re,
|
||||
@ -45,6 +78,7 @@ bool FindBenchmarksInternal(const std::string& re,
|
||||
|
||||
bool IsZero(double n);
|
||||
|
||||
BENCHMARK_EXPORT
|
||||
ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color = false);
|
||||
|
||||
} // end namespace internal
|
||||
|
@@ -14,4 +14,5 @@

#include "benchmark/benchmark.h"

BENCHMARK_EXPORT int main(int, char**);
BENCHMARK_MAIN();
@@ -51,8 +51,9 @@ std::string join(char delimiter, const Ts&... ts) {
}
} // namespace

BENCHMARK_EXPORT
std::string BenchmarkName::str() const {
  return join('/', function_name, args, min_time, iterations, repetitions,
              time_type, threads);
  return join('/', function_name, args, min_time, min_warmup_time, iterations,
              repetitions, time_type, threads);
}
} // namespace benchmark
|
||||
|
@ -15,7 +15,7 @@
|
||||
#include "benchmark_register.h"
|
||||
|
||||
#ifndef BENCHMARK_OS_WINDOWS
|
||||
#ifndef BENCHMARK_OS_FUCHSIA
|
||||
#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT)
|
||||
#include <sys/resource.h>
|
||||
#endif
|
||||
#include <sys/time.h>
|
||||
@ -24,6 +24,7 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <cinttypes>
|
||||
#include <condition_variable>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
@ -31,14 +32,10 @@
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <numeric>
|
||||
#include <sstream>
|
||||
#include <thread>
|
||||
|
||||
#ifndef __STDC_FORMAT_MACROS
|
||||
#define __STDC_FORMAT_MACROS
|
||||
#endif
|
||||
#include <inttypes.h>
|
||||
|
||||
#include "benchmark/benchmark.h"
|
||||
#include "benchmark_api_internal.h"
|
||||
#include "check.h"
|
||||
@ -56,10 +53,13 @@ namespace benchmark {
|
||||
|
||||
namespace {
|
||||
// For non-dense Range, intermediate values are powers of kRangeMultiplier.
|
||||
static const int kRangeMultiplier = 8;
|
||||
static constexpr int kRangeMultiplier = 8;
|
||||
|
||||
// The size of a benchmark family is the number of inputs to repeat the
// benchmark on. If this is "large" then warn the user during configuration.
|
||||
static const size_t kMaxFamilySize = 100;
|
||||
static constexpr size_t kMaxFamilySize = 100;
|
||||
|
||||
static constexpr char kDisabledPrefix[] = "DISABLED_";
|
||||
} // end namespace
|
||||
|
||||
namespace internal {
|
||||
@ -114,15 +114,15 @@ void BenchmarkFamilies::ClearBenchmarks() {
|
||||
bool BenchmarkFamilies::FindBenchmarks(
|
||||
std::string spec, std::vector<BenchmarkInstance>* benchmarks,
|
||||
std::ostream* ErrStream) {
|
||||
CHECK(ErrStream);
|
||||
BM_CHECK(ErrStream);
|
||||
auto& Err = *ErrStream;
|
||||
// Make regular expression out of command-line flag
|
||||
std::string error_msg;
|
||||
Regex re;
|
||||
bool isNegativeFilter = false;
|
||||
bool is_negative_filter = false;
|
||||
if (spec[0] == '-') {
|
||||
spec.replace(0, 1, "");
|
||||
isNegativeFilter = true;
|
||||
is_negative_filter = true;
|
||||
}
|
||||
if (!re.Init(spec, &error_msg)) {
|
||||
Err << "Could not compile benchmark re: " << error_msg << std::endl;
|
||||
@ -132,8 +132,13 @@ bool BenchmarkFamilies::FindBenchmarks(
|
||||
// Special list of thread counts to use when none are specified
|
||||
const std::vector<int> one_thread = {1};
|
||||
|
||||
int next_family_index = 0;
|
||||
|
||||
MutexLock l(mutex_);
|
||||
for (std::unique_ptr<Benchmark>& family : families_) {
|
||||
int family_index = next_family_index;
|
||||
int per_family_instance_index = 0;
|
||||
|
||||
// Family was deleted or benchmark doesn't match
|
||||
if (!family) continue;
|
||||
|
||||
@ -152,85 +157,27 @@ bool BenchmarkFamilies::FindBenchmarks(
|
||||
<< " will be repeated at least " << family_size << " times.\n";
|
||||
}
|
||||
// reserve in the special case the regex ".", since we know the final
|
||||
// family size.
|
||||
if (spec == ".") benchmarks->reserve(family_size);
|
||||
// family size. this doesn't take into account any disabled benchmarks
|
||||
// so worst case we reserve more than we need.
|
||||
if (spec == ".") benchmarks->reserve(benchmarks->size() + family_size);
|
||||
|
||||
for (auto const& args : family->args_) {
|
||||
for (int num_threads : *thread_counts) {
|
||||
BenchmarkInstance instance;
|
||||
instance.name.function_name = family->name_;
|
||||
instance.benchmark = family.get();
|
||||
instance.aggregation_report_mode = family->aggregation_report_mode_;
|
||||
instance.arg = args;
|
||||
instance.time_unit = family->time_unit_;
|
||||
instance.range_multiplier = family->range_multiplier_;
|
||||
instance.min_time = family->min_time_;
|
||||
instance.iterations = family->iterations_;
|
||||
instance.repetitions = family->repetitions_;
|
||||
instance.measure_process_cpu_time = family->measure_process_cpu_time_;
|
||||
instance.use_real_time = family->use_real_time_;
|
||||
instance.use_manual_time = family->use_manual_time_;
|
||||
instance.complexity = family->complexity_;
|
||||
instance.complexity_lambda = family->complexity_lambda_;
|
||||
instance.statistics = &family->statistics_;
|
||||
instance.threads = num_threads;
|
||||
BenchmarkInstance instance(family.get(), family_index,
|
||||
per_family_instance_index, args,
|
||||
num_threads);
|
||||
|
||||
// Add arguments to instance name
|
||||
size_t arg_i = 0;
|
||||
for (auto const& arg : args) {
|
||||
if (!instance.name.args.empty()) {
|
||||
instance.name.args += '/';
|
||||
}
|
||||
|
||||
if (arg_i < family->arg_names_.size()) {
|
||||
const auto& arg_name = family->arg_names_[arg_i];
|
||||
if (!arg_name.empty()) {
|
||||
instance.name.args += StrFormat("%s:", arg_name.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
instance.name.args += StrFormat("%" PRId64, arg);
|
||||
++arg_i;
|
||||
}
|
||||
|
||||
if (!IsZero(family->min_time_))
|
||||
instance.name.min_time =
|
||||
StrFormat("min_time:%0.3f", family->min_time_);
|
||||
if (family->iterations_ != 0) {
|
||||
instance.name.iterations =
|
||||
StrFormat("iterations:%lu",
|
||||
static_cast<unsigned long>(family->iterations_));
|
||||
}
|
||||
if (family->repetitions_ != 0)
|
||||
instance.name.repetitions =
|
||||
StrFormat("repeats:%d", family->repetitions_);
|
||||
|
||||
if (family->measure_process_cpu_time_) {
|
||||
instance.name.time_type = "process_time";
|
||||
}
|
||||
|
||||
if (family->use_manual_time_) {
|
||||
if (!instance.name.time_type.empty()) {
|
||||
instance.name.time_type += '/';
|
||||
}
|
||||
instance.name.time_type += "manual_time";
|
||||
} else if (family->use_real_time_) {
|
||||
if (!instance.name.time_type.empty()) {
|
||||
instance.name.time_type += '/';
|
||||
}
|
||||
instance.name.time_type += "real_time";
|
||||
}
|
||||
|
||||
// Add the number of threads used to the name
|
||||
if (!family->thread_counts_.empty()) {
|
||||
instance.name.threads = StrFormat("threads:%d", instance.threads);
|
||||
}
|
||||
|
||||
const auto full_name = instance.name.str();
|
||||
if ((re.Match(full_name) && !isNegativeFilter) ||
|
||||
(!re.Match(full_name) && isNegativeFilter)) {
|
||||
instance.last_benchmark_instance = (&args == &family->args_.back());
|
||||
const auto full_name = instance.name().str();
|
||||
if (full_name.rfind(kDisabledPrefix, 0) != 0 &&
|
||||
((re.Match(full_name) && !is_negative_filter) ||
|
||||
(!re.Match(full_name) && is_negative_filter))) {
|
||||
benchmarks->push_back(std::move(instance));
|
||||
|
||||
++per_family_instance_index;
|
||||
|
||||
// Only bump the next family index once we've established that
|
||||
// at least one instance of this family will be run.
|
||||
if (next_family_index == family_index) ++next_family_index;
|
||||
}
|
||||
}
|
||||
}
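A benchmark whose generated full name starts with the kDisabledPrefix checked above is registered but never selected, regardless of the filter. A user-side sketch:

#include "benchmark/benchmark.h"

// The "DISABLED_" prefix keeps a benchmark registered but excluded from runs,
// mirroring the googletest convention.
static void DISABLED_BM_Flaky(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::ClobberMemory();
  }
}
BENCHMARK(DISABLED_BM_Flaky);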
|
||||
@ -257,39 +204,50 @@ bool FindBenchmarksInternal(const std::string& re,
|
||||
// Benchmark
|
||||
//=============================================================================//
|
||||
|
||||
Benchmark::Benchmark(const char* name)
|
||||
Benchmark::Benchmark(const std::string& name)
|
||||
: name_(name),
|
||||
aggregation_report_mode_(ARM_Unspecified),
|
||||
time_unit_(kNanosecond),
|
||||
time_unit_(GetDefaultTimeUnit()),
|
||||
use_default_time_unit_(true),
|
||||
range_multiplier_(kRangeMultiplier),
|
||||
min_time_(0),
|
||||
min_warmup_time_(0),
|
||||
iterations_(0),
|
||||
repetitions_(0),
|
||||
measure_process_cpu_time_(false),
|
||||
use_real_time_(false),
|
||||
use_manual_time_(false),
|
||||
complexity_(oNone),
|
||||
complexity_lambda_(nullptr) {
|
||||
complexity_lambda_(nullptr),
|
||||
setup_(nullptr),
|
||||
teardown_(nullptr) {
|
||||
ComputeStatistics("mean", StatisticsMean);
|
||||
ComputeStatistics("median", StatisticsMedian);
|
||||
ComputeStatistics("stddev", StatisticsStdDev);
|
||||
ComputeStatistics("cv", StatisticsCV, kPercentage);
|
||||
}
|
||||
|
||||
Benchmark::~Benchmark() {}
|
||||
|
||||
Benchmark* Benchmark::Name(const std::string& name) {
|
||||
SetName(name);
|
||||
return this;
|
||||
}
|
||||
|
||||
Benchmark* Benchmark::Arg(int64_t x) {
|
||||
CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
|
||||
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
|
||||
args_.push_back({x});
|
||||
return this;
|
||||
}
|
||||
|
||||
Benchmark* Benchmark::Unit(TimeUnit unit) {
|
||||
time_unit_ = unit;
|
||||
use_default_time_unit_ = false;
|
||||
return this;
|
||||
}
|
||||
|
||||
Benchmark* Benchmark::Range(int64_t start, int64_t limit) {
|
||||
CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
|
||||
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
|
||||
std::vector<int64_t> arglist;
|
||||
AddRange(&arglist, start, limit, range_multiplier_);
|
||||
|
||||
@ -301,53 +259,61 @@ Benchmark* Benchmark::Range(int64_t start, int64_t limit) {
|
||||
|
||||
Benchmark* Benchmark::Ranges(
|
||||
const std::vector<std::pair<int64_t, int64_t>>& ranges) {
|
||||
CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(ranges.size()));
|
||||
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(ranges.size()));
|
||||
std::vector<std::vector<int64_t>> arglists(ranges.size());
|
||||
std::size_t total = 1;
|
||||
for (std::size_t i = 0; i < ranges.size(); i++) {
|
||||
AddRange(&arglists[i], ranges[i].first, ranges[i].second,
|
||||
range_multiplier_);
|
||||
total *= arglists[i].size();
|
||||
}
|
||||
|
||||
std::vector<std::size_t> ctr(arglists.size(), 0);
|
||||
ArgsProduct(arglists);
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
Benchmark* Benchmark::ArgsProduct(
|
||||
const std::vector<std::vector<int64_t>>& arglists) {
|
||||
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(arglists.size()));
|
||||
|
||||
std::vector<std::size_t> indices(arglists.size());
|
||||
const std::size_t total = std::accumulate(
|
||||
std::begin(arglists), std::end(arglists), std::size_t{1},
|
||||
[](const std::size_t res, const std::vector<int64_t>& arglist) {
|
||||
return res * arglist.size();
|
||||
});
|
||||
std::vector<int64_t> args;
|
||||
args.reserve(arglists.size());
|
||||
for (std::size_t i = 0; i < total; i++) {
|
||||
std::vector<int64_t> tmp;
|
||||
tmp.reserve(arglists.size());
|
||||
|
||||
for (std::size_t j = 0; j < arglists.size(); j++) {
|
||||
tmp.push_back(arglists[j].at(ctr[j]));
|
||||
for (std::size_t arg = 0; arg < arglists.size(); arg++) {
|
||||
args.push_back(arglists[arg][indices[arg]]);
|
||||
}
|
||||
args_.push_back(args);
|
||||
args.clear();
|
||||
|
||||
args_.push_back(std::move(tmp));
|
||||
|
||||
for (std::size_t j = 0; j < arglists.size(); j++) {
|
||||
if (ctr[j] + 1 < arglists[j].size()) {
|
||||
++ctr[j];
|
||||
break;
|
||||
}
|
||||
ctr[j] = 0;
|
||||
}
|
||||
std::size_t arg = 0;
|
||||
do {
|
||||
indices[arg] = (indices[arg] + 1) % arglists[arg].size();
|
||||
} while (indices[arg++] == 0 && arg < arglists.size());
|
||||
}
|
||||
|
||||
return this;
|
||||
}
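The odometer-style index update above enumerates the full cartesian product of the argument lists. A user-side sketch of what that provides, assuming only the public API:

#include <cstdint>
#include "benchmark/benchmark.h"

static void BM_TwoArgs(benchmark::State& state) {
  const int64_t product = state.range(0) * state.range(1);
  for (auto _ : state) {
    int64_t v = product;
    benchmark::DoNotOptimize(v);
  }
}
// Produces all 4 * 2 = 8 combinations of the two argument lists.
BENCHMARK(BM_TwoArgs)->ArgsProduct({{1, 2, 4, 8}, {32, 64}});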
|
||||
|
||||
Benchmark* Benchmark::ArgName(const std::string& name) {
|
||||
CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
|
||||
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
|
||||
arg_names_ = {name};
|
||||
return this;
|
||||
}
|
||||
|
||||
Benchmark* Benchmark::ArgNames(const std::vector<std::string>& names) {
|
||||
CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(names.size()));
|
||||
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(names.size()));
|
||||
arg_names_ = names;
|
||||
return this;
|
||||
}
|
||||
|
||||
Benchmark* Benchmark::DenseRange(int64_t start, int64_t limit, int step) {
|
||||
CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
|
||||
CHECK_LE(start, limit);
|
||||
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
|
||||
BM_CHECK_LE(start, limit);
|
||||
for (int64_t arg = start; arg <= limit; arg += step) {
|
||||
args_.push_back({arg});
|
||||
}
|
||||
@ -355,7 +321,7 @@ Benchmark* Benchmark::DenseRange(int64_t start, int64_t limit, int step) {
|
||||
}
|
||||
|
||||
Benchmark* Benchmark::Args(const std::vector<int64_t>& args) {
|
||||
CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(args.size()));
|
||||
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(args.size()));
|
||||
args_.push_back(args);
|
||||
return this;
|
||||
}
|
||||
@ -365,28 +331,48 @@ Benchmark* Benchmark::Apply(void (*custom_arguments)(Benchmark* benchmark)) {
|
||||
return this;
|
||||
}
|
||||
|
||||
Benchmark* Benchmark::Setup(void (*setup)(const benchmark::State&)) {
|
||||
BM_CHECK(setup != nullptr);
|
||||
setup_ = setup;
|
||||
return this;
|
||||
}
|
||||
|
||||
Benchmark* Benchmark::Teardown(void (*teardown)(const benchmark::State&)) {
|
||||
BM_CHECK(teardown != nullptr);
|
||||
teardown_ = teardown;
|
||||
return this;
|
||||
}
|
||||
|
||||
Benchmark* Benchmark::RangeMultiplier(int multiplier) {
|
||||
CHECK(multiplier > 1);
|
||||
BM_CHECK(multiplier > 1);
|
||||
range_multiplier_ = multiplier;
|
||||
return this;
|
||||
}
|
||||
|
||||
Benchmark* Benchmark::MinTime(double t) {
|
||||
CHECK(t > 0.0);
|
||||
CHECK(iterations_ == 0);
|
||||
BM_CHECK(t > 0.0);
|
||||
BM_CHECK(iterations_ == 0);
|
||||
min_time_ = t;
|
||||
return this;
|
||||
}
|
||||
|
||||
Benchmark* Benchmark::MinWarmUpTime(double t) {
|
||||
BM_CHECK(t >= 0.0);
|
||||
BM_CHECK(iterations_ == 0);
|
||||
min_warmup_time_ = t;
|
||||
return this;
|
||||
}
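A user-side sketch of the new warmup knob, combined with MinTime():

#include "benchmark/benchmark.h"

static void BM_WarmCache(benchmark::State& state) {
  for (auto _ : state) benchmark::ClobberMemory();
}
// Spend at least 0.5s warming up (unmeasured), then at least 2s measuring.
BENCHMARK(BM_WarmCache)->MinWarmUpTime(0.5)->MinTime(2.0);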
|
||||
|
||||
Benchmark* Benchmark::Iterations(IterationCount n) {
|
||||
CHECK(n > 0);
|
||||
CHECK(IsZero(min_time_));
|
||||
BM_CHECK(n > 0);
|
||||
BM_CHECK(IsZero(min_time_));
|
||||
BM_CHECK(IsZero(min_warmup_time_));
|
||||
iterations_ = n;
|
||||
return this;
|
||||
}
|
||||
|
||||
Benchmark* Benchmark::Repetitions(int n) {
|
||||
CHECK(n > 0);
|
||||
BM_CHECK(n > 0);
|
||||
repetitions_ = n;
|
||||
return this;
|
||||
}
|
||||
@ -419,14 +405,14 @@ Benchmark* Benchmark::MeasureProcessCPUTime() {
|
||||
}
|
||||
|
||||
Benchmark* Benchmark::UseRealTime() {
|
||||
CHECK(!use_manual_time_)
|
||||
BM_CHECK(!use_manual_time_)
|
||||
<< "Cannot set UseRealTime and UseManualTime simultaneously.";
|
||||
use_real_time_ = true;
|
||||
return this;
|
||||
}
|
||||
|
||||
Benchmark* Benchmark::UseManualTime() {
|
||||
CHECK(!use_real_time_)
|
||||
BM_CHECK(!use_real_time_)
|
||||
<< "Cannot set UseRealTime and UseManualTime simultaneously.";
|
||||
use_manual_time_ = true;
|
||||
return this;
|
||||
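UseManualTime() pairs with State::SetIterationTime(); a sketch of the usual pattern for timing work yourself, where the chrono-based timing stands in for whatever clock the caller actually uses:

#include <chrono>
#include "benchmark/benchmark.h"

static void BM_ManuallyTimed(benchmark::State& state) {
  for (auto _ : state) {
    const auto start = std::chrono::high_resolution_clock::now();
    benchmark::ClobberMemory();  // stand-in for the work being timed
    const auto end = std::chrono::high_resolution_clock::now();
    state.SetIterationTime(std::chrono::duration<double>(end - start).count());
  }
}
BENCHMARK(BM_ManuallyTimed)->UseManualTime();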
@ -443,21 +429,22 @@ Benchmark* Benchmark::Complexity(BigOFunc* complexity) {
|
||||
return this;
|
||||
}
|
||||
|
||||
Benchmark* Benchmark::ComputeStatistics(std::string name,
|
||||
StatisticsFunc* statistics) {
|
||||
statistics_.emplace_back(name, statistics);
|
||||
Benchmark* Benchmark::ComputeStatistics(const std::string& name,
|
||||
StatisticsFunc* statistics,
|
||||
StatisticUnit unit) {
|
||||
statistics_.emplace_back(name, statistics, unit);
|
||||
return this;
|
||||
}
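The extra StatisticUnit parameter is how the built-in "cv" statistic reports a percentage. Registering a custom statistic from user code looks roughly like this sketch, with the unit argument left at its default:

#include <algorithm>
#include <vector>
#include "benchmark/benchmark.h"

// Spread between the fastest and slowest repetition, reported as a time.
static double SampleRange(const std::vector<double>& v) {
  const auto mm = std::minmax_element(v.begin(), v.end());
  return *mm.second - *mm.first;
}

static void BM_Busy(benchmark::State& state) {
  for (auto _ : state) benchmark::ClobberMemory();
}
BENCHMARK(BM_Busy)->Repetitions(10)->ComputeStatistics("range", SampleRange);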
|
||||
|
||||
Benchmark* Benchmark::Threads(int t) {
|
||||
CHECK_GT(t, 0);
|
||||
BM_CHECK_GT(t, 0);
|
||||
thread_counts_.push_back(t);
|
||||
return this;
|
||||
}
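User-side, the thread counts collected here come from the Threads()/ThreadRange() builders; for each count t the runner spawns t-1 worker threads plus the calling thread (see DoNIterations() further down). A sketch:

#include <atomic>
#include "benchmark/benchmark.h"

static std::atomic<int> counter{0};

static void BM_Contended(benchmark::State& state) {
  for (auto _ : state) counter.fetch_add(1, std::memory_order_relaxed);
}
// Runs the same body with 1, 4, and then 8, 16, 32 concurrent threads.
BENCHMARK(BM_Contended)->Threads(1)->Threads(4)->ThreadRange(8, 32);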
|
||||
|
||||
Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) {
|
||||
CHECK_GT(min_threads, 0);
|
||||
CHECK_GE(max_threads, min_threads);
|
||||
BM_CHECK_GT(min_threads, 0);
|
||||
BM_CHECK_GE(max_threads, min_threads);
|
||||
|
||||
AddRange(&thread_counts_, min_threads, max_threads, 2);
|
||||
return this;
|
||||
@ -465,9 +452,9 @@ Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) {
|
||||
|
||||
Benchmark* Benchmark::DenseThreadRange(int min_threads, int max_threads,
|
||||
int stride) {
|
||||
CHECK_GT(min_threads, 0);
|
||||
CHECK_GE(max_threads, min_threads);
|
||||
CHECK_GE(stride, 1);
|
||||
BM_CHECK_GT(min_threads, 0);
|
||||
BM_CHECK_GE(max_threads, min_threads);
|
||||
BM_CHECK_GE(stride, 1);
|
||||
|
||||
for (auto i = min_threads; i < max_threads; i += stride) {
|
||||
thread_counts_.push_back(i);
|
||||
@ -481,7 +468,9 @@ Benchmark* Benchmark::ThreadPerCpu() {
|
||||
return this;
|
||||
}
|
||||
|
||||
void Benchmark::SetName(const char* name) { name_ = name; }
|
||||
void Benchmark::SetName(const std::string& name) { name_ = name; }
|
||||
|
||||
const char* Benchmark::GetName() const { return name_.c_str(); }
|
||||
|
||||
int Benchmark::ArgsCnt() const {
|
||||
if (args_.empty()) {
|
||||
@ -491,6 +480,16 @@ int Benchmark::ArgsCnt() const {
|
||||
return static_cast<int>(args_.front().size());
|
||||
}
|
||||
|
||||
const char* Benchmark::GetArgName(int arg) const {
|
||||
BM_CHECK_GE(arg, 0);
|
||||
BM_CHECK_LT(arg, static_cast<int>(arg_names_.size()));
|
||||
return arg_names_[arg].c_str();
|
||||
}
|
||||
|
||||
TimeUnit Benchmark::GetTimeUnit() const {
|
||||
return use_default_time_unit_ ? GetDefaultTimeUnit() : time_unit_;
|
||||
}
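GetTimeUnit() falls back to the process-wide default only when the benchmark never called Unit(); a user-side sketch of overriding it explicitly:

#include "benchmark/benchmark.h"

static void BM_Slow(benchmark::State& state) {
  for (auto _ : state) benchmark::ClobberMemory();
}
// Reported in milliseconds regardless of the current default time unit.
BENCHMARK(BM_Slow)->Unit(benchmark::kMillisecond);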
|
||||
|
||||
//=============================================================================//
|
||||
// FunctionBenchmark
|
||||
//=============================================================================//
|
||||
@ -503,4 +502,19 @@ void ClearRegisteredBenchmarks() {
|
||||
internal::BenchmarkFamilies::GetInstance()->ClearBenchmarks();
|
||||
}
|
||||
|
||||
std::vector<int64_t> CreateRange(int64_t lo, int64_t hi, int multi) {
|
||||
std::vector<int64_t> args;
|
||||
internal::AddRange(&args, lo, hi, multi);
|
||||
return args;
|
||||
}
|
||||
|
||||
std::vector<int64_t> CreateDenseRange(int64_t start, int64_t limit, int step) {
|
||||
BM_CHECK_LE(start, limit);
|
||||
std::vector<int64_t> args;
|
||||
for (int64_t arg = start; arg <= limit; arg += step) {
|
||||
args.push_back(arg);
|
||||
}
|
||||
return args;
|
||||
}
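The two helpers above are mainly useful as inputs to ArgsProduct(); a sketch:

#include <cstdint>
#include "benchmark/benchmark.h"

static void BM_Grid(benchmark::State& state) {
  int64_t v = state.range(0) * state.range(1);
  for (auto _ : state) benchmark::DoNotOptimize(v);
}
BENCHMARK(BM_Grid)->ArgsProduct({
    benchmark::CreateRange(8, 512, /*multi=*/8),      // {8, 64, 512}
    benchmark::CreateDenseRange(1, 4, /*step=*/1)});  // {1, 2, 3, 4}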
|
||||
|
||||
} // end namespace benchmark
|
||||
|
@ -1,6 +1,8 @@
|
||||
#ifndef BENCHMARK_REGISTER_H
|
||||
#define BENCHMARK_REGISTER_H
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
#include "check.h"
|
||||
@ -11,18 +13,18 @@ namespace internal {
|
||||
// Append the powers of 'mult' in the closed interval [lo, hi].
|
||||
// Returns iterator to the start of the inserted range.
|
||||
template <typename T>
|
||||
typename std::vector<T>::iterator
|
||||
AddPowers(std::vector<T>* dst, T lo, T hi, int mult) {
|
||||
CHECK_GE(lo, 0);
|
||||
CHECK_GE(hi, lo);
|
||||
CHECK_GE(mult, 2);
|
||||
typename std::vector<T>::iterator AddPowers(std::vector<T>* dst, T lo, T hi,
|
||||
int mult) {
|
||||
BM_CHECK_GE(lo, 0);
|
||||
BM_CHECK_GE(hi, lo);
|
||||
BM_CHECK_GE(mult, 2);
|
||||
|
||||
const size_t start_offset = dst->size();
|
||||
|
||||
static const T kmax = std::numeric_limits<T>::max();
|
||||
|
||||
// Space out the values in multiples of "mult"
|
||||
for (T i = 1; i <= hi; i *= mult) {
|
||||
for (T i = static_cast<T>(1); i <= hi; i *= static_cast<T>(mult)) {
|
||||
if (i >= lo) {
|
||||
dst->push_back(i);
|
||||
}
|
||||
@ -31,16 +33,16 @@ AddPowers(std::vector<T>* dst, T lo, T hi, int mult) {
|
||||
if (i > kmax / mult) break;
|
||||
}
|
||||
|
||||
return dst->begin() + start_offset;
|
||||
return dst->begin() + static_cast<int>(start_offset);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void AddNegatedPowers(std::vector<T>* dst, T lo, T hi, int mult) {
|
||||
// We negate lo and hi so we require that they cannot be equal to 'min'.
|
||||
CHECK_GT(lo, std::numeric_limits<T>::min());
|
||||
CHECK_GT(hi, std::numeric_limits<T>::min());
|
||||
CHECK_GE(hi, lo);
|
||||
CHECK_LE(hi, 0);
|
||||
BM_CHECK_GT(lo, std::numeric_limits<T>::min());
|
||||
BM_CHECK_GT(hi, std::numeric_limits<T>::min());
|
||||
BM_CHECK_GE(hi, lo);
|
||||
BM_CHECK_LE(hi, 0);
|
||||
|
||||
// Add positive powers, then negate and reverse.
|
||||
// Casts necessary since small integers get promoted
|
||||
@ -59,8 +61,8 @@ void AddRange(std::vector<T>* dst, T lo, T hi, int mult) {
|
||||
static_assert(std::is_integral<T>::value && std::is_signed<T>::value,
|
||||
"Args type must be a signed integer");
|
||||
|
||||
CHECK_GE(hi, lo);
|
||||
CHECK_GE(mult, 2);
|
||||
BM_CHECK_GE(hi, lo);
|
||||
BM_CHECK_GE(mult, 2);
|
||||
|
||||
// Add "lo"
|
||||
dst->push_back(lo);
|
||||
@ -86,7 +88,7 @@ void AddRange(std::vector<T>* dst, T lo, T hi, int mult) {
|
||||
}
|
||||
|
||||
// Treat 0 as a special case (see discussion on #762).
|
||||
if (lo <= 0 && hi >= 0) {
|
||||
if (lo < 0 && hi >= 0) {
|
||||
dst->push_back(0);
|
||||
}
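Putting AddRange() together, a sketch of what a user-level Range() call expands to; the values follow directly from the code above:

#include <vector>
#include "benchmark/benchmark.h"

static void BM_VectorFill(benchmark::State& state) {
  for (auto _ : state) {
    std::vector<int> v(static_cast<size_t>(state.range(0)), 42);
    benchmark::DoNotOptimize(v.data());
  }
}
// With the default multiplier of 8 this expands to the args {8, 64, 512, 1024}:
// the endpoints plus the powers of 8 strictly between them, with 0 added only
// when the range actually crosses it.
BENCHMARK(BM_VectorFill)->Range(8, 1024);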
|
||||
|
||||
|
@ -13,12 +13,13 @@
|
||||
// limitations under the License.
|
||||
|
||||
#include "benchmark_runner.h"
|
||||
|
||||
#include "benchmark/benchmark.h"
|
||||
#include "benchmark_api_internal.h"
|
||||
#include "internal_macros.h"
|
||||
|
||||
#ifndef BENCHMARK_OS_WINDOWS
|
||||
#ifndef BENCHMARK_OS_FUCHSIA
|
||||
#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT)
|
||||
#include <sys/resource.h>
|
||||
#endif
|
||||
#include <sys/time.h>
|
||||
@ -27,11 +28,14 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <climits>
|
||||
#include <cmath>
|
||||
#include <condition_variable>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
@ -45,6 +49,7 @@
|
||||
#include "internal_macros.h"
|
||||
#include "log.h"
|
||||
#include "mutex.h"
|
||||
#include "perf_counters.h"
|
||||
#include "re.h"
|
||||
#include "statistics.h"
|
||||
#include "string_util.h"
|
||||
@ -60,64 +65,72 @@ MemoryManager* memory_manager = nullptr;
|
||||
namespace {
|
||||
|
||||
static constexpr IterationCount kMaxIterations = 1000000000;
|
||||
const double kDefaultMinTime =
|
||||
std::strtod(::benchmark::kDefaultMinTimeStr, /*p_end*/ nullptr);
|
||||
|
||||
BenchmarkReporter::Run CreateRunReport(
|
||||
const benchmark::internal::BenchmarkInstance& b,
|
||||
const internal::ThreadManager::Result& results,
|
||||
IterationCount memory_iterations,
|
||||
const MemoryManager::Result& memory_result, double seconds,
|
||||
int64_t repetition_index) {
|
||||
const MemoryManager::Result* memory_result, double seconds,
|
||||
int64_t repetition_index, int64_t repeats) {
|
||||
// Create report about this benchmark run.
|
||||
BenchmarkReporter::Run report;
|
||||
|
||||
report.run_name = b.name;
|
||||
report.error_occurred = results.has_error_;
|
||||
report.error_message = results.error_message_;
|
||||
report.run_name = b.name();
|
||||
report.family_index = b.family_index();
|
||||
report.per_family_instance_index = b.per_family_instance_index();
|
||||
report.skipped = results.skipped_;
|
||||
report.skip_message = results.skip_message_;
|
||||
report.report_label = results.report_label_;
|
||||
// This is the total iterations across all threads.
|
||||
report.iterations = results.iterations;
|
||||
report.time_unit = b.time_unit;
|
||||
report.threads = b.threads;
|
||||
report.time_unit = b.time_unit();
|
||||
report.threads = b.threads();
|
||||
report.repetition_index = repetition_index;
|
||||
report.repetitions = b.repetitions;
|
||||
report.repetitions = repeats;
|
||||
|
||||
if (!report.error_occurred) {
|
||||
if (b.use_manual_time) {
|
||||
if (!report.skipped) {
|
||||
if (b.use_manual_time()) {
|
||||
report.real_accumulated_time = results.manual_time_used;
|
||||
} else {
|
||||
report.real_accumulated_time = results.real_time_used;
|
||||
}
|
||||
report.cpu_accumulated_time = results.cpu_time_used;
|
||||
report.complexity_n = results.complexity_n;
|
||||
report.complexity = b.complexity;
|
||||
report.complexity_lambda = b.complexity_lambda;
|
||||
report.statistics = b.statistics;
|
||||
report.complexity = b.complexity();
|
||||
report.complexity_lambda = b.complexity_lambda();
|
||||
report.statistics = &b.statistics();
|
||||
report.counters = results.counters;
|
||||
|
||||
if (memory_iterations > 0) {
|
||||
report.has_memory_result = true;
|
||||
assert(memory_result != nullptr);
|
||||
report.memory_result = memory_result;
|
||||
report.allocs_per_iter =
|
||||
memory_iterations ? static_cast<double>(memory_result.num_allocs) /
|
||||
memory_iterations ? static_cast<double>(memory_result->num_allocs) /
|
||||
memory_iterations
|
||||
: 0;
|
||||
report.max_bytes_used = memory_result.max_bytes_used;
|
||||
}
|
||||
|
||||
internal::Finish(&report.counters, results.iterations, seconds, b.threads);
|
||||
internal::Finish(&report.counters, results.iterations, seconds,
|
||||
b.threads());
|
||||
}
|
||||
return report;
|
||||
}
|
||||
|
||||
// Execute one thread of benchmark b for the specified number of iterations.
|
||||
// Adds the stats collected for the thread into *total.
|
||||
// Adds the stats collected for the thread into manager->results.
|
||||
void RunInThread(const BenchmarkInstance* b, IterationCount iters,
|
||||
int thread_id, ThreadManager* manager) {
|
||||
int thread_id, ThreadManager* manager,
|
||||
PerfCountersMeasurement* perf_counters_measurement) {
|
||||
internal::ThreadTimer timer(
|
||||
b->measure_process_cpu_time
|
||||
b->measure_process_cpu_time()
|
||||
? internal::ThreadTimer::CreateProcessCpuTime()
|
||||
: internal::ThreadTimer::Create());
|
||||
State st = b->Run(iters, thread_id, &timer, manager);
|
||||
CHECK(st.error_occurred() || st.iterations() >= st.max_iterations)
|
||||
|
||||
State st =
|
||||
b->Run(iters, thread_id, &timer, manager, perf_counters_measurement);
|
||||
BM_CHECK(st.skipped() || st.iterations() >= st.max_iterations)
|
||||
<< "Benchmark returned before State::KeepRunning() returned false!";
|
||||
{
|
||||
MutexLock l(manager->GetBenchmarkMutex());
|
||||
@ -132,229 +145,351 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters,
|
||||
manager->NotifyThreadComplete();
|
||||
}
|
||||
|
||||
class BenchmarkRunner {
|
||||
public:
|
||||
BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_,
|
||||
std::vector<BenchmarkReporter::Run>* complexity_reports_)
|
||||
: b(b_),
|
||||
complexity_reports(*complexity_reports_),
|
||||
min_time(!IsZero(b.min_time) ? b.min_time : FLAGS_benchmark_min_time),
|
||||
repeats(b.repetitions != 0 ? b.repetitions
|
||||
: FLAGS_benchmark_repetitions),
|
||||
has_explicit_iteration_count(b.iterations != 0),
|
||||
pool(b.threads - 1),
|
||||
iters(has_explicit_iteration_count ? b.iterations : 1) {
|
||||
run_results.display_report_aggregates_only =
|
||||
(FLAGS_benchmark_report_aggregates_only ||
|
||||
FLAGS_benchmark_display_aggregates_only);
|
||||
run_results.file_report_aggregates_only =
|
||||
FLAGS_benchmark_report_aggregates_only;
|
||||
if (b.aggregation_report_mode != internal::ARM_Unspecified) {
|
||||
run_results.display_report_aggregates_only =
|
||||
(b.aggregation_report_mode &
|
||||
internal::ARM_DisplayReportAggregatesOnly);
|
||||
run_results.file_report_aggregates_only =
|
||||
(b.aggregation_report_mode & internal::ARM_FileReportAggregatesOnly);
|
||||
}
|
||||
double ComputeMinTime(const benchmark::internal::BenchmarkInstance& b,
|
||||
const BenchTimeType& iters_or_time) {
|
||||
if (!IsZero(b.min_time())) return b.min_time();
|
||||
// If the flag was used to specify number of iters, then return the default
|
||||
// min_time.
|
||||
if (iters_or_time.tag == BenchTimeType::ITERS) return kDefaultMinTime;
|
||||
|
||||
for (int repetition_num = 0; repetition_num < repeats; repetition_num++) {
|
||||
DoOneRepetition(repetition_num);
|
||||
}
|
||||
return iters_or_time.time;
|
||||
}
|
||||
|
||||
// Calculate additional statistics
|
||||
run_results.aggregates_only = ComputeStats(run_results.non_aggregates);
|
||||
IterationCount ComputeIters(const benchmark::internal::BenchmarkInstance& b,
|
||||
const BenchTimeType& iters_or_time) {
|
||||
if (b.iterations() != 0) return b.iterations();
|
||||
|
||||
// Maybe calculate complexity report
|
||||
if ((b.complexity != oNone) && b.last_benchmark_instance) {
|
||||
auto additional_run_stats = ComputeBigO(complexity_reports);
|
||||
run_results.aggregates_only.insert(run_results.aggregates_only.end(),
|
||||
additional_run_stats.begin(),
|
||||
additional_run_stats.end());
|
||||
complexity_reports.clear();
|
||||
}
|
||||
}
|
||||
|
||||
RunResults&& get_results() { return std::move(run_results); }
|
||||
|
||||
private:
|
||||
RunResults run_results;
|
||||
|
||||
const benchmark::internal::BenchmarkInstance& b;
|
||||
std::vector<BenchmarkReporter::Run>& complexity_reports;
|
||||
|
||||
const double min_time;
|
||||
const int repeats;
|
||||
const bool has_explicit_iteration_count;
|
||||
|
||||
std::vector<std::thread> pool;
|
||||
|
||||
IterationCount iters; // preserved between repetitions!
|
||||
// So only the first repetition has to find/calculate it,
|
||||
// the other repetitions will just use that precomputed iteration count.
|
||||
|
||||
struct IterationResults {
|
||||
internal::ThreadManager::Result results;
|
||||
IterationCount iters;
|
||||
double seconds;
|
||||
};
|
||||
IterationResults DoNIterations() {
|
||||
VLOG(2) << "Running " << b.name.str() << " for " << iters << "\n";
|
||||
|
||||
std::unique_ptr<internal::ThreadManager> manager;
|
||||
manager.reset(new internal::ThreadManager(b.threads));
|
||||
|
||||
// Run all but one thread in separate threads
|
||||
for (std::size_t ti = 0; ti < pool.size(); ++ti) {
|
||||
pool[ti] = std::thread(&RunInThread, &b, iters, static_cast<int>(ti + 1),
|
||||
manager.get());
|
||||
}
|
||||
// And run one thread here directly.
|
||||
// (If we were asked to run just one thread, we don't create new threads.)
|
||||
// Yes, we need to do this here *after* we start the separate threads.
|
||||
RunInThread(&b, iters, 0, manager.get());
|
||||
|
||||
// The main thread has finished. Now let's wait for the other threads.
|
||||
manager->WaitForAllThreads();
|
||||
for (std::thread& thread : pool) thread.join();
|
||||
|
||||
IterationResults i;
|
||||
// Acquire the measurements/counters from the manager, UNDER THE LOCK!
|
||||
{
|
||||
MutexLock l(manager->GetBenchmarkMutex());
|
||||
i.results = manager->results;
|
||||
}
|
||||
|
||||
// And get rid of the manager.
|
||||
manager.reset();
|
||||
|
||||
// Adjust real/manual time stats since they were reported per thread.
|
||||
i.results.real_time_used /= b.threads;
|
||||
i.results.manual_time_used /= b.threads;
|
||||
// If we were measuring whole-process CPU usage, adjust the CPU time too.
|
||||
if (b.measure_process_cpu_time) i.results.cpu_time_used /= b.threads;
|
||||
|
||||
VLOG(2) << "Ran in " << i.results.cpu_time_used << "/"
|
||||
<< i.results.real_time_used << "\n";
|
||||
|
||||
// So for how long were we running?
|
||||
i.iters = iters;
|
||||
// Base decisions off of real time if requested by this benchmark.
|
||||
i.seconds = i.results.cpu_time_used;
|
||||
if (b.use_manual_time) {
|
||||
i.seconds = i.results.manual_time_used;
|
||||
} else if (b.use_real_time) {
|
||||
i.seconds = i.results.real_time_used;
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
IterationCount PredictNumItersNeeded(const IterationResults& i) const {
|
||||
// See by how much the iteration count should be increased.
|
||||
// Note: Avoid division by zero with max(seconds, 1ns).
|
||||
double multiplier = min_time * 1.4 / std::max(i.seconds, 1e-9);
|
||||
// If our last run was at least 10% of FLAGS_benchmark_min_time then we
|
||||
// use the multiplier directly.
|
||||
// Otherwise we use at most 10 times expansion.
|
||||
// NOTE: When the last run was at least 10% of the min time the max
|
||||
// expansion should be 14x.
|
||||
bool is_significant = (i.seconds / min_time) > 0.1;
|
||||
multiplier = is_significant ? multiplier : std::min(10.0, multiplier);
|
||||
if (multiplier <= 1.0) multiplier = 2.0;
|
||||
|
||||
// So what seems to be the sufficiently-large iteration count? Round up.
|
||||
const IterationCount max_next_iters = static_cast<IterationCount>(
|
||||
std::lround(std::max(multiplier * static_cast<double>(i.iters),
|
||||
static_cast<double>(i.iters) + 1.0)));
|
||||
// But we do have *some* sanity limits though..
|
||||
const IterationCount next_iters = std::min(max_next_iters, kMaxIterations);
|
||||
|
||||
VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n";
|
||||
return next_iters; // round up before conversion to integer.
|
||||
}
|
||||
|
||||
bool ShouldReportIterationResults(const IterationResults& i) const {
|
||||
// Determine if this run should be reported;
|
||||
// Either it has run for a sufficient amount of time
|
||||
// or because an error was reported.
|
||||
return i.results.has_error_ ||
|
||||
i.iters >= kMaxIterations || // Too many iterations already.
|
||||
i.seconds >= min_time || // The elapsed time is large enough.
|
||||
// CPU time is specified but the elapsed real time greatly exceeds
|
||||
// the minimum time.
|
||||
// Note that user-provided timers are exempt from this sanity check.
|
||||
((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time);
|
||||
}
|
||||
|
||||
void DoOneRepetition(int64_t repetition_index) {
|
||||
const bool is_the_first_repetition = repetition_index == 0;
|
||||
IterationResults i;
|
||||
|
||||
// We *may* be gradually increasing the length (iteration count)
|
||||
// of the benchmark until we decide the results are significant.
|
||||
// And once we do, we report those last results and exit.
|
||||
// Please do note that the if there are repetitions, the iteration count
|
||||
// is *only* calculated for the *first* repetition, and other repetitions
|
||||
// simply use that precomputed iteration count.
|
||||
for (;;) {
|
||||
i = DoNIterations();
|
||||
|
||||
// Do we consider the results to be significant?
|
||||
// If we are doing repetitions, and the first repetition was already done,
|
||||
// it has calculated the correct iteration time, so we have run that very
|
||||
// iteration count just now. No need to calculate anything. Just report.
|
||||
// Else, the normal rules apply.
|
||||
const bool results_are_significant = !is_the_first_repetition ||
|
||||
has_explicit_iteration_count ||
|
||||
ShouldReportIterationResults(i);
|
||||
|
||||
if (results_are_significant) break; // Good, let's report them!
|
||||
|
||||
// Nope, bad iteration. Let's re-estimate the hopefully-sufficient
|
||||
// iteration count, and run the benchmark again...
|
||||
|
||||
iters = PredictNumItersNeeded(i);
|
||||
assert(iters > i.iters &&
|
||||
"if we did more iterations than we want to do the next time, "
|
||||
"then we should have accepted the current iteration run.");
|
||||
}
|
||||
|
||||
// Oh, one last thing, we need to also produce the 'memory measurements'..
|
||||
MemoryManager::Result memory_result;
|
||||
IterationCount memory_iterations = 0;
|
||||
if (memory_manager != nullptr) {
|
||||
// Only run a few iterations to reduce the impact of one-time
|
||||
// allocations in benchmarks that are not properly managed.
|
||||
memory_iterations = std::min<IterationCount>(16, iters);
|
||||
memory_manager->Start();
|
||||
std::unique_ptr<internal::ThreadManager> manager;
|
||||
manager.reset(new internal::ThreadManager(1));
|
||||
RunInThread(&b, memory_iterations, 0, manager.get());
|
||||
manager->WaitForAllThreads();
|
||||
manager.reset();
|
||||
|
||||
memory_manager->Stop(&memory_result);
|
||||
}
|
||||
|
||||
// Ok, now actually report.
|
||||
BenchmarkReporter::Run report =
|
||||
CreateRunReport(b, i.results, memory_iterations, memory_result,
|
||||
i.seconds, repetition_index);
|
||||
|
||||
if (!report.error_occurred && b.complexity != oNone)
|
||||
complexity_reports.push_back(report);
|
||||
|
||||
run_results.non_aggregates.push_back(report);
|
||||
}
|
||||
};
|
||||
// We've already concluded that this flag is currently used to pass
|
||||
// iters but do a check here again anyway.
|
||||
BM_CHECK(iters_or_time.tag == BenchTimeType::ITERS);
|
||||
return iters_or_time.iters;
|
||||
}
|
||||
|
||||
} // end namespace
|
||||
|
||||
RunResults RunBenchmark(
|
||||
const benchmark::internal::BenchmarkInstance& b,
|
||||
std::vector<BenchmarkReporter::Run>* complexity_reports) {
|
||||
internal::BenchmarkRunner r(b, complexity_reports);
|
||||
return r.get_results();
|
||||
BenchTimeType ParseBenchMinTime(const std::string& value) {
|
||||
BenchTimeType ret;
|
||||
|
||||
if (value.empty()) {
|
||||
ret.tag = BenchTimeType::TIME;
|
||||
ret.time = 0.0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (value.back() == 'x') {
|
||||
char* p_end;
|
||||
// Reset errno before it's changed by strtol.
|
||||
errno = 0;
|
||||
IterationCount num_iters = std::strtol(value.c_str(), &p_end, 10);
|
||||
|
||||
// After a valid parse, p_end should have been set to
|
||||
// point to the 'x' suffix.
|
||||
BM_CHECK(errno == 0 && p_end != nullptr && *p_end == 'x')
|
||||
<< "Malformed iters value passed to --benchmark_min_time: `" << value
|
||||
<< "`. Expected --benchmark_min_time=<integer>x.";
|
||||
|
||||
ret.tag = BenchTimeType::ITERS;
|
||||
ret.iters = num_iters;
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool has_suffix = value.back() == 's';
|
||||
if (!has_suffix) {
|
||||
BM_VLOG(0) << "Value passed to --benchmark_min_time should have a suffix. "
|
||||
"Eg., `30s` for 30-seconds.";
|
||||
}
|
||||
|
||||
char* p_end;
|
||||
// Reset errno before it's changed by strtod.
|
||||
errno = 0;
|
||||
double min_time = std::strtod(value.c_str(), &p_end);
|
||||
|
||||
// After a successful parse, p_end should point to the suffix 's',
|
||||
// or the end of the string if the suffix was omitted.
|
||||
BM_CHECK(errno == 0 && p_end != nullptr &&
|
||||
((has_suffix && *p_end == 's') || *p_end == '\0'))
|
||||
<< "Malformed seconds value passed to --benchmark_min_time: `" << value
|
||||
<< "`. Expected --benchmark_min_time=<float>x.";
|
||||
|
||||
ret.tag = BenchTimeType::TIME;
|
||||
ret.time = min_time;
|
||||
|
||||
return ret;
|
||||
}
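In other words, --benchmark_min_time now accepts either "<integer>x" (run exactly that many iterations) or "<float>s" (run for at least that many seconds, with a warning if the suffix is missing). A standalone sketch of the same suffix handling, illustrative only and not the library's code:

#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <string>

int main() {
  const std::string value = "2.5s";  // e.g. what --benchmark_min_time=2.5s passes in
  if (!value.empty() && value.back() == 'x') {
    errno = 0;
    char* end = nullptr;
    const long iters = std::strtol(value.c_str(), &end, 10);
    std::printf("explicit iteration count: %ld\n",
                (errno == 0 && end != nullptr && *end == 'x') ? iters : -1L);
  } else {
    errno = 0;
    char* end = nullptr;
    const double seconds = std::strtod(value.c_str(), &end);
    const bool ok = errno == 0 && end != nullptr && (*end == 's' || *end == '\0');
    std::printf("min time in seconds: %g (well-formed: %d)\n", seconds, ok ? 1 : 0);
  }
  return 0;
}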
|
||||
|
||||
BenchmarkRunner::BenchmarkRunner(
|
||||
const benchmark::internal::BenchmarkInstance& b_,
|
||||
PerfCountersMeasurement* pcm_,
|
||||
BenchmarkReporter::PerFamilyRunReports* reports_for_family_)
|
||||
: b(b_),
|
||||
reports_for_family(reports_for_family_),
|
||||
parsed_benchtime_flag(ParseBenchMinTime(FLAGS_benchmark_min_time)),
|
||||
min_time(ComputeMinTime(b_, parsed_benchtime_flag)),
|
||||
min_warmup_time((!IsZero(b.min_time()) && b.min_warmup_time() > 0.0)
|
||||
? b.min_warmup_time()
|
||||
: FLAGS_benchmark_min_warmup_time),
|
||||
warmup_done(!(min_warmup_time > 0.0)),
|
||||
repeats(b.repetitions() != 0 ? b.repetitions()
|
||||
: FLAGS_benchmark_repetitions),
|
||||
has_explicit_iteration_count(b.iterations() != 0 ||
|
||||
parsed_benchtime_flag.tag ==
|
||||
BenchTimeType::ITERS),
|
||||
pool(b.threads() - 1),
|
||||
iters(has_explicit_iteration_count
|
||||
? ComputeIters(b_, parsed_benchtime_flag)
|
||||
: 1),
|
||||
perf_counters_measurement_ptr(pcm_) {
|
||||
run_results.display_report_aggregates_only =
|
||||
(FLAGS_benchmark_report_aggregates_only ||
|
||||
FLAGS_benchmark_display_aggregates_only);
|
||||
run_results.file_report_aggregates_only =
|
||||
FLAGS_benchmark_report_aggregates_only;
|
||||
if (b.aggregation_report_mode() != internal::ARM_Unspecified) {
|
||||
run_results.display_report_aggregates_only =
|
||||
(b.aggregation_report_mode() &
|
||||
internal::ARM_DisplayReportAggregatesOnly);
|
||||
run_results.file_report_aggregates_only =
|
||||
(b.aggregation_report_mode() & internal::ARM_FileReportAggregatesOnly);
|
||||
BM_CHECK(FLAGS_benchmark_perf_counters.empty() ||
|
||||
(perf_counters_measurement_ptr->num_counters() == 0))
|
||||
<< "Perf counters were requested but could not be set up.";
|
||||
}
|
||||
}
|
||||
|
||||
BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() {
|
||||
BM_VLOG(2) << "Running " << b.name().str() << " for " << iters << "\n";
|
||||
|
||||
std::unique_ptr<internal::ThreadManager> manager;
|
||||
manager.reset(new internal::ThreadManager(b.threads()));
|
||||
|
||||
// Run all but one thread in separate threads
|
||||
for (std::size_t ti = 0; ti < pool.size(); ++ti) {
|
||||
pool[ti] = std::thread(&RunInThread, &b, iters, static_cast<int>(ti + 1),
|
||||
manager.get(), perf_counters_measurement_ptr);
|
||||
}
|
||||
// And run one thread here directly.
|
||||
// (If we were asked to run just one thread, we don't create new threads.)
|
||||
// Yes, we need to do this here *after* we start the separate threads.
|
||||
RunInThread(&b, iters, 0, manager.get(), perf_counters_measurement_ptr);
|
||||
|
||||
// The main thread has finished. Now let's wait for the other threads.
|
||||
manager->WaitForAllThreads();
|
||||
for (std::thread& thread : pool) thread.join();
|
||||
|
||||
IterationResults i;
|
||||
// Acquire the measurements/counters from the manager, UNDER THE LOCK!
|
||||
{
|
||||
MutexLock l(manager->GetBenchmarkMutex());
|
||||
i.results = manager->results;
|
||||
}
|
||||
|
||||
// And get rid of the manager.
|
||||
manager.reset();
|
||||
|
||||
// Adjust real/manual time stats since they were reported per thread.
|
||||
i.results.real_time_used /= b.threads();
|
||||
i.results.manual_time_used /= b.threads();
|
||||
// If we were measuring whole-process CPU usage, adjust the CPU time too.
|
||||
if (b.measure_process_cpu_time()) i.results.cpu_time_used /= b.threads();
|
||||
|
||||
BM_VLOG(2) << "Ran in " << i.results.cpu_time_used << "/"
|
||||
<< i.results.real_time_used << "\n";
|
||||
|
||||
// By using KeepRunningBatch a benchmark can iterate more times than
|
||||
// requested, so take the iteration count from i.results.
|
||||
i.iters = i.results.iterations / b.threads();
|
||||
|
||||
// Base decisions off of real time if requested by this benchmark.
|
||||
i.seconds = i.results.cpu_time_used;
|
||||
if (b.use_manual_time()) {
|
||||
i.seconds = i.results.manual_time_used;
|
||||
} else if (b.use_real_time()) {
|
||||
i.seconds = i.results.real_time_used;
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
IterationCount BenchmarkRunner::PredictNumItersNeeded(
|
||||
const IterationResults& i) const {
|
||||
// See by how much the iteration count should be increased.
|
||||
// Note: Avoid division by zero with max(seconds, 1ns).
|
||||
double multiplier = GetMinTimeToApply() * 1.4 / std::max(i.seconds, 1e-9);
|
||||
// If our last run was at least 10% of FLAGS_benchmark_min_time then we
|
||||
// use the multiplier directly.
|
||||
// Otherwise we use at most 10 times expansion.
|
||||
// NOTE: When the last run was at least 10% of the min time the max
|
||||
// expansion should be 14x.
|
||||
const bool is_significant = (i.seconds / GetMinTimeToApply()) > 0.1;
|
||||
multiplier = is_significant ? multiplier : 10.0;
|
||||
|
||||
// So what seems to be the sufficiently-large iteration count? Round up.
|
||||
const IterationCount max_next_iters = static_cast<IterationCount>(
|
||||
std::lround(std::max(multiplier * static_cast<double>(i.iters),
|
||||
static_cast<double>(i.iters) + 1.0)));
|
||||
// But we do have *some* limits though..
|
||||
const IterationCount next_iters = std::min(max_next_iters, kMaxIterations);
|
||||
|
||||
BM_VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n";
|
||||
return next_iters; // round up before conversion to integer.
|
||||
}
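A worked example of the scaling rule above, with made-up numbers (a 1s target and a 20ms probe run of 100 iterations):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
  const double min_time = 1.0;  // target time for the next run, in seconds
  const double seconds = 0.02;  // what the probe run actually took
  const int64_t iters = 100;    // iterations of the probe run
  double multiplier = min_time * 1.4 / std::max(seconds, 1e-9);  // 70x
  const bool is_significant = (seconds / min_time) > 0.1;        // false here
  multiplier = is_significant ? multiplier : 10.0;               // capped at 10x
  const int64_t next_iters = static_cast<int64_t>(std::lround(
      std::max(multiplier * static_cast<double>(iters),
               static_cast<double>(iters) + 1.0)));
  // The real runner additionally clamps this to kMaxIterations.
  std::printf("next iteration count: %lld\n",
              static_cast<long long>(next_iters));  // prints 1000
  return 0;
}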
|
||||
|
||||
bool BenchmarkRunner::ShouldReportIterationResults(
|
||||
const IterationResults& i) const {
|
||||
// Determine if this run should be reported;
|
||||
// Either it has run for a sufficient amount of time
|
||||
// or because an error was reported.
|
||||
return i.results.skipped_ ||
|
||||
i.iters >= kMaxIterations || // Too many iterations already.
|
||||
i.seconds >=
|
||||
GetMinTimeToApply() || // The elapsed time is large enough.
|
||||
// CPU time is specified but the elapsed real time greatly exceeds
|
||||
// the minimum time.
|
||||
// Note that user-provided timers are exempt from this test.
|
||||
((i.results.real_time_used >= 5 * GetMinTimeToApply()) &&
|
||||
!b.use_manual_time());
|
||||
}
|
||||
|
||||
double BenchmarkRunner::GetMinTimeToApply() const {
|
||||
// In order to re-use functionality to run and measure benchmarks for running
|
||||
// a warmup phase of the benchmark, we need a way of telling whether to apply
|
||||
// min_time or min_warmup_time. This function will figure out if we are in the
|
||||
// warmup phase and therefore need to apply min_warmup_time, or if we are
// already in the benchmarking phase and min_time needs to be applied.
|
||||
return warmup_done ? min_time : min_warmup_time;
|
||||
}
|
||||
|
||||
void BenchmarkRunner::FinishWarmUp(const IterationCount& i) {
|
||||
warmup_done = true;
|
||||
iters = i;
|
||||
}
|
||||
|
||||
void BenchmarkRunner::RunWarmUp() {
|
||||
// Use the same mechanisms for warming up the benchmark as used for actually
|
||||
// running and measuring the benchmark.
|
||||
IterationResults i_warmup;
|
||||
// Don't use the iterations determined in the warmup phase for the actual
// measured benchmark phase. While this may be a good starting point for the
// benchmark, and it would therefore remove the need to figure out how many
// iterations are needed if min_time is set again, it may also be a completely
// wrong guess since the warmup loops might be considerably slower (e.g.
// because of caching effects).
|
||||
const IterationCount i_backup = iters;
|
||||
|
||||
for (;;) {
|
||||
b.Setup();
|
||||
i_warmup = DoNIterations();
|
||||
b.Teardown();
|
||||
|
||||
const bool finish = ShouldReportIterationResults(i_warmup);
|
||||
|
||||
if (finish) {
|
||||
FinishWarmUp(i_backup);
|
||||
break;
|
||||
}
|
||||
|
||||
// Although we are running "only" a warmup phase, where running enough
// iterations at once without measuring time isn't as important as it is for
// the benchmarking phase, we still do it the same way; otherwise it would be
// very confusing for the user to choose a proper value for min_warmup_time
// if a different approach were used to run it.
|
||||
iters = PredictNumItersNeeded(i_warmup);
|
||||
assert(iters > i_warmup.iters &&
|
||||
"if we did more iterations than we want to do the next time, "
|
||||
"then we should have accepted the current iteration run.");
|
||||
}
|
||||
}
|
||||
|
||||
void BenchmarkRunner::DoOneRepetition() {
|
||||
assert(HasRepeatsRemaining() && "Already done all repetitions?");
|
||||
|
||||
const bool is_the_first_repetition = num_repetitions_done == 0;
|
||||
|
||||
// In case a warmup phase is requested by the benchmark, run it now.
|
||||
// After running the warmup phase the BenchmarkRunner should be in a state as
|
||||
// this warmup never happened except the fact that warmup_done is set. Every
|
||||
// other manipulation of the BenchmarkRunner instance would be a bug! Please
|
||||
// fix it.
|
||||
if (!warmup_done) RunWarmUp();
|
||||
|
||||
IterationResults i;
|
||||
// We *may* be gradually increasing the length (iteration count)
|
||||
// of the benchmark until we decide the results are significant.
|
||||
// And once we do, we report those last results and exit.
|
||||
// Please do note that the if there are repetitions, the iteration count
|
||||
// is *only* calculated for the *first* repetition, and other repetitions
|
||||
// simply use that precomputed iteration count.
|
||||
for (;;) {
|
||||
b.Setup();
|
||||
i = DoNIterations();
|
||||
b.Teardown();
|
||||
|
||||
// Do we consider the results to be significant?
|
||||
// If we are doing repetitions, and the first repetition was already done,
|
||||
// it has calculated the correct iteration time, so we have run that very
|
||||
// iteration count just now. No need to calculate anything. Just report.
|
||||
// Else, the normal rules apply.
|
||||
const bool results_are_significant = !is_the_first_repetition ||
|
||||
has_explicit_iteration_count ||
|
||||
ShouldReportIterationResults(i);
|
||||
|
||||
if (results_are_significant) break; // Good, let's report them!
|
||||
|
||||
// Nope, bad iteration. Let's re-estimate the hopefully-sufficient
|
||||
// iteration count, and run the benchmark again...
|
||||
|
||||
iters = PredictNumItersNeeded(i);
|
||||
assert(iters > i.iters &&
|
||||
"if we did more iterations than we want to do the next time, "
|
||||
"then we should have accepted the current iteration run.");
|
||||
}
|
||||
|
||||
// Oh, one last thing, we need to also produce the 'memory measurements'..
|
||||
MemoryManager::Result* memory_result = nullptr;
|
||||
IterationCount memory_iterations = 0;
|
||||
if (memory_manager != nullptr) {
|
||||
// TODO(vyng): Consider making BenchmarkReporter::Run::memory_result an
|
||||
// optional so we don't have to own the Result here.
|
||||
// Can't do it now due to cxx03.
|
||||
memory_results.push_back(MemoryManager::Result());
|
||||
memory_result = &memory_results.back();
|
||||
// Only run a few iterations to reduce the impact of one-time
|
||||
// allocations in benchmarks that are not properly managed.
|
||||
memory_iterations = std::min<IterationCount>(16, iters);
|
||||
memory_manager->Start();
|
||||
std::unique_ptr<internal::ThreadManager> manager;
|
||||
manager.reset(new internal::ThreadManager(1));
|
||||
b.Setup();
|
||||
RunInThread(&b, memory_iterations, 0, manager.get(),
|
||||
perf_counters_measurement_ptr);
|
||||
manager->WaitForAllThreads();
|
||||
manager.reset();
|
||||
b.Teardown();
|
||||
memory_manager->Stop(*memory_result);
|
||||
}
|
||||
|
||||
// Ok, now actually report.
|
||||
BenchmarkReporter::Run report =
|
||||
CreateRunReport(b, i.results, memory_iterations, memory_result, i.seconds,
|
||||
num_repetitions_done, repeats);
|
||||
|
||||
if (reports_for_family) {
|
||||
++reports_for_family->num_runs_done;
|
||||
if (!report.skipped) reports_for_family->Runs.push_back(report);
|
||||
}
|
||||
|
||||
run_results.non_aggregates.push_back(report);
|
||||
|
||||
++num_repetitions_done;
|
||||
}
|
||||
|
||||
RunResults&& BenchmarkRunner::GetResults() {
|
||||
assert(!HasRepeatsRemaining() && "Did not run all repetitions yet?");
|
||||
|
||||
// Calculate additional statistics over the repetitions of this instance.
|
||||
run_results.aggregates_only = ComputeStats(run_results.non_aggregates);
|
||||
|
||||
return std::move(run_results);
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
@ -15,19 +15,23 @@
|
||||
#ifndef BENCHMARK_RUNNER_H_
|
||||
#define BENCHMARK_RUNNER_H_
|
||||
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
#include "benchmark_api_internal.h"
|
||||
#include "internal_macros.h"
|
||||
|
||||
DECLARE_double(benchmark_min_time);
|
||||
|
||||
DECLARE_int32(benchmark_repetitions);
|
||||
|
||||
DECLARE_bool(benchmark_report_aggregates_only);
|
||||
|
||||
DECLARE_bool(benchmark_display_aggregates_only);
|
||||
#include "perf_counters.h"
|
||||
#include "thread_manager.h"
|
||||
|
||||
namespace benchmark {
|
||||
|
||||
BM_DECLARE_string(benchmark_min_time);
|
||||
BM_DECLARE_double(benchmark_min_warmup_time);
|
||||
BM_DECLARE_int32(benchmark_repetitions);
|
||||
BM_DECLARE_bool(benchmark_report_aggregates_only);
|
||||
BM_DECLARE_bool(benchmark_display_aggregates_only);
|
||||
BM_DECLARE_string(benchmark_perf_counters);
|
||||
|
||||
namespace internal {
|
||||
|
||||
extern MemoryManager* memory_manager;
|
||||
@ -40,9 +44,85 @@ struct RunResults {
|
||||
bool file_report_aggregates_only = false;
|
||||
};
|
||||
|
||||
RunResults RunBenchmark(
|
||||
const benchmark::internal::BenchmarkInstance& b,
|
||||
std::vector<BenchmarkReporter::Run>* complexity_reports);
|
||||
struct BENCHMARK_EXPORT BenchTimeType {
|
||||
enum { ITERS, TIME } tag;
|
||||
union {
|
||||
IterationCount iters;
|
||||
double time;
|
||||
};
|
||||
};
|
||||
|
||||
BENCHMARK_EXPORT
|
||||
BenchTimeType ParseBenchMinTime(const std::string& value);
|
||||
|
||||
class BenchmarkRunner {
|
||||
public:
|
||||
BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_,
|
||||
benchmark::internal::PerfCountersMeasurement* pmc_,
|
||||
BenchmarkReporter::PerFamilyRunReports* reports_for_family);
|
||||
|
||||
int GetNumRepeats() const { return repeats; }
|
||||
|
||||
bool HasRepeatsRemaining() const {
|
||||
return GetNumRepeats() != num_repetitions_done;
|
||||
}
|
||||
|
||||
void DoOneRepetition();
|
||||
|
||||
RunResults&& GetResults();
|
||||
|
||||
BenchmarkReporter::PerFamilyRunReports* GetReportsForFamily() const {
|
||||
return reports_for_family;
|
||||
}
|
||||
|
||||
double GetMinTime() const { return min_time; }
|
||||
|
||||
bool HasExplicitIters() const { return has_explicit_iteration_count; }
|
||||
|
||||
IterationCount GetIters() const { return iters; }
|
||||
|
||||
private:
|
||||
RunResults run_results;
|
||||
|
||||
const benchmark::internal::BenchmarkInstance& b;
|
||||
BenchmarkReporter::PerFamilyRunReports* reports_for_family;
|
||||
|
||||
BenchTimeType parsed_benchtime_flag;
|
||||
const double min_time;
|
||||
const double min_warmup_time;
|
||||
bool warmup_done;
|
||||
const int repeats;
|
||||
const bool has_explicit_iteration_count;
|
||||
|
||||
int num_repetitions_done = 0;
|
||||
|
||||
std::vector<std::thread> pool;
|
||||
|
||||
std::vector<MemoryManager::Result> memory_results;
|
||||
|
||||
IterationCount iters; // preserved between repetitions!
|
||||
// So only the first repetition has to find/calculate it,
|
||||
// the other repetitions will just use that precomputed iteration count.
|
||||
|
||||
PerfCountersMeasurement* const perf_counters_measurement_ptr = nullptr;
|
||||
|
||||
struct IterationResults {
|
||||
internal::ThreadManager::Result results;
|
||||
IterationCount iters;
|
||||
double seconds;
|
||||
};
|
||||
IterationResults DoNIterations();
|
||||
|
||||
IterationCount PredictNumItersNeeded(const IterationResults& i) const;
|
||||
|
||||
bool ShouldReportIterationResults(const IterationResults& i) const;
|
||||
|
||||
double GetMinTimeToApply() const;
|
||||
|
||||
void FinishWarmUp(const IterationCount& i);
|
||||
|
||||
void RunWarmUp();
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
|
||||
|
11
src/check.cc
Normal file
@@ -0,0 +1,11 @@
#include "check.h"

namespace benchmark {
namespace internal {

static AbortHandlerT* handler = &std::abort;

BENCHMARK_EXPORT AbortHandlerT*& GetAbortHandler() { return handler; }

} // namespace internal
} // namespace benchmark
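Because GetAbortHandler() hands back a mutable reference, code that includes check.h (for example the library's own tests, an assumption not shown in this diff) can swap in its own handler before triggering a BM_CHECK failure. A sketch:

#include <cstdio>
#include <cstdlib>

#include "check.h"

static void ReportAndExit() {
  std::fprintf(stderr, "BM_CHECK failure intercepted\n");
  std::exit(1);
}

void InstallQuietAbortHandler() {
  benchmark::internal::GetAbortHandler() = &ReportAndExit;
}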
68
src/check.h
@@ -5,26 +5,43 @@
|
||||
#include <cstdlib>
|
||||
#include <ostream>
|
||||
|
||||
#include "benchmark/export.h"
|
||||
#include "internal_macros.h"
|
||||
#include "log.h"
|
||||
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
#define BENCHMARK_NOEXCEPT noexcept
|
||||
#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
|
||||
#elif defined(_MSC_VER) && !defined(__clang__)
|
||||
#if _MSC_VER >= 1900
|
||||
#define BENCHMARK_NOEXCEPT noexcept
|
||||
#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
|
||||
#else
|
||||
#define BENCHMARK_NOEXCEPT
|
||||
#define BENCHMARK_NOEXCEPT_OP(x)
|
||||
#endif
|
||||
#define __func__ __FUNCTION__
|
||||
#else
|
||||
#define BENCHMARK_NOEXCEPT
|
||||
#define BENCHMARK_NOEXCEPT_OP(x)
|
||||
#endif
|
||||
|
||||
namespace benchmark {
|
||||
namespace internal {
|
||||
|
||||
typedef void(AbortHandlerT)();
|
||||
|
||||
inline AbortHandlerT*& GetAbortHandler() {
|
||||
static AbortHandlerT* handler = &std::abort;
|
||||
return handler;
|
||||
}
|
||||
BENCHMARK_EXPORT
|
||||
AbortHandlerT*& GetAbortHandler();
|
||||
|
||||
BENCHMARK_NORETURN inline void CallAbortHandler() {
|
||||
GetAbortHandler()();
|
||||
std::abort(); // fallback to enforce noreturn
|
||||
}
|
||||
|
||||
// CheckHandler is the class constructed by failing CHECK macros. CheckHandler
|
||||
// will log information about the failures and abort when it is destructed.
|
||||
// CheckHandler is the class constructed by failing BM_CHECK macros.
|
||||
// CheckHandler will log information about the failures and abort when it is
|
||||
// destructed.
|
||||
class CheckHandler {
|
||||
public:
|
||||
CheckHandler(const char* check, const char* file, const char* func, int line)
|
||||
@ -35,10 +52,17 @@ class CheckHandler {
|
||||
|
||||
LogType& GetLog() { return log_; }
|
||||
|
||||
#if defined(COMPILER_MSVC)
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable : 4722)
|
||||
#endif
|
||||
BENCHMARK_NORETURN ~CheckHandler() BENCHMARK_NOEXCEPT_OP(false) {
|
||||
log_ << std::endl;
|
||||
CallAbortHandler();
|
||||
}
|
||||
#if defined(COMPILER_MSVC)
|
||||
#pragma warning(pop)
|
||||
#endif
|
||||
|
||||
CheckHandler& operator=(const CheckHandler&) = delete;
|
||||
CheckHandler(const CheckHandler&) = delete;
|
||||
@ -51,32 +75,32 @@ class CheckHandler {
|
||||
} // end namespace internal
|
||||
} // end namespace benchmark
|
||||
|
||||
// The CHECK macro returns a std::ostream object that can have extra information
|
||||
// written to it.
|
||||
// The BM_CHECK macro returns a std::ostream object that can have extra
|
||||
// information written to it.
|
||||
#ifndef NDEBUG
|
||||
#define CHECK(b) \
|
||||
#define BM_CHECK(b) \
|
||||
(b ? ::benchmark::internal::GetNullLogInstance() \
|
||||
: ::benchmark::internal::CheckHandler(#b, __FILE__, __func__, __LINE__) \
|
||||
.GetLog())
|
||||
#else
|
||||
#define CHECK(b) ::benchmark::internal::GetNullLogInstance()
|
||||
#define BM_CHECK(b) ::benchmark::internal::GetNullLogInstance()
|
||||
#endif
|
||||
|
||||
// clang-format off
|
||||
// preserve whitespacing between operators for alignment
|
||||
#define CHECK_EQ(a, b) CHECK((a) == (b))
|
||||
#define CHECK_NE(a, b) CHECK((a) != (b))
|
||||
#define CHECK_GE(a, b) CHECK((a) >= (b))
|
||||
#define CHECK_LE(a, b) CHECK((a) <= (b))
|
||||
#define CHECK_GT(a, b) CHECK((a) > (b))
|
||||
#define CHECK_LT(a, b) CHECK((a) < (b))
|
||||
#define BM_CHECK_EQ(a, b) BM_CHECK((a) == (b))
|
||||
#define BM_CHECK_NE(a, b) BM_CHECK((a) != (b))
|
||||
#define BM_CHECK_GE(a, b) BM_CHECK((a) >= (b))
|
||||
#define BM_CHECK_LE(a, b) BM_CHECK((a) <= (b))
|
||||
#define BM_CHECK_GT(a, b) BM_CHECK((a) > (b))
|
||||
#define BM_CHECK_LT(a, b) BM_CHECK((a) < (b))
|
||||
|
||||
#define CHECK_FLOAT_EQ(a, b, eps) CHECK(std::fabs((a) - (b)) < (eps))
|
||||
#define CHECK_FLOAT_NE(a, b, eps) CHECK(std::fabs((a) - (b)) >= (eps))
|
||||
#define CHECK_FLOAT_GE(a, b, eps) CHECK((a) - (b) > -(eps))
|
||||
#define CHECK_FLOAT_LE(a, b, eps) CHECK((b) - (a) > -(eps))
|
||||
#define CHECK_FLOAT_GT(a, b, eps) CHECK((a) - (b) > (eps))
|
||||
#define CHECK_FLOAT_LT(a, b, eps) CHECK((b) - (a) > (eps))
|
||||
#define BM_CHECK_FLOAT_EQ(a, b, eps) BM_CHECK(std::fabs((a) - (b)) < (eps))
|
||||
#define BM_CHECK_FLOAT_NE(a, b, eps) BM_CHECK(std::fabs((a) - (b)) >= (eps))
|
||||
#define BM_CHECK_FLOAT_GE(a, b, eps) BM_CHECK((a) - (b) > -(eps))
|
||||
#define BM_CHECK_FLOAT_LE(a, b, eps) BM_CHECK((b) - (a) > -(eps))
|
||||
#define BM_CHECK_FLOAT_GT(a, b, eps) BM_CHECK((a) - (b) > (eps))
|
||||
#define BM_CHECK_FLOAT_LT(a, b, eps) BM_CHECK((b) - (a) > (eps))
|
||||
// clang-format on
|
||||
|
||||
#endif // CHECK_H_
|
||||
|
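Aside, not part of this change: a minimal sketch of how the renamed macros are used from library code. SafeRatio is a hypothetical helper, assuming a file that can include src/check.h.

#include "check.h"

namespace benchmark {
namespace internal {
// Hypothetical helper, for illustration only. In debug builds a failing
// BM_CHECK logs the streamed message and calls the configurable abort handler.
inline double SafeRatio(double num, double den) {
  BM_CHECK_NE(den, 0.0) << "denominator must be non-zero";
  return num / den;
}
}  // namespace internal
}  // namespace benchmark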
@ -25,8 +25,8 @@
#include "internal_macros.h"

#ifdef BENCHMARK_OS_WINDOWS
#include <windows.h>
#include <io.h>
#include <windows.h>
#else
#include <unistd.h>
#endif // BENCHMARK_OS_WINDOWS
@ -94,20 +94,20 @@ std::string FormatString(const char* msg, va_list args) {
|
||||
va_end(args_cp);
|
||||
|
||||
// currently there is no error handling for failure, so this is a hack.
|
||||
CHECK(ret >= 0);
|
||||
BM_CHECK(ret >= 0);
|
||||
|
||||
if (ret == 0) // handle empty expansion
|
||||
if (ret == 0) { // handle empty expansion
|
||||
return {};
|
||||
else if (static_cast<size_t>(ret) < size)
|
||||
return local_buff;
|
||||
else {
|
||||
// we did not provide a long enough buffer on our first attempt.
|
||||
size = (size_t)ret + 1; // + 1 for the null byte
|
||||
std::unique_ptr<char[]> buff(new char[size]);
|
||||
ret = vsnprintf(buff.get(), size, msg, args);
|
||||
CHECK(ret > 0 && ((size_t)ret) < size);
|
||||
return buff.get();
|
||||
}
|
||||
if (static_cast<size_t>(ret) < size) {
|
||||
return local_buff;
|
||||
}
|
||||
// we did not provide a long enough buffer on our first attempt.
|
||||
size = static_cast<size_t>(ret) + 1; // + 1 for the null byte
|
||||
std::unique_ptr<char[]> buff(new char[size]);
|
||||
ret = vsnprintf(buff.get(), size, msg, args);
|
||||
BM_CHECK(ret > 0 && (static_cast<size_t>(ret)) < size);
|
||||
return buff.get();
|
||||
}
|
||||
|
||||
std::string FormatString(const char* msg, ...) {
|
||||
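The hunk above is the usual two-pass vsnprintf pattern; a standalone sketch of the same idea (names are illustrative, not the library's):

#include <cstdarg>
#include <cstdio>
#include <memory>
#include <string>

static std::string VFormat(const char* fmt, va_list args) {
  va_list copy;
  va_copy(copy, args);                                  // first pass may consume the list
  char local[256];
  const int needed = vsnprintf(local, sizeof(local), fmt, copy);
  va_end(copy);
  if (needed < 0) return {};                            // formatting error
  if (static_cast<size_t>(needed) < sizeof(local)) return local;
  std::unique_ptr<char[]> heap(new char[needed + 1]);   // exact size, + 1 for the null byte
  vsnprintf(heap.get(), needed + 1, fmt, args);
  return heap.get();
}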
@ -163,12 +163,24 @@ bool IsColorTerminal() {
|
||||
#else
|
||||
// On non-Windows platforms, we rely on the TERM variable. This list of
|
||||
// supported TERM values is copied from Google Test:
|
||||
// <https://github.com/google/googletest/blob/master/googletest/src/gtest.cc#L2925>.
|
||||
// <https://github.com/google/googletest/blob/v1.13.0/googletest/src/gtest.cc#L3225-L3259>.
|
||||
const char* const SUPPORTED_TERM_VALUES[] = {
|
||||
"xterm", "xterm-color", "xterm-256color",
|
||||
"screen", "screen-256color", "tmux",
|
||||
"tmux-256color", "rxvt-unicode", "rxvt-unicode-256color",
|
||||
"linux", "cygwin",
|
||||
"xterm",
|
||||
"xterm-color",
|
||||
"xterm-256color",
|
||||
"screen",
|
||||
"screen-256color",
|
||||
"tmux",
|
||||
"tmux-256color",
|
||||
"rxvt-unicode",
|
||||
"rxvt-unicode-256color",
|
||||
"linux",
|
||||
"cygwin",
|
||||
"xterm-kitty",
|
||||
"alacritty",
|
||||
"foot",
|
||||
"foot-extra",
|
||||
"wezterm",
|
||||
};
|
||||
|
||||
const char* const term = getenv("TERM");
|
||||
|
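The detection idea behind the list above, as a standalone sketch (only a subset of the supported values; the real code also special-cases Windows consoles):

#include <cstdlib>
#include <cstring>

static bool TermLooksColorCapable() {
  const char* const term = std::getenv("TERM");
  if (term == nullptr) return false;
  const char* const supported[] = {"xterm", "xterm-256color", "screen-256color",
                                   "tmux-256color", "alacritty", "wezterm"};
  for (const char* candidate : supported) {
    if (std::strcmp(term, candidate) == 0) return true;
  }
  return false;
}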
@ -20,6 +20,10 @@
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <map>
|
||||
#include <utility>
|
||||
|
||||
#include "../src/string_util.h"
|
||||
|
||||
namespace benchmark {
|
||||
namespace {
|
||||
@ -78,6 +82,30 @@ bool ParseDouble(const std::string& src_text, const char* str, double* value) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Parses 'str' into KV pairs. If successful, writes the result to *value and
|
||||
// returns true; otherwise leaves *value unchanged and returns false.
|
||||
bool ParseKvPairs(const std::string& src_text, const char* str,
|
||||
std::map<std::string, std::string>* value) {
|
||||
std::map<std::string, std::string> kvs;
|
||||
for (const auto& kvpair : StrSplit(str, ',')) {
|
||||
const auto kv = StrSplit(kvpair, '=');
|
||||
if (kv.size() != 2) {
|
||||
std::cerr << src_text << " is expected to be a comma-separated list of "
|
||||
<< "<key>=<value> strings, but actually has value \"" << str
|
||||
<< "\".\n";
|
||||
return false;
|
||||
}
|
||||
if (!kvs.emplace(kv[0], kv[1]).second) {
|
||||
std::cerr << src_text << " is expected to contain unique keys but key \""
|
||||
<< kv[0] << "\" was repeated.\n";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
*value = kvs;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns the name of the environment variable corresponding to the
|
||||
// given flag. For example, FlagToEnvVar("foo") will return
|
||||
// "BENCHMARK_FOO" in the open-source version.
|
||||
@ -93,12 +121,14 @@ static std::string FlagToEnvVar(const char* flag) {
|
||||
|
||||
} // namespace
|
||||
|
||||
BENCHMARK_EXPORT
|
||||
bool BoolFromEnv(const char* flag, bool default_val) {
|
||||
const std::string env_var = FlagToEnvVar(flag);
|
||||
const char* const value_str = getenv(env_var.c_str());
|
||||
return value_str == nullptr ? default_val : IsTruthyFlagValue(value_str);
|
||||
}
|
||||
|
||||
BENCHMARK_EXPORT
|
||||
int32_t Int32FromEnv(const char* flag, int32_t default_val) {
|
||||
const std::string env_var = FlagToEnvVar(flag);
|
||||
const char* const value_str = getenv(env_var.c_str());
|
||||
@ -111,6 +141,7 @@ int32_t Int32FromEnv(const char* flag, int32_t default_val) {
|
||||
return value;
|
||||
}
|
||||
|
||||
BENCHMARK_EXPORT
|
||||
double DoubleFromEnv(const char* flag, double default_val) {
|
||||
const std::string env_var = FlagToEnvVar(flag);
|
||||
const char* const value_str = getenv(env_var.c_str());
|
||||
@ -123,12 +154,28 @@ double DoubleFromEnv(const char* flag, double default_val) {
|
||||
return value;
|
||||
}
|
||||
|
||||
BENCHMARK_EXPORT
|
||||
const char* StringFromEnv(const char* flag, const char* default_val) {
|
||||
const std::string env_var = FlagToEnvVar(flag);
|
||||
const char* const value = getenv(env_var.c_str());
|
||||
return value == nullptr ? default_val : value;
|
||||
}
|
||||
|
||||
BENCHMARK_EXPORT
|
||||
std::map<std::string, std::string> KvPairsFromEnv(
|
||||
const char* flag, std::map<std::string, std::string> default_val) {
|
||||
const std::string env_var = FlagToEnvVar(flag);
|
||||
const char* const value_str = getenv(env_var.c_str());
|
||||
|
||||
if (value_str == nullptr) return default_val;
|
||||
|
||||
std::map<std::string, std::string> value;
|
||||
if (!ParseKvPairs("Environment variable " + env_var, value_str, &value)) {
|
||||
return default_val;
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
// Parses a string as a command line flag. The string should have
|
||||
// the format "--flag=value". When def_optional is true, the "=value"
|
||||
// part can be omitted.
|
||||
@ -159,6 +206,7 @@ const char* ParseFlagValue(const char* str, const char* flag,
|
||||
return flag_end + 1;
|
||||
}
|
||||
|
||||
BENCHMARK_EXPORT
|
||||
bool ParseBoolFlag(const char* str, const char* flag, bool* value) {
|
||||
// Gets the value of the flag as a string.
|
||||
const char* const value_str = ParseFlagValue(str, flag, true);
|
||||
@ -171,6 +219,7 @@ bool ParseBoolFlag(const char* str, const char* flag, bool* value) {
|
||||
return true;
|
||||
}
|
||||
|
||||
BENCHMARK_EXPORT
|
||||
bool ParseInt32Flag(const char* str, const char* flag, int32_t* value) {
|
||||
// Gets the value of the flag as a string.
|
||||
const char* const value_str = ParseFlagValue(str, flag, false);
|
||||
@ -183,6 +232,7 @@ bool ParseInt32Flag(const char* str, const char* flag, int32_t* value) {
|
||||
value);
|
||||
}
|
||||
|
||||
BENCHMARK_EXPORT
|
||||
bool ParseDoubleFlag(const char* str, const char* flag, double* value) {
|
||||
// Gets the value of the flag as a string.
|
||||
const char* const value_str = ParseFlagValue(str, flag, false);
|
||||
@ -195,6 +245,7 @@ bool ParseDoubleFlag(const char* str, const char* flag, double* value) {
|
||||
value);
|
||||
}
|
||||
|
||||
BENCHMARK_EXPORT
|
||||
bool ParseStringFlag(const char* str, const char* flag, std::string* value) {
|
||||
// Gets the value of the flag as a string.
|
||||
const char* const value_str = ParseFlagValue(str, flag, false);
|
||||
@ -206,23 +257,42 @@ bool ParseStringFlag(const char* str, const char* flag, std::string* value) {
|
||||
return true;
|
||||
}
|
||||
|
||||
BENCHMARK_EXPORT
|
||||
bool ParseKeyValueFlag(const char* str, const char* flag,
|
||||
std::map<std::string, std::string>* value) {
|
||||
const char* const value_str = ParseFlagValue(str, flag, false);
|
||||
|
||||
if (value_str == nullptr) return false;
|
||||
|
||||
for (const auto& kvpair : StrSplit(value_str, ',')) {
|
||||
const auto kv = StrSplit(kvpair, '=');
|
||||
if (kv.size() != 2) return false;
|
||||
value->emplace(kv[0], kv[1]);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
BENCHMARK_EXPORT
|
||||
bool IsFlag(const char* str, const char* flag) {
|
||||
return (ParseFlagValue(str, flag, true) != nullptr);
|
||||
}
|
||||
|
||||
BENCHMARK_EXPORT
|
||||
bool IsTruthyFlagValue(const std::string& value) {
|
||||
if (value.size() == 1) {
|
||||
char v = value[0];
|
||||
return isalnum(v) &&
|
||||
!(v == '0' || v == 'f' || v == 'F' || v == 'n' || v == 'N');
|
||||
} else if (!value.empty()) {
|
||||
}
|
||||
if (!value.empty()) {
|
||||
std::string value_lower(value);
|
||||
std::transform(value_lower.begin(), value_lower.end(), value_lower.begin(),
|
||||
[](char c) { return static_cast<char>(::tolower(c)); });
|
||||
return !(value_lower == "false" || value_lower == "no" ||
|
||||
value_lower == "off");
|
||||
} else
|
||||
return true;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // end namespace benchmark
|
||||
|
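For reference, the key=value splitting done by ParseKeyValueFlag/ParseKvPairs above, reduced to a self-contained sketch (the helper name is illustrative):

#include <map>
#include <sstream>
#include <string>

static std::map<std::string, std::string> SplitKvPairs(const std::string& s) {
  std::map<std::string, std::string> out;
  std::istringstream stream(s);
  std::string pair;
  while (std::getline(stream, pair, ',')) {
    const auto eq = pair.find('=');
    if (eq == std::string::npos) continue;  // the real parser reports an error here
    out.emplace(pair.substr(0, eq), pair.substr(eq + 1));
  }
  return out;
}
// SplitKvPairs("color=red,size=2") yields {{"color","red"}, {"size","2"}}.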
@ -2,61 +2,80 @@
|
||||
#define BENCHMARK_COMMANDLINEFLAGS_H_
|
||||
|
||||
#include <cstdint>
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
#include "benchmark/export.h"
|
||||
|
||||
// Macro for referencing flags.
|
||||
#define FLAG(name) FLAGS_##name
|
||||
|
||||
// Macros for declaring flags.
|
||||
#define DECLARE_bool(name) extern bool FLAG(name)
|
||||
#define DECLARE_int32(name) extern int32_t FLAG(name)
|
||||
#define DECLARE_double(name) extern double FLAG(name)
|
||||
#define DECLARE_string(name) extern std::string FLAG(name)
|
||||
#define BM_DECLARE_bool(name) BENCHMARK_EXPORT extern bool FLAG(name)
|
||||
#define BM_DECLARE_int32(name) BENCHMARK_EXPORT extern int32_t FLAG(name)
|
||||
#define BM_DECLARE_double(name) BENCHMARK_EXPORT extern double FLAG(name)
|
||||
#define BM_DECLARE_string(name) BENCHMARK_EXPORT extern std::string FLAG(name)
|
||||
#define BM_DECLARE_kvpairs(name) \
|
||||
BENCHMARK_EXPORT extern std::map<std::string, std::string> FLAG(name)
|
||||
|
||||
// Macros for defining flags.
|
||||
#define DEFINE_bool(name, default_val) \
|
||||
bool FLAG(name) = \
|
||||
benchmark::BoolFromEnv(#name, default_val)
|
||||
#define DEFINE_int32(name, default_val) \
|
||||
int32_t FLAG(name) = \
|
||||
benchmark::Int32FromEnv(#name, default_val)
|
||||
#define DEFINE_double(name, default_val) \
|
||||
double FLAG(name) = \
|
||||
benchmark::DoubleFromEnv(#name, default_val)
|
||||
#define DEFINE_string(name, default_val) \
|
||||
std::string FLAG(name) = \
|
||||
benchmark::StringFromEnv(#name, default_val)
|
||||
#define BM_DEFINE_bool(name, default_val) \
|
||||
BENCHMARK_EXPORT bool FLAG(name) = benchmark::BoolFromEnv(#name, default_val)
|
||||
#define BM_DEFINE_int32(name, default_val) \
|
||||
BENCHMARK_EXPORT int32_t FLAG(name) = \
|
||||
benchmark::Int32FromEnv(#name, default_val)
|
||||
#define BM_DEFINE_double(name, default_val) \
|
||||
BENCHMARK_EXPORT double FLAG(name) = \
|
||||
benchmark::DoubleFromEnv(#name, default_val)
|
||||
#define BM_DEFINE_string(name, default_val) \
|
||||
BENCHMARK_EXPORT std::string FLAG(name) = \
|
||||
benchmark::StringFromEnv(#name, default_val)
|
||||
#define BM_DEFINE_kvpairs(name, default_val) \
|
||||
BENCHMARK_EXPORT std::map<std::string, std::string> FLAG(name) = \
|
||||
benchmark::KvPairsFromEnv(#name, default_val)
|
||||
|
||||
namespace benchmark {
|
||||
|
||||
// Parses a bool from the environment variable
|
||||
// corresponding to the given flag.
|
||||
// Parses a bool from the environment variable corresponding to the given flag.
|
||||
//
|
||||
// If the variable exists, returns IsTruthyFlagValue() value; if not,
|
||||
// returns the given default value.
|
||||
BENCHMARK_EXPORT
|
||||
bool BoolFromEnv(const char* flag, bool default_val);
|
||||
|
||||
// Parses an Int32 from the environment variable
|
||||
// corresponding to the given flag.
|
||||
// Parses an Int32 from the environment variable corresponding to the given
|
||||
// flag.
|
||||
//
|
||||
// If the variable exists, returns ParseInt32() value; if not, returns
|
||||
// the given default value.
|
||||
BENCHMARK_EXPORT
|
||||
int32_t Int32FromEnv(const char* flag, int32_t default_val);
|
||||
|
||||
// Parses an Double from the environment variable
|
||||
// corresponding to the given flag.
|
||||
// Parses an Double from the environment variable corresponding to the given
|
||||
// flag.
|
||||
//
|
||||
// If the variable exists, returns ParseDouble(); if not, returns
|
||||
// the given default value.
|
||||
BENCHMARK_EXPORT
|
||||
double DoubleFromEnv(const char* flag, double default_val);
|
||||
|
||||
// Parses a string from the environment variable
|
||||
// corresponding to the given flag.
|
||||
// Parses a string from the environment variable corresponding to the given
|
||||
// flag.
|
||||
//
|
||||
// If variable exists, returns its value; if not, returns
|
||||
// the given default value.
|
||||
BENCHMARK_EXPORT
|
||||
const char* StringFromEnv(const char* flag, const char* default_val);
|
||||
|
||||
// Parses a set of kvpairs from the environment variable corresponding to the
|
||||
// given flag.
|
||||
//
|
||||
// If variable exists, returns its value; if not, returns
|
||||
// the given default value.
|
||||
BENCHMARK_EXPORT
|
||||
std::map<std::string, std::string> KvPairsFromEnv(
|
||||
const char* flag, std::map<std::string, std::string> default_val);
|
||||
|
||||
// Parses a string for a bool flag, in the form of either
|
||||
// "--flag=value" or "--flag".
|
||||
//
|
||||
@ -66,36 +85,47 @@ const char* StringFromEnv(const char* flag, const char* default_val);
|
||||
//
|
||||
// On success, stores the value of the flag in *value, and returns
|
||||
// true. On failure, returns false without changing *value.
|
||||
BENCHMARK_EXPORT
|
||||
bool ParseBoolFlag(const char* str, const char* flag, bool* value);
|
||||
|
||||
// Parses a string for an Int32 flag, in the form of
|
||||
// "--flag=value".
|
||||
// Parses a string for an Int32 flag, in the form of "--flag=value".
|
||||
//
|
||||
// On success, stores the value of the flag in *value, and returns
|
||||
// true. On failure, returns false without changing *value.
|
||||
BENCHMARK_EXPORT
|
||||
bool ParseInt32Flag(const char* str, const char* flag, int32_t* value);
|
||||
|
||||
// Parses a string for a Double flag, in the form of
|
||||
// "--flag=value".
|
||||
// Parses a string for a Double flag, in the form of "--flag=value".
|
||||
//
|
||||
// On success, stores the value of the flag in *value, and returns
|
||||
// true. On failure, returns false without changing *value.
|
||||
BENCHMARK_EXPORT
|
||||
bool ParseDoubleFlag(const char* str, const char* flag, double* value);
|
||||
|
||||
// Parses a string for a string flag, in the form of
|
||||
// "--flag=value".
|
||||
// Parses a string for a string flag, in the form of "--flag=value".
|
||||
//
|
||||
// On success, stores the value of the flag in *value, and returns
|
||||
// true. On failure, returns false without changing *value.
|
||||
BENCHMARK_EXPORT
|
||||
bool ParseStringFlag(const char* str, const char* flag, std::string* value);
|
||||
|
||||
// Parses a string for a kvpairs flag in the form "--flag=key=value,key=value"
|
||||
//
|
||||
// On success, stores the value of the flag in *value and returns true. On
|
||||
// failure returns false, though *value may have been mutated.
|
||||
BENCHMARK_EXPORT
|
||||
bool ParseKeyValueFlag(const char* str, const char* flag,
|
||||
std::map<std::string, std::string>* value);
|
||||
|
||||
// Returns true if the string matches the flag.
|
||||
BENCHMARK_EXPORT
|
||||
bool IsFlag(const char* str, const char* flag);
|
||||
|
||||
// Returns true unless value starts with one of: '0', 'f', 'F', 'n' or 'N', or
|
||||
// some non-alphanumeric character. Also returns false if the value matches
|
||||
// one of 'no', 'false', 'off' (case-insensitive). As a special case, also
|
||||
// returns true if value is the empty string.
|
||||
BENCHMARK_EXPORT
|
||||
bool IsTruthyFlagValue(const std::string& value);
|
||||
|
||||
} // end namespace benchmark
|
||||
|
@ -15,12 +15,13 @@
|
||||
// Source project : https://github.com/ismaelJimenez/cpp.leastsq
|
||||
// Adapted to be used with google benchmark
|
||||
|
||||
#include "benchmark/benchmark.h"
|
||||
#include "complexity.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
|
||||
#include "benchmark/benchmark.h"
|
||||
#include "check.h"
|
||||
#include "complexity.h"
|
||||
|
||||
namespace benchmark {
|
||||
|
||||
@ -82,7 +83,6 @@ std::string GetBigOString(BigO complexity) {
|
||||
LeastSq MinimalLeastSq(const std::vector<int64_t>& n,
|
||||
const std::vector<double>& time,
|
||||
BigOFunc* fitting_curve) {
|
||||
double sigma_gn = 0.0;
|
||||
double sigma_gn_squared = 0.0;
|
||||
double sigma_time = 0.0;
|
||||
double sigma_time_gn = 0.0;
|
||||
@ -90,7 +90,6 @@ LeastSq MinimalLeastSq(const std::vector<int64_t>& n,
|
||||
// Calculate least square fitting parameter
|
||||
for (size_t i = 0; i < n.size(); ++i) {
|
||||
double gn_i = fitting_curve(n[i]);
|
||||
sigma_gn += gn_i;
|
||||
sigma_gn_squared += gn_i * gn_i;
|
||||
sigma_time += time[i];
|
||||
sigma_time_gn += time[i] * gn_i;
|
||||
@ -125,10 +124,10 @@ LeastSq MinimalLeastSq(const std::vector<int64_t>& n,
|
||||
// fitting curve.
|
||||
LeastSq MinimalLeastSq(const std::vector<int64_t>& n,
|
||||
const std::vector<double>& time, const BigO complexity) {
|
||||
CHECK_EQ(n.size(), time.size());
|
||||
CHECK_GE(n.size(), 2); // Do not compute fitting curve if less than two
|
||||
// benchmark runs are given
|
||||
CHECK_NE(complexity, oNone);
|
||||
BM_CHECK_EQ(n.size(), time.size());
|
||||
BM_CHECK_GE(n.size(), 2); // Do not compute fitting curve if less than two
|
||||
// benchmark runs are given
|
||||
BM_CHECK_NE(complexity, oNone);
|
||||
|
||||
LeastSq best_fit;
|
||||
|
||||
@ -169,7 +168,8 @@ std::vector<BenchmarkReporter::Run> ComputeBigO(
|
||||
|
||||
// Populate the accumulators.
|
||||
for (const Run& run : reports) {
|
||||
CHECK_GT(run.complexity_n, 0) << "Did you forget to call SetComplexityN?";
|
||||
BM_CHECK_GT(run.complexity_n, 0)
|
||||
<< "Did you forget to call SetComplexityN?";
|
||||
n.push_back(run.complexity_n);
|
||||
real_time.push_back(run.real_accumulated_time / run.iterations);
|
||||
cpu_time.push_back(run.cpu_accumulated_time / run.iterations);
|
||||
@ -193,11 +193,14 @@ std::vector<BenchmarkReporter::Run> ComputeBigO(
|
||||
// Get the data from the accumulator to BenchmarkReporter::Run's.
|
||||
Run big_o;
|
||||
big_o.run_name = run_name;
|
||||
big_o.family_index = reports[0].family_index;
|
||||
big_o.per_family_instance_index = reports[0].per_family_instance_index;
|
||||
big_o.run_type = BenchmarkReporter::Run::RT_Aggregate;
|
||||
big_o.repetitions = reports[0].repetitions;
|
||||
big_o.repetition_index = Run::no_repetition_index;
|
||||
big_o.threads = reports[0].threads;
|
||||
big_o.aggregate_name = "BigO";
|
||||
big_o.aggregate_unit = StatisticUnit::kTime;
|
||||
big_o.report_label = reports[0].report_label;
|
||||
big_o.iterations = 0;
|
||||
big_o.real_accumulated_time = result_real.coef;
|
||||
@ -215,8 +218,11 @@ std::vector<BenchmarkReporter::Run> ComputeBigO(
|
||||
// Only add label to mean/stddev if it is same for all runs
|
||||
Run rms;
|
||||
rms.run_name = run_name;
|
||||
rms.family_index = reports[0].family_index;
|
||||
rms.per_family_instance_index = reports[0].per_family_instance_index;
|
||||
rms.run_type = BenchmarkReporter::Run::RT_Aggregate;
|
||||
rms.aggregate_name = "RMS";
|
||||
rms.aggregate_unit = StatisticUnit::kPercentage;
|
||||
rms.report_label = big_o.report_label;
|
||||
rms.iterations = 0;
|
||||
rms.repetition_index = Run::no_repetition_index;
|
||||
|
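The fit computed by MinimalLeastSq above is a single-parameter least squares through the origin (time ~ coef * f(n)); a sketch using the same sigma_* accumulators:

#include <cstdint>
#include <vector>

static double FitCoef(const std::vector<int64_t>& n, const std::vector<double>& time,
                      double (*fitting_curve)(int64_t)) {
  double sigma_gn_squared = 0.0;
  double sigma_time_gn = 0.0;
  for (size_t i = 0; i < n.size(); ++i) {
    const double gn_i = fitting_curve(n[i]);
    sigma_gn_squared += gn_i * gn_i;
    sigma_time_gn += time[i] * gn_i;
  }
  // Minimizes sum_i (time[i] - coef * fitting_curve(n[i]))^2.
  return sigma_time_gn / sigma_gn_squared;
}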
@ -31,7 +31,7 @@ std::vector<BenchmarkReporter::Run> ComputeBigO(
|
||||
const std::vector<BenchmarkReporter::Run>& reports);
|
||||
|
||||
// This data structure will contain the result returned by MinimalLeastSq
|
||||
// - coef : Estimated coeficient for the high-order term as
|
||||
// - coef : Estimated coefficient for the high-order term as
|
||||
// interpolated from data.
|
||||
// - rms : Normalized Root Mean Squared Error.
|
||||
// - complexity : Scalability form (e.g. oN, oNLogN). In case a scalability
|
||||
|
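For context, the user-facing side of this machinery (public API documented in the user guide): request an asymptotic-complexity fit over a size range.

#include <vector>
#include "benchmark/benchmark.h"

static void BM_VectorPushBack(benchmark::State& state) {
  for (auto _ : state) {
    std::vector<int> v;
    for (int i = 0; i < state.range(0); ++i) v.push_back(i);
    benchmark::DoNotOptimize(v.data());
  }
  state.SetComplexityN(state.range(0));
}
BENCHMARK(BM_VectorPushBack)->Range(1 << 10, 1 << 16)->Complexity(benchmark::oN);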
@ -33,6 +33,7 @@
|
||||
|
||||
namespace benchmark {
|
||||
|
||||
BENCHMARK_EXPORT
|
||||
bool ConsoleReporter::ReportContext(const Context& context) {
|
||||
name_field_width_ = context.name_field_width;
|
||||
printed_header_ = false;
|
||||
@ -45,19 +46,21 @@ bool ConsoleReporter::ReportContext(const Context& context) {
|
||||
GetErrorStream()
|
||||
<< "Color printing is only supported for stdout on windows."
|
||||
" Disabling color printing\n";
|
||||
output_options_ = static_cast< OutputOptions >(output_options_ & ~OO_Color);
|
||||
output_options_ = static_cast<OutputOptions>(output_options_ & ~OO_Color);
|
||||
}
|
||||
#endif
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
BENCHMARK_EXPORT
|
||||
void ConsoleReporter::PrintHeader(const Run& run) {
|
||||
std::string str = FormatString("%-*s %13s %15s %12s", static_cast<int>(name_field_width_),
|
||||
"Benchmark", "Time", "CPU", "Iterations");
|
||||
if(!run.counters.empty()) {
|
||||
if(output_options_ & OO_Tabular) {
|
||||
for(auto const& c : run.counters) {
|
||||
std::string str =
|
||||
FormatString("%-*s %13s %15s %12s", static_cast<int>(name_field_width_),
|
||||
"Benchmark", "Time", "CPU", "Iterations");
|
||||
if (!run.counters.empty()) {
|
||||
if (output_options_ & OO_Tabular) {
|
||||
for (auto const& c : run.counters) {
|
||||
str += FormatString(" %10s", c.first.c_str());
|
||||
}
|
||||
} else {
|
||||
@ -68,6 +71,7 @@ void ConsoleReporter::PrintHeader(const Run& run) {
|
||||
GetOutputStream() << line << "\n" << str << "\n" << line << "\n";
|
||||
}
|
||||
|
||||
BENCHMARK_EXPORT
|
||||
void ConsoleReporter::ReportRuns(const std::vector<Run>& reports) {
|
||||
for (const auto& run : reports) {
|
||||
// print the header:
|
||||
@ -97,8 +101,10 @@ static void IgnoreColorPrint(std::ostream& out, LogColor, const char* fmt,
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
|
||||
static std::string FormatTime(double time) {
|
||||
// For the time columns of the console printer 13 digits are reserved. One of
|
||||
// them is a space and max two of them are the time unit (e.g ns). That puts
|
||||
// us at 10 digits usable for the number.
|
||||
// Align decimal places...
|
||||
if (time < 1.0) {
|
||||
return FormatString("%10.3f", time);
|
||||
@ -109,22 +115,33 @@ static std::string FormatTime(double time) {
|
||||
if (time < 100.0) {
|
||||
return FormatString("%10.1f", time);
|
||||
}
|
||||
// Assuming the time is at max 9.9999e+99 and we have 10 digits for the
|
||||
// number, we get 10-1(.)-1(e)-1(sign)-2(exponent) = 5 digits to print.
|
||||
if (time > 9999999999 /*max 10 digit number*/) {
|
||||
return FormatString("%1.4e", time);
|
||||
}
|
||||
return FormatString("%10.0f", time);
|
||||
}
|
||||
|
||||
BENCHMARK_EXPORT
|
||||
void ConsoleReporter::PrintRunData(const Run& result) {
|
||||
typedef void(PrinterFn)(std::ostream&, LogColor, const char*, ...);
|
||||
auto& Out = GetOutputStream();
|
||||
PrinterFn* printer = (output_options_ & OO_Color) ?
|
||||
(PrinterFn*)ColorPrintf : IgnoreColorPrint;
|
||||
PrinterFn* printer = (output_options_ & OO_Color)
|
||||
? static_cast<PrinterFn*>(ColorPrintf)
|
||||
: IgnoreColorPrint;
|
||||
auto name_color =
|
||||
(result.report_big_o || result.report_rms) ? COLOR_BLUE : COLOR_GREEN;
|
||||
printer(Out, name_color, "%-*s ", name_field_width_,
|
||||
result.benchmark_name().c_str());
|
||||
|
||||
if (result.error_occurred) {
|
||||
if (internal::SkippedWithError == result.skipped) {
|
||||
printer(Out, COLOR_RED, "ERROR OCCURRED: \'%s\'",
|
||||
result.error_message.c_str());
|
||||
result.skip_message.c_str());
|
||||
printer(Out, COLOR_DEFAULT, "\n");
|
||||
return;
|
||||
} else if (internal::SkippedWithMessage == result.skipped) {
|
||||
printer(Out, COLOR_WHITE, "SKIPPED: \'%s\'", result.skip_message.c_str());
|
||||
printer(Out, COLOR_DEFAULT, "\n");
|
||||
return;
|
||||
}
|
||||
@ -134,18 +151,23 @@ void ConsoleReporter::PrintRunData(const Run& result) {
|
||||
const std::string real_time_str = FormatTime(real_time);
|
||||
const std::string cpu_time_str = FormatTime(cpu_time);
|
||||
|
||||
|
||||
if (result.report_big_o) {
|
||||
std::string big_o = GetBigOString(result.complexity);
|
||||
printer(Out, COLOR_YELLOW, "%10.2f %-4s %10.2f %-4s ", real_time, big_o.c_str(),
|
||||
cpu_time, big_o.c_str());
|
||||
printer(Out, COLOR_YELLOW, "%10.2f %-4s %10.2f %-4s ", real_time,
|
||||
big_o.c_str(), cpu_time, big_o.c_str());
|
||||
} else if (result.report_rms) {
|
||||
printer(Out, COLOR_YELLOW, "%10.0f %-4s %10.0f %-4s ", real_time * 100, "%",
|
||||
cpu_time * 100, "%");
|
||||
} else {
|
||||
} else if (result.run_type != Run::RT_Aggregate ||
|
||||
result.aggregate_unit == StatisticUnit::kTime) {
|
||||
const char* timeLabel = GetTimeUnitString(result.time_unit);
|
||||
printer(Out, COLOR_YELLOW, "%s %-4s %s %-4s ", real_time_str.c_str(), timeLabel,
|
||||
cpu_time_str.c_str(), timeLabel);
|
||||
printer(Out, COLOR_YELLOW, "%s %-4s %s %-4s ", real_time_str.c_str(),
|
||||
timeLabel, cpu_time_str.c_str(), timeLabel);
|
||||
} else {
|
||||
assert(result.aggregate_unit == StatisticUnit::kPercentage);
|
||||
printer(Out, COLOR_YELLOW, "%10.2f %-4s %10.2f %-4s ",
|
||||
(100. * result.real_accumulated_time), "%",
|
||||
(100. * result.cpu_accumulated_time), "%");
|
||||
}
|
||||
|
||||
if (!result.report_big_o && !result.report_rms) {
|
||||
@ -153,12 +175,19 @@ void ConsoleReporter::PrintRunData(const Run& result) {
|
||||
}
|
||||
|
||||
for (auto& c : result.counters) {
|
||||
const std::size_t cNameLen = std::max(std::string::size_type(10),
|
||||
c.first.length());
|
||||
auto const& s = HumanReadableNumber(c.second.value, c.second.oneK);
|
||||
const std::size_t cNameLen =
|
||||
std::max(std::string::size_type(10), c.first.length());
|
||||
std::string s;
|
||||
const char* unit = "";
|
||||
if (c.second.flags & Counter::kIsRate)
|
||||
unit = (c.second.flags & Counter::kInvert) ? "s" : "/s";
|
||||
if (result.run_type == Run::RT_Aggregate &&
|
||||
result.aggregate_unit == StatisticUnit::kPercentage) {
|
||||
s = StrFormat("%.2f", 100. * c.second.value);
|
||||
unit = "%";
|
||||
} else {
|
||||
s = HumanReadableNumber(c.second.value, c.second.oneK);
|
||||
if (c.second.flags & Counter::kIsRate)
|
||||
unit = (c.second.flags & Counter::kInvert) ? "s" : "/s";
|
||||
}
|
||||
if (output_options_ & OO_Tabular) {
|
||||
printer(Out, COLOR_DEFAULT, " %*s%s", cNameLen - strlen(unit), s.c_str(),
|
||||
unit);
|
||||
|
@ -12,9 +12,6 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "benchmark/benchmark.h"
|
||||
#include "complexity.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <iostream>
|
||||
@ -22,7 +19,9 @@
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#include "benchmark/benchmark.h"
|
||||
#include "check.h"
|
||||
#include "complexity.h"
|
||||
#include "string_util.h"
|
||||
#include "timers.h"
|
||||
|
||||
@ -37,23 +36,29 @@ std::vector<std::string> elements = {
|
||||
"error_occurred", "error_message"};
|
||||
} // namespace
|
||||
|
||||
std::string CsvEscape(const std::string & s) {
|
||||
std::string CsvEscape(const std::string& s) {
|
||||
std::string tmp;
|
||||
tmp.reserve(s.size() + 2);
|
||||
for (char c : s) {
|
||||
switch (c) {
|
||||
case '"' : tmp += "\"\""; break;
|
||||
default : tmp += c; break;
|
||||
case '"':
|
||||
tmp += "\"\"";
|
||||
break;
|
||||
default:
|
||||
tmp += c;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return '"' + tmp + '"';
|
||||
}
|
||||
|
||||
BENCHMARK_EXPORT
|
||||
bool CSVReporter::ReportContext(const Context& context) {
|
||||
PrintBasicContext(&GetErrorStream(), context);
|
||||
return true;
|
||||
}
|
||||
|
||||
BENCHMARK_EXPORT
|
||||
void CSVReporter::ReportRuns(const std::vector<Run>& reports) {
|
||||
std::ostream& Out = GetOutputStream();
|
||||
|
||||
@ -85,7 +90,8 @@ void CSVReporter::ReportRuns(const std::vector<Run>& reports) {
|
||||
for (const auto& cnt : run.counters) {
|
||||
if (cnt.first == "bytes_per_second" || cnt.first == "items_per_second")
|
||||
continue;
|
||||
CHECK(user_counter_names_.find(cnt.first) != user_counter_names_.end())
|
||||
BM_CHECK(user_counter_names_.find(cnt.first) !=
|
||||
user_counter_names_.end())
|
||||
<< "All counters must be present in each run. "
|
||||
<< "Counter named \"" << cnt.first
|
||||
<< "\" was not in a run after being added to the header";
|
||||
@ -99,13 +105,14 @@ void CSVReporter::ReportRuns(const std::vector<Run>& reports) {
|
||||
}
|
||||
}
|
||||
|
||||
BENCHMARK_EXPORT
|
||||
void CSVReporter::PrintRunData(const Run& run) {
|
||||
std::ostream& Out = GetOutputStream();
|
||||
Out << CsvEscape(run.benchmark_name()) << ",";
|
||||
if (run.error_occurred) {
|
||||
if (run.skipped) {
|
||||
Out << std::string(elements.size() - 3, ',');
|
||||
Out << "true,";
|
||||
Out << CsvEscape(run.error_message) << "\n";
|
||||
Out << std::boolalpha << (internal::SkippedWithError == run.skipped) << ",";
|
||||
Out << CsvEscape(run.skip_message) << "\n";
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -36,7 +36,8 @@
|
||||
// declarations of some other intrinsics, breaking compilation.
|
||||
// Therefore, we simply declare __rdtsc ourselves. See also
|
||||
// http://connect.microsoft.com/VisualStudio/feedback/details/262047
|
||||
#if defined(COMPILER_MSVC) && !defined(_M_IX86)
|
||||
#if defined(COMPILER_MSVC) && !defined(_M_IX86) && !defined(_M_ARM64) && \
|
||||
!defined(_M_ARM64EC)
|
||||
extern "C" uint64_t __rdtsc();
|
||||
#pragma intrinsic(__rdtsc)
|
||||
#endif
|
||||
@ -92,7 +93,7 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
|
||||
uint32_t tbl, tbu0, tbu1;
|
||||
asm volatile(
|
||||
"mftbu %0\n"
|
||||
"mftbl %1\n"
|
||||
"mftb %1\n"
|
||||
"mftbu %2"
|
||||
: "=r"(tbu0), "=r"(tbl), "=r"(tbu1));
|
||||
tbl &= -static_cast<int32_t>(tbu0 == tbu1);
|
||||
@ -114,6 +115,12 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
|
||||
// when I know it will work. Otherwise, I'll use __rdtsc and hope
|
||||
// the code is being compiled with a non-ancient compiler.
|
||||
_asm rdtsc
|
||||
#elif defined(COMPILER_MSVC) && (defined(_M_ARM64) || defined(_M_ARM64EC))
|
||||
// See https://docs.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics
|
||||
// and https://reviews.llvm.org/D53115
|
||||
int64_t virtual_timer_value;
|
||||
virtual_timer_value = _ReadStatusReg(ARM64_CNTVCT);
|
||||
return virtual_timer_value;
|
||||
#elif defined(COMPILER_MSVC)
|
||||
return __rdtsc();
|
||||
#elif defined(BENCHMARK_OS_NACL)
|
||||
@ -126,7 +133,7 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
|
||||
|
||||
// Native Client does not provide any API to access cycle counter.
|
||||
// Use clock_gettime(CLOCK_MONOTONIC, ...) instead of gettimeofday
|
||||
// because is provides nanosecond resolution (which is noticable at
|
||||
// because it provides nanosecond resolution (which is noticeable at
|
||||
// least for PNaCl modules running on x86 Mac & Linux).
|
||||
// Initialize to always return 0 if clock_gettime fails.
|
||||
struct timespec ts = {0, 0};
|
||||
@ -161,18 +168,27 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, nullptr);
|
||||
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
|
||||
#elif defined(__mips__)
|
||||
#elif defined(__mips__) || defined(__m68k__)
|
||||
// mips apparently only allows rdtsc for superusers, so we fall
|
||||
// back to gettimeofday. It's possible clock_gettime would be better.
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, nullptr);
|
||||
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
|
||||
#elif defined(__loongarch__) || defined(__csky__)
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, nullptr);
|
||||
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
|
||||
#elif defined(__s390__) // Covers both s390 and s390x.
|
||||
// Return the CPU clock.
|
||||
uint64_t tsc;
|
||||
#if defined(BENCHMARK_OS_ZOS) && defined(COMPILER_IBMXL)
|
||||
// z/OS XL compiler HLASM syntax.
|
||||
asm(" stck %0" : "=m"(tsc) : : "cc");
|
||||
#else
|
||||
asm("stck %0" : "=Q"(tsc) : : "cc");
|
||||
#endif
|
||||
return tsc;
|
||||
#elif defined(__riscv) // RISC-V
|
||||
#elif defined(__riscv) // RISC-V
|
||||
// Use RDCYCLE (and RDCYCLEH on riscv32)
|
||||
#if __riscv_xlen == 32
|
||||
uint32_t cycles_lo, cycles_hi0, cycles_hi1;
|
||||
@ -193,6 +209,14 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
|
||||
asm volatile("rdcycle %0" : "=r"(cycles));
|
||||
return cycles;
|
||||
#endif
|
||||
#elif defined(__e2k__) || defined(__elbrus__)
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, nullptr);
|
||||
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
|
||||
#elif defined(__hexagon__)
|
||||
uint64_t pcycle;
|
||||
asm volatile("%0 = C15:14" : "=r"(pcycle));
|
||||
return static_cast<double>(pcycle);
|
||||
#else
|
||||
// The soft failover to a generic implementation is automatic only for ARM.
|
||||
// For other platforms the developer is expected to make an attempt to create
|
||||
|
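Several of the branches above read a 64-bit counter as two 32-bit halves; the general form of that hi/lo/hi trick, as a sketch:

#include <cstdint>

static uint64_t Read64From32(uint32_t (*read_hi)(), uint32_t (*read_lo)()) {
  uint32_t hi0, lo, hi1;
  do {
    hi0 = read_hi();
    lo = read_lo();
    hi1 = read_hi();   // if the high word ticked over, the low word is stale
  } while (hi0 != hi1);
  return (static_cast<uint64_t>(hi1) << 32) | lo;
}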
@ -1,8 +1,6 @@
|
||||
#ifndef BENCHMARK_INTERNAL_MACROS_H_
|
||||
#define BENCHMARK_INTERNAL_MACROS_H_
|
||||
|
||||
#include "benchmark/benchmark.h"
|
||||
|
||||
/* Needed to detect STL */
|
||||
#include <cstdlib>
|
||||
|
||||
@ -13,7 +11,11 @@
|
||||
#endif
|
||||
|
||||
#if defined(__clang__)
|
||||
#if !defined(COMPILER_CLANG)
|
||||
#if defined(__ibmxl__)
|
||||
#if !defined(COMPILER_IBMXL)
|
||||
#define COMPILER_IBMXL
|
||||
#endif
|
||||
#elif !defined(COMPILER_CLANG)
|
||||
#define COMPILER_CLANG
|
||||
#endif
|
||||
#elif defined(_MSC_VER)
|
||||
@ -40,6 +42,19 @@
|
||||
#define BENCHMARK_OS_CYGWIN 1
|
||||
#elif defined(_WIN32)
|
||||
#define BENCHMARK_OS_WINDOWS 1
|
||||
// WINAPI_FAMILY_PARTITION is defined in winapifamily.h.
|
||||
// We include windows.h which implicitly includes winapifamily.h for compatibility.
|
||||
#ifndef NOMINMAX
|
||||
#define NOMINMAX
|
||||
#endif
|
||||
#include <windows.h>
|
||||
#if defined(WINAPI_FAMILY_PARTITION)
|
||||
#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
|
||||
#define BENCHMARK_OS_WINDOWS_WIN32 1
|
||||
#elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP)
|
||||
#define BENCHMARK_OS_WINDOWS_RT 1
|
||||
#endif
|
||||
#endif
|
||||
#if defined(__MINGW32__)
|
||||
#define BENCHMARK_OS_MINGW 1
|
||||
#endif
|
||||
@ -58,6 +73,8 @@
|
||||
#define BENCHMARK_OS_NETBSD 1
|
||||
#elif defined(__OpenBSD__)
|
||||
#define BENCHMARK_OS_OPENBSD 1
|
||||
#elif defined(__DragonFly__)
|
||||
#define BENCHMARK_OS_DRAGONFLY 1
|
||||
#elif defined(__linux__)
|
||||
#define BENCHMARK_OS_LINUX 1
|
||||
#elif defined(__native_client__)
|
||||
@ -72,6 +89,10 @@
|
||||
#define BENCHMARK_OS_SOLARIS 1
|
||||
#elif defined(__QNX__)
|
||||
#define BENCHMARK_OS_QNX 1
|
||||
#elif defined(__MVS__)
|
||||
#define BENCHMARK_OS_ZOS 1
|
||||
#elif defined(__hexagon__)
|
||||
#define BENCHMARK_OS_QURT 1
|
||||
#endif
|
||||
|
||||
#if defined(__ANDROID__) && defined(__GLIBCXX__)
|
||||
|
@ -12,9 +12,6 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "benchmark/benchmark.h"
|
||||
#include "complexity.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
@ -25,41 +22,61 @@
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#include "benchmark/benchmark.h"
|
||||
#include "complexity.h"
|
||||
#include "string_util.h"
|
||||
#include "timers.h"
|
||||
|
||||
namespace benchmark {
|
||||
|
||||
namespace {
|
||||
|
||||
std::string StrEscape(const std::string & s) {
|
||||
std::string StrEscape(const std::string& s) {
|
||||
std::string tmp;
|
||||
tmp.reserve(s.size());
|
||||
for (char c : s) {
|
||||
switch (c) {
|
||||
case '\b': tmp += "\\b"; break;
|
||||
case '\f': tmp += "\\f"; break;
|
||||
case '\n': tmp += "\\n"; break;
|
||||
case '\r': tmp += "\\r"; break;
|
||||
case '\t': tmp += "\\t"; break;
|
||||
case '\\': tmp += "\\\\"; break;
|
||||
case '"' : tmp += "\\\""; break;
|
||||
default : tmp += c; break;
|
||||
case '\b':
|
||||
tmp += "\\b";
|
||||
break;
|
||||
case '\f':
|
||||
tmp += "\\f";
|
||||
break;
|
||||
case '\n':
|
||||
tmp += "\\n";
|
||||
break;
|
||||
case '\r':
|
||||
tmp += "\\r";
|
||||
break;
|
||||
case '\t':
|
||||
tmp += "\\t";
|
||||
break;
|
||||
case '\\':
|
||||
tmp += "\\\\";
|
||||
break;
|
||||
case '"':
|
||||
tmp += "\\\"";
|
||||
break;
|
||||
default:
|
||||
tmp += c;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return tmp;
|
||||
}
|
||||
|
||||
std::string FormatKV(std::string const& key, std::string const& value) {
|
||||
return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(), StrEscape(value).c_str());
|
||||
return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(),
|
||||
StrEscape(value).c_str());
|
||||
}
|
||||
|
||||
std::string FormatKV(std::string const& key, const char* value) {
|
||||
return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(), StrEscape(value).c_str());
|
||||
return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(),
|
||||
StrEscape(value).c_str());
|
||||
}
|
||||
|
||||
std::string FormatKV(std::string const& key, bool value) {
|
||||
return StrFormat("\"%s\": %s", StrEscape(key).c_str(), value ? "true" : "false");
|
||||
return StrFormat("\"%s\": %s", StrEscape(key).c_str(),
|
||||
value ? "true" : "false");
|
||||
}
|
||||
|
||||
std::string FormatKV(std::string const& key, int64_t value) {
|
||||
@ -68,12 +85,6 @@ std::string FormatKV(std::string const& key, int64_t value) {
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::string FormatKV(std::string const& key, IterationCount value) {
|
||||
std::stringstream ss;
|
||||
ss << '"' << StrEscape(key) << "\": " << value;
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::string FormatKV(std::string const& key, double value) {
|
||||
std::stringstream ss;
|
||||
ss << '"' << StrEscape(key) << "\": ";
|
||||
@ -123,7 +134,9 @@ bool JSONReporter::ReportContext(const Context& context) {
|
||||
RoundDouble(info.cycles_per_second / 1000000.0))
|
||||
<< ",\n";
|
||||
if (CPUInfo::Scaling::UNKNOWN != info.scaling) {
|
||||
out << indent << FormatKV("cpu_scaling_enabled", info.scaling == CPUInfo::Scaling::ENABLED ? true : false)
|
||||
out << indent
|
||||
<< FormatKV("cpu_scaling_enabled",
|
||||
info.scaling == CPUInfo::Scaling::ENABLED ? true : false)
|
||||
<< ",\n";
|
||||
}
|
||||
|
||||
@ -136,8 +149,8 @@ bool JSONReporter::ReportContext(const Context& context) {
|
||||
out << cache_indent << FormatKV("type", CI.type) << ",\n";
|
||||
out << cache_indent << FormatKV("level", static_cast<int64_t>(CI.level))
|
||||
<< ",\n";
|
||||
out << cache_indent
|
||||
<< FormatKV("size", static_cast<int64_t>(CI.size)) << ",\n";
|
||||
out << cache_indent << FormatKV("size", static_cast<int64_t>(CI.size))
|
||||
<< ",\n";
|
||||
out << cache_indent
|
||||
<< FormatKV("num_sharing", static_cast<int64_t>(CI.num_sharing))
|
||||
<< "\n";
|
||||
@ -159,7 +172,19 @@ bool JSONReporter::ReportContext(const Context& context) {
|
||||
#else
|
||||
const char build_type[] = "debug";
|
||||
#endif
|
||||
out << indent << FormatKV("library_build_type", build_type) << "\n";
|
||||
out << indent << FormatKV("library_build_type", build_type);
|
||||
|
||||
std::map<std::string, std::string>* global_context =
|
||||
internal::GetGlobalContext();
|
||||
|
||||
if (global_context != nullptr) {
|
||||
for (const auto& kv : *global_context) {
|
||||
out << ",\n";
|
||||
out << indent << FormatKV(kv.first, kv.second);
|
||||
}
|
||||
}
|
||||
out << "\n";
|
||||
|
||||
// Close context block and open the list of benchmarks.
|
||||
out << inner_indent << "},\n";
|
||||
out << inner_indent << "\"benchmarks\": [\n";
|
||||
@ -197,6 +222,10 @@ void JSONReporter::PrintRunData(Run const& run) {
|
||||
std::string indent(6, ' ');
|
||||
std::ostream& out = GetOutputStream();
|
||||
out << indent << FormatKV("name", run.benchmark_name()) << ",\n";
|
||||
out << indent << FormatKV("family_index", run.family_index) << ",\n";
|
||||
out << indent
|
||||
<< FormatKV("per_family_instance_index", run.per_family_instance_index)
|
||||
<< ",\n";
|
||||
out << indent << FormatKV("run_name", run.run_name.str()) << ",\n";
|
||||
out << indent << FormatKV("run_type", [&run]() -> const char* {
|
||||
switch (run.run_type) {
|
||||
@ -215,15 +244,36 @@ void JSONReporter::PrintRunData(Run const& run) {
|
||||
out << indent << FormatKV("threads", run.threads) << ",\n";
|
||||
if (run.run_type == BenchmarkReporter::Run::RT_Aggregate) {
|
||||
out << indent << FormatKV("aggregate_name", run.aggregate_name) << ",\n";
|
||||
out << indent << FormatKV("aggregate_unit", [&run]() -> const char* {
|
||||
switch (run.aggregate_unit) {
|
||||
case StatisticUnit::kTime:
|
||||
return "time";
|
||||
case StatisticUnit::kPercentage:
|
||||
return "percentage";
|
||||
}
|
||||
BENCHMARK_UNREACHABLE();
|
||||
}()) << ",\n";
|
||||
}
|
||||
if (run.error_occurred) {
|
||||
out << indent << FormatKV("error_occurred", run.error_occurred) << ",\n";
|
||||
out << indent << FormatKV("error_message", run.error_message) << ",\n";
|
||||
if (internal::SkippedWithError == run.skipped) {
|
||||
out << indent << FormatKV("error_occurred", true) << ",\n";
|
||||
out << indent << FormatKV("error_message", run.skip_message) << ",\n";
|
||||
} else if (internal::SkippedWithMessage == run.skipped) {
|
||||
out << indent << FormatKV("skipped", true) << ",\n";
|
||||
out << indent << FormatKV("skip_message", run.skip_message) << ",\n";
|
||||
}
|
||||
if (!run.report_big_o && !run.report_rms) {
|
||||
out << indent << FormatKV("iterations", run.iterations) << ",\n";
|
||||
out << indent << FormatKV("real_time", run.GetAdjustedRealTime()) << ",\n";
|
||||
out << indent << FormatKV("cpu_time", run.GetAdjustedCPUTime());
|
||||
if (run.run_type != Run::RT_Aggregate ||
|
||||
run.aggregate_unit == StatisticUnit::kTime) {
|
||||
out << indent << FormatKV("real_time", run.GetAdjustedRealTime())
|
||||
<< ",\n";
|
||||
out << indent << FormatKV("cpu_time", run.GetAdjustedCPUTime());
|
||||
} else {
|
||||
assert(run.aggregate_unit == StatisticUnit::kPercentage);
|
||||
out << indent << FormatKV("real_time", run.real_accumulated_time)
|
||||
<< ",\n";
|
||||
out << indent << FormatKV("cpu_time", run.cpu_accumulated_time);
|
||||
}
|
||||
out << ",\n"
|
||||
<< indent << FormatKV("time_unit", GetTimeUnitString(run.time_unit));
|
||||
} else if (run.report_big_o) {
|
||||
@ -241,9 +291,21 @@ void JSONReporter::PrintRunData(Run const& run) {
|
||||
out << ",\n" << indent << FormatKV(c.first, c.second);
|
||||
}
|
||||
|
||||
if (run.has_memory_result) {
|
||||
if (run.memory_result) {
|
||||
const MemoryManager::Result memory_result = *run.memory_result;
|
||||
out << ",\n" << indent << FormatKV("allocs_per_iter", run.allocs_per_iter);
|
||||
out << ",\n" << indent << FormatKV("max_bytes_used", run.max_bytes_used);
|
||||
out << ",\n"
|
||||
<< indent << FormatKV("max_bytes_used", memory_result.max_bytes_used);
|
||||
|
||||
auto report_if_present = [&out, &indent](const std::string& label,
|
||||
int64_t val) {
|
||||
if (val != MemoryManager::TombstoneValue)
|
||||
out << ",\n" << indent << FormatKV(label, val);
|
||||
};
|
||||
|
||||
report_if_present("total_allocated_bytes",
|
||||
memory_result.total_allocated_bytes);
|
||||
report_if_present("net_heap_growth", memory_result.net_heap_growth);
|
||||
}
|
||||
|
||||
if (!run.report_label.empty()) {
|
||||
@ -252,4 +314,7 @@ void JSONReporter::PrintRunData(Run const& run) {
|
||||
out << '\n';
|
||||
}
|
||||
|
||||
const int64_t MemoryManager::TombstoneValue =
|
||||
std::numeric_limits<int64_t>::max();
|
||||
|
||||
} // end namespace benchmark
|
||||
|
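The global context emitted above is typically populated from user code; a minimal sketch, assuming a library version that provides benchmark::AddCustomContext (entries can also come from --benchmark_context=key=value):

#include "benchmark/benchmark.h"

int main(int argc, char** argv) {
  benchmark::AddCustomContext("compiler", "clang-15");  // shows up in the JSON "context" block
  benchmark::Initialize(&argc, argv);
  benchmark::RunSpecifiedBenchmarks();
  benchmark::Shutdown();
  return 0;
}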
28
src/log.h
@ -4,7 +4,12 @@
|
||||
#include <iostream>
|
||||
#include <ostream>
|
||||
|
||||
#include "benchmark/benchmark.h"
|
||||
// NOTE: this is also defined in benchmark.h but we're trying to avoid a
|
||||
// dependency.
|
||||
// The _MSVC_LANG check should detect Visual Studio 2015 Update 3 and newer.
|
||||
#if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)
|
||||
#define BENCHMARK_HAS_CXX11
|
||||
#endif
|
||||
|
||||
namespace benchmark {
|
||||
namespace internal {
|
||||
@ -23,7 +28,16 @@ class LogType {
|
||||
private:
|
||||
LogType(std::ostream* out) : out_(out) {}
|
||||
std::ostream* out_;
|
||||
BENCHMARK_DISALLOW_COPY_AND_ASSIGN(LogType);
|
||||
|
||||
// NOTE: we could use BENCHMARK_DISALLOW_COPY_AND_ASSIGN but we shouldn't have
|
||||
// a dependency on benchmark.h from here.
|
||||
#ifndef BENCHMARK_HAS_CXX11
|
||||
LogType(const LogType&);
|
||||
LogType& operator=(const LogType&);
|
||||
#else
|
||||
LogType(const LogType&) = delete;
|
||||
LogType& operator=(const LogType&) = delete;
|
||||
#endif
|
||||
};
|
||||
|
||||
template <class Tp>
|
||||
@ -47,13 +61,13 @@ inline int& LogLevel() {
|
||||
}
|
||||
|
||||
inline LogType& GetNullLogInstance() {
|
||||
static LogType log(nullptr);
|
||||
return log;
|
||||
static LogType null_log(static_cast<std::ostream*>(nullptr));
|
||||
return null_log;
|
||||
}
|
||||
|
||||
inline LogType& GetErrorLogInstance() {
|
||||
static LogType log(&std::clog);
|
||||
return log;
|
||||
static LogType error_log(&std::clog);
|
||||
return error_log;
|
||||
}
|
||||
|
||||
inline LogType& GetLogInstanceForLevel(int level) {
|
||||
@ -67,7 +81,7 @@ inline LogType& GetLogInstanceForLevel(int level) {
|
||||
} // end namespace benchmark
|
||||
|
||||
// clang-format off
|
||||
#define VLOG(x) \
|
||||
#define BM_VLOG(x) \
|
||||
(::benchmark::internal::GetLogInstanceForLevel(x) << "-- LOG(" << x << "):" \
|
||||
" ")
|
||||
// clang-format on
|
||||
|
44
src/mutex.h
@ -9,60 +9,60 @@
|
||||
// Enable thread safety attributes only with clang.
|
||||
// The attributes can be safely erased when compiling with other compilers.
|
||||
#if defined(HAVE_THREAD_SAFETY_ATTRIBUTES)
|
||||
#define THREAD_ANNOTATION_ATTRIBUTE__(x) __attribute__((x))
|
||||
#define THREAD_ANNOTATION_ATTRIBUTE_(x) __attribute__((x))
|
||||
#else
|
||||
#define THREAD_ANNOTATION_ATTRIBUTE__(x) // no-op
|
||||
#define THREAD_ANNOTATION_ATTRIBUTE_(x) // no-op
|
||||
#endif
|
||||
|
||||
#define CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE__(capability(x))
|
||||
#define CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(capability(x))
|
||||
|
||||
#define SCOPED_CAPABILITY THREAD_ANNOTATION_ATTRIBUTE__(scoped_lockable)
|
||||
#define SCOPED_CAPABILITY THREAD_ANNOTATION_ATTRIBUTE_(scoped_lockable)
|
||||
|
||||
#define GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE__(guarded_by(x))
|
||||
#define GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE_(guarded_by(x))
|
||||
|
||||
#define PT_GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE__(pt_guarded_by(x))
|
||||
#define PT_GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE_(pt_guarded_by(x))
|
||||
|
||||
#define ACQUIRED_BEFORE(...) \
|
||||
THREAD_ANNOTATION_ATTRIBUTE__(acquired_before(__VA_ARGS__))
|
||||
THREAD_ANNOTATION_ATTRIBUTE_(acquired_before(__VA_ARGS__))
|
||||
|
||||
#define ACQUIRED_AFTER(...) \
|
||||
THREAD_ANNOTATION_ATTRIBUTE__(acquired_after(__VA_ARGS__))
|
||||
THREAD_ANNOTATION_ATTRIBUTE_(acquired_after(__VA_ARGS__))
|
||||
|
||||
#define REQUIRES(...) \
|
||||
THREAD_ANNOTATION_ATTRIBUTE__(requires_capability(__VA_ARGS__))
|
||||
THREAD_ANNOTATION_ATTRIBUTE_(requires_capability(__VA_ARGS__))
|
||||
|
||||
#define REQUIRES_SHARED(...) \
|
||||
THREAD_ANNOTATION_ATTRIBUTE__(requires_shared_capability(__VA_ARGS__))
|
||||
THREAD_ANNOTATION_ATTRIBUTE_(requires_shared_capability(__VA_ARGS__))
|
||||
|
||||
#define ACQUIRE(...) \
|
||||
THREAD_ANNOTATION_ATTRIBUTE__(acquire_capability(__VA_ARGS__))
|
||||
THREAD_ANNOTATION_ATTRIBUTE_(acquire_capability(__VA_ARGS__))
|
||||
|
||||
#define ACQUIRE_SHARED(...) \
|
||||
THREAD_ANNOTATION_ATTRIBUTE__(acquire_shared_capability(__VA_ARGS__))
|
||||
THREAD_ANNOTATION_ATTRIBUTE_(acquire_shared_capability(__VA_ARGS__))
|
||||
|
||||
#define RELEASE(...) \
|
||||
THREAD_ANNOTATION_ATTRIBUTE__(release_capability(__VA_ARGS__))
|
||||
THREAD_ANNOTATION_ATTRIBUTE_(release_capability(__VA_ARGS__))
|
||||
|
||||
#define RELEASE_SHARED(...) \
|
||||
THREAD_ANNOTATION_ATTRIBUTE__(release_shared_capability(__VA_ARGS__))
|
||||
THREAD_ANNOTATION_ATTRIBUTE_(release_shared_capability(__VA_ARGS__))
|
||||
|
||||
#define TRY_ACQUIRE(...) \
|
||||
THREAD_ANNOTATION_ATTRIBUTE__(try_acquire_capability(__VA_ARGS__))
|
||||
THREAD_ANNOTATION_ATTRIBUTE_(try_acquire_capability(__VA_ARGS__))
|
||||
|
||||
#define TRY_ACQUIRE_SHARED(...) \
|
||||
THREAD_ANNOTATION_ATTRIBUTE__(try_acquire_shared_capability(__VA_ARGS__))
|
||||
THREAD_ANNOTATION_ATTRIBUTE_(try_acquire_shared_capability(__VA_ARGS__))
|
||||
|
||||
#define EXCLUDES(...) THREAD_ANNOTATION_ATTRIBUTE__(locks_excluded(__VA_ARGS__))
|
||||
#define EXCLUDES(...) THREAD_ANNOTATION_ATTRIBUTE_(locks_excluded(__VA_ARGS__))
|
||||
|
||||
#define ASSERT_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE__(assert_capability(x))
|
||||
#define ASSERT_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(assert_capability(x))
|
||||
|
||||
#define ASSERT_SHARED_CAPABILITY(x) \
|
||||
THREAD_ANNOTATION_ATTRIBUTE__(assert_shared_capability(x))
|
||||
THREAD_ANNOTATION_ATTRIBUTE_(assert_shared_capability(x))
|
||||
|
||||
#define RETURN_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE__(lock_returned(x))
|
||||
#define RETURN_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(lock_returned(x))
|
||||
|
||||
#define NO_THREAD_SAFETY_ANALYSIS \
|
||||
THREAD_ANNOTATION_ATTRIBUTE__(no_thread_safety_analysis)
|
||||
THREAD_ANNOTATION_ATTRIBUTE_(no_thread_safety_analysis)
|
||||
|
||||
namespace benchmark {
|
||||
|
||||
@ -130,7 +130,7 @@ class Barrier {
|
||||
// entered the barrier. Returns iff this is the last thread to
|
||||
// enter the barrier.
|
||||
bool createBarrier(MutexLock& ml) REQUIRES(lock_) {
|
||||
CHECK_LT(entered_, running_threads_);
|
||||
BM_CHECK_LT(entered_, running_threads_);
|
||||
entered_++;
|
||||
if (entered_ < running_threads_) {
|
||||
// Wait for all threads to enter
|
||||
|
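How these annotations are used in practice, as a sketch (assumes the Mutex/MutexLock wrappers that this header also defines; clang's -Wthread-safety checks the annotations, other compilers see no-ops):

class GuardedCounter {
 public:
  void Increment() {
    MutexLock lock(mutex_);  // acquire/release via the wrapper's annotations
    value_++;
  }

 private:
  Mutex mutex_;
  int value_ GUARDED_BY(mutex_) = 0;
};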
282
src/perf_counters.cc
Normal file
@ -0,0 +1,282 @@
// Copyright 2021 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "perf_counters.h"

#include <cstring>
#include <memory>
#include <vector>

#if defined HAVE_LIBPFM
#include "perfmon/pfmlib.h"
#include "perfmon/pfmlib_perf_event.h"
#endif

namespace benchmark {
namespace internal {

constexpr size_t PerfCounterValues::kMaxCounters;

#if defined HAVE_LIBPFM

size_t PerfCounterValues::Read(const std::vector<int>& leaders) {
  // Create a pointer for multiple reads
  const size_t bufsize = values_.size() * sizeof(values_[0]);
  char* ptr = reinterpret_cast<char*>(values_.data());
  size_t size = bufsize;
  for (int lead : leaders) {
    auto read_bytes = ::read(lead, ptr, size);
    if (read_bytes >= ssize_t(sizeof(uint64_t))) {
      // Actual data bytes are all bytes minus initial padding
      std::size_t data_bytes = read_bytes - sizeof(uint64_t);
      // This should be very cheap since it's in hot cache
      std::memmove(ptr, ptr + sizeof(uint64_t), data_bytes);
      // Increment our counters
      ptr += data_bytes;
      size -= data_bytes;
    } else {
      int err = errno;
      GetErrorLogInstance() << "Error reading lead " << lead << " errno:" << err
                            << " " << ::strerror(err) << "\n";
      return 0;
    }
  }
  return (bufsize - size) / sizeof(uint64_t);
}

const bool PerfCounters::kSupported = true;

// Initializes libpfm only on the first call. Returns whether that single
// initialization was successful.
bool PerfCounters::Initialize() {
  // Function-scope static gets initialized only once on first call.
  static const bool success = []() {
    return pfm_initialize() == PFM_SUCCESS;
  }();
  return success;
}

bool PerfCounters::IsCounterSupported(const std::string& name) {
  Initialize();
  perf_event_attr_t attr;
  std::memset(&attr, 0, sizeof(attr));
  pfm_perf_encode_arg_t arg;
  std::memset(&arg, 0, sizeof(arg));
  arg.attr = &attr;
  const int mode = PFM_PLM3;  // user mode only
  int ret = pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT_EXT,
                                      &arg);
  return (ret == PFM_SUCCESS);
}

PerfCounters PerfCounters::Create(
    const std::vector<std::string>& counter_names) {
  if (!counter_names.empty()) {
    Initialize();
  }

  // Valid counters will populate these arrays but we start empty
  std::vector<std::string> valid_names;
  std::vector<int> counter_ids;
  std::vector<int> leader_ids;

  // Resize to the maximum possible
  valid_names.reserve(counter_names.size());
  counter_ids.reserve(counter_names.size());

  const int kCounterMode = PFM_PLM3;  // user mode only

  // Group leads will be assigned on demand. The idea is that once we cannot
  // create a counter descriptor, the reason is that this group has maxed out
  // so we set the group_id again to -1 and retry - giving the algorithm a
  // chance to create a new group leader to hold the next set of counters.
  int group_id = -1;

  // Loop through all performance counters
  for (size_t i = 0; i < counter_names.size(); ++i) {
    // we are about to push into the valid names vector
    // check if we did not reach the maximum
    if (valid_names.size() == PerfCounterValues::kMaxCounters) {
      // Log a message if we maxed out and stop adding
      GetErrorLogInstance()
          << counter_names.size() << " counters were requested. The maximum is "
          << PerfCounterValues::kMaxCounters << " and " << valid_names.size()
          << " were already added. All remaining counters will be ignored\n";
      // stop the loop and return what we have already
      break;
    }

    // Check if this name is empty
    const auto& name = counter_names[i];
    if (name.empty()) {
      GetErrorLogInstance()
          << "A performance counter name was the empty string\n";
      continue;
    }

    // Here first means first in group, ie the group leader
    const bool is_first = (group_id < 0);

    // This struct will be populated by libpfm from the counter string
    // and then fed into the syscall perf_event_open
    struct perf_event_attr attr {};
    attr.size = sizeof(attr);

    // This is the input struct to libpfm.
    pfm_perf_encode_arg_t arg{};
    arg.attr = &attr;
    const int pfm_get = pfm_get_os_event_encoding(name.c_str(), kCounterMode,
                                                  PFM_OS_PERF_EVENT, &arg);
    if (pfm_get != PFM_SUCCESS) {
      GetErrorLogInstance()
          << "Unknown performance counter name: " << name << "\n";
      continue;
    }

    // We then proceed to populate the remaining fields in our attribute struct
    // Note: the man page for perf_event_create suggests inherit = true and
    // read_format = PERF_FORMAT_GROUP don't work together, but that's not the
    // case.
    attr.disabled = is_first;
    attr.inherit = true;
    attr.pinned = is_first;
    attr.exclude_kernel = true;
    attr.exclude_user = false;
    attr.exclude_hv = true;

    // Read all counters in a group in one read.
    attr.read_format = PERF_FORMAT_GROUP;

    int id = -1;
    while (id < 0) {
      static constexpr size_t kNrOfSyscallRetries = 5;
      // Retry syscall as it was interrupted often (b/64774091).
      for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries;
           ++num_retries) {
        id = perf_event_open(&attr, 0, -1, group_id, 0);
        if (id >= 0 || errno != EINTR) {
          break;
        }
      }
      if (id < 0) {
        // If the file descriptor is negative we might have reached a limit
        // in the current group. Set the group_id to -1 and retry
        if (group_id >= 0) {
          // Create a new group
          group_id = -1;
        } else {
          // At this point we have already retried to set a new group id and
          // failed. We then give up.
          break;
        }
      }
    }

    // We failed to get a new file descriptor. We might have reached a hard
    // hardware limit that cannot be resolved even with group multiplexing
    if (id < 0) {
      GetErrorLogInstance() << "***WARNING*** Failed to get a file descriptor "
                               "for performance counter "
                            << name << ". Ignoring\n";

      // We give up on this counter but try to keep going
      // as the others would be fine
      continue;
    }
    if (group_id < 0) {
      // This is a leader, store and assign it to the current file descriptor
      leader_ids.push_back(id);
      group_id = id;
    }
    // This is a valid counter, add it to our descriptor's list
    counter_ids.push_back(id);
    valid_names.push_back(name);
  }

  // Loop through all group leaders activating them
  // There is another option of starting ALL counters in a process but
  // that would be far too intrusive. If the user is using PMCs
  // by themselves then this would have a side effect on them. It is
  // friendlier to loop through all groups individually.
  for (int lead : leader_ids) {
    if (ioctl(lead, PERF_EVENT_IOC_ENABLE) != 0) {
      // This should never happen but if it does, we give up on the
      // entire batch as recovery would be a mess.
      GetErrorLogInstance() << "***WARNING*** Failed to start counters. "
                               "Clearing out all counters.\n";

      // Close all performance counters
      for (int id : counter_ids) {
        ::close(id);
      }

      // Return an empty object so our internal state is still good and
      // the process can continue normally without impact
      return NoCounters();
    }
  }

  return PerfCounters(std::move(valid_names), std::move(counter_ids),
                      std::move(leader_ids));
}

void PerfCounters::CloseCounters() const {
  if (counter_ids_.empty()) {
    return;
  }
  for (int lead : leader_ids_) {
    ioctl(lead, PERF_EVENT_IOC_DISABLE);
  }
  for (int fd : counter_ids_) {
    close(fd);
  }
}
#else  // defined HAVE_LIBPFM
size_t PerfCounterValues::Read(const std::vector<int>&) { return 0; }

const bool PerfCounters::kSupported = false;

bool PerfCounters::Initialize() { return false; }

bool PerfCounters::IsCounterSupported(const std::string&) { return false; }

PerfCounters PerfCounters::Create(
    const std::vector<std::string>& counter_names) {
  if (!counter_names.empty()) {
    GetErrorLogInstance() << "Performance counters not supported.";
  }
  return NoCounters();
}

void PerfCounters::CloseCounters() const {}
#endif  // defined HAVE_LIBPFM

PerfCountersMeasurement::PerfCountersMeasurement(
    const std::vector<std::string>& counter_names)
    : start_values_(counter_names.size()), end_values_(counter_names.size()) {
  counters_ = PerfCounters::Create(counter_names);
}

PerfCounters& PerfCounters::operator=(PerfCounters&& other) noexcept {
  if (this != &other) {
    CloseCounters();

    counter_ids_ = std::move(other.counter_ids_);
    leader_ids_ = std::move(other.leader_ids_);
    counter_names_ = std::move(other.counter_names_);
  }
  return *this;
}
}  // namespace internal
}  // namespace benchmark
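The unpacking in PerfCounterValues::Read above relies on the kernel's read layout for grouped counters. As a rough sketch (not part of this commit), assuming read_format = PERF_FORMAT_GROUP with no PERF_FORMAT_ID, each group leader's read(2) returns one leading count word followed by one value per counter, which is exactly the word the memmove discards:

// Sketch only: the layout implied by read_format = PERF_FORMAT_GROUP
// without PERF_FORMAT_ID, i.e. what Read() unpacks above.
#include <cstddef>
#include <cstdint>

// A group of N counters reads back as one uint64_t count ("nr") followed by
// N uint64_t values: (1 + N) * sizeof(uint64_t) bytes per group leader.
constexpr std::size_t GroupReadBytes(std::size_t n_counters) {
  return (1 + n_counters) * sizeof(std::uint64_t);
}

static_assert(GroupReadBytes(3) == 4 * sizeof(std::uint64_t),
              "one leading count word plus three counter values");

Dropping that leading word from each group lets operator[] index counter values contiguously across all groups; a short read is treated as a failed snapshot and Read() returns 0.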
200
src/perf_counters.h
Normal file
200
src/perf_counters.h
Normal file
@ -0,0 +1,200 @@
// Copyright 2021 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef BENCHMARK_PERF_COUNTERS_H
#define BENCHMARK_PERF_COUNTERS_H

#include <array>
#include <cstdint>
#include <cstring>
#include <memory>
#include <vector>

#include "benchmark/benchmark.h"
#include "check.h"
#include "log.h"
#include "mutex.h"

#ifndef BENCHMARK_OS_WINDOWS
#include <unistd.h>
#endif

#if defined(_MSC_VER)
#pragma warning(push)
// C4251: <symbol> needs to have dll-interface to be used by clients of class
#pragma warning(disable : 4251)
#endif

namespace benchmark {
namespace internal {

// Typically, we can only read a small number of counters. There is also a
// padding preceding counter values, when reading multiple counters with one
// syscall (which is desirable). PerfCounterValues abstracts these details.
// The implementation ensures the storage is inlined, and allows 0-based
// indexing into the counter values.
// The object is used in conjunction with a PerfCounters object, by passing it
// to Snapshot(). The Read() method relocates individual reads, discarding
// the initial padding from each group leader in the values buffer such that
// all user accesses through the [] operator are correct.
class BENCHMARK_EXPORT PerfCounterValues {
 public:
  explicit PerfCounterValues(size_t nr_counters) : nr_counters_(nr_counters) {
    BM_CHECK_LE(nr_counters_, kMaxCounters);
  }

  // We are reading correctly now so the values don't need to skip padding
  uint64_t operator[](size_t pos) const { return values_[pos]; }

  // Increased the maximum to 32 only since the buffer
  // is std::array<> backed
  static constexpr size_t kMaxCounters = 32;

 private:
  friend class PerfCounters;
  // Get the byte buffer in which perf counters can be captured.
  // This is used by PerfCounters::Read
  std::pair<char*, size_t> get_data_buffer() {
    return {reinterpret_cast<char*>(values_.data()),
            sizeof(uint64_t) * (kPadding + nr_counters_)};
  }

  // This reading is complex and as the goal of this class is to
  // abstract away the intricacies of the reading process, this is
  // a better place for it
  size_t Read(const std::vector<int>& leaders);

  // Move the padding to 2 due to the reading algorithm (1st padding plus a
  // current read padding)
  static constexpr size_t kPadding = 2;
  std::array<uint64_t, kPadding + kMaxCounters> values_;
  const size_t nr_counters_;
};

// Collect PMU counters. The object, once constructed, is ready to be used by
// calling read(). PMU counter collection is enabled from the time create() is
// called, to obtain the object, until the object's destructor is called.
class BENCHMARK_EXPORT PerfCounters final {
 public:
  // True iff this platform supports performance counters.
  static const bool kSupported;

  // Returns an empty object
  static PerfCounters NoCounters() { return PerfCounters(); }

  ~PerfCounters() { CloseCounters(); }
  PerfCounters() = default;
  PerfCounters(PerfCounters&&) = default;
  PerfCounters(const PerfCounters&) = delete;
  PerfCounters& operator=(PerfCounters&&) noexcept;
  PerfCounters& operator=(const PerfCounters&) = delete;

  // Platform-specific implementations may choose to do some library
  // initialization here.
  static bool Initialize();

  // Check if the given counter is supported, if the app wants to
  // check before passing
  static bool IsCounterSupported(const std::string& name);

  // Return a PerfCounters object ready to read the counters with the names
  // specified. The values are user-mode only. The counter name format is
  // implementation and OS specific.
  // In case of failure, this method will in the worst case return an
  // empty object whose state will still be valid.
  static PerfCounters Create(const std::vector<std::string>& counter_names);

  // Take a snapshot of the current value of the counters into the provided
  // valid PerfCounterValues storage. The values are populated such that:
  // names()[i]'s value is (*values)[i]
  BENCHMARK_ALWAYS_INLINE bool Snapshot(PerfCounterValues* values) const {
#ifndef BENCHMARK_OS_WINDOWS
    assert(values != nullptr);
    return values->Read(leader_ids_) == counter_ids_.size();
#else
    (void)values;
    return false;
#endif
  }

  const std::vector<std::string>& names() const { return counter_names_; }
  size_t num_counters() const { return counter_names_.size(); }

 private:
  PerfCounters(const std::vector<std::string>& counter_names,
               std::vector<int>&& counter_ids, std::vector<int>&& leader_ids)
      : counter_ids_(std::move(counter_ids)),
        leader_ids_(std::move(leader_ids)),
        counter_names_(counter_names) {}

  void CloseCounters() const;

  std::vector<int> counter_ids_;
  std::vector<int> leader_ids_;
  std::vector<std::string> counter_names_;
};

// Typical usage of the above primitives.
class BENCHMARK_EXPORT PerfCountersMeasurement final {
 public:
  PerfCountersMeasurement(const std::vector<std::string>& counter_names);

  size_t num_counters() const { return counters_.num_counters(); }

  std::vector<std::string> names() const { return counters_.names(); }

  BENCHMARK_ALWAYS_INLINE bool Start() {
    if (num_counters() == 0) return true;
    // Tell the compiler to not move instructions above/below where we take
    // the snapshot.
    ClobberMemory();
    valid_read_ &= counters_.Snapshot(&start_values_);
    ClobberMemory();

    return valid_read_;
  }

  BENCHMARK_ALWAYS_INLINE bool Stop(
      std::vector<std::pair<std::string, double>>& measurements) {
    if (num_counters() == 0) return true;
    // Tell the compiler to not move instructions above/below where we take
    // the snapshot.
    ClobberMemory();
    valid_read_ &= counters_.Snapshot(&end_values_);
    ClobberMemory();

    for (size_t i = 0; i < counters_.names().size(); ++i) {
      double measurement = static_cast<double>(end_values_[i]) -
                           static_cast<double>(start_values_[i]);
      measurements.push_back({counters_.names()[i], measurement});
    }

    return valid_read_;
  }

 private:
  PerfCounters counters_;
  bool valid_read_ = true;
  PerfCounterValues start_values_;
  PerfCounterValues end_values_;
};

}  // namespace internal
}  // namespace benchmark

#if defined(_MSC_VER)
#pragma warning(pop)
#endif

#endif  // BENCHMARK_PERF_COUNTERS_H
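Taken together, the header suggests a simple usage pattern. The sketch below is illustrative only: the include path and counter names are assumptions, and these are internal classes rather than documented public API.

// Hypothetical usage sketch based on the header above; not part of the diff.
#include <iostream>
#include <string>
#include <utility>
#include <vector>

#include "src/perf_counters.h"  // internal header, path assumed

void MeasureSomething() {
  using benchmark::internal::PerfCountersMeasurement;

  // Counter names are libpfm-dependent; "CYCLES" and "INSTRUCTIONS" are
  // examples and may not be available on every machine.
  PerfCountersMeasurement pcm({"CYCLES", "INSTRUCTIONS"});

  std::vector<std::pair<std::string, double>> deltas;
  pcm.Start();
  // ... code under measurement ...
  if (pcm.Stop(deltas)) {
    // deltas[i] is end - start for pcm.names()[i]
    for (const auto& kv : deltas) {
      std::cout << kv.first << ": " << kv.second << "\n";
    }
  }
}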
4
src/re.h
4
src/re.h
@ -33,7 +33,7 @@
// Prefer C regex libraries when compiling w/o exceptions so that we can
// correctly report errors.
#if defined(BENCHMARK_HAS_NO_EXCEPTIONS) && \
    defined(BENCHMARK_HAVE_STD_REGEX) && \
    defined(HAVE_STD_REGEX) && \
    (defined(HAVE_GNU_POSIX_REGEX) || defined(HAVE_POSIX_REGEX))
#undef HAVE_STD_REGEX
#endif
@ -126,7 +126,7 @@ inline bool Regex::Init(const std::string& spec, std::string* error) {

    // regerror returns the number of bytes necessary to null terminate
    // the string, so we move that when assigning to error.
    CHECK_NE(needed, 0);
    BM_CHECK_NE(needed, 0);
    error->assign(errbuf, needed - 1);

    delete[] errbuf;
@ -12,17 +12,17 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "benchmark/benchmark.h"
#include "timers.h"

#include <cstdlib>

#include <iostream>
#include <map>
#include <string>
#include <tuple>
#include <vector>

#include "benchmark/benchmark.h"
#include "check.h"
#include "string_util.h"
#include "timers.h"

namespace benchmark {

@ -33,10 +33,14 @@ BenchmarkReporter::~BenchmarkReporter() {}

void BenchmarkReporter::PrintBasicContext(std::ostream *out,
                                          Context const &context) {
  CHECK(out) << "cannot be null";
  BM_CHECK(out) << "cannot be null";
  auto &Out = *out;

#ifndef BENCHMARK_OS_QURT
  // Date/time information is not available on QuRT.
  // Attempting to get it via this call causes the binary to crash.
  Out << LocalDateTimeString() << "\n";
#endif

  if (context.executable_name)
    Out << "Running " << context.executable_name << "\n";
@ -64,6 +68,15 @@ void BenchmarkReporter::PrintBasicContext(std::ostream *out,
    Out << "\n";
  }

  std::map<std::string, std::string> *global_context =
      internal::GetGlobalContext();

  if (global_context != nullptr) {
    for (const auto &kv : *global_context) {
      Out << kv.first << ": " << kv.second << "\n";
    }
  }

  if (CPUInfo::Scaling::ENABLED == info.scaling) {
    Out << "***WARNING*** CPU scaling is enabled, the benchmark "
           "real time measurements may be noisy and will incur extra "
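The global-context block added to PrintBasicContext prints whatever key/value pairs have been registered with the library's global context. If memory serves, the user-facing entry point for that map is benchmark::AddCustomContext; a hedged sketch of how those lines end up in the report:

// Sketch only; assumes benchmark::AddCustomContext as found in recent
// releases of the library. Key/value strings are example data.
#include <benchmark/benchmark.h>

static void BM_Noop(benchmark::State& state) {
  for (auto _ : state) {
  }
}
BENCHMARK(BM_Noop);

int main(int argc, char** argv) {
  benchmark::Initialize(&argc, argv);
  // Each registered pair is emitted by PrintBasicContext alongside the date,
  // CPU and cache information.
  benchmark::AddCustomContext("compiler", "clang");
  benchmark::AddCustomContext("build", "RelWithDebInfo");
  benchmark::RunSpecifiedBenchmarks();
  benchmark::Shutdown();
  return 0;
}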
51
src/sleep.cc
51
src/sleep.cc
@ -1,51 +0,0 @@
// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "sleep.h"

#include <cerrno>
#include <cstdlib>
#include <ctime>

#include "internal_macros.h"

#ifdef BENCHMARK_OS_WINDOWS
#include <windows.h>
#endif

namespace benchmark {
#ifdef BENCHMARK_OS_WINDOWS
// Window's Sleep takes milliseconds argument.
void SleepForMilliseconds(int milliseconds) { Sleep(milliseconds); }
void SleepForSeconds(double seconds) {
  SleepForMilliseconds(static_cast<int>(kNumMillisPerSecond * seconds));
}
#else  // BENCHMARK_OS_WINDOWS
void SleepForMicroseconds(int microseconds) {
  struct timespec sleep_time;
  sleep_time.tv_sec = microseconds / kNumMicrosPerSecond;
  sleep_time.tv_nsec = (microseconds % kNumMicrosPerSecond) * kNumNanosPerMicro;
  while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR)
    ;  // Ignore signals and wait for the full interval to elapse.
}

void SleepForMilliseconds(int milliseconds) {
  SleepForMicroseconds(milliseconds * kNumMicrosPerMilli);
}

void SleepForSeconds(double seconds) {
  SleepForMicroseconds(static_cast<int>(seconds * kNumMicrosPerSecond));
}
#endif  // BENCHMARK_OS_WINDOWS
}  // end namespace benchmark
15
src/sleep.h
15
src/sleep.h
@ -1,15 +0,0 @@
#ifndef BENCHMARK_SLEEP_H_
#define BENCHMARK_SLEEP_H_

namespace benchmark {
const int kNumMillisPerSecond = 1000;
const int kNumMicrosPerMilli = 1000;
const int kNumMicrosPerSecond = kNumMillisPerSecond * 1000;
const int kNumNanosPerMicro = 1000;
const int kNumNanosPerSecond = kNumNanosPerMicro * kNumMicrosPerSecond;

void SleepForMilliseconds(int milliseconds);
void SleepForSeconds(double seconds);
}  // end namespace benchmark

#endif  // BENCHMARK_SLEEP_H_
Some files were not shown because too many files have changed in this diff.