mirror of https://github.com/google/benchmark.git synced 2025-04-29 22:40:33 +08:00

Merge branch 'google:main' into master

Author: krzikalla, 2023-09-15 15:43:08 +02:00, committed by GitHub
Commit: a75f6cfffc (GPG key ID 4AEE18F83AFDEB23; no known key found for this signature in database)
167 changed files with 10698 additions and 3553 deletions
.clang-tidy
.github
.gitignore
.travis-libcxx-setup.sh
.travis.yml
AUTHORS
BUILD.bazel
CMakeLists.txt
CONTRIBUTORS
MODULE.bazel
README.md
WORKSPACE
WORKSPACE.bzlmod
_config.yml
bazel
bindings/python
cmake
conan
conanfile.py
dependencies.md
docs
include/benchmark
pyproject.toml
setup.py
src

.clang-tidy (new file, 7 lines)

@ -0,0 +1,7 @@
---
Checks: 'clang-analyzer-*,readability-redundant-*,performance-*'
WarningsAsErrors: 'clang-analyzer-*,readability-redundant-*,performance-*'
HeaderFilterRegex: '.*'
AnalyzeTemporaryDtors: false
FormatStyle: none
User: user

.github/install_bazel.sh (new file, 13 lines)

@ -0,0 +1,13 @@
if ! bazel version; then
arch=$(uname -m)
if [ "$arch" == "aarch64" ]; then
arch="arm64"
fi
echo "Installing wget and downloading $arch Bazel binary from GitHub releases."
yum install -y wget
wget "https://github.com/bazelbuild/bazel/releases/download/6.3.0/bazel-6.3.0-linux-$arch" -O /usr/local/bin/bazel
chmod +x /usr/local/bin/bazel
else
# bazel is installed for the correct architecture
exit 0
fi

.github/libcxx-setup.sh (new executable file, 26 lines)

@ -0,0 +1,26 @@
#!/usr/bin/env bash
set -e
# Checkout LLVM sources
git clone --depth=1 https://github.com/llvm/llvm-project.git llvm-project
## Setup libc++ options
if [ -z "$BUILD_32_BITS" ]; then
export BUILD_32_BITS=OFF && echo disabling 32 bit build
fi
## Build and install libc++ (Use unstable ABI for better sanitizer coverage)
mkdir llvm-build && cd llvm-build
cmake -DCMAKE_C_COMPILER=${CC} \
-DCMAKE_CXX_COMPILER=${CXX} \
-DCMAKE_BUILD_TYPE=RelWithDebInfo \
-DCMAKE_INSTALL_PREFIX=/usr \
-DLIBCXX_ABI_UNSTABLE=OFF \
-DLLVM_USE_SANITIZER=${LIBCXX_SANITIZER} \
-DLLVM_BUILD_32_BITS=${BUILD_32_BITS} \
-DLLVM_ENABLE_RUNTIMES='libcxx;libcxxabi;libunwind' \
-G "Unix Makefiles" \
../llvm-project/runtimes/
make -j cxx cxxabi unwind
cd ..

.github/workflows/bazel.yml (new file, 35 lines)

@ -0,0 +1,35 @@
name: bazel
on:
push: {}
pull_request: {}
jobs:
build_and_test_default:
name: bazel.${{ matrix.os }}.${{ matrix.bzlmod && 'bzlmod' || 'no_bzlmod' }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
bzlmod: [false, true]
steps:
- uses: actions/checkout@v3
- name: mount bazel cache
uses: actions/cache@v3
env:
cache-name: bazel-cache
with:
path: "~/.cache/bazel"
key: ${{ env.cache-name }}-${{ matrix.os }}-${{ github.ref }}
restore-keys: |
${{ env.cache-name }}-${{ matrix.os }}-main
- name: build
run: |
bazel build ${{ matrix.bzlmod && '--enable_bzlmod' || '--noenable_bzlmod' }} //:benchmark //:benchmark_main //test/...
- name: test
run: |
bazel test ${{ matrix.bzlmod && '--enable_bzlmod' || '--noenable_bzlmod' }} --test_output=all //test/...


@ -0,0 +1,46 @@
name: build-and-test-min-cmake
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
jobs:
job:
name: ${{ matrix.os }}.min-cmake
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest]
steps:
- uses: actions/checkout@v3
- uses: lukka/get-cmake@latest
with:
cmakeVersion: 3.10.0
- name: create build environment
run: cmake -E make_directory ${{ runner.workspace }}/_build
- name: setup cmake initial cache
run: touch compiler-cache.cmake
- name: configure cmake
env:
CXX: ${{ matrix.compiler }}
shell: bash
working-directory: ${{ runner.workspace }}/_build
run: >
cmake -C ${{ github.workspace }}/compiler-cache.cmake
$GITHUB_WORKSPACE
-DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
-DCMAKE_CXX_VISIBILITY_PRESET=hidden
-DCMAKE_VISIBILITY_INLINES_HIDDEN=ON
- name: build
shell: bash
working-directory: ${{ runner.workspace }}/_build
run: cmake --build .


@ -0,0 +1,51 @@
name: build-and-test-perfcounters
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
jobs:
job:
# TODO(dominic): Extend this to include compiler and set through env: CC/CXX.
name: ${{ matrix.os }}.${{ matrix.build_type }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-22.04, ubuntu-20.04]
build_type: ['Release', 'Debug']
steps:
- uses: actions/checkout@v3
- name: install libpfm
run: |
sudo apt update
sudo apt -y install libpfm4-dev
- name: create build environment
run: cmake -E make_directory ${{ runner.workspace }}/_build
- name: configure cmake
shell: bash
working-directory: ${{ runner.workspace }}/_build
run: >
cmake $GITHUB_WORKSPACE
-DBENCHMARK_ENABLE_LIBPFM=1
-DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
- name: build
shell: bash
working-directory: ${{ runner.workspace }}/_build
run: cmake --build . --config ${{ matrix.build_type }}
# Skip testing, for now. It seems perf_event_open does not succeed on the
# hosting machine, very likely a permissions issue.
# TODO(mtrofin): Enable test.
# - name: test
# shell: bash
# working-directory: ${{ runner.workspace }}/_build
# run: ctest -C ${{ matrix.build_type }} --rerun-failed --output-on-failure


@ -2,37 +2,113 @@ name: build-and-test
on:
push:
branches: [ master ]
branches: [ main ]
pull_request:
branches: [ master ]
branches: [ main ]
jobs:
# TODO: add 32-bit builds (g++ and clang++) for ubuntu
# (requires g++-multilib and libc6:i386)
# TODO: add coverage build (requires lcov)
# TODO: add clang + libc++ builds for ubuntu
job:
# TODO(dominic): Extend this to include compiler and set through env: CC/CXX.
name: ${{ matrix.os }}.${{ matrix.build_type }}
name: ${{ matrix.os }}.${{ matrix.build_type }}.${{ matrix.lib }}.${{ matrix.compiler }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, ubuntu-16.04, ubuntu-20.04, macos-latest, windows-latest]
os: [ubuntu-22.04, ubuntu-20.04, macos-latest]
build_type: ['Release', 'Debug']
compiler: ['g++', 'clang++']
lib: ['shared', 'static']
steps:
- uses: actions/checkout@v2
- name: create build environment
run: cmake -E make_directory ${{ runner.workspace }}/_build
- name: configure cmake
shell: bash
working-directory: ${{ runner.workspace }}/_build
run: cmake -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
- name: build
shell: bash
working-directory: ${{ runner.workspace }}/_build
run: cmake --build . --config ${{ matrix.build_type }}
- name: test
shell: bash
working-directory: ${{ runner.workspace }}/_build
run: ctest -C ${{ matrix.build_type }}
- uses: actions/checkout@v3
- uses: lukka/get-cmake@latest
- name: create build environment
run: cmake -E make_directory ${{ runner.workspace }}/_build
- name: setup cmake initial cache
run: touch compiler-cache.cmake
- name: configure cmake
env:
CXX: ${{ matrix.compiler }}
shell: bash
working-directory: ${{ runner.workspace }}/_build
run: >
cmake -C ${{ github.workspace }}/compiler-cache.cmake
$GITHUB_WORKSPACE
-DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
-DBUILD_SHARED_LIBS=${{ matrix.lib == 'shared' }}
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
-DCMAKE_CXX_COMPILER=${{ env.CXX }}
-DCMAKE_CXX_VISIBILITY_PRESET=hidden
-DCMAKE_VISIBILITY_INLINES_HIDDEN=ON
- name: build
shell: bash
working-directory: ${{ runner.workspace }}/_build
run: cmake --build . --config ${{ matrix.build_type }}
- name: test
shell: bash
working-directory: ${{ runner.workspace }}/_build
run: ctest -C ${{ matrix.build_type }} -VV
msvc:
name: ${{ matrix.os }}.${{ matrix.build_type }}.${{ matrix.lib }}.${{ matrix.msvc }}
runs-on: ${{ matrix.os }}
defaults:
run:
shell: powershell
strategy:
fail-fast: false
matrix:
msvc:
- VS-16-2019
- VS-17-2022
arch:
- x64
build_type:
- Debug
- Release
lib:
- shared
- static
include:
- msvc: VS-16-2019
os: windows-2019
generator: 'Visual Studio 16 2019'
- msvc: VS-17-2022
os: windows-2022
generator: 'Visual Studio 17 2022'
steps:
- uses: actions/checkout@v2
- uses: lukka/get-cmake@latest
- name: configure cmake
run: >
cmake -S . -B _build/
-A ${{ matrix.arch }}
-G "${{ matrix.generator }}"
-DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
-DBUILD_SHARED_LIBS=${{ matrix.lib == 'shared' }}
- name: build
run: cmake --build _build/ --config ${{ matrix.build_type }}
- name: setup test environment
# Make sure gmock and benchmark DLLs can be found
run: >
echo "$((Get-Item .).FullName)/_build/bin/${{ matrix.build_type }}" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append;
echo "$((Get-Item .).FullName)/_build/src/${{ matrix.build_type }}" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append;
- name: test
run: ctest --test-dir _build/ -C ${{ matrix.build_type }} -VV

.github/workflows/clang-format-lint.yml (new file, 17 lines)

@ -0,0 +1,17 @@
name: clang-format-lint
on:
push: {}
pull_request: {}
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: DoozyX/clang-format-lint-action@v0.13
with:
source: './include/benchmark ./src ./test'
extensions: 'h,cc'
clangFormatVersion: 12
style: Google

.github/workflows/clang-tidy.yml (new file, 38 lines)

@ -0,0 +1,38 @@
name: clang-tidy
on:
push: {}
pull_request: {}
jobs:
job:
name: run-clang-tidy
runs-on: ubuntu-latest
strategy:
fail-fast: false
steps:
- uses: actions/checkout@v3
- name: install clang-tidy
run: sudo apt update && sudo apt -y install clang-tidy
- name: create build environment
run: cmake -E make_directory ${{ runner.workspace }}/_build
- name: configure cmake
shell: bash
working-directory: ${{ runner.workspace }}/_build
run: >
cmake $GITHUB_WORKSPACE
-DBENCHMARK_ENABLE_ASSEMBLY_TESTS=OFF
-DBENCHMARK_ENABLE_LIBPFM=OFF
-DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
-DCMAKE_C_COMPILER=clang
-DCMAKE_CXX_COMPILER=clang++
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON
-DGTEST_COMPILE_COMMANDS=OFF
- name: run
shell: bash
working-directory: ${{ runner.workspace }}/_build
run: run-clang-tidy

.github/workflows/doxygen.yml (new file, 28 lines)

@ -0,0 +1,28 @@
name: doxygen
on:
push:
branches: [main]
pull_request:
branches: [main]
jobs:
build-and-deploy:
name: Build HTML documentation
runs-on: ubuntu-latest
steps:
- name: Fetching sources
uses: actions/checkout@v3
- name: Installing build dependencies
run: |
sudo apt update
sudo apt install doxygen gcc git
- name: Creating build directory
run: mkdir build
- name: Building HTML documentation with Doxygen
run: |
cmake -S . -B build -DBENCHMARK_ENABLE_TESTING:BOOL=OFF -DBENCHMARK_ENABLE_DOXYGEN:BOOL=ON -DBENCHMARK_INSTALL_DOCS:BOOL=ON
cmake --build build --target benchmark_doxygen

.github/workflows/pylint.yml (new file, 28 lines)

@ -0,0 +1,28 @@
name: pylint
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
jobs:
pylint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python 3.8
uses: actions/setup-python@v1
with:
python-version: 3.8
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pylint pylint-exit conan
- name: Run pylint
run: |
pylint `find . -name '*.py'|xargs` || pylint-exit $?

.github/workflows/sanitizer.yml (new file, 96 lines)

@ -0,0 +1,96 @@
name: sanitizer
on:
push: {}
pull_request: {}
env:
UBSAN_OPTIONS: "print_stacktrace=1"
jobs:
job:
name: ${{ matrix.sanitizer }}.${{ matrix.build_type }}
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
build_type: ['Debug', 'RelWithDebInfo']
sanitizer: ['asan', 'ubsan', 'tsan', 'msan']
steps:
- uses: actions/checkout@v3
- name: configure msan env
if: matrix.sanitizer == 'msan'
run: |
echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=memory -fsanitize-memory-track-origins" >> $GITHUB_ENV
echo "LIBCXX_SANITIZER=MemoryWithOrigins" >> $GITHUB_ENV
- name: configure ubsan env
if: matrix.sanitizer == 'ubsan'
run: |
echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=undefined -fno-sanitize-recover=all" >> $GITHUB_ENV
echo "LIBCXX_SANITIZER=Undefined" >> $GITHUB_ENV
- name: configure asan env
if: matrix.sanitizer == 'asan'
run: |
echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=address -fno-sanitize-recover=all" >> $GITHUB_ENV
echo "LIBCXX_SANITIZER=Address" >> $GITHUB_ENV
- name: configure tsan env
if: matrix.sanitizer == 'tsan'
run: |
echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=thread -fno-sanitize-recover=all" >> $GITHUB_ENV
echo "LIBCXX_SANITIZER=Thread" >> $GITHUB_ENV
- name: fine-tune asan options
# in asan we get an error from std::regex. ignore it.
if: matrix.sanitizer == 'asan'
run: |
echo "ASAN_OPTIONS=alloc_dealloc_mismatch=0" >> $GITHUB_ENV
- name: setup clang
uses: egor-tensin/setup-clang@v1
with:
version: latest
platform: x64
- name: configure clang
run: |
echo "CC=cc" >> $GITHUB_ENV
echo "CXX=c++" >> $GITHUB_ENV
- name: build libc++ (non-asan)
if: matrix.sanitizer != 'asan'
run: |
"${GITHUB_WORKSPACE}/.github/libcxx-setup.sh"
echo "EXTRA_CXX_FLAGS=-stdlib=libc++ -L ${GITHUB_WORKSPACE}/llvm-build/lib -lc++abi -Isystem${GITHUB_WORKSPACE}/llvm-build/include -Isystem${GITHUB_WORKSPACE}/llvm-build/include/c++/v1 -Wl,-rpath,${GITHUB_WORKSPACE}/llvm-build/lib" >> $GITHUB_ENV
- name: create build environment
run: cmake -E make_directory ${{ runner.workspace }}/_build
- name: configure cmake
shell: bash
working-directory: ${{ runner.workspace }}/_build
run: >
VERBOSE=1
cmake $GITHUB_WORKSPACE
-DBENCHMARK_ENABLE_ASSEMBLY_TESTS=OFF
-DBENCHMARK_ENABLE_LIBPFM=OFF
-DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
-DCMAKE_C_COMPILER=${{ env.CC }}
-DCMAKE_CXX_COMPILER=${{ env.CXX }}
-DCMAKE_C_FLAGS="${{ env.EXTRA_FLAGS }}"
-DCMAKE_CXX_FLAGS="${{ env.EXTRA_FLAGS }} ${{ env.EXTRA_CXX_FLAGS }}"
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
- name: build
shell: bash
working-directory: ${{ runner.workspace }}/_build
run: cmake --build . --config ${{ matrix.build_type }}
- name: test
shell: bash
working-directory: ${{ runner.workspace }}/_build
run: ctest -C ${{ matrix.build_type }} -VV

.github/workflows/test_bindings.yml (new file, 29 lines)

@ -0,0 +1,29 @@
name: test-bindings
on:
push:
branches: [main]
pull_request:
branches: [main]
jobs:
python_bindings:
name: Test GBM Python bindings on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ ubuntu-latest, macos-latest, windows-2019 ]
steps:
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: 3.11
- name: Install GBM Python bindings on ${{ matrix.os}}
run:
python -m pip install wheel .
- name: Run bindings example on ${{ matrix.os }}
run:
python bindings/python/google_benchmark/example.py

.github/workflows/wheels.yml (new file, 79 lines)

@ -0,0 +1,79 @@
name: Build and upload Python wheels
on:
workflow_dispatch:
release:
types:
- published
jobs:
build_sdist:
name: Build source distribution
runs-on: ubuntu-latest
steps:
- name: Check out repo
uses: actions/checkout@v3
- name: Install Python 3.11
uses: actions/setup-python@v4
with:
python-version: 3.11
- name: Build and check sdist
run: |
python setup.py sdist
- name: Upload sdist
uses: actions/upload-artifact@v3
with:
name: dist
path: dist/*.tar.gz
build_wheels:
name: Build Google Benchmark wheels on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-2019]
steps:
- name: Check out Google Benchmark
uses: actions/checkout@v3
- name: Set up QEMU
if: runner.os == 'Linux'
uses: docker/setup-qemu-action@v2
with:
platforms: all
- name: Build wheels on ${{ matrix.os }} using cibuildwheel
uses: pypa/cibuildwheel@v2.14.1
env:
CIBW_BUILD: 'cp38-* cp39-* cp310-* cp311-*'
CIBW_SKIP: "*-musllinux_*"
CIBW_TEST_SKIP: "*-macosx_arm64"
CIBW_ARCHS_LINUX: x86_64 aarch64
CIBW_ARCHS_MACOS: x86_64 arm64
CIBW_ARCHS_WINDOWS: AMD64
CIBW_BEFORE_ALL_LINUX: bash .github/install_bazel.sh
CIBW_TEST_COMMAND: python {project}/bindings/python/google_benchmark/example.py
- name: Upload Google Benchmark ${{ matrix.os }} wheels
uses: actions/upload-artifact@v3
with:
name: dist
path: ./wheelhouse/*.whl
pypi_upload:
name: Publish google-benchmark wheels to PyPI
needs: [build_sdist, build_wheels]
runs-on: ubuntu-latest
steps:
- uses: actions/download-artifact@v3
with:
name: dist
path: dist
- uses: pypa/gh-action-pypi-publish@v1.6.4
with:
user: __token__
password: ${{ secrets.PYPI_PASSWORD }}

.gitignore (1 line changed)

@ -11,6 +11,7 @@
*.swp
*.pyc
__pycache__
.DS_Store
# lcov
*.lcov


@ -1,28 +0,0 @@
#!/usr/bin/env bash
# Install a newer CMake version
curl -sSL https://cmake.org/files/v3.6/cmake-3.6.1-Linux-x86_64.sh -o install-cmake.sh
chmod +x install-cmake.sh
sudo ./install-cmake.sh --prefix=/usr/local --skip-license
# Checkout LLVM sources
git clone --depth=1 https://github.com/llvm-mirror/llvm.git llvm-source
git clone --depth=1 https://github.com/llvm-mirror/libcxx.git llvm-source/projects/libcxx
git clone --depth=1 https://github.com/llvm-mirror/libcxxabi.git llvm-source/projects/libcxxabi
# Setup libc++ options
if [ -z "$BUILD_32_BITS" ]; then
export BUILD_32_BITS=OFF && echo disabling 32 bit build
fi
# Build and install libc++ (Use unstable ABI for better sanitizer coverage)
mkdir llvm-build && cd llvm-build
cmake -DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${COMPILER} \
-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_INSTALL_PREFIX=/usr \
-DLIBCXX_ABI_UNSTABLE=ON \
-DLLVM_USE_SANITIZER=${LIBCXX_SANITIZER} \
-DLLVM_BUILD_32_BITS=${BUILD_32_BITS} \
../llvm-source
make cxx -j2
sudo make install-cxxabi install-cxx
cd ../


@ -10,10 +10,6 @@ matrix:
packages:
- lcov
env: COMPILER=g++ C_COMPILER=gcc BUILD_TYPE=Coverage
- compiler: gcc
env: COMPILER=g++ C_COMPILER=gcc BUILD_TYPE=Debug
- compiler: gcc
env: COMPILER=g++ C_COMPILER=gcc BUILD_TYPE=Release
- compiler: gcc
addons:
apt:
@ -44,10 +40,6 @@ matrix:
- COMPILER=g++-6 C_COMPILER=gcc-6 BUILD_TYPE=Debug
- ENABLE_SANITIZER=1
- EXTRA_FLAGS="-fno-omit-frame-pointer -g -O2 -fsanitize=undefined,address -fuse-ld=gold"
- compiler: clang
env: COMPILER=clang++ C_COMPILER=clang BUILD_TYPE=Debug
- compiler: clang
env: COMPILER=clang++ C_COMPILER=clang BUILD_TYPE=Release
# Clang w/ libc++
- compiler: clang
dist: xenial
@ -146,16 +138,6 @@ matrix:
- ENABLE_SANITIZER=1
- EXTRA_FLAGS="-g -O2 -fno-omit-frame-pointer -fsanitize=thread -fno-sanitize-recover=all"
- EXTRA_CXX_FLAGS="-stdlib=libc++"
- os: osx
osx_image: xcode8.3
compiler: clang
env:
- COMPILER=clang++ BUILD_TYPE=Debug
- os: osx
osx_image: xcode8.3
compiler: clang
env:
- COMPILER=clang++ BUILD_TYPE=Release
- os: osx
osx_image: xcode8.3
compiler: clang
@ -164,15 +146,10 @@ matrix:
- BUILD_TYPE=Release
- BUILD_32_BITS=ON
- EXTRA_FLAGS="-m32"
- os: osx
osx_image: xcode9.4
compiler: gcc
env:
- COMPILER=g++-7 C_COMPILER=gcc-7 BUILD_TYPE=Debug
before_script:
- if [ -n "${LIBCXX_BUILD}" ]; then
source .travis-libcxx-setup.sh;
source .libcxx-setup.sh;
fi
- if [ -n "${ENABLE_SANITIZER}" ]; then
export EXTRA_OPTIONS="-DBENCHMARK_ENABLE_ASSEMBLY_TESTS=OFF";

AUTHORS (15 lines changed)

@ -13,6 +13,8 @@ Alex Steele <steeleal123@gmail.com>
Andriy Berestovskyy <berestovskyy@gmail.com>
Arne Beer <arne@twobeer.de>
Carto
Cezary Skrzyński <czars1988@gmail.com>
Christian Wassermann <christian_wassermann@web.de>
Christopher Seymour <chris.j.seymour@hotmail.com>
Colin Braley <braley.colin@gmail.com>
Daniel Harvey <danielharvey458@gmail.com>
@ -20,14 +22,18 @@ David Coeurjolly <david.coeurjolly@liris.cnrs.fr>
Deniz Evrenci <denizevrenci@gmail.com>
Dirac Research
Dominik Czarnota <dominik.b.czarnota@gmail.com>
Dominik Korman <kormandominik@gmail.com>
Donald Aingworth <donalds_junk_mail@yahoo.com>
Eric Backus <eric_backus@alum.mit.edu>
Eric Fiselier <eric@efcs.ca>
Eugene Zhuk <eugene.zhuk@gmail.com>
Evgeny Safronov <division494@gmail.com>
Fabien Pichot <pichot.fabien@gmail.com>
Federico Ficarelli <federico.ficarelli@gmail.com>
Felix Homann <linuxaudio@showlabor.de>
Gergő Szitár <szitar.gergo@gmail.com>
Google Inc.
Henrique Bucher <hbucher@gmail.com>
International Business Machines Corporation
Ismael Jimenez Martinez <ismael.jimenez.martinez@gmail.com>
Jern-Kuan Leong <jernkuan@gmail.com>
@ -38,19 +44,28 @@ Jussi Knuuttila <jussi.knuuttila@gmail.com>
Kaito Udagawa <umireon@gmail.com>
Kishan Kumar <kumar.kishan@outlook.com>
Lei Xu <eddyxu@gmail.com>
Marcel Jacobse <mjacobse@uni-bremen.de>
Matt Clarkson <mattyclarkson@gmail.com>
Maxim Vafin <maxvafin@gmail.com>
Mike Apodaca <gatorfax@gmail.com>
Min-Yih Hsu <yihshyng223@gmail.com>
MongoDB Inc.
Nick Hutchinson <nshutchinson@gmail.com>
Norman Heino <norman.heino@gmail.com>
Oleksandr Sochka <sasha.sochka@gmail.com>
Ori Livneh <ori.livneh@gmail.com>
Paul Redmond <paul.redmond@gmail.com>
Radoslav Yovchev <radoslav.tm@gmail.com>
Raghu Raja <raghu@enfabrica.net>
Rainer Orth <ro@cebitec.uni-bielefeld.de>
Roman Lebedev <lebedev.ri@gmail.com>
Sayan Bhattacharjee <aero.sayan@gmail.com>
Shapr3D <google-contributors@shapr3d.com>
Shuo Chen <chenshuo@chenshuo.com>
Staffan Tjernstrom <staffantj@gmail.com>
Steinar H. Gunderson <sgunderson@bigfoot.com>
Stripe, Inc.
Tobias Schmidt <tobias.schmidt@in.tum.de>
Yixuan Qiu <yixuanq@gmail.com>
Yusuke Suzuki <utatane.tea@gmail.com>
Zbigniew Skowron <zbychs@gmail.com>


@ -1,14 +1,36 @@
licenses(["notice"])
config_setting(
name = "qnx",
constraint_values = ["@platforms//os:qnx"],
values = {
"cpu": "x64_qnx",
},
visibility = [":__subpackages__"],
)
config_setting(
name = "windows",
constraint_values = ["@platforms//os:windows"],
values = {
"cpu": "x64_windows",
},
visibility = [":__subpackages__"],
)
load("@rules_cc//cc:defs.bzl", "cc_library")
config_setting(
name = "macos",
constraint_values = ["@platforms//os:macos"],
visibility = ["//visibility:public"],
)
config_setting(
name = "perfcounters",
define_values = {
"pfm": "1",
},
visibility = [":__subpackages__"],
)
cc_library(
name = "benchmark",
@ -19,19 +41,40 @@ cc_library(
],
exclude = ["src/benchmark_main.cc"],
),
hdrs = ["include/benchmark/benchmark.h"],
hdrs = [
"include/benchmark/benchmark.h",
"include/benchmark/export.h",
],
linkopts = select({
":windows": ["-DEFAULTLIB:shlwapi.lib"],
"//conditions:default": ["-pthread"],
}),
copts = select({
":windows": [],
"//conditions:default": ["-Werror=old-style-cast"],
}),
strip_include_prefix = "include",
visibility = ["//visibility:public"],
# Only static linking is allowed; no .so will be produced.
# Using `defines` (i.e. not `local_defines`) means that no
# dependent rules need to bother about defining the macro.
linkstatic = True,
defines = [
"BENCHMARK_STATIC_DEFINE",
] + select({
":perfcounters": ["HAVE_LIBPFM"],
"//conditions:default": [],
}),
deps = select({
":perfcounters": ["@libpfm//:libpfm"],
"//conditions:default": [],
}),
)
cc_library(
name = "benchmark_main",
srcs = ["src/benchmark_main.cc"],
hdrs = ["include/benchmark/benchmark.h"],
hdrs = ["include/benchmark/benchmark.h", "include/benchmark/export.h"],
strip_include_prefix = "include",
visibility = ["//visibility:public"],
deps = [":benchmark"],


@ -1,30 +1,34 @@
cmake_minimum_required (VERSION 3.5.1)
# Require CMake 3.10. If available, use the policies up to CMake 3.22.
cmake_minimum_required (VERSION 3.10...3.22)
foreach(p
CMP0048 # OK to clear PROJECT_VERSION on project()
CMP0054 # CMake 3.1
CMP0056 # export EXE_LINKER_FLAGS to try_run
CMP0057 # Support no if() IN_LIST operator
CMP0063 # Honor visibility properties for all targets
CMP0077 # Allow option() overrides in importing projects
)
if(POLICY ${p})
cmake_policy(SET ${p} NEW)
endif()
endforeach()
project (benchmark CXX)
project (benchmark VERSION 1.8.3 LANGUAGES CXX)
option(BENCHMARK_ENABLE_TESTING "Enable testing of the benchmark library." ON)
option(BENCHMARK_ENABLE_EXCEPTIONS "Enable the use of exceptions in the benchmark library." ON)
option(BENCHMARK_ENABLE_LTO "Enable link time optimisation of the benchmark library." OFF)
option(BENCHMARK_USE_LIBCXX "Build and test using libc++ as the standard library." OFF)
option(BENCHMARK_ENABLE_WERROR "Build Release candidates with -Werror." ON)
option(BENCHMARK_FORCE_WERROR "Build Release candidates with -Werror regardless of compiler issues." OFF)
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "PGI")
# PGC++ maybe reporting false positives.
set(BENCHMARK_ENABLE_WERROR OFF)
endif()
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "NVHPC")
set(BENCHMARK_ENABLE_WERROR OFF)
endif()
if(BENCHMARK_FORCE_WERROR)
set(BENCHMARK_ENABLE_WERROR ON)
endif(BENCHMARK_FORCE_WERROR)
if(NOT MSVC)
option(BENCHMARK_BUILD_32_BITS "Build a 32 bit version of the library." OFF)
else()
set(BENCHMARK_BUILD_32_BITS OFF CACHE BOOL "Build a 32 bit version of the library - unsupported when using MSVC)" FORCE)
endif()
option(BENCHMARK_ENABLE_INSTALL "Enable installation of benchmark. (Projects embedding benchmark may want to turn this OFF.)" ON)
option(BENCHMARK_ENABLE_DOXYGEN "Build documentation with Doxygen." OFF)
option(BENCHMARK_INSTALL_DOCS "Enable installation of documentation." ON)
# Allow unmet dependencies to be met using CMake's ExternalProject mechanics, which
# may require downloading the source code.
@ -33,8 +37,25 @@ option(BENCHMARK_DOWNLOAD_DEPENDENCIES "Allow the downloading and in-tree buildi
# This option can be used to disable building and running unit tests which depend on gtest
# in cases where it is not possible to build or find a valid version of gtest.
option(BENCHMARK_ENABLE_GTEST_TESTS "Enable building the unit tests which depend on gtest" ON)
option(BENCHMARK_USE_BUNDLED_GTEST "Use bundled GoogleTest. If disabled, the find_package(GTest) will be used." ON)
option(BENCHMARK_ENABLE_LIBPFM "Enable performance counters provided by libpfm" OFF)
# Export only public symbols
set(CMAKE_CXX_VISIBILITY_PRESET hidden)
set(CMAKE_VISIBILITY_INLINES_HIDDEN ON)
if(MSVC)
# As of CMake 3.18, CMAKE_SYSTEM_PROCESSOR is not set properly for MSVC and
# cross-compilation (e.g. Host=x86_64, target=aarch64) requires using the
# undocumented, but working variable.
# See https://gitlab.kitware.com/cmake/cmake/-/issues/15170
set(CMAKE_SYSTEM_PROCESSOR ${MSVC_CXX_ARCHITECTURE_ID})
if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "ARM")
set(CMAKE_CROSSCOMPILING TRUE)
endif()
endif()
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
set(ENABLE_ASSEMBLY_TESTS_DEFAULT OFF)
function(should_enable_assembly_tests)
if(CMAKE_BUILD_TYPE)
@ -81,23 +102,42 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
include(GetGitVersion)
get_git_version(GIT_VERSION)
# If no git version can be determined, use the version
# from the project() command
if ("${GIT_VERSION}" STREQUAL "0.0.0")
set(VERSION "${benchmark_VERSION}")
else()
set(VERSION "${GIT_VERSION}")
endif()
# Tell the user what versions we are using
string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" VERSION ${GIT_VERSION})
message(STATUS "Version: ${VERSION}")
message(STATUS "Google Benchmark version: ${VERSION}")
# The version of the libraries
set(GENERIC_LIB_VERSION ${VERSION})
string(SUBSTRING ${VERSION} 0 1 GENERIC_LIB_SOVERSION)
# Import our CMake modules
include(CheckCXXCompilerFlag)
include(AddCXXCompilerFlag)
include(CheckCXXCompilerFlag)
include(CheckLibraryExists)
include(CXXFeatureCheck)
check_library_exists(rt shm_open "" HAVE_LIB_RT)
if (BENCHMARK_BUILD_32_BITS)
add_required_cxx_compiler_flag(-m32)
endif()
if (MSVC)
set(BENCHMARK_CXX_STANDARD 14)
else()
set(BENCHMARK_CXX_STANDARD 11)
endif()
set(CMAKE_CXX_STANDARD ${BENCHMARK_CXX_STANDARD})
set(CMAKE_CXX_STANDARD_REQUIRED YES)
set(CMAKE_CXX_EXTENSIONS OFF)
if (MSVC)
# Turn compiler warnings up to 11
string(REGEX REPLACE "[-/]W[1-4]" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
@ -130,44 +170,43 @@ if (MSVC)
set(CMAKE_EXE_LINKER_FLAGS_MINSIZEREL "${CMAKE_EXE_LINKER_FLAGS_MINSIZEREL} /LTCG")
endif()
else()
# Try and enable C++11. Don't use C++14 because it doesn't work in some
# configurations.
add_cxx_compiler_flag(-std=c++11)
if (NOT HAVE_CXX_FLAG_STD_CXX11)
add_cxx_compiler_flag(-std=c++0x)
endif()
# Turn compiler warnings up to 11
add_cxx_compiler_flag(-Wall)
add_cxx_compiler_flag(-Wextra)
add_cxx_compiler_flag(-Wshadow)
add_cxx_compiler_flag(-Werror RELEASE)
add_cxx_compiler_flag(-Werror RELWITHDEBINFO)
add_cxx_compiler_flag(-Werror MINSIZEREL)
# Disabled until googletest (gmock) stops emitting variadic macro warnings
#add_cxx_compiler_flag(-pedantic)
#add_cxx_compiler_flag(-pedantic-errors)
add_cxx_compiler_flag(-Wfloat-equal)
add_cxx_compiler_flag(-Wold-style-cast)
if(BENCHMARK_ENABLE_WERROR)
add_cxx_compiler_flag(-Werror)
endif()
if (NOT BENCHMARK_ENABLE_TESTING)
# Disable warning when compiling tests as gtest does not use 'override'.
add_cxx_compiler_flag(-Wsuggest-override)
endif()
add_cxx_compiler_flag(-pedantic)
add_cxx_compiler_flag(-pedantic-errors)
add_cxx_compiler_flag(-Wshorten-64-to-32)
add_cxx_compiler_flag(-fstrict-aliasing)
# Disable warnings regarding deprecated parts of the library while building
# and testing those parts of the library.
add_cxx_compiler_flag(-Wno-deprecated-declarations)
if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel" OR CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM")
# Intel silently ignores '-Wno-deprecated-declarations',
# warning no. 1786 must be explicitly disabled.
# See #631 for rationale.
add_cxx_compiler_flag(-wd1786)
add_cxx_compiler_flag(-fno-finite-math-only)
endif()
# Disable deprecation warnings for release builds (when -Werror is enabled).
add_cxx_compiler_flag(-Wno-deprecated RELEASE)
add_cxx_compiler_flag(-Wno-deprecated RELWITHDEBINFO)
add_cxx_compiler_flag(-Wno-deprecated MINSIZEREL)
if(BENCHMARK_ENABLE_WERROR)
add_cxx_compiler_flag(-Wno-deprecated)
endif()
if (NOT BENCHMARK_ENABLE_EXCEPTIONS)
add_cxx_compiler_flag(-fno-exceptions)
endif()
if (HAVE_CXX_FLAG_FSTRICT_ALIASING)
if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "Intel") #ICC17u2: Many false positives for Wstrict-aliasing
if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "Intel" AND NOT CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM") #ICC17u2: Many false positives for Wstrict-aliasing
add_cxx_compiler_flag(-Wstrict-aliasing)
endif()
endif()
@ -176,12 +215,12 @@ else()
add_cxx_compiler_flag(-wd654)
add_cxx_compiler_flag(-Wthread-safety)
if (HAVE_CXX_FLAG_WTHREAD_SAFETY)
cxx_feature_check(THREAD_SAFETY_ATTRIBUTES)
cxx_feature_check(THREAD_SAFETY_ATTRIBUTES "-DINCLUDE_DIRECTORIES=${PROJECT_SOURCE_DIR}/include")
endif()
# On most UNIX like platforms g++ and clang++ define _GNU_SOURCE as a
# predefined macro, which turns on all of the wonderful libc extensions.
# However g++ doesn't do this in Cygwin so we have to define it ourselfs
# However g++ doesn't do this in Cygwin so we have to define it ourselves
# since we depend on GNU/POSIX/BSD extensions.
if (CYGWIN)
add_definitions(-D_GNU_SOURCE=1)
@ -194,6 +233,7 @@ else()
# Link time optimisation
if (BENCHMARK_ENABLE_LTO)
add_cxx_compiler_flag(-flto)
add_cxx_compiler_flag(-Wno-lto-type-mismatch)
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
find_program(GCC_AR gcc-ar)
if (GCC_AR)
@ -231,7 +271,8 @@ if (BENCHMARK_USE_LIBCXX)
if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
add_cxx_compiler_flag(-stdlib=libc++)
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR
"${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
"${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel" OR
"${CMAKE_CXX_COMPILER_ID}" STREQUAL "IntelLLVM")
add_cxx_compiler_flag(-nostdinc++)
message(WARNING "libc++ header path must be manually specified using CMAKE_CXX_FLAGS")
# Adding -nodefaultlibs directly to CMAKE_<TYPE>_LINKER_FLAGS will break
@ -245,11 +286,17 @@ if (BENCHMARK_USE_LIBCXX)
endif()
endif(BENCHMARK_USE_LIBCXX)
set(EXTRA_CXX_FLAGS "")
if (WIN32 AND "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
# Clang on Windows fails to compile the regex feature check under C++11
set(EXTRA_CXX_FLAGS "-DCMAKE_CXX_STANDARD=14")
endif()
# C++ feature checks
# Determine the correct regular expression engine to use
cxx_feature_check(STD_REGEX)
cxx_feature_check(GNU_POSIX_REGEX)
cxx_feature_check(POSIX_REGEX)
cxx_feature_check(STD_REGEX ${EXTRA_CXX_FLAGS})
cxx_feature_check(GNU_POSIX_REGEX ${EXTRA_CXX_FLAGS})
cxx_feature_check(POSIX_REGEX ${EXTRA_CXX_FLAGS})
if(NOT HAVE_STD_REGEX AND NOT HAVE_GNU_POSIX_REGEX AND NOT HAVE_POSIX_REGEX)
message(FATAL_ERROR "Failed to determine the source files for the regular expression backend")
endif()
@ -257,10 +304,16 @@ if (NOT BENCHMARK_ENABLE_EXCEPTIONS AND HAVE_STD_REGEX
AND NOT HAVE_GNU_POSIX_REGEX AND NOT HAVE_POSIX_REGEX)
message(WARNING "Using std::regex with exceptions disabled is not fully supported")
endif()
cxx_feature_check(STEADY_CLOCK)
# Ensure we have pthreads
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
cxx_feature_check(PTHREAD_AFFINITY)
if (BENCHMARK_ENABLE_LIBPFM)
find_package(PFM)
endif()
# Set up directories
include_directories(${PROJECT_SOURCE_DIR}/include)
@ -273,7 +326,15 @@ if (BENCHMARK_ENABLE_TESTING)
if (BENCHMARK_ENABLE_GTEST_TESTS AND
NOT (TARGET gtest AND TARGET gtest_main AND
TARGET gmock AND TARGET gmock_main))
include(GoogleTest)
if (BENCHMARK_USE_BUNDLED_GTEST)
include(GoogleTest)
else()
find_package(GTest CONFIG REQUIRED)
add_library(gtest ALIAS GTest::gtest)
add_library(gtest_main ALIAS GTest::gtest_main)
add_library(gmock ALIAS GTest::gmock)
add_library(gmock_main ALIAS GTest::gmock_main)
endif()
endif()
add_subdirectory(test)
endif()


@ -22,12 +22,16 @@
#
# Please keep the list sorted.
Abhina Sreeskantharajan <abhina.sreeskantharajan@ibm.com>
Albert Pretorius <pretoalb@gmail.com>
Alex Steele <steelal123@gmail.com>
Andriy Berestovskyy <berestovskyy@gmail.com>
Arne Beer <arne@twobeer.de>
Bátor Tallér <bator.taller@shapr3d.com>
Billy Robert O'Neal III <billy.oneal@gmail.com> <bion@microsoft.com>
Cezary Skrzyński <czars1988@gmail.com>
Chris Kennelly <ckennelly@google.com> <ckennelly@ckennelly.com>
Christian Wassermann <christian_wassermann@web.de>
Christopher Seymour <chris.j.seymour@hotmail.com>
Colin Braley <braley.colin@gmail.com>
Cyrille Faucheux <cyrille.faucheux@gmail.com>
@ -36,15 +40,20 @@ David Coeurjolly <david.coeurjolly@liris.cnrs.fr>
Deniz Evrenci <denizevrenci@gmail.com>
Dominic Hamon <dma@stripysock.com> <dominic@google.com>
Dominik Czarnota <dominik.b.czarnota@gmail.com>
Dominik Korman <kormandominik@gmail.com>
Donald Aingworth <donalds_junk_mail@yahoo.com>
Eric Backus <eric_backus@alum.mit.edu>
Eric Fiselier <eric@efcs.ca>
Eugene Zhuk <eugene.zhuk@gmail.com>
Evgeny Safronov <division494@gmail.com>
Fabien Pichot <pichot.fabien@gmail.com>
Fanbo Meng <fanbo.meng@ibm.com>
Federico Ficarelli <federico.ficarelli@gmail.com>
Felix Homann <linuxaudio@showlabor.de>
Geoffrey Martin-Noble <gcmn@google.com> <gmngeoffrey@gmail.com>
Gergő Szitár <szitar.gergo@gmail.com>
Hannes Hauswedell <h2@fsfe.org>
Henrique Bucher <hbucher@gmail.com>
Ismael Jimenez Martinez <ismael.jimenez.martinez@gmail.com>
Jern-Kuan Leong <jernkuan@gmail.com>
JianXiong Zhou <zhoujianxiong2@gmail.com>
@ -52,25 +61,33 @@ Joao Paulo Magalhaes <joaoppmagalhaes@gmail.com>
John Millikin <jmillikin@stripe.com>
Jordan Williams <jwillikers@protonmail.com>
Jussi Knuuttila <jussi.knuuttila@gmail.com>
Kai Wolf <kai.wolf@gmail.com>
Kaito Udagawa <umireon@gmail.com>
Kai Wolf <kai.wolf@gmail.com>
Kishan Kumar <kumar.kishan@outlook.com>
Lei Xu <eddyxu@gmail.com>
Marcel Jacobse <mjacobse@uni-bremen.de>
Matt Clarkson <mattyclarkson@gmail.com>
Maxim Vafin <maxvafin@gmail.com>
Mike Apodaca <gatorfax@gmail.com>
Min-Yih Hsu <yihshyng223@gmail.com>
Nick Hutchinson <nshutchinson@gmail.com>
Norman Heino <norman.heino@gmail.com>
Oleksandr Sochka <sasha.sochka@gmail.com>
Ori Livneh <ori.livneh@gmail.com>
Pascal Leroy <phl@google.com>
Paul Redmond <paul.redmond@gmail.com>
Pierre Phaneuf <pphaneuf@google.com>
Radoslav Yovchev <radoslav.tm@gmail.com>
Raghu Raja <raghu@enfabrica.net>
Rainer Orth <ro@cebitec.uni-bielefeld.de>
Raul Marin <rmrodriguez@cartodb.com>
Ray Glover <ray.glover@uk.ibm.com>
Robert Guo <robert.guo@mongodb.com>
Roman Lebedev <lebedev.ri@gmail.com>
Sayan Bhattacharjee <aero.sayan@gmail.com>
Shuo Chen <chenshuo@chenshuo.com>
Steven Wan <wan.yu@ibm.com>
Tobias Schmidt <tobias.schmidt@in.tum.de>
Tobias Ulvgård <tobias.ulvgard@dirac.se>
Tom Madams <tom.ej.madams@gmail.com> <tmadams@google.com>
Yixuan Qiu <yixuanq@gmail.com>

MODULE.bazel (new file, 24 lines)

@ -0,0 +1,24 @@
module(name = "google_benchmark", version="1.8.3")
bazel_dep(name = "bazel_skylib", version = "1.4.1")
bazel_dep(name = "platforms", version = "0.0.6")
bazel_dep(name = "rules_foreign_cc", version = "0.9.0")
bazel_dep(name = "rules_cc", version = "0.0.6")
bazel_dep(name = "rules_python", version = "0.24.0", dev_dependency = True)
bazel_dep(name = "googletest", version = "1.12.1", repo_name = "com_google_googletest", dev_dependency = True)
bazel_dep(name = "libpfm", version = "4.11.0")
# Register a toolchain for Python 3.9 to be able to build numpy. Python
# versions >=3.10 are problematic.
# A second reason for this is to be able to build Python hermetically instead
# of relying on the changing default version from rules_python.
python = use_extension("@rules_python//python/extensions:python.bzl", "python", dev_dependency = True)
python.toolchain(python_version = "3.9")
pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip", dev_dependency = True)
pip.parse(
hub_name="tools_pip_deps",
python_version = "3.9",
requirements_lock="//tools:requirements.txt")
use_repo(pip, "tools_pip_deps")

README.md (1131 lines changed; file diff suppressed because it is too large)


@ -1,37 +1,22 @@
workspace(name = "com_github_google_benchmark")
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
load("//:bazel/benchmark_deps.bzl", "benchmark_deps")
http_archive(
name = "rules_cc",
strip_prefix = "rules_cc-a508235df92e71d537fcbae0c7c952ea6957a912",
urls = ["https://github.com/bazelbuild/rules_cc/archive/a508235df92e71d537fcbae0c7c952ea6957a912.zip"],
)
benchmark_deps()
http_archive(
name = "com_google_absl",
sha256 = "f41868f7a938605c92936230081175d1eae87f6ea2c248f41077c8f88316f111",
strip_prefix = "abseil-cpp-20200225.2",
urls = ["https://github.com/abseil/abseil-cpp/archive/20200225.2.tar.gz"],
)
load("@rules_foreign_cc//foreign_cc:repositories.bzl", "rules_foreign_cc_dependencies")
http_archive(
name = "com_google_googletest",
strip_prefix = "googletest-3f0cf6b62ad1eb50d8736538363d3580dd640c3e",
urls = ["https://github.com/google/googletest/archive/3f0cf6b62ad1eb50d8736538363d3580dd640c3e.zip"],
)
rules_foreign_cc_dependencies()
http_archive(
name = "pybind11",
build_file = "@//bindings/python:pybind11.BUILD",
sha256 = "1eed57bc6863190e35637290f97a20c81cfe4d9090ac0a24f3bbf08f265eb71d",
strip_prefix = "pybind11-2.4.3",
urls = ["https://github.com/pybind/pybind11/archive/v2.4.3.tar.gz"],
load("@rules_python//python:pip.bzl", pip3_install="pip_install")
pip3_install(
name = "tools_pip_deps",
requirements = "//tools:requirements.txt",
)
new_local_repository(
name = "python_headers",
build_file = "@//bindings/python:python_headers.BUILD",
path = "/usr/include/python3.6", # May be overwritten by setup.py.
path = "<PYTHON_INCLUDE_PATH>", # May be overwritten by setup.py.
)

WORKSPACE.bzlmod (new file, 2 lines)

@ -0,0 +1,2 @@
# This file marks the root of the Bazel workspace.
# See MODULE.bazel for dependencies and setup.


@ -1 +1,2 @@
theme: jekyll-theme-midnight
theme: jekyll-theme-midnight
markdown: GFM

bazel/benchmark_deps.bzl (new file, 65 lines)

@ -0,0 +1,65 @@
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
load("@bazel_tools//tools/build_defs/repo:git.bzl", "new_git_repository")
def benchmark_deps():
"""Loads dependencies required to build Google Benchmark."""
if "bazel_skylib" not in native.existing_rules():
http_archive(
name = "bazel_skylib",
sha256 = "f7be3474d42aae265405a592bb7da8e171919d74c16f082a5457840f06054728",
urls = [
"https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.2.1/bazel-skylib-1.2.1.tar.gz",
"https://github.com/bazelbuild/bazel-skylib/releases/download/1.2.1/bazel-skylib-1.2.1.tar.gz",
],
)
if "rules_foreign_cc" not in native.existing_rules():
http_archive(
name = "rules_foreign_cc",
sha256 = "bcd0c5f46a49b85b384906daae41d277b3dc0ff27c7c752cc51e43048a58ec83",
strip_prefix = "rules_foreign_cc-0.7.1",
url = "https://github.com/bazelbuild/rules_foreign_cc/archive/0.7.1.tar.gz",
)
if "rules_python" not in native.existing_rules():
http_archive(
name = "rules_python",
url = "https://github.com/bazelbuild/rules_python/releases/download/0.1.0/rules_python-0.1.0.tar.gz",
sha256 = "b6d46438523a3ec0f3cead544190ee13223a52f6a6765a29eae7b7cc24cc83a0",
)
if "com_google_absl" not in native.existing_rules():
http_archive(
name = "com_google_absl",
sha256 = "f41868f7a938605c92936230081175d1eae87f6ea2c248f41077c8f88316f111",
strip_prefix = "abseil-cpp-20200225.2",
urls = ["https://github.com/abseil/abseil-cpp/archive/20200225.2.tar.gz"],
)
if "com_google_googletest" not in native.existing_rules():
new_git_repository(
name = "com_google_googletest",
remote = "https://github.com/google/googletest.git",
tag = "release-1.11.0",
)
if "nanobind" not in native.existing_rules():
new_git_repository(
name = "nanobind",
remote = "https://github.com/wjakob/nanobind.git",
tag = "v1.4.0",
build_file = "@//bindings/python:nanobind.BUILD",
recursive_init_submodules = True,
)
if "libpfm" not in native.existing_rules():
# Downloaded from v4.9.0 tag at https://sourceforge.net/p/perfmon2/libpfm4/ref/master/tags/
http_archive(
name = "libpfm",
build_file = str(Label("//tools:libpfm.BUILD.bazel")),
sha256 = "5da5f8872bde14b3634c9688d980f68bda28b510268723cc12973eedbab9fecc",
type = "tar.gz",
strip_prefix = "libpfm-4.11.0",
urls = ["https://sourceforge.net/projects/perfmon2/files/libpfm4/libpfm-4.11.0.tar.gz/download"],
)


@ -8,8 +8,8 @@ def py_extension(name, srcs, hdrs = [], copts = [], features = [], deps = []):
shared_lib_name = name + shared_lib_suffix
native.cc_binary(
name = shared_lib_name,
linkshared = 1,
linkstatic = 1,
linkshared = True,
linkstatic = True,
srcs = srcs + hdrs,
copts = copts,
features = features,


@ -6,7 +6,6 @@ py_library(
visibility = ["//visibility:public"],
deps = [
":_benchmark",
# pip; absl:app
],
)
@ -17,10 +16,13 @@ py_extension(
"-fexceptions",
"-fno-strict-aliasing",
],
features = ["-use_header_modules"],
features = [
"-use_header_modules",
"-parse_headers",
],
deps = [
"//:benchmark",
"@pybind11",
"@nanobind",
"@python_headers",
],
)


@ -26,42 +26,137 @@ Example usage:
if __name__ == '__main__':
benchmark.main()
"""
import atexit
from absl import app
from google_benchmark import _benchmark
from google_benchmark._benchmark import (
Counter,
kNanosecond,
kMicrosecond,
kMillisecond,
kSecond,
oNone,
o1,
oN,
oNSquared,
oNCubed,
oLogN,
oNLogN,
oAuto,
oLambda,
State,
)
__all__ = [
"register",
"main",
"Counter",
"kNanosecond",
"kMicrosecond",
"kMillisecond",
"kSecond",
"oNone",
"o1",
"oN",
"oNSquared",
"oNCubed",
"oLogN",
"oNLogN",
"oAuto",
"oLambda",
"State",
]
__version__ = "0.1.0"
__version__ = "1.8.3"
def register(f=None, *, name=None):
if f is None:
return lambda f: register(f, name=name)
if name is None:
name = f.__name__
_benchmark.RegisterBenchmark(name, f)
return f
class __OptionMaker:
"""A stateless class to collect benchmark options.
Collect all decorator calls like @option.range(start=0, limit=1<<5).
"""
class Options:
"""Pure data class to store options calls, along with the benchmarked function."""
def __init__(self, func):
self.func = func
self.builder_calls = []
@classmethod
def make(cls, func_or_options):
"""Make Options from Options or the benchmarked function."""
if isinstance(func_or_options, cls.Options):
return func_or_options
return cls.Options(func_or_options)
def __getattr__(self, builder_name):
"""Append option call in the Options."""
# The function that gets returned on @option.range(start=0, limit=1<<5).
def __builder_method(*args, **kwargs):
# The decorator that gets called, either with the benchmarked function
# or the previous Options
def __decorator(func_or_options):
options = self.make(func_or_options)
options.builder_calls.append((builder_name, args, kwargs))
# The decorator returns Options so it is not technically a decorator
# and needs a final call to @register
return options
return __decorator
return __builder_method
# Alias for nicer API.
# We have to instantiate an object, even if stateless, to be able to use __getattr__
# on option.range
option = __OptionMaker()
def register(undefined=None, *, name=None):
"""Register function for benchmarking."""
if undefined is None:
# Decorator is called without parenthesis so we return a decorator
return lambda f: register(f, name=name)
# We have either the function to benchmark (simple case) or an instance of Options
# (@option._ case).
options = __OptionMaker.make(undefined)
if name is None:
name = options.func.__name__
# We register the benchmark and reproduce all the @option._ calls onto the
# benchmark builder pattern
benchmark = _benchmark.RegisterBenchmark(name, options.func)
for name, args, kwargs in options.builder_calls[::-1]:
getattr(benchmark, name)(*args, **kwargs)
# return the benchmarked function because the decorator does not modify it
return options.func
def _flags_parser(argv):
argv = _benchmark.Initialize(argv)
return app.parse_flags_with_usage(argv)
argv = _benchmark.Initialize(argv)
return app.parse_flags_with_usage(argv)
def _run_benchmarks(argv):
if len(argv) > 1:
raise app.UsageError('Too many command-line arguments.')
return _benchmark.RunSpecifiedBenchmarks()
if len(argv) > 1:
raise app.UsageError("Too many command-line arguments.")
return _benchmark.RunSpecifiedBenchmarks()
def main(argv=None):
return app.run(_run_benchmarks, argv=argv, flags_parser=_flags_parser)
return app.run(_run_benchmarks, argv=argv, flags_parser=_flags_parser)
# Methods for use with custom main function.
initialize = _benchmark.Initialize
run_benchmarks = _benchmark.RunSpecifiedBenchmarks
atexit.register(_benchmark.ClearRegisteredBenchmarks)
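
A minimal usage sketch of the decorator API added above, mirroring the bindings/python/google_benchmark/example.py changes later in this commit (the benchmark name sum_range is only illustrative):

import google_benchmark as benchmark

@benchmark.register
@benchmark.option.range_multiplier(2)
@benchmark.option.range(1 << 10, 1 << 18)
@benchmark.option.unit(benchmark.kMicrosecond)
def sum_range(state):
    # Each iteration sums a range whose size comes from the registered args.
    while state:
        sum(range(state.range(0)))

if __name__ == "__main__":
    benchmark.main()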


@ -1,11 +1,17 @@
// Benchmark for Python.
#include "benchmark/benchmark.h"
#include "pybind11/pybind11.h"
#include "pybind11/stl.h"
#include "nanobind/nanobind.h"
#include "nanobind/operators.h"
#include "nanobind/stl/bind_map.h"
#include "nanobind/stl/string.h"
#include "nanobind/stl/vector.h"
NB_MAKE_OPAQUE(benchmark::UserCounters);
namespace {
namespace py = ::pybind11;
namespace nb = nanobind;
std::vector<std::string> Initialize(const std::vector<std::string>& argv) {
// The `argv` pointers here become invalid when this function returns, but
@ -28,21 +34,151 @@ std::vector<std::string> Initialize(const std::vector<std::string>& argv) {
return remaining_argv;
}
void RegisterBenchmark(const char* name, py::function f) {
benchmark::RegisterBenchmark(name, [f](benchmark::State& state) {
f(&state);
});
benchmark::internal::Benchmark* RegisterBenchmark(const std::string& name,
nb::callable f) {
return benchmark::RegisterBenchmark(
name, [f](benchmark::State& state) { f(&state); });
}
PYBIND11_MODULE(_benchmark, m) {
NB_MODULE(_benchmark, m) {
using benchmark::TimeUnit;
nb::enum_<TimeUnit>(m, "TimeUnit")
.value("kNanosecond", TimeUnit::kNanosecond)
.value("kMicrosecond", TimeUnit::kMicrosecond)
.value("kMillisecond", TimeUnit::kMillisecond)
.value("kSecond", TimeUnit::kSecond)
.export_values();
using benchmark::BigO;
nb::enum_<BigO>(m, "BigO")
.value("oNone", BigO::oNone)
.value("o1", BigO::o1)
.value("oN", BigO::oN)
.value("oNSquared", BigO::oNSquared)
.value("oNCubed", BigO::oNCubed)
.value("oLogN", BigO::oLogN)
.value("oNLogN", BigO::oNLogN)
.value("oAuto", BigO::oAuto)
.value("oLambda", BigO::oLambda)
.export_values();
using benchmark::internal::Benchmark;
nb::class_<Benchmark>(m, "Benchmark")
// For methods returning a pointer to the current object, reference
// return policy is used to ask nanobind not to take ownership of the
// returned object and avoid calling delete on it.
// https://pybind11.readthedocs.io/en/stable/advanced/functions.html#return-value-policies
//
// For methods taking a const std::vector<...>&, a copy is created
// because it is bound to a Python list.
// https://pybind11.readthedocs.io/en/stable/advanced/cast/stl.html
.def("unit", &Benchmark::Unit, nb::rv_policy::reference)
.def("arg", &Benchmark::Arg, nb::rv_policy::reference)
.def("args", &Benchmark::Args, nb::rv_policy::reference)
.def("range", &Benchmark::Range, nb::rv_policy::reference,
nb::arg("start"), nb::arg("limit"))
.def("dense_range", &Benchmark::DenseRange,
nb::rv_policy::reference, nb::arg("start"),
nb::arg("limit"), nb::arg("step") = 1)
.def("ranges", &Benchmark::Ranges, nb::rv_policy::reference)
.def("args_product", &Benchmark::ArgsProduct,
nb::rv_policy::reference)
.def("arg_name", &Benchmark::ArgName, nb::rv_policy::reference)
.def("arg_names", &Benchmark::ArgNames,
nb::rv_policy::reference)
.def("range_pair", &Benchmark::RangePair,
nb::rv_policy::reference, nb::arg("lo1"), nb::arg("hi1"),
nb::arg("lo2"), nb::arg("hi2"))
.def("range_multiplier", &Benchmark::RangeMultiplier,
nb::rv_policy::reference)
.def("min_time", &Benchmark::MinTime, nb::rv_policy::reference)
.def("min_warmup_time", &Benchmark::MinWarmUpTime,
nb::rv_policy::reference)
.def("iterations", &Benchmark::Iterations,
nb::rv_policy::reference)
.def("repetitions", &Benchmark::Repetitions,
nb::rv_policy::reference)
.def("report_aggregates_only", &Benchmark::ReportAggregatesOnly,
nb::rv_policy::reference, nb::arg("value") = true)
.def("display_aggregates_only", &Benchmark::DisplayAggregatesOnly,
nb::rv_policy::reference, nb::arg("value") = true)
.def("measure_process_cpu_time", &Benchmark::MeasureProcessCPUTime,
nb::rv_policy::reference)
.def("use_real_time", &Benchmark::UseRealTime,
nb::rv_policy::reference)
.def("use_manual_time", &Benchmark::UseManualTime,
nb::rv_policy::reference)
.def(
"complexity",
(Benchmark * (Benchmark::*)(benchmark::BigO)) & Benchmark::Complexity,
nb::rv_policy::reference,
nb::arg("complexity") = benchmark::oAuto);
using benchmark::Counter;
nb::class_<Counter> py_counter(m, "Counter");
nb::enum_<Counter::Flags>(py_counter, "Flags")
.value("kDefaults", Counter::Flags::kDefaults)
.value("kIsRate", Counter::Flags::kIsRate)
.value("kAvgThreads", Counter::Flags::kAvgThreads)
.value("kAvgThreadsRate", Counter::Flags::kAvgThreadsRate)
.value("kIsIterationInvariant", Counter::Flags::kIsIterationInvariant)
.value("kIsIterationInvariantRate",
Counter::Flags::kIsIterationInvariantRate)
.value("kAvgIterations", Counter::Flags::kAvgIterations)
.value("kAvgIterationsRate", Counter::Flags::kAvgIterationsRate)
.value("kInvert", Counter::Flags::kInvert)
.export_values()
.def(nb::self | nb::self);
nb::enum_<Counter::OneK>(py_counter, "OneK")
.value("kIs1000", Counter::OneK::kIs1000)
.value("kIs1024", Counter::OneK::kIs1024)
.export_values();
py_counter
.def(nb::init<double, Counter::Flags, Counter::OneK>(),
nb::arg("value") = 0., nb::arg("flags") = Counter::kDefaults,
nb::arg("k") = Counter::kIs1000)
.def("__init__", ([](Counter *c, double value) { new (c) Counter(value); }))
.def_rw("value", &Counter::value)
.def_rw("flags", &Counter::flags)
.def_rw("oneK", &Counter::oneK)
.def(nb::init_implicit<double>());
nb::implicitly_convertible<nb::int_, Counter>();
nb::bind_map<benchmark::UserCounters>(m, "UserCounters");
using benchmark::State;
nb::class_<State>(m, "State")
.def("__bool__", &State::KeepRunning)
.def_prop_ro("keep_running", &State::KeepRunning)
.def("pause_timing", &State::PauseTiming)
.def("resume_timing", &State::ResumeTiming)
.def("skip_with_error", &State::SkipWithError)
.def_prop_ro("error_occurred", &State::error_occurred)
.def("set_iteration_time", &State::SetIterationTime)
.def_prop_rw("bytes_processed", &State::bytes_processed,
&State::SetBytesProcessed)
.def_prop_rw("complexity_n", &State::complexity_length_n,
&State::SetComplexityN)
.def_prop_rw("items_processed", &State::items_processed,
&State::SetItemsProcessed)
.def("set_label", &State::SetLabel)
.def("range", &State::range, nb::arg("pos") = 0)
.def_prop_ro("iterations", &State::iterations)
.def_prop_ro("name", &State::name)
.def_rw("counters", &State::counters)
.def_prop_ro("thread_index", &State::thread_index)
.def_prop_ro("threads", &State::threads);
m.def("Initialize", Initialize);
m.def("RegisterBenchmark", RegisterBenchmark);
m.def("RegisterBenchmark", RegisterBenchmark,
nb::rv_policy::reference);
m.def("RunSpecifiedBenchmarks",
[]() { benchmark::RunSpecifiedBenchmarks(); });
py::class_<benchmark::State>(m, "State")
.def("__bool__", &benchmark::State::KeepRunning)
.def_property_readonly("keep_running", &benchmark::State::KeepRunning)
.def("skip_with_error", &benchmark::State::SkipWithError);
m.def("ClearRegisteredBenchmarks", benchmark::ClearRegisteredBenchmarks);
};
} // namespace
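
The reference return policy called out in the comments above is what lets __init__.py replay the @option builder calls on the object returned by _benchmark.RegisterBenchmark. A rough sketch of that lower-level path, using only methods bound in this file (sum_range is again illustrative):

from google_benchmark import _benchmark, kMicrosecond

def sum_range(state):
    while state:
        sum(range(state.range(0)))

# RegisterBenchmark returns the underlying C++ Benchmark object without
# transferring ownership (nb::rv_policy::reference), so builder methods can be
# applied to it directly -- this is what the @option decorators do internally.
b = _benchmark.RegisterBenchmark("sum_range", sum_range)
b.range_multiplier(2)
b.range(1 << 10, 1 << 18)
b.unit(kMicrosecond)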


@ -20,29 +20,117 @@ In the extracted directory, execute:
python setup.py install
"""
import random
import time
import google_benchmark as benchmark
from google_benchmark import Counter
@benchmark.register
def empty(state):
while state:
pass
while state:
pass
@benchmark.register
def sum_million(state):
while state:
sum(range(1_000_000))
while state:
sum(range(1_000_000))
@benchmark.register
def pause_timing(state):
"""Pause timing every iteration."""
while state:
# Construct a list of random ints every iteration without timing it
state.pause_timing()
random_list = [random.randint(0, 100) for _ in range(100)]
state.resume_timing()
# Time the in place sorting algorithm
random_list.sort()
@benchmark.register
def skipped(state):
if True: # Test some predicate here.
state.skip_with_error('some error')
return # NOTE: You must explicitly return, or benchmark will continue.
if True: # Test some predicate here.
state.skip_with_error("some error")
return # NOTE: You must explicitly return, or benchmark will continue.
... # Benchmark code would be here.
... # Benchmark code would be here.
if __name__ == '__main__':
benchmark.main()
@benchmark.register
def manual_timing(state):
while state:
# Manually count Python CPU time
start = time.perf_counter() # perf_counter_ns() in Python 3.7+
# Something to benchmark
time.sleep(0.01)
end = time.perf_counter()
state.set_iteration_time(end - start)
@benchmark.register
def custom_counters(state):
"""Collect custom metric using benchmark.Counter."""
num_foo = 0.0
while state:
# Benchmark some code here
pass
# Collect some custom metric named foo
num_foo += 0.13
# Automatic Counter from numbers.
state.counters["foo"] = num_foo
# Set a counter as a rate.
state.counters["foo_rate"] = Counter(num_foo, Counter.kIsRate)
# Set a counter as an inverse of rate.
state.counters["foo_inv_rate"] = Counter(num_foo, Counter.kIsRate | Counter.kInvert)
# Set a counter as a thread-average quantity.
state.counters["foo_avg"] = Counter(num_foo, Counter.kAvgThreads)
# There's also a combined flag:
state.counters["foo_avg_rate"] = Counter(num_foo, Counter.kAvgThreadsRate)
@benchmark.register
@benchmark.option.measure_process_cpu_time()
@benchmark.option.use_real_time()
def with_options(state):
while state:
sum(range(1_000_000))
@benchmark.register(name="sum_million_microseconds")
@benchmark.option.unit(benchmark.kMicrosecond)
def with_options2(state):
while state:
sum(range(1_000_000))
@benchmark.register
@benchmark.option.arg(100)
@benchmark.option.arg(1000)
def passing_argument(state):
while state:
sum(range(state.range(0)))
@benchmark.register
@benchmark.option.range(8, limit=8 << 10)
def using_range(state):
while state:
sum(range(state.range(0)))
@benchmark.register
@benchmark.option.range_multiplier(2)
@benchmark.option.range(1 << 10, 1 << 18)
@benchmark.option.complexity(benchmark.oN)
def computing_complexity(state):
while state:
sum(range(state.range(0)))
state.complexity_n = state.range(0)
if __name__ == "__main__":
benchmark.main()


@ -0,0 +1,17 @@
cc_library(
name = "nanobind",
srcs = glob([
"src/*.cpp"
]),
copts = ["-fexceptions"],
includes = ["include", "ext/robin_map/include"],
textual_hdrs = glob(
[
"include/**/*.h",
"src/*.h",
"ext/robin_map/include/tsl/*.h",
],
),
deps = ["@python_headers"],
visibility = ["//visibility:public"],
)


@ -1,20 +0,0 @@
cc_library(
name = "pybind11",
hdrs = glob(
include = [
"include/pybind11/*.h",
"include/pybind11/detail/*.h",
],
exclude = [
"include/pybind11/common.h",
"include/pybind11/eigen.h",
],
),
copts = [
"-fexceptions",
"-Wno-undefined-inline",
"-Wno-pragma-once-outside-header",
],
includes = ["include"],
visibility = ["//visibility:public"],
)


@ -1,2 +0,0 @@
absl-py>=0.7.1


@ -34,9 +34,11 @@ function(add_cxx_compiler_flag FLAG)
check_cxx_compiler_flag("${FLAG}" ${MANGLED_FLAG})
set(CMAKE_REQUIRED_FLAGS "${OLD_CMAKE_REQUIRED_FLAGS}")
if(${MANGLED_FLAG})
set(VARIANT ${ARGV1})
if(ARGV1)
if(ARGC GREATER 1)
set(VARIANT ${ARGV1})
string(TOUPPER "_${VARIANT}" VARIANT)
else()
set(VARIANT "")
endif()
set(CMAKE_CXX_FLAGS${VARIANT} "${CMAKE_CXX_FLAGS${VARIANT}} ${BENCHMARK_CXX_FLAGS${VARIANT}} ${FLAG}" PARENT_SCOPE)
endif()
@ -49,9 +51,11 @@ function(add_required_cxx_compiler_flag FLAG)
check_cxx_compiler_flag("${FLAG}" ${MANGLED_FLAG})
set(CMAKE_REQUIRED_FLAGS "${OLD_CMAKE_REQUIRED_FLAGS}")
if(${MANGLED_FLAG})
set(VARIANT ${ARGV1})
if(ARGV1)
if(ARGC GREATER 1)
set(VARIANT ${ARGV1})
string(TOUPPER "_${VARIANT}" VARIANT)
else()
set(VARIANT "")
endif()
set(CMAKE_CXX_FLAGS${VARIANT} "${CMAKE_CXX_FLAGS${VARIANT}} ${FLAG}" PARENT_SCOPE)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${FLAG}" PARENT_SCOPE)

View File

@ -17,6 +17,8 @@ if(__cxx_feature_check)
endif()
set(__cxx_feature_check INCLUDED)
option(CXXFEATURECHECK_DEBUG OFF)
function(cxx_feature_check FILE)
string(TOLOWER ${FILE} FILE)
string(TOUPPER ${FILE} VAR)
@ -27,13 +29,22 @@ function(cxx_feature_check FILE)
return()
endif()
set(FEATURE_CHECK_CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS})
if (ARGC GREATER 1)
message(STATUS "Enabling additional flags: ${ARGV1}")
list(APPEND FEATURE_CHECK_CMAKE_FLAGS ${ARGV1})
endif()
if (NOT DEFINED COMPILE_${FEATURE})
message(STATUS "Performing Test ${FEATURE}")
if(CMAKE_CROSSCOMPILING)
message(STATUS "Cross-compiling to test ${FEATURE}")
try_compile(COMPILE_${FEATURE}
${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp
CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS}
LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES})
CXX_STANDARD 11
CXX_STANDARD_REQUIRED ON
CMAKE_FLAGS ${FEATURE_CHECK_CMAKE_FLAGS}
LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES}
OUTPUT_VARIABLE COMPILE_OUTPUT_VAR)
if(COMPILE_${FEATURE})
message(WARNING
"If you see build failures due to cross compilation, try setting HAVE_${VAR} to 0")
@ -42,11 +53,14 @@ function(cxx_feature_check FILE)
set(RUN_${FEATURE} 1 CACHE INTERNAL "")
endif()
else()
message(STATUS "Performing Test ${FEATURE}")
message(STATUS "Compiling and running to test ${FEATURE}")
try_run(RUN_${FEATURE} COMPILE_${FEATURE}
${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp
CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS}
LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES})
CXX_STANDARD 11
CXX_STANDARD_REQUIRED ON
CMAKE_FLAGS ${FEATURE_CHECK_CMAKE_FLAGS}
LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES}
COMPILE_OUTPUT_VARIABLE COMPILE_OUTPUT_VAR)
endif()
endif()
@ -56,7 +70,11 @@ function(cxx_feature_check FILE)
add_definitions(-DHAVE_${VAR})
else()
if(NOT COMPILE_${FEATURE})
message(STATUS "Performing Test ${FEATURE} -- failed to compile")
if(CXXFEATURECHECK_DEBUG)
message(STATUS "Performing Test ${FEATURE} -- failed to compile: ${COMPILE_OUTPUT_VAR}")
else()
message(STATUS "Performing Test ${FEATURE} -- failed to compile")
endif()
else()
message(STATUS "Performing Test ${FEATURE} -- compiled but failed to run")
endif()

View File

@ -1 +1,7 @@
@PACKAGE_INIT@
include (CMakeFindDependencyMacro)
find_dependency (Threads)
include("${CMAKE_CURRENT_LIST_DIR}/@targets_export_name@.cmake")

View File

@ -20,16 +20,20 @@ set(__get_git_version INCLUDED)
function(get_git_version var)
if(GIT_EXECUTABLE)
execute_process(COMMAND ${GIT_EXECUTABLE} describe --match "v[0-9]*.[0-9]*.[0-9]*" --abbrev=8
execute_process(COMMAND ${GIT_EXECUTABLE} describe --tags --match "v[0-9]*.[0-9]*.[0-9]*" --abbrev=8
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
RESULT_VARIABLE status
OUTPUT_VARIABLE GIT_VERSION
OUTPUT_VARIABLE GIT_DESCRIBE_VERSION
ERROR_QUIET)
if(${status})
set(GIT_VERSION "v0.0.0")
if(status)
set(GIT_DESCRIBE_VERSION "v0.0.0")
endif()
string(STRIP ${GIT_DESCRIBE_VERSION} GIT_DESCRIBE_VERSION)
if(GIT_DESCRIBE_VERSION MATCHES v[^-]*-)
string(REGEX REPLACE "v([^-]*)-([0-9]+)-.*" "\\1.\\2" GIT_VERSION ${GIT_DESCRIBE_VERSION})
else()
string(STRIP ${GIT_VERSION} GIT_VERSION)
string(REGEX REPLACE "-[0-9]+-g" "-" GIT_VERSION ${GIT_VERSION})
string(REGEX REPLACE "v(.*)" "\\1" GIT_VERSION ${GIT_DESCRIBE_VERSION})
endif()
# Work out if the repository is dirty
@ -43,12 +47,12 @@ function(get_git_version var)
ERROR_QUIET)
string(COMPARE NOTEQUAL "${GIT_DIFF_INDEX}" "" GIT_DIRTY)
if (${GIT_DIRTY})
set(GIT_VERSION "${GIT_VERSION}-dirty")
set(GIT_DESCRIBE_VERSION "${GIT_DESCRIBE_VERSION}-dirty")
endif()
message(STATUS "git version: ${GIT_DESCRIBE_VERSION} normalized to ${GIT_VERSION}")
else()
set(GIT_VERSION "v0.0.0")
set(GIT_VERSION "0.0.0")
endif()
message(STATUS "git Version: ${GIT_VERSION}")
set(${var} ${GIT_VERSION} PARENT_SCOPE)
endfunction()

View File

@ -35,7 +35,24 @@ add_subdirectory(${GOOGLETEST_SOURCE_DIR}
${GOOGLETEST_BINARY_DIR}
EXCLUDE_FROM_ALL)
set_target_properties(gtest PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gtest,INTERFACE_INCLUDE_DIRECTORIES>)
set_target_properties(gtest_main PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gtest_main,INTERFACE_INCLUDE_DIRECTORIES>)
set_target_properties(gmock PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gmock,INTERFACE_INCLUDE_DIRECTORIES>)
set_target_properties(gmock_main PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gmock_main,INTERFACE_INCLUDE_DIRECTORIES>)
# googletest doesn't seem to want to stay build warning clean so let's not hurt ourselves.
if (MSVC)
target_compile_options(gtest PRIVATE "/wd4244" "/wd4722")
target_compile_options(gtest_main PRIVATE "/wd4244" "/wd4722")
target_compile_options(gmock PRIVATE "/wd4244" "/wd4722")
target_compile_options(gmock_main PRIVATE "/wd4244" "/wd4722")
else()
target_compile_options(gtest PRIVATE "-w")
target_compile_options(gtest_main PRIVATE "-w")
target_compile_options(gmock PRIVATE "-w")
target_compile_options(gmock_main PRIVATE "-w")
endif()
if(NOT DEFINED GTEST_COMPILE_COMMANDS)
set(GTEST_COMPILE_COMMANDS ON)
endif()
set_target_properties(gtest PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gtest,INTERFACE_INCLUDE_DIRECTORIES> EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS})
set_target_properties(gtest_main PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gtest_main,INTERFACE_INCLUDE_DIRECTORIES> EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS})
set_target_properties(gmock PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gmock,INTERFACE_INCLUDE_DIRECTORIES> EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS})
set_target_properties(gmock_main PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gmock_main,INTERFACE_INCLUDE_DIRECTORIES> EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS})

View File

@ -31,13 +31,14 @@ if(EXISTS "${GOOGLETEST_PATH}" AND IS_DIRECTORY "${GOOGLETEST_PATH}"
)
else()
if(NOT ALLOW_DOWNLOADING_GOOGLETEST)
message(SEND_ERROR "Did not find Google Test sources! Either pass correct path in GOOGLETEST_PATH, or enable BENCHMARK_DOWNLOAD_DEPENDENCIES, or disable BENCHMARK_ENABLE_GTEST_TESTS / BENCHMARK_ENABLE_TESTING.")
message(SEND_ERROR "Did not find Google Test sources! Either pass correct path in GOOGLETEST_PATH, or enable BENCHMARK_DOWNLOAD_DEPENDENCIES, or disable BENCHMARK_USE_BUNDLED_GTEST, or disable BENCHMARK_ENABLE_GTEST_TESTS / BENCHMARK_ENABLE_TESTING.")
return()
else()
message(WARNING "Did not find Google Test sources! Fetching from web...")
ExternalProject_Add(
googletest
GIT_REPOSITORY https://github.com/google/googletest.git
GIT_TAG master
GIT_TAG "release-1.11.0"
PREFIX "${CMAKE_BINARY_DIR}"
STAMP_DIR "${CMAKE_BINARY_DIR}/stamp"
DOWNLOAD_DIR "${CMAKE_BINARY_DIR}/download"

View File

@ -0,0 +1,28 @@
# If successful, the following variables will be defined:
# PFM_FOUND.
# PFM_LIBRARIES
# PFM_INCLUDE_DIRS
# the following target will be defined:
# PFM::libpfm
include(FeatureSummary)
include(FindPackageHandleStandardArgs)
set_package_properties(PFM PROPERTIES
URL http://perfmon2.sourceforge.net/
DESCRIPTION "A helper library to develop monitoring tools"
PURPOSE "Used to program specific performance monitoring events")
find_library(PFM_LIBRARY NAMES pfm)
find_path(PFM_INCLUDE_DIR NAMES perfmon/pfmlib.h)
find_package_handle_standard_args(PFM REQUIRED_VARS PFM_LIBRARY PFM_INCLUDE_DIR)
if (PFM_FOUND AND NOT TARGET PFM::libpfm)
add_library(PFM::libpfm UNKNOWN IMPORTED)
set_target_properties(PFM::libpfm PROPERTIES
IMPORTED_LOCATION "${PFM_LIBRARY}"
INTERFACE_INCLUDE_DIRECTORIES "${PFM_INCLUDE_DIR}")
endif()
mark_as_advanced(PFM_LIBRARY PFM_INCLUDE_DIR)

View File

@ -1,7 +1,7 @@
prefix=@CMAKE_INSTALL_PREFIX@
exec_prefix=${prefix}
libdir=${prefix}/lib
includedir=${prefix}/include
libdir=@CMAKE_INSTALL_FULL_LIBDIR@
includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@
Name: @PROJECT_NAME@
Description: Google microbenchmark framework

View File

@ -0,0 +1,16 @@
#include <pthread.h>
int main() {
cpu_set_t set;
CPU_ZERO(&set);
for (int i = 0; i < CPU_SETSIZE; ++i) {
CPU_SET(i, &set);
CPU_CLR(i, &set);
}
pthread_t self = pthread_self();
int ret;
ret = pthread_getaffinity_np(self, sizeof(set), &set);
if (ret != 0) return ret;
ret = pthread_setaffinity_np(self, sizeof(set), &set);
if (ret != 0) return ret;
return 0;
}

View File

@ -1,7 +0,0 @@
cmake_minimum_required(VERSION 2.8.11)
project(cmake_wrapper)
include(conanbuildinfo.cmake)
conan_basic_setup()
include(${CMAKE_SOURCE_DIR}/CMakeListsOriginal.txt)

View File

@ -1,10 +0,0 @@
cmake_minimum_required(VERSION 2.8.11)
project(test_package)
set(CMAKE_VERBOSE_MAKEFILE TRUE)
include(${CMAKE_BINARY_DIR}/conanbuildinfo.cmake)
conan_basic_setup()
add_executable(${PROJECT_NAME} test_package.cpp)
target_link_libraries(${PROJECT_NAME} ${CONAN_LIBS})

View File

@ -1,19 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from conans import ConanFile, CMake
import os
class TestPackageConan(ConanFile):
settings = "os", "compiler", "build_type", "arch"
generators = "cmake"
def build(self):
cmake = CMake(self)
cmake.configure()
cmake.build()
def test(self):
bin_path = os.path.join("bin", "test_package")
self.run(bin_path, run_environment=True)

View File

@ -1,18 +0,0 @@
#include "benchmark/benchmark.h"
void BM_StringCreation(benchmark::State& state) {
while (state.KeepRunning())
std::string empty_string;
}
BENCHMARK(BM_StringCreation);
void BM_StringCopy(benchmark::State& state) {
std::string x = "hello";
while (state.KeepRunning())
std::string copy(x);
}
BENCHMARK(BM_StringCopy);
BENCHMARK_MAIN();

View File

@ -1,79 +0,0 @@
from conans import ConanFile, CMake, tools
from conans.errors import ConanInvalidConfiguration
import shutil
import os
class GoogleBenchmarkConan(ConanFile):
name = "benchmark"
description = "A microbenchmark support library."
topics = ("conan", "benchmark", "google", "microbenchmark")
url = "https://github.com/google/benchmark"
homepage = "https://github.com/google/benchmark"
author = "Google Inc."
license = "Apache-2.0"
exports_sources = ["*"]
generators = "cmake"
settings = "arch", "build_type", "compiler", "os"
options = {
"shared": [True, False],
"fPIC": [True, False],
"enable_lto": [True, False],
"enable_exceptions": [True, False]
}
default_options = {"shared": False, "fPIC": True, "enable_lto": False, "enable_exceptions": True}
_build_subfolder = "."
def source(self):
# Wrap the original CMake file to call conan_basic_setup
shutil.move("CMakeLists.txt", "CMakeListsOriginal.txt")
shutil.move(os.path.join("conan", "CMakeLists.txt"), "CMakeLists.txt")
def config_options(self):
if self.settings.os == "Windows":
if self.settings.compiler == "Visual Studio" and float(self.settings.compiler.version.value) <= 12:
raise ConanInvalidConfiguration("{} {} does not support Visual Studio <= 12".format(self.name, self.version))
del self.options.fPIC
def configure(self):
if self.settings.os == "Windows" and self.options.shared:
raise ConanInvalidConfiguration("Windows shared builds are not supported right now, see issue #639")
def _configure_cmake(self):
cmake = CMake(self)
cmake.definitions["BENCHMARK_ENABLE_TESTING"] = "OFF"
cmake.definitions["BENCHMARK_ENABLE_GTEST_TESTS"] = "OFF"
cmake.definitions["BENCHMARK_ENABLE_LTO"] = "ON" if self.options.enable_lto else "OFF"
cmake.definitions["BENCHMARK_ENABLE_EXCEPTIONS"] = "ON" if self.options.enable_exceptions else "OFF"
# See https://github.com/google/benchmark/pull/638 for Windows 32 build explanation
if self.settings.os != "Windows":
cmake.definitions["BENCHMARK_BUILD_32_BITS"] = "ON" if "64" not in str(self.settings.arch) else "OFF"
cmake.definitions["BENCHMARK_USE_LIBCXX"] = "ON" if (str(self.settings.compiler.libcxx) == "libc++") else "OFF"
else:
cmake.definitions["BENCHMARK_USE_LIBCXX"] = "OFF"
cmake.configure(build_folder=self._build_subfolder)
return cmake
def build(self):
cmake = self._configure_cmake()
cmake.build()
def package(self):
cmake = self._configure_cmake()
cmake.install()
self.copy(pattern="LICENSE", dst="licenses")
def package_info(self):
self.cpp_info.libs = tools.collect_libs(self)
if self.settings.os == "Linux":
self.cpp_info.libs.extend(["pthread", "rt"])
elif self.settings.os == "Windows":
self.cpp_info.libs.append("shlwapi")
elif self.settings.os == "SunOS":
self.cpp_info.libs.append("kstat")

View File

@ -1,18 +0,0 @@
# Build tool dependency policy
To ensure the broadest compatibility when building the benchmark library, but
still allow forward progress, we require any build tooling to be available for:
* Debian stable AND
* The last two Ubuntu LTS releases AND
Currently, this means using build tool versions that are available for Ubuntu
16.04 (Xenial), Ubuntu 18.04 (Bionic), and Debian stretch.
_Note, [travis](.travis.yml) runs under Ubuntu 14.04 (Trusty) for linux builds._
## cmake
The current supported version is cmake 3.5.1 as of 2018-06-06.
_Note, this version is also available for Ubuntu 14.04, the previous Ubuntu LTS
release, as `cmake3`._

View File

@ -111,6 +111,7 @@ between compilers or compiler versions. A common example of this
is matching stack frame addresses. In this case regular expressions
can be used to match the differing bits of output. For example:
<!-- {% raw %} -->
```c++
int ExternInt;
struct Point { int x, y, z; };
@ -127,6 +128,7 @@ extern "C" void test_store_point() {
// CHECK: ret
}
```
<!-- {% endraw %} -->
## Current Requirements and Limitations

View File

@ -1 +1,3 @@
theme: jekyll-theme-midnight
theme: jekyll-theme-minimal
logo: /assets/images/icon_black.png
show_downloads: true

BIN
docs/assets/images/icon.png Normal file

Binary file not shown.

BIN
docs/assets/images/icon.xcf Normal file

Binary file not shown.

13
docs/dependencies.md Normal file
View File

@ -0,0 +1,13 @@
# Build tool dependency policy
We follow the [Foundational C++ support policy](https://opensource.google/documentation/policies/cplusplus-support) for our build tools. In
particular the ["Build Systems" section](https://opensource.google/documentation/policies/cplusplus-support#build-systems).
## CMake
The current supported version is CMake 3.10 as of 2023-08-10. Most modern
distributions include newer versions, for example:
* Ubuntu 20.04 provides CMake 3.16.3
* Debian 11.4 provides CMake 3.18.4
* Ubuntu 22.04 provides CMake 3.22.1

12
docs/index.md Normal file
View File

@ -0,0 +1,12 @@
# Benchmark
* [Assembly Tests](AssemblyTests.md)
* [Dependencies](dependencies.md)
* [Perf Counters](perf_counters.md)
* [Platform Specific Build Instructions](platform_specific_build_instructions.md)
* [Python Bindings](python_bindings.md)
* [Random Interleaving](random_interleaving.md)
* [Reducing Variance](reducing_variance.md)
* [Releasing](releasing.md)
* [Tools](tools.md)
* [User Guide](user_guide.md)

35
docs/perf_counters.md Normal file
View File

@ -0,0 +1,35 @@
<a name="perf-counters" />
# User-Requested Performance Counters
When running benchmarks, the user may choose to request collection of
performance counters. This may be useful in investigation scenarios - narrowing
down the cause of a regression; or verifying that the underlying cause of a
performance improvement matches expectations.
This feature is available if:
* The benchmark is run on an architecture featuring a Performance Monitoring
Unit (PMU),
* The benchmark is compiled with support for collecting counters. Currently,
this requires [libpfm](http://perfmon2.sourceforge.net/), which is built as a
dependency via Bazel.
The feature does not require modifying benchmark code. Counter collection is
handled at the boundaries where timer collection is also handled.
To opt-in:
* If using a Bazel build, add `--define pfm=1` to your build flags
* If using CMake:
* Install `libpfm4-dev`, e.g. `apt-get install libpfm4-dev`.
* Enable the CMake flag `BENCHMARK_ENABLE_LIBPFM` in `CMakeLists.txt`.
To use, pass a comma-separated list of counter names through the
`--benchmark_perf_counters` flag. The names are decoded through libpfm, so they
are platform specific; however, some generic names (e.g. `CYCLES` or
`INSTRUCTIONS`) are mapped by libpfm to platform-specific events. See the libpfm
[documentation](http://perfmon2.sourceforge.net/docs.html) for more details.
The counter values are reported back through the [User Counters](../README.md#custom-counters)
mechanism, meaning, they are available in all the formats (e.g. JSON) supported
by User Counters.
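As an illustration (not part of the upstream docs), the sketch below runs a hypothetical `./mybenchmark` binary, built with libpfm support as described above, and reads the collected counters back out of the JSON report; the binary path and the exact counter names are placeholders for your own setup.
```python
import json
import subprocess
# Hypothetical benchmark binary; adjust the path and counter names as needed.
result = subprocess.run(
    [
        "./mybenchmark",
        "--benchmark_perf_counters=CYCLES,INSTRUCTIONS",
        "--benchmark_format=json",
    ],
    check=True,
    capture_output=True,
    text=True,
)
report = json.loads(result.stdout)
for run in report["benchmarks"]:
    # Collected counters are reported alongside the other user counters.
    print(run["name"], run.get("CYCLES"), run.get("INSTRUCTIONS"))
```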

View File

@ -0,0 +1,48 @@
# Platform Specific Build Instructions
## Building with GCC
When the library is built using GCC it is necessary to link with the pthread
library due to how GCC implements `std::thread`. Failing to link to pthread will
lead to runtime exceptions (unless you're using libc++), not linker errors. See
[issue #67](https://github.com/google/benchmark/issues/67) for more details. You
can link to pthread by adding `-pthread` to your linker command. Note, you can
also use `-lpthread`, but there are potential issues with ordering of command
line parameters if you use that.
On QNX, the pthread library is part of libc and usually included automatically
(see
[`pthread_create()`](https://www.qnx.com/developers/docs/7.1/index.html#com.qnx.doc.neutrino.lib_ref/topic/p/pthread_create.html)).
There's no separate pthread library to link.
## Building with Visual Studio 2015 or 2017
The `shlwapi` library (`-lshlwapi`) is required to support a call to `CPUInfo` which reads the registry. Either add `shlwapi.lib` under `[ Configuration Properties > Linker > Input ]`, or use the following:
```
// Alternatively, can add libraries using linker options.
#ifdef _WIN32
#pragma comment ( lib, "Shlwapi.lib" )
#ifdef _DEBUG
#pragma comment ( lib, "benchmarkd.lib" )
#else
#pragma comment ( lib, "benchmark.lib" )
#endif
#endif
```
You can also use the graphical version of CMake:
* Open `CMake GUI`.
* Under `Where to build the binaries`, use the same path as the source plus `build`.
* Under `CMAKE_INSTALL_PREFIX`, use the same path as the source plus `install`.
* Click `Configure`, `Generate`, `Open Project`.
* If the build fails, try deleting the entire directory and starting again, or unticking options to build less.
## Building with Intel 2015 Update 1 or Intel System Studio Update 4
See instructions for building with Visual Studio. Once built, right click on the solution and change the build to Intel.
## Building on Solaris
If you're running benchmarks on Solaris, you'll want the kstat library linked in
too (`-lkstat`).

34
docs/python_bindings.md Normal file
View File

@ -0,0 +1,34 @@
# Building and installing Python bindings
Python bindings are available as wheels on [PyPI](https://pypi.org/project/google-benchmark/) for importing and
using Google Benchmark directly in Python.
Currently, pre-built wheels exist for macOS (both ARM64 and Intel x86), Linux x86-64 and 64-bit Windows.
Supported Python versions are Python 3.7 - 3.10.
To install Google Benchmark's Python bindings, run:
```bash
python -m pip install --upgrade pip # for manylinux2014 support
python -m pip install google-benchmark
```
In order to keep your system Python interpreter clean, it is advisable to run these commands in a virtual
environment. See the [official Python documentation](https://docs.python.org/3/library/venv.html)
on how to create virtual environments.
To build a wheel directly from source, you can follow these steps:
```bash
git clone https://github.com/google/benchmark.git
cd benchmark
# create a virtual environment and activate it
python3 -m venv venv --system-site-packages
source venv/bin/activate # .\venv\Scripts\Activate.ps1 on Windows
# upgrade Python's system-wide packages
python -m pip install --upgrade pip setuptools wheel
# builds the wheel and stores it in the directory "wheelhouse".
python -m pip wheel . -w wheelhouse
```
NB: Building wheels from source requires Bazel. For platform-specific instructions on how to install Bazel,
refer to the [Bazel installation docs](https://bazel.build/install).
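Once installed, the bindings can be exercised with a minimal script along these lines; this is only a sketch modelled on the bundled example, assuming the package imports as `google_benchmark`:
```python
"""Minimal benchmark using the Python bindings; run with: python bm_sum.py"""
import google_benchmark as benchmark
@benchmark.register
def sum_range(state):
    while state:
        sum(range(1_000))
if __name__ == "__main__":
    benchmark.main()
```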

View File

@ -0,0 +1,13 @@
<a name="interleaving" />
# Random Interleaving
[Random Interleaving](https://github.com/google/benchmark/issues/1051) is a
technique to lower run-to-run variance. It randomly interleaves repetitions of a
microbenchmark with repetitions from other microbenchmarks in the same benchmark
test. Data shows it is able to lower run-to-run variance by
[40%](https://github.com/google/benchmark/issues/1051) on average.
To use it, you mainly need to set `--benchmark_enable_random_interleaving=true`;
optionally, specify a non-zero repetition count (`--benchmark_repetitions=9`)
and decrease the per-repetition time (`--benchmark_min_time=0.1`).
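For example, a full invocation combining these flags could look like the sketch below; `./mybenchmark` is a placeholder name, and the Python wrapper is just one convenient way to launch the binary with those flags:
```python
import subprocess
# Placeholder binary name; the flags are the ones described above.
subprocess.run(
    [
        "./mybenchmark",
        "--benchmark_enable_random_interleaving=true",
        "--benchmark_repetitions=9",
        "--benchmark_min_time=0.1",
    ],
    check=True,
)
```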

100
docs/reducing_variance.md Normal file
View File

@ -0,0 +1,100 @@
# Reducing Variance
<a name="disabling-cpu-frequency-scaling" />
## Disabling CPU Frequency Scaling
If you see this error:
```
***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
```
you might want to disable the CPU frequency scaling while running the
benchmark, as well as consider other ways to stabilize the performance of
your system while benchmarking.
See [Reducing Variance](reducing_variance.md) for more information.
Exactly how to do this depends on the Linux distribution,
desktop environment, and installed programs. Specific details are a moving
target, so we will not attempt to exhaustively document them here.
One simple option is to use the `cpupower` program to change the
performance governor to "performance". This tool is maintained along with
the Linux kernel and provided by your distribution.
It must be run as root, like this:
```bash
sudo cpupower frequency-set --governor performance
```
After this you can verify that all CPUs are using the performance governor
by running this command:
```bash
cpupower frequency-info -o proc
```
The benchmarks you subsequently run will have less variance.
<a name="reducing-variance" />
## Reducing Variance in Benchmarks
The Linux CPU frequency governor [discussed
above](user_guide#disabling-cpu-frequency-scaling) is not the only source
of noise in benchmarks. Some, but not all, of the sources of variance
include:
1. On multi-core machines not all CPUs/CPU cores/CPU threads run the same
speed, so running a benchmark one time and then again may give a
different result depending on which CPU it ran on.
2. CPU scaling features that run on the CPU, like Intel's Turbo Boost and
AMD Turbo Core and Precision Boost, can temporarily change the CPU
frequency even when using the "performance" governor on Linux.
3. Context switching between CPUs, or scheduling competition on the CPU the
benchmark is running on.
4. Intel Hyperthreading or AMD SMT causing the same issue as above.
5. Cache effects caused by code running on other CPUs.
6. Non-uniform memory architectures (NUMA).
These can cause variance in benchmark results within a single run
(`--benchmark_repetitions=N`) or across multiple runs of the benchmark
program.
Reducing sources of variance is OS and architecture dependent, which is one
reason some companies maintain machines dedicated to performance testing.
Some of the easier and more effective ways of reducing variance on a typical
Linux workstation are:
1. Use the performance governor as [discussed
above](user_guide#disabling-cpu-frequency-scaling).
1. Disable processor boosting by:
```sh
echo 0 | sudo tee /sys/devices/system/cpu/cpufreq/boost
```
See the Linux kernel's
[boost.txt](https://www.kernel.org/doc/Documentation/cpu-freq/boost.txt)
for more information.
2. Set the benchmark program's task affinity to a fixed CPU (a programmatic sketch follows this list). For example:
```sh
taskset -c 0 ./mybenchmark
```
3. Disable Hyperthreading/SMT. This can be done in the BIOS or using the
`/sys` file system (see the LLVM project's [Benchmarking
tips](https://llvm.org/docs/Benchmarking.html)).
4. Close other programs that do non-trivial things based on timers, such as
your web browser, desktop environment, etc.
5. Reduce the working set of your benchmark to fit within the L1 cache, but
do be aware that this may lead you to optimize for an unrealistic
situation.
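As a programmatic variant of tip 2 above (an illustration only, not part of the upstream docs), a Linux-only Python sketch can pin the process, and anything it spawns, to a single CPU:
```python
import os
import subprocess
# Linux-only: pin this process (and its children) to CPU 0, mirroring the
# `taskset -c 0 ./mybenchmark` tip above. The binary name is a placeholder.
os.sched_setaffinity(0, {0})
subprocess.run(["./mybenchmark"], check=True)
```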
Further resources on this topic:
1. The LLVM project's [Benchmarking
tips](https://llvm.org/docs/Benchmarking.html).
1. The Arch Wiki [Cpu frequency
scaling](https://wiki.archlinux.org/title/CPU_frequency_scaling) page.

View File

@ -1,16 +1,41 @@
# How to release
* Make sure you're on master and synced to HEAD
* Ensure the project builds and tests run (sanity check only, obviously)
* Make sure you're on main and synced to HEAD
* Ensure the project builds and tests run
* `parallel -j0 exec ::: test/*_test` can help ensure everything at least
passes
* Prepare release notes
* `git log $(git describe --abbrev=0 --tags)..HEAD` gives you the list of
commits between the last annotated tag and HEAD
* Pick the most interesting.
* Create one last commit that updates the version saved in `CMakeLists.txt`, `MODULE.bazel`
and the `__version__` variable in `bindings/python/google_benchmark/__init__.py` to the
release version you're creating. (This version will be used if benchmark is installed
from the archive you'll be creating in the next step.)
```
project (benchmark VERSION 1.8.0 LANGUAGES CXX)
```
```
module(name = "com_github_google_benchmark", version="1.8.0")
```
```python
# bindings/python/google_benchmark/__init__.py
# ...
__version__ = "1.8.0" # <-- change this to the release version you are creating
# ...
```
* Create a release through GitHub's interface
* Note this will create a lightweight tag.
* Update this to an annotated tag:
* `git pull --tags`
* `git tag -a -f <tag> <tag>`
* `git push --force origin`
* `git push --force --tags origin`
* Confirm that the "Build and upload Python wheels" action runs to completion
* run it manually if it hasn't run

View File

@ -186,6 +186,146 @@ Benchmark Time CPU Time Old
This is a mix of the previous two modes, two (potentially different) benchmark binaries are run, and a different filter is applied to each one.
As you can note, the values in `Time` and `CPU` columns are calculated as `(new - old) / |old|`.
### Note: Interpreting the output
Performance measurements are an art, and performance comparisons are doubly so.
Results are often noisy and don't necessarily have large absolute differences to
them, so just by visual inspection, it is not at all apparent if two
measurements are actually showing a performance change or not. It is even more
confusing with multiple benchmark repetitions.
Thankfully, what we can do is use statistical tests on the results to determine
whether the performance has statistically-significantly changed. `compare.py`
uses [MannWhitney U
test](https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test), with a null
hypothesis being that there's no difference in performance.
**The below output is a summary of a benchmark comparison with statistics
provided for a multi-threaded process.**
```
Benchmark Time CPU Time Old Time New CPU Old CPU New
-----------------------------------------------------------------------------------------------------------------------------
benchmark/threads:1/process_time/real_time_pvalue 0.0000 0.0000 U Test, Repetitions: 27 vs 27
benchmark/threads:1/process_time/real_time_mean -0.1442 -0.1442 90 77 90 77
benchmark/threads:1/process_time/real_time_median -0.1444 -0.1444 90 77 90 77
benchmark/threads:1/process_time/real_time_stddev +0.3974 +0.3933 0 0 0 0
benchmark/threads:1/process_time/real_time_cv +0.6329 +0.6280 0 0 0 0
OVERALL_GEOMEAN -0.1442 -0.1442 0 0 0 0
```
--------------------------------------------
Here's a breakdown of each row:
**benchmark/threads:1/process_time/real_time_pvalue**: This shows the _p-value_ for
the statistical test comparing the performance of the process running with one
thread. A value of 0.0000 suggests a statistically significant difference in
performance. The comparison was conducted using the U Test (Mann-Whitney
U Test) with 27 repetitions for each case.
**benchmark/threads:1/process_time/real_time_mean**: This shows the relative
difference in mean execution time between two different cases. The negative
value (-0.1442) implies that the new process is faster by about 14.42%. The old
time was 90 units, while the new time is 77 units.
**benchmark/threads:1/process_time/real_time_median**: Similarly, this shows the
relative difference in the median execution time. Again, the new process is
faster by 14.44%.
**benchmark/threads:1/process_time/real_time_stddev**: This is the relative
difference in the standard deviation of the execution time, which is a measure
of how much variation or dispersion there is from the mean. A positive value
(+0.3974) implies there is more variance in the execution time in the new
process.
**benchmark/threads:1/process_time/real_time_cv**: CV stands for Coefficient of
Variation. It is the ratio of the standard deviation to the mean. It provides a
standardized measure of dispersion. An increase (+0.6329) indicates more
relative variability in the new process.
**OVERALL_GEOMEAN**: Geomean stands for geometric mean, a type of average that is
less influenced by outliers. The negative value indicates a general improvement
in the new process. However, given the values are all zero for the old and new
times, this seems to be a mistake or placeholder in the output.
-----------------------------------------
Let's first try to see what the different columns represent in the above
`compare.py` benchmarking output:
1. **Benchmark:** The name of the function being benchmarked, along with the
size of the input (after the slash).
2. **Time:** The average time per operation, across all iterations.
3. **CPU:** The average CPU time per operation, across all iterations.
4. **Iterations:** The number of iterations the benchmark was run to get a
stable estimate.
5. **Time Old and Time New:** These represent the average time it takes for a
function to run in two different scenarios or versions. For example, you
might be comparing how fast a function runs before and after you make some
changes to it.
6. **CPU Old and CPU New:** These show the average amount of CPU time that the
function uses in two different scenarios or versions. This is similar to
Time Old and Time New, but focuses on CPU usage instead of overall time.
In the comparison section, the relative differences in both time and CPU time
are displayed for each input size.
A statistically-significant difference is determined by a **p-value**, which is
a measure of the probability that the observed difference could have occurred
just by random chance. A smaller p-value indicates stronger evidence against the
null hypothesis.
**Therefore:**
1. If the p-value is less than the chosen significance level (alpha), we
reject the null hypothesis and conclude the benchmarks are significantly
different.
2. If the p-value is greater than or equal to alpha, we fail to reject the
null hypothesis and treat the two benchmarks as similar.
The result of said statistical test is additionally communicated through color coding:
```diff
+ Green:
```
The benchmarks are _**statistically different**_. This could mean the
performance has either **significantly improved** or **significantly
deteriorated**. You should look at the actual performance numbers to see which
is the case.
```diff
- Red:
```
The benchmarks are _**statistically similar**_. This means the performance
**hasn't significantly changed**.
In statistical terms, **'green'** means we reject the null hypothesis that
there's no difference in performance, and **'red'** means we fail to reject the
null hypothesis. This might seem counter-intuitive if you're expecting 'green'
to mean 'improved performance' and 'red' to mean 'worsened performance'.
```bash
But remember, in this context:
'Success' means 'successfully finding a difference'.
'Failure' means 'failing to find a difference'.
```
Also, please note that **even if** we determine that there **is** a
statistically-significant difference between the two measurements, it does not
_necessarily_ mean that the actual benchmarks that were measured **are**
different, or vice versa, even if we determine that there is **no**
statistically-significant difference between the two measurements, it does not
necessarily mean that the actual benchmarks that were measured **are not**
different.
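To make the mechanics concrete, the following sketch reproduces the same kind of test on made-up repetition timings, assuming SciPy is available; it is an illustration, not compare.py's actual implementation:
```python
from scipy.stats import mannwhitneyu
# Hypothetical per-repetition timings for the old and new binaries.
time_old = [90, 91, 89, 92, 90, 90, 91, 89, 90]
time_new = [77, 78, 76, 77, 77, 78, 76, 77, 77]
stat, p_value = mannwhitneyu(time_old, time_new)
alpha = 0.05
if p_value < alpha:
    print(f"p={p_value:.4f}: reject the null hypothesis (statistically different)")
else:
    print(f"p={p_value:.4f}: fail to reject (treat as similar)")
```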
### U test
If there is a sufficient repetition count of the benchmarks, the tool can do

1266
docs/user_guide.md Normal file

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,47 @@
#ifndef BENCHMARK_EXPORT_H
#define BENCHMARK_EXPORT_H
#if defined(_WIN32)
#define EXPORT_ATTR __declspec(dllexport)
#define IMPORT_ATTR __declspec(dllimport)
#define NO_EXPORT_ATTR
#define DEPRECATED_ATTR __declspec(deprecated)
#else // _WIN32
#define EXPORT_ATTR __attribute__((visibility("default")))
#define IMPORT_ATTR __attribute__((visibility("default")))
#define NO_EXPORT_ATTR __attribute__((visibility("hidden")))
#define DEPRECATE_ATTR __attribute__((__deprecated__))
#endif // _WIN32
#ifdef BENCHMARK_STATIC_DEFINE
#define BENCHMARK_EXPORT
#define BENCHMARK_NO_EXPORT
#else // BENCHMARK_STATIC_DEFINE
#ifndef BENCHMARK_EXPORT
#ifdef benchmark_EXPORTS
/* We are building this library */
#define BENCHMARK_EXPORT EXPORT_ATTR
#else // benchmark_EXPORTS
/* We are using this library */
#define BENCHMARK_EXPORT IMPORT_ATTR
#endif // benchmark_EXPORTS
#endif // !BENCHMARK_EXPORT
#ifndef BENCHMARK_NO_EXPORT
#define BENCHMARK_NO_EXPORT NO_EXPORT_ATTR
#endif // !BENCHMARK_NO_EXPORT
#endif // BENCHMARK_STATIC_DEFINE
#ifndef BENCHMARK_DEPRECATED
#define BENCHMARK_DEPRECATED DEPRECATE_ATTR
#endif // BENCHMARK_DEPRECATED
#ifndef BENCHMARK_DEPRECATED_EXPORT
#define BENCHMARK_DEPRECATED_EXPORT BENCHMARK_EXPORT BENCHMARK_DEPRECATED
#endif // BENCHMARK_DEPRECATED_EXPORT
#ifndef BENCHMARK_DEPRECATED_NO_EXPORT
#define BENCHMARK_DEPRECATED_NO_EXPORT BENCHMARK_NO_EXPORT BENCHMARK_DEPRECATED
#endif // BENCHMARK_DEPRECATED_EXPORT
#endif /* BENCHMARK_EXPORT_H */

50
pyproject.toml Normal file
View File

@ -0,0 +1,50 @@
[build-system]
requires = ["setuptools", "wheel"]
build-backend = "setuptools.build_meta"
[project]
name = "google_benchmark"
description = "A library to benchmark code snippets."
requires-python = ">=3.8"
license = {file = "LICENSE"}
keywords = ["benchmark"]
authors = [
{name = "Google", email = "benchmark-discuss@googlegroups.com"},
]
classifiers = [
"Development Status :: 4 - Beta",
"Intended Audience :: Developers",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: Apache Software License",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Topic :: Software Development :: Testing",
"Topic :: System :: Benchmark",
]
dynamic = ["readme", "version"]
dependencies = [
"absl-py>=0.7.1",
]
[project.urls]
Homepage = "https://github.com/google/benchmark"
Documentation = "https://github.com/google/benchmark/tree/main/docs"
Repository = "https://github.com/google/benchmark.git"
Discord = "https://discord.gg/cz7UX7wKC2"
[tool.setuptools]
package-dir = {"" = "bindings/python"}
zip-safe = false
[tool.setuptools.packages.find]
where = ["bindings/python"]
[tool.setuptools.dynamic]
version = { attr = "google_benchmark.__version__" }
readme = { file = "README.md", content-type = "text/markdown" }

178
setup.py
View File

@ -1,125 +1,113 @@
import contextlib
import os
import posixpath
import re
import platform
import shutil
import sys
import sysconfig
from pathlib import Path
from distutils import sysconfig
import setuptools
from setuptools.command import build_ext
here = os.path.dirname(os.path.abspath(__file__))
PYTHON_INCLUDE_PATH_PLACEHOLDER = "<PYTHON_INCLUDE_PATH>"
IS_WINDOWS = platform.system() == "Windows"
IS_MAC = platform.system() == "Darwin"
IS_WINDOWS = sys.platform.startswith('win')
def _get_version():
"""Parse the version string from __init__.py."""
with open(os.path.join(here, 'bindings', 'python', 'google_benchmark', '__init__.py')) as f:
try:
version_line = next(
line for line in f if line.startswith('__version__'))
except StopIteration:
raise ValueError('__version__ not defined in __init__.py')
else:
ns = {}
exec(version_line, ns) # pylint: disable=exec-used
return ns['__version__']
def _parse_requirements(path):
with open(os.path.join(here, path)) as f:
return [
line.rstrip() for line in f
if not (line.isspace() or line.startswith('#'))
]
@contextlib.contextmanager
def temp_fill_include_path(fp: str):
"""Temporarily set the Python include path in a file."""
with open(fp, "r+") as f:
try:
content = f.read()
replaced = content.replace(
PYTHON_INCLUDE_PATH_PLACEHOLDER,
Path(sysconfig.get_paths()['include']).as_posix(),
)
f.seek(0)
f.write(replaced)
f.truncate()
yield
finally:
# revert to the original content after exit
f.seek(0)
f.write(content)
f.truncate()
class BazelExtension(setuptools.Extension):
"""A C/C++ extension that is defined as a Bazel BUILD target."""
"""A C/C++ extension that is defined as a Bazel BUILD target."""
def __init__(self, name, bazel_target):
self.bazel_target = bazel_target
self.relpath, self.target_name = (
posixpath.relpath(bazel_target, '//').split(':'))
setuptools.Extension.__init__(self, name, sources=[])
def __init__(self, name: str, bazel_target: str):
super().__init__(name=name, sources=[])
self.bazel_target = bazel_target
stripped_target = bazel_target.split("//")[-1]
self.relpath, self.target_name = stripped_target.split(":")
class BuildBazelExtension(build_ext.build_ext):
"""A command that runs Bazel to build a C/C++ extension."""
"""A command that runs Bazel to build a C/C++ extension."""
def run(self):
for ext in self.extensions:
self.bazel_build(ext)
build_ext.build_ext.run(self)
def run(self):
for ext in self.extensions:
self.bazel_build(ext)
build_ext.build_ext.run(self)
def bazel_build(self, ext):
with open('WORKSPACE', 'r') as f:
workspace_contents = f.read()
def bazel_build(self, ext: BazelExtension):
"""Runs the bazel build to create the package."""
with temp_fill_include_path("WORKSPACE"):
temp_path = Path(self.build_temp)
with open('WORKSPACE', 'w') as f:
f.write(re.sub(
r'(?<=path = ").*(?=", # May be overwritten by setup\.py\.)',
sysconfig.get_python_inc().replace(os.path.sep, posixpath.sep),
workspace_contents))
bazel_argv = [
"bazel",
"build",
ext.bazel_target,
f"--symlink_prefix={temp_path / 'bazel-'}",
f"--compilation_mode={'dbg' if self.debug else 'opt'}",
# C++17 is required by nanobind
f"--cxxopt={'/std:c++17' if IS_WINDOWS else '-std=c++17'}",
]
if not os.path.exists(self.build_temp):
os.makedirs(self.build_temp)
if IS_WINDOWS:
# Link with python*.lib.
for library_dir in self.library_dirs:
bazel_argv.append("--linkopt=/LIBPATH:" + library_dir)
elif IS_MAC:
if platform.machine() == "x86_64":
# C++17 needs macOS 10.14 at minimum
bazel_argv.append("--macos_minimum_os=10.14")
bazel_argv = [
'bazel',
'build',
ext.bazel_target,
'--symlink_prefix=' + os.path.join(self.build_temp, 'bazel-'),
'--compilation_mode=' + ('dbg' if self.debug else 'opt'),
]
# cross-compilation for Mac ARM64 on GitHub Mac x86 runners.
# ARCHFLAGS is set by cibuildwheel before macOS wheel builds.
archflags = os.getenv("ARCHFLAGS", "")
if "arm64" in archflags:
bazel_argv.append("--cpu=darwin_arm64")
bazel_argv.append("--macos_cpus=arm64")
if IS_WINDOWS:
# Link with python*.lib.
for library_dir in self.library_dirs:
bazel_argv.append('--linkopt=/LIBPATH:' + library_dir)
elif platform.machine() == "arm64":
bazel_argv.append("--macos_minimum_os=11.0")
self.spawn(bazel_argv)
self.spawn(bazel_argv)
shared_lib_suffix = '.dll' if IS_WINDOWS else '.so'
ext_bazel_bin_path = os.path.join(
self.build_temp, 'bazel-bin',
ext.relpath, ext.target_name + shared_lib_suffix)
ext_dest_path = self.get_ext_fullpath(ext.name)
ext_dest_dir = os.path.dirname(ext_dest_path)
if not os.path.exists(ext_dest_dir):
os.makedirs(ext_dest_dir)
shutil.copyfile(ext_bazel_bin_path, ext_dest_path)
shared_lib_suffix = '.dll' if IS_WINDOWS else '.so'
ext_name = ext.target_name + shared_lib_suffix
ext_bazel_bin_path = temp_path / 'bazel-bin' / ext.relpath / ext_name
ext_dest_path = Path(self.get_ext_fullpath(ext.name))
shutil.copyfile(ext_bazel_bin_path, ext_dest_path)
# explicitly call `bazel shutdown` for graceful exit
self.spawn(["bazel", "shutdown"])
setuptools.setup(
name='google_benchmark',
version=_get_version(),
url='https://github.com/google/benchmark',
description='A library to benchmark code snippets.',
author='Google',
author_email='benchmark-py@google.com',
# Contained modules and scripts.
package_dir={'': 'bindings/python'},
packages=setuptools.find_packages('bindings/python'),
install_requires=_parse_requirements('bindings/python/requirements.txt'),
cmdclass=dict(build_ext=BuildBazelExtension),
ext_modules=[BazelExtension('google_benchmark._benchmark', '//bindings/python/google_benchmark:_benchmark')],
zip_safe=False,
# PyPI package information.
classifiers=[
'Development Status :: 4 - Beta',
'Intended Audience :: Developers',
'Intended Audience :: Science/Research',
'License :: OSI Approved :: Apache Software License',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Topic :: Software Development :: Testing',
'Topic :: System :: Benchmark',
ext_modules=[
BazelExtension(
name="google_benchmark._benchmark",
bazel_target="//bindings/python/google_benchmark:_benchmark",
)
],
license='Apache 2.0',
keywords='benchmark',
)

View File

@ -25,32 +25,42 @@ set_target_properties(benchmark PROPERTIES
SOVERSION ${GENERIC_LIB_SOVERSION}
)
target_include_directories(benchmark PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
)
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
)
# libpfm, if available
if (PFM_FOUND)
target_link_libraries(benchmark PRIVATE PFM::libpfm)
target_compile_definitions(benchmark PRIVATE -DHAVE_LIBPFM)
endif()
# pthread affinity, if available
if(HAVE_PTHREAD_AFFINITY)
target_compile_definitions(benchmark PRIVATE -DBENCHMARK_HAS_PTHREAD_AFFINITY)
endif()
# Link threads.
target_link_libraries(benchmark ${BENCHMARK_CXX_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
find_library(LIBRT rt)
if(LIBRT)
target_link_libraries(benchmark ${LIBRT})
endif()
target_link_libraries(benchmark PRIVATE Threads::Threads)
target_link_libraries(benchmark PRIVATE ${BENCHMARK_CXX_LIBRARIES})
if(HAVE_LIB_RT)
target_link_libraries(benchmark PRIVATE rt)
endif(HAVE_LIB_RT)
if(CMAKE_BUILD_TYPE)
string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UPPER)
endif()
if(NOT CMAKE_THREAD_LIBS_INIT AND "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}" MATCHES ".*-fsanitize=[^ ]*address.*")
message(WARNING "CMake's FindThreads.cmake did not fail, but CMAKE_THREAD_LIBS_INIT ended up being empty. This was fixed in https://github.com/Kitware/CMake/commit/d53317130e84898c5328c237186dbd995aaf1c12 Let's guess that -pthread is sufficient.")
target_link_libraries(benchmark -pthread)
endif()
# We need extra libraries on Windows
if(${CMAKE_SYSTEM_NAME} MATCHES "Windows")
target_link_libraries(benchmark shlwapi)
target_link_libraries(benchmark PRIVATE shlwapi)
endif()
# We need extra libraries on Solaris
if(${CMAKE_SYSTEM_NAME} MATCHES "SunOS")
target_link_libraries(benchmark kstat)
target_link_libraries(benchmark PRIVATE kstat)
endif()
if (NOT BUILD_SHARED_LIBS)
target_compile_definitions(benchmark PUBLIC -DBENCHMARK_STATIC_DEFINE)
endif()
# Benchmark main library
@ -60,34 +70,45 @@ set_target_properties(benchmark_main PROPERTIES
OUTPUT_NAME "benchmark_main"
VERSION ${GENERIC_LIB_VERSION}
SOVERSION ${GENERIC_LIB_SOVERSION}
DEFINE_SYMBOL benchmark_EXPORTS
)
target_include_directories(benchmark PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
)
target_link_libraries(benchmark_main benchmark::benchmark)
target_link_libraries(benchmark_main PUBLIC benchmark::benchmark)
set(generated_dir "${CMAKE_CURRENT_BINARY_DIR}/generated")
set(generated_dir "${PROJECT_BINARY_DIR}")
set(version_config "${generated_dir}/${PROJECT_NAME}ConfigVersion.cmake")
set(project_config "${generated_dir}/${PROJECT_NAME}Config.cmake")
set(pkg_config "${generated_dir}/${PROJECT_NAME}.pc")
set(targets_to_export benchmark benchmark_main)
set(targets_export_name "${PROJECT_NAME}Targets")
set(namespace "${PROJECT_NAME}::")
include(CMakePackageConfigHelpers)
configure_package_config_file (
${PROJECT_SOURCE_DIR}/cmake/Config.cmake.in
${project_config}
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}
NO_SET_AND_CHECK_MACRO
NO_CHECK_REQUIRED_COMPONENTS_MACRO
)
write_basic_package_version_file(
"${version_config}" VERSION ${GENERIC_LIB_VERSION} COMPATIBILITY SameMajorVersion
)
configure_file("${PROJECT_SOURCE_DIR}/cmake/Config.cmake.in" "${project_config}" @ONLY)
configure_file("${PROJECT_SOURCE_DIR}/cmake/benchmark.pc.in" "${pkg_config}" @ONLY)
export (
TARGETS ${targets_to_export}
NAMESPACE "${namespace}"
FILE ${generated_dir}/${targets_export_name}.cmake
)
if (BENCHMARK_ENABLE_INSTALL)
# Install target (will install the library to specified CMAKE_INSTALL_PREFIX variable)
install(
TARGETS benchmark benchmark_main
TARGETS ${targets_to_export}
EXPORT ${targets_export_name}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
@ -96,6 +117,7 @@ if (BENCHMARK_ENABLE_INSTALL)
install(
DIRECTORY "${PROJECT_SOURCE_DIR}/include/benchmark"
"${PROJECT_BINARY_DIR}/include/benchmark"
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
FILES_MATCHING PATTERN "*.*h")
@ -112,3 +134,37 @@ if (BENCHMARK_ENABLE_INSTALL)
NAMESPACE "${namespace}"
DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}")
endif()
if (BENCHMARK_ENABLE_DOXYGEN)
find_package(Doxygen REQUIRED)
set(DOXYGEN_QUIET YES)
set(DOXYGEN_RECURSIVE YES)
set(DOXYGEN_GENERATE_HTML YES)
set(DOXYGEN_GENERATE_MAN NO)
set(DOXYGEN_MARKDOWN_SUPPORT YES)
set(DOXYGEN_BUILTIN_STL_SUPPORT YES)
set(DOXYGEN_EXTRACT_PACKAGE YES)
set(DOXYGEN_EXTRACT_STATIC YES)
set(DOXYGEN_SHOW_INCLUDE_FILES YES)
set(DOXYGEN_BINARY_TOC YES)
set(DOXYGEN_TOC_EXPAND YES)
set(DOXYGEN_USE_MDFILE_AS_MAINPAGE "index.md")
doxygen_add_docs(benchmark_doxygen
docs
include
src
ALL
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMENT "Building documentation with Doxygen.")
if (BENCHMARK_ENABLE_INSTALL AND BENCHMARK_INSTALL_DOCS)
install(
DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/html/"
DESTINATION ${CMAKE_INSTALL_DOCDIR})
endif()
else()
if (BENCHMARK_ENABLE_INSTALL AND BENCHMARK_INSTALL_DOCS)
install(
DIRECTORY "${PROJECT_SOURCE_DIR}/docs/"
DESTINATION ${CMAKE_INSTALL_DOCDIR})
endif()
endif()

View File

@ -13,12 +13,13 @@
// limitations under the License.
#include "benchmark/benchmark.h"
#include "benchmark_api_internal.h"
#include "benchmark_runner.h"
#include "internal_macros.h"
#ifndef BENCHMARK_OS_WINDOWS
#ifndef BENCHMARK_OS_FUCHSIA
#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT)
#include <sys/resource.h>
#endif
#include <sys/time.h>
@ -32,7 +33,10 @@
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <limits>
#include <map>
#include <memory>
#include <random>
#include <string>
#include <thread>
#include <utility>
@ -45,94 +49,146 @@
#include "internal_macros.h"
#include "log.h"
#include "mutex.h"
#include "perf_counters.h"
#include "re.h"
#include "statistics.h"
#include "string_util.h"
#include "thread_manager.h"
#include "thread_timer.h"
namespace benchmark {
// Print a list of benchmarks. This option overrides all other options.
DEFINE_bool(benchmark_list_tests, false);
BM_DEFINE_bool(benchmark_list_tests, false);
// A regular expression that specifies the set of benchmarks to execute. If
// this flag is empty, or if this flag is the string \"all\", all benchmarks
// linked into the binary are run.
DEFINE_string(benchmark_filter, ".");
BM_DEFINE_string(benchmark_filter, "");
// Minimum number of seconds we should run benchmark before results are
// considered significant. For cpu-time based tests, this is the lower bound
// Specification of how long to run the benchmark.
//
// It can be either an exact number of iterations (specified as `<integer>x`),
// or a minimum number of seconds (specified as `<float>s`). If the latter
// format (ie., min seconds) is used, the system may run the benchmark longer
// until the results are considered significant.
//
// For backward compatibility, the `s` suffix may be omitted, in which case,
// the specified number is interpreted as the number of seconds.
//
// For cpu-time based tests, this is the lower bound
// on the total cpu time used by all threads that make up the test. For
// real-time based tests, this is the lower bound on the elapsed time of the
// benchmark execution, regardless of number of threads.
DEFINE_double(benchmark_min_time, 0.5);
BM_DEFINE_string(benchmark_min_time, kDefaultMinTimeStr);
// Minimum number of seconds a benchmark should be run before results should be
// taken into account. This e.g can be necessary for benchmarks of code which
// needs to fill some form of cache before performance is of interest.
// Note: results gathered within this period are discarded and not used for
// reported result.
BM_DEFINE_double(benchmark_min_warmup_time, 0.0);
// The number of runs of each benchmark. If greater than 1, the mean and
// standard deviation of the runs will be reported.
DEFINE_int32(benchmark_repetitions, 1);
BM_DEFINE_int32(benchmark_repetitions, 1);
// If set, enable random interleaving of repetitions of all benchmarks.
// See http://github.com/google/benchmark/issues/1051 for details.
BM_DEFINE_bool(benchmark_enable_random_interleaving, false);
// Report the result of each benchmark repetitions. When 'true' is specified
// only the mean, standard deviation, and other statistics are reported for
// repeated benchmarks. Affects all reporters.
DEFINE_bool(benchmark_report_aggregates_only, false);
BM_DEFINE_bool(benchmark_report_aggregates_only, false);
// Display the result of each benchmark repetitions. When 'true' is specified
// only the mean, standard deviation, and other statistics are displayed for
// repeated benchmarks. Unlike benchmark_report_aggregates_only, only affects
// the display reporter, but *NOT* file reporter, which will still contain
// all the output.
DEFINE_bool(benchmark_display_aggregates_only, false);
BM_DEFINE_bool(benchmark_display_aggregates_only, false);
// The format to use for console output.
// Valid values are 'console', 'json', or 'csv'.
DEFINE_string(benchmark_format, "console");
BM_DEFINE_string(benchmark_format, "console");
// The format to use for file output.
// Valid values are 'console', 'json', or 'csv'.
DEFINE_string(benchmark_out_format, "json");
BM_DEFINE_string(benchmark_out_format, "json");
// The file to write additional output to.
DEFINE_string(benchmark_out, "");
BM_DEFINE_string(benchmark_out, "");
// Whether to use colors in the output. Valid values:
// 'true'/'yes'/1, 'false'/'no'/0, and 'auto'. 'auto' means to use colors if
// the output is being sent to a terminal and the TERM environment variable is
// set to a terminal type that supports colors.
DEFINE_string(benchmark_color, "auto");
BM_DEFINE_string(benchmark_color, "auto");
// Whether to use tabular format when printing user counters to the console.
// Valid values: 'true'/'yes'/1, 'false'/'no'/0. Defaults to false.
DEFINE_bool(benchmark_counters_tabular, false);
BM_DEFINE_bool(benchmark_counters_tabular, false);
// List of additional perf counters to collect, in libpfm format. For more
// information about libpfm: https://man7.org/linux/man-pages/man3/libpfm.3.html
BM_DEFINE_string(benchmark_perf_counters, "");
// Extra context to include in the output formatted as comma-separated key-value
// pairs. Kept internal as it's only used for parsing from env/command line.
BM_DEFINE_kvpairs(benchmark_context, {});
// Set the default time unit to use for reports
// Valid values are 'ns', 'us', 'ms' or 's'
BM_DEFINE_string(benchmark_time_unit, "");
// The level of verbose logging to output
DEFINE_int32(v, 0);
namespace benchmark {
BM_DEFINE_int32(v, 0);
namespace internal {
std::map<std::string, std::string>* global_context = nullptr;
BENCHMARK_EXPORT std::map<std::string, std::string>*& GetGlobalContext() {
return global_context;
}
// FIXME: wouldn't LTO mess this up?
void UseCharPointer(char const volatile*) {}
} // namespace internal
State::State(IterationCount max_iters, const std::vector<int64_t>& ranges,
int thread_i, int n_threads, internal::ThreadTimer* timer,
internal::ThreadManager* manager)
State::State(std::string name, IterationCount max_iters,
const std::vector<int64_t>& ranges, int thread_i, int n_threads,
internal::ThreadTimer* timer, internal::ThreadManager* manager,
internal::PerfCountersMeasurement* perf_counters_measurement)
: total_iterations_(0),
batch_leftover_(0),
max_iterations(max_iters),
started_(false),
finished_(false),
error_occurred_(false),
skipped_(internal::NotSkipped),
range_(ranges),
complexity_n_(0),
counters(),
thread_index(thread_i),
threads(n_threads),
name_(std::move(name)),
thread_index_(thread_i),
threads_(n_threads),
timer_(timer),
manager_(manager) {
CHECK(max_iterations != 0) << "At least one iteration must be run";
CHECK_LT(thread_index, threads) << "thread_index must be less than threads";
manager_(manager),
perf_counters_measurement_(perf_counters_measurement) {
BM_CHECK(max_iterations != 0) << "At least one iteration must be run";
BM_CHECK_LT(thread_index_, threads_)
<< "thread_index must be less than threads";
// Add counters with correct flag now. If added with `counters[name]` in
// `PauseTiming`, a new `Counter` will be inserted the first time, which
// won't have the flag. Inserting them now also reduces the allocations
// during the benchmark.
if (perf_counters_measurement_) {
for (const std::string& counter_name :
perf_counters_measurement_->names()) {
counters[counter_name] = Counter(0.0, Counter::kAvgIterations);
}
}
// Note: The use of offsetof below is technically undefined until C++17
// because State is not a standard layout type. However, all compilers
@ -146,38 +202,79 @@ State::State(IterationCount max_iters, const std::vector<int64_t>& ranges,
#elif defined(__GNUC__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Winvalid-offsetof"
#endif
#if defined(__NVCC__)
#pragma nv_diagnostic push
#pragma nv_diag_suppress 1427
#endif
#if defined(__NVCOMPILER)
#pragma diagnostic push
#pragma diag_suppress offset_in_non_POD_nonstandard
#endif
// Offset tests to ensure commonly accessed data is on the first cache line.
const int cache_line_size = 64;
static_assert(offsetof(State, error_occurred_) <=
(cache_line_size - sizeof(error_occurred_)),
"");
static_assert(
offsetof(State, skipped_) <= (cache_line_size - sizeof(skipped_)), "");
#if defined(__INTEL_COMPILER)
#pragma warning pop
#elif defined(__GNUC__)
#pragma GCC diagnostic pop
#endif
#if defined(__NVCC__)
#pragma nv_diagnostic pop
#endif
#if defined(__NVCOMPILER)
#pragma diagnostic pop
#endif
}
void State::PauseTiming() {
// Add in time accumulated so far
CHECK(started_ && !finished_ && !error_occurred_);
BM_CHECK(started_ && !finished_ && !skipped());
timer_->StopTimer();
if (perf_counters_measurement_) {
std::vector<std::pair<std::string, double>> measurements;
if (!perf_counters_measurement_->Stop(measurements)) {
BM_CHECK(false) << "Perf counters read the value failed.";
}
for (const auto& name_and_measurement : measurements) {
const std::string& name = name_and_measurement.first;
const double measurement = name_and_measurement.second;
// Counter was inserted with `kAvgIterations` flag by the constructor.
assert(counters.find(name) != counters.end());
counters[name].value += measurement;
}
}
}
void State::ResumeTiming() {
CHECK(started_ && !finished_ && !error_occurred_);
BM_CHECK(started_ && !finished_ && !skipped());
timer_->StartTimer();
if (perf_counters_measurement_) {
perf_counters_measurement_->Start();
}
}
void State::SkipWithError(const char* msg) {
CHECK(msg);
error_occurred_ = true;
void State::SkipWithMessage(const std::string& msg) {
skipped_ = internal::SkippedWithMessage;
{
MutexLock l(manager_->GetBenchmarkMutex());
if (manager_->results.has_error_ == false) {
manager_->results.error_message_ = msg;
manager_->results.has_error_ = true;
if (internal::NotSkipped == manager_->results.skipped_) {
manager_->results.skip_message_ = msg;
manager_->results.skipped_ = skipped_;
}
}
total_iterations_ = 0;
if (timer_->running()) timer_->StopTimer();
}
void State::SkipWithError(const std::string& msg) {
skipped_ = internal::SkippedWithError;
{
MutexLock l(manager_->GetBenchmarkMutex());
if (internal::NotSkipped == manager_->results.skipped_) {
manager_->results.skip_message_ = msg;
manager_->results.skipped_ = skipped_;
}
}
total_iterations_ = 0;
@ -188,22 +285,22 @@ void State::SetIterationTime(double seconds) {
timer_->SetIterationTime(seconds);
}
void State::SetLabel(const char* label) {
void State::SetLabel(const std::string& label) {
MutexLock l(manager_->GetBenchmarkMutex());
manager_->results.report_label_ = label;
}
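For illustration, a minimal sketch of a benchmark using the std::string overloads above; BM_ParseConfig and the input file name are hypothetical:
#include <fstream>
#include "benchmark/benchmark.h"
static void BM_ParseConfig(benchmark::State& state) {
  std::ifstream in("config.json");  // hypothetical input file
  if (!in) {
    state.SkipWithError("could not open config.json");
    return;  // return without entering the measurement loop after skipping
  }
  for (auto _ : state) {
    // ... parsing work under test ...
  }
  state.SetLabel("parsed OK");
}
BENCHMARK(BM_ParseConfig);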
void State::StartKeepRunning() {
CHECK(!started_ && !finished_);
BM_CHECK(!started_ && !finished_);
started_ = true;
total_iterations_ = error_occurred_ ? 0 : max_iterations;
total_iterations_ = skipped() ? 0 : max_iterations;
manager_->StartStopBarrier();
if (!error_occurred_) ResumeTiming();
if (!skipped()) ResumeTiming();
}
void State::FinishKeepRunning() {
CHECK(started_ && (!finished_ || error_occurred_));
if (!error_occurred_) {
BM_CHECK(started_ && (!finished_ || skipped()));
if (!skipped()) {
PauseTiming();
}
// Total iterations has now wrapped around past 0. Fix this.
@ -215,11 +312,42 @@ void State::FinishKeepRunning() {
namespace internal {
namespace {
// Flushes streams after invoking reporter methods that write to them. This
// ensures users get timely updates even when streams are not line-buffered.
void FlushStreams(BenchmarkReporter* reporter) {
if (!reporter) return;
std::flush(reporter->GetOutputStream());
std::flush(reporter->GetErrorStream());
}
// Reports in both display and file reporters.
void Report(BenchmarkReporter* display_reporter,
BenchmarkReporter* file_reporter, const RunResults& run_results) {
auto report_one = [](BenchmarkReporter* reporter, bool aggregates_only,
const RunResults& results) {
assert(reporter);
// If there are no aggregates, do output non-aggregates.
aggregates_only &= !results.aggregates_only.empty();
if (!aggregates_only) reporter->ReportRuns(results.non_aggregates);
if (!results.aggregates_only.empty())
reporter->ReportRuns(results.aggregates_only);
};
report_one(display_reporter, run_results.display_report_aggregates_only,
run_results);
if (file_reporter)
report_one(file_reporter, run_results.file_report_aggregates_only,
run_results);
FlushStreams(display_reporter);
FlushStreams(file_reporter);
}
void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
BenchmarkReporter* display_reporter,
BenchmarkReporter* file_reporter) {
// Note the file_reporter can be null.
CHECK(display_reporter != nullptr);
BM_CHECK(display_reporter != nullptr);
// Determine the width of the name field using a minimum width of 10.
bool might_have_aggregates = FLAGS_benchmark_repetitions > 1;
@ -227,10 +355,10 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
size_t stat_field_width = 0;
for (const BenchmarkInstance& benchmark : benchmarks) {
name_field_width =
std::max<size_t>(name_field_width, benchmark.name.str().size());
might_have_aggregates |= benchmark.repetitions > 1;
std::max<size_t>(name_field_width, benchmark.name().str().size());
might_have_aggregates |= benchmark.repetitions() > 1;
for (const auto& Stat : *benchmark.statistics)
for (const auto& Stat : benchmark.statistics())
stat_field_width = std::max<size_t>(stat_field_width, Stat.name_.size());
}
if (might_have_aggregates) name_field_width += 1 + stat_field_width;
@ -239,75 +367,129 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
BenchmarkReporter::Context context;
context.name_field_width = name_field_width;
// Keep track of running times of all instances of current benchmark
std::vector<BenchmarkReporter::Run> complexity_reports;
// We flush streams after invoking reporter methods that write to them. This
// ensures users get timely updates even when streams are not line-buffered.
auto flushStreams = [](BenchmarkReporter* reporter) {
if (!reporter) return;
std::flush(reporter->GetOutputStream());
std::flush(reporter->GetErrorStream());
};
// Keep track of running times of all instances of each benchmark family.
std::map<int /*family_index*/, BenchmarkReporter::PerFamilyRunReports>
per_family_reports;
if (display_reporter->ReportContext(context) &&
(!file_reporter || file_reporter->ReportContext(context))) {
flushStreams(display_reporter);
flushStreams(file_reporter);
FlushStreams(display_reporter);
FlushStreams(file_reporter);
for (const auto& benchmark : benchmarks) {
RunResults run_results = RunBenchmark(benchmark, &complexity_reports);
size_t num_repetitions_total = 0;
auto report = [&run_results](BenchmarkReporter* reporter,
bool report_aggregates_only) {
assert(reporter);
// If there are no aggregates, do output non-aggregates.
report_aggregates_only &= !run_results.aggregates_only.empty();
if (!report_aggregates_only)
reporter->ReportRuns(run_results.non_aggregates);
if (!run_results.aggregates_only.empty())
reporter->ReportRuns(run_results.aggregates_only);
};
// This perfcounters object needs to be created before the runners vector
// below so that it outlives them.
PerfCountersMeasurement perfcounters(
StrSplit(FLAGS_benchmark_perf_counters, ','));
report(display_reporter, run_results.display_report_aggregates_only);
// Vector of benchmarks to run
std::vector<internal::BenchmarkRunner> runners;
runners.reserve(benchmarks.size());
// Count the number of benchmarks with threads to warn the user in case
// performance counters are used.
int benchmarks_with_threads = 0;
// Loop through all benchmarks
for (const BenchmarkInstance& benchmark : benchmarks) {
BenchmarkReporter::PerFamilyRunReports* reports_for_family = nullptr;
if (benchmark.complexity() != oNone)
reports_for_family = &per_family_reports[benchmark.family_index()];
benchmarks_with_threads += (benchmark.threads() > 1);
runners.emplace_back(benchmark, &perfcounters, reports_for_family);
int num_repeats_of_this_instance = runners.back().GetNumRepeats();
num_repetitions_total += num_repeats_of_this_instance;
if (reports_for_family)
reports_for_family->num_runs_total += num_repeats_of_this_instance;
}
assert(runners.size() == benchmarks.size() && "Unexpected runner count.");
// The use of performance counters with threads would be unintuitive for
// the average user so we need to warn them about this case
if ((benchmarks_with_threads > 0) && (perfcounters.num_counters() > 0)) {
GetErrorLogInstance()
<< "***WARNING*** There are " << benchmarks_with_threads
<< " benchmarks with threads and " << perfcounters.num_counters()
<< " performance counters were requested. Beware counters will "
"reflect the combined usage across all "
"threads.\n";
}
std::vector<size_t> repetition_indices;
repetition_indices.reserve(num_repetitions_total);
for (size_t runner_index = 0, num_runners = runners.size();
runner_index != num_runners; ++runner_index) {
const internal::BenchmarkRunner& runner = runners[runner_index];
std::fill_n(std::back_inserter(repetition_indices),
runner.GetNumRepeats(), runner_index);
}
assert(repetition_indices.size() == num_repetitions_total &&
"Unexpected number of repetition indexes.");
if (FLAGS_benchmark_enable_random_interleaving) {
std::random_device rd;
std::mt19937 g(rd());
std::shuffle(repetition_indices.begin(), repetition_indices.end(), g);
}
for (size_t repetition_index : repetition_indices) {
internal::BenchmarkRunner& runner = runners[repetition_index];
runner.DoOneRepetition();
if (runner.HasRepeatsRemaining()) continue;
// FIXME: report each repetition separately, not all of them in bulk.
display_reporter->ReportRunsConfig(
runner.GetMinTime(), runner.HasExplicitIters(), runner.GetIters());
if (file_reporter)
report(file_reporter, run_results.file_report_aggregates_only);
file_reporter->ReportRunsConfig(
runner.GetMinTime(), runner.HasExplicitIters(), runner.GetIters());
flushStreams(display_reporter);
flushStreams(file_reporter);
RunResults run_results = runner.GetResults();
// Maybe calculate complexity report
if (const auto* reports_for_family = runner.GetReportsForFamily()) {
if (reports_for_family->num_runs_done ==
reports_for_family->num_runs_total) {
auto additional_run_stats = ComputeBigO(reports_for_family->Runs);
run_results.aggregates_only.insert(run_results.aggregates_only.end(),
additional_run_stats.begin(),
additional_run_stats.end());
per_family_reports.erase(
static_cast<int>(reports_for_family->Runs.front().family_index));
}
}
Report(display_reporter, file_reporter, run_results);
}
}
display_reporter->Finalize();
if (file_reporter) file_reporter->Finalize();
flushStreams(display_reporter);
flushStreams(file_reporter);
FlushStreams(display_reporter);
FlushStreams(file_reporter);
}
// Disable deprecated warnings temporarily because we need to reference
// CSVReporter but don't want to trigger -Werror=-Wdeprecated-declarations
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#endif
BENCHMARK_DISABLE_DEPRECATED_WARNING
std::unique_ptr<BenchmarkReporter> CreateReporter(
std::string const& name, ConsoleReporter::OutputOptions output_opts) {
typedef std::unique_ptr<BenchmarkReporter> PtrType;
if (name == "console") {
return PtrType(new ConsoleReporter(output_opts));
} else if (name == "json") {
return PtrType(new JSONReporter);
} else if (name == "csv") {
return PtrType(new CSVReporter);
} else {
std::cerr << "Unexpected format: '" << name << "'\n";
std::exit(1);
}
if (name == "json") {
return PtrType(new JSONReporter());
}
if (name == "csv") {
return PtrType(new CSVReporter());
}
std::cerr << "Unexpected format: '" << name << "'\n";
std::exit(1);
}
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
BENCHMARK_RESTORE_DEPRECATED_WARNING
} // end namespace
@ -341,17 +523,41 @@ ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color) {
} // end namespace internal
BenchmarkReporter* CreateDefaultDisplayReporter() {
static auto default_display_reporter =
internal::CreateReporter(FLAGS_benchmark_format,
internal::GetOutputOptions())
.release();
return default_display_reporter;
}
size_t RunSpecifiedBenchmarks() {
return RunSpecifiedBenchmarks(nullptr, nullptr);
return RunSpecifiedBenchmarks(nullptr, nullptr, FLAGS_benchmark_filter);
}
size_t RunSpecifiedBenchmarks(std::string spec) {
return RunSpecifiedBenchmarks(nullptr, nullptr, std::move(spec));
}
size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter) {
return RunSpecifiedBenchmarks(display_reporter, nullptr);
return RunSpecifiedBenchmarks(display_reporter, nullptr,
FLAGS_benchmark_filter);
}
size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
std::string spec) {
return RunSpecifiedBenchmarks(display_reporter, nullptr, std::move(spec));
}
size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
BenchmarkReporter* file_reporter) {
std::string spec = FLAGS_benchmark_filter;
return RunSpecifiedBenchmarks(display_reporter, file_reporter,
FLAGS_benchmark_filter);
}
size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
BenchmarkReporter* file_reporter,
std::string spec) {
if (spec.empty() || spec == "all")
spec = "."; // Regexp that matches all benchmarks
@ -360,8 +566,7 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
std::unique_ptr<BenchmarkReporter> default_display_reporter;
std::unique_ptr<BenchmarkReporter> default_file_reporter;
if (!display_reporter) {
default_display_reporter = internal::CreateReporter(
FLAGS_benchmark_format, internal::GetOutputOptions());
default_display_reporter.reset(CreateDefaultDisplayReporter());
display_reporter = default_display_reporter.get();
}
auto& Out = display_reporter->GetOutputStream();
@ -377,12 +582,14 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
if (!fname.empty()) {
output_file.open(fname);
if (!output_file.is_open()) {
Err << "invalid file name: '" << fname << std::endl;
Err << "invalid file name: '" << fname << "'" << std::endl;
std::exit(1);
}
if (!file_reporter) {
default_file_reporter = internal::CreateReporter(
FLAGS_benchmark_out_format, ConsoleReporter::OO_None);
FLAGS_benchmark_out_format, FLAGS_benchmark_counters_tabular
? ConsoleReporter::OO_Tabular
: ConsoleReporter::OO_None);
file_reporter = default_file_reporter.get();
}
file_reporter->SetOutputStream(&output_file);
@ -399,7 +606,7 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
if (FLAGS_benchmark_list_tests) {
for (auto const& benchmark : benchmarks)
Out << benchmark.name.str() << "\n";
Out << benchmark.name().str() << "\n";
} else {
internal::RunBenchmarks(benchmarks, display_reporter, file_reporter);
}
@ -407,30 +614,64 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
return benchmarks.size();
}
namespace {
// stores the time unit benchmarks use by default
TimeUnit default_time_unit = kNanosecond;
} // namespace
TimeUnit GetDefaultTimeUnit() { return default_time_unit; }
void SetDefaultTimeUnit(TimeUnit unit) { default_time_unit = unit; }
std::string GetBenchmarkFilter() { return FLAGS_benchmark_filter; }
void SetBenchmarkFilter(std::string value) {
FLAGS_benchmark_filter = std::move(value);
}
int32_t GetBenchmarkVerbosity() { return FLAGS_v; }
void RegisterMemoryManager(MemoryManager* manager) {
internal::memory_manager = manager;
}
void AddCustomContext(const std::string& key, const std::string& value) {
if (internal::global_context == nullptr) {
internal::global_context = new std::map<std::string, std::string>();
}
if (!internal::global_context->emplace(key, value).second) {
std::cerr << "Failed to add custom context \"" << key << "\" as it already "
<< "exists with value \"" << value << "\"\n";
}
}
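A usage sketch (key/value strings are hypothetical): context added this way ends up in the reporter output, e.g. the JSON "context" block, and Shutdown() releases the map again:
#include "benchmark/benchmark.h"
int main(int argc, char** argv) {
  benchmark::Initialize(&argc, argv);
  benchmark::AddCustomContext("compiler", "clang-16");      // hypothetical value
  benchmark::AddCustomContext("dataset", "2023-09-sample");  // hypothetical value
  benchmark::RunSpecifiedBenchmarks();
  benchmark::Shutdown();  // frees the global context map
  return 0;
}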
namespace internal {
void (*HelperPrintf)();
void PrintUsageAndExit() {
fprintf(stdout,
"benchmark"
" [--benchmark_list_tests={true|false}]\n"
" [--benchmark_filter=<regex>]\n"
" [--benchmark_min_time=<min_time>]\n"
" [--benchmark_repetitions=<num_repetitions>]\n"
" [--benchmark_report_aggregates_only={true|false}]\n"
" [--benchmark_display_aggregates_only={true|false}]\n"
" [--benchmark_format=<console|json|csv>]\n"
" [--benchmark_out=<filename>]\n"
" [--benchmark_out_format=<json|console|csv>]\n"
" [--benchmark_color={auto|true|false}]\n"
" [--benchmark_counters_tabular={true|false}]\n"
" [--v=<verbosity>]\n");
HelperPrintf();
exit(0);
}
void SetDefaultTimeUnitFromFlag(const std::string& time_unit_flag) {
if (time_unit_flag == "s") {
return SetDefaultTimeUnit(kSecond);
}
if (time_unit_flag == "ms") {
return SetDefaultTimeUnit(kMillisecond);
}
if (time_unit_flag == "us") {
return SetDefaultTimeUnit(kMicrosecond);
}
if (time_unit_flag == "ns") {
return SetDefaultTimeUnit(kNanosecond);
}
if (!time_unit_flag.empty()) {
PrintUsageAndExit();
}
}
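The same defaults can also be set programmatically; a minimal sketch, roughly equivalent to passing --benchmark_time_unit=ms and --benchmark_filter=BM_Vector.* (the filter pattern is hypothetical):
#include "benchmark/benchmark.h"
int main(int argc, char** argv) {
  benchmark::Initialize(&argc, argv);
  benchmark::SetDefaultTimeUnit(benchmark::kMillisecond);  // like --benchmark_time_unit=ms
  benchmark::SetBenchmarkFilter("BM_Vector.*");            // like --benchmark_filter=BM_Vector.*
  benchmark::RunSpecifiedBenchmarks();
  benchmark::Shutdown();
  return 0;
}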
void ParseCommandLineFlags(int* argc, char** argv) {
using namespace benchmark;
BenchmarkReporter::Context::executable_name =
@ -439,10 +680,14 @@ void ParseCommandLineFlags(int* argc, char** argv) {
if (ParseBoolFlag(argv[i], "benchmark_list_tests",
&FLAGS_benchmark_list_tests) ||
ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) ||
ParseDoubleFlag(argv[i], "benchmark_min_time",
ParseStringFlag(argv[i], "benchmark_min_time",
&FLAGS_benchmark_min_time) ||
ParseDoubleFlag(argv[i], "benchmark_min_warmup_time",
&FLAGS_benchmark_min_warmup_time) ||
ParseInt32Flag(argv[i], "benchmark_repetitions",
&FLAGS_benchmark_repetitions) ||
ParseBoolFlag(argv[i], "benchmark_enable_random_interleaving",
&FLAGS_benchmark_enable_random_interleaving) ||
ParseBoolFlag(argv[i], "benchmark_report_aggregates_only",
&FLAGS_benchmark_report_aggregates_only) ||
ParseBoolFlag(argv[i], "benchmark_display_aggregates_only",
@ -452,11 +697,14 @@ void ParseCommandLineFlags(int* argc, char** argv) {
ParseStringFlag(argv[i], "benchmark_out_format",
&FLAGS_benchmark_out_format) ||
ParseStringFlag(argv[i], "benchmark_color", &FLAGS_benchmark_color) ||
// "color_print" is the deprecated name for "benchmark_color".
// TODO: Remove this.
ParseStringFlag(argv[i], "color_print", &FLAGS_benchmark_color) ||
ParseBoolFlag(argv[i], "benchmark_counters_tabular",
&FLAGS_benchmark_counters_tabular) ||
ParseStringFlag(argv[i], "benchmark_perf_counters",
&FLAGS_benchmark_perf_counters) ||
ParseKeyValueFlag(argv[i], "benchmark_context",
&FLAGS_benchmark_context) ||
ParseStringFlag(argv[i], "benchmark_time_unit",
&FLAGS_benchmark_time_unit) ||
ParseInt32Flag(argv[i], "v", &FLAGS_v)) {
for (int j = i; j != *argc - 1; ++j) argv[j] = argv[j + 1];
@ -467,13 +715,18 @@ void ParseCommandLineFlags(int* argc, char** argv) {
}
}
for (auto const* flag :
{&FLAGS_benchmark_format, &FLAGS_benchmark_out_format})
{&FLAGS_benchmark_format, &FLAGS_benchmark_out_format}) {
if (*flag != "console" && *flag != "json" && *flag != "csv") {
PrintUsageAndExit();
}
}
SetDefaultTimeUnitFromFlag(FLAGS_benchmark_time_unit);
if (FLAGS_benchmark_color.empty()) {
PrintUsageAndExit();
}
for (const auto& kv : FLAGS_benchmark_context) {
AddCustomContext(kv.first, kv.second);
}
}
int InitializeStreams() {
@ -483,11 +736,38 @@ int InitializeStreams() {
} // end namespace internal
void Initialize(int* argc, char** argv) {
void PrintDefaultHelp() {
fprintf(stdout,
"benchmark"
" [--benchmark_list_tests={true|false}]\n"
" [--benchmark_filter=<regex>]\n"
" [--benchmark_min_time=`<integer>x` OR `<float>s` ]\n"
" [--benchmark_min_warmup_time=<min_warmup_time>]\n"
" [--benchmark_repetitions=<num_repetitions>]\n"
" [--benchmark_enable_random_interleaving={true|false}]\n"
" [--benchmark_report_aggregates_only={true|false}]\n"
" [--benchmark_display_aggregates_only={true|false}]\n"
" [--benchmark_format=<console|json|csv>]\n"
" [--benchmark_out=<filename>]\n"
" [--benchmark_out_format=<json|console|csv>]\n"
" [--benchmark_color={auto|true|false}]\n"
" [--benchmark_counters_tabular={true|false}]\n"
#if defined HAVE_LIBPFM
" [--benchmark_perf_counters=<counter>,...]\n"
#endif
" [--benchmark_context=<key>=<value>,...]\n"
" [--benchmark_time_unit={ns|us|ms|s}]\n"
" [--v=<verbosity>]\n");
}
void Initialize(int* argc, char** argv, void (*HelperPrintf)()) {
internal::HelperPrintf = HelperPrintf;
internal::ParseCommandLineFlags(argc, argv);
internal::LogLevel() = FLAGS_v;
}
void Shutdown() { delete internal::global_context; }
bool ReportUnrecognizedArguments(int argc, char** argv) {
for (int i = 1; i < argc; ++i) {
fprintf(stderr, "%s: error: unrecognized command-line flag: %s\n", argv[0],
@ -1,15 +1,118 @@
#include "benchmark_api_internal.h"
#include <cinttypes>
#include "string_util.h"
namespace benchmark {
namespace internal {
State BenchmarkInstance::Run(IterationCount iters, int thread_id,
internal::ThreadTimer* timer,
internal::ThreadManager* manager) const {
State st(iters, arg, thread_id, threads, timer, manager);
benchmark->Run(st);
BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, int family_idx,
int per_family_instance_idx,
const std::vector<int64_t>& args,
int thread_count)
: benchmark_(*benchmark),
family_index_(family_idx),
per_family_instance_index_(per_family_instance_idx),
aggregation_report_mode_(benchmark_.aggregation_report_mode_),
args_(args),
time_unit_(benchmark_.GetTimeUnit()),
measure_process_cpu_time_(benchmark_.measure_process_cpu_time_),
use_real_time_(benchmark_.use_real_time_),
use_manual_time_(benchmark_.use_manual_time_),
complexity_(benchmark_.complexity_),
complexity_lambda_(benchmark_.complexity_lambda_),
statistics_(benchmark_.statistics_),
repetitions_(benchmark_.repetitions_),
min_time_(benchmark_.min_time_),
min_warmup_time_(benchmark_.min_warmup_time_),
iterations_(benchmark_.iterations_),
threads_(thread_count) {
name_.function_name = benchmark_.name_;
size_t arg_i = 0;
for (const auto& arg : args) {
if (!name_.args.empty()) {
name_.args += '/';
}
if (arg_i < benchmark->arg_names_.size()) {
const auto& arg_name = benchmark_.arg_names_[arg_i];
if (!arg_name.empty()) {
name_.args += StrFormat("%s:", arg_name.c_str());
}
}
name_.args += StrFormat("%" PRId64, arg);
++arg_i;
}
if (!IsZero(benchmark->min_time_)) {
name_.min_time = StrFormat("min_time:%0.3f", benchmark_.min_time_);
}
if (!IsZero(benchmark->min_warmup_time_)) {
name_.min_warmup_time =
StrFormat("min_warmup_time:%0.3f", benchmark_.min_warmup_time_);
}
if (benchmark_.iterations_ != 0) {
name_.iterations = StrFormat(
"iterations:%lu", static_cast<unsigned long>(benchmark_.iterations_));
}
if (benchmark_.repetitions_ != 0) {
name_.repetitions = StrFormat("repeats:%d", benchmark_.repetitions_);
}
if (benchmark_.measure_process_cpu_time_) {
name_.time_type = "process_time";
}
if (benchmark_.use_manual_time_) {
if (!name_.time_type.empty()) {
name_.time_type += '/';
}
name_.time_type += "manual_time";
} else if (benchmark_.use_real_time_) {
if (!name_.time_type.empty()) {
name_.time_type += '/';
}
name_.time_type += "real_time";
}
if (!benchmark_.thread_counts_.empty()) {
name_.threads = StrFormat("threads:%d", threads_);
}
setup_ = benchmark_.setup_;
teardown_ = benchmark_.teardown_;
}
State BenchmarkInstance::Run(
IterationCount iters, int thread_id, internal::ThreadTimer* timer,
internal::ThreadManager* manager,
internal::PerfCountersMeasurement* perf_counters_measurement) const {
State st(name_.function_name, iters, args_, thread_id, threads_, timer,
manager, perf_counters_measurement);
benchmark_.Run(st);
return st;
}
} // internal
} // benchmark
void BenchmarkInstance::Setup() const {
if (setup_) {
State st(name_.function_name, /*iters*/ 1, args_, /*thread_id*/ 0, threads_,
nullptr, nullptr, nullptr);
setup_(st);
}
}
void BenchmarkInstance::Teardown() const {
if (teardown_) {
State st(name_.function_name, /*iters*/ 1, args_, /*thread_id*/ 0, threads_,
nullptr, nullptr, nullptr);
teardown_(st);
}
}
} // namespace internal
} // namespace benchmark
@ -1,9 +1,6 @@
#ifndef BENCHMARK_API_INTERNAL_H
#define BENCHMARK_API_INTERNAL_H
#include "benchmark/benchmark.h"
#include "commandlineflags.h"
#include <cmath>
#include <iosfwd>
#include <limits>
@ -11,32 +8,68 @@
#include <string>
#include <vector>
#include "benchmark/benchmark.h"
#include "commandlineflags.h"
namespace benchmark {
namespace internal {
// Information kept per benchmark we may want to run
struct BenchmarkInstance {
BenchmarkName name;
Benchmark* benchmark;
AggregationReportMode aggregation_report_mode;
std::vector<int64_t> arg;
TimeUnit time_unit;
int range_multiplier;
bool measure_process_cpu_time;
bool use_real_time;
bool use_manual_time;
BigO complexity;
BigOFunc* complexity_lambda;
UserCounters counters;
const std::vector<Statistics>* statistics;
bool last_benchmark_instance;
int repetitions;
double min_time;
IterationCount iterations;
int threads; // Number of concurrent threads to use
class BenchmarkInstance {
public:
BenchmarkInstance(Benchmark* benchmark, int family_index,
int per_family_instance_index,
const std::vector<int64_t>& args, int threads);
const BenchmarkName& name() const { return name_; }
int family_index() const { return family_index_; }
int per_family_instance_index() const { return per_family_instance_index_; }
AggregationReportMode aggregation_report_mode() const {
return aggregation_report_mode_;
}
TimeUnit time_unit() const { return time_unit_; }
bool measure_process_cpu_time() const { return measure_process_cpu_time_; }
bool use_real_time() const { return use_real_time_; }
bool use_manual_time() const { return use_manual_time_; }
BigO complexity() const { return complexity_; }
BigOFunc* complexity_lambda() const { return complexity_lambda_; }
const std::vector<Statistics>& statistics() const { return statistics_; }
int repetitions() const { return repetitions_; }
double min_time() const { return min_time_; }
double min_warmup_time() const { return min_warmup_time_; }
IterationCount iterations() const { return iterations_; }
int threads() const { return threads_; }
void Setup() const;
void Teardown() const;
State Run(IterationCount iters, int thread_id, internal::ThreadTimer* timer,
internal::ThreadManager* manager) const;
internal::ThreadManager* manager,
internal::PerfCountersMeasurement* perf_counters_measurement) const;
private:
BenchmarkName name_;
Benchmark& benchmark_;
const int family_index_;
const int per_family_instance_index_;
AggregationReportMode aggregation_report_mode_;
const std::vector<int64_t>& args_;
TimeUnit time_unit_;
bool measure_process_cpu_time_;
bool use_real_time_;
bool use_manual_time_;
BigO complexity_;
BigOFunc* complexity_lambda_;
UserCounters counters_;
const std::vector<Statistics>& statistics_;
int repetitions_;
double min_time_;
double min_warmup_time_;
IterationCount iterations_;
int threads_; // Number of concurrent threads to use
typedef void (*callback_function)(const benchmark::State&);
callback_function setup_ = nullptr;
callback_function teardown_ = nullptr;
};
bool FindBenchmarksInternal(const std::string& re,
@ -45,6 +78,7 @@ bool FindBenchmarksInternal(const std::string& re,
bool IsZero(double n);
BENCHMARK_EXPORT
ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color = false);
} // end namespace internal
@ -14,4 +14,5 @@
#include "benchmark/benchmark.h"
BENCHMARK_EXPORT int main(int, char**);
BENCHMARK_MAIN();
@ -51,8 +51,9 @@ std::string join(char delimiter, const Ts&... ts) {
}
} // namespace
BENCHMARK_EXPORT
std::string BenchmarkName::str() const {
return join('/', function_name, args, min_time, iterations, repetitions,
time_type, threads);
return join('/', function_name, args, min_time, min_warmup_time, iterations,
repetitions, time_type, threads);
}
} // namespace benchmark
@ -15,7 +15,7 @@
#include "benchmark_register.h"
#ifndef BENCHMARK_OS_WINDOWS
#ifndef BENCHMARK_OS_FUCHSIA
#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT)
#include <sys/resource.h>
#endif
#include <sys/time.h>
@ -24,6 +24,7 @@
#include <algorithm>
#include <atomic>
#include <cinttypes>
#include <condition_variable>
#include <cstdio>
#include <cstdlib>
@ -31,14 +32,10 @@
#include <fstream>
#include <iostream>
#include <memory>
#include <numeric>
#include <sstream>
#include <thread>
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <inttypes.h>
#include "benchmark/benchmark.h"
#include "benchmark_api_internal.h"
#include "check.h"
@ -56,10 +53,13 @@ namespace benchmark {
namespace {
// For non-dense Range, intermediate values are powers of kRangeMultiplier.
static const int kRangeMultiplier = 8;
static constexpr int kRangeMultiplier = 8;
// The size of a benchmark family is the number of inputs to repeat the
// benchmark on. If this is "large" then warn the user during configuration.
static const size_t kMaxFamilySize = 100;
static constexpr size_t kMaxFamilySize = 100;
static constexpr char kDisabledPrefix[] = "DISABLED_";
} // end namespace
namespace internal {
@ -114,15 +114,15 @@ void BenchmarkFamilies::ClearBenchmarks() {
bool BenchmarkFamilies::FindBenchmarks(
std::string spec, std::vector<BenchmarkInstance>* benchmarks,
std::ostream* ErrStream) {
CHECK(ErrStream);
BM_CHECK(ErrStream);
auto& Err = *ErrStream;
// Make regular expression out of command-line flag
std::string error_msg;
Regex re;
bool isNegativeFilter = false;
bool is_negative_filter = false;
if (spec[0] == '-') {
spec.replace(0, 1, "");
isNegativeFilter = true;
is_negative_filter = true;
}
if (!re.Init(spec, &error_msg)) {
Err << "Could not compile benchmark re: " << error_msg << std::endl;
@ -132,8 +132,13 @@ bool BenchmarkFamilies::FindBenchmarks(
// Special list of thread counts to use when none are specified
const std::vector<int> one_thread = {1};
int next_family_index = 0;
MutexLock l(mutex_);
for (std::unique_ptr<Benchmark>& family : families_) {
int family_index = next_family_index;
int per_family_instance_index = 0;
// Family was deleted or benchmark doesn't match
if (!family) continue;
@ -152,85 +157,27 @@ bool BenchmarkFamilies::FindBenchmarks(
<< " will be repeated at least " << family_size << " times.\n";
}
// reserve in the special case the regex ".", since we know the final
// family size.
if (spec == ".") benchmarks->reserve(family_size);
// family size. this doesn't take into account any disabled benchmarks
// so worst case we reserve more than we need.
if (spec == ".") benchmarks->reserve(benchmarks->size() + family_size);
for (auto const& args : family->args_) {
for (int num_threads : *thread_counts) {
BenchmarkInstance instance;
instance.name.function_name = family->name_;
instance.benchmark = family.get();
instance.aggregation_report_mode = family->aggregation_report_mode_;
instance.arg = args;
instance.time_unit = family->time_unit_;
instance.range_multiplier = family->range_multiplier_;
instance.min_time = family->min_time_;
instance.iterations = family->iterations_;
instance.repetitions = family->repetitions_;
instance.measure_process_cpu_time = family->measure_process_cpu_time_;
instance.use_real_time = family->use_real_time_;
instance.use_manual_time = family->use_manual_time_;
instance.complexity = family->complexity_;
instance.complexity_lambda = family->complexity_lambda_;
instance.statistics = &family->statistics_;
instance.threads = num_threads;
BenchmarkInstance instance(family.get(), family_index,
per_family_instance_index, args,
num_threads);
// Add arguments to instance name
size_t arg_i = 0;
for (auto const& arg : args) {
if (!instance.name.args.empty()) {
instance.name.args += '/';
}
if (arg_i < family->arg_names_.size()) {
const auto& arg_name = family->arg_names_[arg_i];
if (!arg_name.empty()) {
instance.name.args += StrFormat("%s:", arg_name.c_str());
}
}
instance.name.args += StrFormat("%" PRId64, arg);
++arg_i;
}
if (!IsZero(family->min_time_))
instance.name.min_time =
StrFormat("min_time:%0.3f", family->min_time_);
if (family->iterations_ != 0) {
instance.name.iterations =
StrFormat("iterations:%lu",
static_cast<unsigned long>(family->iterations_));
}
if (family->repetitions_ != 0)
instance.name.repetitions =
StrFormat("repeats:%d", family->repetitions_);
if (family->measure_process_cpu_time_) {
instance.name.time_type = "process_time";
}
if (family->use_manual_time_) {
if (!instance.name.time_type.empty()) {
instance.name.time_type += '/';
}
instance.name.time_type += "manual_time";
} else if (family->use_real_time_) {
if (!instance.name.time_type.empty()) {
instance.name.time_type += '/';
}
instance.name.time_type += "real_time";
}
// Add the number of threads used to the name
if (!family->thread_counts_.empty()) {
instance.name.threads = StrFormat("threads:%d", instance.threads);
}
const auto full_name = instance.name.str();
if ((re.Match(full_name) && !isNegativeFilter) ||
(!re.Match(full_name) && isNegativeFilter)) {
instance.last_benchmark_instance = (&args == &family->args_.back());
const auto full_name = instance.name().str();
if (full_name.rfind(kDisabledPrefix, 0) != 0 &&
((re.Match(full_name) && !is_negative_filter) ||
(!re.Match(full_name) && is_negative_filter))) {
benchmarks->push_back(std::move(instance));
++per_family_instance_index;
// Only bump the next family index once we've established that
// at least one instance of this family will be run.
if (next_family_index == family_index) ++next_family_index;
}
}
}
@ -257,39 +204,50 @@ bool FindBenchmarksInternal(const std::string& re,
// Benchmark
//=============================================================================//
Benchmark::Benchmark(const char* name)
Benchmark::Benchmark(const std::string& name)
: name_(name),
aggregation_report_mode_(ARM_Unspecified),
time_unit_(kNanosecond),
time_unit_(GetDefaultTimeUnit()),
use_default_time_unit_(true),
range_multiplier_(kRangeMultiplier),
min_time_(0),
min_warmup_time_(0),
iterations_(0),
repetitions_(0),
measure_process_cpu_time_(false),
use_real_time_(false),
use_manual_time_(false),
complexity_(oNone),
complexity_lambda_(nullptr) {
complexity_lambda_(nullptr),
setup_(nullptr),
teardown_(nullptr) {
ComputeStatistics("mean", StatisticsMean);
ComputeStatistics("median", StatisticsMedian);
ComputeStatistics("stddev", StatisticsStdDev);
ComputeStatistics("cv", StatisticsCV, kPercentage);
}
Benchmark::~Benchmark() {}
Benchmark* Benchmark::Name(const std::string& name) {
SetName(name);
return this;
}
Benchmark* Benchmark::Arg(int64_t x) {
CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
args_.push_back({x});
return this;
}
Benchmark* Benchmark::Unit(TimeUnit unit) {
time_unit_ = unit;
use_default_time_unit_ = false;
return this;
}
Benchmark* Benchmark::Range(int64_t start, int64_t limit) {
CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
std::vector<int64_t> arglist;
AddRange(&arglist, start, limit, range_multiplier_);
@ -301,53 +259,61 @@ Benchmark* Benchmark::Range(int64_t start, int64_t limit) {
Benchmark* Benchmark::Ranges(
const std::vector<std::pair<int64_t, int64_t>>& ranges) {
CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(ranges.size()));
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(ranges.size()));
std::vector<std::vector<int64_t>> arglists(ranges.size());
std::size_t total = 1;
for (std::size_t i = 0; i < ranges.size(); i++) {
AddRange(&arglists[i], ranges[i].first, ranges[i].second,
range_multiplier_);
total *= arglists[i].size();
}
std::vector<std::size_t> ctr(arglists.size(), 0);
ArgsProduct(arglists);
return this;
}
Benchmark* Benchmark::ArgsProduct(
const std::vector<std::vector<int64_t>>& arglists) {
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(arglists.size()));
std::vector<std::size_t> indices(arglists.size());
const std::size_t total = std::accumulate(
std::begin(arglists), std::end(arglists), std::size_t{1},
[](const std::size_t res, const std::vector<int64_t>& arglist) {
return res * arglist.size();
});
std::vector<int64_t> args;
args.reserve(arglists.size());
for (std::size_t i = 0; i < total; i++) {
std::vector<int64_t> tmp;
tmp.reserve(arglists.size());
for (std::size_t j = 0; j < arglists.size(); j++) {
tmp.push_back(arglists[j].at(ctr[j]));
for (std::size_t arg = 0; arg < arglists.size(); arg++) {
args.push_back(arglists[arg][indices[arg]]);
}
args_.push_back(args);
args.clear();
args_.push_back(std::move(tmp));
for (std::size_t j = 0; j < arglists.size(); j++) {
if (ctr[j] + 1 < arglists[j].size()) {
++ctr[j];
break;
}
ctr[j] = 0;
}
std::size_t arg = 0;
do {
indices[arg] = (indices[arg] + 1) % arglists[arg].size();
} while (indices[arg++] == 0 && arg < arglists.size());
}
return this;
}
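A registration sketch for the cartesian-product logic above; BM_Fill and the argument lists are hypothetical:
#include <algorithm>
#include <vector>
#include "benchmark/benchmark.h"
static void BM_Fill(benchmark::State& state) {
  std::vector<char> buf(static_cast<size_t>(state.range(0)));
  const char value = static_cast<char>(state.range(1));
  for (auto _ : state) {
    std::fill(buf.begin(), buf.end(), value);
    benchmark::DoNotOptimize(buf.data());
  }
}
// 3 sizes x 2 fill values -> 6 registered argument combinations.
BENCHMARK(BM_Fill)->ArgsProduct({{64, 512, 4096}, {0, 1}});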
Benchmark* Benchmark::ArgName(const std::string& name) {
CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
arg_names_ = {name};
return this;
}
Benchmark* Benchmark::ArgNames(const std::vector<std::string>& names) {
CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(names.size()));
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(names.size()));
arg_names_ = names;
return this;
}
Benchmark* Benchmark::DenseRange(int64_t start, int64_t limit, int step) {
CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
CHECK_LE(start, limit);
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
BM_CHECK_LE(start, limit);
for (int64_t arg = start; arg <= limit; arg += step) {
args_.push_back({arg});
}
@ -355,7 +321,7 @@ Benchmark* Benchmark::DenseRange(int64_t start, int64_t limit, int step) {
}
Benchmark* Benchmark::Args(const std::vector<int64_t>& args) {
CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(args.size()));
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(args.size()));
args_.push_back(args);
return this;
}
@ -365,28 +331,48 @@ Benchmark* Benchmark::Apply(void (*custom_arguments)(Benchmark* benchmark)) {
return this;
}
Benchmark* Benchmark::Setup(void (*setup)(const benchmark::State&)) {
BM_CHECK(setup != nullptr);
setup_ = setup;
return this;
}
Benchmark* Benchmark::Teardown(void (*teardown)(const benchmark::State&)) {
BM_CHECK(teardown != nullptr);
teardown_ = teardown;
return this;
}
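A sketch of wiring the Setup()/Teardown() hooks into a registration (all names hypothetical); the callbacks run before and after each run of a benchmark, not per iteration:
#include "benchmark/benchmark.h"
static void DoSetup(const benchmark::State&) {
  // e.g. warm a cache or start a helper service needed by the benchmark
}
static void DoTeardown(const benchmark::State&) {
  // e.g. release whatever DoSetup acquired
}
static void BM_WithHooks(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(state.range(0));
  }
}
BENCHMARK(BM_WithHooks)->Arg(42)->Setup(DoSetup)->Teardown(DoTeardown);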
Benchmark* Benchmark::RangeMultiplier(int multiplier) {
CHECK(multiplier > 1);
BM_CHECK(multiplier > 1);
range_multiplier_ = multiplier;
return this;
}
Benchmark* Benchmark::MinTime(double t) {
CHECK(t > 0.0);
CHECK(iterations_ == 0);
BM_CHECK(t > 0.0);
BM_CHECK(iterations_ == 0);
min_time_ = t;
return this;
}
Benchmark* Benchmark::MinWarmUpTime(double t) {
BM_CHECK(t >= 0.0);
BM_CHECK(iterations_ == 0);
min_warmup_time_ = t;
return this;
}
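For example (BM_Hash is hypothetical), the warmup budget is spent before any timed measurement:
#include <cstdint>
#include "benchmark/benchmark.h"
static void BM_Hash(benchmark::State& state) {
  uint64_t h = 1469598103934665603ull;  // FNV-1a offset basis
  for (auto _ : state) {
    h = (h ^ 0xabu) * 1099511628211ull;  // one FNV-1a step, kept live below
    benchmark::DoNotOptimize(h);
  }
}
// Warm up untimed for at least 0.5 s, then measure for at least 2 s.
BENCHMARK(BM_Hash)->MinWarmUpTime(0.5)->MinTime(2.0);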
Benchmark* Benchmark::Iterations(IterationCount n) {
CHECK(n > 0);
CHECK(IsZero(min_time_));
BM_CHECK(n > 0);
BM_CHECK(IsZero(min_time_));
BM_CHECK(IsZero(min_warmup_time_));
iterations_ = n;
return this;
}
Benchmark* Benchmark::Repetitions(int n) {
CHECK(n > 0);
BM_CHECK(n > 0);
repetitions_ = n;
return this;
}
@ -419,14 +405,14 @@ Benchmark* Benchmark::MeasureProcessCPUTime() {
}
Benchmark* Benchmark::UseRealTime() {
CHECK(!use_manual_time_)
BM_CHECK(!use_manual_time_)
<< "Cannot set UseRealTime and UseManualTime simultaneously.";
use_real_time_ = true;
return this;
}
Benchmark* Benchmark::UseManualTime() {
CHECK(!use_real_time_)
BM_CHECK(!use_real_time_)
<< "Cannot set UseRealTime and UseManualTime simultaneously.";
use_manual_time_ = true;
return this;
@ -443,21 +429,22 @@ Benchmark* Benchmark::Complexity(BigOFunc* complexity) {
return this;
}
Benchmark* Benchmark::ComputeStatistics(std::string name,
StatisticsFunc* statistics) {
statistics_.emplace_back(name, statistics);
Benchmark* Benchmark::ComputeStatistics(const std::string& name,
StatisticsFunc* statistics,
StatisticUnit unit) {
statistics_.emplace_back(name, statistics, unit);
return this;
}
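A sketch of the three-argument overload above: one user-defined statistic reported in time units, another as a percentage (BM_Sort and the statistic names are hypothetical):
#include <algorithm>
#include <numeric>
#include <vector>
#include "benchmark/benchmark.h"
static void BM_Sort(benchmark::State& state) {
  std::vector<int> data(1024);
  std::iota(data.rbegin(), data.rend(), 0);  // descending input
  for (auto _ : state) {
    auto copy = data;
    std::sort(copy.begin(), copy.end());
    benchmark::DoNotOptimize(copy.data());
  }
}
BENCHMARK(BM_Sort)
    ->Repetitions(10)
    ->ComputeStatistics("max",
                        [](const std::vector<double>& v) -> double {
                          return *std::max_element(v.begin(), v.end());
                        })
    ->ComputeStatistics("spread_pct",
                        [](const std::vector<double>& v) -> double {
                          const auto mm = std::minmax_element(v.begin(), v.end());
                          return 100.0 * (*mm.second - *mm.first) / *mm.second;
                        },
                        benchmark::kPercentage);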
Benchmark* Benchmark::Threads(int t) {
CHECK_GT(t, 0);
BM_CHECK_GT(t, 0);
thread_counts_.push_back(t);
return this;
}
Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) {
CHECK_GT(min_threads, 0);
CHECK_GE(max_threads, min_threads);
BM_CHECK_GT(min_threads, 0);
BM_CHECK_GE(max_threads, min_threads);
AddRange(&thread_counts_, min_threads, max_threads, 2);
return this;
@ -465,9 +452,9 @@ Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) {
Benchmark* Benchmark::DenseThreadRange(int min_threads, int max_threads,
int stride) {
CHECK_GT(min_threads, 0);
CHECK_GE(max_threads, min_threads);
CHECK_GE(stride, 1);
BM_CHECK_GT(min_threads, 0);
BM_CHECK_GE(max_threads, min_threads);
BM_CHECK_GE(stride, 1);
for (auto i = min_threads; i < max_threads; i += stride) {
thread_counts_.push_back(i);
@ -481,7 +468,9 @@ Benchmark* Benchmark::ThreadPerCpu() {
return this;
}
void Benchmark::SetName(const char* name) { name_ = name; }
void Benchmark::SetName(const std::string& name) { name_ = name; }
const char* Benchmark::GetName() const { return name_.c_str(); }
int Benchmark::ArgsCnt() const {
if (args_.empty()) {
@ -491,6 +480,16 @@ int Benchmark::ArgsCnt() const {
return static_cast<int>(args_.front().size());
}
const char* Benchmark::GetArgName(int arg) const {
BM_CHECK_GE(arg, 0);
BM_CHECK_LT(arg, static_cast<int>(arg_names_.size()));
return arg_names_[arg].c_str();
}
TimeUnit Benchmark::GetTimeUnit() const {
return use_default_time_unit_ ? GetDefaultTimeUnit() : time_unit_;
}
//=============================================================================//
// FunctionBenchmark
//=============================================================================//
@ -503,4 +502,19 @@ void ClearRegisteredBenchmarks() {
internal::BenchmarkFamilies::GetInstance()->ClearBenchmarks();
}
std::vector<int64_t> CreateRange(int64_t lo, int64_t hi, int multi) {
std::vector<int64_t> args;
internal::AddRange(&args, lo, hi, multi);
return args;
}
std::vector<int64_t> CreateDenseRange(int64_t start, int64_t limit, int step) {
BM_CHECK_LE(start, limit);
std::vector<int64_t> args;
for (int64_t arg = start; arg <= limit; arg += step) {
args.push_back(arg);
}
return args;
}
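These helpers pair naturally with ArgsProduct(); a sketch with a hypothetical BM_Lookup:
#include <cstdint>
#include <map>
#include "benchmark/benchmark.h"
static void BM_Lookup(benchmark::State& state) {
  std::map<int64_t, int64_t> m;
  for (int64_t i = 0; i < state.range(0); ++i) m[i] = i;
  for (auto _ : state) {
    benchmark::DoNotOptimize(m.find(state.range(1)));
  }
}
// {8, 64, 512, 4096} x {1, 2, 3, 4} -> 16 argument combinations.
BENCHMARK(BM_Lookup)
    ->ArgsProduct({benchmark::CreateRange(8, 4096, /*multi=*/8),
                   benchmark::CreateDenseRange(1, 4, /*step=*/1)});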
} // end namespace benchmark
@ -1,6 +1,8 @@
#ifndef BENCHMARK_REGISTER_H
#define BENCHMARK_REGISTER_H
#include <algorithm>
#include <limits>
#include <vector>
#include "check.h"
@ -11,18 +13,18 @@ namespace internal {
// Append the powers of 'mult' in the closed interval [lo, hi].
// Returns iterator to the start of the inserted range.
template <typename T>
typename std::vector<T>::iterator
AddPowers(std::vector<T>* dst, T lo, T hi, int mult) {
CHECK_GE(lo, 0);
CHECK_GE(hi, lo);
CHECK_GE(mult, 2);
typename std::vector<T>::iterator AddPowers(std::vector<T>* dst, T lo, T hi,
int mult) {
BM_CHECK_GE(lo, 0);
BM_CHECK_GE(hi, lo);
BM_CHECK_GE(mult, 2);
const size_t start_offset = dst->size();
static const T kmax = std::numeric_limits<T>::max();
// Space out the values in multiples of "mult"
for (T i = 1; i <= hi; i *= mult) {
for (T i = static_cast<T>(1); i <= hi; i *= static_cast<T>(mult)) {
if (i >= lo) {
dst->push_back(i);
}
@ -31,16 +33,16 @@ AddPowers(std::vector<T>* dst, T lo, T hi, int mult) {
if (i > kmax / mult) break;
}
return dst->begin() + start_offset;
return dst->begin() + static_cast<int>(start_offset);
}
template <typename T>
void AddNegatedPowers(std::vector<T>* dst, T lo, T hi, int mult) {
// We negate lo and hi so we require that they cannot be equal to 'min'.
CHECK_GT(lo, std::numeric_limits<T>::min());
CHECK_GT(hi, std::numeric_limits<T>::min());
CHECK_GE(hi, lo);
CHECK_LE(hi, 0);
BM_CHECK_GT(lo, std::numeric_limits<T>::min());
BM_CHECK_GT(hi, std::numeric_limits<T>::min());
BM_CHECK_GE(hi, lo);
BM_CHECK_LE(hi, 0);
// Add positive powers, then negate and reverse.
// Casts necessary since small integers get promoted
@ -59,8 +61,8 @@ void AddRange(std::vector<T>* dst, T lo, T hi, int mult) {
static_assert(std::is_integral<T>::value && std::is_signed<T>::value,
"Args type must be a signed integer");
CHECK_GE(hi, lo);
CHECK_GE(mult, 2);
BM_CHECK_GE(hi, lo);
BM_CHECK_GE(mult, 2);
// Add "lo"
dst->push_back(lo);
@ -86,7 +88,7 @@ void AddRange(std::vector<T>* dst, T lo, T hi, int mult) {
}
// Treat 0 as a special case (see discussion on #762).
if (lo <= 0 && hi >= 0) {
if (lo < 0 && hi >= 0) {
dst->push_back(0);
}
@ -13,12 +13,13 @@
// limitations under the License.
#include "benchmark_runner.h"
#include "benchmark/benchmark.h"
#include "benchmark_api_internal.h"
#include "internal_macros.h"
#ifndef BENCHMARK_OS_WINDOWS
#ifndef BENCHMARK_OS_FUCHSIA
#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT)
#include <sys/resource.h>
#endif
#include <sys/time.h>
@ -27,11 +28,14 @@
#include <algorithm>
#include <atomic>
#include <climits>
#include <cmath>
#include <condition_variable>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <limits>
#include <memory>
#include <string>
#include <thread>
@ -45,6 +49,7 @@
#include "internal_macros.h"
#include "log.h"
#include "mutex.h"
#include "perf_counters.h"
#include "re.h"
#include "statistics.h"
#include "string_util.h"
@ -60,64 +65,72 @@ MemoryManager* memory_manager = nullptr;
namespace {
static constexpr IterationCount kMaxIterations = 1000000000;
const double kDefaultMinTime =
std::strtod(::benchmark::kDefaultMinTimeStr, /*p_end*/ nullptr);
BenchmarkReporter::Run CreateRunReport(
const benchmark::internal::BenchmarkInstance& b,
const internal::ThreadManager::Result& results,
IterationCount memory_iterations,
const MemoryManager::Result& memory_result, double seconds,
int64_t repetition_index) {
const MemoryManager::Result* memory_result, double seconds,
int64_t repetition_index, int64_t repeats) {
// Create report about this benchmark run.
BenchmarkReporter::Run report;
report.run_name = b.name;
report.error_occurred = results.has_error_;
report.error_message = results.error_message_;
report.run_name = b.name();
report.family_index = b.family_index();
report.per_family_instance_index = b.per_family_instance_index();
report.skipped = results.skipped_;
report.skip_message = results.skip_message_;
report.report_label = results.report_label_;
// This is the total iterations across all threads.
report.iterations = results.iterations;
report.time_unit = b.time_unit;
report.threads = b.threads;
report.time_unit = b.time_unit();
report.threads = b.threads();
report.repetition_index = repetition_index;
report.repetitions = b.repetitions;
report.repetitions = repeats;
if (!report.error_occurred) {
if (b.use_manual_time) {
if (!report.skipped) {
if (b.use_manual_time()) {
report.real_accumulated_time = results.manual_time_used;
} else {
report.real_accumulated_time = results.real_time_used;
}
report.cpu_accumulated_time = results.cpu_time_used;
report.complexity_n = results.complexity_n;
report.complexity = b.complexity;
report.complexity_lambda = b.complexity_lambda;
report.statistics = b.statistics;
report.complexity = b.complexity();
report.complexity_lambda = b.complexity_lambda();
report.statistics = &b.statistics();
report.counters = results.counters;
if (memory_iterations > 0) {
report.has_memory_result = true;
assert(memory_result != nullptr);
report.memory_result = memory_result;
report.allocs_per_iter =
memory_iterations ? static_cast<double>(memory_result.num_allocs) /
memory_iterations ? static_cast<double>(memory_result->num_allocs) /
memory_iterations
: 0;
report.max_bytes_used = memory_result.max_bytes_used;
}
internal::Finish(&report.counters, results.iterations, seconds, b.threads);
internal::Finish(&report.counters, results.iterations, seconds,
b.threads());
}
return report;
}
// Execute one thread of benchmark b for the specified number of iterations.
// Adds the stats collected for the thread into *total.
// Adds the stats collected for the thread into manager->results.
void RunInThread(const BenchmarkInstance* b, IterationCount iters,
int thread_id, ThreadManager* manager) {
int thread_id, ThreadManager* manager,
PerfCountersMeasurement* perf_counters_measurement) {
internal::ThreadTimer timer(
b->measure_process_cpu_time
b->measure_process_cpu_time()
? internal::ThreadTimer::CreateProcessCpuTime()
: internal::ThreadTimer::Create());
State st = b->Run(iters, thread_id, &timer, manager);
CHECK(st.error_occurred() || st.iterations() >= st.max_iterations)
State st =
b->Run(iters, thread_id, &timer, manager, perf_counters_measurement);
BM_CHECK(st.skipped() || st.iterations() >= st.max_iterations)
<< "Benchmark returned before State::KeepRunning() returned false!";
{
MutexLock l(manager->GetBenchmarkMutex());
@ -132,229 +145,351 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters,
manager->NotifyThreadComplete();
}
class BenchmarkRunner {
public:
BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_,
std::vector<BenchmarkReporter::Run>* complexity_reports_)
: b(b_),
complexity_reports(*complexity_reports_),
min_time(!IsZero(b.min_time) ? b.min_time : FLAGS_benchmark_min_time),
repeats(b.repetitions != 0 ? b.repetitions
: FLAGS_benchmark_repetitions),
has_explicit_iteration_count(b.iterations != 0),
pool(b.threads - 1),
iters(has_explicit_iteration_count ? b.iterations : 1) {
run_results.display_report_aggregates_only =
(FLAGS_benchmark_report_aggregates_only ||
FLAGS_benchmark_display_aggregates_only);
run_results.file_report_aggregates_only =
FLAGS_benchmark_report_aggregates_only;
if (b.aggregation_report_mode != internal::ARM_Unspecified) {
run_results.display_report_aggregates_only =
(b.aggregation_report_mode &
internal::ARM_DisplayReportAggregatesOnly);
run_results.file_report_aggregates_only =
(b.aggregation_report_mode & internal::ARM_FileReportAggregatesOnly);
}
double ComputeMinTime(const benchmark::internal::BenchmarkInstance& b,
const BenchTimeType& iters_or_time) {
if (!IsZero(b.min_time())) return b.min_time();
// If the flag was used to specify number of iters, then return the default
// min_time.
if (iters_or_time.tag == BenchTimeType::ITERS) return kDefaultMinTime;
for (int repetition_num = 0; repetition_num < repeats; repetition_num++) {
DoOneRepetition(repetition_num);
}
return iters_or_time.time;
}
// Calculate additional statistics
run_results.aggregates_only = ComputeStats(run_results.non_aggregates);
IterationCount ComputeIters(const benchmark::internal::BenchmarkInstance& b,
const BenchTimeType& iters_or_time) {
if (b.iterations() != 0) return b.iterations();
// Maybe calculate complexity report
if ((b.complexity != oNone) && b.last_benchmark_instance) {
auto additional_run_stats = ComputeBigO(complexity_reports);
run_results.aggregates_only.insert(run_results.aggregates_only.end(),
additional_run_stats.begin(),
additional_run_stats.end());
complexity_reports.clear();
}
}
RunResults&& get_results() { return std::move(run_results); }
private:
RunResults run_results;
const benchmark::internal::BenchmarkInstance& b;
std::vector<BenchmarkReporter::Run>& complexity_reports;
const double min_time;
const int repeats;
const bool has_explicit_iteration_count;
std::vector<std::thread> pool;
IterationCount iters; // preserved between repetitions!
// So only the first repetition has to find/calculate it,
// the other repetitions will just use that precomputed iteration count.
struct IterationResults {
internal::ThreadManager::Result results;
IterationCount iters;
double seconds;
};
IterationResults DoNIterations() {
VLOG(2) << "Running " << b.name.str() << " for " << iters << "\n";
std::unique_ptr<internal::ThreadManager> manager;
manager.reset(new internal::ThreadManager(b.threads));
// Run all but one thread in separate threads
for (std::size_t ti = 0; ti < pool.size(); ++ti) {
pool[ti] = std::thread(&RunInThread, &b, iters, static_cast<int>(ti + 1),
manager.get());
}
// And run one thread here directly.
// (If we were asked to run just one thread, we don't create new threads.)
// Yes, we need to do this here *after* we start the separate threads.
RunInThread(&b, iters, 0, manager.get());
// The main thread has finished. Now let's wait for the other threads.
manager->WaitForAllThreads();
for (std::thread& thread : pool) thread.join();
IterationResults i;
// Acquire the measurements/counters from the manager, UNDER THE LOCK!
{
MutexLock l(manager->GetBenchmarkMutex());
i.results = manager->results;
}
// And get rid of the manager.
manager.reset();
// Adjust real/manual time stats since they were reported per thread.
i.results.real_time_used /= b.threads;
i.results.manual_time_used /= b.threads;
// If we were measuring whole-process CPU usage, adjust the CPU time too.
if (b.measure_process_cpu_time) i.results.cpu_time_used /= b.threads;
VLOG(2) << "Ran in " << i.results.cpu_time_used << "/"
<< i.results.real_time_used << "\n";
// So for how long were we running?
i.iters = iters;
// Base decisions off of real time if requested by this benchmark.
i.seconds = i.results.cpu_time_used;
if (b.use_manual_time) {
i.seconds = i.results.manual_time_used;
} else if (b.use_real_time) {
i.seconds = i.results.real_time_used;
}
return i;
}
IterationCount PredictNumItersNeeded(const IterationResults& i) const {
// See by how much the iteration count should be increased.
// Note: Avoid division by zero with max(seconds, 1ns).
double multiplier = min_time * 1.4 / std::max(i.seconds, 1e-9);
// If our last run was at least 10% of FLAGS_benchmark_min_time then we
// use the multiplier directly.
// Otherwise we use at most 10 times expansion.
// NOTE: When the last run was at least 10% of the min time the max
// expansion should be 14x.
bool is_significant = (i.seconds / min_time) > 0.1;
multiplier = is_significant ? multiplier : std::min(10.0, multiplier);
if (multiplier <= 1.0) multiplier = 2.0;
// So what seems to be the sufficiently-large iteration count? Round up.
const IterationCount max_next_iters = static_cast<IterationCount>(
std::lround(std::max(multiplier * static_cast<double>(i.iters),
static_cast<double>(i.iters) + 1.0)));
// But we do have *some* sanity limits though..
const IterationCount next_iters = std::min(max_next_iters, kMaxIterations);
VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n";
return next_iters; // round up before conversion to integer.
}
bool ShouldReportIterationResults(const IterationResults& i) const {
// Determine if this run should be reported:
// either it has run for a sufficient amount of time,
// or an error was reported.
return i.results.has_error_ ||
i.iters >= kMaxIterations || // Too many iterations already.
i.seconds >= min_time || // The elapsed time is large enough.
// CPU time is specified but the elapsed real time greatly exceeds
// the minimum time.
// Note that user-provided timers are exempt from this sanity check.
((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time);
}
void DoOneRepetition(int64_t repetition_index) {
const bool is_the_first_repetition = repetition_index == 0;
IterationResults i;
// We *may* be gradually increasing the length (iteration count)
// of the benchmark until we decide the results are significant.
// And once we do, we report those last results and exit.
// Please do note that if there are repetitions, the iteration count
// is *only* calculated for the *first* repetition, and other repetitions
// simply use that precomputed iteration count.
for (;;) {
i = DoNIterations();
// Do we consider the results to be significant?
// If we are doing repetitions, and the first repetition was already done,
// it has calculated the correct iteration time, so we have run that very
// iteration count just now. No need to calculate anything. Just report.
// Else, the normal rules apply.
const bool results_are_significant = !is_the_first_repetition ||
has_explicit_iteration_count ||
ShouldReportIterationResults(i);
if (results_are_significant) break; // Good, let's report them!
// Nope, bad iteration. Let's re-estimate the hopefully-sufficient
// iteration count, and run the benchmark again...
iters = PredictNumItersNeeded(i);
assert(iters > i.iters &&
"if we did more iterations than we want to do the next time, "
"then we should have accepted the current iteration run.");
}
// Oh, one last thing, we need to also produce the 'memory measurements'..
MemoryManager::Result memory_result;
IterationCount memory_iterations = 0;
if (memory_manager != nullptr) {
// Only run a few iterations to reduce the impact of one-time
// allocations in benchmarks that are not properly managed.
memory_iterations = std::min<IterationCount>(16, iters);
memory_manager->Start();
std::unique_ptr<internal::ThreadManager> manager;
manager.reset(new internal::ThreadManager(1));
RunInThread(&b, memory_iterations, 0, manager.get());
manager->WaitForAllThreads();
manager.reset();
memory_manager->Stop(&memory_result);
}
// Ok, now actually report.
BenchmarkReporter::Run report =
CreateRunReport(b, i.results, memory_iterations, memory_result,
i.seconds, repetition_index);
if (!report.error_occurred && b.complexity != oNone)
complexity_reports.push_back(report);
run_results.non_aggregates.push_back(report);
}
};
// We've already concluded that this flag is currently used to pass
// iters but do a check here again anyway.
BM_CHECK(iters_or_time.tag == BenchTimeType::ITERS);
return iters_or_time.iters;
}
} // end namespace
RunResults RunBenchmark(
const benchmark::internal::BenchmarkInstance& b,
std::vector<BenchmarkReporter::Run>* complexity_reports) {
internal::BenchmarkRunner r(b, complexity_reports);
return r.get_results();
BenchTimeType ParseBenchMinTime(const std::string& value) {
BenchTimeType ret;
if (value.empty()) {
ret.tag = BenchTimeType::TIME;
ret.time = 0.0;
return ret;
}
if (value.back() == 'x') {
char* p_end;
// Reset errno before it's changed by strtol.
errno = 0;
IterationCount num_iters = std::strtol(value.c_str(), &p_end, 10);
// After a valid parse, p_end should have been set to
// point to the 'x' suffix.
BM_CHECK(errno == 0 && p_end != nullptr && *p_end == 'x')
<< "Malformed iters value passed to --benchmark_min_time: `" << value
<< "`. Expected --benchmark_min_time=<integer>x.";
ret.tag = BenchTimeType::ITERS;
ret.iters = num_iters;
return ret;
}
bool has_suffix = value.back() == 's';
if (!has_suffix) {
BM_VLOG(0) << "Value passed to --benchmark_min_time should have a suffix. "
"Eg., `30s` for 30-seconds.";
}
char* p_end;
// Reset errno before it's changed by strtod.
errno = 0;
double min_time = std::strtod(value.c_str(), &p_end);
// After a successful parse, p_end should point to the suffix 's',
// or the end of the string if the suffix was omitted.
BM_CHECK(errno == 0 && p_end != nullptr &&
((has_suffix && *p_end == 's') || *p_end == '\0'))
<< "Malformed seconds value passed to --benchmark_min_time: `" << value
<< "`. Expected --benchmark_min_time=<float>x.";
ret.tag = BenchTimeType::TIME;
ret.time = min_time;
return ret;
}
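// Accepted forms, by way of example (binary name hypothetical):
//   --benchmark_min_time=500x   -> run exactly 500 iterations
//   --benchmark_min_time=2.5s   -> run for at least 2.5 seconds
//   --benchmark_min_time=2.5    -> accepted, but the trailing 's' is advised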
BenchmarkRunner::BenchmarkRunner(
const benchmark::internal::BenchmarkInstance& b_,
PerfCountersMeasurement* pcm_,
BenchmarkReporter::PerFamilyRunReports* reports_for_family_)
: b(b_),
reports_for_family(reports_for_family_),
parsed_benchtime_flag(ParseBenchMinTime(FLAGS_benchmark_min_time)),
min_time(ComputeMinTime(b_, parsed_benchtime_flag)),
min_warmup_time((!IsZero(b.min_time()) && b.min_warmup_time() > 0.0)
? b.min_warmup_time()
: FLAGS_benchmark_min_warmup_time),
warmup_done(!(min_warmup_time > 0.0)),
repeats(b.repetitions() != 0 ? b.repetitions()
: FLAGS_benchmark_repetitions),
has_explicit_iteration_count(b.iterations() != 0 ||
parsed_benchtime_flag.tag ==
BenchTimeType::ITERS),
pool(b.threads() - 1),
iters(has_explicit_iteration_count
? ComputeIters(b_, parsed_benchtime_flag)
: 1),
perf_counters_measurement_ptr(pcm_) {
run_results.display_report_aggregates_only =
(FLAGS_benchmark_report_aggregates_only ||
FLAGS_benchmark_display_aggregates_only);
run_results.file_report_aggregates_only =
FLAGS_benchmark_report_aggregates_only;
if (b.aggregation_report_mode() != internal::ARM_Unspecified) {
run_results.display_report_aggregates_only =
(b.aggregation_report_mode() &
internal::ARM_DisplayReportAggregatesOnly);
run_results.file_report_aggregates_only =
(b.aggregation_report_mode() & internal::ARM_FileReportAggregatesOnly);
BM_CHECK(FLAGS_benchmark_perf_counters.empty() ||
(perf_counters_measurement_ptr->num_counters() == 0))
<< "Perf counters were requested but could not be set up.";
}
}
BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() {
BM_VLOG(2) << "Running " << b.name().str() << " for " << iters << "\n";
std::unique_ptr<internal::ThreadManager> manager;
manager.reset(new internal::ThreadManager(b.threads()));
// Run all but one thread in separate threads
for (std::size_t ti = 0; ti < pool.size(); ++ti) {
pool[ti] = std::thread(&RunInThread, &b, iters, static_cast<int>(ti + 1),
manager.get(), perf_counters_measurement_ptr);
}
// And run one thread here directly.
// (If we were asked to run just one thread, we don't create new threads.)
// Yes, we need to do this here *after* we start the separate threads.
RunInThread(&b, iters, 0, manager.get(), perf_counters_measurement_ptr);
// The main thread has finished. Now let's wait for the other threads.
manager->WaitForAllThreads();
for (std::thread& thread : pool) thread.join();
IterationResults i;
// Acquire the measurements/counters from the manager, UNDER THE LOCK!
{
MutexLock l(manager->GetBenchmarkMutex());
i.results = manager->results;
}
// And get rid of the manager.
manager.reset();
// Adjust real/manual time stats since they were reported per thread.
i.results.real_time_used /= b.threads();
i.results.manual_time_used /= b.threads();
// If we were measuring whole-process CPU usage, adjust the CPU time too.
if (b.measure_process_cpu_time()) i.results.cpu_time_used /= b.threads();
BM_VLOG(2) << "Ran in " << i.results.cpu_time_used << "/"
<< i.results.real_time_used << "\n";
// By using KeepRunningBatch a benchmark can iterate more times than
// requested, so take the iteration count from i.results.
i.iters = i.results.iterations / b.threads();
// Base decisions off of real time if requested by this benchmark.
i.seconds = i.results.cpu_time_used;
if (b.use_manual_time()) {
i.seconds = i.results.manual_time_used;
} else if (b.use_real_time()) {
i.seconds = i.results.real_time_used;
}
return i;
}
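// Worked example of the per-thread adjustment above (numbers are made up):
// with b.threads() == 4 and iters == 1000, each of the four threads runs 1000
// iterations and reports its own elapsed time, so real_time_used arrives as
// roughly 4x the wall-clock time and is divided back down by 4. Likewise,
// because KeepRunningBatch may overshoot, i.results.iterations holds the total
// across all threads (say 4096), giving i.iters == 4096 / 4 == 1024 per thread.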
IterationCount BenchmarkRunner::PredictNumItersNeeded(
const IterationResults& i) const {
// See by how much the iteration count should be increased.
// Note: Avoid division by zero with max(seconds, 1ns).
double multiplier = GetMinTimeToApply() * 1.4 / std::max(i.seconds, 1e-9);
// If our last run was at least 10% of FLAGS_benchmark_min_time then we
// use the multiplier directly.
// Otherwise we use at most 10 times expansion.
// NOTE: When the last run was at least 10% of the min time the max
// expansion should be 14x.
const bool is_significant = (i.seconds / GetMinTimeToApply()) > 0.1;
multiplier = is_significant ? multiplier : 10.0;
// So what seems to be the sufficiently-large iteration count? Round up.
const IterationCount max_next_iters = static_cast<IterationCount>(
std::lround(std::max(multiplier * static_cast<double>(i.iters),
static_cast<double>(i.iters) + 1.0)));
// But we do have *some* limits though..
const IterationCount next_iters = std::min(max_next_iters, kMaxIterations);
BM_VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n";
return next_iters; // round up before conversion to integer.
}
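// Worked example of the prediction above (illustrative numbers only): with
// GetMinTimeToApply() == 0.5s, a run of i.iters == 1000 that took i.seconds ==
// 0.01s is not significant (0.01 / 0.5 == 0.02 < 0.1), so the multiplier is
// capped at 10 and the next run uses 10'000 iterations. Once a run takes,
// say, 0.1s (>= 10% of the min time), the multiplier 0.5 * 1.4 / 0.1 == 7 is
// used directly, with the result still bounded above by kMaxIterations.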
bool BenchmarkRunner::ShouldReportIterationResults(
const IterationResults& i) const {
// Determine if this run should be reported:
// either it has run for a sufficient amount of time,
// or an error/skip was reported.
return i.results.skipped_ ||
i.iters >= kMaxIterations || // Too many iterations already.
i.seconds >=
GetMinTimeToApply() || // The elapsed time is large enough.
// CPU time is specified but the elapsed real time greatly exceeds
// the minimum time.
// Note that user-provided timers are exempt from this test.
((i.results.real_time_used >= 5 * GetMinTimeToApply()) &&
!b.use_manual_time());
}
double BenchmarkRunner::GetMinTimeToApply() const {
// In order to reuse the functionality that runs and measures benchmarks for
// the warmup phase as well, we need a way of telling whether to apply
// min_time or min_warmup_time. This function figures out whether we are in
// the warmup phase, and therefore need to apply min_warmup_time, or whether
// we are already in the benchmarking phase and min_time needs to be applied.
return warmup_done ? min_time : min_warmup_time;
}
void BenchmarkRunner::FinishWarmUp(const IterationCount& i) {
warmup_done = true;
iters = i;
}
void BenchmarkRunner::RunWarmUp() {
// Use the same mechanisms for warming up the benchmark as used for actually
// running and measuring the benchmark.
IterationResults i_warmup;
// Don't use the iterations determined in the warmup phase for the actual
// measured benchmark phase. While this may be a good starting point for the
// benchmark, and it would therefore remove the need to figure out how many
// iterations are needed if min_time is set again, it may also be a completely
// wrong guess since the warmup loops might be considerably slower (e.g.
// because of caching effects).
const IterationCount i_backup = iters;
for (;;) {
b.Setup();
i_warmup = DoNIterations();
b.Teardown();
const bool finish = ShouldReportIterationResults(i_warmup);
if (finish) {
FinishWarmUp(i_backup);
break;
}
// Although we are running "only" a warmup phase, where running enough
// iterations at once without measuring time isn't as important as it is for
// the benchmarking phase, we still do it the same way; otherwise it would be
// very confusing for the user to choose a proper value for min_warmup_time if
// a different approach were used here.
iters = PredictNumItersNeeded(i_warmup);
assert(iters > i_warmup.iters &&
"if we did more iterations than we want to do the next time, "
"then we should have accepted the current iteration run.");
}
}
void BenchmarkRunner::DoOneRepetition() {
assert(HasRepeatsRemaining() && "Already done all repetitions?");
const bool is_the_first_repetition = num_repetitions_done == 0;
// In case a warmup phase is requested by the benchmark, run it now.
// After running the warmup phase the BenchmarkRunner should be in a state as
// if this warmup never happened, except that warmup_done is set. Any other
// manipulation of the BenchmarkRunner instance would be a bug! Please fix it.
if (!warmup_done) RunWarmUp();
IterationResults i;
// We *may* be gradually increasing the length (iteration count)
// of the benchmark until we decide the results are significant.
// And once we do, we report those last results and exit.
// Please do note that if there are repetitions, the iteration count
// is *only* calculated for the *first* repetition, and other repetitions
// simply use that precomputed iteration count.
for (;;) {
b.Setup();
i = DoNIterations();
b.Teardown();
// Do we consider the results to be significant?
// If we are doing repetitions, and the first repetition was already done,
// it has calculated the correct iteration time, so we have run that very
// iteration count just now. No need to calculate anything. Just report.
// Else, the normal rules apply.
const bool results_are_significant = !is_the_first_repetition ||
has_explicit_iteration_count ||
ShouldReportIterationResults(i);
if (results_are_significant) break; // Good, let's report them!
// Nope, bad iteration. Let's re-estimate the hopefully-sufficient
// iteration count, and run the benchmark again...
iters = PredictNumItersNeeded(i);
assert(iters > i.iters &&
"if we did more iterations than we want to do the next time, "
"then we should have accepted the current iteration run.");
}
// Oh, one last thing, we need to also produce the 'memory measurements'..
MemoryManager::Result* memory_result = nullptr;
IterationCount memory_iterations = 0;
if (memory_manager != nullptr) {
// TODO(vyng): Consider making BenchmarkReporter::Run::memory_result an
// optional so we don't have to own the Result here.
// Can't do it now due to cxx03.
memory_results.push_back(MemoryManager::Result());
memory_result = &memory_results.back();
// Only run a few iterations to reduce the impact of one-time
// allocations in benchmarks that are not properly managed.
memory_iterations = std::min<IterationCount>(16, iters);
memory_manager->Start();
std::unique_ptr<internal::ThreadManager> manager;
manager.reset(new internal::ThreadManager(1));
b.Setup();
RunInThread(&b, memory_iterations, 0, manager.get(),
perf_counters_measurement_ptr);
manager->WaitForAllThreads();
manager.reset();
b.Teardown();
memory_manager->Stop(*memory_result);
}
// Ok, now actually report.
BenchmarkReporter::Run report =
CreateRunReport(b, i.results, memory_iterations, memory_result, i.seconds,
num_repetitions_done, repeats);
if (reports_for_family) {
++reports_for_family->num_runs_done;
if (!report.skipped) reports_for_family->Runs.push_back(report);
}
run_results.non_aggregates.push_back(report);
++num_repetitions_done;
}
RunResults&& BenchmarkRunner::GetResults() {
assert(!HasRepeatsRemaining() && "Did not run all repetitions yet?");
// Calculate additional statistics over the repetitions of this instance.
run_results.aggregates_only = ComputeStats(run_results.non_aggregates);
return std::move(run_results);
}
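// A minimal sketch of how a caller can drive BenchmarkRunner (the library's
// own driver lives elsewhere; this only shows the intended call sequence of
// the public methods above, with `instance` standing in for a hypothetical
// BenchmarkInstance):
//
//   BenchmarkRunner runner(instance, /*pcm=*/nullptr,
//                          /*reports_for_family=*/nullptr);
//   while (runner.HasRepeatsRemaining()) runner.DoOneRepetition();
//   RunResults results = runner.GetResults();  // aggregates are computed here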
} // end namespace internal

View File

@ -15,19 +15,23 @@
#ifndef BENCHMARK_RUNNER_H_
#define BENCHMARK_RUNNER_H_
#include <thread>
#include <vector>
#include "benchmark_api_internal.h"
#include "internal_macros.h"
DECLARE_double(benchmark_min_time);
DECLARE_int32(benchmark_repetitions);
DECLARE_bool(benchmark_report_aggregates_only);
DECLARE_bool(benchmark_display_aggregates_only);
#include "perf_counters.h"
#include "thread_manager.h"
namespace benchmark {
BM_DECLARE_string(benchmark_min_time);
BM_DECLARE_double(benchmark_min_warmup_time);
BM_DECLARE_int32(benchmark_repetitions);
BM_DECLARE_bool(benchmark_report_aggregates_only);
BM_DECLARE_bool(benchmark_display_aggregates_only);
BM_DECLARE_string(benchmark_perf_counters);
namespace internal {
extern MemoryManager* memory_manager;
@ -40,9 +44,85 @@ struct RunResults {
bool file_report_aggregates_only = false;
};
RunResults RunBenchmark(
const benchmark::internal::BenchmarkInstance& b,
std::vector<BenchmarkReporter::Run>* complexity_reports);
struct BENCHMARK_EXPORT BenchTimeType {
enum { ITERS, TIME } tag;
union {
IterationCount iters;
double time;
};
};
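// BenchTimeType is a simple tagged union: exactly one of `iters` or `time` is
// meaningful, depending on `tag`. Illustrative use:
//
//   BenchTimeType t = ParseBenchMinTime("10x");
//   if (t.tag == BenchTimeType::ITERS) {
//     IterationCount n = t.iters;  // valid: an explicit iteration count
//   } else {
//     double seconds = t.time;     // valid: a minimum running time
//   }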
BENCHMARK_EXPORT
BenchTimeType ParseBenchMinTime(const std::string& value);
class BenchmarkRunner {
public:
BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_,
benchmark::internal::PerfCountersMeasurement* pmc_,
BenchmarkReporter::PerFamilyRunReports* reports_for_family);
int GetNumRepeats() const { return repeats; }
bool HasRepeatsRemaining() const {
return GetNumRepeats() != num_repetitions_done;
}
void DoOneRepetition();
RunResults&& GetResults();
BenchmarkReporter::PerFamilyRunReports* GetReportsForFamily() const {
return reports_for_family;
}
double GetMinTime() const { return min_time; }
bool HasExplicitIters() const { return has_explicit_iteration_count; }
IterationCount GetIters() const { return iters; }
private:
RunResults run_results;
const benchmark::internal::BenchmarkInstance& b;
BenchmarkReporter::PerFamilyRunReports* reports_for_family;
BenchTimeType parsed_benchtime_flag;
const double min_time;
const double min_warmup_time;
bool warmup_done;
const int repeats;
const bool has_explicit_iteration_count;
int num_repetitions_done = 0;
std::vector<std::thread> pool;
std::vector<MemoryManager::Result> memory_results;
IterationCount iters; // preserved between repetitions!
// So only the first repetition has to find/calculate it,
// the other repetitions will just use that precomputed iteration count.
PerfCountersMeasurement* const perf_counters_measurement_ptr = nullptr;
struct IterationResults {
internal::ThreadManager::Result results;
IterationCount iters;
double seconds;
};
IterationResults DoNIterations();
IterationCount PredictNumItersNeeded(const IterationResults& i) const;
bool ShouldReportIterationResults(const IterationResults& i) const;
double GetMinTimeToApply() const;
void FinishWarmUp(const IterationCount& i);
void RunWarmUp();
};
} // namespace internal

11
src/check.cc Normal file
View File

@ -0,0 +1,11 @@
#include "check.h"
namespace benchmark {
namespace internal {
static AbortHandlerT* handler = &std::abort;
BENCHMARK_EXPORT AbortHandlerT*& GetAbortHandler() { return handler; }
} // namespace internal
} // namespace benchmark

View File

@ -5,26 +5,43 @@
#include <cstdlib>
#include <ostream>
#include "benchmark/export.h"
#include "internal_macros.h"
#include "log.h"
#if defined(__GNUC__) || defined(__clang__)
#define BENCHMARK_NOEXCEPT noexcept
#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
#elif defined(_MSC_VER) && !defined(__clang__)
#if _MSC_VER >= 1900
#define BENCHMARK_NOEXCEPT noexcept
#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
#else
#define BENCHMARK_NOEXCEPT
#define BENCHMARK_NOEXCEPT_OP(x)
#endif
#define __func__ __FUNCTION__
#else
#define BENCHMARK_NOEXCEPT
#define BENCHMARK_NOEXCEPT_OP(x)
#endif
namespace benchmark {
namespace internal {
typedef void(AbortHandlerT)();
inline AbortHandlerT*& GetAbortHandler() {
static AbortHandlerT* handler = &std::abort;
return handler;
}
BENCHMARK_EXPORT
AbortHandlerT*& GetAbortHandler();
BENCHMARK_NORETURN inline void CallAbortHandler() {
GetAbortHandler()();
std::abort(); // fallback to enforce noreturn
}
// CheckHandler is the class constructed by failing CHECK macros. CheckHandler
// will log information about the failures and abort when it is destructed.
// CheckHandler is the class constructed by failing BM_CHECK macros.
// CheckHandler will log information about the failures and abort when it is
// destructed.
class CheckHandler {
public:
CheckHandler(const char* check, const char* file, const char* func, int line)
@ -35,10 +52,17 @@ class CheckHandler {
LogType& GetLog() { return log_; }
#if defined(COMPILER_MSVC)
#pragma warning(push)
#pragma warning(disable : 4722)
#endif
BENCHMARK_NORETURN ~CheckHandler() BENCHMARK_NOEXCEPT_OP(false) {
log_ << std::endl;
CallAbortHandler();
}
#if defined(COMPILER_MSVC)
#pragma warning(pop)
#endif
CheckHandler& operator=(const CheckHandler&) = delete;
CheckHandler(const CheckHandler&) = delete;
@ -51,32 +75,32 @@ class CheckHandler {
} // end namespace internal
} // end namespace benchmark
// The CHECK macro returns a std::ostream object that can have extra information
// written to it.
// The BM_CHECK macro returns a std::ostream object that can have extra
// information written to it.
#ifndef NDEBUG
#define CHECK(b) \
#define BM_CHECK(b) \
(b ? ::benchmark::internal::GetNullLogInstance() \
: ::benchmark::internal::CheckHandler(#b, __FILE__, __func__, __LINE__) \
.GetLog())
#else
#define CHECK(b) ::benchmark::internal::GetNullLogInstance()
#define BM_CHECK(b) ::benchmark::internal::GetNullLogInstance()
#endif
// clang-format off
// preserve whitespacing between operators for alignment
#define CHECK_EQ(a, b) CHECK((a) == (b))
#define CHECK_NE(a, b) CHECK((a) != (b))
#define CHECK_GE(a, b) CHECK((a) >= (b))
#define CHECK_LE(a, b) CHECK((a) <= (b))
#define CHECK_GT(a, b) CHECK((a) > (b))
#define CHECK_LT(a, b) CHECK((a) < (b))
#define BM_CHECK_EQ(a, b) BM_CHECK((a) == (b))
#define BM_CHECK_NE(a, b) BM_CHECK((a) != (b))
#define BM_CHECK_GE(a, b) BM_CHECK((a) >= (b))
#define BM_CHECK_LE(a, b) BM_CHECK((a) <= (b))
#define BM_CHECK_GT(a, b) BM_CHECK((a) > (b))
#define BM_CHECK_LT(a, b) BM_CHECK((a) < (b))
#define CHECK_FLOAT_EQ(a, b, eps) CHECK(std::fabs((a) - (b)) < (eps))
#define CHECK_FLOAT_NE(a, b, eps) CHECK(std::fabs((a) - (b)) >= (eps))
#define CHECK_FLOAT_GE(a, b, eps) CHECK((a) - (b) > -(eps))
#define CHECK_FLOAT_LE(a, b, eps) CHECK((b) - (a) > -(eps))
#define CHECK_FLOAT_GT(a, b, eps) CHECK((a) - (b) > (eps))
#define CHECK_FLOAT_LT(a, b, eps) CHECK((b) - (a) > (eps))
#define BM_CHECK_FLOAT_EQ(a, b, eps) BM_CHECK(std::fabs((a) - (b)) < (eps))
#define BM_CHECK_FLOAT_NE(a, b, eps) BM_CHECK(std::fabs((a) - (b)) >= (eps))
#define BM_CHECK_FLOAT_GE(a, b, eps) BM_CHECK((a) - (b) > -(eps))
#define BM_CHECK_FLOAT_LE(a, b, eps) BM_CHECK((b) - (a) > -(eps))
#define BM_CHECK_FLOAT_GT(a, b, eps) BM_CHECK((a) - (b) > (eps))
#define BM_CHECK_FLOAT_LT(a, b, eps) BM_CHECK((b) - (a) > (eps))
// clang-format on
#endif // CHECK_H_
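// Illustrative use of the BM_CHECK family defined above (these macros are
// internal to the library, not part of the public benchmark API):
//
//   BM_CHECK(ptr != nullptr) << "ptr must be set before calling Foo";
//   BM_CHECK_GE(n.size(), 2) << "need at least two data points";
//   BM_CHECK_FLOAT_EQ(measured, expected, 1e-9);
//
// In NDEBUG builds BM_CHECK(b) expands to the null log stream, so the
// condition itself is not evaluated; streamed message operands are still
// evaluated but their output is discarded.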

View File

@ -25,8 +25,8 @@
#include "internal_macros.h"
#ifdef BENCHMARK_OS_WINDOWS
#include <windows.h>
#include <io.h>
#include <windows.h>
#else
#include <unistd.h>
#endif // BENCHMARK_OS_WINDOWS
@ -94,20 +94,20 @@ std::string FormatString(const char* msg, va_list args) {
va_end(args_cp);
// currently there is no error handling for failure, so this is a hack.
CHECK(ret >= 0);
BM_CHECK(ret >= 0);
if (ret == 0) // handle empty expansion
if (ret == 0) { // handle empty expansion
return {};
else if (static_cast<size_t>(ret) < size)
return local_buff;
else {
// we did not provide a long enough buffer on our first attempt.
size = (size_t)ret + 1; // + 1 for the null byte
std::unique_ptr<char[]> buff(new char[size]);
ret = vsnprintf(buff.get(), size, msg, args);
CHECK(ret > 0 && ((size_t)ret) < size);
return buff.get();
}
if (static_cast<size_t>(ret) < size) {
return local_buff;
}
// we did not provide a long enough buffer on our first attempt.
size = static_cast<size_t>(ret) + 1; // + 1 for the null byte
std::unique_ptr<char[]> buff(new char[size]);
ret = vsnprintf(buff.get(), size, msg, args);
BM_CHECK(ret > 0 && (static_cast<size_t>(ret)) < size);
return buff.get();
}
std::string FormatString(const char* msg, ...) {
@ -163,12 +163,24 @@ bool IsColorTerminal() {
#else
// On non-Windows platforms, we rely on the TERM variable. This list of
// supported TERM values is copied from Google Test:
// <https://github.com/google/googletest/blob/master/googletest/src/gtest.cc#L2925>.
// <https://github.com/google/googletest/blob/v1.13.0/googletest/src/gtest.cc#L3225-L3259>.
const char* const SUPPORTED_TERM_VALUES[] = {
"xterm", "xterm-color", "xterm-256color",
"screen", "screen-256color", "tmux",
"tmux-256color", "rxvt-unicode", "rxvt-unicode-256color",
"linux", "cygwin",
"xterm",
"xterm-color",
"xterm-256color",
"screen",
"screen-256color",
"tmux",
"tmux-256color",
"rxvt-unicode",
"rxvt-unicode-256color",
"linux",
"cygwin",
"xterm-kitty",
"alacritty",
"foot",
"foot-extra",
"wezterm",
};
const char* const term = getenv("TERM");

View File

@ -20,6 +20,10 @@
#include <cstring>
#include <iostream>
#include <limits>
#include <map>
#include <utility>
#include "../src/string_util.h"
namespace benchmark {
namespace {
@ -78,6 +82,30 @@ bool ParseDouble(const std::string& src_text, const char* str, double* value) {
return true;
}
// Parses 'str' into KV pairs. If successful, writes the result to *value and
// returns true; otherwise leaves *value unchanged and returns false.
bool ParseKvPairs(const std::string& src_text, const char* str,
std::map<std::string, std::string>* value) {
std::map<std::string, std::string> kvs;
for (const auto& kvpair : StrSplit(str, ',')) {
const auto kv = StrSplit(kvpair, '=');
if (kv.size() != 2) {
std::cerr << src_text << " is expected to be a comma-separated list of "
<< "<key>=<value> strings, but actually has value \"" << str
<< "\".\n";
return false;
}
if (!kvs.emplace(kv[0], kv[1]).second) {
std::cerr << src_text << " is expected to contain unique keys but key \""
<< kv[0] << "\" was repeated.\n";
return false;
}
}
*value = kvs;
return true;
}
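// Illustrative behaviour of ParseKvPairs() above (the values are examples
// only): given str == "llvm=9,gcc=spack", *value becomes
// {{"llvm", "9"}, {"gcc", "spack"}} and true is returned. "llvm=9,llvm=10"
// (repeated key) and "llvm" (no '=') both leave *value unchanged and return
// false, after printing a diagnostic to std::cerr.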
// Returns the name of the environment variable corresponding to the
// given flag. For example, FlagToEnvVar("foo") will return
// "BENCHMARK_FOO" in the open-source version.
@ -93,12 +121,14 @@ static std::string FlagToEnvVar(const char* flag) {
} // namespace
BENCHMARK_EXPORT
bool BoolFromEnv(const char* flag, bool default_val) {
const std::string env_var = FlagToEnvVar(flag);
const char* const value_str = getenv(env_var.c_str());
return value_str == nullptr ? default_val : IsTruthyFlagValue(value_str);
}
BENCHMARK_EXPORT
int32_t Int32FromEnv(const char* flag, int32_t default_val) {
const std::string env_var = FlagToEnvVar(flag);
const char* const value_str = getenv(env_var.c_str());
@ -111,6 +141,7 @@ int32_t Int32FromEnv(const char* flag, int32_t default_val) {
return value;
}
BENCHMARK_EXPORT
double DoubleFromEnv(const char* flag, double default_val) {
const std::string env_var = FlagToEnvVar(flag);
const char* const value_str = getenv(env_var.c_str());
@ -123,12 +154,28 @@ double DoubleFromEnv(const char* flag, double default_val) {
return value;
}
BENCHMARK_EXPORT
const char* StringFromEnv(const char* flag, const char* default_val) {
const std::string env_var = FlagToEnvVar(flag);
const char* const value = getenv(env_var.c_str());
return value == nullptr ? default_val : value;
}
BENCHMARK_EXPORT
std::map<std::string, std::string> KvPairsFromEnv(
const char* flag, std::map<std::string, std::string> default_val) {
const std::string env_var = FlagToEnvVar(flag);
const char* const value_str = getenv(env_var.c_str());
if (value_str == nullptr) return default_val;
std::map<std::string, std::string> value;
if (!ParseKvPairs("Environment variable " + env_var, value_str, &value)) {
return default_val;
}
return value;
}
// Parses a string as a command line flag. The string should have
// the format "--flag=value". When def_optional is true, the "=value"
// part can be omitted.
@ -159,6 +206,7 @@ const char* ParseFlagValue(const char* str, const char* flag,
return flag_end + 1;
}
BENCHMARK_EXPORT
bool ParseBoolFlag(const char* str, const char* flag, bool* value) {
// Gets the value of the flag as a string.
const char* const value_str = ParseFlagValue(str, flag, true);
@ -171,6 +219,7 @@ bool ParseBoolFlag(const char* str, const char* flag, bool* value) {
return true;
}
BENCHMARK_EXPORT
bool ParseInt32Flag(const char* str, const char* flag, int32_t* value) {
// Gets the value of the flag as a string.
const char* const value_str = ParseFlagValue(str, flag, false);
@ -183,6 +232,7 @@ bool ParseInt32Flag(const char* str, const char* flag, int32_t* value) {
value);
}
BENCHMARK_EXPORT
bool ParseDoubleFlag(const char* str, const char* flag, double* value) {
// Gets the value of the flag as a string.
const char* const value_str = ParseFlagValue(str, flag, false);
@ -195,6 +245,7 @@ bool ParseDoubleFlag(const char* str, const char* flag, double* value) {
value);
}
BENCHMARK_EXPORT
bool ParseStringFlag(const char* str, const char* flag, std::string* value) {
// Gets the value of the flag as a string.
const char* const value_str = ParseFlagValue(str, flag, false);
@ -206,23 +257,42 @@ bool ParseStringFlag(const char* str, const char* flag, std::string* value) {
return true;
}
BENCHMARK_EXPORT
bool ParseKeyValueFlag(const char* str, const char* flag,
std::map<std::string, std::string>* value) {
const char* const value_str = ParseFlagValue(str, flag, false);
if (value_str == nullptr) return false;
for (const auto& kvpair : StrSplit(value_str, ',')) {
const auto kv = StrSplit(kvpair, '=');
if (kv.size() != 2) return false;
value->emplace(kv[0], kv[1]);
}
return true;
}
BENCHMARK_EXPORT
bool IsFlag(const char* str, const char* flag) {
return (ParseFlagValue(str, flag, true) != nullptr);
}
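// Illustrative behaviour of the parsers above (flag names are examples only):
//
//   std::string filter;
//   ParseStringFlag("--benchmark_filter=BM_Foo.*", "benchmark_filter",
//                   &filter);
//   // returns true, filter == "BM_Foo.*"
//
//   bool list = false;
//   ParseBoolFlag("--benchmark_list_tests", "benchmark_list_tests", &list);
//   // "=value" may be omitted for bools, so this returns true, list == true
//
//   IsFlag("--benchmark_repetitions=3", "benchmark_repetitions");  // true
//   IsFlag("--benchmark_repetitions=3", "benchmark_filter");       // false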
BENCHMARK_EXPORT
bool IsTruthyFlagValue(const std::string& value) {
if (value.size() == 1) {
char v = value[0];
return isalnum(v) &&
!(v == '0' || v == 'f' || v == 'F' || v == 'n' || v == 'N');
} else if (!value.empty()) {
}
if (!value.empty()) {
std::string value_lower(value);
std::transform(value_lower.begin(), value_lower.end(), value_lower.begin(),
[](char c) { return static_cast<char>(::tolower(c)); });
return !(value_lower == "false" || value_lower == "no" ||
value_lower == "off");
} else
return true;
}
return true;
}
} // end namespace benchmark

View File

@ -2,61 +2,80 @@
#define BENCHMARK_COMMANDLINEFLAGS_H_
#include <cstdint>
#include <map>
#include <string>
#include "benchmark/export.h"
// Macro for referencing flags.
#define FLAG(name) FLAGS_##name
// Macros for declaring flags.
#define DECLARE_bool(name) extern bool FLAG(name)
#define DECLARE_int32(name) extern int32_t FLAG(name)
#define DECLARE_double(name) extern double FLAG(name)
#define DECLARE_string(name) extern std::string FLAG(name)
#define BM_DECLARE_bool(name) BENCHMARK_EXPORT extern bool FLAG(name)
#define BM_DECLARE_int32(name) BENCHMARK_EXPORT extern int32_t FLAG(name)
#define BM_DECLARE_double(name) BENCHMARK_EXPORT extern double FLAG(name)
#define BM_DECLARE_string(name) BENCHMARK_EXPORT extern std::string FLAG(name)
#define BM_DECLARE_kvpairs(name) \
BENCHMARK_EXPORT extern std::map<std::string, std::string> FLAG(name)
// Macros for defining flags.
#define DEFINE_bool(name, default_val) \
bool FLAG(name) = \
benchmark::BoolFromEnv(#name, default_val)
#define DEFINE_int32(name, default_val) \
int32_t FLAG(name) = \
benchmark::Int32FromEnv(#name, default_val)
#define DEFINE_double(name, default_val) \
double FLAG(name) = \
benchmark::DoubleFromEnv(#name, default_val)
#define DEFINE_string(name, default_val) \
std::string FLAG(name) = \
benchmark::StringFromEnv(#name, default_val)
#define BM_DEFINE_bool(name, default_val) \
BENCHMARK_EXPORT bool FLAG(name) = benchmark::BoolFromEnv(#name, default_val)
#define BM_DEFINE_int32(name, default_val) \
BENCHMARK_EXPORT int32_t FLAG(name) = \
benchmark::Int32FromEnv(#name, default_val)
#define BM_DEFINE_double(name, default_val) \
BENCHMARK_EXPORT double FLAG(name) = \
benchmark::DoubleFromEnv(#name, default_val)
#define BM_DEFINE_string(name, default_val) \
BENCHMARK_EXPORT std::string FLAG(name) = \
benchmark::StringFromEnv(#name, default_val)
#define BM_DEFINE_kvpairs(name, default_val) \
BENCHMARK_EXPORT std::map<std::string, std::string> FLAG(name) = \
benchmark::KvPairsFromEnv(#name, default_val)
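// For illustration, a definition such as
//
//   BM_DEFINE_string(benchmark_filter, "");
//
// expands (via FLAG(name) == FLAGS_##name) to roughly
//
//   BENCHMARK_EXPORT std::string FLAGS_benchmark_filter =
//       benchmark::StringFromEnv("benchmark_filter", "");
//
// i.e. every flag is an exported global whose default can be overridden by the
// corresponding BENCHMARK_<NAME> environment variable before command-line
// parsing runs.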
namespace benchmark {
// Parses a bool from the environment variable
// corresponding to the given flag.
// Parses a bool from the environment variable corresponding to the given flag.
//
// If the variable exists, returns IsTruthyFlagValue() value; if not,
// returns the given default value.
BENCHMARK_EXPORT
bool BoolFromEnv(const char* flag, bool default_val);
// Parses an Int32 from the environment variable
// corresponding to the given flag.
// Parses an Int32 from the environment variable corresponding to the given
// flag.
//
// If the variable exists, returns ParseInt32() value; if not, returns
// the given default value.
BENCHMARK_EXPORT
int32_t Int32FromEnv(const char* flag, int32_t default_val);
// Parses an Double from the environment variable
// corresponding to the given flag.
// Parses a Double from the environment variable corresponding to the given
// flag.
//
// If the variable exists, returns ParseDouble(); if not, returns
// the given default value.
BENCHMARK_EXPORT
double DoubleFromEnv(const char* flag, double default_val);
// Parses a string from the environment variable
// corresponding to the given flag.
// Parses a string from the environment variable corresponding to the given
// flag.
//
// If variable exists, returns its value; if not, returns
// the given default value.
BENCHMARK_EXPORT
const char* StringFromEnv(const char* flag, const char* default_val);
// Parses a set of kvpairs from the environment variable corresponding to the
// given flag.
//
// If variable exists, returns its value; if not, returns
// the given default value.
BENCHMARK_EXPORT
std::map<std::string, std::string> KvPairsFromEnv(
const char* flag, std::map<std::string, std::string> default_val);
// Parses a string for a bool flag, in the form of either
// "--flag=value" or "--flag".
//
@ -66,36 +85,47 @@ const char* StringFromEnv(const char* flag, const char* default_val);
//
// On success, stores the value of the flag in *value, and returns
// true. On failure, returns false without changing *value.
BENCHMARK_EXPORT
bool ParseBoolFlag(const char* str, const char* flag, bool* value);
// Parses a string for an Int32 flag, in the form of
// "--flag=value".
// Parses a string for an Int32 flag, in the form of "--flag=value".
//
// On success, stores the value of the flag in *value, and returns
// true. On failure, returns false without changing *value.
BENCHMARK_EXPORT
bool ParseInt32Flag(const char* str, const char* flag, int32_t* value);
// Parses a string for a Double flag, in the form of
// "--flag=value".
// Parses a string for a Double flag, in the form of "--flag=value".
//
// On success, stores the value of the flag in *value, and returns
// true. On failure, returns false without changing *value.
BENCHMARK_EXPORT
bool ParseDoubleFlag(const char* str, const char* flag, double* value);
// Parses a string for a string flag, in the form of
// "--flag=value".
// Parses a string for a string flag, in the form of "--flag=value".
//
// On success, stores the value of the flag in *value, and returns
// true. On failure, returns false without changing *value.
BENCHMARK_EXPORT
bool ParseStringFlag(const char* str, const char* flag, std::string* value);
// Parses a string for a kvpairs flag in the form "--flag=key=value,key=value"
//
// On success, stores the value of the flag in *value and returns true. On
// failure returns false, though *value may have been mutated.
BENCHMARK_EXPORT
bool ParseKeyValueFlag(const char* str, const char* flag,
std::map<std::string, std::string>* value);
// Returns true if the string matches the flag.
BENCHMARK_EXPORT
bool IsFlag(const char* str, const char* flag);
// Returns true unless value starts with one of: '0', 'f', 'F', 'n' or 'N', or
// some non-alphanumeric character. Also returns false if the value matches
// one of 'no', 'false', 'off' (case-insensitive). As a special case, also
// returns true if value is the empty string.
BENCHMARK_EXPORT
bool IsTruthyFlagValue(const std::string& value);
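// For illustration: "1", "true", "on", "yes" and "" are all truthy, while
// "0", "false", "OFF", "no" and "N" are not.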
} // end namespace benchmark

View File

@ -15,12 +15,13 @@
// Source project : https://github.com/ismaelJimenez/cpp.leastsq
// Adapted to be used with google benchmark
#include "benchmark/benchmark.h"
#include "complexity.h"
#include <algorithm>
#include <cmath>
#include "benchmark/benchmark.h"
#include "check.h"
#include "complexity.h"
namespace benchmark {
@ -82,7 +83,6 @@ std::string GetBigOString(BigO complexity) {
LeastSq MinimalLeastSq(const std::vector<int64_t>& n,
const std::vector<double>& time,
BigOFunc* fitting_curve) {
double sigma_gn = 0.0;
double sigma_gn_squared = 0.0;
double sigma_time = 0.0;
double sigma_time_gn = 0.0;
@ -90,7 +90,6 @@ LeastSq MinimalLeastSq(const std::vector<int64_t>& n,
// Calculate least square fitting parameter
for (size_t i = 0; i < n.size(); ++i) {
double gn_i = fitting_curve(n[i]);
sigma_gn += gn_i;
sigma_gn_squared += gn_i * gn_i;
sigma_time += time[i];
sigma_time_gn += time[i] * gn_i;
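// With the accumulators above, the least-squares fit of
// time ~= coef * fitting_curve(n) (a regression through the origin) has the
// closed-form solution coef = sigma_time_gn / sigma_gn_squared; the rms value
// reported alongside it is the root mean squared residual of that fit,
// normalized by the mean measured time.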
@ -125,10 +124,10 @@ LeastSq MinimalLeastSq(const std::vector<int64_t>& n,
// fitting curve.
LeastSq MinimalLeastSq(const std::vector<int64_t>& n,
const std::vector<double>& time, const BigO complexity) {
CHECK_EQ(n.size(), time.size());
CHECK_GE(n.size(), 2); // Do not compute fitting curve is less than two
// benchmark runs are given
CHECK_NE(complexity, oNone);
BM_CHECK_EQ(n.size(), time.size());
BM_CHECK_GE(n.size(), 2);  // Do not compute fitting curve if less than two
// benchmark runs are given
BM_CHECK_NE(complexity, oNone);
LeastSq best_fit;
@ -169,7 +168,8 @@ std::vector<BenchmarkReporter::Run> ComputeBigO(
// Populate the accumulators.
for (const Run& run : reports) {
CHECK_GT(run.complexity_n, 0) << "Did you forget to call SetComplexityN?";
BM_CHECK_GT(run.complexity_n, 0)
<< "Did you forget to call SetComplexityN?";
n.push_back(run.complexity_n);
real_time.push_back(run.real_accumulated_time / run.iterations);
cpu_time.push_back(run.cpu_accumulated_time / run.iterations);
@ -193,11 +193,14 @@ std::vector<BenchmarkReporter::Run> ComputeBigO(
// Get the data from the accumulator to BenchmarkReporter::Run's.
Run big_o;
big_o.run_name = run_name;
big_o.family_index = reports[0].family_index;
big_o.per_family_instance_index = reports[0].per_family_instance_index;
big_o.run_type = BenchmarkReporter::Run::RT_Aggregate;
big_o.repetitions = reports[0].repetitions;
big_o.repetition_index = Run::no_repetition_index;
big_o.threads = reports[0].threads;
big_o.aggregate_name = "BigO";
big_o.aggregate_unit = StatisticUnit::kTime;
big_o.report_label = reports[0].report_label;
big_o.iterations = 0;
big_o.real_accumulated_time = result_real.coef;
@ -215,8 +218,11 @@ std::vector<BenchmarkReporter::Run> ComputeBigO(
// Only add label to mean/stddev if it is same for all runs
Run rms;
rms.run_name = run_name;
rms.family_index = reports[0].family_index;
rms.per_family_instance_index = reports[0].per_family_instance_index;
rms.run_type = BenchmarkReporter::Run::RT_Aggregate;
rms.aggregate_name = "RMS";
rms.aggregate_unit = StatisticUnit::kPercentage;
rms.report_label = big_o.report_label;
rms.iterations = 0;
rms.repetition_index = Run::no_repetition_index;

View File

@ -31,7 +31,7 @@ std::vector<BenchmarkReporter::Run> ComputeBigO(
const std::vector<BenchmarkReporter::Run>& reports);
// This data structure will contain the result returned by MinimalLeastSq
// - coef : Estimated coeficient for the high-order term as
// - coef : Estimated coefficient for the high-order term as
// interpolated from data.
// - rms : Normalized Root Mean Squared Error.
// - complexity : Scalability form (e.g. oN, oNLogN). In case a scalability

View File

@ -33,6 +33,7 @@
namespace benchmark {
BENCHMARK_EXPORT
bool ConsoleReporter::ReportContext(const Context& context) {
name_field_width_ = context.name_field_width;
printed_header_ = false;
@ -45,19 +46,21 @@ bool ConsoleReporter::ReportContext(const Context& context) {
GetErrorStream()
<< "Color printing is only supported for stdout on windows."
" Disabling color printing\n";
output_options_ = static_cast< OutputOptions >(output_options_ & ~OO_Color);
output_options_ = static_cast<OutputOptions>(output_options_ & ~OO_Color);
}
#endif
return true;
}
BENCHMARK_EXPORT
void ConsoleReporter::PrintHeader(const Run& run) {
std::string str = FormatString("%-*s %13s %15s %12s", static_cast<int>(name_field_width_),
"Benchmark", "Time", "CPU", "Iterations");
if(!run.counters.empty()) {
if(output_options_ & OO_Tabular) {
for(auto const& c : run.counters) {
std::string str =
FormatString("%-*s %13s %15s %12s", static_cast<int>(name_field_width_),
"Benchmark", "Time", "CPU", "Iterations");
if (!run.counters.empty()) {
if (output_options_ & OO_Tabular) {
for (auto const& c : run.counters) {
str += FormatString(" %10s", c.first.c_str());
}
} else {
@ -68,6 +71,7 @@ void ConsoleReporter::PrintHeader(const Run& run) {
GetOutputStream() << line << "\n" << str << "\n" << line << "\n";
}
BENCHMARK_EXPORT
void ConsoleReporter::ReportRuns(const std::vector<Run>& reports) {
for (const auto& run : reports) {
// print the header:
@ -97,8 +101,10 @@ static void IgnoreColorPrint(std::ostream& out, LogColor, const char* fmt,
va_end(args);
}
static std::string FormatTime(double time) {
// For the time columns of the console printer 13 digits are reserved. One of
// them is a space and max two of them are the time unit (e.g. ns). That puts
// us at 10 digits usable for the number.
// Align decimal places...
if (time < 1.0) {
return FormatString("%10.3f", time);
@ -109,22 +115,33 @@ static std::string FormatTime(double time) {
if (time < 100.0) {
return FormatString("%10.1f", time);
}
// Assuming the time is at max 9.9999e+99 and we have 10 digits for the
// number, we get 10-1(.)-1(e)-1(sign)-2(exponent) = 5 digits to print.
if (time > 9999999999 /*max 10 digit number*/) {
return FormatString("%1.4e", time);
}
return FormatString("%10.0f", time);
}
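// Illustrative outputs of FormatTime() above (all padded to 10 characters):
//   FormatTime(0.123)   -> "     0.123"
//   FormatTime(3.14159) -> "      3.14"
//   FormatTime(42.0)    -> "      42.0"
//   FormatTime(1234.0)  -> "      1234"
//   FormatTime(1.5e12)  -> "1.5000e+12"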
BENCHMARK_EXPORT
void ConsoleReporter::PrintRunData(const Run& result) {
typedef void(PrinterFn)(std::ostream&, LogColor, const char*, ...);
auto& Out = GetOutputStream();
PrinterFn* printer = (output_options_ & OO_Color) ?
(PrinterFn*)ColorPrintf : IgnoreColorPrint;
PrinterFn* printer = (output_options_ & OO_Color)
? static_cast<PrinterFn*>(ColorPrintf)
: IgnoreColorPrint;
auto name_color =
(result.report_big_o || result.report_rms) ? COLOR_BLUE : COLOR_GREEN;
printer(Out, name_color, "%-*s ", name_field_width_,
result.benchmark_name().c_str());
if (result.error_occurred) {
if (internal::SkippedWithError == result.skipped) {
printer(Out, COLOR_RED, "ERROR OCCURRED: \'%s\'",
result.error_message.c_str());
result.skip_message.c_str());
printer(Out, COLOR_DEFAULT, "\n");
return;
} else if (internal::SkippedWithMessage == result.skipped) {
printer(Out, COLOR_WHITE, "SKIPPED: \'%s\'", result.skip_message.c_str());
printer(Out, COLOR_DEFAULT, "\n");
return;
}
@ -134,18 +151,23 @@ void ConsoleReporter::PrintRunData(const Run& result) {
const std::string real_time_str = FormatTime(real_time);
const std::string cpu_time_str = FormatTime(cpu_time);
if (result.report_big_o) {
std::string big_o = GetBigOString(result.complexity);
printer(Out, COLOR_YELLOW, "%10.2f %-4s %10.2f %-4s ", real_time, big_o.c_str(),
cpu_time, big_o.c_str());
printer(Out, COLOR_YELLOW, "%10.2f %-4s %10.2f %-4s ", real_time,
big_o.c_str(), cpu_time, big_o.c_str());
} else if (result.report_rms) {
printer(Out, COLOR_YELLOW, "%10.0f %-4s %10.0f %-4s ", real_time * 100, "%",
cpu_time * 100, "%");
} else {
} else if (result.run_type != Run::RT_Aggregate ||
result.aggregate_unit == StatisticUnit::kTime) {
const char* timeLabel = GetTimeUnitString(result.time_unit);
printer(Out, COLOR_YELLOW, "%s %-4s %s %-4s ", real_time_str.c_str(), timeLabel,
cpu_time_str.c_str(), timeLabel);
printer(Out, COLOR_YELLOW, "%s %-4s %s %-4s ", real_time_str.c_str(),
timeLabel, cpu_time_str.c_str(), timeLabel);
} else {
assert(result.aggregate_unit == StatisticUnit::kPercentage);
printer(Out, COLOR_YELLOW, "%10.2f %-4s %10.2f %-4s ",
(100. * result.real_accumulated_time), "%",
(100. * result.cpu_accumulated_time), "%");
}
if (!result.report_big_o && !result.report_rms) {
@ -153,12 +175,19 @@ void ConsoleReporter::PrintRunData(const Run& result) {
}
for (auto& c : result.counters) {
const std::size_t cNameLen = std::max(std::string::size_type(10),
c.first.length());
auto const& s = HumanReadableNumber(c.second.value, c.second.oneK);
const std::size_t cNameLen =
std::max(std::string::size_type(10), c.first.length());
std::string s;
const char* unit = "";
if (c.second.flags & Counter::kIsRate)
unit = (c.second.flags & Counter::kInvert) ? "s" : "/s";
if (result.run_type == Run::RT_Aggregate &&
result.aggregate_unit == StatisticUnit::kPercentage) {
s = StrFormat("%.2f", 100. * c.second.value);
unit = "%";
} else {
s = HumanReadableNumber(c.second.value, c.second.oneK);
if (c.second.flags & Counter::kIsRate)
unit = (c.second.flags & Counter::kInvert) ? "s" : "/s";
}
if (output_options_ & OO_Tabular) {
printer(Out, COLOR_DEFAULT, " %*s%s", cNameLen - strlen(unit), s.c_str(),
unit);

View File

@ -12,9 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "benchmark/benchmark.h"
#include "complexity.h"
#include <algorithm>
#include <cstdint>
#include <iostream>
@ -22,7 +19,9 @@
#include <tuple>
#include <vector>
#include "benchmark/benchmark.h"
#include "check.h"
#include "complexity.h"
#include "string_util.h"
#include "timers.h"
@ -37,23 +36,29 @@ std::vector<std::string> elements = {
"error_occurred", "error_message"};
} // namespace
std::string CsvEscape(const std::string & s) {
std::string CsvEscape(const std::string& s) {
std::string tmp;
tmp.reserve(s.size() + 2);
for (char c : s) {
switch (c) {
case '"' : tmp += "\"\""; break;
default : tmp += c; break;
case '"':
tmp += "\"\"";
break;
default:
tmp += c;
break;
}
}
return '"' + tmp + '"';
}
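// Illustrative behaviour of CsvEscape() above: the whole string is wrapped in
// double quotes and embedded quotes are doubled, so
//   CsvEscape("plain")      -> "\"plain\""
//   CsvEscape("say \"hi\"") -> "\"say \"\"hi\"\"\""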
BENCHMARK_EXPORT
bool CSVReporter::ReportContext(const Context& context) {
PrintBasicContext(&GetErrorStream(), context);
return true;
}
BENCHMARK_EXPORT
void CSVReporter::ReportRuns(const std::vector<Run>& reports) {
std::ostream& Out = GetOutputStream();
@ -85,7 +90,8 @@ void CSVReporter::ReportRuns(const std::vector<Run>& reports) {
for (const auto& cnt : run.counters) {
if (cnt.first == "bytes_per_second" || cnt.first == "items_per_second")
continue;
CHECK(user_counter_names_.find(cnt.first) != user_counter_names_.end())
BM_CHECK(user_counter_names_.find(cnt.first) !=
user_counter_names_.end())
<< "All counters must be present in each run. "
<< "Counter named \"" << cnt.first
<< "\" was not in a run after being added to the header";
@ -99,13 +105,14 @@ void CSVReporter::ReportRuns(const std::vector<Run>& reports) {
}
}
BENCHMARK_EXPORT
void CSVReporter::PrintRunData(const Run& run) {
std::ostream& Out = GetOutputStream();
Out << CsvEscape(run.benchmark_name()) << ",";
if (run.error_occurred) {
if (run.skipped) {
Out << std::string(elements.size() - 3, ',');
Out << "true,";
Out << CsvEscape(run.error_message) << "\n";
Out << std::boolalpha << (internal::SkippedWithError == run.skipped) << ",";
Out << CsvEscape(run.skip_message) << "\n";
return;
}

View File

@ -36,7 +36,8 @@
// declarations of some other intrinsics, breaking compilation.
// Therefore, we simply declare __rdtsc ourselves. See also
// http://connect.microsoft.com/VisualStudio/feedback/details/262047
#if defined(COMPILER_MSVC) && !defined(_M_IX86)
#if defined(COMPILER_MSVC) && !defined(_M_IX86) && !defined(_M_ARM64) && \
!defined(_M_ARM64EC)
extern "C" uint64_t __rdtsc();
#pragma intrinsic(__rdtsc)
#endif
@ -92,7 +93,7 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
uint32_t tbl, tbu0, tbu1;
asm volatile(
"mftbu %0\n"
"mftbl %1\n"
"mftb %1\n"
"mftbu %2"
: "=r"(tbu0), "=r"(tbl), "=r"(tbu1));
tbl &= -static_cast<int32_t>(tbu0 == tbu1);
@ -114,6 +115,12 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
// when I know it will work. Otherwise, I'll use __rdtsc and hope
// the code is being compiled with a non-ancient compiler.
_asm rdtsc
#elif defined(COMPILER_MSVC) && (defined(_M_ARM64) || defined(_M_ARM64EC))
// See https://docs.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics
// and https://reviews.llvm.org/D53115
int64_t virtual_timer_value;
virtual_timer_value = _ReadStatusReg(ARM64_CNTVCT);
return virtual_timer_value;
#elif defined(COMPILER_MSVC)
return __rdtsc();
#elif defined(BENCHMARK_OS_NACL)
@ -126,7 +133,7 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
// Native Client does not provide any API to access cycle counter.
// Use clock_gettime(CLOCK_MONOTONIC, ...) instead of gettimeofday
// because is provides nanosecond resolution (which is noticable at
// because it provides nanosecond resolution (which is noticeable at
// least for PNaCl modules running on x86 Mac & Linux).
// Initialize to always return 0 if clock_gettime fails.
struct timespec ts = {0, 0};
@ -161,18 +168,27 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
struct timeval tv;
gettimeofday(&tv, nullptr);
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
#elif defined(__mips__)
#elif defined(__mips__) || defined(__m68k__)
// mips apparently only allows rdtsc for superusers, so we fall
// back to gettimeofday. It's possible clock_gettime would be better.
struct timeval tv;
gettimeofday(&tv, nullptr);
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
#elif defined(__loongarch__) || defined(__csky__)
struct timeval tv;
gettimeofday(&tv, nullptr);
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
#elif defined(__s390__) // Covers both s390 and s390x.
// Return the CPU clock.
uint64_t tsc;
#if defined(BENCHMARK_OS_ZOS) && defined(COMPILER_IBMXL)
// z/OS XL compiler HLASM syntax.
asm(" stck %0" : "=m"(tsc) : : "cc");
#else
asm("stck %0" : "=Q"(tsc) : : "cc");
#endif
return tsc;
#elif defined(__riscv) // RISC-V
#elif defined(__riscv) // RISC-V
// Use RDCYCLE (and RDCYCLEH on riscv32)
#if __riscv_xlen == 32
uint32_t cycles_lo, cycles_hi0, cycles_hi1;
@ -193,6 +209,14 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
asm volatile("rdcycle %0" : "=r"(cycles));
return cycles;
#endif
#elif defined(__e2k__) || defined(__elbrus__)
struct timeval tv;
gettimeofday(&tv, nullptr);
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
#elif defined(__hexagon__)
uint64_t pcycle;
asm volatile("%0 = C15:14" : "=r"(pcycle));
return static_cast<double>(pcycle);
#else
// The soft failover to a generic implementation is automatic only for ARM.
// For other platforms the developer is expected to make an attempt to create

View File

@ -1,8 +1,6 @@
#ifndef BENCHMARK_INTERNAL_MACROS_H_
#define BENCHMARK_INTERNAL_MACROS_H_
#include "benchmark/benchmark.h"
/* Needed to detect STL */
#include <cstdlib>
@ -13,7 +11,11 @@
#endif
#if defined(__clang__)
#if !defined(COMPILER_CLANG)
#if defined(__ibmxl__)
#if !defined(COMPILER_IBMXL)
#define COMPILER_IBMXL
#endif
#elif !defined(COMPILER_CLANG)
#define COMPILER_CLANG
#endif
#elif defined(_MSC_VER)
@ -40,6 +42,19 @@
#define BENCHMARK_OS_CYGWIN 1
#elif defined(_WIN32)
#define BENCHMARK_OS_WINDOWS 1
// WINAPI_FAMILY_PARTITION is defined in winapifamily.h.
// We include windows.h which implicitly includes winapifamily.h for compatibility.
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <windows.h>
#if defined(WINAPI_FAMILY_PARTITION)
#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
#define BENCHMARK_OS_WINDOWS_WIN32 1
#elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP)
#define BENCHMARK_OS_WINDOWS_RT 1
#endif
#endif
#if defined(__MINGW32__)
#define BENCHMARK_OS_MINGW 1
#endif
@ -58,6 +73,8 @@
#define BENCHMARK_OS_NETBSD 1
#elif defined(__OpenBSD__)
#define BENCHMARK_OS_OPENBSD 1
#elif defined(__DragonFly__)
#define BENCHMARK_OS_DRAGONFLY 1
#elif defined(__linux__)
#define BENCHMARK_OS_LINUX 1
#elif defined(__native_client__)
@ -72,6 +89,10 @@
#define BENCHMARK_OS_SOLARIS 1
#elif defined(__QNX__)
#define BENCHMARK_OS_QNX 1
#elif defined(__MVS__)
#define BENCHMARK_OS_ZOS 1
#elif defined(__hexagon__)
#define BENCHMARK_OS_QURT 1
#endif
#if defined(__ANDROID__) && defined(__GLIBCXX__)

View File

@ -12,9 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "benchmark/benchmark.h"
#include "complexity.h"
#include <algorithm>
#include <cmath>
#include <cstdint>
@ -25,41 +22,61 @@
#include <tuple>
#include <vector>
#include "benchmark/benchmark.h"
#include "complexity.h"
#include "string_util.h"
#include "timers.h"
namespace benchmark {
namespace {
std::string StrEscape(const std::string & s) {
std::string StrEscape(const std::string& s) {
std::string tmp;
tmp.reserve(s.size());
for (char c : s) {
switch (c) {
case '\b': tmp += "\\b"; break;
case '\f': tmp += "\\f"; break;
case '\n': tmp += "\\n"; break;
case '\r': tmp += "\\r"; break;
case '\t': tmp += "\\t"; break;
case '\\': tmp += "\\\\"; break;
case '"' : tmp += "\\\""; break;
default : tmp += c; break;
case '\b':
tmp += "\\b";
break;
case '\f':
tmp += "\\f";
break;
case '\n':
tmp += "\\n";
break;
case '\r':
tmp += "\\r";
break;
case '\t':
tmp += "\\t";
break;
case '\\':
tmp += "\\\\";
break;
case '"':
tmp += "\\\"";
break;
default:
tmp += c;
break;
}
}
return tmp;
}
std::string FormatKV(std::string const& key, std::string const& value) {
return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(), StrEscape(value).c_str());
return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(),
StrEscape(value).c_str());
}
std::string FormatKV(std::string const& key, const char* value) {
return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(), StrEscape(value).c_str());
return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(),
StrEscape(value).c_str());
}
std::string FormatKV(std::string const& key, bool value) {
return StrFormat("\"%s\": %s", StrEscape(key).c_str(), value ? "true" : "false");
return StrFormat("\"%s\": %s", StrEscape(key).c_str(),
value ? "true" : "false");
}
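// Illustrative JSON fragments produced by the FormatKV overloads above
// (string keys and values pass through StrEscape first):
//   FormatKV("name", std::string("BM_Foo/8"))  returns  "name": "BM_Foo/8"
//   FormatKV("has_result", true)               returns  "has_result": true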
std::string FormatKV(std::string const& key, int64_t value) {
@ -68,12 +85,6 @@ std::string FormatKV(std::string const& key, int64_t value) {
return ss.str();
}
std::string FormatKV(std::string const& key, IterationCount value) {
std::stringstream ss;
ss << '"' << StrEscape(key) << "\": " << value;
return ss.str();
}
std::string FormatKV(std::string const& key, double value) {
std::stringstream ss;
ss << '"' << StrEscape(key) << "\": ";
@ -123,7 +134,9 @@ bool JSONReporter::ReportContext(const Context& context) {
RoundDouble(info.cycles_per_second / 1000000.0))
<< ",\n";
if (CPUInfo::Scaling::UNKNOWN != info.scaling) {
out << indent << FormatKV("cpu_scaling_enabled", info.scaling == CPUInfo::Scaling::ENABLED ? true : false)
out << indent
<< FormatKV("cpu_scaling_enabled",
info.scaling == CPUInfo::Scaling::ENABLED ? true : false)
<< ",\n";
}
@ -136,8 +149,8 @@ bool JSONReporter::ReportContext(const Context& context) {
out << cache_indent << FormatKV("type", CI.type) << ",\n";
out << cache_indent << FormatKV("level", static_cast<int64_t>(CI.level))
<< ",\n";
out << cache_indent
<< FormatKV("size", static_cast<int64_t>(CI.size)) << ",\n";
out << cache_indent << FormatKV("size", static_cast<int64_t>(CI.size))
<< ",\n";
out << cache_indent
<< FormatKV("num_sharing", static_cast<int64_t>(CI.num_sharing))
<< "\n";
@ -159,7 +172,19 @@ bool JSONReporter::ReportContext(const Context& context) {
#else
const char build_type[] = "debug";
#endif
out << indent << FormatKV("library_build_type", build_type) << "\n";
out << indent << FormatKV("library_build_type", build_type);
std::map<std::string, std::string>* global_context =
internal::GetGlobalContext();
if (global_context != nullptr) {
for (const auto& kv : *global_context) {
out << ",\n";
out << indent << FormatKV(kv.first, kv.second);
}
}
out << "\n";
// Close context block and open the list of benchmarks.
out << inner_indent << "},\n";
out << inner_indent << "\"benchmarks\": [\n";
@ -197,6 +222,10 @@ void JSONReporter::PrintRunData(Run const& run) {
std::string indent(6, ' ');
std::ostream& out = GetOutputStream();
out << indent << FormatKV("name", run.benchmark_name()) << ",\n";
out << indent << FormatKV("family_index", run.family_index) << ",\n";
out << indent
<< FormatKV("per_family_instance_index", run.per_family_instance_index)
<< ",\n";
out << indent << FormatKV("run_name", run.run_name.str()) << ",\n";
out << indent << FormatKV("run_type", [&run]() -> const char* {
switch (run.run_type) {
@ -215,15 +244,36 @@ void JSONReporter::PrintRunData(Run const& run) {
out << indent << FormatKV("threads", run.threads) << ",\n";
if (run.run_type == BenchmarkReporter::Run::RT_Aggregate) {
out << indent << FormatKV("aggregate_name", run.aggregate_name) << ",\n";
out << indent << FormatKV("aggregate_unit", [&run]() -> const char* {
switch (run.aggregate_unit) {
case StatisticUnit::kTime:
return "time";
case StatisticUnit::kPercentage:
return "percentage";
}
BENCHMARK_UNREACHABLE();
}()) << ",\n";
}
if (run.error_occurred) {
out << indent << FormatKV("error_occurred", run.error_occurred) << ",\n";
out << indent << FormatKV("error_message", run.error_message) << ",\n";
if (internal::SkippedWithError == run.skipped) {
out << indent << FormatKV("error_occurred", true) << ",\n";
out << indent << FormatKV("error_message", run.skip_message) << ",\n";
} else if (internal::SkippedWithMessage == run.skipped) {
out << indent << FormatKV("skipped", true) << ",\n";
out << indent << FormatKV("skip_message", run.skip_message) << ",\n";
}
if (!run.report_big_o && !run.report_rms) {
out << indent << FormatKV("iterations", run.iterations) << ",\n";
out << indent << FormatKV("real_time", run.GetAdjustedRealTime()) << ",\n";
out << indent << FormatKV("cpu_time", run.GetAdjustedCPUTime());
if (run.run_type != Run::RT_Aggregate ||
run.aggregate_unit == StatisticUnit::kTime) {
out << indent << FormatKV("real_time", run.GetAdjustedRealTime())
<< ",\n";
out << indent << FormatKV("cpu_time", run.GetAdjustedCPUTime());
} else {
assert(run.aggregate_unit == StatisticUnit::kPercentage);
out << indent << FormatKV("real_time", run.real_accumulated_time)
<< ",\n";
out << indent << FormatKV("cpu_time", run.cpu_accumulated_time);
}
out << ",\n"
<< indent << FormatKV("time_unit", GetTimeUnitString(run.time_unit));
} else if (run.report_big_o) {
@ -241,9 +291,21 @@ void JSONReporter::PrintRunData(Run const& run) {
out << ",\n" << indent << FormatKV(c.first, c.second);
}
if (run.has_memory_result) {
if (run.memory_result) {
const MemoryManager::Result memory_result = *run.memory_result;
out << ",\n" << indent << FormatKV("allocs_per_iter", run.allocs_per_iter);
out << ",\n" << indent << FormatKV("max_bytes_used", run.max_bytes_used);
out << ",\n"
<< indent << FormatKV("max_bytes_used", memory_result.max_bytes_used);
auto report_if_present = [&out, &indent](const std::string& label,
int64_t val) {
if (val != MemoryManager::TombstoneValue)
out << ",\n" << indent << FormatKV(label, val);
};
report_if_present("total_allocated_bytes",
memory_result.total_allocated_bytes);
report_if_present("net_heap_growth", memory_result.net_heap_growth);
}
if (!run.report_label.empty()) {
@ -252,4 +314,7 @@ void JSONReporter::PrintRunData(Run const& run) {
out << '\n';
}
const int64_t MemoryManager::TombstoneValue =
std::numeric_limits<int64_t>::max();
} // end namespace benchmark

View File

@ -4,7 +4,12 @@
#include <iostream>
#include <ostream>
#include "benchmark/benchmark.h"
// NOTE: this is also defined in benchmark.h but we're trying to avoid a
// dependency.
// The _MSVC_LANG check should detect Visual Studio 2015 Update 3 and newer.
#if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)
#define BENCHMARK_HAS_CXX11
#endif
namespace benchmark {
namespace internal {
@ -23,7 +28,16 @@ class LogType {
private:
LogType(std::ostream* out) : out_(out) {}
std::ostream* out_;
BENCHMARK_DISALLOW_COPY_AND_ASSIGN(LogType);
// NOTE: we could use BENCHMARK_DISALLOW_COPY_AND_ASSIGN but we shouldn't have
// a dependency on benchmark.h from here.
#ifndef BENCHMARK_HAS_CXX11
LogType(const LogType&);
LogType& operator=(const LogType&);
#else
LogType(const LogType&) = delete;
LogType& operator=(const LogType&) = delete;
#endif
};
template <class Tp>
@ -47,13 +61,13 @@ inline int& LogLevel() {
}
inline LogType& GetNullLogInstance() {
static LogType log(nullptr);
return log;
static LogType null_log(static_cast<std::ostream*>(nullptr));
return null_log;
}
inline LogType& GetErrorLogInstance() {
static LogType log(&std::clog);
return log;
static LogType error_log(&std::clog);
return error_log;
}
inline LogType& GetLogInstanceForLevel(int level) {
@ -67,7 +81,7 @@ inline LogType& GetLogInstanceForLevel(int level) {
} // end namespace benchmark
// clang-format off
#define VLOG(x) \
#define BM_VLOG(x) \
(::benchmark::internal::GetLogInstanceForLevel(x) << "-- LOG(" << x << "):" \
" ")
// clang-format on

View File

@ -9,60 +9,60 @@
// Enable thread safety attributes only with clang.
// The attributes can be safely erased when compiling with other compilers.
#if defined(HAVE_THREAD_SAFETY_ATTRIBUTES)
#define THREAD_ANNOTATION_ATTRIBUTE__(x) __attribute__((x))
#define THREAD_ANNOTATION_ATTRIBUTE_(x) __attribute__((x))
#else
#define THREAD_ANNOTATION_ATTRIBUTE__(x) // no-op
#define THREAD_ANNOTATION_ATTRIBUTE_(x) // no-op
#endif
#define CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE__(capability(x))
#define CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(capability(x))
#define SCOPED_CAPABILITY THREAD_ANNOTATION_ATTRIBUTE__(scoped_lockable)
#define SCOPED_CAPABILITY THREAD_ANNOTATION_ATTRIBUTE_(scoped_lockable)
#define GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE__(guarded_by(x))
#define GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE_(guarded_by(x))
#define PT_GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE__(pt_guarded_by(x))
#define PT_GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE_(pt_guarded_by(x))
#define ACQUIRED_BEFORE(...) \
THREAD_ANNOTATION_ATTRIBUTE__(acquired_before(__VA_ARGS__))
THREAD_ANNOTATION_ATTRIBUTE_(acquired_before(__VA_ARGS__))
#define ACQUIRED_AFTER(...) \
THREAD_ANNOTATION_ATTRIBUTE__(acquired_after(__VA_ARGS__))
THREAD_ANNOTATION_ATTRIBUTE_(acquired_after(__VA_ARGS__))
#define REQUIRES(...) \
THREAD_ANNOTATION_ATTRIBUTE__(requires_capability(__VA_ARGS__))
THREAD_ANNOTATION_ATTRIBUTE_(requires_capability(__VA_ARGS__))
#define REQUIRES_SHARED(...) \
THREAD_ANNOTATION_ATTRIBUTE__(requires_shared_capability(__VA_ARGS__))
THREAD_ANNOTATION_ATTRIBUTE_(requires_shared_capability(__VA_ARGS__))
#define ACQUIRE(...) \
THREAD_ANNOTATION_ATTRIBUTE__(acquire_capability(__VA_ARGS__))
THREAD_ANNOTATION_ATTRIBUTE_(acquire_capability(__VA_ARGS__))
#define ACQUIRE_SHARED(...) \
THREAD_ANNOTATION_ATTRIBUTE__(acquire_shared_capability(__VA_ARGS__))
THREAD_ANNOTATION_ATTRIBUTE_(acquire_shared_capability(__VA_ARGS__))
#define RELEASE(...) \
THREAD_ANNOTATION_ATTRIBUTE__(release_capability(__VA_ARGS__))
THREAD_ANNOTATION_ATTRIBUTE_(release_capability(__VA_ARGS__))
#define RELEASE_SHARED(...) \
THREAD_ANNOTATION_ATTRIBUTE__(release_shared_capability(__VA_ARGS__))
THREAD_ANNOTATION_ATTRIBUTE_(release_shared_capability(__VA_ARGS__))
#define TRY_ACQUIRE(...) \
THREAD_ANNOTATION_ATTRIBUTE__(try_acquire_capability(__VA_ARGS__))
THREAD_ANNOTATION_ATTRIBUTE_(try_acquire_capability(__VA_ARGS__))
#define TRY_ACQUIRE_SHARED(...) \
THREAD_ANNOTATION_ATTRIBUTE__(try_acquire_shared_capability(__VA_ARGS__))
THREAD_ANNOTATION_ATTRIBUTE_(try_acquire_shared_capability(__VA_ARGS__))
#define EXCLUDES(...) THREAD_ANNOTATION_ATTRIBUTE__(locks_excluded(__VA_ARGS__))
#define EXCLUDES(...) THREAD_ANNOTATION_ATTRIBUTE_(locks_excluded(__VA_ARGS__))
#define ASSERT_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE__(assert_capability(x))
#define ASSERT_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(assert_capability(x))
#define ASSERT_SHARED_CAPABILITY(x) \
THREAD_ANNOTATION_ATTRIBUTE__(assert_shared_capability(x))
THREAD_ANNOTATION_ATTRIBUTE_(assert_shared_capability(x))
#define RETURN_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE__(lock_returned(x))
#define RETURN_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(lock_returned(x))
#define NO_THREAD_SAFETY_ANALYSIS \
THREAD_ANNOTATION_ATTRIBUTE__(no_thread_safety_analysis)
THREAD_ANNOTATION_ATTRIBUTE_(no_thread_safety_analysis)
namespace benchmark {
@ -130,7 +130,7 @@ class Barrier {
// entered the barrier. Returns true iff this is the last thread to
// enter the barrier.
bool createBarrier(MutexLock& ml) REQUIRES(lock_) {
CHECK_LT(entered_, running_threads_);
BM_CHECK_LT(entered_, running_threads_);
entered_++;
if (entered_ < running_threads_) {
// Wait for all threads to enter
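
To illustrate the annotation macros defined above (only the internal THREAD_ANNOTATION_ATTRIBUTE_ helper is renamed in this hunk), here is a hedged sketch of how they are typically applied; Mutex and MutexLock are assumed to be the wrappers declared elsewhere in this header:

class Counter {
 public:
  void Increment() EXCLUDES(mutex_) {
    MutexLock lock(mutex_);  // scoped acquisition of mutex_
    IncrementLocked();
  }

 private:
  void IncrementLocked() REQUIRES(mutex_) { ++value_; }

  Mutex mutex_;
  int value_ GUARDED_BY(mutex_) = 0;
};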

src/perf_counters.cc (new file, 282 lines)

@ -0,0 +1,282 @@
// Copyright 2021 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "perf_counters.h"
#include <cstring>
#include <memory>
#include <vector>
#if defined HAVE_LIBPFM
#include "perfmon/pfmlib.h"
#include "perfmon/pfmlib_perf_event.h"
#endif
namespace benchmark {
namespace internal {
constexpr size_t PerfCounterValues::kMaxCounters;
#if defined HAVE_LIBPFM
size_t PerfCounterValues::Read(const std::vector<int>& leaders) {
// Create a pointer for multiple reads
const size_t bufsize = values_.size() * sizeof(values_[0]);
char* ptr = reinterpret_cast<char*>(values_.data());
size_t size = bufsize;
for (int lead : leaders) {
auto read_bytes = ::read(lead, ptr, size);
if (read_bytes >= ssize_t(sizeof(uint64_t))) {
// Actual data bytes are all bytes minus initial padding
std::size_t data_bytes = read_bytes - sizeof(uint64_t);
// This should be very cheap since it's in hot cache
std::memmove(ptr, ptr + sizeof(uint64_t), data_bytes);
// Increment our counters
ptr += data_bytes;
size -= data_bytes;
} else {
int err = errno;
GetErrorLogInstance() << "Error reading lead " << lead << " errno:" << err
<< " " << ::strerror(err) << "\n";
return 0;
}
}
return (bufsize - size) / sizeof(uint64_t);
}
const bool PerfCounters::kSupported = true;
// Initializes libpfm only on the first call. Returns whether that single
// initialization was successful.
bool PerfCounters::Initialize() {
// Function-scope static gets initialized only once on first call.
static const bool success = []() {
return pfm_initialize() == PFM_SUCCESS;
}();
return success;
}
bool PerfCounters::IsCounterSupported(const std::string& name) {
Initialize();
perf_event_attr_t attr;
std::memset(&attr, 0, sizeof(attr));
pfm_perf_encode_arg_t arg;
std::memset(&arg, 0, sizeof(arg));
arg.attr = &attr;
const int mode = PFM_PLM3; // user mode only
int ret = pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT_EXT,
&arg);
return (ret == PFM_SUCCESS);
}
PerfCounters PerfCounters::Create(
const std::vector<std::string>& counter_names) {
if (!counter_names.empty()) {
Initialize();
}
// Valid counters will populate these arrays but we start empty
std::vector<std::string> valid_names;
std::vector<int> counter_ids;
std::vector<int> leader_ids;
// Reserve space for the maximum possible
valid_names.reserve(counter_names.size());
counter_ids.reserve(counter_names.size());
const int kCounterMode = PFM_PLM3; // user mode only
// Group leaders will be assigned on demand. The idea is that once we cannot
// create a counter descriptor, the reason is that this group has maxed out,
// so we set group_id back to -1 and retry, giving the algorithm a
// chance to create a new group leader to hold the next set of counters.
int group_id = -1;
// Loop through all performance counters
for (size_t i = 0; i < counter_names.size(); ++i) {
// we are about to push into the valid names vector
// check if we did not reach the maximum
if (valid_names.size() == PerfCounterValues::kMaxCounters) {
// Log a message if we maxed out and stop adding
GetErrorLogInstance()
<< counter_names.size() << " counters were requested. The maximum is "
<< PerfCounterValues::kMaxCounters << " and " << valid_names.size()
<< " were already added. All remaining counters will be ignored\n";
// stop the loop and return what we have already
break;
}
// Check if this name is empty
const auto& name = counter_names[i];
if (name.empty()) {
GetErrorLogInstance()
<< "A performance counter name was the empty string\n";
continue;
}
// Here first means first in group, ie the group leader
const bool is_first = (group_id < 0);
// This struct will be populated by libpfm from the counter string
// and then fed into the syscall perf_event_open
struct perf_event_attr attr {};
attr.size = sizeof(attr);
// This is the input struct to libpfm.
pfm_perf_encode_arg_t arg{};
arg.attr = &attr;
const int pfm_get = pfm_get_os_event_encoding(name.c_str(), kCounterMode,
PFM_OS_PERF_EVENT, &arg);
if (pfm_get != PFM_SUCCESS) {
GetErrorLogInstance()
<< "Unknown performance counter name: " << name << "\n";
continue;
}
// We then proceed to populate the remaining fields in our attribute struct
// Note: the man page for perf_event_open suggests inherit = true and
// read_format = PERF_FORMAT_GROUP don't work together, but that's not the
// case.
attr.disabled = is_first;
attr.inherit = true;
attr.pinned = is_first;
attr.exclude_kernel = true;
attr.exclude_user = false;
attr.exclude_hv = true;
// Read all counters in a group in one read.
attr.read_format = PERF_FORMAT_GROUP;
int id = -1;
while (id < 0) {
static constexpr size_t kNrOfSyscallRetries = 5;
// Retry syscall as it was interrupted often (b/64774091).
for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries;
++num_retries) {
id = perf_event_open(&attr, 0, -1, group_id, 0);
if (id >= 0 || errno != EINTR) {
break;
}
}
if (id < 0) {
// If the file descriptor is negative we might have reached a limit
// in the current group. Set the group_id to -1 and retry
if (group_id >= 0) {
// Create a new group
group_id = -1;
} else {
// At this point we have already retried to set a new group id and
// failed. We then give up.
break;
}
}
}
// We failed to get a new file descriptor. We might have reached a hard
// hardware limit that cannot be resolved even with group multiplexing
if (id < 0) {
GetErrorLogInstance() << "***WARNING** Failed to get a file descriptor "
"for performance counter "
<< name << ". Ignoring\n";
// We give up on this counter but try to keep going
// as the others would be fine
continue;
}
if (group_id < 0) {
// This is a leader, store and assign it to the current file descriptor
leader_ids.push_back(id);
group_id = id;
}
// This is a valid counter, add it to our descriptor's list
counter_ids.push_back(id);
valid_names.push_back(name);
}
// Loop through all group leaders activating them
// There is another option of starting ALL counters in a process but
// that would be too far-reaching an intrusion. If the user is using PMCs
// by themselves then this would have a side effect on them. It is
// friendlier to loop through all groups individually.
for (int lead : leader_ids) {
if (ioctl(lead, PERF_EVENT_IOC_ENABLE) != 0) {
// This should never happen but if it does, we give up on the
// entire batch as recovery would be a mess.
GetErrorLogInstance() << "***WARNING*** Failed to start counters. "
"Claring out all counters.\n";
// Close all peformance counters
for (int id : counter_ids) {
::close(id);
}
// Return an empty object so our internal state is still good and
// the process can continue normally without impact
return NoCounters();
}
}
return PerfCounters(std::move(valid_names), std::move(counter_ids),
std::move(leader_ids));
}
void PerfCounters::CloseCounters() const {
if (counter_ids_.empty()) {
return;
}
for (int lead : leader_ids_) {
ioctl(lead, PERF_EVENT_IOC_DISABLE);
}
for (int fd : counter_ids_) {
close(fd);
}
}
#else // defined HAVE_LIBPFM
size_t PerfCounterValues::Read(const std::vector<int>&) { return 0; }
const bool PerfCounters::kSupported = false;
bool PerfCounters::Initialize() { return false; }
bool PerfCounters::IsCounterSupported(const std::string&) { return false; }
PerfCounters PerfCounters::Create(
const std::vector<std::string>& counter_names) {
if (!counter_names.empty()) {
GetErrorLogInstance() << "Performance counters not supported.";
}
return NoCounters();
}
void PerfCounters::CloseCounters() const {}
#endif // defined HAVE_LIBPFM
PerfCountersMeasurement::PerfCountersMeasurement(
const std::vector<std::string>& counter_names)
: start_values_(counter_names.size()), end_values_(counter_names.size()) {
counters_ = PerfCounters::Create(counter_names);
}
PerfCounters& PerfCounters::operator=(PerfCounters&& other) noexcept {
if (this != &other) {
CloseCounters();
counter_ids_ = std::move(other.counter_ids_);
leader_ids_ = std::move(other.leader_ids_);
counter_names_ = std::move(other.counter_names_);
}
return *this;
}
} // namespace internal
} // namespace benchmark
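
The compaction in PerfCounterValues::Read() above assumes the layout the kernel returns for a group read. A sketch of that layout, per perf_event_open(2), assuming read_format is PERF_FORMAT_GROUP with no other format bits set:

#include <cstdint>

// One ::read() on a group leader yields `nr` followed by the raw values.
// The leading `nr` word is the per-group "initial padding" that Read()
// discards before packing all values into one contiguous array.
struct GroupReadFormat {
  uint64_t nr;         // number of counters in this group
  uint64_t values[8];  // conceptually `nr` entries follow back-to-back
};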

src/perf_counters.h (new file, 200 lines)

@ -0,0 +1,200 @@
// Copyright 2021 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef BENCHMARK_PERF_COUNTERS_H
#define BENCHMARK_PERF_COUNTERS_H
#include <array>
#include <cstdint>
#include <cstring>
#include <memory>
#include <vector>
#include "benchmark/benchmark.h"
#include "check.h"
#include "log.h"
#include "mutex.h"
#ifndef BENCHMARK_OS_WINDOWS
#include <unistd.h>
#endif
#if defined(_MSC_VER)
#pragma warning(push)
// C4251: <symbol> needs to have dll-interface to be used by clients of class
#pragma warning(disable : 4251)
#endif
namespace benchmark {
namespace internal {
// Typically, we can only read a small number of counters. There is also a
// padding preceding counter values, when reading multiple counters with one
// syscall (which is desirable). PerfCounterValues abstracts these details.
// The implementation ensures the storage is inlined, and allows 0-based
// indexing into the counter values.
// The object is used in conjunction with a PerfCounters object, by passing it
// to Snapshot(). The Read() method relocates individual reads, discarding
// the initial padding from each group leader in the values buffer such that
// all user accesses through the [] operator are correct.
class BENCHMARK_EXPORT PerfCounterValues {
public:
explicit PerfCounterValues(size_t nr_counters) : nr_counters_(nr_counters) {
BM_CHECK_LE(nr_counters_, kMaxCounters);
}
// We are reading correctly now so the values don't need to skip padding
uint64_t operator[](size_t pos) const { return values_[pos]; }
// The maximum was increased to 32 only because the buffer
// is std::array<>-backed
static constexpr size_t kMaxCounters = 32;
private:
friend class PerfCounters;
// Get the byte buffer in which perf counters can be captured.
// This is used by PerfCounters::Read
std::pair<char*, size_t> get_data_buffer() {
return {reinterpret_cast<char*>(values_.data()),
sizeof(uint64_t) * (kPadding + nr_counters_)};
}
// This read is complex, and since the goal of this class is to
// abstract away the intricacies of the reading process, this is
// a better place for it
size_t Read(const std::vector<int>& leaders);
// The padding is 2 because of the reading algorithm (the initial padding
// plus the padding of the current read)
static constexpr size_t kPadding = 2;
std::array<uint64_t, kPadding + kMaxCounters> values_;
const size_t nr_counters_;
};
// Collect PMU counters. The object, once constructed, is ready to be used by
// calling Snapshot(). PMU counter collection is enabled from the time Create()
// is called to obtain the object until the object's destructor is called.
class BENCHMARK_EXPORT PerfCounters final {
public:
// True iff this platform supports performance counters.
static const bool kSupported;
// Returns an empty object
static PerfCounters NoCounters() { return PerfCounters(); }
~PerfCounters() { CloseCounters(); }
PerfCounters() = default;
PerfCounters(PerfCounters&&) = default;
PerfCounters(const PerfCounters&) = delete;
PerfCounters& operator=(PerfCounters&&) noexcept;
PerfCounters& operator=(const PerfCounters&) = delete;
// Platform-specific implementations may choose to do some library
// initialization here.
static bool Initialize();
// Check if the given counter is supported, if the app wants to
// check before passing it to Create().
static bool IsCounterSupported(const std::string& name);
// Return a PerfCounters object ready to read the counters with the names
// specified. The values are user-mode only. The counter name format is
// implementation and OS specific.
// In case of failure, this method will in the worst case return an
// empty object whose state will still be valid.
static PerfCounters Create(const std::vector<std::string>& counter_names);
// Take a snapshot of the current value of the counters into the provided
// valid PerfCounterValues storage. The values are populated such that:
// names()[i]'s value is (*values)[i]
BENCHMARK_ALWAYS_INLINE bool Snapshot(PerfCounterValues* values) const {
#ifndef BENCHMARK_OS_WINDOWS
assert(values != nullptr);
return values->Read(leader_ids_) == counter_ids_.size();
#else
(void)values;
return false;
#endif
}
const std::vector<std::string>& names() const { return counter_names_; }
size_t num_counters() const { return counter_names_.size(); }
private:
PerfCounters(const std::vector<std::string>& counter_names,
std::vector<int>&& counter_ids, std::vector<int>&& leader_ids)
: counter_ids_(std::move(counter_ids)),
leader_ids_(std::move(leader_ids)),
counter_names_(counter_names) {}
void CloseCounters() const;
std::vector<int> counter_ids_;
std::vector<int> leader_ids_;
std::vector<std::string> counter_names_;
};
// Typical usage of the above primitives.
class BENCHMARK_EXPORT PerfCountersMeasurement final {
public:
PerfCountersMeasurement(const std::vector<std::string>& counter_names);
size_t num_counters() const { return counters_.num_counters(); }
std::vector<std::string> names() const { return counters_.names(); }
BENCHMARK_ALWAYS_INLINE bool Start() {
if (num_counters() == 0) return true;
// Tell the compiler to not move instructions above/below where we take
// the snapshot.
ClobberMemory();
valid_read_ &= counters_.Snapshot(&start_values_);
ClobberMemory();
return valid_read_;
}
BENCHMARK_ALWAYS_INLINE bool Stop(
std::vector<std::pair<std::string, double>>& measurements) {
if (num_counters() == 0) return true;
// Tell the compiler to not move instructions above/below where we take
// the snapshot.
ClobberMemory();
valid_read_ &= counters_.Snapshot(&end_values_);
ClobberMemory();
for (size_t i = 0; i < counters_.names().size(); ++i) {
double measurement = static_cast<double>(end_values_[i]) -
static_cast<double>(start_values_[i]);
measurements.push_back({counters_.names()[i], measurement});
}
return valid_read_;
}
private:
PerfCounters counters_;
bool valid_read_ = true;
PerfCounterValues start_values_;
PerfCounterValues end_values_;
};
} // namespace internal
} // namespace benchmark
#if defined(_MSC_VER)
#pragma warning(pop)
#endif
#endif // BENCHMARK_PERF_COUNTERS_H
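
Putting the declarations above together, a hedged usage sketch of PerfCountersMeasurement driven directly; the counter names are platform- and libpfm-specific and are used here purely as examples:

#include <string>
#include <utility>
#include <vector>

#include "perf_counters.h"  // the internal header above

void MeasureRegion() {
  benchmark::internal::PerfCountersMeasurement pcm(
      {"CYCLES", "INSTRUCTIONS"});  // example names, may not exist everywhere

  std::vector<std::pair<std::string, double>> deltas;
  if (pcm.Start()) {
    // ... code under measurement ...
    if (pcm.Stop(deltas)) {
      // deltas[i] is {counter name, end value - start value}.
    }
  }
}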


@ -33,7 +33,7 @@
// Prefer C regex libraries when compiling w/o exceptions so that we can
// correctly report errors.
#if defined(BENCHMARK_HAS_NO_EXCEPTIONS) && \
defined(BENCHMARK_HAVE_STD_REGEX) && \
defined(HAVE_STD_REGEX) && \
(defined(HAVE_GNU_POSIX_REGEX) || defined(HAVE_POSIX_REGEX))
#undef HAVE_STD_REGEX
#endif
@ -126,7 +126,7 @@ inline bool Regex::Init(const std::string& spec, std::string* error) {
// regerror returns the number of bytes necessary to null terminate
// the string, so we drop that terminator byte when assigning to error.
CHECK_NE(needed, 0);
BM_CHECK_NE(needed, 0);
error->assign(errbuf, needed - 1);
delete[] errbuf;


@ -12,17 +12,17 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "benchmark/benchmark.h"
#include "timers.h"
#include <cstdlib>
#include <iostream>
#include <map>
#include <string>
#include <tuple>
#include <vector>
#include "benchmark/benchmark.h"
#include "check.h"
#include "string_util.h"
#include "timers.h"
namespace benchmark {
@ -33,10 +33,14 @@ BenchmarkReporter::~BenchmarkReporter() {}
void BenchmarkReporter::PrintBasicContext(std::ostream *out,
Context const &context) {
CHECK(out) << "cannot be null";
BM_CHECK(out) << "cannot be null";
auto &Out = *out;
#ifndef BENCHMARK_OS_QURT
// Date/time information is not available on QuRT.
// Attempting to get it via this call causes the binary to crash.
Out << LocalDateTimeString() << "\n";
#endif
if (context.executable_name)
Out << "Running " << context.executable_name << "\n";
@ -64,6 +68,15 @@ void BenchmarkReporter::PrintBasicContext(std::ostream *out,
Out << "\n";
}
std::map<std::string, std::string> *global_context =
internal::GetGlobalContext();
if (global_context != nullptr) {
for (const auto &kv : *global_context) {
Out << kv.first << ": " << kv.second << "\n";
}
}
if (CPUInfo::Scaling::ENABLED == info.scaling) {
Out << "***WARNING*** CPU scaling is enabled, the benchmark "
"real time measurements may be noisy and will incur extra "


@ -1,51 +0,0 @@
// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "sleep.h"
#include <cerrno>
#include <cstdlib>
#include <ctime>
#include "internal_macros.h"
#ifdef BENCHMARK_OS_WINDOWS
#include <windows.h>
#endif
namespace benchmark {
#ifdef BENCHMARK_OS_WINDOWS
// Windows' Sleep takes a milliseconds argument.
void SleepForMilliseconds(int milliseconds) { Sleep(milliseconds); }
void SleepForSeconds(double seconds) {
SleepForMilliseconds(static_cast<int>(kNumMillisPerSecond * seconds));
}
#else // BENCHMARK_OS_WINDOWS
void SleepForMicroseconds(int microseconds) {
struct timespec sleep_time;
sleep_time.tv_sec = microseconds / kNumMicrosPerSecond;
sleep_time.tv_nsec = (microseconds % kNumMicrosPerSecond) * kNumNanosPerMicro;
while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR)
; // Ignore signals and wait for the full interval to elapse.
}
void SleepForMilliseconds(int milliseconds) {
SleepForMicroseconds(milliseconds * kNumMicrosPerMilli);
}
void SleepForSeconds(double seconds) {
SleepForMicroseconds(static_cast<int>(seconds * kNumMicrosPerSecond));
}
#endif // BENCHMARK_OS_WINDOWS
} // end namespace benchmark


@ -1,15 +0,0 @@
#ifndef BENCHMARK_SLEEP_H_
#define BENCHMARK_SLEEP_H_
namespace benchmark {
const int kNumMillisPerSecond = 1000;
const int kNumMicrosPerMilli = 1000;
const int kNumMicrosPerSecond = kNumMillisPerSecond * 1000;
const int kNumNanosPerMicro = 1000;
const int kNumNanosPerSecond = kNumNanosPerMicro * kNumMicrosPerSecond;
void SleepForMilliseconds(int milliseconds);
void SleepForSeconds(double seconds);
} // end namespace benchmark
#endif // BENCHMARK_SLEEP_H_

Some files were not shown because too many files have changed in this diff.