mirror of https://github.com/google/benchmark.git synced 2025-04-21 17:00:28 +08:00

Compare commits


84 Commits
v1.9.0 ... main

Author SHA1 Message Date
dominic
48f5cc21ba
Deprecate ubuntu-20.04 images in actions ()
https://github.com/actions/runner-images/issues/11101
2025-04-16 11:29:10 +01:00
krzikalla
ff52b227db
Fixed private macro name issue () 2025-04-11 15:02:03 +01:00
krzikalla
f828d71c59
Method templates for Fixtures introduced () 2025-04-11 12:25:46 +01:00
krzikalla
0da57b85cf
Threading API refactor ()
Refactor the multi-threading API to support using a custom,
user-provided thread factory instead of always spawning POSIX threads.
2025-03-29 10:49:25 +03:00
krzikalla
2918a094b0
Refactor threading run ()
* ThreadManager::WaitForAllThreads removed

* WaitForAllThreads was only called either in single threaded
  environments or just before all threads are joined anyway. As this
  doesn't add a useful synchronization point, it's removed.

* Formatting issue

* Thread Sanitizer satisfied

* More formatting issues
2025-03-27 18:10:05 +03:00
dominic
cb4239f398
Use the top-level ::benchmark namespace to resolve make_unique ()
Fixes 
2025-03-27 08:22:25 +03:00
Dominic Hamon
afa23b7699 bump version to 1.9.2 in readiness for release. 2025-03-25 09:17:34 +00:00
Roman Lebedev
1bc59dce27
Finish cxx version bump ()
* `CMakeLists.txt`: drop hopefully obsolete code

* README.md: update

* Unbreak `BENCHMARK_HAS_CXX11` macro

835365f99a stopped defining it,
but didn't un-conditionalize the code guarded under it...

* Drop `BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK`

We no longer support such an old gcc version

* `docs/user_guide.md`: proofread

* Add a test to ensure that `benchmark.h` remains C++14 header

* Revert `[[maybe_unused]]` changes - it requires C++17

* Also support C++11 standard for using the library

I don't think we want to support C++03 though,
but I suppose C++11 is palatable, at least right now.
2025-03-17 09:18:19 +00:00
Richard Cole
45ded53f70
update version of gtest to v1.15.2 (latest) and also the cmake config ()
* update version of gtest to v1.15.2 (latest) and also the cmake configuration to avoid deprecation warnings

* `cmake/GoogleTest.cmake.in`: do a shallow clone of google test

---------

Co-authored-by: dominic <510002+dmah42@users.noreply.github.com>
Co-authored-by: Roman Lebedev <lebedev.ri@gmail.com>
2025-03-12 17:40:24 +03:00
Roman Lebedev
dafc6347e2 CI: switch to ninja on all jobs 2025-03-12 17:18:37 +03:00
Roman Lebedev
1de7d6aeae CI: use Ninja in sanitizer jobs 2025-03-12 17:18:37 +03:00
Roman Lebedev
02c258079e CI: only clone/fetch the parts of LLVM monorepo that we need
This ends up being *much* faster, noticeably speeding up these jobs.
2025-03-12 17:18:37 +03:00
Roman Lebedev
6cd107ffad CI: build libcxxabi against system unwind library
... because that is what the MSan is built against,
and mixing them clearly causes issues.
2025-03-12 17:18:37 +03:00
Max Van den Eynde
2bf3534075
Compilation example was wrong. Fixed standard () 2025-03-12 14:22:18 +03:00
EfesX
5a4c459548
fix memory manager result bug ()
* fix memory manager result bug

* change is_valid to memory_iterations

* fix test

* some fixes

* fix test

...for msvc

* fix test

* fix test

add the correct explicit casts

* fix msvc failure

* some fixes

* remove unnecessary include
2025-03-12 10:15:28 +00:00
Alfred Wingate
571c235e1e
Install FindPFM.cmake for benchmarkConfig.cmake ()
There is no upstream PFM cmake package config file to use, so this has
to be installed for the benchmark cmake package config file to work.

Bug: https://bugs.gentoo.org/950573
See-Also: c2146397ac

Signed-off-by: Alfred Wingate <parona@protonmail.com>
2025-03-05 08:43:48 +00:00
EfesX
ff5c94d860
change setup and teardown callback type ()
Change the type of the callbacks to `std::function` (see the sketch below)
2025-02-20 16:16:32 +03:00
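
As a rough illustration (not from the commit) of what the `std::function` callback type enables, assuming the callbacks keep the `(const benchmark::State&)` signature used by `Setup`/`Teardown`: capturing lambdas can now be passed, not just plain function pointers.

```c++
#include <benchmark/benchmark.h>
#include <vector>

static void BM_VectorPushBack(benchmark::State& state) {
  for (auto _ : state) {
    std::vector<int> v;
    v.push_back(42);
    benchmark::DoNotOptimize(v.data());
  }
}

int main(int argc, char** argv) {
  benchmark::Initialize(&argc, argv);
  int setup_calls = 0;  // captured by the lambdas below
  // A capturing lambda converts to std::function, but not to the plain
  // function pointer type the callbacks used before this change.
  benchmark::RegisterBenchmark("BM_VectorPushBack", BM_VectorPushBack)
      ->Setup([&setup_calls](const benchmark::State&) { ++setup_calls; })
      ->Teardown([&setup_calls](const benchmark::State&) { --setup_calls; });
  benchmark::RunSpecifiedBenchmarks();
  return 0;
}
```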
Dillon
8d4fdd6e6e
Fix build errors on QuRT (Hexagon) () 2025-02-19 01:57:34 +03:00
dominic
951429282a
[clang-tidy] resolve some global clang-tidy issues ()
* [clang-tidy] resolve some global clang-tidy issues

* more nolint and some global fixes
2025-02-18 10:07:57 +00:00
dominic
57efbfb3a0
use smart pointers ()
* use smart pointers
* use vectors
* size_t
2025-02-18 09:59:27 +00:00
Nicholas Junge
afa46a38d9
deps: Update nanobind_bazel to v2.5.0 ()
No new functionality, just an update to the newest nanobind.

Co-authored-by: dominic <510002+dmah42@users.noreply.github.com>
2025-02-17 13:44:39 +00:00
Nicholas Junge
657a55a0d4
dx: Update pre-commit repos, change imports of Python / CC rules ()
The changes are an autofix added in Buildifier 8.0.1, designed to future-proof
Bazel projects against the eventual removal of these rules from the native
Bazel namespace.
2025-02-17 13:28:32 +00:00
dominic
adbda82db3
[clang-tidy] autofix readability issues ()
* [clang-tidy] autofix readability issues

* more modern clang format
2025-02-12 19:40:49 +00:00
dominic
2d4c8dd21a
[clang-tidy] autofix cppcoreguidelines ()
* [clang-tidy] autofix cppcoreguidelines

* better than automation maybe
2025-02-12 19:32:45 +00:00
dominic
f8db7f6c07
[clang-tidy] fix missing braces ()
* [clang-tidy] fix missing braces
2025-02-11 23:56:06 +00:00
dominic
c68e308b4f
[clang-tidy] fix warning about decaying array to pointer ()
* [clang-tidy] fix warning about decaying array to pointer

* fix a different warning (old style cast)

* use string_view instead of old-style const char* strings

* ensure bazel windows is using c++17

* learn to use bazel

* and tests

* precommit fix

* more string_view creation and casting

* format

* format

* [clang-tidy] use unique_ptr for benchmark registration ()

* use unique_ptr for benchmark registration
2025-02-11 23:15:20 +00:00
dominic
05c5930d96
[clang-tidy] use unique_ptr for benchmark registration ()
* use unique_ptr for benchmark registration
2025-02-11 21:10:34 +00:00
dominic
6a508bf11e
benchmark declarations can and should be const (clang-tidy) ()
* benchmark declarations can and should be const (clang-tidy)

* clang-format

* add clang-tidy ignore file to remove googletest (and other third party) source for consideration
2025-02-10 17:16:03 -08:00
dominic
a125fb6736
run clang-tidy using a common config and reduced set of tests ()
* move clang-tidy config somewhere central and reduce it
2025-02-11 00:32:38 +00:00
dominic
835365f99a
remove cxx03 test, fully unblocking c++1X development ()
* remove cxx03 test, fully unblocking c++1X development

* remove unnecessary macros

* pre-commit

* remove opt-in analyzer warnings from clang-tidy

* revert some changes, flush streams

* replace abort with exit(1) to call atexit and dtors

* remove more endl and put in explicit flush
2025-02-10 22:17:49 +00:00
UebelAndre
933e6d3c1f
Build libpfm with rules_cc ()
Co-authored-by: dominic <510002+dmah42@users.noreply.github.com>
2025-02-10 22:00:14 +00:00
Brad Smith
da9d68953b
Remove /proc/cpuinfo fallback path ()
AIX, WASM (fork of musl for libc) and a few others should now use the sysconf
path. /proc is not portable and cpuinfo is Linux specific. It does not work
anywhere else.
2025-02-10 17:34:29 +00:00
dominic
9d8201efd4
fix sanitizer cmake builds ()
* bump llvm version to 19

* use same standard for feature checks as for the build
2025-02-06 13:03:14 +00:00
Brad Smith
edb1e76d8c
Try to use the _SC_NPROCESSORS_ONLN sysconf elsewhere ()
Try to use the sysconf method on OSes other than just Linux and
Solaris, if it exists. Also slightly shuffle the order of the sysctl
and sysconf methods.
2025-02-06 12:54:17 +00:00
Brad Smith
faaa266d33
Replace usage of deprecated sysctl on macOS ()
The use of the sysctl hw.ncpu has long been deprecated and
should be replaced by hw.logicalcpu.
2025-02-06 10:53:21 +00:00
Brad Smith
41e81b1ca4
Retrieve the number of online CPUs on OpenBSD and NetBSD ()
* Retrieve the number of online CPUs on OpenBSD and NetBSD

* Spacing adjustment

---------

Co-authored-by: dominic <510002+dmah42@users.noreply.github.com>
2025-02-06 10:10:55 +00:00
dominic
47bc26c8b5
change pacboy compiler target for windows builds ()
* change pacboy compiler target for windows builds

* use an action for cmake instead of doing things manually

* set compiler for cmake

* remove cmake action from msys2 build

* readd cmake package for msys2

* fix cmake test path to match build

* fix msvc build type setting

* fix msvc build type setting

* consistent output directory for msvc

* remove legacy environments (https://www.msys2.org/docs/environments/#__tabbed_1_2)

* remove shell overrides and depend on default for msys2
2025-02-05 17:45:30 +00:00
dominic
2e16afc3fd
add back /proc/cpuinfo as a fallback for some platforms () 2025-02-05 12:21:47 +00:00
Brad Smith
c35af58b61
Update error message now that /proc/cpuinfo is no longer in use ()
c24774dc4f removed the use of /proc/cpuinfo,
so it is no longer mentioned in the error message.
2025-02-04 10:32:41 +00:00
Dominic Hamon
4a805f9f0f clang-tidy warning 2025-01-30 10:00:04 +00:00
Dominic Hamon
4642758438 fix some clang-tidy issues 2025-01-30 09:52:07 +00:00
xdje42
049f6e79cc
[BUG] Run external profiler (ProfilerManager) same number of iterations ()
Run the external profiler the same number of iterations as the
benchmark was run normally.
This makes, for example, a trace collected via ProfilerManager
consistent with collected PMU data.
2025-01-29 09:53:56 +00:00
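
For context, a minimal sketch of how an external profiler hooks in, assuming the `ProfilerManager` interface with `AfterSetupStart()`/`BeforeTeardownStop()` hooks exposed by recent `benchmark.h`; the trace calls are placeholders, not real APIs:

```c++
#include <benchmark/benchmark.h>

// Sketch only: StartTrace()/StopTrace() stand in for a real profiler's calls.
class MyProfilerManager : public benchmark::ProfilerManager {
 public:
  void AfterSetupStart() override { /* StartTrace(); */ }
  void BeforeTeardownStop() override { /* StopTrace(); */ }
};

int main(int argc, char** argv) {
  static MyProfilerManager manager;
  // With the fix above, the profiled run uses the same iteration count
  // as the normal measurement run.
  benchmark::RegisterProfilerManager(&manager);
  benchmark::Initialize(&argc, argv);
  benchmark::RunSpecifiedBenchmarks();
  return 0;
}
```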
Christian Clauss
3d027d7e38
ruff rule E501: Fix long lines in Python code ()
* ruff rule E501: Fix long lines in Python code

* Add missing space

---------

Co-authored-by: dominic <510002+dmah42@users.noreply.github.com>
2025-01-22 16:43:07 +00:00
Christian Clauss
6f21075d9c
GitHub Actions: build-and-test on an ARM processor ()
[Standard GitHub-hosted runners for public repositories](https://docs.github.com/en/actions/using-github-hosted-runners/using-github-hosted-runners/about-github-hosted-runners#standard-github-hosted-runners-for-public-repositories) --> `ubuntu-22.04-arm`, `ubuntu-24.04-arm`
2025-01-22 14:24:22 +00:00
Christian Clauss
ecb5df6473
Lint Python: Add more ruff rules ()
* Lint Python: Add more ruff rules

* range(len()) --> enumerate()

* zip(strict=True)
2025-01-22 11:50:00 +00:00
Nicholas Junge
d6536acfe8
ci: Update pre-commit hooks ()
As a fix, also turn the comment in libpfm's build file into a proper Starlark
docstring.

Co-authored-by: dominic <510002+dmah42@users.noreply.github.com>
2025-01-13 13:38:59 +00:00
Nicholas Junge
4834ae9e57
Update nanobind-bazel to v2.4.0 ()
Contains nanobind v2.4.0, which brings some more functionality, free-threading fixes,
and performance improvements.
2025-01-13 13:33:04 +00:00
Helge Deller
c24774dc4f
Get number of CPUs with sysconf() on Linux ()
* Get number of CPUs with sysconf() on Linux

Avoid parsing /proc/cpuinfo just to get the number of CPUs.
Instead use the portable function provided by glibc.

* Update sysinfo.cc
2025-01-09 16:07:43 +00:00
Helge Deller
39be87d300
Fix runtime crash when parsing /proc/cpuinfo fails ()
The test case fails on sparc64 because the parsing of /proc/cpuinfo
fails and thus currently returns "0" CPUs, which eventually leads
to division-by-zero faults in the tests.

Fix the issue by returning at least "1" CPU, which allows the
tests to run. An error message will be printed in any case.

Long-term, the code should be fixed to parse the cpuinfo output
on sparc, which looks like this:
...
type            : sun4v
ncpus probed    : 48
ncpus active    : 48
2025-01-09 10:47:29 +00:00
Helge Deller
077db43001
cycleclock: Use clock_gettime() as fallback for any Linux architecture ()
The Linux kernel has provided the clock_gettime() function for a long
time, so it can be used as a generic fallback option for any
architecture when no other (better) option has been provided.

I noticed the benchmark package failed to build on Debian on the SH-4
architecture, so with this change SH-4 is now the first user of this
fallback option.
2025-01-08 16:54:08 +00:00
0dminnimda
f981f58da3
README.md: fix build instructions ()
Co-authored-by: dominic <510002+dmah42@users.noreply.github.com>
2025-01-08 12:49:09 +00:00
Hamza
7ddc400d62
fix: remove clang-cl compilation warning ()
- The /MP flag only applies to cl, not to cl-compatible frontends for other compilers (e.g. clang-cl, icx-cl, etc.).

Co-authored-by: dominic <510002+dmah42@users.noreply.github.com>
2025-01-08 12:41:17 +00:00
Helge Deller
f65741b2bd
cycleclock: Support for PA-RISC (hppa) architecture ()
Co-authored-by: dominic <510002+dmah42@users.noreply.github.com>
2025-01-08 12:03:53 +00:00
Keith Smiley
5af40e824d
[bazel] Remove selects on CPU ()
In a future version of Bazel this produces a warning. In this case,
constraining on the Windows platform alone is enough. Fixes:

```
WARNING: /.../benchmark/BUILD.bazel:29:15: in config_setting rule //:windows: select() on cpu is deprecated. Use platform constraints instead: https://bazel.build/docs/configurable-attributes#platforms.
```
2025-01-08 11:26:44 +00:00
Devon Loehr
f4f93b5553
Change SDK version check ()
Now that GitHub seems to have updated its builders, perhaps we can check the SDK version in the more standard way.
2024-12-10 15:29:03 +00:00
Nicholas Junge
ae52c9e66e
Remove wheel merge job, merge artifacts on download ()
This is supported by `actions/download-artifact@v4`, and endorsed by cibuildwheel
in their documentation (see https://cibuildwheel.pypa.io/en/stable/deliver-to-pypi/#github-actions).

Co-authored-by: dominic <510002+dmah42@users.noreply.github.com>
2024-12-10 12:27:12 +00:00
Dominic Hamon
c8c66e0b4a remove unnecessary bazel action parameter 2024-12-10 12:07:53 +00:00
Dominic Hamon
b32ae9c9af remove noenable_bzlmod as workspace support is going away 2024-12-10 12:04:53 +00:00
Nicholas Junge
b2b0aab464
Fix malformed clang invocation in build_ext.run ()
The fix is, unsurprisingly, to not invoke clang at all, because we use
Bazel to build everything anyway.

This also means that we can drop the setuptools pin.
2024-12-03 17:42:57 +00:00
Nicholas Junge
3d88affa59
Remove if statement from wheel upload job ()
This is to see if it works with the new artifact download config.
2024-11-29 11:55:32 +00:00
Nicholas Junge
4b0533b726
Add artifact name to download before wheel PyPI upload ()
Otherwise, the folder structure gets messed up, and twine errors out.
2024-11-29 11:06:08 +00:00
Dominic Hamon
c58e6d0710 v1.9.1 bump 2024-11-28 16:51:38 +00:00
Guo Ci
d26047a0ac
Improve examples on ComputeStatistics () 2024-11-27 09:41:06 +00:00
dominic
62a321d6dc
update standard to C++17 per C++ build support ()
* update standard to C++17 per C++ build support

* disable deadcode checks from clang-tidy

* fix redundant definition of constexpr
2024-11-13 13:06:48 +00:00
Nicholas Junge
50ffd3e546
Declare a Python 3.13 toolchain, revert setup.py toolchain target selection ()
The new solution was too smart (read: dense), because it did not account for
the fact that we look for the Windows libs of the interpreter building
the wheel, not the hermetic one supplying the header files.

The fix is to just align the versions again, so that the libs and headers
come from the same minor version.
2024-11-07 15:04:51 +00:00
Nicholas Junge
a6af6eeb6a
Add a Python matrix to ensure the bindings build on all supported versions ()
Also contains a run of `pre-commit autoupdate`, and a bump of cibuildwheel
to its latest tag for CPython 3.13 support.

But, since we build for 3.10+ with SABI from 3.12 onwards, we don't even
need a dedicated Python 3.13 build job or toolchain - the wheels from 3.12
can be reused.

Simplifies some version-dependent logic around assembling the bazel
build command in setup.py, and fixes a possible unbound local error in
the toolchain patch context manager.
2024-11-06 13:15:22 +00:00
Nicholas Junge
d99cdd7356
Add nb::is_flag() annotation to Counter::Flags ()
This saves us the definition of `__or__`, because we can just use the
one from `enum.IntFlag`.
2024-10-28 18:18:40 +00:00
Richard Cole
4e3f2d8b67
[] ensure that when printing color text the background color of the terminal on Windows is preserved ()
Co-authored-by: dominic <510002+dmah42@users.noreply.github.com>
2024-10-24 12:31:06 +01:00
xdje42
ffc727a859
Verify RegisterProfilerManager doesn't overwrite an existing registration ()
* Verify RegisterProfilerManager doesn't overwrite an existing registration

Tested:
Add a second registration to test/profiler_manager_test.cc and
verify the test crashes as expected.

* Verify RegisterProfilerManager doesn't overwrite an existing registration

Tested:
Configure with:
cmake -GNinja -DCMAKE_BUILD_TYPE=Debug -DBENCHMARK_DOWNLOAD_DEPENDENCIES=on
Then run:
ctest -R profiler_manager_gtest
Before change test fails (expected), after change test passes (expected)

---------

Co-authored-by: dominic <510002+dmah42@users.noreply.github.com>
2024-10-24 10:22:58 +01:00
dominic
c45d9c4c2f
bump googletest version to match bazel ()
* bump googletest version to match bazel

* bump minimum cmake to 3.13 per supported versions
2024-10-24 09:46:02 +01:00
Nicholas Junge
be2134584d
Update nanobind_bazel to v2.2.0 ()
Adds support for free-threaded nanobind extension builds, though we
don't currently build a free-threaded wheel.

Co-authored-by: dominic <510002+dmah42@users.noreply.github.com>
2024-10-23 10:38:53 +01:00
dominic
498714357f
upgrade bazel mods. requires c++14 for tests () 2024-10-23 10:27:18 +01:00
Alfredo Daniel Esponda Cervantes
761305ec3b
Update user_guide.md ()
PR for Issue : Fix Suffix in Console Format Table
This PR fixes an issue with an incorrect suffix displayed in the console output. 

Fixes .
2024-10-12 00:16:37 +03:00
Alecto Irene Perez
0c998f7cc4
Fix spurious warning 'misc-use-anonymous-namespace' () ()
Disables 'misc-use-anonymous-namespace' for usage of the BENCHMARK
macro. This warning is spurious, and the variable declared by the
BENCHMARK macro can't be moved into an anonymous namespace.

We don't want to disable it globally, but it can be disabled locally,
for the `BENCHMARK` statement, as this warning appears downstream for
users.

See:
https://clang.llvm.org/extra/clang-tidy/#suppressing-undesired-diagnostics
2024-10-11 01:02:36 +03:00
Devon Loehr
24e0bd827a
Add enum value from newest Windows SDK ()
* Add enum value from newest Windows SDK

Windows SDK version 10.0.26100.0 adds a cache type value, `CacheUnknown`. This adds a case for that type to `sysinfo.cc`, which will otherwise complain about the switch statement being non-exhaustive when building with the new SDK.

Since the value doesn't exist in prior SDK versions, we only add the case conditionally. The condition can be removed if we ever decide to bump up the required SDK version.

* Fix SDK version macro

Make sure the version macro we're using for the SDK is properly indicative of version 10.0.26100.0. Also fix formatting complaints from the linter.

* Add space to satisfy formatter

Formatter insists on two spaces before a comment after a macro...

* Change preprocessor condition

Try detecting the current SDK version in a slightly different way.

* Replace NTDDI_WIN11_GE with its value

Undefined constants are treated as 0 by the preprocessor, which causes the check to trivially return true for previous SDK versions. Replace the constant with its value (from the newest SDK version) instead (see the sketch below).
2024-10-02 09:40:03 +01:00
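
The pitfall in that last point, sketched with hypothetical macro names (these are not the actual Windows SDK macros):

```c++
// In #if expressions the preprocessor treats any undefined identifier as 0,
// so on an older SDK where NEW_SDK_MARKER is not defined this check
// collapses to `SDK_VERSION >= 0` and is always true:
#if SDK_VERSION >= NEW_SDK_MARKER
// new-SDK-only code would wrongly be compiled here on old SDKs
#endif

// Comparing against the marker's literal numeric value (taken from the
// newest SDK headers) keeps the check meaningful on older SDKs:
#if SDK_VERSION >= 0x0A00000B  // hypothetical value of NEW_SDK_MARKER
// new-SDK-only code
#endif
```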
Alfredo Daniel Esponda Cervantes
23d8c1e589
Version string correction in pkg-config files ()
Co-authored-by: Alfredo Daniel Esponda Cervantes <92197886+DanEC1211@users.noreply.github.com>
2024-09-26 17:56:16 +01:00
Dominic Hamon
3fd1e6a7ae add dry run docs 2024-09-13 10:06:24 +01:00
dominic
72ecc4ea67
Added the functionality for a dry run benchmark called through the cli argument --benchmark_dry_run. ()
* Added benchmark_dry_run boolean flag to command line options

* Dry run logic to exit early and override iterations, repetitions, min time, min warmup time

* Changed dry run override logic structure and added dry run to context

---------

Co-authored-by: Shaan <shaanmistry03@gmail.com>
Co-authored-by: Shaan Mistry <49106143+Shaan-Mistry@users.noreply.github.com>
2024-09-12 15:50:52 +01:00
Richard Cole
08fdf6eb84
enable the /MP MSVC compiler argument for parallel compilation () 2024-09-05 22:28:43 +01:00
Nicholas Junge
986423a62d
Bump oldest supported Python to 3.10, eliminate setuptools-scm ()
* Supply MacOS deployment target to delocate, use build+uv frontend

This shaves off multiple minutes from the wheel builds alone.

Also revert to trusted publishing for wheel uploads as it is now set up.

* Bump oldest supported Python to 3.10, eliminate setuptools-scm

The version is now a string again, under the same attribute as it was
before. This is a pragmatic decision in order to be able to upload wheels
again, possibly directly from main.

We could in the future also set the Python version to a development version
if we want to avoid accidental uploads of `main`.

* Add a note on supported Python versions in the docs

Also fixes the setuptools failure observed in the latest CI by pinning
to the last version before v73 until the problem is identified and resolved.
2024-09-04 17:42:07 +01:00
Igor Zhukov
c19cfee61e
Fix C4459: Rename a function parameter profiler_manager to avoid hiding the global declaration. ()
* Fix C4459: Rename a function parameter `profiler_manager` to avoid hiding the global declaration.

* Treat warnings as errors for MSVC

* disable one warning for MSVC
2024-08-19 06:39:37 +03:00
Chris Kennelly
6126d2a205
Align benchmark::State to a cacheline. ()
* Align benchmark::State to a cacheline.

This can avoid interference with neighboring objects and stabilize
benchmark results.

* separate cachline definition from alignment attribute macro

Co-authored-by: Roman Lebedev <lebedev.ri@gmail.com>

---------

Co-authored-by: dominic <510002+dmah42@users.noreply.github.com>
Co-authored-by: Roman Lebedev <lebedev.ri@gmail.com>
2024-08-16 16:10:18 +01:00
Alex Bilger
437fea4b54
Fix Python manual timing example ()
According to the user guide, when timing manually, you must make this explicit by calling the `UseManualTime` function; its Python equivalent is use_manual_time(). This function was not called in the example.

It is possible to verify that the use of this function affects the measurement by adding another `time.sleep(0.01)` at the end of the iteration: there is a 2x difference depending on whether `use_manual_time()` is used or not.

Co-authored-by: dominic <510002+dmah42@users.noreply.github.com>
2024-08-16 15:32:48 +01:00
Dominic Hamon
ec3dc37b60 separate wheel versions in an effort to avoid timeouts 2024-08-16 11:56:56 +01:00
110 changed files with 2446 additions and 1323 deletions
.clang-tidy
.clang-tidy.ignore
.github
.pre-commit-config.yaml
.ycm_extra_conf.py
BUILD.bazel
CMakeLists.txt
MODULE.bazel
README.md
WORKSPACE
bazel
bindings/python/google_benchmark
cmake
docs
include/benchmark
pyproject.toml
setup.py
src
test

View File

@ -1,6 +1,37 @@
---
Checks: 'clang-analyzer-*,readability-redundant-*,performance-*'
WarningsAsErrors: 'clang-analyzer-*,readability-redundant-*,performance-*'
HeaderFilterRegex: '.*'
Checks: >
abseil-*,
bugprone-*,
clang-analyzer-*,
cppcoreguidelines-*,
google-*,
misc-*,
performance-*,
readability-*,
-clang-analyzer-deadcode*,
-clang-analyzer-optin*,
-readability-identifier-length
WarningsAsErrors: ''
HeaderFilterRegex: ''
FormatStyle: none
User: user
CheckOptions:
llvm-else-after-return.WarnOnConditionVariables: 'false'
modernize-loop-convert.MinConfidence: reasonable
modernize-replace-auto-ptr.IncludeStyle: llvm
cert-str34-c.DiagnoseSignedUnsignedCharComparisons: 'false'
google-readability-namespace-comments.ShortNamespaceLines: '10'
cert-err33-c.CheckedFunctions: '::aligned_alloc;::asctime_s;::at_quick_exit;::atexit;::bsearch;::bsearch_s;::btowc;::c16rtomb;::c32rtomb;::calloc;::clock;::cnd_broadcast;::cnd_init;::cnd_signal;::cnd_timedwait;::cnd_wait;::ctime_s;::fclose;::fflush;::fgetc;::fgetpos;::fgets;::fgetwc;::fopen;::fopen_s;::fprintf;::fprintf_s;::fputc;::fputs;::fputwc;::fputws;::fread;::freopen;::freopen_s;::fscanf;::fscanf_s;::fseek;::fsetpos;::ftell;::fwprintf;::fwprintf_s;::fwrite;::fwscanf;::fwscanf_s;::getc;::getchar;::getenv;::getenv_s;::gets_s;::getwc;::getwchar;::gmtime;::gmtime_s;::localtime;::localtime_s;::malloc;::mbrtoc16;::mbrtoc32;::mbsrtowcs;::mbsrtowcs_s;::mbstowcs;::mbstowcs_s;::memchr;::mktime;::mtx_init;::mtx_lock;::mtx_timedlock;::mtx_trylock;::mtx_unlock;::printf_s;::putc;::putwc;::raise;::realloc;::remove;::rename;::scanf;::scanf_s;::setlocale;::setvbuf;::signal;::snprintf;::snprintf_s;::sprintf;::sprintf_s;::sscanf;::sscanf_s;::strchr;::strerror_s;::strftime;::strpbrk;::strrchr;::strstr;::strtod;::strtof;::strtoimax;::strtok;::strtok_s;::strtol;::strtold;::strtoll;::strtoul;::strtoull;::strtoumax;::strxfrm;::swprintf;::swprintf_s;::swscanf;::swscanf_s;::thrd_create;::thrd_detach;::thrd_join;::thrd_sleep;::time;::timespec_get;::tmpfile;::tmpfile_s;::tmpnam;::tmpnam_s;::tss_create;::tss_get;::tss_set;::ungetc;::ungetwc;::vfprintf;::vfprintf_s;::vfscanf;::vfscanf_s;::vfwprintf;::vfwprintf_s;::vfwscanf;::vfwscanf_s;::vprintf_s;::vscanf;::vscanf_s;::vsnprintf;::vsnprintf_s;::vsprintf;::vsprintf_s;::vsscanf;::vsscanf_s;::vswprintf;::vswprintf_s;::vswscanf;::vswscanf_s;::vwprintf_s;::vwscanf;::vwscanf_s;::wcrtomb;::wcschr;::wcsftime;::wcspbrk;::wcsrchr;::wcsrtombs;::wcsrtombs_s;::wcsstr;::wcstod;::wcstof;::wcstoimax;::wcstok;::wcstok_s;::wcstol;::wcstold;::wcstoll;::wcstombs;::wcstombs_s;::wcstoul;::wcstoull;::wcstoumax;::wcsxfrm;::wctob;::wctrans;::wctype;::wmemchr;::wprintf_s;::wscanf;::wscanf_s;'
cert-oop54-cpp.WarnOnlyIfThisHasSuspiciousField: 'false'
cert-dcl16-c.NewSuffixes: 'L;LL;LU;LLU'
google-readability-braces-around-statements.ShortStatementLines: '1'
cppcoreguidelines-non-private-member-variables-in-classes.IgnoreClassesWithAllMemberVariablesBeingPublic: 'true'
google-readability-namespace-comments.SpacesBeforeComments: '2'
modernize-loop-convert.MaxCopySize: '16'
modernize-pass-by-value.IncludeStyle: llvm
modernize-use-nullptr.NullMacros: 'NULL'
llvm-qualified-auto.AddConstToQualified: 'false'
modernize-loop-convert.NamingStyle: CamelCase
llvm-else-after-return.WarnOnUnfixable: 'false'
google-readability-function-size.StatementThreshold: '800'
...

.clang-tidy.ignore Normal file
View File

@ -0,0 +1 @@
.*third_party/.*

View File

@ -3,7 +3,12 @@
set -e
# Checkout LLVM sources
git clone --depth=1 --branch llvmorg-16.0.6 https://github.com/llvm/llvm-project.git llvm-project
git clone --filter=blob:none --depth=1 --branch llvmorg-19.1.6 --no-checkout https://github.com/llvm/llvm-project.git llvm-project
cd llvm-project
git sparse-checkout set --cone
git checkout llvmorg-19.1.6
git sparse-checkout set cmake llvm/cmake runtimes libcxx libcxxabi
cd ..
## Setup libc++ options
if [ -z "$BUILD_32_BITS" ]; then
@ -12,15 +17,19 @@ fi
## Build and install libc++ (Use unstable ABI for better sanitizer coverage)
mkdir llvm-build && cd llvm-build
cmake -DCMAKE_C_COMPILER=${CC} \
cmake -GNinja \
-DCMAKE_C_COMPILER=${CC} \
-DCMAKE_CXX_COMPILER=${CXX} \
-DCMAKE_BUILD_TYPE=RelWithDebInfo \
-DCMAKE_INSTALL_PREFIX=/usr \
-DLIBCXX_ABI_UNSTABLE=OFF \
-DLLVM_USE_SANITIZER=${LIBCXX_SANITIZER} \
-DLLVM_BUILD_32_BITS=${BUILD_32_BITS} \
-DLLVM_ENABLE_RUNTIMES='libcxx;libcxxabi;libunwind' \
-G "Unix Makefiles" \
-DLIBCXXABI_USE_LLVM_UNWINDER=OFF \
-DLLVM_INCLUDE_TESTS=OFF \
-DLIBCXX_INCLUDE_TESTS=OFF \
-DLIBCXX_INCLUDE_BENCHMARKS=OFF \
-DLLVM_ENABLE_RUNTIMES='libcxx;libcxxabi' \
../llvm-project/runtimes/
make -j cxx cxxabi unwind
cmake --build . -- cxx cxxabi
cd ..

View File

@ -4,15 +4,17 @@ on:
push: {}
pull_request: {}
env:
CMAKE_GENERATOR: Ninja
jobs:
build_and_test_default:
name: bazel.${{ matrix.os }}.${{ matrix.bzlmod && 'bzlmod' || 'no_bzlmod' }}
name: bazel.${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
bzlmod: [false, true]
steps:
- uses: actions/checkout@v4
@ -28,8 +30,8 @@ jobs:
- name: build
run: |
bazel build ${{ matrix.bzlmod && '--enable_bzlmod' || '--noenable_bzlmod' }} //:benchmark //:benchmark_main //test/...
bazel build //:benchmark //:benchmark_main //test/...
- name: test
run: |
bazel test ${{ matrix.bzlmod && '--enable_bzlmod' || '--noenable_bzlmod' }} --test_output=all //test/...
bazel test --test_output=all //test/...

View File

@ -6,6 +6,9 @@ on:
pull_request:
branches: [ main ]
env:
CMAKE_GENERATOR: Ninja
jobs:
job:
name: ${{ matrix.os }}.min-cmake
@ -20,7 +23,7 @@ jobs:
- uses: lukka/get-cmake@latest
with:
cmakeVersion: 3.10.0
cmakeVersion: 3.13.0
- name: create build environment
run: cmake -E make_directory ${{ runner.workspace }}/_build

View File

@ -6,6 +6,9 @@ on:
pull_request:
branches: [ main ]
env:
CMAKE_GENERATOR: Ninja
jobs:
job:
# TODO(dominic): Extend this to include compiler and set through env: CC/CXX.
@ -14,7 +17,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-22.04, ubuntu-20.04]
os: [ubuntu-latest]
build_type: ['Release', 'Debug']
steps:
- uses: actions/checkout@v4

View File

@ -6,6 +6,9 @@ on:
pull_request:
branches: [ main ]
env:
CMAKE_GENERATOR: Ninja
jobs:
# TODO: add 32-bit builds (g++ and clang++) for ubuntu
# (requires g++-multilib and libc6:i386)
@ -17,41 +20,30 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-22.04, ubuntu-20.04, macos-latest]
os: [ubuntu-24.04, ubuntu-22.04, ubuntu-24.04-arm, macos-latest]
build_type: ['Release', 'Debug']
compiler: ['g++', 'clang++']
lib: ['shared', 'static']
steps:
- name: Install dependencies (macos)
if: runner.os == 'macOS'
run: brew install ninja
- uses: actions/checkout@v4
- uses: lukka/get-cmake@latest
- name: create build environment
run: cmake -E make_directory ${{ runner.workspace }}/_build
- name: setup cmake initial cache
run: touch compiler-cache.cmake
- name: configure cmake
env:
CXX: ${{ matrix.compiler }}
shell: bash
working-directory: ${{ runner.workspace }}/_build
run: >
cmake -C ${{ github.workspace }}/compiler-cache.cmake
$GITHUB_WORKSPACE
-DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
-DBUILD_SHARED_LIBS=${{ matrix.lib == 'shared' }}
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
-DCMAKE_CXX_COMPILER=${{ env.CXX }}
-DCMAKE_CXX_VISIBILITY_PRESET=hidden
-DCMAKE_VISIBILITY_INLINES_HIDDEN=ON
- name: build
shell: bash
working-directory: ${{ runner.workspace }}/_build
run: cmake --build . --config ${{ matrix.build_type }}
uses: threeal/cmake-action@v2.1.0
with:
build-dir: ${{ runner.workspace }}/_build
cxx-compiler: ${{ matrix.compiler }}
options: |
BENCHMARK_DOWNLOAD_DEPENDENCIES=ON
BUILD_SHARED_LIBS=${{ matrix.lib == 'shared' }}
CMAKE_BUILD_TYPE=${{ matrix.build_type }}
CMAKE_CXX_COMPILER=${{ matrix.compiler }}
CMAKE_CXX_VISIBILITY_PRESET=hidden
CMAKE_VISIBILITY_INLINES_HIDDEN=ON
- name: test
shell: bash
@ -70,8 +62,6 @@ jobs:
msvc:
- VS-16-2019
- VS-17-2022
arch:
- x64
build_type:
- Debug
- Release
@ -93,17 +83,16 @@ jobs:
- name: configure cmake
run: >
cmake -S . -B _build/
-A ${{ matrix.arch }}
cmake -S . -B ${{ runner.workspace }}/_build/
-G "${{ matrix.generator }}"
-DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
-DBUILD_SHARED_LIBS=${{ matrix.lib == 'shared' }}
- name: build
run: cmake --build _build/ --config ${{ matrix.build_type }}
run: cmake --build ${{ runner.workspace }}/_build/ --config ${{ matrix.build_type }}
- name: test
run: ctest --test-dir _build/ -C ${{ matrix.build_type }} -VV
run: ctest --test-dir ${{ runner.workspace }}/_build/ -C ${{ matrix.build_type }} -VV
msys2:
name: ${{ matrix.os }}.${{ matrix.build_type }}.${{ matrix.lib }}.${{ matrix.msys2.msystem }}
@ -117,9 +106,7 @@ jobs:
os: [ windows-latest ]
msys2:
- { msystem: MINGW64, arch: x86_64, family: GNU, compiler: g++ }
- { msystem: MINGW32, arch: i686, family: GNU, compiler: g++ }
- { msystem: CLANG64, arch: x86_64, family: LLVM, compiler: clang++ }
- { msystem: CLANG32, arch: i686, family: LLVM, compiler: clang++ }
- { msystem: UCRT64, arch: x86_64, family: GNU, compiler: g++ }
build_type:
- Debug
@ -129,9 +116,7 @@ jobs:
- static
steps:
- uses: actions/checkout@v4
- name: Install Base Dependencies
- name: setup msys2
uses: msys2/setup-msys2@v2
with:
cache: false
@ -141,10 +126,14 @@ jobs:
git
base-devel
pacboy: >-
cc:p
gcc:p
clang:p
cmake:p
ninja:p
- uses: actions/checkout@v4
# NOTE: we can't use cmake actions here as we need to do everything in msys2 shell.
- name: configure cmake
env:
CXX: ${{ matrix.msys2.compiler }}
@ -158,4 +147,5 @@ jobs:
run: cmake --build _build/ --config ${{ matrix.build_type }}
- name: test
run: ctest --test-dir _build/ -C ${{ matrix.build_type }} -VV
working-directory: _build
run: ctest -C ${{ matrix.build_type }} -VV

View File

@ -3,6 +3,9 @@ on:
push: {}
pull_request: {}
env:
CMAKE_GENERATOR: Ninja
jobs:
job:
name: check-clang-format
@ -10,9 +13,7 @@ jobs:
steps:
- uses: actions/checkout@v4
- uses: DoozyX/clang-format-lint-action@v0.15
- uses: DoozyX/clang-format-lint-action@v0.18.2
with:
source: './include/benchmark ./src ./test'
extensions: 'h,cc'
clangFormatVersion: 12
style: Google
clangFormatVersion: 18

View File

@ -4,6 +4,9 @@ on:
push: {}
pull_request: {}
env:
CMAKE_GENERATOR: Ninja
jobs:
job:
name: run-clang-tidy
@ -17,11 +20,11 @@ jobs:
run: sudo apt update && sudo apt -y install clang-tidy
- name: create build environment
run: cmake -E make_directory ${{ runner.workspace }}/_build
run: cmake -E make_directory ${{ github.workspace }}/_build
- name: configure cmake
shell: bash
working-directory: ${{ runner.workspace }}/_build
working-directory: ${{ github.workspace }}/_build
run: >
cmake $GITHUB_WORKSPACE
-DBENCHMARK_ENABLE_ASSEMBLY_TESTS=OFF
@ -34,5 +37,5 @@ jobs:
- name: run
shell: bash
working-directory: ${{ runner.workspace }}/_build
run: run-clang-tidy
working-directory: ${{ github.workspace }}/_build
run: run-clang-tidy -config-file=$GITHUB_WORKSPACE/.clang-tidy

View File

@ -6,6 +6,9 @@ on:
pull_request:
branches: [main]
env:
CMAKE_GENERATOR: Ninja
jobs:
build-and-deploy:
name: Build HTML documentation

View File

@ -6,6 +6,9 @@ on:
pull_request:
branches: [ main ]
env:
CMAKE_GENERATOR: Ninja
jobs:
pre-commit:
runs-on: ubuntu-latest

View File

@ -5,6 +5,7 @@ on:
pull_request: {}
env:
CMAKE_GENERATOR: Ninja
UBSAN_OPTIONS: "print_stacktrace=1"
jobs:
@ -65,7 +66,7 @@ jobs:
if: matrix.sanitizer != 'asan'
run: |
"${GITHUB_WORKSPACE}/.github/libcxx-setup.sh"
echo "EXTRA_CXX_FLAGS=-stdlib=libc++ -L ${GITHUB_WORKSPACE}/llvm-build/lib -lc++abi -Isystem${GITHUB_WORKSPACE}/llvm-build/include -Isystem${GITHUB_WORKSPACE}/llvm-build/include/c++/v1 -Wl,-rpath,${GITHUB_WORKSPACE}/llvm-build/lib" >> $GITHUB_ENV
echo "EXTRA_CXX_FLAGS=-stdlib=libc++ -L${GITHUB_WORKSPACE}/llvm-build/lib -lc++abi -I${GITHUB_WORKSPACE}/llvm-build/include/c++/v1 -Isystem${GITHUB_WORKSPACE}/llvm-build/include/c++/v1 -Wl,-rpath,${GITHUB_WORKSPACE}/llvm-build/lib" >> $GITHUB_ENV
- name: create build environment
run: cmake -E make_directory ${{ runner.workspace }}/_build
@ -75,7 +76,7 @@ jobs:
working-directory: ${{ runner.workspace }}/_build
run: >
VERBOSE=1
cmake $GITHUB_WORKSPACE
cmake -GNinja $GITHUB_WORKSPACE
-DBENCHMARK_ENABLE_ASSEMBLY_TESTS=OFF
-DBENCHMARK_ENABLE_LIBPFM=OFF
-DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON

View File

@ -6,25 +6,28 @@ on:
pull_request:
branches: [main]
env:
CMAKE_GENERATOR: Ninja
jobs:
python_bindings:
name: Test GBM Python bindings on ${{ matrix.os }}
name: Test GBM Python ${{ matrix.python-version }} bindings on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ ubuntu-latest, macos-latest, windows-latest ]
python-version: [ "3.10", "3.11", "3.12", "3.13" ]
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set up Python 3.11
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: 3.11
python-version: ${{ matrix.python-version }}
- name: Install GBM Python bindings on ${{ matrix.os }}
run: python -m pip install .
- name: Run bindings example on ${{ matrix.os }}
run:
python bindings/python/google_benchmark/example.py
- name: Run example on ${{ matrix.os }} under Python ${{ matrix.python-version }}
run: python bindings/python/google_benchmark/example.py

View File

@ -6,6 +6,9 @@ on:
types:
- published
env:
CMAKE_GENERATOR: Ninja
jobs:
build_sdist:
name: Build source distribution
@ -18,7 +21,7 @@ jobs:
- name: Install Python 3.12
uses: actions/setup-python@v5
with:
python-version: 3.12
python-version: "3.12"
- run: python -m pip install build
- name: Build sdist
run: python -m build --sdist
@ -40,6 +43,12 @@ jobs:
with:
fetch-depth: 0
- uses: actions/setup-python@v5
name: Install Python 3.12
with:
python-version: "3.12"
- run: pip install --upgrade pip uv
- name: Set up QEMU
if: runner.os == 'Linux'
uses: docker/setup-qemu-action@v3
@ -47,17 +56,19 @@ jobs:
platforms: all
- name: Build wheels on ${{ matrix.os }} using cibuildwheel
uses: pypa/cibuildwheel@v2.17
uses: pypa/cibuildwheel@v2.22.0
env:
CIBW_BUILD: "cp38-* cp39-* cp310-* cp311-* cp312-*"
CIBW_BUILD: "cp310-* cp311-* cp312-*"
CIBW_BUILD_FRONTEND: "build[uv]"
CIBW_SKIP: "*-musllinux_*"
CIBW_TEST_SKIP: "cp38-macosx_*:arm64"
CIBW_ARCHS_LINUX: auto64 aarch64
CIBW_ARCHS_WINDOWS: auto64
CIBW_BEFORE_ALL_LINUX: bash .github/install_bazel.sh
# Grab the rootless Bazel installation inside the container.
CIBW_ENVIRONMENT_LINUX: PATH=$PATH:$HOME/bin
CIBW_TEST_COMMAND: python {project}/bindings/python/google_benchmark/example.py
# unused by Bazel, but needed explicitly by delocate on MacOS.
MACOSX_DEPLOYMENT_TARGET: "10.14"
- name: Upload Google Benchmark ${{ matrix.os }} wheels
uses: actions/upload-artifact@v4
@ -65,27 +76,16 @@ jobs:
name: dist-${{ matrix.os }}
path: wheelhouse/*.whl
merge_wheels:
name: Merge all built wheels into one artifact
runs-on: ubuntu-latest
needs: build_wheels
steps:
- name: Merge wheels
uses: actions/upload-artifact/merge@v4
with:
name: dist
pattern: dist-*
delete-merged: true
pypi_upload:
name: Publish google-benchmark wheels to PyPI
needs: [merge_wheels]
needs: [build_sdist, build_wheels]
runs-on: ubuntu-latest
permissions:
id-token: write
steps:
- uses: actions/download-artifact@v4
with:
path: dist
pattern: dist-*
merge-multiple: true
- uses: pypa/gh-action-pypi-publish@release/v1
with:
user: __token__
password: ${{ secrets.PYPI_PASSWORD }}

View File

@ -1,17 +1,17 @@
repos:
- repo: https://github.com/keith/pre-commit-buildifier
rev: 6.4.0
rev: 8.0.1
hooks:
- id: buildifier
- id: buildifier-lint
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.11.0
rev: v1.15.0
hooks:
- id: mypy
types_or: [ python, pyi ]
args: [ "--ignore-missing-imports", "--scripts-are-modules" ]
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.4.10
rev: v0.9.6
hooks:
- id: ruff
args: [ --fix, --exit-non-zero-on-fix ]

View File

@ -83,10 +83,10 @@ def IsHeaderFile(filename):
def GetCompilationInfoForFile(filename):
# The compilation_commands.json file generated by CMake does not have entries
# for header files. So we do our best by asking the db for flags for a
# corresponding source file, if any. If one exists, the flags for that file
# should be good enough.
# The compilation_commands.json file generated by CMake does not have
# entries for header files. So we do our best by asking the db for flags for
# a corresponding source file, if any. If one exists, the flags for that
# file should be good enough.
if IsHeaderFile(filename):
basename = os.path.splitext(filename)[0]
for extension in SOURCE_EXTENSIONS:

View File

@ -1,9 +1,11 @@
load("@rules_cc//cc:defs.bzl", "cc_library")
licenses(["notice"])
COPTS = [
"-pedantic",
"-pedantic-errors",
"-std=c++14",
"-std=c++17",
"-Wall",
"-Wconversion",
"-Wextra",
@ -17,30 +19,16 @@ COPTS = [
"-Werror=old-style-cast",
]
config_setting(
name = "qnx",
constraint_values = ["@platforms//os:qnx"],
values = {
"cpu": "x64_qnx",
},
visibility = [":__subpackages__"],
)
MSVC_COPTS = [
"/std:c++17",
]
config_setting(
name = "windows",
constraint_values = ["@platforms//os:windows"],
values = {
"cpu": "x64_windows",
},
visibility = [":__subpackages__"],
)
config_setting(
name = "macos",
constraint_values = ["@platforms//os:macos"],
visibility = ["//visibility:public"],
)
config_setting(
name = "perfcounters",
define_values = {
@ -63,7 +51,7 @@ cc_library(
"include/benchmark/export.h",
],
copts = select({
":windows": [],
":windows": MSVC_COPTS,
"//conditions:default": COPTS,
}),
defines = [

View File

@ -1,7 +1,7 @@
# Require CMake 3.10. If available, use the policies up to CMake 3.22.
cmake_minimum_required (VERSION 3.10...3.22)
cmake_minimum_required (VERSION 3.13...3.22)
project (benchmark VERSION 1.9.0 LANGUAGES CXX)
project (benchmark VERSION 1.9.2 LANGUAGES CXX)
option(BENCHMARK_ENABLE_TESTING "Enable testing of the benchmark library." ON)
option(BENCHMARK_ENABLE_EXCEPTIONS "Enable the use of exceptions in the benchmark library." ON)
@ -138,7 +138,7 @@ if (BENCHMARK_BUILD_32_BITS)
add_required_cxx_compiler_flag(-m32)
endif()
set(BENCHMARK_CXX_STANDARD 14)
set(BENCHMARK_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD ${BENCHMARK_CXX_STANDARD})
set(CMAKE_CXX_STANDARD_REQUIRED YES)
@ -148,8 +148,17 @@ if (MSVC)
# Turn compiler warnings up to 11
string(REGEX REPLACE "[-/]W[1-4]" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
# MP flag only applies to cl, not cl frontends to other compilers (e.g. clang-cl, icx-cl etc)
if(CMAKE_CXX_COMPILER_ID MATCHES MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP")
endif()
add_definitions(-D_CRT_SECURE_NO_WARNINGS)
if(BENCHMARK_ENABLE_WERROR)
add_cxx_compiler_flag(-WX)
endif()
if (NOT BENCHMARK_ENABLE_EXCEPTIONS)
add_cxx_compiler_flag(-EHs-)
add_cxx_compiler_flag(-EHa-)
@ -297,17 +306,11 @@ if (BENCHMARK_USE_LIBCXX)
endif()
endif(BENCHMARK_USE_LIBCXX)
set(EXTRA_CXX_FLAGS "")
if (WIN32 AND "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
# Clang on Windows fails to compile the regex feature check under C++11
set(EXTRA_CXX_FLAGS "-DCMAKE_CXX_STANDARD=14")
endif()
# C++ feature checks
# Determine the correct regular expression engine to use
cxx_feature_check(STD_REGEX ${EXTRA_CXX_FLAGS})
cxx_feature_check(GNU_POSIX_REGEX ${EXTRA_CXX_FLAGS})
cxx_feature_check(POSIX_REGEX ${EXTRA_CXX_FLAGS})
cxx_feature_check(STD_REGEX)
cxx_feature_check(GNU_POSIX_REGEX)
cxx_feature_check(POSIX_REGEX)
if(NOT HAVE_STD_REGEX AND NOT HAVE_GNU_POSIX_REGEX AND NOT HAVE_POSIX_REGEX)
message(FATAL_ERROR "Failed to determine the source files for the regular expression backend")
endif()

View File

@ -1,17 +1,16 @@
module(
name = "google_benchmark",
version = "1.9.0",
version = "1.9.2",
)
bazel_dep(name = "bazel_skylib", version = "1.5.0")
bazel_dep(name = "platforms", version = "0.0.8")
bazel_dep(name = "rules_foreign_cc", version = "0.10.1")
bazel_dep(name = "bazel_skylib", version = "1.7.1")
bazel_dep(name = "platforms", version = "0.0.10")
bazel_dep(name = "rules_cc", version = "0.0.9")
bazel_dep(name = "rules_python", version = "0.31.0", dev_dependency = True)
bazel_dep(name = "googletest", version = "1.12.1", dev_dependency = True, repo_name = "com_google_googletest")
bazel_dep(name = "rules_python", version = "1.0.0", dev_dependency = True)
bazel_dep(name = "googletest", version = "1.14.0", dev_dependency = True, repo_name = "com_google_googletest")
bazel_dep(name = "libpfm", version = "4.11.0")
bazel_dep(name = "libpfm", version = "4.11.0.bcr.1")
# Register a toolchain for Python 3.9 to be able to build numpy. Python
# versions >=3.10 are problematic.
@ -27,6 +26,7 @@ python.toolchain(
is_default = True,
python_version = "3.12",
)
python.toolchain(python_version = "3.13")
pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip", dev_dependency = True)
pip.parse(
@ -38,4 +38,4 @@ use_repo(pip, "tools_pip_deps")
# -- bazel_dep definitions -- #
bazel_dep(name = "nanobind_bazel", version = "2.1.0", dev_dependency = True)
bazel_dep(name = "nanobind_bazel", version = "2.5.0", dev_dependency = True)

View File

@ -50,7 +50,7 @@ IRC channels:
## Requirements
The library can be used with C++03. However, it requires C++14 to build,
The library can be used with C++11. However, it requires C++17 to build,
including compiler and standard library support.
_See [dependencies.md](docs/dependencies.md) for more details regarding supported
@ -78,7 +78,7 @@ $ cmake -E make_directory "build"
# Generate build system files with cmake, and download any dependencies.
$ cmake -E chdir "build" cmake -DBENCHMARK_DOWNLOAD_DEPENDENCIES=on -DCMAKE_BUILD_TYPE=Release ../
# or, starting with CMake 3.13, use a simpler form:
# cmake -DCMAKE_BUILD_TYPE=Release -S . -B "build"
# cmake -DBENCHMARK_DOWNLOAD_DEPENDENCIES=on -DCMAKE_BUILD_TYPE=Release -S . -B "build"
# Build the library.
$ cmake --build "build" --config Release
```

View File

@ -4,10 +4,6 @@ load("//:bazel/benchmark_deps.bzl", "benchmark_deps")
benchmark_deps()
load("@rules_foreign_cc//foreign_cc:repositories.bzl", "rules_foreign_cc_dependencies")
rules_foreign_cc_dependencies()
load("@rules_python//python:repositories.bzl", "py_repositories")
py_repositories()

View File

@ -18,14 +18,6 @@ def benchmark_deps():
],
)
if "rules_foreign_cc" not in native.existing_rules():
http_archive(
name = "rules_foreign_cc",
sha256 = "476303bd0f1b04cc311fc258f1708a5f6ef82d3091e53fd1977fa20383425a6a",
strip_prefix = "rules_foreign_cc-0.10.1",
url = "https://github.com/bazelbuild/rules_foreign_cc/releases/download/0.10.1/rules_foreign_cc-0.10.1.tar.gz",
)
if "rules_python" not in native.existing_rules():
http_archive(
name = "rules_python",

View File

@ -1,4 +1,5 @@
load("@nanobind_bazel//:build_defs.bzl", "nanobind_extension", "nanobind_stubgen")
load("@rules_python//python:defs.bzl", "py_library", "py_test")
py_library(
name = "google_benchmark",

View File

@ -49,7 +49,8 @@ from google_benchmark._benchmark import (
oNone as oNone,
oNSquared as oNSquared,
)
from google_benchmark.version import __version__ as __version__
__version__ = "1.9.2"
class __OptionMaker:
@ -59,7 +60,8 @@ class __OptionMaker:
"""
class Options:
"""Pure data class to store options calls, along with the benchmarked function."""
"""Pure data class to store options calls, along with the benchmarked
function."""
def __init__(self, func):
self.func = func
@ -82,8 +84,8 @@ class __OptionMaker:
def __decorator(func_or_options):
options = self.make(func_or_options)
options.builder_calls.append((builder_name, args, kwargs))
# The decorator returns Options so it is not technically a decorator
# and needs a final call to @register
# The decorator returns Options so it is not technically a
# decorator and needs a final call to @register
return options
return __decorator
@ -92,8 +94,8 @@ class __OptionMaker:
# Alias for nicer API.
# We have to instantiate an object, even if stateless, to be able to use __getattr__
# on option.range
# We have to instantiate an object, even if stateless, to be able to use
# __getattr__ on option.range
option = __OptionMaker()
@ -103,8 +105,8 @@ def register(undefined=None, *, name=None):
# Decorator is called without parenthesis so we return a decorator
return lambda f: register(f, name=name)
# We have either the function to benchmark (simple case) or an instance of Options
# (@option._ case).
# We have either the function to benchmark (simple case) or an instance of
# Options (@option._ case).
options = __OptionMaker.make(undefined)
if name is None:

View File

@ -118,7 +118,7 @@ NB_MODULE(_benchmark, m) {
using benchmark::Counter;
nb::class_<Counter> py_counter(m, "Counter");
nb::enum_<Counter::Flags>(py_counter, "Flags", nb::is_arithmetic())
nb::enum_<Counter::Flags>(py_counter, "Flags", nb::is_arithmetic(), nb::is_flag())
.value("kDefaults", Counter::Flags::kDefaults)
.value("kIsRate", Counter::Flags::kIsRate)
.value("kAvgThreads", Counter::Flags::kAvgThreads)
@ -129,10 +129,7 @@ NB_MODULE(_benchmark, m) {
.value("kAvgIterations", Counter::Flags::kAvgIterations)
.value("kAvgIterationsRate", Counter::Flags::kAvgIterationsRate)
.value("kInvert", Counter::Flags::kInvert)
.export_values()
.def("__or__", [](Counter::Flags a, Counter::Flags b) {
return static_cast<int>(a) | static_cast<int>(b);
});
.export_values();
nb::enum_<Counter::OneK>(py_counter, "OneK")
.value("kIs1000", Counter::OneK::kIs1000)
@ -140,13 +137,9 @@ NB_MODULE(_benchmark, m) {
.export_values();
py_counter
.def(
"__init__",
[](Counter* c, double value, int flags, Counter::OneK oneK) {
new (c) Counter(value, static_cast<Counter::Flags>(flags), oneK);
},
nb::arg("value") = 0., nb::arg("flags") = Counter::kDefaults,
nb::arg("k") = Counter::kIs1000)
.def(nb::init<double, Counter::Flags, Counter::OneK>(),
nb::arg("value") = 0., nb::arg("flags") = Counter::kDefaults,
nb::arg("k") = Counter::kIs1000)
.def("__init__",
([](Counter* c, double value) { new (c) Counter(value); }))
.def_rw("value", &Counter::value)

View File

@ -13,7 +13,8 @@
# limitations under the License.
"""Example of Python using C++ benchmark framework.
To run this example, you must first install the `google_benchmark` Python package.
To run this example, you must first install the `google_benchmark` Python
package.
To install using `setup.py`, download and extract the `google_benchmark` source.
In the extracted directory, execute:
@ -57,10 +58,11 @@ def skipped(state):
state.skip_with_error("some error")
return # NOTE: You must explicitly return, or benchmark will continue.
... # Benchmark code would be here.
# Benchmark code would be here.
@benchmark.register
@benchmark.option.use_manual_time()
def manual_timing(state):
while state:
# Manually count Python CPU time
@ -77,7 +79,6 @@ def custom_counters(state):
num_foo = 0.0
while state:
# Benchmark some code here
pass
# Collect some custom metric named foo
num_foo += 0.13

View File

@ -1,7 +0,0 @@
from importlib.metadata import PackageNotFoundError, version
try:
__version__ = version("google-benchmark")
except PackageNotFoundError:
# package is not installed
pass

View File

@ -40,7 +40,7 @@ function(cxx_feature_check FILE)
message(STATUS "Cross-compiling to test ${FEATURE}")
try_compile(COMPILE_${FEATURE}
${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp
CXX_STANDARD 11
CXX_STANDARD 17
CXX_STANDARD_REQUIRED ON
CMAKE_FLAGS ${FEATURE_CHECK_CMAKE_FLAGS}
LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES}
@ -56,7 +56,7 @@ function(cxx_feature_check FILE)
message(STATUS "Compiling and running to test ${FEATURE}")
try_run(RUN_${FEATURE} COMPILE_${FEATURE}
${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp
CXX_STANDARD 11
CXX_STANDARD 17
CXX_STANDARD_REQUIRED ON
CMAKE_FLAGS ${FEATURE_CHECK_CMAKE_FLAGS}
LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES}

View File

@ -5,6 +5,7 @@ include (CMakeFindDependencyMacro)
find_dependency (Threads)
if (@BENCHMARK_ENABLE_LIBPFM@)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}")
find_dependency (PFM)
endif()

View File

@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 2.8.12)
cmake_minimum_required (VERSION 3.13...3.22)
project(googletest-download NONE)
@ -34,11 +34,12 @@ else()
message(SEND_ERROR "Did not find Google Test sources! Either pass correct path in GOOGLETEST_PATH, or enable BENCHMARK_DOWNLOAD_DEPENDENCIES, or disable BENCHMARK_USE_BUNDLED_GTEST, or disable BENCHMARK_ENABLE_GTEST_TESTS / BENCHMARK_ENABLE_TESTING.")
return()
else()
message(WARNING "Did not find Google Test sources! Fetching from web...")
message(STATUS "Did not find Google Test sources! Fetching from web...")
ExternalProject_Add(
googletest
GIT_REPOSITORY https://github.com/google/googletest.git
GIT_TAG "release-1.11.0"
GIT_TAG "v1.15.2"
GIT_SHALLOW "ON"
PREFIX "${CMAKE_BINARY_DIR}"
STAMP_DIR "${CMAKE_BINARY_DIR}/stamp"
DOWNLOAD_DIR "${CMAKE_BINARY_DIR}/download"

View File

@ -5,7 +5,7 @@ includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@
Name: @PROJECT_NAME@
Description: Google microbenchmark framework
Version: @VERSION@
Version: @NORMALIZED_VERSION@
Libs: -L${libdir} -lbenchmark
Libs.private: -lpthread @BENCHMARK_PRIVATE_LINK_LIBRARIES@

View File

@ -2,6 +2,6 @@ libdir=@CMAKE_INSTALL_FULL_LIBDIR@
Name: @PROJECT_NAME@
Description: Google microbenchmark framework (with main() function)
Version: @VERSION@
Version: @NORMALIZED_VERSION@
Requires: benchmark
Libs: -L${libdir} -lbenchmark_main

View File

@ -11,3 +11,9 @@ distributions include newer versions, for example:
* Ubuntu 20.04 provides CMake 3.16.3
* Debian 11.4 provides CMake 3.18.4
* Ubuntu 22.04 provides CMake 3.22.1
## Python
The Python bindings require Python 3.10+ as of v1.9.0 (2024-08-16) for installation from PyPI.
Building from source for older versions probably still works, though. See the [user guide](python_bindings.md) for details on how to build from source.
The minimum theoretically supported version is Python 3.8, since the bindings generator used (nanobind) only supports Python 3.8+.

View File

@ -8,16 +8,24 @@
* `git log $(git describe --abbrev=0 --tags)..HEAD` gives you the list of
commits between the last annotated tag and HEAD
* Pick the most interesting.
* Create one last commit that updates the version saved in `CMakeLists.txt` and `MODULE.bazel`
to the release version you're creating. (This version will be used if benchmark is installed
from the archive you'll be creating in the next step.)
* Create one last commit that updates the version saved in `CMakeLists.txt`, `MODULE.bazel`,
and `bindings/python/google_benchmark/__init__.py` to the release version you're creating.
(This version will be used if benchmark is installed from the archive you'll be creating
in the next step.)
```
project (benchmark VERSION 1.8.0 LANGUAGES CXX)
# CMakeLists.txt
project (benchmark VERSION 1.9.0 LANGUAGES CXX)
```
```
module(name = "com_github_google_benchmark", version="1.8.0")
# MODULE.bazel
module(name = "com_github_google_benchmark", version="1.9.0")
```
```
# google_benchmark/__init__.py
__version__ = "1.9.0"
```
* Create a release through github's interface
@ -28,4 +36,3 @@ module(name = "com_github_google_benchmark", version="1.8.0")
* `git push --force --tags origin`
* Confirm that the "Build and upload Python wheels" action runs to completion
* Run it manually if it hasn't run.
* IMPORTANT: When re-running manually, make sure to select the newly created `<tag>` as the workflow version in the "Run workflow" tab on the GitHub Actions page.

View File

@ -82,9 +82,9 @@ tabular data on stdout. Example tabular output looks like:
```
Benchmark Time(ns) CPU(ns) Iterations
----------------------------------------------------------------------
BM_SetInsert/1024/1 28928 29349 23853 133.097kB/s 33.2742k items/s
BM_SetInsert/1024/8 32065 32913 21375 949.487kB/s 237.372k items/s
BM_SetInsert/1024/10 33157 33648 21431 1.13369MB/s 290.225k items/s
BM_SetInsert/1024/1 28928 29349 23853 133.097kiB/s 33.2742k items/s
BM_SetInsert/1024/8 32065 32913 21375 949.487kiB/s 237.372k items/s
BM_SetInsert/1024/10 33157 33648 21431 1.13369MiB/s 290.225k items/s
```
The JSON format outputs human readable json split into two top level attributes.
@ -167,6 +167,13 @@ line interface or by setting environment variables before execution. For every
prevails). A complete list of CLI options is available running benchmarks
with the `--help` switch.
### Dry runs
To confirm that benchmarks can run successfully without needing to wait for
multiple repetitions and iterations, the `--benchmark_dry_run` flag can be
used. This will run the benchmarks as normal, but for 1 iteration and 1
repetition only.
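
For example, a quick sanity check might look like this (a sketch; `my_benchmark` stands in for your own benchmark binary):

```
$ ./my_benchmark --benchmark_dry_run=true
```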
<a name="running-a-subset-of-benchmarks" />
## Running a Subset of Benchmarks
@ -455,7 +462,7 @@ BENCHMARK(BM_SetInsert)->Apply(CustomArguments);
### Passing Arbitrary Arguments to a Benchmark
In C++11 it is possible to define a benchmark that takes an arbitrary number
It is possible to define a benchmark that takes an arbitrary number
of extra arguments. The `BENCHMARK_CAPTURE(func, test_case_name, ...args)`
macro creates a benchmark that invokes `func` with the `benchmark::State` as
the first argument followed by the specified `args...`.
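
For illustration, a short sketch of the macro in use (the names are illustrative; the capture-and-forward pattern follows the rest of this guide):

```c++
template <class... Args>
void BM_takes_args(benchmark::State& state, Args&&... args) {
  auto args_tuple = std::make_tuple(std::move(args)...);
  for (auto _ : state) {
    // Use std::get<0>(args_tuple), std::get<1>(args_tuple), ... here.
    benchmark::DoNotOptimize(args_tuple);
  }
}
// Registers "BM_takes_args/string_and_int"; the extra arguments follow the
// benchmark::State& parameter.
BENCHMARK_CAPTURE(BM_takes_args, string_and_int, std::string("abc"), 42);
```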
@ -556,22 +563,19 @@ template <class Q> void BM_Sequential(benchmark::State& state) {
state.SetBytesProcessed(
static_cast<int64_t>(state.iterations())*state.range(0));
}
// C++03
BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);
// C++11 or newer, you can use the BENCHMARK macro with template parameters:
// You can use the BENCHMARK macro with template parameters:
BENCHMARK(BM_Sequential<WaitQueue<int>>)->Range(1<<0, 1<<10);
// Old, legacy verbose C++03 syntax:
BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);
```
Three macros are provided for adding benchmark templates.
```c++
#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK(func<...>) // Takes any number of parameters.
#else // C++ < C++11
#define BENCHMARK_TEMPLATE(func, arg1)
#endif
#define BENCHMARK_TEMPLATE1(func, arg1)
#define BENCHMARK_TEMPLATE2(func, arg1, arg2)
```
@ -673,6 +677,54 @@ BENCHMARK_REGISTER_F(MyFixture, DoubleTest)->Threads(2);
// `DoubleTest` is now registered.
```
If you want to use a method template for your fixtures
that you instantiate afterwards, use the following macros:
* `BENCHMARK_TEMPLATE_METHOD_F(ClassName, Method)`
* `BENCHMARK_TEMPLATE_INSTANTIATE_F(ClassName, Method, ...)`
With these macros you can define one method for several instantiations.
Example (using `MyFixture` from above):
```c++
// Defines `Test` using the class template `MyFixture`.
BENCHMARK_TEMPLATE_METHOD_F(MyFixture, Test)(benchmark::State& st) {
for (auto _ : st) {
...
}
}
// Instantiates and registers the benchmark `MyFixture<int>::Test`.
BENCHMARK_TEMPLATE_INSTANTIATE_F(MyFixture, Test, int)->Threads(2);
// Instantiates and registers the benchmark `MyFixture<double>::Test`.
BENCHMARK_TEMPLATE_INSTANTIATE_F(MyFixture, Test, double)->Threads(4);
```
Inside the method definition of `BENCHMARK_TEMPLATE_METHOD_F`, the type `Base` refers
to the type of the instantiated fixture.
Accesses to members of the fixture must be prefixed by `this->`.
`BENCHMARK_TEMPLATE_METHOD_F` and `BENCHMARK_TEMPLATE_INSTANTIATE_F` can only be used
if the fixture does not use non-type template parameters.
If you want to pass values as template parameters, wrap them in a type such as
`std::integral_constant`. For example:
```c++
template<typename Sz>
class SizedFixture : public benchmark::Fixture {
 protected:  // members must be accessible to the generated benchmark class
  static constexpr auto Size = Sz::value;
  int myValue;
};
BENCHMARK_TEMPLATE_METHOD_F(SizedFixture, Test)(benchmark::State& st) {
for (auto _ : st) {
this->myValue = Base::Size;
}
}
BENCHMARK_TEMPLATE_INSTANTIATE_F(SizedFixture, Test, std::integral_constant<int, 5>)->Threads(2);
```
<a name="custom-counters" />
## Custom Counters
@ -733,12 +785,10 @@ is 1k a 1000 (default, `benchmark::Counter::OneK::kIs1000`), or 1024
state.counters["BytesProcessed"] = Counter(state.range(0), benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::OneK::kIs1024);
```
When you're compiling in C++11 mode or later you can use `insert()` with
`std::initializer_list`:
You can use `insert()` with `std::initializer_list`:
<!-- {% raw %} -->
```c++
// With C++11, this can be done:
state.counters.insert({{"Foo", numFoos}, {"Bar", numBars}, {"Baz", numBazs}});
// ... instead of:
state.counters["Foo"] = numFoos;
@ -861,6 +911,46 @@ BENCHMARK(BM_test)->Range(8, 8<<10)->UseRealTime();
Without `UseRealTime`, CPU time is used by default.
### Manual Multithreaded Benchmarks
By default, Google Benchmark uses `std::thread` as its multithreading environment.
If you want to use another multithreading environment (e.g. OpenMP), you can provide
a factory function to your benchmark using the `ThreadRunner` function.
The factory function takes the number of threads as an argument and creates an instance
of a custom class derived from `benchmark::ThreadRunnerBase`.
This custom class must override the function
`void RunThreads(const std::function<void(int)>& fn)`.
`RunThreads` is called by the main thread and spawns the requested number of threads.
Each spawned thread must call `fn(thread_index)`, where `thread_index` is its own
thread index. Before `RunThreads` returns, all spawned threads must be joined.
```c++
class OpenMPThreadRunner : public benchmark::ThreadRunnerBase
{
 public:
  OpenMPThreadRunner(int num_threads)
      : num_threads_(num_threads)
  {}
void RunThreads(const std::function<void(int)>& fn) final
{
#pragma omp parallel num_threads(num_threads_)
fn(omp_get_thread_num());
}
private:
int num_threads_;
};
BENCHMARK(BM_MultiThreaded)
->ThreadRunner([](int num_threads) {
return std::make_unique<OpenMPThreadRunner>(num_threads);
})
->Threads(1)->Threads(2)->Threads(4);
```
The above example creates a parallel OpenMP region before it enters `BM_MultiThreaded`.
The actual benchmark code can remain the same and is therefore not tied to a specific
thread runner. The measurement does not include the time for creating and joining the
threads.
<a name="cpu-timers" />
## CPU Timers
@ -1099,6 +1189,7 @@ void BM_spin_empty(benchmark::State& state) {
}
BENCHMARK(BM_spin_empty)
->Repetitions(3) // or add option --benchmark_repetitions=3
->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
return *(std::max_element(std::begin(v), std::end(v)));
})
@ -1118,8 +1209,9 @@ void BM_spin_empty(benchmark::State& state) {
}
BENCHMARK(BM_spin_empty)
->Repetitions(3) // or add option --benchmark_repetitions=3
->ComputeStatistics("ratio", [](const std::vector<double>& v) -> double {
return std::begin(v) / std::end(v);
return v.front() / v.back();
}, benchmark::StatisticUnit::kPercentage)
->Arg(512);
```
@ -1240,7 +1332,7 @@ static void BM_test_ranged_fo(benchmark::State & state) {
## A Faster KeepRunning Loop
In C++11 mode, a ranged-based for loop should be used in preference to
A range-based for loop should be used in preference to
the `KeepRunning` loop for running the benchmarks. For example:
```c++
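static void BM_Fast(benchmark::State& state) {
  for (auto _ : state) {
    FastOperation();  // stand-in for the operation being measured
  }
}
BENCHMARK(BM_Fast);
```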


@ -163,52 +163,33 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
#ifndef BENCHMARK_BENCHMARK_H_
#define BENCHMARK_BENCHMARK_H_
// The _MSVC_LANG check should detect Visual Studio 2015 Update 3 and newer.
#if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)
#define BENCHMARK_HAS_CXX11
#endif
// This _MSC_VER check should detect VS 2017 v15.3 and newer.
#if __cplusplus >= 201703L || \
(defined(_MSC_VER) && _MSC_VER >= 1911 && _MSVC_LANG >= 201703L)
#define BENCHMARK_HAS_CXX17
#endif
#include <stdint.h>
#include <algorithm>
#include <atomic>
#include <cassert>
#include <cstddef>
#include <functional>
#include <initializer_list>
#include <iosfwd>
#include <limits>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
#include "benchmark/export.h"
#if defined(BENCHMARK_HAS_CXX11)
#include <atomic>
#include <initializer_list>
#include <type_traits>
#include <utility>
#endif
#if defined(_MSC_VER)
#include <intrin.h> // for _ReadWriteBarrier
#endif
#ifndef BENCHMARK_HAS_CXX11
#define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \
TypeName(const TypeName&); \
TypeName& operator=(const TypeName&)
#else
#define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \
TypeName(const TypeName&) = delete; \
TypeName& operator=(const TypeName&) = delete
#endif
#ifdef BENCHMARK_HAS_CXX17
#define BENCHMARK_UNUSED [[maybe_unused]]
@ -284,20 +265,71 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
#define BENCHMARK_UNREACHABLE() ((void)0)
#endif
#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK_OVERRIDE override
#if defined(__GNUC__)
// Determine the cacheline size based on architecture
#if defined(__i386__) || defined(__x86_64__)
#define BENCHMARK_INTERNAL_CACHELINE_SIZE 64
#elif defined(__powerpc64__)
#define BENCHMARK_INTERNAL_CACHELINE_SIZE 128
#elif defined(__aarch64__)
#define BENCHMARK_INTERNAL_CACHELINE_SIZE 64
#elif defined(__arm__)
// Cache line sizes for ARM: These values are not strictly correct since
// cache line sizes depend on implementations, not architectures. There
// are even implementations with cache line sizes configurable at boot
// time.
#if defined(__ARM_ARCH_5T__)
#define BENCHMARK_INTERNAL_CACHELINE_SIZE 32
#elif defined(__ARM_ARCH_7A__)
#define BENCHMARK_INTERNAL_CACHELINE_SIZE 64
#endif // ARM_ARCH
#endif // arches
#endif // __GNUC__
#ifndef BENCHMARK_INTERNAL_CACHELINE_SIZE
// A reasonable default guess. Note that overestimates tend to waste more
// space, while underestimates tend to waste more time.
#define BENCHMARK_INTERNAL_CACHELINE_SIZE 64
#endif
#if defined(__GNUC__)
// Indicates that the declared object should be cache-aligned using
// `BENCHMARK_INTERNAL_CACHELINE_SIZE` (see above).
#define BENCHMARK_INTERNAL_CACHELINE_ALIGNED \
__attribute__((aligned(BENCHMARK_INTERNAL_CACHELINE_SIZE)))
#elif defined(_MSC_VER)
#define BENCHMARK_INTERNAL_CACHELINE_ALIGNED \
__declspec(align(BENCHMARK_INTERNAL_CACHELINE_SIZE))
#else
#define BENCHMARK_OVERRIDE
#define BENCHMARK_INTERNAL_CACHELINE_ALIGNED
#endif
#if defined(_MSC_VER)
#pragma warning(push)
// C4251: <symbol> needs to have dll-interface to be used by clients of class
#pragma warning(disable : 4251)
#endif
#endif // _MSC_VER_
namespace benchmark {
namespace internal {
#if (__cplusplus < 201402L || (defined(_MSC_VER) && _MSVC_LANG < 201402L))
template <typename T, typename... Args>
std::unique_ptr<T> make_unique(Args&&... args) {
return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
}
#else
using ::std::make_unique;
#endif
} // namespace internal
class BenchmarkReporter;
class State;
using IterationCount = int64_t;
// Define alias of Setup/Teardown callback function type
using callback_function = std::function<void(const benchmark::State&)>;
// Default number of minimum benchmark running time in seconds.
const char kDefaultMinTimeStr[] = "0.5s";
@ -308,7 +340,7 @@ BENCHMARK_EXPORT std::string GetBenchmarkVersion();
BENCHMARK_EXPORT void PrintDefaultHelp();
BENCHMARK_EXPORT void Initialize(int* argc, char** argv,
void (*HelperPrinterf)() = PrintDefaultHelp);
void (*HelperPrintf)() = PrintDefaultHelp);
BENCHMARK_EXPORT void Shutdown();
// Report to stdout all arguments in 'argv' as unrecognized except the first.
@ -377,14 +409,15 @@ BENCHMARK_EXPORT void SetDefaultTimeUnit(TimeUnit unit);
// benchmark.
class MemoryManager {
public:
static const int64_t TombstoneValue;
static constexpr int64_t TombstoneValue = std::numeric_limits<int64_t>::max();
struct Result {
Result()
: num_allocs(0),
max_bytes_used(0),
total_allocated_bytes(TombstoneValue),
net_heap_growth(TombstoneValue) {}
net_heap_growth(TombstoneValue),
memory_iterations(0) {}
// The number of allocations made in total between Start and Stop.
int64_t num_allocs;
@ -400,6 +433,8 @@ class MemoryManager {
// ie., total_allocated_bytes - total_deallocated_bytes.
// Init'ed to TombstoneValue if metric not available.
int64_t net_heap_growth;
IterationCount memory_iterations;
};
virtual ~MemoryManager() {}
@ -452,7 +487,8 @@ void UseCharPointer(char const volatile*);
// Take ownership of the pointer and register the benchmark. Return the
// registered benchmark.
BENCHMARK_EXPORT Benchmark* RegisterBenchmarkInternal(Benchmark*);
BENCHMARK_EXPORT Benchmark* RegisterBenchmarkInternal(
std::unique_ptr<Benchmark>);
// Ensure that the standard streams are properly initialized in every TU.
BENCHMARK_EXPORT int InitializeStreams();
@ -467,11 +503,9 @@ BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams();
// Force the compiler to flush pending writes to global memory. Acts as an
// effective read/write barrier
#ifdef BENCHMARK_HAS_CXX11
inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
std::atomic_signal_fence(std::memory_order_acq_rel);
}
#endif
// The DoNotOptimize(...) function can be used to prevent a value or
// expression from being optimized away by the compiler. This function is
@ -496,7 +530,6 @@ inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) {
#endif
}
#ifdef BENCHMARK_HAS_CXX11
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp&& value) {
#if defined(__clang__)
@ -505,8 +538,8 @@ inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp&& value) {
asm volatile("" : "+m,r"(value) : : "memory");
#endif
}
#endif
#elif defined(BENCHMARK_HAS_CXX11) && (__GNUC__ >= 5)
// !defined(__GNUC__) || defined(__llvm__) || defined(__INTEL_COMPILER)
#elif (__GNUC__ >= 5)
// Workaround for a bug with full argument copy overhead with GCC.
// See: #1340 and https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105519
template <class Tp>
@ -562,37 +595,9 @@ inline BENCHMARK_ALWAYS_INLINE
DoNotOptimize(Tp&& value) {
asm volatile("" : "+m"(value) : : "memory");
}
#else
// Fallback for GCC < 5. Can add some overhead because the compiler is forced
// to use memory operations instead of operations with registers.
// TODO: Remove if GCC < 5 will be unsupported.
template <class Tp>
BENCHMARK_DEPRECATED_MSG(
"The const-ref version of this method can permit "
"undesired compiler optimizations in benchmarks")
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
asm volatile("" : : "m"(value) : "memory");
}
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) {
asm volatile("" : "+m"(value) : : "memory");
}
#ifdef BENCHMARK_HAS_CXX11
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp&& value) {
asm volatile("" : "+m"(value) : : "memory");
}
#endif
// !defined(__GNUC__) || defined(__llvm__) || defined(__INTEL_COMPILER)
#endif
#ifndef BENCHMARK_HAS_CXX11
inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
asm volatile("" : : : "memory");
}
#endif
#elif defined(_MSC_VER)
template <class Tp>
BENCHMARK_DEPRECATED_MSG(
@ -603,29 +608,11 @@ inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
_ReadWriteBarrier();
}
#ifndef BENCHMARK_HAS_CXX11
inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { _ReadWriteBarrier(); }
#endif
#else
#ifdef BENCHMARK_HAS_CXX11
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp&& value) {
internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
}
#else
template <class Tp>
BENCHMARK_DEPRECATED_MSG(
"The const-ref version of this method can permit "
"undesired compiler optimizations in benchmarks")
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
}
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) {
internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
}
#endif
// FIXME Add ClobberMemory() for non-gnu and non-msvc compilers, before C++11.
#endif
@ -674,7 +661,7 @@ class Counter {
Counter(double v = 0., Flags f = kDefaults, OneK k = kIs1000)
: value(v), flags(f), oneK(k) {}
BENCHMARK_ALWAYS_INLINE operator double const &() const { return value; }
BENCHMARK_ALWAYS_INLINE operator double const&() const { return value; }
BENCHMARK_ALWAYS_INLINE operator double&() { return value; }
};
@ -697,8 +684,6 @@ enum BigO { oNone, o1, oN, oNSquared, oNCubed, oLogN, oNLogN, oAuto, oLambda };
typedef int64_t ComplexityN;
typedef int64_t IterationCount;
enum StatisticUnit { kTime, kPercentage };
// BigOFunc is passed to a benchmark in order to specify the asymptotic
@ -725,12 +710,7 @@ class ThreadTimer;
class ThreadManager;
class PerfCountersMeasurement;
enum AggregationReportMode
#if defined(BENCHMARK_HAS_CXX11)
: unsigned
#else
#endif
{
enum AggregationReportMode : unsigned {
// The mode has not been manually specified
ARM_Unspecified = 0,
// The mode is user-specified.
@ -745,11 +725,7 @@ enum AggregationReportMode
ARM_FileReportAggregatesOnly | ARM_DisplayReportAggregatesOnly
};
enum Skipped
#if defined(BENCHMARK_HAS_CXX11)
: unsigned
#endif
{
enum Skipped : unsigned {
NotSkipped = 0,
SkippedWithMessage,
SkippedWithError
@ -757,9 +733,14 @@ enum Skipped
} // namespace internal
#if defined(_MSC_VER)
#pragma warning(push)
// C4324: 'benchmark::State': structure was padded due to alignment specifier
#pragma warning(disable : 4324)
#endif // _MSC_VER_
// State is passed to a running Benchmark and contains state for the
// benchmark to use.
class BENCHMARK_EXPORT State {
class BENCHMARK_EXPORT BENCHMARK_INTERNAL_CACHELINE_ALIGNED State {
public:
struct StateIterator;
friend struct StateIterator;
@ -1024,6 +1005,9 @@ class BENCHMARK_EXPORT State {
friend class internal::BenchmarkInstance;
};
#if defined(_MSC_VER)
#pragma warning(pop)
#endif // _MSC_VER_
inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunning() {
return KeepRunningInternal(1, /*is_batch=*/false);
@ -1109,8 +1093,18 @@ inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::end() {
return StateIterator();
}
// Base class for user-defined multi-threading
struct ThreadRunnerBase {
virtual ~ThreadRunnerBase() {}
virtual void RunThreads(const std::function<void(int)>& fn) = 0;
};
namespace internal {
// Define alias of ThreadRunner factory function type
using threadrunner_factory =
std::function<std::unique_ptr<ThreadRunnerBase>(int)>;
typedef void(Function)(State&);
// ------------------------------------------------------
@ -1165,12 +1159,12 @@ class BENCHMARK_EXPORT Benchmark {
// Run this benchmark once for a number of values picked from the
// ranges [start..limit]. (starts and limits are always picked.)
// REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
Benchmark* Ranges(const std::vector<std::pair<int64_t, int64_t> >& ranges);
Benchmark* Ranges(const std::vector<std::pair<int64_t, int64_t>>& ranges);
// Run this benchmark once for each combination of values in the (cartesian)
// product of the supplied argument lists.
// REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
Benchmark* ArgsProduct(const std::vector<std::vector<int64_t> >& arglists);
Benchmark* ArgsProduct(const std::vector<std::vector<int64_t>>& arglists);
// Equivalent to ArgNames({name})
Benchmark* ArgName(const std::string& name);
@ -1183,7 +1177,7 @@ class BENCHMARK_EXPORT Benchmark {
// NOTE: This is a legacy C++03 interface provided for compatibility only.
// New code should use 'Ranges'.
Benchmark* RangePair(int64_t lo1, int64_t hi1, int64_t lo2, int64_t hi2) {
std::vector<std::pair<int64_t, int64_t> > ranges;
std::vector<std::pair<int64_t, int64_t>> ranges;
ranges.push_back(std::make_pair(lo1, hi1));
ranges.push_back(std::make_pair(lo2, hi2));
return Ranges(ranges);
@ -1201,15 +1195,15 @@ class BENCHMARK_EXPORT Benchmark {
//
// The callback will be passed a State object, which includes the number
// of threads, thread-index, benchmark arguments, etc.
//
// The callback must not be NULL or self-deleting.
Benchmark* Setup(void (*setup)(const benchmark::State&));
Benchmark* Teardown(void (*teardown)(const benchmark::State&));
Benchmark* Setup(callback_function&&);
Benchmark* Setup(const callback_function&);
Benchmark* Teardown(callback_function&&);
Benchmark* Teardown(const callback_function&);
// Pass this benchmark object to *func, which can customize
// the benchmark by calling various methods like Arg, Args,
// Threads, etc.
Benchmark* Apply(void (*func)(Benchmark* benchmark));
Benchmark* Apply(void (*custom_arguments)(Benchmark* benchmark));
// Set the range multiplier for non-dense range. If not called, the range
// multiplier kRangeMultiplier will be used.
@ -1315,6 +1309,9 @@ class BENCHMARK_EXPORT Benchmark {
// Equivalent to ThreadRange(NumCPUs(), NumCPUs())
Benchmark* ThreadPerCpu();
// Sets a user-defined threadrunner (see ThreadRunnerBase)
Benchmark* ThreadRunner(threadrunner_factory&& factory);
virtual void Run(State& state) = 0;
TimeUnit GetTimeUnit() const;
@ -1334,8 +1331,8 @@ class BENCHMARK_EXPORT Benchmark {
std::string name_;
AggregationReportMode aggregation_report_mode_;
std::vector<std::string> arg_names_; // Args for all benchmark runs
std::vector<std::vector<int64_t> > args_; // Args for all benchmark runs
std::vector<std::string> arg_names_; // Args for all benchmark runs
std::vector<std::vector<int64_t>> args_; // Args for all benchmark runs
TimeUnit time_unit_;
bool use_default_time_unit_;
@ -1353,21 +1350,12 @@ class BENCHMARK_EXPORT Benchmark {
std::vector<Statistics> statistics_;
std::vector<int> thread_counts_;
typedef void (*callback_function)(const benchmark::State&);
callback_function setup_;
callback_function teardown_;
Benchmark(Benchmark const&)
#if defined(BENCHMARK_HAS_CXX11)
= delete
#endif
;
threadrunner_factory threadrunner_;
Benchmark& operator=(Benchmark const&)
#if defined(BENCHMARK_HAS_CXX11)
= delete
#endif
;
BENCHMARK_DISALLOW_COPY_AND_ASSIGN(Benchmark);
};
} // namespace internal
@ -1379,10 +1367,8 @@ class BENCHMARK_EXPORT Benchmark {
internal::Benchmark* RegisterBenchmark(const std::string& name,
internal::Function* fn);
#if defined(BENCHMARK_HAS_CXX11)
template <class Lambda>
internal::Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn);
#endif
// Remove all registered benchmarks. All pointers to previously registered
// benchmarks are invalidated.
@ -1396,71 +1382,56 @@ class BENCHMARK_EXPORT FunctionBenchmark : public Benchmark {
FunctionBenchmark(const std::string& name, Function* func)
: Benchmark(name), func_(func) {}
void Run(State& st) BENCHMARK_OVERRIDE;
void Run(State& st) override;
private:
Function* func_;
};
#ifdef BENCHMARK_HAS_CXX11
template <class Lambda>
class LambdaBenchmark : public Benchmark {
public:
void Run(State& st) BENCHMARK_OVERRIDE { lambda_(st); }
void Run(State& st) override { lambda_(st); }
private:
template <class OLambda>
LambdaBenchmark(const std::string& name, OLambda&& lam)
: Benchmark(name), lambda_(std::forward<OLambda>(lam)) {}
private:
LambdaBenchmark(LambdaBenchmark const&) = delete;
template <class Lam> // NOLINTNEXTLINE(readability-redundant-declaration)
friend Benchmark* ::benchmark::RegisterBenchmark(const std::string&, Lam&&);
Lambda lambda_;
};
#endif
} // namespace internal
inline internal::Benchmark* RegisterBenchmark(const std::string& name,
internal::Function* fn) {
// FIXME: this should be a `std::make_unique<>()` but we don't have C++14.
// codechecker_intentional [cplusplus.NewDeleteLeaks]
return internal::RegisterBenchmarkInternal(
::new internal::FunctionBenchmark(name, fn));
::benchmark::internal::make_unique<internal::FunctionBenchmark>(name,
fn));
}
#ifdef BENCHMARK_HAS_CXX11
template <class Lambda>
internal::Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn) {
using BenchType =
internal::LambdaBenchmark<typename std::decay<Lambda>::type>;
// FIXME: this should be a `std::make_unique<>()` but we don't have C++14.
// codechecker_intentional [cplusplus.NewDeleteLeaks]
return internal::RegisterBenchmarkInternal(
::new BenchType(name, std::forward<Lambda>(fn)));
::benchmark::internal::make_unique<BenchType>(name,
std::forward<Lambda>(fn)));
}
#endif
#if defined(BENCHMARK_HAS_CXX11) && \
(!defined(BENCHMARK_GCC_VERSION) || BENCHMARK_GCC_VERSION >= 409)
template <class Lambda, class... Args>
internal::Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn,
Args&&... args) {
return benchmark::RegisterBenchmark(
name, [=](benchmark::State& st) { fn(st, args...); });
}
#else
#define BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK
#endif
// The base class for all fixture tests.
class Fixture : public internal::Benchmark {
public:
Fixture() : internal::Benchmark("") {}
void Run(State& st) BENCHMARK_OVERRIDE {
void Run(State& st) override {
this->SetUp(st);
this->BenchmarkCase(st);
this->TearDown(st);
@ -1491,14 +1462,9 @@ class Fixture : public internal::Benchmark {
#endif
// Helpers for generating unique variable names
#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK_PRIVATE_NAME(...) \
BENCHMARK_PRIVATE_CONCAT(benchmark_uniq_, BENCHMARK_PRIVATE_UNIQUE_ID, \
__VA_ARGS__)
#else
#define BENCHMARK_PRIVATE_NAME(n) \
BENCHMARK_PRIVATE_CONCAT(benchmark_uniq_, BENCHMARK_PRIVATE_UNIQUE_ID, n)
#endif // BENCHMARK_HAS_CXX11
#define BENCHMARK_PRIVATE_CONCAT(a, b, c) BENCHMARK_PRIVATE_CONCAT2(a, b, c)
#define BENCHMARK_PRIVATE_CONCAT2(a, b, c) a##b##c
@ -1506,22 +1472,17 @@ class Fixture : public internal::Benchmark {
#define BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method) \
BaseClass##_##Method##_Benchmark
#define BENCHMARK_PRIVATE_DECLARE(n) \
static ::benchmark::internal::Benchmark* BENCHMARK_PRIVATE_NAME(n) \
BENCHMARK_UNUSED
#define BENCHMARK_PRIVATE_DECLARE(n) \
/* NOLINTNEXTLINE(misc-use-anonymous-namespace) */ \
static ::benchmark::internal::Benchmark const* const BENCHMARK_PRIVATE_NAME( \
n) BENCHMARK_UNUSED
#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK(...) \
BENCHMARK_PRIVATE_DECLARE(_benchmark_) = \
(::benchmark::internal::RegisterBenchmarkInternal( \
new ::benchmark::internal::FunctionBenchmark(#__VA_ARGS__, \
__VA_ARGS__)))
#else
#define BENCHMARK(n) \
BENCHMARK_PRIVATE_DECLARE(n) = \
(::benchmark::internal::RegisterBenchmarkInternal( \
new ::benchmark::internal::FunctionBenchmark(#n, n)))
#endif // BENCHMARK_HAS_CXX11
#define BENCHMARK(...) \
BENCHMARK_PRIVATE_DECLARE(_benchmark_) = \
(::benchmark::internal::RegisterBenchmarkInternal( \
::benchmark::internal::make_unique< \
::benchmark::internal::FunctionBenchmark>(#__VA_ARGS__, \
__VA_ARGS__)))
// Old-style macros
#define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a))
@ -1531,8 +1492,6 @@ class Fixture : public internal::Benchmark {
#define BENCHMARK_RANGE2(n, l1, h1, l2, h2) \
BENCHMARK(n)->RangePair({{(l1), (h1)}, {(l2), (h2)}})
#ifdef BENCHMARK_HAS_CXX11
// Register a benchmark which invokes the function specified by `func`
// with the additional arguments specified by `...`.
//
@ -1547,12 +1506,11 @@ class Fixture : public internal::Benchmark {
#define BENCHMARK_CAPTURE(func, test_case_name, ...) \
BENCHMARK_PRIVATE_DECLARE(_benchmark_) = \
(::benchmark::internal::RegisterBenchmarkInternal( \
new ::benchmark::internal::FunctionBenchmark( \
::benchmark::internal::make_unique< \
::benchmark::internal::FunctionBenchmark>( \
#func "/" #test_case_name, \
[](::benchmark::State& st) { func(st, __VA_ARGS__); })))
#endif // BENCHMARK_HAS_CXX11
// This will register a benchmark for a templatized function. For example:
//
// template<int arg>
@ -1564,25 +1522,23 @@ class Fixture : public internal::Benchmark {
#define BENCHMARK_TEMPLATE1(n, a) \
BENCHMARK_PRIVATE_DECLARE(n) = \
(::benchmark::internal::RegisterBenchmarkInternal( \
new ::benchmark::internal::FunctionBenchmark(#n "<" #a ">", n<a>)))
::benchmark::internal::make_unique< \
::benchmark::internal::FunctionBenchmark>(#n "<" #a ">", n<a>)))
#define BENCHMARK_TEMPLATE2(n, a, b) \
BENCHMARK_PRIVATE_DECLARE(n) = \
(::benchmark::internal::RegisterBenchmarkInternal( \
new ::benchmark::internal::FunctionBenchmark(#n "<" #a "," #b ">", \
n<a, b>)))
#define BENCHMARK_TEMPLATE2(n, a, b) \
BENCHMARK_PRIVATE_DECLARE(n) = \
(::benchmark::internal::RegisterBenchmarkInternal( \
::benchmark::internal::make_unique< \
::benchmark::internal::FunctionBenchmark>(#n "<" #a "," #b ">", \
n<a, b>)))
#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK_TEMPLATE(n, ...) \
BENCHMARK_PRIVATE_DECLARE(n) = \
(::benchmark::internal::RegisterBenchmarkInternal( \
new ::benchmark::internal::FunctionBenchmark( \
::benchmark::internal::make_unique< \
::benchmark::internal::FunctionBenchmark>( \
#n "<" #__VA_ARGS__ ">", n<__VA_ARGS__>)))
#else
#define BENCHMARK_TEMPLATE(n, a) BENCHMARK_TEMPLATE1(n, a)
#endif
#ifdef BENCHMARK_HAS_CXX11
// This will register a benchmark for a templatized function,
// with the additional arguments specified by `...`.
//
@ -1601,21 +1557,21 @@ class Fixture : public internal::Benchmark {
#define BENCHMARK_TEMPLATE2_CAPTURE(func, a, b, test_case_name, ...) \
BENCHMARK_PRIVATE_DECLARE(func) = \
(::benchmark::internal::RegisterBenchmarkInternal( \
new ::benchmark::internal::FunctionBenchmark( \
::benchmark::internal::make_unique< \
::benchmark::internal::FunctionBenchmark>( \
#func "<" #a "," #b ">" \
"/" #test_case_name, \
[](::benchmark::State& st) { func<a, b>(st, __VA_ARGS__); })))
#endif // BENCHMARK_HAS_CXX11
#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
class BaseClass##_##Method##_Benchmark : public BaseClass { \
public: \
BaseClass##_##Method##_Benchmark() { \
this->SetName(#BaseClass "/" #Method); \
} \
\
protected: \
void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
class BaseClass##_##Method##_Benchmark : public BaseClass { \
public: \
BaseClass##_##Method##_Benchmark() { \
this->SetName(#BaseClass "/" #Method); \
} \
\
protected: \
void BenchmarkCase(::benchmark::State&) override; \
};
#define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
@ -1626,7 +1582,7 @@ class Fixture : public internal::Benchmark {
} \
\
protected: \
void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
void BenchmarkCase(::benchmark::State&) override; \
};
#define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
@ -1637,10 +1593,9 @@ class Fixture : public internal::Benchmark {
} \
\
protected: \
void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
void BenchmarkCase(::benchmark::State&) override; \
};
#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, ...) \
class BaseClass##_##Method##_Benchmark : public BaseClass<__VA_ARGS__> { \
public: \
@ -1649,12 +1604,8 @@ class Fixture : public internal::Benchmark {
} \
\
protected: \
void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
void BenchmarkCase(::benchmark::State&) override; \
};
#else
#define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(n, a) \
BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(n, a)
#endif
#define BENCHMARK_DEFINE_F(BaseClass, Method) \
BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
@ -1668,21 +1619,48 @@ class Fixture : public internal::Benchmark {
BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, ...) \
BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \
void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
#else
#define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, a) \
BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a)
#endif
#define BENCHMARK_REGISTER_F(BaseClass, Method) \
BENCHMARK_PRIVATE_REGISTER_F(BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method))
#define BENCHMARK_PRIVATE_REGISTER_F(TestName) \
BENCHMARK_PRIVATE_DECLARE(TestName) = \
(::benchmark::internal::RegisterBenchmarkInternal(new TestName()))
#define BENCHMARK_PRIVATE_REGISTER_F(TestName) \
BENCHMARK_PRIVATE_DECLARE(TestName) = \
(::benchmark::internal::RegisterBenchmarkInternal( \
::benchmark::internal::make_unique<TestName>()))
#define BENCHMARK_TEMPLATE_PRIVATE_CONCAT_NAME_F(BaseClass, Method) \
BaseClass##_##Method##_BenchmarkTemplate
#define BENCHMARK_TEMPLATE_METHOD_F(BaseClass, Method) \
template <class... Args> \
class BENCHMARK_TEMPLATE_PRIVATE_CONCAT_NAME_F(BaseClass, Method) \
: public BaseClass<Args...> { \
protected: \
using Base = BaseClass<Args...>; \
void BenchmarkCase(::benchmark::State&) override; \
}; \
template <class... Args> \
void BENCHMARK_TEMPLATE_PRIVATE_CONCAT_NAME_F( \
BaseClass, Method)<Args...>::BenchmarkCase
#define BENCHMARK_TEMPLATE_PRIVATE_INSTANTIATE_F(BaseClass, Method, \
UniqueName, ...) \
class UniqueName : public BENCHMARK_TEMPLATE_PRIVATE_CONCAT_NAME_F( \
BaseClass, Method)<__VA_ARGS__> { \
public: \
UniqueName() { this->SetName(#BaseClass "<" #__VA_ARGS__ ">/" #Method); } \
}; \
BENCHMARK_PRIVATE_DECLARE(BaseClass##_##Method##_Benchmark) = \
(::benchmark::internal::RegisterBenchmarkInternal( \
::benchmark::internal::make_unique<UniqueName>()))
#define BENCHMARK_TEMPLATE_INSTANTIATE_F(BaseClass, Method, ...) \
BENCHMARK_TEMPLATE_PRIVATE_INSTANTIATE_F( \
BaseClass, Method, BENCHMARK_PRIVATE_NAME(BaseClass##Method), \
__VA_ARGS__)
// This macro will define and register a benchmark within a fixture class.
#define BENCHMARK_F(BaseClass, Method) \
@ -1700,22 +1678,17 @@ class Fixture : public internal::Benchmark {
BENCHMARK_REGISTER_F(BaseClass, Method); \
void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK_TEMPLATE_F(BaseClass, Method, ...) \
BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \
BENCHMARK_REGISTER_F(BaseClass, Method); \
void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
#else
#define BENCHMARK_TEMPLATE_F(BaseClass, Method, a) \
BENCHMARK_TEMPLATE1_F(BaseClass, Method, a)
#endif
// Helper macro to create a main routine in a test that runs the benchmarks
// Note the workaround for Hexagon simulator passing argc != 0, argv = NULL.
#define BENCHMARK_MAIN() \
int main(int argc, char** argv) { \
char arg0_default[] = "benchmark"; \
char* args_default = arg0_default; \
char* args_default = reinterpret_cast<char*>(arg0_default); \
if (!argv) { \
argc = 1; \
argv = &args_default; \
@ -1795,7 +1768,7 @@ class BENCHMARK_EXPORT BenchmarkReporter {
CPUInfo const& cpu_info;
SystemInfo const& sys_info;
// The number of chars in the longest benchmark name.
size_t name_field_width;
size_t name_field_width = 0;
static const char* executable_name;
Context();
};
@ -1820,7 +1793,6 @@ class BENCHMARK_EXPORT BenchmarkReporter {
complexity_n(0),
report_big_o(false),
report_rms(false),
memory_result(NULL),
allocs_per_iter(0.0) {}
std::string benchmark_name() const;
@ -1876,7 +1848,7 @@ class BENCHMARK_EXPORT BenchmarkReporter {
UserCounters counters;
// Memory metrics.
const MemoryManager::Result* memory_result;
MemoryManager::Result memory_result;
double allocs_per_iter;
};
@ -1968,12 +1940,12 @@ class BENCHMARK_EXPORT ConsoleReporter : public BenchmarkReporter {
explicit ConsoleReporter(OutputOptions opts_ = OO_Defaults)
: output_options_(opts_), name_field_width_(0), printed_header_(false) {}
bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;
bool ReportContext(const Context& context) override;
void ReportRuns(const std::vector<Run>& reports) override;
protected:
virtual void PrintRunData(const Run& report);
virtual void PrintHeader(const Run& report);
virtual void PrintRunData(const Run& result);
virtual void PrintHeader(const Run& run);
OutputOptions output_options_;
size_t name_field_width_;
@ -1984,12 +1956,12 @@ class BENCHMARK_EXPORT ConsoleReporter : public BenchmarkReporter {
class BENCHMARK_EXPORT JSONReporter : public BenchmarkReporter {
public:
JSONReporter() : first_report_(true) {}
bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;
void Finalize() BENCHMARK_OVERRIDE;
bool ReportContext(const Context& context) override;
void ReportRuns(const std::vector<Run>& reports) override;
void Finalize() override;
private:
void PrintRunData(const Run& report);
void PrintRunData(const Run& run);
bool first_report_;
};
@ -1999,11 +1971,11 @@ class BENCHMARK_EXPORT BENCHMARK_DEPRECATED_MSG(
: public BenchmarkReporter {
public:
CSVReporter() : printed_header_(false) {}
bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;
bool ReportContext(const Context& context) override;
void ReportRuns(const std::vector<Run>& reports) override;
private:
void PrintRunData(const Run& report);
void PrintRunData(const Run& run);
bool printed_header_;
std::set<std::string> user_counter_names_;


@ -1,25 +1,21 @@
[build-system]
requires = ["setuptools", "setuptools-scm[toml]", "wheel"]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"
[project]
name = "google_benchmark"
description = "A library to benchmark code snippets."
requires-python = ">=3.8"
license = {file = "LICENSE"}
requires-python = ">=3.10"
license = { file = "LICENSE" }
keywords = ["benchmark"]
authors = [
{name = "Google", email = "benchmark-discuss@googlegroups.com"},
]
authors = [{ name = "Google", email = "benchmark-discuss@googlegroups.com" }]
classifiers = [
"Development Status :: 4 - Beta",
"Intended Audience :: Developers",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: Apache Software License",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
@ -29,14 +25,10 @@ classifiers = [
dynamic = ["readme", "version"]
dependencies = [
"absl-py>=0.7.1",
]
dependencies = ["absl-py>=0.7.1"]
[project.optional-dependencies]
dev = [
"pre-commit>=3.3.3",
]
dev = ["pre-commit>=3.3.3"]
[project.urls]
Homepage = "https://github.com/google/benchmark"
@ -45,7 +37,7 @@ Repository = "https://github.com/google/benchmark.git"
Discord = "https://discord.gg/cz7UX7wKC2"
[tool.setuptools]
package-dir = {"" = "bindings/python"}
package-dir = { "" = "bindings/python" }
zip-safe = false
[tool.setuptools.packages.find]
@ -53,8 +45,7 @@ where = ["bindings/python"]
[tool.setuptools.dynamic]
readme = { file = "README.md", content-type = "text/markdown" }
[tool.setuptools_scm]
version = { attr = "google_benchmark.__version__" }
[tool.mypy]
check_untyped_defs = true
@ -77,9 +68,10 @@ target-version = "py311"
[tool.ruff.lint]
# Enable pycodestyle (`E`, `W`), Pyflakes (`F`), and isort (`I`) codes by default.
select = ["E", "F", "I", "W"]
select = ["ASYNC", "B", "C4", "C90", "E", "F", "I", "PERF", "PIE", "PT018", "RUF", "SIM", "UP", "W"]
ignore = [
"E501", # line too long
"PLW2901", # redefined-loop-name
"UP031", # printf-string-formatting
]
[tool.ruff.lint.isort]


@ -3,8 +3,10 @@ import os
import platform
import re
import shutil
import sys
from collections.abc import Generator
from pathlib import Path
from typing import Any, Generator
from typing import Any
import setuptools
from setuptools.command import build_ext
@ -15,8 +17,7 @@ IS_LINUX = platform.system() == "Linux"
# hardcoded SABI-related options. Requires that each Python interpreter
# (hermetic or not) participating is of the same major-minor version.
version_tuple = tuple(int(i) for i in platform.python_version_tuple())
py_limited_api = version_tuple >= (3, 12)
py_limited_api = sys.version_info >= (3, 12)
options = {"bdist_wheel": {"py_limited_api": "cp312"}} if py_limited_api else {}
@ -43,10 +44,10 @@ def _maybe_patch_toolchains() -> Generator[None, None, None]:
return "python.toolchain(" + callargs + ")"
CIBW_LINUX = is_cibuildwheel() and IS_LINUX
module_bazel = Path("MODULE.bazel")
content: str = module_bazel.read_text()
try:
if CIBW_LINUX:
module_bazel = Path("MODULE.bazel")
content: str = module_bazel.read_text()
module_bazel.write_text(
re.sub(
r"python.toolchain\(([\w\"\s,.=]*)\)",
@ -77,7 +78,6 @@ class BuildBazelExtension(build_ext.build_ext):
def run(self):
for ext in self.extensions:
self.bazel_build(ext)
super().run()
# explicitly call `bazel shutdown` for graceful exit
self.spawn(["bazel", "shutdown"])
@ -87,15 +87,14 @@ class BuildBazelExtension(build_ext.build_ext):
This is done in the ``bazel_build`` method, so it's not necessary to
do it again in the `build_ext` base class.
"""
pass
def bazel_build(self, ext: BazelExtension) -> None:
def bazel_build(self, ext: BazelExtension) -> None: # noqa: C901
"""Runs the bazel build to create the package."""
temp_path = Path(self.build_temp)
# omit the patch version to avoid build errors if the toolchain is not
# yet registered in the current @rules_python version.
# patch version differences should be fine.
python_version = ".".join(platform.python_version_tuple()[:2])
# We round to the minor version, which makes rules_python
# look up the latest available patch version internally.
python_version = "{}.{}".format(*sys.version_info[:2])
bazel_argv = [
"bazel",
@ -138,15 +137,12 @@ class BuildBazelExtension(build_ext.build_ext):
dirs[:] = [d for d in dirs if "runfiles" not in d]
for f in files:
print(f)
fp = Path(f)
should_copy = False
# we do not want the bare .so file included
# when building for ABI3, so we require a
# full and exact match on the file extension.
if "".join(fp.suffixes) == suffix:
should_copy = True
elif fp.suffix == ".pyi":
if "".join(fp.suffixes) == suffix or fp.suffix == ".pyi":
should_copy = True
elif Path(root) == srcdir and f == "py.typed":
# copy py.typed, but only at the package root.
@ -157,7 +153,7 @@ class BuildBazelExtension(build_ext.build_ext):
setuptools.setup(
cmdclass=dict(build_ext=BuildBazelExtension),
cmdclass={"build_ext": BuildBazelExtension},
package_data={"google_benchmark": ["py.typed", "*.pyi"]},
ext_modules=[
BazelExtension(


@ -39,6 +39,9 @@ set_property(
if (PFM_FOUND)
target_link_libraries(benchmark PRIVATE PFM::libpfm)
target_compile_definitions(benchmark PRIVATE -DHAVE_LIBPFM)
install(
FILES "${PROJECT_SOURCE_DIR}/cmake/Modules/FindPFM.cmake"
DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}")
endif()
# pthread affinity, if available


@ -46,7 +46,6 @@
#include "commandlineflags.h"
#include "complexity.h"
#include "counter.h"
#include "internal_macros.h"
#include "log.h"
#include "mutex.h"
#include "perf_counters.h"
@ -92,6 +91,11 @@ BM_DEFINE_double(benchmark_min_warmup_time, 0.0);
// standard deviation of the runs will be reported.
BM_DEFINE_int32(benchmark_repetitions, 1);
// If enabled, forces each benchmark to execute exactly one iteration and one
// repetition, bypassing any configured
// MinTime()/MinWarmUpTime()/Iterations()/Repetitions()
BM_DEFINE_bool(benchmark_dry_run, false);
// If set, enable random interleaving of repetitions of all benchmarks.
// See http://github.com/google/benchmark/issues/1051 for details.
BM_DEFINE_bool(benchmark_enable_random_interleaving, false);
@ -146,13 +150,17 @@ BM_DEFINE_int32(v, 0);
namespace internal {
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
std::map<std::string, std::string>* global_context = nullptr;
BENCHMARK_EXPORT std::map<std::string, std::string>*& GetGlobalContext() {
return global_context;
}
static void const volatile* volatile global_force_escape_pointer;
namespace {
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
void const volatile* volatile global_force_escape_pointer;
} // namespace
// FIXME: Verify if LTO still messes this up?
void UseCharPointer(char const volatile* const v) {
@ -193,7 +201,7 @@ State::State(std::string name, IterationCount max_iters,
// `PauseTiming`, a new `Counter` will be inserted the first time, which
// won't have the flag. Inserting them now also reduces the allocations
// during the benchmark.
if (perf_counters_measurement_) {
if (perf_counters_measurement_ != nullptr) {
for (const std::string& counter_name :
perf_counters_measurement_->names()) {
counters[counter_name] = Counter(0.0, Counter::kAvgIterations);
@ -242,7 +250,7 @@ void State::PauseTiming() {
// Add in time accumulated so far
BM_CHECK(started_ && !finished_ && !skipped());
timer_->StopTimer();
if (perf_counters_measurement_) {
if (perf_counters_measurement_ != nullptr) {
std::vector<std::pair<std::string, double>> measurements;
if (!perf_counters_measurement_->Stop(measurements)) {
BM_CHECK(false) << "Perf counters read the value failed.";
@ -260,7 +268,7 @@ void State::PauseTiming() {
void State::ResumeTiming() {
BM_CHECK(started_ && !finished_ && !skipped());
timer_->StartTimer();
if (perf_counters_measurement_) {
if (perf_counters_measurement_ != nullptr) {
perf_counters_measurement_->Start();
}
}
@ -275,7 +283,9 @@ void State::SkipWithMessage(const std::string& msg) {
}
}
total_iterations_ = 0;
if (timer_->running()) timer_->StopTimer();
if (timer_->running()) {
timer_->StopTimer();
}
}
void State::SkipWithError(const std::string& msg) {
@ -288,7 +298,9 @@ void State::SkipWithError(const std::string& msg) {
}
}
total_iterations_ = 0;
if (timer_->running()) timer_->StopTimer();
if (timer_->running()) {
timer_->StopTimer();
}
}
void State::SetIterationTime(double seconds) {
@ -304,10 +316,13 @@ void State::StartKeepRunning() {
BM_CHECK(!started_ && !finished_);
started_ = true;
total_iterations_ = skipped() ? 0 : max_iterations;
if (BENCHMARK_BUILTIN_EXPECT(profiler_manager_ != nullptr, false))
if (BENCHMARK_BUILTIN_EXPECT(profiler_manager_ != nullptr, false)) {
profiler_manager_->AfterSetupStart();
}
manager_->StartStopBarrier();
if (!skipped()) ResumeTiming();
if (!skipped()) {
ResumeTiming();
}
}
void State::FinishKeepRunning() {
@ -319,8 +334,9 @@ void State::FinishKeepRunning() {
total_iterations_ = 0;
finished_ = true;
manager_->StartStopBarrier();
if (BENCHMARK_BUILTIN_EXPECT(profiler_manager_ != nullptr, false))
if (BENCHMARK_BUILTIN_EXPECT(profiler_manager_ != nullptr, false)) {
profiler_manager_->BeforeTeardownStop();
}
}
namespace internal {
@ -329,7 +345,9 @@ namespace {
// Flushes streams after invoking reporter methods that write to them. This
// ensures users get timely updates even when streams are not line-buffered.
void FlushStreams(BenchmarkReporter* reporter) {
if (!reporter) return;
if (reporter == nullptr) {
return;
}
std::flush(reporter->GetOutputStream());
std::flush(reporter->GetErrorStream());
}
@ -342,16 +360,20 @@ void Report(BenchmarkReporter* display_reporter,
assert(reporter);
// If there are no aggregates, do output non-aggregates.
aggregates_only &= !results.aggregates_only.empty();
if (!aggregates_only) reporter->ReportRuns(results.non_aggregates);
if (!results.aggregates_only.empty())
if (!aggregates_only) {
reporter->ReportRuns(results.non_aggregates);
}
if (!results.aggregates_only.empty()) {
reporter->ReportRuns(results.aggregates_only);
}
};
report_one(display_reporter, run_results.display_report_aggregates_only,
run_results);
if (file_reporter)
if (file_reporter != nullptr) {
report_one(file_reporter, run_results.file_report_aggregates_only,
run_results);
}
FlushStreams(display_reporter);
FlushStreams(file_reporter);
@ -372,10 +394,13 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
std::max<size_t>(name_field_width, benchmark.name().str().size());
might_have_aggregates |= benchmark.repetitions() > 1;
for (const auto& Stat : benchmark.statistics())
for (const auto& Stat : benchmark.statistics()) {
stat_field_width = std::max<size_t>(stat_field_width, Stat.name_.size());
}
}
if (might_have_aggregates) {
name_field_width += 1 + stat_field_width;
}
if (might_have_aggregates) name_field_width += 1 + stat_field_width;
// Print header here
BenchmarkReporter::Context context;
@ -386,7 +411,7 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
per_family_reports;
if (display_reporter->ReportContext(context) &&
(!file_reporter || file_reporter->ReportContext(context))) {
((file_reporter == nullptr) || file_reporter->ReportContext(context))) {
FlushStreams(display_reporter);
FlushStreams(file_reporter);
@ -408,15 +433,17 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
// Loop through all benchmarks
for (const BenchmarkInstance& benchmark : benchmarks) {
BenchmarkReporter::PerFamilyRunReports* reports_for_family = nullptr;
if (benchmark.complexity() != oNone)
if (benchmark.complexity() != oNone) {
reports_for_family = &per_family_reports[benchmark.family_index()];
benchmarks_with_threads += (benchmark.threads() > 1);
}
benchmarks_with_threads += static_cast<int>(benchmark.threads() > 1);
runners.emplace_back(benchmark, &perfcounters, reports_for_family);
int num_repeats_of_this_instance = runners.back().GetNumRepeats();
num_repetitions_total +=
static_cast<size_t>(num_repeats_of_this_instance);
if (reports_for_family)
if (reports_for_family != nullptr) {
reports_for_family->num_runs_total += num_repeats_of_this_instance;
}
}
assert(runners.size() == benchmarks.size() && "Unexpected runner count.");
@ -451,14 +478,17 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
for (size_t repetition_index : repetition_indices) {
internal::BenchmarkRunner& runner = runners[repetition_index];
runner.DoOneRepetition();
if (runner.HasRepeatsRemaining()) continue;
if (runner.HasRepeatsRemaining()) {
continue;
}
// FIXME: report each repetition separately, not all of them in bulk.
display_reporter->ReportRunsConfig(
runner.GetMinTime(), runner.HasExplicitIters(), runner.GetIters());
if (file_reporter)
if (file_reporter != nullptr) {
file_reporter->ReportRunsConfig(
runner.GetMinTime(), runner.HasExplicitIters(), runner.GetIters());
}
RunResults run_results = runner.GetResults();
@ -479,7 +509,9 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
}
}
display_reporter->Finalize();
if (file_reporter) file_reporter->Finalize();
if (file_reporter != nullptr) {
file_reporter->Finalize();
}
FlushStreams(display_reporter);
FlushStreams(file_reporter);
}
@ -501,6 +533,7 @@ std::unique_ptr<BenchmarkReporter> CreateReporter(
return PtrType(new CSVReporter());
}
std::cerr << "Unexpected format: '" << name << "'\n";
std::flush(std::cerr);
std::exit(1);
}
@ -539,7 +572,7 @@ ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color) {
} // end namespace internal
BenchmarkReporter* CreateDefaultDisplayReporter() {
static auto default_display_reporter =
static auto* default_display_reporter =
internal::CreateReporter(FLAGS_benchmark_format,
internal::GetOutputOptions())
.release();
@ -573,14 +606,15 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
BenchmarkReporter* file_reporter,
std::string spec) {
if (spec.empty() || spec == "all")
if (spec.empty() || spec == "all") {
spec = "."; // Regexp that matches all benchmarks
}
// Setup the reporters
std::ofstream output_file;
std::unique_ptr<BenchmarkReporter> default_display_reporter;
std::unique_ptr<BenchmarkReporter> default_file_reporter;
if (!display_reporter) {
if (display_reporter == nullptr) {
default_display_reporter.reset(CreateDefaultDisplayReporter());
display_reporter = default_display_reporter.get();
}
@ -588,10 +622,9 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
auto& Err = display_reporter->GetErrorStream();
std::string const& fname = FLAGS_benchmark_out;
if (fname.empty() && file_reporter) {
if (fname.empty() && (file_reporter != nullptr)) {
Err << "A custom file reporter was provided but "
"--benchmark_out=<file> was not specified."
<< std::endl;
"--benchmark_out=<file> was not specified.\n";
Out.flush();
Err.flush();
std::exit(1);
@ -599,12 +632,12 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
if (!fname.empty()) {
output_file.open(fname);
if (!output_file.is_open()) {
Err << "invalid file name: '" << fname << "'" << std::endl;
Err << "invalid file name: '" << fname << "'\n";
Out.flush();
Err.flush();
std::exit(1);
}
if (!file_reporter) {
if (file_reporter == nullptr) {
default_file_reporter = internal::CreateReporter(
FLAGS_benchmark_out_format, FLAGS_benchmark_counters_tabular
? ConsoleReporter::OO_Tabular
@ -630,8 +663,9 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
}
if (FLAGS_benchmark_list_tests) {
for (auto const& benchmark : benchmarks)
for (auto const& benchmark : benchmarks) {
Out << benchmark.name().str() << "\n";
}
} else {
internal::RunBenchmarks(benchmarks, display_reporter, file_reporter);
}
@ -663,6 +697,10 @@ void RegisterMemoryManager(MemoryManager* manager) {
}
void RegisterProfilerManager(ProfilerManager* manager) {
// Don't allow overwriting an existing manager.
if (manager != nullptr) {
BM_CHECK_EQ(internal::profiler_manager, nullptr);
}
internal::profiler_manager = manager;
}
@ -682,7 +720,9 @@ void (*HelperPrintf)();
void PrintUsageAndExit() {
HelperPrintf();
exit(0);
std::flush(std::cout);
std::flush(std::cerr);
std::exit(0);
}
void SetDefaultTimeUnitFromFlag(const std::string& time_unit_flag) {
@ -706,8 +746,8 @@ void SetDefaultTimeUnitFromFlag(const std::string& time_unit_flag) {
void ParseCommandLineFlags(int* argc, char** argv) {
using namespace benchmark;
BenchmarkReporter::Context::executable_name =
(argc && *argc > 0) ? argv[0] : "unknown";
for (int i = 1; argc && i < *argc; ++i) {
((argc != nullptr) && *argc > 0) ? argv[0] : "unknown";
for (int i = 1; (argc != nullptr) && i < *argc; ++i) {
if (ParseBoolFlag(argv[i], "benchmark_list_tests",
&FLAGS_benchmark_list_tests) ||
ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) ||
@ -717,6 +757,7 @@ void ParseCommandLineFlags(int* argc, char** argv) {
&FLAGS_benchmark_min_warmup_time) ||
ParseInt32Flag(argv[i], "benchmark_repetitions",
&FLAGS_benchmark_repetitions) ||
ParseBoolFlag(argv[i], "benchmark_dry_run", &FLAGS_benchmark_dry_run) ||
ParseBoolFlag(argv[i], "benchmark_enable_random_interleaving",
&FLAGS_benchmark_enable_random_interleaving) ||
ParseBoolFlag(argv[i], "benchmark_report_aggregates_only",
@ -737,7 +778,9 @@ void ParseCommandLineFlags(int* argc, char** argv) {
ParseStringFlag(argv[i], "benchmark_time_unit",
&FLAGS_benchmark_time_unit) ||
ParseInt32Flag(argv[i], "v", &FLAGS_v)) {
for (int j = i; j != *argc - 1; ++j) argv[j] = argv[j + 1];
for (int j = i; j != *argc - 1; ++j) {
argv[j] = argv[j + 1];
}
--(*argc);
--i;
@ -755,6 +798,9 @@ void ParseCommandLineFlags(int* argc, char** argv) {
if (FLAGS_benchmark_color.empty()) {
PrintUsageAndExit();
}
if (FLAGS_benchmark_dry_run) {
AddCustomContext("dry_run", "true");
}
for (const auto& kv : FLAGS_benchmark_context) {
AddCustomContext(kv.first, kv.second);
}
@ -783,6 +829,7 @@ void PrintDefaultHelp() {
" [--benchmark_min_time=`<integer>x` OR `<float>s` ]\n"
" [--benchmark_min_warmup_time=<min_warmup_time>]\n"
" [--benchmark_repetitions=<num_repetitions>]\n"
" [--benchmark_dry_run={true|false}]\n"
" [--benchmark_enable_random_interleaving={true|false}]\n"
" [--benchmark_report_aggregates_only={true|false}]\n"
" [--benchmark_display_aggregates_only={true|false}]\n"


@ -27,7 +27,9 @@ BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, int family_idx,
min_time_(benchmark_.min_time_),
min_warmup_time_(benchmark_.min_warmup_time_),
iterations_(benchmark_.iterations_),
threads_(thread_count) {
threads_(thread_count),
setup_(benchmark_.setup_),
teardown_(benchmark_.teardown_) {
name_.function_name = benchmark_.name_;
size_t arg_i = 0;
@ -84,9 +86,6 @@ BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, int family_idx,
if (!benchmark_.thread_counts_.empty()) {
name_.threads = StrFormat("threads:%d", threads_);
}
setup_ = benchmark_.setup_;
teardown_ = benchmark_.teardown_;
}
State BenchmarkInstance::Run(
@ -101,7 +100,7 @@ State BenchmarkInstance::Run(
}
void BenchmarkInstance::Setup() const {
if (setup_) {
if (setup_ != nullptr) {
State st(name_.function_name, /*iters*/ 1, args_, /*thread_id*/ 0, threads_,
nullptr, nullptr, nullptr, nullptr);
setup_(st);
@ -109,7 +108,7 @@ void BenchmarkInstance::Setup() const {
}
void BenchmarkInstance::Teardown() const {
if (teardown_) {
if (teardown_ != nullptr) {
State st(name_.function_name, /*iters*/ 1, args_, /*thread_id*/ 0, threads_,
nullptr, nullptr, nullptr, nullptr);
teardown_(st);


@ -17,9 +17,9 @@ namespace internal {
// Information kept per benchmark we may want to run
class BenchmarkInstance {
public:
BenchmarkInstance(Benchmark* benchmark, int family_index,
int per_family_instance_index,
const std::vector<int64_t>& args, int threads);
BenchmarkInstance(Benchmark* benchmark, int family_idx,
int per_family_instance_idx,
const std::vector<int64_t>& args, int thread_count);
const BenchmarkName& name() const { return name_; }
int family_index() const { return family_index_; }
@ -41,6 +41,9 @@ class BenchmarkInstance {
int threads() const { return threads_; }
void Setup() const;
void Teardown() const;
const auto& GetUserThreadRunnerFactory() const {
return benchmark_.threadrunner_;
}
State Run(IterationCount iters, int thread_id, internal::ThreadTimer* timer,
internal::ThreadManager* manager,
@ -68,9 +71,8 @@ class BenchmarkInstance {
IterationCount iterations_;
int threads_;  // Number of concurrent threads to use
typedef void (*callback_function)(const benchmark::State&);
callback_function setup_ = nullptr;
callback_function teardown_ = nullptr;
callback_function setup_;
callback_function teardown_;
};
bool FindBenchmarksInternal(const std::string& re,


@ -14,5 +14,5 @@
#include "benchmark/benchmark.h"
BENCHMARK_EXPORT int main(int, char**);
BENCHMARK_EXPORT int main(int /*argc*/, char** /*argv*/);
BENCHMARK_MAIN();


@ -27,8 +27,8 @@ size_t size_impl(const Head& head, const Tail&... tail) {
}
// Join a pack of std::strings using a delimiter
// TODO: use absl::StrJoin
void join_impl(std::string&, char) {}
// TODO(dominic): use absl::StrJoin
void join_impl(std::string& /*unused*/, char /*unused*/) {}
template <typename Head, typename... Tail>
void join_impl(std::string& s, const char delimiter, const Head& head,


@ -53,13 +53,13 @@ namespace benchmark {
namespace {
// For non-dense Range, intermediate values are powers of kRangeMultiplier.
static constexpr int kRangeMultiplier = 8;
constexpr int kRangeMultiplier = 8;
// The size of a benchmark family determines the number of inputs to repeat
// the benchmark on. If this is "large" then warn the user during configuration.
static constexpr size_t kMaxFamilySize = 100;
constexpr size_t kMaxFamilySize = 100;
static constexpr char kDisabledPrefix[] = "DISABLED_";
constexpr char kDisabledPrefix[] = "DISABLED_";
} // end namespace
namespace internal {
@ -82,7 +82,7 @@ class BenchmarkFamilies {
// Extract the list of benchmark instances that match the specified
// regular expression.
bool FindBenchmarks(std::string re,
bool FindBenchmarks(std::string spec,
std::vector<BenchmarkInstance>* benchmarks,
std::ostream* Err);
@ -125,7 +125,7 @@ bool BenchmarkFamilies::FindBenchmarks(
is_negative_filter = true;
}
if (!re.Init(spec, &error_msg)) {
Err << "Could not compile benchmark re: " << error_msg << std::endl;
Err << "Could not compile benchmark re: " << error_msg << '\n';
return false;
}
@ -140,7 +140,9 @@ bool BenchmarkFamilies::FindBenchmarks(
int per_family_instance_index = 0;
// Family was deleted or benchmark doesn't match
if (!family) continue;
if (!family) {
continue;
}
if (family->ArgsCnt() == -1) {
family->Args({});
@ -159,7 +161,9 @@ bool BenchmarkFamilies::FindBenchmarks(
// reserve in the special case of the regex ".", since we know the final
// family size. This doesn't take into account any disabled benchmarks,
// so worst case we reserve more than we need.
if (spec == ".") benchmarks->reserve(benchmarks->size() + family_size);
if (spec == ".") {
benchmarks->reserve(benchmarks->size() + family_size);
}
for (auto const& args : family->args_) {
for (int num_threads : *thread_counts) {
@ -177,7 +181,9 @@ bool BenchmarkFamilies::FindBenchmarks(
// Only bump the next family index once we've established that
// at least one instance of this family will be run.
if (next_family_index == family_index) ++next_family_index;
if (next_family_index == family_index) {
++next_family_index;
}
}
}
}
@ -185,11 +191,11 @@ bool BenchmarkFamilies::FindBenchmarks(
return true;
}
Benchmark* RegisterBenchmarkInternal(Benchmark* bench) {
std::unique_ptr<Benchmark> bench_ptr(bench);
Benchmark* RegisterBenchmarkInternal(std::unique_ptr<Benchmark> bench) {
Benchmark* bench_ptr = bench.get();
BenchmarkFamilies* families = BenchmarkFamilies::GetInstance();
families->AddBenchmark(std::move(bench_ptr));
return bench;
families->AddBenchmark(std::move(bench));
return bench_ptr;
}
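Registration now transfers ownership explicitly: the internal entry point receives a `std::unique_ptr<Benchmark>`, hands it to the family registry, and returns the raw pointer for further chaining. Nothing changes for callers of the public API; a minimal sketch of the two usual registration paths (the benchmark names and bodies are illustrative only):

```cpp
#include <benchmark/benchmark.h>

// Static registration via the macro; this expands into a call that ends up
// in RegisterBenchmarkInternal() during static initialization.
static void BM_Empty(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(state.iterations());
  }
}
BENCHMARK(BM_Empty);

// Runtime registration, useful for data-driven benchmark sets.
int main(int argc, char** argv) {
  benchmark::RegisterBenchmark("BM_Lambda", [](benchmark::State& state) {
    for (auto _ : state) {
      benchmark::DoNotOptimize(42);
    }
  });
  benchmark::Initialize(&argc, argv);
  benchmark::RunSpecifiedBenchmarks();
  benchmark::Shutdown();
  return 0;
}
```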
// FIXME: This function is a hack so that benchmark.cc can access
@ -218,9 +224,7 @@ Benchmark::Benchmark(const std::string& name)
use_real_time_(false),
use_manual_time_(false),
complexity_(oNone),
complexity_lambda_(nullptr),
setup_(nullptr),
teardown_(nullptr) {
complexity_lambda_(nullptr) {
ComputeStatistics("mean", StatisticsMean);
ComputeStatistics("median", StatisticsMedian);
ComputeStatistics("stddev", StatisticsStdDev);
@ -331,13 +335,25 @@ Benchmark* Benchmark::Apply(void (*custom_arguments)(Benchmark* benchmark)) {
return this;
}
Benchmark* Benchmark::Setup(void (*setup)(const benchmark::State&)) {
Benchmark* Benchmark::Setup(callback_function&& setup) {
BM_CHECK(setup != nullptr);
setup_ = std::forward<callback_function>(setup);
return this;
}
Benchmark* Benchmark::Setup(const callback_function& setup) {
BM_CHECK(setup != nullptr);
setup_ = setup;
return this;
}
Benchmark* Benchmark::Teardown(void (*teardown)(const benchmark::State&)) {
Benchmark* Benchmark::Teardown(callback_function&& teardown) {
BM_CHECK(teardown != nullptr);
teardown_ = std::forward<callback_function>(teardown);
return this;
}
Benchmark* Benchmark::Teardown(const callback_function& teardown) {
BM_CHECK(teardown != nullptr);
teardown_ = teardown;
return this;
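`Setup()`/`Teardown()` now come in rvalue and const-reference overloads of `callback_function`; assuming `callback_function` is now a `std::function<void(const benchmark::State&)>` rather than the previous raw function pointer, existing free functions keep working and capturing callables become possible. A minimal sketch using the long-standing function-pointer style:

```cpp
#include <benchmark/benchmark.h>

#include <vector>

static std::vector<int> g_data;

static void DoSetup(const benchmark::State&) { g_data.assign(1 << 20, 1); }
static void DoTeardown(const benchmark::State&) { g_data.clear(); }

static void BM_Sum(benchmark::State& state) {
  for (auto _ : state) {
    long long sum = 0;
    for (int v : g_data) sum += v;
    benchmark::DoNotOptimize(sum);
  }
}

// Setup runs before and Teardown after each run of the benchmark; both
// receive the State describing that run.
BENCHMARK(BM_Sum)->Setup(DoSetup)->Teardown(DoTeardown);
```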
@ -468,13 +484,20 @@ Benchmark* Benchmark::ThreadPerCpu() {
return this;
}
Benchmark* Benchmark::ThreadRunner(threadrunner_factory&& factory) {
threadrunner_ = std::move(factory);
return this;
}
void Benchmark::SetName(const std::string& name) { name_ = name; }
const char* Benchmark::GetName() const { return name_.c_str(); }
int Benchmark::ArgsCnt() const {
if (args_.empty()) {
if (arg_names_.empty()) return -1;
if (arg_names_.empty()) {
return -1;
}
return static_cast<int>(arg_names_.size());
}
return static_cast<int>(args_.front().size());


@ -34,6 +34,7 @@
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <functional>
#include <iostream>
#include <limits>
#include <memory>
@ -46,7 +47,6 @@
#include "commandlineflags.h"
#include "complexity.h"
#include "counter.h"
#include "internal_macros.h"
#include "log.h"
#include "mutex.h"
#include "perf_counters.h"
@ -58,6 +58,14 @@
namespace benchmark {
BM_DECLARE_bool(benchmark_dry_run);
BM_DECLARE_string(benchmark_min_time);
BM_DECLARE_double(benchmark_min_warmup_time);
BM_DECLARE_int32(benchmark_repetitions);
BM_DECLARE_bool(benchmark_report_aggregates_only);
BM_DECLARE_bool(benchmark_display_aggregates_only);
BM_DECLARE_string(benchmark_perf_counters);
namespace internal {
MemoryManager* memory_manager = nullptr;
@ -66,7 +74,7 @@ ProfilerManager* profiler_manager = nullptr;
namespace {
static constexpr IterationCount kMaxIterations = 1000000000000;
constexpr IterationCount kMaxIterations = 1000000000000;
const double kDefaultMinTime =
std::strtod(::benchmark::kDefaultMinTimeStr, /*p_end*/ nullptr);
@ -74,7 +82,7 @@ BenchmarkReporter::Run CreateRunReport(
const benchmark::internal::BenchmarkInstance& b,
const internal::ThreadManager::Result& results,
IterationCount memory_iterations,
const MemoryManager::Result* memory_result, double seconds,
const MemoryManager::Result& memory_result, double seconds,
int64_t repetition_index, int64_t repeats) {
// Create report about this benchmark run.
BenchmarkReporter::Run report;
@ -92,7 +100,7 @@ BenchmarkReporter::Run CreateRunReport(
report.repetition_index = repetition_index;
report.repetitions = repeats;
if (!report.skipped) {
if (report.skipped == 0u) {
if (b.use_manual_time()) {
report.real_accumulated_time = results.manual_time_used;
} else {
@ -107,12 +115,12 @@ BenchmarkReporter::Run CreateRunReport(
report.counters = results.counters;
if (memory_iterations > 0) {
assert(memory_result != nullptr);
report.memory_result = memory_result;
report.allocs_per_iter =
memory_iterations ? static_cast<double>(memory_result->num_allocs) /
static_cast<double>(memory_iterations)
: 0;
memory_iterations != 0
? static_cast<double>(memory_result.num_allocs) /
static_cast<double>(memory_iterations)
: 0;
}
internal::Finish(&report.counters, results.iterations, seconds,
@ -126,14 +134,14 @@ BenchmarkReporter::Run CreateRunReport(
void RunInThread(const BenchmarkInstance* b, IterationCount iters,
int thread_id, ThreadManager* manager,
PerfCountersMeasurement* perf_counters_measurement,
ProfilerManager* profiler_manager) {
ProfilerManager* profiler_manager_) {
internal::ThreadTimer timer(
b->measure_process_cpu_time()
? internal::ThreadTimer::CreateProcessCpuTime()
: internal::ThreadTimer::Create());
State st = b->Run(iters, thread_id, &timer, manager,
perf_counters_measurement, profiler_manager);
perf_counters_measurement, profiler_manager_);
BM_CHECK(st.skipped() || st.iterations() >= st.max_iterations)
<< "Benchmark returned before State::KeepRunning() returned false!";
{
@ -151,17 +159,23 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters,
double ComputeMinTime(const benchmark::internal::BenchmarkInstance& b,
const BenchTimeType& iters_or_time) {
if (!IsZero(b.min_time())) return b.min_time();
if (!IsZero(b.min_time())) {
return b.min_time();
}
// If the flag was used to specify number of iters, then return the default
// min_time.
if (iters_or_time.tag == BenchTimeType::ITERS) return kDefaultMinTime;
if (iters_or_time.tag == BenchTimeType::ITERS) {
return kDefaultMinTime;
}
return iters_or_time.time;
}
IterationCount ComputeIters(const benchmark::internal::BenchmarkInstance& b,
const BenchTimeType& iters_or_time) {
if (b.iterations() != 0) return b.iterations();
if (b.iterations() != 0) {
return b.iterations();
}
// We've already concluded that this flag is currently used to pass
// iters but do a check here again anyway.
@ -169,10 +183,42 @@ IterationCount ComputeIters(const benchmark::internal::BenchmarkInstance& b,
return iters_or_time.iters;
}
class ThreadRunnerDefault : public ThreadRunnerBase {
public:
explicit ThreadRunnerDefault(int num_threads)
: pool(static_cast<size_t>(num_threads - 1)) {}
void RunThreads(const std::function<void(int)>& fn) final {
// Run all but one thread in separate threads
for (std::size_t ti = 0; ti < pool.size(); ++ti) {
pool[ti] = std::thread(fn, static_cast<int>(ti + 1));
}
// And run one thread here directly.
// (If we were asked to run just one thread, we don't create new threads.)
// Yes, we need to do this here *after* we start the separate threads.
fn(0);
// The main thread has finished. Now let's wait for the other threads.
for (std::thread& thread : pool) {
thread.join();
}
}
private:
std::vector<std::thread> pool;
};
std::unique_ptr<ThreadRunnerBase> GetThreadRunner(
const threadrunner_factory& userThreadRunnerFactory, int num_threads) {
return userThreadRunnerFactory
? userThreadRunnerFactory(num_threads)
: std::make_unique<ThreadRunnerDefault>(num_threads);
}
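Based on the interface visible in this change, a user-provided factory returns an object whose `RunThreads()` is handed a callable to invoke once per thread index in `[0, threads)`; the default implementation above spawns `std::thread`s for indices 1..N-1 and runs index 0 inline. A hedged sketch of plugging in a custom runner (the exact header, namespace, and spelling of `ThreadRunnerBase` and the factory alias are assumptions, since they are not shown in this hunk):

```cpp
#include <benchmark/benchmark.h>

#include <functional>
#include <memory>
#include <thread>
#include <vector>

// Hypothetical custom runner: same shape as ThreadRunnerDefault, but a real
// implementation could pin threads, reuse an existing pool, etc.
class MyThreadRunner : public benchmark::ThreadRunnerBase {
 public:
  explicit MyThreadRunner(int num_threads) : num_threads_(num_threads) {}

  void RunThreads(const std::function<void(int)>& fn) override {
    std::vector<std::thread> threads;
    for (int t = 1; t < num_threads_; ++t) {
      threads.emplace_back(fn, t);  // e.g. set affinity before/after spawning
    }
    fn(0);  // thread 0 runs on the calling thread, like the default runner
    for (auto& th : threads) {
      th.join();
    }
  }

 private:
  int num_threads_;
};

static void BM_Contended(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(state.thread_index());
  }
}

BENCHMARK(BM_Contended)->Threads(4)->ThreadRunner([](int num_threads) {
  return std::make_unique<MyThreadRunner>(num_threads);
});
```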
} // end namespace
BenchTimeType ParseBenchMinTime(const std::string& value) {
BenchTimeType ret;
BenchTimeType ret = {};
if (value.empty()) {
ret.tag = BenchTimeType::TIME;
@ -181,7 +227,7 @@ BenchTimeType ParseBenchMinTime(const std::string& value) {
}
if (value.back() == 'x') {
char* p_end;
char* p_end = nullptr;
// Reset errno before it's changed by strtol.
errno = 0;
IterationCount num_iters = std::strtol(value.c_str(), &p_end, 10);
@ -203,7 +249,7 @@ BenchTimeType ParseBenchMinTime(const std::string& value) {
"Eg., `30s` for 30-seconds.";
}
char* p_end;
char* p_end = nullptr;
// Reset errno before it's changed by strtod.
errno = 0;
double min_time = std::strtod(value.c_str(), &p_end);
@ -228,20 +274,30 @@ BenchmarkRunner::BenchmarkRunner(
: b(b_),
reports_for_family(reports_for_family_),
parsed_benchtime_flag(ParseBenchMinTime(FLAGS_benchmark_min_time)),
min_time(ComputeMinTime(b_, parsed_benchtime_flag)),
min_warmup_time((!IsZero(b.min_time()) && b.min_warmup_time() > 0.0)
? b.min_warmup_time()
: FLAGS_benchmark_min_warmup_time),
warmup_done(!(min_warmup_time > 0.0)),
repeats(b.repetitions() != 0 ? b.repetitions()
: FLAGS_benchmark_repetitions),
min_time(FLAGS_benchmark_dry_run
? 0
: ComputeMinTime(b_, parsed_benchtime_flag)),
min_warmup_time(
FLAGS_benchmark_dry_run
? 0
: ((!IsZero(b.min_time()) && b.min_warmup_time() > 0.0)
? b.min_warmup_time()
: FLAGS_benchmark_min_warmup_time)),
warmup_done(FLAGS_benchmark_dry_run ? true : !(min_warmup_time > 0.0)),
repeats(FLAGS_benchmark_dry_run
? 1
: (b.repetitions() != 0 ? b.repetitions()
: FLAGS_benchmark_repetitions)),
has_explicit_iteration_count(b.iterations() != 0 ||
parsed_benchtime_flag.tag ==
BenchTimeType::ITERS),
pool(static_cast<size_t>(b.threads() - 1)),
iters(has_explicit_iteration_count
? ComputeIters(b_, parsed_benchtime_flag)
: 1),
thread_runner(
GetThreadRunner(b.GetUserThreadRunnerFactory(), b.threads())),
iters(FLAGS_benchmark_dry_run
? 1
: (has_explicit_iteration_count
? ComputeIters(b_, parsed_benchtime_flag)
: 1)),
perf_counters_measurement_ptr(pcm_) {
run_results.display_report_aggregates_only =
(FLAGS_benchmark_report_aggregates_only ||
@ -250,10 +306,11 @@ BenchmarkRunner::BenchmarkRunner(
FLAGS_benchmark_report_aggregates_only;
if (b.aggregation_report_mode() != internal::ARM_Unspecified) {
run_results.display_report_aggregates_only =
(b.aggregation_report_mode() &
internal::ARM_DisplayReportAggregatesOnly);
((b.aggregation_report_mode() &
internal::ARM_DisplayReportAggregatesOnly) != 0u);
run_results.file_report_aggregates_only =
(b.aggregation_report_mode() & internal::ARM_FileReportAggregatesOnly);
((b.aggregation_report_mode() &
internal::ARM_FileReportAggregatesOnly) != 0u);
BM_CHECK(FLAGS_benchmark_perf_counters.empty() ||
(perf_counters_measurement_ptr->num_counters() == 0))
<< "Perf counters were requested but could not be set up.";
@ -266,21 +323,10 @@ BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() {
std::unique_ptr<internal::ThreadManager> manager;
manager.reset(new internal::ThreadManager(b.threads()));
// Run all but one thread in separate threads
for (std::size_t ti = 0; ti < pool.size(); ++ti) {
pool[ti] = std::thread(&RunInThread, &b, iters, static_cast<int>(ti + 1),
manager.get(), perf_counters_measurement_ptr,
/*profiler_manager=*/nullptr);
}
// And run one thread here directly.
// (If we were asked to run just one thread, we don't create new threads.)
// Yes, we need to do this here *after* we start the separate threads.
RunInThread(&b, iters, 0, manager.get(), perf_counters_measurement_ptr,
/*profiler_manager=*/nullptr);
// The main thread has finished. Now let's wait for the other threads.
manager->WaitForAllThreads();
for (std::thread& thread : pool) thread.join();
thread_runner->RunThreads([&](int thread_idx) {
RunInThread(&b, iters, thread_idx, manager.get(),
perf_counters_measurement_ptr, /*profiler_manager=*/nullptr);
});
IterationResults i;
// Acquire the measurements/counters from the manager, UNDER THE LOCK!
@ -339,7 +385,7 @@ bool BenchmarkRunner::ShouldReportIterationResults(
// Determine if this run should be reported;
// Either it has run for a sufficient amount of time
// or an error was reported.
return i.results.skipped_ ||
return (i.results.skipped_ != 0u) || FLAGS_benchmark_dry_run ||
i.iters >= kMaxIterations || // Too many iterations already.
i.seconds >=
GetMinTimeToApply() || // The elapsed time is large enough.
@ -400,13 +446,8 @@ void BenchmarkRunner::RunWarmUp() {
}
}
MemoryManager::Result* BenchmarkRunner::RunMemoryManager(
MemoryManager::Result BenchmarkRunner::RunMemoryManager(
IterationCount memory_iterations) {
// TODO(vyng): Consider making BenchmarkReporter::Run::memory_result an
// optional so we don't have to own the Result here.
// Can't do it now due to cxx03.
memory_results.push_back(MemoryManager::Result());
MemoryManager::Result* memory_result = &memory_results.back();
memory_manager->Start();
std::unique_ptr<internal::ThreadManager> manager;
manager.reset(new internal::ThreadManager(1));
@ -414,23 +455,21 @@ MemoryManager::Result* BenchmarkRunner::RunMemoryManager(
RunInThread(&b, memory_iterations, 0, manager.get(),
perf_counters_measurement_ptr,
/*profiler_manager=*/nullptr);
manager->WaitForAllThreads();
manager.reset();
b.Teardown();
memory_manager->Stop(*memory_result);
MemoryManager::Result memory_result;
memory_manager->Stop(memory_result);
memory_result.memory_iterations = memory_iterations;
return memory_result;
}
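`RunMemoryManager()` now returns the result by value and stamps `memory_iterations` into it, instead of parking results in a member vector and passing a pointer around. The manager interface itself is untouched; a minimal sketch of hooking one up through the public API (the allocation counters here are stand-ins, not a real allocator hook):

```cpp
#include <benchmark/benchmark.h>

#include <atomic>
#include <cstdint>

static std::atomic<int64_t> g_allocs{0};
static std::atomic<int64_t> g_bytes{0};

class SimpleMemoryManager : public benchmark::MemoryManager {
 public:
  void Start() override {  // called before the instrumented iterations
    g_allocs = 0;
    g_bytes = 0;
  }
  void Stop(Result& result) override {  // called after they finish
    result.num_allocs = g_allocs.load();
    result.total_allocated_bytes = g_bytes.load();
  }
};

int main(int argc, char** argv) {
  SimpleMemoryManager mm;
  benchmark::RegisterMemoryManager(&mm);
  benchmark::Initialize(&argc, argv);
  benchmark::RunSpecifiedBenchmarks();
  benchmark::RegisterMemoryManager(nullptr);
  return 0;
}
```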
void BenchmarkRunner::RunProfilerManager() {
// TODO: Provide a way to specify the number of iterations.
IterationCount profile_iterations = 1;
void BenchmarkRunner::RunProfilerManager(IterationCount profile_iterations) {
std::unique_ptr<internal::ThreadManager> manager;
manager.reset(new internal::ThreadManager(1));
b.Setup();
RunInThread(&b, profile_iterations, 0, manager.get(),
/*perf_counters_measurement_ptr=*/nullptr,
/*profiler_manager=*/profiler_manager);
manager->WaitForAllThreads();
manager.reset();
b.Teardown();
}
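`RunProfilerManager()` now takes the iteration count so the externally profiled run can match the measured one (see the call site in `DoOneRepetition()` further down). A hedged sketch of a profiler hook, assuming the `ProfilerManager` interface with `AfterSetupStart()`/`BeforeTeardownStop()` callbacks and `RegisterProfilerManager()` as exposed in recent releases:

```cpp
#include <benchmark/benchmark.h>

#include <iostream>

// Hypothetical tracing hook: start an external tracer right after Setup()
// and stop it just before Teardown() of the profiled run.
class TraceProfiler : public benchmark::ProfilerManager {
 public:
  void AfterSetupStart() override { std::cout << "tracer: start\n"; }
  void BeforeTeardownStop() override { std::cout << "tracer: stop\n"; }
};

int main(int argc, char** argv) {
  TraceProfiler profiler;
  benchmark::RegisterProfilerManager(&profiler);
  benchmark::Initialize(&argc, argv);
  benchmark::RunSpecifiedBenchmarks();
  benchmark::RegisterProfilerManager(nullptr);
  return 0;
}
```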
@ -445,7 +484,9 @@ void BenchmarkRunner::DoOneRepetition() {
// this warmup never happened except the fact that warmup_done is set. Every
// other manipulation of the BenchmarkRunner instance would be a bug! Please
// fix it.
if (!warmup_done) RunWarmUp();
if (!warmup_done) {
RunWarmUp();
}
IterationResults i;
// We *may* be gradually increasing the length (iteration count)
@ -467,8 +508,10 @@ void BenchmarkRunner::DoOneRepetition() {
const bool results_are_significant = !is_the_first_repetition ||
has_explicit_iteration_count ||
ShouldReportIterationResults(i);
if (results_are_significant) break; // Good, let's report them!
// Good, let's report them!
if (results_are_significant) {
break;
}
// Nope, bad iteration. Let's re-estimate the hopefully-sufficient
// iteration count, and run the benchmark again...
@ -480,7 +523,7 @@ void BenchmarkRunner::DoOneRepetition() {
}
// Produce memory measurements if requested.
MemoryManager::Result* memory_result = nullptr;
MemoryManager::Result memory_result;
IterationCount memory_iterations = 0;
if (memory_manager != nullptr) {
// Only run a few iterations to reduce the impact of one-time
@ -490,7 +533,10 @@ void BenchmarkRunner::DoOneRepetition() {
}
if (profiler_manager != nullptr) {
RunProfilerManager();
// We want to externally profile the benchmark for the same number of
// iterations because, for example, if we're tracing the benchmark then we
// want trace data to reasonably match PMU data.
RunProfilerManager(iters);
}
// Ok, now actually report.
@ -498,9 +544,11 @@ void BenchmarkRunner::DoOneRepetition() {
CreateRunReport(b, i.results, memory_iterations, memory_result, i.seconds,
num_repetitions_done, repeats);
if (reports_for_family) {
if (reports_for_family != nullptr) {
++reports_for_family->num_runs_done;
if (!report.skipped) reports_for_family->Runs.push_back(report);
if (report.skipped == 0u) {
reports_for_family->Runs.push_back(report);
}
}
run_results.non_aggregates.push_back(report);


@ -15,23 +15,16 @@
#ifndef BENCHMARK_RUNNER_H_
#define BENCHMARK_RUNNER_H_
#include <memory>
#include <thread>
#include <vector>
#include "benchmark_api_internal.h"
#include "internal_macros.h"
#include "perf_counters.h"
#include "thread_manager.h"
namespace benchmark {
BM_DECLARE_string(benchmark_min_time);
BM_DECLARE_double(benchmark_min_warmup_time);
BM_DECLARE_int32(benchmark_repetitions);
BM_DECLARE_bool(benchmark_report_aggregates_only);
BM_DECLARE_bool(benchmark_display_aggregates_only);
BM_DECLARE_string(benchmark_perf_counters);
namespace internal {
extern MemoryManager* memory_manager;
@ -46,7 +39,7 @@ struct RunResults {
};
struct BENCHMARK_EXPORT BenchTimeType {
enum { ITERS, TIME } tag;
enum { UNSPECIFIED, ITERS, TIME } tag;
union {
IterationCount iters;
double time;
@ -59,7 +52,7 @@ BenchTimeType ParseBenchMinTime(const std::string& value);
class BenchmarkRunner {
public:
BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_,
benchmark::internal::PerfCountersMeasurement* pmc_,
benchmark::internal::PerfCountersMeasurement* pcm_,
BenchmarkReporter::PerFamilyRunReports* reports_for_family);
int GetNumRepeats() const { return repeats; }
@ -97,9 +90,7 @@ class BenchmarkRunner {
int num_repetitions_done = 0;
std::vector<std::thread> pool;
std::vector<MemoryManager::Result> memory_results;
std::unique_ptr<ThreadRunnerBase> thread_runner;
IterationCount iters; // preserved between repetitions!
// So only the first repetition has to find/calculate it,
@ -114,9 +105,9 @@ class BenchmarkRunner {
};
IterationResults DoNIterations();
MemoryManager::Result* RunMemoryManager(IterationCount memory_iterations);
MemoryManager::Result RunMemoryManager(IterationCount memory_iterations);
void RunProfilerManager();
void RunProfilerManager(IterationCount profile_iterations);
IterationCount PredictNumItersNeeded(const IterationResults& i) const;


@ -3,7 +3,10 @@
namespace benchmark {
namespace internal {
static AbortHandlerT* handler = &std::abort;
namespace {
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
AbortHandlerT* handler = &std::abort;
} // namespace
BENCHMARK_EXPORT AbortHandlerT*& GetAbortHandler() { return handler; }


@ -4,6 +4,7 @@
#include <cmath>
#include <cstdlib>
#include <ostream>
#include <string_view>
#include "benchmark/export.h"
#include "internal_macros.h"
@ -36,6 +37,8 @@ AbortHandlerT*& GetAbortHandler();
BENCHMARK_NORETURN inline void CallAbortHandler() {
GetAbortHandler()();
std::flush(std::cout);
std::flush(std::cerr);
std::abort(); // fallback to enforce noreturn
}
@ -44,7 +47,8 @@ BENCHMARK_NORETURN inline void CallAbortHandler() {
// destructed.
class CheckHandler {
public:
CheckHandler(const char* check, const char* file, const char* func, int line)
CheckHandler(std::string_view check, std::string_view file,
std::string_view func, int line)
: log_(GetErrorLogInstance()) {
log_ << file << ":" << line << ": " << func << ": Check `" << check
<< "' failed. ";
@ -57,7 +61,7 @@ class CheckHandler {
#pragma warning(disable : 4722)
#endif
BENCHMARK_NORETURN ~CheckHandler() BENCHMARK_NOEXCEPT_OP(false) {
log_ << std::endl;
log_ << '\n';
CallAbortHandler();
}
#if defined(COMPILER_MSVC)
@ -78,9 +82,11 @@ class CheckHandler {
// The BM_CHECK macro returns a std::ostream object that can have extra
// information written to it.
#ifndef NDEBUG
#define BM_CHECK(b) \
(b ? ::benchmark::internal::GetNullLogInstance() \
: ::benchmark::internal::CheckHandler(#b, __FILE__, __func__, __LINE__) \
#define BM_CHECK(b) \
(b ? ::benchmark::internal::GetNullLogInstance() \
: ::benchmark::internal::CheckHandler( \
std::string_view(#b), std::string_view(__FILE__), \
std::string_view(__func__), __LINE__) \
.GetLog())
#else
#define BM_CHECK(b) ::benchmark::internal::GetNullLogInstance()


@ -135,22 +135,30 @@ void ColorPrintf(std::ostream& out, LogColor color, const char* fmt,
// Gets the current text color.
CONSOLE_SCREEN_BUFFER_INFO buffer_info;
GetConsoleScreenBufferInfo(stdout_handle, &buffer_info);
const WORD old_color_attrs = buffer_info.wAttributes;
const WORD original_color_attrs = buffer_info.wAttributes;
// We need to flush the stream buffers into the console before each
// SetConsoleTextAttribute call lest it affect the text that is already
// printed but has not yet reached the console.
out.flush();
SetConsoleTextAttribute(stdout_handle,
GetPlatformColorCode(color) | FOREGROUND_INTENSITY);
const WORD original_background_attrs =
original_color_attrs & (BACKGROUND_RED | BACKGROUND_GREEN |
BACKGROUND_BLUE | BACKGROUND_INTENSITY);
SetConsoleTextAttribute(stdout_handle, GetPlatformColorCode(color) |
FOREGROUND_INTENSITY |
original_background_attrs);
out << FormatString(fmt, args);
out.flush();
// Restores the text color.
SetConsoleTextAttribute(stdout_handle, old_color_attrs);
// Restores the text and background color.
SetConsoleTextAttribute(stdout_handle, original_color_attrs);
#else
const char* color_code = GetPlatformColorCode(color);
if (color_code) out << FormatString("\033[0;3%sm", color_code);
if (color_code != nullptr) {
out << FormatString("\033[0;3%sm", color_code);
}
out << FormatString(fmt, args) << "\033[m";
#endif
}
@ -187,7 +195,7 @@ bool IsColorTerminal() {
bool term_supports_color = false;
for (const char* candidate : SUPPORTED_TERM_VALUES) {
if (term && 0 == strcmp(term, candidate)) {
if ((term != nullptr) && 0 == strcmp(term, candidate)) {
term_supports_color = true;
break;
}

View File

@ -109,12 +109,13 @@ bool ParseKvPairs(const std::string& src_text, const char* str,
// Returns the name of the environment variable corresponding to the
// given flag. For example, FlagToEnvVar("foo") will return
// "BENCHMARK_FOO" in the open-source version.
static std::string FlagToEnvVar(const char* flag) {
std::string FlagToEnvVar(const char* flag) {
const std::string flag_str(flag);
std::string env_var;
for (size_t i = 0; i != flag_str.length(); ++i)
for (size_t i = 0; i != flag_str.length(); ++i) {
env_var += static_cast<char>(::toupper(flag_str.c_str()[i]));
}
return env_var;
}
@ -167,7 +168,9 @@ std::map<std::string, std::string> KvPairsFromEnv(
const std::string env_var = FlagToEnvVar(flag);
const char* const value_str = getenv(env_var.c_str());
if (value_str == nullptr) return default_val;
if (value_str == nullptr) {
return default_val;
}
std::map<std::string, std::string> value;
if (!ParseKvPairs("Environment variable " + env_var, value_str, &value)) {
@ -184,23 +187,31 @@ std::map<std::string, std::string> KvPairsFromEnv(
const char* ParseFlagValue(const char* str, const char* flag,
bool def_optional) {
// str and flag must not be nullptr.
if (str == nullptr || flag == nullptr) return nullptr;
if (str == nullptr || flag == nullptr) {
return nullptr;
}
// The flag must start with "--".
const std::string flag_str = std::string("--") + std::string(flag);
const size_t flag_len = flag_str.length();
if (strncmp(str, flag_str.c_str(), flag_len) != 0) return nullptr;
if (strncmp(str, flag_str.c_str(), flag_len) != 0) {
return nullptr;
}
// Skips the flag name.
const char* flag_end = str + flag_len;
// When def_optional is true, it's OK to not have a "=value" part.
if (def_optional && (flag_end[0] == '\0')) return flag_end;
if (def_optional && (flag_end[0] == '\0')) {
return flag_end;
}
// If def_optional is true and there are more characters after the
// flag name, or if def_optional is false, there must be a '=' after
// the flag name.
if (flag_end[0] != '=') return nullptr;
if (flag_end[0] != '=') {
return nullptr;
}
// Returns the string after "=".
return flag_end + 1;
@ -212,7 +223,9 @@ bool ParseBoolFlag(const char* str, const char* flag, bool* value) {
const char* const value_str = ParseFlagValue(str, flag, true);
// Aborts if the parsing failed.
if (value_str == nullptr) return false;
if (value_str == nullptr) {
return false;
}
// Converts the string value to a bool.
*value = IsTruthyFlagValue(value_str);
@ -225,7 +238,9 @@ bool ParseInt32Flag(const char* str, const char* flag, int32_t* value) {
const char* const value_str = ParseFlagValue(str, flag, false);
// Aborts if the parsing failed.
if (value_str == nullptr) return false;
if (value_str == nullptr) {
return false;
}
// Sets *value to the value of the flag.
return ParseInt32(std::string("The value of flag --") + flag, value_str,
@ -238,7 +253,9 @@ bool ParseDoubleFlag(const char* str, const char* flag, double* value) {
const char* const value_str = ParseFlagValue(str, flag, false);
// Aborts if the parsing failed.
if (value_str == nullptr) return false;
if (value_str == nullptr) {
return false;
}
// Sets *value to the value of the flag.
return ParseDouble(std::string("The value of flag --") + flag, value_str,
@ -251,7 +268,9 @@ bool ParseStringFlag(const char* str, const char* flag, std::string* value) {
const char* const value_str = ParseFlagValue(str, flag, false);
// Aborts if the parsing failed.
if (value_str == nullptr) return false;
if (value_str == nullptr) {
return false;
}
*value = value_str;
return true;
@ -262,11 +281,15 @@ bool ParseKeyValueFlag(const char* str, const char* flag,
std::map<std::string, std::string>* value) {
const char* const value_str = ParseFlagValue(str, flag, false);
if (value_str == nullptr) return false;
if (value_str == nullptr) {
return false;
}
for (const auto& kvpair : StrSplit(value_str, ',')) {
const auto kv = StrSplit(kvpair, '=');
if (kv.size() != 2) return false;
if (kv.size() != 2) {
return false;
}
value->emplace(kv[0], kv[1]);
}


@ -11,14 +11,17 @@
#define FLAG(name) FLAGS_##name
// Macros for declaring flags.
// NOLINTBEGIN(cppcoreguidelines-avoid-non-const-global-variables)
#define BM_DECLARE_bool(name) BENCHMARK_EXPORT extern bool FLAG(name)
#define BM_DECLARE_int32(name) BENCHMARK_EXPORT extern int32_t FLAG(name)
#define BM_DECLARE_double(name) BENCHMARK_EXPORT extern double FLAG(name)
#define BM_DECLARE_string(name) BENCHMARK_EXPORT extern std::string FLAG(name)
#define BM_DECLARE_kvpairs(name) \
BENCHMARK_EXPORT extern std::map<std::string, std::string> FLAG(name)
// NOLINTEND(cppcoreguidelines-avoid-non-const-global-variables)
// Macros for defining flags.
// NOLINTBEGIN(cppcoreguidelines-avoid-non-const-global-variables)
#define BM_DEFINE_bool(name, default_val) \
BENCHMARK_EXPORT bool FLAG(name) = benchmark::BoolFromEnv(#name, default_val)
#define BM_DEFINE_int32(name, default_val) \
@ -33,6 +36,7 @@
#define BM_DEFINE_kvpairs(name, default_val) \
BENCHMARK_EXPORT std::map<std::string, std::string> FLAG(name) = \
benchmark::KvPairsFromEnv(#name, default_val)
// NOLINTEND(cppcoreguidelines-avoid-non-const-global-variables)
namespace benchmark {


@ -157,7 +157,9 @@ std::vector<BenchmarkReporter::Run> ComputeBigO(
typedef BenchmarkReporter::Run Run;
std::vector<Run> results;
if (reports.size() < 2) return results;
if (reports.size() < 2) {
return results;
}
// Accumulators.
std::vector<ComplexityN> n;


@ -63,7 +63,7 @@ void ConsoleReporter::PrintHeader(const Run& run) {
FormatString("%-*s %13s %15s %12s", static_cast<int>(name_field_width_),
"Benchmark", "Time", "CPU", "Iterations");
if (!run.counters.empty()) {
if (output_options_ & OO_Tabular) {
if ((output_options_ & OO_Tabular) != 0) {
for (auto const& c : run.counters) {
str += FormatString(" %10s", c.first.c_str());
}
@ -83,7 +83,7 @@ void ConsoleReporter::ReportRuns(const std::vector<Run>& reports) {
bool print_header = !printed_header_;
// --- or if the format is tabular and this run
// has different fields from the prev header
print_header |= (output_options_ & OO_Tabular) &&
print_header |= ((output_options_ & OO_Tabular) != 0) &&
(!internal::SameNames(run.counters, prev_counters_));
if (print_header) {
printed_header_ = true;
@ -97,8 +97,8 @@ void ConsoleReporter::ReportRuns(const std::vector<Run>& reports) {
}
}
static void IgnoreColorPrint(std::ostream& out, LogColor, const char* fmt,
...) {
static void IgnoreColorPrint(std::ostream& out, LogColor /*unused*/,
const char* fmt, ...) {
va_list args;
va_start(args, fmt);
out << FormatString(fmt, args);
@ -131,7 +131,7 @@ BENCHMARK_EXPORT
void ConsoleReporter::PrintRunData(const Run& result) {
typedef void(PrinterFn)(std::ostream&, LogColor, const char*, ...);
auto& Out = GetOutputStream();
PrinterFn* printer = (output_options_ & OO_Color)
PrinterFn* printer = (output_options_ & OO_Color) != 0
? static_cast<PrinterFn*>(ColorPrintf)
: IgnoreColorPrint;
auto name_color =
@ -144,7 +144,8 @@ void ConsoleReporter::PrintRunData(const Run& result) {
result.skip_message.c_str());
printer(Out, COLOR_DEFAULT, "\n");
return;
} else if (internal::SkippedWithMessage == result.skipped) {
}
if (internal::SkippedWithMessage == result.skipped) {
printer(Out, COLOR_WHITE, "SKIPPED: \'%s\'", result.skip_message.c_str());
printer(Out, COLOR_DEFAULT, "\n");
return;
@ -178,9 +179,9 @@ void ConsoleReporter::PrintRunData(const Run& result) {
printer(Out, COLOR_CYAN, "%10lld", result.iterations);
}
for (auto& c : result.counters) {
for (const auto& c : result.counters) {
const std::size_t cNameLen =
std::max(std::string::size_type(10), c.first.length());
std::max(static_cast<std::size_t>(10), c.first.length());
std::string s;
const char* unit = "";
if (result.run_type == Run::RT_Aggregate &&
@ -189,10 +190,11 @@ void ConsoleReporter::PrintRunData(const Run& result) {
unit = "%";
} else {
s = HumanReadableNumber(c.second.value, c.second.oneK);
if (c.second.flags & Counter::kIsRate)
unit = (c.second.flags & Counter::kInvert) ? "s" : "/s";
if ((c.second.flags & Counter::kIsRate) != 0) {
unit = (c.second.flags & Counter::kInvert) != 0 ? "s" : "/s";
}
}
if (output_options_ & OO_Tabular) {
if ((output_options_ & OO_Tabular) != 0) {
printer(Out, COLOR_DEFAULT, " %*s%s", cNameLen - strlen(unit), s.c_str(),
unit);
} else {

View File

@ -20,20 +20,20 @@ namespace internal {
double Finish(Counter const& c, IterationCount iterations, double cpu_time,
double num_threads) {
double v = c.value;
if (c.flags & Counter::kIsRate) {
if ((c.flags & Counter::kIsRate) != 0) {
v /= cpu_time;
}
if (c.flags & Counter::kAvgThreads) {
if ((c.flags & Counter::kAvgThreads) != 0) {
v /= num_threads;
}
if (c.flags & Counter::kIsIterationInvariant) {
if ((c.flags & Counter::kIsIterationInvariant) != 0) {
v *= static_cast<double>(iterations);
}
if (c.flags & Counter::kAvgIterations) {
if ((c.flags & Counter::kAvgIterations) != 0) {
v /= static_cast<double>(iterations);
}
if (c.flags & Counter::kInvert) { // Invert is *always* last.
if ((c.flags & Counter::kInvert) != 0) { // Invert is *always* last.
v = 1.0 / v;
}
return v;
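These flag checks are the transformations applied to user counters when a run finishes: rates divide by CPU time, `kAvgThreads` divides by the thread count, iteration-invariant counters multiply by the iteration count, per-iteration averages divide by it, and inversion is always applied last. A minimal sketch of setting such counters from a benchmark with the public `benchmark::Counter` API:

```cpp
#include <benchmark/benchmark.h>

#include <cstdint>

static void BM_Process(benchmark::State& state) {
  constexpr int64_t kBytesPerIter = 4096;
  for (auto _ : state) {
    benchmark::DoNotOptimize(kBytesPerIter);
  }
  // Multiplied by iterations, then divided by CPU time -> bytes per second.
  state.counters["Throughput"] =
      benchmark::Counter(static_cast<double>(kBytesPerIter),
                         benchmark::Counter::kIsIterationInvariantRate,
                         benchmark::Counter::kIs1024);
  // kIsRate combined with kInvert -> reported as seconds per item.
  state.counters["SecsPerItem"] = benchmark::Counter(
      1, benchmark::Counter::kIsRate | benchmark::Counter::kInvert);
}
BENCHMARK(BM_Process);
```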
@ -64,7 +64,9 @@ void Increment(UserCounters* l, UserCounters const& r) {
}
bool SameNames(UserCounters const& l, UserCounters const& r) {
if (&l == &r) return true;
if (&l == &r) {
return true;
}
if (l.size() != r.size()) {
return false;
}


@ -66,8 +66,10 @@ void CSVReporter::ReportRuns(const std::vector<Run>& reports) {
// save the names of all the user counters
for (const auto& run : reports) {
for (const auto& cnt : run.counters) {
if (cnt.first == "bytes_per_second" || cnt.first == "items_per_second")
if (cnt.first == "bytes_per_second" ||
cnt.first == "items_per_second") {
continue;
}
user_counter_names_.insert(cnt.first);
}
}
@ -75,7 +77,9 @@ void CSVReporter::ReportRuns(const std::vector<Run>& reports) {
// print the header
for (auto B = elements.begin(); B != elements.end();) {
Out << *B++;
if (B != elements.end()) Out << ",";
if (B != elements.end()) {
Out << ",";
}
}
for (auto B = user_counter_names_.begin();
B != user_counter_names_.end();) {
@ -88,8 +92,10 @@ void CSVReporter::ReportRuns(const std::vector<Run>& reports) {
// check that all the current counters are saved in the name set
for (const auto& run : reports) {
for (const auto& cnt : run.counters) {
if (cnt.first == "bytes_per_second" || cnt.first == "items_per_second")
if (cnt.first == "bytes_per_second" ||
cnt.first == "items_per_second") {
continue;
}
BM_CHECK(user_counter_names_.find(cnt.first) !=
user_counter_names_.end())
<< "All counters must be present in each run. "
@ -109,7 +115,7 @@ BENCHMARK_EXPORT
void CSVReporter::PrintRunData(const Run& run) {
std::ostream& Out = GetOutputStream();
Out << CsvEscape(run.benchmark_name()) << ",";
if (run.skipped) {
if (run.skipped != 0u) {
Out << std::string(elements.size() - 3, ',');
Out << std::boolalpha << (internal::SkippedWithError == run.skipped) << ",";
Out << CsvEscape(run.skip_message) << "\n";


@ -219,7 +219,7 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
#elif defined(__hexagon__)
uint64_t pcycle;
asm volatile("%0 = C15:14" : "=r"(pcycle));
return static_cast<double>(pcycle);
return static_cast<int64_t>(pcycle);
#elif defined(__alpha__)
// Alpha has a cycle counter, the PCC register, but it is an unsigned 32-bit
// integer and thus wraps every ~4s, making using it for tick counts
@ -229,6 +229,18 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
struct timeval tv;
gettimeofday(&tv, nullptr);
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
#elif defined(__hppa__) || defined(__linux__)
// Fallback for all other architectures with a recent Linux kernel, e.g.:
// HP PA-RISC provides a user-readable clock counter (cr16), but
// it's not synchronized across CPUs and only 32-bit wide when programs
// are built as 32-bit binaries.
// Same for SH-4 and possibly others.
// Use clock_gettime(CLOCK_MONOTONIC, ...) instead of gettimeofday
// because it provides nanosecond resolution.
// Initialize to always return 0 if clock_gettime fails.
struct timespec ts = {0, 0};
clock_gettime(CLOCK_MONOTONIC, &ts);
return static_cast<int64_t>(ts.tv_sec) * 1000000000 + ts.tv_nsec;
#else
// The soft failover to a generic implementation is automatic only for ARM.
// For other platforms the developer is expected to make an attempt to create


@ -85,15 +85,19 @@ std::string FormatKV(std::string const& key, int64_t value) {
return ss.str();
}
std::string FormatKV(std::string const& key, int value) {
return FormatKV(key, static_cast<int64_t>(value));
}
std::string FormatKV(std::string const& key, double value) {
std::stringstream ss;
ss << '"' << StrEscape(key) << "\": ";
if (std::isnan(value))
if (std::isnan(value)) {
ss << (value < 0 ? "-" : "") << "NaN";
else if (std::isinf(value))
} else if (std::isinf(value)) {
ss << (value < 0 ? "-" : "") << "Infinity";
else {
} else {
const auto max_digits10 =
std::numeric_limits<decltype(value)>::max_digits10;
const auto max_fractional_digits10 = max_digits10 - 1;
@ -122,7 +126,7 @@ bool JSONReporter::ReportContext(const Context& context) {
out << indent << FormatKV("host_name", context.sys_info.name) << ",\n";
if (Context::executable_name) {
if (Context::executable_name != nullptr) {
out << indent << FormatKV("executable", Context::executable_name) << ",\n";
}
@ -136,7 +140,7 @@ bool JSONReporter::ReportContext(const Context& context) {
if (CPUInfo::Scaling::UNKNOWN != info.scaling) {
out << indent
<< FormatKV("cpu_scaling_enabled",
info.scaling == CPUInfo::Scaling::ENABLED ? true : false)
info.scaling == CPUInfo::Scaling::ENABLED)
<< ",\n";
}
@ -144,7 +148,7 @@ bool JSONReporter::ReportContext(const Context& context) {
indent = std::string(6, ' ');
std::string cache_indent(8, ' ');
for (size_t i = 0; i < info.caches.size(); ++i) {
auto& CI = info.caches[i];
const auto& CI = info.caches[i];
out << indent << "{\n";
out << cache_indent << FormatKV("type", CI.type) << ",\n";
out << cache_indent << FormatKV("level", static_cast<int64_t>(CI.level))
@ -155,7 +159,9 @@ bool JSONReporter::ReportContext(const Context& context) {
<< FormatKV("num_sharing", static_cast<int64_t>(CI.num_sharing))
<< "\n";
out << indent << "}";
if (i != info.caches.size() - 1) out << ",";
if (i != info.caches.size() - 1) {
out << ",";
}
out << "\n";
}
indent = std::string(4, ' ');
@ -163,7 +169,9 @@ bool JSONReporter::ReportContext(const Context& context) {
out << indent << "\"load_avg\": [";
for (auto it = info.load_avg.begin(); it != info.load_avg.end();) {
out << *it++;
if (it != info.load_avg.end()) out << ",";
if (it != info.load_avg.end()) {
out << ",";
}
}
out << "],\n";
@ -179,7 +187,7 @@ bool JSONReporter::ReportContext(const Context& context) {
out << ",\n";
// NOTE: our json schema is not strictly tied to the library version!
out << indent << FormatKV("json_schema_version", int64_t(1));
out << indent << FormatKV("json_schema_version", 1);
std::map<std::string, std::string>* global_context =
internal::GetGlobalContext();
@ -294,20 +302,21 @@ void JSONReporter::PrintRunData(Run const& run) {
out << indent << FormatKV("rms", run.GetAdjustedCPUTime());
}
for (auto& c : run.counters) {
for (const auto& c : run.counters) {
out << ",\n" << indent << FormatKV(c.first, c.second);
}
if (run.memory_result) {
const MemoryManager::Result memory_result = *run.memory_result;
if (run.memory_result.memory_iterations > 0) {
const auto& memory_result = run.memory_result;
out << ",\n" << indent << FormatKV("allocs_per_iter", run.allocs_per_iter);
out << ",\n"
<< indent << FormatKV("max_bytes_used", memory_result.max_bytes_used);
auto report_if_present = [&out, &indent](const std::string& label,
int64_t val) {
if (val != MemoryManager::TombstoneValue)
if (val != MemoryManager::TombstoneValue) {
out << ",\n" << indent << FormatKV(label, val);
}
};
report_if_present("total_allocated_bytes",
@ -321,7 +330,4 @@ void JSONReporter::PrintRunData(Run const& run) {
out << '\n';
}
const int64_t MemoryManager::TombstoneValue =
std::numeric_limits<int64_t>::max();
} // end namespace benchmark


@ -4,13 +4,6 @@
#include <iostream>
#include <ostream>
// NOTE: this is also defined in benchmark.h but we're trying to avoid a
// dependency.
// The _MSVC_LANG check should detect Visual Studio 2015 Update 3 and newer.
#if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)
#define BENCHMARK_HAS_CXX11
#endif
namespace benchmark {
namespace internal {
@ -31,13 +24,8 @@ class LogType {
// NOTE: we could use BENCHMARK_DISALLOW_COPY_AND_ASSIGN but we shouldn't have
// a dependency on benchmark.h from here.
#ifndef BENCHMARK_HAS_CXX11
LogType(const LogType&);
LogType& operator=(const LogType&);
#else
LogType(const LogType&) = delete;
LogType& operator=(const LogType&) = delete;
#endif
};
template <class Tp>


@ -26,8 +26,6 @@
namespace benchmark {
namespace internal {
constexpr size_t PerfCounterValues::kMaxCounters;
#if defined HAVE_LIBPFM
size_t PerfCounterValues::Read(const std::vector<int>& leaders) {


@ -121,15 +121,13 @@ inline bool Regex::Init(const std::string& spec, std::string* error) {
if (ec != 0) {
if (error) {
size_t needed = regerror(ec, &re_, nullptr, 0);
char* errbuf = new char[needed];
regerror(ec, &re_, errbuf, needed);
std::vector<char> errbuf(needed);
regerror(ec, &re_, errbuf.data(), needed);
// regerror returns the number of bytes necessary to null terminate
// the string, so we move that when assigning to error.
BM_CHECK_NE(needed, 0);
error->assign(errbuf, needed - 1);
delete[] errbuf;
error->assign(errbuf.data(), needed - 1);
}
return false;


@ -42,20 +42,23 @@ void BenchmarkReporter::PrintBasicContext(std::ostream *out,
Out << LocalDateTimeString() << "\n";
#endif
if (context.executable_name)
Out << "Running " << context.executable_name << "\n";
if (benchmark::BenchmarkReporter::Context::executable_name != nullptr) {
Out << "Running " << benchmark::BenchmarkReporter::Context::executable_name
<< "\n";
}
const CPUInfo &info = context.cpu_info;
Out << "Run on (" << info.num_cpus << " X "
<< (info.cycles_per_second / 1000000.0) << " MHz CPU "
<< ((info.num_cpus > 1) ? "s" : "") << ")\n";
if (info.caches.size() != 0) {
if (!info.caches.empty()) {
Out << "CPU Caches:\n";
for (auto &CInfo : info.caches) {
for (const auto &CInfo : info.caches) {
Out << " L" << CInfo.level << " " << CInfo.type << " "
<< (CInfo.size / 1024) << " KiB";
if (CInfo.num_sharing != 0)
if (CInfo.num_sharing != 0) {
Out << " (x" << (info.num_cpus / CInfo.num_sharing) << ")";
}
Out << "\n";
}
}
@ -63,7 +66,9 @@ void BenchmarkReporter::PrintBasicContext(std::ostream *out,
Out << "Load Average: ";
for (auto It = info.load_avg.begin(); It != info.load_avg.end();) {
Out << StrFormat("%.2f", *It++);
if (It != info.load_avg.end()) Out << ", ";
if (It != info.load_avg.end()) {
Out << ", ";
}
}
Out << "\n";
}
@ -105,13 +110,17 @@ std::string BenchmarkReporter::Run::benchmark_name() const {
double BenchmarkReporter::Run::GetAdjustedRealTime() const {
double new_time = real_accumulated_time * GetTimeUnitMultiplier(time_unit);
if (iterations != 0) new_time /= static_cast<double>(iterations);
if (iterations != 0) {
new_time /= static_cast<double>(iterations);
}
return new_time;
}
double BenchmarkReporter::Run::GetAdjustedCPUTime() const {
double new_time = cpu_accumulated_time * GetTimeUnitMultiplier(time_unit);
if (iterations != 0) new_time /= static_cast<double>(iterations);
if (iterations != 0) {
new_time /= static_cast<double>(iterations);
}
return new_time;
}


@ -26,17 +26,21 @@
namespace benchmark {
auto StatisticsSum = [](const std::vector<double>& v) {
const auto StatisticsSum = [](const std::vector<double>& v) {
return std::accumulate(v.begin(), v.end(), 0.0);
};
double StatisticsMean(const std::vector<double>& v) {
if (v.empty()) return 0.0;
if (v.empty()) {
return 0.0;
}
return StatisticsSum(v) * (1.0 / static_cast<double>(v.size()));
}
double StatisticsMedian(const std::vector<double>& v) {
if (v.size() < 3) return StatisticsMean(v);
if (v.size() < 3) {
return StatisticsMean(v);
}
std::vector<double> copy(v);
auto center = copy.begin() + v.size() / 2;
@ -47,29 +51,37 @@ double StatisticsMedian(const std::vector<double>& v) {
// before. Instead of resorting, we just look for the max value before it,
// which is not necessarily the element immediately preceding `center`, since
// `copy` is only partially sorted by `nth_element`.
if (v.size() % 2 == 1) return *center;
if (v.size() % 2 == 1) {
return *center;
}
auto center2 = std::max_element(copy.begin(), center);
return (*center + *center2) / 2.0;
}
// Return the sum of the squares of this sample set
auto SumSquares = [](const std::vector<double>& v) {
const auto SumSquares = [](const std::vector<double>& v) {
return std::inner_product(v.begin(), v.end(), v.begin(), 0.0);
};
auto Sqr = [](const double dat) { return dat * dat; };
auto Sqrt = [](const double dat) {
const auto Sqr = [](const double dat) { return dat * dat; };
const auto Sqrt = [](const double dat) {
// Avoid NaN due to imprecision in the calculations
if (dat < 0.0) return 0.0;
if (dat < 0.0) {
return 0.0;
}
return std::sqrt(dat);
};
double StatisticsStdDev(const std::vector<double>& v) {
const auto mean = StatisticsMean(v);
if (v.empty()) return mean;
if (v.empty()) {
return mean;
}
// Sample standard deviation is undefined for n = 1
if (v.size() == 1) return 0.0;
if (v.size() == 1) {
return 0.0;
}
const double avg_squares =
SumSquares(v) * (1.0 / static_cast<double>(v.size()));
@ -79,12 +91,16 @@ double StatisticsStdDev(const std::vector<double>& v) {
}
double StatisticsCV(const std::vector<double>& v) {
if (v.size() < 2) return 0.0;
if (v.size() < 2) {
return 0.0;
}
const auto stddev = StatisticsStdDev(v);
const auto mean = StatisticsMean(v);
if (std::fpclassify(mean) == FP_ZERO) return 0.0;
if (std::fpclassify(mean) == FP_ZERO) {
return 0.0;
}
return stddev / mean;
}
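These are the built-in reducers (mean, median, stddev, cv) computed across repetitions. The same machinery accepts user-defined statistics; a minimal sketch using the public `ComputeStatistics` hook:

```cpp
#include <benchmark/benchmark.h>

#include <algorithm>
#include <vector>

static void BM_Spin(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(state.iterations());
  }
}

// Adds a "max" row alongside mean/median/stddev/cv in the aggregate report.
BENCHMARK(BM_Spin)
    ->Repetitions(10)
    ->ComputeStatistics("max", [](const std::vector<double>& v) -> double {
      return *std::max_element(v.begin(), v.end());
    });
```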
@ -137,7 +153,9 @@ std::vector<BenchmarkReporter::Run> ComputeStats(
for (Run const& run : reports) {
BM_CHECK_EQ(reports[0].benchmark_name(), run.benchmark_name());
BM_CHECK_EQ(run_iterations, run.iterations);
if (run.skipped) continue;
if (run.skipped != 0u) {
continue;
}
real_accumulated_time_stat.emplace_back(run.real_accumulated_time);
cpu_accumulated_time_stat.emplace_back(run.cpu_accumulated_time);
// user counters
@ -158,7 +176,7 @@ std::vector<BenchmarkReporter::Run> ComputeStats(
}
const double iteration_rescale_factor =
double(reports.size()) / double(run_iterations);
static_cast<double>(reports.size()) / static_cast<double>(run_iterations);
for (const auto& Stat : *reports[0].statistics) {
// Get the data from the accumulator to BenchmarkReporter::Run's.


@ -29,7 +29,7 @@ static_assert(arraysize(kBigSIUnits) == arraysize(kBigIECUnits),
static_assert(arraysize(kSmallSIUnits) == arraysize(kBigSIUnits),
"Small SI and Big SI unit arrays must be the same size");
static const int64_t kUnitsSize = arraysize(kBigSIUnits);
const int64_t kUnitsSize = arraysize(kBigSIUnits);
void ToExponentAndMantissa(double val, int precision, double one_k,
std::string* mantissa, int64_t* exponent) {
@ -87,10 +87,14 @@ void ToExponentAndMantissa(double val, int precision, double one_k,
}
std::string ExponentToPrefix(int64_t exponent, bool iec) {
if (exponent == 0) return "";
if (exponent == 0) {
return {};
}
const int64_t index = (exponent > 0 ? exponent - 1 : -exponent - 1);
if (index >= kUnitsSize) return "";
if (index >= kUnitsSize) {
return {};
}
const char* const* array =
(exponent > 0 ? (iec ? kBigIECUnits : kBigSIUnits) : kSmallSIUnits);
@ -101,7 +105,7 @@ std::string ExponentToPrefix(int64_t exponent, bool iec) {
std::string ToBinaryStringFullySpecified(double value, int precision,
Counter::OneK one_k) {
std::string mantissa;
int64_t exponent;
int64_t exponent = 0;
ToExponentAndMantissa(value, precision,
one_k == Counter::kIs1024 ? 1024.0 : 1000.0, &mantissa,
&exponent);
@ -115,7 +119,7 @@ std::string StrFormatImp(const char* msg, va_list args) {
// TODO(ericwf): use std::array for first attempt to avoid one memory
// allocation; guess what the size might be
std::array<char, 256> local_buff;
std::array<char, 256> local_buff = {};
// 2015-10-08: vsnprintf is used instead of std::vsnprintf due to a limitation
// in the android-ndk
@ -124,9 +128,12 @@ std::string StrFormatImp(const char* msg, va_list args) {
va_end(args_cp);
// handle empty expansion
if (ret == 0) return std::string{};
if (static_cast<std::size_t>(ret) < local_buff.size())
if (ret == 0) {
return {};
}
if (static_cast<std::size_t>(ret) < local_buff.size()) {
return std::string(local_buff.data());
}
// we did not provide a long enough buffer on our first attempt.
// add 1 to size to account for null-byte in size cast to prevent overflow
@ -153,7 +160,9 @@ std::string StrFormat(const char* format, ...) {
}
std::vector<std::string> StrSplit(const std::string& str, char delim) {
if (str.empty()) return {};
if (str.empty()) {
return {};
}
std::vector<std::string> ret;
size_t first = 0;
size_t next = str.find(delim);


@ -9,7 +9,6 @@
#include "benchmark/benchmark.h"
#include "benchmark/export.h"
#include "check.h"
#include "internal_macros.h"
namespace benchmark {


@ -76,7 +76,6 @@
#include "benchmark/benchmark.h"
#include "check.h"
#include "cycleclock.h"
#include "internal_macros.h"
#include "log.h"
#include "string_util.h"
#include "timers.h"
@ -84,7 +83,7 @@
namespace benchmark {
namespace {
void PrintImp(std::ostream& out) { out << std::endl; }
void PrintImp(std::ostream& out) { out << '\n'; }
template <class First, class... Rest>
void PrintImp(std::ostream& out, First&& f, Rest&&... rest) {
@ -95,6 +94,7 @@ void PrintImp(std::ostream& out, First&& f, Rest&&... rest) {
template <class... Args>
BENCHMARK_NORETURN void PrintErrorAndDie(Args&&... args) {
PrintImp(std::cerr, std::forward<Args>(args)...);
std::cerr << std::flush;
std::exit(EXIT_FAILURE);
}
@ -120,7 +120,7 @@ struct ValueUnion {
explicit ValueUnion(std::size_t buff_size)
: size(sizeof(DataT) + buff_size),
buff(::new (std::malloc(size)) DataT(), &std::free) {}
buff(::new(std::malloc(size)) DataT(), &std::free) {}
ValueUnion(ValueUnion&& other) = default;
@ -153,11 +153,11 @@ ValueUnion GetSysctlImp(std::string const& name) {
int mib[2];
mib[0] = CTL_HW;
if ((name == "hw.ncpu") || (name == "hw.cpuspeed")) {
if ((name == "hw.ncpuonline") || (name == "hw.cpuspeed")) {
ValueUnion buff(sizeof(int));
if (name == "hw.ncpu") {
mib[1] = HW_NCPU;
if (name == "hw.ncpuonline") {
mib[1] = HW_NCPUONLINE;
} else {
mib[1] = HW_CPUSPEED;
}
@ -212,14 +212,18 @@ template <class ArgT>
bool ReadFromFile(std::string const& fname, ArgT* arg) {
*arg = ArgT();
std::ifstream f(fname.c_str());
if (!f.is_open()) return false;
if (!f.is_open()) {
return false;
}
f >> *arg;
return f.good();
}
CPUInfo::Scaling CpuScaling(int num_cpus) {
// We don't have a valid CPU count, so don't even bother.
if (num_cpus <= 0) return CPUInfo::Scaling::UNKNOWN;
if (num_cpus <= 0) {
return CPUInfo::Scaling::UNKNOWN;
}
#if defined(BENCHMARK_OS_QNX)
return CPUInfo::Scaling::UNKNOWN;
#elif !defined(BENCHMARK_OS_WINDOWS)
@ -230,8 +234,9 @@ CPUInfo::Scaling CpuScaling(int num_cpus) {
for (int cpu = 0; cpu < num_cpus; ++cpu) {
std::string governor_file =
StrCat("/sys/devices/system/cpu/cpu", cpu, "/cpufreq/scaling_governor");
if (ReadFromFile(governor_file, &res) && res != "performance")
if (ReadFromFile(governor_file, &res) && res != "performance") {
return CPUInfo::Scaling::ENABLED;
}
}
return CPUInfo::Scaling::DISABLED;
#else
@ -246,7 +251,7 @@ int CountSetBitsInCPUMap(std::string val) {
CPUMask mask(benchmark::stoul(part, nullptr, 16));
return static_cast<int>(mask.count());
};
std::size_t pos;
std::size_t pos = 0;
int total = 0;
while ((pos = val.find(',')) != std::string::npos) {
total += CountBits(val.substr(0, pos));
@ -267,28 +272,35 @@ std::vector<CPUInfo::CacheInfo> GetCacheSizesFromKVFS() {
CPUInfo::CacheInfo info;
std::string fpath = StrCat(dir, "index", idx++, "/");
std::ifstream f(StrCat(fpath, "size").c_str());
if (!f.is_open()) break;
if (!f.is_open()) {
break;
}
std::string suffix;
f >> info.size;
if (f.fail())
if (f.fail()) {
PrintErrorAndDie("Failed while reading file '", fpath, "size'");
}
if (f.good()) {
f >> suffix;
if (f.bad())
if (f.bad()) {
PrintErrorAndDie(
"Invalid cache size format: failed to read size suffix");
else if (f && suffix != "K")
} else if (f && suffix != "K") {
PrintErrorAndDie("Invalid cache size format: Expected bytes ", suffix);
else if (suffix == "K")
} else if (suffix == "K") {
info.size *= 1024;
}
}
if (!ReadFromFile(StrCat(fpath, "type"), &info.type))
if (!ReadFromFile(StrCat(fpath, "type"), &info.type)) {
PrintErrorAndDie("Failed to read from file ", fpath, "type");
if (!ReadFromFile(StrCat(fpath, "level"), &info.level))
}
if (!ReadFromFile(StrCat(fpath, "level"), &info.level)) {
PrintErrorAndDie("Failed to read from file ", fpath, "level");
}
std::string map_str;
if (!ReadFromFile(StrCat(fpath, "shared_cpu_map"), &map_str))
if (!ReadFromFile(StrCat(fpath, "shared_cpu_map"), &map_str)) {
PrintErrorAndDie("Failed to read from file ", fpath, "shared_cpu_map");
}
info.num_sharing = CountSetBitsInCPUMap(map_str);
res.push_back(info);
}
@ -333,15 +345,18 @@ std::vector<CPUInfo::CacheInfo> GetCacheSizesWindows() {
using UPtr = std::unique_ptr<PInfo, decltype(&std::free)>;
GetLogicalProcessorInformation(nullptr, &buffer_size);
UPtr buff(static_cast<PInfo*>(std::malloc(buffer_size)), &std::free);
if (!GetLogicalProcessorInformation(buff.get(), &buffer_size))
if (!GetLogicalProcessorInformation(buff.get(), &buffer_size)) {
PrintErrorAndDie("Failed during call to GetLogicalProcessorInformation: ",
GetLastError());
}
PInfo* it = buff.get();
PInfo* end = buff.get() + (buffer_size / sizeof(PInfo));
for (; it != end; ++it) {
if (it->Relationship != RelationCache) continue;
if (it->Relationship != RelationCache) {
continue;
}
using BitSet = std::bitset<sizeof(ULONG_PTR) * CHAR_BIT>;
BitSet b(it->ProcessorMask);
// To prevent duplicates, only consider caches where CPU 0 is specified
@ -353,6 +368,11 @@ std::vector<CPUInfo::CacheInfo> GetCacheSizesWindows() {
C.size = static_cast<int>(cache.Size);
C.type = "Unknown";
switch (cache.Type) {
// Windows SDK version >= 10.0.26100.0
#ifdef NTDDI_WIN11_GE
case CacheUnknown:
break;
#endif
case CacheUnified:
C.type = "Unified";
break;
@ -469,17 +489,12 @@ std::string GetSystemName() {
#endif // def HOST_NAME_MAX
char hostname[HOST_NAME_MAX];
int retVal = gethostname(hostname, HOST_NAME_MAX);
if (retVal != 0) return std::string("");
return std::string(hostname);
return retVal != 0 ? std::string() : std::string(hostname);
#endif // Catch-all POSIX block.
}
int GetNumCPUsImpl() {
#ifdef BENCHMARK_HAS_SYSCTL
int num_cpu = -1;
if (GetSysctl("hw.ncpu", &num_cpu)) return num_cpu;
PrintErrorAndDie("Err: ", strerror(errno));
#elif defined(BENCHMARK_OS_WINDOWS)
#ifdef BENCHMARK_OS_WINDOWS
SYSTEM_INFO sysinfo;
// Use memset as opposed to = {} to avoid GCC missing initializer false
// positives.
@ -487,14 +502,6 @@ int GetNumCPUsImpl() {
GetSystemInfo(&sysinfo);
// number of logical processors in the current group
return static_cast<int>(sysinfo.dwNumberOfProcessors);
#elif defined(BENCHMARK_OS_SOLARIS)
// Returns -1 in case of a failure.
long num_cpu = sysconf(_SC_NPROCESSORS_ONLN);
if (num_cpu < 0) {
PrintErrorAndDie("sysconf(_SC_NPROCESSORS_ONLN) failed with error: ",
strerror(errno));
}
return (int)num_cpu;
#elif defined(BENCHMARK_OS_QNX)
return static_cast<int>(_syspage_ptr->num_cpu);
#elif defined(BENCHMARK_OS_QURT)
@ -502,64 +509,42 @@ int GetNumCPUsImpl() {
if (qurt_sysenv_get_max_hw_threads(&hardware_threads) != QURT_EOK) {
hardware_threads.max_hthreads = 1;
}
return hardware_threads.max_hthreads;
return static_cast<int>(hardware_threads.max_hthreads);
#elif defined(BENCHMARK_HAS_SYSCTL)
// *BSD, macOS
int num_cpu = -1;
constexpr auto* hwncpu =
#if defined BENCHMARK_OS_MACOSX
"hw.logicalcpu";
#elif defined(HW_NCPUONLINE)
"hw.ncpuonline";
#else
int num_cpus = 0;
int max_id = -1;
std::ifstream f("/proc/cpuinfo");
if (!f.is_open()) {
std::cerr << "Failed to open /proc/cpuinfo\n";
return -1;
}
#if defined(__alpha__)
const std::string Key = "cpus detected";
#else
const std::string Key = "processor";
"hw.ncpu";
#endif
std::string ln;
while (std::getline(f, ln)) {
if (ln.empty()) continue;
std::size_t split_idx = ln.find(':');
std::string value;
#if defined(__s390__)
// s390 has another format in /proc/cpuinfo
// it needs to be parsed differently
if (split_idx != std::string::npos)
value = ln.substr(Key.size() + 1, split_idx - Key.size() - 1);
if (GetSysctl(hwncpu, &num_cpu)) return num_cpu;
PrintErrorAndDie("Err: ", strerror(errno));
#elif defined(_SC_NPROCESSORS_ONLN)
// Linux, Solaris, AIX, Haiku, WASM, etc.
// Returns -1 in case of a failure.
int num_cpu = static_cast<int>(sysconf(_SC_NPROCESSORS_ONLN));
if (num_cpu < 0) {
PrintErrorAndDie("sysconf(_SC_NPROCESSORS_ONLN) failed with error: ",
strerror(errno));
}
return num_cpu;
#else
if (split_idx != std::string::npos) value = ln.substr(split_idx + 1);
#endif
if (ln.size() >= Key.size() && ln.compare(0, Key.size(), Key) == 0) {
num_cpus++;
if (!value.empty()) {
const int cur_id = benchmark::stoi(value);
max_id = std::max(cur_id, max_id);
}
}
}
if (f.bad()) {
PrintErrorAndDie("Failure reading /proc/cpuinfo");
}
if (!f.eof()) {
PrintErrorAndDie("Failed to read to end of /proc/cpuinfo");
}
f.close();
if ((max_id + 1) != num_cpus) {
fprintf(stderr,
"CPU ID assignments in /proc/cpuinfo seem messed up."
" This is usually caused by a bad BIOS.\n");
}
return num_cpus;
// Fallback, no other API exists.
return -1;
#endif
BENCHMARK_UNREACHABLE();
}
int GetNumCPUs() {
const int num_cpus = GetNumCPUsImpl();
int num_cpus = GetNumCPUsImpl();
if (num_cpus < 1) {
std::cerr << "Unable to extract number of CPUs. If your platform uses "
"/proc/cpuinfo, custom support may need to be added.\n";
std::cerr << "Unable to extract number of CPUs.\n";
// There must be at least one CPU on which we're running.
num_cpus = 1;
}
return num_cpus;
}
@ -567,22 +552,28 @@ int GetNumCPUs() {
class ThreadAffinityGuard final {
public:
ThreadAffinityGuard() : reset_affinity(SetAffinity()) {
if (!reset_affinity)
if (!reset_affinity) {
std::cerr << "***WARNING*** Failed to set thread affinity. Estimated CPU "
"frequency may be incorrect."
<< std::endl;
"frequency may be incorrect.\n";
}
}
~ThreadAffinityGuard() {
if (!reset_affinity) return;
if (!reset_affinity) {
return;
}
#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY)
int ret = pthread_setaffinity_np(self, sizeof(previous_affinity),
&previous_affinity);
if (ret == 0) return;
if (ret == 0) {
return;
}
#elif defined(BENCHMARK_OS_WINDOWS_WIN32)
DWORD_PTR ret = SetThreadAffinityMask(self, previous_affinity);
if (ret != 0) return;
if (ret != 0) {
return;
}
#endif // def BENCHMARK_HAS_PTHREAD_AFFINITY
PrintErrorAndDie("Failed to reset thread affinity");
}
@ -595,26 +586,32 @@ class ThreadAffinityGuard final {
private:
bool SetAffinity() {
#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY)
int ret;
int ret = 0;
self = pthread_self();
ret = pthread_getaffinity_np(self, sizeof(previous_affinity),
&previous_affinity);
if (ret != 0) return false;
if (ret != 0) {
return false;
}
cpu_set_t affinity;
memcpy(&affinity, &previous_affinity, sizeof(affinity));
bool is_first_cpu = true;
for (int i = 0; i < CPU_SETSIZE; ++i)
for (int i = 0; i < CPU_SETSIZE; ++i) {
if (CPU_ISSET(i, &affinity)) {
if (is_first_cpu)
if (is_first_cpu) {
is_first_cpu = false;
else
} else {
CPU_CLR(i, &affinity);
}
}
}
if (is_first_cpu) return false;
if (is_first_cpu) {
return false;
}
ret = pthread_setaffinity_np(self, sizeof(affinity), &affinity);
return ret == 0;
@ -629,8 +626,8 @@ class ThreadAffinityGuard final {
}
#if defined(BENCHMARK_HAS_PTHREAD_AFFINITY)
pthread_t self;
cpu_set_t previous_affinity;
pthread_t self{};
cpu_set_t previous_affinity{};
#elif defined(BENCHMARK_OS_WINDOWS_WIN32)
HANDLE self;
DWORD_PTR previous_affinity;
@ -644,7 +641,7 @@ double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) {
(void)scaling;
#if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN
long freq;
long freq = 0;
// If the kernel is exporting the tsc frequency use that. There are issues
// where cpuinfo_max_freq cannot be relied on because the BIOS may be
@ -679,7 +676,9 @@ double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) {
}
auto StartsWithKey = [](std::string const& Value, std::string const& Key) {
if (Key.size() > Value.size()) return false;
if (Key.size() > Value.size()) {
return false;
}
auto Cmp = [&](char X, char Y) {
return std::tolower(X) == std::tolower(Y);
};
@ -688,22 +687,30 @@ double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) {
std::string ln;
while (std::getline(f, ln)) {
if (ln.empty()) continue;
if (ln.empty()) {
continue;
}
std::size_t split_idx = ln.find(':');
std::string value;
if (split_idx != std::string::npos) value = ln.substr(split_idx + 1);
if (split_idx != std::string::npos) {
value = ln.substr(split_idx + 1);
}
// When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only
// accept positive values. Some environments (virtual machines) report zero,
// which would cause infinite looping in WallTime_Init.
if (StartsWithKey(ln, "cpu MHz")) {
if (!value.empty()) {
double cycles_per_second = benchmark::stod(value) * 1000000.0;
if (cycles_per_second > 0) return cycles_per_second;
if (cycles_per_second > 0) {
return cycles_per_second;
}
}
} else if (StartsWithKey(ln, "bogomips")) {
if (!value.empty()) {
bogo_clock = benchmark::stod(value) * 1000000.0;
if (bogo_clock < 0.0) bogo_clock = error_value;
if (bogo_clock < 0.0) {
bogo_clock = error_value;
}
}
}
}
@ -719,7 +726,9 @@ double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) {
// If we found the bogomips clock, but nothing better, we'll use it (but
// we're not happy about it); otherwise, fallback to the rough estimation
// below.
if (bogo_clock >= 0.0) return bogo_clock;
if (bogo_clock >= 0.0) {
return bogo_clock;
}
#elif defined BENCHMARK_HAS_SYSCTL
constexpr auto* freqStr =
@ -734,9 +743,13 @@ double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) {
#endif
unsigned long long hz = 0;
#if defined BENCHMARK_OS_OPENBSD
if (GetSysctl(freqStr, &hz)) return static_cast<double>(hz * 1000000);
if (GetSysctl(freqStr, &hz)) {
return static_cast<double>(hz * 1000000);
}
#else
if (GetSysctl(freqStr, &hz)) return static_cast<double>(hz);
if (GetSysctl(freqStr, &hz)) {
return static_cast<double>(hz);
}
#endif
fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n",
freqStr, strerror(errno));
@ -752,9 +765,10 @@ double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) {
SUCCEEDED(
SHGetValueA(HKEY_LOCAL_MACHINE,
"HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
"~MHz", nullptr, &data, &data_size)))
"~MHz", nullptr, &data, &data_size))) {
return static_cast<double>(static_cast<int64_t>(data) *
static_cast<int64_t>(1000 * 1000)); // was mhz
}
#elif defined(BENCHMARK_OS_SOLARIS)
kstat_ctl_t* kc = kstat_open();
if (!kc) {

View File

@ -11,30 +11,15 @@ namespace internal {
class ThreadManager {
public:
explicit ThreadManager(int num_threads)
: alive_threads_(num_threads), start_stop_barrier_(num_threads) {}
explicit ThreadManager(int num_threads) : start_stop_barrier_(num_threads) {}
Mutex& GetBenchmarkMutex() const RETURN_CAPABILITY(benchmark_mutex_) {
return benchmark_mutex_;
}
bool StartStopBarrier() EXCLUDES(end_cond_mutex_) {
return start_stop_barrier_.wait();
}
bool StartStopBarrier() { return start_stop_barrier_.wait(); }
void NotifyThreadComplete() EXCLUDES(end_cond_mutex_) {
start_stop_barrier_.removeThread();
if (--alive_threads_ == 0) {
MutexLock lock(end_cond_mutex_);
end_condition_.notify_all();
}
}
void WaitForAllThreads() EXCLUDES(end_cond_mutex_) {
MutexLock lock(end_cond_mutex_);
end_condition_.wait(lock.native_handle(),
[this]() { return alive_threads_ == 0; });
}
void NotifyThreadComplete() { start_stop_barrier_.removeThread(); }
struct Result {
IterationCount iterations = 0;
@ -51,10 +36,7 @@ class ThreadManager {
private:
mutable Mutex benchmark_mutex_;
std::atomic<int> alive_threads_;
Barrier start_stop_barrier_;
Mutex end_cond_mutex_;
Condition end_condition_;
};
} // namespace internal
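Note: dropping WaitForAllThreads relies on the spawning side already join()ing every worker before it reads the results. A self-contained sketch of that pattern, with illustrative names (RunWorkers, body) that are not part of the library:

#include <functional>
#include <thread>
#include <vector>

// Thread 0 runs the body inline; the remaining workers are joined, and the
// join() calls already wait for every thread, so no extra end-of-run
// condition variable is needed.
void RunWorkers(int num_threads, const std::function<void(int)>& body) {
  std::vector<std::thread> pool;
  for (int t = 1; t < num_threads; ++t) {
    pool.emplace_back(body, t);
  }
  body(0);
  for (std::thread& th : pool) {
    th.join();
  }
}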

View File

@ -107,8 +107,9 @@ double MakeTime(struct timespec const& ts) {
}
#endif
BENCHMARK_NORETURN static void DiagnoseAndExit(const char* msg) {
std::cerr << "ERROR: " << msg << std::endl;
BENCHMARK_NORETURN void DiagnoseAndExit(const char* msg) {
std::cerr << "ERROR: " << msg << '\n';
std::flush(std::cerr);
std::exit(EXIT_FAILURE);
}
@ -142,9 +143,10 @@ double ProcessCPUUsage() {
#elif defined(CLOCK_PROCESS_CPUTIME_ID) && !defined(BENCHMARK_OS_MACOSX)
// FIXME We want to use clock_gettime, but it's not available in MacOS 10.11.
// See https://github.com/google/benchmark/pull/292
struct timespec spec;
if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &spec) == 0)
struct timespec spec {};
if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &spec) == 0) {
return MakeTime(spec);
}
DiagnoseAndExit("clock_gettime(CLOCK_PROCESS_CPUTIME_ID, ...) failed");
#else
struct rusage ru;
@ -198,8 +200,10 @@ double ThreadCPUUsage() {
if (getrusage(RUSAGE_LWP, &ru) == 0) return MakeTime(ru);
DiagnoseAndExit("getrusage(RUSAGE_LWP, ...) failed");
#elif defined(CLOCK_THREAD_CPUTIME_ID)
struct timespec ts;
if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) == 0) return MakeTime(ts);
struct timespec ts {};
if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) == 0) {
return MakeTime(ts);
}
DiagnoseAndExit("clock_gettime(CLOCK_THREAD_CPUTIME_ID, ...) failed");
#else
#error Per-thread timing is not available on your system.
@ -213,9 +217,9 @@ std::string LocalDateTimeString() {
const std::size_t kTzOffsetLen = 6;
const std::size_t kTimestampLen = 19;
std::size_t tz_len;
std::size_t timestamp_len;
long int offset_minutes;
std::size_t tz_len = 0;
std::size_t timestamp_len = 0;
long int offset_minutes = 0;
char tz_offset_sign = '+';
// tz_offset is set in one of three ways:
// * strftime with %z - This either returns empty or the ISO 8601 time. The
@ -235,7 +239,7 @@ std::string LocalDateTimeString() {
#if defined(BENCHMARK_OS_WINDOWS)
std::tm* timeinfo_p = ::localtime(&now);
#else
std::tm timeinfo;
std::tm timeinfo{};
std::tm* timeinfo_p = &timeinfo;
::localtime_r(&now, &timeinfo);
#endif

View File

@ -10,7 +10,7 @@ platform(
TEST_COPTS = [
"-pedantic",
"-pedantic-errors",
"-std=c++11",
"-std=c++17",
"-Wall",
"-Wconversion",
"-Wextra",
@ -24,6 +24,10 @@ TEST_COPTS = [
"-Werror=old-style-cast",
]
TEST_MSVC_OPTS = [
"/std:c++17",
]
# Some of the issues with DoNotOptimize only occur when optimization is enabled
PER_SRC_COPTS = {
"donotoptimize_test.cc": ["-O3"],
@ -45,7 +49,7 @@ cc_library(
srcs = ["output_test_helper.cc"],
hdrs = ["output_test.h"],
copts = select({
"//:windows": [],
"//:windows": TEST_MSVC_OPTS,
"//conditions:default": TEST_COPTS,
}),
deps = [
@ -61,7 +65,7 @@ cc_library(
size = "small",
srcs = [test_src],
copts = select({
"//:windows": [],
"//:windows": TEST_MSVC_OPTS,
"//conditions:default": TEST_COPTS,
}) + PER_SRC_COPTS.get(test_src, []),
deps = [
@ -82,7 +86,7 @@ cc_library(
srcs = [test_src],
args = TEST_ARGS + PER_SRC_TEST_ARGS.get(test_src, []),
copts = select({
"//:windows": [],
"//:windows": TEST_MSVC_OPTS,
"//conditions:default": TEST_COPTS,
}) + PER_SRC_COPTS.get(test_src, []),
deps = [
@ -98,25 +102,24 @@ cc_library(
["*_test.cc"],
exclude = [
"*_assembly_test.cc",
"cxx03_test.cc",
"cxx11_test.cc",
"link_main_test.cc",
],
)
]
cc_test(
name = "cxx03_test",
name = "cxx11_test",
size = "small",
srcs = ["cxx03_test.cc"],
copts = TEST_COPTS + ["-std=c++03"],
srcs = ["cxx11_test.cc"],
copts = TEST_COPTS + ["-std=c++11"],
target_compatible_with = select({
"//:windows": ["@platforms//:incompatible"],
"//conditions:default": [],
}),
deps = [
":output_test_helper",
"//:benchmark",
"//:benchmark_internal_headers",
"//:benchmark_main",
],
)
@ -125,7 +128,7 @@ cc_test(
size = "small",
srcs = ["link_main_test.cc"],
copts = select({
"//:windows": [],
"//:windows": TEST_MSVC_OPTS,
"//conditions:default": TEST_COPTS,
}),
deps = ["//:benchmark_main"],

View File

@ -1,4 +1,4 @@
# Enable the tests
#Enable the tests
set(THREADS_PREFER_PTHREAD_FLAG ON)
@ -73,6 +73,18 @@ macro(benchmark_add_test)
endmacro(benchmark_add_test)
# Demonstration executable
compile_benchmark_test_with_main(cxx11_test)
if(DEFINED MSVC)
# MSVC does not really support C++11.
set_property(TARGET cxx11_test PROPERTY CXX_STANDARD 14)
else()
set_property(TARGET cxx11_test PROPERTY CXX_STANDARD 11)
endif()
set_property(TARGET cxx11_test PROPERTY CXX_STANDARD_REQUIRED ON)
set_property(TARGET cxx11_test PROPERTY CXX_EXTENSIONS OFF)
benchmark_add_test(NAME cxx11_test COMMAND cxx11_test --benchmark_min_time=0.01s)
compile_benchmark_test(benchmark_test)
benchmark_add_test(NAME benchmark COMMAND benchmark_test --benchmark_min_time=0.01s)
@ -168,6 +180,9 @@ benchmark_add_test(NAME reporter_output_test COMMAND reporter_output_test --benc
compile_output_test(templated_fixture_test)
benchmark_add_test(NAME templated_fixture_test COMMAND templated_fixture_test --benchmark_min_time=0.01s)
compile_output_test(templated_fixture_method_test)
benchmark_add_test(NAME templated_fixture_method_test COMMAND templated_fixture_method_test --benchmark_min_time=0.01s)
compile_output_test(user_counters_test)
benchmark_add_test(NAME user_counters_test COMMAND user_counters_test --benchmark_min_time=0.01s)
@ -177,6 +192,9 @@ benchmark_add_test(NAME perf_counters_test COMMAND perf_counters_test --benchmar
compile_output_test(internal_threading_test)
benchmark_add_test(NAME internal_threading_test COMMAND internal_threading_test --benchmark_min_time=0.01s)
compile_output_test(manual_threading_test)
benchmark_add_test(NAME manual_threading_test COMMAND manual_threading_test --benchmark_min_time=0.01s)
compile_output_test(report_aggregates_only_test)
benchmark_add_test(NAME report_aggregates_only_test COMMAND report_aggregates_only_test --benchmark_min_time=0.01s)
@ -195,31 +213,8 @@ benchmark_add_test(NAME memory_manager_test COMMAND memory_manager_test --benchm
compile_output_test(profiler_manager_test)
benchmark_add_test(NAME profiler_manager_test COMMAND profiler_manager_test --benchmark_min_time=0.01s)
# MSVC does not allow to set the language standard to C++98/03.
if(NOT (MSVC OR CMAKE_CXX_SIMULATE_ID STREQUAL "MSVC"))
compile_benchmark_test(cxx03_test)
set_target_properties(cxx03_test
PROPERTIES
CXX_STANDARD 98
CXX_STANDARD_REQUIRED YES)
# libstdc++ provides different definitions within <map> between dialects. When
# LTO is enabled and -Werror is specified GCC diagnoses this ODR violation
# causing the test to fail to compile. To prevent this we explicitly disable
# the warning.
check_cxx_compiler_flag(-Wno-odr BENCHMARK_HAS_WNO_ODR)
check_cxx_compiler_flag(-Wno-lto-type-mismatch BENCHMARK_HAS_WNO_LTO_TYPE_MISMATCH)
# Cannot set_target_properties multiple times here because the warnings will
# be overwritten on each call
set (DISABLE_LTO_WARNINGS "")
if (BENCHMARK_HAS_WNO_ODR)
set(DISABLE_LTO_WARNINGS "${DISABLE_LTO_WARNINGS} -Wno-odr")
endif()
if (BENCHMARK_HAS_WNO_LTO_TYPE_MISMATCH)
set(DISABLE_LTO_WARNINGS "${DISABLE_LTO_WARNINGS} -Wno-lto-type-mismatch")
endif()
set_target_properties(cxx03_test PROPERTIES LINK_FLAGS "${DISABLE_LTO_WARNINGS}")
benchmark_add_test(NAME cxx03 COMMAND cxx03_test --benchmark_min_time=0.01s)
endif()
compile_benchmark_test(profiler_manager_iterations_test)
benchmark_add_test(NAME profiler_manager_iterations COMMAND profiler_manager_iterations_test)
compile_output_test(complexity_test)
benchmark_add_test(NAME complexity_benchmark COMMAND complexity_test --benchmark_min_time=1000000x)
@ -254,6 +249,9 @@ if (BENCHMARK_ENABLE_GTEST_TESTS)
add_gtest(perf_counters_gtest)
add_gtest(time_unit_gtest)
add_gtest(min_time_parse_gtest)
add_gtest(profiler_manager_gtest)
add_gtest(benchmark_setup_teardown_cb_types_gtest)
add_gtest(memory_results_gtest)
endif(BENCHMARK_ENABLE_GTEST_TESTS)
###############################################################################
@ -295,7 +293,7 @@ if (${CMAKE_BUILD_TYPE_LOWER} MATCHES "coverage")
COMMAND ${LCOV} -q -a before.lcov -a after.lcov --output-file final.lcov
COMMAND ${LCOV} -q -r final.lcov "'${CMAKE_SOURCE_DIR}/test/*'" -o final.lcov
COMMAND ${GENHTML} final.lcov -o lcov --demangle-cpp --sort -p "${CMAKE_BINARY_DIR}" -t benchmark
DEPENDS filter_test benchmark_test options_test basic_test fixture_test cxx03_test complexity_test
DEPENDS filter_test benchmark_test options_test basic_test fixture_test complexity_test
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
COMMENT "Running LCOV"
)

View File

@ -5,7 +5,8 @@
void BM_empty(benchmark::State& state) {
for (auto _ : state) {
auto iterations = double(state.iterations()) * double(state.iterations());
auto iterations = static_cast<double>(state.iterations()) *
static_cast<double>(state.iterations());
benchmark::DoNotOptimize(iterations);
}
}
@ -142,7 +143,6 @@ void BM_RangedFor(benchmark::State& state) {
}
BENCHMARK(BM_RangedFor);
#ifdef BENCHMARK_HAS_CXX11
template <typename T>
void BM_OneTemplateFunc(benchmark::State& state) {
auto arg = state.range(0);
@ -167,8 +167,6 @@ void BM_TwoTemplateFunc(benchmark::State& state) {
BENCHMARK(BM_TwoTemplateFunc<int, double>)->Arg(1);
BENCHMARK(BM_TwoTemplateFunc<double, int>)->Arg(1);
#endif // BENCHMARK_HAS_CXX11
// Ensure that StateIterator provides all the necessary typedefs required to
// instantiate std::iterator_traits.
static_assert(

View File

@ -13,11 +13,11 @@ namespace {
class TestReporter : public benchmark::ConsoleReporter {
public:
virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE {
bool ReportContext(const Context& context) override {
return ConsoleReporter::ReportContext(context);
};
virtual void ReportRuns(const std::vector<Run>& report) BENCHMARK_OVERRIDE {
void ReportRuns(const std::vector<Run>& report) override {
assert(report.size() == 1);
iter_nums_.push_back(report[0].iterations);
ConsoleReporter::ReportRuns(report);
@ -25,7 +25,7 @@ class TestReporter : public benchmark::ConsoleReporter {
TestReporter() {}
virtual ~TestReporter() {}
~TestReporter() override {}
const std::vector<benchmark::IterationCount>& GetIters() const {
return iter_nums_;
@ -46,11 +46,13 @@ BENCHMARK(BM_MyBench);
int main(int argc, char** argv) {
// Make a fake argv and append the new --benchmark_min_time=<foo> to it.
int fake_argc = argc + 1;
const char** fake_argv = new const char*[static_cast<size_t>(fake_argc)];
for (int i = 0; i < argc; ++i) fake_argv[i] = argv[i];
fake_argv[argc] = "--benchmark_min_time=4x";
std::vector<const char*> fake_argv(static_cast<size_t>(fake_argc));
for (size_t i = 0; i < static_cast<size_t>(argc); ++i) {
fake_argv[i] = argv[i];
}
fake_argv[static_cast<size_t>(argc)] = "--benchmark_min_time=4x";
benchmark::Initialize(&fake_argc, const_cast<char**>(fake_argv));
benchmark::Initialize(&fake_argc, const_cast<char**>(fake_argv.data()));
TestReporter test_reporter;
const size_t returned_count =
@ -61,6 +63,5 @@ int main(int argc, char** argv) {
const std::vector<benchmark::IterationCount> iters = test_reporter.GetIters();
assert(!iters.empty() && iters[0] == 4);
delete[] fake_argv;
return 0;
}

View File

@ -19,23 +19,23 @@ typedef int64_t IterationCount;
class TestReporter : public benchmark::ConsoleReporter {
public:
virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE {
bool ReportContext(const Context& context) override {
return ConsoleReporter::ReportContext(context);
};
virtual void ReportRuns(const std::vector<Run>& report) BENCHMARK_OVERRIDE {
void ReportRuns(const std::vector<Run>& report) override {
assert(report.size() == 1);
ConsoleReporter::ReportRuns(report);
};
virtual void ReportRunsConfig(double min_time, bool /* has_explicit_iters */,
IterationCount /* iters */) BENCHMARK_OVERRIDE {
void ReportRunsConfig(double min_time, bool /* has_explicit_iters */,
IterationCount /* iters */) override {
min_times_.push_back(min_time);
}
TestReporter() {}
virtual ~TestReporter() {}
~TestReporter() override {}
const std::vector<double>& GetMinTimes() const { return min_times_; }
@ -71,20 +71,21 @@ BENCHMARK(BM_MyBench);
int main(int argc, char** argv) {
// Make a fake argv and append the new --benchmark_min_time=<foo> to it.
int fake_argc = argc + 1;
const char** fake_argv = new const char*[static_cast<size_t>(fake_argc)];
std::vector<const char*> fake_argv(static_cast<size_t>(fake_argc));
for (int i = 0; i < argc; ++i) fake_argv[i] = argv[i];
for (size_t i = 0; i < static_cast<size_t>(argc); ++i) {
fake_argv[i] = argv[i];
}
const char* no_suffix = "--benchmark_min_time=4";
const char* with_suffix = "--benchmark_min_time=4.0s";
double expected = 4.0;
fake_argv[argc] = no_suffix;
DoTestHelper(&fake_argc, fake_argv, expected);
fake_argv[static_cast<size_t>(argc)] = no_suffix;
DoTestHelper(&fake_argc, fake_argv.data(), expected);
fake_argv[argc] = with_suffix;
DoTestHelper(&fake_argc, fake_argv, expected);
fake_argv[static_cast<size_t>(argc)] = with_suffix;
DoTestHelper(&fake_argc, fake_argv.data(), expected);
delete[] fake_argv;
return 0;
}

View File

@ -34,7 +34,8 @@ class EventQueue : public std::queue<std::string> {
}
};
EventQueue* queue = new EventQueue();
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
EventQueue* const queue = new EventQueue();
class NullReporter : public BenchmarkReporter {
public:
@ -48,7 +49,7 @@ class BenchmarkTest : public testing::Test {
static void TeardownHook(int /* num_threads */) { queue->push("Teardown"); }
void Execute(const std::string& pattern) {
static void Execute(const std::string& pattern) {
queue->Clear();
std::unique_ptr<BenchmarkReporter> reporter(new NullReporter());

View File

@ -0,0 +1,126 @@
#include "benchmark/benchmark.h"
#include "gtest/gtest.h"
using benchmark::BenchmarkReporter;
using benchmark::callback_function;
using benchmark::ClearRegisteredBenchmarks;
using benchmark::RegisterBenchmark;
using benchmark::RunSpecifiedBenchmarks;
using benchmark::State;
using benchmark::internal::Benchmark;
static int functor_called = 0;
struct Functor {
void operator()(const benchmark::State& /*unused*/) { functor_called++; }
};
class NullReporter : public BenchmarkReporter {
public:
bool ReportContext(const Context& /*context*/) override { return true; }
void ReportRuns(const std::vector<Run>& /* report */) override {}
};
class BenchmarkTest : public testing::Test {
public:
Benchmark* bm;
NullReporter null_reporter;
int setup_calls;
int teardown_calls;
void SetUp() override {
setup_calls = 0;
teardown_calls = 0;
functor_called = 0;
bm = RegisterBenchmark("BM", [](State& st) {
for (auto _ : st) {
}
});
bm->Iterations(1);
}
void TearDown() override { ClearRegisteredBenchmarks(); }
};
// Test that Setup/Teardown can correctly take a lambda expressions
TEST_F(BenchmarkTest, LambdaTestCopy) {
auto setup_lambda = [this](const State&) { setup_calls++; };
auto teardown_lambda = [this](const State&) { teardown_calls++; };
bm->Setup(setup_lambda);
bm->Teardown(teardown_lambda);
RunSpecifiedBenchmarks(&null_reporter);
EXPECT_EQ(setup_calls, 1);
EXPECT_EQ(teardown_calls, 1);
}
// Test that Setup/Teardown can correctly take a lambda expressions
TEST_F(BenchmarkTest, LambdaTestMove) {
auto setup_lambda = [this](const State&) { setup_calls++; };
auto teardown_lambda = [this](const State&) { teardown_calls++; };
bm->Setup(std::move(setup_lambda));
bm->Teardown(std::move(teardown_lambda));
RunSpecifiedBenchmarks(&null_reporter);
EXPECT_EQ(setup_calls, 1);
EXPECT_EQ(teardown_calls, 1);
}
// Test that Setup/Teardown can correctly take std::function
TEST_F(BenchmarkTest, CallbackFunctionCopy) {
callback_function setup_lambda = [this](const State&) { setup_calls++; };
callback_function teardown_lambda = [this](const State&) {
teardown_calls++;
};
bm->Setup(setup_lambda);
bm->Teardown(teardown_lambda);
RunSpecifiedBenchmarks(&null_reporter);
EXPECT_EQ(setup_calls, 1);
EXPECT_EQ(teardown_calls, 1);
}
// Test that Setup/Teardown can correctly take std::function
TEST_F(BenchmarkTest, CallbackFunctionMove) {
callback_function setup_lambda = [this](const State&) { setup_calls++; };
callback_function teardown_lambda = [this](const State&) {
teardown_calls++;
};
bm->Setup(std::move(setup_lambda));
bm->Teardown(std::move(teardown_lambda));
RunSpecifiedBenchmarks(&null_reporter);
EXPECT_EQ(setup_calls, 1);
EXPECT_EQ(teardown_calls, 1);
}
// Test that Setup/Teardown can correctly take functors
TEST_F(BenchmarkTest, FunctorCopy) {
Functor func;
bm->Setup(func);
bm->Teardown(func);
RunSpecifiedBenchmarks(&null_reporter);
EXPECT_EQ(functor_called, 2);
}
// Test that Setup/Teardown can correctly take functors
TEST_F(BenchmarkTest, FunctorMove) {
Functor func1;
Functor func2;
bm->Setup(std::move(func1));
bm->Teardown(std::move(func2));
RunSpecifiedBenchmarks(&null_reporter);
EXPECT_EQ(functor_called, 2);
}
// Test that Setup/Teardown can not take nullptr
TEST_F(BenchmarkTest, NullptrTest) {
#if GTEST_HAS_DEATH_TEST
// Tests only runnable in debug mode (when BM_CHECK is enabled).
#ifndef NDEBUG
#ifndef TEST_BENCHMARK_LIBRARY_HAS_NO_ASSERTIONS
EXPECT_DEATH(bm->Setup(nullptr), "setup != nullptr");
EXPECT_DEATH(bm->Teardown(nullptr), "teardown != nullptr");
#else
GTEST_SKIP() << "Test skipped because BM_CHECK is disabled";
#endif
#endif
#endif
}
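Note: outside the test fixture, the same callback types can be attached directly to a registered benchmark. A minimal user-side sketch; the benchmark body and the two callbacks are made up for illustration:

#include <vector>

#include "benchmark/benchmark.h"

static void BM_PushBack(benchmark::State& state) {
  for (auto _ : state) {
    std::vector<int> v;
    v.push_back(42);
    benchmark::DoNotOptimize(v.data());
  }
}

// The callbacks run outside the timed region, once per run of the benchmark.
const benchmark::callback_function open_resources =
    [](const benchmark::State&) { /* e.g. open files, warm caches */ };
const benchmark::callback_function close_resources =
    [](const benchmark::State&) { /* e.g. close files */ };

BENCHMARK(BM_PushBack)->Setup(open_resources)->Teardown(close_resources);

BENCHMARK_MAIN();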

View File

@ -10,10 +10,12 @@
// Test that Setup() and Teardown() are called exactly once
// for each benchmark run (single-threaded).
namespace {
namespace singlethreaded {
static int setup_call = 0;
static int teardown_call = 0;
} // namespace singlethreaded
} // namespace
static void DoSetup1(const benchmark::State& state) {
++singlethreaded::setup_call;
@ -40,11 +42,13 @@ BENCHMARK(BM_with_setup)
->Teardown(DoTeardown1);
// Test that Setup() and Teardown() are called once for each group of threads.
namespace {
namespace concurrent {
static std::atomic<int> setup_call(0);
static std::atomic<int> teardown_call(0);
static std::atomic<int> func_call(0);
} // namespace concurrent
} // namespace
static void DoSetup2(const benchmark::State& state) {
concurrent::setup_call.fetch_add(1, std::memory_order_acquire);
@ -71,16 +75,18 @@ BENCHMARK(BM_concurrent)
->Threads(15);
// Testing interaction with Fixture::Setup/Teardown
namespace {
namespace fixture_interaction {
int setup = 0;
int fixture_setup = 0;
} // namespace fixture_interaction
} // namespace
#define FIXTURE_BECHMARK_NAME MyFixture
class FIXTURE_BECHMARK_NAME : public ::benchmark::Fixture {
public:
void SetUp(const ::benchmark::State&) override {
void SetUp(const ::benchmark::State& /*unused*/) override {
fixture_interaction::fixture_setup++;
}
@ -92,7 +98,7 @@ BENCHMARK_F(FIXTURE_BECHMARK_NAME, BM_WithFixture)(benchmark::State& st) {
}
}
static void DoSetupWithFixture(const benchmark::State&) {
static void DoSetupWithFixture(const benchmark::State& /*unused*/) {
fixture_interaction::setup++;
}
@ -110,7 +116,7 @@ namespace repetitions {
int setup = 0;
}
static void DoSetupWithRepetitions(const benchmark::State&) {
static void DoSetupWithRepetitions(const benchmark::State& /*unused*/) {
repetitions::setup++;
}
static void BM_WithRep(benchmark::State& state) {

View File

@ -12,6 +12,7 @@
#include <list>
#include <map>
#include <mutex>
#include <optional>
#include <set>
#include <sstream>
#include <string>
@ -44,18 +45,24 @@ double CalculatePi(int depth) {
std::set<int64_t> ConstructRandomSet(int64_t size) {
std::set<int64_t> s;
for (int i = 0; i < size; ++i) s.insert(s.end(), i);
for (int i = 0; i < size; ++i) {
s.insert(s.end(), i);
}
return s;
}
// NOLINTBEGIN(cppcoreguidelines-avoid-non-const-global-variables)
std::mutex test_vector_mu;
std::vector<int>* test_vector = nullptr;
std::optional<std::vector<int>> test_vector;
// NOLINTEND(cppcoreguidelines-avoid-non-const-global-variables)
} // end namespace
static void BM_Factorial(benchmark::State& state) {
int fac_42 = 0;
for (auto _ : state) fac_42 = Factorial(8);
for (auto _ : state) {
fac_42 = Factorial(8);
}
// Prevent compiler optimizations
std::stringstream ss;
ss << fac_42;
@ -66,7 +73,9 @@ BENCHMARK(BM_Factorial)->UseRealTime();
static void BM_CalculatePiRange(benchmark::State& state) {
double pi = 0.0;
for (auto _ : state) pi = CalculatePi(static_cast<int>(state.range(0)));
for (auto _ : state) {
pi = CalculatePi(static_cast<int>(state.range(0)));
}
std::stringstream ss;
ss << pi;
state.SetLabel(ss.str());
@ -90,7 +99,9 @@ static void BM_SetInsert(benchmark::State& state) {
state.PauseTiming();
data = ConstructRandomSet(state.range(0));
state.ResumeTiming();
for (int j = 0; j < state.range(1); ++j) data.insert(rand());
for (int j = 0; j < state.range(1); ++j) {
data.insert(rand());
}
}
state.SetItemsProcessed(state.iterations() * state.range(1));
state.SetBytesProcessed(state.iterations() * state.range(1) *
@ -108,7 +119,9 @@ static void BM_Sequential(benchmark::State& state) {
ValueType v = 42;
for (auto _ : state) {
Container c;
for (int64_t i = state.range(0); --i;) c.push_back(v);
for (int64_t i = state.range(0); --i;) {
c.push_back(v);
}
}
const int64_t items_processed = state.iterations() * state.range(0);
state.SetItemsProcessed(items_processed);
@ -118,9 +131,7 @@ BENCHMARK_TEMPLATE2(BM_Sequential, std::vector<int>, int)
->Range(1 << 0, 1 << 10);
BENCHMARK_TEMPLATE(BM_Sequential, std::list<int>)->Range(1 << 0, 1 << 10);
// Test the variadic version of BENCHMARK_TEMPLATE in C++11 and beyond.
#ifdef BENCHMARK_HAS_CXX11
BENCHMARK_TEMPLATE(BM_Sequential, std::vector<int>, int)->Arg(512);
#endif
static void BM_StringCompare(benchmark::State& state) {
size_t len = static_cast<size_t>(state.range(0));
@ -136,19 +147,20 @@ BENCHMARK(BM_StringCompare)->Range(1, 1 << 20);
static void BM_SetupTeardown(benchmark::State& state) {
if (state.thread_index() == 0) {
// No need to lock test_vector_mu here as this is running single-threaded.
test_vector = new std::vector<int>();
test_vector = std::vector<int>();
}
int i = 0;
for (auto _ : state) {
std::lock_guard<std::mutex> l(test_vector_mu);
if (i % 2 == 0)
if (i % 2 == 0) {
test_vector->push_back(i);
else
} else {
test_vector->pop_back();
}
++i;
}
if (state.thread_index() == 0) {
delete test_vector;
test_vector.reset();
}
}
BENCHMARK(BM_SetupTeardown)->ThreadPerCpu();
@ -156,8 +168,9 @@ BENCHMARK(BM_SetupTeardown)->ThreadPerCpu();
static void BM_LongTest(benchmark::State& state) {
double tracker = 0.0;
for (auto _ : state) {
for (int i = 0; i < state.range(0); ++i)
for (int i = 0; i < state.range(0); ++i) {
benchmark::DoNotOptimize(tracker += i);
}
}
}
BENCHMARK(BM_LongTest)->Range(1 << 16, 1 << 28);
@ -169,7 +182,7 @@ static void BM_ParallelMemset(benchmark::State& state) {
int to = from + thread_size;
if (state.thread_index() == 0) {
test_vector = new std::vector<int>(static_cast<size_t>(size));
test_vector = std::vector<int>(static_cast<size_t>(size));
}
for (auto _ : state) {
@ -181,7 +194,7 @@ static void BM_ParallelMemset(benchmark::State& state) {
}
if (state.thread_index() == 0) {
delete test_vector;
test_vector.reset();
}
}
BENCHMARK(BM_ParallelMemset)->Arg(10 << 20)->ThreadRange(1, 4);
@ -210,8 +223,6 @@ static void BM_ManualTiming(benchmark::State& state) {
BENCHMARK(BM_ManualTiming)->Range(1, 1 << 14)->UseRealTime();
BENCHMARK(BM_ManualTiming)->Range(1, 1 << 14)->UseManualTime();
#ifdef BENCHMARK_HAS_CXX11
template <class... Args>
void BM_with_args(benchmark::State& state, Args&&...) {
for (auto _ : state) {
@ -252,8 +263,6 @@ void BM_template1_capture(benchmark::State& state, ExtraArgs&&... extra_args) {
BENCHMARK_TEMPLATE1_CAPTURE(BM_template1_capture, void, foo, 24UL);
BENCHMARK_CAPTURE(BM_template1_capture<void>, foo, 24UL);
#endif // BENCHMARK_HAS_CXX11
static void BM_DenseThreadRanges(benchmark::State& st) {
switch (st.range(0)) {
case 1:
@ -294,7 +303,8 @@ static void BM_templated_test(benchmark::State& state) {
}
}
static auto BM_templated_test_double = BM_templated_test<std::complex<double>>;
static const auto BM_templated_test_double =
BM_templated_test<std::complex<double>>;
BENCHMARK(BM_templated_test_double);
BENCHMARK_MAIN();

View File

@ -11,7 +11,7 @@
namespace {
#define ADD_COMPLEXITY_CASES(...) \
int CONCAT(dummy, __LINE__) = AddComplexityTest(__VA_ARGS__)
const int CONCAT(dummy, __LINE__) = AddComplexityTest(__VA_ARGS__)
int AddComplexityTest(const std::string &test_name,
const std::string &big_o_test_name,
@ -94,11 +94,11 @@ BENCHMARK(BM_Complexity_O1)
->UseManualTime()
->Complexity([](benchmark::IterationCount) { return 1.0; });
const char *one_test_name = "BM_Complexity_O1/manual_time";
const char *big_o_1_test_name = "BM_Complexity_O1/manual_time_BigO";
const char *rms_o_1_test_name = "BM_Complexity_O1/manual_time_RMS";
const char *enum_auto_big_o_1 = "\\([0-9]+\\)";
const char *lambda_big_o_1 = "f\\(N\\)";
constexpr char one_test_name[] = "BM_Complexity_O1/manual_time";
constexpr char big_o_1_test_name[] = "BM_Complexity_O1/manual_time_BigO";
constexpr char rms_o_1_test_name[] = "BM_Complexity_O1/manual_time_RMS";
constexpr char enum_auto_big_o_1[] = "\\([0-9]+\\)";
constexpr char lambda_big_o_1[] = "f\\(N\\)";
// Add enum tests
ADD_COMPLEXITY_CASES(one_test_name, big_o_1_test_name, rms_o_1_test_name,
@ -151,11 +151,11 @@ BENCHMARK(BM_Complexity_O_N)
return static_cast<double>(n);
});
const char *n_test_name = "BM_Complexity_O_N/manual_time";
const char *big_o_n_test_name = "BM_Complexity_O_N/manual_time_BigO";
const char *rms_o_n_test_name = "BM_Complexity_O_N/manual_time_RMS";
const char *enum_auto_big_o_n = "N";
const char *lambda_big_o_n = "f\\(N\\)";
constexpr char n_test_name[] = "BM_Complexity_O_N/manual_time";
constexpr char big_o_n_test_name[] = "BM_Complexity_O_N/manual_time_BigO";
constexpr char rms_o_n_test_name[] = "BM_Complexity_O_N/manual_time_RMS";
constexpr char enum_auto_big_o_n[] = "N";
constexpr char lambda_big_o_n[] = "f\\(N\\)";
// Add enum tests
ADD_COMPLEXITY_CASES(n_test_name, big_o_n_test_name, rms_o_n_test_name,
@ -209,11 +209,13 @@ BENCHMARK(BM_Complexity_O_N_log_N)
return kLog2E * static_cast<double>(n) * std::log(static_cast<double>(n));
});
const char *n_lg_n_test_name = "BM_Complexity_O_N_log_N/manual_time";
const char *big_o_n_lg_n_test_name = "BM_Complexity_O_N_log_N/manual_time_BigO";
const char *rms_o_n_lg_n_test_name = "BM_Complexity_O_N_log_N/manual_time_RMS";
const char *enum_auto_big_o_n_lg_n = "NlgN";
const char *lambda_big_o_n_lg_n = "f\\(N\\)";
constexpr char n_lg_n_test_name[] = "BM_Complexity_O_N_log_N/manual_time";
constexpr char big_o_n_lg_n_test_name[] =
"BM_Complexity_O_N_log_N/manual_time_BigO";
constexpr char rms_o_n_lg_n_test_name[] =
"BM_Complexity_O_N_log_N/manual_time_RMS";
constexpr char enum_auto_big_o_n_lg_n[] = "NlgN";
constexpr char lambda_big_o_n_lg_n[] = "f\\(N\\)";
// Add enum tests
ADD_COMPLEXITY_CASES(n_lg_n_test_name, big_o_n_lg_n_test_name,

View File

@ -1,62 +0,0 @@
#undef NDEBUG
#include <cassert>
#include <cstddef>
#include "benchmark/benchmark.h"
#if __cplusplus >= 201103L
#error C++11 or greater detected. Should be C++03.
#endif
#ifdef BENCHMARK_HAS_CXX11
#error C++11 or greater detected by the library. BENCHMARK_HAS_CXX11 is defined.
#endif
void BM_empty(benchmark::State& state) {
while (state.KeepRunning()) {
volatile benchmark::IterationCount x = state.iterations();
((void)x);
}
}
BENCHMARK(BM_empty);
// The new C++11 interface for args/ranges requires initializer list support.
// Therefore we provide the old interface to support C++03.
void BM_old_arg_range_interface(benchmark::State& state) {
assert((state.range(0) == 1 && state.range(1) == 2) ||
(state.range(0) == 5 && state.range(1) == 6));
while (state.KeepRunning()) {
}
}
BENCHMARK(BM_old_arg_range_interface)->ArgPair(1, 2)->RangePair(5, 5, 6, 6);
template <class T, class U>
void BM_template2(benchmark::State& state) {
BM_empty(state);
}
BENCHMARK_TEMPLATE2(BM_template2, int, long);
template <class T>
void BM_template1(benchmark::State& state) {
BM_empty(state);
}
BENCHMARK_TEMPLATE(BM_template1, long);
BENCHMARK_TEMPLATE1(BM_template1, int);
template <class T>
struct BM_Fixture : public ::benchmark::Fixture {};
BENCHMARK_TEMPLATE_F(BM_Fixture, BM_template1, long)(benchmark::State& state) {
BM_empty(state);
}
BENCHMARK_TEMPLATE1_F(BM_Fixture, BM_template2, int)(benchmark::State& state) {
BM_empty(state);
}
void BM_counters(benchmark::State& state) {
BM_empty(state);
state.counters["Foo"] = 2;
}
BENCHMARK(BM_counters);
BENCHMARK_MAIN();

test/cxx11_test.cc (new file, 12 lines)
View File

@ -0,0 +1,12 @@
#include "benchmark/benchmark.h"
#if defined(_MSC_VER)
#if _MSVC_LANG != 201402L
// MSVC, even in C++11 mode, does not claim to be in C++11 mode.
#error "Trying to compile C++11 test with wrong C++ standard"
#endif // _MSVC_LANG
#else // Non-MSVC
#if __cplusplus != 201103L
#error "Trying to compile C++11 test with wrong C++ standard"
#endif // Non-MSVC
#endif

View File

@ -46,14 +46,19 @@ void try_invalid_pause_resume(benchmark::State& state) {
void BM_diagnostic_test(benchmark::State& state) {
static bool called_once = false;
if (called_once == false) try_invalid_pause_resume(state);
if (!called_once) {
try_invalid_pause_resume(state);
}
for (auto _ : state) {
auto iterations = double(state.iterations()) * double(state.iterations());
auto iterations = static_cast<double>(state.iterations()) *
static_cast<double>(state.iterations());
benchmark::DoNotOptimize(iterations);
}
if (called_once == false) try_invalid_pause_resume(state);
if (!called_once) {
try_invalid_pause_resume(state);
}
called_once = true;
}
@ -62,14 +67,19 @@ BENCHMARK(BM_diagnostic_test);
void BM_diagnostic_test_keep_running(benchmark::State& state) {
static bool called_once = false;
if (called_once == false) try_invalid_pause_resume(state);
if (!called_once) {
try_invalid_pause_resume(state);
}
while (state.KeepRunning()) {
auto iterations = double(state.iterations()) * double(state.iterations());
auto iterations = static_cast<double>(state.iterations()) *
static_cast<double>(state.iterations());
benchmark::DoNotOptimize(iterations);
}
if (called_once == false) try_invalid_pause_resume(state);
if (!called_once) {
try_invalid_pause_resume(state);
}
called_once = true;
}
@ -80,7 +90,7 @@ int main(int argc, char* argv[]) {
// This test is exercising functionality for debug builds, which are not
// available in release builds. Skip the test if we are in that environment
// to avoid a test failure.
std::cout << "Diagnostic test disabled in release build" << std::endl;
std::cout << "Diagnostic test disabled in release build\n";
(void)argc;
(void)argv;
#else

View File

@ -4,7 +4,7 @@
namespace {
#if defined(__GNUC__)
std::int64_t double_up(const std::int64_t x) __attribute__((const));
std::int64_t double_up(std::int64_t x) __attribute__((const));
#endif
std::int64_t double_up(const std::int64_t x) { return x * 2; }
} // namespace
@ -26,7 +26,7 @@ struct BitRef {
BitRef(int i, unsigned char& b) : index(i), byte(b) {}
};
int main(int, char*[]) {
int main(int /*unused*/, char* /*unused*/[]) {
// this test verifies compilation of DoNotOptimize() for some types
char buffer1[1] = "";
@ -62,8 +62,6 @@ int main(int, char*[]) {
BitRef lval = BitRef::Make();
benchmark::DoNotOptimize(lval);
#ifdef BENCHMARK_HAS_CXX11
// Check that accept rvalue.
benchmark::DoNotOptimize(BitRef::Make());
#endif
}

View File

@ -71,9 +71,10 @@ BENCHMARK(BM_FooBa);
int main(int argc, char** argv) {
bool list_only = false;
for (int i = 0; i < argc; ++i)
for (int i = 0; i < argc; ++i) {
list_only |= std::string(argv[i]).find("--benchmark_list_tests") !=
std::string::npos;
}
benchmark::Initialize(&argc, argv);
@ -84,13 +85,13 @@ int main(int argc, char** argv) {
if (argc == 2) {
// Make sure we ran all of the tests
std::stringstream ss(argv[1]);
int64_t expected_return;
int64_t expected_return = 0;
ss >> expected_return;
if (returned_count != expected_return) {
std::cerr << "ERROR: Expected " << expected_return
<< " tests to match the filter but returned_count = "
<< returned_count << std::endl;
<< returned_count << '\n';
return -1;
}
@ -99,7 +100,7 @@ int main(int argc, char** argv) {
if (reports_count != expected_reports) {
std::cerr << "ERROR: Expected " << expected_reports
<< " tests to be run but reported_count = " << reports_count
<< std::endl;
<< '\n';
return -1;
}
@ -108,7 +109,7 @@ int main(int argc, char** argv) {
if (num_families != expected_reports) {
std::cerr << "ERROR: Expected " << expected_reports
<< " test families to be run but num_families = "
<< num_families << std::endl;
<< num_families << '\n';
return -1;
}
}

View File

@ -22,8 +22,9 @@ void MyBusySpinwait() {
const auto elapsed = now - start;
if (std::chrono::duration<double, std::chrono::seconds::period>(elapsed) >=
time_frame)
time_frame) {
return;
}
}
}

View File

@ -2,7 +2,8 @@
void BM_empty(benchmark::State& state) {
for (auto _ : state) {
auto iterations = double(state.iterations()) * double(state.iterations());
auto iterations = static_cast<double>(state.iterations()) *
static_cast<double>(state.iterations());
benchmark::DoNotOptimize(iterations);
}
}

View File

@ -0,0 +1,174 @@
#include <memory>
#undef NDEBUG
#include <chrono>
#include <thread>
#include "../src/timers.h"
#include "benchmark/benchmark.h"
namespace {
const std::chrono::duration<double, std::milli> time_frame(50);
const double time_frame_in_sec(
std::chrono::duration_cast<std::chrono::duration<double, std::ratio<1, 1>>>(
time_frame)
.count());
void MyBusySpinwait() {
const auto start = benchmark::ChronoClockNow();
while (true) {
const auto now = benchmark::ChronoClockNow();
const auto elapsed = now - start;
if (std::chrono::duration<double, std::chrono::seconds::period>(elapsed) >=
time_frame) {
return;
}
}
}
int numRunThreadsCalled_ = 0;
class ManualThreadRunner : public benchmark::ThreadRunnerBase {
public:
explicit ManualThreadRunner(int num_threads)
: pool(static_cast<size_t>(num_threads - 1)) {}
void RunThreads(const std::function<void(int)>& fn) final {
for (std::size_t ti = 0; ti < pool.size(); ++ti) {
pool[ti] = std::thread(fn, static_cast<int>(ti + 1));
}
fn(0);
for (std::thread& thread : pool) {
thread.join();
}
++numRunThreadsCalled_;
}
private:
std::vector<std::thread> pool;
};
// ========================================================================= //
// --------------------------- TEST CASES BEGIN ---------------------------- //
// ========================================================================= //
// ========================================================================= //
// BM_ManualThreading
// Threads are created before the measurement starts and joined after it
// finishes.
void BM_ManualThreading(benchmark::State& state) {
for (auto _ : state) {
MyBusySpinwait();
state.SetIterationTime(time_frame_in_sec);
}
state.counters["invtime"] =
benchmark::Counter{1, benchmark::Counter::kIsRate};
}
} // end namespace
BENCHMARK(BM_ManualThreading)
->Iterations(1)
->ThreadRunner([](int num_threads) {
return std::make_unique<ManualThreadRunner>(num_threads);
})
->Threads(1);
BENCHMARK(BM_ManualThreading)
->Iterations(1)
->ThreadRunner([](int num_threads) {
return std::make_unique<ManualThreadRunner>(num_threads);
})
->Threads(1)
->UseRealTime();
BENCHMARK(BM_ManualThreading)
->Iterations(1)
->ThreadRunner([](int num_threads) {
return std::make_unique<ManualThreadRunner>(num_threads);
})
->Threads(1)
->UseManualTime();
BENCHMARK(BM_ManualThreading)
->Iterations(1)
->ThreadRunner([](int num_threads) {
return std::make_unique<ManualThreadRunner>(num_threads);
})
->Threads(1)
->MeasureProcessCPUTime();
BENCHMARK(BM_ManualThreading)
->Iterations(1)
->ThreadRunner([](int num_threads) {
return std::make_unique<ManualThreadRunner>(num_threads);
})
->Threads(1)
->MeasureProcessCPUTime()
->UseRealTime();
BENCHMARK(BM_ManualThreading)
->Iterations(1)
->ThreadRunner([](int num_threads) {
return std::make_unique<ManualThreadRunner>(num_threads);
})
->Threads(1)
->MeasureProcessCPUTime()
->UseManualTime();
BENCHMARK(BM_ManualThreading)
->Iterations(1)
->ThreadRunner([](int num_threads) {
return std::make_unique<ManualThreadRunner>(num_threads);
})
->Threads(2);
BENCHMARK(BM_ManualThreading)
->Iterations(1)
->ThreadRunner([](int num_threads) {
return std::make_unique<ManualThreadRunner>(num_threads);
})
->Threads(2)
->UseRealTime();
BENCHMARK(BM_ManualThreading)
->Iterations(1)
->ThreadRunner([](int num_threads) {
return std::make_unique<ManualThreadRunner>(num_threads);
})
->Threads(2)
->UseManualTime();
BENCHMARK(BM_ManualThreading)
->Iterations(1)
->ThreadRunner([](int num_threads) {
return std::make_unique<ManualThreadRunner>(num_threads);
})
->Threads(2)
->MeasureProcessCPUTime();
BENCHMARK(BM_ManualThreading)
->Iterations(1)
->ThreadRunner([](int num_threads) {
return std::make_unique<ManualThreadRunner>(num_threads);
})
->Threads(2)
->MeasureProcessCPUTime()
->UseRealTime();
BENCHMARK(BM_ManualThreading)
->Iterations(1)
->ThreadRunner([](int num_threads) {
return std::make_unique<ManualThreadRunner>(num_threads);
})
->Threads(2)
->MeasureProcessCPUTime()
->UseManualTime();
// ========================================================================= //
// ---------------------------- TEST CASES END ----------------------------- //
// ========================================================================= //
int main(int argc, char* argv[]) {
benchmark::Initialize(&argc, argv);
benchmark::RunSpecifiedBenchmarks();
benchmark::Shutdown();
assert(numRunThreadsCalled_ > 0);
}

View File

@ -39,7 +39,7 @@ class MapFixture : public ::benchmark::Fixture {
m = ConstructRandomMap(static_cast<int>(st.range(0)));
}
void TearDown(const ::benchmark::State&) override { m.clear(); }
void TearDown(const ::benchmark::State& /*unused*/) override { m.clear(); }
std::map<int, int> m;
};

View File

@ -14,7 +14,8 @@ class TestMemoryManager : public benchmark::MemoryManager {
void BM_empty(benchmark::State& state) {
for (auto _ : state) {
auto iterations = double(state.iterations()) * double(state.iterations());
auto iterations = static_cast<double>(state.iterations()) *
static_cast<double>(state.iterations());
benchmark::DoNotOptimize(iterations);
}
}

View File

@ -0,0 +1,101 @@
#include <vector>
#include "benchmark/benchmark.h"
#include "gtest/gtest.h"
namespace {
using benchmark::ClearRegisteredBenchmarks;
using benchmark::ConsoleReporter;
using benchmark::MemoryManager;
using benchmark::RegisterBenchmark;
using benchmark::RunSpecifiedBenchmarks;
using benchmark::State;
using benchmark::internal::Benchmark;
constexpr int N_REPETITIONS = 100;
constexpr int N_ITERATIONS = 1;
int num_allocs = 0;
int max_bytes_used = 0;
int total_allocated_bytes = 0;
int net_heap_growth = 0;
void reset() {
num_allocs = 0;
max_bytes_used = 0;
total_allocated_bytes = 0;
net_heap_growth = 0;
}
class TestMemoryManager : public MemoryManager {
void Start() override {}
void Stop(Result& result) override {
result.num_allocs = num_allocs;
result.net_heap_growth = net_heap_growth;
result.max_bytes_used = max_bytes_used;
result.total_allocated_bytes = total_allocated_bytes;
num_allocs += 1;
max_bytes_used += 2;
net_heap_growth += 4;
total_allocated_bytes += 10;
}
};
class TestReporter : public ConsoleReporter {
public:
TestReporter() = default;
virtual ~TestReporter() = default;
bool ReportContext(const Context& /*unused*/) override { return true; }
void PrintHeader(const Run&) override {}
void PrintRunData(const Run& run) override {
if (run.repetition_index == -1) return;
if (!run.memory_result.memory_iterations) return;
store.push_back(run.memory_result);
}
std::vector<MemoryManager::Result> store;
};
class MemoryResultsTest : public testing::Test {
public:
Benchmark* bm;
TestReporter reporter;
void SetUp() override {
bm = RegisterBenchmark("BM", [](State& st) {
for (auto _ : st) {
}
});
bm->Repetitions(N_REPETITIONS);
bm->Iterations(N_ITERATIONS);
reset();
}
void TearDown() override { ClearRegisteredBenchmarks(); }
};
TEST_F(MemoryResultsTest, NoMMTest) {
RunSpecifiedBenchmarks(&reporter);
EXPECT_EQ(reporter.store.size(), 0);
}
TEST_F(MemoryResultsTest, ResultsTest) {
auto mm = std::make_unique<TestMemoryManager>();
RegisterMemoryManager(mm.get());
RunSpecifiedBenchmarks(&reporter);
EXPECT_EQ(reporter.store.size(), N_REPETITIONS);
for (size_t i = 0; i < reporter.store.size(); i++) {
EXPECT_EQ(reporter.store[i].num_allocs, static_cast<int64_t>(i));
EXPECT_EQ(reporter.store[i].max_bytes_used, static_cast<int64_t>(i) * 2);
EXPECT_EQ(reporter.store[i].net_heap_growth, static_cast<int64_t>(i) * 4);
EXPECT_EQ(reporter.store[i].total_allocated_bytes,
static_cast<int64_t>(i) * 10);
}
}
} // namespace
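Note: a user-side sketch of plugging in a MemoryManager outside of the gtest harness; the allocation counters referenced in the comments are placeholders for whatever hooks the embedding application actually has:

#include "benchmark/benchmark.h"

class MyMemoryManager : public benchmark::MemoryManager {
 public:
  void Start() override { /* reset the application's allocation counters */ }
  void Stop(Result& result) override {
    // Fill in whatever the embedding application can actually measure.
    result.num_allocs = 0;             // e.g. a tracked allocation count
    result.total_allocated_bytes = 0;  // e.g. tracked bytes allocated
  }
};

int main(int argc, char** argv) {
  MyMemoryManager mm;
  benchmark::RegisterMemoryManager(&mm);
  benchmark::Initialize(&argc, argv);
  benchmark::RunSpecifiedBenchmarks();
  benchmark::RegisterMemoryManager(nullptr);
  benchmark::Shutdown();
  return 0;
}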

View File

@ -16,12 +16,13 @@
#define CONCAT2(x, y) x##y
#define CONCAT(x, y) CONCAT2(x, y)
#define ADD_CASES(...) int CONCAT(dummy, __LINE__) = ::AddCases(__VA_ARGS__)
#define ADD_CASES(...) \
const int CONCAT(dummy, __LINE__) = ::AddCases(__VA_ARGS__)
#define SET_SUBSTITUTIONS(...) \
int CONCAT(dummy, __LINE__) = ::SetSubstitutions(__VA_ARGS__)
const int CONCAT(dummy, __LINE__) = ::SetSubstitutions(__VA_ARGS__)
enum MatchRules {
enum MatchRules : uint8_t {
MR_Default, // Skip non-matching lines until a match is found.
MR_Next, // Match must occur on the next line.
MR_Not // No line between the current position and the next match matches
@ -37,7 +38,7 @@ struct TestCase {
std::shared_ptr<benchmark::Regex> regex;
};
enum TestCaseID {
enum TestCaseID : uint8_t {
TC_ConsoleOut,
TC_ConsoleErr,
TC_JSONOut,
@ -80,7 +81,8 @@ std::string GetFileReporterOutput(int argc, char* argv[]);
// will be the subject of a call to checker_function
// checker_function: should be of type ResultsCheckFn (see below)
#define CHECK_BENCHMARK_RESULTS(bm_name_pattern, checker_function) \
size_t CONCAT(dummy, __LINE__) = AddChecker(bm_name_pattern, checker_function)
const size_t CONCAT(dummy, __LINE__) = \
AddChecker(bm_name_pattern, checker_function)
struct Results;
typedef std::function<void(Results const&)> ResultsCheckFn;
@ -101,7 +103,7 @@ struct Results {
double NumIterations() const;
typedef enum { kCpuTime, kRealTime } BenchmarkTime;
typedef enum : uint8_t { kCpuTime, kRealTime } BenchmarkTime;
// get cpu_time or real_time in seconds
double GetTime(BenchmarkTime which) const;

View File

@ -83,7 +83,7 @@ std::string PerformSubstitutions(std::string source) {
SubMap const& subs = GetSubstitutions();
using SizeT = std::string::size_type;
for (auto const& KV : subs) {
SizeT pos;
SizeT pos = 0;
SizeT next_start = 0;
while ((pos = source.find(KV.first, next_start)) != std::string::npos) {
next_start = pos + KV.second.size();
@ -98,7 +98,7 @@ void CheckCase(std::stringstream& remaining_output, TestCase const& TC,
std::string first_line;
bool on_first = true;
std::string line;
while (remaining_output.eof() == false) {
while (!remaining_output.eof()) {
BM_CHECK(remaining_output.good());
std::getline(remaining_output, line);
if (on_first) {
@ -112,7 +112,9 @@ void CheckCase(std::stringstream& remaining_output, TestCase const& TC,
<< "\n actual regex string \"" << TC.substituted_regex << "\""
<< "\n started matching near: " << first_line;
}
if (TC.regex->Match(line)) return;
if (TC.regex->Match(line)) {
return;
}
BM_CHECK(TC.match_rule != MR_Next)
<< "Expected line \"" << line << "\" to match regex \"" << TC.regex_str
<< "\""
@ -147,7 +149,7 @@ class TestReporter : public benchmark::BenchmarkReporter {
bool ReportContext(const Context& context) override {
bool last_ret = false;
bool first = true;
for (auto rep : reporters_) {
for (auto* rep : reporters_) {
bool new_ret = rep->ReportContext(context);
BM_CHECK(first || new_ret == last_ret)
<< "Reports return different values for ReportContext";
@ -159,10 +161,14 @@ class TestReporter : public benchmark::BenchmarkReporter {
}
void ReportRuns(const std::vector<Run>& report) override {
for (auto rep : reporters_) rep->ReportRuns(report);
for (auto* rep : reporters_) {
rep->ReportRuns(report);
}
}
void Finalize() override {
for (auto rep : reporters_) rep->Finalize();
for (auto* rep : reporters_) {
rep->Finalize();
}
}
private:
@ -200,7 +206,7 @@ class ResultsChecker {
void SetHeader_(const std::string& csv_header);
void SetValues_(const std::string& entry_csv_line);
std::vector<std::string> SplitCsv_(const std::string& line);
std::vector<std::string> SplitCsv_(const std::string& line) const;
};
// store the static ResultsChecker in a function to prevent initialization
@ -224,14 +230,16 @@ void ResultsChecker::CheckResults(std::stringstream& output) {
// clear before calling tellg()
output.clear();
// seek to zero only when needed
if (output.tellg() > start) output.seekg(start);
if (output.tellg() > start) {
output.seekg(start);
}
// and just in case
output.clear();
}
// now go over every line and publish it to the ResultsChecker
std::string line;
bool on_first = true;
while (output.eof() == false) {
while (!output.eof()) {
BM_CHECK(output.good());
std::getline(output, line);
if (on_first) {
@ -265,7 +273,9 @@ void ResultsChecker::SetHeader_(const std::string& csv_header) {
// set the values for a benchmark
void ResultsChecker::SetValues_(const std::string& entry_csv_line) {
if (entry_csv_line.empty()) return; // some lines are empty
if (entry_csv_line.empty()) {
return;
} // some lines are empty
BM_CHECK(!field_names.empty());
auto vals = SplitCsv_(entry_csv_line);
BM_CHECK_EQ(vals.size(), field_names.size());
@ -277,23 +287,38 @@ void ResultsChecker::SetValues_(const std::string& entry_csv_line) {
}
// a quick'n'dirty csv splitter (eliminating quotes)
std::vector<std::string> ResultsChecker::SplitCsv_(const std::string& line) {
std::vector<std::string> ResultsChecker::SplitCsv_(
const std::string& line) const {
std::vector<std::string> out;
if (line.empty()) return out;
if (!field_names.empty()) out.reserve(field_names.size());
size_t prev = 0, pos = line.find_first_of(','), curr = pos;
while (pos != line.npos) {
if (line.empty()) {
return out;
}
if (!field_names.empty()) {
out.reserve(field_names.size());
}
size_t prev = 0;
size_t pos = line.find_first_of(',');
size_t curr = pos;
while (pos != std::string::npos) {
BM_CHECK(curr > 0);
if (line[prev] == '"') ++prev;
if (line[curr - 1] == '"') --curr;
if (line[prev] == '"') {
++prev;
}
if (line[curr - 1] == '"') {
--curr;
}
out.push_back(line.substr(prev, curr - prev));
prev = pos + 1;
pos = line.find_first_of(',', pos + 1);
curr = pos;
}
curr = line.size();
if (line[prev] == '"') ++prev;
if (line[curr - 1] == '"') --curr;
if (line[prev] == '"') {
++prev;
}
if (line[curr - 1] == '"') {
--curr;
}
out.push_back(line.substr(prev, curr - prev));
return out;
}
@ -308,7 +333,9 @@ size_t AddChecker(const std::string& bm_name, const ResultsCheckFn& fn) {
int Results::NumThreads() const {
auto pos = name.find("/threads:");
if (pos == name.npos) return 1;
if (pos == std::string::npos) {
return 1;
}
auto end = name.find('/', pos + 9);
std::stringstream ss;
ss << name.substr(pos + 9, end);
@ -324,7 +351,7 @@ double Results::GetTime(BenchmarkTime which) const {
BM_CHECK(which == kCpuTime || which == kRealTime);
const char* which_str = which == kCpuTime ? "cpu_time" : "real_time";
double val = GetAs<double>(which_str);
auto unit = Get("time_unit");
const auto* unit = Get("time_unit");
BM_CHECK(unit);
if (*unit == "ns") {
return val * 1.e-9;
@ -378,7 +405,9 @@ int SetSubstitutions(
break;
}
}
if (!exists) subs.push_back(std::move(KV));
if (!exists) {
subs.push_back(std::move(KV));
}
}
return 0;
}
@ -449,11 +478,14 @@ void RunOutputTests(int argc, char* argv[]) {
BENCHMARK_RESTORE_DEPRECATED_WARNING
int SubstrCnt(const std::string& haystack, const std::string& pat) {
if (pat.length() == 0) return 0;
if (pat.length() == 0) {
return 0;
}
int count = 0;
for (size_t offset = haystack.find(pat); offset != std::string::npos;
offset = haystack.find(pat, offset + pat.length()))
offset = haystack.find(pat, offset + pat.length())) {
++count;
}
return count;
}
@ -471,7 +503,9 @@ static char RandomHexChar() {
static std::string GetRandomFileName() {
std::string model = "test.%%%%%%";
for (auto& ch : model) {
if (ch == '%') ch = RandomHexChar();
if (ch == '%') {
ch = RandomHexChar();
}
}
return model;
}
@ -486,12 +520,15 @@ static std::string GetTempFileName() {
// create the same file at the same time. However, it still introduces races
// similar to tmpnam.
int retries = 3;
while (--retries) {
while (--retries != 0) {
std::string name = GetRandomFileName();
if (!FileExists(name)) return name;
if (!FileExists(name)) {
return name;
}
}
std::cerr << "Failed to create unique temporary file name" << std::endl;
std::abort();
std::cerr << "Failed to create unique temporary file name\n";
std::flush(std::cerr);
std::exit(1);
}
std::string GetFileReporterOutput(int argc, char* argv[]) {
@ -505,7 +542,7 @@ std::string GetFileReporterOutput(int argc, char* argv[]) {
tmp += tmp_file_name;
new_argv.emplace_back(const_cast<char*>(tmp.c_str()));
argc = int(new_argv.size());
argc = static_cast<int>(new_argv.size());
benchmark::Initialize(&argc, new_argv.data());
benchmark::RunSpecifiedBenchmarks();

View File

@ -226,9 +226,13 @@ void measure(size_t threadcount, PerfCounterValues* before,
// threadpool.
auto counters =
PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2});
for (auto& t : threads) t = std::thread(work);
for (auto& t : threads) {
t = std::thread(work);
}
counters.Snapshot(before);
for (auto& t : threads) t.join();
for (auto& t : threads) {
t.join();
}
counters.Snapshot(after);
}

View File

@ -0,0 +1,42 @@
#include <memory>
#include "benchmark/benchmark.h"
#include "gtest/gtest.h"
namespace {
class TestProfilerManager : public benchmark::ProfilerManager {
public:
void AfterSetupStart() override { ++start_called; }
void BeforeTeardownStop() override { ++stop_called; }
int start_called = 0;
int stop_called = 0;
};
void BM_empty(benchmark::State& state) {
for (auto _ : state) {
auto iterations = state.iterations();
benchmark::DoNotOptimize(iterations);
}
}
BENCHMARK(BM_empty);
TEST(ProfilerManager, ReregisterManager) {
#if GTEST_HAS_DEATH_TEST
// Tests only runnable in debug mode (when BM_CHECK is enabled).
#ifndef NDEBUG
#ifndef TEST_BENCHMARK_LIBRARY_HAS_NO_ASSERTIONS
ASSERT_DEATH_IF_SUPPORTED(
{
std::unique_ptr<TestProfilerManager> pm(new TestProfilerManager());
benchmark::RegisterProfilerManager(pm.get());
benchmark::RegisterProfilerManager(pm.get());
},
"RegisterProfilerManager");
#endif
#endif
#endif
}
} // namespace
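Note: distilled from the tests in this area, a hedged sketch of how an embedding application might wire an external profiler through these hooks; StartExternalProfiler()/StopExternalProfiler() are placeholders, not a real API:

#include "benchmark/benchmark.h"

// AfterSetupStart fires once a benchmark's Setup has run and
// BeforeTeardownStop fires just before its Teardown, so the two calls
// bracket the profiled run.
class MyProfilerManager : public benchmark::ProfilerManager {
  void AfterSetupStart() override { /* StartExternalProfiler(); */ }
  void BeforeTeardownStop() override { /* StopExternalProfiler(); */ }
};

int main(int argc, char** argv) {
  MyProfilerManager pm;
  benchmark::RegisterProfilerManager(&pm);
  benchmark::Initialize(&argc, argv);
  benchmark::RunSpecifiedBenchmarks();
  benchmark::RegisterProfilerManager(nullptr);
  benchmark::Shutdown();
  return 0;
}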

View File

@ -0,0 +1,62 @@
#include <cassert>
#include <cstdlib>
#include <memory>
#include <vector>
#include "benchmark/benchmark.h"
// Tests that we can specify the number of profiler iterations with
// --benchmark_min_time=<NUM>x.
namespace {
int iteration_count = 0;
int end_profiler_iteration_count = 0;
class TestProfilerManager : public benchmark::ProfilerManager {
void AfterSetupStart() override { iteration_count = 0; }
void BeforeTeardownStop() override {
end_profiler_iteration_count = iteration_count;
}
};
class NullReporter : public benchmark::BenchmarkReporter {
public:
bool ReportContext(const Context& /*context*/) override { return true; }
void ReportRuns(const std::vector<Run>& /* report */) override {}
};
} // end namespace
static void BM_MyBench(benchmark::State& state) {
for (auto s : state) {
++iteration_count;
}
}
BENCHMARK(BM_MyBench);
int main(int argc, char** argv) {
// Make a fake argv and append the new --benchmark_profiler_iterations=<foo>
// to it.
int fake_argc = argc + 1;
std::vector<const char*> fake_argv(static_cast<size_t>(fake_argc));
for (size_t i = 0; i < static_cast<size_t>(argc); ++i) {
fake_argv[i] = argv[i];
}
fake_argv[static_cast<size_t>(argc)] = "--benchmark_min_time=4x";
std::unique_ptr<benchmark::ProfilerManager> pm(new TestProfilerManager());
benchmark::RegisterProfilerManager(pm.get());
benchmark::Initialize(&fake_argc, const_cast<char**>(fake_argv.data()));
NullReporter null_reporter;
const size_t returned_count =
benchmark::RunSpecifiedBenchmarks(&null_reporter, "BM_MyBench");
assert(returned_count == 1);
// Check the executed iters.
assert(end_profiler_iteration_count == 4);
benchmark::RegisterProfilerManager(nullptr);
return 0;
}

View File

@ -1,5 +1,6 @@
// FIXME: WIP
#include <cassert>
#include <memory>
#include "benchmark/benchmark.h"

View File

@ -53,11 +53,12 @@ int AddCases(std::initializer_list<TestCase> const& v) {
#define CONCAT(x, y) CONCAT2(x, y)
#define CONCAT2(x, y) x##y
#define ADD_CASES(...) int CONCAT(dummy, __LINE__) = AddCases({__VA_ARGS__})
#define ADD_CASES(...) \
const int CONCAT(dummy, __LINE__) = AddCases({__VA_ARGS__})
} // end namespace
typedef benchmark::internal::Benchmark* ReturnVal;
using ReturnVal = benchmark::internal::Benchmark const* const;
//----------------------------------------------------------------------------//
// Test RegisterBenchmark with no additional arguments
@ -76,7 +77,6 @@ ADD_CASES({"BM_function"}, {"BM_function_manual_registration"});
// Note: GCC <= 4.8 does not support this form of RegisterBenchmark because it
// rejects the variadic pack expansion of lambda captures.
//----------------------------------------------------------------------------//
#ifndef BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK
void BM_extra_args(benchmark::State& st, const char* label) {
for (auto _ : st) {
@ -86,15 +86,14 @@ void BM_extra_args(benchmark::State& st, const char* label) {
int RegisterFromFunction() {
std::pair<const char*, const char*> cases[] = {
{"test1", "One"}, {"test2", "Two"}, {"test3", "Three"}};
for (auto const& c : cases)
for (auto const& c : cases) {
benchmark::RegisterBenchmark(c.first, &BM_extra_args, c.second);
}
return 0;
}
int dummy2 = RegisterFromFunction();
const int dummy2 = RegisterFromFunction();
ADD_CASES({"test1", "One"}, {"test2", "Two"}, {"test3", "Three"});
#endif // BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK
//----------------------------------------------------------------------------//
// Test RegisterBenchmark with DISABLED_ benchmark
//----------------------------------------------------------------------------//
@ -119,14 +118,11 @@ struct CustomFixture {
};
void TestRegistrationAtRuntime() {
#ifdef BENCHMARK_HAS_CXX11
{
CustomFixture fx;
benchmark::RegisterBenchmark("custom_fixture", fx);
AddCases({std::string("custom_fixture")});
}
#endif
#ifndef BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK
{
const char* x = "42";
auto capturing_lam = [=](benchmark::State& st) {
@ -137,7 +133,6 @@ void TestRegistrationAtRuntime() {
benchmark::RegisterBenchmark("lambda_benchmark", capturing_lam);
AddCases({{"lambda_benchmark", x}});
}
#endif
}
// Test that all benchmarks, registered either during static init or at runtime,
@ -163,7 +158,7 @@ void RunTestOne() {
// benchmarks.
// Also test that new benchmarks can be registered and run afterwards.
void RunTestTwo() {
assert(ExpectedResults.size() != 0 &&
assert(!ExpectedResults.empty() &&
"must have at least one registered benchmark");
ExpectedResults.clear();
benchmark::ClearRegisteredBenchmarks();

View File

@ -60,7 +60,7 @@ static int AddContextCases() {
AddCases(TC_JSONOut, {{"\"json_schema_version\": 1$", MR_Next}});
return 0;
}
int dummy_register = AddContextCases();
const int dummy_register = AddContextCases();
ADD_CASES(TC_CSVOut, {{"%csv_header"}});
// ========================================================================= //
@ -96,7 +96,8 @@ ADD_CASES(TC_CSVOut, {{"^\"BM_basic\",%csv_report$"}});
void BM_bytes_per_second(benchmark::State& state) {
for (auto _ : state) {
// This test requires a non-zero CPU time to avoid divide-by-zero
auto iterations = double(state.iterations()) * double(state.iterations());
auto iterations = static_cast<double>(state.iterations()) *
static_cast<double>(state.iterations());
benchmark::DoNotOptimize(iterations);
}
state.SetBytesProcessed(1);
@ -128,7 +129,8 @@ ADD_CASES(TC_CSVOut, {{"^\"BM_bytes_per_second\",%csv_bytes_report$"}});
void BM_items_per_second(benchmark::State& state) {
for (auto _ : state) {
// This test requires a non-zero CPU time to avoid divide-by-zero
auto iterations = double(state.iterations()) * double(state.iterations());
auto iterations = static_cast<double>(state.iterations()) *
static_cast<double>(state.iterations());
benchmark::DoNotOptimize(iterations);
}
state.SetItemsProcessed(1);
@ -409,7 +411,8 @@ ADD_CASES(TC_ConsoleOut, {{"^BM_BigArgs/1073741824 %console_report$"},
void BM_Complexity_O1(benchmark::State& state) {
for (auto _ : state) {
// This test requires a non-zero CPU time to avoid divide-by-zero
auto iterations = double(state.iterations()) * double(state.iterations());
auto iterations = static_cast<double>(state.iterations()) *
static_cast<double>(state.iterations());
benchmark::DoNotOptimize(iterations);
}
state.SetComplexityN(state.range(0));

View File

@ -46,6 +46,7 @@ struct TestCase {
}
};
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
std::vector<TestCase> ExpectedResults;
int AddCases(const std::string& base_name,
@ -59,7 +60,7 @@ int AddCases(const std::string& base_name,
#define CONCAT(x, y) CONCAT2(x, y)
#define CONCAT2(x, y) x##y
#define ADD_CASES(...) int CONCAT(dummy, __LINE__) = AddCases(__VA_ARGS__)
#define ADD_CASES(...) const int CONCAT(dummy, __LINE__) = AddCases(__VA_ARGS__)
} // end namespace
@ -97,11 +98,11 @@ BENCHMARK(BM_error_before_running_range_for);
ADD_CASES("BM_error_before_running_range_for", {{"", true, "error message"}});
void BM_error_during_running(benchmark::State& state) {
int first_iter = true;
int first_iter = 1;
while (state.KeepRunning()) {
if (state.range(0) == 1 && state.thread_index() <= (state.threads() / 2)) {
assert(first_iter);
first_iter = false;
first_iter = 0;
state.SkipWithError("error message");
} else {
state.PauseTiming();
@ -143,11 +144,13 @@ ADD_CASES("BM_error_during_running_ranged_for",
void BM_error_after_running(benchmark::State& state) {
for (auto _ : state) {
auto iterations = double(state.iterations()) * double(state.iterations());
auto iterations = static_cast<double>(state.iterations()) *
static_cast<double>(state.iterations());
benchmark::DoNotOptimize(iterations);
}
if (state.thread_index() <= (state.threads() / 2))
if (state.thread_index() <= (state.threads() / 2)) {
state.SkipWithError("error message");
}
}
BENCHMARK(BM_error_after_running)->ThreadRange(1, 8);
ADD_CASES("BM_error_after_running", {{"/threads:1", true, "error message"},

View File

@ -13,18 +13,18 @@ namespace {
TEST(StringUtilTest, stoul) {
{
size_t pos = 0;
EXPECT_EQ(0ul, benchmark::stoul("0", &pos));
EXPECT_EQ(1ul, pos);
EXPECT_EQ(0UL, benchmark::stoul("0", &pos));
EXPECT_EQ(1UL, pos);
}
{
size_t pos = 0;
EXPECT_EQ(7ul, benchmark::stoul("7", &pos));
EXPECT_EQ(1ul, pos);
EXPECT_EQ(7UL, benchmark::stoul("7", &pos));
EXPECT_EQ(1UL, pos);
}
{
size_t pos = 0;
EXPECT_EQ(135ul, benchmark::stoul("135", &pos));
EXPECT_EQ(3ul, pos);
EXPECT_EQ(135UL, benchmark::stoul("135", &pos));
EXPECT_EQ(3UL, pos);
}
#if ULONG_MAX == 0xFFFFFFFFul
{
@ -35,35 +35,35 @@ TEST(StringUtilTest, stoul) {
#elif ULONG_MAX == 0xFFFFFFFFFFFFFFFFul
{
size_t pos = 0;
EXPECT_EQ(0xFFFFFFFFFFFFFFFFul,
EXPECT_EQ(0xFFFFFFFFFFFFFFFFUL,
benchmark::stoul("18446744073709551615", &pos));
EXPECT_EQ(20ul, pos);
EXPECT_EQ(20UL, pos);
}
#endif
{
size_t pos = 0;
EXPECT_EQ(10ul, benchmark::stoul("1010", &pos, 2));
EXPECT_EQ(4ul, pos);
EXPECT_EQ(10UL, benchmark::stoul("1010", &pos, 2));
EXPECT_EQ(4UL, pos);
}
{
size_t pos = 0;
EXPECT_EQ(520ul, benchmark::stoul("1010", &pos, 8));
EXPECT_EQ(4ul, pos);
EXPECT_EQ(520UL, benchmark::stoul("1010", &pos, 8));
EXPECT_EQ(4UL, pos);
}
{
size_t pos = 0;
EXPECT_EQ(1010ul, benchmark::stoul("1010", &pos, 10));
EXPECT_EQ(4ul, pos);
EXPECT_EQ(1010UL, benchmark::stoul("1010", &pos, 10));
EXPECT_EQ(4UL, pos);
}
{
size_t pos = 0;
EXPECT_EQ(4112ul, benchmark::stoul("1010", &pos, 16));
EXPECT_EQ(4ul, pos);
EXPECT_EQ(4112UL, benchmark::stoul("1010", &pos, 16));
EXPECT_EQ(4UL, pos);
}
{
size_t pos = 0;
EXPECT_EQ(0xBEEFul, benchmark::stoul("BEEF", &pos, 16));
EXPECT_EQ(4ul, pos);
EXPECT_EQ(0xBEEFUL, benchmark::stoul("BEEF", &pos, 16));
EXPECT_EQ(4UL, pos);
}
#ifndef BENCHMARK_HAS_NO_EXCEPTIONS
{
@ -73,83 +73,87 @@ TEST(StringUtilTest, stoul) {
#endif
}
TEST(StringUtilTest, stoi){{size_t pos = 0;
EXPECT_EQ(0, benchmark::stoi("0", &pos));
EXPECT_EQ(1ul, pos);
} // namespace
{
size_t pos = 0;
EXPECT_EQ(-17, benchmark::stoi("-17", &pos));
EXPECT_EQ(3ul, pos);
}
{
size_t pos = 0;
EXPECT_EQ(1357, benchmark::stoi("1357", &pos));
EXPECT_EQ(4ul, pos);
}
{
size_t pos = 0;
EXPECT_EQ(10, benchmark::stoi("1010", &pos, 2));
EXPECT_EQ(4ul, pos);
}
{
size_t pos = 0;
EXPECT_EQ(520, benchmark::stoi("1010", &pos, 8));
EXPECT_EQ(4ul, pos);
}
{
size_t pos = 0;
EXPECT_EQ(1010, benchmark::stoi("1010", &pos, 10));
EXPECT_EQ(4ul, pos);
}
{
size_t pos = 0;
EXPECT_EQ(4112, benchmark::stoi("1010", &pos, 16));
EXPECT_EQ(4ul, pos);
}
{
size_t pos = 0;
EXPECT_EQ(0xBEEF, benchmark::stoi("BEEF", &pos, 16));
EXPECT_EQ(4ul, pos);
}
TEST(StringUtilTest, stoi) {
{
size_t pos = 0;
EXPECT_EQ(0, benchmark::stoi("0", &pos));
EXPECT_EQ(1UL, pos);
} // namespace
{
size_t pos = 0;
EXPECT_EQ(-17, benchmark::stoi("-17", &pos));
EXPECT_EQ(3UL, pos);
}
{
size_t pos = 0;
EXPECT_EQ(1357, benchmark::stoi("1357", &pos));
EXPECT_EQ(4UL, pos);
}
{
size_t pos = 0;
EXPECT_EQ(10, benchmark::stoi("1010", &pos, 2));
EXPECT_EQ(4UL, pos);
}
{
size_t pos = 0;
EXPECT_EQ(520, benchmark::stoi("1010", &pos, 8));
EXPECT_EQ(4UL, pos);
}
{
size_t pos = 0;
EXPECT_EQ(1010, benchmark::stoi("1010", &pos, 10));
EXPECT_EQ(4UL, pos);
}
{
size_t pos = 0;
EXPECT_EQ(4112, benchmark::stoi("1010", &pos, 16));
EXPECT_EQ(4UL, pos);
}
{
size_t pos = 0;
EXPECT_EQ(0xBEEF, benchmark::stoi("BEEF", &pos, 16));
EXPECT_EQ(4UL, pos);
}
#ifndef BENCHMARK_HAS_NO_EXCEPTIONS
{
ASSERT_THROW(std::ignore = benchmark::stoi("this is a test"),
std::invalid_argument);
}
{
ASSERT_THROW(std::ignore = benchmark::stoi("this is a test"),
std::invalid_argument);
}
#endif
}
TEST(StringUtilTest, stod){{size_t pos = 0;
EXPECT_EQ(0.0, benchmark::stod("0", &pos));
EXPECT_EQ(1ul, pos);
}
{
size_t pos = 0;
EXPECT_EQ(-84.0, benchmark::stod("-84", &pos));
EXPECT_EQ(3ul, pos);
}
{
size_t pos = 0;
EXPECT_EQ(1234.0, benchmark::stod("1234", &pos));
EXPECT_EQ(4ul, pos);
}
{
size_t pos = 0;
EXPECT_EQ(1.5, benchmark::stod("1.5", &pos));
EXPECT_EQ(3ul, pos);
}
{
size_t pos = 0;
/* Note: exactly representable as double */
EXPECT_EQ(-1.25e+9, benchmark::stod("-1.25e+9", &pos));
EXPECT_EQ(8ul, pos);
}
TEST(StringUtilTest, stod) {
{
size_t pos = 0;
EXPECT_EQ(0.0, benchmark::stod("0", &pos));
EXPECT_EQ(1UL, pos);
}
{
size_t pos = 0;
EXPECT_EQ(-84.0, benchmark::stod("-84", &pos));
EXPECT_EQ(3UL, pos);
}
{
size_t pos = 0;
EXPECT_EQ(1234.0, benchmark::stod("1234", &pos));
EXPECT_EQ(4UL, pos);
}
{
size_t pos = 0;
EXPECT_EQ(1.5, benchmark::stod("1.5", &pos));
EXPECT_EQ(3UL, pos);
}
{
size_t pos = 0;
/* Note: exactly representable as double */
EXPECT_EQ(-1.25e+9, benchmark::stod("-1.25e+9", &pos));
EXPECT_EQ(8UL, pos);
}
#ifndef BENCHMARK_HAS_NO_EXCEPTIONS
{
ASSERT_THROW(std::ignore = benchmark::stod("this is a test"),
std::invalid_argument);
}
{
ASSERT_THROW(std::ignore = benchmark::stod("this is a test"),
std::invalid_argument);
}
#endif
}

View File

@ -0,0 +1,26 @@
#include <cassert>
#include <memory>
#include "benchmark/benchmark.h"
template <typename T>
class MyFixture : public ::benchmark::Fixture {
public:
MyFixture() : data(0) {}
T data;
using type = T;
};
BENCHMARK_TEMPLATE_METHOD_F(MyFixture, Foo)(benchmark::State& st) {
for (auto _ : st) {
this->data += typename Base::type(1);
}
}
BENCHMARK_TEMPLATE_INSTANTIATE_F(MyFixture, Foo, int);
BENCHMARK_TEMPLATE_INSTANTIATE_F(MyFixture, Foo, double);
BENCHMARK_MAIN();
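The macro pair above is what the new fixture-method-template support boils down to: the benchmark body is written once against the templated fixture (with Base presumably naming the fixture instantiation, which is why typename Base::type works), and each BENCHMARK_TEMPLATE_INSTANTIATE_F line registers it for one concrete type. A small sketch along the same lines; CounterFixture and the float instantiation are illustrative additions, not part of this diff:

#include "benchmark/benchmark.h"

template <typename T>
class CounterFixture : public ::benchmark::Fixture {
 public:
  using type = T;
  T value{};
};

// The body is defined once; `this->` is needed because the fixture is a dependent base.
BENCHMARK_TEMPLATE_METHOD_F(CounterFixture, Accumulate)(benchmark::State& st) {
  for (auto _ : st) {
    this->value += typename Base::type(1);
    benchmark::DoNotOptimize(this->value);
  }
}

// One registration per concrete element type.
BENCHMARK_TEMPLATE_INSTANTIATE_F(CounterFixture, Accumulate, int);
BENCHMARK_TEMPLATE_INSTANTIATE_F(CounterFixture, Accumulate, float);

BENCHMARK_MAIN();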

Some files were not shown because too many files have changed in this diff.