From e73915667c21faccd7019c6da8ab083b0264db13 Mon Sep 17 00:00:00 2001 From: Jesse Rosenstock Date: Mon, 21 Aug 2023 16:35:42 +0200 Subject: [PATCH] State: Initialize counters with kAvgIteration in constructor (#1652) * State: Initialize counters with kAvgIteration in constructor Previously, `counters` was updated in `PauseTiming()` with `counters[name] += Counter(measurement, kAvgIteration)`. The first `counters[name]` call inserts a counter with no flags. There is no `operator+=` for `Counter`, so the insertion is done by converting the `Counter` to a `double`, then constructing a `Counter` to insert from the `double`, which drops the flags. Pre-insert the `Counter` with the correct flags, then only update `Counter::value`. Introduced in 1c64a36 ([perf-counters] Fix pause/resume (#1643)). * perf_counters_test.cc: Don't divide by iterations Perf counters are now divided by iterations, so dividing again in the test is wrong. * State: Fix shadowed param error * benchmark.cc: Fix clang-tidy error --------- Co-authored-by: dominic <510002+dmah42@users.noreply.github.com> --- src/benchmark.cc | 19 ++++++++++++++++--- test/perf_counters_test.cc | 17 ++++++----------- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/src/benchmark.cc b/src/benchmark.cc index 974cde6c..6139e59d 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -179,6 +179,17 @@ State::State(std::string name, IterationCount max_iters, BM_CHECK_LT(thread_index_, threads_) << "thread_index must be less than threads"; + // Add counters with correct flag now. If added with `counters[name]` in + // `PauseTiming`, a new `Counter` will be inserted the first time, which + // won't have the flag. Inserting them now also reduces the allocations + // during the benchmark. + if (perf_counters_measurement_) { + for (const std::string& counter_name : + perf_counters_measurement_->names()) { + counters[counter_name] = Counter(0.0, Counter::kAvgIterations); + } + } + // Note: The use of offsetof below is technically undefined until C++17 // because State is not a standard layout type. However, all compilers // currently provide well-defined behavior as an extension (which is @@ -227,9 +238,11 @@ void State::PauseTiming() { BM_CHECK(false) << "Perf counters read the value failed."; } for (const auto& name_and_measurement : measurements) { - auto name = name_and_measurement.first; - auto measurement = name_and_measurement.second; - counters[name] += Counter(measurement, Counter::kAvgIterations); + const std::string& name = name_and_measurement.first; + const double measurement = name_and_measurement.second; + // Counter was inserted with `kAvgIterations` flag by the constructor. + assert(counters.find(name) != counters.end()); + counters[name].value += measurement; } } } diff --git a/test/perf_counters_test.cc b/test/perf_counters_test.cc index f2ef9be2..b0a3ab06 100644 --- a/test/perf_counters_test.cc +++ b/test/perf_counters_test.cc @@ -66,22 +66,17 @@ static void CheckSimple(Results const& e) { double withoutPauseResumeInstrCount = 0.0; double withPauseResumeInstrCount = 0.0; -static void CheckInstrCount(double* counter, Results const& e) { - BM_CHECK_GT(e.NumIterations(), 0); - *counter = e.GetAs("INSTRUCTIONS") / e.NumIterations(); +static void SaveInstrCountWithoutResume(Results const& e) { + withoutPauseResumeInstrCount = e.GetAs("INSTRUCTIONS"); } -static void CheckInstrCountWithoutResume(Results const& e) { - CheckInstrCount(&withoutPauseResumeInstrCount, e); -} - -static void CheckInstrCountWithResume(Results const& e) { - CheckInstrCount(&withPauseResumeInstrCount, e); +static void SaveInstrCountWithResume(Results const& e) { + withPauseResumeInstrCount = e.GetAs("INSTRUCTIONS"); } CHECK_BENCHMARK_RESULTS("BM_Simple", &CheckSimple); -CHECK_BENCHMARK_RESULTS("BM_WithoutPauseResume", &CheckInstrCountWithoutResume); -CHECK_BENCHMARK_RESULTS("BM_WithPauseResume", &CheckInstrCountWithResume); +CHECK_BENCHMARK_RESULTS("BM_WithoutPauseResume", &SaveInstrCountWithoutResume); +CHECK_BENCHMARK_RESULTS("BM_WithPauseResume", &SaveInstrCountWithResume); int main(int argc, char* argv[]) { if (!benchmark::internal::PerfCounters::kSupported) {