diff --git a/src/benchmark.cc b/src/benchmark.cc index 974cde6c..6139e59d 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -179,6 +179,17 @@ State::State(std::string name, IterationCount max_iters, BM_CHECK_LT(thread_index_, threads_) << "thread_index must be less than threads"; + // Add counters with correct flag now. If added with `counters[name]` in + // `PauseTiming`, a new `Counter` will be inserted the first time, which + // won't have the flag. Inserting them now also reduces the allocations + // during the benchmark. + if (perf_counters_measurement_) { + for (const std::string& counter_name : + perf_counters_measurement_->names()) { + counters[counter_name] = Counter(0.0, Counter::kAvgIterations); + } + } + // Note: The use of offsetof below is technically undefined until C++17 // because State is not a standard layout type. However, all compilers // currently provide well-defined behavior as an extension (which is @@ -227,9 +238,11 @@ void State::PauseTiming() { BM_CHECK(false) << "Perf counters read the value failed."; } for (const auto& name_and_measurement : measurements) { - auto name = name_and_measurement.first; - auto measurement = name_and_measurement.second; - counters[name] += Counter(measurement, Counter::kAvgIterations); + const std::string& name = name_and_measurement.first; + const double measurement = name_and_measurement.second; + // Counter was inserted with `kAvgIterations` flag by the constructor. + assert(counters.find(name) != counters.end()); + counters[name].value += measurement; } } } diff --git a/test/perf_counters_test.cc b/test/perf_counters_test.cc index f2ef9be2..b0a3ab06 100644 --- a/test/perf_counters_test.cc +++ b/test/perf_counters_test.cc @@ -66,22 +66,17 @@ static void CheckSimple(Results const& e) { double withoutPauseResumeInstrCount = 0.0; double withPauseResumeInstrCount = 0.0; -static void CheckInstrCount(double* counter, Results const& e) { - BM_CHECK_GT(e.NumIterations(), 0); - *counter = e.GetAs("INSTRUCTIONS") / e.NumIterations(); +static void SaveInstrCountWithoutResume(Results const& e) { + withoutPauseResumeInstrCount = e.GetAs("INSTRUCTIONS"); } -static void CheckInstrCountWithoutResume(Results const& e) { - CheckInstrCount(&withoutPauseResumeInstrCount, e); -} - -static void CheckInstrCountWithResume(Results const& e) { - CheckInstrCount(&withPauseResumeInstrCount, e); +static void SaveInstrCountWithResume(Results const& e) { + withPauseResumeInstrCount = e.GetAs("INSTRUCTIONS"); } CHECK_BENCHMARK_RESULTS("BM_Simple", &CheckSimple); -CHECK_BENCHMARK_RESULTS("BM_WithoutPauseResume", &CheckInstrCountWithoutResume); -CHECK_BENCHMARK_RESULTS("BM_WithPauseResume", &CheckInstrCountWithResume); +CHECK_BENCHMARK_RESULTS("BM_WithoutPauseResume", &SaveInstrCountWithoutResume); +CHECK_BENCHMARK_RESULTS("BM_WithPauseResume", &SaveInstrCountWithResume); int main(int argc, char* argv[]) { if (!benchmark::internal::PerfCounters::kSupported) {