[perf-counters] Fix pause/resume (#1643)

* [perf-counters] Fix pause/resume

Using `state.PauseTiming() / state.ResumeTiming()` was broken.

Thanks [@virajbshah] for the the repro testcase.

* ran clang-format over the whole perf_counters_test.cc

* Remove check that perf counters are 0 on `Pause`, since `Pause`/`Resume`
sequences would cause a non-0 counter value

* both upper and lower bound for the with/without resume counters

---------

Co-authored-by: dominic <510002+dmah42@users.noreply.github.com>
This commit is contained in:
Mircea Trofin 2023-08-11 04:46:36 -07:00 committed by GitHub
parent cbecc8ffc7
commit 1c64a36c5b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 65 additions and 5 deletions

View File

@ -229,8 +229,7 @@ void State::PauseTiming() {
for (const auto& name_and_measurement : measurements) { for (const auto& name_and_measurement : measurements) {
auto name = name_and_measurement.first; auto name = name_and_measurement.first;
auto measurement = name_and_measurement.second; auto measurement = name_and_measurement.second;
BM_CHECK_EQ(std::fpclassify(double{counters[name]}), FP_ZERO); counters[name] += Counter(measurement, Counter::kAvgIterations);
counters[name] = Counter(measurement, Counter::kAvgIterations);
} }
} }
} }

View File

@ -1,8 +1,8 @@
#include <cstdarg>
#undef NDEBUG #undef NDEBUG
#include "../src/perf_counters.h"
#include "../src/commandlineflags.h" #include "../src/commandlineflags.h"
#include "../src/perf_counters.h"
#include "benchmark/benchmark.h" #include "benchmark/benchmark.h"
#include "output_test.h" #include "output_test.h"
@ -21,17 +21,78 @@ static void BM_Simple(benchmark::State& state) {
BENCHMARK(BM_Simple); BENCHMARK(BM_Simple);
ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Simple\",$"}}); ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Simple\",$"}});
const int kIters = 1000000;
void BM_WithoutPauseResume(benchmark::State& state) {
int n = 0;
for (auto _ : state) {
for (auto i = 0; i < kIters; ++i) {
n = 1 - n;
benchmark::DoNotOptimize(n);
}
}
}
BENCHMARK(BM_WithoutPauseResume);
ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_WithoutPauseResume\",$"}});
void BM_WithPauseResume(benchmark::State& state) {
int m = 0, n = 0;
for (auto _ : state) {
for (auto i = 0; i < kIters; ++i) {
n = 1 - n;
benchmark::DoNotOptimize(n);
}
state.PauseTiming();
for (auto j = 0; j < kIters; ++j) {
m = 1 - m;
benchmark::DoNotOptimize(m);
}
state.ResumeTiming();
}
}
BENCHMARK(BM_WithPauseResume);
ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_WithPauseResume\",$"}});
static void CheckSimple(Results const& e) { static void CheckSimple(Results const& e) {
CHECK_COUNTER_VALUE(e, double, "CYCLES", GT, 0); CHECK_COUNTER_VALUE(e, double, "CYCLES", GT, 0);
CHECK_COUNTER_VALUE(e, double, "BRANCHES", GT, 0.0); CHECK_COUNTER_VALUE(e, double, "BRANCHES", GT, 0.0);
} }
double withoutPauseResumeInstrCount = 0.0;
double withPauseResumeInstrCount = 0.0;
static void CheckInstrCount(double* counter, Results const& e) {
BM_CHECK_GT(e.NumIterations(), 0);
*counter = e.GetAs<double>("INSTRUCTIONS") / e.NumIterations();
}
static void CheckInstrCountWithoutResume(Results const& e) {
CheckInstrCount(&withoutPauseResumeInstrCount, e);
}
static void CheckInstrCountWithResume(Results const& e) {
CheckInstrCount(&withPauseResumeInstrCount, e);
}
CHECK_BENCHMARK_RESULTS("BM_Simple", &CheckSimple); CHECK_BENCHMARK_RESULTS("BM_Simple", &CheckSimple);
CHECK_BENCHMARK_RESULTS("BM_WithoutPauseResume", &CheckInstrCountWithoutResume);
CHECK_BENCHMARK_RESULTS("BM_WithPauseResume", &CheckInstrCountWithResume);
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
if (!benchmark::internal::PerfCounters::kSupported) { if (!benchmark::internal::PerfCounters::kSupported) {
return 0; return 0;
} }
benchmark::FLAGS_benchmark_perf_counters = "CYCLES,BRANCHES"; benchmark::FLAGS_benchmark_perf_counters = "CYCLES,BRANCHES,INSTRUCTIONS";
benchmark::internal::PerfCounters::Initialize(); benchmark::internal::PerfCounters::Initialize();
RunOutputTests(argc, argv); RunOutputTests(argc, argv);
BM_CHECK_GT(withPauseResumeInstrCount, kIters);
BM_CHECK_GT(withoutPauseResumeInstrCount, kIters);
BM_CHECK_LT(withPauseResumeInstrCount, 1.5 * withoutPauseResumeInstrCount);
} }