[FR] Add API to provide custom profilers #1807 (#1809)

This API is akin to the MemoryManager API and lets tools provide their own profiler which is wrapped in the same way MemoryManager is wrapped. Namely, the profiler provides Start/Stop methods that are called at the start/end of running the benchmark in a separate pass. Co-authored-by: dominic <510002+dmah42@users.noreply.github.com>
2025-03-29 13:30:38 +08:00 · 2024-07-16 01:56:40 -07:00 · 2024-07-16 01:56:40 -07:00 · 7c8ed6b082
commit 7c8ed6b082
parent d2cd246e19
8 changed files with 134 additions and 16 deletions
--- a/1
+++ b/1
@ -42,6 +42,7 @@ Dominic Hamon <dma@stripysock.com> <dominic@google.com>
 Dominik Czarnota <dominik.b.czarnota@gmail.com>
 Dominik Korman <kormandominik@gmail.com>
 Donald Aingworth <donalds_junk_mail@yahoo.com>
+Doug Evans <xdje42@gmail.com>
 Eric Backus <eric_backus@alum.mit.edu>
 Eric Fiselier <eric@efcs.ca>
 Eugene Zhuk <eugene.zhuk@gmail.com>
--- a/docs/user_guide.md
+++ b/docs/user_guide.md
@ -1139,6 +1139,21 @@ a report on the number of allocations, bytes used, etc.
 This data will then be reported alongside other performance data, currently
 only when using JSON output.

+<a name="profiling" />
+
+## Profiling
+
+It's often useful to also profile benchmarks in particular ways, in addition to
+CPU performance. For this reason, benchmark offers the `RegisterProfilerManager`
+method that allows a custom `ProfilerManager` to be injected.
+
+If set, the `ProfilerManager::AfterSetupStart` and
+`ProfilerManager::BeforeTeardownStop` methods will be called at the start and
+end of a separate benchmark run to allow user code to collect and report
+user-provided profile metrics.
+
+Output collected from this profiling run must be reported separately.
+
 <a name="using-register-benchmark" />

 ## Using RegisterBenchmark(name, fn, args...)
--- a/include/benchmark/benchmark.h
+++ b/include/benchmark/benchmark.h
@ -416,6 +416,26 @@ class MemoryManager {
 BENCHMARK_EXPORT
 void RegisterMemoryManager(MemoryManager* memory_manager);

+// If a ProfilerManager is registered (via RegisterProfilerManager()), the
+// benchmark will be run an additional time under the profiler to collect and
+// report profile metrics for the run of the benchmark.
+class ProfilerManager {
+ public:
+  virtual ~ProfilerManager() {}
+
+  // This is called after `Setup()` code and right before the benchmark is run.
+  virtual void AfterSetupStart() = 0;
+
+  // This is called before `Teardown()` code and right after the benchmark
+  // completes.
+  virtual void BeforeTeardownStop() = 0;
+};
+
+// Register a ProfilerManager instance that will be used to collect and report
+// profile measurements for benchmark runs.
+BENCHMARK_EXPORT
+void RegisterProfilerManager(ProfilerManager* profiler_manager);
+
 // Add a key-value pair to output as part of the context stanza in the report.
 BENCHMARK_EXPORT
 void AddCustomContext(const std::string& key, const std::string& value);
--- a/src/benchmark.cc
+++ b/src/benchmark.cc
@ -656,6 +656,10 @@ void RegisterMemoryManager(MemoryManager* manager) {
  internal::memory_manager = manager;
 }

+void RegisterProfilerManager(ProfilerManager* manager) {
+  internal::profiler_manager = manager;
+}
+
 void AddCustomContext(const std::string& key, const std::string& value) {
  if (internal::global_context == nullptr) {
    internal::global_context = new std::map<std::string, std::string>();
--- a/src/benchmark_runner.cc
+++ b/src/benchmark_runner.cc
@ -62,6 +62,8 @@ namespace internal {

 MemoryManager* memory_manager = nullptr;

+ProfilerManager* profiler_manager = nullptr;
+
 namespace {

 static constexpr IterationCount kMaxIterations = 1000000000000;
@ -401,6 +403,41 @@ void BenchmarkRunner::RunWarmUp() {
  }
 }

+MemoryManager::Result* BenchmarkRunner::RunMemoryManager(
+    IterationCount memory_iterations) {
+  // TODO(vyng): Consider making BenchmarkReporter::Run::memory_result an
+  // optional so we don't have to own the Result here.
+  // Can't do it now due to cxx03.
+  memory_results.push_back(MemoryManager::Result());
+  MemoryManager::Result* memory_result = &memory_results.back();
+  memory_manager->Start();
+  std::unique_ptr<internal::ThreadManager> manager;
+  manager.reset(new internal::ThreadManager(1));
+  b.Setup();
+  RunInThread(&b, memory_iterations, 0, manager.get(),
+              perf_counters_measurement_ptr);
+  manager->WaitForAllThreads();
+  manager.reset();
+  b.Teardown();
+  memory_manager->Stop(*memory_result);
+  return memory_result;
+}
+
+void BenchmarkRunner::RunProfilerManager() {
+  // TODO: Provide a way to specify the number of iterations.
+  IterationCount profile_iterations = 1;
+  std::unique_ptr<internal::ThreadManager> manager;
+  manager.reset(new internal::ThreadManager(1));
+  b.Setup();
+  profiler_manager->AfterSetupStart();
+  RunInThread(&b, profile_iterations, 0, manager.get(),
+              /*perf_counters_measurement_ptr=*/nullptr);
+  manager->WaitForAllThreads();
+  profiler_manager->BeforeTeardownStop();
+  manager.reset();
+  b.Teardown();
+}
+
 void BenchmarkRunner::DoOneRepetition() {
  assert(HasRepeatsRemaining() && "Already done all repetitions?");

@ -445,28 +482,18 @@ void BenchmarkRunner::DoOneRepetition() {
           "then we should have accepted the current iteration run.");
  }

-  // Oh, one last thing, we need to also produce the 'memory measurements'..
+  // Produce memory measurements if requested.
  MemoryManager::Result* memory_result = nullptr;
  IterationCount memory_iterations = 0;
  if (memory_manager != nullptr) {
-    // TODO(vyng): Consider making BenchmarkReporter::Run::memory_result an
-    // optional so we don't have to own the Result here.
-    // Can't do it now due to cxx03.
-    memory_results.push_back(MemoryManager::Result());
-    memory_result = &memory_results.back();
    // Only run a few iterations to reduce the impact of one-time
    // allocations in benchmarks that are not properly managed.
    memory_iterations = std::min<IterationCount>(16, iters);
-    memory_manager->Start();
-    std::unique_ptr<internal::ThreadManager> manager;
-    manager.reset(new internal::ThreadManager(1));
-    b.Setup();
-    RunInThread(&b, memory_iterations, 0, manager.get(),
-                perf_counters_measurement_ptr);
-    manager->WaitForAllThreads();
-    manager.reset();
-    b.Teardown();
-    memory_manager->Stop(*memory_result);
+    memory_result = RunMemoryManager(memory_iterations);
+  }
+
+  if (profiler_manager != nullptr) {
+    RunProfilerManager();
  }

  // Ok, now actually report.
--- a/src/benchmark_runner.h
+++ b/src/benchmark_runner.h
@ -35,6 +35,7 @@ BM_DECLARE_string(benchmark_perf_counters);
 namespace internal {

 extern MemoryManager* memory_manager;
+extern ProfilerManager* profiler_manager;

 struct RunResults {
  std::vector<BenchmarkReporter::Run> non_aggregates;
@ -113,6 +114,10 @@ class BenchmarkRunner {
  };
  IterationResults DoNIterations();

+  MemoryManager::Result* RunMemoryManager(IterationCount memory_iterations);
+
+  void RunProfilerManager();
+
  IterationCount PredictNumItersNeeded(const IterationResults& i) const;

  bool ShouldReportIterationResults(const IterationResults& i) const;
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@ -192,6 +192,9 @@ benchmark_add_test(NAME user_counters_thousands_test COMMAND user_counters_thous
 compile_output_test(memory_manager_test)
 benchmark_add_test(NAME memory_manager_test COMMAND memory_manager_test --benchmark_min_time=0.01s)

+compile_output_test(profiler_manager_test)
+benchmark_add_test(NAME profiler_manager_test COMMAND profiler_manager_test --benchmark_min_time=0.01s)
+
 # MSVC does not allow to set the language standard to C++98/03.
 if(NOT (MSVC OR CMAKE_CXX_SIMULATE_ID STREQUAL "MSVC"))
  compile_benchmark_test(cxx03_test)
--- a/test/profiler_manager_test.cc
+++ b/test/profiler_manager_test.cc
@ -0,0 +1,43 @@
+// FIXME: WIP
+
+#include <memory>
+
+#include "benchmark/benchmark.h"
+#include "output_test.h"
+
+class TestProfilerManager : public benchmark::ProfilerManager {
+  void AfterSetupStart() override {}
+  void BeforeTeardownStop() override {}
+};
+
+void BM_empty(benchmark::State& state) {
+  for (auto _ : state) {
+    auto iterations = state.iterations();
+    benchmark::DoNotOptimize(iterations);
+  }
+}
+BENCHMARK(BM_empty);
+
+ADD_CASES(TC_ConsoleOut, {{"^BM_empty %console_report$"}});
+ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_empty\",$"},
+                       {"\"family_index\": 0,$", MR_Next},
+                       {"\"per_family_instance_index\": 0,$", MR_Next},
+                       {"\"run_name\": \"BM_empty\",$", MR_Next},
+                       {"\"run_type\": \"iteration\",$", MR_Next},
+                       {"\"repetitions\": 1,$", MR_Next},
+                       {"\"repetition_index\": 0,$", MR_Next},
+                       {"\"threads\": 1,$", MR_Next},
+                       {"\"iterations\": %int,$", MR_Next},
+                       {"\"real_time\": %float,$", MR_Next},
+                       {"\"cpu_time\": %float,$", MR_Next},
+                       {"\"time_unit\": \"ns\"$", MR_Next},
+                       {"}", MR_Next}});
+ADD_CASES(TC_CSVOut, {{"^\"BM_empty\",%csv_report$"}});
+
+int main(int argc, char* argv[]) {
+  std::unique_ptr<benchmark::ProfilerManager> pm(new TestProfilerManager());
+
+  benchmark::RegisterProfilerManager(pm.get());
+  RunOutputTests(argc, argv);
+  benchmark::RegisterProfilerManager(nullptr);
+}