Refactor BenchmarkInstance (#1148)

* Refactor BenchmarkInstance (precursor to #1105)

* fix bazel (debug) build

* clang-format on header

* fix build error on g++-4.8
Dominic Hamon 2021-05-10 17:12:09 +01:00 committed by GitHub
parent 1f47b6b64c
commit 3b508fad1f
6 changed files with 159 additions and 130 deletions
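At its core, the change turns `BenchmarkInstance` from a plain struct whose public fields the registry filled in one by one into a class that is constructed in a single step from the owning `Benchmark`, its argument list, and the thread count, and is then read through const accessors. Every call site switches from field access to an accessor call. A minimal sketch of that shape, using simplified hypothetical names rather than the full member list in the diff below:

```cpp
// Before: an aggregate the registry populated field by field.
struct InstanceOld {
  int repetitions;
  int threads;
};

// After: constructed once, exposed through read-only accessors.
class InstanceNew {
 public:
  InstanceNew(int repetitions, int threads)
      : repetitions_(repetitions), threads_(threads) {}
  int repetitions() const { return repetitions_; }
  int threads() const { return threads_; }

 private:
  int repetitions_;
  int threads_;
};

// Call sites change accordingly, e.g.
//   old: report.repetitions = b.repetitions;
//   new: report.repetitions = b.repetitions();
```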

View File

@@ -448,7 +448,7 @@ struct Statistics {
 : name_(name), compute_(compute) {}
 };
-struct BenchmarkInstance;
+class BenchmarkInstance;
 class ThreadTimer;
 class ThreadManager;
 class PerfCountersMeasurement;
@@ -703,7 +703,7 @@ class State {
 internal::ThreadManager* const manager_;
 internal::PerfCountersMeasurement* const perf_counters_measurement_;
-friend struct internal::BenchmarkInstance;
+friend class internal::BenchmarkInstance;
 };
 inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunning() {
@@ -981,6 +981,7 @@ class Benchmark {
 private:
 friend class BenchmarkFamilies;
+friend class BenchmarkInstance;
 std::string name_;
 AggregationReportMode aggregation_report_mode_;
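The forward declaration and the `friend` declaration in the public header now use `class` to match the new definition of `BenchmarkInstance`. Mixing `struct` and `class` between a declaration and its definition is legal C++, but several compilers flag it (Clang's `-Wmismatched-tags`, MSVC's C4099), so keeping the class-key consistent keeps the build quiet. A tiny illustration with a hypothetical type, not taken from the library:

```cpp
class Widget;   // forward declaration uses the same class-key as the definition,
                // so -Wmismatched-tags style warnings never fire
class Widget {
 public:
  int id() const { return id_; }

 private:
  int id_ = 0;
};
```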

View File

@@ -253,10 +253,10 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
 size_t stat_field_width = 0;
 for (const BenchmarkInstance& benchmark : benchmarks) {
 name_field_width =
-std::max<size_t>(name_field_width, benchmark.name.str().size());
-might_have_aggregates |= benchmark.repetitions > 1;
+std::max<size_t>(name_field_width, benchmark.name().str().size());
+might_have_aggregates |= benchmark.repetitions() > 1;
-for (const auto& Stat : *benchmark.statistics)
+for (const auto& Stat : benchmark.statistics())
 stat_field_width = std::max<size_t>(stat_field_width, Stat.name_.size());
 }
 if (might_have_aggregates) name_field_width += 1 + stat_field_width;
@@ -425,7 +425,7 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
 if (FLAGS_benchmark_list_tests) {
 for (auto const& benchmark : benchmarks)
-Out << benchmark.name.str() << "\n";
+Out << benchmark.name().str() << "\n";
 } else {
 internal::RunBenchmarks(benchmarks, display_reporter, file_reporter);
 }

View File

@@ -1,17 +1,91 @@
 #include "benchmark_api_internal.h"
+#include <cinttypes>
+#include "string_util.h"
 namespace benchmark {
 namespace internal {
+BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark,
+const std::vector<int64_t>& args,
+int thread_count)
+: benchmark_(*benchmark),
+aggregation_report_mode_(benchmark_.aggregation_report_mode_),
+args_(args),
+time_unit_(benchmark_.time_unit_),
+measure_process_cpu_time_(benchmark_.measure_process_cpu_time_),
+use_real_time_(benchmark_.use_real_time_),
+use_manual_time_(benchmark_.use_manual_time_),
+complexity_(benchmark_.complexity_),
+complexity_lambda_(benchmark_.complexity_lambda_),
+statistics_(benchmark_.statistics_),
+repetitions_(benchmark_.repetitions_),
+min_time_(benchmark_.min_time_),
+iterations_(benchmark_.iterations_),
+threads_(thread_count) {
+name_.function_name = benchmark_.name_;
+size_t arg_i = 0;
+for (const auto& arg : args) {
+if (!name_.args.empty()) {
+name_.args += '/';
+}
+if (arg_i < benchmark->arg_names_.size()) {
+const auto& arg_name = benchmark_.arg_names_[arg_i];
+if (!arg_name.empty()) {
+name_.args += StrFormat("%s:", arg_name.c_str());
+}
+}
+name_.args += StrFormat("%" PRId64, arg);
+++arg_i;
+}
+if (!IsZero(benchmark->min_time_)) {
+name_.min_time = StrFormat("min_time:%0.3f", benchmark_.min_time_);
+}
+if (benchmark_.iterations_ != 0) {
+name_.iterations = StrFormat(
+"iterations:%lu", static_cast<unsigned long>(benchmark_.iterations_));
+}
+if (benchmark_.repetitions_ != 0) {
+name_.repetitions = StrFormat("repeats:%d", benchmark_.repetitions_);
+}
+if (benchmark_.measure_process_cpu_time_) {
+name_.time_type = "process_time";
+}
+if (benchmark_.use_manual_time_) {
+if (!name_.time_type.empty()) {
+name_.time_type += '/';
+}
+name_.time_type += "manual_time";
+} else if (benchmark_.use_real_time_) {
+if (!name_.time_type.empty()) {
+name_.time_type += '/';
+}
+name_.time_type += "real_time";
+}
+if (!benchmark_.thread_counts_.empty()) {
+name_.threads = StrFormat("threads:%d", threads_);
+}
+}
 State BenchmarkInstance::Run(
 IterationCount iters, int thread_id, internal::ThreadTimer* timer,
 internal::ThreadManager* manager,
 internal::PerfCountersMeasurement* perf_counters_measurement) const {
-State st(iters, arg, thread_id, threads, timer, manager,
+State st(iters, args_, thread_id, threads_, timer, manager,
 perf_counters_measurement);
-benchmark->Run(st);
+benchmark_.Run(st);
 return st;
 }
-} // internal
-} // benchmark
+} // namespace internal
+} // namespace benchmark
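The constructor above takes over the name construction that previously lived in `BenchmarkFamilies::FindBenchmarks`: it fills the `BenchmarkName` components (function name, slash-separated arguments, optional `min_time:`, `iterations:`, `repeats:`, time-type and `threads:` parts), which `BenchmarkName::str()` later joins with '/' into the reported run name. As a rough, hypothetical illustration of what those components end up looking like for a user registration:

```cpp
#include <algorithm>
#include <vector>

#include <benchmark/benchmark.h>

// Hypothetical benchmark, used only to show how the instance name is assembled.
static void BM_Copy(benchmark::State& state) {
  std::vector<char> src(static_cast<size_t>(state.range(0)), 'x');
  std::vector<char> dst(src.size());
  for (auto _ : state) {
    std::copy(src.begin(), src.end(), dst.begin());
  }
}
// Expected instance name: "BM_Copy/64/min_time:2.000/repeats:3/threads:2"
BENCHMARK(BM_Copy)->Arg(64)->MinTime(2.0)->Repetitions(3)->Threads(2);

BENCHMARK_MAIN();
```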

View File

@@ -1,9 +1,6 @@
 #ifndef BENCHMARK_API_INTERNAL_H
 #define BENCHMARK_API_INTERNAL_H
-#include "benchmark/benchmark.h"
-#include "commandlineflags.h"
 #include <cmath>
 #include <iosfwd>
 #include <limits>
@@ -11,33 +8,57 @@
 #include <string>
 #include <vector>
+#include "benchmark/benchmark.h"
+#include "commandlineflags.h"
 namespace benchmark {
 namespace internal {
 // Information kept per benchmark we may want to run
-struct BenchmarkInstance {
-BenchmarkName name;
-Benchmark* benchmark;
-AggregationReportMode aggregation_report_mode;
-std::vector<int64_t> arg;
-TimeUnit time_unit;
-int range_multiplier;
-bool measure_process_cpu_time;
-bool use_real_time;
-bool use_manual_time;
-BigO complexity;
-BigOFunc* complexity_lambda;
-UserCounters counters;
-const std::vector<Statistics>* statistics;
+class BenchmarkInstance {
+public:
+BenchmarkInstance(Benchmark* benchmark, const std::vector<int64_t>& args,
+int threads);
+const BenchmarkName& name() const { return name_; }
+AggregationReportMode aggregation_report_mode() const {
+return aggregation_report_mode_;
+}
+TimeUnit time_unit() const { return time_unit_; }
+bool measure_process_cpu_time() const { return measure_process_cpu_time_; }
+bool use_real_time() const { return use_real_time_; }
+bool use_manual_time() const { return use_manual_time_; }
+BigO complexity() const { return complexity_; }
+BigOFunc& complexity_lambda() const { return *complexity_lambda_; }
+const std::vector<Statistics>& statistics() const { return statistics_; }
+int repetitions() const { return repetitions_; }
+double min_time() const { return min_time_; }
+IterationCount iterations() const { return iterations_; }
+int threads() const { return threads_; }
 bool last_benchmark_instance;
-int repetitions;
-double min_time;
-IterationCount iterations;
-int threads; // Number of concurrent threads to us
 State Run(IterationCount iters, int thread_id, internal::ThreadTimer* timer,
 internal::ThreadManager* manager,
 internal::PerfCountersMeasurement* perf_counters_measurement) const;
+private:
+BenchmarkName name_;
+Benchmark& benchmark_;
+AggregationReportMode aggregation_report_mode_;
+const std::vector<int64_t>& args_;
+TimeUnit time_unit_;
+bool measure_process_cpu_time_;
+bool use_real_time_;
+bool use_manual_time_;
+BigO complexity_;
+BigOFunc* complexity_lambda_;
+UserCounters counters_;
+const std::vector<Statistics>& statistics_;
+int repetitions_;
+double min_time_;
+IterationCount iterations_;
+int threads_; // Number of concurrent threads to us
 };
 bool FindBenchmarksInternal(const std::string& re,
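One detail worth noting in the new layout: `benchmark_`, `args_` and `statistics_` are stored as references into the registered `Benchmark` family, so a `BenchmarkInstance` is only valid while that registration is alive (which the registry guarantees), and the class loses implicit copy assignment. A minimal sketch of that pattern, with hypothetical names rather than library code:

```cpp
#include <cstdint>
#include <vector>

// A view-like type that, like the refactored BenchmarkInstance, keeps a
// reference to data owned elsewhere instead of copying it.
class ArgsView {
 public:
  explicit ArgsView(const std::vector<int64_t>& args) : args_(args) {}
  const std::vector<int64_t>& args() const { return args_; }

 private:
  const std::vector<int64_t>& args_;  // must outlive every ArgsView built from it
};

int main() {
  std::vector<int64_t> args{8, 64, 512};
  ArgsView view(args);  // fine: args outlives view
  return static_cast<int>(view.args().size());
}
```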

View File

@@ -154,76 +154,9 @@ bool BenchmarkFamilies::FindBenchmarks(
 for (auto const& args : family->args_) {
 for (int num_threads : *thread_counts) {
-BenchmarkInstance instance;
-instance.name.function_name = family->name_;
-instance.benchmark = family.get();
-instance.aggregation_report_mode = family->aggregation_report_mode_;
-instance.arg = args;
-instance.time_unit = family->time_unit_;
-instance.range_multiplier = family->range_multiplier_;
-instance.min_time = family->min_time_;
-instance.iterations = family->iterations_;
-instance.repetitions = family->repetitions_;
-instance.measure_process_cpu_time = family->measure_process_cpu_time_;
-instance.use_real_time = family->use_real_time_;
-instance.use_manual_time = family->use_manual_time_;
-instance.complexity = family->complexity_;
-instance.complexity_lambda = family->complexity_lambda_;
-instance.statistics = &family->statistics_;
-instance.threads = num_threads;
+BenchmarkInstance instance(family.get(), args, num_threads);
-// Add arguments to instance name
-size_t arg_i = 0;
-for (auto const& arg : args) {
-if (!instance.name.args.empty()) {
-instance.name.args += '/';
-}
-if (arg_i < family->arg_names_.size()) {
-const auto& arg_name = family->arg_names_[arg_i];
-if (!arg_name.empty()) {
-instance.name.args += StrFormat("%s:", arg_name.c_str());
-}
-}
-instance.name.args += StrFormat("%" PRId64, arg);
-++arg_i;
-}
-if (!IsZero(family->min_time_))
-instance.name.min_time =
-StrFormat("min_time:%0.3f", family->min_time_);
-if (family->iterations_ != 0) {
-instance.name.iterations =
-StrFormat("iterations:%lu",
-static_cast<unsigned long>(family->iterations_));
-}
-if (family->repetitions_ != 0)
-instance.name.repetitions =
-StrFormat("repeats:%d", family->repetitions_);
-if (family->measure_process_cpu_time_) {
-instance.name.time_type = "process_time";
-}
-if (family->use_manual_time_) {
-if (!instance.name.time_type.empty()) {
-instance.name.time_type += '/';
-}
-instance.name.time_type += "manual_time";
-} else if (family->use_real_time_) {
-if (!instance.name.time_type.empty()) {
-instance.name.time_type += '/';
-}
-instance.name.time_type += "real_time";
-}
-// Add the number of threads used to the name
-if (!family->thread_counts_.empty()) {
-instance.name.threads = StrFormat("threads:%d", instance.threads);
-}
-const auto full_name = instance.name.str();
+const auto full_name = instance.name().str();
 if ((re.Match(full_name) && !isNegativeFilter) ||
 (!re.Match(full_name) && isNegativeFilter)) {
 instance.last_benchmark_instance = (&args == &family->args_.back());

View File

@@ -71,28 +71,28 @@ BenchmarkReporter::Run CreateRunReport(
 // Create report about this benchmark run.
 BenchmarkReporter::Run report;
-report.run_name = b.name;
+report.run_name = b.name();
 report.error_occurred = results.has_error_;
 report.error_message = results.error_message_;
 report.report_label = results.report_label_;
 // This is the total iterations across all threads.
 report.iterations = results.iterations;
-report.time_unit = b.time_unit;
-report.threads = b.threads;
+report.time_unit = b.time_unit();
+report.threads = b.threads();
 report.repetition_index = repetition_index;
-report.repetitions = b.repetitions;
+report.repetitions = b.repetitions();
 if (!report.error_occurred) {
-if (b.use_manual_time) {
+if (b.use_manual_time()) {
 report.real_accumulated_time = results.manual_time_used;
 } else {
 report.real_accumulated_time = results.real_time_used;
 }
 report.cpu_accumulated_time = results.cpu_time_used;
 report.complexity_n = results.complexity_n;
-report.complexity = b.complexity;
-report.complexity_lambda = b.complexity_lambda;
-report.statistics = b.statistics;
+report.complexity = b.complexity();
+report.complexity_lambda = b.complexity_lambda();
+report.statistics = &b.statistics();
 report.counters = results.counters;
 if (memory_iterations > 0) {
@@ -104,7 +104,7 @@ BenchmarkReporter::Run CreateRunReport(
 report.max_bytes_used = memory_result.max_bytes_used;
 }
-internal::Finish(&report.counters, results.iterations, seconds, b.threads);
+internal::Finish(&report.counters, results.iterations, seconds, b.threads());
 }
 return report;
 }
@@ -115,7 +115,7 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters,
 int thread_id, ThreadManager* manager,
 PerfCountersMeasurement* perf_counters_measurement) {
 internal::ThreadTimer timer(
-b->measure_process_cpu_time
+b->measure_process_cpu_time()
 ? internal::ThreadTimer::CreateProcessCpuTime()
 : internal::ThreadTimer::Create());
 State st =
@@ -141,12 +141,12 @@ class BenchmarkRunner {
 std::vector<BenchmarkReporter::Run>* complexity_reports_)
 : b(b_),
 complexity_reports(*complexity_reports_),
-min_time(!IsZero(b.min_time) ? b.min_time : FLAGS_benchmark_min_time),
-repeats(b.repetitions != 0 ? b.repetitions
+min_time(!IsZero(b.min_time()) ? b.min_time() : FLAGS_benchmark_min_time),
+repeats(b.repetitions() != 0 ? b.repetitions()
 : FLAGS_benchmark_repetitions),
-has_explicit_iteration_count(b.iterations != 0),
-pool(b.threads - 1),
-iters(has_explicit_iteration_count ? b.iterations : 1),
+has_explicit_iteration_count(b.iterations() != 0),
+pool(b.threads() - 1),
+iters(has_explicit_iteration_count ? b.iterations() : 1),
 perf_counters_measurement(
 PerfCounters::Create(StrSplit(FLAGS_benchmark_perf_counters, ','))),
 perf_counters_measurement_ptr(perf_counters_measurement.IsValid()
@@ -157,13 +157,13 @@
 FLAGS_benchmark_display_aggregates_only);
 run_results.file_report_aggregates_only =
 FLAGS_benchmark_report_aggregates_only;
-if (b.aggregation_report_mode != internal::ARM_Unspecified) {
+if (b.aggregation_report_mode() != internal::ARM_Unspecified) {
 run_results.display_report_aggregates_only =
-(b.aggregation_report_mode &
+(b.aggregation_report_mode() &
 internal::ARM_DisplayReportAggregatesOnly);
 run_results.file_report_aggregates_only =
-(b.aggregation_report_mode & internal::ARM_FileReportAggregatesOnly);
-CHECK(b.threads == 1 || !perf_counters_measurement.IsValid())
+(b.aggregation_report_mode() & internal::ARM_FileReportAggregatesOnly);
+CHECK(b.threads() == 1 || !perf_counters_measurement.IsValid())
 << "Perf counters are not supported in multi-threaded cases.\n";
 CHECK(FLAGS_benchmark_perf_counters.empty() ||
 perf_counters_measurement.IsValid())
@@ -178,7 +178,7 @@
 run_results.aggregates_only = ComputeStats(run_results.non_aggregates);
 // Maybe calculate complexity report
-if ((b.complexity != oNone) && b.last_benchmark_instance) {
+if ((b.complexity() != oNone) && b.last_benchmark_instance) {
 auto additional_run_stats = ComputeBigO(complexity_reports);
 run_results.aggregates_only.insert(run_results.aggregates_only.end(),
 additional_run_stats.begin(),
@@ -214,10 +214,10 @@
 double seconds;
 };
 IterationResults DoNIterations() {
-VLOG(2) << "Running " << b.name.str() << " for " << iters << "\n";
+VLOG(2) << "Running " << b.name().str() << " for " << iters << "\n";
 std::unique_ptr<internal::ThreadManager> manager;
-manager.reset(new internal::ThreadManager(b.threads));
+manager.reset(new internal::ThreadManager(b.threads()));
 // Run all but one thread in separate threads
 for (std::size_t ti = 0; ti < pool.size(); ++ti) {
@@ -244,23 +244,23 @@
 manager.reset();
 // Adjust real/manual time stats since they were reported per thread.
-i.results.real_time_used /= b.threads;
-i.results.manual_time_used /= b.threads;
+i.results.real_time_used /= b.threads();
+i.results.manual_time_used /= b.threads();
 // If we were measuring whole-process CPU usage, adjust the CPU time too.
-if (b.measure_process_cpu_time) i.results.cpu_time_used /= b.threads;
+if (b.measure_process_cpu_time()) i.results.cpu_time_used /= b.threads();
 VLOG(2) << "Ran in " << i.results.cpu_time_used << "/"
 << i.results.real_time_used << "\n";
 // By using KeepRunningBatch a benchmark can iterate more times than
 // requested, so take the iteration count from i.results.
-i.iters = i.results.iterations / b.threads;
+i.iters = i.results.iterations / b.threads();
 // Base decisions off of real time if requested by this benchmark.
 i.seconds = i.results.cpu_time_used;
-if (b.use_manual_time) {
+if (b.use_manual_time()) {
 i.seconds = i.results.manual_time_used;
-} else if (b.use_real_time) {
+} else if (b.use_real_time()) {
 i.seconds = i.results.real_time_used;
 }
@@ -301,7 +301,7 @@
 // CPU time is specified but the elapsed real time greatly exceeds
 // the minimum time.
 // Note that user provided timers are except from this sanity check.
-((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time);
+((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time());
 }
void DoOneRepetition(int64_t repetition_index) {
@@ -360,7 +360,7 @@
 CreateRunReport(b, i.results, memory_iterations, memory_result,
 i.seconds, repetition_index);
-if (!report.error_occurred && b.complexity != oNone)
+if (!report.error_occurred && b.complexity() != oNone)
 complexity_reports.push_back(report);
 run_results.non_aggregates.push_back(report);
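Worth noting from the runner changes above: the per-benchmark settings win over the global flags. `min_time` falls back to `--benchmark_min_time` only when the benchmark did not set its own, `repeats` falls back to `--benchmark_repetitions` the same way, and an explicit iteration count makes `has_explicit_iteration_count` true and skips the usual grow-until-min-time loop. A hypothetical registration showing those overrides:

```cpp
#include <benchmark/benchmark.h>

// Hypothetical benchmark, only to illustrate how the runner picks its limits.
static void BM_Parse(benchmark::State& state) {
  int x = 0;
  for (auto _ : state) {
    benchmark::DoNotOptimize(x += 1);
  }
}

// Runs for at least 2s per repetition and repeats 5 times, regardless of the
// --benchmark_min_time / --benchmark_repetitions flags.
BENCHMARK(BM_Parse)->MinTime(2.0)->Repetitions(5);

// Runs exactly 1000 iterations: the min-time convergence loop is skipped.
BENCHMARK(BM_Parse)->Iterations(1000);

BENCHMARK_MAIN();
```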