finish merge

2025-04-12 20:41:14 +08:00 · 2015-02-18 16:36:59 -05:00 · 2015-02-18 16:36:59 -05:00 · ed5145afea
commit ed5145afea
parent 4c786b1671
6 changed files with 201 additions and 219 deletions
--- a/include/benchmark/benchmark.h
+++ b/include/benchmark/benchmark.h
@ -35,7 +35,7 @@ namespace benchmark {
 class Benchmark {
 public:
  // The Benchmark takes ownership of the Callback pointed to by f.
-  Benchmark(const std::string& name, const Function& f);
+  Benchmark(const std::string& name, Function* f);

  ~Benchmark();

@ -97,23 +97,6 @@ class Benchmark {
  // Equivalent to ThreadRange(NumCPUs(), NumCPUs())
  Benchmark* ThreadPerCpu();

-  // Have "setup" and/or "teardown" invoked once for every benchmark run.
-  // If the benchmark is multi-threaded (will run in k threads concurrently),
-  // the setup callback will be be invoked exactly once (not k times) before
-  // each run with k threads. Time allowing (e.g. for a short benchmark), there
-  // may be multiple such runs per benchmark, each run with its own
-  // "setup"/"teardown".
-  //
-  // If the benchmark uses different size groups of threads (e.g. via
-  // ThreadRange), the above will be true for each size group.
-  //
-  // The callback will be passed the number of threads for this benchmark run.
-  //
-  // The callback must not be self-deleting.  The Benchmark
-  // object takes ownership of the callback object.
-  Benchmark* Setup(const Function& setup);
-  Benchmark* Teardown(const Function& teardown);
-
  // TODO(sanjay): Control whether or not real-time is used for this benchmark
  // TODO(sanjay): Control the default number of iterations

@ -130,10 +113,9 @@ class Benchmark {

 private:
  std::string name_;
-  Function function_;
-  Function setup_;
-  Function teardown_;
+  Function* function_;
  int registration_index_;
+  int arg_count_;
  std::vector< std::pair<int, int> > args_;  // Args for all benchmark runs
  std::vector<int> thread_counts_;

--- a/include/benchmark/minimal_benchmark.h
+++ b/include/benchmark/minimal_benchmark.h
@ -146,6 +146,7 @@ BENCHMARK(BM_memcpy)->Setup(NewPermanentCallback(MemcpySetup))
 #ifndef BENCHMARK_MINIMAL_BENCHMARK_H_
 #define BENCHMARK_MINIMAL_BENCHMARK_H_

+#include <assert.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include "macros.h"
@ -252,38 +253,115 @@ extern void BenchmarkUseRealTime();

 class Benchmark;

-// A Function object wraps together a callback of one of several
-// possible types and allows it to be invoked without the caller
-// having to know which type is being invoked.  The callback must be
-// repeatable.
-class Function {
-   private:
-    typedef void(F0Type)(int);
-    typedef void(F1Type)(int, int);
-    typedef void(F2Type)(int, int, int);

-    F0Type *f0_;
-    F1Type *f1_;
-    F2Type *f2_;
-   public:
-    Function() : f0_(NULL), f1_(NULL), f2_(NULL) {}
-    Function(F0Type* f)
-        : f0_(f), f1_(NULL), f2_(NULL) {
+// State is passed to a running Benchmark and contains state for the
+// benchmark to use.
+class State {
+public:
+  State(int max_iters, bool has_x, int x, bool has_y, int y, int thread_i)
+    : started_(false), total_iterations_(0), max_iterations_(max_iters),
+      has_range_x_(has_x), range_x_(x),
+      has_range_y_(has_y), range_y_(y),
+      thread_index(thread_i)
+  {}
+
+  // Returns true iff the benchmark should continue through another iteration.
+  bool KeepRunning() {
+    if (__builtin_expect(!started_, false)) {
+        StartBenchmarkTiming();
+        started_ = true;
    }
-    Function(F1Type* f)
-        : f0_(NULL), f1_(f), f2_(NULL) {
+    bool const res = total_iterations_++ < max_iterations_;
+    if (__builtin_expect(!res, false)) {
+        assert(started_);
+        StopBenchmarkTiming();
+        // The failing check above still post-incremented the counter, leaving
+        // it one past the number of iterations actually run. Clamp it so that
+        // iterations() reports the true count after the loop exits.
+        total_iterations_ = max_iterations_;
    }
-    Function(F2Type* f)
-        : f0_(NULL), f1_(NULL), f2_(f) {
-    }
-    void Run(int iters, int arg1, int arg2) const;
-    int args() const;
+    return res;
+  }
+
+  void PauseTiming() {
+    StopBenchmarkTiming();
+  }
+
+  void ResumeTiming() {
+    StartBenchmarkTiming();
+  }
+
+  // Set the number of bytes processed by the current benchmark
+  // execution.  This routine is typically called once at the end of a
+  // throughput oriented benchmark.  If this routine is called with a
+  // value > 0, the report is printed in MB/sec instead of nanoseconds
+  // per iteration.
+  //
+  // REQUIRES: a benchmark has exited its KeepRunning loop.
+  void SetBytesProcessed(int64_t bytes) {
+    SetBenchmarkBytesProcessed(bytes);
+  }
+
+  // If this routine is called with items > 0, then an items/s
+  // label is printed on the benchmark report line for the currently
+  // executing benchmark. It is typically called at the end of a processing
+  // benchmark where a processing items/second output is desired.
+  //
+  // REQUIRES: a benchmark has exited its KeepRunning loop.
+  void SetItemsProcessed(int64_t items) {
+    SetBenchmarkItemsProcessed(items);
+  }
+
+  // If this routine is called, the specified label is printed at the
+  // end of the benchmark report line for the currently executing
+  // benchmark.  Example:
+  //  static void BM_Compress(int iters) {
+  //    ...
+  //    double compression = input_size / output_size;
+  //    benchmark::SetLabel(StringPrintf("compress:%.1f%%", 100.0*compression));
+  //  }
+  // Produces output that looks like:
+  //  BM_Compress   50         50   14115038  compress:27.3%
+  //
+  // REQUIRES: a benchmark has exited its KeepRunning loop.
+  void SetLabel(const char* label) {
+    SetBenchmarkLabel(label);
+  }
+
+  // Range arguments for this run. Asserts that the argument has been set.
+  int range_x() const {
+    assert(has_range_x_);
+    return range_x_;
+  }
+
+  int range_y() const {
+    assert(has_range_y_);
+    return range_y_;
+  }
+
+  int iterations() const { return total_iterations_; }
+  int max_iterations() const { return max_iterations_; }
+
+
+private:
+    bool started_;
+    unsigned total_iterations_, max_iterations_;
+
+    bool has_range_x_;
+    int range_x_;
+
+    bool has_range_y_;
+    int range_y_;
+
+public:
+    const int thread_index;
+
+private:
+    DISALLOW_COPY_AND_ASSIGN(State)
 };

+typedef void(Function)(State&);
+
 class MinimalBenchmark
 {
 public:
-  MinimalBenchmark(const char* name, const Function& f);
+  MinimalBenchmark(const char* name, Function* ptr);

  ~MinimalBenchmark();

@ -345,23 +423,6 @@ public:
  // Equivalent to ThreadRange(NumCPUs(), NumCPUs())
  MinimalBenchmark& ThreadPerCpu();

-  // Have "setup" and/or "teardown" invoked once for every benchmark run.
-  // If the benchmark is multi-threaded (will run in k threads concurrently),
-  // the setup callback will be be invoked exactly once (not k times) before
-  // each run with k threads. Time allowing (e.g. for a short benchmark), there
-  // may be multiple such runs per benchmark, each run with its own
-  // "setup"/"teardown".
-  //
-  // If the benchmark uses different size groups of threads (e.g. via
-  // ThreadRange), the above will be true for each size group.
-  //
-  // The callback will be passed the number of threads for this benchmark run.
-  //
-  // The callback must not be self-deleting.  The Benchmark
-  // object takes ownership of the callback object.
-  MinimalBenchmark& Setup(const Function& setup);
-  MinimalBenchmark& Teardown(const Function& teardown);
-
  MinimalBenchmark* operator->() {
    return this;
  }
--- a/src/benchmark.cc
+++ b/src/benchmark.cc
@ -261,56 +261,30 @@ const int Benchmark::kNumCpuMarker;
 // Information kept per benchmark we may want to run
 struct Benchmark::Instance {
  std::string   name;
-  Function      function;
-  Function      setup;
-  Function      teardown;
+  Function*     function;
+  bool          has_arg1;
  int           arg1;
+  bool          has_arg2;
  int           arg2;
  int           threads;    // Number of concurrent threads to use
  bool          multithreaded;  // Is benchmark multi-threaded?

-  void Run(int iters) const {
-    function.Run(iters, arg1, arg2);
+  void Run(int iters, int thread_id) const {
+    State st(iters, has_arg1, arg1, has_arg2, arg2, thread_id);
+    function(st);
  }
 };

-void Function::Run(int iters, int arg1, int arg2) const {
-  if (f0_ != NULL) {
-    (*f0_)(iters);
-  } else if (f1_ != NULL) {
-    (*f1_)(iters, arg1);
-  } else if (f2_ != NULL) {
-    (*f2_)(iters, arg1, arg2);
-  } else {
-    // NULL function; do nothing.
-  }
-}
-
-int Function::args() const {
-  if (f0_ != NULL) {
-    return 0;
-  } else if (f1_ != NULL) {
-    return 1;
-  } else if (f2_ != NULL) {
-    return 2;
-  } else {
-    return -1;
-  }
-}

 Benchmark::Benchmark(const std::string& name,
-                     const Function& f) EXCLUDES(GetBenchmarkLock())
-                    : name_(name), function_(f) {
+                     Function* f) EXCLUDES(GetBenchmarkLock())
+                    : name_(name), function_(f), arg_count_(-1) {
  MutexLock l(GetBenchmarkLock());
  if (families == NULL) {
    families = new std::vector<Benchmark*>;
  }
  registration_index_ = families->size();
  families->push_back(this);
-  if (f.args() == 0) {
-    // Run it exactly once regardless of Arg/Range calls.
-    args_.emplace_back(-1, -1);
-  }
 }

 Benchmark::~Benchmark() EXCLUDES(GetBenchmarkLock()) {
@ -324,13 +298,18 @@ Benchmark::~Benchmark() EXCLUDES(GetBenchmarkLock()) {
 }

 Benchmark* Benchmark::Arg(int x) {
-  CHECK_EQ(function_.args(), 1) << "Wrong number of args for " << name_;
+   // TODO(remove)
+  //CHECK_EQ(function_.args(), 1) << "Wrong number of args for " << name_;
+  CHECK(arg_count_ == -1 || arg_count_ == 1);
+  arg_count_ = 1;
  args_.emplace_back(x, -1);
  return this;
 }

 Benchmark* Benchmark::Range(int start, int limit) {
-  CHECK_EQ(function_.args(), 1) << "Wrong number of args for " << name_;
+  //CHECK_EQ(function_.args(), 1) << "Wrong number of args for " << name_;
+  CHECK(arg_count_ == -1 || arg_count_ == 1);
+  arg_count_ = 1;
  std::vector<int> arglist;
  AddRange(&arglist, start, limit, kRangeMultiplier);

@ -341,7 +320,9 @@ Benchmark* Benchmark::Range(int start, int limit) {
 }

 Benchmark* Benchmark::DenseRange(int start, int limit) {
-  CHECK_EQ(function_.args(), 1) << "Wrong number of args for " << name_;
+  //CHECK_EQ(function_.args(), 1) << "Wrong number of args for " << name_;
+  CHECK(arg_count_ == -1 || arg_count_ == 1);
+  arg_count_ = 1;
  CHECK_GE(start, 0);
  CHECK_LE(start, limit);
  for (int arg = start; arg <= limit; arg++) {
@ -351,13 +332,17 @@ Benchmark* Benchmark::DenseRange(int start, int limit) {
 }

 Benchmark* Benchmark::ArgPair(int x, int y) {
-  CHECK_EQ(function_.args(), 2) << "Wrong number of args for " << name_;
+  //CHECK_EQ(function_.args(), 2) << "Wrong number of args for " << name_;
+  CHECK(arg_count_ == -1 || arg_count_ == 2);
+  arg_count_ = 2;
  args_.emplace_back(x, y);
  return this;
 }

 Benchmark* Benchmark::RangePair(int lo1, int hi1, int lo2, int hi2) {
-  CHECK_EQ(function_.args(), 2) << "Wrong number of args for " << name_;
+  //CHECK_EQ(function_.args(), 2) << "Wrong number of args for " << name_;
+  CHECK(arg_count_ == -1 || arg_count_ == 2);
+  arg_count_ = 2;
  std::vector<int> arglist1, arglist2;
  AddRange(&arglist1, lo1, hi1, kRangeMultiplier);
  AddRange(&arglist2, lo2, hi2, kRangeMultiplier);
@ -416,15 +401,6 @@ void Benchmark::AddRange(std::vector<int>* dst, int lo, int hi, int mult) {
  }
 }

-Benchmark* Benchmark::Setup(const Function& setup) {
-  setup_ = setup;
-  return this;
-}
-
-Benchmark* Benchmark::Teardown(const Function& teardown) {
-  teardown_ = teardown;
-  return this;
-}

 // Extract the list of benchmark instances that match the specified
 // regular expression.
@ -448,7 +424,10 @@ void Benchmark::FindBenchmarks(
    for (Benchmark* family : *families) {
      if (family == NULL) continue;  // Family was deleted

-      const int num_args = family->function_.args();
+      if (family->arg_count_ == -1) {
+        family->arg_count_ = 0;
+        family->args_.emplace_back(-1, -1);
+      }
      for (auto const& args : family->args_) {
        const std::vector<int>* thread_counts =
            (family->thread_counts_.empty()
@ -462,18 +441,18 @@ void Benchmark::FindBenchmarks(
          Instance instance;
          instance.name = family->name_;
          instance.function = family->function_;
+          instance.has_arg1 = family->arg_count_ >= 1;
          instance.arg1 = args.first;
+          instance.has_arg2 = family->arg_count_ == 2;
          instance.arg2 = args.second;
          instance.threads = num_threads;
          instance.multithreaded = !(family->thread_counts_.empty());
-          instance.setup = family->setup_;
-          instance.teardown = family->teardown_;

          // Add arguments to instance name
-          if (num_args >= 1) {
+          if (family->arg_count_ >= 1) {
            AppendHumanReadable(instance.arg1, &instance.name);
          }
-          if (num_args >= 2) {
+          if (family->arg_count_ >= 2) {
            AppendHumanReadable(instance.arg2, &instance.name);
          }

@ -536,19 +515,17 @@ static bool CpuScalingEnabled() {
 // Execute one thread of benchmark b for the specified number of iterations.
 // Adds the stats collected for the thread into *total.
 void RunInThread(const benchmark::Benchmark::Instance* b,
-                 int iters,
+                 int iters, int thread_id,
                 ThreadStats* total) EXCLUDES(GetBenchmarkLock()) {
  ThreadStats* my_stats = &thread_stats;
  ResetThreadStats(my_stats);
-  timer_manager->StartTimer();
-  b->Run(iters);
+  b->Run(iters, thread_id);
+  timer_manager->Finalize();

  {
    MutexLock l(GetBenchmarkLock());
    AddThreadStats(total, *my_stats);
  }
-
-  timer_manager->Finalize();
 }

 void RunBenchmark(const benchmark::Benchmark::Instance& b,
@ -571,7 +548,6 @@ void RunBenchmark(const benchmark::Benchmark::Instance& b,
        report_label.clear();
        use_real_time = false;
      }
-      b.setup.Run(b.threads, b.arg1, b.arg2);

      Notification done;
      timer_manager = new TimerManager(b.threads, &done);
@ -586,12 +562,12 @@ void RunBenchmark(const benchmark::Benchmark::Instance& b,
          if (thread.joinable())
            thread.join();
        }
-        for (std::thread& thread : pool) {
-          thread = std::thread(&RunInThread, &b, iters, &total);
+        for (std::size_t ti = 0; ti < pool.size(); ++ti) {
+            pool[ti] = std::thread(&RunInThread, &b, iters, ti, &total);
        }
      } else {
        // Run directly in this thread
-        RunInThread(&b, iters, &total);
+        RunInThread(&b, iters, 0, &total);
      }
      done.WaitForNotification();
      running_benchmark = false;
@ -600,7 +576,6 @@ void RunBenchmark(const benchmark::Benchmark::Instance& b,
      const double real_accumulated_time = timer_manager->real_time_used();
      delete timer_manager;
      timer_manager = NULL;
-      b.teardown.Run(b.threads, b.arg1, b.arg2);

      VLOG(1) << "Ran in " << cpu_accumulated_time << "/"
            << real_accumulated_time << "\n";
--- a/src/minimal_benchmark.cc
+++ b/src/minimal_benchmark.cc
@ -17,7 +17,7 @@

 namespace benchmark {

-MinimalBenchmark::MinimalBenchmark(const char* name, const Function& f)
+MinimalBenchmark::MinimalBenchmark(const char* name, Function* f)
  : imp_(new Benchmark(name, f))
 { }

@ -73,14 +73,4 @@ MinimalBenchmark& MinimalBenchmark::ThreadPerCpu() {
  return *this;
 }

-MinimalBenchmark& MinimalBenchmark::Setup(const Function& setup) {
-  imp_->Setup(setup);
-  return *this;
-}
-
-MinimalBenchmark& MinimalBenchmark::Teardown(const Function& teardown) {
-  imp_->Teardown(teardown);
-  return *this;
-}
-
 } // end namespace benchmark
--- a/test/basic_test.cc
+++ b/test/basic_test.cc
@ -4,22 +4,20 @@
 using benchmark::StartBenchmarkTiming;
 using benchmark::StopBenchmarkTiming;

-void BM_empty(int iters) {
-  while (iters-- > 0) { }
+void BM_empty(benchmark::State& state) {
+  while (state.KeepRunning()) { }
 }
 BENCHMARK(BM_empty);
 BENCHMARK(BM_empty)->ThreadPerCpu();

-void BM_empty2(int iters) {
-  StopBenchmarkTiming();
-  StartBenchmarkTiming();
-  while (iters-- > 0) { }
+void BM_empty2(benchmark::State& state) {
+  while (state.KeepRunning()) { }
 }
 BENCHMARK(BM_empty2)->ThreadPerCpu();

-void BM_spin(int iters, int xrange) {
-  while (iters-- > 0) {
-    for (int x = 0; x < xrange; ++x) {
+void BM_spin(benchmark::State& state) {
+  while (state.KeepRunning()) {
+    for (int x = 0; x < state.range_x(); ++x) {
      volatile int dummy = x;
      ((void)dummy);
    }
@ -27,15 +25,13 @@ void BM_spin(int iters, int xrange) {
 }
 BENCHMARK(BM_spin)->Range(8, 8<<10);

-void BM_spin_pause_before(int iters, int xrange) {
-  StopBenchmarkTiming();
-  for (int i = 0; i < xrange; ++i) {
+void BM_spin_pause_before(benchmark::State& state) {
+  for (int i = 0; i < state.range_x(); ++i) {
    volatile int dummy = i;
    ((void)dummy);
  }
-  StartBenchmarkTiming();
-  while(iters-- > 0) {
-    for (int i = 0; i < xrange; ++i) {
+  while(state.KeepRunning()) {
+    for (int i = 0; i < state.range_x(); ++i) {
      volatile int dummy = i;
      ((void)dummy);
    }
@ -45,15 +41,15 @@ BENCHMARK(BM_spin_pause_before)->Range(8, 8<<10);
 BENCHMARK(BM_spin_pause_before)->Range(8, 8<<10)->ThreadPerCpu();


-void BM_spin_pause_during(int iters, int xrange) {
-  while(iters-- > 0) {
+void BM_spin_pause_during(benchmark::State& state) {
+  while(state.KeepRunning()) {
    StopBenchmarkTiming();
-    for (int i = 0; i < xrange; ++i) {
+    for (int i = 0; i < state.range_x(); ++i) {
      volatile int dummy = i;
      ((void)dummy);
    }
    StartBenchmarkTiming();
-    for (int i = 0; i < xrange; ++i) {
+    for (int i = 0; i < state.range_x(); ++i) {
      volatile int dummy = i;
      ((void)dummy);
    }
--- a/test/benchmark_test.cc
+++ b/test/benchmark_test.cc
@ -16,11 +16,6 @@

 #include <gtest/gtest.h>

-using benchmark::StartBenchmarkTiming;
-using benchmark::StopBenchmarkTiming;
-using benchmark::SetBenchmarkBytesProcessed;
-using benchmark::SetBenchmarkItemsProcessed;
-
 namespace {

 #ifdef DEBUG
@ -48,14 +43,13 @@ std::set<int> ConstructRandomSet(int size) {

 std::mutex test_vector_mu;
 std::vector<int>* test_vector = nullptr;
-static bool setup_called = false;

 }  // end namespace

 #ifdef DEBUG
-static void BM_Factorial(int iters) {
+static void BM_Factorial(benchmark::State& state) {
  int fac_42 = 0;
-  while (iters-- > 0)
+  while (state.KeepRunning())
    fac_42 = Factorial(8);
  // Prevent compiler optimizations
  EXPECT_NE(fac_42, std::numeric_limits<int>::max());
@ -63,20 +57,20 @@ static void BM_Factorial(int iters) {
 BENCHMARK(BM_Factorial);
 #endif

-static void BM_CalculatePiRange(int iters, int rangex) {
+static void BM_CalculatePiRange(benchmark::State& state) {
  double pi = 0.0;
-  while (iters-- > 0)
-    pi = CalculatePi(rangex);
+  while (state.KeepRunning())
+    pi = CalculatePi(state.range_x());
  std::stringstream ss;
  ss << pi;
-  //state.SetLabel(ss.str().c_str());
+  state.SetLabel(ss.str().c_str());
 }
 BENCHMARK_RANGE(BM_CalculatePiRange, 1, 1024 * 1024);

-static void BM_CalculatePi(int iters) {
+static void BM_CalculatePi(benchmark::State& state) {
  static const int depth = 1024;
  double pi ATTRIBUTE_UNUSED = 0.0;
-  while (iters-- > 0) {
+  while (state.KeepRunning()) {
    pi = CalculatePi(depth);
  }
 }
@ -84,68 +78,53 @@ BENCHMARK(BM_CalculatePi)->Threads(8);
 BENCHMARK(BM_CalculatePi)->ThreadRange(1, 32);
 BENCHMARK(BM_CalculatePi)->ThreadPerCpu();

-
-static void BM_SetInsert(int iters, int xrange, int yrange) {
-  const int total_iters = iters;
-  while (iters-->0) {
-    StopBenchmarkTiming();
-    std::set<int> data = ConstructRandomSet(xrange);
-    StartBenchmarkTiming();
-    for (int j = 0; j < yrange; ++j)
+static void BM_SetInsert(benchmark::State& state) {
+  while (state.KeepRunning()) {
+    state.PauseTiming();
+    std::set<int> data = ConstructRandomSet(state.range_x());
+    state.ResumeTiming();
+    for (int j = 0; j < state.range_y(); ++j)
      data.insert(rand());
  }
-  SetBenchmarkItemsProcessed(total_iters * yrange);
-  SetBenchmarkBytesProcessed(total_iters * yrange * sizeof(int));
+  state.SetItemsProcessed(state.iterations() * state.range_y());
+  state.SetBytesProcessed(state.iterations() * state.range_y() * sizeof(int));
 }
 BENCHMARK(BM_SetInsert)->RangePair(1<<10,8<<10, 1,10);

-
 template<typename Q>
-static void BM_Sequential(int iters, int xrange) {
-  const int total_iters = iters;
+static void BM_Sequential(benchmark::State& state) {
  typename Q::value_type v = 42;
-  while (iters-->0) {
+  while (state.KeepRunning()) {
    Q q;
-    for (int i = xrange; --i; )
+    for (int i = state.range_x(); --i; )
      q.push_back(v);
  }
  const int64_t items_processed =
-      static_cast<int64_t>(total_iters) * xrange;
-  SetBenchmarkItemsProcessed(items_processed);
-  SetBenchmarkBytesProcessed(items_processed * sizeof(v));
+      static_cast<int64_t>(state.iterations()) * state.range_x();
+  state.SetItemsProcessed(items_processed);
+  state.SetBytesProcessed(items_processed * sizeof(v));
 }
 BENCHMARK_TEMPLATE(BM_Sequential, std::vector<int>)->Range(1 << 0, 1 << 10);
 BENCHMARK_TEMPLATE(BM_Sequential, std::list<int>)->Range(1 << 0, 1 << 10);

-
-static void BM_StringCompare(int iters, int xrange) {
-  StopBenchmarkTiming();
-  std::string s1(xrange, '-');
-  std::string s2(xrange, '-');
+static void BM_StringCompare(benchmark::State& state) {
+  std::string s1(state.range_x(), '-');
+  std::string s2(state.range_x(), '-');
  int r = 0;
-  StartBenchmarkTiming();
-  while (iters-->0)
+  while (state.KeepRunning())
    r |= s1.compare(s2);
  // Prevent compiler optimizations
  assert(r != std::numeric_limits<int>::max());
 }
 BENCHMARK(BM_StringCompare)->Range(1, 1<<20);

-static void BM_SetupTeardown_Setup(int) {
-  assert(setup_called == false);
-  setup_called = true;
-  test_vector = new std::vector<int>();
-}
-
-static void BM_SetupTeardown_Teardown(int) {
-  assert(setup_called);
-  setup_called = false;
-  delete test_vector;
-}
-
-static void BM_SetupTeardown(int iters) {
+static void BM_SetupTeardown(benchmark::State& state) {
+  if (state.thread_index == 0) {
+    // No need to lock test_vector_mu here as this is running single-threaded.
+    test_vector = new std::vector<int>();
+  }
  int i = 0;
-  while (iters-->0) {
+  while (state.KeepRunning()) {
    std::lock_guard<std::mutex> l(test_vector_mu);
    if (i%2 == 0)
      test_vector->push_back(i);
@ -153,18 +132,17 @@ static void BM_SetupTeardown(int iters) {
      test_vector->pop_back();
    ++i;
  }
-}
-BENCHMARK(BM_SetupTeardown)->Setup(&BM_SetupTeardown_Setup)
-                           ->Teardown(&BM_SetupTeardown_Teardown)
-                           ->ThreadPerCpu();
-
-
-static void BM_LongTest(int iters, int xrange) {
-  double tracker = 0.0;
-  while (iters-->0) {
-    for (int i = 0; i < xrange; ++i)
-      tracker += i;
+  if (state.thread_index == 0) {
+    delete test_vector;
  }
+}
+BENCHMARK(BM_SetupTeardown)->ThreadPerCpu();
+
+static void BM_LongTest(benchmark::State& state) {
+  double tracker = 0.0;
+  while (state.KeepRunning())
+    for (int i = 0; i < state.range_x(); ++i)
+      tracker += i;
  assert(tracker != 0.0);
 }
 BENCHMARK(BM_LongTest)->Range(1<<16,1<<28);