Mirror of https://github.com/google/benchmark.git

commit e025dd5a54 (parent 6f094ba13e)

This reverts commit a6a738c1cc.
README.md

@@ -179,7 +179,7 @@ BENCHMARK_MAIN();
 ```

 To run the benchmark, compile and link against the `benchmark` library
 (libbenchmark.a/.so). If you followed the build steps above, this library will
 be under the build directory you created.

 ```bash
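For reference, a minimal translation unit of the kind this README section assumes you are compiling and linking is sketched below; the benchmark name and the copied string are illustrative only, not part of the diff. Linking typically needs `-lbenchmark -lpthread` (or the CMake target `benchmark::benchmark`).

```cpp
#include <string>

#include <benchmark/benchmark.h>

// Illustrative benchmark: measures copying a small std::string.
static void BM_StringCopy(benchmark::State& state) {
  std::string x = "hello";
  for (auto _ : state) {
    std::string copy(x);
    benchmark::DoNotOptimize(copy);  // keep the copy from being optimized away
  }
}
BENCHMARK(BM_StringCopy);

BENCHMARK_MAIN();
```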
@@ -299,8 +299,6 @@ too (`-lkstat`).

 [Setting the Time Unit](#setting-the-time-unit)

-[Random Interleaving](docs/random_interleaving.md)
-
 [User-Requested Performance Counters](docs/perf_counters.md)

 [Preventing Optimization](#preventing-optimization)
@@ -401,8 +399,8 @@ Write benchmark results to a file with the `--benchmark_out=<filename>` option

 (or set `BENCHMARK_OUT`). Specify the output format with
 `--benchmark_out_format={json|console|csv}` (or set
 `BENCHMARK_OUT_FORMAT={json|console|csv}`). Note that the 'csv' reporter is
 deprecated and the saved `.csv` file
 [is not parsable](https://github.com/google/benchmark/issues/794) by csv
 parsers.

 Specifying `--benchmark_out` does not suppress the console output.
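As an aside, the same flags can be exercised from a hand-written `main`; the sketch below simply spells out what `BENCHMARK_MAIN()` expands to, with an example invocation in a comment (the binary and file names are made up).

```cpp
#include <benchmark/benchmark.h>

static void BM_Noop(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(state.iterations());
  }
}
BENCHMARK(BM_Noop);

// Run as, e.g.:
//   ./mybench --benchmark_out=results.json --benchmark_out_format=json
// The JSON file is written in addition to the normal console report.
int main(int argc, char** argv) {
  benchmark::Initialize(&argc, argv);  // parses the --benchmark_* flags
  if (benchmark::ReportUnrecognizedArguments(argc, argv)) return 1;
  benchmark::RunSpecifiedBenchmarks();
  return 0;
}
```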
docs/random_interleaving.md (deleted)

@@ -1,26 +0,0 @@

<a name="interleaving" />

# Random Interleaving

[Random Interleaving](https://github.com/google/benchmark/issues/1051) is a
technique to lower run-to-run variance. It breaks the execution of a
microbenchmark into multiple chunks and randomly interleaves them with chunks
from other microbenchmarks in the same benchmark test. Data shows it is able to
lower run-to-run variance by
[40%](https://github.com/google/benchmark/issues/1051) on average.

To use it, set `--benchmark_enable_random_interleaving=true`.

It is a known issue that random interleaving may increase benchmark execution
time if:

1. A benchmark has costly setup and/or teardown. Random interleaving will run
   setup and teardown many times and may increase test execution time
   significantly.
2. The time to run a single benchmark iteration is larger than the desired time
   per repetition (i.e., `benchmark_min_time / benchmark_repetitions`).

The overhead of random interleaving can be controlled with
`--benchmark_random_interleaving_max_overhead`. The default value is 0.4,
meaning the total execution time under random interleaving is limited to 1.4x
the original total execution time. Set it to `inf` for unlimited overhead.
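To make the 1.4x figure concrete, here is a tiny arithmetic sketch of the bound described above; the 10-second figure is invented purely for illustration.

```cpp
#include <iostream>

// Illustration of the overhead bound: with
// --benchmark_random_interleaving_max_overhead=o, the interleaved run is
// allowed to take at most (1 + o) times the estimated original run time.
int main() {
  const double original_total_secs = 10.0;  // estimated time without interleaving (made up)
  const double max_overhead = 0.4;          // the flag's default value
  const double allowed_total_secs = (1.0 + max_overhead) * original_total_secs;
  std::cout << "interleaved run budget: " << allowed_total_secs << " s\n";  // 14 s
  return 0;
}
```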
src/benchmark.cc (206 lines changed)
@@ -33,10 +33,8 @@
 #include <cstdlib>
 #include <fstream>
 #include <iostream>
-#include <limits>
 #include <map>
 #include <memory>
-#include <random>
 #include <string>
 #include <thread>
 #include <utility>
@@ -56,18 +54,6 @@
 #include "thread_manager.h"
 #include "thread_timer.h"

-// Each benchmark can be repeated a number of times, and within each
-// *repetition*, we run the user-defined benchmark function a number of
-// *iterations*. The number of repetitions is determined based on flags
-// (--benchmark_repetitions).
-namespace {
-
-// Attempt to make each repetition run for at least this much of time.
-constexpr double kDefaultMinTimeTotalSecs = 0.5;
-constexpr int kRandomInterleavingDefaultRepetitions = 12;
-
-}  // namespace
-
 // Print a list of benchmarks. This option overrides all other options.
 DEFINE_bool(benchmark_list_tests, false);
@@ -76,39 +62,16 @@ DEFINE_bool(benchmark_list_tests, false);
 // linked into the binary are run.
 DEFINE_string(benchmark_filter, ".");

-// Do NOT read these flags directly. Use Get*() to read them.
-namespace do_not_read_flag_directly {
-
-// Minimum number of seconds we should run benchmark per repetition before
-// results are considered significant. For cpu-time based tests, this is the
-// lower bound on the total cpu time used by all threads that make up the test.
-// For real-time based tests, this is the lower bound on the elapsed time of the
-// benchmark execution, regardless of number of threads. If left unset, will use
-// kDefaultMinTimeTotalSecs / FLAGS_benchmark_repetitions, if random
-// interleaving is enabled. Otherwise, will use kDefaultMinTimeTotalSecs.
-// Do NOT read this flag directly. Use GetMinTime() to read this flag.
-DEFINE_double(benchmark_min_time, -1.0);
+// Minimum number of seconds we should run benchmark before results are
+// considered significant. For cpu-time based tests, this is the lower bound
+// on the total cpu time used by all threads that make up the test. For
+// real-time based tests, this is the lower bound on the elapsed time of the
+// benchmark execution, regardless of number of threads.
+DEFINE_double(benchmark_min_time, 0.5);

 // The number of runs of each benchmark. If greater than 1, the mean and
-// standard deviation of the runs will be reported. By default, the number of
-// repetitions is 1 if random interleaving is disabled, and up to
-// kDefaultRepetitions if random interleaving is enabled. (Read the
-// documentation for random interleaving to see why it might be less than
-// kDefaultRepetitions.)
-// Do NOT read this flag directly, Use GetRepetitions() to access this flag.
-DEFINE_int32(benchmark_repetitions, -1);
-
-}  // namespace do_not_read_flag_directly
-
-// The maximum overhead allowed for random interleaving. A value X means total
-// execution time under random interleaving is limited by
-// (1 + X) * original total execution time. Set to 'inf' to allow infinite
-// overhead.
-DEFINE_double(benchmark_random_interleaving_max_overhead, 0.4);
-
-// If set, enable random interleaving. See
-// http://github.com/google/benchmark/issues/1051 for details.
-DEFINE_bool(benchmark_enable_random_interleaving, false);
+// standard deviation of the runs will be reported.
+DEFINE_int32(benchmark_repetitions, 1);

 // Report the result of each benchmark repetitions. When 'true' is specified
 // only the mean, standard deviation, and other statistics are reported for
@@ -159,30 +122,6 @@ DEFINE_kvpairs(benchmark_context, {});

 std::map<std::string, std::string>* global_context = nullptr;

-// Performance measurements always come with random variances. Defines a
-// factor by which the required number of iterations is overestimated in order
-// to reduce the probability that the minimum time requirement will not be met.
-const double kSafetyMultiplier = 1.4;
-
-// Wraps --benchmark_min_time and returns valid default values if not supplied.
-double GetMinTime() {
-  const double default_min_time = kDefaultMinTimeTotalSecs / GetRepetitions();
-  const double flag_min_time =
-      do_not_read_flag_directly::FLAGS_benchmark_min_time;
-  return flag_min_time >= 0.0 ? flag_min_time : default_min_time;
-}
-
-// Wraps --benchmark_repetitions and returns a valid default value if not supplied.
-int GetRepetitions() {
-  const int default_repetitions =
-      FLAGS_benchmark_enable_random_interleaving
-          ? kRandomInterleavingDefaultRepetitions
-          : 1;
-  const int flag_repetitions =
-      do_not_read_flag_directly::FLAGS_benchmark_repetitions;
-  return flag_repetitions >= 0 ? flag_repetitions : default_repetitions;
-}
-
 // FIXME: wouldn't LTO mess this up?
 void UseCharPointer(char const volatile*) {}
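A quick numeric illustration of the defaults wired together by `GetMinTime()` and `GetRepetitions()` above (a standalone sketch, not library code): with random interleaving enabled and neither flag set, 12 repetitions share the 0.5 s total budget.

```cpp
#include <iostream>

int main() {
  const double kDefaultMinTimeTotalSecs = 0.5;
  const int kRandomInterleavingDefaultRepetitions = 12;

  const int repetitions = kRandomInterleavingDefaultRepetitions;   // flag left at -1
  const double min_time = kDefaultMinTimeTotalSecs / repetitions;  // ~0.0417 s each
  std::cout << repetitions << " repetitions x " << min_time << " s each\n";
  return 0;
}
```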
@@ -302,39 +241,6 @@ void State::FinishKeepRunning() {
 namespace internal {
 namespace {

-// Flushes streams after invoking reporter methods that write to them. This
-// ensures users get timely updates even when streams are not line-buffered.
-void FlushStreams(BenchmarkReporter* reporter) {
-  if (!reporter) return;
-  std::flush(reporter->GetOutputStream());
-  std::flush(reporter->GetErrorStream());
-}
-
-// Reports in both display and file reporters.
-void Report(BenchmarkReporter* display_reporter,
-            BenchmarkReporter* file_reporter, const RunResults& run_results) {
-  auto report_one = [](BenchmarkReporter* reporter, bool aggregates_only,
-                       const RunResults& results) {
-    assert(reporter);
-    // If there are no aggregates, do output non-aggregates.
-    aggregates_only &= !results.aggregates_only.empty();
-    if (!aggregates_only) reporter->ReportRuns(results.non_aggregates);
-    if (!results.aggregates_only.empty())
-      reporter->ReportRuns(results.aggregates_only);
-  };
-
-  report_one(display_reporter, run_results.display_report_aggregates_only,
-             run_results);
-  if (file_reporter)
-    report_one(file_reporter, run_results.file_report_aggregates_only,
-               run_results);
-
-  FlushStreams(display_reporter);
-  FlushStreams(file_reporter);
-}
-
 void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
                    BenchmarkReporter* display_reporter,
                    BenchmarkReporter* file_reporter) {
@@ -342,7 +248,7 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
   CHECK(display_reporter != nullptr);

   // Determine the width of the name field using a minimum width of 10.
-  bool might_have_aggregates = GetRepetitions() > 1;
+  bool might_have_aggregates = FLAGS_benchmark_repetitions > 1;
   size_t name_field_width = 10;
   size_t stat_field_width = 0;
   for (const BenchmarkInstance& benchmark : benchmarks) {
@@ -350,9 +256,8 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
         std::max<size_t>(name_field_width, benchmark.name().str().size());
     might_have_aggregates |= benchmark.repetitions() > 1;

-    for (const auto& Stat : benchmark.statistics()) {
+    for (const auto& Stat : benchmark.statistics())
       stat_field_width = std::max<size_t>(stat_field_width, Stat.name_.size());
-    }
   }
   if (might_have_aggregates) name_field_width += 1 + stat_field_width;
@@ -363,61 +268,45 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
   // Keep track of running times of all instances of current benchmark
   std::vector<BenchmarkReporter::Run> complexity_reports;

+  // We flush streams after invoking reporter methods that write to them. This
+  // ensures users get timely updates even when streams are not line-buffered.
+  auto flushStreams = [](BenchmarkReporter* reporter) {
+    if (!reporter) return;
+    std::flush(reporter->GetOutputStream());
+    std::flush(reporter->GetErrorStream());
+  };
+
   if (display_reporter->ReportContext(context) &&
       (!file_reporter || file_reporter->ReportContext(context))) {
-    FlushStreams(display_reporter);
-    FlushStreams(file_reporter);
+    flushStreams(display_reporter);
+    flushStreams(file_reporter);

-    // Without random interleaving, benchmarks are executed in the order of:
-    // A, A, ..., A, B, B, ..., B, C, C, ..., C, ...
-    // That is, repetition is within RunBenchmark(), hence the name
-    // inner_repetitions.
-    // With random interleaving, benchmarks are executed in the order of:
-    // {Random order of A, B, C, ...}, {Random order of A, B, C, ...}, ...
-    // That is, repetitions is outside of RunBenchmark(), hence the name
-    // outer_repetitions.
-    int inner_repetitions =
-        FLAGS_benchmark_enable_random_interleaving ? 1 : GetRepetitions();
-    int outer_repetitions =
-        FLAGS_benchmark_enable_random_interleaving ? GetRepetitions() : 1;
-    std::vector<size_t> benchmark_indices(benchmarks.size());
-    for (size_t i = 0; i < benchmarks.size(); ++i) {
-      benchmark_indices[i] = i;
-    }
-
-    std::random_device rd;
-    std::mt19937 g(rd());
-    // 'run_results_vector' and 'benchmarks' are parallel arrays.
-    std::vector<RunResults> run_results_vector(benchmarks.size());
-    for (int i = 0; i < outer_repetitions; i++) {
-      if (FLAGS_benchmark_enable_random_interleaving) {
-        std::shuffle(benchmark_indices.begin(), benchmark_indices.end(), g);
-      }
-      for (size_t j : benchmark_indices) {
-        // Repetitions will be automatically adjusted under random interleaving.
-        if (!FLAGS_benchmark_enable_random_interleaving ||
-            i < benchmarks[j].RandomInterleavingRepetitions()) {
-          RunBenchmark(benchmarks[j], outer_repetitions, inner_repetitions,
-                       &complexity_reports, &run_results_vector[j]);
-          if (!FLAGS_benchmark_enable_random_interleaving) {
-            // Print out reports as they come in.
-            Report(display_reporter, file_reporter, run_results_vector.at(j));
-          }
-        }
-      }
-    }
-
-    if (FLAGS_benchmark_enable_random_interleaving) {
-      // Print out all reports at the end of the test.
-      for (const RunResults& run_results : run_results_vector) {
-        Report(display_reporter, file_reporter, run_results);
-      }
-    }
+    for (const auto& benchmark : benchmarks) {
+      RunResults run_results = RunBenchmark(benchmark, &complexity_reports);
+
+      auto report = [&run_results](BenchmarkReporter* reporter,
+                                   bool report_aggregates_only) {
+        assert(reporter);
+        // If there are no aggregates, do output non-aggregates.
+        report_aggregates_only &= !run_results.aggregates_only.empty();
+        if (!report_aggregates_only)
+          reporter->ReportRuns(run_results.non_aggregates);
+        if (!run_results.aggregates_only.empty())
+          reporter->ReportRuns(run_results.aggregates_only);
+      };
+
+      report(display_reporter, run_results.display_report_aggregates_only);
+      if (file_reporter)
+        report(file_reporter, run_results.file_report_aggregates_only);
+
+      flushStreams(display_reporter);
+      flushStreams(file_reporter);
+    }
   }
   display_reporter->Finalize();
   if (file_reporter) file_reporter->Finalize();
-  FlushStreams(display_reporter);
-  FlushStreams(file_reporter);
+  flushStreams(display_reporter);
+  flushStreams(file_reporter);
 }
@@ -567,7 +456,6 @@ void PrintUsageAndExit() {
           " [--benchmark_filter=<regex>]\n"
           " [--benchmark_min_time=<min_time>]\n"
          " [--benchmark_repetitions=<num_repetitions>]\n"
-          " [--benchmark_enable_random_interleaving={true|false}]\n"
           " [--benchmark_report_aggregates_only={true|false}]\n"
           " [--benchmark_display_aggregates_only={true|false}]\n"
           " [--benchmark_format=<console|json|csv>]\n"
@@ -588,16 +476,10 @@ void ParseCommandLineFlags(int* argc, char** argv) {
     if (ParseBoolFlag(argv[i], "benchmark_list_tests",
                       &FLAGS_benchmark_list_tests) ||
         ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) ||
-        ParseDoubleFlag(
-            argv[i], "benchmark_min_time",
-            &do_not_read_flag_directly::FLAGS_benchmark_min_time) ||
-        ParseInt32Flag(
-            argv[i], "benchmark_repetitions",
-            &do_not_read_flag_directly::FLAGS_benchmark_repetitions) ||
-        ParseBoolFlag(argv[i], "benchmark_enable_random_interleaving",
-                      &FLAGS_benchmark_enable_random_interleaving) ||
-        ParseDoubleFlag(argv[i], "benchmark_random_interleaving_max_overhead",
-                        &FLAGS_benchmark_random_interleaving_max_overhead) ||
+        ParseDoubleFlag(argv[i], "benchmark_min_time",
+                        &FLAGS_benchmark_min_time) ||
+        ParseInt32Flag(argv[i], "benchmark_repetitions",
+                       &FLAGS_benchmark_repetitions) ||
         ParseBoolFlag(argv[i], "benchmark_report_aggregates_only",
                       &FLAGS_benchmark_report_aggregates_only) ||
         ParseBoolFlag(argv[i], "benchmark_display_aggregates_only",
src/benchmark_adjust_repetitions.cc (deleted)

@@ -1,125 +0,0 @@

// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "benchmark_adjust_repetitions.h"

#include "benchmark_api_internal.h"
#include "log.h"

namespace benchmark {
namespace internal {

namespace {

constexpr double kNanosecondInSecond = 1e-9;

}  // namespace

int ComputeRandomInterleavingRepetitions(
    InternalRandomInterleavingRepetitionsInput input) {
  // Find the repetitions such that total overhead is bounded. Let
  //   n = desired number of repetitions, i.e., the output of this method.
  //   t = total real execution time per repetition including overhead
  //       (input.total_execution_time_per_repetition).
  //   o = maximum allowed increase in total real execution time due to random
  //       interleaving, measured as a fraction (input.max_overhead).
  //   e = estimated total execution time without Random Interleaving.
  // We want
  //   t * n / e <= 1 + o
  // I.e.,
  //   n <= (1 + o) * e / t
  //
  // Let
  //   h = overhead per repetition, which includes all setup / teardown time
  //       and also the execution time of preliminary trials used to search for
  //       the correct number of iterations.
  //   r = real execution time per repetition not including overhead
  //       (input.real_time_used_per_repetition).
  //   s = measured execution time per repetition not including overhead,
  //       which can be either real or CPU time
  //       (input.time_used_per_repetition).
  // We have
  //   h = t - r
  //
  // Let
  //   m = total minimum measured execution time for all repetitions
  //       (input.min_time_per_repetition * input.max_repetitions).
  // Let
  //   f = m / s
  // f is the scale factor between m and s, and will be used to estimate
  // l, the total real execution time for all repetitions excluding the
  // overhead. It's reasonable to assume that the real execution time excluding
  // the overhead is proportional to the measured time. Hence we expect to see
  // l / r to be equal to m / s. That is, l / r = f, thus, l = r * f. Then the
  // total execution time e can be estimated by h + l, which is h + r * f.
  //   e = h + r * f
  // Note that this might be an underestimation. If the number of repetitions
  // is reduced, we may need to run more iterations per repetition, and that
  // may increase the number of preliminary trials needed to find the correct
  // number of iterations.

  double h = std::max(0.0, input.total_execution_time_per_repetition -
                               input.real_time_used_per_repetition);
  double r =
      std::max(input.real_time_used_per_repetition, kNanosecondInSecond);
  double s = std::max(input.time_used_per_repetition, kNanosecondInSecond);
  double m = input.min_time_per_repetition * input.max_repetitions;

  // f = m / s
  // RunBenchmark() always overshoots the iteration count by kSafetyMultiplier.
  // Apply the same factor here.
  //   f = kSafetyMultiplier * m / s
  // Also we want to make sure 1 <= f <= input.max_repetitions. Note that we
  // may not be able to reach m because the total iters per repetition is
  // upper bounded by --benchmark_max_iters. This behavior is preserved in
  // Random Interleaving, as we won't run repetitions more than
  // input.max_repetitions to reach m.

  double f = kSafetyMultiplier * m / s;
  f = std::min(std::max(f, 1.0), static_cast<double>(input.max_repetitions));

  double e = h + r * f;
  // n <= (1 + o) * e / t = (1 + o) * e / (h + r)
  // Also we want to make sure 1 <= n <= input.max_repetitions, and (h + r) > 0.
  double n = (1 + input.max_overhead) * e / (h + r);
  n = std::min(std::max(n, 1.0), static_cast<double>(input.max_repetitions));

  int n_int = static_cast<int>(n);

  VLOG(2) << "Computed random interleaving repetitions"
          << "\n  input.total_execution_time_per_repetition: "
          << input.total_execution_time_per_repetition
          << "\n  input.time_used_per_repetition: "
          << input.time_used_per_repetition
          << "\n  input.real_time_used_per_repetition: "
          << input.real_time_used_per_repetition
          << "\n  input.min_time_per_repetition: "
          << input.min_time_per_repetition
          << "\n  input.max_repetitions: " << input.max_repetitions
          << "\n  input.max_overhead: " << input.max_overhead
          << "\n  h: " << h << "\n  r: " << r << "\n  s: " << s
          << "\n  f: " << f << "\n  m: " << m << "\n  e: " << e
          << "\n  n: " << n << "\n  n_int: " << n_int;

  return n_int;
}

}  // namespace internal
}  // namespace benchmark
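The derivation in the comments above can be checked with a standalone rerun of the arithmetic. The sketch below uses one of the inputs exercised by the unit test further down (t = 0.08, r = s = 0.05, min_time = 0.05, o = 0, max_repetitions = 10) and reproduces the expected answer of 6; it is an illustration, not the library code itself.

```cpp
#include <algorithm>
#include <iostream>

int main() {
  const double kSafetyMultiplier = 1.4;
  const double t = 0.08, s = 0.05, r = 0.05, min_time = 0.05, o = 0.0;
  const int max_repetitions = 10;

  const double h = std::max(0.0, t - r);                            // 0.03 s of overhead
  const double m = min_time * max_repetitions;                      // 0.5 s target total
  double f = kSafetyMultiplier * m / s;                              // 14, clamped below
  f = std::min(std::max(f, 1.0), static_cast<double>(max_repetitions));  // 10
  const double e = h + r * f;                                        // 0.53 s estimate
  double n = (1 + o) * e / (h + r);                                  // 6.625
  n = std::min(std::max(n, 1.0), static_cast<double>(max_repetitions));
  std::cout << static_cast<int>(n) << "\n";                          // prints 6
  return 0;
}
```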
src/benchmark_adjust_repetitions.h (deleted)

@@ -1,42 +0,0 @@

// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef BENCHMARK_ADJUST_REPETITIONS_H
#define BENCHMARK_ADJUST_REPETITIONS_H

#include "benchmark/benchmark.h"
#include "commandlineflags.h"

namespace benchmark {
namespace internal {

// Defines the input tuple to ComputeRandomInterleavingRepetitions().
struct InternalRandomInterleavingRepetitionsInput {
  double total_execution_time_per_repetition;
  double time_used_per_repetition;
  double real_time_used_per_repetition;
  double min_time_per_repetition;
  double max_overhead;
  int max_repetitions;
};

// Should be called right after the first repetition is completed to estimate
// the number of repetitions.
int ComputeRandomInterleavingRepetitions(
    InternalRandomInterleavingRepetitionsInput input);

}  // end namespace internal
}  // end namespace benchmark

#endif  // BENCHMARK_ADJUST_REPETITIONS_H
src/benchmark_api_internal.cc

@@ -2,11 +2,8 @@

 #include <cinttypes>

-#include "check.h"
 #include "string_util.h"

-DECLARE_bool(benchmark_enable_random_interleaving);
-
 namespace benchmark {
 namespace internal {
@@ -24,12 +21,9 @@ BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark,
       complexity_lambda_(benchmark_.complexity_lambda_),
       statistics_(benchmark_.statistics_),
       repetitions_(benchmark_.repetitions_),
-      min_time_(!IsZero(benchmark_.min_time_) ? benchmark_.min_time_
-                                               : GetMinTime()),
+      min_time_(benchmark_.min_time_),
       iterations_(benchmark_.iterations_),
       threads_(thread_count) {
-  CHECK(!IsZero(min_time_)) << "min_time must be non-zero.";
-
   name_.function_name = benchmark_.name_;

   size_t arg_i = 0;
@@ -83,32 +77,6 @@ BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark,
     }
   }

-double BenchmarkInstance::MinTime() const {
-  if (FLAGS_benchmark_enable_random_interleaving) {
-    // Random Interleaving will automatically adjust
-    // random_interleaving_repetitions(). Dividing
-    // total execution time by random_interleaving_repetitions() gives
-    // the adjusted min_time per repetition.
-    return min_time_ * GetRepetitions() / RandomInterleavingRepetitions();
-  }
-  return min_time_;
-}
-
-int BenchmarkInstance::RandomInterleavingRepetitions() const {
-  return random_interleaving_repetitions_ < 0
-             ? GetRepetitions()
-             : random_interleaving_repetitions_;
-}
-
-bool BenchmarkInstance::RandomInterleavingRepetitionsInitialized() const {
-  return random_interleaving_repetitions_ >= 0;
-}
-
-void BenchmarkInstance::InitRandomInterleavingRepetitions(
-    int reps) const {
-  random_interleaving_repetitions_ = reps;
-}
-
 State BenchmarkInstance::Run(
     IterationCount iters, int thread_id, internal::ThreadTimer* timer,
     internal::ThreadManager* manager,
src/benchmark_api_internal.h

@@ -1,10 +1,6 @@
 #ifndef BENCHMARK_API_INTERNAL_H
 #define BENCHMARK_API_INTERNAL_H

-#include "benchmark/benchmark.h"
-#include "commandlineflags.h"
-
-#include <chrono>
 #include <cmath>
 #include <iosfwd>
 #include <limits>
@@ -18,25 +14,12 @@
 namespace benchmark {
 namespace internal {

-extern const double kSafetyMultiplier;
-
 // Information kept per benchmark we may want to run
 class BenchmarkInstance {
  public:
   BenchmarkInstance(Benchmark* benchmark, const std::vector<int64_t>& args,
                     int threads);

-  // Returns number of repetitions for Random Interleaving. This will be
-  // initialized later once we finish the first repetition, if Random
-  // Interleaving is enabled. See also ComputeRandomInterleavingRepetitions().
-  int RandomInterleavingRepetitions() const;
-
-  // Returns true if repetitions for Random Interleaving is initialized.
-  bool RandomInterleavingRepetitionsInitialized() const;
-
-  // Initializes number of repetitions for random interleaving.
-  void InitRandomInterleavingRepetitions(int reps) const;
-
   const BenchmarkName& name() const { return name_; }
   AggregationReportMode aggregation_report_mode() const {
     return aggregation_report_mode_;
@@ -49,7 +32,7 @@ class BenchmarkInstance {
   BigOFunc& complexity_lambda() const { return *complexity_lambda_; }
   const std::vector<Statistics>& statistics() const { return statistics_; }
   int repetitions() const { return repetitions_; }
-  double MinTime() const;
+  double min_time() const { return min_time_; }
   IterationCount iterations() const { return iterations_; }
   int threads() const { return threads_; }
@@ -70,13 +53,12 @@ class BenchmarkInstance {
   bool use_manual_time_;
   BigO complexity_;
   BigOFunc* complexity_lambda_;
-  std::vector<Statistics> statistics_;
+  UserCounters counters_;
+  const std::vector<Statistics>& statistics_;
   int repetitions_;
   double min_time_;
   IterationCount iterations_;
-  int threads_;
-  UserCounters counters_;
-  mutable int random_interleaving_repetitions_ = -1;
+  int threads_;  // Number of concurrent threads to use
 };

 bool FindBenchmarksInternal(const std::string& re,
@@ -87,10 +69,6 @@ bool IsZero(double n);

 ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color = false);

-double GetMinTime();
-
-int GetRepetitions();
-
 } // end namespace internal
 } // end namespace benchmark
src/benchmark_runner.cc

@@ -15,7 +15,6 @@
 #include "benchmark_runner.h"
 #include "benchmark/benchmark.h"
 #include "benchmark_api_internal.h"
-#include "benchmark_adjust_repetitions.h"
 #include "internal_macros.h"

 #ifndef BENCHMARK_OS_WINDOWS
@@ -53,9 +52,6 @@
 #include "thread_manager.h"
 #include "thread_timer.h"

-DECLARE_bool(benchmark_enable_random_interleaving);
-DECLARE_double(benchmark_random_interleaving_max_overhead);
-
 namespace benchmark {

 namespace internal {
@@ -71,7 +67,7 @@ BenchmarkReporter::Run CreateRunReport(
     const internal::ThreadManager::Result& results,
     IterationCount memory_iterations,
     const MemoryManager::Result& memory_result, double seconds,
-    int repetition_index) {
+    int64_t repetition_index) {
   // Create report about this benchmark run.
   BenchmarkReporter::Run report;
@@ -142,16 +138,12 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters,
 class BenchmarkRunner {
  public:
   BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_,
-                  int outer_repetitions_,
-                  int inner_repetitions_,
-                  std::vector<BenchmarkReporter::Run>* complexity_reports_,
-                  RunResults* run_results_)
+                  std::vector<BenchmarkReporter::Run>* complexity_reports_)
       : b(b_),
         complexity_reports(*complexity_reports_),
-        run_results(*run_results_),
-        outer_repetitions(outer_repetitions_),
-        inner_repetitions(inner_repetitions_),
-        repeats(b.repetitions() != 0 ? b.repetitions() : inner_repetitions),
+        min_time(!IsZero(b.min_time()) ? b.min_time() : FLAGS_benchmark_min_time),
+        repeats(b.repetitions() != 0 ? b.repetitions()
+                                     : FLAGS_benchmark_repetitions),
         has_explicit_iteration_count(b.iterations() != 0),
         pool(b.threads() - 1),
         iters(has_explicit_iteration_count ? b.iterations() : 1),
@@ -171,7 +163,6 @@ class BenchmarkRunner {
             internal::ARM_DisplayReportAggregatesOnly);
     run_results.file_report_aggregates_only =
         (b.aggregation_report_mode() & internal::ARM_FileReportAggregatesOnly);

     CHECK(FLAGS_benchmark_perf_counters.empty() ||
           perf_counters_measurement.IsValid())
         << "Perf counters were requested but could not be set up.";
@@ -188,20 +179,21 @@ class BenchmarkRunner {
     if ((b.complexity() != oNone) && b.last_benchmark_instance) {
       auto additional_run_stats = ComputeBigO(complexity_reports);
       run_results.aggregates_only.insert(run_results.aggregates_only.end(),
                                          additional_run_stats.begin(),
                                          additional_run_stats.end());
       complexity_reports.clear();
     }
   }

+  RunResults&& get_results() { return std::move(run_results); }
+
  private:
+  RunResults run_results;
+
   const benchmark::internal::BenchmarkInstance& b;
   std::vector<BenchmarkReporter::Run>& complexity_reports;

-  RunResults& run_results;
-
-  const int outer_repetitions;
-  const int inner_repetitions;
+  const double min_time;
   const int repeats;
   const bool has_explicit_iteration_count;
@@ -276,14 +268,13 @@ class BenchmarkRunner {
   IterationCount PredictNumItersNeeded(const IterationResults& i) const {
     // See how much iterations should be increased by.
     // Note: Avoid division by zero with max(seconds, 1ns).
-    double multiplier =
-        b.MinTime() * kSafetyMultiplier / std::max(i.seconds, 1e-9);
+    double multiplier = min_time * 1.4 / std::max(i.seconds, 1e-9);
     // If our last run was at least 10% of FLAGS_benchmark_min_time then we
     // use the multiplier directly.
     // Otherwise we use at most 10 times expansion.
     // NOTE: When the last run was at least 10% of the min time the max
     // expansion should be 14x.
-    bool is_significant = (i.seconds / b.MinTime()) > 0.1;
+    bool is_significant = (i.seconds / min_time) > 0.1;
     multiplier = is_significant ? multiplier : std::min(10.0, multiplier);
     if (multiplier <= 1.0) multiplier = 2.0;
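As a side note, the prediction above is easy to trace by hand. The sketch below re-runs the restored (`+`) version of the logic with made-up timings; it is not part of the diff itself.

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>

// Isolated sketch: min_time = 0.5 s, last trial ran 1000 iterations in 0.002 s.
int main() {
  const double min_time = 0.5;
  const double seconds = 0.002;          // time taken by the last trial (made up)
  const int64_t iters = 1000;            // iterations in the last trial (made up)

  double multiplier = min_time * 1.4 / std::max(seconds, 1e-9);           // ~350
  const bool is_significant = (seconds / min_time) > 0.1;                 // false here
  multiplier = is_significant ? multiplier : std::min(10.0, multiplier);  // capped at 10
  if (multiplier <= 1.0) multiplier = 2.0;

  const int64_t next_iters = static_cast<int64_t>(iters * multiplier);
  std::cout << next_iters << "\n";  // 10000: grow by at most 10x per trial
  return 0;
}
```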
@@ -304,17 +295,14 @@ class BenchmarkRunner {
     // or because an error was reported.
     return i.results.has_error_ ||
            i.iters >= kMaxIterations ||  // Too many iterations already.
-           i.seconds >= b.MinTime() ||   // The elapsed time is large enough.
-           // CPU time is specified but the
-           // elapsed real time greatly exceeds
-           // the minimum time. Note that user
-           // provided timers are except from this
-           // sanity check.
-           ((i.results.real_time_used >= 5 * b.MinTime()) &&
-            !b.use_manual_time());
+           i.seconds >= min_time ||      // The elapsed time is large enough.
+           // CPU time is specified but the elapsed real time greatly exceeds
+           // the minimum time.
+           // Note that user provided timers are except from this sanity check.
+           ((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time());
   }

-  void DoOneRepetition(int repetition_index) {
+  void DoOneRepetition(int64_t repetition_index) {
     const bool is_the_first_repetition = repetition_index == 0;
     IterationResults i;
@@ -324,10 +312,8 @@ class BenchmarkRunner {
     // Please do note that the if there are repetitions, the iteration count
     // is *only* calculated for the *first* repetition, and other repetitions
     // simply use that precomputed iteration count.
-    const auto exec_start = benchmark::ChronoClockNow();
     for (;;) {
       i = DoNIterations();
-      const auto exec_end = benchmark::ChronoClockNow();

       // Do we consider the results to be significant?
       // If we are doing repetitions, and the first repetition was already done,
@@ -338,38 +324,7 @@ class BenchmarkRunner {
           has_explicit_iteration_count ||
           ShouldReportIterationResults(i);

-      if (results_are_significant) {
-        // The number of repetitions for random interleaving may be reduced
-        // to limit the increase in benchmark execution time. When this happens
-        // the target execution time for each repetition is increased. We may
-        // need to rerun trials to calculate iters according to the increased
-        // target execution time.
-        bool rerun_trial = false;
-        // If random interleaving is enabled and the repetitions is not
-        // initialized, do it now.
-        if (FLAGS_benchmark_enable_random_interleaving &&
-            !b.RandomInterleavingRepetitionsInitialized()) {
-          InternalRandomInterleavingRepetitionsInput input;
-          input.total_execution_time_per_repetition = exec_end - exec_start;
-          input.time_used_per_repetition = i.seconds;
-          input.real_time_used_per_repetition = i.results.real_time_used;
-          input.min_time_per_repetition = GetMinTime();
-          input.max_overhead = FLAGS_benchmark_random_interleaving_max_overhead;
-          input.max_repetitions = GetRepetitions();
-          b.InitRandomInterleavingRepetitions(
-              ComputeRandomInterleavingRepetitions(input));
-          // If the number of repetitions changed, need to rerun the last trial
-          // because iters may also change. Note that we only need to do this
-          // if accumulated_time < b.MinTime(), i.e., the iterations we have
-          // run is not enough for the already adjusted b.MinTime().
-          // Otherwise, we will still skip the rerun.
-          rerun_trial =
-              b.RandomInterleavingRepetitions() < GetRepetitions() &&
-              i.seconds < b.MinTime() && !has_explicit_iteration_count;
-        }
-
-        if (!rerun_trial) break;  // Good, let's report them!
-      }
+      if (results_are_significant) break;  // Good, let's report them!

       // Nope, bad iteration. Let's re-estimate the hopefully-sufficient
       // iteration count, and run the benchmark again...
@@ -386,8 +341,7 @@ class BenchmarkRunner {
     if (memory_manager != nullptr) {
       // Only run a few iterations to reduce the impact of one-time
       // allocations in benchmarks that are not properly managed.
-      memory_iterations = std::min<IterationCount>(
-          16 / outer_repetitions + (16 % outer_repetitions != 0), iters);
+      memory_iterations = std::min<IterationCount>(16, iters);
       memory_manager->Start();
       std::unique_ptr<internal::ThreadManager> manager;
       manager.reset(new internal::ThreadManager(1));
@@ -413,12 +367,11 @@ class BenchmarkRunner {

 }  // end namespace

-void RunBenchmark(const benchmark::internal::BenchmarkInstance& b,
-                  const int outer_repetitions, const int inner_repetitions,
-                  std::vector<BenchmarkReporter::Run>* complexity_reports,
-                  RunResults* run_results) {
-  internal::BenchmarkRunner r(b, outer_repetitions, inner_repetitions,
-                              complexity_reports, run_results);
+RunResults RunBenchmark(
+    const benchmark::internal::BenchmarkInstance& b,
+    std::vector<BenchmarkReporter::Run>* complexity_reports) {
+  internal::BenchmarkRunner r(b, complexity_reports);
+  return r.get_results();
 }

 }  // end namespace internal
src/benchmark_runner.h

@@ -42,10 +42,9 @@ struct RunResults {
   bool file_report_aggregates_only = false;
 };

-void RunBenchmark(const benchmark::internal::BenchmarkInstance& b,
-                  int outer_repetitions, int inner_repetitions,
-                  std::vector<BenchmarkReporter::Run>* complexity_reports,
-                  RunResults* run_results);
+RunResults RunBenchmark(
+    const benchmark::internal::BenchmarkInstance& b,
+    std::vector<BenchmarkReporter::Run>* complexity_reports);

 }  // namespace internal
test/CMakeLists.txt

@@ -196,7 +196,6 @@ if (BENCHMARK_ENABLE_GTEST_TESTS)

   add_gtest(benchmark_gtest)
   add_gtest(benchmark_name_gtest)
-  add_gtest(benchmark_random_interleaving_gtest)
   add_gtest(commandlineflags_gtest)
   add_gtest(statistics_gtest)
   add_gtest(string_util_gtest)
test/benchmark_random_interleaving_gtest.cc (deleted)

@@ -1,271 +0,0 @@

#include <queue>
#include <string>
#include <vector>

#include "../src/benchmark_adjust_repetitions.h"
#include "../src/string_util.h"
#include "benchmark/benchmark.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"

DECLARE_bool(benchmark_enable_random_interleaving);
DECLARE_string(benchmark_filter);
DECLARE_double(benchmark_random_interleaving_max_overhead);

namespace do_not_read_flag_directly {
DECLARE_int32(benchmark_repetitions);
}  // namespace do_not_read_flag_directly

namespace benchmark {
namespace internal {
namespace {

class EventQueue : public std::queue<std::string> {
 public:
  void Put(const std::string& event) { push(event); }

  void Clear() {
    while (!empty()) {
      pop();
    }
  }

  std::string Get() {
    std::string event = front();
    pop();
    return event;
  }
};

static EventQueue* queue = new EventQueue;

class NullReporter : public BenchmarkReporter {
 public:
  bool ReportContext(const Context& /*context*/) override { return true; }
  void ReportRuns(const std::vector<Run>& /* report */) override {}
};

class BenchmarkTest : public testing::Test {
 public:
  static void SetupHook(int /* num_threads */) { queue->push("Setup"); }

  static void TeardownHook(int /* num_threads */) { queue->push("Teardown"); }

  void Execute(const std::string& pattern) {
    queue->Clear();

    BenchmarkReporter* reporter = new NullReporter;
    FLAGS_benchmark_filter = pattern;
    RunSpecifiedBenchmarks(reporter);
    delete reporter;

    queue->Put("DONE");  // End marker
  }
};

static void BM_Match1(benchmark::State& state) {
  const int64_t arg = state.range(0);

  for (auto _ : state) {
  }
  queue->Put(StrFormat("BM_Match1/%d", static_cast<int>(arg)));
}
BENCHMARK(BM_Match1)
    ->Iterations(100)
    ->Arg(1)
    ->Arg(2)
    ->Arg(3)
    ->Range(10, 80)
    ->Args({90})
    ->Args({100});

static void BM_MatchOverhead(benchmark::State& state) {
  const int64_t arg = state.range(0);

  for (auto _ : state) {
  }
  queue->Put(StrFormat("BM_MatchOverhead/%d", static_cast<int>(arg)));
}
BENCHMARK(BM_MatchOverhead)->Iterations(100)->Arg(64)->Arg(80);

TEST_F(BenchmarkTest, Match1) {
  Execute("BM_Match1");
  ASSERT_EQ("BM_Match1/1", queue->Get());
  ASSERT_EQ("BM_Match1/2", queue->Get());
  ASSERT_EQ("BM_Match1/3", queue->Get());
  ASSERT_EQ("BM_Match1/10", queue->Get());
  ASSERT_EQ("BM_Match1/64", queue->Get());
  ASSERT_EQ("BM_Match1/80", queue->Get());
  ASSERT_EQ("BM_Match1/90", queue->Get());
  ASSERT_EQ("BM_Match1/100", queue->Get());
  ASSERT_EQ("DONE", queue->Get());
}

TEST_F(BenchmarkTest, Match1WithRepetition) {
  do_not_read_flag_directly::FLAGS_benchmark_repetitions = 2;

  Execute("BM_Match1/(64|80)");
  ASSERT_EQ("BM_Match1/64", queue->Get());
  ASSERT_EQ("BM_Match1/64", queue->Get());
  ASSERT_EQ("BM_Match1/80", queue->Get());
  ASSERT_EQ("BM_Match1/80", queue->Get());
  ASSERT_EQ("DONE", queue->Get());
}

TEST_F(BenchmarkTest, Match1WithRandomInterleaving) {
  FLAGS_benchmark_enable_random_interleaving = true;
  do_not_read_flag_directly::FLAGS_benchmark_repetitions = 100;
  FLAGS_benchmark_random_interleaving_max_overhead =
      std::numeric_limits<double>::infinity();

  std::vector<std::string> expected({"BM_Match1/64", "BM_Match1/80"});
  std::map<std::string, int> interleaving_count;
  Execute("BM_Match1/(64|80)");
  for (int i = 0; i < 100; ++i) {
    std::vector<std::string> interleaving;
    interleaving.push_back(queue->Get());
    interleaving.push_back(queue->Get());
    EXPECT_THAT(interleaving, testing::UnorderedElementsAreArray(expected));
    interleaving_count[StrFormat("%s,%s", interleaving[0].c_str(),
                                 interleaving[1].c_str())]++;
  }
  EXPECT_GE(interleaving_count.size(), 2) << "Interleaving was not randomized.";
  ASSERT_EQ("DONE", queue->Get());
}

TEST_F(BenchmarkTest, Match1WithRandomInterleavingAndZeroOverhead) {
  FLAGS_benchmark_enable_random_interleaving = true;
  do_not_read_flag_directly::FLAGS_benchmark_repetitions = 100;
  FLAGS_benchmark_random_interleaving_max_overhead = 0;

  // ComputeRandomInterleavingRepetitions() will kick in and rerun each
  // benchmark once with increased iterations. Then number of repetitions will
  // be reduced to < 100. The first 4 executions should be
  // 2 x BM_MatchOverhead/64 and 2 x BM_MatchOverhead/80.
  std::vector<std::string> expected(
      {"BM_MatchOverhead/64", "BM_MatchOverhead/80", "BM_MatchOverhead/64",
       "BM_MatchOverhead/80"});
  std::map<std::string, int> interleaving_count;
  Execute("BM_MatchOverhead/(64|80)");
  std::vector<std::string> interleaving;
  interleaving.push_back(queue->Get());
  interleaving.push_back(queue->Get());
  interleaving.push_back(queue->Get());
  interleaving.push_back(queue->Get());
  EXPECT_THAT(interleaving, testing::UnorderedElementsAreArray(expected));
  ASSERT_LT(queue->size(), 100) << "# Repetitions was not reduced to < 100.";
}

InternalRandomInterleavingRepetitionsInput CreateInput(
    double total, double time, double real_time, double min_time,
    double overhead, int repetitions) {
  InternalRandomInterleavingRepetitionsInput input;
  input.total_execution_time_per_repetition = total;
  input.time_used_per_repetition = time;
  input.real_time_used_per_repetition = real_time;
  input.min_time_per_repetition = min_time;
  input.max_overhead = overhead;
  input.max_repetitions = repetitions;
  return input;
}

TEST(Benchmark, ComputeRandomInterleavingRepetitions) {
  // On wall clock time.
  EXPECT_EQ(ComputeRandomInterleavingRepetitions(
                CreateInput(0.05, 0.05, 0.05, 0.05, 0.0, 10)),
            10);
  EXPECT_EQ(ComputeRandomInterleavingRepetitions(
                CreateInput(0.05, 0.05, 0.05, 0.05, 0.4, 10)),
            10);
  EXPECT_EQ(ComputeRandomInterleavingRepetitions(
                CreateInput(0.06, 0.05, 0.05, 0.05, 0.0, 10)),
            8);
  EXPECT_EQ(ComputeRandomInterleavingRepetitions(
                CreateInput(0.06, 0.05, 0.05, 0.05, 0.4, 10)),
            10);
  EXPECT_EQ(ComputeRandomInterleavingRepetitions(
                CreateInput(0.08, 0.05, 0.05, 0.05, 0.0, 10)),
            6);
  EXPECT_EQ(ComputeRandomInterleavingRepetitions(
                CreateInput(0.08, 0.05, 0.05, 0.05, 0.4, 10)),
            9);
  EXPECT_EQ(ComputeRandomInterleavingRepetitions(
                CreateInput(0.26, 0.25, 0.25, 0.05, 0.0, 10)),
            2);
  EXPECT_EQ(ComputeRandomInterleavingRepetitions(
                CreateInput(0.25, 0.25, 0.25, 0.05, 0.4, 10)),
            3);
  EXPECT_EQ(ComputeRandomInterleavingRepetitions(
                CreateInput(0.26, 0.25, 0.25, 0.05, 0.0, 10)),
            2);
  EXPECT_EQ(ComputeRandomInterleavingRepetitions(
                CreateInput(0.26, 0.25, 0.25, 0.05, 0.4, 10)),
            3);
  EXPECT_EQ(ComputeRandomInterleavingRepetitions(
                CreateInput(0.38, 0.25, 0.25, 0.05, 0.0, 10)),
            2);
  EXPECT_EQ(ComputeRandomInterleavingRepetitions(
                CreateInput(0.38, 0.25, 0.25, 0.05, 0.4, 10)),
            3);

  // On CPU time.
  EXPECT_EQ(ComputeRandomInterleavingRepetitions(
                CreateInput(0.1, 0.05, 0.1, 0.05, 0.0, 10)),
            10);
  EXPECT_EQ(ComputeRandomInterleavingRepetitions(
                CreateInput(0.1, 0.05, 0.1, 0.05, 0.4, 10)),
            10);
  EXPECT_EQ(ComputeRandomInterleavingRepetitions(
                CreateInput(0.11, 0.05, 0.1, 0.05, 0.0, 10)),
            9);
  EXPECT_EQ(ComputeRandomInterleavingRepetitions(
                CreateInput(0.11, 0.05, 0.1, 0.05, 0.4, 10)),
            10);
  EXPECT_EQ(ComputeRandomInterleavingRepetitions(
                CreateInput(0.15, 0.05, 0.1, 0.05, 0.0, 10)),
            7);
  EXPECT_EQ(ComputeRandomInterleavingRepetitions(
                CreateInput(0.15, 0.05, 0.1, 0.05, 0.4, 10)),
            9);
  EXPECT_EQ(ComputeRandomInterleavingRepetitions(
                CreateInput(0.5, 0.25, 0.5, 0.05, 0.0, 10)),
            2);
  EXPECT_EQ(ComputeRandomInterleavingRepetitions(
                CreateInput(0.5, 0.25, 0.5, 0.05, 0.4, 10)),
            3);
  EXPECT_EQ(ComputeRandomInterleavingRepetitions(
                CreateInput(0.51, 0.25, 0.5, 0.05, 0.0, 10)),
            2);
  EXPECT_EQ(ComputeRandomInterleavingRepetitions(
                CreateInput(0.51, 0.25, 0.5, 0.05, 0.4, 10)),
            3);
  EXPECT_EQ(ComputeRandomInterleavingRepetitions(
                CreateInput(0.8, 0.25, 0.5, 0.05, 0.0, 10)),
            2);
  EXPECT_EQ(ComputeRandomInterleavingRepetitions(
                CreateInput(0.8, 0.25, 0.5, 0.05, 0.4, 10)),
            2);

  // Corner cases.
  EXPECT_EQ(ComputeRandomInterleavingRepetitions(
                CreateInput(0.0, 0.25, 0.5, 0.05, 0.4, 10)),
            3);
  EXPECT_EQ(ComputeRandomInterleavingRepetitions(
                CreateInput(0.8, 0.0, 0.5, 0.05, 0.4, 10)),
            9);
  EXPECT_EQ(ComputeRandomInterleavingRepetitions(
                CreateInput(0.8, 0.25, 0.0, 0.05, 0.4, 10)),
            1);
  EXPECT_EQ(ComputeRandomInterleavingRepetitions(
                CreateInput(0.8, 0.25, 0.5, 0.0, 0.4, 10)),
            1);
}

}  // namespace
}  // namespace internal
}  // namespace benchmark