// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "benchmark/benchmark.h"

#include "benchmark_api_internal.h"
#include "benchmark_runner.h"
#include "internal_macros.h"

#ifndef BENCHMARK_OS_WINDOWS
#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT)
#include <sys/resource.h>
#endif
#include <sys/time.h>
#include <unistd.h>
#endif

#include <algorithm>
#include <atomic>
#include <condition_variable>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <limits>
#include <map>
#include <memory>
#include <random>
#include <string>
#include <thread>
#include <utility>

#include "check.h"
#include "colorprint.h"
#include "commandlineflags.h"
#include "complexity.h"
#include "counter.h"
#include "internal_macros.h"
#include "log.h"
#include "mutex.h"
#include "perf_counters.h"
#include "re.h"
#include "statistics.h"
#include "string_util.h"
#include "thread_manager.h"
#include "thread_timer.h"

namespace benchmark {
// Print a list of benchmarks. This option overrides all other options.
BM_DEFINE_bool(benchmark_list_tests, false);

// A regular expression that specifies the set of benchmarks to execute. If
// this flag is empty, or if this flag is the string \"all\", all benchmarks
// linked into the binary are run.
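// For example, --benchmark_filter=BM_StringCreation.* runs only the
// benchmarks whose names match that regular expression.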
BM_DEFINE_string(benchmark_filter, "");

// Specification of how long to run the benchmark.
//
// It can be either an exact number of iterations (specified as `<integer>x`),
// or a minimum number of seconds (specified as `<float>s`). If the latter
// format (i.e., min seconds) is used, the system may run the benchmark longer
// until the results are considered significant.
//
// For backward compatibility, the `s` suffix may be omitted, in which case,
// the specified number is interpreted as the number of seconds.
//
// For cpu-time based tests, this is the lower bound
// on the total cpu time used by all threads that make up the test. For
// real-time based tests, this is the lower bound on the elapsed time of the
// benchmark execution, regardless of number of threads.
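// Example: --benchmark_min_time=10x runs exactly 10 iterations, while
// --benchmark_min_time=2.5s (or, for backward compatibility, 2.5) runs for at
// least 2.5 seconds.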
BM_DEFINE_string(benchmark_min_time, kDefaultMinTimeStr);

// Minimum number of seconds a benchmark should be run before results should be
// taken into account. This can be necessary, e.g., for benchmarks of code
// which needs to fill some form of cache before performance is of interest.
// Note: results gathered within this period are discarded and not used for
// the reported result.
BM_DEFINE_double(benchmark_min_warmup_time, 0.0);

// The number of runs of each benchmark. If greater than 1, the mean and
// standard deviation of the runs will be reported.
BM_DEFINE_int32(benchmark_repetitions, 1);

// If set, enable random interleaving of repetitions of all benchmarks.
// See http://github.com/google/benchmark/issues/1051 for details.
BM_DEFINE_bool(benchmark_enable_random_interleaving, false);

// Report the result of each benchmark repetition. When 'true' is specified
// only the mean, standard deviation, and other statistics are reported for
// repeated benchmarks. Affects all reporters.
BM_DEFINE_bool(benchmark_report_aggregates_only, false);

// Display the result of each benchmark repetition. When 'true' is specified
// only the mean, standard deviation, and other statistics are displayed for
// repeated benchmarks. Unlike benchmark_report_aggregates_only, this only
// affects the display reporter, but *NOT* the file reporter, which will still
// contain all the output.
BM_DEFINE_bool(benchmark_display_aggregates_only, false);

// The format to use for console output.
// Valid values are 'console', 'json', or 'csv'.
BM_DEFINE_string(benchmark_format, "console");

// The format to use for file output.
// Valid values are 'console', 'json', or 'csv'.
BM_DEFINE_string(benchmark_out_format, "json");

// The file to write additional output to.
BM_DEFINE_string(benchmark_out, "");

// Whether to use colors in the output. Valid values:
// 'true'/'yes'/1, 'false'/'no'/0, and 'auto'. 'auto' means to use colors if
// the output is being sent to a terminal and the TERM environment variable is
// set to a terminal type that supports colors.
BM_DEFINE_string(benchmark_color, "auto");

// Whether to use tabular format when printing user counters to the console.
// Valid values: 'true'/'yes'/1, 'false'/'no'/0. Defaults to false.
BM_DEFINE_bool(benchmark_counters_tabular, false);

// List of additional perf counters to collect, in libpfm format. For more
// information about libpfm: https://man7.org/linux/man-pages/man3/libpfm.3.html
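// Example: --benchmark_perf_counters=CYCLES,INSTRUCTIONS (the set of event
// names that is accepted depends on the host's libpfm support).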
BM_DEFINE_string(benchmark_perf_counters, "");

// Extra context to include in the output formatted as comma-separated key-value
// pairs. Kept internal as it's only used for parsing from env/command line.
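// Example: --benchmark_context=gpu=off,build=release adds both key/value
// pairs to the reported context.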
BM_DEFINE_kvpairs(benchmark_context, {});

// Set the default time unit to use for reports
// Valid values are 'ns', 'us', 'ms' or 's'
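// Example: --benchmark_time_unit=ms reports timings in milliseconds by
// default.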
BM_DEFINE_string(benchmark_time_unit, "");

// The level of verbose logging to output
BM_DEFINE_int32(v, 0);

namespace internal {
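// Extra key/value context pairs (from --benchmark_context and
// AddCustomContext) that reporters attach to the reported benchmark context.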
std::map<std::string, std::string>* global_context = nullptr;

BENCHMARK_EXPORT std::map<std::string, std::string>*& GetGlobalContext() {
  return global_context;
}

// FIXME: wouldn't LTO mess this up?
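// Trivial sink used by DoNotOptimize() to keep the pointed-to value from
// being optimized away.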
void UseCharPointer(char const volatile*) {}

}  // namespace internal

State::State(std::string name, IterationCount max_iters,
             const std::vector<int64_t>& ranges, int thread_i, int n_threads,
             internal::ThreadTimer* timer, internal::ThreadManager* manager,
             internal::PerfCountersMeasurement* perf_counters_measurement)
    : total_iterations_(0),
      batch_leftover_(0),
      max_iterations(max_iters),
      started_(false),
      finished_(false),
      skipped_(internal::NotSkipped),
      range_(ranges),
      complexity_n_(0),
      name_(std::move(name)),
      thread_index_(thread_i),
      threads_(n_threads),
      timer_(timer),
      manager_(manager),
      perf_counters_measurement_(perf_counters_measurement) {
  BM_CHECK(max_iterations != 0) << "At least one iteration must be run";
  BM_CHECK_LT(thread_index_, threads_)
      << "thread_index must be less than threads";

  // Add counters with correct flag now. If added with `counters[name]` in
  // `PauseTiming`, a new `Counter` will be inserted the first time, which
  // won't have the flag. Inserting them now also reduces the allocations
  // during the benchmark.
  if (perf_counters_measurement_) {
    for (const std::string& counter_name :
         perf_counters_measurement_->names()) {
      counters[counter_name] = Counter(0.0, Counter::kAvgIterations);
    }
  }

  // Note: The use of offsetof below is technically undefined until C++17
  // because State is not a standard layout type. However, all compilers
  // currently provide well-defined behavior as an extension (which is
  // demonstrated since constexpr evaluation must diagnose all undefined
  // behavior). However, GCC and Clang also warn about this use of offsetof,
  // which must be suppressed.
#if defined(__INTEL_COMPILER)
#pragma warning push
#pragma warning(disable : 1875)
#elif defined(__GNUC__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Winvalid-offsetof"
#endif
#if defined(__NVCC__)
#pragma nv_diagnostic push
#pragma nv_diag_suppress 1427
#endif
#if defined(__NVCOMPILER)
#pragma diagnostic push
#pragma diag_suppress offset_in_non_POD_nonstandard
#endif
  // Offset tests to ensure commonly accessed data is on the first cache line.
  const int cache_line_size = 64;
  static_assert(
      offsetof(State, skipped_) <= (cache_line_size - sizeof(skipped_)), "");
#if defined(__INTEL_COMPILER)
#pragma warning pop
#elif defined(__GNUC__)
#pragma GCC diagnostic pop
#endif
#if defined(__NVCC__)
#pragma nv_diagnostic pop
#endif
#if defined(__NVCOMPILER)
#pragma diagnostic pop
#endif
}

void State::PauseTiming() {
  // Add in time accumulated so far
  BM_CHECK(started_ && !finished_ && !skipped());
  timer_->StopTimer();
  if (perf_counters_measurement_) {
    std::vector<std::pair<std::string, double>> measurements;
    if (!perf_counters_measurement_->Stop(measurements)) {
      BM_CHECK(false) << "Reading perf counter values failed.";
    }
    for (const auto& name_and_measurement : measurements) {
      const std::string& name = name_and_measurement.first;
      const double measurement = name_and_measurement.second;
      // Counter was inserted with `kAvgIterations` flag by the constructor.
      assert(counters.find(name) != counters.end());
      counters[name].value += measurement;
    }
  }
}

void State::ResumeTiming() {
  BM_CHECK(started_ && !finished_ && !skipped());
  timer_->StartTimer();
  if (perf_counters_measurement_) {
    perf_counters_measurement_->Start();
  }
}

void State::SkipWithMessage(const std::string& msg) {
  skipped_ = internal::SkippedWithMessage;
  {
    MutexLock l(manager_->GetBenchmarkMutex());
    if (internal::NotSkipped == manager_->results.skipped_) {
      manager_->results.skip_message_ = msg;
      manager_->results.skipped_ = skipped_;
    }
  }
  total_iterations_ = 0;
  if (timer_->running()) timer_->StopTimer();
}

void State::SkipWithError(const std::string& msg) {
  skipped_ = internal::SkippedWithError;
  {
    MutexLock l(manager_->GetBenchmarkMutex());
    if (internal::NotSkipped == manager_->results.skipped_) {
      manager_->results.skip_message_ = msg;
      manager_->results.skipped_ = skipped_;
    }
  }
  total_iterations_ = 0;
  if (timer_->running()) timer_->StopTimer();
}

void State::SetIterationTime(double seconds) {
  timer_->SetIterationTime(seconds);
}

void State::SetLabel(const std::string& label) {
  MutexLock l(manager_->GetBenchmarkMutex());
  manager_->results.report_label_ = label;
}

void State::StartKeepRunning() {
  BM_CHECK(!started_ && !finished_);
  started_ = true;
  total_iterations_ = skipped() ? 0 : max_iterations;
  manager_->StartStopBarrier();
  if (!skipped()) ResumeTiming();
}

void State::FinishKeepRunning() {
  BM_CHECK(started_ && (!finished_ || skipped()));
  if (!skipped()) {
    PauseTiming();
  }
  // Total iterations has now wrapped around past 0. Fix this.
  total_iterations_ = 0;
  finished_ = true;
  manager_->StartStopBarrier();
}

namespace internal {
namespace {

// Flushes streams after invoking reporter methods that write to them. This
// ensures users get timely updates even when streams are not line-buffered.
void FlushStreams(BenchmarkReporter* reporter) {
  if (!reporter) return;
  std::flush(reporter->GetOutputStream());
  std::flush(reporter->GetErrorStream());
}

// Reports in both display and file reporters.
void Report(BenchmarkReporter* display_reporter,
            BenchmarkReporter* file_reporter, const RunResults& run_results) {
  auto report_one = [](BenchmarkReporter* reporter, bool aggregates_only,
                       const RunResults& results) {
    assert(reporter);
    // If there are no aggregates, do output non-aggregates.
    aggregates_only &= !results.aggregates_only.empty();
    if (!aggregates_only) reporter->ReportRuns(results.non_aggregates);
    if (!results.aggregates_only.empty())
      reporter->ReportRuns(results.aggregates_only);
  };

  report_one(display_reporter, run_results.display_report_aggregates_only,
             run_results);
  if (file_reporter)
    report_one(file_reporter, run_results.file_report_aggregates_only,
               run_results);

  FlushStreams(display_reporter);
  FlushStreams(file_reporter);
}

void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
                   BenchmarkReporter* display_reporter,
                   BenchmarkReporter* file_reporter) {
  // Note the file_reporter can be null.
  BM_CHECK(display_reporter != nullptr);

  // Determine the width of the name field using a minimum width of 10.
  bool might_have_aggregates = FLAGS_benchmark_repetitions > 1;
  size_t name_field_width = 10;
  size_t stat_field_width = 0;
  for (const BenchmarkInstance& benchmark : benchmarks) {
    name_field_width =
        std::max<size_t>(name_field_width, benchmark.name().str().size());
    might_have_aggregates |= benchmark.repetitions() > 1;

    for (const auto& Stat : benchmark.statistics())
      stat_field_width = std::max<size_t>(stat_field_width, Stat.name_.size());
  }
  if (might_have_aggregates) name_field_width += 1 + stat_field_width;

  // Print header here
  BenchmarkReporter::Context context;
  context.name_field_width = name_field_width;

  // Keep track of running times of all instances of each benchmark family.
|
Random interleaving of benchmark repetitions - the sequel (fixes #1051) (#1163)
Inspired by the original implementation by Hai Huang @haih-g
from https://github.com/google/benchmark/pull/1105.
The original implementation had design deficiencies that
weren't really addressable without redesign, so it was reverted.
In essence, the original implementation consisted of two separateable parts:
* reducing the amount time each repetition is run for, and symmetrically increasing repetition count
* running the repetitions in random order
While it worked fine for the usual case, it broke down when user would specify repetitions
(it would completely ignore that request), or specified per-repetition min time (while it would
still adjust the repetition count, it would not adjust the per-repetition time,
leading to much greater run times)
Here, like i was originally suggesting in the original review, i'm separating the features,
and only dealing with a single one - running repetitions in random order.
Now that the runs/repetitions are no longer in-order, the tooling may wish to sort the output,
and indeed `compare.py` has been updated to do that: #1168.
2021-06-04 02:16:54 +08:00
|
|
|
std::map<int /*family_index*/, BenchmarkReporter::PerFamilyRunReports>
|
|
|
|
per_family_reports;
|
2016-05-19 03:25:00 +08:00
|
|
|
|
2018-08-29 19:58:54 +08:00
|
|
|
if (display_reporter->ReportContext(context) &&
|
2016-10-08 02:35:03 +08:00
|
|
|
(!file_reporter || file_reporter->ReportContext(context))) {
|
2021-06-03 21:42:08 +08:00
|
|
|
FlushStreams(display_reporter);
|
|
|
|
FlushStreams(file_reporter);
|
2021-06-01 23:05:50 +08:00
|
|
|
|
Random interleaving of benchmark repetitions - the sequel (fixes #1051) (#1163)
Inspired by the original implementation by Hai Huang @haih-g
from https://github.com/google/benchmark/pull/1105.
The original implementation had design deficiencies that
weren't really addressable without redesign, so it was reverted.
In essence, the original implementation consisted of two separateable parts:
* reducing the amount time each repetition is run for, and symmetrically increasing repetition count
* running the repetitions in random order
While it worked fine for the usual case, it broke down when user would specify repetitions
(it would completely ignore that request), or specified per-repetition min time (while it would
still adjust the repetition count, it would not adjust the per-repetition time,
leading to much greater run times)
Here, like i was originally suggesting in the original review, i'm separating the features,
and only dealing with a single one - running repetitions in random order.
Now that the runs/repetitions are no longer in-order, the tooling may wish to sort the output,
and indeed `compare.py` has been updated to do that: #1168.
2021-06-04 02:16:54 +08:00
|
|
|
size_t num_repetitions_total = 0;
|
|
|
|
|
2023-03-07 18:27:52 +08:00
|
|
|
// This perfcounters object needs to be created before the runners vector
|
|
|
|
// below so it outlasts their lifetime.
|
|
|
|
PerfCountersMeasurement perfcounters(
|
|
|
|
StrSplit(FLAGS_benchmark_perf_counters, ','));
|
|
|
|
|
|
|
|
// Vector of benchmarks to run
|
Random interleaving of benchmark repetitions - the sequel (fixes #1051) (#1163)
Inspired by the original implementation by Hai Huang @haih-g
from https://github.com/google/benchmark/pull/1105.
The original implementation had design deficiencies that
weren't really addressable without redesign, so it was reverted.
In essence, the original implementation consisted of two separateable parts:
* reducing the amount time each repetition is run for, and symmetrically increasing repetition count
* running the repetitions in random order
While it worked fine for the usual case, it broke down when user would specify repetitions
(it would completely ignore that request), or specified per-repetition min time (while it would
still adjust the repetition count, it would not adjust the per-repetition time,
leading to much greater run times)
Here, like i was originally suggesting in the original review, i'm separating the features,
and only dealing with a single one - running repetitions in random order.
Now that the runs/repetitions are no longer in-order, the tooling may wish to sort the output,
and indeed `compare.py` has been updated to do that: #1168.
2021-06-04 02:16:54 +08:00
|
|
|
std::vector<internal::BenchmarkRunner> runners;
|
|
|
|
runners.reserve(benchmarks.size());
|
2023-03-07 18:27:52 +08:00
|
|
|
|
|
|
|
// Count the number of benchmarks with threads to warn the user in case
|
|
|
|
// performance counters are used.
|
|
|
|
int benchmarks_with_threads = 0;
|
|
|
|
|
|
|
|
// Loop through all benchmarks
|
2021-06-03 16:46:34 +08:00
|
|
|
for (const BenchmarkInstance& benchmark : benchmarks) {
|
Random interleaving of benchmark repetitions - the sequel (fixes #1051) (#1163)
Inspired by the original implementation by Hai Huang @haih-g
from https://github.com/google/benchmark/pull/1105.
The original implementation had design deficiencies that
weren't really addressable without redesign, so it was reverted.
In essence, the original implementation consisted of two separateable parts:
* reducing the amount time each repetition is run for, and symmetrically increasing repetition count
* running the repetitions in random order
While it worked fine for the usual case, it broke down when user would specify repetitions
(it would completely ignore that request), or specified per-repetition min time (while it would
still adjust the repetition count, it would not adjust the per-repetition time,
leading to much greater run times)
Here, like i was originally suggesting in the original review, i'm separating the features,
and only dealing with a single one - running repetitions in random order.
Now that the runs/repetitions are no longer in-order, the tooling may wish to sort the output,
and indeed `compare.py` has been updated to do that: #1168.
2021-06-04 02:16:54 +08:00
|
|
|
      BenchmarkReporter::PerFamilyRunReports* reports_for_family = nullptr;
      if (benchmark.complexity() != oNone)
        reports_for_family = &per_family_reports[benchmark.family_index()];
      benchmarks_with_threads += (benchmark.threads() > 1);
      runners.emplace_back(benchmark, &perfcounters, reports_for_family);
      int num_repeats_of_this_instance = runners.back().GetNumRepeats();
      num_repetitions_total += num_repeats_of_this_instance;
      if (reports_for_family)
        reports_for_family->num_runs_total += num_repeats_of_this_instance;
    }
    assert(runners.size() == benchmarks.size() && "Unexpected runner count.");

    // The use of performance counters with threads would be unintuitive for
    // the average user, so we need to warn them about this case.
    if ((benchmarks_with_threads > 0) && (perfcounters.num_counters() > 0)) {
      GetErrorLogInstance()
          << "***WARNING*** There are " << benchmarks_with_threads
          << " benchmarks with threads and " << perfcounters.num_counters()
          << " performance counters were requested. Beware counters will "
             "reflect the combined usage across all "
             "threads.\n";
    }

    std::vector<size_t> repetition_indices;
    repetition_indices.reserve(num_repetitions_total);
    for (size_t runner_index = 0, num_runners = runners.size();
         runner_index != num_runners; ++runner_index) {
      const internal::BenchmarkRunner& runner = runners[runner_index];
      std::fill_n(std::back_inserter(repetition_indices),
                  runner.GetNumRepeats(), runner_index);
    }
    assert(repetition_indices.size() == num_repetitions_total &&
           "Unexpected number of repetition indexes.");

    if (FLAGS_benchmark_enable_random_interleaving) {
      std::random_device rd;
      std::mt19937 g(rd());
      std::shuffle(repetition_indices.begin(), repetition_indices.end(), g);
    }
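
    // Note on random interleaving: when enabled, repetitions of different
    // benchmarks are shuffled rather than run back-to-back. As a purely
    // illustrative example, two benchmarks A and B with two repetitions each
    // might execute as A, B, B, A instead of A, A, B, B, which spreads slow
    // drift in machine state (thermals, background load) across all
    // benchmarks instead of biasing one of them.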

    for (size_t repetition_index : repetition_indices) {
      internal::BenchmarkRunner& runner = runners[repetition_index];
      runner.DoOneRepetition();
      if (runner.HasRepeatsRemaining()) continue;

      // FIXME: report each repetition separately, not all of them in bulk.

      display_reporter->ReportRunsConfig(
          runner.GetMinTime(), runner.HasExplicitIters(), runner.GetIters());
      if (file_reporter)
        file_reporter->ReportRunsConfig(
            runner.GetMinTime(), runner.HasExplicitIters(), runner.GetIters());

      RunResults run_results = runner.GetResults();

      // Maybe calculate complexity report
      if (const auto* reports_for_family = runner.GetReportsForFamily()) {
        if (reports_for_family->num_runs_done ==
            reports_for_family->num_runs_total) {
          auto additional_run_stats = ComputeBigO(reports_for_family->Runs);
          run_results.aggregates_only.insert(run_results.aggregates_only.end(),
                                             additional_run_stats.begin(),
                                             additional_run_stats.end());
          per_family_reports.erase(
              static_cast<int>(reports_for_family->Runs.front().family_index));
        }
      }

      Report(display_reporter, file_reporter, run_results);
    }
  }
  display_reporter->Finalize();
  if (file_reporter) file_reporter->Finalize();
  FlushStreams(display_reporter);
  FlushStreams(file_reporter);
}

// Disable deprecated warnings temporarily because we need to reference
// CSVReporter but don't want to trigger -Werror=-Wdeprecated-declarations
BENCHMARK_DISABLE_DEPRECATED_WARNING

std::unique_ptr<BenchmarkReporter> CreateReporter(
    std::string const& name, ConsoleReporter::OutputOptions output_opts) {
  typedef std::unique_ptr<BenchmarkReporter> PtrType;
  if (name == "console") {
    return PtrType(new ConsoleReporter(output_opts));
  }
  if (name == "json") {
    return PtrType(new JSONReporter());
  }
  if (name == "csv") {
    return PtrType(new CSVReporter());
  }
  std::cerr << "Unexpected format: '" << name << "'\n";
  std::exit(1);
}

BENCHMARK_RESTORE_DEPRECATED_WARNING

} // end namespace

bool IsZero(double n) {
  return std::abs(n) < std::numeric_limits<double>::epsilon();
}

ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color) {
  int output_opts = ConsoleReporter::OO_Defaults;
  auto is_benchmark_color = [force_no_color]() -> bool {
    if (force_no_color) {
      return false;
    }
    if (FLAGS_benchmark_color == "auto") {
      return IsColorTerminal();
    }
    return IsTruthyFlagValue(FLAGS_benchmark_color);
  };
  if (is_benchmark_color()) {
    output_opts |= ConsoleReporter::OO_Color;
  } else {
    output_opts &= ~ConsoleReporter::OO_Color;
  }
  if (FLAGS_benchmark_counters_tabular) {
    output_opts |= ConsoleReporter::OO_Tabular;
  } else {
    output_opts &= ~ConsoleReporter::OO_Tabular;
  }
  return static_cast<ConsoleReporter::OutputOptions>(output_opts);
}

} // end namespace internal

BenchmarkReporter* CreateDefaultDisplayReporter() {
  static auto default_display_reporter =
      internal::CreateReporter(FLAGS_benchmark_format,
                               internal::GetOutputOptions())
          .release();
  return default_display_reporter;
}
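
// Illustrative sketch (not part of this file): the returned reporter can be
// passed straight back into RunSpecifiedBenchmarks, e.g. to pair the default
// console output with a custom file reporter (`my_file_reporter` below is a
// hypothetical BenchmarkReporter subclass):
//
//   benchmark::RunSpecifiedBenchmarks(
//       benchmark::CreateDefaultDisplayReporter(), &my_file_reporter);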

size_t RunSpecifiedBenchmarks() {
  return RunSpecifiedBenchmarks(nullptr, nullptr, FLAGS_benchmark_filter);
}

size_t RunSpecifiedBenchmarks(std::string spec) {
  return RunSpecifiedBenchmarks(nullptr, nullptr, std::move(spec));
}

size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter) {
  return RunSpecifiedBenchmarks(display_reporter, nullptr,
                                FLAGS_benchmark_filter);
}

size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
                              std::string spec) {
  return RunSpecifiedBenchmarks(display_reporter, nullptr, std::move(spec));
}

size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
                              BenchmarkReporter* file_reporter) {
  return RunSpecifiedBenchmarks(display_reporter, file_reporter,
                                FLAGS_benchmark_filter);
}

size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
                              BenchmarkReporter* file_reporter,
                              std::string spec) {
  if (spec.empty() || spec == "all")
    spec = ".";  // Regexp that matches all benchmarks

  // Setup the reporters
  std::ofstream output_file;
  std::unique_ptr<BenchmarkReporter> default_display_reporter;
  std::unique_ptr<BenchmarkReporter> default_file_reporter;
  if (!display_reporter) {
    default_display_reporter.reset(CreateDefaultDisplayReporter());
    display_reporter = default_display_reporter.get();
  }
  auto& Out = display_reporter->GetOutputStream();
  auto& Err = display_reporter->GetErrorStream();

  std::string const& fname = FLAGS_benchmark_out;
  if (fname.empty() && file_reporter) {
    Err << "A custom file reporter was provided but "
           "--benchmark_out=<file> was not specified."
        << std::endl;
    Out.flush();
    Err.flush();
    std::exit(1);
  }
  if (!fname.empty()) {
    output_file.open(fname);
    if (!output_file.is_open()) {
      Err << "invalid file name: '" << fname << "'" << std::endl;
      Out.flush();
      Err.flush();
      std::exit(1);
    }
    if (!file_reporter) {
      default_file_reporter = internal::CreateReporter(
          FLAGS_benchmark_out_format, FLAGS_benchmark_counters_tabular
                                          ? ConsoleReporter::OO_Tabular
                                          : ConsoleReporter::OO_None);
      file_reporter = default_file_reporter.get();
    }
    file_reporter->SetOutputStream(&output_file);
    file_reporter->SetErrorStream(&output_file);
  }

  std::vector<internal::BenchmarkInstance> benchmarks;
  if (!FindBenchmarksInternal(spec, &benchmarks, &Err)) {
    Out.flush();
    Err.flush();
    return 0;
  }

  if (benchmarks.empty()) {
    Err << "Failed to match any benchmarks against regex: " << spec << "\n";
    Out.flush();
    Err.flush();
    return 0;
  }

  if (FLAGS_benchmark_list_tests) {
    for (auto const& benchmark : benchmarks)
      Out << benchmark.name().str() << "\n";
  } else {
    internal::RunBenchmarks(benchmarks, display_reporter, file_reporter);
  }

  Out.flush();
  Err.flush();
  return benchmarks.size();
}
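
// Illustrative usage sketch (assumes benchmarks such as BM_Foo have been
// registered elsewhere with BENCHMARK(BM_Foo)):
//
//   benchmark::Initialize(&argc, argv);
//   benchmark::JSONReporter json_reporter;
//   // Run only benchmarks whose name matches "BM_Foo.*", reported as JSON.
//   size_t matched =
//       benchmark::RunSpecifiedBenchmarks(&json_reporter, "BM_Foo.*");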

namespace {
// stores the time unit benchmarks use by default
TimeUnit default_time_unit = kNanosecond;
} // namespace

TimeUnit GetDefaultTimeUnit() { return default_time_unit; }

void SetDefaultTimeUnit(TimeUnit unit) { default_time_unit = unit; }
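
// Illustrative sketch (not part of this file): a benchmark binary can switch
// the default reporting unit programmatically before any benchmarks run,
// which is what the --benchmark_time_unit flag does under the hood:
//
//   benchmark::SetDefaultTimeUnit(benchmark::kMillisecond);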

std::string GetBenchmarkFilter() { return FLAGS_benchmark_filter; }

void SetBenchmarkFilter(std::string value) {
  FLAGS_benchmark_filter = std::move(value);
}
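
// Illustrative sketch (not part of this file): the filter can also be set
// from code rather than via --benchmark_filter, before RunSpecifiedBenchmarks
// picks it up (the pattern below is an arbitrary example):
//
//   benchmark::SetBenchmarkFilter("BM_memcpy/.*");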

int32_t GetBenchmarkVerbosity() { return FLAGS_v; }

void RegisterMemoryManager(MemoryManager* manager) {
  internal::memory_manager = manager;
}

void AddCustomContext(const std::string& key, const std::string& value) {
  if (internal::global_context == nullptr) {
    internal::global_context = new std::map<std::string, std::string>();
  }
  if (!internal::global_context->emplace(key, value).second) {
    std::cerr << "Failed to add custom context \"" << key << "\" as it already "
              << "exists with value \"" << value << "\"\n";
  }
}
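
// Illustrative sketch (not part of this file): custom key/value pairs show up
// in the reported context, e.g. to tag a run with build metadata (the key and
// value below are arbitrary examples):
//
//   benchmark::AddCustomContext("compiler", "clang 17.0.1");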

namespace internal {

void (*HelperPrintf)();

void PrintUsageAndExit() {
  HelperPrintf();
  exit(0);
}

void SetDefaultTimeUnitFromFlag(const std::string& time_unit_flag) {
  if (time_unit_flag == "s") {
    return SetDefaultTimeUnit(kSecond);
  }
  if (time_unit_flag == "ms") {
    return SetDefaultTimeUnit(kMillisecond);
  }
  if (time_unit_flag == "us") {
    return SetDefaultTimeUnit(kMicrosecond);
  }
  if (time_unit_flag == "ns") {
    return SetDefaultTimeUnit(kNanosecond);
  }
  if (!time_unit_flag.empty()) {
    PrintUsageAndExit();
  }
}
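
// For example, running a benchmark binary with --benchmark_time_unit=ms makes
// milliseconds the default unit for every benchmark that does not set one
// explicitly; any unrecognized value falls through to PrintUsageAndExit().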

void ParseCommandLineFlags(int* argc, char** argv) {
  using namespace benchmark;
  BenchmarkReporter::Context::executable_name =
      (argc && *argc > 0) ? argv[0] : "unknown";
  for (int i = 1; argc && i < *argc; ++i) {
    if (ParseBoolFlag(argv[i], "benchmark_list_tests",
                      &FLAGS_benchmark_list_tests) ||
        ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) ||
        ParseStringFlag(argv[i], "benchmark_min_time",
                        &FLAGS_benchmark_min_time) ||
        ParseDoubleFlag(argv[i], "benchmark_min_warmup_time",
                        &FLAGS_benchmark_min_warmup_time) ||
        ParseInt32Flag(argv[i], "benchmark_repetitions",
                       &FLAGS_benchmark_repetitions) ||
        ParseBoolFlag(argv[i], "benchmark_enable_random_interleaving",
                      &FLAGS_benchmark_enable_random_interleaving) ||
        ParseBoolFlag(argv[i], "benchmark_report_aggregates_only",
                      &FLAGS_benchmark_report_aggregates_only) ||
        ParseBoolFlag(argv[i], "benchmark_display_aggregates_only",
                      &FLAGS_benchmark_display_aggregates_only) ||
        ParseStringFlag(argv[i], "benchmark_format", &FLAGS_benchmark_format) ||
        ParseStringFlag(argv[i], "benchmark_out", &FLAGS_benchmark_out) ||
        ParseStringFlag(argv[i], "benchmark_out_format",
                        &FLAGS_benchmark_out_format) ||
        ParseStringFlag(argv[i], "benchmark_color", &FLAGS_benchmark_color) ||
        ParseBoolFlag(argv[i], "benchmark_counters_tabular",
                      &FLAGS_benchmark_counters_tabular) ||
        ParseStringFlag(argv[i], "benchmark_perf_counters",
                        &FLAGS_benchmark_perf_counters) ||
        ParseKeyValueFlag(argv[i], "benchmark_context",
                          &FLAGS_benchmark_context) ||
        ParseStringFlag(argv[i], "benchmark_time_unit",
                        &FLAGS_benchmark_time_unit) ||
        ParseInt32Flag(argv[i], "v", &FLAGS_v)) {
      for (int j = i; j != *argc - 1; ++j) argv[j] = argv[j + 1];

      --(*argc);
      --i;
    } else if (IsFlag(argv[i], "help")) {
      PrintUsageAndExit();
    }
  }
  for (auto const* flag :
       {&FLAGS_benchmark_format, &FLAGS_benchmark_out_format}) {
    if (*flag != "console" && *flag != "json" && *flag != "csv") {
      PrintUsageAndExit();
    }
  }
  SetDefaultTimeUnitFromFlag(FLAGS_benchmark_time_unit);
  if (FLAGS_benchmark_color.empty()) {
    PrintUsageAndExit();
  }
  for (const auto& kv : FLAGS_benchmark_context) {
    AddCustomContext(kv.first, kv.second);
  }
}

int InitializeStreams() {
  static std::ios_base::Init init;
  return 0;
}

} // end namespace internal

std::string GetBenchmarkVersion() { return {BENCHMARK_VERSION}; }

void PrintDefaultHelp() {
  fprintf(stdout,
          "benchmark"
          " [--benchmark_list_tests={true|false}]\n"
          " [--benchmark_filter=<regex>]\n"
          " [--benchmark_min_time=`<integer>x` OR `<float>s` ]\n"
          " [--benchmark_min_warmup_time=<min_warmup_time>]\n"
          " [--benchmark_repetitions=<num_repetitions>]\n"
          " [--benchmark_enable_random_interleaving={true|false}]\n"
          " [--benchmark_report_aggregates_only={true|false}]\n"
          " [--benchmark_display_aggregates_only={true|false}]\n"
          " [--benchmark_format=<console|json|csv>]\n"
          " [--benchmark_out=<filename>]\n"
          " [--benchmark_out_format=<json|console|csv>]\n"
          " [--benchmark_color={auto|true|false}]\n"
          " [--benchmark_counters_tabular={true|false}]\n"
#if defined HAVE_LIBPFM
          " [--benchmark_perf_counters=<counter>,...]\n"
#endif
          " [--benchmark_context=<key>=<value>,...]\n"
          " [--benchmark_time_unit={ns|us|ms|s}]\n"
          " [--v=<verbosity>]\n");
}

void Initialize(int* argc, char** argv, void (*HelperPrintf)()) {
  internal::HelperPrintf = HelperPrintf;
  internal::ParseCommandLineFlags(argc, argv);
  internal::LogLevel() = FLAGS_v;
}

void Shutdown() { delete internal::global_context; }

bool ReportUnrecognizedArguments(int argc, char** argv) {
  for (int i = 1; i < argc; ++i) {
    fprintf(stderr, "%s: error: unrecognized command-line flag: %s\n", argv[0],
            argv[i]);
  }
  return argc > 1;
}
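
// Illustrative sketch (not part of this file): these entry points are what a
// typical main() for a benchmark binary (and the BENCHMARK_MAIN() macro)
// strings together:
//
//   int main(int argc, char** argv) {
//     benchmark::Initialize(&argc, argv);
//     if (benchmark::ReportUnrecognizedArguments(argc, argv)) return 1;
//     benchmark::RunSpecifiedBenchmarks();
//     benchmark::Shutdown();
//     return 0;
//   }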

} // end namespace benchmark