Mirror of https://github.com/google/benchmark.git
Synced 2024-12-26 12:30:14 +08:00
Support optional, user-directed collection of performance counters (#1114)
* Support optional, user-directed collection of performance counters

  The patch allows an engineer to drill into the root causes of a regression,
  for example. Currently, only single-threaded runs are supported. The feature
  is a build-time opt-in, and then a runtime opt-in: the engineer may run the
  benchmark executable, passing a list of performance counter names (using
  libpfm's naming scheme) on the command line. The counter values are then
  collected and reported back as UserCounters.

  This is different from #240 in that it is a benchmark-user opt-in, and the
  counter collection is transparent to the benchmark. Currently, this is only
  supported on platforms where libpfm is supported.

  libpfm: http://perfmon2.sourceforge.net/

* 'Use' values param in Snapshot when BENCHMARK_OS_WINDOWS

  This is to avoid an unused-parameter warning-as-error.

* Added missing include for <vector> in perf_counters.cc

* Moved doc to docs

* Added license blurbs
This commit is contained in:
parent 835951aa44
commit 376ebc2635
.github/workflows/build-and-test-perfcounters.yml (new file, 44 lines, vendored)
@@ -0,0 +1,44 @@
name: build-and-test-perfcounters

on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]

jobs:
  job:
    # TODO(dominic): Extend this to include compiler and set through env: CC/CXX.
    name: ${{ matrix.os }}.${{ matrix.build_type }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, ubuntu-16.04, ubuntu-20.04]
        build_type: ['Release', 'Debug']
    steps:
      - uses: actions/checkout@v2

      - name: install libpfm
        run: sudo apt install libpfm4-dev

      - name: create build environment
        run: cmake -E make_directory ${{ runner.workspace }}/_build

      - name: configure cmake
        shell: bash
        working-directory: ${{ runner.workspace }}/_build
        run: cmake -DBENCHMARK_ENABLE_LIBPFM=1 -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}

      - name: build
        shell: bash
        working-directory: ${{ runner.workspace }}/_build
        run: cmake --build . --config ${{ matrix.build_type }}

      # Skip testing, for now. It seems perf_event_open does not succeed on the
      # hosting machine, very likely a permissions issue.
      # TODO(mtrofin): Enable test.
      # - name: test
      #   shell: bash
      #   working-directory: ${{ runner.workspace }}/_build
      #   run: sudo ctest -C ${{ matrix.build_type }} --rerun-failed --output-on-failure
CMakeLists.txt
@@ -270,6 +270,10 @@ cxx_feature_check(STEADY_CLOCK)
 set(THREADS_PREFER_PTHREAD_FLAG ON)
 find_package(Threads REQUIRED)
 
+if (BENCHMARK_ENABLE_LIBPFM)
+  find_package(PFM)
+endif()
+
 # Set up directories
 include_directories(${PROJECT_SOURCE_DIR}/include)
README.md
@@ -297,6 +297,8 @@ too (`-lkstat`).
 
 [Setting the Time Unit](#setting-the-time-unit)
 
+[User-Requested Performance Counters](docs/perf_counters.md)
+
 [Preventing Optimization](#preventing-optimization)
 
 [Reporting Statistics](#reporting-statistics)
cmake/Modules/FindPFM.cmake (new file, 19 lines)
@@ -0,0 +1,19 @@
# If successful, the following variables will be defined:
# HAVE_LIBPFM.
# Set BENCHMARK_ENABLE_LIBPFM to 0 to disable, regardless of libpfm presence.
include(CheckIncludeFile)
include(CheckLibraryExists)
enable_language(C)

check_library_exists(libpfm.a pfm_initialize "" HAVE_LIBPFM_INITIALIZE)
if(HAVE_LIBPFM_INITIALIZE)
  check_include_file(perfmon/perf_event.h HAVE_PERFMON_PERF_EVENT_H)
  check_include_file(perfmon/pfmlib.h HAVE_PERFMON_PFMLIB_H)
  check_include_file(perfmon/pfmlib_perf_event.h HAVE_PERFMON_PFMLIB_PERF_EVENT_H)
  if(HAVE_PERFMON_PERF_EVENT_H AND HAVE_PERFMON_PFMLIB_H AND HAVE_PERFMON_PFMLIB_PERF_EVENT_H)
    message("Using Perf Counters.")
    set(HAVE_LIBPFM 1)
  endif()
else()
  message("Perf Counters support requested, but was unable to find libpfm.")
endif()
docs/perf_counters.md (new file, 35 lines)
@@ -0,0 +1,35 @@
<a name="perf-counters" />

# User-Requested Performance Counters

When running benchmarks, the user may choose to request collection of
performance counters. This may be useful in investigation scenarios - narrowing
down the cause of a regression; or verifying that the underlying cause of a
performance improvement matches expectations.

This feature is available if:

* The benchmark is run on an architecture featuring a Performance Monitoring
  Unit (PMU),
* The benchmark is compiled with support for collecting counters. Currently,
  this requires [libpfm](http://perfmon2.sourceforge.net/) be available at
  build time, and
* Currently, there is a limitation that the benchmark be run on one thread.

The feature does not require modifying benchmark code. Counter collection is
handled at the boundaries where timer collection is also handled.

To opt-in:

* Install `libpfm4-dev`, e.g. `apt-get install libpfm4-dev`.
* Enable the cmake flag BENCHMARK_ENABLE_LIBPFM.

To use, pass a comma-separated list of counter names through the
`--benchmark_perf_counters` flag. The names are decoded through libpfm - meaning,
they are platform specific, but some (e.g. `CYCLES` or `INSTRUCTIONS`) are
mapped by libpfm to platform-specifics - see libpfm
[documentation](http://perfmon2.sourceforge.net/docs.html) for more details.

The counter values are reported back through the [User Counters](../README.md#custom-counters)
mechanism, meaning, they are available in all the formats (e.g. JSON) supported
by User Counters.
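To make the opt-in flow above concrete, a minimal sketch (not part of the patch; the benchmark body and binary name are illustrative) of exercising the feature end to end. It relies only on facts stated above: the build-time `BENCHMARK_ENABLE_LIBPFM` opt-in, the `--benchmark_perf_counters` flag, and libpfm generic event names such as `CYCLES` and `INSTRUCTIONS`.

```cpp
#include <vector>

#include "benchmark/benchmark.h"

// No counter-specific code is needed in the benchmark itself: collection
// piggybacks on the same boundaries where the timer is started and stopped.
static void BM_VectorPushBack(benchmark::State& state) {
  for (auto _ : state) {
    std::vector<int> v;
    v.push_back(42);
    benchmark::DoNotOptimize(v.data());
  }
}
BENCHMARK(BM_VectorPushBack);
BENCHMARK_MAIN();

// Built with -DBENCHMARK_ENABLE_LIBPFM=1 and run single-threaded as, e.g.:
//   ./my_benchmark --benchmark_perf_counters=CYCLES,INSTRUCTIONS
// CYCLES and INSTRUCTIONS then appear as UserCounters in every supported
// output format (console, JSON, ...).
```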
include/benchmark/benchmark.h
@@ -448,6 +448,7 @@ struct Statistics {
 struct BenchmarkInstance;
 class ThreadTimer;
 class ThreadManager;
+class PerfCountersMeasurement;
 
 enum AggregationReportMode
 #if defined(BENCHMARK_HAS_CXX11)
@@ -687,15 +688,17 @@ class State {
  private:
   State(IterationCount max_iters, const std::vector<int64_t>& ranges,
         int thread_i, int n_threads, internal::ThreadTimer* timer,
-        internal::ThreadManager* manager);
+        internal::ThreadManager* manager,
+        internal::PerfCountersMeasurement* perf_counters_measurement);
 
   void StartKeepRunning();
   // Implementation of KeepRunning() and KeepRunningBatch().
   // is_batch must be true unless n is 1.
   bool KeepRunningInternal(IterationCount n, bool is_batch);
   void FinishKeepRunning();
-  internal::ThreadTimer* timer_;
-  internal::ThreadManager* manager_;
+  internal::ThreadTimer* const timer_;
+  internal::ThreadManager* const manager_;
+  internal::PerfCountersMeasurement* const perf_counters_measurement_;
 
   friend struct internal::BenchmarkInstance;
 };
src/CMakeLists.txt
@@ -28,6 +28,12 @@ target_include_directories(benchmark PUBLIC
   $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
 )
 
+# libpfm, if available
+if (HAVE_LIBPFM)
+  target_link_libraries(benchmark libpfm.a)
+  add_definitions(-DHAVE_LIBPFM)
+endif()
+
 # Link threads.
 target_link_libraries(benchmark ${BENCHMARK_CXX_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
 find_library(LIBRT rt)
src/benchmark.cc
@@ -45,6 +45,7 @@
 #include "internal_macros.h"
 #include "log.h"
 #include "mutex.h"
+#include "perf_counters.h"
 #include "re.h"
 #include "statistics.h"
 #include "string_util.h"
@@ -106,6 +107,10 @@ DEFINE_bool(benchmark_counters_tabular, false);
 // The level of verbose logging to output
 DEFINE_int32(v, 0);
 
+// List of additional perf counters to collect, in libpfm format. For more
+// information about libpfm: https://man7.org/linux/man-pages/man3/libpfm.3.html
+DEFINE_string(benchmark_perf_counters, "");
+
 namespace benchmark {
 
 namespace internal {
@@ -117,7 +122,8 @@ void UseCharPointer(char const volatile*) {}
 
 State::State(IterationCount max_iters, const std::vector<int64_t>& ranges,
              int thread_i, int n_threads, internal::ThreadTimer* timer,
-             internal::ThreadManager* manager)
+             internal::ThreadManager* manager,
+             internal::PerfCountersMeasurement* perf_counters_measurement)
     : total_iterations_(0),
       batch_leftover_(0),
       max_iterations(max_iters),
@@ -130,7 +136,8 @@ State::State(IterationCount max_iters, const std::vector<int64_t>& ranges,
       thread_index(thread_i),
       threads(n_threads),
       timer_(timer),
-      manager_(manager) {
+      manager_(manager),
+      perf_counters_measurement_(perf_counters_measurement) {
   CHECK(max_iterations != 0) << "At least one iteration must be run";
   CHECK_LT(thread_index, threads) << "thread_index must be less than threads";
@@ -163,11 +170,23 @@ void State::PauseTiming() {
   // Add in time accumulated so far
   CHECK(started_ && !finished_ && !error_occurred_);
   timer_->StopTimer();
+  if (perf_counters_measurement_) {
+    auto measurements = perf_counters_measurement_->StopAndGetMeasurements();
+    for (const auto& name_and_measurement : measurements) {
+      auto name = name_and_measurement.first;
+      auto measurement = name_and_measurement.second;
+      CHECK_EQ(counters[name], 0.0);
+      counters[name] = Counter(measurement, Counter::kAvgIterations);
+    }
+  }
 }
 
 void State::ResumeTiming() {
   CHECK(started_ && !finished_ && !error_occurred_);
   timer_->StartTimer();
+  if (perf_counters_measurement_) {
+    perf_counters_measurement_->Start();
+  }
 }
 
 void State::SkipWithError(const char* msg) {
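The shape of the bookkeeping above is worth calling out: counters are snapshotted when timing resumes, snapshotted again when timing pauses, and the per-counter deltas are folded into `State::counters`. A self-contained sketch of that pattern, with hypothetical stand-in types (the real ones are `PerfCountersMeasurement` and `Counter` from this patch):

```cpp
#include <map>
#include <string>
#include <utility>
#include <vector>

// Stand-in for what StopAndGetMeasurements() returns: one (name, delta) pair
// per requested counter, covering the just-ended timing interval.
using Deltas = std::vector<std::pair<std::string, double>>;

// Mirrors State::PauseTiming() above: each delta is recorded once under its
// counter name. With Counter::kAvgIterations, reporters later divide the raw
// delta by the iteration count, so the displayed value is e.g. CYCLES per
// iteration rather than a raw total.
void FoldDeltas(const Deltas& deltas, std::map<std::string, double>& counters) {
  for (const auto& name_and_delta : deltas) {
    counters[name_and_delta.first] = name_and_delta.second;
  }
}
```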
@@ -457,7 +476,9 @@ void ParseCommandLineFlags(int* argc, char** argv) {
         ParseStringFlag(argv[i], "color_print", &FLAGS_benchmark_color) ||
         ParseBoolFlag(argv[i], "benchmark_counters_tabular",
                       &FLAGS_benchmark_counters_tabular) ||
-        ParseInt32Flag(argv[i], "v", &FLAGS_v)) {
+        ParseInt32Flag(argv[i], "v", &FLAGS_v) ||
+        ParseStringFlag(argv[i], "benchmark_perf_counters",
+                        &FLAGS_benchmark_perf_counters)) {
       for (int j = i; j != *argc - 1; ++j) argv[j] = argv[j + 1];
 
       --(*argc);
src/benchmark_api_internal.cc
@@ -3,10 +3,12 @@
 namespace benchmark {
 namespace internal {
 
-State BenchmarkInstance::Run(IterationCount iters, int thread_id,
-                             internal::ThreadTimer* timer,
-                             internal::ThreadManager* manager) const {
-  State st(iters, arg, thread_id, threads, timer, manager);
+State BenchmarkInstance::Run(
+    IterationCount iters, int thread_id, internal::ThreadTimer* timer,
+    internal::ThreadManager* manager,
+    internal::PerfCountersMeasurement* perf_counters_measurement) const {
+  State st(iters, arg, thread_id, threads, timer, manager,
+           perf_counters_measurement);
   benchmark->Run(st);
   return st;
 }
src/benchmark_api_internal.h
@@ -36,7 +36,8 @@ struct BenchmarkInstance {
   int threads;  // Number of concurrent threads to use
 
   State Run(IterationCount iters, int thread_id, internal::ThreadTimer* timer,
-            internal::ThreadManager* manager) const;
+            internal::ThreadManager* manager,
+            internal::PerfCountersMeasurement* perf_counters_measurement) const;
 };
 
 bool FindBenchmarksInternal(const std::string& re,
src/benchmark_runner.cc
@@ -45,6 +45,7 @@
 #include "internal_macros.h"
 #include "log.h"
 #include "mutex.h"
+#include "perf_counters.h"
 #include "re.h"
 #include "statistics.h"
 #include "string_util.h"
@@ -111,12 +112,14 @@ BenchmarkReporter::Run CreateRunReport(
 // Execute one thread of benchmark b for the specified number of iterations.
 // Adds the stats collected for the thread into manager->results.
 void RunInThread(const BenchmarkInstance* b, IterationCount iters,
-                 int thread_id, ThreadManager* manager) {
+                 int thread_id, ThreadManager* manager,
+                 PerfCountersMeasurement* perf_counters_measurement) {
   internal::ThreadTimer timer(
       b->measure_process_cpu_time
           ? internal::ThreadTimer::CreateProcessCpuTime()
           : internal::ThreadTimer::Create());
-  State st = b->Run(iters, thread_id, &timer, manager);
+  State st =
+      b->Run(iters, thread_id, &timer, manager, perf_counters_measurement);
   CHECK(st.error_occurred() || st.iterations() >= st.max_iterations)
       << "Benchmark returned before State::KeepRunning() returned false!";
   {
@@ -143,7 +146,12 @@ class BenchmarkRunner {
                        : FLAGS_benchmark_repetitions),
         has_explicit_iteration_count(b.iterations != 0),
         pool(b.threads - 1),
-        iters(has_explicit_iteration_count ? b.iterations : 1) {
+        iters(has_explicit_iteration_count ? b.iterations : 1),
+        perf_counters_measurement(
+            PerfCounters::Create(StrSplit(FLAGS_benchmark_perf_counters, ','))),
+        perf_counters_measurement_ptr(perf_counters_measurement.IsValid()
+                                          ? &perf_counters_measurement
+                                          : nullptr) {
     run_results.display_report_aggregates_only =
         (FLAGS_benchmark_report_aggregates_only ||
          FLAGS_benchmark_display_aggregates_only);
@@ -155,6 +163,11 @@ class BenchmarkRunner {
           internal::ARM_DisplayReportAggregatesOnly);
       run_results.file_report_aggregates_only =
           (b.aggregation_report_mode & internal::ARM_FileReportAggregatesOnly);
+      CHECK(b.threads == 1 || !perf_counters_measurement.IsValid())
+          << "Perf counters are not supported in multi-threaded cases.\n";
+      CHECK(FLAGS_benchmark_perf_counters.empty() ||
+            perf_counters_measurement.IsValid())
+          << "Perf counters were requested but could not be set up.";
     }
 
     for (int repetition_num = 0; repetition_num < repeats; repetition_num++) {
@@ -192,6 +205,9 @@ class BenchmarkRunner {
   // So only the first repetition has to find/calculate it,
   // the other repetitions will just use that precomputed iteration count.
 
+  PerfCountersMeasurement perf_counters_measurement;
+  PerfCountersMeasurement* const perf_counters_measurement_ptr;
+
   struct IterationResults {
     internal::ThreadManager::Result results;
     IterationCount iters;
@@ -206,12 +222,12 @@ class BenchmarkRunner {
     // Run all but one thread in separate threads
     for (std::size_t ti = 0; ti < pool.size(); ++ti) {
       pool[ti] = std::thread(&RunInThread, &b, iters, static_cast<int>(ti + 1),
-                             manager.get());
+                             manager.get(), perf_counters_measurement_ptr);
     }
     // And run one thread here directly.
     // (If we were asked to run just one thread, we don't create new threads.)
    // Yes, we need to do this here *after* we start the separate threads.
-    RunInThread(&b, iters, 0, manager.get());
+    RunInThread(&b, iters, 0, manager.get(), perf_counters_measurement_ptr);
 
     // The main thread has finished. Now let's wait for the other threads.
     manager->WaitForAllThreads();
@@ -331,7 +347,8 @@ class BenchmarkRunner {
       memory_manager->Start();
       std::unique_ptr<internal::ThreadManager> manager;
       manager.reset(new internal::ThreadManager(1));
-      RunInThread(&b, memory_iterations, 0, manager.get());
+      RunInThread(&b, memory_iterations, 0, manager.get(),
+                  perf_counters_measurement_ptr);
       manager->WaitForAllThreads();
       manager.reset();
src/benchmark_runner.h
@@ -26,6 +26,8 @@ DECLARE_bool(benchmark_report_aggregates_only);
 
 DECLARE_bool(benchmark_display_aggregates_only);
 
+DECLARE_string(benchmark_perf_counters);
+
 namespace benchmark {
 
 namespace internal {
src/perf_counters.cc (new file, 128 lines)
@@ -0,0 +1,128 @@
// Copyright 2021 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "perf_counters.h"

#include <cstring>
#include <vector>

#if defined HAVE_LIBPFM
#include "perfmon/pfmlib.h"
#include "perfmon/pfmlib_perf_event.h"
#endif

namespace benchmark {
namespace internal {

constexpr size_t PerfCounterValues::kMaxCounters;

#if defined HAVE_LIBPFM
const bool PerfCounters::kSupported = true;

bool PerfCounters::Initialize() { return pfm_initialize() == PFM_SUCCESS; }

PerfCounters PerfCounters::Create(
    const std::vector<std::string>& counter_names) {
  if (counter_names.empty()) {
    return NoCounters();
  }
  if (counter_names.size() > PerfCounterValues::kMaxCounters) {
    GetErrorLogInstance()
        << counter_names.size()
        << " counters were requested. The minimum is 1, the maximum is "
        << PerfCounterValues::kMaxCounters << "\n";
    return NoCounters();
  }
  std::vector<int> counter_ids(counter_names.size());

  const int mode = PFM_PLM3;  // user mode only
  for (size_t i = 0; i < counter_names.size(); ++i) {
    const bool is_first = i == 0;
    struct perf_event_attr attr{};
    attr.size = sizeof(attr);
    const int group_id = !is_first ? counter_ids[0] : -1;
    const auto& name = counter_names[i];
    if (name.empty()) {
      GetErrorLogInstance() << "A counter name was the empty string\n";
      return NoCounters();
    }
    pfm_perf_encode_arg_t arg{};
    arg.attr = &attr;

    const int pfm_get =
        pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT, &arg);
    if (pfm_get != PFM_SUCCESS) {
      GetErrorLogInstance() << "Unknown counter name: " << name << "\n";
      return NoCounters();
    }
    attr.disabled = is_first;
    attr.pinned = is_first;
    attr.exclude_kernel = true;
    attr.exclude_user = false;
    attr.exclude_hv = true;
    // Read all counters in one read.
    attr.read_format = PERF_FORMAT_GROUP;

    int id = -1;
    static constexpr size_t kNrOfSyscallRetries = 5;
    // Retry syscall as it was interrupted often (b/64774091).
    for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries;
         ++num_retries) {
      id = perf_event_open(&attr, 0, -1, group_id, 0);
      if (id >= 0 || errno != EINTR) {
        break;
      }
    }
    if (id < 0) {
      GetErrorLogInstance()
          << "Failed to get a file descriptor for " << name << "\n";
      return NoCounters();
    }

    counter_ids[i] = id;
  }
  if (ioctl(counter_ids[0], PERF_EVENT_IOC_ENABLE) != 0) {
    GetErrorLogInstance() << "Failed to start counters\n";
    return NoCounters();
  }

  return PerfCounters(counter_names, std::move(counter_ids));
}

PerfCounters::~PerfCounters() {
  if (counter_ids_.empty()) {
    return;
  }
  ioctl(counter_ids_[0], PERF_EVENT_IOC_DISABLE);
  for (int fd : counter_ids_) {
    close(fd);
  }
}
#else   // defined HAVE_LIBPFM
const bool PerfCounters::kSupported = false;

bool PerfCounters::Initialize() { return false; }

PerfCounters PerfCounters::Create(
    const std::vector<std::string>& counter_names) {
  if (!counter_names.empty()) {
    GetErrorLogInstance() << "Performance counters not supported.";
  }
  return NoCounters();
}

PerfCounters::~PerfCounters() = default;
#endif  // defined HAVE_LIBPFM
}  // namespace internal
}  // namespace benchmark
src/perf_counters.h (new file, 172 lines)
@@ -0,0 +1,172 @@
// Copyright 2021 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef BENCHMARK_PERF_COUNTERS_H
#define BENCHMARK_PERF_COUNTERS_H

#include <array>
#include <cstdint>
#include <vector>

#include "benchmark/benchmark.h"
#include "check.h"
#include "log.h"

#ifndef BENCHMARK_OS_WINDOWS
#include <unistd.h>
#endif

namespace benchmark {
namespace internal {

// Typically, we can only read a small number of counters. There is also a
// padding preceding counter values, when reading multiple counters with one
// syscall (which is desirable). PerfCounterValues abstracts these details.
// The implementation ensures the storage is inlined, and allows 0-based
// indexing into the counter values.
// The object is used in conjunction with a PerfCounters object, by passing it
// to Snapshot(). The values are populated such that
// perfCounters->names()[i]'s value is obtained at position i (as given by
// operator[]) of this object.
class PerfCounterValues {
 public:
  explicit PerfCounterValues(size_t nr_counters) : nr_counters_(nr_counters) {
    CHECK_LE(nr_counters_, kMaxCounters);
  }

  uint64_t operator[](size_t pos) const { return values_[kPadding + pos]; }

  static constexpr size_t kMaxCounters = 3;

 private:
  friend class PerfCounters;
  // Get the byte buffer in which perf counters can be captured.
  // This is used by PerfCounters::Read
  std::pair<char*, size_t> get_data_buffer() {
    return {reinterpret_cast<char*>(values_.data()),
            sizeof(uint64_t) * (kPadding + nr_counters_)};
  }

  static constexpr size_t kPadding = 1;
  std::array<uint64_t, kPadding + kMaxCounters> values_;
  const size_t nr_counters_;
};

// Collect PMU counters. The object, once constructed, is ready to be used by
// calling read(). PMU counter collection is enabled from the time create() is
// called, to obtain the object, until the object's destructor is called.
class PerfCounters final {
 public:
  // True iff this platform supports performance counters.
  static const bool kSupported;

  bool IsValid() const { return is_valid_; }
  static PerfCounters NoCounters() { return PerfCounters(); }

  ~PerfCounters();
  PerfCounters(PerfCounters&&) = default;
  PerfCounters(const PerfCounters&) = delete;

  // Platform-specific implementations may choose to do some library
  // initialization here.
  static bool Initialize();

  // Return a PerfCounters object ready to read the counters with the names
  // specified. The values are user-mode only. The counter name format is
  // implementation and OS specific.
  // TODO: once we move to C++-17, this should be a std::optional, and then the
  // IsValid() boolean can be dropped.
  static PerfCounters Create(const std::vector<std::string>& counter_names);

  // Take a snapshot of the current value of the counters into the provided
  // valid PerfCounterValues storage. The values are populated such that:
  // names()[i]'s value is (*values)[i]
  BENCHMARK_ALWAYS_INLINE bool Snapshot(PerfCounterValues* values) {
#ifndef BENCHMARK_OS_WINDOWS
    assert(values != nullptr);
    assert(IsValid());
    auto buffer = values->get_data_buffer();
    auto read_bytes = ::read(counter_ids_[0], buffer.first, buffer.second);
    return static_cast<size_t>(read_bytes) == buffer.second;
#else
    (void)values;
    return false;
#endif
  }

  const std::vector<std::string>& names() const { return counter_names_; }
  size_t num_counters() const { return counter_names_.size(); }

 private:
  PerfCounters(const std::vector<std::string>& counter_names,
               std::vector<int>&& counter_ids)
      : counter_ids_(std::move(counter_ids)),
        counter_names_(counter_names),
        is_valid_(true) {}
  PerfCounters() : is_valid_(false) {}

  std::vector<int> counter_ids_;
  const std::vector<std::string> counter_names_;
  const bool is_valid_;
};

// Typical usage of the above primitives.
class PerfCountersMeasurement final {
 public:
  PerfCountersMeasurement(PerfCounters&& c)
      : counters_(std::move(c)),
        start_values_(counters_.IsValid() ? counters_.names().size() : 0),
        end_values_(counters_.IsValid() ? counters_.names().size() : 0) {}

  bool IsValid() const { return counters_.IsValid(); }

  BENCHMARK_ALWAYS_INLINE void Start() {
    assert(IsValid());
    // Tell the compiler to not move instructions above/below where we take
    // the snapshot.
    ClobberMemory();
    counters_.Snapshot(&start_values_);
    ClobberMemory();
  }

  BENCHMARK_ALWAYS_INLINE std::vector<std::pair<std::string, double>>
  StopAndGetMeasurements() {
    assert(IsValid());
    // Tell the compiler to not move instructions above/below where we take
    // the snapshot.
    ClobberMemory();
    counters_.Snapshot(&end_values_);
    ClobberMemory();

    std::vector<std::pair<std::string, double>> ret;
    for (size_t i = 0; i < counters_.names().size(); ++i) {
      double measurement = static_cast<double>(end_values_[i]) -
                           static_cast<double>(start_values_[i]);
      ret.push_back({counters_.names()[i], measurement});
    }
    return ret;
  }

 private:
  PerfCounters counters_;
  PerfCounterValues start_values_;
  PerfCounterValues end_values_;
};

BENCHMARK_UNUSED static bool perf_init_anchor = PerfCounters::Initialize();

}  // namespace internal
}  // namespace benchmark

#endif  // BENCHMARK_PERF_COUNTERS_H
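A note on `kPadding`: with `attr.read_format = PERF_FORMAT_GROUP` (set in perf_counters.cc above) and no other read-format flags, a `read(2)` on the group leader returns the number of events followed by one value per counter, per perf_event_open(2). A sketch of that layout, with hypothetical names, showing why exactly one leading `uint64_t` is skipped:

```cpp
#include <cstdint>

// Kernel layout returned by read() on a PERF_FORMAT_GROUP leader when no
// PERF_FORMAT_TOTAL_TIME_* or PERF_FORMAT_ID flags are set; see
// perf_event_open(2). Names here are illustrative, not from the patch.
struct GroupReadFormat {
  uint64_t nr;         // number of events in the group -- the one padding word
  uint64_t values[3];  // one value per counter, up to kMaxCounters
};

// Hence PerfCounterValues stores kPadding + kMaxCounters slots, and
// operator[](pos) reads values_[kPadding + pos], skipping the leading `nr`.
```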
src/string_util.cc
@@ -163,6 +163,18 @@ std::string StrFormat(const char* format, ...) {
   return tmp;
 }
 
+std::vector<std::string> StrSplit(const std::string& str, char delim) {
+  std::vector<std::string> ret;
+  size_t first = 0;
+  size_t next = str.find(delim);
+  // Advance past each delimiter, searching for the next one from there.
+  for (; next != std::string::npos;
+       first = next + 1, next = str.find(delim, first)) {
+    ret.push_back(str.substr(first, next - first));
+  }
+  ret.push_back(str.substr(first));
+  return ret;
+}
+
 #ifdef BENCHMARK_STL_ANDROID_GNUSTL
 /*
  * GNU STL in Android NDK lacks support for some C++11 functions, including
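A quick worked example of the helper (the demo function and its assertions are illustrative; this mirrors how benchmark_runner.cc feeds the flag value into `PerfCounters::Create`):

```cpp
#include <cassert>
#include <string>
#include <vector>

#include "string_util.h"  // declares benchmark::StrSplit (added in this patch)

void StrSplitDemo() {
  // "CYCLES,BRANCHES" -> {"CYCLES", "BRANCHES"}, ready for PerfCounters::Create.
  auto names = benchmark::StrSplit("CYCLES,BRANCHES", ',');
  assert(names.size() == 2);
  assert(names[0] == "CYCLES" && names[1] == "BRANCHES");
  // Edge case pinned down by the gtest below: an empty input yields {""},
  // which PerfCounters::Create rejects as an empty counter name.
  assert(benchmark::StrSplit("", ',').size() == 1);
}
```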
src/string_util.h
@@ -37,6 +37,8 @@ inline std::string StrCat(Args&&... args) {
   return ss.str();
 }
 
+std::vector<std::string> StrSplit(const std::string& str, char delim);
+
 #ifdef BENCHMARK_STL_ANDROID_GNUSTL
 /*
  * GNU STL in Android NDK lacks support for some C++11 functions, including
test/CMakeLists.txt
@@ -128,6 +128,9 @@ add_test(NAME templated_fixture_test COMMAND templated_fixture_test --benchmark_
 compile_output_test(user_counters_test)
 add_test(NAME user_counters_test COMMAND user_counters_test --benchmark_min_time=0.01)
 
+compile_output_test(perf_counters_test)
+add_test(NAME perf_counters_test COMMAND perf_counters_test --benchmark_min_time=0.01 --benchmark_perf_counters=CYCLES,BRANCHES)
+
 compile_output_test(internal_threading_test)
 add_test(NAME internal_threading_test COMMAND internal_threading_test --benchmark_min_time=0.01)
 
@@ -196,6 +199,7 @@ if (BENCHMARK_ENABLE_GTEST_TESTS)
   add_gtest(commandlineflags_gtest)
   add_gtest(statistics_gtest)
   add_gtest(string_util_gtest)
+  add_gtest(perf_counters_gtest)
 endif(BENCHMARK_ENABLE_GTEST_TESTS)
 
 ###############################################################################
test/perf_counters_gtest.cc (new file, 95 lines)
@@ -0,0 +1,95 @@
#include "../src/perf_counters.h"
#include "gtest/gtest.h"

#ifndef GTEST_SKIP
struct MsgHandler {
  void operator=(std::ostream&) {}
};
#define GTEST_SKIP() return MsgHandler() = std::cout
#endif

using benchmark::internal::PerfCounters;
using benchmark::internal::PerfCounterValues;

namespace {
const char kGenericPerfEvent1[] = "CYCLES";
const char kGenericPerfEvent2[] = "BRANCHES";
const char kGenericPerfEvent3[] = "INSTRUCTIONS";

TEST(PerfCountersTest, Init) {
  EXPECT_EQ(PerfCounters::Initialize(), PerfCounters::kSupported);
}

TEST(PerfCountersTest, OneCounter) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Performance counters not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());
  EXPECT_TRUE(PerfCounters::Create({kGenericPerfEvent1}).IsValid());
}

TEST(PerfCountersTest, NegativeTest) {
  if (!PerfCounters::kSupported) {
    EXPECT_FALSE(PerfCounters::Initialize());
    return;
  }
  EXPECT_TRUE(PerfCounters::Initialize());
  EXPECT_FALSE(PerfCounters::Create({}).IsValid());
  EXPECT_FALSE(PerfCounters::Create({""}).IsValid());
  EXPECT_FALSE(PerfCounters::Create({"not a counter name"}).IsValid());
  {
    EXPECT_TRUE(PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2,
                                      kGenericPerfEvent3})
                    .IsValid());
  }
  EXPECT_FALSE(
      PerfCounters::Create({kGenericPerfEvent2, "", kGenericPerfEvent1})
          .IsValid());
  EXPECT_FALSE(PerfCounters::Create({kGenericPerfEvent3, "not a counter name",
                                     kGenericPerfEvent1})
                   .IsValid());
  {
    EXPECT_TRUE(PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2,
                                      kGenericPerfEvent3})
                    .IsValid());
  }
  EXPECT_FALSE(
      PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2,
                            kGenericPerfEvent3, "MISPREDICTED_BRANCH_RETIRED"})
          .IsValid());
}

TEST(PerfCountersTest, Read1Counter) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());
  auto counters = PerfCounters::Create({kGenericPerfEvent1});
  EXPECT_TRUE(counters.IsValid());
  PerfCounterValues values1(1);
  EXPECT_TRUE(counters.Snapshot(&values1));
  EXPECT_GT(values1[0], 0);
  PerfCounterValues values2(1);
  EXPECT_TRUE(counters.Snapshot(&values2));
  EXPECT_GT(values2[0], 0);
  EXPECT_GT(values2[0], values1[0]);
}

TEST(PerfCountersTest, Read2Counters) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());
  auto counters =
      PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2});
  EXPECT_TRUE(counters.IsValid());
  PerfCounterValues values1(2);
  EXPECT_TRUE(counters.Snapshot(&values1));
  EXPECT_GT(values1[0], 0);
  EXPECT_GT(values1[1], 0);
  PerfCounterValues values2(2);
  EXPECT_TRUE(counters.Snapshot(&values2));
  EXPECT_GT(values2[0], 0);
  EXPECT_GT(values2[1], 0);
}
}  // namespace
test/perf_counters_test.cc (new file, 27 lines)
@@ -0,0 +1,27 @@
#undef NDEBUG

#include "../src/perf_counters.h"

#include "benchmark/benchmark.h"
#include "output_test.h"

void BM_Simple(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(state.iterations());
  }
}
BENCHMARK(BM_Simple);
ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_Simple\",$"}});

void CheckSimple(Results const& e) {
  CHECK_COUNTER_VALUE(e, double, "CYCLES", GT, 0);
  CHECK_COUNTER_VALUE(e, double, "BRANCHES", GT, 0.0);
}
CHECK_BENCHMARK_RESULTS("BM_Simple", &CheckSimple);

int main(int argc, char* argv[]) {
  if (!benchmark::internal::PerfCounters::kSupported) {
    return 0;
  }
  RunOutputTests(argc, argv);
}
test/string_util_gtest.cc
@@ -150,4 +150,12 @@ TEST(StringUtilTest, stod) {
 #endif
 }
 
+TEST(StringUtilTest, StrSplit) {
+  EXPECT_EQ(benchmark::StrSplit("", ','), std::vector<std::string>{""});
+  EXPECT_EQ(benchmark::StrSplit("hello", ','),
+            std::vector<std::string>({"hello"}));
+  EXPECT_EQ(benchmark::StrSplit("hello,there", ','),
+            std::vector<std::string>({"hello", "there"}));
+}
+
 }  // end namespace