benchmark/test/perf_counters_gtest.cc

259 lines
9.5 KiB
C++
Raw Normal View History

#include <thread>
#include "../src/perf_counters.h"
#include "gtest/gtest.h"
#ifndef GTEST_SKIP
struct MsgHandler {
2021-11-11 00:22:31 +08:00
void operator=(std::ostream&) {}
};
#define GTEST_SKIP() return MsgHandler() = std::cout
#endif
using benchmark::internal::PerfCounters;
using benchmark::internal::PerfCountersMeasurement;
using benchmark::internal::PerfCounterValues;
namespace {
const char kGenericPerfEvent1[] = "CYCLES";
const char kGenericPerfEvent2[] = "BRANCHES";
const char kGenericPerfEvent3[] = "INSTRUCTIONS";
TEST(PerfCountersTest, Init) {
EXPECT_EQ(PerfCounters::Initialize(), PerfCounters::kSupported);
}
TEST(PerfCountersTest, OneCounter) {
if (!PerfCounters::kSupported) {
GTEST_SKIP() << "Performance counters not supported.\n";
}
EXPECT_TRUE(PerfCounters::Initialize());
EXPECT_TRUE(PerfCounters::Create({kGenericPerfEvent1}).IsValid());
}
TEST(PerfCountersTest, NegativeTest) {
if (!PerfCounters::kSupported) {
EXPECT_FALSE(PerfCounters::Initialize());
return;
}
EXPECT_TRUE(PerfCounters::Initialize());
EXPECT_FALSE(PerfCounters::Create({}).IsValid());
EXPECT_FALSE(PerfCounters::Create({""}).IsValid());
EXPECT_FALSE(PerfCounters::Create({"not a counter name"}).IsValid());
EXPECT_TRUE(PerfCounters::Create(
{kGenericPerfEvent1, kGenericPerfEvent2, kGenericPerfEvent3})
.IsValid());
{
auto counter =
PerfCounters::Create({kGenericPerfEvent2, "", kGenericPerfEvent1});
EXPECT_TRUE(counter.IsValid());
EXPECT_EQ(counter.num_counters(), 2);
EXPECT_EQ(counter.names(), std::vector<std::string>(
{kGenericPerfEvent2, kGenericPerfEvent1}));
}
{
auto counter = PerfCounters::Create(
{kGenericPerfEvent3, "not a counter name", kGenericPerfEvent1});
EXPECT_TRUE(counter.IsValid());
EXPECT_EQ(counter.num_counters(), 2);
EXPECT_EQ(counter.names(), std::vector<std::string>(
{kGenericPerfEvent3, kGenericPerfEvent1}));
}
{
EXPECT_TRUE(PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2,
kGenericPerfEvent3})
.IsValid());
}
{
auto counter = PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2,
kGenericPerfEvent3,
"MISPREDICTED_BRANCH_RETIRED"});
EXPECT_TRUE(counter.IsValid());
EXPECT_EQ(counter.num_counters(), 3);
EXPECT_EQ(counter.names(),
std::vector<std::string>({kGenericPerfEvent1, kGenericPerfEvent2,
kGenericPerfEvent3}));
}
}
TEST(PerfCountersTest, Read1Counter) {
if (!PerfCounters::kSupported) {
GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
}
EXPECT_TRUE(PerfCounters::Initialize());
auto counters = PerfCounters::Create({kGenericPerfEvent1});
EXPECT_TRUE(counters.IsValid());
PerfCounterValues values1(1);
EXPECT_TRUE(counters.Snapshot(&values1));
EXPECT_GT(values1[0], 0);
PerfCounterValues values2(1);
EXPECT_TRUE(counters.Snapshot(&values2));
EXPECT_GT(values2[0], 0);
EXPECT_GT(values2[0], values1[0]);
}
TEST(PerfCountersTest, Read2Counters) {
if (!PerfCounters::kSupported) {
GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
}
EXPECT_TRUE(PerfCounters::Initialize());
auto counters =
PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2});
EXPECT_TRUE(counters.IsValid());
PerfCounterValues values1(2);
EXPECT_TRUE(counters.Snapshot(&values1));
EXPECT_GT(values1[0], 0);
EXPECT_GT(values1[1], 0);
PerfCounterValues values2(2);
EXPECT_TRUE(counters.Snapshot(&values2));
EXPECT_GT(values2[0], 0);
EXPECT_GT(values2[1], 0);
}
TEST(PerfCountersTest, ReopenExistingCounters) {
// The test works (i.e. causes read to fail) for the assumptions
// about hardware capabilities (i.e. small number (3-4) hardware
// counters) at this date.
if (!PerfCounters::kSupported) {
GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
}
EXPECT_TRUE(PerfCounters::Initialize());
std::vector<PerfCounters> counters;
counters.reserve(6);
for (int i = 0; i < 6; i++)
counters.push_back(PerfCounters::Create({kGenericPerfEvent1}));
PerfCounterValues values(1);
EXPECT_TRUE(counters[0].Snapshot(&values));
EXPECT_FALSE(counters[4].Snapshot(&values));
EXPECT_FALSE(counters[5].Snapshot(&values));
}
TEST(PerfCountersTest, CreateExistingMeasurements) {
// The test works (i.e. causes read to fail) for the assumptions
// about hardware capabilities (i.e. small number (3-4) hardware
// counters) at this date,
// the same as previous test ReopenExistingCounters.
if (!PerfCounters::kSupported) {
GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
}
EXPECT_TRUE(PerfCounters::Initialize());
std::vector<PerfCountersMeasurement> perf_counter_measurements;
std::vector<std::pair<std::string, double>> measurements;
perf_counter_measurements.reserve(10);
for (int i = 0; i < 10; i++)
perf_counter_measurements.emplace_back(
std::vector<std::string>{kGenericPerfEvent1});
perf_counter_measurements[0].Start();
EXPECT_TRUE(perf_counter_measurements[0].Stop(measurements));
measurements.clear();
perf_counter_measurements[8].Start();
EXPECT_FALSE(perf_counter_measurements[8].Stop(measurements));
measurements.clear();
perf_counter_measurements[9].Start();
EXPECT_FALSE(perf_counter_measurements[9].Stop(measurements));
}
size_t do_work() {
size_t res = 0;
for (size_t i = 0; i < 100000000; ++i) res += i * i;
return res;
}
void measure(size_t threadcount, PerfCounterValues* values1,
PerfCounterValues* values2) {
2021-06-25 01:21:59 +08:00
BM_CHECK_NE(values1, nullptr);
BM_CHECK_NE(values2, nullptr);
std::vector<std::thread> threads(threadcount);
2021-06-25 01:21:59 +08:00
auto work = [&]() { BM_CHECK(do_work() > 1000); };
// We need to first set up the counters, then start the threads, so the
// threads would inherit the counters. But later, we need to first destroy
// the thread pool (so all the work finishes), then measure the counters. So
// the scopes overlap, and we need to explicitly control the scope of the
// threadpool.
auto counters =
PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent3});
for (auto& t : threads) t = std::thread(work);
counters.Snapshot(values1);
for (auto& t : threads) t.join();
counters.Snapshot(values2);
}
TEST(PerfCountersTest, MultiThreaded) {
if (!PerfCounters::kSupported) {
GTEST_SKIP() << "Test skipped because libpfm is not supported.";
}
EXPECT_TRUE(PerfCounters::Initialize());
PerfCounterValues values1(2);
PerfCounterValues values2(2);
measure(2, &values1, &values2);
std::vector<double> D1{static_cast<double>(values2[0] - values1[0]),
static_cast<double>(values2[1] - values1[1])};
measure(4, &values1, &values2);
std::vector<double> D2{static_cast<double>(values2[0] - values1[0]),
static_cast<double>(values2[1] - values1[1])};
// Some extra work will happen on the main thread - like joining the threads
// - so the ratio won't be quite 2.0, but very close.
EXPECT_GE(D2[0], 1.9 * D1[0]);
EXPECT_GE(D2[1], 1.9 * D1[1]);
}
Implement unlimited number of performance counters (#1552) * Implement unlimited number of performance counters Linux performance counters will limit the number of hardware counters per reading group. For that reason the implementation of PerfCounters is limited to 3. However if only software counters are added, there is no reason to limit the counters. For hardware counters, we create multiple groups and store a vector or leaders in the PerfCounters object. When reading, there is an extra time waste by iterating through all the group leaders. However this should be the same performance as with today. Reading is done by groups and it had to be heavily adjusted with the logic being moved to PerfCounterValues. I created a test for x86-64 and took care of filtering out the events in case it runs in a platform that does not support those counters - the test will not fail. The current tests were already failing (ReOpenExistingCounters, CreateExistingMeasurements and MultiThreaded) on the main branch and they continue to fail after this implementation - I did not fix those not to conflate all here. * Moved the PerfCounterValues::Read() implementation from header to body. * Added missing implementation of PerfCounters::IsCounterSupported when HAVE_LIBPFM is not defined. * Changed comments to reflect the implementation * Removed arg name so it does not generate an error when HAVE_LIBPBM is not defined. * Made loop counter a const reference for clang-tidy * Added missig BENCHMARK_EXPORT to PerfCounterValues
2023-03-01 23:30:41 +08:00
TEST(PerfCountersTest, HardwareLimits) {
// The test works (i.e. causes read to fail) for the assumptions
// about hardware capabilities (i.e. small number (3-4) hardware
// counters) at this date,
// the same as previous test ReopenExistingCounters.
if (!PerfCounters::kSupported) {
GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
}
EXPECT_TRUE(PerfCounters::Initialize());
// Taken straight from `perf list` on x86-64
// Got all hardware names since these are the problematic ones
std::vector<std::string> counter_names{"cycles", // leader
"instructions",
"branches",
"L1-dcache-loads",
"L1-dcache-load-misses",
"L1-dcache-prefetches",
"L1-icache-load-misses", // leader
"L1-icache-loads",
"branch-load-misses",
"branch-loads",
"dTLB-load-misses",
"dTLB-loads",
"iTLB-load-misses", // leader
"iTLB-loads",
"branch-instructions",
"branch-misses",
"cache-misses",
"cache-references",
"stalled-cycles-backend", // leader
"stalled-cycles-frontend"};
// In the off-chance that some of these values are not supported,
// we filter them out so the test will complete without failure
// albeit it might not actually test the grouping on that platform
std::vector<std::string> valid_names;
for (const std::string& name : counter_names) {
if (PerfCounters::IsCounterSupported(name)) {
valid_names.push_back(name);
}
}
PerfCountersMeasurement counter(valid_names);
std::vector<std::pair<std::string, double>> measurements;
counter.Start();
EXPECT_TRUE(counter.Stop(measurements));
}
} // namespace