2021-05-19 16:49:05 +08:00
|
|
|
#include <thread>
|
|
|
|
|
2021-04-28 16:25:29 +08:00
|
|
|
#include "../src/perf_counters.h"
|
|
|
|
#include "gtest/gtest.h"
|
|
|
|
|
|
|
|
// Compatibility shim for googletest versions that do not define GTEST_SKIP().
// The replacement expands to a `return` statement, so the enclosing function
// exits at the point of use. Anything streamed after the macro is written to
// std::cout first; the resulting stream is then handed to MsgHandler.
#if !defined(GTEST_SKIP)
struct MsgHandler {
  // Discards the stream, turning `MsgHandler() = std::cout << ...` into a
  // void expression that can follow `return` in a void function.
  void operator=(std::ostream& /*unused*/) {}
};
#define GTEST_SKIP() return MsgHandler() = std::cout
#endif  // !defined(GTEST_SKIP)
|
|
|
|
|
|
|
|
using benchmark::internal::PerfCounters;
|
2022-01-25 18:14:20 +08:00
|
|
|
using benchmark::internal::PerfCountersMeasurement;
|
2021-04-28 16:25:29 +08:00
|
|
|
using benchmark::internal::PerfCounterValues;
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
// Event names shared by the tests in this file.
constexpr char kGenericPerfEvent1[] = "CYCLES";
constexpr char kGenericPerfEvent2[] = "BRANCHES";
constexpr char kGenericPerfEvent3[] = "INSTRUCTIONS";
|
|
|
|
|
|
|
|
// Initialize() is expected to succeed exactly when kSupported is true.
TEST(PerfCountersTest, Init) {
  const auto initialized = PerfCounters::Initialize();
  EXPECT_EQ(initialized, PerfCounters::kSupported);
}
|
|
|
|
|
|
|
|
// Creating a counter set from a single generic event must give a valid set.
TEST(PerfCountersTest, OneCounter) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Performance counters not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());

  auto single = PerfCounters::Create({kGenericPerfEvent1});
  EXPECT_TRUE(single.IsValid());
}
|
|
|
|
|
|
|
|
// Exercises Create() with empty, blank, unknown and partially usable name
// lists. The assertions expect unusable names to be dropped from the result
// while the remaining names stay active, in their original order.
TEST(PerfCountersTest, NegativeTest) {
  if (!PerfCounters::kSupported) {
    // Without support, initialization itself has to report failure.
    EXPECT_FALSE(PerfCounters::Initialize());
    return;
  }
  EXPECT_TRUE(PerfCounters::Initialize());

  // Lists that contain nothing usable never yield a valid counter set.
  EXPECT_FALSE(PerfCounters::Create({}).IsValid());
  EXPECT_FALSE(PerfCounters::Create({""}).IsValid());
  EXPECT_FALSE(PerfCounters::Create({"not a counter name"}).IsValid());

  // The three generic events together are accepted.
  EXPECT_TRUE(PerfCounters::Create(
                  {kGenericPerfEvent1, kGenericPerfEvent2, kGenericPerfEvent3})
                  .IsValid());

  {
    // A blank name in the middle of the list is filtered out.
    const std::vector<std::string> expected_names{kGenericPerfEvent2,
                                                  kGenericPerfEvent1};
    auto filtered =
        PerfCounters::Create({kGenericPerfEvent2, "", kGenericPerfEvent1});
    EXPECT_TRUE(filtered.IsValid());
    EXPECT_EQ(filtered.num_counters(), 2);
    EXPECT_EQ(filtered.names(), expected_names);
  }

  {
    // An unknown name in the middle of the list is filtered out.
    const std::vector<std::string> expected_names{kGenericPerfEvent3,
                                                  kGenericPerfEvent1};
    auto filtered = PerfCounters::Create(
        {kGenericPerfEvent3, "not a counter name", kGenericPerfEvent1});
    EXPECT_TRUE(filtered.IsValid());
    EXPECT_EQ(filtered.num_counters(), 2);
    EXPECT_EQ(filtered.names(), expected_names);
  }

  {
    // Same three-event check as above, repeated after the filtered cases.
    auto all_generic = PerfCounters::Create(
        {kGenericPerfEvent1, kGenericPerfEvent2, kGenericPerfEvent3});
    EXPECT_TRUE(all_generic.IsValid());
  }

  {
    // The extra fourth name is expected to be dropped, leaving the three
    // generic events.
    const std::vector<std::string> expected_names{
        kGenericPerfEvent1, kGenericPerfEvent2, kGenericPerfEvent3};
    auto filtered =
        PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2,
                              kGenericPerfEvent3,
                              "MISPREDICTED_BRANCH_RETIRED"});
    EXPECT_TRUE(filtered.IsValid());
    EXPECT_EQ(filtered.num_counters(), 3);
    EXPECT_EQ(filtered.names(), expected_names);
  }
}
|
|
|
|
|
|
|
|
// Takes two consecutive snapshots of a single counter and expects both
// readings to be positive, with the later one strictly larger.
TEST(PerfCountersTest, Read1Counter) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());

  auto cycles = PerfCounters::Create({kGenericPerfEvent1});
  EXPECT_TRUE(cycles.IsValid());

  PerfCounterValues first(1);
  EXPECT_TRUE(cycles.Snapshot(&first));
  EXPECT_GT(first[0], 0);

  PerfCounterValues second(1);
  EXPECT_TRUE(cycles.Snapshot(&second));
  EXPECT_GT(second[0], 0);
  EXPECT_GT(second[0], first[0]);
}
|
|
|
|
|
|
|
|
// Takes two consecutive snapshots of a two-event counter set and expects
// every reading in both snapshots to be positive.
TEST(PerfCountersTest, Read2Counters) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());

  auto pair_of_events =
      PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2});
  EXPECT_TRUE(pair_of_events.IsValid());

  PerfCounterValues first(2);
  EXPECT_TRUE(pair_of_events.Snapshot(&first));
  EXPECT_GT(first[0], 0);
  EXPECT_GT(first[1], 0);

  PerfCounterValues second(2);
  EXPECT_TRUE(pair_of_events.Snapshot(&second));
  EXPECT_GT(second[0], 0);
  EXPECT_GT(second[1], 0);
}
|
2021-05-19 16:49:05 +08:00
|
|
|
|
2022-01-25 18:14:20 +08:00
|
|
|
// Opens the same event six times. The expectations below only hold under the
// assumption, valid at the time of writing, that hardware offers just a small
// number (3-4) of counters: the first instance can still be read while the
// fifth and sixth cannot.
TEST(PerfCountersTest, ReopenExistingCounters) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());

  constexpr size_t kInstances = 6;
  std::vector<PerfCounters> opened;
  opened.reserve(kInstances);
  for (size_t n = 0; n < kInstances; ++n) {
    opened.push_back(PerfCounters::Create({kGenericPerfEvent1}));
  }

  PerfCounterValues reading(1);
  EXPECT_TRUE(opened[0].Snapshot(&reading));
  EXPECT_FALSE(opened[4].Snapshot(&reading));
  EXPECT_FALSE(opened[5].Snapshot(&reading));
}
|
|
|
|
|
|
|
|
// Same idea as ReopenExistingCounters, but through PerfCountersMeasurement:
// ten measurements of the same event are created, and under the assumption
// that hardware currently offers only a small number (3-4) of counters, the
// first one can be stopped successfully while the ninth and tenth cannot.
TEST(PerfCountersTest, CreateExistingMeasurements) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());

  constexpr size_t kInstances = 10;
  std::vector<PerfCountersMeasurement> instances;
  std::vector<std::pair<std::string, double>> readings;

  instances.reserve(kInstances);
  for (size_t n = 0; n < kInstances; ++n) {
    instances.emplace_back(std::vector<std::string>{kGenericPerfEvent1});
  }

  instances[0].Start();
  EXPECT_TRUE(instances[0].Stop(readings));

  readings.clear();
  instances[8].Start();
  EXPECT_FALSE(instances[8].Stop(readings));

  readings.clear();
  instances[9].Start();
  EXPECT_FALSE(instances[9].Stop(readings));
}
|
|
|
|
|
2021-05-19 16:49:05 +08:00
|
|
|
// Burns CPU time by summing i * i for every i in [0, 100000000) and returns
// the sum (with the usual unsigned wrap-around of size_t).
size_t do_work() {
  constexpr size_t kIterations = 100000000;
  size_t sum_of_squares = 0;
  size_t n = 0;
  while (n < kIterations) {
    sum_of_squares += n * n;
    ++n;
  }
  return sum_of_squares;
}
|
|
|
|
|
|
|
|
void measure(size_t threadcount, PerfCounterValues* values1,
|
|
|
|
PerfCounterValues* values2) {
|
2021-06-25 01:21:59 +08:00
|
|
|
BM_CHECK_NE(values1, nullptr);
|
|
|
|
BM_CHECK_NE(values2, nullptr);
|
2021-05-19 16:49:05 +08:00
|
|
|
std::vector<std::thread> threads(threadcount);
|
2021-06-25 01:21:59 +08:00
|
|
|
auto work = [&]() { BM_CHECK(do_work() > 1000); };
|
2021-05-19 16:49:05 +08:00
|
|
|
|
|
|
|
// We need to first set up the counters, then start the threads, so the
|
Filter performance counter names, not invalidate all (#1554)
* Filter performance counter names, not invalidate all
Currently, the performance counters are validated while they
are being created and one failure returns NoCounters(), ie it
effecitvely invalidates all the counters.
I would like to propose a new behavior: filter instead. If an
invalid name is added to the counter list, or if that particular
counter is not supported on this platform, that counter is dropped
from the list and an error messages is created, while all the
other counters remain active.
This will give testers a peace of mind that if one mistake is made
or if something is changed or removed from libpfm, their entire
test will not be invalidated. This feature gives more tolerance
with respect to versioning.
Another positive is that testers can now input a superset of all
desired counters for all platforms they support and just let
Benchmark drop all those that are not supported, although it will
create quite a lot of noise down the line, in which case perhaps
we should drop silently or make a consolidated, single error line
but this was not implemented in this change set.
* Removed unused helper type.
2023-03-02 22:56:13 +08:00
|
|
|
// threads would inherit the counters. But later, we need to first destroy
|
|
|
|
// the thread pool (so all the work finishes), then measure the counters. So
|
|
|
|
// the scopes overlap, and we need to explicitly control the scope of the
|
2021-05-19 16:49:05 +08:00
|
|
|
// threadpool.
|
|
|
|
auto counters =
|
|
|
|
PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent3});
|
|
|
|
for (auto& t : threads) t = std::thread(work);
|
|
|
|
counters.Snapshot(values1);
|
|
|
|
for (auto& t : threads) t.join();
|
|
|
|
counters.Snapshot(values2);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Measures the counter deltas produced by 2 and then by 4 worker threads (see
// measure()) and expects the 4-thread deltas to be at least 1.9 times the
// 2-thread deltas for both counters.
TEST(PerfCountersTest, MultiThreaded) {
  if (!PerfCounters::kSupported) {
    // Trailing newline kept consistent with the other skip messages in this
    // file; the fallback GTEST_SKIP writes the text to std::cout verbatim.
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());
  PerfCounterValues values1(2);
  PerfCounterValues values2(2);

  measure(2, &values1, &values2);
  std::vector<double> D1{static_cast<double>(values2[0] - values1[0]),
                         static_cast<double>(values2[1] - values1[1])};

  measure(4, &values1, &values2);
  std::vector<double> D2{static_cast<double>(values2[0] - values1[0]),
                         static_cast<double>(values2[1] - values1[1])};

  // Some extra work will happen on the main thread - like joining the threads
  // - so the ratio won't be quite 2.0, but very close.
  EXPECT_GE(D2[0], 1.9 * D1[0]);
  EXPECT_GE(D2[1], 1.9 * D1[1]);
}
|
2023-03-01 23:30:41 +08:00
|
|
|
|
|
|
|
// Requests a long list of hardware events in a single PerfCountersMeasurement
// and expects one Start()/Stop() cycle on it to succeed.
TEST(PerfCountersTest, HardwareLimits) {
  if (!PerfCounters::kSupported) {
    GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
  }
  EXPECT_TRUE(PerfCounters::Initialize());

  // Taken straight from `perf list` on x86-64.
  // Got all hardware names since these are the problematic ones.
  const std::vector<std::string> all_hw_events{
      "cycles",  // leader
      "instructions",
      "branches",
      "L1-dcache-loads",
      "L1-dcache-load-misses",
      "L1-dcache-prefetches",
      "L1-icache-load-misses",  // leader
      "L1-icache-loads",
      "branch-load-misses",
      "branch-loads",
      "dTLB-load-misses",
      "dTLB-loads",
      "iTLB-load-misses",  // leader
      "iTLB-loads",
      "branch-instructions",
      "branch-misses",
      "cache-misses",
      "cache-references",
      "stalled-cycles-backend",  // leader
      "stalled-cycles-frontend"};

  // Events that are not supported here are filtered out up front so the test
  // completes without failure, even though it may then not actually exercise
  // the grouping on that platform.
  std::vector<std::string> supported;
  for (const auto& candidate : all_hw_events) {
    if (!PerfCounters::IsCounterSupported(candidate)) continue;
    supported.push_back(candidate);
  }

  PerfCountersMeasurement measurement(supported);
  std::vector<std::pair<std::string, double>> results;

  measurement.Start();
  EXPECT_TRUE(measurement.Stop(results));
}
|
|
|
|
|
2021-04-28 16:25:29 +08:00
|
|
|
} // namespace
|