1
0
mirror of https://github.com/google/benchmark.git synced 2025-04-29 14:30:37 +08:00

Fix broken PFM-enabled tests ()

* Add pfm CI actions for bazel

* Fix problems in unit test.

* Undo enabling the CI tests for pfm - github CI machines seemingly do not support performance counters.

* Remove commented code - can be revisited in github history when needed, and there's a comment explaining the rationale behind the new test code.

---------

Co-authored-by: Andy Christiansen <achristiansen@google.com>
Co-authored-by: dominic <510002+dmah42@users.noreply.github.com>
This commit is contained in:
Andy Christiansen 2023-07-07 10:58:16 +02:00 committed by GitHub
parent 015d1a091a
commit 4931aefb51
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 34 additions and 28 deletions

View File

@ -2,6 +2,7 @@
#include <thread>
#include "../src/perf_counters.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#ifndef GTEST_SKIP
@ -14,6 +15,9 @@ struct MsgHandler {
using benchmark::internal::PerfCounters;
using benchmark::internal::PerfCountersMeasurement;
using benchmark::internal::PerfCounterValues;
using ::testing::AllOf;
using ::testing::Gt;
using ::testing::Lt;
namespace {
const char kGenericPerfEvent1[] = "CYCLES";
@ -72,8 +76,7 @@ TEST(PerfCountersTest, NegativeTest) {
{
// Add a bad apple in the end of the chain to check the edges
auto counter = PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2,
kGenericPerfEvent3,
"MISPREDICTED_BRANCH_RETIRED"});
kGenericPerfEvent3, "bad event name"});
EXPECT_EQ(counter.num_counters(), 3);
EXPECT_EQ(counter.names(),
std::vector<std::string>({kGenericPerfEvent1, kGenericPerfEvent2,
@ -257,10 +260,14 @@ TEST(PerfCountersTest, MultiThreaded) {
static_cast<double>(after[0] - before[0]),
static_cast<double>(after[1] - before[1])};
// Some extra work will happen on the main thread - like joining the threads
// - so the ratio won't be quite 2.0, but very close.
EXPECT_GE(Elapsed4Threads[0], 1.9 * Elapsed2Threads[0]);
EXPECT_GE(Elapsed4Threads[1], 1.9 * Elapsed2Threads[1]);
// The following expectations fail (at least on a beefy workstation with lots
// of cpus) - it seems that in some circumstances the runtime of 4 threads
// can even be better than with 2.
// So instead of expecting 4 threads to be slower, let's just make sure they
// do not differ too much in general (one is not more than 10x than the
// other).
EXPECT_THAT(Elapsed4Threads[0] / Elapsed2Threads[0], AllOf(Gt(0.1), Lt(10)));
EXPECT_THAT(Elapsed4Threads[1] / Elapsed2Threads[1], AllOf(Gt(0.1), Lt(10)));
}
TEST(PerfCountersTest, HardwareLimits) {
@ -273,28 +280,18 @@ TEST(PerfCountersTest, HardwareLimits) {
}
EXPECT_TRUE(PerfCounters::Initialize());
// Taken straight from `perf list` on x86-64
// Got all hardware names since these are the problematic ones
std::vector<std::string> counter_names{"cycles", // leader
"instructions",
"branches",
"L1-dcache-loads",
"L1-dcache-load-misses",
"L1-dcache-prefetches",
"L1-icache-load-misses", // leader
"L1-icache-loads",
"branch-load-misses",
"branch-loads",
"dTLB-load-misses",
"dTLB-loads",
"iTLB-load-misses", // leader
"iTLB-loads",
"branch-instructions",
"branch-misses",
"cache-misses",
"cache-references",
"stalled-cycles-backend", // leader
"stalled-cycles-frontend"};
// Taken from `perf list`, but focusses only on those HW events that actually
// were reported when running `sudo perf stat -a sleep 10`. All HW events
// listed in the first command not reported in the second seem to not work.
// This is sad as we don't really get to test the grouping here (groups can
// contain up to 6 members)...
std::vector<std::string> counter_names{
"cycles", // leader
"instructions", //
"branches", //
"branch-misses", //
"cache-misses", //
};
// In the off-chance that some of these values are not supported,
// we filter them out so the test will complete without failure

View File

@ -2,9 +2,16 @@
#include "../src/perf_counters.h"
#include "../src/commandlineflags.h"
#include "benchmark/benchmark.h"
#include "output_test.h"
namespace benchmark {
BM_DECLARE_string(benchmark_perf_counters);
} // namespace benchmark
static void BM_Simple(benchmark::State& state) {
for (auto _ : state) {
auto iterations = state.iterations();
@ -24,5 +31,7 @@ int main(int argc, char* argv[]) {
if (!benchmark::internal::PerfCounters::kSupported) {
return 0;
}
benchmark::FLAGS_benchmark_perf_counters = "CYCLES,BRANCHES";
benchmark::internal::PerfCounters::Initialize();
RunOutputTests(argc, argv);
}