mirror of
https://github.com/google/benchmark.git
synced 2025-04-29 14:30:37 +08:00
Fix broken PFM-enabled tests (#1623)
* Add pfm CI actions for bazel * Fix problems in unit test. * Undo enabling the CI tests for pfm - github CI machines seemingly do not support performance counters. * Remove commented code - can be revisited in github history when needed, and there's a comment explaining the rationale behind the new test code. --------- Co-authored-by: Andy Christiansen <achristiansen@google.com> Co-authored-by: dominic <510002+dmah42@users.noreply.github.com>
This commit is contained in:
parent
015d1a091a
commit
4931aefb51
@ -2,6 +2,7 @@
|
||||
#include <thread>
|
||||
|
||||
#include "../src/perf_counters.h"
|
||||
#include "gmock/gmock.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#ifndef GTEST_SKIP
|
||||
@ -14,6 +15,9 @@ struct MsgHandler {
|
||||
using benchmark::internal::PerfCounters;
|
||||
using benchmark::internal::PerfCountersMeasurement;
|
||||
using benchmark::internal::PerfCounterValues;
|
||||
using ::testing::AllOf;
|
||||
using ::testing::Gt;
|
||||
using ::testing::Lt;
|
||||
|
||||
namespace {
|
||||
const char kGenericPerfEvent1[] = "CYCLES";
|
||||
@ -72,8 +76,7 @@ TEST(PerfCountersTest, NegativeTest) {
|
||||
{
|
||||
// Add a bad apple in the end of the chain to check the edges
|
||||
auto counter = PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2,
|
||||
kGenericPerfEvent3,
|
||||
"MISPREDICTED_BRANCH_RETIRED"});
|
||||
kGenericPerfEvent3, "bad event name"});
|
||||
EXPECT_EQ(counter.num_counters(), 3);
|
||||
EXPECT_EQ(counter.names(),
|
||||
std::vector<std::string>({kGenericPerfEvent1, kGenericPerfEvent2,
|
||||
@ -257,10 +260,14 @@ TEST(PerfCountersTest, MultiThreaded) {
|
||||
static_cast<double>(after[0] - before[0]),
|
||||
static_cast<double>(after[1] - before[1])};
|
||||
|
||||
// Some extra work will happen on the main thread - like joining the threads
|
||||
// - so the ratio won't be quite 2.0, but very close.
|
||||
EXPECT_GE(Elapsed4Threads[0], 1.9 * Elapsed2Threads[0]);
|
||||
EXPECT_GE(Elapsed4Threads[1], 1.9 * Elapsed2Threads[1]);
|
||||
// The following expectations fail (at least on a beefy workstation with lots
|
||||
// of cpus) - it seems that in some circumstances the runtime of 4 threads
|
||||
// can even be better than with 2.
|
||||
// So instead of expecting 4 threads to be slower, let's just make sure they
|
||||
// do not differ too much in general (one is not more than 10x than the
|
||||
// other).
|
||||
EXPECT_THAT(Elapsed4Threads[0] / Elapsed2Threads[0], AllOf(Gt(0.1), Lt(10)));
|
||||
EXPECT_THAT(Elapsed4Threads[1] / Elapsed2Threads[1], AllOf(Gt(0.1), Lt(10)));
|
||||
}
|
||||
|
||||
TEST(PerfCountersTest, HardwareLimits) {
|
||||
@ -273,28 +280,18 @@ TEST(PerfCountersTest, HardwareLimits) {
|
||||
}
|
||||
EXPECT_TRUE(PerfCounters::Initialize());
|
||||
|
||||
// Taken straight from `perf list` on x86-64
|
||||
// Got all hardware names since these are the problematic ones
|
||||
std::vector<std::string> counter_names{"cycles", // leader
|
||||
"instructions",
|
||||
"branches",
|
||||
"L1-dcache-loads",
|
||||
"L1-dcache-load-misses",
|
||||
"L1-dcache-prefetches",
|
||||
"L1-icache-load-misses", // leader
|
||||
"L1-icache-loads",
|
||||
"branch-load-misses",
|
||||
"branch-loads",
|
||||
"dTLB-load-misses",
|
||||
"dTLB-loads",
|
||||
"iTLB-load-misses", // leader
|
||||
"iTLB-loads",
|
||||
"branch-instructions",
|
||||
"branch-misses",
|
||||
"cache-misses",
|
||||
"cache-references",
|
||||
"stalled-cycles-backend", // leader
|
||||
"stalled-cycles-frontend"};
|
||||
// Taken from `perf list`, but focusses only on those HW events that actually
|
||||
// were reported when running `sudo perf stat -a sleep 10`. All HW events
|
||||
// listed in the first command not reported in the second seem to not work.
|
||||
// This is sad as we don't really get to test the grouping here (groups can
|
||||
// contain up to 6 members)...
|
||||
std::vector<std::string> counter_names{
|
||||
"cycles", // leader
|
||||
"instructions", //
|
||||
"branches", //
|
||||
"branch-misses", //
|
||||
"cache-misses", //
|
||||
};
|
||||
|
||||
// In the off-chance that some of these values are not supported,
|
||||
// we filter them out so the test will complete without failure
|
||||
|
@ -2,9 +2,16 @@
|
||||
|
||||
#include "../src/perf_counters.h"
|
||||
|
||||
#include "../src/commandlineflags.h"
|
||||
#include "benchmark/benchmark.h"
|
||||
#include "output_test.h"
|
||||
|
||||
namespace benchmark {
|
||||
|
||||
BM_DECLARE_string(benchmark_perf_counters);
|
||||
|
||||
} // namespace benchmark
|
||||
|
||||
static void BM_Simple(benchmark::State& state) {
|
||||
for (auto _ : state) {
|
||||
auto iterations = state.iterations();
|
||||
@ -24,5 +31,7 @@ int main(int argc, char* argv[]) {
|
||||
if (!benchmark::internal::PerfCounters::kSupported) {
|
||||
return 0;
|
||||
}
|
||||
benchmark::FLAGS_benchmark_perf_counters = "CYCLES,BRANCHES";
|
||||
benchmark::internal::PerfCounters::Initialize();
|
||||
RunOutputTests(argc, argv);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user