From 11dc36822b2aaccb51bb1c75dbd43d2026b23cf6 Mon Sep 17 00:00:00 2001 From: Eric Date: Sun, 26 Nov 2017 13:33:01 -0700 Subject: [PATCH] Improve CPU Cache info reporting -- Add Windows support. (#486) * Improve CPU Cache info reporting -- Add Windows support. This patch does a couple of things regarding CPU Cache reporting. First, it adds an implementation on Windows. Second, it fixes the JSONReporter to correctly (and actually) output the CPU configuration information. Third, it detects and reports the number of physical CPUs that share the same cache. --- include/benchmark/benchmark.h | 1 + src/json_reporter.cc | 21 +++++++ src/reporter.cc | 5 +- src/sysinfo.cc | 107 ++++++++++++++++++++++++++++++++-- test/reporter_output_test.cc | 35 +++++++++++ 5 files changed, 164 insertions(+), 5 deletions(-) diff --git a/include/benchmark/benchmark.h b/include/benchmark/benchmark.h index b07073f9..364135f5 100644 --- a/include/benchmark/benchmark.h +++ b/include/benchmark/benchmark.h @@ -1159,6 +1159,7 @@ struct CPUInfo { std::string type; int level; int size; + int num_sharing; }; int num_cpus; diff --git a/src/json_reporter.cc b/src/json_reporter.cc index 93a5bc83..b5ae302a 100644 --- a/src/json_reporter.cc +++ b/src/json_reporter.cc @@ -87,6 +87,27 @@ bool JSONReporter::ReportContext(const Context& context) { out << indent << FormatKV("cpu_scaling_enabled", info.scaling_enabled) << ",\n"; + out << indent << "\"caches\": [\n"; + indent = std::string(6, ' '); + std::string cache_indent(8, ' '); + for (size_t i = 0; i < info.caches.size(); ++i) { + auto& CI = info.caches[i]; + out << indent << "{\n"; + out << cache_indent << FormatKV("type", CI.type) << ",\n"; + out << cache_indent << FormatKV("level", static_cast(CI.level)) + << ",\n"; + out << cache_indent + << FormatKV("size", static_cast(CI.size) * 1000u) << ",\n"; + out << cache_indent + << FormatKV("num_sharing", static_cast(CI.num_sharing)) + << "\n"; + out << indent << "}"; + if (i != info.caches.size() - 
1) out << ","; + out << "\n"; + } + indent = std::string(4, ' '); + out << indent << "],\n"; + #if defined(NDEBUG) const char build_type[] = "release"; #else diff --git a/src/reporter.cc b/src/reporter.cc index c37ad94d..5d2fa05a 100644 --- a/src/reporter.cc +++ b/src/reporter.cc @@ -45,7 +45,10 @@ void BenchmarkReporter::PrintBasicContext(std::ostream *out, Out << "CPU Caches:\n"; for (auto &CInfo : info.caches) { Out << " L" << CInfo.level << " " << CInfo.type << " " - << (CInfo.size / 1000) << "K\n"; + << (CInfo.size / 1000) << "K"; + if (CInfo.num_sharing != 0) + Out << " (x" << (info.num_cpus / CInfo.num_sharing) << ")"; + Out << "\n"; } } diff --git a/src/sysinfo.cc b/src/sysinfo.cc index 8fea183f..41e36a13 100644 --- a/src/sysinfo.cc +++ b/src/sysinfo.cc @@ -32,7 +32,10 @@ #endif #include +#include +#include #include +#include #include #include #include @@ -123,6 +126,15 @@ struct ValueUnion { return Buff->uint64_value; BENCHMARK_UNREACHABLE(); } + + template + std::array GetAsArray() { + const int ArrSize = sizeof(T) * N; + CHECK_LE(ArrSize, Size); + std::array Arr; + std::memcpy(Arr.data(), data(), ArrSize); + return Arr; + } }; #ifdef __GNUC__ @@ -158,6 +170,14 @@ bool GetSysctl(std::string const& Name, Tp* Out) { *Out = static_cast(Buff.GetAsUnsigned()); return true; } + +template +bool GetSysctl(std::string const& Name, std::array* Out) { + auto Buff = GetSysctlImp(Name); + if (!Buff) return false; + *Out = Buff.GetAsArray(); + return true; +}; #endif template @@ -186,6 +206,25 @@ bool CpuScalingEnabled(int num_cpus) { return false; } +int CountSetBitsInCPUMap(std::string Val) { + auto CountBits = [](std::string Part) { + using CPUMask = std::bitset; + Part = "0x" + Part; + CPUMask Mask(std::stoul(Part, nullptr, 16)); + return static_cast(Mask.count()); + }; + size_t Pos; + int total = 0; + while ((Pos = Val.find(',')) != std::string::npos) { + total += CountBits(Val.substr(0, Pos)); + Val = Val.substr(Pos + 1); + } + if (!Val.empty()) { + total += 
CountBits(Val); + } + return total; +} + BENCHMARK_MAYBE_UNUSED std::vector GetCacheSizesFromKVFS() { std::vector res; @@ -214,6 +253,10 @@ std::vector GetCacheSizesFromKVFS() { PrintErrorAndDie("Failed to read from file ", FPath, "type"); if (!ReadFromFile(StrCat(FPath, "level"), &info.level)) PrintErrorAndDie("Failed to read from file ", FPath, "level"); + std::string map_str; + if (!ReadFromFile(StrCat(FPath, "shared_cpu_map"), &map_str)) + PrintErrorAndDie("Failed to read from file ", FPath, "shared_cpu_map"); + info.num_sharing = CountSetBitsInCPUMap(map_str); res.push_back(info); } @@ -223,14 +266,18 @@ std::vector GetCacheSizesFromKVFS() { #ifdef BENCHMARK_OS_MACOSX std::vector GetCacheSizesMacOSX() { std::vector res; + std::array CacheCounts{{0, 0, 0, 0}}; + GetSysctl("hw.cacheconfig", &CacheCounts); + struct { std::string name; std::string type; int level; - } Cases[] = {{"hw.l1dcachesize", "Data", 1}, - {"hw.l1icachesize", "Instruction", 1}, - {"hw.l2cachesize", "Unified", 2}, - {"hw.l3cachesize", "Unified", 3}}; + size_t num_sharing; + } Cases[] = {{"hw.l1dcachesize", "Data", 1, CacheCounts[1]}, + {"hw.l1icachesize", "Instruction", 1, CacheCounts[1]}, + {"hw.l2cachesize", "Unified", 2, CacheCounts[2]}, + {"hw.l3cachesize", "Unified", 3, CacheCounts[3]}}; for (auto& C : Cases) { int val; if (!GetSysctl(C.name, &val)) continue; @@ -238,15 +285,67 @@ std::vector GetCacheSizesMacOSX() { info.type = C.type; info.level = C.level; info.size = val; + info.num_sharing = static_cast(C.num_sharing); res.push_back(std::move(info)); } return res; } +#elif defined(BENCHMARK_OS_WINDOWS) +std::vector GetCacheSizesWindows() { + std::vector res; + DWORD buffer_size = 0; + using PInfo = SYSTEM_LOGICAL_PROCESSOR_INFORMATION; + using CInfo = CACHE_DESCRIPTOR; + + using UPtr = std::unique_ptr; + GetLogicalProcessorInformation(nullptr, &buffer_size); + UPtr buff((PInfo*)malloc(buffer_size), &std::free); + if (!GetLogicalProcessorInformation(buff.get(), &buffer_size)) + 
PrintErrorAndDie("Failed during call to GetLogicalProcessorInformation: ", + GetLastError()); + + PInfo* it = buff.get(); + PInfo* end = buff.get() + (buffer_size / sizeof(PInfo)); + + for (; it != end; ++it) { + if (it->Relationship != RelationCache) continue; + using BitSet = std::bitset; + BitSet B(it->ProcessorMask); + // To prevent duplicates, only consider caches where CPU 0 is specified + if (!B.test(0)) continue; + CInfo* Cache = &it->Cache; + CPUInfo::CacheInfo C; + C.num_sharing = B.count(); + C.level = Cache->Level; + C.size = Cache->Size; + switch (Cache->Type) { + case CacheUnified: + C.type = "Unified"; + break; + case CacheInstruction: + C.type = "Instruction"; + break; + case CacheData: + C.type = "Data"; + break; + case CacheTrace: + C.type = "Trace"; + break; + default: + C.type = "Unknown"; + break; + } + res.push_back(C); + } + return res; +} #endif std::vector GetCacheSizes() { #ifdef BENCHMARK_OS_MACOSX return GetCacheSizesMacOSX(); +#elif defined(BENCHMARK_OS_WINDOWS) + return GetCacheSizesWindows(); #else return GetCacheSizesFromKVFS(); #endif diff --git a/test/reporter_output_test.cc b/test/reporter_output_test.cc index eac88066..1620b313 100644 --- a/test/reporter_output_test.cc +++ b/test/reporter_output_test.cc @@ -13,6 +13,41 @@ ADD_CASES(TC_ConsoleOut, {{"^[-]+$", MR_Next}, {"^Benchmark %s Time %s CPU %s Iterations$", MR_Next}, {"^[-]+$", MR_Next}}); +static int AddContextCases() { + AddCases(TC_ConsoleErr, + { + {"%int[-/]%int[-/]%int %int:%int:%int$", MR_Default}, + {"Run on \\(%int X %float MHz CPU s\\)", MR_Next}, + }); + AddCases(TC_JSONOut, {{"^\\{", MR_Default}, + {"\"context\":", MR_Next}, + {"\"date\": \"", MR_Next}, + {"\"num_cpus\": %int,$", MR_Next}, + {"\"mhz_per_cpu\": %float,$", MR_Next}, + {"\"cpu_scaling_enabled\": ", MR_Next}, + {"\"caches\": \\[$", MR_Next}}); + auto const& Caches = benchmark::CPUInfo::Get().caches; + if (!Caches.empty()) { + AddCases(TC_ConsoleErr, {{"CPU Caches:$", MR_Next}}); + } + for (size_t I = 
0; I < Caches.size(); ++I) { + std::string num_caches_str = + Caches[I].num_sharing != 0 ? " \\(x%int\\)$" : "$"; + AddCases( + TC_ConsoleErr, + {{"L%int (Data|Instruction|Unified) %intK" + num_caches_str, MR_Next}}); + AddCases(TC_JSONOut, {{"\\{$", MR_Next}, + {"\"type\": \"", MR_Next}, + {"\"level\": %int,$", MR_Next}, + {"\"size\": %int,$", MR_Next}, + {"\"num_sharing\": %int$", MR_Next}, + {"}[,]{0,1}$", MR_Next}}); + } + + AddCases(TC_JSONOut, {{"],$"}}); + return 0; +} +int dummy_register = AddContextCases(); ADD_CASES(TC_CSVOut, {{"%csv_header"}}); // ========================================================================= //