From 27e0b439cf0f9be329f617056795b4e5c3612f34 Mon Sep 17 00:00:00 2001 From: Eric Date: Wed, 22 Nov 2017 09:33:52 -0700 Subject: [PATCH] Refactor System information collection -- Add CPU Cache Info (#483) * Refactor System information collection. This patch refactors the system information collection, and in particular information about the target CPU. The motivation is to make it easier to access CPU information, and easier to add new information as need be. This patch additionally adds information about the cache sizes of the CPU. * Address review comments: Clean up integer types. This commit cleans up the integer types used in ValueUnion to follow the Google style guide. Additionally it adds a BENCHMARK_UNREACHABLE macro to assist in documenting/catching unreachable code paths. * Rename ValueUnion accessors. --- include/benchmark/benchmark.h | 26 +- src/benchmark.cc | 16 +- src/benchmark_register.cc | 4 +- src/internal_macros.h | 22 ++ src/json_reporter.cc | 9 +- src/reporter.cc | 19 +- src/sysinfo.cc | 585 +++++++++++++++++++--------------- src/sysinfo.h | 10 - 8 files changed, 401 insertions(+), 290 deletions(-) delete mode 100644 src/sysinfo.h diff --git a/include/benchmark/benchmark.h b/include/benchmark/benchmark.h index 16d2b903..b07073f9 100644 --- a/include/benchmark/benchmark.h +++ b/include/benchmark/benchmark.h @@ -1154,6 +1154,25 @@ class Fixture : public internal::Benchmark { namespace benchmark { +struct CPUInfo { + struct CacheInfo { + std::string type; + int level; + int size; + }; + + int num_cpus; + double cycles_per_second; + std::vector<CacheInfo> caches; + bool scaling_enabled; + + static const CPUInfo& Get(); + + private: + CPUInfo(); + BENCHMARK_DISALLOW_COPY_AND_ASSIGN(CPUInfo); +}; + // Interface for custom benchmark result printers. // By default, benchmark reports are printed to stdout. 
However an application // can control the destination of the reports by calling @@ -1162,12 +1181,11 @@ namespace benchmark { class BenchmarkReporter { public: struct Context { - int num_cpus; - double mhz_per_cpu; - bool cpu_scaling_enabled; - + CPUInfo const& cpu_info; // The number of chars in the longest benchmark name. size_t name_field_width; + + Context(); }; struct Run { diff --git a/src/benchmark.cc b/src/benchmark.cc index 95730fc2..0b2e13d6 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -37,13 +37,13 @@ #include "colorprint.h" #include "commandlineflags.h" #include "complexity.h" -#include "statistics.h" #include "counter.h" +#include "internal_macros.h" #include "log.h" #include "mutex.h" #include "re.h" +#include "statistics.h" #include "string_util.h" -#include "sysinfo.h" #include "timers.h" DEFINE_bool(benchmark_list_tests, false, @@ -108,6 +108,14 @@ namespace internal { void UseCharPointer(char const volatile*) {} +#ifdef BENCHMARK_HAS_NO_BUILTIN_UNREACHABLE +BENCHMARK_NORETURN void UnreachableImp(const char* FName, int Line) { + std::cerr << FName << ":" << Line << " executing unreachable code!" 
+ << std::endl; + std::abort(); +} +#endif + class ThreadManager { public: ThreadManager(int num_threads) @@ -493,10 +501,6 @@ void RunBenchmarks(const std::vector& benchmarks, // Print header here BenchmarkReporter::Context context; - context.num_cpus = NumCPUs(); - context.mhz_per_cpu = CyclesPerSecond() / 1000000.0; - - context.cpu_scaling_enabled = CpuScalingEnabled(); context.name_field_width = name_field_width; // Keep track of runing times of all instances of current benchmark diff --git a/src/benchmark_register.cc b/src/benchmark_register.cc index c1b80674..d5746a36 100644 --- a/src/benchmark_register.cc +++ b/src/benchmark_register.cc @@ -42,7 +42,6 @@ #include "mutex.h" #include "re.h" #include "string_util.h" -#include "sysinfo.h" #include "timers.h" namespace benchmark { @@ -448,8 +447,7 @@ Benchmark* Benchmark::DenseThreadRange(int min_threads, int max_threads, } Benchmark* Benchmark::ThreadPerCpu() { - static int num_cpus = NumCPUs(); - thread_counts_.push_back(num_cpus); + thread_counts_.push_back(CPUInfo::Get().num_cpus); return this; } diff --git a/src/internal_macros.h b/src/internal_macros.h index b59261ca..d1ea079d 100644 --- a/src/internal_macros.h +++ b/src/internal_macros.h @@ -6,6 +6,9 @@ #ifndef __has_feature #define __has_feature(x) 0 #endif +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif #if defined(__clang__) #define COMPILER_CLANG @@ -56,4 +59,23 @@ #define BENCHMARK_HAS_NO_EXCEPTIONS #endif +#if defined(COMPILER_CLANG) || defined(COMPILER_GCC) +#define BENCHMARK_MAYBE_UNUSED __attribute__((unused)) +#else +#define BENCHMARK_MAYBE_UNUSED +#endif + +#if defined(COMPILER_GCC) || __has_builtin(__builtin_unreachable) +#define BENCHMARK_UNREACHABLE() __builtin_unreachable() +#else +#define BENCHMARK_HAS_NO_BUILTIN_UNREACHABLE +namespace benchmark { +namespace internal { +BENCHMARK_NORETURN void UnreachableImp(const char* FName, int Line); +} +} // namespace benchmark +#define BENCHMARK_UNREACHABLE() \ + 
::benchmark::internal::UnreachableImp(__FILE__, __LINE__) +#endif + #endif // BENCHMARK_INTERNAL_MACROS_H_ diff --git a/src/json_reporter.cc b/src/json_reporter.cc index a49f5b0f..93a5bc83 100644 --- a/src/json_reporter.cc +++ b/src/json_reporter.cc @@ -77,11 +77,14 @@ bool JSONReporter::ReportContext(const Context& context) { std::string walltime_value = LocalDateTimeString(); out << indent << FormatKV("date", walltime_value) << ",\n"; - out << indent << FormatKV("num_cpus", static_cast(context.num_cpus)) + CPUInfo const& info = context.cpu_info; + out << indent << FormatKV("num_cpus", static_cast(info.num_cpus)) << ",\n"; - out << indent << FormatKV("mhz_per_cpu", RoundDouble(context.mhz_per_cpu)) + out << indent + << FormatKV("mhz_per_cpu", + RoundDouble(info.cycles_per_second / 1000000.0)) << ",\n"; - out << indent << FormatKV("cpu_scaling_enabled", context.cpu_scaling_enabled) + out << indent << FormatKV("cpu_scaling_enabled", info.scaling_enabled) << ",\n"; #if defined(NDEBUG) diff --git a/src/reporter.cc b/src/reporter.cc index 9a0830b0..c37ad94d 100644 --- a/src/reporter.cc +++ b/src/reporter.cc @@ -35,12 +35,21 @@ void BenchmarkReporter::PrintBasicContext(std::ostream *out, CHECK(out) << "cannot be null"; auto &Out = *out; - Out << "Run on (" << context.num_cpus << " X " << context.mhz_per_cpu - << " MHz CPU " << ((context.num_cpus > 1) ? "s" : "") << ")\n"; - Out << LocalDateTimeString() << "\n"; - if (context.cpu_scaling_enabled) { + const CPUInfo &info = context.cpu_info; + Out << "Run on (" << info.num_cpus << " X " + << (info.cycles_per_second / 1000000.0) << " MHz CPU " + << ((info.num_cpus > 1) ? 
"s" : "") << ")\n"; + if (info.caches.size() != 0) { + Out << "CPU Caches:\n"; + for (auto &CInfo : info.caches) { + Out << " L" << CInfo.level << " " << CInfo.type << " " + << (CInfo.size / 1000) << "K\n"; + } + } + + if (info.scaling_enabled) { Out << "***WARNING*** CPU scaling is enabled, the benchmark " "real time measurements may be noisy and will incur extra " "overhead.\n"; @@ -52,6 +61,8 @@ void BenchmarkReporter::PrintBasicContext(std::ostream *out, #endif } +BenchmarkReporter::Context::Context() : cpu_info(CPUInfo::Get()) {} + double BenchmarkReporter::Run::GetAdjustedRealTime() const { double new_time = real_accumulated_time * GetTimeUnitMultiplier(time_unit); if (iterations != 0) new_time /= static_cast(iterations); diff --git a/src/sysinfo.cc b/src/sysinfo.cc index 7997605f..8fea183f 100644 --- a/src/sysinfo.cc +++ b/src/sysinfo.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "sysinfo.h" #include "internal_macros.h" #ifdef BENCHMARK_OS_WINDOWS @@ -25,21 +24,26 @@ #include #include // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD #include -#if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX || defined BENCHMARK_OS_NETBSD +#if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX || \ + defined BENCHMARK_OS_NETBSD +#define BENCHMARK_HAS_SYSCTL #include #endif #endif +#include #include #include #include #include #include +#include #include +#include #include -#include +#include +#include -#include "arraysize.h" #include "check.h" #include "cycleclock.h" #include "internal_macros.h" @@ -49,225 +53,348 @@ namespace benchmark { namespace { -std::once_flag cpuinfo_init; -double cpuinfo_cycles_per_second = 1.0; -int cpuinfo_num_cpus = 1; // Conservative guess -#if !defined BENCHMARK_OS_MACOSX -const int64_t estimate_time_ms = 1000; +void PrintImp(std::ostream& out) { out << std::endl; } -// Helper function 
estimates cycles/sec by observing cycles elapsed during -// sleep(). Using small sleep time decreases accuracy significantly. -int64_t EstimateCyclesPerSecond() { - const int64_t start_ticks = cycleclock::Now(); - SleepForMilliseconds(estimate_time_ms); - return cycleclock::Now() - start_ticks; +template <class First, class... Rest> +void PrintImp(std::ostream& out, First&& f, Rest&&... rest) { + out << std::forward<First>(f); + PrintImp(out, std::forward<Rest>(rest)...); +} + +template <class... Args> +BENCHMARK_NORETURN void PrintErrorAndDie(Args&&... args) { + PrintImp(std::cerr, std::forward<Args>(args)...); + std::exit(EXIT_FAILURE); +} + +#ifdef BENCHMARK_HAS_SYSCTL + +#ifdef __GNUC__ +// Suppress the warning generated by the C11 flexible array member below. +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpedantic" +#endif + +/// ValueUnion - A type used to correctly alias the byte-for-byte output of +/// `sysctl` with the result type it's to be interpreted as. +struct ValueUnion { + union DataT { + uint32_t uint32_value; + uint64_t uint64_value; + // FIXME (Maybe?): This is a C11 flexible array member, and not technically + // C++. However, all compilers support it and it allows for correct aliasing + // of union members from bytes. + char bytes[]; + }; + using DataPtr = std::unique_ptr<DataT, decltype(&std::free)>; + + // The size of the data union member + its trailing array size. 
+ size_t Size; + DataPtr Buff; + + public: + ValueUnion() : Size(0), Buff(nullptr, &std::free) {} + + explicit ValueUnion(size_t BuffSize) + : Size(sizeof(DataT) + BuffSize), + Buff(::new (std::malloc(Size)) DataT(), &std::free) {} + + ValueUnion(ValueUnion&& other) = default; + + explicit operator bool() const { return bool(Buff); } + + char* data() const { return Buff->bytes; } + + std::string GetAsString() const { return std::string(data()); } + + int64_t GetAsInteger() const { + if (Size == sizeof(Buff->uint32_value)) + return static_cast(Buff->uint32_value); + else if (Size == sizeof(Buff->uint64_value)) + return static_cast(Buff->uint64_value); + BENCHMARK_UNREACHABLE(); + } + + uint64_t GetAsUnsigned() const { + if (Size == sizeof(Buff->uint32_value)) + return Buff->uint32_value; + else if (Size == sizeof(Buff->uint64_value)) + return Buff->uint64_value; + BENCHMARK_UNREACHABLE(); + } +}; + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif + +ValueUnion GetSysctlImp(std::string const& Name) { + size_t CurBuffSize = 0; + if (sysctlbyname(Name.c_str(), nullptr, &CurBuffSize, nullptr, 0) == -1) + return ValueUnion(); + + ValueUnion buff(CurBuffSize); + if (sysctlbyname(Name.c_str(), buff.data(), &buff.Size, nullptr, 0) == 0) + return buff; + return ValueUnion(); +} + +BENCHMARK_MAYBE_UNUSED +bool GetSysctl(std::string const& Name, std::string* Out) { + Out->clear(); + auto Buff = GetSysctlImp(Name); + if (!Buff) return false; + Out->assign(Buff.data()); + return true; +} + +template ::value>::type> +bool GetSysctl(std::string const& Name, Tp* Out) { + *Out = 0; + auto Buff = GetSysctlImp(Name); + if (!Buff) return false; + *Out = static_cast(Buff.GetAsUnsigned()); + return true; } #endif -#if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN -// Helper function for reading an int from a file. Returns true if successful -// and the memory location pointed to by value is set to the value read. 
-bool ReadIntFromFile(const char* file, long* value) { - bool ret = false; - int fd = open(file, O_RDONLY); - if (fd != -1) { - char line[1024]; - char* err; - memset(line, '\0', sizeof(line)); - ssize_t read_err = read(fd, line, sizeof(line) - 1); - ((void)read_err); // prevent unused warning - CHECK(read_err >= 0); - const long temp_value = strtol(line, &err, 10); - if (line[0] != '\0' && (*err == '\n' || *err == '\0')) { - *value = temp_value; - ret = true; +template <class ArgT> +bool ReadFromFile(std::string const& fname, ArgT* arg) { + *arg = ArgT(); + std::ifstream f(fname.c_str()); + if (!f.is_open()) return false; + f >> *arg; + return f.good(); +} + +bool CpuScalingEnabled(int num_cpus) { + // We don't have a valid CPU count, so don't even bother. + if (num_cpus <= 0) return false; +#ifndef BENCHMARK_OS_WINDOWS + // On Linux, the CPUfreq subsystem exposes CPU information as files on the + // local file system. If reading the exported files fails, then we may not be + // running on Linux, so we silently ignore all the read errors. 
+ std::string res; + for (int cpu = 0; cpu < num_cpus; ++cpu) { + std::string governor_file = + StrCat("/sys/devices/system/cpu/cpu", cpu, "/cpufreq/scaling_governor"); + if (ReadFromFile(governor_file, &res) && res != "performance") return true; + } +#endif + return false; +} + +BENCHMARK_MAYBE_UNUSED +std::vector GetCacheSizesFromKVFS() { + std::vector res; + std::string dir = "/sys/devices/system/cpu/cpu0/cache/"; + int Idx = 0; + while (true) { + CPUInfo::CacheInfo info; + std::string FPath = StrCat(dir, "index", Idx++, "/"); + std::ifstream f(StrCat(FPath, "size").c_str()); + if (!f.is_open()) break; + std::string suffix; + f >> info.size; + if (f.fail()) + PrintErrorAndDie("Failed while reading file '", FPath, "size'"); + if (f.good()) { + f >> suffix; + if (f.bad()) + PrintErrorAndDie( + "Invalid cache size format: failed to read size suffix"); + else if (f && suffix != "K") + PrintErrorAndDie("Invalid cache size format: Expected bytes ", suffix); + else if (suffix == "K") + info.size *= 1000; } - close(fd); + if (!ReadFromFile(StrCat(FPath, "type"), &info.type)) + PrintErrorAndDie("Failed to read from file ", FPath, "type"); + if (!ReadFromFile(StrCat(FPath, "level"), &info.level)) + PrintErrorAndDie("Failed to read from file ", FPath, "level"); + res.push_back(info); } - return ret; + + return res; +} + +#ifdef BENCHMARK_OS_MACOSX +std::vector GetCacheSizesMacOSX() { + std::vector res; + struct { + std::string name; + std::string type; + int level; + } Cases[] = {{"hw.l1dcachesize", "Data", 1}, + {"hw.l1icachesize", "Instruction", 1}, + {"hw.l2cachesize", "Unified", 2}, + {"hw.l3cachesize", "Unified", 3}}; + for (auto& C : Cases) { + int val; + if (!GetSysctl(C.name, &val)) continue; + CPUInfo::CacheInfo info; + info.type = C.type; + info.level = C.level; + info.size = val; + res.push_back(std::move(info)); + } + return res; } #endif -#if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN -static std::string convertToLowerCase(std::string s) { - for 
(auto& ch : s) - ch = std::tolower(ch); - return s; -} -static bool startsWithKey(std::string Value, std::string Key, - bool IgnoreCase = true) { - if (IgnoreCase) { - Key = convertToLowerCase(std::move(Key)); - Value = convertToLowerCase(std::move(Value)); - } - return Value.compare(0, Key.size(), Key) == 0; -} +std::vector GetCacheSizes() { +#ifdef BENCHMARK_OS_MACOSX + return GetCacheSizesMacOSX(); +#else + return GetCacheSizesFromKVFS(); #endif +} -void InitializeSystemInfo() { +int GetNumCPUs() { +#ifdef BENCHMARK_HAS_SYSCTL + int NumCPU = -1; + if (GetSysctl("hw.ncpu", &NumCPU)) return NumCPU; + fprintf(stderr, "Err: %s\n", strerror(errno)); + std::exit(EXIT_FAILURE); +#elif defined(BENCHMARK_OS_WINDOWS) + SYSTEM_INFO sysinfo; + // Use memset as opposed to = {} to avoid GCC missing initializer false + // positives. + std::memset(&sysinfo, 0, sizeof(SYSTEM_INFO)); + GetSystemInfo(&sysinfo); + return sysinfo.dwNumberOfProcessors; // number of logical + // processors in the current + // group +#else + int NumCPUs = 0; + int MaxID = -1; + std::ifstream f("/proc/cpuinfo"); + if (!f.is_open()) { + std::cerr << "failed to open /proc/cpuinfo\n"; + return -1; + } + const std::string Key = "processor"; + std::string ln; + while (std::getline(f, ln)) { + if (ln.empty()) continue; + size_t SplitIdx = ln.find(':'); + std::string value; + if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1); + if (ln.size() >= Key.size() && ln.compare(0, Key.size(), Key) == 0) { + NumCPUs++; + if (!value.empty()) { + int CurID = std::stoi(value); + MaxID = std::max(CurID, MaxID); + } + } + } + if (f.bad()) { + std::cerr << "Failure reading /proc/cpuinfo\n"; + return -1; + } + if (!f.eof()) { + std::cerr << "Failed to read to end of /proc/cpuinfo\n"; + return -1; + } + f.close(); + + if ((MaxID + 1) != NumCPUs) { + fprintf(stderr, + "CPU ID assignments in /proc/cpuinfo seem messed up." 
+ " This is usually caused by a bad BIOS.\n"); + } + return NumCPUs; +#endif + BENCHMARK_UNREACHABLE(); +} + +double GetCPUCyclesPerSecond() { #if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN - char line[1024]; - char* err; long freq; - bool saw_mhz = false; - // If the kernel is exporting the tsc frequency use that. There are issues // where cpuinfo_max_freq cannot be relied on because the BIOS may be // exporintg an invalid p-state (on x86) or p-states may be used to put the // processor in a new mode (turbo mode). Essentially, those frequencies // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as // well. - if (!saw_mhz && - ReadIntFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) { + if (ReadFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq) + // If CPU scaling is in effect, we want to use the *maximum* frequency, + // not whatever CPU speed some random processor happens to be using now. + || ReadFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", + &freq)) { // The value is in kHz (as the file name suggests). For example, on a // 2GHz warpstation, the file contains the value "2000000". - cpuinfo_cycles_per_second = freq * 1000.0; - saw_mhz = true; + return freq * 1000.0; } - // If CPU scaling is in effect, we want to use the *maximum* frequency, - // not whatever CPU speed some random processor happens to be using now. - if (!saw_mhz && - ReadIntFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", - &freq)) { - // The value is in kHz. For example, on a 2GHz warpstation, the file - // contains the value "2000000". - cpuinfo_cycles_per_second = freq * 1000.0; - saw_mhz = true; + const double error_value = -1; + double bogo_clock = error_value; + + std::ifstream f("/proc/cpuinfo"); + if (!f.is_open()) { + std::cerr << "failed to open /proc/cpuinfo\n"; + return error_value; } - // Read /proc/cpuinfo for other values, and if there is no cpuinfo_max_freq. 
- const char* pname = "/proc/cpuinfo"; - int fd = open(pname, O_RDONLY); - if (fd == -1) { - perror(pname); - if (!saw_mhz) { - cpuinfo_cycles_per_second = - static_cast(EstimateCyclesPerSecond()); - } - return; - } - - double bogo_clock = 1.0; - bool saw_bogo = false; - long max_cpu_id = 0; - int num_cpus = 0; - line[0] = line[1] = '\0'; - size_t chars_read = 0; - do { // we'll exit when the last read didn't read anything - // Move the next line to the beginning of the buffer - const size_t oldlinelen = strlen(line); - if (sizeof(line) == oldlinelen + 1) // oldlinelen took up entire line - line[0] = '\0'; - else // still other lines left to save - memmove(line, line + oldlinelen + 1, sizeof(line) - (oldlinelen + 1)); - // Terminate the new line, reading more if we can't find the newline - char* newline = strchr(line, '\n'); - if (newline == nullptr) { - const size_t linelen = strlen(line); - const size_t bytes_to_read = sizeof(line) - 1 - linelen; - CHECK(bytes_to_read > 0); // because the memmove recovered >=1 bytes - chars_read = read(fd, line + linelen, bytes_to_read); - line[linelen + chars_read] = '\0'; - newline = strchr(line, '\n'); - } - if (newline != nullptr) *newline = '\0'; + auto startsWithKey = [](std::string const& Value, std::string const& Key) { + if (Key.size() > Value.size()) return false; + auto Cmp = [&](char X, char Y) { + return std::tolower(X) == std::tolower(Y); + }; + return std::equal(Key.begin(), Key.end(), Value.begin(), Cmp); + }; + std::string ln; + while (std::getline(f, ln)) { + if (ln.empty()) continue; + size_t SplitIdx = ln.find(':'); + std::string value; + if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1); // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only // accept postive values. Some environments (virtual machines) report zero, // which would cause infinite looping in WallTime_Init. 
- if (!saw_mhz && startsWithKey(line, "cpu MHz")) { - const char* freqstr = strchr(line, ':'); - if (freqstr) { - cpuinfo_cycles_per_second = strtod(freqstr + 1, &err) * 1000000.0; - if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0) - saw_mhz = true; + if (startsWithKey(ln, "cpu MHz")) { + if (!value.empty()) { + double cycles_per_second = std::stod(value) * 1000000.0; + if (cycles_per_second > 0) return cycles_per_second; } - } else if (startsWithKey(line, "bogomips")) { - const char* freqstr = strchr(line, ':'); - if (freqstr) { - bogo_clock = strtod(freqstr + 1, &err) * 1000000.0; - if (freqstr[1] != '\0' && *err == '\0' && bogo_clock > 0) - saw_bogo = true; + } else if (startsWithKey(ln, "bogomips")) { + if (!value.empty()) { + bogo_clock = std::stod(value) * 1000000.0; + if (bogo_clock < 0.0) bogo_clock = error_value; } - } else if (startsWithKey(line, "processor", /*IgnoreCase*/false)) { - // The above comparison is case-sensitive because ARM kernels often - // include a "Processor" line that tells you about the CPU, distinct - // from the usual "processor" lines that give you CPU ids. No current - // Linux architecture is using "Processor" for CPU ids. - num_cpus++; // count up every time we see an "processor :" entry - const char* id_str = strchr(line, ':'); - if (id_str) { - const long cpu_id = strtol(id_str + 1, &err, 10); - if (id_str[1] != '\0' && *err == '\0' && max_cpu_id < cpu_id) - max_cpu_id = cpu_id; - } - } - } while (chars_read > 0); - close(fd); - - if (!saw_mhz) { - if (saw_bogo) { - // If we didn't find anything better, we'll use bogomips, but - // we're not happy about it. - cpuinfo_cycles_per_second = bogo_clock; - } else { - // If we don't even have bogomips, we'll use the slow estimation. - cpuinfo_cycles_per_second = - static_cast(EstimateCyclesPerSecond()); } } - if (num_cpus == 0) { - fprintf(stderr, "Failed to read num. 
CPUs correctly from /proc/cpuinfo\n"); - } else { - if ((max_cpu_id + 1) != num_cpus) { - fprintf(stderr, - "CPU ID assignments in /proc/cpuinfo seem messed up." - " This is usually caused by a bad BIOS.\n"); - } - cpuinfo_num_cpus = num_cpus; + if (f.bad()) { + std::cerr << "Failure reading /proc/cpuinfo\n"; + return error_value; } + if (!f.eof()) { + std::cerr << "Failed to read to end of /proc/cpuinfo\n"; + return error_value; + } + f.close(); + // If we found the bogomips clock, but nothing better, we'll use it (but + // we're not happy about it); otherwise, fallback to the rough estimation + // below. + if (bogo_clock >= 0.0) return bogo_clock; -#elif defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_NETBSD -// FreeBSD notes -// ============= -// For this sysctl to work, the machine must be configured without -// SMP, APIC, or APM support. hz should be 64-bit in freebsd 7.0 -// and later. Before that, it's a 32-bit quantity (and gives the -// wrong answer on machines faster than 2^32 Hz). 
See -// http://lists.freebsd.org/pipermail/freebsd-i386/2004-November/001846.html -// But also compare FreeBSD 7.0: -// http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG70#L223 -// 231 error = sysctl_handle_quad(oidp, &freq, 0, req); -// To FreeBSD 6.3 (it's the same in 6-STABLE): -// http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG6#L131 -// 139 error = sysctl_handle_int(oidp, &freq, sizeof(freq), req); -#if (__FreeBSD__ >= 7) || defined(__NetBSD__) - uint64_t hz = 0; +#elif defined BENCHMARK_HAS_SYSCTL + constexpr auto* FreqStr = +#if defined(BENCHMARK_OS_FREEBSD) || defined(BENCHMARK_OS_NETBSD) + "machdep.tsc_freq"; #else - unsigned int hz = 0; + "hw.cpufrequency"; #endif - size_t sz = sizeof(hz); - const char* sysctl_path = "machdep.tsc_freq"; - if (sysctlbyname(sysctl_path, &hz, &sz, nullptr, 0) != 0) { - fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n", - sysctl_path, strerror(errno)); - cpuinfo_cycles_per_second = static_cast(EstimateCyclesPerSecond()); - } else { - cpuinfo_cycles_per_second = hz; - } + unsigned long long hz = 0; + if (GetSysctl(FreqStr, &hz)) return hz; + + fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n", + FreqStr, strerror(errno)); - int32_t num_cpus = 0; - size_t size = sizeof(num_cpus); - if (::sysctlbyname("hw.ncpu", &num_cpus, &size, nullptr, 0) == 0 && - (size == sizeof(num_cpus))) { - cpuinfo_num_cpus = num_cpus; - } else { - fprintf(stderr, "%s\n", strerror(errno)); - std::exit(EXIT_FAILURE); - } #elif defined BENCHMARK_OS_WINDOWS // In NT, read MHz from the registry. If we fail to do so or we're in win9x // then make a crude estimate. 
@@ -277,89 +404,27 @@ void InitializeSystemInfo() { SHGetValueA(HKEY_LOCAL_MACHINE, "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", "~MHz", nullptr, &data, &data_size))) - cpuinfo_cycles_per_second = - static_cast((int64_t)data * (int64_t)(1000 * 1000)); // was mhz - else - cpuinfo_cycles_per_second = static_cast(EstimateCyclesPerSecond()); - - SYSTEM_INFO sysinfo; - // Use memset as opposed to = {} to avoid GCC missing initializer false - // positives. - std::memset(&sysinfo, 0, sizeof(SYSTEM_INFO)); - GetSystemInfo(&sysinfo); - cpuinfo_num_cpus = sysinfo.dwNumberOfProcessors; // number of logical - // processors in the current - // group - -#elif defined BENCHMARK_OS_MACOSX - int32_t num_cpus = 0; - size_t size = sizeof(num_cpus); - if (::sysctlbyname("hw.ncpu", &num_cpus, &size, nullptr, 0) == 0 && - (size == sizeof(num_cpus))) { - cpuinfo_num_cpus = num_cpus; - } else { - fprintf(stderr, "%s\n", strerror(errno)); - std::exit(EXIT_FAILURE); - } - int64_t cpu_freq = 0; - size = sizeof(cpu_freq); - if (::sysctlbyname("hw.cpufrequency", &cpu_freq, &size, nullptr, 0) == 0 && - (size == sizeof(cpu_freq))) { - cpuinfo_cycles_per_second = cpu_freq; - } else { - #if defined BENCHMARK_OS_IOS - fprintf(stderr, "CPU frequency cannot be detected. \n"); - cpuinfo_cycles_per_second = 0; - #else - fprintf(stderr, "%s\n", strerror(errno)); - std::exit(EXIT_FAILURE); - #endif - } -#else - // Generic cycles per second counter - cpuinfo_cycles_per_second = static_cast(EstimateCyclesPerSecond()); + return static_cast((int64_t)data * + (int64_t)(1000 * 1000)); // was mhz #endif + // If we've fallen through, attempt to roughly estimate the CPU clock rate. 
+ const int estimate_time_ms = 1000; + const auto start_ticks = cycleclock::Now(); + SleepForMilliseconds(estimate_time_ms); + return static_cast<double>(cycleclock::Now() - start_ticks); } } // end namespace -double CyclesPerSecond(void) { - std::call_once(cpuinfo_init, InitializeSystemInfo); - return cpuinfo_cycles_per_second; +const CPUInfo& CPUInfo::Get() { + static const CPUInfo* info = new CPUInfo(); + return *info; } -int NumCPUs(void) { - std::call_once(cpuinfo_init, InitializeSystemInfo); - return cpuinfo_num_cpus; -} - -// The ""'s catch people who don't pass in a literal for "str" -#define strliterallen(str) (sizeof("" str "") - 1) - -// Must use a string literal for prefix. -#define memprefix(str, len, prefix) \ - ((((len) >= strliterallen(prefix)) && \ - std::memcmp(str, prefix, strliterallen(prefix)) == 0) \ - ? str + strliterallen(prefix) \ - : nullptr) - -bool CpuScalingEnabled() { -#ifndef BENCHMARK_OS_WINDOWS - // On Linux, the CPUfreq subsystem exposes CPU information as files on the - // local file system. If reading the exported files fails, then we may not be - // running on Linux, so we silently ignore all the read errors. 
- for (int cpu = 0, num_cpus = NumCPUs(); cpu < num_cpus; ++cpu) { - std::string governor_file = - StrCat("/sys/devices/system/cpu/cpu", cpu, "/cpufreq/scaling_governor"); - FILE* file = fopen(governor_file.c_str(), "r"); - if (!file) break; - char buff[16]; - size_t bytes_read = fread(buff, 1, sizeof(buff), file); - fclose(file); - if (memprefix(buff, bytes_read, "performance") == nullptr) return true; - } -#endif - return false; -} +CPUInfo::CPUInfo() + : num_cpus(GetNumCPUs()), + cycles_per_second(GetCPUCyclesPerSecond()), + caches(GetCacheSizes()), + scaling_enabled(CpuScalingEnabled(num_cpus)) {} } // end namespace benchmark diff --git a/src/sysinfo.h b/src/sysinfo.h deleted file mode 100644 index c5d9916d..00000000 --- a/src/sysinfo.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef BENCHMARK_SYSINFO_H_ -#define BENCHMARK_SYSINFO_H_ - -namespace benchmark { -int NumCPUs(); -double CyclesPerSecond(); -bool CpuScalingEnabled(); -} // end namespace benchmark - -#endif // BENCHMARK_SYSINFO_H_