diff --git a/include/benchmark/benchmark.h b/include/benchmark/benchmark.h index 16d2b903..b07073f9 100644 --- a/include/benchmark/benchmark.h +++ b/include/benchmark/benchmark.h @@ -1154,6 +1154,25 @@ class Fixture : public internal::Benchmark { namespace benchmark { +struct CPUInfo { + struct CacheInfo { + std::string type; + int level; + int size; + }; + + int num_cpus; + double cycles_per_second; + std::vector caches; + bool scaling_enabled; + + static const CPUInfo& Get(); + + private: + CPUInfo(); + BENCHMARK_DISALLOW_COPY_AND_ASSIGN(CPUInfo); +}; + // Interface for custom benchmark result printers. // By default, benchmark reports are printed to stdout. However an application // can control the destination of the reports by calling @@ -1162,12 +1181,11 @@ namespace benchmark { class BenchmarkReporter { public: struct Context { - int num_cpus; - double mhz_per_cpu; - bool cpu_scaling_enabled; - + CPUInfo const& cpu_info; // The number of chars in the longest benchmark name. size_t name_field_width; + + Context(); }; struct Run { diff --git a/src/benchmark.cc b/src/benchmark.cc index 95730fc2..0b2e13d6 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -37,13 +37,13 @@ #include "colorprint.h" #include "commandlineflags.h" #include "complexity.h" -#include "statistics.h" #include "counter.h" +#include "internal_macros.h" #include "log.h" #include "mutex.h" #include "re.h" +#include "statistics.h" #include "string_util.h" -#include "sysinfo.h" #include "timers.h" DEFINE_bool(benchmark_list_tests, false, @@ -108,6 +108,14 @@ namespace internal { void UseCharPointer(char const volatile*) {} +#ifdef BENCHMARK_HAS_NO_BUILTIN_UNREACHABLE +BENCHMARK_NORETURN void UnreachableImp(const char* FName, int Line) { + std::cerr << FName << ":" << Line << " executing unreachable code!" + << std::endl; + std::abort(); +} +#endif + class ThreadManager { public: ThreadManager(int num_threads) @@ -493,10 +501,6 @@ void RunBenchmarks(const std::vector& benchmarks, // Print header here BenchmarkReporter::Context context; - context.num_cpus = NumCPUs(); - context.mhz_per_cpu = CyclesPerSecond() / 1000000.0; - - context.cpu_scaling_enabled = CpuScalingEnabled(); context.name_field_width = name_field_width; // Keep track of runing times of all instances of current benchmark diff --git a/src/benchmark_register.cc b/src/benchmark_register.cc index c1b80674..d5746a36 100644 --- a/src/benchmark_register.cc +++ b/src/benchmark_register.cc @@ -42,7 +42,6 @@ #include "mutex.h" #include "re.h" #include "string_util.h" -#include "sysinfo.h" #include "timers.h" namespace benchmark { @@ -448,8 +447,7 @@ Benchmark* Benchmark::DenseThreadRange(int min_threads, int max_threads, } Benchmark* Benchmark::ThreadPerCpu() { - static int num_cpus = NumCPUs(); - thread_counts_.push_back(num_cpus); + thread_counts_.push_back(CPUInfo::Get().num_cpus); return this; } diff --git a/src/internal_macros.h b/src/internal_macros.h index b59261ca..d1ea079d 100644 --- a/src/internal_macros.h +++ b/src/internal_macros.h @@ -6,6 +6,9 @@ #ifndef __has_feature #define __has_feature(x) 0 #endif +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif #if defined(__clang__) #define COMPILER_CLANG @@ -56,4 +59,23 @@ #define BENCHMARK_HAS_NO_EXCEPTIONS #endif +#if defined(COMPILER_CLANG) || defined(COMPILER_GCC) +#define BENCHMARK_MAYBE_UNUSED __attribute__((unused)) +#else +#define BENCHMARK_MAYBE_UNUSED +#endif + +#if defined(COMPILER_GCC) || __has_builtin(__builtin_unreachable) +#define BENCHMARK_UNREACHABLE() __builtin_unreachable() +#else +#define BENCHMARK_HAS_NO_BUILTIN_UNREACHABLE +namespace benchmark { +namespace internal { +BENCHMARK_NORETURN void UnreachableImp(const char* FName, int Line); +} +} // namespace benchmark +#define BENCHMARK_UNREACHABLE() \ + ::benchmark::internal::UnreachableImp(__FILE__, __LINE__) +#endif + #endif // BENCHMARK_INTERNAL_MACROS_H_ diff --git a/src/json_reporter.cc b/src/json_reporter.cc index a49f5b0f..93a5bc83 100644 --- a/src/json_reporter.cc +++ b/src/json_reporter.cc @@ -77,11 +77,14 @@ bool JSONReporter::ReportContext(const Context& context) { std::string walltime_value = LocalDateTimeString(); out << indent << FormatKV("date", walltime_value) << ",\n"; - out << indent << FormatKV("num_cpus", static_cast(context.num_cpus)) + CPUInfo const& info = context.cpu_info; + out << indent << FormatKV("num_cpus", static_cast(info.num_cpus)) << ",\n"; - out << indent << FormatKV("mhz_per_cpu", RoundDouble(context.mhz_per_cpu)) + out << indent + << FormatKV("mhz_per_cpu", + RoundDouble(info.cycles_per_second / 1000000.0)) << ",\n"; - out << indent << FormatKV("cpu_scaling_enabled", context.cpu_scaling_enabled) + out << indent << FormatKV("cpu_scaling_enabled", info.scaling_enabled) << ",\n"; #if defined(NDEBUG) diff --git a/src/reporter.cc b/src/reporter.cc index 9a0830b0..c37ad94d 100644 --- a/src/reporter.cc +++ b/src/reporter.cc @@ -35,12 +35,21 @@ void BenchmarkReporter::PrintBasicContext(std::ostream *out, CHECK(out) << "cannot be null"; auto &Out = *out; - Out << "Run on (" << context.num_cpus << " X " << context.mhz_per_cpu - << " MHz CPU " << ((context.num_cpus > 1) ? "s" : "") << ")\n"; - Out << LocalDateTimeString() << "\n"; - if (context.cpu_scaling_enabled) { + const CPUInfo &info = context.cpu_info; + Out << "Run on (" << info.num_cpus << " X " + << (info.cycles_per_second / 1000000.0) << " MHz CPU " + << ((info.num_cpus > 1) ? "s" : "") << ")\n"; + if (info.caches.size() != 0) { + Out << "CPU Caches:\n"; + for (auto &CInfo : info.caches) { + Out << " L" << CInfo.level << " " << CInfo.type << " " + << (CInfo.size / 1000) << "K\n"; + } + } + + if (info.scaling_enabled) { Out << "***WARNING*** CPU scaling is enabled, the benchmark " "real time measurements may be noisy and will incur extra " "overhead.\n"; @@ -52,6 +61,8 @@ void BenchmarkReporter::PrintBasicContext(std::ostream *out, #endif } +BenchmarkReporter::Context::Context() : cpu_info(CPUInfo::Get()) {} + double BenchmarkReporter::Run::GetAdjustedRealTime() const { double new_time = real_accumulated_time * GetTimeUnitMultiplier(time_unit); if (iterations != 0) new_time /= static_cast(iterations); diff --git a/src/sysinfo.cc b/src/sysinfo.cc index 7997605f..8fea183f 100644 --- a/src/sysinfo.cc +++ b/src/sysinfo.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "sysinfo.h" #include "internal_macros.h" #ifdef BENCHMARK_OS_WINDOWS @@ -25,21 +24,26 @@ #include #include // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD #include -#if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX || defined BENCHMARK_OS_NETBSD +#if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX || \ + defined BENCHMARK_OS_NETBSD +#define BENCHMARK_HAS_SYSCTL #include #endif #endif +#include #include #include #include #include #include +#include #include +#include #include -#include +#include +#include -#include "arraysize.h" #include "check.h" #include "cycleclock.h" #include "internal_macros.h" @@ -49,225 +53,348 @@ namespace benchmark { namespace { -std::once_flag cpuinfo_init; -double cpuinfo_cycles_per_second = 1.0; -int cpuinfo_num_cpus = 1; // Conservative guess -#if !defined BENCHMARK_OS_MACOSX -const int64_t estimate_time_ms = 1000; +void PrintImp(std::ostream& out) { out << std::endl; } -// Helper function estimates cycles/sec by observing cycles elapsed during -// sleep(). Using small sleep time decreases accuracy significantly. -int64_t EstimateCyclesPerSecond() { - const int64_t start_ticks = cycleclock::Now(); - SleepForMilliseconds(estimate_time_ms); - return cycleclock::Now() - start_ticks; +template +void PrintImp(std::ostream& out, First&& f, Rest&&... rest) { + out << std::forward(f); + PrintImp(out, std::forward(rest)...); +} + +template +BENCHMARK_NORETURN void PrintErrorAndDie(Args&&... args) { + PrintImp(std::cerr, std::forward(args)...); + std::exit(EXIT_FAILURE); +} + +#ifdef BENCHMARK_HAS_SYSCTL + +#ifdef __GNUC__ +// Suppress the warning generated by the C11 flexible array member below. +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpedantic" +#endif + +/// ValueUnion - A type used to correctly alias the byte-for-byte output of +/// `sysctl` with the result type it's to be interpreted as. +struct ValueUnion { + union DataT { + uint32_t uint32_value; + uint64_t uint64_value; + // FIXME (Maybe?): This is a C11 flexible array member, and not technically + // C++. However, all compilers support it and it allows for correct aliasing + // of union members from bytes. + char bytes[]; + }; + using DataPtr = std::unique_ptr; + + // The size of the data union member + its trailing array size. + size_t Size; + DataPtr Buff; + + public: + ValueUnion() : Size(0), Buff(nullptr, &std::free) {} + + explicit ValueUnion(size_t BuffSize) + : Size(sizeof(DataT) + BuffSize), + Buff(::new (std::malloc(Size)) DataT(), &std::free) {} + + ValueUnion(ValueUnion&& other) = default; + + explicit operator bool() const { return bool(Buff); } + + char* data() const { return Buff->bytes; } + + std::string GetAsString() const { return std::string(data()); } + + int64_t GetAsInteger() const { + if (Size == sizeof(Buff->uint32_value)) + return static_cast(Buff->uint32_value); + else if (Size == sizeof(Buff->uint64_value)) + return static_cast(Buff->uint64_value); + BENCHMARK_UNREACHABLE(); + } + + uint64_t GetAsUnsigned() const { + if (Size == sizeof(Buff->uint32_value)) + return Buff->uint32_value; + else if (Size == sizeof(Buff->uint64_value)) + return Buff->uint64_value; + BENCHMARK_UNREACHABLE(); + } +}; + +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif + +ValueUnion GetSysctlImp(std::string const& Name) { + size_t CurBuffSize = 0; + if (sysctlbyname(Name.c_str(), nullptr, &CurBuffSize, nullptr, 0) == -1) + return ValueUnion(); + + ValueUnion buff(CurBuffSize); + if (sysctlbyname(Name.c_str(), buff.data(), &buff.Size, nullptr, 0) == 0) + return buff; + return ValueUnion(); +} + +BENCHMARK_MAYBE_UNUSED +bool GetSysctl(std::string const& Name, std::string* Out) { + Out->clear(); + auto Buff = GetSysctlImp(Name); + if (!Buff) return false; + Out->assign(Buff.data()); + return true; +} + +template ::value>::type> +bool GetSysctl(std::string const& Name, Tp* Out) { + *Out = 0; + auto Buff = GetSysctlImp(Name); + if (!Buff) return false; + *Out = static_cast(Buff.GetAsUnsigned()); + return true; } #endif -#if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN -// Helper function for reading an int from a file. Returns true if successful -// and the memory location pointed to by value is set to the value read. -bool ReadIntFromFile(const char* file, long* value) { - bool ret = false; - int fd = open(file, O_RDONLY); - if (fd != -1) { - char line[1024]; - char* err; - memset(line, '\0', sizeof(line)); - ssize_t read_err = read(fd, line, sizeof(line) - 1); - ((void)read_err); // prevent unused warning - CHECK(read_err >= 0); - const long temp_value = strtol(line, &err, 10); - if (line[0] != '\0' && (*err == '\n' || *err == '\0')) { - *value = temp_value; - ret = true; +template +bool ReadFromFile(std::string const& fname, ArgT* arg) { + *arg = ArgT(); + std::ifstream f(fname.c_str()); + if (!f.is_open()) return false; + f >> *arg; + return f.good(); +} + +bool CpuScalingEnabled(int num_cpus) { + // We don't have a valid CPU count, so don't even bother. + if (num_cpus <= 0) return false; +#ifndef BENCHMARK_OS_WINDOWS + // On Linux, the CPUfreq subsystem exposes CPU information as files on the + // local file system. If reading the exported files fails, then we may not be + // running on Linux, so we silently ignore all the read errors. + std::string res; + for (int cpu = 0; cpu < num_cpus; ++cpu) { + std::string governor_file = + StrCat("/sys/devices/system/cpu/cpu", cpu, "/cpufreq/scaling_governor"); + if (ReadFromFile(governor_file, &res) && res != "performance") return true; + } +#endif + return false; +} + +BENCHMARK_MAYBE_UNUSED +std::vector GetCacheSizesFromKVFS() { + std::vector res; + std::string dir = "/sys/devices/system/cpu/cpu0/cache/"; + int Idx = 0; + while (true) { + CPUInfo::CacheInfo info; + std::string FPath = StrCat(dir, "index", Idx++, "/"); + std::ifstream f(StrCat(FPath, "size").c_str()); + if (!f.is_open()) break; + std::string suffix; + f >> info.size; + if (f.fail()) + PrintErrorAndDie("Failed while reading file '", FPath, "size'"); + if (f.good()) { + f >> suffix; + if (f.bad()) + PrintErrorAndDie( + "Invalid cache size format: failed to read size suffix"); + else if (f && suffix != "K") + PrintErrorAndDie("Invalid cache size format: Expected bytes ", suffix); + else if (suffix == "K") + info.size *= 1000; } - close(fd); + if (!ReadFromFile(StrCat(FPath, "type"), &info.type)) + PrintErrorAndDie("Failed to read from file ", FPath, "type"); + if (!ReadFromFile(StrCat(FPath, "level"), &info.level)) + PrintErrorAndDie("Failed to read from file ", FPath, "level"); + res.push_back(info); } - return ret; + + return res; +} + +#ifdef BENCHMARK_OS_MACOSX +std::vector GetCacheSizesMacOSX() { + std::vector res; + struct { + std::string name; + std::string type; + int level; + } Cases[] = {{"hw.l1dcachesize", "Data", 1}, + {"hw.l1icachesize", "Instruction", 1}, + {"hw.l2cachesize", "Unified", 2}, + {"hw.l3cachesize", "Unified", 3}}; + for (auto& C : Cases) { + int val; + if (!GetSysctl(C.name, &val)) continue; + CPUInfo::CacheInfo info; + info.type = C.type; + info.level = C.level; + info.size = val; + res.push_back(std::move(info)); + } + return res; } #endif -#if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN -static std::string convertToLowerCase(std::string s) { - for (auto& ch : s) - ch = std::tolower(ch); - return s; -} -static bool startsWithKey(std::string Value, std::string Key, - bool IgnoreCase = true) { - if (IgnoreCase) { - Key = convertToLowerCase(std::move(Key)); - Value = convertToLowerCase(std::move(Value)); - } - return Value.compare(0, Key.size(), Key) == 0; -} +std::vector GetCacheSizes() { +#ifdef BENCHMARK_OS_MACOSX + return GetCacheSizesMacOSX(); +#else + return GetCacheSizesFromKVFS(); #endif +} -void InitializeSystemInfo() { +int GetNumCPUs() { +#ifdef BENCHMARK_HAS_SYSCTL + int NumCPU = -1; + if (GetSysctl("hw.ncpu", &NumCPU)) return NumCPU; + fprintf(stderr, "Err: %s\n", strerror(errno)); + std::exit(EXIT_FAILURE); +#elif defined(BENCHMARK_OS_WINDOWS) + SYSTEM_INFO sysinfo; + // Use memset as opposed to = {} to avoid GCC missing initializer false + // positives. + std::memset(&sysinfo, 0, sizeof(SYSTEM_INFO)); + GetSystemInfo(&sysinfo); + return sysinfo.dwNumberOfProcessors; // number of logical + // processors in the current + // group +#else + int NumCPUs = 0; + int MaxID = -1; + std::ifstream f("/proc/cpuinfo"); + if (!f.is_open()) { + std::cerr << "failed to open /proc/cpuinfo\n"; + return -1; + } + const std::string Key = "processor"; + std::string ln; + while (std::getline(f, ln)) { + if (ln.empty()) continue; + size_t SplitIdx = ln.find(':'); + std::string value; + if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1); + if (ln.size() >= Key.size() && ln.compare(0, Key.size(), Key) == 0) { + NumCPUs++; + if (!value.empty()) { + int CurID = std::stoi(value); + MaxID = std::max(CurID, MaxID); + } + } + } + if (f.bad()) { + std::cerr << "Failure reading /proc/cpuinfo\n"; + return -1; + } + if (!f.eof()) { + std::cerr << "Failed to read to end of /proc/cpuinfo\n"; + return -1; + } + f.close(); + + if ((MaxID + 1) != NumCPUs) { + fprintf(stderr, + "CPU ID assignments in /proc/cpuinfo seem messed up." + " This is usually caused by a bad BIOS.\n"); + } + return NumCPUs; +#endif + BENCHMARK_UNREACHABLE(); +} + +double GetCPUCyclesPerSecond() { #if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN - char line[1024]; - char* err; long freq; - bool saw_mhz = false; - // If the kernel is exporting the tsc frequency use that. There are issues // where cpuinfo_max_freq cannot be relied on because the BIOS may be // exporintg an invalid p-state (on x86) or p-states may be used to put the // processor in a new mode (turbo mode). Essentially, those frequencies // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as // well. - if (!saw_mhz && - ReadIntFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) { + if (ReadFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq) + // If CPU scaling is in effect, we want to use the *maximum* frequency, + // not whatever CPU speed some random processor happens to be using now. + || ReadFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", + &freq)) { // The value is in kHz (as the file name suggests). For example, on a // 2GHz warpstation, the file contains the value "2000000". - cpuinfo_cycles_per_second = freq * 1000.0; - saw_mhz = true; + return freq * 1000.0; } - // If CPU scaling is in effect, we want to use the *maximum* frequency, - // not whatever CPU speed some random processor happens to be using now. - if (!saw_mhz && - ReadIntFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", - &freq)) { - // The value is in kHz. For example, on a 2GHz warpstation, the file - // contains the value "2000000". - cpuinfo_cycles_per_second = freq * 1000.0; - saw_mhz = true; + const double error_value = -1; + double bogo_clock = error_value; + + std::ifstream f("/proc/cpuinfo"); + if (!f.is_open()) { + std::cerr << "failed to open /proc/cpuinfo\n"; + return error_value; } - // Read /proc/cpuinfo for other values, and if there is no cpuinfo_max_freq. - const char* pname = "/proc/cpuinfo"; - int fd = open(pname, O_RDONLY); - if (fd == -1) { - perror(pname); - if (!saw_mhz) { - cpuinfo_cycles_per_second = - static_cast(EstimateCyclesPerSecond()); - } - return; - } - - double bogo_clock = 1.0; - bool saw_bogo = false; - long max_cpu_id = 0; - int num_cpus = 0; - line[0] = line[1] = '\0'; - size_t chars_read = 0; - do { // we'll exit when the last read didn't read anything - // Move the next line to the beginning of the buffer - const size_t oldlinelen = strlen(line); - if (sizeof(line) == oldlinelen + 1) // oldlinelen took up entire line - line[0] = '\0'; - else // still other lines left to save - memmove(line, line + oldlinelen + 1, sizeof(line) - (oldlinelen + 1)); - // Terminate the new line, reading more if we can't find the newline - char* newline = strchr(line, '\n'); - if (newline == nullptr) { - const size_t linelen = strlen(line); - const size_t bytes_to_read = sizeof(line) - 1 - linelen; - CHECK(bytes_to_read > 0); // because the memmove recovered >=1 bytes - chars_read = read(fd, line + linelen, bytes_to_read); - line[linelen + chars_read] = '\0'; - newline = strchr(line, '\n'); - } - if (newline != nullptr) *newline = '\0'; + auto startsWithKey = [](std::string const& Value, std::string const& Key) { + if (Key.size() > Value.size()) return false; + auto Cmp = [&](char X, char Y) { + return std::tolower(X) == std::tolower(Y); + }; + return std::equal(Key.begin(), Key.end(), Value.begin(), Cmp); + }; + std::string ln; + while (std::getline(f, ln)) { + if (ln.empty()) continue; + size_t SplitIdx = ln.find(':'); + std::string value; + if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1); // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only // accept postive values. Some environments (virtual machines) report zero, // which would cause infinite looping in WallTime_Init. - if (!saw_mhz && startsWithKey(line, "cpu MHz")) { - const char* freqstr = strchr(line, ':'); - if (freqstr) { - cpuinfo_cycles_per_second = strtod(freqstr + 1, &err) * 1000000.0; - if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0) - saw_mhz = true; + if (startsWithKey(ln, "cpu MHz")) { + if (!value.empty()) { + double cycles_per_second = std::stod(value) * 1000000.0; + if (cycles_per_second > 0) return cycles_per_second; } - } else if (startsWithKey(line, "bogomips")) { - const char* freqstr = strchr(line, ':'); - if (freqstr) { - bogo_clock = strtod(freqstr + 1, &err) * 1000000.0; - if (freqstr[1] != '\0' && *err == '\0' && bogo_clock > 0) - saw_bogo = true; + } else if (startsWithKey(ln, "bogomips")) { + if (!value.empty()) { + bogo_clock = std::stod(value) * 1000000.0; + if (bogo_clock < 0.0) bogo_clock = error_value; } - } else if (startsWithKey(line, "processor", /*IgnoreCase*/false)) { - // The above comparison is case-sensitive because ARM kernels often - // include a "Processor" line that tells you about the CPU, distinct - // from the usual "processor" lines that give you CPU ids. No current - // Linux architecture is using "Processor" for CPU ids. - num_cpus++; // count up every time we see an "processor :" entry - const char* id_str = strchr(line, ':'); - if (id_str) { - const long cpu_id = strtol(id_str + 1, &err, 10); - if (id_str[1] != '\0' && *err == '\0' && max_cpu_id < cpu_id) - max_cpu_id = cpu_id; - } - } - } while (chars_read > 0); - close(fd); - - if (!saw_mhz) { - if (saw_bogo) { - // If we didn't find anything better, we'll use bogomips, but - // we're not happy about it. - cpuinfo_cycles_per_second = bogo_clock; - } else { - // If we don't even have bogomips, we'll use the slow estimation. - cpuinfo_cycles_per_second = - static_cast(EstimateCyclesPerSecond()); } } - if (num_cpus == 0) { - fprintf(stderr, "Failed to read num. CPUs correctly from /proc/cpuinfo\n"); - } else { - if ((max_cpu_id + 1) != num_cpus) { - fprintf(stderr, - "CPU ID assignments in /proc/cpuinfo seem messed up." - " This is usually caused by a bad BIOS.\n"); - } - cpuinfo_num_cpus = num_cpus; + if (f.bad()) { + std::cerr << "Failure reading /proc/cpuinfo\n"; + return error_value; } + if (!f.eof()) { + std::cerr << "Failed to read to end of /proc/cpuinfo\n"; + return error_value; + } + f.close(); + // If we found the bogomips clock, but nothing better, we'll use it (but + // we're not happy about it); otherwise, fallback to the rough estimation + // below. + if (bogo_clock >= 0.0) return bogo_clock; -#elif defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_NETBSD -// FreeBSD notes -// ============= -// For this sysctl to work, the machine must be configured without -// SMP, APIC, or APM support. hz should be 64-bit in freebsd 7.0 -// and later. Before that, it's a 32-bit quantity (and gives the -// wrong answer on machines faster than 2^32 Hz). See -// http://lists.freebsd.org/pipermail/freebsd-i386/2004-November/001846.html -// But also compare FreeBSD 7.0: -// http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG70#L223 -// 231 error = sysctl_handle_quad(oidp, &freq, 0, req); -// To FreeBSD 6.3 (it's the same in 6-STABLE): -// http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG6#L131 -// 139 error = sysctl_handle_int(oidp, &freq, sizeof(freq), req); -#if (__FreeBSD__ >= 7) || defined(__NetBSD__) - uint64_t hz = 0; +#elif defined BENCHMARK_HAS_SYSCTL + constexpr auto* FreqStr = +#if defined(BENCHMARK_OS_FREEBSD) || defined(BENCHMARK_OS_NETBSD) + "machdep.tsc_freq"; #else - unsigned int hz = 0; + "hw.cpufrequency"; #endif - size_t sz = sizeof(hz); - const char* sysctl_path = "machdep.tsc_freq"; - if (sysctlbyname(sysctl_path, &hz, &sz, nullptr, 0) != 0) { - fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n", - sysctl_path, strerror(errno)); - cpuinfo_cycles_per_second = static_cast(EstimateCyclesPerSecond()); - } else { - cpuinfo_cycles_per_second = hz; - } + unsigned long long hz = 0; + if (GetSysctl(FreqStr, &hz)) return hz; + + fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n", + FreqStr, strerror(errno)); - int32_t num_cpus = 0; - size_t size = sizeof(num_cpus); - if (::sysctlbyname("hw.ncpu", &num_cpus, &size, nullptr, 0) == 0 && - (size == sizeof(num_cpus))) { - cpuinfo_num_cpus = num_cpus; - } else { - fprintf(stderr, "%s\n", strerror(errno)); - std::exit(EXIT_FAILURE); - } #elif defined BENCHMARK_OS_WINDOWS // In NT, read MHz from the registry. If we fail to do so or we're in win9x // then make a crude estimate. @@ -277,89 +404,27 @@ void InitializeSystemInfo() { SHGetValueA(HKEY_LOCAL_MACHINE, "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", "~MHz", nullptr, &data, &data_size))) - cpuinfo_cycles_per_second = - static_cast((int64_t)data * (int64_t)(1000 * 1000)); // was mhz - else - cpuinfo_cycles_per_second = static_cast(EstimateCyclesPerSecond()); - - SYSTEM_INFO sysinfo; - // Use memset as opposed to = {} to avoid GCC missing initializer false - // positives. - std::memset(&sysinfo, 0, sizeof(SYSTEM_INFO)); - GetSystemInfo(&sysinfo); - cpuinfo_num_cpus = sysinfo.dwNumberOfProcessors; // number of logical - // processors in the current - // group - -#elif defined BENCHMARK_OS_MACOSX - int32_t num_cpus = 0; - size_t size = sizeof(num_cpus); - if (::sysctlbyname("hw.ncpu", &num_cpus, &size, nullptr, 0) == 0 && - (size == sizeof(num_cpus))) { - cpuinfo_num_cpus = num_cpus; - } else { - fprintf(stderr, "%s\n", strerror(errno)); - std::exit(EXIT_FAILURE); - } - int64_t cpu_freq = 0; - size = sizeof(cpu_freq); - if (::sysctlbyname("hw.cpufrequency", &cpu_freq, &size, nullptr, 0) == 0 && - (size == sizeof(cpu_freq))) { - cpuinfo_cycles_per_second = cpu_freq; - } else { - #if defined BENCHMARK_OS_IOS - fprintf(stderr, "CPU frequency cannot be detected. \n"); - cpuinfo_cycles_per_second = 0; - #else - fprintf(stderr, "%s\n", strerror(errno)); - std::exit(EXIT_FAILURE); - #endif - } -#else - // Generic cycles per second counter - cpuinfo_cycles_per_second = static_cast(EstimateCyclesPerSecond()); + return static_cast((int64_t)data * + (int64_t)(1000 * 1000)); // was mhz #endif + // If we've fallen through, attempt to roughly estimate the CPU clock rate. + const int estimate_time_ms = 1000; + const auto start_ticks = cycleclock::Now(); + SleepForMilliseconds(estimate_time_ms); + return static_cast(cycleclock::Now() - start_ticks); } } // end namespace -double CyclesPerSecond(void) { - std::call_once(cpuinfo_init, InitializeSystemInfo); - return cpuinfo_cycles_per_second; +const CPUInfo& CPUInfo::Get() { + static const CPUInfo* info = new CPUInfo(); + return *info; } -int NumCPUs(void) { - std::call_once(cpuinfo_init, InitializeSystemInfo); - return cpuinfo_num_cpus; -} - -// The ""'s catch people who don't pass in a literal for "str" -#define strliterallen(str) (sizeof("" str "") - 1) - -// Must use a string literal for prefix. -#define memprefix(str, len, prefix) \ - ((((len) >= strliterallen(prefix)) && \ - std::memcmp(str, prefix, strliterallen(prefix)) == 0) \ - ? str + strliterallen(prefix) \ - : nullptr) - -bool CpuScalingEnabled() { -#ifndef BENCHMARK_OS_WINDOWS - // On Linux, the CPUfreq subsystem exposes CPU information as files on the - // local file system. If reading the exported files fails, then we may not be - // running on Linux, so we silently ignore all the read errors. - for (int cpu = 0, num_cpus = NumCPUs(); cpu < num_cpus; ++cpu) { - std::string governor_file = - StrCat("/sys/devices/system/cpu/cpu", cpu, "/cpufreq/scaling_governor"); - FILE* file = fopen(governor_file.c_str(), "r"); - if (!file) break; - char buff[16]; - size_t bytes_read = fread(buff, 1, sizeof(buff), file); - fclose(file); - if (memprefix(buff, bytes_read, "performance") == nullptr) return true; - } -#endif - return false; -} +CPUInfo::CPUInfo() + : num_cpus(GetNumCPUs()), + cycles_per_second(GetCPUCyclesPerSecond()), + caches(GetCacheSizes()), + scaling_enabled(CpuScalingEnabled(num_cpus)) {} } // end namespace benchmark diff --git a/src/sysinfo.h b/src/sysinfo.h deleted file mode 100644 index c5d9916d..00000000 --- a/src/sysinfo.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef BENCHMARK_SYSINFO_H_ -#define BENCHMARK_SYSINFO_H_ - -namespace benchmark { -int NumCPUs(); -double CyclesPerSecond(); -bool CpuScalingEnabled(); -} // end namespace benchmark - -#endif // BENCHMARK_SYSINFO_H_