mirror of
https://github.com/google/benchmark.git
synced 2025-03-06 15:20:10 +08:00
Code reformat
This commit is contained in:
parent
8009351938
commit
4ce184d86d
@ -194,7 +194,7 @@ class State {
|
||||
//
|
||||
// REQUIRES: a benchmark has exited its KeepRunning loop.
|
||||
void SetBytesProcessed(int64_t bytes);
|
||||
|
||||
|
||||
// If this routine is called with items > 0, then an items/s
|
||||
// label is printed on the benchmark report line for the currently
|
||||
// executing benchmark. It is typically called at the end of a processing
|
||||
@ -243,11 +243,11 @@ class State {
|
||||
void Wait();
|
||||
|
||||
enum EState {
|
||||
STATE_INITIAL, // KeepRunning hasn't been called
|
||||
STATE_STARTING, // KeepRunning called, waiting for other threads
|
||||
STATE_RUNNING, // Running and being timed
|
||||
STATE_STOPPING, // Not being timed but waiting for other threads
|
||||
STATE_STOPPED, // Stopped
|
||||
STATE_INITIAL, // KeepRunning hasn't been called
|
||||
STATE_STARTING, // KeepRunning called, waiting for other threads
|
||||
STATE_RUNNING, // Running and being timed
|
||||
STATE_STOPPING, // Not being timed but waiting for other threads
|
||||
STATE_STOPPED, // Stopped
|
||||
};
|
||||
|
||||
EState state_;
|
||||
@ -414,8 +414,8 @@ class Benchmark {
|
||||
static void RunInstance(const Instance& b, BenchmarkReporter* br);
|
||||
friend class ::benchmark::State;
|
||||
friend struct ::benchmark::internal::Benchmark::Instance;
|
||||
friend void ::benchmark::internal::RunMatchingBenchmarks(
|
||||
const std::string&, BenchmarkReporter*);
|
||||
friend void ::benchmark::internal::RunMatchingBenchmarks(const std::string&,
|
||||
BenchmarkReporter*);
|
||||
DISALLOW_COPY_AND_ASSIGN(Benchmark);
|
||||
};
|
||||
|
||||
@ -425,7 +425,7 @@ class Benchmark {
|
||||
struct BenchmarkContextData {
|
||||
int num_cpus;
|
||||
double mhz_per_cpu;
|
||||
//std::string cpu_info;
|
||||
// std::string cpu_info;
|
||||
bool cpu_scaling_enabled;
|
||||
|
||||
// The number of chars in the longest benchmark name.
|
||||
@ -433,14 +433,14 @@ struct BenchmarkContextData {
|
||||
};
|
||||
|
||||
struct BenchmarkRunData {
|
||||
BenchmarkRunData() :
|
||||
thread_index(-1),
|
||||
iterations(1),
|
||||
real_accumulated_time(0),
|
||||
cpu_accumulated_time(0),
|
||||
bytes_per_second(0),
|
||||
items_per_second(0),
|
||||
max_heapbytes_used(0) {}
|
||||
BenchmarkRunData()
|
||||
: thread_index(-1),
|
||||
iterations(1),
|
||||
real_accumulated_time(0),
|
||||
cpu_accumulated_time(0),
|
||||
bytes_per_second(0),
|
||||
items_per_second(0),
|
||||
max_heapbytes_used(0) {}
|
||||
|
||||
std::string benchmark_name;
|
||||
std::string report_label;
|
||||
@ -481,15 +481,13 @@ class BenchmarkReporter {
|
||||
virtual ~BenchmarkReporter();
|
||||
};
|
||||
|
||||
|
||||
// ------------------------------------------------------
|
||||
// Internal implementation details follow; please ignore
|
||||
|
||||
// Given a collection of reports, computes their mean and stddev.
|
||||
// REQUIRES: all runs in "reports" must be from the same benchmark.
|
||||
void ComputeStats(const std::vector<BenchmarkRunData>& reports,
|
||||
BenchmarkRunData* mean_data,
|
||||
BenchmarkRunData* stddev_data);
|
||||
BenchmarkRunData* mean_data, BenchmarkRunData* stddev_data);
|
||||
|
||||
// Simple reporter that outputs benchmark data to the console. This is the
|
||||
// default reporter used by RunSpecifiedBenchmarks().
|
||||
@ -497,6 +495,7 @@ class ConsoleReporter : public BenchmarkReporter {
|
||||
public:
|
||||
virtual bool ReportContext(const BenchmarkContextData& context);
|
||||
virtual void ReportRuns(const std::vector<BenchmarkRunData>& reports);
|
||||
|
||||
private:
|
||||
std::string PrintMemoryUsage(double bytes);
|
||||
virtual void PrintRunData(const BenchmarkRunData& report);
|
||||
@ -513,11 +512,11 @@ void Initialize(int* argc, const char** argv);
|
||||
|
||||
// Helpers for generating unique variable names
|
||||
#define BENCHMARK_CONCAT(a, b, c) BENCHMARK_CONCAT2(a, b, c)
|
||||
#define BENCHMARK_CONCAT2(a, b, c) a ## b ## c
|
||||
#define BENCHMARK_CONCAT2(a, b, c) a##b##c
|
||||
|
||||
#define BENCHMARK(n) \
|
||||
static ::benchmark::internal::Benchmark* \
|
||||
BENCHMARK_CONCAT(__benchmark_, n, __LINE__) ATTRIBUTE_UNUSED = \
|
||||
#define BENCHMARK(n) \
|
||||
static ::benchmark::internal::Benchmark* BENCHMARK_CONCAT( \
|
||||
__benchmark_, n, __LINE__) ATTRIBUTE_UNUSED = \
|
||||
(new ::benchmark::internal::Benchmark(#n, n))
|
||||
|
||||
// Old-style macros
|
||||
@ -525,7 +524,7 @@ void Initialize(int* argc, const char** argv);
|
||||
#define BENCHMARK_WITH_ARG2(n, a1, a2) BENCHMARK(n)->ArgPair((a1), (a2))
|
||||
#define BENCHMARK_RANGE(n, lo, hi) BENCHMARK(n)->Range((lo), (hi))
|
||||
#define BENCHMARK_RANGE2(n, l1, h1, l2, h2) \
|
||||
BENCHMARK(n)->RangePair((l1), (h1), (l2), (h2))
|
||||
BENCHMARK(n)->RangePair((l1), (h1), (l2), (h2))
|
||||
|
||||
// This will register a benchmark for a templatized function. For example:
|
||||
//
|
||||
@ -535,14 +534,14 @@ void Initialize(int* argc, const char** argv);
|
||||
// BENCHMARK_TEMPLATE(BM_Foo, 1);
|
||||
//
|
||||
// will register BM_Foo<1> as a benchmark.
|
||||
#define BENCHMARK_TEMPLATE(n, a) \
|
||||
static ::benchmark::internal::Benchmark* \
|
||||
BENCHMARK_CONCAT(__benchmark_, n, __LINE__) ATTRIBUTE_UNUSED = \
|
||||
#define BENCHMARK_TEMPLATE(n, a) \
|
||||
static ::benchmark::internal::Benchmark* BENCHMARK_CONCAT( \
|
||||
__benchmark_, n, __LINE__) ATTRIBUTE_UNUSED = \
|
||||
(new ::benchmark::internal::Benchmark(#n "<" #a ">", n<a>))
|
||||
|
||||
#define BENCHMARK_TEMPLATE2(n, a, b) \
|
||||
static ::benchmark::internal::Benchmark* \
|
||||
BENCHMARK_CONCAT(__benchmark_, n, __LINE__) ATTRIBUTE_UNUSED = \
|
||||
#define BENCHMARK_TEMPLATE2(n, a, b) \
|
||||
static ::benchmark::internal::Benchmark* BENCHMARK_CONCAT( \
|
||||
__benchmark_, n, __LINE__) ATTRIBUTE_UNUSED = \
|
||||
(new ::benchmark::internal::Benchmark(#n "<" #a "," #b ">", n<a, b>))
|
||||
|
||||
#endif // BENCHMARK_BENCHMARK_H_
|
||||
|
@ -34,7 +34,10 @@ char (&ArraySizeHelper(const T (&array)[N]))[N];
|
||||
|
||||
#define arraysize(array) (sizeof(ArraySizeHelper(array)))
|
||||
|
||||
#define CHECK(b) do { if (!(b)) assert(false); } while(0)
|
||||
#define CHECK(b) \
|
||||
do { \
|
||||
if (!(b)) assert(false); \
|
||||
} while (0)
|
||||
#define CHECK_EQ(a, b) CHECK((a) == (b))
|
||||
#define CHECK_NE(a, b) CHECK((a) != (b))
|
||||
#define CHECK_GE(a, b) CHECK((a) >= (b))
|
||||
@ -45,14 +48,14 @@ char (&ArraySizeHelper(const T (&array)[N]))[N];
|
||||
//
|
||||
// Prevent the compiler from complaining about or optimizing away variables
|
||||
// that appear unused.
|
||||
#define ATTRIBUTE_UNUSED __attribute__ ((unused))
|
||||
#define ATTRIBUTE_UNUSED __attribute__((unused))
|
||||
|
||||
//
|
||||
// For functions we want to force inline or not inline.
|
||||
// Introduced in gcc 3.1.
|
||||
#define ATTRIBUTE_ALWAYS_INLINE __attribute__ ((always_inline))
|
||||
#define ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline))
|
||||
#define HAVE_ATTRIBUTE_ALWAYS_INLINE 1
|
||||
#define ATTRIBUTE_NOINLINE __attribute__ ((noinline))
|
||||
#define ATTRIBUTE_NOINLINE __attribute__((noinline))
|
||||
#define HAVE_ATTRIBUTE_NOINLINE 1
|
||||
|
||||
#endif // BENCHMARK_MACROS_H_
|
||||
|
324
src/benchmark.cc
324
src/benchmark.cc
@ -58,14 +58,14 @@ DEFINE_bool(color_print, true, "Enables colorized logging.");
|
||||
DECLARE_string(heap_check);
|
||||
|
||||
// The ""'s catch people who don't pass in a literal for "str"
|
||||
#define strliterallen(str) (sizeof("" str "")-1)
|
||||
#define strliterallen(str) (sizeof("" str "") - 1)
|
||||
|
||||
// Must use a string literal for prefix.
|
||||
#define memprefix(str, len, prefix) \
|
||||
( (((len) >= strliterallen(prefix)) \
|
||||
&& memcmp(str, prefix, strliterallen(prefix)) == 0) \
|
||||
? str + strliterallen(prefix) \
|
||||
: NULL )
|
||||
#define memprefix(str, len, prefix) \
|
||||
((((len) >= strliterallen(prefix)) && \
|
||||
memcmp(str, prefix, strliterallen(prefix)) == 0) \
|
||||
? str + strliterallen(prefix) \
|
||||
: NULL)
|
||||
|
||||
namespace benchmark {
|
||||
namespace {
|
||||
@ -83,9 +83,8 @@ static_assert(arraysize(kSmallSIUnits) == arraysize(kBigSIUnits),
|
||||
"Small SI and Big SI unit arrays must be the same size");
|
||||
static const int kUnitsSize = arraysize(kBigSIUnits);
|
||||
|
||||
void ToExponentAndMantissa(double val, double thresh,
|
||||
int precision, double one_k,
|
||||
std::string* mantissa, int* exponent) {
|
||||
void ToExponentAndMantissa(double val, double thresh, int precision,
|
||||
double one_k, std::string* mantissa, int* exponent) {
|
||||
std::stringstream mantissa_stream;
|
||||
|
||||
if (val < 0) {
|
||||
@ -136,15 +135,13 @@ void ToExponentAndMantissa(double val, double thresh,
|
||||
}
|
||||
|
||||
std::string ExponentToPrefix(int exponent, bool iec) {
|
||||
if (exponent == 0)
|
||||
return "";
|
||||
if (exponent == 0) return "";
|
||||
|
||||
const int index = (exponent > 0 ? exponent - 1 : -exponent - 1);
|
||||
if (index >= kUnitsSize)
|
||||
return "";
|
||||
if (index >= kUnitsSize) return "";
|
||||
|
||||
const char *array = (exponent > 0 ? (iec ? kBigIECUnits : kBigSIUnits) :
|
||||
kSmallSIUnits);
|
||||
const char* array =
|
||||
(exponent > 0 ? (iec ? kBigIECUnits : kBigSIUnits) : kSmallSIUnits);
|
||||
if (iec)
|
||||
return array[index] + std::string("i");
|
||||
else
|
||||
@ -207,7 +204,7 @@ const char* Prefix() {
|
||||
}
|
||||
|
||||
// TODO
|
||||
//static internal::MallocCounter *benchmark_mc;
|
||||
// static internal::MallocCounter *benchmark_mc;
|
||||
|
||||
bool CpuScalingEnabled() {
|
||||
// On Linux, the CPUfreq subsystem exposes CPU information as files on the
|
||||
@ -218,13 +215,11 @@ bool CpuScalingEnabled() {
|
||||
ss << "/sys/devices/system/cpu/cpu" << cpu << "/cpufreq/scaling_governor";
|
||||
std::string governor_file = ss.str();
|
||||
FILE* file = fopen(governor_file.c_str(), "r");
|
||||
if (!file)
|
||||
break;
|
||||
if (!file) break;
|
||||
char buff[16];
|
||||
size_t bytes_read = fread(buff, 1, sizeof(buff), file);
|
||||
fclose(file);
|
||||
if (memprefix(buff, bytes_read, "performance") == NULL)
|
||||
return true;
|
||||
if (memprefix(buff, bytes_read, "performance") == NULL) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@ -236,8 +231,7 @@ namespace internal {
|
||||
BenchmarkReporter::~BenchmarkReporter() {}
|
||||
|
||||
void ComputeStats(const std::vector<BenchmarkRunData>& reports,
|
||||
BenchmarkRunData* mean_data,
|
||||
BenchmarkRunData* stddev_data) {
|
||||
BenchmarkRunData* mean_data, BenchmarkRunData* stddev_data) {
|
||||
// Accumulators.
|
||||
Stat1_d real_accumulated_time_stat;
|
||||
Stat1_d cpu_accumulated_time_stat;
|
||||
@ -257,8 +251,8 @@ void ComputeStats(const std::vector<BenchmarkRunData>& reports,
|
||||
items_per_second_stat += Stat1_d(it->items_per_second, it->iterations);
|
||||
bytes_per_second_stat += Stat1_d(it->bytes_per_second, it->iterations);
|
||||
iterations_stat += Stat1_d(it->iterations, it->iterations);
|
||||
max_heapbytes_used_stat += Stat1MinMax_d(it->max_heapbytes_used,
|
||||
it->iterations);
|
||||
max_heapbytes_used_stat +=
|
||||
Stat1MinMax_d(it->max_heapbytes_used, it->iterations);
|
||||
}
|
||||
|
||||
// Get the data from the accumulator to BenchmarkRunData's.
|
||||
@ -268,7 +262,7 @@ void ComputeStats(const std::vector<BenchmarkRunData>& reports,
|
||||
mean_data->cpu_accumulated_time = cpu_accumulated_time_stat.Mean();
|
||||
mean_data->bytes_per_second = bytes_per_second_stat.Mean();
|
||||
mean_data->items_per_second = items_per_second_stat.Mean();
|
||||
mean_data->max_heapbytes_used = max_heapbytes_used_stat.Max();
|
||||
mean_data->max_heapbytes_used = max_heapbytes_used_stat.max();
|
||||
|
||||
// Only add label to mean/stddev if it is same for all runs
|
||||
mean_data->report_label = reports[0].report_label;
|
||||
@ -290,8 +284,7 @@ void ComputeStats(const std::vector<BenchmarkRunData>& reports,
|
||||
}
|
||||
|
||||
std::string ConsoleReporter::PrintMemoryUsage(double bytes) {
|
||||
if (!get_memory_usage || bytes < 0.0)
|
||||
return "";
|
||||
if (!get_memory_usage || bytes < 0.0) return "";
|
||||
|
||||
std::stringstream ss;
|
||||
ss << " " << HumanReadableNumber(bytes) << "B peak-mem";
|
||||
@ -307,12 +300,12 @@ bool ConsoleReporter::ReportContext(const BenchmarkContextData& context) {
|
||||
|
||||
int remainder_ms;
|
||||
std::cout << walltime::Print(walltime::Now(), "%Y/%m/%d-%H:%M:%S",
|
||||
true, // use local timezone
|
||||
&remainder_ms) << "\n";
|
||||
true, // use local timezone
|
||||
&remainder_ms) << "\n";
|
||||
|
||||
// Show details of CPU model, caches, TLBs etc.
|
||||
// if (!context.cpu_info.empty())
|
||||
// std::cout << "CPU: " << context.cpu_info.c_str();
|
||||
// if (!context.cpu_info.empty())
|
||||
// std::cout << "CPU: " << context.cpu_info.c_str();
|
||||
|
||||
if (context.cpu_scaling_enabled) {
|
||||
std::cerr << "CPU scaling is enabled: Benchmark timings may be noisy.\n";
|
||||
@ -334,8 +327,7 @@ void ConsoleReporter::ReportRuns(const std::vector<BenchmarkRunData>& reports) {
|
||||
}
|
||||
|
||||
// We don't report aggregated data if there was a single run.
|
||||
if (reports.size() < 2)
|
||||
return;
|
||||
if (reports.size() < 2) return;
|
||||
|
||||
BenchmarkRunData mean_data;
|
||||
BenchmarkRunData stddev_data;
|
||||
@ -379,45 +371,42 @@ void ConsoleReporter::PrintRunData(const BenchmarkRunData& result) {
|
||||
}
|
||||
|
||||
void MemoryUsage() {
|
||||
//if (benchmark_mc) {
|
||||
// if (benchmark_mc) {
|
||||
// benchmark_mc->Reset();
|
||||
//} else {
|
||||
get_memory_usage = true;
|
||||
get_memory_usage = true;
|
||||
//}
|
||||
}
|
||||
|
||||
void UseRealTime() {
|
||||
use_real_time = true;
|
||||
}
|
||||
void UseRealTime() { use_real_time = true; }
|
||||
|
||||
void PrintUsageAndExit() {
|
||||
fprintf(stdout, "benchmark [--benchmark_filter=<regex>]\n"
|
||||
" [--benchmark_iterations=<iterations>]\n"
|
||||
" [--benchmark_min_time=<min_time>]\n"
|
||||
// " [--benchmark_memory_usage]\n"
|
||||
" [--benchmark_repetitions=<num_repetitions>]\n"
|
||||
" [--color_print={true|false}]\n"
|
||||
" [--v=<verbosity>]\n");
|
||||
fprintf(stdout,
|
||||
"benchmark [--benchmark_filter=<regex>]\n"
|
||||
" [--benchmark_iterations=<iterations>]\n"
|
||||
" [--benchmark_min_time=<min_time>]\n"
|
||||
//" [--benchmark_memory_usage]\n"
|
||||
" [--benchmark_repetitions=<num_repetitions>]\n"
|
||||
" [--color_print={true|false}]\n"
|
||||
" [--v=<verbosity>]\n");
|
||||
exit(0);
|
||||
}
|
||||
|
||||
void ParseCommandLineFlags(int* argc, const char** argv) {
|
||||
for (int i = 1; i < *argc; ++i) {
|
||||
if (ParseStringFlag(argv[i], "benchmark_filter",
|
||||
&FLAGS_benchmark_filter) ||
|
||||
if (ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) ||
|
||||
ParseInt32Flag(argv[i], "benchmark_iterations",
|
||||
&FLAGS_benchmark_iterations) ||
|
||||
ParseDoubleFlag(argv[i], "benchmark_min_time",
|
||||
&FLAGS_benchmark_min_time) ||
|
||||
// TODO(dominic)
|
||||
// ParseBoolFlag(argv[i], "gbenchmark_memory_usage",
|
||||
// &FLAGS_gbenchmark_memory_usage) ||
|
||||
// ParseBoolFlag(argv[i], "gbenchmark_memory_usage",
|
||||
// &FLAGS_gbenchmark_memory_usage) ||
|
||||
ParseInt32Flag(argv[i], "benchmark_repetitions",
|
||||
&FLAGS_benchmark_repetitions) ||
|
||||
ParseBoolFlag(argv[i], "color_print", &FLAGS_color_print) ||
|
||||
ParseInt32Flag(argv[i], "v", &FLAGS_v)) {
|
||||
for (int j = i; j != *argc; ++j)
|
||||
argv[j] = argv[j + 1];
|
||||
for (int j = i; j != *argc; ++j) argv[j] = argv[j + 1];
|
||||
|
||||
--(*argc);
|
||||
--i;
|
||||
@ -431,9 +420,11 @@ void ParseCommandLineFlags(int* argc, const char** argv) {
|
||||
// A clock that provides a fast mechanism to check if we're nearly done.
|
||||
class State::FastClock {
|
||||
public:
|
||||
enum Type { REAL_TIME, CPU_TIME };
|
||||
explicit FastClock(Type type)
|
||||
: type_(type), approx_time_(NowMicros()) {
|
||||
enum Type {
|
||||
REAL_TIME,
|
||||
CPU_TIME
|
||||
};
|
||||
explicit FastClock(Type type) : type_(type), approx_time_(NowMicros()) {
|
||||
sem_init(&bg_done_, 0, 0);
|
||||
pthread_create(&bg_, NULL, &BGThreadWrapper, this);
|
||||
}
|
||||
@ -449,7 +440,7 @@ class State::FastClock {
|
||||
inline bool HasReached(int64_t when_micros) {
|
||||
return std::atomic_load(&approx_time_) >= when_micros;
|
||||
// NOTE: this is the same as we're dealing with an int64_t
|
||||
//return (base::subtle::NoBarrier_Load(&approx_time_) >= when_micros);
|
||||
// return (base::subtle::NoBarrier_Load(&approx_time_) >= when_micros);
|
||||
}
|
||||
|
||||
// Returns the current time in microseconds past the epoch.
|
||||
@ -493,7 +484,7 @@ class State::FastClock {
|
||||
SleepForMicroseconds(1000);
|
||||
std::atomic_store(&approx_time_, NowMicros());
|
||||
// NOTE: same code but no memory barrier. think on it.
|
||||
//base::subtle::Release_Store(&approx_time_, NowMicros());
|
||||
// base::subtle::Release_Store(&approx_time_, NowMicros());
|
||||
sem_getvalue(&bg_done_, &done);
|
||||
} while (done == 0);
|
||||
}
|
||||
@ -523,17 +514,21 @@ namespace internal {
|
||||
// Information kept per benchmark we may want to run
|
||||
struct Benchmark::Instance {
|
||||
Instance()
|
||||
: bm(nullptr), threads(1), rangeXset(false), rangeX(kNoRange),
|
||||
rangeYset(false), rangeY(kNoRange) {}
|
||||
: bm(nullptr),
|
||||
threads(1),
|
||||
rangeXset(false),
|
||||
rangeX(kNoRange),
|
||||
rangeYset(false),
|
||||
rangeY(kNoRange) {}
|
||||
|
||||
std::string name;
|
||||
Benchmark* bm;
|
||||
int threads; // Number of concurrent threads to use
|
||||
int threads; // Number of concurrent threads to use
|
||||
|
||||
bool rangeXset;
|
||||
int rangeX;
|
||||
bool rangeYset;
|
||||
int rangeY;
|
||||
bool rangeXset;
|
||||
int rangeX;
|
||||
bool rangeYset;
|
||||
int rangeY;
|
||||
|
||||
bool multithreaded() const { return !bm->thread_counts_.empty(); }
|
||||
};
|
||||
@ -551,14 +546,14 @@ struct State::SharedState {
|
||||
std::string label;
|
||||
|
||||
explicit SharedState(const internal::Benchmark::Instance* b)
|
||||
: instance(b), starting(0), stopping(0),
|
||||
: instance(b),
|
||||
starting(0),
|
||||
stopping(0),
|
||||
threads(b == nullptr ? 1 : b->threads) {
|
||||
pthread_mutex_init(&mu, nullptr);
|
||||
}
|
||||
|
||||
~SharedState() {
|
||||
pthread_mutex_destroy(&mu);
|
||||
}
|
||||
~SharedState() { pthread_mutex_destroy(&mu); }
|
||||
DISALLOW_COPY_AND_ASSIGN(SharedState);
|
||||
};
|
||||
|
||||
@ -567,8 +562,7 @@ namespace internal {
|
||||
Benchmark::Benchmark(const char* name, BenchmarkFunction f)
|
||||
: name_(name), function_(f) {
|
||||
mutex_lock l(&benchmark_mutex);
|
||||
if (families == nullptr)
|
||||
families = new std::vector<Benchmark*>();
|
||||
if (families == nullptr) families = new std::vector<Benchmark*>();
|
||||
registration_index_ = families->size();
|
||||
families->push_back(this);
|
||||
}
|
||||
@ -578,8 +572,7 @@ Benchmark::~Benchmark() {
|
||||
CHECK((*families)[registration_index_] == this);
|
||||
(*families)[registration_index_] = NULL;
|
||||
// Shrink the vector if convenient.
|
||||
while (!families->empty() && families->back() == NULL)
|
||||
families->pop_back();
|
||||
while (!families->empty() && families->back() == NULL) families->pop_back();
|
||||
}
|
||||
|
||||
Benchmark* Benchmark::Arg(int x) {
|
||||
@ -593,8 +586,7 @@ Benchmark* Benchmark::Range(int start, int limit) {
|
||||
AddRange(&arglist, start, limit, kRangeMultiplier);
|
||||
|
||||
mutex_lock l(&benchmark_mutex);
|
||||
for (size_t i = 0; i < arglist.size(); ++i)
|
||||
rangeX_.push_back(arglist[i]);
|
||||
for (size_t i = 0; i < arglist.size(); ++i) rangeX_.push_back(arglist[i]);
|
||||
return this;
|
||||
}
|
||||
|
||||
@ -602,8 +594,7 @@ Benchmark* Benchmark::DenseRange(int start, int limit) {
|
||||
CHECK_GE(start, 0);
|
||||
CHECK_LE(start, limit);
|
||||
mutex_lock l(&benchmark_mutex);
|
||||
for (int arg = start; arg <= limit; ++arg)
|
||||
rangeX_.push_back(arg);
|
||||
for (int arg = start; arg <= limit; ++arg) rangeX_.push_back(arg);
|
||||
return this;
|
||||
}
|
||||
|
||||
@ -662,14 +653,13 @@ void Benchmark::AddRange(std::vector<int>* dst, int lo, int hi, int mult) {
|
||||
dst->push_back(lo);
|
||||
|
||||
// Now space out the benchmarks in multiples of "mult"
|
||||
for (int32_t i = 1; i < std::numeric_limits<int32_t>::max()/mult; i *= mult) {
|
||||
for (int32_t i = 1; i < std::numeric_limits<int32_t>::max() / mult;
|
||||
i *= mult) {
|
||||
if (i >= hi) break;
|
||||
if (i > lo)
|
||||
dst->push_back(i);
|
||||
if (i > lo) dst->push_back(i);
|
||||
}
|
||||
// Add "hi" (if different from "lo")
|
||||
if (hi != lo)
|
||||
dst->push_back(hi);
|
||||
if (hi != lo) dst->push_back(hi);
|
||||
}
|
||||
|
||||
std::vector<Benchmark::Instance> Benchmark::CreateBenchmarkInstances(
|
||||
@ -767,9 +757,10 @@ void Benchmark::MeasureOverhead() {
|
||||
State::FastClock clock(State::FastClock::CPU_TIME);
|
||||
State::SharedState state(nullptr);
|
||||
State runner(&clock, &state, 0);
|
||||
while (runner.KeepRunning()) {}
|
||||
while (runner.KeepRunning()) {
|
||||
}
|
||||
overhead = state.runs[0].real_accumulated_time /
|
||||
static_cast<double>(state.runs[0].iterations);
|
||||
static_cast<double>(state.runs[0].iterations);
|
||||
#ifdef DEBUG
|
||||
std::cout << "Per-iteration overhead for doing nothing: " << overhead << "\n";
|
||||
#endif
|
||||
@ -796,33 +787,32 @@ void Benchmark::RunInstance(const Instance& b, BenchmarkReporter* br) {
|
||||
runners[i]->Run();
|
||||
}
|
||||
if (b.multithreaded()) {
|
||||
for (int i = 0; i < b.threads; ++i)
|
||||
runners[i]->Wait();
|
||||
for (int i = 0; i < b.threads; ++i) runners[i]->Wait();
|
||||
}
|
||||
}
|
||||
/*
|
||||
double mem_usage = 0;
|
||||
if (get_memory_usage) {
|
||||
// Measure memory usage
|
||||
Notification mem_done;
|
||||
BenchmarkRun mem_run;
|
||||
BenchmarkRun::SharedState mem_shared(&b, 1);
|
||||
mem_run.Init(&clock, &mem_shared, 0);
|
||||
{
|
||||
testing::MallocCounter mc(testing::MallocCounter::THIS_THREAD_ONLY);
|
||||
benchmark_mc = &mc;
|
||||
mem_run.Run(&mem_done);
|
||||
mem_done.WaitForNotification();
|
||||
benchmark_mc = NULL;
|
||||
mem_usage = mc.PeakHeapGrowth();
|
||||
/*
|
||||
double mem_usage = 0;
|
||||
if (get_memory_usage) {
|
||||
// Measure memory usage
|
||||
Notification mem_done;
|
||||
BenchmarkRun mem_run;
|
||||
BenchmarkRun::SharedState mem_shared(&b, 1);
|
||||
mem_run.Init(&clock, &mem_shared, 0);
|
||||
{
|
||||
testing::MallocCounter mc(testing::MallocCounter::THIS_THREAD_ONLY);
|
||||
benchmark_mc = &mc;
|
||||
mem_run.Run(&mem_done);
|
||||
mem_done.WaitForNotification();
|
||||
benchmark_mc = NULL;
|
||||
mem_usage = mc.PeakHeapGrowth();
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
*/
|
||||
running_benchmark = false;
|
||||
|
||||
for (internal::BenchmarkRunData& report : state.runs) {
|
||||
double seconds = (use_real_time ? report.real_accumulated_time :
|
||||
report.cpu_accumulated_time);
|
||||
double seconds = (use_real_time ? report.real_accumulated_time
|
||||
: report.cpu_accumulated_time);
|
||||
report.benchmark_name = b.name;
|
||||
report.report_label = state.label;
|
||||
report.bytes_per_second = state.stats.bytes_processed / seconds;
|
||||
@ -836,29 +826,28 @@ void Benchmark::RunInstance(const Instance& b, BenchmarkReporter* br) {
|
||||
// Run the specified benchmark, measure its peak memory usage, and
|
||||
// return the peak memory usage.
|
||||
double Benchmark::MeasurePeakHeapMemory(const Instance& b) {
|
||||
if (!get_memory_usage)
|
||||
return 0.0;
|
||||
if (!get_memory_usage) return 0.0;
|
||||
double bytes = 0.0;
|
||||
/* TODO(dominich)
|
||||
// Should we do multi-threaded runs?
|
||||
const int num_threads = 1;
|
||||
const int num_iters = 1;
|
||||
{
|
||||
// internal::MallocCounter mc(internal::MallocCounter::THIS_THREAD_ONLY);
|
||||
running_benchmark = true;
|
||||
timer_manager = new TimerManager(1, NULL);
|
||||
// benchmark_mc = &mc;
|
||||
timer_manager->StartTimer();
|
||||
/* TODO(dominich)
|
||||
// Should we do multi-threaded runs?
|
||||
const int num_threads = 1;
|
||||
const int num_iters = 1;
|
||||
{
|
||||
// internal::MallocCounter mc(internal::MallocCounter::THIS_THREAD_ONLY);
|
||||
running_benchmark = true;
|
||||
timer_manager = new TimerManager(1, NULL);
|
||||
// benchmark_mc = &mc;
|
||||
timer_manager->StartTimer();
|
||||
|
||||
b.Run(num_iters);
|
||||
b.Run(num_iters);
|
||||
|
||||
running_benchmark = false;
|
||||
delete timer_manager;
|
||||
timer_manager = NULL;
|
||||
// benchmark_mc = NULL;
|
||||
// bytes = mc.PeakHeapGrowth();
|
||||
}
|
||||
*/
|
||||
running_benchmark = false;
|
||||
delete timer_manager;
|
||||
timer_manager = NULL;
|
||||
// benchmark_mc = NULL;
|
||||
// bytes = mc.PeakHeapGrowth();
|
||||
}
|
||||
*/
|
||||
return bytes;
|
||||
}
|
||||
|
||||
@ -876,14 +865,13 @@ State::State(FastClock* clock, SharedState* s, int t)
|
||||
start_pause_(0.0),
|
||||
pause_time_(0.0),
|
||||
total_iterations_(0),
|
||||
interval_micros_(
|
||||
static_cast<int64_t>(kNumMicrosPerSecond * FLAGS_benchmark_min_time /
|
||||
FLAGS_benchmark_repetitions)),
|
||||
interval_micros_(static_cast<int64_t>(kNumMicrosPerSecond *
|
||||
FLAGS_benchmark_min_time /
|
||||
FLAGS_benchmark_repetitions)),
|
||||
is_continuation_(false),
|
||||
stats_(new ThreadStats()) {
|
||||
CHECK(clock != nullptr);
|
||||
CHECK(s != nullptr);
|
||||
|
||||
}
|
||||
|
||||
bool State::KeepRunning() {
|
||||
@ -895,24 +883,27 @@ bool State::KeepRunning() {
|
||||
return true;
|
||||
}
|
||||
|
||||
switch(state_) {
|
||||
case STATE_INITIAL: return StartRunning();
|
||||
case STATE_STARTING: CHECK(false); return true;
|
||||
case STATE_RUNNING: return FinishInterval();
|
||||
case STATE_STOPPING: return MaybeStop();
|
||||
case STATE_STOPPED: CHECK(false); return true;
|
||||
switch (state_) {
|
||||
case STATE_INITIAL:
|
||||
return StartRunning();
|
||||
case STATE_STARTING:
|
||||
CHECK(false);
|
||||
return true;
|
||||
case STATE_RUNNING:
|
||||
return FinishInterval();
|
||||
case STATE_STOPPING:
|
||||
return MaybeStop();
|
||||
case STATE_STOPPED:
|
||||
CHECK(false);
|
||||
return true;
|
||||
}
|
||||
CHECK(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
void State::PauseTiming() {
|
||||
start_pause_ = walltime::Now();
|
||||
}
|
||||
void State::PauseTiming() { start_pause_ = walltime::Now(); }
|
||||
|
||||
void State::ResumeTiming() {
|
||||
pause_time_ += walltime::Now() - start_pause_;
|
||||
}
|
||||
void State::ResumeTiming() { pause_time_ += walltime::Now() - start_pause_; }
|
||||
|
||||
void State::SetBytesProcessed(int64_t bytes) {
|
||||
CHECK_EQ(STATE_STOPPED, state_);
|
||||
@ -944,10 +935,10 @@ int State::range_x() const {
|
||||
|
||||
int State::range_y() const {
|
||||
CHECK(shared_->instance->rangeYset);
|
||||
/* <<
|
||||
"Failed to get range_y as it was not set. Did you register your "
|
||||
"benchmark with a range parameter?";
|
||||
*/
|
||||
/* <<
|
||||
"Failed to get range_y as it was not set. Did you register your "
|
||||
"benchmark with a range parameter?";
|
||||
*/
|
||||
return shared_->instance->rangeY;
|
||||
}
|
||||
|
||||
@ -962,10 +953,10 @@ bool State::StartRunning() {
|
||||
++shared_->starting;
|
||||
last_thread = shared_->starting == shared_->threads;
|
||||
}
|
||||
|
||||
|
||||
if (last_thread) {
|
||||
clock_->InitType(
|
||||
use_real_time ? FastClock::REAL_TIME : FastClock::CPU_TIME);
|
||||
clock_->InitType(use_real_time ? FastClock::REAL_TIME
|
||||
: FastClock::CPU_TIME);
|
||||
{
|
||||
mutex_lock l(&starting_mutex);
|
||||
pthread_cond_broadcast(&starting_cv);
|
||||
@ -1022,7 +1013,6 @@ bool State::FinishInterval() {
|
||||
|
||||
const double accumulated_time = walltime::Now() - start_time_;
|
||||
const double total_overhead = overhead * iterations_;
|
||||
//const double total_overhead = 0.0;
|
||||
CHECK_LT(pause_time_, accumulated_time);
|
||||
CHECK_LT(pause_time_ + total_overhead, accumulated_time);
|
||||
data.real_accumulated_time =
|
||||
@ -1046,9 +1036,8 @@ bool State::FinishInterval() {
|
||||
is_continuation_ = keep_going;
|
||||
} else {
|
||||
// If this is a repetition, run another interval as a new data point.
|
||||
keep_going =
|
||||
shared_->runs.size() <
|
||||
static_cast<size_t>(FLAGS_benchmark_repetitions);
|
||||
keep_going = shared_->runs.size() <
|
||||
static_cast<size_t>(FLAGS_benchmark_repetitions);
|
||||
is_continuation_ = !keep_going;
|
||||
}
|
||||
|
||||
@ -1065,8 +1054,7 @@ bool State::FinishInterval() {
|
||||
}
|
||||
}
|
||||
|
||||
if (state_ == STATE_RUNNING)
|
||||
NewInterval();
|
||||
if (state_ == STATE_RUNNING) NewInterval();
|
||||
return keep_going;
|
||||
}
|
||||
|
||||
@ -1093,9 +1081,7 @@ void State::RunAsThread() {
|
||||
CHECK_EQ(0, pthread_create(&thread_, nullptr, &State::RunWrapper, this));
|
||||
}
|
||||
|
||||
void State::Wait() {
|
||||
CHECK_EQ(0, pthread_join(thread_, nullptr));
|
||||
}
|
||||
void State::Wait() { CHECK_EQ(0, pthread_join(thread_, nullptr)); }
|
||||
|
||||
// static
|
||||
void* State::RunWrapper(void* arg) {
|
||||
@ -1121,25 +1107,24 @@ void RunMatchingBenchmarks(const std::string& spec,
|
||||
for (const internal::Benchmark::Instance& benchmark : benchmarks) {
|
||||
// Add width for _stddev and threads:XX
|
||||
if (benchmark.threads > 1 && FLAGS_benchmark_repetitions > 1) {
|
||||
name_field_width = std::max<int>(name_field_width,
|
||||
benchmark.name.size() + 17);
|
||||
} else if (benchmark.threads> 1) {
|
||||
name_field_width = std::max<int>(name_field_width,
|
||||
benchmark.name.size() + 10);
|
||||
name_field_width =
|
||||
std::max<int>(name_field_width, benchmark.name.size() + 17);
|
||||
} else if (benchmark.threads > 1) {
|
||||
name_field_width =
|
||||
std::max<int>(name_field_width, benchmark.name.size() + 10);
|
||||
} else if (FLAGS_benchmark_repetitions > 1) {
|
||||
name_field_width = std::max<int>(name_field_width,
|
||||
benchmark.name.size() + 7);
|
||||
name_field_width =
|
||||
std::max<int>(name_field_width, benchmark.name.size() + 7);
|
||||
} else {
|
||||
name_field_width = std::max<int>(name_field_width,
|
||||
benchmark.name.size());
|
||||
name_field_width = std::max<int>(name_field_width, benchmark.name.size());
|
||||
}
|
||||
}
|
||||
|
||||
// Print header here
|
||||
BenchmarkContextData context;
|
||||
context.num_cpus = NumCPUs();
|
||||
context.mhz_per_cpu = CyclesPerSecond() / 1000000.0f;
|
||||
// context.cpu_info = base::CompactCPUIDInfoString();
|
||||
context.mhz_per_cpu = CyclesPerSecond() / 1000000.0f;
|
||||
// context.cpu_info = base::CompactCPUIDInfoString();
|
||||
context.cpu_scaling_enabled = CpuScalingEnabled();
|
||||
context.name_field_width = name_field_width;
|
||||
|
||||
@ -1155,7 +1140,7 @@ void FindMatchingBenchmarkNames(const std::string& spec,
|
||||
std::vector<internal::Benchmark::Instance> benchmarks;
|
||||
internal::Benchmark::FindBenchmarks(spec, &benchmarks);
|
||||
std::transform(benchmarks.begin(), benchmarks.end(), benchmark_names->begin(),
|
||||
[] (const internal::Benchmark::Instance& b) { return b.name; } );
|
||||
[](const internal::Benchmark::Instance& b) { return b.name; });
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
@ -1163,7 +1148,7 @@ void FindMatchingBenchmarkNames(const std::string& spec,
|
||||
void RunSpecifiedBenchmarks() {
|
||||
std::string spec = FLAGS_benchmark_filter;
|
||||
if (spec.empty() || spec == "all")
|
||||
spec = "."; // Regexp that matches all benchmarks
|
||||
spec = "."; // Regexp that matches all benchmarks
|
||||
internal::ConsoleReporter default_reporter;
|
||||
internal::RunMatchingBenchmarks(spec, &default_reporter);
|
||||
pthread_cond_destroy(&starting_cv);
|
||||
@ -1172,12 +1157,11 @@ void RunSpecifiedBenchmarks() {
|
||||
}
|
||||
|
||||
void Initialize(int* argc, const char** argv) {
|
||||
//AtomicOps_Internalx86CPUFeaturesInit();
|
||||
pthread_mutex_init(&benchmark_mutex, nullptr);
|
||||
pthread_mutex_init(&starting_mutex, nullptr);
|
||||
pthread_cond_init(&starting_cv, nullptr);
|
||||
walltime::Initialize();
|
||||
internal::ParseCommandLineFlags(argc, argv);
|
||||
internal::ParseCommandLineFlags(argc, argv);
|
||||
internal::Benchmark::MeasureOverhead();
|
||||
}
|
||||
|
||||
|
@ -17,25 +17,40 @@ typedef const char* PlatformColorCode;
|
||||
PlatformColorCode GetPlatformColorCode(LogColor color) {
|
||||
#ifdef OS_WINDOWS
|
||||
switch (color) {
|
||||
case COLOR_RED: return FOREGROUND_RED;
|
||||
case COLOR_GREEN: return FOREGROUND_GREEN;
|
||||
case COLOR_YELLOW: return FOREGROUND_RED | FOREGROUND_GREEN;
|
||||
case COLOR_BLUE: return FOREGROUND_BLUE;
|
||||
case COLOR_MAGENTA: return FOREGROUND_BLUE | FOREGROUND_RED;
|
||||
case COLOR_CYAN: return FOREGROUND_BLUE | FOREGROUND_GREEN;
|
||||
case COLOR_WHITE: // fall through to default
|
||||
default: return 0;
|
||||
case COLOR_RED:
|
||||
return FOREGROUND_RED;
|
||||
case COLOR_GREEN:
|
||||
return FOREGROUND_GREEN;
|
||||
case COLOR_YELLOW:
|
||||
return FOREGROUND_RED | FOREGROUND_GREEN;
|
||||
case COLOR_BLUE:
|
||||
return FOREGROUND_BLUE;
|
||||
case COLOR_MAGENTA:
|
||||
return FOREGROUND_BLUE | FOREGROUND_RED;
|
||||
case COLOR_CYAN:
|
||||
return FOREGROUND_BLUE | FOREGROUND_GREEN;
|
||||
case COLOR_WHITE: // fall through to default
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
switch (color) {
|
||||
case COLOR_RED: return "1";
|
||||
case COLOR_GREEN: return "2";
|
||||
case COLOR_YELLOW: return "3";
|
||||
case COLOR_BLUE: return "4";
|
||||
case COLOR_MAGENTA: return "5";
|
||||
case COLOR_CYAN: return "6";
|
||||
case COLOR_WHITE: return "7";
|
||||
default: return NULL;
|
||||
case COLOR_RED:
|
||||
return "1";
|
||||
case COLOR_GREEN:
|
||||
return "2";
|
||||
case COLOR_YELLOW:
|
||||
return "3";
|
||||
case COLOR_BLUE:
|
||||
return "4";
|
||||
case COLOR_MAGENTA:
|
||||
return "5";
|
||||
case COLOR_CYAN:
|
||||
return "6";
|
||||
case COLOR_WHITE:
|
||||
return "7";
|
||||
default:
|
||||
return NULL;
|
||||
};
|
||||
#endif
|
||||
}
|
||||
@ -72,8 +87,7 @@ void ColorPrintf(LogColor color, const char* fmt, ...) {
|
||||
SetConsoleTextAttribute(stdout_handle, old_color_attrs);
|
||||
#else
|
||||
const char* color_code = GetPlatformColorCode(color);
|
||||
if (color_code)
|
||||
fprintf(stdout, "\033[0;3%sm", color_code);
|
||||
if (color_code) fprintf(stdout, "\033[0;3%sm", color_code);
|
||||
vprintf(fmt, args);
|
||||
printf("\033[m"); // Resets the terminal to default.
|
||||
#endif
|
||||
|
@ -29,7 +29,7 @@ bool ParseInt32(const std::string& src_text, const char* str, int32_t* value) {
|
||||
// The parsed value overflows as a long. (strtol() returns
|
||||
// LONG_MAX or LONG_MIN when the input overflows.)
|
||||
result != long_value
|
||||
// The parsed value overflows as an Int32.
|
||||
// The parsed value overflows as an Int32.
|
||||
) {
|
||||
std::cerr << src_text << " is expected to be a 32-bit integer, "
|
||||
<< "but actually has value \"" << str << "\", "
|
||||
@ -60,7 +60,6 @@ bool ParseDouble(const std::string& src_text, const char* str, double* value) {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
inline const char* GetEnv(const char* name) {
|
||||
#if GTEST_OS_WINDOWS_MOBILE
|
||||
// We are on Windows CE, which has no environment variables.
|
||||
@ -95,8 +94,7 @@ static std::string FlagToEnvVar(const char* flag) {
|
||||
bool BoolFromEnv(const char* flag, bool default_value) {
|
||||
const std::string env_var = FlagToEnvVar(flag);
|
||||
const char* const string_value = GetEnv(env_var.c_str());
|
||||
return string_value == NULL ?
|
||||
default_value : strcmp(string_value, "0") != 0;
|
||||
return string_value == NULL ? default_value : strcmp(string_value, "0") != 0;
|
||||
}
|
||||
|
||||
// Reads and returns a 32-bit integer stored in the environment
|
||||
@ -111,8 +109,8 @@ int32_t Int32FromEnv(const char* flag, int32_t default_value) {
|
||||
}
|
||||
|
||||
int32_t result = default_value;
|
||||
if (!ParseInt32(std::string("Environment variable ") + env_var,
|
||||
string_value, &result)) {
|
||||
if (!ParseInt32(std::string("Environment variable ") + env_var, string_value,
|
||||
&result)) {
|
||||
std::cout << "The default value " << default_value << " is used.\n";
|
||||
return default_value;
|
||||
}
|
||||
@ -133,13 +131,12 @@ const char* StringFromEnv(const char* flag, const char* default_value) {
|
||||
// part can be omitted.
|
||||
//
|
||||
// Returns the value of the flag, or NULL if the parsing failed.
|
||||
const char* ParseFlagValue(const char* str,
|
||||
const char* flag,
|
||||
const char* ParseFlagValue(const char* str, const char* flag,
|
||||
bool def_optional) {
|
||||
// str and flag must not be NULL.
|
||||
if (str == NULL || flag == NULL) return NULL;
|
||||
|
||||
// The flag must start with "--".
|
||||
// The flag must start with "--".
|
||||
const std::string flag_str = std::string("--") + std::string(flag);
|
||||
const size_t flag_len = flag_str.length();
|
||||
if (strncmp(str, flag_str.c_str(), flag_len) != 0) return NULL;
|
||||
@ -148,8 +145,7 @@ const char* ParseFlagValue(const char* str,
|
||||
const char* flag_end = str + flag_len;
|
||||
|
||||
// When def_optional is true, it's OK to not have a "=value" part.
|
||||
if (def_optional && (flag_end[0] == '\0'))
|
||||
return flag_end;
|
||||
if (def_optional && (flag_end[0] == '\0')) return flag_end;
|
||||
|
||||
// If def_optional is true and there are more characters after the
|
||||
// flag name, or if def_optional is false, there must be a '=' after
|
||||
@ -180,8 +176,8 @@ bool ParseInt32Flag(const char* str, const char* flag, int32_t* value) {
|
||||
if (value_str == NULL) return false;
|
||||
|
||||
// Sets *value to the value of the flag.
|
||||
return ParseInt32(std::string("The value of flag --") + flag,
|
||||
value_str, value);
|
||||
return ParseInt32(std::string("The value of flag --") + flag, value_str,
|
||||
value);
|
||||
}
|
||||
|
||||
bool ParseDoubleFlag(const char* str, const char* flag, double* value) {
|
||||
@ -192,8 +188,8 @@ bool ParseDoubleFlag(const char* str, const char* flag, double* value) {
|
||||
if (value_str == NULL) return false;
|
||||
|
||||
// Sets *value to the value of the flag.
|
||||
return ParseDouble(std::string("The value of flag --") + flag,
|
||||
value_str, value);
|
||||
return ParseDouble(std::string("The value of flag --") + flag, value_str,
|
||||
value);
|
||||
}
|
||||
|
||||
bool ParseStringFlag(const char* str, const char* flag, std::string* value) {
|
||||
|
@ -21,7 +21,7 @@
|
||||
#define DEFINE_int64(name, default_val, doc) int64_t FLAG(name) = (default_val)
|
||||
#define DEFINE_double(name, default_val, doc) double FLAG(name) = (default_val)
|
||||
#define DEFINE_string(name, default_val, doc) \
|
||||
std::string FLAG(name) = (default_val)
|
||||
std::string FLAG(name) = (default_val)
|
||||
|
||||
namespace benchmark {
|
||||
// Parses 'str' for a 32-bit signed integer. If successful, writes the result
|
||||
|
122
src/cycleclock.h
122
src/cycleclock.h
@ -24,7 +24,7 @@
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(OS_MACOSX)
|
||||
# include <mach/mach_time.h>
|
||||
#include <mach/mach_time.h>
|
||||
#endif
|
||||
// For MSVC, we want to use '_asm rdtsc' when possible (since it works
|
||||
// with even ancient MSVC compilers), and when not possible the
|
||||
@ -48,85 +48,85 @@ namespace benchmark {
|
||||
// with modifications by m3b. See also
|
||||
// https://setisvn.ssl.berkeley.edu/svn/lib/fftw-3.0.1/kernel/cycle.h
|
||||
namespace cycleclock {
|
||||
// This should return the number of cycles since power-on. Thread-safe.
|
||||
inline ATTRIBUTE_ALWAYS_INLINE int64_t Now() {
|
||||
// This should return the number of cycles since power-on. Thread-safe.
|
||||
inline ATTRIBUTE_ALWAYS_INLINE int64_t Now() {
|
||||
#if defined(OS_MACOSX)
|
||||
// this goes at the top because we need ALL Macs, regardless of
|
||||
// architecture, to return the number of "mach time units" that
|
||||
// have passed since startup. See sysinfo.cc where
|
||||
// InitializeSystemInfo() sets the supposed cpu clock frequency of
|
||||
// macs to the number of mach time units per second, not actual
|
||||
// CPU clock frequency (which can change in the face of CPU
|
||||
// frequency scaling). Also note that when the Mac sleeps, this
|
||||
// counter pauses; it does not continue counting, nor does it
|
||||
// reset to zero.
|
||||
return mach_absolute_time();
|
||||
// this goes at the top because we need ALL Macs, regardless of
|
||||
// architecture, to return the number of "mach time units" that
|
||||
// have passed since startup. See sysinfo.cc where
|
||||
// InitializeSystemInfo() sets the supposed cpu clock frequency of
|
||||
// macs to the number of mach time units per second, not actual
|
||||
// CPU clock frequency (which can change in the face of CPU
|
||||
// frequency scaling). Also note that when the Mac sleeps, this
|
||||
// counter pauses; it does not continue counting, nor does it
|
||||
// reset to zero.
|
||||
return mach_absolute_time();
|
||||
#elif defined(__i386__)
|
||||
int64_t ret;
|
||||
__asm__ volatile ("rdtsc" : "=A" (ret) );
|
||||
return ret;
|
||||
int64_t ret;
|
||||
__asm__ volatile("rdtsc" : "=A"(ret));
|
||||
return ret;
|
||||
#elif defined(__x86_64__) || defined(__amd64__)
|
||||
uint64_t low, high;
|
||||
__asm__ volatile ("rdtsc" : "=a" (low), "=d" (high));
|
||||
return (high << 32) | low;
|
||||
uint64_t low, high;
|
||||
__asm__ volatile("rdtsc" : "=a"(low), "=d"(high));
|
||||
return (high << 32) | low;
|
||||
#elif defined(__powerpc__) || defined(__ppc__)
|
||||
// This returns a time-base, which is not always precisely a cycle-count.
|
||||
int64_t tbl, tbu0, tbu1;
|
||||
asm("mftbu %0" : "=r" (tbu0));
|
||||
asm("mftb %0" : "=r" (tbl));
|
||||
asm("mftbu %0" : "=r" (tbu1));
|
||||
tbl &= -static_cast<int64>(tbu0 == tbu1);
|
||||
// high 32 bits in tbu1; low 32 bits in tbl (tbu0 is garbage)
|
||||
return (tbu1 << 32) | tbl;
|
||||
// This returns a time-base, which is not always precisely a cycle-count.
|
||||
int64_t tbl, tbu0, tbu1;
|
||||
asm("mftbu %0" : "=r"(tbu0));
|
||||
asm("mftb %0" : "=r"(tbl));
|
||||
asm("mftbu %0" : "=r"(tbu1));
|
||||
tbl &= -static_cast<int64>(tbu0 == tbu1);
|
||||
// high 32 bits in tbu1; low 32 bits in tbl (tbu0 is garbage)
|
||||
return (tbu1 << 32) | tbl;
|
||||
#elif defined(__sparc__)
|
||||
int64_t tick;
|
||||
asm(".byte 0x83, 0x41, 0x00, 0x00");
|
||||
asm("mov %%g1, %0" : "=r" (tick));
|
||||
return tick;
|
||||
int64_t tick;
|
||||
asm(".byte 0x83, 0x41, 0x00, 0x00");
|
||||
asm("mov %%g1, %0" : "=r"(tick));
|
||||
return tick;
|
||||
#elif defined(__ia64__)
|
||||
int64_t itc;
|
||||
asm("mov %0 = ar.itc" : "=r" (itc));
|
||||
return itc;
|
||||
int64_t itc;
|
||||
asm("mov %0 = ar.itc" : "=r"(itc));
|
||||
return itc;
|
||||
#elif defined(COMPILER_MSVC) && defined(_M_IX86)
|
||||
// Older MSVC compilers (like 7.x) don't seem to support the
|
||||
// __rdtsc intrinsic properly, so I prefer to use _asm instead
|
||||
// when I know it will work. Otherwise, I'll use __rdtsc and hope
|
||||
// the code is being compiled with a non-ancient compiler.
|
||||
_asm rdtsc
|
||||
// Older MSVC compilers (like 7.x) don't seem to support the
|
||||
// __rdtsc intrinsic properly, so I prefer to use _asm instead
|
||||
// when I know it will work. Otherwise, I'll use __rdtsc and hope
|
||||
// the code is being compiled with a non-ancient compiler.
|
||||
_asm rdtsc
|
||||
#elif defined(COMPILER_MSVC)
|
||||
return __rdtsc();
|
||||
return __rdtsc();
|
||||
#elif defined(ARMV3)
|
||||
#if defined(ARMV6) // V6 is the earliest arch that has a standard cyclecount
|
||||
uint32_t pmccntr;
|
||||
uint32_t pmuseren;
|
||||
uint32_t pmcntenset;
|
||||
// Read the user mode perf monitor counter access permissions.
|
||||
asm("mrc p15, 0, %0, c9, c14, 0" : "=r" (pmuseren));
|
||||
if (pmuseren & 1) { // Allows reading perfmon counters for user mode code.
|
||||
asm("mrc p15, 0, %0, c9, c12, 1" : "=r" (pmcntenset));
|
||||
if (pmcntenset & 0x80000000ul) { // Is it counting?
|
||||
asm("mrc p15, 0, %0, c9, c13, 0" : "=r" (pmccntr));
|
||||
// The counter is set up to count every 64th cycle
|
||||
return static_cast<int64>(pmccntr) * 64; // Should optimize to << 6
|
||||
}
|
||||
uint32_t pmccntr;
|
||||
uint32_t pmuseren;
|
||||
uint32_t pmcntenset;
|
||||
// Read the user mode perf monitor counter access permissions.
|
||||
asm("mrc p15, 0, %0, c9, c14, 0" : "=r"(pmuseren));
|
||||
if (pmuseren & 1) { // Allows reading perfmon counters for user mode code.
|
||||
asm("mrc p15, 0, %0, c9, c12, 1" : "=r"(pmcntenset));
|
||||
if (pmcntenset & 0x80000000ul) { // Is it counting?
|
||||
asm("mrc p15, 0, %0, c9, c13, 0" : "=r"(pmccntr));
|
||||
// The counter is set up to count every 64th cycle
|
||||
return static_cast<int64>(pmccntr) * 64; // Should optimize to << 6
|
||||
}
|
||||
}
|
||||
#endif
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, NULL);
|
||||
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, NULL);
|
||||
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
|
||||
#elif defined(__mips__)
|
||||
// mips apparently only allows rdtsc for superusers, so we fall
|
||||
// back to gettimeofday. It's possible clock_gettime would be better.
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, NULL);
|
||||
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
|
||||
// mips apparently only allows rdtsc for superusers, so we fall
|
||||
// back to gettimeofday. It's possible clock_gettime would be better.
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, NULL);
|
||||
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
|
||||
#else
|
||||
// The soft failover to a generic implementation is automatic only for ARM.
|
||||
// For other platforms the developer is expected to make an attempt to create
|
||||
// a fast implementation and use generic version if nothing better is available.
|
||||
#error You need to define CycleTimer for your OS and CPU
|
||||
#endif
|
||||
}
|
||||
}
|
||||
} // end namespace cycleclock
|
||||
} // end namespace benchmark
|
||||
|
||||
|
@ -10,9 +10,7 @@ class mutex_lock {
|
||||
pthread_mutex_lock(mu_);
|
||||
}
|
||||
|
||||
~mutex_lock() {
|
||||
pthread_mutex_unlock(mu_);
|
||||
}
|
||||
~mutex_lock() { pthread_mutex_unlock(mu_); }
|
||||
|
||||
private:
|
||||
pthread_mutex_t* mu_;
|
||||
|
@ -6,13 +6,11 @@
|
||||
namespace benchmark {
|
||||
#ifdef OS_WINDOWS
|
||||
// Window's _sleep takes milliseconds argument.
|
||||
void SleepForMilliseconds(int milliseconds) {
|
||||
_sleep(milliseconds);
|
||||
}
|
||||
void SleepForMilliseconds(int milliseconds) { _sleep(milliseconds); }
|
||||
void SleepForSeconds(double seconds) {
|
||||
SleepForMilliseconds(static_cast<int>(kNumMillisPerSecond * seconds));
|
||||
}
|
||||
#else // OS_WINDOWS
|
||||
#else // OS_WINDOWS
|
||||
void SleepForMicroseconds(int64_t microseconds) {
|
||||
struct timespec sleep_time;
|
||||
sleep_time.tv_sec = microseconds / kNumMicrosPerSecond;
|
||||
|
205
src/stat.h
205
src/stat.h
@ -12,27 +12,24 @@ class Stat1;
|
||||
template <typename VType, typename NumType>
|
||||
class Stat1MinMax;
|
||||
|
||||
typedef Stat1<float, float> Stat1_f;
|
||||
typedef Stat1<float, float> Stat1_f;
|
||||
typedef Stat1<double, double> Stat1_d;
|
||||
typedef Stat1MinMax<float, float> Stat1MinMax_f;
|
||||
typedef Stat1MinMax<float, float> Stat1MinMax_f;
|
||||
typedef Stat1MinMax<double, double> Stat1MinMax_d;
|
||||
|
||||
template <typename VType> class Vector2;
|
||||
template <typename VType> class Vector3;
|
||||
template <typename VType> class Vector4;
|
||||
template <typename VType>
|
||||
class Vector2;
|
||||
template <typename VType>
|
||||
class Vector3;
|
||||
template <typename VType>
|
||||
class Vector4;
|
||||
|
||||
template <typename VType, typename NumType>
|
||||
class Stat1 {
|
||||
public:
|
||||
typedef Stat1<VType, NumType> Self;
|
||||
|
||||
Stat1() {
|
||||
Clear();
|
||||
}
|
||||
void Clear() {
|
||||
numsamples_ = NumType();
|
||||
sum_squares_ = sum_ = VType();
|
||||
}
|
||||
Stat1() { Clear(); }
|
||||
// Create a sample of value dat and weight 1
|
||||
explicit Stat1(const VType &dat) {
|
||||
sum_ = dat;
|
||||
@ -43,7 +40,7 @@ class Stat1 {
|
||||
// and end(excluded)
|
||||
explicit Stat1(const VType *begin, const VType *end) {
|
||||
Clear();
|
||||
for ( const VType *item = begin; item < end; ++item ) {
|
||||
for (const VType *item = begin; item < end; ++item) {
|
||||
(*this) += Stat1(*item);
|
||||
}
|
||||
}
|
||||
@ -60,58 +57,60 @@ class Stat1 {
|
||||
numsamples_ = stat.numsamples_;
|
||||
}
|
||||
|
||||
inline Self &operator =(const Self &stat) {
|
||||
void Clear() {
|
||||
numsamples_ = NumType();
|
||||
sum_squares_ = sum_ = VType();
|
||||
}
|
||||
|
||||
Self& operator=(const Self &stat) {
|
||||
sum_ = stat.sum_;
|
||||
sum_squares_ = stat.sum_squares_;
|
||||
numsamples_ = stat.numsamples_;
|
||||
return (*this);
|
||||
}
|
||||
// Merge statistics from two sample sets.
|
||||
inline Self &operator +=(const Self &stat) {
|
||||
Self& operator+=(const Self &stat) {
|
||||
sum_ += stat.sum_;
|
||||
sum_squares_+= stat.sum_squares_;
|
||||
sum_squares_ += stat.sum_squares_;
|
||||
numsamples_ += stat.numsamples_;
|
||||
return (*this);
|
||||
}
|
||||
// The operation opposite to +=
|
||||
inline Self &operator -=(const Self &stat) {
|
||||
Self& operator-=(const Self &stat) {
|
||||
sum_ -= stat.sum_;
|
||||
sum_squares_-= stat.sum_squares_;
|
||||
sum_squares_ -= stat.sum_squares_;
|
||||
numsamples_ -= stat.numsamples_;
|
||||
return (*this);
|
||||
}
|
||||
// Multiply the weight of the set of samples by a factor k
|
||||
inline Self &operator *=(const VType &k) {
|
||||
Self& operator*=(const VType &k) {
|
||||
sum_ *= k;
|
||||
sum_squares_*= k;
|
||||
sum_squares_ *= k;
|
||||
numsamples_ *= k;
|
||||
return (*this);
|
||||
}
|
||||
|
||||
// Merge statistics from two sample sets.
|
||||
inline Self operator + (const Self &stat) const {
|
||||
return Self(*this) += stat;
|
||||
}
|
||||
Self operator+(const Self& stat) const { return Self(*this) += stat; }
|
||||
|
||||
// The operation opposite to +
|
||||
inline Self operator - (const Self &stat) const {
|
||||
return Self(*this) -= stat;
|
||||
}
|
||||
Self operator-(const Self& stat) const { return Self(*this) -= stat; }
|
||||
|
||||
// Multiply the weight of the set of samples by a factor k
|
||||
inline Self operator * (const VType &k) const {
|
||||
return Self(*this) *= k;
|
||||
}
|
||||
Self operator*(const VType& k) const { return Self(*this) *= k; }
|
||||
|
||||
// Return the total weight of this sample set
|
||||
NumType NumSamples() const {
|
||||
return numsamples_;
|
||||
}
|
||||
NumType numSamples() const { return numsamples_; }
|
||||
|
||||
// Return the sum of this sample set
|
||||
VType Sum() const {
|
||||
return sum_;
|
||||
}
|
||||
VType sum() const { return sum_; }
|
||||
|
||||
// Return the mean of this sample set
|
||||
VType Mean() const {
|
||||
if (numsamples_ == 0) return VType();
|
||||
return sum_ * (1.0 / numsamples_);
|
||||
}
|
||||
|
||||
// Return the mean of this sample set and compute the standard deviation at
|
||||
// the same time.
|
||||
VType Mean(VType *stddev) const {
|
||||
@ -119,10 +118,11 @@ class Stat1 {
|
||||
VType mean = sum_ * (1.0 / numsamples_);
|
||||
if (stddev) {
|
||||
VType avg_squares = sum_squares_ * (1.0 / numsamples_);
|
||||
*stddev = Sqrt(avg_squares - Sqr(mean));
|
||||
*stddev = Sqrt(avg_squares - Sqr(mean));
|
||||
}
|
||||
return mean;
|
||||
}
|
||||
|
||||
// Return the standard deviation of the sample set
|
||||
VType StdDev() const {
|
||||
if (numsamples_ == 0) return VType();
|
||||
@ -130,10 +130,11 @@ class Stat1 {
|
||||
VType avg_squares = sum_squares_ * (1.0 / numsamples_);
|
||||
return Sqrt(avg_squares - Sqr(mean));
|
||||
}
|
||||
|
||||
private:
|
||||
// Let i be the index of the samples provided (using +=)
|
||||
// and weight[i],value[i] be the data of sample #i
|
||||
// then the variables have the following meaning:
|
||||
// Let i be the index of the samples provided (using +=)
|
||||
// and weight[i],value[i] be the data of sample #i
|
||||
// then the variables have the following meaning:
|
||||
NumType numsamples_; // sum of weight[i];
|
||||
VType sum_; // sum of weight[i]*value[i];
|
||||
VType sum_squares_; // sum of weight[i]*value[i]^2;
|
||||
@ -141,17 +142,18 @@ class Stat1 {
|
||||
// Template function used to square a number.
|
||||
// For a vector we square all components
|
||||
template <typename SType>
|
||||
static inline SType Sqr(const SType &dat) {
|
||||
return dat * dat;
|
||||
}
|
||||
static inline SType Sqr(const SType &dat) { return dat * dat; }
|
||||
|
||||
template <typename SType>
|
||||
static inline Vector2<SType> Sqr(const Vector2<SType> &dat) {
|
||||
return dat.MulComponents(dat);
|
||||
}
|
||||
|
||||
template <typename SType>
|
||||
static inline Vector3<SType> Sqr(const Vector3<SType> &dat) {
|
||||
return dat.MulComponents(dat);
|
||||
}
|
||||
|
||||
template <typename SType>
|
||||
static inline Vector4<SType> Sqr(const Vector4<SType> &dat) {
|
||||
return dat.MulComponents(dat);
|
||||
@ -162,20 +164,22 @@ class Stat1 {
|
||||
template <typename SType>
|
||||
static inline SType Sqrt(const SType &dat) {
|
||||
// Avoid NaN due to imprecision in the calculations
|
||||
if ( dat < 0 )
|
||||
return 0;
|
||||
if (dat < 0) return 0;
|
||||
return sqrt(dat);
|
||||
}
|
||||
|
||||
template <typename SType>
|
||||
static inline Vector2<SType> Sqrt(const Vector2<SType> &dat) {
|
||||
// Avoid NaN due to imprecision in the calculations
|
||||
return Max(dat, Vector2<SType>()).Sqrt();
|
||||
}
|
||||
|
||||
template <typename SType>
|
||||
static inline Vector3<SType> Sqrt(const Vector3<SType> &dat) {
|
||||
// Avoid NaN due to imprecision in the calculations
|
||||
return Max(dat, Vector3<SType>()).Sqrt();
|
||||
}
|
||||
|
||||
template <typename SType>
|
||||
static inline Vector4<SType> Sqrt(const Vector4<SType> &dat) {
|
||||
// Avoid NaN due to imprecision in the calculations
|
||||
@ -185,15 +189,12 @@ class Stat1 {
|
||||
|
||||
// Useful printing function
|
||||
template <typename VType, typename NumType>
|
||||
inline std::ostream& operator<<(std::ostream& out,
|
||||
const Stat1<VType, NumType>& s) {
|
||||
out << "{ avg = " << s.Mean()
|
||||
<< " std = " << s.StdDev()
|
||||
std::ostream& operator<<(std::ostream& out, const Stat1<VType, NumType>& s) {
|
||||
out << "{ avg = " << s.Mean() << " std = " << s.StdDev()
|
||||
<< " nsamples = " << s.NumSamples() << "}";
|
||||
return out;
|
||||
}
|
||||
|
||||
|
||||
// Stat1MinMax: same as Stat1, but it also
|
||||
// keeps the Min and Max values; the "-"
|
||||
// operator is disabled because it cannot be implemented
|
||||
@ -203,9 +204,32 @@ class Stat1MinMax : public Stat1<VType, NumType> {
|
||||
public:
|
||||
typedef Stat1MinMax<VType, NumType> Self;
|
||||
|
||||
Stat1MinMax() {
|
||||
Clear();
|
||||
Stat1MinMax() { Clear(); }
|
||||
// Create a sample of value dat and weight 1
|
||||
explicit Stat1MinMax(const VType &dat) : Stat1<VType, NumType>(dat) {
|
||||
max_ = dat;
|
||||
min_ = dat;
|
||||
}
|
||||
// Create statistics for all the samples between begin (included)
|
||||
// and end(excluded)
|
||||
explicit Stat1MinMax(const VType *begin, const VType *end) {
|
||||
Clear();
|
||||
for (const VType* item = begin; item < end; ++item) {
|
||||
(*this) += Stat1MinMax(*item);
|
||||
}
|
||||
}
|
||||
// Create a sample of value dat and weight w
|
||||
Stat1MinMax(const VType &dat, const NumType &w)
|
||||
: Stat1<VType, NumType>(dat, w) {
|
||||
max_ = dat;
|
||||
min_ = dat;
|
||||
}
|
||||
// Copy operator
|
||||
Stat1MinMax(const Self &stat) : Stat1<VType, NumType>(stat) {
|
||||
max_ = stat.max_;
|
||||
min_ = stat.min_;
|
||||
}
|
||||
|
||||
void Clear() {
|
||||
Stat1<VType, NumType>::Clear();
|
||||
if (std::numeric_limits<VType>::has_infinity) {
|
||||
@ -216,90 +240,57 @@ class Stat1MinMax : public Stat1<VType, NumType> {
|
||||
max_ = std::numeric_limits<VType>::min();
|
||||
}
|
||||
}
|
||||
// Create a sample of value dat and weight 1
|
||||
explicit Stat1MinMax(const VType &dat) : Stat1<VType, NumType>(dat) {
|
||||
max_ = dat;
|
||||
min_ = dat;
|
||||
}
|
||||
// Create statistics for all the samples between begin (included)
|
||||
// and end(excluded)
|
||||
explicit Stat1MinMax(const VType *begin, const VType *end) {
|
||||
Clear();
|
||||
for ( const VType *item = begin; item < end; ++item ) {
|
||||
(*this) += Stat1MinMax(*item);
|
||||
}
|
||||
}
|
||||
// Create a sample of value dat and weight w
|
||||
Stat1MinMax(const VType &dat, const NumType &w)
|
||||
: Stat1<VType, NumType>(dat, w) {
|
||||
max_ = dat;
|
||||
min_ = dat;
|
||||
}
|
||||
// Copy operator
|
||||
Stat1MinMax(const Self &stat) : Stat1<VType, NumType>(stat) {
|
||||
max_ = stat.max_;
|
||||
min_ = stat.min_;
|
||||
}
|
||||
inline Self &operator =(const Self &stat) {
|
||||
|
||||
Self& operator=(const Self& stat) {
|
||||
this->Stat1<VType, NumType>::operator=(stat);
|
||||
max_ = stat.max_;
|
||||
min_ = stat.min_;
|
||||
return (*this);
|
||||
}
|
||||
// Merge statistics from two sample sets.
|
||||
inline Self &operator +=(const Self &stat) {
|
||||
Self& operator+=(const Self& stat) {
|
||||
this->Stat1<VType, NumType>::operator+=(stat);
|
||||
if (stat.max_ > max_) max_ = stat.max_;
|
||||
if (stat.min_ < min_) min_ = stat.min_;
|
||||
return (*this);
|
||||
}
|
||||
// Multiply the weight of the set of samples by a factor k
|
||||
inline Self &operator *=(const VType &stat) {
|
||||
Self& operator*=(const VType& stat) {
|
||||
this->Stat1<VType, NumType>::operator*=(stat);
|
||||
return (*this);
|
||||
}
|
||||
// Merge statistics from two sample sets.
|
||||
inline Self operator + (const Self &stat) const {
|
||||
return Self(*this) += stat;
|
||||
}
|
||||
Self operator+(const Self& stat) const { return Self(*this) += stat; }
|
||||
// Multiply the weight of the set of samples by a factor k
|
||||
inline Self operator * (const VType &k) const {
|
||||
return Self(*this) *= k;
|
||||
}
|
||||
Self operator*(const VType& k) const { return Self(*this) *= k; }
|
||||
|
||||
// Return the maximal value in this sample set
|
||||
VType max() const { return max_; }
|
||||
// Return the minimal value in this sample set
|
||||
VType min() const { return min_; }
|
||||
|
||||
private:
|
||||
// The - operation makes no sense with Min/Max
|
||||
// unless we keep the full list of values (but we don't)
|
||||
// make it private, and let it undefined so nobody can call it
|
||||
Self &operator -=(const Self &stat); // senseless. let it undefined.
|
||||
Self &operator-=(const Self& stat); // senseless. let it undefined.
|
||||
|
||||
// The operation opposite to -
|
||||
Self operator - (const Self &stat) const; // senseless. let it undefined.
|
||||
Self operator-(const Self& stat) const; // senseless. let it undefined.
|
||||
|
||||
public:
|
||||
// Return the maximal value in this sample set
|
||||
VType Max() const {
|
||||
return max_;
|
||||
}
|
||||
// Return the minimal value in this sample set
|
||||
VType Min() const {
|
||||
return min_;
|
||||
}
|
||||
private:
|
||||
// Let i be the index of the samples provided (using +=)
|
||||
// and weight[i],value[i] be the data of sample #i
|
||||
// then the variables have the following meaning:
|
||||
VType max_; // max of value[i]
|
||||
VType min_; // min of value[i]
|
||||
// Let i be the index of the samples provided (using +=)
|
||||
// and weight[i],value[i] be the data of sample #i
|
||||
// then the variables have the following meaning:
|
||||
VType max_; // max of value[i]
|
||||
VType min_; // min of value[i]
|
||||
};
|
||||
|
||||
// Useful printing function
|
||||
template <typename VType, typename NumType>
|
||||
inline std::ostream& operator <<(std::ostream& out,
|
||||
const Stat1MinMax<VType, NumType>& s) {
|
||||
out << "{ avg = " << s.Mean()
|
||||
<< " std = " << s.StdDev()
|
||||
<< " nsamples = " << s.NumSamples()
|
||||
<< " min = " << s.Min()
|
||||
std::ostream& operator<<(std::ostream& out,
|
||||
const Stat1MinMax<VType, NumType>& s) {
|
||||
out << "{ avg = " << s.Mean() << " std = " << s.StdDev()
|
||||
<< " nsamples = " << s.NumSamples() << " min = " << s.Min()
|
||||
<< " max = " << s.Max() << "}";
|
||||
return out;
|
||||
}
|
||||
|
109
src/sysinfo.cc
109
src/sysinfo.cc
@ -39,7 +39,7 @@ int64_t EstimateCyclesPerSecond(const int estimate_time_ms) {
|
||||
|
||||
// Helper function for reading an int from a file. Returns true if successful
|
||||
// and the memory location pointed to by value is set to the value read.
|
||||
bool ReadIntFromFile(const char *file, int *value) {
|
||||
bool ReadIntFromFile(const char* file, int* value) {
|
||||
bool ret = false;
|
||||
int fd = open(file, O_RDONLY);
|
||||
if (fd != -1) {
|
||||
@ -76,10 +76,10 @@ void InitializeSystemInfo() {
|
||||
// well.
|
||||
if (!saw_mhz &&
|
||||
ReadIntFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) {
|
||||
// The value is in kHz (as the file name suggests). For example, on a
|
||||
// 2GHz warpstation, the file contains the value "2000000".
|
||||
cpuinfo_cycles_per_second = freq * 1000.0;
|
||||
saw_mhz = true;
|
||||
// The value is in kHz (as the file name suggests). For example, on a
|
||||
// 2GHz warpstation, the file contains the value "2000000".
|
||||
cpuinfo_cycles_per_second = freq * 1000.0;
|
||||
saw_mhz = true;
|
||||
}
|
||||
|
||||
// If CPU scaling is in effect, we want to use the *maximum* frequency,
|
||||
@ -101,7 +101,7 @@ void InitializeSystemInfo() {
|
||||
if (!saw_mhz) {
|
||||
cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000);
|
||||
}
|
||||
return; // TODO: use generic tester instead?
|
||||
return; // TODO: use generic tester instead?
|
||||
}
|
||||
|
||||
double bogo_clock = 1.0;
|
||||
@ -110,48 +110,47 @@ void InitializeSystemInfo() {
|
||||
int num_cpus = 0;
|
||||
line[0] = line[1] = '\0';
|
||||
int chars_read = 0;
|
||||
do { // we'll exit when the last read didn't read anything
|
||||
do { // we'll exit when the last read didn't read anything
|
||||
// Move the next line to the beginning of the buffer
|
||||
const int oldlinelen = strlen(line);
|
||||
if (sizeof(line) == oldlinelen + 1) // oldlinelen took up entire line
|
||||
if (sizeof(line) == oldlinelen + 1) // oldlinelen took up entire line
|
||||
line[0] = '\0';
|
||||
else // still other lines left to save
|
||||
memmove(line, line + oldlinelen+1, sizeof(line) - (oldlinelen+1));
|
||||
else // still other lines left to save
|
||||
memmove(line, line + oldlinelen + 1, sizeof(line) - (oldlinelen + 1));
|
||||
// Terminate the new line, reading more if we can't find the newline
|
||||
char* newline = strchr(line, '\n');
|
||||
if (newline == NULL) {
|
||||
const int linelen = strlen(line);
|
||||
const int bytes_to_read = sizeof(line)-1 - linelen;
|
||||
const int bytes_to_read = sizeof(line) - 1 - linelen;
|
||||
CHECK(bytes_to_read > 0); // because the memmove recovered >=1 bytes
|
||||
chars_read = read(fd, line + linelen, bytes_to_read);
|
||||
line[linelen + chars_read] = '\0';
|
||||
newline = strchr(line, '\n');
|
||||
}
|
||||
if (newline != NULL)
|
||||
*newline = '\0';
|
||||
if (newline != NULL) *newline = '\0';
|
||||
|
||||
// When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only
|
||||
// accept postive values. Some environments (virtual machines) report zero,
|
||||
// which would cause infinite looping in WallTime_Init.
|
||||
if (!saw_mhz && strncasecmp(line, "cpu MHz", sizeof("cpu MHz")-1) == 0) {
|
||||
if (!saw_mhz && strncasecmp(line, "cpu MHz", sizeof("cpu MHz") - 1) == 0) {
|
||||
const char* freqstr = strchr(line, ':');
|
||||
if (freqstr) {
|
||||
cpuinfo_cycles_per_second = strtod(freqstr+1, &err) * 1000000.0;
|
||||
cpuinfo_cycles_per_second = strtod(freqstr + 1, &err) * 1000000.0;
|
||||
if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0)
|
||||
saw_mhz = true;
|
||||
}
|
||||
} else if (strncasecmp(line, "bogomips", sizeof("bogomips")-1) == 0) {
|
||||
} else if (strncasecmp(line, "bogomips", sizeof("bogomips") - 1) == 0) {
|
||||
const char* freqstr = strchr(line, ':');
|
||||
if (freqstr) {
|
||||
bogo_clock = strtod(freqstr+1, &err) * 1000000.0;
|
||||
bogo_clock = strtod(freqstr + 1, &err) * 1000000.0;
|
||||
if (freqstr[1] != '\0' && *err == '\0' && bogo_clock > 0)
|
||||
saw_bogo = true;
|
||||
}
|
||||
} else if (strncasecmp(line, "processor", sizeof("processor")-1) == 0) {
|
||||
} else if (strncasecmp(line, "processor", sizeof("processor") - 1) == 0) {
|
||||
num_cpus++; // count up every time we see an "processor :" entry
|
||||
const char* freqstr = strchr(line, ':');
|
||||
if (freqstr) {
|
||||
const int cpu_id = strtol(freqstr+1, &err, 10);
|
||||
const int cpu_id = strtol(freqstr + 1, &err, 10);
|
||||
if (freqstr[1] != '\0' && *err == '\0' && max_cpu_id < cpu_id)
|
||||
max_cpu_id = cpu_id;
|
||||
}
|
||||
@ -181,17 +180,17 @@ void InitializeSystemInfo() {
|
||||
}
|
||||
|
||||
#elif defined OS_FREEBSD
|
||||
// For this sysctl to work, the machine must be configured without
|
||||
// SMP, APIC, or APM support. hz should be 64-bit in freebsd 7.0
|
||||
// and later. Before that, it's a 32-bit quantity (and gives the
|
||||
// wrong answer on machines faster than 2^32 Hz). See
|
||||
// http://lists.freebsd.org/pipermail/freebsd-i386/2004-November/001846.html
|
||||
// But also compare FreeBSD 7.0:
|
||||
// http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG70#L223
|
||||
// 231 error = sysctl_handle_quad(oidp, &freq, 0, req);
|
||||
// To FreeBSD 6.3 (it's the same in 6-STABLE):
|
||||
// http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG6#L131
|
||||
// 139 error = sysctl_handle_int(oidp, &freq, sizeof(freq), req);
|
||||
// For this sysctl to work, the machine must be configured without
|
||||
// SMP, APIC, or APM support. hz should be 64-bit in freebsd 7.0
|
||||
// and later. Before that, it's a 32-bit quantity (and gives the
|
||||
// wrong answer on machines faster than 2^32 Hz). See
|
||||
// http://lists.freebsd.org/pipermail/freebsd-i386/2004-November/001846.html
|
||||
// But also compare FreeBSD 7.0:
|
||||
// http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG70#L223
|
||||
// 231 error = sysctl_handle_quad(oidp, &freq, 0, req);
|
||||
// To FreeBSD 6.3 (it's the same in 6-STABLE):
|
||||
// http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG6#L131
|
||||
// 139 error = sysctl_handle_int(oidp, &freq, sizeof(freq), req);
|
||||
#if __FreeBSD__ >= 7
|
||||
uint64_t hz = 0;
|
||||
#else
|
||||
@ -199,31 +198,31 @@ void InitializeSystemInfo() {
|
||||
#endif
|
||||
size_t sz = sizeof(hz);
|
||||
const char *sysctl_path = "machdep.tsc_freq";
|
||||
if ( sysctlbyname(sysctl_path, &hz, &sz, NULL, 0) != 0 ) {
|
||||
if (sysctlbyname(sysctl_path, &hz, &sz, NULL, 0) != 0) {
|
||||
fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n",
|
||||
sysctl_path, strerror(errno));
|
||||
cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000);
|
||||
} else {
|
||||
cpuinfo_cycles_per_second = hz;
|
||||
}
|
||||
// TODO: also figure out cpuinfo_num_cpus
|
||||
// TODO: also figure out cpuinfo_num_cpus
|
||||
|
||||
#elif defined OS_WINDOWS
|
||||
# pragma comment(lib, "shlwapi.lib") // for SHGetValue()
|
||||
#pragma comment(lib, "shlwapi.lib") // for SHGetValue()
|
||||
// In NT, read MHz from the registry. If we fail to do so or we're in win9x
|
||||
// then make a crude estimate.
|
||||
OSVERSIONINFO os;
|
||||
os.dwOSVersionInfoSize = sizeof(os);
|
||||
DWORD data, data_size = sizeof(data);
|
||||
if (GetVersionEx(&os) &&
|
||||
os.dwPlatformId == VER_PLATFORM_WIN32_NT &&
|
||||
SUCCEEDED(SHGetValueA(HKEY_LOCAL_MACHINE,
|
||||
"HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
|
||||
"~MHz", NULL, &data, &data_size)))
|
||||
cpuinfo_cycles_per_second = (int64)data * (int64)(1000 * 1000); // was mhz
|
||||
if (GetVersionEx(&os) && os.dwPlatformId == VER_PLATFORM_WIN32_NT &&
|
||||
SUCCEEDED(
|
||||
SHGetValueA(HKEY_LOCAL_MACHINE,
|
||||
"HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
|
||||
"~MHz", NULL, &data, &data_size)))
|
||||
cpuinfo_cycles_per_second = (int64)data * (int64)(1000 * 1000); // was mhz
|
||||
else
|
||||
cpuinfo_cycles_per_second = EstimateCyclesPerSecond(500); // TODO <500?
|
||||
// TODO: also figure out cpuinfo_num_cpus
|
||||
cpuinfo_cycles_per_second = EstimateCyclesPerSecond(500); // TODO <500?
|
||||
// TODO: also figure out cpuinfo_num_cpus
|
||||
|
||||
#elif defined OS_MACOSX
|
||||
// returning "mach time units" per second. the current number of elapsed
|
||||
@ -243,10 +242,10 @@ void InitializeSystemInfo() {
|
||||
|
||||
int num_cpus = 0;
|
||||
size_t size = sizeof(num_cpus);
|
||||
int numcpus_name[] = { CTL_HW, HW_NCPU };
|
||||
if (::sysctl(numcpus_name, arraysize(numcpus_name), &num_cpus, &size, 0, 0)
|
||||
== 0
|
||||
&& (size == sizeof(num_cpus)))
|
||||
int numcpus_name[] = {CTL_HW, HW_NCPU};
|
||||
if (::sysctl(numcpus_name, arraysize(numcpus_name), &num_cpus, &size, 0, 0) ==
|
||||
0 &&
|
||||
(size == sizeof(num_cpus)))
|
||||
cpuinfo_num_cpus = num_cpus;
|
||||
|
||||
#else
|
||||
@ -261,16 +260,16 @@ void InitializeSystemInfo() {
|
||||
static double MyCPUUsageRUsage() {
|
||||
struct rusage ru;
|
||||
if (getrusage(RUSAGE_SELF, &ru) == 0) {
|
||||
return (static_cast<double>(ru.ru_utime.tv_sec) +
|
||||
static_cast<double>(ru.ru_utime.tv_usec)*1e-6 +
|
||||
static_cast<double>(ru.ru_stime.tv_sec) +
|
||||
static_cast<double>(ru.ru_stime.tv_usec)*1e-6);
|
||||
return (static_cast<double>(ru.ru_utime.tv_sec) +
|
||||
static_cast<double>(ru.ru_utime.tv_usec) * 1e-6 +
|
||||
static_cast<double>(ru.ru_stime.tv_sec) +
|
||||
static_cast<double>(ru.ru_stime.tv_usec) * 1e-6);
|
||||
} else {
|
||||
return 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
static bool MyCPUUsageCPUTimeNsLocked(double *cputime) {
|
||||
static bool MyCPUUsageCPUTimeNsLocked(double* cputime) {
|
||||
static int cputime_fd = -1;
|
||||
if (cputime_fd == -1) {
|
||||
cputime_fd = open("/proc/self/cputime_ns", O_RDONLY);
|
||||
@ -281,7 +280,7 @@ static bool MyCPUUsageCPUTimeNsLocked(double *cputime) {
|
||||
}
|
||||
char buff[64];
|
||||
memset(buff, 0, sizeof(buff));
|
||||
if (pread(cputime_fd, buff, sizeof(buff)-1, 0) <= 0) {
|
||||
if (pread(cputime_fd, buff, sizeof(buff) - 1, 0) <= 0) {
|
||||
close(cputime_fd);
|
||||
cputime_fd = -1;
|
||||
return false;
|
||||
@ -316,10 +315,10 @@ double MyCPUUsage() {
|
||||
double ChildrenCPUUsage() {
|
||||
struct rusage ru;
|
||||
if (getrusage(RUSAGE_CHILDREN, &ru) == 0) {
|
||||
return (static_cast<double>(ru.ru_utime.tv_sec) +
|
||||
static_cast<double>(ru.ru_utime.tv_usec)*1e-6 +
|
||||
static_cast<double>(ru.ru_stime.tv_sec) +
|
||||
static_cast<double>(ru.ru_stime.tv_usec)*1e-6);
|
||||
return (static_cast<double>(ru.ru_utime.tv_sec) +
|
||||
static_cast<double>(ru.ru_utime.tv_usec) * 1e-6 +
|
||||
static_cast<double>(ru.ru_stime.tv_sec) +
|
||||
static_cast<double>(ru.ru_stime.tv_usec) * 1e-6);
|
||||
} else {
|
||||
return 0.0;
|
||||
}
|
||||
|
@ -75,8 +75,8 @@ void Initialize() {
|
||||
cycles_per_second = static_cast<int64_t>(CyclesPerSecond());
|
||||
CHECK(cycles_per_second != 0);
|
||||
seconds_per_cycle = 1.0 / cycles_per_second;
|
||||
max_interval_cycles = static_cast<int64_t>(
|
||||
cycles_per_second * kMaxErrorInterval);
|
||||
max_interval_cycles =
|
||||
static_cast<int64_t>(cycles_per_second * kMaxErrorInterval);
|
||||
do {
|
||||
base_cycletime = cycleclock::Now();
|
||||
base_walltime = Slow();
|
||||
@ -90,8 +90,7 @@ void Initialize() {
|
||||
}
|
||||
|
||||
WallTime Now() {
|
||||
if (!std::atomic_load(&initialized))
|
||||
return Slow();
|
||||
if (!std::atomic_load(&initialized)) return Slow();
|
||||
|
||||
WallTime now = 0.0;
|
||||
WallTime result = 0.0;
|
||||
@ -105,7 +104,7 @@ WallTime Now() {
|
||||
top_bits = static_cast<uint32_t>(uint64_t(ct) >> 32);
|
||||
// Recompute drift no more often than every 2^32 cycles.
|
||||
// I.e., @2GHz, ~ every two seconds
|
||||
if (top_bits == last_adjust_time) { // don't need to recompute drift
|
||||
if (top_bits == last_adjust_time) { // don't need to recompute drift
|
||||
return result + GetDrift();
|
||||
}
|
||||
|
||||
@ -119,8 +118,8 @@ WallTime Now() {
|
||||
return now;
|
||||
}
|
||||
|
||||
std::string Print(WallTime time, const char *format, bool local,
|
||||
int *remainder_us) {
|
||||
std::string Print(WallTime time, const char* format, bool local,
|
||||
int* remainder_us) {
|
||||
char storage[32];
|
||||
struct tm split;
|
||||
double subsecond;
|
||||
@ -130,7 +129,7 @@ std::string Print(WallTime time, const char *format, bool local,
|
||||
if (remainder_us != NULL) {
|
||||
*remainder_us = static_cast<int>((subsecond * 1000000) + 0.5);
|
||||
if (*remainder_us > 999999) *remainder_us = 999999;
|
||||
if (*remainder_us < 0) *remainder_us = 0;
|
||||
if (*remainder_us < 0) *remainder_us = 0;
|
||||
}
|
||||
strftime(storage, sizeof(storage), format, &split);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user