Code reformat

This commit is contained in:
Dominic Hamon 2014-01-09 12:12:11 -08:00
parent 8009351938
commit 4ce184d86d
12 changed files with 458 additions and 477 deletions

View File

@ -194,7 +194,7 @@ class State {
//
// REQUIRES: a benchmark has exited its KeepRunning loop.
void SetBytesProcessed(int64_t bytes);
// If this routine is called with items > 0, then an items/s
// label is printed on the benchmark report line for the currently
// executing benchmark. It is typically called at the end of a processing
@ -243,11 +243,11 @@ class State {
void Wait();
// Phase of this State's run. KeepRunning() switches on state_ to decide what
// to do next (StartRunning, FinishInterval or MaybeStop); reaching it in
// STATE_STARTING or STATE_STOPPED trips a CHECK.
// (Diff view: each enumerator appears twice -- the pre-reformat line followed
// by the post-reformat line.)
enum EState {
STATE_INITIAL, // KeepRunning hasn't been called
STATE_STARTING, // KeepRunning called, waiting for other threads
STATE_RUNNING, // Running and being timed
STATE_STOPPING, // Not being timed but waiting for other threads
STATE_STOPPED, // Stopped
STATE_INITIAL, // KeepRunning hasn't been called
STATE_STARTING, // KeepRunning called, waiting for other threads
STATE_RUNNING, // Running and being timed
STATE_STOPPING, // Not being timed but waiting for other threads
STATE_STOPPED, // Stopped
};
EState state_;
@ -414,8 +414,8 @@ class Benchmark {
static void RunInstance(const Instance& b, BenchmarkReporter* br);
friend class ::benchmark::State;
friend struct ::benchmark::internal::Benchmark::Instance;
friend void ::benchmark::internal::RunMatchingBenchmarks(
const std::string&, BenchmarkReporter*);
friend void ::benchmark::internal::RunMatchingBenchmarks(const std::string&,
BenchmarkReporter*);
DISALLOW_COPY_AND_ASSIGN(Benchmark);
};
@ -425,7 +425,7 @@ class Benchmark {
struct BenchmarkContextData {
int num_cpus;
double mhz_per_cpu;
//std::string cpu_info;
// std::string cpu_info;
bool cpu_scaling_enabled;
// The number of chars in the longest benchmark name.
@ -433,14 +433,14 @@ struct BenchmarkContextData {
};
struct BenchmarkRunData {
// Default-constructs a run record: thread_index is -1, iterations is 1, and
// the accumulated times, per-second rates and max heap bytes all start at 0.
// The string members are left default-constructed (empty).
// (Diff view: the old initializer-list layout is followed by the reformatted
// one; the initial values are the same in both.)
BenchmarkRunData() :
thread_index(-1),
iterations(1),
real_accumulated_time(0),
cpu_accumulated_time(0),
bytes_per_second(0),
items_per_second(0),
max_heapbytes_used(0) {}
BenchmarkRunData()
: thread_index(-1),
iterations(1),
real_accumulated_time(0),
cpu_accumulated_time(0),
bytes_per_second(0),
items_per_second(0),
max_heapbytes_used(0) {}
std::string benchmark_name;
std::string report_label;
@ -481,15 +481,13 @@ class BenchmarkReporter {
virtual ~BenchmarkReporter();
};
// ------------------------------------------------------
// Internal implementation details follow; please ignore
// Given a collection of reports, computes their mean and stddev.
// REQUIRES: all runs in "reports" must be from the same benchmark.
void ComputeStats(const std::vector<BenchmarkRunData>& reports,
BenchmarkRunData* mean_data,
BenchmarkRunData* stddev_data);
BenchmarkRunData* mean_data, BenchmarkRunData* stddev_data);
// Simple reporter that outputs benchmark data to the console. This is the
// default reporter used by RunSpecifiedBenchmarks().
@ -497,6 +495,7 @@ class ConsoleReporter : public BenchmarkReporter {
public:
virtual bool ReportContext(const BenchmarkContextData& context);
virtual void ReportRuns(const std::vector<BenchmarkRunData>& reports);
private:
std::string PrintMemoryUsage(double bytes);
virtual void PrintRunData(const BenchmarkRunData& report);
@ -513,11 +512,11 @@ void Initialize(int* argc, const char** argv);
// Helpers for generating unique variable names
// BENCHMARK_CONCAT forwards to BENCHMARK_CONCAT2, which does the actual token
// pasting. The extra level lets macro arguments such as __LINE__ be expanded
// to their values before ## is applied to them.
#define BENCHMARK_CONCAT(a, b, c) BENCHMARK_CONCAT2(a, b, c)
#define BENCHMARK_CONCAT2(a, b, c) a ## b ## c
#define BENCHMARK_CONCAT2(a, b, c) a##b##c
// BENCHMARK(n) declares a file-static Benchmark* named __benchmark_<n><line>
// and initializes it with a heap-allocated Benchmark whose name is the
// stringized function name (#n) and whose function is n. The expansion has no
// trailing semicolon, so callers can append "->Range(...)" and similar calls,
// as the old-style macros in this header do.
// (Diff view: the old three-line layout is followed by the reformatted one.)
#define BENCHMARK(n) \
static ::benchmark::internal::Benchmark* \
BENCHMARK_CONCAT(__benchmark_, n, __LINE__) ATTRIBUTE_UNUSED = \
#define BENCHMARK(n) \
static ::benchmark::internal::Benchmark* BENCHMARK_CONCAT( \
__benchmark_, n, __LINE__) ATTRIBUTE_UNUSED = \
(new ::benchmark::internal::Benchmark(#n, n))
// Old-style macros
@ -525,7 +524,7 @@ void Initialize(int* argc, const char** argv);
#define BENCHMARK_WITH_ARG2(n, a1, a2) BENCHMARK(n)->ArgPair((a1), (a2))
#define BENCHMARK_RANGE(n, lo, hi) BENCHMARK(n)->Range((lo), (hi))
#define BENCHMARK_RANGE2(n, l1, h1, l2, h2) \
BENCHMARK(n)->RangePair((l1), (h1), (l2), (h2))
BENCHMARK(n)->RangePair((l1), (h1), (l2), (h2))
// This will register a benchmark for a templatized function. For example:
//
@ -535,14 +534,14 @@ void Initialize(int* argc, const char** argv);
// BENCHMARK_TEMPLATE(BM_Foo, 1);
//
// will register BM_Foo<1> as a benchmark.
// One-template-argument form: registers n<a> under the display name "n<a>",
// built by concatenating the stringized pieces #n "<" #a ">".
// The generated variable name uses only n and __LINE__, not a, so two
// BENCHMARK_TEMPLATE uses of the same n on one source line would collide.
// (Diff view: the old layout is followed by the reformatted one.)
#define BENCHMARK_TEMPLATE(n, a) \
static ::benchmark::internal::Benchmark* \
BENCHMARK_CONCAT(__benchmark_, n, __LINE__) ATTRIBUTE_UNUSED = \
#define BENCHMARK_TEMPLATE(n, a) \
static ::benchmark::internal::Benchmark* BENCHMARK_CONCAT( \
__benchmark_, n, __LINE__) ATTRIBUTE_UNUSED = \
(new ::benchmark::internal::Benchmark(#n "<" #a ">", n<a>))
// Two-template-argument form: registers n<a, b> under the display name
// "n<a,b>" (no space after the comma in the generated name).
// As with BENCHMARK_TEMPLATE, the variable name depends only on n and
// __LINE__.
// (Diff view: the old layout is followed by the reformatted one.)
#define BENCHMARK_TEMPLATE2(n, a, b) \
static ::benchmark::internal::Benchmark* \
BENCHMARK_CONCAT(__benchmark_, n, __LINE__) ATTRIBUTE_UNUSED = \
#define BENCHMARK_TEMPLATE2(n, a, b) \
static ::benchmark::internal::Benchmark* BENCHMARK_CONCAT( \
__benchmark_, n, __LINE__) ATTRIBUTE_UNUSED = \
(new ::benchmark::internal::Benchmark(#n "<" #a "," #b ">", n<a, b>))
#endif // BENCHMARK_BENCHMARK_H_

View File

@ -34,7 +34,10 @@ char (&ArraySizeHelper(const T (&array)[N]))[N];
#define arraysize(array) (sizeof(ArraySizeHelper(array)))
// CHECK(b): evaluates b and, if it is false, calls assert(false).
// The do { ... } while (0) wrapper makes the macro behave as a single
// statement (safe inside an unbraced if/else).
// Note that b sits outside the assert, so it is evaluated in every build;
// only the failure action disappears when assert is compiled out (NDEBUG).
// The failure message names "false", not the original expression.
// (Diff view: the old one-line form is followed by the reformatted one.)
#define CHECK(b) do { if (!(b)) assert(false); } while(0)
#define CHECK(b) \
do { \
if (!(b)) assert(false); \
} while (0)
#define CHECK_EQ(a, b) CHECK((a) == (b))
#define CHECK_NE(a, b) CHECK((a) != (b))
#define CHECK_GE(a, b) CHECK((a) >= (b))
@ -45,14 +48,14 @@ char (&ArraySizeHelper(const T (&array)[N]))[N];
//
// Prevent the compiler from complaining about or optimizing away variables
// that appear unused.
// These wrappers expand to the GCC-style __attribute__((...)) syntax.
// NOTE(review): no alternative definition for other compilers is visible in
// this hunk -- confirm whether one exists elsewhere in the header.
// (Diff view: each changed #define is shown in its old form, then its new
// form; only the space before the parenthesis differs.)
#define ATTRIBUTE_UNUSED __attribute__ ((unused))
#define ATTRIBUTE_UNUSED __attribute__((unused))
//
// For functions we want to force inline or not inline.
// Introduced in gcc 3.1.
#define ATTRIBUTE_ALWAYS_INLINE __attribute__ ((always_inline))
#define ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline))
// Feature flag set alongside the attribute so code can test for it with #if.
#define HAVE_ATTRIBUTE_ALWAYS_INLINE 1
#define ATTRIBUTE_NOINLINE __attribute__ ((noinline))
#define ATTRIBUTE_NOINLINE __attribute__((noinline))
// Feature flag set alongside the attribute so code can test for it with #if.
#define HAVE_ATTRIBUTE_NOINLINE 1
#endif // BENCHMARK_MACROS_H_

View File

@ -58,14 +58,14 @@ DEFINE_bool(color_print, true, "Enables colorized logging.");
DECLARE_string(heap_check);
// The ""'s catch people who don't pass in a literal for "str"
// strliterallen(str): length of a string literal excluding its terminating
// NUL, computed with sizeof. Pasting "" on both sides only
// compiles when str is itself a string literal.
// (Diff view: old form followed by the reformatted form.)
#define strliterallen(str) (sizeof("" str "")-1)
#define strliterallen(str) (sizeof("" str "") - 1)
// Must use a string literal for prefix.
// memprefix(str, len, prefix): if len is at least the prefix length and the
// first bytes of str equal prefix (memcmp, so str need not be
// NUL-terminated), yields a pointer just past the prefix; otherwise NULL.
// str is not parenthesized in the expansion and is evaluated more than once,
// so pass a plain pointer expression without side effects.
#define memprefix(str, len, prefix) \
( (((len) >= strliterallen(prefix)) \
&& memcmp(str, prefix, strliterallen(prefix)) == 0) \
? str + strliterallen(prefix) \
: NULL )
#define memprefix(str, len, prefix) \
((((len) >= strliterallen(prefix)) && \
memcmp(str, prefix, strliterallen(prefix)) == 0) \
? str + strliterallen(prefix) \
: NULL)
namespace benchmark {
namespace {
@ -83,9 +83,8 @@ static_assert(arraysize(kSmallSIUnits) == arraysize(kBigSIUnits),
"Small SI and Big SI unit arrays must be the same size");
static const int kUnitsSize = arraysize(kBigSIUnits);
void ToExponentAndMantissa(double val, double thresh,
int precision, double one_k,
std::string* mantissa, int* exponent) {
void ToExponentAndMantissa(double val, double thresh, int precision,
double one_k, std::string* mantissa, int* exponent) {
std::stringstream mantissa_stream;
if (val < 0) {
@ -136,15 +135,13 @@ void ToExponentAndMantissa(double val, double thresh,
}
std::string ExponentToPrefix(int exponent, bool iec) {
if (exponent == 0)
return "";
if (exponent == 0) return "";
const int index = (exponent > 0 ? exponent - 1 : -exponent - 1);
if (index >= kUnitsSize)
return "";
if (index >= kUnitsSize) return "";
const char *array = (exponent > 0 ? (iec ? kBigIECUnits : kBigSIUnits) :
kSmallSIUnits);
const char* array =
(exponent > 0 ? (iec ? kBigIECUnits : kBigSIUnits) : kSmallSIUnits);
if (iec)
return array[index] + std::string("i");
else
@ -207,7 +204,7 @@ const char* Prefix() {
}
// TODO
//static internal::MallocCounter *benchmark_mc;
// static internal::MallocCounter *benchmark_mc;
bool CpuScalingEnabled() {
// On Linux, the CPUfreq subsystem exposes CPU information as files on the
@ -218,13 +215,11 @@ bool CpuScalingEnabled() {
ss << "/sys/devices/system/cpu/cpu" << cpu << "/cpufreq/scaling_governor";
std::string governor_file = ss.str();
FILE* file = fopen(governor_file.c_str(), "r");
if (!file)
break;
if (!file) break;
char buff[16];
size_t bytes_read = fread(buff, 1, sizeof(buff), file);
fclose(file);
if (memprefix(buff, bytes_read, "performance") == NULL)
return true;
if (memprefix(buff, bytes_read, "performance") == NULL) return true;
}
return false;
}
@ -236,8 +231,7 @@ namespace internal {
BenchmarkReporter::~BenchmarkReporter() {}
void ComputeStats(const std::vector<BenchmarkRunData>& reports,
BenchmarkRunData* mean_data,
BenchmarkRunData* stddev_data) {
BenchmarkRunData* mean_data, BenchmarkRunData* stddev_data) {
// Accumulators.
Stat1_d real_accumulated_time_stat;
Stat1_d cpu_accumulated_time_stat;
@ -257,8 +251,8 @@ void ComputeStats(const std::vector<BenchmarkRunData>& reports,
items_per_second_stat += Stat1_d(it->items_per_second, it->iterations);
bytes_per_second_stat += Stat1_d(it->bytes_per_second, it->iterations);
iterations_stat += Stat1_d(it->iterations, it->iterations);
max_heapbytes_used_stat += Stat1MinMax_d(it->max_heapbytes_used,
it->iterations);
max_heapbytes_used_stat +=
Stat1MinMax_d(it->max_heapbytes_used, it->iterations);
}
// Get the data from the accumulator to BenchmarkRunData's.
@ -268,7 +262,7 @@ void ComputeStats(const std::vector<BenchmarkRunData>& reports,
mean_data->cpu_accumulated_time = cpu_accumulated_time_stat.Mean();
mean_data->bytes_per_second = bytes_per_second_stat.Mean();
mean_data->items_per_second = items_per_second_stat.Mean();
mean_data->max_heapbytes_used = max_heapbytes_used_stat.Max();
mean_data->max_heapbytes_used = max_heapbytes_used_stat.max();
// Only add label to mean/stddev if it is same for all runs
mean_data->report_label = reports[0].report_label;
@ -290,8 +284,7 @@ void ComputeStats(const std::vector<BenchmarkRunData>& reports,
}
std::string ConsoleReporter::PrintMemoryUsage(double bytes) {
if (!get_memory_usage || bytes < 0.0)
return "";
if (!get_memory_usage || bytes < 0.0) return "";
std::stringstream ss;
ss << " " << HumanReadableNumber(bytes) << "B peak-mem";
@ -307,12 +300,12 @@ bool ConsoleReporter::ReportContext(const BenchmarkContextData& context) {
int remainder_ms;
std::cout << walltime::Print(walltime::Now(), "%Y/%m/%d-%H:%M:%S",
true, // use local timezone
&remainder_ms) << "\n";
true, // use local timezone
&remainder_ms) << "\n";
// Show details of CPU model, caches, TLBs etc.
// if (!context.cpu_info.empty())
// std::cout << "CPU: " << context.cpu_info.c_str();
// if (!context.cpu_info.empty())
// std::cout << "CPU: " << context.cpu_info.c_str();
if (context.cpu_scaling_enabled) {
std::cerr << "CPU scaling is enabled: Benchmark timings may be noisy.\n";
@ -334,8 +327,7 @@ void ConsoleReporter::ReportRuns(const std::vector<BenchmarkRunData>& reports) {
}
// We don't report aggregated data if there was a single run.
if (reports.size() < 2)
return;
if (reports.size() < 2) return;
BenchmarkRunData mean_data;
BenchmarkRunData stddev_data;
@ -379,45 +371,42 @@ void ConsoleReporter::PrintRunData(const BenchmarkRunData& result) {
}
// Turns on memory-usage reporting by setting the get_memory_usage flag, which
// PrintMemoryUsage() and MeasurePeakHeapMemory() test before doing anything.
// The commented-out branch would instead reset an active MallocCounter
// (benchmark_mc); that counter is itself commented out as a TODO in this
// file, so only the flag assignment is live.
// (Diff view: changed lines appear in old form, then new form.)
void MemoryUsage() {
//if (benchmark_mc) {
// if (benchmark_mc) {
// benchmark_mc->Reset();
//} else {
get_memory_usage = true;
get_memory_usage = true;
//}
}
// Sets the use_real_time flag. Elsewhere in this file that flag selects
// FastClock::REAL_TIME over CPU_TIME and makes the bytes-per-second
// computation use real_accumulated_time instead of cpu_accumulated_time.
// (Diff view: old three-line form followed by the reformatted one-liner.)
void UseRealTime() {
use_real_time = true;
}
void UseRealTime() { use_real_time = true; }
// Prints the list of supported command-line flags to stdout and terminates
// the process with exit status 0. Does not return.
// The --benchmark_memory_usage line is commented out of the usage text,
// matching the commented-out flag parsing in ParseCommandLineFlags().
// (Diff view: the old fprintf layout is followed by the reformatted one.)
void PrintUsageAndExit() {
fprintf(stdout, "benchmark [--benchmark_filter=<regex>]\n"
" [--benchmark_iterations=<iterations>]\n"
" [--benchmark_min_time=<min_time>]\n"
// " [--benchmark_memory_usage]\n"
" [--benchmark_repetitions=<num_repetitions>]\n"
" [--color_print={true|false}]\n"
" [--v=<verbosity>]\n");
fprintf(stdout,
"benchmark [--benchmark_filter=<regex>]\n"
" [--benchmark_iterations=<iterations>]\n"
" [--benchmark_min_time=<min_time>]\n"
//" [--benchmark_memory_usage]\n"
" [--benchmark_repetitions=<num_repetitions>]\n"
" [--color_print={true|false}]\n"
" [--v=<verbosity>]\n");
exit(0);
}
void ParseCommandLineFlags(int* argc, const char** argv) {
for (int i = 1; i < *argc; ++i) {
if (ParseStringFlag(argv[i], "benchmark_filter",
&FLAGS_benchmark_filter) ||
if (ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) ||
ParseInt32Flag(argv[i], "benchmark_iterations",
&FLAGS_benchmark_iterations) ||
ParseDoubleFlag(argv[i], "benchmark_min_time",
&FLAGS_benchmark_min_time) ||
// TODO(dominic)
// ParseBoolFlag(argv[i], "gbenchmark_memory_usage",
// &FLAGS_gbenchmark_memory_usage) ||
// ParseBoolFlag(argv[i], "gbenchmark_memory_usage",
// &FLAGS_gbenchmark_memory_usage) ||
ParseInt32Flag(argv[i], "benchmark_repetitions",
&FLAGS_benchmark_repetitions) ||
ParseBoolFlag(argv[i], "color_print", &FLAGS_color_print) ||
ParseInt32Flag(argv[i], "v", &FLAGS_v)) {
for (int j = i; j != *argc; ++j)
argv[j] = argv[j + 1];
for (int j = i; j != *argc; ++j) argv[j] = argv[j + 1];
--(*argc);
--i;
@ -431,9 +420,11 @@ void ParseCommandLineFlags(int* argc, const char** argv) {
// A clock that provides a fast mechanism to check if we're nearly done.
class State::FastClock {
public:
enum Type { REAL_TIME, CPU_TIME };
explicit FastClock(Type type)
: type_(type), approx_time_(NowMicros()) {
enum Type {
REAL_TIME,
CPU_TIME
};
explicit FastClock(Type type) : type_(type), approx_time_(NowMicros()) {
sem_init(&bg_done_, 0, 0);
pthread_create(&bg_, NULL, &BGThreadWrapper, this);
}
@ -449,7 +440,7 @@ class State::FastClock {
// Returns true once the cached approximate time (approx_time_, in
// microseconds) is at or past when_micros. It reads the cached value with an
// atomic load rather than querying the clock; the value is refreshed by the
// background loop that stores NowMicros() after each SleepForMicroseconds(1000),
// so the answer can lag real time by about that sleep interval.
// (Diff view: the commented-out legacy line appears in old and new form.)
inline bool HasReached(int64_t when_micros) {
return std::atomic_load(&approx_time_) >= when_micros;
// NOTE: this is the same as we're dealing with an int64_t
//return (base::subtle::NoBarrier_Load(&approx_time_) >= when_micros);
// return (base::subtle::NoBarrier_Load(&approx_time_) >= when_micros);
}
// Returns the current time in microseconds past the epoch.
@ -493,7 +484,7 @@ class State::FastClock {
SleepForMicroseconds(1000);
std::atomic_store(&approx_time_, NowMicros());
// NOTE: same code but no memory barrier. think on it.
//base::subtle::Release_Store(&approx_time_, NowMicros());
// base::subtle::Release_Store(&approx_time_, NowMicros());
sem_getvalue(&bg_done_, &done);
} while (done == 0);
}
@ -523,17 +514,21 @@ namespace internal {
// Information kept per benchmark we may want to run
// One concrete benchmark to run: a registered Benchmark plus a thread count
// and optional X/Y range arguments.
// (Diff view: the constructor initializer list and the member declarations
// each appear in old layout, then in reformatted layout.)
struct Benchmark::Instance {
// Defaults: no owning benchmark, one thread, neither range set (both range
// values hold the kNoRange sentinel).
Instance()
: bm(nullptr), threads(1), rangeXset(false), rangeX(kNoRange),
rangeYset(false), rangeY(kNoRange) {}
: bm(nullptr),
threads(1),
rangeXset(false),
rangeX(kNoRange),
rangeYset(false),
rangeY(kNoRange) {}
std::string name;
Benchmark* bm;
int threads; // Number of concurrent threads to use
int threads; // Number of concurrent threads to use
// The *set flags record whether the matching range value is meaningful;
// State::range_y() CHECKs rangeYset before returning rangeY.
bool rangeXset;
int rangeX;
bool rangeYset;
int rangeY;
bool rangeXset;
int rangeX;
bool rangeYset;
int rangeY;
// True when the owning Benchmark has a non-empty thread_counts_ list.
// Dereferences bm, so it must not be called while bm is still nullptr.
bool multithreaded() const { return !bm->thread_counts_.empty(); }
};
@ -551,14 +546,14 @@ struct State::SharedState {
std::string label;
// Builds the state shared by all threads of one benchmark run and
// initializes its mutex. b may be null (MeasureOverhead() passes nullptr);
// in that case the thread count falls back to 1, otherwise it is b->threads.
// The starting/stopping counters begin at 0.
// (Diff view: the first initializer line appears in old and new layout.)
explicit SharedState(const internal::Benchmark::Instance* b)
: instance(b), starting(0), stopping(0),
: instance(b),
starting(0),
stopping(0),
threads(b == nullptr ? 1 : b->threads) {
pthread_mutex_init(&mu, nullptr);
}
// Destroys the mutex that the constructor initialized.
// (Diff view: old three-line form followed by the reformatted one-liner.)
~SharedState() {
pthread_mutex_destroy(&mu);
}
~SharedState() { pthread_mutex_destroy(&mu); }
DISALLOW_COPY_AND_ASSIGN(SharedState);
};
@ -567,8 +562,7 @@ namespace internal {
// Registers a new benchmark family: stores the name and function, creates the
// global `families` vector on first use, and appends this object to it.
// registration_index_ remembers the slot so the destructor can null it out.
// NOTE(review): this locks benchmark_mutex, but the only initialization of
// that mutex visible in this file is pthread_mutex_init() inside
// Initialize(), while the BENCHMARK macros construct Benchmark objects from
// static initializers. Confirm the mutex also has a static initializer at its
// declaration (not visible here).
// (Diff view: the lazy-allocation `if` appears in old and new layout.)
Benchmark::Benchmark(const char* name, BenchmarkFunction f)
: name_(name), function_(f) {
mutex_lock l(&benchmark_mutex);
if (families == nullptr)
families = new std::vector<Benchmark*>();
if (families == nullptr) families = new std::vector<Benchmark*>();
registration_index_ = families->size();
families->push_back(this);
}
@ -578,8 +572,7 @@ Benchmark::~Benchmark() {
CHECK((*families)[registration_index_] == this);
(*families)[registration_index_] = NULL;
// Shrink the vector if convenient.
while (!families->empty() && families->back() == NULL)
families->pop_back();
while (!families->empty() && families->back() == NULL) families->pop_back();
}
Benchmark* Benchmark::Arg(int x) {
@ -593,8 +586,7 @@ Benchmark* Benchmark::Range(int start, int limit) {
AddRange(&arglist, start, limit, kRangeMultiplier);
mutex_lock l(&benchmark_mutex);
for (size_t i = 0; i < arglist.size(); ++i)
rangeX_.push_back(arglist[i]);
for (size_t i = 0; i < arglist.size(); ++i) rangeX_.push_back(arglist[i]);
return this;
}
@ -602,8 +594,7 @@ Benchmark* Benchmark::DenseRange(int start, int limit) {
CHECK_GE(start, 0);
CHECK_LE(start, limit);
mutex_lock l(&benchmark_mutex);
for (int arg = start; arg <= limit; ++arg)
rangeX_.push_back(arg);
for (int arg = start; arg <= limit; ++arg) rangeX_.push_back(arg);
return this;
}
@ -662,14 +653,13 @@ void Benchmark::AddRange(std::vector<int>* dst, int lo, int hi, int mult) {
dst->push_back(lo);
// Now space out the benchmarks in multiples of "mult"
for (int32_t i = 1; i < std::numeric_limits<int32_t>::max()/mult; i *= mult) {
for (int32_t i = 1; i < std::numeric_limits<int32_t>::max() / mult;
i *= mult) {
if (i >= hi) break;
if (i > lo)
dst->push_back(i);
if (i > lo) dst->push_back(i);
}
// Add "hi" (if different from "lo")
if (hi != lo)
dst->push_back(hi);
if (hi != lo) dst->push_back(hi);
}
std::vector<Benchmark::Instance> Benchmark::CreateBenchmarkInstances(
@ -767,9 +757,10 @@ void Benchmark::MeasureOverhead() {
State::FastClock clock(State::FastClock::CPU_TIME);
State::SharedState state(nullptr);
State runner(&clock, &state, 0);
while (runner.KeepRunning()) {}
while (runner.KeepRunning()) {
}
overhead = state.runs[0].real_accumulated_time /
static_cast<double>(state.runs[0].iterations);
static_cast<double>(state.runs[0].iterations);
#ifdef DEBUG
std::cout << "Per-iteration overhead for doing nothing: " << overhead << "\n";
#endif
@ -796,33 +787,32 @@ void Benchmark::RunInstance(const Instance& b, BenchmarkReporter* br) {
runners[i]->Run();
}
if (b.multithreaded()) {
for (int i = 0; i < b.threads; ++i)
runners[i]->Wait();
for (int i = 0; i < b.threads; ++i) runners[i]->Wait();
}
}
/*
double mem_usage = 0;
if (get_memory_usage) {
// Measure memory usage
Notification mem_done;
BenchmarkRun mem_run;
BenchmarkRun::SharedState mem_shared(&b, 1);
mem_run.Init(&clock, &mem_shared, 0);
{
testing::MallocCounter mc(testing::MallocCounter::THIS_THREAD_ONLY);
benchmark_mc = &mc;
mem_run.Run(&mem_done);
mem_done.WaitForNotification();
benchmark_mc = NULL;
mem_usage = mc.PeakHeapGrowth();
/*
double mem_usage = 0;
if (get_memory_usage) {
// Measure memory usage
Notification mem_done;
BenchmarkRun mem_run;
BenchmarkRun::SharedState mem_shared(&b, 1);
mem_run.Init(&clock, &mem_shared, 0);
{
testing::MallocCounter mc(testing::MallocCounter::THIS_THREAD_ONLY);
benchmark_mc = &mc;
mem_run.Run(&mem_done);
mem_done.WaitForNotification();
benchmark_mc = NULL;
mem_usage = mc.PeakHeapGrowth();
}
}
}
*/
*/
running_benchmark = false;
for (internal::BenchmarkRunData& report : state.runs) {
double seconds = (use_real_time ? report.real_accumulated_time :
report.cpu_accumulated_time);
double seconds = (use_real_time ? report.real_accumulated_time
: report.cpu_accumulated_time);
report.benchmark_name = b.name;
report.report_label = state.label;
report.bytes_per_second = state.stats.bytes_processed / seconds;
@ -836,29 +826,28 @@ void Benchmark::RunInstance(const Instance& b, BenchmarkReporter* br) {
// Run the specified benchmark, measure its peak memory usage, and
// return the peak memory usage.
// Intended to run benchmark instance b once and report its peak heap growth
// in bytes. The measurement itself is commented out pending a TODO, so
// `bytes` is never updated: the function currently returns 0.0 whether or not
// get_memory_usage is set, and b is unused.
// (Diff view: the guard and the commented-out body each appear in old layout,
// then in reformatted layout; the text between the markers is a rendering of
// one block comment, not live code.)
double Benchmark::MeasurePeakHeapMemory(const Instance& b) {
if (!get_memory_usage)
return 0.0;
if (!get_memory_usage) return 0.0;
double bytes = 0.0;
/* TODO(dominich)
// Should we do multi-threaded runs?
const int num_threads = 1;
const int num_iters = 1;
{
// internal::MallocCounter mc(internal::MallocCounter::THIS_THREAD_ONLY);
running_benchmark = true;
timer_manager = new TimerManager(1, NULL);
// benchmark_mc = &mc;
timer_manager->StartTimer();
/* TODO(dominich)
// Should we do multi-threaded runs?
const int num_threads = 1;
const int num_iters = 1;
{
// internal::MallocCounter mc(internal::MallocCounter::THIS_THREAD_ONLY);
running_benchmark = true;
timer_manager = new TimerManager(1, NULL);
// benchmark_mc = &mc;
timer_manager->StartTimer();
b.Run(num_iters);
b.Run(num_iters);
running_benchmark = false;
delete timer_manager;
timer_manager = NULL;
// benchmark_mc = NULL;
// bytes = mc.PeakHeapGrowth();
}
*/
running_benchmark = false;
delete timer_manager;
timer_manager = NULL;
// benchmark_mc = NULL;
// bytes = mc.PeakHeapGrowth();
}
*/
// Still the initial 0.0: nothing above assigns to bytes.
return bytes;
}
@ -876,14 +865,13 @@ State::State(FastClock* clock, SharedState* s, int t)
start_pause_(0.0),
pause_time_(0.0),
total_iterations_(0),
interval_micros_(
static_cast<int64_t>(kNumMicrosPerSecond * FLAGS_benchmark_min_time /
FLAGS_benchmark_repetitions)),
interval_micros_(static_cast<int64_t>(kNumMicrosPerSecond *
FLAGS_benchmark_min_time /
FLAGS_benchmark_repetitions)),
is_continuation_(false),
stats_(new ThreadStats()) {
CHECK(clock != nullptr);
CHECK(s != nullptr);
}
bool State::KeepRunning() {
@ -895,24 +883,27 @@ bool State::KeepRunning() {
return true;
}
switch(state_) {
case STATE_INITIAL: return StartRunning();
case STATE_STARTING: CHECK(false); return true;
case STATE_RUNNING: return FinishInterval();
case STATE_STOPPING: return MaybeStop();
case STATE_STOPPED: CHECK(false); return true;
switch (state_) {
case STATE_INITIAL:
return StartRunning();
case STATE_STARTING:
CHECK(false);
return true;
case STATE_RUNNING:
return FinishInterval();
case STATE_STOPPING:
return MaybeStop();
case STATE_STOPPED:
CHECK(false);
return true;
}
CHECK(false);
return false;
}
// Marks the start of a paused stretch by recording the current wall time in
// start_pause_. It does not stop any clock; ResumeTiming() later turns the
// elapsed wall time into an addition to pause_time_.
// (Diff view: old three-line form followed by the reformatted one-liner.)
void State::PauseTiming() {
start_pause_ = walltime::Now();
}
void State::PauseTiming() { start_pause_ = walltime::Now(); }
// Ends a paused stretch: adds the wall time elapsed since start_pause_ to the
// running pause_time_ total.
// NOTE(review): start_pause_ is initialized to 0.0 in the constructor and is
// not reset here, so calling this without a preceding PauseTiming() would add
// the whole of walltime::Now() -- presumably callers always pair the two;
// confirm.
// (Diff view: old three-line form followed by the reformatted one-liner.)
void State::ResumeTiming() {
pause_time_ += walltime::Now() - start_pause_;
}
void State::ResumeTiming() { pause_time_ += walltime::Now() - start_pause_; }
void State::SetBytesProcessed(int64_t bytes) {
CHECK_EQ(STATE_STOPPED, state_);
@ -944,10 +935,10 @@ int State::range_x() const {
// Returns the Y range argument of the benchmark instance this State is
// running. CHECKs that the instance actually had a Y range set; the
// commented-out text is the diagnostic message that would accompany that
// check, which the CHECK macro in this project does not take.
// Dereferences shared_->instance, so the instance pointer must be non-null.
// (Diff view: the commented-out message appears in old and new layout.)
int State::range_y() const {
CHECK(shared_->instance->rangeYset);
/* <<
"Failed to get range_y as it was not set. Did you register your "
"benchmark with a range parameter?";
*/
/* <<
"Failed to get range_y as it was not set. Did you register your "
"benchmark with a range parameter?";
*/
return shared_->instance->rangeY;
}
@ -962,10 +953,10 @@ bool State::StartRunning() {
++shared_->starting;
last_thread = shared_->starting == shared_->threads;
}
if (last_thread) {
clock_->InitType(
use_real_time ? FastClock::REAL_TIME : FastClock::CPU_TIME);
clock_->InitType(use_real_time ? FastClock::REAL_TIME
: FastClock::CPU_TIME);
{
mutex_lock l(&starting_mutex);
pthread_cond_broadcast(&starting_cv);
@ -1022,7 +1013,6 @@ bool State::FinishInterval() {
const double accumulated_time = walltime::Now() - start_time_;
const double total_overhead = overhead * iterations_;
//const double total_overhead = 0.0;
CHECK_LT(pause_time_, accumulated_time);
CHECK_LT(pause_time_ + total_overhead, accumulated_time);
data.real_accumulated_time =
@ -1046,9 +1036,8 @@ bool State::FinishInterval() {
is_continuation_ = keep_going;
} else {
// If this is a repetition, run another interval as a new data point.
keep_going =
shared_->runs.size() <
static_cast<size_t>(FLAGS_benchmark_repetitions);
keep_going = shared_->runs.size() <
static_cast<size_t>(FLAGS_benchmark_repetitions);
is_continuation_ = !keep_going;
}
@ -1065,8 +1054,7 @@ bool State::FinishInterval() {
}
}
if (state_ == STATE_RUNNING)
NewInterval();
if (state_ == STATE_RUNNING) NewInterval();
return keep_going;
}
@ -1093,9 +1081,7 @@ void State::RunAsThread() {
CHECK_EQ(0, pthread_create(&thread_, nullptr, &State::RunWrapper, this));
}
// Blocks until the thread stored in thread_ (created by RunAsThread() via
// pthread_create) has finished, and CHECKs that pthread_join returned 0.
// The thread's return value is discarded (nullptr is passed for it).
// (Diff view: old three-line form followed by the reformatted one-liner.)
void State::Wait() {
CHECK_EQ(0, pthread_join(thread_, nullptr));
}
void State::Wait() { CHECK_EQ(0, pthread_join(thread_, nullptr)); }
// static
void* State::RunWrapper(void* arg) {
@ -1121,25 +1107,24 @@ void RunMatchingBenchmarks(const std::string& spec,
for (const internal::Benchmark::Instance& benchmark : benchmarks) {
// Add width for _stddev and threads:XX
if (benchmark.threads > 1 && FLAGS_benchmark_repetitions > 1) {
name_field_width = std::max<int>(name_field_width,
benchmark.name.size() + 17);
} else if (benchmark.threads> 1) {
name_field_width = std::max<int>(name_field_width,
benchmark.name.size() + 10);
name_field_width =
std::max<int>(name_field_width, benchmark.name.size() + 17);
} else if (benchmark.threads > 1) {
name_field_width =
std::max<int>(name_field_width, benchmark.name.size() + 10);
} else if (FLAGS_benchmark_repetitions > 1) {
name_field_width = std::max<int>(name_field_width,
benchmark.name.size() + 7);
name_field_width =
std::max<int>(name_field_width, benchmark.name.size() + 7);
} else {
name_field_width = std::max<int>(name_field_width,
benchmark.name.size());
name_field_width = std::max<int>(name_field_width, benchmark.name.size());
}
}
// Print header here
BenchmarkContextData context;
context.num_cpus = NumCPUs();
context.mhz_per_cpu = CyclesPerSecond() / 1000000.0f;
// context.cpu_info = base::CompactCPUIDInfoString();
context.mhz_per_cpu = CyclesPerSecond() / 1000000.0f;
// context.cpu_info = base::CompactCPUIDInfoString();
context.cpu_scaling_enabled = CpuScalingEnabled();
context.name_field_width = name_field_width;
@ -1155,7 +1140,7 @@ void FindMatchingBenchmarkNames(const std::string& spec,
std::vector<internal::Benchmark::Instance> benchmarks;
internal::Benchmark::FindBenchmarks(spec, &benchmarks);
std::transform(benchmarks.begin(), benchmarks.end(), benchmark_names->begin(),
[] (const internal::Benchmark::Instance& b) { return b.name; } );
[](const internal::Benchmark::Instance& b) { return b.name; });
}
} // end namespace internal
@ -1163,7 +1148,7 @@ void FindMatchingBenchmarkNames(const std::string& spec,
void RunSpecifiedBenchmarks() {
std::string spec = FLAGS_benchmark_filter;
if (spec.empty() || spec == "all")
spec = "."; // Regexp that matches all benchmarks
spec = "."; // Regexp that matches all benchmarks
internal::ConsoleReporter default_reporter;
internal::RunMatchingBenchmarks(spec, &default_reporter);
pthread_cond_destroy(&starting_cv);
@ -1172,12 +1157,11 @@ void RunSpecifiedBenchmarks() {
}
// Library start-up. In order: initializes the global mutexes and the
// condition variable used elsewhere in this file, initializes the walltime
// module, parses the library's command-line flags, and measures the
// per-iteration overhead of an empty benchmark loop.
// argc/argv are passed by pointer because ParseCommandLineFlags() removes
// the flags it recognizes and decrements *argc.
// The return values of the pthread_*_init calls are not checked.
// (Diff view: the commented-out AtomicOps line belongs to the old side only,
// and the ParseCommandLineFlags call is shown in old and new form.)
void Initialize(int* argc, const char** argv) {
//AtomicOps_Internalx86CPUFeaturesInit();
pthread_mutex_init(&benchmark_mutex, nullptr);
pthread_mutex_init(&starting_mutex, nullptr);
pthread_cond_init(&starting_cv, nullptr);
walltime::Initialize();
internal::ParseCommandLineFlags(argc, argv);
internal::ParseCommandLineFlags(argc, argv);
internal::Benchmark::MeasureOverhead();
}

View File

@ -17,25 +17,40 @@ typedef const char* PlatformColorCode;
// Maps a LogColor to the platform's representation of that color.
// (Diff view: in each branch the old one-line `case X: return Y;` entries are
// listed first, followed by the reformatted two-line entries.)
PlatformColorCode GetPlatformColorCode(LogColor color) {
#ifdef OS_WINDOWS
// Windows: a combination of FOREGROUND_* console attribute bits.
// COLOR_WHITE and any unrecognized value yield 0.
switch (color) {
case COLOR_RED: return FOREGROUND_RED;
case COLOR_GREEN: return FOREGROUND_GREEN;
case COLOR_YELLOW: return FOREGROUND_RED | FOREGROUND_GREEN;
case COLOR_BLUE: return FOREGROUND_BLUE;
case COLOR_MAGENTA: return FOREGROUND_BLUE | FOREGROUND_RED;
case COLOR_CYAN: return FOREGROUND_BLUE | FOREGROUND_GREEN;
case COLOR_WHITE: // fall through to default
default: return 0;
case COLOR_RED:
return FOREGROUND_RED;
case COLOR_GREEN:
return FOREGROUND_GREEN;
case COLOR_YELLOW:
return FOREGROUND_RED | FOREGROUND_GREEN;
case COLOR_BLUE:
return FOREGROUND_BLUE;
case COLOR_MAGENTA:
return FOREGROUND_BLUE | FOREGROUND_RED;
case COLOR_CYAN:
return FOREGROUND_BLUE | FOREGROUND_GREEN;
case COLOR_WHITE: // fall through to default
default:
return 0;
}
#else
// Elsewhere: the single digit that ColorPrintf() substitutes into the
// "\033[0;3%sm" escape sequence. Unrecognized values yield NULL, which
// ColorPrintf() treats as "emit no color escape".
switch (color) {
case COLOR_RED: return "1";
case COLOR_GREEN: return "2";
case COLOR_YELLOW: return "3";
case COLOR_BLUE: return "4";
case COLOR_MAGENTA: return "5";
case COLOR_CYAN: return "6";
case COLOR_WHITE: return "7";
default: return NULL;
case COLOR_RED:
return "1";
case COLOR_GREEN:
return "2";
case COLOR_YELLOW:
return "3";
case COLOR_BLUE:
return "4";
case COLOR_MAGENTA:
return "5";
case COLOR_CYAN:
return "6";
case COLOR_WHITE:
return "7";
default:
return NULL;
};
#endif
}
@ -72,8 +87,7 @@ void ColorPrintf(LogColor color, const char* fmt, ...) {
SetConsoleTextAttribute(stdout_handle, old_color_attrs);
#else
const char* color_code = GetPlatformColorCode(color);
if (color_code)
fprintf(stdout, "\033[0;3%sm", color_code);
if (color_code) fprintf(stdout, "\033[0;3%sm", color_code);
vprintf(fmt, args);
printf("\033[m"); // Resets the terminal to default.
#endif

View File

@ -29,7 +29,7 @@ bool ParseInt32(const std::string& src_text, const char* str, int32_t* value) {
// The parsed value overflows as a long. (strtol() returns
// LONG_MAX or LONG_MIN when the input overflows.)
result != long_value
// The parsed value overflows as an Int32.
// The parsed value overflows as an Int32.
) {
std::cerr << src_text << " is expected to be a 32-bit integer, "
<< "but actually has value \"" << str << "\", "
@ -60,7 +60,6 @@ bool ParseDouble(const std::string& src_text, const char* str, double* value) {
return true;
}
inline const char* GetEnv(const char* name) {
#if GTEST_OS_WINDOWS_MOBILE
// We are on Windows CE, which has no environment variables.
@ -95,8 +94,7 @@ static std::string FlagToEnvVar(const char* flag) {
// Reads a boolean from the environment variable derived from `flag` (name
// mapping is done by FlagToEnvVar).
// Returns default_value when the variable is not set. When it is set, the
// result is false only if the value is exactly the string "0"; every other
// value -- including the empty string -- counts as true.
// (Diff view: the return statement appears in old two-line form, then in
// reformatted one-line form.)
bool BoolFromEnv(const char* flag, bool default_value) {
const std::string env_var = FlagToEnvVar(flag);
const char* const string_value = GetEnv(env_var.c_str());
return string_value == NULL ?
default_value : strcmp(string_value, "0") != 0;
return string_value == NULL ? default_value : strcmp(string_value, "0") != 0;
}
// Reads and returns a 32-bit integer stored in the environment
@ -111,8 +109,8 @@ int32_t Int32FromEnv(const char* flag, int32_t default_value) {
}
int32_t result = default_value;
if (!ParseInt32(std::string("Environment variable ") + env_var,
string_value, &result)) {
if (!ParseInt32(std::string("Environment variable ") + env_var, string_value,
&result)) {
std::cout << "The default value " << default_value << " is used.\n";
return default_value;
}
@ -133,13 +131,12 @@ const char* StringFromEnv(const char* flag, const char* default_value) {
// part can be omitted.
//
// Returns the value of the flag, or NULL if the parsing failed.
const char* ParseFlagValue(const char* str,
const char* flag,
const char* ParseFlagValue(const char* str, const char* flag,
bool def_optional) {
// str and flag must not be NULL.
if (str == NULL || flag == NULL) return NULL;
// The flag must start with "--".
// The flag must start with "--".
const std::string flag_str = std::string("--") + std::string(flag);
const size_t flag_len = flag_str.length();
if (strncmp(str, flag_str.c_str(), flag_len) != 0) return NULL;
@ -148,8 +145,7 @@ const char* ParseFlagValue(const char* str,
const char* flag_end = str + flag_len;
// When def_optional is true, it's OK to not have a "=value" part.
if (def_optional && (flag_end[0] == '\0'))
return flag_end;
if (def_optional && (flag_end[0] == '\0')) return flag_end;
// If def_optional is true and there are more characters after the
// flag name, or if def_optional is false, there must be a '=' after
@ -180,8 +176,8 @@ bool ParseInt32Flag(const char* str, const char* flag, int32_t* value) {
if (value_str == NULL) return false;
// Sets *value to the value of the flag.
return ParseInt32(std::string("The value of flag --") + flag,
value_str, value);
return ParseInt32(std::string("The value of flag --") + flag, value_str,
value);
}
bool ParseDoubleFlag(const char* str, const char* flag, double* value) {
@ -192,8 +188,8 @@ bool ParseDoubleFlag(const char* str, const char* flag, double* value) {
if (value_str == NULL) return false;
// Sets *value to the value of the flag.
return ParseDouble(std::string("The value of flag --") + flag,
value_str, value);
return ParseDouble(std::string("The value of flag --") + flag, value_str,
value);
}
bool ParseStringFlag(const char* str, const char* flag, std::string* value) {

View File

@ -21,7 +21,7 @@
#define DEFINE_int64(name, default_val, doc) int64_t FLAG(name) = (default_val)
#define DEFINE_double(name, default_val, doc) double FLAG(name) = (default_val)
#define DEFINE_string(name, default_val, doc) \
std::string FLAG(name) = (default_val)
std::string FLAG(name) = (default_val)
namespace benchmark {
// Parses 'str' for a 32-bit signed integer. If successful, writes the result

View File

@ -24,7 +24,7 @@
#include <stdint.h>
#if defined(OS_MACOSX)
# include <mach/mach_time.h>
#include <mach/mach_time.h>
#endif
// For MSVC, we want to use '_asm rdtsc' when possible (since it works
// with even ancient MSVC compilers), and when not possible the
@ -48,85 +48,85 @@ namespace benchmark {
// with modifications by m3b. See also
// https://setisvn.ssl.berkeley.edu/svn/lib/fftw-3.0.1/kernel/cycle.h
namespace cycleclock {
// This should return the number of cycles since power-on. Thread-safe.
inline ATTRIBUTE_ALWAYS_INLINE int64_t Now() {
// This should return the number of cycles since power-on. Thread-safe.
inline ATTRIBUTE_ALWAYS_INLINE int64_t Now() {
#if defined(OS_MACOSX)
// this goes at the top because we need ALL Macs, regardless of
// architecture, to return the number of "mach time units" that
// have passed since startup. See sysinfo.cc where
// InitializeSystemInfo() sets the supposed cpu clock frequency of
// macs to the number of mach time units per second, not actual
// CPU clock frequency (which can change in the face of CPU
// frequency scaling). Also note that when the Mac sleeps, this
// counter pauses; it does not continue counting, nor does it
// reset to zero.
return mach_absolute_time();
// this goes at the top because we need ALL Macs, regardless of
// architecture, to return the number of "mach time units" that
// have passed since startup. See sysinfo.cc where
// InitializeSystemInfo() sets the supposed cpu clock frequency of
// macs to the number of mach time units per second, not actual
// CPU clock frequency (which can change in the face of CPU
// frequency scaling). Also note that when the Mac sleeps, this
// counter pauses; it does not continue counting, nor does it
// reset to zero.
return mach_absolute_time();
#elif defined(__i386__)
int64_t ret;
__asm__ volatile ("rdtsc" : "=A" (ret) );
return ret;
int64_t ret;
__asm__ volatile("rdtsc" : "=A"(ret));
return ret;
#elif defined(__x86_64__) || defined(__amd64__)
uint64_t low, high;
__asm__ volatile ("rdtsc" : "=a" (low), "=d" (high));
return (high << 32) | low;
uint64_t low, high;
__asm__ volatile("rdtsc" : "=a"(low), "=d"(high));
return (high << 32) | low;
#elif defined(__powerpc__) || defined(__ppc__)
// This returns a time-base, which is not always precisely a cycle-count.
int64_t tbl, tbu0, tbu1;
asm("mftbu %0" : "=r" (tbu0));
asm("mftb %0" : "=r" (tbl));
asm("mftbu %0" : "=r" (tbu1));
tbl &= -static_cast<int64>(tbu0 == tbu1);
// high 32 bits in tbu1; low 32 bits in tbl (tbu0 is garbage)
return (tbu1 << 32) | tbl;
// This returns a time-base, which is not always precisely a cycle-count.
int64_t tbl, tbu0, tbu1;
asm("mftbu %0" : "=r"(tbu0));
asm("mftb %0" : "=r"(tbl));
asm("mftbu %0" : "=r"(tbu1));
tbl &= -static_cast<int64>(tbu0 == tbu1);
// high 32 bits in tbu1; low 32 bits in tbl (tbu0 is garbage)
return (tbu1 << 32) | tbl;
#elif defined(__sparc__)
int64_t tick;
asm(".byte 0x83, 0x41, 0x00, 0x00");
asm("mov %%g1, %0" : "=r" (tick));
return tick;
int64_t tick;
asm(".byte 0x83, 0x41, 0x00, 0x00");
asm("mov %%g1, %0" : "=r"(tick));
return tick;
#elif defined(__ia64__)
int64_t itc;
asm("mov %0 = ar.itc" : "=r" (itc));
return itc;
int64_t itc;
asm("mov %0 = ar.itc" : "=r"(itc));
return itc;
#elif defined(COMPILER_MSVC) && defined(_M_IX86)
// Older MSVC compilers (like 7.x) don't seem to support the
// __rdtsc intrinsic properly, so I prefer to use _asm instead
// when I know it will work. Otherwise, I'll use __rdtsc and hope
// the code is being compiled with a non-ancient compiler.
_asm rdtsc
// Older MSVC compilers (like 7.x) don't seem to support the
// __rdtsc intrinsic properly, so I prefer to use _asm instead
// when I know it will work. Otherwise, I'll use __rdtsc and hope
// the code is being compiled with a non-ancient compiler.
_asm rdtsc
#elif defined(COMPILER_MSVC)
return __rdtsc();
return __rdtsc();
#elif defined(ARMV3)
#if defined(ARMV6) // V6 is the earliest arch that has a standard cyclecount
uint32_t pmccntr;
uint32_t pmuseren;
uint32_t pmcntenset;
// Read the user mode perf monitor counter access permissions.
asm("mrc p15, 0, %0, c9, c14, 0" : "=r" (pmuseren));
if (pmuseren & 1) { // Allows reading perfmon counters for user mode code.
asm("mrc p15, 0, %0, c9, c12, 1" : "=r" (pmcntenset));
if (pmcntenset & 0x80000000ul) { // Is it counting?
asm("mrc p15, 0, %0, c9, c13, 0" : "=r" (pmccntr));
// The counter is set up to count every 64th cycle
return static_cast<int64>(pmccntr) * 64; // Should optimize to << 6
}
uint32_t pmccntr;
uint32_t pmuseren;
uint32_t pmcntenset;
// Read the user mode perf monitor counter access permissions.
asm("mrc p15, 0, %0, c9, c14, 0" : "=r"(pmuseren));
if (pmuseren & 1) { // Allows reading perfmon counters for user mode code.
asm("mrc p15, 0, %0, c9, c12, 1" : "=r"(pmcntenset));
if (pmcntenset & 0x80000000ul) { // Is it counting?
asm("mrc p15, 0, %0, c9, c13, 0" : "=r"(pmccntr));
// The counter is set up to count every 64th cycle
return static_cast<int64>(pmccntr) * 64; // Should optimize to << 6
}
}
#endif
struct timeval tv;
gettimeofday(&tv, NULL);
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
struct timeval tv;
gettimeofday(&tv, NULL);
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
#elif defined(__mips__)
// mips apparently only allows rdtsc for superusers, so we fall
// back to gettimeofday. It's possible clock_gettime would be better.
struct timeval tv;
gettimeofday(&tv, NULL);
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
// mips apparently only allows rdtsc for superusers, so we fall
// back to gettimeofday. It's possible clock_gettime would be better.
struct timeval tv;
gettimeofday(&tv, NULL);
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
#else
// The soft failover to a generic implementation is automatic only for ARM.
// For other platforms the developer is expected to make an attempt to create
// a fast implementation and use the generic version if nothing better is available.
#error You need to define CycleTimer for your OS and CPU
#endif
}
}
} // end namespace cycleclock
} // end namespace benchmark

View File

@ -10,9 +10,7 @@ class mutex_lock {
pthread_mutex_lock(mu_);
}
~mutex_lock() {
pthread_mutex_unlock(mu_);
}
~mutex_lock() { pthread_mutex_unlock(mu_); }
private:
pthread_mutex_t* mu_;

View File

@ -6,13 +6,11 @@
namespace benchmark {
#ifdef OS_WINDOWS
// Windows' _sleep takes a milliseconds argument.
void SleepForMilliseconds(int milliseconds) {
_sleep(milliseconds);
}
void SleepForMilliseconds(int milliseconds) { _sleep(milliseconds); }
void SleepForSeconds(double seconds) {
SleepForMilliseconds(static_cast<int>(kNumMillisPerSecond * seconds));
}
#else // OS_WINDOWS
#else // OS_WINDOWS
void SleepForMicroseconds(int64_t microseconds) {
struct timespec sleep_time;
sleep_time.tv_sec = microseconds / kNumMicrosPerSecond;

View File

@ -12,27 +12,24 @@ class Stat1;
template <typename VType, typename NumType>
class Stat1MinMax;
typedef Stat1<float, float> Stat1_f;
typedef Stat1<float, float> Stat1_f;
typedef Stat1<double, double> Stat1_d;
typedef Stat1MinMax<float, float> Stat1MinMax_f;
typedef Stat1MinMax<float, float> Stat1MinMax_f;
typedef Stat1MinMax<double, double> Stat1MinMax_d;
template <typename VType> class Vector2;
template <typename VType> class Vector3;
template <typename VType> class Vector4;
template <typename VType>
class Vector2;
template <typename VType>
class Vector3;
template <typename VType>
class Vector4;
template <typename VType, typename NumType>
class Stat1 {
public:
typedef Stat1<VType, NumType> Self;
Stat1() {
Clear();
}
void Clear() {
numsamples_ = NumType();
sum_squares_ = sum_ = VType();
}
Stat1() { Clear(); }
// Create a sample of value dat and weight 1
explicit Stat1(const VType &dat) {
sum_ = dat;
@ -43,7 +40,7 @@ class Stat1 {
// and end(excluded)
explicit Stat1(const VType *begin, const VType *end) {
Clear();
for ( const VType *item = begin; item < end; ++item ) {
for (const VType *item = begin; item < end; ++item) {
(*this) += Stat1(*item);
}
}
@ -60,58 +57,60 @@ class Stat1 {
numsamples_ = stat.numsamples_;
}
inline Self &operator =(const Self &stat) {
void Clear() {
numsamples_ = NumType();
sum_squares_ = sum_ = VType();
}
Self& operator=(const Self &stat) {
sum_ = stat.sum_;
sum_squares_ = stat.sum_squares_;
numsamples_ = stat.numsamples_;
return (*this);
}
// Merge statistics from two sample sets.
inline Self &operator +=(const Self &stat) {
Self& operator+=(const Self &stat) {
sum_ += stat.sum_;
sum_squares_+= stat.sum_squares_;
sum_squares_ += stat.sum_squares_;
numsamples_ += stat.numsamples_;
return (*this);
}
// The operation opposite to +=
inline Self &operator -=(const Self &stat) {
Self& operator-=(const Self &stat) {
sum_ -= stat.sum_;
sum_squares_-= stat.sum_squares_;
sum_squares_ -= stat.sum_squares_;
numsamples_ -= stat.numsamples_;
return (*this);
}
// Multiply the weight of the set of samples by a factor k
inline Self &operator *=(const VType &k) {
Self& operator*=(const VType &k) {
sum_ *= k;
sum_squares_*= k;
sum_squares_ *= k;
numsamples_ *= k;
return (*this);
}
// Merge statistics from two sample sets.
inline Self operator + (const Self &stat) const {
return Self(*this) += stat;
}
Self operator+(const Self& stat) const { return Self(*this) += stat; }
// The operation opposite to +
inline Self operator - (const Self &stat) const {
return Self(*this) -= stat;
}
Self operator-(const Self& stat) const { return Self(*this) -= stat; }
// Multiply the weight of the set of samples by a factor k
inline Self operator * (const VType &k) const {
return Self(*this) *= k;
}
Self operator*(const VType& k) const { return Self(*this) *= k; }
// Return the total weight of this sample set
NumType NumSamples() const {
return numsamples_;
}
NumType numSamples() const { return numsamples_; }
// Return the sum of this sample set
VType Sum() const {
return sum_;
}
VType sum() const { return sum_; }
// Return the mean of this sample set
VType Mean() const {
if (numsamples_ == 0) return VType();
return sum_ * (1.0 / numsamples_);
}
// Return the mean of this sample set and compute the standard deviation at
// the same time.
VType Mean(VType *stddev) const {
@ -119,10 +118,11 @@ class Stat1 {
VType mean = sum_ * (1.0 / numsamples_);
if (stddev) {
VType avg_squares = sum_squares_ * (1.0 / numsamples_);
*stddev = Sqrt(avg_squares - Sqr(mean));
*stddev = Sqrt(avg_squares - Sqr(mean));
}
return mean;
}
// Return the standard deviation of the sample set
VType StdDev() const {
if (numsamples_ == 0) return VType();
@ -130,10 +130,11 @@ class Stat1 {
VType avg_squares = sum_squares_ * (1.0 / numsamples_);
return Sqrt(avg_squares - Sqr(mean));
}
private:
// Let i be the index of the samples provided (using +=)
// and weight[i],value[i] be the data of sample #i
// then the variables have the following meaning:
// Let i be the index of the samples provided (using +=)
// and weight[i],value[i] be the data of sample #i
// then the variables have the following meaning:
NumType numsamples_; // sum of weight[i];
VType sum_; // sum of weight[i]*value[i];
VType sum_squares_; // sum of weight[i]*value[i]^2;
@ -141,17 +142,18 @@ class Stat1 {
// Template function used to square a number.
// For a vector we square all components
template <typename SType>
static inline SType Sqr(const SType &dat) {
return dat * dat;
}
static inline SType Sqr(const SType &dat) { return dat * dat; }
template <typename SType>
static inline Vector2<SType> Sqr(const Vector2<SType> &dat) {
return dat.MulComponents(dat);
}
template <typename SType>
static inline Vector3<SType> Sqr(const Vector3<SType> &dat) {
return dat.MulComponents(dat);
}
template <typename SType>
static inline Vector4<SType> Sqr(const Vector4<SType> &dat) {
return dat.MulComponents(dat);
@ -162,20 +164,22 @@ class Stat1 {
template <typename SType>
static inline SType Sqrt(const SType &dat) {
// Avoid NaN due to imprecision in the calculations
if ( dat < 0 )
return 0;
if (dat < 0) return 0;
return sqrt(dat);
}
template <typename SType>
static inline Vector2<SType> Sqrt(const Vector2<SType> &dat) {
// Avoid NaN due to imprecision in the calculations
return Max(dat, Vector2<SType>()).Sqrt();
}
template <typename SType>
static inline Vector3<SType> Sqrt(const Vector3<SType> &dat) {
// Avoid NaN due to imprecision in the calculations
return Max(dat, Vector3<SType>()).Sqrt();
}
template <typename SType>
static inline Vector4<SType> Sqrt(const Vector4<SType> &dat) {
// Avoid NaN due to imprecision in the calculations
@ -185,15 +189,12 @@ class Stat1 {
// Useful printing function
template <typename VType, typename NumType>
inline std::ostream& operator<<(std::ostream& out,
const Stat1<VType, NumType>& s) {
out << "{ avg = " << s.Mean()
<< " std = " << s.StdDev()
std::ostream& operator<<(std::ostream& out, const Stat1<VType, NumType>& s) {
out << "{ avg = " << s.Mean() << " std = " << s.StdDev()
<< " nsamples = " << s.NumSamples() << "}";
return out;
}
// Stat1MinMax: same as Stat1, but it also
// keeps the Min and Max values; the "-"
// operator is disabled because it cannot be implemented
@ -203,9 +204,32 @@ class Stat1MinMax : public Stat1<VType, NumType> {
public:
typedef Stat1MinMax<VType, NumType> Self;
Stat1MinMax() {
Clear();
Stat1MinMax() { Clear(); }
// Create a sample of value dat and weight 1
explicit Stat1MinMax(const VType &dat) : Stat1<VType, NumType>(dat) {
max_ = dat;
min_ = dat;
}
// Create statistics for all the samples between begin (included)
// and end(excluded)
explicit Stat1MinMax(const VType *begin, const VType *end) {
Clear();
for (const VType* item = begin; item < end; ++item) {
(*this) += Stat1MinMax(*item);
}
}
// Create a sample of value dat and weight w
Stat1MinMax(const VType &dat, const NumType &w)
: Stat1<VType, NumType>(dat, w) {
max_ = dat;
min_ = dat;
}
// Copy operator
Stat1MinMax(const Self &stat) : Stat1<VType, NumType>(stat) {
max_ = stat.max_;
min_ = stat.min_;
}
void Clear() {
Stat1<VType, NumType>::Clear();
if (std::numeric_limits<VType>::has_infinity) {
@ -216,90 +240,57 @@ class Stat1MinMax : public Stat1<VType, NumType> {
max_ = std::numeric_limits<VType>::min();
}
}
// Create a sample of value dat and weight 1
explicit Stat1MinMax(const VType &dat) : Stat1<VType, NumType>(dat) {
max_ = dat;
min_ = dat;
}
// Create statistics for all the samples between begin (included)
// and end(excluded)
explicit Stat1MinMax(const VType *begin, const VType *end) {
Clear();
for ( const VType *item = begin; item < end; ++item ) {
(*this) += Stat1MinMax(*item);
}
}
// Create a sample of value dat and weight w
Stat1MinMax(const VType &dat, const NumType &w)
: Stat1<VType, NumType>(dat, w) {
max_ = dat;
min_ = dat;
}
// Copy operator
Stat1MinMax(const Self &stat) : Stat1<VType, NumType>(stat) {
max_ = stat.max_;
min_ = stat.min_;
}
inline Self &operator =(const Self &stat) {
Self& operator=(const Self& stat) {
this->Stat1<VType, NumType>::operator=(stat);
max_ = stat.max_;
min_ = stat.min_;
return (*this);
}
// Merge statistics from two sample sets.
inline Self &operator +=(const Self &stat) {
Self& operator+=(const Self& stat) {
this->Stat1<VType, NumType>::operator+=(stat);
if (stat.max_ > max_) max_ = stat.max_;
if (stat.min_ < min_) min_ = stat.min_;
return (*this);
}
// Multiply the weight of the set of samples by a factor k
inline Self &operator *=(const VType &stat) {
Self& operator*=(const VType& stat) {
this->Stat1<VType, NumType>::operator*=(stat);
return (*this);
}
// Merge statistics from two sample sets.
inline Self operator + (const Self &stat) const {
return Self(*this) += stat;
}
Self operator+(const Self& stat) const { return Self(*this) += stat; }
// Multiply the weight of the set of samples by a factor k
inline Self operator * (const VType &k) const {
return Self(*this) *= k;
}
Self operator*(const VType& k) const { return Self(*this) *= k; }
// Return the maximal value in this sample set
VType max() const { return max_; }
// Return the minimal value in this sample set
VType min() const { return min_; }
private:
// The - operation makes no sense with Min/Max
// unless we keep the full list of values (but we don't)
// make it private, and leave it undefined so nobody can call it
Self &operator -=(const Self &stat); // senseless. let it undefined.
Self &operator-=(const Self& stat); // senseless. let it undefined.
// The operation opposite to +
Self operator - (const Self &stat) const; // senseless. let it undefined.
Self operator-(const Self& stat) const; // senseless. let it undefined.
public:
// Return the maximal value in this sample set
VType Max() const {
return max_;
}
// Return the minimal value in this sample set
VType Min() const {
return min_;
}
private:
// Let i be the index of the samples provided (using +=)
// and weight[i],value[i] be the data of sample #i
// then the variables have the following meaning:
VType max_; // max of value[i]
VType min_; // min of value[i]
// Let i be the index of the samples provided (using +=)
// and weight[i],value[i] be the data of sample #i
// then the variables have the following meaning:
VType max_; // max of value[i]
VType min_; // min of value[i]
};
// Useful printing function
template <typename VType, typename NumType>
inline std::ostream& operator <<(std::ostream& out,
const Stat1MinMax<VType, NumType>& s) {
out << "{ avg = " << s.Mean()
<< " std = " << s.StdDev()
<< " nsamples = " << s.NumSamples()
<< " min = " << s.Min()
std::ostream& operator<<(std::ostream& out,
const Stat1MinMax<VType, NumType>& s) {
out << "{ avg = " << s.Mean() << " std = " << s.StdDev()
<< " nsamples = " << s.NumSamples() << " min = " << s.Min()
<< " max = " << s.Max() << "}";
return out;
}

View File

@ -39,7 +39,7 @@ int64_t EstimateCyclesPerSecond(const int estimate_time_ms) {
// Helper function for reading an int from a file. Returns true if successful
// and the memory location pointed to by value is set to the value read.
bool ReadIntFromFile(const char *file, int *value) {
bool ReadIntFromFile(const char* file, int* value) {
bool ret = false;
int fd = open(file, O_RDONLY);
if (fd != -1) {
@ -76,10 +76,10 @@ void InitializeSystemInfo() {
// well.
if (!saw_mhz &&
ReadIntFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) {
// The value is in kHz (as the file name suggests). For example, on a
// 2GHz warpstation, the file contains the value "2000000".
cpuinfo_cycles_per_second = freq * 1000.0;
saw_mhz = true;
// The value is in kHz (as the file name suggests). For example, on a
// 2GHz warpstation, the file contains the value "2000000".
cpuinfo_cycles_per_second = freq * 1000.0;
saw_mhz = true;
}
// If CPU scaling is in effect, we want to use the *maximum* frequency,
@ -101,7 +101,7 @@ void InitializeSystemInfo() {
if (!saw_mhz) {
cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000);
}
return; // TODO: use generic tester instead?
return; // TODO: use generic tester instead?
}
double bogo_clock = 1.0;
@ -110,48 +110,47 @@ void InitializeSystemInfo() {
int num_cpus = 0;
line[0] = line[1] = '\0';
int chars_read = 0;
do { // we'll exit when the last read didn't read anything
do { // we'll exit when the last read didn't read anything
// Move the next line to the beginning of the buffer
const int oldlinelen = strlen(line);
if (sizeof(line) == oldlinelen + 1) // oldlinelen took up entire line
if (sizeof(line) == oldlinelen + 1) // oldlinelen took up entire line
line[0] = '\0';
else // still other lines left to save
memmove(line, line + oldlinelen+1, sizeof(line) - (oldlinelen+1));
else // still other lines left to save
memmove(line, line + oldlinelen + 1, sizeof(line) - (oldlinelen + 1));
// Terminate the new line, reading more if we can't find the newline
char* newline = strchr(line, '\n');
if (newline == NULL) {
const int linelen = strlen(line);
const int bytes_to_read = sizeof(line)-1 - linelen;
const int bytes_to_read = sizeof(line) - 1 - linelen;
CHECK(bytes_to_read > 0); // because the memmove recovered >=1 bytes
chars_read = read(fd, line + linelen, bytes_to_read);
line[linelen + chars_read] = '\0';
newline = strchr(line, '\n');
}
if (newline != NULL)
*newline = '\0';
if (newline != NULL) *newline = '\0';
// When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only
// accept positive values. Some environments (virtual machines) report zero,
// which would cause infinite looping in WallTime_Init.
if (!saw_mhz && strncasecmp(line, "cpu MHz", sizeof("cpu MHz")-1) == 0) {
if (!saw_mhz && strncasecmp(line, "cpu MHz", sizeof("cpu MHz") - 1) == 0) {
const char* freqstr = strchr(line, ':');
if (freqstr) {
cpuinfo_cycles_per_second = strtod(freqstr+1, &err) * 1000000.0;
cpuinfo_cycles_per_second = strtod(freqstr + 1, &err) * 1000000.0;
if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0)
saw_mhz = true;
}
} else if (strncasecmp(line, "bogomips", sizeof("bogomips")-1) == 0) {
} else if (strncasecmp(line, "bogomips", sizeof("bogomips") - 1) == 0) {
const char* freqstr = strchr(line, ':');
if (freqstr) {
bogo_clock = strtod(freqstr+1, &err) * 1000000.0;
bogo_clock = strtod(freqstr + 1, &err) * 1000000.0;
if (freqstr[1] != '\0' && *err == '\0' && bogo_clock > 0)
saw_bogo = true;
}
} else if (strncasecmp(line, "processor", sizeof("processor")-1) == 0) {
} else if (strncasecmp(line, "processor", sizeof("processor") - 1) == 0) {
num_cpus++; // count up every time we see an "processor :" entry
const char* freqstr = strchr(line, ':');
if (freqstr) {
const int cpu_id = strtol(freqstr+1, &err, 10);
const int cpu_id = strtol(freqstr + 1, &err, 10);
if (freqstr[1] != '\0' && *err == '\0' && max_cpu_id < cpu_id)
max_cpu_id = cpu_id;
}
@ -181,17 +180,17 @@ void InitializeSystemInfo() {
}
#elif defined OS_FREEBSD
// For this sysctl to work, the machine must be configured without
// SMP, APIC, or APM support. hz should be 64-bit in freebsd 7.0
// and later. Before that, it's a 32-bit quantity (and gives the
// wrong answer on machines faster than 2^32 Hz). See
// http://lists.freebsd.org/pipermail/freebsd-i386/2004-November/001846.html
// But also compare FreeBSD 7.0:
// http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG70#L223
// 231 error = sysctl_handle_quad(oidp, &freq, 0, req);
// To FreeBSD 6.3 (it's the same in 6-STABLE):
// http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG6#L131
// 139 error = sysctl_handle_int(oidp, &freq, sizeof(freq), req);
// For this sysctl to work, the machine must be configured without
// SMP, APIC, or APM support. hz should be 64-bit in freebsd 7.0
// and later. Before that, it's a 32-bit quantity (and gives the
// wrong answer on machines faster than 2^32 Hz). See
// http://lists.freebsd.org/pipermail/freebsd-i386/2004-November/001846.html
// But also compare FreeBSD 7.0:
// http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG70#L223
// 231 error = sysctl_handle_quad(oidp, &freq, 0, req);
// To FreeBSD 6.3 (it's the same in 6-STABLE):
// http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG6#L131
// 139 error = sysctl_handle_int(oidp, &freq, sizeof(freq), req);
#if __FreeBSD__ >= 7
uint64_t hz = 0;
#else
@ -199,31 +198,31 @@ void InitializeSystemInfo() {
#endif
size_t sz = sizeof(hz);
const char *sysctl_path = "machdep.tsc_freq";
if ( sysctlbyname(sysctl_path, &hz, &sz, NULL, 0) != 0 ) {
if (sysctlbyname(sysctl_path, &hz, &sz, NULL, 0) != 0) {
fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n",
sysctl_path, strerror(errno));
cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000);
} else {
cpuinfo_cycles_per_second = hz;
}
// TODO: also figure out cpuinfo_num_cpus
// TODO: also figure out cpuinfo_num_cpus
#elif defined OS_WINDOWS
# pragma comment(lib, "shlwapi.lib") // for SHGetValue()
#pragma comment(lib, "shlwapi.lib") // for SHGetValue()
// In NT, read MHz from the registry. If we fail to do so or we're in win9x
// then make a crude estimate.
OSVERSIONINFO os;
os.dwOSVersionInfoSize = sizeof(os);
DWORD data, data_size = sizeof(data);
if (GetVersionEx(&os) &&
os.dwPlatformId == VER_PLATFORM_WIN32_NT &&
SUCCEEDED(SHGetValueA(HKEY_LOCAL_MACHINE,
"HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
"~MHz", NULL, &data, &data_size)))
cpuinfo_cycles_per_second = (int64)data * (int64)(1000 * 1000); // was mhz
if (GetVersionEx(&os) && os.dwPlatformId == VER_PLATFORM_WIN32_NT &&
SUCCEEDED(
SHGetValueA(HKEY_LOCAL_MACHINE,
"HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
"~MHz", NULL, &data, &data_size)))
cpuinfo_cycles_per_second = (int64)data * (int64)(1000 * 1000); // was mhz
else
cpuinfo_cycles_per_second = EstimateCyclesPerSecond(500); // TODO <500?
// TODO: also figure out cpuinfo_num_cpus
cpuinfo_cycles_per_second = EstimateCyclesPerSecond(500); // TODO <500?
// TODO: also figure out cpuinfo_num_cpus
#elif defined OS_MACOSX
// returning "mach time units" per second. the current number of elapsed
@ -243,10 +242,10 @@ void InitializeSystemInfo() {
int num_cpus = 0;
size_t size = sizeof(num_cpus);
int numcpus_name[] = { CTL_HW, HW_NCPU };
if (::sysctl(numcpus_name, arraysize(numcpus_name), &num_cpus, &size, 0, 0)
== 0
&& (size == sizeof(num_cpus)))
int numcpus_name[] = {CTL_HW, HW_NCPU};
if (::sysctl(numcpus_name, arraysize(numcpus_name), &num_cpus, &size, 0, 0) ==
0 &&
(size == sizeof(num_cpus)))
cpuinfo_num_cpus = num_cpus;
#else
@ -261,16 +260,16 @@ void InitializeSystemInfo() {
static double MyCPUUsageRUsage() {
struct rusage ru;
if (getrusage(RUSAGE_SELF, &ru) == 0) {
return (static_cast<double>(ru.ru_utime.tv_sec) +
static_cast<double>(ru.ru_utime.tv_usec)*1e-6 +
static_cast<double>(ru.ru_stime.tv_sec) +
static_cast<double>(ru.ru_stime.tv_usec)*1e-6);
return (static_cast<double>(ru.ru_utime.tv_sec) +
static_cast<double>(ru.ru_utime.tv_usec) * 1e-6 +
static_cast<double>(ru.ru_stime.tv_sec) +
static_cast<double>(ru.ru_stime.tv_usec) * 1e-6);
} else {
return 0.0;
}
}
static bool MyCPUUsageCPUTimeNsLocked(double *cputime) {
static bool MyCPUUsageCPUTimeNsLocked(double* cputime) {
static int cputime_fd = -1;
if (cputime_fd == -1) {
cputime_fd = open("/proc/self/cputime_ns", O_RDONLY);
@ -281,7 +280,7 @@ static bool MyCPUUsageCPUTimeNsLocked(double *cputime) {
}
char buff[64];
memset(buff, 0, sizeof(buff));
if (pread(cputime_fd, buff, sizeof(buff)-1, 0) <= 0) {
if (pread(cputime_fd, buff, sizeof(buff) - 1, 0) <= 0) {
close(cputime_fd);
cputime_fd = -1;
return false;
@ -316,10 +315,10 @@ double MyCPUUsage() {
double ChildrenCPUUsage() {
struct rusage ru;
if (getrusage(RUSAGE_CHILDREN, &ru) == 0) {
return (static_cast<double>(ru.ru_utime.tv_sec) +
static_cast<double>(ru.ru_utime.tv_usec)*1e-6 +
static_cast<double>(ru.ru_stime.tv_sec) +
static_cast<double>(ru.ru_stime.tv_usec)*1e-6);
return (static_cast<double>(ru.ru_utime.tv_sec) +
static_cast<double>(ru.ru_utime.tv_usec) * 1e-6 +
static_cast<double>(ru.ru_stime.tv_sec) +
static_cast<double>(ru.ru_stime.tv_usec) * 1e-6);
} else {
return 0.0;
}

View File

@ -75,8 +75,8 @@ void Initialize() {
cycles_per_second = static_cast<int64_t>(CyclesPerSecond());
CHECK(cycles_per_second != 0);
seconds_per_cycle = 1.0 / cycles_per_second;
max_interval_cycles = static_cast<int64_t>(
cycles_per_second * kMaxErrorInterval);
max_interval_cycles =
static_cast<int64_t>(cycles_per_second * kMaxErrorInterval);
do {
base_cycletime = cycleclock::Now();
base_walltime = Slow();
@ -90,8 +90,7 @@ void Initialize() {
}
WallTime Now() {
if (!std::atomic_load(&initialized))
return Slow();
if (!std::atomic_load(&initialized)) return Slow();
WallTime now = 0.0;
WallTime result = 0.0;
@ -105,7 +104,7 @@ WallTime Now() {
top_bits = static_cast<uint32_t>(uint64_t(ct) >> 32);
// Recompute drift no more often than every 2^32 cycles.
// I.e., @2GHz, ~ every two seconds
if (top_bits == last_adjust_time) { // don't need to recompute drift
if (top_bits == last_adjust_time) { // don't need to recompute drift
return result + GetDrift();
}
@ -119,8 +118,8 @@ WallTime Now() {
return now;
}
std::string Print(WallTime time, const char *format, bool local,
int *remainder_us) {
std::string Print(WallTime time, const char* format, bool local,
int* remainder_us) {
char storage[32];
struct tm split;
double subsecond;
@ -130,7 +129,7 @@ std::string Print(WallTime time, const char *format, bool local,
if (remainder_us != NULL) {
*remainder_us = static_cast<int>((subsecond * 1000000) + 0.5);
if (*remainder_us > 999999) *remainder_us = 999999;
if (*remainder_us < 0) *remainder_us = 0;
if (*remainder_us < 0) *remainder_us = 0;
}
strftime(storage, sizeof(storage), format, &split);
}