mirror of
https://github.com/google/benchmark.git
synced 2025-01-13 21:30:14 +08:00
Initial commit
Benchmark library builds and runs but only single-threaded. Multithreaded support needs a bit more love. Currently requires some C++11 support (g++ 4.6.3 seems to work).
This commit is contained in:
commit
403f354423
6
.gitignore
vendored
Normal file
6
.gitignore
vendored
Normal file
@ -0,0 +1,6 @@
|
||||
CMakeCache.txt
|
||||
CMakeFiles/
|
||||
Makefile
|
||||
bin/
|
||||
cmake_install.cmake
|
||||
lib/
|
43
CMakeLists.txt
Normal file
43
CMakeLists.txt
Normal file
@ -0,0 +1,43 @@
|
||||
cmake_minimum_required (VERSION 2.8)
|
||||
project (benchmark)
|
||||
|
||||
find_package(Threads)
|
||||
|
||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/bin)
|
||||
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/lib)
|
||||
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/lib)
|
||||
|
||||
set(CMAKE_CXX_FLAGS "-Wall -Werror --std=c++0x")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "-g -O0 -DDEBUG")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "-fno-strict-aliasing -O3 -DNDEBUG")
|
||||
|
||||
# Set OS
|
||||
if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
|
||||
add_definitions(-DOS_MACOSX)
|
||||
endif()
|
||||
|
||||
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
|
||||
add_definitions(-DOS_LINUX)
|
||||
endif()
|
||||
|
||||
if(${CMAKE_SYSTEM_NAME} MATCHES "Windows")
|
||||
add_definitions(-DOS_WINDOWS)
|
||||
endif()
|
||||
|
||||
# Set CPU
|
||||
if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "x86")
|
||||
add_definitions(-DARCH_X86)
|
||||
endif()
|
||||
|
||||
# Set up directories
|
||||
include_directories(${PROJECT_SOURCE_DIR}/include)
|
||||
include_directories(${PROJECT_SOURCE_DIR}/src)
|
||||
link_directories(${PROJECT_SOURCE_DIR}/lib)
|
||||
|
||||
# Build the targets
|
||||
FILE(GLOB SOURCE_FILES "src/*.cc")
|
||||
add_library(benchmark STATIC ${SOURCE_FILES})
|
||||
|
||||
add_executable(benchmark_test test/benchmark_test.cc)
|
||||
target_link_libraries(benchmark_test benchmark ${CMAKE_THREAD_LIBS_INIT})
|
||||
|
540
include/benchmark/benchmark.h
Normal file
540
include/benchmark/benchmark.h
Normal file
@ -0,0 +1,540 @@
|
||||
// Support for registering benchmarks for functions.
|
||||
|
||||
/* Example usage:
|
||||
// Define a function that executes the code to be measured a
|
||||
// specified number of times:
|
||||
static void BM_StringCreation(benchmark::State& state) {
|
||||
while (state.KeepRunning())
|
||||
std::string empty_string;
|
||||
}
|
||||
|
||||
// Register the function as a benchmark
|
||||
BENCHMARK(BM_StringCreation);
|
||||
|
||||
// Define another benchmark
|
||||
static void BM_StringCopy(benchmark::State& state) {
|
||||
std::string x = "hello";
|
||||
while (state.KeepRunning())
|
||||
std::string copy(x);
|
||||
}
|
||||
BENCHMARK(BM_StringCopy);
|
||||
|
||||
// Augment the main() program to invoke benchmarks if specified
|
||||
// via the --benchmarks command line flag. E.g.,
|
||||
// my_unittest --benchmarks=all
|
||||
// my_unittest --benchmarks=BM_StringCreation
|
||||
// my_unittest --benchmarks=String
|
||||
// my_unittest --benchmarks='Copy|Creation'
|
||||
int main(int argc, char** argv) {
|
||||
Initialize(&argc, argv);
|
||||
|
||||
RunSpecifiedBenchmarks();
|
||||
}
|
||||
|
||||
// Sometimes a family of microbenchmarks can be implemented with
|
||||
// just one routine that takes an extra argument to specify which
|
||||
// one of the family of benchmarks to run. For example, the following
|
||||
// code defines a family of microbenchmarks for measuring the speed
|
||||
// of memcpy() calls of different lengths:
|
||||
|
||||
static void BM_memcpy(benchmark::State& state) {
|
||||
char* src = new char[state.range_x()]; char* dst = new char[state.range_x()];
|
||||
memset(src, 'x', state.range_x());
|
||||
while (state.KeepRunning()) {
|
||||
memcpy(dst, src, state.range_x());
|
||||
SetBenchmarkBytesProcessed(int64_t_t(state.iterations) * int64(state.range_x()));
|
||||
delete[] src; delete[] dst;
|
||||
}
|
||||
BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10);
|
||||
|
||||
// The preceding code is quite repetitive, and can be replaced with the
|
||||
// following short-hand. The following invocation will pick a few
|
||||
// appropriate arguments in the specified range and will generate a
|
||||
// microbenchmark for each such argument.
|
||||
BENCHMARK(BM_memcpy)->Range(8, 8<<10);
|
||||
|
||||
// You might have a microbenchmark that depends on two inputs. For
|
||||
// example, the following code defines a family of microbenchmarks for
|
||||
// measuring the speed of set insertion.
|
||||
static void BM_SetInsert(benchmark::State& state) {
|
||||
while (state.KeepRunning()) {
|
||||
state.PauseTiming();
|
||||
set<int> data = ConstructRandomSet(state.range_x());
|
||||
state.ResumeTiming();
|
||||
for (int j = 0; j < state.rangeY; ++j)
|
||||
data.insert(RandomNumber());
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_SetInsert)
|
||||
->ArgPair(1<<10, 1)
|
||||
->ArgPair(1<<10, 8)
|
||||
->ArgPair(1<<10, 64)
|
||||
->ArgPair(1<<10, 512)
|
||||
->ArgPair(8<<10, 1)
|
||||
->ArgPair(8<<10, 8)
|
||||
->ArgPair(8<<10, 64)
|
||||
->ArgPair(8<<10, 512);
|
||||
|
||||
// The preceding code is quite repetitive, and can be replaced with
|
||||
// the following short-hand. The following macro will pick a few
|
||||
// appropriate arguments in the product of the two specified ranges
|
||||
// and will generate a microbenchmark for each such pair.
|
||||
BENCHMARK(BM_SetInsert)->RangePair(1<<10, 8<<10, 1, 512);
|
||||
|
||||
// For more complex patterns of inputs, passing a custom function
|
||||
// to Apply allows programmatic specification of an
|
||||
// arbitrary set of arguments to run the microbenchmark on.
|
||||
// The following example enumerates a dense range on
|
||||
// one parameter, and a sparse range on the second.
|
||||
static benchmark::internal::Benchmark* CustomArguments(
|
||||
benchmark::internal::Benchmark* b) {
|
||||
for (int i = 0; i <= 10; ++i)
|
||||
for (int j = 32; j <= 1024*1024; j *= 8)
|
||||
b = b->ArgPair(i, j);
|
||||
return b;
|
||||
}
|
||||
BENCHMARK(BM_SetInsert)->Apply(CustomArguments);
|
||||
|
||||
// Templated microbenchmarks work the same way:
|
||||
// Produce then consume 'size' messages 'iters' times
|
||||
// Measures throughput in the absence of multiprogramming.
|
||||
template <class Q> int BM_Sequential(benchmark::State& state) {
|
||||
Q q;
|
||||
typename Q::value_type v;
|
||||
while (state.KeepRunning()) {
|
||||
for (int i = state.range_x(); i--; )
|
||||
q.push(v);
|
||||
for (int e = state.range_x(); e--; )
|
||||
q.Wait(&v);
|
||||
}
|
||||
// actually messages, not bytes:
|
||||
state.SetBytesProcessed(
|
||||
static_cast<int64_t>(state.iterations())*state.range_x());
|
||||
}
|
||||
BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);
|
||||
|
||||
In a multithreaded test, it is guaranteed that none of the threads will start
|
||||
until all have called KeepRunning, and all will have finished before KeepRunning
|
||||
returns false. As such, any global setup or teardown you want to do can be
|
||||
wrapped in a check against the thread index:
|
||||
|
||||
static void BM_MultiThreaded(benchmark::State& state) {
|
||||
if (state.thread_index == 0) {
|
||||
// Setup code here.
|
||||
}
|
||||
while (state.KeepRunning()) {
|
||||
// Run the test as normal.
|
||||
}
|
||||
if (state.thread_index == 0) {
|
||||
// Teardown code here.
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
#ifndef BENCHMARK_BENCHMARK_H_
|
||||
#define BENCHMARK_BENCHMARK_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <functional>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "macros.h"
|
||||
|
||||
namespace benchmark {
|
||||
// If the --benchmarks flag is empty, do nothing.
|
||||
//
|
||||
// Otherwise, run all benchmarks specified by the --benchmarks flag,
|
||||
// and exit after running the benchmarks.
|
||||
extern void RunSpecifiedBenchmarks();
|
||||
|
||||
// ------------------------------------------------------
|
||||
// Routines that can be called from within a benchmark
|
||||
|
||||
//
|
||||
// REQUIRES: a benchmark is currently executing
|
||||
extern void SetLabel(const std::string& label);
|
||||
|
||||
// If this routine is called, peak memory allocation past this point in the
|
||||
// benchmark is reported at the end of the benchmark report line. (It is
|
||||
// computed by running the benchmark once with a single iteration and a memory
|
||||
// tracer.)
|
||||
extern void MemoryUsage();
|
||||
|
||||
// If a particular benchmark is I/O bound, or if for some reason CPU
|
||||
// timings are not representative, call this method from within the
|
||||
// benchmark routine. If called, the elapsed time will be used to
|
||||
// control how many iterations are run, and in the printing of
|
||||
// items/second or MB/seconds values. If not called, the cpu time
|
||||
// used by the benchmark will be used.
|
||||
extern void UseRealTime();
|
||||
|
||||
namespace internal {
|
||||
class Benchmark;
|
||||
}
|
||||
|
||||
// State is passed to a running Benchmark and contains state for the
|
||||
// benchmark to use.
|
||||
class State {
|
||||
public:
|
||||
// Returns true iff the benchmark should continue through another iteration.
|
||||
bool KeepRunning();
|
||||
|
||||
void PauseTiming();
|
||||
void ResumeTiming();
|
||||
|
||||
// Set the number of bytes processed by the current benchmark
|
||||
// execution. This routine is typically called once at the end of a
|
||||
// throughput oriented benchmark. If this routine is called with a
|
||||
// value > 0, the report is printed in MB/sec instead of nanoseconds
|
||||
// per iteration.
|
||||
//
|
||||
// REQUIRES: a benchmark has exited its KeepRunning loop.
|
||||
void SetBytesProcessed(int64_t bytes);
|
||||
|
||||
// If this routine is called with items > 0, then an items/s
|
||||
// label is printed on the benchmark report line for the currently
|
||||
// executing benchmark. It is typically called at the end of a processing
|
||||
// benchmark where a processing items/second output is desired.
|
||||
//
|
||||
// REQUIRES: a benchmark has exited its KeepRunning loop.
|
||||
void SetItemsProcessed(int64_t items);
|
||||
|
||||
// If this routine is called, the specified label is printed at the
|
||||
// end of the benchmark report line for the currently executing
|
||||
// benchmark. Example:
|
||||
// static void BM_Compress(int iters) {
|
||||
// ...
|
||||
// double compress = input_size / output_size;
|
||||
// benchmark::SetLabel(StringPrintf("compress:%.1f%%", 100.0*compression));
|
||||
// }
|
||||
// Produces output that looks like:
|
||||
// BM_Compress 50 50 14115038 compress:27.3%
|
||||
//
|
||||
// REQUIRES: a benchmark has exited its KeepRunning loop.
|
||||
void SetLabel(const std::string& label);
|
||||
|
||||
// Range arguments for this run. CHECKs if the argument has been set.
|
||||
int range_x() const;
|
||||
int range_y() const;
|
||||
|
||||
int iterations() const { return total_iterations_; }
|
||||
|
||||
const int thread_index;
|
||||
|
||||
private:
|
||||
class FastClock;
|
||||
struct SharedState;
|
||||
|
||||
State(FastClock* clock, SharedState* s, int t);
|
||||
bool StartRunning();
|
||||
bool FinishInterval();
|
||||
bool MaybeStop();
|
||||
void NewInterval();
|
||||
bool AllStarting();
|
||||
bool RunAnotherInterval() const;
|
||||
|
||||
void Run();
|
||||
|
||||
enum EState {
|
||||
STATE_INITIAL, // KeepRunning hasn't been called
|
||||
STATE_STARTING, // KeepRunning called, waiting for other threads
|
||||
STATE_RUNNING, // Running and being timed
|
||||
STATE_STOPPING, // Not being timed but waiting for other threads
|
||||
STATE_STOPPED, // Stopped
|
||||
} state_;
|
||||
|
||||
FastClock* clock_;
|
||||
|
||||
// State shared by all BenchmarkRun objects that belong to the same
|
||||
// BenchmarkInstance
|
||||
SharedState* shared_;
|
||||
|
||||
// Custom label set by the user.
|
||||
std::string label_;
|
||||
|
||||
// Each State object goes through a sequence of measurement intervals. By
|
||||
// default each interval is approx. 100ms in length. The following stats are
|
||||
// kept for each interval.
|
||||
int64_t iterations_;
|
||||
double start_cpu_;
|
||||
double start_time_;
|
||||
int64_t stop_time_micros_;
|
||||
|
||||
double start_pause_;
|
||||
double pause_time_;
|
||||
|
||||
// Total number of iterations for all finished runs.
|
||||
int64_t total_iterations_;
|
||||
|
||||
// Approximate time in microseconds for one interval of execution.
|
||||
// Dynamically adjusted as needed.
|
||||
int64_t interval_micros_;
|
||||
|
||||
// True if the current interval is the continuation of a previous one.
|
||||
bool is_continuation_;
|
||||
|
||||
friend class internal::Benchmark;
|
||||
DISALLOW_COPY_AND_ASSIGN(State);
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
class BenchmarkReporter;
|
||||
|
||||
typedef std::function<void(State&)> BenchmarkFunction;
|
||||
|
||||
// Run all benchmarks whose name is a partial match for the regular
|
||||
// expression in "spec". The results of benchmark runs are fed to "reporter".
|
||||
void RunMatchingBenchmarks(const std::string& spec,
|
||||
BenchmarkReporter* reporter);
|
||||
|
||||
// Extract the list of benchmark names that match the specified regular
|
||||
// expression.
|
||||
void FindMatchingBenchmarkNames(const std::string& re,
|
||||
std::vector<std::string>* benchmark_names);
|
||||
|
||||
// ------------------------------------------------------
|
||||
// Benchmark registration object. The BENCHMARK() macro expands
|
||||
// into an internal::Benchmark* object. Various methods can
|
||||
// be called on this object to change the properties of the benchmark.
|
||||
// Each method returns "this" so that multiple method calls can
|
||||
// chained into one expression.
|
||||
class Benchmark {
|
||||
public:
|
||||
// The Benchmark takes ownership of the Callback pointed to by f.
|
||||
Benchmark(const char* name, BenchmarkFunction f);
|
||||
|
||||
~Benchmark();
|
||||
|
||||
// Note: the following methods all return "this" so that multiple
|
||||
// method calls can be chained together in one expression.
|
||||
|
||||
// Run this benchmark once with "x" as the extra argument passed
|
||||
// to the function.
|
||||
// REQUIRES: The function passed to the constructor must accept an arg1.
|
||||
Benchmark* Arg(int x);
|
||||
|
||||
// Run this benchmark once for a number of values picked from the
|
||||
// range [start..limit]. (start and limit are always picked.)
|
||||
// REQUIRES: The function passed to the constructor must accept an arg1.
|
||||
Benchmark* Range(int start, int limit);
|
||||
|
||||
// Run this benchmark once for every value in the range [start..limit]
|
||||
// REQUIRES: The function passed to the constructor must accept an arg1.
|
||||
Benchmark* DenseRange(int start, int limit);
|
||||
|
||||
// Run this benchmark once with "x,y" as the extra arguments passed
|
||||
// to the function.
|
||||
// REQUIRES: The function passed to the constructor must accept arg1,arg2.
|
||||
Benchmark* ArgPair(int x, int y);
|
||||
|
||||
// Pick a set of values A from the range [lo1..hi1] and a set
|
||||
// of values B from the range [lo2..hi2]. Run the benchmark for
|
||||
// every pair of values in the cartesian product of A and B
|
||||
// (i.e., for all combinations of the values in A and B).
|
||||
// REQUIRES: The function passed to the constructor must accept arg1,arg2.
|
||||
Benchmark* RangePair(int lo1, int hi1, int lo2, int hi2);
|
||||
|
||||
// Pass this benchmark object to *func, which can customize
|
||||
// the benchmark by calling various methods like Arg, ArgPair,
|
||||
// Threads, etc.
|
||||
Benchmark* Apply(void (*func)(Benchmark* benchmark));
|
||||
|
||||
// Support for running multiple copies of the same benchmark concurrently
|
||||
// in multiple threads. This may be useful when measuring the scaling
|
||||
// of some piece of code.
|
||||
|
||||
// Run one instance of this benchmark concurrently in t threads.
|
||||
Benchmark* Threads(int t);
|
||||
|
||||
// Pick a set of values T from [min_threads,max_threads].
|
||||
// min_threads and max_threads are always included in T. Run this
|
||||
// benchmark once for each value in T. The benchmark run for a
|
||||
// particular value t consists of t threads running the benchmark
|
||||
// function concurrently. For example, consider:
|
||||
// BENCHMARK(Foo)->ThreadRange(1,16);
|
||||
// This will run the following benchmarks:
|
||||
// Foo in 1 thread
|
||||
// Foo in 2 threads
|
||||
// Foo in 4 threads
|
||||
// Foo in 8 threads
|
||||
// Foo in 16 threads
|
||||
Benchmark* ThreadRange(int min_threads, int max_threads);
|
||||
|
||||
// Equivalent to ThreadRange(NumCPUs(), NumCPUs())
|
||||
Benchmark* ThreadPerCpu();
|
||||
|
||||
// TODO(dominich): Control whether or not real-time is used for this benchmark
|
||||
// TODO(dominich): Control the default number of iterations
|
||||
|
||||
// -------------------------------
|
||||
// Following methods are not useful for clients
|
||||
|
||||
// Used inside the benchmark implementation
|
||||
struct Instance;
|
||||
struct ThreadStats;
|
||||
|
||||
// Extract the list of benchmark instances that match the specified
|
||||
// regular expression.
|
||||
static void FindBenchmarks(const std::string& re,
|
||||
std::vector<Instance>* benchmarks);
|
||||
|
||||
// Measure the overhead of an empty benchmark to subtract later.
|
||||
static void MeasureOverhead();
|
||||
|
||||
private:
|
||||
std::vector<Benchmark::Instance> CreateBenchmarkInstances(int rangeXindex,
|
||||
int rangeYindex);
|
||||
|
||||
std::string name_;
|
||||
BenchmarkFunction function_;
|
||||
int registration_index_;
|
||||
std::vector<int> rangeX_;
|
||||
std::vector<int> rangeY_;
|
||||
std::vector<int> thread_counts_;
|
||||
|
||||
// Special value placed in thread_counts_ to stand for NumCPUs()
|
||||
static const int kNumCpuMarker = -1;
|
||||
|
||||
// Special value used to indicate that no range is required.
|
||||
static const int kNoRange = -1;
|
||||
|
||||
static void AddRange(std::vector<int>* dst, int lo, int hi, int mult);
|
||||
static double MeasurePeakHeapMemory(const Instance& b);
|
||||
static void RunInstance(const Instance& b, BenchmarkReporter* br);
|
||||
friend class ::benchmark::State;
|
||||
friend struct ::benchmark::internal::Benchmark::Instance;
|
||||
friend void ::benchmark::internal::RunMatchingBenchmarks(
|
||||
const std::string&, BenchmarkReporter*);
|
||||
DISALLOW_COPY_AND_ASSIGN(Benchmark);
|
||||
};
|
||||
|
||||
// ------------------------------------------------------
|
||||
// Benchmarks reporter interface + data containers.
|
||||
|
||||
struct BenchmarkContextData {
|
||||
int num_cpus;
|
||||
double mhz_per_cpu;
|
||||
//std::string cpu_info;
|
||||
bool cpu_scaling_enabled;
|
||||
|
||||
// The number of chars in the longest benchmark name.
|
||||
int name_field_width;
|
||||
};
|
||||
|
||||
struct BenchmarkRunData {
|
||||
BenchmarkRunData() :
|
||||
thread_index(-1),
|
||||
iterations(1),
|
||||
real_accumulated_time(0),
|
||||
cpu_accumulated_time(0),
|
||||
bytes_per_second(0),
|
||||
items_per_second(0),
|
||||
max_heapbytes_used(0) {}
|
||||
|
||||
std::string benchmark_name;
|
||||
std::string report_label;
|
||||
int thread_index;
|
||||
int64_t iterations;
|
||||
double real_accumulated_time;
|
||||
double cpu_accumulated_time;
|
||||
|
||||
// Zero if not set by benchmark.
|
||||
double bytes_per_second;
|
||||
double items_per_second;
|
||||
|
||||
// This is set to 0.0 if memory tracing is not enabled.
|
||||
double max_heapbytes_used;
|
||||
};
|
||||
|
||||
// Interface for custom benchmark result printers.
|
||||
// By default, benchmark reports are printed to stdout. However an application
|
||||
// can control the destination of the reports by calling
|
||||
// RunMatchingBenchmarks and passing it a custom reporter object.
|
||||
// The reporter object must implement the following interface.
|
||||
class BenchmarkReporter {
|
||||
public:
|
||||
// Called once for every suite of benchmarks run.
|
||||
// The parameter "context" contains information that the
|
||||
// reporter may wish to use when generating its report, for example the
|
||||
// platform under which the benchmarks are running. The benchmark run is
|
||||
// never started if this function returns false, allowing the reporter
|
||||
// to skip runs based on the context information.
|
||||
virtual bool ReportContext(const BenchmarkContextData& context) = 0;
|
||||
|
||||
// Called once for each group of benchmark runs, gives information about
|
||||
// cpu-time and heap memory usage during the benchmark run.
|
||||
// Note that all the grouped benchmark runs should refer to the same
|
||||
// benchmark, thus have the same name.
|
||||
virtual void ReportRuns(const std::vector<BenchmarkRunData>& report) = 0;
|
||||
|
||||
virtual ~BenchmarkReporter();
|
||||
};
|
||||
|
||||
|
||||
// ------------------------------------------------------
|
||||
// Internal implementation details follow; please ignore
|
||||
|
||||
// Given a collection of reports, computes their mean and stddev.
|
||||
// REQUIRES: all runs in "reports" must be from the same benchmark.
|
||||
void ComputeStats(const std::vector<BenchmarkRunData>& reports,
|
||||
BenchmarkRunData* mean_data,
|
||||
BenchmarkRunData* stddev_data);
|
||||
|
||||
// Simple reporter that outputs benchmark data to the console. This is the
|
||||
// default reporter used by RunSpecifiedBenchmarks().
|
||||
class ConsoleReporter : public BenchmarkReporter {
|
||||
public:
|
||||
virtual bool ReportContext(const BenchmarkContextData& context);
|
||||
virtual void ReportRuns(const std::vector<BenchmarkRunData>& reports);
|
||||
private:
|
||||
std::string PrintMemoryUsage(double bytes);
|
||||
virtual void PrintRunData(const BenchmarkRunData& report);
|
||||
int name_field_width_;
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
void Initialize(int* argc, const char** argv);
|
||||
} // end namespace benchmark
|
||||
|
||||
// ------------------------------------------------------
|
||||
// Macro to register benchmarks
|
||||
|
||||
// Helpers for generating unique variable names
|
||||
#define BENCHMARK_CONCAT(a, b, c) BENCHMARK_CONCAT2(a, b, c)
|
||||
#define BENCHMARK_CONCAT2(a, b, c) a ## b ## c
|
||||
|
||||
#define BENCHMARK(n) \
|
||||
static ::benchmark::internal::Benchmark* \
|
||||
BENCHMARK_CONCAT(__benchmark_, n, __LINE__) ATTRIBUTE_UNUSED = \
|
||||
(new ::benchmark::internal::Benchmark(#n, n))
|
||||
|
||||
// Old-style macros
|
||||
#define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a))
|
||||
#define BENCHMARK_WITH_ARG2(n, a1, a2) BENCHMARK(n)->ArgPair((a1), (a2))
|
||||
#define BENCHMARK_RANGE(n, lo, hi) BENCHMARK(n)->Range((lo), (hi))
|
||||
#define BENCHMARK_RANGE2(n, l1, h1, l2, h2) \
|
||||
BENCHMARK(n)->RangePair((l1), (h1), (l2), (h2))
|
||||
|
||||
// This will register a benchmark for a templatized function. For example:
|
||||
//
|
||||
// template<int arg>
|
||||
// void BM_Foo(int iters);
|
||||
//
|
||||
// BENCHMARK_TEMPLATE(BM_Foo, 1);
|
||||
//
|
||||
// will register BM_Foo<1> as a benchmark.
|
||||
#define BENCHMARK_TEMPLATE(n, a) \
|
||||
static ::benchmark::internal::Benchmark* \
|
||||
BENCHMARK_CONCAT(__benchmark_, n, __LINE__) ATTRIBUTE_UNUSED = \
|
||||
(new ::benchmark::internal::Benchmark(#n "<" #a ">", n<a>))
|
||||
|
||||
#define BENCHMARK_TEMPLATE2(n, a, b) \
|
||||
static ::benchmark::internal::Benchmark* \
|
||||
BENCHMARK_CONCAT(__benchmark_, n, __LINE__) ATTRIBUTE_UNUSED = \
|
||||
(new ::benchmark::internal::Benchmark(#n "<" #a "," #b ">", n<a, b>))
|
||||
|
||||
#endif // BENCHMARK_BENCHMARK_H_
|
||||
|
120
include/benchmark/macros.h
Normal file
120
include/benchmark/macros.h
Normal file
@ -0,0 +1,120 @@
|
||||
#ifndef BENCHMARK_MACROS_H_
|
||||
#define BENCHMARK_MACROS_H_
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
|
||||
TypeName(const TypeName&); \
|
||||
void operator=(const TypeName&);
|
||||
|
||||
// The arraysize(arr) macro returns the # of elements in an array arr.
|
||||
// The expression is a compile-time constant, and therefore can be
|
||||
// used in defining new arrays, for example. If you use arraysize on
|
||||
// a pointer by mistake, you will get a compile-time error.
|
||||
//
|
||||
// One caveat is that, for C++03, arraysize() doesn't accept any array of
|
||||
// an anonymous type or a type defined inside a function. In these rare
|
||||
// cases, you have to use the unsafe ARRAYSIZE() macro below. This is
|
||||
// due to a limitation in C++03's template system. The limitation has
|
||||
// been removed in C++11.
|
||||
|
||||
// This template function declaration is used in defining arraysize.
|
||||
// Note that the function doesn't need an implementation, as we only
|
||||
// use its type.
|
||||
template <typename T, size_t N>
|
||||
char (&ArraySizeHelper(T (&array)[N]))[N];
|
||||
|
||||
// That gcc wants both of these prototypes seems mysterious. VC, for
|
||||
// its part, can't decide which to use (another mystery). Matching of
|
||||
// template overloads: the final frontier.
|
||||
#ifndef COMPILER_MSVC
|
||||
template <typename T, size_t N>
|
||||
char (&ArraySizeHelper(const T (&array)[N]))[N];
|
||||
#endif
|
||||
|
||||
#define arraysize(array) (sizeof(ArraySizeHelper(array)))
|
||||
|
||||
// The STATIC_ASSERT macro can be used to verify that a compile time
|
||||
// expression is true. For example, you could use it to verify the
|
||||
// size of a static array:
|
||||
//
|
||||
// STATIC_ASSERT(ARRAYSIZE(content_type_names) == CONTENT_NUM_TYPES,
|
||||
// content_type_names_incorrect_size);
|
||||
//
|
||||
// or to make sure a struct is smaller than a certain size:
|
||||
//
|
||||
// STATIC_ASSERT(sizeof(foo) < 128, foo_too_large);
|
||||
//
|
||||
// The second argument to the macro is the name of the variable. If
|
||||
// the expression is false, most compilers will issue a warning/error
|
||||
// containing the name of the variable.
|
||||
|
||||
template <bool>
|
||||
struct StaticAssert {
|
||||
};
|
||||
|
||||
#define STATIC_ASSERT(expr, msg) \
|
||||
typedef StaticAssert<(bool(expr))> msg[bool(expr) ? 1 : -1]
|
||||
|
||||
// Implementation details of STATIC_ASSERT:
|
||||
//
|
||||
// - STATIC_ASSERT works by defining an array type that has -1
|
||||
// elements (and thus is invalid) when the expression is false.
|
||||
//
|
||||
// - The simpler definition
|
||||
//
|
||||
// #define STATIC_ASSERT(expr, msg) typedef char msg[(expr) ? 1 : -1]
|
||||
//
|
||||
// does not work, as gcc supports variable-length arrays whose sizes
|
||||
// are determined at run-time (this is gcc's extension and not part
|
||||
// of the C++ standard). As a result, gcc fails to reject the
|
||||
// following code with the simple definition:
|
||||
//
|
||||
// int foo;
|
||||
// STATIC_ASSERT(foo, msg); // not supposed to compile as foo is
|
||||
// // not a compile-time constant.
|
||||
//
|
||||
// - By using the type StaticAssert<(bool(expr))>, we ensures that
|
||||
// expr is a compile-time constant. (Template arguments must be
|
||||
// determined at compile-time.)
|
||||
//
|
||||
// - The outer parentheses in StaticAssert<(bool(expr))> are necessary
|
||||
// to work around a bug in gcc 3.4.4 and 4.0.1. If we had written
|
||||
//
|
||||
// StaticAssert<bool(expr)>
|
||||
//
|
||||
// instead, these compilers will refuse to compile
|
||||
//
|
||||
// STATIC_ASSERT(5 > 0, some_message);
|
||||
//
|
||||
// (They seem to think the ">" in "5 > 0" marks the end of the
|
||||
// template argument list.)
|
||||
//
|
||||
// - The array size is (bool(expr) ? 1 : -1), instead of simply
|
||||
//
|
||||
// ((expr) ? 1 : -1).
|
||||
//
|
||||
// This is to avoid running into a bug in MS VC 7.1, which
|
||||
// causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1.
|
||||
|
||||
#define CHECK(b) do { if (!(b)) assert(false); } while(0)
|
||||
#define CHECK_EQ(a, b) CHECK((a) == (b))
|
||||
#define CHECK_GE(a, b) CHECK((a) >= (b))
|
||||
#define CHECK_LE(a, b) CHECK((a) <= (b))
|
||||
#define CHECK_GT(a, b) CHECK((a) > (b))
|
||||
#define CHECK_LT(a, b) CHECK((a) < (b))
|
||||
|
||||
//
|
||||
// Prevent the compiler from complaining about or optimizing away variables
|
||||
// that appear unused.
|
||||
#define ATTRIBUTE_UNUSED __attribute__ ((unused))
|
||||
|
||||
//
|
||||
// For functions we want to force inline or not inline.
|
||||
// Introduced in gcc 3.1.
|
||||
#define ATTRIBUTE_ALWAYS_INLINE __attribute__ ((always_inline))
|
||||
#define HAVE_ATTRIBUTE_ALWAYS_INLINE 1
|
||||
#define ATTRIBUTE_NOINLINE __attribute__ ((noinline))
|
||||
#define HAVE_ATTRIBUTE_NOINLINE 1
|
||||
|
||||
#endif // BENCHMARK_MACROS_H_
|
1197
src/benchmark.cc
Normal file
1197
src/benchmark.cc
Normal file
File diff suppressed because it is too large
Load Diff
82
src/colorprint.cc
Normal file
82
src/colorprint.cc
Normal file
@ -0,0 +1,82 @@
|
||||
#include "colorprint.h"
|
||||
|
||||
#include <stdarg.h>
|
||||
|
||||
#include "commandlineflags.h"
|
||||
|
||||
DECLARE_bool(color_print);
|
||||
|
||||
namespace {
|
||||
#ifdef OS_WINDOWS
|
||||
typedef WORD PlatformColorCode;
|
||||
#else
|
||||
typedef const char* PlatformColorCode;
|
||||
#endif
|
||||
|
||||
PlatformColorCode GetPlatformColorCode(LogColor color) {
|
||||
#ifdef OS_WINDOWS
|
||||
switch (color) {
|
||||
case COLOR_RED: return FOREGROUND_RED;
|
||||
case COLOR_GREEN: return FOREGROUND_GREEN;
|
||||
case COLOR_YELLOW: return FOREGROUND_RED | FOREGROUND_GREEN;
|
||||
case COLOR_BLUE: return FOREGROUND_BLUE;
|
||||
case COLOR_MAGENTA: return FOREGROUND_BLUE | FOREGROUND_RED;
|
||||
case COLOR_CYAN: return FOREGROUND_BLUE | FOREGROUND_GREEN;
|
||||
case COLOR_WHITE: // fall through to default
|
||||
default: return 0;
|
||||
}
|
||||
#else
|
||||
switch (color) {
|
||||
case COLOR_RED: return "1";
|
||||
case COLOR_GREEN: return "2";
|
||||
case COLOR_YELLOW: return "3";
|
||||
case COLOR_BLUE: return "4";
|
||||
case COLOR_MAGENTA: return "5";
|
||||
case COLOR_CYAN: return "6";
|
||||
case COLOR_WHITE: return "7";
|
||||
default: return NULL;
|
||||
};
|
||||
#endif
|
||||
}
|
||||
} // end namespace
|
||||
|
||||
void ColorPrintf(LogColor color, const char* fmt, ...) {
|
||||
va_list args;
|
||||
va_start(args, fmt);
|
||||
|
||||
if (!FLAGS_color_print) {
|
||||
vprintf(fmt, args);
|
||||
va_end(args);
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef OS_WINDOWS
|
||||
const HANDLE stdout_handle = GetStdHandle(STD_OUTPUT_HANDLE);
|
||||
|
||||
// Gets the current text color.
|
||||
CONSOLE_SCREEN_BUFFER_INFO buffer_info;
|
||||
GetConsoleScreenBufferInfo(stdout_handle, &buffer_info);
|
||||
const WORD old_color_attrs = buffer_info.wAttributes;
|
||||
|
||||
// We need to flush the stream buffers into the console before each
|
||||
// SetConsoleTextAttribute call lest it affect the text that is already
|
||||
// printed but has not yet reached the console.
|
||||
fflush(stdout);
|
||||
SetConsoleTextAttribute(stdout_handle,
|
||||
GetPlatformColorCode(color) | FOREGROUND_INTENSITY);
|
||||
vprintf(fmt, args);
|
||||
|
||||
fflush(stdout);
|
||||
// Restores the text color.
|
||||
SetConsoleTextAttribute(stdout_handle, old_color_attrs);
|
||||
#else
|
||||
const char* color_code = GetPlatformColorCode(color);
|
||||
if (color_code)
|
||||
fprintf(stdout, "\033[0;3%sm", color_code);
|
||||
vprintf(fmt, args);
|
||||
printf("\033[m"); // Resets the terminal to default.
|
||||
#endif
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
|
17
src/colorprint.h
Normal file
17
src/colorprint.h
Normal file
@ -0,0 +1,17 @@
|
||||
#ifndef BENCHMARK_COLORPRINT_H_
|
||||
#define BENCHMARK_COLORPRINT_H_
|
||||
|
||||
enum LogColor {
|
||||
COLOR_DEFAULT,
|
||||
COLOR_RED,
|
||||
COLOR_GREEN,
|
||||
COLOR_YELLOW,
|
||||
COLOR_BLUE,
|
||||
COLOR_MAGENTA,
|
||||
COLOR_CYAN,
|
||||
COLOR_WHITE
|
||||
};
|
||||
|
||||
void ColorPrintf(LogColor color, const char* fmt, ...);
|
||||
|
||||
#endif // BENCHMARK_COLORPRINT_H_
|
213
src/commandlineflags.cc
Normal file
213
src/commandlineflags.cc
Normal file
@ -0,0 +1,213 @@
|
||||
#include "commandlineflags.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
|
||||
namespace benchmark {
|
||||
// Parses 'str' for a 32-bit signed integer. If successful, writes
|
||||
// the result to *value and returns true; otherwise leaves *value
|
||||
// unchanged and returns false.
|
||||
bool ParseInt32(const std::string& src_text, const char* str, int32_t* value) {
|
||||
// Parses the environment variable as a decimal integer.
|
||||
char* end = NULL;
|
||||
const long long_value = strtol(str, &end, 10); // NOLINT
|
||||
|
||||
// Has strtol() consumed all characters in the string?
|
||||
if (*end != '\0') {
|
||||
// No - an invalid character was encountered.
|
||||
std::cerr << src_text << " is expected to be a 32-bit integer, "
|
||||
<< "but actually has value \"" << str << "\".\n";
|
||||
return false;
|
||||
}
|
||||
|
||||
// Is the parsed value in the range of an Int32?
|
||||
const int32_t result = static_cast<int32_t>(long_value);
|
||||
if (long_value == std::numeric_limits<long>::max() ||
|
||||
long_value == std::numeric_limits<long>::min() ||
|
||||
// The parsed value overflows as a long. (strtol() returns
|
||||
// LONG_MAX or LONG_MIN when the input overflows.)
|
||||
result != long_value
|
||||
// The parsed value overflows as an Int32.
|
||||
) {
|
||||
std::cerr << src_text << " is expected to be a 32-bit integer, "
|
||||
<< "but actually has value \"" << str << "\", "
|
||||
<< "which overflows.\n";
|
||||
return false;
|
||||
}
|
||||
|
||||
*value = result;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Parses 'str' for a double. If successful, writes the result to *value and
|
||||
// returns true; otherwise leaves *value unchanged and returns false.
|
||||
bool ParseDouble(const std::string& src_text, const char* str, double* value) {
|
||||
// Parses the environment variable as a decimal integer.
|
||||
char* end = NULL;
|
||||
const double double_value = strtod(str, &end); // NOLINT
|
||||
|
||||
// Has strtol() consumed all characters in the string?
|
||||
if (*end != '\0') {
|
||||
// No - an invalid character was encountered.
|
||||
std::cerr << src_text << " is expected to be a double, "
|
||||
<< "but actually has value \"" << str << "\".\n";
|
||||
return false;
|
||||
}
|
||||
|
||||
*value = double_value;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
inline const char* GetEnv(const char* name) {
|
||||
#if GTEST_OS_WINDOWS_MOBILE
|
||||
// We are on Windows CE, which has no environment variables.
|
||||
return NULL;
|
||||
#elif defined(__BORLANDC__) || defined(__SunOS_5_8) || defined(__SunOS_5_9)
|
||||
// Environment variables which we programmatically clear will be set to the
|
||||
// empty string rather than unset (NULL). Handle that case.
|
||||
const char* const env = getenv(name);
|
||||
return (env != NULL && env[0] != '\0') ? env : NULL;
|
||||
#else
|
||||
return getenv(name);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Returns the name of the environment variable corresponding to the
|
||||
// given flag. For example, FlagToEnvVar("foo") will return
|
||||
// "BENCHMARK_FOO" in the open-source version.
|
||||
static std::string FlagToEnvVar(const char* flag) {
|
||||
const std::string flag_str(flag);
|
||||
|
||||
std::string env_var;
|
||||
for (size_t i = 0; i != flag_str.length(); ++i)
|
||||
env_var += ::toupper(flag_str.c_str()[i]);
|
||||
|
||||
return "BENCHMARK_" + env_var;
|
||||
}
|
||||
|
||||
// Reads and returns the Boolean environment variable corresponding to
|
||||
// the given flag; if it's not set, returns default_value.
|
||||
//
|
||||
// The value is considered true iff it's not "0".
|
||||
bool BoolFromEnv(const char* flag, bool default_value) {
|
||||
const std::string env_var = FlagToEnvVar(flag);
|
||||
const char* const string_value = GetEnv(env_var.c_str());
|
||||
return string_value == NULL ?
|
||||
default_value : strcmp(string_value, "0") != 0;
|
||||
}
|
||||
|
||||
// Reads and returns a 32-bit integer stored in the environment
|
||||
// variable corresponding to the given flag; if it isn't set or
|
||||
// doesn't represent a valid 32-bit integer, returns default_value.
|
||||
int32_t Int32FromEnv(const char* flag, int32_t default_value) {
|
||||
const std::string env_var = FlagToEnvVar(flag);
|
||||
const char* const string_value = GetEnv(env_var.c_str());
|
||||
if (string_value == NULL) {
|
||||
// The environment variable is not set.
|
||||
return default_value;
|
||||
}
|
||||
|
||||
int32_t result = default_value;
|
||||
if (!ParseInt32(std::string("Environment variable ") + env_var,
|
||||
string_value, &result)) {
|
||||
std::cout << "The default value " << default_value << " is used.\n";
|
||||
return default_value;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Reads and returns the string environment variable corresponding to
|
||||
// the given flag; if it's not set, returns default_value.
|
||||
const char* StringFromEnv(const char* flag, const char* default_value) {
|
||||
const std::string env_var = FlagToEnvVar(flag);
|
||||
const char* const value = GetEnv(env_var.c_str());
|
||||
return value == NULL ? default_value : value;
|
||||
}
|
||||
|
||||
// Parses a string as a command line flag. The string should have
|
||||
// the format "--flag=value". When def_optional is true, the "=value"
|
||||
// part can be omitted.
|
||||
//
|
||||
// Returns the value of the flag, or NULL if the parsing failed.
|
||||
const char* ParseFlagValue(const char* str,
|
||||
const char* flag,
|
||||
bool def_optional) {
|
||||
// str and flag must not be NULL.
|
||||
if (str == NULL || flag == NULL) return NULL;
|
||||
|
||||
// The flag must start with "--".
|
||||
const std::string flag_str = std::string("--") + std::string(flag);
|
||||
const size_t flag_len = flag_str.length();
|
||||
if (strncmp(str, flag_str.c_str(), flag_len) != 0) return NULL;
|
||||
|
||||
// Skips the flag name.
|
||||
const char* flag_end = str + flag_len;
|
||||
|
||||
// When def_optional is true, it's OK to not have a "=value" part.
|
||||
if (def_optional && (flag_end[0] == '\0'))
|
||||
return flag_end;
|
||||
|
||||
// If def_optional is true and there are more characters after the
|
||||
// flag name, or if def_optional is false, there must be a '=' after
|
||||
// the flag name.
|
||||
if (flag_end[0] != '=') return NULL;
|
||||
|
||||
// Returns the string after "=".
|
||||
return flag_end + 1;
|
||||
}
|
||||
|
||||
bool ParseBoolFlag(const char* str, const char* flag, bool* value) {
|
||||
// Gets the value of the flag as a string.
|
||||
const char* const value_str = ParseFlagValue(str, flag, true);
|
||||
|
||||
// Aborts if the parsing failed.
|
||||
if (value_str == NULL) return false;
|
||||
|
||||
// Converts the string value to a bool.
|
||||
*value = !(*value_str == '0' || *value_str == 'f' || *value_str == 'F');
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ParseInt32Flag(const char* str, const char* flag, int32_t* value) {
|
||||
// Gets the value of the flag as a string.
|
||||
const char* const value_str = ParseFlagValue(str, flag, false);
|
||||
|
||||
// Aborts if the parsing failed.
|
||||
if (value_str == NULL) return false;
|
||||
|
||||
// Sets *value to the value of the flag.
|
||||
return ParseInt32(std::string("The value of flag --") + flag,
|
||||
value_str, value);
|
||||
}
|
||||
|
||||
bool ParseDoubleFlag(const char* str, const char* flag, double* value) {
|
||||
// Gets the value of the flag as a string.
|
||||
const char* const value_str = ParseFlagValue(str, flag, false);
|
||||
|
||||
// Aborts if the parsing failed.
|
||||
if (value_str == NULL) return false;
|
||||
|
||||
// Sets *value to the value of the flag.
|
||||
return ParseDouble(std::string("The value of flag --") + flag,
|
||||
value_str, value);
|
||||
}
|
||||
|
||||
bool ParseStringFlag(const char* str, const char* flag, std::string* value) {
|
||||
// Gets the value of the flag as a string.
|
||||
const char* const value_str = ParseFlagValue(str, flag, false);
|
||||
|
||||
// Aborts if the parsing failed.
|
||||
if (value_str == NULL) return false;
|
||||
|
||||
*value = value_str;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool IsFlag(const char* str, const char* flag) {
|
||||
return (ParseFlagValue(str, flag, true) != NULL);
|
||||
}
|
||||
} // end namespace benchmark
|
79
src/commandlineflags.h
Normal file
79
src/commandlineflags.h
Normal file
@ -0,0 +1,79 @@
|
||||
#ifndef BENCHMARK_COMMANDLINEFLAGS_H_
|
||||
#define BENCHMARK_COMMANDLINEFLAGS_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
// Macro for referencing flags.
|
||||
#define FLAG(name) FLAGS_##name
|
||||
|
||||
// Macros for declaring flags.
|
||||
#define DECLARE_bool(name) extern bool FLAG(name)
|
||||
#define DECLARE_int32(name) extern int32_t FLAG(name)
|
||||
#define DECLARE_int64(name) extern int64_t FLAG(name)
|
||||
#define DECLARE_double(name) extern double FLAG(name)
|
||||
#define DECLARE_string(name) extern std::string FLAG(name)
|
||||
|
||||
// Macros for defining flags.
|
||||
#define DEFINE_bool(name, default_val, doc) bool FLAG(name) = (default_val)
|
||||
#define DEFINE_int32(name, default_val, doc) int32_t FLAG(name) = (default_val)
|
||||
#define DEFINE_int64(name, default_val, doc) int64_t FLAG(name) = (default_val)
|
||||
#define DEFINE_double(name, default_val, doc) double FLAG(name) = (default_val)
|
||||
#define DEFINE_string(name, default_val, doc) \
|
||||
std::string FLAG(name) = (default_val)
|
||||
|
||||
namespace benchmark {
|
||||
|
||||
// Parses 'str' for a 32-bit signed integer. If successful, writes the result
|
||||
// to *value and returns true; otherwise leaves *value unchanged and returns
|
||||
// false.
|
||||
bool ParseInt32(const std::string& src_text, const char* str, int32_t* value);
|
||||
|
||||
// Parses a bool/Int32/string from the environment variable
|
||||
// corresponding to the given Google Test flag.
|
||||
bool BoolFromEnv(const char* flag, bool default_val);
|
||||
int32_t Int32FromEnv(const char* flag, int32_t default_val);
|
||||
double DoubleFromEnv(const char* flag, double default_val);
|
||||
const char* StringFromEnv(const char* flag, const char* default_val);
|
||||
|
||||
// Parses a string for a bool flag, in the form of either
|
||||
// "--flag=value" or "--flag".
|
||||
//
|
||||
// In the former case, the value is taken as true as long as it does
|
||||
// not start with '0', 'f', or 'F'.
|
||||
//
|
||||
// In the latter case, the value is taken as true.
|
||||
//
|
||||
// On success, stores the value of the flag in *value, and returns
|
||||
// true. On failure, returns false without changing *value.
|
||||
bool ParseBoolFlag(const char* str, const char* flag, bool* value);
|
||||
|
||||
// Parses a string for an Int32 flag, in the form of
|
||||
// "--flag=value".
|
||||
//
|
||||
// On success, stores the value of the flag in *value, and returns
|
||||
// true. On failure, returns false without changing *value.
|
||||
bool ParseInt32Flag(const char* str, const char* flag, int32_t* value);
|
||||
|
||||
// Parses a string for a Double flag, in the form of
|
||||
// "--flag=value".
|
||||
//
|
||||
// On success, stores the value of the flag in *value, and returns
|
||||
// true. On failure, returns false without changing *value.
|
||||
bool ParseDoubleFlag(const char* str, const char* flag, double* value);
|
||||
|
||||
// Parses a string for a string flag, in the form of
|
||||
// "--flag=value".
|
||||
//
|
||||
// On success, stores the value of the flag in *value, and returns
|
||||
// true. On failure, returns false without changing *value.
|
||||
bool ParseStringFlag(const char* str, const char* flag, std::string* value);
|
||||
|
||||
// Returns true if the string matches the flag.
|
||||
bool IsFlag(const char* str, const char* flag);
|
||||
|
||||
} // end namespace gbenchmark
|
||||
|
||||
#endif // BENCHMARK_COMMANDLINEFLAGS_H_
|
||||
|
129
src/cycleclock.h
Normal file
129
src/cycleclock.h
Normal file
@ -0,0 +1,129 @@
|
||||
// ----------------------------------------------------------------------
|
||||
// CycleClock
|
||||
// A CycleClock tells you the current time in Cycles. The "time"
|
||||
// is actually time since power-on. This is like time() but doesn't
|
||||
// involve a system call and is much more precise.
|
||||
//
|
||||
// NOTE: Not all cpu/platform/kernel combinations guarantee that this
|
||||
// clock increments at a constant rate or is synchronized across all logical
|
||||
// cpus in a system.
|
||||
//
|
||||
// If you need the above guarantees, please consider using a different
|
||||
// API. There are efforts to provide an interface which provides a millisecond
|
||||
// granularity and implemented as a memory read. A memory read is generally
|
||||
// cheaper than the CycleClock for many architectures.
|
||||
//
|
||||
// Also, in some out of order CPU implementations, the CycleClock is not
|
||||
// serializing. So if you're trying to count at cycles granularity, your
|
||||
// data might be inaccurate due to out of order instruction execution.
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
#ifndef BENCHMARK_CYCLECLOCK_H_
|
||||
#define BENCHMARK_CYCLECLOCK_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(OS_MACOSX)
|
||||
# include <mach/mach_time.h>
|
||||
#endif
|
||||
// For MSVC, we want to use '_asm rdtsc' when possible (since it works
|
||||
// with even ancient MSVC compilers), and when not possible the
|
||||
// __rdtsc intrinsic, declared in <intrin.h>. Unfortunately, in some
|
||||
// environments, <windows.h> and <intrin.h> have conflicting
|
||||
// declarations of some other intrinsics, breaking compilation.
|
||||
// Therefore, we simply declare __rdtsc ourselves. See also
|
||||
// http://connect.microsoft.com/VisualStudio/feedback/details/262047
|
||||
#if defined(COMPILER_MSVC) && !defined(_M_IX86)
|
||||
extern "C" uint64_t __rdtsc();
|
||||
#pragma intrinsic(__rdtsc)
|
||||
#endif
|
||||
#include <sys/time.h>
|
||||
|
||||
// NOTE: only i386 and x86_64 have been well tested.
|
||||
// PPC, sparc, alpha, and ia64 are based on
|
||||
// http://peter.kuscsik.com/wordpress/?p=14
|
||||
// with modifications by m3b. See also
|
||||
// https://setisvn.ssl.berkeley.edu/svn/lib/fftw-3.0.1/kernel/cycle.h
|
||||
struct CycleClock {
|
||||
// This should return the number of cycles since power-on. Thread-safe.
|
||||
static inline int64_t Now() {
|
||||
#if defined(OS_MACOSX)
|
||||
// this goes at the top because we need ALL Macs, regardless of
|
||||
// architecture, to return the number of "mach time units" that
|
||||
// have passed since startup. See sysinfo.cc where
|
||||
// InitializeSystemInfo() sets the supposed cpu clock frequency of
|
||||
// macs to the number of mach time units per second, not actual
|
||||
// CPU clock frequency (which can change in the face of CPU
|
||||
// frequency scaling). Also note that when the Mac sleeps, this
|
||||
// counter pauses; it does not continue counting, nor does it
|
||||
// reset to zero.
|
||||
return mach_absolute_time();
|
||||
#elif defined(__i386__)
|
||||
int64_t ret;
|
||||
__asm__ volatile ("rdtsc" : "=A" (ret) );
|
||||
return ret;
|
||||
#elif defined(__x86_64__) || defined(__amd64__)
|
||||
uint64_t low, high;
|
||||
__asm__ volatile ("rdtsc" : "=a" (low), "=d" (high));
|
||||
return (high << 32) | low;
|
||||
#elif defined(__powerpc__) || defined(__ppc__)
|
||||
// This returns a time-base, which is not always precisely a cycle-count.
|
||||
int64_t tbl, tbu0, tbu1;
|
||||
asm("mftbu %0" : "=r" (tbu0));
|
||||
asm("mftb %0" : "=r" (tbl));
|
||||
asm("mftbu %0" : "=r" (tbu1));
|
||||
tbl &= -static_cast<int64>(tbu0 == tbu1);
|
||||
// high 32 bits in tbu1; low 32 bits in tbl (tbu0 is garbage)
|
||||
return (tbu1 << 32) | tbl;
|
||||
#elif defined(__sparc__)
|
||||
int64_t tick;
|
||||
asm(".byte 0x83, 0x41, 0x00, 0x00");
|
||||
asm("mov %%g1, %0" : "=r" (tick));
|
||||
return tick;
|
||||
#elif defined(__ia64__)
|
||||
int64_t itc;
|
||||
asm("mov %0 = ar.itc" : "=r" (itc));
|
||||
return itc;
|
||||
#elif defined(COMPILER_MSVC) && defined(_M_IX86)
|
||||
// Older MSVC compilers (like 7.x) don't seem to support the
|
||||
// __rdtsc intrinsic properly, so I prefer to use _asm instead
|
||||
// when I know it will work. Otherwise, I'll use __rdtsc and hope
|
||||
// the code is being compiled with a non-ancient compiler.
|
||||
_asm rdtsc
|
||||
#elif defined(COMPILER_MSVC)
|
||||
return __rdtsc();
|
||||
#elif defined(ARMV3)
|
||||
#if defined(ARMV6) // V6 is the earliest arch that has a standard cyclecount
|
||||
uint32_t pmccntr;
|
||||
uint32_t pmuseren;
|
||||
uint32_t pmcntenset;
|
||||
// Read the user mode perf monitor counter access permissions.
|
||||
asm("mrc p15, 0, %0, c9, c14, 0" : "=r" (pmuseren));
|
||||
if (pmuseren & 1) { // Allows reading perfmon counters for user mode code.
|
||||
asm("mrc p15, 0, %0, c9, c12, 1" : "=r" (pmcntenset));
|
||||
if (pmcntenset & 0x80000000ul) { // Is it counting?
|
||||
asm("mrc p15, 0, %0, c9, c13, 0" : "=r" (pmccntr));
|
||||
// The counter is set up to count every 64th cycle
|
||||
return static_cast<int64>(pmccntr) * 64; // Should optimize to << 6
|
||||
}
|
||||
}
|
||||
#endif
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, NULL);
|
||||
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
|
||||
#elif defined(__mips__)
|
||||
// mips apparently only allows rdtsc for superusers, so we fall
|
||||
// back to gettimeofday. It's possible clock_gettime would be better.
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, NULL);
|
||||
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
|
||||
#else
|
||||
// The soft failover to a generic implementation is automatic only for ARM.
|
||||
// For other platforms the developer is expected to make an attempt to create
|
||||
// a fast implementation and use generic version if nothing better is available.
|
||||
#error You need to define CycleTimer for your OS and CPU
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
#endif // BENCHMARK_CYCLECLOCK_H_
|
110
src/macros.h
Normal file
110
src/macros.h
Normal file
@ -0,0 +1,110 @@
|
||||
#ifndef BENCHMARK_MACROS_H_
|
||||
#define BENCHMARK_MACROS_H_
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
|
||||
TypeName(const TypeName&); \
|
||||
void operator=(const TypeName&);
|
||||
|
||||
// The arraysize(arr) macro returns the # of elements in an array arr.
|
||||
// The expression is a compile-time constant, and therefore can be
|
||||
// used in defining new arrays, for example. If you use arraysize on
|
||||
// a pointer by mistake, you will get a compile-time error.
|
||||
//
|
||||
// One caveat is that, for C++03, arraysize() doesn't accept any array of
|
||||
// an anonymous type or a type defined inside a function. In these rare
|
||||
// cases, you have to use the unsafe ARRAYSIZE() macro below. This is
|
||||
// due to a limitation in C++03's template system. The limitation has
|
||||
// been removed in C++11.
|
||||
|
||||
// This template function declaration is used in defining arraysize.
|
||||
// Note that the function doesn't need an implementation, as we only
|
||||
// use its type.
|
||||
template <typename T, size_t N>
|
||||
char (&ArraySizeHelper(T (&array)[N]))[N];
|
||||
|
||||
// That gcc wants both of these prototypes seems mysterious. VC, for
|
||||
// its part, can't decide which to use (another mystery). Matching of
|
||||
// template overloads: the final frontier.
|
||||
#ifndef COMPILER_MSVC
|
||||
template <typename T, size_t N>
|
||||
char (&ArraySizeHelper(const T (&array)[N]))[N];
|
||||
#endif
|
||||
|
||||
#define arraysize(array) (sizeof(ArraySizeHelper(array)))
|
||||
|
||||
// The STATIC_ASSERT macro can be used to verify that a compile time
|
||||
// expression is true. For example, you could use it to verify the
|
||||
// size of a static array:
|
||||
//
|
||||
// STATIC_ASSERT(ARRAYSIZE(content_type_names) == CONTENT_NUM_TYPES,
|
||||
// content_type_names_incorrect_size);
|
||||
//
|
||||
// or to make sure a struct is smaller than a certain size:
|
||||
//
|
||||
// STATIC_ASSERT(sizeof(foo) < 128, foo_too_large);
|
||||
//
|
||||
// The second argument to the macro is the name of the variable. If
|
||||
// the expression is false, most compilers will issue a warning/error
|
||||
// containing the name of the variable.
|
||||
|
||||
template <bool>
|
||||
struct StaticAssert {
|
||||
};
|
||||
|
||||
#define STATIC_ASSERT(expr, msg) \
|
||||
typedef StaticAssert<(bool(expr))> msg[bool(expr) ? 1 : -1]
|
||||
|
||||
// Implementation details of STATIC_ASSERT:
|
||||
//
|
||||
// - STATIC_ASSERT works by defining an array type that has -1
|
||||
// elements (and thus is invalid) when the expression is false.
|
||||
//
|
||||
// - The simpler definition
|
||||
//
|
||||
// #define STATIC_ASSERT(expr, msg) typedef char msg[(expr) ? 1 : -1]
|
||||
//
|
||||
// does not work, as gcc supports variable-length arrays whose sizes
|
||||
// are determined at run-time (this is gcc's extension and not part
|
||||
// of the C++ standard). As a result, gcc fails to reject the
|
||||
// following code with the simple definition:
|
||||
//
|
||||
// int foo;
|
||||
// STATIC_ASSERT(foo, msg); // not supposed to compile as foo is
|
||||
// // not a compile-time constant.
|
||||
//
|
||||
// - By using the type StaticAssert<(bool(expr))>, we ensures that
|
||||
// expr is a compile-time constant. (Template arguments must be
|
||||
// determined at compile-time.)
|
||||
//
|
||||
// - The outer parentheses in StaticAssert<(bool(expr))> are necessary
|
||||
// to work around a bug in gcc 3.4.4 and 4.0.1. If we had written
|
||||
//
|
||||
// StaticAssert<bool(expr)>
|
||||
//
|
||||
// instead, these compilers will refuse to compile
|
||||
//
|
||||
// STATIC_ASSERT(5 > 0, some_message);
|
||||
//
|
||||
// (They seem to think the ">" in "5 > 0" marks the end of the
|
||||
// template argument list.)
|
||||
//
|
||||
// - The array size is (bool(expr) ? 1 : -1), instead of simply
|
||||
//
|
||||
// ((expr) ? 1 : -1).
|
||||
//
|
||||
// This is to avoid running into a bug in MS VC 7.1, which
|
||||
// causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1.
|
||||
|
||||
#define CHECK(b) do { if (!(b)) assert(false); } while(0)
|
||||
#define CHECK_EQ(a, b) CHECK((a) == (b))
|
||||
#define CHECK_GE(a, b) CHECK((a) >= (b))
|
||||
#define CHECK_LE(a, b) CHECK((a) <= (b))
|
||||
#define CHECK_GT(a, b) CHECK((a) > (b))
|
||||
#define CHECK_LT(a, b) CHECK((a) < (b))
|
||||
|
||||
|
||||
#define ATTRIBUTE_UNUSED __attribute__ ((unused))
|
||||
|
||||
#endif // BENCHMARK_MACROS_H_
|
20
src/mutex_lock.h
Normal file
20
src/mutex_lock.h
Normal file
@ -0,0 +1,20 @@
|
||||
#ifndef BENCHMARK_MUTEX_LOCK_H_
|
||||
#define BENCHMARK_MUTEX_LOCK_H_
|
||||
|
||||
#include <pthread.h>
|
||||
|
||||
class mutex_lock {
|
||||
public:
|
||||
explicit mutex_lock(pthread_mutex_t* mu) : mu_(mu) {
|
||||
pthread_mutex_lock(mu_);
|
||||
}
|
||||
|
||||
~mutex_lock() {
|
||||
pthread_mutex_unlock(mu_);
|
||||
}
|
||||
|
||||
private:
|
||||
pthread_mutex_t* mu_;
|
||||
};
|
||||
|
||||
#endif // BENCHMARK_MUTEX_LOCK_H_
|
8
src/port.h
Normal file
8
src/port.h
Normal file
@ -0,0 +1,8 @@
|
||||
#ifndef BENCHMARK_PORT_H_
|
||||
#define BENCHMARK_PORT_H_
|
||||
|
||||
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
|
||||
TypeName(const TypeName&); \
|
||||
void operator=(const TypeName&);
|
||||
|
||||
#endif // BENCHMARK_PORT_H_
|
42
src/sleep.cc
Normal file
42
src/sleep.cc
Normal file
@ -0,0 +1,42 @@
|
||||
#include "sleep.h"
|
||||
|
||||
#include <time.h>
|
||||
#include <errno.h>
|
||||
|
||||
#ifdef OS_WINDOWS
|
||||
|
||||
// Window's _sleep takes milliseconds argument.
|
||||
void SleepForMilliseconds(int milliseconds) {
|
||||
_sleep(milliseconds);
|
||||
}
|
||||
void SleepForSeconds(double seconds) {
|
||||
SleepForMilliseconds(static_cast<int>(seconds * 1000));
|
||||
}
|
||||
|
||||
#else // OS_WINDOWS
|
||||
|
||||
static const int64_t kNumMillisPerSecond = 1000LL;
|
||||
static const int64_t kNumMicrosPerMilli = 1000LL;
|
||||
static const int64_t kNumMicrosPerSecond = kNumMillisPerSecond * 1000LL;
|
||||
static const int64_t kNumNanosPerMicro = 1000LL;
|
||||
|
||||
void SleepForMicroseconds(int64_t microseconds) {
|
||||
struct timespec sleep_time;
|
||||
sleep_time.tv_sec = microseconds / kNumMicrosPerSecond;
|
||||
sleep_time.tv_nsec = (microseconds % kNumMicrosPerSecond) * kNumNanosPerMicro;
|
||||
while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR)
|
||||
; // Ignore signals and wait for the full interval to elapse.
|
||||
}
|
||||
|
||||
void SleepForMilliseconds(int milliseconds) {
|
||||
SleepForMicroseconds(static_cast<int64_t>(milliseconds) * kNumMicrosPerMilli);
|
||||
}
|
||||
|
||||
void SleepForSeconds(double seconds) {
|
||||
SleepForMicroseconds(static_cast<int64_t>(seconds * kNumMicrosPerSecond));
|
||||
}
|
||||
|
||||
#endif // OS_WINDOWS
|
||||
|
||||
|
||||
|
10
src/sleep.h
Normal file
10
src/sleep.h
Normal file
@ -0,0 +1,10 @@
|
||||
#ifndef BENCHMARK_SLEEP_H_
|
||||
#define BENCHMARK_SLEEP_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
void SleepForMicroseconds(int64_t microseconds);
|
||||
void SleepForMilliseconds(int milliseconds);
|
||||
void SleepForSeconds(double seconds);
|
||||
|
||||
#endif // BENCHMARK_SLEEP_H_
|
306
src/stat.h
Normal file
306
src/stat.h
Normal file
@ -0,0 +1,306 @@
|
||||
#ifndef BENCHMARK_STAT_H_
|
||||
#define BENCHMARK_STAT_H_
|
||||
|
||||
#include <math.h>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
|
||||
template <typename VType, typename NumType>
|
||||
class Stat1;
|
||||
|
||||
template <typename VType, typename NumType>
|
||||
class Stat1MinMax;
|
||||
|
||||
typedef Stat1<float, float> Stat1_f;
|
||||
typedef Stat1<double, double> Stat1_d;
|
||||
typedef Stat1MinMax<float, float> Stat1MinMax_f;
|
||||
typedef Stat1MinMax<double, double> Stat1MinMax_d;
|
||||
|
||||
template <typename VType> class Vector2;
|
||||
template <typename VType> class Vector3;
|
||||
template <typename VType> class Vector4;
|
||||
|
||||
template <typename VType, typename NumType>
|
||||
class Stat1 {
|
||||
public:
|
||||
typedef Stat1<VType, NumType> Self;
|
||||
|
||||
Stat1() {
|
||||
Clear();
|
||||
}
|
||||
void Clear() {
|
||||
numsamples_ = NumType();
|
||||
sum_squares_ = sum_ = VType();
|
||||
}
|
||||
// Create a sample of value dat and weight 1
|
||||
explicit Stat1(const VType &dat) {
|
||||
sum_ = dat;
|
||||
sum_squares_ = Sqr(dat);
|
||||
numsamples_ = 1;
|
||||
}
|
||||
// Create statistics for all the samples between begin (included)
|
||||
// and end(excluded)
|
||||
explicit Stat1(const VType *begin, const VType *end) {
|
||||
Clear();
|
||||
for ( const VType *item = begin; item < end; ++item ) {
|
||||
(*this) += Stat1(*item);
|
||||
}
|
||||
}
|
||||
// Create a sample of value dat and weight w
|
||||
Stat1(const VType &dat, const NumType &w) {
|
||||
sum_ = w * dat;
|
||||
sum_squares_ = w * Sqr(dat);
|
||||
numsamples_ = w;
|
||||
}
|
||||
// Copy operator
|
||||
Stat1(const Self &stat) {
|
||||
sum_ = stat.sum_;
|
||||
sum_squares_ = stat.sum_squares_;
|
||||
numsamples_ = stat.numsamples_;
|
||||
}
|
||||
|
||||
inline Self &operator =(const Self &stat) {
|
||||
sum_ = stat.sum_;
|
||||
sum_squares_ = stat.sum_squares_;
|
||||
numsamples_ = stat.numsamples_;
|
||||
return (*this);
|
||||
}
|
||||
// Merge statistics from two sample sets.
|
||||
inline Self &operator +=(const Self &stat) {
|
||||
sum_ += stat.sum_;
|
||||
sum_squares_+= stat.sum_squares_;
|
||||
numsamples_ += stat.numsamples_;
|
||||
return (*this);
|
||||
}
|
||||
// The operation opposite to +=
|
||||
inline Self &operator -=(const Self &stat) {
|
||||
sum_ -= stat.sum_;
|
||||
sum_squares_-= stat.sum_squares_;
|
||||
numsamples_ -= stat.numsamples_;
|
||||
return (*this);
|
||||
}
|
||||
// Multiply the weight of the set of samples by a factor k
|
||||
inline Self &operator *=(const VType &k) {
|
||||
sum_ *= k;
|
||||
sum_squares_*= k;
|
||||
numsamples_ *= k;
|
||||
return (*this);
|
||||
}
|
||||
// Merge statistics from two sample sets.
|
||||
inline Self operator + (const Self &stat) const {
|
||||
return Self(*this) += stat;
|
||||
}
|
||||
// The operation opposite to +
|
||||
inline Self operator - (const Self &stat) const {
|
||||
return Self(*this) -= stat;
|
||||
}
|
||||
// Multiply the weight of the set of samples by a factor k
|
||||
inline Self operator * (const VType &k) const {
|
||||
return Self(*this) *= k;
|
||||
}
|
||||
// Return the total weight of this sample set
|
||||
NumType NumSamples() const {
|
||||
return numsamples_;
|
||||
}
|
||||
// Return the sum of this sample set
|
||||
VType Sum() const {
|
||||
return sum_;
|
||||
}
|
||||
// Return the mean of this sample set
|
||||
VType Mean() const {
|
||||
if (numsamples_ == 0) return VType();
|
||||
return sum_ * (1.0 / numsamples_);
|
||||
}
|
||||
// Return the mean of this sample set and compute the standard deviation at
|
||||
// the same time.
|
||||
VType Mean(VType *stddev) const {
|
||||
if (numsamples_ == 0) return VType();
|
||||
VType mean = sum_ * (1.0 / numsamples_);
|
||||
if (stddev) {
|
||||
VType avg_squares = sum_squares_ * (1.0 / numsamples_);
|
||||
*stddev = Sqrt(avg_squares - Sqr(mean));
|
||||
}
|
||||
return mean;
|
||||
}
|
||||
// Return the standard deviation of the sample set
|
||||
VType StdDev() const {
|
||||
if (numsamples_ == 0) return VType();
|
||||
VType mean = Mean();
|
||||
VType avg_squares = sum_squares_ * (1.0 / numsamples_);
|
||||
return Sqrt(avg_squares - Sqr(mean));
|
||||
}
|
||||
private:
|
||||
// Let i be the index of the samples provided (using +=)
|
||||
// and weight[i],value[i] be the data of sample #i
|
||||
// then the variables have the following meaning:
|
||||
NumType numsamples_; // sum of weight[i];
|
||||
VType sum_; // sum of weight[i]*value[i];
|
||||
VType sum_squares_; // sum of weight[i]*value[i]^2;
|
||||
|
||||
// Template function used to square a number.
|
||||
// For a vector we square all components
|
||||
template <typename SType>
|
||||
static inline SType Sqr(const SType &dat) {
|
||||
return dat * dat;
|
||||
}
|
||||
template <typename SType>
|
||||
static inline Vector2<SType> Sqr(const Vector2<SType> &dat) {
|
||||
return dat.MulComponents(dat);
|
||||
}
|
||||
template <typename SType>
|
||||
static inline Vector3<SType> Sqr(const Vector3<SType> &dat) {
|
||||
return dat.MulComponents(dat);
|
||||
}
|
||||
template <typename SType>
|
||||
static inline Vector4<SType> Sqr(const Vector4<SType> &dat) {
|
||||
return dat.MulComponents(dat);
|
||||
}
|
||||
|
||||
// Template function used to take the square root of a number.
|
||||
// For a vector we square all components
|
||||
template <typename SType>
|
||||
static inline SType Sqrt(const SType &dat) {
|
||||
// Avoid NaN due to imprecision in the calculations
|
||||
if ( dat < 0 )
|
||||
return 0;
|
||||
return sqrt(dat);
|
||||
}
|
||||
template <typename SType>
|
||||
static inline Vector2<SType> Sqrt(const Vector2<SType> &dat) {
|
||||
// Avoid NaN due to imprecision in the calculations
|
||||
return Max(dat, Vector2<SType>()).Sqrt();
|
||||
}
|
||||
template <typename SType>
|
||||
static inline Vector3<SType> Sqrt(const Vector3<SType> &dat) {
|
||||
// Avoid NaN due to imprecision in the calculations
|
||||
return Max(dat, Vector3<SType>()).Sqrt();
|
||||
}
|
||||
template <typename SType>
|
||||
static inline Vector4<SType> Sqrt(const Vector4<SType> &dat) {
|
||||
// Avoid NaN due to imprecision in the calculations
|
||||
return Max(dat, Vector4<SType>()).Sqrt();
|
||||
}
|
||||
};
|
||||
|
||||
// Useful printing function
|
||||
template <typename VType, typename NumType>
|
||||
inline std::ostream& operator<<(std::ostream& out,
|
||||
const Stat1<VType, NumType>& s) {
|
||||
out << "{ avg = " << s.Mean()
|
||||
<< " std = " << s.StdDev()
|
||||
<< " nsamples = " << s.NumSamples() << "}";
|
||||
return out;
|
||||
}
|
||||
|
||||
|
||||
// Stat1MinMax: same as Stat1, but it also
|
||||
// keeps the Min and Max values; the "-"
|
||||
// operator is disabled because it cannot be implemented
|
||||
// efficiently
|
||||
template <typename VType, typename NumType>
|
||||
class Stat1MinMax : public Stat1<VType, NumType> {
|
||||
public:
|
||||
typedef Stat1MinMax<VType, NumType> Self;
|
||||
|
||||
Stat1MinMax() {
|
||||
Clear();
|
||||
}
|
||||
void Clear() {
|
||||
Stat1<VType, NumType>::Clear();
|
||||
if (std::numeric_limits<VType>::has_infinity) {
|
||||
min_ = std::numeric_limits<VType>::infinity();
|
||||
max_ = -std::numeric_limits<VType>::infinity();
|
||||
} else {
|
||||
min_ = std::numeric_limits<VType>::max();
|
||||
max_ = std::numeric_limits<VType>::min();
|
||||
}
|
||||
}
|
||||
// Create a sample of value dat and weight 1
|
||||
explicit Stat1MinMax(const VType &dat) : Stat1<VType, NumType>(dat) {
|
||||
max_ = dat;
|
||||
min_ = dat;
|
||||
}
|
||||
// Create statistics for all the samples between begin (included)
|
||||
// and end(excluded)
|
||||
explicit Stat1MinMax(const VType *begin, const VType *end) {
|
||||
Clear();
|
||||
for ( const VType *item = begin; item < end; ++item ) {
|
||||
(*this) += Stat1MinMax(*item);
|
||||
}
|
||||
}
|
||||
// Create a sample of value dat and weight w
|
||||
Stat1MinMax(const VType &dat, const NumType &w)
|
||||
: Stat1<VType, NumType>(dat, w) {
|
||||
max_ = dat;
|
||||
min_ = dat;
|
||||
}
|
||||
// Copy operator
|
||||
Stat1MinMax(const Self &stat) : Stat1<VType, NumType>(stat) {
|
||||
max_ = stat.max_;
|
||||
min_ = stat.min_;
|
||||
}
|
||||
inline Self &operator =(const Self &stat) {
|
||||
this->Stat1<VType, NumType>::operator=(stat);
|
||||
max_ = stat.max_;
|
||||
min_ = stat.min_;
|
||||
return (*this);
|
||||
}
|
||||
// Merge statistics from two sample sets.
|
||||
inline Self &operator +=(const Self &stat) {
|
||||
this->Stat1<VType, NumType>::operator+=(stat);
|
||||
if (stat.max_ > max_) max_ = stat.max_;
|
||||
if (stat.min_ < min_) min_ = stat.min_;
|
||||
return (*this);
|
||||
}
|
||||
// Multiply the weight of the set of samples by a factor k
|
||||
inline Self &operator *=(const VType &stat) {
|
||||
this->Stat1<VType, NumType>::operator*=(stat);
|
||||
return (*this);
|
||||
}
|
||||
// Merge statistics from two sample sets.
|
||||
inline Self operator + (const Self &stat) const {
|
||||
return Self(*this) += stat;
|
||||
}
|
||||
// Multiply the weight of the set of samples by a factor k
|
||||
inline Self operator * (const VType &k) const {
|
||||
return Self(*this) *= k;
|
||||
}
|
||||
private:
|
||||
// The - operation makes no sense with Min/Max
|
||||
// unless we keep the full list of values (but we don't)
|
||||
// make it private, and let it undefined so nobody can call it
|
||||
Self &operator -=(const Self &stat); // senseless. let it undefined.
|
||||
|
||||
// The operation opposite to -
|
||||
Self operator - (const Self &stat) const; // senseless. let it undefined.
|
||||
|
||||
public:
|
||||
// Return the maximal value in this sample set
|
||||
VType Max() const {
|
||||
return max_;
|
||||
}
|
||||
// Return the minimal value in this sample set
|
||||
VType Min() const {
|
||||
return min_;
|
||||
}
|
||||
private:
|
||||
// Let i be the index of the samples provided (using +=)
|
||||
// and weight[i],value[i] be the data of sample #i
|
||||
// then the variables have the following meaning:
|
||||
VType max_; // max of value[i]
|
||||
VType min_; // min of value[i]
|
||||
};
|
||||
|
||||
// Useful printing function
|
||||
template <typename VType, typename NumType>
|
||||
inline std::ostream& operator <<(std::ostream& out,
|
||||
const Stat1MinMax<VType, NumType>& s) {
|
||||
out << "{ avg = " << s.Mean()
|
||||
<< " std = " << s.StdDev()
|
||||
<< " nsamples = " << s.NumSamples()
|
||||
<< " min = " << s.Min()
|
||||
<< " max = " << s.Max() << "}";
|
||||
return out;
|
||||
}
|
||||
|
||||
#endif // BENCHMARK_STAT_H_
|
337
src/sysinfo.cc
Normal file
337
src/sysinfo.cc
Normal file
@ -0,0 +1,337 @@
|
||||
#include "sysinfo.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <pthread.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/time.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
|
||||
#include "cycleclock.h"
|
||||
#include "macros.h"
|
||||
#include "mutex_lock.h"
|
||||
#include "sleep.h"
|
||||
|
||||
namespace {
|
||||
pthread_once_t cpuinfo_init = PTHREAD_ONCE_INIT;
|
||||
double cpuinfo_cycles_per_second = 1.0;
|
||||
int cpuinfo_num_cpus = 1; // Conservative guess
|
||||
static pthread_mutex_t cputimens_mutex;
|
||||
|
||||
// Helper function estimates cycles/sec by observing cycles elapsed during
|
||||
// sleep(). Using small sleep time decreases accuracy significantly.
|
||||
int64_t EstimateCyclesPerSecond(const int estimate_time_ms) {
|
||||
CHECK(estimate_time_ms > 0);
|
||||
double multiplier = 1000.0 / (double)estimate_time_ms; // scale by this much
|
||||
|
||||
const int64_t start_ticks = CycleClock::Now();
|
||||
SleepForMilliseconds(estimate_time_ms);
|
||||
const int64_t guess = int64_t(multiplier * (CycleClock::Now() - start_ticks));
|
||||
return guess;
|
||||
}
|
||||
|
||||
// Helper function for reading an int from a file. Returns true if successful
|
||||
// and the memory location pointed to by value is set to the value read.
|
||||
bool ReadIntFromFile(const char *file, int *value) {
|
||||
bool ret = false;
|
||||
int fd = open(file, O_RDONLY);
|
||||
if (fd != -1) {
|
||||
char line[1024];
|
||||
char* err;
|
||||
memset(line, '\0', sizeof(line));
|
||||
CHECK(read(fd, line, sizeof(line) - 1));
|
||||
const int temp_value = strtol(line, &err, 10);
|
||||
if (line[0] != '\0' && (*err == '\n' || *err == '\0')) {
|
||||
*value = temp_value;
|
||||
ret = true;
|
||||
}
|
||||
close(fd);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
void InitializeSystemInfo() {
|
||||
bool saw_mhz = false;
|
||||
|
||||
// TODO: destroy this
|
||||
pthread_mutex_init(&cputimens_mutex, NULL);
|
||||
|
||||
#if defined OS_LINUX || defined OS_CYGWIN
|
||||
char line[1024];
|
||||
char* err;
|
||||
int freq;
|
||||
|
||||
// If the kernel is exporting the tsc frequency use that. There are issues
|
||||
// where cpuinfo_max_freq cannot be relied on because the BIOS may be
|
||||
// exporintg an invalid p-state (on x86) or p-states may be used to put the
|
||||
// processor in a new mode (turbo mode). Essentially, those frequencies
|
||||
// cannot always be relied upon. The same reasons apply to /proc/cpuinfo as
|
||||
// well.
|
||||
if (!saw_mhz &&
|
||||
ReadIntFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) {
|
||||
// The value is in kHz (as the file name suggests). For example, on a
|
||||
// 2GHz warpstation, the file contains the value "2000000".
|
||||
cpuinfo_cycles_per_second = freq * 1000.0;
|
||||
saw_mhz = true;
|
||||
}
|
||||
|
||||
// If CPU scaling is in effect, we want to use the *maximum* frequency,
|
||||
// not whatever CPU speed some random processor happens to be using now.
|
||||
if (!saw_mhz &&
|
||||
ReadIntFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
|
||||
&freq)) {
|
||||
// The value is in kHz. For example, on a 2GHz warpstation, the file
|
||||
// contains the value "2000000".
|
||||
cpuinfo_cycles_per_second = freq * 1000.0;
|
||||
saw_mhz = true;
|
||||
}
|
||||
|
||||
// Read /proc/cpuinfo for other values, and if there is no cpuinfo_max_freq.
|
||||
const char* pname = "/proc/cpuinfo";
|
||||
int fd = open(pname, O_RDONLY);
|
||||
if (fd == -1) {
|
||||
perror(pname);
|
||||
if (!saw_mhz) {
|
||||
cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000);
|
||||
}
|
||||
return; // TODO: use generic tester instead?
|
||||
}
|
||||
|
||||
double bogo_clock = 1.0;
|
||||
bool saw_bogo = false;
|
||||
int max_cpu_id = 0;
|
||||
int num_cpus = 0;
|
||||
line[0] = line[1] = '\0';
|
||||
int chars_read = 0;
|
||||
do { // we'll exit when the last read didn't read anything
|
||||
// Move the next line to the beginning of the buffer
|
||||
const int oldlinelen = strlen(line);
|
||||
if (sizeof(line) == oldlinelen + 1) // oldlinelen took up entire line
|
||||
line[0] = '\0';
|
||||
else // still other lines left to save
|
||||
memmove(line, line + oldlinelen+1, sizeof(line) - (oldlinelen+1));
|
||||
// Terminate the new line, reading more if we can't find the newline
|
||||
char* newline = strchr(line, '\n');
|
||||
if (newline == NULL) {
|
||||
const int linelen = strlen(line);
|
||||
const int bytes_to_read = sizeof(line)-1 - linelen;
|
||||
CHECK(bytes_to_read > 0); // because the memmove recovered >=1 bytes
|
||||
chars_read = read(fd, line + linelen, bytes_to_read);
|
||||
line[linelen + chars_read] = '\0';
|
||||
newline = strchr(line, '\n');
|
||||
}
|
||||
if (newline != NULL)
|
||||
*newline = '\0';
|
||||
|
||||
// When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only
|
||||
// accept postive values. Some environments (virtual machines) report zero,
|
||||
// which would cause infinite looping in WallTime_Init.
|
||||
if (!saw_mhz && strncasecmp(line, "cpu MHz", sizeof("cpu MHz")-1) == 0) {
|
||||
const char* freqstr = strchr(line, ':');
|
||||
if (freqstr) {
|
||||
cpuinfo_cycles_per_second = strtod(freqstr+1, &err) * 1000000.0;
|
||||
if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0)
|
||||
saw_mhz = true;
|
||||
}
|
||||
} else if (strncasecmp(line, "bogomips", sizeof("bogomips")-1) == 0) {
|
||||
const char* freqstr = strchr(line, ':');
|
||||
if (freqstr) {
|
||||
bogo_clock = strtod(freqstr+1, &err) * 1000000.0;
|
||||
if (freqstr[1] != '\0' && *err == '\0' && bogo_clock > 0)
|
||||
saw_bogo = true;
|
||||
}
|
||||
} else if (strncasecmp(line, "processor", sizeof("processor")-1) == 0) {
|
||||
num_cpus++; // count up every time we see an "processor :" entry
|
||||
const char* freqstr = strchr(line, ':');
|
||||
if (freqstr) {
|
||||
const int cpu_id = strtol(freqstr+1, &err, 10);
|
||||
if (freqstr[1] != '\0' && *err == '\0' && max_cpu_id < cpu_id)
|
||||
max_cpu_id = cpu_id;
|
||||
}
|
||||
}
|
||||
} while (chars_read > 0);
|
||||
close(fd);
|
||||
|
||||
if (!saw_mhz) {
|
||||
if (saw_bogo) {
|
||||
// If we didn't find anything better, we'll use bogomips, but
|
||||
// we're not happy about it.
|
||||
cpuinfo_cycles_per_second = bogo_clock;
|
||||
} else {
|
||||
// If we don't even have bogomips, we'll use the slow estimation.
|
||||
cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000);
|
||||
}
|
||||
}
|
||||
if (num_cpus == 0) {
|
||||
fprintf(stderr, "Failed to read num. CPUs correctly from /proc/cpuinfo\n");
|
||||
} else {
|
||||
if ((max_cpu_id + 1) != num_cpus) {
|
||||
fprintf(stderr,
|
||||
"CPU ID assignments in /proc/cpuinfo seems messed up."
|
||||
" This is usually caused by a bad BIOS.\n");
|
||||
}
|
||||
cpuinfo_num_cpus = num_cpus;
|
||||
}
|
||||
|
||||
#elif defined OS_FREEBSD
|
||||
// For this sysctl to work, the machine must be configured without
|
||||
// SMP, APIC, or APM support. hz should be 64-bit in freebsd 7.0
|
||||
// and later. Before that, it's a 32-bit quantity (and gives the
|
||||
// wrong answer on machines faster than 2^32 Hz). See
|
||||
// http://lists.freebsd.org/pipermail/freebsd-i386/2004-November/001846.html
|
||||
// But also compare FreeBSD 7.0:
|
||||
// http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG70#L223
|
||||
// 231 error = sysctl_handle_quad(oidp, &freq, 0, req);
|
||||
// To FreeBSD 6.3 (it's the same in 6-STABLE):
|
||||
// http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG6#L131
|
||||
// 139 error = sysctl_handle_int(oidp, &freq, sizeof(freq), req);
|
||||
#if __FreeBSD__ >= 7
|
||||
uint64_t hz = 0;
|
||||
#else
|
||||
unsigned int hz = 0;
|
||||
#endif
|
||||
size_t sz = sizeof(hz);
|
||||
const char *sysctl_path = "machdep.tsc_freq";
|
||||
if ( sysctlbyname(sysctl_path, &hz, &sz, NULL, 0) != 0 ) {
|
||||
fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n",
|
||||
sysctl_path, strerror(errno));
|
||||
cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000);
|
||||
} else {
|
||||
cpuinfo_cycles_per_second = hz;
|
||||
}
|
||||
// TODO: also figure out cpuinfo_num_cpus
|
||||
|
||||
#elif defined OS_WINDOWS
|
||||
# pragma comment(lib, "shlwapi.lib") // for SHGetValue()
|
||||
// In NT, read MHz from the registry. If we fail to do so or we're in win9x
|
||||
// then make a crude estimate.
|
||||
OSVERSIONINFO os;
|
||||
os.dwOSVersionInfoSize = sizeof(os);
|
||||
DWORD data, data_size = sizeof(data);
|
||||
if (GetVersionEx(&os) &&
|
||||
os.dwPlatformId == VER_PLATFORM_WIN32_NT &&
|
||||
SUCCEEDED(SHGetValueA(HKEY_LOCAL_MACHINE,
|
||||
"HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
|
||||
"~MHz", NULL, &data, &data_size)))
|
||||
cpuinfo_cycles_per_second = (int64)data * (int64)(1000 * 1000); // was mhz
|
||||
else
|
||||
cpuinfo_cycles_per_second = EstimateCyclesPerSecond(500); // TODO <500?
|
||||
// TODO: also figure out cpuinfo_num_cpus
|
||||
|
||||
#elif defined OS_MACOSX
|
||||
// returning "mach time units" per second. the current number of elapsed
|
||||
// mach time units can be found by calling uint64 mach_absolute_time();
|
||||
// while not as precise as actual CPU cycles, it is accurate in the face
|
||||
// of CPU frequency scaling and multi-cpu/core machines.
|
||||
// Our mac users have these types of machines, and accuracy
|
||||
// (i.e. correctness) trumps precision.
|
||||
// See cycleclock.h: CycleClock::Now(), which returns number of mach time
|
||||
// units on Mac OS X.
|
||||
mach_timebase_info_data_t timebase_info;
|
||||
mach_timebase_info(&timebase_info);
|
||||
double mach_time_units_per_nanosecond =
|
||||
static_cast<double>(timebase_info.denom) /
|
||||
static_cast<double>(timebase_info.numer);
|
||||
cpuinfo_cycles_per_second = mach_time_units_per_nanosecond * 1e9;
|
||||
|
||||
int num_cpus = 0;
|
||||
size_t size = sizeof(num_cpus);
|
||||
int numcpus_name[] = { CTL_HW, HW_NCPU };
|
||||
if (::sysctl(numcpus_name, arraysize(numcpus_name), &num_cpus, &size, 0, 0)
|
||||
== 0
|
||||
&& (size == sizeof(num_cpus)))
|
||||
cpuinfo_num_cpus = num_cpus;
|
||||
|
||||
#else
|
||||
// Generic cycles per second counter
|
||||
cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000);
|
||||
#endif
|
||||
}
|
||||
} // end namespace
|
||||
|
||||
#ifndef OS_WINDOWS
|
||||
// getrusage() based implementation of MyCPUUsage
|
||||
static double MyCPUUsageRUsage() {
|
||||
struct rusage ru;
|
||||
if (getrusage(RUSAGE_SELF, &ru) == 0) {
|
||||
return (static_cast<double>(ru.ru_utime.tv_sec) +
|
||||
static_cast<double>(ru.ru_utime.tv_usec)*1e-6 +
|
||||
static_cast<double>(ru.ru_stime.tv_sec) +
|
||||
static_cast<double>(ru.ru_stime.tv_usec)*1e-6);
|
||||
} else {
|
||||
return 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
static bool MyCPUUsageCPUTimeNsLocked(double *cputime) {
|
||||
static int cputime_fd = -1;
|
||||
if (cputime_fd == -1) {
|
||||
cputime_fd = open("/proc/self/cputime_ns", O_RDONLY);
|
||||
if (cputime_fd < 0) {
|
||||
cputime_fd = -1;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
char buff[64];
|
||||
memset(buff, 0, sizeof(buff));
|
||||
if (pread(cputime_fd, buff, sizeof(buff)-1, 0) <= 0) {
|
||||
close(cputime_fd);
|
||||
cputime_fd = -1;
|
||||
return false;
|
||||
}
|
||||
unsigned long long result = strtoull(buff, NULL, 0);
|
||||
if (result == (std::numeric_limits<unsigned long long>::max)()) {
|
||||
close(cputime_fd);
|
||||
cputime_fd = -1;
|
||||
return false;
|
||||
}
|
||||
*cputime = static_cast<double>(result) / 1e9;
|
||||
return true;
|
||||
}
|
||||
|
||||
double MyCPUUsage() {
|
||||
{
|
||||
mutex_lock l(&cputimens_mutex);
|
||||
static bool use_cputime_ns = true;
|
||||
if (use_cputime_ns) {
|
||||
double value;
|
||||
if (MyCPUUsageCPUTimeNsLocked(&value)) {
|
||||
return value;
|
||||
}
|
||||
// Once MyCPUUsageCPUTimeNsLocked fails once fall back to getrusage().
|
||||
std::cout << "Reading /proc/self/cputime_ns failed. Using getrusage().\n";
|
||||
use_cputime_ns = false;
|
||||
}
|
||||
}
|
||||
return MyCPUUsageRUsage();
|
||||
}
|
||||
|
||||
double ChildrenCPUUsage() {
|
||||
struct rusage ru;
|
||||
if (getrusage(RUSAGE_CHILDREN, &ru) == 0) {
|
||||
return (static_cast<double>(ru.ru_utime.tv_sec) +
|
||||
static_cast<double>(ru.ru_utime.tv_usec)*1e-6 +
|
||||
static_cast<double>(ru.ru_stime.tv_sec) +
|
||||
static_cast<double>(ru.ru_stime.tv_usec)*1e-6);
|
||||
} else {
|
||||
return 0.0;
|
||||
}
|
||||
}
|
||||
#endif // OS_WINDOWS
|
||||
|
||||
double CyclesPerSecond(void) {
|
||||
pthread_once(&cpuinfo_init, &InitializeSystemInfo);
|
||||
return cpuinfo_cycles_per_second;
|
||||
}
|
||||
|
||||
int NumCPUs(void) {
|
||||
pthread_once(&cpuinfo_init, &InitializeSystemInfo);
|
||||
return cpuinfo_num_cpus;
|
||||
}
|
||||
|
9
src/sysinfo.h
Normal file
9
src/sysinfo.h
Normal file
@ -0,0 +1,9 @@
|
||||
#ifndef BENCHMARK_SYSINFO_H_
|
||||
#define BENCHMARK_SYSINFO_H_
|
||||
|
||||
double MyCPUUsage();
|
||||
double ChildrenCPUUsage();
|
||||
int NumCPUs();
|
||||
double CyclesPerSecond();
|
||||
|
||||
#endif // BENCHMARK_SYSINFO_H_
|
137
src/walltime.cc
Normal file
137
src/walltime.cc
Normal file
@ -0,0 +1,137 @@
|
||||
#include "walltime.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/time.h>
|
||||
#include <time.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <limits>
|
||||
|
||||
#include "cycleclock.h"
|
||||
#include "macros.h"
|
||||
#include "sysinfo.h"
|
||||
|
||||
namespace walltime {
|
||||
namespace {
|
||||
const double kMaxErrorInterval = 100e-6;
|
||||
|
||||
std::atomic<bool> initialized(false);
|
||||
WallTime base_walltime = 0.0;
|
||||
int64_t base_cycletime = 0;
|
||||
int64_t cycles_per_second;
|
||||
double seconds_per_cycle;
|
||||
uint32_t last_adjust_time = 0;
|
||||
std::atomic<int32_t> drift_adjust(0);
|
||||
int64_t max_interval_cycles = 0;
|
||||
|
||||
// Helper routines to load/store a float from an AtomicWord. Required because
|
||||
// g++ < 4.7 doesn't support std::atomic<float> correctly. I cannot wait to get
|
||||
// rid of this horror show.
|
||||
inline void SetDrift(float f) {
|
||||
int32_t w;
|
||||
memcpy(&w, &f, sizeof(f));
|
||||
std::atomic_store(&drift_adjust, w);
|
||||
}
|
||||
|
||||
inline float GetDrift() {
|
||||
float f;
|
||||
int32_t w = std::atomic_load(&drift_adjust);
|
||||
memcpy(&f, &w, sizeof(f));
|
||||
return f;
|
||||
}
|
||||
|
||||
static_assert(sizeof(float) <= sizeof(int32_t),
|
||||
"type sizes don't allow the drift_adjust hack");
|
||||
|
||||
WallTime Slow() {
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, NULL);
|
||||
return tv.tv_sec + tv.tv_usec * 1e-6;
|
||||
}
|
||||
|
||||
bool SplitTimezone(WallTime value, bool local, struct tm* t,
|
||||
double* subsecond) {
|
||||
memset(t, 0, sizeof(*t));
|
||||
if ((value < 0) || (value > std::numeric_limits<time_t>::max())) {
|
||||
*subsecond = 0.0;
|
||||
return false;
|
||||
}
|
||||
const time_t whole_time = static_cast<time_t>(value);
|
||||
*subsecond = value - whole_time;
|
||||
if (local)
|
||||
localtime_r(&whole_time, t);
|
||||
else
|
||||
gmtime_r(&whole_time, t);
|
||||
return true;
|
||||
}
|
||||
} // end namespace
|
||||
|
||||
// This routine should be invoked to initialize walltime.
|
||||
// It is not intended for general purpose use.
|
||||
void Initialize() {
|
||||
CHECK(!std::atomic_load(&initialized));
|
||||
cycles_per_second = static_cast<int64_t>(CyclesPerSecond());
|
||||
CHECK(cycles_per_second != 0);
|
||||
seconds_per_cycle = 1.0 / cycles_per_second;
|
||||
max_interval_cycles = static_cast<int64_t>(
|
||||
cycles_per_second * kMaxErrorInterval);
|
||||
do {
|
||||
base_cycletime = CycleClock::Now();
|
||||
base_walltime = Slow();
|
||||
} while (CycleClock::Now() - base_cycletime > max_interval_cycles);
|
||||
// We are now sure that "base_walltime" and "base_cycletime" were produced
|
||||
// within kMaxErrorInterval of one another.
|
||||
|
||||
SetDrift(0.0);
|
||||
last_adjust_time = static_cast<uint32_t>(uint64_t(base_cycletime) >> 32);
|
||||
std::atomic_store(&initialized, true);
|
||||
}
|
||||
|
||||
WallTime Now() {
|
||||
if (!std::atomic_load(&initialized))
|
||||
return Slow();
|
||||
|
||||
WallTime now = 0.0;
|
||||
WallTime result = 0.0;
|
||||
int64_t ct = 0;
|
||||
uint32_t top_bits = 0;
|
||||
do {
|
||||
ct = CycleClock::Now();
|
||||
int64_t cycle_delta = ct - base_cycletime;
|
||||
result = base_walltime + cycle_delta * seconds_per_cycle;
|
||||
|
||||
top_bits = static_cast<uint32_t>(uint64_t(ct) >> 32);
|
||||
// Recompute drift no more often than every 2^32 cycles.
|
||||
// I.e., @2GHz, ~ every two seconds
|
||||
if (top_bits == last_adjust_time) { // don't need to recompute drift
|
||||
return result + GetDrift();
|
||||
}
|
||||
|
||||
now = Slow();
|
||||
} while (CycleClock::Now() - ct > max_interval_cycles);
|
||||
// We are now sure that "now" and "result" were produced within
|
||||
// kMaxErrorInterval of one another.
|
||||
|
||||
SetDrift(now - result);
|
||||
last_adjust_time = top_bits;
|
||||
return now;
|
||||
}
|
||||
|
||||
const char* Print(WallTime time, const char *format, bool local,
|
||||
char* storage, int *remainder_us) {
|
||||
struct tm split;
|
||||
double subsecond;
|
||||
if (!SplitTimezone(time, local, &split, &subsecond)) {
|
||||
snprintf(storage, sizeof(storage), "Invalid time: %f", time);
|
||||
} else {
|
||||
if (remainder_us != NULL) {
|
||||
*remainder_us = static_cast<int>((subsecond * 1000000) + 0.5);
|
||||
if (*remainder_us > 999999) *remainder_us = 999999;
|
||||
if (*remainder_us < 0) *remainder_us = 0;
|
||||
}
|
||||
strftime(storage, sizeof(storage), format, &split);
|
||||
}
|
||||
return storage;
|
||||
}
|
||||
} // end namespace walltime
|
19
src/walltime.h
Normal file
19
src/walltime.h
Normal file
@ -0,0 +1,19 @@
|
||||
#ifndef BENCHMARK_WALLTIME_H_
|
||||
#define BENCHMARK_WALLTIME_H_
|
||||
|
||||
typedef double WallTime;
|
||||
|
||||
namespace walltime {
|
||||
void Initialize();
|
||||
WallTime Now();
|
||||
|
||||
// GIVEN: walltime, generic format string (as understood by strftime),
|
||||
// a boolean flag specifying if the time is local or UTC (true=local).
|
||||
// RETURNS: the formatted string. ALSO RETURNS: the storage printbuffer
|
||||
// passed and the remaining number of microseconds (never printed in
|
||||
// the string since strftime does not understand it)
|
||||
const char* Print(WallTime time, const char *format, bool local,
|
||||
char* storage, int *remainder_us);
|
||||
} // end namespace walltime
|
||||
|
||||
#endif // BENCHMARK_WALLTIME_H_
|
138
test/benchmark_test.cc
Normal file
138
test/benchmark_test.cc
Normal file
@ -0,0 +1,138 @@
|
||||
#include "benchmark/benchmark.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <limits>
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
|
||||
namespace {
|
||||
|
||||
int ATTRIBUTE_NOINLINE Factorial(uint32_t n) {
|
||||
return (n == 1) ? 1 : n * Factorial(n - 1);
|
||||
}
|
||||
|
||||
double CalculatePi(int depth) {
|
||||
double pi = 0.0;
|
||||
for (int i = 0; i < depth; ++i) {
|
||||
double numerator = static_cast<double>(((i % 2) * 2) - 1);
|
||||
double denominator = static_cast<double>((2 * i) - 1);
|
||||
pi += numerator / denominator;
|
||||
}
|
||||
return (pi - 1.0) * 4;
|
||||
}
|
||||
|
||||
std::set<int> ConstructRandomSet(int size) {
|
||||
std::set<int> s;
|
||||
for (int i = 0; i < size; ++i)
|
||||
s.insert(i);
|
||||
return s;
|
||||
}
|
||||
|
||||
static std::vector<int>* test_vector = NULL;
|
||||
|
||||
} // end namespace
|
||||
|
||||
#ifdef DEBUG
|
||||
static void BM_Factorial(benchmark::State& state) {
|
||||
int fac_42 = 0;
|
||||
while (state.KeepRunning())
|
||||
fac_42 = Factorial(8);
|
||||
// Prevent compiler optimizations
|
||||
CHECK(fac_42 != std::numeric_limits<int>::max());
|
||||
}
|
||||
BENCHMARK(BM_Factorial);
|
||||
#endif
|
||||
|
||||
static void BM_CalculatePiRange(benchmark::State& state) {
|
||||
double pi = 0.0;
|
||||
while (state.KeepRunning())
|
||||
pi = CalculatePi(state.range_x());
|
||||
std::stringstream ss;
|
||||
ss << pi;
|
||||
state.SetLabel(ss.str());
|
||||
}
|
||||
BENCHMARK_RANGE(BM_CalculatePiRange, 1, 1024 * 1024);
|
||||
|
||||
static void BM_CalculatePi(benchmark::State& state) {
|
||||
static const int depth = 1024;
|
||||
double pi ATTRIBUTE_UNUSED = 0.0;
|
||||
while (state.KeepRunning()) {
|
||||
pi = CalculatePi(depth);
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_CalculatePi)->Threads(8);
|
||||
BENCHMARK(BM_CalculatePi)->ThreadRange(1, 32);
|
||||
BENCHMARK(BM_CalculatePi)->ThreadPerCpu();
|
||||
|
||||
static void BM_SetInsert(benchmark::State& state) {
|
||||
while (state.KeepRunning()) {
|
||||
state.PauseTiming();
|
||||
std::set<int> data = ConstructRandomSet(state.range_x());
|
||||
state.ResumeTiming();
|
||||
for (int j = 0; j < state.range_y(); ++j)
|
||||
data.insert(rand());
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_SetInsert)->RangePair(1<<10,8<<10, 1,10);
|
||||
|
||||
template<typename Q>
|
||||
static void BM_Sequential(benchmark::State& state) {
|
||||
Q q;
|
||||
typename Q::value_type v;
|
||||
while (state.KeepRunning())
|
||||
for (int i = state.range_x(); --i; )
|
||||
q.push_back(v);
|
||||
const int64_t items_processed =
|
||||
static_cast<int64_t>(state.iterations()) * state.range_x();
|
||||
state.SetItemsProcessed(items_processed);
|
||||
state.SetBytesProcessed(items_processed * sizeof(v));
|
||||
}
|
||||
BENCHMARK_TEMPLATE(BM_Sequential, std::vector<int>)->Range(1 << 0, 1 << 10);
|
||||
BENCHMARK_TEMPLATE(BM_Sequential, std::list<int>)->Range(1 << 0, 1 << 10);
|
||||
|
||||
static void BM_StringCompare(benchmark::State& state) {
|
||||
std::string s1(state.range_x(), '-');
|
||||
std::string s2(state.range_x(), '-');
|
||||
int r = 0;
|
||||
while (state.KeepRunning())
|
||||
r |= s1.compare(s2);
|
||||
// Prevent compiler optimizations
|
||||
CHECK(r != std::numeric_limits<int>::max());
|
||||
}
|
||||
BENCHMARK(BM_StringCompare)->Range(1, 1<<20);
|
||||
|
||||
static void BM_SetupTeardown(benchmark::State& state) {
|
||||
if (state.thread_index == 0)
|
||||
test_vector = new std::vector<int>();
|
||||
while (state.KeepRunning())
|
||||
test_vector->push_back(0);
|
||||
if (state.thread_index == 0) {
|
||||
delete test_vector;
|
||||
test_vector = NULL;
|
||||
}
|
||||
}
|
||||
BENCHMARK(BM_SetupTeardown);
|
||||
|
||||
static void BM_LongTest(benchmark::State& state) {
|
||||
double tracker = 0.0;
|
||||
while (state.KeepRunning())
|
||||
for (int i = 0; i < state.range_x(); ++i)
|
||||
tracker += i;
|
||||
CHECK(tracker != 0.0);
|
||||
}
|
||||
BENCHMARK(BM_LongTest)->Range(1<<16,1<<28);
|
||||
|
||||
int main(int argc, const char* argv[]) {
|
||||
benchmark::Initialize(&argc, argv);
|
||||
|
||||
CHECK(Factorial(8) == 40320);
|
||||
CHECK(CalculatePi(1) == 0.0);
|
||||
|
||||
benchmark::RunSpecifiedBenchmarks();
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user