mirror of
https://github.com/google/benchmark.git
synced 2025-01-13 21:30:14 +08:00
Initial commit
Benchmark library builds and runs but only single-threaded. Multithreaded support needs a bit more love. Currently requires some C++11 support (g++ 4.6.3 seems to work).
This commit is contained in:
commit
403f354423
6
.gitignore
vendored
Normal file
6
.gitignore
vendored
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
CMakeCache.txt
|
||||||
|
CMakeFiles/
|
||||||
|
Makefile
|
||||||
|
bin/
|
||||||
|
cmake_install.cmake
|
||||||
|
lib/
|
43
CMakeLists.txt
Normal file
43
CMakeLists.txt
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
cmake_minimum_required (VERSION 2.8)
project (benchmark)

find_package(Threads)

# Keep every build product inside the source tree: executables go to bin/,
# shared and static libraries go to lib/.
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/bin)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/lib)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/lib)

# Append to the existing flags instead of overwriting them, so anything passed
# through the CXXFLAGS environment variable or -DCMAKE_CXX_FLAGS=... survives.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Werror --std=c++0x")
set(CMAKE_CXX_FLAGS_DEBUG "-g -O0 -DDEBUG")
set(CMAKE_CXX_FLAGS_RELEASE "-fno-strict-aliasing -O3 -DNDEBUG")

# Set OS
# The variables are named rather than expanded with ${...}: if() dereferences
# the name itself, so an empty value can no longer collapse the condition into
# a syntax error and the value is never dereferenced a second time.
if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
  add_definitions(-DOS_MACOSX)
endif()

if(CMAKE_SYSTEM_NAME MATCHES "Linux")
  add_definitions(-DOS_LINUX)
endif()

if(CMAKE_SYSTEM_NAME MATCHES "Windows")
  add_definitions(-DOS_WINDOWS)
endif()

# Set CPU
if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86")
  add_definitions(-DARCH_X86)
endif()

# Set up directories
include_directories(${PROJECT_SOURCE_DIR}/include)
include_directories(${PROJECT_SOURCE_DIR}/src)
link_directories(${PROJECT_SOURCE_DIR}/lib)

# Build the targets
FILE(GLOB SOURCE_FILES "src/*.cc")
add_library(benchmark STATIC ${SOURCE_FILES})

add_executable(benchmark_test test/benchmark_test.cc)
target_link_libraries(benchmark_test benchmark ${CMAKE_THREAD_LIBS_INIT})
|
||||||
|
|
540
include/benchmark/benchmark.h
Normal file
540
include/benchmark/benchmark.h
Normal file
@ -0,0 +1,540 @@
|
|||||||
|
// Support for registering benchmarks for functions.
|
||||||
|
|
||||||
|
/* Example usage:
|
||||||
|
// Define a function that executes the code to be measured a
|
||||||
|
// specified number of times:
|
||||||
|
static void BM_StringCreation(benchmark::State& state) {
|
||||||
|
while (state.KeepRunning())
|
||||||
|
std::string empty_string;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Register the function as a benchmark
|
||||||
|
BENCHMARK(BM_StringCreation);
|
||||||
|
|
||||||
|
// Define another benchmark
|
||||||
|
static void BM_StringCopy(benchmark::State& state) {
|
||||||
|
std::string x = "hello";
|
||||||
|
while (state.KeepRunning())
|
||||||
|
std::string copy(x);
|
||||||
|
}
|
||||||
|
BENCHMARK(BM_StringCopy);
|
||||||
|
|
||||||
|
// Augment the main() program to invoke benchmarks if specified
|
||||||
|
// via the --benchmarks command line flag. E.g.,
|
||||||
|
// my_unittest --benchmarks=all
|
||||||
|
// my_unittest --benchmarks=BM_StringCreation
|
||||||
|
// my_unittest --benchmarks=String
|
||||||
|
// my_unittest --benchmarks='Copy|Creation'
|
||||||
|
int main(int argc, const char** argv) {
  benchmark::Initialize(&argc, argv);

  benchmark::RunSpecifiedBenchmarks();
}
|
||||||
|
|
||||||
|
// Sometimes a family of microbenchmarks can be implemented with
|
||||||
|
// just one routine that takes an extra argument to specify which
|
||||||
|
// one of the family of benchmarks to run. For example, the following
|
||||||
|
// code defines a family of microbenchmarks for measuring the speed
|
||||||
|
// of memcpy() calls of different lengths:
|
||||||
|
|
||||||
|
static void BM_memcpy(benchmark::State& state) {
|
||||||
|
char* src = new char[state.range_x()]; char* dst = new char[state.range_x()];
|
||||||
|
memset(src, 'x', state.range_x());
|
||||||
|
while (state.KeepRunning()) {
|
||||||
|
memcpy(dst, src, state.range_x());
}
state.SetBytesProcessed(int64_t(state.iterations()) * int64_t(state.range_x()));
|
||||||
|
delete[] src; delete[] dst;
|
||||||
|
}
|
||||||
|
BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10);
|
||||||
|
|
||||||
|
// The preceding code is quite repetitive, and can be replaced with the
|
||||||
|
// following short-hand. The following invocation will pick a few
|
||||||
|
// appropriate arguments in the specified range and will generate a
|
||||||
|
// microbenchmark for each such argument.
|
||||||
|
BENCHMARK(BM_memcpy)->Range(8, 8<<10);
|
||||||
|
|
||||||
|
// You might have a microbenchmark that depends on two inputs. For
|
||||||
|
// example, the following code defines a family of microbenchmarks for
|
||||||
|
// measuring the speed of set insertion.
|
||||||
|
static void BM_SetInsert(benchmark::State& state) {
|
||||||
|
while (state.KeepRunning()) {
|
||||||
|
state.PauseTiming();
|
||||||
|
set<int> data = ConstructRandomSet(state.range_x());
|
||||||
|
state.ResumeTiming();
|
||||||
|
for (int j = 0; j < state.range_y(); ++j)
|
||||||
|
data.insert(RandomNumber());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
BENCHMARK(BM_SetInsert)
|
||||||
|
->ArgPair(1<<10, 1)
|
||||||
|
->ArgPair(1<<10, 8)
|
||||||
|
->ArgPair(1<<10, 64)
|
||||||
|
->ArgPair(1<<10, 512)
|
||||||
|
->ArgPair(8<<10, 1)
|
||||||
|
->ArgPair(8<<10, 8)
|
||||||
|
->ArgPair(8<<10, 64)
|
||||||
|
->ArgPair(8<<10, 512);
|
||||||
|
|
||||||
|
// The preceding code is quite repetitive, and can be replaced with
|
||||||
|
// the following short-hand. The following macro will pick a few
|
||||||
|
// appropriate arguments in the product of the two specified ranges
|
||||||
|
// and will generate a microbenchmark for each such pair.
|
||||||
|
BENCHMARK(BM_SetInsert)->RangePair(1<<10, 8<<10, 1, 512);
|
||||||
|
|
||||||
|
// For more complex patterns of inputs, passing a custom function
|
||||||
|
// to Apply allows programmatic specification of an
|
||||||
|
// arbitrary set of arguments to run the microbenchmark on.
|
||||||
|
// The following example enumerates a dense range on
|
||||||
|
// one parameter, and a sparse range on the second.
|
||||||
|
static void CustomArguments(benchmark::internal::Benchmark* b) {
  for (int i = 0; i <= 10; ++i)
    for (int j = 32; j <= 1024*1024; j *= 8)
      b->ArgPair(i, j);
}
|
||||||
|
BENCHMARK(BM_SetInsert)->Apply(CustomArguments);
|
||||||
|
|
||||||
|
// Templated microbenchmarks work the same way:
|
||||||
|
// Produce then consume 'size' messages 'iters' times
|
||||||
|
// Measures throughput in the absence of multiprogramming.
|
||||||
|
template <class Q> void BM_Sequential(benchmark::State& state) {
|
||||||
|
Q q;
|
||||||
|
typename Q::value_type v;
|
||||||
|
while (state.KeepRunning()) {
|
||||||
|
for (int i = state.range_x(); i--; )
|
||||||
|
q.push(v);
|
||||||
|
for (int e = state.range_x(); e--; )
|
||||||
|
q.Wait(&v);
|
||||||
|
}
|
||||||
|
// actually messages, not bytes:
|
||||||
|
state.SetBytesProcessed(
|
||||||
|
static_cast<int64_t>(state.iterations())*state.range_x());
|
||||||
|
}
|
||||||
|
BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);
|
||||||
|
|
||||||
|
In a multithreaded test, it is guaranteed that none of the threads will start
|
||||||
|
until all have called KeepRunning, and all will have finished before KeepRunning
|
||||||
|
returns false. As such, any global setup or teardown you want to do can be
|
||||||
|
wrapped in a check against the thread index:
|
||||||
|
|
||||||
|
static void BM_MultiThreaded(benchmark::State& state) {
|
||||||
|
if (state.thread_index == 0) {
|
||||||
|
// Setup code here.
|
||||||
|
}
|
||||||
|
while (state.KeepRunning()) {
|
||||||
|
// Run the test as normal.
|
||||||
|
}
|
||||||
|
if (state.thread_index == 0) {
|
||||||
|
// Teardown code here.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef BENCHMARK_BENCHMARK_H_
|
||||||
|
#define BENCHMARK_BENCHMARK_H_
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include <functional>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "macros.h"
|
||||||
|
|
||||||
|
namespace benchmark {
|
||||||
|
// If the --benchmarks flag is empty, do nothing.
|
||||||
|
//
|
||||||
|
// Otherwise, run all benchmarks specified by the --benchmarks flag,
|
||||||
|
// and exit after running the benchmarks.
|
||||||
|
extern void RunSpecifiedBenchmarks();
|
||||||
|
|
||||||
|
// ------------------------------------------------------
|
||||||
|
// Routines that can be called from within a benchmark
|
||||||
|
|
||||||
|
//
|
||||||
|
// REQUIRES: a benchmark is currently executing
|
||||||
|
extern void SetLabel(const std::string& label);
|
||||||
|
|
||||||
|
// If this routine is called, peak memory allocation past this point in the
|
||||||
|
// benchmark is reported at the end of the benchmark report line. (It is
|
||||||
|
// computed by running the benchmark once with a single iteration and a memory
|
||||||
|
// tracer.)
|
||||||
|
extern void MemoryUsage();
|
||||||
|
|
||||||
|
// If a particular benchmark is I/O bound, or if for some reason CPU
|
||||||
|
// timings are not representative, call this method from within the
|
||||||
|
// benchmark routine. If called, the elapsed time will be used to
|
||||||
|
// control how many iterations are run, and in the printing of
|
||||||
|
// items/second or MB/seconds values. If not called, the cpu time
|
||||||
|
// used by the benchmark will be used.
|
||||||
|
extern void UseRealTime();
|
||||||
|
|
||||||
|
namespace internal {
|
||||||
|
class Benchmark;
|
||||||
|
}
|
||||||
|
|
||||||
|
// State is passed to a running Benchmark and contains state for the
|
||||||
|
// benchmark to use.
|
||||||
|
class State {
|
||||||
|
public:
|
||||||
|
// Returns true iff the benchmark should continue through another iteration.
|
||||||
|
bool KeepRunning();
|
||||||
|
|
||||||
|
void PauseTiming();
|
||||||
|
void ResumeTiming();
|
||||||
|
|
||||||
|
// Set the number of bytes processed by the current benchmark
|
||||||
|
// execution. This routine is typically called once at the end of a
|
||||||
|
// throughput oriented benchmark. If this routine is called with a
|
||||||
|
// value > 0, the report is printed in MB/sec instead of nanoseconds
|
||||||
|
// per iteration.
|
||||||
|
//
|
||||||
|
// REQUIRES: a benchmark has exited its KeepRunning loop.
|
||||||
|
void SetBytesProcessed(int64_t bytes);
|
||||||
|
|
||||||
|
// If this routine is called with items > 0, then an items/s
|
||||||
|
// label is printed on the benchmark report line for the currently
|
||||||
|
// executing benchmark. It is typically called at the end of a processing
|
||||||
|
// benchmark where a processing items/second output is desired.
|
||||||
|
//
|
||||||
|
// REQUIRES: a benchmark has exited its KeepRunning loop.
|
||||||
|
void SetItemsProcessed(int64_t items);
|
||||||
|
|
||||||
|
// If this routine is called, the specified label is printed at the
|
||||||
|
// end of the benchmark report line for the currently executing
|
||||||
|
// benchmark. Example:
|
||||||
|
// static void BM_Compress(int iters) {
|
||||||
|
// ...
|
||||||
|
// double compression = input_size / output_size;
|
||||||
|
// benchmark::SetLabel(StringPrintf("compress:%.1f%%", 100.0*compression));
|
||||||
|
// }
|
||||||
|
// Produces output that looks like:
|
||||||
|
// BM_Compress 50 50 14115038 compress:27.3%
|
||||||
|
//
|
||||||
|
// REQUIRES: a benchmark has exited its KeepRunning loop.
|
||||||
|
void SetLabel(const std::string& label);
|
||||||
|
|
||||||
|
// Range arguments for this run. CHECKs if the argument has been set.
|
||||||
|
int range_x() const;
|
||||||
|
int range_y() const;
|
||||||
|
|
||||||
|
// Returns total_iterations_. NOTE(review): that member is declared int64_t, so
// the int return type narrows the count once it no longer fits in an int --
// confirm whether the return type can be widened without breaking callers.
int iterations() const { return total_iterations_; }
|
||||||
|
|
||||||
|
const int thread_index;
|
||||||
|
|
||||||
|
private:
|
||||||
|
class FastClock;
|
||||||
|
struct SharedState;
|
||||||
|
|
||||||
|
State(FastClock* clock, SharedState* s, int t);
|
||||||
|
bool StartRunning();
|
||||||
|
bool FinishInterval();
|
||||||
|
bool MaybeStop();
|
||||||
|
void NewInterval();
|
||||||
|
bool AllStarting();
|
||||||
|
bool RunAnotherInterval() const;
|
||||||
|
|
||||||
|
void Run();
|
||||||
|
|
||||||
|
enum EState {
|
||||||
|
STATE_INITIAL, // KeepRunning hasn't been called
|
||||||
|
STATE_STARTING, // KeepRunning called, waiting for other threads
|
||||||
|
STATE_RUNNING, // Running and being timed
|
||||||
|
STATE_STOPPING, // Not being timed but waiting for other threads
|
||||||
|
STATE_STOPPED, // Stopped
|
||||||
|
} state_;
|
||||||
|
|
||||||
|
FastClock* clock_;
|
||||||
|
|
||||||
|
// State shared by all BenchmarkRun objects that belong to the same
|
||||||
|
// BenchmarkInstance
|
||||||
|
SharedState* shared_;
|
||||||
|
|
||||||
|
// Custom label set by the user.
|
||||||
|
std::string label_;
|
||||||
|
|
||||||
|
// Each State object goes through a sequence of measurement intervals. By
|
||||||
|
// default each interval is approx. 100ms in length. The following stats are
|
||||||
|
// kept for each interval.
|
||||||
|
int64_t iterations_;
|
||||||
|
double start_cpu_;
|
||||||
|
double start_time_;
|
||||||
|
int64_t stop_time_micros_;
|
||||||
|
|
||||||
|
double start_pause_;
|
||||||
|
double pause_time_;
|
||||||
|
|
||||||
|
// Total number of iterations for all finished runs.
|
||||||
|
int64_t total_iterations_;
|
||||||
|
|
||||||
|
// Approximate time in microseconds for one interval of execution.
|
||||||
|
// Dynamically adjusted as needed.
|
||||||
|
int64_t interval_micros_;
|
||||||
|
|
||||||
|
// True if the current interval is the continuation of a previous one.
|
||||||
|
bool is_continuation_;
|
||||||
|
|
||||||
|
friend class internal::Benchmark;
|
||||||
|
DISALLOW_COPY_AND_ASSIGN(State);
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace internal {
|
||||||
|
class BenchmarkReporter;
|
||||||
|
|
||||||
|
typedef std::function<void(State&)> BenchmarkFunction;
|
||||||
|
|
||||||
|
// Run all benchmarks whose name is a partial match for the regular
|
||||||
|
// expression in "spec". The results of benchmark runs are fed to "reporter".
|
||||||
|
void RunMatchingBenchmarks(const std::string& spec,
|
||||||
|
BenchmarkReporter* reporter);
|
||||||
|
|
||||||
|
// Extract the list of benchmark names that match the specified regular
|
||||||
|
// expression.
|
||||||
|
void FindMatchingBenchmarkNames(const std::string& re,
|
||||||
|
std::vector<std::string>* benchmark_names);
|
||||||
|
|
||||||
|
// ------------------------------------------------------
|
||||||
|
// Benchmark registration object. The BENCHMARK() macro expands
|
||||||
|
// into an internal::Benchmark* object. Various methods can
|
||||||
|
// be called on this object to change the properties of the benchmark.
|
||||||
|
// Each method returns "this" so that multiple method calls can
|
||||||
|
// be chained into one expression.
|
||||||
|
class Benchmark {
|
||||||
|
public:
|
||||||
|
// Takes f by value as a BenchmarkFunction (a std::function), so there is no
// pointer whose ownership is transferred.
|
||||||
|
Benchmark(const char* name, BenchmarkFunction f);
|
||||||
|
|
||||||
|
~Benchmark();
|
||||||
|
|
||||||
|
// Note: the following methods all return "this" so that multiple
|
||||||
|
// method calls can be chained together in one expression.
|
||||||
|
|
||||||
|
// Run this benchmark once with "x" as the extra argument passed
|
||||||
|
// to the function.
|
||||||
|
// REQUIRES: The function passed to the constructor must accept an arg1.
|
||||||
|
Benchmark* Arg(int x);
|
||||||
|
|
||||||
|
// Run this benchmark once for a number of values picked from the
|
||||||
|
// range [start..limit]. (start and limit are always picked.)
|
||||||
|
// REQUIRES: The function passed to the constructor must accept an arg1.
|
||||||
|
Benchmark* Range(int start, int limit);
|
||||||
|
|
||||||
|
// Run this benchmark once for every value in the range [start..limit]
|
||||||
|
// REQUIRES: The function passed to the constructor must accept an arg1.
|
||||||
|
Benchmark* DenseRange(int start, int limit);
|
||||||
|
|
||||||
|
// Run this benchmark once with "x,y" as the extra arguments passed
|
||||||
|
// to the function.
|
||||||
|
// REQUIRES: The function passed to the constructor must accept arg1,arg2.
|
||||||
|
Benchmark* ArgPair(int x, int y);
|
||||||
|
|
||||||
|
// Pick a set of values A from the range [lo1..hi1] and a set
|
||||||
|
// of values B from the range [lo2..hi2]. Run the benchmark for
|
||||||
|
// every pair of values in the cartesian product of A and B
|
||||||
|
// (i.e., for all combinations of the values in A and B).
|
||||||
|
// REQUIRES: The function passed to the constructor must accept arg1,arg2.
|
||||||
|
Benchmark* RangePair(int lo1, int hi1, int lo2, int hi2);
|
||||||
|
|
||||||
|
// Pass this benchmark object to *func, which can customize
|
||||||
|
// the benchmark by calling various methods like Arg, ArgPair,
|
||||||
|
// Threads, etc.
|
||||||
|
Benchmark* Apply(void (*func)(Benchmark* benchmark));
|
||||||
|
|
||||||
|
// Support for running multiple copies of the same benchmark concurrently
|
||||||
|
// in multiple threads. This may be useful when measuring the scaling
|
||||||
|
// of some piece of code.
|
||||||
|
|
||||||
|
// Run one instance of this benchmark concurrently in t threads.
|
||||||
|
Benchmark* Threads(int t);
|
||||||
|
|
||||||
|
// Pick a set of values T from [min_threads,max_threads].
|
||||||
|
// min_threads and max_threads are always included in T. Run this
|
||||||
|
// benchmark once for each value in T. The benchmark run for a
|
||||||
|
// particular value t consists of t threads running the benchmark
|
||||||
|
// function concurrently. For example, consider:
|
||||||
|
// BENCHMARK(Foo)->ThreadRange(1,16);
|
||||||
|
// This will run the following benchmarks:
|
||||||
|
// Foo in 1 thread
|
||||||
|
// Foo in 2 threads
|
||||||
|
// Foo in 4 threads
|
||||||
|
// Foo in 8 threads
|
||||||
|
// Foo in 16 threads
|
||||||
|
Benchmark* ThreadRange(int min_threads, int max_threads);
|
||||||
|
|
||||||
|
// Equivalent to ThreadRange(NumCPUs(), NumCPUs())
|
||||||
|
Benchmark* ThreadPerCpu();
|
||||||
|
|
||||||
|
// TODO(dominich): Control whether or not real-time is used for this benchmark
|
||||||
|
// TODO(dominich): Control the default number of iterations
|
||||||
|
|
||||||
|
// -------------------------------
|
||||||
|
// Following methods are not useful for clients
|
||||||
|
|
||||||
|
// Used inside the benchmark implementation
|
||||||
|
struct Instance;
|
||||||
|
struct ThreadStats;
|
||||||
|
|
||||||
|
// Extract the list of benchmark instances that match the specified
|
||||||
|
// regular expression.
|
||||||
|
static void FindBenchmarks(const std::string& re,
|
||||||
|
std::vector<Instance>* benchmarks);
|
||||||
|
|
||||||
|
// Measure the overhead of an empty benchmark to subtract later.
|
||||||
|
static void MeasureOverhead();
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::vector<Benchmark::Instance> CreateBenchmarkInstances(int rangeXindex,
|
||||||
|
int rangeYindex);
|
||||||
|
|
||||||
|
std::string name_;
|
||||||
|
BenchmarkFunction function_;
|
||||||
|
int registration_index_;
|
||||||
|
std::vector<int> rangeX_;
|
||||||
|
std::vector<int> rangeY_;
|
||||||
|
std::vector<int> thread_counts_;
|
||||||
|
|
||||||
|
// Special value placed in thread_counts_ to stand for NumCPUs()
|
||||||
|
static const int kNumCpuMarker = -1;
|
||||||
|
|
||||||
|
// Special value used to indicate that no range is required.
|
||||||
|
static const int kNoRange = -1;
|
||||||
|
|
||||||
|
static void AddRange(std::vector<int>* dst, int lo, int hi, int mult);
|
||||||
|
static double MeasurePeakHeapMemory(const Instance& b);
|
||||||
|
static void RunInstance(const Instance& b, BenchmarkReporter* br);
|
||||||
|
friend class ::benchmark::State;
|
||||||
|
friend struct ::benchmark::internal::Benchmark::Instance;
|
||||||
|
friend void ::benchmark::internal::RunMatchingBenchmarks(
|
||||||
|
const std::string&, BenchmarkReporter*);
|
||||||
|
DISALLOW_COPY_AND_ASSIGN(Benchmark);
|
||||||
|
};
|
||||||
|
|
||||||
|
// ------------------------------------------------------
|
||||||
|
// Benchmarks reporter interface + data containers.
|
||||||
|
|
||||||
|
struct BenchmarkContextData {
|
||||||
|
int num_cpus;
|
||||||
|
double mhz_per_cpu;
|
||||||
|
//std::string cpu_info;
|
||||||
|
bool cpu_scaling_enabled;
|
||||||
|
|
||||||
|
// The number of chars in the longest benchmark name.
|
||||||
|
int name_field_width;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct BenchmarkRunData {
|
||||||
|
BenchmarkRunData() :
|
||||||
|
thread_index(-1),
|
||||||
|
iterations(1),
|
||||||
|
real_accumulated_time(0),
|
||||||
|
cpu_accumulated_time(0),
|
||||||
|
bytes_per_second(0),
|
||||||
|
items_per_second(0),
|
||||||
|
max_heapbytes_used(0) {}
|
||||||
|
|
||||||
|
std::string benchmark_name;
|
||||||
|
std::string report_label;
|
||||||
|
int thread_index;
|
||||||
|
int64_t iterations;
|
||||||
|
double real_accumulated_time;
|
||||||
|
double cpu_accumulated_time;
|
||||||
|
|
||||||
|
// Zero if not set by benchmark.
|
||||||
|
double bytes_per_second;
|
||||||
|
double items_per_second;
|
||||||
|
|
||||||
|
// This is set to 0.0 if memory tracing is not enabled.
|
||||||
|
double max_heapbytes_used;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Interface for custom benchmark result printers.
|
||||||
|
// By default, benchmark reports are printed to stdout. However an application
|
||||||
|
// can control the destination of the reports by calling
|
||||||
|
// RunMatchingBenchmarks and passing it a custom reporter object.
|
||||||
|
// The reporter object must implement the following interface.
|
||||||
|
class BenchmarkReporter {
|
||||||
|
public:
|
||||||
|
// Called once for every suite of benchmarks run.
|
||||||
|
// The parameter "context" contains information that the
|
||||||
|
// reporter may wish to use when generating its report, for example the
|
||||||
|
// platform under which the benchmarks are running. The benchmark run is
|
||||||
|
// never started if this function returns false, allowing the reporter
|
||||||
|
// to skip runs based on the context information.
|
||||||
|
virtual bool ReportContext(const BenchmarkContextData& context) = 0;
|
||||||
|
|
||||||
|
// Called once for each group of benchmark runs, gives information about
|
||||||
|
// cpu-time and heap memory usage during the benchmark run.
|
||||||
|
// Note that all the grouped benchmark runs should refer to the same
|
||||||
|
// benchmark, thus have the same name.
|
||||||
|
virtual void ReportRuns(const std::vector<BenchmarkRunData>& report) = 0;
|
||||||
|
|
||||||
|
virtual ~BenchmarkReporter();
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// ------------------------------------------------------
|
||||||
|
// Internal implementation details follow; please ignore
|
||||||
|
|
||||||
|
// Given a collection of reports, computes their mean and stddev.
|
||||||
|
// REQUIRES: all runs in "reports" must be from the same benchmark.
|
||||||
|
void ComputeStats(const std::vector<BenchmarkRunData>& reports,
|
||||||
|
BenchmarkRunData* mean_data,
|
||||||
|
BenchmarkRunData* stddev_data);
|
||||||
|
|
||||||
|
// Simple reporter that outputs benchmark data to the console. This is the
|
||||||
|
// default reporter used by RunSpecifiedBenchmarks().
|
||||||
|
class ConsoleReporter : public BenchmarkReporter {
|
||||||
|
public:
|
||||||
|
virtual bool ReportContext(const BenchmarkContextData& context);
|
||||||
|
virtual void ReportRuns(const std::vector<BenchmarkRunData>& reports);
|
||||||
|
private:
|
||||||
|
std::string PrintMemoryUsage(double bytes);
|
||||||
|
virtual void PrintRunData(const BenchmarkRunData& report);
|
||||||
|
int name_field_width_;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // end namespace internal
|
||||||
|
|
||||||
|
void Initialize(int* argc, const char** argv);
|
||||||
|
} // end namespace benchmark
|
||||||
|
|
||||||
|
// ------------------------------------------------------
|
||||||
|
// Macro to register benchmarks
|
||||||
|
|
||||||
|
// Helpers for generating unique variable names
|
||||||
|
#define BENCHMARK_CONCAT(a, b, c) BENCHMARK_CONCAT2(a, b, c)
|
||||||
|
#define BENCHMARK_CONCAT2(a, b, c) a ## b ## c
|
||||||
|
|
||||||
|
#define BENCHMARK(n) \
|
||||||
|
static ::benchmark::internal::Benchmark* \
|
||||||
|
BENCHMARK_CONCAT(__benchmark_, n, __LINE__) ATTRIBUTE_UNUSED = \
|
||||||
|
(new ::benchmark::internal::Benchmark(#n, n))
|
||||||
|
|
||||||
|
// Old-style macros
|
||||||
|
#define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a))
|
||||||
|
#define BENCHMARK_WITH_ARG2(n, a1, a2) BENCHMARK(n)->ArgPair((a1), (a2))
|
||||||
|
#define BENCHMARK_RANGE(n, lo, hi) BENCHMARK(n)->Range((lo), (hi))
|
||||||
|
#define BENCHMARK_RANGE2(n, l1, h1, l2, h2) \
|
||||||
|
BENCHMARK(n)->RangePair((l1), (h1), (l2), (h2))
|
||||||
|
|
||||||
|
// This will register a benchmark for a templatized function. For example:
|
||||||
|
//
|
||||||
|
// template<int arg>
|
||||||
|
// void BM_Foo(benchmark::State& state);
|
||||||
|
//
|
||||||
|
// BENCHMARK_TEMPLATE(BM_Foo, 1);
|
||||||
|
//
|
||||||
|
// will register BM_Foo<1> as a benchmark.
|
||||||
|
#define BENCHMARK_TEMPLATE(n, a) \
|
||||||
|
static ::benchmark::internal::Benchmark* \
|
||||||
|
BENCHMARK_CONCAT(__benchmark_, n, __LINE__) ATTRIBUTE_UNUSED = \
|
||||||
|
(new ::benchmark::internal::Benchmark(#n "<" #a ">", n<a>))
|
||||||
|
|
||||||
|
#define BENCHMARK_TEMPLATE2(n, a, b) \
|
||||||
|
static ::benchmark::internal::Benchmark* \
|
||||||
|
BENCHMARK_CONCAT(__benchmark_, n, __LINE__) ATTRIBUTE_UNUSED = \
|
||||||
|
(new ::benchmark::internal::Benchmark(#n "<" #a "," #b ">", n<a, b>))
|
||||||
|
|
||||||
|
#endif // BENCHMARK_BENCHMARK_H_
|
||||||
|
|
120
include/benchmark/macros.h
Normal file
120
include/benchmark/macros.h
Normal file
@ -0,0 +1,120 @@
|
|||||||
|
#ifndef BENCHMARK_MACROS_H_
|
||||||
|
#define BENCHMARK_MACROS_H_
|
||||||
|
|
||||||
|
#include <assert.h>
#include <stddef.h>
|
||||||
|
|
||||||
|
// Place in the private: section of a class to make it non-copyable.
// The copy constructor and copy assignment operator are declared deleted, so
// any attempted copy -- including one made by a member or friend of the class
// -- is rejected at compile time instead of surfacing as an undefined symbol
// at link time.
// The expansion keeps its trailing semicolon, so call sites compile with or
// without a semicolon of their own.
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
  TypeName(const TypeName&) = delete;      \
  void operator=(const TypeName&) = delete;
|
||||||
|
|
||||||
|
// The arraysize(arr) macro returns the # of elements in an array arr.
|
||||||
|
// The expression is a compile-time constant, and therefore can be
|
||||||
|
// used in defining new arrays, for example. If you use arraysize on
|
||||||
|
// a pointer by mistake, you will get a compile-time error.
|
||||||
|
//
|
||||||
|
// One caveat is that, for C++03, arraysize() doesn't accept any array of
|
||||||
|
// an anonymous type or a type defined inside a function. In these rare
|
||||||
|
// cases, you have to use the unsafe ARRAYSIZE() macro below. This is
|
||||||
|
// due to a limitation in C++03's template system. The limitation has
|
||||||
|
// been removed in C++11.
|
||||||
|
|
||||||
|
// This template function declaration is used in defining arraysize.
|
||||||
|
// Note that the function doesn't need an implementation, as we only
|
||||||
|
// use its type.
|
||||||
|
template <typename T, size_t N>
|
||||||
|
char (&ArraySizeHelper(T (&array)[N]))[N];
|
||||||
|
|
||||||
|
// That gcc wants both of these prototypes seems mysterious. VC, for
|
||||||
|
// its part, can't decide which to use (another mystery). Matching of
|
||||||
|
// template overloads: the final frontier.
|
||||||
|
#ifndef COMPILER_MSVC
|
||||||
|
template <typename T, size_t N>
|
||||||
|
char (&ArraySizeHelper(const T (&array)[N]))[N];
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define arraysize(array) (sizeof(ArraySizeHelper(array)))
|
||||||
|
|
||||||
|
// The STATIC_ASSERT macro can be used to verify that a compile time
|
||||||
|
// expression is true. For example, you could use it to verify the
|
||||||
|
// size of a static array:
|
||||||
|
//
|
||||||
|
// STATIC_ASSERT(arraysize(content_type_names) == CONTENT_NUM_TYPES,
|
||||||
|
// content_type_names_incorrect_size);
|
||||||
|
//
|
||||||
|
// or to make sure a struct is smaller than a certain size:
|
||||||
|
//
|
||||||
|
// STATIC_ASSERT(sizeof(foo) < 128, foo_too_large);
|
||||||
|
//
|
||||||
|
// The second argument to the macro is the name of the variable. If
|
||||||
|
// the expression is false, most compilers will issue a warning/error
|
||||||
|
// containing the name of the variable.
|
||||||
|
|
||||||
|
// Retained so that any code naming StaticAssert<...> directly keeps compiling;
// STATIC_ASSERT itself no longer uses it.
template <bool>
struct StaticAssert {
};

// Implementation details of STATIC_ASSERT:
//
// - STATIC_ASSERT expands to the C++11 static_assert declaration, which
//   requires its condition to be a compile-time constant and rejects the
//   translation unit when the condition is false.
//
// - The previous implementation declared a typedef named |msg| for an array
//   with -1 elements on failure.  When used inside a function body that
//   typedef is never referenced, which GCC reports through
//   -Wunused-local-typedefs (enabled by -Wall since GCC 4.8); together with
//   -Werror that breaks the build.  static_assert declares nothing.
//
// - |msg| is stringized, so the identifier passed as the second argument
//   still appears in the compiler's diagnostic.
//
// - The condition is written bool(expr) so that any expression explicitly
//   convertible to bool is accepted, as it was before.
#define STATIC_ASSERT(expr, msg) static_assert(bool(expr), #msg)
|
||||||
|
|
||||||
|
// CHECK(b) evaluates |b| exactly once in every build mode and, when it is
// false, fails through assert() -- so a failed CHECK only aborts where assert
// is active (NDEBUG not defined).  The stringized condition is folded into the
// assert expression so that the failure message names the check that failed
// instead of just "false".
#define CHECK(b) do { if (!(b)) assert(false && #b); } while(0)
// Comparison shorthands; each forwards a fully parenthesized comparison to
// CHECK.
#define CHECK_EQ(a, b) CHECK((a) == (b))
#define CHECK_GE(a, b) CHECK((a) >= (b))
#define CHECK_LE(a, b) CHECK((a) <= (b))
#define CHECK_GT(a, b) CHECK((a) > (b))
#define CHECK_LT(a, b) CHECK((a) < (b))
|
||||||
|
|
||||||
|
//
|
||||||
|
// Prevent the compiler from complaining about or optimizing away variables
|
||||||
|
// that appear unused.
|
||||||
|
#define ATTRIBUTE_UNUSED __attribute__ ((unused))
|
||||||
|
|
||||||
|
//
|
||||||
|
// For functions we want to force inline or not inline.
|
||||||
|
// Introduced in gcc 3.1.
|
||||||
|
#define ATTRIBUTE_ALWAYS_INLINE __attribute__ ((always_inline))
|
||||||
|
#define HAVE_ATTRIBUTE_ALWAYS_INLINE 1
|
||||||
|
#define ATTRIBUTE_NOINLINE __attribute__ ((noinline))
|
||||||
|
#define HAVE_ATTRIBUTE_NOINLINE 1
|
||||||
|
|
||||||
|
#endif // BENCHMARK_MACROS_H_
|
1197
src/benchmark.cc
Normal file
1197
src/benchmark.cc
Normal file
File diff suppressed because it is too large
Load Diff
82
src/colorprint.cc
Normal file
82
src/colorprint.cc
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
#include "colorprint.h"
|
||||||
|
|
||||||
|
#include <stdarg.h>
|
||||||
|
|
||||||
|
#include "commandlineflags.h"
|
||||||
|
|
||||||
|
DECLARE_bool(color_print);
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
#ifdef OS_WINDOWS
|
||||||
|
typedef WORD PlatformColorCode;
|
||||||
|
#else
|
||||||
|
typedef const char* PlatformColorCode;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
PlatformColorCode GetPlatformColorCode(LogColor color) {
|
||||||
|
#ifdef OS_WINDOWS
|
||||||
|
switch (color) {
|
||||||
|
case COLOR_RED: return FOREGROUND_RED;
|
||||||
|
case COLOR_GREEN: return FOREGROUND_GREEN;
|
||||||
|
case COLOR_YELLOW: return FOREGROUND_RED | FOREGROUND_GREEN;
|
||||||
|
case COLOR_BLUE: return FOREGROUND_BLUE;
|
||||||
|
case COLOR_MAGENTA: return FOREGROUND_BLUE | FOREGROUND_RED;
|
||||||
|
case COLOR_CYAN: return FOREGROUND_BLUE | FOREGROUND_GREEN;
|
||||||
|
case COLOR_WHITE: // fall through to default
|
||||||
|
default: return 0;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
switch (color) {
|
||||||
|
case COLOR_RED: return "1";
|
||||||
|
case COLOR_GREEN: return "2";
|
||||||
|
case COLOR_YELLOW: return "3";
|
||||||
|
case COLOR_BLUE: return "4";
|
||||||
|
case COLOR_MAGENTA: return "5";
|
||||||
|
case COLOR_CYAN: return "6";
|
||||||
|
case COLOR_WHITE: return "7";
|
||||||
|
default: return NULL;
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
} // end namespace
|
||||||
|
|
||||||
|
// printf-style output to stdout, rendered in the requested color when the
// color_print flag is set.  With the flag off the text is printed unchanged.
void ColorPrintf(LogColor color, const char* fmt, ...) {
  va_list ap;
  va_start(ap, fmt);

  if (FLAGS_color_print) {
#ifdef OS_WINDOWS
    const HANDLE console = GetStdHandle(STD_OUTPUT_HANDLE);

    // Remember the current text attributes so they can be restored below.
    CONSOLE_SCREEN_BUFFER_INFO info;
    GetConsoleScreenBufferInfo(console, &info);
    const WORD saved_attrs = info.wAttributes;

    // Flush before every SetConsoleTextAttribute call; otherwise text that
    // is still buffered would be emitted with the new attributes.
    fflush(stdout);
    SetConsoleTextAttribute(console,
                            GetPlatformColorCode(color) | FOREGROUND_INTENSITY);
    vprintf(fmt, ap);

    fflush(stdout);
    // Put the previous text color back.
    SetConsoleTextAttribute(console, saved_attrs);
#else
    const char* const digit = GetPlatformColorCode(color);
    if (digit != NULL) {
      printf("\033[0;3%sm", digit);
    }
    vprintf(fmt, ap);
    fprintf(stdout, "\033[m");  // Resets the terminal to default.
#endif
  } else {
    // Coloring disabled: plain formatted output.
    vprintf(fmt, ap);
  }

  va_end(ap);
}
|
||||||
|
|
||||||
|
|
17
src/colorprint.h
Normal file
17
src/colorprint.h
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
#ifndef BENCHMARK_COLORPRINT_H_
|
||||||
|
#define BENCHMARK_COLORPRINT_H_
|
||||||
|
|
||||||
|
// Colors accepted by ColorPrintf.  The numeric values are spelled out so the
// ordering is explicit; they are the values the implicit numbering produced.
enum LogColor {
  COLOR_DEFAULT = 0,
  COLOR_RED     = 1,
  COLOR_GREEN   = 2,
  COLOR_YELLOW  = 3,
  COLOR_BLUE    = 4,
  COLOR_MAGENTA = 5,
  COLOR_CYAN    = 6,
  COLOR_WHITE   = 7
};
|
||||||
|
|
||||||
|
void ColorPrintf(LogColor color, const char* fmt, ...);
|
||||||
|
|
||||||
|
#endif // BENCHMARK_COLORPRINT_H_
|
213
src/commandlineflags.cc
Normal file
213
src/commandlineflags.cc
Normal file
@ -0,0 +1,213 @@
|
|||||||
|
#include "commandlineflags.h"

#include <ctype.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>

#include <iostream>
#include <limits>
|
||||||
|
|
||||||
|
namespace benchmark {
|
||||||
|
// Parses 'str' as a base-10, 32-bit signed integer.
//
//   src_text - description of where 'str' came from; used as the prefix of
//              the diagnostic written to std::cerr on failure.
//   str      - NUL-terminated text to parse; must not be NULL.
//   value    - receives the parsed number on success.
//
// Returns true on success.  Returns false and leaves *value unchanged when
// 'str' contains no number, has trailing characters after the number, or
// holds a number that does not fit in an int32_t.
bool ParseInt32(const std::string& src_text, const char* str, int32_t* value) {
  char* end = NULL;
  // strtol() reports overflow through errno only, so clear any stale value.
  errno = 0;
  const long long_value = strtol(str, &end, 10);  // NOLINT

  // end == str: nothing was converted (empty or whitespace-only input).
  // *end != '\0': an invalid character follows the number.
  if (end == str || *end != '\0') {
    std::cerr << src_text << " is expected to be a 32-bit integer, "
              << "but actually has value \"" << str << "\".\n";
    return false;
  }

  // ERANGE means the text overflowed a long.  Testing errno rather than
  // comparing against LONG_MAX/LONG_MIN keeps INT32_MAX and INT32_MIN valid
  // on platforms where long is itself 32 bits wide.
  const bool overflows_long = (errno == ERANGE);
  const bool overflows_int32 =
      long_value < std::numeric_limits<int32_t>::min() ||
      long_value > std::numeric_limits<int32_t>::max();
  if (overflows_long || overflows_int32) {
    std::cerr << src_text << " is expected to be a 32-bit integer, "
              << "but actually has value \"" << str << "\", "
              << "which overflows.\n";
    return false;
  }

  *value = static_cast<int32_t>(long_value);
  return true;
}
|
||||||
|
|
||||||
|
// Parses 'str' as a double.
//
//   src_text - description of where 'str' came from; used as the prefix of
//              the diagnostic written to std::cerr on failure.
//   str      - NUL-terminated text to parse; must not be NULL.
//   value    - receives the parsed number on success.
//
// Returns true on success.  Returns false and leaves *value unchanged when
// 'str' contains no number or has trailing characters after the number.
bool ParseDouble(const std::string& src_text, const char* str, double* value) {
  char* end = NULL;
  const double double_value = strtod(str, &end);  // NOLINT

  // end == str: nothing was converted (empty or whitespace-only input).
  // *end != '\0': an invalid character follows the number.
  if (end == str || *end != '\0') {
    std::cerr << src_text << " is expected to be a double, "
              << "but actually has value \"" << str << "\".\n";
    return false;
  }

  *value = double_value;
  return true;
}
|
||||||
|
|
||||||
|
|
||||||
|
// Looks up the environment variable 'name'.  Returns its value, or NULL
// when no value is available.
inline const char* GetEnv(const char* name) {
#if GTEST_OS_WINDOWS_MOBILE
  // We are on Windows CE, which has no environment variables.
  return NULL;
#else
  const char* const raw = getenv(name);
#if defined(__BORLANDC__) || defined(__SunOS_5_8) || defined(__SunOS_5_9)
  // Environment variables which we programmatically clear will be set to the
  // empty string rather than unset (NULL).  Treat that case as unset.
  if (raw != NULL && raw[0] == '\0') {
    return NULL;
  }
#endif
  return raw;
#endif
}
|
||||||
|
|
||||||
|
// Returns the name of the environment variable corresponding to the given
// flag: the flag name converted to upper case and prefixed with
// "BENCHMARK_".  For example, FlagToEnvVar("foo") returns "BENCHMARK_FOO".
//
// 'flag' must be a non-NULL, NUL-terminated string.
static std::string FlagToEnvVar(const char* flag) {
  std::string env_var("BENCHMARK_");
  for (const char* p = flag; *p != '\0'; ++p) {
    // toupper() is only defined for values representable as unsigned char
    // (or EOF), so convert first: a plain char may be negative.
    env_var += static_cast<char>(::toupper(static_cast<unsigned char>(*p)));
  }
  return env_var;
}
|
||||||
|
|
||||||
|
// Reads and returns the Boolean environment variable corresponding to
|
||||||
|
// the given flag; if it's not set, returns default_value.
|
||||||
|
//
|
||||||
|
// The value is considered true iff it's not "0".
|
||||||
|
bool BoolFromEnv(const char* flag, bool default_value) {
|
||||||
|
const std::string env_var = FlagToEnvVar(flag);
|
||||||
|
const char* const string_value = GetEnv(env_var.c_str());
|
||||||
|
return string_value == NULL ?
|
||||||
|
default_value : strcmp(string_value, "0") != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reads and returns a 32-bit integer stored in the environment
|
||||||
|
// variable corresponding to the given flag; if it isn't set or
|
||||||
|
// doesn't represent a valid 32-bit integer, returns default_value.
|
||||||
|
int32_t Int32FromEnv(const char* flag, int32_t default_value) {
|
||||||
|
const std::string env_var = FlagToEnvVar(flag);
|
||||||
|
const char* const string_value = GetEnv(env_var.c_str());
|
||||||
|
if (string_value == NULL) {
|
||||||
|
// The environment variable is not set.
|
||||||
|
return default_value;
|
||||||
|
}
|
||||||
|
|
||||||
|
int32_t result = default_value;
|
||||||
|
if (!ParseInt32(std::string("Environment variable ") + env_var,
|
||||||
|
string_value, &result)) {
|
||||||
|
std::cout << "The default value " << default_value << " is used.\n";
|
||||||
|
return default_value;
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reads and returns the string environment variable corresponding to
|
||||||
|
// the given flag; if it's not set, returns default_value.
|
||||||
|
const char* StringFromEnv(const char* flag, const char* default_value) {
|
||||||
|
const std::string env_var = FlagToEnvVar(flag);
|
||||||
|
const char* const value = GetEnv(env_var.c_str());
|
||||||
|
return value == NULL ? default_value : value;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Matches 'str' against the command line flag named 'flag'.  'str' is
// expected to look like "--flag=value"; when def_optional is true the
// "=value" part may be left out.
//
// Returns a pointer into 'str' at the start of the value (the terminating
// NUL when the value was omitted), or NULL if 'str' does not match.
const char* ParseFlagValue(const char* str,
                           const char* flag,
                           bool def_optional) {
  // Neither argument may be NULL.
  if (str == NULL || flag == NULL) {
    return NULL;
  }

  // 'str' has to begin with "--" followed by the flag name.
  std::string prefix("--");
  prefix += flag;
  if (strncmp(str, prefix.c_str(), prefix.length()) != 0) {
    return NULL;
  }

  // What follows the flag name decides the outcome.
  const char* const rest = str + prefix.length();
  if (*rest == '=') {
    // The value is everything after the '='.
    return rest + 1;
  }
  if (*rest == '\0' && def_optional) {
    // A bare "--flag" is fine when the value is optional.
    return rest;
  }
  // Either a different, longer flag name or a required value is missing.
  return NULL;
}
|
||||||
|
|
||||||
|
bool ParseBoolFlag(const char* str, const char* flag, bool* value) {
|
||||||
|
// Gets the value of the flag as a string.
|
||||||
|
const char* const value_str = ParseFlagValue(str, flag, true);
|
||||||
|
|
||||||
|
// Aborts if the parsing failed.
|
||||||
|
if (value_str == NULL) return false;
|
||||||
|
|
||||||
|
// Converts the string value to a bool.
|
||||||
|
*value = !(*value_str == '0' || *value_str == 'f' || *value_str == 'F');
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ParseInt32Flag(const char* str, const char* flag, int32_t* value) {
|
||||||
|
// Gets the value of the flag as a string.
|
||||||
|
const char* const value_str = ParseFlagValue(str, flag, false);
|
||||||
|
|
||||||
|
// Aborts if the parsing failed.
|
||||||
|
if (value_str == NULL) return false;
|
||||||
|
|
||||||
|
// Sets *value to the value of the flag.
|
||||||
|
return ParseInt32(std::string("The value of flag --") + flag,
|
||||||
|
value_str, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ParseDoubleFlag(const char* str, const char* flag, double* value) {
|
||||||
|
// Gets the value of the flag as a string.
|
||||||
|
const char* const value_str = ParseFlagValue(str, flag, false);
|
||||||
|
|
||||||
|
// Aborts if the parsing failed.
|
||||||
|
if (value_str == NULL) return false;
|
||||||
|
|
||||||
|
// Sets *value to the value of the flag.
|
||||||
|
return ParseDouble(std::string("The value of flag --") + flag,
|
||||||
|
value_str, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ParseStringFlag(const char* str, const char* flag, std::string* value) {
|
||||||
|
// Gets the value of the flag as a string.
|
||||||
|
const char* const value_str = ParseFlagValue(str, flag, false);
|
||||||
|
|
||||||
|
// Aborts if the parsing failed.
|
||||||
|
if (value_str == NULL) return false;
|
||||||
|
|
||||||
|
*value = value_str;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool IsFlag(const char* str, const char* flag) {
|
||||||
|
return (ParseFlagValue(str, flag, true) != NULL);
|
||||||
|
}
|
||||||
|
} // end namespace benchmark
|
79
src/commandlineflags.h
Normal file
79
src/commandlineflags.h
Normal file
@ -0,0 +1,79 @@
|
|||||||
|
#ifndef BENCHMARK_COMMANDLINEFLAGS_H_
|
||||||
|
#define BENCHMARK_COMMANDLINEFLAGS_H_
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
// Macro for referencing flags.
|
||||||
|
#define FLAG(name) FLAGS_##name
|
||||||
|
|
||||||
|
// Macros for declaring flags.
|
||||||
|
#define DECLARE_bool(name) extern bool FLAG(name)
|
||||||
|
#define DECLARE_int32(name) extern int32_t FLAG(name)
|
||||||
|
#define DECLARE_int64(name) extern int64_t FLAG(name)
|
||||||
|
#define DECLARE_double(name) extern double FLAG(name)
|
||||||
|
#define DECLARE_string(name) extern std::string FLAG(name)
|
||||||
|
|
||||||
|
// Macros for defining flags.
|
||||||
|
#define DEFINE_bool(name, default_val, doc) bool FLAG(name) = (default_val)
|
||||||
|
#define DEFINE_int32(name, default_val, doc) int32_t FLAG(name) = (default_val)
|
||||||
|
#define DEFINE_int64(name, default_val, doc) int64_t FLAG(name) = (default_val)
|
||||||
|
#define DEFINE_double(name, default_val, doc) double FLAG(name) = (default_val)
|
||||||
|
#define DEFINE_string(name, default_val, doc) \
|
||||||
|
std::string FLAG(name) = (default_val)
|
||||||
|
|
||||||
|
namespace benchmark {
|
||||||
|
|
||||||
|
// Parses 'str' for a 32-bit signed integer. If successful, writes the result
|
||||||
|
// to *value and returns true; otherwise leaves *value unchanged and returns
|
||||||
|
// false.
|
||||||
|
bool ParseInt32(const std::string& src_text, const char* str, int32_t* value);
|
||||||
|
|
||||||
|
// Parses a bool/Int32/double/string from the environment variable
// corresponding to the given benchmark flag.
|
||||||
|
bool BoolFromEnv(const char* flag, bool default_val);
|
||||||
|
int32_t Int32FromEnv(const char* flag, int32_t default_val);
|
||||||
|
double DoubleFromEnv(const char* flag, double default_val);
|
||||||
|
const char* StringFromEnv(const char* flag, const char* default_val);
|
||||||
|
|
||||||
|
// Parses a string for a bool flag, in the form of either
|
||||||
|
// "--flag=value" or "--flag".
|
||||||
|
//
|
||||||
|
// In the former case, the value is taken as true as long as it does
|
||||||
|
// not start with '0', 'f', or 'F'.
|
||||||
|
//
|
||||||
|
// In the latter case, the value is taken as true.
|
||||||
|
//
|
||||||
|
// On success, stores the value of the flag in *value, and returns
|
||||||
|
// true. On failure, returns false without changing *value.
|
||||||
|
bool ParseBoolFlag(const char* str, const char* flag, bool* value);
|
||||||
|
|
||||||
|
// Parses a string for an Int32 flag, in the form of
|
||||||
|
// "--flag=value".
|
||||||
|
//
|
||||||
|
// On success, stores the value of the flag in *value, and returns
|
||||||
|
// true. On failure, returns false without changing *value.
|
||||||
|
bool ParseInt32Flag(const char* str, const char* flag, int32_t* value);
|
||||||
|
|
||||||
|
// Parses a string for a Double flag, in the form of
|
||||||
|
// "--flag=value".
|
||||||
|
//
|
||||||
|
// On success, stores the value of the flag in *value, and returns
|
||||||
|
// true. On failure, returns false without changing *value.
|
||||||
|
bool ParseDoubleFlag(const char* str, const char* flag, double* value);
|
||||||
|
|
||||||
|
// Parses a string for a string flag, in the form of
|
||||||
|
// "--flag=value".
|
||||||
|
//
|
||||||
|
// On success, stores the value of the flag in *value, and returns
|
||||||
|
// true. On failure, returns false without changing *value.
|
||||||
|
bool ParseStringFlag(const char* str, const char* flag, std::string* value);
|
||||||
|
|
||||||
|
// Returns true if the string matches the flag.
|
||||||
|
bool IsFlag(const char* str, const char* flag);
|
||||||
|
|
||||||
|
}  // end namespace benchmark
|
||||||
|
|
||||||
|
#endif // BENCHMARK_COMMANDLINEFLAGS_H_
|
||||||
|
|
129
src/cycleclock.h
Normal file
129
src/cycleclock.h
Normal file
@ -0,0 +1,129 @@
|
|||||||
|
// ----------------------------------------------------------------------
|
||||||
|
// CycleClock
|
||||||
|
// A CycleClock tells you the current time in Cycles. The "time"
|
||||||
|
// is actually time since power-on. This is like time() but doesn't
|
||||||
|
// involve a system call and is much more precise.
|
||||||
|
//
|
||||||
|
// NOTE: Not all cpu/platform/kernel combinations guarantee that this
|
||||||
|
// clock increments at a constant rate or is synchronized across all logical
|
||||||
|
// cpus in a system.
|
||||||
|
//
|
||||||
|
// If you need the above guarantees, please consider using a different
|
||||||
|
// API. There are efforts to provide an interface which provides a millisecond
|
||||||
|
// granularity and implemented as a memory read. A memory read is generally
|
||||||
|
// cheaper than the CycleClock for many architectures.
|
||||||
|
//
|
||||||
|
// Also, in some out of order CPU implementations, the CycleClock is not
|
||||||
|
// serializing. So if you're trying to count at cycles granularity, your
|
||||||
|
// data might be inaccurate due to out of order instruction execution.
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
|
||||||
|
#ifndef BENCHMARK_CYCLECLOCK_H_
|
||||||
|
#define BENCHMARK_CYCLECLOCK_H_
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#if defined(OS_MACOSX)
|
||||||
|
# include <mach/mach_time.h>
|
||||||
|
#endif
|
||||||
|
// For MSVC, we want to use '_asm rdtsc' when possible (since it works
|
||||||
|
// with even ancient MSVC compilers), and when not possible the
|
||||||
|
// __rdtsc intrinsic, declared in <intrin.h>. Unfortunately, in some
|
||||||
|
// environments, <windows.h> and <intrin.h> have conflicting
|
||||||
|
// declarations of some other intrinsics, breaking compilation.
|
||||||
|
// Therefore, we simply declare __rdtsc ourselves. See also
|
||||||
|
// http://connect.microsoft.com/VisualStudio/feedback/details/262047
|
||||||
|
#if defined(COMPILER_MSVC) && !defined(_M_IX86)
|
||||||
|
extern "C" uint64_t __rdtsc();
|
||||||
|
#pragma intrinsic(__rdtsc)
|
||||||
|
#endif
|
||||||
|
#include <sys/time.h>
|
||||||
|
|
||||||
|
// NOTE: only i386 and x86_64 have been well tested.
|
||||||
|
// PPC, sparc, alpha, and ia64 are based on
|
||||||
|
// http://peter.kuscsik.com/wordpress/?p=14
|
||||||
|
// with modifications by m3b. See also
|
||||||
|
// https://setisvn.ssl.berkeley.edu/svn/lib/fftw-3.0.1/kernel/cycle.h
|
||||||
|
// Static-only wrapper around the platform's cycle (or time-base) counter.
// Exactly one implementation of Now() is selected at compile time from the
// preprocessor branches below; unsupported targets fail to compile.
struct CycleClock {
  // This should return the number of cycles since power-on.  Thread-safe.
  static inline int64_t Now() {
#if defined(OS_MACOSX)
    // this goes at the top because we need ALL Macs, regardless of
    // architecture, to return the number of "mach time units" that
    // have passed since startup.  See sysinfo.cc where
    // InitializeSystemInfo() sets the supposed cpu clock frequency of
    // macs to the number of mach time units per second, not actual
    // CPU clock frequency (which can change in the face of CPU
    // frequency scaling).  Also note that when the Mac sleeps, this
    // counter pauses; it does not continue counting, nor does it
    // reset to zero.
    return mach_absolute_time();
#elif defined(__i386__)
    int64_t ret;
    __asm__ volatile ("rdtsc" : "=A" (ret) );
    return ret;
#elif defined(__x86_64__) || defined(__amd64__)
    // rdtsc delivers the counter in two halves; combine them below.
    uint64_t low, high;
    __asm__ volatile ("rdtsc" : "=a" (low), "=d" (high));
    return (high << 32) | low;
#elif defined(__powerpc__) || defined(__ppc__)
    // This returns a time-base, which is not always precisely a cycle-count.
    int64_t tbl, tbu0, tbu1;
    asm("mftbu %0" : "=r" (tbu0));
    asm("mftb %0" : "=r" (tbl));
    asm("mftbu %0" : "=r" (tbu1));
    // If the upper half changed between the two reads, the lower half is
    // zeroed by the all-zeros mask; otherwise the all-ones mask keeps it.
    tbl &= -static_cast<int64_t>(tbu0 == tbu1);
    // high 32 bits in tbu1; low 32 bits in tbl  (tbu0 is garbage)
    return (tbu1 << 32) | tbl;
#elif defined(__sparc__)
    int64_t tick;
    asm(".byte 0x83, 0x41, 0x00, 0x00");
    asm("mov %%g1, %0" : "=r" (tick));
    return tick;
#elif defined(__ia64__)
    int64_t itc;
    asm("mov %0 = ar.itc" : "=r" (itc));
    return itc;
#elif defined(COMPILER_MSVC) && defined(_M_IX86)
    // Older MSVC compilers (like 7.x) don't seem to support the
    // __rdtsc intrinsic properly, so I prefer to use _asm instead
    // when I know it will work.  Otherwise, I'll use __rdtsc and hope
    // the code is being compiled with a non-ancient compiler.
    // NOTE(review): there is no explicit return here; presumably this relies
    // on the value rdtsc leaves in EDX:EAX being the return value - confirm.
    _asm rdtsc
#elif defined(COMPILER_MSVC)
    return __rdtsc();
#elif defined(ARMV3)
#if defined(ARMV6)  // V6 is the earliest arch that has a standard cyclecount
    uint32_t pmccntr;
    uint32_t pmuseren;
    uint32_t pmcntenset;
    // Read the user mode perf monitor counter access permissions.
    asm("mrc p15, 0, %0, c9, c14, 0" : "=r" (pmuseren));
    if (pmuseren & 1) {  // Allows reading perfmon counters for user mode code.
      asm("mrc p15, 0, %0, c9, c12, 1" : "=r" (pmcntenset));
      if (pmcntenset & 0x80000000ul) {  // Is it counting?
        asm("mrc p15, 0, %0, c9, c13, 0" : "=r" (pmccntr));
        // The counter is set up to count every 64th cycle
        return static_cast<int64_t>(pmccntr) * 64;  // Should optimize to << 6
      }
    }
#endif
    // Counter unavailable: fall back to wall-clock time in microseconds.
    struct timeval tv;
    gettimeofday(&tv, NULL);
    return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
#elif defined(__mips__)
    // mips apparently only allows rdtsc for superusers, so we fall
    // back to gettimeofday.  It's possible clock_gettime would be better.
    struct timeval tv;
    gettimeofday(&tv, NULL);
    return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
#else
// The soft failover to a generic implementation is automatic only for ARM.
// For other platforms the developer is expected to make an attempt to create
// a fast implementation and use generic version if nothing better is available.
#error You need to define CycleTimer for your OS and CPU
#endif
  }
};
|
||||||
|
|
||||||
|
#endif // BENCHMARK_CYCLECLOCK_H_
|
110
src/macros.h
Normal file
110
src/macros.h
Normal file
@ -0,0 +1,110 @@
|
|||||||
|
#ifndef BENCHMARK_MACROS_H_
|
||||||
|
#define BENCHMARK_MACROS_H_
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
|
||||||
|
TypeName(const TypeName&); \
|
||||||
|
void operator=(const TypeName&);
|
||||||
|
|
||||||
|
// The arraysize(arr) macro returns the # of elements in an array arr.
|
||||||
|
// The expression is a compile-time constant, and therefore can be
|
||||||
|
// used in defining new arrays, for example. If you use arraysize on
|
||||||
|
// a pointer by mistake, you will get a compile-time error.
|
||||||
|
//
|
||||||
|
// One caveat is that, for C++03, arraysize() doesn't accept any array of
|
||||||
|
// an anonymous type or a type defined inside a function. In these rare
|
||||||
|
// cases, you have to use the unsafe ARRAYSIZE() macro below. This is
|
||||||
|
// due to a limitation in C++03's template system. The limitation has
|
||||||
|
// been removed in C++11.
|
||||||
|
|
||||||
|
// This template function declaration is used in defining arraysize.
|
||||||
|
// Note that the function doesn't need an implementation, as we only
|
||||||
|
// use its type.
|
||||||
|
template <typename T, size_t N>
|
||||||
|
char (&ArraySizeHelper(T (&array)[N]))[N];
|
||||||
|
|
||||||
|
// That gcc wants both of these prototypes seems mysterious. VC, for
|
||||||
|
// its part, can't decide which to use (another mystery). Matching of
|
||||||
|
// template overloads: the final frontier.
|
||||||
|
#ifndef COMPILER_MSVC
|
||||||
|
template <typename T, size_t N>
|
||||||
|
char (&ArraySizeHelper(const T (&array)[N]))[N];
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define arraysize(array) (sizeof(ArraySizeHelper(array)))
|
||||||
|
|
||||||
|
// The STATIC_ASSERT macro can be used to verify that a compile time
|
||||||
|
// expression is true. For example, you could use it to verify the
|
||||||
|
// size of a static array:
|
||||||
|
//
|
||||||
|
//   STATIC_ASSERT(arraysize(content_type_names) == CONTENT_NUM_TYPES,
|
||||||
|
// content_type_names_incorrect_size);
|
||||||
|
//
|
||||||
|
// or to make sure a struct is smaller than a certain size:
|
||||||
|
//
|
||||||
|
// STATIC_ASSERT(sizeof(foo) < 128, foo_too_large);
|
||||||
|
//
|
||||||
|
// The second argument to the macro is the name of the variable. If
|
||||||
|
// the expression is false, most compilers will issue a warning/error
|
||||||
|
// containing the name of the variable.
|
||||||
|
|
||||||
|
template <bool>
|
||||||
|
struct StaticAssert {
|
||||||
|
};
|
||||||
|
|
||||||
|
#define STATIC_ASSERT(expr, msg) \
|
||||||
|
typedef StaticAssert<(bool(expr))> msg[bool(expr) ? 1 : -1]
|
||||||
|
|
||||||
|
// Implementation details of STATIC_ASSERT:
|
||||||
|
//
|
||||||
|
// - STATIC_ASSERT works by defining an array type that has -1
|
||||||
|
// elements (and thus is invalid) when the expression is false.
|
||||||
|
//
|
||||||
|
// - The simpler definition
|
||||||
|
//
|
||||||
|
// #define STATIC_ASSERT(expr, msg) typedef char msg[(expr) ? 1 : -1]
|
||||||
|
//
|
||||||
|
// does not work, as gcc supports variable-length arrays whose sizes
|
||||||
|
// are determined at run-time (this is gcc's extension and not part
|
||||||
|
// of the C++ standard). As a result, gcc fails to reject the
|
||||||
|
// following code with the simple definition:
|
||||||
|
//
|
||||||
|
// int foo;
|
||||||
|
// STATIC_ASSERT(foo, msg); // not supposed to compile as foo is
|
||||||
|
// // not a compile-time constant.
|
||||||
|
//
|
||||||
|
// - By using the type StaticAssert<(bool(expr))>, we ensure that
|
||||||
|
// expr is a compile-time constant. (Template arguments must be
|
||||||
|
// determined at compile-time.)
|
||||||
|
//
|
||||||
|
// - The outer parentheses in StaticAssert<(bool(expr))> are necessary
|
||||||
|
// to work around a bug in gcc 3.4.4 and 4.0.1. If we had written
|
||||||
|
//
|
||||||
|
// StaticAssert<bool(expr)>
|
||||||
|
//
|
||||||
|
// instead, these compilers will refuse to compile
|
||||||
|
//
|
||||||
|
// STATIC_ASSERT(5 > 0, some_message);
|
||||||
|
//
|
||||||
|
// (They seem to think the ">" in "5 > 0" marks the end of the
|
||||||
|
// template argument list.)
|
||||||
|
//
|
||||||
|
// - The array size is (bool(expr) ? 1 : -1), instead of simply
|
||||||
|
//
|
||||||
|
// ((expr) ? 1 : -1).
|
||||||
|
//
|
||||||
|
// This is to avoid running into a bug in MS VC 7.1, which
|
||||||
|
// causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1.
|
||||||
|
|
||||||
|
#define CHECK(b) do { if (!(b)) assert(false); } while(0)
|
||||||
|
#define CHECK_EQ(a, b) CHECK((a) == (b))
|
||||||
|
#define CHECK_GE(a, b) CHECK((a) >= (b))
|
||||||
|
#define CHECK_LE(a, b) CHECK((a) <= (b))
|
||||||
|
#define CHECK_GT(a, b) CHECK((a) > (b))
|
||||||
|
#define CHECK_LT(a, b) CHECK((a) < (b))
|
||||||
|
|
||||||
|
|
||||||
|
#define ATTRIBUTE_UNUSED __attribute__ ((unused))
|
||||||
|
|
||||||
|
#endif // BENCHMARK_MACROS_H_
|
20
src/mutex_lock.h
Normal file
20
src/mutex_lock.h
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
#ifndef BENCHMARK_MUTEX_LOCK_H_
|
||||||
|
#define BENCHMARK_MUTEX_LOCK_H_
|
||||||
|
|
||||||
|
#include <pthread.h>
|
||||||
|
|
||||||
|
// Scoped guard for a pthread mutex: the constructor locks the mutex and the
// destructor unlocks it.  The guard stores only the pointer it was given,
// so the mutex must outlive the guard.
class mutex_lock {
 public:
  // Locks *mu.  'mu' must point to an initialized mutex.
  explicit mutex_lock(pthread_mutex_t* mu) : mu_(mu) {
    pthread_mutex_lock(mu_);
  }

  // Unlocks the mutex locked by the constructor.
  ~mutex_lock() {
    pthread_mutex_unlock(mu_);
  }

 private:
  // Not copyable: a copy would unlock the same mutex a second time when it
  // is destroyed.  Declared but intentionally never defined.
  mutex_lock(const mutex_lock&);
  mutex_lock& operator=(const mutex_lock&);

  pthread_mutex_t* mu_;
};
|
||||||
|
|
||||||
|
#endif // BENCHMARK_MUTEX_LOCK_H_
|
8
src/port.h
Normal file
8
src/port.h
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
#ifndef BENCHMARK_PORT_H_
|
||||||
|
#define BENCHMARK_PORT_H_
|
||||||
|
|
||||||
|
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
|
||||||
|
TypeName(const TypeName&); \
|
||||||
|
void operator=(const TypeName&);
|
||||||
|
|
||||||
|
#endif // BENCHMARK_PORT_H_
|
42
src/sleep.cc
Normal file
42
src/sleep.cc
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
#include "sleep.h"
|
||||||
|
|
||||||
|
#include <time.h>
|
||||||
|
#include <errno.h>
|
||||||
|
|
||||||
|
#ifdef OS_WINDOWS
|
||||||
|
|
||||||
|
// Window's _sleep takes milliseconds argument.
|
||||||
|
void SleepForMilliseconds(int milliseconds) {
|
||||||
|
_sleep(milliseconds);
|
||||||
|
}
|
||||||
|
void SleepForSeconds(double seconds) {
|
||||||
|
SleepForMilliseconds(static_cast<int>(seconds * 1000));
|
||||||
|
}
|
||||||
|
|
||||||
|
#else // OS_WINDOWS
|
||||||
|
|
||||||
|
static const int64_t kNumMillisPerSecond = 1000LL;
|
||||||
|
static const int64_t kNumMicrosPerMilli = 1000LL;
|
||||||
|
static const int64_t kNumMicrosPerSecond = kNumMillisPerSecond * 1000LL;
|
||||||
|
static const int64_t kNumNanosPerMicro = 1000LL;
|
||||||
|
|
||||||
|
// Suspends the calling thread for the given number of microseconds.  A
// sleep cut short by a signal is resumed for the time still outstanding.
void SleepForMicroseconds(int64_t microseconds) {
  struct timespec remaining;
  remaining.tv_sec = microseconds / kNumMicrosPerSecond;
  remaining.tv_nsec = (microseconds % kNumMicrosPerSecond) * kNumNanosPerMicro;

  for (;;) {
    // nanosleep stores the unslept time back into 'remaining' when it is
    // interrupted, so the retry waits only for what is left.
    if (nanosleep(&remaining, &remaining) == 0) {
      break;  // Slept for the full interval.
    }
    if (errno != EINTR) {
      break;  // Failed for a reason other than a signal; give up.
    }
  }
}
|
||||||
|
|
||||||
|
// Suspends the calling thread for the given number of milliseconds.
void SleepForMilliseconds(int milliseconds) {
  // Widen before multiplying so the conversion to microseconds is done in
  // 64-bit arithmetic.
  const int64_t micros = static_cast<int64_t>(milliseconds) * kNumMicrosPerMilli;
  SleepForMicroseconds(micros);
}
|
||||||
|
|
||||||
|
// Suspends the calling thread for the given (possibly fractional) number of
// seconds; the duration is truncated to whole microseconds.
void SleepForSeconds(double seconds) {
  const int64_t micros = static_cast<int64_t>(seconds * kNumMicrosPerSecond);
  SleepForMicroseconds(micros);
}
|
||||||
|
|
||||||
|
#endif // OS_WINDOWS
|
||||||
|
|
||||||
|
|
||||||
|
|
10
src/sleep.h
Normal file
10
src/sleep.h
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
#ifndef BENCHMARK_SLEEP_H_
|
||||||
|
#define BENCHMARK_SLEEP_H_
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
void SleepForMicroseconds(int64_t microseconds);
|
||||||
|
void SleepForMilliseconds(int milliseconds);
|
||||||
|
void SleepForSeconds(double seconds);
|
||||||
|
|
||||||
|
#endif // BENCHMARK_SLEEP_H_
|
306
src/stat.h
Normal file
306
src/stat.h
Normal file
@ -0,0 +1,306 @@
|
|||||||
|
#ifndef BENCHMARK_STAT_H_
|
||||||
|
#define BENCHMARK_STAT_H_
|
||||||
|
|
||||||
|
#include <math.h>
|
||||||
|
#include <iostream>
|
||||||
|
#include <limits>
|
||||||
|
|
||||||
|
template <typename VType, typename NumType>
class Stat1;

template <typename VType, typename NumType>
class Stat1MinMax;

typedef Stat1<float, float> Stat1_f;
typedef Stat1<double, double> Stat1_d;
typedef Stat1MinMax<float, float> Stat1MinMax_f;
typedef Stat1MinMax<double, double> Stat1MinMax_d;

template <typename VType> class Vector2;
template <typename VType> class Vector3;
template <typename VType> class Vector4;

// Stat1 accumulates weighted running statistics over a set of samples: the
// total weight, the weighted sum and the weighted sum of squares. From those
// three quantities it derives the mean and the (population) standard
// deviation. VType is the sample type (a scalar, or a Vector2/3/4 handled
// component-wise); NumType is the type used for weights.
template <typename VType, typename NumType>
class Stat1 {
 public:
  typedef Stat1<VType, NumType> Self;

  // Create an empty sample set.
  Stat1() {
    Clear();
  }
  // Reset to the empty sample set.
  void Clear() {
    numsamples_ = NumType();
    sum_squares_ = sum_ = VType();
  }
  // Create a sample of value dat and weight 1
  explicit Stat1(const VType &dat) {
    sum_ = dat;
    sum_squares_ = Sqr(dat);
    numsamples_ = 1;
  }
  // Create statistics for all the samples between begin (included)
  // and end(excluded)
  explicit Stat1(const VType *begin, const VType *end) {
    Clear();
    for ( const VType *item = begin; item < end; ++item ) {
      (*this) += Stat1(*item);
    }
  }
  // Create a sample of value dat and weight w
  Stat1(const VType &dat, const NumType &w) {
    sum_ = w * dat;
    sum_squares_ = w * Sqr(dat);
    numsamples_ = w;
  }
  // Copy constructor
  Stat1(const Self &stat) {
    sum_ = stat.sum_;
    sum_squares_ = stat.sum_squares_;
    numsamples_ = stat.numsamples_;
  }

  // Copy assignment
  inline Self &operator =(const Self &stat) {
    sum_ = stat.sum_;
    sum_squares_ = stat.sum_squares_;
    numsamples_ = stat.numsamples_;
    return (*this);
  }
  // Merge statistics from two sample sets.
  inline Self &operator +=(const Self &stat) {
    sum_ += stat.sum_;
    sum_squares_+= stat.sum_squares_;
    numsamples_ += stat.numsamples_;
    return (*this);
  }
  // The operation opposite to +=
  inline Self &operator -=(const Self &stat) {
    sum_ -= stat.sum_;
    sum_squares_-= stat.sum_squares_;
    numsamples_ -= stat.numsamples_;
    return (*this);
  }
  // Multiply the weight of the set of samples by a factor k
  inline Self &operator *=(const VType &k) {
    sum_ *= k;
    sum_squares_*= k;
    numsamples_ *= k;
    return (*this);
  }
  // Merge statistics from two sample sets.
  inline Self operator + (const Self &stat) const {
    return Self(*this) += stat;
  }
  // The operation opposite to +
  inline Self operator - (const Self &stat) const {
    return Self(*this) -= stat;
  }
  // Multiply the weight of the set of samples by a factor k
  inline Self operator * (const VType &k) const {
    return Self(*this) *= k;
  }
  // Return the total weight of this sample set
  NumType NumSamples() const {
    return numsamples_;
  }
  // Return the sum of this sample set
  VType Sum() const {
    return sum_;
  }
  // Return the mean of this sample set (VType() for an empty set)
  VType Mean() const {
    if (numsamples_ == 0) return VType();
    return sum_ * (1.0 / numsamples_);
  }
  // Return the mean of this sample set and compute the standard deviation at
  // the same time. stddev may be NULL. For an empty set both the returned
  // mean and *stddev are VType(), matching what StdDev() reports, so the
  // caller never reads an unwritten output.
  VType Mean(VType *stddev) const {
    if (numsamples_ == 0) {
      if (stddev) *stddev = VType();
      return VType();
    }
    VType mean = sum_ * (1.0 / numsamples_);
    if (stddev) {
      VType avg_squares = sum_squares_ * (1.0 / numsamples_);
      *stddev = Sqrt(avg_squares - Sqr(mean));
    }
    return mean;
  }
  // Return the standard deviation of the sample set (VType() when empty)
  VType StdDev() const {
    if (numsamples_ == 0) return VType();
    VType mean = Mean();
    VType avg_squares = sum_squares_ * (1.0 / numsamples_);
    return Sqrt(avg_squares - Sqr(mean));
  }

 private:
  // Let i be the index of the samples provided (using +=)
  // and weight[i],value[i] be the data of sample #i
  // then the variables have the following meaning:
  NumType numsamples_;  // sum of weight[i];
  VType sum_;           // sum of weight[i]*value[i];
  VType sum_squares_;   // sum of weight[i]*value[i]^2;

  // Template function used to square a number.
  // For a vector we square all components
  template <typename SType>
  static inline SType Sqr(const SType &dat) {
    return dat * dat;
  }
  template <typename SType>
  static inline Vector2<SType> Sqr(const Vector2<SType> &dat) {
    return dat.MulComponents(dat);
  }
  template <typename SType>
  static inline Vector3<SType> Sqr(const Vector3<SType> &dat) {
    return dat.MulComponents(dat);
  }
  template <typename SType>
  static inline Vector4<SType> Sqr(const Vector4<SType> &dat) {
    return dat.MulComponents(dat);
  }

  // Template function used to take the square root of a number.
  // For a vector we take the square root of all components
  template <typename SType>
  static inline SType Sqrt(const SType &dat) {
    // Avoid NaN due to imprecision in the calculations
    if ( dat < 0 )
      return 0;
    return sqrt(dat);
  }
  template <typename SType>
  static inline Vector2<SType> Sqrt(const Vector2<SType> &dat) {
    // Avoid NaN due to imprecision in the calculations
    return Max(dat, Vector2<SType>()).Sqrt();
  }
  template <typename SType>
  static inline Vector3<SType> Sqrt(const Vector3<SType> &dat) {
    // Avoid NaN due to imprecision in the calculations
    return Max(dat, Vector3<SType>()).Sqrt();
  }
  template <typename SType>
  static inline Vector4<SType> Sqrt(const Vector4<SType> &dat) {
    // Avoid NaN due to imprecision in the calculations
    return Max(dat, Vector4<SType>()).Sqrt();
  }
};
|
||||||
|
|
||||||
|
// Useful printing function
|
||||||
|
template <typename VType, typename NumType>
|
||||||
|
inline std::ostream& operator<<(std::ostream& out,
|
||||||
|
const Stat1<VType, NumType>& s) {
|
||||||
|
out << "{ avg = " << s.Mean()
|
||||||
|
<< " std = " << s.StdDev()
|
||||||
|
<< " nsamples = " << s.NumSamples() << "}";
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Stat1MinMax: same as Stat1, but it also
|
||||||
|
// keeps the Min and Max values; the "-"
|
||||||
|
// operator is disabled because it cannot be implemented
|
||||||
|
// efficiently
|
||||||
|
template <typename VType, typename NumType>
|
||||||
|
class Stat1MinMax : public Stat1<VType, NumType> {
|
||||||
|
public:
|
||||||
|
typedef Stat1MinMax<VType, NumType> Self;
|
||||||
|
|
||||||
|
Stat1MinMax() {
|
||||||
|
Clear();
|
||||||
|
}
|
||||||
|
void Clear() {
|
||||||
|
Stat1<VType, NumType>::Clear();
|
||||||
|
if (std::numeric_limits<VType>::has_infinity) {
|
||||||
|
min_ = std::numeric_limits<VType>::infinity();
|
||||||
|
max_ = -std::numeric_limits<VType>::infinity();
|
||||||
|
} else {
|
||||||
|
min_ = std::numeric_limits<VType>::max();
|
||||||
|
max_ = std::numeric_limits<VType>::min();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Create a sample of value dat and weight 1
|
||||||
|
explicit Stat1MinMax(const VType &dat) : Stat1<VType, NumType>(dat) {
|
||||||
|
max_ = dat;
|
||||||
|
min_ = dat;
|
||||||
|
}
|
||||||
|
// Create statistics for all the samples between begin (included)
|
||||||
|
// and end(excluded)
|
||||||
|
explicit Stat1MinMax(const VType *begin, const VType *end) {
|
||||||
|
Clear();
|
||||||
|
for ( const VType *item = begin; item < end; ++item ) {
|
||||||
|
(*this) += Stat1MinMax(*item);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Create a sample of value dat and weight w
|
||||||
|
Stat1MinMax(const VType &dat, const NumType &w)
|
||||||
|
: Stat1<VType, NumType>(dat, w) {
|
||||||
|
max_ = dat;
|
||||||
|
min_ = dat;
|
||||||
|
}
|
||||||
|
// Copy operator
|
||||||
|
Stat1MinMax(const Self &stat) : Stat1<VType, NumType>(stat) {
|
||||||
|
max_ = stat.max_;
|
||||||
|
min_ = stat.min_;
|
||||||
|
}
|
||||||
|
inline Self &operator =(const Self &stat) {
|
||||||
|
this->Stat1<VType, NumType>::operator=(stat);
|
||||||
|
max_ = stat.max_;
|
||||||
|
min_ = stat.min_;
|
||||||
|
return (*this);
|
||||||
|
}
|
||||||
|
// Merge statistics from two sample sets.
|
||||||
|
inline Self &operator +=(const Self &stat) {
|
||||||
|
this->Stat1<VType, NumType>::operator+=(stat);
|
||||||
|
if (stat.max_ > max_) max_ = stat.max_;
|
||||||
|
if (stat.min_ < min_) min_ = stat.min_;
|
||||||
|
return (*this);
|
||||||
|
}
|
||||||
|
// Multiply the weight of the set of samples by a factor k
|
||||||
|
inline Self &operator *=(const VType &stat) {
|
||||||
|
this->Stat1<VType, NumType>::operator*=(stat);
|
||||||
|
return (*this);
|
||||||
|
}
|
||||||
|
// Merge statistics from two sample sets.
|
||||||
|
inline Self operator + (const Self &stat) const {
|
||||||
|
return Self(*this) += stat;
|
||||||
|
}
|
||||||
|
// Multiply the weight of the set of samples by a factor k
|
||||||
|
inline Self operator * (const VType &k) const {
|
||||||
|
return Self(*this) *= k;
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
// The - operation makes no sense with Min/Max
|
||||||
|
// unless we keep the full list of values (but we don't)
|
||||||
|
// make it private, and let it undefined so nobody can call it
|
||||||
|
Self &operator -=(const Self &stat); // senseless. let it undefined.
|
||||||
|
|
||||||
|
// The operation opposite to -
|
||||||
|
Self operator - (const Self &stat) const; // senseless. let it undefined.
|
||||||
|
|
||||||
|
public:
|
||||||
|
// Return the maximal value in this sample set
|
||||||
|
VType Max() const {
|
||||||
|
return max_;
|
||||||
|
}
|
||||||
|
// Return the minimal value in this sample set
|
||||||
|
VType Min() const {
|
||||||
|
return min_;
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
// Let i be the index of the samples provided (using +=)
|
||||||
|
// and weight[i],value[i] be the data of sample #i
|
||||||
|
// then the variables have the following meaning:
|
||||||
|
VType max_; // max of value[i]
|
||||||
|
VType min_; // min of value[i]
|
||||||
|
};
|
||||||
|
|
||||||
|
// Useful printing function
|
||||||
|
template <typename VType, typename NumType>
|
||||||
|
inline std::ostream& operator <<(std::ostream& out,
|
||||||
|
const Stat1MinMax<VType, NumType>& s) {
|
||||||
|
out << "{ avg = " << s.Mean()
|
||||||
|
<< " std = " << s.StdDev()
|
||||||
|
<< " nsamples = " << s.NumSamples()
|
||||||
|
<< " min = " << s.Min()
|
||||||
|
<< " max = " << s.Max() << "}";
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // BENCHMARK_STAT_H_
|
337
src/sysinfo.cc
Normal file
337
src/sysinfo.cc
Normal file
@ -0,0 +1,337 @@
|
|||||||
|
#include "sysinfo.h"
|
||||||
|
|
||||||
|
#include <errno.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <pthread.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <sys/resource.h>
|
||||||
|
#include <sys/time.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <limits>
|
||||||
|
|
||||||
|
#include "cycleclock.h"
|
||||||
|
#include "macros.h"
|
||||||
|
#include "mutex_lock.h"
|
||||||
|
#include "sleep.h"
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
// One-time guard handed to pthread_once() so InitializeSystemInfo() runs at
// most once.
pthread_once_t cpuinfo_init = PTHREAD_ONCE_INIT;

// Values filled in by InitializeSystemInfo(); until then they hold these
// placeholder defaults.
double cpuinfo_cycles_per_second = 1.0;
int cpuinfo_num_cpus = 1;  // Conservative guess

// Set up by pthread_mutex_init() inside InitializeSystemInfo() and taken by
// MyCPUUsage() around the /proc/self/cputime_ns reader. File-local already by
// virtue of the enclosing unnamed namespace.
pthread_mutex_t cputimens_mutex;
|
||||||
|
|
||||||
|
// Helper function estimates cycles/sec by observing cycles elapsed during
|
||||||
|
// sleep(). Using small sleep time decreases accuracy significantly.
|
||||||
|
int64_t EstimateCyclesPerSecond(const int estimate_time_ms) {
|
||||||
|
CHECK(estimate_time_ms > 0);
|
||||||
|
double multiplier = 1000.0 / (double)estimate_time_ms; // scale by this much
|
||||||
|
|
||||||
|
const int64_t start_ticks = CycleClock::Now();
|
||||||
|
SleepForMilliseconds(estimate_time_ms);
|
||||||
|
const int64_t guess = int64_t(multiplier * (CycleClock::Now() - start_ticks));
|
||||||
|
return guess;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Helper function for reading an int from a file. Returns true if successful
// and the memory location pointed to by value is set to the value read.
//
// The file must start with a base-10 integer that fits in an int and is
// followed by either a newline or the end of the data. On any failure (file
// cannot be opened, read error, empty file, no digits, trailing garbage,
// out-of-range value) the function returns false and leaves *value untouched.
bool ReadIntFromFile(const char *file, int *value) {
  const int fd = open(file, O_RDONLY);
  if (fd == -1) return false;

  // Zero-fill so that whatever read() delivers is always NUL-terminated.
  char line[1024];
  memset(line, '\0', sizeof(line));
  const ssize_t bytes_read = read(fd, line, sizeof(line) - 1);
  close(fd);
  // read() yields -1 on error and 0 on an empty file; neither holds a number.
  if (bytes_read <= 0) return false;

  char* end = NULL;
  errno = 0;
  const long parsed = strtol(line, &end, 10);
  if (end == line) return false;                   // no digits were consumed
  if (*end != '\n' && *end != '\0') return false;  // trailing garbage
  if (errno == ERANGE ||
      parsed < std::numeric_limits<int>::min() ||
      parsed > std::numeric_limits<int>::max()) {
    return false;                                  // does not fit in an int
  }

  *value = static_cast<int>(parsed);
  return true;
}
|
||||||
|
|
||||||
|
// Determines the cycle-counter frequency and (where implemented) the number
// of CPUs, storing them in cpuinfo_cycles_per_second and cpuinfo_num_cpus. It
// also initializes cputimens_mutex. CyclesPerSecond() and NumCPUs() invoke it
// through pthread_once().
void InitializeSystemInfo() {
  // TODO: destroy this
  pthread_mutex_init(&cputimens_mutex, NULL);

#if defined OS_LINUX || defined OS_CYGWIN
  bool saw_mhz = false;
  char line[1024];
  char* err;
  int freq;

  // If the kernel is exporting the tsc frequency use that. There are issues
  // where cpuinfo_max_freq cannot be relied on because the BIOS may be
  // exporting an invalid p-state (on x86) or p-states may be used to put the
  // processor in a new mode (turbo mode). Essentially, those frequencies
  // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as
  // well.
  if (!saw_mhz &&
      ReadIntFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) {
    // The value is in kHz (as the file name suggests).  For example, on a
    // 2GHz warpstation, the file contains the value "2000000".
    cpuinfo_cycles_per_second = freq * 1000.0;
    saw_mhz = true;
  }

  // If CPU scaling is in effect, we want to use the *maximum* frequency,
  // not whatever CPU speed some random processor happens to be using now.
  if (!saw_mhz &&
      ReadIntFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
                      &freq)) {
    // The value is in kHz.  For example, on a 2GHz warpstation, the file
    // contains the value "2000000".
    cpuinfo_cycles_per_second = freq * 1000.0;
    saw_mhz = true;
  }

  // Read /proc/cpuinfo for other values, and if there is no cpuinfo_max_freq.
  const char* pname = "/proc/cpuinfo";
  int fd = open(pname, O_RDONLY);
  if (fd == -1) {
    perror(pname);
    if (!saw_mhz) {
      cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000);
    }
    return;  // TODO: use generic tester instead?
  }

  double bogo_clock = 1.0;
  bool saw_bogo = false;
  int max_cpu_id = 0;
  int num_cpus = 0;
  line[0] = line[1] = '\0';
  ssize_t chars_read = 0;
  do {   // we'll exit when the last read didn't read anything
    // Move the next line to the beginning of the buffer
    const size_t oldlinelen = strlen(line);
    if (sizeof(line) == oldlinelen + 1)    // oldlinelen took up entire line
      line[0] = '\0';
    else                                   // still other lines left to save
      memmove(line, line + oldlinelen+1, sizeof(line) - (oldlinelen+1));
    // Terminate the new line, reading more if we can't find the newline
    char* newline = strchr(line, '\n');
    if (newline == NULL) {
      const size_t linelen = strlen(line);
      const size_t bytes_to_read = sizeof(line)-1 - linelen;
      CHECK(bytes_to_read > 0);  // because the memmove recovered >=1 bytes
      chars_read = read(fd, line + linelen, bytes_to_read);
      // Treat a failed read like end-of-file. Without this, a -1 result would
      // make the terminator below land one byte before the intended position
      // (before the start of the buffer when linelen is 0).
      if (chars_read < 0) chars_read = 0;
      line[linelen + chars_read] = '\0';
      newline = strchr(line, '\n');
    }
    if (newline != NULL)
      *newline = '\0';

    // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only
    // accept positive values. Some environments (virtual machines) report
    // zero, which would cause infinite looping in WallTime_Init.
    if (!saw_mhz && strncasecmp(line, "cpu MHz", sizeof("cpu MHz")-1) == 0) {
      const char* freqstr = strchr(line, ':');
      if (freqstr) {
        cpuinfo_cycles_per_second = strtod(freqstr+1, &err) * 1000000.0;
        if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0)
          saw_mhz = true;
      }
    } else if (strncasecmp(line, "bogomips", sizeof("bogomips")-1) == 0) {
      const char* freqstr = strchr(line, ':');
      if (freqstr) {
        bogo_clock = strtod(freqstr+1, &err) * 1000000.0;
        if (freqstr[1] != '\0' && *err == '\0' && bogo_clock > 0)
          saw_bogo = true;
      }
    } else if (strncasecmp(line, "processor", sizeof("processor")-1) == 0) {
      num_cpus++;  // count up every time we see an "processor :" entry
      const char* freqstr = strchr(line, ':');
      if (freqstr) {
        const int cpu_id = strtol(freqstr+1, &err, 10);
        if (freqstr[1] != '\0' && *err == '\0' && max_cpu_id < cpu_id)
          max_cpu_id = cpu_id;
      }
    }
  } while (chars_read > 0);
  close(fd);

  if (!saw_mhz) {
    if (saw_bogo) {
      // If we didn't find anything better, we'll use bogomips, but
      // we're not happy about it.
      cpuinfo_cycles_per_second = bogo_clock;
    } else {
      // If we don't even have bogomips, we'll use the slow estimation.
      cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000);
    }
  }
  if (num_cpus == 0) {
    fprintf(stderr, "Failed to read num. CPUs correctly from /proc/cpuinfo\n");
  } else {
    if ((max_cpu_id + 1) != num_cpus) {
      fprintf(stderr,
              "CPU ID assignments in /proc/cpuinfo seems messed up."
              " This is usually caused by a bad BIOS.\n");
    }
    cpuinfo_num_cpus = num_cpus;
  }

#elif defined OS_FREEBSD
  // For this sysctl to work, the machine must be configured without
  // SMP, APIC, or APM support.  hz should be 64-bit in freebsd 7.0
  // and later.  Before that, it's a 32-bit quantity (and gives the
  // wrong answer on machines faster than 2^32 Hz).  See
  //  http://lists.freebsd.org/pipermail/freebsd-i386/2004-November/001846.html
  // But also compare FreeBSD 7.0:
  //  http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG70#L223
  //  231         error = sysctl_handle_quad(oidp, &freq, 0, req);
  // To FreeBSD 6.3 (it's the same in 6-STABLE):
  //  http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG6#L131
  //  139         error = sysctl_handle_int(oidp, &freq, sizeof(freq), req);
#if __FreeBSD__ >= 7
  uint64_t hz = 0;
#else
  unsigned int hz = 0;
#endif
  size_t sz = sizeof(hz);
  const char *sysctl_path = "machdep.tsc_freq";
  if ( sysctlbyname(sysctl_path, &hz, &sz, NULL, 0) != 0 ) {
    fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n",
            sysctl_path, strerror(errno));
    cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000);
  } else {
    cpuinfo_cycles_per_second = hz;
  }
  // TODO: also figure out cpuinfo_num_cpus

#elif defined OS_WINDOWS
# pragma comment(lib, "shlwapi.lib")  // for SHGetValue()
  // In NT, read MHz from the registry. If we fail to do so or we're in win9x
  // then make a crude estimate.
  OSVERSIONINFO os;
  os.dwOSVersionInfoSize = sizeof(os);
  DWORD data, data_size = sizeof(data);
  if (GetVersionEx(&os) &&
      os.dwPlatformId == VER_PLATFORM_WIN32_NT &&
      SUCCEEDED(SHGetValueA(HKEY_LOCAL_MACHINE,
                         "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
                           "~MHz", NULL, &data, &data_size)))
    cpuinfo_cycles_per_second =
        static_cast<int64_t>(data) * static_cast<int64_t>(1000 * 1000);  // was mhz
  else
    cpuinfo_cycles_per_second = EstimateCyclesPerSecond(500);  // TODO <500?
  // TODO: also figure out cpuinfo_num_cpus

#elif defined OS_MACOSX
  // returning "mach time units" per second. the current number of elapsed
  // mach time units can be found by calling uint64 mach_absolute_time();
  // while not as precise as actual CPU cycles, it is accurate in the face
  // of CPU frequency scaling and multi-cpu/core machines.
  // Our mac users have these types of machines, and accuracy
  // (i.e. correctness) trumps precision.
  // See cycleclock.h: CycleClock::Now(), which returns number of mach time
  // units on Mac OS X.
  mach_timebase_info_data_t timebase_info;
  mach_timebase_info(&timebase_info);
  double mach_time_units_per_nanosecond =
      static_cast<double>(timebase_info.denom) /
      static_cast<double>(timebase_info.numer);
  cpuinfo_cycles_per_second = mach_time_units_per_nanosecond * 1e9;

  int num_cpus = 0;
  size_t size = sizeof(num_cpus);
  int numcpus_name[] = { CTL_HW, HW_NCPU };
  if (::sysctl(numcpus_name, arraysize(numcpus_name), &num_cpus, &size, 0, 0)
      == 0
      && (size == sizeof(num_cpus)))
    cpuinfo_num_cpus = num_cpus;

#else
  // Generic cycles per second counter
  cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000);
#endif
}
|
||||||
|
} // end namespace
|
||||||
|
|
||||||
|
#ifndef OS_WINDOWS
|
||||||
|
// getrusage() based implementation of MyCPUUsage: returns the user plus
// system CPU time consumed by this process, in seconds, or 0.0 when
// getrusage() fails.
static double MyCPUUsageRUsage() {
  struct rusage usage;
  if (getrusage(RUSAGE_SELF, &usage) != 0) {
    return 0.0;
  }
  const struct timeval& user = usage.ru_utime;
  const struct timeval& sys = usage.ru_stime;
  return (static_cast<double>(user.tv_sec) +
          static_cast<double>(user.tv_usec)*1e-6 +
          static_cast<double>(sys.tv_sec) +
          static_cast<double>(sys.tv_usec)*1e-6);
}
|
||||||
|
|
||||||
|
// Reads this process's CPU time from /proc/self/cputime_ns and stores it, in
// seconds, in *cputime. The descriptor is opened on first use and kept in a
// function-local static; it is closed and forgotten again whenever a read or
// parse fails. Returns false (leaving *cputime untouched) on any failure.
// The "Locked" suffix reflects that MyCPUUsage() calls this while holding
// cputimens_mutex.
static bool MyCPUUsageCPUTimeNsLocked(double *cputime) {
  static int cputime_fd = -1;
  if (cputime_fd == -1) {
    cputime_fd = open("/proc/self/cputime_ns", O_RDONLY);
    if (cputime_fd < 0) {
      cputime_fd = -1;
      return false;
    }
  }

  char buff[64];
  memset(buff, 0, sizeof(buff));
  unsigned long long nanos = 0;
  // Always re-read from offset 0; one spare byte keeps the text terminated.
  bool ok = pread(cputime_fd, buff, sizeof(buff)-1, 0) > 0;
  if (ok) {
    nanos = strtoull(buff, NULL, 0);
    // strtoull() reports overflow by returning the maximum value.
    ok = (nanos != (std::numeric_limits<unsigned long long>::max)());
  }
  if (!ok) {
    close(cputime_fd);
    cputime_fd = -1;
    return false;
  }

  *cputime = static_cast<double>(nanos) / 1e9;
  return true;
}
|
||||||
|
|
||||||
|
double MyCPUUsage() {
|
||||||
|
{
|
||||||
|
mutex_lock l(&cputimens_mutex);
|
||||||
|
static bool use_cputime_ns = true;
|
||||||
|
if (use_cputime_ns) {
|
||||||
|
double value;
|
||||||
|
if (MyCPUUsageCPUTimeNsLocked(&value)) {
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
// Once MyCPUUsageCPUTimeNsLocked fails once fall back to getrusage().
|
||||||
|
std::cout << "Reading /proc/self/cputime_ns failed. Using getrusage().\n";
|
||||||
|
use_cputime_ns = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return MyCPUUsageRUsage();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the user plus system CPU time, in seconds, that getrusage() reports
// for RUSAGE_CHILDREN, or 0.0 when getrusage() fails.
double ChildrenCPUUsage() {
  struct rusage usage;
  if (getrusage(RUSAGE_CHILDREN, &usage) != 0) {
    return 0.0;
  }
  const struct timeval& user = usage.ru_utime;
  const struct timeval& sys = usage.ru_stime;
  return (static_cast<double>(user.tv_sec) +
          static_cast<double>(user.tv_usec)*1e-6 +
          static_cast<double>(sys.tv_sec) +
          static_cast<double>(sys.tv_usec)*1e-6);
}
|
||||||
|
#endif // OS_WINDOWS
|
||||||
|
|
||||||
|
double CyclesPerSecond(void) {
|
||||||
|
pthread_once(&cpuinfo_init, &InitializeSystemInfo);
|
||||||
|
return cpuinfo_cycles_per_second;
|
||||||
|
}
|
||||||
|
|
||||||
|
int NumCPUs(void) {
|
||||||
|
pthread_once(&cpuinfo_init, &InitializeSystemInfo);
|
||||||
|
return cpuinfo_num_cpus;
|
||||||
|
}
|
||||||
|
|
9
src/sysinfo.h
Normal file
9
src/sysinfo.h
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
#ifndef BENCHMARK_SYSINFO_H_
|
||||||
|
#define BENCHMARK_SYSINFO_H_
|
||||||
|
|
||||||
|
double MyCPUUsage();
|
||||||
|
double ChildrenCPUUsage();
|
||||||
|
int NumCPUs();
|
||||||
|
double CyclesPerSecond();
|
||||||
|
|
||||||
|
#endif // BENCHMARK_SYSINFO_H_
|
137
src/walltime.cc
Normal file
137
src/walltime.cc
Normal file
@ -0,0 +1,137 @@
|
|||||||
|
#include "walltime.h"
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <sys/time.h>
|
||||||
|
#include <time.h>
|
||||||
|
|
||||||
|
#include <atomic>
|
||||||
|
#include <limits>
|
||||||
|
|
||||||
|
#include "cycleclock.h"
|
||||||
|
#include "macros.h"
|
||||||
|
#include "sysinfo.h"
|
||||||
|
|
||||||
|
namespace walltime {
|
||||||
|
namespace {
|
||||||
|
const double kMaxErrorInterval = 100e-6;
|
||||||
|
|
||||||
|
std::atomic<bool> initialized(false);
|
||||||
|
WallTime base_walltime = 0.0;
|
||||||
|
int64_t base_cycletime = 0;
|
||||||
|
int64_t cycles_per_second;
|
||||||
|
double seconds_per_cycle;
|
||||||
|
uint32_t last_adjust_time = 0;
|
||||||
|
std::atomic<int32_t> drift_adjust(0);
|
||||||
|
int64_t max_interval_cycles = 0;
|
||||||
|
|
||||||
|
// Helper routines to load/store a float from an AtomicWord. Required because
|
||||||
|
// g++ < 4.7 doesn't support std::atomic<float> correctly. I cannot wait to get
|
||||||
|
// rid of this horror show.
|
||||||
|
inline void SetDrift(float f) {
|
||||||
|
int32_t w;
|
||||||
|
memcpy(&w, &f, sizeof(f));
|
||||||
|
std::atomic_store(&drift_adjust, w);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline float GetDrift() {
|
||||||
|
float f;
|
||||||
|
int32_t w = std::atomic_load(&drift_adjust);
|
||||||
|
memcpy(&f, &w, sizeof(f));
|
||||||
|
return f;
|
||||||
|
}
|
||||||
|
|
||||||
|
static_assert(sizeof(float) <= sizeof(int32_t),
|
||||||
|
"type sizes don't allow the drift_adjust hack");
|
||||||
|
|
||||||
|
// Returns the current wall-clock time from gettimeofday() as seconds since
// the epoch, with the microsecond part as the fraction.
WallTime Slow() {
  struct timeval now;
  gettimeofday(&now, NULL);
  const double fraction = now.tv_usec * 1e-6;
  return now.tv_sec + fraction;
}
|
||||||
|
|
||||||
|
// Splits `value` (seconds since the epoch) into a broken-down time *t and the
// fractional second *subsecond. `local` selects local time (localtime_r)
// versus UTC (gmtime_r).
//
// Returns false, with *t zeroed and *subsecond set to 0.0, when the value is
// negative, NaN, too large for time_t, or cannot be converted by the C
// library.
bool SplitTimezone(WallTime value, bool local, struct tm* t,
                   double* subsecond) {
  memset(t, 0, sizeof(*t));
  *subsecond = 0.0;

  // `!(value >= 0)` also rejects NaN, which must never reach the cast below.
  // The upper bound is max+1 computed in double so that a value which rounds
  // up to exactly 2^63 is rejected instead of overflowing the cast.
  const double limit =
      static_cast<double>(std::numeric_limits<time_t>::max()) + 1.0;
  if (!(value >= 0) || (value >= limit)) {
    return false;
  }

  const time_t whole_time = static_cast<time_t>(value);
  const struct tm* converted = local ? localtime_r(&whole_time, t)
                                     : gmtime_r(&whole_time, t);
  if (converted == NULL) {
    // The conversion can fail (e.g. the year does not fit in struct tm);
    // report that instead of handing back a meaningless time.
    memset(t, 0, sizeof(*t));
    return false;
  }

  *subsecond = value - whole_time;
  return true;
}
|
||||||
|
} // end namespace
|
||||||
|
|
||||||
|
// This routine should be invoked to initialize walltime.
|
||||||
|
// It is not intended for general purpose use.
|
||||||
|
void Initialize() {
|
||||||
|
CHECK(!std::atomic_load(&initialized));
|
||||||
|
cycles_per_second = static_cast<int64_t>(CyclesPerSecond());
|
||||||
|
CHECK(cycles_per_second != 0);
|
||||||
|
seconds_per_cycle = 1.0 / cycles_per_second;
|
||||||
|
max_interval_cycles = static_cast<int64_t>(
|
||||||
|
cycles_per_second * kMaxErrorInterval);
|
||||||
|
do {
|
||||||
|
base_cycletime = CycleClock::Now();
|
||||||
|
base_walltime = Slow();
|
||||||
|
} while (CycleClock::Now() - base_cycletime > max_interval_cycles);
|
||||||
|
// We are now sure that "base_walltime" and "base_cycletime" were produced
|
||||||
|
// within kMaxErrorInterval of one another.
|
||||||
|
|
||||||
|
SetDrift(0.0);
|
||||||
|
last_adjust_time = static_cast<uint32_t>(uint64_t(base_cycletime) >> 32);
|
||||||
|
std::atomic_store(&initialized, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the current wall-clock time in seconds. Before Initialize() has
// run this is simply Slow(). Afterwards the time is extrapolated from the
// cycle counter plus the stored drift correction; the correction itself is
// re-measured against Slow() whenever the upper 32 bits of the cycle counter
// differ from the value recorded at the last adjustment.
WallTime Now() {
  if (!initialized.load())
    return Slow();

  for (;;) {
    const int64_t cycles = CycleClock::Now();
    const int64_t cycle_delta = cycles - base_cycletime;
    const WallTime estimate = base_walltime + cycle_delta * seconds_per_cycle;

    // Recompute drift no more often than every 2^32 cycles.
    // I.e., @2GHz, ~ every two seconds
    const uint32_t top_bits = static_cast<uint32_t>(uint64_t(cycles) >> 32);
    if (top_bits == last_adjust_time) {  // don't need to recompute drift
      return estimate + GetDrift();
    }

    const WallTime actual = Slow();
    if (CycleClock::Now() - cycles <= max_interval_cycles) {
      // "actual" and "estimate" were produced within kMaxErrorInterval of one
      // another, so their difference is a usable drift measurement.
      SetDrift(actual - estimate);
      last_adjust_time = top_bits;
      return actual;
    }
    // The two samples were too far apart; take a fresh pair.
  }
}
|
||||||
|
|
||||||
|
// Formats `time` with strftime() using `format`, in local time when `local`
// is true and UTC otherwise, writing the text into `storage` and returning
// `storage`. When remainder_us is non-NULL it receives the sub-second part in
// microseconds, clamped to [0, 999999]; it is set to 0 when the time cannot
// be split. An unrepresentable time produces an "Invalid time: ..." message
// instead.
//
// NOTE(review): `storage` is a plain char*, so sizeof(storage) below is the
// size of a pointer, not of the caller's buffer. Output is therefore limited
// to sizeof(char*) - 1 characters. Fixing this needs a buffer-size parameter,
// i.e. an interface change that must be made together with walltime.h and
// the callers.
const char* Print(WallTime time, const char *format, bool local,
                  char* storage, int *remainder_us) {
  struct tm split;
  double subsecond;
  if (!SplitTimezone(time, local, &split, &subsecond)) {
    // Never leave the caller's output parameter unwritten.
    if (remainder_us != NULL) *remainder_us = 0;
    snprintf(storage, sizeof(storage), "Invalid time: %f", time);
    return storage;
  }

  if (remainder_us != NULL) {
    *remainder_us = static_cast<int>((subsecond * 1000000) + 0.5);
    if (*remainder_us > 999999) *remainder_us = 999999;
    if (*remainder_us < 0) *remainder_us = 0;
  }
  // strftime() returns 0 when the result does not fit, and the buffer
  // contents are then unspecified; hand back an empty string instead.
  if (strftime(storage, sizeof(storage), format, &split) == 0) {
    storage[0] = '\0';
  }
  return storage;
}
|
||||||
|
} // end namespace walltime
|
19
src/walltime.h
Normal file
19
src/walltime.h
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
#ifndef BENCHMARK_WALLTIME_H_
|
||||||
|
#define BENCHMARK_WALLTIME_H_
|
||||||
|
|
||||||
|
typedef double WallTime;
|
||||||
|
|
||||||
|
namespace walltime {
|
||||||
|
void Initialize();
|
||||||
|
WallTime Now();
|
||||||
|
|
||||||
|
// GIVEN: a walltime, a generic format string (as understood by strftime),
// and a boolean flag specifying whether the time is local or UTC
// (true=local).
// RETURNS: the formatted string, written into the storage print buffer
// that was passed in (the returned pointer is that buffer).
// ALSO RETURNS (via remainder_us): the remaining number of microseconds,
// which is never printed in the string since strftime does not understand
// it.
|
||||||
|
const char* Print(WallTime time, const char *format, bool local,
|
||||||
|
char* storage, int *remainder_us);
|
||||||
|
} // end namespace walltime
|
||||||
|
|
||||||
|
#endif // BENCHMARK_WALLTIME_H_
|
138
test/benchmark_test.cc
Normal file
138
test/benchmark_test.cc
Normal file
@ -0,0 +1,138 @@
|
|||||||
|
#include "benchmark/benchmark.h"
|
||||||
|
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include <limits>
|
||||||
|
#include <list>
|
||||||
|
#include <map>
|
||||||
|
#include <set>
|
||||||
|
#include <sstream>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
// Returns n! computed recursively.  The function is marked
// ATTRIBUTE_NOINLINE.
//
// The result is returned as `int`, so it is only meaningful while n! fits in
// an int (n <= 12 for a 32-bit int).
int ATTRIBUTE_NOINLINE Factorial(uint32_t n) {
  // 0! and 1! are both 1.  Testing `n <= 1` rather than `n == 1` stops
  // Factorial(0) from wrapping `n - 1` around to UINT32_MAX and recursing
  // until the stack is exhausted.
  return (n <= 1) ? 1 : n * Factorial(n - 1);
}
|
||||||
|
|
||||||
|
// Approximates pi by summing `depth` terms of an alternating series of odd
// reciprocals.
//
// Terms 0 and 1 both evaluate to +1 (-1/-1 and 1/1), so the running sum
// carries an extra 1 that is subtracted before scaling by 4.  With
// depth <= 0 no term is added and the result is -4.
double CalculatePi(int depth) {
  double sum = 0.0;
  int term = 0;
  while (term < depth) {
    const int sign = (term % 2 == 0) ? -1 : 1;
    const int odd = (2 * term) - 1;
    sum += static_cast<double>(sign) / static_cast<double>(odd);
    ++term;
  }
  return (sum - 1.0) * 4;
}
|
||||||
|
|
||||||
|
// Builds a set containing the integers 0 .. size-1 (empty when size <= 0).
// Despite the name, the contents are sequential rather than random.
std::set<int> ConstructRandomSet(int size) {
  std::set<int> result;
  int next = 0;
  while (next < size) {
    result.insert(next);
    ++next;
  }
  return result;
}
|
||||||
|
|
||||||
|
// Scratch vector used by BM_SetupTeardown: thread 0 allocates it before the
// timing loop and deletes it afterwards.  The enclosing anonymous namespace
// already gives it internal linkage, so no `static` is needed.
std::vector<int>* test_vector = NULL;
|
||||||
|
|
||||||
|
} // end namespace
|
||||||
|
|
||||||
|
#ifdef DEBUG
// Times repeated calls to Factorial(8).  Only compiled when DEBUG is defined.
static void BM_Factorial(benchmark::State& state) {
  int fac_42 = 0;
  while (state.KeepRunning()) {
    fac_42 = Factorial(8);
  }
  // Consume the result so the calls above cannot be optimized away.
  CHECK(fac_42 != std::numeric_limits<int>::max());
}
BENCHMARK(BM_Factorial);
#endif  // DEBUG
|
||||||
|
|
||||||
|
static void BM_CalculatePiRange(benchmark::State& state) {
|
||||||
|
double pi = 0.0;
|
||||||
|
while (state.KeepRunning())
|
||||||
|
pi = CalculatePi(state.range_x());
|
||||||
|
std::stringstream ss;
|
||||||
|
ss << pi;
|
||||||
|
state.SetLabel(ss.str());
|
||||||
|
}
|
||||||
|
BENCHMARK_RANGE(BM_CalculatePiRange, 1, 1024 * 1024);
|
||||||
|
|
||||||
|
static void BM_CalculatePi(benchmark::State& state) {
|
||||||
|
static const int depth = 1024;
|
||||||
|
double pi ATTRIBUTE_UNUSED = 0.0;
|
||||||
|
while (state.KeepRunning()) {
|
||||||
|
pi = CalculatePi(depth);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
BENCHMARK(BM_CalculatePi)->Threads(8);
|
||||||
|
BENCHMARK(BM_CalculatePi)->ThreadRange(1, 32);
|
||||||
|
BENCHMARK(BM_CalculatePi)->ThreadPerCpu();
|
||||||
|
|
||||||
|
static void BM_SetInsert(benchmark::State& state) {
|
||||||
|
while (state.KeepRunning()) {
|
||||||
|
state.PauseTiming();
|
||||||
|
std::set<int> data = ConstructRandomSet(state.range_x());
|
||||||
|
state.ResumeTiming();
|
||||||
|
for (int j = 0; j < state.range_y(); ++j)
|
||||||
|
data.insert(rand());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
BENCHMARK(BM_SetInsert)->RangePair(1<<10,8<<10, 1,10);
|
||||||
|
|
||||||
|
template<typename Q>
|
||||||
|
static void BM_Sequential(benchmark::State& state) {
|
||||||
|
Q q;
|
||||||
|
typename Q::value_type v;
|
||||||
|
while (state.KeepRunning())
|
||||||
|
for (int i = state.range_x(); --i; )
|
||||||
|
q.push_back(v);
|
||||||
|
const int64_t items_processed =
|
||||||
|
static_cast<int64_t>(state.iterations()) * state.range_x();
|
||||||
|
state.SetItemsProcessed(items_processed);
|
||||||
|
state.SetBytesProcessed(items_processed * sizeof(v));
|
||||||
|
}
|
||||||
|
BENCHMARK_TEMPLATE(BM_Sequential, std::vector<int>)->Range(1 << 0, 1 << 10);
|
||||||
|
BENCHMARK_TEMPLATE(BM_Sequential, std::list<int>)->Range(1 << 0, 1 << 10);
|
||||||
|
|
||||||
|
static void BM_StringCompare(benchmark::State& state) {
|
||||||
|
std::string s1(state.range_x(), '-');
|
||||||
|
std::string s2(state.range_x(), '-');
|
||||||
|
int r = 0;
|
||||||
|
while (state.KeepRunning())
|
||||||
|
r |= s1.compare(s2);
|
||||||
|
// Prevent compiler optimizations
|
||||||
|
CHECK(r != std::numeric_limits<int>::max());
|
||||||
|
}
|
||||||
|
BENCHMARK(BM_StringCompare)->Range(1, 1<<20);
|
||||||
|
|
||||||
|
static void BM_SetupTeardown(benchmark::State& state) {
|
||||||
|
if (state.thread_index == 0)
|
||||||
|
test_vector = new std::vector<int>();
|
||||||
|
while (state.KeepRunning())
|
||||||
|
test_vector->push_back(0);
|
||||||
|
if (state.thread_index == 0) {
|
||||||
|
delete test_vector;
|
||||||
|
test_vector = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
BENCHMARK(BM_SetupTeardown);
|
||||||
|
|
||||||
|
static void BM_LongTest(benchmark::State& state) {
|
||||||
|
double tracker = 0.0;
|
||||||
|
while (state.KeepRunning())
|
||||||
|
for (int i = 0; i < state.range_x(); ++i)
|
||||||
|
tracker += i;
|
||||||
|
CHECK(tracker != 0.0);
|
||||||
|
}
|
||||||
|
BENCHMARK(BM_LongTest)->Range(1<<16,1<<28);
|
||||||
|
|
||||||
|
// Entry point: initializes the benchmark library from the command line,
// sanity-checks the helper functions, then runs the registered benchmarks.
int main(int argc, const char* argv[]) {
  benchmark::Initialize(&argc, argv);

  // Quick self-checks of the helpers exercised by the benchmarks.
  CHECK(Factorial(8) == 40320);
  CHECK(CalculatePi(1) == 0.0);

  benchmark::RunSpecifiedBenchmarks();
  return 0;
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user