diff --git a/AUTHORS b/AUTHORS
index 9da43c73..7ddffd8c 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -16,6 +16,7 @@ Eugene Zhuk
 Evgeny Safronov
 Felix Homann
 Google Inc.
+Ismael Jimenez Martinez
 JianXiong Zhou
 Jussi Knuuttila
 Kaito Udagawa
diff --git a/CONTRIBUTORS b/CONTRIBUTORS
index dc311afb..af402921 100644
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -32,6 +32,7 @@ Eric Fiselier
 Eugene Zhuk
 Evgeny Safronov
 Felix Homann
+Ismael Jimenez Martinez
 JianXiong Zhou
 Jussi Knuuttila
 Kaito Udagawa
diff --git a/README.md b/README.md
index 051b3011..c989e571 100644
--- a/README.md
+++ b/README.md
@@ -61,6 +61,13 @@ the specified range and will generate a benchmark for each such argument.
 BENCHMARK(BM_memcpy)->Range(8, 8<<10);
 ```
 
+By default the arguments in the range are generated in multiples of eight, and the command above selects [ 8, 64, 512, 4k, 8k ]. In the following code the range multiplier is changed to multiples of two.
+
+```c++
+BENCHMARK(BM_memcpy)->RangeMultiplier(2)->Range(8, 8<<10);
+```
+The arguments generated are now [ 8, 16, 32, 64, 128, 256, 512, 1024, 2k, 4k, 8k ].
+
 You might have a benchmark that depends on two inputs. For example, the
 following code defines a family of benchmarks for measuring the speed of set
 insertion.
@@ -109,6 +116,27 @@ static void CustomArguments(benchmark::internal::Benchmark* b) {
 BENCHMARK(BM_SetInsert)->Apply(CustomArguments);
 ```
 
+### Calculate asymptotic complexity (Big O)
+Asymptotic complexity can be calculated for a family of benchmarks. The following code will calculate the coefficient for the high-order term in the running time, as well as the normalized root-mean-square error, for string comparison.
+
+```c++
+static void BM_StringCompare(benchmark::State& state) {
+  std::string s1(state.range_x(), '-');
+  std::string s2(state.range_x(), '-');
+  while (state.KeepRunning())
+    benchmark::DoNotOptimize(s1.compare(s2));
+}
+BENCHMARK(BM_StringCompare)
+    ->RangeMultiplier(2)->Range(1<<10, 1<<18)->Complexity(benchmark::oN);
+```
+
+As shown in the following invocation, the asymptotic complexity can also be calculated automatically by requesting `benchmark::oAuto`.
+
+```c++
+BENCHMARK(BM_StringCompare)
+    ->RangeMultiplier(2)->Range(1<<10, 1<<18)->Complexity(benchmark::oAuto);
+```
+
 ### Templated benchmarks
 Templated benchmarks work the same way: This example produces and consumes
 messages of size `sizeof(v)` `range_x` times. It also outputs throughput in the
diff --git a/include/benchmark/benchmark_api.h b/include/benchmark/benchmark_api.h
index c7ce7d6b..bf46d974 100644
--- a/include/benchmark/benchmark_api.h
+++ b/include/benchmark/benchmark_api.h
@@ -154,6 +154,7 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
 #include <stdint.h>
 
 #include "macros.h"
+#include "complexity.h"
 
 namespace benchmark {
 class BenchmarkReporter;
@@ -321,6 +322,19 @@ public:
     return bytes_processed_;
   }
 
+  // If this routine is called with complexity_n > 0 and a complexity report is
+  // requested for the family of benchmarks, then the current benchmark will be
+  // part of the computation and complexity_n will represent the size of N.
+  BENCHMARK_ALWAYS_INLINE
+  void SetComplexityN(size_t complexity_n) {
+    complexity_n_ = complexity_n;
+  }
+
+  BENCHMARK_ALWAYS_INLINE
+  size_t complexity_length_n() {
+    return complexity_n_;
+  }
+
   // If this routine is called with items > 0, then an items/s
   // label is printed on the benchmark report line for the currently
   // executing benchmark. It is typically called at the end of a processing
@@ -393,6 +407,8 @@ private:
   size_t bytes_processed_;
   size_t items_processed_;
 
+  size_t complexity_n_;
+
 public:
   // Index of the executing thread. Values from [0, threads).
   const int thread_index;
@@ -477,6 +493,10 @@ public:
   // or MB/second values.
   Benchmark* UseManualTime();
 
+  // Set the asymptotic computational complexity for the benchmark. If called,
+  // the asymptotic computational complexity will be shown in the output.
+  Benchmark* Complexity(BigO complexity);
+
   // Support for running multiple copies of the same benchmark concurrently
   // in multiple threads. This may be useful when measuring the scaling
   // of some piece of code.
diff --git a/include/benchmark/complexity.h b/include/benchmark/complexity.h
new file mode 100644
index 00000000..82dba829
--- /dev/null
+++ b/include/benchmark/complexity.h
@@ -0,0 +1,42 @@
+#ifndef COMPLEXITY_H_
+#define COMPLEXITY_H_
+
+#include <string>
+
+namespace benchmark {
+
+// BigO is passed to a benchmark in order to specify the asymptotic
+// computational complexity for the benchmark. If oAuto is selected, the
+// complexity that best fits the measured data is calculated automatically.
+enum BigO {
+  oNone,
+  o1,
+  oN,
+  oNSquared,
+  oNCubed,
+  oLogN,
+  oNLogN,
+  oAuto
+};
+
+inline std::string GetBigO(BigO complexity) {
+  switch (complexity) {
+    case oN:
+      return "* N";
+    case oNSquared:
+      return "* N**2";
+    case oNCubed:
+      return "* N**3";
+    case oLogN:
+      return "* lgN";
+    case oNLogN:
+      return "* NlgN";
+    case o1:
+      return "* 1";
+    default:
+      return "";
+  }
+}
+
+}  // end namespace benchmark
+#endif  // COMPLEXITY_H_
diff --git a/include/benchmark/reporter.h b/include/benchmark/reporter.h
index aaf5fbff..a9124883 100644
--- a/include/benchmark/reporter.h
+++ b/include/benchmark/reporter.h
@@ -48,7 +48,11 @@ class BenchmarkReporter {
           cpu_accumulated_time(0),
           bytes_per_second(0),
           items_per_second(0),
-          max_heapbytes_used(0) {}
+          max_heapbytes_used(0),
+          complexity(oNone),
+          complexity_n(0),
+          report_big_o(false),
+          report_rms(false) {}
 
     std::string benchmark_name;
     std::string report_label;  // Empty if not set by benchmark.
@@ -63,6 +67,14 @@ class BenchmarkReporter {
     // This is set to 0.0 if memory tracing is not enabled.
     double max_heapbytes_used;
+
+    // Keep track of arguments to compute asymptotic complexity
+    BigO complexity;
+    int complexity_n;
+
+    // Inform the print function whether the current run is a complexity report
+    bool report_big_o;
+    bool report_rms;
   };
 
   // Called once for every suite of benchmarks run.
@@ -78,6 +90,12 @@ class BenchmarkReporter {
   // Note that all the grouped benchmark runs should refer to the same
   // benchmark, thus have the same name.
   virtual void ReportRuns(const std::vector<Run>& report) = 0;
+
+  // Called once for the last benchmark in a family of benchmarks; gives
+  // information about the asymptotic complexity and RMS.
+  // Note that all the benchmark runs in a range should refer to the same
+  // benchmark, thus have the same name.
+  virtual void ReportComplexity(const std::vector<Run>& complexity_reports) = 0;
 
   // Called once and only once after every group of benchmarks is run and
   // reported.
@@ -85,7 +103,8 @@ class BenchmarkReporter {
   virtual ~BenchmarkReporter();
 
 protected:
-  static void ComputeStats(std::vector<Run> const& reports, Run* mean, Run* stddev);
+  static void ComputeStats(const std::vector<Run>& reports, Run* mean, Run* stddev);
+  static void ComputeBigO(const std::vector<Run>& reports, Run* big_o, Run* rms);
   static TimeUnitMultiplier GetTimeUnitAndMultiplier(TimeUnit unit);
 };
 
@@ -95,6 +114,7 @@ class ConsoleReporter : public BenchmarkReporter {
 public:
   virtual bool ReportContext(const Context& context);
   virtual void ReportRuns(const std::vector<Run>& reports);
+  virtual void ReportComplexity(const std::vector<Run>& complexity_reports);
 
 protected:
   virtual void PrintRunData(const Run& report);
@@ -107,6 +127,7 @@ public:
   JSONReporter() : first_report_(true) {}
   virtual bool ReportContext(const Context& context);
   virtual void ReportRuns(const std::vector<Run>& reports);
+  virtual void ReportComplexity(const std::vector<Run>& complexity_reports);
   virtual void Finalize();
 
 private:
@@ -119,6 +140,7 @@ class CSVReporter : public BenchmarkReporter {
 public:
   virtual bool ReportContext(const Context& context);
   virtual void ReportRuns(const std::vector<Run>& reports);
+  virtual void ReportComplexity(const std::vector<Run>& complexity_reports);
 
 private:
   void PrintRunData(const Run& report);
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 811d0755..a681b35d 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -5,7 +5,7 @@ include_directories(${PROJECT_SOURCE_DIR}/src)
 set(SOURCE_FILES "benchmark.cc" "colorprint.cc" "commandlineflags.cc"
                  "console_reporter.cc" "csv_reporter.cc" "json_reporter.cc"
                  "log.cc" "reporter.cc" "sleep.cc" "string_util.cc"
-                 "sysinfo.cc" "walltime.cc")
+                 "sysinfo.cc" "walltime.cc" "minimal_leastsq.cc")
 
 # Determine the correct regular expression engine to use
 if(HAVE_STD_REGEX)
   set(RE_FILES "re_std.cc")
diff --git a/src/benchmark.cc b/src/benchmark.cc
index bd970717..15274d89 100644
--- a/src/benchmark.cc
+++ b/src/benchmark.cc
@@ -116,9 +116,10 @@ std::string* GetReportLabel() {
 //static benchmark::MallocCounter *benchmark_mc;
 
 struct ThreadStats {
-  ThreadStats() : bytes_processed(0), items_processed(0) {}
+  ThreadStats() : bytes_processed(0), items_processed(0), complexity_n(0) {}
   int64_t bytes_processed;
   int64_t items_processed;
+  int complexity_n;
 };
 
 // Timer management class
@@ -290,6 +291,8 @@ struct Benchmark::Instance {
   int range_multiplier;
   bool use_real_time;
   bool use_manual_time;
+  BigO complexity;
+  bool last_benchmark_instance;
   double min_time;
   int threads;    // Number of concurrent threads to use
   bool multithreaded;  // Is benchmark multi-threaded?
@@ -331,6 +334,7 @@ public:
   void MinTime(double n);
   void UseRealTime();
   void UseManualTime();
+  void Complexity(BigO complexity);
   void Threads(int t);
   void ThreadRange(int min_threads, int max_threads);
   void ThreadPerCpu();
@@ -349,6 +353,7 @@ private:
   double min_time_;
   bool use_real_time_;
   bool use_manual_time_;
+  BigO complexity_;
   std::vector<int> thread_counts_;
 
   BenchmarkImp& operator=(BenchmarkImp const&);
@@ -411,6 +416,7 @@ bool BenchmarkFamilies::FindBenchmarks(
       instance.min_time = family->min_time_;
       instance.use_real_time = family->use_real_time_;
       instance.use_manual_time = family->use_manual_time_;
+      instance.complexity = family->complexity_;
       instance.threads = num_threads;
       instance.multithreaded = !(family->thread_counts_.empty());
@@ -436,6 +442,7 @@ bool BenchmarkFamilies::FindBenchmarks(
       }
 
       if (re.Match(instance.name)) {
+        instance.last_benchmark_instance = (args == family->args_.back());
         benchmarks->push_back(instance);
       }
     }
@@ -447,7 +454,8 @@ bool BenchmarkFamilies::FindBenchmarks(
 BenchmarkImp::BenchmarkImp(const char* name)
     : name_(name), arg_count_(-1), time_unit_(kNanosecond),
       range_multiplier_(kRangeMultiplier), min_time_(0.0),
-      use_real_time_(false), use_manual_time_(false) {
+      use_real_time_(false), use_manual_time_(false),
+      complexity_(oNone) {
 }
 
 BenchmarkImp::~BenchmarkImp() {
@@ -523,6 +531,10 @@ void BenchmarkImp::UseManualTime() {
   use_manual_time_ = true;
 }
 
+void BenchmarkImp::Complexity(BigO complexity) {
+  complexity_ = complexity;
+}
+
 void BenchmarkImp::Threads(int t) {
   CHECK_GT(t, 0);
   thread_counts_.push_back(t);
@@ -636,6 +648,11 @@ Benchmark* Benchmark::UseManualTime() {
   return this;
 }
 
+Benchmark* Benchmark::Complexity(BigO complexity) {
+  imp_->Complexity(complexity);
+  return this;
+}
+
 Benchmark* Benchmark::Threads(int t) {
   imp_->Threads(t);
   return this;
@@ -677,13 +694,15 @@ void RunInThread(const benchmark::internal::Benchmark::Instance* b,
     MutexLock l(GetBenchmarkLock());
     total->bytes_processed += st.bytes_processed();
     total->items_processed += st.items_processed();
+    total->complexity_n += st.complexity_length_n();
   }
 
   timer_manager->Finalize();
 }
 
 void RunBenchmark(const benchmark::internal::Benchmark::Instance& b,
-                  BenchmarkReporter* br) EXCLUDES(GetBenchmarkLock()) {
+                  BenchmarkReporter* br,
+                  std::vector<BenchmarkReporter::Run>& complexity_reports) EXCLUDES(GetBenchmarkLock()) {
   size_t iters = 1;
 
   std::vector<BenchmarkReporter::Run> reports;
@@ -781,7 +800,13 @@ void RunBenchmark(const benchmark::internal::Benchmark::Instance& b,
       report.cpu_accumulated_time = cpu_accumulated_time;
       report.bytes_per_second = bytes_per_second;
       report.items_per_second = items_per_second;
+      report.complexity_n = total.complexity_n;
+      report.complexity = b.complexity;
       reports.push_back(report);
+
+      if (report.complexity != oNone)
+        complexity_reports.push_back(report);
+
       break;
     }
 
@@ -805,6 +830,12 @@ void RunBenchmark(const benchmark::internal::Benchmark::Instance& b,
     }
   }
   br->ReportRuns(reports);
+
+  if ((b.complexity != oNone) && b.last_benchmark_instance) {
+    br->ReportComplexity(complexity_reports);
+    complexity_reports.clear();
+  }
+
   if (b.multithreaded) {
     for (std::thread& thread : pool)
       thread.join();
@@ -819,6 +850,7 @@ State::State(size_t max_iters, bool has_x, int x, bool has_y, int y,
       has_range_x_(has_x), range_x_(x),
       has_range_y_(has_y), range_y_(y),
       bytes_processed_(0), items_processed_(0),
+      complexity_n_(0),
       thread_index(thread_i),
       threads(n_threads),
       max_iterations(max_iters)
@@ -876,9 +908,12 @@ void RunMatchingBenchmarks(const std::vector<Benchmark::Instance>& benchmarks,
   context.cpu_scaling_enabled = CpuScalingEnabled();
   context.name_field_width = name_field_width;
 
+  // Keep track of the running times of all instances of the current benchmark
+  std::vector<BenchmarkReporter::Run> complexity_reports;
+
   if (reporter->ReportContext(context)) {
     for (const auto& benchmark : benchmarks) {
-      RunBenchmark(benchmark, reporter);
+      RunBenchmark(benchmark, reporter, complexity_reports);
     }
   }
 }
diff --git a/src/console_reporter.cc b/src/console_reporter.cc
index 56bd3ced..cf78a7f8 100644
--- a/src/console_reporter.cc
+++ b/src/console_reporter.cc
@@ -79,6 +79,21 @@ void ConsoleReporter::ReportRuns(const std::vector<Run>& reports) {
   PrintRunData(stddev_data);
 }
 
+void ConsoleReporter::ReportComplexity(const std::vector<Run>& complexity_reports) {
+  if (complexity_reports.size() < 2) {
+    // We don't report asymptotic complexity data if there was a single run.
+    return;
+  }
+
+  Run big_o_data;
+  Run rms_data;
+  BenchmarkReporter::ComputeBigO(complexity_reports, &big_o_data, &rms_data);
+
+  // Output using PrintRunData.
+  PrintRunData(big_o_data);
+  PrintRunData(rms_data);
+}
+
 void ConsoleReporter::PrintRunData(const Run& result) {
   // Format bytes per second
   std::string rate;
@@ -97,10 +112,23 @@ void ConsoleReporter::PrintRunData(const Run& result) {
   const char* timeLabel;
   std::tie(timeLabel, multiplier) = GetTimeUnitAndMultiplier(result.time_unit);
 
-  ColorPrintf(COLOR_GREEN, "%-*s ",
+  ColorPrintf((result.report_big_o || result.report_rms) ? COLOR_BLUE : COLOR_GREEN, "%-*s ",
               name_field_width_, result.benchmark_name.c_str());
 
-  if (result.iterations == 0) {
+  if (result.report_big_o) {
+    std::string big_o = GetBigO(result.complexity);
+    ColorPrintf(COLOR_YELLOW, "%10.4f %s %10.4f %s ",
+                result.real_accumulated_time * multiplier,
+                big_o.c_str(),
+                result.cpu_accumulated_time * multiplier,
+                big_o.c_str());
+  }
+  else if (result.report_rms) {
+    ColorPrintf(COLOR_YELLOW, "%10.0f %% %10.0f %% ",
+                result.real_accumulated_time * multiplier * 100,
+                result.cpu_accumulated_time * multiplier * 100);
+  }
+  else if (result.iterations == 0) {
     ColorPrintf(COLOR_YELLOW, "%10.0f %s %10.0f %s ",
                 result.real_accumulated_time * multiplier,
                 timeLabel,
@@ -116,7 +144,8 @@ void ConsoleReporter::PrintRunData(const Run& result) {
                 timeLabel);
   }
 
-  ColorPrintf(COLOR_CYAN, "%10lld", result.iterations);
+  if (!result.report_big_o && !result.report_rms)
+    ColorPrintf(COLOR_CYAN, "%10lld", result.iterations);
 
   if (!rate.empty()) {
     ColorPrintf(COLOR_DEFAULT, " %*s", 13, rate.c_str());
diff --git a/src/csv_reporter.cc b/src/csv_reporter.cc
index 3f67d1de..9bfd66bf 100644
--- a/src/csv_reporter.cc
+++ b/src/csv_reporter.cc
@@ -48,7 +48,7 @@ bool CSVReporter::ReportContext(const Context& context) {
   return true;
 }
 
-void CSVReporter::ReportRuns(std::vector<Run> const& reports) {
+void CSVReporter::ReportRuns(const std::vector<Run>& reports) {
   if (reports.empty()) {
     return;
   }
@@ -66,7 +66,22 @@ void CSVReporter::ReportRuns(std::vector<Run> const& reports) {
   }
 }
 
-void CSVReporter::PrintRunData(Run const& run) {
+void CSVReporter::ReportComplexity(const std::vector<Run>& complexity_reports) {
+  if (complexity_reports.size() < 2) {
+    // We don't report asymptotic complexity data if there was a single run.
+    return;
+  }
+
+  Run big_o_data;
+  Run rms_data;
+  BenchmarkReporter::ComputeBigO(complexity_reports, &big_o_data, &rms_data);
+
+  // Output using PrintRunData.
+  PrintRunData(big_o_data);
+  PrintRunData(rms_data);
+}
+
+void CSVReporter::PrintRunData(const Run& run) {
   double multiplier;
   const char* timeLabel;
   std::tie(timeLabel, multiplier) = GetTimeUnitAndMultiplier(run.time_unit);
@@ -84,10 +99,20 @@ void CSVReporter::PrintRunData(Run const& run) {
   ReplaceAll(&name, "\"", "\"\"");
   std::cout << "\"" << name << "\",";
 
-  std::cout << run.iterations << ",";
+  // Do not print iterations on big-O and RMS reports
+  if (!run.report_big_o && !run.report_rms)
+    std::cout << run.iterations << ",";
+  else
+    std::cout << ",";
+
   std::cout << real_time << ",";
   std::cout << cpu_time << ",";
-  std::cout << timeLabel << ",";
+
+  // Do not print the time label on RMS reports
+  if (!run.report_rms)
+    std::cout << timeLabel << ",";
+  else
+    std::cout << ",";
 
   if (run.bytes_per_second > 0.0) {
     std::cout << run.bytes_per_second;
diff --git a/src/json_reporter.cc b/src/json_reporter.cc
index 7ed141fc..c9d9cf1d 100644
--- a/src/json_reporter.cc
+++ b/src/json_reporter.cc
@@ -115,6 +115,31 @@ void JSONReporter::ReportRuns(std::vector<Run> const& reports) {
   }
 }
 
+void JSONReporter::ReportComplexity(const std::vector<Run>& complexity_reports) {
+  if (complexity_reports.size() < 2) {
+    // We don't report asymptotic complexity data if there was a single run.
+    return;
+  }
+
+  std::string indent(4, ' ');
+  std::ostream& out = std::cout;
+  if (!first_report_) {
+    out << ",\n";
+  }
+
+  Run big_o_data;
+  Run rms_data;
+  BenchmarkReporter::ComputeBigO(complexity_reports, &big_o_data, &rms_data);
+
+  // Output using PrintRunData.
+  out << indent << "{\n";
+  PrintRunData(big_o_data);
+  out << indent << "},\n";
+  out << indent << "{\n";
+  PrintRunData(rms_data);
+  out << indent << '}';
+}
+
 void JSONReporter::Finalize() {
   // Close the list of benchmarks and the top level object.
   std::cout << "\n  ]\n}\n";
@@ -137,17 +162,20 @@ void JSONReporter::PrintRunData(Run const& run) {
   out << indent
       << FormatKV("name", run.benchmark_name)
       << ",\n";
-  out << indent
-      << FormatKV("iterations", run.iterations)
-      << ",\n";
+  if (!run.report_big_o && !run.report_rms) {
+    out << indent
+        << FormatKV("iterations", run.iterations)
+        << ",\n";
+  }
   out << indent
       << FormatKV("real_time", RoundDouble(real_time))
       << ",\n";
   out << indent
-      << FormatKV("cpu_time", RoundDouble(cpu_time))
-      << ",\n";
-  out << indent
-      << FormatKV("time_unit", timeLabel);
+      << FormatKV("cpu_time", RoundDouble(cpu_time));
+  if (!run.report_rms) {
+    out << ",\n" << indent
+        << FormatKV("time_unit", timeLabel);
+  }
 
   if (run.bytes_per_second > 0.0) {
     out << ",\n" << indent
         << FormatKV("bytes_per_second", RoundDouble(run.bytes_per_second));
diff --git a/src/minimal_leastsq.cc b/src/minimal_leastsq.cc
new file mode 100644
index 00000000..ea6bd463
--- /dev/null
+++ b/src/minimal_leastsq.cc
@@ -0,0 +1,115 @@
+// Copyright 2016 Ismael Jimenez Martinez. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
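+
+// This translation unit implements a minimal least-squares fit of benchmark
+// timings against a set of candidate complexity curves; see FittingCurve
+// and MinimalLeastSq below.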
+
+// Source project : https://github.com/ismaelJimenez/cpp.leastsq
+// Adapted to be used with google benchmark
+
+#include "minimal_leastsq.h"
+#include "check.h"
+#include <math.h>
+
+// Internal function to calculate the different scalability forms
+double FittingCurve(double n, benchmark::BigO complexity) {
+  switch (complexity) {
+    case benchmark::oN:
+      return n;
+    case benchmark::oNSquared:
+      return pow(n, 2);
+    case benchmark::oNCubed:
+      return pow(n, 3);
+    case benchmark::oLogN:
+      return log2(n);
+    case benchmark::oNLogN:
+      return n * log2(n);
+    case benchmark::o1:
+    default:
+      return 1;
+  }
+}
+
+// Internal function to find the coefficient for the high-order term in the
+// running time, by minimizing the sum of squares of the residuals.
+// - n          : Vector containing the size of the benchmark tests.
+// - time       : Vector containing the times for the benchmark tests.
+// - complexity : Fitting curve.
+// For a deeper explanation of the algorithm logic, see the README file at
+// http://github.com/ismaelJimenez/Minimal-Cpp-Least-Squared-Fit
+
+LeastSq CalculateLeastSq(const std::vector<int>& n,
+                         const std::vector<double>& time,
+                         const benchmark::BigO complexity) {
+  CHECK_NE(complexity, benchmark::oAuto);
+
+  double sigma_gn = 0;
+  double sigma_gn_squared = 0;
+  double sigma_time = 0;
+  double sigma_time_gn = 0;
+
+  // Calculate the least-squares fitting parameter. For the model
+  // time = coef * g(n), minimizing the squared residuals gives the closed
+  // form coef = sum_i(time_i * g(n_i)) / sum_i(g(n_i)^2).
+  for (size_t i = 0; i < n.size(); ++i) {
+    double gn_i = FittingCurve(n[i], complexity);
+    sigma_gn += gn_i;
+    sigma_gn_squared += gn_i * gn_i;
+    sigma_time += time[i];
+    sigma_time_gn += time[i] * gn_i;
+  }
+
+  LeastSq result;
+  result.complexity = complexity;
+
+  // Calculate complexity.
+  // o1 is treated as a special case: the estimate is the mean of the times.
+  if (complexity != benchmark::o1)
+    result.coef = sigma_time_gn / sigma_gn_squared;
+  else
+    result.coef = sigma_time / n.size();
+
+  // Calculate RMS
+  double rms = 0;
+  for (size_t i = 0; i < n.size(); ++i) {
+    double fit = result.coef * FittingCurve(n[i], complexity);
+    rms += pow((time[i] - fit), 2);
+  }
+
+  double mean = sigma_time / n.size();
+
+  // Normalize the RMS by the mean of the observed values
+  result.rms = sqrt(rms / n.size()) / mean;
+
+  return result;
+}
+
+// Find the coefficient for the high-order term in the running time, by
+// minimizing the sum of squares of the residuals.
+// - n          : Vector containing the size of the benchmark tests.
+// - time       : Vector containing the times for the benchmark tests.
+// - complexity : If different from oAuto, the fitting curve will stick to
+//                this one. If it is oAuto, the best fitting curve will be
+//                calculated.
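+// When oAuto is requested, o1 is fitted first as a baseline, each curve in
+// { oLogN, oN, oNLogN, oNSquared, oNCubed } is fitted in turn, and the fit
+// with the lowest normalized RMS is returned.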
+
+LeastSq MinimalLeastSq(const std::vector<int>& n,
+                       const std::vector<double>& time,
+                       const benchmark::BigO complexity) {
+  CHECK_EQ(n.size(), time.size());
+  CHECK_GE(n.size(), 2);  // Do not compute the fitting curve if fewer than
+                          // two benchmark runs are given
+  CHECK_NE(complexity, benchmark::oNone);
+
+  if (complexity == benchmark::oAuto) {
+    std::vector<benchmark::BigO> fit_curves = {
+        benchmark::oLogN, benchmark::oN, benchmark::oNLogN,
+        benchmark::oNSquared, benchmark::oNCubed };
+
+    // Take o1 as the default best fitting curve
+    LeastSq best_fit = CalculateLeastSq(n, time, benchmark::o1);
+
+    // Compute all possible fitting curves and stick to the best one
+    for (const auto& fit : fit_curves) {
+      LeastSq current_fit = CalculateLeastSq(n, time, fit);
+      if (current_fit.rms < best_fit.rms)
+        best_fit = current_fit;
+    }
+
+    return best_fit;
+  }
+
+  return CalculateLeastSq(n, time, complexity);
+}
\ No newline at end of file
diff --git a/src/minimal_leastsq.h b/src/minimal_leastsq.h
new file mode 100644
index 00000000..6dcb8940
--- /dev/null
+++ b/src/minimal_leastsq.h
@@ -0,0 +1,46 @@
+// Copyright 2016 Ismael Jimenez Martinez. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Source project : https://github.com/ismaelJimenez/cpp.leastsq
+// Adapted to be used with google benchmark
+
+#if !defined(MINIMAL_LEASTSQ_H_)
+#define MINIMAL_LEASTSQ_H_
+
+#include "benchmark/benchmark_api.h"
+
+#include <vector>
+
+// This data structure will contain the result returned by MinimalLeastSq
+//   - coef       : Estimated coefficient for the high-order term, as
+//                  interpolated from the data.
+//   - rms        : Normalized root-mean-square error.
+//   - complexity : Scalability form (e.g. oN, oNLogN). If a scalability form
+//                  has been provided to MinimalLeastSq, this will hold the
+//                  same value. If BigO::oAuto has been selected, this
+//                  parameter will hold the best-fitting curve detected.
+struct LeastSq {
+  LeastSq() :
+    coef(0),
+    rms(0),
+    complexity(benchmark::oNone) {}
+
+  double coef;
+  double rms;
+  benchmark::BigO complexity;
+};
+
+// Find the coefficient for the high-order term in the running time, by
+// minimizing the sum of squares of the residuals.
+LeastSq MinimalLeastSq(const std::vector<int>& n,
+                       const std::vector<double>& time,
+                       const benchmark::BigO complexity = benchmark::oAuto);
+
+#endif
diff --git a/src/reporter.cc b/src/reporter.cc
index 036546e7..544df870 100644
--- a/src/reporter.cc
+++ b/src/reporter.cc
@@ -13,9 +13,11 @@
 // limitations under the License.
 
 #include "benchmark/reporter.h"
+#include "minimal_leastsq.h"
 
 #include <cstdlib>
 #include <vector>
+#include <tuple>
 
 #include "check.h"
 #include "stat.h"
@@ -77,6 +79,55 @@ void BenchmarkReporter::ComputeStats(
   stddev_data->items_per_second = items_per_second_stat.StdDev();
 }
 
+void BenchmarkReporter::ComputeBigO(
+    const std::vector<Run>& reports,
+    Run* big_o, Run* rms) {
+  CHECK(reports.size() >= 2)
+      << "Cannot compute asymptotic complexity for fewer than 2 reports";
+  // Accumulators.
+  std::vector<int> n;
+  std::vector<double> real_time;
+  std::vector<double> cpu_time;
+
+  // Populate the accumulators.
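+  // Accumulated times are totals over all iterations, so divide by the
+  // iteration count to fit against mean per-iteration times.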
+  for (const Run& run : reports) {
+    n.push_back(run.complexity_n);
+    real_time.push_back(run.real_accumulated_time / run.iterations);
+    cpu_time.push_back(run.cpu_accumulated_time / run.iterations);
+  }
+
+  LeastSq result_cpu = MinimalLeastSq(n, cpu_time, reports[0].complexity);
+
+  // result_cpu.complexity is passed as a parameter to result_real because,
+  // when reports[0].complexity is oAuto, noise in the measured data could make
+  // the best-fit functions for CPU and real time differ. To avoid this, we
+  // take the best-fitting function for the CPU time and apply it to the
+  // real-time data.
+  LeastSq result_real = MinimalLeastSq(n, real_time, result_cpu.complexity);
+
+  std::string benchmark_name =
+      reports[0].benchmark_name.substr(0, reports[0].benchmark_name.find('/'));
+
+  // Copy the fit results into the BenchmarkReporter::Run's.
+  big_o->benchmark_name = benchmark_name + "_BigO";
+  big_o->iterations = 0;
+  big_o->real_accumulated_time = result_real.coef;
+  big_o->cpu_accumulated_time = result_cpu.coef;
+  big_o->report_big_o = true;
+  big_o->complexity = result_cpu.complexity;
+
+  double multiplier;
+  const char* time_label;
+  std::tie(time_label, multiplier) = GetTimeUnitAndMultiplier(reports[0].time_unit);
+
+  // Use the label of the first run for both complexity reports
+  big_o->report_label = reports[0].report_label;
+  rms->benchmark_name = benchmark_name + "_RMS";
+  rms->report_label = big_o->report_label;
+  rms->iterations = 0;
+  rms->real_accumulated_time = result_real.rms / multiplier;
+  rms->cpu_accumulated_time = result_cpu.rms / multiplier;
+  rms->report_rms = true;
+  rms->complexity = result_cpu.complexity;
+}
+
 TimeUnitMultiplier BenchmarkReporter::GetTimeUnitAndMultiplier(TimeUnit unit) {
   switch (unit) {
     case kMillisecond:
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 4d77a4e0..49d10528 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -55,6 +55,9 @@ if (BENCHMARK_HAS_CXX03_FLAG)
   add_test(cxx03 cxx03_test --benchmark_min_time=0.01)
 endif()
 
+compile_benchmark_test(complexity_test)
+add_test(complexity_benchmark complexity_test --benchmark_min_time=0.01)
+
 # Add the coverage command(s)
 if(CMAKE_BUILD_TYPE)
   string(TOLOWER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_LOWER)
@@ -74,7 +77,7 @@ if (${CMAKE_BUILD_TYPE_LOWER} MATCHES "coverage")
     COMMAND ${LCOV} -q -a before.lcov -a after.lcov --output-file final.lcov
     COMMAND ${LCOV} -q -r final.lcov "'${CMAKE_SOURCE_DIR}/test/*'" -o final.lcov
     COMMAND ${GENHTML} final.lcov -o lcov --demangle-cpp --sort -p "${CMAKE_BINARY_DIR}" -t benchmark
-    DEPENDS filter_test benchmark_test options_test basic_test fixture_test cxx03_test
+    DEPENDS filter_test benchmark_test options_test basic_test fixture_test cxx03_test complexity_test
     WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
     COMMENT "Running LCOV"
   )
diff --git a/test/complexity_test.cc b/test/complexity_test.cc
new file mode 100644
index 00000000..e454ee46
--- /dev/null
+++ b/test/complexity_test.cc
@@ -0,0 +1,105 @@
+
+#include "benchmark/benchmark_api.h"
+
+#include <algorithm>
+#include <cstdlib>
+#include <map>
+#include <string>
+#include <vector>
+
+std::vector<int> ConstructRandomVector(int size) {
+  std::vector<int> v;
+  v.reserve(size);
+  for (int i = 0; i < size; ++i) {
+    v.push_back(rand() % size);
+  }
+  return v;
+}
+
+std::map<int, int> ConstructRandomMap(int size) {
+  std::map<int, int> m;
+  for (int i = 0; i < size; ++i) {
+    m.insert(std::make_pair(rand() % size, rand() % size));
+  }
+  return m;
+}
+
+void BM_Complexity_O1(benchmark::State& state) {
+  while (state.KeepRunning()) {
+  }
+  state.SetComplexityN(state.range_x());
+}
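+// The loop body above is empty, so the per-iteration time should stay flat as
+// the range grows; o1 is the expected fit.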
+BENCHMARK(BM_Complexity_O1)->Range(1, 1<<18)->Complexity(benchmark::o1);
+
+static void BM_Complexity_O_N(benchmark::State& state) {
+  auto v = ConstructRandomVector(state.range_x());
+  // Test worst-case scenario (item not in vector)
+  const int item_not_in_vector = state.range_x() * 2;
+  while (state.KeepRunning()) {
+    benchmark::DoNotOptimize(std::find(v.begin(), v.end(), item_not_in_vector));
+  }
+  state.SetComplexityN(state.range_x());
+}
+BENCHMARK(BM_Complexity_O_N)
+    ->RangeMultiplier(2)->Range(1<<10, 1<<16)->Complexity(benchmark::oN);
+BENCHMARK(BM_Complexity_O_N)
+    ->RangeMultiplier(2)->Range(1<<10, 1<<16)->Complexity(benchmark::oAuto);
+
+static void BM_Complexity_O_N_Squared(benchmark::State& state) {
+  std::string s1(state.range_x(), '-');
+  std::string s2(state.range_x(), '-');
+  state.SetComplexityN(state.range_x());
+  while (state.KeepRunning())
+    for (char& c1 : s1) {
+      for (char& c2 : s2) {
+        benchmark::DoNotOptimize(c1 = 'a');
+        benchmark::DoNotOptimize(c2 = 'b');
+      }
+    }
+}
+BENCHMARK(BM_Complexity_O_N_Squared)
+    ->Range(1, 1<<8)->Complexity(benchmark::oNSquared);
+
+static void BM_Complexity_O_N_Cubed(benchmark::State& state) {
+  std::string s1(state.range_x(), '-');
+  std::string s2(state.range_x(), '-');
+  std::string s3(state.range_x(), '-');
+  state.SetComplexityN(state.range_x());
+  while (state.KeepRunning())
+    for (char& c1 : s1) {
+      for (char& c2 : s2) {
+        for (char& c3 : s3) {
+          benchmark::DoNotOptimize(c1 = 'a');
+          benchmark::DoNotOptimize(c2 = 'b');
+          benchmark::DoNotOptimize(c3 = 'c');
+        }
+      }
+    }
+}
+BENCHMARK(BM_Complexity_O_N_Cubed)
+    ->DenseRange(1, 8)->Complexity(benchmark::oNCubed);
+
+static void BM_Complexity_O_log_N(benchmark::State& state) {
+  auto m = ConstructRandomMap(state.range_x());
+  // Test worst-case scenario (item not in map)
+  const int item_not_in_map = state.range_x() * 2;
+  while (state.KeepRunning()) {
+    benchmark::DoNotOptimize(m.find(item_not_in_map));
+  }
+  state.SetComplexityN(state.range_x());
+}
+BENCHMARK(BM_Complexity_O_log_N)
+    ->RangeMultiplier(2)->Range(1<<10, 1<<16)->Complexity(benchmark::oLogN);
+
+static void BM_Complexity_O_N_log_N(benchmark::State& state) {
+  auto v = ConstructRandomVector(state.range_x());
+  while (state.KeepRunning()) {
+    std::sort(v.begin(), v.end());
+  }
+  state.SetComplexityN(state.range_x());
+}
+BENCHMARK(BM_Complexity_O_N_log_N)
+    ->RangeMultiplier(2)->Range(1<<10, 1<<16)->Complexity(benchmark::oNLogN);
+BENCHMARK(BM_Complexity_O_N_log_N)
+    ->RangeMultiplier(2)->Range(1<<10, 1<<16)->Complexity(benchmark::oAuto);
+
+// Test benchmarks with no range (or a single argument) and check that no
+// complexity report is generated.
+void BM_Extreme_Cases(benchmark::State& state) {
+  while (state.KeepRunning()) {
+  }
+}
+BENCHMARK(BM_Extreme_Cases)->Complexity(benchmark::oNLogN);
+BENCHMARK(BM_Extreme_Cases)->Arg(42)->Complexity(benchmark::oAuto);
+
+BENCHMARK_MAIN()
\ No newline at end of file