upgraded leastsq

This commit is contained in:
Ismael 2016-05-25 22:26:57 +02:00
parent e246699f25
commit 087f0d3f1b
2 changed files with 76 additions and 41 deletions

View File

@ -20,37 +20,54 @@
#include <math.h>
// Internal function to calculate the different scalability forms
double FittingCurve(double n, benchmark::BigO complexity) {
std::function<double(int)> FittingCurve(benchmark::BigO complexity) {
switch (complexity) {
case benchmark::oN:
return n;
case benchmark::oNSquared:
return pow(n, 2);
case benchmark::oNCubed:
return pow(n, 3);
case benchmark::oLogN:
return log2(n);
case benchmark::oNLogN:
return n * log2(n);
case benchmark::o1:
default:
return 1;
case benchmark::oN:
return [](int n) {return n; };
case benchmark::oNSquared:
return [](int n) {return n*n; };
case benchmark::oNCubed:
return [](int n) {return n*n*n; };
case benchmark::oLogN:
return [](int n) {return log2(n); };
case benchmark::oNLogN:
return [](int n) {return n * log2(n); };
case benchmark::o1:
default:
return [](int) {return 1; };
}
}
// Internal function to find the coefficient for the high-order term in the
// running time, by minimizing the sum of squares of relative error.
// - n : Vector containing the size of the benchmark tests.
// - time : Vector containing the times for the benchmark tests.
// - complexity : Fitting curve.
// Internal function to to return an string for the calculated complexity
std::string GetBigOString(benchmark::BigO complexity) {
switch (complexity) {
case benchmark::oN:
return "* N";
case benchmark::oNSquared:
return "* N**2";
case benchmark::oNCubed:
return "* N**3";
case benchmark::oLogN:
return "* lgN";
case benchmark::oNLogN:
return "* NlgN";
case benchmark::o1:
return "* 1";
default:
return "";
}
}
// Find the coefficient for the high-order term in the running time, by minimizing the sum of squares of relative error, for the fitting curve given on the lambda expresion.
// - n : Vector containing the size of the benchmark tests.
// - time : Vector containing the times for the benchmark tests.
// - fitting_curve : lambda expresion (e.g. [](int n) {return n; };).
// For a deeper explanation on the algorithm logic, look the README file at
// http://github.com/ismaelJimenez/Minimal-Cpp-Least-Squared-Fit
LeastSq CalculateLeastSq(const std::vector<int>& n,
const std::vector<double>& time,
const benchmark::BigO complexity) {
CHECK_NE(complexity, benchmark::oAuto);
LeastSq CalculateLeastSq(const std::vector<int>& n,
const std::vector<double>& time,
std::function<double(int)> fitting_curve) {
double sigma_gn = 0;
double sigma_gn_squared = 0;
double sigma_time = 0;
@ -58,7 +75,7 @@ LeastSq CalculateLeastSq(const std::vector<int>& n,
// Calculate least square fitting parameter
for (size_t i = 0; i < n.size(); ++i) {
double gn_i = FittingCurve(n[i], complexity);
double gn_i = fitting_curve(n[i]);
sigma_gn += gn_i;
sigma_gn_squared += gn_i * gn_i;
sigma_time += time[i];
@ -66,26 +83,19 @@ LeastSq CalculateLeastSq(const std::vector<int>& n,
}
LeastSq result;
result.complexity = complexity;
// Calculate complexity.
// o1 is treated as an special case
if (complexity != benchmark::o1) {
result.coef = sigma_time_gn / sigma_gn_squared;
} else {
result.coef = sigma_time / n.size();
}
result.coef = sigma_time_gn / sigma_gn_squared;
// Calculate RMS
double rms = 0;
for (size_t i = 0; i < n.size(); ++i) {
double fit = result.coef * FittingCurve(n[i], complexity);
double fit = result.coef * fitting_curve(n[i]);
rms += pow((time[i] - fit), 2);
}
double mean = sigma_time / n.size();
// Normalized RMS by the mean of the observed values
double mean = sigma_time / n.size();
result.rms = sqrt(rms / n.size()) / mean;
return result;
@ -105,24 +115,32 @@ LeastSq MinimalLeastSq(const std::vector<int>& n,
CHECK_GE(n.size(), 2); // Do not compute fitting curve is less than two benchmark runs are given
CHECK_NE(complexity, benchmark::oNone);
LeastSq best_fit;
if(complexity == benchmark::oAuto) {
std::vector<benchmark::BigO> fit_curves = {
benchmark::oLogN, benchmark::oN, benchmark::oNLogN, benchmark::oNSquared,
benchmark::oNCubed };
// Take o1 as default best fitting curve
LeastSq best_fit = CalculateLeastSq(n, time, benchmark::o1);
best_fit = CalculateLeastSq(n, time, FittingCurve(benchmark::o1));
best_fit.complexity = benchmark::o1;
best_fit.caption = GetBigOString(benchmark::o1);
// Compute all possible fitting curves and stick to the best one
for (const auto& fit : fit_curves) {
LeastSq current_fit = CalculateLeastSq(n, time, fit);
LeastSq current_fit = CalculateLeastSq(n, time, FittingCurve(fit));
if (current_fit.rms < best_fit.rms) {
best_fit = current_fit;
best_fit.complexity = fit;
best_fit.caption = GetBigOString(fit);
}
}
return best_fit;
} else {
best_fit = CalculateLeastSq(n, time, FittingCurve(complexity));
best_fit.complexity = complexity;
best_fit.caption = GetBigOString(complexity);
}
return CalculateLeastSq(n, time, complexity);
return best_fit;
}

View File

@ -21,6 +21,7 @@
#include "benchmark/benchmark_api.h"
#include <vector>
#include <functional>
// This data structure will contain the result returned by MinimalLeastSq
// - coef : Estimated coeficient for the high-order term as
@ -35,11 +36,13 @@ struct LeastSq {
LeastSq() :
coef(0),
rms(0),
complexity(benchmark::oNone) {}
complexity(benchmark::oNone),
caption("") {}
double coef;
double rms;
benchmark::BigO complexity;
std::string caption;
};
// Find the coefficient for the high-order term in the running time, by
@ -48,4 +51,18 @@ LeastSq MinimalLeastSq(const std::vector<int>& n,
const std::vector<double>& time,
const benchmark::BigO complexity = benchmark::oAuto);
// This interface is currently not used from the oustide, but it has been provided
// for future upgrades. If in the future it is not needed to support Cxx03, then
// all the calculations could be upgraded to use lambdas because they are more
// powerful and provide a cleaner inferface than enumerators, but complete
// implementation with lambdas will not work for Cxx03 (e.g. lack of std::function).
// In case lambdas are implemented, the interface would be like :
// -> Complexity([](int n) {return n;};)
// and any arbitrary and valid equation would be allowed, but the option to calculate
// the best fit to the most common scalability curves will still be kept.
LeastSq CalculateLeastSq(const std::vector<int>& n,
const std::vector<double>& time,
std::function<double(int)> fitting_curve);
#endif