Drop Stat1, refactor statistics to be user-providable, add median. (#428)
* Drop Stat1, refactor statistics to be user-providable, add median.
My main goal was to add a median statistic. Since Stat1
calculated the stats incrementally, and did not store
the values themselves, that was not possible. Thus,
I have replaced Stat1 with a simple std::vector<double>
containing all the values.
Then, i have refactored current mean/stdev to be a
function that is provided with values vector, and
returns the statistic. While there, it seemed to make
sense to deduplicate the code by storing all the
statistics functions in a map, and then simply iterate
over it. And the interface to add new statistics is
intentionally exposed, so they may be added easily.
The notable change is that Iterations are no longer
displayed as 0 for stdev. It could be changed, but
I'm not sure how to nicely fit that into the API.
Similarly, this dance about sometimes (for some fields,
for some statistics) dividing by run.iterations, and
then multiplying the calculated statistic back is also
dropped, and if you do the math, I fail to see why
it was needed there in the first place.
Since that was the only use of stat.h, it is removed.
* complexity.h: attempt to fix MSVC build
* Update README.md
* Store statistics to compute in a vector, ensures ordering.
* Add a bit more tests for repetitions.
* Partially address review notes.
* Fix gcc build: drop extra ';'
clang, why didn't you warn me?
* Address review comments.
* double() -> 0.0
* early return
2017-08-24 07:44:29 +08:00
|
|
|
// Copyright 2016 Ismael Jimenez Martinez. All rights reserved.
|
|
|
|
// Copyright 2017 Roman Lebedev. All rights reserved.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
#include "benchmark/benchmark.h"
|
|
|
|
|
|
|
|
#include <algorithm>
|
|
|
|
#include <cmath>
|
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
|
|
|
#include <numeric>
|
|
|
|
#include "check.h"
|
|
|
|
#include "statistics.h"
|
|
|
|
|
|
|
|
namespace benchmark {
|
|
|
|
|
|
|
|
// Returns the sum of every sample in `samples`, accumulated as double
// starting from 0.0 (so an empty vector yields 0.0).
auto StatisticsSum = [](const std::vector<double>& samples) -> double {
  return std::accumulate(samples.begin(), samples.end(), 0.0);
};
|
|
|
|
|
|
|
|
double StatisticsMean(const std::vector<double>& v) {
|
2018-01-30 00:38:47 +08:00
|
|
|
if (v.empty()) return 0.0;
|
Drop Stat1, refactor statistics to be user-providable, add median. (#428)
* Drop Stat1, refactor statistics to be user-providable, add median.
My main goal was to add median statistic. Since Stat1
calculated the stats incrementally, and did not store
the values themselves, it is was not possible. Thus,
i have replaced Stat1 with simple std::vector<double>,
containing all the values.
Then, i have refactored current mean/stdev to be a
function that is provided with values vector, and
returns the statistic. While there, it seemed to make
sense to deduplicate the code by storing all the
statistics functions in a map, and then simply iterate
over it. And the interface to add new statistics is
intentionally exposed, so they may be added easily.
The notable change is that Iterations are no longer
displayed as 0 for stdev. Is could be changed, but
i'm not sure how to nicely fit that into the API.
Similarly, this dance about sometimes (for some fields,
for some statistics) dividing by run.iterations, and
then multiplying the calculated stastic back is also
dropped, and if you do the math, i fail to see why
it was needed there in the first place.
Since that was the only use of stat.h, it is removed.
* complexity.h: attempt to fix MSVC build
* Update README.md
* Store statistics to compute in a vector, ensures ordering.
* Add a bit more tests for repetitions.
* Partially address review notes.
* Fix gcc build: drop extra ';'
clang, why didn't you warn me?
* Address review comments.
* double() -> 0.0
* early return
2017-08-24 07:44:29 +08:00
|
|
|
return StatisticsSum(v) * (1.0 / v.size());
|
|
|
|
}
|
|
|
|
|
|
|
|
double StatisticsMedian(const std::vector<double>& v) {
|
|
|
|
if (v.size() < 3) return StatisticsMean(v);
|
|
|
|
std::vector<double> partial;
|
|
|
|
// we need roundDown(count/2)+1 slots
|
|
|
|
partial.resize(1 + (v.size() / 2));
|
|
|
|
std::partial_sort_copy(v.begin(), v.end(), partial.begin(), partial.end());
|
|
|
|
// did we have odd number of samples?
|
|
|
|
// if yes, then the last element of partially-sorted vector is the median
|
|
|
|
// it no, then the average of the last two elements is the median
|
|
|
|
if(v.size() % 2 == 1)
|
|
|
|
return partial.back();
|
|
|
|
return (partial[partial.size() - 2] + partial[partial.size() - 1]) / 2.0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Returns the sum of the squares of the samples in `samples`, computed as
// the inner product of the vector with itself (0.0 for an empty vector).
auto SumSquares = [](const std::vector<double>& samples) -> double {
  return std::inner_product(samples.begin(), samples.end(), samples.begin(),
                            0.0);
};
|
|
|
|
|
|
|
|
// Returns `value` multiplied by itself.
auto Sqr = [](const double value) { return value * value; };
|
|
|
|
// Square root that clamps negative inputs to 0.0.
// Negative arguments can show up due to imprecision in the calculations;
// returning 0.0 for them avoids producing NaN from std::sqrt.
// The return type is spelled out because the body has two return statements.
auto Sqrt = [](const double dat) -> double {
  if (dat < 0.0) return 0.0;
  return std::sqrt(dat);
};
|
|
|
|
|
|
|
|
double StatisticsStdDev(const std::vector<double>& v) {
|
|
|
|
const auto mean = StatisticsMean(v);
|
2018-01-30 00:38:47 +08:00
|
|
|
if (v.empty()) return mean;
|
Drop Stat1, refactor statistics to be user-providable, add median. (#428)
* Drop Stat1, refactor statistics to be user-providable, add median.
My main goal was to add median statistic. Since Stat1
calculated the stats incrementally, and did not store
the values themselves, it is was not possible. Thus,
i have replaced Stat1 with simple std::vector<double>,
containing all the values.
Then, i have refactored current mean/stdev to be a
function that is provided with values vector, and
returns the statistic. While there, it seemed to make
sense to deduplicate the code by storing all the
statistics functions in a map, and then simply iterate
over it. And the interface to add new statistics is
intentionally exposed, so they may be added easily.
The notable change is that Iterations are no longer
displayed as 0 for stdev. Is could be changed, but
i'm not sure how to nicely fit that into the API.
Similarly, this dance about sometimes (for some fields,
for some statistics) dividing by run.iterations, and
then multiplying the calculated stastic back is also
dropped, and if you do the math, i fail to see why
it was needed there in the first place.
Since that was the only use of stat.h, it is removed.
* complexity.h: attempt to fix MSVC build
* Update README.md
* Store statistics to compute in a vector, ensures ordering.
* Add a bit more tests for repetitions.
* Partially address review notes.
* Fix gcc build: drop extra ';'
clang, why didn't you warn me?
* Address review comments.
* double() -> 0.0
* early return
2017-08-24 07:44:29 +08:00
|
|
|
|
|
|
|
// Sample standard deviation is undefined for n = 1
|
|
|
|
if (v.size() == 1)
|
|
|
|
return 0.0;
|
|
|
|
|
|
|
|
const double avg_squares = SumSquares(v) * (1.0 / v.size());
|
|
|
|
return Sqrt(v.size() / (v.size() - 1.0) * (avg_squares - Sqr(mean)));
|
|
|
|
}
|
|
|
|
|
|
|
|
std::vector<BenchmarkReporter::Run> ComputeStats(
|
|
|
|
const std::vector<BenchmarkReporter::Run>& reports) {
|
|
|
|
typedef BenchmarkReporter::Run Run;
|
|
|
|
std::vector<Run> results;
|
|
|
|
|
|
|
|
auto error_count =
|
|
|
|
std::count_if(reports.begin(), reports.end(),
|
|
|
|
[](Run const& run) { return run.error_occurred; });
|
|
|
|
|
|
|
|
if (reports.size() - error_count < 2) {
|
|
|
|
// We don't report aggregated data if there was a single run.
|
|
|
|
return results;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Accumulators.
|
|
|
|
std::vector<double> real_accumulated_time_stat;
|
|
|
|
std::vector<double> cpu_accumulated_time_stat;
|
|
|
|
std::vector<double> bytes_per_second_stat;
|
|
|
|
std::vector<double> items_per_second_stat;
|
|
|
|
|
|
|
|
real_accumulated_time_stat.reserve(reports.size());
|
|
|
|
cpu_accumulated_time_stat.reserve(reports.size());
|
|
|
|
bytes_per_second_stat.reserve(reports.size());
|
|
|
|
items_per_second_stat.reserve(reports.size());
|
|
|
|
|
|
|
|
// All repetitions should be run with the same number of iterations so we
|
|
|
|
// can take this information from the first benchmark.
|
|
|
|
int64_t const run_iterations = reports.front().iterations;
|
|
|
|
// create stats for user counters
|
|
|
|
struct CounterStat {
|
|
|
|
Counter c;
|
|
|
|
std::vector<double> s;
|
|
|
|
};
|
|
|
|
std::map< std::string, CounterStat > counter_stats;
|
|
|
|
for(Run const& r : reports) {
|
|
|
|
for(auto const& cnt : r.counters) {
|
|
|
|
auto it = counter_stats.find(cnt.first);
|
|
|
|
if(it == counter_stats.end()) {
|
|
|
|
counter_stats.insert({cnt.first, {cnt.second, std::vector<double>{}}});
|
|
|
|
it = counter_stats.find(cnt.first);
|
|
|
|
it->second.s.reserve(reports.size());
|
|
|
|
} else {
|
|
|
|
CHECK_EQ(counter_stats[cnt.first].c.flags, cnt.second.flags);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Populate the accumulators.
|
|
|
|
for (Run const& run : reports) {
|
|
|
|
CHECK_EQ(reports[0].benchmark_name, run.benchmark_name);
|
|
|
|
CHECK_EQ(run_iterations, run.iterations);
|
|
|
|
if (run.error_occurred) continue;
|
|
|
|
real_accumulated_time_stat.emplace_back(run.real_accumulated_time);
|
|
|
|
cpu_accumulated_time_stat.emplace_back(run.cpu_accumulated_time);
|
|
|
|
items_per_second_stat.emplace_back(run.items_per_second);
|
|
|
|
bytes_per_second_stat.emplace_back(run.bytes_per_second);
|
|
|
|
// user counters
|
|
|
|
for(auto const& cnt : run.counters) {
|
|
|
|
auto it = counter_stats.find(cnt.first);
|
|
|
|
CHECK_NE(it, counter_stats.end());
|
|
|
|
it->second.s.emplace_back(cnt.second);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Only add label if it is same for all runs
|
|
|
|
std::string report_label = reports[0].report_label;
|
|
|
|
for (std::size_t i = 1; i < reports.size(); i++) {
|
|
|
|
if (reports[i].report_label != report_label) {
|
|
|
|
report_label = "";
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for(const auto& Stat : *reports[0].statistics) {
|
|
|
|
// Get the data from the accumulator to BenchmarkReporter::Run's.
|
|
|
|
Run data;
|
|
|
|
data.benchmark_name = reports[0].benchmark_name + "_" + Stat.name_;
|
|
|
|
data.report_label = report_label;
|
|
|
|
data.iterations = run_iterations;
|
|
|
|
|
|
|
|
data.real_accumulated_time = Stat.compute_(real_accumulated_time_stat);
|
|
|
|
data.cpu_accumulated_time = Stat.compute_(cpu_accumulated_time_stat);
|
|
|
|
data.bytes_per_second = Stat.compute_(bytes_per_second_stat);
|
|
|
|
data.items_per_second = Stat.compute_(items_per_second_stat);
|
|
|
|
|
|
|
|
data.time_unit = reports[0].time_unit;
|
|
|
|
|
|
|
|
// user counters
|
|
|
|
for(auto const& kv : counter_stats) {
|
|
|
|
const auto uc_stat = Stat.compute_(kv.second.s);
|
|
|
|
auto c = Counter(uc_stat, counter_stats[kv.first].c.flags);
|
|
|
|
data.counters[kv.first] = c;
|
|
|
|
}
|
|
|
|
|
|
|
|
results.push_back(data);
|
|
|
|
}
|
|
|
|
|
|
|
|
return results;
|
|
|
|
}
|
|
|
|
|
|
|
|
} // end namespace benchmark
|