Skiplist::PositionAndCount refactor and test

Summary:
 - refactored so `less` is used instead of `greater`
 - added a fuzzy unit test

Reviewers: mislav.bradac, buda, teon.banek

Reviewed By: teon.banek

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D519
This commit is contained in:
florijan 2017-07-05 12:01:41 +02:00
parent e7111b286a
commit feb854d0c7
3 changed files with 122 additions and 40 deletions

View File

@ -530,7 +530,7 @@ class SkipList : private Lockable<lock_t> {
/**
* Position and count estimation. Gives estimates
* on the position of the given item in this skiplist, and
* the number of identical items according to 'greater'.
* the number of identical items according to 'less'.
*
* If `item` is not contained in the skiplist,
* then the position where it would be inserted is returned
@ -543,9 +543,9 @@ class SkipList : private Lockable<lock_t> {
* TODO: tune the levels once benchmarks are available.
*
* @param item The item for which the position is estimated.
* @param greater Comparison function. It must be partially
* @param less Comparison function. It must be partially
* consistent with natural comparison of Skiplist elements:
* if `greater` indicates that X is greater then
* if `less` indicates that X is less than
* Y, then natural comparison must indicate the same. The
* reverse does not have to hold.
* @param position_level_reduction - Defines at which level
@ -553,15 +553,15 @@ class SkipList : private Lockable<lock_t> {
* as log2(skiplist->size()) - position_level_reduction.
* @param count_max_level - Defines the max level at which
* item count is estimated.
* @tparam TGreater Type of `greater`
* @tparam TLess Type of `less`
* @return A pair of ints where the first element is the estimated
* position of item, and the second is the estimated number
* of items that are the same according to `greater`.
* of items that are the same according to `less`.
*/
template <typename TItem, typename TGreater = std::greater<T>>
auto position_and_count(const TItem &item, TGreater greater = TGreater{},
int position_level_reduction = 10,
int count_max_level = 3) {
template <typename TItem, typename TLess = std::less<T>>
std::pair<size_t, size_t> position_and_count(
const TItem &item, TLess less = TLess{},
int position_level_reduction = 10, int count_max_level = 3) {
// the level at which position will be sought
int position_level = std::max(
0, static_cast<int>(std::lround(std::log2(skiplist->size()))) -
@ -576,8 +576,12 @@ class SkipList : private Lockable<lock_t> {
// used for calculating item position
int tower_count = 0;
// on the current height (i) find the last tower
// whose value is lesser than item, store it in pred
// while succ will be either skiplist end or the
// first element greater or equal to item
succ = pred->forward(i);
while (succ && greater(item, succ->value())) {
while (succ && less(succ->value(), item)) {
pred = succ;
succ = succ->forward(i);
tower_count++;
@ -585,22 +589,17 @@ class SkipList : private Lockable<lock_t> {
// in the succs field we'll keep track of successors
// that are equal to item, or nullptr otherwise
succs[i] = (!succ || greater(succ->value(), item)) ? nullptr : succ;
succs[i] = (!succ || less(item, succ->value())) ? nullptr : succ;
position += (1 << i) * tower_count;
}
// if succ is nullptr, we have the last skiplist element
if (succ == nullptr) {
// pred now contains the first node whose value <= item
// check if we found the item exactly (value == item)
bool found = pred != skiplist->header && !greater(item, pred->value());
return std::make_pair(position, found ? 1 : 0);
}
// if succ is nullptr, then item is greater than all elements in the list
if (succ == nullptr) return std::make_pair(size(), 0);
// now we need to estimate the count of elements equal to item
// we'll do that by looking for the first element that is greater
// then item, and counting how far we have to look
// than item, and counting how far we have to look
// first find the rightmost (highest) succ that has value == item
int count_level = 0;
@ -617,7 +616,7 @@ class SkipList : private Lockable<lock_t> {
int count = 1 << count_level;
for (; count_level >= 0; count_level--) {
Node *next = succ->forward(count_level);
while (next && !greater(next->value(), item)) {
while (next && !less(item, next->value())) {
succ = next;
next = next->forward(count_level);
count += 1 << count_level;

View File

@ -39,20 +39,20 @@ std::unique_ptr<SkipList<int>> make_sl(int size) {
* @param size - size of the skiplist to test with
* @param iterations - number of iterations of each test.
* @param granulation - How many sequential ints should be
* considered equal in testing by the custom `greater`
* considered equal in testing by the custom `less`
* function.
*/
void test(int size, int iterations = 20, int granulation = 1) {
auto greater = [granulation](const int &a, const int &b) {
return a / granulation > b / granulation;
auto less = [granulation](const int &a, const int &b) {
return a / granulation < b / granulation;
};
log("\nTesting skiplist size {} with granulation {}", size, granulation);
// test at 1/4, 1/2 and 3/4 points
std::vector<int> positions({size / 4, size / 2, size * 3 / 4});
std::vector<int> test_positions({size / 4, size / 2, size * 3 / 4});
std::vector<std::vector<int>> less(3);
std::vector<std::vector<int>> equal(3);
std::vector<std::vector<int>> position(3);
std::vector<std::vector<int>> count(3);
std::vector<std::vector<double>> time(3);
for (int iteration = 0; iteration < iterations; iteration++) {
auto sl = make_sl(size);
@ -60,26 +60,26 @@ void test(int size, int iterations = 20, int granulation = 1) {
for (auto pos : {0, 1, 2}) {
clock_t start_time = clock();
auto pos_and_count =
sl->access().position_and_count(positions[pos], greater);
sl->access().position_and_count(test_positions[pos], less);
auto t = double(clock() - start_time) / CLOCKS_PER_SEC;
less[pos].push_back(pos_and_count.first);
equal[pos].push_back(pos_and_count.second);
position[pos].push_back(pos_and_count.first);
count[pos].push_back(pos_and_count.second);
time[pos].push_back(t);
}
}
// convert values to errors
for (auto pos : {0, 1, 2}) {
auto position = positions[pos];
log("\tPosition {}", position);
for (auto &less_elem : less[pos])
less_elem = std::abs(less_elem - position);
log("\t\tMean position error: {}", mean(less[pos]));
for (auto &equal_elem : equal[pos])
equal_elem = std::abs(equal_elem - granulation);
log("\t\tMean count error: {}", mean(equal[pos]));
log("\t\tMean time (ms): {}", mean(time[pos]) * 1000);
for (auto pos_index : {0, 1, 2}) {
auto test_position = test_positions[pos_index];
log("\tPosition {}", test_position);
for (auto &position_elem : position[pos_index])
position_elem = std::abs(position_elem - test_position);
log("\t\tMean position error: {}", mean(position[pos_index]));
for (auto &count_elem : count[pos_index])
count_elem = std::abs(count_elem - granulation);
log("\t\tMean count error: {}", mean(count[pos_index]));
log("\t\tMean time (ms): {}", mean(time[pos_index]) * 1000);
}
}
@ -92,7 +92,7 @@ int main(int argc, char *argv[]) {
if (argc > 2) iterations = (int)std::stoi(argv[2]);
std::vector<int> granulations;
for (int i = 1 ; i < size ; i *= 100) granulations.push_back(i);
for (int i = 1; i < size; i *= 100) granulations.push_back(i);
for (auto granulation : granulations) test(size, iterations, granulation);
return 0;

View File

@ -0,0 +1,83 @@
#include <algorithm>
#include <memory>
#include <vector>
#include "gtest/gtest.h"
#include "data_structures/concurrent/skiplist.hpp"
#include "utils/assert.hpp"
/* The following tests validate the SkipList::position_and_count estimation
* functionality. That function has a tunable speed vs. accuracy trade-off.
* The tests here cover the absolutely-accurate parameterization, as well as
* the default one, which should be the optimal parameterization. As such,
* the tests are stochastic and defined to validate generally acceptable
* behavior in a vast majority of cases. The probability of test failure due
* to stochasticity should be extremely small, but isn't zero.
*/
auto SkiplistRange(int count) {
auto sl = std::make_unique<SkipList<int>>();
auto access = sl->access();
for (int i = 0; i < count; i++) access.insert(i);
return sl;
}
/**
 * Returns the median of the given values.
 *
 * Note that the vector is sorted in place as a side effect. For an even
 * number of elements the result is the integer average (truncating
 * division) of the two middle elements.
 *
 * @param elements Values to take the median of. Must not be empty.
 * @return The median as an int.
 */
auto Median(std::vector<int> &elements) {
  const auto elem_size = elements.size();
  debug_assert(elem_size > 0, "Provide some elements to get median!");
  std::sort(elements.begin(), elements.end());

  const auto upper_mid = elem_size / 2;
  const bool odd_count = (elem_size % 2) != 0;
  // odd count: a single middle element exists
  if (odd_count) return elements[upper_mid];
  // even count: average the two elements around the middle
  return (elements[upper_mid - 1] + elements[upper_mid]) / 2;
}
/**
 * Creates a "less" comparator that groups ints into buckets of `granularity`
 * consecutive values (via integer division) and compares the buckets.
 * Two ints that fall into the same bucket are therefore considered equal.
 *
 * @param granularity Bucket width used as the divisor; must be non-zero.
 * @return A callable taking two ints and returning true if the first one's
 *         bucket is strictly smaller than the second one's.
 */
auto Less(int granularity) {
  return [granularity](const int &a, const int &b) {
    const int bucket_a = a / granularity;
    const int bucket_b = b / granularity;
    return bucket_a < bucket_b;
  };
}
/* Builds a skiplist holding the ints [0, 10000) and queries
 * position_and_count for `position` using the Less(granularity) comparator,
 * with position_level_reduction = 1000 and count_max_level = 0. Expects the
 * returned (position, count) pair to match the expected values exactly.
 */
#define EXPECT_ABS_POS_COUNT(granularity, position, expected_position,   \
                             expected_count)                             \
  {                                                                      \
    auto skiplist = SkiplistRange(10000);                                \
    auto accessor = skiplist->access();                                  \
    auto estimate =                                                      \
        accessor.position_and_count(position, Less(granularity), 1000, 0); \
    EXPECT_EQ(estimate.first, expected_position);                        \
    EXPECT_EQ(estimate.second, expected_count);                          \
  }
// Checks exact position and count results on a skiplist of the ints
// [0, 10000). Macro arguments are, in order:
// (granularity, position, expected_position, expected_count).
TEST(SkiplistPosAndCount, AbsoluteAccuracy) {
// granularity 1: only 42 itself compares equal to 42
EXPECT_ABS_POS_COUNT(1, 42, 42, 1);
// granularity 3: 42, 43 and 44 all fall into bucket 14
EXPECT_ABS_POS_COUNT(3, 42, 42, 3);
// granularity 10: 40..49 share a bucket, the first of them is at position 40
EXPECT_ABS_POS_COUNT(10, 42, 40, 10);
}
/* Runs position_and_count with its default parameters 30 times, each time on
 * a freshly built skiplist holding the ints [0, skiplist_size), and collects
 * the absolute errors of the estimated position (relative to `position`) and
 * of the estimated count (relative to `expected_count`). Expects the median
 * of each error series to be within the corresponding margin.
 *
 * Macro parameters used inside arithmetic are parenthesized so that compound
 * argument expressions (e.g. `a + b`) keep their intended precedence.
 */
#define EXPECT_POS_COUNT(skiplist_size, position, expected_count,           \
                         position_error_margin, count_error_margin)         \
  {                                                                         \
    std::vector<int> pos_errors;                                            \
    std::vector<int> count_errors;                                          \
                                                                            \
    for (int i = 0; i < 30; i++) {                                          \
      auto sl = SkiplistRange(skiplist_size);                               \
      auto position_count = sl->access().position_and_count(position);      \
      /* convert the unsigned results to signed before subtracting, so */   \
      /* that an underestimate yields a negative diff and not a wrap   */   \
      pos_errors.push_back(static_cast<int>(std::abs(                       \
          static_cast<long>(position_count.first) - (position))));          \
      count_errors.push_back(static_cast<int>(std::abs(                     \
          static_cast<long>(position_count.second) - (expected_count))));   \
    }                                                                       \
    EXPECT_LE(Median(pos_errors), position_error_margin);                   \
    EXPECT_LE(Median(count_errors), count_error_margin);                    \
  }
// Checks the default-parameter estimation against error margins.
// Macro arguments are, in order: (skiplist_size, position, expected_count,
// position_error_margin, count_error_margin).
TEST(SkiplistPosAndCount, DefaultSpeedAndAccuracy) {
EXPECT_POS_COUNT(5000, 42, 1, 20, 3);
EXPECT_POS_COUNT(5000, 2500, 1, 100, 3);
EXPECT_POS_COUNT(5000, 4500, 1, 200, 3);
// for an item greater than all list elements the returned
// estimations are always absolutely accurate
EXPECT_POS_COUNT(5000, 5000, 0, 0, 0);
}