Skiplist::PositionAndCount refactor and test
Summary: - refactored so `less` is used instead of `greater` - added a fuzzy unit test Reviewers: mislav.bradac, buda, teon.banek Reviewed By: teon.banek Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D519
This commit is contained in:
parent
e7111b286a
commit
feb854d0c7
@ -530,7 +530,7 @@ class SkipList : private Lockable<lock_t> {
|
||||
/**
|
||||
* Position and count estimation. Gives estimates
|
||||
* on the position of the given item in this skiplist, and
|
||||
* the number of identical items according to 'greater'.
|
||||
* the number of identical items according to 'less'.
|
||||
*
|
||||
* If `item` is not contained in the skiplist,
|
||||
* then the position where it would be inserted is returned
|
||||
@ -543,9 +543,9 @@ class SkipList : private Lockable<lock_t> {
|
||||
* TODO: tune the levels once benchmarks are available.
|
||||
*
|
||||
* @param item The item for which the position is estimated.
|
||||
* @param greater Comparison function. It must be partially
|
||||
* @param less Comparison function. It must be partially
|
||||
* consistent with natural comparison of Skiplist elements:
|
||||
* if `greater` indicates that X is greater then
|
||||
* if `less` indicates that X is less than
|
||||
* Y, then natural comparison must indicate the same. The
|
||||
* reverse does not have to hold.
|
||||
* @param position_level_reduction - Defines at which level
|
||||
@ -553,15 +553,15 @@ class SkipList : private Lockable<lock_t> {
|
||||
* as log2(skiplist->size()) - position_level_reduction.
|
||||
* @param count_max_level - Defines the max level at which
|
||||
* item count is estimated.
|
||||
* @tparam TGreater Type of `greater`
|
||||
* @tparam TLess Type of `less`
|
||||
* @return A pair of ints where the first element is the estimated
|
||||
* position of item, and the second is the estimated number
|
||||
* of items that are the same according to `greater`.
|
||||
* of items that are the same according to `less`.
|
||||
*/
|
||||
template <typename TItem, typename TGreater = std::greater<T>>
|
||||
auto position_and_count(const TItem &item, TGreater greater = TGreater{},
|
||||
int position_level_reduction = 10,
|
||||
int count_max_level = 3) {
|
||||
template <typename TItem, typename TLess = std::less<T>>
|
||||
std::pair<size_t, size_t> position_and_count(
|
||||
const TItem &item, TLess less = TLess{},
|
||||
int position_level_reduction = 10, int count_max_level = 3) {
|
||||
// the level at which position will be sought
|
||||
int position_level = std::max(
|
||||
0, static_cast<int>(std::lround(std::log2(skiplist->size()))) -
|
||||
@ -576,8 +576,12 @@ class SkipList : private Lockable<lock_t> {
|
||||
// used for calculating item position
|
||||
int tower_count = 0;
|
||||
|
||||
// on the current height (i) find the last tower
|
||||
// whose value is less than item, store it in pred
|
||||
// while succ will be either skiplist end or the
|
||||
// first element greater or equal to item
|
||||
succ = pred->forward(i);
|
||||
while (succ && greater(item, succ->value())) {
|
||||
while (succ && less(succ->value(), item)) {
|
||||
pred = succ;
|
||||
succ = succ->forward(i);
|
||||
tower_count++;
|
||||
@ -585,22 +589,17 @@ class SkipList : private Lockable<lock_t> {
|
||||
|
||||
// in the succs field we'll keep track of successors
|
||||
// that are equal to item, or nullptr otherwise
|
||||
succs[i] = (!succ || greater(succ->value(), item)) ? nullptr : succ;
|
||||
succs[i] = (!succ || less(item, succ->value())) ? nullptr : succ;
|
||||
|
||||
position += (1 << i) * tower_count;
|
||||
}
|
||||
|
||||
// if succ is nullptr, we have the last skiplist element
|
||||
if (succ == nullptr) {
|
||||
// pred now contains the first node whose value <= item
|
||||
// check if we found the item exactly (value == item)
|
||||
bool found = pred != skiplist->header && !greater(item, pred->value());
|
||||
return std::make_pair(position, found ? 1 : 0);
|
||||
}
|
||||
// if succ is nullptr, then item is greater than all elements in the list
|
||||
if (succ == nullptr) return std::make_pair(size(), 0);
|
||||
|
||||
// now we need to estimate the count of elements equal to item
|
||||
// we'll do that by looking for the first element that is greater
|
||||
// then item, and counting how far we have to look
|
||||
// than item, and counting how far we have to look
|
||||
|
||||
// first find the rightmost (highest) succ that has value == item
|
||||
int count_level = 0;
|
||||
@ -617,7 +616,7 @@ class SkipList : private Lockable<lock_t> {
|
||||
int count = 1 << count_level;
|
||||
for (; count_level >= 0; count_level--) {
|
||||
Node *next = succ->forward(count_level);
|
||||
while (next && !greater(next->value(), item)) {
|
||||
while (next && !less(item, next->value())) {
|
||||
succ = next;
|
||||
next = next->forward(count_level);
|
||||
count += 1 << count_level;
|
||||
|
@ -39,20 +39,20 @@ std::unique_ptr<SkipList<int>> make_sl(int size) {
|
||||
* @param size - size of the skiplist to test with
|
||||
* @param iterations - number of iterations of each test.
|
||||
* @param granulation - How many sequential ints should be
|
||||
* considered equal in testing by the custom `greater`
|
||||
* considered equal in testing by the custom `less`
|
||||
* function.
|
||||
*/
|
||||
void test(int size, int iterations = 20, int granulation = 1) {
|
||||
auto greater = [granulation](const int &a, const int &b) {
|
||||
return a / granulation > b / granulation;
|
||||
auto less = [granulation](const int &a, const int &b) {
|
||||
return a / granulation < b / granulation;
|
||||
};
|
||||
log("\nTesting skiplist size {} with granulation {}", size, granulation);
|
||||
|
||||
// test at 1/4, 1/2 and 3/4 points
|
||||
std::vector<int> positions({size / 4, size / 2, size * 3 / 4});
|
||||
std::vector<int> test_positions({size / 4, size / 2, size * 3 / 4});
|
||||
|
||||
std::vector<std::vector<int>> less(3);
|
||||
std::vector<std::vector<int>> equal(3);
|
||||
std::vector<std::vector<int>> position(3);
|
||||
std::vector<std::vector<int>> count(3);
|
||||
std::vector<std::vector<double>> time(3);
|
||||
for (int iteration = 0; iteration < iterations; iteration++) {
|
||||
auto sl = make_sl(size);
|
||||
@ -60,26 +60,26 @@ void test(int size, int iterations = 20, int granulation = 1) {
|
||||
for (auto pos : {0, 1, 2}) {
|
||||
clock_t start_time = clock();
|
||||
auto pos_and_count =
|
||||
sl->access().position_and_count(positions[pos], greater);
|
||||
sl->access().position_and_count(test_positions[pos], less);
|
||||
auto t = double(clock() - start_time) / CLOCKS_PER_SEC;
|
||||
|
||||
less[pos].push_back(pos_and_count.first);
|
||||
equal[pos].push_back(pos_and_count.second);
|
||||
position[pos].push_back(pos_and_count.first);
|
||||
count[pos].push_back(pos_and_count.second);
|
||||
time[pos].push_back(t);
|
||||
}
|
||||
}
|
||||
|
||||
// convert values to errors
|
||||
for (auto pos : {0, 1, 2}) {
|
||||
auto position = positions[pos];
|
||||
log("\tPosition {}", position);
|
||||
for (auto &less_elem : less[pos])
|
||||
less_elem = std::abs(less_elem - position);
|
||||
log("\t\tMean position error: {}", mean(less[pos]));
|
||||
for (auto &equal_elem : equal[pos])
|
||||
equal_elem = std::abs(equal_elem - granulation);
|
||||
log("\t\tMean count error: {}", mean(equal[pos]));
|
||||
log("\t\tMean time (ms): {}", mean(time[pos]) * 1000);
|
||||
for (auto pos_index : {0, 1, 2}) {
|
||||
auto test_position = test_positions[pos_index];
|
||||
log("\tPosition {}", test_position);
|
||||
for (auto &position_elem : position[pos_index])
|
||||
position_elem = std::abs(position_elem - test_position);
|
||||
log("\t\tMean position error: {}", mean(position[pos_index]));
|
||||
for (auto &count_elem : count[pos_index])
|
||||
count_elem = std::abs(count_elem - granulation);
|
||||
log("\t\tMean count error: {}", mean(count[pos_index]));
|
||||
log("\t\tMean time (ms): {}", mean(time[pos_index]) * 1000);
|
||||
}
|
||||
}
|
||||
|
||||
@ -92,7 +92,7 @@ int main(int argc, char *argv[]) {
|
||||
if (argc > 2) iterations = (int)std::stoi(argv[2]);
|
||||
|
||||
std::vector<int> granulations;
|
||||
for (int i = 1 ; i < size ; i *= 100) granulations.push_back(i);
|
||||
for (int i = 1; i < size; i *= 100) granulations.push_back(i);
|
||||
for (auto granulation : granulations) test(size, iterations, granulation);
|
||||
|
||||
return 0;
|
83
tests/unit/skiplist_position_and_count.cpp
Normal file
83
tests/unit/skiplist_position_and_count.cpp
Normal file
@ -0,0 +1,83 @@
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#include "data_structures/concurrent/skiplist.hpp"
|
||||
#include "utils/assert.hpp"
|
||||
|
||||
/* The following tests validate the SkipList::position_and_count estimation
|
||||
* functionality. That function has a tunable speed vs. accuracy. The tests
|
||||
* here test the absolutely-accurate parameterization, as well as the default
|
||||
* one that should be the optimal parameterization. As such the tests are
|
||||
* stochastic and defined to validate generally acceptable behavior in
|
||||
* a vast majority of cases. The probability of test failure due to
|
||||
* stochasticity should be extremely small, but isn't zero.
|
||||
*/
|
||||
|
||||
auto SkiplistRange(int count) {
|
||||
auto sl = std::make_unique<SkipList<int>>();
|
||||
auto access = sl->access();
|
||||
for (int i = 0; i < count; i++) access.insert(i);
|
||||
return sl;
|
||||
}
|
||||
|
||||
/**
 * Returns the median of `elements`.
 *
 * Note that `elements` is sorted in place as a side effect. For an even
 * number of elements the result is the integer (truncating) average of
 * the two middle elements.
 *
 * @param elements Values to take the median of. Asserted to be non-empty
 *                 through debug_assert.
 * @return The median value.
 */
auto Median(std::vector<int> &elements) {
  const auto n = elements.size();
  debug_assert(n > 0, "Provide some elements to get median!");
  std::sort(elements.begin(), elements.end());

  const auto upper_mid = n / 2;
  const bool odd_count = (n % 2) != 0;
  // odd count: a single middle element exists
  if (odd_count) return elements[upper_mid];
  // even count: average the two elements around the middle
  return (elements[upper_mid - 1] + elements[upper_mid]) / 2;
}
|
||||
|
||||
/**
 * Builds a "less" comparator that treats ints as equal when they
 * fall into the same bucket, where an int's bucket is the result of
 * integer division by `granularity`.
 *
 * @param granularity Bucket width used as the divisor.
 * @return A callable (a, b) -> bool that is true iff the bucket of
 *         `a` is smaller than the bucket of `b`.
 */
auto Less(int granularity) {
  return [granularity](const int &a, const int &b) {
    const int bucket_a = a / granularity;
    const int bucket_b = b / granularity;
    return bucket_a < bucket_b;
  };
}
|
||||
|
||||
// Builds a fresh skiplist of the ints [0, 10000) and calls
// position_and_count for `position` using Less(granularity) as the
// comparison, 1000 as position_level_reduction and 0 as count_max_level.
// Expects the returned pair to be exactly
// (expected_position, expected_count).
#define EXPECT_ABS_POS_COUNT(granularity, position, expected_position, \
                             expected_count) \
  { \
    auto skiplist_ptr = SkiplistRange(10000); \
    auto estimate = skiplist_ptr->access().position_and_count( \
        position, Less(granularity), 1000, 0); \
    EXPECT_EQ(estimate.first, expected_position); \
    EXPECT_EQ(estimate.second, expected_count); \
  }
|
||||
|
||||
// Exact-result checks on a skiplist holding 0..9999, where every
// value equals its own position.
TEST(SkiplistPosAndCount, AbsoluteAccuracy) {
  // granularity 1: all ints are distinct, 42 is at position 42
  EXPECT_ABS_POS_COUNT(1, 42, 42, 1);
  // granularity 3: 42, 43 and 44 share a bucket and 42 is the first of them
  EXPECT_ABS_POS_COUNT(3, 42, 42, 3);
  // granularity 10: 40..49 share a bucket, so the run starts at position 40
  EXPECT_ABS_POS_COUNT(10, 42, 40, 10);
}
|
||||
|
||||
// Runs position_and_count with its default parameters on 30 freshly built
// skiplists of the ints [0, skiplist_size). Collects the absolute error of
// the estimated position (against `position`) and of the estimated count
// (against `expected_count`) for each run, and expects the median of each
// error series to be within the corresponding margin.
#define EXPECT_POS_COUNT(skiplist_size, position, expected_count, \
                         position_error_margin, count_error_margin) \
  { \
    std::vector<int> position_deviations; \
    std::vector<int> count_deviations; \
    \
    for (int run = 0; run < 30; run++) { \
      auto skiplist_ptr = SkiplistRange(skiplist_size); \
      auto estimate = skiplist_ptr->access().position_and_count(position); \
      auto position_delta = static_cast<long>(estimate.first) - position; \
      auto count_delta = static_cast<long>(estimate.second) - expected_count; \
      position_deviations.push_back(std::abs(position_delta)); \
      count_deviations.push_back(std::abs(count_delta)); \
    } \
    EXPECT_LE(Median(position_deviations), position_error_margin); \
    EXPECT_LE(Median(count_deviations), count_error_margin); \
  }
|
||||
|
||||
// Checks the default (estimating) parameterization on a 5000-element
// skiplist: the median errors over repeated runs must stay within margins.
TEST(SkiplistPosAndCount, DefaultSpeedAndAccuracy) {
  // items near the start, in the middle and near the end of the list;
  // each is contained exactly once
  EXPECT_POS_COUNT(5000, 42, 1, 20, 3);
  EXPECT_POS_COUNT(5000, 2500, 1, 100, 3);
  EXPECT_POS_COUNT(5000, 4500, 1, 200, 3);

  // for an item greater than all list elements the returned
  // estimations are always absolutely accurate
  EXPECT_POS_COUNT(5000, 5000, 0, 0, 0);
}
|
Loading…
Reference in New Issue
Block a user