Implement new functions for new SkipList
Summary: Implement find equal or greater. Implement estimate count. Implement estimate range count. Reviewers: teon.banek, msantl. Reviewed By: teon.banek. Subscribers: pullbot. Differential Revision: https://phabricator.memgraph.io/D1804
This commit is contained in:
parent
bd9f900722
commit
ca00575f82
@ -3,6 +3,7 @@
|
||||
#include <atomic>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <experimental/optional>
|
||||
#include <limits>
|
||||
#include <mutex>
|
||||
#include <random>
|
||||
@ -10,6 +11,7 @@
|
||||
|
||||
#include <glog/logging.h>
|
||||
|
||||
#include "utils/bound.hpp"
|
||||
#include "utils/linux.hpp"
|
||||
#include "utils/on_scope_exit.hpp"
|
||||
#include "utils/spin_lock.hpp"
|
||||
@ -32,6 +34,13 @@ const uint64_t kSkipListMaxHeight = 32;
|
||||
/// that the garbage collection will be triggered more often.
|
||||
const uint64_t kSkipListGcHeightTrigger = 16;
|
||||
|
||||
/// This is the highest layer that will be used by default for item count
|
||||
/// estimation. It was determined empirically using benchmarks to have an
|
||||
/// optimal trade-off between performance and accuracy. The function will have
|
||||
/// an expected maximum error of less than 20% when the key matches 100k
|
||||
/// elements.
|
||||
const int kSkipListCountEstimateDefaultLayer = 10;
|
||||
|
||||
/// These variables define the storage sizes for the SkipListGc. The internal
|
||||
/// storage of the GC and the Stack storage used within the GC are all
|
||||
/// optimized to have block sizes that are a whole multiple of the memory page
|
||||
@ -596,6 +605,49 @@ class SkipList final {
|
||||
return skiplist_->template find(key);
|
||||
}
|
||||
|
||||
/// Finds the key or the first larger key in the list and returns an
|
||||
/// iterator to the item.
|
||||
///
|
||||
/// @return Iterator to the item in the list, will be equal to `end()` when
|
||||
/// no items match the search
|
||||
template <typename TKey>
|
||||
Iterator find_equal_or_greater(const TKey &key) {
|
||||
return skiplist_->template find_equal_or_greater(key);
|
||||
}
|
||||
|
||||
/// Estimates the number of items that are contained in the list that are
|
||||
/// identical to the key determined using the equality operator. The default
|
||||
/// layer is chosen to optimize duration vs. precision. The lower the layer
|
||||
/// used for estimation the higher the duration of the count operation. If
|
||||
/// you set the maximum layer for estimation to 1 you will get an exact
|
||||
/// count.
|
||||
///
|
||||
/// @return uint64_t estimated count of identical items in the list
|
||||
template <typename TKey>
|
||||
uint64_t estimate_count(const TKey &key,
|
||||
int max_layer_for_estimation =
|
||||
kSkipListCountEstimateDefaultLayer) const {
|
||||
return skiplist_->template estimate_count(key, max_layer_for_estimation);
|
||||
}
|
||||
|
||||
/// Estimates the number of items that are contained in the list that are
|
||||
/// between the lower and upper bounds using the less and equality operator.
|
||||
/// The default layer is chosen to optimize duration vs. precision. The
|
||||
/// lower the layer used for estimation the higher the duration of the count
|
||||
/// operation. If you set the maximum layer for estimation to 1 you will get
|
||||
/// an exact count.
|
||||
///
|
||||
/// @return uint64_t estimated count of items in the range in the list
|
||||
template <typename TKey>
|
||||
uint64_t estimate_range_count(
|
||||
const std::experimental::optional<utils::Bound<TKey>> &lower,
|
||||
const std::experimental::optional<utils::Bound<TKey>> &upper,
|
||||
int max_layer_for_estimation =
|
||||
kSkipListCountEstimateDefaultLayer) const {
|
||||
return skiplist_->template estimate_range_count(lower, upper,
|
||||
max_layer_for_estimation);
|
||||
}
|
||||
|
||||
/// Removes the key from the list.
|
||||
///
|
||||
/// @return bool indicating whether the removal was successful
|
||||
@ -664,6 +716,28 @@ class SkipList final {
|
||||
return skiplist_->template find(key);
|
||||
}
|
||||
|
||||
template <typename TKey>
|
||||
ConstIterator find_equal_or_greater(const TKey &key) const {
|
||||
return skiplist_->template find_equal_or_greater(key);
|
||||
}
|
||||
|
||||
template <typename TKey>
|
||||
uint64_t estimate_count(const TKey &key,
|
||||
int max_layer_for_estimation =
|
||||
kSkipListCountEstimateDefaultLayer) const {
|
||||
return skiplist_->template estimate_count(key, max_layer_for_estimation);
|
||||
}
|
||||
|
||||
template <typename TKey>
|
||||
uint64_t estimate_range_count(
|
||||
const std::experimental::optional<utils::Bound<TKey>> &lower,
|
||||
const std::experimental::optional<utils::Bound<TKey>> &upper,
|
||||
int max_layer_for_estimation =
|
||||
kSkipListCountEstimateDefaultLayer) const {
|
||||
return skiplist_->template estimate_range_count(lower, upper,
|
||||
max_layer_for_estimation);
|
||||
}
|
||||
|
||||
/// @return the value reported by the underlying list's `size()`
uint64_t size() const {
  return skiplist_->size();
}
|
||||
|
||||
private:
|
||||
@ -825,6 +899,121 @@ class SkipList final {
|
||||
return Iterator{nullptr};
|
||||
}
|
||||
|
||||
template <typename TKey>
|
||||
Iterator find_equal_or_greater(const TKey &key) const {
|
||||
TNode *preds[kSkipListMaxHeight], *succs[kSkipListMaxHeight];
|
||||
find_node(key, preds, succs);
|
||||
if (succs[0] && succs[0]->fully_linked.load(std::memory_order_relaxed) &&
|
||||
!succs[0]->marked.load(std::memory_order_relaxed)) {
|
||||
return Iterator{succs[0]};
|
||||
}
|
||||
return Iterator{nullptr};
|
||||
}
|
||||
|
||||
template <typename TKey>
|
||||
uint64_t estimate_count(const TKey &key, int max_layer_for_estimation) const {
|
||||
CHECK(max_layer_for_estimation >= 1 &&
|
||||
max_layer_for_estimation <= kSkipListMaxHeight)
|
||||
<< "Invalid layer for SkipList count estimation!";
|
||||
|
||||
TNode *preds[kSkipListMaxHeight], *succs[kSkipListMaxHeight];
|
||||
int layer_found = find_node(key, preds, succs);
|
||||
if (layer_found == -1) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint64_t count = 0;
|
||||
TNode *pred = preds[layer_found];
|
||||
for (int layer = std::min(layer_found, max_layer_for_estimation - 1);
|
||||
layer >= 0; --layer) {
|
||||
uint64_t nodes_traversed = 0;
|
||||
TNode *curr = pred->nexts[layer].load(std::memory_order_relaxed);
|
||||
while (curr != nullptr && curr->obj < key) {
|
||||
pred = curr;
|
||||
curr = pred->nexts[layer].load(std::memory_order_relaxed);
|
||||
}
|
||||
while (curr != nullptr && curr->obj == key) {
|
||||
pred = curr;
|
||||
curr = pred->nexts[layer].load(std::memory_order_relaxed);
|
||||
++nodes_traversed;
|
||||
}
|
||||
// Here we assume that the list is perfectly balanced and that each upper
|
||||
// layer will have two times less items than the layer below it.
|
||||
count += (1ULL << layer) * nodes_traversed;
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
template <typename TKey>
|
||||
uint64_t estimate_range_count(
|
||||
const std::experimental::optional<utils::Bound<TKey>> &lower,
|
||||
const std::experimental::optional<utils::Bound<TKey>> &upper,
|
||||
int max_layer_for_estimation) const {
|
||||
CHECK(max_layer_for_estimation >= 1 &&
|
||||
max_layer_for_estimation <= kSkipListMaxHeight)
|
||||
<< "Invalid layer for SkipList count estimation!";
|
||||
|
||||
TNode *preds[kSkipListMaxHeight], *succs[kSkipListMaxHeight];
|
||||
int layer_found = -1;
|
||||
if (lower) {
|
||||
layer_found = find_node(lower->value(), preds, succs);
|
||||
} else {
|
||||
for (int i = 0; i < kSkipListMaxHeight; ++i) {
|
||||
preds[i] = head_;
|
||||
}
|
||||
layer_found = kSkipListMaxHeight - 1;
|
||||
}
|
||||
if (layer_found == -1) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint64_t count = 0;
|
||||
TNode *pred = preds[layer_found];
|
||||
for (int layer = std::min(layer_found, max_layer_for_estimation - 1);
|
||||
layer >= 0; --layer) {
|
||||
uint64_t nodes_traversed = 0;
|
||||
TNode *curr = pred->nexts[layer].load(std::memory_order_relaxed);
|
||||
if (lower) {
|
||||
while (curr != nullptr && curr->obj < lower->value()) {
|
||||
pred = curr;
|
||||
curr = pred->nexts[layer].load(std::memory_order_relaxed);
|
||||
}
|
||||
if (lower->IsExclusive()) {
|
||||
while (curr != nullptr && curr->obj == lower->value()) {
|
||||
pred = curr;
|
||||
curr = pred->nexts[layer].load(std::memory_order_relaxed);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (upper) {
|
||||
while (curr != nullptr && curr->obj < upper->value()) {
|
||||
pred = curr;
|
||||
curr = pred->nexts[layer].load(std::memory_order_relaxed);
|
||||
++nodes_traversed;
|
||||
}
|
||||
if (upper->IsInclusive()) {
|
||||
while (curr != nullptr && curr->obj == upper->value()) {
|
||||
pred = curr;
|
||||
curr = pred->nexts[layer].load(std::memory_order_relaxed);
|
||||
++nodes_traversed;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
while (curr != nullptr) {
|
||||
pred = curr;
|
||||
curr = pred->nexts[layer].load(std::memory_order_relaxed);
|
||||
++nodes_traversed;
|
||||
}
|
||||
}
|
||||
// Here we assume that the list is perfectly balanced and that each upper
|
||||
// layer will have two times less items than the layer below it.
|
||||
count += (1ULL << layer) * nodes_traversed;
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
bool ok_to_delete(TNode *candidate, int layer_found) {
|
||||
// The paper has an incorrect check here. It expects the `layer_found`
|
||||
// variable to be 1-indexed, but in fact it is 0-indexed.
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <glog/logging.h>
|
||||
|
||||
#include "utils/skip_list.hpp"
|
||||
#include "utils/timer.hpp"
|
||||
|
||||
TEST(SkipList, Int) {
|
||||
utils::SkipList<int64_t> list;
|
||||
@ -389,3 +390,222 @@ TEST(SkipList, Inception) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Verifies `find_equal_or_greater` for keys below, inside and above the range
// of the stored items.
TEST(SkipList, FindEqualOrGreater) {
  utils::SkipList<uint64_t> list;

  // Insert the even numbers from the range [1000, 2000).
  {
    auto acc = list.access();
    uint64_t item = 1000;
    while (item < 2000) {
      auto inserted = acc.insert(item);
      ASSERT_NE(inserted.first, acc.end());
      ASSERT_EQ(*inserted.first, item);
      ASSERT_TRUE(inserted.second);
      item += 2;
    }
  }

  {
    auto acc = list.access();

    // Every key below the smallest item resolves to the smallest item.
    for (uint64_t key = 0; key < 1000; ++key) {
      auto found = acc.find_equal_or_greater(key);
      ASSERT_NE(found, acc.end());
      ASSERT_EQ(*found, 1000);
    }

    // Even keys resolve to themselves, odd keys to the next even number.
    for (uint64_t key = 1000; key < 1999; ++key) {
      auto found = acc.find_equal_or_greater(key);
      ASSERT_NE(found, acc.end());
      ASSERT_EQ(*found, key + key % 2);
    }

    // Keys above the largest item (1998) don't resolve to anything.
    for (uint64_t key = 1999; key < 3000; ++key) {
      auto found = acc.find_equal_or_greater(key);
      ASSERT_EQ(found, acc.end());
    }
  }
}
|
||||
|
||||
// Item type used by the count estimation tests. Items are ordered by `key`
// first and by `value` second.
struct Counter {
  int64_t key;
  int64_t value;
};

// Two counters are equal only when both of their members are equal.
bool operator==(const Counter &a, const Counter &b) {
  if (a.key != b.key) return false;
  return a.value == b.value;
}

// Orders the counters by `key`, ties are resolved using `value`.
bool operator<(const Counter &a, const Counter &b) {
  if (a.key != b.key) return a.key < b.key;
  return a.value < b.value;
}

// Comparisons against a bare key look only at the `key` member.
bool operator==(const Counter &a, int64_t b) { return b == a.key; }

bool operator<(const Counter &a, int64_t b) { return !(a.key >= b); }
|
||||
|
||||
// Fills the list with `kMaxElements` keys that have `kElementMembers` items
// each, reports the error of the default layer estimation and verifies that
// the estimation from layer 1 is exact.
TEST(SkipList, EstimateCount) {
  utils::SkipList<Counter> list;

  // 100k elements will yield an expected maximum height of 17
  const int kMaxElements = 100;
  const int kElementMembers = 1000;

  // Populate the list.
  {
    auto acc = list.access();
    for (int64_t key = 0; key < kMaxElements; ++key) {
      for (int64_t member = 0; member < kElementMembers; ++member) {
        auto inserted = acc.insert({key, member});
        ASSERT_NE(inserted.first, acc.end());
        ASSERT_EQ(inserted.first->key, key);
        ASSERT_EQ(inserted.first->value, member);
        ASSERT_TRUE(inserted.second);
      }
    }
  }

  // Estimation from the default layer; the error is only reported.
  {
    uint64_t delta_min = std::numeric_limits<uint64_t>::max();
    uint64_t delta_max = 0;
    uint64_t delta_sum = 0;
    auto acc = list.access();
    utils::Timer timer;
    for (int64_t key = 0; key < kMaxElements; ++key) {
      const uint64_t estimated = acc.estimate_count(key);
      const uint64_t expected = kElementMembers;
      const uint64_t delta =
          estimated < expected ? expected - estimated : estimated - expected;
      delta_min = std::min(delta_min, delta);
      delta_max = std::max(delta_max, delta);
      delta_sum += delta;
    }
    auto duration = timer.Elapsed().count();

    const uint64_t delta_avg = delta_sum / kMaxElements;

    std::cout << "Results for estimation from default layer:" << std::endl;
    std::cout << " min(delta) = " << delta_min << std::endl;
    std::cout << " avg(delta) = " << delta_avg << std::endl;
    std::cout << " max(delta) = " << delta_max << std::endl;
    std::cout << " duration = " << duration << " s" << std::endl;
  }

  // Estimation from layer 1 has to yield the exact count.
  {
    auto acc = list.access();
    for (int64_t key = 0; key < kMaxElements; ++key) {
      uint64_t exact = acc.estimate_count(key, 1);
      ASSERT_EQ(exact, kElementMembers);
    }
  }
}
|
||||
|
||||
// Verifies the exact (layer 1) range count for every pair of keys from
// [0, 10) x [0, 10) using the given bound types. `lower` and `upper` are
// `utils::BoundType` enumerator names. The macro expects `list` and
// `kElementMembers` to be in scope where it is used.
#define MAKE_RANGE_BOTH_DEFINED_TEST(lower, upper)                         \
  {                                                                        \
    const bool both_exclusive =                                            \
        utils::BoundType::lower == utils::BoundType::EXCLUSIVE &&          \
        utils::BoundType::upper == utils::BoundType::EXCLUSIVE;            \
    const bool both_inclusive =                                            \
        utils::BoundType::lower == utils::BoundType::INCLUSIVE &&          \
        utils::BoundType::upper == utils::BoundType::INCLUSIVE;            \
    for (int64_t i = 0; i < 10; ++i) {                                     \
      for (int64_t j = 0; j < 10; ++j) {                                   \
        auto acc = list.access();                                          \
        /* Number of whole key groups expected inside of the range. */     \
        uint64_t blocks = 0;                                               \
        if (both_exclusive) {                                              \
          if (j > i) {                                                     \
            blocks = j - i - 1;                                            \
          }                                                                \
        } else if (j >= i) {                                               \
          blocks = j - i + (both_inclusive ? 1 : 0);                       \
        }                                                                  \
        uint64_t count = acc.estimate_range_count<int64_t>(                \
            {{i, utils::BoundType::lower}}, {{j, utils::BoundType::upper}}, \
            1);                                                            \
        ASSERT_EQ(count, kElementMembers *blocks);                         \
      }                                                                    \
    }                                                                      \
  }
|
||||
|
||||
// Verifies the exact (layer 1) range count for a range that has no lower
// bound and has the upper bound `upper_value` of type `upper_type` (a
// `utils::BoundType` enumerator name). `blocks` is the expected number of
// whole key groups in the range; it is parenthesized in the expansion so that
// an expression argument is multiplied as a whole. The macro expects `list`
// and `kElementMembers` to be in scope where it is used.
#define MAKE_RANGE_LOWER_INFINITY_TEST(upper_value, upper_type, blocks) \
  {                                                                     \
    auto acc = list.access();                                           \
    uint64_t count = acc.estimate_range_count<int64_t>(                 \
        std::experimental::nullopt,                                     \
        {{upper_value, utils::BoundType::upper_type}}, 1);              \
    ASSERT_EQ(count, kElementMembers * (blocks));                       \
  }
|
||||
|
||||
// Verifies the exact (layer 1) range count for a range that has the lower
// bound `lower_value` of type `lower_type` (a `utils::BoundType` enumerator
// name) and has no upper bound. `blocks` is the expected number of whole key
// groups in the range; it is parenthesized in the expansion so that an
// expression argument is multiplied as a whole. The macro expects `list` and
// `kElementMembers` to be in scope where it is used.
#define MAKE_RANGE_UPPER_INFINITY_TEST(lower_value, lower_type, blocks) \
  {                                                                     \
    auto acc = list.access();                                           \
    uint64_t count = acc.estimate_range_count<int64_t>(                 \
        {{lower_value, utils::BoundType::lower_type}},                  \
        std::experimental::nullopt, 1);                                 \
    ASSERT_EQ(count, kElementMembers * (blocks));                       \
  }
|
||||
|
||||
// Fills the list with `kMaxElements` keys that have `kElementMembers` items
// each, reports the error of the default layer range estimation and verifies
// that the range estimation from layer 1 is exact for all bound combinations.
TEST(SkipList, EstimateRangeCount) {
  utils::SkipList<Counter> list;

  // 100k elements will yield an expected maximum height of 17
  const int kMaxElements = 100;
  const int kElementMembers = 1000;

  // Populate the list.
  {
    auto acc = list.access();
    for (int64_t key = 0; key < kMaxElements; ++key) {
      for (int64_t member = 0; member < kElementMembers; ++member) {
        auto inserted = acc.insert({key, member});
        ASSERT_NE(inserted.first, acc.end());
        ASSERT_EQ(inserted.first->key, key);
        ASSERT_EQ(inserted.first->value, member);
        ASSERT_TRUE(inserted.second);
      }
    }
  }

  // Estimation from the default layer for the ranges (-inf, key]; the error
  // is only reported.
  {
    uint64_t delta_min = std::numeric_limits<uint64_t>::max();
    uint64_t delta_max = 0;
    uint64_t delta_sum = 0;
    auto acc = list.access();
    utils::Timer timer;
    for (int64_t key = 0; key < kMaxElements; ++key) {
      const uint64_t estimated = acc.estimate_range_count<int64_t>(
          std::experimental::nullopt, {{key, utils::BoundType::INCLUSIVE}});
      const uint64_t expected = kElementMembers * (key + 1);
      const uint64_t delta =
          estimated < expected ? expected - estimated : estimated - expected;
      delta_min = std::min(delta_min, delta);
      delta_max = std::max(delta_max, delta);
      delta_sum += delta;
    }
    auto duration = timer.Elapsed().count();

    const uint64_t delta_avg = delta_sum / kMaxElements;

    std::cout << "Results for estimation from default layer:" << std::endl;
    std::cout << " min(delta) = " << delta_min << std::endl;
    std::cout << " avg(delta) = " << delta_avg << std::endl;
    std::cout << " max(delta) = " << delta_max << std::endl;
    std::cout << " duration = " << duration << " s" << std::endl;
  }

  // Both bounds defined.
  MAKE_RANGE_BOTH_DEFINED_TEST(INCLUSIVE, INCLUSIVE);
  MAKE_RANGE_BOTH_DEFINED_TEST(INCLUSIVE, EXCLUSIVE);
  MAKE_RANGE_BOTH_DEFINED_TEST(EXCLUSIVE, INCLUSIVE);
  MAKE_RANGE_BOTH_DEFINED_TEST(EXCLUSIVE, EXCLUSIVE);

  // Only the upper bound defined.
  MAKE_RANGE_LOWER_INFINITY_TEST(10, INCLUSIVE, 11);
  MAKE_RANGE_LOWER_INFINITY_TEST(10, EXCLUSIVE, 10);
  MAKE_RANGE_LOWER_INFINITY_TEST(0, INCLUSIVE, 1);
  MAKE_RANGE_LOWER_INFINITY_TEST(0, EXCLUSIVE, 0);
  MAKE_RANGE_LOWER_INFINITY_TEST(-10, INCLUSIVE, 0);
  MAKE_RANGE_LOWER_INFINITY_TEST(-10, EXCLUSIVE, 0);

  // Only the lower bound defined.
  MAKE_RANGE_UPPER_INFINITY_TEST(89, INCLUSIVE, 11);
  MAKE_RANGE_UPPER_INFINITY_TEST(89, EXCLUSIVE, 10);
  MAKE_RANGE_UPPER_INFINITY_TEST(99, INCLUSIVE, 1);
  MAKE_RANGE_UPPER_INFINITY_TEST(99, EXCLUSIVE, 0);
  MAKE_RANGE_UPPER_INFINITY_TEST(109, INCLUSIVE, 0);
  MAKE_RANGE_UPPER_INFINITY_TEST(109, EXCLUSIVE, 0);

  // No bounds at all: every item in the list is counted.
  {
    auto acc = list.access();
    uint64_t total = acc.estimate_range_count<int64_t>(
        std::experimental::nullopt, std::experimental::nullopt, 1);
    ASSERT_EQ(total, kMaxElements * kElementMembers);
  }
}
|
||||
|
Loading…
Reference in New Issue
Block a user