Implement new functions for new SkipList

Summary:
Implement find equal or greater
Implement estimate count
Implement estimate range count

Reviewers: teon.banek, msantl

Reviewed By: teon.banek

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D1804
This commit is contained in:
Matej Ferencevic 2019-01-15 15:09:56 +01:00
parent bd9f900722
commit ca00575f82
2 changed files with 409 additions and 0 deletions
src/utils
tests/unit

View File

@ -3,6 +3,7 @@
#include <atomic>
#include <cstdint>
#include <cstdlib>
#include <experimental/optional>
#include <limits>
#include <mutex>
#include <random>
@ -10,6 +11,7 @@
#include <glog/logging.h>
#include "utils/bound.hpp"
#include "utils/linux.hpp"
#include "utils/on_scope_exit.hpp"
#include "utils/spin_lock.hpp"
@ -32,6 +34,13 @@ const uint64_t kSkipListMaxHeight = 32;
/// that the garbage collection will be triggered more often.
const uint64_t kSkipListGcHeightTrigger = 16;
/// This is the highest layer that will be used by default for item count
/// estimation. It was determined empirically using benchmarks to have an
/// optimal trade-off between performance and accuracy. The function will have
/// an expected maximum error of less than 20% when the key matches 100k
/// elements.
///
/// The value is 1-based: the estimation functions start counting at layer
/// index `value - 1`, so a value of 1 restricts them to the bottom layer
/// (index 0).
const int kSkipListCountEstimateDefaultLayer = 10;
/// These variables define the storage sizes for the SkipListGc. The internal
/// storage of the GC and the Stack storage used within the GC are all
/// optimized to have block sizes that are a whole multiple of the memory page
@ -596,6 +605,49 @@ class SkipList final {
return skiplist_->template find(key);
}
/// Finds the key or the first larger key in the list and returns an
/// iterator to the item.
///
/// @param key value to look for; it only has to be comparable with the
///            stored items, it doesn't have to be of the stored type
/// @return Iterator to the item in the list, will be equal to `end()` when
///         no items match the search
template <typename TKey>
Iterator find_equal_or_greater(const TKey &key) {
  // `TKey` is deduced from the argument. The `template` disambiguator is only
  // valid in front of a name that is followed by an explicit template
  // argument list, so it is intentionally not used here.
  return skiplist_->find_equal_or_greater(key);
}
/// Estimates the number of items that are contained in the list that are
/// identical to the key determined using the equality operator. The default
/// layer is chosen to optimize duration vs. precision. The lower the layer
/// used for estimation the higher the duration of the count operation. If
/// you set the maximum layer for estimation to 1 you will get an exact
/// count.
///
/// @param key value whose occurrences should be counted
/// @param max_layer_for_estimation highest (1-based) layer used for the
///        estimation; must be in the range [1, kSkipListMaxHeight]
/// @return uint64_t estimated count of identical items in the list
template <typename TKey>
uint64_t estimate_count(const TKey &key,
                        int max_layer_for_estimation =
                            kSkipListCountEstimateDefaultLayer) const {
  // `TKey` is deduced from the argument. The `template` disambiguator is only
  // valid in front of a name that is followed by an explicit template
  // argument list, so it is intentionally not used here.
  return skiplist_->estimate_count(key, max_layer_for_estimation);
}
/// Estimates the number of items that are contained in the list that are
/// between the lower and upper bounds using the less and equality operator.
/// The default layer is chosen to optimize duration vs. precision. The
/// lower the layer used for estimation the higher the duration of the count
/// operation. If you set the maximum layer for estimation to 1 you will get
/// an exact count.
///
/// @param lower optional lower bound; when empty the range starts at the
///        beginning of the list
/// @param upper optional upper bound; when empty the range extends to the
///        end of the list
/// @param max_layer_for_estimation highest (1-based) layer used for the
///        estimation; must be in the range [1, kSkipListMaxHeight]
/// @return uint64_t estimated count of items in the range in the list
template <typename TKey>
uint64_t estimate_range_count(
    const std::experimental::optional<utils::Bound<TKey>> &lower,
    const std::experimental::optional<utils::Bound<TKey>> &upper,
    int max_layer_for_estimation =
        kSkipListCountEstimateDefaultLayer) const {
  // `TKey` is deduced from the arguments. The `template` disambiguator is
  // only valid in front of a name that is followed by an explicit template
  // argument list, so it is intentionally not used here.
  return skiplist_->estimate_range_count(lower, upper,
                                         max_layer_for_estimation);
}
/// Removes the key from the list.
///
/// @return bool indicating whether the removal was successful
@ -664,6 +716,28 @@ class SkipList final {
return skiplist_->template find(key);
}
/// Read-only lookup that forwards `key` to the underlying list's
/// `find_equal_or_greater` and returns the result as a `ConstIterator`.
///
/// @param key value to look for
/// @return ConstIterator produced from the underlying list's lookup result
template <typename TKey>
ConstIterator find_equal_or_greater(const TKey &key) const {
  // `TKey` is deduced from the argument. The `template` disambiguator is only
  // valid in front of a name that is followed by an explicit template
  // argument list, so it is intentionally not used here.
  return skiplist_->find_equal_or_greater(key);
}
/// Forwards to the underlying list's `estimate_count` to estimate how many
/// items compare equal to `key`.
///
/// @param key value whose occurrences should be counted
/// @param max_layer_for_estimation highest (1-based) layer used for the
///        estimation; defaults to `kSkipListCountEstimateDefaultLayer`
/// @return uint64_t estimated count of matching items in the list
template <typename TKey>
uint64_t estimate_count(const TKey &key,
                        int max_layer_for_estimation =
                            kSkipListCountEstimateDefaultLayer) const {
  // `TKey` is deduced from the argument. The `template` disambiguator is only
  // valid in front of a name that is followed by an explicit template
  // argument list, so it is intentionally not used here.
  return skiplist_->estimate_count(key, max_layer_for_estimation);
}
/// Forwards to the underlying list's `estimate_range_count` to estimate how
/// many items lie between the two optional bounds.
///
/// @param lower optional lower bound of the range
/// @param upper optional upper bound of the range
/// @param max_layer_for_estimation highest (1-based) layer used for the
///        estimation; defaults to `kSkipListCountEstimateDefaultLayer`
/// @return uint64_t estimated count of items in the range in the list
template <typename TKey>
uint64_t estimate_range_count(
    const std::experimental::optional<utils::Bound<TKey>> &lower,
    const std::experimental::optional<utils::Bound<TKey>> &upper,
    int max_layer_for_estimation =
        kSkipListCountEstimateDefaultLayer) const {
  // `TKey` is deduced from the arguments. The `template` disambiguator is
  // only valid in front of a name that is followed by an explicit template
  // argument list, so it is intentionally not used here.
  return skiplist_->estimate_range_count(lower, upper,
                                         max_layer_for_estimation);
}
uint64_t size() const { return skiplist_->size(); }
private:
@ -825,6 +899,121 @@ class SkipList final {
return Iterator{nullptr};
}
/// Searches the list for `key` and hands back an iterator to the bottom-layer
/// successor reported by `find_node`, provided that node is usable.
///
/// A null iterator is returned when there is no such successor, when the
/// successor isn't fully linked yet, or when it is already marked.
template <typename TKey>
Iterator find_equal_or_greater(const TKey &key) const {
  TNode *preds[kSkipListMaxHeight], *succs[kSkipListMaxHeight];
  find_node(key, preds, succs);

  TNode *candidate = succs[0];
  if (candidate == nullptr) {
    return Iterator{nullptr};
  }
  // The node must be completely inserted...
  if (!candidate->fully_linked.load(std::memory_order_relaxed)) {
    return Iterator{nullptr};
  }
  // ...and must not be flagged as `marked`.
  if (candidate->marked.load(std::memory_order_relaxed)) {
    return Iterator{nullptr};
  }
  return Iterator{candidate};
}
template <typename TKey>
uint64_t estimate_count(const TKey &key, int max_layer_for_estimation) const {
CHECK(max_layer_for_estimation >= 1 &&
max_layer_for_estimation <= kSkipListMaxHeight)
<< "Invalid layer for SkipList count estimation!";
TNode *preds[kSkipListMaxHeight], *succs[kSkipListMaxHeight];
int layer_found = find_node(key, preds, succs);
if (layer_found == -1) {
return 0;
}
uint64_t count = 0;
TNode *pred = preds[layer_found];
for (int layer = std::min(layer_found, max_layer_for_estimation - 1);
layer >= 0; --layer) {
uint64_t nodes_traversed = 0;
TNode *curr = pred->nexts[layer].load(std::memory_order_relaxed);
while (curr != nullptr && curr->obj < key) {
pred = curr;
curr = pred->nexts[layer].load(std::memory_order_relaxed);
}
while (curr != nullptr && curr->obj == key) {
pred = curr;
curr = pred->nexts[layer].load(std::memory_order_relaxed);
++nodes_traversed;
}
// Here we assume that the list is perfectly balanced and that each upper
// layer will have two times less items than the layer below it.
count += (1ULL << layer) * nodes_traversed;
}
return count;
}
/// Estimates how many items in the list lie between `lower` and `upper`.
///
/// The walk starts at a layer no higher than `max_layer_for_estimation - 1`
/// and descends to layer 0. On every layer the nodes inside the range that
/// follow the current position are counted and weighted by `2^layer`.
///
/// @param lower optional lower bound; when empty the range starts at the
///        head of the list
/// @param upper optional upper bound; when empty the range extends to the
///        end of the list
/// @param max_layer_for_estimation highest (1-based) layer to use; must be in
///        the range [1, kSkipListMaxHeight]
/// @return the accumulated weighted count of items inside the range
template <typename TKey>
uint64_t estimate_range_count(
    const std::experimental::optional<utils::Bound<TKey>> &lower,
    const std::experimental::optional<utils::Bound<TKey>> &upper,
    int max_layer_for_estimation) const {
  CHECK(max_layer_for_estimation >= 1 &&
        max_layer_for_estimation <= kSkipListMaxHeight)
      << "Invalid layer for SkipList count estimation!";

  TNode *preds[kSkipListMaxHeight], *succs[kSkipListMaxHeight];
  int layer_found = -1;
  if (lower) {
    layer_found = find_node(lower->value(), preds, succs);
    if (layer_found == -1) {
      // No element is equal to the lower bound value. That doesn't mean the
      // range is empty (e.g. the list holds 4 and 6 and the lower bound is 5),
      // so instead of bailing out we start from the predecessors of the bound
      // on the topmost layer that is allowed for the estimation.
      // NOTE(review): this relies on `find_node` filling `preds` for every
      // layer even when the key is missing -- confirm against `find_node`.
      layer_found = max_layer_for_estimation - 1;
    }
  } else {
    // Without a lower bound the walk starts from the head on every layer.
    for (auto &entry : preds) {
      entry = head_;
    }
    layer_found = kSkipListMaxHeight - 1;
  }

  uint64_t count = 0;
  TNode *pred = preds[layer_found];
  for (int layer = std::min(layer_found, max_layer_for_estimation - 1);
       layer >= 0; --layer) {
    uint64_t nodes_traversed = 0;
    TNode *curr = pred->nexts[layer].load(std::memory_order_relaxed);
    if (lower) {
      // Skip everything in front of the lower bound.
      while (curr != nullptr && curr->obj < lower->value()) {
        pred = curr;
        curr = pred->nexts[layer].load(std::memory_order_relaxed);
      }
      // An exclusive bound also skips the items equal to the bound itself.
      if (lower->IsExclusive()) {
        while (curr != nullptr && curr->obj == lower->value()) {
          pred = curr;
          curr = pred->nexts[layer].load(std::memory_order_relaxed);
        }
      }
    }
    if (upper) {
      // Count everything in front of the upper bound.
      while (curr != nullptr && curr->obj < upper->value()) {
        pred = curr;
        curr = pred->nexts[layer].load(std::memory_order_relaxed);
        ++nodes_traversed;
      }
      // An inclusive bound also counts the items equal to the bound itself.
      if (upper->IsInclusive()) {
        while (curr != nullptr && curr->obj == upper->value()) {
          pred = curr;
          curr = pred->nexts[layer].load(std::memory_order_relaxed);
          ++nodes_traversed;
        }
      }
    } else {
      // Without an upper bound everything up to the end of the layer counts.
      while (curr != nullptr) {
        pred = curr;
        curr = pred->nexts[layer].load(std::memory_order_relaxed);
        ++nodes_traversed;
      }
    }
    // Here we assume that the list is perfectly balanced and that each upper
    // layer will have two times less items than the layer below it.
    count += (1ULL << layer) * nodes_traversed;
  }
  return count;
}
bool ok_to_delete(TNode *candidate, int layer_found) {
// The paper has an incorrect check here. It expects the `layer_found`
// variable to be 1-indexed, but in fact it is 0-indexed.

View File

@ -6,6 +6,7 @@
#include <glog/logging.h>
#include "utils/skip_list.hpp"
#include "utils/timer.hpp"
TEST(SkipList, Int) {
utils::SkipList<int64_t> list;
@ -389,3 +390,222 @@ TEST(SkipList, Inception) {
}
}
}
// Fills the list with the even numbers in [1000, 2000) and verifies that
// `find_equal_or_greater` returns the key itself when it is present, the next
// larger key when it isn't, and `end()` once the key is past the last item.
TEST(SkipList, FindEqualOrGreater) {
  utils::SkipList<uint64_t> list;

  // Insert 1000, 1002, ..., 1998.
  {
    auto acc = list.access();
    uint64_t value = 1000;
    while (value < 2000) {
      auto inserted = acc.insert(value);
      ASSERT_NE(inserted.first, acc.end());
      ASSERT_EQ(*inserted.first, value);
      ASSERT_TRUE(inserted.second);
      value += 2;
    }
  }

  // Probe every key in [0, 3000) in increasing order.
  {
    auto acc = list.access();
    for (uint64_t probe = 0; probe < 3000; ++probe) {
      auto found = acc.find_equal_or_greater(probe);
      if (probe < 1000) {
        // Everything below the smallest item resolves to the smallest item.
        ASSERT_NE(found, acc.end());
        ASSERT_EQ(*found, 1000);
      } else if (probe < 1999) {
        // Even keys are present, odd keys resolve to the next even key.
        ASSERT_NE(found, acc.end());
        ASSERT_EQ(*found, (probe % 2 == 0) ? probe : probe + 1);
      } else {
        // Nothing in the list is equal to or greater than the probe.
        ASSERT_EQ(found, acc.end());
      }
    }
  }
}
// Test item made of a `key` and a `value`. Items are ordered by `key` first
// and by `value` second.
struct Counter {
  int64_t key;
  int64_t value;
};

// Two counters are equal only when both members match.
bool operator==(const Counter &a, const Counter &b) {
  if (a.key != b.key) return false;
  return a.value == b.value;
}

// Lexicographic ordering on (key, value).
bool operator<(const Counter &a, const Counter &b) {
  if (a.key != b.key) return a.key < b.key;
  return a.value < b.value;
}

// Comparisons against a bare integer only look at `key`.
bool operator==(const Counter &a, int64_t b) { return b == a.key; }

bool operator<(const Counter &a, int64_t b) { return b > a.key; }
// Inserts `kMaxElements` keys with `kElementMembers` items each, reports how
// far the default-layer estimate is from the real count and verifies that an
// estimate limited to layer 1 is exact.
TEST(SkipList, EstimateCount) {
  utils::SkipList<Counter> list;

  // 100k elements will yield an expected maximum height of 17
  const int kMaxElements = 100;
  const int kElementMembers = 1000;

  // Populate the list.
  {
    auto acc = list.access();
    for (int64_t key = 0; key < kMaxElements; ++key) {
      for (int64_t value = 0; value < kElementMembers; ++value) {
        auto inserted = acc.insert({key, value});
        ASSERT_NE(inserted.first, acc.end());
        ASSERT_EQ(inserted.first->key, key);
        ASSERT_EQ(inserted.first->value, value);
        ASSERT_TRUE(inserted.second);
      }
    }
  }

  // Measure the error of the estimate that uses the default layer. This part
  // only prints statistics, it doesn't assert anything.
  {
    uint64_t smallest_error = std::numeric_limits<uint64_t>::max();
    uint64_t largest_error = 0;
    uint64_t error_sum = 0;
    auto acc = list.access();
    utils::Timer timer;
    for (int64_t key = 0; key < kMaxElements; ++key) {
      uint64_t estimate = acc.estimate_count(key);
      uint64_t error = 0;
      if (estimate >= kElementMembers) {
        error = estimate - kElementMembers;
      } else {
        error = kElementMembers - estimate;
      }
      if (error < smallest_error) smallest_error = error;
      if (error > largest_error) largest_error = error;
      error_sum += error;
    }
    auto duration = timer.Elapsed().count();
    uint64_t average_error = error_sum / kMaxElements;

    std::cout << "Results for estimation from default layer:" << std::endl;
    std::cout << " min(delta) = " << smallest_error << std::endl;
    std::cout << " avg(delta) = " << average_error << std::endl;
    std::cout << " max(delta) = " << largest_error << std::endl;
    std::cout << " duration = " << duration << " s" << std::endl;
  }

  // Limiting the estimation to layer 1 must produce the exact count.
  {
    auto acc = list.access();
    for (int64_t key = 0; key < kMaxElements; ++key) {
      uint64_t exact = acc.estimate_count(key, 1);
      ASSERT_EQ(exact, kElementMembers);
    }
  }
}
// Checks `estimate_range_count` with both bounds present for every pair
// (i, j) with i and j in [0, 10). `lower` and `upper` are the names of
// `utils::BoundType` enumerators (INCLUSIVE or EXCLUSIVE) that are applied to
// the lower bound `i` and the upper bound `j` respectively.
//
// The expected number of whole key groups ("blocks") inside the range is:
//   - both bounds exclusive: `j - i - 1` when `j > i`, otherwise 0;
//   - otherwise: `j - i` when `j >= i` (plus one more when both bounds are
//     inclusive), otherwise 0.
// The estimation is limited to layer 1 and the result is compared against
// `kElementMembers * blocks`.
//
// The macro expects `list` and `kElementMembers` to be visible at the point
// of expansion.
#define MAKE_RANGE_BOTH_DEFINED_TEST(lower, upper) \
{ \
for (int64_t i = 0; i < 10; ++i) { \
for (int64_t j = 0; j < 10; ++j) { \
auto acc = list.access(); \
uint64_t blocks = 0; \
if (utils::BoundType::lower == utils::BoundType::EXCLUSIVE && \
utils::BoundType::upper == utils::BoundType::EXCLUSIVE) { \
if (j > i) { \
blocks = j - i - 1; \
} \
} else { \
if (j >= i) { \
blocks = j - i; \
if (utils::BoundType::lower == utils::BoundType::INCLUSIVE && \
utils::BoundType::upper == utils::BoundType::INCLUSIVE) { \
++blocks; \
} \
} \
} \
uint64_t count = acc.estimate_range_count<int64_t>( \
{{i, utils::BoundType::lower}}, {{j, utils::BoundType::upper}}, \
1); \
ASSERT_EQ(count, kElementMembers *blocks); \
} \
} \
}
// Checks `estimate_range_count` for a range that has no lower bound.
// `upper_value` is the value of the upper bound, `upper_type` is the name of
// a `utils::BoundType` enumerator and `blocks` is the expected number of key
// groups inside the range. The estimation is limited to layer 1 and the
// result is compared against `kElementMembers * blocks`.
//
// The macro expects `list` and `kElementMembers` to be visible at the point
// of expansion.
#define MAKE_RANGE_LOWER_INFINITY_TEST(upper_value, upper_type, blocks) \
{ \
auto acc = list.access(); \
uint64_t count = acc.estimate_range_count<int64_t>( \
std::experimental::nullopt, \
{{upper_value, utils::BoundType::upper_type}}, 1); \
ASSERT_EQ(count, kElementMembers *blocks); \
}
// Checks `estimate_range_count` for a range that has no upper bound.
// `lower_value` is the value of the lower bound, `lower_type` is the name of
// a `utils::BoundType` enumerator and `blocks` is the expected number of key
// groups inside the range. The estimation is limited to layer 1 and the
// result is compared against `kElementMembers * blocks`.
//
// The macro expects `list` and `kElementMembers` to be visible at the point
// of expansion.
#define MAKE_RANGE_UPPER_INFINITY_TEST(lower_value, lower_type, blocks) \
{ \
auto acc = list.access(); \
uint64_t count = acc.estimate_range_count<int64_t>( \
{{lower_value, utils::BoundType::lower_type}}, \
std::experimental::nullopt, 1); \
ASSERT_EQ(count, kElementMembers *blocks); \
}
// Inserts `kMaxElements` keys with `kElementMembers` items each, reports how
// far the default-layer range estimate is from the real count and verifies
// the layer-1 result for a set of bounded, half-open and unbounded ranges.
TEST(SkipList, EstimateRangeCount) {
  utils::SkipList<Counter> list;

  // 100k elements will yield an expected maximum height of 17
  const int kMaxElements = 100;
  const int kElementMembers = 1000;

  // Populate the list.
  {
    auto acc = list.access();
    for (int64_t key = 0; key < kMaxElements; ++key) {
      for (int64_t value = 0; value < kElementMembers; ++value) {
        auto inserted = acc.insert({key, value});
        ASSERT_NE(inserted.first, acc.end());
        ASSERT_EQ(inserted.first->key, key);
        ASSERT_EQ(inserted.first->value, value);
        ASSERT_TRUE(inserted.second);
      }
    }
  }

  // Measure the error of the estimate that uses the default layer for the
  // ranges (-inf, key]. This part only prints statistics.
  {
    uint64_t smallest_error = std::numeric_limits<uint64_t>::max();
    uint64_t largest_error = 0;
    uint64_t error_sum = 0;
    auto acc = list.access();
    utils::Timer timer;
    for (int64_t key = 0; key < kMaxElements; ++key) {
      uint64_t estimate = acc.estimate_range_count<int64_t>(
          std::experimental::nullopt, {{key, utils::BoundType::INCLUSIVE}});
      uint64_t expected = kElementMembers * (key + 1);
      uint64_t error = 0;
      if (estimate >= expected) {
        error = estimate - expected;
      } else {
        error = expected - estimate;
      }
      if (error < smallest_error) smallest_error = error;
      if (error > largest_error) largest_error = error;
      error_sum += error;
    }
    auto duration = timer.Elapsed().count();
    uint64_t average_error = error_sum / kMaxElements;

    std::cout << "Results for estimation from default layer:" << std::endl;
    std::cout << " min(delta) = " << smallest_error << std::endl;
    std::cout << " avg(delta) = " << average_error << std::endl;
    std::cout << " max(delta) = " << largest_error << std::endl;
    std::cout << " duration = " << duration << " s" << std::endl;
  }

  // Both bounds present, all four inclusive/exclusive combinations.
  MAKE_RANGE_BOTH_DEFINED_TEST(INCLUSIVE, INCLUSIVE);
  MAKE_RANGE_BOTH_DEFINED_TEST(INCLUSIVE, EXCLUSIVE);
  MAKE_RANGE_BOTH_DEFINED_TEST(EXCLUSIVE, INCLUSIVE);
  MAKE_RANGE_BOTH_DEFINED_TEST(EXCLUSIVE, EXCLUSIVE);

  // Only the upper bound present.
  MAKE_RANGE_LOWER_INFINITY_TEST(10, INCLUSIVE, 11);
  MAKE_RANGE_LOWER_INFINITY_TEST(10, EXCLUSIVE, 10);
  MAKE_RANGE_LOWER_INFINITY_TEST(0, INCLUSIVE, 1);
  MAKE_RANGE_LOWER_INFINITY_TEST(0, EXCLUSIVE, 0);
  MAKE_RANGE_LOWER_INFINITY_TEST(-10, INCLUSIVE, 0);
  MAKE_RANGE_LOWER_INFINITY_TEST(-10, EXCLUSIVE, 0);

  // Only the lower bound present.
  MAKE_RANGE_UPPER_INFINITY_TEST(89, INCLUSIVE, 11);
  MAKE_RANGE_UPPER_INFINITY_TEST(89, EXCLUSIVE, 10);
  MAKE_RANGE_UPPER_INFINITY_TEST(99, INCLUSIVE, 1);
  MAKE_RANGE_UPPER_INFINITY_TEST(99, EXCLUSIVE, 0);
  MAKE_RANGE_UPPER_INFINITY_TEST(109, INCLUSIVE, 0);
  MAKE_RANGE_UPPER_INFINITY_TEST(109, EXCLUSIVE, 0);

  // No bounds at all: every item in the list is inside the range.
  {
    auto acc = list.access();
    uint64_t total = acc.estimate_range_count<int64_t>(
        std::experimental::nullopt, std::experimental::nullopt, 1);
    ASSERT_EQ(total, kMaxElements * kElementMembers);
  }
}