Use MemoryResource in SkipList

Summary: This is a preparation step in case we want to have a custom allocator in SkipList. For example, pool based allocator for SkipListNode. Introduction of MemoryResource and removal of `calloc` has reduced the performance a bit according to micro benchmarks. This performance hit is not visible on benchmarks which do more concurrent operations. Reviewers: mferencevic, mtomic Reviewed By: mferencevic Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D2140
2019-06-18 14:51:28 +02:00 · 2019-06-18 14:51:28 +02:00 · a417ef36f1
commit a417ef36f1
parent b1f7bbf051
2 changed files with 74 additions and 29 deletions
--- a/src/utils/skip_list.hpp
+++ b/src/utils/skip_list.hpp
@ -13,6 +13,7 @@

 #include "utils/bound.hpp"
 #include "utils/linux.hpp"
+#include "utils/memory.hpp"
 #include "utils/on_scope_exit.hpp"
 #include "utils/spin_lock.hpp"
 #include "utils/stack.hpp"
@ -82,11 +83,28 @@ struct SkipListNode {
  SpinLock lock;
  std::atomic<bool> marked;
  std::atomic<bool> fully_linked;
+  static_assert(std::numeric_limits<uint8_t>::max() >= kSkipListMaxHeight,
+                "Maximum height doesn't fit in uint8_t");
  uint8_t height;
  // uint8_t PAD;
  std::atomic<SkipListNode<TObj> *> nexts[0];
 };

+/// Maximum size of a single SkipListNode instance.
+///
+/// This can be used to tune the pool allocator for SkipListNode instances.
+template <typename TObj>
+constexpr size_t MaxSkipListNodeSize() {
+  return sizeof(SkipListNode<TObj>) +
+         kSkipListMaxHeight * sizeof(std::atomic<SkipListNode<TObj> *>);
+}
+
+/// Get the size in bytes of the given SkipListNode instance.
+template <typename TObj>
+size_t SkipListNodeSize(const SkipListNode<TObj> &node) {
+  return sizeof(node) + node.height * sizeof(std::atomic<SkipListNode<TObj> *>);
+}
+
 /// The skip list doesn't have built-in reclamation of removed nodes (objects).
 /// This class handles all operations necessary to remove the nodes safely.
 ///
@ -133,7 +151,13 @@ class SkipListGc final {
    std::lock_guard<SpinLock> guard(lock_);
    Block *curr_head = head_.load(std::memory_order_acquire);
    if (curr_head == head) {
-      Block *block = new (calloc(1, sizeof(Block))) Block();
+      // Construct through allocator so it propagates if needed.
+      Allocator<Block> block_allocator(memory_);
+      Block *block = block_allocator.allocate(1);
+      // `calloc` would be faster, but the API has no such call.
+      memset(block, 0, sizeof(Block));
+      // Block constructor should not throw.
+      block_allocator.construct(block);
      block->prev.store(curr_head, std::memory_order_release);
      block->succ.store(nullptr, std::memory_order_release);
      block->first_id = last_id_;
@ -151,7 +175,7 @@ class SkipListGc final {
  }

 public:
-  SkipListGc() {
+  explicit SkipListGc(MemoryResource *memory) : memory_(memory) {
    static_assert(sizeof(Block) % kLinuxPageSize == 0,
                  "It is recommended that you set the kSkipListGcBlockSize "
                  "constant so that the size of SkipListGc::Block is a "
@ -166,15 +190,17 @@ class SkipListGc final {
  ~SkipListGc() {
    Block *head = head_.load(std::memory_order_acquire);
    while (head != nullptr) {
+      Allocator<Block> block_allocator(memory_);
      Block *prev = head->prev.load(std::memory_order_acquire);
      head->~Block();
-      free(head);
+      block_allocator.deallocate(head, 1);
      head = prev;
    }
    std::optional<TDeleted> item;
    while ((item = deleted_.Pop())) {
+      size_t bytes = SkipListNodeSize(*item->second);
      item->second->~TNode();
-      free(item->second);
+      memory_->Deallocate(item->second, bytes);
    }
  }

@ -256,13 +282,14 @@ class SkipListGc final {
      // thread doesn't have a pointer to the block that it got from reading
      // `head_`. We bail out here, this block will be freed next time.
      if (remove_block && next != nullptr) {
+        Allocator<Block> block_allocator(memory_);
        CHECK(tail == tail_.load(std::memory_order_acquire))
            << "Can't remove SkipListGc block that is in the middle!";
        next->prev.store(nullptr, std::memory_order_release);
        tail_.store(next, std::memory_order_release);
        // Destroy the block.
        tail->~Block();
-        free(tail);
+        block_allocator.deallocate(tail, 1);
      }
      tail = next;
    }
@ -270,8 +297,9 @@ class SkipListGc final {
    std::optional<TDeleted> item;
    while ((item = deleted_.Pop())) {
      if (item->first < last_dead) {
+        size_t bytes = SkipListNodeSize(*item->second);
        item->second->~TNode();
-        free(item->second);
+        memory_->Deallocate(item->second, bytes);
      } else {
        leftover.Push(*item);
      }
@ -281,7 +309,10 @@ class SkipListGc final {
    }
  }

+  MemoryResource *GetMemoryResource() const { return memory_; }
+
 private:
+  MemoryResource *memory_;
  SpinLock lock_;
  std::atomic<uint64_t> accessor_id_{0};
  std::atomic<Block *> head_{nullptr};
@ -457,6 +488,9 @@ class SkipList final {
  using TNode = SkipListNode<TObj>;

 public:
+  /// Allocator type so that STL containers are aware that we need one.
+  using allocator_type = Allocator<TNode>;
+
  class ConstIterator;

  class Iterator final {
@ -749,35 +783,40 @@ class SkipList final {
    uint64_t id_{0};
  };

-  SkipList() {
+  explicit SkipList(MemoryResource *memory = NewDeleteResource())
+      : gc_(memory) {
    static_assert(kSkipListMaxHeight <= 32,
                  "The SkipList height must be less or equal to 32!");
-    // Here we use `calloc` instead of `malloc` to ensure that the memory is
-    // filled with zeros before we call the constructor. We don't use `malloc` +
-    // `memset` because `calloc` is smarter:
-    // https://stackoverflow.com/questions/2688466/why-mallocmemset-is-slower-than-calloc/2688522#2688522
-    // Also, here we don't call the `SkipListNode` constructor so that the
-    // `TObj` doesn't have to have a default constructor. The initialization of
-    // the `height` and `lock` variables is done manually.
+    void *ptr = memory->Allocate(MaxSkipListNodeSize<TObj>());
+    // `calloc` would be faster, but the API has no such call.
+    memset(ptr, 0, MaxSkipListNodeSize<TObj>());
+    // Here we don't call the `SkipListNode` constructor so that the `TObj`
+    // doesn't have to have a default constructor. The initialization of the
+    // `height` and `lock` variables is done manually.
    // NOTE: The `head_` node doesn't have a valid `TObj` (because we didn't
    // call the constructor), so you mustn't perform any comparisons using its
    // value.
-    head_ = reinterpret_cast<TNode *>(calloc(
-        1, sizeof(TNode) + kSkipListMaxHeight * sizeof(std::atomic<TNode *>)));
+    head_ = static_cast<TNode *>(ptr);
    head_->height = kSkipListMaxHeight;
    new (&head_->lock) utils::SpinLock();
  }

  SkipList(SkipList &&other) noexcept
-      : head_(other.head_), size_(other.size_.load()) {
+      : head_(other.head_),
+        gc_(other.GetMemoryResource()),
+        size_(other.size_.load()) {
    other.head_ = nullptr;
  }
+
  SkipList &operator=(SkipList &&other) noexcept {
+    CHECK(other.GetMemoryResource() == GetMemoryResource())
+        << "Move assignment with different MemoryResource is not supported";
    TNode *head = head_;
    while (head != nullptr) {
      TNode *succ = head->nexts[0].load(std::memory_order_acquire);
+      size_t bytes = SkipListNodeSize(*head);
      head->~TNode();
-      free(head);
+      GetMemoryResource()->Deallocate(head, bytes);
      head = succ;
    }
    head_ = other.head_;
@ -794,15 +833,16 @@ class SkipList final {
      TNode *curr = head_->nexts[0].load(std::memory_order_acquire);
      while (curr != nullptr) {
        TNode *succ = curr->nexts[0].load(std::memory_order_acquire);
+        size_t bytes = SkipListNodeSize(*curr);
        curr->~TNode();
-        free(curr);
+        GetMemoryResource()->Deallocate(curr, bytes);
        curr = succ;
      }
      // We need to free the `head_` node manually because we didn't call its
      // constructor (see the note in the `SkipList` constructor). We mustn't
      // call the `TObj` destructor because we didn't call its constructor.
      head_->lock.~SpinLock();
-      free(head_);
+      GetMemoryResource()->Deallocate(head_, SkipListNodeSize(*head_));
    }
  }

@ -815,6 +855,8 @@ class SkipList final {
  /// atomic operation.
  uint64_t size() const { return size_.load(std::memory_order_acquire); }

+  MemoryResource *GetMemoryResource() const { return gc_.GetMemoryResource(); }
+
 private:
  template <typename TKey>
  int find_node(const TKey &key, TNode *preds[], TNode *succs[]) const {
@ -876,13 +918,15 @@ class SkipList final {

      if (!valid) continue;

-      // Here we use `calloc` instead of `malloc` to ensure that the memory is
-      // filled with zeros before we call the constructor. We don't use `malloc`
-      // + `memset` because `calloc` is smarter:
-      // https://stackoverflow.com/questions/2688466/why-mallocmemset-is-slower-than-calloc/2688522#2688522
-      TNode *new_node = new (
-          calloc(1, sizeof(TNode) + top_layer * sizeof(std::atomic<TNode *>)))
-          TNode(top_layer, std::forward<TObjUniv>(object));
+      size_t node_bytes =
+          sizeof(TNode) + top_layer * sizeof(std::atomic<TNode *>);
+      void *ptr = GetMemoryResource()->Allocate(node_bytes);
+      // `calloc` would be faster, but the API has no such call.
+      memset(ptr, 0, node_bytes);
+      auto *new_node = static_cast<TNode *>(ptr);
+      // Construct through allocator so it propagates if needed.
+      Allocator<TNode> allocator(GetMemoryResource());
+      allocator.construct(new_node, top_layer, std::forward<TObjUniv>(object));

      // The paper is also wrong here. It states that the loop should go up to
      // `top_layer` which is wrong.
@ -1118,6 +1162,7 @@ class SkipList final {

 private:
  TNode *head_{nullptr};
+  // gc_ also stores the only copy of `MemoryResource *`, to save space.
  mutable SkipListGc<TObj> gc_;

  std::mt19937 gen_{std::random_device{}()};
--- a/tests/benchmark/skip_list_vs_stl.cpp
+++ b/tests/benchmark/skip_list_vs_stl.cpp
@ -19,7 +19,7 @@ class SkipListSetInsertFixture : public benchmark::Fixture {
 protected:
  void SetUp(const benchmark::State &state) override {
    if (state.thread_index == 0) {
-      list = {};
+      list = utils::SkipList<uint64_t>();
    }
  }

@ -178,7 +178,7 @@ class SkipListMapInsertFixture : public benchmark::Fixture {
 protected:
  void SetUp(const benchmark::State &state) override {
    if (state.thread_index == 0) {
-      list = {};
+      list = utils::SkipList<MapObject>();;
    }
  }