From 3e739f33c9366e2a6092b1e42b65e51aedf2eda8 Mon Sep 17 00:00:00 2001 From: Vinko Kasljevic Date: Thu, 14 Feb 2019 08:54:17 +0100 Subject: [PATCH] Add LruCache implementation and tests Reviewers: msantl, ipaljak, teon.banek Reviewed By: msantl, teon.banek Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D1858 --- src/distributed/cache.hpp | 57 ---------- src/distributed/data_manager.hpp | 4 +- src/utils/cache.hpp | 183 +++++++++++++++++++++++++++++++ tests/unit/CMakeLists.txt | 3 + tests/unit/cache.cpp | 73 ++++++++++++ 5 files changed, 261 insertions(+), 59 deletions(-) delete mode 100644 src/distributed/cache.hpp create mode 100644 src/utils/cache.hpp create mode 100644 tests/unit/cache.cpp diff --git a/src/distributed/cache.hpp b/src/distributed/cache.hpp deleted file mode 100644 index 8aa3ecf2f..000000000 --- a/src/distributed/cache.hpp +++ /dev/null @@ -1,57 +0,0 @@ -/// @file - -#pragma once - -#include -#include - -#include "distributed/data_rpc_clients.hpp" -#include "storage/distributed/gid.hpp" - -namespace database { -class Storage; -} - -namespace distributed { - -// TODO Improvements: -// 1) Use combination of std::unoredered_map::iterator -// and std::list>. Use map for quick access and -// checking if TKey exists in map, list for keeping track of LRU order. -// -// 2) Implement adaptive replacement cache policy instead of LRU. -// http://theory.stanford.edu/~megiddo/pdf/IEEE_COMPUTER_0404.pdf/ - -/// Used for caching objects. Uses least recently used page replacement -/// algorithm for evicting elements when maximum size is reached. This class -/// is NOT thread safe. 
-///
-/// @see ThreadSafeCache
-/// @tparam TKey - any object that has hash() defined
-/// @tparam TValue - any object
-template <typename TKey, typename TValue>
-class Cache {
- public:
-  using Iterator = typename std::unordered_map<TKey, TValue>::iterator;
-
-  Cache() = default;
-
-  Iterator find(const TKey &key) { return cache_.find(key); }
-
-  std::pair<Iterator, bool> emplace(TKey &&key, TValue &&value) {
-    return cache_.emplace(std::forward<TKey>(key), std::forward<TValue>(value));
-  }
-
-  void erase(const TKey &key) { cache_.erase(key); }
-
-  Iterator end() { return cache_.end(); }
-
-  bool contains(const TKey &key) { return find(key) != end(); }
-
-  void clear() { cache_.clear(); }
-
- private:
-  std::unordered_map<TKey, TValue> cache_;
-};
-
-}  // namespace distributed
diff --git a/src/distributed/data_manager.hpp b/src/distributed/data_manager.hpp
index d9282ec70..19829b58b 100644
--- a/src/distributed/data_manager.hpp
+++ b/src/distributed/data_manager.hpp
@@ -4,9 +4,9 @@
 
 #include "data_structures/concurrent/concurrent_map.hpp"
 #include "database/distributed/graph_db.hpp"
-#include "distributed/cache.hpp"
 #include "distributed/data_rpc_clients.hpp"
 #include "transactions/type.hpp"
+#include "utils/cache.hpp"
 
 class Vertex;
 class Edge;
@@ -32,7 +32,7 @@ struct CachedRecordData {
 /// Handles remote data caches for edges and vertices, per transaction.
 class DataManager {
   template <typename TRecord>
-  using CacheG = Cache<gid::Gid, CachedRecordData<TRecord>>;
+  using CacheG = utils::Cache<gid::Gid, CachedRecordData<TRecord>>;
 
   template <typename TRecord>
   using CacheT = ConcurrentMap<tx::TransactionId, CacheG<TRecord>>;
 
diff --git a/src/utils/cache.hpp b/src/utils/cache.hpp
new file mode 100644
index 000000000..f2bb45563
--- /dev/null
+++ b/src/utils/cache.hpp
@@ -0,0 +1,183 @@
+/// @file
+
+#pragma once
+
+#include <experimental/optional>
+#include <unordered_map>
+
+namespace utils {
+namespace impl {
+template <typename TKey, typename TValue>
+struct Node {
+  TKey key;
+  TValue value;
+  Node *prev{nullptr};
+  Node *next{nullptr};
+};
+
+/// Helper class used for maintaining lru order.
+template +class LruList { + public: + LruList() = default; + LruList(const LruList &) = delete; + LruList(LruList &&) = delete; + LruList &operator=(const LruList &) = delete; + LruList &operator=(LruList &&) = delete; + ~LruList() { Clear(); } + + Node *AddPageToHead(const TKey &key, const TValue &value) { + auto *page = new Node{key, value}; + if (!front_ && !rear_) { + front_ = rear_ = page; + } else { + page->next = front_; + front_->prev = page; + front_ = page; + } + return page; + } + + void MovePageToHead(Node *page) { + if (page == front_) { + return; + } + if (page == rear_) { + rear_ = rear_->prev; + rear_->next = nullptr; + } else { + page->prev->next = page->next; + page->next->prev = page->prev; + } + + page->next = front_; + page->prev = nullptr; + front_->prev = page; + front_ = page; + } + void RemoveRearPage() { + if (IsEmpty()) { + return; + } + if (front_ == rear_) { + delete rear_; + front_ = rear_ = nullptr; + } else { + auto *temp = rear_; + rear_ = rear_->prev; + rear_->next = nullptr; + delete temp; + } + } + + Node *Rear() { return rear_; } + + void Clear() { + while (!IsEmpty()) { + RemoveRearPage(); + } + } + + bool IsEmpty() const { return rear_ == nullptr; } + + private: + Node *front_{nullptr}; + Node *rear_{nullptr}; +}; +} // namespace impl + +/// Used for caching objects. Uses least recently used page replacement +/// algorithm for evicting elements when maximum size is reached. This class +/// is NOT thread safe. 
+/// +/// @tparam TKey - any object that has hash() defined +/// @tparam TValue - any object +template +class LruCache { + public: + explicit LruCache(size_t capacity) : capacity_(capacity) {} + + LruCache(const LruCache &) = delete; + LruCache(LruCache &&) = delete; + LruCache &operator=(const LruCache &) = delete; + LruCache &operator=(LruCache &&) = delete; + ~LruCache() = default; + + std::experimental::optional Find(const TKey &key) { + auto found = access_map_.find(key); + if (found == access_map_.end()) { + return std::experimental::nullopt; + } + + // move the page to front + lru_order_.MovePageToHead(found->second); + return std::experimental::make_optional(found->second->value); + } + + /// Inserts given key, value pair to cache. If key already exists in a + /// cache, then the value is overwritten. + void Insert(const TKey &key, const TValue &value) { + auto found = access_map_.find(key); + if (found != access_map_.end()) { + // if key already present, update value and move page to head + found->second->value = value; + lru_order_.MovePageToHead(found->second); + return; + } + + if (access_map_.size() == capacity_) { + // remove rear page + auto to_del_key = lru_order_.Rear()->key; + access_map_.erase(to_del_key); + lru_order_.RemoveRearPage(); + } + + // add new page to head to List + auto *page = lru_order_.AddPageToHead(key, value); + access_map_.emplace(key, page); + } + + void Clear() { + access_map_.clear(); + lru_order_.Clear(); + } + + private: + size_t capacity_; + impl::LruList lru_order_; + std::unordered_map *> access_map_; +}; + +/// Used for caching objects. Uses least recently used page replacement +/// algorithm for evicting elements when maximum size is reached. This class +/// is NOT thread safe. 
+/// +/// @see ThreadSafeCache +/// @tparam TKey - any object that has hash() defined +/// @tparam TValue - any object +template +class Cache { + public: + using Iterator = typename std::unordered_map::iterator; + + Cache() = default; + + Iterator find(const TKey &key) { return cache_.find(key); } + + std::pair emplace(TKey &&key, TValue &&value) { + return cache_.emplace(std::forward(key), std::forward(value)); + } + + void erase(const TKey &key) { cache_.erase(key); } + + Iterator end() { return cache_.end(); } + + bool contains(const TKey &key) { return find(key) != end(); } + + void clear() { cache_.clear(); } + + private: + std::unordered_map cache_; +}; + +} // namespace utils diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index bf858523c..3120ca7e1 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -25,6 +25,9 @@ target_link_libraries(${test_prefix}ast_serialization mg-distributed kvstore_dum add_unit_test(bolt_encoder.cpp) target_link_libraries(${test_prefix}bolt_encoder mg-single-node kvstore_dummy_lib) +add_unit_test(cache.cpp) +target_link_libraries(${test_prefix}cache mg-distributed kvstore_dummy_lib) + add_unit_test(concurrent_id_mapper_distributed.cpp) target_link_libraries(${test_prefix}concurrent_id_mapper_distributed mg-distributed kvstore_dummy_lib) diff --git a/tests/unit/cache.cpp b/tests/unit/cache.cpp new file mode 100644 index 000000000..0f10454bd --- /dev/null +++ b/tests/unit/cache.cpp @@ -0,0 +1,73 @@ +#include + +#include +#include +#include + +#include "utils/cache.hpp" + +class CacheTest : public ::testing::Test { + public: + void SetUp() override { + for (long i = 0; i < 5; ++i) { + cache_.Insert(i, to_insert_); + } + } + + long to_insert_ = 0; + utils::LruCache cache_{5}; +}; + +TEST_F(CacheTest, InsertTest) { + // cache is full already + cache_.Insert(5, to_insert_); // 0 is evicted + EXPECT_FALSE(cache_.Find(0)); + EXPECT_TRUE(cache_.Find(1)); + EXPECT_TRUE(cache_.Find(2)); + 
EXPECT_TRUE(cache_.Find(3)); + EXPECT_TRUE(cache_.Find(4)); + EXPECT_TRUE(cache_.Find(5)); + + cache_.Insert(6, to_insert_); // 1 is evicted + + EXPECT_FALSE(cache_.Find(0)); + EXPECT_FALSE(cache_.Find(1)); + EXPECT_TRUE(cache_.Find(2)); + EXPECT_TRUE(cache_.Find(3)); + EXPECT_TRUE(cache_.Find(4)); + EXPECT_TRUE(cache_.Find(5)); + EXPECT_TRUE(cache_.Find(6)); +} + +TEST_F(CacheTest, GetTest) { + // cache is full already + // 4 -> 3 -> 2 -> 1 -> 0 + + EXPECT_TRUE(cache_.Find(2)); + EXPECT_TRUE(cache_.Find(4)); + EXPECT_TRUE(cache_.Find(0)); + EXPECT_TRUE(cache_.Find(1)); + // order has changed + // 1 -> 0 -> 4 -> 2 -> 3 + + cache_.Insert(10, to_insert_); + EXPECT_FALSE(cache_.Find(3)); + + cache_.Insert(11, to_insert_); + EXPECT_FALSE(cache_.Find(2)); + + cache_.Insert(12, to_insert_); + EXPECT_FALSE(cache_.Find(4)); + + cache_.Insert(13, to_insert_); + EXPECT_FALSE(cache_.Find(0)); + + cache_.Insert(14, to_insert_); + EXPECT_FALSE(cache_.Find(1)); +} + +int main(int argc, char **argv) { + ::testing::InitGoogleTest(&argc, argv); + google::InitGoogleLogging(argv[0]); + return RUN_ALL_TESTS(); +}