From c3c8fb66209e6ee8c8e71cbf2f0cc28e10b5cda9 Mon Sep 17 00:00:00 2001 From: Kruno Tomola Fabro <krunotf@memgraph.io> Date: Tue, 9 Aug 2016 16:44:39 +0100 Subject: [PATCH] Commit before branching to change EdgeModel and EdgeRecord. --- include/mvcc/edge_record.hpp | 11 + include/storage/model/edge_map.hpp | 31 ++ src/data_structures/map/rh_hashmap.hpp | 137 +++++++- src/data_structures/map/rh_hashmultimap.hpp | 370 ++++++++++++++++++++ src/storage/model/vertex_model.hpp | 6 +- tests/unit/rh_hashmap.cpp | 60 +++- tests/unit/rh_hashmultimap.cpp | 128 +++++++ 7 files changed, 728 insertions(+), 15 deletions(-) create mode 100644 include/mvcc/edge_record.hpp create mode 100644 include/storage/model/edge_map.hpp create mode 100644 src/data_structures/map/rh_hashmultimap.hpp create mode 100644 tests/unit/rh_hashmultimap.cpp diff --git a/include/mvcc/edge_record.hpp b/include/mvcc/edge_record.hpp new file mode 100644 index 000000000..0ee9eee80 --- /dev/null +++ b/include/mvcc/edge_record.hpp @@ -0,0 +1,11 @@ +#include "mvcc/version_list.hpp" +#include "storage/edge.hpp" + +// class EdgeRecord: public mvcc::VersionList<Edge>{ +// public: +// using mvcc::VersionList<Edge>; +// +// VertexRecord* get_key(){ +// //TODO +// } +// }; diff --git a/include/storage/model/edge_map.hpp b/include/storage/model/edge_map.hpp new file mode 100644 index 000000000..69414f465 --- /dev/null +++ b/include/storage/model/edge_map.hpp @@ -0,0 +1,31 @@ +#pragma once + +#include "data_structures/map/rh_hashmultimap.hpp" +#include "mvcc/version_list.hpp" + +class EdgeMap +{ +public: + auto begin() { return edges.begin(); } + auto begin() const { return edges.begin(); } + auto cbegin() const { return edges.begin(); } + + auto end() { return edges.end(); } + auto end() const { return edges.end(); } + auto cend() const { return edges.end(); } + + size_t degree() const { return edges.size(); } + + void add(EdgeRecord *edge) { edges.add(edge); } + + void remove(EdgeRecord *edge) + { + // TODO + throw 
std::bad_function_call(); // construct the exception object; the constructor cannot be named directly + } + + void clear() { edges.clear(); } + +private: + RhHashMultiMap<VertexRecord *, EdgeRecord> edges; +}; diff --git a/src/data_structures/map/rh_hashmap.hpp b/src/data_structures/map/rh_hashmap.hpp index d2a5d5561..7e28105ff 100644 --- a/src/data_structures/map/rh_hashmap.hpp +++ b/src/data_structures/map/rh_hashmap.hpp @@ -1,3 +1,4 @@ +#include "utils/crtp.hpp" #include "utils/option_ptr.hpp" // HashMap with RobinHood collision resolution policy. @@ -33,6 +34,114 @@ private: size_t data; }; + template <class It> + class IteratorBase : public Crtp<It> + { + protected: + IteratorBase() : map(nullptr) { index = ~((size_t)0); } + IteratorBase(const RhHashMap *map) : map(map) + { + index = 0; + while (index < map->capacity && !map->array[index].valid()) { + index++; + } + if (index == map->capacity) { + this->map = nullptr; // the parameter shadows the member; reset the member so this equals end() + index = ~((size_t)0); + } + } + + const RhHashMap *map; + size_t index; + + public: + IteratorBase(const IteratorBase &) = default; + IteratorBase(IteratorBase &&) = default; + + D *operator*() + { + assert(index < map->capacity && map->array[index].valid()); + return map->array[index].ptr(); + } + + D *operator->() + { + assert(index < map->capacity && map->array[index].valid()); + return map->array[index].ptr(); + } + + It &operator++() + { + assert(index < map->capacity && map->array[index].valid()); + do { + index++; + if (index >= map->capacity) { + map = nullptr; + index = ~((size_t)0); + break; + } + } while (!map->array[index].valid()); + + return this->derived(); + } + + It &operator++(int) { return operator++(); } + + friend bool operator==(const It &a, const It &b) + { + return a.index == b.index && a.map == b.map; + } + + friend bool operator!=(const It &a, const It &b) { return !(a == b); } + }; + +public: + class ConstIterator : public IteratorBase<ConstIterator> + { + friend class RhHashMap; + ConstIterator(const RhHashMap *map) : IteratorBase<ConstIterator>(map) + { + } + + public: + 
ConstIterator() = default; + ConstIterator(const ConstIterator &) = default; + + const D *operator->() + { + return IteratorBase<ConstIterator>::operator->(); + } + + const D *operator*() + { + return IteratorBase<ConstIterator>::operator*(); + } + }; + + class Iterator : public IteratorBase<Iterator> + { + friend class RhHashMap; + Iterator(const RhHashMap *map) : IteratorBase<Iterator>(map) {} + + public: + Iterator() = default; + Iterator(const Iterator &) = default; + }; + + RhHashMap() {} + + Iterator begin() { return Iterator(this); } + + ConstIterator begin() const { return ConstIterator(this); } + + ConstIterator cbegin() const { return ConstIterator(this); } + + Iterator end() { return Iterator(); } + + ConstIterator end() const { return ConstIterator(); } + + ConstIterator cend() const { return ConstIterator(); } + void init_array(size_t size) { size_t bytes = sizeof(Combined) * size; @@ -61,13 +170,10 @@ private: } } - delete[] a; + free(a); } -public: - RhHashMap() {} - - OptionPtr<D> get(const K &key) + OptionPtr<D> find(const K &key) { size_t mask = this->mask(); size_t now = index(key, mask); @@ -130,16 +236,35 @@ public: return insert(data); } + void clear() + { + free(array); + array = nullptr; + capacity = 0; + count = 0; + } + size_t size() { return count; } private: size_t index(const K &key, size_t mask) { - return std::hash<K>()(key) & mask; + return hash(std::hash<K>()(key)) & mask; } + size_t hash(size_t x) const + { + x = (x ^ (x >> 30)) * UINT64_C(0xbf58476d1ce4e5b9); + x = (x ^ (x >> 27)) * UINT64_C(0x94d049bb133111eb); + x = x ^ (x >> 31); + return x; + } + size_t mask() { return capacity - 1; } Combined *array = nullptr; size_t capacity = 0; size_t count = 0; + + friend class IteratorBase<Iterator>; + friend class IteratorBase<ConstIterator>; }; diff --git a/src/data_structures/map/rh_hashmultimap.hpp b/src/data_structures/map/rh_hashmultimap.hpp new file mode 100644 index 000000000..781938bda --- /dev/null +++ 
b/src/data_structures/map/rh_hashmultimap.hpp @@ -0,0 +1,370 @@ +#include "utils/crtp.hpp" +#include "utils/option_ptr.hpp" + +// HashMultiMap with RobinHood collision resolution policy. +// Single threaded. +// Entries are stored as pointers aligned to 8B. +// Entries must know their key. +// D must have method K& get_key() +// K must be comparable with ==. +// The map behaves as if it isn't the owner of the entries. +template <class K, class D, size_t init_size_pow2 = 2> +class RhHashMultiMap +{ +private: + class Combined + { + + public: + Combined() : data(0) {} + + Combined(D *data, size_t off) + { + assert((((size_t)data) & 0x7) == 0 && off < 8); // cast first: & is not defined on pointers; low 3 bits hold off + this->data = ((size_t)data) | off; + } + + bool valid() { return data != 0; } + + size_t off() { return data & 0x7; } + + D *ptr() { return (D *)(data & (~(0x7))); } + + private: + size_t data; + }; + + template <class It> + class IteratorBase : public Crtp<It> + { + protected: + IteratorBase() : map(nullptr) { advanced = index = ~((size_t)0); } + IteratorBase(const RhHashMultiMap *map) : map(map) + { + index = 0; + while (index < map->capacity && !map->array[index].valid()) { + index++; + } + if (index == map->capacity) { + this->map = nullptr; // the parameter shadows the member; reset the member so this equals end() + advanced = index = ~((size_t)0); + } else { + advanced = index; + } + } + IteratorBase(const RhHashMultiMap *map, size_t start) + : map(map), index(start), advanced(0) + { + } + + const RhHashMultiMap *map; + size_t advanced; + size_t index; + + public: + IteratorBase(const IteratorBase &) = default; + IteratorBase(IteratorBase &&) = default; + + D *operator*() + { + assert(index < map->capacity && map->array[index].valid()); + return map->array[index].ptr(); + } + + D *operator->() + { + assert(index < map->capacity && map->array[index].valid()); + return map->array[index].ptr(); + } + + It &operator++() + { + assert(index < map->capacity && map->array[index].valid()); + auto mask = map->mask(); + do { + advanced++; + if (advanced >= map->capacity) { + map = nullptr; + advanced = index = 
~((size_t)0); + break; + } + index = (index + 1) & mask; // step from the current slot (wrapping) so iterators created by find() continue from where they started + } while (!map->array[index].valid()); + + return this->derived(); + } + // + // // True if value is present + // bool is_present() { return map != nullptr; } + + It &operator++(int) { return operator++(); } + + friend bool operator==(const It &a, const It &b) + { + return a.index == b.index && a.map == b.map; + } + + friend bool operator!=(const It &a, const It &b) { return !(a == b); } + }; + +public: + class ConstIterator : public IteratorBase<ConstIterator> + { + friend class RhHashMultiMap; + ConstIterator(const RhHashMultiMap *map) + : IteratorBase<ConstIterator>(map) + { + } + ConstIterator(const RhHashMultiMap *map, size_t index) + : IteratorBase<ConstIterator>(map, index) + { + } + + public: + ConstIterator() = default; + ConstIterator(const ConstIterator &) = default; + + const D *operator->() + { + return IteratorBase<ConstIterator>::operator->(); + } + + const D *operator*() + { + return IteratorBase<ConstIterator>::operator*(); + } + }; + + class Iterator : public IteratorBase<Iterator> + { + friend class RhHashMultiMap; + Iterator(const RhHashMultiMap *map) : IteratorBase<Iterator>(map) {} + Iterator(const RhHashMultiMap *map, size_t index) + : IteratorBase<Iterator>(map, index) + { + } + + public: + Iterator() = default; + Iterator(const Iterator &) = default; + }; + + RhHashMultiMap() {} + + RhHashMultiMap(const RhHashMultiMap &other) + { + capacity = other.capacity; + count = other.count; + if (capacity > 0) { + size_t bytes = sizeof(Combined) * capacity; + array = (Combined *)malloc(bytes); + memcpy(array, other.array, bytes); + + } else { + array = nullptr; + } + } + + RhHashMultiMap(RhHashMultiMap &&other) + { + capacity = other.capacity; + count = other.count; + array = other.array; + + other.array = nullptr; + other.capacity = 0; + other.count = 0; + } + + Iterator begin() { return Iterator(this); } + + ConstIterator begin() const { return ConstIterator(this); } + + ConstIterator cbegin() 
const { return ConstIterator(this); } + + Iterator end() { return Iterator(); } + + ConstIterator end() const { return ConstIterator(); } + + ConstIterator cend() const { return ConstIterator(); } + + void init_array(size_t size) + { + size_t bytes = sizeof(Combined) * size; + array = (Combined *)malloc(bytes); + memset(array, 0, bytes); + capacity = size; + } + + void increase_size() + { + if (capacity == 0) { + assert(array == nullptr && count == 0); + size_t new_size = 1 << init_size_pow2; + init_array(new_size); + return; + } + size_t new_size = capacity * 2; + size_t old_size = capacity; + auto a = array; + init_array(new_size); + count = 0; + + for (int i = 0; i < old_size; i++) { + if (a[i].valid()) { + add(a[i].ptr()); + } + } + + free(a); + } + + Iterator find(const K &key) + { + size_t mask = this->mask(); + size_t now = index(key, mask); + size_t off = 0; + + bool bef_init = false; + size_t before_off; + K before_key = key; + + size_t border = 8 <= capacity ? 8 : capacity; + while (off < border) { + Combined other = array[now]; + if (other.valid()) { + auto other_off = other.off(); + auto other_key = other.ptr()->get_key(); + if (other_off == off && key == other_key) { + return Iterator(this, now); + + } else if (other_off < off) { // Other is rich + break; + + } else if (bef_init) { // Else other has equal or greater + // offset, so he is poor. + if (before_off == other_off && before_key == other_key) { + if (count == capacity) { + break; + } + // Proceed + } else { + before_off = other_off; + before_key = other_key; + off++; + } + } else { + bef_init = true; + before_off = other_off; + before_key = other_key; + off++; + } + + } else { + break; + } + + now = (now + 1) & mask; + } + return end(); + } + + // Inserts element with the given key. 
+ void add(K key, D *data) + { + assert(key == data->get_key()); + + size_t mask = this->mask(); + size_t now = index(key, mask); + size_t off = 0; + + bool bef_init = false; + size_t before_off; + K before_key = key; + + size_t border = 8 <= capacity ? 8 : capacity; + while (off < border && count < capacity) { // a full table has no free slot: grow instead of probing forever + Combined other = array[now]; + if (other.valid()) { + auto other_off = other.off(); + auto other_key = other.ptr()->get_key(); + if (other_off == off && key == other_key) { + // Proceed + + } else if (other_off < off) { // Other is rich + array[now] = Combined(data, off); + + // Hacked reusing of function + data = other.ptr(); + key = other_key; + off = other_off; + + off++; + } else if (bef_init) { // Else other has equal or greater + // offset, so he is poor. + if (before_off == other_off && before_key == other_key) { + if (count == capacity) { + break; + } + // Proceed + } else { + before_off = other_off; + before_key = other_key; + off++; + } + } else { + bef_init = true; + before_off = other_off; + before_key = other_key; + off++; + } + + } else { + array[now] = Combined(data, off); + count++; + return; + } + + now = (now + 1) & mask; + } + + increase_size(); + add(data); + } + + // Inserts element. 
+ void add(D *data) { add(data->get_key(), data); } + + void clear() + { + free(array); + array = nullptr; + capacity = 0; + count = 0; + } + + size_t size() const { return count; } + +private: + size_t index(const K &key, size_t mask) const + { + return hash(std::hash<K>()(key)) & mask; + } + + size_t hash(size_t x) const + { + x = (x ^ (x >> 30)) * UINT64_C(0xbf58476d1ce4e5b9); + x = (x ^ (x >> 27)) * UINT64_C(0x94d049bb133111eb); + x = x ^ (x >> 31); + return x; + } + + size_t mask() const { return capacity - 1; } + + Combined *array = nullptr; + size_t capacity = 0; + size_t count = 0; + + friend class IteratorBase<Iterator>; + friend class IteratorBase<ConstIterator>; +}; diff --git a/src/storage/model/vertex_model.hpp b/src/storage/model/vertex_model.hpp index 60be36072..a6642df89 100644 --- a/src/storage/model/vertex_model.hpp +++ b/src/storage/model/vertex_model.hpp @@ -1,12 +1,14 @@ #pragma once +#include "edge_list.hpp" #include "property_model.hpp" #include "storage/label/label_collection.hpp" -#include "edge_list.hpp" +// #include "storage/model/edge_map.hpp" class VertexModel : public PropertyModel { public: - EdgeList in, out; + EdgeList out; + EdgeList in; LabelCollection labels; }; diff --git a/tests/unit/rh_hashmap.cpp b/tests/unit/rh_hashmap.cpp index b8f7a5874..7a40f1857 100644 --- a/tests/unit/rh_hashmap.cpp +++ b/tests/unit/rh_hashmap.cpp @@ -29,11 +29,11 @@ TEST_CASE("Robin hood hashmap insert/get check") { RhHashMap<int, Data> map; - REQUIRE(!map.get(0).is_present()); + REQUIRE(!map.find(0).is_present()); auto ptr0 = new Data(0); REQUIRE(map.insert(ptr0)); - REQUIRE(map.get(0).is_present()); - REQUIRE(map.get(0).get() == ptr0); + REQUIRE(map.find(0).is_present()); + REQUIRE(map.find(0).get() == ptr0); } TEST_CASE("Robin hood hashmap double insert") @@ -49,13 +49,59 @@ TEST_CASE("Robin hood hashmap") RhHashMap<int, Data> map; for (int i = 0; i < 128; i++) { - REQUIRE(!map.get(i).is_present()); + REQUIRE(!map.find(i).is_present()); 
REQUIRE(map.insert(new Data(i))); - REQUIRE(map.get(i).is_present()); + REQUIRE(map.find(i).is_present()); } for (int i = 0; i < 128; i++) { - REQUIRE(map.get(i).is_present()); - REQUIRE(map.get(i).get()->get_key() == i); + REQUIRE(map.find(i).is_present()); + REQUIRE(map.find(i).get()->get_key() == i); + } +} + +TEST_CASE("Robin hood hashmap iterate") +{ + RhHashMap<int, Data> map; + + for (int i = 0; i < 128; i++) { + REQUIRE(!map.find(i).is_present()); + REQUIRE(map.insert(new Data(i))); + REQUIRE(map.find(i).is_present()); + } + + bool seen[128] = {false}; + for (auto e : map) { + auto key = e->get_key(); + REQUIRE(!seen[key]); + seen[key] = true; + } + for (int i = 0; i < 128; i++) { + REQUIRE(seen[i]); + } +} + +TEST_CASE("Robin hood hashmap checked") +{ + RhHashMap<int, Data> map; + std::map<int, Data *> s_map; + + for (int i = 0; i < 128; i++) { + int key = std::rand(); + auto data = new Data(key); + if (map.insert(data)) { + REQUIRE(s_map.find(key) == s_map.end()); + s_map[key] = data; + } else { + REQUIRE(s_map.find(key) != s_map.end()); + } + } + + for (auto e : map) { + REQUIRE(s_map.find(e->get_key()) != s_map.end()); + } + + for (auto e : s_map) { + REQUIRE(map.find(e.first).get() == e.second); } } diff --git a/tests/unit/rh_hashmultimap.cpp b/tests/unit/rh_hashmultimap.cpp new file mode 100644 index 000000000..6bb916d49 --- /dev/null +++ b/tests/unit/rh_hashmultimap.cpp @@ -0,0 +1,128 @@ +#define CATCH_CONFIG_MAIN +#include "catch.hpp" + +#include "data_structures/map/rh_hashmultimap.hpp" + +class Data +{ + +private: + size_t data = 0; + int key; + +public: + Data(int key) : key(key) {} + + int &get_key() { return key; } +}; + +TEST_CASE("Robin hood hashmultimap basic functionality") +{ + RhHashMultiMap<int, Data> map; + + REQUIRE(map.size() == 0); + map.add(new Data(0)); + REQUIRE(map.size() == 1); +} + +TEST_CASE("Robin hood hashmultimap insert/get check") +{ + RhHashMultiMap<int, Data> map; + + REQUIRE(map.find(0) == map.end()); + auto ptr0 = new 
Data(0); + map.add(ptr0); + REQUIRE(map.find(0) != map.end()); + REQUIRE(*map.find(0) == ptr0); +} + +TEST_CASE("Robin hood hashmultimap double insert") +{ + RhHashMultiMap<int, Data> map; + + auto ptr0 = new Data(0); + auto ptr1 = new Data(0); + map.add(ptr0); + map.add(ptr1); + + for (auto e : map) { + if (ptr0 == e) { + ptr0 = nullptr; + continue; + } + if (ptr1 == e) { + ptr1 = nullptr; + continue; + } + REQUIRE(false); + } +} + +TEST_CASE("Robin hood hashmultimap") +{ + RhHashMultiMap<int, Data> map; + + for (int i = 0; i < 128; i++) { + REQUIRE(map.find(i) == map.end()); + map.add(new Data(i)); + REQUIRE(map.find(i) != map.end()); + } + + for (int i = 0; i < 128; i++) { + REQUIRE(map.find(i) != map.end()); + REQUIRE(map.find(i)->get_key() == i); + } +} + +TEST_CASE("Robin hood hashmultimap iterate") +{ + RhHashMultiMap<int, Data> map; + + for (int i = 0; i < 128; i++) { + REQUIRE(map.find(i) == map.end()); + map.add(new Data(i)); + REQUIRE(map.find(i) != map.end()); + } + + bool seen[128] = {false}; + for (auto e : map) { + auto key = e->get_key(); + REQUIRE(!seen[key]); + seen[key] = true; + } + for (int i = 0; i < 128; i++) { + REQUIRE(seen[i]); + } +} + +TEST_CASE("Robin hood hashmultimap checked") +{ + RhHashMultiMap<int, Data> map; + std::multimap<int, Data *> s_map; + + for (int i = 0; i < 1638; i++) { + int key = (std::rand() % 100) << 3; + + auto data = new Data(key); + map.add(data); + s_map.insert(std::pair<int, Data *>(key, data)); + } + + for (auto e : map) { + auto it = s_map.find(e->get_key()); + + while (it != s_map.end() && it->second != e) { + it++; + } + REQUIRE(it->second == e); + } + + for (auto e : s_map) { + auto it = map.find(e.first); + + while (it != map.end() && *it != e.second) { + it++; + } + REQUIRE(e.second == *it); + } +}