From 301649466983bfac3a9ff5d6eae20f8013d3e9ce Mon Sep 17 00:00:00 2001 From: Kruno Tomola Fabro Date: Tue, 9 Aug 2016 11:53:10 +0100 Subject: [PATCH] Added RobinHood HashMap and unit tested. --- poc/astar.cpp | 321 +++++++++++++++++++++++++ src/data_structures/map/rh_hashmap.hpp | 145 +++++++++++ src/mvcc/version_list.hpp | 61 +++-- src/utils/option_ptr.hpp | 20 ++ tests/unit/rh_hashmap.cpp | 61 +++++ 5 files changed, 574 insertions(+), 34 deletions(-) create mode 100644 src/data_structures/map/rh_hashmap.hpp create mode 100644 src/utils/option_ptr.hpp create mode 100644 tests/unit/rh_hashmap.cpp diff --git a/poc/astar.cpp b/poc/astar.cpp index 84638900f..c2fc542b4 100644 --- a/poc/astar.cpp +++ b/poc/astar.cpp @@ -1,6 +1,327 @@ +#include #include +#include +#include +#include +#include +#include + +#include "database/db.hpp" + +using namespace std; + +void load_graph_dummy(Db &db); +void load_csv(Db &db, char *file_path, char *edge_file_path); + +class Node +{ +public: + Node *parent = {nullptr}; + double cost; + int depth = {0}; + Vertex *vertex; + + Node(Vertex *va, double cost) : cost(cost), vertex(va) {} + Node(Vertex *va, double cost, Node *parent) + : cost(cost), vertex(va), parent(parent), depth(parent->depth + 1) + { + } +}; + +// class Iterator : public Crtp +// { +// public: +// Vertex *operator*() +// { +// assert(head != nullptr); +// return head->vertex; +// } +// +// Vertex *operator->() +// { +// assert(head != nullptr); +// return head->vertex; +// } +// +// Iterator &operator++() +// { +// assert(head != nullptr); +// head = head->parent; +// return this->derived(); +// } +// +// Iterator &operator++(int) { return operator++(); } +// +// friend bool operator==(const Iterator &a, const Iterator &b) +// { +// return a.head == b.head; +// } +// +// friend bool operator!=(const Iterator &a, const Iterator &b) +// { +// return !(a == b); +// } +// +// Iterator end() { return Iterator(); } +// +// private: +// Node *head; +// }; + +void 
found_result(Node *bef) +{ + std::cout << "{score: " << bef->cost << endl; + while (bef != nullptr) { + std::cout << " " << *(bef->vertex) << endl; + bef = bef->parent; + } +} + +double calc_heuristic_cost_dummy(Edge *edge, Vertex *vertex) +{ + return 1 - vertex->data.props.at("score").as().value; +} + +typedef bool (*EdgeFilter)(tx::Transaction &t, Edge *, Node *before); +typedef bool (*VertexFilter)(tx::Transaction &t, Vertex *, Node *before); + +bool edge_filter_dummy(tx::Transaction &t, Edge *e, Node *before) +{ + return true; +} + +bool vertex_filter_dummy(tx::Transaction &t, Vertex *v, Node *before) +{ + return true; +} + +bool vertex_filter_contained_dummy(tx::Transaction &t, Vertex *v, Node *before) +{ + bool found; + do { + found = false; + before = before->parent; + if (before == nullptr) { + return true; + } + for (auto e : before->vertex->data.out) { + Edge *edge = e->find(t); + Vertex *e_v = edge->data.to->find(t); + if (e_v == v) { + found = true; + break; + } + } + } while (found); + return false; +} + +// Vertex filter ima max_depth funkcija te edge filter ima max_depth funkcija. +// Jedan za svaku dubinu. +// Filtri vracaju true ako element zadovoljava uvjete. 
+void a_star(Db &db, int64_t sys_id_start, uint max_depth, EdgeFilter e_filter[], + VertexFilter v_filter[], + double (*calc_heuristic_cost)(Edge *edge, Vertex *vertex), + int limit) +{ + auto &t = db.tx_engine.begin(); + + auto cmp = [](Node *left, Node *right) { return left->cost > right->cost; }; + std::priority_queue, decltype(cmp)> queue(cmp); + + Node *start = + new Node(db.graph.vertices.find(t, sys_id_start).vlist->find(t), 0); + queue.push(start); + int count = 0; + do { + auto now = queue.top(); + queue.pop(); + if (max_depth <= now->depth) { + found_result(now); + count++; + if (count >= limit) { + return; + } + continue; + } + + for (auto e : now->vertex->data.out) { + Edge *edge = e->find(t); + if (e_filter[now->depth](t, edge, now)) { + Vertex *v = edge->data.to->find(t); + if (v_filter[now->depth](t, v, now)) { + Node *n = new Node( + v, now->cost + calc_heuristic_cost(edge, v), now); + queue.push(n); + } + } + } + } while (!queue.empty()); + + // GUBI SE MEMORIJA JER SE NODOVI NEBRISU + + t.commit(); +} int main() { + Db db; + load_csv(db, "neo4j_nodes_export_2000.csv", "neo4j_edges_export_2000.csv"); + // + // load_graph_dummy(db); + // + EdgeFilter e_filters[] = {&edge_filter_dummy, &edge_filter_dummy, + &edge_filter_dummy, &edge_filter_dummy}; + VertexFilter f_filters[] = {&vertex_filter_dummy, &vertex_filter_dummy, + &vertex_filter_dummy, &vertex_filter_dummy}; + a_star(db, 0, 3, e_filters, f_filters, &calc_heuristic_cost_dummy, 10); + return 0; } + +void split(const string &s, char delim, vector &elems) +{ + stringstream ss(s); + string item; + while (getline(ss, item, delim)) { + elems.push_back(item); + } +} + +vector split(const string &s, char delim) +{ + vector elems; + split(s, delim, elems); + return elems; +} + +void load_csv(Db &db, char *file_path, char *edge_file_path) +{ + std::fstream file(file_path); + std::fstream e_file(edge_file_path); + + std::string line; + + auto &t = db.tx_engine.begin(); + int max_score = 1000000; + + // 
VERTEX import + int start_vertex_id = -1; + auto v = [&](auto id, auto labels, auto gar_id, auto cat_id) { + if (start_vertex_id < 0) { + start_vertex_id = id; + } + + auto vertex_accessor = db.graph.vertices.insert(t); + vertex_accessor.property("id", std::make_shared(id)); + vertex_accessor.property("garment_id", std::make_shared(gar_id)); + vertex_accessor.property("garment_category_id", + std::make_shared(cat_id)); + std::srand(id ^ 0x7482616); + vertex_accessor.property( + "score", std::make_shared((std::rand() % max_score) / + (max_score + 0.0))); + for (auto l_name : labels) { + auto &label = db.graph.label_store.find_or_create(l_name); + vertex_accessor.add_label(label); + } + return vertex_accessor.id(); + }; + + std::getline(file, line); + + vector va; + int v_count = 0; + while (std::getline(file, line)) { + v_count++; + line.erase(std::remove(line.begin(), line.end(), '['), line.end()); + line.erase(std::remove(line.begin(), line.end(), ']'), line.end()); + line.erase(std::remove(line.begin(), line.end(), '\"'), line.end()); + line.erase(std::remove(line.begin(), line.end(), ' '), line.end()); + auto splited = split(line, ','); + vector labels(splited.begin() + 1, + splited.begin() + splited.size() - 2); + auto id = v(stoi(splited[0]), labels, stoi(splited[splited.size() - 2]), + stoi(splited[splited.size() - 1])); + + assert(va.size() == (uint64_t)id); + va.push_back(db.graph.vertices.find(t, id)); + } + + // EDGE IMPORT + auto e = [&](auto from, auto type, auto to) { + auto v1 = va[from - start_vertex_id]; + + auto v2 = va[to - start_vertex_id]; + + auto edge_accessor = db.graph.edges.insert(t); + + v1.vlist->update(t)->data.out.add(edge_accessor.vlist); + v2.vlist->update(t)->data.in.add(edge_accessor.vlist); + + edge_accessor.from(v1.vlist); + edge_accessor.to(v2.vlist); + + auto &edge_type = db.graph.edge_type_store.find_or_create(type); + edge_accessor.edge_type(edge_type); + }; + + std::getline(e_file, line); + long count = 0; + while 
(std::getline(e_file, line)) {
+ auto splited = split(line, ',');
+ count++;
+ e(stoi(splited[2]), splited[1], stoi(splited[3]));
+ }
+
+ cout << "Loaded:\n Vertices: " << v_count << "\n Edges: " << count
+ << endl;
+
+ t.commit();
+}
+
+void load_graph_dummy(Db &db)
+{
+ auto &t = db.tx_engine.begin();
+ auto v = [&](auto id, auto score) {
+ auto vertex_accessor = db.graph.vertices.insert(t);
+ vertex_accessor.property("id", std::make_shared(id));
+ vertex_accessor.property("score", std::make_shared(score));
+ return vertex_accessor.id();
+ };
+
+ Id va[] = {
+ v(0, 0.5), v(1, 1), v(2, 0.3), v(3, 0.15), v(4, 0.8), v(5, 0.8),
+ };
+
+ auto e = [&](auto from, auto type, auto to) {
+ auto v1 = db.graph.vertices.find(t, va[from]);
+
+ auto v2 = db.graph.vertices.find(t, va[to]);
+
+ auto edge_accessor = db.graph.edges.insert(t);
+
+ v1.vlist->update(t)->data.out.add(edge_accessor.vlist);
+ v2.vlist->update(t)->data.in.add(edge_accessor.vlist);
+
+ edge_accessor.from(v1.vlist);
+ edge_accessor.to(v2.vlist);
+
+ auto &edge_type = db.graph.edge_type_store.find_or_create(type);
+ edge_accessor.edge_type(edge_type);
+ };
+
+ e(0, "ok", 3);
+ e(0, "ok", 2);
+ e(0, "ok", 4);
+ e(1, "ok", 3);
+ e(2, "ok", 1);
+ e(2, "ok", 4);
+ e(3, "ok", 4);
+ e(3, "ok", 5);
+ e(4, "ok", 0);
+ e(4, "ok", 1);
+ e(5, "ok", 2);
+
+ t.commit();
+}
diff --git a/src/data_structures/map/rh_hashmap.hpp b/src/data_structures/map/rh_hashmap.hpp
new file mode 100644
index 000000000..d2a5d5561
--- /dev/null
+++ b/src/data_structures/map/rh_hashmap.hpp
@@ -0,0 +1,145 @@
+#include "utils/option_ptr.hpp"
+
+// HashMap with RobinHood collision resolution policy.
+// Single threaded.
+// Entries are saved as pointers aligned to 8B.
+// Entries must know their key.
+// D must have method K& get_key()
+// K must be comparable with ==.
+// HashMap behaves as if it isn't the owner of the entries.
+template +class RhHashMap +{ +private: + class Combined + { + + public: + Combined() : data(0) {} + + Combined(D *data, size_t off) + { + assert((data & 0x7) == 0 && off < 8); + this->data = ((size_t)data) | off; + } + + bool valid() { return data != 0; } + + size_t off() { return data & 0x7; } + + D *ptr() { return (D *)(data & (~(0x7))); } + + private: + size_t data; + }; + + void init_array(size_t size) + { + size_t bytes = sizeof(Combined) * size; + array = (Combined *)malloc(bytes); + memset(array, 0, bytes); + capacity = size; + } + + void increase_size() + { + if (capacity == 0) { + assert(array == nullptr && count == 0); + size_t new_size = 1 << init_size_pow2; + init_array(new_size); + return; + } + size_t new_size = capacity * 2; + size_t old_size = capacity; + auto a = array; + init_array(new_size); + count = 0; + + for (int i = 0; i < old_size; i++) { + if (a[i].valid()) { + insert(a[i].ptr()); + } + } + + delete[] a; + } + +public: + RhHashMap() {} + + OptionPtr get(const K &key) + { + size_t mask = this->mask(); + size_t now = index(key, mask); + size_t off = 0; + size_t border = 8 <= capacity ? 8 : capacity; + while (off < border) { + Combined other = array[now]; + if (other.valid()) { + auto other_off = other.off(); + if (other_off == off && key == other.ptr()->get_key()) { + return OptionPtr(other.ptr()); + + } else if (other_off < off) { // Other is rich + break; + } // Else other has equal or greater offset, so he is poor. + } else { + break; + } + + off++; + now = (now + 1) & mask; + } + return OptionPtr(); + } + + // Inserts element. Returns true if element wasn't in the map. + bool insert(D *data) + { + size_t mask = this->mask(); + auto key = data->get_key(); + size_t now = index(key, mask); + size_t off = 0; + size_t border = 8 <= capacity ? 
8 : capacity; + while (off < border) { + Combined other = array[now]; + if (other.valid()) { + auto other_off = other.off(); + if (other_off == off && key == other.ptr()->get_key()) { + return false; + + } else if (other_off < off) { // Other is rich + array[now] = Combined(data, off); + + // Hacked reusing of function + data = other.ptr(); + key = data->get_key(); + off = other_off; + } // Else other has equal or greater offset, so he is poor. + } else { + array[now] = Combined(data, off); + count++; + return true; + } + + off++; + now = (now + 1) & mask; + } + + increase_size(); + return insert(data); + } + + size_t size() { return count; } + +private: + size_t index(const K &key, size_t mask) + { + return std::hash()(key) & mask; + } + size_t mask() { return capacity - 1; } + + Combined *array = nullptr; + size_t capacity = 0; + size_t count = 0; +}; diff --git a/src/mvcc/version_list.hpp b/src/mvcc/version_list.hpp index dd4185f28..d0bbc12fc 100644 --- a/src/mvcc/version_list.hpp +++ b/src/mvcc/version_list.hpp @@ -20,32 +20,28 @@ public: using item_t = T; VersionList(Id id) : id(id) {} - VersionList(const VersionList&) = delete; + VersionList(const VersionList &) = delete; /* @brief Move constructs the version list * Note: use only at the beginning of the "other's" lifecycle since this * constructor doesn't move the RecordLock, but only the head pointer */ - VersionList(VersionList&& other) : id(other.id) + VersionList(VersionList &&other) : id(other.id) { this->head = other.head.load(); other.head = nullptr; } - ~VersionList() - { - delete head.load(); - } + ~VersionList() { delete head.load(); } - friend std::ostream& operator<<(std::ostream& stream, - const VersionList& vlist) + friend std::ostream &operator<<(std::ostream &stream, + const VersionList &vlist) { stream << "VersionList" << std::endl; auto record = vlist.head.load(); - while(record != nullptr) - { + while (record != nullptr) { stream << "-- " << *record << std::endl; record = record->next(); } 
@@ -53,17 +49,11 @@ public:
 return stream;
 }
 
- auto gc_lock_acquire()
- {
- return std::unique_lock(lock);
- }
+ auto gc_lock_acquire() { return std::unique_lock(lock); }
 
- void vacuum()
- {
+ void vacuum() {}
-
- }
-
- T* find(const tx::Transaction& t) const
+ T *find(const tx::Transaction &t) const
 {
 auto r = head.load(std::memory_order_seq_cst);
 
@@ -77,13 +67,13 @@ public:
 // | | Jump backwards until you find a first visible
 // [VerList] ----+ version, or you reach the end of the list
 //
- while(r != nullptr && !r->visible(t))
+ while (r != nullptr && !r->visible(t))
 r = r->next(std::memory_order_seq_cst);
 
 return r;
 }
 
- T* insert(tx::Transaction& t)
+ T *insert(tx::Transaction &t)
 {
 assert(head == nullptr);
 
@@ -99,21 +89,27 @@ public:
 return v1;
 }
 
- T* update(tx::Transaction& t)
+ T *update(tx::Transaction &t)
 {
 assert(head != nullptr);
 auto record = find(t);
 
 // check if we found any visible records
- if(!record)
- return nullptr;
+ if (!record) return nullptr;
 
 return update(record, t);
 }
 
- T* update(T* record, tx::Transaction& t)
+ T *update(T *record, tx::Transaction &t)
 {
 assert(record != nullptr);
 
+ // TODO: VALIDATE NEXT IF BLOCK
+ if (record->tx.cre() == t.id) {
+ // THEN ONLY THIS TRANSACTION CAN SEE THIS DATA WHICH MEANS THAT IT
+ // CAN CHANGE IT.
+ return record; + } + lock_and_validate(record, t); auto updated = new T(); @@ -128,19 +124,18 @@ public: return updated; } - bool remove(tx::Transaction& t) + bool remove(tx::Transaction &t) { assert(head != nullptr); auto record = find(t); - if(!record) - return false; + if (!record) return false; lock_and_validate(record, t); return remove(record, t), true; } - bool remove(T* record, tx::Transaction& t) + bool remove(T *record, tx::Transaction &t) { assert(record != nullptr); lock_and_validate(record, t); @@ -151,7 +146,7 @@ public: const Id id; private: - void lock_and_validate(T* record, tx::Transaction& t) + void lock_and_validate(T *record, tx::Transaction &t) { assert(record != nullptr); assert(record == find(t)); @@ -161,18 +156,16 @@ private: // if the record hasn't been deleted yet or the deleting transaction // has aborted, it's ok to modify it - if(!record->tx.exp() || !record->exp_committed(t)) - return; + if (!record->tx.exp() || !record->exp_committed(t)) return; // if it committed, then we have a serialization conflict assert(record->hints.load().exp.is_committed()); throw SerializationError(); } - std::atomic head {nullptr}; + std::atomic head{nullptr}; RecordLock lock; }; - } class Vertex; diff --git a/src/utils/option_ptr.hpp b/src/utils/option_ptr.hpp new file mode 100644 index 000000000..1c312216a --- /dev/null +++ b/src/utils/option_ptr.hpp @@ -0,0 +1,20 @@ + + +template +class OptionPtr +{ +public: + OptionPtr() {} + OptionPtr(T *ptr) : ptr(ptr) {} + + bool is_present() { return ptr != nullptr; } + + T *get() + { + assert(is_present()); + return ptr; + } + +private: + T *ptr = nullptr; +}; diff --git a/tests/unit/rh_hashmap.cpp b/tests/unit/rh_hashmap.cpp new file mode 100644 index 000000000..b8f7a5874 --- /dev/null +++ b/tests/unit/rh_hashmap.cpp @@ -0,0 +1,61 @@ +#define CATCH_CONFIG_MAIN +#include "catch.hpp" + +#include "data_structures/map/rh_hashmap.hpp" + +class Data +{ + +private: + size_t data = 0; + int key; + +public: + Data(int 
key) : key(key) {} + + int &get_key() { return key; } +}; + +TEST_CASE("Robin hood hashmap basic functionality") +{ + RhHashMap map; + + REQUIRE(map.size() == 0); + REQUIRE(map.insert(new Data(0))); + REQUIRE(map.size() == 1); +} + +TEST_CASE("Robin hood hashmap insert/get check") +{ + RhHashMap map; + + REQUIRE(!map.get(0).is_present()); + auto ptr0 = new Data(0); + REQUIRE(map.insert(ptr0)); + REQUIRE(map.get(0).is_present()); + REQUIRE(map.get(0).get() == ptr0); +} + +TEST_CASE("Robin hood hashmap double insert") +{ + RhHashMap map; + + REQUIRE(map.insert(new Data(0))); + REQUIRE(!map.insert(new Data(0))); +} + +TEST_CASE("Robin hood hashmap") +{ + RhHashMap map; + + for (int i = 0; i < 128; i++) { + REQUIRE(!map.get(i).is_present()); + REQUIRE(map.insert(new Data(i))); + REQUIRE(map.get(i).is_present()); + } + + for (int i = 0; i < 128; i++) { + REQUIRE(map.get(i).is_present()); + REQUIRE(map.get(i).get()->get_key() == i); + } +}