Added RobinHood HashMap and unit tests.

This commit is contained in:
Kruno Tomola Fabro 2016-08-09 11:53:10 +01:00
parent 95aa385e31
commit 3016494669
5 changed files with 574 additions and 34 deletions

View File

@ -1,6 +1,327 @@
#include <fstream>
#include <iostream>
#include <queue>
#include <regex>
#include <sstream>
#include <string>
#include <vector>
#include "database/db.hpp"
using namespace std;
void load_graph_dummy(Db &db);
void load_csv(Db &db, char *file_path, char *edge_file_path);
// Search-tree node for the A*-style search below: pairs a graph Vertex with
// the accumulated path cost and a back-pointer to the node it was expanded
// from, so a result path can be printed by walking parents.
class Node
{
public:
    Node *parent = {nullptr}; // previous node on the path; nullptr at the root
    double cost;              // accumulated cost from the start node
    int depth = {0};          // number of edges from the root
    Vertex *vertex;           // graph vertex this node represents

    // Root node: no parent, depth 0.
    Node(Vertex *va, double cost) : cost(cost), vertex(va) {}

    // Child node. Initializer list is in declaration order
    // (parent, cost, depth, vertex) so members initialize in the order
    // written (the original order triggered -Wreorder).
    Node(Vertex *va, double cost, Node *parent)
        : parent(parent), cost(cost), depth(parent->depth + 1), vertex(va)
    {
    }
};
// class Iterator : public Crtp<Iterator>
// {
// public:
// Vertex *operator*()
// {
// assert(head != nullptr);
// return head->vertex;
// }
//
// Vertex *operator->()
// {
// assert(head != nullptr);
// return head->vertex;
// }
//
// Iterator &operator++()
// {
// assert(head != nullptr);
// head = head->parent;
// return this->derived();
// }
//
// Iterator &operator++(int) { return operator++(); }
//
// friend bool operator==(const Iterator &a, const Iterator &b)
// {
// return a.head == b.head;
// }
//
// friend bool operator!=(const Iterator &a, const Iterator &b)
// {
// return !(a == b);
// }
//
// Iterator end() { return Iterator(); }
//
// private:
// Node *head;
// };
// Prints one search result: the accumulated score of the final node,
// then every vertex on the path, walking parent links back to the start.
void found_result(Node *bef)
{
    std::cout << "{score: " << bef->cost << std::endl;
    for (Node *node = bef; node != nullptr; node = node->parent) {
        std::cout << " " << *(node->vertex) << std::endl;
    }
}
// Dummy cost function: cheaper to expand into vertices with a higher
// "score" property (score is expected to lie in [0, 1]).
double calc_heuristic_cost_dummy(Edge *edge, Vertex *vertex)
{
    auto score = vertex->data.props.at("score").as<Double>().value;
    return 1 - score;
}
typedef bool (*EdgeFilter)(tx::Transaction &t, Edge *, Node *before);
typedef bool (*VertexFilter)(tx::Transaction &t, Vertex *, Node *before);
bool edge_filter_dummy(tx::Transaction &t, Edge *e, Node *before)
{
return true;
}
bool vertex_filter_dummy(tx::Transaction &t, Vertex *v, Node *before)
{
return true;
}
// Walks up the chain of ancestor nodes starting at before->parent.
// At each ancestor it scans that ancestor's out-edges looking for one
// that leads to v.  The walk continues upward only while such an edge is
// found; it stops and returns false at the first ancestor with no edge
// to v, and returns true only if the walk reaches past the root (i.e.
// every visited ancestor had an out-edge to v).
// NOTE(review): despite the "contained" name this is NOT a simple
// "v not already on the path" check — confirm the intended semantics.
bool vertex_filter_contained_dummy(tx::Transaction &t, Vertex *v, Node *before)
{
    bool found;
    do {
        found = false;
        before = before->parent;
        if (before == nullptr) {
            // Ran past the root: every ancestor reached v.
            return true;
        }
        // Does any out-edge of this ancestor lead to v?
        for (auto e : before->vertex->data.out) {
            Edge *edge = e->find(t);
            Vertex *e_v = edge->data.to->find(t);
            if (e_v == v) {
                found = true;
                break;
            }
        }
    } while (found);
    return false;
}
// e_filter and v_filter each hold one filter per search depth (index
// 0..max_depth-1); filters return true when the element passes.
// calc_heuristic_cost scores an expansion and limit caps the number of
// results printed.
//
// Best-first search from the vertex with id sys_id_start; every path that
// reaches max_depth is printed via found_result, up to `limit` paths.
void a_star(Db &db, int64_t sys_id_start, uint max_depth, EdgeFilter e_filter[],
            VertexFilter v_filter[],
            double (*calc_heuristic_cost)(Edge *edge, Vertex *vertex),
            int limit)
{
    auto &t = db.tx_engine.begin();

    // Min-heap ordered by accumulated cost.
    auto cmp = [](Node *left, Node *right) { return left->cost > right->cost; };
    std::priority_queue<Node *, std::vector<Node *>, decltype(cmp)> queue(cmp);

    // Owns every node allocated during the search; the original version
    // never deleted them, which leaked all nodes. Parent pointers stay
    // valid until cleanup runs, since cleanup is only called on exit.
    std::vector<Node *> all_nodes;
    auto cleanup = [&all_nodes, &t]() {
        for (auto node : all_nodes) delete node;
        // Commit on every exit path (the original early return skipped it).
        t.commit();
    };

    Node *start =
        new Node(db.graph.vertices.find(t, sys_id_start).vlist->find(t), 0);
    all_nodes.push_back(start);
    queue.push(start);

    int count = 0;
    do {
        auto now = queue.top();
        queue.pop();

        // A node at max_depth is a finished path.
        if (max_depth <= now->depth) {
            found_result(now);
            count++;
            if (count >= limit) {
                cleanup();
                return;
            }
            continue;
        }

        // Expand along every out-edge that passes the depth's filters.
        for (auto e : now->vertex->data.out) {
            Edge *edge = e->find(t);
            if (e_filter[now->depth](t, edge, now)) {
                Vertex *v = edge->data.to->find(t);
                if (v_filter[now->depth](t, v, now)) {
                    Node *n = new Node(
                        v, now->cost + calc_heuristic_cost(edge, v), now);
                    all_nodes.push_back(n);
                    queue.push(n);
                }
            }
        }
    } while (!queue.empty());

    cleanup();
}
int main()
{
    Db db;

    // load_csv takes mutable char*, so keep the paths in writable arrays:
    // passing string literals as char* is an ill-formed conversion in C++11+.
    char node_file[] = "neo4j_nodes_export_2000.csv";
    char edge_file[] = "neo4j_edges_export_2000.csv";
    load_csv(db, node_file, edge_file);
    //
    // load_graph_dummy(db);
    //

    // One filter per depth level; a_star is called with max_depth == 3.
    EdgeFilter e_filters[] = {&edge_filter_dummy, &edge_filter_dummy,
                              &edge_filter_dummy, &edge_filter_dummy};
    VertexFilter f_filters[] = {&vertex_filter_dummy, &vertex_filter_dummy,
                                &vertex_filter_dummy, &vertex_filter_dummy};
    a_star(db, 0, 3, e_filters, f_filters, &calc_heuristic_cost_dummy, 10);

    return 0;
}
// Appends to `elems` every delim-separated token of `s`. Interior empty
// tokens are kept; per std::getline semantics an empty input yields no
// tokens and a trailing delimiter yields no trailing empty token.
// Names are std::-qualified for consistency with the rest of the file.
void split(const std::string &s, char delim, std::vector<std::string> &elems)
{
    std::stringstream ss(s);
    std::string item;
    while (std::getline(ss, item, delim)) {
        elems.push_back(item);
    }
}
// Returns the delim-separated tokens of `s` as a new vector.
// Self-contained tokenization (same std::getline semantics as the
// three-argument overload); names std::-qualified for consistency.
std::vector<std::string> split(const std::string &s, char delim)
{
    std::vector<std::string> elems;
    std::stringstream ss(s);
    std::string item;
    while (std::getline(ss, item, delim)) {
        elems.push_back(item);
    }
    return elems;
}
// Imports a graph from two CSV exports: one file of vertices and one of
// edges. All inserts happen inside a single transaction committed at the
// end. Vertex ids in the files are assumed to be dense and increasing
// starting at the first row's id — TODO confirm against the exporter.
void load_csv(Db &db, char *file_path, char *edge_file_path)
{
    std::fstream file(file_path);
    std::fstream e_file(edge_file_path);

    std::string line;

    auto &t = db.tx_engine.begin();
    int max_score = 1000000;

    // VERTEX import
    int start_vertex_id = -1;
    // Inserts one vertex with id/garment properties, a deterministic
    // pseudo-random "score" in [0, 1) seeded from the id, and its labels;
    // returns the internal vertex id. Records the first file id seen so
    // edge endpoints can be mapped to vector indices later.
    auto v = [&](auto id, auto labels, auto gar_id, auto cat_id) {
        if (start_vertex_id < 0) {
            start_vertex_id = id;
        }
        auto vertex_accessor = db.graph.vertices.insert(t);
        vertex_accessor.property("id", std::make_shared<Int32>(id));
        vertex_accessor.property("garment_id", std::make_shared<Int32>(gar_id));
        vertex_accessor.property("garment_category_id",
                                 std::make_shared<Int32>(cat_id));
        // Reseeding per id keeps the score reproducible across runs.
        std::srand(id ^ 0x7482616);
        vertex_accessor.property(
            "score", std::make_shared<Double>((std::rand() % max_score) /
                                              (max_score + 0.0)));
        for (auto l_name : labels) {
            auto &label = db.graph.label_store.find_or_create(l_name);
            vertex_accessor.add_label(label);
        }
        return vertex_accessor.id();
    };

    // Skip the CSV header line.
    std::getline(file, line);

    vector<Vertex::Accessor> va;
    int v_count = 0;
    while (std::getline(file, line)) {
        v_count++;
        // Strip list brackets, quotes and spaces so the row becomes a flat
        // comma-separated record: id, label..., garment_id, category_id.
        line.erase(std::remove(line.begin(), line.end(), '['), line.end());
        line.erase(std::remove(line.begin(), line.end(), ']'), line.end());
        line.erase(std::remove(line.begin(), line.end(), '\"'), line.end());
        line.erase(std::remove(line.begin(), line.end(), ' '), line.end());

        auto splited = split(line, ',');
        // Everything between the id and the last two columns is a label.
        vector<string> labels(splited.begin() + 1,
                              splited.begin() + splited.size() - 2);
        auto id = v(stoi(splited[0]), labels, stoi(splited[splited.size() - 2]),
                    stoi(splited[splited.size() - 1]));
        // Internal ids must be dense so va[id] is the accessor for id.
        assert(va.size() == (uint64_t)id);
        va.push_back(db.graph.vertices.find(t, id));
    }

    // EDGE IMPORT
    // Creates one edge of the given type from `from` to `to` (file ids),
    // wiring both endpoint adjacency lists.
    auto e = [&](auto from, auto type, auto to) {
        auto v1 = va[from - start_vertex_id];
        auto v2 = va[to - start_vertex_id];
        auto edge_accessor = db.graph.edges.insert(t);

        v1.vlist->update(t)->data.out.add(edge_accessor.vlist);
        v2.vlist->update(t)->data.in.add(edge_accessor.vlist);

        edge_accessor.from(v1.vlist);
        edge_accessor.to(v2.vlist);

        auto &edge_type = db.graph.edge_type_store.find_or_create(type);
        edge_accessor.edge_type(edge_type);
    };

    // Skip the CSV header line.
    std::getline(e_file, line);

    long count = 0;
    while (std::getline(e_file, line)) {
        // Columns: [0]=?, [1]=edge type, [2]=from id, [3]=to id
        // (presumably a Neo4j export layout — TODO confirm).
        auto splited = split(line, ',');
        count++;
        e(stoi(splited[2]), splited[1], stoi(splited[3]));
    }

    cout << "Loaded:\n    Vertices: " << v_count << "\n    Edges: " << count
         << endl;

    t.commit();
}
// Builds a small hard-coded 6-vertex test graph with fixed "score"
// properties and "ok"-typed edges, all inside one committed transaction.
// Alternative to load_csv for quick manual testing (see main).
void load_graph_dummy(Db &db)
{
    auto &t = db.tx_engine.begin();

    // Inserts one vertex with the given id and score; returns its id.
    auto v = [&](auto id, auto score) {
        auto vertex_accessor = db.graph.vertices.insert(t);
        vertex_accessor.property("id", std::make_shared<Int32>(id));
        vertex_accessor.property("score", std::make_shared<Double>(score));
        return vertex_accessor.id();
    };

    Id va[] = {
        v(0, 0.5), v(1, 1), v(2, 0.3), v(3, 0.15), v(4, 0.8), v(5, 0.8),
    };

    // Creates one edge of the given type between vertices va[from] and
    // va[to], wiring both endpoint adjacency lists.
    auto e = [&](auto from, auto type, auto to) {
        auto v1 = db.graph.vertices.find(t, va[from]);
        auto v2 = db.graph.vertices.find(t, va[to]);
        auto edge_accessor = db.graph.edges.insert(t);

        v1.vlist->update(t)->data.out.add(edge_accessor.vlist);
        v2.vlist->update(t)->data.in.add(edge_accessor.vlist);

        edge_accessor.from(v1.vlist);
        edge_accessor.to(v2.vlist);

        auto &edge_type = db.graph.edge_type_store.find_or_create(type);
        edge_accessor.edge_type(edge_type);
    };

    e(0, "ok", 3);
    e(0, "ok", 2);
    e(0, "ok", 4);
    e(1, "ok", 3);
    e(2, "ok", 1);
    e(2, "ok", 4);
    e(3, "ok", 4);
    e(3, "ok", 5);
    e(4, "ok", 0);
    e(4, "ok", 1);
    e(5, "ok", 2);

    t.commit();
}

View File

@ -0,0 +1,145 @@
#include <cassert>
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <functional>

#include "utils/option_ptr.hpp"
// HashMap with RobinHood collision resolution policy.
// Single threaded.
// Entries are stored as pointers aligned to 8B; the low 3 bits of a slot
// hold the entry's probe offset from its ideal bucket.
// Entries must know their key: D must have a method K &get_key().
// K must be comparable with == and hashable with std::hash<K>.
// The map doesn't own the entries (it never deletes a D*), but it does
// own its slot array.
template <class K, class D, size_t init_size_pow2 = 2>
class RhHashMap
{
private:
    // One slot: entry pointer and probe offset packed into one word.
    class Combined
    {
    public:
        Combined() : data(0) {}
        Combined(D *data, size_t off)
        {
            // Pointer must be 8B-aligned so the offset fits in the low
            // bits. (The original asserted `data & 0x7` directly on the
            // pointer, which doesn't compile; cast first.)
            assert((((size_t)data) & 0x7) == 0 && off < 8);
            this->data = ((size_t)data) | off;
        }

        bool valid() { return data != 0; }

        size_t off() { return data & 0x7; }

        D *ptr() { return (D *)(data & (~(0x7))); }

    private:
        size_t data;
    };

    // Allocates a zeroed slot array of `size` slots; size must be a
    // power of two so mask() works.
    void init_array(size_t size)
    {
        size_t bytes = sizeof(Combined) * size;
        array = (Combined *)malloc(bytes);
        memset(array, 0, bytes);
        capacity = size;
    }

    // Doubles the capacity (or creates the initial array) and re-inserts
    // every live entry.
    void increase_size()
    {
        if (capacity == 0) {
            assert(array == nullptr && count == 0);
            size_t new_size = 1 << init_size_pow2;
            init_array(new_size);
            return;
        }
        size_t new_size = capacity * 2;
        size_t old_size = capacity;
        auto a = array;
        init_array(new_size);
        count = 0;

        for (size_t i = 0; i < old_size; i++) {
            if (a[i].valid()) {
                insert(a[i].ptr());
            }
        }

        // The array came from malloc, so it must be released with free;
        // the original `delete[] a` was undefined behavior.
        free(a);
    }

public:
    RhHashMap() {}

    // The map owns its slot array: copying would double-free it.
    RhHashMap(const RhHashMap &) = delete;
    RhHashMap &operator=(const RhHashMap &) = delete;

    // Releases the slot array (entries themselves are not owned).
    ~RhHashMap() { free(array); }

    // Returns the entry stored under `key`, if present.
    OptionPtr<D> get(const K &key)
    {
        size_t mask = this->mask();
        size_t now = index(key, mask);
        size_t off = 0;
        size_t border = 8 <= capacity ? 8 : capacity;

        while (off < border) {
            Combined other = array[now];
            if (other.valid()) {
                auto other_off = other.off();
                if (other_off == off && key == other.ptr()->get_key()) {
                    return OptionPtr<D>(other.ptr());

                } else if (other_off < off) { // Other is rich
                    break;
                } // Else other has equal or greater offset, so he is poor.
            } else {
                break;
            }

            off++;
            now = (now + 1) & mask;
        }
        return OptionPtr<D>();
    }

    // Inserts element. Returns true if element wasn't in the map.
    bool insert(D *data)
    {
        size_t mask = this->mask();
        auto key = data->get_key();
        size_t now = index(key, mask);
        size_t off = 0;
        size_t border = 8 <= capacity ? 8 : capacity;

        while (off < border) {
            Combined other = array[now];
            if (other.valid()) {
                auto other_off = other.off();
                if (other_off == off && key == other.ptr()->get_key()) {
                    // Key already present.
                    return false;

                } else if (other_off < off) { // Other is rich
                    // Steal the rich slot and keep inserting the evicted
                    // entry (classic Robin Hood displacement).
                    array[now] = Combined(data, off);

                    // Hacked reusing of function
                    data = other.ptr();
                    key = data->get_key();
                    off = other_off;
                } // Else other has equal or greater offset, so he is poor.
            } else {
                // Found a free slot.
                array[now] = Combined(data, off);
                count++;
                return true;
            }

            off++;
            now = (now + 1) & mask;
        }

        // Probe sequence exhausted: grow and retry.
        increase_size();
        return insert(data);
    }

    size_t size() { return count; }

private:
    size_t index(const K &key, size_t mask)
    {
        return std::hash<K>()(key) & mask;
    }

    size_t mask() { return capacity - 1; }

    Combined *array = nullptr;
    size_t capacity = 0;
    size_t count = 0;
};

View File

@ -20,32 +20,28 @@ public:
using item_t = T;
VersionList(Id id) : id(id) {}
VersionList(const VersionList&) = delete;
VersionList(const VersionList &) = delete;
/* @brief Move constructs the version list
* Note: use only at the beginning of the "other's" lifecycle since this
* constructor doesn't move the RecordLock, but only the head pointer
*/
VersionList(VersionList&& other) : id(other.id)
VersionList(VersionList &&other) : id(other.id)
{
this->head = other.head.load();
other.head = nullptr;
}
~VersionList()
{
delete head.load();
}
~VersionList() { delete head.load(); }
friend std::ostream& operator<<(std::ostream& stream,
const VersionList<T>& vlist)
friend std::ostream &operator<<(std::ostream &stream,
const VersionList<T> &vlist)
{
stream << "VersionList" << std::endl;
auto record = vlist.head.load();
while(record != nullptr)
{
while (record != nullptr) {
stream << "-- " << *record << std::endl;
record = record->next();
}
@ -53,17 +49,11 @@ public:
return stream;
}
auto gc_lock_acquire()
{
return std::unique_lock<RecordLock>(lock);
}
auto gc_lock_acquire() { return std::unique_lock<RecordLock>(lock); }
void vacuum()
{
void vacuum() {}
}
T* find(const tx::Transaction& t) const
T *find(const tx::Transaction &t) const
{
auto r = head.load(std::memory_order_seq_cst);
@ -77,13 +67,13 @@ public:
// | | Jump backwards until you find a first visible
// [VerList] ----+ version, or you reach the end of the list
//
while(r != nullptr && !r->visible(t))
while (r != nullptr && !r->visible(t))
r = r->next(std::memory_order_seq_cst);
return r;
}
T* insert(tx::Transaction& t)
T *insert(tx::Transaction &t)
{
assert(head == nullptr);
@ -99,21 +89,27 @@ public:
return v1;
}
T* update(tx::Transaction& t)
T *update(tx::Transaction &t)
{
assert(head != nullptr);
auto record = find(t);
// check if we found any visible records
if(!record)
return nullptr;
if (!record) return nullptr;
return update(record, t);
}
T* update(T* record, tx::Transaction& t)
T *update(T *record, tx::Transaction &t)
{
assert(record != nullptr);
// TODO: VALIDATE NEXT IF BLOCK
if (record->tx.cre() == t.id) {
// THEN ONLY THIS TRANSACTION CAN SEE THIS DATA WHICH MENS THAT IT
// CAN CHANGE IT.
return record;
}
lock_and_validate(record, t);
auto updated = new T();
@ -128,19 +124,18 @@ public:
return updated;
}
bool remove(tx::Transaction& t)
bool remove(tx::Transaction &t)
{
assert(head != nullptr);
auto record = find(t);
if(!record)
return false;
if (!record) return false;
lock_and_validate(record, t);
return remove(record, t), true;
}
bool remove(T* record, tx::Transaction& t)
bool remove(T *record, tx::Transaction &t)
{
assert(record != nullptr);
lock_and_validate(record, t);
@ -151,7 +146,7 @@ public:
const Id id;
private:
void lock_and_validate(T* record, tx::Transaction& t)
void lock_and_validate(T *record, tx::Transaction &t)
{
assert(record != nullptr);
assert(record == find(t));
@ -161,18 +156,16 @@ private:
// if the record hasn't been deleted yet or the deleting transaction
// has aborted, it's ok to modify it
if(!record->tx.exp() || !record->exp_committed(t))
return;
if (!record->tx.exp() || !record->exp_committed(t)) return;
// if it committed, then we have a serialization conflict
assert(record->hints.load().exp.is_committed());
throw SerializationError();
}
std::atomic<T*> head {nullptr};
std::atomic<T *> head{nullptr};
RecordLock lock;
};
}
class Vertex;

20
src/utils/option_ptr.hpp Normal file
View File

@ -0,0 +1,20 @@
// Nullable, non-owning pointer wrapper: callers must check is_present()
// before calling get(). Accessors are const so the option is usable
// through const references.
template <class T>
class OptionPtr
{
public:
    // Empty option: holds nothing.
    OptionPtr() {}

    // Option holding `ptr`; the pointee is not owned.
    OptionPtr(T *ptr) : ptr(ptr) {}

    // True when a non-null pointer is held.
    bool is_present() const { return ptr != nullptr; }

    // Returns the held pointer; asserts that a value is present.
    T *get() const
    {
        assert(is_present());
        return ptr;
    }

private:
    T *ptr = nullptr;
};

61
tests/unit/rh_hashmap.cpp Normal file
View File

@ -0,0 +1,61 @@
#define CATCH_CONFIG_MAIN
#include "catch.hpp"
#include "data_structures/map/rh_hashmap.hpp"
// Minimal entry type for the RhHashMap tests: the map requires that D
// expose K &get_key().
class Data
{
private:
    size_t data = 0; // never read — presumably padding so heap-allocated
                     // entries are at least 8B; TODO confirm it's needed.
    int key;         // the map key

public:
    Data(int key) : key(key) {}

    int &get_key() { return key; }
};
// A fresh map is empty; one insert makes its size 1.
TEST_CASE("Robin hood hashmap basic functionality")
{
    RhHashMap<int, Data> hashmap;
    REQUIRE(hashmap.size() == 0);

    auto *entry = new Data(0);
    REQUIRE(hashmap.insert(entry));
    REQUIRE(hashmap.size() == 1);
}
// get() misses on an empty map and returns the exact inserted pointer
// after insert().
TEST_CASE("Robin hood hashmap insert/get check")
{
    RhHashMap<int, Data> hashmap;
    REQUIRE(!hashmap.get(0).is_present());

    auto *inserted = new Data(0);
    REQUIRE(hashmap.insert(inserted));

    auto found = hashmap.get(0);
    REQUIRE(found.is_present());
    REQUIRE(found.get() == inserted);
}
// A second entry with an already-present key must be rejected.
TEST_CASE("Robin hood hashmap double insert")
{
    RhHashMap<int, Data> hashmap;

    REQUIRE(hashmap.insert(new Data(0)));
    REQUIRE(!hashmap.insert(new Data(0)));
}
// Bulk insert of 128 keys, forcing several resizes; every key must stay
// retrievable with its own value afterwards.
TEST_CASE("Robin hood hashmap")
{
    RhHashMap<int, Data> hashmap;

    for (int key = 0; key < 128; key++) {
        REQUIRE(!hashmap.get(key).is_present());
        REQUIRE(hashmap.insert(new Data(key)));
        REQUIRE(hashmap.get(key).is_present());
    }

    for (int key = 0; key < 128; key++) {
        auto found = hashmap.get(key);
        REQUIRE(found.is_present());
        REQUIRE(found.get()->get_key() == key);
    }
}