From 10e69cd3db2112856f3dce28e7af47fff107aff8 Mon Sep 17 00:00:00 2001 From: Kruno Tomola Fabro Date: Tue, 13 Sep 2016 12:14:16 +0100 Subject: [PATCH] Ready to turn astar into query. --- .../storage/model/properties/properties.hpp | 9 + .../properties/traversers/jsonwriter.hpp | 2 +- include/storage/record_accessor.hpp | 6 + include/storage/vertex.hpp | 2 +- poc/CMakeLists.txt | 18 ++ poc/add_double.cpp | 47 +++ poc/astar.cpp | 6 + poc/astar_query.cpp | 303 ++++++++++++++++++ poc/csv_import.cpp | 25 ++ 9 files changed, 416 insertions(+), 2 deletions(-) create mode 100644 poc/add_double.cpp create mode 100644 poc/astar_query.cpp create mode 100644 poc/csv_import.cpp diff --git a/include/storage/model/properties/properties.hpp b/include/storage/model/properties/properties.hpp index 39a426998..a62a1d750 100644 --- a/include/storage/model/properties/properties.hpp +++ b/include/storage/model/properties/properties.hpp @@ -63,6 +63,15 @@ public: handler.finish(); } + template + void handle(Handler &handler) const + { + for (auto &kv : props) + handler.handle(kv); + + handler.finish(); + } + template void for_all(Handler handler) const { diff --git a/include/storage/model/properties/traversers/jsonwriter.hpp b/include/storage/model/properties/traversers/jsonwriter.hpp index 69e4273e5..486e6bdf4 100644 --- a/include/storage/model/properties/traversers/jsonwriter.hpp +++ b/include/storage/model/properties/traversers/jsonwriter.hpp @@ -27,7 +27,7 @@ public: if (first) first = false; - buffer << '"' << prop.get_property_key().family_name() << "\":"; + buffer << '"' << prop.key.family_name() << "\":"; prop.accept(*this); } diff --git a/include/storage/record_accessor.hpp b/include/storage/record_accessor.hpp index fcd01bb1d..cd7193f78 100644 --- a/include/storage/record_accessor.hpp +++ b/include/storage/record_accessor.hpp @@ -121,6 +121,12 @@ public: properties().template accept(handler); } + template + void handle(Handler &handler) const + { + properties().template 
handle(handler); + } + Properties &properties() const { return record->data.props; } explicit operator bool() const { return record != nullptr; } diff --git a/include/storage/vertex.hpp b/include/storage/vertex.hpp index d73b4f32d..14d5b2d17 100644 --- a/include/storage/vertex.hpp +++ b/include/storage/vertex.hpp @@ -28,7 +28,7 @@ inline std::ostream &operator<<(std::ostream &stream, const Vertex &record) JsonWriter writer(buffer); // dump properties in this buffer - record.data.props.accept(writer); + record.data.props.handle(writer); writer.finish(); return stream << "Vertex" diff --git a/poc/CMakeLists.txt b/poc/CMakeLists.txt index 5f8d5ec5d..7ccc564fd 100644 --- a/poc/CMakeLists.txt +++ b/poc/CMakeLists.txt @@ -18,6 +18,24 @@ target_link_libraries(profile Threads::Threads) target_link_libraries(profile ${fmt_static_lib}) target_link_libraries(profile ${yaml_static_lib}) +add_executable(csv_import csv_import.cpp) +target_link_libraries(csv_import memgraph) +target_link_libraries(csv_import Threads::Threads) +target_link_libraries(csv_import ${fmt_static_lib}) +target_link_libraries(csv_import ${yaml_static_lib}) + +add_executable(add_double add_double.cpp) +target_link_libraries(add_double memgraph) +target_link_libraries(add_double Threads::Threads) +target_link_libraries(add_double ${fmt_static_lib}) +target_link_libraries(add_double ${yaml_static_lib}) + +add_executable(astar_query astar_query.cpp) +target_link_libraries(astar_query memgraph) +target_link_libraries(astar_query Threads::Threads) +target_link_libraries(astar_query ${fmt_static_lib}) +target_link_libraries(astar_query ${yaml_static_lib}) + add_executable(size_aligment size_aligment.cpp) target_link_libraries(size_aligment memgraph) target_link_libraries(size_aligment Threads::Threads) diff --git a/poc/add_double.cpp b/poc/add_double.cpp new file mode 100644 index 000000000..4c003ede1 --- /dev/null +++ b/poc/add_double.cpp @@ -0,0 +1,47 @@ +#include +#include + +#include "database/db.hpp" 
+#include "import/csv_import.hpp"
+#include "logging/default.hpp"
+#include "logging/streams/stdout.hpp"
+#include "utils/command_line/arguments.hpp"
+
+using namespace std;
+
+// Sets a Double property named `property_name` on every vertex visited by
+// t.vertex_access().fill(), drawing a new pseudo-random value per vertex.
+//
+// db            - database to modify; one DbAccessor is opened on it and
+//                 committed at the end.
+// max_value     - scale factor: each value is (rand() / RAND_MAX) * max_value,
+//                 so it lies in [0, max_value] for a non-negative max_value.
+// property_name - name of the vertex property family that is written.
+void add_scores(Db &db, double max_value, std::string const &property_name)
+{
+    DbAccessor t(db);
+
+    // Key of the Double-typed member of the property family.
+    auto key_score = t.vertex_property_family_get(property_name)
+                         .get(Flags::Double)
+                         .family_key();
+
+    // Seeded from the current time, so the values are not reproducible.
+    std::srand(time(0));
+    t.vertex_access().fill().for_all([&](auto v) {
+        // "+ 0.0" forces floating-point division.
+        double value = ((std::rand() + 0.0) / RAND_MAX) * max_value;
+        v.set(StoredProperty(Double(value), key_score));
+    });
+
+    t.commit();
+}
+
+// Command-line tool: adds a random Double property to all vertices of a
+// database and then takes a snapshot.
+//   -pn  <name>   property name          (default "score")
+//   -max <value>  upper bound of values   (default "1", parsed with std::stod)
+//   -db  <name>   database name           (default "default")
+int main(int argc, char **argv)
+{
+    logging::init_async();
+    // NOTE(review): std::make_unique needs a template argument; it is missing
+    // here — presumably the stdout log stream type, confirm against
+    // logging/streams/stdout.hpp.
+    logging::log->pipe(std::make_unique());
+
+    auto para = all_arguments(argc, argv);
+
+    std::string property_name = get_argument(para, "-pn", "score");
+    double max_value = std::stod(get_argument(para, "-max", "1"));
+
+    Db db(get_argument(para, "-db", "default"));
+
+    add_scores(db, max_value, property_name);
+
+    db.snap_engine.make_snapshot();
+
+    return 0;
+}
diff --git a/poc/astar.cpp b/poc/astar.cpp
index 5760fa01d..6fe17e509 100644
--- a/poc/astar.cpp
+++ b/poc/astar.cpp
@@ -13,7 +13,10 @@
 #include "database/db.hpp"
 #include "database/db_accessor.cpp"
 #include "database/db_accessor.hpp"
+
 #include "import/csv_import.hpp"
+#include "logging/default.hpp"
+#include "logging/streams/stdout.hpp"
 #include "storage/edge_x_vertex.hpp"
 #include "storage/edges.cpp"
 #include "storage/edges.hpp"
@@ -218,6 +221,9 @@ auto a_star(
 
 int main(int argc, char **argv)
 {
+    logging::init_async();
+    logging::log->pipe(std::make_unique());
+
     auto para = all_arguments(argc, argv);
 
     Db db;
diff --git a/poc/astar_query.cpp b/poc/astar_query.cpp
new file mode 100644
index 000000000..b854929fa
--- /dev/null
+++ b/poc/astar_query.cpp
@@ -0,0 +1,303 @@
+// NOTE(review): the ten #include directives below name no header — the <...>
+// operand appears to be missing; restore it from the original file.
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "communication/bolt/v1/serialization/bolt_serializer.hpp"
+#include "data_structures/map/rh_hashmap.hpp"
+#include "database/db.hpp"
+#include "database/db_accessor.cpp"
+#include "database/db_accessor.hpp"
+#include "import/csv_import.hpp"
+#include "logging/default.hpp"
+#include "logging/streams/stdout.hpp"
+#include "storage/edge_x_vertex.hpp"
+#include "storage/edges.cpp"
+#include "storage/edges.hpp"
+#include "storage/indexes/impl/nonunique_unordered_index.cpp"
+#include "storage/model/properties/properties.cpp"
+#include "storage/record_accessor.cpp"
+#include "storage/vertex_accessor.hpp"
+#include "storage/vertices.cpp"
+#include "storage/vertices.hpp"
+#include "utils/command_line/arguments.hpp"
+
+// Divisor/modulus used by add_scores() to map rand() into [0, 1).
+const int max_score = 1000000;
+
+using namespace std;
+// NOTE(review): as written this aliases VertexAccessor to itself — confirm the
+// intended source type.
+typedef VertexAccessor VertexAccessor;
+
+void add_scores(Db &db);
+
+// One node of the search tree built by a_star(): a vertex together with the
+// path (via `parent`) and the accumulated cost that led to it.
+class Node
+{
+public:
+    // Node this one was expanded from; nullptr for the start node.
+    Node *parent = {nullptr};
+    // Property key used to read a vertex's score.
+    // NOTE(review): type_key_t appears without template arguments here and
+    // below — confirm against its declaration.
+    type_key_t tkey;
+    // Accumulated cost of the path from the start node to this node.
+    double cost;
+    // Number of edges between the start node and this node.
+    int depth = {0};
+    // Accessor of the vertex this node stands for.
+    VertexAccessor vacc;
+
+    // Creates a root node: parent stays nullptr and depth stays 0.
+    // Members are initialized in declaration order, not in the order the
+    // initializer list is written.
+    Node(VertexAccessor vacc, double cost,
+         type_key_t tkey)
+        : cost(cost), vacc(vacc), tkey(tkey)
+    {
+    }
+    // Creates a child of `parent` one level deeper. `parent` must not be
+    // null: it is dereferenced to compute the depth.
+    Node(VertexAccessor vacc, double cost, Node *parent,
+         type_key_t tkey)
+        : cost(cost), vacc(vacc), parent(parent), depth(parent->depth + 1),
+          tkey(tkey)
+    {
+    }
+
+    // Returns the sum of the `tkey` property values of this node's vertex and
+    // of every ancestor's vertex, walking `parent` up to the root.
+    double sum_vertex_score()
+    {
+        auto now = this;
+        double sum = 0;
+        do {
+            sum += (now->vacc.at(tkey).get())->value();
+            now = now->parent;
+        } while (now != nullptr);
+        return sum;
+    }
+};
+
+// Plain wrapper around a double that default-constructs to the largest
+// representable value.
+class Score
+{
+public:
+    // NOTE(review): std::numeric_limits needs a template argument; presumably
+    // <double>, matching the type of `value` — confirm.
+    Score() : value(std::numeric_limits::max()) {}
+    Score(double v) : value(v) {}
+    double value;
+};
+
+// Prints one result to stdout: the summed vertex score of the path, then one
+// line per vertex from `res` back to the root.
+// NOTE(review): the "{" written in the first line is never closed by this
+// function — confirm whether that is intended.
+void found_result(Node *res)
+{
+    double sum = res->sum_vertex_score();
+
+    std::cout << "{score: " << sum << endl;
+    auto bef = res;
+    while (bef != nullptr) {
+        std::cout << " " << *(bef->vacc.operator->()) << endl;
+        bef = bef->parent;
+    }
+}
+
+// Cost of stepping onto `vertex`: 1 minus the vertex's `tkey` property value.
+// `edge` is not used. Asserts that `vertex` is not empty.
+double calc_heuristic_cost_dummy(type_key_t tkey,
+                                 EdgeAccessor &edge, VertexAccessor &vertex)
+{
+    assert(!vertex.empty());
+    return 1 - vertex.at(tkey).get()->value();
+}
+
+// Signature of an edge filter: returns true when the search may follow the
+// edge. `before` is the search node whose outgoing edges are being expanded.
+typedef bool (*EdgeFilter)(DbAccessor &t, EdgeAccessor &, Node *before);
+// Signature of a vertex filter: returns true when the vertex may be added to
+// the search. `before` is the search node the vertex was reached from.
+typedef bool (*VertexFilter)(DbAccessor &t, VertexAccessor &, Node *before);
+
+// Edge filter that accepts every edge.
+bool edge_filter_dummy(DbAccessor &t, EdgeAccessor &e, Node *before)
+{
+    return true;
+}
+
+// Vertex filter that accepts a vertex exactly when va.fill() returns true.
+bool vertex_filter_dummy(DbAccessor &t, VertexAccessor &va, Node *before)
+{
+    return va.fill();
+}
+
+// Vertex filter that checks `v` against the ancestors of `before` by scanning
+// their outgoing edges.
+//
+// Starting at before->parent (the node `before` itself is not examined), each
+// ancestor must have an outgoing edge whose target equals `v`. Returns true
+// once the walk runs past the root, false as soon as an ancestor has no such
+// edge, and false when v.fill() fails. `before` must not be null.
+bool vertex_filter_contained_dummy(DbAccessor &t, VertexAccessor &v,
+                                   Node *before)
+{
+    if (v.fill()) {
+        bool found;
+        do {
+            found = false;
+            before = before->parent;
+            if (before == nullptr) {
+                return true;
+            }
+            auto it = before->vacc.out();
+            for (auto e = it.next(); e.is_present(); e = it.next()) {
+                VertexAccessor va = e.get().to();
+                if (va == v) {
+                    found = true;
+                    break;
+                }
+            }
+        } while (found);
+    }
+    return false;
+}
+
+// Same ancestor walk as vertex_filter_contained_dummy, but each ancestor is
+// tested with v.in_contains(before->vacc) instead of a manual edge scan.
+//
+// Returns true once the walk runs past the root, false as soon as
+// v.in_contains() is false for an ancestor, and false when v.fill() fails.
+// `before` must not be null.
+bool vertex_filter_contained(DbAccessor &t, VertexAccessor &v, Node *before)
+{
+    if (v.fill()) {
+        do {
+            before = before->parent;
+            if (before == nullptr) {
+                return true;
+            }
+        } while (v.in_contains(before->vacc));
+    }
+    return false;
+}
+
+// The vertex filter consists of max_depth functions and the edge filter
+// consists of max_depth functions, one for each depth.
+// Filters return true if the element satisfies the conditions.
+// Cheapest-first search from one start vertex along outgoing edges.
+//
+// Nodes are popped from a priority queue ordered by accumulated cost. A node
+// whose depth has reached `max_depth` is recorded as a result instead of being
+// expanded; the search stops once `limit` results exist or the queue is empty.
+//
+// db                  - database to search; a DbAccessor is opened on it.
+// sys_id_start        - id handed to t.vertex_find() to locate the start
+//                       vertex (asserted to exist).
+// max_depth           - depth at which a node becomes a result.
+// e_filter, v_filter  - arrays indexed by the depth of the node being
+//                       expanded, so each needs at least max_depth entries.
+// calc_heuristic_cost - cost added for stepping over `edge` onto `vertex`.
+// limit               - number of results after which the search returns.
+//
+// Returns the result nodes in the order they were popped from the queue.
+// Every Node is allocated with `new` and never freed; the returned pointers
+// reach their whole path through Node::parent.
+auto a_star(
+    Db &db, int64_t sys_id_start, uint max_depth, EdgeFilter e_filter[],
+    VertexFilter v_filter[],
+    double (*calc_heuristic_cost)(type_key_t tkey,
+                                  EdgeAccessor &edge, VertexAccessor &vertex),
+    int limit)
+{
+    DbAccessor t(db);
+    // NOTE(review): type_key_t and the .get()/.type_key() calls carry no
+    // template arguments here — confirm against the storage headers.
+    type_key_t tkey =
+        t.vertex_property_family_get("score")
+            .get(Flags::Double)
+            .type_key();
+
+    // The per-depth `best_found` array that used to be allocated here with
+    // new[] was only referenced by the disabled FOUND FILTER below and was
+    // never freed, so it leaked on every call. It has been removed.
+
+    std::vector<Node *> best;
+    // Inverted comparison: the queue yields the node with the lowest cost.
+    auto cmp = [](Node *left, Node *right) { return left->cost > right->cost; };
+    std::priority_queue<Node *, std::vector<Node *>, decltype(cmp)> queue(cmp);
+
+    auto start_vr = t.vertex_find(sys_id_start);
+    assert(start_vr);
+    start_vr.get().fill();
+    Node *start = new Node(start_vr.take(), 0, tkey);
+    queue.push(start);
+    int count = 0;
+    // The queue holds the start node on entry, so top() is safe on the first
+    // pass; later passes are guarded by the loop condition.
+    do {
+        auto now = queue.top();
+        queue.pop();
+        // if(!visited.insert(now)){
+        //     continue;
+        // }
+
+        if (max_depth <= now->depth) {
+            best.push_back(now);
+            count++;
+            if (count >= limit) {
+                // NOTE(review): this early return skips t.commit() — confirm
+                // that leaving the accessor uncommitted is intended.
+                return best;
+            }
+            continue;
+        }
+
+        // Disabled pruning step. Re-enabling it requires a `best_found`
+        // container holding one id -> Score map per depth.
+        // { // FOUND FILTER
+        //     Score &bef = best_found[now->depth][now->vacc.id()];
+        //     if (bef.value <= now->cost) {
+        //         continue;
+        //     }
+        //     bef.value = now->cost;
+        // }
+
+        iter::for_all(now->vacc.out(), [&](auto edge) {
+            if (e_filter[now->depth](t, edge, now)) {
+                VertexAccessor va = edge.to();
+                if (v_filter[now->depth](t, va, now)) {
+                    auto cost = calc_heuristic_cost(tkey, edge, va);
+                    Node *n = new Node(va, now->cost + cost, now, tkey);
+                    queue.push(n);
+                }
+            }
+        });
+    } while (!queue.empty());
+
+    // TODO: memory is leaked because the nodes are never deleted.
+
+    t.commit();
+    return best;
+}
+
+// Benchmark driver: runs a_star() from random start vertices of the "astar"
+// database, reports the average running time and prints the best results.
+//   -p <true|false>  when "true" (the default), a later run replaces the kept
+//                    results if its first result has a higher summed score.
+int main(int argc, char **argv)
+{
+    logging::init_async();
+    // NOTE(review): std::make_unique needs a template argument; it is missing
+    // here — presumably the stdout log stream type, confirm against
+    // logging/streams/stdout.hpp.
+    logging::log->pipe(std::make_unique());
+
+    auto para = all_arguments(argc, argv);
+
+    Db db("astar");
+
+    // a_star() is called with max_depth 3 below, so only the first three
+    // entries of each array are indexed.
+    EdgeFilter e_filters[] = {&edge_filter_dummy, &edge_filter_dummy,
+                              &edge_filter_dummy, &edge_filter_dummy};
+    VertexFilter f_filters[] = {
+        &vertex_filter_contained, &vertex_filter_contained,
+        &vertex_filter_contained, &vertex_filter_contained};
+
+    // The start vertex is drawn with `rand() % size`, which divides by zero
+    // on an empty graph; bail out before that can happen.
+    if (db.graph.vertices.access().size() == 0) {
+        std::cerr << "astar_query: database contains no vertices\n";
+        return 1;
+    }
+
+    // CONF
+    std::srand(time(0));
+    auto best_n = 10;       // results requested from each a_star() run
+    auto bench_n = 1000;    // number of timed runs
+    auto best_print_n = 10; // results printed at the end
+    bool pick_best_found =
+        strcmp(get_argument(para, "-p", "true").c_str(), "true") == 0;
+
+    double sum = 0;
+    std::vector<Node *> best;
+    for (int i = 0; i < bench_n; i++) {
+        // presumably vertex ids are dense in [0, size) — verify against the
+        // importer, since the index is passed on as sys_id_start.
+        auto start_vertex_index =
+            std::rand() % db.graph.vertices.access().size();
+
+        auto begin = clock();
+        auto found = a_star(db, start_vertex_index, 3, e_filters, f_filters,
+                            &calc_heuristic_cost_dummy, best_n);
+        clock_t end = clock();
+
+        double elapsed_ms = (double(end - begin) / CLOCKS_PER_SEC) * 1000;
+        sum += elapsed_ms;
+
+        // Keep `found` when it is a larger result set while fewer than
+        // best_print_n are held, or (with -p true) when its first result
+        // outscores the first kept one. best.front() is only evaluated when
+        // the first clause is false and `found` is non-empty, which implies
+        // `best` is non-empty.
+        if ((best.size() < best_print_n && found.size() > best.size()) ||
+            (pick_best_found && found.size() > 0 &&
+             found.front()->sum_vertex_score() >
+                 best.front()->sum_vertex_score())) {
+            best = found;
+        }
+
+        // Just to be safe: keep running while nothing has been found yet.
+        // NOTE(review): this never terminates if no start vertex yields a
+        // result — confirm that is acceptable for this benchmark.
+        if (i + 1 == bench_n && best.size() == 0) {
+            bench_n++;
+        }
+    }
+
+    std::cout << "\nSearch for best " << best_n
+              << " results has runing time of:\n avg: " << sum / bench_n
+              << " [ms]\n";
+    std::cout << "\nExample of best result:\n";
+    for (int i = 0; i < best_print_n && best.size() > 0; i++) {
+        found_result(best.front());
+        best.erase(best.begin());
+    }
+
+    return 0;
+}
+
+// Adds property score to all vertices.
+// For each vertex on which fill() succeeds, stores a Double "score" property
+// computed as (rand() % max_score) / max_score, i.e. a value in [0, 1).
+// rand() is reseeded before every draw from a counter of the vertices
+// processed so far, so a vertex's score depends only on its position in the
+// iteration order and not on the time of the run.
+void add_scores(Db &db)
+{
+    DbAccessor t(db);
+
+    auto key_score =
+        t.vertex_property_family_get("score").get(Flags::Double).family_key();
+
+    // Counts the vertices that received a score; starts at 1.
+    int i = 1;
+    iter::for_all(t.vertex_access(), [&](auto v) {
+        if (v.fill()) {
+            // from Kruno's head :) (could be ALMOST anything else)
+            std::srand(i ^ 0x7482616);
+            // "+ 0.0" forces floating-point division.
+            v.set(StoredProperty(
+                Double((std::rand() % max_score) / (max_score + 0.0)),
+                key_score));
+            i++;
+        }
+    });
+
+    t.commit();
+}
diff --git a/poc/csv_import.cpp b/poc/csv_import.cpp
new file mode 100644
index 000000000..b40f6a7a3
--- /dev/null
+++ b/poc/csv_import.cpp
@@ -0,0 +1,25 @@
+#include "database/db.hpp"
+#include "import/csv_import.hpp"
+#include "logging/default.hpp"
+#include "logging/streams/stdout.hpp"
+#include "utils/command_line/arguments.hpp"
+
+using namespace std;
+
+// Tool that imports CSV data into a database and then takes a snapshot of it.
+// All command-line arguments are forwarded to import_csv_from_arguments(),
+// so the CSV import flags are accepted here as well.
+//   -db <name>   name of the database to create (default "default").
+int main(int argc, char **argv)
+{
+    logging::init_async();
+    // NOTE(review): std::make_unique needs a template argument; it is missing
+    // here — presumably the stdout log stream type, confirm against
+    // logging/streams/stdout.hpp.
+    logging::log->pipe(std::make_unique());
+
+    auto para = all_arguments(argc, argv);
+    Db db(get_argument(para, "-db", "default"));
+
+    import_csv_from_arguments(db, para);
+
+    // Persist the freshly imported data.
+    db.snap_engine.make_snapshot();
+
+    return 0;
+}