Extracted import tool for csv to file with CSVImporter.
Astar now uses csv import tool.
This commit is contained in:
parent
fbd9ca8420
commit
85dbf1bd86
@ -465,10 +465,6 @@ if (POC)
|
||||
add_subdirectory(poc)
|
||||
endif()
|
||||
|
||||
# proof of concepts
|
||||
if (TOOLS)
|
||||
add_subdirectory(tools)
|
||||
endif()
|
||||
|
||||
# memgraph build name
|
||||
execute_process(
|
||||
|
@ -6,3 +6,10 @@ add_executable(poc_astar astar.cpp)
|
||||
target_link_libraries(poc_astar memgraph)
|
||||
target_link_libraries(poc_astar Threads::Threads)
|
||||
target_link_libraries(poc_astar ${fmt_static_lib})
|
||||
|
||||
project(memgraph_tools)
|
||||
|
||||
add_executable(import_tool tool.cpp)
|
||||
target_link_libraries(import_tool memgraph)
|
||||
target_link_libraries(import_tool Threads::Threads)
|
||||
target_link_libraries(import_tool ${fmt_static_lib})
|
||||
|
257
poc/astar.cpp
257
poc/astar.cpp
@ -1,4 +1,5 @@
|
||||
#include <chrono>
|
||||
#include <cstring>
|
||||
#include <ctime>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
@ -10,7 +11,9 @@
|
||||
|
||||
#include "data_structures/map/rh_hashmap.hpp"
|
||||
#include "database/db.hpp"
|
||||
#include "database/db_accessor.cpp"
|
||||
#include "database/db_accessor.hpp"
|
||||
#include "import/csv_import.hpp"
|
||||
#include "storage/edges.cpp"
|
||||
#include "storage/edges.hpp"
|
||||
#include "storage/indexes/impl/nonunique_unordered_index.cpp"
|
||||
@ -20,11 +23,14 @@
|
||||
#include "storage/vertex_accessor.hpp"
|
||||
#include "storage/vertices.cpp"
|
||||
#include "storage/vertices.hpp"
|
||||
#include "utils/command_line/arguments.hpp"
|
||||
|
||||
const int max_score = 1000000;
|
||||
|
||||
using namespace std;
|
||||
typedef Vertex::Accessor VertexAccessor;
|
||||
void load_graph_dummy(Db &db);
|
||||
int load_csv(Db &db, char *file_path, char *edge_file_path);
|
||||
|
||||
void add_scores(Db &db);
|
||||
|
||||
class Node
|
||||
{
|
||||
@ -66,46 +72,6 @@ public:
|
||||
double value;
|
||||
};
|
||||
|
||||
// class Iterator : public Crtp<Iterator>
|
||||
// {
|
||||
// public:
|
||||
// Vertex *operator*()
|
||||
// {
|
||||
// assert(head != nullptr);
|
||||
// return head->vertex;
|
||||
// }
|
||||
//
|
||||
// Vertex *operator->()
|
||||
// {
|
||||
// assert(head != nullptr);
|
||||
// return head->vertex;
|
||||
// }
|
||||
//
|
||||
// Iterator &operator++()
|
||||
// {
|
||||
// assert(head != nullptr);
|
||||
// head = head->parent;
|
||||
// return this->derived();
|
||||
// }
|
||||
//
|
||||
// Iterator &operator++(int) { return operator++(); }
|
||||
//
|
||||
// friend bool operator==(const Iterator &a, const Iterator &b)
|
||||
// {
|
||||
// return a.head == b.head;
|
||||
// }
|
||||
//
|
||||
// friend bool operator!=(const Iterator &a, const Iterator &b)
|
||||
// {
|
||||
// return !(a == b);
|
||||
// }
|
||||
//
|
||||
// Iterator end() { return Iterator(); }
|
||||
//
|
||||
// private:
|
||||
// Node *head;
|
||||
// };
|
||||
|
||||
void found_result(Node *res)
|
||||
{
|
||||
double sum = res->sum_vertex_score();
|
||||
@ -239,38 +205,20 @@ auto a_star(Db &db, int64_t sys_id_start, uint max_depth, EdgeFilter e_filter[],
|
||||
}
|
||||
});
|
||||
} while (!queue.empty());
|
||||
// std::cout << "Found: " << count << " resoults\n";
|
||||
|
||||
// TODO: memory is leaked here because the nodes are never deleted
|
||||
|
||||
t.commit();
|
||||
return best;
|
||||
}
|
||||
|
||||
// class Data
|
||||
// {
|
||||
//
|
||||
// private:
|
||||
// size_t data = 0;
|
||||
// int key;
|
||||
//
|
||||
// public:
|
||||
// Data(int key) : key(key) {}
|
||||
//
|
||||
// const int &get_key() { return key; }
|
||||
// };
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
if (argc < 3) {
|
||||
std::cout << "Not enough input values\n";
|
||||
return 0;
|
||||
} else if (argc > 4) {
|
||||
std::cout << "To much input values\n";
|
||||
return 0;
|
||||
}
|
||||
auto para = all_arguments(argc, argv);
|
||||
|
||||
Db db;
|
||||
auto vertex_no = load_csv(db, argv[argc - 2], argv[argc - 1]);
|
||||
auto loaded = import_csv_from_arguments(db, para);
|
||||
add_scores(db);
|
||||
|
||||
EdgeFilter e_filters[] = {&edge_filter_dummy, &edge_filter_dummy,
|
||||
&edge_filter_dummy, &edge_filter_dummy};
|
||||
@ -283,12 +231,13 @@ int main(int argc, char **argv)
|
||||
auto best_n = 10;
|
||||
auto bench_n = 1000;
|
||||
auto best_print_n = 10;
|
||||
bool pick_best_found = argc > 3 ? true : false;
|
||||
bool pick_best_found =
|
||||
strcmp(get_argument(para, "-p", "true").c_str(), "true") == 0;
|
||||
|
||||
double sum = 0;
|
||||
std::vector<Node *> best;
|
||||
for (int i = 0; i < bench_n; i++) {
|
||||
auto start_vertex_index = std::rand() % vertex_no;
|
||||
auto start_vertex_index = std::rand() % loaded.first;
|
||||
|
||||
auto begin = clock();
|
||||
auto found = a_star(db, start_vertex_index, 3, e_filters, f_filters,
|
||||
@ -320,178 +269,28 @@ int main(int argc, char **argv)
|
||||
best.erase(best.begin());
|
||||
}
|
||||
|
||||
// RhHashMultiMap benchmark
|
||||
// const int n_pow2 = 20;
|
||||
// int n = 1 << n_pow2;
|
||||
// RhHashMultiMap<int, Data, n_pow2 + 1> map;
|
||||
// std::srand(time(0));
|
||||
// auto data = std::vector<Data *>();
|
||||
// for (int i = 0; i < n; i++) {
|
||||
// data.push_back(new Data(std::rand()));
|
||||
// }
|
||||
//
|
||||
// begin = clock();
|
||||
// for (auto e : data) {
|
||||
// map.add(e);
|
||||
// }
|
||||
// end = clock();
|
||||
// elapsed_ms = (double(end - begin) / CLOCKS_PER_SEC) * 1000;
|
||||
// std::cout << "Map: " << elapsed_ms << " [ms]\n";
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void split(const string &s, char delim, vector<string> &elems)
|
||||
// Adds property score to all vertices.
|
||||
void add_scores(Db &db)
|
||||
{
|
||||
stringstream ss(s);
|
||||
string item;
|
||||
while (getline(ss, item, delim)) {
|
||||
elems.push_back(item);
|
||||
}
|
||||
}
|
||||
|
||||
vector<string> split(const string &s, char delim)
|
||||
{
|
||||
vector<string> elems;
|
||||
split(s, delim, elems);
|
||||
return elems;
|
||||
}
|
||||
|
||||
int load_csv(Db &db, char *file_path, char *edge_file_path)
|
||||
{
|
||||
std::fstream file(file_path);
|
||||
std::fstream e_file(edge_file_path);
|
||||
|
||||
std::string line;
|
||||
|
||||
DbAccessor t(db);
|
||||
auto key_id =
|
||||
t.vertex_property_family_get("id").get(Flags::Int32).family_key();
|
||||
auto key_garment_id = t.vertex_property_family_get("garment_id")
|
||||
.get(Flags::Int32)
|
||||
.family_key();
|
||||
auto key_garment_category_id =
|
||||
t.vertex_property_family_get("garment_category_id")
|
||||
.get(Flags::Int32)
|
||||
.family_key();
|
||||
|
||||
auto key_score =
|
||||
t.vertex_property_family_get("score").get(Flags::Double).family_key();
|
||||
|
||||
int max_score = 1000000;
|
||||
|
||||
// VERTEX import
|
||||
int start_vertex_id = -1;
|
||||
auto v = [&](auto id, auto labels, auto gar_id, auto cat_id) {
|
||||
if (start_vertex_id < 0) {
|
||||
start_vertex_id = id;
|
||||
int i = 1;
|
||||
iter::for_all(t.vertex_access(), [&](auto v) {
|
||||
if (v.fill()) {
|
||||
// from Kruno's head :) (could be ALMOST anything else)
|
||||
std::srand(i ^ 0x7482616);
|
||||
v.set(key_score,
|
||||
std::make_shared<Double>((std::rand() % max_score) /
|
||||
(max_score + 0.0)));
|
||||
i++;
|
||||
}
|
||||
|
||||
auto vertex_accessor = t.vertex_insert();
|
||||
vertex_accessor.set(key_id, std::make_shared<Int32>(id));
|
||||
vertex_accessor.set(key_garment_id, std::make_shared<Int32>(gar_id));
|
||||
vertex_accessor.set(key_garment_category_id,
|
||||
std::make_shared<Int32>(cat_id));
|
||||
// from Kruno's head :) (could be ALMOST anything else)
|
||||
std::srand(id ^ 0x7482616);
|
||||
vertex_accessor.set(key_score,
|
||||
std::make_shared<Double>((std::rand() % max_score) /
|
||||
(max_score + 0.0)));
|
||||
|
||||
for (auto l_name : labels) {
|
||||
auto &label = t.label_find_or_create(l_name);
|
||||
vertex_accessor.add_label(label);
|
||||
}
|
||||
|
||||
return vertex_accessor;
|
||||
};
|
||||
|
||||
// Skip header
|
||||
std::getline(file, line);
|
||||
|
||||
vector<Vertex::Accessor> va;
|
||||
int v_count = 0;
|
||||
while (std::getline(file, line)) {
|
||||
v_count++;
|
||||
line.erase(std::remove(line.begin(), line.end(), '['), line.end());
|
||||
line.erase(std::remove(line.begin(), line.end(), ']'), line.end());
|
||||
line.erase(std::remove(line.begin(), line.end(), '\"'), line.end());
|
||||
line.erase(std::remove(line.begin(), line.end(), ' '), line.end());
|
||||
auto splited = split(line, ',');
|
||||
vector<string> labels(splited.begin() + 1,
|
||||
splited.begin() + splited.size() - 2);
|
||||
auto vacs =
|
||||
v(stoi(splited[0]), labels, stoi(splited[splited.size() - 2]),
|
||||
stoi(splited[splited.size() - 1]));
|
||||
|
||||
assert(va.size() == (uint64_t)vacs.id());
|
||||
va.push_back(vacs);
|
||||
}
|
||||
|
||||
// EDGE IMPORT
|
||||
auto e = [&](auto from, auto type, auto to) {
|
||||
auto v1 = va[from - start_vertex_id];
|
||||
|
||||
auto v2 = va[to - start_vertex_id];
|
||||
|
||||
auto edge_accessor = t.edge_insert(v1, v2);
|
||||
|
||||
auto &edge_type = t.type_find_or_create(type);
|
||||
edge_accessor.edge_type(edge_type);
|
||||
};
|
||||
|
||||
std::getline(e_file, line);
|
||||
long count = 0;
|
||||
while (std::getline(e_file, line)) {
|
||||
auto splited = split(line, ',');
|
||||
count++;
|
||||
e(stoi(splited[2]), splited[1], stoi(splited[3]));
|
||||
}
|
||||
|
||||
cout << "Loaded:\n Vertices: " << v_count << "\n Edges: " << count
|
||||
<< endl;
|
||||
|
||||
t.commit();
|
||||
return v_count;
|
||||
}
|
||||
|
||||
void load_graph_dummy(Db &db)
|
||||
{
|
||||
DbAccessor t(db);
|
||||
|
||||
// TODO: update code
|
||||
// auto v = [&](auto id, auto score) {
|
||||
// auto vertex_accessor = t.vertex_insert();
|
||||
// vertex_accessor.property("id", std::make_shared<Int32>(id));
|
||||
// vertex_accessor.property("score", std::make_shared<Double>(score));
|
||||
// return vertex_accessor.id();
|
||||
// };
|
||||
//
|
||||
// Id va[] = {
|
||||
// v(0, 0.5), v(1, 1), v(2, 0.3), v(3, 0.15), v(4, 0.8), v(5, 0.8),
|
||||
// };
|
||||
//
|
||||
// auto e = [&](auto from, auto type, auto to) {
|
||||
// auto v1 = t.vertex_find(va[from]);
|
||||
//
|
||||
// auto v2 = t.vertex_find(va[to]);
|
||||
//
|
||||
// auto edge_accessor = t.edge_insert(v1.get(), v2.get());
|
||||
//
|
||||
// auto &edge_type = t.type_find_or_create(type);
|
||||
// edge_accessor.edge_type(edge_type);
|
||||
// };
|
||||
//
|
||||
// e(0, "ok", 3);
|
||||
// e(0, "ok", 2);
|
||||
// e(0, "ok", 4);
|
||||
// e(1, "ok", 3);
|
||||
// e(2, "ok", 1);
|
||||
// e(2, "ok", 4);
|
||||
// e(3, "ok", 4);
|
||||
// e(3, "ok", 5);
|
||||
// e(4, "ok", 0);
|
||||
// e(4, "ok", 1);
|
||||
// e(5, "ok", 2);
|
||||
});
|
||||
|
||||
t.commit();
|
||||
}
|
||||
|
@ -16,6 +16,9 @@
|
||||
|
||||
using namespace std;
|
||||
|
||||
// TODO: Turn next template, expand on it, standardize it, and use it for query
|
||||
// generation.
|
||||
|
||||
template <class C>
|
||||
void fill_to_fill(Edge::Accessor &e, const EdgeType &type, C &&consumer)
|
||||
{
|
||||
@ -141,39 +144,6 @@ void find_fill(I iter, C &&consumer)
|
||||
}
|
||||
}
|
||||
|
||||
void load(DbAccessor &t, vector<string> ¶)
|
||||
{
|
||||
// DbAccessor t(db);
|
||||
CSVImporter imp(t, cerr);
|
||||
|
||||
imp.parts_mark = get_argument(para, "-d", ",")[0];
|
||||
imp.parts_array_mark = get_argument(para, "-ad", ",")[0];
|
||||
imp.warning =
|
||||
strcasecmp(get_argument(para, "-w", "true").c_str(), "true") == 0;
|
||||
imp.error =
|
||||
strcasecmp(get_argument(para, "-err", "true").c_str(), "true") == 0;
|
||||
|
||||
// IMPORT VERTICES
|
||||
auto o = take_argument(para, "-v");
|
||||
while (o.is_present()) {
|
||||
std::fstream file(o.get());
|
||||
// cout << "Importing vertices from file: " << o.get() << endl;
|
||||
auto n = imp.import_vertices(file);
|
||||
cout << "Loaded " << n << " vertices from " << o.get() << endl;
|
||||
o = take_argument(para, "-v");
|
||||
}
|
||||
|
||||
// IMPORT EDGES
|
||||
o = take_argument(para, "-e");
|
||||
while (o.is_present()) {
|
||||
std::fstream file(o.get());
|
||||
// cout << "Importing edges from file: " << o.get() << endl;
|
||||
auto n = imp.import_edges(file);
|
||||
cout << "Loaded " << n << " edges from " << o.get() << endl;
|
||||
o = take_argument(para, "-e");
|
||||
}
|
||||
}
|
||||
|
||||
void fill_with_bt(unordered_map<string, double> &values, Vertex::Accessor &com,
|
||||
double weight,
|
||||
PropertyFamily::PropertyType::PropertyTypeKey<ArrayString>
|
||||
@ -385,29 +355,18 @@ int main(int argc, char **argv)
|
||||
{
|
||||
auto para = all_arguments(argc, argv);
|
||||
Db db;
|
||||
{
|
||||
DbAccessor t(db);
|
||||
|
||||
load(t, para);
|
||||
|
||||
t.commit();
|
||||
}
|
||||
import_csv_from_arguments(db, para);
|
||||
|
||||
{
|
||||
DbAccessor t(db);
|
||||
|
||||
// for (int i = 0; i < 100; i++)
|
||||
// make_transactions(db);
|
||||
|
||||
// string line;
|
||||
// while(std::getline(file, line))
|
||||
|
||||
int n = 300 * 1000;
|
||||
vector<pair<Vertex::Accessor, unordered_map<string, double>>> coll;
|
||||
|
||||
// QUERY BENCHMARK
|
||||
auto begin = clock();
|
||||
int i = 0;
|
||||
|
||||
iter::for_all_fill(
|
||||
t.label_find_or_create("Company").index->for_range_exact(t),
|
||||
[&](auto v) {
|
||||
@ -430,7 +389,7 @@ int main(int argc, char **argv)
|
||||
<< endl;
|
||||
cout << "Throughput: " << 1 / (elapsed_s / n) << " [query/sec]" << endl;
|
||||
|
||||
auto res = coll.back(); // query(t, fid.get());
|
||||
auto res = coll.back();
|
||||
while (res.second.empty()) {
|
||||
coll.pop_back();
|
||||
res = coll.back();
|
@ -31,6 +31,7 @@
|
||||
#include "storage/model/properties/all.hpp"
|
||||
#include "storage/model/properties/flags.hpp"
|
||||
#include "storage/vertex_accessor.hpp"
|
||||
#include "utils/command_line/arguments.hpp"
|
||||
#include "utils/option.hpp"
|
||||
|
||||
using namespace std;
|
||||
@ -208,7 +209,8 @@ private:
|
||||
: new IdFiller(make_option(prop_key(name, Flags::Int64))));
|
||||
return make_option(std::move(f));
|
||||
|
||||
} else if (equal_str(type, "start_id") || equal_str(type, "from_id")) {
|
||||
} else if (equal_str(type, "start_id") || equal_str(type, "from_id") ||
|
||||
equal_str(type, "from") || equal_str(type, "source")) {
|
||||
std::unique_ptr<Filler> f(new FromFiller(*this));
|
||||
return make_option(std::move(f));
|
||||
|
||||
@ -216,7 +218,8 @@ private:
|
||||
std::unique_ptr<Filler> f(new LabelFiller(*this));
|
||||
return make_option(std::move(f));
|
||||
|
||||
} else if (equal_str(type, "end_id") || equal_str(type, "to_id")) {
|
||||
} else if (equal_str(type, "end_id") || equal_str(type, "to_id") ||
|
||||
equal_str(type, "to") || equal_str(type, "target")) {
|
||||
std::unique_ptr<Filler> f(new ToFiller(*this));
|
||||
return make_option(std::move(f));
|
||||
|
||||
@ -311,3 +314,68 @@ private:
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Imports all -v "vertex_file_path.csv" vertices and -e "edge_file_path.csv"
|
||||
// edges from specified files. Also defines arguments -d, -ad, -w, -err, -info.
|
||||
// -d delimiter => sets delimiter for parsing .csv files. Default is ,
|
||||
// -ad delimiter => sets delimiter for parsing arrays in .csv. Default is ,
|
||||
// -w bool => turns on/off output of warnings. Default on.
|
||||
// -err bool => turns on/off output of errors. Default on.
|
||||
// -info bool => turns on/off output of info. Default on.
|
||||
// Returns (no loaded vertices,no loaded edges)
|
||||
std::pair<size_t, size_t>
|
||||
import_csv_from_arguments(Db &db, std::vector<std::string> ¶)
|
||||
{
|
||||
DbAccessor t(db);
|
||||
CSVImporter imp(t, cerr);
|
||||
|
||||
imp.parts_mark = get_argument(para, "-d", ",")[0];
|
||||
imp.parts_array_mark = get_argument(para, "-ad", ",")[0];
|
||||
imp.warning = strcmp(get_argument(para, "-w", "true").c_str(), "true") == 0;
|
||||
imp.error = strcmp(get_argument(para, "-err", "true").c_str(), "true") == 0;
|
||||
bool info =
|
||||
strcmp(get_argument(para, "-info", "true").c_str(), "true") == 0;
|
||||
|
||||
// IMPORT VERTICES
|
||||
size_t l_v = 0;
|
||||
auto o = take_argument(para, "-v");
|
||||
while (o.is_present()) {
|
||||
std::fstream file(o.get());
|
||||
|
||||
if (info)
|
||||
std::cout << "Importing vertices from file: " << o.get()
|
||||
<< std::endl;
|
||||
|
||||
auto n = imp.import_vertices(file);
|
||||
l_v = +n;
|
||||
|
||||
if (info)
|
||||
std::cout << "Loaded " << n << " vertices from " << o.get()
|
||||
<< std::endl;
|
||||
|
||||
o = take_argument(para, "-v");
|
||||
}
|
||||
|
||||
// IMPORT EDGES
|
||||
size_t l_e = 0;
|
||||
o = take_argument(para, "-e");
|
||||
while (o.is_present()) {
|
||||
std::fstream file(o.get());
|
||||
|
||||
if (info)
|
||||
std::cout << "Importing edges from file: " << o.get() << std::endl;
|
||||
|
||||
auto n = imp.import_edges(file);
|
||||
l_e = +n;
|
||||
|
||||
if (info)
|
||||
std::cout << "Loaded " << n << " edges from " << o.get()
|
||||
<< std::endl;
|
||||
|
||||
o = take_argument(para, "-e");
|
||||
}
|
||||
|
||||
t.commit();
|
||||
|
||||
return std::make_pair(l_v, l_e);
|
||||
}
|
||||
|
@ -1,8 +0,0 @@
|
||||
cmake_minimum_required(VERSION 3.1)
|
||||
|
||||
project(memgraph_tools)
|
||||
|
||||
add_executable(import_tool tool.cpp)
|
||||
target_link_libraries(import_tool memgraph)
|
||||
target_link_libraries(import_tool Threads::Threads)
|
||||
target_link_libraries(import_tool ${fmt_static_lib})
|
Loading…
Reference in New Issue
Block a user