Extracted the CSV import tool into its own file built around CSVImporter.

The astar POC now uses the CSV import tool.
Kruno Tomola Fabro 2016-08-23 10:58:55 +01:00
parent fbd9ca8420
commit 85dbf1bd86
6 changed files with 111 additions and 290 deletions
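
In practice, both proof-of-concept binaries drop their hand-written CSV loading and route it through the shared importer. A minimal sketch of the new pattern, assembled from the diffs below (the wrapping main() and the output line are illustrative, not part of the commit):

#include <iostream>

#include "database/db.hpp"
#include "import/csv_import.hpp"
#include "utils/command_line/arguments.hpp"

int main(int argc, char **argv)
{
    // Collect argv into a vector<string> so the importer can consume its flags.
    auto para = all_arguments(argc, argv);
    Db db;
    // Reads every "-v <file>" as vertices and every "-e <file>" as edges;
    // returns (number of vertices loaded, number of edges loaded).
    auto loaded = import_csv_from_arguments(db, para);
    std::cout << "Vertices: " << loaded.first
              << ", edges: " << loaded.second << std::endl;
    return 0;
}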

View File

@@ -465,10 +465,6 @@ if (POC)
add_subdirectory(poc)
endif()
# proof of concepts
if (TOOLS)
add_subdirectory(tools)
endif()
# memgraph build name
execute_process(

View File

@@ -6,3 +6,10 @@ add_executable(poc_astar astar.cpp)
target_link_libraries(poc_astar memgraph)
target_link_libraries(poc_astar Threads::Threads)
target_link_libraries(poc_astar ${fmt_static_lib})
project(memgraph_tools)
add_executable(import_tool tool.cpp)
target_link_libraries(import_tool memgraph)
target_link_libraries(import_tool Threads::Threads)
target_link_libraries(import_tool ${fmt_static_lib})

View File

@@ -1,4 +1,5 @@
#include <chrono>
#include <cstring>
#include <ctime>
#include <fstream>
#include <iostream>
@@ -10,7 +11,9 @@
#include "data_structures/map/rh_hashmap.hpp"
#include "database/db.hpp"
#include "database/db_accessor.cpp"
#include "database/db_accessor.hpp"
#include "import/csv_import.hpp"
#include "storage/edges.cpp"
#include "storage/edges.hpp"
#include "storage/indexes/impl/nonunique_unordered_index.cpp"
@@ -20,11 +23,14 @@
#include "storage/vertex_accessor.hpp"
#include "storage/vertices.cpp"
#include "storage/vertices.hpp"
#include "utils/command_line/arguments.hpp"
const int max_score = 1000000;
using namespace std;
typedef Vertex::Accessor VertexAccessor;
void load_graph_dummy(Db &db);
int load_csv(Db &db, char *file_path, char *edge_file_path);
void add_scores(Db &db);
class Node
{
@@ -66,46 +72,6 @@ public:
double value;
};
// class Iterator : public Crtp<Iterator>
// {
// public:
// Vertex *operator*()
// {
// assert(head != nullptr);
// return head->vertex;
// }
//
// Vertex *operator->()
// {
// assert(head != nullptr);
// return head->vertex;
// }
//
// Iterator &operator++()
// {
// assert(head != nullptr);
// head = head->parent;
// return this->derived();
// }
//
// Iterator &operator++(int) { return operator++(); }
//
// friend bool operator==(const Iterator &a, const Iterator &b)
// {
// return a.head == b.head;
// }
//
// friend bool operator!=(const Iterator &a, const Iterator &b)
// {
// return !(a == b);
// }
//
// Iterator end() { return Iterator(); }
//
// private:
// Node *head;
// };
void found_result(Node *res)
{
double sum = res->sum_vertex_score();
@@ -239,38 +205,20 @@ auto a_star(Db &db, int64_t sys_id_start, uint max_depth, EdgeFilter e_filter[],
}
});
} while (!queue.empty());
// std::cout << "Found: " << count << " results\n";
// TODO: memory is leaked because the nodes are never deleted
t.commit();
return best;
}
// class Data
// {
//
// private:
// size_t data = 0;
// int key;
//
// public:
// Data(int key) : key(key) {}
//
// const int &get_key() { return key; }
// };
int main(int argc, char **argv)
{
if (argc < 3) {
std::cout << "Not enough input values\n";
return 0;
} else if (argc > 4) {
std::cout << "To much input values\n";
return 0;
}
auto para = all_arguments(argc, argv);
Db db;
auto vertex_no = load_csv(db, argv[argc - 2], argv[argc - 1]);
auto loaded = import_csv_from_arguments(db, para);
add_scores(db);
EdgeFilter e_filters[] = {&edge_filter_dummy, &edge_filter_dummy,
&edge_filter_dummy, &edge_filter_dummy};
@@ -283,12 +231,13 @@ int main(int argc, char **argv)
auto best_n = 10;
auto bench_n = 1000;
auto best_print_n = 10;
bool pick_best_found = argc > 3 ? true : false;
bool pick_best_found =
strcmp(get_argument(para, "-p", "true").c_str(), "true") == 0;
double sum = 0;
std::vector<Node *> best;
for (int i = 0; i < bench_n; i++) {
auto start_vertex_index = std::rand() % vertex_no;
auto start_vertex_index = std::rand() % loaded.first;
auto begin = clock();
auto found = a_star(db, start_vertex_index, 3, e_filters, f_filters,
@@ -320,178 +269,28 @@ int main(int argc, char **argv)
best.erase(best.begin());
}
// RhHashMultiMap benchmark
// const int n_pow2 = 20;
// int n = 1 << n_pow2;
// RhHashMultiMap<int, Data, n_pow2 + 1> map;
// std::srand(time(0));
// auto data = std::vector<Data *>();
// for (int i = 0; i < n; i++) {
// data.push_back(new Data(std::rand()));
// }
//
// begin = clock();
// for (auto e : data) {
// map.add(e);
// }
// end = clock();
// elapsed_ms = (double(end - begin) / CLOCKS_PER_SEC) * 1000;
// std::cout << "Map: " << elapsed_ms << " [ms]\n";
return 0;
}
void split(const string &s, char delim, vector<string> &elems)
// Adds property score to all vertices.
void add_scores(Db &db)
{
stringstream ss(s);
string item;
while (getline(ss, item, delim)) {
elems.push_back(item);
}
}
vector<string> split(const string &s, char delim)
{
vector<string> elems;
split(s, delim, elems);
return elems;
}
int load_csv(Db &db, char *file_path, char *edge_file_path)
{
std::fstream file(file_path);
std::fstream e_file(edge_file_path);
std::string line;
DbAccessor t(db);
auto key_id =
t.vertex_property_family_get("id").get(Flags::Int32).family_key();
auto key_garment_id = t.vertex_property_family_get("garment_id")
.get(Flags::Int32)
.family_key();
auto key_garment_category_id =
t.vertex_property_family_get("garment_category_id")
.get(Flags::Int32)
.family_key();
auto key_score =
t.vertex_property_family_get("score").get(Flags::Double).family_key();
int max_score = 1000000;
// VERTEX import
int start_vertex_id = -1;
auto v = [&](auto id, auto labels, auto gar_id, auto cat_id) {
if (start_vertex_id < 0) {
start_vertex_id = id;
int i = 1;
iter::for_all(t.vertex_access(), [&](auto v) {
if (v.fill()) {
// from Kruno's head :) (could be ALMOST anything else)
std::srand(i ^ 0x7482616);
v.set(key_score,
std::make_shared<Double>((std::rand() % max_score) /
(max_score + 0.0)));
i++;
}
auto vertex_accessor = t.vertex_insert();
vertex_accessor.set(key_id, std::make_shared<Int32>(id));
vertex_accessor.set(key_garment_id, std::make_shared<Int32>(gar_id));
vertex_accessor.set(key_garment_category_id,
std::make_shared<Int32>(cat_id));
// from Kruno's head :) (could be ALMOST anything else)
std::srand(id ^ 0x7482616);
vertex_accessor.set(key_score,
std::make_shared<Double>((std::rand() % max_score) /
(max_score + 0.0)));
for (auto l_name : labels) {
auto &label = t.label_find_or_create(l_name);
vertex_accessor.add_label(label);
}
return vertex_accessor;
};
// Skip header
std::getline(file, line);
vector<Vertex::Accessor> va;
int v_count = 0;
while (std::getline(file, line)) {
v_count++;
line.erase(std::remove(line.begin(), line.end(), '['), line.end());
line.erase(std::remove(line.begin(), line.end(), ']'), line.end());
line.erase(std::remove(line.begin(), line.end(), '\"'), line.end());
line.erase(std::remove(line.begin(), line.end(), ' '), line.end());
auto splited = split(line, ',');
vector<string> labels(splited.begin() + 1,
splited.begin() + splited.size() - 2);
auto vacs =
v(stoi(splited[0]), labels, stoi(splited[splited.size() - 2]),
stoi(splited[splited.size() - 1]));
assert(va.size() == (uint64_t)vacs.id());
va.push_back(vacs);
}
// EDGE IMPORT
auto e = [&](auto from, auto type, auto to) {
auto v1 = va[from - start_vertex_id];
auto v2 = va[to - start_vertex_id];
auto edge_accessor = t.edge_insert(v1, v2);
auto &edge_type = t.type_find_or_create(type);
edge_accessor.edge_type(edge_type);
};
std::getline(e_file, line);
long count = 0;
while (std::getline(e_file, line)) {
auto splited = split(line, ',');
count++;
e(stoi(splited[2]), splited[1], stoi(splited[3]));
}
cout << "Loaded:\n Vertices: " << v_count << "\n Edges: " << count
<< endl;
t.commit();
return v_count;
}
void load_graph_dummy(Db &db)
{
DbAccessor t(db);
// TODO: update code
// auto v = [&](auto id, auto score) {
// auto vertex_accessor = t.vertex_insert();
// vertex_accessor.property("id", std::make_shared<Int32>(id));
// vertex_accessor.property("score", std::make_shared<Double>(score));
// return vertex_accessor.id();
// };
//
// Id va[] = {
// v(0, 0.5), v(1, 1), v(2, 0.3), v(3, 0.15), v(4, 0.8), v(5, 0.8),
// };
//
// auto e = [&](auto from, auto type, auto to) {
// auto v1 = t.vertex_find(va[from]);
//
// auto v2 = t.vertex_find(va[to]);
//
// auto edge_accessor = t.edge_insert(v1.get(), v2.get());
//
// auto &edge_type = t.type_find_or_create(type);
// edge_accessor.edge_type(edge_type);
// };
//
// e(0, "ok", 3);
// e(0, "ok", 2);
// e(0, "ok", 4);
// e(1, "ok", 3);
// e(2, "ok", 1);
// e(2, "ok", 4);
// e(3, "ok", 4);
// e(3, "ok", 5);
// e(4, "ok", 0);
// e(4, "ok", 1);
// e(5, "ok", 2);
});
t.commit();
}
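
Seen from the command line, the astar benchmark moves from positional arguments to flags. A hedged before/after (binary name taken from poc/CMakeLists.txt above; file names are made up):

// before: two positional paths; any extra argument switched pick_best_found on
//   ./poc_astar vertices.csv edges.csv
// after: files and options are flags, consumed by the importer and get_argument
//   ./poc_astar -v vertices.csv -e edges.csv -p true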

View File

@@ -16,6 +16,9 @@
using namespace std;
// TODO: Turn next template, expand on it, standardize it, and use it for query
// generation.
template <class C>
void fill_to_fill(Edge::Accessor &e, const EdgeType &type, C &&consumer)
{
@@ -141,39 +144,6 @@ void find_fill(I iter, C &&consumer)
}
}
void load(DbAccessor &t, vector<string> &para)
{
// DbAccessor t(db);
CSVImporter imp(t, cerr);
imp.parts_mark = get_argument(para, "-d", ",")[0];
imp.parts_array_mark = get_argument(para, "-ad", ",")[0];
imp.warning =
strcasecmp(get_argument(para, "-w", "true").c_str(), "true") == 0;
imp.error =
strcasecmp(get_argument(para, "-err", "true").c_str(), "true") == 0;
// IMPORT VERTICES
auto o = take_argument(para, "-v");
while (o.is_present()) {
std::fstream file(o.get());
// cout << "Importing vertices from file: " << o.get() << endl;
auto n = imp.import_vertices(file);
cout << "Loaded " << n << " vertices from " << o.get() << endl;
o = take_argument(para, "-v");
}
// IMPORT EDGES
o = take_argument(para, "-e");
while (o.is_present()) {
std::fstream file(o.get());
// cout << "Importing edges from file: " << o.get() << endl;
auto n = imp.import_edges(file);
cout << "Loaded " << n << " edges from " << o.get() << endl;
o = take_argument(para, "-e");
}
}
void fill_with_bt(unordered_map<string, double> &values, Vertex::Accessor &com,
double weight,
PropertyFamily::PropertyType::PropertyTypeKey<ArrayString>
@@ -385,29 +355,18 @@ int main(int argc, char **argv)
{
auto para = all_arguments(argc, argv);
Db db;
{
DbAccessor t(db);
load(t, para);
t.commit();
}
import_csv_from_arguments(db, para);
{
DbAccessor t(db);
// for (int i = 0; i < 100; i++)
// make_transactions(db);
// string line;
// while(std::getline(file, line))
int n = 300 * 1000;
vector<pair<Vertex::Accessor, unordered_map<string, double>>> coll;
// QUERY BENCHMARK
auto begin = clock();
int i = 0;
iter::for_all_fill(
t.label_find_or_create("Company").index->for_range_exact(t),
[&](auto v) {
@@ -430,7 +389,7 @@ int main(int argc, char **argv)
<< endl;
cout << "Throughput: " << 1 / (elapsed_s / n) << " [query/sec]" << endl;
auto res = coll.back(); // query(t, fid.get());
auto res = coll.back();
while (res.second.empty()) {
coll.pop_back();
res = coll.back();

View File

@@ -31,6 +31,7 @@
#include "storage/model/properties/all.hpp"
#include "storage/model/properties/flags.hpp"
#include "storage/vertex_accessor.hpp"
#include "utils/command_line/arguments.hpp"
#include "utils/option.hpp"
using namespace std;
@@ -208,7 +209,8 @@ private:
: new IdFiller(make_option(prop_key(name, Flags::Int64))));
return make_option(std::move(f));
} else if (equal_str(type, "start_id") || equal_str(type, "from_id")) {
} else if (equal_str(type, "start_id") || equal_str(type, "from_id") ||
equal_str(type, "from") || equal_str(type, "source")) {
std::unique_ptr<Filler> f(new FromFiller(*this));
return make_option(std::move(f));
@@ -216,7 +218,8 @@ private:
std::unique_ptr<Filler> f(new LabelFiller(*this));
return make_option(std::move(f));
} else if (equal_str(type, "end_id") || equal_str(type, "to_id")) {
} else if (equal_str(type, "end_id") || equal_str(type, "to_id") ||
equal_str(type, "to") || equal_str(type, "target")) {
std::unique_ptr<Filler> f(new ToFiller(*this));
return make_option(std::move(f));
@@ -311,3 +314,68 @@ private:
}
}
};
// Imports all -v "vertex_file_path.csv" vertices and -e "edge_file_path.csv"
// edges from specified files. Also defines arguments -d, -ad, -w, -err, -info.
// -d delimiter => sets delimiter for parsing .csv files. Default is ,
// -ad delimiter => sets delimiter for parsing arrays in .csv. Default is ,
// -w bool => turns on/off output of warnings. Default on.
// -err bool => turns on/off output of errors. Default on.
// -info bool => turns on/off output of info. Default on.
// Returns (number of loaded vertices, number of loaded edges)
std::pair<size_t, size_t>
import_csv_from_arguments(Db &db, std::vector<std::string> &para)
{
DbAccessor t(db);
CSVImporter imp(t, cerr);
imp.parts_mark = get_argument(para, "-d", ",")[0];
imp.parts_array_mark = get_argument(para, "-ad", ",")[0];
imp.warning = strcmp(get_argument(para, "-w", "true").c_str(), "true") == 0;
imp.error = strcmp(get_argument(para, "-err", "true").c_str(), "true") == 0;
bool info =
strcmp(get_argument(para, "-info", "true").c_str(), "true") == 0;
// IMPORT VERTICES
size_t l_v = 0;
auto o = take_argument(para, "-v");
while (o.is_present()) {
std::fstream file(o.get());
if (info)
std::cout << "Importing vertices from file: " << o.get()
<< std::endl;
auto n = imp.import_vertices(file);
l_v += n;
if (info)
std::cout << "Loaded " << n << " vertices from " << o.get()
<< std::endl;
o = take_argument(para, "-v");
}
// IMPORT EDGES
size_t l_e = 0;
o = take_argument(para, "-e");
while (o.is_present()) {
std::fstream file(o.get());
if (info)
std::cout << "Importing edges from file: " << o.get() << std::endl;
auto n = imp.import_edges(file);
l_e += n;
if (info)
std::cout << "Loaded " << n << " edges from " << o.get()
<< std::endl;
o = take_argument(para, "-e");
}
t.commit();
return std::make_pair(l_v, l_e);
}
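
A hedged usage note: because the two loops above call take_argument repeatedly, any number of -v and -e files can be passed in one run. An illustrative invocation and its programmatic equivalent (file names and delimiter are made up):

//   ./import_tool -v garments.csv -v profiles.csv -e edges.csv -d ";" -info false

std::vector<std::string> para = {"-v", "garments.csv", "-v", "profiles.csv",
                                 "-e", "edges.csv",    "-d", ";",
                                 "-info", "false"};
Db db;
auto counts = import_csv_from_arguments(db, para);   // (vertices, edges) loaded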

View File

@ -1,8 +0,0 @@
cmake_minimum_required(VERSION 3.1)
project(memgraph_tools)
add_executable(import_tool tool.cpp)
target_link_libraries(import_tool memgraph)
target_link_libraries(import_tool Threads::Threads)
target_link_libraries(import_tool ${fmt_static_lib})