diff --git a/src/import/base_import.hpp b/src/import/base_import.hpp deleted file mode 100644 index 2389105ef..000000000 --- a/src/import/base_import.hpp +++ /dev/null @@ -1,172 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "import/element_skeleton.hpp" -#include "import/fillings/filler.hpp" -#include "logging/default.hpp" -#include "storage/model/properties/flags.hpp" -#include "storage/vertex_accessor.hpp" -#include "utils/option.hpp" - -using namespace std; - -static Option empty_op_vacc; - -// Base importer with common facilities. -class BaseImporter { - public: - BaseImporter(DbAccessor &db, Logger &&logger) - : db(db), logger(std::move(logger)) {} - - char *cstr(string &str) { return &str[0]; } - - bool split(string &str, char mark, vector &sub_str) { - return split(cstr(str), mark, sub_str); - } - - // Occurances of mark are changed with '\0'. sub_str is filled with - // pointers to parts of str splited by mark in ascending order. Empty - // sub_str are included. Doesn't split inside quotations and - // open_bracket,closed_bracket. - // Returns true if it was succesfully parsed. - bool split(char *str, char mark, vector &sub_str) { - int head = 0; - bool in_text = false; - bool in_array = false; - - for (int i = 0; str[i] != '\0'; i++) { - char &c = str[i]; - - // IN TEXT check - if (c == quotations_mark) { - in_text = !in_text; - if (in_text && head == i) { - c = '\0'; - head = i + 1; - } else if (!in_text && !in_array) { - c = '\0'; - } - continue; - } else if (in_text) { - continue; - } - - // IN ARRAY check - if (c == open_bracket) { - if (in_array) { - logger.error("Nested arrays aren't supported."); - return false; - } - in_array = true; - continue; - } - if (in_array) { - if (c == closed_bracket) { - in_array = false; - } - continue; - } - - // SPLIT CHECK - if (c == mark) { - c = '\0'; - sub_str.push_back(&str[head]); - head = i + 1; - } - } - - sub_str.push_back(&str[head]); - - return true; - } - - // Extracts parts of str while stripping parts of array chars and qutation - // marks. Parts are separated with delimiter. - void extract(char *str, const char delimiter, vector &sub_str) { - int head = 0; - bool in_text = false; - - for (int i = 0; str[i] != '\0'; i++) { - char &c = str[i]; - - // IN TEXT check - if (c == quotations_mark) { - in_text = !in_text; - if (in_text) { - } else { - c = '\0'; - sub_str.push_back(&str[head]); - head = i + 1; - } - head = i + 1; - continue; - } else if (in_text) { - continue; - } - - // IN ARRAY check - if (c == open_bracket) { - head = i + 1; - continue; - } else if (c == closed_bracket) { - c = '\0'; - if (i > head) { - sub_str.push_back(&str[head]); - } - head = i + 1; - continue; - } - - // SPLIT CHECK - if (c == delimiter) { - c = '\0'; - if (i > head) { - sub_str.push_back(&str[head]); - } - head = i + 1; - } else if (c == ' ' && i == head) { - head++; - } - } - - sub_str.push_back(&str[head]); - } - - // Optionaly return vertex with given import local id if it exists. - Option const &get_vertex(size_t id) { - if (vertices.size() > id) { - return vertices[id]; - } else { - cout << vertices.size() << " -> " << id << endl; - return empty_op_vacc; - } - } - - public: - DbAccessor &db; - Logger logger; - - // Varius marks and delimiters. They can be freely changed here and - // everything will work. - char parts_mark = ','; - char parts_array_mark = ','; - char type_mark = ':'; - char quotations_mark = '"'; - char open_bracket = '['; - char closed_bracket = ']'; - - protected: - // All created vertices which have import local id. - vector> vertices; -}; diff --git a/src/import/csv_import.hpp b/src/import/csv_import.hpp deleted file mode 100644 index 6f56c871e..000000000 --- a/src/import/csv_import.hpp +++ /dev/null @@ -1,400 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "import/base_import.hpp" -#include "import/element_skeleton.hpp" -#include "import/fillings/array.hpp" -#include "import/fillings/bool.hpp" -#include "import/fillings/double.hpp" -#include "import/fillings/filler.hpp" -#include "import/fillings/float.hpp" -#include "import/fillings/from.hpp" -#include "import/fillings/id.hpp" -#include "import/fillings/int32.hpp" -#include "import/fillings/int64.hpp" -#include "import/fillings/label.hpp" -#include "import/fillings/skip.hpp" -#include "import/fillings/string.hpp" -#include "import/fillings/to.hpp" -#include "import/fillings/type.hpp" -#include "storage/model/properties/all.hpp" -#include "storage/model/properties/flags.hpp" -#include "storage/vertex_accessor.hpp" -#include "utils/assert.hpp" -#include "utils/command_line/arguments.hpp" -#include "utils/option.hpp" - -using namespace std; - -constexpr char const *_string = "string"; - -// Will change all int32 into int64, and all float into double from csv into -// database. Uplifting will occure even in arrays. -constexpr const bool UPLIFT_PRIMITIVES = true; - -bool equal_str(const char *a, const char *b) { return strcasecmp(a, b) == 0; } - -// CSV importer for importing multiple files regarding same graph. -// CSV format of file should be following: -// header -// line of data -// line of data -// ... -// -// Where header should be composed of parts splited by parts_mark. Number of -// parts should be same as number of parts in every line of data. Parts should -// be of format name:type where name is alfanumeric identifyer of data in thath -// column and type should be one of: id, from, to, label, type, bool, int, long, -// float, double, string, bool[], int[], long[], float[], double[], string[]. -// If name is missing the column data wont be saved into the elements. -// if the type is missing the column will be interperted as type string. If -// neither name nor type are present column will be skipped. -class CSVImporter : public BaseImporter { - public: - CSVImporter(DbAccessor &db) - : BaseImporter(db, logging::log->logger("CSV_import")) {} - - // Loads data from stream and returns number of loaded vertexes. - size_t import_vertices(std::fstream &file) { - return import(file, create_vertex, true); - } - - // Loads data from stream and returns number of loaded edges. - size_t import_edges(std::fstream &file) { - return import(file, create_edge, false); - } - - private: - // Loads data from file and returns number of loaded name. - // TG - TypeGroup - // F - function which will create element from filled element skelleton. - template - size_t import(std::fstream &file, F f, bool vertex) { - string line; - vector sub_str; - vector> fillers; - vector tmp; - - // HEADERS - if (!getline(file, line)) { - logger.error("No lines"); - return 0; - } - - if (!split(line, parts_mark, sub_str)) { - logger.error("Illegal headers"); - return 0; - } - - for (auto p : sub_str) { - auto o = get_filler(p, tmp, vertex); - if (o.is_present()) { - fillers.push_back(o.take()); - } else { - return 0; - } - } - sub_str.clear(); - - // LOAD DATA LINES - size_t count = 0; - size_t line_no = 1; - ElementSkeleton es(db); - while (std::getline(file, line)) { - sub_str.clear(); - es.clear(); - - if (split(line, parts_mark, sub_str)) { - check_for_part_count(sub_str.size() - fillers.size(), line_no); - - int n = min(sub_str.size(), fillers.size()); - for (int i = 0; i < n; i++) { - auto er = fillers[i]->fill(es, sub_str[i]); - if (er.is_present()) { - logger.error("{} on line: {}", er.get(), line_no); - } - } - - if (f(this, es, line_no)) { - count++; - } - } - - line_no++; - } - - return count; - } - - static bool create_vertex(CSVImporter *im, ElementSkeleton &es, - size_t line_no) { - auto va = es.add_vertex(); - auto id = es.element_id(); - if (id.is_present()) { - if (im->vertices.size() <= id.get()) { - Option empty = make_option(); - im->vertices.insert(im->vertices.end(), - id.get() - im->vertices.size() + 1, empty); - } - if (im->vertices[id.get()].is_present()) { - im->logger.error( - "Vertex on line: {} has same id with another " - "previously loaded vertex", - line_no); - return false; - } else { - im->vertices[id.get()] = make_option(std::move(va)); - return true; - } - } else { - im->logger.warn( - "Missing import local vertex id for vertex on " - "line: {}", - line_no); - } - - return true; - } - - static bool create_edge(CSVImporter *im, ElementSkeleton &es, - size_t line_no) { - auto o = es.add_edge(); - if (!o.is_present()) { - return true; - } else { - im->logger.error("{} on line: {}", o.get(), line_no); - return false; - } - } - - template - typename PropertyFamily::PropertyType::PropertyFamilyKey property_key( - const char *name, Flags type) { - debug_assert(false, "Fail."); - } - - // Returns filler for name:type in header_part. None if error occured. - template - Option> get_filler(char *header_part, - vector &tmp_vec, bool vertex) { - tmp_vec.clear(); - split(header_part, type_mark, tmp_vec); - - const char *name = tmp_vec[0]; - const char *type = tmp_vec[1]; - - if (tmp_vec.size() > 2) { - logger.error("To much sub parts in header part"); - return make_option>(); - - } else if (tmp_vec.size() < 2) { - if (tmp_vec.size() == 1) { - logger.warn( - "Column: {} doesn't have specified type so string " - "type will be used", - tmp_vec[0]); - name = tmp_vec[0]; - type = _string; - - } else { - logger.warn("Empty colum definition, skiping column."); - std::unique_ptr f(new SkipFiller()); - return make_option(std::move(f)); - } - - } else { - name = tmp_vec[0]; - type = tmp_vec[1]; - } - - // Create adequat filler - if (equal_str(type, "id")) { - std::unique_ptr f( - name[0] == '\0' ? new IdFiller() - : new IdFiller(make_option( - property_key(name, Flags::Int64)))); - return make_option(std::move(f)); - - } else if (equal_str(type, "start_id") || equal_str(type, "from_id") || - equal_str(type, "from") || equal_str(type, "source")) { - std::unique_ptr f(new FromFiller(*this)); - return make_option(std::move(f)); - - } else if (equal_str(type, "label")) { - std::unique_ptr f(new LabelFiller(*this)); - return make_option(std::move(f)); - - } else if (equal_str(type, "end_id") || equal_str(type, "to_id") || - equal_str(type, "to") || equal_str(type, "target")) { - std::unique_ptr f(new ToFiller(*this)); - return make_option(std::move(f)); - - } else if (equal_str(type, "type")) { - std::unique_ptr f(new TypeFiller(*this)); - return make_option(std::move(f)); - - } else if (name[0] == '\0') { // OTHER FILLERS REQUIRE NAME - logger.warn("Unnamed column of type: {} will be skipped.", type); - std::unique_ptr f(new SkipFiller()); - return make_option(std::move(f)); - - // *********************** PROPERTIES - } else if (equal_str(type, "bool")) { - std::unique_ptr f( - new BoolFiller(property_key(name, Flags::Bool))); - return make_option(std::move(f)); - - } else if (equal_str(type, "double") || - (UPLIFT_PRIMITIVES && equal_str(type, "float"))) { - std::unique_ptr f( - new DoubleFiller(property_key(name, Flags::Double))); - return make_option(std::move(f)); - - } else if (equal_str(type, "float")) { - std::unique_ptr f( - new FloatFiller(property_key(name, Flags::Float))); - return make_option(std::move(f)); - - } else if (equal_str(type, "long") || - (UPLIFT_PRIMITIVES && equal_str(type, "int"))) { - std::unique_ptr f( - new Int64Filler(property_key(name, Flags::Int64))); - return make_option(std::move(f)); - - } else if (equal_str(type, "int")) { - std::unique_ptr f( - new Int32Filler(property_key(name, Flags::Int32))); - return make_option(std::move(f)); - - } else if (equal_str(type, "string")) { - std::unique_ptr f( - new StringFiller(property_key(name, Flags::String))); - return make_option(std::move(f)); - - } else if (equal_str(type, "bool[]")) { - std::unique_ptr f(make_array_filler( - *this, property_key(name, Flags::ArrayBool), to_bool)); - return make_option(std::move(f)); - - } else if (equal_str(type, "double[]") || - (UPLIFT_PRIMITIVES && equal_str(type, "float[]"))) { - std::unique_ptr f(make_array_filler( - *this, property_key(name, Flags::ArrayDouble), to_double)); - return make_option(std::move(f)); - - } else if (equal_str(type, "float[]")) { - std::unique_ptr f(make_array_filler( - *this, property_key(name, Flags::ArrayFloat), to_float)); - return make_option(std::move(f)); - - } else if (equal_str(type, "long[]") || - (UPLIFT_PRIMITIVES && equal_str(type, "int[]"))) { - std::unique_ptr f(make_array_filler( - *this, property_key(name, Flags::ArrayInt64), to_int64)); - return make_option(std::move(f)); - - } else if (equal_str(type, "int[]")) { - std::unique_ptr f(make_array_filler( - *this, property_key(name, Flags::ArrayInt32), to_int32)); - return make_option(std::move(f)); - - } else if (equal_str(type, "string[]")) { - std::unique_ptr f(make_array_filler( - *this, property_key(name, Flags::ArrayString), to_string)); - return make_option(std::move(f)); - - } else { - logger.error("Unknown type: {}", type); - return make_option>(); - } - } - - void check_for_part_count(long diff, long line_no) { - if (diff != 0) { - if (diff < 0) { - logger.warn( - "Line no: {} has less parts then specified in " - "header. Missing: {} parts", - line_no, diff); - } else { - logger.warn( - "Line no: {} has more parts then specified in " - "header. Extra: {} parts", - line_no, diff); - } - } - } -}; - -template <> -PropertyFamily::PropertyType::PropertyFamilyKey -CSVImporter::property_key(const char *name, Flags type) { - return db.vertex_property_key(name, Type(type)); -} - -template <> -PropertyFamily::PropertyType::PropertyFamilyKey -CSVImporter::property_key(const char *name, Flags type) { - return db.edge_property_key(name, Type(type)); -} - -// Imports all -v "vertex_file_path.csv" vertices and -e "edge_file_path.csv" -// edges from specified files. Also defines arguments -d, -ad. -// -d delimiter => sets delimiter for parsing .csv files. Default is , -// -ad delimiter => sets delimiter for parsing arrays in .csv. Default is -// Returns (no loaded vertices,no loaded edges) -std::pair import_csv_from_arguments( - Db &db, std::vector ¶) { - DbAccessor t(db); - CSVImporter imp(t); - - imp.parts_mark = get_argument(para, "-d", ",")[0]; - imp.parts_array_mark = get_argument(para, "-ad", ",")[0]; - - // IMPORT VERTICES - size_t l_v = 0; - auto o = take_argument(para, "-v"); - while (o.is_present()) { - std::fstream file(o.get()); - - imp.logger.info("Importing vertices from file: {}", o.get()); - - auto n = imp.import_vertices(file); - l_v = +n; - - imp.logger.info("Loaded: {} vertices from {}", n, o.get()); - - o = take_argument(para, "-v"); - } - - // IMPORT EDGES - size_t l_e = 0; - o = take_argument(para, "-e"); - while (o.is_present()) { - std::fstream file(o.get()); - - imp.logger.info("Importing edges from file: {}", o.get()); - - auto n = imp.import_edges(file); - l_e = +n; - - imp.logger.info("Loaded: {} edges from {}", n, o.get()); - - o = take_argument(para, "-e"); - } - - t.commit(); - - return std::make_pair(l_v, l_e); -} diff --git a/src/import/element_skeleton.hpp b/src/import/element_skeleton.hpp deleted file mode 100644 index ffb1e8279..000000000 --- a/src/import/element_skeleton.hpp +++ /dev/null @@ -1,100 +0,0 @@ -#pragma once - -#include "database/db_accessor.hpp" -#include "storage/model/property_value.hpp" -#include "storage/model/property_value_store.hpp" -#include "storage/vertex_accessor.hpp" -#include "utils/assert.hpp" - -// Holder for element data which he can then insert as a vertex or edge into the -// database depending on the available data and called add_* method. -class ElementSkeleton { - public: - ElementSkeleton(DbAccessor &db) : db(db){}; - - void add_property(StoredProperty &&prop) { - properties_v.push_back(std::move(prop)); - } - - void add_property(StoredProperty &&prop) { - properties_e.push_back(std::move(prop)); - } - - void set_element_id(size_t id) { el_id = make_option(std::move(id)); } - - void add_label(Label const &label) { labels.push_back(&label); } - - void set_type(EdgeType const &type) { this->type = make_option(&type); } - - void set_from(VertexAccessor &&va) { - from_va = make_option(std::move(va)); - } - - void set_to(VertexAccessor &&va) { - to_va = make_option(std::move(va)); - } - - VertexAccessor add_vertex() { - debug_assert(properties_e.empty(), "Properties aren't empty."); - - auto va = db.vertex_insert(); - - for (auto l : labels) { - // std::cout << *l << std::endl; - va.add_label(*l); - } - - for (auto prop : properties_v) { - va.set(std::move(prop)); - } - - return va; - } - - // Return error msg if unsuccessful - Option add_edge() { - if (!from_va.is_present()) { - return make_option(std::string("From field must be set")); - } - if (!to_va.is_present()) { - return make_option(std::string("To field must be set")); - } - if (!type.is_present()) { - return make_option(std::string("Type field must be set")); - } - debug_assert(properties_v.empty(), "Properties aren't empty."); - - auto ve = db.edge_insert(from_va.get(), to_va.get()); - ve.edge_type(*type.get()); - - for (auto prop : properties_e) { - ve.set(std::move(prop)); - } - - return make_option(); - } - - void clear() { - el_id = make_option(); - to_va = make_option(); - from_va = make_option(); - type = make_option(); - labels.clear(); - properties_v.clear(); - properties_e.clear(); - } - - // Returns import local id. - Option element_id() { return el_id; } - - private: - DbAccessor &db; - - Option el_id; - Option to_va; - Option from_va; - Option type; - std::vector