First version of CSVImport tool.

Query and import work.

Properties now use unordered_map.
Reduced memory footprint of properties by more than half.
This commit is contained in:
Kruno Tomola Fabro 2016-08-22 19:03:45 +01:00
parent 79015177b2
commit fbd9ca8420
44 changed files with 1942 additions and 89 deletions

View File

@ -218,6 +218,7 @@ FILE(COPY ${include_dir}/storage/model/properties/double.hpp DESTINATION ${build
FILE(COPY ${include_dir}/storage/model/properties/int32.hpp DESTINATION ${build_include_dir}/storage/model/properties) FILE(COPY ${include_dir}/storage/model/properties/int32.hpp DESTINATION ${build_include_dir}/storage/model/properties)
FILE(COPY ${include_dir}/storage/model/properties/int64.hpp DESTINATION ${build_include_dir}/storage/model/properties) FILE(COPY ${include_dir}/storage/model/properties/int64.hpp DESTINATION ${build_include_dir}/storage/model/properties)
FILE(COPY ${include_dir}/storage/model/properties/string.hpp DESTINATION ${build_include_dir}/storage/model/properties) FILE(COPY ${include_dir}/storage/model/properties/string.hpp DESTINATION ${build_include_dir}/storage/model/properties)
FILE(COPY ${include_dir}/storage/model/properties/array.hpp DESTINATION ${build_include_dir}/storage/model/properties)
FILE(COPY ${include_dir}/storage/model/properties/floating.hpp DESTINATION ${build_include_dir}/storage/model/properties) FILE(COPY ${include_dir}/storage/model/properties/floating.hpp DESTINATION ${build_include_dir}/storage/model/properties)
FILE(COPY ${include_dir}/storage/model/properties/number.hpp DESTINATION ${build_include_dir}/storage/model/properties) FILE(COPY ${include_dir}/storage/model/properties/number.hpp DESTINATION ${build_include_dir}/storage/model/properties)
FILE(COPY ${include_dir}/storage/model/properties/integral.hpp DESTINATION ${build_include_dir}/storage/model/properties) FILE(COPY ${include_dir}/storage/model/properties/integral.hpp DESTINATION ${build_include_dir}/storage/model/properties)
@ -370,6 +371,8 @@ option(MEMGRAPH "Build memgraph binary" ON)
message(STATUS "MEMGRAPH binary: ${MEMGRAPH}") message(STATUS "MEMGRAPH binary: ${MEMGRAPH}")
option(POC "Build proof of concept binaries" ON) option(POC "Build proof of concept binaries" ON)
message(STATUS "POC binaries: ${POC}") message(STATUS "POC binaries: ${POC}")
option(TOOLS "Build tool executables" ON)
message(STATUS "TOOLS binaries: ${TOOLS}")
option(TESTS "Build test binaries" ON) option(TESTS "Build test binaries" ON)
message(STATUS "TESTS binaries: ${TESTS}") message(STATUS "TESTS binaries: ${TESTS}")
# -- binaries ----------------------------------------------------------------- # -- binaries -----------------------------------------------------------------
@ -423,6 +426,7 @@ set(memgraph_src_files
${src_dir}/storage/model/properties/null.cpp ${src_dir}/storage/model/properties/null.cpp
${src_dir}/storage/model/properties/bool.cpp ${src_dir}/storage/model/properties/bool.cpp
${src_dir}/storage/model/properties/string.cpp ${src_dir}/storage/model/properties/string.cpp
${src_dir}/storage/model/properties/array.cpp
${src_dir}/storage/model/properties/properties.cpp ${src_dir}/storage/model/properties/properties.cpp
${src_dir}/storage/model/properties/property_family.cpp ${src_dir}/storage/model/properties/property_family.cpp
${src_dir}/storage/indexes/impl/nonunique_unordered_index.cpp ${src_dir}/storage/indexes/impl/nonunique_unordered_index.cpp
@ -461,6 +465,11 @@ if (POC)
add_subdirectory(poc) add_subdirectory(poc)
endif() endif()
# tools
if (TOOLS)
add_subdirectory(tools)
endif()
# memgraph build name # memgraph build name
execute_process( execute_process(
OUTPUT_VARIABLE COMMIT_BRANCH OUTPUT_VARIABLE COMMIT_BRANCH

View File

@ -77,16 +77,17 @@ protected:
{ {
protected: protected:
IteratorBase() : map(nullptr) { advanced = index = ~((size_t)0); } IteratorBase() : map(nullptr) { advanced = index = ~((size_t)0); }
IteratorBase(const RhBase *map) : map(map) IteratorBase(const RhBase *map)
{ {
index = 0; index = 0;
while (index < map->capacity && !map->array[index].valid()) { while (index < map->capacity && !map->array[index].valid()) {
index++; index++;
} }
if (index == map->capacity) { if (index >= map->capacity) {
map = nullptr; this->map = nullptr;
advanced = index = ~((size_t)0); advanced = index = ~((size_t)0);
} else { } else {
this->map = map;
advanced = index; advanced = index;
} }
} }

View File

@ -3,6 +3,7 @@
#include "database/db_transaction.hpp" #include "database/db_transaction.hpp"
#include "storage/vertex_accessor.hpp" #include "storage/vertex_accessor.hpp"
#include "utils/border.hpp" #include "utils/border.hpp"
// #include "utils/iterator/iterator.hpp"
#include "utils/option.hpp" #include "utils/option.hpp"
namespace tx namespace tx
@ -61,15 +62,15 @@ public:
// ******************* LABEL METHODS // ******************* LABEL METHODS
const Label &label_find_or_create(const std::string &name); const Label &label_find_or_create(const char *name);
bool label_contains(const std::string &name); bool label_contains(const char *name);
// ******************** TYPE METHODS // ******************** TYPE METHODS
const EdgeType &type_find_or_create(const std::string &name); const EdgeType &type_find_or_create(const char *name);
bool type_contains(const std::string &name); bool type_contains(const char *name);
// ******************** PROPERTY METHODS // ******************** PROPERTY METHODS

View File

@ -1,16 +1,18 @@
#pragma once #pragma once
#include <stdint.h>
#include <ostream> #include <ostream>
#include <stdint.h>
#include "utils/total_ordering.hpp" #include "utils/char_str.hpp"
#include "utils/reference_wrapper.hpp" #include "utils/reference_wrapper.hpp"
#include "utils/total_ordering.hpp"
class EdgeType : public TotalOrdering<EdgeType> class EdgeType : public TotalOrdering<EdgeType>
{ {
public: public:
EdgeType(); EdgeType();
EdgeType(const std::string &id); EdgeType(const std::string &id);
EdgeType(const char *id);
EdgeType(std::string &&id); EdgeType(std::string &&id);
friend bool operator<(const EdgeType &lhs, const EdgeType &rhs); friend bool operator<(const EdgeType &lhs, const EdgeType &rhs);
@ -21,6 +23,8 @@ public:
operator const std::string &() const; operator const std::string &() const;
CharStr char_str() { return CharStr(&id[0]); }
private: private:
std::string id; std::string id;
}; };

View File

@ -2,16 +2,16 @@
#include <stdexcept> #include <stdexcept>
#include "data_structures/concurrent/concurrent_map.hpp"
#include "storage/edge_type/edge_type.hpp" #include "storage/edge_type/edge_type.hpp"
#include "data_structures/concurrent/concurrent_set.hpp" #include "utils/char_str.hpp"
class EdgeTypeStore class EdgeTypeStore
{ {
public: public:
const EdgeType &find_or_create(const char *name);
const EdgeType& find_or_create(const std::string& name); bool contains(const char *name); // TODO: const
bool contains(const std::string& name); // TODO: const
// TODO: implement find method // TODO: implement find method
// return { EdgeType, is_found } // return { EdgeType, is_found }
@ -24,5 +24,5 @@ public:
// templetize the two of them // templetize the two of them
private: private:
ConcurrentSet<EdgeType> edge_types; ConcurrentMap<CharStr, std::unique_ptr<EdgeType>> edge_types;
}; };

View File

@ -37,7 +37,7 @@ public:
// nonunique => always succeeds. // nonunique => always succeeds.
virtual bool insert(IndexRecord<T, K> &&value) = 0; virtual bool insert(IndexRecord<T, K> &&value) = 0;
// Returns iterator which returns valid records in range. // Returns iterator which returns valid filled records in range.
// order==None => doesn't guarantee any order of returned records. // order==None => doesn't guarantee any order of returned records.
// order==Ascending => guarantees order of returned records will be from // order==Ascending => guarantees order of returned records will be from
// smallest to largest. // smallest to largest.

View File

@ -6,19 +6,20 @@
#include "storage/indexes/impl/nonunique_unordered_index.hpp" #include "storage/indexes/impl/nonunique_unordered_index.hpp"
#include "storage/vertex.hpp" #include "storage/vertex.hpp"
#include "storage/vertex_accessor.hpp" #include "storage/vertex_accessor.hpp"
#include "utils/char_str.hpp"
#include "utils/reference_wrapper.hpp" #include "utils/reference_wrapper.hpp"
#include "utils/total_ordering.hpp" #include "utils/total_ordering.hpp"
using LabelIndexRecord = VertexIndexRecord<std::nullptr_t>; using LabelIndexRecord = VertexIndexRecord<std::nullptr_t>;
class Label : public TotalOrdering<Label> class Label : public TotalOrdering<Label>, TotalOrdering<CharStr, Label>
{ {
public: public:
using label_index_t = NonUniqueUnorderedIndex<Vertex, std::nullptr_t>; using label_index_t = NonUniqueUnorderedIndex<Vertex, std::nullptr_t>;
Label() = delete; Label() = delete;
Label(const std::string &name); Label(const char *name);
Label(std::string &&name);
Label(const Label &) = delete; Label(const Label &) = delete;
Label(Label &&other) = default; Label(Label &&other) = default;
@ -27,12 +28,18 @@ public:
friend bool operator==(const Label &lhs, const Label &rhs); friend bool operator==(const Label &lhs, const Label &rhs);
friend bool operator<(const CharStr &lhs, const Label &rhs);
friend bool operator==(const CharStr &lhs, const Label &rhs);
friend std::ostream &operator<<(std::ostream &stream, const Label &label); friend std::ostream &operator<<(std::ostream &stream, const Label &label);
operator const std::string &() const; operator const std::string &() const;
std::unique_ptr<label_index_t> index; std::unique_ptr<label_index_t> index;
CharStr char_str() const { return CharStr(name.c_str()); }
private: private:
std::string name; std::string name;
}; };

View File

@ -2,20 +2,20 @@
#include <stdexcept> #include <stdexcept>
#include "storage/label/label.hpp"
#include "data_structures/concurrent/concurrent_set.hpp" #include "data_structures/concurrent/concurrent_set.hpp"
#include "storage/label/label.hpp"
#include "utils/char_str.hpp"
class LabelStore class LabelStore
{ {
public: public:
const Label &find_or_create(const char *name);
const Label& find_or_create(const std::string& name); bool contains(const char *name); // TODO: const
bool contains(const std::string& name); // TODO: const
// TODO: implement find method // TODO: implement find method
// return { Label, is_found } // return { Label, is_found }
private: private:
ConcurrentSet<Label> labels; ConcurrentMap<CharStr, std::unique_ptr<Label>> labels;
}; };

View File

@ -1,5 +1,6 @@
#pragma once #pragma once
#include "storage/model/properties/array.hpp"
#include "storage/model/properties/bool.hpp" #include "storage/model/properties/bool.hpp"
#include "storage/model/properties/double.hpp" #include "storage/model/properties/double.hpp"
#include "storage/model/properties/float.hpp" #include "storage/model/properties/float.hpp"

View File

@ -0,0 +1,69 @@
#pragma once
#include "storage/model/properties/property.hpp"
// Property that stores a sequence of values of type T in a std::vector.
//   T      - element type of the stored vector.
//   f_type - Flags value published as the static member `type`.
// Only declarations are visible here; the out-of-line members are defined
// elsewhere.
// NOTE(review): <vector> and <ostream> are not included directly in this
// header; presumably they arrive through property.hpp - confirm.
template <class T, Flags f_type>
class Array : public Property
{
public:
// Flag value identifying this array property type.
static constexpr Flags type = f_type;
// Container type used to hold the elements.
using Arr = std::vector<T>;
Array(const Array &) = default;
Array(Array &&) = default;
// Construct from an existing vector (copy and move overloads).
Array(const Arr &value);
Array(Arr &&value);
// Read-only access to the underlying vector via conversion.
operator const Arr &() const;
// Comparison against a generic Property, another Array and a raw vector.
bool operator==(const Property &other) const override;
bool operator==(const Array &other) const;
bool operator==(const Arr &other) const;
// NOTE(review): inside a class template this declares a non-template friend
// per instantiation; confirm a matching definition exists for each one used.
friend std::ostream &operator<<(std::ostream &stream, const Array &prop);
// Writes the property to `stream`; overrides Property::print.
std::ostream &print(std::ostream &stream) const override;
// Returns a const reference to the stored vector.
Arr const &value_ref() const { return value; }
// The stored elements (publicly accessible).
Arr value;
};
// Array property with std::string elements, tagged Flags::ArrayString.
// Inherits every constructor of the Array base.
class ArrayString : public Array<std::string, Flags::ArrayString>
{
public:
using Array::Array;
};
// Array property with bool elements, tagged Flags::ArrayBool.
// Inherits every constructor of the Array base.
class ArrayBool : public Array<bool, Flags::ArrayBool>
{
public:
using Array::Array;
};
// Array property with int32_t elements, tagged Flags::ArrayInt32.
// Inherits every constructor of the Array base.
class ArrayInt32 : public Array<int32_t, Flags::ArrayInt32>
{
public:
using Array::Array;
};
// Array property with int64_t elements, tagged Flags::ArrayInt64.
// Inherits every constructor of the Array base.
class ArrayInt64 : public Array<int64_t, Flags::ArrayInt64>
{
public:
using Array::Array;
};
// Array property with float elements, tagged Flags::ArrayFloat.
// Inherits every constructor of the Array base.
class ArrayFloat : public Array<float, Flags::ArrayFloat>
{
public:
using Array::Array;
};
// Array property with double elements, tagged Flags::ArrayDouble.
// Inherits every constructor of the Array base.
class ArrayDouble : public Array<double, Flags::ArrayDouble>
{
public:
using Array::Array;
};

View File

@ -29,6 +29,8 @@ enum class Flags : unsigned
Null = 0x0, Null = 0x0,
Bool = 0x1, Bool = 0x1,
// TODO remove this two values
True = 0x2 | Bool, True = 0x2 | Bool,
False = 0x4 | Bool, False = 0x4 | Bool,
@ -44,6 +46,12 @@ enum class Flags : unsigned
Double = 0x400 | Floating, Double = 0x400 | Floating,
Array = 0x1000, Array = 0x1000,
ArrayBool = (Bool << 13) | Array,
ArrayString = (String << 13) | Array,
ArrayInt32 = (Int32 << 13) | Array,
ArrayInt64 = (Int64 << 13) | Array,
ArrayFloat = (Float << 13) | Array,
ArrayDouble = (Double << 13) | Array,
type_mask = 0xFFF type_mask = 0xFFF

View File

@ -1,6 +1,6 @@
#pragma once #pragma once
#include <map> #include <unordered_map>
#include "storage/model/properties/property.hpp" #include "storage/model/properties/property.hpp"
#include "storage/model/properties/property_family.hpp" #include "storage/model/properties/property_family.hpp"
@ -49,7 +49,15 @@ public:
handler.finish(); handler.finish();
} }
// Invokes handler(key, property) once for every entry stored in `props`.
// The handler is taken by value and the entries are visited in the
// container's own iteration order.
template <class Handler>
void for_all(Handler handler) const
{
    for (auto entry = props.begin(); entry != props.end(); ++entry) {
        handler(entry->first, entry->second);
    }
}
private: private:
using props_t = std::map<prop_key_t, Property::sptr>; using props_t =
std::unordered_map<prop_key_t, Property::sptr, PropertyHash>;
props_t props; props_t props;
}; };

View File

@ -80,6 +80,8 @@ public:
return type->family.name(); return type->family.name();
} }
const PropertyFamily &get_family() const { return type->family; }
private: private:
const PropertyType *type; const PropertyType *type;
}; };
@ -181,3 +183,15 @@ private:
// data structure. // data structure.
ConcurrentMap<Type, std::unique_ptr<PropertyType>> types; ConcurrentMap<Type, std::unique_ptr<PropertyType>> types;
}; };
// Hash functor for PropertyFamilyKey. The hash is derived solely from the
// address of the key's PropertyFamily, so every key that refers to the same
// family object hashes to the same value.
class PropertyHash
{
public:
    size_t
    operator()(PropertyFamily::PropertyType::PropertyFamilyKey const &key) const
    {
        // Hash the family's address, then offset and multiply by a fixed
        // 64-bit constant to spread the bits.
        const void *family_addr =
            static_cast<const void *>(&key.get_family());
        const size_t address_hash = std::hash<const void *>()(family_addr);
        return (address_hash + 7) * UINT64_C(0xbf58476d1ce4e5b9);
    }
};

View File

@ -26,8 +26,14 @@ public:
assert(vlist != nullptr); assert(vlist != nullptr);
} }
RecordAccessor(RecordAccessor const &other) = default; RecordAccessor(RecordAccessor const &other)
RecordAccessor(RecordAccessor &&other) = default; : record(other.record), vlist(other.vlist), db(other.db)
{
}
RecordAccessor(RecordAccessor &&other)
: record(other.record), vlist(other.vlist), db(other.db)
{
}
bool empty() const { return record == nullptr; } bool empty() const { return record == nullptr; }
@ -93,6 +99,22 @@ public:
T const *operator->() const { return record; } T const *operator->() const { return record; }
T *operator->() { return record; } T *operator->() { return record; }
// Copy assignment: takes over `record` and `vlist` from `other`.
// The `db` member is not assigned here.
// NOTE(review): `vlist` is rebound through a const_cast; if the member is
// declared const, writing to it is undefined behaviour - confirm against the
// member declaration.
RecordAccessor &operator=(const RecordAccessor &other)
{
    const_cast<vlist_t *&>(vlist) = other.vlist;
    record = other.record;
    return *this;
}
// Move assignment: behaves exactly like copy assignment (`other` is left
// unchanged), so it forwards to the copy-assignment operator. `other` is a
// named reference here and therefore selects the const-reference overload.
RecordAccessor &operator=(RecordAccessor &&other)
{
    const RecordAccessor &source = other;
    return *this = source;
}
// Assumes same transaction // Assumes same transaction
friend bool operator==(const RecordAccessor &a, const RecordAccessor &b) friend bool operator==(const RecordAccessor &a, const RecordAccessor &b)
{ {

View File

@ -0,0 +1,23 @@
#pragma once
#include <cstring>
#include "utils/total_ordering.hpp"
// Non-owning wrapper around a NUL-terminated C string that compares by
// content (strcmp order) instead of by pointer value.
//
// The wrapper only stores the pointer: the pointed-to characters must stay
// alive and unchanged for as long as the CharStr is used, and the pointer
// must not be null because both comparisons dereference it.
// The constructor is intentionally left implicit so that a `const char *`
// can be passed wherever a CharStr is expected.
class CharStr : public TotalOrdering<CharStr>
{
public:
    CharStr(const char *str) : str(str) {}

    // True when both strings have identical contents.
    friend bool operator==(const CharStr &lhs, const CharStr &rhs)
    {
        // <cstring> only guarantees the std:: name, so qualify the call.
        return std::strcmp(lhs.str, rhs.str) == 0;
    }

    // Lexicographic (strcmp) ordering.
    friend bool operator<(const CharStr &lhs, const CharStr &rhs)
    {
        return std::strcmp(lhs.str, rhs.str) < 0;
    }

private:
    const char *str;
};

View File

@ -1,8 +1,9 @@
#pragma once #pragma once
#include <algorithm>
#include <string> #include <string>
#include <vector> #include <vector>
#include <algorithm> #include "utils/option.hpp"
namespace namespace
{ {
@ -21,18 +22,30 @@ bool contains_argument(const std::vector<std::string>& all,
return std::find(all.begin(), all.end(), flag) != all.end(); return std::find(all.begin(), all.end(), flag) != all.end();
} }
auto get_argument(const std::vector<std::string>& all, auto get_argument(const std::vector<std::string> &all, const std::string &flag,
const std::string& flag,
const std::string &default_value) const std::string &default_value)
{ {
auto it = std::find(all.begin(), all.end(), flag); auto it = std::find(all.begin(), all.end(), flag);
if(it == all.end()) if (it == all.end()) return default_value;
return default_value;
return all[std::distance(all.begin(), it) + 1]; return all[std::distance(all.begin(), it) + 1];
} }
#pragma clang diagnostic pop Option<std::string> take_argument(std::vector<std::string> &all,
const std::string &flag)
{
auto it = std::find(all.begin(), all.end(), flag);
if (it == all.end()) return make_option<std::string>();
auto s = std::string(all[std::distance(all.begin(), it) + 1]);
it++;
it++;
all.erase(std::find(all.begin(), all.end(), flag), it);
return make_option<std::string>(std::move(s));
}
#pragma clang diagnostic pop
} }

View File

@ -1,6 +1,7 @@
#pragma once #pragma once
#include <cassert> #include <cassert>
#include <cstring>
#include <ext/aligned_buffer.h> #include <ext/aligned_buffer.h>
#include <utility> #include <utility>
@ -8,7 +9,7 @@ template <class T>
class Option class Option
{ {
public: public:
Option() {} Option() { std::memset(data._M_addr(), 0, sizeof(T)); }
// //
// Option(T item) // Option(T item)
// { // {
@ -28,25 +29,51 @@ public:
initialized = true; initialized = true;
} }
Option(Option &other) = default; Option(const Option &other)
{
if (other.initialized) {
new (data._M_addr()) T(other.get());
initialized = true;
} else {
std::memset(data._M_addr(), 0, sizeof(T));
}
}
// Containers from std which have strong exception guarantees wont use move // Containers from std which have strong exception guarantees wont use move
// constructors and operators wihtout noexcept. "Optimized C++,2016 , Kurt // constructors and operators wihtout noexcept. "Optimized C++,2016 , Kurt
// Guntheroth, page: 142, title: Moving instances into std::vector" // Guntheroth, page: 142, title: Moving instances into std::vector"
Option(Option &&other) noexcept Option(Option &&other) noexcept
{ {
if (other.initialized) { if (other.initialized) {
data = std::move(other.data); new (data._M_addr()) T(std::move(other.get()));
other.initialized = false; other.initialized = false;
initialized = true; initialized = true;
} else {
std::memset(data._M_addr(), 0, sizeof(T));
} }
} }
~Option() ~Option()
{ {
if (initialized) get().~T(); if (initialized) {
get().~T();
initialized = false;
}
} }
// Copy assignment: destroys the currently held value (if any) and then
// copy-constructs a new one from `other` when `other` holds a value.
Option &operator=(const Option &other)
{
    // Without this guard, `x = x` would destroy the held value and then see
    // an empty `other`, silently leaving the option empty.
    if (this == &other) return *this;

    if (initialized) {
        get().~T();
        initialized = false;
    }

    if (other.initialized) {
        new (data._M_addr()) T(other.get());
        initialized = true;
    }

    return *this;
}
Option &operator=(Option &&other) Option &operator=(Option &&other)
{ {
if (initialized) { if (initialized) {
@ -55,7 +82,7 @@ public:
} }
if (other.initialized) { if (other.initialized) {
data = std::move(other.data); new (data._M_addr()) T(std::move(other.get()));
other.initialized = false; other.initialized = false;
initialized = true; initialized = true;
} }

View File

@ -45,29 +45,25 @@ Edge::Accessor DbAccessor::edge_insert(Vertex::Accessor const &from,
} }
// LABEL METHODS // LABEL METHODS
const Label &DbAccessor::label_find_or_create(const std::string &name) const Label &DbAccessor::label_find_or_create(const char *name)
{ {
return db_transaction.db.graph.label_store.find_or_create( return db_transaction.db.graph.label_store.find_or_create(name);
std::forward<const std::string &>(name));
} }
bool DbAccessor::label_contains(const std::string &name) bool DbAccessor::label_contains(const char *name)
{ {
return db_transaction.db.graph.label_store.contains( return db_transaction.db.graph.label_store.contains(name);
std::forward<const std::string &>(name));
} }
// TYPE METHODS // TYPE METHODS
const EdgeType &DbAccessor::type_find_or_create(const std::string &name) const EdgeType &DbAccessor::type_find_or_create(const char *name)
{ {
return db_transaction.db.graph.edge_type_store.find_or_create( return db_transaction.db.graph.edge_type_store.find_or_create(name);
std::forward<const std::string &>(name));
} }
bool DbAccessor::type_contains(const std::string &name) bool DbAccessor::type_contains(const char *name)
{ {
return db_transaction.db.graph.edge_type_store.contains( return db_transaction.db.graph.edge_type_store.contains(name);
std::forward<const std::string &>(name));
} }
// PROPERTY METHODS // PROPERTY METHODS

218
src/import/base_import.hpp Normal file
View File

@ -0,0 +1,218 @@
#pragma once
#include <algorithm>
#include <chrono>
#include <cstring>
#include <ctime>
#include <fstream>
#include <iostream>
#include <queue>
#include <regex>
#include <sstream>
#include <string>
#include <vector>
#include "import/element_skeleton.hpp"
#include "import/fillings/filler.hpp"
#include "storage/model/properties/flags.hpp"
#include "storage/vertex_accessor.hpp"
#include "utils/option.hpp"
using namespace std;
// Default-constructed option; BaseImporter::get_vertex returns a reference to
// it when the requested id is not inside the `vertices` vector.
static Option<Vertex::Accessor> empty_op_vacc;
// Base importer with common facilities.
class BaseImporter
{
public:
// Stores references to the database accessor and to the stream that err()
// and warn() write to. Both are kept as references, so the referenced
// objects must outlive the importer.
BaseImporter(DbAccessor &db, ostream &err_stream)
: db(db), err_stream(err_stream)
{
}
template <class... Args>
void err(Args &... args)
{
if (error) {
err_stream << " Error: ";
out_err(args...);
err_stream << endl;
}
}
// Writes " Warning: " followed by all arguments and a line end to
// err_stream. Does nothing when the `warning` switch is off. Arguments are
// taken by non-const lvalue reference, so they must be lvalues.
template <class... Args>
void warn(Args &... args)
{
    if (!warning) return;

    err_stream << " Warning: ";
    out_err(args...);
    err_stream << endl;
}
// Streams `first` to err_stream and then recurses on the remaining
// arguments; the single-argument overload ends the recursion.
template <class T, class... Args>
void out_err(T &first, Args &... args)
{
err_stream << first;
out_err(args...);
}
// Base case of out_err: streams the last remaining argument to err_stream.
template <class T>
void out_err(T &first)
{
err_stream << first;
}
// Returns a mutable pointer to the first character of `str`. The pointer
// refers to the string's own storage, so it is only usable while `str` is
// alive and not reallocated.
char *cstr(string &str) { return &str[0]; }
// Convenience overload: splits the contents of `str` in place by forwarding
// its character buffer to split(char *, ...). The pointers stored in sub_str
// point into `str`, which is modified by the call.
bool split(string &str, char mark, vector<char *> &sub_str)
{
return split(cstr(str), mark, sub_str);
}
// Splits `str` in place on `mark`.
//
// Every occurrence of `mark` outside quotations and outside an
// open_bracket/closed_bracket pair is overwritten with '\0', and a pointer to
// the start of each resulting part is appended to sub_str in order of
// appearance. Empty parts are included, and the part after the last mark is
// always appended.
// A quotation mark that opens a part is dropped from it, and a closing
// quotation mark outside brackets is overwritten with '\0'.
// Returns false (after reporting through err) when an open_bracket is found
// inside another bracket pair; otherwise returns true.
bool split(char *str, char mark, vector<char *> &sub_str)
{
    char *field = str; // start of the part currently being scanned
    bool quoted = false;
    bool bracketed = false;

    for (char *cur = str; *cur != '\0'; ++cur) {
        // Quotation handling comes first; marks inside quotes are ignored.
        if (*cur == quotations_mark) {
            quoted = !quoted;
            if (quoted && cur == field) {
                // Opening quote at the very start of a part: skip it.
                *cur = '\0';
                field = cur + 1;
            } else if (!quoted && !bracketed) {
                // Closing quote outside brackets terminates the text.
                *cur = '\0';
            }
            continue;
        }
        if (quoted) continue;

        // Bracket handling; marks inside brackets are ignored.
        if (*cur == open_bracket) {
            if (bracketed) {
                err("Nested arrays aren't supported.");
                return false;
            }
            bracketed = true;
            continue;
        }
        if (bracketed) {
            bracketed = (*cur != closed_bracket);
            continue;
        }

        // Plain character: cut here if it is the separator.
        if (*cur == mark) {
            *cur = '\0';
            sub_str.push_back(field);
            field = cur + 1;
        }
    }

    sub_str.push_back(field);
    return true;
}
// Extracts the individual parts of `str` in place, stripping bracket
// characters and quotation marks.
//
// Quoted text is appended to sub_str as one part (even when empty), without
// its quotes. Outside quotes, closed_bracket and `delimiter` end the current
// part, which is appended only when it is non-empty; open_bracket and spaces
// at the start of a part are skipped. Whatever follows the last terminator is
// always appended at the end, so the final entry may be an empty string.
void extract(char *str, const char delimiter, vector<char *> &sub_str)
{
    char *start = str; // beginning of the part currently being scanned
    bool quoted = false;

    for (char *cur = str; *cur != '\0'; ++cur) {
        if (*cur == quotations_mark) {
            quoted = !quoted;
            if (!quoted) {
                // Closing quote: emit the quoted text.
                *cur = '\0';
                sub_str.push_back(start);
            }
            // Either way the next part begins after the quote.
            start = cur + 1;
            continue;
        }
        if (quoted) continue;

        if (*cur == open_bracket) {
            start = cur + 1;
            continue;
        }

        if (*cur == closed_bracket || *cur == delimiter) {
            // Terminator: emit the part collected so far unless it is empty.
            *cur = '\0';
            if (cur > start) {
                sub_str.push_back(start);
            }
            start = cur + 1;
        } else if (*cur == ' ' && cur == start) {
            // Leading space of a part.
            ++start;
        }
    }

    sub_str.push_back(start);
}
// Returns the entry stored for import-local `id`. When `id` lies outside the
// `vertices` vector, the current size and the id are printed to cout and a
// reference to the shared empty option is returned instead.
Option<Vertex::Accessor> const &get_vertex(size_t id)
{
    if (id >= vertices.size()) {
        cout << vertices.size() << " -> " << id << endl;
        return empty_op_vacc;
    }
    return vertices[id];
}
public:
DbAccessor &db;
char parts_mark = ',';
char parts_array_mark = ',';
char type_mark = ':';
char quotations_mark = '"';
char open_bracket = '[';
char closed_bracket = ']';
bool warning = true;
bool error = true;
protected:
// All errors are written to this stream.
ostream &err_stream;
// All created vertices which have import local id
vector<Option<Vertex::Accessor>> vertices;
};

313
src/import/csv_import.hpp Normal file
View File

@ -0,0 +1,313 @@
#pragma once
#include <algorithm>
#include <chrono>
#include <cstring>
#include <ctime>
#include <fstream>
#include <iostream>
#include <queue>
#include <regex>
#include <sstream>
#include <string>
#include <vector>
#include "import/base_import.hpp"
#include "import/element_skeleton.hpp"
#include "import/fillings/array.hpp"
#include "import/fillings/bool.hpp"
#include "import/fillings/double.hpp"
#include "import/fillings/filler.hpp"
#include "import/fillings/float.hpp"
#include "import/fillings/from.hpp"
#include "import/fillings/id.hpp"
#include "import/fillings/int32.hpp"
#include "import/fillings/int64.hpp"
#include "import/fillings/label.hpp"
#include "import/fillings/skip.hpp"
#include "import/fillings/string.hpp"
#include "import/fillings/to.hpp"
#include "import/fillings/type.hpp"
#include "storage/model/properties/all.hpp"
#include "storage/model/properties/flags.hpp"
#include "storage/vertex_accessor.hpp"
#include "utils/option.hpp"
using namespace std;
// Case-insensitive equality of two NUL-terminated strings.
// Declared inline because this definition lives in a header: without it,
// including the header from more than one translation unit produces multiple
// definitions of the same function at link time.
inline bool equal_str(const char *a, const char *b)
{
    return strcasecmp(a, b) == 0;
}
// CSV importer for importing multiple files regarding same graph.
// CSV format of file should be following:
//
class CSVImporter : public BaseImporter
{
public:
using BaseImporter::BaseImporter;
// Imports vertices from `file` by running import() with create_vertex as the
// per-line callback. Returns the value reported by import(), i.e. the number
// of lines for which create_vertex returned true.
size_t import_vertices(std::fstream &file)
{
return import(file, create_vertex, true);
}
// Imports edges from `file` by running import() with create_edge as the
// per-line callback. Returns the value reported by import(), i.e. the number
// of lines for which create_edge returned true.
size_t import_edges(std::fstream &file)
{
return import(file, create_edge, false);
}
private:
// Reads a CSV stream and returns the number of successfully loaded elements.
//
// The first line is the header: it is split on parts_mark and every column
// is turned into a Filler through get_filler. If the stream is empty, the
// header cannot be split, or any column yields no filler, 0 is returned.
// Every following line is split the same way, each part is handed to the
// filler of its column, and `f(this, es, line_no)` is called to create the
// element; lines for which it returns true are counted.
//   file   - stream to read from.
//   f      - callable invoked once per data line (create_vertex/create_edge).
//   vertex - true when property keys must be resolved for vertices, false
//            for edges.
template <class F>
size_t import(std::fstream &file, F f, bool vertex)
{
    string line;
    vector<char *> sub_str;
    vector<unique_ptr<Filler>> fillers;
    vector<char *> tmp;

    // HEADERS
    if (!getline(file, line)) {
        err("No lines");
        return 0;
    }

    if (!split(line, parts_mark, sub_str)) {
        err("Illegal headers");
        return 0;
    }

    for (auto p : sub_str) {
        auto o = get_filler(p, tmp, vertex);
        if (o.is_present()) {
            fillers.push_back(o.take());
        } else {
            return 0;
        }
    }
    sub_str.clear();

    // LOAD DATA LINES
    size_t count = 0;
    size_t line_no = 1;
    ElementSkeleton es(db);
    while (std::getline(file, line)) {
        sub_str.clear();
        es.clear();

        if (split(line, parts_mark, sub_str)) {
            // Compute the difference in signed arithmetic; subtracting the
            // sizes directly would wrap around when a line has fewer parts
            // than the header.
            const long diff = static_cast<long>(sub_str.size()) -
                              static_cast<long>(fillers.size());
            check_for_part_count(diff, line_no);

            // Only columns present both in the header and on this line.
            const size_t n = min(sub_str.size(), fillers.size());
            for (size_t i = 0; i < n; i++) {
                auto er = fillers[i]->fill(es, sub_str[i]);
                if (er.is_present()) {
                    err(er.get(), " on line: ", line_no);
                }
            }

            if (f(this, es, line_no)) {
                count++;
            }
        }

        line_no++;
    }

    return count;
}
// Inserts the vertex described by `es` and, when the skeleton carries an
// import-local id, records the new accessor under that id in im->vertices.
// Returns false only when the id is already taken by an earlier vertex; a
// missing id produces a warning and still returns true.
// Note that the vertex is inserted before the id is examined.
static bool create_vertex(CSVImporter *im, ElementSkeleton &es,
                          size_t line_no)
{
    auto va = es.add_vertex();
    auto id = es.element_id();

    if (!id.is_present()) {
        im->warn("Missing import local vertex id for vertex on "
                 "line: ",
                 line_no);
        return true;
    }

    // Grow the table with empty slots so that index id.get() exists.
    if (im->vertices.size() <= id.get()) {
        Option<Vertex::Accessor> empty = make_option<Vertex::Accessor>();
        const auto missing = id.get() - im->vertices.size() + 1;
        im->vertices.insert(im->vertices.end(), missing, empty);
    }

    if (im->vertices[id.get()].is_present()) {
        im->err("Vertex on line: ", line_no,
                " has same id with another previously loaded vertex");
        return false;
    }

    im->vertices[id.get()] = make_option(std::move(va));
    return true;
}
// Asks the skeleton to insert an edge. add_edge() returns an error message
// on failure; in that case the message is reported together with the line
// number and false is returned. Returns true when no message came back.
static bool create_edge(CSVImporter *im, ElementSkeleton &es,
                        size_t line_no)
{
    auto failure = es.add_edge();
    if (failure.is_present()) {
        im->err(failure.get(), " on line: ", line_no);
        return false;
    }
    return true;
}
// Builds the Filler for one header column written as "name:type".
//
//   header_part - column definition; it is split in place on type_mark.
//   tmp_vec     - scratch vector, cleared and reused on every call.
//   vertex      - selects vertex (true) or edge (false) property keys.
//
// A column without a type is treated as a string column (with a warning).
// Type names are matched case-insensitively. Returns an empty Option when
// the definition has more than two parts or names an unknown type; columns
// that cannot be used (nothing to parse, or a property type without a name)
// get a SkipFiller.
Option<unique_ptr<Filler>> get_filler(char *header_part,
                                      vector<char *> &tmp_vec, bool vertex)
{
    // Writable storage for the default type name. A string literal cannot be
    // converted to `char *` since C++11, so it must not be pushed directly
    // into a vector<char *>. Static so the pointer kept in tmp_vec stays
    // valid after this call returns; the buffer is only ever read.
    static char default_type[] = "string";

    tmp_vec.clear();
    split(header_part, type_mark, tmp_vec);
    if (tmp_vec.size() > 2) {
        err("To much sub parts in header part");
        return make_option<unique_ptr<Filler>>();
    } else if (tmp_vec.size() < 2) {
        if (tmp_vec.size() == 1) {
            warn(
                "Column ", tmp_vec[0],
                " doesn't have specified type so string type will be used");
            tmp_vec.push_back(default_type);
        } else {
            warn("Empty colum definition, skiping column.");
            std::unique_ptr<Filler> f(new SkipFiller());
            return make_option(std::move(f));
        }
    }

    const char *name = tmp_vec[0];
    const char *type = tmp_vec[1];

    // Resolves the property key for `name` in the vertex or edge key space.
    auto prop_key = [&](auto name, auto type) -> auto
    {
        if (vertex) {
            return db.vertex_property_key(name, Type(type));
        } else {
            return db.edge_property_key(name, Type(type));
        }
    };

    // Structural columns first; these do not require a column name.
    if (equal_str(type, "id")) {
        std::unique_ptr<Filler> f(
            name[0] == '\0'
                ? new IdFiller()
                : new IdFiller(make_option(prop_key(name, Flags::Int64))));
        return make_option(std::move(f));

    } else if (equal_str(type, "start_id") || equal_str(type, "from_id")) {
        std::unique_ptr<Filler> f(new FromFiller(*this));
        return make_option(std::move(f));

    } else if (equal_str(type, "label")) {
        std::unique_ptr<Filler> f(new LabelFiller(*this));
        return make_option(std::move(f));

    } else if (equal_str(type, "end_id") || equal_str(type, "to_id")) {
        std::unique_ptr<Filler> f(new ToFiller(*this));
        return make_option(std::move(f));

    } else if (equal_str(type, "type")) {
        std::unique_ptr<Filler> f(new TypeFiller(*this));
        return make_option(std::move(f));

    } else if (name[0] == '\0') { // OTHER FILLERS REQUIRE NAME
        warn("Unnamed column of type: ", type, " will be skipped.");
        std::unique_ptr<Filler> f(new SkipFiller());
        return make_option(std::move(f));

        // *********************** PROPERTIES
    } else if (equal_str(type, "bool")) {
        std::unique_ptr<Filler> f(
            new BoolFiller(prop_key(name, Flags::Bool)));
        return make_option(std::move(f));

    } else if (equal_str(type, "double")) {
        std::unique_ptr<Filler> f(
            new DoubleFiller(prop_key(name, Flags::Double)));
        return make_option(std::move(f));

    } else if (equal_str(type, "float")) {
        std::unique_ptr<Filler> f(
            new FloatFiller(prop_key(name, Flags::Float)));
        return make_option(std::move(f));

    } else if (equal_str(type, "int")) {
        std::unique_ptr<Filler> f(
            new Int32Filler(prop_key(name, Flags::Int32)));
        return make_option(std::move(f));

    } else if (equal_str(type, "long")) {
        std::unique_ptr<Filler> f(
            new Int64Filler(prop_key(name, Flags::Int64)));
        return make_option(std::move(f));

    } else if (equal_str(type, "string")) {
        std::unique_ptr<Filler> f(
            new StringFiller(prop_key(name, Flags::String)));
        return make_option(std::move(f));

    } else if (equal_str(type, "bool[]")) {
        std::unique_ptr<Filler> f(make_array_filler<bool, ArrayBool>(
            *this, prop_key(name, Flags::ArrayBool), to_bool));
        return make_option(std::move(f));

    } else if (equal_str(type, "float[]")) {
        std::unique_ptr<Filler> f(make_array_filler<float, ArrayFloat>(
            *this, prop_key(name, Flags::ArrayFloat), to_float));
        return make_option(std::move(f));

    } else if (equal_str(type, "double[]")) {
        std::unique_ptr<Filler> f(make_array_filler<double, ArrayDouble>(
            *this, prop_key(name, Flags::ArrayDouble), to_double));
        return make_option(std::move(f));

    } else if (equal_str(type, "int[]")) {
        std::unique_ptr<Filler> f(make_array_filler<int32_t, ArrayInt32>(
            *this, prop_key(name, Flags::ArrayInt32), to_int32));
        return make_option(std::move(f));

    } else if (equal_str(type, "long[]")) {
        std::unique_ptr<Filler> f(make_array_filler<int64_t, ArrayInt64>(
            *this, prop_key(name, Flags::ArrayInt64), to_int64));
        return make_option(std::move(f));

    } else if (equal_str(type, "string[]")) {
        std::unique_ptr<Filler> f(make_array_filler<string, ArrayString>(
            *this, prop_key(name, Flags::ArrayString), to_string));
        return make_option(std::move(f));

    } else {
        err("Unknown type: ", type);
        return make_option<unique_ptr<Filler>>();
    }
}
// Warns when a line carries more parts than expected.
// diff is presumably (parts found - parts declared in header) and line_no the
// offending line number -- confirm against the caller.
void check_for_part_count(long diff, long line_no)
{
    // Only surplus parts are reported; the warning for missing parts
    // (diff < 0) is intentionally left disabled here.
    if (diff <= 0) {
        return;
    }

    warn("Line no: ", line_no,
         " has more parts then specified in header. Extra ", diff, " parts");
}
};

View File

@ -0,0 +1,118 @@
#pragma once
#include <cassert>
#include "database/db_accessor.hpp"
#include "storage/model/properties/property_family.hpp"
#include "storage/vertex_accessor.hpp"
// Holder for element data which he can then insert as a vertex or edge into the
// database depending on the available data.
class ElementSkeleton
{
class Prop
{
public:
Prop(PropertyFamily::PropertyType::PropertyFamilyKey key,
Option<std::shared_ptr<Property>> &&prop)
: key(key), prop(std::move(prop))
{
}
PropertyFamily::PropertyType::PropertyFamilyKey key;
Option<std::shared_ptr<Property>> prop;
};
public:
ElementSkeleton(DbAccessor &db) : db(db){};
void add_property(PropertyFamily::PropertyType::PropertyFamilyKey key,
std::shared_ptr<Property> &&prop)
{
properties.push_back(Prop(key, make_option(std::move(prop))));
}
void set_element_id(size_t id)
{
el_id = make_option<size_t>(std::move(id));
}
void add_label(Label const &label) { labels.push_back(&label); }
void set_type(EdgeType const &type) { this->type = make_option(&type); }
void set_from(Vertex::Accessor &&va)
{
from_va = make_option<Vertex::Accessor>(std::move(va));
}
void set_to(Vertex::Accessor &&va)
{
to_va = make_option<Vertex::Accessor>(std::move(va));
}
Vertex::Accessor add_vertex()
{
auto va = db.vertex_insert();
for (auto l : labels) {
// std::cout << *l << std::endl;
va.add_label(*l);
}
add_propreties(va);
return va;
}
// Return error msg if unsuccessful
Option<std::string> add_edge()
{
if (!from_va.is_present()) {
return make_option(std::string("From field must be seted"));
}
if (!to_va.is_present()) {
return make_option(std::string("To field must be seted"));
}
auto ve = db.edge_insert(from_va.get(), to_va.get());
if (type.is_present()) {
ve.edge_type(*type.get());
}
add_propreties(ve);
return make_option<std::string>();
}
void clear()
{
el_id = make_option<size_t>();
to_va = make_option<Vertex::Accessor>();
from_va = make_option<Vertex::Accessor>();
type = make_option<EdgeType const *>();
labels.clear();
properties.clear();
}
// Returns import local id.
Option<size_t> element_id() { return el_id; }
private:
template <class A>
void add_propreties(A &ra)
{
for (auto prop : properties) {
assert(prop.prop.is_present());
ra.set(prop.key, prop.prop.take());
}
}
DbAccessor &db;
Option<size_t> el_id;
Option<Vertex::Accessor> to_va;
Option<Vertex::Accessor> from_va;
Option<EdgeType const *> type;
std::vector<Label const *> labels;
std::vector<Prop> properties;
};

View File

@ -0,0 +1,50 @@
#pragma once
#include "database/db_accessor.hpp"
#include "import/fillings/common.hpp"
#include "import/fillings/filler.hpp"
// Filler that splits a field into sub-parts, converts every non-empty part
// with the supplied function and stores the result as one array property.
//   T - element type produced by the conversion function
//   A - property class constructed from std::vector<T>
template <class T, class A>
class ArrayFiller : public Filler
{
public:
    // db  - importer providing extract() and the array separator
    // key - property key the array is stored under
    // f   - converts one textual part into a T
    ArrayFiller(BaseImporter &db,
                PropertyFamily::PropertyType::PropertyFamilyKey key,
                T (*f)(const char *))
        : bim(db), key(key), f(f)
    {
    }

    // Fills skeleton with data from str. Returns error description if
    // error occurs. No property is added when no non-empty part is found.
    Option<std::string> fill(ElementSkeleton &data, char *str) final
    {
        // sub_str is a member so its storage is reused between calls.
        sub_str.clear();
        bim.extract(str, bim.parts_array_mark, sub_str);

        std::vector<T> vec;
        vec.reserve(sub_str.size());
        for (char *s : sub_str) {
            if (s[0] != '\0') {
                vec.push_back(f(s));
            }
        }

        if (!vec.empty()) {
            data.add_property(key, std::make_shared<A>(std::move(vec)));
        }
        return make_option<std::string>();
    }

private:
    BaseImporter &bim;
    PropertyFamily::PropertyType::PropertyFamilyKey key;
    // Standard names are qualified so that this header does not depend on
    // a using-directive in the including file.
    std::vector<char *> sub_str;
    T (*f)(const char *);
};
// Allocates an ArrayFiller<T, A> with new and returns the raw pointer; the
// caller is responsible for taking ownership of it.
template <class T, class A>
auto make_array_filler(BaseImporter &db,
                       PropertyFamily::PropertyType::PropertyFamilyKey key,
                       T (*f)(const char *))
{
    using FillerType = ArrayFiller<T, A>;

    FillerType *filler = new FillerType(db, key, f);
    return filler;
}

View File

@ -0,0 +1,30 @@
#pragma once
#include "import/fillings/common.hpp"
#include "import/fillings/filler.hpp"
#include "storage/model/properties/all.hpp"
#include "storage/model/properties/flags.hpp"
#include "storage/model/properties/property_family.hpp"
// Filler that stores a non-empty field as a Bool property.
class BoolFiller : public Filler
{
public:
    BoolFiller(PropertyFamily::PropertyType::PropertyFamilyKey key) : key(key)
    {
    }

    // Converts str with to_bool and queues it on data under the stored key.
    // An empty str adds nothing. The returned option is always empty.
    Option<std::string> fill(ElementSkeleton &data, char *str) final
    {
        if (str[0] == '\0') {
            return make_option<std::string>();
        }

        data.add_property(key, std::make_shared<Bool>(to_bool(str)));
        return make_option<std::string>();
    }

private:
    PropertyFamily::PropertyType::PropertyFamilyKey key;
};

View File

@ -0,0 +1,25 @@
#pragma once
#include <cstdlib>
#include <cstdlib>
#include <iostream>
#include <string>
#include <strings.h>
#include "storage/model/properties/all.hpp"
// Interprets v as a boolean: true for "true" in any letter case, or for any
// text that atoi() parses as a non-zero integer; false otherwise.
// Declared inline because this header defines the function, and a non-inline
// definition would be emitted once per including translation unit.
inline bool string2bool(const char *v)
{
    return strcasecmp(v, "true") == 0 || atoi(v) != 0;
}

// Converts a textual field to bool; see string2bool for the accepted forms.
inline bool to_bool(const char *str) { return string2bool(str); }
// Converts a textual field to float with std::stof, which throws
// std::invalid_argument / std::out_of_range on unparsable input.
// inline: defined in a header. std:: is spelled out so the header does not
// rely on a using-directive.
inline float to_float(const char *str) { return std::stof(str); }
// Converts a textual field to double with atof (0.0 when nothing parses).
// inline: defined in a header included from several places.
inline double to_double(const char *str) { return std::atof(str); }
// Converts a textual field to a 32-bit integer with atoi (0 when nothing
// parses). inline: defined in a header included from several places.
inline int32_t to_int32(const char *str) { return std::atoi(str); }
// Converts a textual field to a 64-bit integer with atoll (0 when nothing
// parses). inline: defined in a header included from several places.
inline int64_t to_int64(const char *str) { return std::atoll(str); }
// Copies a textual field into a std::string.
// inline: defined in a header included from several places.
inline std::string to_string(const char *str) { return std::string(str); }

View File

@ -0,0 +1,29 @@
#pragma once
#include "import/fillings/common.hpp"
#include "import/fillings/filler.hpp"
#include "storage/model/properties/all.hpp"
#include "storage/model/properties/flags.hpp"
#include "storage/model/properties/property_family.hpp"
// Filler that stores a non-empty field as a Double property.
class DoubleFiller : public Filler
{
public:
    DoubleFiller(PropertyFamily::PropertyType::PropertyFamilyKey key) : key(key)
    {
    }

    // Converts str with to_double and queues it on data under the stored
    // key. An empty str adds nothing. The returned option is always empty.
    Option<std::string> fill(ElementSkeleton &data, char *str) final
    {
        if (str[0] == '\0') {
            return make_option<std::string>();
        }

        data.add_property(key, std::make_shared<Double>(to_double(str)));
        return make_option<std::string>();
    }

private:
    PropertyFamily::PropertyType::PropertyFamilyKey key;
};

View File

@ -0,0 +1,12 @@
#pragma once
#include "import/element_skeleton.hpp"
#include "utils/option.hpp"
// Interface of objects that consume one textual field of an imported line
// and record its content in an ElementSkeleton.
class Filler
{
public:
    // Fillers are created with new and held in std::unique_ptr<Filler>, i.e.
    // destroyed through a base pointer; without a virtual destructor that is
    // undefined behaviour and members of derived fillers are never released.
    virtual ~Filler() = default;

    // Fills skeleton with data from str. Returns error description if
    // error occurs.
    virtual Option<std::string> fill(ElementSkeleton &data, char *str) = 0;
};

View File

@ -0,0 +1,29 @@
#pragma once
#include "import/fillings/common.hpp"
#include "import/fillings/filler.hpp"
#include "storage/model/properties/all.hpp"
#include "storage/model/properties/flags.hpp"
#include "storage/model/properties/property_family.hpp"
// Filler that stores a non-empty field as a Float property.
class FloatFiller : public Filler
{
public:
    FloatFiller(PropertyFamily::PropertyType::PropertyFamilyKey key) : key(key)
    {
    }

    // Converts str with to_float and queues it on data under the stored key.
    // An empty str adds nothing. The returned option is always empty.
    Option<std::string> fill(ElementSkeleton &data, char *str) final
    {
        if (str[0] == '\0') {
            return make_option<std::string>();
        }

        data.add_property(key, std::make_shared<Float>(to_float(str)));
        return make_option<std::string>();
    }

private:
    PropertyFamily::PropertyType::PropertyFamilyKey key;
};

View File

@ -0,0 +1,37 @@
#pragma once
#include "import/fillings/common.hpp"
#include "import/fillings/filler.hpp"
#include "storage/model/properties/all.hpp"
#include "storage/model/properties/flags.hpp"
#include "storage/model/properties/property_family.hpp"
// Filler for the field naming the vertex an edge starts from.
class FromFiller : public Filler
{
public:
    FromFiller(BaseImporter &db) : bim(db) {}

    // Parses str as a vertex id, looks the vertex up through the importer
    // and stores it as the "from" end of data. Returns an error description
    // when str is empty or the id is not known to the importer.
    Option<std::string> fill(ElementSkeleton &data, char *str) final
    {
        if (str[0] == '\0') {
            return make_option(std::string("From field must be spceified"));
        }

        auto vertex_id = atol(str);
        Option<Vertex::Accessor> const &vertex = bim.get_vertex(vertex_id);
        if (!vertex.is_present()) {
            return make_option(
                std::string("Unknown vertex in from field with id: ") + str);
        }

        data.set_from(Vertex::Accessor(vertex.get()));
        return make_option<std::string>();
    }

private:
    BaseImporter &bim;
};

View File

@ -0,0 +1,38 @@
#pragma once
#include "import/fillings/filler.hpp"
// Filler for the field holding the import-local id of an element. The id can
// optionally also be stored as an Int64 property.
class IdFiller : public Filler
{
public:
    // The id is only remembered on the skeleton, not stored as a property.
    IdFiller()
        : key(make_option<PropertyFamily::PropertyType::PropertyFamilyKey>())
    {
    }

    // When key is present the id is additionally stored under it; such a
    // key must be of type Int64.
    IdFiller(Option<PropertyFamily::PropertyType::PropertyFamilyKey> key)
        : key(key)
    {
        assert(!key.is_present() ||
               key.get().prop_type() == Type(Flags::Int64));
    }

    // Sets the element id of data from str and, when a key was given, adds
    // the same value as an Int64 property. An empty str changes nothing.
    // The returned option is always empty.
    Option<std::string> fill(ElementSkeleton &data, char *str) final
    {
        if (str[0] == '\0') {
            return make_option<std::string>();
        }

        data.set_element_id(atol(str));
        if (key.is_present()) {
            data.add_property(key.get(),
                              std::make_shared<Int64>(to_int64(str)));
        }
        return make_option<std::string>();
    }

private:
    Option<PropertyFamily::PropertyType::PropertyFamilyKey> key;
};

View File

@ -0,0 +1,30 @@
#pragma once
#include "import/fillings/common.hpp"
#include "import/fillings/filler.hpp"
#include "storage/model/properties/all.hpp"
#include "storage/model/properties/flags.hpp"
#include "storage/model/properties/property_family.hpp"
// Filler that stores a non-empty field as an Int32 property.
class Int32Filler : public Filler
{
public:
    Int32Filler(PropertyFamily::PropertyType::PropertyFamilyKey key) : key(key)
    {
    }

    // Converts str with to_int32 and queues it on data under the stored key.
    // An empty str adds nothing. The returned option is always empty.
    Option<std::string> fill(ElementSkeleton &data, char *str) final
    {
        if (str[0] == '\0') {
            return make_option<std::string>();
        }

        data.add_property(key, std::make_shared<Int32>(to_int32(str)));
        return make_option<std::string>();
    }

private:
    PropertyFamily::PropertyType::PropertyFamilyKey key;
};

View File

@ -0,0 +1,29 @@
#pragma once
#include "import/fillings/common.hpp"
#include "import/fillings/filler.hpp"
#include "storage/model/properties/all.hpp"
#include "storage/model/properties/flags.hpp"
#include "storage/model/properties/property_family.hpp"
// Filler that stores a non-empty field as an Int64 property.
class Int64Filler : public Filler
{
public:
    Int64Filler(PropertyFamily::PropertyType::PropertyFamilyKey key) : key(key)
    {
    }

    // Converts str with to_int64 and queues it on data under the stored key.
    // An empty str adds nothing. The returned option is always empty.
    Option<std::string> fill(ElementSkeleton &data, char *str) final
    {
        if (str[0] == '\0') {
            return make_option<std::string>();
        }

        data.add_property(key, std::make_shared<Int64>(to_int64(str)));
        return make_option<std::string>();
    }

private:
    PropertyFamily::PropertyType::PropertyFamilyKey key;
};

View File

@ -0,0 +1,29 @@
#pragma once
#include "database/db_accessor.hpp"
#include "import/fillings/filler.hpp"
// Filler for the field listing the labels of a vertex.
class LabelFiller : public Filler
{
public:
    LabelFiller(BaseImporter &db) : bim(db) {}

    // Splits str on the importer's array separator and adds a label (found
    // or created through the importer's database accessor) for every
    // non-empty part. The returned option is always empty.
    Option<std::string> fill(ElementSkeleton &data, char *str) final
    {
        // sub_str is a member so its storage is reused between calls.
        sub_str.clear();
        bim.extract(str, bim.parts_array_mark, sub_str);

        for (char *s : sub_str) {
            if (s[0] != '\0') {
                data.add_label(bim.db.label_find_or_create(s));
            }
        }
        return make_option<std::string>();
    }

private:
    BaseImporter &bim;
    // std:: is spelled out so that this header does not depend on a
    // using-directive in the including file.
    std::vector<char *> sub_str;
};

View File

@ -0,0 +1,19 @@
#pragma once
#include "import/fillings/common.hpp"
#include "import/fillings/filler.hpp"
#include "storage/model/properties/all.hpp"
#include "storage/model/properties/flags.hpp"
#include "storage/model/properties/property_family.hpp"
// Filler that ignores its field entirely.
class SkipFiller : public Filler
{
public:
    // Leaves the skeleton untouched and never reports an error; both
    // arguments are unused.
    Option<std::string> fill(ElementSkeleton &, char *) final
    {
        return make_option<std::string>();
    }
};

View File

@ -0,0 +1,29 @@
#pragma once
#include "import/fillings/common.hpp"
#include "import/fillings/filler.hpp"
#include "storage/model/properties/all.hpp"
#include "storage/model/properties/flags.hpp"
#include "storage/model/properties/property_family.hpp"
// Filler that stores a non-empty field as a String property.
class StringFiller : public Filler
{
public:
    StringFiller(PropertyFamily::PropertyType::PropertyFamilyKey key) : key(key)
    {
    }

    // Copies str with to_string and queues it on data under the stored key.
    // An empty str adds nothing. The returned option is always empty.
    Option<std::string> fill(ElementSkeleton &data, char *str) final
    {
        if (str[0] == '\0') {
            return make_option<std::string>();
        }

        data.add_property(key, std::make_shared<String>(to_string(str)));
        return make_option<std::string>();
    }

private:
    PropertyFamily::PropertyType::PropertyFamilyKey key;
};

View File

@ -0,0 +1,36 @@
#pragma once
#include "import/fillings/common.hpp"
#include "import/fillings/filler.hpp"
#include "storage/model/properties/all.hpp"
#include "storage/model/properties/flags.hpp"
#include "storage/model/properties/property_family.hpp"
// Filler for the field naming the vertex an edge points to.
class ToFiller : public Filler
{
public:
    ToFiller(BaseImporter &db) : bim(db) {}

    // Parses str as a vertex id, looks the vertex up through the importer
    // and stores it as the "to" end of data. Returns an error description
    // when str is empty or the id is not known to the importer.
    Option<std::string> fill(ElementSkeleton &data, char *str) final
    {
        if (str[0] == '\0') {
            return make_option(std::string("To field must be spceified"));
        }

        auto vertex_id = atol(str);
        Option<Vertex::Accessor> const &vertex = bim.get_vertex(vertex_id);
        if (!vertex.is_present()) {
            return make_option(
                std::string("Unknown vertex in to field with id: ") + str);
        }

        data.set_to(Vertex::Accessor(vertex.get()));
        return make_option<std::string>();
    }

private:
    BaseImporter &bim;
};

View File

@ -0,0 +1,25 @@
#pragma once
#include "database/db_accessor.hpp"
#include "import/fillings/filler.hpp"
// Filler for the field naming the type of an edge.
class TypeFiller : public Filler
{
public:
    TypeFiller(BaseImporter &db) : bim(db) {}

    // Finds or creates the edge type named by str through the importer's
    // database accessor and sets it on data. An empty str changes nothing.
    // The returned option is always empty.
    Option<std::string> fill(ElementSkeleton &data, char *str) final
    {
        if (str[0] == '\0') {
            return make_option<std::string>();
        }

        data.set_type(bim.db.type_find_or_create(str));
        return make_option<std::string>();
    }

private:
    BaseImporter &bim;
};

View File

@ -2,6 +2,7 @@
EdgeType::EdgeType() {} EdgeType::EdgeType() {}
EdgeType::EdgeType(const std::string &id) : id(id) {} EdgeType::EdgeType(const std::string &id) : id(id) {}
// Builds the edge type from a C string; the text is copied into id.
EdgeType::EdgeType(const char *id) : id(id) {}
EdgeType::EdgeType(std::string &&id) : id(std::move(id)) {} EdgeType::EdgeType(std::string &&id) : id(std::move(id)) {}
bool operator<(const EdgeType &lhs, const EdgeType &rhs) bool operator<(const EdgeType &lhs, const EdgeType &rhs)
@ -19,7 +20,4 @@ std::ostream& operator<<(std::ostream& stream, const EdgeType& type)
return stream << type.id; return stream << type.id;
} }
EdgeType::operator const std::string&() const EdgeType::operator const std::string &() const { return id; }
{
return id;
}

View File

@ -1,13 +1,19 @@
#include "storage/edge_type/edge_type_store.hpp" #include "storage/edge_type/edge_type_store.hpp"
const EdgeType& EdgeTypeStore::find_or_create(const std::string& name) const EdgeType &EdgeTypeStore::find_or_create(const char *name)
{ {
auto accessor = edge_types.access(); auto accessor = edge_types.access();
return accessor.insert(EdgeType(name)).first; auto it = accessor.find(CharStr(name));
if (it == accessor.end()) {
auto l = std::make_unique<EdgeType>(name);
auto k = l->char_str();
it = accessor.insert(k, std::move(l)).first;
}
return *(it->second);
} }
bool EdgeTypeStore::contains(const std::string& name) // const bool EdgeTypeStore::contains(const char *name) // const
{ {
auto accessor = edge_types.access(); auto accessor = edge_types.access();
return accessor.find(EdgeType(name)) != accessor.end(); return accessor.find(CharStr(name)) != accessor.end();
} }

View File

@ -1,12 +1,8 @@
// #include "storage/indexes/impl/nonunique_unordered_index.hpp" // #include "storage/indexes/impl/nonunique_unordered_index.hpp"
#include "storage/label/label.hpp" #include "storage/label/label.hpp"
Label::Label(const std::string &name) Label::Label(const char *name)
: name(name), index(std::unique_ptr<label_index_t>(new label_index_t())) : name(std::string(name)),
{
}
Label::Label(std::string &&name)
: name(std::move(name)),
index(std::unique_ptr<label_index_t>(new label_index_t())) index(std::unique_ptr<label_index_t>(new label_index_t()))
{ {
} }
@ -21,6 +17,16 @@ bool operator==(const Label &lhs, const Label &rhs)
return lhs.name == rhs.name; return lhs.name == rhs.name;
} }
// Orders a bare key against a label by comparing it with the label's key.
bool operator<(const CharStr &lhs, const Label &rhs)
{
    const auto &label_key = rhs.char_str();
    return lhs < label_key;
}
// A bare key equals a label when it equals the label's key.
bool operator==(const CharStr &lhs, const Label &rhs)
{
    const auto &label_key = rhs.char_str();
    return lhs == label_key;
}
std::ostream &operator<<(std::ostream &stream, const Label &label) std::ostream &operator<<(std::ostream &stream, const Label &label)
{ {
return stream << label.name; return stream << label.name;

View File

@ -1,13 +1,19 @@
#include "storage/label/label_store.hpp" #include "storage/label/label_store.hpp"
const Label& LabelStore::find_or_create(const std::string& name) const Label &LabelStore::find_or_create(const char *name)
{ {
auto accessor = labels.access(); auto accessor = labels.access();
return accessor.insert(Label(name)).first; auto it = accessor.find(CharStr(name));
if (it == accessor.end()) {
auto l = std::make_unique<Label>(name);
auto k = l->char_str();
it = accessor.insert(k, std::move(l)).first;
}
return *(it->second);
} }
bool LabelStore::contains(const std::string& name) // const bool LabelStore::contains(const char *name) // const
{ {
auto accessor = labels.access(); auto accessor = labels.access();
return accessor.find(Label(name)) != accessor.end(); return accessor.find(CharStr(name)) != accessor.end();
} }

View File

@ -0,0 +1,70 @@
#include "storage/model/properties/array.hpp"

#include <utility>
// Creates the property holding a copy of the given array; the property is
// tagged with f_type.
template <class T, Flags f_type>
Array<T, f_type>::Array(const Arr &value)
    : Property(f_type),
      value(value)
{
}
// Creates the property by taking over the given array; the property is
// tagged with f_type.
template <class T, Flags f_type>
Array<T, f_type>::Array(Arr &&value)
    // A named rvalue-reference parameter is an lvalue, so it has to be moved
    // explicitly; otherwise the whole array would be copied.
    : Property(f_type), value(std::move(value))
{
}
// Gives read-only access to the stored array.
template <class T, Flags f_type>
Array<T, f_type>::operator const Arr &() const
{
    const Arr &stored = value;
    return stored;
}
// Compares with an arbitrary property: equal only when other is an Array of
// this same instantiation holding equal elements.
template <class T, Flags f_type>
bool Array<T, f_type>::operator==(const Property &other) const
{
    if (!other.is<Array>()) {
        return false;
    }
    return operator==(other.as<Array>());
}
// Two array properties are equal when their stored arrays are equal.
template <class T, Flags f_type>
bool Array<T, f_type>::operator==(const Array &other) const
{
    const Arr &other_value = other.value;
    return this->operator==(other_value);
}
// Element-wise comparison with a plain array: equal when both have the same
// length and no pair of elements at the same index differs.
template <class T, Flags f_type>
bool Array<T, f_type>::operator==(const Arr &other) const
{
    const auto count = value.size();
    if (count != other.size()) {
        return false;
    }

    // Advance while the elements agree; reaching the end means equality.
    size_t idx = 0;
    while (idx < count && !(value[idx] != other[idx])) {
        ++idx;
    }
    return idx == count;
}
// Streams an array property by delegating to its print() member.
template <class T, Flags f_type>
std::ostream &operator<<(std::ostream &stream, const Array<T, f_type> &prop)
{
    std::ostream &out = prop.print(stream);
    return out;
}
// Writes the array as "[e1,e2,...,]" (every element, including the last, is
// followed by a comma) and returns the stream.
template <class T, Flags f_type>
std::ostream &Array<T, f_type>::print(std::ostream &stream) const
{
    stream << "[";
    // Bound by const reference so that elements (e.g. std::string) are not
    // copied just to be printed.
    for (const auto &e : value) {
        stream << e << ",";
    }
    stream << "]";
    return stream;
}
// Explicit instantiations: the member definitions above live in this source
// file, so every Array type in use is instantiated here.

// Integral element types.
template class Array<bool, Flags::ArrayBool>;
template class Array<int32_t, Flags::ArrayInt32>;
template class Array<int64_t, Flags::ArrayInt64>;

// Floating point element types.
template class Array<float, Flags::ArrayFloat>;
template class Array<double, Flags::ArrayDouble>;

// Text element type.
template class Array<std::string, Flags::ArrayString>;

View File

@ -54,9 +54,8 @@ auto Vertex::Accessor::out() const
auto Vertex::Accessor::in() const auto Vertex::Accessor::in() const
{ {
DbTransaction &t = this->db; DbTransaction &t = this->db;
return iter::make_one_time_accessor( return iter::make_map(iter::make_iter_ref(record->data.in),
iter::make_map(iter::make_iter_ref(record->data.in), [&](auto e) -> auto { return Edge::Accessor(e, t); });
[&](auto e) -> auto { return Edge::Accessor(e, t); }));
} }
bool Vertex::Accessor::in_contains(Vertex::Accessor const &other) const bool Vertex::Accessor::in_contains(Vertex::Accessor const &other) const

8
tools/CMakeLists.txt Normal file
View File

@ -0,0 +1,8 @@
# Build script for the standalone memgraph tools.
cmake_minimum_required(VERSION 3.1)
project(memgraph_tools)

# CSV import tool built from tool.cpp and linked against the memgraph target,
# the threading library and the fmt library named by ${fmt_static_lib}.
add_executable(import_tool tool.cpp)
target_link_libraries(import_tool
    memgraph
    Threads::Threads
    ${fmt_static_lib})

461
tools/tool.cpp Normal file
View File

@ -0,0 +1,461 @@
#include "database/db.hpp"
#include "database/db_accessor.hpp"
#include <chrono>
#include <ctime>
#include <strings.h>
#include <unistd.h>
#include <unordered_map>
#include "database/db_accessor.cpp"
#include "import/csv_import.hpp"
#include "storage/indexes/impl/nonunique_unordered_index.cpp"
#include "storage/model/properties/properties.cpp"
#include "storage/record_accessor.cpp"
#include "storage/vertex_accessor.cpp"
#include "utils/command_line/arguments.hpp"
using namespace std;
// Calls consumer with the target vertex of e when e.fill() succeeds, e has
// the given type and the target's fill() succeeds as well.
template <class C>
void fill_to_fill(Edge::Accessor &e, const EdgeType &type, C &&consumer)
{
    if (!e.fill() || !(e.edge_type() == type)) {
        return;
    }

    auto target = e.to();
    if (target.fill()) {
        consumer(target);
    }
}
// Calls consumer with the source vertex of e when e.fill() succeeds, e has
// the given type and the source's fill() succeeds as well.
template <class C>
void fill_from_fill(Edge::Accessor &e, const EdgeType &type, C &&consumer)
{
    if (!e.fill() || !(e.edge_type() == type)) {
        return;
    }

    auto source = e.from();
    if (source.fill()) {
        consumer(source);
    }
}
// Calls consumer with the target vertex of e when both e.fill() and the
// target's fill() succeed.
template <class C>
void fill_to_fill(Edge::Accessor &e, C &&consumer)
{
    if (!e.fill()) {
        return;
    }

    auto target = e.to();
    if (target.fill()) {
        consumer(target);
    }
}
// Calls consumer with the target vertex of e when the target's fill()
// succeeds. The edge itself is not filled here.
template <class C>
void to_fill(Edge::Accessor &e, C &&consumer)
{
    auto target = e.to();
    if (!target.fill()) {
        return;
    }
    consumer(target);
}
// Calls consumer with the target vertex of e when the target's fill()
// succeeds and the target carries the given label.
template <class C>
void to_fill(Edge::Accessor &e, const Label &label, C &&consumer)
{
    auto target = e.to();
    if (!target.fill() || !target.has_label(label)) {
        return;
    }
    consumer(target);
}
// Calls consumer with the target vertex of e when e has the given type, the
// target's fill() succeeds and the target carries the given label.
template <class C>
void to_fill(Edge::Accessor &e, const EdgeType &type, const Label &label,
             C &&consumer)
{
    if (!(e.edge_type() == type)) {
        return;
    }

    auto target = e.to();
    if (target.fill() && target.has_label(label)) {
        consumer(target);
    }
}
// Calls consumer with the source vertex of e when e has the given type and
// the source's fill() succeeds.
template <class C>
void from_fill(Edge::Accessor &e, const EdgeType &type, C &&consumer)
{
    if (!(e.edge_type() == type)) {
        return;
    }

    auto source = e.from();
    if (source.fill()) {
        consumer(source);
    }
}
// Calls consumer with the source vertex of e when both e.fill() and the
// source's fill() succeed.
template <class C>
void fill_from_fill(Edge::Accessor &e, C &&consumer)
{
    if (!e.fill()) {
        return;
    }

    auto source = e.from();
    if (source.fill()) {
        consumer(source);
    }
}
// Helpers driving iterators whose next() returns an option-like value
// (is_present() / get() / take()).
namespace iter
{

// Passes every element whose fill() succeeds to consumer.
template <class I, class C>
void for_all_fill(I iter, C &&consumer)
{
    for (auto item = iter.next(); item.is_present(); item = iter.next()) {
        if (item.get().fill()) {
            consumer(item.take());
        }
    }
}

// Passes elements to consumer until it returns true or the iterator is
// exhausted.
template <class I, class C>
void find(I iter, C &&consumer)
{
    for (auto item = iter.next(); item.is_present(); item = iter.next()) {
        if (consumer(item.take())) {
            return;
        }
    }
}

// Like find, but elements whose fill() fails are skipped without being
// shown to consumer.
template <class I, class C>
void find_fill(I iter, C &&consumer)
{
    for (auto item = iter.next(); item.is_present(); item = iter.next()) {
        if (!item.get().fill()) {
            continue;
        }
        if (consumer(item.take())) {
            return;
        }
    }
}

}
// Imports vertices and edges from the CSV files named on the command line.
//   -d   <c>     part separator (first character is used, default ',')
//   -ad  <c>     array part separator (first character is used, default ',')
//   -w   <bool>  enable importer warnings (default true)
//   -err <bool>  enable importer errors (default true)
//   -v   <file>  vertex file, may be repeated
//   -e   <file>  edge file, may be repeated
// All vertex files are imported before any edge file. Consumed -v / -e
// arguments are removed from para by take_argument.
void load(DbAccessor &t, vector<string> &para)
{
    CSVImporter imp(t, cerr);

    imp.parts_mark = get_argument(para, "-d", ",")[0];
    imp.parts_array_mark = get_argument(para, "-ad", ",")[0];
    imp.warning =
        strcasecmp(get_argument(para, "-w", "true").c_str(), "true") == 0;
    imp.error =
        strcasecmp(get_argument(para, "-err", "true").c_str(), "true") == 0;

    // IMPORT VERTICES
    auto o = take_argument(para, "-v");
    while (o.is_present()) {
        // std::fstream opens for reading AND writing by default, which
        // fails on read-only files; the import only needs read access.
        std::fstream file(o.get(), std::ios::in);
        if (file.is_open()) {
            auto n = imp.import_vertices(file);
            cout << "Loaded " << n << " vertices from " << o.get() << endl;
        } else {
            cerr << "Could not open vertex file: " << o.get() << endl;
        }
        o = take_argument(para, "-v");
    }

    // IMPORT EDGES
    o = take_argument(para, "-e");
    while (o.is_present()) {
        std::fstream file(o.get(), std::ios::in);
        if (file.is_open()) {
            auto n = imp.import_edges(file);
            cout << "Loaded " << n << " edges from " << o.get() << endl;
        } else {
            cerr << "Could not open edge file: " << o.get() << endl;
        }
        o = take_argument(para, "-e");
    }
}
// Adds weight to the score of every business type listed on vertex com.
// Nothing happens when com has no value for the business types property.
void fill_with_bt(unordered_map<string, double> &values, Vertex::Accessor &com,
                  double weight,
                  PropertyFamily::PropertyType::PropertyTypeKey<ArrayString>
                      &prop_vertex_business_types)
{
    auto business_types = com.at(prop_vertex_business_types);
    if (!business_types.is_present()) {
        return;
    }

    for (auto &name : *business_types.get()) {
        values[name] += weight;
    }
}
// Starting from vertex va, follows incoming type_created edges to their
// source vertices, then the outgoing type_works_in edges of those vertices
// to targets labelled label_company, and adds weight to the business types
// of every such target.
void oportunity_employe_company(
    Vertex::Accessor &va, unordered_map<string, double> &values, double weight,
    PropertyFamily::PropertyType::PropertyTypeKey<ArrayString>
        &prop_vertex_business_types,
    const EdgeType &type_created, const EdgeType &type_works_in,
    const Label &label_company)
{
    // Innermost step: score one company vertex.
    auto score_company = [&](auto end_com) {
        fill_with_bt(values, end_com, weight, prop_vertex_business_types);
    };

    // One outgoing edge of a creator: continue only over type_works_in edges
    // that end in a company.
    auto visit_creator_edge = [&](auto creator_e) {
        to_fill(creator_e, type_works_in, label_company, score_company);
    };

    // One creator vertex: walk all of its outgoing edges.
    auto visit_creator = [&](auto creator) {
        iter::for_all_fill(creator.out(), visit_creator_edge);
    };

    // Incoming edges of va: continue only over type_created edges.
    iter::for_all_fill(va.in(), [&](auto opp_e) {
        from_fill(opp_e, type_created, visit_creator);
    });
}
// Computes a score per business type for the vertex with id start_id.
// Returns a map from business type name to accumulated weight; the map is
// empty (and "Illegal start vertex" is printed) when the start vertex cannot
// be found. Scores are accumulated from:
//   * partner companies of the start vertex (weight 0.9),
//   * for every vertex with a Works_In edge into the start vertex:
//       - opportunities it is interested in (weight 1, via the companies
//         of the opportunity creators),
//       - opportunities it created (weight +1 / -1 from "like" / "dislike"
//         feedback on the opportunity's outgoing edges),
//       - other edges, weighted by the status of Reached_To edges or by
//         0.01 * count of Viewed / Searched_And_Clicked edges.
auto query(DbAccessor &t, const Id &start_id)
{
    unordered_map<string, double> values;

    const Label &label_company = t.label_find_or_create("Company");
    const Label &label_opportunuty = t.label_find_or_create("Opportunity");

    // Edge types are bound by reference, like the labels above, so that no
    // EdgeType object is copied on every query.
    const EdgeType &type_works_in = t.type_find_or_create("Works_In");
    const EdgeType &type_reached_to = t.type_find_or_create("Reached_To");
    const EdgeType &type_partnered_with =
        t.type_find_or_create("Partnered_With");
    const EdgeType &type_interested_in =
        t.type_find_or_create("Interested_In");
    const EdgeType &type_viewed = t.type_find_or_create("Viewed");
    // The next two types are not used below; the lookups are kept because
    // find_or_create may register the type as a side effect.
    const EdgeType &type_has_match = t.type_find_or_create("Has_Match");
    const EdgeType &type_searched_and_clicked =
        t.type_find_or_create("Searched_And_Clicked");
    const EdgeType &type_is_employee = t.type_find_or_create("Is_Employee");
    const EdgeType &type_created = t.type_find_or_create("Created");
    (void)type_has_match;
    (void)type_is_employee;

    auto prop_edge_status = t.edge_property_family_get("status")
                                .get(Flags::String)
                                .type_key<String>();
    auto prop_edge_count =
        t.edge_property_family_get("count").get(Flags::Int32).type_key<Int32>();
    auto prop_edge_feedback = t.edge_property_family_get("feedback")
                                  .get(Flags::String)
                                  .type_key<String>();
    auto prop_vertex_business_types =
        t.vertex_property_family_get("business_types")
            .get(Flags::ArrayString)
            .type_key<ArrayString>();

    auto osva = t.vertex_find(start_id);
    if (!option_fill(osva)) {
        cout << "Illegal start vertex" << endl;
        return values;
    }
    auto start = osva.take();

    // PARTNERS
    iter::for_all_fill(start.out(), [&](auto e) {
        to_fill(e, type_partnered_with, label_company, [&](auto end_com) {
            fill_with_bt(values, end_com, 0.9, prop_vertex_business_types);
        });
    });

    // PERSONELS
    iter::for_all(start.in(), [&](auto e) {
        fill_from_fill(e, type_works_in, [&](auto employ) {
            iter::for_all_fill(employ.out(), [&](auto employ_edge) {
                const auto &ee_type = employ_edge.edge_type();

                if (ee_type == type_interested_in) {
                    // INTERESTED IN OPPORTUNUTIES
                    to_fill(employ_edge, label_opportunuty, [&](auto opp) {
                        oportunity_employe_company(
                            opp, values, 1, prop_vertex_business_types,
                            type_created, type_works_in, label_company);
                    });

                } else if (ee_type == type_created) {
                    // CREATED OPPORTUNUTIES
                    to_fill(employ_edge, label_opportunuty, [&](auto opp) {
                        iter::for_all_fill(opp.out(), [&](auto edge) {
                            auto feedback = edge.at(prop_edge_feedback);
                            if (!feedback.is_present()) {
                                return;
                            }

                            auto str = feedback.get()->c_str();
                            double weight = 0;
                            if (strcasecmp(str, "like") == 0) {
                                weight = 1;
                            } else if (strcasecmp(str, "dislike") == 0) {
                                weight = -1;
                            } else {
                                // Any other feedback does not contribute.
                                return;
                            }

                            to_fill(edge, label_company, [&](auto end_com) {
                                fill_with_bt(values, end_com, weight,
                                             prop_vertex_business_types);
                            });
                        });
                    });

                } else {
                    // COMPANY
                    double weight = 0;
                    if (ee_type == type_reached_to) {
                        auto os = employ_edge.at(prop_edge_status);
                        if (!os.is_present()) {
                            return;
                        }

                        auto str = os.get()->c_str();
                        if (strcasecmp(str, "pending") == 0) {
                            weight = 0.5;
                        } else if (strcasecmp(str, "connected") == 0) {
                            weight = 1;
                        } else if (strcasecmp(str, "unreachable") == 0) {
                            weight = 0.5;
                        } else if (strcasecmp(str, "not_a_match") == 0) {
                            weight = -1;
                        } else {
                            cout << "unknown status: " << str << endl;
                        }
                    } else if (ee_type == type_viewed ||
                               ee_type == type_searched_and_clicked) {
                        auto count = employ_edge.at(prop_edge_count);
                        if (count.is_present()) {
                            weight = 0.01 * (*count.get());
                        }
                    }

                    // TARGET COMPANY
                    // A zero weight would not change any score.
                    if (weight != 0) {
                        to_fill(employ_edge, [&](auto t_com) {
                            fill_with_bt(values, t_com, weight,
                                         prop_vertex_business_types);
                        });
                    }
                }
            });
        });
    });

    return values;
}
// Searches the index of the "Company" label for a vertex whose Int64
// "company_id" property equals cid. Returns the id of the first match, or an
// empty option when no vertex matches.
Option<Id> find_company(DbAccessor &t, int64_t cid)
{
    Option<Id> found;

    auto prop_vertex_company_id = t.vertex_property_family_get("company_id")
                                      .get(Flags::Int64)
                                      .type_key<Int64>();
    const Label &label_company = t.label_find_or_create("Company");

    // The consumer returns true to stop the search.
    iter::find_fill(label_company.index->for_range_exact(t), [&](auto v) {
        if (!v.has_label(label_company)) {
            return false;
        }

        auto id = v.at(prop_vertex_company_id);
        if (!id.is_present() || !((*id.get()) == cid)) {
            return false;
        }

        found = Option<Id>(v.id());
        return true;
    });

    return found;
}
// Entry point of the import tool: loads the CSV files named on the command
// line in one transaction, then, in a second transaction, runs query() for
// the vertices found in the "Company" label index, prints the average query
// time and throughput, and shows one non-empty result as an example.
int main(int argc, char **argv)
{
    auto para = all_arguments(argc, argv);

    Db db;

    {
        DbAccessor t(db);

        load(t, para);

        t.commit();
    }

    {
        DbAccessor t(db);

        // Upper bound on the number of queries that are executed.
        const int limit = 300 * 1000;
        vector<pair<Vertex::Accessor, unordered_map<string, double>>> coll;

        auto begin = clock();
        int i = 0;
        iter::for_all_fill(
            t.label_find_or_create("Company").index->for_range_exact(t),
            [&](auto v) {
                if (i < limit) {
                    coll.push_back(make_pair(v, query(t, v.id())));
                }
                i++;
            });
        clock_t end = clock();
        double elapsed_s = (double(end - begin) / CLOCKS_PER_SEC);

        // Only the queries that actually ran are counted; companies beyond
        // the limit are iterated but not queried.
        const auto n = coll.size();
        if (n == 0) {
            cout << "No companys" << endl;
            return 0;
        }

        cout << endl
             << "Query duration: " << (elapsed_s / n) * 1000 * 1000 << " [us]"
             << endl;
        cout << "Throughput: " << 1 / (elapsed_s / n) << " [query/sec]" << endl;

        // Drop trailing empty results so that the example shows some data.
        // The emptiness check has to come first: every result may be empty,
        // and back() on an empty vector is undefined behaviour.
        while (!coll.empty() && coll.back().second.empty()) {
            coll.pop_back();
        }

        if (coll.empty()) {
            cout << endl << "No company produced a non-empty result" << endl;
        } else {
            auto &res = coll.back();

            auto prop_vertex_id = t.vertex_property_family_get("company_id")
                                      .get(Flags::Int64)
                                      .type_key<Int64>();
            cout << endl
                 << "Example: " << *res.first.at(prop_vertex_id).get() << endl;
            for (const auto &e : res.second) {
                cout << e.first << " = " << e.second << endl;
            }
        }

        // Sum over all remaining results; the dropped ones were empty and
        // would not have contributed.
        double sum = 0;
        for (const auto &r : coll) {
            for (const auto &e : r.second) {
                sum += e.second;
            }
        }
        cout << endl << endl << "Compiler sum " << sum << endl;

        t.commit();
    }

    return 0;
}