Split a single dump query into multiple queries

Summary:
Prior to this change, DumpGenerator returned one huge query that dumped
the entire graph. This change splits that single query into multiple
queries, each dumping a single vertex or edge. To make vertex matching
easier when dumping edges, an internal property id is assigned to each
vertex and removed after the whole graph is dumped.

Reviewers: teon.banek

Reviewed By: teon.banek

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D2038
This commit is contained in:
Tonko Sabolcec 2019-05-14 11:44:01 +02:00
parent bf4bf7a3bd
commit 5f24342502
3 changed files with 155 additions and 52 deletions

View File

@ -1,6 +1,7 @@
#include "database/single_node/dump.hpp"
#include <map>
#include <optional>
#include <ostream>
#include <utility>
#include <vector>
@ -15,6 +16,13 @@ namespace database {
namespace {
// Property used to distinguish vertices from one another. It is added to the
// property set of every dumped vertex so that edge queries can match their
// endpoints, and it is removed after the entire graph is built.
// TODO(tsabolcec): We should create an index for that property for faster
// matching.
const char *kInternalPropertyId = "__mg_id__";
void DumpPropertyValue(std::ostream *os, const PropertyValue &value) {
switch (value.type()) {
case PropertyValue::Type::Null:
@ -57,8 +65,13 @@ void DumpPropertyValue(std::ostream *os, const PropertyValue &value) {
}
void DumpProperties(std::ostream *os, GraphDbAccessor *dba,
const PropertyValueStore &store) {
const PropertyValueStore &store,
std::optional<uint64_t> property_id = std::nullopt) {
*os << "{";
if (property_id) {
*os << kInternalPropertyId << ": " << *property_id;
if (store.size() > 0) *os << ", ";
}
utils::PrintIterable(*os, store, ", ", [&dba](auto &os, const auto &kv) {
os << dba->PropertyName(kv.first) << ": ";
DumpPropertyValue(&os, kv.second);
@ -68,62 +81,67 @@ void DumpProperties(std::ostream *os, GraphDbAccessor *dba,
// Writes a single `CREATE` query that recreates `vertex`.
//
// The node pattern lists the vertex labels followed by its property map. The
// vertex's `CypherId()` is passed to `DumpProperties` so that the internal id
// property (`kInternalPropertyId`) is written as the first map entry; edge
// queries later match their endpoints on that property.
//
// `os` receives the query text; `dba` is used to resolve label and property
// names.
void DumpVertex(std::ostream *os, GraphDbAccessor *dba,
                const VertexAccessor &vertex) {
  *os << "CREATE (";
  for (const auto &label : vertex.labels()) {
    *os << ":" << dba->LabelName(label);
  }
  // Separate the labels from the property map, e.g. `(:Label {...})`.
  if (!vertex.labels().empty()) *os << " ";
  DumpProperties(os, dba, vertex.Properties(),
                 std::optional<uint64_t>(vertex.CypherId()));
  *os << ");";
}
// Writes a single `MATCH ... CREATE` query that recreates `edge`.
//
// The endpoints are matched by the internal id property
// (`kInternalPropertyId`) that `DumpVertex` attaches to every vertex, using
// the `CypherId()` of the edge's source and destination vertices.
//
// `os` receives the query text; `dba` is used to resolve the edge type and
// property names.
void DumpEdge(std::ostream *os, GraphDbAccessor *dba,
              const EdgeAccessor &edge) {
  *os << "MATCH (u), (v)";
  *os << " WHERE ";
  *os << "u." << kInternalPropertyId << " = " << edge.from().CypherId();
  *os << " AND ";
  *os << "v." << kInternalPropertyId << " = " << edge.to().CypherId() << " ";
  *os << "CREATE (u)-[";
  *os << ":" << dba->EdgeTypeName(edge.EdgeType());
  const auto &props = edge.Properties();
  // Edges carry no internal id, so the property map is omitted entirely when
  // the edge has no properties.
  if (props.size() > 0) {
    *os << " ";
    DumpProperties(os, dba, props);
  }
  *os << "]->(v);";
}
// Emits the query that strips the internal id property
// (`kInternalPropertyId`) from every vertex.
void DumpInternalIndexCleanup(std::ostream *os) {
  // TODO(tsabolcec): Don't forget to drop the index by internal id.
  std::ostream &out = *os;
  out << "MATCH (u) REMOVE u.";
  out << kInternalPropertyId;
  out << ";";
}
} // namespace
DumpGenerator::DumpGenerator(GraphDbAccessor *dba) : dba_(dba), first_(true) {
CypherDumpGenerator::CypherDumpGenerator(GraphDbAccessor *dba)
: dba_(dba), cleaned_internals_(false) {
CHECK(dba);
vertices_state_.emplace(dba->Vertices(false));
edges_state_.emplace(dba->Edges(false));
}
bool DumpGenerator::NextQuery(std::ostream *os) {
if (vertices_state_->ReachedEnd() && edges_state_->ReachedEnd()) return false;
if (first_) {
first_ = false;
*os << "CREATE ";
} else {
*os << ", ";
}
bool CypherDumpGenerator::NextQuery(std::ostream *os) {
if (!vertices_state_->ReachedEnd()) {
DumpVertex(os, dba_, *vertices_state_->GetCurrentAndAdvance());
return true;
} else if (!edges_state_->ReachedEnd()) {
DumpEdge(os, dba_, *edges_state_->GetCurrentAndAdvance());
return true;
} else if (!vertices_state_->Empty() && !cleaned_internals_) {
DumpInternalIndexCleanup(os);
cleaned_internals_ = true;
return true;
}
if (vertices_state_->ReachedEnd() && edges_state_->ReachedEnd()) *os << ";";
return true;
return false;
}
// Dumps the whole database reachable through `dba` to `os` as a sequence of
// openCypher queries. The queries are written back to back, exactly as
// `CypherDumpGenerator::NextQuery` produces them. Both pointers must be
// non-null (enforced with CHECK).
void DumpToCypher(std::ostream *os, GraphDbAccessor *dba) {
  CHECK(os && dba);
  CypherDumpGenerator dump(dba);
  while (dump.NextQuery(os)) continue;
}

View File

@ -6,16 +6,14 @@
namespace database {
/// Class which generates parts of openCypher query which can be used to dump
/// the database state.
/// Class which generates a sequence of openCypher queries which can be used to
/// dump the database state.
///
/// Currently, all parts combined form a single query which dumps an entire
/// graph (vertices and edges). Since query can be quite long for larger graphs,
/// the graph should be split in multiple queries in the future. Indexes,
/// constraints, roles, etc. are currently not dumped.
class DumpGenerator {
/// Currently, only vertices and edges are dumped, one per query. Index keys,
/// constraints, roles, etc. are not dumped yet.
class CypherDumpGenerator {
public:
explicit DumpGenerator(GraphDbAccessor *dba);
explicit CypherDumpGenerator(GraphDbAccessor *dba);
bool NextQuery(std::ostream *os);
@ -27,7 +25,8 @@ class DumpGenerator {
// Takes ownership of `container` and positions the iteration state at its
// beginning. Whether the container is empty is recorded here, before any
// advancing takes place, so `Empty()` keeps reporting the initial state.
explicit ContainerState(TContainer container)
    : container_(std::move(container)),
      current_(container_.begin()),
      end_(container_.end()),
      empty_(current_ == end_) {}
auto GetCurrentAndAdvance() {
auto to_be_returned = current_;
@ -37,19 +36,22 @@ class DumpGenerator {
// Returns true iff the current position equals the end of the container.
bool ReachedEnd() const { return current_ == end_; }
// Returns true iff the container is empty. This reads the stored `empty_`
// flag rather than comparing the current position with the end, so it is
// distinct from `ReachedEnd()`.
bool Empty() const { return empty_; }
private:
TContainer container_;
using TIterator = decltype(container_.begin());
TIterator current_;
TIterator end_;
bool empty_;
};
GraphDbAccessor *dba_;
// Boolean which indicates if the `NextQuery` method is called for the first
// time.
bool first_;
bool cleaned_internals_;
std::optional<ContainerState<decltype(dba_->Vertices(false))>>
vertices_state_;

View File

@ -14,6 +14,8 @@
#include "query/typed_value.hpp"
#include "storage/common/types/property_value.hpp"
using database::CypherDumpGenerator;
const char *kPropertyId = "property_id";
// A helper struct that contains info about database that is used to compare
@ -69,6 +71,14 @@ bool operator==(const DatabaseState &first, const DatabaseState &second) {
return first.vertices == second.vertices && first.edges == second.edges;
}
// Fetches the next query from `dump` as a string. When the generator reports
// that no query is left, an empty string is returned instead.
std::string DumpNext(CypherDumpGenerator *dump) {
  std::ostringstream buffer;
  const bool has_query = dump->NextQuery(&buffer);
  if (!has_query) return "";
  return buffer.str();
}
class DatabaseEnvironment {
public:
std::string DumpStr() {
@ -85,6 +95,8 @@ class DatabaseEnvironment {
dba.Commit();
}
database::GraphDbAccessor Access() { return db_.Access(); }
VertexAccessor CreateVertex(const std::vector<std::string> &labels,
const std::map<std::string, PropertyValue> &props,
bool add_property_id = true) {
@ -163,31 +175,53 @@ class DatabaseEnvironment {
// An empty database produces an empty dump and the generator yields no
// queries at all (not even the internal-id cleanup query).
TEST(DumpTest, EmptyGraph) {
  DatabaseEnvironment db;
  EXPECT_EQ("", db.DumpStr());

  auto accessor = db.Access();
  CypherDumpGenerator generator(&accessor);
  EXPECT_EQ(DumpNext(&generator), "");
}
// A single vertex without labels or properties is dumped as one CREATE query
// carrying only the internal id, followed by the cleanup query.
TEST(DumpTest, SingleVertex) {
  DatabaseEnvironment db;
  db.CreateVertex({}, {}, false);
  auto dba = db.Access();
  CypherDumpGenerator dump(&dba);
  EXPECT_EQ(DumpNext(&dump), "CREATE ({__mg_id__: 0});");
  EXPECT_EQ(DumpNext(&dump), "MATCH (u) REMOVE u.__mg_id__;");
  EXPECT_EQ(DumpNext(&dump), "");
}
// A labelled vertex is dumped with its label placed before the property map.
TEST(DumpTest, VertexWithSingleLabel) {
  DatabaseEnvironment db;
  db.CreateVertex({"Label1"}, {}, false);
  auto dba = db.Access();
  CypherDumpGenerator dump(&dba);
  EXPECT_EQ(DumpNext(&dump), "CREATE (:Label1 {__mg_id__: 0});");
  EXPECT_EQ(DumpNext(&dump), "MATCH (u) REMOVE u.__mg_id__;");
  EXPECT_EQ(DumpNext(&dump), "");
}
// Multiple labels are dumped as a `:A:B` chain before the property map.
TEST(DumpTest, VertexWithMultipleLabels) {
  DatabaseEnvironment db;
  db.CreateVertex({"Label1", "Label2"}, {}, false);
  auto dba = db.Access();
  CypherDumpGenerator dump(&dba);
  EXPECT_EQ(DumpNext(&dump), "CREATE (:Label1:Label2 {__mg_id__: 0});");
  EXPECT_EQ(DumpNext(&dump), "MATCH (u) REMOVE u.__mg_id__;");
  EXPECT_EQ(DumpNext(&dump), "");
}
// User properties follow the internal id inside the dumped property map.
TEST(DumpTest, VertexWithSingleProperty) {
  DatabaseEnvironment db;
  db.CreateVertex({}, {{"prop", PropertyValue(42)}}, false);
  auto dba = db.Access();
  CypherDumpGenerator dump(&dba);
  EXPECT_EQ(DumpNext(&dump), "CREATE ({__mg_id__: 0, prop: 42});");
  EXPECT_EQ(DumpNext(&dump), "MATCH (u) REMOVE u.__mg_id__;");
  EXPECT_EQ(DumpNext(&dump), "");
}
TEST(DumpTest, MultipleVertices) {
@ -195,7 +229,14 @@ TEST(DumpTest, MultipleVertices) {
db.CreateVertex({}, {}, false);
db.CreateVertex({}, {}, false);
db.CreateVertex({}, {}, false);
EXPECT_EQ(db.DumpStr(), "CREATE (n0), (n1), (n2);");
auto dba = db.Access();
CypherDumpGenerator dump(&dba);
EXPECT_EQ(DumpNext(&dump), "CREATE ({__mg_id__: 0});");
EXPECT_EQ(DumpNext(&dump), "CREATE ({__mg_id__: 1});");
EXPECT_EQ(DumpNext(&dump), "CREATE ({__mg_id__: 2});");
EXPECT_EQ(DumpNext(&dump), "MATCH (u) REMOVE u.__mg_id__;");
EXPECT_EQ(DumpNext(&dump), "");
}
TEST(DumpTest, SingleEdge) {
@ -203,7 +244,16 @@ TEST(DumpTest, SingleEdge) {
auto u = db.CreateVertex({}, {}, false);
auto v = db.CreateVertex({}, {}, false);
db.CreateEdge(u, v, "EdgeType", {}, false);
EXPECT_EQ(db.DumpStr(), "CREATE (n0), (n1), (n0)-[:EdgeType]->(n1);");
auto dba = db.Access();
CypherDumpGenerator dump(&dba);
EXPECT_EQ(DumpNext(&dump), "CREATE ({__mg_id__: 0});");
EXPECT_EQ(DumpNext(&dump), "CREATE ({__mg_id__: 1});");
EXPECT_EQ(DumpNext(&dump),
"MATCH (u), (v) WHERE u.__mg_id__ = 0 AND v.__mg_id__ = 1 CREATE "
"(u)-[:EdgeType]->(v);");
EXPECT_EQ(DumpNext(&dump), "MATCH (u) REMOVE u.__mg_id__;");
EXPECT_EQ(DumpNext(&dump), "");
}
TEST(DumpTest, MultipleEdges) {
@ -214,10 +264,23 @@ TEST(DumpTest, MultipleEdges) {
db.CreateEdge(u, v, "EdgeType", {}, false);
db.CreateEdge(v, u, "EdgeType", {}, false);
db.CreateEdge(v, w, "EdgeType", {}, false);
const char *expected =
"CREATE (n0), (n1), (n2), (n0)-[:EdgeType]->(n1), "
"(n1)-[:EdgeType]->(n0), (n1)-[:EdgeType]->(n2);";
EXPECT_EQ(db.DumpStr(), expected);
auto dba = db.Access();
CypherDumpGenerator dump(&dba);
EXPECT_EQ(DumpNext(&dump), "CREATE ({__mg_id__: 0});");
EXPECT_EQ(DumpNext(&dump), "CREATE ({__mg_id__: 1});");
EXPECT_EQ(DumpNext(&dump), "CREATE ({__mg_id__: 2});");
EXPECT_EQ(DumpNext(&dump),
"MATCH (u), (v) WHERE u.__mg_id__ = 0 AND v.__mg_id__ = 1 CREATE "
"(u)-[:EdgeType]->(v);");
EXPECT_EQ(DumpNext(&dump),
"MATCH (u), (v) WHERE u.__mg_id__ = 1 AND v.__mg_id__ = 0 CREATE "
"(u)-[:EdgeType]->(v);");
EXPECT_EQ(DumpNext(&dump),
"MATCH (u), (v) WHERE u.__mg_id__ = 1 AND v.__mg_id__ = 2 CREATE "
"(u)-[:EdgeType]->(v);");
EXPECT_EQ(DumpNext(&dump), "MATCH (u) REMOVE u.__mg_id__;");
EXPECT_EQ(DumpNext(&dump), "");
}
TEST(DumpTest, EdgeWithProperties) {
@ -225,8 +288,16 @@ TEST(DumpTest, EdgeWithProperties) {
auto u = db.CreateVertex({}, {}, false);
auto v = db.CreateVertex({}, {}, false);
db.CreateEdge(u, v, "EdgeType", {{"prop", PropertyValue(13)}}, false);
EXPECT_EQ(db.DumpStr(),
"CREATE (n0), (n1), (n0)-[:EdgeType {prop: 13}]->(n1);");
auto dba = db.Access();
CypherDumpGenerator dump(&dba);
EXPECT_EQ(DumpNext(&dump), "CREATE ({__mg_id__: 0});");
EXPECT_EQ(DumpNext(&dump), "CREATE ({__mg_id__: 1});");
EXPECT_EQ(DumpNext(&dump),
"MATCH (u), (v) WHERE u.__mg_id__ = 0 AND v.__mg_id__ = 1 CREATE "
"(u)-[:EdgeType {prop: 13}]->(v);");
EXPECT_EQ(DumpNext(&dump), "MATCH (u) REMOVE u.__mg_id__;");
EXPECT_EQ(DumpNext(&dump), "");
}
TEST(DumpTest, CheckStateVertexWithMultipleProperties) {
@ -235,8 +306,14 @@ TEST(DumpTest, CheckStateVertexWithMultipleProperties) {
{"nested1", PropertyValue(1337)}, {"nested2", PropertyValue(3.14)}};
db.CreateVertex({"Label1", "Label2"},
{{"prop1", prop1}, {"prop2", PropertyValue("$'\t'")}});
DatabaseEnvironment db_dump;
db_dump.Execute(db.DumpStr());
auto dba = db.Access();
CypherDumpGenerator dump(&dba);
std::string cmd;
while (!(cmd = DumpNext(&dump)).empty()) {
db_dump.Execute(cmd);
}
EXPECT_EQ(db.GetState(), db_dump.GetState());
}
@ -254,7 +331,13 @@ TEST(DumpTest, CheckStateSimpleGraph) {
db.CreateEdge(z, u, "Knows", {});
db.CreateEdge(w, z, "Knows", {{"how", "school"}});
db.CreateEdge(w, z, "Likes", {{"how", "very much"}});
DatabaseEnvironment db_dump;
db_dump.Execute(db.DumpStr());
auto dba = db.Access();
CypherDumpGenerator dump(&dba);
std::string cmd;
while (!(cmd = DumpNext(&dump)).empty()) {
db_dump.Execute(cmd);
}
EXPECT_EQ(db.GetState(), db_dump.GetState());
}