Split a single dump query into multiple queries
Summary: Prior to this change, DumpGenerator returned a single huge query that dumped the entire graph. This change splits that single query into multiple queries, each dumping a single vertex or edge. For easier vertex matching when dumping edges, an internal id property is assigned to each vertex and removed after the whole graph is dumped. Reviewers: teon.banek Reviewed By: teon.banek Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D2038
This commit is contained in:
parent
bf4bf7a3bd
commit
5f24342502
@ -1,6 +1,7 @@
|
||||
#include "database/single_node/dump.hpp"
|
||||
|
||||
#include <map>
|
||||
#include <optional>
|
||||
#include <ostream>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
@ -15,6 +16,13 @@ namespace database {
|
||||
|
||||
namespace {
|
||||
|
||||
// Property that is used to tell vertices apart. It is added to the
|
||||
// property set of vertices to match edges and removed after the entire graph
|
||||
// is built.
|
||||
// TODO(tsabolcec): We should create an index on that property for faster
|
||||
// matching.
|
||||
// Declared constexpr so that the pointer itself (not only the characters it
// points to) is immutable and usable in constant expressions.
constexpr const char *kInternalPropertyId = "__mg_id__";
|
||||
|
||||
void DumpPropertyValue(std::ostream *os, const PropertyValue &value) {
|
||||
switch (value.type()) {
|
||||
case PropertyValue::Type::Null:
|
||||
@ -57,8 +65,13 @@ void DumpPropertyValue(std::ostream *os, const PropertyValue &value) {
|
||||
}
|
||||
|
||||
void DumpProperties(std::ostream *os, GraphDbAccessor *dba,
|
||||
const PropertyValueStore &store) {
|
||||
const PropertyValueStore &store,
|
||||
std::optional<uint64_t> property_id = std::nullopt) {
|
||||
*os << "{";
|
||||
if (property_id) {
|
||||
*os << kInternalPropertyId << ": " << *property_id;
|
||||
if (store.size() > 0) *os << ", ";
|
||||
}
|
||||
utils::PrintIterable(*os, store, ", ", [&dba](auto &os, const auto &kv) {
|
||||
os << dba->PropertyName(kv.first) << ": ";
|
||||
DumpPropertyValue(&os, kv.second);
|
||||
@ -68,62 +81,67 @@ void DumpProperties(std::ostream *os, GraphDbAccessor *dba,
|
||||
|
||||
void DumpVertex(std::ostream *os, GraphDbAccessor *dba,
|
||||
const VertexAccessor &vertex) {
|
||||
*os << "(n" << vertex.gid();
|
||||
*os << "CREATE (";
|
||||
for (const auto &label : vertex.labels()) {
|
||||
*os << ":" << dba->LabelName(label);
|
||||
}
|
||||
const auto &props = vertex.Properties();
|
||||
if (props.size() > 0) {
|
||||
*os << " ";
|
||||
DumpProperties(os, dba, props);
|
||||
}
|
||||
*os << ")";
|
||||
if (!vertex.labels().empty()) *os << " ";
|
||||
DumpProperties(os, dba, vertex.Properties(),
|
||||
std::optional<uint64_t>(vertex.CypherId()));
|
||||
*os << ");";
|
||||
}
|
||||
|
||||
void DumpEdge(std::ostream *os, GraphDbAccessor *dba,
|
||||
const EdgeAccessor &edge) {
|
||||
*os << "(n" << edge.from().gid() << ")-[";
|
||||
*os << "MATCH (u), (v)";
|
||||
*os << " WHERE ";
|
||||
*os << "u." << kInternalPropertyId << " = " << edge.from().CypherId();
|
||||
*os << " AND ";
|
||||
*os << "v." << kInternalPropertyId << " = " << edge.to().CypherId() << " ";
|
||||
*os << "CREATE (u)-[";
|
||||
*os << ":" << dba->EdgeTypeName(edge.EdgeType());
|
||||
const auto &props = edge.Properties();
|
||||
if (props.size() > 0) {
|
||||
*os << " ";
|
||||
DumpProperties(os, dba, props);
|
||||
DumpProperties(os, dba, edge.Properties());
|
||||
}
|
||||
*os << "]->(n" << edge.to().gid() << ")";
|
||||
*os << "]->(v);";
|
||||
}
|
||||
|
||||
// Writes to `os` a single query that removes the internal id property
// (`kInternalPropertyId`) from every matched vertex.
void DumpInternalIndexCleanup(std::ostream *os) {
|
||||
// TODO(tsabolcec): Don't forget to drop the index by internal id.
|
||||
*os << "MATCH (u) REMOVE u." << kInternalPropertyId << ";";
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
DumpGenerator::DumpGenerator(GraphDbAccessor *dba) : dba_(dba), first_(true) {
|
||||
CypherDumpGenerator::CypherDumpGenerator(GraphDbAccessor *dba)
|
||||
: dba_(dba), cleaned_internals_(false) {
|
||||
CHECK(dba);
|
||||
vertices_state_.emplace(dba->Vertices(false));
|
||||
edges_state_.emplace(dba->Edges(false));
|
||||
}
|
||||
|
||||
bool DumpGenerator::NextQuery(std::ostream *os) {
|
||||
if (vertices_state_->ReachedEnd() && edges_state_->ReachedEnd()) return false;
|
||||
|
||||
if (first_) {
|
||||
first_ = false;
|
||||
*os << "CREATE ";
|
||||
} else {
|
||||
*os << ", ";
|
||||
}
|
||||
|
||||
bool CypherDumpGenerator::NextQuery(std::ostream *os) {
|
||||
if (!vertices_state_->ReachedEnd()) {
|
||||
DumpVertex(os, dba_, *vertices_state_->GetCurrentAndAdvance());
|
||||
return true;
|
||||
} else if (!edges_state_->ReachedEnd()) {
|
||||
DumpEdge(os, dba_, *edges_state_->GetCurrentAndAdvance());
|
||||
return true;
|
||||
} else if (!vertices_state_->Empty() && !cleaned_internals_) {
|
||||
DumpInternalIndexCleanup(os);
|
||||
cleaned_internals_ = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (vertices_state_->ReachedEnd() && edges_state_->ReachedEnd()) *os << ";";
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
void DumpToCypher(std::ostream *os, GraphDbAccessor *dba) {
|
||||
CHECK(os && dba);
|
||||
|
||||
DumpGenerator dump(dba);
|
||||
CypherDumpGenerator dump(dba);
|
||||
while (dump.NextQuery(os)) continue;
|
||||
}
|
||||
|
||||
|
@ -6,16 +6,14 @@
|
||||
|
||||
namespace database {
|
||||
|
||||
/// Class which generates parts of openCypher query which can be used to dump
|
||||
/// the database state.
|
||||
/// Class which generates a sequence of openCypher queries which can be used to
|
||||
/// dump the database state.
|
||||
///
|
||||
/// Currently, all parts combined form a single query which dumps an entire
|
||||
/// graph (vertices and edges). Since query can be quite long for larger graphs,
|
||||
/// the graph should be split in multiple queries in the future. Indexes,
|
||||
/// constraints, roles, etc. are currently not dumped.
|
||||
class DumpGenerator {
|
||||
/// Currently, only vertices and edges are being dumped, one-by-one in multiple
|
||||
/// queries. Index keys, constraints, roles, etc. are currently not dumped.
|
||||
class CypherDumpGenerator {
|
||||
public:
|
||||
explicit DumpGenerator(GraphDbAccessor *dba);
|
||||
explicit CypherDumpGenerator(GraphDbAccessor *dba);
|
||||
|
||||
bool NextQuery(std::ostream *os);
|
||||
|
||||
@ -27,7 +25,8 @@ class DumpGenerator {
|
||||
explicit ContainerState(TContainer container)
|
||||
: container_(std::move(container)),
|
||||
current_(container_.begin()),
|
||||
end_(container_.end()) {}
|
||||
end_(container_.end()),
|
||||
empty_(current_ == end_) {}
|
||||
|
||||
auto GetCurrentAndAdvance() {
|
||||
auto to_be_returned = current_;
|
||||
@ -37,19 +36,22 @@ class DumpGenerator {
|
||||
|
||||
bool ReachedEnd() const { return current_ == end_; }
|
||||
|
||||
// Returns true iff the container is empty.
|
||||
bool Empty() const { return empty_; }
|
||||
|
||||
private:
|
||||
TContainer container_;
|
||||
|
||||
using TIterator = decltype(container_.begin());
|
||||
TIterator current_;
|
||||
TIterator end_;
|
||||
|
||||
bool empty_;
|
||||
};
|
||||
|
||||
GraphDbAccessor *dba_;
|
||||
|
||||
// Boolean which indicates if the `NextQuery` method is called for the first
|
||||
// time.
|
||||
bool first_;
|
||||
bool cleaned_internals_;
|
||||
|
||||
std::optional<ContainerState<decltype(dba_->Vertices(false))>>
|
||||
vertices_state_;
|
||||
|
@ -14,6 +14,8 @@
|
||||
#include "query/typed_value.hpp"
|
||||
#include "storage/common/types/property_value.hpp"
|
||||
|
||||
using database::CypherDumpGenerator;
|
||||
|
||||
const char *kPropertyId = "property_id";
|
||||
|
||||
// A helper struct that contains info about database that is used to compare
|
||||
@ -69,6 +71,14 @@ bool operator==(const DatabaseState &first, const DatabaseState &second) {
|
||||
return first.vertices == second.vertices && first.edges == second.edges;
|
||||
}
|
||||
|
||||
// Returns next query if the end is not reached, otherwise returns an empty
|
||||
// string.
|
||||
// Test helper: asks `dump` for its next query and returns it as a string.
// Returns an empty string when `NextQuery` reports that no query was produced.
std::string DumpNext(CypherDumpGenerator *dump) {
|
||||
// The query is written into a local stream so it can be returned by value.
std::ostringstream oss;
|
||||
if (dump->NextQuery(&oss)) return oss.str();
|
||||
// NextQuery returned false; anything written to `oss` is discarded.
return "";
|
||||
}
|
||||
|
||||
class DatabaseEnvironment {
|
||||
public:
|
||||
std::string DumpStr() {
|
||||
@ -85,6 +95,8 @@ class DatabaseEnvironment {
|
||||
dba.Commit();
|
||||
}
|
||||
|
||||
database::GraphDbAccessor Access() { return db_.Access(); }
|
||||
|
||||
VertexAccessor CreateVertex(const std::vector<std::string> &labels,
|
||||
const std::map<std::string, PropertyValue> &props,
|
||||
bool add_property_id = true) {
|
||||
@ -163,31 +175,53 @@ class DatabaseEnvironment {
|
||||
|
||||
TEST(DumpTest, EmptyGraph) {
|
||||
DatabaseEnvironment db;
|
||||
EXPECT_EQ("", db.DumpStr());
|
||||
auto dba = db.Access();
|
||||
CypherDumpGenerator dump(&dba);
|
||||
EXPECT_EQ(DumpNext(&dump), "");
|
||||
}
|
||||
|
||||
TEST(DumpTest, SingleVertex) {
|
||||
DatabaseEnvironment db;
|
||||
db.CreateVertex({}, {}, false);
|
||||
EXPECT_EQ(db.DumpStr(), "CREATE (n0);");
|
||||
|
||||
auto dba = db.Access();
|
||||
CypherDumpGenerator dump(&dba);
|
||||
EXPECT_EQ(DumpNext(&dump), "CREATE ({__mg_id__: 0});");
|
||||
EXPECT_EQ(DumpNext(&dump), "MATCH (u) REMOVE u.__mg_id__;");
|
||||
EXPECT_EQ(DumpNext(&dump), "");
|
||||
}
|
||||
|
||||
TEST(DumpTest, VertexWithSingleLabel) {
|
||||
DatabaseEnvironment db;
|
||||
db.CreateVertex({"Label1"}, {}, false);
|
||||
EXPECT_EQ(db.DumpStr(), "CREATE (n0:Label1);");
|
||||
|
||||
auto dba = db.Access();
|
||||
CypherDumpGenerator dump(&dba);
|
||||
EXPECT_EQ(DumpNext(&dump), "CREATE (:Label1 {__mg_id__: 0});");
|
||||
EXPECT_EQ(DumpNext(&dump), "MATCH (u) REMOVE u.__mg_id__;");
|
||||
EXPECT_EQ(DumpNext(&dump), "");
|
||||
}
|
||||
|
||||
TEST(DumpTest, VertexWithMultipleLabels) {
|
||||
DatabaseEnvironment db;
|
||||
db.CreateVertex({"Label1", "Label2"}, {}, false);
|
||||
EXPECT_EQ(db.DumpStr(), "CREATE (n0:Label1:Label2);");
|
||||
|
||||
auto dba = db.Access();
|
||||
CypherDumpGenerator dump(&dba);
|
||||
EXPECT_EQ(DumpNext(&dump), "CREATE (:Label1:Label2 {__mg_id__: 0});");
|
||||
EXPECT_EQ(DumpNext(&dump), "MATCH (u) REMOVE u.__mg_id__;");
|
||||
EXPECT_EQ(DumpNext(&dump), "");
|
||||
}
|
||||
|
||||
TEST(DumpTest, VertexWithSingleProperty) {
|
||||
DatabaseEnvironment db;
|
||||
db.CreateVertex({}, {{"prop", PropertyValue(42)}}, false);
|
||||
EXPECT_EQ(db.DumpStr(), "CREATE (n0 {prop: 42});");
|
||||
|
||||
auto dba = db.Access();
|
||||
CypherDumpGenerator dump(&dba);
|
||||
EXPECT_EQ(DumpNext(&dump), "CREATE ({__mg_id__: 0, prop: 42});");
|
||||
EXPECT_EQ(DumpNext(&dump), "MATCH (u) REMOVE u.__mg_id__;");
|
||||
EXPECT_EQ(DumpNext(&dump), "");
|
||||
}
|
||||
|
||||
TEST(DumpTest, MultipleVertices) {
|
||||
@ -195,7 +229,14 @@ TEST(DumpTest, MultipleVertices) {
|
||||
db.CreateVertex({}, {}, false);
|
||||
db.CreateVertex({}, {}, false);
|
||||
db.CreateVertex({}, {}, false);
|
||||
EXPECT_EQ(db.DumpStr(), "CREATE (n0), (n1), (n2);");
|
||||
|
||||
auto dba = db.Access();
|
||||
CypherDumpGenerator dump(&dba);
|
||||
EXPECT_EQ(DumpNext(&dump), "CREATE ({__mg_id__: 0});");
|
||||
EXPECT_EQ(DumpNext(&dump), "CREATE ({__mg_id__: 1});");
|
||||
EXPECT_EQ(DumpNext(&dump), "CREATE ({__mg_id__: 2});");
|
||||
EXPECT_EQ(DumpNext(&dump), "MATCH (u) REMOVE u.__mg_id__;");
|
||||
EXPECT_EQ(DumpNext(&dump), "");
|
||||
}
|
||||
|
||||
TEST(DumpTest, SingleEdge) {
|
||||
@ -203,7 +244,16 @@ TEST(DumpTest, SingleEdge) {
|
||||
auto u = db.CreateVertex({}, {}, false);
|
||||
auto v = db.CreateVertex({}, {}, false);
|
||||
db.CreateEdge(u, v, "EdgeType", {}, false);
|
||||
EXPECT_EQ(db.DumpStr(), "CREATE (n0), (n1), (n0)-[:EdgeType]->(n1);");
|
||||
|
||||
auto dba = db.Access();
|
||||
CypherDumpGenerator dump(&dba);
|
||||
EXPECT_EQ(DumpNext(&dump), "CREATE ({__mg_id__: 0});");
|
||||
EXPECT_EQ(DumpNext(&dump), "CREATE ({__mg_id__: 1});");
|
||||
EXPECT_EQ(DumpNext(&dump),
|
||||
"MATCH (u), (v) WHERE u.__mg_id__ = 0 AND v.__mg_id__ = 1 CREATE "
|
||||
"(u)-[:EdgeType]->(v);");
|
||||
EXPECT_EQ(DumpNext(&dump), "MATCH (u) REMOVE u.__mg_id__;");
|
||||
EXPECT_EQ(DumpNext(&dump), "");
|
||||
}
|
||||
|
||||
TEST(DumpTest, MultipleEdges) {
|
||||
@ -214,10 +264,23 @@ TEST(DumpTest, MultipleEdges) {
|
||||
db.CreateEdge(u, v, "EdgeType", {}, false);
|
||||
db.CreateEdge(v, u, "EdgeType", {}, false);
|
||||
db.CreateEdge(v, w, "EdgeType", {}, false);
|
||||
const char *expected =
|
||||
"CREATE (n0), (n1), (n2), (n0)-[:EdgeType]->(n1), "
|
||||
"(n1)-[:EdgeType]->(n0), (n1)-[:EdgeType]->(n2);";
|
||||
EXPECT_EQ(db.DumpStr(), expected);
|
||||
|
||||
auto dba = db.Access();
|
||||
CypherDumpGenerator dump(&dba);
|
||||
EXPECT_EQ(DumpNext(&dump), "CREATE ({__mg_id__: 0});");
|
||||
EXPECT_EQ(DumpNext(&dump), "CREATE ({__mg_id__: 1});");
|
||||
EXPECT_EQ(DumpNext(&dump), "CREATE ({__mg_id__: 2});");
|
||||
EXPECT_EQ(DumpNext(&dump),
|
||||
"MATCH (u), (v) WHERE u.__mg_id__ = 0 AND v.__mg_id__ = 1 CREATE "
|
||||
"(u)-[:EdgeType]->(v);");
|
||||
EXPECT_EQ(DumpNext(&dump),
|
||||
"MATCH (u), (v) WHERE u.__mg_id__ = 1 AND v.__mg_id__ = 0 CREATE "
|
||||
"(u)-[:EdgeType]->(v);");
|
||||
EXPECT_EQ(DumpNext(&dump),
|
||||
"MATCH (u), (v) WHERE u.__mg_id__ = 1 AND v.__mg_id__ = 2 CREATE "
|
||||
"(u)-[:EdgeType]->(v);");
|
||||
EXPECT_EQ(DumpNext(&dump), "MATCH (u) REMOVE u.__mg_id__;");
|
||||
EXPECT_EQ(DumpNext(&dump), "");
|
||||
}
|
||||
|
||||
TEST(DumpTest, EdgeWithProperties) {
|
||||
@ -225,8 +288,16 @@ TEST(DumpTest, EdgeWithProperties) {
|
||||
auto u = db.CreateVertex({}, {}, false);
|
||||
auto v = db.CreateVertex({}, {}, false);
|
||||
db.CreateEdge(u, v, "EdgeType", {{"prop", PropertyValue(13)}}, false);
|
||||
EXPECT_EQ(db.DumpStr(),
|
||||
"CREATE (n0), (n1), (n0)-[:EdgeType {prop: 13}]->(n1);");
|
||||
|
||||
auto dba = db.Access();
|
||||
CypherDumpGenerator dump(&dba);
|
||||
EXPECT_EQ(DumpNext(&dump), "CREATE ({__mg_id__: 0});");
|
||||
EXPECT_EQ(DumpNext(&dump), "CREATE ({__mg_id__: 1});");
|
||||
EXPECT_EQ(DumpNext(&dump),
|
||||
"MATCH (u), (v) WHERE u.__mg_id__ = 0 AND v.__mg_id__ = 1 CREATE "
|
||||
"(u)-[:EdgeType {prop: 13}]->(v);");
|
||||
EXPECT_EQ(DumpNext(&dump), "MATCH (u) REMOVE u.__mg_id__;");
|
||||
EXPECT_EQ(DumpNext(&dump), "");
|
||||
}
|
||||
|
||||
TEST(DumpTest, CheckStateVertexWithMultipleProperties) {
|
||||
@ -235,8 +306,14 @@ TEST(DumpTest, CheckStateVertexWithMultipleProperties) {
|
||||
{"nested1", PropertyValue(1337)}, {"nested2", PropertyValue(3.14)}};
|
||||
db.CreateVertex({"Label1", "Label2"},
|
||||
{{"prop1", prop1}, {"prop2", PropertyValue("$'\t'")}});
|
||||
|
||||
DatabaseEnvironment db_dump;
|
||||
db_dump.Execute(db.DumpStr());
|
||||
auto dba = db.Access();
|
||||
CypherDumpGenerator dump(&dba);
|
||||
std::string cmd;
|
||||
while (!(cmd = DumpNext(&dump)).empty()) {
|
||||
db_dump.Execute(cmd);
|
||||
}
|
||||
EXPECT_EQ(db.GetState(), db_dump.GetState());
|
||||
}
|
||||
|
||||
@ -254,7 +331,13 @@ TEST(DumpTest, CheckStateSimpleGraph) {
|
||||
db.CreateEdge(z, u, "Knows", {});
|
||||
db.CreateEdge(w, z, "Knows", {{"how", "school"}});
|
||||
db.CreateEdge(w, z, "Likes", {{"how", "very much"}});
|
||||
|
||||
DatabaseEnvironment db_dump;
|
||||
db_dump.Execute(db.DumpStr());
|
||||
auto dba = db.Access();
|
||||
CypherDumpGenerator dump(&dba);
|
||||
std::string cmd;
|
||||
while (!(cmd = DumpNext(&dump)).empty()) {
|
||||
db_dump.Execute(cmd);
|
||||
}
|
||||
EXPECT_EQ(db.GetState(), db_dump.GetState());
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user