Collect symbols in variable length path

Summary:
Add EdgeList symbol type and check for redeclaration

The result of variable path is a list of edges, so the symbol type has
been added. In the future, we need to extend the type checker and the
type structure to have a generic list type.

We also currently do not support reusing an already bound symbol for a
variable path, so the SymbolGenerator will raise a redeclaration error.

Reviewers: florijan, mislav.bradac

Reviewed By: mislav.bradac

Subscribers: pullbot, lion

Differential Revision: https://phabricator.memgraph.io/D574
This commit is contained in:
Teon Banek 2017-07-25 11:57:38 +02:00
parent b59385a3ce
commit da7bfe05b7
4 changed files with 175 additions and 24 deletions

View File

@ -167,11 +167,11 @@ bool SymbolGenerator::PostVisit(Match &) {
scope_.in_match = false;
// Check variables in property maps after visiting Match, so that they can
// reference symbols out of bind order.
for (auto &ident : scope_.identifiers_in_property_maps) {
for (auto &ident : scope_.identifiers_in_match) {
if (!HasSymbol(ident->name_)) throw UnboundVariableError(ident->name_);
symbol_table_[*ident] = scope_.symbols[ident->name_];
}
scope_.identifiers_in_property_maps.clear();
scope_.identifiers_in_match.clear();
return true;
}
@ -185,7 +185,7 @@ SymbolGenerator::ReturnType SymbolGenerator::Visit(Identifier &ident) {
scope_.in_skip ? "SKIP" : "LIMIT");
}
Symbol symbol;
if (scope_.in_pattern && !scope_.in_property_map) {
if (scope_.in_pattern && scope_.in_pattern_identifier) {
// Patterns can bind new symbols or reference already bound. But there
// are the following special cases:
// 1) Patterns used to create nodes and edges cannot redeclare already
@ -203,15 +203,31 @@ SymbolGenerator::ReturnType SymbolGenerator::Visit(Identifier &ident) {
throw RedeclareVariableError(ident.name_);
}
auto type = Symbol::Type::Vertex;
if (scope_.in_edge_atom) {
type = Symbol::Type::Edge;
if (scope_.visiting_edge) {
if (scope_.visiting_edge->has_range_ && HasSymbol(ident.name_)) {
// TOOD: Support using variable paths with already obtained results from
// an existing symbol.
throw RedeclareVariableError(ident.name_);
}
if (scope_.visiting_edge->has_range_) {
type = Symbol::Type::EdgeList;
} else {
type = Symbol::Type::Edge;
}
}
symbol = GetOrCreateSymbol(ident.name_, ident.user_declared_, type);
} else if (scope_.in_pattern && scope_.in_property_map && scope_.in_match) {
// Variables in property maps during MATCH can reference symbols bound later
// in the same MATCH. We collect them here, so that they can be checked
// after visiting Match.
scope_.identifiers_in_property_maps.emplace_back(&ident);
} else if (scope_.in_pattern && !scope_.in_pattern_identifier &&
scope_.in_match) {
if (scope_.in_edge_range &&
scope_.visiting_edge->identifier_->name_ == ident.name_) {
// Prevent variable path bounds to reference the identifier which is bound
// by the variable path itself.
throw UnboundVariableError(ident.name_);
}
// Variables in property maps or bounds of variable length path during MATCH
// can reference symbols bound later in the same MATCH. We collect them
// here, so that they can be checked after visiting Match.
scope_.identifiers_in_match.emplace_back(&ident);
} else {
// Everything else references a bound symbol.
if (!HasSymbol(ident.name_)) throw UnboundVariableError(ident.name_);
@ -278,12 +294,12 @@ bool SymbolGenerator::PreVisit(NodeAtom &node_atom) {
"Cannot create node '" + node_name +
"' with labels or properties, because it is already declared.");
}
scope_.in_property_map = true;
for (auto kv : node_atom.properties_) {
kv.second->Accept(*this);
}
scope_.in_property_map = false;
scope_.in_pattern_identifier = true;
node_atom.identifier_->Accept(*this);
scope_.in_pattern_identifier = false;
return false;
}
@ -293,7 +309,7 @@ bool SymbolGenerator::PostVisit(NodeAtom &) {
}
bool SymbolGenerator::PreVisit(EdgeAtom &edge_atom) {
scope_.in_edge_atom = true;
scope_.visiting_edge = &edge_atom;
if (scope_.in_create || scope_.in_merge) {
scope_.in_create_edge = true;
if (edge_atom.edge_types_.size() != 1U) {
@ -313,17 +329,27 @@ bool SymbolGenerator::PreVisit(EdgeAtom &edge_atom) {
"edge.");
}
}
scope_.in_property_map = true;
for (auto kv : edge_atom.properties_) {
kv.second->Accept(*this);
}
scope_.in_property_map = false;
if (edge_atom.has_range_) {
scope_.in_edge_range = true;
if (edge_atom.lower_bound_) {
edge_atom.lower_bound_->Accept(*this);
}
if (edge_atom.upper_bound_) {
edge_atom.upper_bound_->Accept(*this);
}
scope_.in_edge_range = false;
}
scope_.in_pattern_identifier = true;
edge_atom.identifier_->Accept(*this);
scope_.in_pattern_identifier = false;
return false;
}
bool SymbolGenerator::PostVisit(EdgeAtom &) {
scope_.in_edge_atom = false;
scope_.visiting_edge = nullptr;
scope_.in_create_edge = false;
return true;
}

View File

@ -65,11 +65,10 @@ class SymbolGenerator : public HierarchicalTreeVisitor {
// in_node_atom.
bool in_create_node{false};
// True if creating an edge;
// shortcut for (in_create || in_merge) && in_edge_atom.
// shortcut for (in_create || in_merge) && visiting_edge.
bool in_create_edge{false};
bool in_node_atom{false};
bool in_edge_atom{false};
bool in_property_map{false};
EdgeAtom *visiting_edge{nullptr};
bool in_aggregation{false};
bool in_return{false};
bool in_with{false};
@ -78,13 +77,20 @@ class SymbolGenerator : public HierarchicalTreeVisitor {
bool in_order_by{false};
bool in_where{false};
bool in_match{false};
// True when visiting a pattern identifier, which can be reused or created
// in the pattern itself.
bool in_pattern_identifier{false};
// True when visiting range bounds of a variable path.
bool in_edge_range{false};
// True if the return/with contains an aggregation in any named expression.
bool has_aggregation{false};
// Map from variable names to symbols.
std::map<std::string, Symbol> symbols;
// Identifiers found in property maps of patterns in a single Match clause.
// They need to be checked after visiting Match.
std::vector<Identifier *> identifiers_in_property_maps;
// Identifiers found in property maps of patterns or as variable length path
// bounds in a single Match clause. They need to be checked after visiting
// Match. Identifiers created by naming vertices, edges and paths are *not*
// stored in here.
std::vector<Identifier *> identifiers_in_match;
};
bool HasSymbol(const std::string &name);

View File

@ -10,10 +10,12 @@ namespace query {
class Symbol {
public:
// This is similar to TypedValue::Type, but this has `Any` type.
enum class Type { Any, Vertex, Edge, Path, Number };
// TODO: Make a better Type structure which can store a generic List.
enum class Type { Any, Vertex, Edge, Path, Number, EdgeList };
static std::string TypeToString(Type type) {
const char *enum_string[] = {"Any", "Vertex", "Edge", "Path", "Number"};
const char *enum_string[] = {"Any", "Vertex", "Edge",
"Path", "Number", "EdgeList"};
return enum_string[static_cast<int>(type)];
}

View File

@ -868,4 +868,121 @@ TEST(TestSymbolGenerator, MatchEdgeWithIdentifierInProperty) {
EXPECT_EQ(n, symbol_table.at(*n_prop->expression_));
}
TEST(TestSymbolGenerator, MatchVariablePathUsingIdentifier) {
// Test MATCH (n) -[r *..l.prop]- (m), (l) RETURN r
Dbms dbms;
auto dba = dbms.active();
auto prop = dba->property("prop");
AstTreeStorage storage;
auto edge = EDGE("r");
edge->has_range_ = true;
auto l_prop = PROPERTY_LOOKUP("l", prop);
edge->upper_bound_ = l_prop;
auto node_l = NODE("l");
auto query = QUERY(
MATCH(PATTERN(NODE("n"), edge, NODE("m")), PATTERN(node_l)), RETURN("r"));
SymbolTable symbol_table;
SymbolGenerator symbol_generator(symbol_table);
query->Accept(symbol_generator);
// Symbols for `n`, `r`, `m`, `l` and implicit in RETURN `r AS r`
EXPECT_EQ(symbol_table.max_position(), 5);
auto l = symbol_table.at(*node_l->identifier_);
EXPECT_EQ(l, symbol_table.at(*l_prop->expression_));
auto r = symbol_table.at(*edge->identifier_);
EXPECT_EQ(r.type(), Symbol::Type::EdgeList);
}
TEST(TestSymbolGenerator, MatchVariablePathUsingUnboundIdentifier) {
// Test MATCH (n) -[r *..l.prop]- (m) MATCH (l) RETURN r
Dbms dbms;
auto dba = dbms.active();
auto prop = dba->property("prop");
AstTreeStorage storage;
auto edge = EDGE("r");
edge->has_range_ = true;
auto l_prop = PROPERTY_LOOKUP("l", prop);
edge->upper_bound_ = l_prop;
auto node_l = NODE("l");
auto query = QUERY(MATCH(PATTERN(NODE("n"), edge, NODE("m"))),
MATCH(PATTERN(node_l)), RETURN("r"));
SymbolTable symbol_table;
SymbolGenerator symbol_generator(symbol_table);
EXPECT_THROW(query->Accept(symbol_generator), SemanticException);
}
TEST(TestSymbolGenerator, CreateVariablePath) {
// Test CREATE (n) -[r *]-> (m) raises a SemanticException, since variable
// paths cannot be created.
AstTreeStorage storage;
auto edge = EDGE("r", EdgeAtom::Direction::OUT);
edge->has_range_ = true;
auto query = QUERY(CREATE(PATTERN(NODE("n"), edge, NODE("m"))));
SymbolTable symbol_table;
SymbolGenerator symbol_generator(symbol_table);
EXPECT_THROW(query->Accept(symbol_generator), SemanticException);
}
TEST(TestSymbolGenerator, MergeVariablePath) {
// Test MERGE (n) -[r *]-> (m) raises a SemanticException, since variable
// paths cannot be created.
AstTreeStorage storage;
auto edge = EDGE("r", EdgeAtom::Direction::OUT);
edge->has_range_ = true;
auto query = QUERY(MERGE(PATTERN(NODE("n"), edge, NODE("m"))));
SymbolTable symbol_table;
SymbolGenerator symbol_generator(symbol_table);
EXPECT_THROW(query->Accept(symbol_generator), SemanticException);
}
TEST(TestSymbolGenerator, RedeclareVariablePath) {
// Test MATCH (n) -[n*]-> (m) RETURN n raises RedeclareVariableError.
// This is just a temporary solution, before we add the support for using
// variable paths with already declared symbols. In the future, this test
// should be changed to check for type errors.
AstTreeStorage storage;
auto edge = EDGE("n", EdgeAtom::Direction::OUT);
edge->has_range_ = true;
auto query = QUERY(MATCH(PATTERN(NODE("n"), edge, NODE("m"))), RETURN("n"));
SymbolTable symbol_table;
SymbolGenerator symbol_generator(symbol_table);
EXPECT_THROW(query->Accept(symbol_generator), RedeclareVariableError);
}
TEST(TestSymbolGenerator, VariablePathSameIdentifier) {
// Test MATCH (n) -[r *r.prop..]-> (m) RETURN r raises UnboundVariableError.
// `r` cannot be used inside the range expression, since it is bound by the
// variable expansion itself.
Dbms dbms;
auto dba = dbms.active();
auto prop = dba->property("prop");
AstTreeStorage storage;
auto edge = EDGE("r", EdgeAtom::Direction::OUT);
edge->has_range_ = true;
edge->lower_bound_ = PROPERTY_LOOKUP("r", prop);
auto query = QUERY(MATCH(PATTERN(NODE("n"), edge, NODE("m"))), RETURN("r"));
SymbolTable symbol_table;
SymbolGenerator symbol_generator(symbol_table);
EXPECT_THROW(query->Accept(symbol_generator), UnboundVariableError);
}
TEST(TestSymbolGenerator, MatchPropertySameIdentifier) {
// Test MATCH (n {prop: n.prop}) RETURN n
// Using `n.prop` needs to work, because filters are run after the value for
// matched symbol is obtained.
Dbms dbms;
auto dba = dbms.active();
auto prop = dba->property("prop");
AstTreeStorage storage;
auto node_n = NODE("n");
auto n_prop = PROPERTY_LOOKUP("n", prop);
node_n->properties_[prop] = n_prop;
auto query = QUERY(MATCH(PATTERN(node_n)), RETURN("n"));
SymbolTable symbol_table;
SymbolGenerator symbol_generator(symbol_table);
query->Accept(symbol_generator);
auto n = symbol_table.at(*node_n->identifier_);
EXPECT_EQ(n, symbol_table.at(*n_prop->expression_));
}
} // namespace