Add regex matching to openCypher

Reviewers: mtomic, msantl

Reviewed By: mtomic

Subscribers: buda, pullbot

Differential Revision: https://phabricator.memgraph.io/D1880
This commit is contained in:
Teon Banek 2019-02-27 11:12:24 +01:00
parent ed28ed873d
commit 5084123de3
9 changed files with 194 additions and 7 deletions

View File

@ -10,6 +10,7 @@
* [Enterprise Ed.] Add new privilege, `STATS` for accessing storage info. * [Enterprise Ed.] Add new privilege, `STATS` for accessing storage info.
* Add `SHOW STORAGE INFO` feature. * Add `SHOW STORAGE INFO` feature.
* Add regular expression matching in queries.
## v0.14.0 ## v0.14.0

View File

@ -1250,6 +1250,41 @@ cpp<#
(:serialize (:slk) (:capnp)) (:serialize (:slk) (:capnp))
(:clone)) (:clone))
;; AST node for a regular expression match. It derives from `expression` and
;; holds two child expressions: `string-expr` and `regex`.
(lcp:define-class regex-match (expression)
;; Both slots are public `Expression *` members that use the AST pointer
;; save/load hooks for SLK and Cap'n Proto serialization.
;; NOTE(review): neither slot specifies an initial value while the generated
;; class has a defaulted constructor -- confirm the pointers cannot be read
;; before they are assigned.
((string-expr "Expression *" :scope :public
:slk-save #'slk-save-ast-pointer
:slk-load (slk-load-ast-pointer "Expression")
:capnp-type "Tree" :capnp-init nil
:capnp-save #'save-ast-pointer
:capnp-load (load-ast-pointer "Expression *"))
(regex "Expression *" :scope :public
:slk-save #'slk-save-ast-pointer
:slk-load (slk-load-ast-pointer "Expression")
:capnp-type "Tree" :capnp-init nil
:capnp-save #'save-ast-pointer
:capnp-load (load-ast-pointer "Expression *")))
;; Public C++ section: defaulted constructor, visitor hooks for the
;; TypedValue-returning and void expression visitors, and the hierarchical
;; Accept. Accept descends only when PreVisit returns true; `regex_` is
;; visited only if visiting `string_expr_` returned true (short-circuit `&&`),
;; and the result of PostVisit is always what gets returned.
(:public
#>cpp
RegexMatch() = default;
DEFVISITABLE(ExpressionVisitor<TypedValue>);
DEFVISITABLE(ExpressionVisitor<void>);
bool Accept(HierarchicalTreeVisitor &visitor) override {
if (visitor.PreVisit(*this)) {
string_expr_->Accept(visitor) && regex_->Accept(visitor);
}
return visitor.PostVisit(*this);
}
cpp<#)
;; Private C++ section: the two-argument constructor is private and AstStorage
;; is declared a friend, so AstStorage can call it.
(:private
#>cpp
friend class AstStorage;
RegexMatch(Expression *string_expr, Expression *regex)
: string_expr_(string_expr), regex_(regex) {}
cpp<#)
;; Enable SLK and Cap'n Proto serialization and cloning for this class.
(:serialize (:slk) (:capnp))
(:clone))
(lcp:define-class named-expression (tree "::utils::Visitable<HierarchicalTreeVisitor>" (lcp:define-class named-expression (tree "::utils::Visitable<HierarchicalTreeVisitor>"
"::utils::Visitable<ExpressionVisitor<TypedValue>>" "::utils::Visitable<ExpressionVisitor<TypedValue>>"
"::utils::Visitable<ExpressionVisitor<void>>") "::utils::Visitable<ExpressionVisitor<void>>")

View File

@ -68,6 +68,7 @@ class IndexQuery;
class StreamQuery; class StreamQuery;
class InfoQuery; class InfoQuery;
class ConstraintQuery; class ConstraintQuery;
class RegexMatch;
using TreeCompositeVisitor = ::utils::CompositeVisitor< using TreeCompositeVisitor = ::utils::CompositeVisitor<
SingleQuery, CypherUnion, NamedExpression, OrOperator, XorOperator, SingleQuery, CypherUnion, NamedExpression, OrOperator, XorOperator,
@ -80,7 +81,7 @@ using TreeCompositeVisitor = ::utils::CompositeVisitor<
Aggregation, Function, Reduce, Coalesce, Extract, All, Single, Create, Aggregation, Function, Reduce, Coalesce, Extract, All, Single, Create,
Match, Return, With, Pattern, NodeAtom, EdgeAtom, Delete, Where, Match, Return, With, Pattern, NodeAtom, EdgeAtom, Delete, Where,
SetProperty, SetProperties, SetLabels, RemoveProperty, RemoveLabels, Merge, SetProperty, SetProperties, SetLabels, RemoveProperty, RemoveLabels, Merge,
Unwind>; Unwind, RegexMatch>;
using TreeLeafVisitor = using TreeLeafVisitor =
::utils::LeafVisitor<Identifier, PrimitiveLiteral, ParameterLookup>; ::utils::LeafVisitor<Identifier, PrimitiveLiteral, ParameterLookup>;
@ -105,8 +106,8 @@ class ExpressionVisitor
SubscriptOperator, ListSlicingOperator, IfOperator, UnaryPlusOperator, SubscriptOperator, ListSlicingOperator, IfOperator, UnaryPlusOperator,
UnaryMinusOperator, IsNullOperator, ListLiteral, MapLiteral, UnaryMinusOperator, IsNullOperator, ListLiteral, MapLiteral,
PropertyLookup, LabelsTest, Aggregation, Function, Reduce, Coalesce, PropertyLookup, LabelsTest, Aggregation, Function, Reduce, Coalesce,
Extract, All, Single, ParameterLookup, Identifier, PrimitiveLiteral> { Extract, All, Single, ParameterLookup, Identifier, PrimitiveLiteral,
}; RegexMatch> {};
template <class TResult> template <class TResult>
class QueryVisitor class QueryVisitor

View File

@ -1297,6 +1297,11 @@ antlrcpp::Any CypherMainVisitor::visitExpression3a(
} else if (op->IN()) { } else if (op->IN()) {
expression = static_cast<Expression *>(storage_->Create<InListOperator>( expression = static_cast<Expression *>(storage_->Create<InListOperator>(
expression, op->expression3b()->accept(this))); expression, op->expression3b()->accept(this)));
} else if (utils::StartsWith(op->getText(), "=~")) {
auto *regex_match = storage_->Create<RegexMatch>();
regex_match->string_expr_ = expression;
regex_match->regex_ = op->expression3b()->accept(this);
expression = regex_match;
} else { } else {
std::string function_name; std::string function_name;
if (op->STARTS() && op->WITH()) { if (op->STARTS() && op->WITH()) {

View File

@ -56,6 +56,7 @@ class ExpressionPrettyPrinter : public ExpressionVisitor<void> {
void Visit(PropertyLookup &op) override; void Visit(PropertyLookup &op) override;
void Visit(ParameterLookup &op) override; void Visit(ParameterLookup &op) override;
void Visit(NamedExpression &op) override; void Visit(NamedExpression &op) override;
void Visit(RegexMatch &op) override;
private: private:
std::ostream *out_; std::ostream *out_;
@ -305,6 +306,10 @@ void ExpressionPrettyPrinter::Visit(NamedExpression &op) {
PrintOperator(out_, "NamedExpression", op.name_, op.expression_); PrintOperator(out_, "NamedExpression", op.name_, op.expression_);
} }
// Prints a regex match by delegating to PrintOperator with the label "=~",
// passing the matched string expression first and the pattern expression
// second.
void ExpressionPrettyPrinter::Visit(RegexMatch &op) {
  auto *subject = op.string_expr_;
  auto *pattern = op.regex_;
  PrintOperator(out_, "=~", subject, pattern);
}
} // namespace } // namespace
void PrintExpression(Expression *expr, std::ostream *out) { void PrintExpression(Expression *expr, std::ostream *out) {

View File

@ -4,6 +4,7 @@
#include <algorithm> #include <algorithm>
#include <limits> #include <limits>
#include <map> #include <map>
#include <regex>
#include <vector> #include <vector>
#include "database/graph_db_accessor.hpp" #include "database/graph_db_accessor.hpp"
@ -480,6 +481,33 @@ class ExpressionEvaluator : public ExpressionVisitor<TypedValue> {
return ctx_->parameters.AtTokenPosition(param_lookup.token_position_); return ctx_->parameters.AtTokenPosition(param_lookup.token_position_);
} }
/// Evaluates a regex match (`string_expr =~ regex`).
///
/// Both operands are always evaluated, the string expression first.
///
/// @return Null when either operand is Null or when the string expression is
///         not a string; otherwise the result of std::regex_match of the
///         string against the pattern.
/// @throw QueryRuntimeException when the pattern is non-Null but not a
///        string, or when std::regex reports an error for it.
TypedValue Visit(RegexMatch &regex_match) override {
  auto subject = regex_match.string_expr_->Accept(*this);
  auto pattern = regex_match.regex_->Accept(*this);

  // Null on either side propagates as Null; this check comes before any type
  // validation of the pattern.
  if (subject.IsNull() || pattern.IsNull()) return TypedValue::Null;

  if (pattern.type() != TypedValue::Type::String) {
    throw QueryRuntimeException(
        "Regular expression must evaluate to a string, got {}.",
        pattern.type());
  }

  // A non-string target is deliberately not an error: returning Null keeps
  // the result compatible with an indexed lookup that filters out non-string
  // properties (assuming the target is a property lookup).
  if (subject.type() != TypedValue::Type::String) return TypedValue::Null;

  const auto &subject_text = subject.ValueString();
  try {
    std::regex compiled(pattern.ValueString());
    return std::regex_match(subject_text, compiled);
  } catch (const std::regex_error &e) {
    // Surface regex library errors as query errors that include the pattern.
    throw QueryRuntimeException("Regex error in '{}': {}",
                                pattern.ValueString(), e.what());
  }
}
private: private:
storage::Property GetProperty(PropertyIx prop) { storage::Property GetProperty(PropertyIx prop) {
return ctx_->properties[prop.ix]; return ctx_->properties[prop.ix];

View File

@ -6,11 +6,11 @@
#include <vector> #include <vector>
#include <antlr4-runtime.h> #include <antlr4-runtime.h>
#include <capnp/message.h>
#include <gmock/gmock.h> #include <gmock/gmock.h>
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include "communication/rpc/serialization.hpp" #include "communication/rpc/serialization.hpp"
#include "query/context.hpp"
#include "query/frontend/ast/ast.hpp" #include "query/frontend/ast/ast.hpp"
#include "query/frontend/ast/ast_serialization.hpp" #include "query/frontend/ast/ast_serialization.hpp"
#include "query/frontend/ast/cypher_main_visitor.hpp" #include "query/frontend/ast/cypher_main_visitor.hpp"
@ -18,8 +18,6 @@
#include "query/frontend/stripped.hpp" #include "query/frontend/stripped.hpp"
#include "query/typed_value.hpp" #include "query/typed_value.hpp"
#include "capnp/message.h"
namespace { namespace {
using namespace query; using namespace query;
@ -2432,4 +2430,40 @@ TYPED_TEST(CypherMainVisitorTest, TestProfileStreamQuery) {
SyntaxException); SyntaxException);
} }
// Verifies that the `=~` operator is turned into a RegexMatch AST node, both
// as a WHERE filter and as a returned expression.
TYPED_TEST(CypherMainVisitorTest, RegexMatch) {
  {
    // `=~` inside WHERE with a property lookup on the left-hand side.
    TypeParam ast_generator(
        "MATCH (n) WHERE n.name =~ \".*bla.*\" RETURN n.name");
    auto *query = dynamic_cast<CypherQuery *>(ast_generator.query_);
    ASSERT_TRUE(query);
    ASSERT_TRUE(query->single_query_);
    auto *single_query = query->single_query_;
    ASSERT_EQ(single_query->clauses_.size(), 2U);
    auto *match_clause = dynamic_cast<Match *>(single_query->clauses_[0]);
    ASSERT_TRUE(match_clause);
    // Fail the test, rather than dereference a null pointer, if the WHERE
    // part was not attached to the MATCH clause.
    ASSERT_TRUE(match_clause->where_);
    auto *regex_match =
        dynamic_cast<RegexMatch *>(match_clause->where_->expression_);
    ASSERT_TRUE(regex_match);
    ASSERT_TRUE(dynamic_cast<PropertyLookup *>(regex_match->string_expr_));
    ast_generator.CheckLiteral(regex_match->regex_, ".*bla.*");
  }
  {
    // `=~` between two string literals in a RETURN body.
    TypeParam ast_generator("RETURN \"text\" =~ \".*bla.*\"");
    auto *query = dynamic_cast<CypherQuery *>(ast_generator.query_);
    ASSERT_TRUE(query);
    ASSERT_TRUE(query->single_query_);
    auto *single_query = query->single_query_;
    ASSERT_EQ(single_query->clauses_.size(), 1U);
    auto *return_clause = dynamic_cast<Return *>(single_query->clauses_[0]);
    ASSERT_TRUE(return_clause);
    ASSERT_EQ(return_clause->body_.named_expressions.size(), 1U);
    auto *named_expression = return_clause->body_.named_expressions[0];
    auto *regex_match =
        dynamic_cast<RegexMatch *>(named_expression->expression_);
    ASSERT_TRUE(regex_match);
    ast_generator.CheckLiteral(regex_match->string_expr_, "text");
    ast_generator.CheckLiteral(regex_match->regex_, ".*bla.*");
  }
}
} // namespace } // namespace

View File

@ -9,7 +9,6 @@
#include <gmock/gmock.h> #include <gmock/gmock.h>
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include "query/context.hpp"
#include "query/frontend/ast/ast.hpp" #include "query/frontend/ast/ast.hpp"
#include "query/frontend/ast/cypher_main_visitor.hpp" #include "query/frontend/ast/cypher_main_visitor.hpp"
#include "query/frontend/opencypher/parser.hpp" #include "query/frontend/opencypher/parser.hpp"
@ -2582,4 +2581,40 @@ TYPED_TEST(CypherMainVisitorTest, DropConstraint) {
} }
} }
// Verifies that the `=~` operator is turned into a RegexMatch AST node, both
// as a WHERE filter and as a returned expression.
TYPED_TEST(CypherMainVisitorTest, RegexMatch) {
  {
    // `=~` inside WHERE with a property lookup on the left-hand side.
    TypeParam ast_generator(
        "MATCH (n) WHERE n.name =~ \".*bla.*\" RETURN n.name");
    auto *query = dynamic_cast<CypherQuery *>(ast_generator.query_);
    ASSERT_TRUE(query);
    ASSERT_TRUE(query->single_query_);
    auto *single_query = query->single_query_;
    ASSERT_EQ(single_query->clauses_.size(), 2U);
    auto *match_clause = dynamic_cast<Match *>(single_query->clauses_[0]);
    ASSERT_TRUE(match_clause);
    // Fail the test, rather than dereference a null pointer, if the WHERE
    // part was not attached to the MATCH clause.
    ASSERT_TRUE(match_clause->where_);
    auto *regex_match =
        dynamic_cast<RegexMatch *>(match_clause->where_->expression_);
    ASSERT_TRUE(regex_match);
    ASSERT_TRUE(dynamic_cast<PropertyLookup *>(regex_match->string_expr_));
    ast_generator.CheckLiteral(regex_match->regex_, ".*bla.*");
  }
  {
    // `=~` between two string literals in a RETURN body.
    TypeParam ast_generator("RETURN \"text\" =~ \".*bla.*\"");
    auto *query = dynamic_cast<CypherQuery *>(ast_generator.query_);
    ASSERT_TRUE(query);
    ASSERT_TRUE(query->single_query_);
    auto *single_query = query->single_query_;
    ASSERT_EQ(single_query->clauses_.size(), 1U);
    auto *return_clause = dynamic_cast<Return *>(single_query->clauses_[0]);
    ASSERT_TRUE(return_clause);
    ASSERT_EQ(return_clause->body_.named_expressions.size(), 1U);
    auto *named_expression = return_clause->body_.named_expressions[0];
    auto *regex_match =
        dynamic_cast<RegexMatch *>(named_expression->expression_);
    ASSERT_TRUE(regex_match);
    ast_generator.CheckLiteral(regex_match->string_expr_, "text");
    ast_generator.CheckLiteral(regex_match->regex_, ".*bla.*");
  }
}
} // namespace } // namespace

View File

@ -847,6 +847,49 @@ TEST_F(ExpressionEvaluatorTest, Coalesce) {
.IsNull()); .IsNull());
} }
// Operand type handling of RegexMatch: a Null or non-string target and a Null
// pattern evaluate to Null, while a non-string, non-Null pattern throws.
TEST_F(ExpressionEvaluatorTest, RegexMatchInvalidArguments) {
  // Null target.
  auto *null_target = storage.Create<RegexMatch>(LITERAL(TypedValue::Null),
                                                 LITERAL("regex"));
  EXPECT_TRUE(Eval(null_target).IsNull());

  // Integer target.
  auto *int_target = storage.Create<RegexMatch>(LITERAL(3), LITERAL("regex"));
  EXPECT_TRUE(Eval(int_target).IsNull());

  // List target.
  auto *list_target = storage.Create<RegexMatch>(LIST(LITERAL("string")),
                                                 LITERAL("regex"));
  EXPECT_TRUE(Eval(list_target).IsNull());

  // Null pattern.
  auto *null_pattern = storage.Create<RegexMatch>(LITERAL("string"),
                                                  LITERAL(TypedValue::Null));
  EXPECT_TRUE(Eval(null_pattern).IsNull());

  // Integer pattern.
  auto *int_pattern =
      storage.Create<RegexMatch>(LITERAL("string"), LITERAL(42));
  EXPECT_THROW(Eval(int_pattern), QueryRuntimeException);

  // List pattern.
  auto *list_pattern = storage.Create<RegexMatch>(LITERAL("string"),
                                                  LIST(LITERAL("regex")));
  EXPECT_THROW(Eval(list_pattern), QueryRuntimeException);
}
// Malformed patterns are expected to be reported as QueryRuntimeException.
TEST_F(ExpressionEvaluatorTest, RegexMatchInvalidRegex) {
  // Pattern starting with `*`.
  auto *leading_star =
      storage.Create<RegexMatch>(LITERAL("text"), LITERAL("*ext"));
  EXPECT_THROW(Eval(leading_star), QueryRuntimeException);

  // Pattern with an unclosed `[`.
  auto *open_bracket =
      storage.Create<RegexMatch>(LITERAL("text"), LITERAL("[ext"));
  EXPECT_THROW(Eval(open_bracket), QueryRuntimeException);
}
// Matching behaviour for valid patterns against the target "text": the two
// patterns that cover only part of it are expected to produce false.
TEST_F(ExpressionEvaluatorTest, RegexMatch) {
  // ".*ex" leaves the trailing 't' unmatched.
  auto *missing_tail =
      storage.Create<RegexMatch>(LITERAL("text"), LITERAL(".*ex"));
  EXPECT_FALSE(Eval(missing_tail).ValueBool());

  // ".*ext" covers the whole target.
  auto *whole_text =
      storage.Create<RegexMatch>(LITERAL("text"), LITERAL(".*ext"));
  EXPECT_TRUE(Eval(whole_text).ValueBool());

  // "[ext]" is a single-character class, too short for the target.
  auto *single_char =
      storage.Create<RegexMatch>(LITERAL("text"), LITERAL("[ext]"));
  EXPECT_FALSE(Eval(single_char).ValueBool());

  // ".+[ext]" covers the whole target, ending on a character from the class.
  auto *prefix_then_class =
      storage.Create<RegexMatch>(LITERAL("text"), LITERAL(".+[ext]"));
  EXPECT_TRUE(Eval(prefix_then_class).ValueBool());
}
class ExpressionEvaluatorPropertyLookup : public ExpressionEvaluatorTest { class ExpressionEvaluatorPropertyLookup : public ExpressionEvaluatorTest {
protected: protected:
std::pair<std::string, storage::Property> prop_age = std::pair<std::string, storage::Property> prop_age =