Add regex matching to openCypher

Reviewers: mtomic, msantl

Reviewed By: mtomic

Subscribers: buda, pullbot

Differential Revision: https://phabricator.memgraph.io/D1880
This commit is contained in:
Teon Banek 2019-02-27 11:12:24 +01:00
parent ed28ed873d
commit 5084123de3
9 changed files with 194 additions and 7 deletions

View File

@ -10,6 +10,7 @@
* [Enterprise Ed.] Add new privilege, `STATS` for accessing storage info.
* Add `SHOW STORAGE INFO` feature.
* Add regular expression matching in queries.
## v0.14.0

View File

@ -1250,6 +1250,41 @@ cpp<#
(:serialize (:slk) (:capnp))
(:clone))
;; AST node for the openCypher regular expression match operator, written as
;; `string_expr =~ regex` in a query. Both operands are arbitrary expressions.
(lcp:define-class regex-match (expression)
  (;; Expression producing the string that is tested against the pattern.
   ;; Initialized to nullptr so a default-constructed node never carries an
   ;; indeterminate pointer.
   (string-expr "Expression *" :initval "nullptr" :scope :public
                :slk-save #'slk-save-ast-pointer
                :slk-load (slk-load-ast-pointer "Expression")
                :capnp-type "Tree" :capnp-init nil
                :capnp-save #'save-ast-pointer
                :capnp-load (load-ast-pointer "Expression *"))
   ;; Expression producing the regular expression pattern.
   (regex "Expression *" :initval "nullptr" :scope :public
          :slk-save #'slk-save-ast-pointer
          :slk-load (slk-load-ast-pointer "Expression")
          :capnp-type "Tree" :capnp-init nil
          :capnp-save #'save-ast-pointer
          :capnp-load (load-ast-pointer "Expression *")))
  (:public
   #>cpp
   /// Creates a node with both operands unset; `string_expr_` and `regex_`
   /// must be assigned before the node is visited.
   RegexMatch() = default;

   DEFVISITABLE(ExpressionVisitor<TypedValue>);
   DEFVISITABLE(ExpressionVisitor<void>);

   /// Hierarchical traversal: when `PreVisit` allows it, visits `string_expr_`
   /// first and `regex_` only if the first visit returned true. The outcome of
   /// visiting the children is intentionally discarded; the value returned to
   /// the caller is always that of `PostVisit`.
   bool Accept(HierarchicalTreeVisitor &visitor) override {
     if (visitor.PreVisit(*this)) {
       string_expr_->Accept(visitor) && regex_->Accept(visitor);
     }
     return visitor.PostVisit(*this);
   }
   cpp<#)
  (:private
   #>cpp
   // AstStorage is a friend so that it can reach the private constructor below.
   friend class AstStorage;

   /// Creates a node matching the value of `string_expr` against the pattern
   /// produced by `regex`.
   RegexMatch(Expression *string_expr, Expression *regex)
       : string_expr_(string_expr), regex_(regex) {}
   cpp<#)
  (:serialize (:slk) (:capnp))
  (:clone))
(lcp:define-class named-expression (tree "::utils::Visitable<HierarchicalTreeVisitor>"
"::utils::Visitable<ExpressionVisitor<TypedValue>>"
"::utils::Visitable<ExpressionVisitor<void>>")

View File

@ -68,6 +68,7 @@ class IndexQuery;
class StreamQuery;
class InfoQuery;
class ConstraintQuery;
class RegexMatch;
using TreeCompositeVisitor = ::utils::CompositeVisitor<
SingleQuery, CypherUnion, NamedExpression, OrOperator, XorOperator,
@ -80,7 +81,7 @@ using TreeCompositeVisitor = ::utils::CompositeVisitor<
Aggregation, Function, Reduce, Coalesce, Extract, All, Single, Create,
Match, Return, With, Pattern, NodeAtom, EdgeAtom, Delete, Where,
SetProperty, SetProperties, SetLabels, RemoveProperty, RemoveLabels, Merge,
Unwind>;
Unwind, RegexMatch>;
using TreeLeafVisitor =
::utils::LeafVisitor<Identifier, PrimitiveLiteral, ParameterLookup>;
@ -105,8 +106,8 @@ class ExpressionVisitor
SubscriptOperator, ListSlicingOperator, IfOperator, UnaryPlusOperator,
UnaryMinusOperator, IsNullOperator, ListLiteral, MapLiteral,
PropertyLookup, LabelsTest, Aggregation, Function, Reduce, Coalesce,
Extract, All, Single, ParameterLookup, Identifier, PrimitiveLiteral> {
};
Extract, All, Single, ParameterLookup, Identifier, PrimitiveLiteral,
RegexMatch> {};
template <class TResult>
class QueryVisitor

View File

@ -1297,6 +1297,11 @@ antlrcpp::Any CypherMainVisitor::visitExpression3a(
} else if (op->IN()) {
expression = static_cast<Expression *>(storage_->Create<InListOperator>(
expression, op->expression3b()->accept(this)));
} else if (utils::StartsWith(op->getText(), "=~")) {
auto *regex_match = storage_->Create<RegexMatch>();
regex_match->string_expr_ = expression;
regex_match->regex_ = op->expression3b()->accept(this);
expression = regex_match;
} else {
std::string function_name;
if (op->STARTS() && op->WITH()) {

View File

@ -56,6 +56,7 @@ class ExpressionPrettyPrinter : public ExpressionVisitor<void> {
void Visit(PropertyLookup &op) override;
void Visit(ParameterLookup &op) override;
void Visit(NamedExpression &op) override;
void Visit(RegexMatch &op) override;
private:
std::ostream *out_;
@ -305,6 +306,10 @@ void ExpressionPrettyPrinter::Visit(NamedExpression &op) {
PrintOperator(out_, "NamedExpression", op.name_, op.expression_);
}
// Prints a regex match as the `=~` operator applied to its two operands: the
// target string expression followed by the pattern expression.
void ExpressionPrettyPrinter::Visit(RegexMatch &op) {
  auto *target = op.string_expr_;
  auto *pattern = op.regex_;
  PrintOperator(out_, "=~", target, pattern);
}
} // namespace
void PrintExpression(Expression *expr, std::ostream *out) {

View File

@ -4,6 +4,7 @@
#include <algorithm>
#include <limits>
#include <map>
#include <regex>
#include <vector>
#include "database/graph_db_accessor.hpp"
@ -480,6 +481,33 @@ class ExpressionEvaluator : public ExpressionVisitor<TypedValue> {
return ctx_->parameters.AtTokenPosition(param_lookup.token_position_);
}
/// Evaluates `string_expr =~ regex`.
///
/// Both operands are evaluated first (target string, then pattern). The
/// result is:
///  - Null if either operand evaluates to Null;
///  - Null if the target is not a string;
///  - otherwise a boolean telling whether the *whole* target string matches
///    the pattern (`std::regex_match` semantics).
///
/// @throws QueryRuntimeException if the pattern is neither Null nor a string,
///         or if `std::regex` reports an error for the pattern.
TypedValue Visit(RegexMatch &regex_match) override {
  auto subject = regex_match.string_expr_->Accept(*this);
  auto pattern = regex_match.regex_->Accept(*this);

  // Null on either side propagates to the result.
  if (subject.IsNull() || pattern.IsNull()) return TypedValue::Null;

  // A non-string pattern is an error; this is checked before the target's
  // type, so it throws even when the target is not a string either.
  if (pattern.type() != TypedValue::Type::String) {
    throw QueryRuntimeException(
        "Regular expression must evaluate to a string, got {}.",
        pattern.type());
  }

  // A non-string target yields Null rather than an error. This keeps the
  // result compatible with an indexed lookup, which filters out non-string
  // properties (the target is assumed to be a property lookup).
  if (subject.type() != TypedValue::Type::String) return TypedValue::Null;

  const auto &pattern_text = pattern.ValueString();
  try {
    std::regex compiled(pattern_text);
    return std::regex_match(subject.ValueString(), compiled);
  } catch (const std::regex_error &e) {
    throw QueryRuntimeException("Regex error in '{}': {}", pattern_text,
                                e.what());
  }
}
private:
storage::Property GetProperty(PropertyIx prop) {
return ctx_->properties[prop.ix];

View File

@ -6,11 +6,11 @@
#include <vector>
#include <antlr4-runtime.h>
#include <capnp/message.h>
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "communication/rpc/serialization.hpp"
#include "query/context.hpp"
#include "query/frontend/ast/ast.hpp"
#include "query/frontend/ast/ast_serialization.hpp"
#include "query/frontend/ast/cypher_main_visitor.hpp"
@ -18,8 +18,6 @@
#include "query/frontend/stripped.hpp"
#include "query/typed_value.hpp"
#include "capnp/message.h"
namespace {
using namespace query;
@ -2432,4 +2430,40 @@ TYPED_TEST(CypherMainVisitorTest, TestProfileStreamQuery) {
SyntaxException);
}
// Checks that the `=~` operator is parsed into a RegexMatch AST node, both
// inside a WHERE filter and as a plain RETURN expression.
TYPED_TEST(CypherMainVisitorTest, RegexMatch) {
  {
    // Regex match with a property lookup as the target, used as a filter.
    TypeParam ast_generator(
        "MATCH (n) WHERE n.name =~ \".*bla.*\" RETURN n.name");
    auto *query = dynamic_cast<CypherQuery *>(ast_generator.query_);
    ASSERT_TRUE(query);
    ASSERT_TRUE(query->single_query_);
    auto *single_query = query->single_query_;
    ASSERT_EQ(single_query->clauses_.size(), 2U);
    auto *match_clause = dynamic_cast<Match *>(single_query->clauses_[0]);
    ASSERT_TRUE(match_clause);
    // Guard the dereference below: a missing WHERE must fail the test cleanly
    // instead of crashing the test binary.
    ASSERT_TRUE(match_clause->where_);
    auto *regex_match =
        dynamic_cast<RegexMatch *>(match_clause->where_->expression_);
    ASSERT_TRUE(regex_match);
    ASSERT_TRUE(dynamic_cast<PropertyLookup *>(regex_match->string_expr_));
    ast_generator.CheckLiteral(regex_match->regex_, ".*bla.*");
  }
  {
    // Regex match between two string literals, returned directly.
    TypeParam ast_generator("RETURN \"text\" =~ \".*bla.*\"");
    auto *query = dynamic_cast<CypherQuery *>(ast_generator.query_);
    ASSERT_TRUE(query);
    ASSERT_TRUE(query->single_query_);
    auto *single_query = query->single_query_;
    ASSERT_EQ(single_query->clauses_.size(), 1U);
    auto *return_clause = dynamic_cast<Return *>(single_query->clauses_[0]);
    ASSERT_TRUE(return_clause);
    ASSERT_EQ(return_clause->body_.named_expressions.size(), 1U);
    auto *named_expression = return_clause->body_.named_expressions[0];
    // Guard the dereference below for the same reason as above.
    ASSERT_TRUE(named_expression);
    auto *regex_match =
        dynamic_cast<RegexMatch *>(named_expression->expression_);
    ASSERT_TRUE(regex_match);
    ast_generator.CheckLiteral(regex_match->string_expr_, "text");
    ast_generator.CheckLiteral(regex_match->regex_, ".*bla.*");
  }
}
} // namespace

View File

@ -9,7 +9,6 @@
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "query/context.hpp"
#include "query/frontend/ast/ast.hpp"
#include "query/frontend/ast/cypher_main_visitor.hpp"
#include "query/frontend/opencypher/parser.hpp"
@ -2582,4 +2581,40 @@ TYPED_TEST(CypherMainVisitorTest, DropConstraint) {
}
}
// Checks that the `=~` operator is parsed into a RegexMatch AST node, both
// inside a WHERE filter and as a plain RETURN expression.
TYPED_TEST(CypherMainVisitorTest, RegexMatch) {
  {
    // Regex match with a property lookup as the target, used as a filter.
    TypeParam ast_generator(
        "MATCH (n) WHERE n.name =~ \".*bla.*\" RETURN n.name");
    auto *query = dynamic_cast<CypherQuery *>(ast_generator.query_);
    ASSERT_TRUE(query);
    ASSERT_TRUE(query->single_query_);
    auto *single_query = query->single_query_;
    ASSERT_EQ(single_query->clauses_.size(), 2U);
    auto *match_clause = dynamic_cast<Match *>(single_query->clauses_[0]);
    ASSERT_TRUE(match_clause);
    // Guard the dereference below: a missing WHERE must fail the test cleanly
    // instead of crashing the test binary.
    ASSERT_TRUE(match_clause->where_);
    auto *regex_match =
        dynamic_cast<RegexMatch *>(match_clause->where_->expression_);
    ASSERT_TRUE(regex_match);
    ASSERT_TRUE(dynamic_cast<PropertyLookup *>(regex_match->string_expr_));
    ast_generator.CheckLiteral(regex_match->regex_, ".*bla.*");
  }
  {
    // Regex match between two string literals, returned directly.
    TypeParam ast_generator("RETURN \"text\" =~ \".*bla.*\"");
    auto *query = dynamic_cast<CypherQuery *>(ast_generator.query_);
    ASSERT_TRUE(query);
    ASSERT_TRUE(query->single_query_);
    auto *single_query = query->single_query_;
    ASSERT_EQ(single_query->clauses_.size(), 1U);
    auto *return_clause = dynamic_cast<Return *>(single_query->clauses_[0]);
    ASSERT_TRUE(return_clause);
    ASSERT_EQ(return_clause->body_.named_expressions.size(), 1U);
    auto *named_expression = return_clause->body_.named_expressions[0];
    // Guard the dereference below for the same reason as above.
    ASSERT_TRUE(named_expression);
    auto *regex_match =
        dynamic_cast<RegexMatch *>(named_expression->expression_);
    ASSERT_TRUE(regex_match);
    ast_generator.CheckLiteral(regex_match->string_expr_, "text");
    ast_generator.CheckLiteral(regex_match->regex_, ".*bla.*");
  }
}
} // namespace

View File

@ -847,6 +847,49 @@ TEST_F(ExpressionEvaluatorTest, Coalesce) {
.IsNull());
}
// Operand type handling of `=~`: a Null or non-string target and a Null
// pattern evaluate to Null, while a non-string pattern raises an error.
TEST_F(ExpressionEvaluatorTest, RegexMatchInvalidArguments) {
  // Target is Null.
  auto *null_target =
      storage.Create<RegexMatch>(LITERAL(TypedValue::Null), LITERAL("regex"));
  EXPECT_TRUE(Eval(null_target).IsNull());

  // Target is an integer.
  auto *int_target = storage.Create<RegexMatch>(LITERAL(3), LITERAL("regex"));
  EXPECT_TRUE(Eval(int_target).IsNull());

  // Target is a list containing a string.
  auto *list_target =
      storage.Create<RegexMatch>(LIST(LITERAL("string")), LITERAL("regex"));
  EXPECT_TRUE(Eval(list_target).IsNull());

  // Pattern is Null.
  auto *null_pattern =
      storage.Create<RegexMatch>(LITERAL("string"), LITERAL(TypedValue::Null));
  EXPECT_TRUE(Eval(null_pattern).IsNull());

  // Pattern is an integer.
  auto *int_pattern =
      storage.Create<RegexMatch>(LITERAL("string"), LITERAL(42));
  EXPECT_THROW(Eval(int_pattern), QueryRuntimeException);

  // Pattern is a list containing a string.
  auto *list_pattern =
      storage.Create<RegexMatch>(LITERAL("string"), LIST(LITERAL("regex")));
  EXPECT_THROW(Eval(list_pattern), QueryRuntimeException);
}
// Malformed patterns must surface as QueryRuntimeException.
TEST_F(ExpressionEvaluatorTest, RegexMatchInvalidRegex) {
  // Quantifier with nothing in front of it.
  auto *dangling_star =
      storage.Create<RegexMatch>(LITERAL("text"), LITERAL("*ext"));
  EXPECT_THROW(Eval(dangling_star), QueryRuntimeException);

  // Bracket expression that is never closed.
  auto *open_bracket =
      storage.Create<RegexMatch>(LITERAL("text"), LITERAL("[ext"));
  EXPECT_THROW(Eval(open_bracket), QueryRuntimeException);
}
// `=~` succeeds only when the pattern matches the entire target string.
TEST_F(ExpressionEvaluatorTest, RegexMatch) {
  // Pattern covers only a prefix of "text".
  auto *prefix_only =
      storage.Create<RegexMatch>(LITERAL("text"), LITERAL(".*ex"));
  EXPECT_FALSE(Eval(prefix_only).ValueBool());

  // Pattern covers the whole string.
  auto *whole_string =
      storage.Create<RegexMatch>(LITERAL("text"), LITERAL(".*ext"));
  EXPECT_TRUE(Eval(whole_string).ValueBool());

  // A lone character class matches a single character, not four.
  auto *single_char =
      storage.Create<RegexMatch>(LITERAL("text"), LITERAL("[ext]"));
  EXPECT_FALSE(Eval(single_char).ValueBool());

  // Any non-empty prefix followed by one character from the class.
  auto *prefix_then_class =
      storage.Create<RegexMatch>(LITERAL("text"), LITERAL(".+[ext]"));
  EXPECT_TRUE(Eval(prefix_then_class).ValueBool());
}
class ExpressionEvaluatorPropertyLookup : public ExpressionEvaluatorTest {
protected:
std::pair<std::string, storage::Property> prop_age =