Add regex matching to openCypher
Reviewers: mtomic, msantl Reviewed By: mtomic Subscribers: buda, pullbot Differential Revision: https://phabricator.memgraph.io/D1880
This commit is contained in:
parent
ed28ed873d
commit
5084123de3
@ -10,6 +10,7 @@
|
|||||||
|
|
||||||
* [Enterprise Ed.] Add new privilege, `STATS` for accessing storage info.
|
* [Enterprise Ed.] Add new privilege, `STATS` for accessing storage info.
|
||||||
* Add `SHOW STORAGE INFO` feature.
|
* Add `SHOW STORAGE INFO` feature.
|
||||||
|
* Add regular expression matching in queries.
|
||||||
|
|
||||||
## v0.14.0
|
## v0.14.0
|
||||||
|
|
||||||
|
@ -1250,6 +1250,41 @@ cpp<#
|
|||||||
(:serialize (:slk) (:capnp))
|
(:serialize (:slk) (:capnp))
|
||||||
(:clone))
|
(:clone))
|
||||||
|
|
||||||
|
(lcp:define-class regex-match (expression)
|
||||||
|
((string-expr "Expression *" :scope :public
|
||||||
|
:slk-save #'slk-save-ast-pointer
|
||||||
|
:slk-load (slk-load-ast-pointer "Expression")
|
||||||
|
:capnp-type "Tree" :capnp-init nil
|
||||||
|
:capnp-save #'save-ast-pointer
|
||||||
|
:capnp-load (load-ast-pointer "Expression *"))
|
||||||
|
(regex "Expression *" :scope :public
|
||||||
|
:slk-save #'slk-save-ast-pointer
|
||||||
|
:slk-load (slk-load-ast-pointer "Expression")
|
||||||
|
:capnp-type "Tree" :capnp-init nil
|
||||||
|
:capnp-save #'save-ast-pointer
|
||||||
|
:capnp-load (load-ast-pointer "Expression *")))
|
||||||
|
(:public
|
||||||
|
#>cpp
|
||||||
|
RegexMatch() = default;
|
||||||
|
|
||||||
|
DEFVISITABLE(ExpressionVisitor<TypedValue>);
|
||||||
|
DEFVISITABLE(ExpressionVisitor<void>);
|
||||||
|
bool Accept(HierarchicalTreeVisitor &visitor) override {
|
||||||
|
if (visitor.PreVisit(*this)) {
|
||||||
|
string_expr_->Accept(visitor) && regex_->Accept(visitor);
|
||||||
|
}
|
||||||
|
return visitor.PostVisit(*this);
|
||||||
|
}
|
||||||
|
cpp<#)
|
||||||
|
(:private
|
||||||
|
#>cpp
|
||||||
|
friend class AstStorage;
|
||||||
|
RegexMatch(Expression *string_expr, Expression *regex)
|
||||||
|
: string_expr_(string_expr), regex_(regex) {}
|
||||||
|
cpp<#)
|
||||||
|
(:serialize (:slk) (:capnp))
|
||||||
|
(:clone))
|
||||||
|
|
||||||
(lcp:define-class named-expression (tree "::utils::Visitable<HierarchicalTreeVisitor>"
|
(lcp:define-class named-expression (tree "::utils::Visitable<HierarchicalTreeVisitor>"
|
||||||
"::utils::Visitable<ExpressionVisitor<TypedValue>>"
|
"::utils::Visitable<ExpressionVisitor<TypedValue>>"
|
||||||
"::utils::Visitable<ExpressionVisitor<void>>")
|
"::utils::Visitable<ExpressionVisitor<void>>")
|
||||||
|
@ -68,6 +68,7 @@ class IndexQuery;
|
|||||||
class StreamQuery;
|
class StreamQuery;
|
||||||
class InfoQuery;
|
class InfoQuery;
|
||||||
class ConstraintQuery;
|
class ConstraintQuery;
|
||||||
|
class RegexMatch;
|
||||||
|
|
||||||
using TreeCompositeVisitor = ::utils::CompositeVisitor<
|
using TreeCompositeVisitor = ::utils::CompositeVisitor<
|
||||||
SingleQuery, CypherUnion, NamedExpression, OrOperator, XorOperator,
|
SingleQuery, CypherUnion, NamedExpression, OrOperator, XorOperator,
|
||||||
@ -80,7 +81,7 @@ using TreeCompositeVisitor = ::utils::CompositeVisitor<
|
|||||||
Aggregation, Function, Reduce, Coalesce, Extract, All, Single, Create,
|
Aggregation, Function, Reduce, Coalesce, Extract, All, Single, Create,
|
||||||
Match, Return, With, Pattern, NodeAtom, EdgeAtom, Delete, Where,
|
Match, Return, With, Pattern, NodeAtom, EdgeAtom, Delete, Where,
|
||||||
SetProperty, SetProperties, SetLabels, RemoveProperty, RemoveLabels, Merge,
|
SetProperty, SetProperties, SetLabels, RemoveProperty, RemoveLabels, Merge,
|
||||||
Unwind>;
|
Unwind, RegexMatch>;
|
||||||
|
|
||||||
using TreeLeafVisitor =
|
using TreeLeafVisitor =
|
||||||
::utils::LeafVisitor<Identifier, PrimitiveLiteral, ParameterLookup>;
|
::utils::LeafVisitor<Identifier, PrimitiveLiteral, ParameterLookup>;
|
||||||
@ -105,8 +106,8 @@ class ExpressionVisitor
|
|||||||
SubscriptOperator, ListSlicingOperator, IfOperator, UnaryPlusOperator,
|
SubscriptOperator, ListSlicingOperator, IfOperator, UnaryPlusOperator,
|
||||||
UnaryMinusOperator, IsNullOperator, ListLiteral, MapLiteral,
|
UnaryMinusOperator, IsNullOperator, ListLiteral, MapLiteral,
|
||||||
PropertyLookup, LabelsTest, Aggregation, Function, Reduce, Coalesce,
|
PropertyLookup, LabelsTest, Aggregation, Function, Reduce, Coalesce,
|
||||||
Extract, All, Single, ParameterLookup, Identifier, PrimitiveLiteral> {
|
Extract, All, Single, ParameterLookup, Identifier, PrimitiveLiteral,
|
||||||
};
|
RegexMatch> {};
|
||||||
|
|
||||||
template <class TResult>
|
template <class TResult>
|
||||||
class QueryVisitor
|
class QueryVisitor
|
||||||
|
@ -1297,6 +1297,11 @@ antlrcpp::Any CypherMainVisitor::visitExpression3a(
|
|||||||
} else if (op->IN()) {
|
} else if (op->IN()) {
|
||||||
expression = static_cast<Expression *>(storage_->Create<InListOperator>(
|
expression = static_cast<Expression *>(storage_->Create<InListOperator>(
|
||||||
expression, op->expression3b()->accept(this)));
|
expression, op->expression3b()->accept(this)));
|
||||||
|
} else if (utils::StartsWith(op->getText(), "=~")) {
|
||||||
|
auto *regex_match = storage_->Create<RegexMatch>();
|
||||||
|
regex_match->string_expr_ = expression;
|
||||||
|
regex_match->regex_ = op->expression3b()->accept(this);
|
||||||
|
expression = regex_match;
|
||||||
} else {
|
} else {
|
||||||
std::string function_name;
|
std::string function_name;
|
||||||
if (op->STARTS() && op->WITH()) {
|
if (op->STARTS() && op->WITH()) {
|
||||||
|
@ -56,6 +56,7 @@ class ExpressionPrettyPrinter : public ExpressionVisitor<void> {
|
|||||||
void Visit(PropertyLookup &op) override;
|
void Visit(PropertyLookup &op) override;
|
||||||
void Visit(ParameterLookup &op) override;
|
void Visit(ParameterLookup &op) override;
|
||||||
void Visit(NamedExpression &op) override;
|
void Visit(NamedExpression &op) override;
|
||||||
|
void Visit(RegexMatch &op) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::ostream *out_;
|
std::ostream *out_;
|
||||||
@ -305,6 +306,10 @@ void ExpressionPrettyPrinter::Visit(NamedExpression &op) {
|
|||||||
PrintOperator(out_, "NamedExpression", op.name_, op.expression_);
|
PrintOperator(out_, "NamedExpression", op.name_, op.expression_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ExpressionPrettyPrinter::Visit(RegexMatch &op) {
|
||||||
|
PrintOperator(out_, "=~", op.string_expr_, op.regex_);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
void PrintExpression(Expression *expr, std::ostream *out) {
|
void PrintExpression(Expression *expr, std::ostream *out) {
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
#include <map>
|
#include <map>
|
||||||
|
#include <regex>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "database/graph_db_accessor.hpp"
|
#include "database/graph_db_accessor.hpp"
|
||||||
@ -480,6 +481,33 @@ class ExpressionEvaluator : public ExpressionVisitor<TypedValue> {
|
|||||||
return ctx_->parameters.AtTokenPosition(param_lookup.token_position_);
|
return ctx_->parameters.AtTokenPosition(param_lookup.token_position_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TypedValue Visit(RegexMatch &regex_match) override {
|
||||||
|
auto target_string_value = regex_match.string_expr_->Accept(*this);
|
||||||
|
auto regex_value = regex_match.regex_->Accept(*this);
|
||||||
|
if (target_string_value.IsNull() || regex_value.IsNull()) {
|
||||||
|
return TypedValue::Null;
|
||||||
|
}
|
||||||
|
if (regex_value.type() != TypedValue::Type::String) {
|
||||||
|
throw QueryRuntimeException(
|
||||||
|
"Regular expression must evaluate to a string, got {}.",
|
||||||
|
regex_value.type());
|
||||||
|
}
|
||||||
|
if (target_string_value.type() != TypedValue::Type::String) {
|
||||||
|
// Instead of error, we return Null which makes it compatible in case we
|
||||||
|
// use indexed lookup which filters out any non-string properties.
|
||||||
|
// Assuming a property lookup is the target_string_value.
|
||||||
|
return TypedValue::Null;
|
||||||
|
}
|
||||||
|
const auto &target_string = target_string_value.ValueString();
|
||||||
|
try {
|
||||||
|
std::regex regex(regex_value.ValueString());
|
||||||
|
return std::regex_match(target_string, regex);
|
||||||
|
} catch (const std::regex_error &e) {
|
||||||
|
throw QueryRuntimeException("Regex error in '{}': {}",
|
||||||
|
regex_value.ValueString(), e.what());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
storage::Property GetProperty(PropertyIx prop) {
|
storage::Property GetProperty(PropertyIx prop) {
|
||||||
return ctx_->properties[prop.ix];
|
return ctx_->properties[prop.ix];
|
||||||
|
@ -6,11 +6,11 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include <antlr4-runtime.h>
|
#include <antlr4-runtime.h>
|
||||||
|
#include <capnp/message.h>
|
||||||
#include <gmock/gmock.h>
|
#include <gmock/gmock.h>
|
||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
#include "communication/rpc/serialization.hpp"
|
#include "communication/rpc/serialization.hpp"
|
||||||
#include "query/context.hpp"
|
|
||||||
#include "query/frontend/ast/ast.hpp"
|
#include "query/frontend/ast/ast.hpp"
|
||||||
#include "query/frontend/ast/ast_serialization.hpp"
|
#include "query/frontend/ast/ast_serialization.hpp"
|
||||||
#include "query/frontend/ast/cypher_main_visitor.hpp"
|
#include "query/frontend/ast/cypher_main_visitor.hpp"
|
||||||
@ -18,8 +18,6 @@
|
|||||||
#include "query/frontend/stripped.hpp"
|
#include "query/frontend/stripped.hpp"
|
||||||
#include "query/typed_value.hpp"
|
#include "query/typed_value.hpp"
|
||||||
|
|
||||||
#include "capnp/message.h"
|
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
using namespace query;
|
using namespace query;
|
||||||
@ -2432,4 +2430,40 @@ TYPED_TEST(CypherMainVisitorTest, TestProfileStreamQuery) {
|
|||||||
SyntaxException);
|
SyntaxException);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TYPED_TEST(CypherMainVisitorTest, RegexMatch) {
|
||||||
|
{
|
||||||
|
TypeParam ast_generator(
|
||||||
|
"MATCH (n) WHERE n.name =~ \".*bla.*\" RETURN n.name");
|
||||||
|
auto *query = dynamic_cast<CypherQuery *>(ast_generator.query_);
|
||||||
|
ASSERT_TRUE(query);
|
||||||
|
ASSERT_TRUE(query->single_query_);
|
||||||
|
auto *single_query = query->single_query_;
|
||||||
|
ASSERT_EQ(single_query->clauses_.size(), 2U);
|
||||||
|
auto *match_clause = dynamic_cast<Match *>(single_query->clauses_[0]);
|
||||||
|
ASSERT_TRUE(match_clause);
|
||||||
|
auto *regex_match =
|
||||||
|
dynamic_cast<RegexMatch *>(match_clause->where_->expression_);
|
||||||
|
ASSERT_TRUE(regex_match);
|
||||||
|
ASSERT_TRUE(dynamic_cast<PropertyLookup *>(regex_match->string_expr_));
|
||||||
|
ast_generator.CheckLiteral(regex_match->regex_, ".*bla.*");
|
||||||
|
}
|
||||||
|
{
|
||||||
|
TypeParam ast_generator("RETURN \"text\" =~ \".*bla.*\"");
|
||||||
|
auto *query = dynamic_cast<CypherQuery *>(ast_generator.query_);
|
||||||
|
ASSERT_TRUE(query);
|
||||||
|
ASSERT_TRUE(query->single_query_);
|
||||||
|
auto *single_query = query->single_query_;
|
||||||
|
ASSERT_EQ(single_query->clauses_.size(), 1U);
|
||||||
|
auto *return_clause = dynamic_cast<Return *>(single_query->clauses_[0]);
|
||||||
|
ASSERT_TRUE(return_clause);
|
||||||
|
ASSERT_EQ(return_clause->body_.named_expressions.size(), 1U);
|
||||||
|
auto *named_expression = return_clause->body_.named_expressions[0];
|
||||||
|
auto *regex_match =
|
||||||
|
dynamic_cast<RegexMatch *>(named_expression->expression_);
|
||||||
|
ASSERT_TRUE(regex_match);
|
||||||
|
ast_generator.CheckLiteral(regex_match->string_expr_, "text");
|
||||||
|
ast_generator.CheckLiteral(regex_match->regex_, ".*bla.*");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
@ -9,7 +9,6 @@
|
|||||||
#include <gmock/gmock.h>
|
#include <gmock/gmock.h>
|
||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
#include "query/context.hpp"
|
|
||||||
#include "query/frontend/ast/ast.hpp"
|
#include "query/frontend/ast/ast.hpp"
|
||||||
#include "query/frontend/ast/cypher_main_visitor.hpp"
|
#include "query/frontend/ast/cypher_main_visitor.hpp"
|
||||||
#include "query/frontend/opencypher/parser.hpp"
|
#include "query/frontend/opencypher/parser.hpp"
|
||||||
@ -2582,4 +2581,40 @@ TYPED_TEST(CypherMainVisitorTest, DropConstraint) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TYPED_TEST(CypherMainVisitorTest, RegexMatch) {
|
||||||
|
{
|
||||||
|
TypeParam ast_generator(
|
||||||
|
"MATCH (n) WHERE n.name =~ \".*bla.*\" RETURN n.name");
|
||||||
|
auto *query = dynamic_cast<CypherQuery *>(ast_generator.query_);
|
||||||
|
ASSERT_TRUE(query);
|
||||||
|
ASSERT_TRUE(query->single_query_);
|
||||||
|
auto *single_query = query->single_query_;
|
||||||
|
ASSERT_EQ(single_query->clauses_.size(), 2U);
|
||||||
|
auto *match_clause = dynamic_cast<Match *>(single_query->clauses_[0]);
|
||||||
|
ASSERT_TRUE(match_clause);
|
||||||
|
auto *regex_match =
|
||||||
|
dynamic_cast<RegexMatch *>(match_clause->where_->expression_);
|
||||||
|
ASSERT_TRUE(regex_match);
|
||||||
|
ASSERT_TRUE(dynamic_cast<PropertyLookup *>(regex_match->string_expr_));
|
||||||
|
ast_generator.CheckLiteral(regex_match->regex_, ".*bla.*");
|
||||||
|
}
|
||||||
|
{
|
||||||
|
TypeParam ast_generator("RETURN \"text\" =~ \".*bla.*\"");
|
||||||
|
auto *query = dynamic_cast<CypherQuery *>(ast_generator.query_);
|
||||||
|
ASSERT_TRUE(query);
|
||||||
|
ASSERT_TRUE(query->single_query_);
|
||||||
|
auto *single_query = query->single_query_;
|
||||||
|
ASSERT_EQ(single_query->clauses_.size(), 1U);
|
||||||
|
auto *return_clause = dynamic_cast<Return *>(single_query->clauses_[0]);
|
||||||
|
ASSERT_TRUE(return_clause);
|
||||||
|
ASSERT_EQ(return_clause->body_.named_expressions.size(), 1U);
|
||||||
|
auto *named_expression = return_clause->body_.named_expressions[0];
|
||||||
|
auto *regex_match =
|
||||||
|
dynamic_cast<RegexMatch *>(named_expression->expression_);
|
||||||
|
ASSERT_TRUE(regex_match);
|
||||||
|
ast_generator.CheckLiteral(regex_match->string_expr_, "text");
|
||||||
|
ast_generator.CheckLiteral(regex_match->regex_, ".*bla.*");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
@ -847,6 +847,49 @@ TEST_F(ExpressionEvaluatorTest, Coalesce) {
|
|||||||
.IsNull());
|
.IsNull());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(ExpressionEvaluatorTest, RegexMatchInvalidArguments) {
|
||||||
|
EXPECT_TRUE(Eval(storage.Create<RegexMatch>(LITERAL(TypedValue::Null),
|
||||||
|
LITERAL("regex")))
|
||||||
|
.IsNull());
|
||||||
|
EXPECT_TRUE(
|
||||||
|
Eval(storage.Create<RegexMatch>(LITERAL(3), LITERAL("regex"))).IsNull());
|
||||||
|
EXPECT_TRUE(Eval(storage.Create<RegexMatch>(LIST(LITERAL("string")),
|
||||||
|
LITERAL("regex")))
|
||||||
|
.IsNull());
|
||||||
|
EXPECT_TRUE(Eval(storage.Create<RegexMatch>(LITERAL("string"),
|
||||||
|
LITERAL(TypedValue::Null)))
|
||||||
|
.IsNull());
|
||||||
|
EXPECT_THROW(Eval(storage.Create<RegexMatch>(LITERAL("string"), LITERAL(42))),
|
||||||
|
QueryRuntimeException);
|
||||||
|
EXPECT_THROW(Eval(storage.Create<RegexMatch>(LITERAL("string"),
|
||||||
|
LIST(LITERAL("regex")))),
|
||||||
|
QueryRuntimeException);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(ExpressionEvaluatorTest, RegexMatchInvalidRegex) {
|
||||||
|
EXPECT_THROW(
|
||||||
|
Eval(storage.Create<RegexMatch>(LITERAL("text"), LITERAL("*ext"))),
|
||||||
|
QueryRuntimeException);
|
||||||
|
EXPECT_THROW(
|
||||||
|
Eval(storage.Create<RegexMatch>(LITERAL("text"), LITERAL("[ext"))),
|
||||||
|
QueryRuntimeException);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(ExpressionEvaluatorTest, RegexMatch) {
|
||||||
|
EXPECT_FALSE(
|
||||||
|
Eval(storage.Create<RegexMatch>(LITERAL("text"), LITERAL(".*ex")))
|
||||||
|
.ValueBool());
|
||||||
|
EXPECT_TRUE(
|
||||||
|
Eval(storage.Create<RegexMatch>(LITERAL("text"), LITERAL(".*ext")))
|
||||||
|
.ValueBool());
|
||||||
|
EXPECT_FALSE(
|
||||||
|
Eval(storage.Create<RegexMatch>(LITERAL("text"), LITERAL("[ext]")))
|
||||||
|
.ValueBool());
|
||||||
|
EXPECT_TRUE(
|
||||||
|
Eval(storage.Create<RegexMatch>(LITERAL("text"), LITERAL(".+[ext]")))
|
||||||
|
.ValueBool());
|
||||||
|
}
|
||||||
|
|
||||||
class ExpressionEvaluatorPropertyLookup : public ExpressionEvaluatorTest {
|
class ExpressionEvaluatorPropertyLookup : public ExpressionEvaluatorTest {
|
||||||
protected:
|
protected:
|
||||||
std::pair<std::string, storage::Property> prop_age =
|
std::pair<std::string, storage::Property> prop_age =
|
||||||
|
Loading…
Reference in New Issue
Block a user