Add regex matching to openCypher
Reviewers: mtomic, msantl Reviewed By: mtomic Subscribers: buda, pullbot Differential Revision: https://phabricator.memgraph.io/D1880
This commit is contained in:
parent
ed28ed873d
commit
5084123de3
@ -10,6 +10,7 @@
|
||||
|
||||
* [Enterprise Ed.] Add new privilege, `STATS` for accessing storage info.
|
||||
* Add `SHOW STORAGE INFO` feature.
|
||||
* Add regular expression matching in queries.
|
||||
|
||||
## v0.14.0
|
||||
|
||||
|
@ -1250,6 +1250,41 @@ cpp<#
|
||||
(:serialize (:slk) (:capnp))
|
||||
(:clone))
|
||||
|
||||
;; AST node for the openCypher `=~` (regular expression match) operator.
;; Derives from `expression` and holds two child expressions: the value being
;; tested (`string-expr`) and the regular expression (`regex`).  Both slots are
;; public AST pointers with explicit SLK and Cap'n Proto save/load hooks.
(lcp:define-class regex-match (expression)
|
||||
;; Left-hand operand: the expression whose value is matched.
((string-expr "Expression *" :scope :public
|
||||
:slk-save #'slk-save-ast-pointer
|
||||
:slk-load (slk-load-ast-pointer "Expression")
|
||||
:capnp-type "Tree" :capnp-init nil
|
||||
:capnp-save #'save-ast-pointer
|
||||
:capnp-load (load-ast-pointer "Expression *"))
|
||||
;; Right-hand operand: the expression producing the regular expression.
(regex "Expression *" :scope :public
|
||||
:slk-save #'slk-save-ast-pointer
|
||||
:slk-load (slk-load-ast-pointer "Expression")
|
||||
:capnp-type "Tree" :capnp-init nil
|
||||
:capnp-save #'save-ast-pointer
|
||||
:capnp-load (load-ast-pointer "Expression *")))
|
||||
;; Public C++ members: default constructor, visitor hooks for both
;; ExpressionVisitor instantiations, and the hierarchical Accept.  Accept
;; descends into the children only when PreVisit returns true; `regex_` is
;; visited only if visiting `string_expr_` returned true (short-circuit `&&`,
;; whose result is otherwise discarded).  PostVisit's result is always returned.
(:public
|
||||
#>cpp
|
||||
RegexMatch() = default;
|
||||
|
||||
DEFVISITABLE(ExpressionVisitor<TypedValue>);
|
||||
DEFVISITABLE(ExpressionVisitor<void>);
|
||||
bool Accept(HierarchicalTreeVisitor &visitor) override {
|
||||
if (visitor.PreVisit(*this)) {
|
||||
string_expr_->Accept(visitor) && regex_->Accept(visitor);
|
||||
}
|
||||
return visitor.PostVisit(*this);
|
||||
}
|
||||
cpp<#)
|
||||
;; Private C++ members: the two-operand constructor is private and AstStorage
;; is declared a friend so that it can call it.
(:private
|
||||
#>cpp
|
||||
friend class AstStorage;
|
||||
RegexMatch(Expression *string_expr, Expression *regex)
|
||||
: string_expr_(string_expr), regex_(regex) {}
|
||||
cpp<#)
|
||||
(:serialize (:slk) (:capnp))
|
||||
(:clone))
|
||||
|
||||
(lcp:define-class named-expression (tree "::utils::Visitable<HierarchicalTreeVisitor>"
|
||||
"::utils::Visitable<ExpressionVisitor<TypedValue>>"
|
||||
"::utils::Visitable<ExpressionVisitor<void>>")
|
||||
|
@ -68,6 +68,7 @@ class IndexQuery;
|
||||
class StreamQuery;
|
||||
class InfoQuery;
|
||||
class ConstraintQuery;
|
||||
class RegexMatch;
|
||||
|
||||
using TreeCompositeVisitor = ::utils::CompositeVisitor<
|
||||
SingleQuery, CypherUnion, NamedExpression, OrOperator, XorOperator,
|
||||
@ -80,7 +81,7 @@ using TreeCompositeVisitor = ::utils::CompositeVisitor<
|
||||
Aggregation, Function, Reduce, Coalesce, Extract, All, Single, Create,
|
||||
Match, Return, With, Pattern, NodeAtom, EdgeAtom, Delete, Where,
|
||||
SetProperty, SetProperties, SetLabels, RemoveProperty, RemoveLabels, Merge,
|
||||
Unwind>;
|
||||
Unwind, RegexMatch>;
|
||||
|
||||
using TreeLeafVisitor =
|
||||
::utils::LeafVisitor<Identifier, PrimitiveLiteral, ParameterLookup>;
|
||||
@ -105,8 +106,8 @@ class ExpressionVisitor
|
||||
SubscriptOperator, ListSlicingOperator, IfOperator, UnaryPlusOperator,
|
||||
UnaryMinusOperator, IsNullOperator, ListLiteral, MapLiteral,
|
||||
PropertyLookup, LabelsTest, Aggregation, Function, Reduce, Coalesce,
|
||||
Extract, All, Single, ParameterLookup, Identifier, PrimitiveLiteral> {
|
||||
};
|
||||
Extract, All, Single, ParameterLookup, Identifier, PrimitiveLiteral,
|
||||
RegexMatch> {};
|
||||
|
||||
template <class TResult>
|
||||
class QueryVisitor
|
||||
|
@ -1297,6 +1297,11 @@ antlrcpp::Any CypherMainVisitor::visitExpression3a(
|
||||
} else if (op->IN()) {
|
||||
expression = static_cast<Expression *>(storage_->Create<InListOperator>(
|
||||
expression, op->expression3b()->accept(this)));
|
||||
} else if (utils::StartsWith(op->getText(), "=~")) {
|
||||
auto *regex_match = storage_->Create<RegexMatch>();
|
||||
regex_match->string_expr_ = expression;
|
||||
regex_match->regex_ = op->expression3b()->accept(this);
|
||||
expression = regex_match;
|
||||
} else {
|
||||
std::string function_name;
|
||||
if (op->STARTS() && op->WITH()) {
|
||||
|
@ -56,6 +56,7 @@ class ExpressionPrettyPrinter : public ExpressionVisitor<void> {
|
||||
void Visit(PropertyLookup &op) override;
|
||||
void Visit(ParameterLookup &op) override;
|
||||
void Visit(NamedExpression &op) override;
|
||||
void Visit(RegexMatch &op) override;
|
||||
|
||||
private:
|
||||
std::ostream *out_;
|
||||
@ -305,6 +306,10 @@ void ExpressionPrettyPrinter::Visit(NamedExpression &op) {
|
||||
PrintOperator(out_, "NamedExpression", op.name_, op.expression_);
|
||||
}
|
||||
|
||||
// Prints a RegexMatch node by forwarding to PrintOperator with the operator
// name "=~", passing the matched expression first and the regex expression
// second.
void ExpressionPrettyPrinter::Visit(RegexMatch &op) {
|
||||
PrintOperator(out_, "=~", op.string_expr_, op.regex_);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void PrintExpression(Expression *expr, std::ostream *out) {
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include <map>
|
||||
#include <regex>
|
||||
#include <vector>
|
||||
|
||||
#include "database/graph_db_accessor.hpp"
|
||||
@ -480,6 +481,33 @@ class ExpressionEvaluator : public ExpressionVisitor<TypedValue> {
|
||||
return ctx_->parameters.AtTokenPosition(param_lookup.token_position_);
|
||||
}
|
||||
|
||||
/// Evaluates the `=~` (regular expression match) operator.
///
/// Both operands are evaluated first (target, then regex). The result is:
///  - Null if either operand evaluates to Null;
///  - Null if the target evaluates to a non-string value;
///  - otherwise a boolean telling whether the *entire* target string matches
///    the regular expression (`std::regex_match`, not a substring search).
///
/// Note that the `std::regex` object is constructed on every call.
///
/// @param regex_match AST node holding the target and regex expressions.
/// @throw QueryRuntimeException if the regex operand is neither Null nor a
///        string, or if it is not a valid regular expression.
TypedValue Visit(RegexMatch &regex_match) override {
  auto target_string_value = regex_match.string_expr_->Accept(*this);
  auto regex_value = regex_match.regex_->Accept(*this);
  // Null on either side propagates as Null.
  if (target_string_value.IsNull() || regex_value.IsNull()) {
    return TypedValue::Null;
  }
  // The regex type is validated before the target type, so a non-string regex
  // is an error even when the target would otherwise yield Null.
  if (regex_value.type() != TypedValue::Type::String) {
    throw QueryRuntimeException(
        "Regular expression must evaluate to a string, got {}.",
        regex_value.type());
  }
  if (target_string_value.type() != TypedValue::Type::String) {
    // Instead of error, we return Null which makes it compatible in case we
    // use indexed lookup which filters out any non-string properties.
    // Assuming a property lookup is the target_string_value.
    return TypedValue::Null;
  }
  const auto &target_string = target_string_value.ValueString();
  try {
    std::regex regex(regex_value.ValueString());
    return std::regex_match(target_string, regex);
  } catch (const std::regex_error &e) {
    // Surface malformed patterns as a query error instead of leaking the
    // standard library exception.
    throw QueryRuntimeException("Regex error in '{}': {}",
                                regex_value.ValueString(), e.what());
  }
}
|
||||
|
||||
private:
|
||||
storage::Property GetProperty(PropertyIx prop) {
|
||||
return ctx_->properties[prop.ix];
|
||||
|
@ -6,11 +6,11 @@
|
||||
#include <vector>
|
||||
|
||||
#include <antlr4-runtime.h>
|
||||
#include <capnp/message.h>
|
||||
#include <gmock/gmock.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "communication/rpc/serialization.hpp"
|
||||
#include "query/context.hpp"
|
||||
#include "query/frontend/ast/ast.hpp"
|
||||
#include "query/frontend/ast/ast_serialization.hpp"
|
||||
#include "query/frontend/ast/cypher_main_visitor.hpp"
|
||||
@ -18,8 +18,6 @@
|
||||
#include "query/frontend/stripped.hpp"
|
||||
#include "query/typed_value.hpp"
|
||||
|
||||
#include "capnp/message.h"
|
||||
|
||||
namespace {
|
||||
|
||||
using namespace query;
|
||||
@ -2432,4 +2430,40 @@ TYPED_TEST(CypherMainVisitorTest, TestProfileStreamQuery) {
|
||||
SyntaxException);
|
||||
}
|
||||
|
||||
// Verifies that `=~` is parsed into a RegexMatch AST node, both when used as
// a WHERE filter and when used as a returned expression.
TYPED_TEST(CypherMainVisitorTest, RegexMatch) {
  // Case 1: regex match filtering on a property lookup.
  {
    TypeParam generator(
        "MATCH (n) WHERE n.name =~ \".*bla.*\" RETURN n.name");
    auto *cypher_query = dynamic_cast<CypherQuery *>(generator.query_);
    ASSERT_TRUE(cypher_query);
    auto *single = cypher_query->single_query_;
    ASSERT_TRUE(single);
    const auto &clauses = single->clauses_;
    ASSERT_EQ(clauses.size(), 2U);
    auto *match_node = dynamic_cast<Match *>(clauses[0]);
    ASSERT_TRUE(match_node);
    auto *regex_node =
        dynamic_cast<RegexMatch *>(match_node->where_->expression_);
    ASSERT_TRUE(regex_node);
    // The left operand must be the `n.name` property lookup.
    auto *target = dynamic_cast<PropertyLookup *>(regex_node->string_expr_);
    ASSERT_TRUE(target);
    generator.CheckLiteral(regex_node->regex_, ".*bla.*");
  }
  // Case 2: regex match between two string literals in a RETURN clause.
  {
    TypeParam generator("RETURN \"text\" =~ \".*bla.*\"");
    auto *cypher_query = dynamic_cast<CypherQuery *>(generator.query_);
    ASSERT_TRUE(cypher_query);
    auto *single = cypher_query->single_query_;
    ASSERT_TRUE(single);
    const auto &clauses = single->clauses_;
    ASSERT_EQ(clauses.size(), 1U);
    auto *return_node = dynamic_cast<Return *>(clauses[0]);
    ASSERT_TRUE(return_node);
    const auto &returned = return_node->body_.named_expressions;
    ASSERT_EQ(returned.size(), 1U);
    auto *regex_node = dynamic_cast<RegexMatch *>(returned[0]->expression_);
    ASSERT_TRUE(regex_node);
    generator.CheckLiteral(regex_node->string_expr_, "text");
    generator.CheckLiteral(regex_node->regex_, ".*bla.*");
  }
}
|
||||
|
||||
} // namespace
|
||||
|
@ -9,7 +9,6 @@
|
||||
#include <gmock/gmock.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "query/context.hpp"
|
||||
#include "query/frontend/ast/ast.hpp"
|
||||
#include "query/frontend/ast/cypher_main_visitor.hpp"
|
||||
#include "query/frontend/opencypher/parser.hpp"
|
||||
@ -2582,4 +2581,40 @@ TYPED_TEST(CypherMainVisitorTest, DropConstraint) {
|
||||
}
|
||||
}
|
||||
|
||||
// Verifies that `=~` is parsed into a RegexMatch AST node, both when used as
// a WHERE filter and when used as a returned expression.
TYPED_TEST(CypherMainVisitorTest, RegexMatch) {
  // Case 1: regex match filtering on a property lookup.
  {
    TypeParam generator(
        "MATCH (n) WHERE n.name =~ \".*bla.*\" RETURN n.name");
    auto *cypher_query = dynamic_cast<CypherQuery *>(generator.query_);
    ASSERT_TRUE(cypher_query);
    auto *single = cypher_query->single_query_;
    ASSERT_TRUE(single);
    const auto &clauses = single->clauses_;
    ASSERT_EQ(clauses.size(), 2U);
    auto *match_node = dynamic_cast<Match *>(clauses[0]);
    ASSERT_TRUE(match_node);
    auto *regex_node =
        dynamic_cast<RegexMatch *>(match_node->where_->expression_);
    ASSERT_TRUE(regex_node);
    // The left operand must be the `n.name` property lookup.
    auto *target = dynamic_cast<PropertyLookup *>(regex_node->string_expr_);
    ASSERT_TRUE(target);
    generator.CheckLiteral(regex_node->regex_, ".*bla.*");
  }
  // Case 2: regex match between two string literals in a RETURN clause.
  {
    TypeParam generator("RETURN \"text\" =~ \".*bla.*\"");
    auto *cypher_query = dynamic_cast<CypherQuery *>(generator.query_);
    ASSERT_TRUE(cypher_query);
    auto *single = cypher_query->single_query_;
    ASSERT_TRUE(single);
    const auto &clauses = single->clauses_;
    ASSERT_EQ(clauses.size(), 1U);
    auto *return_node = dynamic_cast<Return *>(clauses[0]);
    ASSERT_TRUE(return_node);
    const auto &returned = return_node->body_.named_expressions;
    ASSERT_EQ(returned.size(), 1U);
    auto *regex_node = dynamic_cast<RegexMatch *>(returned[0]->expression_);
    ASSERT_TRUE(regex_node);
    generator.CheckLiteral(regex_node->string_expr_, "text");
    generator.CheckLiteral(regex_node->regex_, ".*bla.*");
  }
}
|
||||
|
||||
} // namespace
|
||||
|
@ -847,6 +847,49 @@ TEST_F(ExpressionEvaluatorTest, Coalesce) {
|
||||
.IsNull());
|
||||
}
|
||||
|
||||
// Checks operand validation of `=~`: Null or non-string targets and Null
// regexes evaluate to Null, while non-string regexes raise an error.
TEST_F(ExpressionEvaluatorTest, RegexMatchInvalidArguments) {
  // Builds a `target =~ regex` node and evaluates it.
  auto eval_match = [this](Expression *target, Expression *regex) {
    return Eval(storage.Create<RegexMatch>(target, regex));
  };

  // Null or non-string target -> Null.
  EXPECT_TRUE(
      eval_match(LITERAL(TypedValue::Null), LITERAL("regex")).IsNull());
  EXPECT_TRUE(eval_match(LITERAL(3), LITERAL("regex")).IsNull());
  EXPECT_TRUE(
      eval_match(LIST(LITERAL("string")), LITERAL("regex")).IsNull());

  // Null regex -> Null.
  EXPECT_TRUE(
      eval_match(LITERAL("string"), LITERAL(TypedValue::Null)).IsNull());

  // Non-string, non-Null regex -> runtime error.
  EXPECT_THROW(eval_match(LITERAL("string"), LITERAL(42)),
               QueryRuntimeException);
  EXPECT_THROW(eval_match(LITERAL("string"), LIST(LITERAL("regex"))),
               QueryRuntimeException);
}
|
||||
|
||||
// Checks that syntactically invalid regular expressions raise a
// QueryRuntimeException.
TEST_F(ExpressionEvaluatorTest, RegexMatchInvalidRegex) {
  // Builds a `target =~ regex` node and evaluates it.
  auto eval_match = [this](Expression *target, Expression *regex) {
    return Eval(storage.Create<RegexMatch>(target, regex));
  };

  // Leading quantifier with nothing to repeat.
  EXPECT_THROW(eval_match(LITERAL("text"), LITERAL("*ext")),
               QueryRuntimeException);
  // Unterminated bracket expression.
  EXPECT_THROW(eval_match(LITERAL("text"), LITERAL("[ext")),
               QueryRuntimeException);
}
|
||||
|
||||
// Checks that `=~` matches against the whole target string rather than
// searching for a matching substring.
TEST_F(ExpressionEvaluatorTest, RegexMatch) {
  // Evaluates `"text" =~ regex` and returns the boolean result.
  auto text_matches = [this](Expression *regex) {
    return Eval(storage.Create<RegexMatch>(LITERAL("text"), regex))
        .ValueBool();
  };

  EXPECT_FALSE(text_matches(LITERAL(".*ex")));
  EXPECT_TRUE(text_matches(LITERAL(".*ext")));
  EXPECT_FALSE(text_matches(LITERAL("[ext]")));
  EXPECT_TRUE(text_matches(LITERAL(".+[ext]")));
}
|
||||
|
||||
class ExpressionEvaluatorPropertyLookup : public ExpressionEvaluatorTest {
|
||||
protected:
|
||||
std::pair<std::string, storage::Property> prop_age =
|
||||
|
Loading…
Reference in New Issue
Block a user