Split lexer and parser grammars

Summary: first step of grammar cleanup

Reviewers: teon.banek

Reviewed By: teon.banek

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D1508
This commit is contained in:
Marin Tomic 2018-07-26 10:33:15 +02:00
parent 9f460914ed
commit 7e92a7f41c
12 changed files with 678 additions and 820 deletions

View File

@ -160,14 +160,15 @@ include_directories(SYSTEM ${LIBRDKAFKA_INCLUDE_DIR})
# openCypher parser -----------------------------------------------------------
set(opencypher_frontend ${CMAKE_SOURCE_DIR}/src/query/frontend/opencypher)
set(opencypher_generated ${opencypher_frontend}/generated)
set(opencypher_grammar ${opencypher_frontend}/grammar/Cypher.g4)
set(opencypher_lexer_grammar ${opencypher_frontend}/grammar/CypherLexer.g4)
set(opencypher_parser_grammar ${opencypher_frontend}/grammar/CypherParser.g4)
# enumerate all files that are generated from antlr
set(antlr_opencypher_generated_src
${opencypher_generated}/CypherLexer.cpp
${opencypher_generated}/CypherParser.cpp
${opencypher_generated}/CypherBaseVisitor.cpp
${opencypher_generated}/CypherVisitor.cpp
${opencypher_generated}/CypherParserBaseVisitor.cpp
${opencypher_generated}/CypherParserVisitor.cpp
)
# Provide a command to generate sources if missing. If this were a
@ -176,9 +177,9 @@ add_custom_command(OUTPUT ${antlr_opencypher_generated_src}
COMMAND
${CMAKE_COMMAND} -E make_directory ${opencypher_generated}
COMMAND
java -jar ${CMAKE_SOURCE_DIR}/libs/antlr-4.6-complete.jar -Dlanguage=Cpp -visitor -o ${opencypher_generated} -package antlropencypher ${opencypher_grammar}
java -jar ${CMAKE_SOURCE_DIR}/libs/antlr-4.6-complete.jar -Dlanguage=Cpp -visitor -o ${opencypher_generated} -package antlropencypher ${opencypher_lexer_grammar} ${opencypher_parser_grammar}
WORKING_DIRECTORY "${CMAKE_BINARY_DIR}"
DEPENDS ${opencypher_grammar})
DEPENDS ${opencypher_lexer_grammar} ${opencypher_parser_grammar})
# add custom target for generation
add_custom_target(generate_opencypher_parser

View File

@ -488,7 +488,7 @@ antlrcpp::Any CypherMainVisitor::visitReturnItems(
named_expressions.push_back(item->accept(this));
}
return std::pair<bool, std::vector<NamedExpression *>>(
ctx->getTokens(kReturnAllTokenId).size(), named_expressions);
ctx->getTokens(CypherParser::ASTERISK).size(), named_expressions);
}
antlrcpp::Any CypherMainVisitor::visitReturnItem(
@ -628,7 +628,7 @@ antlrcpp::Any CypherMainVisitor::visitSymbolicName(
}
return name;
}
if (ctx->UnescapedSymbolicName() || ctx->HexLetter()) {
if (ctx->UnescapedSymbolicName()) {
return std::string(ctx->getText());
}
return ctx->getText();
@ -844,7 +844,7 @@ antlrcpp::Any CypherMainVisitor::visitVariableExpansion(
if (ctx->expression().size() == 0U) {
// Case -[*]-
} else if (ctx->expression().size() == 1U) {
auto dots_tokens = ctx->getTokens(kDotsTokenId);
auto dots_tokens = ctx->getTokens(CypherParser::DOTS);
Expression *bound = ctx->expression()[0]->accept(this);
if (!dots_tokens.size()) {
// Case -[*bound]-
@ -970,8 +970,9 @@ antlrcpp::Any CypherMainVisitor::visitPartialComparisonExpression(
// Addition and subtraction.
antlrcpp::Any CypherMainVisitor::visitExpression7(
CypherParser::Expression7Context *ctx) {
return LeftAssociativeOperatorExpression(ctx->expression6(), ctx->children,
{kPlusTokenId, kMinusTokenId});
return LeftAssociativeOperatorExpression(
ctx->expression6(), ctx->children,
{CypherParser::PLUS, CypherParser::MINUS});
}
// Multiplication, division, modding.
@ -979,7 +980,7 @@ antlrcpp::Any CypherMainVisitor::visitExpression6(
CypherParser::Expression6Context *ctx) {
return LeftAssociativeOperatorExpression(
ctx->expression5(), ctx->children,
{kMultTokenId, kDivTokenId, kModTokenId});
{CypherParser::ASTERISK, CypherParser::SLASH, CypherParser::PERCENT});
}
// Power.
@ -997,7 +998,7 @@ antlrcpp::Any CypherMainVisitor::visitExpression5(
antlrcpp::Any CypherMainVisitor::visitExpression4(
CypherParser::Expression4Context *ctx) {
return PrefixUnaryOperator(ctx->expression3a(), ctx->children,
{kUnaryPlusTokenId, kUnaryMinusTokenId});
{CypherParser::PLUS, CypherParser::MINUS});
}
// IS NULL, IS NOT NULL, STARTS WITH, ..
@ -1044,7 +1045,7 @@ antlrcpp::Any CypherMainVisitor::visitExpression3b(
CypherParser::Expression3bContext *ctx) {
Expression *expression = ctx->expression2a()->accept(this);
for (auto *list_op : ctx->listIndexingOrSlicing()) {
if (list_op->getTokens(kDotsTokenId).size() == 0U) {
if (list_op->getTokens(CypherParser::DOTS).size() == 0U) {
// If there is no '..' then we need to create list indexing operator.
expression = storage_.Create<SubscriptOperator>(
expression, list_op->expression()[0]->accept(this));
@ -1356,13 +1357,13 @@ antlrcpp::Any CypherMainVisitor::visitSetItem(
}
// SetProperties either assignment or update
if (ctx->getTokens(kPropertyAssignmentTokenId).size() ||
ctx->getTokens(kPropertyUpdateTokenId).size()) {
if (ctx->getTokens(CypherParser::EQ).size() ||
ctx->getTokens(CypherParser::PLUS_EQ).size()) {
auto *set_properties = storage_.Create<SetProperties>();
set_properties->identifier_ = storage_.Create<Identifier>(
ctx->variable()->accept(this).as<std::string>());
set_properties->expression_ = ctx->expression()->accept(this);
if (ctx->getTokens(kPropertyUpdateTokenId).size()) {
if (ctx->getTokens(CypherParser::PLUS_EQ).size()) {
set_properties->update_ = true;
}
return static_cast<Clause *>(set_properties);

View File

@ -9,8 +9,7 @@
#include "query/context.hpp"
#include "query/frontend/ast/ast.hpp"
#include "query/frontend/ast/named_antlr_tokens.hpp"
#include "query/frontend/opencypher/generated/CypherBaseVisitor.h"
#include "query/frontend/opencypher/generated/CypherParserBaseVisitor.h"
#include "utils/exceptions.hpp"
namespace query {
@ -19,7 +18,7 @@ namespace frontend {
using antlropencypher::CypherParser;
using query::Context;
class CypherMainVisitor : public antlropencypher::CypherBaseVisitor {
class CypherMainVisitor : public antlropencypher::CypherParserBaseVisitor {
public:
explicit CypherMainVisitor(Context &ctx) : ctx_(ctx) {}
@ -33,28 +32,28 @@ class CypherMainVisitor : public antlropencypher::CypherBaseVisitor {
return storage_.Create<XorOperator>(e1, e2);
case CypherParser::AND:
return storage_.Create<AndOperator>(e1, e2);
case kPlusTokenId:
case CypherParser::PLUS:
return storage_.Create<AdditionOperator>(e1, e2);
case kMinusTokenId:
case CypherParser::MINUS:
return storage_.Create<SubtractionOperator>(e1, e2);
case kMultTokenId:
case CypherParser::ASTERISK:
return storage_.Create<MultiplicationOperator>(e1, e2);
case kDivTokenId:
case CypherParser::SLASH:
return storage_.Create<DivisionOperator>(e1, e2);
case kModTokenId:
case CypherParser::PERCENT:
return storage_.Create<ModOperator>(e1, e2);
case kEqTokenId:
case CypherParser::EQ:
return storage_.Create<EqualOperator>(e1, e2);
case kNeTokenId1:
case kNeTokenId2:
case CypherParser::NEQ1:
case CypherParser::NEQ2:
return storage_.Create<NotEqualOperator>(e1, e2);
case kLtTokenId:
case CypherParser::LT:
return storage_.Create<LessOperator>(e1, e2);
case kGtTokenId:
case CypherParser::GT:
return storage_.Create<GreaterOperator>(e1, e2);
case kLeTokenId:
case CypherParser::LTE:
return storage_.Create<LessEqualOperator>(e1, e2);
case kGeTokenId:
case CypherParser::GTE:
return storage_.Create<GreaterEqualOperator>(e1, e2);
default:
throw utils::NotYetImplemented("binary operator");
@ -65,9 +64,9 @@ class CypherMainVisitor : public antlropencypher::CypherBaseVisitor {
switch (token) {
case CypherParser::NOT:
return storage_.Create<NotOperator>(e);
case kUnaryPlusTokenId:
case CypherParser::PLUS:
return storage_.Create<UnaryPlusOperator>(e);
case kUnaryMinusTokenId:
case CypherParser::MINUS:
return storage_.Create<UnaryMinusOperator>(e);
default:
throw utils::NotYetImplemented("unary operator");

View File

@ -1,27 +0,0 @@
#pragma once
#include "query/frontend/opencypher/generated/CypherBaseVisitor.h"
using antlropencypher::CypherParser;
// List of unnamed tokens visitor needs to use. This should be reviewed on every
// grammar change since even changes in ordering of rules will cause antlr to
// generate different constants for unnamed tokens.
const auto kReturnAllTokenId = CypherParser::T__4; // *
const auto kDotsTokenId = CypherParser::T__10; // ..
const auto kEqTokenId = CypherParser::T__2; // =
const auto kNeTokenId1 = CypherParser::T__18; // <>
const auto kNeTokenId2 = CypherParser::T__19; // !=
const auto kLtTokenId = CypherParser::T__20; // <
const auto kGtTokenId = CypherParser::T__21; // >
const auto kLeTokenId = CypherParser::T__22; // <=
const auto kGeTokenId = CypherParser::T__23; // >=
const auto kPlusTokenId = CypherParser::T__12; // +
const auto kMinusTokenId = CypherParser::T__13; // -
const auto kMultTokenId = CypherParser::T__4; // *
const auto kDivTokenId = CypherParser::T__14; // /
const auto kModTokenId = CypherParser::T__15; // %
const auto kUnaryPlusTokenId = CypherParser::T__12; // +
const auto kUnaryMinusTokenId = CypherParser::T__13; // -
const auto kPropertyAssignmentTokenId = CypherParser::T__2; // =
const auto kPropertyUpdateTokenId = CypherParser::T__3; // +=

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,198 @@
lexer grammar CypherLexer ;

import UnicodeCategories ;

/* Skip whitespace and comments. */
Skipped : ( Whitespace | Comment ) -> skip ;

fragment Whitespace : '\u0020'
                    | [\u0009-\u000D]
                    | [\u001C-\u001F]
                    | '\u1680' | '\u180E'
                    | [\u2000-\u200A]
                    | '\u2028' | '\u2029'
                    | '\u205F'
                    | '\u3000'
                    | '\u00A0'
                    | '\u202F'
                    ;

fragment Comment : '/*' .*? '*/'
                 | '//' ~[\r\n]*
                 ;

/* Special symbols. */
LPAREN    : '(' ;
RPAREN    : ')' ;
LBRACK    : '[' ;
RBRACK    : ']' ;
LBRACE    : '{' ;
RBRACE    : '}' ;
COMMA     : ',' ;
DOT       : '.' ;
DOTS      : '..' ;
COLON     : ':' ;
DOLLAR    : '$' ;
PIPE      : '|' ;
EQ        : '=' ;
LT        : '<' ;
GT        : '>' ;
LTE       : '<=' ;
GTE       : '>=' ;
NEQ1      : '<>' ;
NEQ2      : '!=' ;
SIM       : '=~' ;
PLUS      : '+' ;
MINUS     : '-' ;
ASTERISK  : '*' ;
SLASH     : '/' ;
PERCENT   : '%' ;
CARET     : '^' ;
PLUS_EQ   : '+=' ;

/* Some random unicode characters that can be used to draw arrows (from the
 * openCypher reference grammar). Written as \u escapes so that the grammar
 * does not depend on the file's encoding surviving round-trips.
 * Left:  ⟨ 〈 ﹤ ＜   Right: ⟩ 〉 ﹥ ＞ */
LeftArrowHeadPart  : '\u27E8' | '\u3008' | '\uFE64' | '\uFF1C' ;
RightArrowHeadPart : '\u27E9' | '\u3009' | '\uFE65' | '\uFF1E' ;
/* Dash variants: soft hyphen, hyphen, non-breaking hyphen, figure dash,
 * en dash, em dash, horizontal bar, minus sign, small em dash,
 * small hyphen-minus, fullwidth hyphen-minus. */
DashPart : '\u00AD' | '\u2010' | '\u2011' | '\u2012'
         | '\u2013' | '\u2014' | '\u2015'
         | '\u2212' | '\uFE58' | '\uFE63' | '\uFF0D'
         ;

/* Cypher reserved words. Each keyword is spelled with the case-insensitive
 * letter fragments defined at the bottom of this file. */
ALL            : A L L ;
ALTER          : A L T E R ;
AND            : A N D ;
ANY            : A N Y ;
AS             : A S ;
ASC            : A S C ;
ASCENDING      : A S C E N D I N G ;
BATCHES        : B A T C H E S ;
BATCH_INTERVAL : B A T C H '_' I N T E R V A L ;
BATCH_SIZE     : B A T C H '_' S I Z E ;
BFS            : B F S ;
BY             : B Y ;
CASE           : C A S E ;
CONTAINS       : C O N T A I N S ;
COUNT          : C O U N T ;
CREATE         : C R E A T E ;
CYPHERNULL     : N U L L ;
DATA           : D A T A ;
DELETE         : D E L E T E ;
DESC           : D E S C ;
DESCENDING     : D E S C E N D I N G ;
DETACH         : D E T A C H ;
DISTINCT       : D I S T I N C T ;
DROP           : D R O P ;
ELSE           : E L S E ;
END            : E N D ;
ENDS           : E N D S ;
EXTRACT        : E X T R A C T ;
FALSE          : F A L S E ;
FILTER         : F I L T E R ;
IN             : I N ;
INDEX          : I N D E X ;
IS             : I S ;
KAFKA          : K A F K A ;
K_TEST         : T E S T ;
LIMIT          : L I M I T ;
LOAD           : L O A D ;
L_SKIP         : S K I P ;
MATCH          : M A T C H ;
MERGE          : M E R G E ;
NONE           : N O N E ;
NOT            : N O T ;
ON             : O N ;
OPTIONAL       : O P T I O N A L ;
OR             : O R ;
ORDER          : O R D E R ;
PASSWORD       : P A S S W O R D ;
REDUCE         : R E D U C E ;
REMOVE         : R E M O V E ;
RETURN         : R E T U R N ;
SET            : S E T ;
SHOW           : S H O W ;
SINGLE         : S I N G L E ;
START          : S T A R T ;
STARTS         : S T A R T S ;
STOP           : S T O P ;
STREAM         : S T R E A M ;
STREAMS        : S T R E A M S ;
THEN           : T H E N ;
TOPIC          : T O P I C ;
TRANSFORM      : T R A N S F O R M ;
TRUE           : T R U E ;
UNION          : U N I O N ;
UNWIND         : U N W I N D ;
USER           : U S E R ;
WHEN           : W H E N ;
WHERE          : W H E R E ;
WITH           : W I T H ;
WSHORTEST      : W S H O R T E S T ;
XOR            : X O R ;

/* Double and single quoted string literals. */
StringLiteral : '"' ( ~[\\"] | EscapeSequence )* '"'
              | '\'' ( ~[\\'] | EscapeSequence )* '\''
              ;

fragment EscapeSequence : '\\' ( B | F | N | R | T | '\\' | '\'' | '"' )
                        | '\\u' HexDigit HexDigit HexDigit HexDigit
                        | '\\U' HexDigit HexDigit HexDigit HexDigit
                              HexDigit HexDigit HexDigit HexDigit
                        ;

/* Number literals. */
DecimalLiteral     : '0' | NonZeroDigit ( DecDigit )* ;
OctalLiteral       : '0' ( OctDigit )+ ;
HexadecimalLiteral : '0x' ( HexDigit )+ ;
FloatingLiteral    : DecDigit* '.' DecDigit+ ( E '-'? DecDigit+ )?
                   | DecDigit+ ( '.' DecDigit* )? ( E '-'? DecDigit+ )
                   | DecDigit+ ( E '-'? DecDigit+ )
                   ;

fragment NonZeroDigit : [1-9] ;
fragment DecDigit     : [0-9] ;
fragment OctDigit     : [0-7] ;
fragment HexDigit     : [0-9] | [a-f] | [A-F] ;

/* Symbolic names. */
UnescapedSymbolicName : IdentifierStart ( IdentifierPart )* ;
EscapedSymbolicName   : ( '`' ~[`]* '`' )+ ;

/**
 * Based on the unicode identifier and pattern syntax
 * (http://www.unicode.org/reports/tr31/)
 * and extended with a few characters.
 */
IdentifierStart : ID_Start    | Pc ;
IdentifierPart  : ID_Continue | Sc ;

/* Hack for case-insensitive reserved words */
fragment A : 'A' | 'a' ;
fragment B : 'B' | 'b' ;
fragment C : 'C' | 'c' ;
fragment D : 'D' | 'd' ;
fragment E : 'E' | 'e' ;
fragment F : 'F' | 'f' ;
fragment G : 'G' | 'g' ;
fragment H : 'H' | 'h' ;
fragment I : 'I' | 'i' ;
fragment J : 'J' | 'j' ;
fragment K : 'K' | 'k' ;
fragment L : 'L' | 'l' ;
fragment M : 'M' | 'm' ;
fragment N : 'N' | 'n' ;
fragment O : 'O' | 'o' ;
fragment P : 'P' | 'p' ;
fragment Q : 'Q' | 'q' ;
fragment R : 'R' | 'r' ;
fragment S : 'S' | 's' ;
fragment T : 'T' | 't' ;
fragment U : 'U' | 'u' ;
fragment V : 'V' | 'v' ;
fragment W : 'W' | 'w' ;
fragment X : 'X' | 'x' ;
fragment Y : 'Y' | 'y' ;
fragment Z : 'Z' | 'z' ;

View File

@ -0,0 +1,392 @@
/*
 * Copyright (c) 2015-2016 "Neo Technology,"
 * Network Engine for Objects in Lund AB [http://neotechnology.com]
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * When changing this grammar make sure to update constants in
 * src/query/frontend/stripped_lexer_constants.hpp (kKeywords, kSpecialTokens
 * and bitsets) if needed.
 */
parser grammar CypherParser ;
/* Tokens come from the companion lexer grammar; string literals below
 * ('(' ',' '*' ...) resolve to the named tokens declared there. */
options { tokenVocab=CypherLexer; }
/* Top-level structure: a Cypher input is a single statement, which is a
 * regular query — one or more clauses, optionally combined with UNION. */
cypher : statement ;
statement : query ;
query : regularQuery ;
regularQuery : singleQuery ( cypherUnion )* ;
singleQuery : clause ( clause )* ;
cypherUnion : ( UNION ALL singleQuery )
| ( UNION singleQuery )
;
/* All clause kinds, including Memgraph extensions (index, user and
 * stream management) alongside the standard openCypher clauses. */
clause : cypherMatch
| unwind
| merge
| create
| set
| cypherDelete
| remove
| with
| cypherReturn
| createIndex
| modifyUser
| dropUser
| createStream
| dropStream
| showStreams
| startStopStream
| startStopAllStreams
| testStream
;
/* Reading and writing clauses. */
cypherMatch : OPTIONAL? MATCH pattern where? ;
unwind : UNWIND expression AS variable ;
merge : MERGE patternPart ( mergeAction )* ;
mergeAction : ( ON MATCH set )
| ( ON CREATE set )
;
create : CREATE pattern ;
set : SET setItem ( ',' setItem )* ;
setItem : ( propertyExpression '=' expression )
| ( variable '=' expression )
| ( variable '+=' expression )
| ( variable nodeLabels )
;
cypherDelete : DETACH? DELETE expression ( ',' expression )* ;
remove : REMOVE removeItem ( ',' removeItem )* ;
removeItem : ( variable nodeLabels )
| propertyExpression
;
/* Projection: WITH / RETURN share the returnBody structure. */
with : WITH ( DISTINCT )? returnBody ( where )? ;
cypherReturn : RETURN ( DISTINCT )? returnBody ;
returnBody : returnItems ( order )? ( skip )? ( limit )? ;
returnItems : ( '*' ( ',' returnItem )* )
| ( returnItem ( ',' returnItem )* )
;
returnItem : ( expression AS variable )
| expression
;
order : ORDER BY sortItem ( ',' sortItem )* ;
skip : L_SKIP expression ;
limit : LIMIT expression ;
sortItem : expression ( ASCENDING | ASC | DESCENDING | DESC )? ;
where : WHERE expression ;
/* Graph patterns: nodes connected by relationships. */
pattern : patternPart ( ',' patternPart )* ;
patternPart : ( variable '=' anonymousPatternPart )
| anonymousPatternPart
;
anonymousPatternPart : patternElement ;
patternElement : ( nodePattern ( patternElementChain )* )
| ( '(' patternElement ')' )
;
nodePattern : '(' ( variable )? ( nodeLabels )? ( properties )? ')' ;
patternElementChain : relationshipPattern nodePattern ;
/* The four alternatives cover <- ... ->, <- ..., ... -> and undirected. */
relationshipPattern : ( leftArrowHead dash ( relationshipDetail )? dash rightArrowHead )
| ( leftArrowHead dash ( relationshipDetail )? dash )
| ( dash ( relationshipDetail )? dash rightArrowHead )
| ( dash ( relationshipDetail )? dash )
;
/* Arrow heads and dashes also accept the unicode variants from the lexer. */
leftArrowHead : '<' | LeftArrowHeadPart ;
rightArrowHead : '>' | RightArrowHeadPart ;
dash : '-' | DashPart ;
relationshipDetail : '[' ( name=variable )? ( relationshipTypes )? ( variableExpansion )? properties ']'
| '[' ( name=variable )? ( relationshipTypes )? ( variableExpansion )? relationshipLambda ( total_weight=variable )? (relationshipLambda )? ']'
| '[' ( name=variable )? ( relationshipTypes )? ( variableExpansion )? (properties )* ( relationshipLambda total_weight=variable )? (relationshipLambda )? ']';
relationshipLambda: '(' traversed_edge=variable ',' traversed_node=variable '|' expression ')';
variableExpansion : '*' (BFS | WSHORTEST)? ( expression )? ( '..' ( expression )? )? ;
properties : mapLiteral
| parameter
;
relationshipTypes : ':' relTypeName ( '|' ':'? relTypeName )* ;
nodeLabels : nodeLabel ( nodeLabel )* ;
nodeLabel : ':' labelName ;
labelName : symbolicName ;
relTypeName : symbolicName ;
/* Expression grammar: one rule per precedence level, expression12 being
 * the lowest precedence (OR) and expression2b the highest (atoms with
 * property lookups). */
expression : expression12 ;
expression12 : expression11 ( OR expression11 )* ;
expression11 : expression10 ( XOR expression10 )* ;
expression10 : expression9 ( AND expression9 )* ;
expression9 : ( NOT )* expression8 ;
expression8 : expression7 ( partialComparisonExpression )* ;
expression7 : expression6 ( ( '+' expression6 ) | ( '-' expression6 ) )* ;
expression6 : expression5 ( ( '*' expression5 ) | ( '/' expression5 ) | ( '%' expression5 ) )* ;
expression5 : expression4 ( '^' expression4 )* ;
expression4 : ( ( '+' | '-' ) )* expression3a ;
expression3a : expression3b ( stringAndNullOperators )* ;
stringAndNullOperators : ( ( ( ( '=~' ) | ( IN ) | ( STARTS WITH ) | ( ENDS WITH ) | ( CONTAINS ) ) expression3b) | ( IS CYPHERNULL ) | ( IS NOT CYPHERNULL ) ) ;
expression3b : expression2a ( listIndexingOrSlicing )* ;
listIndexingOrSlicing : ( '[' expression ']' )
| ( '[' lower_bound=expression? '..' upper_bound=expression? ']' )
;
expression2a : expression2b ( nodeLabels )? ;
expression2b : atom ( propertyLookup )* ;
atom : literal
| parameter
| caseExpression
| ( COUNT '(' '*' ')' )
| listComprehension
| patternComprehension
| ( FILTER '(' filterExpression ')' )
| ( EXTRACT '(' extractExpression ')' )
| ( REDUCE '(' reduceExpression ')' )
| ( ALL '(' filterExpression ')' )
| ( ANY '(' filterExpression ')' )
| ( NONE '(' filterExpression ')' )
| ( SINGLE '(' filterExpression ')' )
| relationshipsPattern
| parenthesizedExpression
| functionInvocation
| variable
;
literal : numberLiteral
| StringLiteral
| booleanLiteral
| CYPHERNULL
| mapLiteral
| listLiteral
;
booleanLiteral : TRUE
| FALSE
;
listLiteral : '[' ( expression ( ',' expression )* )? ']' ;
partialComparisonExpression : ( '=' expression7 )
| ( '<>' expression7 )
| ( '!=' expression7 )
| ( '<' expression7 )
| ( '>' expression7 )
| ( '<=' expression7 )
| ( '>=' expression7 )
;
parenthesizedExpression : '(' expression ')' ;
relationshipsPattern : nodePattern ( patternElementChain )+ ;
filterExpression : idInColl ( where )? ;
reduceExpression : accumulator=variable '=' initial=expression ',' idInColl '|' expression ;
extractExpression : idInColl '|' expression ;
idInColl : variable IN expression ;
functionInvocation : functionName '(' ( DISTINCT )? ( expression ( ',' expression )* )? ')' ;
functionName : UnescapedSymbolicName
| EscapedSymbolicName
| COUNT ;
listComprehension : '[' filterExpression ( '|' expression )? ']' ;
patternComprehension : '[' ( variable '=' )? relationshipsPattern ( WHERE expression )? '|' expression ']' ;
propertyLookup : '.' ( propertyKeyName ) ;
caseExpression : ( ( CASE ( caseAlternatives )+ ) | ( CASE test=expression ( caseAlternatives )+ ) ) ( ELSE else_expression=expression )? END ;
caseAlternatives : WHEN when_expression=expression THEN then_expression=expression ;
variable : symbolicName ;
numberLiteral : doubleLiteral
| integerLiteral
;
mapLiteral : '{' ( propertyKeyName ':' expression ( ',' propertyKeyName ':' expression )* )? '}' ;
parameter : '$' ( symbolicName | DecimalLiteral ) ;
propertyExpression : atom ( propertyLookup )+ ;
propertyKeyName : symbolicName ;
integerLiteral : DecimalLiteral
| OctalLiteral
| HexadecimalLiteral
;
/* Memgraph extensions: index, user and Kafka-stream management. */
createIndex : CREATE INDEX ON ':' labelName '(' propertyKeyName ')' ;
userName : UnescapedSymbolicName ;
createUser : CREATE USER ;
alterUser : ALTER USER ;
modifyUser : ( createUser | alterUser ) userName ( WITH ( modifyUserOption )+ )? ;
modifyUserOption : passwordOption ;
passwordOption : PASSWORD literal;
dropUser : DROP USER userName ( ',' userName )* ;
streamName : UnescapedSymbolicName ;
createStream : CREATE STREAM streamName AS LOAD DATA KAFKA
streamUri=literal WITH TOPIC streamTopic=literal WITH TRANSFORM
transformUri=literal ( batchIntervalOption )? (batchSizeOption )? ;
batchIntervalOption : BATCH_INTERVAL literal ;
batchSizeOption : BATCH_SIZE literal ;
dropStream : DROP STREAM streamName ;
showStreams : SHOW STREAMS ;
startStopStream : ( START | STOP ) STREAM streamName ( limitBatchesOption )? ;
limitBatchesOption : LIMIT limitBatches=literal BATCHES ;
startStopAllStreams : ( START | STOP ) ALL STREAMS ;
testStream : K_TEST STREAM streamName ( limitBatchesOption )? ;
doubleLiteral : FloatingLiteral ;
/* Keywords are allowed as identifiers in Cypher, so every reserved word
 * from the lexer is repeated here. Keep this list in sync with the
 * keyword section of CypherLexer.g4. */
symbolicName : UnescapedSymbolicName
| EscapedSymbolicName
| UNION
| ALL
| REDUCE
| OPTIONAL
| MATCH
| UNWIND
| AS
| MERGE
| ON
| CREATE
| SET
| DETACH
| DELETE
| REMOVE
| WITH
| DISTINCT
| RETURN
| ORDER
| BY
| L_SKIP
| LIMIT
| ASCENDING
| ASC
| DESCENDING
| DESC
| WHERE
| OR
| XOR
| AND
| NOT
| IN
| STARTS
| ENDS
| CONTAINS
| IS
| CYPHERNULL
| CASE
| WHEN
| THEN
| ELSE
| END
| COUNT
| FILTER
| EXTRACT
| ANY
| NONE
| SINGLE
| TRUE
| FALSE
| USER
| PASSWORD
| ALTER
| DROP
| STREAM
| STREAMS
| LOAD
| DATA
| KAFKA
| TRANSFORM
| BATCH_SIZE
| BATCH_INTERVAL
| SHOW
| START
| STOP
;

File diff suppressed because one or more lines are too long

View File

@ -10,9 +10,9 @@
#include "query/common.hpp"
#include "query/exceptions.hpp"
#include "query/frontend/opencypher/generated/CypherBaseVisitor.h"
#include "query/frontend/opencypher/generated/CypherLexer.h"
#include "query/frontend/opencypher/generated/CypherParser.h"
#include "query/frontend/opencypher/generated/CypherParserBaseVisitor.h"
#include "query/frontend/stripped_lexer_constants.hpp"
#include "utils/hashing/fnv.hpp"
#include "utils/string.hpp"
@ -82,12 +82,9 @@ StrippedQuery::StrippedQuery(const std::string &query) : original_(query) {
// named expressions in return.
for (int i = 0; i < static_cast<int>(tokens.size()); ++i) {
auto &token = tokens[i];
// Position is calculated in query after stripping and whitespace
// normalisation, not before. There will be twice as much tokens before
// this one because space tokens will be inserted between every one we also
// need to shift token index for every parameter since antlr's parser thinks
// of parameter as two tokens.
int token_index = token_strings.size() * 2 + parameters_.size();
// We need to shift token index for every parameter since antlr's parser
// thinks of parameter as two tokens.
int token_index = token_strings.size() + parameters_.size();
switch (token.first) {
case Token::UNMATCHED:
LOG(FATAL) << "Shouldn't happen";

View File

@ -14,9 +14,11 @@ int main(int, const char **a) {
CypherLexer lexer(&input);
CommonTokenStream tokens(&lexer);
const auto &vocabulary = lexer.getVocabulary();
tokens.fill();
for (auto token : tokens.getTokens()) {
std::cout << "TYPE: " << token->getType() << "; TEXT: " << token->getText()
std::cout << "TYPE: " << vocabulary.getDisplayName(token->getType())
<< "; TEXT: " << token->getText()
<< "; STRING: " << token->toString() << std::endl;
}

View File

@ -394,7 +394,7 @@ TYPED_TEST(CypherMainVisitorTest, IntegerLiteral) {
auto *single_query = query->single_query_;
auto *return_clause = dynamic_cast<Return *>(single_query->clauses_[0]);
CheckLiteral(ast_generator.context_,
return_clause->body_.named_expressions[0]->expression_, 42, 2);
return_clause->body_.named_expressions[0]->expression_, 42, 1);
}
TYPED_TEST(CypherMainVisitorTest, IntegerLiteralTooLarge) {
@ -409,7 +409,7 @@ TYPED_TEST(CypherMainVisitorTest, BooleanLiteralTrue) {
auto *single_query = query->single_query_;
auto *return_clause = dynamic_cast<Return *>(single_query->clauses_[0]);
CheckLiteral(ast_generator.context_,
return_clause->body_.named_expressions[0]->expression_, true, 2);
return_clause->body_.named_expressions[0]->expression_, true, 1);
}
TYPED_TEST(CypherMainVisitorTest, BooleanLiteralFalse) {
@ -420,7 +420,7 @@ TYPED_TEST(CypherMainVisitorTest, BooleanLiteralFalse) {
auto *return_clause = dynamic_cast<Return *>(single_query->clauses_[0]);
CheckLiteral(ast_generator.context_,
return_clause->body_.named_expressions[0]->expression_, false,
2);
1);
}
TYPED_TEST(CypherMainVisitorTest, NullLiteral) {
@ -431,7 +431,7 @@ TYPED_TEST(CypherMainVisitorTest, NullLiteral) {
auto *return_clause = dynamic_cast<Return *>(single_query->clauses_[0]);
CheckLiteral(ast_generator.context_,
return_clause->body_.named_expressions[0]->expression_,
TypedValue::Null, 2);
TypedValue::Null, 1);
}
TYPED_TEST(CypherMainVisitorTest, ParenthesizedExpression) {
@ -815,7 +815,7 @@ TYPED_TEST(CypherMainVisitorTest, StringLiteralDoubleQuotes) {
auto *return_clause = dynamic_cast<Return *>(single_query->clauses_[0]);
CheckLiteral(ast_generator.context_,
return_clause->body_.named_expressions[0]->expression_, "mi'rko",
2);
1);
}
TYPED_TEST(CypherMainVisitorTest, StringLiteralSingleQuotes) {
@ -826,7 +826,7 @@ TYPED_TEST(CypherMainVisitorTest, StringLiteralSingleQuotes) {
auto *return_clause = dynamic_cast<Return *>(single_query->clauses_[0]);
CheckLiteral(ast_generator.context_,
return_clause->body_.named_expressions[0]->expression_,
"mi\"rko", 2);
"mi\"rko", 1);
}
TYPED_TEST(CypherMainVisitorTest, StringLiteralEscapedChars) {
@ -837,7 +837,7 @@ TYPED_TEST(CypherMainVisitorTest, StringLiteralEscapedChars) {
auto *return_clause = dynamic_cast<Return *>(single_query->clauses_[0]);
CheckLiteral(ast_generator.context_,
return_clause->body_.named_expressions[0]->expression_,
"\\'\"\b\b\f\f\n\n\r\r\t\t", 2);
"\\'\"\b\b\f\f\n\n\r\r\t\t", 1);
}
TYPED_TEST(CypherMainVisitorTest, StringLiteralEscapedUtf16) {
@ -848,7 +848,7 @@ TYPED_TEST(CypherMainVisitorTest, StringLiteralEscapedUtf16) {
auto *return_clause = dynamic_cast<Return *>(single_query->clauses_[0]);
CheckLiteral(ast_generator.context_,
return_clause->body_.named_expressions[0]->expression_,
u8"\u221daaa\u221daaa", 2);
u8"\u221daaa\u221daaa", 1);
}
TYPED_TEST(CypherMainVisitorTest, StringLiteralEscapedUtf16Error) {
@ -863,7 +863,7 @@ TYPED_TEST(CypherMainVisitorTest, StringLiteralEscapedUtf32) {
auto *return_clause = dynamic_cast<Return *>(single_query->clauses_[0]);
CheckLiteral(ast_generator.context_,
return_clause->body_.named_expressions[0]->expression_,
u8"\U0001F600aaaa\U0001F600aaaaaaaa", 2);
u8"\U0001F600aaaa\U0001F600aaaaaaaa", 1);
}
TYPED_TEST(CypherMainVisitorTest, DoubleLiteral) {
@ -873,7 +873,7 @@ TYPED_TEST(CypherMainVisitorTest, DoubleLiteral) {
auto *single_query = query->single_query_;
auto *return_clause = dynamic_cast<Return *>(single_query->clauses_[0]);
CheckLiteral(ast_generator.context_,
return_clause->body_.named_expressions[0]->expression_, 3.5, 2);
return_clause->body_.named_expressions[0]->expression_, 3.5, 1);
}
TYPED_TEST(CypherMainVisitorTest, DoubleLiteralExponent) {
@ -883,7 +883,7 @@ TYPED_TEST(CypherMainVisitorTest, DoubleLiteralExponent) {
auto *single_query = query->single_query_;
auto *return_clause = dynamic_cast<Return *>(single_query->clauses_[0]);
CheckLiteral(ast_generator.context_,
return_clause->body_.named_expressions[0]->expression_, 0.5, 2);
return_clause->body_.named_expressions[0]->expression_, 0.5, 1);
}
TYPED_TEST(CypherMainVisitorTest, ListLiteral) {

View File

@ -33,18 +33,18 @@ TEST(QueryStripper, NoLiterals) {
TEST(QueryStripper, ZeroInteger) {
StrippedQuery stripped("RETURN 0");
EXPECT_EQ(stripped.literals().size(), 1);
EXPECT_EQ(stripped.literals().At(0).first, 2);
EXPECT_EQ(stripped.literals().At(0).first, 1);
EXPECT_EQ(stripped.literals().At(0).second.Value<int64_t>(), 0);
EXPECT_EQ(stripped.literals().AtTokenPosition(2).Value<int64_t>(), 0);
EXPECT_EQ(stripped.literals().AtTokenPosition(1).Value<int64_t>(), 0);
EXPECT_EQ(stripped.query(), "RETURN " + kStrippedIntToken);
}
TEST(QueryStripper, DecimalInteger) {
StrippedQuery stripped("RETURN 42");
EXPECT_EQ(stripped.literals().size(), 1);
EXPECT_EQ(stripped.literals().At(0).first, 2);
EXPECT_EQ(stripped.literals().At(0).first, 1);
EXPECT_EQ(stripped.literals().At(0).second.Value<int64_t>(), 42);
EXPECT_EQ(stripped.literals().AtTokenPosition(2).Value<int64_t>(), 42);
EXPECT_EQ(stripped.literals().AtTokenPosition(1).Value<int64_t>(), 42);
EXPECT_EQ(stripped.query(), "RETURN " + kStrippedIntToken);
}
@ -264,7 +264,7 @@ TEST(QueryStripper, OtherTokens) {
TEST(QueryStripper, NamedExpression) {
StrippedQuery stripped("RETURN 2 + 3");
EXPECT_THAT(stripped.named_expressions(),
UnorderedElementsAre(Pair(2, "2 + 3")));
UnorderedElementsAre(Pair(1, "2 + 3")));
}
TEST(QueryStripper, AliasedNamedExpression) {
@ -276,32 +276,32 @@ TEST(QueryStripper, MultipleNamedExpressions) {
StrippedQuery stripped("RETURN 2 + 3, x as s, x, n.x");
EXPECT_THAT(
stripped.named_expressions(),
UnorderedElementsAre(Pair(2, "2 + 3"), Pair(18, "x"), Pair(22, "n.x")));
UnorderedElementsAre(Pair(1, "2 + 3"), Pair(9, "x"), Pair(11, "n.x")));
}
TEST(QueryStripper, ReturnOrderBy) {
StrippedQuery stripped("RETURN 2 + 3 ORDER BY n.x, x");
EXPECT_THAT(stripped.named_expressions(),
UnorderedElementsAre(Pair(2, "2 + 3")));
UnorderedElementsAre(Pair(1, "2 + 3")));
}
TEST(QueryStripper, ReturnSkip) {
StrippedQuery stripped("RETURN 2 + 3 SKIP 10");
EXPECT_THAT(stripped.named_expressions(),
UnorderedElementsAre(Pair(2, "2 + 3")));
UnorderedElementsAre(Pair(1, "2 + 3")));
}
TEST(QueryStripper, ReturnLimit) {
StrippedQuery stripped("RETURN 2 + 3 LIMIT 12");
EXPECT_THAT(stripped.named_expressions(),
UnorderedElementsAre(Pair(2, "2 + 3")));
UnorderedElementsAre(Pair(1, "2 + 3")));
}
TEST(QueryStripper, ReturnListsAndFunctionCalls) {
StrippedQuery stripped("RETURN [1,2,[3, 4] , 5], f(1, 2), 3");
EXPECT_THAT(stripped.named_expressions(),
UnorderedElementsAre(Pair(2, "[1,2,[3, 4] , 5]"),
Pair(30, "f(1, 2)"), Pair(44, "3")));
UnorderedElementsAre(Pair(1, "[1,2,[3, 4] , 5]"),
Pair(15, "f(1, 2)"), Pair(22, "3")));
}
TEST(QueryStripper, Parameters) {
@ -309,11 +309,11 @@ TEST(QueryStripper, Parameters) {
EXPECT_EQ(stripped.literals().size(), 0);
EXPECT_EQ(stripped.query(), "RETURN $123 , $pero , $`mirko ``slavko`");
EXPECT_THAT(stripped.parameters(),
UnorderedElementsAre(Pair(2, "123"), Pair(7, "pero"),
Pair(12, "mirko `slavko")));
UnorderedElementsAre(Pair(1, "123"), Pair(4, "pero"),
Pair(7, "mirko `slavko")));
EXPECT_THAT(stripped.named_expressions(),
UnorderedElementsAre(Pair(2, "$123"), Pair(7, "$pero"),
Pair(12, "$`mirko ``slavko`")));
UnorderedElementsAre(Pair(1, "$123"), Pair(4, "$pero"),
Pair(7, "$`mirko ``slavko`")));
}
TEST(QueryStripper, KeywordInNamedExpression) {
@ -321,7 +321,7 @@ TEST(QueryStripper, KeywordInNamedExpression) {
EXPECT_EQ(stripped.literals().size(), 0);
EXPECT_EQ(stripped.query(), "RETURN CoUnT ( n )");
EXPECT_THAT(stripped.named_expressions(),
UnorderedElementsAre(Pair(2, "CoUnT(n)")));
UnorderedElementsAre(Pair(1, "CoUnT(n)")));
}
TEST(QueryStripper, UnionMultipleReturnStatementsAliasedExpression) {
@ -332,29 +332,29 @@ TEST(QueryStripper, UnionMultipleReturnStatementsAliasedExpression) {
TEST(QueryStripper, UnionMultipleReturnStatementsNamedExpressions) {
StrippedQuery stripped("RETURN x UNION RETURN x");
EXPECT_THAT(stripped.named_expressions(),
UnorderedElementsAre(Pair(2, "x"), Pair(8, "x")));
UnorderedElementsAre(Pair(1, "x"), Pair(4, "x")));
}
TEST(QueryStripper, UnionAllMultipleReturnStatementsNamedExpressions) {
StrippedQuery stripped("RETURN x UNION ALL RETURN x");
EXPECT_THAT(stripped.named_expressions(),
UnorderedElementsAre(Pair(2, "x"), Pair(10, "x")));
UnorderedElementsAre(Pair(1, "x"), Pair(5, "x")));
}
TEST(QueryStripper, QueryReturnMap) {
StrippedQuery stripped("RETURN {a: 1, b: 'foo'}");
EXPECT_THAT(stripped.named_expressions(),
UnorderedElementsAre(Pair(2, "{a: 1, b: 'foo'}")));
UnorderedElementsAre(Pair(1, "{a: 1, b: 'foo'}")));
}
TEST(QueryStripper, QuerySemicolonEndingQuery1) {
StrippedQuery stripped("RETURN 1;");
EXPECT_THAT(stripped.named_expressions(), UnorderedElementsAre(Pair(2, "1")));
EXPECT_THAT(stripped.named_expressions(), UnorderedElementsAre(Pair(1, "1")));
}
TEST(QueryStripper, QuerySemicolonEndingQuery2) {
StrippedQuery stripped("RETURN 42 ;");
EXPECT_THAT(stripped.named_expressions(),
UnorderedElementsAre(Pair(2, "42")));
UnorderedElementsAre(Pair(1, "42")));
}
} // namespace