Implementation of new lexer
Summary: WORK IN PROGRESS Reviewers: buda, teon.banek, dgleich Reviewed By: dgleich Subscribers: dgleich, pullbot Differential Revision: https://phabricator.memgraph.io/D496
This commit is contained in:
parent
e86d73eb98
commit
3a922de963
@ -12,6 +12,12 @@ class QueryException : public utils::BasicException {
|
||||
using utils::BasicException::BasicException;
|
||||
};
|
||||
|
||||
class LexingException : public QueryException {
|
||||
public:
|
||||
using QueryException::QueryException;
|
||||
LexingException() : QueryException("") {}
|
||||
};
|
||||
|
||||
class SyntaxException : public QueryException {
|
||||
public:
|
||||
using QueryException::QueryException;
|
||||
|
@ -1408,10 +1408,12 @@ class CachedAst {
|
||||
bool Visit(PrimitiveLiteral &literal) override {
|
||||
// TODO: If literal is a part of NamedExpression then we need to change
|
||||
// text in NamedExpression, otherwise wrong header will be returned.
|
||||
permanent_assert(
|
||||
literal.token_position_ != -1,
|
||||
"Use AstPlugLiteralsVisitor only on ast created by parsing queries");
|
||||
literal.value_ = parameters_.AtTokenPosition(literal.token_position_);
|
||||
if (!literal.value_.IsNull()) {
|
||||
permanent_assert(literal.token_position_ != -1,
|
||||
"Use AstPlugLiteralsVisitor only on ast created by "
|
||||
"parsing queries");
|
||||
literal.value_ = parameters_.AtTokenPosition(literal.token_position_);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -83,7 +83,7 @@ returnItem : ( expression SP AS SP variable )
|
||||
| expression
|
||||
;
|
||||
|
||||
order : ORDER SP BY SP sortItem ( ',' SP? sortItem )* ;
|
||||
order : ORDER SP BY SP sortItem ( SP? ',' SP? sortItem )* ;
|
||||
|
||||
skip : L_SKIP SP expression ;
|
||||
|
||||
@ -102,7 +102,7 @@ patternPart : ( variable SP? '=' SP? anonymousPatternPart )
|
||||
anonymousPatternPart : patternElement ;
|
||||
|
||||
patternElement : ( nodePattern ( SP? patternElementChain )* )
|
||||
| ( '(' patternElement ')' )
|
||||
| ( '(' SP? patternElement SP? ')' )
|
||||
;
|
||||
|
||||
nodePattern : '(' SP? ( variable SP? )? ( nodeLabels SP? )? ( properties SP? )? ')' ;
|
||||
|
@ -1,35 +1,64 @@
|
||||
#include "query/frontend/stripped.hpp"
|
||||
|
||||
#include <cctype>
|
||||
#include <cstdint>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "antlr4-runtime.h"
|
||||
#include "query/common.hpp"
|
||||
#include "query/exceptions.hpp"
|
||||
#include "query/frontend/opencypher/generated/CypherBaseVisitor.h"
|
||||
#include "query/frontend/opencypher/generated/CypherLexer.h"
|
||||
#include "query/frontend/opencypher/generated/CypherParser.h"
|
||||
#include "query/frontend/stripped_lexer_constants.hpp"
|
||||
#include "utils/assert.hpp"
|
||||
#include "utils/hashing/fnv.hpp"
|
||||
#include "utils/string.hpp"
|
||||
|
||||
using namespace antlropencypher;
|
||||
using namespace antlr4;
|
||||
|
||||
namespace query {
|
||||
|
||||
StrippedQuery::StrippedQuery(const std::string &query) {
|
||||
// Tokenize the query.
|
||||
ANTLRInputStream input(query);
|
||||
CypherLexer lexer(&input);
|
||||
CommonTokenStream token_stream(&lexer);
|
||||
token_stream.fill();
|
||||
auto tokens = token_stream.getTokens();
|
||||
using namespace lexer_constants;
|
||||
|
||||
StrippedQuery::StrippedQuery(const std::string &query) : original_(query) {
|
||||
enum class Token {
|
||||
UNMATCHED,
|
||||
KEYWORD, // Including true, false and null.
|
||||
SPECIAL, // +, .., +=, (, { and so on.
|
||||
STRING,
|
||||
INT, // Decimal, octal and hexadecimal.
|
||||
REAL,
|
||||
ESCAPED_NAME,
|
||||
UNESCAPED_NAME,
|
||||
SPACE
|
||||
};
|
||||
|
||||
std::vector<std::pair<Token, std::string>> tokens;
|
||||
for (int i = 0; i < static_cast<int>(original_.size());) {
|
||||
Token token = Token::UNMATCHED;
|
||||
int len = 0;
|
||||
auto update = [&](int new_len, Token new_token) {
|
||||
if (new_len > len) {
|
||||
len = new_len;
|
||||
token = new_token;
|
||||
}
|
||||
};
|
||||
update(MatchKeyword(i), Token::KEYWORD);
|
||||
update(MatchSpecial(i), Token::SPECIAL);
|
||||
update(MatchString(i), Token::STRING);
|
||||
update(MatchDecimalInt(i), Token::INT);
|
||||
update(MatchOctalInt(i), Token::INT);
|
||||
update(MatchHexadecimalInt(i), Token::INT);
|
||||
update(MatchReal(i), Token::REAL);
|
||||
update(MatchEscapedName(i), Token::ESCAPED_NAME);
|
||||
update(MatchUnescapedName(i), Token::UNESCAPED_NAME);
|
||||
update(MatchWhitespaceAndComments(i), Token::SPACE);
|
||||
if (token == Token::UNMATCHED) throw LexingException("Invalid query");
|
||||
tokens.emplace_back(token, original_.substr(i, len));
|
||||
i += len;
|
||||
}
|
||||
|
||||
// Initialize data structures we return.
|
||||
std::vector<std::string> token_strings;
|
||||
token_strings.reserve(tokens.size());
|
||||
|
||||
// A helper function that stores literal and its token position in a
|
||||
// literals_. In stripped query text literal is replaced with a new_value.
|
||||
// new_value can be any value that is lexed as a literal.
|
||||
@ -40,88 +69,44 @@ StrippedQuery::StrippedQuery(const std::string &query) {
|
||||
};
|
||||
|
||||
// Convert tokens to strings, perform lowercasing and filtering.
|
||||
for (const auto *token : tokens) {
|
||||
for (const auto &token : tokens) {
|
||||
// Position is calculated in query after stripping and whitespace
|
||||
// normalisation, not before.
|
||||
int position = token_strings.size() * 2;
|
||||
|
||||
switch (token->getType()) {
|
||||
case CypherLexer::UNION:
|
||||
case CypherLexer::ALL:
|
||||
case CypherLexer::OPTIONAL:
|
||||
case CypherLexer::MATCH:
|
||||
case CypherLexer::UNWIND:
|
||||
case CypherLexer::AS:
|
||||
case CypherLexer::MERGE:
|
||||
case CypherLexer::ON:
|
||||
case CypherLexer::CREATE:
|
||||
case CypherLexer::SET:
|
||||
case CypherLexer::DETACH:
|
||||
case CypherLexer::DELETE:
|
||||
case CypherLexer::REMOVE:
|
||||
case CypherLexer::WITH:
|
||||
case CypherLexer::DISTINCT:
|
||||
case CypherLexer::RETURN:
|
||||
case CypherLexer::ORDER:
|
||||
case CypherLexer::BY:
|
||||
case CypherLexer::L_SKIP:
|
||||
case CypherLexer::LIMIT:
|
||||
case CypherLexer::ASCENDING:
|
||||
case CypherLexer::ASC:
|
||||
case CypherLexer::DESCENDING:
|
||||
case CypherLexer::DESC:
|
||||
case CypherLexer::WHERE:
|
||||
case CypherLexer::OR:
|
||||
case CypherLexer::XOR:
|
||||
case CypherLexer::AND:
|
||||
case CypherLexer::NOT:
|
||||
case CypherLexer::IN:
|
||||
case CypherLexer::STARTS:
|
||||
case CypherLexer::ENDS:
|
||||
case CypherLexer::CONTAINS:
|
||||
case CypherLexer::IS:
|
||||
// We don't strip NULL, since it can appear in special expressions like IS
|
||||
// NULL and IS NOT NULL.
|
||||
case CypherLexer::CYPHERNULL:
|
||||
case CypherLexer::COUNT:
|
||||
case CypherLexer::FILTER:
|
||||
case CypherLexer::EXTRACT:
|
||||
case CypherLexer::ANY:
|
||||
case CypherLexer::NONE:
|
||||
case CypherLexer::SINGLE:
|
||||
token_strings.push_back(utils::ToLowerCase(token->getText()));
|
||||
// normalisation, not before. There will be twice as much tokens before this
|
||||
// one because space tokens will be inserted between every one.
|
||||
int token_index = token_strings.size() * 2;
|
||||
switch (token.first) {
|
||||
case Token::UNMATCHED:
|
||||
debug_assert(false, "Shouldn't happen");
|
||||
case Token::KEYWORD: {
|
||||
auto s = utils::ToLowerCase(token.second);
|
||||
// We don't strip NULL, since it can appear in special expressions like
|
||||
// IS NULL and IS NOT NULL, but we strip true and false keywords.
|
||||
if (s == "true") {
|
||||
replace_stripped(token_index, true, kStrippedBooleanToken);
|
||||
} else if (s == "false") {
|
||||
replace_stripped(token_index, false, kStrippedBooleanToken);
|
||||
} else {
|
||||
token_strings.push_back(s);
|
||||
}
|
||||
} break;
|
||||
case Token::SPACE:
|
||||
break;
|
||||
|
||||
case CypherLexer::SP:
|
||||
case Token::EOF:
|
||||
break;
|
||||
|
||||
case CypherLexer::DecimalInteger:
|
||||
case CypherLexer::HexInteger:
|
||||
case CypherLexer::OctalInteger:
|
||||
replace_stripped(position, ParseIntegerLiteral(token->getText()),
|
||||
kStrippedIntToken);
|
||||
break;
|
||||
|
||||
case CypherLexer::StringLiteral:
|
||||
replace_stripped(position, ParseStringLiteral(token->getText()),
|
||||
case Token::STRING:
|
||||
replace_stripped(token_index, ParseStringLiteral(token.second),
|
||||
kStrippedStringToken);
|
||||
break;
|
||||
|
||||
case CypherLexer::RegularDecimalReal:
|
||||
case CypherLexer::ExponentDecimalReal:
|
||||
replace_stripped(position, ParseDoubleLiteral(token->getText()),
|
||||
case Token::INT:
|
||||
replace_stripped(token_index, ParseIntegerLiteral(token.second),
|
||||
kStrippedIntToken);
|
||||
break;
|
||||
case Token::REAL:
|
||||
replace_stripped(token_index, ParseDoubleLiteral(token.second),
|
||||
kStrippedDoubleToken);
|
||||
break;
|
||||
case CypherLexer::TRUE:
|
||||
replace_stripped(position, true, kStrippedBooleanToken);
|
||||
break;
|
||||
case CypherLexer::FALSE:
|
||||
replace_stripped(position, false, kStrippedBooleanToken);
|
||||
break;
|
||||
|
||||
default:
|
||||
token_strings.push_back(token->getText());
|
||||
case Token::SPECIAL:
|
||||
case Token::ESCAPED_NAME:
|
||||
case Token::UNESCAPED_NAME:
|
||||
token_strings.push_back(token.second);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -129,4 +114,245 @@ StrippedQuery::StrippedQuery(const std::string &query) {
|
||||
query_ = utils::Join(token_strings, " ");
|
||||
hash_ = fnv(query_);
|
||||
}
|
||||
|
||||
std::string StrippedQuery::GetFirstUtf8Symbol(const char *_s) const {
|
||||
// According to
|
||||
// https://stackoverflow.com/questions/16260033/reinterpret-cast-between-char-and-stduint8-t-safe
|
||||
// this checks if casting from const char * to uint8_t is undefined behaviour.
|
||||
static_assert(
|
||||
std::is_same<std::uint8_t, unsigned char>::value,
|
||||
"This library requires std::uint8_t to be implemented as unsigned char.");
|
||||
const uint8_t *s = reinterpret_cast<const uint8_t *>(_s);
|
||||
if ((*s >> 7) == 0x00) return std::string(_s, _s + 1);
|
||||
if ((*s >> 5) == 0x06) {
|
||||
auto *s1 = s + 1;
|
||||
if ((*s1 >> 6) != 0x02) throw LexingException("Invalid character");
|
||||
return std::string(_s, _s + 2);
|
||||
}
|
||||
if ((*s >> 4) == 0x0e) {
|
||||
auto *s1 = s + 1;
|
||||
if ((*s1 >> 6) != 0x02) throw LexingException("Invalid character");
|
||||
auto *s2 = s + 2;
|
||||
if ((*s2 >> 6) != 0x02) throw LexingException("Invalid character");
|
||||
return std::string(_s, _s + 3);
|
||||
}
|
||||
if ((*s >> 3) == 0x1e) {
|
||||
auto *s1 = s + 1;
|
||||
if ((*s1 >> 6) != 0x02) throw LexingException("Invalid character");
|
||||
auto *s2 = s + 2;
|
||||
if ((*s2 >> 6) != 0x02) throw LexingException("Invalid character");
|
||||
auto *s3 = s + 3;
|
||||
if ((*s3 >> 6) != 0x02) throw LexingException("Invalid character");
|
||||
return std::string(_s, _s + 4);
|
||||
}
|
||||
throw LexingException("Invalid character");
|
||||
}
|
||||
|
||||
// From here until end of file there are functions that calculate matches for
|
||||
// every possible token. Functions are more or less compatible with Cypher.g4
|
||||
// grammar. Unfortunately, they contain a lof of special cases and shouldn't be
|
||||
// changed without good reasons.
|
||||
//
|
||||
// Here be dragons, do not touch!
|
||||
// ____ __
|
||||
// { --.\ | .)%%%)%%
|
||||
// '-._\\ | (\___ %)%%(%%(%%%
|
||||
// `\\|{/ ^ _)-%(%%%%)%%;%%%
|
||||
// .'^^^^^^^ /` %%)%%%%)%%%'
|
||||
// //\ ) , / '%%%%(%%'
|
||||
// , _.'/ `\<-- \<
|
||||
// `^^^` ^^ ^^
|
||||
int StrippedQuery::MatchKeyword(int start) const {
|
||||
int match = 0;
|
||||
for (const auto &s : kKeywords) {
|
||||
int len = s.size();
|
||||
if (len < match) continue;
|
||||
if (start + len > static_cast<int>(original_.size())) continue;
|
||||
int i = 0;
|
||||
while (i < len && s[i] == tolower(original_[start + i])) {
|
||||
++i;
|
||||
}
|
||||
if (i == len) {
|
||||
match = len;
|
||||
}
|
||||
}
|
||||
return match;
|
||||
}
|
||||
|
||||
int StrippedQuery::MatchSpecial(int start) const {
|
||||
int match = 0;
|
||||
for (const auto &s : kSpecialTokens) {
|
||||
if (!original_.compare(start, s.size(), s)) {
|
||||
match = std::max(match, static_cast<int>(s.size()));
|
||||
}
|
||||
}
|
||||
return match;
|
||||
}
|
||||
|
||||
int StrippedQuery::MatchString(int start) const {
|
||||
if (original_[start] != '"' && original_[start] != '\'') return 0;
|
||||
char start_char = original_[start];
|
||||
bool escaped = false;
|
||||
for (auto *p = original_.data() + start + 1; *p; ++p) {
|
||||
if (escaped) {
|
||||
escaped = false;
|
||||
} else if (!escaped) {
|
||||
if (*p == start_char) return p - (original_.data() + start) + 1;
|
||||
if (*p == '\\') {
|
||||
escaped = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int StrippedQuery::MatchDecimalInt(int start) const {
|
||||
if (original_[start] == '0') return 1;
|
||||
int i = start;
|
||||
while (i < static_cast<int>(original_.size()) && '0' <= original_[i] &&
|
||||
original_[i] <= '9') {
|
||||
++i;
|
||||
}
|
||||
return i - start;
|
||||
}
|
||||
|
||||
int StrippedQuery::MatchOctalInt(int start) const {
|
||||
if (original_[start] != '0') return 0;
|
||||
int i = start + 1;
|
||||
while (i < static_cast<int>(original_.size()) && '0' <= original_[i] &&
|
||||
original_[i] <= '7') {
|
||||
++i;
|
||||
}
|
||||
if (i == start + 1) return 0;
|
||||
return i - start;
|
||||
}
|
||||
|
||||
int StrippedQuery::MatchHexadecimalInt(int start) const {
|
||||
if (original_[start] != '0') return 0;
|
||||
if (start + 1 >= static_cast<int>(original_.size())) return 0;
|
||||
if (original_[start + 1] != 'x') return 0;
|
||||
int i = start + 2;
|
||||
while (i < static_cast<int>(original_.size()) &&
|
||||
(('0' <= original_[i] && original_[i] <= '9') ||
|
||||
('a' <= original_[i] && original_[i] <= 'f') ||
|
||||
('A' <= original_[i] && original_[i] <= 'F'))) {
|
||||
++i;
|
||||
}
|
||||
if (i == start + 2) return 0;
|
||||
return i - start;
|
||||
}
|
||||
|
||||
int StrippedQuery::MatchReal(int start) const {
|
||||
enum class State { BEFORE_DOT, DOT, AFTER_DOT, E, E_MINUS, AFTER_E };
|
||||
State state = State::BEFORE_DOT;
|
||||
auto i = start;
|
||||
while (i < static_cast<int>(original_.size())) {
|
||||
if (original_[i] == '.') {
|
||||
if (state != State::BEFORE_DOT) break;
|
||||
state = State::DOT;
|
||||
} else if ('0' <= original_[i] && original_[i] <= '9') {
|
||||
if (state == State::DOT) {
|
||||
state = State::AFTER_DOT;
|
||||
} else if (state == State::E || state == State::E_MINUS) {
|
||||
state = State::AFTER_E;
|
||||
}
|
||||
} else if (original_[i] == 'e' || original_[i] == 'E') {
|
||||
if (state != State::BEFORE_DOT && state != State::AFTER_DOT) break;
|
||||
state = State::E;
|
||||
} else if (original_[i] == '-') {
|
||||
if (state != State::E) break;
|
||||
state = State::E_MINUS;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
++i;
|
||||
}
|
||||
if (state == State::DOT) --i;
|
||||
if (state == State::E) --i;
|
||||
if (state == State::E_MINUS) i -= 2;
|
||||
return i - start;
|
||||
}
|
||||
|
||||
int StrippedQuery::MatchEscapedName(int start) const {
|
||||
int len = original_.size();
|
||||
int i = start;
|
||||
while (i < len) {
|
||||
if (original_[i] != '`') break;
|
||||
int j = i + 1;
|
||||
while (j < len && original_[j] != '`') {
|
||||
++j;
|
||||
}
|
||||
if (j == len) break;
|
||||
i = j + 1;
|
||||
}
|
||||
return i - start;
|
||||
}
|
||||
|
||||
int StrippedQuery::MatchUnescapedName(int start) const {
|
||||
auto i = start;
|
||||
auto s = GetFirstUtf8Symbol(original_.data() + i);
|
||||
if (!kUnescapedNameAllowedStarts.count(s)) return 0;
|
||||
i += s.size();
|
||||
while (i < static_cast<int>(original_.size())) {
|
||||
s = GetFirstUtf8Symbol(original_.data() + i);
|
||||
if (!kUnescapedNameAllowedParts.count(s)) break;
|
||||
i += s.size();
|
||||
}
|
||||
return i - start;
|
||||
}
|
||||
|
||||
int StrippedQuery::MatchWhitespaceAndComments(int start) const {
|
||||
enum class State { OUT, IN_LINE_COMMENT, IN_BLOCK_COMMENT };
|
||||
State state = State::OUT;
|
||||
int i = start;
|
||||
int len = original_.size();
|
||||
// We need to remember at which position comment started because if we faile
|
||||
// to match comment finish we have a match until comment start position.
|
||||
int comment_position = -1;
|
||||
while (i < len) {
|
||||
if (state == State::OUT) {
|
||||
auto s = GetFirstUtf8Symbol(original_.data() + i);
|
||||
if (kSpaceParts.count(s)) {
|
||||
i += s.size();
|
||||
} else if (i + 1 < len && original_[i] == '/' &&
|
||||
original_[i + 1] == '*') {
|
||||
comment_position = i;
|
||||
state = State::IN_BLOCK_COMMENT;
|
||||
i += 2;
|
||||
} else if (i + 1 < len && original_[i] == '/' &&
|
||||
original_[i + 1] == '/') {
|
||||
comment_position = i;
|
||||
state = State::IN_LINE_COMMENT;
|
||||
i += 2;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} else if (state == State::IN_LINE_COMMENT) {
|
||||
if (original_[i] == '\n') {
|
||||
state = State::OUT;
|
||||
++i;
|
||||
} else if (i + 1 < len && original_[i] == '\r' &&
|
||||
original_[i + 1] == '\n') {
|
||||
state = State::OUT;
|
||||
i += 2;
|
||||
} else if (original_[i] == '\r') {
|
||||
break;
|
||||
} else if (i + 1 == len) {
|
||||
state = State::OUT;
|
||||
++i;
|
||||
} else {
|
||||
++i;
|
||||
}
|
||||
} else if (state == State::IN_BLOCK_COMMENT) {
|
||||
if (i + 1 < len && original_[i] == '*' && original_[i + 1] == '/') {
|
||||
i += 2;
|
||||
state = State::OUT;
|
||||
} else {
|
||||
++i;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (state != State::OUT) return comment_position - start;
|
||||
return i - start;
|
||||
}
|
||||
}
|
||||
|
@ -49,6 +49,23 @@ class StrippedQuery {
|
||||
HashType hash() const { return hash_; }
|
||||
|
||||
private:
|
||||
std::string GetFirstUtf8Symbol(const char *s) const;
|
||||
|
||||
// Return len of matched keyword if something is matched, otherwise 0.
|
||||
int MatchKeyword(int start) const;
|
||||
int MatchString(int start) const;
|
||||
int MatchSpecial(int start) const;
|
||||
int MatchDecimalInt(int start) const;
|
||||
int MatchOctalInt(int start) const;
|
||||
int MatchHexadecimalInt(int start) const;
|
||||
int MatchReal(int start) const;
|
||||
int MatchEscapedName(int start) const;
|
||||
int MatchUnescapedName(int start) const;
|
||||
int MatchWhitespaceAndComments(int start) const;
|
||||
|
||||
// Original query.
|
||||
std::string original_;
|
||||
|
||||
// Stripped query.
|
||||
std::string query_;
|
||||
|
||||
|
16512
src/query/frontend/stripped_lexer_constants.hpp
Normal file
16512
src/query/frontend/stripped_lexer_constants.hpp
Normal file
File diff suppressed because it is too large
Load Diff
@ -12,6 +12,7 @@
|
||||
#include "query/frontend/ast/ast.hpp"
|
||||
#include "query/frontend/ast/cypher_main_visitor.hpp"
|
||||
#include "query/frontend/opencypher/parser.hpp"
|
||||
#include "query/frontend/stripped.hpp"
|
||||
#include "query/typed_value.hpp"
|
||||
|
||||
namespace {
|
||||
@ -83,11 +84,38 @@ class ClonedAstGenerator {
|
||||
Query *query_;
|
||||
};
|
||||
|
||||
// This generator strips ast, clones it and then plugs stripped out literals in
|
||||
// the same way it is done in ast cacheing in interpreter.
|
||||
class CachedAstGenerator {
|
||||
public:
|
||||
CachedAstGenerator(const std::string &query)
|
||||
: dbms_(),
|
||||
db_accessor_(dbms_.active()),
|
||||
context_(Config{}, *db_accessor_),
|
||||
query_string_(query),
|
||||
storage_([&]() {
|
||||
StrippedQuery stripped(query_string_);
|
||||
::frontend::opencypher::Parser parser(stripped.query());
|
||||
CypherMainVisitor visitor(context_);
|
||||
visitor.visit(parser.tree());
|
||||
CachedAst cached(std::move(visitor.storage()));
|
||||
return cached.Plug(stripped.literals());
|
||||
}()),
|
||||
query_(storage_.query()) {}
|
||||
|
||||
Dbms dbms_;
|
||||
std::unique_ptr<GraphDbAccessor> db_accessor_;
|
||||
Context context_;
|
||||
std::string query_string_;
|
||||
AstTreeStorage storage_;
|
||||
Query *query_;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class CypherMainVisitorTest : public ::testing::Test {};
|
||||
|
||||
typedef ::testing::Types<AstGenerator, OriginalAfterCloningAstGenerator,
|
||||
ClonedAstGenerator>
|
||||
ClonedAstGenerator, CachedAstGenerator>
|
||||
AstGeneratorTypes;
|
||||
TYPED_TEST_CASE(CypherMainVisitorTest, AstGeneratorTypes);
|
||||
|
||||
|
@ -10,6 +10,8 @@
|
||||
|
||||
using namespace query;
|
||||
|
||||
namespace {
|
||||
|
||||
void EXPECT_PROP_TRUE(const TypedValue& a) {
|
||||
EXPECT_TRUE(a.type() == TypedValue::Type::Bool && a.Value<bool>());
|
||||
}
|
||||
@ -24,6 +26,15 @@ TEST(QueryStripper, NoLiterals) {
|
||||
EXPECT_EQ(stripped.query(), "create ( n )");
|
||||
}
|
||||
|
||||
TEST(QueryStripper, ZeroInteger) {
|
||||
StrippedQuery stripped("RETURN 0");
|
||||
EXPECT_EQ(stripped.literals().size(), 1);
|
||||
EXPECT_EQ(stripped.literals().At(0).first, 2);
|
||||
EXPECT_EQ(stripped.literals().At(0).second.Value<int64_t>(), 0);
|
||||
EXPECT_EQ(stripped.literals().AtTokenPosition(2).Value<int64_t>(), 0);
|
||||
EXPECT_EQ(stripped.query(), "return " + kStrippedIntToken);
|
||||
}
|
||||
|
||||
TEST(QueryStripper, DecimalInteger) {
|
||||
StrippedQuery stripped("RETURN 42");
|
||||
EXPECT_EQ(stripped.literals().size(), 1);
|
||||
@ -61,6 +72,27 @@ TEST(QueryStripper, ExponentDecimal) {
|
||||
EXPECT_EQ(stripped.query(), "return " + kStrippedDoubleToken);
|
||||
}
|
||||
|
||||
TEST(QueryStripper, ExponentDecimal2) {
|
||||
StrippedQuery stripped("RETURN 4e-2");
|
||||
EXPECT_EQ(stripped.literals().size(), 1);
|
||||
EXPECT_FLOAT_EQ(stripped.literals().At(0).second.Value<double>(), 4e-2);
|
||||
EXPECT_EQ(stripped.query(), "return " + kStrippedDoubleToken);
|
||||
}
|
||||
|
||||
TEST(QueryStripper, ExponentDecimal3) {
|
||||
StrippedQuery stripped("RETURN 0.1e-2");
|
||||
EXPECT_EQ(stripped.literals().size(), 1);
|
||||
EXPECT_FLOAT_EQ(stripped.literals().At(0).second.Value<double>(), 0.1e-2);
|
||||
EXPECT_EQ(stripped.query(), "return " + kStrippedDoubleToken);
|
||||
}
|
||||
|
||||
TEST(QueryStripper, ExponentDecimal4) {
|
||||
StrippedQuery stripped("RETURN .1e-2");
|
||||
EXPECT_EQ(stripped.literals().size(), 1);
|
||||
EXPECT_FLOAT_EQ(stripped.literals().At(0).second.Value<double>(), .1e-2);
|
||||
EXPECT_EQ(stripped.query(), "return " + kStrippedDoubleToken);
|
||||
}
|
||||
|
||||
TEST(QueryStripper, StringLiteral) {
|
||||
StrippedQuery stripped("RETURN 'something'");
|
||||
EXPECT_EQ(stripped.literals().size(), 1);
|
||||
@ -68,13 +100,40 @@ TEST(QueryStripper, StringLiteral) {
|
||||
EXPECT_EQ(stripped.query(), "return " + kStrippedStringToken);
|
||||
}
|
||||
|
||||
TEST(QueryStripper, BoolLiteral) {
|
||||
StrippedQuery stripped("RETURN true");
|
||||
TEST(QueryStripper, StringLiteral2) {
|
||||
StrippedQuery stripped("RETURN 'so\\'me'");
|
||||
EXPECT_EQ(stripped.literals().size(), 1);
|
||||
EXPECT_EQ(stripped.literals().At(0).second.Value<std::string>(), "so'me");
|
||||
EXPECT_EQ(stripped.query(), "return " + kStrippedStringToken);
|
||||
}
|
||||
|
||||
TEST(QueryStripper, StringLiteral3) {
|
||||
StrippedQuery stripped("RETURN \"so\\\"me'\"");
|
||||
EXPECT_EQ(stripped.literals().size(), 1);
|
||||
EXPECT_EQ(stripped.literals().At(0).second.Value<std::string>(), "so\"me'");
|
||||
EXPECT_EQ(stripped.query(), "return " + kStrippedStringToken);
|
||||
}
|
||||
|
||||
TEST(QueryStripper, TrueLiteral) {
|
||||
StrippedQuery stripped("RETURN trUE");
|
||||
EXPECT_EQ(stripped.literals().size(), 1);
|
||||
EXPECT_PROP_EQ(stripped.literals().At(0).second, TypedValue(true));
|
||||
EXPECT_EQ(stripped.query(), "return " + kStrippedBooleanToken);
|
||||
}
|
||||
|
||||
TEST(QueryStripper, FalseLiteral) {
|
||||
StrippedQuery stripped("RETURN fAlse");
|
||||
EXPECT_EQ(stripped.literals().size(), 1);
|
||||
EXPECT_PROP_EQ(stripped.literals().At(0).second, TypedValue(false));
|
||||
EXPECT_EQ(stripped.query(), "return " + kStrippedBooleanToken);
|
||||
}
|
||||
|
||||
TEST(QueryStripper, NullLiteral) {
|
||||
StrippedQuery stripped("RETURN NuLl");
|
||||
EXPECT_EQ(stripped.literals().size(), 0);
|
||||
EXPECT_EQ(stripped.query(), "return null");
|
||||
}
|
||||
|
||||
TEST(QueryStripper, ListLiteral) {
|
||||
StrippedQuery stripped("MATCH (n) RETURN [n, n.prop]");
|
||||
EXPECT_EQ(stripped.literals().size(), 0);
|
||||
@ -96,3 +155,71 @@ TEST(QueryStripper, RangeLiteral) {
|
||||
" .. " + kStrippedIntToken +
|
||||
" ] - ( ) return n");
|
||||
}
|
||||
|
||||
TEST(QueryStripper, EscapedName) {
|
||||
StrippedQuery stripped("MATCH (n:`mirko``slavko`)");
|
||||
EXPECT_EQ(stripped.literals().size(), 0);
|
||||
EXPECT_EQ(stripped.query(), "match ( n : `mirko``slavko` )");
|
||||
}
|
||||
|
||||
TEST(QueryStripper, UnescapedName) {
|
||||
StrippedQuery stripped("MATCH (n:peropero)");
|
||||
EXPECT_EQ(stripped.literals().size(), 0);
|
||||
EXPECT_EQ(stripped.query(), "match ( n : peropero )");
|
||||
}
|
||||
|
||||
TEST(QueryStripper, UnescapedName2) {
|
||||
StrippedQuery stripped(u8"MATCH (n:\uffd5\u04c2\u04c2pero\u0078pe)");
|
||||
EXPECT_EQ(stripped.literals().size(), 0);
|
||||
EXPECT_EQ(stripped.query(), u8"match ( n : \uffd5\u04c2\u04c2pero\u0078pe )");
|
||||
}
|
||||
|
||||
TEST(QueryStripper, MixedCaseKeyword) {
|
||||
StrippedQuery stripped("MaTch (n:peropero)");
|
||||
EXPECT_EQ(stripped.literals().size(), 0);
|
||||
EXPECT_EQ(stripped.query(), "match ( n : peropero )");
|
||||
}
|
||||
|
||||
TEST(QueryStripper, BlockComment) {
|
||||
StrippedQuery stripped("MaTch (n:/**fhf/gf\n\r\n//fjhf*/peropero)");
|
||||
EXPECT_EQ(stripped.literals().size(), 0);
|
||||
EXPECT_EQ(stripped.query(), "match ( n : peropero )");
|
||||
}
|
||||
|
||||
TEST(QueryStripper, LineComment1) {
|
||||
StrippedQuery stripped("MaTch (n:peropero) // komentar\nreturn n");
|
||||
EXPECT_EQ(stripped.literals().size(), 0);
|
||||
EXPECT_EQ(stripped.query(), "match ( n : peropero ) return n");
|
||||
}
|
||||
|
||||
TEST(QueryStripper, LineComment2) {
|
||||
StrippedQuery stripped("MaTch (n:peropero) // komentar\r\nreturn n");
|
||||
EXPECT_EQ(stripped.literals().size(), 0);
|
||||
EXPECT_EQ(stripped.query(), "match ( n : peropero ) return n");
|
||||
}
|
||||
|
||||
TEST(QueryStripper, LineComment3) {
|
||||
StrippedQuery stripped("MaTch (n:peropero) return n // komentar");
|
||||
EXPECT_EQ(stripped.literals().size(), 0);
|
||||
EXPECT_EQ(stripped.query(), "match ( n : peropero ) return n");
|
||||
}
|
||||
|
||||
TEST(QueryStripper, LineComment4) {
|
||||
StrippedQuery stripped("MaTch (n:peropero) return n // komentar\r");
|
||||
EXPECT_EQ(stripped.literals().size(), 0);
|
||||
// Didn't manage to parse comment because it ends with \r.
|
||||
EXPECT_EQ(stripped.query(), "match ( n : peropero ) return n / / komentar");
|
||||
}
|
||||
|
||||
TEST(QueryStripper, Spaces) {
|
||||
StrippedQuery stripped(u8"RETURN \r\n\u202f\t\u2007 NuLl");
|
||||
EXPECT_EQ(stripped.literals().size(), 0);
|
||||
EXPECT_EQ(stripped.query(), "return null");
|
||||
}
|
||||
|
||||
TEST(QueryStripper, OtherTokens) {
|
||||
StrippedQuery stripped("++=...");
|
||||
EXPECT_EQ(stripped.literals().size(), 0);
|
||||
EXPECT_EQ(stripped.query(), "+ += .. .");
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user