Implementation of new lexer

Summary: WORK IN PROGRESS

Reviewers: buda, teon.banek, dgleich

Reviewed By: dgleich

Subscribers: dgleich, pullbot

Differential Revision: https://phabricator.memgraph.io/D496
This commit is contained in:
Mislav Bradac 2017-06-21 16:10:52 +02:00
parent e86d73eb98
commit 3a922de963
8 changed files with 17017 additions and 99 deletions

View File

@ -12,6 +12,12 @@ class QueryException : public utils::BasicException {
using utils::BasicException::BasicException; using utils::BasicException::BasicException;
}; };
// Exception type for failures while splitting query text into tokens.
// Derives from QueryException and inherits all of its constructors.
class LexingException : public QueryException {
public:
using QueryException::QueryException;
// Default constructor: builds the exception with an empty message.
LexingException() : QueryException("") {}
};
class SyntaxException : public QueryException { class SyntaxException : public QueryException {
public: public:
using QueryException::QueryException; using QueryException::QueryException;

View File

@ -1408,10 +1408,12 @@ class CachedAst {
bool Visit(PrimitiveLiteral &literal) override { bool Visit(PrimitiveLiteral &literal) override {
// TODO: If literal is a part of NamedExpression then we need to change // TODO: If literal is a part of NamedExpression then we need to change
// text in NamedExpression, otherwise wrong header will be returned. // text in NamedExpression, otherwise wrong header will be returned.
permanent_assert( if (!literal.value_.IsNull()) {
literal.token_position_ != -1, permanent_assert(literal.token_position_ != -1,
"Use AstPlugLiteralsVisitor only on ast created by parsing queries"); "Use AstPlugLiteralsVisitor only on ast created by "
literal.value_ = parameters_.AtTokenPosition(literal.token_position_); "parsing queries");
literal.value_ = parameters_.AtTokenPosition(literal.token_position_);
}
return true; return true;
} }

View File

@ -83,7 +83,7 @@ returnItem : ( expression SP AS SP variable )
| expression | expression
; ;
order : ORDER SP BY SP sortItem ( ',' SP? sortItem )* ; order : ORDER SP BY SP sortItem ( SP? ',' SP? sortItem )* ;
skip : L_SKIP SP expression ; skip : L_SKIP SP expression ;
@ -102,7 +102,7 @@ patternPart : ( variable SP? '=' SP? anonymousPatternPart )
anonymousPatternPart : patternElement ; anonymousPatternPart : patternElement ;
patternElement : ( nodePattern ( SP? patternElementChain )* ) patternElement : ( nodePattern ( SP? patternElementChain )* )
| ( '(' patternElement ')' ) | ( '(' SP? patternElement SP? ')' )
; ;
nodePattern : '(' SP? ( variable SP? )? ( nodeLabels SP? )? ( properties SP? )? ')' ; nodePattern : '(' SP? ( variable SP? )? ( nodeLabels SP? )? ( properties SP? )? ')' ;

View File

@ -1,35 +1,64 @@
#include "query/frontend/stripped.hpp" #include "query/frontend/stripped.hpp"
#include <cctype>
#include <cstdint>
#include <iostream> #include <iostream>
#include <string> #include <string>
#include <vector> #include <vector>
#include "antlr4-runtime.h"
#include "query/common.hpp" #include "query/common.hpp"
#include "query/exceptions.hpp"
#include "query/frontend/opencypher/generated/CypherBaseVisitor.h" #include "query/frontend/opencypher/generated/CypherBaseVisitor.h"
#include "query/frontend/opencypher/generated/CypherLexer.h" #include "query/frontend/opencypher/generated/CypherLexer.h"
#include "query/frontend/opencypher/generated/CypherParser.h" #include "query/frontend/opencypher/generated/CypherParser.h"
#include "query/frontend/stripped_lexer_constants.hpp"
#include "utils/assert.hpp" #include "utils/assert.hpp"
#include "utils/hashing/fnv.hpp" #include "utils/hashing/fnv.hpp"
#include "utils/string.hpp" #include "utils/string.hpp"
using namespace antlropencypher;
using namespace antlr4;
namespace query { namespace query {
StrippedQuery::StrippedQuery(const std::string &query) { using namespace lexer_constants;
// Tokenize the query.
ANTLRInputStream input(query); StrippedQuery::StrippedQuery(const std::string &query) : original_(query) {
CypherLexer lexer(&input); enum class Token {
CommonTokenStream token_stream(&lexer); UNMATCHED,
token_stream.fill(); KEYWORD, // Including true, false and null.
auto tokens = token_stream.getTokens(); SPECIAL, // +, .., +=, (, { and so on.
STRING,
INT, // Decimal, octal and hexadecimal.
REAL,
ESCAPED_NAME,
UNESCAPED_NAME,
SPACE
};
std::vector<std::pair<Token, std::string>> tokens;
for (int i = 0; i < static_cast<int>(original_.size());) {
Token token = Token::UNMATCHED;
int len = 0;
auto update = [&](int new_len, Token new_token) {
if (new_len > len) {
len = new_len;
token = new_token;
}
};
update(MatchKeyword(i), Token::KEYWORD);
update(MatchSpecial(i), Token::SPECIAL);
update(MatchString(i), Token::STRING);
update(MatchDecimalInt(i), Token::INT);
update(MatchOctalInt(i), Token::INT);
update(MatchHexadecimalInt(i), Token::INT);
update(MatchReal(i), Token::REAL);
update(MatchEscapedName(i), Token::ESCAPED_NAME);
update(MatchUnescapedName(i), Token::UNESCAPED_NAME);
update(MatchWhitespaceAndComments(i), Token::SPACE);
if (token == Token::UNMATCHED) throw LexingException("Invalid query");
tokens.emplace_back(token, original_.substr(i, len));
i += len;
}
// Initialize data structures we return.
std::vector<std::string> token_strings; std::vector<std::string> token_strings;
token_strings.reserve(tokens.size());
// A helper function that stores literal and its token position in a // A helper function that stores literal and its token position in a
// literals_. In stripped query text literal is replaced with a new_value. // literals_. In stripped query text literal is replaced with a new_value.
// new_value can be any value that is lexed as a literal. // new_value can be any value that is lexed as a literal.
@ -40,88 +69,44 @@ StrippedQuery::StrippedQuery(const std::string &query) {
}; };
// Convert tokens to strings, perform lowercasing and filtering. // Convert tokens to strings, perform lowercasing and filtering.
for (const auto *token : tokens) { for (const auto &token : tokens) {
// Position is calculated in query after stripping and whitespace // Position is calculated in query after stripping and whitespace
// normalisation, not before. // normalisation, not before. There will be twice as much tokens before this
int position = token_strings.size() * 2; // one because space tokens will be inserted between every one.
int token_index = token_strings.size() * 2;
switch (token->getType()) { switch (token.first) {
case CypherLexer::UNION: case Token::UNMATCHED:
case CypherLexer::ALL: debug_assert(false, "Shouldn't happen");
case CypherLexer::OPTIONAL: case Token::KEYWORD: {
case CypherLexer::MATCH: auto s = utils::ToLowerCase(token.second);
case CypherLexer::UNWIND: // We don't strip NULL, since it can appear in special expressions like
case CypherLexer::AS: // IS NULL and IS NOT NULL, but we strip true and false keywords.
case CypherLexer::MERGE: if (s == "true") {
case CypherLexer::ON: replace_stripped(token_index, true, kStrippedBooleanToken);
case CypherLexer::CREATE: } else if (s == "false") {
case CypherLexer::SET: replace_stripped(token_index, false, kStrippedBooleanToken);
case CypherLexer::DETACH: } else {
case CypherLexer::DELETE: token_strings.push_back(s);
case CypherLexer::REMOVE: }
case CypherLexer::WITH: } break;
case CypherLexer::DISTINCT: case Token::SPACE:
case CypherLexer::RETURN:
case CypherLexer::ORDER:
case CypherLexer::BY:
case CypherLexer::L_SKIP:
case CypherLexer::LIMIT:
case CypherLexer::ASCENDING:
case CypherLexer::ASC:
case CypherLexer::DESCENDING:
case CypherLexer::DESC:
case CypherLexer::WHERE:
case CypherLexer::OR:
case CypherLexer::XOR:
case CypherLexer::AND:
case CypherLexer::NOT:
case CypherLexer::IN:
case CypherLexer::STARTS:
case CypherLexer::ENDS:
case CypherLexer::CONTAINS:
case CypherLexer::IS:
// We don't strip NULL, since it can appear in special expressions like IS
// NULL and IS NOT NULL.
case CypherLexer::CYPHERNULL:
case CypherLexer::COUNT:
case CypherLexer::FILTER:
case CypherLexer::EXTRACT:
case CypherLexer::ANY:
case CypherLexer::NONE:
case CypherLexer::SINGLE:
token_strings.push_back(utils::ToLowerCase(token->getText()));
break; break;
case Token::STRING:
case CypherLexer::SP: replace_stripped(token_index, ParseStringLiteral(token.second),
case Token::EOF:
break;
case CypherLexer::DecimalInteger:
case CypherLexer::HexInteger:
case CypherLexer::OctalInteger:
replace_stripped(position, ParseIntegerLiteral(token->getText()),
kStrippedIntToken);
break;
case CypherLexer::StringLiteral:
replace_stripped(position, ParseStringLiteral(token->getText()),
kStrippedStringToken); kStrippedStringToken);
break; break;
case Token::INT:
case CypherLexer::RegularDecimalReal: replace_stripped(token_index, ParseIntegerLiteral(token.second),
case CypherLexer::ExponentDecimalReal: kStrippedIntToken);
replace_stripped(position, ParseDoubleLiteral(token->getText()), break;
case Token::REAL:
replace_stripped(token_index, ParseDoubleLiteral(token.second),
kStrippedDoubleToken); kStrippedDoubleToken);
break; break;
case CypherLexer::TRUE: case Token::SPECIAL:
replace_stripped(position, true, kStrippedBooleanToken); case Token::ESCAPED_NAME:
break; case Token::UNESCAPED_NAME:
case CypherLexer::FALSE: token_strings.push_back(token.second);
replace_stripped(position, false, kStrippedBooleanToken);
break;
default:
token_strings.push_back(token->getText());
break; break;
} }
} }
@ -129,4 +114,245 @@ StrippedQuery::StrippedQuery(const std::string &query) {
query_ = utils::Join(token_strings, " "); query_ = utils::Join(token_strings, " ");
hash_ = fnv(query_); hash_ = fnv(query_);
} }
// Returns the bytes of the first UTF-8 encoded symbol found at `_s`.
//
// The length of the symbol (1 to 4 bytes) is derived from the bit pattern of
// the lead byte; every following byte must then be a continuation byte
// (10xxxxxx). Throws LexingException("Invalid character") if the lead byte has
// no recognised pattern or a continuation byte is malformed.
std::string StrippedQuery::GetFirstUtf8Symbol(const char *_s) const {
  // The bytes are inspected through a uint8_t pointer. See
  // https://stackoverflow.com/questions/16260033/reinterpret-cast-between-char-and-stduint8-t-safe
  // for why this is only done when uint8_t is really unsigned char.
  static_assert(
      std::is_same<std::uint8_t, unsigned char>::value,
      "This library requires std::uint8_t to be implemented as unsigned char.");
  const uint8_t *bytes = reinterpret_cast<const uint8_t *>(_s);

  // Classify the lead byte: 0xxxxxxx, 110xxxxx, 1110xxxx or 11110xxx.
  const uint8_t lead = bytes[0];
  int symbol_len = 0;
  if ((lead >> 7) == 0x00) {
    symbol_len = 1;
  } else if ((lead >> 5) == 0x06) {
    symbol_len = 2;
  } else if ((lead >> 4) == 0x0e) {
    symbol_len = 3;
  } else if ((lead >> 3) == 0x1e) {
    symbol_len = 4;
  } else {
    throw LexingException("Invalid character");
  }

  // Continuation bytes are checked front to back, so a terminating NUL is
  // rejected before any byte behind it is read.
  for (int k = 1; k < symbol_len; ++k) {
    if ((bytes[k] >> 6) != 0x02) throw LexingException("Invalid character");
  }
  return std::string(_s, _s + symbol_len);
}
// From here until end of file there are functions that calculate matches for
// every possible token. Functions are more or less compatible with Cypher.g4
// grammar. Unfortunately, they contain a lot of special cases and shouldn't be
// changed without good reasons.
//
// Here be dragons, do not touch!
// ____ __
// { --.\ | .)%%%)%%
// '-._\\ | (\___ %)%%(%%(%%%
// `\\|{/ ^ _)-%(%%%%)%%;%%%
// .'^^^^^^^ /` %%)%%%%)%%%'
// //\ ) , / '%%%%(%%'
// , _.'/ `\<-- \<
// `^^^` ^^ ^^
// Returns the length of the longest keyword from kKeywords that matches
// original_ at byte offset `start`, or 0 if no keyword matches there. The
// comparison lowercases the query text, so keywords match case-insensitively.
int StrippedQuery::MatchKeyword(int start) const {
  const int remaining = static_cast<int>(original_.size()) - start;
  int match = 0;
  for (const auto &s : kKeywords) {
    const int len = s.size();
    // Skip keywords that are shorter than the best match so far or that would
    // run past the end of the query.
    if (len < match || len > remaining) continue;
    int i = 0;
    // The query may contain non-ASCII UTF-8 bytes. Passing a negative char to
    // tolower is undefined behaviour, so convert to unsigned char first.
    while (i < len &&
           s[i] == tolower(static_cast<unsigned char>(original_[start + i]))) {
      ++i;
    }
    if (i == len) {
      match = len;
    }
  }
  return match;
}
// Returns the length of the longest entry of kSpecialTokens that appears in
// original_ exactly at byte offset `start`, or 0 if none does.
int StrippedQuery::MatchSpecial(int start) const {
  int longest = 0;
  for (const auto &special : kSpecialTokens) {
    // compare() yields 0 only when the query text at `start` equals the token.
    if (original_.compare(start, special.size(), special) != 0) continue;
    const int special_len = static_cast<int>(special.size());
    if (special_len > longest) longest = special_len;
  }
  return longest;
}
// Returns the length (including both quotes) of the string literal starting
// at byte offset `start` of original_, or 0 if there is no terminated literal.
// A literal is opened by ' or " and closed by the same, unescaped, character;
// a backslash makes the following character part of the literal.
int StrippedQuery::MatchString(int start) const {
  const char quote = original_[start];
  if (quote != '"' && quote != '\'') return 0;

  const char *const begin = original_.data() + start;
  bool skip_next = false;
  // The scan stops at the first NUL byte, i.e. at the end of the buffer at the
  // latest.
  for (const char *cur = begin + 1; *cur != '\0'; ++cur) {
    if (skip_next) {
      // Character after a backslash: never closes the literal.
      skip_next = false;
      continue;
    }
    if (*cur == quote) return cur - begin + 1;
    if (*cur == '\\') skip_next = true;
  }
  // Ran out of input before the closing quote.
  return 0;
}
// Returns the length of the decimal integer starting at byte offset `start`
// of original_, or 0 if the character there is not a digit. A leading '0' is
// matched as the one-character integer "0".
int StrippedQuery::MatchDecimalInt(int start) const {
  if (original_[start] == '0') return 1;

  const int size = static_cast<int>(original_.size());
  int end = start;
  for (; end < size; ++end) {
    const char c = original_[end];
    if (c < '0' || c > '9') break;
  }
  return end - start;
}
// Returns the length of the octal integer ('0' followed by one or more digits
// in 0-7) starting at byte offset `start` of original_, or 0 if there is none.
int StrippedQuery::MatchOctalInt(int start) const {
  if (original_[start] != '0') return 0;

  const int size = static_cast<int>(original_.size());
  int end = start + 1;
  while (end < size && original_[end] >= '0' && original_[end] <= '7') {
    ++end;
  }
  // A '0' with no octal digit after it is not an octal integer.
  return end == start + 1 ? 0 : end - start;
}
// Returns the length of the hexadecimal integer ("0x" followed by one or more
// hex digits of either case) starting at byte offset `start` of original_, or
// 0 if there is none. Only a lowercase 'x' is accepted in the prefix.
int StrippedQuery::MatchHexadecimalInt(int start) const {
  const int size = static_cast<int>(original_.size());
  if (original_[start] != '0') return 0;
  if (start + 1 >= size) return 0;
  if (original_[start + 1] != 'x') return 0;

  const auto is_hex_digit = [](char c) {
    return ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') ||
           ('A' <= c && c <= 'F');
  };
  int end = start + 2;
  while (end < size && is_hex_digit(original_[end])) {
    ++end;
  }
  // The prefix alone, without any digit, is not a number.
  if (end == start + 2) return 0;
  return end - start;
}
// Returns the length of the longest numeric prefix of the form
//   digits* [ '.' digits+ ] [ ('e' | 'E') [ '-' ] digits+ ]
// starting at byte offset `start` of original_, or 0 if there is none.
//
// A plain run of digits is matched as well; the caller resolves the tie with
// the integer matchers. A trailing '.', 'e' or "e-" that is not followed by a
// digit is not part of the match (so "1..2" yields 1 and "4e" yields 1).
//
// The machine begins in START rather than BEFORE_DOT so that an exponent
// marker is accepted only after at least one digit. Otherwise input such as
// the identifier "e5" (or "e-5") would be reported as a real number.
int StrippedQuery::MatchReal(int start) const {
  enum class State { START, BEFORE_DOT, DOT, AFTER_DOT, E, E_MINUS, AFTER_E };
  State state = State::START;
  auto i = start;
  while (i < static_cast<int>(original_.size())) {
    if (original_[i] == '.') {
      // Only one dot, and only before the exponent.
      if (state != State::START && state != State::BEFORE_DOT) break;
      state = State::DOT;
    } else if ('0' <= original_[i] && original_[i] <= '9') {
      if (state == State::START) {
        state = State::BEFORE_DOT;
      } else if (state == State::DOT) {
        state = State::AFTER_DOT;
      } else if (state == State::E || state == State::E_MINUS) {
        state = State::AFTER_E;
      }
    } else if (original_[i] == 'e' || original_[i] == 'E') {
      // The exponent must directly follow a digit of the mantissa.
      if (state != State::BEFORE_DOT && state != State::AFTER_DOT) break;
      state = State::E;
    } else if (original_[i] == '-') {
      // A minus sign is allowed only right after the exponent marker.
      if (state != State::E) break;
      state = State::E_MINUS;
    } else {
      break;
    }
    ++i;
  }
  // Give back characters that were consumed speculatively but never completed
  // by a digit.
  if (state == State::DOT) --i;
  if (state == State::E) --i;
  if (state == State::E_MINUS) i -= 2;
  return i - start;
}
// Returns the length of the escaped (backtick-quoted) name starting at byte
// offset `start` of original_, or 0 if there is none. Directly adjacent
// quoted segments, as in `a``b`, are consumed as a single name.
int StrippedQuery::MatchEscapedName(int start) const {
  const int size = original_.size();
  int end = start;
  while (end < size && original_[end] == '`') {
    // Look for the backtick that closes this segment.
    int closing = end + 1;
    while (closing < size && original_[closing] != '`') {
      ++closing;
    }
    // Unterminated segment: keep only what was matched before it.
    if (closing == size) break;
    end = closing + 1;
  }
  return end - start;
}
// Returns the length in bytes of the unescaped name starting at byte offset
// `start` of original_, or 0 if there is none. The first UTF-8 symbol must be
// in kUnescapedNameAllowedStarts and every following symbol in
// kUnescapedNameAllowedParts. GetFirstUtf8Symbol may throw LexingException on
// malformed input.
int StrippedQuery::MatchUnescapedName(int start) const {
  const int size = static_cast<int>(original_.size());

  const auto first = GetFirstUtf8Symbol(original_.data() + start);
  if (kUnescapedNameAllowedStarts.count(first) == 0) return 0;

  int end = start + static_cast<int>(first.size());
  while (end < size) {
    const auto symbol = GetFirstUtf8Symbol(original_.data() + end);
    if (kUnescapedNameAllowedParts.count(symbol) == 0) break;
    end += static_cast<int>(symbol.size());
  }
  return end - start;
}
// Returns the length in bytes of the run of whitespace and comments starting
// at byte offset `start` of original_, or 0 if there is none.
//
// Whitespace symbols are those in kSpaceParts. A block comment runs from "/*"
// to the next "*/". A line comment runs from "//" to "\n", "\r\n" or the end
// of the query. A line comment cut short by a '\r' that is not followed by
// '\n', or a block comment that is never closed, is not matched; the match
// then ends where that comment started. GetFirstUtf8Symbol may throw
// LexingException on malformed input.
int StrippedQuery::MatchWhitespaceAndComments(int start) const {
  enum class State { OUT, IN_LINE_COMMENT, IN_BLOCK_COMMENT };
  State state = State::OUT;
  int i = start;
  int len = original_.size();
  // We need to remember at which position the comment started because if we
  // fail to match the end of the comment we have a match only up to the
  // position where the comment started.
  int comment_position = -1;
  while (i < len) {
    if (state == State::OUT) {
      auto s = GetFirstUtf8Symbol(original_.data() + i);
      if (kSpaceParts.count(s)) {
        i += s.size();
      } else if (i + 1 < len && original_[i] == '/' &&
                 original_[i + 1] == '*') {
        comment_position = i;
        state = State::IN_BLOCK_COMMENT;
        i += 2;
      } else if (i + 1 < len && original_[i] == '/' &&
                 original_[i + 1] == '/') {
        comment_position = i;
        state = State::IN_LINE_COMMENT;
        i += 2;
      } else {
        break;
      }
    } else if (state == State::IN_LINE_COMMENT) {
      if (original_[i] == '\n') {
        state = State::OUT;
        ++i;
      } else if (i + 1 < len && original_[i] == '\r' &&
                 original_[i + 1] == '\n') {
        state = State::OUT;
        i += 2;
      } else if (original_[i] == '\r') {
        // A '\r' that is not followed by '\n' does not terminate the comment.
        break;
      } else if (i + 1 == len) {
        // The last character of the query also ends the comment.
        state = State::OUT;
        ++i;
      } else {
        ++i;
      }
    } else if (state == State::IN_BLOCK_COMMENT) {
      if (i + 1 < len && original_[i] == '*' && original_[i + 1] == '/') {
        i += 2;
        state = State::OUT;
      } else {
        ++i;
      }
    }
  }
  // A line comment with an empty body that runs straight into the end of the
  // query ("... //") never enters the loop body above, but it is terminated by
  // the end of input just like a non-empty one.
  if (state == State::IN_LINE_COMMENT && i == len) return i - start;
  if (state != State::OUT) return comment_position - start;
  return i - start;
}
} }

View File

@ -49,6 +49,23 @@ class StrippedQuery {
HashType hash() const { return hash_; } HashType hash() const { return hash_; }
private: private:
// Returns the first UTF-8 encoded symbol (1 to 4 bytes) found at s. Throws
// LexingException if the bytes at s do not form a valid symbol.
std::string GetFirstUtf8Symbol(const char *s) const;
// The Match* functions below take a byte offset into original_ and return the
// length in bytes of the token of the given kind that starts there, or 0.
// Return len of matched keyword if something is matched, otherwise 0.
int MatchKeyword(int start) const;
// String literal delimited by ' or ", backslash escapes the next character.
int MatchString(int start) const;
// Longest entry of kSpecialTokens found at start.
int MatchSpecial(int start) const;
// Run of decimal digits; a leading '0' is matched on its own.
int MatchDecimalInt(int start) const;
// '0' followed by at least one digit in 0-7.
int MatchOctalInt(int start) const;
// "0x" followed by at least one hexadecimal digit.
int MatchHexadecimalInt(int start) const;
// Number with optional fraction and optional exponent.
int MatchReal(int start) const;
// One or more directly adjacent backtick-quoted segments.
int MatchEscapedName(int start) const;
// Name built from kUnescapedNameAllowedStarts / kUnescapedNameAllowedParts.
int MatchUnescapedName(int start) const;
// Run of kSpaceParts symbols, block comments and line comments.
int MatchWhitespaceAndComments(int start) const;
// Original query.
std::string original_;
// Stripped query. // Stripped query.
std::string query_; std::string query_;

File diff suppressed because it is too large Load Diff

View File

@ -12,6 +12,7 @@
#include "query/frontend/ast/ast.hpp" #include "query/frontend/ast/ast.hpp"
#include "query/frontend/ast/cypher_main_visitor.hpp" #include "query/frontend/ast/cypher_main_visitor.hpp"
#include "query/frontend/opencypher/parser.hpp" #include "query/frontend/opencypher/parser.hpp"
#include "query/frontend/stripped.hpp"
#include "query/typed_value.hpp" #include "query/typed_value.hpp"
namespace { namespace {
@ -83,11 +84,38 @@ class ClonedAstGenerator {
Query *query_; Query *query_;
}; };
// This generator strips ast, clones it and then plugs stripped out literals in
// the same way it is done in ast caching in interpreter.
class CachedAstGenerator {
public:
// Builds the AST for `query`. Members are initialized in declaration order, so
// the lambda that fills storage_ can already use context_ and query_string_.
CachedAstGenerator(const std::string &query)
: dbms_(),
db_accessor_(dbms_.active()),
context_(Config{}, *db_accessor_),
query_string_(query),
storage_([&]() {
// Strip the literals, parse the stripped text, build the AST from the parse
// tree and finally plug the stripped literals back into a cached copy.
StrippedQuery stripped(query_string_);
::frontend::opencypher::Parser parser(stripped.query());
CypherMainVisitor visitor(context_);
visitor.visit(parser.tree());
CachedAst cached(std::move(visitor.storage()));
return cached.Plug(stripped.literals());
}()),
query_(storage_.query()) {}
Dbms dbms_;
std::unique_ptr<GraphDbAccessor> db_accessor_;
Context context_;
// Copy of the query text passed to the constructor.
std::string query_string_;
// Storage returned by CachedAst::Plug; query_ is obtained from it.
AstTreeStorage storage_;
Query *query_;
};
template <typename T> template <typename T>
class CypherMainVisitorTest : public ::testing::Test {}; class CypherMainVisitorTest : public ::testing::Test {};
typedef ::testing::Types<AstGenerator, OriginalAfterCloningAstGenerator, typedef ::testing::Types<AstGenerator, OriginalAfterCloningAstGenerator,
ClonedAstGenerator> ClonedAstGenerator, CachedAstGenerator>
AstGeneratorTypes; AstGeneratorTypes;
TYPED_TEST_CASE(CypherMainVisitorTest, AstGeneratorTypes); TYPED_TEST_CASE(CypherMainVisitorTest, AstGeneratorTypes);

View File

@ -10,6 +10,8 @@
using namespace query; using namespace query;
namespace {
void EXPECT_PROP_TRUE(const TypedValue& a) { void EXPECT_PROP_TRUE(const TypedValue& a) {
EXPECT_TRUE(a.type() == TypedValue::Type::Bool && a.Value<bool>()); EXPECT_TRUE(a.type() == TypedValue::Type::Bool && a.Value<bool>());
} }
@ -24,6 +26,15 @@ TEST(QueryStripper, NoLiterals) {
EXPECT_EQ(stripped.query(), "create ( n )"); EXPECT_EQ(stripped.query(), "create ( n )");
} }
// A lone "0" is stripped as an integer literal with value 0, recorded at token
// position 2 of the stripped query.
TEST(QueryStripper, ZeroInteger) {
StrippedQuery stripped("RETURN 0");
EXPECT_EQ(stripped.literals().size(), 1);
EXPECT_EQ(stripped.literals().At(0).first, 2);
EXPECT_EQ(stripped.literals().At(0).second.Value<int64_t>(), 0);
EXPECT_EQ(stripped.literals().AtTokenPosition(2).Value<int64_t>(), 0);
EXPECT_EQ(stripped.query(), "return " + kStrippedIntToken);
}
TEST(QueryStripper, DecimalInteger) { TEST(QueryStripper, DecimalInteger) {
StrippedQuery stripped("RETURN 42"); StrippedQuery stripped("RETURN 42");
EXPECT_EQ(stripped.literals().size(), 1); EXPECT_EQ(stripped.literals().size(), 1);
@ -61,6 +72,27 @@ TEST(QueryStripper, ExponentDecimal) {
EXPECT_EQ(stripped.query(), "return " + kStrippedDoubleToken); EXPECT_EQ(stripped.query(), "return " + kStrippedDoubleToken);
} }
// Integer mantissa with a negative exponent is stripped as a double literal.
TEST(QueryStripper, ExponentDecimal2) {
StrippedQuery stripped("RETURN 4e-2");
EXPECT_EQ(stripped.literals().size(), 1);
EXPECT_FLOAT_EQ(stripped.literals().At(0).second.Value<double>(), 4e-2);
EXPECT_EQ(stripped.query(), "return " + kStrippedDoubleToken);
}
// Fractional mantissa with a negative exponent is stripped as a double literal.
TEST(QueryStripper, ExponentDecimal3) {
StrippedQuery stripped("RETURN 0.1e-2");
EXPECT_EQ(stripped.literals().size(), 1);
EXPECT_FLOAT_EQ(stripped.literals().At(0).second.Value<double>(), 0.1e-2);
EXPECT_EQ(stripped.query(), "return " + kStrippedDoubleToken);
}
// A mantissa that starts with the dot (no leading digit) is also a double.
TEST(QueryStripper, ExponentDecimal4) {
StrippedQuery stripped("RETURN .1e-2");
EXPECT_EQ(stripped.literals().size(), 1);
EXPECT_FLOAT_EQ(stripped.literals().At(0).second.Value<double>(), .1e-2);
EXPECT_EQ(stripped.query(), "return " + kStrippedDoubleToken);
}
TEST(QueryStripper, StringLiteral) { TEST(QueryStripper, StringLiteral) {
StrippedQuery stripped("RETURN 'something'"); StrippedQuery stripped("RETURN 'something'");
EXPECT_EQ(stripped.literals().size(), 1); EXPECT_EQ(stripped.literals().size(), 1);
@ -68,13 +100,40 @@ TEST(QueryStripper, StringLiteral) {
EXPECT_EQ(stripped.query(), "return " + kStrippedStringToken); EXPECT_EQ(stripped.query(), "return " + kStrippedStringToken);
} }
TEST(QueryStripper, BoolLiteral) { TEST(QueryStripper, StringLiteral2) {
StrippedQuery stripped("RETURN true"); StrippedQuery stripped("RETURN 'so\\'me'");
EXPECT_EQ(stripped.literals().size(), 1);
EXPECT_EQ(stripped.literals().At(0).second.Value<std::string>(), "so'me");
EXPECT_EQ(stripped.query(), "return " + kStrippedStringToken);
}
// Double-quoted string containing an escaped double quote and a plain single
// quote; only the unescaped double quote closes the literal.
TEST(QueryStripper, StringLiteral3) {
StrippedQuery stripped("RETURN \"so\\\"me'\"");
EXPECT_EQ(stripped.literals().size(), 1);
EXPECT_EQ(stripped.literals().At(0).second.Value<std::string>(), "so\"me'");
EXPECT_EQ(stripped.query(), "return " + kStrippedStringToken);
}
TEST(QueryStripper, TrueLiteral) {
StrippedQuery stripped("RETURN trUE");
EXPECT_EQ(stripped.literals().size(), 1); EXPECT_EQ(stripped.literals().size(), 1);
EXPECT_PROP_EQ(stripped.literals().At(0).second, TypedValue(true)); EXPECT_PROP_EQ(stripped.literals().At(0).second, TypedValue(true));
EXPECT_EQ(stripped.query(), "return " + kStrippedBooleanToken); EXPECT_EQ(stripped.query(), "return " + kStrippedBooleanToken);
} }
// The keyword "false" is recognised regardless of case and stripped as a
// boolean literal.
TEST(QueryStripper, FalseLiteral) {
StrippedQuery stripped("RETURN fAlse");
EXPECT_EQ(stripped.literals().size(), 1);
EXPECT_PROP_EQ(stripped.literals().At(0).second, TypedValue(false));
EXPECT_EQ(stripped.query(), "return " + kStrippedBooleanToken);
}
// "null" is lowercased but, unlike true/false, is not stripped as a literal.
TEST(QueryStripper, NullLiteral) {
StrippedQuery stripped("RETURN NuLl");
EXPECT_EQ(stripped.literals().size(), 0);
EXPECT_EQ(stripped.query(), "return null");
}
TEST(QueryStripper, ListLiteral) { TEST(QueryStripper, ListLiteral) {
StrippedQuery stripped("MATCH (n) RETURN [n, n.prop]"); StrippedQuery stripped("MATCH (n) RETURN [n, n.prop]");
EXPECT_EQ(stripped.literals().size(), 0); EXPECT_EQ(stripped.literals().size(), 0);
@ -96,3 +155,71 @@ TEST(QueryStripper, RangeLiteral) {
" .. " + kStrippedIntToken + " .. " + kStrippedIntToken +
" ] - ( ) return n"); " ] - ( ) return n");
} }
// Two adjacent backtick-quoted segments are kept together as one token.
TEST(QueryStripper, EscapedName) {
StrippedQuery stripped("MATCH (n:`mirko``slavko`)");
EXPECT_EQ(stripped.literals().size(), 0);
EXPECT_EQ(stripped.query(), "match ( n : `mirko``slavko` )");
}
// A plain ASCII name is emitted unchanged as a single token.
TEST(QueryStripper, UnescapedName) {
StrippedQuery stripped("MATCH (n:peropero)");
EXPECT_EQ(stripped.literals().size(), 0);
EXPECT_EQ(stripped.query(), "match ( n : peropero )");
}
// A name mixing non-ASCII (multi-byte UTF-8) symbols with ASCII letters is
// emitted unchanged as a single token.
TEST(QueryStripper, UnescapedName2) {
StrippedQuery stripped(u8"MATCH (n:\uffd5\u04c2\u04c2pero\u0078pe)");
EXPECT_EQ(stripped.literals().size(), 0);
EXPECT_EQ(stripped.query(), u8"match ( n : \uffd5\u04c2\u04c2pero\u0078pe )");
}
// Keywords are matched case-insensitively and emitted in lowercase.
TEST(QueryStripper, MixedCaseKeyword) {
StrippedQuery stripped("MaTch (n:peropero)");
EXPECT_EQ(stripped.literals().size(), 0);
EXPECT_EQ(stripped.query(), "match ( n : peropero )");
}
// A block comment is dropped even when it spans several lines and contains a
// slash and a "//" sequence.
TEST(QueryStripper, BlockComment) {
StrippedQuery stripped("MaTch (n:/**fhf/gf\n\r\n//fjhf*/peropero)");
EXPECT_EQ(stripped.literals().size(), 0);
EXPECT_EQ(stripped.query(), "match ( n : peropero )");
}
// A line comment terminated by "\n" is dropped; lexing continues after it.
TEST(QueryStripper, LineComment1) {
StrippedQuery stripped("MaTch (n:peropero) // komentar\nreturn n");
EXPECT_EQ(stripped.literals().size(), 0);
EXPECT_EQ(stripped.query(), "match ( n : peropero ) return n");
}
// A line comment terminated by "\r\n" is dropped; lexing continues after it.
TEST(QueryStripper, LineComment2) {
StrippedQuery stripped("MaTch (n:peropero) // komentar\r\nreturn n");
EXPECT_EQ(stripped.literals().size(), 0);
EXPECT_EQ(stripped.query(), "match ( n : peropero ) return n");
}
// A line comment that runs to the end of the query is dropped.
TEST(QueryStripper, LineComment3) {
StrippedQuery stripped("MaTch (n:peropero) return n // komentar");
EXPECT_EQ(stripped.literals().size(), 0);
EXPECT_EQ(stripped.query(), "match ( n : peropero ) return n");
}
// A line comment followed by a lone "\r" is not recognised as a comment, so its
// text is lexed as ordinary tokens.
TEST(QueryStripper, LineComment4) {
StrippedQuery stripped("MaTch (n:peropero) return n // komentar\r");
EXPECT_EQ(stripped.literals().size(), 0);
// Didn't manage to parse comment because it ends with \r.
EXPECT_EQ(stripped.query(), "match ( n : peropero ) return n / / komentar");
}
// Mixed whitespace, including the non-ASCII spaces U+202F and U+2007, collapses
// into the single separator of the stripped query.
TEST(QueryStripper, Spaces) {
StrippedQuery stripped(u8"RETURN \r\n\u202f\t\u2007 NuLl");
EXPECT_EQ(stripped.literals().size(), 0);
EXPECT_EQ(stripped.query(), "return null");
}
// Special tokens are split by taking the longest token available at each
// position: "+", "+=", ".." and ".".
TEST(QueryStripper, OtherTokens) {
StrippedQuery stripped("++=...");
EXPECT_EQ(stripped.literals().size(), 0);
EXPECT_EQ(stripped.query(), "+ += .. .");
}
}