Implementation of new lexer

Summary: WORK IN PROGRESS

Reviewers: buda, teon.banek, dgleich

Reviewed By: dgleich

Subscribers: dgleich, pullbot

Differential Revision: https://phabricator.memgraph.io/D496
This commit is contained in:
Mislav Bradac 2017-06-21 16:10:52 +02:00
parent e86d73eb98
commit 3a922de963
8 changed files with 17017 additions and 99 deletions

View File

@ -12,6 +12,12 @@ class QueryException : public utils::BasicException {
using utils::BasicException::BasicException;
};
class LexingException : public QueryException {
public:
using QueryException::QueryException;
LexingException() : QueryException("") {}
};
class SyntaxException : public QueryException {
public:
using QueryException::QueryException;

View File

@ -1408,10 +1408,12 @@ class CachedAst {
bool Visit(PrimitiveLiteral &literal) override {
// TODO: If literal is a part of NamedExpression then we need to change
// text in NamedExpression, otherwise wrong header will be returned.
permanent_assert(
literal.token_position_ != -1,
"Use AstPlugLiteralsVisitor only on ast created by parsing queries");
if (!literal.value_.IsNull()) {
permanent_assert(literal.token_position_ != -1,
"Use AstPlugLiteralsVisitor only on ast created by "
"parsing queries");
literal.value_ = parameters_.AtTokenPosition(literal.token_position_);
}
return true;
}

View File

@ -83,7 +83,7 @@ returnItem : ( expression SP AS SP variable )
| expression
;
order : ORDER SP BY SP sortItem ( ',' SP? sortItem )* ;
order : ORDER SP BY SP sortItem ( SP? ',' SP? sortItem )* ;
skip : L_SKIP SP expression ;
@ -102,7 +102,7 @@ patternPart : ( variable SP? '=' SP? anonymousPatternPart )
anonymousPatternPart : patternElement ;
patternElement : ( nodePattern ( SP? patternElementChain )* )
| ( '(' patternElement ')' )
| ( '(' SP? patternElement SP? ')' )
;
nodePattern : '(' SP? ( variable SP? )? ( nodeLabels SP? )? ( properties SP? )? ')' ;

View File

@ -1,35 +1,64 @@
#include "query/frontend/stripped.hpp"
#include <cctype>
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>
#include "antlr4-runtime.h"
#include "query/common.hpp"
#include "query/exceptions.hpp"
#include "query/frontend/opencypher/generated/CypherBaseVisitor.h"
#include "query/frontend/opencypher/generated/CypherLexer.h"
#include "query/frontend/opencypher/generated/CypherParser.h"
#include "query/frontend/stripped_lexer_constants.hpp"
#include "utils/assert.hpp"
#include "utils/hashing/fnv.hpp"
#include "utils/string.hpp"
using namespace antlropencypher;
using namespace antlr4;
namespace query {
StrippedQuery::StrippedQuery(const std::string &query) {
// Tokenize the query.
ANTLRInputStream input(query);
CypherLexer lexer(&input);
CommonTokenStream token_stream(&lexer);
token_stream.fill();
auto tokens = token_stream.getTokens();
using namespace lexer_constants;
StrippedQuery::StrippedQuery(const std::string &query) : original_(query) {
enum class Token {
UNMATCHED,
KEYWORD, // Including true, false and null.
SPECIAL, // +, .., +=, (, { and so on.
STRING,
INT, // Decimal, octal and hexadecimal.
REAL,
ESCAPED_NAME,
UNESCAPED_NAME,
SPACE
};
std::vector<std::pair<Token, std::string>> tokens;
for (int i = 0; i < static_cast<int>(original_.size());) {
Token token = Token::UNMATCHED;
int len = 0;
auto update = [&](int new_len, Token new_token) {
if (new_len > len) {
len = new_len;
token = new_token;
}
};
update(MatchKeyword(i), Token::KEYWORD);
update(MatchSpecial(i), Token::SPECIAL);
update(MatchString(i), Token::STRING);
update(MatchDecimalInt(i), Token::INT);
update(MatchOctalInt(i), Token::INT);
update(MatchHexadecimalInt(i), Token::INT);
update(MatchReal(i), Token::REAL);
update(MatchEscapedName(i), Token::ESCAPED_NAME);
update(MatchUnescapedName(i), Token::UNESCAPED_NAME);
update(MatchWhitespaceAndComments(i), Token::SPACE);
if (token == Token::UNMATCHED) throw LexingException("Invalid query");
tokens.emplace_back(token, original_.substr(i, len));
i += len;
}
// Initialize data structures we return.
std::vector<std::string> token_strings;
token_strings.reserve(tokens.size());
// A helper function that stores literal and its token position in a
// literals_. In stripped query text literal is replaced with a new_value.
// new_value can be any value that is lexed as a literal.
@ -40,88 +69,44 @@ StrippedQuery::StrippedQuery(const std::string &query) {
};
// Convert tokens to strings, perform lowercasing and filtering.
for (const auto *token : tokens) {
for (const auto &token : tokens) {
// Position is calculated in query after stripping and whitespace
// normalisation, not before.
int position = token_strings.size() * 2;
switch (token->getType()) {
case CypherLexer::UNION:
case CypherLexer::ALL:
case CypherLexer::OPTIONAL:
case CypherLexer::MATCH:
case CypherLexer::UNWIND:
case CypherLexer::AS:
case CypherLexer::MERGE:
case CypherLexer::ON:
case CypherLexer::CREATE:
case CypherLexer::SET:
case CypherLexer::DETACH:
case CypherLexer::DELETE:
case CypherLexer::REMOVE:
case CypherLexer::WITH:
case CypherLexer::DISTINCT:
case CypherLexer::RETURN:
case CypherLexer::ORDER:
case CypherLexer::BY:
case CypherLexer::L_SKIP:
case CypherLexer::LIMIT:
case CypherLexer::ASCENDING:
case CypherLexer::ASC:
case CypherLexer::DESCENDING:
case CypherLexer::DESC:
case CypherLexer::WHERE:
case CypherLexer::OR:
case CypherLexer::XOR:
case CypherLexer::AND:
case CypherLexer::NOT:
case CypherLexer::IN:
case CypherLexer::STARTS:
case CypherLexer::ENDS:
case CypherLexer::CONTAINS:
case CypherLexer::IS:
// We don't strip NULL, since it can appear in special expressions like IS
// NULL and IS NOT NULL.
case CypherLexer::CYPHERNULL:
case CypherLexer::COUNT:
case CypherLexer::FILTER:
case CypherLexer::EXTRACT:
case CypherLexer::ANY:
case CypherLexer::NONE:
case CypherLexer::SINGLE:
token_strings.push_back(utils::ToLowerCase(token->getText()));
// normalisation, not before. There will be twice as much tokens before this
// one because space tokens will be inserted between every one.
int token_index = token_strings.size() * 2;
switch (token.first) {
case Token::UNMATCHED:
debug_assert(false, "Shouldn't happen");
case Token::KEYWORD: {
auto s = utils::ToLowerCase(token.second);
// We don't strip NULL, since it can appear in special expressions like
// IS NULL and IS NOT NULL, but we strip true and false keywords.
if (s == "true") {
replace_stripped(token_index, true, kStrippedBooleanToken);
} else if (s == "false") {
replace_stripped(token_index, false, kStrippedBooleanToken);
} else {
token_strings.push_back(s);
}
} break;
case Token::SPACE:
break;
case CypherLexer::SP:
case Token::EOF:
break;
case CypherLexer::DecimalInteger:
case CypherLexer::HexInteger:
case CypherLexer::OctalInteger:
replace_stripped(position, ParseIntegerLiteral(token->getText()),
kStrippedIntToken);
break;
case CypherLexer::StringLiteral:
replace_stripped(position, ParseStringLiteral(token->getText()),
case Token::STRING:
replace_stripped(token_index, ParseStringLiteral(token.second),
kStrippedStringToken);
break;
case CypherLexer::RegularDecimalReal:
case CypherLexer::ExponentDecimalReal:
replace_stripped(position, ParseDoubleLiteral(token->getText()),
case Token::INT:
replace_stripped(token_index, ParseIntegerLiteral(token.second),
kStrippedIntToken);
break;
case Token::REAL:
replace_stripped(token_index, ParseDoubleLiteral(token.second),
kStrippedDoubleToken);
break;
case CypherLexer::TRUE:
replace_stripped(position, true, kStrippedBooleanToken);
break;
case CypherLexer::FALSE:
replace_stripped(position, false, kStrippedBooleanToken);
break;
default:
token_strings.push_back(token->getText());
case Token::SPECIAL:
case Token::ESCAPED_NAME:
case Token::UNESCAPED_NAME:
token_strings.push_back(token.second);
break;
}
}
@ -129,4 +114,245 @@ StrippedQuery::StrippedQuery(const std::string &query) {
query_ = utils::Join(token_strings, " ");
hash_ = fnv(query_);
}
std::string StrippedQuery::GetFirstUtf8Symbol(const char *_s) const {
// According to
// https://stackoverflow.com/questions/16260033/reinterpret-cast-between-char-and-stduint8-t-safe
// this checks if casting from const char * to uint8_t is undefined behaviour.
static_assert(
std::is_same<std::uint8_t, unsigned char>::value,
"This library requires std::uint8_t to be implemented as unsigned char.");
const uint8_t *s = reinterpret_cast<const uint8_t *>(_s);
if ((*s >> 7) == 0x00) return std::string(_s, _s + 1);
if ((*s >> 5) == 0x06) {
auto *s1 = s + 1;
if ((*s1 >> 6) != 0x02) throw LexingException("Invalid character");
return std::string(_s, _s + 2);
}
if ((*s >> 4) == 0x0e) {
auto *s1 = s + 1;
if ((*s1 >> 6) != 0x02) throw LexingException("Invalid character");
auto *s2 = s + 2;
if ((*s2 >> 6) != 0x02) throw LexingException("Invalid character");
return std::string(_s, _s + 3);
}
if ((*s >> 3) == 0x1e) {
auto *s1 = s + 1;
if ((*s1 >> 6) != 0x02) throw LexingException("Invalid character");
auto *s2 = s + 2;
if ((*s2 >> 6) != 0x02) throw LexingException("Invalid character");
auto *s3 = s + 3;
if ((*s3 >> 6) != 0x02) throw LexingException("Invalid character");
return std::string(_s, _s + 4);
}
throw LexingException("Invalid character");
}
// From here until end of file there are functions that calculate matches for
// every possible token. Functions are more or less compatible with Cypher.g4
// grammar. Unfortunately, they contain a lof of special cases and shouldn't be
// changed without good reasons.
//
// Here be dragons, do not touch!
// ____ __
// { --.\ | .)%%%)%%
// '-._\\ | (\___ %)%%(%%(%%%
// `\\|{/ ^ _)-%(%%%%)%%;%%%
// .'^^^^^^^ /` %%)%%%%)%%%'
// //\ ) , / '%%%%(%%'
// , _.'/ `\<-- \<
// `^^^` ^^ ^^
int StrippedQuery::MatchKeyword(int start) const {
int match = 0;
for (const auto &s : kKeywords) {
int len = s.size();
if (len < match) continue;
if (start + len > static_cast<int>(original_.size())) continue;
int i = 0;
while (i < len && s[i] == tolower(original_[start + i])) {
++i;
}
if (i == len) {
match = len;
}
}
return match;
}
int StrippedQuery::MatchSpecial(int start) const {
int match = 0;
for (const auto &s : kSpecialTokens) {
if (!original_.compare(start, s.size(), s)) {
match = std::max(match, static_cast<int>(s.size()));
}
}
return match;
}
int StrippedQuery::MatchString(int start) const {
if (original_[start] != '"' && original_[start] != '\'') return 0;
char start_char = original_[start];
bool escaped = false;
for (auto *p = original_.data() + start + 1; *p; ++p) {
if (escaped) {
escaped = false;
} else if (!escaped) {
if (*p == start_char) return p - (original_.data() + start) + 1;
if (*p == '\\') {
escaped = true;
}
}
}
return 0;
}
int StrippedQuery::MatchDecimalInt(int start) const {
if (original_[start] == '0') return 1;
int i = start;
while (i < static_cast<int>(original_.size()) && '0' <= original_[i] &&
original_[i] <= '9') {
++i;
}
return i - start;
}
int StrippedQuery::MatchOctalInt(int start) const {
if (original_[start] != '0') return 0;
int i = start + 1;
while (i < static_cast<int>(original_.size()) && '0' <= original_[i] &&
original_[i] <= '7') {
++i;
}
if (i == start + 1) return 0;
return i - start;
}
int StrippedQuery::MatchHexadecimalInt(int start) const {
if (original_[start] != '0') return 0;
if (start + 1 >= static_cast<int>(original_.size())) return 0;
if (original_[start + 1] != 'x') return 0;
int i = start + 2;
while (i < static_cast<int>(original_.size()) &&
(('0' <= original_[i] && original_[i] <= '9') ||
('a' <= original_[i] && original_[i] <= 'f') ||
('A' <= original_[i] && original_[i] <= 'F'))) {
++i;
}
if (i == start + 2) return 0;
return i - start;
}
int StrippedQuery::MatchReal(int start) const {
enum class State { BEFORE_DOT, DOT, AFTER_DOT, E, E_MINUS, AFTER_E };
State state = State::BEFORE_DOT;
auto i = start;
while (i < static_cast<int>(original_.size())) {
if (original_[i] == '.') {
if (state != State::BEFORE_DOT) break;
state = State::DOT;
} else if ('0' <= original_[i] && original_[i] <= '9') {
if (state == State::DOT) {
state = State::AFTER_DOT;
} else if (state == State::E || state == State::E_MINUS) {
state = State::AFTER_E;
}
} else if (original_[i] == 'e' || original_[i] == 'E') {
if (state != State::BEFORE_DOT && state != State::AFTER_DOT) break;
state = State::E;
} else if (original_[i] == '-') {
if (state != State::E) break;
state = State::E_MINUS;
} else {
break;
}
++i;
}
if (state == State::DOT) --i;
if (state == State::E) --i;
if (state == State::E_MINUS) i -= 2;
return i - start;
}
int StrippedQuery::MatchEscapedName(int start) const {
int len = original_.size();
int i = start;
while (i < len) {
if (original_[i] != '`') break;
int j = i + 1;
while (j < len && original_[j] != '`') {
++j;
}
if (j == len) break;
i = j + 1;
}
return i - start;
}
int StrippedQuery::MatchUnescapedName(int start) const {
auto i = start;
auto s = GetFirstUtf8Symbol(original_.data() + i);
if (!kUnescapedNameAllowedStarts.count(s)) return 0;
i += s.size();
while (i < static_cast<int>(original_.size())) {
s = GetFirstUtf8Symbol(original_.data() + i);
if (!kUnescapedNameAllowedParts.count(s)) break;
i += s.size();
}
return i - start;
}
int StrippedQuery::MatchWhitespaceAndComments(int start) const {
enum class State { OUT, IN_LINE_COMMENT, IN_BLOCK_COMMENT };
State state = State::OUT;
int i = start;
int len = original_.size();
// We need to remember at which position comment started because if we faile
// to match comment finish we have a match until comment start position.
int comment_position = -1;
while (i < len) {
if (state == State::OUT) {
auto s = GetFirstUtf8Symbol(original_.data() + i);
if (kSpaceParts.count(s)) {
i += s.size();
} else if (i + 1 < len && original_[i] == '/' &&
original_[i + 1] == '*') {
comment_position = i;
state = State::IN_BLOCK_COMMENT;
i += 2;
} else if (i + 1 < len && original_[i] == '/' &&
original_[i + 1] == '/') {
comment_position = i;
state = State::IN_LINE_COMMENT;
i += 2;
} else {
break;
}
} else if (state == State::IN_LINE_COMMENT) {
if (original_[i] == '\n') {
state = State::OUT;
++i;
} else if (i + 1 < len && original_[i] == '\r' &&
original_[i + 1] == '\n') {
state = State::OUT;
i += 2;
} else if (original_[i] == '\r') {
break;
} else if (i + 1 == len) {
state = State::OUT;
++i;
} else {
++i;
}
} else if (state == State::IN_BLOCK_COMMENT) {
if (i + 1 < len && original_[i] == '*' && original_[i + 1] == '/') {
i += 2;
state = State::OUT;
} else {
++i;
}
}
}
if (state != State::OUT) return comment_position - start;
return i - start;
}
}

View File

@ -49,6 +49,23 @@ class StrippedQuery {
HashType hash() const { return hash_; }
private:
std::string GetFirstUtf8Symbol(const char *s) const;
// Return len of matched keyword if something is matched, otherwise 0.
int MatchKeyword(int start) const;
int MatchString(int start) const;
int MatchSpecial(int start) const;
int MatchDecimalInt(int start) const;
int MatchOctalInt(int start) const;
int MatchHexadecimalInt(int start) const;
int MatchReal(int start) const;
int MatchEscapedName(int start) const;
int MatchUnescapedName(int start) const;
int MatchWhitespaceAndComments(int start) const;
// Original query.
std::string original_;
// Stripped query.
std::string query_;

File diff suppressed because it is too large Load Diff

View File

@ -12,6 +12,7 @@
#include "query/frontend/ast/ast.hpp"
#include "query/frontend/ast/cypher_main_visitor.hpp"
#include "query/frontend/opencypher/parser.hpp"
#include "query/frontend/stripped.hpp"
#include "query/typed_value.hpp"
namespace {
@ -83,11 +84,38 @@ class ClonedAstGenerator {
Query *query_;
};
// This generator strips ast, clones it and then plugs stripped out literals in
// the same way it is done in ast cacheing in interpreter.
class CachedAstGenerator {
public:
CachedAstGenerator(const std::string &query)
: dbms_(),
db_accessor_(dbms_.active()),
context_(Config{}, *db_accessor_),
query_string_(query),
storage_([&]() {
StrippedQuery stripped(query_string_);
::frontend::opencypher::Parser parser(stripped.query());
CypherMainVisitor visitor(context_);
visitor.visit(parser.tree());
CachedAst cached(std::move(visitor.storage()));
return cached.Plug(stripped.literals());
}()),
query_(storage_.query()) {}
Dbms dbms_;
std::unique_ptr<GraphDbAccessor> db_accessor_;
Context context_;
std::string query_string_;
AstTreeStorage storage_;
Query *query_;
};
template <typename T>
class CypherMainVisitorTest : public ::testing::Test {};
typedef ::testing::Types<AstGenerator, OriginalAfterCloningAstGenerator,
ClonedAstGenerator>
ClonedAstGenerator, CachedAstGenerator>
AstGeneratorTypes;
TYPED_TEST_CASE(CypherMainVisitorTest, AstGeneratorTypes);

View File

@ -10,6 +10,8 @@
using namespace query;
namespace {
void EXPECT_PROP_TRUE(const TypedValue& a) {
EXPECT_TRUE(a.type() == TypedValue::Type::Bool && a.Value<bool>());
}
@ -24,6 +26,15 @@ TEST(QueryStripper, NoLiterals) {
EXPECT_EQ(stripped.query(), "create ( n )");
}
TEST(QueryStripper, ZeroInteger) {
StrippedQuery stripped("RETURN 0");
EXPECT_EQ(stripped.literals().size(), 1);
EXPECT_EQ(stripped.literals().At(0).first, 2);
EXPECT_EQ(stripped.literals().At(0).second.Value<int64_t>(), 0);
EXPECT_EQ(stripped.literals().AtTokenPosition(2).Value<int64_t>(), 0);
EXPECT_EQ(stripped.query(), "return " + kStrippedIntToken);
}
TEST(QueryStripper, DecimalInteger) {
StrippedQuery stripped("RETURN 42");
EXPECT_EQ(stripped.literals().size(), 1);
@ -61,6 +72,27 @@ TEST(QueryStripper, ExponentDecimal) {
EXPECT_EQ(stripped.query(), "return " + kStrippedDoubleToken);
}
TEST(QueryStripper, ExponentDecimal2) {
StrippedQuery stripped("RETURN 4e-2");
EXPECT_EQ(stripped.literals().size(), 1);
EXPECT_FLOAT_EQ(stripped.literals().At(0).second.Value<double>(), 4e-2);
EXPECT_EQ(stripped.query(), "return " + kStrippedDoubleToken);
}
TEST(QueryStripper, ExponentDecimal3) {
StrippedQuery stripped("RETURN 0.1e-2");
EXPECT_EQ(stripped.literals().size(), 1);
EXPECT_FLOAT_EQ(stripped.literals().At(0).second.Value<double>(), 0.1e-2);
EXPECT_EQ(stripped.query(), "return " + kStrippedDoubleToken);
}
TEST(QueryStripper, ExponentDecimal4) {
StrippedQuery stripped("RETURN .1e-2");
EXPECT_EQ(stripped.literals().size(), 1);
EXPECT_FLOAT_EQ(stripped.literals().At(0).second.Value<double>(), .1e-2);
EXPECT_EQ(stripped.query(), "return " + kStrippedDoubleToken);
}
TEST(QueryStripper, StringLiteral) {
StrippedQuery stripped("RETURN 'something'");
EXPECT_EQ(stripped.literals().size(), 1);
@ -68,13 +100,40 @@ TEST(QueryStripper, StringLiteral) {
EXPECT_EQ(stripped.query(), "return " + kStrippedStringToken);
}
TEST(QueryStripper, BoolLiteral) {
StrippedQuery stripped("RETURN true");
TEST(QueryStripper, StringLiteral2) {
StrippedQuery stripped("RETURN 'so\\'me'");
EXPECT_EQ(stripped.literals().size(), 1);
EXPECT_EQ(stripped.literals().At(0).second.Value<std::string>(), "so'me");
EXPECT_EQ(stripped.query(), "return " + kStrippedStringToken);
}
TEST(QueryStripper, StringLiteral3) {
StrippedQuery stripped("RETURN \"so\\\"me'\"");
EXPECT_EQ(stripped.literals().size(), 1);
EXPECT_EQ(stripped.literals().At(0).second.Value<std::string>(), "so\"me'");
EXPECT_EQ(stripped.query(), "return " + kStrippedStringToken);
}
TEST(QueryStripper, TrueLiteral) {
StrippedQuery stripped("RETURN trUE");
EXPECT_EQ(stripped.literals().size(), 1);
EXPECT_PROP_EQ(stripped.literals().At(0).second, TypedValue(true));
EXPECT_EQ(stripped.query(), "return " + kStrippedBooleanToken);
}
TEST(QueryStripper, FalseLiteral) {
StrippedQuery stripped("RETURN fAlse");
EXPECT_EQ(stripped.literals().size(), 1);
EXPECT_PROP_EQ(stripped.literals().At(0).second, TypedValue(false));
EXPECT_EQ(stripped.query(), "return " + kStrippedBooleanToken);
}
TEST(QueryStripper, NullLiteral) {
StrippedQuery stripped("RETURN NuLl");
EXPECT_EQ(stripped.literals().size(), 0);
EXPECT_EQ(stripped.query(), "return null");
}
TEST(QueryStripper, ListLiteral) {
StrippedQuery stripped("MATCH (n) RETURN [n, n.prop]");
EXPECT_EQ(stripped.literals().size(), 0);
@ -96,3 +155,71 @@ TEST(QueryStripper, RangeLiteral) {
" .. " + kStrippedIntToken +
" ] - ( ) return n");
}
TEST(QueryStripper, EscapedName) {
StrippedQuery stripped("MATCH (n:`mirko``slavko`)");
EXPECT_EQ(stripped.literals().size(), 0);
EXPECT_EQ(stripped.query(), "match ( n : `mirko``slavko` )");
}
TEST(QueryStripper, UnescapedName) {
StrippedQuery stripped("MATCH (n:peropero)");
EXPECT_EQ(stripped.literals().size(), 0);
EXPECT_EQ(stripped.query(), "match ( n : peropero )");
}
TEST(QueryStripper, UnescapedName2) {
StrippedQuery stripped(u8"MATCH (n:\uffd5\u04c2\u04c2pero\u0078pe)");
EXPECT_EQ(stripped.literals().size(), 0);
EXPECT_EQ(stripped.query(), u8"match ( n : \uffd5\u04c2\u04c2pero\u0078pe )");
}
TEST(QueryStripper, MixedCaseKeyword) {
StrippedQuery stripped("MaTch (n:peropero)");
EXPECT_EQ(stripped.literals().size(), 0);
EXPECT_EQ(stripped.query(), "match ( n : peropero )");
}
TEST(QueryStripper, BlockComment) {
StrippedQuery stripped("MaTch (n:/**fhf/gf\n\r\n//fjhf*/peropero)");
EXPECT_EQ(stripped.literals().size(), 0);
EXPECT_EQ(stripped.query(), "match ( n : peropero )");
}
TEST(QueryStripper, LineComment1) {
StrippedQuery stripped("MaTch (n:peropero) // komentar\nreturn n");
EXPECT_EQ(stripped.literals().size(), 0);
EXPECT_EQ(stripped.query(), "match ( n : peropero ) return n");
}
TEST(QueryStripper, LineComment2) {
StrippedQuery stripped("MaTch (n:peropero) // komentar\r\nreturn n");
EXPECT_EQ(stripped.literals().size(), 0);
EXPECT_EQ(stripped.query(), "match ( n : peropero ) return n");
}
TEST(QueryStripper, LineComment3) {
StrippedQuery stripped("MaTch (n:peropero) return n // komentar");
EXPECT_EQ(stripped.literals().size(), 0);
EXPECT_EQ(stripped.query(), "match ( n : peropero ) return n");
}
TEST(QueryStripper, LineComment4) {
StrippedQuery stripped("MaTch (n:peropero) return n // komentar\r");
EXPECT_EQ(stripped.literals().size(), 0);
// Didn't manage to parse comment because it ends with \r.
EXPECT_EQ(stripped.query(), "match ( n : peropero ) return n / / komentar");
}
TEST(QueryStripper, Spaces) {
StrippedQuery stripped(u8"RETURN \r\n\u202f\t\u2007 NuLl");
EXPECT_EQ(stripped.literals().size(), 0);
EXPECT_EQ(stripped.query(), "return null");
}
TEST(QueryStripper, OtherTokens) {
StrippedQuery stripped("++=...");
EXPECT_EQ(stripped.literals().size(), 0);
EXPECT_EQ(stripped.query(), "+ += .. .");
}
}