Implementation of new lexer

Summary: WORK IN PROGRESS Reviewers: buda, teon.banek, dgleich Reviewed By: dgleich Subscribers: dgleich, pullbot Differential Revision: https://phabricator.memgraph.io/D496
2017-06-21 16:10:52 +02:00 · 2017-06-21 16:10:52 +02:00 · 3a922de963
commit 3a922de963
parent e86d73eb98
8 changed files with 17017 additions and 99 deletions
--- a/src/query/exceptions.hpp
+++ b/src/query/exceptions.hpp
@ -12,6 +12,12 @@ class QueryException : public utils::BasicException {
  using utils::BasicException::BasicException;
 };

+class LexingException : public QueryException {
+ public:
+  using QueryException::QueryException;
+  LexingException() : QueryException("") {}
+};
+
 class SyntaxException : public QueryException {
 public:
  using QueryException::QueryException;
--- a/src/query/frontend/ast/ast.hpp
+++ b/src/query/frontend/ast/ast.hpp
@ -1408,10 +1408,12 @@ class CachedAst {
    bool Visit(PrimitiveLiteral &literal) override {
      // TODO: If literal is a part of NamedExpression then we need to change
      // text in NamedExpression, otherwise wrong header will be returned.
-      permanent_assert(
-          literal.token_position_ != -1,
-          "Use AstPlugLiteralsVisitor only on ast created by parsing queries");
-      literal.value_ = parameters_.AtTokenPosition(literal.token_position_);
+      if (!literal.value_.IsNull()) {
+        permanent_assert(literal.token_position_ != -1,
+                         "Use AstPlugLiteralsVisitor only on ast created by "
+                         "parsing queries");
+        literal.value_ = parameters_.AtTokenPosition(literal.token_position_);
+      }
      return true;
    }

--- a/src/query/frontend/opencypher/grammar/Cypher.g4
+++ b/src/query/frontend/opencypher/grammar/Cypher.g4
@ -83,7 +83,7 @@ returnItem : ( expression SP AS SP variable )
           | expression
           ;

-order : ORDER SP BY SP sortItem ( ',' SP? sortItem )* ;
+order : ORDER SP BY SP sortItem ( SP? ',' SP? sortItem )* ;

 skip : L_SKIP SP expression ;

@ -102,7 +102,7 @@ patternPart : ( variable SP? '=' SP? anonymousPatternPart )
 anonymousPatternPart : patternElement ;

 patternElement : ( nodePattern ( SP? patternElementChain )* )
-               | ( '(' patternElement ')' )
+               | ( '(' SP? patternElement SP? ')' )
               ;

 nodePattern : '(' SP? ( variable SP? )? ( nodeLabels SP? )? ( properties SP? )? ')' ;
--- a/src/query/frontend/stripped.cpp
+++ b/src/query/frontend/stripped.cpp
@ -1,35 +1,64 @@
 #include "query/frontend/stripped.hpp"

+#include <cctype>
+#include <cstdint>
 #include <iostream>
 #include <string>
 #include <vector>

-#include "antlr4-runtime.h"
 #include "query/common.hpp"
+#include "query/exceptions.hpp"
 #include "query/frontend/opencypher/generated/CypherBaseVisitor.h"
 #include "query/frontend/opencypher/generated/CypherLexer.h"
 #include "query/frontend/opencypher/generated/CypherParser.h"
+#include "query/frontend/stripped_lexer_constants.hpp"
 #include "utils/assert.hpp"
 #include "utils/hashing/fnv.hpp"
 #include "utils/string.hpp"

-using namespace antlropencypher;
-using namespace antlr4;
-
 namespace query {

-StrippedQuery::StrippedQuery(const std::string &query) {
-  // Tokenize the query.
-  ANTLRInputStream input(query);
-  CypherLexer lexer(&input);
-  CommonTokenStream token_stream(&lexer);
-  token_stream.fill();
-  auto tokens = token_stream.getTokens();
+using namespace lexer_constants;
+
+StrippedQuery::StrippedQuery(const std::string &query) : original_(query) {
+  enum class Token {
+    UNMATCHED,
+    KEYWORD,  // Including true, false and null.
+    SPECIAL,  // +, .., +=, (, { and so on.
+    STRING,
+    INT,  // Decimal, octal and hexadecimal.
+    REAL,
+    ESCAPED_NAME,
+    UNESCAPED_NAME,
+    SPACE
+  };
+
+  std::vector<std::pair<Token, std::string>> tokens;
+  for (int i = 0; i < static_cast<int>(original_.size());) {
+    Token token = Token::UNMATCHED;
+    int len = 0;
+    auto update = [&](int new_len, Token new_token) {
+      if (new_len > len) {
+        len = new_len;
+        token = new_token;
+      }
+    };
+    update(MatchKeyword(i), Token::KEYWORD);
+    update(MatchSpecial(i), Token::SPECIAL);
+    update(MatchString(i), Token::STRING);
+    update(MatchDecimalInt(i), Token::INT);
+    update(MatchOctalInt(i), Token::INT);
+    update(MatchHexadecimalInt(i), Token::INT);
+    update(MatchReal(i), Token::REAL);
+    update(MatchEscapedName(i), Token::ESCAPED_NAME);
+    update(MatchUnescapedName(i), Token::UNESCAPED_NAME);
+    update(MatchWhitespaceAndComments(i), Token::SPACE);
+    if (token == Token::UNMATCHED) throw LexingException("Invalid query");
+    tokens.emplace_back(token, original_.substr(i, len));
+    i += len;
+  }

-  // Initialize data structures we return.
  std::vector<std::string> token_strings;
-  token_strings.reserve(tokens.size());
-
  // A helper function that stores literal and its token position in a
  // literals_. In stripped query text literal is replaced with a new_value.
  // new_value can be any value that is lexed as a literal.
@ -40,88 +69,44 @@ StrippedQuery::StrippedQuery(const std::string &query) {
  };

  // Convert tokens to strings, perform lowercasing and filtering.
-  for (const auto *token : tokens) {
+  for (const auto &token : tokens) {
    // Position is calculated in query after stripping and whitespace
-    // normalisation, not before.
-    int position = token_strings.size() * 2;
-
-    switch (token->getType()) {
-      case CypherLexer::UNION:
-      case CypherLexer::ALL:
-      case CypherLexer::OPTIONAL:
-      case CypherLexer::MATCH:
-      case CypherLexer::UNWIND:
-      case CypherLexer::AS:
-      case CypherLexer::MERGE:
-      case CypherLexer::ON:
-      case CypherLexer::CREATE:
-      case CypherLexer::SET:
-      case CypherLexer::DETACH:
-      case CypherLexer::DELETE:
-      case CypherLexer::REMOVE:
-      case CypherLexer::WITH:
-      case CypherLexer::DISTINCT:
-      case CypherLexer::RETURN:
-      case CypherLexer::ORDER:
-      case CypherLexer::BY:
-      case CypherLexer::L_SKIP:
-      case CypherLexer::LIMIT:
-      case CypherLexer::ASCENDING:
-      case CypherLexer::ASC:
-      case CypherLexer::DESCENDING:
-      case CypherLexer::DESC:
-      case CypherLexer::WHERE:
-      case CypherLexer::OR:
-      case CypherLexer::XOR:
-      case CypherLexer::AND:
-      case CypherLexer::NOT:
-      case CypherLexer::IN:
-      case CypherLexer::STARTS:
-      case CypherLexer::ENDS:
-      case CypherLexer::CONTAINS:
-      case CypherLexer::IS:
-      // We don't strip NULL, since it can appear in special expressions like IS
-      // NULL and IS NOT NULL.
-      case CypherLexer::CYPHERNULL:
-      case CypherLexer::COUNT:
-      case CypherLexer::FILTER:
-      case CypherLexer::EXTRACT:
-      case CypherLexer::ANY:
-      case CypherLexer::NONE:
-      case CypherLexer::SINGLE:
-        token_strings.push_back(utils::ToLowerCase(token->getText()));
+    // normalisation, not before. There will be twice as many tokens before
+    // this one because a space token is inserted between every two tokens.
+    int token_index = token_strings.size() * 2;
+    switch (token.first) {
+      case Token::UNMATCHED:
+        debug_assert(false, "Shouldn't happen");
+      case Token::KEYWORD: {
+        auto s = utils::ToLowerCase(token.second);
+        // We don't strip NULL, since it can appear in special expressions like
+        // IS NULL and IS NOT NULL, but we strip true and false keywords.
+        if (s == "true") {
+          replace_stripped(token_index, true, kStrippedBooleanToken);
+        } else if (s == "false") {
+          replace_stripped(token_index, false, kStrippedBooleanToken);
+        } else {
+          token_strings.push_back(s);
+        }
+      } break;
+      case Token::SPACE:
        break;
-
-      case CypherLexer::SP:
-      case Token::EOF:
-        break;
-
-      case CypherLexer::DecimalInteger:
-      case CypherLexer::HexInteger:
-      case CypherLexer::OctalInteger:
-        replace_stripped(position, ParseIntegerLiteral(token->getText()),
-                         kStrippedIntToken);
-        break;
-
-      case CypherLexer::StringLiteral:
-        replace_stripped(position, ParseStringLiteral(token->getText()),
+      case Token::STRING:
+        replace_stripped(token_index, ParseStringLiteral(token.second),
                         kStrippedStringToken);
        break;
-
-      case CypherLexer::RegularDecimalReal:
-      case CypherLexer::ExponentDecimalReal:
-        replace_stripped(position, ParseDoubleLiteral(token->getText()),
+      case Token::INT:
+        replace_stripped(token_index, ParseIntegerLiteral(token.second),
+                         kStrippedIntToken);
+        break;
+      case Token::REAL:
+        replace_stripped(token_index, ParseDoubleLiteral(token.second),
                         kStrippedDoubleToken);
        break;
-      case CypherLexer::TRUE:
-        replace_stripped(position, true, kStrippedBooleanToken);
-        break;
-      case CypherLexer::FALSE:
-        replace_stripped(position, false, kStrippedBooleanToken);
-        break;
-
-      default:
-        token_strings.push_back(token->getText());
+      case Token::SPECIAL:
+      case Token::ESCAPED_NAME:
+      case Token::UNESCAPED_NAME:
+        token_strings.push_back(token.second);
        break;
    }
  }
@ -129,4 +114,245 @@ StrippedQuery::StrippedQuery(const std::string &query) {
  query_ = utils::Join(token_strings, " ");
  hash_ = fnv(query_);
 }
+
+// Returns the bytes of the UTF-8 encoded symbol that starts at _s (1 to 4
+// bytes). Throws LexingException("Invalid character") when the lead byte is
+// not a valid UTF-8 lead byte or when a continuation byte is malformed.
+std::string StrippedQuery::GetFirstUtf8Symbol(const char *_s) const {
+  // Bytes are inspected as unsigned values so the shifts below operate on the
+  // raw bit pattern. The assertion guards the cast, see
+  // https://stackoverflow.com/questions/16260033/reinterpret-cast-between-char-and-stduint8-t-safe
+  static_assert(
+      std::is_same<std::uint8_t, unsigned char>::value,
+      "This library requires std::uint8_t to be implemented as unsigned char.");
+  const uint8_t *bytes = reinterpret_cast<const uint8_t *>(_s);
+  // The high bits of the lead byte encode the length of the whole sequence.
+  const uint8_t lead = bytes[0];
+  int length = 0;
+  if ((lead >> 7) == 0x00) {
+    length = 1;  // 0xxxxxxx
+  } else if ((lead >> 5) == 0x06) {
+    length = 2;  // 110xxxxx
+  } else if ((lead >> 4) == 0x0e) {
+    length = 3;  // 1110xxxx
+  } else if ((lead >> 3) == 0x1e) {
+    length = 4;  // 11110xxx
+  } else {
+    throw LexingException("Invalid character");
+  }
+  // Every following byte must have the form 10xxxxxx. Bytes are checked in
+  // order, so a byte is read only after all bytes before it were accepted; a
+  // NUL byte fails the check and therefore ends the scan.
+  for (int k = 1; k < length; ++k) {
+    if ((bytes[k] >> 6) != 0x02) throw LexingException("Invalid character");
+  }
+  return std::string(_s, _s + length);
+}
+
+// From here until end of file there are functions that calculate matches for
+// every possible token. Functions are more or less compatible with Cypher.g4
+// grammar. Unfortunately, they contain a lot of special cases and shouldn't be
+// changed without good reasons.
+//
+// Here be dragons, do not touch!
+//           ____ __
+//          { --.\  |          .)%%%)%%
+//           '-._\\ | (\___   %)%%(%%(%%%
+//               `\\|{/ ^ _)-%(%%%%)%%;%%%
+//           .'^^^^^^^  /`    %%)%%%%)%%%'
+//          //\   ) ,  /       '%%%%(%%'
+//    ,  _.'/  `\<-- \<
+//     `^^^`     ^^   ^^
+// Returns the length of the longest entry of kKeywords that matches the query
+// text at position start, ignoring the case of the query text, or 0 if no
+// keyword matches there. Only the query side is lowercased, so the entries of
+// kKeywords are presumably stored in lowercase.
+int StrippedQuery::MatchKeyword(int start) const {
+  const int size = static_cast<int>(original_.size());
+  int match = 0;
+  for (const auto &s : kKeywords) {
+    const int len = static_cast<int>(s.size());
+    // A shorter keyword can't improve on the match we already have.
+    if (len < match) continue;
+    // Keyword would run past the end of the query.
+    if (start + len > size) continue;
+    int i = 0;
+    while (i < len) {
+      // The query may contain non-ASCII UTF-8 bytes, which are negative when
+      // char is signed. Passing such a value to tolower is undefined
+      // behaviour, so convert to unsigned char first.
+      const auto c = static_cast<unsigned char>(original_[start + i]);
+      if (s[i] != std::tolower(c)) break;
+      ++i;
+    }
+    if (i == len) {
+      match = len;
+    }
+  }
+  return match;
+}
+
+// Returns the length of the longest entry of kSpecialTokens that appears
+// verbatim in the query at position start, or 0 if none does.
+int StrippedQuery::MatchSpecial(int start) const {
+  int longest = 0;
+  for (const auto &special : kSpecialTokens) {
+    const int size = static_cast<int>(special.size());
+    // compare() returns 0 when the query slice equals the token exactly.
+    const bool found = original_.compare(start, special.size(), special) == 0;
+    if (found && size > longest) longest = size;
+  }
+  return longest;
+}
+
+// Returns the length (both quotes included) of the string literal that starts
+// at position start, or 0 if there is no opening quote at start or the literal
+// is not terminated. A literal is delimited by a pair of identical quote
+// characters (' or "); a backslash escapes the character that follows it, so
+// an escaped quote doesn't terminate the literal.
+int StrippedQuery::MatchString(int start) const {
+  const char quote = original_[start];
+  if (quote != '"' && quote != '\'') return 0;
+  // Scan up to the real length of the query instead of stopping at the first
+  // NUL byte, so that literals containing an embedded NUL are matched too.
+  const int size = static_cast<int>(original_.size());
+  bool escaped = false;
+  for (int i = start + 1; i < size; ++i) {
+    if (escaped) {
+      // Character after a backslash is taken literally.
+      escaped = false;
+    } else if (original_[i] == quote) {
+      return i - start + 1;
+    } else if (original_[i] == '\\') {
+      escaped = true;
+    }
+  }
+  return 0;
+}
+
+// Returns the length of the decimal integer at position start, or 0 if the
+// character at start is not a digit. A leading '0' is matched as a
+// one-character integer on its own.
+int StrippedQuery::MatchDecimalInt(int start) const {
+  if (original_[start] == '0') return 1;
+  const int size = static_cast<int>(original_.size());
+  int end = start;
+  for (; end < size; ++end) {
+    const char c = original_[end];
+    if (c < '0' || c > '9') break;
+  }
+  return end - start;
+}
+
+// Returns the length of the octal integer at position start: a '0' followed by
+// at least one digit in the range 0-7. Returns 0 if there is no such token.
+int StrippedQuery::MatchOctalInt(int start) const {
+  if (original_[start] != '0') return 0;
+  const int size = static_cast<int>(original_.size());
+  const int first_digit = start + 1;
+  int end = first_digit;
+  for (; end < size; ++end) {
+    const char c = original_[end];
+    if (c < '0' || c > '7') break;
+  }
+  // A lone '0' is not an octal integer.
+  return end == first_digit ? 0 : end - start;
+}
+
+// Returns the length of the hexadecimal integer at position start: the prefix
+// "0x" (lowercase x only) followed by at least one hexadecimal digit of either
+// case. Returns 0 if there is no such token.
+int StrippedQuery::MatchHexadecimalInt(int start) const {
+  const int size = static_cast<int>(original_.size());
+  const bool has_prefix = start + 1 < size && original_[start] == '0' &&
+                          original_[start + 1] == 'x';
+  if (!has_prefix) return 0;
+  const auto is_hex_digit = [](char c) {
+    return ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') ||
+           ('A' <= c && c <= 'F');
+  };
+  const int first_digit = start + 2;
+  int end = first_digit;
+  while (end < size && is_hex_digit(original_[end])) ++end;
+  // The prefix alone, without any digit, is not a match.
+  return end == first_digit ? 0 : end - start;
+}
+
+// Returns the length of the longest prefix at position start that has the
+// shape of a real number: digits, optionally a dot followed by digits,
+// optionally an exponent ('e' or 'E', an optional '-', then digits). The
+// scanner is a small state machine; characters consumed speculatively (a dot
+// or an exponent marker that isn't followed by digits) are given back at the
+// end. Returns 0 if nothing matches.
+//
+// Note that a plain run of digits is also returned here; the caller tries the
+// integer matchers first and only replaces a match with a strictly longer one,
+// so such input is still classified as an integer.
+int StrippedQuery::MatchReal(int start) const {
+  enum class State { BEFORE_DOT, DOT, AFTER_DOT, E, E_MINUS, AFTER_E };
+  State state = State::BEFORE_DOT;
+  // True once a digit was seen before the dot.
+  bool integer_part_has_digit = false;
+  auto i = start;
+  while (i < static_cast<int>(original_.size())) {
+    if (original_[i] == '.') {
+      if (state != State::BEFORE_DOT) break;
+      state = State::DOT;
+    } else if ('0' <= original_[i] && original_[i] <= '9') {
+      if (state == State::BEFORE_DOT) {
+        integer_part_has_digit = true;
+      } else if (state == State::DOT) {
+        state = State::AFTER_DOT;
+      } else if (state == State::E || state == State::E_MINUS) {
+        state = State::AFTER_E;
+      }
+    } else if (original_[i] == 'e' || original_[i] == 'E') {
+      if (state != State::BEFORE_DOT && state != State::AFTER_DOT) break;
+      // An exponent needs a mantissa with at least one digit. Without this
+      // check names such as e5 or expressions such as e-5 would be matched as
+      // real literals. (In AFTER_DOT a digit has always been seen.)
+      if (state == State::BEFORE_DOT && !integer_part_has_digit) break;
+      state = State::E;
+    } else if (original_[i] == '-') {
+      if (state != State::E) break;
+      state = State::E_MINUS;
+    } else {
+      break;
+    }
+    ++i;
+  }
+  // Give back a trailing dot, 'e' or "e-" that wasn't followed by a digit.
+  if (state == State::DOT) --i;
+  if (state == State::E) --i;
+  if (state == State::E_MINUS) i -= 2;
+  return i - start;
+}
+
+// Returns the length of the escaped name at position start, or 0 if there is
+// none. An escaped name is one or more directly adjacent segments, each
+// delimited by a pair of backticks; e.g. `a``b` is matched as a single name.
+// A segment without a closing backtick is not included in the match.
+int StrippedQuery::MatchEscapedName(int start) const {
+  const std::string::size_type size = original_.size();
+  std::string::size_type end = start;
+  while (end < size && original_[end] == '`') {
+    const auto closing = original_.find('`', end + 1);
+    if (closing == std::string::npos) break;
+    end = closing + 1;
+  }
+  return static_cast<int>(end - start);
+}
+
+// Returns the length in bytes of the unescaped name at position start, or 0 if
+// there is none. The first UTF-8 symbol must be a member of
+// kUnescapedNameAllowedStarts and every following symbol a member of
+// kUnescapedNameAllowedParts. GetFirstUtf8Symbol throws LexingException if it
+// encounters an invalid UTF-8 sequence.
+int StrippedQuery::MatchUnescapedName(int start) const {
+  const std::string first = GetFirstUtf8Symbol(original_.data() + start);
+  if (kUnescapedNameAllowedStarts.count(first) == 0) return 0;
+  const int size = static_cast<int>(original_.size());
+  int end = start + static_cast<int>(first.size());
+  while (end < size) {
+    const std::string symbol = GetFirstUtf8Symbol(original_.data() + end);
+    if (kUnescapedNameAllowedParts.count(symbol) == 0) break;
+    end += static_cast<int>(symbol.size());
+  }
+  return end - start;
+}
+
+// Returns the length of the longest run of whitespace symbols (members of
+// kSpaceParts), block comments (/* ... */) and line comments (// ...) that
+// starts at position start, or 0 if there is none.
+//
+// A line comment is closed by "\n", by "\r\n" or by the end of the query. A
+// lone '\r' doesn't close it; in that case, as well as for a block comment
+// without the closing "*/", the match ends right before the comment.
+// GetFirstUtf8Symbol throws LexingException if it encounters an invalid UTF-8
+// sequence outside of a comment.
+int StrippedQuery::MatchWhitespaceAndComments(int start) const {
+  enum class State { OUT, IN_LINE_COMMENT, IN_BLOCK_COMMENT };
+  State state = State::OUT;
+  int i = start;
+  int len = original_.size();
+  // We need to remember at which position comment started because if we fail
+  // to match comment finish we have a match until comment start position.
+  int comment_position = -1;
+  while (i < len) {
+    if (state == State::OUT) {
+      auto s = GetFirstUtf8Symbol(original_.data() + i);
+      if (kSpaceParts.count(s)) {
+        i += s.size();
+      } else if (i + 1 < len && original_[i] == '/' &&
+                 original_[i + 1] == '*') {
+        comment_position = i;
+        state = State::IN_BLOCK_COMMENT;
+        i += 2;
+      } else if (i + 1 < len && original_[i] == '/' &&
+                 original_[i + 1] == '/') {
+        comment_position = i;
+        state = State::IN_LINE_COMMENT;
+        i += 2;
+      } else {
+        break;
+      }
+    } else if (state == State::IN_LINE_COMMENT) {
+      if (original_[i] == '\n') {
+        state = State::OUT;
+        ++i;
+      } else if (i + 1 < len && original_[i] == '\r' &&
+                 original_[i + 1] == '\n') {
+        state = State::OUT;
+        i += 2;
+      } else if (original_[i] == '\r') {
+        // Lone '\r' doesn't finish the comment, give up on it.
+        break;
+      } else if (i + 1 == len) {
+        // Last character of the query, end of input finishes the comment.
+        state = State::OUT;
+        ++i;
+      } else {
+        ++i;
+      }
+    } else if (state == State::IN_BLOCK_COMMENT) {
+      if (i + 1 < len && original_[i] == '*' && original_[i + 1] == '/') {
+        i += 2;
+        state = State::OUT;
+      } else {
+        ++i;
+      }
+    }
+  }
+  // An empty line comment ("//" as the last two characters of the query) never
+  // enters the IN_LINE_COMMENT branch above, so the loop ends with the comment
+  // still open. End of input finishes it just like it finishes a non-empty
+  // line comment. This is the only way to get here in IN_LINE_COMMENT state
+  // with the whole input consumed; the lone '\r' case breaks with i < len.
+  if (state == State::IN_LINE_COMMENT && i == len) state = State::OUT;
+  if (state != State::OUT) return comment_position - start;
+  return i - start;
+}
 }
--- a/src/query/frontend/stripped.hpp
+++ b/src/query/frontend/stripped.hpp
@ -49,6 +49,23 @@ class StrippedQuery {
  HashType hash() const { return hash_; }

 private:
+  std::string GetFirstUtf8Symbol(const char *s) const;
+
+  // Each Match* function returns the length of the token of its kind that
+  // starts at position start of the original query, or 0 if nothing matches.
+  int MatchKeyword(int start) const;
+  int MatchString(int start) const;
+  int MatchSpecial(int start) const;
+  int MatchDecimalInt(int start) const;
+  int MatchOctalInt(int start) const;
+  int MatchHexadecimalInt(int start) const;
+  int MatchReal(int start) const;
+  int MatchEscapedName(int start) const;
+  int MatchUnescapedName(int start) const;
+  int MatchWhitespaceAndComments(int start) const;
+
+  // Original query.
+  std::string original_;
+
  // Stripped query.
  std::string query_;

--- a/src/query/frontend/stripped_lexer_constants.hpp
+++ b/src/query/frontend/stripped_lexer_constants.hpp
--- a/tests/unit/cypher_main_visitor.cpp
+++ b/tests/unit/cypher_main_visitor.cpp
@ -12,6 +12,7 @@
 #include "query/frontend/ast/ast.hpp"
 #include "query/frontend/ast/cypher_main_visitor.hpp"
 #include "query/frontend/opencypher/parser.hpp"
+#include "query/frontend/stripped.hpp"
 #include "query/typed_value.hpp"

 namespace {
@ -83,11 +84,38 @@ class ClonedAstGenerator {
  Query *query_;
 };

+// This generator strips the AST, clones it and then plugs the stripped-out
+// literals back in, the same way it is done for AST caching in the interpreter.
+class CachedAstGenerator {
+ public:
+  CachedAstGenerator(const std::string &query)
+      : dbms_(),
+        db_accessor_(dbms_.active()),
+        context_(Config{}, *db_accessor_),
+        query_string_(query),
+        storage_([&]() {
+          StrippedQuery stripped(query_string_);
+          ::frontend::opencypher::Parser parser(stripped.query());
+          CypherMainVisitor visitor(context_);
+          visitor.visit(parser.tree());
+          CachedAst cached(std::move(visitor.storage()));
+          return cached.Plug(stripped.literals());
+        }()),
+        query_(storage_.query()) {}
+
+  Dbms dbms_;
+  std::unique_ptr<GraphDbAccessor> db_accessor_;
+  Context context_;
+  std::string query_string_;
+  AstTreeStorage storage_;
+  Query *query_;
+};
+
 template <typename T>
 class CypherMainVisitorTest : public ::testing::Test {};

 typedef ::testing::Types<AstGenerator, OriginalAfterCloningAstGenerator,
-                         ClonedAstGenerator>
+                         ClonedAstGenerator, CachedAstGenerator>
    AstGeneratorTypes;
 TYPED_TEST_CASE(CypherMainVisitorTest, AstGeneratorTypes);

--- a/tests/unit/stripped.cpp
+++ b/tests/unit/stripped.cpp
@ -10,6 +10,8 @@

 using namespace query;

+namespace {
+
 void EXPECT_PROP_TRUE(const TypedValue& a) {
  EXPECT_TRUE(a.type() == TypedValue::Type::Bool && a.Value<bool>());
 }
@ -24,6 +26,15 @@ TEST(QueryStripper, NoLiterals) {
  EXPECT_EQ(stripped.query(), "create ( n )");
 }

+TEST(QueryStripper, ZeroInteger) {
+  StrippedQuery stripped("RETURN 0");
+  EXPECT_EQ(stripped.literals().size(), 1);
+  EXPECT_EQ(stripped.literals().At(0).first, 2);
+  EXPECT_EQ(stripped.literals().At(0).second.Value<int64_t>(), 0);
+  EXPECT_EQ(stripped.literals().AtTokenPosition(2).Value<int64_t>(), 0);
+  EXPECT_EQ(stripped.query(), "return " + kStrippedIntToken);
+}
+
 TEST(QueryStripper, DecimalInteger) {
  StrippedQuery stripped("RETURN 42");
  EXPECT_EQ(stripped.literals().size(), 1);
@ -61,6 +72,27 @@ TEST(QueryStripper, ExponentDecimal) {
  EXPECT_EQ(stripped.query(), "return " + kStrippedDoubleToken);
 }

+TEST(QueryStripper, ExponentDecimal2) {
+  StrippedQuery stripped("RETURN 4e-2");
+  EXPECT_EQ(stripped.literals().size(), 1);
+  EXPECT_FLOAT_EQ(stripped.literals().At(0).second.Value<double>(), 4e-2);
+  EXPECT_EQ(stripped.query(), "return " + kStrippedDoubleToken);
+}
+
+TEST(QueryStripper, ExponentDecimal3) {
+  StrippedQuery stripped("RETURN 0.1e-2");
+  EXPECT_EQ(stripped.literals().size(), 1);
+  EXPECT_FLOAT_EQ(stripped.literals().At(0).second.Value<double>(), 0.1e-2);
+  EXPECT_EQ(stripped.query(), "return " + kStrippedDoubleToken);
+}
+
+TEST(QueryStripper, ExponentDecimal4) {
+  StrippedQuery stripped("RETURN .1e-2");
+  EXPECT_EQ(stripped.literals().size(), 1);
+  EXPECT_FLOAT_EQ(stripped.literals().At(0).second.Value<double>(), .1e-2);
+  EXPECT_EQ(stripped.query(), "return " + kStrippedDoubleToken);
+}
+
 TEST(QueryStripper, StringLiteral) {
  StrippedQuery stripped("RETURN 'something'");
  EXPECT_EQ(stripped.literals().size(), 1);
@ -68,13 +100,40 @@ TEST(QueryStripper, StringLiteral) {
  EXPECT_EQ(stripped.query(), "return " + kStrippedStringToken);
 }

-TEST(QueryStripper, BoolLiteral) {
-  StrippedQuery stripped("RETURN true");
+TEST(QueryStripper, StringLiteral2) {
+  StrippedQuery stripped("RETURN 'so\\'me'");
+  EXPECT_EQ(stripped.literals().size(), 1);
+  EXPECT_EQ(stripped.literals().At(0).second.Value<std::string>(), "so'me");
+  EXPECT_EQ(stripped.query(), "return " + kStrippedStringToken);
+}
+
+TEST(QueryStripper, StringLiteral3) {
+  StrippedQuery stripped("RETURN \"so\\\"me'\"");
+  EXPECT_EQ(stripped.literals().size(), 1);
+  EXPECT_EQ(stripped.literals().At(0).second.Value<std::string>(), "so\"me'");
+  EXPECT_EQ(stripped.query(), "return " + kStrippedStringToken);
+}
+
+TEST(QueryStripper, TrueLiteral) {
+  StrippedQuery stripped("RETURN trUE");
  EXPECT_EQ(stripped.literals().size(), 1);
  EXPECT_PROP_EQ(stripped.literals().At(0).second, TypedValue(true));
  EXPECT_EQ(stripped.query(), "return " + kStrippedBooleanToken);
 }

+TEST(QueryStripper, FalseLiteral) {
+  StrippedQuery stripped("RETURN fAlse");
+  EXPECT_EQ(stripped.literals().size(), 1);
+  EXPECT_PROP_EQ(stripped.literals().At(0).second, TypedValue(false));
+  EXPECT_EQ(stripped.query(), "return " + kStrippedBooleanToken);
+}
+
+TEST(QueryStripper, NullLiteral) {
+  StrippedQuery stripped("RETURN NuLl");
+  EXPECT_EQ(stripped.literals().size(), 0);
+  EXPECT_EQ(stripped.query(), "return null");
+}
+
 TEST(QueryStripper, ListLiteral) {
  StrippedQuery stripped("MATCH (n) RETURN [n, n.prop]");
  EXPECT_EQ(stripped.literals().size(), 0);
@ -96,3 +155,71 @@ TEST(QueryStripper, RangeLiteral) {
                                  " .. " + kStrippedIntToken +
                                  " ] - ( ) return n");
 }
+
+TEST(QueryStripper, EscapedName) {
+  StrippedQuery stripped("MATCH (n:`mirko``slavko`)");
+  EXPECT_EQ(stripped.literals().size(), 0);
+  EXPECT_EQ(stripped.query(), "match ( n : `mirko``slavko` )");
+}
+
+TEST(QueryStripper, UnescapedName) {
+  StrippedQuery stripped("MATCH (n:peropero)");
+  EXPECT_EQ(stripped.literals().size(), 0);
+  EXPECT_EQ(stripped.query(), "match ( n : peropero )");
+}
+
+TEST(QueryStripper, UnescapedName2) {
+  StrippedQuery stripped(u8"MATCH (n:\uffd5\u04c2\u04c2pero\u0078pe)");
+  EXPECT_EQ(stripped.literals().size(), 0);
+  EXPECT_EQ(stripped.query(), u8"match ( n : \uffd5\u04c2\u04c2pero\u0078pe )");
+}
+
+TEST(QueryStripper, MixedCaseKeyword) {
+  StrippedQuery stripped("MaTch (n:peropero)");
+  EXPECT_EQ(stripped.literals().size(), 0);
+  EXPECT_EQ(stripped.query(), "match ( n : peropero )");
+}
+
+TEST(QueryStripper, BlockComment) {
+  StrippedQuery stripped("MaTch (n:/**fhf/gf\n\r\n//fjhf*/peropero)");
+  EXPECT_EQ(stripped.literals().size(), 0);
+  EXPECT_EQ(stripped.query(), "match ( n : peropero )");
+}
+
+TEST(QueryStripper, LineComment1) {
+  StrippedQuery stripped("MaTch (n:peropero) // komentar\nreturn n");
+  EXPECT_EQ(stripped.literals().size(), 0);
+  EXPECT_EQ(stripped.query(), "match ( n : peropero ) return n");
+}
+
+TEST(QueryStripper, LineComment2) {
+  StrippedQuery stripped("MaTch (n:peropero) // komentar\r\nreturn n");
+  EXPECT_EQ(stripped.literals().size(), 0);
+  EXPECT_EQ(stripped.query(), "match ( n : peropero ) return n");
+}
+
+TEST(QueryStripper, LineComment3) {
+  StrippedQuery stripped("MaTch (n:peropero) return n // komentar");
+  EXPECT_EQ(stripped.literals().size(), 0);
+  EXPECT_EQ(stripped.query(), "match ( n : peropero ) return n");
+}
+
+TEST(QueryStripper, LineComment4) {
+  StrippedQuery stripped("MaTch (n:peropero) return n // komentar\r");
+  EXPECT_EQ(stripped.literals().size(), 0);
+  // Didn't manage to parse comment because it ends with \r.
+  EXPECT_EQ(stripped.query(), "match ( n : peropero ) return n / / komentar");
+}
+
+TEST(QueryStripper, Spaces) {
+  StrippedQuery stripped(u8"RETURN \r\n\u202f\t\u2007  NuLl");
+  EXPECT_EQ(stripped.literals().size(), 0);
+  EXPECT_EQ(stripped.query(), "return null");
+}
+
+TEST(QueryStripper, OtherTokens) {
+  StrippedQuery stripped("++=...");
+  EXPECT_EQ(stripped.literals().size(), 0);
+  EXPECT_EQ(stripped.query(), "+ += .. .");
+}
+}