Move Parsing utils from query/common to frontend/parsing

Reviewers: mtomic Reviewed By: mtomic Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D2195
2019-07-10 14:30:11 +02:00 · 2019-07-10 14:30:11 +02:00 · c4c6febbc4
commit c4c6febbc4
parent 8414479abe
14 changed files with 223 additions and 203 deletions
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -50,6 +50,7 @@ set(mg_single_node_sources
    query/common.cpp
    query/frontend/ast/cypher_main_visitor.cpp
    query/frontend/ast/pretty_print.cpp
+    query/frontend/parsing.cpp
    query/frontend/semantic/required_privileges.cpp
    query/frontend/semantic/symbol_generator.cpp
    query/frontend/stripped.cpp
@ -65,7 +66,7 @@ set(mg_single_node_sources
    query/repl.cpp
    query/typed_value.cpp
    storage/common/constraints/record.cpp
-		storage/common/constraints/unique_constraints.cpp
+    storage/common/constraints/unique_constraints.cpp
    storage/common/locking/record_lock.cpp
    storage/common/types/property_value.cpp
    storage/common/types/property_value_store.cpp
@ -159,6 +160,7 @@ set(mg_distributed_sources
    query/common.cpp
    query/frontend/ast/pretty_print.cpp
    query/frontend/ast/cypher_main_visitor.cpp
+    query/frontend/parsing.cpp
    query/frontend/semantic/required_privileges.cpp
    query/frontend/semantic/symbol_generator.cpp
    query/frontend/stripped.cpp
@ -258,6 +260,7 @@ set(mg_single_node_ha_sources
    query/common.cpp
    query/frontend/ast/cypher_main_visitor.cpp
    query/frontend/ast/pretty_print.cpp
+    query/frontend/parsing.cpp
    query/frontend/semantic/required_privileges.cpp
    query/frontend/semantic/symbol_generator.cpp
    query/frontend/stripped.cpp
@ -273,7 +276,7 @@ set(mg_single_node_ha_sources
    query/repl.cpp
    query/typed_value.cpp
    storage/common/constraints/record.cpp
-		storage/common/constraints/unique_constraints.cpp
+    storage/common/constraints/unique_constraints.cpp
    storage/common/types/property_value.cpp
    storage/common/types/slk.cpp
    storage/common/types/property_value_store.cpp
--- a/src/query/common.cpp
+++ b/src/query/common.cpp
@ -1,186 +1,7 @@
 #include "query/common.hpp"

-#include <cctype>
-#include <codecvt>
-#include <locale>
-#include <stdexcept>
-
-#include "glog/logging.h"
-
-#include "query/exceptions.hpp"
-#include "utils/string.hpp"
-
 namespace query {

-int64_t ParseIntegerLiteral(const std::string &s) {
-  try {
-    // Not really correct since long long can have a bigger range than int64_t.
-    return static_cast<int64_t>(std::stoll(s, 0, 0));
-  } catch (const std::out_of_range &) {
-    throw SemanticException("Integer literal exceeds 64 bits.");
-  }
-}
-
-std::string ParseStringLiteral(const std::string &s) {
-  // These functions is declared as lambda since its semantics is highly
-  // specific for this conxtext and shouldn't be used elsewhere.
-  auto EncodeEscapedUnicodeCodepointUtf32 = [](const std::string &s, int &i) {
-    const int kLongUnicodeLength = 8;
-    int j = i + 1;
-    while (j < static_cast<int>(s.size()) - 1 &&
-           j < i + kLongUnicodeLength + 1 && isxdigit(s[j])) {
-      ++j;
-    }
-    if (j - i == kLongUnicodeLength + 1) {
-      char32_t t = stoi(s.substr(i + 1, kLongUnicodeLength), 0, 16);
-      i += kLongUnicodeLength;
-      std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter;
-      return converter.to_bytes(t);
-    }
-    throw SyntaxException(
-        "Expected 8 hex digits as unicode codepoint started with \\U. "
-        "Use \\u for 4 hex digits format.");
-  };
-  auto EncodeEscapedUnicodeCodepointUtf16 = [](const std::string &s, int &i) {
-    const int kShortUnicodeLength = 4;
-    int j = i + 1;
-    while (j < static_cast<int>(s.size()) - 1 &&
-           j < i + kShortUnicodeLength + 1 && isxdigit(s[j])) {
-      ++j;
-    }
-    if (j - i >= kShortUnicodeLength + 1) {
-      char16_t t = stoi(s.substr(i + 1, kShortUnicodeLength), 0, 16);
-      if (t >= 0xD800 && t <= 0xDBFF) {
-        // t is high surrogate pair. Expect one more utf16 codepoint.
-        j = i + kShortUnicodeLength + 1;
-        if (j >= static_cast<int>(s.size()) - 1 || s[j] != '\\') {
-          throw SemanticException("Invalid UTF codepoint.");
-        }
-        ++j;
-        if (j >= static_cast<int>(s.size()) - 1 ||
-            (s[j] != 'u' && s[j] != 'U')) {
-          throw SemanticException("Invalid UTF codepoint.");
-        }
-        ++j;
-        int k = j;
-        while (k < static_cast<int>(s.size()) - 1 &&
-               k < j + kShortUnicodeLength && isxdigit(s[k])) {
-          ++k;
-        }
-        if (k != j + kShortUnicodeLength) {
-          throw SemanticException("Invalid UTF codepoint.");
-        }
-        char16_t surrogates[3] = {t,
-                                  static_cast<char16_t>(stoi(
-                                      s.substr(j, kShortUnicodeLength), 0, 16)),
-                                  0};
-        i += kShortUnicodeLength + 2 + kShortUnicodeLength;
-        std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>
-            converter;
-        return converter.to_bytes(surrogates);
-      } else {
-        i += kShortUnicodeLength;
-        std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>
-            converter;
-        return converter.to_bytes(t);
-      }
-    }
-    throw SyntaxException(
-        "Expected 4 hex digits as unicode codepoint started with \\u. "
-        "Use \\U for 8 hex digits format.");
-  };
-
-  std::string unescaped;
-  bool escape = false;
-
-  // First and last char is quote, we don't need to look at them.
-  for (int i = 1; i < static_cast<int>(s.size()) - 1; ++i) {
-    if (escape) {
-      switch (s[i]) {
-        case '\\':
-          unescaped += '\\';
-          break;
-        case '\'':
-          unescaped += '\'';
-          break;
-        case '"':
-          unescaped += '"';
-          break;
-        case 'B':
-        case 'b':
-          unescaped += '\b';
-          break;
-        case 'F':
-        case 'f':
-          unescaped += '\f';
-          break;
-        case 'N':
-        case 'n':
-          unescaped += '\n';
-          break;
-        case 'R':
-        case 'r':
-          unescaped += '\r';
-          break;
-        case 'T':
-        case 't':
-          unescaped += '\t';
-          break;
-        case 'U':
-          try {
-            unescaped += EncodeEscapedUnicodeCodepointUtf32(s, i);
-          } catch (const std::range_error &) {
-            throw SemanticException("Invalid UTF codepoint.");
-          }
-          break;
-        case 'u':
-          try {
-            unescaped += EncodeEscapedUnicodeCodepointUtf16(s, i);
-          } catch (const std::range_error &) {
-            throw SemanticException("Invalid UTF codepoint.");
-          }
-          break;
-        default:
-          // This should never happen, except grammar changes and we don't
-          // notice change in this production.
-          DLOG(FATAL) << "can't happen";
-          throw std::exception();
-      }
-      escape = false;
-    } else if (s[i] == '\\') {
-      escape = true;
-    } else {
-      unescaped += s[i];
-    }
-  }
-  return unescaped;
-}
-
-double ParseDoubleLiteral(const std::string &s) {
-  try {
-    return utils::ParseDouble(s);
-  } catch (const utils::BasicException &) {
-    throw SemanticException("Couldn't parse string to double.");
-  }
-}
-
-std::string ParseParameter(const std::string &s) {
-  DCHECK(s[0] == '$') << "Invalid string passed as parameter name";
-  if (s[1] != '`') return s.substr(1);
-  // If parameter name is escaped symbolic name then symbolic name should be
-  // unescaped and leading and trailing backquote should be removed.
-  DCHECK(s.size() > 3U && s.back() == '`')
-      << "Invalid string passed as parameter name";
-  std::string out;
-  for (int i = 2; i < static_cast<int>(s.size()) - 1; ++i) {
-    if (s[i] == '`') {
-      ++i;
-    }
-    out.push_back(s[i]);
-  }
-  return out;
-}
-
 void ReconstructTypedValue(TypedValue &value) {
  using Type = TypedValue::Type;
  switch (value.type()) {
--- a/src/query/common.hpp
+++ b/src/query/common.hpp
@ -15,13 +15,6 @@

 namespace query {

-// These are the functions for parsing literals and parameter names from
-// opencypher query.
-int64_t ParseIntegerLiteral(const std::string &s);
-std::string ParseStringLiteral(const std::string &s);
-double ParseDoubleLiteral(const std::string &s);
-std::string ParseParameter(const std::string &s);
-
 /// Indicates that some part of query execution should see the OLD graph state
 /// (the latest state before the current transaction+command), or NEW (state as
 /// changed by the current transaction+command).
--- a/src/query/frontend/ast/cypher_main_visitor.cpp
+++ b/src/query/frontend/ast/cypher_main_visitor.cpp
@ -14,8 +14,8 @@

 #include <glog/logging.h>

-#include "query/common.hpp"
 #include "query/exceptions.hpp"
+#include "query/frontend/parsing.hpp"
 #include "query/interpret/awesome_memgraph_functions.hpp"
 #include "utils/exceptions.hpp"
 #include "utils/string.hpp"
--- a/src/query/frontend/parsing.cpp
+++ b/src/query/frontend/parsing.cpp
@ -0,0 +1,184 @@
+#include "query/frontend/parsing.hpp"
+
+#include <cctype>
+#include <codecvt>
+#include <locale>
+#include <stdexcept>
+
+#include <glog/logging.h>
+
+#include "query/exceptions.hpp"
+#include "utils/string.hpp"
+
+namespace query::frontend {
+
+int64_t ParseIntegerLiteral(const std::string &s) {  // Converts the text of an integer literal to int64_t.
+  try {
+    // Base 0 lets std::stoll pick the radix from the prefix. Not strictly correct since long long can have a bigger range than int64_t.
+    return static_cast<int64_t>(std::stoll(s, 0, 0));
+  } catch (const std::out_of_range &) {  // Only overflow is translated; std::invalid_argument from stoll propagates unchanged.
+    throw SemanticException("Integer literal exceeds 64 bits.");
+  }
+}
+
+std::string ParseStringLiteral(const std::string &s) {  // Unescapes a quoted string literal: drops the enclosing quotes and expands backslash escapes.
+  // These helpers are declared as lambdas since their semantics are highly
+  // specific to this context and they should not be used elsewhere.
+  auto EncodeEscapedUnicodeCodepointUtf32 = [](const std::string &s, int &i) {  // On entry i indexes the U of the escape.
+    const int kLongUnicodeLength = 8;
+    int j = i + 1;
+    while (j < static_cast<int>(s.size()) - 1 &&  // size() - 1 keeps the scan off the closing quote.
+           j < i + kLongUnicodeLength + 1 && isxdigit(s[j])) {  // NOTE(review): s[j] is a plain char; isxdigit is only defined for unsigned char values and EOF.
+      ++j;
+    }
+    if (j - i == kLongUnicodeLength + 1) {  // Exactly 8 hex digits were scanned.
+      char32_t t = stoi(s.substr(i + 1, kLongUnicodeLength), 0, 16);  // NOTE(review): stoi returns int, so values above 7FFFFFFF throw std::out_of_range, which the std::range_error handlers below do not catch.
+      i += kLongUnicodeLength;  // Leave i on the last digit; the loop increment in the caller steps past it.
+      std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter;
+      return converter.to_bytes(t);  // UTF-8 encoding of the code point.
+    }
+    throw SyntaxException(
+        "Expected 8 hex digits as unicode codepoint started with \\U. "
+        "Use \\u for 4 hex digits format.");
+  };
+  auto EncodeEscapedUnicodeCodepointUtf16 = [](const std::string &s, int &i) {  // On entry i indexes the u of the escape.
+    const int kShortUnicodeLength = 4;
+    int j = i + 1;
+    while (j < static_cast<int>(s.size()) - 1 &&
+           j < i + kShortUnicodeLength + 1 && isxdigit(s[j])) {
+      ++j;
+    }
+    if (j - i >= kShortUnicodeLength + 1) {  // The scan above is capped, so this means exactly 4 hex digits.
+      char16_t t = stoi(s.substr(i + 1, kShortUnicodeLength), 0, 16);
+      if (t >= 0xD800 && t <= 0xDBFF) {
+        // t is a high surrogate. Expect one more 4-digit escape right after it.
+        j = i + kShortUnicodeLength + 1;
+        if (j >= static_cast<int>(s.size()) - 1 || s[j] != '\\') {
+          throw SemanticException("Invalid UTF codepoint.");
+        }
+        ++j;
+        if (j >= static_cast<int>(s.size()) - 1 ||
+            (s[j] != 'u' && s[j] != 'U')) {
+          throw SemanticException("Invalid UTF codepoint.");
+        }
+        ++j;
+        int k = j;  // j now indexes the first digit of the second escape.
+        while (k < static_cast<int>(s.size()) - 1 &&
+               k < j + kShortUnicodeLength && isxdigit(s[k])) {
+          ++k;
+        }
+        if (k != j + kShortUnicodeLength) {
+          throw SemanticException("Invalid UTF codepoint.");
+        }
+        char16_t surrogates[3] = {t,  // Zero-terminated pair handed to the converter as one UTF-16 string.
+                                  static_cast<char16_t>(stoi(
+                                      s.substr(j, kShortUnicodeLength), 0, 16)),
+                                  0};
+        i += kShortUnicodeLength + 2 + kShortUnicodeLength;  // First digits, the backslash and u, then the second digits.
+        std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>
+            converter;
+        return converter.to_bytes(surrogates);
+      } else {
+        i += kShortUnicodeLength;  // Leave i on the last digit of the single escape.
+        std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>
+            converter;
+        return converter.to_bytes(t);
+      }
+    }
+    throw SyntaxException(
+        "Expected 4 hex digits as unicode codepoint started with \\u. "
+        "Use \\U for 8 hex digits format.");
+  };
+
+  std::string unescaped;
+  bool escape = false;  // True when the previous character was an unconsumed backslash.
+
+  // The first and last characters are the enclosing quotes, so they are skipped.
+  for (int i = 1; i < static_cast<int>(s.size()) - 1; ++i) {
+    if (escape) {
+      switch (s[i]) {
+        case '\\':
+          unescaped += '\\';
+          break;
+        case '\'':
+          unescaped += '\'';
+          break;
+        case '"':
+          unescaped += '"';
+          break;
+        case 'B':
+        case 'b':
+          unescaped += '\b';
+          break;
+        case 'F':
+        case 'f':
+          unescaped += '\f';
+          break;
+        case 'N':
+        case 'n':
+          unescaped += '\n';
+          break;
+        case 'R':
+        case 'r':
+          unescaped += '\r';
+          break;
+        case 'T':
+        case 't':
+          unescaped += '\t';
+          break;
+        case 'U':  // Uppercase only: the 8-digit form. The helper advances i over the digits.
+          try {
+            unescaped += EncodeEscapedUnicodeCodepointUtf32(s, i);
+          } catch (const std::range_error &) {  // wstring_convert::to_bytes reports a failed conversion with std::range_error.
+            throw SemanticException("Invalid UTF codepoint.");
+          }
+          break;
+        case 'u':  // Lowercase only: the 4-digit form, possibly followed by a second escape for a surrogate pair.
+          try {
+            unescaped += EncodeEscapedUnicodeCodepointUtf16(s, i);
+          } catch (const std::range_error &) {
+            throw SemanticException("Invalid UTF codepoint.");
+          }
+          break;
+        default:
+          // This should never happen unless the grammar changes and the change
+          // to this production goes unnoticed here.
+          DLOG(FATAL) << "can't happen";
+          throw std::exception();
+      }
+      escape = false;
+    } else if (s[i] == '\\') {
+      escape = true;
+    } else {
+      unescaped += s[i];  // Ordinary characters are copied through unchanged.
+    }
+  }
+  return unescaped;
+}
+
+double ParseDoubleLiteral(const std::string &s) {  // Converts the text of a floating-point literal by delegating to utils::ParseDouble.
+  try {
+    return utils::ParseDouble(s);
+  } catch (const utils::BasicException &) {  // Re-reports the helper failure as a SemanticException.
+    throw SemanticException("Couldn't parse string to double.");
+  }
+}
+
+std::string ParseParameter(const std::string &s) {  // Extracts the parameter name from text that starts with a dollar sign.
+  DCHECK(s[0] == '$') << "Invalid string passed as parameter name";
+  if (s[1] != '`') return s.substr(1);  // Plain form: everything after the dollar sign. NOTE(review): s[1] is read before any size check.
+  // If the parameter name is an escaped symbolic name, then it is unescaped
+  // and the leading and trailing backquotes are removed.
+  DCHECK(s.size() > 3U && s.back() == '`')
+      << "Invalid string passed as parameter name";
+  std::string out;
+  for (int i = 2; i < static_cast<int>(s.size()) - 1; ++i) {  // Walks the text between the opening and closing backquote.
+    if (s[i] == '`') {  // A backquote is skipped and the character after it is kept (presumably a doubled backquote escape - confirm against the grammar).
+      ++i;
+    }
+    out.push_back(s[i]);
+  }
+  return out;
+}
+
+}  // namespace query::frontend
--- a/src/query/frontend/parsing.hpp
+++ b/src/query/frontend/parsing.hpp
@ -0,0 +1,16 @@
+/// @file
+#pragma once
+
+#include <cstdint>
+#include <string>
+
+namespace query::frontend {
+
+// Functions for parsing literals and parameter names taken from the text of
+// an openCypher query.
+int64_t ParseIntegerLiteral(const std::string &s);  // Integer literal text to int64_t.
+std::string ParseStringLiteral(const std::string &s);  // Quoted string literal text to its unescaped contents.
+double ParseDoubleLiteral(const std::string &s);  // Floating-point literal text to double.
+std::string ParseParameter(const std::string &s);  // Parameter text (leading dollar sign, optional backquotes) to the bare name.
+
+}  // namespace query::frontend
--- a/src/query/frontend/stripped.cpp
+++ b/src/query/frontend/stripped.cpp
@ -8,16 +8,16 @@

 #include "glog/logging.h"

-#include "query/common.hpp"
 #include "query/exceptions.hpp"
 #include "query/frontend/opencypher/generated/MemgraphCypher.h"
 #include "query/frontend/opencypher/generated/MemgraphCypherBaseVisitor.h"
 #include "query/frontend/opencypher/generated/MemgraphCypherLexer.h"
+#include "query/frontend/parsing.hpp"
 #include "query/frontend/stripped_lexer_constants.hpp"
 #include "utils/hashing/fnv.hpp"
 #include "utils/string.hpp"

-namespace query {
+namespace query::frontend {

 using namespace lexer_constants;

@ -515,4 +515,5 @@ int StrippedQuery::MatchWhitespaceAndComments(int start) const {
  if (state != State::OUT) return comment_position - start;
  return i - start;
 }
-}  // namespace query
+
+}  // namespace query::frontend
--- a/src/query/frontend/stripped.hpp
+++ b/src/query/frontend/stripped.hpp
@ -6,7 +6,7 @@
 #include "query/parameters.hpp"
 #include "utils/hashing/fnv.hpp"

-namespace query {
+namespace query::frontend {

 // Strings used to replace original tokens. Different types are replaced with
 // different token.
@ -88,4 +88,5 @@ class StrippedQuery {
  // Hash based on the stripped query.
  HashType hash_;
 };
-}  // namespace query
+
+}  // namespace query::frontend
--- a/src/query/interpreter.cpp
+++ b/src/query/interpreter.cpp
@ -791,7 +791,7 @@ Interpreter::Results Interpreter::operator()(
  utils::Timer parsing_timer;
  auto queries = StripAndParseQuery(query_string, &parameters, &ast_storage,
                                    &db_accessor, params);
-  StrippedQuery &stripped_query = queries.first;
+  frontend::StrippedQuery &stripped_query = queries.first;
  ParsedQuery &parsed_query = queries.second;
  auto parsing_time = parsing_timer.Elapsed();

@ -874,7 +874,7 @@ Interpreter::Results Interpreter::operator()(
    auto queries =
        StripAndParseQuery(query_string.substr(kExplainQueryStart.size()),
                           &parameters, &ast_storage, &db_accessor, params);
-    StrippedQuery &stripped_query = queries.first;
+    frontend::StrippedQuery &stripped_query = queries.first;
    ParsedQuery &parsed_query = queries.second;
    auto *cypher_query = utils::Downcast<CypherQuery>(parsed_query.query);
    CHECK(cypher_query)
@ -933,7 +933,7 @@ Interpreter::Results Interpreter::operator()(
    auto queries =
        StripAndParseQuery(query_string.substr(kProfileQueryStart.size()),
                           &parameters, &ast_storage, &db_accessor, params);
-    StrippedQuery &stripped_query = queries.first;
+    frontend::StrippedQuery &stripped_query = queries.first;
    ParsedQuery &parsed_query = queries.second;
    auto *cypher_query = utils::Downcast<CypherQuery>(parsed_query.query);
    CHECK(cypher_query)
@ -1167,12 +1167,12 @@ Interpreter::ParsedQuery Interpreter::ParseQuery(
                     ast_it->second.required_privileges};
 }

-std::pair<StrippedQuery, Interpreter::ParsedQuery>
+std::pair<frontend::StrippedQuery, Interpreter::ParsedQuery>
 Interpreter::StripAndParseQuery(
    const std::string &query_string, Parameters *parameters,
    AstStorage *ast_storage, database::GraphDbAccessor *db_accessor,
    const std::map<std::string, PropertyValue> &params) {
-  StrippedQuery stripped_query(query_string);
+  frontend::StrippedQuery stripped_query(query_string);

  *parameters = stripped_query.literals();
  for (const auto &param_pair : stripped_query.parameters()) {
--- a/src/query/interpreter.hpp
+++ b/src/query/interpreter.hpp
@ -225,7 +225,7 @@ class Interpreter {
  integrations::kafka::Streams *kafka_streams_ = nullptr;

 protected:
-  std::pair<StrippedQuery, ParsedQuery> StripAndParseQuery(
+  std::pair<frontend::StrippedQuery, ParsedQuery> StripAndParseQuery(
      const std::string &, Parameters *, AstStorage *ast_storage,
      database::GraphDbAccessor *,
      const std::map<std::string, PropertyValue> &);
--- a/tests/benchmark/query/stripped.cpp
+++ b/tests/benchmark/query/stripped.cpp
@ -50,7 +50,7 @@ int main(int argc, char *argv[]) {
  google::InitGoogleLogging(argv[0]);

  auto preprocess = [](const std::string &query) {
-    return query::StrippedQuery(query);
+    return query::frontend::StrippedQuery(query);
  };

  for (auto test : kQueries) {
--- a/tests/manual/query_hash.cpp
+++ b/tests/manual/query_hash.cpp
@ -21,7 +21,7 @@ int main(int argc, char **argv) {
  auto query = FLAGS_q;

  // run preprocessing
-  query::StrippedQuery preprocessed(query);
+  query::frontend::StrippedQuery preprocessed(query);

  // print query, stripped query, hash and variable values (propertie values)
  std::cout << fmt::format("Query: {}\n", query);
--- a/tests/manual/stripped_timing.cpp
+++ b/tests/manual/stripped_timing.cpp
@ -19,7 +19,7 @@ int main(int argc, const char **a) {

  clock_t begin = clock();
  for (int i = 0; i < REPEATS; ++i) {
-    query::StrippedQuery(std::string(query));
+    query::frontend::StrippedQuery(std::string(query));
  }
  clock_t end = clock();

--- a/tests/unit/stripped.cpp
+++ b/tests/unit/stripped.cpp
@ -10,6 +10,7 @@
 #include "query/typed_value.hpp"

 using namespace query;
+using namespace query::frontend;

 namespace {