diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f62ef47fe..19e287a24 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -50,6 +50,7 @@ set(mg_single_node_sources query/common.cpp query/frontend/ast/cypher_main_visitor.cpp query/frontend/ast/pretty_print.cpp + query/frontend/parsing.cpp query/frontend/semantic/required_privileges.cpp query/frontend/semantic/symbol_generator.cpp query/frontend/stripped.cpp @@ -65,7 +66,7 @@ set(mg_single_node_sources query/repl.cpp query/typed_value.cpp storage/common/constraints/record.cpp - storage/common/constraints/unique_constraints.cpp + storage/common/constraints/unique_constraints.cpp storage/common/locking/record_lock.cpp storage/common/types/property_value.cpp storage/common/types/property_value_store.cpp @@ -159,6 +160,7 @@ set(mg_distributed_sources query/common.cpp query/frontend/ast/pretty_print.cpp query/frontend/ast/cypher_main_visitor.cpp + query/frontend/parsing.cpp query/frontend/semantic/required_privileges.cpp query/frontend/semantic/symbol_generator.cpp query/frontend/stripped.cpp @@ -258,6 +260,7 @@ set(mg_single_node_ha_sources query/common.cpp query/frontend/ast/cypher_main_visitor.cpp query/frontend/ast/pretty_print.cpp + query/frontend/parsing.cpp query/frontend/semantic/required_privileges.cpp query/frontend/semantic/symbol_generator.cpp query/frontend/stripped.cpp @@ -273,7 +276,7 @@ set(mg_single_node_ha_sources query/repl.cpp query/typed_value.cpp storage/common/constraints/record.cpp - storage/common/constraints/unique_constraints.cpp + storage/common/constraints/unique_constraints.cpp storage/common/types/property_value.cpp storage/common/types/slk.cpp storage/common/types/property_value_store.cpp diff --git a/src/query/common.cpp b/src/query/common.cpp index f72ddde55..05bf5492d 100644 --- a/src/query/common.cpp +++ b/src/query/common.cpp @@ -1,186 +1,7 @@ #include "query/common.hpp" -#include <cctype> -#include <codecvt> -#include <locale> -#include <stdexcept> - -#include "glog/logging.h" - -#include "query/exceptions.hpp" -#include "utils/string.hpp" - namespace query { -int64_t ParseIntegerLiteral(const std::string &s) { - try { - // Not really correct since long long can have a bigger range than int64_t. - return static_cast<int64_t>(std::stoll(s, 0, 0)); - } catch (const std::out_of_range &) { - throw SemanticException("Integer literal exceeds 64 bits."); - } -} - -std::string ParseStringLiteral(const std::string &s) { - // These functions is declared as lambda since its semantics is highly - // specific for this conxtext and shouldn't be used elsewhere. - auto EncodeEscapedUnicodeCodepointUtf32 = [](const std::string &s, int &i) { - const int kLongUnicodeLength = 8; - int j = i + 1; - while (j < static_cast<int>(s.size()) - 1 && - j < i + kLongUnicodeLength + 1 && isxdigit(s[j])) { - ++j; - } - if (j - i == kLongUnicodeLength + 1) { - char32_t t = stoi(s.substr(i + 1, kLongUnicodeLength), 0, 16); - i += kLongUnicodeLength; - std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter; - return converter.to_bytes(t); - } - throw SyntaxException( - "Expected 8 hex digits as unicode codepoint started with \\U. " - "Use \\u for 4 hex digits format."); - }; - auto EncodeEscapedUnicodeCodepointUtf16 = [](const std::string &s, int &i) { - const int kShortUnicodeLength = 4; - int j = i + 1; - while (j < static_cast<int>(s.size()) - 1 && - j < i + kShortUnicodeLength + 1 && isxdigit(s[j])) { - ++j; - } - if (j - i >= kShortUnicodeLength + 1) { - char16_t t = stoi(s.substr(i + 1, kShortUnicodeLength), 0, 16); - if (t >= 0xD800 && t <= 0xDBFF) { - // t is high surrogate pair. Expect one more utf16 codepoint. - j = i + kShortUnicodeLength + 1; - if (j >= static_cast<int>(s.size()) - 1 || s[j] != '\\') { - throw SemanticException("Invalid UTF codepoint."); - } - ++j; - if (j >= static_cast<int>(s.size()) - 1 || - (s[j] != 'u' && s[j] != 'U')) { - throw SemanticException("Invalid UTF codepoint."); - } - ++j; - int k = j; - while (k < static_cast<int>(s.size()) - 1 && - k < j + kShortUnicodeLength && isxdigit(s[k])) { - ++k; - } - if (k != j + kShortUnicodeLength) { - throw SemanticException("Invalid UTF codepoint."); - } - char16_t surrogates[3] = {t, - static_cast<char16_t>(stoi( - s.substr(j, kShortUnicodeLength), 0, 16)), - 0}; - i += kShortUnicodeLength + 2 + kShortUnicodeLength; - std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> - converter; - return converter.to_bytes(surrogates); - } else { - i += kShortUnicodeLength; - std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> - converter; - return converter.to_bytes(t); - } - } - throw SyntaxException( - "Expected 4 hex digits as unicode codepoint started with \\u. " - "Use \\U for 8 hex digits format."); - }; - - std::string unescaped; - bool escape = false; - - // First and last char is quote, we don't need to look at them. - for (int i = 1; i < static_cast<int>(s.size()) - 1; ++i) { - if (escape) { - switch (s[i]) { - case '\\': - unescaped += '\\'; - break; - case '\'': - unescaped += '\''; - break; - case '"': - unescaped += '"'; - break; - case 'B': - case 'b': - unescaped += '\b'; - break; - case 'F': - case 'f': - unescaped += '\f'; - break; - case 'N': - case 'n': - unescaped += '\n'; - break; - case 'R': - case 'r': - unescaped += '\r'; - break; - case 'T': - case 't': - unescaped += '\t'; - break; - case 'U': - try { - unescaped += EncodeEscapedUnicodeCodepointUtf32(s, i); - } catch (const std::range_error &) { - throw SemanticException("Invalid UTF codepoint."); - } - break; - case 'u': - try { - unescaped += EncodeEscapedUnicodeCodepointUtf16(s, i); - } catch (const std::range_error &) { - throw SemanticException("Invalid UTF codepoint."); - } - break; - default: - // This should never happen, except grammar changes and we don't - // notice change in this production. - DLOG(FATAL) << "can't happen"; - throw std::exception(); - } - escape = false; - } else if (s[i] == '\\') { - escape = true; - } else { - unescaped += s[i]; - } - } - return unescaped; -} - -double ParseDoubleLiteral(const std::string &s) { - try { - return utils::ParseDouble(s); - } catch (const utils::BasicException &) { - throw SemanticException("Couldn't parse string to double."); - } -} - -std::string ParseParameter(const std::string &s) { - DCHECK(s[0] == '$') << "Invalid string passed as parameter name"; - if (s[1] != '`') return s.substr(1); - // If parameter name is escaped symbolic name then symbolic name should be - // unescaped and leading and trailing backquote should be removed. - DCHECK(s.size() > 3U && s.back() == '`') - << "Invalid string passed as parameter name"; - std::string out; - for (int i = 2; i < static_cast<int>(s.size()) - 1; ++i) { - if (s[i] == '`') { - ++i; - } - out.push_back(s[i]); - } - return out; -} - void ReconstructTypedValue(TypedValue &value) { using Type = TypedValue::Type; switch (value.type()) { diff --git a/src/query/common.hpp b/src/query/common.hpp index 76bdfb8f0..5011a4aef 100644 --- a/src/query/common.hpp +++ b/src/query/common.hpp @@ -15,13 +15,6 @@ namespace query { -// These are the functions for parsing literals and parameter names from -// opencypher query. -int64_t ParseIntegerLiteral(const std::string &s); -std::string ParseStringLiteral(const std::string &s); -double ParseDoubleLiteral(const std::string &s); -std::string ParseParameter(const std::string &s); - /// Indicates that some part of query execution should see the OLD graph state /// (the latest state before the current transaction+command), or NEW (state as /// changed by the current transaction+command). diff --git a/src/query/frontend/ast/cypher_main_visitor.cpp b/src/query/frontend/ast/cypher_main_visitor.cpp index cd00284be..e8c821105 100644 --- a/src/query/frontend/ast/cypher_main_visitor.cpp +++ b/src/query/frontend/ast/cypher_main_visitor.cpp @@ -14,8 +14,8 @@ #include <glog/logging.h> -#include "query/common.hpp" #include "query/exceptions.hpp" +#include "query/frontend/parsing.hpp" #include "query/interpret/awesome_memgraph_functions.hpp" #include "utils/exceptions.hpp" #include "utils/string.hpp" diff --git a/src/query/frontend/parsing.cpp b/src/query/frontend/parsing.cpp new file mode 100644 index 000000000..27637848a --- /dev/null +++ b/src/query/frontend/parsing.cpp @@ -0,0 +1,184 @@ +#include "query/frontend/parsing.hpp" + +#include <cctype> +#include <codecvt> +#include <locale> +#include <stdexcept> + +#include <glog/logging.h> + +#include "query/exceptions.hpp" +#include "utils/string.hpp" + +namespace query::frontend { + +int64_t ParseIntegerLiteral(const std::string &s) { + try { + // Not really correct since long long can have a bigger range than int64_t. + return static_cast<int64_t>(std::stoll(s, 0, 0)); + } catch (const std::out_of_range &) { + throw SemanticException("Integer literal exceeds 64 bits."); + } +} + +std::string ParseStringLiteral(const std::string &s) { + // These functions is declared as lambda since its semantics is highly + // specific for this conxtext and shouldn't be used elsewhere. + auto EncodeEscapedUnicodeCodepointUtf32 = [](const std::string &s, int &i) { + const int kLongUnicodeLength = 8; + int j = i + 1; + while (j < static_cast<int>(s.size()) - 1 && + j < i + kLongUnicodeLength + 1 && isxdigit(s[j])) { + ++j; + } + if (j - i == kLongUnicodeLength + 1) { + char32_t t = stoi(s.substr(i + 1, kLongUnicodeLength), 0, 16); + i += kLongUnicodeLength; + std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter; + return converter.to_bytes(t); + } + throw SyntaxException( + "Expected 8 hex digits as unicode codepoint started with \\U. " + "Use \\u for 4 hex digits format."); + }; + auto EncodeEscapedUnicodeCodepointUtf16 = [](const std::string &s, int &i) { + const int kShortUnicodeLength = 4; + int j = i + 1; + while (j < static_cast<int>(s.size()) - 1 && + j < i + kShortUnicodeLength + 1 && isxdigit(s[j])) { + ++j; + } + if (j - i >= kShortUnicodeLength + 1) { + char16_t t = stoi(s.substr(i + 1, kShortUnicodeLength), 0, 16); + if (t >= 0xD800 && t <= 0xDBFF) { + // t is high surrogate pair. Expect one more utf16 codepoint. + j = i + kShortUnicodeLength + 1; + if (j >= static_cast<int>(s.size()) - 1 || s[j] != '\\') { + throw SemanticException("Invalid UTF codepoint."); + } + ++j; + if (j >= static_cast<int>(s.size()) - 1 || + (s[j] != 'u' && s[j] != 'U')) { + throw SemanticException("Invalid UTF codepoint."); + } + ++j; + int k = j; + while (k < static_cast<int>(s.size()) - 1 && + k < j + kShortUnicodeLength && isxdigit(s[k])) { + ++k; + } + if (k != j + kShortUnicodeLength) { + throw SemanticException("Invalid UTF codepoint."); + } + char16_t surrogates[3] = {t, + static_cast<char16_t>(stoi( + s.substr(j, kShortUnicodeLength), 0, 16)), + 0}; + i += kShortUnicodeLength + 2 + kShortUnicodeLength; + std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> + converter; + return converter.to_bytes(surrogates); + } else { + i += kShortUnicodeLength; + std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> + converter; + return converter.to_bytes(t); + } + } + throw SyntaxException( + "Expected 4 hex digits as unicode codepoint started with \\u. " + "Use \\U for 8 hex digits format."); + }; + + std::string unescaped; + bool escape = false; + + // First and last char is quote, we don't need to look at them. + for (int i = 1; i < static_cast<int>(s.size()) - 1; ++i) { + if (escape) { + switch (s[i]) { + case '\\': + unescaped += '\\'; + break; + case '\'': + unescaped += '\''; + break; + case '"': + unescaped += '"'; + break; + case 'B': + case 'b': + unescaped += '\b'; + break; + case 'F': + case 'f': + unescaped += '\f'; + break; + case 'N': + case 'n': + unescaped += '\n'; + break; + case 'R': + case 'r': + unescaped += '\r'; + break; + case 'T': + case 't': + unescaped += '\t'; + break; + case 'U': + try { + unescaped += EncodeEscapedUnicodeCodepointUtf32(s, i); + } catch (const std::range_error &) { + throw SemanticException("Invalid UTF codepoint."); + } + break; + case 'u': + try { + unescaped += EncodeEscapedUnicodeCodepointUtf16(s, i); + } catch (const std::range_error &) { + throw SemanticException("Invalid UTF codepoint."); + } + break; + default: + // This should never happen, except grammar changes and we don't + // notice change in this production. + DLOG(FATAL) << "can't happen"; + throw std::exception(); + } + escape = false; + } else if (s[i] == '\\') { + escape = true; + } else { + unescaped += s[i]; + } + } + return unescaped; +} + +double ParseDoubleLiteral(const std::string &s) { + try { + return utils::ParseDouble(s); + } catch (const utils::BasicException &) { + throw SemanticException("Couldn't parse string to double."); + } +} + +std::string ParseParameter(const std::string &s) { + DCHECK(s[0] == '$') << "Invalid string passed as parameter name"; + if (s[1] != '`') return s.substr(1); + // If parameter name is escaped symbolic name then symbolic name should be + // unescaped and leading and trailing backquote should be removed. + DCHECK(s.size() > 3U && s.back() == '`') + << "Invalid string passed as parameter name"; + std::string out; + for (int i = 2; i < static_cast<int>(s.size()) - 1; ++i) { + if (s[i] == '`') { + ++i; + } + out.push_back(s[i]); + } + return out; +} + +} // namespace query::frontend diff --git a/src/query/frontend/parsing.hpp b/src/query/frontend/parsing.hpp new file mode 100644 index 000000000..0e8a99050 --- /dev/null +++ b/src/query/frontend/parsing.hpp @@ -0,0 +1,16 @@ +/// @file +#pragma once + +#include <cstdint> +#include <string> + +namespace query::frontend { + +// These are the functions for parsing literals and parameter names from +// opencypher query. +int64_t ParseIntegerLiteral(const std::string &s); +std::string ParseStringLiteral(const std::string &s); +double ParseDoubleLiteral(const std::string &s); +std::string ParseParameter(const std::string &s); + +} // namespace query::frontend diff --git a/src/query/frontend/stripped.cpp b/src/query/frontend/stripped.cpp index b5267d993..ab4fd3eb0 100644 --- a/src/query/frontend/stripped.cpp +++ b/src/query/frontend/stripped.cpp @@ -8,16 +8,16 @@ #include "glog/logging.h" -#include "query/common.hpp" #include "query/exceptions.hpp" #include "query/frontend/opencypher/generated/MemgraphCypher.h" #include "query/frontend/opencypher/generated/MemgraphCypherBaseVisitor.h" #include "query/frontend/opencypher/generated/MemgraphCypherLexer.h" +#include "query/frontend/parsing.hpp" #include "query/frontend/stripped_lexer_constants.hpp" #include "utils/hashing/fnv.hpp" #include "utils/string.hpp" -namespace query { +namespace query::frontend { using namespace lexer_constants; @@ -515,4 +515,5 @@ int StrippedQuery::MatchWhitespaceAndComments(int start) const { if (state != State::OUT) return comment_position - start; return i - start; } -} // namespace query + +} // namespace query::frontend diff --git a/src/query/frontend/stripped.hpp b/src/query/frontend/stripped.hpp index 22919b9ce..ff1138423 100644 --- a/src/query/frontend/stripped.hpp +++ b/src/query/frontend/stripped.hpp @@ -6,7 +6,7 @@ #include "query/parameters.hpp" #include "utils/hashing/fnv.hpp" -namespace query { +namespace query::frontend { // Strings used to replace original tokens. Different types are replaced with // different token. @@ -88,4 +88,5 @@ class StrippedQuery { // Hash based on the stripped query. HashType hash_; }; -} // namespace query + +} // namespace query::frontend diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp index 61753ebca..e45491c02 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -791,7 +791,7 @@ Interpreter::Results Interpreter::operator()( utils::Timer parsing_timer; auto queries = StripAndParseQuery(query_string, ¶meters, &ast_storage, &db_accessor, params); - StrippedQuery &stripped_query = queries.first; + frontend::StrippedQuery &stripped_query = queries.first; ParsedQuery &parsed_query = queries.second; auto parsing_time = parsing_timer.Elapsed(); @@ -874,7 +874,7 @@ Interpreter::Results Interpreter::operator()( auto queries = StripAndParseQuery(query_string.substr(kExplainQueryStart.size()), ¶meters, &ast_storage, &db_accessor, params); - StrippedQuery &stripped_query = queries.first; + frontend::StrippedQuery &stripped_query = queries.first; ParsedQuery &parsed_query = queries.second; auto *cypher_query = utils::Downcast<CypherQuery>(parsed_query.query); CHECK(cypher_query) @@ -933,7 +933,7 @@ Interpreter::Results Interpreter::operator()( auto queries = StripAndParseQuery(query_string.substr(kProfileQueryStart.size()), ¶meters, &ast_storage, &db_accessor, params); - StrippedQuery &stripped_query = queries.first; + frontend::StrippedQuery &stripped_query = queries.first; ParsedQuery &parsed_query = queries.second; auto *cypher_query = utils::Downcast<CypherQuery>(parsed_query.query); CHECK(cypher_query) @@ -1167,12 +1167,12 @@ Interpreter::ParsedQuery Interpreter::ParseQuery( ast_it->second.required_privileges}; } -std::pair<StrippedQuery, Interpreter::ParsedQuery> +std::pair<frontend::StrippedQuery, Interpreter::ParsedQuery> Interpreter::StripAndParseQuery( const std::string &query_string, Parameters *parameters, AstStorage *ast_storage, database::GraphDbAccessor *db_accessor, const std::map<std::string, PropertyValue> ¶ms) { - StrippedQuery stripped_query(query_string); + frontend::StrippedQuery stripped_query(query_string); *parameters = stripped_query.literals(); for (const auto ¶m_pair : stripped_query.parameters()) { diff --git a/src/query/interpreter.hpp b/src/query/interpreter.hpp index c8321f01b..ab869dfb6 100644 --- a/src/query/interpreter.hpp +++ b/src/query/interpreter.hpp @@ -225,7 +225,7 @@ class Interpreter { integrations::kafka::Streams *kafka_streams_ = nullptr; protected: - std::pair<StrippedQuery, ParsedQuery> StripAndParseQuery( + std::pair<frontend::StrippedQuery, ParsedQuery> StripAndParseQuery( const std::string &, Parameters *, AstStorage *ast_storage, database::GraphDbAccessor *, const std::map<std::string, PropertyValue> &); diff --git a/tests/benchmark/query/stripped.cpp b/tests/benchmark/query/stripped.cpp index d9cacdcc6..20896df7f 100644 --- a/tests/benchmark/query/stripped.cpp +++ b/tests/benchmark/query/stripped.cpp @@ -50,7 +50,7 @@ int main(int argc, char *argv[]) { google::InitGoogleLogging(argv[0]); auto preprocess = [](const std::string &query) { - return query::StrippedQuery(query); + return query::frontend::StrippedQuery(query); }; for (auto test : kQueries) { diff --git a/tests/manual/query_hash.cpp b/tests/manual/query_hash.cpp index 7e26fd72f..c4b2138c8 100644 --- a/tests/manual/query_hash.cpp +++ b/tests/manual/query_hash.cpp @@ -21,7 +21,7 @@ int main(int argc, char **argv) { auto query = FLAGS_q; // run preprocessing - query::StrippedQuery preprocessed(query); + query::frontend::StrippedQuery preprocessed(query); // print query, stripped query, hash and variable values (propertie values) std::cout << fmt::format("Query: {}\n", query); diff --git a/tests/manual/stripped_timing.cpp b/tests/manual/stripped_timing.cpp index fcd73deb8..3ba28cd83 100644 --- a/tests/manual/stripped_timing.cpp +++ b/tests/manual/stripped_timing.cpp @@ -19,7 +19,7 @@ int main(int argc, const char **a) { clock_t begin = clock(); for (int i = 0; i < REPEATS; ++i) { - query::StrippedQuery(std::string(query)); + query::frontend::StrippedQuery(std::string(query)); } clock_t end = clock(); diff --git a/tests/unit/stripped.cpp b/tests/unit/stripped.cpp index 8517abb8d..9f1ac53c9 100644 --- a/tests/unit/stripped.cpp +++ b/tests/unit/stripped.cpp @@ -10,6 +10,7 @@ #include "query/typed_value.hpp" using namespace query; +using namespace query::frontend; namespace {