Move Parsing utils from query/common to frontend/parsing

Reviewers: mtomic

Reviewed By: mtomic

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D2195
This commit is contained in:
Teon Banek 2019-07-10 14:30:11 +02:00
parent 8414479abe
commit c4c6febbc4
14 changed files with 223 additions and 203 deletions

View File

@ -50,6 +50,7 @@ set(mg_single_node_sources
query/common.cpp
query/frontend/ast/cypher_main_visitor.cpp
query/frontend/ast/pretty_print.cpp
query/frontend/parsing.cpp
query/frontend/semantic/required_privileges.cpp
query/frontend/semantic/symbol_generator.cpp
query/frontend/stripped.cpp
@ -65,7 +66,7 @@ set(mg_single_node_sources
query/repl.cpp
query/typed_value.cpp
storage/common/constraints/record.cpp
storage/common/constraints/unique_constraints.cpp
storage/common/constraints/unique_constraints.cpp
storage/common/locking/record_lock.cpp
storage/common/types/property_value.cpp
storage/common/types/property_value_store.cpp
@ -159,6 +160,7 @@ set(mg_distributed_sources
query/common.cpp
query/frontend/ast/pretty_print.cpp
query/frontend/ast/cypher_main_visitor.cpp
query/frontend/parsing.cpp
query/frontend/semantic/required_privileges.cpp
query/frontend/semantic/symbol_generator.cpp
query/frontend/stripped.cpp
@ -258,6 +260,7 @@ set(mg_single_node_ha_sources
query/common.cpp
query/frontend/ast/cypher_main_visitor.cpp
query/frontend/ast/pretty_print.cpp
query/frontend/parsing.cpp
query/frontend/semantic/required_privileges.cpp
query/frontend/semantic/symbol_generator.cpp
query/frontend/stripped.cpp
@ -273,7 +276,7 @@ set(mg_single_node_ha_sources
query/repl.cpp
query/typed_value.cpp
storage/common/constraints/record.cpp
storage/common/constraints/unique_constraints.cpp
storage/common/constraints/unique_constraints.cpp
storage/common/types/property_value.cpp
storage/common/types/slk.cpp
storage/common/types/property_value_store.cpp

View File

@ -1,186 +1,7 @@
#include "query/common.hpp"
#include <cctype>
#include <codecvt>
#include <locale>
#include <stdexcept>
#include "glog/logging.h"
#include "query/exceptions.hpp"
#include "utils/string.hpp"
namespace query {
/// Parses an opencypher integer literal.
///
/// The numeric base is auto-detected by std::stoll (base argument 0).
///
/// @throw SemanticException if std::stoll reports the value as out of range.
int64_t ParseIntegerLiteral(const std::string &s) {
  long long parsed = 0;
  try {
    parsed = std::stoll(s, nullptr, 0);
  } catch (const std::out_of_range &) {
    throw SemanticException("Integer literal exceeds 64 bits.");
  }
  // Not strictly correct: long long is allowed to be wider than int64_t, in
  // which case this cast could truncate.
  return static_cast<int64_t>(parsed);
}
/// Converts a quoted opencypher string literal into the string it denotes.
///
/// The first and last characters of `s` are taken to be the enclosing quotes
/// and are dropped. Backslash escapes in between are resolved: `\\`, `\'`,
/// `\"`, `\b`, `\f`, `\n`, `\r`, `\t` (these letters are accepted in either
/// case), `\uXXXX` (a UTF-16 code unit; a high surrogate must be followed by a
/// second 4 digit escape) and `\UXXXXXXXX` (a UTF-32 codepoint). Unicode
/// escapes are appended to the result encoded as UTF-8.
///
/// @throw SyntaxException if `\u` or `\U` isn't followed by enough hex digits.
/// @throw SemanticException if the escaped value can't be converted to UTF-8
///     or a high surrogate isn't followed by a second escape.
/// @throw std::exception (after DLOG(FATAL)) on any other escaped character.
std::string ParseStringLiteral(const std::string &s) {
  // Index of the closing quote, i.e. one past the last content character.
  const int content_end = static_cast<int>(s.size()) - 1;

  // <cctype> classifiers have undefined behaviour for negative arguments, and
  // a plain `char` holding a UTF-8 byte may be negative, so convert first.
  auto is_hex_digit = [](char c) {
    return std::isxdigit(static_cast<unsigned char>(c)) != 0;
  };

  // Returns the index just past the run of hex digits starting at `begin`.
  // At most `max_digits` characters are examined and the closing quote is
  // never looked at.
  auto hex_run_end = [&](int begin, int max_digits) {
    int pos = begin;
    while (pos < content_end && pos < begin + max_digits &&
           is_hex_digit(s[pos])) {
      ++pos;
    }
    return pos;
  };

  // Parses `count` already validated hex digits starting at `begin`.
  // std::stoul is used rather than std::stoi because 8 digit values above
  // INT_MAX would make std::stoi throw std::out_of_range, which the callers
  // below don't translate into a query exception.
  auto parse_hex = [&](int begin, int count) {
    return std::stoul(s.substr(begin, count), nullptr, 16);
  };

  // These helpers are lambdas since their semantics are highly specific to
  // this context and they shouldn't be used elsewhere. `i` is the index of the
  // 'U' / 'u' character; on success it is advanced to the last consumed digit.
  auto encode_utf32 = [&](int &i) -> std::string {
    const int kLongUnicodeLength = 8;
    if (hex_run_end(i + 1, kLongUnicodeLength) - i != kLongUnicodeLength + 1) {
      throw SyntaxException(
          "Expected 8 hex digits as unicode codepoint started with \\U. "
          "Use \\u for 4 hex digits format.");
    }
    const auto codepoint =
        static_cast<char32_t>(parse_hex(i + 1, kLongUnicodeLength));
    i += kLongUnicodeLength;
    std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter;
    return converter.to_bytes(codepoint);
  };

  auto encode_utf16 = [&](int &i) -> std::string {
    const int kShortUnicodeLength = 4;
    if (hex_run_end(i + 1, kShortUnicodeLength) - i <
        kShortUnicodeLength + 1) {
      throw SyntaxException(
          "Expected 4 hex digits as unicode codepoint started with \\u. "
          "Use \\U for 8 hex digits format.");
    }
    const auto unit =
        static_cast<char16_t>(parse_hex(i + 1, kShortUnicodeLength));
    std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>
        converter;
    if (unit < 0xD800 || unit > 0xDBFF) {
      i += kShortUnicodeLength;
      return converter.to_bytes(unit);
    }
    // `unit` is a high surrogate. Expect one more utf16 code unit, written as
    // a backslash, 'u' or 'U', and exactly 4 hex digits.
    int j = i + kShortUnicodeLength + 1;
    if (j >= content_end || s[j] != '\\') {
      throw SemanticException("Invalid UTF codepoint.");
    }
    ++j;
    if (j >= content_end || (s[j] != 'u' && s[j] != 'U')) {
      throw SemanticException("Invalid UTF codepoint.");
    }
    ++j;
    if (hex_run_end(j, kShortUnicodeLength) != j + kShortUnicodeLength) {
      throw SemanticException("Invalid UTF codepoint.");
    }
    const char16_t surrogates[3] = {
        unit, static_cast<char16_t>(parse_hex(j, kShortUnicodeLength)), 0};
    // Skip the first 4 digits, the "\u" of the second escape and its 4 digits.
    i += kShortUnicodeLength + 2 + kShortUnicodeLength;
    return converter.to_bytes(surrogates);
  };

  std::string unescaped;
  bool escape = false;
  // First and last char is quote, we don't need to look at them.
  for (int i = 1; i < content_end; ++i) {
    if (escape) {
      switch (s[i]) {
        case '\\':
          unescaped += '\\';
          break;
        case '\'':
          unescaped += '\'';
          break;
        case '"':
          unescaped += '"';
          break;
        case 'B':
        case 'b':
          unescaped += '\b';
          break;
        case 'F':
        case 'f':
          unescaped += '\f';
          break;
        case 'N':
        case 'n':
          unescaped += '\n';
          break;
        case 'R':
        case 'r':
          unescaped += '\r';
          break;
        case 'T':
        case 't':
          unescaped += '\t';
          break;
        case 'U':
          try {
            unescaped += encode_utf32(i);
          } catch (const std::range_error &) {
            // Thrown by wstring_convert when the conversion fails.
            throw SemanticException("Invalid UTF codepoint.");
          }
          break;
        case 'u':
          try {
            unescaped += encode_utf16(i);
          } catch (const std::range_error &) {
            // Thrown by wstring_convert when the conversion fails.
            throw SemanticException("Invalid UTF codepoint.");
          }
          break;
        default:
          // This should never happen, unless the grammar changes and we don't
          // notice the change in this production.
          DLOG(FATAL) << "can't happen";
          throw std::exception();
      }
      escape = false;
    } else if (s[i] == '\\') {
      escape = true;
    } else {
      unescaped += s[i];
    }
  }
  return unescaped;
}
/// Parses an opencypher floating point literal using utils::ParseDouble.
///
/// @throw SemanticException if utils::ParseDouble throws
///     utils::BasicException.
double ParseDoubleLiteral(const std::string &s) {
  double parsed = 0.0;
  try {
    parsed = utils::ParseDouble(s);
  } catch (const utils::BasicException &) {
    throw SemanticException("Couldn't parse string to double.");
  }
  return parsed;
}
/// Extracts the parameter name from a `$name` or `` $`escaped name` `` token.
///
/// For the plain form everything after the leading '$' is returned. For the
/// backquoted form the enclosing backquotes are dropped and, inside the name,
/// each backquote is skipped while the character following it is kept.
std::string ParseParameter(const std::string &s) {
  DCHECK(s[0] == '$') << "Invalid string passed as parameter name";
  const bool is_escaped = s[1] == '`';
  if (!is_escaped) return s.substr(1);
  // The parameter name is an escaped symbolic name, so it has to be unescaped
  // and stripped of its leading and trailing backquote.
  DCHECK(s.size() > 3U && s.back() == '`')
      << "Invalid string passed as parameter name";
  std::string name;
  // Index of the trailing backquote; the content ends just before it.
  const int closing = static_cast<int>(s.size()) - 1;
  int pos = 2;
  while (pos < closing) {
    if (s[pos] == '`') {
      ++pos;
    }
    name.push_back(s[pos]);
    ++pos;
  }
  return name;
}
void ReconstructTypedValue(TypedValue &value) {
using Type = TypedValue::Type;
switch (value.type()) {

View File

@ -15,13 +15,6 @@
namespace query {
// These are the functions for parsing literals and parameter names from
// an opencypher query.
/// Parses an integer literal with std::stoll, auto-detecting the base.
/// @throw SemanticException if the value is out of range.
int64_t ParseIntegerLiteral(const std::string &s);
/// Drops the enclosing quotes of a string literal and resolves its backslash
/// escapes, including `\uXXXX` and `\UXXXXXXXX`, which are emitted as UTF-8.
/// @throw SyntaxException if `\u` / `\U` lacks the required hex digits.
/// @throw SemanticException if an escaped codepoint is invalid.
std::string ParseStringLiteral(const std::string &s);
/// Parses a floating point literal with utils::ParseDouble.
/// @throw SemanticException if the string can't be parsed.
double ParseDoubleLiteral(const std::string &s);
/// Returns the parameter name without the leading '$'. A backquoted name also
/// loses its enclosing backquotes and has its inner backquotes unescaped.
std::string ParseParameter(const std::string &s);
/// Indicates that some part of query execution should see the OLD graph state
/// (the latest state before the current transaction+command), or NEW (state as
/// changed by the current transaction+command).

View File

@ -14,8 +14,8 @@
#include <glog/logging.h>
#include "query/common.hpp"
#include "query/exceptions.hpp"
#include "query/frontend/parsing.hpp"
#include "query/interpret/awesome_memgraph_functions.hpp"
#include "utils/exceptions.hpp"
#include "utils/string.hpp"

View File

@ -0,0 +1,184 @@
#include "query/frontend/parsing.hpp"
#include <cctype>
#include <codecvt>
#include <locale>
#include <stdexcept>
#include <glog/logging.h>
#include "query/exceptions.hpp"
#include "utils/string.hpp"
namespace query::frontend {
/// Parses an opencypher integer literal.
///
/// The numeric base is auto-detected by std::stoll (base argument 0).
///
/// @throw SemanticException if std::stoll reports the value as out of range.
int64_t ParseIntegerLiteral(const std::string &s) {
  long long parsed = 0;
  try {
    parsed = std::stoll(s, nullptr, 0);
  } catch (const std::out_of_range &) {
    throw SemanticException("Integer literal exceeds 64 bits.");
  }
  // Not strictly correct: long long is allowed to be wider than int64_t, in
  // which case this cast could truncate.
  return static_cast<int64_t>(parsed);
}
/// Converts a quoted opencypher string literal into the string it denotes.
///
/// The first and last characters of `s` are taken to be the enclosing quotes
/// and are dropped. Backslash escapes in between are resolved: `\\`, `\'`,
/// `\"`, `\b`, `\f`, `\n`, `\r`, `\t` (these letters are accepted in either
/// case), `\uXXXX` (a UTF-16 code unit; a high surrogate must be followed by a
/// second 4 digit escape) and `\UXXXXXXXX` (a UTF-32 codepoint). Unicode
/// escapes are appended to the result encoded as UTF-8.
///
/// @throw SyntaxException if `\u` or `\U` isn't followed by enough hex digits.
/// @throw SemanticException if the escaped value can't be converted to UTF-8
///     or a high surrogate isn't followed by a second escape.
/// @throw std::exception (after DLOG(FATAL)) on any other escaped character.
std::string ParseStringLiteral(const std::string &s) {
  // Index of the closing quote, i.e. one past the last content character.
  const int content_end = static_cast<int>(s.size()) - 1;

  // <cctype> classifiers have undefined behaviour for negative arguments, and
  // a plain `char` holding a UTF-8 byte may be negative, so convert first.
  auto is_hex_digit = [](char c) {
    return std::isxdigit(static_cast<unsigned char>(c)) != 0;
  };

  // Returns the index just past the run of hex digits starting at `begin`.
  // At most `max_digits` characters are examined and the closing quote is
  // never looked at.
  auto hex_run_end = [&](int begin, int max_digits) {
    int pos = begin;
    while (pos < content_end && pos < begin + max_digits &&
           is_hex_digit(s[pos])) {
      ++pos;
    }
    return pos;
  };

  // Parses `count` already validated hex digits starting at `begin`.
  // std::stoul is used rather than std::stoi because 8 digit values above
  // INT_MAX would make std::stoi throw std::out_of_range, which the callers
  // below don't translate into a query exception.
  auto parse_hex = [&](int begin, int count) {
    return std::stoul(s.substr(begin, count), nullptr, 16);
  };

  // These helpers are lambdas since their semantics are highly specific to
  // this context and they shouldn't be used elsewhere. `i` is the index of the
  // 'U' / 'u' character; on success it is advanced to the last consumed digit.
  auto encode_utf32 = [&](int &i) -> std::string {
    const int kLongUnicodeLength = 8;
    if (hex_run_end(i + 1, kLongUnicodeLength) - i != kLongUnicodeLength + 1) {
      throw SyntaxException(
          "Expected 8 hex digits as unicode codepoint started with \\U. "
          "Use \\u for 4 hex digits format.");
    }
    const auto codepoint =
        static_cast<char32_t>(parse_hex(i + 1, kLongUnicodeLength));
    i += kLongUnicodeLength;
    std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter;
    return converter.to_bytes(codepoint);
  };

  auto encode_utf16 = [&](int &i) -> std::string {
    const int kShortUnicodeLength = 4;
    if (hex_run_end(i + 1, kShortUnicodeLength) - i <
        kShortUnicodeLength + 1) {
      throw SyntaxException(
          "Expected 4 hex digits as unicode codepoint started with \\u. "
          "Use \\U for 8 hex digits format.");
    }
    const auto unit =
        static_cast<char16_t>(parse_hex(i + 1, kShortUnicodeLength));
    std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>
        converter;
    if (unit < 0xD800 || unit > 0xDBFF) {
      i += kShortUnicodeLength;
      return converter.to_bytes(unit);
    }
    // `unit` is a high surrogate. Expect one more utf16 code unit, written as
    // a backslash, 'u' or 'U', and exactly 4 hex digits.
    int j = i + kShortUnicodeLength + 1;
    if (j >= content_end || s[j] != '\\') {
      throw SemanticException("Invalid UTF codepoint.");
    }
    ++j;
    if (j >= content_end || (s[j] != 'u' && s[j] != 'U')) {
      throw SemanticException("Invalid UTF codepoint.");
    }
    ++j;
    if (hex_run_end(j, kShortUnicodeLength) != j + kShortUnicodeLength) {
      throw SemanticException("Invalid UTF codepoint.");
    }
    const char16_t surrogates[3] = {
        unit, static_cast<char16_t>(parse_hex(j, kShortUnicodeLength)), 0};
    // Skip the first 4 digits, the "\u" of the second escape and its 4 digits.
    i += kShortUnicodeLength + 2 + kShortUnicodeLength;
    return converter.to_bytes(surrogates);
  };

  std::string unescaped;
  bool escape = false;
  // First and last char is quote, we don't need to look at them.
  for (int i = 1; i < content_end; ++i) {
    if (escape) {
      switch (s[i]) {
        case '\\':
          unescaped += '\\';
          break;
        case '\'':
          unescaped += '\'';
          break;
        case '"':
          unescaped += '"';
          break;
        case 'B':
        case 'b':
          unescaped += '\b';
          break;
        case 'F':
        case 'f':
          unescaped += '\f';
          break;
        case 'N':
        case 'n':
          unescaped += '\n';
          break;
        case 'R':
        case 'r':
          unescaped += '\r';
          break;
        case 'T':
        case 't':
          unescaped += '\t';
          break;
        case 'U':
          try {
            unescaped += encode_utf32(i);
          } catch (const std::range_error &) {
            // Thrown by wstring_convert when the conversion fails.
            throw SemanticException("Invalid UTF codepoint.");
          }
          break;
        case 'u':
          try {
            unescaped += encode_utf16(i);
          } catch (const std::range_error &) {
            // Thrown by wstring_convert when the conversion fails.
            throw SemanticException("Invalid UTF codepoint.");
          }
          break;
        default:
          // This should never happen, unless the grammar changes and we don't
          // notice the change in this production.
          DLOG(FATAL) << "can't happen";
          throw std::exception();
      }
      escape = false;
    } else if (s[i] == '\\') {
      escape = true;
    } else {
      unescaped += s[i];
    }
  }
  return unescaped;
}
/// Parses an opencypher floating point literal using utils::ParseDouble.
///
/// @throw SemanticException if utils::ParseDouble throws
///     utils::BasicException.
double ParseDoubleLiteral(const std::string &s) {
  double parsed = 0.0;
  try {
    parsed = utils::ParseDouble(s);
  } catch (const utils::BasicException &) {
    throw SemanticException("Couldn't parse string to double.");
  }
  return parsed;
}
/// Extracts the parameter name from a `$name` or `` $`escaped name` `` token.
///
/// For the plain form everything after the leading '$' is returned. For the
/// backquoted form the enclosing backquotes are dropped and, inside the name,
/// each backquote is skipped while the character following it is kept.
std::string ParseParameter(const std::string &s) {
  DCHECK(s[0] == '$') << "Invalid string passed as parameter name";
  const bool is_escaped = s[1] == '`';
  if (!is_escaped) return s.substr(1);
  // The parameter name is an escaped symbolic name, so it has to be unescaped
  // and stripped of its leading and trailing backquote.
  DCHECK(s.size() > 3U && s.back() == '`')
      << "Invalid string passed as parameter name";
  std::string name;
  // Index of the trailing backquote; the content ends just before it.
  const int closing = static_cast<int>(s.size()) - 1;
  int pos = 2;
  while (pos < closing) {
    if (s[pos] == '`') {
      ++pos;
    }
    name.push_back(s[pos]);
    ++pos;
  }
  return name;
}
} // namespace query::frontend

View File

@ -0,0 +1,16 @@
/// @file
#pragma once
#include <cstdint>
#include <string>
namespace query::frontend {
// These are the functions for parsing literals and parameter names from
// an opencypher query.
/// Parses an integer literal with std::stoll, auto-detecting the base.
/// @throw SemanticException if the value is out of range.
int64_t ParseIntegerLiteral(const std::string &s);
/// Drops the enclosing quotes of a string literal and resolves its backslash
/// escapes, including `\uXXXX` and `\UXXXXXXXX`, which are emitted as UTF-8.
/// @throw SyntaxException if `\u` / `\U` lacks the required hex digits.
/// @throw SemanticException if an escaped codepoint is invalid.
std::string ParseStringLiteral(const std::string &s);
/// Parses a floating point literal with utils::ParseDouble.
/// @throw SemanticException if the string can't be parsed.
double ParseDoubleLiteral(const std::string &s);
/// Returns the parameter name without the leading '$'. A backquoted name also
/// loses its enclosing backquotes and has its inner backquotes unescaped.
std::string ParseParameter(const std::string &s);
} // namespace query::frontend

View File

@ -8,16 +8,16 @@
#include "glog/logging.h"
#include "query/common.hpp"
#include "query/exceptions.hpp"
#include "query/frontend/opencypher/generated/MemgraphCypher.h"
#include "query/frontend/opencypher/generated/MemgraphCypherBaseVisitor.h"
#include "query/frontend/opencypher/generated/MemgraphCypherLexer.h"
#include "query/frontend/parsing.hpp"
#include "query/frontend/stripped_lexer_constants.hpp"
#include "utils/hashing/fnv.hpp"
#include "utils/string.hpp"
namespace query {
namespace query::frontend {
using namespace lexer_constants;
@ -515,4 +515,5 @@ int StrippedQuery::MatchWhitespaceAndComments(int start) const {
if (state != State::OUT) return comment_position - start;
return i - start;
}
} // namespace query
} // namespace query::frontend

View File

@ -6,7 +6,7 @@
#include "query/parameters.hpp"
#include "utils/hashing/fnv.hpp"
namespace query {
namespace query::frontend {
// Strings used to replace original tokens. Different types are replaced with
// different token.
@ -88,4 +88,5 @@ class StrippedQuery {
// Hash based on the stripped query.
HashType hash_;
};
} // namespace query
} // namespace query::frontend

View File

@ -791,7 +791,7 @@ Interpreter::Results Interpreter::operator()(
utils::Timer parsing_timer;
auto queries = StripAndParseQuery(query_string, &parameters, &ast_storage,
&db_accessor, params);
StrippedQuery &stripped_query = queries.first;
frontend::StrippedQuery &stripped_query = queries.first;
ParsedQuery &parsed_query = queries.second;
auto parsing_time = parsing_timer.Elapsed();
@ -874,7 +874,7 @@ Interpreter::Results Interpreter::operator()(
auto queries =
StripAndParseQuery(query_string.substr(kExplainQueryStart.size()),
&parameters, &ast_storage, &db_accessor, params);
StrippedQuery &stripped_query = queries.first;
frontend::StrippedQuery &stripped_query = queries.first;
ParsedQuery &parsed_query = queries.second;
auto *cypher_query = utils::Downcast<CypherQuery>(parsed_query.query);
CHECK(cypher_query)
@ -933,7 +933,7 @@ Interpreter::Results Interpreter::operator()(
auto queries =
StripAndParseQuery(query_string.substr(kProfileQueryStart.size()),
&parameters, &ast_storage, &db_accessor, params);
StrippedQuery &stripped_query = queries.first;
frontend::StrippedQuery &stripped_query = queries.first;
ParsedQuery &parsed_query = queries.second;
auto *cypher_query = utils::Downcast<CypherQuery>(parsed_query.query);
CHECK(cypher_query)
@ -1167,12 +1167,12 @@ Interpreter::ParsedQuery Interpreter::ParseQuery(
ast_it->second.required_privileges};
}
std::pair<StrippedQuery, Interpreter::ParsedQuery>
std::pair<frontend::StrippedQuery, Interpreter::ParsedQuery>
Interpreter::StripAndParseQuery(
const std::string &query_string, Parameters *parameters,
AstStorage *ast_storage, database::GraphDbAccessor *db_accessor,
const std::map<std::string, PropertyValue> &params) {
StrippedQuery stripped_query(query_string);
frontend::StrippedQuery stripped_query(query_string);
*parameters = stripped_query.literals();
for (const auto &param_pair : stripped_query.parameters()) {

View File

@ -225,7 +225,7 @@ class Interpreter {
integrations::kafka::Streams *kafka_streams_ = nullptr;
protected:
std::pair<StrippedQuery, ParsedQuery> StripAndParseQuery(
std::pair<frontend::StrippedQuery, ParsedQuery> StripAndParseQuery(
const std::string &, Parameters *, AstStorage *ast_storage,
database::GraphDbAccessor *,
const std::map<std::string, PropertyValue> &);

View File

@ -50,7 +50,7 @@ int main(int argc, char *argv[]) {
google::InitGoogleLogging(argv[0]);
auto preprocess = [](const std::string &query) {
return query::StrippedQuery(query);
return query::frontend::StrippedQuery(query);
};
for (auto test : kQueries) {

View File

@ -21,7 +21,7 @@ int main(int argc, char **argv) {
auto query = FLAGS_q;
// run preprocessing
query::StrippedQuery preprocessed(query);
query::frontend::StrippedQuery preprocessed(query);
// print query, stripped query, hash and variable values (property values)
std::cout << fmt::format("Query: {}\n", query);

View File

@ -19,7 +19,7 @@ int main(int argc, const char **a) {
clock_t begin = clock();
for (int i = 0; i < REPEATS; ++i) {
query::StrippedQuery(std::string(query));
query::frontend::StrippedQuery(std::string(query));
}
clock_t end = clock();

View File

@ -10,6 +10,7 @@
#include "query/typed_value.hpp"
using namespace query;
using namespace query::frontend;
namespace {