Refactor stripper
Summary: Fix tests Reviewers: buda, florijan, teon.banek Reviewed By: buda Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D435
This commit is contained in:
parent
e9ca6d8243
commit
fea9031605
CMakeLists.txt
src
communication
copy_hardcoded_queries.cppquery
common.cppcommon.hppconsole.cppengine.hpp
frontend/ast
interpreter.hppparameters.hpppreprocessor.hppstripped.cppstripped.hppstripper.cppstripper.hpputils
tests
benchmark/query/strip
integration
manual
unit
@ -344,10 +344,6 @@ set(memgraph_src_files
|
||||
${src_dir}/mvcc/id.cpp
|
||||
${src_dir}/durability/snapshooter.cpp
|
||||
${src_dir}/durability/recovery.cpp
|
||||
# ${src_dir}/snapshot/snapshot_engine.cpp
|
||||
# ${src_dir}/snapshot/snapshoter.cpp
|
||||
# ${src_dir}/snapshot/snapshot_encoder.cpp
|
||||
# ${src_dir}/snapshot/snapshot_decoder.cpp
|
||||
${src_dir}/storage/property_value.cpp
|
||||
${src_dir}/storage/locking/record_lock.cpp
|
||||
# ${src_dir}/storage/garbage/garbage.cpp
|
||||
@ -368,8 +364,9 @@ set(memgraph_src_files
|
||||
${src_dir}/database/graph_db.cpp
|
||||
${src_dir}/database/graph_db_accessor.cpp
|
||||
${src_dir}/data_structures/concurrent/skiplist_gc.cpp
|
||||
${src_dir}/query/stripper.cpp
|
||||
${src_dir}/query/engine.cpp
|
||||
${src_dir}/query/stripped.cpp
|
||||
${src_dir}/query/common.cpp
|
||||
${src_dir}/query/console.cpp
|
||||
${src_dir}/query/frontend/ast/cypher_main_visitor.cpp
|
||||
${src_dir}/query/typed_value.cpp
|
||||
|
@ -35,6 +35,7 @@ namespace communication {
|
||||
*/
|
||||
template <typename Session, typename OutputStream, typename Socket>
|
||||
class Worker
|
||||
|
||||
: public io::network::StreamReader<Worker<Session, OutputStream, Socket>,
|
||||
Session> {
|
||||
using StreamBuffer = io::network::StreamBuffer;
|
||||
|
@ -5,8 +5,9 @@
|
||||
#include <experimental/filesystem>
|
||||
namespace fs = std::experimental::filesystem;
|
||||
|
||||
#include "logging/logger.hpp"
|
||||
#include "logging/streams/stdout.hpp"
|
||||
#include "query/preprocessor.hpp"
|
||||
#include "query/stripped.hpp"
|
||||
#include "utils/command_line/arguments.hpp"
|
||||
#include "utils/exceptions.hpp"
|
||||
#include "utils/file.hpp"
|
||||
@ -67,10 +68,9 @@ int main(int argc, char **argv) {
|
||||
|
||||
auto src_files = utils::LoadFilePaths(src_path, "cpp");
|
||||
|
||||
QueryPreprocessor preprocessor;
|
||||
for (auto &src_file : src_files) {
|
||||
auto query = ExtractQuery(src_file);
|
||||
auto query_hash = preprocessor.preprocess(query).hash;
|
||||
auto query_hash = query::StrippedQuery(query).hash();
|
||||
auto dst_file = dst_path / fs::path(std::to_string(query_hash) + ".cpp");
|
||||
fs::copy(src_file, dst_file, fs::copy_options::overwrite_existing);
|
||||
logger.info("{} - (copy) -> {}", src_file, dst_file);
|
||||
|
116
src/query/common.cpp
Normal file
116
src/query/common.cpp
Normal file
@ -0,0 +1,116 @@
|
||||
#include "query/common.hpp"
|
||||
|
||||
#include <cctype>
|
||||
#include <codecvt>
|
||||
#include <locale>
|
||||
#include <stdexcept>
|
||||
|
||||
#include "query/exceptions.hpp"
|
||||
#include "utils/assert.hpp"
|
||||
#include "utils/string.hpp"
|
||||
|
||||
namespace query {
|
||||
|
||||
int64_t ParseIntegerLiteral(const std::string &s) {
|
||||
try {
|
||||
// Not really correct since long long can have a bigger range than int64_t.
|
||||
return static_cast<int64_t>(std::stoll(s, 0, 0));
|
||||
} catch (const std::out_of_range &) {
|
||||
throw SemanticException();
|
||||
}
|
||||
}
|
||||
|
||||
std::string ParseStringLiteral(const std::string &s) {
|
||||
// This function is declared as lambda since its semantics is highly specific
|
||||
// for this conxtext and shouldn't be used elsewhere.
|
||||
auto EncodeEscapedUnicodeCodepoint = [](const std::string &s, int &i) {
|
||||
int j = i + 1;
|
||||
const int kShortUnicodeLength = 4;
|
||||
const int kLongUnicodeLength = 8;
|
||||
while (j < (int)s.size() - 1 && j < i + kLongUnicodeLength + 1 &&
|
||||
isxdigit(s[j])) {
|
||||
++j;
|
||||
}
|
||||
if (j - i == kLongUnicodeLength + 1) {
|
||||
char32_t t = stoi(s.substr(i + 1, kLongUnicodeLength), 0, 16);
|
||||
i += kLongUnicodeLength;
|
||||
std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter;
|
||||
return converter.to_bytes(t);
|
||||
} else if (j - i >= kShortUnicodeLength + 1) {
|
||||
char16_t t = stoi(s.substr(i + 1, kShortUnicodeLength), 0, 16);
|
||||
i += kShortUnicodeLength;
|
||||
std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>
|
||||
converter;
|
||||
return converter.to_bytes(t);
|
||||
} else {
|
||||
// This should never happen, except grammar changes and we don't notice
|
||||
// change in this production.
|
||||
debug_assert(false, "can't happen");
|
||||
throw std::exception();
|
||||
}
|
||||
};
|
||||
|
||||
std::string unescaped;
|
||||
bool escape = false;
|
||||
|
||||
// First and last char is quote, we don't need to look at them.
|
||||
for (int i = 1; i < static_cast<int>(s.size()) - 1; ++i) {
|
||||
if (escape) {
|
||||
switch (s[i]) {
|
||||
case '\\':
|
||||
unescaped += '\\';
|
||||
break;
|
||||
case '\'':
|
||||
unescaped += '\'';
|
||||
break;
|
||||
case '"':
|
||||
unescaped += '"';
|
||||
break;
|
||||
case 'B':
|
||||
case 'b':
|
||||
unescaped += '\b';
|
||||
break;
|
||||
case 'F':
|
||||
case 'f':
|
||||
unescaped += '\f';
|
||||
break;
|
||||
case 'N':
|
||||
case 'n':
|
||||
unescaped += '\n';
|
||||
break;
|
||||
case 'R':
|
||||
case 'r':
|
||||
unescaped += '\r';
|
||||
break;
|
||||
case 'T':
|
||||
case 't':
|
||||
unescaped += '\t';
|
||||
break;
|
||||
case 'U':
|
||||
case 'u':
|
||||
unescaped += EncodeEscapedUnicodeCodepoint(s, i);
|
||||
break;
|
||||
default:
|
||||
// This should never happen, except grammar changes and we don't
|
||||
// notice change in this production.
|
||||
debug_assert(false, "can't happen");
|
||||
throw std::exception();
|
||||
}
|
||||
escape = false;
|
||||
} else if (s[i] == '\\') {
|
||||
escape = true;
|
||||
} else {
|
||||
unescaped += s[i];
|
||||
}
|
||||
}
|
||||
return unescaped;
|
||||
}
|
||||
|
||||
double ParseDoubleLiteral(const std::string &s) {
|
||||
try {
|
||||
return utils::ParseDouble(s);
|
||||
} catch (const utils::BasicException &) {
|
||||
throw SemanticException("Couldn't parse string to double");
|
||||
}
|
||||
}
|
||||
}
|
@ -1,7 +1,15 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
namespace query {
|
||||
|
||||
// These are the functions for parsing literals from an openCypher query.
|
||||
int64_t ParseIntegerLiteral(const std::string &s);
|
||||
std::string ParseStringLiteral(const std::string &s);
|
||||
double ParseDoubleLiteral(const std::string &s);
|
||||
|
||||
/**
|
||||
* Indicates that some part of query execution should
|
||||
* see the OLD graph state (the latest state before the
|
||||
|
@ -131,11 +131,13 @@ void query::Repl(Dbms &dbms) {
|
||||
// special commands
|
||||
if (command == "quit") break;
|
||||
|
||||
query::Interpreter interpeter;
|
||||
|
||||
// regular cypher queries
|
||||
try {
|
||||
auto dba = dbms.active();
|
||||
ResultStreamFaker results;
|
||||
query::Interpret(command, *dba, results);
|
||||
interpeter.Interpret(command, *dba, results);
|
||||
PrintResults(results);
|
||||
dba->commit();
|
||||
} catch (const query::SyntaxException &e) {
|
||||
|
@ -11,7 +11,6 @@ namespace fs = std::experimental::filesystem;
|
||||
#include "query/interpreter.hpp"
|
||||
#include "query/plan_compiler.hpp"
|
||||
#include "query/plan_interface.hpp"
|
||||
#include "query/preprocessor.hpp"
|
||||
#include "utils/dynamic_lib.hpp"
|
||||
|
||||
DECLARE_bool(INTERPRET);
|
||||
@ -47,9 +46,8 @@ class QueryEngine : public Loggable {
|
||||
* @return void
|
||||
*/
|
||||
auto ReloadCustom(const std::string &query, const fs::path &plan_path) {
|
||||
auto preprocessed = preprocessor.preprocess(query);
|
||||
Unload(query);
|
||||
LoadCpp(plan_path, preprocessed.hash);
|
||||
LoadCpp(plan_path, query::StrippedQuery(query).hash());
|
||||
}
|
||||
|
||||
/**
|
||||
@ -67,16 +65,16 @@ class QueryEngine : public Loggable {
|
||||
auto Run(const std::string &query, GraphDbAccessor &db_accessor,
|
||||
Stream &stream) {
|
||||
if (FLAGS_INTERPRET) {
|
||||
query::Interpret(query, db_accessor, stream);
|
||||
interpreter_.Interpret(query, db_accessor, stream);
|
||||
return true;
|
||||
}
|
||||
|
||||
clock_t start_time = clock();
|
||||
auto preprocessed = preprocessor.preprocess(query);
|
||||
query::StrippedQuery stripped(query);
|
||||
clock_t end_parsing_time = clock();
|
||||
auto plan = LoadCypher(preprocessed);
|
||||
auto plan = LoadCypher(stripped);
|
||||
clock_t end_planning_time = clock();
|
||||
auto result = plan->run(db_accessor, preprocessed.arguments, stream);
|
||||
auto result = plan->run(db_accessor, stripped.parameters(), stream);
|
||||
clock_t end_execution_time = clock();
|
||||
if (UNLIKELY(!result)) {
|
||||
// info because it might be something like deadlock in which
|
||||
@ -112,7 +110,7 @@ class QueryEngine : public Loggable {
|
||||
* return bool is the plan unloaded
|
||||
*/
|
||||
auto Unload(const std::string &query) {
|
||||
return query_plans.access().remove(preprocessor.preprocess(query).hash);
|
||||
return query_plans_.access().remove(query::StrippedQuery(query).hash());
|
||||
}
|
||||
|
||||
/**
|
||||
@ -123,8 +121,8 @@ class QueryEngine : public Loggable {
|
||||
* return bool
|
||||
*/
|
||||
auto Loaded(const std::string &query) {
|
||||
auto plans_accessor = query_plans.access();
|
||||
return plans_accessor.find(preprocessor.preprocess(query).hash) !=
|
||||
auto plans_accessor = query_plans_.access();
|
||||
return plans_accessor.find(query::StrippedQuery(query).hash()) !=
|
||||
plans_accessor.end();
|
||||
}
|
||||
|
||||
@ -134,9 +132,8 @@ class QueryEngine : public Loggable {
|
||||
* @return size_t the number of loaded query plans
|
||||
*/
|
||||
auto Size() { // TODO: const once whan ConcurrentMap::Accessor becomes const
|
||||
return query_plans.access().size();
|
||||
return query_plans_.access().size();
|
||||
}
|
||||
// return query_plans.access().size(); }
|
||||
|
||||
private:
|
||||
/**
|
||||
@ -147,29 +144,25 @@ class QueryEngine : public Loggable {
|
||||
*
|
||||
* @return runnable query plan
|
||||
*/
|
||||
auto LoadCypher(const StrippedQuery &stripped) {
|
||||
auto plans_accessor = query_plans.access();
|
||||
auto LoadCypher(const query::StrippedQuery &stripped) {
|
||||
auto plans_accessor = query_plans_.access();
|
||||
|
||||
// code is already compiled and loaded, just return runnable
|
||||
// instance
|
||||
auto query_plan_it = plans_accessor.find(stripped.hash);
|
||||
auto query_plan_it = plans_accessor.find(stripped.hash());
|
||||
if (query_plan_it != plans_accessor.end())
|
||||
return query_plan_it->second->instance();
|
||||
|
||||
// find hardcoded query plan if exists
|
||||
auto hardcoded_path = fs::path(FLAGS_COMPILE_DIRECTORY + "hardcode/" +
|
||||
std::to_string(stripped.hash) + ".cpp");
|
||||
std::to_string(stripped.hash()) + ".cpp");
|
||||
if (fs::exists(hardcoded_path))
|
||||
return LoadCpp(hardcoded_path, stripped.hash);
|
||||
return LoadCpp(hardcoded_path, stripped.hash());
|
||||
|
||||
// generate query plan
|
||||
auto generated_path = fs::path(FLAGS_COMPILE_DIRECTORY +
|
||||
std::to_string(stripped.hash) + ".cpp");
|
||||
|
||||
query::frontend::opencypher::Parser parser(stripped.query);
|
||||
// backend::cpp::Generator(parser.tree(), stripped.query, stripped.hash,
|
||||
// generated_path);
|
||||
return LoadCpp(generated_path, stripped.hash);
|
||||
std::to_string(stripped.hash()) + ".cpp");
|
||||
return LoadCpp(generated_path, stripped.hash());
|
||||
}
|
||||
|
||||
/**
|
||||
@ -182,7 +175,7 @@ class QueryEngine : public Loggable {
|
||||
* @return runnable query plan
|
||||
*/
|
||||
auto LoadCpp(const fs::path &path_cpp, const HashType hash) {
|
||||
auto plans_accessor = query_plans.access();
|
||||
auto plans_accessor = query_plans_.access();
|
||||
|
||||
// code is already compiled and loaded, just return runnable
|
||||
// instance
|
||||
@ -199,7 +192,7 @@ class QueryEngine : public Loggable {
|
||||
auto path_so = FLAGS_COMPILE_DIRECTORY + std::to_string(hash) + "_" +
|
||||
(std::string)Timestamp::now() + ".so";
|
||||
|
||||
plan_compiler.Compile(path_cpp, path_so);
|
||||
PlanCompiler().Compile(path_cpp, path_so);
|
||||
|
||||
auto query_plan = std::make_unique<QueryPlanLib>(path_so);
|
||||
// TODO: underlying object has to be live during query execution
|
||||
@ -212,7 +205,6 @@ class QueryEngine : public Loggable {
|
||||
return query_plan_instance;
|
||||
}
|
||||
|
||||
QueryPreprocessor preprocessor;
|
||||
PlanCompiler plan_compiler;
|
||||
ConcurrentMap<HashType, std::unique_ptr<QueryPlanLib>> query_plans;
|
||||
query::Interpreter interpreter_;
|
||||
ConcurrentMap<HashType, std::unique_ptr<QueryPlanLib>> query_plans_;
|
||||
};
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include "database/graph_db.hpp"
|
||||
#include "query/common.hpp"
|
||||
#include "query/exceptions.hpp"
|
||||
#include "query/interpret/awesome_memgraph_functions.hpp"
|
||||
#include "utils/assert.hpp"
|
||||
@ -823,122 +824,29 @@ antlrcpp::Any CypherMainVisitor::visitFunctionName(
|
||||
|
||||
antlrcpp::Any CypherMainVisitor::visitDoubleLiteral(
|
||||
CypherParser::DoubleLiteralContext *ctx) {
|
||||
try {
|
||||
return utils::ParseDouble(ctx->getText());
|
||||
} catch (const utils::BasicException &) {
|
||||
throw SemanticException("Couldn't parse string to double");
|
||||
}
|
||||
return ParseDoubleLiteral(ctx->getText());
|
||||
}
|
||||
|
||||
antlrcpp::Any CypherMainVisitor::visitIntegerLiteral(
|
||||
CypherParser::IntegerLiteralContext *ctx) {
|
||||
try {
|
||||
// Not really correct since long long can have a bigger range than int64_t.
|
||||
return static_cast<int64_t>(std::stoll(ctx->getText(), 0, 0));
|
||||
} catch (const std::out_of_range &) {
|
||||
throw SemanticException();
|
||||
}
|
||||
return ParseIntegerLiteral(ctx->getText());
|
||||
}
|
||||
|
||||
antlrcpp::Any CypherMainVisitor::visitStringLiteral(
|
||||
const std::string &escaped) {
|
||||
// This function is declared as lambda since its semantics is highly specific
|
||||
// for this conxtext and shouldn't be used elsewhere.
|
||||
auto EncodeEscapedUnicodeCodepoint = [](const std::string &s, int &i) {
|
||||
int j = i + 1;
|
||||
const int kShortUnicodeLength = 4;
|
||||
const int kLongUnicodeLength = 8;
|
||||
while (j < (int)s.size() - 1 && j < i + kLongUnicodeLength + 1 &&
|
||||
isxdigit(s[j])) {
|
||||
++j;
|
||||
}
|
||||
if (j - i == kLongUnicodeLength + 1) {
|
||||
char32_t t = stoi(s.substr(i + 1, kLongUnicodeLength), 0, 16);
|
||||
i += kLongUnicodeLength;
|
||||
std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter;
|
||||
return converter.to_bytes(t);
|
||||
} else if (j - i >= kShortUnicodeLength + 1) {
|
||||
char16_t t = stoi(s.substr(i + 1, kShortUnicodeLength), 0, 16);
|
||||
i += kShortUnicodeLength;
|
||||
std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>
|
||||
converter;
|
||||
return converter.to_bytes(t);
|
||||
} else {
|
||||
// This should never happen, except grammar changes and we don't notice
|
||||
// change in this production.
|
||||
debug_assert(false, "can't happen");
|
||||
throw std::exception();
|
||||
}
|
||||
};
|
||||
|
||||
std::string unescaped;
|
||||
bool escape = false;
|
||||
|
||||
// First and last char is quote, we don't need to look at them.
|
||||
for (int i = 1; i < (int)escaped.size() - 1; ++i) {
|
||||
if (escape) {
|
||||
switch (escaped[i]) {
|
||||
case '\\':
|
||||
unescaped += '\\';
|
||||
break;
|
||||
case '\'':
|
||||
unescaped += '\'';
|
||||
break;
|
||||
case '"':
|
||||
unescaped += '"';
|
||||
break;
|
||||
case 'B':
|
||||
case 'b':
|
||||
unescaped += '\b';
|
||||
break;
|
||||
case 'F':
|
||||
case 'f':
|
||||
unescaped += '\f';
|
||||
break;
|
||||
case 'N':
|
||||
case 'n':
|
||||
unescaped += '\n';
|
||||
break;
|
||||
case 'R':
|
||||
case 'r':
|
||||
unescaped += '\r';
|
||||
break;
|
||||
case 'T':
|
||||
case 't':
|
||||
unescaped += '\t';
|
||||
break;
|
||||
case 'U':
|
||||
case 'u':
|
||||
unescaped += EncodeEscapedUnicodeCodepoint(escaped, i);
|
||||
break;
|
||||
default:
|
||||
// This should never happen, except grammar changes and we don't
|
||||
// notice change in this production.
|
||||
debug_assert(false, "can't happen");
|
||||
throw std::exception();
|
||||
}
|
||||
escape = false;
|
||||
} else if (escaped[i] == '\\') {
|
||||
escape = true;
|
||||
} else {
|
||||
unescaped += escaped[i];
|
||||
}
|
||||
}
|
||||
return unescaped;
|
||||
return ParseStringLiteral(escaped);
|
||||
}
|
||||
|
||||
antlrcpp::Any CypherMainVisitor::visitBooleanLiteral(
|
||||
CypherParser::BooleanLiteralContext *ctx) {
|
||||
if (ctx->getTokens(CypherParser::TRUE).size()) {
|
||||
return true;
|
||||
} else if (ctx->getTokens(CypherParser::FALSE).size()) {
|
||||
return false;
|
||||
} else {
|
||||
// This should never happen, except grammar changes and we don't
|
||||
// notice change in this production.
|
||||
debug_assert(false, "can't happen");
|
||||
throw std::exception();
|
||||
}
|
||||
if (ctx->getTokens(CypherParser::FALSE).size()) {
|
||||
return false;
|
||||
}
|
||||
debug_assert(false, "Shouldn't happend");
|
||||
throw std::exception();
|
||||
}
|
||||
|
||||
antlrcpp::Any CypherMainVisitor::visitCypherDelete(
|
||||
|
@ -9,125 +9,133 @@
|
||||
#include "query/frontend/opencypher/parser.hpp"
|
||||
#include "query/frontend/semantic/symbol_generator.hpp"
|
||||
#include "query/interpret/frame.hpp"
|
||||
#include "query/plan/planner.hpp"
|
||||
#include "query/plan/cost_estimator.hpp"
|
||||
#include "query/plan/planner.hpp"
|
||||
|
||||
namespace query {
|
||||
|
||||
template <typename Stream>
|
||||
void Interpret(const std::string &query, GraphDbAccessor &db_accessor,
|
||||
Stream &stream) {
|
||||
clock_t start_time = clock();
|
||||
class Interpreter {
|
||||
public:
|
||||
template <typename Stream>
|
||||
void Interpret(const std::string &query, GraphDbAccessor &db_accessor,
|
||||
Stream &stream) {
|
||||
clock_t start_time = clock();
|
||||
|
||||
Config config;
|
||||
Context ctx(config, db_accessor);
|
||||
std::map<std::string, TypedValue> summary;
|
||||
Config config;
|
||||
Context ctx(config, db_accessor);
|
||||
std::map<std::string, TypedValue> summary;
|
||||
|
||||
// query -> AST
|
||||
frontend::opencypher::Parser parser(query);
|
||||
// query -> AST
|
||||
frontend::opencypher::Parser parser(query);
|
||||
|
||||
auto low_level_tree = parser.tree();
|
||||
auto low_level_tree = parser.tree();
|
||||
|
||||
clock_t antlr_end_time = clock();
|
||||
clock_t antlr_end_time = clock();
|
||||
|
||||
// AST -> high level tree
|
||||
frontend::CypherMainVisitor visitor(ctx);
|
||||
visitor.visit(low_level_tree);
|
||||
auto high_level_tree = visitor.query();
|
||||
// AST -> high level tree
|
||||
frontend::CypherMainVisitor visitor(ctx);
|
||||
visitor.visit(low_level_tree);
|
||||
auto high_level_tree = visitor.query();
|
||||
|
||||
// symbol table fill
|
||||
SymbolTable symbol_table;
|
||||
SymbolGenerator symbol_generator(symbol_table);
|
||||
high_level_tree->Accept(symbol_generator);
|
||||
// symbol table fill
|
||||
SymbolTable symbol_table;
|
||||
SymbolGenerator symbol_generator(symbol_table);
|
||||
high_level_tree->Accept(symbol_generator);
|
||||
|
||||
// high level tree -> logical plan
|
||||
std::unique_ptr<plan::LogicalOperator> logical_plan;
|
||||
double query_plan_cost_estimation = 0.0;
|
||||
// TODO: Use gflags
|
||||
bool FLAGS_query_cost_planner = true;
|
||||
if (FLAGS_query_cost_planner) {
|
||||
auto plans = plan::MakeLogicalPlan<plan::VariableStartPlanner>(
|
||||
visitor.storage(), symbol_table, &db_accessor);
|
||||
double min_cost = std::numeric_limits<double>::max();
|
||||
for (auto &plan : plans) {
|
||||
plan::CostEstimator estimator(db_accessor);
|
||||
plan->Accept(estimator);
|
||||
auto cost = estimator.cost();
|
||||
if (!logical_plan || cost < min_cost) {
|
||||
// We won't be iterating over plans anymore, so it's ok to invalidate
|
||||
// unique_ptrs inside.
|
||||
logical_plan = std::move(plan);
|
||||
min_cost = cost;
|
||||
// high level tree -> logical plan
|
||||
std::unique_ptr<plan::LogicalOperator> logical_plan;
|
||||
double query_plan_cost_estimation = 0.0;
|
||||
// TODO: Use gflags
|
||||
bool FLAGS_query_cost_planner = true;
|
||||
if (FLAGS_query_cost_planner) {
|
||||
auto plans = plan::MakeLogicalPlan<plan::VariableStartPlanner>(
|
||||
visitor.storage(), symbol_table, &db_accessor);
|
||||
double min_cost = std::numeric_limits<double>::max();
|
||||
for (auto &plan : plans) {
|
||||
plan::CostEstimator estimator(db_accessor);
|
||||
plan->Accept(estimator);
|
||||
auto cost = estimator.cost();
|
||||
if (!logical_plan || cost < min_cost) {
|
||||
// We won't be iterating over plans anymore, so it's ok to invalidate
|
||||
// unique_ptrs inside.
|
||||
logical_plan = std::move(plan);
|
||||
min_cost = cost;
|
||||
}
|
||||
}
|
||||
query_plan_cost_estimation = min_cost;
|
||||
} else {
|
||||
logical_plan = plan::MakeLogicalPlan<plan::RuleBasedPlanner>(
|
||||
visitor.storage(), symbol_table, &db_accessor);
|
||||
plan::CostEstimator cost_estimator(db_accessor);
|
||||
logical_plan->Accept(cost_estimator);
|
||||
query_plan_cost_estimation = cost_estimator.cost();
|
||||
}
|
||||
query_plan_cost_estimation = min_cost;
|
||||
} else {
|
||||
logical_plan = plan::MakeLogicalPlan<plan::RuleBasedPlanner>(
|
||||
visitor.storage(), symbol_table, &db_accessor);
|
||||
plan::CostEstimator cost_estimator(db_accessor);
|
||||
logical_plan->Accept(cost_estimator);
|
||||
query_plan_cost_estimation = cost_estimator.cost();
|
||||
|
||||
// generate frame based on symbol table max_position
|
||||
Frame frame(symbol_table.max_position());
|
||||
|
||||
clock_t planning_end_time = clock();
|
||||
|
||||
std::vector<std::string> header;
|
||||
std::vector<Symbol> output_symbols(
|
||||
logical_plan->OutputSymbols(symbol_table));
|
||||
if (!output_symbols.empty()) {
|
||||
// Since we have output symbols, this means that the query contains RETURN
|
||||
// clause, so stream out the results.
|
||||
|
||||
// generate header
|
||||
for (const auto &symbol : output_symbols) header.push_back(symbol.name());
|
||||
stream.Header(header);
|
||||
|
||||
// stream out results
|
||||
auto cursor = logical_plan->MakeCursor(db_accessor);
|
||||
while (cursor->Pull(frame, symbol_table)) {
|
||||
std::vector<TypedValue> values;
|
||||
for (const auto &symbol : output_symbols)
|
||||
values.emplace_back(frame[symbol]);
|
||||
stream.Result(values);
|
||||
}
|
||||
} else if (dynamic_cast<plan::CreateNode *>(logical_plan.get()) ||
|
||||
dynamic_cast<plan::CreateExpand *>(logical_plan.get()) ||
|
||||
dynamic_cast<plan::SetProperty *>(logical_plan.get()) ||
|
||||
dynamic_cast<plan::SetProperties *>(logical_plan.get()) ||
|
||||
dynamic_cast<plan::SetLabels *>(logical_plan.get()) ||
|
||||
dynamic_cast<plan::RemoveProperty *>(logical_plan.get()) ||
|
||||
dynamic_cast<plan::RemoveLabels *>(logical_plan.get()) ||
|
||||
dynamic_cast<plan::Delete *>(logical_plan.get()) ||
|
||||
dynamic_cast<plan::Merge *>(logical_plan.get())) {
|
||||
stream.Header(header);
|
||||
auto cursor = logical_plan->MakeCursor(db_accessor);
|
||||
while (cursor->Pull(frame, symbol_table)) continue;
|
||||
} else {
|
||||
throw QueryRuntimeException("Unknown top level LogicalOperator");
|
||||
}
|
||||
|
||||
clock_t execution_end_time = clock();
|
||||
|
||||
// helper function for calculating time in seconds
|
||||
auto time_second = [](clock_t start, clock_t end) {
|
||||
return TypedValue(double(end - start) / CLOCKS_PER_SEC);
|
||||
};
|
||||
|
||||
summary["query_parsing_time"] = time_second(start_time, antlr_end_time);
|
||||
summary["query_planning_time"] =
|
||||
time_second(antlr_end_time, planning_end_time);
|
||||
summary["query_plan_execution_time"] =
|
||||
time_second(planning_end_time, execution_end_time);
|
||||
summary["query_cost_estimate"] = query_plan_cost_estimation;
|
||||
|
||||
// TODO: set summary['type'] based on transaction metadata
|
||||
// the type can't be determined based only on top level LogicalOp
|
||||
// (for example MATCH DELETE RETURN will have Produce as it's top)
|
||||
// for now always use "rw" because something must be set, but it doesn't
|
||||
// have to be correct (for Bolt clients)
|
||||
summary["type"] = "rw";
|
||||
stream.Summary(summary);
|
||||
}
|
||||
|
||||
// generate frame based on symbol table max_position
|
||||
Frame frame(symbol_table.max_position());
|
||||
|
||||
clock_t planning_end_time = clock();
|
||||
|
||||
std::vector<std::string> header;
|
||||
std::vector<Symbol> output_symbols(logical_plan->OutputSymbols(symbol_table));
|
||||
if (!output_symbols.empty()) {
|
||||
// Since we have output symbols, this means that the query contains RETURN
|
||||
// clause, so stream out the results.
|
||||
|
||||
// generate header
|
||||
for (const auto &symbol : output_symbols) header.push_back(symbol.name());
|
||||
stream.Header(header);
|
||||
|
||||
// stream out results
|
||||
auto cursor = logical_plan->MakeCursor(db_accessor);
|
||||
while (cursor->Pull(frame, symbol_table)) {
|
||||
std::vector<TypedValue> values;
|
||||
for (const auto &symbol : output_symbols)
|
||||
values.emplace_back(frame[symbol]);
|
||||
stream.Result(values);
|
||||
}
|
||||
} else if (dynamic_cast<plan::CreateNode *>(logical_plan.get()) ||
|
||||
dynamic_cast<plan::CreateExpand *>(logical_plan.get()) ||
|
||||
dynamic_cast<plan::SetProperty *>(logical_plan.get()) ||
|
||||
dynamic_cast<plan::SetProperties *>(logical_plan.get()) ||
|
||||
dynamic_cast<plan::SetLabels *>(logical_plan.get()) ||
|
||||
dynamic_cast<plan::RemoveProperty *>(logical_plan.get()) ||
|
||||
dynamic_cast<plan::RemoveLabels *>(logical_plan.get()) ||
|
||||
dynamic_cast<plan::Delete *>(logical_plan.get()) ||
|
||||
dynamic_cast<plan::Merge *>(logical_plan.get())) {
|
||||
stream.Header(header);
|
||||
auto cursor = logical_plan->MakeCursor(db_accessor);
|
||||
while (cursor->Pull(frame, symbol_table)) continue;
|
||||
} else {
|
||||
throw QueryRuntimeException("Unknown top level LogicalOperator");
|
||||
}
|
||||
|
||||
clock_t execution_end_time = clock();
|
||||
|
||||
// helper function for calculating time in seconds
|
||||
auto time_second = [](clock_t start, clock_t end) {
|
||||
return TypedValue(double(end - start) / CLOCKS_PER_SEC);
|
||||
};
|
||||
|
||||
summary["query_parsing_time"] = time_second(start_time, antlr_end_time);
|
||||
summary["query_planning_time"] =
|
||||
time_second(antlr_end_time, planning_end_time);
|
||||
summary["query_plan_execution_time"] =
|
||||
time_second(planning_end_time, execution_end_time);
|
||||
summary["query_cost_estimate"] = query_plan_cost_estimation;
|
||||
// TODO set summary['type'] based on transaction metadata
|
||||
// the type can't be determined based only on top level LogicalOperator
|
||||
// (for example MATCH DELETE RETURN will have Produce as it's top)
|
||||
// for now always use "rw" because something must be set, but it doesn't
|
||||
// have to be correct (for Bolt clients)
|
||||
summary["type"] = "rw";
|
||||
stream.Summary(summary);
|
||||
}
|
||||
private:
|
||||
// ConcurrentMap<HashType, std::unique_ptr<QueryPlanLib>> query_plans;
|
||||
};
|
||||
|
||||
} // namespace query
|
||||
|
@ -57,7 +57,7 @@ struct Parameters {
|
||||
}
|
||||
|
||||
/** Returns the number of arguments in this container */
|
||||
const size_t Size() const { return storage_.size(); }
|
||||
size_t Size() const { return storage_.size(); }
|
||||
|
||||
private:
|
||||
std::map<std::string, query::TypedValue> storage_;
|
||||
@ -71,4 +71,4 @@ struct Parameters {
|
||||
}
|
||||
};
|
||||
|
||||
#endif //MEMGRAPH_PARAMETERS_HPP
|
||||
#endif // MEMGRAPH_PARAMETERS_HPP
|
||||
|
@ -1,40 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "logging/loggable.hpp"
|
||||
#include "query/stripper.hpp"
|
||||
|
||||
/*
|
||||
* Query preprocessing contains:
|
||||
* * query stripping
|
||||
*
|
||||
* This class is here because conceptually process of query preprocessing
|
||||
* might have more than one step + in current version of C++ standard
|
||||
* isn't trivial to instantiate QueryStripper because of template arguments +
|
||||
* it depends on underlying lexical analyser.
|
||||
*
|
||||
* The preprocessing results are:
|
||||
* * stripped query |
|
||||
* * stripped arguments |-> StrippedQuery
|
||||
* * stripped query hash |
|
||||
*/
|
||||
class QueryPreprocessor : public Loggable {
|
||||
public:
|
||||
QueryPreprocessor() : Loggable("QueryPreprocessor") {}
|
||||
|
||||
/**
|
||||
* Preprocess the query:
|
||||
* * strip parameters
|
||||
* * calculate query hash
|
||||
*
|
||||
* @param query that is going to be stripped
|
||||
*
|
||||
* @return QueryStripped object
|
||||
*/
|
||||
auto preprocess(const std::string &query) {
|
||||
auto preprocessed = query::Strip(query);
|
||||
logger.info("stripped_query = {}", preprocessed.query);
|
||||
logger.info("query_hash = {}", preprocessed.hash);
|
||||
|
||||
return preprocessed;
|
||||
}
|
||||
};
|
126
src/query/stripped.cpp
Normal file
126
src/query/stripped.cpp
Normal file
@ -0,0 +1,126 @@
|
||||
#include "query/stripped.hpp"
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "antlr4-runtime.h"
|
||||
#include "logging/loggable.hpp"
|
||||
#include "query/common.hpp"
|
||||
#include "query/frontend/opencypher/generated/CypherBaseVisitor.h"
|
||||
#include "query/frontend/opencypher/generated/CypherLexer.h"
|
||||
#include "query/frontend/opencypher/generated/CypherParser.h"
|
||||
#include "utils/assert.hpp"
|
||||
#include "utils/hashing/fnv.hpp"
|
||||
#include "utils/string.hpp"
|
||||
|
||||
using namespace antlropencypher;
|
||||
using namespace antlr4;
|
||||
|
||||
namespace query {
|
||||
|
||||
StrippedQuery::StrippedQuery(const std::string &query)
|
||||
: Loggable("StrippedQuery") {
|
||||
// Tokenize the query.
|
||||
ANTLRInputStream input(query);
|
||||
CypherLexer lexer(&input);
|
||||
CommonTokenStream token_stream(&lexer);
|
||||
token_stream.fill();
|
||||
auto tokens = token_stream.getTokens();
|
||||
|
||||
// Initialize data structures we return.
|
||||
std::vector<std::string> token_strings;
|
||||
token_strings.reserve(tokens.size());
|
||||
|
||||
// A helper function that generates a new param name for the stripped
|
||||
// literal, appends it to the stripped_query and adds the passed
|
||||
// value to stripped args.
|
||||
auto replace_stripped = [this, &token_strings](const TypedValue &value) {
|
||||
const auto &stripped_name = parameters_.Add(value);
|
||||
token_strings.push_back("$" + stripped_name);
|
||||
};
|
||||
|
||||
// Convert tokens to strings, perform lowercasing and filtering.
|
||||
for (const auto *token : tokens) {
|
||||
switch (token->getType()) {
|
||||
case CypherLexer::UNION:
|
||||
case CypherLexer::ALL:
|
||||
case CypherLexer::OPTIONAL:
|
||||
case CypherLexer::MATCH:
|
||||
case CypherLexer::UNWIND:
|
||||
case CypherLexer::AS:
|
||||
case CypherLexer::MERGE:
|
||||
case CypherLexer::ON:
|
||||
case CypherLexer::CREATE:
|
||||
case CypherLexer::SET:
|
||||
case CypherLexer::DETACH:
|
||||
case CypherLexer::DELETE:
|
||||
case CypherLexer::REMOVE:
|
||||
case CypherLexer::WITH:
|
||||
case CypherLexer::DISTINCT:
|
||||
case CypherLexer::RETURN:
|
||||
case CypherLexer::ORDER:
|
||||
case CypherLexer::BY:
|
||||
case CypherLexer::L_SKIP:
|
||||
case CypherLexer::LIMIT:
|
||||
case CypherLexer::ASCENDING:
|
||||
case CypherLexer::ASC:
|
||||
case CypherLexer::DESCENDING:
|
||||
case CypherLexer::DESC:
|
||||
case CypherLexer::WHERE:
|
||||
case CypherLexer::OR:
|
||||
case CypherLexer::XOR:
|
||||
case CypherLexer::AND:
|
||||
case CypherLexer::NOT:
|
||||
case CypherLexer::IN:
|
||||
case CypherLexer::STARTS:
|
||||
case CypherLexer::ENDS:
|
||||
case CypherLexer::CONTAINS:
|
||||
case CypherLexer::IS:
|
||||
case CypherLexer::CYPHERNULL:
|
||||
case CypherLexer::COUNT:
|
||||
case CypherLexer::FILTER:
|
||||
case CypherLexer::EXTRACT:
|
||||
case CypherLexer::ANY:
|
||||
case CypherLexer::NONE:
|
||||
case CypherLexer::SINGLE:
|
||||
token_strings.push_back(utils::ToLowerCase(token->getText()));
|
||||
break;
|
||||
|
||||
case CypherLexer::SP:
|
||||
case Token::EOF:
|
||||
break;
|
||||
|
||||
case CypherLexer::DecimalInteger:
|
||||
case CypherLexer::HexInteger:
|
||||
case CypherLexer::OctalInteger:
|
||||
replace_stripped(ParseIntegerLiteral(token->getText()));
|
||||
break;
|
||||
|
||||
case CypherLexer::StringLiteral:
|
||||
replace_stripped(ParseStringLiteral(token->getText()));
|
||||
break;
|
||||
|
||||
case CypherLexer::RegularDecimalReal:
|
||||
case CypherLexer::ExponentDecimalReal:
|
||||
replace_stripped(ParseDoubleLiteral(token->getText()));
|
||||
break;
|
||||
case CypherLexer::TRUE:
|
||||
replace_stripped(true);
|
||||
break;
|
||||
case CypherLexer::FALSE:
|
||||
replace_stripped(false);
|
||||
break;
|
||||
|
||||
default:
|
||||
token_strings.push_back(token->getText());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
query_ = utils::Join(token_strings, " ");
|
||||
hash_ = fnv(query_);
|
||||
logger.info("stripped_query = {}", query_);
|
||||
logger.info("query_hash = {}", hash_);
|
||||
}
|
||||
}
|
@ -2,24 +2,29 @@
|
||||
|
||||
#include <map>
|
||||
|
||||
#include "logging/loggable.hpp"
|
||||
#include "parameters.hpp"
|
||||
#include "storage/property_value_store.hpp"
|
||||
#include "utils/assert.hpp"
|
||||
#include "utils/hashing/fnv.hpp"
|
||||
|
||||
namespace query {
|
||||
|
||||
/*
|
||||
* StrippedQuery contains:
|
||||
* * stripped query
|
||||
* * plan arguments stripped from query
|
||||
* * hash of stripped query
|
||||
*/
|
||||
struct StrippedQuery {
|
||||
StrippedQuery(const std::string &unstripped_query, const std::string &&query,
|
||||
const Parameters &arguments, HashType hash)
|
||||
: unstripped_query(unstripped_query),
|
||||
query(query),
|
||||
arguments(arguments),
|
||||
hash(hash) {}
|
||||
class StrippedQuery : Loggable {
|
||||
public:
|
||||
/**
|
||||
* Strips the input query and stores stripped query, stripped arguments and
|
||||
* stripped query hash.
|
||||
*
|
||||
* @param query input query
|
||||
*/
|
||||
explicit StrippedQuery(const std::string &query);
|
||||
|
||||
/**
|
||||
* Copy constructor is deleted because we don't want to make unnecessary
|
||||
@ -35,15 +40,18 @@ struct StrippedQuery {
|
||||
StrippedQuery(StrippedQuery &&other) = default;
|
||||
StrippedQuery &operator=(StrippedQuery &&other) = default;
|
||||
|
||||
// original, unstripped query
|
||||
const std::string unstripped_query;
|
||||
const std::string &query() const { return query_; }
|
||||
const Parameters ¶meters() const { return parameters_; }
|
||||
HashType hash() const { return hash_; }
|
||||
|
||||
private:
|
||||
// stripped query
|
||||
const std::string query;
|
||||
std::string query_;
|
||||
|
||||
// stripped arguments
|
||||
const Parameters arguments;
|
||||
Parameters parameters_;
|
||||
|
||||
// hash based on the stripped query
|
||||
const HashType hash;
|
||||
HashType hash_;
|
||||
};
|
||||
}
|
||||
|
@ -1,222 +0,0 @@
|
||||
//
|
||||
// Copyright 2017 Memgraph
|
||||
// Created by Florijan Stamenkovic on 07.03.17.
|
||||
//
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "logging/loggable.hpp"
|
||||
#include "query/stripper.hpp"
|
||||
#include "utils/hashing/fnv.hpp"
|
||||
#include "utils/assert.hpp"
|
||||
|
||||
#include "query/frontend/opencypher/generated/CypherBaseVisitor.h"
|
||||
#include "query/frontend/opencypher/generated/CypherLexer.h"
|
||||
#include "query/frontend/opencypher/generated/CypherParser.h"
|
||||
|
||||
using namespace antlr4;
|
||||
using namespace antlropencypher;
|
||||
|
||||
namespace query {
|
||||
|
||||
/**
|
||||
* A visitor that for each literal that is not enclosed
|
||||
* by a range literal calls the given callback (which should
|
||||
* then replace Tokens of literals with placeholders).
|
||||
*/
|
||||
class StripperVisitor : public antlropencypher::CypherBaseVisitor {
|
||||
public:
|
||||
/**
|
||||
* @param callback Callback function (see class description) called
|
||||
* with start and stop tokens of a literal.
|
||||
*/
|
||||
StripperVisitor(const std::function<void(Token *, Token *)> callback)
|
||||
: callback_(callback) {}
|
||||
|
||||
antlrcpp::Any visitRangeLiteral(
|
||||
CypherParser::RangeLiteralContext *ctx) override {
|
||||
is_in_range_ = true;
|
||||
auto r_val = visitChildren(ctx);
|
||||
is_in_range_ = false;
|
||||
return r_val;
|
||||
}
|
||||
|
||||
antlrcpp::Any visitLiteral(
|
||||
CypherParser::LiteralContext *ctx) override {
|
||||
if (ctx->booleanLiteral() != nullptr ||
|
||||
ctx->StringLiteral() != nullptr ||
|
||||
ctx->numberLiteral() != nullptr)
|
||||
callback_(ctx->getStart(), ctx->getStop());
|
||||
|
||||
is_in_literal_ = true;
|
||||
auto r_val = visitChildren(ctx);
|
||||
is_in_literal_ = false;
|
||||
return r_val;
|
||||
}
|
||||
|
||||
antlrcpp::Any visitIntegerLiteral(
|
||||
CypherParser::IntegerLiteralContext *ctx) override {
|
||||
// convert integer literals into param placeholders only if not in range
|
||||
// literal
|
||||
if (!is_in_range_ && !is_in_literal_) callback_(ctx->getStart(), ctx->getStop());
|
||||
return visitChildren(ctx);
|
||||
}
|
||||
|
||||
private:
|
||||
const std::function<void(Token *, Token *)> callback_;
|
||||
bool is_in_range_{false};
|
||||
bool is_in_literal_{false};
|
||||
};
|
||||
|
||||
/**
|
||||
* Strips the input query and returns stripped query, stripped arguments and
|
||||
* stripped query hash.
|
||||
*
|
||||
* @param query input query
|
||||
* @return stripped query, stripped arguments and stripped query hash as a
|
||||
* single object of class StrippedQuery
|
||||
*/
|
||||
StrippedQuery Strip(const std::string &query) {
|
||||
|
||||
// tokenize the query
|
||||
ANTLRInputStream input(query);
|
||||
CypherLexer lexer(&input);
|
||||
CommonTokenStream token_stream(&lexer);
|
||||
token_stream.fill();
|
||||
std::vector<Token *> tokens = token_stream.getTokens();
|
||||
|
||||
// initialize data structures we return
|
||||
Parameters stripped_arguments;
|
||||
|
||||
// convert tokens to strings, perform lowercasing and filtering
|
||||
std::vector<std::string> token_strings;
|
||||
token_strings.reserve(tokens.size());
|
||||
for (int i = 0; i < tokens.size(); ++i)
|
||||
switch (tokens[i]->getType()) {
|
||||
case CypherLexer::UNION:
|
||||
case CypherLexer::ALL:
|
||||
case CypherLexer::OPTIONAL:
|
||||
case CypherLexer::MATCH:
|
||||
case CypherLexer::UNWIND:
|
||||
case CypherLexer::AS:
|
||||
case CypherLexer::MERGE:
|
||||
case CypherLexer::ON:
|
||||
case CypherLexer::CREATE:
|
||||
case CypherLexer::SET:
|
||||
case CypherLexer::DETACH:
|
||||
case CypherLexer::DELETE:
|
||||
case CypherLexer::REMOVE:
|
||||
case CypherLexer::WITH:
|
||||
case CypherLexer::DISTINCT:
|
||||
case CypherLexer::RETURN:
|
||||
case CypherLexer::ORDER:
|
||||
case CypherLexer::BY:
|
||||
case CypherLexer::L_SKIP:
|
||||
case CypherLexer::LIMIT:
|
||||
case CypherLexer::ASCENDING:
|
||||
case CypherLexer::ASC:
|
||||
case CypherLexer::DESCENDING:
|
||||
case CypherLexer::DESC:
|
||||
case CypherLexer::WHERE:
|
||||
case CypherLexer::OR:
|
||||
case CypherLexer::XOR:
|
||||
case CypherLexer::AND:
|
||||
case CypherLexer::NOT:
|
||||
case CypherLexer::IN:
|
||||
case CypherLexer::STARTS:
|
||||
case CypherLexer::ENDS:
|
||||
case CypherLexer::CONTAINS:
|
||||
case CypherLexer::IS:
|
||||
case CypherLexer::CYPHERNULL:
|
||||
case CypherLexer::COUNT:
|
||||
case CypherLexer::FILTER:
|
||||
case CypherLexer::EXTRACT:
|
||||
case CypherLexer::ANY:
|
||||
case CypherLexer::NONE:
|
||||
case CypherLexer::SINGLE:
|
||||
token_strings.push_back(tokens[i]->getText());
|
||||
std::transform(token_strings.back().begin(),
|
||||
token_strings.back().end(),
|
||||
token_strings.back().begin(), ::tolower);
|
||||
break;
|
||||
|
||||
case CypherLexer::SP:
|
||||
case Token::EOF:
|
||||
token_strings.push_back("");
|
||||
break;
|
||||
|
||||
default:
|
||||
token_strings.push_back(tokens[i]->getText());
|
||||
break;
|
||||
}
|
||||
|
||||
// a helper function that generates a new param name for the stripped
|
||||
// literal, appends is to the the stripped_query and adds the passed
|
||||
// value to stripped args
|
||||
auto replace_stripped = [&stripped_arguments, &token_strings](
|
||||
const TypedValue &value, size_t token_position) {
|
||||
const auto &stripped_name = stripped_arguments.Add(value);
|
||||
token_strings[token_position] = "$" + stripped_name;
|
||||
};
|
||||
|
||||
// callback for every literal that should be changed
|
||||
// TODO consider literal parsing problems (like an int with 100 digits)
|
||||
auto callback = [&replace_stripped](Token *start, Token *end) {
|
||||
assert(start->getTokenIndex() == end->getTokenIndex());
|
||||
switch (start->getType()) {
|
||||
case CypherLexer::DecimalInteger:
|
||||
replace_stripped(std::stoi(start->getText()),
|
||||
start->getTokenIndex());
|
||||
break;
|
||||
case CypherLexer::HexInteger:
|
||||
replace_stripped(std::stoi(start->getText(), 0, 16),
|
||||
start->getTokenIndex());
|
||||
break;
|
||||
case CypherLexer::OctalInteger:
|
||||
replace_stripped(std::stoi(start->getText(), 0, 8),
|
||||
start->getTokenIndex());
|
||||
break;
|
||||
case CypherLexer::StringLiteral:
|
||||
replace_stripped(start->getText(),
|
||||
start->getTokenIndex());
|
||||
break;
|
||||
case CypherLexer::RegularDecimalReal:
|
||||
case CypherLexer::ExponentDecimalReal:
|
||||
replace_stripped(std::stof(start->getText()),
|
||||
start->getTokenIndex());
|
||||
break;
|
||||
case CypherLexer::TRUE:
|
||||
replace_stripped(true, start->getTokenIndex());
|
||||
break;
|
||||
case CypherLexer::FALSE:
|
||||
replace_stripped(false, start->getTokenIndex());
|
||||
break;
|
||||
|
||||
default:
|
||||
permanent_assert(true, "Unsupported literal type");
|
||||
}
|
||||
};
|
||||
|
||||
// parse the query and visit the AST with a stripping visitor
|
||||
CypherParser parser(&token_stream);
|
||||
tree::ParseTree *tree = parser.cypher();
|
||||
StripperVisitor stripper_visitor(callback);
|
||||
stripper_visitor.visit(tree);
|
||||
|
||||
// concatenate the stripped query tokens
|
||||
std::string stripped_query;
|
||||
stripped_query.reserve(query.size());
|
||||
for (const std::string &token_string : token_strings) {
|
||||
stripped_query += token_string;
|
||||
if (token_string.size() > 0)
|
||||
stripped_query += " ";
|
||||
}
|
||||
|
||||
// return stripped query, stripped arguments and stripped query hash
|
||||
return StrippedQuery(query,
|
||||
std::move(stripped_query),
|
||||
std::move(stripped_arguments),
|
||||
fnv(stripped_query));
|
||||
}
|
||||
};
|
@ -1,7 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "query/stripped.hpp"
|
||||
|
||||
namespace query {
|
||||
StrippedQuery Strip(const std::string &query);
|
||||
};
|
@ -59,11 +59,21 @@ inline std::string ToUpperCase(std::string s) {
|
||||
* Join strings in vector separated by a given separator.
|
||||
*/
|
||||
inline std::string Join(const std::vector<std::string>& strings,
                        const std::string& separator) {
  // Nothing to join; this also keeps `size() - 1` below from wrapping around.
  if (strings.empty()) return "";

  // Compute the exact length of the result so it is allocated only once.
  std::string::size_type total_size = separator.size() * (strings.size() - 1);
  for (const auto& piece : strings) {
    total_size += piece.size();
  }

  std::string joined;
  joined.reserve(total_size);

  // The separator goes only between elements, never after the last one.
  joined += strings.front();
  for (auto it = strings.begin() + 1; it != strings.end(); ++it) {
    joined += separator;
    joined += *it;
  }
  return joined;
}
|
||||
|
||||
/**
|
||||
|
@ -3,7 +3,7 @@
|
||||
#include "benchmark/benchmark_api.h"
|
||||
#include "logging/default.hpp"
|
||||
#include "logging/streams/stdout.hpp"
|
||||
#include "query/preprocessor.hpp"
|
||||
#include "query/stripped.hpp"
|
||||
#include "yaml-cpp/yaml.h"
|
||||
|
||||
auto BM_Strip = [](benchmark::State &state, auto &function, std::string query) {
|
||||
@ -22,10 +22,9 @@ int main(int argc, char **argv) {
|
||||
YAML::Node dataset = YAML::LoadFile(
|
||||
"../../tests/data/cypher_queries/stripper/query_dict.yaml");
|
||||
|
||||
QueryPreprocessor processor;
|
||||
using std::placeholders::_1;
|
||||
std::function<StrippedQuery(const std::string &query)> preprocess =
|
||||
std::bind(&QueryPreprocessor::preprocess, &processor, _1);
|
||||
auto preprocess = [](const std::string &query) {
|
||||
return query::StrippedQuery(query);
|
||||
};
|
||||
|
||||
auto tests = dataset["benchmark_queries"].as<std::vector<std::string>>();
|
||||
for (auto &test : tests) {
|
||||
|
@ -8,7 +8,7 @@ namespace fs = std::experimental::filesystem;
|
||||
#include "logging/default.hpp"
|
||||
#include "logging/streams/stdout.cpp"
|
||||
#include "query/engine.hpp"
|
||||
#include "query/preprocessor.hpp"
|
||||
#include "query/stripped.hpp"
|
||||
#include "stream/print_record_stream.hpp"
|
||||
#include "utils/command_line/arguments.hpp"
|
||||
#include "utils/file.hpp"
|
||||
@ -50,14 +50,13 @@ auto LoadQueryHashes(Logger &log, const fs::path &path) {
|
||||
// the intention of following block is to get all hashes
|
||||
// for which query implementations have to be compiled
|
||||
// calculate all hashes from queries file
|
||||
QueryPreprocessor preprocessor;
|
||||
// hashes calculated from all queries in queries file
|
||||
QueryHashesT query_hashes;
|
||||
// fill the above set
|
||||
auto queries = utils::ReadLines(path);
|
||||
for (auto &query : queries) {
|
||||
if (query.empty()) continue;
|
||||
query_hashes.insert(preprocessor.preprocess(query).hash);
|
||||
query_hashes.insert(query::StrippedQuery(query).hash());
|
||||
}
|
||||
permanent_assert(query_hashes.size() > 0,
|
||||
"At least one hash has to be present");
|
||||
@ -78,7 +77,6 @@ auto LoadQueryHashes(Logger &log, const fs::path &path) {
|
||||
auto LoadQueryPlans(Logger &log, QueryEngineT &engine,
|
||||
const QueryHashesT &query_hashes, const fs::path &path) {
|
||||
log.info("*** Load/compile needed query implementations ***");
|
||||
QueryPreprocessor preprocessor;
|
||||
auto plan_paths = LoadFilePaths(path, "cpp");
|
||||
// query mark will be used to extract queries from files (because we want
|
||||
// to be independent to a query hash)
|
||||
@ -105,7 +103,7 @@ auto LoadQueryPlans(Logger &log, QueryEngineT &engine,
|
||||
// load/compile implementations only for the queries which are
|
||||
// contained in queries_file
|
||||
// it doesn't make sense to compile something which won't be run
|
||||
if (query_hashes.find(preprocessor.preprocess(query).hash) ==
|
||||
if (query_hashes.find(query::StrippedQuery(query).hash()) ==
|
||||
query_hashes.end())
|
||||
continue;
|
||||
log.info("Path {} will be loaded.", plan_path.c_str());
|
||||
|
@ -27,7 +27,6 @@ int main(int argc, char* argv[]) {
|
||||
|
||||
// init watcher
|
||||
FSWatcher watcher;
|
||||
QueryPreprocessor preprocessor;
|
||||
|
||||
int i = 0;
|
||||
watcher.watch(
|
||||
|
@ -3,7 +3,7 @@
|
||||
|
||||
#include "logging/default.hpp"
|
||||
#include "logging/streams/stdout.hpp"
|
||||
#include "query/preprocessor.hpp"
|
||||
#include "query/stripped.hpp"
|
||||
#include "utils/command_line/arguments.hpp"
|
||||
#include "utils/type_discovery.hpp"
|
||||
|
||||
@ -24,16 +24,15 @@ int main(int argc, char **argv) {
|
||||
auto query = GET_ARG("-q", "CREATE (n) RETURN n").get_string();
|
||||
|
||||
// run preprocessing
|
||||
QueryPreprocessor preprocessor;
|
||||
auto preprocessed = preprocessor.preprocess(query);
|
||||
query::StrippedQuery preprocessed(query);
|
||||
|
||||
// print query, stripped query, hash and variable values (property values)
|
||||
std::cout << fmt::format("Query: {}\n", query);
|
||||
std::cout << fmt::format("Stripped query: {}\n", preprocessed.query);
|
||||
std::cout << fmt::format("Query hash: {}\n", preprocessed.hash);
|
||||
std::cout << fmt::format("Stripped query: {}\n", preprocessed.query());
|
||||
std::cout << fmt::format("Query hash: {}\n", preprocessed.hash());
|
||||
std::cout << fmt::format("Property values:\n");
|
||||
for (int i = 0; i < static_cast<int>(preprocessed.arguments.Size()); ++i) {
|
||||
fmt::format(" {}", preprocessed.arguments.At(i));
|
||||
for (int i = 0; i < static_cast<int>(preprocessed.parameters().Size()); ++i) {
|
||||
fmt::format(" {}", preprocessed.parameters().At(i));
|
||||
}
|
||||
std::cout << std::endl;
|
||||
|
||||
|
@ -6,7 +6,7 @@
|
||||
#include <ctime>
|
||||
#include <iostream>
|
||||
|
||||
#include "query/stripper.hpp"
|
||||
#include "query/stripped.hpp"
|
||||
|
||||
int main(int argc, const char **a) {
|
||||
if (argc < 2) {
|
||||
@ -18,8 +18,9 @@ int main(int argc, const char **a) {
|
||||
const int REPEATS = 100;
|
||||
|
||||
clock_t begin = clock();
|
||||
for (int i = 0; i < REPEATS; ++i)
|
||||
query::Strip(query);
|
||||
for (int i = 0; i < REPEATS; ++i) {
|
||||
query::StrippedQuery(std::string(query));
|
||||
}
|
||||
clock_t end = clock();
|
||||
|
||||
double elapsed_secs = double(end - begin) / CLOCKS_PER_SEC;
|
||||
|
@ -5,10 +5,11 @@
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#include "query/stripper.hpp"
|
||||
#include "query/stripped.hpp"
|
||||
#include "query/typed_value.hpp"
|
||||
|
||||
using query::TypedValue;
|
||||
using query::StrippedQuery;
|
||||
|
||||
void EXPECT_PROP_TRUE(const TypedValue& a) {
|
||||
EXPECT_TRUE(a.type() == TypedValue::Type::Bool && a.Value<bool>());
|
||||
@ -19,75 +20,78 @@ void EXPECT_PROP_EQ(const TypedValue& a, const TypedValue& b) {
|
||||
}
|
||||
|
||||
// A query without literals yields no parameters; keywords are lowercased and
// tokens are separated by single spaces.
TEST(QueryStripper, NoLiterals) {
  StrippedQuery stripped("CREATE (n)");
  EXPECT_EQ(stripped.parameters().Size(), 0);
  EXPECT_EQ(stripped.query(), "create ( n )");
}
|
||||
|
||||
// A decimal integer literal is replaced by a placeholder and stored as int64.
TEST(QueryStripper, DecimalInteger) {
  StrippedQuery stripped("RETURN 42");
  EXPECT_EQ(stripped.parameters().Size(), 1);
  EXPECT_EQ(stripped.parameters().At(0).Value<int64_t>(), 42);
  EXPECT_EQ(stripped.query(), "return $stripped_arg_0");
}
|
||||
|
||||
// An octal integer literal is parsed in base 8 (010 == 8).
TEST(QueryStripper, OctalInteger) {
  StrippedQuery stripped("RETURN 010");
  EXPECT_EQ(stripped.parameters().Size(), 1);
  EXPECT_EQ(stripped.parameters().At(0).Value<int64_t>(), 8);
  EXPECT_EQ(stripped.query(), "return $stripped_arg_0");
}
|
||||
|
||||
// A hexadecimal integer literal is parsed in base 16 (0xa == 10).
TEST(QueryStripper, HexInteger) {
  StrippedQuery stripped("RETURN 0xa");
  EXPECT_EQ(stripped.parameters().Size(), 1);
  EXPECT_EQ(stripped.parameters().At(0).Value<int64_t>(), 10);
  EXPECT_EQ(stripped.query(), "return $stripped_arg_0");
}
|
||||
|
||||
// A regular decimal real literal is stripped into a double parameter.
TEST(QueryStripper, RegularDecimal) {
  StrippedQuery stripped("RETURN 42.3");
  EXPECT_EQ(stripped.parameters().Size(), 1);
  EXPECT_FLOAT_EQ(stripped.parameters().At(0).Value<double>(), 42.3);
  EXPECT_EQ(stripped.query(), "return $stripped_arg_0");
}
|
||||
|
||||
// An exponent-form real literal is stripped into a double parameter.
TEST(QueryStripper, ExponentDecimal) {
  StrippedQuery stripped("RETURN 4e2");
  EXPECT_EQ(stripped.parameters().Size(), 1);
  EXPECT_FLOAT_EQ(stripped.parameters().At(0).Value<double>(), 4e2);
  EXPECT_EQ(stripped.query(), "return $stripped_arg_0");
}
|
||||
|
||||
// A string literal is stored without its surrounding quotes.
TEST(QueryStripper, StringLiteral) {
  StrippedQuery stripped("RETURN 'something'");
  EXPECT_EQ(stripped.parameters().Size(), 1);
  EXPECT_EQ(stripped.parameters().At(0).Value<std::string>(), "something");
  EXPECT_EQ(stripped.query(), "return $stripped_arg_0");
}
|
||||
|
||||
// A boolean literal is stripped into a bool parameter.
TEST(QueryStripper, BoolLiteral) {
  StrippedQuery stripped("RETURN true");
  EXPECT_EQ(stripped.parameters().Size(), 1);
  EXPECT_PROP_EQ(stripped.parameters().At(0), TypedValue(true));
  EXPECT_EQ(stripped.query(), "return $stripped_arg_0");
}
|
||||
|
||||
// A list built only from non-literal expressions produces no parameters.
TEST(QueryStripper, ListLiteral) {
  StrippedQuery stripped("MATCH (n) RETURN [n, n.prop]");
  EXPECT_EQ(stripped.parameters().Size(), 0);
  EXPECT_EQ(stripped.query(), "match ( n ) return [ n , n . prop ]");
}
|
||||
|
||||
// A map built only from non-literal expressions produces no parameters.
TEST(QueryStripper, MapLiteral) {
  StrippedQuery stripped("MATCH (n) RETURN {val: n}");
  EXPECT_EQ(stripped.parameters().Size(), 0);
  EXPECT_EQ(stripped.query(), "match ( n ) return { val : n }");
}
|
||||
|
||||
// Integer bounds of a range literal are stripped like any other integer
// token, so both bounds become parameters.
TEST(QueryStripper, RangeLiteral) {
  StrippedQuery stripped("MATCH (n)-[*2..3]-() RETURN n");
  EXPECT_EQ(stripped.parameters().Size(), 2);
  EXPECT_EQ(stripped.parameters().At(0).Value<int64_t>(), 2);
  EXPECT_EQ(stripped.parameters().At(1).Value<int64_t>(), 3);
  EXPECT_EQ(
      stripped.query(),
      "match ( n ) - [ * $stripped_arg_0 .. $stripped_arg_1 ] - ( ) return n");
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user