From 8c2ca44cb98a2295b072e933c183213b9f32f074 Mon Sep 17 00:00:00 2001 From: Teon Banek Date: Wed, 10 Oct 2018 10:23:10 +0200 Subject: [PATCH] Split manual/query_planner for distributed and single Reviewers: mtomic, llugovic, mferencevic Reviewed By: mferencevic Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D1644 --- tests/manual/CMakeLists.txt | 11 +- tests/manual/distributed_query_planner.cpp | 50 ++ tests/manual/interactive_planning.cpp | 522 ++++++++++++++++++ tests/manual/interactive_planning.hpp | 41 ++ tests/manual/query_planner.cpp | 581 +-------------------- 5 files changed, 626 insertions(+), 579 deletions(-) create mode 100644 tests/manual/distributed_query_planner.cpp create mode 100644 tests/manual/interactive_planning.cpp create mode 100644 tests/manual/interactive_planning.hpp diff --git a/tests/manual/CMakeLists.txt b/tests/manual/CMakeLists.txt index ed9002079..73c5cc0dc 100644 --- a/tests/manual/CMakeLists.txt +++ b/tests/manual/CMakeLists.txt @@ -6,7 +6,7 @@ function(add_manual_test test_cpp) # get exec name (remove extension from the abs path) get_filename_component(exec_name ${test_cpp} NAME_WE) set(target_name ${test_prefix}${exec_name}) - add_executable(${target_name} ${test_cpp}) + add_executable(${target_name} ${test_cpp} ${ARGN}) # OUTPUT_NAME sets the real name of a target when it is built and can be # used to help create two targets of the same name even though CMake # requires unique logical target names @@ -36,6 +36,13 @@ target_link_libraries(${test_prefix}card_fraud_generate_snapshot mg-distributed add_manual_test(card_fraud_local.cpp) target_link_libraries(${test_prefix}card_fraud_local mg-distributed kvstore_dummy_lib gtest) +add_manual_test(distributed_query_planner.cpp interactive_planning.cpp) +target_link_libraries(${test_prefix}distributed_query_planner mg-distributed + kvstore_dummy_lib) +if (READLINE_FOUND) + target_link_libraries(${test_prefix}distributed_query_planner readline) +endif() + add_manual_test(distributed_repl.cpp) target_link_libraries(${test_prefix}distributed_repl mg-distributed kvstore_dummy_lib gtest readline) @@ -53,7 +60,7 @@ target_link_libraries(${test_prefix}kvstore_console kvstore_lib gflags glog) add_manual_test(query_hash.cpp) target_link_libraries(${test_prefix}query_hash mg-single-node kvstore_dummy_lib) -add_manual_test(query_planner.cpp) +add_manual_test(query_planner.cpp interactive_planning.cpp) target_link_libraries(${test_prefix}query_planner mg-single-node kvstore_dummy_lib) add_manual_test(repl.cpp) diff --git a/tests/manual/distributed_query_planner.cpp b/tests/manual/distributed_query_planner.cpp new file mode 100644 index 000000000..b40f6a46d --- /dev/null +++ b/tests/manual/distributed_query_planner.cpp @@ -0,0 +1,50 @@ +#include "interactive_planning.hpp" + +#include +#include + +#include "database/distributed/distributed_graph_db.hpp" +#include "database/distributed/graph_db_accessor.hpp" +#include "query/plan/distributed.hpp" +#include "query/plan/distributed_pretty_print.hpp" + +DECLARE_int32(min_log_level); + +DEFCOMMAND(ShowDistributed) { + int64_t plan_ix = 0; + std::stringstream ss(args[0]); + ss >> plan_ix; + if (ss.fail() || !ss.eof() || plan_ix >= plans.size()) return; + const auto &plan = plans[plan_ix].first; + std::atomic plan_id{0}; + auto distributed_plan = MakeDistributedPlan(*plan, symbol_table, plan_id); + { + std::cout << "---- Master Plan ---- " << std::endl; + query::plan::DistributedPrettyPrint(dba, + distributed_plan.master_plan.get()); + std::cout << std::endl; + } + for (size_t i = 0; i < distributed_plan.worker_plans.size(); ++i) { + int64_t id; + std::shared_ptr worker_plan; + std::tie(id, worker_plan) = distributed_plan.worker_plans[i]; + std::cout << "---- Worker Plan #" << id << " ---- " << std::endl; + query::plan::DistributedPrettyPrint(dba, worker_plan.get()); + std::cout << std::endl; + } +} + +int main(int argc, char *argv[]) { + gflags::ParseCommandLineFlags(&argc, &argv, true); + FLAGS_min_log_level = google::ERROR; + google::InitGoogleLogging(argv[0]); + AddCommand("show-distributed", + {ShowDistributedCommand, 1, + "Show the Nth plan as for distributed execution"}); + database::Master db; + auto dba = db.Access(); + RunInteractivePlanning(dba.get()); + db.Shutdown(); + db.AwaitShutdown(); + return 0; +} diff --git a/tests/manual/interactive_planning.cpp b/tests/manual/interactive_planning.cpp new file mode 100644 index 000000000..91e3de5e2 --- /dev/null +++ b/tests/manual/interactive_planning.cpp @@ -0,0 +1,522 @@ +#include "interactive_planning.hpp" + +#include +#include +#include +#include + +#include +#include + +#include "database/graph_db_accessor.hpp" +#include "query/context.hpp" +#include "query/frontend/ast/cypher_main_visitor.hpp" +#include "query/frontend/opencypher/parser.hpp" +#include "query/frontend/semantic/symbol_generator.hpp" +#include "query/plan/operator.hpp" +#include "query/plan/planner.hpp" +#include "query/plan/pretty_print.hpp" +#include "query/typed_value.hpp" +#include "utils/string.hpp" + +DEFINE_string(save_mock_db_file, "", + "File where the mock database should be saved (on exit)"); + +DEFINE_string(load_mock_db_file, "", + "File from which the mock database should be loaded"); + +#ifdef HAS_READLINE +// TODO: This is copied from src/query/repl.cpp +// It should probably be moved to some utils file. + +#include "readline/history.h" +#include "readline/readline.h" + +/** + * Helper function that reads a line from the + * standard input using the 'readline' lib. + * Adds support for history and reverse-search. + * + * @param prompt The prompt to display. + * @return A single command the user entered, or nullopt on EOF. + */ +std::experimental::optional ReadLine(const std::string &prompt) { + char *line = readline(prompt.c_str()); + if (!line) return std::experimental::nullopt; + + if (*line) add_history(line); + std::string r_val(line); + free(line); + return r_val; +} + +#else + +std::experimental::optional ReadLine(const std::string &prompt) { + std::cout << prompt; + std::string line; + std::getline(std::cin, line); + if (std::cin.eof()) return std::experimental::nullopt; + return line; +} + +#endif // HAS_READLINE + +// Repeats the prompt untile the user inputs an integer. +int64_t ReadInt(const std::string &prompt) { + int64_t val = 0; + std::stringstream ss; + do { + auto line = ReadLine(prompt); + if (!line) continue; + ss.str(*line); + ss.clear(); + ss >> val; + } while (ss.fail() || !ss.eof()); + return val; +} + +bool AskYesNo(const std::string &prompt) { + while (auto line = ReadLine(prompt + " (y/n) ")) { + if (*line == "y" || *line == "Y") return true; + if (*line == "n" || *line == "N") return false; + } + return false; +} + +class Timer { + public: + void Start() { + duration_ = duration_.zero(); + start_time_ = std::chrono::steady_clock::now(); + } + + void Pause() { + if (pause_ == 0) { + duration_ += std::chrono::steady_clock::now() - start_time_; + } + ++pause_; + } + + void Resume() { + if (pause_ == 1) { + start_time_ = std::chrono::steady_clock::now(); + } + pause_ = std::max(0, pause_ - 1); + } + + template + auto WithPause(const TFun &fun) { + Pause(); + auto ret = fun(); + Resume(); + return std::move(ret); + } + + std::chrono::duration Elapsed() { + if (pause_ == 0) { + return duration_ + (std::chrono::steady_clock::now() - start_time_); + } + return duration_; + } + + private: + std::chrono::duration duration_; + std::chrono::time_point start_time_; + int pause_ = 0; +}; + +// Dummy DbAccessor which forwards user input for various vertex counts. +class InteractiveDbAccessor { + public: + InteractiveDbAccessor(database::GraphDbAccessor &dba, int64_t vertices_count, + Timer &timer) + : dba_(dba), vertices_count_(vertices_count), timer_(timer) {} + + int64_t VerticesCount() const { return vertices_count_; } + + int64_t VerticesCount(storage::Label label_id) const { + auto label = dba_.LabelName(label_id); + if (label_vertex_count_.find(label) == label_vertex_count_.end()) { + label_vertex_count_[label] = ReadVertexCount("label '" + label + "'"); + } + return label_vertex_count_.at(label); + } + + int64_t VerticesCount(storage::Label label_id, + storage::Property property_id) const { + auto label = dba_.LabelName(label_id); + auto property = dba_.PropertyName(property_id); + auto key = std::make_pair(label, property); + if (label_property_vertex_count_.find(key) == + label_property_vertex_count_.end()) { + label_property_vertex_count_[key] = ReadVertexCount( + "label '" + label + "' and property '" + property + "'"); + } + return label_property_vertex_count_.at(key); + } + + int64_t VerticesCount(storage::Label label_id, storage::Property property_id, + const PropertyValue &value) const { + auto label = dba_.LabelName(label_id); + auto property = dba_.PropertyName(property_id); + auto label_prop = std::make_pair(label, property); + if (label_property_index_.find(label_prop) == label_property_index_.end()) { + return 0; + } + auto &value_vertex_count = property_value_vertex_count_[label_prop]; + if (value_vertex_count.find(value) == value_vertex_count.end()) { + std::stringstream ss; + ss << value; + int64_t count = ReadVertexCount("label '" + label + "' and property '" + + property + "' value '" + ss.str() + "'"); + value_vertex_count[value] = count; + } + return value_vertex_count.at(value); + } + + int64_t VerticesCount( + storage::Label label_id, storage::Property property_id, + const std::experimental::optional> lower, + const std::experimental::optional> upper) + const { + auto label = dba_.LabelName(label_id); + auto property = dba_.PropertyName(property_id); + std::stringstream range_string; + if (lower) { + range_string << (lower->IsInclusive() ? "[" : "(") << lower->value() + << (upper ? "," : ", inf)"); + } else { + range_string << "(-inf, "; + } + if (upper) { + range_string << upper->value() << (upper->IsInclusive() ? "]" : ")"); + } + return ReadVertexCount("label '" + label + "' and property '" + property + + "' in range " + range_string.str()); + } + + bool LabelPropertyIndexExists(storage::Label label_id, + storage::Property property_id) const { + auto label = dba_.LabelName(label_id); + auto property = dba_.PropertyName(property_id); + auto key = std::make_pair(label, property); + if (label_property_index_.find(key) == label_property_index_.end()) { + bool resp = timer_.WithPause([&label, &property]() { + return AskYesNo("Index for ':" + label + "(" + property + ")' exists:"); + }); + label_property_index_[key] = resp; + } + return label_property_index_.at(key); + } + + // Save the cached vertex counts to a stream. + void Save(std::ostream &out) { + out << "vertex-count " << vertices_count_ << std::endl; + out << "label-index-count " << label_vertex_count_.size() << std::endl; + for (const auto &label_count : label_vertex_count_) { + out << " " << label_count.first << " " << label_count.second + << std::endl; + } + auto save_label_prop_map = [&](const auto &name, + const auto &label_prop_map) { + out << name << " " << label_prop_map.size() << std::endl; + for (const auto &label_prop : label_prop_map) { + out << " " << label_prop.first.first << " " << label_prop.first.second + << " " << label_prop.second << std::endl; + } + }; + save_label_prop_map("label-property-index-exists", label_property_index_); + save_label_prop_map("label-property-index-count", + label_property_vertex_count_); + out << "label-property-value-index-count " + << property_value_vertex_count_.size() << std::endl; + for (const auto &prop_value_count : property_value_vertex_count_) { + out << " " << prop_value_count.first.first << " " + << prop_value_count.first.second << " " + << prop_value_count.second.size() << std::endl; + for (const auto &value_count : prop_value_count.second) { + const auto &value = value_count.first; + out << " " << value.type() << " " << value << " " + << value_count.second << std::endl; + } + } + } + + // Load the cached vertex counts from a stream. + // If loading fails, raises utils::BasicException. + void Load(std::istream &in) { + auto load_named_size = [&](const auto &name) { + int size; + in.ignore(std::numeric_limits::max(), ' ') >> size; + if (in.fail()) { + throw utils::BasicException("Unable to load {}", name); + } + DLOG(INFO) << "Load " << name << " " << size; + return size; + }; + vertices_count_ = load_named_size("vertex-count"); + int label_vertex_size = load_named_size("label-index-count"); + for (int i = 0; i < label_vertex_size; ++i) { + std::string label; + int64_t count; + in >> label >> count; + if (in.fail()) { + throw utils::BasicException("Unable to load label count"); + } + label_vertex_count_[label] = count; + DLOG(INFO) << "Load " << label << " " << count; + } + auto load_label_prop_map = [&](const auto &name, auto &label_prop_map) { + int size = load_named_size(name); + for (int i = 0; i < size; ++i) { + std::string label; + std::string property; + in >> label >> property; + auto &mapped = label_prop_map[std::make_pair(label, property)]; + in >> mapped; + if (in.fail()) { + throw utils::BasicException("Unable to load label property"); + } + DLOG(INFO) << "Load " << label << " " << property << " " << mapped; + } + }; + load_label_prop_map("label-property-index-exists", label_property_index_); + load_label_prop_map("label-property-index-count", + label_property_vertex_count_); + int label_property_value_index_size = + load_named_size("label-property-value-index-count"); + for (int i = 0; i < label_property_value_index_size; ++i) { + std::string label; + std::string property; + int64_t value_count; + in >> label >> property >> value_count; + if (in.fail()) { + throw utils::BasicException("Unable to load label property value"); + } + DLOG(INFO) << "Load " << label << " " << property << " " << value_count; + for (int v = 0; v < value_count; ++v) { + auto value = LoadTypedValue(in); + int64_t count; + in >> count; + if (in.fail()) { + throw utils::BasicException("Unable to load label property value"); + } + DLOG(INFO) << "Load " << value.type() << " " << value << " " << count; + property_value_vertex_count_[std::make_pair(label, property)][value] = + count; + } + } + } + + private: + typedef std::pair LabelPropertyKey; + + database::GraphDbAccessor &dba_; + int64_t vertices_count_; + Timer &timer_; + mutable std::map label_vertex_count_; + mutable std::map, int64_t> + label_property_vertex_count_; + mutable std::map, bool> + label_property_index_; + mutable std::map< + std::pair, + std::unordered_map> + property_value_vertex_count_; + // TODO: Cache faked index counts by range. + + int64_t ReadVertexCount(const std::string &message) const { + return timer_.WithPause( + [&message]() { return ReadInt("Vertices with " + message + ": "); }); + } + + query::TypedValue LoadTypedValue(std::istream &in) { + std::string type; + in >> type; + if (type == "bool") { + return LoadTypedValue(in); + } else if (type == "int") { + return LoadTypedValue(in); + } else if (type == "double") { + return LoadTypedValue(in); + } else if (type == "string") { + return LoadTypedValue(in); + } else { + throw utils::BasicException("Unable to read type '{}'", type); + } + } + + template + query::TypedValue LoadTypedValue(std::istream &in) { + T val; + in >> val; + return query::TypedValue(val); + } +}; + +DEFCOMMAND(Top) { + int64_t n_plans = 0; + std::stringstream ss(args[0]); + ss >> n_plans; + if (ss.fail() || !ss.eof()) return; + n_plans = std::min(static_cast(plans.size()), n_plans); + for (int64_t i = 0; i < n_plans; ++i) { + auto &plan_pair = plans[i]; + std::cout << "---- Plan #" << i << " ---- " << std::endl; + std::cout << "cost: " << plan_pair.second << std::endl; + query::plan::PrettyPrint(dba, plan_pair.first.get()); + std::cout << std::endl; + } +} + +DEFCOMMAND(Show) { + int64_t plan_ix = 0; + std::stringstream ss(args[0]); + ss >> plan_ix; + if (ss.fail() || !ss.eof() || plan_ix >= plans.size()) return; + const auto &plan = plans[plan_ix].first; + auto cost = plans[plan_ix].second; + std::cout << "Plan cost: " << cost << std::endl; + query::plan::PrettyPrint(dba, plan.get()); +} + +DEFCOMMAND(Help); + +std::map commands = { + {"top", {TopCommand, 1, "Show top N plans"}}, + {"show", {ShowCommand, 1, "Show the Nth plan"}}, + {"help", {HelpCommand, 0, "Show available commands"}}, +}; + +void AddCommand(const std::string &name, const Command &command) { + commands[name] = command; +} + +DEFCOMMAND(Help) { + std::cout << "Available commands:" << std::endl; + for (const auto &command : commands) { + std::cout << command.first; + for (int i = 1; i <= command.second.arg_count; ++i) { + std::cout << " arg" << i; + } + std::cout << " -- " << command.second.documentation << std::endl; + } +} + +void ExaminePlans( + database::GraphDbAccessor &dba, const query::SymbolTable &symbol_table, + std::vector, + double>> &plans) { + while (true) { + auto line = ReadLine("plan? "); + if (!line || *line == "quit") break; + auto words = utils::Split(utils::ToLowerCase(*line)); + if (words.empty()) continue; + auto command_name = words[0]; + std::vector args(words.begin() + 1, words.end()); + auto command_it = commands.find(command_name); + if (command_it == commands.end()) { + std::cout << "Undefined command: '" << command_name << "'. Try 'help'." + << std::endl; + continue; + } + const auto &command = command_it->second; + if (args.size() < command.arg_count) { + std::cout << command_name << " expects " << command.arg_count + << " arguments" << std::endl; + continue; + } + command.function(dba, symbol_table, plans, args); + } +} + +query::AstStorage MakeAst(const std::string &query, + database::GraphDbAccessor &dba) { + query::ParsingContext parsing_context; + parsing_context.is_query_cached = false; + // query -> AST + auto parser = std::make_unique(query); + // AST -> high level tree + query::frontend::CypherMainVisitor visitor(parsing_context, &dba); + visitor.visit(parser->tree()); + return std::move(visitor.storage()); +} + +query::SymbolTable MakeSymbolTable(const query::AstStorage &ast) { + query::SymbolTable symbol_table; + query::SymbolGenerator symbol_generator(symbol_table); + ast.query()->Accept(symbol_generator); + return symbol_table; +} + +// Returns a list of pairs (plan, estimated cost), sorted in the ascending +// order by cost. +auto MakeLogicalPlans(query::AstStorage &ast, query::SymbolTable &symbol_table, + InteractiveDbAccessor &dba) { + auto query_parts = query::plan::CollectQueryParts(symbol_table, ast); + std::vector, double>> + plans_with_cost; + auto ctx = query::plan::MakePlanningContext(ast, symbol_table, dba); + if (query_parts.query_parts.size() <= 0) { + std::cerr << "Failed to extract query parts" << std::endl; + std::exit(EXIT_FAILURE); + } + auto plans = query::plan::MakeLogicalPlanForSingleQuery< + query::plan::VariableStartPlanner>( + query_parts.query_parts.at(0).single_query_parts, ctx); + query::Parameters parameters; + for (auto plan : plans) { + query::plan::CostEstimator estimator(dba, + parameters); + plan->Accept(estimator); + plans_with_cost.emplace_back(std::move(plan), estimator.cost()); + } + std::stable_sort( + plans_with_cost.begin(), plans_with_cost.end(), + [](const auto &a, const auto &b) { return a.second < b.second; }); + return plans_with_cost; +} + +void RunInteractivePlanning(database::GraphDbAccessor *dba) { + auto in_db_filename = utils::Trim(FLAGS_load_mock_db_file); + if (!in_db_filename.empty() && + !std::experimental::filesystem::exists(in_db_filename)) { + std::cerr << "File '" << in_db_filename << "' does not exist!" << std::endl; + std::exit(EXIT_FAILURE); + } + Timer planning_timer; + InteractiveDbAccessor interactive_db( + *dba, in_db_filename.empty() ? ReadInt("Vertices in DB: ") : 0, + planning_timer); + if (!in_db_filename.empty()) { + std::ifstream db_file(in_db_filename); + interactive_db.Load(db_file); + } + while (true) { + auto line = ReadLine("query? "); + if (!line || *line == "quit") break; + if (line->empty()) continue; + try { + auto ast = MakeAst(*line, *dba); + auto symbol_table = MakeSymbolTable(ast); + planning_timer.Start(); + auto plans = MakeLogicalPlans(ast, symbol_table, interactive_db); + auto planning_time = planning_timer.Elapsed(); + std::cout + << "Planning took " + << std::chrono::duration(planning_time).count() + << "ms" << std::endl; + std::cout << "Generated " << plans.size() << " plans" << std::endl; + ExaminePlans(*dba, symbol_table, plans); + } catch (const utils::BasicException &e) { + std::cout << "Error: " << e.what() << std::endl; + } + } + auto db_filename = utils::Trim(FLAGS_save_mock_db_file); + if (!db_filename.empty()) { + std::ofstream db_file(db_filename); + interactive_db.Save(db_file); + } +} diff --git a/tests/manual/interactive_planning.hpp b/tests/manual/interactive_planning.hpp new file mode 100644 index 000000000..da45f2fa7 --- /dev/null +++ b/tests/manual/interactive_planning.hpp @@ -0,0 +1,41 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "query/frontend/semantic/symbol_table.hpp" +#include "query/plan/operator.hpp" + +namespace database { +class GraphDbAccessor; +} + +// Shorthand for a vector of pairs (logical_plan, cost). +typedef std::vector< + std::pair, double>> + PlansWithCost; + +// Encapsulates a consoles command function. +struct Command { + typedef std::vector Args; + // Function of this command + std::function + function; + // Number of arguments the function works with. + int arg_count; + // Explanation of the command. + std::string documentation; +}; + +#define DEFCOMMAND(Name) \ + void Name##Command(database::GraphDbAccessor &dba, \ + const query::SymbolTable &symbol_table, \ + PlansWithCost &plans, const Command::Args &args) + +void AddCommand(const std::string &name, const Command &command); + +void RunInteractivePlanning(database::GraphDbAccessor *dba); diff --git a/tests/manual/query_planner.cpp b/tests/manual/query_planner.cpp index 364731bf9..7ebb664f4 100644 --- a/tests/manual/query_planner.cpp +++ b/tests/manual/query_planner.cpp @@ -1,592 +1,19 @@ -#include -#include -#include -#include -#include -#include -#include -#include +#include "interactive_planning.hpp" -#include "gflags/gflags.h" -#include "glog/logging.h" +#include +#include #include "database/single_node/graph_db.hpp" #include "database/single_node/graph_db_accessor.hpp" -#include "query/context.hpp" -#include "query/frontend/ast/ast.hpp" -#include "query/frontend/ast/cypher_main_visitor.hpp" -#include "query/frontend/opencypher/parser.hpp" -#include "query/frontend/semantic/symbol_generator.hpp" -#include "query/frontend/stripped.hpp" -#include "query/plan/cost_estimator.hpp" -/* TODO: FIXME -#include "query/plan/distributed.hpp" -#include "query/plan/distributed_pretty_print.hpp" -*/ -#include "query/plan/planner.hpp" -#include "query/typed_value.hpp" -#include "utils/hashing/fnv.hpp" -#include "utils/string.hpp" - -DEFINE_string(save_mock_db_file, "", - "File where the mock database should be saved (on exit)"); - -DEFINE_string(load_mock_db_file, "", - "File from which the mock database should be loaded"); DECLARE_int32(min_log_level); -#ifdef HAS_READLINE -// TODO: This is copied from src/query/repl.cpp -// It should probably be moved to some utils file. - -#include "readline/history.h" -#include "readline/readline.h" - -/** - * Helper function that reads a line from the - * standard input using the 'readline' lib. - * Adds support for history and reverse-search. - * - * @param prompt The prompt to display. - * @return A single command the user entered, or nullopt on EOF. - */ -std::experimental::optional ReadLine(const std::string &prompt) { - char *line = readline(prompt.c_str()); - if (!line) return std::experimental::nullopt; - - if (*line) add_history(line); - std::string r_val(line); - free(line); - return r_val; -} - -#else - -std::experimental::optional ReadLine(const std::string &prompt) { - std::cout << prompt; - std::string line; - std::getline(std::cin, line); - if (std::cin.eof()) return std::experimental::nullopt; - return line; -} - -#endif // HAS_READLINE - -// Repeats the prompt untile the user inputs an integer. -int64_t ReadInt(const std::string &prompt) { - int64_t val = 0; - std::stringstream ss; - do { - auto line = ReadLine(prompt); - if (!line) continue; - ss.str(*line); - ss.clear(); - ss >> val; - } while (ss.fail() || !ss.eof()); - return val; -} - -bool AskYesNo(const std::string &prompt) { - while (auto line = ReadLine(prompt + " (y/n) ")) { - if (*line == "y" || *line == "Y") return true; - if (*line == "n" || *line == "N") return false; - } - return false; -} - -class Timer { - public: - void Start() { - duration_ = duration_.zero(); - start_time_ = std::chrono::steady_clock::now(); - } - - void Pause() { - if (pause_ == 0) { - duration_ += std::chrono::steady_clock::now() - start_time_; - } - ++pause_; - } - - void Resume() { - if (pause_ == 1) { - start_time_ = std::chrono::steady_clock::now(); - } - pause_ = std::max(0, pause_ - 1); - } - - template - auto WithPause(const TFun &fun) { - Pause(); - auto ret = fun(); - Resume(); - return std::move(ret); - } - - std::chrono::duration Elapsed() { - if (pause_ == 0) { - return duration_ + (std::chrono::steady_clock::now() - start_time_); - } - return duration_; - } - - private: - std::chrono::duration duration_; - std::chrono::time_point start_time_; - int pause_ = 0; -}; - -// Dummy DbAccessor which forwards user input for various vertex counts. -class InteractiveDbAccessor { - public: - InteractiveDbAccessor(database::GraphDbAccessor &dba, int64_t vertices_count, - Timer &timer) - : dba_(dba), vertices_count_(vertices_count), timer_(timer) {} - - int64_t VerticesCount() const { return vertices_count_; } - - int64_t VerticesCount(storage::Label label_id) const { - auto label = dba_.LabelName(label_id); - if (label_vertex_count_.find(label) == label_vertex_count_.end()) { - label_vertex_count_[label] = ReadVertexCount("label '" + label + "'"); - } - return label_vertex_count_.at(label); - } - - int64_t VerticesCount(storage::Label label_id, - storage::Property property_id) const { - auto label = dba_.LabelName(label_id); - auto property = dba_.PropertyName(property_id); - auto key = std::make_pair(label, property); - if (label_property_vertex_count_.find(key) == - label_property_vertex_count_.end()) { - label_property_vertex_count_[key] = ReadVertexCount( - "label '" + label + "' and property '" + property + "'"); - } - return label_property_vertex_count_.at(key); - } - - int64_t VerticesCount(storage::Label label_id, storage::Property property_id, - const PropertyValue &value) const { - auto label = dba_.LabelName(label_id); - auto property = dba_.PropertyName(property_id); - auto label_prop = std::make_pair(label, property); - if (label_property_index_.find(label_prop) == label_property_index_.end()) { - return 0; - } - auto &value_vertex_count = property_value_vertex_count_[label_prop]; - if (value_vertex_count.find(value) == value_vertex_count.end()) { - std::stringstream ss; - ss << value; - int64_t count = ReadVertexCount("label '" + label + "' and property '" + - property + "' value '" + ss.str() + "'"); - value_vertex_count[value] = count; - } - return value_vertex_count.at(value); - } - - int64_t VerticesCount( - storage::Label label_id, storage::Property property_id, - const std::experimental::optional> lower, - const std::experimental::optional> upper) - const { - auto label = dba_.LabelName(label_id); - auto property = dba_.PropertyName(property_id); - std::stringstream range_string; - if (lower) { - range_string << (lower->IsInclusive() ? "[" : "(") << lower->value() - << (upper ? "," : ", inf)"); - } else { - range_string << "(-inf, "; - } - if (upper) { - range_string << upper->value() << (upper->IsInclusive() ? "]" : ")"); - } - return ReadVertexCount("label '" + label + "' and property '" + property + - "' in range " + range_string.str()); - } - - bool LabelPropertyIndexExists(storage::Label label_id, - storage::Property property_id) const { - auto label = dba_.LabelName(label_id); - auto property = dba_.PropertyName(property_id); - auto key = std::make_pair(label, property); - if (label_property_index_.find(key) == label_property_index_.end()) { - bool resp = timer_.WithPause([&label, &property]() { - return AskYesNo("Index for ':" + label + "(" + property + ")' exists:"); - }); - label_property_index_[key] = resp; - } - return label_property_index_.at(key); - } - - // Save the cached vertex counts to a stream. - void Save(std::ostream &out) { - out << "vertex-count " << vertices_count_ << std::endl; - out << "label-index-count " << label_vertex_count_.size() << std::endl; - for (const auto &label_count : label_vertex_count_) { - out << " " << label_count.first << " " << label_count.second - << std::endl; - } - auto save_label_prop_map = [&](const auto &name, - const auto &label_prop_map) { - out << name << " " << label_prop_map.size() << std::endl; - for (const auto &label_prop : label_prop_map) { - out << " " << label_prop.first.first << " " << label_prop.first.second - << " " << label_prop.second << std::endl; - } - }; - save_label_prop_map("label-property-index-exists", label_property_index_); - save_label_prop_map("label-property-index-count", - label_property_vertex_count_); - out << "label-property-value-index-count " - << property_value_vertex_count_.size() << std::endl; - for (const auto &prop_value_count : property_value_vertex_count_) { - out << " " << prop_value_count.first.first << " " - << prop_value_count.first.second << " " - << prop_value_count.second.size() << std::endl; - for (const auto &value_count : prop_value_count.second) { - const auto &value = value_count.first; - out << " " << value.type() << " " << value << " " - << value_count.second << std::endl; - } - } - } - - // Load the cached vertex counts from a stream. - // If loading fails, raises utils::BasicException. - void Load(std::istream &in) { - auto load_named_size = [&](const auto &name) { - int size; - in.ignore(std::numeric_limits::max(), ' ') >> size; - if (in.fail()) { - throw utils::BasicException("Unable to load {}", name); - } - DLOG(INFO) << "Load " << name << " " << size; - return size; - }; - vertices_count_ = load_named_size("vertex-count"); - int label_vertex_size = load_named_size("label-index-count"); - for (int i = 0; i < label_vertex_size; ++i) { - std::string label; - int64_t count; - in >> label >> count; - if (in.fail()) { - throw utils::BasicException("Unable to load label count"); - } - label_vertex_count_[label] = count; - DLOG(INFO) << "Load " << label << " " << count; - } - auto load_label_prop_map = [&](const auto &name, auto &label_prop_map) { - int size = load_named_size(name); - for (int i = 0; i < size; ++i) { - std::string label; - std::string property; - in >> label >> property; - auto &mapped = label_prop_map[std::make_pair(label, property)]; - in >> mapped; - if (in.fail()) { - throw utils::BasicException("Unable to load label property"); - } - DLOG(INFO) << "Load " << label << " " << property << " " << mapped; - } - }; - load_label_prop_map("label-property-index-exists", label_property_index_); - load_label_prop_map("label-property-index-count", - label_property_vertex_count_); - int label_property_value_index_size = - load_named_size("label-property-value-index-count"); - for (int i = 0; i < label_property_value_index_size; ++i) { - std::string label; - std::string property; - int64_t value_count; - in >> label >> property >> value_count; - if (in.fail()) { - throw utils::BasicException("Unable to load label property value"); - } - DLOG(INFO) << "Load " << label << " " << property << " " << value_count; - for (int v = 0; v < value_count; ++v) { - auto value = LoadTypedValue(in); - int64_t count; - in >> count; - if (in.fail()) { - throw utils::BasicException("Unable to load label property value"); - } - DLOG(INFO) << "Load " << value.type() << " " << value << " " << count; - property_value_vertex_count_[std::make_pair(label, property)][value] = - count; - } - } - } - - private: - typedef std::pair LabelPropertyKey; - - database::GraphDbAccessor &dba_; - int64_t vertices_count_; - Timer &timer_; - mutable std::map label_vertex_count_; - mutable std::map, int64_t> - label_property_vertex_count_; - mutable std::map, bool> - label_property_index_; - mutable std::map< - std::pair, - std::unordered_map> - property_value_vertex_count_; - // TODO: Cache faked index counts by range. - - int64_t ReadVertexCount(const std::string &message) const { - return timer_.WithPause( - [&message]() { return ReadInt("Vertices with " + message + ": "); }); - } - - query::TypedValue LoadTypedValue(std::istream &in) { - std::string type; - in >> type; - if (type == "bool") { - return LoadTypedValue(in); - } else if (type == "int") { - return LoadTypedValue(in); - } else if (type == "double") { - return LoadTypedValue(in); - } else if (type == "string") { - return LoadTypedValue(in); - } else { - throw utils::BasicException("Unable to read type '{}'", type); - } - } - - template - query::TypedValue LoadTypedValue(std::istream &in) { - T val; - in >> val; - return query::TypedValue(val); - } -}; - -// Shorthand for a vector of pairs (logical_plan, cost). -typedef std::vector< - std::pair, double>> - PlansWithCost; - -// Encapsulates a consoles command function. -struct Command { - typedef std::vector Args; - // Function of this command - std::function - function; - // Number of arguments the function works with. - int arg_count; - // Explanation of the command. - std::string documentation; -}; - -#define DEFCOMMAND(Name) \ - void Name##Command(database::GraphDbAccessor &dba, \ - const query::SymbolTable &symbol_table, \ - PlansWithCost &plans, const Command::Args &args) - -DEFCOMMAND(Top) { - int64_t n_plans = 0; - std::stringstream ss(args[0]); - ss >> n_plans; - if (ss.fail() || !ss.eof()) return; - n_plans = std::min(static_cast(plans.size()), n_plans); - for (int64_t i = 0; i < n_plans; ++i) { - auto &plan_pair = plans[i]; - std::cout << "---- Plan #" << i << " ---- " << std::endl; - std::cout << "cost: " << plan_pair.second << std::endl; - // TODO: Was `DistributedPrettyPrint` - query::plan::PrettyPrint(dba, plan_pair.first.get()); - std::cout << std::endl; - } -} - -DEFCOMMAND(Show) { - int64_t plan_ix = 0; - std::stringstream ss(args[0]); - ss >> plan_ix; - if (ss.fail() || !ss.eof() || plan_ix >= plans.size()) return; - const auto &plan = plans[plan_ix].first; - auto cost = plans[plan_ix].second; - std::cout << "Plan cost: " << cost << std::endl; - // TODO: Was `DistributedPrettyPrint` - query::plan::PrettyPrint(dba, plan.get()); -} - -/* TODO: FIXME -DEFCOMMAND(ShowDistributed) { - int64_t plan_ix = 0; - std::stringstream ss(args[0]); - ss >> plan_ix; - if (ss.fail() || !ss.eof() || plan_ix >= plans.size()) return; - const auto &plan = plans[plan_ix].first; - std::atomic plan_id{0}; - auto distributed_plan = MakeDistributedPlan(*plan, symbol_table, plan_id); - { - std::cout << "---- Master Plan ---- " << std::endl; - query::plan::DistributedPrettyPrint(dba, distributed_plan.master_plan.get()); - std::cout << std::endl; - } - for (size_t i = 0; i < distributed_plan.worker_plans.size(); ++i) { - int64_t id; - std::shared_ptr worker_plan; - std::tie(id, worker_plan) = distributed_plan.worker_plans[i]; - std::cout << "---- Worker Plan #" << id << " ---- " << std::endl; - query::plan::DistributedPrettyPrint(dba, worker_plan.get()); - std::cout << std::endl; - } -} -*/ - -DEFCOMMAND(Help); - -std::map commands = { - {"top", {TopCommand, 1, "Show top N plans"}}, - {"show", {ShowCommand, 1, "Show the Nth plan"}}, - /* TODO: FIXME - {"show-distributed", - {ShowDistributedCommand, 1, - "Show the Nth plan as for distributed execution"}}, - */ - {"help", {HelpCommand, 0, "Show available commands"}}, -}; - -DEFCOMMAND(Help) { - std::cout << "Available commands:" << std::endl; - for (const auto &command : commands) { - std::cout << command.first; - for (int i = 1; i <= command.second.arg_count; ++i) { - std::cout << " arg" << i; - } - std::cout << " -- " << command.second.documentation << std::endl; - } -} - -#undef DEFCOMMAND - -void ExaminePlans( - database::GraphDbAccessor &dba, const query::SymbolTable &symbol_table, - std::vector, - double>> &plans) { - while (true) { - auto line = ReadLine("plan? "); - if (!line || *line == "quit") break; - auto words = utils::Split(utils::ToLowerCase(*line)); - if (words.empty()) continue; - auto command_name = words[0]; - std::vector args(words.begin() + 1, words.end()); - auto command_it = commands.find(command_name); - if (command_it == commands.end()) { - std::cout << "Undefined command: '" << command_name << "'. Try 'help'." - << std::endl; - continue; - } - const auto &command = command_it->second; - if (args.size() < command.arg_count) { - std::cout << command_name << " expects " << command.arg_count - << " arguments" << std::endl; - continue; - } - command.function(dba, symbol_table, plans, args); - } -} - -query::AstStorage MakeAst(const std::string &query, - database::GraphDbAccessor &dba) { - query::ParsingContext parsing_context; - parsing_context.is_query_cached = false; - // query -> AST - auto parser = std::make_unique(query); - // AST -> high level tree - query::frontend::CypherMainVisitor visitor(parsing_context, &dba); - visitor.visit(parser->tree()); - return std::move(visitor.storage()); -} - -query::SymbolTable MakeSymbolTable(const query::AstStorage &ast) { - query::SymbolTable symbol_table; - query::SymbolGenerator symbol_generator(symbol_table); - ast.query()->Accept(symbol_generator); - return symbol_table; -} - -// Returns a list of pairs (plan, estimated cost), sorted in the ascending -// order by cost. -auto MakeLogicalPlans(query::AstStorage &ast, query::SymbolTable &symbol_table, - InteractiveDbAccessor &dba) { - auto query_parts = query::plan::CollectQueryParts(symbol_table, ast); - std::vector, double>> - plans_with_cost; - auto ctx = query::plan::MakePlanningContext(ast, symbol_table, dba); - if (query_parts.query_parts.size() <= 0) { - std::cerr << "Failed to extract query parts" << std::endl; - std::exit(EXIT_FAILURE); - } - auto plans = query::plan::MakeLogicalPlanForSingleQuery< - query::plan::VariableStartPlanner>( - query_parts.query_parts.at(0).single_query_parts, ctx); - query::Parameters parameters; - for (auto plan : plans) { - query::plan::CostEstimator estimator(dba, - parameters); - plan->Accept(estimator); - plans_with_cost.emplace_back(std::move(plan), estimator.cost()); - } - std::stable_sort( - plans_with_cost.begin(), plans_with_cost.end(), - [](const auto &a, const auto &b) { return a.second < b.second; }); - return plans_with_cost; -} - int main(int argc, char *argv[]) { gflags::ParseCommandLineFlags(&argc, &argv, true); FLAGS_min_log_level = google::ERROR; google::InitGoogleLogging(argv[0]); - auto in_db_filename = utils::Trim(FLAGS_load_mock_db_file); - if (!in_db_filename.empty() && - !std::experimental::filesystem::exists(in_db_filename)) { - std::cerr << "File '" << in_db_filename << "' does not exist!" << std::endl; - std::exit(EXIT_FAILURE); - } database::GraphDb db; auto dba = db.Access(); - Timer planning_timer; - InteractiveDbAccessor interactive_db( - *dba, in_db_filename.empty() ? ReadInt("Vertices in DB: ") : 0, - planning_timer); - if (!in_db_filename.empty()) { - std::ifstream db_file(in_db_filename); - interactive_db.Load(db_file); - } - while (true) { - auto line = ReadLine("query? "); - if (!line || *line == "quit") break; - if (line->empty()) continue; - try { - auto ast = MakeAst(*line, *dba); - auto symbol_table = MakeSymbolTable(ast); - planning_timer.Start(); - auto plans = MakeLogicalPlans(ast, symbol_table, interactive_db); - auto planning_time = planning_timer.Elapsed(); - std::cout - << "Planning took " - << std::chrono::duration(planning_time).count() - << "ms" << std::endl; - std::cout << "Generated " << plans.size() << " plans" << std::endl; - ExaminePlans(*dba, symbol_table, plans); - } catch (const utils::BasicException &e) { - std::cout << "Error: " << e.what() << std::endl; - } - } - auto db_filename = utils::Trim(FLAGS_save_mock_db_file); - if (!db_filename.empty()) { - std::ofstream db_file(db_filename); - interactive_db.Save(db_file); - } + RunInteractivePlanning(dba.get()); return 0; }