Provide textual information for inefficient plans with notifications (#1343)

This commit is contained in:
Josipmrden 2023-10-24 22:20:05 +02:00 committed by GitHub
parent be16ca7362
commit e617ff9b59
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 598 additions and 97 deletions

View File

@ -4,7 +4,8 @@ include(GNUInstallDirs)
# Detect how many processors are available for parallel dependency builds.
# ProcessorCount() stores 0 in NPROC when the count cannot be determined, so
# fall back to a single job in that case.
include(ProcessorCount)
ProcessorCount(NPROC)

if(NPROC EQUAL 0)
  set(NPROC 1)
endif()
@ -12,6 +13,7 @@ find_package(Boost 1.78 REQUIRED)
find_package(BZip2 1.0.6 REQUIRED)
find_package(Threads REQUIRED)
set(GFLAGS_NOTHREADS OFF)
# NOTE: config/generate.py depends on the gflags help XML format.
find_package(gflags REQUIRED)
find_package(fmt 8.0.1)
@ -23,24 +25,27 @@ set(LIB_DIR ${CMAKE_CURRENT_SOURCE_DIR})
# Registers a header-only third-party library as a global imported INTERFACE
# target.
#
# Arguments:
#   name        - name of the target; an alias `lib::<name>` is created too.
#   include_dir - directory exported via INTERFACE_INCLUDE_DIRECTORIES.
#
# Side effects: (re)sets the advanced cache entry <UPPER_NAME>_INCLUDE_DIR to
# `include_dir`.
function(import_header_library name include_dir)
  add_library(${name} INTERFACE IMPORTED GLOBAL)
  set_property(TARGET ${name} PROPERTY
    INTERFACE_INCLUDE_DIRECTORIES "${include_dir}")

  # The cache entry is named after the upper-cased library name.
  string(TOUPPER "${name}" _upper_name)
  set(${_upper_name}_INCLUDE_DIR "${include_dir}" CACHE FILEPATH
    "Path to ${name} include directory" FORCE)
  mark_as_advanced(${_upper_name}_INCLUDE_DIR)

  add_library(lib::${name} ALIAS ${name})
endfunction()
function(import_library name type location include_dir)
add_library(${name} ${type} IMPORTED GLOBAL)
# ARGN holds the optional trailing argument: the name of the external project
# target that builds this library. Test the variable itself; `if(${ARGN})`
# expands to the project name first and then looks up a variable of that name,
# which is false unless such a variable happens to be defined. `${ARGN0}` is
# not a variable CMake provides, so the first extra argument is read with
# list(GET) instead.
if(ARGN)
  list(GET ARGN 0 _dependency_project)
  add_dependencies(${name} ${_dependency_project})
else()
  # Without an explicit project the library depends on `<name>-proj`.
  add_dependencies(${name} ${name}-proj)
endif()
set_property(TARGET ${name} PROPERTY IMPORTED_LOCATION ${location})
# We need to create the include directory first in order to be able to add it
# as an include directory. The header files in the include directory will be
# generated later during the build process.
@ -60,29 +65,34 @@ function(add_external_project name)
# Keyword arguments accepted after the positional `name`:
#   NO_C_COMPILER    - do not prepend -DCMAKE_C_COMPILER to CMAKE_ARGS.
#   SOURCE_DIR       - project sources; defaults to
#                      ${CMAKE_CURRENT_SOURCE_DIR}/<name>.
#   BUILD_IN_SOURCE  - forwarded to ExternalProject_Add; defaults to 0.
#   CMAKE_ARGS, DEPENDS, INSTALL_COMMAND, BUILD_COMMAND, CONFIGURE_COMMAND
#                    - forwarded to ExternalProject_Add.
set(options NO_C_COMPILER)
set(one_value_kwargs SOURCE_DIR BUILD_IN_SOURCE)
set(multi_value_kwargs CMAKE_ARGS DEPENDS INSTALL_COMMAND BUILD_COMMAND
  CONFIGURE_COMMAND)
cmake_parse_arguments(KW "${options}" "${one_value_kwargs}" "${multi_value_kwargs}" ${ARGN})

# Resolve the source directory, honouring an explicit SOURCE_DIR.
set(source_dir ${CMAKE_CURRENT_SOURCE_DIR}/${name})
if(KW_SOURCE_DIR)
  set(source_dir ${KW_SOURCE_DIR})
endif()

# Out-of-source build unless the caller asked otherwise.
set(build_in_source 0)
if(KW_BUILD_IN_SOURCE)
  set(build_in_source ${KW_BUILD_IN_SOURCE})
endif()

# Forward the C compiler in front of the caller's own CMake arguments.
if(NOT KW_NO_C_COMPILER)
  set(KW_CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} ${KW_CMAKE_ARGS})
endif()

# The project target is `<name>-proj`; it is built in Release mode and
# installed into its own source directory.
ExternalProject_Add(${name}-proj DEPENDS ${KW_DEPENDS}
  PREFIX ${source_dir} SOURCE_DIR ${source_dir}
  BUILD_IN_SOURCE ${build_in_source}
  CONFIGURE_COMMAND ${KW_CONFIGURE_COMMAND}
  CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release
  -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
  -DCMAKE_INSTALL_PREFIX=${source_dir}
  ${KW_CMAKE_ARGS}
  INSTALL_COMMAND ${KW_INSTALL_COMMAND}
  BUILD_COMMAND ${KW_BUILD_COMMAND})
endfunction(add_external_project)
# Calls `add_external_project`, sets NAME_LIBRARY, NAME_INCLUDE_DIR variables
@ -91,9 +101,9 @@ macro(import_external_library name type library_location include_dir)
# Build the library as an external project; extra keyword arguments are
# forwarded untouched.
add_external_project(${name} ${ARGN})

# Publish <UPPER_NAME>_LIBRARY and <UPPER_NAME>_INCLUDE_DIR as advanced cache
# entries, overwriting any previous value.
string(TOUPPER ${name} _upper_name)
set(${_upper_name}_LIBRARY ${library_location} CACHE FILEPATH
  "Path to ${name} library" FORCE)
set(${_upper_name}_INCLUDE_DIR ${include_dir} CACHE FILEPATH
  "Path to ${name} include directory" FORCE)
mark_as_advanced(${_upper_name}_LIBRARY ${_upper_name}_INCLUDE_DIR)

# Expose the built artifact as an imported target of the requested type.
import_library(${name} ${type} ${${_upper_name}_LIBRARY} ${${_upper_name}_INCLUDE_DIR})
endmacro(import_external_library)
@ -115,10 +125,10 @@ import_external_library(antlr4 STATIC
${CMAKE_CURRENT_SOURCE_DIR}/antlr4/runtime/Cpp/include/antlr4-runtime
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/antlr4/runtime/Cpp
CMAKE_ARGS # http://stackoverflow.com/questions/37096062/get-a-basic-c-program-to-compile-using-clang-on-ubuntu-16/38385967#38385967
-DWITH_LIBCXX=OFF # because of debian bug
-DCMAKE_SKIP_INSTALL_ALL_DEPENDENCY=true
-DCMAKE_CXX_STANDARD=20
-DANTLR_BUILD_CPP_TESTS=OFF
-DWITH_LIBCXX=OFF # because of debian bug
-DCMAKE_SKIP_INSTALL_ALL_DEPENDENCY=true
-DCMAKE_CXX_STANDARD=20
-DANTLR_BUILD_CPP_TESTS=OFF
BUILD_COMMAND $(MAKE) antlr4_static
INSTALL_COMMAND $(MAKE) install)
@ -126,6 +136,7 @@ import_external_library(antlr4 STATIC
import_external_library(benchmark STATIC
${CMAKE_CURRENT_SOURCE_DIR}/benchmark/${CMAKE_INSTALL_LIBDIR}/libbenchmark.a
${CMAKE_CURRENT_SOURCE_DIR}/benchmark/include
# Skip testing. The tests don't compile with Clang 8.
CMAKE_ARGS -DBENCHMARK_ENABLE_TESTING=OFF)
@ -141,15 +152,15 @@ add_subdirectory(rapidcheck EXCLUDE_FROM_ALL)
# setup google test
add_external_project(gtest SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/googletest)
# Locations of the googletest headers and static libraries inside the
# googletest checkout. The entries are forced into the cache and hidden from
# the default cache view.
set(GTEST_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/googletest/include
  CACHE PATH "Path to gtest and gmock include directory" FORCE)
set(GMOCK_LIBRARY ${CMAKE_CURRENT_SOURCE_DIR}/googletest/lib/libgmock.a
  CACHE FILEPATH "Path to gmock library" FORCE)
set(GMOCK_MAIN_LIBRARY ${CMAKE_CURRENT_SOURCE_DIR}/googletest/lib/libgmock_main.a
  CACHE FILEPATH "Path to gmock_main library" FORCE)
set(GTEST_LIBRARY ${CMAKE_CURRENT_SOURCE_DIR}/googletest/lib/libgtest.a
  CACHE FILEPATH "Path to gtest library" FORCE)
set(GTEST_MAIN_LIBRARY ${CMAKE_CURRENT_SOURCE_DIR}/googletest/lib/libgtest_main.a
  CACHE FILEPATH "Path to gtest_main library" FORCE)
mark_as_advanced(GTEST_INCLUDE_DIR GMOCK_LIBRARY GMOCK_MAIN_LIBRARY GTEST_LIBRARY GTEST_MAIN_LIBRARY)
import_library(gtest STATIC ${GTEST_LIBRARY} ${GTEST_INCLUDE_DIR} gtest-proj)
import_library(gtest_main STATIC ${GTEST_MAIN_LIBRARY} ${GTEST_INCLUDE_DIR} gtest-proj)
@ -167,10 +178,10 @@ import_external_library(rocksdb STATIC
${CMAKE_CURRENT_SOURCE_DIR}/rocksdb/lib/librocksdb.a
${CMAKE_CURRENT_SOURCE_DIR}/rocksdb/include
CMAKE_ARGS -DUSE_RTTI=ON
-DWITH_TESTS=OFF
-DGFLAGS_NOTHREADS=OFF
-DCMAKE_INSTALL_LIBDIR=lib
-DCMAKE_SKIP_INSTALL_ALL_DEPENDENCY=true
-DWITH_TESTS=OFF
-DGFLAGS_NOTHREADS=OFF
-DCMAKE_INSTALL_LIBDIR=lib
-DCMAKE_SKIP_INSTALL_ALL_DEPENDENCY=true
BUILD_COMMAND $(MAKE) rocksdb)
# Setup libbcrypt
@ -179,8 +190,8 @@ import_external_library(libbcrypt STATIC
${CMAKE_CURRENT_SOURCE_DIR}/libbcrypt
CONFIGURE_COMMAND sed s/-Wcast-align// -i ${CMAKE_CURRENT_SOURCE_DIR}/libbcrypt/crypt_blowfish/Makefile
BUILD_COMMAND make -C ${CMAKE_CURRENT_SOURCE_DIR}/libbcrypt
CC=${CMAKE_C_COMPILER}
CXX=${CMAKE_CXX_COMPILER}
CC=${CMAKE_C_COMPILER}
CXX=${CMAKE_CXX_COMPILER}
INSTALL_COMMAND true)
# Setup mgclient
@ -188,16 +199,16 @@ import_external_library(mgclient STATIC
${CMAKE_CURRENT_SOURCE_DIR}/mgclient/lib/libmgclient.a
${CMAKE_CURRENT_SOURCE_DIR}/mgclient/include
CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DBUILD_TESTING=OFF
-DBUILD_CPP_BINDINGS=ON)
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DBUILD_TESTING=OFF
-DBUILD_CPP_BINDINGS=ON)
find_package(OpenSSL REQUIRED)
target_link_libraries(mgclient INTERFACE ${OPENSSL_LIBRARIES})
add_external_project(mgconsole
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/mgconsole
CMAKE_ARGS
-DCMAKE_INSTALL_PREFIX:PATH=${CMAKE_BINARY_DIR}
-DCMAKE_INSTALL_PREFIX:PATH=${CMAKE_BINARY_DIR}
BUILD_COMMAND $(MAKE) mgconsole)
add_custom_target(mgconsole DEPENDS mgconsole-proj)
@ -214,14 +225,15 @@ import_external_library(librdkafka STATIC
${CMAKE_CURRENT_SOURCE_DIR}/librdkafka/lib/librdkafka.a
${CMAKE_CURRENT_SOURCE_DIR}/librdkafka/include
CMAKE_ARGS -DRDKAFKA_BUILD_STATIC=ON
-DRDKAFKA_BUILD_EXAMPLES=OFF
-DRDKAFKA_BUILD_TESTS=OFF
-DWITH_ZSTD=OFF
-DENABLE_LZ4_EXT=OFF
-DCMAKE_INSTALL_LIBDIR=lib
-DWITH_SSL=ON
# If we want SASL, we need to install it on build machines
-DWITH_SASL=OFF)
-DRDKAFKA_BUILD_EXAMPLES=OFF
-DRDKAFKA_BUILD_TESTS=OFF
-DWITH_ZSTD=OFF
-DENABLE_LZ4_EXT=OFF
-DCMAKE_INSTALL_LIBDIR=lib
-DWITH_SSL=ON
# If we want SASL, we need to install it on build machines
-DWITH_SASL=OFF)
target_link_libraries(librdkafka INTERFACE ${OPENSSL_LIBRARIES} ZLIB::ZLIB)
import_library(librdkafka++ STATIC
@ -242,24 +254,24 @@ import_external_library(pulsar STATIC
${CMAKE_CURRENT_SOURCE_DIR}/pulsar/install/include
BUILD_IN_SOURCE 1
CONFIGURE_COMMAND cmake pulsar-client-cpp
-DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_SOURCE_DIR}/pulsar/install
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DBUILD_DYNAMIC_LIB=OFF
-DBUILD_STATIC_LIB=ON
-DBUILD_TESTS=OFF
-DLINK_STATIC=ON
-DPROTOC_PATH=${PROTOBUF_ROOT}/bin/protoc
-DBOOST_ROOT=${BOOST_ROOT}
-DCMAKE_PREFIX_PATH=${PROTOBUF_ROOT}
-DProtobuf_INCLUDE_DIRS=${PROTOBUF_ROOT}/include
-DBUILD_PYTHON_WRAPPER=OFF
-DBUILD_PERF_TOOLS=OFF
-DUSE_LOG4CXX=OFF
BUILD_COMMAND $(MAKE) pulsarStaticWithDeps)
-DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_SOURCE_DIR}/pulsar/install
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DBUILD_DYNAMIC_LIB=OFF
-DBUILD_STATIC_LIB=ON
-DBUILD_TESTS=OFF
-DLINK_STATIC=ON
-DPROTOC_PATH=${PROTOBUF_ROOT}/bin/protoc
-DBOOST_ROOT=${BOOST_ROOT}
-DCMAKE_PREFIX_PATH=${PROTOBUF_ROOT}
-DProtobuf_INCLUDE_DIRS=${PROTOBUF_ROOT}/include
-DBUILD_PYTHON_WRAPPER=OFF
-DBUILD_PERF_TOOLS=OFF
-DUSE_LOG4CXX=OFF
BUILD_COMMAND $(MAKE) pulsarStaticWithDeps)
add_dependencies(pulsar-proj protobuf)
# Select the ARM64 backend of librdtsc when targeting that architecture.
# The expansion is quoted so that an unset or empty MG_ARCH compares as an
# empty string instead of producing a malformed if() expression.
if("${MG_ARCH}" STREQUAL "ARM64")
  set(MG_LIBRDTSC_CMAKE_ARGS -DLIBRDTSC_ARCH_x86=OFF -DLIBRDTSC_ARCH_ARM64=ON)
endif()
@ -280,3 +292,5 @@ add_subdirectory(absl EXCLUDE_FROM_ALL)
set_path_external_library(jemalloc STATIC
${CMAKE_CURRENT_SOURCE_DIR}/jemalloc/lib/libjemalloc.a
${CMAKE_CURRENT_SOURCE_DIR}/jemalloc/include/)
import_header_library(rangev3 ${CMAKE_CURRENT_SOURCE_DIR}/rangev3/include)

View File

@ -125,6 +125,7 @@ declare -A primary_urls=(
["ctre"]="http://$local_cache_host/file/hanickadot/compile-time-regular-expressions/v3.7.2/single-header/ctre.hpp"
["absl"]="https://$local_cache_host/git/abseil-cpp.git"
["jemalloc"]="https://$local_cache_host/git/jemalloc.git"
["range-v3"]="https://$local_cache_host/git/ericniebler/range-v3.git"
)
# The goal of secondary urls is to have links to the "source of truth" of
@ -153,6 +154,7 @@ declare -A secondary_urls=(
["ctre"]="https://raw.githubusercontent.com/hanickadot/compile-time-regular-expressions/v3.7.2/single-header/ctre.hpp"
["absl"]="https://github.com/abseil/abseil-cpp.git"
["jemalloc"]="https://github.com/jemalloc/jemalloc.git"
["range-v3"]="https://github.com/ericniebler/range-v3.git"
)
# antlr
@ -255,7 +257,6 @@ cd ..
absl_ref="20230125.3"
repo_clone_try_double "${primary_urls[absl]}" "${secondary_urls[absl]}" "absl" "$absl_ref"
# jemalloc ea6b3e973b477b8061e0076bb257dbd7f3faa756
JEMALLOC_COMMIT_VERSION="5.2.1"
repo_clone_try_double "${secondary_urls[jemalloc]}" "${secondary_urls[jemalloc]}" "jemalloc" "$JEMALLOC_COMMIT_VERSION"
@ -272,3 +273,7 @@ MALLOC_CONF="retain:false,percpu_arena:percpu,oversize_threshold:0,muzzy_decay_m
make -j$CPUS install
popd
# range-v3 release-0.12.0
range_v3_ref="release-0.12.0"
repo_clone_try_double "${primary_urls[range-v3]}" "${secondary_urls[range-v3]}" "rangev3" "$range_v3_ref"

View File

@ -15,6 +15,7 @@ set(mg_query_sources
interpret/eval.cpp
interpreter.cpp
metadata.cpp
plan/hint_provider.cpp
plan/operator.cpp
plan/preprocess.cpp
plan/pretty_print.cpp
@ -44,22 +45,25 @@ set(mg_query_sources
add_library(mg-query STATIC ${mg_query_sources})
target_include_directories(mg-query PUBLIC ${CMAKE_SOURCE_DIR}/include)
# Libraries mg-query links against. All of them are PUBLIC, so targets that
# link mg-query receive them as well. ${CMAKE_DL_LIBS} is CMake's portable
# name for the dynamic-loading library (`dl` on platforms that need it).
target_link_libraries(mg-query PUBLIC
  ${CMAKE_DL_LIBS}
  cppitertools
  rangev3
  Python3::Python
  mg-integrations-pulsar
  mg-integrations-kafka
  mg-storage-v2
  mg-license
  mg-utils
  mg-kvstore
  mg-memory
  mg::csv
  mg-flags
  mg-dbms
  mg-events)
if(NOT "${MG_PYTHON_PATH}" STREQUAL "")
set(Python3_ROOT_DIR "${MG_PYTHON_PATH}")
endif()
if("${MG_PYTHON_VERSION}" STREQUAL "")
find_package(Python3 3.5 REQUIRED COMPONENTS Development)
else()
@ -67,7 +71,6 @@ else()
endif()
# Generate Antlr openCypher parser
set(opencypher_frontend ${CMAKE_CURRENT_SOURCE_DIR}/frontend/opencypher)
set(opencypher_generated ${opencypher_frontend}/generated)
set(opencypher_lexer_grammar ${opencypher_frontend}/grammar/MemgraphCypherLexer.g4)
@ -90,15 +93,15 @@ add_custom_command(
OUTPUT ${antlr_opencypher_generated_src} ${antlr_opencypher_generated_include}
COMMAND ${CMAKE_COMMAND} -E make_directory ${opencypher_generated}
COMMAND
java -jar ${CMAKE_SOURCE_DIR}/libs/antlr-4.10.1-complete.jar
-Dlanguage=Cpp -visitor -package antlropencypher
-o ${opencypher_generated}
${opencypher_lexer_grammar} ${opencypher_parser_grammar}
java -jar ${CMAKE_SOURCE_DIR}/libs/antlr-4.10.1-complete.jar
-Dlanguage=Cpp -visitor -package antlropencypher
-o ${opencypher_generated}
${opencypher_lexer_grammar} ${opencypher_parser_grammar}
WORKING_DIRECTORY "${CMAKE_BINARY_DIR}"
DEPENDS
${opencypher_lexer_grammar} ${opencypher_parser_grammar}
${opencypher_frontend}/grammar/CypherLexer.g4
${opencypher_frontend}/grammar/Cypher.g4)
${opencypher_lexer_grammar} ${opencypher_parser_grammar}
${opencypher_frontend}/grammar/CypherLexer.g4
${opencypher_frontend}/grammar/Cypher.g4)
add_custom_target(generate_opencypher_parser
DEPENDS ${antlr_opencypher_generated_src} ${antlr_opencypher_generated_include})

View File

@ -56,6 +56,7 @@
#include "query/interpret/eval.hpp"
#include "query/interpret/frame.hpp"
#include "query/metadata.hpp"
#include "query/plan/hint_provider.hpp"
#include "query/plan/planner.hpp"
#include "query/plan/profile.hpp"
#include "query/plan/vertex_count_cache.hpp"
@ -1610,6 +1611,11 @@ PreparedQuery PrepareCypherQuery(ParsedQuery parsed_query, std::map<std::string,
auto plan = CypherQueryToPlan(parsed_query.stripped_query.hash(), std::move(parsed_query.ast_storage), cypher_query,
parsed_query.parameters, plan_cache, dba);
auto hints = plan::ProvidePlanHints(&plan->plan(), plan->symbol_table());
for (const auto &hint : hints) {
notifications->emplace_back(SeverityLevel::INFO, NotificationCode::PLAN_HINTING, hint);
}
TryCaching(plan->ast_storage(), frame_change_collector);
summary->insert_or_assign("cost_estimate", plan->cost());
auto rw_type_checker = plan::ReadWriteTypeChecker();
@ -1646,7 +1652,8 @@ PreparedQuery PrepareCypherQuery(ParsedQuery parsed_query, std::map<std::string,
}
PreparedQuery PrepareExplainQuery(ParsedQuery parsed_query, std::map<std::string, TypedValue> *summary,
InterpreterContext *interpreter_context, CurrentDB &current_db) {
std::vector<Notification> *notifications, InterpreterContext *interpreter_context,
CurrentDB &current_db) {
const std::string kExplainQueryStart = "explain ";
MG_ASSERT(utils::StartsWith(utils::ToLowerCase(parsed_query.stripped_query.query()), kExplainQueryStart),
"Expected stripped query to start with '{}'", kExplainQueryStart);
@ -1673,6 +1680,11 @@ PreparedQuery PrepareExplainQuery(ParsedQuery parsed_query, std::map<std::string
CypherQueryToPlan(parsed_inner_query.stripped_query.hash(), std::move(parsed_inner_query.ast_storage),
cypher_query, parsed_inner_query.parameters, plan_cache, dba);
auto hints = plan::ProvidePlanHints(&cypher_query_plan->plan(), cypher_query_plan->symbol_table());
for (const auto &hint : hints) {
notifications->emplace_back(SeverityLevel::INFO, NotificationCode::PLAN_HINTING, hint);
}
std::stringstream printed_plan;
plan::PrettyPrint(*dba, &cypher_query_plan->plan(), &printed_plan);
@ -1696,9 +1708,9 @@ PreparedQuery PrepareExplainQuery(ParsedQuery parsed_query, std::map<std::string
}
PreparedQuery PrepareProfileQuery(ParsedQuery parsed_query, bool in_explicit_transaction,
std::map<std::string, TypedValue> *summary, InterpreterContext *interpreter_context,
CurrentDB &current_db, utils::MemoryResource *execution_memory,
std::optional<std::string> const &username,
std::map<std::string, TypedValue> *summary, std::vector<Notification> *notifications,
InterpreterContext *interpreter_context, CurrentDB &current_db,
utils::MemoryResource *execution_memory, std::optional<std::string> const &username,
std::atomic<TransactionStatus> *transaction_status,
std::shared_ptr<utils::AsyncTimer> tx_timer,
FrameChangeCollector *frame_change_collector) {
@ -1766,6 +1778,12 @@ PreparedQuery PrepareProfileQuery(ParsedQuery parsed_query, bool in_explicit_tra
CypherQueryToPlan(parsed_inner_query.stripped_query.hash(), std::move(parsed_inner_query.ast_storage),
cypher_query, parsed_inner_query.parameters, plan_cache, dba);
TryCaching(cypher_query_plan->ast_storage(), frame_change_collector);
auto hints = plan::ProvidePlanHints(&cypher_query_plan->plan(), cypher_query_plan->symbol_table());
for (const auto &hint : hints) {
notifications->emplace_back(SeverityLevel::INFO, NotificationCode::PLAN_HINTING, hint);
}
auto rw_type_checker = plan::ReadWriteTypeChecker();
rw_type_checker.InferRWType(const_cast<plan::LogicalOperator &>(cypher_query_plan->plan()));
@ -3732,13 +3750,13 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string,
current_db_, memory_resource, &query_execution->notifications, username_,
&transaction_status_, current_timeout_timer_, &*frame_change_collector_);
} else if (utils::Downcast<ExplainQuery>(parsed_query.query)) {
prepared_query =
PrepareExplainQuery(std::move(parsed_query), &query_execution->summary, interpreter_context_, current_db_);
prepared_query = PrepareExplainQuery(std::move(parsed_query), &query_execution->summary,
&query_execution->notifications, interpreter_context_, current_db_);
} else if (utils::Downcast<ProfileQuery>(parsed_query.query)) {
prepared_query =
PrepareProfileQuery(std::move(parsed_query), in_explicit_transaction_, &query_execution->summary,
interpreter_context_, current_db_, &query_execution->execution_memory_with_exception,
username_, &transaction_status_, current_timeout_timer_, &*frame_change_collector_);
prepared_query = PrepareProfileQuery(std::move(parsed_query), in_explicit_transaction_, &query_execution->summary,
&query_execution->notifications, interpreter_context_, current_db_,
&query_execution->execution_memory_with_exception, username_,
&transaction_status_, current_timeout_timer_, &*frame_change_collector_);
} else if (utils::Downcast<DumpQuery>(parsed_query.query)) {
prepared_query = PrepareDumpQuery(std::move(parsed_query), current_db_);
} else if (utils::Downcast<IndexQuery>(parsed_query.query)) {

View File

@ -1,4 +1,4 @@
// Copyright 2022 Memgraph Ltd.
// Copyright 2023 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
@ -62,6 +62,8 @@ constexpr std::string_view GetCodeString(const NotificationCode code) {
return "IndexDoesNotExist"sv;
case NotificationCode::NONEXISTENT_CONSTRAINT:
return "ConstraintDoesNotExist"sv;
case NotificationCode::PLAN_HINTING:
return "PlanHinting"sv;
case NotificationCode::REGISTER_REPLICA:
return "RegisterReplica"sv;
case NotificationCode::REPLICA_PORT_WARNING:

View File

@ -1,4 +1,4 @@
// Copyright 2022 Memgraph Ltd.
// Copyright 2023 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
@ -39,6 +39,7 @@ enum class NotificationCode : uint8_t {
LOAD_CSV_TIP,
NONEXISTENT_INDEX,
NONEXISTENT_CONSTRAINT,
PLAN_HINTING,
REPLICA_PORT_WARNING,
REGISTER_REPLICA,
SET_REPLICA,

View File

@ -0,0 +1,24 @@
// Copyright 2023 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
// License, and you may not use this file except in compliance with the Business Source License.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
#include "hint_provider.hpp"

#include <utility>
namespace memgraph::query::plan {
/// Runs a PlanHintsProvider over the operator tree rooted at `plan_root` and
/// returns the hint messages it collected.
///
/// @param plan_root root operator of the plan to inspect.
/// @param symbol_table symbol table handed to the visitor.
/// @return the hints gathered while visiting the plan.
std::vector<std::string> ProvidePlanHints(const LogicalOperator *plan_root, const SymbolTable &symbol_table) {
  PlanHintsProvider plan_hinter(symbol_table);

  // The visitor is applied through a non-const pointer, hence the cast.
  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
  const_cast<LogicalOperator *>(plan_root)->Accept(plan_hinter);

  // The visitor is destroyed on return, so hand its vector over instead of
  // copying every message.
  return std::move(plan_hinter.hints());
}
} // namespace memgraph::query::plan

View File

@ -0,0 +1,255 @@
// Copyright 2023 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
// License, and you may not use this file except in compliance with the Business Source License.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
/// @file
/// This file provides textual hints about possible inefficiencies in a query plan.
/// An example of an inefficiency is a sequential scan followed by filtering where an index could be used instead.
#pragma once
#include <algorithm>
#include <vector>
#include <boost/algorithm/string.hpp>
#include <range/v3/view.hpp>
#include "query/plan/operator.hpp"
#include "query/plan/preprocess.hpp"
#include "utils/logging.hpp"
#include "utils/string.hpp"
namespace memgraph::query::plan {
std::vector<std::string> ProvidePlanHints(const LogicalOperator *plan_root, const SymbolTable &symbol_table);
class PlanHintsProvider final : public HierarchicalLogicalOperatorVisitor {
public:
explicit PlanHintsProvider(const SymbolTable &symbol_table) : symbol_table_(symbol_table) {}
std::vector<std::string> &hints() { return hints_; }
using HierarchicalLogicalOperatorVisitor::PostVisit;
using HierarchicalLogicalOperatorVisitor::PreVisit;
using HierarchicalLogicalOperatorVisitor::Visit;
bool Visit(Once & /*unused*/) override { return true; }
bool PreVisit(Filter & /*unused*/) override { return true; }
bool PostVisit(Filter &op) override {
HintIndexUsage(op);
return true;
}
bool PreVisit(ScanAll & /*unused*/) override { return true; }
bool PostVisit(ScanAll & /*unused*/) override { return true; }
bool PreVisit(Expand & /*unused*/) override { return true; }
bool PostVisit(Expand & /*expand*/) override { return true; }
bool PreVisit(ExpandVariable & /*unused*/) override { return true; }
bool PostVisit(ExpandVariable & /*unused*/) override { return true; }
bool PreVisit(Merge &op) override {
op.input()->Accept(*this);
op.merge_match_->Accept(*this);
return false;
}
bool PostVisit(Merge & /*unused*/) override { return true; }
bool PreVisit(Optional &op) override {
op.input()->Accept(*this);
op.optional_->Accept(*this);
return false;
}
bool PostVisit(Optional & /*unused*/) override { return true; }
bool PreVisit(Cartesian &op) override {
op.left_op_->Accept(*this);
op.right_op_->Accept(*this);
return false;
}
bool PostVisit(Cartesian & /*unused*/) override { return true; }
bool PreVisit(Union &op) override {
op.left_op_->Accept(*this);
op.right_op_->Accept(*this);
return false;
}
bool PostVisit(Union & /*unused*/) override { return true; }
bool PreVisit(CreateNode & /*unused*/) override { return true; }
bool PostVisit(CreateNode & /*unused*/) override { return true; }
bool PreVisit(CreateExpand & /*unused*/) override { return true; }
bool PostVisit(CreateExpand & /*unused*/) override { return true; }
bool PreVisit(ScanAllByLabel & /*unused*/) override { return true; }
bool PostVisit(ScanAllByLabel & /*unused*/) override { return true; }
bool PreVisit(ScanAllByLabelPropertyRange & /*unused*/) override { return true; }
bool PostVisit(ScanAllByLabelPropertyRange & /*unused*/) override { return true; }
bool PreVisit(ScanAllByLabelPropertyValue & /*unused*/) override { return true; }
bool PostVisit(ScanAllByLabelPropertyValue & /*unused*/) override { return true; }
bool PreVisit(ScanAllByLabelProperty & /*unused*/) override { return true; }
bool PostVisit(ScanAllByLabelProperty & /*unused*/) override { return true; }
bool PreVisit(ScanAllById & /*unused*/) override { return true; }
bool PostVisit(ScanAllById & /*unused*/) override { return true; }
bool PreVisit(ConstructNamedPath & /*unused*/) override { return true; }
bool PostVisit(ConstructNamedPath & /*unused*/) override { return true; }
bool PreVisit(Produce & /*unused*/) override { return true; }
bool PostVisit(Produce & /*unused*/) override { return true; }
bool PreVisit(EmptyResult & /*unused*/) override { return true; }
bool PostVisit(EmptyResult & /*unused*/) override { return true; }
bool PreVisit(Delete & /*unused*/) override { return true; }
bool PostVisit(Delete & /*unused*/) override { return true; }
bool PreVisit(SetProperty & /*unused*/) override { return true; }
bool PostVisit(SetProperty & /*unused*/) override { return true; }
bool PreVisit(SetProperties & /*unused*/) override { return true; }
bool PostVisit(SetProperties & /*unused*/) override { return true; }
bool PreVisit(SetLabels & /*unused*/) override { return true; }
bool PostVisit(SetLabels & /*unused*/) override { return true; }
bool PreVisit(RemoveProperty & /*unused*/) override { return true; }
bool PostVisit(RemoveProperty & /*unused*/) override { return true; }
bool PreVisit(RemoveLabels & /*unused*/) override { return true; }
bool PostVisit(RemoveLabels & /*unused*/) override { return true; }
bool PreVisit(EdgeUniquenessFilter & /*unused*/) override { return true; }
bool PostVisit(EdgeUniquenessFilter & /*unused*/) override { return true; }
bool PreVisit(Accumulate & /*unused*/) override { return true; }
bool PostVisit(Accumulate & /*unused*/) override { return true; }
bool PreVisit(Aggregate & /*unused*/) override { return true; }
bool PostVisit(Aggregate & /*unused*/) override { return true; }
bool PreVisit(Skip & /*unused*/) override { return true; }
bool PostVisit(Skip & /*unused*/) override { return true; }
bool PreVisit(Limit & /*unused*/) override { return true; }
bool PostVisit(Limit & /*unused*/) override { return true; }
bool PreVisit(OrderBy & /*unused*/) override { return true; }
bool PostVisit(OrderBy & /*unused*/) override { return true; }
bool PreVisit(Unwind & /*unused*/) override { return true; }
bool PostVisit(Unwind & /*unused*/) override { return true; }
bool PreVisit(Distinct & /*unused*/) override { return true; }
bool PostVisit(Distinct & /*unused*/) override { return true; }
bool PreVisit(CallProcedure & /*unused*/) override { return true; }
bool PostVisit(CallProcedure & /*unused*/) override { return true; }
bool PreVisit(Foreach &op) override {
op.input()->Accept(*this);
op.update_clauses_->Accept(*this);
return false;
}
bool PostVisit(Foreach & /*unused*/) override { return true; }
bool PreVisit(EvaluatePatternFilter & /*unused*/) override { return true; }
bool PostVisit(EvaluatePatternFilter & /*op*/) override { return true; }
bool PreVisit(Apply &op) override {
op.input()->Accept(*this);
op.subquery_->Accept(*this);
return false;
}
bool PostVisit(Apply & /*op*/) override { return true; }
bool PreVisit(LoadCsv & /*unused*/) override { return true; }
bool PostVisit(LoadCsv & /*op*/) override { return true; }
private:
const SymbolTable &symbol_table_;
std::vector<std::string> hints_;
bool DefaultPreVisit() override { LOG_FATAL("Operator not implemented for providing plan hints!"); }
/// Appends a hint to `hints_` when `op` filters the output of a scan on labels and/or properties while the scan
/// below it is a plain ScanAll or a ScanAllByLabel.
///
/// Nothing is recorded when the input of `op` is not a ScanAll (or derived) operator, or when the filter
/// expression yields no label or property filters for the scanned symbol.
void HintIndexUsage(Filter &op) {
  // Cast once and reuse the result for both the null check and the symbol lookup.
  const auto *scan = dynamic_cast<ScanAll *>(op.input().get());
  if (scan == nullptr) {
    return;
  }

  const auto &scan_symbol = scan->output_symbol_;
  const auto &scan_type = op.input()->GetTypeInfo();

  Filters filters;
  filters.CollectFilterExpression(op.expression_, symbol_table_);

  const std::string filtered_labels = ExtractAndJoin(filters.FilteredLabels(scan_symbol),
                                                     [](const auto &item) { return fmt::format(":{0}", item.name); });
  const std::string filtered_properties =
      ExtractAndJoin(filters.FilteredProperties(scan_symbol), [](const auto &item) { return item.name; });

  const bool has_labels = !filtered_labels.empty();
  const bool has_properties = !filtered_properties.empty();

  if (scan_type == ScanAll::kType) {
    // Sequential scan: a hint is only given when at least one label is filtered on.
    if (has_labels && has_properties) {
      hints_.push_back(
          fmt::format("Sequential scan will be used on symbol `{0}` although there is a filter on labels {1} and "
                      "properties {2}. Consider "
                      "creating a label-property index.",
                      scan_symbol.name(), filtered_labels, filtered_properties));
    } else if (has_labels) {
      hints_.push_back(fmt::format(
          "Sequential scan will be used on symbol `{0}` although there is a filter on labels {1}. Consider "
          "creating a label index.",
          scan_symbol.name(), filtered_labels));
    }
    return;
  }

  // Label scan that is followed by additional property filtering.
  if (scan_type == ScanAllByLabel::kType && has_properties) {
    hints_.push_back(fmt::format(
        "Label index will be used on symbol `{0}` although there is also a filter on properties {1}. Consider "
        "creating a label-property index.",
        scan_symbol.name(), filtered_properties));
  }
}
/// Applies `projection` to every element of `collection` and joins the resulting strings with ", ".
///
/// The projected strings are sorted before joining so the produced text does not depend on the iteration order of
/// the input (FilteredProperties, for instance, returns an unordered_set). An empty collection yields an empty
/// string.
std::string ExtractAndJoin(auto &&collection, auto &&projection) {
  std::vector<std::string> elements;
  for (auto &&element : collection | ranges::views::transform(projection)) {
    elements.emplace_back(element);
  }
  std::sort(elements.begin(), elements.end());
  return boost::algorithm::join(elements, ", ");
}
};
} // namespace memgraph::query::plan

View File

@ -345,6 +345,17 @@ class Filters final {
return labels;
}
/// Collects the properties of every property filter in `all_filters_` that is bound to `symbol`.
///
/// @param symbol symbol whose property filters are of interest.
/// @return the set of matching properties; empty when there are none.
auto FilteredProperties(const Symbol &symbol) const -> std::unordered_set<PropertyIx> {
  std::unordered_set<PropertyIx> result;
  for (const auto &info : all_filters_) {
    // Only property filters carry a `property_filter`; skip everything else.
    if (info.type != FilterInfo::Type::Property) {
      continue;
    }
    const auto &property_filter = info.property_filter;
    if (!(property_filter->symbol_ == symbol)) {
      continue;
    }
    result.insert(property_filter->property_);
  }
  return result;
}
/// Remove a filter; may invalidate iterators.
/// Removal is done by comparing only the expression, so that multiple
/// FilterInfo objects using the same original expression are removed.

View File

@ -109,7 +109,7 @@ add_unit_test(query_plan_edge_cases.cpp ${CMAKE_SOURCE_DIR}/src/glue/communicati
target_link_libraries(${test_prefix}query_plan_edge_cases mg-communication mg-query)
add_unit_test(query_plan_match_filter_return.cpp)
target_link_libraries(${test_prefix}query_plan_match_filter_return mg-query mg-query mg-glue)
target_link_libraries(${test_prefix}query_plan_match_filter_return mg-query mg-glue)
add_unit_test(query_plan_operator_to_string.cpp)
target_link_libraries(${test_prefix}query_plan_operator_to_string mg-query)
@ -422,3 +422,5 @@ add_unit_test(distributed_lamport_clock.cpp)
target_link_libraries(${test_prefix}distributed_lamport_clock mg-distributed)
target_include_directories(${test_prefix}distributed_lamport_clock PRIVATE ${CMAKE_SOURCE_DIR}/include)
add_unit_test(query_hint_provider.cpp)
target_link_libraries(${test_prefix}query_hint_provider mg-query mg-glue)

View File

@ -0,0 +1,166 @@
// Copyright 2023 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
// License, and you may not use this file except in compliance with the Business Source License.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
#include <gtest/gtest.h>
#include "query_plan_common.hpp"
#include "query/db_accessor.hpp"
#include "query/frontend/semantic/symbol_table.hpp"
#include "query/plan/hint_provider.hpp"
#include "storage/v2/inmemory/storage.hpp"
using namespace memgraph::query;
using namespace memgraph::query::plan;
using namespace memgraph::storage;
class HintProviderSuite : public ::testing::Test {
protected:
std::unique_ptr<Storage> db = std::make_unique<InMemoryStorage>();
std::optional<std::unique_ptr<Storage::Accessor>> storage_dba;
std::optional<memgraph::query::DbAccessor> dba;
LabelId label = db->NameToLabel("label");
PropertyId property = db->NameToProperty("property");
PropertyId other_property = db->NameToProperty("other_property");
std::vector<std::shared_ptr<LogicalOperator>> pattern_filters_{};
AstStorage storage;
SymbolTable symbol_table;
View view = View::OLD;
int symbol_count = 0;
void SetUp() {
storage_dba.emplace(db->Access());
dba.emplace(storage_dba->get());
}
Symbol NextSymbol() { return symbol_table.CreateSymbol("Symbol" + std::to_string(symbol_count++), true); }
void VerifyHintMessages(LogicalOperator *plan, const std::vector<std::string> &expected_messages) {
auto messages = ProvidePlanHints(plan, symbol_table);
ASSERT_EQ(expected_messages.size(), messages.size());
for (size_t i = 0; i < messages.size(); i++) {
const auto &expected_message = expected_messages[i];
const auto &actual_message = messages[i];
ASSERT_EQ(expected_message, actual_message);
}
}
std::vector<LabelIx> GetLabelIx(std::vector<LabelId> labels) {
std::vector<LabelIx> label_ixs{};
for (const auto &label : labels) {
label_ixs.emplace_back(storage.GetLabelIx(db->LabelToName(label)));
}
return label_ixs;
}
};
// ScanAll followed by a label filter: expect a single hint suggesting a label index.
TEST_F(HintProviderSuite, HintWhenFilteringByLabel) {
  auto scan = MakeScanAll(storage, symbol_table, "n");
  auto *labels_test = storage.template Create<LabelsTest>(scan.node_->identifier_, GetLabelIx({label}));
  auto filter_op = std::make_shared<Filter>(scan.op_, pattern_filters_, labels_test);

  VerifyHintMessages(filter_op.get(),
                     {"Sequential scan will be used on symbol `n` although there is a filter on labels :label. "
                      "Consider creating a label index."});
}
// A plan that already scans by label must not produce any hint.
TEST_F(HintProviderSuite, DontHintWhenLabelOperatorPresent) {
  auto label_scan = MakeScanAllByLabel(storage, symbol_table, "n", label);
  auto plan = MakeProduce(label_scan.op_, nullptr);

  const std::vector<std::string> no_messages;
  VerifyHintMessages(plan.get(), no_messages);
}
// ScanAll followed by a combined label + property filter: expect a single hint
// suggesting a label-property index.
TEST_F(HintProviderSuite, HintWhenFilteringByLabelAndProperty) {
  auto scan = MakeScanAll(storage, symbol_table, "n");
  // Filter expression: (n has :label) AND (n.property == 42).
  auto *label_and_property = storage.template Create<AndOperator>(
      storage.template Create<LabelsTest>(scan.node_->identifier_, GetLabelIx({label})),
      EQ(PROPERTY_LOOKUP(*dba, scan.node_->identifier_, property), LITERAL(42)));
  auto filter_op = std::make_shared<Filter>(scan.op_, pattern_filters_, label_and_property);

  VerifyHintMessages(
      filter_op.get(),
      {"Sequential scan will be used on symbol `n` although there is a filter on labels :label and properties "
       "property. Consider creating a label-property index."});
}
// A plan that already scans by label + property value must not produce any hint.
TEST_F(HintProviderSuite, DontHintWhenLabelPropertyOperatorPresent) {
  auto *lookup_value = storage.template Create<Identifier>("n");
  auto label_property_scan =
      MakeScanAllByLabelPropertyValue(storage, symbol_table, "n", label, property, "property", lookup_value);
  auto plan = MakeProduce(label_property_scan.op_, nullptr);

  const std::vector<std::string> no_messages;
  VerifyHintMessages(plan.get(), no_messages);
}
// A label + property scan with an extra filter on a different property must
// still produce no hint.
TEST_F(HintProviderSuite, DontHintWhenLabelPropertyOperatorPresentAndAdditionalPropertyFilterPresent) {
  auto *lookup_value = storage.template Create<Identifier>("n");
  auto label_property_scan =
      MakeScanAllByLabelPropertyValue(storage, symbol_table, "n", label, property, "property", lookup_value);

  // Additional predicate: n.other_property == 42.
  auto *other_property_eq =
      EQ(PROPERTY_LOOKUP(*dba, label_property_scan.node_->identifier_, other_property), LITERAL(42));
  auto filter_op = std::make_shared<Filter>(label_property_scan.op_, pattern_filters_, other_property_eq);

  const std::vector<std::string> no_messages;
  VerifyHintMessages(filter_op.get(), no_messages);
}
// A label scan followed by a property filter: expect a single hint suggesting
// a label-property index.
TEST_F(HintProviderSuite, HintWhenLabelOperatorPresentButFilteringAlsoByProperty) {
  auto label_scan = MakeScanAllByLabel(storage, symbol_table, "n", label);
  // Predicate: n.property == 42.
  auto *property_eq = EQ(PROPERTY_LOOKUP(*dba, label_scan.node_->identifier_, property), LITERAL(42));
  auto filter_op = std::make_shared<Filter>(label_scan.op_, pattern_filters_, property_eq);

  VerifyHintMessages(filter_op.get(),
                     {"Label index will be used on symbol `n` although there is also a filter on properties "
                      "property. Consider creating a label-property index."});
}
// Two label-filtered sequential scans joined by a Cartesian: expect one hint
// per branch, reported for `n` first and `m` second.
TEST_F(HintProviderSuite, DoubleHintWhenCartesianInFilters) {
  // Left branch: ScanAll(n) -> Filter(n:label).
  auto scan_n = MakeScanAll(storage, symbol_table, "n");
  auto *labels_test_n = storage.template Create<LabelsTest>(scan_n.node_->identifier_, GetLabelIx({label}));
  auto filter_n = std::make_shared<Filter>(scan_n.op_, pattern_filters_, labels_test_n);

  // Right branch: ScanAll(m) -> Filter(m:label).
  auto scan_m = MakeScanAll(storage, symbol_table, "m");
  auto *labels_test_m = storage.template Create<LabelsTest>(scan_m.node_->identifier_, GetLabelIx({label}));
  auto filter_m = std::make_shared<Filter>(scan_m.op_, pattern_filters_, labels_test_m);

  const std::vector<Symbol> no_symbols{};
  auto cartesian_op = std::make_shared<Cartesian>(filter_n, no_symbols, filter_m, no_symbols);

  VerifyHintMessages(cartesian_op.get(),
                     {"Sequential scan will be used on symbol `n` although there is a filter on labels :label. "
                      "Consider creating a label index.",
                      "Sequential scan will be used on symbol `m` although there is a filter on labels :label. "
                      "Consider creating a label index."});
}