From dc5eb4befdbaf8d3269f3405b4597265b112f4d5 Mon Sep 17 00:00:00 2001 From: antonio2368 Date: Mon, 22 Mar 2021 09:37:35 +0100 Subject: [PATCH 01/63] Fix module reload (#114) * Fix module reload --- CHANGELOG.md | 1 + .../frontend/ast/cypher_main_visitor.cpp | 9 +++ .../frontend/ast/cypher_main_visitor.hpp | 4 + src/query/interpreter.cpp | 78 +++++++++++++------ 4 files changed, 67 insertions(+), 25 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 97bc4f122..a85d71fa3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ * Fixed garbage collector by correctly marking the oldest current timestamp after the database was recovered using the durability files. +* Fixed reloading of the modules with changed result names. ## v1.3.0 diff --git a/src/query/frontend/ast/cypher_main_visitor.cpp b/src/query/frontend/ast/cypher_main_visitor.cpp index a59f05a80..a679ea138 100644 --- a/src/query/frontend/ast/cypher_main_visitor.cpp +++ b/src/query/frontend/ast/cypher_main_visitor.cpp @@ -410,6 +410,14 @@ antlrcpp::Any CypherMainVisitor::visitCreate(MemgraphCypher::CreateContext *ctx) } antlrcpp::Any CypherMainVisitor::visitCallProcedure(MemgraphCypher::CallProcedureContext *ctx) { + // Don't cache queries which call procedures because the + // procedure definition can affect the behaviour of the visitor and + // the execution of the query. + // If a user recompiles and reloads the procedure with different result + // names, because of the cache, old result names will be expected while the + // procedure will return results mapped to new names. + is_cacheable_ = false; + auto *call_proc = storage_->Create(); MG_ASSERT(!ctx->procedureName()->symbolicName().empty()); std::vector procedure_subnames; @@ -493,6 +501,7 @@ antlrcpp::Any CypherMainVisitor::visitCallProcedure(MemgraphCypher::CallProcedur // fields removed, then the query execution will report an error that we are // yielding missing fields. The user can then just retry the query. 
} + return call_proc; } diff --git a/src/query/frontend/ast/cypher_main_visitor.hpp b/src/query/frontend/ast/cypher_main_visitor.hpp index 7aa31b8ad..85a861f97 100644 --- a/src/query/frontend/ast/cypher_main_visitor.hpp +++ b/src/query/frontend/ast/cypher_main_visitor.hpp @@ -693,6 +693,8 @@ class CypherMainVisitor : public antlropencypher::MemgraphCypherBaseVisitor { Query *query() { return query_; } const static std::string kAnonPrefix; + bool IsCacheable() const { return is_cacheable_; } + private: LabelIx AddLabel(const std::string &name); PropertyIx AddProperty(const std::string &name); @@ -710,6 +712,8 @@ class CypherMainVisitor : public antlropencypher::MemgraphCypherBaseVisitor { // We use this variable in visitReturnItem to check if we are in with or // return. bool in_with_ = false; + + bool is_cacheable_ = true; }; } // namespace frontend } // namespace query diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp index 4fd28e1be..fcf0ef2c4 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -9,6 +9,7 @@ #include "query/db_accessor.hpp" #include "query/dump.hpp" #include "query/exceptions.hpp" +#include "query/frontend/ast/ast.hpp" #include "query/frontend/ast/cypher_main_visitor.hpp" #include "query/frontend/opencypher/parser.hpp" #include "query/frontend/semantic/required_privileges.hpp" @@ -71,6 +72,7 @@ struct ParsedQuery { AstStorage ast_storage; Query *query; std::vector required_privileges; + bool is_cacheable{true}; }; ParsedQuery ParseQuery(const std::string &query_string, const std::map ¶ms, @@ -101,6 +103,19 @@ ParsedQuery ParseQuery(const std::string &query_string, const std::map parser; + // Return a copy of both the AST storage and the query. 
+ CachedQuery result; + bool is_cacheable = true; + + auto get_information_from_cache = [&](const auto &cached_query) { + result.ast_storage.properties_ = cached_query.ast_storage.properties_; + result.ast_storage.labels_ = cached_query.ast_storage.labels_; + result.ast_storage.edge_types_ = cached_query.ast_storage.edge_types_; + + result.query = cached_query.query->Clone(&result.ast_storage); + result.required_privileges = cached_query.required_privileges; + }; + if (it == accessor.end()) { { std::unique_lock guard(*antlr_lock); @@ -125,21 +140,33 @@ ParsedQuery ParseQuery(const std::string &query_string, const std::maptree()); - CachedQuery cached_query{std::move(ast_storage), visitor.query(), query::GetRequiredPrivileges(visitor.query())}; + if (visitor.IsCacheable()) { + CachedQuery cached_query{std::move(ast_storage), visitor.query(), query::GetRequiredPrivileges(visitor.query())}; + it = accessor.insert({hash, std::move(cached_query)}).first; - it = accessor.insert({hash, std::move(cached_query)}).first; + get_information_from_cache(it->second); + } else { + result.ast_storage.properties_ = ast_storage.properties_; + result.ast_storage.labels_ = ast_storage.labels_; + result.ast_storage.edge_types_ = ast_storage.edge_types_; + + result.query = visitor.query()->Clone(&result.ast_storage); + result.required_privileges = query::GetRequiredPrivileges(visitor.query()); + + is_cacheable = false; + } + } else { + get_information_from_cache(it->second); } - // Return a copy of both the AST storage and the query. 
- AstStorage ast_storage; - ast_storage.properties_ = it->second.ast_storage.properties_; - ast_storage.labels_ = it->second.ast_storage.labels_; - ast_storage.edge_types_ = it->second.ast_storage.edge_types_; - - Query *query = it->second.query->Clone(&ast_storage); - - return ParsedQuery{query_string, params, std::move(parameters), std::move(stripped_query), - std::move(ast_storage), query, it->second.required_privileges}; + return ParsedQuery{query_string, + params, + std::move(parameters), + std::move(stripped_query), + std::move(result.ast_storage), + result.query, + std::move(result.required_privileges), + is_cacheable}; } class SingleNodeLogicalPlan final : public LogicalPlan { @@ -712,7 +739,7 @@ std::unique_ptr MakeLogicalPlan(AstStorage ast_storage, CypherQuery */ std::shared_ptr CypherQueryToPlan(uint64_t hash, AstStorage ast_storage, CypherQuery *query, const Parameters ¶meters, utils::SkipList *plan_cache, - DbAccessor *db_accessor) { + DbAccessor *db_accessor, const bool is_cacheable = true) { auto plan_cache_access = plan_cache->access(); auto it = plan_cache_access.find(hash); if (it != plan_cache_access.end()) { @@ -722,10 +749,12 @@ std::shared_ptr CypherQueryToPlan(uint64_t hash, AstStorage ast_stor return it->second; } } - return plan_cache_access - .insert({hash, - std::make_shared(MakeLogicalPlan(std::move(ast_storage), (query), parameters, db_accessor))}) - .first->second; + + auto plan = std::make_shared(MakeLogicalPlan(std::move(ast_storage), (query), parameters, db_accessor)); + if (is_cacheable) { + plan_cache_access.insert({hash, plan}); + } + return plan; } using RWType = plan::ReadWriteTypeChecker::RWType; @@ -792,7 +821,7 @@ PreparedQuery PrepareCypherQuery(ParsedQuery parsed_query, std::map(parsed_query.query), parsed_query.parameters, - &interpreter_context->plan_cache, dba); + &interpreter_context->plan_cache, dba, parsed_query.is_cacheable); summary->insert_or_assign("cost_estimate", plan->cost()); auto rw_type_checker = 
plan::ReadWriteTypeChecker(); @@ -844,9 +873,9 @@ PreparedQuery PrepareExplainQuery(ParsedQuery parsed_query, std::map(parsed_inner_query.query); MG_ASSERT(cypher_query, "Cypher grammar should not allow other queries in EXPLAIN"); - auto cypher_query_plan = - CypherQueryToPlan(parsed_inner_query.stripped_query.hash(), std::move(parsed_inner_query.ast_storage), - cypher_query, parsed_inner_query.parameters, &interpreter_context->plan_cache, dba); + auto cypher_query_plan = CypherQueryToPlan( + parsed_inner_query.stripped_query.hash(), std::move(parsed_inner_query.ast_storage), cypher_query, + parsed_inner_query.parameters, &interpreter_context->plan_cache, dba, parsed_inner_query.is_cacheable); std::stringstream printed_plan; plan::PrettyPrint(*dba, &cypher_query_plan->plan(), &printed_plan); @@ -911,10 +940,9 @@ PreparedQuery PrepareProfileQuery(ParsedQuery parsed_query, bool in_explicit_tra auto *cypher_query = utils::Downcast(parsed_inner_query.query); MG_ASSERT(cypher_query, "Cypher grammar should not allow other queries in PROFILE"); - auto cypher_query_plan = - CypherQueryToPlan(parsed_inner_query.stripped_query.hash(), std::move(parsed_inner_query.ast_storage), - cypher_query, parsed_inner_query.parameters, &interpreter_context->plan_cache, dba); - + auto cypher_query_plan = CypherQueryToPlan( + parsed_inner_query.stripped_query.hash(), std::move(parsed_inner_query.ast_storage), cypher_query, + parsed_inner_query.parameters, &interpreter_context->plan_cache, dba, parsed_inner_query.is_cacheable); auto rw_type_checker = plan::ReadWriteTypeChecker(); rw_type_checker.InferRWType(const_cast(cypher_query_plan->plan())); From f6d5f576d5cc3bfafc8b270b3d284fe785dabc68 Mon Sep 17 00:00:00 2001 From: Josip Seljan <62958579+the-joksim@users.noreply.github.com> Date: Mon, 1 Mar 2021 19:47:41 +0100 Subject: [PATCH 02/63] CSV reader class (#98) * CSV parsing class * Basic CSV parsing tests --- src/utils/csv_parsing.cpp | 227 +++++++++++++++++++++++++++++++++++++ 
src/utils/csv_parsing.hpp | 97 ++++++++++++++++ src/utils/file.hpp | 2 +- tests/unit/CMakeLists.txt | 3 + tests/unit/csv_parsing.cpp | 194 +++++++++++++++++++++++++++++++ 5 files changed, 522 insertions(+), 1 deletion(-) create mode 100644 src/utils/csv_parsing.cpp create mode 100644 src/utils/csv_parsing.hpp create mode 100644 tests/unit/csv_parsing.cpp diff --git a/src/utils/csv_parsing.cpp b/src/utils/csv_parsing.cpp new file mode 100644 index 000000000..d2b526c3d --- /dev/null +++ b/src/utils/csv_parsing.cpp @@ -0,0 +1,227 @@ +#include "utils/csv_parsing.hpp" + +#include + +#include "utils/file.hpp" +#include "utils/string.hpp" + +namespace csv { + +using ParseError = Reader::ParseError; + +void Reader::InitializeStream() { + if (!std::filesystem::exists(path_)) { + throw CsvReadException("CSV file not found: {}", path_.string()); + } + csv_stream_.open(path_); + if (!csv_stream_.good()) { + throw CsvReadException("CSV file {} couldn't be opened!", path_.string()); + } +} + +std::optional Reader::GetNextLine() { + std::string line; + if (!std::getline(csv_stream_, line)) { + // reached end of file or an I/0 error occurred + if (!csv_stream_.good()) { + csv_stream_.close(); + } + return std::nullopt; + } + ++line_count_; + return line; +} + +std::optional Reader::ParseHeader() { + // header must be the very first line in the file + MG_ASSERT(line_count_ == 1, fmt::format("Invalid use of {}", __func__)); + const auto maybe_line = GetNextLine(); + if (!maybe_line) { + throw CsvReadException("CSV file {} empty!", path_); + } + Header header; + // set the 'number_of_fields_' once this method is implemented fully + return std::nullopt; +} + +namespace { +enum class CsvParserState : uint8_t { + INITIAL_FIELD, + NEXT_FIELD, + QUOTING, + NOT_QUOTING, + EXPECT_DELIMITER, +}; + +bool SubstringStartsWith(const std::string_view str, size_t pos, const std::string_view what) { + return utils::StartsWith(utils::Substr(str, pos), what); +} +} // namespace + 
+Reader::ParsingResult Reader::ParseRow() { + std::vector row; + std::string column; + + auto state = CsvParserState::INITIAL_FIELD; + + do { + const auto maybe_line = GetNextLine(); + if (!maybe_line) { + // The whole file was processed. + break; + } + + for (size_t i = 0; i < maybe_line->size(); ++i) { + const auto c = (*maybe_line)[i]; + + // Line feeds and carriage returns are ignored in CSVs. + if (c == '\n' || c == '\r') continue; + // Null bytes aren't allowed in CSVs. + if (c == '\0') { + return ParseError(ParseError::ErrorCode::NULL_BYTE, + fmt::format("CSV: Line {:d} contains NULL byte", line_count_)); + } + + switch (state) { + case CsvParserState::INITIAL_FIELD: + case CsvParserState::NEXT_FIELD: { + if (SubstringStartsWith(*maybe_line, i, read_config_.quote)) { + // The current field is a quoted field. + state = CsvParserState::QUOTING; + i += read_config_.quote.size() - 1; + } else if (SubstringStartsWith(*maybe_line, i, read_config_.delimiter)) { + // The current field has an empty value. + row.emplace_back(""); + state = CsvParserState::NEXT_FIELD; + i += read_config_.delimiter.size() - 1; + } else { + // The current field is a regular field. + column.push_back(c); + state = CsvParserState::NOT_QUOTING; + } + break; + } + case CsvParserState::QUOTING: { + auto quote_now = SubstringStartsWith(*maybe_line, i, read_config_.quote); + auto quote_next = SubstringStartsWith(*maybe_line, i + read_config_.quote.size(), read_config_.quote); + if (quote_now && quote_next) { + // This is an escaped quote character. + column += read_config_.quote; + i += read_config_.quote.size() * 2 - 1; + } else if (quote_now && !quote_next) { + // This is the end of the quoted field. 
+ row.emplace_back(std::move(column)); + state = CsvParserState::EXPECT_DELIMITER; + i += read_config_.quote.size() - 1; + } else { + column.push_back(c); + } + break; + } + case CsvParserState::NOT_QUOTING: { + if (SubstringStartsWith(*maybe_line, i, read_config_.delimiter)) { + row.emplace_back(std::move(column)); + state = CsvParserState::NEXT_FIELD; + i += read_config_.delimiter.size() - 1; + } else { + column.push_back(c); + } + break; + } + case CsvParserState::EXPECT_DELIMITER: { + if (SubstringStartsWith(*maybe_line, i, read_config_.delimiter)) { + state = CsvParserState::NEXT_FIELD; + i += read_config_.delimiter.size() - 1; + } else { + return ParseError(ParseError::ErrorCode::UNEXPECTED_TOKEN, + fmt::format("CSV Reader: Expected '{}' after '{}', but got '{}'", read_config_.delimiter, + read_config_.quote, c)); + } + break; + } + } + } + } while (state == CsvParserState::QUOTING); + + switch (state) { + case CsvParserState::INITIAL_FIELD: { + break; + } + case CsvParserState::NEXT_FIELD: { + row.emplace_back(std::move(column)); + break; + } + case CsvParserState::QUOTING: { + return ParseError(ParseError::ErrorCode::NO_CLOSING_QUOTE, + "There is no more data left to load while inside a quoted string. 
" + "Did you forget to close the quote?"); + break; + } + case CsvParserState::NOT_QUOTING: { + row.emplace_back(std::move(column)); + break; + } + case CsvParserState::EXPECT_DELIMITER: { + break; + } + } + + // reached the end of file - return empty row + if (row.empty()) { + return Row(row); + } + + // if there's no header, then: + // - if we skip bad rows, then the very first __valid__ row will + // determine the allowed number of columns + // - if we don't skip bad rows, the very first row will determine the allowed + // number of columns in all subsequent rows + if (!read_config_.with_header && number_of_columns_ == 0) { + MG_ASSERT(!row.empty()); + number_of_columns_ = row.size(); + } + + if (row.size() != number_of_columns_) { + return ParseError( + ParseError::ErrorCode::BAD_NUM_OF_COLUMNS, + // ToDo(the-joksim): + // - 'line_count_ - 1' is the last line of a row (as a + // row may span several lines) ==> should have a row + // counter + fmt::format("Expected {:d} columns in row {:d}, but got {:d}", number_of_columns_, line_count_, row.size())); + } + + return Row(row); +} + +// Returns Reader::Row if the read row if valid; +// Returns std::nullopt if end of file is reached or an error occurred +// making it unreadable; +// @throws CsvReadException if a bad row is encountered, and the skip_bad is set +// to 'true' in the Reader::Config. 
+std::optional Reader::GetNextRow() { + auto row = ParseRow(); + + if (row.HasError()) { + if (!read_config_.skip_bad) { + throw CsvReadException("CSV Reader: Bad row at line {:d}: {}", line_count_, row.GetError().message); + } + // try to parse as many times as necessary to reach a valid row + do { + spdlog::debug("CSV Reader: Bad row at line {:d}: {}", line_count_, row.GetError().message); + if (!csv_stream_.good()) { + return std::nullopt; + } + row = ParseRow(); + } while (row.HasError()); + } + + auto ret = row.GetValue(); + if (ret.columns.empty()) { + // reached end of file + return std::nullopt; + } + return ret; +} + +} // namespace csv diff --git a/src/utils/csv_parsing.hpp b/src/utils/csv_parsing.hpp new file mode 100644 index 000000000..b44e850a0 --- /dev/null +++ b/src/utils/csv_parsing.hpp @@ -0,0 +1,97 @@ +/** + * @file + * + * This file contains utilities for parsing CSV files. + * + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "utils/exceptions.hpp" +#include "utils/result.hpp" + +namespace csv { + +class CsvReadException : public utils::BasicException { + using utils::BasicException::BasicException; +}; + +class Reader { + public: + struct Config { + Config(){}; + Config(std::string delimiter, std::string quote, const bool with_header, const bool skip_bad) + : delimiter(std::move(delimiter)), quote(std::move(quote)), with_header(with_header), skip_bad(skip_bad) {} + + std::string delimiter{","}; + std::string quote{"\""}; + bool with_header{false}; + bool skip_bad{false}; + }; + + struct Row { + Row() = default; + explicit Row(std::vector cols) : columns(std::move(cols)) {} + std::vector columns; + }; + + explicit Reader(const std::filesystem::path &path, const Config cfg = {}) : path_(path), read_config_(cfg) { + InitializeStream(); + if (read_config_.with_header) { + header_ = ParseHeader(); + } + } + + Reader(const Reader &) = delete; + Reader &operator=(const Reader &) = delete; + + Reader(Reader 
&&) = delete; + Reader &operator=(Reader &&) = delete; + + ~Reader() { + if (csv_stream_.is_open()) csv_stream_.close(); + } + + struct ParseError { + enum class ErrorCode : uint8_t { BAD_HEADER, NO_CLOSING_QUOTE, UNEXPECTED_TOKEN, BAD_NUM_OF_COLUMNS, NULL_BYTE }; + ParseError(ErrorCode code, std::string message) : code(code), message(std::move(message)) {} + + ErrorCode code; + std::string message; + }; + + using ParsingResult = utils::BasicResult; + std::optional GetNextRow(); + + private: + std::filesystem::path path_; + std::ifstream csv_stream_; + Config read_config_; + uint64_t line_count_{1}; + uint16_t number_of_columns_{0}; + + struct Header { + Header() = default; + explicit Header(std::vector cols) : columns(std::move(cols)) {} + std::vector columns; + }; + + std::optional
header_{}; + + void InitializeStream(); + + std::optional GetNextLine(); + + std::optional
ParseHeader(); + + ParsingResult ParseRow(); +}; + +} // namespace csv diff --git a/src/utils/file.hpp b/src/utils/file.hpp index 6bf3e3b0d..3d281a3bc 100644 --- a/src/utils/file.hpp +++ b/src/utils/file.hpp @@ -57,7 +57,7 @@ bool RenamePath(const std::filesystem::path &src, const std::filesystem::path &d /// `write` for each of our (very small) logical reads/writes. Because of that, /// `read` or `write` is only called when the buffer is full and/or needs /// emptying. -const size_t kFileBufferSize = 262144; +constexpr size_t kFileBufferSize = 262144; /// This class implements a file handler that is used to read binary files. It /// was developed because the C++ standard library has an awful API and makes diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index beb45f44b..c7bc0942e 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -226,6 +226,9 @@ target_link_libraries(${test_prefix}utils_file_locker mg-utils fmt) add_unit_test(utils_thread_pool.cpp) target_link_libraries(${test_prefix}utils_thread_pool mg-utils fmt) +add_unit_test(csv_parsing.cpp ${CMAKE_SOURCE_DIR}/src/utils/csv_parsing.cpp) +target_link_libraries(${test_prefix}csv_parsing mg-utils fmt) + # Test mg-storage-v2 add_unit_test(commit_log_v2.cpp) diff --git a/tests/unit/csv_parsing.cpp b/tests/unit/csv_parsing.cpp new file mode 100644 index 000000000..60b197564 --- /dev/null +++ b/tests/unit/csv_parsing.cpp @@ -0,0 +1,194 @@ +#include "utils/csv_parsing.hpp" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +#include "utils/string.hpp" + + +class CsvReaderTest : public ::testing::Test { + protected: + const std::filesystem::path csv_directory{std::filesystem::temp_directory_path() / "csv_testing"}; + + void SetUp() override { Clear(); CreateCsvDir(); } + + void TearDown() override { Clear(); } + + private: + + void CreateCsvDir() { + if (!std::filesystem::exists(csv_directory)) { + std::filesystem::create_directory(csv_directory); + } + } + void Clear() { + 
if (!std::filesystem::exists(csv_directory)) return; + std::filesystem::remove_all(csv_directory); + } +}; + +namespace { +class FileWriter { + public: + explicit FileWriter(const std::filesystem::path path) { stream_.open(path); } + + FileWriter(const FileWriter &) = delete; + FileWriter &operator=(const FileWriter &) = delete; + + FileWriter(FileWriter &&) = delete; + FileWriter &operator=(FileWriter &&) = delete; + + void Close() { stream_.close(); } + + size_t WriteLine(const std::string_view line) { + if (!stream_.is_open()) { + return 0; + } + + stream_ << line << std::endl; + + // including the newline character + return line.size() + 1; + } + + private: + std::ofstream stream_; +}; + +std::string CreateRow(const std::vector &columns, const std::string_view delim) { + return utils::Join(columns, delim); +} + +} // namespace + +TEST_F(CsvReaderTest, CommaDelimiter) { + // create a file with a valid and an invalid row; + // the invalid row has wrong delimiters; + // expect the parser's output to be a single string for the invalid row; + const auto filepath = csv_directory / "bla.csv"; + auto writer = FileWriter(filepath); + + const std::vector columns1{"A", "B", "C"}; + writer.WriteLine(CreateRow(columns1, ",")); + + const std::vector columns2{"D", "E", "F"}; + writer.WriteLine(CreateRow(columns2, ";")); + + writer.Close(); + + // note - default delimiter is "," + auto reader = csv::Reader(filepath); + + auto parsed_row = reader.GetNextRow(); + ASSERT_EQ(parsed_row->columns, columns1); + + EXPECT_THROW(reader.GetNextRow(), csv::CsvReadException); +} + +TEST_F(CsvReaderTest, SemicolonDelimiter) { + const auto filepath = csv_directory / "bla.csv"; + auto writer = FileWriter(filepath); + + const std::string delimiter = ";"; + const std::vector columns1{"A", "B", "C"}; + writer.WriteLine(CreateRow(columns1, delimiter)); + + const std::vector columns2{"A", "B", "C"}; + writer.WriteLine(CreateRow(columns2, ",")); + + writer.Close(); + + const csv::Reader::Config 
cfg(delimiter, "\"", false, false); + auto reader = csv::Reader(filepath, cfg); + + auto parsed_row = reader.GetNextRow(); + ASSERT_EQ(parsed_row->columns, columns1); + + EXPECT_THROW(reader.GetNextRow(), csv::CsvReadException); +} + +TEST_F(CsvReaderTest, SkipBad) { + // create a file with invalid first two rows (containing a string with a + // missing closing quote); + // the last row is valid; + const auto filepath = csv_directory / "bla.csv"; + auto writer = FileWriter(filepath); + + const std::string delimiter = ","; + + const std::vector columns_bad{"A", "B", "\"C"}; + writer.WriteLine(CreateRow(columns_bad, delimiter)); + writer.WriteLine(CreateRow(columns_bad, delimiter)); + + const std::vector columns_good{"A", "B", "C"}; + writer.WriteLine(CreateRow(columns_good, delimiter)); + + writer.Close(); + + { + // we set the 'skip_bad' flag in the read configuration to 'true'; + // parser's output should be solely the valid row; + const bool skip_bad = true; + const csv::Reader::Config cfg(delimiter, "\"", false, skip_bad); + auto reader = csv::Reader(filepath, cfg); + + auto parsed_row = reader.GetNextRow(); + ASSERT_EQ(parsed_row->columns, columns_good); + } + + { + // we set the 'skip_bad' flag in the read configuration to 'false'; + // an exception must be thrown; + const bool skip_bad = false; + const csv::Reader::Config cfg(delimiter, "\"", false, skip_bad); + auto reader = csv::Reader(filepath, cfg); + + EXPECT_THROW(reader.GetNextRow(), csv::CsvReadException); + } +} + +TEST_F(CsvReaderTest, AllRowsValid) { + // create a file with all rows valid; + // parser should return 'std::nullopt' + const auto filepath = csv_directory / "bla.csv"; + auto writer = FileWriter(filepath); + + const std::string delimiter = ","; + + const std::vector columns{"A", "B", "C"}; + writer.WriteLine(CreateRow(columns, delimiter)); + writer.WriteLine(CreateRow(columns, delimiter)); + writer.WriteLine(CreateRow(columns, delimiter)); + + writer.Close(); + + const bool skip_bad = 
false; + const csv::Reader::Config cfg(delimiter, "\"", false, skip_bad); + auto reader = csv::Reader(filepath, cfg); + + while (auto parsed_row = reader.GetNextRow()) { + ASSERT_EQ(parsed_row->columns, columns); + } +} + +TEST_F(CsvReaderTest, SkipAllRows) { + // create a file with all rows invalid (containing a string with a missing closing quote); + // parser should return 'std::nullopt' + const auto filepath = csv_directory / "bla.csv"; + auto writer = FileWriter(filepath); + + const std::string delimiter = ","; + + const std::vector columns_bad{"A", "B", "\"C"}; + writer.WriteLine(CreateRow(columns_bad, delimiter)); + writer.WriteLine(CreateRow(columns_bad, delimiter)); + writer.WriteLine(CreateRow(columns_bad, delimiter)); + + writer.Close(); + + const bool skip_bad = true; + const csv::Reader::Config cfg(delimiter, "\"", false, skip_bad); + auto reader = csv::Reader(filepath, cfg); + + auto parsed_row = reader.GetNextRow(); + ASSERT_EQ(parsed_row, std::nullopt); +} From f950a91732f30be84069a5db0504839486b53a65 Mon Sep 17 00:00:00 2001 From: Josip Seljan <62958579+the-joksim@users.noreply.github.com> Date: Thu, 18 Mar 2021 17:24:25 +0100 Subject: [PATCH 03/63] Add LOAD CSV clause infrastructure (#101) * Add LOAD CSV clause infrastructure * Add LoadCsv operator * Update csv::Reader class * Support csv files with and without header Co-authored-by: jseljan --- src/auth/models.cpp | 2 + src/auth/models.hpp | 7 +- src/glue/auth.cpp | 2 + src/query/frontend/ast/ast.lcp | 43 ++- src/query/frontend/ast/ast_visitor.hpp | 8 +- .../frontend/ast/cypher_main_visitor.cpp | 53 ++++ .../frontend/ast/cypher_main_visitor.hpp | 5 + .../opencypher/grammar/MemgraphCypher.g4 | 36 +++ .../opencypher/grammar/MemgraphCypherLexer.g4 | 8 + .../frontend/semantic/required_privileges.cpp | 2 + .../frontend/semantic/symbol_generator.cpp | 10 + .../frontend/semantic/symbol_generator.hpp | 2 + src/query/interpreter.cpp | 8 +- src/query/plan/operator.cpp | 135 +++++++++- 
src/query/plan/operator.lcp | 36 ++- src/query/plan/pretty_print.cpp | 22 ++ src/query/plan/pretty_print.hpp | 2 + src/query/plan/rule_based_planner.hpp | 7 + src/utils/CMakeLists.txt | 1 + src/utils/csv_parsing.cpp | 116 ++++---- src/utils/csv_parsing.hpp | 60 ++--- tests/unit/CMakeLists.txt | 4 +- tests/unit/csv_parsing.cpp | 194 -------------- tests/unit/cypher_main_visitor.cpp | 66 +++++ tests/unit/interpreter.cpp | 119 +++++++++ tests/unit/utils_csv_parsing.cpp | 250 ++++++++++++++++++ 26 files changed, 907 insertions(+), 291 deletions(-) delete mode 100644 tests/unit/csv_parsing.cpp create mode 100644 tests/unit/utils_csv_parsing.cpp diff --git a/src/auth/models.cpp b/src/auth/models.cpp index 2f5bb210f..b3e650b44 100644 --- a/src/auth/models.cpp +++ b/src/auth/models.cpp @@ -43,6 +43,8 @@ std::string PermissionToString(Permission permission) { return "REPLICATION"; case Permission::LOCK_PATH: return "LOCK_PATH"; + case Permission::READ_FILE: + return "READ_FILE"; case Permission::AUTH: return "AUTH"; } diff --git a/src/auth/models.hpp b/src/auth/models.hpp index cb627b052..4453adaf8 100644 --- a/src/auth/models.hpp +++ b/src/auth/models.hpp @@ -23,15 +23,16 @@ enum class Permission : uint64_t { DUMP = 1U << 9U, REPLICATION = 1U << 10U, LOCK_PATH = 1U << 11U, + READ_FILE = 1U << 12U, AUTH = 1U << 16U }; // clang-format on // Constant list of all available permissions. 
const std::vector kPermissionsAll = { - Permission::MATCH, Permission::CREATE, Permission::MERGE, Permission::DELETE, Permission::SET, - Permission::REMOVE, Permission::INDEX, Permission::STATS, Permission::CONSTRAINT, Permission::DUMP, - Permission::AUTH, Permission::REPLICATION, Permission::LOCK_PATH}; + Permission::MATCH, Permission::CREATE, Permission::MERGE, Permission::DELETE, Permission::SET, + Permission::REMOVE, Permission::INDEX, Permission::STATS, Permission::CONSTRAINT, Permission::DUMP, + Permission::AUTH, Permission::REPLICATION, Permission::LOCK_PATH, Permission::READ_FILE}; // Function that converts a permission to its string representation. std::string PermissionToString(Permission permission); diff --git a/src/glue/auth.cpp b/src/glue/auth.cpp index d5b8b706b..43af863b5 100644 --- a/src/glue/auth.cpp +++ b/src/glue/auth.cpp @@ -28,6 +28,8 @@ auth::Permission PrivilegeToPermission(query::AuthQuery::Privilege privilege) { return auth::Permission::REPLICATION; case query::AuthQuery::Privilege::LOCK_PATH: return auth::Permission::LOCK_PATH; + case query::AuthQuery::Privilege::READ_FILE: + return auth::Permission::READ_FILE; case query::AuthQuery::Privilege::AUTH: return auth::Permission::AUTH; } diff --git a/src/query/frontend/ast/ast.lcp b/src/query/frontend/ast/ast.lcp index ee0e2b975..bc70c822d 100644 --- a/src/query/frontend/ast/ast.lcp +++ b/src/query/frontend/ast/ast.lcp @@ -2191,7 +2191,7 @@ cpp<# (:serialize)) (lcp:define-enum privilege (create delete match merge set remove index stats auth constraint - dump replication lock_path) + dump replication lock_path read_file) (:serialize)) #>cpp AuthQuery() = default; @@ -2353,4 +2353,45 @@ cpp<# (:serialize (:slk)) (:clone)) +(lcp:define-class load-csv (clause) + ((file "Expression *" :scope :public) + (with_header "bool" :scope :public) + (ignore_bad "bool" :scope :public) + (delimiter "Expression *" :initval "nullptr" :scope :public) + (quote "Expression *" :initval "nullptr" :scope :public) + 
(row_var "Identifier *" :initval "nullptr" :scope :public + :slk-save #'slk-save-ast-pointer + :slk-load (slk-load-ast-pointer "Identifier"))) + + (:public + #>cpp + LoadCsv() = default; + + bool Accept(HierarchicalTreeVisitor &visitor) override { + if (visitor.PreVisit(*this)) { + row_var_->Accept(visitor); + } + return visitor.PostVisit(*this); + } + cpp<#) + (:protected + #>cpp + explicit LoadCsv(Expression *file, bool with_header, bool ignore_bad, Expression *delimiter, + Expression* quote, Identifier* row_var) + : file_(file), + with_header_(with_header), + ignore_bad_(ignore_bad), + delimiter_(delimiter), + quote_(quote), + row_var_(row_var) { + DMG_ASSERT(row_var, "LoadCsv cannot take nullptr for identifier"); + } + cpp<#) + (:private + #>cpp + friend class AstStorage; + cpp<#) + (:serialize (:slk)) + (:clone)) + (lcp:pop-namespace) ;; namespace query diff --git a/src/query/frontend/ast/ast_visitor.hpp b/src/query/frontend/ast/ast_visitor.hpp index f637c84e9..cc4b7d268 100644 --- a/src/query/frontend/ast/ast_visitor.hpp +++ b/src/query/frontend/ast/ast_visitor.hpp @@ -74,6 +74,7 @@ class RegexMatch; class DumpQuery; class ReplicationQuery; class LockPathQuery; +class LoadCsv; using TreeCompositeVisitor = ::utils::CompositeVisitor< SingleQuery, CypherUnion, NamedExpression, OrOperator, XorOperator, AndOperator, NotOperator, AdditionOperator, @@ -82,7 +83,7 @@ using TreeCompositeVisitor = ::utils::CompositeVisitor< ListSlicingOperator, IfOperator, UnaryPlusOperator, UnaryMinusOperator, IsNullOperator, ListLiteral, MapLiteral, PropertyLookup, LabelsTest, Aggregation, Function, Reduce, Coalesce, Extract, All, Single, Any, None, CallProcedure, Create, Match, Return, With, Pattern, NodeAtom, EdgeAtom, Delete, Where, SetProperty, SetProperties, SetLabels, - RemoveProperty, RemoveLabels, Merge, Unwind, RegexMatch>; + RemoveProperty, RemoveLabels, Merge, Unwind, RegexMatch, LoadCsv>; using TreeLeafVisitor = ::utils::LeafVisitor; @@ -105,7 +106,8 @@ class 
ExpressionVisitor None, ParameterLookup, Identifier, PrimitiveLiteral, RegexMatch> {}; template -class QueryVisitor : public ::utils::Visitor {}; +class QueryVisitor + : public ::utils::Visitor {}; } // namespace query diff --git a/src/query/frontend/ast/cypher_main_visitor.cpp b/src/query/frontend/ast/cypher_main_visitor.cpp index a679ea138..b206929ea 100644 --- a/src/query/frontend/ast/cypher_main_visitor.cpp +++ b/src/query/frontend/ast/cypher_main_visitor.cpp @@ -263,6 +263,47 @@ antlrcpp::Any CypherMainVisitor::visitLockPathQuery(MemgraphCypher::LockPathQuer return lock_query; } +antlrcpp::Any CypherMainVisitor::visitLoadCsv(MemgraphCypher::LoadCsvContext *ctx) { + auto *load_csv = storage_->Create(); + // handle file name + if (ctx->csvFile()->literal()->StringLiteral()) { + load_csv->file_ = ctx->csvFile()->accept(this); + } else { + throw SemanticException("CSV file path should be a string literal"); + } + + // handle header options + // Don't have to check for ctx->HEADER(), as it's a mandatory token. + // Just need to check if ctx->WITH() is not nullptr - otherwise, we have a + // ctx->NO() and ctx->HEADER() present. 
+ load_csv->with_header_ = ctx->WITH() != nullptr; + + // handle skip bad row option + load_csv->ignore_bad_ = ctx->IGNORE() && ctx->BAD(); + + // handle delimiter + if (ctx->DELIMITER()) { + if (ctx->delimiter()->literal()->StringLiteral()) { + load_csv->delimiter_ = ctx->delimiter()->accept(this); + } else { + throw SemanticException("Delimiter should be a string literal"); + } + } + + // handle quote + if (ctx->QUOTE()) { + if (ctx->quote()->literal()->StringLiteral()) { + load_csv->quote_ = ctx->quote()->accept(this); + } else { + throw SemanticException("Quote should be a string literal"); + } + } + + // handle row variable + load_csv->row_var_ = storage_->Create(ctx->rowVar()->variable()->accept(this).as()); + return load_csv; +} + antlrcpp::Any CypherMainVisitor::visitCypherUnion(MemgraphCypher::CypherUnionContext *ctx) { bool distinct = !ctx->ALL(); auto *cypher_union = storage_->Create(distinct); @@ -292,6 +333,7 @@ antlrcpp::Any CypherMainVisitor::visitSingleQuery(MemgraphCypher::SingleQueryCon bool has_return = false; bool has_optional_match = false; bool has_call_procedure = false; + bool has_load_csv = false; for (Clause *clause : single_query->clauses_) { const auto &clause_type = clause->GetTypeInfo(); @@ -304,6 +346,14 @@ antlrcpp::Any CypherMainVisitor::visitSingleQuery(MemgraphCypher::SingleQueryCon if (has_update || has_return) { throw SemanticException("UNWIND can't be put after RETURN clause or after an update."); } + } else if (utils::IsSubtype(clause_type, LoadCsv::kType)) { + if (has_load_csv) { + throw SemanticException("Can't have multiple LOAD CSV clauses in a single query."); + } + if (has_return) { + throw SemanticException("LOAD CSV can't be put after RETURN clause."); + } + has_load_csv = true; } else if (auto *match = utils::Downcast(clause)) { if (has_update || has_return) { throw SemanticException("MATCH can't be put after RETURN clause or after an update."); @@ -388,6 +438,9 @@ antlrcpp::Any 
CypherMainVisitor::visitClause(MemgraphCypher::ClauseContext *ctx) if (ctx->callProcedure()) { return static_cast(ctx->callProcedure()->accept(this).as()); } + if (ctx->loadCsv()) { + return static_cast(ctx->loadCsv()->accept(this).as()); + } // TODO: implement other clauses. throw utils::NotYetImplemented("clause '{}'", ctx->getText()); return 0; diff --git a/src/query/frontend/ast/cypher_main_visitor.hpp b/src/query/frontend/ast/cypher_main_visitor.hpp index 85a861f97..15eebdf30 100644 --- a/src/query/frontend/ast/cypher_main_visitor.hpp +++ b/src/query/frontend/ast/cypher_main_visitor.hpp @@ -208,6 +208,11 @@ class CypherMainVisitor : public antlropencypher::MemgraphCypherBaseVisitor { */ antlrcpp::Any visitLockPathQuery(MemgraphCypher::LockPathQueryContext *ctx) override; + /** + * @return LoadCsvQuery* + */ + antlrcpp::Any visitLoadCsv(MemgraphCypher::LoadCsvContext *ctx) override; + /** * @return CypherUnion* */ diff --git a/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 b/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 index c3d3c75e7..f8785d13c 100644 --- a/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 +++ b/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 @@ -10,7 +10,11 @@ memgraphCypherKeyword : cypherKeyword | ALTER | ASYNC | AUTH + | BAD | CLEAR + | CSV + | DATA + | DELIMITER | DATABASE | DENY | DROP @@ -18,10 +22,13 @@ memgraphCypherKeyword : cypherKeyword | FOR | FROM | GRANT + | HEADER | IDENTIFIED + | LOAD | LOCK | MAIN | MODE + | NO | PASSWORD | PORT | PRIVILEGES @@ -32,6 +39,7 @@ memgraphCypherKeyword : cypherKeyword | REVOKE | ROLE | ROLES + | QUOTE | STATS | SYNC | TIMEOUT @@ -82,6 +90,33 @@ replicationQuery : setReplicationRole | showReplicas ; +clause : cypherMatch + | unwind + | merge + | create + | set + | cypherDelete + | remove + | with + | cypherReturn + | callProcedure + | loadCsv + ; + +loadCsv : LOAD CSV FROM csvFile ( WITH | NO ) HEADER + ( IGNORE BAD ) ? + ( DELIMITER delimiter ) ? 
+ ( QUOTE quote ) ? + AS rowVar ; + +csvFile : literal ; + +delimiter : literal ; + +quote : literal ; + +rowVar : variable ; + userOrRoleName : symbolicName ; createRole : CREATE ROLE role=userOrRoleName ; @@ -141,3 +176,4 @@ showReplicas : SHOW REPLICAS ; lockPathQuery : ( LOCK | UNLOCK ) DATA DIRECTORY ; + diff --git a/src/query/frontend/opencypher/grammar/MemgraphCypherLexer.g4 b/src/query/frontend/opencypher/grammar/MemgraphCypherLexer.g4 index 15923dcf2..2f3a4be03 100644 --- a/src/query/frontend/opencypher/grammar/MemgraphCypherLexer.g4 +++ b/src/query/frontend/opencypher/grammar/MemgraphCypherLexer.g4 @@ -13,8 +13,11 @@ import CypherLexer ; ALTER : A L T E R ; ASYNC : A S Y N C ; AUTH : A U T H ; +BAD : B A D ; CLEAR : C L E A R ; +CSV : C S V ; DATA : D A T A ; +DELIMITER : D E L I M I T E R ; DATABASE : D A T A B A S E ; DENY : D E N Y ; DIRECTORY : D I R E C T O R Y ; @@ -24,10 +27,14 @@ FOR : F O R ; FROM : F R O M ; GRANT : G R A N T ; GRANTS : G R A N T S ; +HEADER : H E A D E R ; IDENTIFIED : I D E N T I F I E D ; +IGNORE : I G N O R E ; +LOAD : L O A D ; LOCK : L O C K ; MAIN : M A I N ; MODE : M O D E ; +NO : N O ; PASSWORD : P A S S W O R D ; PORT : P O R T ; PRIVILEGES : P R I V I L E G E S ; @@ -38,6 +45,7 @@ REPLICATION : R E P L I C A T I O N ; REVOKE : R E V O K E ; ROLE : R O L E ; ROLES : R O L E S ; +QUOTE : Q U O T E ; STATS : S T A T S ; SYNC : S Y N C ; TIMEOUT : T I M E O U T ; diff --git a/src/query/frontend/semantic/required_privileges.cpp b/src/query/frontend/semantic/required_privileges.cpp index e65b60aa0..9f187e48f 100644 --- a/src/query/frontend/semantic/required_privileges.cpp +++ b/src/query/frontend/semantic/required_privileges.cpp @@ -50,6 +50,8 @@ class PrivilegeExtractor : public QueryVisitor, public HierarchicalTreeVis void Visit(LockPathQuery &lock_path_query) override { AddPrivilege(AuthQuery::Privilege::LOCK_PATH); } + void Visit(LoadCsv &load_csv) override { AddPrivilege(AuthQuery::Privilege::READ_FILE); } + void 
Visit(ReplicationQuery &replication_query) override { switch (replication_query.action_) { case ReplicationQuery::Action::SET_REPLICATION_ROLE: diff --git a/src/query/frontend/semantic/symbol_generator.cpp b/src/query/frontend/semantic/symbol_generator.cpp index 81455175f..ea6a0f8b9 100644 --- a/src/query/frontend/semantic/symbol_generator.cpp +++ b/src/query/frontend/semantic/symbol_generator.cpp @@ -162,6 +162,16 @@ bool SymbolGenerator::PostVisit(CallProcedure &call_proc) { return true; } +bool SymbolGenerator::PreVisit(LoadCsv &load_csv) { return false; } + +bool SymbolGenerator::PostVisit(LoadCsv &load_csv) { + if (HasSymbol(load_csv.row_var_->name_)) { + throw RedeclareVariableError(load_csv.row_var_->name_); + } + load_csv.row_var_->MapTo(CreateSymbol(load_csv.row_var_->name_, true)); + return true; +} + bool SymbolGenerator::PreVisit(Return &ret) { scope_.in_return = true; VisitReturnBody(ret.body_); diff --git a/src/query/frontend/semantic/symbol_generator.hpp b/src/query/frontend/semantic/symbol_generator.hpp index 4d827d61d..0103c7f90 100644 --- a/src/query/frontend/semantic/symbol_generator.hpp +++ b/src/query/frontend/semantic/symbol_generator.hpp @@ -36,6 +36,8 @@ class SymbolGenerator : public HierarchicalTreeVisitor { bool PostVisit(Create &) override; bool PreVisit(CallProcedure &) override; bool PostVisit(CallProcedure &) override; + bool PreVisit(LoadCsv &) override; + bool PostVisit(LoadCsv &) override; bool PreVisit(Return &) override; bool PostVisit(Return &) override; bool PreVisit(With &) override; diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp index fcf0ef2c4..1b55b627e 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -20,6 +20,7 @@ #include "query/plan/vertex_count_cache.hpp" #include "query/typed_value.hpp" #include "utils/algorithm.hpp" +#include "utils/csv_parsing.hpp" #include "utils/event_counter.hpp" #include "utils/exceptions.hpp" #include "utils/flag_validation.hpp" @@ -78,7 +79,7 @@ 
struct ParsedQuery { ParsedQuery ParseQuery(const std::string &query_string, const std::map ¶ms, utils::SkipList *cache, utils::SpinLock *antlr_lock) { // Strip the query for caching purposes. The process of stripping a query - // "normalizes" it by replacing any literals with new parameters . This + // "normalizes" it by replacing any literals with new parameters. This // results in just the *structure* of the query being taken into account for // caching. frontend::StrippedQuery stripped_query{query_string}; @@ -473,6 +474,8 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, ReplQueryHandler * Frame frame(0); SymbolTable symbol_table; EvaluationContext evaluation_context; + // TODO: MemoryResource for EvaluationContext, it should probably be passed as + // the argument to Callback. evaluation_context.timestamp = std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()) .count(); @@ -713,6 +716,7 @@ std::optional PullPlan::Pull(AnyStream *stream, std::optional< ctx_.profile_execution_time = execution_time_; return ctx_; } + } // namespace /** @@ -1093,7 +1097,7 @@ PreparedQuery PrepareAuthQuery(ParsedQuery parsed_query, bool in_explicit_transa RWType::NONE}; } -PreparedQuery PrepareReplicationQuery(ParsedQuery parsed_query, bool in_explicit_transaction, +PreparedQuery PrepareReplicationQuery(ParsedQuery parsed_query, const bool in_explicit_transaction, InterpreterContext *interpreter_context, DbAccessor *dba) { if (in_explicit_transaction) { throw ReplicationModificationInMulticommandTxException(); diff --git a/src/query/plan/operator.cpp b/src/query/plan/operator.cpp index 6b3ad4b47..6ec3859c1 100644 --- a/src/query/plan/operator.cpp +++ b/src/query/plan/operator.cpp @@ -24,9 +24,11 @@ #include "query/procedure/mg_procedure_impl.hpp" #include "query/procedure/module.hpp" #include "utils/algorithm.hpp" +#include "utils/csv_parsing.hpp" #include "utils/event_counter.hpp" #include "utils/exceptions.hpp" #include "utils/fnv.hpp" 
+#include "utils/likely.hpp" #include "utils/logging.hpp" #include "utils/pmr/unordered_map.hpp" #include "utils/pmr/unordered_set.hpp" @@ -1794,8 +1796,7 @@ bool Delete::DeleteCursor::Pull(Frame &frame, ExecutionContext &context) { if (!input_cursor_->Pull(frame, context)) return false; // Delete should get the latest information, this way it is also possible - // to - // delete newly added nodes and edges. + // to delete newly added nodes and edges. ExpressionEvaluator evaluator(&frame, context.symbol_table, context.evaluation_context, context.db_accessor, storage::View::NEW); auto *pull_memory = context.evaluation_context.memory; @@ -3679,4 +3680,134 @@ UniqueCursorPtr CallProcedure::MakeCursor(utils::MemoryResource *mem) const { return MakeUniqueCursorPtr(mem, this, mem); } +LoadCsv::LoadCsv(std::shared_ptr input, Expression *file, bool with_header, bool ignore_bad, + Expression *delimiter, Expression *quote, Symbol row_var) + : input_(input ? input : (std::make_shared())), + file_(file), + with_header_(with_header), + ignore_bad_(ignore_bad), + delimiter_(delimiter), + quote_(quote), + row_var_(row_var) { + MG_ASSERT(file_, "Something went wrong - '{}' member file_ shouldn't be a nullptr", __func__); +} + +bool LoadCsv::Accept(HierarchicalLogicalOperatorVisitor &visitor) { return false; }; + +class LoadCsvCursor; + +std::vector LoadCsv::OutputSymbols(const SymbolTable &sym_table) const { return {row_var_}; }; + +std::vector LoadCsv::ModifiedSymbols(const SymbolTable &sym_table) const { + auto symbols = input_->ModifiedSymbols(sym_table); + symbols.push_back(row_var_); + return symbols; +}; + +namespace { +// copy-pasted from interpreter.cpp +TypedValue EvaluateOptionalExpression(Expression *expression, ExpressionEvaluator *eval) { + return expression ? 
expression->Accept(*eval) : TypedValue(); +} + +auto ToOptionalString(ExpressionEvaluator *evaluator, Expression *expression) -> std::optional { + const auto evaluated_expr = EvaluateOptionalExpression(expression, evaluator); + if (evaluated_expr.IsString()) { + return utils::pmr::string(evaluated_expr.ValueString(), evaluator->GetMemoryResource()); + } + return std::nullopt; +}; + +TypedValue CsvRowToTypedList(csv::Reader::Row row, utils::MemoryResource *mem) { + auto typed_columns = utils::pmr::vector(mem); + std::transform(begin(row), end(row), std::back_inserter(typed_columns), + [mem = mem](auto &column) { return TypedValue(column, mem); }); + return TypedValue(typed_columns, mem); +} + +TypedValue CsvRowToTypedMap(csv::Reader::Row row, csv::Reader::Header header, utils::MemoryResource *mem) { + // a valid row has the same number of elements as the header + utils::pmr::map m(mem); + for (auto i = 0; i < row.size(); ++i) { + m.emplace(header[i], TypedValue(row[i], mem)); + } + return TypedValue(m, mem); +} + +} // namespace + +class LoadCsvCursor : public Cursor { + const LoadCsv *self_; + const UniqueCursorPtr input_cursor_; + bool input_is_once_; + std::optional reader_{}; + + public: + LoadCsvCursor(const LoadCsv *self, utils::MemoryResource *mem) + : self_(self), input_cursor_(self_->input_->MakeCursor(mem)) { + input_is_once_ = dynamic_cast(self_->input_.get()); + } + + bool Pull(Frame &frame, ExecutionContext &context) override { + SCOPED_PROFILE_OP("LoadCsv"); + + if (MustAbort(context)) throw HintedAbortError(); + + // ToDo(the-joksim): + // - this is an ungodly hack because the pipeline of creating a plan + // doesn't allow evaluating the expressions contained in self_->file_, + // self_->delimiter_, and self_->quote_ earlier (say, in the interpreter.cpp) + // without massacring the code even worse than I did here + if (UNLIKELY(!reader_)) { + reader_ = MakeReader(&context.evaluation_context); + } + + bool input_pulled = input_cursor_->Pull(frame, 
context); + + // If the input is Once, we have to keep going until we read all the rows, + // regardless of whether the pull on Once returned false. + // If we have e.g. MATCH(n) LOAD CSV ... AS x SET n.name = x.name, then we + // have to read at most cardinality(n) rows (but we can read less and stop + // pulling MATCH). + if (!input_is_once_ && !input_pulled) return false; + + if (auto row = reader_->GetNextRow()) { + if (!reader_->HasHeader()) { + frame[self_->row_var_] = CsvRowToTypedList(std::move(*row), context.evaluation_context.memory); + } else { + frame[self_->row_var_] = + CsvRowToTypedMap(std::move(*row), *reader_->GetHeader(), context.evaluation_context.memory); + } + return true; + } + + return false; + } + + void Reset() override { input_cursor_->Reset(); } + void Shutdown() override { input_cursor_->Shutdown(); } + + private: + csv::Reader MakeReader(EvaluationContext *eval_context) { + Frame frame(0); + SymbolTable symbol_table; + DbAccessor *dba = nullptr; + auto evaluator = ExpressionEvaluator(&frame, symbol_table, *eval_context, dba, storage::View::OLD); + + auto maybe_file = ToOptionalString(&evaluator, self_->file_); + auto maybe_delim = ToOptionalString(&evaluator, self_->delimiter_); + auto maybe_quote = ToOptionalString(&evaluator, self_->quote_); + + // no need to check if maybe_file is std::nullopt, as the parser makes sure + // we can't get a nullptr for the 'file_' member in the LoadCsv clause + return csv::Reader(*maybe_file, + csv::Reader::Config(self_->with_header_, self_->ignore_bad_, maybe_delim, maybe_quote), + eval_context->memory); + } +}; + +UniqueCursorPtr LoadCsv::MakeCursor(utils::MemoryResource *mem) const { + return MakeUniqueCursorPtr(mem, this, mem); +}; + } // namespace query::plan diff --git a/src/query/plan/operator.lcp b/src/query/plan/operator.lcp index 929cf4a97..139890356 100644 --- a/src/query/plan/operator.lcp +++ b/src/query/plan/operator.lcp @@ -117,6 +117,7 @@ class Distinct; class Union; class Cartesian; 
class CallProcedure; +class LoadCsv; using LogicalOperatorCompositeVisitor = ::utils::CompositeVisitor< Once, CreateNode, CreateExpand, ScanAll, ScanAllByLabel, @@ -125,7 +126,7 @@ using LogicalOperatorCompositeVisitor = ::utils::CompositeVisitor< Expand, ExpandVariable, ConstructNamedPath, Filter, Produce, Delete, SetProperty, SetProperties, SetLabels, RemoveProperty, RemoveLabels, EdgeUniquenessFilter, Accumulate, Aggregate, Skip, Limit, OrderBy, Merge, - Optional, Unwind, Distinct, Union, Cartesian, CallProcedure>; + Optional, Unwind, Distinct, Union, Cartesian, CallProcedure, LoadCsv>; using LogicalOperatorLeafVisitor = ::utils::LeafVisitor; @@ -2156,5 +2157,38 @@ at once. Instead, each call of the callback should return a single row of the ta (:serialize (:slk)) (:clone)) +(lcp:define-class load-csv (logical-operator) + ((input "std::shared_ptr" :scope :public + :slk-save #'slk-save-operator-pointer + :slk-load #'slk-load-operator-pointer) + (file "Expression *" :scope :public) + (with_header "bool" :scope :public) + (ignore_bad "bool" :scope :public) + (delimiter "Expression *" :initval "nullptr" :scope :public + :slk-save #'slk-save-ast-pointer + :slk-load (slk-load-ast-pointer "Expression")) + (quote "Expression *" :initval "nullptr" :scope :public + :slk-save #'slk-save-ast-pointer + :slk-load (slk-load-ast-pointer "Expression")) + (row_var "Symbol" :scope :public)) + (:public + #>cpp + LoadCsv() = default; + LoadCsv(std::shared_ptr input, Expression *file, bool with_header, bool ignore_bad, + Expression* delimiter, Expression* quote, Symbol row_var); + bool Accept(HierarchicalLogicalOperatorVisitor &visitor) override; + UniqueCursorPtr MakeCursor(utils::MemoryResource *) const override; + std::vector OutputSymbols(const SymbolTable &) const override; + std::vector ModifiedSymbols(const SymbolTable &) const override; + + bool HasSingleInput() const override { return true; } + std::shared_ptr input() const override { return input_; } + void 
set_input(std::shared_ptr input) override { + input_ = input; + } + cpp<#) + (:serialize (:slk)) + (:clone)) + (lcp:pop-namespace) ;; plan (lcp:pop-namespace) ;; query diff --git a/src/query/plan/pretty_print.cpp b/src/query/plan/pretty_print.cpp index b440664dc..cd3fe0069 100644 --- a/src/query/plan/pretty_print.cpp +++ b/src/query/plan/pretty_print.cpp @@ -206,6 +206,11 @@ bool PlanPrinter::PreVisit(query::plan::CallProcedure &op) { return true; } +bool PlanPrinter::PreVisit(query::plan::LoadCsv &op) { + WithPrintLn([&op](auto &out) { out << "* LoadCsv {" << op.row_var_.name() << "}"; }); + return true; +} + bool PlanPrinter::Visit(query::plan::Once &op) { WithPrintLn([](auto &out) { out << "* Once"; }); return true; @@ -803,6 +808,23 @@ bool PlanToJsonVisitor::PreVisit(query::plan::CallProcedure &op) { return false; } +bool PlanToJsonVisitor::PreVisit(query::plan::LoadCsv &op) { + json self; + self["name"] = "LoadCsv"; + self["file"] = ToJson(op.file_); + self["with_header"] = op.with_header_; + self["ignore_bad"] = op.ignore_bad_; + self["delimiter"] = ToJson(op.delimiter_); + self["quote"] = ToJson(op.quote_); + self["row_variable"] = ToJson(op.row_var_); + + op.input_->Accept(*this); + self["input"] = PopOutput(); + + output_ = std::move(self); + return false; +} + bool PlanToJsonVisitor::PreVisit(Distinct &op) { json self; self["name"] = "Distinct"; diff --git a/src/query/plan/pretty_print.hpp b/src/query/plan/pretty_print.hpp index 2cb102840..08d5b9c33 100644 --- a/src/query/plan/pretty_print.hpp +++ b/src/query/plan/pretty_print.hpp @@ -81,6 +81,7 @@ class PlanPrinter : public virtual HierarchicalLogicalOperatorVisitor { bool PreVisit(Unwind &) override; bool PreVisit(CallProcedure &) override; + bool PreVisit(LoadCsv &) override; bool Visit(Once &) override; @@ -194,6 +195,7 @@ class PlanToJsonVisitor : public virtual HierarchicalLogicalOperatorVisitor { bool PreVisit(Unwind &) override; bool PreVisit(CallProcedure &) override; + bool PreVisit(LoadCsv &) 
override; bool Visit(Once &) override; diff --git a/src/query/plan/rule_based_planner.hpp b/src/query/plan/rule_based_planner.hpp index 2c9489afa..ac548aa6e 100644 --- a/src/query/plan/rule_based_planner.hpp +++ b/src/query/plan/rule_based_planner.hpp @@ -203,6 +203,13 @@ class RuleBasedPlanner { input_op = std::make_unique( std::move(input_op), call_proc->procedure_name_, call_proc->arguments_, call_proc->result_fields_, result_symbols, call_proc->memory_limit_, call_proc->memory_scale_); + } else if (auto *load_csv = utils::Downcast(clause)) { + const auto &row_sym = context.symbol_table->at(*load_csv->row_var_); + context.bound_symbols.insert(row_sym); + + input_op = + std::make_unique(std::move(input_op), load_csv->file_, load_csv->with_header_, + load_csv->ignore_bad_, load_csv->delimiter_, load_csv->quote_, row_sym); } else { throw utils::NotYetImplemented("clause '{}' conversion to operator(s)", clause->GetTypeInfo().name); } diff --git a/src/utils/CMakeLists.txt b/src/utils/CMakeLists.txt index b87a6d291..653283e11 100644 --- a/src/utils/CMakeLists.txt +++ b/src/utils/CMakeLists.txt @@ -1,5 +1,6 @@ set(utils_src_files event_counter.cpp + csv_parsing.cpp file.cpp file_locker.cpp memory.cpp diff --git a/src/utils/csv_parsing.cpp b/src/utils/csv_parsing.cpp index d2b526c3d..027f74fa4 100644 --- a/src/utils/csv_parsing.cpp +++ b/src/utils/csv_parsing.cpp @@ -19,8 +19,8 @@ void Reader::InitializeStream() { } } -std::optional Reader::GetNextLine() { - std::string line; +std::optional Reader::GetNextLine() { + utils::pmr::string line(memory_); if (!std::getline(csv_stream_, line)) { // reached end of file or an I/0 error occurred if (!csv_stream_.good()) { @@ -32,18 +32,34 @@ std::optional Reader::GetNextLine() { return line; } -std::optional Reader::ParseHeader() { +Reader::ParsingResult Reader::ParseHeader() { // header must be the very first line in the file MG_ASSERT(line_count_ == 1, fmt::format("Invalid use of {}", __func__)); - const auto maybe_line = 
GetNextLine(); - if (!maybe_line) { + return ParseRow(); +} + +void Reader::TryInitializeHeader() { + if (!HasHeader()) { + return; + } + + auto header = ParseHeader(); + if (header.HasError()) { + throw CsvReadException("CSV reading : {}", header.GetError().message); + } + + if (header->empty()) { throw CsvReadException("CSV file {} empty!", path_); } - Header header; - // set the 'number_of_fields_' once this method is implemented fully - return std::nullopt; + + number_of_columns_ = header->size(); + header_ = *header; } +[[nodiscard]] bool Reader::HasHeader() const { return read_config_.with_header; } + +const std::optional &Reader::GetHeader() const { return header_; } + namespace { enum class CsvParserState : uint8_t { INITIAL_FIELD, @@ -59,8 +75,8 @@ bool SubstringStartsWith(const std::string_view str, size_t pos, const std::stri } // namespace Reader::ParsingResult Reader::ParseRow() { - std::vector row; - std::string column; + utils::pmr::vector row(memory_); + utils::pmr::string column(memory_); auto state = CsvParserState::INITIAL_FIELD; @@ -79,21 +95,21 @@ Reader::ParsingResult Reader::ParseRow() { // Null bytes aren't allowed in CSVs. if (c == '\0') { return ParseError(ParseError::ErrorCode::NULL_BYTE, - fmt::format("CSV: Line {:d} contains NULL byte", line_count_)); + fmt::format("CSV: Line {:d} contains NULL byte", line_count_ - 1)); } switch (state) { case CsvParserState::INITIAL_FIELD: case CsvParserState::NEXT_FIELD: { - if (SubstringStartsWith(*maybe_line, i, read_config_.quote)) { + if (SubstringStartsWith(*maybe_line, i, *read_config_.quote)) { // The current field is a quoted field. state = CsvParserState::QUOTING; - i += read_config_.quote.size() - 1; - } else if (SubstringStartsWith(*maybe_line, i, read_config_.delimiter)) { + i += read_config_.quote->size() - 1; + } else if (SubstringStartsWith(*maybe_line, i, *read_config_.delimiter)) { // The current field has an empty value. 
row.emplace_back(""); state = CsvParserState::NEXT_FIELD; - i += read_config_.delimiter.size() - 1; + i += read_config_.delimiter->size() - 1; } else { // The current field is a regular field. column.push_back(c); @@ -102,40 +118,40 @@ Reader::ParsingResult Reader::ParseRow() { break; } case CsvParserState::QUOTING: { - auto quote_now = SubstringStartsWith(*maybe_line, i, read_config_.quote); - auto quote_next = SubstringStartsWith(*maybe_line, i + read_config_.quote.size(), read_config_.quote); + auto quote_now = SubstringStartsWith(*maybe_line, i, *read_config_.quote); + auto quote_next = SubstringStartsWith(*maybe_line, i + read_config_.quote->size(), *read_config_.quote); if (quote_now && quote_next) { // This is an escaped quote character. - column += read_config_.quote; - i += read_config_.quote.size() * 2 - 1; + column += *read_config_.quote; + i += read_config_.quote->size() * 2 - 1; } else if (quote_now && !quote_next) { // This is the end of the quoted field. row.emplace_back(std::move(column)); state = CsvParserState::EXPECT_DELIMITER; - i += read_config_.quote.size() - 1; + i += read_config_.quote->size() - 1; } else { column.push_back(c); } break; } case CsvParserState::NOT_QUOTING: { - if (SubstringStartsWith(*maybe_line, i, read_config_.delimiter)) { + if (SubstringStartsWith(*maybe_line, i, *read_config_.delimiter)) { row.emplace_back(std::move(column)); state = CsvParserState::NEXT_FIELD; - i += read_config_.delimiter.size() - 1; + i += read_config_.delimiter->size() - 1; } else { column.push_back(c); } break; } case CsvParserState::EXPECT_DELIMITER: { - if (SubstringStartsWith(*maybe_line, i, read_config_.delimiter)) { + if (SubstringStartsWith(*maybe_line, i, *read_config_.delimiter)) { state = CsvParserState::NEXT_FIELD; - i += read_config_.delimiter.size() - 1; + i += read_config_.delimiter->size() - 1; } else { return ParseError(ParseError::ErrorCode::UNEXPECTED_TOKEN, - fmt::format("CSV Reader: Expected '{}' after '{}', but got '{}'", 
read_config_.delimiter, - read_config_.quote, c)); + fmt::format("CSV Reader: Expected '{}' after '{}', but got '{}' at line {:d}", + *read_config_.delimiter, *read_config_.quote, c, line_count_ - 1)); } break; } @@ -168,47 +184,42 @@ Reader::ParsingResult Reader::ParseRow() { // reached the end of file - return empty row if (row.empty()) { - return Row(row); + return row; } - // if there's no header, then: - // - if we skip bad rows, then the very first __valid__ row will - // determine the allowed number of columns - // - if we don't skip bad rows, the very first row will determine the allowed - // number of columns in all subsequent rows - if (!read_config_.with_header && number_of_columns_ == 0) { - MG_ASSERT(!row.empty()); - number_of_columns_ = row.size(); + // Has header, but the header has already been read and the number_of_columns_ + // is already set. Otherwise, we would get an error every time we'd try to + // parse the header. + // Also, if we don't have a header, the 'number_of_columns_' will be 0, so no + // need to check the number of columns. 
+ if (UNLIKELY(number_of_columns_ != 0 && row.size() != number_of_columns_)) { + return ParseError(ParseError::ErrorCode::BAD_NUM_OF_COLUMNS, + // ToDo(the-joksim): + // - 'line_count_ - 1' is the last line of a row (as a + // row may span several lines) ==> should have a row + // counter + fmt::format("Expected {:d} columns in row {:d}, but got {:d}", number_of_columns_, + line_count_ - 1, row.size())); } - if (row.size() != number_of_columns_) { - return ParseError( - ParseError::ErrorCode::BAD_NUM_OF_COLUMNS, - // ToDo(the-joksim): - // - 'line_count_ - 1' is the last line of a row (as a - // row may span several lines) ==> should have a row - // counter - fmt::format("Expected {:d} columns in row {:d}, but got {:d}", number_of_columns_, line_count_, row.size())); - } - - return Row(row); + return row; } // Returns Reader::Row if the read row if valid; // Returns std::nullopt if end of file is reached or an error occurred // making it unreadable; -// @throws CsvReadException if a bad row is encountered, and the skip_bad is set +// @throws CsvReadException if a bad row is encountered, and the ignore_bad is set // to 'true' in the Reader::Config. 
std::optional Reader::GetNextRow() { auto row = ParseRow(); if (row.HasError()) { - if (!read_config_.skip_bad) { - throw CsvReadException("CSV Reader: Bad row at line {:d}: {}", line_count_, row.GetError().message); + if (!read_config_.ignore_bad) { + throw CsvReadException("CSV Reader: Bad row at line {:d}: {}", line_count_ - 1, row.GetError().message); } // try to parse as many times as necessary to reach a valid row do { - spdlog::debug("CSV Reader: Bad row at line {:d}: {}", line_count_, row.GetError().message); + spdlog::debug("CSV Reader: Bad row at line {:d}: {}", line_count_ - 1, row.GetError().message); if (!csv_stream_.good()) { return std::nullopt; } @@ -216,12 +227,11 @@ std::optional Reader::GetNextRow() { } while (row.HasError()); } - auto ret = row.GetValue(); - if (ret.columns.empty()) { + if (row->empty()) { // reached end of file return std::nullopt; } - return ret; + return *row; } } // namespace csv diff --git a/src/utils/csv_parsing.hpp b/src/utils/csv_parsing.hpp index b44e850a0..8727ab3c6 100644 --- a/src/utils/csv_parsing.hpp +++ b/src/utils/csv_parsing.hpp @@ -8,13 +8,15 @@ #pragma once #include +#include #include #include -#include #include #include #include "utils/exceptions.hpp" +#include "utils/pmr/string.hpp" +#include "utils/pmr/vector.hpp" #include "utils/result.hpp" namespace csv { @@ -26,38 +28,38 @@ class CsvReadException : public utils::BasicException { class Reader { public: struct Config { - Config(){}; - Config(std::string delimiter, std::string quote, const bool with_header, const bool skip_bad) - : delimiter(std::move(delimiter)), quote(std::move(quote)), with_header(with_header), skip_bad(skip_bad) {} + Config() = default; + Config(const bool with_header, const bool ignore_bad, std::optional delim, + std::optional qt) + : with_header(with_header), ignore_bad(ignore_bad), delimiter(std::move(delim)), quote(std::move(qt)) {} - std::string delimiter{","}; - std::string quote{"\""}; bool with_header{false}; - bool 
skip_bad{false}; + bool ignore_bad{false}; + std::optional delimiter{}; + std::optional quote{}; }; - struct Row { - Row() = default; - explicit Row(std::vector cols) : columns(std::move(cols)) {} - std::vector columns; - }; + using Row = utils::pmr::vector; + using Header = utils::pmr::vector; - explicit Reader(const std::filesystem::path &path, const Config cfg = {}) : path_(path), read_config_(cfg) { + Reader() = default; + explicit Reader(std::filesystem::path path, Config cfg, utils::MemoryResource *mem = utils::NewDeleteResource()) + : path_(std::move(path)), memory_(mem) { + read_config_.with_header = cfg.with_header; + read_config_.ignore_bad = cfg.ignore_bad; + read_config_.delimiter = cfg.delimiter ? std::move(*cfg.delimiter) : utils::pmr::string{",", memory_}; + read_config_.quote = cfg.quote ? std::move(*cfg.quote) : utils::pmr::string{"\"", memory_}; InitializeStream(); - if (read_config_.with_header) { - header_ = ParseHeader(); - } + TryInitializeHeader(); } Reader(const Reader &) = delete; Reader &operator=(const Reader &) = delete; - Reader(Reader &&) = delete; - Reader &operator=(Reader &&) = delete; + Reader(Reader &&) = default; + Reader &operator=(Reader &&) = default; - ~Reader() { - if (csv_stream_.is_open()) csv_stream_.close(); - } + ~Reader() = default; struct ParseError { enum class ErrorCode : uint8_t { BAD_HEADER, NO_CLOSING_QUOTE, UNEXPECTED_TOKEN, BAD_NUM_OF_COLUMNS, NULL_BYTE }; @@ -68,6 +70,8 @@ class Reader { }; using ParsingResult = utils::BasicResult; + [[nodiscard]] bool HasHeader() const; + const std::optional
&GetHeader() const; std::optional GetNextRow(); private: @@ -76,20 +80,16 @@ class Reader { Config read_config_; uint64_t line_count_{1}; uint16_t number_of_columns_{0}; - - struct Header { - Header() = default; - explicit Header(std::vector cols) : columns(std::move(cols)) {} - std::vector columns; - }; - std::optional
header_{}; + utils::MemoryResource *memory_; void InitializeStream(); - std::optional GetNextLine(); + void TryInitializeHeader(); - std::optional
ParseHeader(); + std::optional GetNextLine(); + + ParsingResult ParseHeader(); ParsingResult ParseRow(); }; diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index c7bc0942e..89d6815fc 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -226,8 +226,8 @@ target_link_libraries(${test_prefix}utils_file_locker mg-utils fmt) add_unit_test(utils_thread_pool.cpp) target_link_libraries(${test_prefix}utils_thread_pool mg-utils fmt) -add_unit_test(csv_parsing.cpp ${CMAKE_SOURCE_DIR}/src/utils/csv_parsing.cpp) -target_link_libraries(${test_prefix}csv_parsing mg-utils fmt) +add_unit_test(utils_csv_parsing.cpp ${CMAKE_SOURCE_DIR}/src/utils/csv_parsing.cpp) +target_link_libraries(${test_prefix}utils_csv_parsing mg-utils fmt) # Test mg-storage-v2 diff --git a/tests/unit/csv_parsing.cpp b/tests/unit/csv_parsing.cpp deleted file mode 100644 index 60b197564..000000000 --- a/tests/unit/csv_parsing.cpp +++ /dev/null @@ -1,194 +0,0 @@ -#include "utils/csv_parsing.hpp" -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -#include "utils/string.hpp" - - -class CsvReaderTest : public ::testing::Test { - protected: - const std::filesystem::path csv_directory{std::filesystem::temp_directory_path() / "csv_testing"}; - - void SetUp() override { Clear(); CreateCsvDir(); } - - void TearDown() override { Clear(); } - - private: - - void CreateCsvDir() { - if (!std::filesystem::exists(csv_directory)) { - std::filesystem::create_directory(csv_directory); - } - } - void Clear() { - if (!std::filesystem::exists(csv_directory)) return; - std::filesystem::remove_all(csv_directory); - } -}; - -namespace { -class FileWriter { - public: - explicit FileWriter(const std::filesystem::path path) { stream_.open(path); } - - FileWriter(const FileWriter &) = delete; - FileWriter &operator=(const FileWriter &) = delete; - - FileWriter(FileWriter &&) = delete; - FileWriter &operator=(FileWriter &&) = delete; - - void Close() { stream_.close(); } - - size_t WriteLine(const 
std::string_view line) { - if (!stream_.is_open()) { - return 0; - } - - stream_ << line << std::endl; - - // including the newline character - return line.size() + 1; - } - - private: - std::ofstream stream_; -}; - -std::string CreateRow(const std::vector &columns, const std::string_view delim) { - return utils::Join(columns, delim); -} - -} // namespace - -TEST_F(CsvReaderTest, CommaDelimiter) { - // create a file with a valid and an invalid row; - // the invalid row has wrong delimiters; - // expect the parser's output to be a single string for the invalid row; - const auto filepath = csv_directory / "bla.csv"; - auto writer = FileWriter(filepath); - - const std::vector columns1{"A", "B", "C"}; - writer.WriteLine(CreateRow(columns1, ",")); - - const std::vector columns2{"D", "E", "F"}; - writer.WriteLine(CreateRow(columns2, ";")); - - writer.Close(); - - // note - default delimiter is "," - auto reader = csv::Reader(filepath); - - auto parsed_row = reader.GetNextRow(); - ASSERT_EQ(parsed_row->columns, columns1); - - EXPECT_THROW(reader.GetNextRow(), csv::CsvReadException); -} - -TEST_F(CsvReaderTest, SemicolonDelimiter) { - const auto filepath = csv_directory / "bla.csv"; - auto writer = FileWriter(filepath); - - const std::string delimiter = ";"; - const std::vector columns1{"A", "B", "C"}; - writer.WriteLine(CreateRow(columns1, delimiter)); - - const std::vector columns2{"A", "B", "C"}; - writer.WriteLine(CreateRow(columns2, ",")); - - writer.Close(); - - const csv::Reader::Config cfg(delimiter, "\"", false, false); - auto reader = csv::Reader(filepath, cfg); - - auto parsed_row = reader.GetNextRow(); - ASSERT_EQ(parsed_row->columns, columns1); - - EXPECT_THROW(reader.GetNextRow(), csv::CsvReadException); -} - -TEST_F(CsvReaderTest, SkipBad) { - // create a file with invalid first two rows (containing a string with a - // missing closing quote); - // the last row is valid; - const auto filepath = csv_directory / "bla.csv"; - auto writer = FileWriter(filepath); 
- - const std::string delimiter = ","; - - const std::vector columns_bad{"A", "B", "\"C"}; - writer.WriteLine(CreateRow(columns_bad, delimiter)); - writer.WriteLine(CreateRow(columns_bad, delimiter)); - - const std::vector columns_good{"A", "B", "C"}; - writer.WriteLine(CreateRow(columns_good, delimiter)); - - writer.Close(); - - { - // we set the 'skip_bad' flag in the read configuration to 'true'; - // parser's output should be solely the valid row; - const bool skip_bad = true; - const csv::Reader::Config cfg(delimiter, "\"", false, skip_bad); - auto reader = csv::Reader(filepath, cfg); - - auto parsed_row = reader.GetNextRow(); - ASSERT_EQ(parsed_row->columns, columns_good); - } - - { - // we set the 'skip_bad' flag in the read configuration to 'false'; - // an exception must be thrown; - const bool skip_bad = false; - const csv::Reader::Config cfg(delimiter, "\"", false, skip_bad); - auto reader = csv::Reader(filepath, cfg); - - EXPECT_THROW(reader.GetNextRow(), csv::CsvReadException); - } -} - -TEST_F(CsvReaderTest, AllRowsValid) { - // create a file with all rows valid; - // parser should return 'std::nullopt' - const auto filepath = csv_directory / "bla.csv"; - auto writer = FileWriter(filepath); - - const std::string delimiter = ","; - - const std::vector columns{"A", "B", "C"}; - writer.WriteLine(CreateRow(columns, delimiter)); - writer.WriteLine(CreateRow(columns, delimiter)); - writer.WriteLine(CreateRow(columns, delimiter)); - - writer.Close(); - - const bool skip_bad = false; - const csv::Reader::Config cfg(delimiter, "\"", false, skip_bad); - auto reader = csv::Reader(filepath, cfg); - - while (auto parsed_row = reader.GetNextRow()) { - ASSERT_EQ(parsed_row->columns, columns); - } -} - -TEST_F(CsvReaderTest, SkipAllRows) { - // create a file with all rows invalid (containing a string with a missing closing quote); - // parser should return 'std::nullopt' - const auto filepath = csv_directory / "bla.csv"; - auto writer = FileWriter(filepath); - - 
const std::string delimiter = ","; - - const std::vector columns_bad{"A", "B", "\"C"}; - writer.WriteLine(CreateRow(columns_bad, delimiter)); - writer.WriteLine(CreateRow(columns_bad, delimiter)); - writer.WriteLine(CreateRow(columns_bad, delimiter)); - - writer.Close(); - - const bool skip_bad = true; - const csv::Reader::Config cfg(delimiter, "\"", false, skip_bad); - auto reader = csv::Reader(filepath, cfg); - - auto parsed_row = reader.GetNextRow(); - ASSERT_EQ(parsed_row, std::nullopt); -} diff --git a/tests/unit/cypher_main_visitor.cpp b/tests/unit/cypher_main_visitor.cpp index 87ceeded0..46c7847fa 100644 --- a/tests/unit/cypher_main_visitor.cpp +++ b/tests/unit/cypher_main_visitor.cpp @@ -2869,4 +2869,70 @@ TEST_P(CypherMainVisitorTest, TestLockPathQuery) { test_lock_path_query("UNLOCK", LockPathQuery::Action::UNLOCK_PATH); } +TEST_P(CypherMainVisitorTest, TestLoadCsvClause) { + auto &ast_generator = *GetParam(); + + { + const std::string query = R"(LOAD CSV FROM "file.csv")"; + ASSERT_THROW(ast_generator.ParseQuery(query), SyntaxException); + } + + { + const std::string query = R"(LOAD CSV FROM "file.csv" WITH)"; + ASSERT_THROW(ast_generator.ParseQuery(query), SyntaxException); + } + + { + const std::string query = R"(LOAD CSV FROM "file.csv" WITH HEADER)"; + ASSERT_THROW(ast_generator.ParseQuery(query), SyntaxException); + } + + { + const std::string query = R"(LOAD CSV FROM "file.csv" WITH HEADER DELIMITER ";")"; + ASSERT_THROW(ast_generator.ParseQuery(query), SyntaxException); + } + + { + const std::string query = R"(LOAD CSV FROM "file.csv" WITH HEADER DELIMITER ";" QUOTE "'")"; + ASSERT_THROW(ast_generator.ParseQuery(query), SyntaxException); + } + + { + const std::string query = R"(LOAD CSV FROM "file.csv" WITH HEADER DELIMITER ";" QUOTE "'" AS)"; + ASSERT_THROW(ast_generator.ParseQuery(query), SyntaxException); + } + + { + const std::string query = R"(LOAD CSV FROM file WITH HEADER IGNORE BAD DELIMITER ";" QUOTE "'" AS x)"; + 
ASSERT_THROW(ast_generator.ParseQuery(query), SyntaxException); + } + + { + const std::string query = R"(LOAD CSV FROM "file.csv" WITH HEADER IGNORE BAD DELIMITER 0 QUOTE "'" AS x)"; + ASSERT_THROW(ast_generator.ParseQuery(query), SemanticException); + } + + { + const std::string query = R"(LOAD CSV FROM "file.csv" WITH HEADER IGNORE BAD DELIMITER ";" QUOTE 0 AS x)"; + ASSERT_THROW(ast_generator.ParseQuery(query), SemanticException); + } + + { + // can't be a standalone clause + const std::string query = R"(LOAD CSV FROM "file.csv" WITH HEADER IGNORE BAD DELIMITER ";" QUOTE "'" AS x)"; + ASSERT_THROW(ast_generator.ParseQuery(query), SemanticException); + } + + { + const std::string query = + R"(LOAD CSV FROM "file.csv" WITH HEADER IGNORE BAD DELIMITER ";" QUOTE "'" AS x RETURN x)"; + auto *parsed_query = dynamic_cast(ast_generator.ParseQuery(query)); + ASSERT_TRUE(parsed_query); + auto *load_csv_clause = dynamic_cast(parsed_query->single_query_->clauses_[0]); + ASSERT_TRUE(load_csv_clause); + ASSERT_TRUE(load_csv_clause->with_header_); + ASSERT_TRUE(load_csv_clause->ignore_bad_); + } +} + } // namespace diff --git a/tests/unit/interpreter.cpp b/tests/unit/interpreter.cpp index 3ceb6183a..51444ed4f 100644 --- a/tests/unit/interpreter.cpp +++ b/tests/unit/interpreter.cpp @@ -11,6 +11,7 @@ #include "query/typed_value.hpp" #include "query_common.hpp" #include "storage/v2/property_value.hpp" +#include "utils/csv_parsing.hpp" #include "utils/logging.hpp" namespace { @@ -193,6 +194,11 @@ TEST_F(InterpreterTest, Parameters) { } } +TEST_F(InterpreterTest, LoadCsv) { + // for debug purposes + auto [stream, qid] = Prepare(R"(LOAD CSV FROM "simple.csv" NO HEADER AS row RETURN row)"); +} + // Test bfs end to end. 
TEST_F(InterpreterTest, Bfs) { srand(0); @@ -776,3 +782,116 @@ TEST_F(InterpreterTest, Qid) { interpreter_.CommitTransaction(); } } + +namespace { +// copied from utils_csv_parsing.cpp - tmp dir management and csv file writer +class TmpCsvDirManager final { + public: + TmpCsvDirManager() { CreateCsvDir(); } + ~TmpCsvDirManager() { Clear(); } + + const std::filesystem::path &Path() const { return tmp_dir_; } + + private: + const std::filesystem::path tmp_dir_{std::filesystem::temp_directory_path() / "csv_directory"}; + + void CreateCsvDir() { + if (!std::filesystem::exists(tmp_dir_)) { + std::filesystem::create_directory(tmp_dir_); + } + } + + void Clear() { + if (!std::filesystem::exists(tmp_dir_)) return; + std::filesystem::remove_all(tmp_dir_); + } +}; + +class FileWriter { + public: + explicit FileWriter(const std::filesystem::path path) { stream_.open(path); } + + FileWriter(const FileWriter &) = delete; + FileWriter &operator=(const FileWriter &) = delete; + + FileWriter(FileWriter &&) = delete; + FileWriter &operator=(FileWriter &&) = delete; + + void Close() { stream_.close(); } + + size_t WriteLine(const std::string_view line) { + if (!stream_.is_open()) { + return 0; + } + + stream_ << line << std::endl; + + // including the newline character + return line.size() + 1; + } + + private: + std::ofstream stream_; +}; + +std::string CreateRow(const std::vector &columns, const std::string_view delim) { + return utils::Join(columns, delim); +} +} // namespace + +TEST_F(InterpreterTest, LoadCsvClause) { + auto dir_manager = TmpCsvDirManager(); + const auto csv_path = dir_manager.Path() / "file.csv"; + auto writer = FileWriter(csv_path); + + const std::string delimiter{"|"}; + + const std::vector header{"A", "B", "C"}; + writer.WriteLine(CreateRow(header, delimiter)); + + const std::vector good_columns_1{"a", "b", "c"}; + writer.WriteLine(CreateRow(good_columns_1, delimiter)); + + const std::vector bad_columns{"\"\"1", "2", "3"}; + 
writer.WriteLine(CreateRow(bad_columns, delimiter)); + + const std::vector good_columns_2{"d", "e", "f"}; + writer.WriteLine(CreateRow(good_columns_2, delimiter)); + + writer.Close(); + + { + const std::string query = fmt::format(R"(LOAD CSV FROM "{}" WITH HEADER IGNORE BAD DELIMITER "{}" AS x RETURN x.A)", + csv_path.string(), delimiter); + auto [stream, qid] = Prepare(query); + ASSERT_EQ(stream.GetHeader().size(), 1U); + EXPECT_EQ(stream.GetHeader()[0], "x.A"); + + Pull(&stream, 1); + ASSERT_EQ(stream.GetSummary().count("has_more"), 1); + ASSERT_TRUE(stream.GetSummary().at("has_more").ValueBool()); + ASSERT_EQ(stream.GetResults().size(), 1U); + ASSERT_EQ(stream.GetResults()[0][0].ValueString(), "a"); + + Pull(&stream, 1); + ASSERT_EQ(stream.GetSummary().count("has_more"), 1); + ASSERT_FALSE(stream.GetSummary().at("has_more").ValueBool()); + ASSERT_EQ(stream.GetResults().size(), 2U); + ASSERT_EQ(stream.GetResults()[1][0].ValueString(), "d"); + } + + { + const std::string query = fmt::format(R"(LOAD CSV FROM "{}" WITH HEADER IGNORE BAD DELIMITER "{}" AS x RETURN x.C)", + csv_path.string(), delimiter); + auto [stream, qid] = Prepare(query); + ASSERT_EQ(stream.GetHeader().size(), 1U); + EXPECT_EQ(stream.GetHeader()[0], "x.C"); + + Pull(&stream); + ASSERT_EQ(stream.GetSummary().count("has_more"), 1); + ASSERT_FALSE(stream.GetSummary().at("has_more").ValueBool()); + ASSERT_EQ(stream.GetResults().size(), 2U); + ASSERT_EQ(stream.GetResults()[0][0].ValueString(), "c"); + ASSERT_EQ(stream.GetResults()[1][0].ValueString(), "f"); + } +} diff --git a/tests/unit/utils_csv_parsing.cpp b/tests/unit/utils_csv_parsing.cpp new file mode 100644 index 000000000..f405dfe24 --- /dev/null +++ b/tests/unit/utils_csv_parsing.cpp @@ -0,0 +1,250 @@ +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "utils/csv_parsing.hpp" + +#include "utils/string.hpp" + +class CsvReaderTest : public ::testing::Test { + protected: + const std::filesystem::path 
csv_directory{std::filesystem::temp_directory_path() / "csv_testing"}; + + void SetUp() override { + Clear(); + CreateCsvDir(); + } + + void TearDown() override { Clear(); } + + private: + void CreateCsvDir() { + if (!std::filesystem::exists(csv_directory)) { + std::filesystem::create_directory(csv_directory); + } + } + void Clear() { + if (!std::filesystem::exists(csv_directory)) return; + std::filesystem::remove_all(csv_directory); + } +}; + +namespace { +class FileWriter { + public: + explicit FileWriter(const std::filesystem::path path) { stream_.open(path); } + + FileWriter(const FileWriter &) = delete; + FileWriter &operator=(const FileWriter &) = delete; + + FileWriter(FileWriter &&) = delete; + FileWriter &operator=(FileWriter &&) = delete; + + void Close() { stream_.close(); } + + size_t WriteLine(const std::string_view line) { + if (!stream_.is_open()) { + return 0; + } + + stream_ << line << std::endl; + + // including the newline character + return line.size() + 1; + } + + private: + std::ofstream stream_; +}; + +std::string CreateRow(const std::vector &columns, const std::string_view delim) { + return utils::Join(columns, delim); +} + +auto ToPmrColumns(const std::vector &columns) { + utils::pmr::vector pmr_columns(utils::NewDeleteResource()); + for (const auto &col : columns) { + pmr_columns.emplace_back(col); + } + return pmr_columns; +} + +} // namespace + +TEST_F(CsvReaderTest, CommaDelimiter) { + // create a file with a single valid row; + const auto filepath = csv_directory / "bla.csv"; + auto writer = FileWriter(filepath); + + const std::vector columns{"A", "B", "C"}; + writer.WriteLine(CreateRow(columns, ",")); + + writer.Close(); + + utils::MemoryResource *mem{utils::NewDeleteResource()}; + + bool with_header = false; + bool ignore_bad = false; + utils::pmr::string delimiter{",", mem}; + utils::pmr::string quote{"\"", mem}; + + csv::Reader::Config cfg{with_header, ignore_bad, delimiter, quote}; + auto reader = csv::Reader(filepath, cfg, mem); 
+ + auto parsed_row = reader.GetNextRow(); + ASSERT_EQ(parsed_row, ToPmrColumns(columns)); +} + +TEST_F(CsvReaderTest, SemicolonDelimiter) { + const auto filepath = csv_directory / "bla.csv"; + auto writer = FileWriter(filepath); + + utils::MemoryResource *mem(utils::NewDeleteResource()); + + const utils::pmr::string delimiter{";", mem}; + const utils::pmr::string quote{"\"", mem}; + + const std::vector columns{"A", "B", "C"}; + writer.WriteLine(CreateRow(columns, delimiter)); + + writer.Close(); + + const bool with_header = false; + const bool ignore_bad = false; + const csv::Reader::Config cfg{with_header, ignore_bad, delimiter, quote}; + auto reader = csv::Reader(filepath, cfg, mem); + + auto parsed_row = reader.GetNextRow(); + ASSERT_EQ(parsed_row, ToPmrColumns(columns)); +} + +TEST_F(CsvReaderTest, SkipBad) { + // create a file with invalid first two rows (containing a string with a + // missing closing quote); + // the last row is valid; + const auto filepath = csv_directory / "bla.csv"; + auto writer = FileWriter(filepath); + + utils::MemoryResource *mem(utils::NewDeleteResource()); + + const utils::pmr::string delimiter{";", mem}; + const utils::pmr::string quote{"\"", mem}; + + const std::vector columns_bad{"A", "B", "\"\"C"}; + writer.WriteLine(CreateRow(columns_bad, delimiter)); + writer.WriteLine(CreateRow(columns_bad, delimiter)); + + const std::vector columns_good{"A", "B", "C"}; + writer.WriteLine(CreateRow(columns_good, delimiter)); + + writer.Close(); + + { + // we set the 'ignore_bad' flag in the read configuration to 'true'; + // parser's output should be solely the valid row; + const bool with_header = false; + const bool ignore_bad = true; + const csv::Reader::Config cfg{with_header, ignore_bad, delimiter, quote}; + auto reader = csv::Reader(filepath, cfg, mem); + + auto parsed_row = reader.GetNextRow(); + ASSERT_EQ(parsed_row, ToPmrColumns(columns_good)); + } + + { + // we set the 'ignore_bad' flag in the read configuration to 'false'; + // an 
exception must be thrown; + const bool with_header = false; + const bool ignore_bad = false; + const csv::Reader::Config cfg{with_header, ignore_bad, delimiter, quote}; + auto reader = csv::Reader(filepath, cfg, mem); + + EXPECT_THROW(reader.GetNextRow(), csv::CsvReadException); + } +} + +TEST_F(CsvReaderTest, AllRowsValid) { + // create a file with all rows valid; + // parser should return 'std::nullopt' + const auto filepath = csv_directory / "bla.csv"; + auto writer = FileWriter(filepath); + + utils::MemoryResource *mem(utils::NewDeleteResource()); + + const utils::pmr::string delimiter{",", mem}; + const utils::pmr::string quote{"\"", mem}; + + std::vector columns{"A", "B", "C"}; + writer.WriteLine(CreateRow(columns, delimiter)); + writer.WriteLine(CreateRow(columns, delimiter)); + writer.WriteLine(CreateRow(columns, delimiter)); + + writer.Close(); + + const bool with_header = false; + const bool ignore_bad = false; + const csv::Reader::Config cfg{with_header, ignore_bad, delimiter, quote}; + auto reader = csv::Reader(filepath, cfg); + + const auto pmr_columns = ToPmrColumns(columns); + while (auto parsed_row = reader.GetNextRow()) { + ASSERT_EQ(parsed_row, pmr_columns); + } +} + +TEST_F(CsvReaderTest, SkipAllRows) { + // create a file with all rows invalid (containing a string with a missing closing quote); + // parser should return 'std::nullopt' + const auto filepath = csv_directory / "bla.csv"; + auto writer = FileWriter(filepath); + + utils::MemoryResource *mem(utils::NewDeleteResource()); + + const utils::pmr::string delimiter{",", mem}; + const utils::pmr::string quote{"\"", mem}; + + const std::vector columns_bad{"A", "B", "\"\"C"}; + writer.WriteLine(CreateRow(columns_bad, delimiter)); + writer.WriteLine(CreateRow(columns_bad, delimiter)); + writer.WriteLine(CreateRow(columns_bad, delimiter)); + + writer.Close(); + + const bool with_header = false; + const bool ignore_bad = true; + const csv::Reader::Config cfg{with_header, ignore_bad, delimiter, 
quote}; + auto reader = csv::Reader(filepath, cfg); + + auto parsed_row = reader.GetNextRow(); + ASSERT_EQ(parsed_row, std::nullopt); +} + +TEST_F(CsvReaderTest, WithHeader) { + const auto filepath = csv_directory / "bla.csv"; + auto writer = FileWriter(filepath); + + utils::MemoryResource *mem(utils::NewDeleteResource()); + + const utils::pmr::string delimiter{",", mem}; + const utils::pmr::string quote{"\"", mem}; + + const std::vector header{"A", "B", "C"}; + const std::vector columns{"1", "2", "3"}; + writer.WriteLine(CreateRow(header, delimiter)); + writer.WriteLine(CreateRow(columns, delimiter)); + writer.WriteLine(CreateRow(columns, delimiter)); + writer.WriteLine(CreateRow(columns, delimiter)); + + writer.Close(); + + const bool with_header = true; + const bool ignore_bad = false; + const csv::Reader::Config cfg(with_header, ignore_bad, delimiter, quote); + auto reader = csv::Reader(filepath, cfg); + + const auto pmr_header = ToPmrColumns(header); + ASSERT_EQ(reader.GetHeader(), pmr_header); + + const auto pmr_columns = ToPmrColumns(columns); + while (auto parsed_row = reader.GetNextRow()) { + ASSERT_EQ(parsed_row, pmr_columns); + } +} From d913a67e160112676ce38724ac852ebc3da7992c Mon Sep 17 00:00:00 2001 From: antonio2368 Date: Fri, 19 Mar 2021 14:40:09 +0100 Subject: [PATCH 04/63] Add parsing improvements (#108) * Add parsing improvements --- src/query/plan/operator.cpp | 15 ++++--- src/utils/csv_parsing.cpp | 86 +++++++++++++++++-------------------- src/utils/string.hpp | 2 +- 3 files changed, 50 insertions(+), 53 deletions(-) diff --git a/src/query/plan/operator.cpp b/src/query/plan/operator.cpp index 6ec3859c1..2ec02787f 100644 --- a/src/query/plan/operator.cpp +++ b/src/query/plan/operator.cpp @@ -3720,8 +3720,10 @@ auto ToOptionalString(ExpressionEvaluator *evaluator, Expression *expression) -> TypedValue CsvRowToTypedList(csv::Reader::Row row, utils::MemoryResource *mem) { auto typed_columns = utils::pmr::vector(mem); - std::transform(begin(row), 
end(row), std::back_inserter(typed_columns), - [mem = mem](auto &column) { return TypedValue(column, mem); }); + typed_columns.reserve(row.size()); + for (auto &column : row) { + typed_columns.emplace_back(std::move(column)); + } return TypedValue(typed_columns, mem); } @@ -3729,7 +3731,7 @@ TypedValue CsvRowToTypedMap(csv::Reader::Row row, csv::Reader::Header header, ut // a valid row has the same number of elements as the header utils::pmr::map m(mem); for (auto i = 0; i < row.size(); ++i) { - m.emplace(header[i], TypedValue(row[i], mem)); + m.emplace(std::move(header[i]), std::move(row[i])); } return TypedValue(m, mem); } @@ -3800,9 +3802,10 @@ class LoadCsvCursor : public Cursor { // no need to check if maybe_file is std::nullopt, as the parser makes sure // we can't get a nullptr for the 'file_' member in the LoadCsv clause - return csv::Reader(*maybe_file, - csv::Reader::Config(self_->with_header_, self_->ignore_bad_, maybe_delim, maybe_quote), - eval_context->memory); + return csv::Reader( + *maybe_file, + csv::Reader::Config(self_->with_header_, self_->ignore_bad_, std::move(maybe_delim), std::move(maybe_quote)), + eval_context->memory); } }; diff --git a/src/utils/csv_parsing.cpp b/src/utils/csv_parsing.cpp index 027f74fa4..61d539b47 100644 --- a/src/utils/csv_parsing.cpp +++ b/src/utils/csv_parsing.cpp @@ -65,17 +65,17 @@ enum class CsvParserState : uint8_t { INITIAL_FIELD, NEXT_FIELD, QUOTING, - NOT_QUOTING, EXPECT_DELIMITER, }; -bool SubstringStartsWith(const std::string_view str, size_t pos, const std::string_view what) { - return utils::StartsWith(utils::Substr(str, pos), what); -} -} // namespace +} // namespace Reader::ParsingResult Reader::ParseRow() { utils::pmr::vector row(memory_); + if (number_of_columns_ != 0) { + row.reserve(number_of_columns_); + } + utils::pmr::string column(memory_); auto state = CsvParserState::INITIAL_FIELD; @@ -87,11 +87,16 @@ Reader::ParsingResult Reader::ParseRow() { break; } - for (size_t i = 0; i < 
maybe_line->size(); ++i) { - const auto c = (*maybe_line)[i]; + std::string_view line_string_view = *maybe_line; + + while (!line_string_view.empty()) { + const auto c = line_string_view[0]; // Line feeds and carriage returns are ignored in CSVs. - if (c == '\n' || c == '\r') continue; + if (c == '\n' || c == '\r') { + line_string_view.remove_prefix(1); + continue; + } // Null bytes aren't allowed in CSVs. if (c == '\0') { return ParseError(ParseError::ErrorCode::NULL_BYTE, @@ -101,53 +106,52 @@ Reader::ParsingResult Reader::ParseRow() { switch (state) { case CsvParserState::INITIAL_FIELD: case CsvParserState::NEXT_FIELD: { - if (SubstringStartsWith(*maybe_line, i, *read_config_.quote)) { + if (utils::StartsWith(line_string_view, *read_config_.quote)) { // The current field is a quoted field. state = CsvParserState::QUOTING; - i += read_config_.quote->size() - 1; - } else if (SubstringStartsWith(*maybe_line, i, *read_config_.delimiter)) { + line_string_view.remove_prefix(read_config_.quote->size()); + } else if (utils::StartsWith(line_string_view, *read_config_.delimiter)) { // The current field has an empty value. row.emplace_back(""); state = CsvParserState::NEXT_FIELD; - i += read_config_.delimiter->size() - 1; + line_string_view.remove_prefix(read_config_.delimiter->size()); } else { // The current field is a regular field. 
- column.push_back(c); - state = CsvParserState::NOT_QUOTING; + const auto delimiter_idx = line_string_view.find(*read_config_.delimiter); + row.emplace_back(line_string_view.substr(0, delimiter_idx)); + if (delimiter_idx == std::string_view::npos) { + line_string_view.remove_prefix(line_string_view.size()); + } else { + line_string_view.remove_prefix(delimiter_idx + read_config_.delimiter->size()); + } + state = CsvParserState::NEXT_FIELD; } break; } case CsvParserState::QUOTING: { - auto quote_now = SubstringStartsWith(*maybe_line, i, *read_config_.quote); - auto quote_next = SubstringStartsWith(*maybe_line, i + read_config_.quote->size(), *read_config_.quote); + const auto quote_now = utils::StartsWith(line_string_view, *read_config_.quote); + const auto quote_next = + utils::StartsWith(line_string_view.substr(read_config_.quote->size()), *read_config_.quote); if (quote_now && quote_next) { // This is an escaped quote character. column += *read_config_.quote; - i += read_config_.quote->size() * 2 - 1; - } else if (quote_now && !quote_next) { + line_string_view.remove_prefix(read_config_.quote->size() * 2); + } else if (quote_now) { // This is the end of the quoted field. 
row.emplace_back(std::move(column)); + column.clear(); state = CsvParserState::EXPECT_DELIMITER; - i += read_config_.quote->size() - 1; - } else { - column.push_back(c); - } - break; - } - case CsvParserState::NOT_QUOTING: { - if (SubstringStartsWith(*maybe_line, i, *read_config_.delimiter)) { - row.emplace_back(std::move(column)); - state = CsvParserState::NEXT_FIELD; - i += read_config_.delimiter->size() - 1; + line_string_view.remove_prefix(read_config_.quote->size()); } else { column.push_back(c); + line_string_view.remove_prefix(1); } break; } case CsvParserState::EXPECT_DELIMITER: { - if (SubstringStartsWith(*maybe_line, i, *read_config_.delimiter)) { + if (utils::StartsWith(line_string_view, *read_config_.delimiter)) { state = CsvParserState::NEXT_FIELD; - i += read_config_.delimiter->size() - 1; + line_string_view.remove_prefix(read_config_.delimiter->size()); } else { return ParseError(ParseError::ErrorCode::UNEXPECTED_TOKEN, fmt::format("CSV Reader: Expected '{}' after '{}', but got '{}' at line {:d}", @@ -160,26 +164,16 @@ Reader::ParsingResult Reader::ParseRow() { } while (state == CsvParserState::QUOTING); switch (state) { - case CsvParserState::INITIAL_FIELD: { + case CsvParserState::INITIAL_FIELD: + case CsvParserState::NEXT_FIELD: + case CsvParserState::EXPECT_DELIMITER: break; - } - case CsvParserState::NEXT_FIELD: { - row.emplace_back(std::move(column)); - break; - } case CsvParserState::QUOTING: { return ParseError(ParseError::ErrorCode::NO_CLOSING_QUOTE, "There is no more data left to load while inside a quoted string. 
" "Did you forget to close the quote?"); break; } - case CsvParserState::NOT_QUOTING: { - row.emplace_back(std::move(column)); - break; - } - case CsvParserState::EXPECT_DELIMITER: { - break; - } } // reached the end of file - return empty row @@ -202,7 +196,7 @@ Reader::ParsingResult Reader::ParseRow() { line_count_ - 1, row.size())); } - return row; + return std::move(row); } // Returns Reader::Row if the read row if valid; @@ -231,7 +225,7 @@ std::optional Reader::GetNextRow() { // reached end of file return std::nullopt; } - return *row; + return std::move(*row); } } // namespace csv diff --git a/src/utils/string.hpp b/src/utils/string.hpp index 18f399387..7edafe475 100644 --- a/src/utils/string.hpp +++ b/src/utils/string.hpp @@ -437,7 +437,7 @@ inline std::string Escape(const std::string_view &src) { inline std::string_view Substr(const std::string_view &string, size_t pos = 0, size_t count = std::string::npos) { if (pos >= string.size()) return std::string_view(string.data(), 0); auto len = std::min(string.size() - pos, count); - return std::string_view(string.data() + pos, len); + return string.substr(pos, len); } } // namespace utils From b3914c6b5d5d1c834867a02e0c99faaa96945cbb Mon Sep 17 00:00:00 2001 From: Josip Seljan <62958579+the-joksim@users.noreply.github.com> Date: Mon, 22 Mar 2021 19:23:37 +0100 Subject: [PATCH 05/63] Update CHANGELOG.md (#117) * Update CHANGELOG.md Co-authored-by: jseljan --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a85d71fa3..9e1a0ec95 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,9 @@ * Add support for multiple query modules directories at the same time. You can now define multiple, comma-separated paths to directories from which the modules will be loaded using the `--query-modules-directory` flag. +* Added support for programatically reading in data from CSV files through the + `LOAD CSV` clause. 
We support CSV files with and without a header, the + supported dialect being Excel. ### Bug Fixes From 25eb2c147a4cd728184a56e920e64cdd0cbf47dd Mon Sep 17 00:00:00 2001 From: Josip Seljan <62958579+the-joksim@users.noreply.github.com> Date: Wed, 24 Mar 2021 11:02:55 +0100 Subject: [PATCH 06/63] LOAD CSV implementation fixes (#120) * Change how csv::Reader handles memory resources * Add multiline quoted string test --- src/query/plan/operator.cpp | 25 ++++++++------ src/utils/csv_parsing.cpp | 22 ++++++------ src/utils/csv_parsing.hpp | 14 ++++---- tests/unit/utils_csv_parsing.cpp | 59 +++++++++++++++++++++++++------- 4 files changed, 80 insertions(+), 40 deletions(-) diff --git a/src/query/plan/operator.cpp b/src/query/plan/operator.cpp index 2ec02787f..4de436c88 100644 --- a/src/query/plan/operator.cpp +++ b/src/query/plan/operator.cpp @@ -3713,12 +3713,13 @@ TypedValue EvaluateOptionalExpression(Expression *expression, ExpressionEvaluato auto ToOptionalString(ExpressionEvaluator *evaluator, Expression *expression) -> std::optional { const auto evaluated_expr = EvaluateOptionalExpression(expression, evaluator); if (evaluated_expr.IsString()) { - return utils::pmr::string(evaluated_expr.ValueString(), evaluator->GetMemoryResource()); + return utils::pmr::string(evaluated_expr.ValueString(), utils::NewDeleteResource()); } return std::nullopt; }; -TypedValue CsvRowToTypedList(csv::Reader::Row row, utils::MemoryResource *mem) { +TypedValue CsvRowToTypedList(csv::Reader::Row row) { + auto *mem = row.get_allocator().GetMemoryResource(); auto typed_columns = utils::pmr::vector(mem); typed_columns.reserve(row.size()); for (auto &column : row) { @@ -3727,8 +3728,9 @@ TypedValue CsvRowToTypedList(csv::Reader::Row row, utils::MemoryResource *mem) { return TypedValue(typed_columns, mem); } -TypedValue CsvRowToTypedMap(csv::Reader::Row row, csv::Reader::Header header, utils::MemoryResource *mem) { +TypedValue CsvRowToTypedMap(csv::Reader::Row row, csv::Reader::Header header) 
{ // a valid row has the same number of elements as the header + auto *mem = row.get_allocator().GetMemoryResource(); utils::pmr::map m(mem); for (auto i = 0; i < row.size(); ++i) { m.emplace(std::move(header[i]), std::move(row[i])); @@ -3773,12 +3775,12 @@ class LoadCsvCursor : public Cursor { // pulling MATCH). if (!input_is_once_ && !input_pulled) return false; - if (auto row = reader_->GetNextRow()) { + if (auto row = reader_->GetNextRow(context.evaluation_context.memory)) { if (!reader_->HasHeader()) { - frame[self_->row_var_] = CsvRowToTypedList(std::move(*row), context.evaluation_context.memory); + frame[self_->row_var_] = CsvRowToTypedList(std::move(*row)); } else { - frame[self_->row_var_] = - CsvRowToTypedMap(std::move(*row), *reader_->GetHeader(), context.evaluation_context.memory); + frame[self_->row_var_] = CsvRowToTypedMap( + std::move(*row), csv::Reader::Header(reader_->GetHeader(), context.evaluation_context.memory)); } return true; } @@ -3800,12 +3802,15 @@ class LoadCsvCursor : public Cursor { auto maybe_delim = ToOptionalString(&evaluator, self_->delimiter_); auto maybe_quote = ToOptionalString(&evaluator, self_->quote_); - // no need to check if maybe_file is std::nullopt, as the parser makes sure - // we can't get a nullptr for the 'file_' member in the LoadCsv clause + // No need to check if maybe_file is std::nullopt, as the parser makes sure + // we can't get a nullptr for the 'file_' member in the LoadCsv clause. + // Note that the reader has to be given its own memory resource, as it + // persists between pulls, so it can't use the evalutation context memory + // resource. 
return csv::Reader( *maybe_file, csv::Reader::Config(self_->with_header_, self_->ignore_bad_, std::move(maybe_delim), std::move(maybe_quote)), - eval_context->memory); + utils::NewDeleteResource()); } }; diff --git a/src/utils/csv_parsing.cpp b/src/utils/csv_parsing.cpp index 61d539b47..d3843b795 100644 --- a/src/utils/csv_parsing.cpp +++ b/src/utils/csv_parsing.cpp @@ -19,8 +19,8 @@ void Reader::InitializeStream() { } } -std::optional Reader::GetNextLine() { - utils::pmr::string line(memory_); +std::optional Reader::GetNextLine(utils::MemoryResource *mem) { + utils::pmr::string line(mem); if (!std::getline(csv_stream_, line)) { // reached end of file or an I/0 error occurred if (!csv_stream_.good()) { @@ -35,7 +35,7 @@ std::optional Reader::GetNextLine() { Reader::ParsingResult Reader::ParseHeader() { // header must be the very first line in the file MG_ASSERT(line_count_ == 1, fmt::format("Invalid use of {}", __func__)); - return ParseRow(); + return ParseRow(memory_); } void Reader::TryInitializeHeader() { @@ -53,12 +53,12 @@ void Reader::TryInitializeHeader() { } number_of_columns_ = header->size(); - header_ = *header; + header_ = std::move(*header); } [[nodiscard]] bool Reader::HasHeader() const { return read_config_.with_header; } -const std::optional &Reader::GetHeader() const { return header_; } +const Reader::Header &Reader::GetHeader() const { return header_; } namespace { enum class CsvParserState : uint8_t { @@ -70,8 +70,8 @@ enum class CsvParserState : uint8_t { } // namespace -Reader::ParsingResult Reader::ParseRow() { - utils::pmr::vector row(memory_); +Reader::ParsingResult Reader::ParseRow(utils::MemoryResource *mem) { + utils::pmr::vector row(mem); if (number_of_columns_ != 0) { row.reserve(number_of_columns_); } @@ -81,7 +81,7 @@ Reader::ParsingResult Reader::ParseRow() { auto state = CsvParserState::INITIAL_FIELD; do { - const auto maybe_line = GetNextLine(); + const auto maybe_line = GetNextLine(mem); if (!maybe_line) { // The whole file was 
processed. break; @@ -204,8 +204,8 @@ Reader::ParsingResult Reader::ParseRow() { // making it unreadable; // @throws CsvReadException if a bad row is encountered, and the ignore_bad is set // to 'true' in the Reader::Config. -std::optional Reader::GetNextRow() { - auto row = ParseRow(); +std::optional Reader::GetNextRow(utils::MemoryResource *mem) { + auto row = ParseRow(mem); if (row.HasError()) { if (!read_config_.ignore_bad) { @@ -217,7 +217,7 @@ std::optional Reader::GetNextRow() { if (!csv_stream_.good()) { return std::nullopt; } - row = ParseRow(); + row = ParseRow(mem); } while (row.HasError()); } diff --git a/src/utils/csv_parsing.hpp b/src/utils/csv_parsing.hpp index 8727ab3c6..efe4bf469 100644 --- a/src/utils/csv_parsing.hpp +++ b/src/utils/csv_parsing.hpp @@ -44,7 +44,7 @@ class Reader { Reader() = default; explicit Reader(std::filesystem::path path, Config cfg, utils::MemoryResource *mem = utils::NewDeleteResource()) - : path_(std::move(path)), memory_(mem) { + : memory_(mem), path_(std::move(path)) { read_config_.with_header = cfg.with_header; read_config_.ignore_bad = cfg.ignore_bad; read_config_.delimiter = cfg.delimiter ? std::move(*cfg.delimiter) : utils::pmr::string{",", memory_}; @@ -71,27 +71,27 @@ class Reader { using ParsingResult = utils::BasicResult; [[nodiscard]] bool HasHeader() const; - const std::optional
&GetHeader() const; - std::optional GetNextRow(); + const Header &GetHeader() const; + std::optional GetNextRow(utils::MemoryResource *mem); private: + utils::MemoryResource *memory_; std::filesystem::path path_; std::ifstream csv_stream_; Config read_config_; uint64_t line_count_{1}; uint16_t number_of_columns_{0}; - std::optional
header_{}; - utils::MemoryResource *memory_; + Header header_{memory_}; void InitializeStream(); void TryInitializeHeader(); - std::optional GetNextLine(); + std::optional GetNextLine(utils::MemoryResource *mem); ParsingResult ParseHeader(); - ParsingResult ParseRow(); + ParsingResult ParseRow(utils::MemoryResource *mem); }; } // namespace csv diff --git a/tests/unit/utils_csv_parsing.cpp b/tests/unit/utils_csv_parsing.cpp index f405dfe24..0305e3b05 100644 --- a/tests/unit/utils_csv_parsing.cpp +++ b/tests/unit/utils_csv_parsing.cpp @@ -89,8 +89,8 @@ TEST_F(CsvReaderTest, CommaDelimiter) { csv::Reader::Config cfg{with_header, ignore_bad, delimiter, quote}; auto reader = csv::Reader(filepath, cfg, mem); - auto parsed_row = reader.GetNextRow(); - ASSERT_EQ(parsed_row, ToPmrColumns(columns)); + auto parsed_row = reader.GetNextRow(mem); + ASSERT_EQ(*parsed_row, ToPmrColumns(columns)); } TEST_F(CsvReaderTest, SemicolonDelimiter) { @@ -112,8 +112,8 @@ TEST_F(CsvReaderTest, SemicolonDelimiter) { const csv::Reader::Config cfg{with_header, ignore_bad, delimiter, quote}; auto reader = csv::Reader(filepath, cfg, mem); - auto parsed_row = reader.GetNextRow(); - ASSERT_EQ(parsed_row, ToPmrColumns(columns)); + auto parsed_row = reader.GetNextRow(mem); + ASSERT_EQ(*parsed_row, ToPmrColumns(columns)); } TEST_F(CsvReaderTest, SkipBad) { @@ -145,8 +145,8 @@ TEST_F(CsvReaderTest, SkipBad) { const csv::Reader::Config cfg{with_header, ignore_bad, delimiter, quote}; auto reader = csv::Reader(filepath, cfg, mem); - auto parsed_row = reader.GetNextRow(); - ASSERT_EQ(parsed_row, ToPmrColumns(columns_good)); + auto parsed_row = reader.GetNextRow(mem); + ASSERT_EQ(*parsed_row, ToPmrColumns(columns_good)); } { @@ -157,7 +157,7 @@ TEST_F(CsvReaderTest, SkipBad) { const csv::Reader::Config cfg{with_header, ignore_bad, delimiter, quote}; auto reader = csv::Reader(filepath, cfg, mem); - EXPECT_THROW(reader.GetNextRow(), csv::CsvReadException); + EXPECT_THROW(reader.GetNextRow(mem), 
csv::CsvReadException); } } @@ -185,8 +185,8 @@ TEST_F(CsvReaderTest, AllRowsValid) { auto reader = csv::Reader(filepath, cfg); const auto pmr_columns = ToPmrColumns(columns); - while (auto parsed_row = reader.GetNextRow()) { - ASSERT_EQ(parsed_row, pmr_columns); + while (auto parsed_row = reader.GetNextRow(mem)) { + ASSERT_EQ(*parsed_row, pmr_columns); } } @@ -213,7 +213,7 @@ TEST_F(CsvReaderTest, SkipAllRows) { const csv::Reader::Config cfg{with_header, ignore_bad, delimiter, quote}; auto reader = csv::Reader(filepath, cfg); - auto parsed_row = reader.GetNextRow(); + auto parsed_row = reader.GetNextRow(mem); ASSERT_EQ(parsed_row, std::nullopt); } @@ -244,7 +244,42 @@ TEST_F(CsvReaderTest, WithHeader) { ASSERT_EQ(reader.GetHeader(), pmr_header); const auto pmr_columns = ToPmrColumns(columns); - while (auto parsed_row = reader.GetNextRow()) { - ASSERT_EQ(parsed_row, pmr_columns); + while (auto parsed_row = reader.GetNextRow(mem)) { + ASSERT_EQ(*parsed_row, pmr_columns); } } + +TEST_F(CsvReaderTest, MultilineQuotedString) { + // create a file with first row valid and the second row containing a quoted + // string spanning two lines; + // parser should return two valid rows + const auto filepath = csv_directory / "bla.csv"; + auto writer = FileWriter(filepath); + + utils::MemoryResource *mem(utils::NewDeleteResource()); + + const utils::pmr::string delimiter{",", mem}; + const utils::pmr::string quote{"\"", mem}; + + const std::vector first_row{"A", "B", "C"}; + const std::vector multiline_first{"D", "\"E", "\"\"F"}; + const std::vector multiline_second{"G\"", "H"}; + + writer.WriteLine(CreateRow(first_row, delimiter)); + writer.WriteLine(CreateRow(multiline_first, delimiter)); + writer.WriteLine(CreateRow(multiline_second, delimiter)); + + writer.Close(); + + const bool with_header = false; + const bool ignore_bad = true; + const csv::Reader::Config cfg{with_header, ignore_bad, delimiter, quote}; + auto reader = csv::Reader(filepath, cfg); + + auto parsed_row = 
reader.GetNextRow(mem); + ASSERT_EQ(*parsed_row, ToPmrColumns(first_row)); + + const std::vector expected_multiline{"D", "E,\"FG", "H"}; + parsed_row = reader.GetNextRow(mem); + ASSERT_EQ(*parsed_row, ToPmrColumns(expected_multiline)); +} From e8810a415211cc6660f6ac04ea3d01274e555174 Mon Sep 17 00:00:00 2001 From: antonio2368 Date: Mon, 15 Feb 2021 09:52:40 +0100 Subject: [PATCH 07/63] Add and use jemalloc lib (#90) * Add and use jemalloc lib * Add autoconf * Silence macro redefinition warning --- environment/os/centos-7.sh | 1 + environment/os/centos-8.sh | 1 + environment/os/debian-10.sh | 1 + environment/os/debian-9.sh | 1 + environment/os/ubuntu-18.04.sh | 1 + environment/os/ubuntu-20.04.sh | 1 + libs/.gitignore | 1 + libs/CMakeLists.txt | 4 +++ libs/jemalloc.cmake | 52 ++++++++++++++++++++++++++++++ libs/setup.sh | 19 +++++++++++ src/CMakeLists.txt | 2 +- src/utils/CMakeLists.txt | 3 ++ src/utils/new_delete.cpp | 58 ++++++++++++++++++++++++++++++++++ tests/unit/CMakeLists.txt | 2 +- 14 files changed, 145 insertions(+), 2 deletions(-) create mode 100644 libs/jemalloc.cmake create mode 100644 src/utils/new_delete.cpp diff --git a/environment/os/centos-7.sh b/environment/os/centos-7.sh index 6f8c514b5..3b381c97d 100755 --- a/environment/os/centos-7.sh +++ b/environment/os/centos-7.sh @@ -46,6 +46,7 @@ MEMGRAPH_BUILD_DEPS=( rpm-build rpmlint # for RPM package building doxygen graphviz # source documentation generators which mono-complete dotnet-sdk-3.1 golang nodejs zip unzip java-11-openjdk-devel # for driver tests + autoconf # for jemalloc code generation ) list() { echo "$1" diff --git a/environment/os/centos-8.sh b/environment/os/centos-8.sh index 0312926fa..aa61f8afe 100755 --- a/environment/os/centos-8.sh +++ b/environment/os/centos-8.sh @@ -45,6 +45,7 @@ MEMGRAPH_BUILD_DEPS=( doxygen graphviz # source documentation generators which mono-complete dotnet-sdk-3.1 nodejs golang zip unzip java-11-openjdk-devel # for driver tests sbcl # for custom Lisp C++ 
preprocessing + autoconf # for jemalloc code generation ) list() { echo "$1" diff --git a/environment/os/debian-10.sh b/environment/os/debian-10.sh index 4c3b5ec2b..87cde6588 100755 --- a/environment/os/debian-10.sh +++ b/environment/os/debian-10.sh @@ -43,6 +43,7 @@ MEMGRAPH_BUILD_DEPS=( doxygen graphviz # source documentation generators mono-runtime mono-mcs zip unzip default-jdk-headless # for driver tests dotnet-sdk-3.1 golang nodejs npm + autoconf # for jemalloc code generation ) list() { echo "$1" diff --git a/environment/os/debian-9.sh b/environment/os/debian-9.sh index 1ceb83231..956f84cd2 100755 --- a/environment/os/debian-9.sh +++ b/environment/os/debian-9.sh @@ -41,6 +41,7 @@ MEMGRAPH_BUILD_DEPS=( sbcl # for custom Lisp C++ preprocessing doxygen graphviz # source documentation generators mono-runtime mono-mcs nodejs zip unzip default-jdk-headless # for driver tests + autoconf # for jemalloc code generation ) list() { echo "$1" diff --git a/environment/os/ubuntu-18.04.sh b/environment/os/ubuntu-18.04.sh index b8f393a40..397fe7a65 100755 --- a/environment/os/ubuntu-18.04.sh +++ b/environment/os/ubuntu-18.04.sh @@ -42,6 +42,7 @@ MEMGRAPH_BUILD_DEPS=( sbcl # custom Lisp C++ preprocessing doxygen graphviz # source documentation generators mono-runtime mono-mcs nodejs zip unzip default-jdk-headless # driver tests + autoconf # for jemalloc code generation ) list() { echo "$1" diff --git a/environment/os/ubuntu-20.04.sh b/environment/os/ubuntu-20.04.sh index e4a650697..508db0893 100755 --- a/environment/os/ubuntu-20.04.sh +++ b/environment/os/ubuntu-20.04.sh @@ -43,6 +43,7 @@ MEMGRAPH_BUILD_DEPS=( doxygen graphviz # source documentation generators mono-runtime mono-mcs zip unzip default-jdk-headless # for driver tests dotnet-sdk-3.1 golang nodejs npm + autoconf # for jemalloc code generation ) list() { echo "$1" diff --git a/libs/.gitignore b/libs/.gitignore index 51e7aac4c..08bd2de9f 100644 --- a/libs/.gitignore +++ b/libs/.gitignore @@ -4,3 +4,4 @@ !cleanup.sh 
!CMakeLists.txt !__main.cpp +!jemalloc.cmake diff --git a/libs/CMakeLists.txt b/libs/CMakeLists.txt index 2dd4c1bf9..0d165815d 100644 --- a/libs/CMakeLists.txt +++ b/libs/CMakeLists.txt @@ -8,6 +8,8 @@ if (NPROC EQUAL 0) set(NPROC 1) endif() +set(LIB_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + # convenience functions function(import_header_library name include_dir) add_library(${name} INTERFACE IMPORTED GLOBAL) @@ -212,3 +214,5 @@ import_external_library(spdlog STATIC ${CMAKE_CURRENT_SOURCE_DIR}/spdlog/${CMAKE_INSTALL_LIBDIR}/libspdlog.a ${CMAKE_CURRENT_SOURCE_DIR}/spdlog/include BUILD_COMMAND $(MAKE) spdlog) + +include(jemalloc.cmake) diff --git a/libs/jemalloc.cmake b/libs/jemalloc.cmake new file mode 100644 index 000000000..4625f98bc --- /dev/null +++ b/libs/jemalloc.cmake @@ -0,0 +1,52 @@ +set(JEMALLOC_DIR "${LIB_DIR}/jemalloc") + +set(JEMALLOC_SRCS + ${JEMALLOC_DIR}/src/arena.c + ${JEMALLOC_DIR}/src/background_thread.c + ${JEMALLOC_DIR}/src/base.c + ${JEMALLOC_DIR}/src/bin.c + ${JEMALLOC_DIR}/src/bitmap.c + ${JEMALLOC_DIR}/src/ckh.c + ${JEMALLOC_DIR}/src/ctl.c + ${JEMALLOC_DIR}/src/div.c + ${JEMALLOC_DIR}/src/extent.c + ${JEMALLOC_DIR}/src/extent_dss.c + ${JEMALLOC_DIR}/src/extent_mmap.c + ${JEMALLOC_DIR}/src/hash.c + ${JEMALLOC_DIR}/src/hook.c + ${JEMALLOC_DIR}/src/jemalloc.c + ${JEMALLOC_DIR}/src/large.c + ${JEMALLOC_DIR}/src/log.c + ${JEMALLOC_DIR}/src/malloc_io.c + ${JEMALLOC_DIR}/src/mutex.c + ${JEMALLOC_DIR}/src/mutex_pool.c + ${JEMALLOC_DIR}/src/nstime.c + ${JEMALLOC_DIR}/src/pages.c + ${JEMALLOC_DIR}/src/prng.c + ${JEMALLOC_DIR}/src/prof.c + ${JEMALLOC_DIR}/src/rtree.c + ${JEMALLOC_DIR}/src/sc.c + ${JEMALLOC_DIR}/src/stats.c + ${JEMALLOC_DIR}/src/sz.c + ${JEMALLOC_DIR}/src/tcache.c + ${JEMALLOC_DIR}/src/test_hooks.c + ${JEMALLOC_DIR}/src/ticker.c + ${JEMALLOC_DIR}/src/tsd.c + ${JEMALLOC_DIR}/src/witness.c + ${JEMALLOC_DIR}/src/safety_check.c +) + +add_library(jemalloc ${JEMALLOC_SRCS}) +target_include_directories(jemalloc PUBLIC "${JEMALLOC_DIR}/include") + 
+target_compile_definitions(jemalloc PRIVATE -DJEMALLOC_NO_PRIVATE_NAMESPACE) + +if (CMAKE_BUILD_TYPE STREQUAL "DEBUG") + target_compile_definitions(jemalloc PRIVATE -DJEMALLOC_DEBUG=1 -DJEMALLOC_PROF=1) +endif() + +target_compile_options(jemalloc PRIVATE -Wno-redundant-decls) +# for RTLD_NEXT +target_compile_options(jemalloc PRIVATE -D_GNU_SOURCE) + +set_property(TARGET jemalloc APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS USE_JEMALLOC=1) diff --git a/libs/setup.sh b/libs/setup.sh index 9c7df826e..d00c13e2e 100755 --- a/libs/setup.sh +++ b/libs/setup.sh @@ -121,3 +121,22 @@ clone https://github.com/memgraph/pymgclient.git pymgclient $pymgclient_tag spdlog_tag="46d418164dd4cd9822cf8ca62a116a3f71569241" # (2020-12-01) clone https://github.com/gabime/spdlog spdlog $spdlog_tag + +jemalloc_tag="ea6b3e973b477b8061e0076bb257dbd7f3faa756" # (2021-02-11) +clone https://github.com/jemalloc/jemalloc.git jemalloc $jemalloc_tag +pushd jemalloc +# ThreadPool select job randomly, and there can be some threads that had been +# performed some memory heavy task before and will be inactive for some time, +# but until it will became active again, the memory will not be freed since by +# default each thread has it's own arena, but there should be not more then +# 4*CPU arenas (see opt.nareans description). +# +# By enabling percpu_arena number of arenas limited to number of CPUs and hence +# this problem should go away. +# +# muzzy_decay_ms -- use MADV_FREE when available on newer Linuxes, to +# avoid spurious latencies and additional work associated with +# MADV_DONTNEED. See +# https://github.com/ClickHouse/ClickHouse/issues/11121 for motivation. 
+./autogen.sh --with-malloc-conf="percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:10000" +popd diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 143e128ec..2dbba0529 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -38,7 +38,7 @@ if (MG_ENTERPRISE) endif() set(MG_SINGLE_NODE_V2_LIBS stdc++fs Threads::Threads - telemetry_lib mg-query mg-communication) + telemetry_lib mg-query mg-communication mg-new-delete) if (MG_ENTERPRISE) # These are enterprise subsystems set(MG_SINGLE_NODE_V2_LIBS ${MG_SINGLE_NODE_V2_LIBS} mg-auth mg-audit) diff --git a/src/utils/CMakeLists.txt b/src/utils/CMakeLists.txt index 653283e11..58b1ec821 100644 --- a/src/utils/CMakeLists.txt +++ b/src/utils/CMakeLists.txt @@ -11,3 +11,6 @@ set(utils_src_files add_library(mg-utils STATIC ${utils_src_files}) target_link_libraries(mg-utils stdc++fs Threads::Threads spdlog fmt gflags uuid) + +add_library(mg-new-delete STATIC new_delete.cpp) +target_link_libraries(mg-new-delete jemalloc) diff --git a/src/utils/new_delete.cpp b/src/utils/new_delete.cpp new file mode 100644 index 000000000..78ad1d53d --- /dev/null +++ b/src/utils/new_delete.cpp @@ -0,0 +1,58 @@ +#include +#include + +#if USE_JEMALLOC +#include +#else +#include +#endif + +#include "utils/likely.hpp" + +namespace { +void *newImpl(std::size_t size) { + auto *ptr = malloc(size); + if (LIKELY(ptr != nullptr)) { + return ptr; + } + + throw std::bad_alloc{}; +} + +void *newNoExcept(const std::size_t size) noexcept { return malloc(size); } + +void deleteImpl(void *ptr) noexcept { free(ptr); } + +#if USE_JEMALLOC + +void deleteSized(void *ptr, const std::size_t size) noexcept { + if (UNLIKELY(ptr == nullptr)) { + return; + } + + sdallocx(ptr, size, 0); +} + +#else + +void deleteSized(void *ptr, const std::size_t /*unused*/) noexcept { free(ptr); } + +#endif + +} // namespace + +void *operator new(std::size_t size) { return newImpl(size); } + +void *operator new[](std::size_t size) { return newImpl(size); } + +void 
*operator new(std::size_t size, const std::nothrow_t & /*unused*/) noexcept { return newNoExcept(size); } + +void *operator new[](std::size_t size, const std::nothrow_t & /*unused*/) noexcept { return newNoExcept(size); } + +void operator delete(void *ptr) noexcept { deleteImpl(ptr); } + +void operator delete[](void *ptr) noexcept { deleteImpl(ptr); } + +void operator delete(void *ptr, std::size_t size) noexcept { deleteSized(ptr, size); } + +void operator delete[](void *ptr, std::size_t size) noexcept { deleteSized(ptr, size); } diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index 89d6815fc..eedfaecec 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -32,7 +32,7 @@ function(_add_unit_test test_cpp custom_main) # used to help create two targets of the same name even though CMake # requires unique logical target names set_target_properties(${target_name} PROPERTIES OUTPUT_NAME ${exec_name}) - target_link_libraries(${target_name} mg-utils gtest gmock Threads::Threads) + target_link_libraries(${target_name} mg-utils mg-new-delete gtest gmock Threads::Threads dl) # register test if(TEST_COVERAGE) add_test(${target_name} env LLVM_PROFILE_FILE=${exec_name}.profraw ./${exec_name}) From bbed7a23975df0457f41e3fd6fb6d87b7d318f13 Mon Sep 17 00:00:00 2001 From: antonio2368 Date: Mon, 22 Feb 2021 20:51:46 +0100 Subject: [PATCH 08/63] Add memory tracker definition (#93) * Allow size 0 in MemoryTracker * Block only exception throwing * Subtract unsuccessfully allocated size in memory tracker * Add oom exception enablers and blockers --- src/memgraph.cpp | 3 + src/utils/CMakeLists.txt | 3 +- src/utils/memory_tracker.cpp | 103 ++++++++++++++++++++++++++++ src/utils/memory_tracker.hpp | 85 +++++++++++++++++++++++ src/utils/new_delete.cpp | 85 ++++++++++++++++++++--- tests/unit/CMakeLists.txt | 3 + tests/unit/utils_memory_tracker.cpp | 54 +++++++++++++++ 7 files changed, 326 insertions(+), 10 deletions(-) create mode 100644 
src/utils/memory_tracker.cpp create mode 100644 src/utils/memory_tracker.hpp create mode 100644 tests/unit/utils_memory_tracker.cpp diff --git a/src/memgraph.cpp b/src/memgraph.cpp index 4f2bbcb61..29037a0c0 100644 --- a/src/memgraph.cpp +++ b/src/memgraph.cpp @@ -35,6 +35,7 @@ #include "utils/file.hpp" #include "utils/flag_validation.hpp" #include "utils/logging.hpp" +#include "utils/memory_tracker.hpp" #include "utils/signals.hpp" #include "utils/string.hpp" #include "utils/sysinfo/memory.hpp" @@ -1024,5 +1025,7 @@ int main(int argc, char **argv) { // Shutdown Python Py_Finalize(); PyMem_RawFree(program_name); + + utils::total_memory_tracker.LogPeakMemoryUsage(); return 0; } diff --git a/src/utils/CMakeLists.txt b/src/utils/CMakeLists.txt index 58b1ec821..f93e4ab2f 100644 --- a/src/utils/CMakeLists.txt +++ b/src/utils/CMakeLists.txt @@ -4,6 +4,7 @@ set(utils_src_files file.cpp file_locker.cpp memory.cpp + memory_tracker.cpp signals.cpp thread.cpp thread_pool.cpp @@ -13,4 +14,4 @@ add_library(mg-utils STATIC ${utils_src_files}) target_link_libraries(mg-utils stdc++fs Threads::Threads spdlog fmt gflags uuid) add_library(mg-new-delete STATIC new_delete.cpp) -target_link_libraries(mg-new-delete jemalloc) +target_link_libraries(mg-new-delete jemalloc fmt) diff --git a/src/utils/memory_tracker.cpp b/src/utils/memory_tracker.cpp new file mode 100644 index 000000000..624b3ca3e --- /dev/null +++ b/src/utils/memory_tracker.cpp @@ -0,0 +1,103 @@ +#include "utils/memory_tracker.hpp" + +#include +#include +#include + +#include "utils/likely.hpp" +#include "utils/logging.hpp" +#include "utils/on_scope_exit.hpp" + +namespace utils { + +namespace { + +// Prevent memory tracker for throwing during the stack unwinding +bool MemoryTrackerCanThrow() { + return !std::uncaught_exceptions() && MemoryTracker::OutOfMemoryExceptionEnabler::CanThrow() && + !MemoryTracker::OutOfMemoryExceptionBlocker::IsBlocked(); +} + +std::string GetReadableSize(double size) { + // TODO (antonio2368): Add 
support for base 1000 (KB, GB, TB...) + constexpr std::array units = {"B", "KiB", "MiB", "GiB", "TiB"}; + constexpr double delimiter = 1024; + + size_t i = 0; + for (; i + 1 < units.size() && size >= delimiter; ++i) { + size /= delimiter; + } + + // bytes don't need decimals + if (i == 0) { + return fmt::format("{:.0f}{}", size, units[i]); + } + + return fmt::format("{:.2f}{}", size, units[i]); +} + +} // namespace + +thread_local uint64_t MemoryTracker::OutOfMemoryExceptionEnabler::counter_ = 0; +MemoryTracker::OutOfMemoryExceptionEnabler::OutOfMemoryExceptionEnabler() { ++counter_; } +MemoryTracker::OutOfMemoryExceptionEnabler::~OutOfMemoryExceptionEnabler() { --counter_; } +bool MemoryTracker::OutOfMemoryExceptionEnabler::CanThrow() { return counter_ > 0; } + +thread_local uint64_t MemoryTracker::OutOfMemoryExceptionBlocker::counter_ = 0; +MemoryTracker::OutOfMemoryExceptionBlocker::OutOfMemoryExceptionBlocker() { ++counter_; } +MemoryTracker::OutOfMemoryExceptionBlocker::~OutOfMemoryExceptionBlocker() { --counter_; } +bool MemoryTracker::OutOfMemoryExceptionBlocker::IsBlocked() { return counter_ > 0; } + +MemoryTracker total_memory_tracker; + +// TODO (antonio2368): Define how should the peak memory be logged. +// Logging every time the peak changes is too much so some kind of distribution +// should be used. +void MemoryTracker::LogPeakMemoryUsage() const { spdlog::info("Peak memory usage: {}", GetReadableSize(peak_)); } + +// TODO (antonio2368): Define how should the memory be logged. +// Logging on each allocation is too much so some kind of distribution +// should be used. 
+void MemoryTracker::LogMemoryUsage(const int64_t current) { + spdlog::info("Current memory usage: {}", GetReadableSize(current)); +} + +void MemoryTracker::UpdatePeak(const int64_t will_be) { + auto peak_old = peak_.load(std::memory_order_relaxed); + if (will_be > peak_old) { + peak_.store(will_be, std::memory_order_relaxed); + } +} + +void MemoryTracker::SetHardLimit(const int64_t limit) { hard_limit_.store(limit, std::memory_order_relaxed); } + +void MemoryTracker::SetOrRaiseHardLimit(const int64_t limit) { + int64_t old_limit = hard_limit_.load(std::memory_order_relaxed); + while (old_limit < limit && !hard_limit_.compare_exchange_weak(old_limit, limit)) + ; +} + +void MemoryTracker::Alloc(const int64_t size) { + MG_ASSERT(size >= 0, "Negative size passed to the MemoryTracker."); + + const int64_t will_be = size + amount_.fetch_add(size, std::memory_order_relaxed); + + const auto current_hard_limit = hard_limit_.load(std::memory_order_relaxed); + + if (UNLIKELY(current_hard_limit && will_be > current_hard_limit && MemoryTrackerCanThrow())) { + MemoryTracker::OutOfMemoryExceptionBlocker exception_blocker; + + amount_.fetch_sub(size, std::memory_order_relaxed); + + throw OutOfMemoryException( + fmt::format("Memory limit exceeded! 
Atempting to allocate a chunk of {} which would put the current " + "use to {}, while the maximum allowed size for allocation is set to {}.", + GetReadableSize(size), GetReadableSize(will_be), GetReadableSize(current_hard_limit))); + } + + UpdatePeak(will_be); +} + +void MemoryTracker::Free(const int64_t size) { amount_.fetch_sub(size, std::memory_order_relaxed); } + +} // namespace utils diff --git a/src/utils/memory_tracker.hpp b/src/utils/memory_tracker.hpp new file mode 100644 index 000000000..14cb4f307 --- /dev/null +++ b/src/utils/memory_tracker.hpp @@ -0,0 +1,85 @@ +#pragma once + +#include + +#include "utils/exceptions.hpp" + +namespace utils { + +class OutOfMemoryException : public utils::BasicException { + public: + explicit OutOfMemoryException(const std::string &msg) : utils::BasicException(msg) {} +}; + +class MemoryTracker final { + private: + std::atomic amount_{0}; + std::atomic peak_{0}; + std::atomic hard_limit_{0}; + + void UpdatePeak(int64_t will_be); + + static void LogMemoryUsage(int64_t current); + + public: + void LogPeakMemoryUsage() const; + + MemoryTracker() = default; + ~MemoryTracker() = default; + + MemoryTracker(const MemoryTracker &) = delete; + MemoryTracker &operator=(const MemoryTracker &) = delete; + MemoryTracker(MemoryTracker &&) = delete; + MemoryTracker &operator=(MemoryTracker &&) = delete; + + void Alloc(int64_t size); + void Free(int64_t size); + + auto Amount() const { return amount_.load(std::memory_order_relaxed); } + + auto Peak() const { return peak_.load(std::memory_order_relaxed); } + + void SetHardLimit(int64_t limit); + void SetOrRaiseHardLimit(int64_t limit); + + // By creating an object of this class, every allocation in its scope that goes over + // the set hard limit produces an OutOfMemoryException. 
+ class OutOfMemoryExceptionEnabler final { + public: + OutOfMemoryExceptionEnabler(const OutOfMemoryExceptionEnabler &) = delete; + OutOfMemoryExceptionEnabler &operator=(const OutOfMemoryExceptionEnabler &) = delete; + OutOfMemoryExceptionEnabler(OutOfMemoryExceptionEnabler &&) = delete; + OutOfMemoryExceptionEnabler &operator=(OutOfMemoryExceptionEnabler &&) = delete; + + OutOfMemoryExceptionEnabler(); + ~OutOfMemoryExceptionEnabler(); + + static bool CanThrow(); + + private: + static thread_local uint64_t counter_; + }; + + // By creating an object of this class, we negate the effect of every OutOfMemoryExceptionEnabler + // object. We need this object so we can guard only the smaller parts of code from exceptions while + // allowing the exception in the other parts if the OutOfMemoryExceptionEnabler is defined. + class OutOfMemoryExceptionBlocker final { + public: + OutOfMemoryExceptionBlocker(const OutOfMemoryExceptionBlocker &) = delete; + OutOfMemoryExceptionBlocker &operator=(const OutOfMemoryExceptionBlocker &) = delete; + OutOfMemoryExceptionBlocker(OutOfMemoryExceptionBlocker &&) = delete; + OutOfMemoryExceptionBlocker &operator=(OutOfMemoryExceptionBlocker &&) = delete; + + OutOfMemoryExceptionBlocker(); + ~OutOfMemoryExceptionBlocker(); + + static bool IsBlocked(); + + private: + static thread_local uint64_t counter_; + }; +}; + +// Global memory tracker which tracks every allocation in the application. 
+extern MemoryTracker total_memory_tracker; +} // namespace utils diff --git a/src/utils/new_delete.cpp b/src/utils/new_delete.cpp index 78ad1d53d..a82a6a524 100644 --- a/src/utils/new_delete.cpp +++ b/src/utils/new_delete.cpp @@ -1,4 +1,3 @@ -#include #include #if USE_JEMALLOC @@ -8,6 +7,7 @@ #endif #include "utils/likely.hpp" +#include "utils/memory_tracker.hpp" namespace { void *newImpl(std::size_t size) { @@ -39,20 +39,87 @@ void deleteSized(void *ptr, const std::size_t /*unused*/) noexcept { free(ptr); #endif +void TrackMemory(const size_t size) { + size_t actual_size = size; + +#if USE_JEMALLOC + if (LIKELY(size != 0)) { + actual_size = nallocx(size, 0); + } +#endif + utils::total_memory_tracker.Alloc(actual_size); +} + +bool TrackMemoryNoExcept(const size_t size) { + try { + TrackMemory(size); + } catch (...) { + return false; + } + + return true; +} + +void UntrackMemory([[maybe_unused]] void *ptr, [[maybe_unused]] size_t size = 0) noexcept { + try { +#if USE_JEMALLOC + if (LIKELY(ptr != nullptr)) { + utils::total_memory_tracker.Free(sallocx(ptr, 0)); + } +#else + if (size) { + utils::total_memory_tracker.Free(size); + } else { + // Innaccurate because malloc_usable_size() result is greater or equal to allocated size. + utils::total_memory_tracker.Free(malloc_usable_size(ptr)); + } +#endif + } catch (...) 
{ + } +} + } // namespace -void *operator new(std::size_t size) { return newImpl(size); } +void *operator new(std::size_t size) { + TrackMemory(size); + return newImpl(size); +} -void *operator new[](std::size_t size) { return newImpl(size); } +void *operator new[](std::size_t size) { + TrackMemory(size); + return newImpl(size); +} -void *operator new(std::size_t size, const std::nothrow_t & /*unused*/) noexcept { return newNoExcept(size); } +void *operator new(std::size_t size, const std::nothrow_t & /*unused*/) noexcept { + if (LIKELY(TrackMemoryNoExcept(size))) { + return newNoExcept(size); + } + return nullptr; +} -void *operator new[](std::size_t size, const std::nothrow_t & /*unused*/) noexcept { return newNoExcept(size); } +void *operator new[](std::size_t size, const std::nothrow_t & /*unused*/) noexcept { + if (LIKELY(TrackMemoryNoExcept(size))) { + return newNoExcept(size); + } + return nullptr; +} -void operator delete(void *ptr) noexcept { deleteImpl(ptr); } +void operator delete(void *ptr) noexcept { + UntrackMemory(ptr); + deleteImpl(ptr); +} -void operator delete[](void *ptr) noexcept { deleteImpl(ptr); } +void operator delete[](void *ptr) noexcept { + UntrackMemory(ptr); + deleteImpl(ptr); +} -void operator delete(void *ptr, std::size_t size) noexcept { deleteSized(ptr, size); } +void operator delete(void *ptr, std::size_t size) noexcept { + UntrackMemory(ptr, size); + deleteSized(ptr, size); +} -void operator delete[](void *ptr, std::size_t size) noexcept { deleteSized(ptr, size); } +void operator delete[](void *ptr, std::size_t size) noexcept { + UntrackMemory(ptr, size); + deleteSized(ptr, size); +} diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index eedfaecec..b1f4a147e 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -193,6 +193,9 @@ target_link_libraries(${test_prefix}utils_math mg-utils) add_unit_test(utils_memory.cpp) target_link_libraries(${test_prefix}utils_memory mg-utils) 
+add_unit_test(utils_memory_tracker.cpp) +target_link_libraries(${test_prefix}utils_memory_tracker mg-utils) + add_unit_test(utils_on_scope_exit.cpp) target_link_libraries(${test_prefix}utils_on_scope_exit mg-utils) diff --git a/tests/unit/utils_memory_tracker.cpp b/tests/unit/utils_memory_tracker.cpp new file mode 100644 index 000000000..42012d5b4 --- /dev/null +++ b/tests/unit/utils_memory_tracker.cpp @@ -0,0 +1,54 @@ +#include + +#include + +#include + +TEST(MemoryTrackerTest, ExceptionEnabler) { + utils::MemoryTracker memory_tracker; + + constexpr size_t hard_limit = 10; + memory_tracker.SetHardLimit(hard_limit); + + std::atomic can_continue{false}; + std::atomic enabler_created{false}; + std::thread t1{[&] { + // wait until the second thread creates exception enabler + while (!enabler_created) + ; + ASSERT_NO_THROW(memory_tracker.Alloc(hard_limit + 1)); + ASSERT_EQ(memory_tracker.Amount(), hard_limit + 1); + + // tell the second thread it can finish its test + can_continue = true; + }}; + + std::thread t2{[&] { + utils::MemoryTracker::OutOfMemoryExceptionEnabler exception_enabler; + enabler_created = true; + ASSERT_THROW(memory_tracker.Alloc(hard_limit + 1), utils::OutOfMemoryException); + + // hold the enabler until the first thread finishes + while (!can_continue) + ; + }}; + + t1.join(); + t2.join(); +} + +TEST(MemoryTrackerTest, ExceptionBlocker) { + utils::MemoryTracker memory_tracker; + + constexpr size_t hard_limit = 10; + memory_tracker.SetHardLimit(hard_limit); + + utils::MemoryTracker::OutOfMemoryExceptionEnabler exception_enabler; + { + utils::MemoryTracker::OutOfMemoryExceptionBlocker exception_blocker; + + ASSERT_NO_THROW(memory_tracker.Alloc(hard_limit + 1)); + ASSERT_EQ(memory_tracker.Amount(), hard_limit + 1); + } + ASSERT_THROW(memory_tracker.Alloc(hard_limit + 1), utils::OutOfMemoryException); +} From dee885d69ccda0743bc29ea0d9adc2feb4d8f0c8 Mon Sep 17 00:00:00 2001 From: antonio2368 Date: Thu, 25 Feb 2021 16:10:42 +0100 Subject: [PATCH 
09/63] Add exception enablers and blockers (#99) * Throw OOMException while creating vertices and edges * Throw on indices creation * Throw on setting a property * Throw oom exception while recovering * Throw exception when query engine asks for extra memory * Block out of memory exception during skip list GC --- src/query/interpreter.cpp | 8 ++++- src/storage/v2/durability/durability.cpp | 2 ++ src/storage/v2/edge_accessor.cpp | 2 ++ src/storage/v2/indices.cpp | 43 ++++++++++++++++-------- src/storage/v2/storage.cpp | 7 ++++ src/storage/v2/vertex_accessor.cpp | 3 ++ src/utils/memory.hpp | 22 ++++++++++++ src/utils/skip_list.hpp | 4 +++ 8 files changed, 76 insertions(+), 15 deletions(-) diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp index 1b55b627e..c1b8ee7c5 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -26,6 +26,7 @@ #include "utils/flag_validation.hpp" #include "utils/logging.hpp" #include "utils/memory.hpp" +#include "utils/memory_tracker.hpp" #include "utils/string.hpp" #include "utils/tsc.hpp" @@ -659,7 +660,12 @@ std::optional PullPlan::Pull(AnyStream *stream, std::optional< // Returns true if a result was pulled. const auto pull_result = [&]() -> bool { - utils::MonotonicBufferResource monotonic_memory(&stack_data[0], stack_size); + // We can throw on every query because simple queries for deleting will use only + // the stack allocated buffer. + // Also, we want to throw only when the query engine requests more memory and not the storage + // so we add the exception to the allocator. + utils::ResourceWithOutOfMemoryException resource_with_exception; + utils::MonotonicBufferResource monotonic_memory(&stack_data[0], stack_size, &resource_with_exception); // TODO (mferencevic): Tune the parameters accordingly. 
utils::PoolResource pool_memory(128, 1024, &monotonic_memory); ctx_.evaluation_context.memory = &pool_memory; diff --git a/src/storage/v2/durability/durability.cpp b/src/storage/v2/durability/durability.cpp index d26728f3f..4fddeaffd 100644 --- a/src/storage/v2/durability/durability.cpp +++ b/src/storage/v2/durability/durability.cpp @@ -17,6 +17,7 @@ #include "storage/v2/durability/snapshot.hpp" #include "storage/v2/durability/wal.hpp" #include "utils/logging.hpp" +#include "utils/memory_tracker.hpp" namespace storage::durability { @@ -135,6 +136,7 @@ std::optional RecoverData(const std::filesystem::path &snapshot_di std::atomic *edge_count, NameIdMapper *name_id_mapper, Indices *indices, Constraints *constraints, Config::Items items, uint64_t *wal_seq_num) { + utils::MemoryTracker::OutOfMemoryExceptionEnabler oom_exception; if (!utils::DirExists(snapshot_directory) && !utils::DirExists(wal_directory)) return std::nullopt; auto snapshot_files = GetSnapshotFiles(snapshot_directory); diff --git a/src/storage/v2/edge_accessor.cpp b/src/storage/v2/edge_accessor.cpp index 7c2fff5ec..2dd29b759 100644 --- a/src/storage/v2/edge_accessor.cpp +++ b/src/storage/v2/edge_accessor.cpp @@ -4,6 +4,7 @@ #include "storage/v2/mvcc.hpp" #include "storage/v2/vertex_accessor.hpp" +#include "utils/memory_tracker.hpp" namespace storage { @@ -16,6 +17,7 @@ VertexAccessor EdgeAccessor::ToVertex() const { } Result EdgeAccessor::SetProperty(PropertyId property, const PropertyValue &value) { + utils::MemoryTracker::OutOfMemoryExceptionEnabler oom_exception; if (!config_.properties_on_edges) return Error::PROPERTIES_DISABLED; std::lock_guard guard(edge_.ptr->lock); diff --git a/src/storage/v2/indices.cpp b/src/storage/v2/indices.cpp index 057690726..d873e8ecf 100644 --- a/src/storage/v2/indices.cpp +++ b/src/storage/v2/indices.cpp @@ -2,6 +2,7 @@ #include "storage/v2/mvcc.hpp" #include "utils/logging.hpp" +#include "utils/memory_tracker.hpp" namespace storage { @@ -256,17 +257,24 @@ void 
LabelIndex::UpdateOnAddLabel(LabelId label, Vertex *vertex, const Transacti } bool LabelIndex::CreateIndex(LabelId label, utils::SkipList::Accessor vertices) { + utils::MemoryTracker::OutOfMemoryExceptionEnabler oom_exception; auto [it, emplaced] = index_.emplace(std::piecewise_construct, std::forward_as_tuple(label), std::forward_as_tuple()); if (!emplaced) { // Index already exists. return false; } - auto acc = it->second.access(); - for (Vertex &vertex : vertices) { - if (vertex.deleted || !utils::Contains(vertex.labels, label)) { - continue; + try { + auto acc = it->second.access(); + for (Vertex &vertex : vertices) { + if (vertex.deleted || !utils::Contains(vertex.labels, label)) { + continue; + } + acc.insert(Entry{&vertex, 0}); } - acc.insert(Entry{&vertex, 0}); + } catch (const utils::OutOfMemoryException &) { + utils::MemoryTracker::OutOfMemoryExceptionBlocker oom_exception_blocker; + index_.erase(it); + throw; } return true; } @@ -389,22 +397,29 @@ void LabelPropertyIndex::UpdateOnSetProperty(PropertyId property, const Property } bool LabelPropertyIndex::CreateIndex(LabelId label, PropertyId property, utils::SkipList::Accessor vertices) { + utils::MemoryTracker::OutOfMemoryExceptionEnabler oom_exception; auto [it, emplaced] = index_.emplace(std::piecewise_construct, std::forward_as_tuple(label, property), std::forward_as_tuple()); if (!emplaced) { // Index already exists. 
return false; } - auto acc = it->second.access(); - for (Vertex &vertex : vertices) { - if (vertex.deleted || !utils::Contains(vertex.labels, label)) { - continue; + try { + auto acc = it->second.access(); + for (Vertex &vertex : vertices) { + if (vertex.deleted || !utils::Contains(vertex.labels, label)) { + continue; + } + auto value = vertex.properties.GetProperty(property); + if (value.IsNull()) { + continue; + } + acc.insert(Entry{std::move(value), &vertex, 0}); } - auto value = vertex.properties.GetProperty(property); - if (value.IsNull()) { - continue; - } - acc.insert(Entry{std::move(value), &vertex, 0}); + } catch (const utils::OutOfMemoryException &) { + utils::MemoryTracker::OutOfMemoryExceptionBlocker oom_exception_blocker; + index_.erase(it); + throw; } return true; } diff --git a/src/storage/v2/storage.cpp b/src/storage/v2/storage.cpp index ba7dc5d8d..f0ebe9fe9 100644 --- a/src/storage/v2/storage.cpp +++ b/src/storage/v2/storage.cpp @@ -19,6 +19,7 @@ #include "storage/v2/transaction.hpp" #include "utils/file.hpp" #include "utils/logging.hpp" +#include "utils/memory_tracker.hpp" #include "utils/rw_lock.hpp" #include "utils/spin_lock.hpp" #include "utils/stat.hpp" @@ -31,6 +32,8 @@ namespace storage { +using OOMExceptionEnabler = utils::MemoryTracker::OutOfMemoryExceptionEnabler; + namespace { [[maybe_unused]] constexpr uint16_t kEpochHistoryRetention = 1000; } // namespace @@ -415,6 +418,7 @@ Storage::Accessor::~Accessor() { } VertexAccessor Storage::Accessor::CreateVertex() { + OOMExceptionEnabler oom_exception; auto gid = storage_->vertex_id_.fetch_add(1, std::memory_order_acq_rel); auto acc = storage_->vertices_.access(); auto delta = CreateDeleteObjectDelta(&transaction_); @@ -426,6 +430,7 @@ VertexAccessor Storage::Accessor::CreateVertex() { } VertexAccessor Storage::Accessor::CreateVertex(storage::Gid gid) { + OOMExceptionEnabler oom_exception; // NOTE: When we update the next `vertex_id_` here we perform a RMW // (read-modify-write) operation 
that ISN'T atomic! But, that isn't an issue // because this function is only called from the replication delta applier @@ -528,6 +533,7 @@ Result Storage::Accessor::DetachDeleteVertex(VertexAccessor *vertex) { } Result Storage::Accessor::CreateEdge(VertexAccessor *from, VertexAccessor *to, EdgeTypeId edge_type) { + OOMExceptionEnabler oom_exception; MG_ASSERT(from->transaction_ == to->transaction_, "VertexAccessors must be from the same transaction when creating " "an edge!"); @@ -587,6 +593,7 @@ Result Storage::Accessor::CreateEdge(VertexAccessor *from, VertexA Result Storage::Accessor::CreateEdge(VertexAccessor *from, VertexAccessor *to, EdgeTypeId edge_type, storage::Gid gid) { + OOMExceptionEnabler oom_exception; MG_ASSERT(from->transaction_ == to->transaction_, "VertexAccessors must be from the same transaction when creating " "an edge!"); diff --git a/src/storage/v2/vertex_accessor.cpp b/src/storage/v2/vertex_accessor.cpp index d06d0fa94..234f61375 100644 --- a/src/storage/v2/vertex_accessor.cpp +++ b/src/storage/v2/vertex_accessor.cpp @@ -7,6 +7,7 @@ #include "storage/v2/indices.hpp" #include "storage/v2/mvcc.hpp" #include "utils/logging.hpp" +#include "utils/memory_tracker.hpp" namespace storage { @@ -44,6 +45,7 @@ std::optional VertexAccessor::Create(Vertex *vertex, Transaction } Result VertexAccessor::AddLabel(LabelId label) { + utils::MemoryTracker::OutOfMemoryExceptionEnabler oom_exception; std::lock_guard guard(vertex_->lock); if (!PrepareForWrite(transaction_, vertex_)) return Error::SERIALIZATION_ERROR; @@ -176,6 +178,7 @@ Result> VertexAccessor::Labels(View view) const { } Result VertexAccessor::SetProperty(PropertyId property, const PropertyValue &value) { + utils::MemoryTracker::OutOfMemoryExceptionEnabler oom_exception; std::lock_guard guard(vertex_->lock); if (!PrepareForWrite(transaction_, vertex_)) return Error::SERIALIZATION_ERROR; diff --git a/src/utils/memory.hpp b/src/utils/memory.hpp index 0d5607dde..1f061d257 100644 --- 
a/src/utils/memory.hpp +++ b/src/utils/memory.hpp @@ -23,6 +23,7 @@ #include "utils/logging.hpp" #include "utils/math.hpp" +#include "utils/memory_tracker.hpp" #include "utils/spin_lock.hpp" namespace utils { @@ -552,4 +553,25 @@ class LimitedMemoryResource final : public utils::MemoryResource { bool DoIsEqual(const MemoryResource &other) const noexcept override { return this == &other; } }; +// Allocate memory with the OutOfMemoryException enabled if the requested size +// puts total allocated amount over the limit. +class ResourceWithOutOfMemoryException : public MemoryResource { + public: + explicit ResourceWithOutOfMemoryException(utils::MemoryResource *upstream = utils::NewDeleteResource()) + : upstream_{upstream} {} + + utils::MemoryResource *GetUpstream() noexcept { return upstream_; } + + private: + void *DoAllocate(size_t bytes, size_t alignment) override { + utils::MemoryTracker::OutOfMemoryExceptionEnabler exception_enabler; + return upstream_->Allocate(bytes, alignment); + } + + void DoDeallocate(void *p, size_t bytes, size_t alignment) override { upstream_->Deallocate(p, bytes, alignment); } + + bool DoIsEqual(const utils::MemoryResource &other) const noexcept override { return upstream_->IsEqual(other); } + + MemoryResource *upstream_{utils::NewDeleteResource()}; +}; } // namespace utils diff --git a/src/utils/skip_list.hpp b/src/utils/skip_list.hpp index d770ca768..fd7687504 100644 --- a/src/utils/skip_list.hpp +++ b/src/utils/skip_list.hpp @@ -264,6 +264,10 @@ class SkipListGc final { } void Run() { + // This method can be called after any skip list method, including the add method + // which could have OOMException enabled in its thread so to ensure no exception + // is thrown while cleaning the skip list, we add the blocker. 
+ utils::MemoryTracker::OutOfMemoryExceptionBlocker oom_blocker; if (!lock_.try_lock()) return; OnScopeExit cleanup([&] { lock_.unlock(); }); Block *tail = tail_.load(std::memory_order_acquire); From cc56ac3dd8deaec5e98b6c0a709ac87e97e7c26b Mon Sep 17 00:00:00 2001 From: antonio2368 Date: Thu, 4 Mar 2021 12:20:11 +0100 Subject: [PATCH 10/63] Expose query for cleaning memory and add memory limit flag (#100) * Add memory flag, add additional meminfo utilities * Add free memory query --- CHANGELOG.md | 2 + config/flags.yaml | 4 + src/CMakeLists.txt | 8 +- src/auth/models.cpp | 2 + src/auth/models.hpp | 10 ++- src/glue/auth.cpp | 2 + src/memgraph.cpp | 30 +++++++- src/query/exceptions.hpp | 5 ++ src/query/frontend/ast/ast.lcp | 13 +++- src/query/frontend/ast/ast_visitor.hpp | 3 +- .../frontend/ast/cypher_main_visitor.cpp | 6 ++ .../frontend/ast/cypher_main_visitor.hpp | 5 ++ .../opencypher/grammar/MemgraphCypher.g4 | 4 +- .../opencypher/grammar/MemgraphCypherLexer.g4 | 1 + .../frontend/semantic/required_privileges.cpp | 3 + .../frontend/stripped_lexer_constants.hpp | 2 +- src/query/interpreter.cpp | 18 +++++ src/storage/v2/indices.cpp | 12 +++ src/storage/v2/indices.hpp | 5 +- src/storage/v2/storage.cpp | 73 +++++++++++++++---- src/storage/v2/storage.hpp | 13 ++++ src/utils/CMakeLists.txt | 1 + src/utils/skip_list.hpp | 4 +- src/utils/sysinfo/memory.cpp | 33 +++++++++ src/utils/sysinfo/memory.hpp | 34 ++++----- tests/unit/utils_memory_tracker.cpp | 13 +++- 26 files changed, 252 insertions(+), 54 deletions(-) create mode 100644 src/utils/sysinfo/memory.cpp diff --git a/CHANGELOG.md b/CHANGELOG.md index 9e1a0ec95..f10c646bd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ * Added support for programatically reading in data from CSV files through the `LOAD CSV` clause. We support CSV files with and without a header, the supported dialect being Excel. 
+* Added a new flag `--memory-limit` which enables the user to set the maximum total amount of memory + memgraph can allocate during its runtime. ### Bug Fixes diff --git a/config/flags.yaml b/config/flags.yaml index 5245a9d90..c610591d1 100644 --- a/config/flags.yaml +++ b/config/flags.yaml @@ -83,6 +83,10 @@ modifications: value: "/usr/lib/memgraph/auth_module/example.py" override: false + - name: "memory_limit" + value: "0" + override: true + undocumented: - "flag_file" - "also_log_to_stderr" diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2dbba0529..ddae1b4ad 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -37,17 +37,17 @@ if (MG_ENTERPRISE) glue/auth.cpp) endif() -set(MG_SINGLE_NODE_V2_LIBS stdc++fs Threads::Threads - telemetry_lib mg-query mg-communication mg-new-delete) +set(mg_single_node_v2_libs stdc++fs Threads::Threads + telemetry_lib mg-query mg-communication mg-new-delete mg-utils) if (MG_ENTERPRISE) # These are enterprise subsystems - set(MG_SINGLE_NODE_V2_LIBS ${MG_SINGLE_NODE_V2_LIBS} mg-auth mg-audit) + set(mg_single_node_v2_libs ${mg_single_node_v2_libs} mg-auth mg-audit) endif() # memgraph main executable add_executable(memgraph ${mg_single_node_v2_sources}) target_include_directories(memgraph PUBLIC ${CMAKE_SOURCE_DIR}/include) -target_link_libraries(memgraph ${MG_SINGLE_NODE_V2_LIBS}) +target_link_libraries(memgraph ${mg_single_node_v2_libs}) # NOTE: `include/mg_procedure.syms` describes a pattern match for symbols which # should be dynamically exported, so that `dlopen` can correctly link the # symbols in custom procedure module libraries. 
diff --git a/src/auth/models.cpp b/src/auth/models.cpp index b3e650b44..33ebc72fe 100644 --- a/src/auth/models.cpp +++ b/src/auth/models.cpp @@ -45,6 +45,8 @@ std::string PermissionToString(Permission permission) { return "LOCK_PATH"; case Permission::READ_FILE: return "READ_FILE"; + case Permission::FREE_MEMORY: + return "FREE_MEMORY"; case Permission::AUTH: return "AUTH"; } diff --git a/src/auth/models.hpp b/src/auth/models.hpp index 4453adaf8..f1139e1ca 100644 --- a/src/auth/models.hpp +++ b/src/auth/models.hpp @@ -24,15 +24,17 @@ enum class Permission : uint64_t { REPLICATION = 1U << 10U, LOCK_PATH = 1U << 11U, READ_FILE = 1U << 12U, + FREE_MEMORY = 1U << 13U, AUTH = 1U << 16U }; // clang-format on // Constant list of all available permissions. -const std::vector kPermissionsAll = { - Permission::MATCH, Permission::CREATE, Permission::MERGE, Permission::DELETE, Permission::SET, - Permission::REMOVE, Permission::INDEX, Permission::STATS, Permission::CONSTRAINT, Permission::DUMP, - Permission::AUTH, Permission::REPLICATION, Permission::LOCK_PATH, Permission::READ_FILE}; +const std::vector kPermissionsAll = {Permission::MATCH, Permission::CREATE, Permission::MERGE, + Permission::DELETE, Permission::SET, Permission::REMOVE, + Permission::INDEX, Permission::STATS, Permission::CONSTRAINT, + Permission::DUMP, Permission::AUTH, Permission::REPLICATION, + Permission::LOCK_PATH, Permission::READ_FILE, Permission::FREE_MEMORY}; // Function that converts a permission to its string representation. 
std::string PermissionToString(Permission permission); diff --git a/src/glue/auth.cpp b/src/glue/auth.cpp index 43af863b5..2a9932595 100644 --- a/src/glue/auth.cpp +++ b/src/glue/auth.cpp @@ -30,6 +30,8 @@ auth::Permission PrivilegeToPermission(query::AuthQuery::Privilege privilege) { return auth::Permission::LOCK_PATH; case query::AuthQuery::Privilege::READ_FILE: return auth::Permission::READ_FILE; + case query::AuthQuery::Privilege::FREE_MEMORY: + return auth::Permission::FREE_MEMORY; case query::AuthQuery::Privilege::AUTH: return auth::Permission::AUTH; } diff --git a/src/memgraph.cpp b/src/memgraph.cpp index 29037a0c0..e064a5c9e 100644 --- a/src/memgraph.cpp +++ b/src/memgraph.cpp @@ -167,6 +167,10 @@ DEFINE_VALIDATED_string(query_modules_directory, "", DEFINE_bool(also_log_to_stderr, false, "Log messages go to stderr in addition to logfiles"); DEFINE_string(log_file, "", "Path to where the log should be stored."); +DEFINE_uint64( + memory_limit, 0, + "Total memory limit in MiB. Set to 0 to use the default values which are 100\% of the phyisical memory if the swap " + "is enabled and 90\% of the physical memory otherwise."); namespace { constexpr std::array log_level_mappings{ std::pair{"TRACE", spdlog::level::trace}, std::pair{"DEBUG", spdlog::level::debug}, @@ -236,6 +240,25 @@ void ConfigureLogging() { spdlog::flush_on(spdlog::level::trace); ParseLogLevel(); } + +int64_t GetMemoryLimit() { + if (FLAGS_memory_limit == 0) { + auto maybe_total_memory = utils::sysinfo::TotalMemory(); + MG_ASSERT(maybe_total_memory, "Failed to fetch the total physical memory"); + const auto maybe_swap_memory = utils::sysinfo::SwapTotalMemory(); + MG_ASSERT(maybe_swap_memory, "Failed to fetch the total swap memory"); + + if (*maybe_swap_memory == 0) { + // take only 90% of the total memory + *maybe_total_memory *= 9; + *maybe_total_memory /= 10; + } + return *maybe_total_memory * 1024; + } + + // We parse the memory as MiB every time + return FLAGS_memory_limit * 1024 * 1024; +} } 
// namespace /// Encapsulates Dbms and Interpreter that are passed through the network server @@ -876,10 +899,10 @@ int main(int argc, char **argv) { // Start memory warning logger. utils::Scheduler mem_log_scheduler; if (FLAGS_memory_warning_threshold > 0) { - auto free_ram = utils::sysinfo::AvailableMemoryKilobytes(); + auto free_ram = utils::sysinfo::AvailableMemory(); if (free_ram) { mem_log_scheduler.Run("Memory warning", std::chrono::seconds(3), [] { - auto free_ram = utils::sysinfo::AvailableMemoryKilobytes(); + auto free_ram = utils::sysinfo::AvailableMemory(); if (free_ram && *free_ram / 1024 < FLAGS_memory_warning_threshold) spdlog::warn("Running out of available RAM, only {} MB left", *free_ram / 1024); }); @@ -925,8 +948,9 @@ int main(int argc, char **argv) { // End enterprise features initialization #endif - // Main storage and execution engines initialization + utils::total_memory_tracker.SetHardLimit(GetMemoryLimit()); + // Main storage and execution engines initialization storage::Config db_config{ .gc = {.type = storage::Config::Gc::Type::PERIODIC, .interval = std::chrono::seconds(FLAGS_storage_gc_cycle_sec)}, .items = {.properties_on_edges = FLAGS_storage_properties_on_edges}, diff --git a/src/query/exceptions.hpp b/src/query/exceptions.hpp index 0a8bd5b11..cde5433d2 100644 --- a/src/query/exceptions.hpp +++ b/src/query/exceptions.hpp @@ -164,4 +164,9 @@ class LockPathModificationInMulticommandTxException : public QueryException { : QueryException("Lock path clause not allowed in multicommand transactions.") {} }; +class FreeMemoryModificationInMulticommandTxException : public QueryException { + public: + FreeMemoryModificationInMulticommandTxException() + : QueryException("Free memory query not allowed in multicommand transactions.") {} +}; } // namespace query diff --git a/src/query/frontend/ast/ast.lcp b/src/query/frontend/ast/ast.lcp index bc70c822d..aaa855ac5 100644 --- a/src/query/frontend/ast/ast.lcp +++ b/src/query/frontend/ast/ast.lcp @@ 
-2191,7 +2191,7 @@ cpp<# (:serialize)) (lcp:define-enum privilege (create delete match merge set remove index stats auth constraint - dump replication lock_path read_file) + dump replication lock_path read_file free_memory) (:serialize)) #>cpp AuthQuery() = default; @@ -2228,7 +2228,8 @@ const std::vector kPrivilegesAll = { AuthQuery::Privilege::AUTH, AuthQuery::Privilege::CONSTRAINT, AuthQuery::Privilege::DUMP, AuthQuery::Privilege::REPLICATION, - AuthQuery::Privilege::LOCK_PATH}; + AuthQuery::Privilege::LOCK_PATH, + AuthQuery::Privilege::FREE_MEMORY}; cpp<# (lcp:define-class info-query (query) @@ -2394,4 +2395,12 @@ cpp<# (:serialize (:slk)) (:clone)) + (lcp:define-class free-memory-query (query) () + (:public + #>cpp + DEFVISITABLE(QueryVisitor); + cpp<#) + (:serialize (:slk)) + (:clone)) + (lcp:pop-namespace) ;; namespace query diff --git a/src/query/frontend/ast/ast_visitor.hpp b/src/query/frontend/ast/ast_visitor.hpp index cc4b7d268..2d5373853 100644 --- a/src/query/frontend/ast/ast_visitor.hpp +++ b/src/query/frontend/ast/ast_visitor.hpp @@ -75,6 +75,7 @@ class DumpQuery; class ReplicationQuery; class LockPathQuery; class LoadCsv; +class FreeMemoryQuery; using TreeCompositeVisitor = ::utils::CompositeVisitor< SingleQuery, CypherUnion, NamedExpression, OrOperator, XorOperator, AndOperator, NotOperator, AdditionOperator, @@ -108,6 +109,6 @@ class ExpressionVisitor template class QueryVisitor : public ::utils::Visitor {}; + ConstraintQuery, DumpQuery, ReplicationQuery, LockPathQuery, LoadCsv, FreeMemoryQuery> {}; } // namespace query diff --git a/src/query/frontend/ast/cypher_main_visitor.cpp b/src/query/frontend/ast/cypher_main_visitor.cpp index b206929ea..0c7fb2b33 100644 --- a/src/query/frontend/ast/cypher_main_visitor.cpp +++ b/src/query/frontend/ast/cypher_main_visitor.cpp @@ -304,6 +304,12 @@ antlrcpp::Any CypherMainVisitor::visitLoadCsv(MemgraphCypher::LoadCsvContext *ct return load_csv; } +antlrcpp::Any 
CypherMainVisitor::visitFreeMemoryQuery(MemgraphCypher::FreeMemoryQueryContext *ctx) { + auto *free_memory_query = storage_->Create(); + query_ = free_memory_query; + return free_memory_query; +} + antlrcpp::Any CypherMainVisitor::visitCypherUnion(MemgraphCypher::CypherUnionContext *ctx) { bool distinct = !ctx->ALL(); auto *cypher_union = storage_->Create(distinct); diff --git a/src/query/frontend/ast/cypher_main_visitor.hpp b/src/query/frontend/ast/cypher_main_visitor.hpp index 15eebdf30..03fde1119 100644 --- a/src/query/frontend/ast/cypher_main_visitor.hpp +++ b/src/query/frontend/ast/cypher_main_visitor.hpp @@ -213,6 +213,11 @@ class CypherMainVisitor : public antlropencypher::MemgraphCypherBaseVisitor { */ antlrcpp::Any visitLoadCsv(MemgraphCypher::LoadCsvContext *ctx) override; + /** + * @return FreeMemoryQuery* + */ + antlrcpp::Any visitFreeMemoryQuery(MemgraphCypher::FreeMemoryQueryContext *ctx) override; + /** * @return CypherUnion* */ diff --git a/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 b/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 index f8785d13c..4d96ace10 100644 --- a/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 +++ b/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 @@ -20,6 +20,7 @@ memgraphCypherKeyword : cypherKeyword | DROP | DUMP | FOR + | FREE | FROM | GRANT | HEADER @@ -64,6 +65,7 @@ query : cypherQuery | dumpQuery | replicationQuery | lockPathQuery + | freeMemoryQuery ; authQuery : createRole @@ -176,4 +178,4 @@ showReplicas : SHOW REPLICAS ; lockPathQuery : ( LOCK | UNLOCK ) DATA DIRECTORY ; - +freeMemoryQuery : FREE MEMORY ; diff --git a/src/query/frontend/opencypher/grammar/MemgraphCypherLexer.g4 b/src/query/frontend/opencypher/grammar/MemgraphCypherLexer.g4 index 2f3a4be03..fbfc5a2d2 100644 --- a/src/query/frontend/opencypher/grammar/MemgraphCypherLexer.g4 +++ b/src/query/frontend/opencypher/grammar/MemgraphCypherLexer.g4 @@ -24,6 +24,7 @@ DIRECTORY : D I R E C T O R Y ; DROP : D R O P ; DUMP : D 
U M P ; FOR : F O R ; +FREE : F R E E ; FROM : F R O M ; GRANT : G R A N T ; GRANTS : G R A N T S ; diff --git a/src/query/frontend/semantic/required_privileges.cpp b/src/query/frontend/semantic/required_privileges.cpp index 9f187e48f..4cd046bf5 100644 --- a/src/query/frontend/semantic/required_privileges.cpp +++ b/src/query/frontend/semantic/required_privileges.cpp @@ -1,4 +1,5 @@ #include "query/frontend/ast/ast.hpp" +#include "query/frontend/ast/ast_visitor.hpp" namespace query { @@ -52,6 +53,8 @@ class PrivilegeExtractor : public QueryVisitor, public HierarchicalTreeVis void Visit(LoadCsv &load_csv) override { AddPrivilege(AuthQuery::Privilege::READ_FILE); } + void Visit(FreeMemoryQuery &free_memory_query) override { AddPrivilege(AuthQuery::Privilege::FREE_MEMORY); } + void Visit(ReplicationQuery &replication_query) override { switch (replication_query.action_) { case ReplicationQuery::Action::SET_REPLICATION_ROLE: diff --git a/src/query/frontend/stripped_lexer_constants.hpp b/src/query/frontend/stripped_lexer_constants.hpp index 89d1503f9..71949c4a7 100644 --- a/src/query/frontend/stripped_lexer_constants.hpp +++ b/src/query/frontend/stripped_lexer_constants.hpp @@ -87,7 +87,7 @@ const trie::Trie kKeywords = { "single", "true", "false", "reduce", "coalesce", "user", "password", "alter", "drop", "show", "stats", "unique", "explain", "profile", "storage", "index", "info", "exists", "assert", "constraint", "node", "key", "dump", "database", "call", "yield", "memory", - "mb", "kb", "unlimited"}; + "mb", "kb", "unlimited", "free"}; // Unicode codepoints that are allowed at the start of the unescaped name. 
const std::bitset kUnescapedNameAllowedStarts( diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp index c1b8ee7c5..f9de34707 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -1162,6 +1162,22 @@ PreparedQuery PrepareLockPathQuery(ParsedQuery parsed_query, const bool in_expli RWType::NONE}; } +PreparedQuery PrepareFreeMemoryQuery(ParsedQuery parsed_query, const bool in_explicit_transaction, + InterpreterContext *interpreter_context) { + if (in_explicit_transaction) { + throw FreeMemoryModificationInMulticommandTxException(); + } + + interpreter_context->db->FreeMemory(); + + return PreparedQuery{{}, + std::move(parsed_query.required_privileges), + [](AnyStream *stream, std::optional n) -> std::optional { + return QueryHandlerResult::COMMIT; + }, + RWType::NONE}; +} + PreparedQuery PrepareInfoQuery(ParsedQuery parsed_query, bool in_explicit_transaction, std::map *summary, InterpreterContext *interpreter_context, storage::Storage *db, utils::MonotonicBufferResource *execution_memory) { @@ -1488,6 +1504,8 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, } else if (utils::Downcast(parsed_query.query)) { prepared_query = PrepareLockPathQuery(std::move(parsed_query), in_explicit_transaction_, interpreter_context_, &*execution_db_accessor_); + } else if (utils::Downcast(parsed_query.query)) { + prepared_query = PrepareFreeMemoryQuery(std::move(parsed_query), in_explicit_transaction_, interpreter_context_); } else { LOG_FATAL("Should not get here -- unknown query type!"); } diff --git a/src/storage/v2/indices.cpp b/src/storage/v2/indices.cpp index d873e8ecf..f7bd634fd 100644 --- a/src/storage/v2/indices.cpp +++ b/src/storage/v2/indices.cpp @@ -349,6 +349,12 @@ LabelIndex::Iterable::Iterable(utils::SkipList::Accessor index_accessor, constraints_(constraints), config_(config) {} +void LabelIndex::RunGC() { + for (auto &index_entry : index_) { + index_entry.second.run_gc(); + } +} + bool 
LabelPropertyIndex::Entry::operator<(const Entry &rhs) { if (value < rhs.value) { return true; @@ -661,6 +667,12 @@ int64_t LabelPropertyIndex::ApproximateVertexCount(LabelId label, PropertyId pro return acc.estimate_range_count(lower, upper, utils::SkipListLayerForCountEstimation(acc.size())); } +void LabelPropertyIndex::RunGC() { + for (auto &index_entry : index_) { + index_entry.second.run_gc(); + } +} + void RemoveObsoleteEntries(Indices *indices, uint64_t oldest_active_start_timestamp) { indices->label_index.RemoveObsoleteEntries(oldest_active_start_timestamp); indices->label_property_index.RemoveObsoleteEntries(oldest_active_start_timestamp); diff --git a/src/storage/v2/indices.hpp b/src/storage/v2/indices.hpp index 7d7a30222..dd58e16fc 100644 --- a/src/storage/v2/indices.hpp +++ b/src/storage/v2/indices.hpp @@ -110,6 +110,8 @@ class LabelIndex { void Clear() { index_.clear(); } + void RunGC(); + private: std::map> index_; Indices *indices_; @@ -225,6 +227,8 @@ class LabelPropertyIndex { void Clear() { index_.clear(); } + void RunGC(); + private: std::map, utils::SkipList> index_; Indices *indices_; @@ -263,5 +267,4 @@ void UpdateOnAddLabel(Indices *indices, LabelId label, Vertex *vertex, const Tra /// @throw std::bad_alloc void UpdateOnSetProperty(Indices *indices, PropertyId property, const PropertyValue &value, Vertex *vertex, const Transaction &tx); - } // namespace storage diff --git a/src/storage/v2/storage.cpp b/src/storage/v2/storage.cpp index f0ebe9fe9..b699dc250 100644 --- a/src/storage/v2/storage.cpp +++ b/src/storage/v2/storage.cpp @@ -361,7 +361,7 @@ Storage::Storage(Config config) snapshot_runner_.Run("Snapshot", config_.durability.snapshot_interval, [this] { this->CreateSnapshot(); }); } if (config_.gc.type == Config::Gc::Type::PERIODIC) { - gc_runner_.Run("Storage GC", config_.gc.interval, [this] { this->CollectGarbage(); }); + gc_runner_.Run("Storage GC", config_.gc.interval, [this] { this->CollectGarbage(); }); } if (timestamp_ == 
kTimestampInitialId) { @@ -1221,11 +1221,29 @@ Transaction Storage::CreateTransaction() { return {transaction_id, start_timestamp}; } +template void Storage::CollectGarbage() { - // Because the garbage collector iterates through the indices and constraints - // to clean them up, it must take the main lock for reading to make sure that - // the indices and constraints aren't concurrently being modified. - std::shared_lock main_guard(main_lock_); + if constexpr (force) { + // We take the unique lock on the main storage lock so we can forcefully clean + // everything we can + if (!main_lock_.try_lock()) { + CollectGarbage(); + return; + } + } else { + // Because the garbage collector iterates through the indices and constraints + // to clean them up, it must take the main lock for reading to make sure that + // the indices and constraints aren't concurrently being modified. + main_lock_.lock_shared(); + } + + utils::OnScopeExit lock_releaser{[&] { + if constexpr (force) { + main_lock_.unlock(); + } else { + main_lock_.unlock_shared(); + } + }}; // Garbage collection must be performed in two phases. 
In the first phase, // deltas that won't be applied by any transaction anymore are unlinked from @@ -1418,19 +1436,32 @@ void Storage::CollectGarbage() { } } - while (true) { - auto garbage_undo_buffers_ptr = garbage_undo_buffers_.Lock(); - if (garbage_undo_buffers_ptr->empty() || garbage_undo_buffers_ptr->front().first > oldest_active_start_timestamp) { - break; + garbage_undo_buffers_.WithLock([&](auto &undo_buffers) { + // if force is set to true we can simply delete all the leftover undos because + // no transaction is active + if constexpr (force) { + undo_buffers.clear(); + } else { + while (!undo_buffers.empty() && undo_buffers.front().first <= oldest_active_start_timestamp) { + undo_buffers.pop_front(); + } } - garbage_undo_buffers_ptr->pop_front(); - } + }); { auto vertex_acc = vertices_.access(); - while (!garbage_vertices_.empty() && garbage_vertices_.front().first < oldest_active_start_timestamp) { - MG_ASSERT(vertex_acc.remove(garbage_vertices_.front().second), "Invalid database state!"); - garbage_vertices_.pop_front(); + if constexpr (force) { + // if force is set to true, then we have unique_lock and no transactions are active + // so we can clean all of the deleted vertices + while (!garbage_vertices_.empty()) { + MG_ASSERT(vertex_acc.remove(garbage_vertices_.front().second), "Invalid database state!"); + garbage_vertices_.pop_front(); + } + } else { + while (!garbage_vertices_.empty() && garbage_vertices_.front().first < oldest_active_start_timestamp) { + MG_ASSERT(vertex_acc.remove(garbage_vertices_.front().second), "Invalid database state!"); + garbage_vertices_.pop_front(); + } } } { @@ -1441,6 +1472,10 @@ void Storage::CollectGarbage() { } } +// tell the linker he can find the CollectGarbage definitions here +template void Storage::CollectGarbage(); +template void Storage::CollectGarbage(); + bool Storage::InitializeWalFile() { if (config_.durability.snapshot_wal_mode != Config::Durability::SnapshotWalMode::PERIODIC_SNAPSHOT_WITH_WAL) return 
false; @@ -1702,6 +1737,16 @@ bool Storage::UnlockPath() { return true; } +void Storage::FreeMemory() { + CollectGarbage(); + + // SkipList is already threadsafe + vertices_.run_gc(); + edges_.run_gc(); + indices_.label_index.RunGC(); + indices_.label_property_index.RunGC(); +} + uint64_t Storage::CommitTimestamp(const std::optional desired_commit_timestamp) { if (!desired_commit_timestamp) { return timestamp_++; diff --git a/src/storage/v2/storage.hpp b/src/storage/v2/storage.hpp index 712519701..6e3275caa 100644 --- a/src/storage/v2/storage.hpp +++ b/src/storage/v2/storage.hpp @@ -21,6 +21,7 @@ #include "storage/v2/vertex.hpp" #include "storage/v2/vertex_accessor.hpp" #include "utils/file_locker.hpp" +#include "utils/on_scope_exit.hpp" #include "utils/rw_lock.hpp" #include "utils/scheduler.hpp" #include "utils/skip_list.hpp" @@ -412,11 +413,23 @@ class Storage final { std::vector ReplicasInfo(); + void FreeMemory(); + private: Transaction CreateTransaction(); + /// The force parameter determines the behaviour of the garbage collector. + /// If it's set to true, it will behave as a global operation, i.e. it can't + /// be part of a transaction, and no other transaction can be active at the same time. + /// This allows it to delete immediately vertices without worrying that some other + /// transaction is possibly using it. If there are active transactions when this method + /// is called with force set to true, it will fallback to the same method with the force + /// set to false. + /// If it's set to false, it will execute in parallel with other transactions, ensuring + /// that no object in use can be deleted. 
/// @throw std::system_error /// @throw std::bad_alloc + template void CollectGarbage(); bool InitializeWalFile(); diff --git a/src/utils/CMakeLists.txt b/src/utils/CMakeLists.txt index f93e4ab2f..36e143317 100644 --- a/src/utils/CMakeLists.txt +++ b/src/utils/CMakeLists.txt @@ -6,6 +6,7 @@ set(utils_src_files memory.cpp memory_tracker.cpp signals.cpp + sysinfo/memory.cpp thread.cpp thread_pool.cpp uuid.cpp) diff --git a/src/utils/skip_list.hpp b/src/utils/skip_list.hpp index fd7687504..fd2295fea 100644 --- a/src/utils/skip_list.hpp +++ b/src/utils/skip_list.hpp @@ -259,7 +259,7 @@ class SkipListGc final { } void Collect(TNode *node) { - std::lock_guard guard(lock_); + std::unique_lock guard(lock_); deleted_.Push({accessor_id_.load(std::memory_order_acquire), node}); } @@ -895,6 +895,8 @@ class SkipList final { gc_.Clear(); } + void run_gc() { gc_.Run(); } + private: template int find_node(const TKey &key, TNode *preds[], TNode *succs[]) const { diff --git a/src/utils/sysinfo/memory.cpp b/src/utils/sysinfo/memory.cpp new file mode 100644 index 000000000..5ee1c4fc4 --- /dev/null +++ b/src/utils/sysinfo/memory.cpp @@ -0,0 +1,33 @@ +#include "utils/sysinfo/memory.hpp" + +namespace utils::sysinfo { + +namespace { +std::optional ExtractAmountFromMemInfo(const std::string_view header_name) { + std::string token; + std::ifstream meminfo("/proc/meminfo"); + const auto meminfo_header = fmt::format("{}:", header_name); + while (meminfo >> token) { + if (token == meminfo_header) { + uint64_t mem = 0; + if (meminfo >> mem) { + return mem; + } else { + return std::nullopt; + } + } + meminfo.ignore(std::numeric_limits::max(), '\n'); + } + SPDLOG_WARN("Failed to read {} from /proc/meminfo", header_name); + return std::nullopt; +} + +} // namespace + +std::optional AvailableMemory() { return ExtractAmountFromMemInfo("MemAvailable"); } + +std::optional TotalMemory() { return ExtractAmountFromMemInfo("MemTotal"); } + +std::optional SwapTotalMemory() { return 
ExtractAmountFromMemInfo("SwapTotal"); } + +} // namespace utils::sysinfo diff --git a/src/utils/sysinfo/memory.hpp b/src/utils/sysinfo/memory.hpp index 264d94079..6971833a8 100644 --- a/src/utils/sysinfo/memory.hpp +++ b/src/utils/sysinfo/memory.hpp @@ -1,3 +1,5 @@ +#pragma once + #include #include #include @@ -8,25 +10,21 @@ namespace utils::sysinfo { /** - * Gets the amount of available RAM in kilobytes. If the information is + * Gets the amount of available RAM in KiB. If the information is * unavalable an empty value is returned. */ -inline std::optional AvailableMemoryKilobytes() { - std::string token; - std::ifstream meminfo("/proc/meminfo"); - while (meminfo >> token) { - if (token == "MemAvailable:") { - uint64_t mem = 0; - if (meminfo >> mem) { - return mem; - } else { - return std::nullopt; - } - } - meminfo.ignore(std::numeric_limits::max(), '\n'); - } - SPDLOG_WARN("Failed to read amount of available memory from /proc/meminfo"); - return std::nullopt; -} +std::optional AvailableMemory(); + +/** + * Gets the amount of total RAM in KiB. If the information is + * unavalable an empty value is returned. + */ +std::optional TotalMemory(); + +/** + * Gets the amount of total swap space in KiB. If the information is + * unavalable an empty value is returned. 
+ */ +std::optional SwapTotalMemory(); } // namespace utils::sysinfo diff --git a/tests/unit/utils_memory_tracker.cpp b/tests/unit/utils_memory_tracker.cpp index 42012d5b4..37acc07b4 100644 --- a/tests/unit/utils_memory_tracker.cpp +++ b/tests/unit/utils_memory_tracker.cpp @@ -3,6 +3,7 @@ #include #include +#include TEST(MemoryTrackerTest, ExceptionEnabler) { utils::MemoryTracker memory_tracker; @@ -16,11 +17,15 @@ TEST(MemoryTrackerTest, ExceptionEnabler) { // wait until the second thread creates exception enabler while (!enabler_created) ; - ASSERT_NO_THROW(memory_tracker.Alloc(hard_limit + 1)); - ASSERT_EQ(memory_tracker.Amount(), hard_limit + 1); - // tell the second thread it can finish its test - can_continue = true; + // we use the OnScopeExit so the test doesn't deadlock when + // an ASSERT fails + utils::OnScopeExit thread_notifier{[&] { + // tell the second thread it can finish its test + can_continue = true; + }}; + + ASSERT_NO_THROW(memory_tracker.Alloc(hard_limit + 1)); }}; std::thread t2{[&] { From 9c6bf4b1b8c83552e5ce7658670ad4d2ecee9005 Mon Sep 17 00:00:00 2001 From: antonio2368 Date: Fri, 5 Mar 2021 10:24:08 +0100 Subject: [PATCH 11/63] Updated storage info (#102) * Refactor readable size function * Log set memory limit * Add memory allocation info to storage * Updated changelog --- CHANGELOG.md | 2 ++ src/memgraph.cpp | 5 ++++- src/query/interpreter.cpp | 5 ++++- src/utils/CMakeLists.txt | 1 + src/utils/memory_tracker.cpp | 21 ++------------------- src/utils/memory_tracker.hpp | 4 +++- src/utils/readable_size.cpp | 27 +++++++++++++++++++++++++++ src/utils/readable_size.hpp | 6 ++++++ 8 files changed, 49 insertions(+), 22 deletions(-) create mode 100644 src/utils/readable_size.cpp create mode 100644 src/utils/readable_size.hpp diff --git a/CHANGELOG.md b/CHANGELOG.md index f10c646bd..b09c241f5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,8 @@ supported dialect being Excel. 
* Added a new flag `--memory-limit` which enables the user to set the maximum total amount of memory memgraph can allocate during its runtime. +* Added `FREE MEMORY` query which tries to free unusued memory chunks in different parts of storage. +* Added the memory limit and amount of currently allocated bytes in the result of `SHOW STORAGE INFO` query. ### Bug Fixes diff --git a/src/memgraph.cpp b/src/memgraph.cpp index e064a5c9e..c9c17b334 100644 --- a/src/memgraph.cpp +++ b/src/memgraph.cpp @@ -36,6 +36,7 @@ #include "utils/flag_validation.hpp" #include "utils/logging.hpp" #include "utils/memory_tracker.hpp" +#include "utils/readable_size.hpp" #include "utils/signals.hpp" #include "utils/string.hpp" #include "utils/sysinfo/memory.hpp" @@ -948,7 +949,9 @@ int main(int argc, char **argv) { // End enterprise features initialization #endif - utils::total_memory_tracker.SetHardLimit(GetMemoryLimit()); + const auto memory_limit = GetMemoryLimit(); + spdlog::info("Memory limit set to {}", utils::GetReadableSize(memory_limit)); + utils::total_memory_tracker.SetHardLimit(memory_limit); // Main storage and execution engines initialization storage::Config db_config{ diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp index f9de34707..62391e218 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -1199,7 +1199,10 @@ PreparedQuery PrepareInfoQuery(ParsedQuery parsed_query, bool in_explicit_transa {TypedValue("edge_count"), TypedValue(static_cast(info.edge_count))}, {TypedValue("average_degree"), TypedValue(info.average_degree)}, {TypedValue("memory_usage"), TypedValue(static_cast(info.memory_usage))}, - {TypedValue("disk_usage"), TypedValue(static_cast(info.disk_usage))}}; + {TypedValue("disk_usage"), TypedValue(static_cast(info.disk_usage))}, + {TypedValue("memory_allocated"), TypedValue(static_cast(utils::total_memory_tracker.Amount()))}, + {TypedValue("allocation_limit"), + 
TypedValue(static_cast(utils::total_memory_tracker.HardLimit()))}}; return std::pair{results, QueryHandlerResult::COMMIT}; }; break; diff --git a/src/utils/CMakeLists.txt b/src/utils/CMakeLists.txt index 36e143317..b63d8e530 100644 --- a/src/utils/CMakeLists.txt +++ b/src/utils/CMakeLists.txt @@ -5,6 +5,7 @@ set(utils_src_files file_locker.cpp memory.cpp memory_tracker.cpp + readable_size.cpp signals.cpp sysinfo/memory.cpp thread.cpp diff --git a/src/utils/memory_tracker.cpp b/src/utils/memory_tracker.cpp index 624b3ca3e..d1ccceade 100644 --- a/src/utils/memory_tracker.cpp +++ b/src/utils/memory_tracker.cpp @@ -7,6 +7,7 @@ #include "utils/likely.hpp" #include "utils/logging.hpp" #include "utils/on_scope_exit.hpp" +#include "utils/readable_size.hpp" namespace utils { @@ -18,24 +19,6 @@ bool MemoryTrackerCanThrow() { !MemoryTracker::OutOfMemoryExceptionBlocker::IsBlocked(); } -std::string GetReadableSize(double size) { - // TODO (antonio2368): Add support for base 1000 (KB, GB, TB...) - constexpr std::array units = {"B", "KiB", "MiB", "GiB", "TiB"}; - constexpr double delimiter = 1024; - - size_t i = 0; - for (; i + 1 < units.size() && size >= delimiter; ++i) { - size /= delimiter; - } - - // bytes don't need decimals - if (i == 0) { - return fmt::format("{:.0f}{}", size, units[i]); - } - - return fmt::format("{:.2f}{}", size, units[i]); -} - } // namespace thread_local uint64_t MemoryTracker::OutOfMemoryExceptionEnabler::counter_ = 0; @@ -71,7 +54,7 @@ void MemoryTracker::UpdatePeak(const int64_t will_be) { void MemoryTracker::SetHardLimit(const int64_t limit) { hard_limit_.store(limit, std::memory_order_relaxed); } -void MemoryTracker::SetOrRaiseHardLimit(const int64_t limit) { +void MemoryTracker::TryRaiseHardLimit(const int64_t limit) { int64_t old_limit = hard_limit_.load(std::memory_order_relaxed); while (old_limit < limit && !hard_limit_.compare_exchange_weak(old_limit, limit)) ; diff --git a/src/utils/memory_tracker.hpp b/src/utils/memory_tracker.hpp index 
14cb4f307..28019664d 100644 --- a/src/utils/memory_tracker.hpp +++ b/src/utils/memory_tracker.hpp @@ -39,8 +39,10 @@ class MemoryTracker final { auto Peak() const { return peak_.load(std::memory_order_relaxed); } + auto HardLimit() const { return hard_limit_.load(std::memory_order_relaxed); } + void SetHardLimit(int64_t limit); - void SetOrRaiseHardLimit(int64_t limit); + void TryRaiseHardLimit(int64_t limit); // By creating an object of this class, every allocation in its scope that goes over // the set hard limit produces an OutOfMemoryException. diff --git a/src/utils/readable_size.cpp b/src/utils/readable_size.cpp new file mode 100644 index 000000000..27f7b4c8d --- /dev/null +++ b/src/utils/readable_size.cpp @@ -0,0 +1,27 @@ +#include "utils/readable_size.hpp" + +#include + +#include + +namespace utils { + +std::string GetReadableSize(double size) { + // TODO (antonio2368): Add support for base 1000 (KB, GB, TB...) + constexpr std::array units = {"B", "KiB", "MiB", "GiB", "TiB"}; + constexpr double delimiter = 1024; + + size_t i = 0; + for (; i + 1 < units.size() && size >= delimiter; ++i) { + size /= delimiter; + } + + // bytes don't need decimals + if (i == 0) { + return fmt::format("{:.0f}{}", size, units[i]); + } + + return fmt::format("{:.2f}{}", size, units[i]); +} + +} // namespace utils diff --git a/src/utils/readable_size.hpp b/src/utils/readable_size.hpp new file mode 100644 index 000000000..d7b959cf3 --- /dev/null +++ b/src/utils/readable_size.hpp @@ -0,0 +1,6 @@ +#pragma once +#include + +namespace utils { +std::string GetReadableSize(double size); +} // namespace utils From ad4c80af13d1e39806cd8044d032f05909b65b2c Mon Sep 17 00:00:00 2001 From: antonio2368 Date: Tue, 16 Mar 2021 09:05:38 +0100 Subject: [PATCH 12/63] Add memory limit clause for cypher queries (#106) * Add memory limit clause for cypher queries * Add PROCEDURE and QUERY keywords * Improve memory limit logs * Update CHANGELOG --- CHANGELOG.md | 7 + src/query/CMakeLists.txt | 1 + 
src/query/frontend/ast/ast.lcp | 6 +- .../frontend/ast/cypher_main_visitor.cpp | 46 +++++-- .../frontend/opencypher/grammar/Cypher.g4 | 14 +- .../opencypher/grammar/CypherLexer.g4 | 2 + src/query/frontend/stripped.cpp | 3 +- .../frontend/stripped_lexer_constants.hpp | 2 +- src/query/interpret/eval.cpp | 24 ++++ src/query/interpret/eval.hpp | 11 +- src/query/plan/operator.cpp | 16 +-- tests/unit/cypher_main_visitor.cpp | 129 ++++++++++++++++-- 12 files changed, 216 insertions(+), 45 deletions(-) create mode 100644 src/query/interpret/eval.cpp diff --git a/CHANGELOG.md b/CHANGELOG.md index b09c241f5..6d166530e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,11 @@ ## Future +### Breaking Changes + +* Changed `MEMORY LIMIT num (KB|MB)` clause in the procedure calls to `PROCEDURE MEMORY LIMIT num (KB|MB)`. + The functionality is still the same. + ### Major Feature and Improvements * Added replication to community version. @@ -15,6 +20,8 @@ memgraph can allocate during its runtime. * Added `FREE MEMORY` query which tries to free unusued memory chunks in different parts of storage. * Added the memory limit and amount of currently allocated bytes in the result of `SHOW STORAGE INFO` query. +* Added `QUERY MEMORY LIMIT num (KB|MB)` to Cypher queries which allows you to limit memory allocation for + the entire query. It can be added only at the end of the entire Cypher query. 
### Bug Fixes diff --git a/src/query/CMakeLists.txt b/src/query/CMakeLists.txt index 17626e681..d68774818 100644 --- a/src/query/CMakeLists.txt +++ b/src/query/CMakeLists.txt @@ -17,6 +17,7 @@ set(mg_query_sources frontend/semantic/symbol_generator.cpp frontend/stripped.cpp interpret/awesome_memgraph_functions.cpp + interpret/eval.cpp interpreter.cpp plan/operator.cpp plan/preprocess.cpp diff --git a/src/query/frontend/ast/ast.lcp b/src/query/frontend/ast/ast.lcp index aaa855ac5..b42e07a08 100644 --- a/src/query/frontend/ast/ast.lcp +++ b/src/query/frontend/ast/ast.lcp @@ -1543,7 +1543,11 @@ cpp<# :scope :public :slk-save #'slk-save-ast-vector :slk-load (slk-load-ast-vector "CypherUnion") - :documentation "Contains remaining queries that should form and union with `single_query_`.")) + :documentation "Contains remaining queries that should form and union with `single_query_`.") + (memory-limit "Expression *" :initval "nullptr" :scope :public + :slk-save #'slk-save-ast-pointer + :slk-load (slk-load-ast-pointer "Expression")) + (memory-scale "size_t" :initval "1024U" :scope :public)) (:public #>cpp CypherQuery() = default; diff --git a/src/query/frontend/ast/cypher_main_visitor.cpp b/src/query/frontend/ast/cypher_main_visitor.cpp index 0c7fb2b33..7773ace33 100644 --- a/src/query/frontend/ast/cypher_main_visitor.cpp +++ b/src/query/frontend/ast/cypher_main_visitor.cpp @@ -33,6 +33,28 @@ namespace query::frontend { const std::string CypherMainVisitor::kAnonPrefix = "anon"; +namespace { +template +std::optional> VisitMemoryLimit( + MemgraphCypher::MemoryLimitContext *memory_limit_ctx, TVisitor *visitor) { + MG_ASSERT(memory_limit_ctx); + if (memory_limit_ctx->UNLIMITED()) { + return std::nullopt; + } + + auto memory_limit = memory_limit_ctx->literal()->accept(visitor); + size_t memory_scale = 1024U; + if (memory_limit_ctx->MB()) { + memory_scale = 1024U * 1024U; + } else { + MG_ASSERT(memory_limit_ctx->KB()); + memory_scale = 1024U; + } + + return 
std::make_pair(memory_limit, memory_scale); +} +} // namespace + antlrcpp::Any CypherMainVisitor::visitExplainQuery(MemgraphCypher::ExplainQueryContext *ctx) { MG_ASSERT(ctx->children.size() == 2, "ExplainQuery should have exactly two children!"); auto *cypher_query = ctx->children[1]->accept(this).as(); @@ -127,6 +149,14 @@ antlrcpp::Any CypherMainVisitor::visitCypherQuery(MemgraphCypher::CypherQueryCon cypher_query->cypher_unions_.push_back(child->accept(this).as()); } + if (auto *memory_limit_ctx = ctx->queryMemoryLimit()) { + const auto memory_limit_info = VisitMemoryLimit(memory_limit_ctx->memoryLimit(), this); + if (memory_limit_info) { + cypher_query->memory_limit_ = memory_limit_info->first; + cypher_query->memory_scale_ = memory_limit_info->second; + } + } + query_ = cypher_query; return cypher_query; } @@ -489,21 +519,19 @@ antlrcpp::Any CypherMainVisitor::visitCallProcedure(MemgraphCypher::CallProcedur for (auto *expr : ctx->expression()) { call_proc->arguments_.push_back(expr->accept(this)); } - if (auto *memory_limit_ctx = ctx->callProcedureMemoryLimit()) { - if (memory_limit_ctx->LIMIT()) { - call_proc->memory_limit_ = memory_limit_ctx->literal()->accept(this); - if (memory_limit_ctx->MB()) { - call_proc->memory_scale_ = 1024U * 1024U; - } else { - MG_ASSERT(memory_limit_ctx->KB()); - call_proc->memory_scale_ = 1024U; - } + + if (auto *memory_limit_ctx = ctx->procedureMemoryLimit()) { + const auto memory_limit_info = VisitMemoryLimit(memory_limit_ctx->memoryLimit(), this); + if (memory_limit_info) { + call_proc->memory_limit_ = memory_limit_info->first; + call_proc->memory_scale_ = memory_limit_info->second; } } else { // Default to 100 MB call_proc->memory_limit_ = storage_->Create(TypedValue(100)); call_proc->memory_scale_ = 1024U * 1024U; } + auto *yield_ctx = ctx->yieldProcedureResults(); if (!yield_ctx) { const auto &maybe_found = diff --git a/src/query/frontend/opencypher/grammar/Cypher.g4 b/src/query/frontend/opencypher/grammar/Cypher.g4 index 
c26a331c2..b2276389b 100644 --- a/src/query/frontend/opencypher/grammar/Cypher.g4 +++ b/src/query/frontend/opencypher/grammar/Cypher.g4 @@ -52,7 +52,7 @@ explainQuery : EXPLAIN cypherQuery ; profileQuery : PROFILE cypherQuery ; -cypherQuery : singleQuery ( cypherUnion )* ; +cypherQuery : singleQuery ( cypherUnion )* ( queryMemoryLimit )? ; indexQuery : createIndex | dropIndex; @@ -106,14 +106,18 @@ with : WITH ( DISTINCT )? returnBody ( where )? ; cypherReturn : RETURN ( DISTINCT )? returnBody ; -callProcedure : CALL procedureName '(' ( expression ( ',' expression )* )? ')' ( callProcedureMemoryLimit )? ( yieldProcedureResults )? ; +callProcedure : CALL procedureName '(' ( expression ( ',' expression )* )? ')' ( procedureMemoryLimit )? ( yieldProcedureResults )? ; procedureName : symbolicName ( '.' symbolicName )* ; -callProcedureMemoryLimit : MEMORY ( UNLIMITED | LIMIT literal ( MB | KB ) ) ; - yieldProcedureResults : YIELD ( '*' | ( procedureResult ( ',' procedureResult )* ) ) ; +memoryLimit : MEMORY ( UNLIMITED | LIMIT literal ( MB | KB ) ) ; + +queryMemoryLimit : QUERY memoryLimit ; + +procedureMemoryLimit : PROCEDURE memoryLimit ; + procedureResult : ( variable AS variable ) | variable ; returnBody : returnItems ( order )? ( skip )? ( limit )? 
; @@ -357,7 +361,9 @@ cypherKeyword : ALL | OPTIONAL | OR | ORDER + | PROCEDURE | PROFILE + | QUERY | REDUCE | REMOVE | RETURN diff --git a/src/query/frontend/opencypher/grammar/CypherLexer.g4 b/src/query/frontend/opencypher/grammar/CypherLexer.g4 index c36114c61..1377fbc82 100644 --- a/src/query/frontend/opencypher/grammar/CypherLexer.g4 +++ b/src/query/frontend/opencypher/grammar/CypherLexer.g4 @@ -118,7 +118,9 @@ ON : O N ; OPTIONAL : O P T I O N A L ; OR : O R ; ORDER : O R D E R ; +PROCEDURE : P R O C E D U R E ; PROFILE : P R O F I L E ; +QUERY : Q U E R Y ; REDUCE : R E D U C E ; REMOVE : R E M O V E ; RETURN : R E T U R N ; diff --git a/src/query/frontend/stripped.cpp b/src/query/frontend/stripped.cpp index 4f8988389..8f2a3e67d 100644 --- a/src/query/frontend/stripped.cpp +++ b/src/query/frontend/stripped.cpp @@ -168,7 +168,8 @@ StrippedQuery::StrippedQuery(const std::string &query) : original_(query) { for (; jt != tokens.end() && (jt->second != "," || num_open_braces || num_open_parantheses || num_open_brackets) && !utils::IEquals(jt->second, "order") && !utils::IEquals(jt->second, "skip") && - !utils::IEquals(jt->second, "limit") && !utils::IEquals(jt->second, "union") && jt->second != ";"; + !utils::IEquals(jt->second, "limit") && !utils::IEquals(jt->second, "union") && + !utils::IEquals(jt->second, "query") && jt->second != ";"; ++jt) { if (jt->second == "(") { ++num_open_parantheses; diff --git a/src/query/frontend/stripped_lexer_constants.hpp b/src/query/frontend/stripped_lexer_constants.hpp index 71949c4a7..a73b777a6 100644 --- a/src/query/frontend/stripped_lexer_constants.hpp +++ b/src/query/frontend/stripped_lexer_constants.hpp @@ -87,7 +87,7 @@ const trie::Trie kKeywords = { "single", "true", "false", "reduce", "coalesce", "user", "password", "alter", "drop", "show", "stats", "unique", "explain", "profile", "storage", "index", "info", "exists", "assert", "constraint", "node", "key", "dump", "database", "call", "yield", "memory", - "mb", "kb", 
"unlimited", "free"}; + "mb", "kb", "unlimited", "free", "procedure", "query"}; // Unicode codepoints that are allowed at the start of the unescaped name. const std::bitset kUnescapedNameAllowedStarts( diff --git a/src/query/interpret/eval.cpp b/src/query/interpret/eval.cpp new file mode 100644 index 000000000..6d32d38aa --- /dev/null +++ b/src/query/interpret/eval.cpp @@ -0,0 +1,24 @@ +#include "query/interpret/eval.hpp" + +namespace query { + +int64_t EvaluateInt(ExpressionEvaluator *evaluator, Expression *expr, const std::string &what) { + TypedValue value = expr->Accept(*evaluator); + try { + return value.ValueInt(); + } catch (TypedValueException &e) { + throw QueryRuntimeException(what + " must be an int"); + } +} + +std::optional EvaluateMemoryLimit(ExpressionEvaluator *eval, Expression *memory_limit, size_t memory_scale) { + if (!memory_limit) return std::nullopt; + auto limit_value = memory_limit->Accept(*eval); + if (!limit_value.IsInt() || limit_value.ValueInt() <= 0) + throw QueryRuntimeException("Memory limit must be a non-negative integer."); + size_t limit = limit_value.ValueInt(); + if (std::numeric_limits::max() / memory_scale < limit) throw QueryRuntimeException("Memory limit overflow."); + return limit * memory_scale; +} + +} // namespace query diff --git a/src/query/interpret/eval.hpp b/src/query/interpret/eval.hpp index d0ffec3fc..40d0b1abb 100644 --- a/src/query/interpret/eval.hpp +++ b/src/query/interpret/eval.hpp @@ -656,13 +656,8 @@ class ExpressionEvaluator : public ExpressionVisitor { /// @param what - Name of what's getting evaluated. Used for user feedback (via /// exception) when the evaluated value is not an int. /// @throw QueryRuntimeException if expression doesn't evaluate to an int. 
-inline int64_t EvaluateInt(ExpressionEvaluator *evaluator, Expression *expr, const std::string &what) { - TypedValue value = expr->Accept(*evaluator); - try { - return value.ValueInt(); - } catch (TypedValueException &e) { - throw QueryRuntimeException(what + " must be an int"); - } -} +int64_t EvaluateInt(ExpressionEvaluator *evaluator, Expression *expr, const std::string &what); + +std::optional EvaluateMemoryLimit(ExpressionEvaluator *eval, Expression *memory_limit, size_t memory_scale); } // namespace query diff --git a/src/query/plan/operator.cpp b/src/query/plan/operator.cpp index 4de436c88..7511aab03 100644 --- a/src/query/plan/operator.cpp +++ b/src/query/plan/operator.cpp @@ -33,6 +33,7 @@ #include "utils/pmr/unordered_map.hpp" #include "utils/pmr/unordered_set.hpp" #include "utils/pmr/vector.hpp" +#include "utils/readable_size.hpp" #include "utils/string.hpp" // macro for the default implementation of LogicalOperator::Accept @@ -3486,16 +3487,6 @@ std::unordered_map CallProcedure::GetAndResetCounters() { namespace { -std::optional EvalMemoryLimit(ExpressionEvaluator *eval, Expression *memory_limit, size_t memory_scale) { - if (!memory_limit) return std::nullopt; - auto limit_value = memory_limit->Accept(*eval); - if (!limit_value.IsInt() || limit_value.ValueInt() <= 0) - throw QueryRuntimeException("Memory limit must be a non-negative integer."); - size_t limit = limit_value.ValueInt(); - if (std::numeric_limits::max() / memory_scale < limit) throw QueryRuntimeException("Memory limit overflow."); - return limit * memory_scale; -} - void CallCustomProcedure(const std::string_view &fully_qualified_procedure_name, const mgp_proc &proc, const std::vector &args, const mgp_graph &graph, ExpressionEvaluator *evaluator, utils::MemoryResource *memory, std::optional memory_limit, mgp_result *result) { @@ -3545,7 +3536,8 @@ void CallCustomProcedure(const std::string_view &fully_qualified_procedure_name, proc_args.elems.emplace_back(std::get<2>(proc.opt_args[i]), 
&graph); } if (memory_limit) { - SPDLOG_INFO("Running '{}' with memory limit of {} bytes", fully_qualified_procedure_name, *memory_limit); + SPDLOG_INFO("Running '{}' with memory limit of {}", fully_qualified_procedure_name, + utils::GetReadableSize(*memory_limit)); utils::LimitedMemoryResource limited_mem(memory, *memory_limit); mgp_memory proc_memory{&limited_mem}; MG_ASSERT(result->signature == &proc.results); @@ -3624,7 +3616,7 @@ class CallProcedureCursor : public Cursor { // TODO: This will probably need to be changed when we add support for // generator like procedures which yield a new result on each invocation. auto *memory = context.evaluation_context.memory; - auto memory_limit = EvalMemoryLimit(&evaluator, self_->memory_limit_, self_->memory_scale_); + auto memory_limit = EvaluateMemoryLimit(&evaluator, self_->memory_limit_, self_->memory_scale_); mgp_graph graph{context.db_accessor, graph_view, &context}; CallCustomProcedure(self_->procedure_name_, *proc, self_->arguments_, graph, &evaluator, memory, memory_limit, &result_); diff --git a/tests/unit/cypher_main_visitor.cpp b/tests/unit/cypher_main_visitor.cpp index 46c7847fa..294cdbf24 100644 --- a/tests/unit/cypher_main_visitor.cpp +++ b/tests/unit/cypher_main_visitor.cpp @@ -18,6 +18,7 @@ #include #include +#include "query/exceptions.hpp" #include "query/frontend/ast/ast.hpp" #include "query/frontend/ast/cypher_main_visitor.hpp" #include "query/frontend/opencypher/parser.hpp" @@ -2730,7 +2731,8 @@ TEST_P(CypherMainVisitorTest, CallWithoutYield) { TEST_P(CypherMainVisitorTest, CallWithMemoryLimitWithoutYield) { auto &ast_generator = *GetParam(); - auto *query = dynamic_cast(ast_generator.ParseQuery("CALL mg.load_all() MEMORY LIMIT 32 KB")); + auto *query = + dynamic_cast(ast_generator.ParseQuery("CALL mg.load_all() PROCEDURE MEMORY LIMIT 32 KB")); ASSERT_TRUE(query); ASSERT_TRUE(query->single_query_); auto *single_query = query->single_query_; @@ -2747,7 +2749,7 @@ TEST_P(CypherMainVisitorTest, 
CallWithMemoryLimitWithoutYield) { TEST_P(CypherMainVisitorTest, CallWithMemoryUnlimitedWithoutYield) { auto &ast_generator = *GetParam(); - auto *query = dynamic_cast(ast_generator.ParseQuery("CALL mg.load_all() MEMORY UNLIMITED")); + auto *query = dynamic_cast(ast_generator.ParseQuery("CALL mg.load_all() PROCEDURE MEMORY UNLIMITED")); ASSERT_TRUE(query); ASSERT_TRUE(query->single_query_); auto *single_query = query->single_query_; @@ -2763,8 +2765,8 @@ TEST_P(CypherMainVisitorTest, CallWithMemoryUnlimitedWithoutYield) { TEST_P(CypherMainVisitorTest, CallProcedureWithMemoryLimit) { auto &ast_generator = *GetParam(); - auto *query = - dynamic_cast(ast_generator.ParseQuery("CALL proc.with.dots() MEMORY LIMIT 32 MB YIELD res")); + auto *query = dynamic_cast( + ast_generator.ParseQuery("CALL proc.with.dots() PROCEDURE MEMORY LIMIT 32 MB YIELD res")); ASSERT_TRUE(query); ASSERT_TRUE(query->single_query_); auto *single_query = query->single_query_; @@ -2788,8 +2790,8 @@ TEST_P(CypherMainVisitorTest, CallProcedureWithMemoryLimit) { TEST_P(CypherMainVisitorTest, CallProcedureWithMemoryUnlimited) { auto &ast_generator = *GetParam(); - auto *query = - dynamic_cast(ast_generator.ParseQuery("CALL proc.with.dots() MEMORY UNLIMITED YIELD res")); + auto *query = dynamic_cast( + ast_generator.ParseQuery("CALL proc.with.dots() PROCEDURE MEMORY UNLIMITED YIELD res")); ASSERT_TRUE(query); ASSERT_TRUE(query->single_query_); auto *single_query = query->single_query_; @@ -2822,12 +2824,10 @@ TEST_P(CypherMainVisitorTest, IncorrectCallProcedure) { ASSERT_THROW(ast_generator.ParseQuery("RETURN 42, CALL procedure() YIELD"), SyntaxException); ASSERT_THROW(ast_generator.ParseQuery("RETURN 42, CALL procedure() YIELD res"), SyntaxException); ASSERT_THROW(ast_generator.ParseQuery("RETURN 42 AS x CALL procedure() YIELD res"), SemanticException); - ASSERT_THROW(ast_generator.ParseQuery("CALL proc.with.dots() YIELD res MEMORY UNLIMITED"), SyntaxException); - 
ASSERT_THROW(ast_generator.ParseQuery("CALL proc.with.dots() YIELD res MEMORY LIMIT 32 KB"), SyntaxException); ASSERT_THROW(ast_generator.ParseQuery("CALL proc.with.dots() MEMORY YIELD res"), SyntaxException); // mg.procedures returns something, so it needs to have a YIELD. ASSERT_THROW(ast_generator.ParseQuery("CALL mg.procedures()"), SemanticException); - ASSERT_THROW(ast_generator.ParseQuery("CALL mg.procedures() MEMORY UNLIMITED"), SemanticException); + ASSERT_THROW(ast_generator.ParseQuery("CALL mg.procedures() PROCEDURE MEMORY UNLIMITED"), SemanticException); // TODO: Implement support for the following syntax. These are defined in // Neo4j and accepted in openCypher CIP. ASSERT_THROW(ast_generator.ParseQuery("CALL proc"), SyntaxException); @@ -2935,4 +2935,115 @@ TEST_P(CypherMainVisitorTest, TestLoadCsvClause) { } } +TEST_P(CypherMainVisitorTest, MemoryLimit) { + auto &ast_generator = *GetParam(); + + ASSERT_THROW(ast_generator.ParseQuery("RETURN x QUE"), SyntaxException); + ASSERT_THROW(ast_generator.ParseQuery("RETURN x QUERY"), SyntaxException); + ASSERT_THROW(ast_generator.ParseQuery("RETURN x QUERY MEM"), SyntaxException); + ASSERT_THROW(ast_generator.ParseQuery("RETURN x QUERY MEMORY"), SyntaxException); + ASSERT_THROW(ast_generator.ParseQuery("RETURN x QUERY MEMORY LIM"), SyntaxException); + ASSERT_THROW(ast_generator.ParseQuery("RETURN x QUERY MEMORY LIMIT"), SyntaxException); + ASSERT_THROW(ast_generator.ParseQuery("RETURN x QUERY MEMORY LIMIT KB"), SyntaxException); + ASSERT_THROW(ast_generator.ParseQuery("RETURN x QUERY MEMORY LIMIT 12GB"), SyntaxException); + ASSERT_THROW(ast_generator.ParseQuery("QUERY MEMORY LIMIT 12KB RETURN x"), SyntaxException); + + { + auto *query = dynamic_cast(ast_generator.ParseQuery("RETURN x")); + ASSERT_TRUE(query); + ASSERT_FALSE(query->memory_limit_); + } + + { + auto *query = dynamic_cast(ast_generator.ParseQuery("RETURN x QUERY MEMORY LIMIT 12KB")); + ASSERT_TRUE(query); + ASSERT_TRUE(query->memory_limit_); + 
ast_generator.CheckLiteral(query->memory_limit_, 12); + ASSERT_EQ(query->memory_scale_, 1024U); + } + + { + auto *query = dynamic_cast(ast_generator.ParseQuery("RETURN x QUERY MEMORY LIMIT 12MB")); + ASSERT_TRUE(query); + ASSERT_TRUE(query->memory_limit_); + ast_generator.CheckLiteral(query->memory_limit_, 12); + ASSERT_EQ(query->memory_scale_, 1024U * 1024U); + } + + { + auto *query = dynamic_cast( + ast_generator.ParseQuery("CALL mg.procedures() YIELD x RETURN x QUERY MEMORY LIMIT 12MB")); + ASSERT_TRUE(query); + ASSERT_TRUE(query->memory_limit_); + ast_generator.CheckLiteral(query->memory_limit_, 12); + ASSERT_EQ(query->memory_scale_, 1024U * 1024U); + + ASSERT_TRUE(query->single_query_); + auto *single_query = query->single_query_; + ASSERT_EQ(single_query->clauses_.size(), 2U); + auto *call_proc = dynamic_cast(single_query->clauses_[0]); + CheckCallProcedureDefaultMemoryLimit(ast_generator, *call_proc); + } + + { + auto *query = dynamic_cast(ast_generator.ParseQuery( + "CALL mg.procedures() PROCEDURE MEMORY LIMIT 3KB YIELD x RETURN x QUERY MEMORY LIMIT 12MB")); + ASSERT_TRUE(query); + ASSERT_TRUE(query->memory_limit_); + ast_generator.CheckLiteral(query->memory_limit_, 12); + ASSERT_EQ(query->memory_scale_, 1024U * 1024U); + + ASSERT_TRUE(query->single_query_); + auto *single_query = query->single_query_; + ASSERT_EQ(single_query->clauses_.size(), 2U); + auto *call_proc = dynamic_cast(single_query->clauses_[0]); + ASSERT_TRUE(call_proc->memory_limit_); + ast_generator.CheckLiteral(call_proc->memory_limit_, 3); + ASSERT_EQ(call_proc->memory_scale_, 1024U); + } + + { + auto *query = dynamic_cast( + ast_generator.ParseQuery("CALL mg.procedures() PROCEDURE MEMORY LIMIT 3KB YIELD x RETURN x")); + ASSERT_TRUE(query); + ASSERT_FALSE(query->memory_limit_); + + ASSERT_TRUE(query->single_query_); + auto *single_query = query->single_query_; + ASSERT_EQ(single_query->clauses_.size(), 2U); + auto *call_proc = dynamic_cast(single_query->clauses_[0]); + 
ASSERT_TRUE(call_proc->memory_limit_); + ast_generator.CheckLiteral(call_proc->memory_limit_, 3); + ASSERT_EQ(call_proc->memory_scale_, 1024U); + } + + { + auto *query = + dynamic_cast(ast_generator.ParseQuery("CALL mg.load_all() PROCEDURE MEMORY LIMIT 3KB")); + ASSERT_TRUE(query); + ASSERT_FALSE(query->memory_limit_); + + ASSERT_TRUE(query->single_query_); + auto *single_query = query->single_query_; + ASSERT_EQ(single_query->clauses_.size(), 1U); + auto *call_proc = dynamic_cast(single_query->clauses_[0]); + ASSERT_TRUE(call_proc->memory_limit_); + ast_generator.CheckLiteral(call_proc->memory_limit_, 3); + ASSERT_EQ(call_proc->memory_scale_, 1024U); + } + + { + auto *query = dynamic_cast(ast_generator.ParseQuery("CALL mg.load_all() QUERY MEMORY LIMIT 3KB")); + ASSERT_TRUE(query); + ASSERT_TRUE(query->memory_limit_); + ast_generator.CheckLiteral(query->memory_limit_, 3); + ASSERT_EQ(query->memory_scale_, 1024U); + + ASSERT_TRUE(query->single_query_); + auto *single_query = query->single_query_; + ASSERT_EQ(single_query->clauses_.size(), 1U); + auto *call_proc = dynamic_cast(single_query->clauses_[0]); + CheckCallProcedureDefaultMemoryLimit(ast_generator, *call_proc); + } +} } // namespace From e8e4cd7f977f9ab459bfb3da03d12a2750f3ffe8 Mon Sep 17 00:00:00 2001 From: antonio2368 Date: Fri, 19 Mar 2021 08:43:48 +0100 Subject: [PATCH 13/63] Memory control e2e test (#115) * Add memory control e2e test * Fix cmake for jemalloc --- .github/workflows/diff.yaml | 8 ++++ .github/workflows/release_centos8.yaml | 8 ++++ .github/workflows/release_debian10.yaml | 8 ++++ .github/workflows/release_ubuntu2004.yaml | 8 ++++ libs/jemalloc.cmake | 2 +- tests/e2e/CMakeLists.txt | 2 + tests/e2e/memory/CMakeLists.txt | 2 + tests/e2e/memory/memory_control.cpp | 56 +++++++++++++++++++++++ tests/e2e/memory/workloads.yaml | 15 ++++++ tests/e2e/runner.py | 2 +- 10 files changed, 109 insertions(+), 2 deletions(-) create mode 100644 tests/e2e/memory/CMakeLists.txt create mode 100644 
tests/e2e/memory/memory_control.cpp create mode 100644 tests/e2e/memory/workloads.yaml diff --git a/.github/workflows/diff.yaml b/.github/workflows/diff.yaml index 10bf2de97..50ede0351 100644 --- a/.github/workflows/diff.yaml +++ b/.github/workflows/diff.yaml @@ -299,6 +299,14 @@ jobs: cd e2e LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-path replication/workloads.yaml + - name: Run e2e memory control tests + run: | + cd tests + ./setup.sh + source ve3/bin/activate + cd e2e + LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-path memory/workloads.yaml + - name: Run stress test (plain) run: | cd tests/stress diff --git a/.github/workflows/release_centos8.yaml b/.github/workflows/release_centos8.yaml index 49584f5bd..b7433416d 100644 --- a/.github/workflows/release_centos8.yaml +++ b/.github/workflows/release_centos8.yaml @@ -292,6 +292,14 @@ jobs: cd e2e LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-path replication/workloads.yaml + - name: Run e2e memory control tests + run: | + cd tests + ./setup.sh + source ve3/bin/activate + cd e2e + LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-path memory/workloads.yaml + - name: Run stress test (plain) run: | cd tests/stress diff --git a/.github/workflows/release_debian10.yaml b/.github/workflows/release_debian10.yaml index 7a728fef8..da94818ea 100644 --- a/.github/workflows/release_debian10.yaml +++ b/.github/workflows/release_debian10.yaml @@ -290,6 +290,14 @@ jobs: cd e2e LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-path replication/workloads.yaml + - name: Run e2e memory control tests + run: | + cd tests + ./setup.sh + source ve3/bin/activate + cd e2e + LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-path memory/workloads.yaml + - name: Run stress test (plain) run: | cd tests/stress diff 
--git a/.github/workflows/release_ubuntu2004.yaml b/.github/workflows/release_ubuntu2004.yaml index 90386fce5..41421b5a7 100644 --- a/.github/workflows/release_ubuntu2004.yaml +++ b/.github/workflows/release_ubuntu2004.yaml @@ -290,6 +290,14 @@ jobs: cd e2e LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-path replication/workloads.yaml + - name: Run e2e memory control tests + run: | + cd tests + ./setup.sh + source ve3/bin/activate + cd e2e + LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-path memory/workloads.yaml + - name: Run stress test (plain) run: | cd tests/stress diff --git a/libs/jemalloc.cmake b/libs/jemalloc.cmake index 4625f98bc..390b92a44 100644 --- a/libs/jemalloc.cmake +++ b/libs/jemalloc.cmake @@ -47,6 +47,6 @@ endif() target_compile_options(jemalloc PRIVATE -Wno-redundant-decls) # for RTLD_NEXT -target_compile_options(jemalloc PRIVATE -D_GNU_SOURCE) +target_compile_definitions(jemalloc PRIVATE _GNU_SOURCE) set_property(TARGET jemalloc APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS USE_JEMALLOC=1) diff --git a/tests/e2e/CMakeLists.txt b/tests/e2e/CMakeLists.txt index 29749fd19..9d6d7bd8e 100644 --- a/tests/e2e/CMakeLists.txt +++ b/tests/e2e/CMakeLists.txt @@ -1 +1,3 @@ add_subdirectory(replication) + +add_subdirectory(memory) diff --git a/tests/e2e/memory/CMakeLists.txt b/tests/e2e/memory/CMakeLists.txt new file mode 100644 index 000000000..4e258f61a --- /dev/null +++ b/tests/e2e/memory/CMakeLists.txt @@ -0,0 +1,2 @@ +add_executable(memgraph__e2e__memory__control memory_control.cpp) +target_link_libraries(memgraph__e2e__memory__control gflags mgclient mg-utils mg-io Threads::Threads) diff --git a/tests/e2e/memory/memory_control.cpp b/tests/e2e/memory/memory_control.cpp new file mode 100644 index 000000000..537fba5e7 --- /dev/null +++ b/tests/e2e/memory/memory_control.cpp @@ -0,0 +1,56 @@ +#include +#include + +#include "utils/logging.hpp" +#include "utils/timer.hpp" + 
+DEFINE_uint64(bolt_port, 7687, "Bolt port"); +DEFINE_uint64(timeout, 120, "Timeout seconds"); + +int main(int argc, char **argv) { + google::SetUsageMessage("Memgraph E2E Memory Control"); + gflags::ParseCommandLineFlags(&argc, &argv, true); + logging::RedirectToStderr(); + + mg::Client::Init(); + + auto client = + mg::Client::Connect({.host = "127.0.0.1", .port = static_cast(FLAGS_bolt_port), .use_ssl = false}); + if (!client) { + LOG_FATAL("Failed to connect!"); + } + + client->Execute("MATCH (n) DETACH DELETE n;"); + client->DiscardAll(); + + const auto *create_query = "UNWIND range(1, 50) as u CREATE (n {string: \"Some longer string\"}) RETURN n;"; + + utils::Timer timer; + while (true) { + if (timer.Elapsed>().count() > FLAGS_timeout) { + LOG_FATAL("The test timed out"); + } + client->Execute(create_query); + if (!client->FetchOne()) { + break; + } + client->DiscardAll(); + } + + spdlog::info("Memgraph is out of memory"); + + spdlog::info("Cleaning up unused memory"); + client->Execute("MATCH (n) DETACH DELETE n;"); + client->DiscardAll(); + client->Execute("FREE MEMORY;"); + client->DiscardAll(); + + // now it should succeed + spdlog::info("Retrying the query with the memory cleaned up"); + client->Execute(create_query); + if (!client->FetchOne()) { + LOG_FATAL("Memgraph is still out of memory"); + } + + return 0; +} diff --git a/tests/e2e/memory/workloads.yaml b/tests/e2e/memory/workloads.yaml new file mode 100644 index 000000000..147b87b04 --- /dev/null +++ b/tests/e2e/memory/workloads.yaml @@ -0,0 +1,15 @@ +bolt_port: &bolt_port "7687" +template_cluster: &template_cluster + cluster: + main: + args: ["--bolt-port", *bolt_port, "--memory-limit=500", "--storage-gc-cycle-sec=180"] + setup_queries: [] + validation_queries: [] + +workloads: + - name: "Memory control" + binary: "tests/e2e/memory/memgraph__e2e__memory__control" + args: ["--bolt-port", *bolt_port, "--timeout", "180"] + <<: *template_cluster + + diff --git a/tests/e2e/runner.py 
b/tests/e2e/runner.py index d1d29ac23..453c8d3de 100755 --- a/tests/e2e/runner.py +++ b/tests/e2e/runner.py @@ -12,7 +12,7 @@ PROJECT_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "..", "..")) BUILD_DIR = os.path.join(PROJECT_DIR, "build") MEMGRAPH_BINARY = os.path.join(BUILD_DIR, "memgraph") -log = logging.getLogger("memgraph.tests.e2e.replication") +log = logging.getLogger("memgraph.tests.e2e") def load_args(): From ee555b0c0de469f5a61008a92a72882ce06ad97f Mon Sep 17 00:00:00 2001 From: antonio2368 Date: Wed, 24 Mar 2021 12:24:59 +0100 Subject: [PATCH 14/63] Link jemalloc with Threads lib (#122) --- libs/jemalloc.cmake | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libs/jemalloc.cmake b/libs/jemalloc.cmake index 390b92a44..4e95c55e6 100644 --- a/libs/jemalloc.cmake +++ b/libs/jemalloc.cmake @@ -39,6 +39,9 @@ set(JEMALLOC_SRCS add_library(jemalloc ${JEMALLOC_SRCS}) target_include_directories(jemalloc PUBLIC "${JEMALLOC_DIR}/include") +find_package(Threads REQUIRED) +target_link_libraries(jemalloc PUBLIC Threads::Threads) + target_compile_definitions(jemalloc PRIVATE -DJEMALLOC_NO_PRIVATE_NAMESPACE) if (CMAKE_BUILD_TYPE STREQUAL "DEBUG") From 60da033010fe2bb358a050e00bd660f416ca4cd4 Mon Sep 17 00:00:00 2001 From: antonio2368 Date: Thu, 25 Mar 2021 08:19:00 +0100 Subject: [PATCH 15/63] Use maximum number of threads for test (#124) --- .github/workflows/diff.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/diff.yaml b/.github/workflows/diff.yaml index 50ede0351..67be98576 100644 --- a/.github/workflows/diff.yaml +++ b/.github/workflows/diff.yaml @@ -42,7 +42,7 @@ jobs: # Run unit tests. cd build - ctest -R memgraph__unit --output-on-failure + ctest -R memgraph__unit --output-on-failure -j$THREADS - name: Run stress test run: | @@ -99,7 +99,7 @@ jobs: # Run unit tests. 
cd build - ctest -R memgraph__unit --output-on-failure + ctest -R memgraph__unit --output-on-failure -j$THREADS - name: Compute code coverage run: | From 50ddd594501fc5cb3564a30c9682181b759e7145 Mon Sep 17 00:00:00 2001 From: antonio2368 Date: Fri, 26 Mar 2021 12:05:58 +0100 Subject: [PATCH 16/63] Fix Centos8 build (#126) * Increase server wait * Use standard shebang for python scripts --- src/auth/reference_modules/example.py | 2 +- src/auth/reference_modules/ldap.py | 2 +- tests/integration/ldap/runner.py | 2 +- tests/integration/telemetry/runner.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/auth/reference_modules/example.py b/src/auth/reference_modules/example.py index e1898f4f6..d40f34892 100755 --- a/src/auth/reference_modules/example.py +++ b/src/auth/reference_modules/example.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/python3 import json import io diff --git a/src/auth/reference_modules/ldap.py b/src/auth/reference_modules/ldap.py index ac090a09c..761db8fd6 100755 --- a/src/auth/reference_modules/ldap.py +++ b/src/auth/reference_modules/ldap.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/python3 import json import io import ssl diff --git a/tests/integration/ldap/runner.py b/tests/integration/ldap/runner.py index 33f575535..f5f8b4c5b 100755 --- a/tests/integration/ldap/runner.py +++ b/tests/integration/ldap/runner.py @@ -73,7 +73,7 @@ class Memgraph: virtualenv_bin = os.path.join(SCRIPT_DIR, "ve3", "bin", "python3") with open(script_file) as fin: data = fin.read() - data = data.replace("/usr/bin/env python3", virtualenv_bin) + data = data.replace("/usr/bin/python3", virtualenv_bin) data = data.replace("/etc/memgraph/auth/ldap.yaml", self._auth_config) with open(self._auth_module, "w") as fout: diff --git a/tests/integration/telemetry/runner.py b/tests/integration/telemetry/runner.py index da958489d..766ba9f43 100755 --- a/tests/integration/telemetry/runner.py +++ b/tests/integration/telemetry/runner.py @@ 
-45,7 +45,7 @@ def execute_test(**kwargs): server = None if start_server: server = subprocess.Popen(list(map(str, server_args))) - time.sleep(0.1) + time.sleep(0.4) assert server.poll() is None, "Server process died prematurely!" try: From 06f761bdf9098b5dfabe41becd292ea7885a8374 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Fri, 26 Mar 2021 15:02:35 +0100 Subject: [PATCH 17/63] Add logs for loading snapshot and WAL files (#121) * Add logs for loading snapshot and WAL files --- src/storage/v2/durability/durability.cpp | 44 ++++++++++++++++--- src/storage/v2/durability/snapshot.cpp | 55 ++++++++++++++++++++++-- src/storage/v2/durability/wal.cpp | 10 ++++- src/utils/logging.hpp | 2 +- tests/e2e/replication/constraints.cpp | 2 +- 5 files changed, 101 insertions(+), 12 deletions(-) diff --git a/src/storage/v2/durability/durability.cpp b/src/storage/v2/durability/durability.cpp index 4fddeaffd..f904099b7 100644 --- a/src/storage/v2/durability/durability.cpp +++ b/src/storage/v2/durability/durability.cpp @@ -102,30 +102,47 @@ std::optional> GetWalFiles(const std::filesystem: // recovery process. void RecoverIndicesAndConstraints(const RecoveredIndicesAndConstraints &indices_constraints, Indices *indices, Constraints *constraints, utils::SkipList *vertices) { + spdlog::info("Recreating indices from metadata."); // Recover label indices. + spdlog::info("Recreating {} label indices from metadata.", indices_constraints.indices.label.size()); for (const auto &item : indices_constraints.indices.label) { if (!indices->label_index.CreateIndex(item, vertices->access())) throw RecoveryFailure("The label index must be created here!"); + spdlog::info("A label index is recreated from metadata."); } + spdlog::info("Label indices are recreated."); // Recover label+property indices. 
+ spdlog::info("Recreating {} label+property indices from metadata.", + indices_constraints.indices.label_property.size()); for (const auto &item : indices_constraints.indices.label_property) { if (!indices->label_property_index.CreateIndex(item.first, item.second, vertices->access())) throw RecoveryFailure("The label+property index must be created here!"); + spdlog::info("A label+property index is recreated from metadata."); } + spdlog::info("Label+property indices are recreated."); + spdlog::info("Indices are recreated."); + spdlog::info("Recreating constraints from metadata."); // Recover existence constraints. + spdlog::info("Recreating {} existence constraints from metadata.", indices_constraints.constraints.existence.size()); for (const auto &item : indices_constraints.constraints.existence) { auto ret = CreateExistenceConstraint(constraints, item.first, item.second, vertices->access()); if (ret.HasError() || !ret.GetValue()) throw RecoveryFailure("The existence constraint must be created here!"); + spdlog::info("A existence constraint is recreated from metadata."); } + spdlog::info("Existence constraints are recreated from metadata."); // Recover unique constraints. 
+ spdlog::info("Recreating {} unique constraints from metadata.", indices_constraints.constraints.unique.size()); for (const auto &item : indices_constraints.constraints.unique) { auto ret = constraints->unique_constraints.CreateConstraint(item.first, item.second, vertices->access()); if (ret.HasError() || ret.GetValue() != UniqueConstraints::CreationStatus::SUCCESS) throw RecoveryFailure("The unique constraint must be created here!"); + spdlog::info("A unique constraint is recreated from metadata."); } + spdlog::info("Unique constraints are recreated from metadata."); + spdlog::info("Constraints are recreated from metadata."); } std::optional RecoverData(const std::filesystem::path &snapshot_directory, @@ -137,7 +154,12 @@ std::optional RecoverData(const std::filesystem::path &snapshot_di Indices *indices, Constraints *constraints, Config::Items items, uint64_t *wal_seq_num) { utils::MemoryTracker::OutOfMemoryExceptionEnabler oom_exception; - if (!utils::DirExists(snapshot_directory) && !utils::DirExists(wal_directory)) return std::nullopt; + spdlog::info("Recovering persisted data using snapshot ({}) and WAL directory ({}).", snapshot_directory, + wal_directory); + if (!utils::DirExists(snapshot_directory) && !utils::DirExists(wal_directory)) { + spdlog::warn("Snapshot or WAL directory don't exist, there is nothing to recover."); + return std::nullopt; + } auto snapshot_files = GetSnapshotFiles(snapshot_directory); @@ -145,6 +167,7 @@ std::optional RecoverData(const std::filesystem::path &snapshot_di RecoveredIndicesAndConstraints indices_constraints; std::optional snapshot_timestamp; if (!snapshot_files.empty()) { + spdlog::info("Try recovering from snapshot directory {}.", snapshot_directory); // Order the files by name std::sort(snapshot_files.begin(), snapshot_files.end()); @@ -157,13 +180,13 @@ std::optional RecoverData(const std::filesystem::path &snapshot_di spdlog::warn("The snapshot file {} isn't related to the latest snapshot file!", path); continue; } 
- spdlog::info("Starting snapshot recovery from {}", path); + spdlog::info("Starting snapshot recovery from {}.", path); try { recovered_snapshot = LoadSnapshot(path, vertices, edges, epoch_history, name_id_mapper, edge_count, items); spdlog::info("Snapshot recovery successful!"); break; } catch (const RecoveryFailure &e) { - spdlog::warn("Couldn't recover snapshot from {} because of: {}", path, e.what()); + spdlog::warn("Couldn't recover snapshot from {} because of: {}.", path, e.what()); continue; } } @@ -181,6 +204,7 @@ std::optional RecoverData(const std::filesystem::path &snapshot_di return recovered_snapshot->recovery_info; } } else { + spdlog::info("No snapshot file was found, collecting information from WAL directory {}.", wal_directory); std::error_code error_code; if (!utils::DirExists(wal_directory)) return std::nullopt; // We use this smaller struct that contains only a subset of information @@ -206,7 +230,10 @@ std::optional RecoverData(const std::filesystem::path &snapshot_di } } MG_ASSERT(!error_code, "Couldn't recover data because an error occurred: {}!", error_code.message()); - if (wal_files.empty()) return std::nullopt; + if (wal_files.empty()) { + spdlog::warn("No snapshot or WAL file found!"); + return std::nullopt; + } std::sort(wal_files.begin(), wal_files.end()); // UUID used for durability is the UUID of the last WAL file. // Same for the epoch id. @@ -215,7 +242,10 @@ std::optional RecoverData(const std::filesystem::path &snapshot_di } auto maybe_wal_files = GetWalFiles(wal_directory, *uuid); - if (!maybe_wal_files) return std::nullopt; + if (!maybe_wal_files) { + spdlog::warn("Couldn't get WAL file info from the WAL directory!"); + return std::nullopt; + } // Array of all discovered WAL files, ordered by sequence number. 
auto &wal_files = *maybe_wal_files; @@ -232,6 +262,7 @@ std::optional RecoverData(const std::filesystem::path &snapshot_di "files that match the last WAL file!"); if (!wal_files.empty()) { + spdlog::info("Checking WAL files."); { const auto &first_wal = wal_files[0]; if (first_wal.seq_num != 0) { @@ -255,6 +286,7 @@ std::optional RecoverData(const std::filesystem::path &snapshot_di } std::optional previous_seq_num; auto last_loaded_timestamp = snapshot_timestamp; + spdlog::info("Trying to load WAL files."); for (auto &wal_file : wal_files) { if (previous_seq_num && (wal_file.seq_num - *previous_seq_num) > 1) { LOG_FATAL("You are missing a WAL file with the sequence number {}!", *previous_seq_num + 1); @@ -290,6 +322,8 @@ std::optional RecoverData(const std::filesystem::path &snapshot_di // The sequence number needs to be recovered even though `LoadWal` didn't // load any deltas from that file. *wal_seq_num = *previous_seq_num + 1; + + spdlog::info("All necessary WAL files are loaded successfully."); } RecoverIndicesAndConstraints(indices_constraints, indices, constraints, vertices); diff --git a/src/storage/v2/durability/snapshot.cpp b/src/storage/v2/durability/snapshot.cpp index 171645b59..db2d82a6c 100644 --- a/src/storage/v2/durability/snapshot.cpp +++ b/src/storage/v2/durability/snapshot.cpp @@ -168,14 +168,15 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis }); // Read snapshot info. - auto info = ReadSnapshotInfo(path); - + const auto info = ReadSnapshotInfo(path); + spdlog::info("Recovering {} vertices and {} edges.", info.vertices_count, info.edges_count); // Check for edges. bool snapshot_has_edges = info.offset_edges != 0; // Recover mapper. 
std::unordered_map snapshot_id_map; { + spdlog::info("Recovering mapper metadata."); if (!snapshot.SetPosition(info.offset_mapper)) throw RecoveryFailure("Couldn't read data from snapshot!"); auto marker = snapshot.ReadMarker(); @@ -191,6 +192,7 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis if (!name) throw RecoveryFailure("Invalid snapshot data!"); auto my_id = name_id_mapper->NameToId(*name); snapshot_id_map.emplace(*id, my_id); + SPDLOG_TRACE("Mapping \"{}\"from snapshot id {} to actual id {}.", *name, *id, my_id); } } auto get_label_from_id = [&snapshot_id_map](uint64_t snapshot_id) { @@ -217,10 +219,11 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis auto edge_acc = edges->access(); uint64_t last_edge_gid = 0; if (snapshot_has_edges) { + spdlog::info("Recovering {} edges.", info.edges_count); if (!snapshot.SetPosition(info.offset_edges)) throw RecoveryFailure("Couldn't read data from snapshot!"); for (uint64_t i = 0; i < info.edges_count; ++i) { { - auto marker = snapshot.ReadMarker(); + const auto marker = snapshot.ReadMarker(); if (!marker || *marker != Marker::SECTION_EDGE) throw RecoveryFailure("Invalid snapshot data!"); } @@ -230,6 +233,7 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis if (!gid) throw RecoveryFailure("Invalid snapshot data!"); if (i > 0 && *gid <= last_edge_gid) throw RecoveryFailure("Invalid snapshot data!"); last_edge_gid = *gid; + spdlog::debug("Recovering edge {} with properties.", *gid); auto [it, inserted] = edge_acc.insert(Edge{Gid::FromUint(*gid), nullptr}); if (!inserted) throw RecoveryFailure("The edge must be inserted here!"); @@ -243,6 +247,8 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis if (!key) throw RecoveryFailure("Invalid snapshot data!"); auto value = snapshot.ReadPropertyValue(); if (!value) throw RecoveryFailure("Invalid snapshot data!"); + SPDLOG_TRACE("Recovered property 
\"{}\" with value \"{}\" for edge {}.", + name_id_mapper->IdToName(snapshot_id_map.at(*key)), *value, *gid); props.SetProperty(get_property_from_id(*key), *value); } } @@ -253,6 +259,7 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis if (i > 0 && *gid <= last_edge_gid) throw RecoveryFailure("Invalid snapshot data!"); last_edge_gid = *gid; + spdlog::debug("Ensuring edge {} doesn't have any properties.", *gid); // Read properties. { auto props_size = snapshot.ReadUint(); @@ -264,12 +271,14 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis } } } + spdlog::info("Edges are recovered."); } // Recover vertices (labels and properties). if (!snapshot.SetPosition(info.offset_vertices)) throw RecoveryFailure("Couldn't read data from snapshot!"); auto vertex_acc = vertices->access(); uint64_t last_vertex_gid = 0; + spdlog::info("Recovering {} vertices.", info.vertices_count); for (uint64_t i = 0; i < info.vertices_count; ++i) { { auto marker = snapshot.ReadMarker(); @@ -283,10 +292,12 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis throw RecoveryFailure("Invalid snapshot data!"); } last_vertex_gid = *gid; + spdlog::debug("Recovering vertex {}.", *gid); auto [it, inserted] = vertex_acc.insert(Vertex{Gid::FromUint(*gid), nullptr}); if (!inserted) throw RecoveryFailure("The vertex must be inserted here!"); // Recover labels. 
+ spdlog::trace("Recovering labels for vertex {}.", *gid); { auto labels_size = snapshot.ReadUint(); if (!labels_size) throw RecoveryFailure("Invalid snapshot data!"); @@ -295,11 +306,14 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis for (uint64_t j = 0; j < *labels_size; ++j) { auto label = snapshot.ReadUint(); if (!label) throw RecoveryFailure("Invalid snapshot data!"); + SPDLOG_TRACE("Recovered label \"{}\" for vertex {}.", name_id_mapper->IdToName(snapshot_id_map.at(*label)), + *gid); labels.emplace_back(get_label_from_id(*label)); } } // Recover properties. + spdlog::trace("Recovering properties for vertex {}.", *gid); { auto props_size = snapshot.ReadUint(); if (!props_size) throw RecoveryFailure("Invalid snapshot data!"); @@ -309,6 +323,8 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis if (!key) throw RecoveryFailure("Invalid snapshot data!"); auto value = snapshot.ReadPropertyValue(); if (!value) throw RecoveryFailure("Invalid snapshot data!"); + SPDLOG_TRACE("Recovered property \"{}\" with value \"{}\" for vertex {}.", + name_id_mapper->IdToName(snapshot_id_map.at(*key)), *value, *gid); props.SetProperty(get_property_from_id(*key), *value); } } @@ -339,8 +355,10 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis if (!edge_type) throw RecoveryFailure("Invalid snapshot data!"); } } + spdlog::info("Vertices are recovered."); // Recover vertices (in/out edges). + spdlog::info("Recovering connectivity."); if (!snapshot.SetPosition(info.offset_vertices)) throw RecoveryFailure("Couldn't read data from snapshot!"); for (auto &vertex : vertex_acc) { { @@ -348,6 +366,7 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis if (!marker || *marker != Marker::SECTION_VERTEX) throw RecoveryFailure("Invalid snapshot data!"); } + spdlog::trace("Recovering connectivity for vertex {}.", vertex.gid.AsUint()); // Check vertex. 
auto gid = snapshot.ReadUint(); if (!gid) throw RecoveryFailure("Invalid snapshot data!"); @@ -377,6 +396,7 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis // Recover in edges. { + spdlog::trace("Recovering inbound edges for vertex {}.", vertex.gid.AsUint()); auto in_size = snapshot.ReadUint(); if (!in_size) throw RecoveryFailure("Invalid snapshot data!"); vertex.in_edges.reserve(*in_size); @@ -404,12 +424,15 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis edge_ref = EdgeRef(&*edge); } } + SPDLOG_TRACE("Recovered inbound edge {} with label \"{}\" from vertex {}.", *edge_gid, + name_id_mapper->IdToName(snapshot_id_map.at(*edge_type)), from_vertex->gid.AsUint()); vertex.in_edges.emplace_back(get_edge_type_from_id(*edge_type), &*from_vertex, edge_ref); } } // Recover out edges. { + spdlog::trace("Recovering outbound edges for vertex {}.", vertex.gid.AsUint()); auto out_size = snapshot.ReadUint(); if (!out_size) throw RecoveryFailure("Invalid snapshot data!"); vertex.out_edges.reserve(*out_size); @@ -437,6 +460,8 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis edge_ref = EdgeRef(&*edge); } } + SPDLOG_TRACE("Recovered outbound edge {} with label \"{}\" to vertex {}.", *edge_gid, + name_id_mapper->IdToName(snapshot_id_map.at(*edge_type)), to_vertex->gid.AsUint()); vertex.out_edges.emplace_back(get_edge_type_from_id(*edge_type), &*to_vertex, edge_ref); } // Increment edge count. We only increment the count here because the @@ -444,6 +469,7 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis edge_count->fetch_add(*out_size, std::memory_order_acq_rel); } } + spdlog::info("Connectivity is recovered."); // Set initial values for edge/vertex ID generators. ret.next_edge_id = last_edge_gid + 1; @@ -452,6 +478,7 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis // Recover indices. 
{ + spdlog::info("Recovering metadata of indices."); if (!snapshot.SetPosition(info.offset_indices)) throw RecoveryFailure("Couldn't read data from snapshot!"); auto marker = snapshot.ReadMarker(); @@ -461,18 +488,22 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis { auto size = snapshot.ReadUint(); if (!size) throw RecoveryFailure("Invalid snapshot data!"); + spdlog::info("Recovering metadata of {} label indices.", *size); for (uint64_t i = 0; i < *size; ++i) { auto label = snapshot.ReadUint(); if (!label) throw RecoveryFailure("Invalid snapshot data!"); AddRecoveredIndexConstraint(&indices_constraints.indices.label, get_label_from_id(*label), "The label index already exists!"); + SPDLOG_TRACE("Recovered metadata of label index for :{}", name_id_mapper->IdToName(snapshot_id_map.at(*label))); } + spdlog::info("Metadata of label indices are recovered."); } // Recover label+property indices. { auto size = snapshot.ReadUint(); if (!size) throw RecoveryFailure("Invalid snapshot data!"); + spdlog::info("Recovering metadata of {} label+property indices.", *size); for (uint64_t i = 0; i < *size; ++i) { auto label = snapshot.ReadUint(); if (!label) throw RecoveryFailure("Invalid snapshot data!"); @@ -481,12 +512,18 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis AddRecoveredIndexConstraint(&indices_constraints.indices.label_property, {get_label_from_id(*label), get_property_from_id(*property)}, "The label+property index already exists!"); + SPDLOG_TRACE("Recovered metadata of label+property index for :{}({})", + name_id_mapper->IdToName(snapshot_id_map.at(*label)), + name_id_mapper->IdToName(snapshot_id_map.at(*property))); } + spdlog::info("Metadata of label+property indices are recovered."); } + spdlog::info("Metadata of indices are recovered."); } // Recover constraints. 
{ + spdlog::info("Recovering metadata of constraints."); if (!snapshot.SetPosition(info.offset_constraints)) throw RecoveryFailure("Couldn't read data from snapshot!"); auto marker = snapshot.ReadMarker(); @@ -496,6 +533,7 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis { auto size = snapshot.ReadUint(); if (!size) throw RecoveryFailure("Invalid snapshot data!"); + spdlog::info("Recovering metadata of {} existence constraints.", *size); for (uint64_t i = 0; i < *size; ++i) { auto label = snapshot.ReadUint(); if (!label) throw RecoveryFailure("Invalid snapshot data!"); @@ -504,7 +542,11 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis AddRecoveredIndexConstraint(&indices_constraints.constraints.existence, {get_label_from_id(*label), get_property_from_id(*property)}, "The existence constraint already exists!"); + SPDLOG_TRACE("Recovered metadata of existence constraint for :{}({})", + name_id_mapper->IdToName(snapshot_id_map.at(*label)), + name_id_mapper->IdToName(snapshot_id_map.at(*property))); } + spdlog::info("Metadata of existence constraints are recovered."); } // Recover unique constraints. 
@@ -513,6 +555,7 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis if (*version >= kUniqueConstraintVersion) { auto size = snapshot.ReadUint(); if (!size) throw RecoveryFailure("Invalid snapshot data!"); + spdlog::info("Recovering metadata of {} unique constraints.", *size); for (uint64_t i = 0; i < *size; ++i) { auto label = snapshot.ReadUint(); if (!label) throw RecoveryFailure("Invalid snapshot data!"); @@ -526,10 +569,15 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis } AddRecoveredIndexConstraint(&indices_constraints.constraints.unique, {get_label_from_id(*label), properties}, "The unique constraint already exists!"); + SPDLOG_TRACE("Recovered metadata of unique constraints for :{}", + name_id_mapper->IdToName(snapshot_id_map.at(*label))); } + spdlog::info("Metadata of unique constraints are recovered."); } + spdlog::info("Metadata of constraints are recovered."); } + spdlog::info("Recovering metadata."); // Recover epoch history { if (!snapshot.SetPosition(info.offset_epoch_history)) throw RecoveryFailure("Couldn't read data from snapshot!"); @@ -555,6 +603,7 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis } } + spdlog::info("Metadata recovered."); // Recover timestamp. 
ret.next_timestamp = info.start_timestamp + 1; diff --git a/src/storage/v2/durability/wal.cpp b/src/storage/v2/durability/wal.cpp index 4f362bc3b..9f4bc46e7 100644 --- a/src/storage/v2/durability/wal.cpp +++ b/src/storage/v2/durability/wal.cpp @@ -610,6 +610,7 @@ RecoveryInfo LoadWal(const std::filesystem::path &path, RecoveredIndicesAndConst const std::optional last_loaded_timestamp, utils::SkipList *vertices, utils::SkipList *edges, NameIdMapper *name_id_mapper, std::atomic *edge_count, Config::Items items) { + spdlog::info("Trying to load WAL file {}.", path); RecoveryInfo ret; Decoder wal; @@ -622,13 +623,17 @@ RecoveryInfo LoadWal(const std::filesystem::path &path, RecoveredIndicesAndConst ret.last_commit_timestamp = info.to_timestamp; // Check timestamp. - if (last_loaded_timestamp && info.to_timestamp <= *last_loaded_timestamp) return ret; + if (last_loaded_timestamp && info.to_timestamp <= *last_loaded_timestamp) { + spdlog::info("Skip loading WAL file because it is too old."); + return ret; + } // Recover deltas. wal.SetPosition(info.offset_deltas); uint64_t deltas_applied = 0; auto edge_acc = edges->access(); auto vertex_acc = vertices->access(); + spdlog::info("WAL file contains {} deltas.", info.num_deltas); for (uint64_t i = 0; i < info.num_deltas; ++i) { // Read WAL delta header to find out the delta timestamp. auto timestamp = ReadWalDeltaHeader(&wal); @@ -839,7 +844,8 @@ RecoveryInfo LoadWal(const std::filesystem::path &path, RecoveredIndicesAndConst } } - spdlog::info("Applied {} deltas from WAL", deltas_applied, path); + spdlog::info("Applied {} deltas from WAL. 
Skipped {} deltas, because they were too old.", deltas_applied, + info.num_deltas - deltas_applied); return ret; } diff --git a/src/utils/logging.hpp b/src/utils/logging.hpp index 152917563..72bbff309 100644 --- a/src/utils/logging.hpp +++ b/src/utils/logging.hpp @@ -65,7 +65,7 @@ void Fatal(const char *msg, const Args &...msg_args) { do { \ spdlog::critical(__VA_ARGS__); \ std::terminate(); \ - } while (0); + } while (0) #ifndef NDEBUG #define DLOG_FATAL(...) LOG_FATAL(__VA_ARGS__) diff --git a/tests/e2e/replication/constraints.cpp b/tests/e2e/replication/constraints.cpp index 004fd0f79..2fcd6eb4e 100644 --- a/tests/e2e/replication/constraints.cpp +++ b/tests/e2e/replication/constraints.cpp @@ -39,7 +39,7 @@ int main(int argc, char **argv) { const auto label_name = (*data)[0][1].ValueString(); const auto property_name = (*data)[0][2].ValueList()[0].ValueString(); if (label_name != "Node" || property_name != "id") { - LOG_FATAL("{} does NOT hava valid constraint created.", database_endpoint) + LOG_FATAL("{} does NOT hava valid constraint created.", database_endpoint); } } else { LOG_FATAL("Unable to get CONSTRAINT INFO from {}", database_endpoint); From 92dfc93b209072d4e760cb07dc1a815093eed925 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20Budiseli=C4=87?= Date: Fri, 26 Mar 2021 18:19:48 +0100 Subject: [PATCH 18/63] Update community license (#128) --- release/LICENSE_COMMUNITY.md | 99 +++++++++++++++++++++++++----------- 1 file changed, 68 insertions(+), 31 deletions(-) diff --git a/release/LICENSE_COMMUNITY.md b/release/LICENSE_COMMUNITY.md index a93a80d68..34a75c4d5 100644 --- a/release/LICENSE_COMMUNITY.md +++ b/release/LICENSE_COMMUNITY.md @@ -1,33 +1,40 @@ -# User License Agreement +# Memgraph Community User License Agreement -1. Description +This License Agreement governs your use of the Memgraph Community Release (the +"Software") and documentation ("Documentation"). 
-THIS LICENSE AGREEMENT GOVERNS LICENSEE’S USE OF THE MEMGRAPH COMMUNITY -RELEASE AND DOCUMENTATION. +BY DOWNLOADING AND/OR ACCESSING THIS SOFTWARE, YOU ("LICENSEE") AGREE TO THESE +TERMS. -2. License Grant +1. License Grant The Software and Documentation are provided to Licensee at no charge and are licensed, not sold to Licensee. No ownership of any part of the Software and Documentation is hereby transferred to Licensee. Subject to (i) the terms and -conditions of this License Agreement, (ii) any additional license restrictions -and parameters contained on Licensor’s quotation, website, or order form -(“Order Form”), Licensor hereby grants Licensee a personal, non-assignable, +conditions of this License Agreement, and (ii) any additional license +restrictions and parameters contained on Licensor’s quotation, website, or +order form, Licensor hereby grants Licensee a personal, non-assignable, non-transferable and non-exclusive license to install, access and use the Software (in object code form only) and Documentation for Licensee’s internal -business purposes only. All rights relating to the Software and Documentation -that are not expressly licensed in this License Agreement, whether now existing -or which may hereafter come into existence are reserved for Licensor. Licensee -shall not remove, obscure, or alter any proprietary rights notices (including -without limitation copyright and trademark notices), which may be affixed to or -contained within the Software or Documentation. +business purposes (including for use in a production environment) only. All +rights relating to the Software and Documentation that are not expressly +licensed in this License Agreement, whether now existing or which may hereafter +come into existence are reserved for Licensor. 
Licensee shall not remove, +obscure, or alter any proprietary rights notices (including without limitation +copyright and trademark notices), which may be affixed to or contained within +the Software or Documentation. -3. Restrictions +Licensor may terminate this License Agreement with immediate effect upon +written notice to the Licensee. Upon termination Licensee shall delete all +electronic copies of all or any part of the Software and/or the Documentation +resident in its systems or elsewhere. + +2. Restrictions Licensee will not, directly or indirectly, (a) copy the Software or Documentation in any manner or for any purpose; (b) install, access or use any component of the Software or Documentation for any purpose not expressly -granted in Section 2 above; (c) resell, distribute, publicly display or +granted in Section 1 above; (c) resell, distribute, publicly display or publicly perform the Software or Documentation or any component thereof, by transfer, lease, loan or any other means, or make it available for use by others in any time-sharing, service bureau or similar arrangement; (d) @@ -37,25 +44,55 @@ algorithms or techniques incorporated in the Software; (e) export the Software or Documentation in violation of any applicable laws or regulations; (f) modify, translate, adapt, or create derivative works from the Software or Documentation; (g) circumvent, disable or otherwise interfere with -security-related features of the Software or Documentation; (h) -reverse-engineer, disassemble, attempt to derive the source code; (i) use the +security-related features of the Software or Documentation; (h) use the Software or Documentation for any illegal purpose, in any manner that is inconsistent with the terms of this License Agreement, or to engage in illegal -activity; (j) remove or alter any trademark, logo, copyright or other +activity; (i) remove or alter any trademark, logo, copyright or other proprietary notices, legends, symbols or labels on, or embedded 
in, the -Software or Documentation; or (k) provide access to the Software or +Software or Documentation; or (j) provide access to the Software or Documentation to third parties. -4. Warranty Disclaimer +3. Warranty Disclaimer -THE MEMGRAPH COMMUNITY RELEASE AND DOCUMENTATION ARE PROVIDED “AS IS” FOR -DEVELOPMENT, TESTING AND EVALUATION PURPOSES ONLY. IT IS NOT LICENSED FOR -PRODUCTION USE AND LICENSOR MAKES NO AND DISCLAIMS ALL WARRANTIES, EXPRESS OR -IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR NONINFRINGEMENT OF -THIRD PARTIES’ INTELLECTUAL PROPERTY RIGHTS OR OTHER PROPRIETARY RIGHTS. -NEITHER THIS LICENSE AGREEMENT NOR ANY DOCUMENTATION FURNISHED UNDER IT IS -INTENDED TO EXPRESS OR IMPLY ANY WARRANTY THAT THE OPERATION OF THE SOFTWARE -WILL BE UNINTERRUPTED, TIMELY, OR ERROR-FREE. +THE SOFTWARE AND DOCUMENTATION ARE PROVIDED "AS IS" AND LICENSOR MAKES NO +WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR NON +INFRINGEMENT OF THIRD PARTIES’ INTELLECTUAL PROPERTY RIGHTS OR OTHER +PROPRIETARY RIGHTS. NEITHER THIS LICENSE AGREEMENT NOR ANY DOCUMENTATION +FURNISHED UNDER IT IS INTENDED TO EXPRESS OR IMPLY ANY WARRANTY THAT THE +OPERATION OF THE SOFTWARE WILL BE UNINTERRUPTED, TIMELY, OR ERROR-FREE. -BY DOWNLOADING AND/OR ACCESSING THIS SOFTWARE, YOU AGREE TO SUCH TERMS. +4. Limitation of Liability + +Licensor shall not in any circumstances be liable, whether in tort (including +for negligence or breach of statutory duty howsoever arising), contract, +misrepresentation (whether innocent or negligent) or otherwise for: loss of +profits, loss of business, depletion of goodwill or similar losses, loss of +anticipated savings, loss of goods, loss or corruption of data or computer +downtime, or any special, indirect, consequential or pure economic loss, costs, +damages, charges or expenses. 
+ +Licensor's total aggregate liability in contract, tort (including without +limitation negligence or breach of statutory duty howsoever arising), +misrepresentation (whether innocent or negligent), restitution or otherwise, +arising in connection with the performance or contemplated performance of this +License Agreement shall in all circumstances be limited to GBP10.00 (ten pounds +sterling). + +Nothing in this License Agreement shall limit Licensor’s liability in the case +of death or personal injury caused by negligence, fraud, or fraudulent +misrepresentation, or where it otherwise cannot be limited by law. + +5. Technical Data + +Licensor may collect and use technical information (such as usage patterns) +gathered when the Licensee downloads and uses the Software. This is generally +statistical data which does not identify an identified or identifiable +individual. It may also include Licensee’s IP address which is personal data +and is processed in accordance with our Privacy Policy. We only use this +technical information to improve our products. + +6. Law and Jurisdiction + +This License Agreement is governed by the laws of England and is subject to the +non-exclusive jurisdiction of the courts of England. From 276e09d7d34f14191bae51f3404a1fd03c898eee Mon Sep 17 00:00:00 2001 From: antonio2368 Date: Sat, 27 Mar 2021 09:47:41 +0100 Subject: [PATCH 19/63] Fix reading rows with empty columns at the end (#127) * Fix reading rows with empty columns at the end * Update CHANGELOG for the recovery logs --- CHANGELOG.md | 2 ++ src/utils/csv_parsing.cpp | 21 +++++++++++---------- tests/unit/utils_csv_parsing.cpp | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d166530e..c6a261446 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,8 @@ * Added the memory limit and amount of currently allocated bytes in the result of `SHOW STORAGE INFO` query. 
* Added `QUERY MEMORY LIMIT num (KB|MB)` to Cypher queries which allows you to limit memory allocation for the entire query. It can be added only at the end of the entire Cypher query. +* Added logs for the different parts of the recovery process. `INFO`, `DEBUG` and `TRACE` level all contain + additional information that is printed out while the recovery is in progress. ### Bug Fixes diff --git a/src/utils/csv_parsing.cpp b/src/utils/csv_parsing.cpp index d3843b795..fb9dbd29e 100644 --- a/src/utils/csv_parsing.cpp +++ b/src/utils/csv_parsing.cpp @@ -61,12 +61,7 @@ void Reader::TryInitializeHeader() { const Reader::Header &Reader::GetHeader() const { return header_; } namespace { -enum class CsvParserState : uint8_t { - INITIAL_FIELD, - NEXT_FIELD, - QUOTING, - EXPECT_DELIMITER, -}; +enum class CsvParserState : uint8_t { INITIAL_FIELD, NEXT_FIELD, QUOTING, EXPECT_DELIMITER, DONE }; } // namespace @@ -89,7 +84,7 @@ Reader::ParsingResult Reader::ParseRow(utils::MemoryResource *mem) { std::string_view line_string_view = *maybe_line; - while (!line_string_view.empty()) { + while (state != CsvParserState::DONE && !line_string_view.empty()) { const auto c = line_string_view[0]; // Line feeds and carriage returns are ignored in CSVs. 
@@ -120,11 +115,11 @@ Reader::ParsingResult Reader::ParseRow(utils::MemoryResource *mem) { const auto delimiter_idx = line_string_view.find(*read_config_.delimiter); row.emplace_back(line_string_view.substr(0, delimiter_idx)); if (delimiter_idx == std::string_view::npos) { - line_string_view.remove_prefix(line_string_view.size()); + state = CsvParserState::DONE; } else { line_string_view.remove_prefix(delimiter_idx + read_config_.delimiter->size()); + state = CsvParserState::NEXT_FIELD; } - state = CsvParserState::NEXT_FIELD; } break; } @@ -159,15 +154,21 @@ Reader::ParsingResult Reader::ParseRow(utils::MemoryResource *mem) { } break; } + case CsvParserState::DONE: { + LOG_FATAL("Invalid state of the CSV parser!"); + } } } } while (state == CsvParserState::QUOTING); switch (state) { case CsvParserState::INITIAL_FIELD: - case CsvParserState::NEXT_FIELD: + case CsvParserState::DONE: case CsvParserState::EXPECT_DELIMITER: break; + case CsvParserState::NEXT_FIELD: + row.emplace_back(""); + break; case CsvParserState::QUOTING: { return ParseError(ParseError::ErrorCode::NO_CLOSING_QUOTE, "There is no more data left to load while inside a quoted string. 
" diff --git a/tests/unit/utils_csv_parsing.cpp b/tests/unit/utils_csv_parsing.cpp index 0305e3b05..34c7729b4 100644 --- a/tests/unit/utils_csv_parsing.cpp +++ b/tests/unit/utils_csv_parsing.cpp @@ -283,3 +283,35 @@ TEST_F(CsvReaderTest, MultilineQuotedString) { parsed_row = reader.GetNextRow(mem); ASSERT_EQ(*parsed_row, ToPmrColumns(expected_multiline)); } + +TEST_F(CsvReaderTest, EmptyColumns) { + // create a file with all rows valid; + // parser should return 'std::nullopt' + const auto filepath = csv_directory / "bla.csv"; + auto writer = FileWriter(filepath); + + utils::MemoryResource *mem(utils::NewDeleteResource()); + + const utils::pmr::string delimiter{",", mem}; + const utils::pmr::string quote{"\"", mem}; + + std::vector> expected_rows{{"", "B", "C"}, {"A", "", "C"}, {"A", "B", ""}}; + + for (const auto &row : expected_rows) { + writer.WriteLine(CreateRow(row, delimiter)); + } + + writer.Close(); + + const bool with_header = false; + const bool ignore_bad = false; + const csv::Reader::Config cfg{with_header, ignore_bad, delimiter, quote}; + auto reader = csv::Reader(filepath, cfg); + + for (const auto &expected_row : expected_rows) { + const auto pmr_expected_row = ToPmrColumns(expected_row); + const auto parsed_row = reader.GetNextRow(mem); + ASSERT_TRUE(parsed_row.has_value()); + ASSERT_EQ(*parsed_row, pmr_expected_row); + } +} From 6dd9d327216a549eda55642a306f873b5e29dd9e Mon Sep 17 00:00:00 2001 From: Josip Seljan <62958579+the-joksim@users.noreply.github.com> Date: Sun, 28 Mar 2021 09:27:02 +0200 Subject: [PATCH 20/63] Fix scoped profile operator names for ScanAll operators (#129) --- CHANGELOG.md | 3 ++- src/query/plan/operator.cpp | 23 ++++++++++++++--------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c6a261446..d8e1af591 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ ### Major Feature and Improvements * Added replication to community version. 
-* Add support for multiple query modules directories at the same time. +* Added support for multiple query modules directories at the same time. You can now define multiple, comma-separated paths to directories from which the modules will be loaded using the `--query-modules-directory` flag. * Added support for programatically reading in data from CSV files through the @@ -30,6 +30,7 @@ * Fixed garbage collector by correctly marking the oldest current timestamp after the database was recovered using the durability files. * Fixed reloading of the modules with changed result names. +* Fixed profile query to show the correct name of the ScanAll operator variant. ## v1.3.0 diff --git a/src/query/plan/operator.cpp b/src/query/plan/operator.cpp index 7511aab03..567cc04b4 100644 --- a/src/query/plan/operator.cpp +++ b/src/query/plan/operator.cpp @@ -324,11 +324,15 @@ VertexAccessor &CreateExpand::CreateExpandCursor::OtherVertex(Frame &frame, Exec template class ScanAllCursor : public Cursor { public: - explicit ScanAllCursor(Symbol output_symbol, UniqueCursorPtr input_cursor, TVerticesFun get_vertices) - : output_symbol_(output_symbol), input_cursor_(std::move(input_cursor)), get_vertices_(std::move(get_vertices)) {} + explicit ScanAllCursor(Symbol output_symbol, UniqueCursorPtr input_cursor, TVerticesFun get_vertices, + const char *op_name) + : output_symbol_(output_symbol), + input_cursor_(std::move(input_cursor)), + get_vertices_(std::move(get_vertices)), + op_name_(op_name) {} bool Pull(Frame &frame, ExecutionContext &context) override { - SCOPED_PROFILE_OP("ScanAll"); + SCOPED_PROFILE_OP(op_name_); if (MustAbort(context)) throw HintedAbortError(); @@ -364,6 +368,7 @@ class ScanAllCursor : public Cursor { TVerticesFun get_vertices_; std::optional::type::value_type> vertices_; std::optional vertices_it_; + const char *op_name_; }; ScanAll::ScanAll(const std::shared_ptr &input, Symbol output_symbol, storage::View view) @@ -379,7 +384,7 @@ UniqueCursorPtr 
ScanAll::MakeCursor(utils::MemoryResource *mem) const { return std::make_optional(db->Vertices(view_)); }; return MakeUniqueCursorPtr>(mem, output_symbol_, input_->MakeCursor(mem), - std::move(vertices)); + std::move(vertices), "ScanAll"); } std::vector ScanAll::ModifiedSymbols(const SymbolTable &table) const { @@ -402,7 +407,7 @@ UniqueCursorPtr ScanAllByLabel::MakeCursor(utils::MemoryResource *mem) const { return std::make_optional(db->Vertices(view_, label_)); }; return MakeUniqueCursorPtr>(mem, output_symbol_, input_->MakeCursor(mem), - std::move(vertices)); + std::move(vertices), "ScanAllByLabel"); } // TODO(buda): Implement ScanAllByLabelProperty operator to iterate over @@ -466,7 +471,7 @@ UniqueCursorPtr ScanAllByLabelPropertyRange::MakeCursor(utils::MemoryResource *m return std::make_optional(db->Vertices(view_, label_, property_, maybe_lower, maybe_upper)); }; return MakeUniqueCursorPtr>(mem, output_symbol_, input_->MakeCursor(mem), - std::move(vertices)); + std::move(vertices), "ScanAllByLabelPropertyRange"); } ScanAllByLabelPropertyValue::ScanAllByLabelPropertyValue(const std::shared_ptr &input, @@ -498,7 +503,7 @@ UniqueCursorPtr ScanAllByLabelPropertyValue::MakeCursor(utils::MemoryResource *m return std::make_optional(db->Vertices(view_, label_, property_, storage::PropertyValue(value))); }; return MakeUniqueCursorPtr>(mem, output_symbol_, input_->MakeCursor(mem), - std::move(vertices)); + std::move(vertices), "ScanAllByLabelPropertyValue"); } ScanAllByLabelProperty::ScanAllByLabelProperty(const std::shared_ptr &input, Symbol output_symbol, @@ -516,7 +521,7 @@ UniqueCursorPtr ScanAllByLabelProperty::MakeCursor(utils::MemoryResource *mem) c return std::make_optional(db->Vertices(view_, label_, property_)); }; return MakeUniqueCursorPtr>(mem, output_symbol_, input_->MakeCursor(mem), - std::move(vertices)); + std::move(vertices), "ScanAllByLabelProperty"); } ScanAllById::ScanAllById(const std::shared_ptr &input, Symbol output_symbol, Expression 
*expression, @@ -542,7 +547,7 @@ UniqueCursorPtr ScanAllById::MakeCursor(utils::MemoryResource *mem) const { return std::vector{*maybe_vertex}; }; return MakeUniqueCursorPtr>(mem, output_symbol_, input_->MakeCursor(mem), - std::move(vertices)); + std::move(vertices), "ScanAllById"); } namespace { From 27f09e1c0a82ad6ac19c5dfb69f0b76f231abbb9 Mon Sep 17 00:00:00 2001 From: antonio2368 Date: Mon, 29 Mar 2021 16:40:55 +0200 Subject: [PATCH 21/63] Make LOAD CSV query part separator (#130) --- src/query/plan/preprocess.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/query/plan/preprocess.cpp b/src/query/plan/preprocess.cpp index 7b8e08975..b89071709 100644 --- a/src/query/plan/preprocess.cpp +++ b/src/query/plan/preprocess.cpp @@ -522,7 +522,8 @@ std::vector CollectSingleQueryParts(SymbolTable &symbol_table, query_part->merge_matching.emplace_back(Matching{}); AddMatching({merge->pattern_}, nullptr, symbol_table, storage, query_part->merge_matching.back()); } else if (utils::IsSubtype(*clause, With::kType) || utils::IsSubtype(*clause, query::Unwind::kType) || - utils::IsSubtype(*clause, query::CallProcedure::kType)) { + utils::IsSubtype(*clause, query::CallProcedure::kType) || + utils::IsSubtype(*clause, query::LoadCsv::kType)) { // This query part is done, continue with a new one. 
query_parts.emplace_back(SingleQueryPart{}); query_part = &query_parts.back(); From e8a41e44572fbb179813d19bb0a2462579d6d2aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20Budiseli=C4=87?= Date: Tue, 30 Mar 2021 12:08:51 +0200 Subject: [PATCH 22/63] Add automated build of all Memgraph packages (#123) --- .github/workflows/package_all.yaml | 248 ++++++++++++++++++ .github/workflows/release_debian10.yaml | 2 +- .github/workflows/release_ubuntu2004.yaml | 2 +- environment/os/centos-7.sh | 16 +- environment/os/centos-8.sh | 17 +- environment/os/debian-10.sh | 6 + environment/os/debian-9.sh | 6 + environment/os/template.sh | 6 + environment/os/ubuntu-18.04.sh | 6 + environment/os/ubuntu-20.04.sh | 6 + release/docker/memgraph_community.dockerfile | 1 + release/docker/memgraph_enterprise.dockerfile | 1 + release/get_version.py | 14 +- release/package/centos-7/Dockerfile | 12 + release/package/centos-8/Dockerfile | 12 + release/package/debian-10/Dockerfile | 15 ++ release/package/debian-9/Dockerfile | 15 ++ release/package/docker-compose.yml | 26 ++ release/package/run.sh | 146 +++++++++++ release/package/ubuntu-18.04/Dockerfile | 15 ++ release/package/ubuntu-20.04/Dockerfile | 15 ++ 21 files changed, 574 insertions(+), 13 deletions(-) create mode 100644 .github/workflows/package_all.yaml create mode 100644 release/package/centos-7/Dockerfile create mode 100644 release/package/centos-8/Dockerfile create mode 100644 release/package/debian-10/Dockerfile create mode 100644 release/package/debian-9/Dockerfile create mode 100644 release/package/docker-compose.yml create mode 100755 release/package/run.sh create mode 100644 release/package/ubuntu-18.04/Dockerfile create mode 100644 release/package/ubuntu-20.04/Dockerfile diff --git a/.github/workflows/package_all.yaml b/.github/workflows/package_all.yaml new file mode 100644 index 000000000..656f1016a --- /dev/null +++ b/.github/workflows/package_all.yaml @@ -0,0 +1,248 @@ +name: Package All + +# TODO(gitbuda): Cleanup docker 
container if GHA job was canceled. + +on: workflow_dispatch + +jobs: + centos-7_community: + runs-on: [self-hosted, DockerMgBuild] + timeout-minutes: 60 + steps: + - name: "Set up repository" + uses: actions/checkout@v2 + with: + fetch-depth: 0 # Required because of release/get_version.py + - name: "Build package" + run: | + ./release/package/run.sh package community centos-7 + - name: "Upload package" + uses: actions/upload-artifact@v2 + with: + name: centos-7_community + path: build/output/centos-7/memgraph*.rpm + + centos-8_community: + runs-on: [self-hosted, DockerMgBuild] + timeout-minutes: 60 + steps: + - name: "Set up repository" + uses: actions/checkout@v2 + with: + fetch-depth: 0 # Required because of release/get_version.py + - name: "Build package" + run: | + ./release/package/run.sh package community centos-8 + - name: "Upload package" + uses: actions/upload-artifact@v2 + with: + name: centos-8_community + path: build/output/centos-8/memgraph*.rpm + + debian-9_community: + runs-on: [self-hosted, DockerMgBuild] + timeout-minutes: 60 + steps: + - name: "Set up repository" + uses: actions/checkout@v2 + with: + fetch-depth: 0 # Required because of release/get_version.py + - name: "Build package" + run: | + ./release/package/run.sh package community debian-9 + - name: "Upload package" + uses: actions/upload-artifact@v2 + with: + name: debian-9_community + path: build/output/debian-9/memgraph*.deb + + debian-10_community: + runs-on: [self-hosted, DockerMgBuild] + timeout-minutes: 60 + steps: + - name: "Set up repository" + uses: actions/checkout@v2 + with: + fetch-depth: 0 # Required because of release/get_version.py + - name: "Build package" + run: | + ./release/package/run.sh package community debian-10 + - name: "Upload package" + uses: actions/upload-artifact@v2 + with: + name: debian-10_community + path: build/output/debian-10/memgraph*.deb + + docker_community: + runs-on: [self-hosted, DockerMgBuild] + timeout-minutes: 60 + steps: + - name: "Set up 
repository" + uses: actions/checkout@v2 + with: + fetch-depth: 0 # Required because of release/get_version.py + - name: "Build package" + run: | + cd release/package + ./run.sh package community debian-10 --for-docker + ./run.sh docker + - name: "Upload package" + uses: actions/upload-artifact@v2 + with: + name: docker_community + path: build/output/docker/memgraph*.tar.gz + + ubuntu-1804_community: + runs-on: [self-hosted, DockerMgBuild] + timeout-minutes: 60 + steps: + - name: "Set up repository" + uses: actions/checkout@v2 + with: + fetch-depth: 0 # Required because of release/get_version.py + - name: "Build package" + run: | + ./release/package/run.sh package community ubuntu-18.04 + - name: "Upload package" + uses: actions/upload-artifact@v2 + with: + name: ubuntu-1804_community + path: build/output/ubuntu-18.04/memgraph*.deb + + ubuntu-2004_community: + runs-on: [self-hosted, DockerMgBuild] + timeout-minutes: 60 + steps: + - name: "Set up repository" + uses: actions/checkout@v2 + with: + fetch-depth: 0 # Required because of release/get_version.py + - name: "Build package" + run: | + ./release/package/run.sh package community ubuntu-20.04 + - name: "Upload package" + uses: actions/upload-artifact@v2 + with: + name: ubuntu-2004_community + path: build/output/ubuntu-20.04/memgraph*.deb + + centos-7_enterprise: + runs-on: [self-hosted, DockerMgBuild] + timeout-minutes: 60 + steps: + - name: "Set up repository" + uses: actions/checkout@v2 + with: + fetch-depth: 0 # Required because of release/get_version.py + - name: "Build package" + run: | + ./release/package/run.sh package enterprise centos-7 + - name: "Upload package" + uses: actions/upload-artifact@v2 + with: + name: centos-7_enterprise + path: build/output/centos-7/memgraph*.rpm + + centos-8_enterprise: + runs-on: [self-hosted, DockerMgBuild] + timeout-minutes: 60 + steps: + - name: "Set up repository" + uses: actions/checkout@v2 + with: + fetch-depth: 0 # Required because of release/get_version.py + - name: 
"Build package" + run: | + ./release/package/run.sh package enterprise centos-8 + - name: "Upload package" + uses: actions/upload-artifact@v2 + with: + name: centos-8_enterprise + path: build/output/centos-8/memgraph*.rpm + + debian-9_enterprise: + runs-on: [self-hosted, DockerMgBuild] + timeout-minutes: 60 + steps: + - name: "Set up repository" + uses: actions/checkout@v2 + with: + fetch-depth: 0 # Required because of release/get_version.py + - name: "Build package" + run: | + ./release/package/run.sh package enterprise debian-9 + - name: "Upload package" + uses: actions/upload-artifact@v2 + with: + name: debian-9_enterprise + path: build/output/debian-9/memgraph*.deb + + debian-10_enterprise: + runs-on: [self-hosted, DockerMgBuild] + timeout-minutes: 60 + steps: + - name: "Set up repository" + uses: actions/checkout@v2 + with: + fetch-depth: 0 # Required because of release/get_version.py + - name: "Build package" + run: | + ./release/package/run.sh package enterprise debian-10 + - name: "Upload package" + uses: actions/upload-artifact@v2 + with: + name: debian-10_enterprise + path: build/output/debian-10/memgraph*.deb + + docker_enterprise: + runs-on: [self-hosted, DockerMgBuild] + timeout-minutes: 60 + steps: + - name: "Set up repository" + uses: actions/checkout@v2 + with: + fetch-depth: 0 # Required because of release/get_version.py + - name: "Build package" + run: | + cd release/package + ./run.sh package enterprise debian-10 --for-docker + ./run.sh docker + - name: "Upload package" + uses: actions/upload-artifact@v2 + with: + name: docker_enterprise + path: build/output/docker/memgraph*.tar.gz + + ubuntu-1804_enterprise: + runs-on: [self-hosted, DockerMgBuild] + timeout-minutes: 60 + steps: + - name: "Set up repository" + uses: actions/checkout@v2 + with: + fetch-depth: 0 # Required because of release/get_version.py + - name: "Build package" + run: | + ./release/package/run.sh package enterprise ubuntu-18.04 + - name: "Upload package" + uses: 
actions/upload-artifact@v2 + with: + name: ubuntu-1804_enterprise + path: build/output/ubuntu-18.04/memgraph*.deb + + ubuntu-2004_enterprise: + runs-on: [self-hosted, DockerMgBuild] + timeout-minutes: 60 + steps: + - name: "Set up repository" + uses: actions/checkout@v2 + with: + fetch-depth: 0 # Required because of release/get_version.py + - name: "Build package" + run: | + ./release/package/run.sh package enterprise ubuntu-20.04 + - name: "Upload package" + uses: actions/upload-artifact@v2 + with: + name: ubuntu-2004_enterprise + path: build/output/ubuntu-20.04/memgraph*.deb diff --git a/.github/workflows/release_debian10.yaml b/.github/workflows/release_debian10.yaml index da94818ea..d6c5991f4 100644 --- a/.github/workflows/release_debian10.yaml +++ b/.github/workflows/release_debian10.yaml @@ -1,4 +1,4 @@ -name: Release Debian10 +name: Release Debian 10 on: workflow_dispatch: diff --git a/.github/workflows/release_ubuntu2004.yaml b/.github/workflows/release_ubuntu2004.yaml index 41421b5a7..d69c2d038 100644 --- a/.github/workflows/release_ubuntu2004.yaml +++ b/.github/workflows/release_ubuntu2004.yaml @@ -1,4 +1,4 @@ -name: Release Ubuntu20.04 +name: Release Ubuntu 20.04 on: workflow_dispatch: diff --git a/environment/os/centos-7.sh b/environment/os/centos-7.sh index 3b381c97d..b5fa816c1 100755 --- a/environment/os/centos-7.sh +++ b/environment/os/centos-7.sh @@ -18,6 +18,7 @@ TOOLCHAIN_BUILD_DEPS=( libffi-devel libxml2-devel perl-Digest-MD5 # llvm libedit-devel pcre-devel automake bison # swig ) + TOOLCHAIN_RUN_DEPS=( make # generic build tools tar gzip bzip2 xz # used for archive unpacking @@ -26,6 +27,7 @@ TOOLCHAIN_RUN_DEPS=( readline # for cmake and llvm libffi libxml2 # for llvm ) + MEMGRAPH_BUILD_DEPS=( git # source code control make pkgconfig # build system @@ -48,9 +50,11 @@ MEMGRAPH_BUILD_DEPS=( which mono-complete dotnet-sdk-3.1 golang nodejs zip unzip java-11-openjdk-devel # for driver tests autoconf # for jemalloc code generation ) + list() { echo 
"$1" } + check() { local missing="" for pkg in $1; do @@ -75,16 +79,13 @@ check() { exit 1 fi } + install() { cd "$DIR" if [ "$EUID" -ne 0 ]; then echo "Please run as root." exit 1 fi - if [ "$SUDO_USER" == "" ]; then - echo "Please run as sudo." - exit 1 - fi # If GitHub Actions runner is installed, append LANG to the environment. # Python related tests doesn't work the LANG export. if [ -d "/home/gh/actions-runner" ]; then @@ -118,11 +119,16 @@ install() { continue fi if [ "$pkg" == PyYAML ]; then - sudo -H -u "$SUDO_USER" bash -c "pip3 install --user PyYAML" + if [ -z ${SUDO_USER+x} ]; then # Running as root (e.g. Docker). + pip3 install --user PyYAML + else # Running using sudo. + sudo -H -u "$SUDO_USER" bash -c "pip3 install --user PyYAML" + fi continue fi yum install -y "$pkg" done } + deps=$2"[*]" "$1" "${!deps}" diff --git a/environment/os/centos-8.sh b/environment/os/centos-8.sh index aa61f8afe..2b316484b 100755 --- a/environment/os/centos-8.sh +++ b/environment/os/centos-8.sh @@ -17,6 +17,7 @@ TOOLCHAIN_BUILD_DEPS=( libffi-devel libxml2-devel # for llvm libedit-devel pcre-devel automake bison # for swig ) + TOOLCHAIN_RUN_DEPS=( make # generic build tools tar gzip bzip2 xz # used for archive unpacking @@ -25,6 +26,7 @@ TOOLCHAIN_RUN_DEPS=( readline # for cmake and llvm libffi libxml2 # for llvm ) + MEMGRAPH_BUILD_DEPS=( git # source code control make pkgconf-pkg-config # build system @@ -47,9 +49,11 @@ MEMGRAPH_BUILD_DEPS=( sbcl # for custom Lisp C++ preprocessing autoconf # for jemalloc code generation ) + list() { echo "$1" } + check() { local missing="" for pkg in $1; do @@ -68,16 +72,13 @@ check() { exit 1 fi } + install() { cd "$DIR" if [ "$EUID" -ne 0 ]; then echo "Please run as root." exit 1 fi - if [ "$SUDO_USER" == "" ]; then - echo "Please run as sudo." - exit 1 - fi # If GitHub Actions runner is installed, append LANG to the environment. # Python related tests doesn't work the LANG export. 
if [ -d "/home/gh/actions-runner" ]; then @@ -86,6 +87,7 @@ install() { echo "NOTE: export LANG=en_US.utf8" fi dnf install -y epel-release + dnf install -y 'dnf-command(config-manager)' dnf config-manager --set-enabled powertools # Required to install texinfo. dnf update -y dnf install -y wget git python36 python3-pip @@ -135,11 +137,16 @@ install() { continue fi if [ "$pkg" == PyYAML ]; then - sudo -H -u "$SUDO_USER" bash -c "pip3 install --user PyYAML" + if [ -z ${SUDO_USER+x} ]; then # Running as root (e.g. Docker). + pip3 install --user PyYAML + else # Running using sudo. + sudo -H -u "$SUDO_USER" bash -c "pip3 install --user PyYAML" + fi continue fi dnf install -y "$pkg" done } + deps=$2"[*]" "$1" "${!deps}" diff --git a/environment/os/debian-10.sh b/environment/os/debian-10.sh index 87cde6588..ccbd55f4b 100755 --- a/environment/os/debian-10.sh +++ b/environment/os/debian-10.sh @@ -17,6 +17,7 @@ TOOLCHAIN_BUILD_DEPS=( libffi-dev libxml2-dev # for llvm libedit-dev libpcre3-dev automake bison # for swig ) + TOOLCHAIN_RUN_DEPS=( make # generic build tools tar gzip bzip2 xz-utils # used for archive unpacking @@ -26,6 +27,7 @@ TOOLCHAIN_RUN_DEPS=( libreadline7 # for cmake and llvm libffi6 libxml2 # for llvm ) + MEMGRAPH_BUILD_DEPS=( git # source code control make pkg-config # build system @@ -45,12 +47,15 @@ MEMGRAPH_BUILD_DEPS=( dotnet-sdk-3.1 golang nodejs npm autoconf # for jemalloc code generation ) + list() { echo "$1" } + check() { check_all_dpkg "$1" } + install() { cat >/etc/apt/sources.list < Date: Tue, 30 Mar 2021 12:48:45 +0200 Subject: [PATCH 23/63] Fix release package script (#131) --- release/package/run.sh | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/release/package/run.sh b/release/package/run.sh index fe8443cac..338850ccd 100755 --- a/release/package/run.sh +++ b/release/package/run.sh @@ -47,8 +47,14 @@ make_package () { echo "Building Memgraph $offering for $os on $build_container..." echo "Copying project files..." 
+ # If master is not the current branch, fetch it, because the get_version + # script depends on it. If we are on master, the fetch command is going to + # fail so that's why there is the explicit check. # Required here because Docker build container can't access remote. - cd "$PROJECT_ROOT" && git fetch origin master:master + cd "$PROJECT_ROOT" + if [[ "$(git rev-parse --abbrev-ref HEAD)" != "master" ]]; then + git fetch origin master:master + fi docker exec "$build_container" mkdir -p /memgraph docker cp "$PROJECT_ROOT/." "$build_container:/memgraph/" From 6d4fe5cdd5f0a0a3e73be4f1957f44e4ce39f5ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Thu, 1 Apr 2021 17:08:40 +0200 Subject: [PATCH 24/63] Explore and implement the usage of clang-tidy and sanitizers (#125) * Run clang-tidy on the full code base * Run clang-tidy on diffs * Enable ASAN in coverage build * Add UBSAN to code analysis --- .clang-tidy | 2 +- .github/workflows/diff.yaml | 21 +- .github/workflows/full_clang_tidy.yaml | 44 +++ CMakeLists.txt | 5 +- src/query/interpreter.cpp | 10 +- src/utils/memory.cpp | 5 +- tests/unit/bolt_session.cpp | 9 +- tests/unit/query_plan.cpp | 20 +- tests/unit/query_procedure_mgp_module.cpp | 11 +- tests/unit/query_procedure_mgp_type.cpp | 37 ++- tests/unit/query_procedure_py_module.cpp | 1 + tests/unit/storage_v2_durability.cpp | 11 +- tests/unit/test_utils.hpp | 9 + tests/unit/typed_value.cpp | 2 +- tests/unit/utils_memory.cpp | 8 +- tools/github/clang-tidy/clang-tidy-diff.py | 269 ++++++++++++++++ tools/github/clang-tidy/count_errors.sh | 9 + tools/github/clang-tidy/grep_error_lines.sh | 12 + tools/github/clang-tidy/run-clang-tidy.py | 337 ++++++++++++++++++++ tools/lsan.supp | 12 + 20 files changed, 792 insertions(+), 42 deletions(-) create mode 100644 .github/workflows/full_clang_tidy.yaml create mode 100644 tests/unit/test_utils.hpp create mode 100755 tools/github/clang-tidy/clang-tidy-diff.py create mode 100755 
tools/github/clang-tidy/count_errors.sh create mode 100755 tools/github/clang-tidy/grep_error_lines.sh create mode 100755 tools/github/clang-tidy/run-clang-tidy.py create mode 100644 tools/lsan.supp diff --git a/.clang-tidy b/.clang-tidy index 81be7c096..b0f274372 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -54,7 +54,7 @@ Checks: '*, -readability-magic-numbers, -readability-named-parameter' WarningsAsErrors: '' -HeaderFilterRegex: '' +HeaderFilterRegex: 'src/.*' AnalyzeTemporaryDtors: false FormatStyle: none CheckOptions: diff --git a/.github/workflows/diff.yaml b/.github/workflows/diff.yaml index 67be98576..4ba546412 100644 --- a/.github/workflows/diff.yaml +++ b/.github/workflows/diff.yaml @@ -66,7 +66,7 @@ jobs: path: build/output/memgraph*.deb coverage_build: - name: "Coverage build" + name: "Code analysis" runs-on: [self-hosted, General, Linux, X64, Debian10] env: THREADS: 24 @@ -79,7 +79,7 @@ jobs: # branches and tags. (default: 1) fetch-depth: 0 - - name: Build coverage binaries + - name: Build combined ASAN, UBSAN and coverage binaries run: | # Activate toolchain. source /opt/toolchain-v2/activate @@ -87,9 +87,8 @@ jobs: # Initialize dependencies. ./init - # Build coverage binaries. cd build - cmake -DTEST_COVERAGE=ON .. + cmake -DTEST_COVERAGE=ON -DASAN=ON -DUBSAN=ON .. make -j$THREADS memgraph__unit - name: Run unit tests @@ -97,9 +96,9 @@ jobs: # Activate toolchain. source /opt/toolchain-v2/activate - # Run unit tests. + # Run unit tests. It is restricted to 2 threads intentionally, because higher concurrency makes the timing related tests unstable. 
cd build - ctest -R memgraph__unit --output-on-failure -j$THREADS + LSAN_OPTIONS=suppressions=$PWD/../tools/lsan.supp UBSAN_OPTIONS=halt_on_error=1 ctest -R memgraph__unit --output-on-failure -j2 - name: Compute code coverage run: | @@ -120,6 +119,16 @@ jobs: name: "Code coverage" path: tools/github/generated/code_coverage.tar.gz + - name: Run clang-tidy + run: | + source /opt/toolchain-v2/activate + + # Restrict clang-tidy results only to the modified parts + git diff -U0 master... -- src | ./tools/github/clang-tidy/clang-tidy-diff.py -p 1 -j $THREADS -path build | tee ./build/clang_tidy_output.txt + + # Fail if any warning is reported + ! cat ./build/clang_tidy_output.txt | ./tools/github/clang-tidy/grep_error_lines.sh > /dev/null + debug_build: name: "Debug build" runs-on: [self-hosted, General, Linux, X64, Debian10] diff --git a/.github/workflows/full_clang_tidy.yaml b/.github/workflows/full_clang_tidy.yaml new file mode 100644 index 000000000..d1f4151ba --- /dev/null +++ b/.github/workflows/full_clang_tidy.yaml @@ -0,0 +1,44 @@ +name: Run clang-tidy on the full codebase + +on: + workflow_dispatch: + +jobs: + clang_tidy_check: + name: "Clang-tidy check" + runs-on: [self-hosted, Linux, X64, Ubuntu20.04] + env: + THREADS: 24 + + steps: + - name: Set up repository + uses: actions/checkout@v2 + with: + # Number of commits to fetch. `0` indicates all history for all + # branches and tags. (default: 1) + fetch-depth: 0 + + - name: Build debug binaries + run: | + # Activate toolchain. + source /opt/toolchain-v2/activate + + # Initialize dependencies. + ./init + + # Build debug binaries. + + cd build + cmake .. 
+ make -j$THREADS + + - name: Run clang-tidy + run: | + source /opt/toolchain-v2/activate + + # The results are also written to standard output in order to retain them in the logs + ./tools/github/clang-tidy/run-clang-tidy.py -p build -j $THREADS -clang-tidy-binary=/opt/toolchain-v2/bin/clang-tidy "$PWD/src/*" | + tee ./build/full_clang_tidy_output.txt + + - name: Summarize clang-tidy results + run: cat ./build/full_clang_tidy_output.txt | ./tools/github/clang-tidy/count_errors.sh diff --git a/CMakeLists.txt b/CMakeLists.txt index 663eba656..5a0d3310f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -312,8 +312,9 @@ if (UBSAN) # runtime library and c++ standard libraries are present. set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined -fno-omit-frame-pointer -fno-sanitize=vptr") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=undefined -fno-sanitize=vptr") - # Run program with environment variable UBSAN_OPTIONS=print_stacktrace=1 - # Make sure llvm-symbolizer binary is in path + # Run program with environment variable UBSAN_OPTIONS=print_stacktrace=1. + # Make sure llvm-symbolizer binary is in path. + # To make the program abort on undefined behavior, use UBSAN_OPTIONS=halt_on_error=1. endif() set(MG_PYTHON_VERSION "" CACHE STRING "Specify the exact python version used by the query modules") diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp index 62391e218..54cafbd53 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -1122,6 +1122,8 @@ PreparedQuery PrepareReplicationQuery(ParsedQuery parsed_query, const bool in_ex return std::nullopt; }, RWType::NONE}; + // False positive report for the std::make_shared above + // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks) } PreparedQuery PrepareLockPathQuery(ParsedQuery parsed_query, const bool in_explicit_transaction, @@ -1434,10 +1436,12 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, in_explicit_transaction_ ? 
static_cast(query_executions_.size() - 1) : std::optional{}; // Handle transaction control queries. - auto query_upper = utils::Trim(utils::ToUpperCase(query_string)); - if (query_upper == "BEGIN" || query_upper == "COMMIT" || query_upper == "ROLLBACK") { - query_execution->prepared_query.emplace(PrepareTransactionQuery(query_upper)); + const auto upper_case_query = utils::ToUpperCase(query_string); + const auto trimmed_query = utils::Trim(upper_case_query); + + if (trimmed_query == "BEGIN" || trimmed_query == "COMMIT" || trimmed_query == "ROLLBACK") { + query_execution->prepared_query.emplace(PrepareTransactionQuery(trimmed_query)); return {query_execution->prepared_query->header, query_execution->prepared_query->privileges, qid}; } diff --git a/src/utils/memory.cpp b/src/utils/memory.cpp index 7acffa5c5..fe7e6b4d8 100644 --- a/src/utils/memory.cpp +++ b/src/utils/memory.cpp @@ -23,6 +23,9 @@ size_t GrowMonotonicBuffer(size_t current_size, size_t max_size) { return std::ceil(next_size); } +__attribute__((no_sanitize("pointer-overflow"))) void CheckAllocationSizeOverflow(void *aligned_ptr, size_t bytes) { + if (reinterpret_cast(aligned_ptr) + bytes <= aligned_ptr) throw BadAlloc("Allocation size overflow"); +} } // namespace MonotonicBufferResource::MonotonicBufferResource(size_t initial_size) : initial_size_(initial_size) {} @@ -121,7 +124,7 @@ void *MonotonicBufferResource::DoAllocate(size_t bytes, size_t alignment) { next_buffer_size_ = GrowMonotonicBuffer(next_buffer_size_, std::numeric_limits::max() - sizeof(Buffer)); } if (reinterpret_cast(aligned_ptr) < buffer_head) throw BadAlloc("Allocation alignment overflow"); - if (reinterpret_cast(aligned_ptr) + bytes <= aligned_ptr) throw BadAlloc("Allocation size overflow"); + CheckAllocationSizeOverflow(aligned_ptr, bytes); allocated_ = reinterpret_cast(aligned_ptr) - data + bytes; return aligned_ptr; } diff --git a/tests/unit/bolt_session.cpp b/tests/unit/bolt_session.cpp index c9c8631d4..6bee34273 100644 --- 
a/tests/unit/bolt_session.cpp +++ b/tests/unit/bolt_session.cpp @@ -383,15 +383,16 @@ TEST(BoltSession, ExecuteRunWrongMarker) { } TEST(BoltSession, ExecuteRunMissingData) { + std::array run_req_without_parameters{ + run_req_header[0], run_req_header[1], run_req_header[2], 0x00, 0x00, 0x00}; // test lengths, they test the following situations: // missing header data, missing query data, missing parameters - int len[] = {1, 2, 37}; - + int len[] = {1, 2, run_req_without_parameters.size()}; for (int i = 0; i < 3; ++i) { INIT_VARS; ExecuteHandshake(input_stream, session, output); ExecuteInit(input_stream, session, output); - ASSERT_THROW(ExecuteCommand(input_stream, session, run_req_header, len[i]), SessionException); + ASSERT_THROW(ExecuteCommand(input_stream, session, run_req_without_parameters.data(), len[i]), SessionException); ASSERT_EQ(session.state_, State::Close); CheckFailureMessage(output); @@ -871,7 +872,7 @@ TEST(BoltSession, Noop) { CheckFailureMessage(output); session.state_ = State::Result; - ExecuteCommand(input_stream, session, pullall_req, sizeof(v4::pullall_req)); + ExecuteCommand(input_stream, session, pullall_req, sizeof(pullall_req)); CheckSuccessMessage(output); ASSERT_THROW(ExecuteCommand(input_stream, session, v4_1::noop, sizeof(v4_1::noop)), SessionException); diff --git a/tests/unit/query_plan.cpp b/tests/unit/query_plan.cpp index d15efc0af..f15cd0055 100644 --- a/tests/unit/query_plan.cpp +++ b/tests/unit/query_plan.cpp @@ -73,6 +73,12 @@ class TestPlanner : public ::testing::Test {}; using PlannerTypes = ::testing::Types; +void DeleteListContent(std::list *list) { + for (BaseOpChecker *ptr : *list) { + delete ptr; + } +} + TYPED_TEST_CASE(TestPlanner, PlannerTypes); TYPED_TEST(TestPlanner, MatchNodeReturn) { @@ -223,6 +229,7 @@ TYPED_TEST(TestPlanner, OptionalMatchNamedPatternReturn) { auto planner = MakePlanner(&dba, storage, symbol_table, query); std::list optional{new ExpectScanAll(), new ExpectExpand(), new ExpectConstructNamedPath()}; 
CheckPlan(planner.plan(), symbol_table, ExpectOptional(optional_symbols, optional), ExpectProduce()); + DeleteListContent(&optional); } TYPED_TEST(TestPlanner, MatchWhereReturn) { @@ -549,10 +556,8 @@ TYPED_TEST(TestPlanner, MatchMerge) { auto acc = ExpectAccumulate({symbol_table.at(*ident_n)}); auto planner = MakePlanner(&dba, storage, symbol_table, query); CheckPlan(planner.plan(), symbol_table, ExpectScanAll(), ExpectMerge(on_match, on_create), acc, ExpectProduce()); - for (auto &op : on_match) delete op; - on_match.clear(); - for (auto &op : on_create) delete op; - on_create.clear(); + DeleteListContent(&on_match); + DeleteListContent(&on_create); } TYPED_TEST(TestPlanner, MatchOptionalMatchWhereReturn) { @@ -564,6 +569,7 @@ TYPED_TEST(TestPlanner, MatchOptionalMatchWhereReturn) { WHERE(LESS(PROPERTY_LOOKUP("m", prop), LITERAL(42))), RETURN("r"))); std::list optional{new ExpectScanAll(), new ExpectExpand(), new ExpectFilter()}; CheckPlan(query, storage, ExpectScanAll(), ExpectOptional(optional), ExpectProduce()); + DeleteListContent(&optional); } TYPED_TEST(TestPlanner, MatchUnwindReturn) { @@ -705,6 +711,7 @@ TYPED_TEST(TestPlanner, MatchOptionalMatchWhere) { // optional ScanAll. 
std::list optional{new ExpectFilter(), new ExpectScanAll()}; CheckPlan(query, storage, ExpectScanAll(), ExpectExpand(), ExpectOptional(optional), ExpectProduce()); + DeleteListContent(&optional); } TYPED_TEST(TestPlanner, MatchReturnAsterisk) { @@ -763,8 +770,8 @@ TYPED_TEST(TestPlanner, UnwindMergeNodeProperty) { std::list on_match{new ExpectScanAll(), new ExpectFilter()}; std::list on_create{new ExpectCreateNode()}; CheckPlan(query, storage, ExpectUnwind(), ExpectMerge(on_match, on_create)); - for (auto &op : on_match) delete op; - for (auto &op : on_create) delete op; + DeleteListContent(&on_match); + DeleteListContent(&on_create); } TYPED_TEST(TestPlanner, MultipleOptionalMatchReturn) { @@ -774,6 +781,7 @@ TYPED_TEST(TestPlanner, MultipleOptionalMatchReturn) { QUERY(SINGLE_QUERY(OPTIONAL_MATCH(PATTERN(NODE("n"))), OPTIONAL_MATCH(PATTERN(NODE("m"))), RETURN("n"))); std::list optional{new ExpectScanAll()}; CheckPlan(query, storage, ExpectOptional(optional), ExpectOptional(optional), ExpectProduce()); + DeleteListContent(&optional); } TYPED_TEST(TestPlanner, FunctionAggregationReturn) { diff --git a/tests/unit/query_procedure_mgp_module.cpp b/tests/unit/query_procedure_mgp_module.cpp index 3da92bb5c..74e2da106 100644 --- a/tests/unit/query_procedure_mgp_module.cpp +++ b/tests/unit/query_procedure_mgp_module.cpp @@ -5,6 +5,8 @@ #include "query/procedure/mg_procedure_impl.hpp" +#include "test_utils.hpp" + static void DummyCallback(const mgp_list *, const mgp_graph *, mgp_result *, mgp_memory *) {} TEST(Module, InvalidProcedureRegistration) { @@ -53,7 +55,8 @@ TEST(Module, ProcedureSignature) { CheckSignature(proc, "proc() :: ()"); mgp_proc_add_arg(proc, "arg1", mgp_type_number()); CheckSignature(proc, "proc(arg1 :: NUMBER) :: ()"); - mgp_proc_add_opt_arg(proc, "opt1", mgp_type_nullable(mgp_type_any()), mgp_value_make_null(&memory)); + mgp_proc_add_opt_arg(proc, "opt1", mgp_type_nullable(mgp_type_any()), + 
test_utils::CreateValueOwningPtr(mgp_value_make_null(&memory)).get()); CheckSignature(proc, "proc(arg1 :: NUMBER, opt1 = Null :: ANY?) :: ()"); mgp_proc_add_result(proc, "res1", mgp_type_list(mgp_type_int())); CheckSignature(proc, "proc(arg1 :: NUMBER, opt1 = Null :: ANY?) :: (res1 :: LIST OF INTEGER)"); @@ -69,7 +72,8 @@ TEST(Module, ProcedureSignature) { "(res1 :: LIST OF INTEGER, DEPRECATED res2 :: STRING)"); EXPECT_FALSE(mgp_proc_add_result(proc, "res2", mgp_type_any())); EXPECT_FALSE(mgp_proc_add_deprecated_result(proc, "res1", mgp_type_any())); - mgp_proc_add_opt_arg(proc, "opt2", mgp_type_string(), mgp_value_make_string("string=\"value\"", &memory)); + mgp_proc_add_opt_arg(proc, "opt2", mgp_type_string(), + test_utils::CreateValueOwningPtr(mgp_value_make_string("string=\"value\"", &memory)).get()); CheckSignature(proc, "proc(arg1 :: NUMBER, opt1 = Null :: ANY?, " "opt2 = \"string=\\\"value\\\"\" :: STRING) :: " @@ -80,6 +84,7 @@ TEST(Module, ProcedureSignatureOnlyOptArg) { mgp_memory memory{utils::NewDeleteResource()}; mgp_module module(utils::NewDeleteResource()); auto *proc = mgp_module_add_read_procedure(&module, "proc", DummyCallback); - mgp_proc_add_opt_arg(proc, "opt1", mgp_type_nullable(mgp_type_any()), mgp_value_make_null(&memory)); + mgp_proc_add_opt_arg(proc, "opt1", mgp_type_nullable(mgp_type_any()), + test_utils::CreateValueOwningPtr(mgp_value_make_null(&memory)).get()); CheckSignature(proc, "proc(opt1 = Null :: ANY?) 
:: ()"); } diff --git a/tests/unit/query_procedure_mgp_type.cpp b/tests/unit/query_procedure_mgp_type.cpp index 2cb1b0ce4..588a35a85 100644 --- a/tests/unit/query_procedure_mgp_type.cpp +++ b/tests/unit/query_procedure_mgp_type.cpp @@ -1,7 +1,13 @@ +#include +#include +#include + #include #include "query/procedure/mg_procedure_impl.hpp" +#include "test_utils.hpp" + TEST(CypherType, PresentableNameSimpleTypes) { EXPECT_EQ(mgp_type_any()->impl->GetPresentableName(), "ANY"); EXPECT_EQ(mgp_type_bool()->impl->GetPresentableName(), "BOOLEAN"); @@ -66,6 +72,7 @@ TEST(CypherType, NullSatisfiesType) { EXPECT_TRUE(null_type->impl->SatisfiesType(tv_null)); } } + mgp_value_destroy(mgp_null); } } @@ -101,6 +108,7 @@ TEST(CypherType, BoolSatisfiesType) { CheckNotSatisfiesTypesAndListAndNullable(mgp_bool, tv_bool, {mgp_type_string(), mgp_type_int(), mgp_type_float(), mgp_type_number(), mgp_type_map(), mgp_type_node(), mgp_type_relationship(), mgp_type_path()}); + mgp_value_destroy(mgp_bool); } TEST(CypherType, IntSatisfiesType) { @@ -111,6 +119,7 @@ TEST(CypherType, IntSatisfiesType) { CheckNotSatisfiesTypesAndListAndNullable(mgp_int, tv_int, {mgp_type_bool(), mgp_type_string(), mgp_type_float(), mgp_type_map(), mgp_type_node(), mgp_type_relationship(), mgp_type_path()}); + mgp_value_destroy(mgp_int); } TEST(CypherType, DoubleSatisfiesType) { @@ -121,6 +130,7 @@ TEST(CypherType, DoubleSatisfiesType) { CheckNotSatisfiesTypesAndListAndNullable(mgp_double, tv_double, {mgp_type_bool(), mgp_type_string(), mgp_type_int(), mgp_type_map(), mgp_type_node(), mgp_type_relationship(), mgp_type_path()}); + mgp_value_destroy(mgp_double); } TEST(CypherType, StringSatisfiesType) { @@ -131,12 +141,13 @@ TEST(CypherType, StringSatisfiesType) { CheckNotSatisfiesTypesAndListAndNullable(mgp_string, tv_string, {mgp_type_bool(), mgp_type_int(), mgp_type_float(), mgp_type_number(), mgp_type_map(), mgp_type_node(), mgp_type_relationship(), mgp_type_path()}); + mgp_value_destroy(mgp_string); } 
TEST(CypherType, MapSatisfiesType) { mgp_memory memory{utils::NewDeleteResource()}; auto *map = mgp_map_make_empty(&memory); - mgp_map_insert(map, "key", mgp_value_make_int(42, &memory)); + mgp_map_insert(map, "key", test_utils::CreateValueOwningPtr(mgp_value_make_int(42, &memory)).get()); auto *mgp_map_v = mgp_value_make_map(map); const query::TypedValue tv_map(std::map{{"key", query::TypedValue(42)}}); CheckSatisfiesTypesAndNullable(mgp_map_v, tv_map, {mgp_type_any(), mgp_type_map()}); @@ -144,6 +155,7 @@ TEST(CypherType, MapSatisfiesType) { mgp_map_v, tv_map, {mgp_type_bool(), mgp_type_string(), mgp_type_int(), mgp_type_float(), mgp_type_number(), mgp_type_node(), mgp_type_relationship(), mgp_type_path()}); + mgp_value_destroy(mgp_map_v); } TEST(CypherType, VertexSatisfiesType) { @@ -160,6 +172,7 @@ TEST(CypherType, VertexSatisfiesType) { CheckNotSatisfiesTypesAndListAndNullable(mgp_vertex_v, tv_vertex, {mgp_type_bool(), mgp_type_string(), mgp_type_int(), mgp_type_float(), mgp_type_number(), mgp_type_relationship(), mgp_type_path()}); + mgp_value_destroy(mgp_vertex_v); } TEST(CypherType, EdgeSatisfiesType) { @@ -178,6 +191,7 @@ TEST(CypherType, EdgeSatisfiesType) { CheckNotSatisfiesTypesAndListAndNullable(mgp_edge_v, tv_edge, {mgp_type_bool(), mgp_type_string(), mgp_type_int(), mgp_type_float(), mgp_type_number(), mgp_type_node(), mgp_type_path()}); + mgp_value_destroy(mgp_edge_v); } TEST(CypherType, PathSatisfiesType) { @@ -190,9 +204,13 @@ TEST(CypherType, PathSatisfiesType) { mgp_memory memory{utils::NewDeleteResource()}; utils::Allocator alloc(memory.impl); mgp_graph graph{&dba, storage::View::NEW}; - auto *path = mgp_path_make_with_start(alloc.new_object(v1, &graph), &memory); + auto *mgp_vertex_v = alloc.new_object(v1, &graph); + auto path = mgp_path_make_with_start(mgp_vertex_v, &memory); ASSERT_TRUE(path); - ASSERT_TRUE(mgp_path_expand(path, alloc.new_object(edge, &graph))); + alloc.delete_object(mgp_vertex_v); + auto mgp_edge_v = alloc.new_object(edge, 
&graph); + ASSERT_TRUE(mgp_path_expand(path, mgp_edge_v)); + alloc.delete_object(mgp_edge_v); auto *mgp_path_v = mgp_value_make_path(path); const query::TypedValue tv_path(query::Path(v1, edge, v2)); CheckSatisfiesTypesAndNullable(mgp_path_v, tv_path, {mgp_type_any(), mgp_type_path()}); @@ -200,6 +218,7 @@ TEST(CypherType, PathSatisfiesType) { mgp_path_v, tv_path, {mgp_type_bool(), mgp_type_string(), mgp_type_int(), mgp_type_float(), mgp_type_number(), mgp_type_map(), mgp_type_node(), mgp_type_relationship()}); + mgp_value_destroy(mgp_path_v); } static std::vector MakeListTypes(const std::vector &element_types) { @@ -224,6 +243,7 @@ TEST(CypherType, EmptyListSatisfiesType) { auto all_types = MakeListTypes(primitive_types); all_types.push_back(mgp_type_any()); CheckSatisfiesTypesAndNullable(mgp_list_v, tv_list, all_types); + mgp_value_destroy(mgp_list_v); } TEST(CypherType, ListOfIntSatisfiesType) { @@ -233,7 +253,7 @@ TEST(CypherType, ListOfIntSatisfiesType) { auto *mgp_list_v = mgp_value_make_list(list); query::TypedValue tv_list(std::vector{}); for (int64_t i = 0; i < elem_count; ++i) { - ASSERT_TRUE(mgp_list_append(list, mgp_value_make_int(i, &memory))); + ASSERT_TRUE(mgp_list_append(list, test_utils::CreateValueOwningPtr(mgp_value_make_int(i, &memory)).get())); tv_list.ValueList().emplace_back(i); auto valid_types = MakeListTypes({mgp_type_any(), mgp_type_int(), mgp_type_number()}); valid_types.push_back(mgp_type_any()); @@ -242,6 +262,7 @@ TEST(CypherType, ListOfIntSatisfiesType) { {mgp_type_bool(), mgp_type_string(), mgp_type_float(), mgp_type_map(), mgp_type_node(), mgp_type_relationship(), mgp_type_path()}); } + mgp_value_destroy(mgp_list_v); } TEST(CypherType, ListOfIntAndBoolSatisfiesType) { @@ -251,10 +272,10 @@ TEST(CypherType, ListOfIntAndBoolSatisfiesType) { auto *mgp_list_v = mgp_value_make_list(list); query::TypedValue tv_list(std::vector{}); // Add an int - ASSERT_TRUE(mgp_list_append(list, mgp_value_make_int(42, &memory))); + 
ASSERT_TRUE(mgp_list_append(list, test_utils::CreateValueOwningPtr(mgp_value_make_int(42, &memory)).get())); tv_list.ValueList().emplace_back(42); // Add a boolean - ASSERT_TRUE(mgp_list_append(list, mgp_value_make_bool(1, &memory))); + ASSERT_TRUE(mgp_list_append(list, test_utils::CreateValueOwningPtr(mgp_value_make_bool(1, &memory)).get())); tv_list.ValueList().emplace_back(true); auto valid_types = MakeListTypes({mgp_type_any()}); valid_types.push_back(mgp_type_any()); @@ -264,6 +285,7 @@ TEST(CypherType, ListOfIntAndBoolSatisfiesType) { mgp_list_v, tv_list, {mgp_type_bool(), mgp_type_string(), mgp_type_int(), mgp_type_float(), mgp_type_number(), mgp_type_map(), mgp_type_node(), mgp_type_relationship(), mgp_type_path()}); + mgp_value_destroy(mgp_list_v); } TEST(CypherType, ListOfNullSatisfiesType) { @@ -271,7 +293,7 @@ TEST(CypherType, ListOfNullSatisfiesType) { auto *list = mgp_list_make_empty(1, &memory); auto *mgp_list_v = mgp_value_make_list(list); query::TypedValue tv_list(std::vector{}); - ASSERT_TRUE(mgp_list_append(list, mgp_value_make_null(&memory))); + ASSERT_TRUE(mgp_list_append(list, test_utils::CreateValueOwningPtr(mgp_value_make_null(&memory)).get())); tv_list.ValueList().emplace_back(); // List with Null satisfies all nullable list element types std::vector primitive_types{ @@ -295,4 +317,5 @@ TEST(CypherType, ListOfNullSatisfiesType) { EXPECT_FALSE(null_type->impl->SatisfiesType(*mgp_list_v)) << null_type->impl->GetPresentableName(); EXPECT_FALSE(null_type->impl->SatisfiesType(tv_list)); } + mgp_value_destroy(mgp_list_v); } diff --git a/tests/unit/query_procedure_py_module.cpp b/tests/unit/query_procedure_py_module.cpp index ccc630842..95a7af2a1 100644 --- a/tests/unit/query_procedure_py_module.cpp +++ b/tests/unit/query_procedure_py_module.cpp @@ -254,6 +254,7 @@ TEST(PyModule, PyObjectToMgpValue) { const mgp_value *v2 = mgp_map_at(map, "four"); ASSERT_TRUE(mgp_value_is_double(v2)); EXPECT_EQ(mgp_value_get_double(v2), 4.0); + 
mgp_value_destroy(value); } int main(int argc, char **argv) { diff --git a/tests/unit/storage_v2_durability.cpp b/tests/unit/storage_v2_durability.cpp index c6c8a883e..dfe84f7ce 100644 --- a/tests/unit/storage_v2_durability.cpp +++ b/tests/unit/storage_v2_durability.cpp @@ -709,14 +709,15 @@ TEST_P(DurabilityTest, SnapshotFallback) { {.items = {.properties_on_edges = GetParam()}, .durability = {.storage_directory = storage_directory, .snapshot_wal_mode = storage::Config::Durability::SnapshotWalMode::PERIODIC_SNAPSHOT, - .snapshot_interval = std::chrono::milliseconds(2000)}}); + .snapshot_interval = std::chrono::milliseconds(3000)}}); CreateBaseDataset(&store, GetParam()); - std::this_thread::sleep_for(std::chrono::milliseconds(2500)); + std::this_thread::sleep_for(std::chrono::milliseconds(3500)); + ASSERT_EQ(GetSnapshotsList().size(), 1); CreateExtendedDataset(&store); - std::this_thread::sleep_for(std::chrono::milliseconds(2500)); + std::this_thread::sleep_for(std::chrono::milliseconds(3000)); } - ASSERT_GE(GetSnapshotsList().size(), 2); + ASSERT_EQ(GetSnapshotsList().size(), 2); ASSERT_EQ(GetBackupSnapshotsList().size(), 0); ASSERT_EQ(GetWalsList().size(), 0); ASSERT_EQ(GetBackupWalsList().size(), 0); @@ -724,7 +725,7 @@ TEST_P(DurabilityTest, SnapshotFallback) { // Destroy last snapshot. 
{ auto snapshots = GetSnapshotsList(); - ASSERT_GE(snapshots.size(), 2); + ASSERT_EQ(snapshots.size(), 2); DestroySnapshot(*snapshots.begin()); } diff --git a/tests/unit/test_utils.hpp b/tests/unit/test_utils.hpp new file mode 100644 index 000000000..4efc0f4b2 --- /dev/null +++ b/tests/unit/test_utils.hpp @@ -0,0 +1,9 @@ +#include + +#include "query/procedure/mg_procedure_impl.hpp" + +namespace test_utils { +using MgpValueOwningPtr = std::unique_ptr; + +MgpValueOwningPtr CreateValueOwningPtr(mgp_value *value) { return MgpValueOwningPtr(value, &mgp_value_destroy); } +} // namespace test_utils diff --git a/tests/unit/typed_value.cpp b/tests/unit/typed_value.cpp index 9d848cde7..ffc28262a 100644 --- a/tests/unit/typed_value.cpp +++ b/tests/unit/typed_value.cpp @@ -397,8 +397,8 @@ TEST_F(TypedValueLogicTest, LogicalXor) { // NOLINTNEXTLINE(hicpp-special-member-functions) TEST_F(AllTypesFixture, ConstructionWithMemoryResource) { - std::vector values_with_custom_memory; utils::MonotonicBufferResource monotonic_memory(1024); + std::vector values_with_custom_memory; for (const auto &value : values_) { EXPECT_EQ(value.GetMemoryResource(), utils::NewDeleteResource()); TypedValue copy_constructed_value(value, &monotonic_memory); diff --git a/tests/unit/utils_memory.cpp b/tests/unit/utils_memory.cpp index adeb9e94a..04e3b0631 100644 --- a/tests/unit/utils_memory.cpp +++ b/tests/unit/utils_memory.cpp @@ -12,6 +12,7 @@ class TestMemory final : public utils::MemoryResource { size_t delete_count_{0}; private: + static constexpr size_t kPadSize = 32; void *DoAllocate(size_t bytes, size_t alignment) override { new_count_++; EXPECT_TRUE(alignment != 0U && (alignment & (alignment - 1U)) == 0U) << "Alignment must be power of 2"; @@ -20,11 +21,11 @@ class TestMemory final : public utils::MemoryResource { EXPECT_TRUE(bytes + pad_size > bytes) << "TestMemory size overflow"; EXPECT_TRUE(bytes + pad_size + alignment > bytes + alignment) << "TestMemory size overflow"; EXPECT_TRUE(2U * 
alignment > alignment) << "TestMemory alignment overflow"; - // Allocate a block containing extra alignment and pad_size bytes, but + // Allocate a block containing extra alignment and kPadSize bytes, but // aligned to 2 * alignment. Then we can offset the ptr so that it's never // aligned to 2 * alignment. This ought to make allocator alignment issues // more obvious. - void *ptr = utils::NewDeleteResource()->Allocate(alignment + bytes + pad_size, 2U * alignment); + void *ptr = utils::NewDeleteResource()->Allocate(alignment + bytes + kPadSize, 2U * alignment); // Clear allocated memory to 0xFF, marking the invalid region. memset(ptr, 0xFF, alignment + bytes + pad_size); // Offset the ptr so it's not aligned to 2 * alignment, but still aligned to @@ -39,7 +40,8 @@ class TestMemory final : public utils::MemoryResource { void DoDeallocate(void *ptr, size_t bytes, size_t alignment) override { delete_count_++; // Deallocate the original ptr, before alignment adjustment. - return utils::NewDeleteResource()->Deallocate(static_cast(ptr) - alignment, bytes, alignment); + return utils::NewDeleteResource()->Deallocate(static_cast(ptr) - alignment, alignment + bytes + kPadSize, + 2U * alignment); } bool DoIsEqual(const utils::MemoryResource &other) const noexcept override { return this == &other; } diff --git a/tools/github/clang-tidy/clang-tidy-diff.py b/tools/github/clang-tidy/clang-tidy-diff.py new file mode 100755 index 000000000..a20b1f1f4 --- /dev/null +++ b/tools/github/clang-tidy/clang-tidy-diff.py @@ -0,0 +1,269 @@ +#!/usr/bin/env python3 +# +#===- clang-tidy-diff.py - ClangTidy Diff Checker -----------*- python -*--===# +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +#===-----------------------------------------------------------------------===# + +r""" +ClangTidy Diff Checker +====================== + +This script reads input from a unified diff, runs clang-tidy on all changed +files and outputs clang-tidy warnings in changed lines only. This is useful to +detect clang-tidy regressions in the lines touched by a specific patch. +Example usage for git/svn users: + + git diff -U0 HEAD^ | clang-tidy-diff.py -p1 + svn diff --diff-cmd=diff -x-U0 | \ + clang-tidy-diff.py -fix -checks=-*,modernize-use-override + +""" + +import argparse +import glob +import json +import multiprocessing +import os +import re +import shutil +import subprocess +import sys +import tempfile +import threading +import traceback + +try: + import yaml +except ImportError: + yaml = None + +is_py2 = sys.version[0] == '2' + +if is_py2: + import Queue as queue +else: + import queue as queue + + +def run_tidy(task_queue, lock, timeout): + watchdog = None + while True: + command = task_queue.get() + try: + proc = subprocess.Popen(command, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + + if timeout is not None: + watchdog = threading.Timer(timeout, proc.kill) + watchdog.start() + + stdout, stderr = proc.communicate() + + with lock: + sys.stdout.write(stdout.decode('utf-8') + '\n') + sys.stdout.flush() + if stderr: + sys.stderr.write(stderr.decode('utf-8') + '\n') + sys.stderr.flush() + except Exception as e: + with lock: + sys.stderr.write('Failed: ' + str(e) + ': '.join(command) + '\n') + finally: + with lock: + if not (timeout is None or watchdog is None): + if not watchdog.is_alive(): + sys.stderr.write('Terminated by timeout: ' + + ' '.join(command) + '\n') + watchdog.cancel() + task_queue.task_done() + + +def start_workers(max_tasks, tidy_caller, task_queue, lock, timeout): + for _ in range(max_tasks): + t = threading.Thread(target=tidy_caller, args=(task_queue, lock, timeout)) + t.daemon = 
True + t.start() + + +def merge_replacement_files(tmpdir, mergefile): + """Merge all replacement files in a directory into a single file""" + # The fixes suggested by clang-tidy >= 4.0.0 are given under + # the top level key 'Diagnostics' in the output yaml files + mergekey = "Diagnostics" + merged = [] + for replacefile in glob.iglob(os.path.join(tmpdir, '*.yaml')): + content = yaml.safe_load(open(replacefile, 'r')) + if not content: + continue # Skip empty files. + merged.extend(content.get(mergekey, [])) + + if merged: + # MainSourceFile: The key is required by the definition inside + # include/clang/Tooling/ReplacementsYaml.h, but the value + # is actually never used inside clang-apply-replacements, + # so we set it to '' here. + output = {'MainSourceFile': '', mergekey: merged} + with open(mergefile, 'w') as out: + yaml.safe_dump(output, out) + else: + # Empty the file: + open(mergefile, 'w').close() + + +def main(): + parser = argparse.ArgumentParser(description= + 'Run clang-tidy against changed files, and ' + 'output diagnostics only for modified ' + 'lines.') + parser.add_argument('-clang-tidy-binary', metavar='PATH', + default='clang-tidy', + help='path to clang-tidy binary') + parser.add_argument('-p', metavar='NUM', default=0, + help='strip the smallest prefix containing P slashes') + parser.add_argument('-regex', metavar='PATTERN', default=None, + help='custom pattern selecting file paths to check ' + '(case sensitive, overrides -iregex)') + parser.add_argument('-iregex', metavar='PATTERN', default= + r'.*\.(cpp|cc|c\+\+|cxx|c|cl|h|hpp|m|mm|inc)', + help='custom pattern selecting file paths to check ' + '(case insensitive, overridden by -regex)') + parser.add_argument('-j', type=int, default=1, + help='number of tidy instances to be run in parallel.') + parser.add_argument('-timeout', type=int, default=None, + help='timeout per each file in seconds.') + parser.add_argument('-fix', action='store_true', default=False, + help='apply suggested fixes') + 
parser.add_argument('-checks', + help='checks filter, when not specified, use clang-tidy ' + 'default', + default='') + parser.add_argument('-path', dest='build_path', + help='Path used to read a compile command database.') + if yaml: + parser.add_argument('-export-fixes', metavar='FILE', dest='export_fixes', + help='Create a yaml file to store suggested fixes in, ' + 'which can be applied with clang-apply-replacements.') + parser.add_argument('-extra-arg', dest='extra_arg', + action='append', default=[], + help='Additional argument to append to the compiler ' + 'command line.') + parser.add_argument('-extra-arg-before', dest='extra_arg_before', + action='append', default=[], + help='Additional argument to prepend to the compiler ' + 'command line.') + parser.add_argument('-quiet', action='store_true', default=False, + help='Run clang-tidy in quiet mode') + clang_tidy_args = [] + argv = sys.argv[1:] + if '--' in argv: + clang_tidy_args.extend(argv[argv.index('--'):]) + argv = argv[:argv.index('--')] + + args = parser.parse_args(argv) + + # Extract changed lines for each file. 
+ filename = None + lines_by_file = {} + for line in sys.stdin: + match = re.search('^\+\+\+\ \"?(.*?/){%s}([^ \t\n\"]*)' % args.p, line) + if match: + filename = match.group(2) + if filename is None: + continue + + if args.regex is not None: + if not re.match('^%s$' % args.regex, filename): + continue + else: + if not re.match('^%s$' % args.iregex, filename, re.IGNORECASE): + continue + + match = re.search('^@@.*\+(\d+)(,(\d+))?', line) + if match: + start_line = int(match.group(1)) + line_count = 1 + if match.group(3): + line_count = int(match.group(3)) + if line_count == 0: + continue + end_line = start_line + line_count - 1 + lines_by_file.setdefault(filename, []).append([start_line, end_line]) + + if not any(lines_by_file): + print("No relevant changes found.") + sys.exit(0) + + max_task_count = args.j + if max_task_count == 0: + max_task_count = multiprocessing.cpu_count() + max_task_count = min(len(lines_by_file), max_task_count) + + tmpdir = None + if yaml and args.export_fixes: + tmpdir = tempfile.mkdtemp() + + # Tasks for clang-tidy. + task_queue = queue.Queue(max_task_count) + # A lock for console output. + lock = threading.Lock() + + # Run a pool of clang-tidy workers. + start_workers(max_task_count, run_tidy, task_queue, lock, args.timeout) + + # Form the common args list. + common_clang_tidy_args = [] + if args.fix: + common_clang_tidy_args.append('-fix') + if args.checks != '': + common_clang_tidy_args.append('-checks=' + args.checks) + if args.quiet: + common_clang_tidy_args.append('-quiet') + if args.build_path is not None: + common_clang_tidy_args.append('-p=%s' % args.build_path) + for arg in args.extra_arg: + common_clang_tidy_args.append('-extra-arg=%s' % arg) + for arg in args.extra_arg_before: + common_clang_tidy_args.append('-extra-arg-before=%s' % arg) + + for name in lines_by_file: + line_filter_json = json.dumps( + [{"name": name, "lines": lines_by_file[name]}], + separators=(',', ':')) + + # Run clang-tidy on files containing changes. 
+ command = [args.clang_tidy_binary] + command.append('-line-filter=' + line_filter_json) + if yaml and args.export_fixes: + # Get a temporary file. We immediately close the handle so clang-tidy can + # overwrite it. + (handle, tmp_name) = tempfile.mkstemp(suffix='.yaml', dir=tmpdir) + os.close(handle) + command.append('-export-fixes=' + tmp_name) + command.extend(common_clang_tidy_args) + command.append(name) + command.extend(clang_tidy_args) + + task_queue.put(command) + + # Wait for all threads to be done. + task_queue.join() + + if yaml and args.export_fixes: + print('Writing fixes to ' + args.export_fixes + ' ...') + try: + merge_replacement_files(tmpdir, args.export_fixes) + except: + sys.stderr.write('Error exporting fixes.\n') + traceback.print_exc() + + if tmpdir: + shutil.rmtree(tmpdir) + + +if __name__ == '__main__': + main() diff --git a/tools/github/clang-tidy/count_errors.sh b/tools/github/clang-tidy/count_errors.sh new file mode 100755 index 000000000..4237099eb --- /dev/null +++ b/tools/github/clang-tidy/count_errors.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +# the first sort | uniq is necessary, because the same occurrence of the same error +# can be reported from headers when they are included in multiple source files +`dirname ${BASH_SOURCE[0]}`/grep_error_lines.sh | + sort | uniq | + sed -E 's/.*\[(.*)\]\r?$/\1/g' | # extract the check name from [check-name] + sort | uniq -c | # count each type of check + sort -nr # sort them into descending order diff --git a/tools/github/clang-tidy/grep_error_lines.sh b/tools/github/clang-tidy/grep_error_lines.sh new file mode 100755 index 000000000..3dd0fd7f0 --- /dev/null +++ b/tools/github/clang-tidy/grep_error_lines.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +# Matches timestamp like "2021-03-25T17:06:42.2621697Z" +TIMESTAMP_PATTERN="\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{7}Z" + +# Matches absolute file pathes with line and column identifier like +# 
"/opt/actions-runner/_work/memgraph/memgraph/src/utils/exceptions.hpp:71:11:" +FILE_ABSOLUTE_PATH_PATTERN="/[^:]+:\d+:\d+:" + +ERROR_OR_WARNING_PATTERN="(error|warning):" + +grep -P "^($TIMESTAMP_PATTERN )?$FILE_ABSOLUTE_PATH_PATTERN $ERROR_OR_WARNING_PATTERN.*$" \ No newline at end of file diff --git a/tools/github/clang-tidy/run-clang-tidy.py b/tools/github/clang-tidy/run-clang-tidy.py new file mode 100755 index 000000000..0dbac0b25 --- /dev/null +++ b/tools/github/clang-tidy/run-clang-tidy.py @@ -0,0 +1,337 @@ +#!/usr/bin/env python3 +# +#===- run-clang-tidy.py - Parallel clang-tidy runner --------*- python -*--===# +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +#===-----------------------------------------------------------------------===# +# FIXME: Integrate with clang-tidy-diff.py + + +""" +Parallel clang-tidy runner +========================== + +Runs clang-tidy over all files in a compilation database. Requires clang-tidy +and clang-apply-replacements in $PATH. + +Example invocations. +- Run clang-tidy on all files in the current working directory with a default + set of checks and show warnings in the cpp files and all project headers. + run-clang-tidy.py $PWD + +- Fix all header guards. + run-clang-tidy.py -fix -checks=-*,llvm-header-guard + +- Fix all header guards included from clang-tidy and header guards + for clang-tidy headers. 
+ run-clang-tidy.py -fix -checks=-*,llvm-header-guard extra/clang-tidy \ + -header-filter=extra/clang-tidy + +Compilation database setup: +http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html +""" + +from __future__ import print_function + +import argparse +import glob +import json +import multiprocessing +import os +import re +import shutil +import subprocess +import sys +import tempfile +import threading +import traceback + +try: + import yaml +except ImportError: + yaml = None + +is_py2 = sys.version[0] == '2' + +if is_py2: + import Queue as queue +else: + import queue as queue + + +def find_compilation_database(path): + """Adjusts the directory until a compilation database is found.""" + result = './' + while not os.path.isfile(os.path.join(result, path)): + if os.path.realpath(result) == '/': + print('Error: could not find compilation database.') + sys.exit(1) + result += '../' + return os.path.realpath(result) + + +def make_absolute(f, directory): + if os.path.isabs(f): + return f + return os.path.normpath(os.path.join(directory, f)) + + +def get_tidy_invocation(f, clang_tidy_binary, checks, tmpdir, build_path, + header_filter, allow_enabling_alpha_checkers, + extra_arg, extra_arg_before, quiet, config): + """Gets a command line for clang-tidy.""" + start = [clang_tidy_binary] + if allow_enabling_alpha_checkers: + start.append('-allow-enabling-analyzer-alpha-checkers') + if header_filter is not None: + start.append('-header-filter=' + header_filter) + if checks: + start.append('-checks=' + checks) + if tmpdir is not None: + start.append('-export-fixes') + # Get a temporary file. We immediately close the handle so clang-tidy can + # overwrite it. 
+ (handle, name) = tempfile.mkstemp(suffix='.yaml', dir=tmpdir) + os.close(handle) + start.append(name) + for arg in extra_arg: + start.append('-extra-arg=%s' % arg) + for arg in extra_arg_before: + start.append('-extra-arg-before=%s' % arg) + start.append('-p=' + build_path) + if quiet: + start.append('-quiet') + if config: + start.append('-config=' + config) + start.append(f) + return start + + +def merge_replacement_files(tmpdir, mergefile): + """Merge all replacement files in a directory into a single file""" + # The fixes suggested by clang-tidy >= 4.0.0 are given under + # the top level key 'Diagnostics' in the output yaml files + mergekey = "Diagnostics" + merged=[] + for replacefile in glob.iglob(os.path.join(tmpdir, '*.yaml')): + content = yaml.safe_load(open(replacefile, 'r')) + if not content: + continue # Skip empty files. + merged.extend(content.get(mergekey, [])) + + if merged: + # MainSourceFile: The key is required by the definition inside + # include/clang/Tooling/ReplacementsYaml.h, but the value + # is actually never used inside clang-apply-replacements, + # so we set it to '' here. + output = {'MainSourceFile': '', mergekey: merged} + with open(mergefile, 'w') as out: + yaml.safe_dump(output, out) + else: + # Empty the file: + open(mergefile, 'w').close() + + +def check_clang_apply_replacements_binary(args): + """Checks if invoking supplied clang-apply-replacements binary works.""" + try: + subprocess.check_call([args.clang_apply_replacements_binary, '--version']) + except: + print('Unable to run clang-apply-replacements. 
Is clang-apply-replacements ' + 'binary correctly specified?', file=sys.stderr) + traceback.print_exc() + sys.exit(1) + + +def apply_fixes(args, tmpdir): + """Calls clang-apply-fixes on a given directory.""" + invocation = [args.clang_apply_replacements_binary] + if args.format: + invocation.append('-format') + if args.style: + invocation.append('-style=' + args.style) + invocation.append(tmpdir) + subprocess.call(invocation) + + +def run_tidy(args, tmpdir, build_path, queue, lock, failed_files): + """Takes filenames out of queue and runs clang-tidy on them.""" + while True: + name = queue.get() + invocation = get_tidy_invocation(name, args.clang_tidy_binary, args.checks, + tmpdir, build_path, args.header_filter, + args.allow_enabling_alpha_checkers, + args.extra_arg, args.extra_arg_before, + args.quiet, args.config) + + proc = subprocess.Popen(invocation, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + output, err = proc.communicate() + if proc.returncode != 0: + failed_files.append(name) + with lock: + sys.stdout.write(' '.join(invocation) + '\n' + output.decode('utf-8')) + if len(err) > 0: + sys.stdout.flush() + sys.stderr.write(err.decode('utf-8')) + queue.task_done() + + +def main(): + parser = argparse.ArgumentParser(description='Runs clang-tidy over all files ' + 'in a compilation database. 
Requires ' + 'clang-tidy and clang-apply-replacements in ' + '$PATH.') + parser.add_argument('-allow-enabling-alpha-checkers', + action='store_true', help='allow alpha checkers from ' + 'clang-analyzer.') + parser.add_argument('-clang-tidy-binary', metavar='PATH', + default='clang-tidy-11', + help='path to clang-tidy binary') + parser.add_argument('-clang-apply-replacements-binary', metavar='PATH', + default='clang-apply-replacements-11', + help='path to clang-apply-replacements binary') + parser.add_argument('-checks', default=None, + help='checks filter, when not specified, use clang-tidy ' + 'default') + parser.add_argument('-config', default=None, + help='Specifies a configuration in YAML/JSON format: ' + ' -config="{Checks: \'*\', ' + ' CheckOptions: [{key: x, ' + ' value: y}]}" ' + 'When the value is empty, clang-tidy will ' + 'attempt to find a file named .clang-tidy for ' + 'each source file in its parent directories.') + parser.add_argument('-header-filter', default=None, + help='regular expression matching the names of the ' + 'headers to output diagnostics from. 
Diagnostics from ' + 'the main file of each translation unit are always ' + 'displayed.') + if yaml: + parser.add_argument('-export-fixes', metavar='filename', dest='export_fixes', + help='Create a yaml file to store suggested fixes in, ' + 'which can be applied with clang-apply-replacements.') + parser.add_argument('-j', type=int, default=0, + help='number of tidy instances to be run in parallel.') + parser.add_argument('files', nargs='*', default=['.*'], + help='files to be processed (regex on path)') + parser.add_argument('-fix', action='store_true', help='apply fix-its') + parser.add_argument('-format', action='store_true', help='Reformat code ' + 'after applying fixes') + parser.add_argument('-style', default='file', help='The style of reformat ' + 'code after applying fixes') + parser.add_argument('-p', dest='build_path', + help='Path used to read a compile command database.') + parser.add_argument('-extra-arg', dest='extra_arg', + action='append', default=[], + help='Additional argument to append to the compiler ' + 'command line.') + parser.add_argument('-extra-arg-before', dest='extra_arg_before', + action='append', default=[], + help='Additional argument to prepend to the compiler ' + 'command line.') + parser.add_argument('-quiet', action='store_true', + help='Run clang-tidy in quiet mode') + args = parser.parse_args() + + db_path = 'compile_commands.json' + + if args.build_path is not None: + build_path = args.build_path + else: + # Find our database + build_path = find_compilation_database(db_path) + + try: + invocation = [args.clang_tidy_binary, '-list-checks'] + if args.allow_enabling_alpha_checkers: + invocation.append('-allow-enabling-analyzer-alpha-checkers') + invocation.append('-p=' + build_path) + if args.checks: + invocation.append('-checks=' + args.checks) + invocation.append('-') + if args.quiet: + # Even with -quiet we still want to check if we can call clang-tidy. 
+ with open(os.devnull, 'w') as dev_null: + subprocess.check_call(invocation, stdout=dev_null) + else: + subprocess.check_call(invocation) + except: + print("Unable to run clang-tidy.", file=sys.stderr) + sys.exit(1) + + # Load the database and extract all files. + database = json.load(open(os.path.join(build_path, db_path))) + files = [make_absolute(entry['file'], entry['directory']) + for entry in database] + + max_task = args.j + if max_task == 0: + max_task = multiprocessing.cpu_count() + + tmpdir = None + if args.fix or (yaml and args.export_fixes): + check_clang_apply_replacements_binary(args) + tmpdir = tempfile.mkdtemp() + + # Build up a big regexy filter from all command line arguments. + file_name_re = re.compile('|'.join(args.files)) + + return_code = 0 + try: + # Spin up a bunch of tidy-launching threads. + task_queue = queue.Queue(max_task) + # List of files with a non-zero return code. + failed_files = [] + lock = threading.Lock() + for _ in range(max_task): + t = threading.Thread(target=run_tidy, + args=(args, tmpdir, build_path, task_queue, lock, failed_files)) + t.daemon = True + t.start() + + # Fill the queue with files. + for name in files: + if file_name_re.search(name): + task_queue.put(name) + + # Wait for all threads to be done. + task_queue.join() + if len(failed_files): + return_code = 1 + + except KeyboardInterrupt: + # This is a sad hack. Unfortunately subprocess goes + # bonkers with ctrl-c and we start forking merrily. 
+ print('\nCtrl-C detected, goodbye.') + if tmpdir: + shutil.rmtree(tmpdir) + os.kill(0, 9) + + if yaml and args.export_fixes: + print('Writing fixes to ' + args.export_fixes + ' ...') + try: + merge_replacement_files(tmpdir, args.export_fixes) + except: + print('Error exporting fixes.\n', file=sys.stderr) + traceback.print_exc() + return_code=1 + + if args.fix: + print('Applying fixes ...') + try: + apply_fixes(args, tmpdir) + except: + print('Error applying fixes.\n', file=sys.stderr) + traceback.print_exc() + return_code = 1 + + if tmpdir: + shutil.rmtree(tmpdir) + sys.exit(return_code) + + +if __name__ == '__main__': + main() diff --git a/tools/lsan.supp b/tools/lsan.supp new file mode 100644 index 000000000..daf07ed1a --- /dev/null +++ b/tools/lsan.supp @@ -0,0 +1,12 @@ +leak:antlr4::atn::ArrayPredictionContext::ArrayPredictionContext +leak:std::__shared_count<(__gnu_cxx::_Lock_policy)2>::__shared_count, std::weak_ptr&, unsigned long&>(antlr4::atn::SingletonPredictionContext*&, std::_Sp_alloc_shared_tag >, std::weak_ptr&, unsigned long&) +leak:antlr4::atn::PredictionContext::mergeSingletons(std::shared_ptr const&, std::shared_ptr const&, bool, std::map, std::shared_ptr >, std::shared_ptr, std::less, std::shared_ptr > >, std::allocator, std::shared_ptr > const, std::shared_ptr > > >*) +leak:void std::vector, std::allocator > >::_M_realloc_insert >(__gnu_cxx::__normal_iterator*, std::vector, std::allocator > > >, std::shared_ptr&&) +leak:antlr4::atn::ParserATNSimulator::closureCheckingStopState(std::shared_ptr const&, antlr4::atn::ATNConfigSet*, std::unordered_set, antlr4::atn::ATNConfig::Hasher, antlr4::atn::ATNConfig::Comparer, std::allocator > >&, bool, bool, int, bool) +leak:antlr4::atn::ParserATNSimulator::computeReachSet(antlr4::atn::ATNConfigSet*, unsigned long, bool) +leak:std::_Hashtable, std::allocator >, std::__detail::_Select1st, std::equal_to, std::hash, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, 
std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits >::_M_insert_unique_node(unsigned long const&, unsigned long, unsigned long, std::__detail::_Hash_node, false>*, unsigned long) +leak:void std::vector, std::allocator > >::_M_realloc_insert const&>(__gnu_cxx::__normal_iterator*, std::vector, std::allocator > > >, std::shared_ptr const&) +leak:antlr4::atn::ATNConfigSet::add(std::shared_ptr const&, std::map, std::shared_ptr >, std::shared_ptr, std::less, std::shared_ptr > >, std::allocator, std::shared_ptr > const, std::shared_ptr > > >*) +leak:antlr4::atn::ParserATNSimulator::getEpsilonTarget(std::shared_ptr const&, antlr4::atn::Transition*, bool, bool, bool, bool) +leak:antlr4::atn::PredictionContext::mergeArrays(std::shared_ptr const&, std::shared_ptr const&, bool, std::map, std::shared_ptr >, std::shared_ptr, std::less, std::shared_ptr > >, std::allocator, std::shared_ptr > const, std::shared_ptr > > >*) +leak:/lib/x86_64-linux-gnu/libpython3. From 5c93f81881019c76170ecb3ee34d95c0b12686cf Mon Sep 17 00:00:00 2001 From: antonio2368 Date: Fri, 2 Apr 2021 12:29:10 +0200 Subject: [PATCH 25/63] Disable failing tests and add logs for replication e2e (#132) * Disable sequential test * Remove parent build and benchmark * Save test data * Save e2e logs in build folder * Define different recovery time for each test --- .github/workflows/diff.yaml | 58 ++++------------------ tests/e2e/memory/workloads.yaml | 3 +- tests/e2e/replication/workloads.yaml | 12 +++-- tests/e2e/runner.py | 4 +- tests/jepsen/src/jepsen/memgraph/bank.clj | 2 +- tests/jepsen/src/jepsen/memgraph/core.clj | 6 +-- tests/jepsen/src/jepsen/memgraph/large.clj | 2 +- 7 files changed, 27 insertions(+), 60 deletions(-) diff --git a/.github/workflows/diff.yaml b/.github/workflows/diff.yaml index 4ba546412..502052f94 100644 --- a/.github/workflows/diff.yaml +++ b/.github/workflows/diff.yaml @@ -217,21 +217,6 @@ jobs: # branches and tags. 
(default: 1) fetch-depth: 0 - - name: Set up parent - run: | - # Remove parent folder (if it exists). - cd .. - if [ -d parent ]; then - rm -rf parent - fi - - # Copy untouched repository to parent folder. - cp -r memgraph parent - - # Checkout previous commit - cd parent - git checkout HEAD~1 - - name: Build release binaries run: | # Activate toolchain. @@ -245,20 +230,6 @@ jobs: cmake -DCMAKE_BUILD_TYPE=release .. make -j$THREADS - - name: Build parent binaries - run: | - # Activate toolchain. - source /opt/toolchain-v2/activate - - # Initialize dependencies. - cd ../parent - ./init - - # Build parent binaries. - cd build - cmake -DCMAKE_BUILD_TYPE=release .. - make -j$THREADS memgraph memgraph__macro_benchmark - - name: Run macro benchmark tests run: | cd tests/macro_benchmark @@ -266,26 +237,6 @@ jobs: --groups aggregation 1000_create unwind_create dense_expand match \ --no-strict - - name: Run parent macro benchmark tests - run: | - cd ../parent/tests/macro_benchmark - ./harness QuerySuite MemgraphRunner \ - --groups aggregation 1000_create unwind_create dense_expand match \ - --no-strict - - - name: Compute macro benchmark summary - run: | - ./tools/github/macro_benchmark_summary \ - --current tests/macro_benchmark/.harness_summary \ - --previous ../parent/tests/macro_benchmark/.harness_summary \ - --output macro_benchmark_summary.txt - - - name: Save macro benchmark summary - uses: actions/upload-artifact@v2 - with: - name: "Macro benchmark summary" - path: macro_benchmark_summary.txt - - name: Run GQL Behave tests run: | cd tests/gql_behave @@ -348,6 +299,15 @@ jobs: name: "Enterprise DEB package" path: build/output/memgraph*.deb + - name: Save test data + uses: actions/upload-artifact@v2 + if: always() + with: + name: "Test data" + path: | + # multiple paths could be defined + build/logs + release_jepsen_test: name: "Release Jepsen Test" runs-on: [self-hosted, Linux, X64, Debian10, JepsenControl] diff --git a/tests/e2e/memory/workloads.yaml 
b/tests/e2e/memory/workloads.yaml index 147b87b04..74dc59e1a 100644 --- a/tests/e2e/memory/workloads.yaml +++ b/tests/e2e/memory/workloads.yaml @@ -2,7 +2,8 @@ bolt_port: &bolt_port "7687" template_cluster: &template_cluster cluster: main: - args: ["--bolt-port", *bolt_port, "--memory-limit=500", "--storage-gc-cycle-sec=180"] + args: ["--bolt-port", *bolt_port, "--memory-limit=500", "--storage-gc-cycle-sec=180", "--log-level=TRACE"] + log_file: "memory-e2e.log" setup_queries: [] validation_queries: [] diff --git a/tests/e2e/replication/workloads.yaml b/tests/e2e/replication/workloads.yaml index 5f5a0d65d..39e1c9bdf 100644 --- a/tests/e2e/replication/workloads.yaml +++ b/tests/e2e/replication/workloads.yaml @@ -11,19 +11,23 @@ template_validation_queries: &template_validation_queries template_cluster: &template_cluster cluster: replica_1: - args: ["--bolt-port", "7688"] + args: ["--bolt-port", "7688", "--log-level=TRACE"] + log_file: "replication-e2e-replica1.log" setup_queries: ["SET REPLICATION ROLE TO REPLICA WITH PORT 10001;"] <<: *template_validation_queries replica_2: - args: ["--bolt-port", "7689"] + args: ["--bolt-port", "7689", "--log-level=TRACE"] + log_file: "replication-e2e-replica2.log" setup_queries: ["SET REPLICATION ROLE TO REPLICA WITH PORT 10002;"] <<: *template_validation_queries replica_3: - args: ["--bolt-port", "7690"] + args: ["--bolt-port", "7690", "--log-level=TRACE"] + log_file: "replication-e2e-replica3.log" setup_queries: ["SET REPLICATION ROLE TO REPLICA WITH PORT 10003;"] <<: *template_validation_queries main: - args: ["--bolt-port", "7687"] + args: ["--bolt-port", "7687", "--log-level=TRACE"] + log_file: "replication-e2e-main.log" setup_queries: [ "REGISTER REPLICA replica_1 SYNC WITH TIMEOUT 0 TO '127.0.0.1:10001'", "REGISTER REPLICA replica_2 SYNC WITH TIMEOUT 1 TO '127.0.0.1:10002'", diff --git a/tests/e2e/runner.py b/tests/e2e/runner.py index 453c8d3de..5d88740d9 100755 --- a/tests/e2e/runner.py +++ b/tests/e2e/runner.py @@ -44,7 
+44,9 @@ def run(args): for name, config in workload['cluster'].items(): mg_instance = MemgraphInstanceRunner(MEMGRAPH_BINARY) mg_instances[name] = mg_instance - mg_instance.start(args=config['args']) + log_file_path = os.path.join(BUILD_DIR, 'logs', config['log_file']) + binary_args = config['args'] + ["--log-file", log_file_path] + mg_instance.start(args=binary_args) for query in config['setup_queries']: mg_instance.query(query) # Test. diff --git a/tests/jepsen/src/jepsen/memgraph/bank.clj b/tests/jepsen/src/jepsen/memgraph/bank.clj index 9db55106a..3bfb79409 100644 --- a/tests/jepsen/src/jepsen/memgraph/bank.clj +++ b/tests/jepsen/src/jepsen/memgraph/bank.clj @@ -164,4 +164,4 @@ {:bank (bank-checker) :timeline (timeline/html)}) :generator (c/replication-gen (gen/mix [read-balances valid-transfer])) - :final-generator (gen/once read-balances)}) + :final-generator {:gen (gen/once read-balances) :recovery-time 20}}) diff --git a/tests/jepsen/src/jepsen/memgraph/core.clj b/tests/jepsen/src/jepsen/memgraph/core.clj index 69a4a0aa0..77bf6a163 100644 --- a/tests/jepsen/src/jepsen/memgraph/core.clj +++ b/tests/jepsen/src/jepsen/memgraph/core.clj @@ -22,7 +22,7 @@ "A map of workload names to functions that can take opts and construct workloads." 
{:bank bank/workload - :sequential sequential/workload + ;; :sequential sequential/workload (T0532-MG) :large large/workload}) (def nemesis-configuration @@ -45,8 +45,8 @@ (gen/log "Healing cluster.") (gen/nemesis (:final-generator nemesis)) (gen/log "Waiting for recovery") - (gen/sleep 20) - (gen/clients final-generator)) + (gen/sleep (:recovery-time final-generator)) + (gen/clients (:gen final-generator))) gen)] (merge tests/noop-test opts diff --git a/tests/jepsen/src/jepsen/memgraph/large.clj b/tests/jepsen/src/jepsen/memgraph/large.clj index d58248df7..cc710252a 100644 --- a/tests/jepsen/src/jepsen/memgraph/large.clj +++ b/tests/jepsen/src/jepsen/memgraph/large.clj @@ -103,4 +103,4 @@ :timeline (timeline/html)}) :generator (c/replication-gen (gen/mix [read-nodes add-nodes])) - :final-generator (gen/once read-nodes)}) + :final-generator {:gen (gen/once read-nodes) :recovery-time 40}}) From 8de31092ad3607d700bdfe0c11322cddd1d8709b Mon Sep 17 00:00:00 2001 From: antonio2368 Date: Tue, 13 Apr 2021 10:41:50 +0200 Subject: [PATCH 26/63] Add back the query memory limit logic (#134) --- src/query/interpreter.cpp | 76 +++++++++++++++++++++++++++------------ 1 file changed, 53 insertions(+), 23 deletions(-) diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp index 54cafbd53..4bd606cdc 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -27,6 +27,7 @@ #include "utils/logging.hpp" #include "utils/memory.hpp" #include "utils/memory_tracker.hpp" +#include "utils/readable_size.hpp" #include "utils/string.hpp" #include "utils/tsc.hpp" @@ -604,7 +605,7 @@ struct PullPlanVector { struct PullPlan { explicit PullPlan(std::shared_ptr plan, const Parameters ¶meters, bool is_profile_query, DbAccessor *dba, InterpreterContext *interpreter_context, - utils::MonotonicBufferResource *execution_memory); + utils::MonotonicBufferResource *execution_memory, std::optional memory_limit = {}); std::optional Pull(AnyStream *stream, std::optional n, const 
std::vector &output_symbols, std::map *summary); @@ -614,6 +615,7 @@ struct PullPlan { plan::UniqueCursorPtr cursor_ = nullptr; Frame frame_; ExecutionContext ctx_; + std::optional memory_limit_; // As it's possible to query execution using multiple pulls // we need the keep track of the total execution time across @@ -631,10 +633,11 @@ struct PullPlan { PullPlan::PullPlan(const std::shared_ptr plan, const Parameters ¶meters, const bool is_profile_query, DbAccessor *dba, InterpreterContext *interpreter_context, - utils::MonotonicBufferResource *execution_memory) + utils::MonotonicBufferResource *execution_memory, const std::optional memory_limit) : plan_(plan), cursor_(plan->plan().MakeCursor(execution_memory)), - frame_(plan->symbol_table().max_position(), execution_memory) { + frame_(plan->symbol_table().max_position(), execution_memory), + memory_limit_(memory_limit) { ctx_.db_accessor = dba; ctx_.symbol_table = plan->symbol_table(); ctx_.evaluation_context.timestamp = @@ -657,21 +660,25 @@ std::optional PullPlan::Pull(AnyStream *stream, std::optional< // single `Pull`. constexpr size_t stack_size = 256 * 1024; char stack_data[stack_size]; + utils::ResourceWithOutOfMemoryException resource_with_exception; + utils::MonotonicBufferResource monotonic_memory(&stack_data[0], stack_size, &resource_with_exception); + // We can throw on every query because a simple queries for deleting will use only + // the stack allocated buffer. + // Also, we want to throw only when the query engine requests more memory and not the storage + // so we add the exception to the allocator. + // TODO (mferencevic): Tune the parameters accordingly. 
+ utils::PoolResource pool_memory(128, 1024, &monotonic_memory); + std::optional maybe_limited_resource; + + if (memory_limit_) { + maybe_limited_resource.emplace(&pool_memory, *memory_limit_); + ctx_.evaluation_context.memory = &*maybe_limited_resource; + } else { + ctx_.evaluation_context.memory = &pool_memory; + } // Returns true if a result was pulled. - const auto pull_result = [&]() -> bool { - // We can throw on every query because a simple queries for deleting will use only - // the stack allocated buffer. - // Also, we want to throw only when the query engine requests more memory and not the storage - // so we add the exception to the allocator. - utils::ResourceWithOutOfMemoryException resource_with_exception; - utils::MonotonicBufferResource monotonic_memory(&stack_data[0], stack_size, &resource_with_exception); - // TODO (mferencevic): Tune the parameters accordingly. - utils::PoolResource pool_memory(128, 1024, &monotonic_memory); - ctx_.evaluation_context.memory = &pool_memory; - - return cursor_->Pull(frame_, ctx_); - }; + const auto pull_result = [&]() -> bool { return cursor_->Pull(frame_, ctx_); }; const auto stream_values = [&]() { // TODO: The streamed values should also probably use the above memory. 
@@ -829,9 +836,23 @@ PreparedQuery Interpreter::PrepareTransactionQuery(std::string_view query_upper) PreparedQuery PrepareCypherQuery(ParsedQuery parsed_query, std::map *summary, InterpreterContext *interpreter_context, DbAccessor *dba, utils::MonotonicBufferResource *execution_memory) { - auto plan = CypherQueryToPlan(parsed_query.stripped_query.hash(), std::move(parsed_query.ast_storage), - utils::Downcast(parsed_query.query), parsed_query.parameters, - &interpreter_context->plan_cache, dba, parsed_query.is_cacheable); + auto *cypher_query = utils::Downcast(parsed_query.query); + + Frame frame(0); + SymbolTable symbol_table; + EvaluationContext evaluation_context; + evaluation_context.timestamp = + std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()) + .count(); + evaluation_context.parameters = parsed_query.parameters; + ExpressionEvaluator evaluator(&frame, symbol_table, evaluation_context, dba, storage::View::OLD); + const auto memory_limit = EvaluateMemoryLimit(&evaluator, cypher_query->memory_limit_, cypher_query->memory_scale_); + if (memory_limit) { + spdlog::info("Running query with memory limit of {}", utils::GetReadableSize(*memory_limit)); + } + + auto plan = CypherQueryToPlan(parsed_query.stripped_query.hash(), std::move(parsed_query.ast_storage), cypher_query, + parsed_query.parameters, &interpreter_context->plan_cache, dba); summary->insert_or_assign("cost_estimate", plan->cost()); auto rw_type_checker = plan::ReadWriteTypeChecker(); @@ -850,8 +871,8 @@ PreparedQuery PrepareCypherQuery(ParsedQuery parsed_query, std::map(plan, parsed_query.parameters, false, dba, interpreter_context, execution_memory); + auto pull_plan = std::make_shared(plan, parsed_query.parameters, false, dba, interpreter_context, + execution_memory, memory_limit); return PreparedQuery{std::move(header), std::move(parsed_query.required_privileges), [pull_plan = std::move(pull_plan), output_symbols = std::move(output_symbols), summary]( AnyStream 
*stream, std::optional n) -> std::optional { @@ -949,6 +970,15 @@ PreparedQuery PrepareProfileQuery(ParsedQuery parsed_query, bool in_explicit_tra auto *cypher_query = utils::Downcast(parsed_inner_query.query); MG_ASSERT(cypher_query, "Cypher grammar should not allow other queries in PROFILE"); + Frame frame(0); + SymbolTable symbol_table; + EvaluationContext evaluation_context; + evaluation_context.timestamp = + std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()) + .count(); + evaluation_context.parameters = parsed_inner_query.parameters; + ExpressionEvaluator evaluator(&frame, symbol_table, evaluation_context, dba, storage::View::OLD); + const auto memory_limit = EvaluateMemoryLimit(&evaluator, cypher_query->memory_limit_, cypher_query->memory_scale_); auto cypher_query_plan = CypherQueryToPlan( parsed_inner_query.stripped_query.hash(), std::move(parsed_inner_query.ast_storage), cypher_query, @@ -960,14 +990,14 @@ PreparedQuery PrepareProfileQuery(ParsedQuery parsed_query, bool in_explicit_tra {"OPERATOR", "ACTUAL HITS", "RELATIVE TIME", "ABSOLUTE TIME"}, std::move(parsed_query.required_privileges), [plan = std::move(cypher_query_plan), parameters = std::move(parsed_inner_query.parameters), summary, dba, - interpreter_context, execution_memory, + interpreter_context, execution_memory, memory_limit, // We want to execute the query we are profiling lazily, so we delay // the construction of the corresponding context. ctx = std::optional{}, pull_plan = std::shared_ptr(nullptr)]( AnyStream *stream, std::optional n) mutable -> std::optional { // No output symbols are given so that nothing is streamed. 
if (!ctx) { - ctx = PullPlan(plan, parameters, true, dba, interpreter_context, execution_memory) + ctx = PullPlan(plan, parameters, true, dba, interpreter_context, execution_memory, memory_limit) .Pull(stream, {}, {}, summary); pull_plan = std::make_shared(ProfilingStatsToTable(ctx->stats, ctx->profile_execution_time)); } From 59105f68bd6f1d64862f942eb74afaa2552aed9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20Budiseli=C4=87?= Date: Tue, 20 Apr 2021 10:54:54 +0200 Subject: [PATCH 27/63] Add primary and secondary dependency links (#137) * Add explicit exit in case of git checkout or cherry-pick --- libs/setup.sh | 142 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 115 insertions(+), 27 deletions(-) diff --git a/libs/setup.sh b/libs/setup.sh index d00c13e2e..08c073995 100755 --- a/libs/setup.sh +++ b/libs/setup.sh @@ -2,8 +2,9 @@ # Download external dependencies. +local_cache_host=${MGDEPS_CACHE_HOST_PORT:-mgdeps-cache:8000} working_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -cd ${working_dir} +cd "${working_dir}" # Clones a git repository and optionally cherry picks additional commits. The # function will try to preserve any local changes in the repo. @@ -29,12 +30,17 @@ clone () { # Stash regardless of local_changes, so that a user gets a message on stdout. git stash # Checkout the primary commit (there's no need to pull/merge). - git checkout $checkout_id + # The checkout fail should exit this script immediately because the target + # commit is not there and that will most likely create build-time errors. + git checkout "$checkout_id" || exit 1 # Apply any optional cherry pick fixes. while [[ $# -ne 0 ]]; do local cherry_pick_id=$1 shift - git cherry-pick -n $cherry_pick_id + # The cherry-pick fail should exit this script immediately because the + # target commit is not there and that will most likely create build-time + # errors. + git cherry-pick -n "$cherry_pick_id" || exit 1 done # Reapply any local changes. 
if [[ $local_changes == true ]]; then @@ -43,12 +49,95 @@ clone () { popd } +file_get_try_double () { + primary_url="$1" + secondary_url="$2" + echo "Download primary from $primary_url secondary from $secondary_url" + if [ -z "$primary_url" ]; then echo "Primary should not be empty." && exit 1; fi + if [ -z "$secondary_url" ]; then echo "Secondary should not be empty." && exit 1; fi + filename="$(basename "$secondary_url")" + wget -nv "$primary_url" -O "$filename" || wget -nv "$secondary_url" -O "$filename" || exit 1 + echo "" +} + +repo_clone_try_double () { + primary_url="$1" + secondary_url="$2" + folder_name="$3" + ref="$4" + echo "Cloning primary from $primary_url secondary from $secondary_url" + if [ -z "$primary_url" ]; then echo "Primary should not be empty." && exit 1; fi + if [ -z "$secondary_url" ]; then echo "Secondary should not be empty." && exit 1; fi + if [ -z "$folder_name" ]; then echo "Clone folder should not be empty." && exit 1; fi + if [ -z "$ref" ]; then echo "Git clone ref should not be empty." && exit 1; fi + clone "$primary_url" "$folder_name" "$ref" || clone "$secondary_url" "$folder_name" "$ref" || exit 1 + echo "" +} + +# List all dependencies. + +# The reason for introducing primary and secondary urls are: +# * HTTPS is hard to cache +# * Remote development workflow is more flexible if people don't have to connect to VPN +# * Direct download from the "source of truth" is slower and unreliable because of the whole internet in-between +# * When a new dependency has to be added, both urls could be the same, later someone could optimize if required + +# The goal of having primary urls is to have links to the "local" cache of +# dependencies where these dependencies could be downloaded as fast as +# possible. The actual cache server could be on your local machine, on a +# dedicated machine inside the build cluster or on the actual build machine. +# Download from primary_urls might fail because the cache is not installed. 
+declare -A primary_urls=( + ["antlr4-code"]="http://$local_cache_host/git/antlr4.git" + ["antlr4-generator"]="http://$local_cache_host/file/antlr-4.6-complete.jar" + ["cppitertools"]="http://$local_cache_host/git/cppitertools.git" + ["fmt"]="http://$local_cache_host/git/fmt.git" + ["rapidcheck"]="http://$local_cache_host/git/rapidcheck.git" + ["gbenchmark"]="http://$local_cache_host/git/benchmark.git" + ["gtest"]="http://$local_cache_host/git/googletest.git" + ["gflags"]="http://$local_cache_host/git/gflags.git" + ["libbcrypt"]="http://$local_cache_host/git/libbcrypt.git" + ["bzip2"]="http://$local_cache_host/git/bzip2.git" + ["zlib"]="http://$local_cache_host/git/zlib.git" + ["rocksdb"]="http://$local_cache_host/git/rocksdb.git" + ["mgclient"]="http://$local_cache_host/git/mgclient.git" + ["pymgclient"]="http://$local_cache_host/git/pymgclient.git" + ["spdlog"]="http://$local_cache_host/git/spdlog" + ["jemalloc"]="http://$local_cache_host/git/jemalloc.git" + ["nlohmann"]="http://$local_cache_host/file/nlohmann/json/b3e5cb7f20dcc5c806e418df34324eca60d17d4e/single_include/nlohmann/json.hpp" + ["neo4j"]="http://$local_cache_host/file/neo4j-community-3.2.3-unix.tar.gz" +) + +# The goal of secondary urls is to have links to the "source of truth" of +# dependencies, e.g., Github or S3. Download from secondary urls, if happens +# at all, should never fail. In other words, if it fails, the whole build +# should fail. 
+declare -A secondary_urls=( + ["antlr4-code"]="https://github.com/antlr/antlr4.git" + ["antlr4-generator"]="http://www.antlr.org/download/antlr-4.6-complete.jar" + ["cppitertools"]="https://github.com/ryanhaining/cppitertools.git" + ["fmt"]="https://github.com/fmtlib/fmt.git" + ["rapidcheck"]="https://github.com/emil-e/rapidcheck.git" + ["gbenchmark"]="https://github.com/google/benchmark.git" + ["gtest"]="https://github.com/google/googletest.git" + ["gflags"]="https://github.com/memgraph/gflags.git" + ["libbcrypt"]="https://github.com/rg3/libbcrypt" + ["bzip2"]="https://github.com/VFR-maniac/bzip2" + ["zlib"]="https://github.com/madler/zlib.git" + ["rocksdb"]="https://github.com/facebook/rocksdb.git" + ["mgclient"]="https://github.com/memgraph/mgclient.git" + ["pymgclient"]="https://github.com/memgraph/pymgclient.git" + ["spdlog"]="https://github.com/gabime/spdlog" + ["jemalloc"]="https://github.com/jemalloc/jemalloc.git" + ["nlohmann"]="https://raw.githubusercontent.com/nlohmann/json/b3e5cb7f20dcc5c806e418df34324eca60d17d4e/single_include/nlohmann/json.hpp" + ["neo4j"]="https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/neo4j-community-3.2.3-unix.tar.gz" +) + # antlr -antlr_generator_filename="antlr-4.6-complete.jar" -# wget -O ${antlr_generator_filename} http://www.antlr.org/download/${antlr_generator_filename} -wget -nv -O ${antlr_generator_filename} https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/${antlr_generator_filename} +file_get_try_double "${primary_urls[antlr4-generator]}" "${secondary_urls[antlr4-generator]}" + antlr4_tag="aacd2a2c95816d8dc1c05814051d631bfec4cf3e" # v4.6 -clone https://github.com/antlr/antlr4.git antlr4 $antlr4_tag +repo_clone_try_double "${primary_urls[antlr4-code]}" "${secondary_urls[antlr4-code]}" "antlr4" "$antlr4_tag" # fix missing include sed -i 's/^#pragma once/#pragma once\n#include /' antlr4/runtime/Cpp/runtime/src/support/CPPUtils.h # remove shared library from install dependencies @@ -56,74 +145,73 @@ sed -i 
's/install(TARGETS antlr4_shared/install(TARGETS antlr4_shared OPTIONAL/' # cppitertools v2.0 2019-12-23 cppitertools_ref="cb3635456bdb531121b82b4d2e3afc7ae1f56d47" -clone https://github.com/ryanhaining/cppitertools.git cppitertools $cppitertools_ref +repo_clone_try_double "${primary_urls[cppitertools]}" "${secondary_urls[cppitertools]}" "cppitertools" "$cppitertools_ref" # fmt -fmt_tag="7bdf0628b1276379886c7f6dda2cef2b3b374f0b" # (2020-11-25) -clone https://github.com/fmtlib/fmt.git fmt $fmt_tag +fmt_tag="7bdf0628b1276379886c7f6dda2cef2b3b374f0b" # (2020-11-25) +repo_clone_try_double "${primary_urls[fmt]}" "${secondary_urls[fmt]}" "fmt" "$fmt_tag" # rapidcheck rapidcheck_tag="7bc7d302191a4f3d0bf005692677126136e02f60" # (2020-05-04) -clone https://github.com/emil-e/rapidcheck.git rapidcheck $rapidcheck_tag +repo_clone_try_double "${primary_urls[rapidcheck]}" "${secondary_urls[rapidcheck]}" "rapidcheck" "$rapidcheck_tag" # google benchmark benchmark_tag="4f8bfeae470950ef005327973f15b0044eceaceb" # v1.1.0 -clone https://github.com/google/benchmark.git benchmark $benchmark_tag +repo_clone_try_double "${primary_urls[gbenchmark]}" "${secondary_urls[gbenchmark]}" "benchmark" "$benchmark_tag" # google test googletest_tag="ec44c6c1675c25b9827aacd08c02433cccde7780" # v1.8.0 -clone https://github.com/google/googletest.git googletest $googletest_tag +repo_clone_try_double "${primary_urls[gtest]}" "${secondary_urls[gtest]}" "googletest" "$googletest_tag" # google flags gflags_tag="b37ceb03a0e56c9f15ce80409438a555f8a67b7c" # custom version (May 6, 2017) -clone https://github.com/memgraph/gflags.git gflags $gflags_tag +repo_clone_try_double "${primary_urls[gflags]}" "${secondary_urls[gflags]}" "gflags" "$gflags_tag" # libbcrypt libbcrypt_tag="8aa32ad94ebe06b76853b0767c910c9fbf7ccef4" # custom version (Dec 16, 2016) -clone https://github.com/rg3/libbcrypt libbcrypt $libbcrypt_tag +repo_clone_try_double "${primary_urls[libbcrypt]}" "${secondary_urls[libbcrypt]}" "libbcrypt" 
"$libbcrypt_tag" # neo4j -wget -nv https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/neo4j-community-3.2.3-unix.tar.gz -O neo4j.tar.gz -tar -xzf neo4j.tar.gz -rm -rf neo4j +file_get_try_double "${primary_urls[neo4j]}" "${secondary_urls[neo4j]}" +tar -xzf neo4j-community-3.2.3-unix.tar.gz mv neo4j-community-3.2.3 neo4j -rm neo4j.tar.gz +rm neo4j-community-3.2.3-unix.tar.gz # nlohmann json # We wget header instead of cloning repo since repo is huge (lots of test data). # We use head on Sep 1, 2017 instead of last release since it was long time ago. mkdir -p json cd json -wget "https://raw.githubusercontent.com/nlohmann/json/b3e5cb7f20dcc5c806e418df34324eca60d17d4e/single_include/nlohmann/json.hpp" +file_get_try_double "${primary_urls[nlohmann]}" "${secondary_urls[nlohmann]}" cd .. bzip2_tag="0405487e2b1de738e7f1c8afb50d19cf44e8d580" # v1.0.6 (May 26, 2011) -clone https://github.com/VFR-maniac/bzip2 bzip2 $bzip2_tag +repo_clone_try_double "${primary_urls[bzip2]}" "${secondary_urls[bzip2]}" "bzip2" "$bzip2_tag" zlib_tag="cacf7f1d4e3d44d871b605da3b647f07d718623f" # v1.2.11. 
-clone https://github.com/madler/zlib.git zlib $zlib_tag +repo_clone_try_double "${primary_urls[zlib]}" "${secondary_urls[zlib]}" "zlib" "$zlib_tag" # remove shared library from install dependencies sed -i 's/install(TARGETS zlib zlibstatic/install(TARGETS zlibstatic/g' zlib/CMakeLists.txt rocksdb_tag="f3e33549c151f30ac4eb7c22356c6d0331f37652" # (2020-10-14) -clone https://github.com/facebook/rocksdb.git rocksdb $rocksdb_tag +repo_clone_try_double "${primary_urls[rocksdb]}" "${secondary_urls[rocksdb]}" "rocksdb" "$rocksdb_tag" # remove shared library from install dependencies sed -i 's/TARGETS ${ROCKSDB_SHARED_LIB}/TARGETS ${ROCKSDB_SHARED_LIB} OPTIONAL/' rocksdb/CMakeLists.txt # mgclient mgclient_tag="v1.2.0" # (2021-01-14) -clone https://github.com/memgraph/mgclient.git mgclient $mgclient_tag +repo_clone_try_double "${primary_urls[mgclient]}" "${secondary_urls[mgclient]}" "mgclient" "$mgclient_tag" sed -i 's/\${CMAKE_INSTALL_LIBDIR}/lib/' mgclient/src/CMakeLists.txt # pymgclient pymgclient_tag="4f85c179e56302d46a1e3e2cf43509db65f062b3" # (2021-01-15) -clone https://github.com/memgraph/pymgclient.git pymgclient $pymgclient_tag +repo_clone_try_double "${primary_urls[pymgclient]}" "${secondary_urls[pymgclient]}" "pymgclient" "$pymgclient_tag" spdlog_tag="46d418164dd4cd9822cf8ca62a116a3f71569241" # (2020-12-01) -clone https://github.com/gabime/spdlog spdlog $spdlog_tag +repo_clone_try_double "${primary_urls[spdlog]}" "${secondary_urls[spdlog]}" "spdlog" "$spdlog_tag" jemalloc_tag="ea6b3e973b477b8061e0076bb257dbd7f3faa756" # (2021-02-11) -clone https://github.com/jemalloc/jemalloc.git jemalloc $jemalloc_tag +repo_clone_try_double "${primary_urls[jemalloc]}" "${secondary_urls[jemalloc]}" "jemalloc" "$jemalloc_tag" pushd jemalloc # ThreadPool select job randomly, and there can be some threads that had been # performed some memory heavy task before and will be inactive for some time, From 50b6afd73de31c95e81fd4eda9cd9f9935d505fb Mon Sep 17 00:00:00 2001 From: antonio2368 
Date: Wed, 21 Apr 2021 16:26:13 +0200 Subject: [PATCH 28/63] Fix clang tidy diff (#141) * Ignore header files for clang-tidy-diff * Rename code analysis job --- .github/workflows/diff.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/diff.yaml b/.github/workflows/diff.yaml index 502052f94..9447ab957 100644 --- a/.github/workflows/diff.yaml +++ b/.github/workflows/diff.yaml @@ -65,7 +65,7 @@ jobs: name: "Community DEB package" path: build/output/memgraph*.deb - coverage_build: + code_analysis: name: "Code analysis" runs-on: [self-hosted, General, Linux, X64, Debian10] env: @@ -124,7 +124,7 @@ jobs: source /opt/toolchain-v2/activate # Restrict clang-tidy results only to the modified parts - git diff -U0 master... -- src | ./tools/github/clang-tidy/clang-tidy-diff.py -p 1 -j $THREADS -path build | tee ./build/clang_tidy_output.txt + git diff -U0 master... -- src ':!*.hpp' | ./tools/github/clang-tidy/clang-tidy-diff.py -p 1 -j $THREADS -path build | tee ./build/clang_tidy_output.txt # Fail if any warning is reported ! cat ./build/clang_tidy_output.txt | ./tools/github/clang-tidy/grep_error_lines.sh > /dev/null From ccdd58b336cd623d9cd0de4e46b179f235679ec3 Mon Sep 17 00:00:00 2001 From: antonio2368 Date: Tue, 27 Apr 2021 08:31:18 +0200 Subject: [PATCH 29/63] Fix parsing nested complex types (#142) * Fix parsing of types for mgp.List --- CHANGELOG.md | 7 +++++++ include/mgp.py | 10 +++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d8e1af591..ccc88cab7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ ## Future +### Bug Fixes + +* Fixed parsing of types for Python procedures for types nested in `mgp.List`. + For example, parsing of `mgp.List[mgp.Map]` works now. + +## v1.4.0 + ### Breaking Changes * Changed `MEMORY LIMIT num (KB|MB)` clause in the procedure calls to `PROCEDURE MEMORY LIMIT num (KB|MB)`. 
diff --git a/include/mgp.py b/include/mgp.py index b2719d8fc..a0f7bab50 100644 --- a/include/mgp.py +++ b/include/mgp.py @@ -683,7 +683,15 @@ def _typing_to_cypher_type(type_): return _mgp.type_nullable(simple_type) return _mgp.type_nullable(parse_typing(type_arg_as_str)) elif type_as_str.startswith('typing.List'): - type_arg_as_str, = parse_type_args(type_as_str) + type_arg_as_str = parse_type_args(type_as_str) + + if len(type_arg_as_str) > 1: + # Nested object could be a type consisting of a list of types (e.g. mgp.Map) + # so we need to join the parts. + type_arg_as_str = ', '.join(type_arg_as_str) + else: + type_arg_as_str = type_arg_as_str[0] + simple_type = get_simple_type(type_arg_as_str) if simple_type is not None: return _mgp.type_list(simple_type) From b71345655fa18bcd324ad76d5cca27b38c4a6eec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20Budiseli=C4=87?= Date: Fri, 7 May 2021 13:37:45 +0200 Subject: [PATCH 30/63] Add balancing of GHA jobs across runners (#146) --- .github/workflows/diff.yaml | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/.github/workflows/diff.yaml b/.github/workflows/diff.yaml index 9447ab957..3d22a6493 100644 --- a/.github/workflows/diff.yaml +++ b/.github/workflows/diff.yaml @@ -10,7 +10,7 @@ on: jobs: community_build: name: "Community build" - runs-on: [self-hosted, General, Linux, X64, Debian10] + runs-on: [self-hosted, Linux, X64, Diff] env: THREADS: 24 @@ -67,7 +67,7 @@ jobs: code_analysis: name: "Code analysis" - runs-on: [self-hosted, General, Linux, X64, Debian10] + runs-on: [self-hosted, Linux, X64, Diff] env: THREADS: 24 @@ -131,7 +131,7 @@ jobs: debug_build: name: "Debug build" - runs-on: [self-hosted, General, Linux, X64, Debian10] + runs-on: [self-hosted, Linux, X64, Diff] env: THREADS: 24 @@ -205,7 +205,7 @@ jobs: release_build: name: "Release build" - runs-on: [self-hosted, General, Linux, X64, Debian10] + runs-on: [self-hosted, Linux, X64, Diff] env: THREADS: 24 @@ -230,13 +230,6 @@ 
jobs: cmake -DCMAKE_BUILD_TYPE=release .. make -j$THREADS - - name: Run macro benchmark tests - run: | - cd tests/macro_benchmark - ./harness QuerySuite MemgraphRunner \ - --groups aggregation 1000_create unwind_create dense_expand match \ - --no-strict - - name: Run GQL Behave tests run: | cd tests/gql_behave From cc27a0413904e9a04bcdd9c23059351690532f64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20Budiseli=C4=87?= Date: Fri, 7 May 2021 15:54:09 +0200 Subject: [PATCH 31/63] Add bench-graph-client inside tools (#139) --- tools/bench-graph-client/main.py | 63 +++++++++++++++++++++++ tools/bench-graph-client/requirements.txt | 1 + 2 files changed, 64 insertions(+) create mode 100755 tools/bench-graph-client/main.py create mode 100644 tools/bench-graph-client/requirements.txt diff --git a/tools/bench-graph-client/main.py b/tools/bench-graph-client/main.py new file mode 100755 index 000000000..d53901e6c --- /dev/null +++ b/tools/bench-graph-client/main.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 + +""" +Bench Graph client responsible for sending benchmarking data in JSON format to +the Bench Graph server. 
+""" + +import json +import logging +import os +import requests +from datetime import datetime +from argparse import ArgumentParser + +SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) + +GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY", "") +GITHUB_SHA = os.getenv("GITHUB_SHA", "") +GITHUB_REF = os.getenv("GITHUB_REF", "") + +BENCH_GRAPH_SERVER_ENDPOINT = os.getenv( + "BENCH_GRAPH_SERVER_ENDPOINT", + "http://mgdeps-cache:9000") + +log = logging.getLogger(__name__) + + +def parse_args(): + argp = ArgumentParser(description=__doc__) + argp.add_argument("--benchmark-name", type=str, required=True) + argp.add_argument("--benchmark-results-path", type=str, required=True) + argp.add_argument("--github-run-id", type=int, required=True) + argp.add_argument("--github-run-number", type=int, required=True) + return argp.parse_args() + + +def post_measurement(args): + with open(args.benchmark_results_path, "r") as f: + data = json.load(f) + timestamp = datetime.now().timestamp() + req = requests.post( + f"{BENCH_GRAPH_SERVER_ENDPOINT}/measurements", + json={ + "name": args.benchmark_name, + "timestamp": timestamp, + "git_repo": GITHUB_REPOSITORY, + "git_ref": GITHUB_REF, + "git_sha": GITHUB_SHA, + "github_run_id": args.github_run_id, + "github_run_number": args.github_run_number, + "results": data + }, + timeout=1) + assert req.status_code == 200, \ + f"Uploading {args.benchmark_name} data failed." 
+ log.info(f"{args.benchmark_name} data sent to " + f"{BENCH_GRAPH_SERVER_ENDPOINT}") + + +if __name__ == "__main__": + args = parse_args() + logging.basicConfig(level=logging.INFO) + post_measurement(args) diff --git a/tools/bench-graph-client/requirements.txt b/tools/bench-graph-client/requirements.txt new file mode 100644 index 000000000..9d84d3588 --- /dev/null +++ b/tools/bench-graph-client/requirements.txt @@ -0,0 +1 @@ +requests==2.25.1 From 782c377f5d2f19b6f6847f94f72b52a6bb80b794 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20Budiseli=C4=87?= Date: Wed, 12 May 2021 17:17:57 +0200 Subject: [PATCH 32/63] Add benchmarking GHA Diff job (#147) --- .github/workflows/diff.yaml | 61 ++++++++++++++++++++++++++++++++ tools/bench-graph-client/main.py | 11 ++++-- 2 files changed, 69 insertions(+), 3 deletions(-) diff --git a/.github/workflows/diff.yaml b/.github/workflows/diff.yaml index 3d22a6493..07ce34acb 100644 --- a/.github/workflows/diff.yaml +++ b/.github/workflows/diff.yaml @@ -340,3 +340,64 @@ jobs: with: name: "Jepsen Report" path: tests/jepsen/Jepsen.tar.gz + + release_benchmarks: + name: "Release benchmarks" + runs-on: [self-hosted, Linux, X64, Diff, Gen7] + env: + THREADS: 24 + + steps: + - name: Set up repository + uses: actions/checkout@v2 + with: + # Number of commits to fetch. `0` indicates all history for all + # branches and tags. (default: 1) + fetch-depth: 0 + + - name: Build release binaries + run: | + # Activate toolchain. + source /opt/toolchain-v2/activate + + # Initialize dependencies. + ./init + + # Build only memgraph release binarie. + cd build + cmake -DCMAKE_BUILD_TYPE=release .. 
+ make -j$THREADS + + - name: Run macro benchmarks + run: | + cd tests/macro_benchmark + ./harness QuerySuite MemgraphRunner \ + --groups aggregation 1000_create unwind_create dense_expand match \ + --no-strict + + - name: Upload macro benchmark results + run: | + cd tools/bench-graph-client + virtualenv -p python3 ve3 + source ve3/bin/activate + pip install -r requirements.txt + ./main.py --benchmark-name "macro_benchmark" \ + --benchmark-results-path "../../tests/macro_benchmark/.harness_summary" \ + --github-run-id "${{ github.run_id }}" \ + --github-run-number "${{ github.run_number }}" + + - name: Run mgbench + run: | + cd tests/mgbench + ./benchmark.py --num-workers-for-benchmark 12 --export-results benchmark_result.json pokec/medium/*/* + + - name: Upload mgbench results + run: | + cd tools/bench-graph-client + virtualenv -p python3 ve3 + source ve3/bin/activate + pip install -r requirements.txt + ./main.py --benchmark-name "mgbench" \ + --benchmark-results-path "../../tests/mgbench/benchmark_result.json" \ + --github-run-id "${{ github.run_id }}" \ + --github-run-number "${{ github.run_number }}" diff --git a/tools/bench-graph-client/main.py b/tools/bench-graph-client/main.py index d53901e6c..e90340ef0 100755 --- a/tools/bench-graph-client/main.py +++ b/tools/bench-graph-client/main.py @@ -9,6 +9,7 @@ import json import logging import os import requests +import subprocess from datetime import datetime from argparse import ArgumentParser @@ -20,7 +21,7 @@ GITHUB_REF = os.getenv("GITHUB_REF", "") BENCH_GRAPH_SERVER_ENDPOINT = os.getenv( "BENCH_GRAPH_SERVER_ENDPOINT", - "http://mgdeps-cache:9000") + "http://bench-graph-api:9001") log = logging.getLogger(__name__) @@ -38,6 +39,10 @@ def post_measurement(args): with open(args.benchmark_results_path, "r") as f: data = json.load(f) timestamp = datetime.now().timestamp() + branch = subprocess.run( + ["git", "rev-parse", "--abbrev-ref", "HEAD"], + stdout=subprocess.PIPE, + check=True).stdout.decode("utf-8").strip() 
req = requests.post( f"{BENCH_GRAPH_SERVER_ENDPOINT}/measurements", json={ @@ -48,8 +53,8 @@ def post_measurement(args): "git_sha": GITHUB_SHA, "github_run_id": args.github_run_id, "github_run_number": args.github_run_number, - "results": data - }, + "results": data, + "git_branch": branch}, timeout=1) assert req.status_code == 200, \ f"Uploading {args.benchmark_name} data failed." From 1def0c9104719b85981eee90458d5efef94154bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20Budiseli=C4=87?= Date: Thu, 13 May 2021 12:05:36 +0200 Subject: [PATCH 33/63] Fix libs/setup.sh nocached download (#148) --- libs/setup.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/libs/setup.sh b/libs/setup.sh index 08c073995..4278b7fb6 100755 --- a/libs/setup.sh +++ b/libs/setup.sh @@ -16,7 +16,11 @@ clone () { shift 3 # Clone if there's no repo. if [[ ! -d "$dir_name" ]]; then - git clone "$git_repo" "$dir_name" + echo "Cloning from $git_repo" + # If the clone fails, it doesn't make sense to continue with the function + # execution but the whole script should continue executing because we might + # clone the same repo from a different source. + git clone "$git_repo" "$dir_name" || return 1 fi pushd "$dir_name" # Just fetch new commits from remote repository. Don't merge/pull them in, so From 30413a7b4f21ee9bd69367cababfe8d6acf54a55 Mon Sep 17 00:00:00 2001 From: antonio2368 Date: Tue, 18 May 2021 13:44:29 +0200 Subject: [PATCH 34/63] Ignore carriage return at the end of line for a CSV file (#151) --- CHANGELOG.md | 1 + src/utils/csv_parsing.cpp | 5 ++++ tests/unit/utils_csv_parsing.cpp | 43 ++++++++++++++++++-------------- 3 files changed, 30 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ccc88cab7..6f3d9f764 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ * Fixed parsing of types for Python procedures for types nested in `mgp.List`. For example, parsing of `mgp.List[mgp.Map]` works now. 
+* Fixed reading CSV files that are using CRLF as the newline symbol. ## v1.4.0 diff --git a/src/utils/csv_parsing.cpp b/src/utils/csv_parsing.cpp index fb9dbd29e..d500b517e 100644 --- a/src/utils/csv_parsing.cpp +++ b/src/utils/csv_parsing.cpp @@ -84,6 +84,11 @@ Reader::ParsingResult Reader::ParseRow(utils::MemoryResource *mem) { std::string_view line_string_view = *maybe_line; + // remove '\r' from the end in case we have dos file format + if (line_string_view.back() == '\r') { + line_string_view.remove_suffix(1); + } + while (state != CsvParserState::DONE && !line_string_view.empty()) { const auto c = line_string_view[0]; diff --git a/tests/unit/utils_csv_parsing.cpp b/tests/unit/utils_csv_parsing.cpp index 34c7729b4..335e16769 100644 --- a/tests/unit/utils_csv_parsing.cpp +++ b/tests/unit/utils_csv_parsing.cpp @@ -4,7 +4,7 @@ #include "utils/string.hpp" -class CsvReaderTest : public ::testing::Test { +class CsvReaderTest : public ::testing::TestWithParam { protected: const std::filesystem::path csv_directory{std::filesystem::temp_directory_path() / "csv_testing"}; @@ -30,7 +30,9 @@ class CsvReaderTest : public ::testing::Test { namespace { class FileWriter { public: - explicit FileWriter(const std::filesystem::path path) { stream_.open(path); } + explicit FileWriter(const std::filesystem::path path, std::string newline = "\n") : newline_{std::move(newline)} { + stream_.open(path); + } FileWriter(const FileWriter &) = delete; FileWriter &operator=(const FileWriter &) = delete; @@ -45,7 +47,7 @@ class FileWriter { return 0; } - stream_ << line << std::endl; + stream_ << line << newline_; // including the newline character return line.size() + 1; @@ -53,6 +55,7 @@ class FileWriter { private: std::ofstream stream_; + std::string newline_; }; std::string CreateRow(const std::vector &columns, const std::string_view delim) { @@ -69,10 +72,10 @@ auto ToPmrColumns(const std::vector &columns) { } // namespace -TEST_F(CsvReaderTest, CommaDelimiter) { 
+TEST_P(CsvReaderTest, CommaDelimiter) { // create a file with a single valid row; const auto filepath = csv_directory / "bla.csv"; - auto writer = FileWriter(filepath); + auto writer = FileWriter(filepath, GetParam()); const std::vector columns{"A", "B", "C"}; writer.WriteLine(CreateRow(columns, ",")); @@ -93,9 +96,9 @@ TEST_F(CsvReaderTest, CommaDelimiter) { ASSERT_EQ(*parsed_row, ToPmrColumns(columns)); } -TEST_F(CsvReaderTest, SemicolonDelimiter) { +TEST_P(CsvReaderTest, SemicolonDelimiter) { const auto filepath = csv_directory / "bla.csv"; - auto writer = FileWriter(filepath); + auto writer = FileWriter(filepath, GetParam()); utils::MemoryResource *mem(utils::NewDeleteResource()); @@ -116,12 +119,12 @@ TEST_F(CsvReaderTest, SemicolonDelimiter) { ASSERT_EQ(*parsed_row, ToPmrColumns(columns)); } -TEST_F(CsvReaderTest, SkipBad) { +TEST_P(CsvReaderTest, SkipBad) { // create a file with invalid first two rows (containing a string with a // missing closing quote); // the last row is valid; const auto filepath = csv_directory / "bla.csv"; - auto writer = FileWriter(filepath); + auto writer = FileWriter(filepath, GetParam()); utils::MemoryResource *mem(utils::NewDeleteResource()); @@ -161,11 +164,11 @@ TEST_F(CsvReaderTest, SkipBad) { } } -TEST_F(CsvReaderTest, AllRowsValid) { +TEST_P(CsvReaderTest, AllRowsValid) { // create a file with all rows valid; // parser should return 'std::nullopt' const auto filepath = csv_directory / "bla.csv"; - auto writer = FileWriter(filepath); + auto writer = FileWriter(filepath, GetParam()); utils::MemoryResource *mem(utils::NewDeleteResource()); @@ -190,11 +193,11 @@ TEST_F(CsvReaderTest, AllRowsValid) { } } -TEST_F(CsvReaderTest, SkipAllRows) { +TEST_P(CsvReaderTest, SkipAllRows) { // create a file with all rows invalid (containing a string with a missing closing quote); // parser should return 'std::nullopt' const auto filepath = csv_directory / "bla.csv"; - auto writer = FileWriter(filepath); + auto writer = FileWriter(filepath, 
GetParam()); utils::MemoryResource *mem(utils::NewDeleteResource()); @@ -217,9 +220,9 @@ TEST_F(CsvReaderTest, SkipAllRows) { ASSERT_EQ(parsed_row, std::nullopt); } -TEST_F(CsvReaderTest, WithHeader) { +TEST_P(CsvReaderTest, WithHeader) { const auto filepath = csv_directory / "bla.csv"; - auto writer = FileWriter(filepath); + auto writer = FileWriter(filepath, GetParam()); utils::MemoryResource *mem(utils::NewDeleteResource()); @@ -249,12 +252,12 @@ TEST_F(CsvReaderTest, WithHeader) { } } -TEST_F(CsvReaderTest, MultilineQuotedString) { +TEST_P(CsvReaderTest, MultilineQuotedString) { // create a file with first row valid and the second row containing a quoted // string spanning two lines; // parser should return two valid rows const auto filepath = csv_directory / "bla.csv"; - auto writer = FileWriter(filepath); + auto writer = FileWriter(filepath, GetParam()); utils::MemoryResource *mem(utils::NewDeleteResource()); @@ -284,11 +287,11 @@ TEST_F(CsvReaderTest, MultilineQuotedString) { ASSERT_EQ(*parsed_row, ToPmrColumns(expected_multiline)); } -TEST_F(CsvReaderTest, EmptyColumns) { +TEST_P(CsvReaderTest, EmptyColumns) { // create a file with all rows valid; // parser should return 'std::nullopt' const auto filepath = csv_directory / "bla.csv"; - auto writer = FileWriter(filepath); + auto writer = FileWriter(filepath, GetParam()); utils::MemoryResource *mem(utils::NewDeleteResource()); @@ -315,3 +318,5 @@ TEST_F(CsvReaderTest, EmptyColumns) { ASSERT_EQ(*parsed_row, pmr_expected_row); } } + +INSTANTIATE_TEST_CASE_P(NewlineParameterizedTest, CsvReaderTest, ::testing::Values("\n", "\r\n")); From 999b3ef79ff87fdc614532c3996c26bfc6e76b64 Mon Sep 17 00:00:00 2001 From: antonio2368 Date: Tue, 18 May 2021 18:42:19 +0200 Subject: [PATCH 35/63] Fix memory tracking issues (#150) --- CHANGELOG.md | 2 + libs/setup.sh | 2 +- src/query/interpreter.cpp | 25 +++-- src/query/interpreter.hpp | 6 +- src/utils/new_delete.cpp | 159 +++++++++++++++++++++++++--- 
tests/e2e/memory/memory_control.cpp | 14 --- tests/e2e/memory/workloads.yaml | 2 +- 7 files changed, 163 insertions(+), 47 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6f3d9f764..850e34513 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ * Fixed parsing of types for Python procedures for types nested in `mgp.List`. For example, parsing of `mgp.List[mgp.Map]` works now. +* Fixed memory tracking issues. Some of the allocation and deallocation weren't + tracked during the query execution. * Fixed reading CSV files that are using CRLF as the newline symbol. ## v1.4.0 diff --git a/libs/setup.sh b/libs/setup.sh index 4278b7fb6..dc9bb2bd6 100755 --- a/libs/setup.sh +++ b/libs/setup.sh @@ -230,5 +230,5 @@ pushd jemalloc # avoid spurious latencies and additional work associated with # MADV_DONTNEED. See # https://github.com/ClickHouse/ClickHouse/issues/11121 for motivation. -./autogen.sh --with-malloc-conf="percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:10000" +./autogen.sh --with-malloc-conf="percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:5000,dirty_decay_ms:5000" popd diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp index 4bd606cdc..91a070507 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -604,8 +604,8 @@ struct PullPlanVector { struct PullPlan { explicit PullPlan(std::shared_ptr plan, const Parameters ¶meters, bool is_profile_query, - DbAccessor *dba, InterpreterContext *interpreter_context, - utils::MonotonicBufferResource *execution_memory, std::optional memory_limit = {}); + DbAccessor *dba, InterpreterContext *interpreter_context, utils::MemoryResource *execution_memory, + std::optional memory_limit = {}); std::optional Pull(AnyStream *stream, std::optional n, const std::vector &output_symbols, std::map *summary); @@ -632,8 +632,8 @@ struct PullPlan { }; PullPlan::PullPlan(const std::shared_ptr plan, const Parameters ¶meters, const bool is_profile_query, - DbAccessor *dba, 
InterpreterContext *interpreter_context, - utils::MonotonicBufferResource *execution_memory, const std::optional memory_limit) + DbAccessor *dba, InterpreterContext *interpreter_context, utils::MemoryResource *execution_memory, + const std::optional memory_limit) : plan_(plan), cursor_(plan->plan().MakeCursor(execution_memory)), frame_(plan->symbol_table().max_position(), execution_memory), @@ -835,7 +835,7 @@ PreparedQuery Interpreter::PrepareTransactionQuery(std::string_view query_upper) PreparedQuery PrepareCypherQuery(ParsedQuery parsed_query, std::map *summary, InterpreterContext *interpreter_context, DbAccessor *dba, - utils::MonotonicBufferResource *execution_memory) { + utils::MemoryResource *execution_memory) { auto *cypher_query = utils::Downcast(parsed_query.query); Frame frame(0); @@ -886,7 +886,7 @@ PreparedQuery PrepareCypherQuery(ParsedQuery parsed_query, std::map *summary, InterpreterContext *interpreter_context, DbAccessor *dba, - utils::MonotonicBufferResource *execution_memory) { + utils::MemoryResource *execution_memory) { const std::string kExplainQueryStart = "explain "; MG_ASSERT(utils::StartsWith(utils::ToLowerCase(parsed_query.stripped_query.query()), kExplainQueryStart), "Expected stripped query to start with '{}'", kExplainQueryStart); @@ -932,7 +932,7 @@ PreparedQuery PrepareExplainQuery(ParsedQuery parsed_query, std::map *summary, InterpreterContext *interpreter_context, - DbAccessor *dba, utils::MonotonicBufferResource *execution_memory) { + DbAccessor *dba, utils::MemoryResource *execution_memory) { const std::string kProfileQueryStart = "profile "; MG_ASSERT(utils::StartsWith(utils::ToLowerCase(parsed_query.stripped_query.query()), kProfileQueryStart), @@ -1015,7 +1015,7 @@ PreparedQuery PrepareProfileQuery(ParsedQuery parsed_query, bool in_explicit_tra } PreparedQuery PrepareDumpQuery(ParsedQuery parsed_query, std::map *summary, DbAccessor *dba, - utils::MonotonicBufferResource *execution_memory) { + utils::MemoryResource 
*execution_memory) { return PreparedQuery{{"QUERY"}, std::move(parsed_query.required_privileges), [pull_plan = std::make_shared(dba)]( @@ -1030,7 +1030,7 @@ PreparedQuery PrepareDumpQuery(ParsedQuery parsed_query, std::map *summary, InterpreterContext *interpreter_context, - utils::MonotonicBufferResource *execution_memory) { + utils::MemoryResource *execution_memory) { if (in_explicit_transaction) { throw IndexInMulticommandTxException(); } @@ -1099,7 +1099,7 @@ PreparedQuery PrepareIndexQuery(ParsedQuery parsed_query, bool in_explicit_trans PreparedQuery PrepareAuthQuery(ParsedQuery parsed_query, bool in_explicit_transaction, std::map *summary, InterpreterContext *interpreter_context, - DbAccessor *dba, utils::MonotonicBufferResource *execution_memory) { + DbAccessor *dba, utils::MemoryResource *execution_memory) { if (in_explicit_transaction) { throw UserModificationInMulticommandTxException(); } @@ -1212,7 +1212,7 @@ PreparedQuery PrepareFreeMemoryQuery(ParsedQuery parsed_query, const bool in_exp PreparedQuery PrepareInfoQuery(ParsedQuery parsed_query, bool in_explicit_transaction, std::map *summary, InterpreterContext *interpreter_context, - storage::Storage *db, utils::MonotonicBufferResource *execution_memory) { + storage::Storage *db, utils::MemoryResource *execution_memory) { if (in_explicit_transaction) { throw InfoInMulticommandTxException(); } @@ -1300,8 +1300,7 @@ PreparedQuery PrepareInfoQuery(ParsedQuery parsed_query, bool in_explicit_transa PreparedQuery PrepareConstraintQuery(ParsedQuery parsed_query, bool in_explicit_transaction, std::map *summary, - InterpreterContext *interpreter_context, - utils::MonotonicBufferResource *execution_memory) { + InterpreterContext *interpreter_context, utils::MemoryResource *execution_memory) { if (in_explicit_transaction) { throw ConstraintInMulticommandTxException(); } diff --git a/src/query/interpreter.hpp b/src/query/interpreter.hpp index 3a8ff61be..171691e03 100644 --- a/src/query/interpreter.hpp +++ 
b/src/query/interpreter.hpp @@ -317,7 +317,9 @@ class Interpreter final { private: struct QueryExecution { std::optional prepared_query; - utils::MonotonicBufferResource execution_memory{kExecutionMemoryBlockSize}; + utils::MonotonicBufferResource execution_monotonic_memory{kExecutionMemoryBlockSize}; + utils::ResourceWithOutOfMemoryException execution_memory{&execution_monotonic_memory}; + std::map summary; explicit QueryExecution() = default; @@ -331,7 +333,7 @@ class Interpreter final { // destroy the prepared query which is using that instance // of execution memory. prepared_query.reset(); - execution_memory.Release(); + execution_monotonic_memory.Release(); } }; diff --git a/src/utils/new_delete.cpp b/src/utils/new_delete.cpp index a82a6a524..1c6aeba5e 100644 --- a/src/utils/new_delete.cpp +++ b/src/utils/new_delete.cpp @@ -1,3 +1,4 @@ +#include #include #if USE_JEMALLOC @@ -10,7 +11,7 @@ #include "utils/memory_tracker.hpp" namespace { -void *newImpl(std::size_t size) { +void *newImpl(const std::size_t size) { auto *ptr = malloc(size); if (LIKELY(ptr != nullptr)) { return ptr; @@ -19,11 +20,26 @@ void *newImpl(std::size_t size) { throw std::bad_alloc{}; } -void *newNoExcept(const std::size_t size) noexcept { return malloc(size); } +void *newImpl(const std::size_t size, const std::align_val_t align) { + auto *ptr = aligned_alloc(static_cast(align), size); + if (LIKELY(ptr != nullptr)) { + return ptr; + } -void deleteImpl(void *ptr) noexcept { free(ptr); } + throw std::bad_alloc{}; +} + +void *newNoExcept(const std::size_t size) noexcept { return malloc(size); } +void *newNoExcept(const std::size_t size, const std::align_val_t align) noexcept { + return aligned_alloc(size, static_cast(align)); +} #if USE_JEMALLOC +void deleteImpl(void *ptr) noexcept { dallocx(ptr, 0); } + +void deleteImpl(void *ptr, const std::align_val_t align) noexcept { + dallocx(ptr, MALLOCX_ALIGN(align)); // NOLINT(hicpp-signed-bitwise) +} void deleteSized(void *ptr, const std::size_t 
size) noexcept { if (UNLIKELY(ptr == nullptr)) { @@ -33,24 +49,43 @@ void deleteSized(void *ptr, const std::size_t size) noexcept { sdallocx(ptr, size, 0); } +void deleteSized(void *ptr, const std::size_t size, const std::align_val_t align) noexcept { + if (UNLIKELY(ptr == nullptr)) { + return; + } + + sdallocx(ptr, size, MALLOCX_ALIGN(align)); // NOLINT(hicpp-signed-bitwise) +} + #else +void deleteImpl(void *ptr) noexcept { free(ptr); } + +void deleteImpl(void *ptr, const std::align_val_t /*unused*/) noexcept { free(ptr); } void deleteSized(void *ptr, const std::size_t /*unused*/) noexcept { free(ptr); } +void deleteSized(void *ptr, const std::size_t /*unused*/, const std::align_val_t /*unused*/) noexcept { free(ptr); } #endif -void TrackMemory(const size_t size) { - size_t actual_size = size; - +void TrackMemory(std::size_t size) { #if USE_JEMALLOC if (LIKELY(size != 0)) { - actual_size = nallocx(size, 0); + size = nallocx(size, 0); } #endif - utils::total_memory_tracker.Alloc(actual_size); + utils::total_memory_tracker.Alloc(size); } -bool TrackMemoryNoExcept(const size_t size) { +void TrackMemory(std::size_t size, const std::align_val_t align) { +#if USE_JEMALLOC + if (LIKELY(size != 0)) { + size = nallocx(size, MALLOCX_ALIGN(align)); // NOLINT(hicpp-signed-bitwise) + } +#endif + utils::total_memory_tracker.Alloc(size); +} + +bool TrackMemoryNoExcept(const std::size_t size) { try { TrackMemory(size); } catch (...) { @@ -60,7 +95,17 @@ bool TrackMemoryNoExcept(const size_t size) { return true; } -void UntrackMemory([[maybe_unused]] void *ptr, [[maybe_unused]] size_t size = 0) noexcept { +bool TrackMemoryNoExcept(const std::size_t size, const std::align_val_t align) { + try { + TrackMemory(size, align); + } catch (...) 
{ + return false; + } + + return true; +} + +void UntrackMemory([[maybe_unused]] void *ptr, [[maybe_unused]] std::size_t size = 0) noexcept { try { #if USE_JEMALLOC if (LIKELY(ptr != nullptr)) { @@ -78,32 +123,74 @@ void UntrackMemory([[maybe_unused]] void *ptr, [[maybe_unused]] size_t size = 0) } } +void UntrackMemory(void *ptr, const std::align_val_t align, [[maybe_unused]] std::size_t size = 0) noexcept { + try { +#if USE_JEMALLOC + if (LIKELY(ptr != nullptr)) { + utils::total_memory_tracker.Free(sallocx(ptr, MALLOCX_ALIGN(align))); // NOLINT(hicpp-signed-bitwise) + } +#else + if (size) { + utils::total_memory_tracker.Free(size); + } else { + // Inaccurate because malloc_usable_size() result is greater or equal to allocated size. + utils::total_memory_tracker.Free(malloc_usable_size(ptr)); + } +#endif + } catch (...) { + } +} + } // namespace -void *operator new(std::size_t size) { +void *operator new(const std::size_t size) { TrackMemory(size); return newImpl(size); } -void *operator new[](std::size_t size) { +void *operator new[](const std::size_t size) { TrackMemory(size); return newImpl(size); } -void *operator new(std::size_t size, const std::nothrow_t & /*unused*/) noexcept { +void *operator new(const std::size_t size, const std::align_val_t align) { + TrackMemory(size, align); + return newImpl(size, align); +} + +void *operator new[](const std::size_t size, const std::align_val_t align) { + TrackMemory(size, align); + return newImpl(size, align); +} + +void *operator new(const std::size_t size, const std::nothrow_t & /*unused*/) noexcept { if (LIKELY(TrackMemoryNoExcept(size))) { return newNoExcept(size); } return nullptr; } -void *operator new[](std::size_t size, const std::nothrow_t & /*unused*/) noexcept { +void *operator new[](const std::size_t size, const std::nothrow_t & /*unused*/) noexcept { if (LIKELY(TrackMemoryNoExcept(size))) { return newNoExcept(size); } return nullptr; } +void *operator new(const std::size_t size, const std::align_val_t
align, const std::nothrow_t & /*unused*/) noexcept { + if (LIKELY(TrackMemoryNoExcept(size, align))) { + return newNoExcept(size, align); + } + return nullptr; +} + +void *operator new[](const std::size_t size, const std::align_val_t align, const std::nothrow_t & /*unused*/) noexcept { + if (LIKELY(TrackMemoryNoExcept(size, align))) { + return newNoExcept(size, align); + } + return nullptr; +} + void operator delete(void *ptr) noexcept { UntrackMemory(ptr); deleteImpl(ptr); @@ -114,12 +201,52 @@ void operator delete[](void *ptr) noexcept { deleteImpl(ptr); } -void operator delete(void *ptr, std::size_t size) noexcept { +void operator delete(void *ptr, const std::align_val_t align) noexcept { + UntrackMemory(ptr, align); + deleteImpl(ptr, align); +} + +void operator delete[](void *ptr, const std::align_val_t align) noexcept { + UntrackMemory(ptr, align); + deleteImpl(ptr, align); +} + +void operator delete(void *ptr, const std::size_t size) noexcept { UntrackMemory(ptr, size); deleteSized(ptr, size); } -void operator delete[](void *ptr, std::size_t size) noexcept { +void operator delete[](void *ptr, const std::size_t size) noexcept { UntrackMemory(ptr, size); deleteSized(ptr, size); } + +void operator delete(void *ptr, const std::size_t size, const std::align_val_t align) noexcept { + UntrackMemory(ptr, align, size); + deleteSized(ptr, size, align); +} + +void operator delete[](void *ptr, const std::size_t size, const std::align_val_t align) noexcept { + UntrackMemory(ptr, align, size); + deleteSized(ptr, size, align); +} + +void operator delete(void *ptr, const std::nothrow_t & /*unused*/) noexcept { + UntrackMemory(ptr); + deleteImpl(ptr); +} + +void operator delete[](void *ptr, const std::nothrow_t & /*unused*/) noexcept { + UntrackMemory(ptr); + deleteImpl(ptr); +} + +void operator delete(void *ptr, const std::align_val_t align, const std::nothrow_t & /*unused*/) noexcept { + UntrackMemory(ptr, align); + deleteImpl(ptr, align); +} + +void operator delete[](void 
*ptr, const std::align_val_t align, const std::nothrow_t & /*unused*/) noexcept { + UntrackMemory(ptr, align); + deleteImpl(ptr, align); +} diff --git a/tests/e2e/memory/memory_control.cpp b/tests/e2e/memory/memory_control.cpp index 537fba5e7..a6f5ea96f 100644 --- a/tests/e2e/memory/memory_control.cpp +++ b/tests/e2e/memory/memory_control.cpp @@ -38,19 +38,5 @@ int main(int argc, char **argv) { } spdlog::info("Memgraph is out of memory"); - - spdlog::info("Cleaning up unused memory"); - client->Execute("MATCH (n) DETACH DELETE n;"); - client->DiscardAll(); - client->Execute("FREE MEMORY;"); - client->DiscardAll(); - - // now it should succeed - spdlog::info("Retrying the query with the memory cleaned up"); - client->Execute(create_query); - if (!client->FetchOne()) { - LOG_FATAL("Memgraph is still out of memory"); - } - return 0; } diff --git a/tests/e2e/memory/workloads.yaml b/tests/e2e/memory/workloads.yaml index 74dc59e1a..adec01260 100644 --- a/tests/e2e/memory/workloads.yaml +++ b/tests/e2e/memory/workloads.yaml @@ -2,7 +2,7 @@ bolt_port: &bolt_port "7687" template_cluster: &template_cluster cluster: main: - args: ["--bolt-port", *bolt_port, "--memory-limit=500", "--storage-gc-cycle-sec=180", "--log-level=TRACE"] + args: ["--bolt-port", *bolt_port, "--memory-limit=1000", "--storage-gc-cycle-sec=180", "--log-level=TRACE"] log_file: "memory-e2e.log" setup_queries: [] validation_queries: [] From c4555f5448bb7ee7855a2768dc7e9965a8d8cb1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mislav=20Vuleti=C4=87?= Date: Thu, 20 May 2021 16:04:50 +0200 Subject: [PATCH 36/63] Fix Arch package help message typo (#119) --- release/arch-pkg/package_arch | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release/arch-pkg/package_arch b/release/arch-pkg/package_arch index e5b24d867..7e9b4e6c6 100755 --- a/release/arch-pkg/package_arch +++ b/release/arch-pkg/package_arch @@ -1,7 +1,7 @@ #!/bin/bash -e function print_help () { - echo "Usage: $0 MEMGPRAH_PACKAGE.tar.gz" + 
echo "Usage: $0 MEMGRAPH_PACKAGE.tar.gz" echo "Optional arguments:" echo -e " -h|--help Print help." } From 8a996703016a984fbe5f4b6c92e69585438211a8 Mon Sep 17 00:00:00 2001 From: Kostas Kyrimis Date: Fri, 21 May 2021 13:00:24 +0300 Subject: [PATCH 37/63] Add init toolchain-v3 script and support for zsh (#149) --- environment/toolchain/v3.sh | 546 ++++++++++++++++++++++++++++++++++++ 1 file changed, 546 insertions(+) create mode 100755 environment/toolchain/v3.sh diff --git a/environment/toolchain/v3.sh b/environment/toolchain/v3.sh new file mode 100755 index 000000000..ebcb7dea2 --- /dev/null +++ b/environment/toolchain/v3.sh @@ -0,0 +1,546 @@ +#!/bin/bash -e + +# helpers +pushd () { command pushd "$@" > /dev/null; } +popd () { command popd "$@" > /dev/null; } +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +CPUS=$( grep -c processor < /proc/cpuinfo ) +cd "$DIR" + +source "$DIR/../util.sh" +DISTRO="$(operating_system)" + +# toolchain version +TOOLCHAIN_VERSION=3 + +# package versions used +GCC_VERSION=10.2.0 +BINUTILS_VERSION=2.35.1 +case "$DISTRO" in + centos-7) # because GDB >= 9 does NOT compile with readline6. + GDB_VERSION=8.3 + ;; + *) + GDB_VERSION=10.1 + ;; +esac +CMAKE_VERSION=3.18.4 +CPPCHECK_VERSION=2.2 +LLVM_VERSION=11.0.0 +SWIG_VERSION=4.0.2 # used only for LLVM compilation + +# Check for the dependencies. +echo "ALL BUILD PACKAGES: $($DIR/../os/$DISTRO.sh list TOOLCHAIN_BUILD_DEPS)" +$DIR/../os/$DISTRO.sh check TOOLCHAIN_BUILD_DEPS +echo "ALL RUN PACKAGES: $($DIR/../os/$DISTRO.sh list TOOLCHAIN_RUN_DEPS)" +$DIR/../os/$DISTRO.sh check TOOLCHAIN_RUN_DEPS + +# check installation directory +NAME=toolchain-v$TOOLCHAIN_VERSION +PREFIX=/opt/$NAME +mkdir -p $PREFIX >/dev/null 2>/dev/null || true +if [ ! -d $PREFIX ] || [ ! -w $PREFIX ]; then + echo "Please make sure that the directory '$PREFIX' exists and is writable by the current user!" 
+ echo + echo "If unsure, execute these commands as root:" + echo " mkdir $PREFIX && chown $USER:$USER $PREFIX" + echo + echo "Press when you have created the directory and granted permissions." + # wait for the directory to be created + while true; do + read + if [ ! -d $PREFIX ] || [ ! -w $PREFIX ]; then + echo + echo "You can't continue before you have created the directory and granted permissions!" + echo + echo "Press when you have created the directory and granted permissions." + else + break + fi + done +fi + +# create archives directory +mkdir -p archives + +# download all archives +pushd archives +if [ ! -f gcc-$GCC_VERSION.tar.gz ]; then + wget https://ftp.gnu.org/gnu/gcc/gcc-$GCC_VERSION/gcc-$GCC_VERSION.tar.gz +fi +if [ ! -f binutils-$BINUTILS_VERSION.tar.gz ]; then + wget https://ftp.gnu.org/gnu/binutils/binutils-$BINUTILS_VERSION.tar.gz +fi +if [ ! -f gdb-$GDB_VERSION.tar.gz ]; then + wget https://ftp.gnu.org/gnu/gdb/gdb-$GDB_VERSION.tar.gz +fi +if [ ! -f cmake-$CMAKE_VERSION.tar.gz ]; then + wget https://github.com/Kitware/CMake/releases/download/v$CMAKE_VERSION/cmake-$CMAKE_VERSION.tar.gz +fi +if [ ! -f swig-$SWIG_VERSION.tar.gz ]; then + wget https://github.com/swig/swig/archive/rel-$SWIG_VERSION.tar.gz -O swig-$SWIG_VERSION.tar.gz +fi +if [ ! -f cppcheck-$CPPCHECK_VERSION.tar.gz ]; then + wget https://github.com/danmar/cppcheck/archive/$CPPCHECK_VERSION.tar.gz -O cppcheck-$CPPCHECK_VERSION.tar.gz +fi +if [ ! 
-f llvm-$LLVM_VERSION.src.tar.xz ]; then + wget https://github.com/llvm/llvm-project/releases/download/llvmorg-$LLVM_VERSION/llvm-$LLVM_VERSION.src.tar.xz + wget https://github.com/llvm/llvm-project/releases/download/llvmorg-$LLVM_VERSION/clang-$LLVM_VERSION.src.tar.xz + wget https://github.com/llvm/llvm-project/releases/download/llvmorg-$LLVM_VERSION/lld-$LLVM_VERSION.src.tar.xz + wget https://github.com/llvm/llvm-project/releases/download/llvmorg-$LLVM_VERSION/clang-tools-extra-$LLVM_VERSION.src.tar.xz + wget https://github.com/llvm/llvm-project/releases/download/llvmorg-$LLVM_VERSION/compiler-rt-$LLVM_VERSION.src.tar.xz +fi +if [ ! -f pahole-gdb-master.zip ]; then + wget https://github.com/PhilArmstrong/pahole-gdb/archive/master.zip -O pahole-gdb-master.zip +fi + +# verify all archives +# NOTE: Verification can fail if the archive is signed by another developer. I +# haven't added commands to download all developer GnuPG keys because the +# download is very slow. If the verification fails for you, figure out who has +# signed the archive and download their public key instead. +GPG="gpg --homedir .gnupg" +KEYSERVER="hkp://keyserver.ubuntu.com" +mkdir -p .gnupg +chmod 700 .gnupg +# verify gcc +if [ ! -f gcc-$GCC_VERSION.tar.gz.sig ]; then + wget https://ftp.gnu.org/gnu/gcc/gcc-$GCC_VERSION/gcc-$GCC_VERSION.tar.gz.sig +fi +# list of valid gcc gnupg keys: https://gcc.gnu.org/mirrors.html +$GPG --keyserver $KEYSERVER --recv-keys 0x3AB00996FC26A641 +$GPG --verify gcc-$GCC_VERSION.tar.gz.sig gcc-$GCC_VERSION.tar.gz +# verify binutils +if [ ! -f binutils-$BINUTILS_VERSION.tar.gz.sig ]; then + wget https://ftp.gnu.org/gnu/binutils/binutils-$BINUTILS_VERSION.tar.gz.sig +fi +$GPG --keyserver $KEYSERVER --recv-keys 0xDD9E3C4F +$GPG --verify binutils-$BINUTILS_VERSION.tar.gz.sig binutils-$BINUTILS_VERSION.tar.gz +# verify gdb +if [ ! 
-f gdb-$GDB_VERSION.tar.gz.sig ]; then + wget https://ftp.gnu.org/gnu/gdb/gdb-$GDB_VERSION.tar.gz.sig +fi +$GPG --keyserver $KEYSERVER --recv-keys 0xFF325CF3 +$GPG --verify gdb-$GDB_VERSION.tar.gz.sig gdb-$GDB_VERSION.tar.gz +# verify cmake +if [ ! -f cmake-$CMAKE_VERSION-SHA-256.txt ] || [ ! -f cmake-$CMAKE_VERSION-SHA-256.txt.asc ]; then + wget https://github.com/Kitware/CMake/releases/download/v$CMAKE_VERSION/cmake-$CMAKE_VERSION-SHA-256.txt + wget https://github.com/Kitware/CMake/releases/download/v$CMAKE_VERSION/cmake-$CMAKE_VERSION-SHA-256.txt.asc + # Because CentOS 7 doesn't have the `--ignore-missing` flag for `sha256sum` + # we filter out the missing files from the sums here manually. + cat cmake-$CMAKE_VERSION-SHA-256.txt | grep "cmake-$CMAKE_VERSION.tar.gz" > cmake-$CMAKE_VERSION-SHA-256-filtered.txt +fi +$GPG --keyserver $KEYSERVER --recv-keys 0xC6C265324BBEBDC350B513D02D2CEF1034921684 +sha256sum -c cmake-$CMAKE_VERSION-SHA-256-filtered.txt +$GPG --verify cmake-$CMAKE_VERSION-SHA-256.txt.asc cmake-$CMAKE_VERSION-SHA-256.txt +# verify llvm, cfe, lld, clang-tools-extra +if [ ! 
-f llvm-$LLVM_VERSION.src.tar.xz.sig ]; then + wget https://github.com/llvm/llvm-project/releases/download/llvmorg-$LLVM_VERSION/llvm-$LLVM_VERSION.src.tar.xz.sig + wget https://github.com/llvm/llvm-project/releases/download/llvmorg-$LLVM_VERSION/clang-$LLVM_VERSION.src.tar.xz.sig + wget https://github.com/llvm/llvm-project/releases/download/llvmorg-$LLVM_VERSION/lld-$LLVM_VERSION.src.tar.xz.sig + wget https://github.com/llvm/llvm-project/releases/download/llvmorg-$LLVM_VERSION/clang-tools-extra-$LLVM_VERSION.src.tar.xz.sig + wget https://github.com/llvm/llvm-project/releases/download/llvmorg-$LLVM_VERSION/compiler-rt-$LLVM_VERSION.src.tar.xz.sig +fi +# list of valid llvm gnupg keys: https://releases.llvm.org/download.html +$GPG --keyserver $KEYSERVER --recv-keys 0x345AD05D +$GPG --verify llvm-$LLVM_VERSION.src.tar.xz.sig llvm-$LLVM_VERSION.src.tar.xz +$GPG --verify clang-$LLVM_VERSION.src.tar.xz.sig clang-$LLVM_VERSION.src.tar.xz +$GPG --verify lld-$LLVM_VERSION.src.tar.xz.sig lld-$LLVM_VERSION.src.tar.xz +$GPG --verify clang-tools-extra-$LLVM_VERSION.src.tar.xz.sig clang-tools-extra-$LLVM_VERSION.src.tar.xz +$GPG --verify compiler-rt-$LLVM_VERSION.src.tar.xz.sig compiler-rt-$LLVM_VERSION.src.tar.xz +popd + +# create build directory +mkdir -p build +pushd build + +# compile gcc +if [ ! 
-f $PREFIX/bin/gcc ]; then + if [ -d gcc-$GCC_VERSION ]; then + rm -rf gcc-$GCC_VERSION + fi + tar -xvf ../archives/gcc-$GCC_VERSION.tar.gz + pushd gcc-$GCC_VERSION + ./contrib/download_prerequisites + mkdir build && pushd build + # influenced by: https://buildd.debian.org/status/fetch.php?pkg=gcc-8&arch=amd64&ver=8.3.0-6&stamp=1554588545 + ../configure -v \ + --build=x86_64-linux-gnu \ + --host=x86_64-linux-gnu \ + --target=x86_64-linux-gnu \ + --prefix=$PREFIX \ + --disable-multilib \ + --with-system-zlib \ + --enable-checking=release \ + --enable-languages=c,c++,fortran \ + --enable-gold=yes \ + --enable-ld=yes \ + --enable-lto \ + --enable-bootstrap \ + --disable-vtable-verify \ + --disable-werror \ + --without-included-gettext \ + --enable-threads=posix \ + --enable-nls \ + --enable-clocale=gnu \ + --enable-libstdcxx-debug \ + --enable-libstdcxx-time=yes \ + --enable-gnu-unique-object \ + --enable-libmpx \ + --enable-plugin \ + --enable-default-pie \ + --with-target-system-zlib \ + --with-tune=generic \ + --without-cuda-driver + #--program-suffix=$( printf "$GCC_VERSION" | cut -d '.' -f 1,2 ) \ + make -j$CPUS + # make -k check # run test suite + make install + popd && popd +fi + +# activate toolchain +export PATH=$PREFIX/bin:$PATH +export LD_LIBRARY_PATH=$PREFIX/lib64 + +# compile binutils +if [ ! 
-f $PREFIX/bin/ld.gold ]; then + if [ -d binutils-$BINUTILS_VERSION ]; then + rm -rf binutils-$BINUTILS_VERSION + fi + tar -xvf ../archives/binutils-$BINUTILS_VERSION.tar.gz + pushd binutils-$BINUTILS_VERSION + mkdir build && pushd build + # influenced by: https://buildd.debian.org/status/fetch.php?pkg=binutils&arch=amd64&ver=2.32-7&stamp=1553247092 + env \ + CC=gcc \ + CXX=g++ \ + CFLAGS="-g -O2" \ + CXXFLAGS="-g -O2" \ + LDFLAGS="" \ + ../configure \ + --build=x86_64-linux-gnu \ + --host=x86_64-linux-gnu \ + --prefix=$PREFIX \ + --enable-ld=default \ + --enable-gold \ + --enable-lto \ + --enable-plugins \ + --enable-shared \ + --enable-threads \ + --with-system-zlib \ + --enable-deterministic-archives \ + --disable-compressed-debug-sections \ + --enable-new-dtags \ + --disable-werror + make -j$CPUS + # make -k check # run test suite + make install + popd && popd +fi + +# compile gdb +if [ ! -f $PREFIX/bin/gdb ]; then + if [ -d gdb-$GDB_VERSION ]; then + rm -rf gdb-$GDB_VERSION + fi + tar -xvf ../archives/gdb-$GDB_VERSION.tar.gz + pushd gdb-$GDB_VERSION + mkdir build && pushd build + # https://buildd.debian.org/status/fetch.php?pkg=gdb&arch=amd64&ver=8.2.1-2&stamp=1550831554&raw=0 + env \ + CC=gcc \ + CXX=g++ \ + CFLAGS="-g -O2 -fstack-protector-strong -Wformat -Werror=format-security" \ + CXXFLAGS="-g -O2 -fstack-protector-strong -Wformat -Werror=format-security" \ + CPPFLAGS="-Wdate-time -D_FORTIFY_SOURCE=2 -fPIC" \ + LDFLAGS="-Wl,-z,relro" \ + PYTHON="" \ + ../configure \ + --build=x86_64-linux-gnu \ + --host=x86_64-linux-gnu \ + --prefix=$PREFIX \ + --disable-maintainer-mode \ + --disable-dependency-tracking \ + --disable-silent-rules \ + --disable-gdbtk \ + --disable-shared \ + --without-guile \ + --with-system-gdbinit=$PREFIX/etc/gdb/gdbinit \ + --with-system-readline \ + --with-expat \ + --with-system-zlib \ + --with-lzma \ + --with-babeltrace \ + --with-intel-pt \ + --enable-tui \ + --with-python=python3 + make -j$CPUS + make install + popd && popd +fi + 
+# install pahole +if [ ! -d $PREFIX/share/pahole-gdb ]; then + unzip ../archives/pahole-gdb-master.zip + mv pahole-gdb-master $PREFIX/share/pahole-gdb +fi + +# setup system gdbinit +if [ ! -f $PREFIX/etc/gdb/gdbinit ]; then + mkdir -p $PREFIX/etc/gdb + cat >$PREFIX/etc/gdb/gdbinit <> build-flags.cmake + echo 'set(CMAKE_USE_RELATIVE_PATHS ON CACHE BOOL "Use relative paths" FORCE)' >> build-flags.cmake + echo 'set(CMAKE_C_FLAGS "-g -O2 -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2" CACHE STRING "C flags" FORCE)' >> build-flags.cmake + echo 'set(CMAKE_CXX_FLAGS "-g -O2 -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2" CACHE STRING "C++ flags" FORCE)' >> build-flags.cmake + echo 'set(CMAKE_SKIP_BOOTSTRAP_TEST ON CACHE BOOL "Skip BootstrapTest" FORCE)' >> build-flags.cmake + echo 'set(BUILD_CursesDialog ON CACHE BOOL "Build curses GUI" FORCE)' >> build-flags.cmake + mkdir build && pushd build + ../bootstrap \ + --prefix=$PREFIX \ + --init=../build-flags.cmake \ + --parallel=$CPUS \ + --system-curl + make -j$CPUS + # make test # run test suite + make install + popd && popd +fi + +# compile cppcheck +if [ ! -f $PREFIX/bin/cppcheck ]; then + if [ -d cppcheck-$CPPCHECK_VERSION ]; then + rm -rf cppcheck-$CPPCHECK_VERSION + fi + tar -xvf ../archives/cppcheck-$CPPCHECK_VERSION.tar.gz + pushd cppcheck-$CPPCHECK_VERSION + env \ + CC=gcc \ + CXX=g++ \ + PREFIX=$PREFIX \ + FILESDIR=$PREFIX/share/cppcheck \ + CFGDIR=$PREFIX/share/cppcheck/cfg \ + make -j$CPUS + env \ + CC=gcc \ + CXX=g++ \ + PREFIX=$PREFIX \ + FILESDIR=$PREFIX/share/cppcheck \ + CFGDIR=$PREFIX/share/cppcheck/cfg \ + make install + popd +fi + +# compile swig +if [ ! 
-d swig-$SWIG_VERSION/install ]; then + if [ -d swig-$SWIG_VERSION ]; then + rm -rf swig-$SWIG_VERSION + fi + tar -xvf ../archives/swig-$SWIG_VERSION.tar.gz + mv swig-rel-$SWIG_VERSION swig-$SWIG_VERSION + pushd swig-$SWIG_VERSION + ./autogen.sh + mkdir build && pushd build + ../configure --prefix=$DIR/build/swig-$SWIG_VERSION/install + make -j$CPUS + make install + popd && popd +fi + +# compile llvm +if [ ! -f $PREFIX/bin/clang ]; then + if [ -d llvm-$LLVM_VERSION ]; then + rm -rf llvm-$LLVM_VERSION + fi + tar -xvf ../archives/llvm-$LLVM_VERSION.src.tar.xz + mv llvm-$LLVM_VERSION.src llvm-$LLVM_VERSION + tar -xvf ../archives/clang-$LLVM_VERSION.src.tar.xz + mv clang-$LLVM_VERSION.src llvm-$LLVM_VERSION/tools/clang + tar -xvf ../archives/lld-$LLVM_VERSION.src.tar.xz + mv lld-$LLVM_VERSION.src/ llvm-$LLVM_VERSION/tools/lld + tar -xvf ../archives/clang-tools-extra-$LLVM_VERSION.src.tar.xz + mv clang-tools-extra-$LLVM_VERSION.src/ llvm-$LLVM_VERSION/tools/clang/tools/extra + tar -xvf ../archives/compiler-rt-$LLVM_VERSION.src.tar.xz + mv compiler-rt-$LLVM_VERSION.src/ llvm-$LLVM_VERSION/projects/compiler-rt + pushd llvm-$LLVM_VERSION + mkdir build && pushd build + # activate swig + export PATH=$DIR/build/swig-$SWIG_VERSION/install/bin:$PATH + # influenced by: https://buildd.debian.org/status/fetch.php?pkg=llvm-toolchain-7&arch=amd64&ver=1%3A7.0.1%7E%2Brc2-1%7Eexp1&stamp=1541506173&raw=0 + cmake .. 
\ + -DCMAKE_C_COMPILER=$PREFIX/bin/gcc \ + -DCMAKE_CXX_COMPILER=$PREFIX/bin/g++ \ + -DCMAKE_CXX_LINK_FLAGS="-L$PREFIX/lib64 -Wl,-rpath,$PREFIX/lib64" \ + -DCMAKE_INSTALL_PREFIX=$PREFIX \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DCMAKE_CXX_FLAGS_RELWITHDEBINFO="-O2 -DNDEBUG" \ + -DCMAKE_CXX_FLAGS=' -fuse-ld=gold -fPIC -Wno-unused-command-line-argument -Wno-unknown-warning-option' \ + -DCMAKE_C_FLAGS=' -fuse-ld=gold -fPIC -Wno-unused-command-line-argument -Wno-unknown-warning-option' \ + -DLLVM_LINK_LLVM_DYLIB=ON \ + -DLLVM_INSTALL_UTILS=ON \ + -DLLVM_VERSION_SUFFIX= \ + -DLLVM_BUILD_LLVM_DYLIB=ON \ + -DLLVM_ENABLE_RTTI=ON \ + -DLLVM_ENABLE_FFI=ON \ + -DLLVM_BINUTILS_INCDIR=$PREFIX/include/ \ + -DLLVM_USE_PERF=yes + make -j$CPUS + make -j$CPUS check-clang # run clang test suite + make -j$CPUS check-lld # run lld test suite + make install + popd && popd +fi + +# create README +if [ ! -f $PREFIX/README.md ]; then + cat >$PREFIX/README.md <$PREFIX/activate < Date: Wed, 26 May 2021 10:02:25 +0200 Subject: [PATCH 38/63] Fix permission for newer queries (#156) --- CHANGELOG.md | 2 + src/query/frontend/ast/ast.lcp | 1 + src/query/frontend/ast/ast_visitor.hpp | 2 +- .../frontend/ast/cypher_main_visitor.cpp | 4 ++ .../opencypher/grammar/MemgraphCypher.g4 | 20 +++++-- .../opencypher/grammar/MemgraphCypherLexer.g4 | 5 ++ .../frontend/semantic/required_privileges.cpp | 52 +++++++------------ .../frontend/stripped_lexer_constants.hpp | 18 +++---- tests/unit/cypher_main_visitor.cpp | 8 +++ tests/unit/query_required_privileges.cpp | 18 +++++++ 10 files changed, 83 insertions(+), 47 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 850e34513..f2d23909c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ * Fixed memory tracking issues. Some of the allocation and deallocation weren't tracked during the query execution. * Fixed reading CSV files that are using CRLF as the newline symbol. 
+* Fixed permission issues for `LOAD CSV`, `FREE MEMORY`, `LOCK DATA DIRECTORY`, + and replication queries. ## v1.4.0 diff --git a/src/query/frontend/ast/ast.lcp b/src/query/frontend/ast/ast.lcp index b42e07a08..ee4fdcebd 100644 --- a/src/query/frontend/ast/ast.lcp +++ b/src/query/frontend/ast/ast.lcp @@ -2232,6 +2232,7 @@ const std::vector kPrivilegesAll = { AuthQuery::Privilege::AUTH, AuthQuery::Privilege::CONSTRAINT, AuthQuery::Privilege::DUMP, AuthQuery::Privilege::REPLICATION, + AuthQuery::Privilege::READ_FILE, AuthQuery::Privilege::LOCK_PATH, AuthQuery::Privilege::FREE_MEMORY}; cpp<# diff --git a/src/query/frontend/ast/ast_visitor.hpp b/src/query/frontend/ast/ast_visitor.hpp index 2d5373853..1539422e2 100644 --- a/src/query/frontend/ast/ast_visitor.hpp +++ b/src/query/frontend/ast/ast_visitor.hpp @@ -109,6 +109,6 @@ class ExpressionVisitor template class QueryVisitor : public ::utils::Visitor {}; + ConstraintQuery, DumpQuery, ReplicationQuery, LockPathQuery, FreeMemoryQuery> {}; } // namespace query diff --git a/src/query/frontend/ast/cypher_main_visitor.cpp b/src/query/frontend/ast/cypher_main_visitor.cpp index 7773ace33..ea0aa3ccd 100644 --- a/src/query/frontend/ast/cypher_main_visitor.cpp +++ b/src/query/frontend/ast/cypher_main_visitor.cpp @@ -767,6 +767,10 @@ antlrcpp::Any CypherMainVisitor::visitPrivilege(MemgraphCypher::PrivilegeContext if (ctx->AUTH()) return AuthQuery::Privilege::AUTH; if (ctx->CONSTRAINT()) return AuthQuery::Privilege::CONSTRAINT; if (ctx->DUMP()) return AuthQuery::Privilege::DUMP; + if (ctx->REPLICATION()) return AuthQuery::Privilege::REPLICATION; + if (ctx->LOCK_PATH()) return AuthQuery::Privilege::LOCK_PATH; + if (ctx->READ_FILE()) return AuthQuery::Privilege::READ_FILE; + if (ctx->FREE_MEMORY()) return AuthQuery::Privilege::FREE_MEMORY; LOG_FATAL("Should not get here - unknown privilege!"); } diff --git a/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 b/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 index 
4d96ace10..8778baf01 100644 --- a/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 +++ b/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 @@ -117,7 +117,7 @@ delimiter : literal ; quote : literal ; -rowVar : variable ; +rowVar : variable ; userOrRoleName : symbolicName ; @@ -146,8 +146,22 @@ denyPrivilege : DENY ( ALL PRIVILEGES | privileges=privilegeList ) TO userOrRole revokePrivilege : REVOKE ( ALL PRIVILEGES | privileges=privilegeList ) FROM userOrRole=userOrRoleName ; -privilege : CREATE | DELETE | MATCH | MERGE | SET - | REMOVE | INDEX | STATS | AUTH | CONSTRAINT | DUMP ; +privilege : CREATE + | DELETE + | MATCH + | MERGE + | SET + | REMOVE + | INDEX + | STATS + | AUTH + | CONSTRAINT + | DUMP + | REPLICATION + | LOCK_PATH + | READ_FILE + | FREE_MEMORY + ; privilegeList : privilege ( ',' privilege )* ; diff --git a/src/query/frontend/opencypher/grammar/MemgraphCypherLexer.g4 b/src/query/frontend/opencypher/grammar/MemgraphCypherLexer.g4 index fbfc5a2d2..9aeec8eec 100644 --- a/src/query/frontend/opencypher/grammar/MemgraphCypherLexer.g4 +++ b/src/query/frontend/opencypher/grammar/MemgraphCypherLexer.g4 @@ -10,6 +10,8 @@ lexer grammar MemgraphCypherLexer ; import CypherLexer ; +UNDERSCORE : '_' ; + ALTER : A L T E R ; ASYNC : A S Y N C ; AUTH : A U T H ; @@ -25,6 +27,7 @@ DROP : D R O P ; DUMP : D U M P ; FOR : F O R ; FREE : F R E E ; +FREE_MEMORY : F R E E UNDERSCORE M E M O R Y ; FROM : F R O M ; GRANT : G R A N T ; GRANTS : G R A N T S ; @@ -33,12 +36,14 @@ IDENTIFIED : I D E N T I F I E D ; IGNORE : I G N O R E ; LOAD : L O A D ; LOCK : L O C K ; +LOCK_PATH : L O C K UNDERSCORE P A T H ; MAIN : M A I N ; MODE : M O D E ; NO : N O ; PASSWORD : P A S S W O R D ; PORT : P O R T ; PRIVILEGES : P R I V I L E G E S ; +READ_FILE : R E A D UNDERSCORE F I L E ; REGISTER : R E G I S T E R ; REPLICA : R E P L I C A ; REPLICAS : R E P L I C A S ; diff --git a/src/query/frontend/semantic/required_privileges.cpp 
b/src/query/frontend/semantic/required_privileges.cpp index 4cd046bf5..4173c7fbc 100644 --- a/src/query/frontend/semantic/required_privileges.cpp +++ b/src/query/frontend/semantic/required_privileges.cpp @@ -51,74 +51,58 @@ class PrivilegeExtractor : public QueryVisitor, public HierarchicalTreeVis void Visit(LockPathQuery &lock_path_query) override { AddPrivilege(AuthQuery::Privilege::LOCK_PATH); } - void Visit(LoadCsv &load_csv) override { AddPrivilege(AuthQuery::Privilege::READ_FILE); } - void Visit(FreeMemoryQuery &free_memory_query) override { AddPrivilege(AuthQuery::Privilege::FREE_MEMORY); } - void Visit(ReplicationQuery &replication_query) override { - switch (replication_query.action_) { - case ReplicationQuery::Action::SET_REPLICATION_ROLE: - AddPrivilege(AuthQuery::Privilege::REPLICATION); - break; - case ReplicationQuery::Action::SHOW_REPLICATION_ROLE: - AddPrivilege(AuthQuery::Privilege::REPLICATION); - break; - case ReplicationQuery::Action::REGISTER_REPLICA: - AddPrivilege(AuthQuery::Privilege::REPLICATION); - break; - case ReplicationQuery::Action::DROP_REPLICA: - AddPrivilege(AuthQuery::Privilege::REPLICATION); - break; - case ReplicationQuery::Action::SHOW_REPLICAS: - AddPrivilege(AuthQuery::Privilege::REPLICATION); - break; - } - } + void Visit(ReplicationQuery &replication_query) override { AddPrivilege(AuthQuery::Privilege::REPLICATION); } - bool PreVisit(Create &) override { + bool PreVisit(Create & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::CREATE); return false; } - bool PreVisit(CallProcedure &) override { + bool PreVisit(CallProcedure & /*unused*/) override { // TODO: Corresponding privilege return false; } - bool PreVisit(Delete &) override { + bool PreVisit(Delete & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::DELETE); return false; } - bool PreVisit(Match &) override { + bool PreVisit(Match & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::MATCH); return false; } - bool PreVisit(Merge &) override { + 
bool PreVisit(Merge & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::MERGE); return false; } - bool PreVisit(SetProperty &) override { + bool PreVisit(SetProperty & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::SET); return false; } - bool PreVisit(SetProperties &) override { + bool PreVisit(SetProperties & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::SET); return false; } - bool PreVisit(SetLabels &) override { + bool PreVisit(SetLabels & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::SET); return false; } - bool PreVisit(RemoveProperty &) override { + bool PreVisit(RemoveProperty & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::REMOVE); return false; } - bool PreVisit(RemoveLabels &) override { + bool PreVisit(RemoveLabels & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::REMOVE); return false; } + bool PreVisit(LoadCsv & /*unused*/) override { + AddPrivilege(AuthQuery::Privilege::READ_FILE); + return false; + } - bool Visit(Identifier &) override { return true; } - bool Visit(PrimitiveLiteral &) override { return true; } - bool Visit(ParameterLookup &) override { return true; } + bool Visit(Identifier & /*unused*/) override { return true; } + bool Visit(PrimitiveLiteral & /*unused*/) override { return true; } + bool Visit(ParameterLookup & /*unused*/) override { return true; } private: void AddPrivilege(AuthQuery::Privilege privilege) { diff --git a/src/query/frontend/stripped_lexer_constants.hpp b/src/query/frontend/stripped_lexer_constants.hpp index a73b777a6..c08da89ad 100644 --- a/src/query/frontend/stripped_lexer_constants.hpp +++ b/src/query/frontend/stripped_lexer_constants.hpp @@ -79,15 +79,15 @@ class Trie { const int kBitsetSize = 65536; const trie::Trie kKeywords = { - "union", "all", "optional", "match", "unwind", "as", "merge", "on", "create", - "set", "detach", "delete", "remove", "with", "distinct", "return", "order", "by", - "skip", "limit", "ascending", "asc", "descending", 
"desc", "where", "or", "xor", - "and", "not", "in", "starts", "ends", "contains", "is", "null", "case", - "when", "then", "else", "end", "count", "filter", "extract", "any", "none", - "single", "true", "false", "reduce", "coalesce", "user", "password", "alter", "drop", - "show", "stats", "unique", "explain", "profile", "storage", "index", "info", "exists", - "assert", "constraint", "node", "key", "dump", "database", "call", "yield", "memory", - "mb", "kb", "unlimited", "free", "procedure", "query"}; + "union", "all", "optional", "match", "unwind", "as", "merge", "on", "create", + "set", "detach", "delete", "remove", "with", "distinct", "return", "order", "by", + "skip", "limit", "ascending", "asc", "descending", "desc", "where", "or", "xor", + "and", "not", "in", "starts", "ends", "contains", "is", "null", "case", + "when", "then", "else", "end", "count", "filter", "extract", "any", "none", + "single", "true", "false", "reduce", "coalesce", "user", "password", "alter", "drop", + "show", "stats", "unique", "explain", "profile", "storage", "index", "info", "exists", + "assert", "constraint", "node", "key", "dump", "database", "call", "yield", "memory", + "mb", "kb", "unlimited", "free", "procedure", "query", "free_memory", "read_file", "lock_path"}; // Unicode codepoints that are allowed at the start of the unescaped name. 
const std::bitset kUnescapedNameAllowedStarts( diff --git a/tests/unit/cypher_main_visitor.cpp b/tests/unit/cypher_main_visitor.cpp index 294cdbf24..e734d1cab 100644 --- a/tests/unit/cypher_main_visitor.cpp +++ b/tests/unit/cypher_main_visitor.cpp @@ -2053,6 +2053,14 @@ TEST_P(CypherMainVisitorTest, GrantPrivilege) { {AuthQuery::Privilege::CONSTRAINT}); check_auth_query(&ast_generator, "GRANT DUMP TO user", AuthQuery::Action::GRANT_PRIVILEGE, "", "", "user", {}, {AuthQuery::Privilege::DUMP}); + check_auth_query(&ast_generator, "GRANT REPLICATION TO user", AuthQuery::Action::GRANT_PRIVILEGE, "", "", "user", {}, + {AuthQuery::Privilege::REPLICATION}); + check_auth_query(&ast_generator, "GRANT LOCK_PATH TO user", AuthQuery::Action::GRANT_PRIVILEGE, "", "", "user", {}, + {AuthQuery::Privilege::LOCK_PATH}); + check_auth_query(&ast_generator, "GRANT READ_FILE TO user", AuthQuery::Action::GRANT_PRIVILEGE, "", "", "user", {}, + {AuthQuery::Privilege::READ_FILE}); + check_auth_query(&ast_generator, "GRANT FREE_MEMORY TO user", AuthQuery::Action::GRANT_PRIVILEGE, "", "", "user", {}, + {AuthQuery::Privilege::FREE_MEMORY}); } TEST_P(CypherMainVisitorTest, DenyPrivilege) { diff --git a/tests/unit/query_required_privileges.cpp b/tests/unit/query_required_privileges.cpp index 2e929ee45..a5afc010c 100644 --- a/tests/unit/query_required_privileges.cpp +++ b/tests/unit/query_required_privileges.cpp @@ -1,6 +1,7 @@ #include #include +#include "query/frontend/ast/ast_visitor.hpp" #include "query/frontend/semantic/required_privileges.hpp" #include "storage/v2/id_types.hpp" @@ -131,3 +132,20 @@ TEST_F(TestPrivilegeExtractor, DumpDatabase) { auto *query = storage.Create(); EXPECT_THAT(GetRequiredPrivileges(query), UnorderedElementsAre(AuthQuery::Privilege::DUMP)); } + +TEST_F(TestPrivilegeExtractor, ReadFile) { + auto load_csv = storage.Create(); + load_csv->row_var_ = IDENT("row"); + auto *query = QUERY(SINGLE_QUERY(load_csv)); + EXPECT_THAT(GetRequiredPrivileges(query), 
UnorderedElementsAre(AuthQuery::Privilege::READ_FILE)); +} + +TEST_F(TestPrivilegeExtractor, LockPathQuery) { + auto *query = storage.Create(); + EXPECT_THAT(GetRequiredPrivileges(query), UnorderedElementsAre(AuthQuery::Privilege::LOCK_PATH)); +} + +TEST_F(TestPrivilegeExtractor, FreeMemoryQuery) { + auto *query = storage.Create(); + EXPECT_THAT(GetRequiredPrivileges(query), UnorderedElementsAre(AuthQuery::Privilege::FREE_MEMORY)); +} From 2a0b0d969fe3338fb2843ced49856f07b8ba4b57 Mon Sep 17 00:00:00 2001 From: Josip Seljan <62958579+the-joksim@users.noreply.github.com> Date: Wed, 26 May 2021 11:59:36 +0200 Subject: [PATCH 39/63] Replace mg_client with mgconsole (#153) --- .github/workflows/diff.yaml | 14 +- .github/workflows/release_centos8.yaml | 14 +- .github/workflows/release_debian10.yaml | 14 +- .github/workflows/release_ubuntu2004.yaml | 14 +- CHANGELOG.md | 4 + CMakeLists.txt | 4 + libs/CMakeLists.txt | 8 + libs/setup.sh | 6 + release/package/run.sh | 1 + tools/src/CMakeLists.txt | 21 +- tools/src/mg_client/main.cpp | 709 ---------------------- tools/tests/test_mg_client | 131 ---- 12 files changed, 73 insertions(+), 867 deletions(-) delete mode 100644 tools/src/mg_client/main.cpp delete mode 100755 tools/tests/test_mg_client diff --git a/.github/workflows/diff.yaml b/.github/workflows/diff.yaml index 07ce34acb..2b4ce5ded 100644 --- a/.github/workflows/diff.yaml +++ b/.github/workflows/diff.yaml @@ -54,8 +54,13 @@ jobs: # Activate toolchain. source /opt/toolchain-v2/activate - # Create community DEB package. cd build + + # create mgconsole + # we use the -B to force the build + make -j$THREADS -B mgconsole + + # Create community DEB package. mkdir output && cd output cpack -G DEB --config ../CPackConfig.cmake @@ -281,8 +286,13 @@ jobs: # Activate toolchain. source /opt/toolchain-v2/activate - # Create enterprise DEB package. 
cd build + + # create mgconsole + # we use the -B to force the build + make -j$THREADS -B mgconsole + + # Create enterprise DEB package. mkdir output && cd output cpack -G DEB --config ../CPackConfig.cmake diff --git a/.github/workflows/release_centos8.yaml b/.github/workflows/release_centos8.yaml index b7433416d..17df9917c 100644 --- a/.github/workflows/release_centos8.yaml +++ b/.github/workflows/release_centos8.yaml @@ -39,8 +39,13 @@ jobs: # Activate toolchain. source /opt/toolchain-v2/activate - # Create community RPM package. cd build + + # create mgconsole + # we use the -B to force the build + make -j$THREADS -B mgconsole + + # Create community RPM package. mkdir output && cd output cpack -G RPM --config ../CPackConfig.cmake rpmlint memgraph*.rpm @@ -232,8 +237,13 @@ jobs: # Activate toolchain. source /opt/toolchain-v2/activate - # Create enterprise RPM package. cd build + + # create mgconsole + # we use the -B to force the build + make -j$THREADS -B mgconsole + + # Create enterprise RPM package. mkdir output && cd output cpack -G RPM --config ../CPackConfig.cmake rpmlint memgraph*.rpm diff --git a/.github/workflows/release_debian10.yaml b/.github/workflows/release_debian10.yaml index d6c5991f4..2d5ba607a 100644 --- a/.github/workflows/release_debian10.yaml +++ b/.github/workflows/release_debian10.yaml @@ -39,8 +39,13 @@ jobs: # Activate toolchain. source /opt/toolchain-v2/activate - # Create community DEB package. cd build + + # create mgconsole + # we use the -B to force the build + make -j$THREADS -B mgconsole + + # Create community DEB package. mkdir output && cd output cpack -G DEB --config ../CPackConfig.cmake @@ -231,8 +236,13 @@ jobs: # Activate toolchain. source /opt/toolchain-v2/activate - # Create enterprise DEB package. cd build + + # create mgconsole + # we use the -B to force the build + make -j$THREADS -B mgconsole + + # Create enterprise DEB package. 
mkdir output && cd output cpack -G DEB --config ../CPackConfig.cmake diff --git a/.github/workflows/release_ubuntu2004.yaml b/.github/workflows/release_ubuntu2004.yaml index d69c2d038..c54f3117b 100644 --- a/.github/workflows/release_ubuntu2004.yaml +++ b/.github/workflows/release_ubuntu2004.yaml @@ -39,8 +39,13 @@ jobs: # Activate toolchain. source /opt/toolchain-v2/activate - # Create community DEB package. cd build + + # create mgconsole + # we use the -B to force the build + make -j$THREADS -B mgconsole + + # Create community DEB package. mkdir output && cd output cpack -G DEB --config ../CPackConfig.cmake @@ -231,8 +236,13 @@ jobs: # Activate toolchain. source /opt/toolchain-v2/activate - # Create enterprise DEB package. cd build + + # create mgconsole + # we use the -B to force the build + make -j$THREADS -B mgconsole + + # Create enterprise DEB package. mkdir output && cd output cpack -G DEB --config ../CPackConfig.cmake diff --git a/CHANGELOG.md b/CHANGELOG.md index f2d23909c..95fac9381 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## Future +### Major Feature and Improvements + +* Replaced mg_client with mgconsole + ### Bug Fixes * Fixed parsing of types for Python procedures for types nested in `mgp.List`. 
diff --git a/CMakeLists.txt b/CMakeLists.txt index 5a0d3310f..9d3a099fa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -336,3 +336,7 @@ endif() if(QUERY_MODULES) add_subdirectory(query_modules) endif() + +install(FILES ${CMAKE_BINARY_DIR}/bin/mgconsole + PERMISSIONS OWNER_EXECUTE OWNER_READ OWNER_WRITE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE + TYPE BIN) diff --git a/libs/CMakeLists.txt b/libs/CMakeLists.txt index 0d165815d..1cf332e8c 100644 --- a/libs/CMakeLists.txt +++ b/libs/CMakeLists.txt @@ -209,6 +209,14 @@ import_external_library(mgclient STATIC find_package(OpenSSL REQUIRED) target_link_libraries(mgclient INTERFACE ${OPENSSL_LIBRARIES}) +add_external_project(mgconsole + SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/mgconsole + CMAKE_ARGS + -DCMAKE_INSTALL_PREFIX:PATH=${CMAKE_BINARY_DIR} + BUILD_COMMAND $(MAKE) mgconsole) + +add_custom_target(mgconsole DEPENDS mgconsole-proj) + # Setup spdlog import_external_library(spdlog STATIC ${CMAKE_CURRENT_SOURCE_DIR}/spdlog/${CMAKE_INSTALL_LIBDIR}/libspdlog.a diff --git a/libs/setup.sh b/libs/setup.sh index dc9bb2bd6..8f7c3d476 100755 --- a/libs/setup.sh +++ b/libs/setup.sh @@ -106,6 +106,7 @@ declare -A primary_urls=( ["rocksdb"]="http://$local_cache_host/git/rocksdb.git" ["mgclient"]="http://$local_cache_host/git/mgclient.git" ["pymgclient"]="http://$local_cache_host/git/pymgclient.git" + ["mgconsole"]="http://$local_cache_host/git/mgconsole.git" ["spdlog"]="http://$local_cache_host/git/spdlog" ["jemalloc"]="http://$local_cache_host/git/jemalloc.git" ["nlohmann"]="http://$local_cache_host/file/nlohmann/json/b3e5cb7f20dcc5c806e418df34324eca60d17d4e/single_include/nlohmann/json.hpp" @@ -131,6 +132,7 @@ declare -A secondary_urls=( ["rocksdb"]="https://github.com/facebook/rocksdb.git" ["mgclient"]="https://github.com/memgraph/mgclient.git" ["pymgclient"]="https://github.com/memgraph/pymgclient.git" + ["mgconsole"]="http://github.com/memgraph/mgconsole.git" ["spdlog"]="https://github.com/gabime/spdlog" 
["jemalloc"]="https://github.com/jemalloc/jemalloc.git" ["nlohmann"]="https://raw.githubusercontent.com/nlohmann/json/b3e5cb7f20dcc5c806e418df34324eca60d17d4e/single_include/nlohmann/json.hpp" @@ -211,6 +213,10 @@ sed -i 's/\${CMAKE_INSTALL_LIBDIR}/lib/' mgclient/src/CMakeLists.txt pymgclient_tag="4f85c179e56302d46a1e3e2cf43509db65f062b3" # (2021-01-15) repo_clone_try_double "${primary_urls[pymgclient]}" "${secondary_urls[pymgclient]}" "pymgclient" "$pymgclient_tag" +# mgconsole +mgconsole_tag="01ae99bfce772e540e75c076ba03cf06c0c2ac7d" # (2021-05-26) +repo_clone_try_double "${primary_urls[mgconsole]}" "${secondary_urls[mgconsole]}" "mgconsole" "$mgconsole_tag" + spdlog_tag="46d418164dd4cd9822cf8ca62a116a3f71569241" # (2020-12-01) repo_clone_try_double "${primary_urls[spdlog]}" "${secondary_urls[spdlog]}" "spdlog" "$spdlog_tag" diff --git a/release/package/run.sh b/release/package/run.sh index 338850ccd..151d48712 100755 --- a/release/package/run.sh +++ b/release/package/run.sh @@ -78,6 +78,7 @@ make_package () { # container resources. # shellcheck disable=SC2016 docker exec "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN "'&& make -j$(nproc)' + docker exec "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN "'&& make -j$(nproc) -B mgconsole' docker exec "$build_container" bash -c "mkdir -p $container_output_dir && cd $container_output_dir && $ACTIVATE_TOOLCHAIN && $package_command" echo "Copying targeted package to host..." 
diff --git a/tools/src/CMakeLists.txt b/tools/src/CMakeLists.txt index d9f5a53be..82b89c1c9 100644 --- a/tools/src/CMakeLists.txt +++ b/tools/src/CMakeLists.txt @@ -3,28 +3,11 @@ set(VERSION_STRING ${MEMGRAPH_VERSION}) configure_file(../../src/version.hpp.in version.hpp @ONLY) include_directories(${CMAKE_CURRENT_BINARY_DIR}) -# Memgraph Client Target -add_executable(mg_client mg_client/main.cpp) -set(CLIENT_LIBS mg-communication mg-io mg-utils) -if (READLINE_FOUND) - list(APPEND CLIENT_LIBS readline) -endif() -target_link_libraries(mg_client ${CLIENT_LIBS}) - # Memgraph Dump Target add_executable(mg_dump mg_dump/main.cpp) target_include_directories(mg_dump PRIVATE ${MGCLIENT_INCLUDE_DIR}) target_link_libraries(mg_dump gflags spdlog fmt mgclient pthread) - -# Strip the executable in release build. -string(TOLOWER ${CMAKE_BUILD_TYPE} lower_build_type) -if (lower_build_type STREQUAL "release") - add_custom_command(TARGET mg_client POST_BUILD - COMMAND strip -s mg_client - COMMENT "Stripping symbols and sections from mg_client") -endif() - -install(TARGETS mg_client RUNTIME DESTINATION bin) +install(TARGETS mg_dump RUNTIME DESTINATION bin) # Target for building all the tool executables. 
-add_custom_target(tools DEPENDS mg_client mg_dump) +add_custom_target(tools DEPENDS mg_dump) diff --git a/tools/src/mg_client/main.cpp b/tools/src/mg_client/main.cpp deleted file mode 100644 index d28ddd670..000000000 --- a/tools/src/mg_client/main.cpp +++ /dev/null @@ -1,709 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "communication/bolt/client.hpp" -#include "communication/init.hpp" -#include "io/network/endpoint.hpp" -#include "io/network/utils.hpp" -#include "utils/algorithm.hpp" -#include "utils/file.hpp" -#include "utils/flag_validation.hpp" -#include "utils/logging.hpp" -#include "utils/signals.hpp" -#include "utils/string.hpp" -#include "utils/terminate_handler.hpp" -#include "utils/timer.hpp" -#include "version.hpp" - -namespace fs = std::filesystem; - -volatile sig_atomic_t is_shutting_down = 0; - -// Usage strings. -static const std::string kUsage = - "Memgraph bolt client.\n" - "The client can be run in interactive or non-interactive mode.\n"; -static const std::string kInteractiveUsage = - "In interactive mode, user can enter cypher queries and supported " - "commands.\n\n" - "Cypher queries can span through multiple lines and conclude with a\n" - "semi-colon (;). Each query is executed in the database and the results\n" - "are printed out.\n\n" - "The following interactive commands are supported:\n\n" - "\t:help\t Print out usage for interactive mode\n" - "\t:quit\t Exit the shell\n"; - -// Supported commands. -// Maybe also add reconnect? -static const std::string kCommandQuit = ":quit"; -static const std::string kCommandHelp = ":help"; - -// Supported formats. -static const std::string kCsvFormat = "csv"; -static const std::string kTabularFormat = "tabular"; - -DEFINE_string(host, "127.0.0.1", - "Server address. 
It can be a DNS resolvable hostname."); -DEFINE_int32(port, 7687, "Server port"); -DEFINE_string(username, "", "Username for the database"); -DEFINE_string(password, "", "Password for the database"); -DEFINE_bool(use_ssl, true, "Use SSL when connecting to the server."); -DEFINE_bool(fit_to_screen, false, "Fit output width to screen width."); -DEFINE_VALIDATED_string( - output_format, "tabular", - "Query output format. Can be csv/tabular. If output format is " - "other than tabular `fit-to-screen` flag is ignored.", - { - if (value == kCsvFormat || value == kTabularFormat) { - return true; - } - return false; - }); -DEFINE_VALIDATED_string(csv_delimiter, ",", - "Character used to separate fields.", { - if (value.size() != 1) { - return false; - } - return true; - }); -DEFINE_string( - csv_escapechar, "", - "Character used to escape the quotechar(\") if csv-doublequote is false."); -DEFINE_bool( - csv_doublequote, true, - "Controls how instances of quotechar(\") appearing inside a field should " - "themselves be quoted. When true, the character is doubled. When false, " - "the escapechar is used as a prefix to the quotechar. " - "If csv-doublequote is false 'csv-escapechar' must be set."); - -static bool ValidateCsvDoubleQuote() { - if (!FLAGS_csv_doublequote && FLAGS_csv_escapechar.size() != 1) { - return false; - } - return true; -} - -#ifdef HAS_READLINE -DEFINE_string(history, "~/.memgraph", - "Use the specified directory for saving history."); -DEFINE_bool(no_history, false, "Do not save history."); - -// History default directory. -static const std::string kDefaultHistoryBaseDir = "~"; -static const std::string kDefaultHistoryMemgraphDir = ".memgraph"; -// History filename. -static const std::string kHistoryFilename = "client_history"; -#endif - -DECLARE_int32(min_log_level); - -// Unfinished query text from previous input. -// e.g. Previous input was MATCH(n) RETURN n; MATCH -// then default_text would be set to MATCH for next query. 
-static std::string default_text; - -static const std::string kPrompt = "memgraph> "; -static const std::string kMultilinePrompt = " -> "; - -static void PrintHelp() { std::cout << kInteractiveUsage << std::endl; } - -static void PrintValue(std::ostream &os, const std::string &value) { - os << value; -} - -static void PrintValue(std::ostream &os, - const communication::bolt::Value &value) { - switch (value.type()) { - case communication::bolt::Value::Type::String: - os << value.ValueString(); - return; - default: - os << value; - return; - } -} - -static void EchoFailure(const std::string &failure_msg, - const std::string &explanation) { - if (isatty(STDIN_FILENO)) { - std::cout << "\033[1;31m" << failure_msg << ": \033[0m"; - std::cout << explanation << std::endl; - } else { - std::cerr << failure_msg << ": "; - std::cerr << explanation << std::endl; - } -} - -static void EchoInfo(const std::string &message) { - if (isatty(STDIN_FILENO)) { - std::cout << message << std::endl; - } -} - -static void SetStdinEcho(bool enable = true) { - struct termios tty; - tcgetattr(STDIN_FILENO, &tty); - if (!enable) { - tty.c_lflag &= ~ECHO; - } else { - tty.c_lflag |= ECHO; - } - tcsetattr(STDIN_FILENO, TCSANOW, &tty); -} - -#ifdef HAS_READLINE - -#include "readline/history.h" -#include "readline/readline.h" - -/// Helper function that sets default input for 'readline' -static int SetDefaultText() { - rl_insert_text(default_text.c_str()); - default_text = ""; - rl_startup_hook = (rl_hook_func_t *)NULL; - return 0; -} - -/// Memgraph and OpenCypher keywords. 
-/// Copied from src/query/frontend/opencypher/grammar/Cypher.g4 -/// and src/query/frontend/grammar/MemgraphCypher.g4 -static const std::vector kMemgraphKeywords{ - "ALTER", "AUTH", "BATCH", "BATCHES", "CLEAR", "DATA", - "DENY", "DROP", "FOR", "FROM", "GRANT", "IDENTIFIED", - "INTERVAL", "K_TEST", "KAFKA", "LOAD", "PASSWORD", "PRIVILEGES", - "REVOKE", "ROLE", "ROLES", "SIZE", "START", "STOP", - "STREAM", "STREAMS", "TO", "TOPIC", "TRANSFORM", "USER", - "USERS"}; -static const std::vector kCypherKeywords{ - "ALL", "AND", "ANY", "AS", "ASC", "ASCENDING", - "BFS", "BY", "CASE", "CONTAINS", "COUNT", "CREATE", - "CYPHERNULL", "DELETE", "DESC", "DESCENDING", "DETACH", "DISTINCT", - "ELSE", "END", "ENDS", "EXTRACT", "FALSE", "FILTER", - "IN", "INDEX", "IS", "LIMIT", "L_SKIP", "MATCH", - "MERGE", "NONE", "NOT", "ON", "OPTIONAL", "OR", - "ORDER", "REDUCE", "REMOVE", "RETURN", "SET", "SHOW", - "SINGLE", "STARTS", "THEN", "TRUE", "UNION", "UNWIND", - "WHEN", "WHERE", "WITH", "WSHORTEST", "XOR"}; - -static char *CompletionGenerator(const char *text, int state) { - // This function is called with state=0 the first time; subsequent calls - // are with a nonzero state. state=0 can be used to perform one-time - // initialization for this completion session. - static std::vector matches; - static size_t match_index = 0; - - if (state == 0) { - // During initialization, compute the actual matches for 'text' and - // keep them in a static vector. - matches.clear(); - match_index = 0; - - // Collect a vector of matches: vocabulary words that begin with text. 
- std::string text_str = utils::ToUpperCase(std::string(text)); - for (auto word : kCypherKeywords) { - if (word.size() >= text_str.size() && - word.compare(0, text_str.size(), text_str) == 0) { - matches.push_back(word); - } - } - for (auto word : kMemgraphKeywords) { - if (word.size() >= text_str.size() && - word.compare(0, text_str.size(), text_str) == 0) { - matches.push_back(word); - } - } - } - - if (match_index >= matches.size()) { - // We return nullptr to notify the caller no more matches are available. - return nullptr; - } else { - // Return a malloc'd char* for the match. The caller frees it. - return strdup(matches[match_index++].c_str()); - } -} - -static char **Completer(const char *text, int start, int end) { - // Don't do filename completion even if our generator finds no matches. - rl_attempted_completion_over = 1; - // Note: returning nullptr here will make readline use the default filename - // completer. This note is copied from examples - I think because - // rl_attempted_completion_over is set to 1, filename completer won't be used. - return rl_completion_matches(text, CompletionGenerator); -} - -/// Helper function that reads a line from the -/// standard input using the 'readline' lib. -/// Adds support for history and reverse-search. -/// -/// @param prompt The prompt to display. -/// @return User input line, or nullopt on EOF. -static std::optional ReadLine(const std::string &prompt) { - if (default_text.size() > 0) { - // Initialize text with remainder of previous query. - rl_startup_hook = SetDefaultText; - } - char *line = readline(prompt.c_str()); - if (!line) return std::nullopt; - - std::string r_val(line); - if (!utils::Trim(r_val).empty()) add_history(line); - free(line); - return r_val; -} - -#else - -/// Helper function that reads a line from the standard input -/// using getline. -/// @param prompt The prompt to display. -/// @return User input line, or nullopt on EOF. 
-static std::optional ReadLine(const std::string &prompt) { - std::cout << prompt << default_text; - std::string line; - std::getline(std::cin, line); - if (std::cin.eof()) return std::nullopt; - line = default_text + line; - default_text = ""; - return line; -} - -#endif // HAS_READLINE - -static std::optional GetLine() { - std::string line; - std::getline(std::cin, line); - if (std::cin.eof()) return std::nullopt; - line = default_text + line; - default_text = ""; - return line; -} - -/// Helper function that parses user line input. -/// @param line user input line. -/// @param quote quote character or '\0'; if set line is inside quotation. -/// @param escaped if set, next character should be escaped. -/// @return pair of string and bool. string is parsed line and bool marks -/// if query finished(Query finishes with ';') with this line. -static std::pair ParseLine(const std::string &line, - char *quote, bool *escaped) { - // Parse line. - bool is_done = false; - std::stringstream parsed_line; - for (auto c : line) { - if (*quote && c == '\\') { - // Escaping is only used inside quotation to not end the quote - // when quotation char is escaped. - *escaped = !*escaped; - parsed_line << c; - continue; - } else if ((!*quote && (c == '\"' || c == '\'')) || - (!*escaped && c == *quote)) { - *quote = *quote ? '\0' : c; - } else if (!*quote && c == ';') { - is_done = true; - break; - } - parsed_line << c; - *escaped = false; - } - return std::make_pair(parsed_line.str(), is_done); -} - -static std::optional GetQuery() { - char quote = '\0'; - bool escaped = false; - auto ret = ParseLine(default_text, "e, &escaped); - if (ret.second) { - auto idx = ret.first.size() + 1; - default_text = utils::Trim(default_text.substr(idx)); - return ret.first; - } - std::stringstream query; - std::optional line; - int line_cnt = 0; - auto is_done = false; - while (!is_done) { - if (!isatty(STDIN_FILENO)) { - line = GetLine(); - } else { - line = ReadLine(line_cnt == 0 ? 
kPrompt : kMultilinePrompt); - if (line_cnt == 0 && line && line->size() > 0 && (*line)[0] == ':') { - auto trimmed_line = utils::Trim(*line); - if (trimmed_line == kCommandQuit) { - return std::nullopt; - } else if (trimmed_line == kCommandHelp) { - PrintHelp(); - return ""; - } else { - EchoFailure("Unsupported command", std::string(trimmed_line)); - PrintHelp(); - return ""; - } - } - } - if (!line) return std::nullopt; - if (line->empty()) continue; - auto ret = ParseLine(*line, "e, &escaped); - query << ret.first; - auto char_count = ret.first.size(); - if (ret.second) { - is_done = true; - char_count += 1; // ';' sign - } else { - // Query is multiline so append newline. - query << "\n"; - } - if (char_count < line->size()) { - default_text = utils::Trim(line->substr(char_count)); - } - ++line_cnt; - } - return query.str(); -} - -template -static void PrintRowTabular(const std::vector &data, int total_width, - int column_width, int num_columns, - bool all_columns_fit, int margin = 1) { - if (!all_columns_fit) num_columns -= 1; - std::string data_output = std::string(total_width, ' '); - for (auto i = 0; i < total_width; i += column_width) { - data_output[i] = '|'; - int idx = i / column_width; - if (idx < num_columns) { - std::stringstream field; - PrintValue(field, data[idx]); // convert Value to string - std::string field_str(field.str()); - if (field_str.size() > column_width - 2 * margin - 1) { - field_str.erase(column_width - 2 * margin - 1, std::string::npos); - field_str.replace(field_str.size() - 3, 3, "..."); - } - data_output.replace(i + 1 + margin, field_str.size(), field_str); - } - } - if (!all_columns_fit) { - data_output.replace(total_width - column_width, 3, "..."); - } - data_output[total_width - 1] = '|'; - std::cout << data_output << std::endl; -} - -/// Helper function for determining maximum length of data. -/// @param data Vector of string representable elements. Elements should have -/// operator '<<' implemented. 
-/// @param margin Column margin width. -/// @return length needed for representing max size element in vector. Plus -/// one is added because of column start character '|'. -template -static uint64_t GetMaxColumnWidth(const std::vector &data, int margin = 1) { - uint64_t column_width = 0; - for (auto &elem : data) { - std::stringstream field; - PrintValue(field, elem); - column_width = std::max(column_width, field.str().size() + 2 * margin); - } - return column_width + 1; -} - -static void PrintTabular( - const std::vector &header, - const std::vector> &records) { - struct winsize w; - ioctl(STDOUT_FILENO, TIOCGWINSZ, &w); - bool all_columns_fit = true; - - auto num_columns = header.size(); - auto column_width = GetMaxColumnWidth(header); - for (size_t i = 0; i < records.size(); ++i) { - column_width = std::max(column_width, GetMaxColumnWidth(records[i])); - } - column_width = std::max(static_cast(5), - column_width); // set column width to min 5 - auto total_width = column_width * num_columns + 1; - - // Fit to screen width. - if (FLAGS_fit_to_screen && total_width > w.ws_col) { - uint64_t lo = 5; - uint64_t hi = column_width; - uint64_t last = 5; - while (lo < hi) { - uint64_t mid = lo + (hi - lo) / 2; - uint64_t width = mid * num_columns + 1; - if (width <= w.ws_col) { - last = mid; - lo = mid + 1; - } else { - hi = mid - 1; - } - } - column_width = last; - total_width = column_width * num_columns + 1; - // All columns do not fit on screen. - while (total_width > w.ws_col && num_columns > 1) { - num_columns -= 1; - total_width = column_width * num_columns + 1; - all_columns_fit = false; - } - } - - auto line_fill = std::string(total_width, '-'); - for (auto i = 0; i < total_width; i += column_width) { - line_fill[i] = '+'; - } - line_fill[total_width - 1] = '+'; - std::cout << line_fill << std::endl; - // Print Header. - PrintRowTabular(header, total_width, column_width, num_columns, - all_columns_fit); - std::cout << line_fill << std::endl; - // Print Records. 
- for (size_t i = 0; i < records.size(); ++i) { - PrintRowTabular(records[i], total_width, column_width, num_columns, - all_columns_fit); - } - std::cout << line_fill << std::endl; -} - -template -static std::vector FormatCsvFields(const std::vector &fields) { - std::vector formatted; - formatted.reserve(fields.size()); - for (auto &field : fields) { - std::stringstream field_stream; - field_stream << field; - std::string formatted_field(field_stream.str()); - if (FLAGS_csv_doublequote) { - formatted_field = utils::Replace(formatted_field, "\"", "\"\""); - } else { - formatted_field = - utils::Replace(formatted_field, "\"", FLAGS_csv_escapechar + "\""); - } - formatted_field.insert(0, 1, '"'); - formatted_field.append(1, '"'); - formatted.push_back(formatted_field); - } - return formatted; -} - -static void PrintCsv( - const std::vector &header, - const std::vector> &records) { - // Print Header. - auto formatted_header = FormatCsvFields(header); - utils::PrintIterable(std::cout, formatted_header, FLAGS_csv_delimiter); - std::cout << std::endl; - // Print Records. 
- for (size_t i = 0; i < records.size(); ++i) { - auto formatted_row = FormatCsvFields(records[i]); - utils::PrintIterable(std::cout, formatted_row, FLAGS_csv_delimiter); - std::cout << std::endl; - } -} - -static void Output( - const std::vector &header, - const std::vector> &records) { - if (FLAGS_output_format == kTabularFormat) { - PrintTabular(header, records); - } else if (FLAGS_output_format == kCsvFormat) { - PrintCsv(header, records); - } -} - -int main(int argc, char **argv) { - gflags::SetVersionString(version_string); - gflags::SetUsageMessage(kUsage); - - gflags::ParseCommandLineFlags(&argc, &argv, true); - if (FLAGS_output_format == kCsvFormat && !ValidateCsvDoubleQuote()) { - EchoFailure( - "Unsupported combination of 'csv-doublequote' and 'csv-escapechar'\n" - "flags", - "Run './mg_client --help' for usage."); - return 1; - } - auto password = FLAGS_password; - if (isatty(STDIN_FILENO) && FLAGS_username.size() > 0 && - password.size() == 0) { - SetStdinEcho(false); - auto password_optional = ReadLine("Password: "); - std::cout << std::endl; - if (password_optional) { - password = *password_optional; - } else { - EchoFailure( - "Password not submitted", - fmt::format("Requested password for username {}", FLAGS_username)); - return 1; - } - SetStdinEcho(true); - } - - spdlog::set_level(spdlog::level::err); - - communication::SSLInit sslInit; - -#ifdef HAS_READLINE - using_history(); - int history_len = 0; - rl_attempted_completion_function = Completer; - fs::path history_dir = FLAGS_history; - if (FLAGS_history == - (kDefaultHistoryBaseDir + "/" + kDefaultHistoryMemgraphDir)) { - // Fetch home dir for user. - struct passwd *pw = getpwuid(getuid()); - history_dir = fs::path(pw->pw_dir) / kDefaultHistoryMemgraphDir; - } - if (!utils::EnsureDir(history_dir)) { - EchoFailure("History directory doesn't exist", history_dir); - // Should program exit here or just continue with warning message? 
- return 1; - } - fs::path history_file = history_dir / kHistoryFilename; - // Read history file. - if (fs::exists(history_file)) { - auto ret = read_history(history_file.string().c_str()); - if (ret != 0) { - EchoFailure("Unable to read history file", history_file); - // Should program exit here or just continue with warning message? - return 1; - } - history_len = history_length; - } - - // Save history function. Used to save readline history after each query. - auto save_history = [&history_len, history_file] { - if (!FLAGS_no_history) { - int ret = 0; - // If there was no history, create history file. - // Otherwise, append to existing history. - if (history_len == 0) { - ret = write_history(history_file.string().c_str()); - } else { - ret = append_history(1, history_file.string().c_str()); - } - if (ret != 0) { - EchoFailure("Unable to save history to file", history_file); - return 1; - } - ++history_len; - } - return 0; - }; -#endif - - // Prevent handling shutdown inside a shutdown. For example, SIGINT handler - // being interrupted by SIGTERM before is_shutting_down is set, thus causing - // double shutdown. - sigset_t block_shutdown_signals; - sigemptyset(&block_shutdown_signals); - sigaddset(&block_shutdown_signals, SIGTERM); - sigaddset(&block_shutdown_signals, SIGINT); - - auto shutdown = [](int exit_code = 0) { - if (is_shutting_down) return; - is_shutting_down = 1; - std::quick_exit(exit_code); - }; - - utils::SignalHandler::RegisterHandler(utils::Signal::Terminate, shutdown, - block_shutdown_signals); - utils::SignalHandler::RegisterHandler(utils::Signal::Interupt, shutdown, - block_shutdown_signals); - - // TODO handle endpoint exception. - // It has CHECK in constructor if address is not valid. 
- io::network::Endpoint endpoint(io::network::ResolveHostname(FLAGS_host), - FLAGS_port); - communication::ClientContext context(FLAGS_use_ssl); - communication::bolt::Client client(&context); - - std::string bolt_client_version = - fmt::format("mg_client/{}", gflags::VersionString()); - try { - client.Connect(endpoint, FLAGS_username, password, bolt_client_version); - } catch (const communication::bolt::ClientFatalException &e) { - EchoFailure("Connection failure", e.what()); - return 1; - } - - EchoInfo(fmt::format("mg_client {}", gflags::VersionString())); - EchoInfo("Type :help for shell usage"); - EchoInfo("Quit the shell by typing Ctrl-D(eof) or :quit"); - EchoInfo(fmt::format("Connected to 'memgraph://{}'", endpoint)); - int num_retries = 3; - while (true) { - auto query = GetQuery(); - if (!query) { - EchoInfo("Bye"); - break; - } - if (query->empty()) continue; - try { - utils::Timer t; - auto ret = client.Execute(*query, {}); - auto elapsed = t.Elapsed().count(); - if (ret.records.size() > 0) Output(ret.fields, ret.records); - if (isatty(STDIN_FILENO)) { - std::string summary; - if (ret.records.size() == 0) { - summary = "Empty set"; - } else if (ret.records.size() == 1) { - summary = std::to_string(ret.records.size()) + " row in set"; - } else { - summary = std::to_string(ret.records.size()) + " rows in set"; - } - std::cout << summary << " (" << fmt::format("{:.3f}", elapsed) - << " sec)" << std::endl; -#ifdef HAS_READLINE - auto history_ret = save_history(); - if (history_ret != 0) return history_ret; -#endif - } - } catch (const communication::bolt::ClientQueryException &e) { - if (!isatty(STDIN_FILENO)) { - EchoFailure("Failed query", *query); - } - EchoFailure("Client received exception", e.what()); - if (!isatty(STDIN_FILENO)) { - return 1; - } - } catch (const communication::bolt::ClientFatalException &e) { - EchoFailure("Client received exception", e.what()); - EchoInfo("Trying to reconnect"); - bool is_connected = false; - client.Close(); - 
while (num_retries > 0) { - --num_retries; - try { - client.Connect(endpoint, FLAGS_username, FLAGS_password, - bolt_client_version); - is_connected = true; - break; - } catch (const communication::bolt::ClientFatalException &e) { - EchoFailure("Connection failure", e.what()); - } - std::this_thread::sleep_for(std::chrono::seconds(1)); - } - if (is_connected) { - num_retries = 3; - EchoInfo(fmt::format("Connected to 'memgraph://{}'", endpoint)); - } else { - EchoFailure("Couldn't connect to", - fmt::format("'memgraph://{}'", endpoint)); - return 1; - } - } - } - return 0; -} diff --git a/tools/tests/test_mg_client b/tools/tests/test_mg_client deleted file mode 100755 index e6ab4b46b..000000000 --- a/tools/tests/test_mg_client +++ /dev/null @@ -1,131 +0,0 @@ -#!/bin/bash - -## Helper functions - -function wait_for_server { - port=$1 - while ! nc -z -w 1 127.0.0.1 $port; do - sleep 0.1 - done - sleep 1 -} - -function echo_info { printf "\033[1;36m~~ $1 ~~\033[0m\n"; } -function echo_success { printf "\033[1;32m~~ $1 ~~\033[0m\n\n"; } -function echo_failure { printf "\033[1;31m~~ $1 ~~\033[0m\n\n"; } - - -## Environment setup -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -cd "$DIR" - -# Create a temporary directory for output files -tmpdir=/tmp/mg_client/output -if [ -d $tmpdir ]; then - rm -rf $tmpdir -fi -mkdir -p $tmpdir -cd $tmpdir - -# Find memgraph binaries. -memgraph_dir="$DIR/../../build" - -# Find mg_client binaries. -client_dir="$memgraph_dir/tools/src" -if [ ! -d $client_dir ]; then - echo_failure "mg-client directory not found" - exit 1 -fi - -# Find tests dir. -tests_dir="$DIR/client" -if [ ! -d $tests_dir ]; then - echo_failure "Directory with tests not found" - exit 1 -fi - -# Find tests input files. -input_dir="$tests_dir/input" -if [ ! 
-d $input_dir ]; then - echo_failure "Directory with tests input files not found" - exit 1 -fi - -# Check and generate SSL certificates -use_ssl=false -key_file="" -cert_file="" -if [ "$1" == "--use-ssl" ]; then - use_ssl=true - key_file=".key.pem" - cert_file=".cert.pem" - openssl req -new -newkey rsa:4096 -days 365 -nodes -x509 \ - -subj "/C=HR/ST=Zagreb/L=Zagreb/O=Memgraph/CN=db.memgraph.com" \ - -keyout $key_file -out $cert_file || exit 1 -fi - - -## Startup - -# Start the memgraph process and wait for it to start. -echo_info "Starting memgraph" -$memgraph_dir/memgraph --bolt-cert-file=$cert_file --bolt-key-file=$key_file & -pid=$! -wait_for_server 7687 -echo_success "Started memgraph" - - -## Tests - -echo_info "Running tests" -echo # Blank line - -client_flags="--use-ssl=$use_ssl" -test_code=0 -for output_dir in $tests_dir/output_*; do - for filename in $input_dir/*; do - test_name=$(basename $filename) - test_name=${test_name%.*} - output_name="$test_name.txt" - - output_format=$(basename $output_dir) - output_format=${output_format#*_} - run_flags="$client_flags --output-format=$output_format" - - echo_info "Running test '$test_name' with $output_format output" - $client_dir/mg_client $run_flags < $filename > $tmpdir/$test_name - diff -b $tmpdir/$test_name $output_dir/$output_name - test_code=$? - if [ $test_code -ne 0 ]; then - echo_failure "Test '$test_name' with $output_format output failed" - break - else - echo_success "Test '$test_name' with $output_format output passed" - fi - - # Clear database for each test. - $client_dir/mg_client $client_flags <<< "MATCH (n) DETACH DELETE n;" \ - &> /dev/null || exit 1 - done - if [ $test_code -ne 0 ]; then - break - fi -done - - -## Cleanup -echo_info "Starting test cleanup" - -# Shutdown the memgraph process. -kill $pid -wait -n -code_mg=$? - -# Check memgraph exit code. -if [ $code_mg -ne 0 ]; then - echo_failure "The memgraph process didn't terminate properly!" 
- exit $code_mg -fi -echo_success "Test cleanup done" - -exit $test_code From 7e44434cdf4bba4d9e507acab21ffda1677ed895 Mon Sep 17 00:00:00 2001 From: antonio2368 Date: Thu, 15 Apr 2021 10:49:40 +0200 Subject: [PATCH 40/63] Initial trigger definition (#133) * Pull out cypher query parsing logic * Define trigger structure * Run triggers before commit * Use skip list for saving triggers --- src/query/CMakeLists.txt | 2 + src/query/cypher_query_interpreter.cpp | 135 +++++++++++++++++ src/query/cypher_query_interpreter.hpp | 149 +++++++++++++++++++ src/query/interpreter.cpp | 193 ++----------------------- src/query/interpreter.hpp | 71 ++------- src/query/trigger.cpp | 63 ++++++++ src/query/trigger.hpp | 26 ++++ 7 files changed, 397 insertions(+), 242 deletions(-) create mode 100644 src/query/cypher_query_interpreter.cpp create mode 100644 src/query/cypher_query_interpreter.hpp create mode 100644 src/query/trigger.cpp create mode 100644 src/query/trigger.hpp diff --git a/src/query/CMakeLists.txt b/src/query/CMakeLists.txt index d68774818..8ebe31425 100644 --- a/src/query/CMakeLists.txt +++ b/src/query/CMakeLists.txt @@ -9,6 +9,7 @@ add_custom_target(generate_lcp_query DEPENDS ${generated_lcp_query_files}) set(mg_query_sources ${lcp_query_cpp_files} common.cpp + cypher_query_interpreter.cpp dump.cpp frontend/ast/cypher_main_visitor.cpp frontend/ast/pretty_print.cpp @@ -30,6 +31,7 @@ set(mg_query_sources procedure/mg_procedure_impl.cpp procedure/module.cpp procedure/py_module.cpp + trigger.cpp typed_value.cpp) add_library(mg-query STATIC ${mg_query_sources}) diff --git a/src/query/cypher_query_interpreter.cpp b/src/query/cypher_query_interpreter.cpp new file mode 100644 index 000000000..d721f2c09 --- /dev/null +++ b/src/query/cypher_query_interpreter.cpp @@ -0,0 +1,135 @@ +#include "query/cypher_query_interpreter.hpp" + +// NOLINTNEXTLINE (cppcoreguidelines-avoid-non-const-global-variables) +DEFINE_HIDDEN_bool(query_cost_planner, true, "Use the cost-estimating query 
planner."); +// NOLINTNEXTLINE (cppcoreguidelines-avoid-non-const-global-variables) +DEFINE_VALIDATED_int32(query_plan_cache_ttl, 60, "Time to live for cached query plans, in seconds.", + FLAG_IN_RANGE(0, std::numeric_limits::max())); + +namespace query { +CachedPlan::CachedPlan(std::unique_ptr plan) : plan_(std::move(plan)) {} + +ParsedQuery ParseQuery(const std::string &query_string, const std::map ¶ms, + utils::SkipList *cache, utils::SpinLock *antlr_lock) { + // Strip the query for caching purposes. The process of stripping a query + // "normalizes" it by replacing any literals with new parameters. This + // results in just the *structure* of the query being taken into account for + // caching. + frontend::StrippedQuery stripped_query{query_string}; + + // Copy over the parameters that were introduced during stripping. + Parameters parameters{stripped_query.literals()}; + + // Check that all user-specified parameters are provided. + for (const auto ¶m_pair : stripped_query.parameters()) { + auto it = params.find(param_pair.second); + + if (it == params.end()) { + throw query::UnprovidedParameterError("Parameter ${} not provided.", param_pair.second); + } + + parameters.Add(param_pair.first, it->second); + } + + // Cache the query's AST if it isn't already. + auto hash = stripped_query.hash(); + auto accessor = cache->access(); + auto it = accessor.find(hash); + std::unique_ptr parser; + + // Return a copy of both the AST storage and the query. 
+ CachedQuery result; + bool is_cacheable = true; + + auto get_information_from_cache = [&](const auto &cached_query) { + result.ast_storage.properties_ = cached_query.ast_storage.properties_; + result.ast_storage.labels_ = cached_query.ast_storage.labels_; + result.ast_storage.edge_types_ = cached_query.ast_storage.edge_types_; + + result.query = cached_query.query->Clone(&result.ast_storage); + result.required_privileges = cached_query.required_privileges; + }; + + if (it == accessor.end()) { + { + std::unique_lock guard(*antlr_lock); + + try { + parser = std::make_unique(stripped_query.query()); + } catch (const SyntaxException &e) { + // There is a syntax exception in the stripped query. Re-run the parser + // on the original query to get an appropriate error messsage. + parser = std::make_unique(query_string); + + // If an exception was not thrown here, the stripper messed something + // up. + LOG_FATAL("The stripped query can't be parsed, but the original can."); + } + } + + // Convert the ANTLR4 parse tree into an AST. 
+ AstStorage ast_storage; + frontend::ParsingContext context{true}; + frontend::CypherMainVisitor visitor(context, &ast_storage); + + visitor.visit(parser->tree()); + + if (visitor.IsCacheable()) { + CachedQuery cached_query{std::move(ast_storage), visitor.query(), query::GetRequiredPrivileges(visitor.query())}; + it = accessor.insert({hash, std::move(cached_query)}).first; + + get_information_from_cache(it->second); + } else { + result.ast_storage.properties_ = ast_storage.properties_; + result.ast_storage.labels_ = ast_storage.labels_; + result.ast_storage.edge_types_ = ast_storage.edge_types_; + + result.query = visitor.query()->Clone(&result.ast_storage); + result.required_privileges = query::GetRequiredPrivileges(visitor.query()); + + is_cacheable = false; + } + } else { + get_information_from_cache(it->second); + } + + return ParsedQuery{query_string, + params, + std::move(parameters), + std::move(stripped_query), + std::move(result.ast_storage), + result.query, + std::move(result.required_privileges), + is_cacheable}; +} + +std::unique_ptr MakeLogicalPlan(AstStorage ast_storage, CypherQuery *query, const Parameters ¶meters, + DbAccessor *db_accessor) { + auto vertex_counts = plan::MakeVertexCountCache(db_accessor); + auto symbol_table = MakeSymbolTable(query); + auto planning_context = plan::MakePlanningContext(&ast_storage, &symbol_table, query, &vertex_counts); + auto [root, cost] = plan::MakeLogicalPlan(&planning_context, parameters, FLAGS_query_cost_planner); + return std::make_unique(std::move(root), cost, std::move(ast_storage), + std::move(symbol_table)); +} + +std::shared_ptr CypherQueryToPlan(uint64_t hash, AstStorage ast_storage, CypherQuery *query, + const Parameters ¶meters, utils::SkipList *plan_cache, + DbAccessor *db_accessor, const bool is_cacheable) { + auto plan_cache_access = plan_cache->access(); + auto it = plan_cache_access.find(hash); + if (it != plan_cache_access.end()) { + if (it->second->IsExpired()) { + 
plan_cache_access.remove(hash); + } else { + return it->second; + } + } + + auto plan = std::make_shared(MakeLogicalPlan(std::move(ast_storage), (query), parameters, db_accessor)); + if (is_cacheable) { + plan_cache_access.insert({hash, plan}); + } + return plan; +} +} // namespace query diff --git a/src/query/cypher_query_interpreter.hpp b/src/query/cypher_query_interpreter.hpp new file mode 100644 index 000000000..eaf688521 --- /dev/null +++ b/src/query/cypher_query_interpreter.hpp @@ -0,0 +1,149 @@ +#pragma once + +////////////////////////////////////////////////////// +// THIS INCLUDE SHOULD ALWAYS COME BEFORE THE +// "cypher_main_visitor.hpp" +// "planner.hpp" includes json.hpp which uses libc's +// EOF macro while "cypher_main_visitor.hpp" includes +// "antlr4-runtime.h" which contains a static variable +// of the same name, EOF. +// This hides the definition of the macro which causes +// the compilation to fail. +#include "query/plan/planner.hpp" +////////////////////////////////////////////////////// +#include "query/frontend/ast/cypher_main_visitor.hpp" +#include "query/frontend/opencypher/parser.hpp" +#include "query/frontend/semantic/required_privileges.hpp" +#include "query/frontend/semantic/symbol_generator.hpp" +#include "query/frontend/stripped.hpp" +#include "utils/flag_validation.hpp" +#include "utils/timer.hpp" + +// NOLINTNEXTLINE (cppcoreguidelines-avoid-non-const-global-variables) +DECLARE_bool(query_cost_planner); +// NOLINTNEXTLINE (cppcoreguidelines-avoid-non-const-global-variables) +DECLARE_int32(query_plan_cache_ttl); + +namespace query { + +// TODO: Maybe this should move to query/plan/planner. +/// Interface for accessing the root operator of a logical plan. 
+class LogicalPlan { + public: + explicit LogicalPlan() = default; + + virtual ~LogicalPlan() = default; + + LogicalPlan(const LogicalPlan &) = default; + LogicalPlan &operator=(const LogicalPlan &) = default; + LogicalPlan(LogicalPlan &&) = default; + LogicalPlan &operator=(LogicalPlan &&) = default; + + virtual const plan::LogicalOperator &GetRoot() const = 0; + virtual double GetCost() const = 0; + virtual const SymbolTable &GetSymbolTable() const = 0; + virtual const AstStorage &GetAstStorage() const = 0; +}; + +class CachedPlan { + public: + explicit CachedPlan(std::unique_ptr plan); + + const auto &plan() const { return plan_->GetRoot(); } + double cost() const { return plan_->GetCost(); } + const auto &symbol_table() const { return plan_->GetSymbolTable(); } + const auto &ast_storage() const { return plan_->GetAstStorage(); } + + bool IsExpired() const { + // NOLINTNEXTLINE (modernize-use-nullptr) + return cache_timer_.Elapsed() > std::chrono::seconds(FLAGS_query_plan_cache_ttl); + }; + + private: + std::unique_ptr plan_; + utils::Timer cache_timer_; +}; + +struct CachedQuery { + AstStorage ast_storage; + Query *query; + std::vector required_privileges; +}; + +struct QueryCacheEntry { + bool operator==(const QueryCacheEntry &other) const { return first == other.first; } + bool operator<(const QueryCacheEntry &other) const { return first < other.first; } + bool operator==(const uint64_t &other) const { return first == other; } + bool operator<(const uint64_t &other) const { return first < other; } + + uint64_t first; + // TODO: Maybe store the query string here and use it as a key with the hash + // so that we eliminate the risk of hash collisions. 
+ CachedQuery second; +}; + +struct PlanCacheEntry { + bool operator==(const PlanCacheEntry &other) const { return first == other.first; } + bool operator<(const PlanCacheEntry &other) const { return first < other.first; } + bool operator==(const uint64_t &other) const { return first == other; } + bool operator<(const uint64_t &other) const { return first < other; } + + uint64_t first; + // TODO: Maybe store the query string here and use it as a key with the hash + // so that we eliminate the risk of hash collisions. + std::shared_ptr second; +}; + +/** + * A container for data related to the parsing of a query. + */ +struct ParsedQuery { + std::string query_string; + std::map user_parameters; + Parameters parameters; + frontend::StrippedQuery stripped_query; + AstStorage ast_storage; + Query *query; + std::vector required_privileges; + bool is_cacheable{true}; +}; + +ParsedQuery ParseQuery(const std::string &query_string, const std::map ¶ms, + utils::SkipList *cache, utils::SpinLock *antlr_lock); + +class SingleNodeLogicalPlan final : public LogicalPlan { + public: + SingleNodeLogicalPlan(std::unique_ptr root, double cost, AstStorage storage, + const SymbolTable &symbol_table) + : root_(std::move(root)), cost_(cost), storage_(std::move(storage)), symbol_table_(symbol_table) {} + + const plan::LogicalOperator &GetRoot() const override { return *root_; } + double GetCost() const override { return cost_; } + const SymbolTable &GetSymbolTable() const override { return symbol_table_; } + const AstStorage &GetAstStorage() const override { return storage_; } + + private: + std::unique_ptr root_; + double cost_; + AstStorage storage_; + SymbolTable symbol_table_; +}; + +/** + * Convert a parsed *Cypher* query's AST into a logical plan. + * + * The created logical plan will take ownership of the `AstStorage` within + * `ParsedQuery` and might modify it during planning. 
+ */ +std::unique_ptr MakeLogicalPlan(AstStorage ast_storage, CypherQuery *query, const Parameters ¶meters, + DbAccessor *db_accessor); + +/** + * Return the parsed *Cypher* query's AST cached logical plan, or create and + * cache a fresh one if it doesn't yet exist. + */ +std::shared_ptr CypherQueryToPlan(uint64_t hash, AstStorage ast_storage, CypherQuery *query, + const Parameters ¶meters, utils::SkipList *plan_cache, + DbAccessor *db_accessor, bool is_cacheable = true); + +} // namespace query diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp index 91a070507..2c5ff75ed 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -31,10 +31,6 @@ #include "utils/string.hpp" #include "utils/tsc.hpp" -DEFINE_HIDDEN_bool(query_cost_planner, true, "Use the cost-estimating query planner."); -DEFINE_VALIDATED_int32(query_plan_cache_ttl, 60, "Time to live for cached query plans, in seconds.", - FLAG_IN_RANGE(0, std::numeric_limits::max())); - namespace EventCounter { extern Event ReadQuery; extern Event WriteQuery; @@ -62,135 +58,6 @@ void UpdateTypeCount(const plan::ReadWriteTypeChecker::RWType type) { break; } } -} // namespace - -/** - * A container for data related to the parsing of a query. - */ -struct ParsedQuery { - std::string query_string; - std::map user_parameters; - Parameters parameters; - frontend::StrippedQuery stripped_query; - AstStorage ast_storage; - Query *query; - std::vector required_privileges; - bool is_cacheable{true}; -}; - -ParsedQuery ParseQuery(const std::string &query_string, const std::map ¶ms, - utils::SkipList *cache, utils::SpinLock *antlr_lock) { - // Strip the query for caching purposes. The process of stripping a query - // "normalizes" it by replacing any literals with new parameters. This - // results in just the *structure* of the query being taken into account for - // caching. 
- frontend::StrippedQuery stripped_query{query_string}; - - // Copy over the parameters that were introduced during stripping. - Parameters parameters{stripped_query.literals()}; - - // Check that all user-specified parameters are provided. - for (const auto ¶m_pair : stripped_query.parameters()) { - auto it = params.find(param_pair.second); - - if (it == params.end()) { - throw query::UnprovidedParameterError("Parameter ${} not provided.", param_pair.second); - } - - parameters.Add(param_pair.first, it->second); - } - - // Cache the query's AST if it isn't already. - auto hash = stripped_query.hash(); - auto accessor = cache->access(); - auto it = accessor.find(hash); - std::unique_ptr parser; - - // Return a copy of both the AST storage and the query. - CachedQuery result; - bool is_cacheable = true; - - auto get_information_from_cache = [&](const auto &cached_query) { - result.ast_storage.properties_ = cached_query.ast_storage.properties_; - result.ast_storage.labels_ = cached_query.ast_storage.labels_; - result.ast_storage.edge_types_ = cached_query.ast_storage.edge_types_; - - result.query = cached_query.query->Clone(&result.ast_storage); - result.required_privileges = cached_query.required_privileges; - }; - - if (it == accessor.end()) { - { - std::unique_lock guard(*antlr_lock); - - try { - parser = std::make_unique(stripped_query.query()); - } catch (const SyntaxException &e) { - // There is a syntax exception in the stripped query. Re-run the parser - // on the original query to get an appropriate error messsage. - parser = std::make_unique(query_string); - - // If an exception was not thrown here, the stripper messed something - // up. - LOG_FATAL("The stripped query can't be parsed, but the original can."); - } - } - - // Convert the ANTLR4 parse tree into an AST. 
- AstStorage ast_storage; - frontend::ParsingContext context{true}; - frontend::CypherMainVisitor visitor(context, &ast_storage); - - visitor.visit(parser->tree()); - - if (visitor.IsCacheable()) { - CachedQuery cached_query{std::move(ast_storage), visitor.query(), query::GetRequiredPrivileges(visitor.query())}; - it = accessor.insert({hash, std::move(cached_query)}).first; - - get_information_from_cache(it->second); - } else { - result.ast_storage.properties_ = ast_storage.properties_; - result.ast_storage.labels_ = ast_storage.labels_; - result.ast_storage.edge_types_ = ast_storage.edge_types_; - - result.query = visitor.query()->Clone(&result.ast_storage); - result.required_privileges = query::GetRequiredPrivileges(visitor.query()); - - is_cacheable = false; - } - } else { - get_information_from_cache(it->second); - } - - return ParsedQuery{query_string, - params, - std::move(parameters), - std::move(stripped_query), - std::move(result.ast_storage), - result.query, - std::move(result.required_privileges), - is_cacheable}; -} - -class SingleNodeLogicalPlan final : public LogicalPlan { - public: - SingleNodeLogicalPlan(std::unique_ptr root, double cost, AstStorage storage, - const SymbolTable &symbol_table) - : root_(std::move(root)), cost_(cost), storage_(std::move(storage)), symbol_table_(symbol_table) {} - - const plan::LogicalOperator &GetRoot() const override { return *root_; } - double GetCost() const override { return cost_; } - const SymbolTable &GetSymbolTable() const override { return symbol_table_; } - const AstStorage &GetAstStorage() const override { return storage_; } - - private: - std::unique_ptr root_; - double cost_; - AstStorage storage_; - SymbolTable symbol_table_; -}; - -CachedPlan::CachedPlan(std::unique_ptr plan) : plan_(std::move(plan)) {} struct Callback { std::vector header; @@ -575,11 +442,6 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, ReplQueryHandler * } } -Interpreter::Interpreter(InterpreterContext 
*interpreter_context) : interpreter_context_(interpreter_context) { - MG_ASSERT(interpreter_context_, "Interpreter context must not be NULL"); -} - -namespace { // Struct for lazy pulling from a vector struct PullPlanVector { explicit PullPlanVector(std::vector> values) : values_(std::move(values)) {} @@ -730,52 +592,13 @@ std::optional PullPlan::Pull(AnyStream *stream, std::optional< return ctx_; } +using RWType = plan::ReadWriteTypeChecker::RWType; } // namespace -/** - * Convert a parsed *Cypher* query's AST into a logical plan. - * - * The created logical plan will take ownership of the `AstStorage` within - * `ParsedQuery` and might modify it during planning. - */ -std::unique_ptr MakeLogicalPlan(AstStorage ast_storage, CypherQuery *query, const Parameters ¶meters, - DbAccessor *db_accessor) { - auto vertex_counts = plan::MakeVertexCountCache(db_accessor); - auto symbol_table = MakeSymbolTable(query); - auto planning_context = plan::MakePlanningContext(&ast_storage, &symbol_table, query, &vertex_counts); - std::unique_ptr root; - double cost; - std::tie(root, cost) = plan::MakeLogicalPlan(&planning_context, parameters, FLAGS_query_cost_planner); - return std::make_unique(std::move(root), cost, std::move(ast_storage), - std::move(symbol_table)); +Interpreter::Interpreter(InterpreterContext *interpreter_context) : interpreter_context_(interpreter_context) { + MG_ASSERT(interpreter_context_, "Interpreter context must not be NULL"); } -/** - * Return the parsed *Cypher* query's AST cached logical plan, or create and - * cache a fresh one if it doesn't yet exist. 
- */ -std::shared_ptr CypherQueryToPlan(uint64_t hash, AstStorage ast_storage, CypherQuery *query, - const Parameters ¶meters, utils::SkipList *plan_cache, - DbAccessor *db_accessor, const bool is_cacheable = true) { - auto plan_cache_access = plan_cache->access(); - auto it = plan_cache_access.find(hash); - if (it != plan_cache_access.end()) { - if (it->second->IsExpired()) { - plan_cache_access.remove(hash); - } else { - return it->second; - } - } - - auto plan = std::make_shared(MakeLogicalPlan(std::move(ast_storage), (query), parameters, db_accessor)); - if (is_cacheable) { - plan_cache_access.insert({hash, plan}); - } - return plan; -} - -using RWType = plan::ReadWriteTypeChecker::RWType; - PreparedQuery Interpreter::PrepareTransactionQuery(std::string_view query_upper) { std::function handler; @@ -1585,6 +1408,7 @@ void Interpreter::Commit() { // We should document clearly that all results should be pulled to complete // a query. if (!db_accessor_) return; + auto maybe_constraint_violation = db_accessor_->Commit(); if (maybe_constraint_violation.HasError()) { const auto &constraint_violation = maybe_constraint_violation.GetError(); @@ -1613,6 +1437,15 @@ void Interpreter::Commit() { } } } + + // Run the triggers + for (const auto &trigger : interpreter_context_->triggers.access()) { + utils::MonotonicBufferResource execution_memory{kExecutionMemoryBlockSize}; + trigger.Execute(&interpreter_context_->plan_cache, &*execution_db_accessor_, &execution_memory, + *interpreter_context_->tsc_frequency, interpreter_context_->execution_timeout_sec, + &interpreter_context_->is_shutting_down); + } + execution_db_accessor_ = std::nullopt; db_accessor_ = std::nullopt; } diff --git a/src/query/interpreter.hpp b/src/query/interpreter.hpp index 171691e03..a3ed8e323 100644 --- a/src/query/interpreter.hpp +++ b/src/query/interpreter.hpp @@ -3,6 +3,7 @@ #include #include "query/context.hpp" +#include "query/cypher_query_interpreter.hpp" #include "query/db_accessor.hpp" #include 
"query/exceptions.hpp" #include "query/frontend/ast/ast.hpp" @@ -12,6 +13,7 @@ #include "query/plan/operator.hpp" #include "query/plan/read_write_type_checker.hpp" #include "query/stream.hpp" +#include "query/trigger.hpp" #include "query/typed_value.hpp" #include "utils/event_counter.hpp" #include "utils/logging.hpp" @@ -21,9 +23,6 @@ #include "utils/timer.hpp" #include "utils/tsc.hpp" -DECLARE_bool(query_cost_planner); -DECLARE_int32(query_plan_cache_ttl); - namespace EventCounter { extern const Event FailedQuery; } // namespace EventCounter @@ -139,64 +138,6 @@ struct PreparedQuery { plan::ReadWriteTypeChecker::RWType rw_type; }; -// TODO: Maybe this should move to query/plan/planner. -/// Interface for accessing the root operator of a logical plan. -class LogicalPlan { - public: - virtual ~LogicalPlan() {} - - virtual const plan::LogicalOperator &GetRoot() const = 0; - virtual double GetCost() const = 0; - virtual const SymbolTable &GetSymbolTable() const = 0; - virtual const AstStorage &GetAstStorage() const = 0; -}; - -class CachedPlan { - public: - explicit CachedPlan(std::unique_ptr plan); - - const auto &plan() const { return plan_->GetRoot(); } - double cost() const { return plan_->GetCost(); } - const auto &symbol_table() const { return plan_->GetSymbolTable(); } - const auto &ast_storage() const { return plan_->GetAstStorage(); } - - bool IsExpired() const { return cache_timer_.Elapsed() > std::chrono::seconds(FLAGS_query_plan_cache_ttl); }; - - private: - std::unique_ptr plan_; - utils::Timer cache_timer_; -}; - -struct CachedQuery { - AstStorage ast_storage; - Query *query; - std::vector required_privileges; -}; - -struct QueryCacheEntry { - bool operator==(const QueryCacheEntry &other) const { return first == other.first; } - bool operator<(const QueryCacheEntry &other) const { return first < other.first; } - bool operator==(const uint64_t &other) const { return first == other; } - bool operator<(const uint64_t &other) const { return first < other; } 
- - uint64_t first; - // TODO: Maybe store the query string here and use it as a key with the hash - // so that we eliminate the risk of hash collisions. - CachedQuery second; -}; - -struct PlanCacheEntry { - bool operator==(const PlanCacheEntry &other) const { return first == other.first; } - bool operator<(const PlanCacheEntry &other) const { return first < other.first; } - bool operator==(const uint64_t &other) const { return first == other; } - bool operator<(const uint64_t &other) const { return first < other; } - - uint64_t first; - // TODO: Maybe store the query string here and use it as a key with the hash - // so that we eliminate the risk of hash collisions. - std::shared_ptr second; -}; - /** * Holds data shared between multiple `Interpreter` instances (which might be * running concurrently). @@ -205,7 +146,10 @@ struct PlanCacheEntry { * been passed to an `Interpreter` instance. */ struct InterpreterContext { - explicit InterpreterContext(storage::Storage *db) : db(db) {} + explicit InterpreterContext(storage::Storage *db) : db(db) { + // auto triggers_acc = triggers.access(); + // triggers_acc.insert(Trigger{"Creator", "CREATE (:CREATED)", &ast_cache, &antlr_lock}); + } storage::Storage *db; @@ -225,6 +169,9 @@ struct InterpreterContext { utils::SkipList ast_cache; utils::SkipList plan_cache; + + // use a thread safe container + utils::SkipList triggers; }; /// Function that is used to tell all active interpreters that they should stop diff --git a/src/query/trigger.cpp b/src/query/trigger.cpp new file mode 100644 index 000000000..03b86ff0f --- /dev/null +++ b/src/query/trigger.cpp @@ -0,0 +1,63 @@ +#include "query/trigger.hpp" +#include "query/context.hpp" +#include "query/db_accessor.hpp" +#include "query/frontend/ast/ast.hpp" +#include "query/interpret/frame.hpp" +#include "utils/memory.hpp" + +namespace query { +Trigger::Trigger(std::string name, std::string query, utils::SkipList *cache, + utils::SpinLock *antlr_lock) + : name_(std::move(name)), + 
parsed_statements_{ParseQuery(query, {} /* this should contain the predefined parameters */, cache, antlr_lock)} { +} + +void Trigger::Execute(utils::SkipList *plan_cache, DbAccessor *dba, + utils::MonotonicBufferResource *execution_memory, const double tsc_frequency, + const double max_execution_time_sec, std::atomic *is_shutting_down) const { + AstStorage ast_storage; + ast_storage.properties_ = parsed_statements_.ast_storage.properties_; + ast_storage.labels_ = parsed_statements_.ast_storage.labels_; + ast_storage.edge_types_ = parsed_statements_.ast_storage.edge_types_; + + auto plan = CypherQueryToPlan(parsed_statements_.stripped_query.hash(), std::move(ast_storage), + utils::Downcast(parsed_statements_.query), parsed_statements_.parameters, + plan_cache, dba, parsed_statements_.is_cacheable); + ExecutionContext ctx; + ctx.db_accessor = dba; + ctx.symbol_table = plan->symbol_table(); + ctx.evaluation_context.timestamp = + std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()) + .count(); + ctx.evaluation_context.parameters = parsed_statements_.parameters; + ctx.evaluation_context.properties = NamesToProperties(plan->ast_storage().properties_, dba); + ctx.evaluation_context.labels = NamesToLabels(plan->ast_storage().labels_, dba); + ctx.execution_tsc_timer = utils::TSCTimer(tsc_frequency); + ctx.max_execution_time_sec = max_execution_time_sec; + ctx.is_shutting_down = is_shutting_down; + ctx.is_profile_query = false; + + // Set up temporary memory for a single Pull. Initial memory comes from the + // stack. 256 KiB should fit on the stack and should be more than enough for a + // single `Pull`. + constexpr size_t stack_size = 256 * 1024; + char stack_data[stack_size]; + + // We can throw on every query because a simple queries for deleting will use only + // the stack allocated buffer. + // Also, we want to throw only when the query engine requests more memory and not the storage + // so we add the exception to the allocator. 
+ utils::ResourceWithOutOfMemoryException resource_with_exception; + utils::MonotonicBufferResource monotonic_memory(&stack_data[0], stack_size, &resource_with_exception); + // TODO (mferencevic): Tune the parameters accordingly. + utils::PoolResource pool_memory(128, 1024, &monotonic_memory); + ctx.evaluation_context.memory = &pool_memory; + + auto cursor = plan->plan().MakeCursor(execution_memory); + Frame frame{plan->symbol_table().max_position(), execution_memory}; + while (cursor->Pull(frame, ctx)) + ; + + cursor->Shutdown(); +} +} // namespace query diff --git a/src/query/trigger.hpp b/src/query/trigger.hpp new file mode 100644 index 000000000..8e15b38cb --- /dev/null +++ b/src/query/trigger.hpp @@ -0,0 +1,26 @@ +#pragma once + +#include "query/cypher_query_interpreter.hpp" +#include "query/frontend/ast/ast.hpp" + +namespace query { +struct Trigger { + explicit Trigger(std::string name, std::string query, utils::SkipList *cache, + utils::SpinLock *antlr_lock); + + void Execute(utils::SkipList *plan_cache, DbAccessor *dba, + utils::MonotonicBufferResource *execution_memory, double tsc_frequency, double max_execution_time_sec, + std::atomic *is_shutting_down) const; + + bool operator==(const Trigger &other) const { return name_ == other.name_; } + // NOLINTNEXTLINE (modernize-use-nullptr) + bool operator<(const Trigger &other) const { return name_ < other.name_; } + bool operator==(const std::string &other) const { return name_ == other; } + // NOLINTNEXTLINE (modernize-use-nullptr) + bool operator<(const std::string &other) const { return name_ < other; } + + private: + std::string name_; + ParsedQuery parsed_statements_; +}; +} // namespace query From 7bf40eb5d2a7ed1df7aac39941964ce3998b45ce Mon Sep 17 00:00:00 2001 From: antonio2368 Date: Tue, 20 Apr 2021 11:09:35 +0200 Subject: [PATCH 41/63] Add support for after commit triggers (#136) --- src/query/interpreter.cpp | 73 ++++++++++++++++++++++++++++++++++----- src/query/interpreter.hpp | 16 +++++++-- 
src/query/trigger.hpp | 2 ++ 3 files changed, 80 insertions(+), 11 deletions(-) diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp index 2c5ff75ed..5aeaf0cf4 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -1401,6 +1401,54 @@ void Interpreter::Abort() { db_accessor_ = std::nullopt; } +namespace { +void RunTriggersIndividually(const utils::SkipList &triggers, InterpreterContext *interpreter_context) { + // Run the triggers + for (const auto &trigger : triggers.access()) { + spdlog::debug("Executing trigger '{}'", trigger.name()); + utils::MonotonicBufferResource execution_memory{kExecutionMemoryBlockSize}; + + // create a new transaction for each trigger + auto storage_acc = interpreter_context->db->Access(); + DbAccessor db_accessor{&storage_acc}; + + try { + trigger.Execute(&interpreter_context->plan_cache, &db_accessor, &execution_memory, + *interpreter_context->tsc_frequency, interpreter_context->execution_timeout_sec, + &interpreter_context->is_shutting_down); + } catch (const utils::BasicException &exception) { + spdlog::warn("Trigger {} failed with exception:\n{}", trigger.name(), exception.what()); + db_accessor.Abort(); + continue; + } + + auto maybe_constraint_violation = db_accessor.Commit(); + if (maybe_constraint_violation.HasError()) { + const auto &constraint_violation = maybe_constraint_violation.GetError(); + switch (constraint_violation.type) { + case storage::ConstraintViolation::Type::EXISTENCE: { + const auto &label_name = db_accessor.LabelToName(constraint_violation.label); + MG_ASSERT(constraint_violation.properties.size() == 1U); + const auto &property_name = db_accessor.PropertyToName(*constraint_violation.properties.begin()); + spdlog::warn("Trigger '{}' failed to commit due to existence constraint violation on :{}({})", trigger.name(), + label_name, property_name); + break; + } + case storage::ConstraintViolation::Type::UNIQUE: { + const auto &label_name = 
db_accessor.LabelToName(constraint_violation.label); + std::stringstream property_names_stream; + utils::PrintIterable(property_names_stream, constraint_violation.properties, ", ", + [&](auto &stream, const auto &prop) { stream << db_accessor.PropertyToName(prop); }); + spdlog::warn("Trigger '{}' failed to commit due to unique constraint violation on :{}({})", trigger.name(), + label_name, property_names_stream.str()); + break; + } + } + } + } +} +} // namespace + void Interpreter::Commit() { // It's possible that some queries did not finish because the user did // not pull all of the results from the query. @@ -1409,6 +1457,16 @@ void Interpreter::Commit() { // a query. if (!db_accessor_) return; + // Run the triggers + for (const auto &trigger : interpreter_context_->before_commit_triggers.access()) { + spdlog::debug("Executing trigger '{}'", trigger.name()); + utils::MonotonicBufferResource execution_memory{kExecutionMemoryBlockSize}; + trigger.Execute(&interpreter_context_->plan_cache, &*execution_db_accessor_, &execution_memory, + *interpreter_context_->tsc_frequency, interpreter_context_->execution_timeout_sec, + &interpreter_context_->is_shutting_down); + } + SPDLOG_DEBUG("Finished executing before commit triggers"); + auto maybe_constraint_violation = db_accessor_->Commit(); if (maybe_constraint_violation.HasError()) { const auto &constraint_violation = maybe_constraint_violation.GetError(); @@ -1438,16 +1496,15 @@ void Interpreter::Commit() { } } - // Run the triggers - for (const auto &trigger : interpreter_context_->triggers.access()) { - utils::MonotonicBufferResource execution_memory{kExecutionMemoryBlockSize}; - trigger.Execute(&interpreter_context_->plan_cache, &*execution_db_accessor_, &execution_memory, - *interpreter_context_->tsc_frequency, interpreter_context_->execution_timeout_sec, - &interpreter_context_->is_shutting_down); - } - execution_db_accessor_ = std::nullopt; db_accessor_ = std::nullopt; + + 
background_thread_.AddTask([interpreter_context = this->interpreter_context_] { + RunTriggersIndividually(interpreter_context->after_commit_triggers, interpreter_context); + SPDLOG_DEBUG("Finished executing after commit triggers"); // NOLINT(bugprone-lambda-function-name) + }); + + SPDLOG_DEBUG("Finished comitting the transaction"); } void Interpreter::AdvanceCommand() { diff --git a/src/query/interpreter.hpp b/src/query/interpreter.hpp index a3ed8e323..8200201d7 100644 --- a/src/query/interpreter.hpp +++ b/src/query/interpreter.hpp @@ -20,6 +20,7 @@ #include "utils/memory.hpp" #include "utils/skip_list.hpp" #include "utils/spin_lock.hpp" +#include "utils/thread_pool.hpp" #include "utils/timer.hpp" #include "utils/tsc.hpp" @@ -147,8 +148,14 @@ struct PreparedQuery { */ struct InterpreterContext { explicit InterpreterContext(storage::Storage *db) : db(db) { - // auto triggers_acc = triggers.access(); - // triggers_acc.insert(Trigger{"Creator", "CREATE (:CREATED)", &ast_cache, &antlr_lock}); + // { + // auto triggers_acc = before_commit_triggers.access(); + // triggers_acc.insert(Trigger{"BeforeCreator", "CREATE (:BEFORE)", &ast_cache, &antlr_lock}); + // } + // { + // auto triggers_acc = after_commit_triggers.access(); + // triggers_acc.insert(Trigger{"AfterCreator", "CREATE (:AFTER)", &ast_cache, &antlr_lock}); + // } } storage::Storage *db; @@ -171,7 +178,8 @@ struct InterpreterContext { utils::SkipList plan_cache; // use a thread safe container - utils::SkipList triggers; + utils::SkipList before_commit_triggers; + utils::SkipList after_commit_triggers; }; /// Function that is used to tell all active interpreters that they should stop @@ -304,6 +312,8 @@ class Interpreter final { bool in_explicit_transaction_{false}; bool expect_rollback_{false}; + utils::ThreadPool background_thread_{1}; + PreparedQuery PrepareTransactionQuery(std::string_view query_upper); void Commit(); void AdvanceCommand(); diff --git a/src/query/trigger.hpp b/src/query/trigger.hpp index 
8e15b38cb..db59b3172 100644 --- a/src/query/trigger.hpp +++ b/src/query/trigger.hpp @@ -19,6 +19,8 @@ struct Trigger { // NOLINTNEXTLINE (modernize-use-nullptr) bool operator<(const std::string &other) const { return name_ < other; } + const auto &name() const { return name_; } + private: std::string name_; ParsedQuery parsed_statements_; From 2f3fa656d9fa252fb38257920c0b57748298102b Mon Sep 17 00:00:00 2001 From: antonio2368 Date: Fri, 23 Apr 2021 14:19:42 +0200 Subject: [PATCH 42/63] Predefined variables (#138) * Add support for injecting predefined variables in any scope --- src/query/context.hpp | 4 + src/query/cypher_query_interpreter.cpp | 13 ++- src/query/cypher_query_interpreter.hpp | 16 ++-- src/query/db_accessor.hpp | 2 + src/query/frontend/ast/ast.lcp | 4 +- .../frontend/semantic/symbol_generator.cpp | 42 +++++++-- .../frontend/semantic/symbol_generator.hpp | 15 +++- src/query/interpreter.cpp | 87 ++++++++++++------- src/query/interpreter.hpp | 34 ++++++-- src/query/plan/operator.cpp | 5 +- src/query/trigger.cpp | 80 ++++++++++++++--- src/query/trigger.hpp | 32 ++++++- src/storage/v2/storage.cpp | 25 ++++-- src/storage/v2/storage.hpp | 3 + tests/unit/query_common.hpp | 2 +- tests/unit/query_semantic.cpp | 51 +++++++++++ 16 files changed, 323 insertions(+), 92 deletions(-) diff --git a/src/query/context.hpp b/src/query/context.hpp index d937769da..286e5adf5 100644 --- a/src/query/context.hpp +++ b/src/query/context.hpp @@ -4,6 +4,7 @@ #include "query/frontend/semantic/symbol_table.hpp" #include "query/parameters.hpp" #include "query/plan/profile.hpp" +#include "query/trigger.hpp" #include "utils/tsc.hpp" namespace query { @@ -56,6 +57,9 @@ struct ExecutionContext { std::chrono::duration profile_execution_time; plan::ProfilingStats stats; plan::ProfilingStats *stats_root{nullptr}; + + // trigger context + TriggerContext *trigger_context{nullptr}; }; inline bool MustAbort(const ExecutionContext &context) { diff --git a/src/query/cypher_query_interpreter.cpp 
b/src/query/cypher_query_interpreter.cpp index d721f2c09..b86219364 100644 --- a/src/query/cypher_query_interpreter.cpp +++ b/src/query/cypher_query_interpreter.cpp @@ -103,19 +103,23 @@ ParsedQuery ParseQuery(const std::string &query_string, const std::map MakeLogicalPlan(AstStorage ast_storage, CypherQuery *query, const Parameters ¶meters, - DbAccessor *db_accessor) { + DbAccessor *db_accessor, + const std::vector &predefined_identifiers) { auto vertex_counts = plan::MakeVertexCountCache(db_accessor); - auto symbol_table = MakeSymbolTable(query); + auto symbol_table = MakeSymbolTable(query, predefined_identifiers); auto planning_context = plan::MakePlanningContext(&ast_storage, &symbol_table, query, &vertex_counts); auto [root, cost] = plan::MakeLogicalPlan(&planning_context, parameters, FLAGS_query_cost_planner); return std::make_unique(std::move(root), cost, std::move(ast_storage), std::move(symbol_table)); } +} // namespace std::shared_ptr CypherQueryToPlan(uint64_t hash, AstStorage ast_storage, CypherQuery *query, const Parameters ¶meters, utils::SkipList *plan_cache, - DbAccessor *db_accessor, const bool is_cacheable) { + DbAccessor *db_accessor, const bool is_cacheable, + const std::vector &predefined_identifiers) { auto plan_cache_access = plan_cache->access(); auto it = plan_cache_access.find(hash); if (it != plan_cache_access.end()) { @@ -126,7 +130,8 @@ std::shared_ptr CypherQueryToPlan(uint64_t hash, AstStorage ast_stor } } - auto plan = std::make_shared(MakeLogicalPlan(std::move(ast_storage), (query), parameters, db_accessor)); + auto plan = std::make_shared( + MakeLogicalPlan(std::move(ast_storage), query, parameters, db_accessor, predefined_identifiers)); if (is_cacheable) { plan_cache_access.insert({hash, plan}); } diff --git a/src/query/cypher_query_interpreter.hpp b/src/query/cypher_query_interpreter.hpp index eaf688521..5170cd93c 100644 --- a/src/query/cypher_query_interpreter.hpp +++ b/src/query/cypher_query_interpreter.hpp @@ -129,21 +129,17 
@@ class SingleNodeLogicalPlan final : public LogicalPlan { SymbolTable symbol_table_; }; -/** - * Convert a parsed *Cypher* query's AST into a logical plan. - * - * The created logical plan will take ownership of the `AstStorage` within - * `ParsedQuery` and might modify it during planning. - */ -std::unique_ptr MakeLogicalPlan(AstStorage ast_storage, CypherQuery *query, const Parameters ¶meters, - DbAccessor *db_accessor); - /** * Return the parsed *Cypher* query's AST cached logical plan, or create and * cache a fresh one if it doesn't yet exist. + * @param predefined_identifiers optional identifiers you want to inject into a query. + * If an identifier is not defined in a scope, we check the predefined identifiers. + * If an identifier is contained there, we inject it at that place and remove it, + * because a predefined identifier can be used only in one scope. */ std::shared_ptr CypherQueryToPlan(uint64_t hash, AstStorage ast_storage, CypherQuery *query, const Parameters ¶meters, utils::SkipList *plan_cache, - DbAccessor *db_accessor, bool is_cacheable = true); + DbAccessor *db_accessor, bool is_cacheable = true, + const std::vector &predefined_identifiers = {}); } // namespace query diff --git a/src/query/db_accessor.hpp b/src/query/db_accessor.hpp index 0c7f24977..a3ec2b019 100644 --- a/src/query/db_accessor.hpp +++ b/src/query/db_accessor.hpp @@ -208,6 +208,8 @@ class DbAccessor final { return std::nullopt; } + void FinalizeTransaction() { accessor_->FinalizeTransaction(); } + VerticesIterable Vertices(storage::View view) { return VerticesIterable(accessor_->Vertices(view)); } VerticesIterable Vertices(storage::View view, storage::LabelId label) { diff --git a/src/query/frontend/ast/ast.lcp b/src/query/frontend/ast/ast.lcp index ee4fdcebd..4f9fdcf4c 100644 --- a/src/query/frontend/ast/ast.lcp +++ b/src/query/frontend/ast/ast.lcp @@ -686,9 +686,7 @@ cpp<# symbol_pos_ = symbol.position(); return this; } - cpp<#) - (:protected - #>cpp + explicit 
Identifier(const std::string &name) : name_(name) {} Identifier(const std::string &name, bool user_declared) : name_(name), user_declared_(user_declared) {} diff --git a/src/query/frontend/semantic/symbol_generator.cpp b/src/query/frontend/semantic/symbol_generator.cpp index ea6a0f8b9..96aff3646 100644 --- a/src/query/frontend/semantic/symbol_generator.cpp +++ b/src/query/frontend/semantic/symbol_generator.cpp @@ -12,8 +12,23 @@ namespace query { +namespace { +std::unordered_map GeneratePredefinedIdentifierMap( + const std::vector &predefined_identifiers) { + std::unordered_map identifier_map; + for (const auto &identifier : predefined_identifiers) { + identifier_map.emplace(identifier->name_, identifier); + } + + return identifier_map; +} +} // namespace + +SymbolGenerator::SymbolGenerator(SymbolTable *symbol_table, const std::vector &predefined_identifiers) + : symbol_table_(symbol_table), predefined_identifiers_{GeneratePredefinedIdentifierMap(predefined_identifiers)} {} + auto SymbolGenerator::CreateSymbol(const std::string &name, bool user_declared, Symbol::Type type, int token_position) { - auto symbol = symbol_table_.CreateSymbol(name, user_declared, type, token_position); + auto symbol = symbol_table_->CreateSymbol(name, user_declared, type, token_position); scope_.symbols[name] = symbol; return symbol; } @@ -227,7 +242,8 @@ bool SymbolGenerator::PostVisit(Match &) { // Check variables in property maps after visiting Match, so that they can // reference symbols out of bind order. 
for (auto &ident : scope_.identifiers_in_match) { - if (!HasSymbol(ident->name_)) throw UnboundVariableError(ident->name_); + if (!HasSymbol(ident->name_) && !ConsumePredefinedIdentifier(ident->name_)) + throw UnboundVariableError(ident->name_); ident->MapTo(scope_.symbols[ident->name_]); } scope_.identifiers_in_match.clear(); @@ -277,7 +293,7 @@ SymbolGenerator::ReturnType SymbolGenerator::Visit(Identifier &ident) { scope_.identifiers_in_match.emplace_back(&ident); } else { // Everything else references a bound symbol. - if (!HasSymbol(ident.name_)) throw UnboundVariableError(ident.name_); + if (!HasSymbol(ident.name_) && !ConsumePredefinedIdentifier(ident.name_)) throw UnboundVariableError(ident.name_); symbol = scope_.symbols[ident.name_]; } ident.MapTo(symbol); @@ -448,10 +464,10 @@ bool SymbolGenerator::PreVisit(EdgeAtom &edge_atom) { // Create inner symbols, but don't bind them in scope, since they are to // be used in the missing filter expression. auto *inner_edge = edge_atom.filter_lambda_.inner_edge; - inner_edge->MapTo(symbol_table_.CreateSymbol(inner_edge->name_, inner_edge->user_declared_, Symbol::Type::EDGE)); + inner_edge->MapTo(symbol_table_->CreateSymbol(inner_edge->name_, inner_edge->user_declared_, Symbol::Type::EDGE)); auto *inner_node = edge_atom.filter_lambda_.inner_node; inner_node->MapTo( - symbol_table_.CreateSymbol(inner_node->name_, inner_node->user_declared_, Symbol::Type::VERTEX)); + symbol_table_->CreateSymbol(inner_node->name_, inner_node->user_declared_, Symbol::Type::VERTEX)); } if (edge_atom.weight_lambda_.expression) { VisitWithIdentifiers(edge_atom.weight_lambda_.expression, @@ -506,4 +522,20 @@ void SymbolGenerator::VisitWithIdentifiers(Expression *expr, const std::vectorsecond; + MG_ASSERT(!identifier->user_declared_, "Predefined symbols cannot be user declared!"); + identifier->MapTo(CreateSymbol(identifier->name_, identifier->user_declared_)); + predefined_identifiers_.erase(it); + return true; +} + } // namespace query diff 
--git a/src/query/frontend/semantic/symbol_generator.hpp b/src/query/frontend/semantic/symbol_generator.hpp index 0103c7f90..0da1082ec 100644 --- a/src/query/frontend/semantic/symbol_generator.hpp +++ b/src/query/frontend/semantic/symbol_generator.hpp @@ -17,7 +17,7 @@ namespace query { /// variable types. class SymbolGenerator : public HierarchicalTreeVisitor { public: - explicit SymbolGenerator(SymbolTable &symbol_table) : symbol_table_(symbol_table) {} + explicit SymbolGenerator(SymbolTable *symbol_table, const std::vector &predefined_identifiers); using HierarchicalTreeVisitor::PostVisit; using HierarchicalTreeVisitor::PreVisit; @@ -116,6 +116,9 @@ class SymbolGenerator : public HierarchicalTreeVisitor { bool HasSymbol(const std::string &name); + // @return true if it added a predefined identifier with that name + bool ConsumePredefinedIdentifier(const std::string &name); + // Returns a freshly generated symbol. Previous mapping of the same name to a // different symbol is replaced with the new one. auto CreateSymbol(const std::string &name, bool user_declared, Symbol::Type type = Symbol::Type::ANY, @@ -129,15 +132,19 @@ class SymbolGenerator : public HierarchicalTreeVisitor { void VisitWithIdentifiers(Expression *, const std::vector &); - SymbolTable &symbol_table_; + SymbolTable *symbol_table_; + + // Identifiers which are injected from outside the query. Each identifier + // is mapped by its name. 
+ std::unordered_map predefined_identifiers_; Scope scope_; std::unordered_set prev_return_names_; std::unordered_set curr_return_names_; }; -inline SymbolTable MakeSymbolTable(CypherQuery *query) { +inline SymbolTable MakeSymbolTable(CypherQuery *query, const std::vector &predefined_identifiers = {}) { SymbolTable symbol_table; - SymbolGenerator symbol_generator(symbol_table); + SymbolGenerator symbol_generator(&symbol_table, predefined_identifiers); query->single_query_->Accept(symbol_generator); for (auto *cypher_union : query->cypher_unions_) { cypher_union->Accept(symbol_generator); diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp index 5aeaf0cf4..00c9c8520 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -467,7 +467,7 @@ struct PullPlanVector { struct PullPlan { explicit PullPlan(std::shared_ptr plan, const Parameters ¶meters, bool is_profile_query, DbAccessor *dba, InterpreterContext *interpreter_context, utils::MemoryResource *execution_memory, - std::optional memory_limit = {}); + TriggerContext *trigger_context = nullptr, std::optional memory_limit = {}); std::optional Pull(AnyStream *stream, std::optional n, const std::vector &output_symbols, std::map *summary); @@ -495,7 +495,7 @@ struct PullPlan { PullPlan::PullPlan(const std::shared_ptr plan, const Parameters ¶meters, const bool is_profile_query, DbAccessor *dba, InterpreterContext *interpreter_context, utils::MemoryResource *execution_memory, - const std::optional memory_limit) + TriggerContext *trigger_context, const std::optional memory_limit) : plan_(plan), cursor_(plan->plan().MakeCursor(execution_memory)), frame_(plan->symbol_table().max_position(), execution_memory), @@ -512,6 +512,7 @@ PullPlan::PullPlan(const std::shared_ptr plan, const Parameters &par ctx_.max_execution_time_sec = interpreter_context->execution_timeout_sec; ctx_.is_shutting_down = &interpreter_context->is_shutting_down; ctx_.is_profile_query = is_profile_query; + 
ctx_.trigger_context = trigger_context; } std::optional PullPlan::Pull(AnyStream *stream, std::optional n, @@ -589,7 +590,7 @@ std::optional PullPlan::Pull(AnyStream *stream, std::optional< summary->insert_or_assign("plan_execution_time", execution_time_.count()); cursor_->Shutdown(); ctx_.profile_execution_time = execution_time_; - return ctx_; + return std::move(ctx_); } using RWType = plan::ReadWriteTypeChecker::RWType; @@ -610,8 +611,8 @@ PreparedQuery Interpreter::PrepareTransactionQuery(std::string_view query_upper) in_explicit_transaction_ = true; expect_rollback_ = false; - db_accessor_.emplace(interpreter_context_->db->Access()); - execution_db_accessor_.emplace(&*db_accessor_); + db_accessor_ = std::make_unique(interpreter_context_->db->Access()); + execution_db_accessor_.emplace(db_accessor_.get()); }; } else if (query_upper == "COMMIT") { handler = [this] { @@ -658,7 +659,7 @@ PreparedQuery Interpreter::PrepareTransactionQuery(std::string_view query_upper) PreparedQuery PrepareCypherQuery(ParsedQuery parsed_query, std::map *summary, InterpreterContext *interpreter_context, DbAccessor *dba, - utils::MemoryResource *execution_memory) { + utils::MemoryResource *execution_memory, TriggerContext *trigger_context = nullptr) { auto *cypher_query = utils::Downcast(parsed_query.query); Frame frame(0); @@ -695,7 +696,7 @@ PreparedQuery PrepareCypherQuery(ParsedQuery parsed_query, std::map(plan, parsed_query.parameters, false, dba, interpreter_context, - execution_memory, memory_limit); + execution_memory, trigger_context, memory_limit); return PreparedQuery{std::move(header), std::move(parsed_query.required_privileges), [pull_plan = std::move(pull_plan), output_symbols = std::move(output_symbols), summary]( AnyStream *stream, std::optional n) -> std::optional { @@ -820,7 +821,7 @@ PreparedQuery PrepareProfileQuery(ParsedQuery parsed_query, bool in_explicit_tra AnyStream *stream, std::optional n) mutable -> std::optional { // No output symbols are given so that 
nothing is streamed. if (!ctx) { - ctx = PullPlan(plan, parameters, true, dba, interpreter_context, execution_memory, memory_limit) + ctx = PullPlan(plan, parameters, true, dba, interpreter_context, execution_memory, nullptr, memory_limit) .Pull(stream, {}, {}, summary); pull_plan = std::make_shared(ProfilingStatsToTable(ctx->stats, ctx->profile_execution_time)); } @@ -1322,16 +1323,22 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, if (!in_explicit_transaction_ && (utils::Downcast(parsed_query.query) || utils::Downcast(parsed_query.query) || utils::Downcast(parsed_query.query) || utils::Downcast(parsed_query.query))) { - db_accessor_.emplace(interpreter_context_->db->Access()); - execution_db_accessor_.emplace(&*db_accessor_); + db_accessor_ = std::make_unique(interpreter_context_->db->Access()); + execution_db_accessor_.emplace(db_accessor_.get()); } utils::Timer planning_timer; PreparedQuery prepared_query; if (utils::Downcast(parsed_query.query)) { + if (interpreter_context_->before_commit_triggers.size() > 0 || + interpreter_context_->after_commit_triggers.size() > 0) { + trigger_context_.emplace(); + } + prepared_query = PrepareCypherQuery(std::move(parsed_query), &query_execution->summary, interpreter_context_, - &*execution_db_accessor_, &query_execution->execution_memory); + &*execution_db_accessor_, &query_execution->execution_memory, + trigger_context_ ? 
&*trigger_context_ : nullptr); } else if (utils::Downcast(parsed_query.query)) { prepared_query = PrepareExplainQuery(std::move(parsed_query), &query_execution->summary, interpreter_context_, &*execution_db_accessor_, &query_execution->execution_memory); @@ -1398,11 +1405,13 @@ void Interpreter::Abort() { if (!db_accessor_) return; db_accessor_->Abort(); execution_db_accessor_ = std::nullopt; - db_accessor_ = std::nullopt; + db_accessor_.reset(); + trigger_context_.reset(); } namespace { -void RunTriggersIndividually(const utils::SkipList &triggers, InterpreterContext *interpreter_context) { +void RunTriggersIndividually(const utils::SkipList &triggers, InterpreterContext *interpreter_context, + TriggerContext trigger_context) { // Run the triggers for (const auto &trigger : triggers.access()) { spdlog::debug("Executing trigger '{}'", trigger.name()); @@ -1412,12 +1421,13 @@ void RunTriggersIndividually(const utils::SkipList &triggers, Interpret auto storage_acc = interpreter_context->db->Access(); DbAccessor db_accessor{&storage_acc}; + trigger_context.AdaptForAccessor(&db_accessor); try { trigger.Execute(&interpreter_context->plan_cache, &db_accessor, &execution_memory, *interpreter_context->tsc_frequency, interpreter_context->execution_timeout_sec, - &interpreter_context->is_shutting_down); + &interpreter_context->is_shutting_down, trigger_context); } catch (const utils::BasicException &exception) { - spdlog::warn("Trigger {} failed with exception:\n{}", trigger.name(), exception.what()); + spdlog::warn("Trigger '{}' failed with exception:\n{}", trigger.name(), exception.what()); db_accessor.Abort(); continue; } @@ -1457,15 +1467,17 @@ void Interpreter::Commit() { // a query. 
if (!db_accessor_) return; - // Run the triggers - for (const auto &trigger : interpreter_context_->before_commit_triggers.access()) { - spdlog::debug("Executing trigger '{}'", trigger.name()); - utils::MonotonicBufferResource execution_memory{kExecutionMemoryBlockSize}; - trigger.Execute(&interpreter_context_->plan_cache, &*execution_db_accessor_, &execution_memory, - *interpreter_context_->tsc_frequency, interpreter_context_->execution_timeout_sec, - &interpreter_context_->is_shutting_down); + if (trigger_context_) { + // Run the triggers + for (const auto &trigger : interpreter_context_->before_commit_triggers.access()) { + spdlog::debug("Executing trigger '{}'", trigger.name()); + utils::MonotonicBufferResource execution_memory{kExecutionMemoryBlockSize}; + trigger.Execute(&interpreter_context_->plan_cache, &*execution_db_accessor_, &execution_memory, + *interpreter_context_->tsc_frequency, interpreter_context_->execution_timeout_sec, + &interpreter_context_->is_shutting_down, *trigger_context_); + } + SPDLOG_DEBUG("Finished executing before commit triggers"); } - SPDLOG_DEBUG("Finished executing before commit triggers"); auto maybe_constraint_violation = db_accessor_->Commit(); if (maybe_constraint_violation.HasError()) { @@ -1475,8 +1487,9 @@ void Interpreter::Commit() { auto label_name = execution_db_accessor_->LabelToName(constraint_violation.label); MG_ASSERT(constraint_violation.properties.size() == 1U); auto property_name = execution_db_accessor_->PropertyToName(*constraint_violation.properties.begin()); - execution_db_accessor_ = std::nullopt; - db_accessor_ = std::nullopt; + execution_db_accessor_.reset(); + db_accessor_.reset(); + trigger_context_.reset(); throw QueryException("Unable to commit due to existence constraint violation on :{}({})", label_name, property_name); break; @@ -1487,8 +1500,9 @@ void Interpreter::Commit() { utils::PrintIterable( property_names_stream, constraint_violation.properties, ", ", [this](auto &stream, const auto &prop) { 
stream << execution_db_accessor_->PropertyToName(prop); }); - execution_db_accessor_ = std::nullopt; - db_accessor_ = std::nullopt; + execution_db_accessor_.reset(); + db_accessor_.reset(); + trigger_context_.reset(); throw QueryException("Unable to commit due to unique constraint violation on :{}({})", label_name, property_names_stream.str()); break; @@ -1496,13 +1510,20 @@ void Interpreter::Commit() { } } - execution_db_accessor_ = std::nullopt; - db_accessor_ = std::nullopt; + if (trigger_context_) { + background_thread_.AddTask([trigger_context = std::move(*trigger_context_), + interpreter_context = this->interpreter_context_, + user_transaction = std::shared_ptr(std::move(db_accessor_))]() mutable { + RunTriggersIndividually(interpreter_context->after_commit_triggers, interpreter_context, + std::move(trigger_context)); + user_transaction->FinalizeTransaction(); + SPDLOG_DEBUG("Finished executing after commit triggers"); // NOLINT(bugprone-lambda-function-name) + }); + } - background_thread_.AddTask([interpreter_context = this->interpreter_context_] { - RunTriggersIndividually(interpreter_context->after_commit_triggers, interpreter_context); - SPDLOG_DEBUG("Finished executing after commit triggers"); // NOLINT(bugprone-lambda-function-name) - }); + execution_db_accessor_.reset(); + db_accessor_.reset(); + trigger_context_.reset(); SPDLOG_DEBUG("Finished comitting the transaction"); } diff --git a/src/query/interpreter.hpp b/src/query/interpreter.hpp index 8200201d7..0eff59c4d 100644 --- a/src/query/interpreter.hpp +++ b/src/query/interpreter.hpp @@ -148,14 +148,26 @@ struct PreparedQuery { */ struct InterpreterContext { explicit InterpreterContext(storage::Storage *db) : db(db) { - // { - // auto triggers_acc = before_commit_triggers.access(); - // triggers_acc.insert(Trigger{"BeforeCreator", "CREATE (:BEFORE)", &ast_cache, &antlr_lock}); - // } - // { - // auto triggers_acc = after_commit_triggers.access(); - // triggers_acc.insert(Trigger{"AfterCreator", 
"CREATE (:AFTER)", &ast_cache, &antlr_lock}); - // } + // try { + // { + // auto storage_acc = db->Access(); + // DbAccessor dba(&storage_acc); + // auto triggers_acc = before_commit_triggers.access(); + // triggers_acc.insert(Trigger{"BeforeCreator", "UNWIND createdVertices as u SET u.before = u.id + 1", + // &ast_cache, + // &plan_cache, &dba, &antlr_lock}); + // } + // { + // auto storage_acc = db->Access(); + // DbAccessor dba(&storage_acc); + // auto triggers_acc = after_commit_triggers.access(); + // triggers_acc.insert(Trigger{"AfterCreator", "UNWIND createdVertices as u SET u.after = u.id - 1", + // &ast_cache, + // &plan_cache, &dba, &antlr_lock}); + // } + // } catch (const utils::BasicException &e) { + // spdlog::critical("Failed to create a trigger because: {}", e.what()); + // } } storage::Storage *db; @@ -307,8 +319,12 @@ class Interpreter final { InterpreterContext *interpreter_context_; - std::optional db_accessor_; + // This cannot be std::optional because we need to move this accessor later on into a lambda capture + // which is assigned to std::function. std::function requires every object to be copyable, so we + // move this unique_ptr into a shrared_ptr. 
+ std::unique_ptr db_accessor_; std::optional execution_db_accessor_; + std::optional trigger_context_; bool in_explicit_transaction_{false}; bool expect_rollback_{false}; diff --git a/src/query/plan/operator.cpp b/src/query/plan/operator.cpp index 567cc04b4..7fe2c7327 100644 --- a/src/query/plan/operator.cpp +++ b/src/query/plan/operator.cpp @@ -206,7 +206,10 @@ bool CreateNode::CreateNodeCursor::Pull(Frame &frame, ExecutionContext &context) SCOPED_PROFILE_OP("CreateNode"); if (input_cursor_->Pull(frame, context)) { - CreateLocalVertex(self_.node_info_, &frame, context); + auto created_vertex = CreateLocalVertex(self_.node_info_, &frame, context); + if (context.trigger_context) { + context.trigger_context->RegisterCreatedVertex(created_vertex); + } return true; } diff --git a/src/query/trigger.cpp b/src/query/trigger.cpp index 03b86ff0f..547ec06cb 100644 --- a/src/query/trigger.cpp +++ b/src/query/trigger.cpp @@ -1,28 +1,80 @@ #include "query/trigger.hpp" #include "query/context.hpp" +#include "query/cypher_query_interpreter.hpp" #include "query/db_accessor.hpp" #include "query/frontend/ast/ast.hpp" #include "query/interpret/frame.hpp" #include "utils/memory.hpp" namespace query { -Trigger::Trigger(std::string name, std::string query, utils::SkipList *cache, - utils::SpinLock *antlr_lock) - : name_(std::move(name)), - parsed_statements_{ParseQuery(query, {} /* this should contain the predefined parameters */, cache, antlr_lock)} { + +namespace { +std::vector> GetPredefinedIdentifiers() { + return {{{"createdVertices", false}, trigger::IdentifierTag::CREATED_VERTICES}}; +} +} // namespace + +void TriggerContext::RegisterCreatedVertex(const VertexAccessor created_vertex) { + created_vertices_.push_back(created_vertex); } -void Trigger::Execute(utils::SkipList *plan_cache, DbAccessor *dba, - utils::MonotonicBufferResource *execution_memory, const double tsc_frequency, - const double max_execution_time_sec, std::atomic *is_shutting_down) const { +TypedValue 
TriggerContext::GetTypedValue(const trigger::IdentifierTag tag) const { + switch (tag) { + case trigger::IdentifierTag::CREATED_VERTICES: { + std::vector typed_created_vertices; + typed_created_vertices.reserve(created_vertices_.size()); + std::transform(std::begin(created_vertices_), std::end(created_vertices_), + std::back_inserter(typed_created_vertices), + [](const auto &accessor) { return TypedValue(accessor); }); + return TypedValue(typed_created_vertices); + } + } +} + +void TriggerContext::AdaptForAccessor(DbAccessor *accessor) { + // adapt created_vertices_ + auto it = created_vertices_.begin(); + for (const auto &created_vertex : created_vertices_) { + if (auto maybe_vertex = accessor->FindVertex(created_vertex.Gid(), storage::View::OLD); maybe_vertex) { + *it = *maybe_vertex; + ++it; + } + } + created_vertices_.erase(it, created_vertices_.end()); +} + +Trigger::Trigger(std::string name, const std::string &query, utils::SkipList *query_cache, + utils::SkipList *plan_cache, DbAccessor *db_accessor, utils::SpinLock *antlr_lock) + : name_(std::move(name)), + parsed_statements_{ParseQuery(query, {}, query_cache, antlr_lock)}, + identifiers_{GetPredefinedIdentifiers()} { + // We check immediately if the query is valid by trying to create a plan. 
+ GetPlan(plan_cache, db_accessor); +} + +std::shared_ptr Trigger::GetPlan(utils::SkipList *plan_cache, + DbAccessor *db_accessor) const { AstStorage ast_storage; ast_storage.properties_ = parsed_statements_.ast_storage.properties_; ast_storage.labels_ = parsed_statements_.ast_storage.labels_; ast_storage.edge_types_ = parsed_statements_.ast_storage.edge_types_; - auto plan = CypherQueryToPlan(parsed_statements_.stripped_query.hash(), std::move(ast_storage), - utils::Downcast(parsed_statements_.query), parsed_statements_.parameters, - plan_cache, dba, parsed_statements_.is_cacheable); + std::vector predefined_identifiers; + predefined_identifiers.reserve(identifiers_.size()); + std::transform(identifiers_.begin(), identifiers_.end(), std::back_inserter(predefined_identifiers), + [](auto &identifier) { return &identifier.first; }); + + return CypherQueryToPlan(parsed_statements_.stripped_query.hash(), std::move(ast_storage), + utils::Downcast(parsed_statements_.query), parsed_statements_.parameters, + plan_cache, db_accessor, parsed_statements_.is_cacheable, predefined_identifiers); +} + +void Trigger::Execute(utils::SkipList *plan_cache, DbAccessor *dba, + utils::MonotonicBufferResource *execution_memory, const double tsc_frequency, + const double max_execution_time_sec, std::atomic *is_shutting_down, + const TriggerContext &context) const { + auto plan = GetPlan(plan_cache, dba); + ExecutionContext ctx; ctx.db_accessor = dba; ctx.symbol_table = plan->symbol_table(); @@ -55,6 +107,14 @@ void Trigger::Execute(utils::SkipList *plan_cache, DbAccessor *d auto cursor = plan->plan().MakeCursor(execution_memory); Frame frame{plan->symbol_table().max_position(), execution_memory}; + for (const auto &[identifier, tag] : identifiers_) { + if (identifier.symbol_pos_ == -1) { + continue; + } + + frame[plan->symbol_table().at(identifier)] = context.GetTypedValue(tag); + } + while (cursor->Pull(frame, ctx)) ; diff --git a/src/query/trigger.hpp b/src/query/trigger.hpp index 
db59b3172..41fca2b14 100644 --- a/src/query/trigger.hpp +++ b/src/query/trigger.hpp @@ -1,16 +1,36 @@ #pragma once #include "query/cypher_query_interpreter.hpp" +#include "query/db_accessor.hpp" #include "query/frontend/ast/ast.hpp" namespace query { + +namespace trigger { +enum class IdentifierTag : uint8_t { CREATED_VERTICES }; +} // namespace trigger + +struct TriggerContext { + void RegisterCreatedVertex(VertexAccessor created_vertex); + + // Adapt the TriggerContext object inplace for a different DbAccessor + // (each derived accessor, e.g. VertexAccessor, gets adapted + // to the sent DbAccessor so they can be used safely) + void AdaptForAccessor(DbAccessor *accessor); + + TypedValue GetTypedValue(trigger::IdentifierTag tag) const; + + private: + std::vector created_vertices_; +}; + struct Trigger { - explicit Trigger(std::string name, std::string query, utils::SkipList *cache, - utils::SpinLock *antlr_lock); + explicit Trigger(std::string name, const std::string &query, utils::SkipList *query_cache, + utils::SkipList *plan_cache, DbAccessor *db_accessor, utils::SpinLock *antlr_lock); void Execute(utils::SkipList *plan_cache, DbAccessor *dba, utils::MonotonicBufferResource *execution_memory, double tsc_frequency, double max_execution_time_sec, - std::atomic *is_shutting_down) const; + std::atomic *is_shutting_down, const TriggerContext &context) const; bool operator==(const Trigger &other) const { return name_ == other.name_; } // NOLINTNEXTLINE (modernize-use-nullptr) @@ -19,10 +39,14 @@ struct Trigger { // NOLINTNEXTLINE (modernize-use-nullptr) bool operator<(const std::string &other) const { return name_ < other; } - const auto &name() const { return name_; } + const auto &name() const noexcept { return name_; } private: + std::shared_ptr GetPlan(utils::SkipList *plan_cache, DbAccessor *db_accessor) const; + std::string name_; ParsedQuery parsed_statements_; + + mutable std::vector> identifiers_; }; } // namespace query diff --git 
a/src/storage/v2/storage.cpp b/src/storage/v2/storage.cpp index b699dc250..d526b328a 100644 --- a/src/storage/v2/storage.cpp +++ b/src/storage/v2/storage.cpp @@ -404,17 +404,22 @@ Storage::Accessor::Accessor(Storage *storage) Storage::Accessor::Accessor(Accessor &&other) noexcept : storage_(other.storage_), + storage_guard_(std::move(other.storage_guard_)), transaction_(std::move(other.transaction_)), + commit_timestamp_(other.commit_timestamp_), is_transaction_active_(other.is_transaction_active_), config_(other.config_) { // Don't allow the other accessor to abort our transaction in destructor. other.is_transaction_active_ = false; + other.commit_timestamp_.reset(); } Storage::Accessor::~Accessor() { if (is_transaction_active_) { Abort(); } + + FinalizeTransaction(); } VertexAccessor Storage::Accessor::CreateVertex() { @@ -793,11 +798,10 @@ utils::BasicResult Storage::Accessor::Commit( // Save these so we can mark them used in the commit log. uint64_t start_timestamp = transaction_.start_timestamp; - uint64_t commit_timestamp; { std::unique_lock engine_guard(storage_->engine_lock_); - commit_timestamp = storage_->CommitTimestamp(desired_commit_timestamp); + commit_timestamp_.emplace(storage_->CommitTimestamp(desired_commit_timestamp)); // Before committing and validating vertices against unique constraints, // we have to update unique constraints with the vertices that are going @@ -821,7 +825,7 @@ utils::BasicResult Storage::Accessor::Commit( // No need to take any locks here because we modified this vertex and no // one else can touch it until we commit. 
unique_constraint_violation = - storage_->constraints_.unique_constraints.Validate(*prev.vertex, transaction_, commit_timestamp); + storage_->constraints_.unique_constraints.Validate(*prev.vertex, transaction_, *commit_timestamp_); if (unique_constraint_violation) { break; } @@ -838,7 +842,7 @@ utils::BasicResult Storage::Accessor::Commit( // Replica can log only the write transaction received from Main // so the Wal files are consistent if (storage_->replication_role_ == ReplicationRole::MAIN || desired_commit_timestamp.has_value()) { - storage_->AppendToWal(transaction_, commit_timestamp); + storage_->AppendToWal(transaction_, *commit_timestamp_); } // Take committed_transactions lock while holding the engine lock to @@ -848,12 +852,12 @@ utils::BasicResult Storage::Accessor::Commit( // TODO: release lock, and update all deltas to have a local copy // of the commit timestamp MG_ASSERT(transaction_.commit_timestamp != nullptr, "Invalid database state!"); - transaction_.commit_timestamp->store(commit_timestamp, std::memory_order_release); + transaction_.commit_timestamp->store(*commit_timestamp_, std::memory_order_release); // Replica can only update the last commit timestamp with // the commits received from main. if (storage_->replication_role_ == ReplicationRole::MAIN || desired_commit_timestamp.has_value()) { // Update the last commit timestamp - storage_->last_commit_timestamp_.store(commit_timestamp); + storage_->last_commit_timestamp_.store(*commit_timestamp_); } // Release engine lock because we don't have to hold it anymore // and emplace back could take a long time. 
@@ -862,13 +866,11 @@ utils::BasicResult Storage::Accessor::Commit( }); storage_->commit_log_->MarkFinished(start_timestamp); - storage_->commit_log_->MarkFinished(commit_timestamp); } } if (unique_constraint_violation) { Abort(); - storage_->commit_log_->MarkFinished(commit_timestamp); return *unique_constraint_violation; } } @@ -1041,6 +1043,13 @@ void Storage::Accessor::Abort() { is_transaction_active_ = false; } +void Storage::Accessor::FinalizeTransaction() { + if (commit_timestamp_) { + storage_->commit_log_->MarkFinished(*commit_timestamp_); + commit_timestamp_.reset(); + } +} + const std::string &Storage::LabelToName(LabelId label) const { return name_id_mapper_.IdToName(label.AsUint()); } const std::string &Storage::PropertyToName(PropertyId property) const { diff --git a/src/storage/v2/storage.hpp b/src/storage/v2/storage.hpp index 6e3275caa..2d1b77f70 100644 --- a/src/storage/v2/storage.hpp +++ b/src/storage/v2/storage.hpp @@ -300,6 +300,8 @@ class Storage final { /// @throw std::bad_alloc void Abort(); + void FinalizeTransaction(); + private: /// @throw std::bad_alloc VertexAccessor CreateVertex(storage::Gid gid); @@ -310,6 +312,7 @@ class Storage final { Storage *storage_; std::shared_lock storage_guard_; Transaction transaction_; + std::optional commit_timestamp_; bool is_transaction_active_; Config::Items config_; }; diff --git a/tests/unit/query_common.hpp b/tests/unit/query_common.hpp index 8f8c6eb66..67698075f 100644 --- a/tests/unit/query_common.hpp +++ b/tests/unit/query_common.hpp @@ -463,7 +463,7 @@ auto GetMerge(AstStorage &storage, Pattern *pattern, OnMatch on_match, OnCreate #define MATCH(...) query::test_common::GetWithPatterns(storage.Create(), {__VA_ARGS__}) #define WHERE(expr) storage.Create((expr)) #define CREATE(...) query::test_common::GetWithPatterns(storage.Create(), {__VA_ARGS__}) -#define IDENT(name) storage.Create((name)) +#define IDENT(...) storage.Create(__VA_ARGS__) #define LITERAL(val) storage.Create((val)) #define LIST(...) 
storage.Create(std::vector{__VA_ARGS__}) #define MAP(...) \ diff --git a/tests/unit/query_semantic.cpp b/tests/unit/query_semantic.cpp index 4d243382a..1e19f6594 100644 --- a/tests/unit/query_semantic.cpp +++ b/tests/unit/query_semantic.cpp @@ -3,6 +3,7 @@ #include "gtest/gtest.h" +#include "query/exceptions.hpp" #include "query/frontend/ast/ast.hpp" #include "query/frontend/semantic/symbol_generator.hpp" #include "query/frontend/semantic/symbol_table.hpp" @@ -1093,3 +1094,53 @@ TEST(TestSymbolTable, CreateAnonymousSymbolWithExistingUserSymbolCalledAnon) { auto anon2 = symbol_table.CreateAnonymousSymbol(); ASSERT_EQ(anon2.name_, "anon2"); } + +TEST_F(TestSymbolGenerator, PredefinedIdentifiers) { + auto *first_op = IDENT("first_op", false); + auto *second_op = IDENT("second_op", false); + // RETURN first_op + second_op AS result + auto query = QUERY(SINGLE_QUERY(RETURN(ADD(first_op, second_op), AS("result")))); + EXPECT_THROW(query::MakeSymbolTable(query), SemanticException); + EXPECT_THROW(query::MakeSymbolTable(query, {first_op}), SemanticException); + EXPECT_THROW(query::MakeSymbolTable(query, {second_op}), SemanticException); + auto symbol_table = query::MakeSymbolTable(query, {first_op, second_op}); + ASSERT_EQ(symbol_table.max_position(), 3); + + // predefined identifier can only be used in one scope + // RETURN first_op + second_op AS result UNION RETURN second_op + first_op AS result + query = QUERY(SINGLE_QUERY(RETURN(ADD(first_op, second_op), AS("result"))), + UNION(SINGLE_QUERY(RETURN(ADD(second_op, first_op), AS("result"))))); + ASSERT_THROW(query::MakeSymbolTable(query, {first_op, second_op}), SemanticException); + + // predefined identifier can be introduced in any of the scope + // different predefined identifiers can be introduced in different scopes + // RETURN first_op AS result UNION RETURN second_op AS result + query = QUERY(SINGLE_QUERY(RETURN(first_op, AS("result"))), UNION(SINGLE_QUERY(RETURN(second_op, AS("result"))))); + 
ASSERT_THROW(query::MakeSymbolTable(query), SemanticException); + symbol_table = query::MakeSymbolTable(query, {first_op, second_op}); + ASSERT_EQ(symbol_table.max_position(), 5); + + // WITH statement resets the scope, but the predefined identifier is okay + // because it's the first introduction of it in the query + // WITH 1 as one RETURN first_op AS first + query = QUERY(SINGLE_QUERY(WITH(LITERAL(1), AS("one")), RETURN(first_op, AS("first")))); + ASSERT_THROW(query::MakeSymbolTable(query), SemanticException); + symbol_table = query::MakeSymbolTable(query, {first_op}); + ASSERT_EQ(symbol_table.max_position(), 3); + + // In the first scope, first_op represents identifier created by match, + // in the second it represent the predefined identifier + // MATCH(first_op) WITH first_op as n RETURN first_op, n + query = QUERY(SINGLE_QUERY(MATCH(PATTERN(NODE("first_op"))), WITH("first_op", AS("n")), RETURN("first_op", "n"))); + ASSERT_THROW(query::MakeSymbolTable(query), SemanticException); + symbol_table = query::MakeSymbolTable(query, {first_op}); + ASSERT_EQ(symbol_table.max_position(), 6); + + // You cannot redaclare the predefined identifier in the same scope + // UNWIND first_op as u CREATE(first_op {prop: u}) + auto unwind = UNWIND(first_op, AS("u")); + auto node = NODE("first_op"); + node->properties_[storage.GetPropertyIx("prop")] = dynamic_cast(unwind->named_expression_->expression_); + query = QUERY(SINGLE_QUERY(unwind, CREATE(PATTERN(node)))); + ASSERT_THROW(query::MakeSymbolTable(query, {first_op}), SemanticException); +} From 11c0dde11cc1445a845537967c2bebf16d69376c Mon Sep 17 00:00:00 2001 From: antonio2368 Date: Tue, 4 May 2021 13:00:07 +0200 Subject: [PATCH 43/63] Support removed vertices in the triggers (#140) --- src/query/cypher_query_interpreter.cpp | 25 +++---- src/query/cypher_query_interpreter.hpp | 6 +- src/query/db_accessor.hpp | 14 +++- src/query/interpreter.cpp | 56 +++++++++++++--- src/query/interpreter.hpp | 23 +------ 
src/query/plan/operator.cpp | 4 ++ src/query/trigger.cpp | 92 +++++++++++++++++--------- src/query/trigger.hpp | 26 +++++--- src/storage/v2/mvcc.hpp | 11 ++- src/storage/v2/storage.cpp | 13 ++-- src/storage/v2/storage.hpp | 2 +- src/storage/v2/vertex_accessor.cpp | 12 ++-- src/storage/v2/vertex_accessor.hpp | 18 ++++- tests/unit/storage_v2.cpp | 46 +++++++++++++ 14 files changed, 245 insertions(+), 103 deletions(-) diff --git a/src/query/cypher_query_interpreter.cpp b/src/query/cypher_query_interpreter.cpp index b86219364..f0b322862 100644 --- a/src/query/cypher_query_interpreter.cpp +++ b/src/query/cypher_query_interpreter.cpp @@ -103,7 +103,6 @@ ParsedQuery ParseQuery(const std::string &query_string, const std::map MakeLogicalPlan(AstStorage ast_storage, CypherQuery *query, const Parameters ¶meters, DbAccessor *db_accessor, const std::vector &predefined_identifiers) { @@ -114,26 +113,28 @@ std::unique_ptr MakeLogicalPlan(AstStorage ast_storage, CypherQuery return std::make_unique(std::move(root), cost, std::move(ast_storage), std::move(symbol_table)); } -} // namespace std::shared_ptr CypherQueryToPlan(uint64_t hash, AstStorage ast_storage, CypherQuery *query, const Parameters ¶meters, utils::SkipList *plan_cache, - DbAccessor *db_accessor, const bool is_cacheable, + DbAccessor *db_accessor, const std::vector &predefined_identifiers) { - auto plan_cache_access = plan_cache->access(); - auto it = plan_cache_access.find(hash); - if (it != plan_cache_access.end()) { - if (it->second->IsExpired()) { - plan_cache_access.remove(hash); - } else { - return it->second; + std::optional::Accessor> plan_cache_access; + if (plan_cache) { + plan_cache_access.emplace(plan_cache->access()); + auto it = plan_cache_access->find(hash); + if (it != plan_cache_access->end()) { + if (it->second->IsExpired()) { + plan_cache_access->remove(hash); + } else { + return it->second; + } } } auto plan = std::make_shared( MakeLogicalPlan(std::move(ast_storage), query, parameters, db_accessor, 
predefined_identifiers)); - if (is_cacheable) { - plan_cache_access.insert({hash, plan}); + if (plan_cache_access) { + plan_cache_access->insert({hash, plan}); } return plan; } diff --git a/src/query/cypher_query_interpreter.hpp b/src/query/cypher_query_interpreter.hpp index 5170cd93c..2d763eec6 100644 --- a/src/query/cypher_query_interpreter.hpp +++ b/src/query/cypher_query_interpreter.hpp @@ -129,6 +129,10 @@ class SingleNodeLogicalPlan final : public LogicalPlan { SymbolTable symbol_table_; }; +std::unique_ptr MakeLogicalPlan(AstStorage ast_storage, CypherQuery *query, const Parameters ¶meters, + DbAccessor *db_accessor, + const std::vector &predefined_identifiers); + /** * Return the parsed *Cypher* query's AST cached logical plan, or create and * cache a fresh one if it doesn't yet exist. @@ -139,7 +143,7 @@ class SingleNodeLogicalPlan final : public LogicalPlan { */ std::shared_ptr CypherQueryToPlan(uint64_t hash, AstStorage ast_storage, CypherQuery *query, const Parameters ¶meters, utils::SkipList *plan_cache, - DbAccessor *db_accessor, bool is_cacheable = true, + DbAccessor *db_accessor, const std::vector &predefined_identifiers = {}); } // namespace query diff --git a/src/query/db_accessor.hpp b/src/query/db_accessor.hpp index a3ec2b019..0189cfa7c 100644 --- a/src/query/db_accessor.hpp +++ b/src/query/db_accessor.hpp @@ -246,8 +246,18 @@ class DbAccessor final { return accessor_->DetachDeleteVertex(&vertex_accessor->impl_); } - storage::Result RemoveVertex(VertexAccessor *vertex_accessor) { - return accessor_->DeleteVertex(&vertex_accessor->impl_); + storage::Result> RemoveVertex(VertexAccessor *vertex_accessor) { + auto res = accessor_->DeleteVertex(&vertex_accessor->impl_); + if (res.HasError()) { + return res.GetError(); + } + + const auto &value = res.GetValue(); + if (!value) { + return std::optional{}; + } + + return std::make_optional(*value); } storage::PropertyId NameToProperty(const std::string_view &name) { return 
accessor_->NameToProperty(name); } diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp index 00c9c8520..1cd95cc61 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -598,6 +598,35 @@ using RWType = plan::ReadWriteTypeChecker::RWType; Interpreter::Interpreter(InterpreterContext *interpreter_context) : interpreter_context_(interpreter_context) { MG_ASSERT(interpreter_context_, "Interpreter context must not be NULL"); + // try { + // { + // auto storage_acc = interpreter_context_->db->Access(); + // DbAccessor dba(&storage_acc); + // auto triggers_acc = interpreter_context_->before_commit_triggers.access(); + // triggers_acc.insert(Trigger{"BeforeDelete", "UNWIND deletedVertices as u CREATE(:DELETED {id: u.id + 10})", + // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock}); + // // triggers_acc.insert(Trigger{"BeforeDelete2", "UNWIND deletedVertices as u SET u.deleted = 0", + // // &interpreter_context_->ast_cache, &dba, + // // &interpreter_context_->antlr_lock}); + // // triggers_acc.insert(Trigger{"BeforeDeleteProcedure", "CALL script.procedure(deletedVertices) YIELD * RETURN + // // *", + // // &interpreter_context_->ast_cache, &dba, + // // &interpreter_context_->antlr_lock}); + // triggers_acc.insert(Trigger{"BeforeCreator", "UNWIND createdVertices as u SET u.before = u.id + 10", + // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock}); + // } + // { + // auto storage_acc = interpreter_context->db->Access(); + // DbAccessor dba(&storage_acc); + // auto triggers_acc = interpreter_context->after_commit_triggers.access(); + // triggers_acc.insert(Trigger{"AfterDelete", "UNWIND deletedVertices as u CREATE(:DELETED {id: u.id + 100})", + // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock}); + // triggers_acc.insert(Trigger{"AfterCreator", "UNWIND createdVertices as u SET u.after = u.id + 100", + // &interpreter_context_->ast_cache, &dba, 
&interpreter_context_->antlr_lock}); + // } + //} catch (const utils::BasicException &e) { + // spdlog::critical("Failed to create a trigger because: {}", e.what()); + //} } PreparedQuery Interpreter::PrepareTransactionQuery(std::string_view query_upper) { @@ -676,7 +705,8 @@ PreparedQuery PrepareCypherQuery(ParsedQuery parsed_query, std::mapplan_cache, dba); + parsed_query.parameters, + parsed_query.is_cacheable ? &interpreter_context->plan_cache : nullptr, dba); summary->insert_or_assign("cost_estimate", plan->cost()); auto rw_type_checker = plan::ReadWriteTypeChecker(); @@ -730,7 +760,7 @@ PreparedQuery PrepareExplainQuery(ParsedQuery parsed_query, std::mapplan_cache, dba, parsed_inner_query.is_cacheable); + parsed_inner_query.parameters, parsed_inner_query.is_cacheable ? &interpreter_context->plan_cache : nullptr, dba); std::stringstream printed_plan; plan::PrettyPrint(*dba, &cypher_query_plan->plan(), &printed_plan); @@ -806,7 +836,7 @@ PreparedQuery PrepareProfileQuery(ParsedQuery parsed_query, bool in_explicit_tra auto cypher_query_plan = CypherQueryToPlan( parsed_inner_query.stripped_query.hash(), std::move(parsed_inner_query.ast_storage), cypher_query, - parsed_inner_query.parameters, &interpreter_context->plan_cache, dba, parsed_inner_query.is_cacheable); + parsed_inner_query.parameters, parsed_inner_query.is_cacheable ? 
&interpreter_context->plan_cache : nullptr, dba); auto rw_type_checker = plan::ReadWriteTypeChecker(); rw_type_checker.InferRWType(const_cast(cypher_query_plan->plan())); @@ -1404,7 +1434,7 @@ void Interpreter::Abort() { in_explicit_transaction_ = false; if (!db_accessor_) return; db_accessor_->Abort(); - execution_db_accessor_ = std::nullopt; + execution_db_accessor_.reset(); db_accessor_.reset(); trigger_context_.reset(); } @@ -1423,9 +1453,9 @@ void RunTriggersIndividually(const utils::SkipList &triggers, Interpret trigger_context.AdaptForAccessor(&db_accessor); try { - trigger.Execute(&interpreter_context->plan_cache, &db_accessor, &execution_memory, - *interpreter_context->tsc_frequency, interpreter_context->execution_timeout_sec, - &interpreter_context->is_shutting_down, trigger_context); + trigger.Execute(&db_accessor, &execution_memory, *interpreter_context->tsc_frequency, + interpreter_context->execution_timeout_sec, &interpreter_context->is_shutting_down, + trigger_context); } catch (const utils::BasicException &exception) { spdlog::warn("Trigger '{}' failed with exception:\n{}", trigger.name(), exception.what()); db_accessor.Abort(); @@ -1472,9 +1502,15 @@ void Interpreter::Commit() { for (const auto &trigger : interpreter_context_->before_commit_triggers.access()) { spdlog::debug("Executing trigger '{}'", trigger.name()); utils::MonotonicBufferResource execution_memory{kExecutionMemoryBlockSize}; - trigger.Execute(&interpreter_context_->plan_cache, &*execution_db_accessor_, &execution_memory, - *interpreter_context_->tsc_frequency, interpreter_context_->execution_timeout_sec, - &interpreter_context_->is_shutting_down, *trigger_context_); + AdvanceCommand(); + try { + trigger.Execute(&*execution_db_accessor_, &execution_memory, *interpreter_context_->tsc_frequency, + interpreter_context_->execution_timeout_sec, &interpreter_context_->is_shutting_down, + *trigger_context_); + } catch (const utils::BasicException &e) { + throw utils::BasicException( + 
fmt::format("Trigger '{}' caused the transaction to fail.\nException: {}", trigger.name(), e.what())); + } } SPDLOG_DEBUG("Finished executing before commit triggers"); } diff --git a/src/query/interpreter.hpp b/src/query/interpreter.hpp index 0eff59c4d..3da93ec6a 100644 --- a/src/query/interpreter.hpp +++ b/src/query/interpreter.hpp @@ -147,28 +147,7 @@ struct PreparedQuery { * been passed to an `Interpreter` instance. */ struct InterpreterContext { - explicit InterpreterContext(storage::Storage *db) : db(db) { - // try { - // { - // auto storage_acc = db->Access(); - // DbAccessor dba(&storage_acc); - // auto triggers_acc = before_commit_triggers.access(); - // triggers_acc.insert(Trigger{"BeforeCreator", "UNWIND createdVertices as u SET u.before = u.id + 1", - // &ast_cache, - // &plan_cache, &dba, &antlr_lock}); - // } - // { - // auto storage_acc = db->Access(); - // DbAccessor dba(&storage_acc); - // auto triggers_acc = after_commit_triggers.access(); - // triggers_acc.insert(Trigger{"AfterCreator", "UNWIND createdVertices as u SET u.after = u.id - 1", - // &ast_cache, - // &plan_cache, &dba, &antlr_lock}); - // } - // } catch (const utils::BasicException &e) { - // spdlog::critical("Failed to create a trigger because: {}", e.what()); - // } - } + explicit InterpreterContext(storage::Storage *db) : db(db) {} storage::Storage *db; diff --git a/src/query/plan/operator.cpp b/src/query/plan/operator.cpp index 7fe2c7327..c4af42f50 100644 --- a/src/query/plan/operator.cpp +++ b/src/query/plan/operator.cpp @@ -1871,6 +1871,10 @@ bool Delete::DeleteCursor::Pull(Frame &frame, ExecutionContext &context) { throw QueryRuntimeException("Unexpected error when deleting a node."); } } + + if (context.trigger_context && res.GetValue()) { + context.trigger_context->RegisterDeletedVertex(*res.GetValue()); + } } break; } diff --git a/src/query/trigger.cpp b/src/query/trigger.cpp index 547ec06cb..787bb834b 100644 --- a/src/query/trigger.cpp +++ b/src/query/trigger.cpp @@ -10,24 
+10,40 @@ namespace query { namespace { std::vector> GetPredefinedIdentifiers() { - return {{{"createdVertices", false}, trigger::IdentifierTag::CREATED_VERTICES}}; + return {{{"createdVertices", false}, trigger::IdentifierTag::CREATED_VERTICES}, + {{"deletedVertices", false}, trigger::IdentifierTag::DELETED_VERTICES}}; } + +template +concept ConvertableToTypedValue = requires(T value) { + {TypedValue{value}}; +}; + +template +TypedValue ToTypedValue(const std::vector &values) { + std::vector typed_values; + typed_values.reserve(values.size()); + std::transform(std::begin(values), std::end(values), std::back_inserter(typed_values), + [](const auto &accessor) { return TypedValue(accessor); }); + return TypedValue(typed_values); +} + } // namespace void TriggerContext::RegisterCreatedVertex(const VertexAccessor created_vertex) { created_vertices_.push_back(created_vertex); } +void TriggerContext::RegisterDeletedVertex(const VertexAccessor deleted_vertex) { + deleted_vertices_.push_back(deleted_vertex); +} + TypedValue TriggerContext::GetTypedValue(const trigger::IdentifierTag tag) const { switch (tag) { - case trigger::IdentifierTag::CREATED_VERTICES: { - std::vector typed_created_vertices; - typed_created_vertices.reserve(created_vertices_.size()); - std::transform(std::begin(created_vertices_), std::end(created_vertices_), - std::back_inserter(typed_created_vertices), - [](const auto &accessor) { return TypedValue(accessor); }); - return TypedValue(typed_created_vertices); - } + case trigger::IdentifierTag::CREATED_VERTICES: + return ToTypedValue(created_vertices_); + case trigger::IdentifierTag::DELETED_VERTICES: + return ToTypedValue(deleted_vertices_); } } @@ -41,49 +57,63 @@ void TriggerContext::AdaptForAccessor(DbAccessor *accessor) { } } created_vertices_.erase(it, created_vertices_.end()); + + // deleted_vertices_ should keep the transaction context of the transaction which deleted it + // because no other transaction can modify an object after it's deleted 
so it should be the + // latest state of the object } Trigger::Trigger(std::string name, const std::string &query, utils::SkipList *query_cache, - utils::SkipList *plan_cache, DbAccessor *db_accessor, utils::SpinLock *antlr_lock) - : name_(std::move(name)), - parsed_statements_{ParseQuery(query, {}, query_cache, antlr_lock)}, - identifiers_{GetPredefinedIdentifiers()} { + DbAccessor *db_accessor, utils::SpinLock *antlr_lock) + : name_(std::move(name)), parsed_statements_{ParseQuery(query, {}, query_cache, antlr_lock)} { // We check immediately if the query is valid by trying to create a plan. - GetPlan(plan_cache, db_accessor); + GetPlan(db_accessor); } -std::shared_ptr Trigger::GetPlan(utils::SkipList *plan_cache, - DbAccessor *db_accessor) const { +Trigger::TriggerPlan::TriggerPlan(std::unique_ptr logical_plan, std::vector identifiers) + : cached_plan(std::move(logical_plan)), identifiers(std::move(identifiers)) {} + +std::shared_ptr Trigger::GetPlan(DbAccessor *db_accessor) const { + std::lock_guard plan_guard{plan_lock_}; + if (trigger_plan_ && !trigger_plan_->cached_plan.IsExpired()) { + return trigger_plan_; + } + + auto identifiers = GetPredefinedIdentifiers(); + AstStorage ast_storage; ast_storage.properties_ = parsed_statements_.ast_storage.properties_; ast_storage.labels_ = parsed_statements_.ast_storage.labels_; ast_storage.edge_types_ = parsed_statements_.ast_storage.edge_types_; std::vector predefined_identifiers; - predefined_identifiers.reserve(identifiers_.size()); - std::transform(identifiers_.begin(), identifiers_.end(), std::back_inserter(predefined_identifiers), + predefined_identifiers.reserve(identifiers.size()); + std::transform(identifiers.begin(), identifiers.end(), std::back_inserter(predefined_identifiers), [](auto &identifier) { return &identifier.first; }); - return CypherQueryToPlan(parsed_statements_.stripped_query.hash(), std::move(ast_storage), - utils::Downcast(parsed_statements_.query), parsed_statements_.parameters, - plan_cache, 
db_accessor, parsed_statements_.is_cacheable, predefined_identifiers); + auto logical_plan = MakeLogicalPlan(std::move(ast_storage), utils::Downcast(parsed_statements_.query), + parsed_statements_.parameters, db_accessor, predefined_identifiers); + + trigger_plan_ = std::make_shared(std::move(logical_plan), std::move(identifiers)); + return trigger_plan_; } -void Trigger::Execute(utils::SkipList *plan_cache, DbAccessor *dba, - utils::MonotonicBufferResource *execution_memory, const double tsc_frequency, +void Trigger::Execute(DbAccessor *dba, utils::MonotonicBufferResource *execution_memory, const double tsc_frequency, const double max_execution_time_sec, std::atomic *is_shutting_down, const TriggerContext &context) const { - auto plan = GetPlan(plan_cache, dba); + auto trigger_plan = GetPlan(dba); + MG_ASSERT(trigger_plan, "Invalid trigger plan received"); + auto &[plan, identifiers] = *trigger_plan; ExecutionContext ctx; ctx.db_accessor = dba; - ctx.symbol_table = plan->symbol_table(); + ctx.symbol_table = plan.symbol_table(); ctx.evaluation_context.timestamp = std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()) .count(); ctx.evaluation_context.parameters = parsed_statements_.parameters; - ctx.evaluation_context.properties = NamesToProperties(plan->ast_storage().properties_, dba); - ctx.evaluation_context.labels = NamesToLabels(plan->ast_storage().labels_, dba); + ctx.evaluation_context.properties = NamesToProperties(plan.ast_storage().properties_, dba); + ctx.evaluation_context.labels = NamesToLabels(plan.ast_storage().labels_, dba); ctx.execution_tsc_timer = utils::TSCTimer(tsc_frequency); ctx.max_execution_time_sec = max_execution_time_sec; ctx.is_shutting_down = is_shutting_down; @@ -105,14 +135,14 @@ void Trigger::Execute(utils::SkipList *plan_cache, DbAccessor *d utils::PoolResource pool_memory(128, 1024, &monotonic_memory); ctx.evaluation_context.memory = &pool_memory; - auto cursor = plan->plan().MakeCursor(execution_memory); - 
Frame frame{plan->symbol_table().max_position(), execution_memory}; - for (const auto &[identifier, tag] : identifiers_) { + auto cursor = plan.plan().MakeCursor(execution_memory); + Frame frame{plan.symbol_table().max_position(), execution_memory}; + for (const auto &[identifier, tag] : identifiers) { if (identifier.symbol_pos_ == -1) { continue; } - frame[plan->symbol_table().at(identifier)] = context.GetTypedValue(tag); + frame[plan.symbol_table().at(identifier)] = context.GetTypedValue(tag); } while (cursor->Pull(frame, ctx)) diff --git a/src/query/trigger.hpp b/src/query/trigger.hpp index 41fca2b14..a2bbd6865 100644 --- a/src/query/trigger.hpp +++ b/src/query/trigger.hpp @@ -1,17 +1,16 @@ #pragma once - #include "query/cypher_query_interpreter.hpp" -#include "query/db_accessor.hpp" #include "query/frontend/ast/ast.hpp" namespace query { namespace trigger { -enum class IdentifierTag : uint8_t { CREATED_VERTICES }; +enum class IdentifierTag : uint8_t { CREATED_VERTICES, DELETED_VERTICES }; } // namespace trigger struct TriggerContext { void RegisterCreatedVertex(VertexAccessor created_vertex); + void RegisterDeletedVertex(VertexAccessor deleted_vertex); // Adapt the TriggerContext object inplace for a different DbAccessor // (each derived accessor, e.g. 
VertexAccessor, gets adapted @@ -22,15 +21,15 @@ struct TriggerContext { private: std::vector created_vertices_; + std::vector deleted_vertices_; }; struct Trigger { explicit Trigger(std::string name, const std::string &query, utils::SkipList *query_cache, - utils::SkipList *plan_cache, DbAccessor *db_accessor, utils::SpinLock *antlr_lock); + DbAccessor *db_accessor, utils::SpinLock *antlr_lock); - void Execute(utils::SkipList *plan_cache, DbAccessor *dba, - utils::MonotonicBufferResource *execution_memory, double tsc_frequency, double max_execution_time_sec, - std::atomic *is_shutting_down, const TriggerContext &context) const; + void Execute(DbAccessor *dba, utils::MonotonicBufferResource *execution_memory, double tsc_frequency, + double max_execution_time_sec, std::atomic *is_shutting_down, const TriggerContext &context) const; bool operator==(const Trigger &other) const { return name_ == other.name_; } // NOLINTNEXTLINE (modernize-use-nullptr) @@ -42,11 +41,20 @@ struct Trigger { const auto &name() const noexcept { return name_; } private: - std::shared_ptr GetPlan(utils::SkipList *plan_cache, DbAccessor *db_accessor) const; + struct TriggerPlan { + using IdentifierInfo = std::pair; + + explicit TriggerPlan(std::unique_ptr logical_plan, std::vector identifiers); + + CachedPlan cached_plan; + std::vector identifiers; + }; + std::shared_ptr GetPlan(DbAccessor *db_accessor) const; std::string name_; ParsedQuery parsed_statements_; - mutable std::vector> identifiers_; + mutable utils::SpinLock plan_lock_; + mutable std::shared_ptr trigger_plan_; }; } // namespace query diff --git a/src/storage/v2/mvcc.hpp b/src/storage/v2/mvcc.hpp index 719cfa5d4..211fc87af 100644 --- a/src/storage/v2/mvcc.hpp +++ b/src/storage/v2/mvcc.hpp @@ -1,6 +1,5 @@ #pragma once -#include "storage/v2/delta.hpp" #include "storage/v2/property_value.hpp" #include "storage/v2/transaction.hpp" #include "storage/v2/view.hpp" @@ -15,6 +14,12 @@ namespace storage { /// caller to apply the deltas. 
template inline void ApplyDeltasForRead(Transaction *transaction, const Delta *delta, View view, const TCallback &callback) { + // if the transaction is not committed, then its deltas have transaction_id for the timestamp, otherwise they have + // its commit timestamp set. + // This allows the transaction to see its changes even though it's committed. + const auto commit_timestamp = transaction->commit_timestamp + ? transaction->commit_timestamp->load(std::memory_order_acquire) + : transaction->transaction_id; while (delta != nullptr) { auto ts = delta->timestamp->load(std::memory_order_acquire); auto cid = delta->command_id; @@ -26,13 +31,13 @@ inline void ApplyDeltasForRead(Transaction *transaction, const Delta *delta, Vie // We shouldn't undo our newest changes because the user requested a NEW // view of the database. - if (view == View::NEW && ts == transaction->transaction_id && cid <= transaction->command_id) { + if (view == View::NEW && ts == commit_timestamp && cid <= transaction->command_id) { break; } // We shouldn't undo our older changes because the user requested a OLD view // of the database. 
- if (view == View::OLD && ts == transaction->transaction_id && cid < transaction->command_id) { + if (view == View::OLD && ts == commit_timestamp && cid < transaction->command_id) { break; } diff --git a/src/storage/v2/storage.cpp b/src/storage/v2/storage.cpp index d526b328a..0c03a0bdf 100644 --- a/src/storage/v2/storage.cpp +++ b/src/storage/v2/storage.cpp @@ -13,6 +13,7 @@ #include "storage/v2/durability/paths.hpp" #include "storage/v2/durability/snapshot.hpp" #include "storage/v2/durability/wal.hpp" +#include "storage/v2/edge_accessor.hpp" #include "storage/v2/indices.hpp" #include "storage/v2/mvcc.hpp" #include "storage/v2/replication/config.hpp" @@ -460,7 +461,7 @@ std::optional Storage::Accessor::FindVertex(Gid gid, View view) return VertexAccessor::Create(&*it, &transaction_, &storage_->indices_, &storage_->constraints_, config_, view); } -Result Storage::Accessor::DeleteVertex(VertexAccessor *vertex) { +Result> Storage::Accessor::DeleteVertex(VertexAccessor *vertex) { MG_ASSERT(vertex->transaction_ == &transaction_, "VertexAccessor must be from the same transaction as the storage " "accessor when deleting a vertex!"); @@ -470,14 +471,17 @@ Result Storage::Accessor::DeleteVertex(VertexAccessor *vertex) { if (!PrepareForWrite(&transaction_, vertex_ptr)) return Error::SERIALIZATION_ERROR; - if (vertex_ptr->deleted) return false; + if (vertex_ptr->deleted) { + return std::optional{}; + } if (!vertex_ptr->in_edges.empty() || !vertex_ptr->out_edges.empty()) return Error::VERTEX_HAS_EDGES; CreateAndLinkDelta(&transaction_, vertex_ptr, Delta::RecreateObjectTag()); vertex_ptr->deleted = true; - return true; + return std::make_optional(vertex_ptr, &transaction_, &storage_->indices_, &storage_->constraints_, + config_, true); } Result Storage::Accessor::DetachDeleteVertex(VertexAccessor *vertex) { @@ -862,7 +866,6 @@ utils::BasicResult Storage::Accessor::Commit( // Release engine lock because we don't have to hold it anymore // and emplace back could take a long 
time. engine_guard.unlock(); - committed_transactions.emplace_back(std::move(transaction_)); }); storage_->commit_log_->MarkFinished(start_timestamp); @@ -1046,6 +1049,8 @@ void Storage::Accessor::Abort() { void Storage::Accessor::FinalizeTransaction() { if (commit_timestamp_) { storage_->commit_log_->MarkFinished(*commit_timestamp_); + storage_->committed_transactions_.WithLock( + [&](auto &committed_transactions) { committed_transactions.emplace_back(std::move(transaction_)); }); commit_timestamp_.reset(); } } diff --git a/src/storage/v2/storage.hpp b/src/storage/v2/storage.hpp index 2d1b77f70..7c3192a30 100644 --- a/src/storage/v2/storage.hpp +++ b/src/storage/v2/storage.hpp @@ -250,7 +250,7 @@ class Storage final { } /// @throw std::bad_alloc - Result DeleteVertex(VertexAccessor *vertex); + Result> DeleteVertex(VertexAccessor *vertex); /// @throw std::bad_alloc Result DetachDeleteVertex(VertexAccessor *vertex); diff --git a/src/storage/v2/vertex_accessor.cpp b/src/storage/v2/vertex_accessor.cpp index 234f61375..01c42e202 100644 --- a/src/storage/v2/vertex_accessor.cpp +++ b/src/storage/v2/vertex_accessor.cpp @@ -124,7 +124,7 @@ Result VertexAccessor::HasLabel(LabelId label, View view) const { } }); if (!exists) return Error::NONEXISTENT_OBJECT; - if (deleted) return Error::DELETED_OBJECT; + if (!for_deleted_ && deleted) return Error::DELETED_OBJECT; return has_label; } @@ -173,7 +173,7 @@ Result> VertexAccessor::Labels(View view) const { } }); if (!exists) return Error::NONEXISTENT_OBJECT; - if (deleted) return Error::DELETED_OBJECT; + if (!for_deleted_ && deleted) return Error::DELETED_OBJECT; return std::move(labels); } @@ -257,7 +257,7 @@ Result VertexAccessor::GetProperty(PropertyId property, View view } }); if (!exists) return Error::NONEXISTENT_OBJECT; - if (deleted) return Error::DELETED_OBJECT; + if (!for_deleted_ && deleted) return Error::DELETED_OBJECT; return std::move(value); } @@ -307,7 +307,7 @@ Result> VertexAccessor::Properties(View view } }); 
if (!exists) return Error::NONEXISTENT_OBJECT; - if (deleted) return Error::DELETED_OBJECT; + if (!for_deleted_ && deleted) return Error::DELETED_OBJECT; return std::move(properties); } @@ -505,7 +505,7 @@ Result VertexAccessor::InDegree(View view) const { } }); if (!exists) return Error::NONEXISTENT_OBJECT; - if (deleted) return Error::DELETED_OBJECT; + if (!for_deleted_ && deleted) return Error::DELETED_OBJECT; return degree; } @@ -543,7 +543,7 @@ Result VertexAccessor::OutDegree(View view) const { } }); if (!exists) return Error::NONEXISTENT_OBJECT; - if (deleted) return Error::DELETED_OBJECT; + if (!for_deleted_ && deleted) return Error::DELETED_OBJECT; return degree; } diff --git a/src/storage/v2/vertex_accessor.hpp b/src/storage/v2/vertex_accessor.hpp index 1c3e4d609..4bd6c4cf9 100644 --- a/src/storage/v2/vertex_accessor.hpp +++ b/src/storage/v2/vertex_accessor.hpp @@ -22,8 +22,13 @@ class VertexAccessor final { public: VertexAccessor(Vertex *vertex, Transaction *transaction, Indices *indices, Constraints *constraints, - Config::Items config) - : vertex_(vertex), transaction_(transaction), indices_(indices), constraints_(constraints), config_(config) {} + Config::Items config, bool for_deleted = false) + : vertex_(vertex), + transaction_(transaction), + indices_(indices), + constraints_(constraints), + config_(config), + for_deleted_(for_deleted) {} static std::optional Create(Vertex *vertex, Transaction *transaction, Indices *indices, Constraints *constraints, Config::Items config, View view); @@ -90,6 +95,15 @@ class VertexAccessor final { Indices *indices_; Constraints *constraints_; Config::Items config_; + + // if the accessor was created for a deleted vertex. + // Accessor behaves differently for some methods based on this + // flag. + // E.g. If this field is set to true, GetProperty will return the property of the node + // even though the node is deleted. + // All the write operations, and operators used for traversal (e.g. 
InEdges) will still + // return an error if it's called for a deleted vertex. + bool for_deleted_{false}; }; } // namespace storage diff --git a/tests/unit/storage_v2.cpp b/tests/unit/storage_v2.cpp index 309694fe2..62c3dd76b 100644 --- a/tests/unit/storage_v2.cpp +++ b/tests/unit/storage_v2.cpp @@ -3,7 +3,9 @@ #include +#include "storage/v2/property_value.hpp" #include "storage/v2/storage.hpp" +#include "storage/v2/vertex_accessor.hpp" using testing::UnorderedElementsAre; @@ -2530,3 +2532,47 @@ TEST(StorageV2, VertexVisibilityMultipleTransactions) { acc.Abort(); } } + +// NOLINTNEXTLINE(hicpp-special-member-functions) +TEST(StorageV2, DeletedVertexAccessor) { + storage::Storage store; + + const auto property = store.NameToProperty("property"); + const storage::PropertyValue property_value{"property_value"}; + + std::optional gid; + // Create the vertex + { + auto acc = store.Access(); + auto vertex = acc.CreateVertex(); + gid = vertex.Gid(); + ASSERT_FALSE(vertex.SetProperty(property, property_value).HasError()); + ASSERT_FALSE(acc.Commit().HasError()); + } + + auto acc = store.Access(); + auto vertex = acc.FindVertex(*gid, storage::View::OLD); + ASSERT_TRUE(vertex); + auto maybe_deleted_vertex = acc.DeleteVertex(&*vertex); + ASSERT_FALSE(maybe_deleted_vertex.HasError()); + + auto deleted_vertex = maybe_deleted_vertex.GetValue(); + ASSERT_TRUE(deleted_vertex); + // you cannot modify deleted vertex + ASSERT_TRUE(deleted_vertex->ClearProperties().HasError()); + + // you can call read only methods + const auto maybe_property = deleted_vertex->GetProperty(property, storage::View::OLD); + ASSERT_FALSE(maybe_property.HasError()); + ASSERT_EQ(property_value, *maybe_property); + ASSERT_FALSE(acc.Commit().HasError()); + + { + // you can call read only methods and get valid results even after the + // transaction which deleted the vertex committed, but only if the transaction + // accessor is still alive + const auto maybe_property = deleted_vertex->GetProperty(property, 
storage::View::OLD); + ASSERT_FALSE(maybe_property.HasError()); + ASSERT_EQ(property_value, *maybe_property); + } +} From b459639968ca7e98193c505a8f65b51a92fb19c5 Mon Sep 17 00:00:00 2001 From: antonio2368 Date: Mon, 10 May 2021 10:10:01 +0200 Subject: [PATCH 44/63] Other predefined variables (#143) --- src/mg_import_csv.cpp | 12 +- src/query/common.hpp | 20 +- src/query/db_accessor.hpp | 68 +++++-- src/query/interpreter.cpp | 49 +++-- src/query/plan/operator.cpp | 177 +++++++++++++---- src/query/trigger.cpp | 277 ++++++++++++++++++++++++--- src/query/trigger.hpp | 182 +++++++++++++++++- src/storage/v2/edge_accessor.cpp | 50 ++++- src/storage/v2/edge_accessor.hpp | 26 ++- src/storage/v2/storage.cpp | 44 +++-- src/storage/v2/storage.hpp | 9 +- src/storage/v2/vertex_accessor.cpp | 47 +++-- src/storage/v2/vertex_accessor.hpp | 13 +- src/utils/concepts.hpp | 7 + tests/concurrent/storage_indices.cpp | 12 +- tests/unit/storage_v2.cpp | 112 +++++------ tests/unit/storage_v2_edge.cpp | 116 +++++------ 17 files changed, 933 insertions(+), 288 deletions(-) create mode 100644 src/utils/concepts.hpp diff --git a/src/mg_import_csv.cpp b/src/mg_import_csv.cpp index 2e3b08869..152ac22c9 100644 --- a/src/mg_import_csv.cpp +++ b/src/mg_import_csv.cpp @@ -436,9 +436,9 @@ void ProcessNodeRow(storage::Storage *store, const std::vector &fields, c } else { pv_id = storage::PropertyValue(node_id.id); } - auto node_property = node.SetProperty(acc.NameToProperty(field.name), pv_id); - if (!node_property.HasValue()) throw LoadException("Couldn't add property '{}' to the node", field.name); - if (!*node_property) throw LoadException("The property '{}' already exists", field.name); + auto old_node_property = node.SetProperty(acc.NameToProperty(field.name), pv_id); + if (!old_node_property.HasValue()) throw LoadException("Couldn't add property '{}' to the node", field.name); + if (!old_node_property->IsNull()) throw LoadException("The property '{}' already exists", field.name); } id = node_id; 
} else if (field.type == "LABEL") { @@ -448,9 +448,9 @@ void ProcessNodeRow(storage::Storage *store, const std::vector &fields, c if (!*node_label) throw LoadException("The label '{}' already exists", label); } } else if (field.type != "IGNORE") { - auto node_property = node.SetProperty(acc.NameToProperty(field.name), StringToValue(value, field.type)); - if (!node_property.HasValue()) throw LoadException("Couldn't add property '{}' to the node", field.name); - if (!*node_property) throw LoadException("The property '{}' already exists", field.name); + auto old_node_property = node.SetProperty(acc.NameToProperty(field.name), StringToValue(value, field.type)); + if (!old_node_property.HasValue()) throw LoadException("Couldn't add property '{}' to the node", field.name); + if (!old_node_property->IsNull()) throw LoadException("The property '{}' already exists", field.name); } } for (const auto &label : additional_labels) { diff --git a/src/query/common.hpp b/src/query/common.hpp index 19b302058..72f020c3f 100644 --- a/src/query/common.hpp +++ b/src/query/common.hpp @@ -1,6 +1,7 @@ /// @file #pragma once +#include #include #include @@ -10,6 +11,7 @@ #include "query/frontend/semantic/symbol.hpp" #include "query/typed_value.hpp" #include "storage/v2/id_types.hpp" +#include "storage/v2/property_value.hpp" #include "storage/v2/view.hpp" #include "utils/logging.hpp" @@ -61,15 +63,22 @@ inline void ExpectType(const Symbol &symbol, const TypedValue &value, TypedValue throw QueryRuntimeException("Expected a {} for '{}', but got {}.", expected, symbol.name(), value.type()); } +template +concept AccessorWithSetProperty = requires(T accessor, const storage::PropertyId key, + const storage::PropertyValue new_value) { + { accessor.SetProperty(key, new_value) } + ->std::same_as>; +}; + /// Set a property `value` mapped with given `key` on a `record`. 
/// /// @throw QueryRuntimeException if value cannot be set as a property value -template -void PropsSetChecked(TRecordAccessor *record, const storage::PropertyId &key, const TypedValue &value) { +template +storage::PropertyValue PropsSetChecked(T *record, const storage::PropertyId &key, const TypedValue &value) { try { - auto maybe_error = record->SetProperty(key, storage::PropertyValue(value)); - if (maybe_error.HasError()) { - switch (maybe_error.GetError()) { + auto maybe_old_value = record->SetProperty(key, storage::PropertyValue(value)); + if (maybe_old_value.HasError()) { + switch (maybe_old_value.GetError()) { case storage::Error::SERIALIZATION_ERROR: throw QueryRuntimeException("Can't serialize due to concurrent operations."); case storage::Error::DELETED_OBJECT: @@ -81,6 +90,7 @@ void PropsSetChecked(TRecordAccessor *record, const storage::PropertyId &key, co throw QueryRuntimeException("Unexpected error when setting a property."); } } + return std::move(*maybe_old_value); } catch (const TypedValueException &) { throw QueryRuntimeException("'{}' cannot be used as a property value.", value.type()); } diff --git a/src/query/db_accessor.hpp b/src/query/db_accessor.hpp index 0189cfa7c..26b7b6a19 100644 --- a/src/query/db_accessor.hpp +++ b/src/query/db_accessor.hpp @@ -43,6 +43,8 @@ class EdgeAccessor final { public: explicit EdgeAccessor(storage::EdgeAccessor impl) : impl_(std::move(impl)) {} + bool IsVisible(storage::View view) const { return impl_.IsVisible(view); } + storage::EdgeTypeId EdgeType() const { return impl_.EdgeType(); } auto Properties(storage::View view) const { return impl_.Properties(view); } @@ -51,16 +53,16 @@ class EdgeAccessor final { return impl_.GetProperty(key, view); } - storage::Result SetProperty(storage::PropertyId key, const storage::PropertyValue &value) { + storage::Result SetProperty(storage::PropertyId key, const storage::PropertyValue &value) { return impl_.SetProperty(key, value); } - storage::Result 
RemoveProperty(storage::PropertyId key) { return SetProperty(key, storage::PropertyValue()); } + storage::Result RemoveProperty(storage::PropertyId key) { + return SetProperty(key, storage::PropertyValue()); + } - utils::BasicResult ClearProperties() { - auto ret = impl_.ClearProperties(); - if (ret.HasError()) return ret.GetError(); - return {}; + storage::Result> ClearProperties() { + return impl_.ClearProperties(); } VertexAccessor To() const; @@ -87,6 +89,8 @@ class VertexAccessor final { public: explicit VertexAccessor(storage::VertexAccessor impl) : impl_(std::move(impl)) {} + bool IsVisible(storage::View view) const { return impl_.IsVisible(view); } + auto Labels(storage::View view) const { return impl_.Labels(view); } storage::Result AddLabel(storage::LabelId label) { return impl_.AddLabel(label); } @@ -103,16 +107,16 @@ class VertexAccessor final { return impl_.GetProperty(key, view); } - storage::Result SetProperty(storage::PropertyId key, const storage::PropertyValue &value) { + storage::Result SetProperty(storage::PropertyId key, const storage::PropertyValue &value) { return impl_.SetProperty(key, value); } - storage::Result RemoveProperty(storage::PropertyId key) { return SetProperty(key, storage::PropertyValue()); } + storage::Result RemoveProperty(storage::PropertyId key) { + return SetProperty(key, storage::PropertyValue()); + } - utils::BasicResult ClearProperties() { - auto ret = impl_.ClearProperties(); - if (ret.HasError()) return ret.GetError(); - return {}; + storage::Result> ClearProperties() { + return impl_.ClearProperties(); } auto InEdges(storage::View view, const std::vector &edge_types) const @@ -237,13 +241,45 @@ class DbAccessor final { const storage::EdgeTypeId &edge_type) { auto maybe_edge = accessor_->CreateEdge(&from->impl_, &to->impl_, edge_type); if (maybe_edge.HasError()) return storage::Result(maybe_edge.GetError()); - return EdgeAccessor(std::move(*maybe_edge)); + return EdgeAccessor(*maybe_edge); } - storage::Result 
RemoveEdge(EdgeAccessor *edge) { return accessor_->DeleteEdge(&edge->impl_); } + storage::Result> RemoveEdge(EdgeAccessor *edge) { + auto res = accessor_->DeleteEdge(&edge->impl_); + if (res.HasError()) { + return res.GetError(); + } - storage::Result DetachRemoveVertex(VertexAccessor *vertex_accessor) { - return accessor_->DetachDeleteVertex(&vertex_accessor->impl_); + const auto &value = res.GetValue(); + if (!value) { + return std::optional{}; + } + + return std::make_optional(*value); + } + + storage::Result>>> DetachRemoveVertex( + VertexAccessor *vertex_accessor) { + using ReturnType = std::pair>; + + auto res = accessor_->DetachDeleteVertex(&vertex_accessor->impl_); + if (res.HasError()) { + return res.GetError(); + } + + const auto &value = res.GetValue(); + if (!value) { + return std::optional{}; + } + + const auto &[vertex, edges] = *value; + + std::vector deleted_edges; + deleted_edges.reserve(edges.size()); + std::transform(edges.begin(), edges.end(), std::back_inserter(deleted_edges), + [](const auto &deleted_edge) { return EdgeAccessor{deleted_edge}; }); + + return std::make_optional(vertex, std::move(deleted_edges)); } storage::Result> RemoveVertex(VertexAccessor *vertex_accessor) { diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp index 1cd95cc61..6881615fd 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -603,16 +603,23 @@ Interpreter::Interpreter(InterpreterContext *interpreter_context) : interpreter_ // auto storage_acc = interpreter_context_->db->Access(); // DbAccessor dba(&storage_acc); // auto triggers_acc = interpreter_context_->before_commit_triggers.access(); - // triggers_acc.insert(Trigger{"BeforeDelete", "UNWIND deletedVertices as u CREATE(:DELETED {id: u.id + 10})", + // triggers_acc.insert(Trigger{"BeforeDelete", + // "UNWIND deletedVertices as u CREATE(:DELETED_VERTEX {id: id(u) + 10})", // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock}); - // // 
triggers_acc.insert(Trigger{"BeforeDelete2", "UNWIND deletedVertices as u SET u.deleted = 0", - // // &interpreter_context_->ast_cache, &dba, - // // &interpreter_context_->antlr_lock}); - // // triggers_acc.insert(Trigger{"BeforeDeleteProcedure", "CALL script.procedure(deletedVertices) YIELD * RETURN - // // *", - // // &interpreter_context_->ast_cache, &dba, - // // &interpreter_context_->antlr_lock}); - // triggers_acc.insert(Trigger{"BeforeCreator", "UNWIND createdVertices as u SET u.before = u.id + 10", + // triggers_acc.insert(Trigger{"BeforeDeleteEdge", "UNWIND deletedEdges as u CREATE(:DELETED_EDGE {id: id(u) + + // 10})", + // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock}); + // // triggers_acc.insert(Trigger{"BeforeDelete2", "UNWIND deletedEdges as u SET u.deleted = 0", + // // &interpreter_context_->ast_cache, &dba, + // // &interpreter_context_->antlr_lock}); + // triggers_acc.insert(Trigger{"BeforeDeleteProcedure", "CALL script.procedure(updatedVertices) YIELD * RETURN *", + // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock}); + // triggers_acc.insert(Trigger{"BeforeCreator", "UNWIND createdVertices as u SET u.before = id(u) + 10", + // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock}); + // triggers_acc.insert(Trigger{"BeforeCreatorEdge", "UNWIND createdEdges as u SET u.before = id(u) + 10", + // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock}); + // triggers_acc.insert(Trigger{"BeforeSetLabelProcedure", + // "CALL label.procedure(assignedVertexLabels) YIELD * RETURN *", // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock}); // } // { @@ -623,10 +630,12 @@ Interpreter::Interpreter(InterpreterContext *interpreter_context) : interpreter_ // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock}); // triggers_acc.insert(Trigger{"AfterCreator", "UNWIND createdVertices as u SET u.after = u.id + 100", // 
&interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock}); + // triggers_acc.insert(Trigger{"AfterUpdateProcedure", "CALL script.procedure(updatedVertices) YIELD * RETURN *", + // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock}); // } - //} catch (const utils::BasicException &e) { - // spdlog::critical("Failed to create a trigger because: {}", e.what()); - //} + // } catch (const utils::BasicException &e) { + // spdlog::critical("Failed to create a trigger because: {}", e.what()); + // } } PreparedQuery Interpreter::PrepareTransactionQuery(std::string_view query_upper) { @@ -642,6 +651,11 @@ PreparedQuery Interpreter::PrepareTransactionQuery(std::string_view query_upper) db_accessor_ = std::make_unique(interpreter_context_->db->Access()); execution_db_accessor_.emplace(db_accessor_.get()); + + if (interpreter_context_->before_commit_triggers.size() > 0 || + interpreter_context_->after_commit_triggers.size() > 0) { + trigger_context_.emplace(); + } }; } else if (query_upper == "COMMIT") { handler = [this] { @@ -1355,17 +1369,18 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, utils::Downcast(parsed_query.query) || utils::Downcast(parsed_query.query))) { db_accessor_ = std::make_unique(interpreter_context_->db->Access()); execution_db_accessor_.emplace(db_accessor_.get()); + + if (utils::Downcast(parsed_query.query) && + (interpreter_context_->before_commit_triggers.size() > 0 || + interpreter_context_->after_commit_triggers.size() > 0)) { + trigger_context_.emplace(); + } } utils::Timer planning_timer; PreparedQuery prepared_query; if (utils::Downcast(parsed_query.query)) { - if (interpreter_context_->before_commit_triggers.size() > 0 || - interpreter_context_->after_commit_triggers.size() > 0) { - trigger_context_.emplace(); - } - prepared_query = PrepareCypherQuery(std::move(parsed_query), &query_execution->summary, interpreter_context_, &*execution_db_accessor_, 
&query_execution->execution_memory, trigger_context_ ? &*trigger_context_ : nullptr); diff --git a/src/query/plan/operator.cpp b/src/query/plan/operator.cpp index c4af42f50..5e90c013e 100644 --- a/src/query/plan/operator.cpp +++ b/src/query/plan/operator.cpp @@ -15,6 +15,7 @@ #include #include "query/context.hpp" +#include "query/db_accessor.hpp" #include "query/exceptions.hpp" #include "query/frontend/ast/ast.hpp" #include "query/frontend/semantic/symbol_table.hpp" @@ -23,6 +24,7 @@ #include "query/plan/scoped_profile.hpp" #include "query/procedure/mg_procedure_impl.hpp" #include "query/procedure/module.hpp" +#include "storage/v2/property_value.hpp" #include "utils/algorithm.hpp" #include "utils/csv_parsing.hpp" #include "utils/event_counter.hpp" @@ -208,7 +210,7 @@ bool CreateNode::CreateNodeCursor::Pull(Frame &frame, ExecutionContext &context) if (input_cursor_->Pull(frame, context)) { auto created_vertex = CreateLocalVertex(self_.node_info_, &frame, context); if (context.trigger_context) { - context.trigger_context->RegisterCreatedVertex(created_vertex); + context.trigger_context->RegisterCreatedObject(created_vertex); } return true; } @@ -248,8 +250,8 @@ CreateExpand::CreateExpandCursor::CreateExpandCursor(const CreateExpand &self, u namespace { -void CreateEdge(const EdgeCreationInfo &edge_info, DbAccessor *dba, VertexAccessor *from, VertexAccessor *to, - Frame *frame, ExpressionEvaluator *evaluator) { +EdgeAccessor CreateEdge(const EdgeCreationInfo &edge_info, DbAccessor *dba, VertexAccessor *from, VertexAccessor *to, + Frame *frame, ExpressionEvaluator *evaluator) { auto maybe_edge = dba->InsertEdge(from, to, edge_info.edge_type); if (maybe_edge.HasValue()) { auto &edge = *maybe_edge; @@ -267,6 +269,8 @@ void CreateEdge(const EdgeCreationInfo &edge_info, DbAccessor *dba, VertexAccess throw QueryRuntimeException("Unexpected error when creating an edge."); } } + + return *maybe_edge; } } // namespace @@ -292,19 +296,23 @@ bool 
CreateExpand::CreateExpandCursor::Pull(Frame &frame, ExecutionContext &cont // create an edge between the two nodes auto *dba = context.db_accessor; - switch (self_.edge_info_.direction) { - case EdgeAtom::Direction::IN: - CreateEdge(self_.edge_info_, dba, &v2, &v1, &frame, &evaluator); - break; - case EdgeAtom::Direction::OUT: - CreateEdge(self_.edge_info_, dba, &v1, &v2, &frame, &evaluator); - break; - case EdgeAtom::Direction::BOTH: + + auto created_edge = [&] { + switch (self_.edge_info_.direction) { + case EdgeAtom::Direction::IN: + return CreateEdge(self_.edge_info_, dba, &v2, &v1, &frame, &evaluator); + case EdgeAtom::Direction::OUT: // in the case of an undirected CreateExpand we choose an arbitrary // direction. this is used in the MERGE clause // it is not allowed in the CREATE clause, and the semantic // checker needs to ensure it doesn't reach this point - CreateEdge(self_.edge_info_, dba, &v1, &v2, &frame, &evaluator); + case EdgeAtom::Direction::BOTH: + return CreateEdge(self_.edge_info_, dba, &v1, &v2, &frame, &evaluator); + } + }(); + + if (context.trigger_context) { + context.trigger_context->RegisterCreatedObject(created_edge); } return true; @@ -320,7 +328,11 @@ VertexAccessor &CreateExpand::CreateExpandCursor::OtherVertex(Frame &frame, Exec ExpectType(self_.node_info_.symbol, dest_node_value, TypedValue::Type::Vertex); return dest_node_value.ValueVertex(); } else { - return CreateLocalVertex(self_.node_info_, &frame, context); + auto &created_vertex = CreateLocalVertex(self_.node_info_, &frame, context); + if (context.trigger_context) { + context.trigger_context->RegisterCreatedObject(created_vertex); + } + return created_vertex; } } @@ -1823,9 +1835,9 @@ bool Delete::DeleteCursor::Pull(Frame &frame, ExecutionContext &context) { for (TypedValue &expression_result : expression_results) { if (MustAbort(context)) throw HintedAbortError(); if (expression_result.type() == TypedValue::Type::Edge) { - auto maybe_error = 
dba.RemoveEdge(&expression_result.ValueEdge()); - if (maybe_error.HasError()) { - switch (maybe_error.GetError()) { + auto maybe_value = dba.RemoveEdge(&expression_result.ValueEdge()); + if (maybe_value.HasError()) { + switch (maybe_value.GetError()) { case storage::Error::SERIALIZATION_ERROR: throw QueryRuntimeException("Can't serialize due to concurrent operations."); case storage::Error::DELETED_OBJECT: @@ -1835,6 +1847,10 @@ bool Delete::DeleteCursor::Pull(Frame &frame, ExecutionContext &context) { throw QueryRuntimeException("Unexpected error when deleting an edge."); } } + + if (context.trigger_context && maybe_value.GetValue()) { + context.trigger_context->RegisterDeletedObject(*maybe_value.GetValue()); + } } } @@ -1845,9 +1861,9 @@ bool Delete::DeleteCursor::Pull(Frame &frame, ExecutionContext &context) { case TypedValue::Type::Vertex: { auto &va = expression_result.ValueVertex(); if (self_.detach_) { - auto maybe_error = dba.DetachRemoveVertex(&va); - if (maybe_error.HasError()) { - switch (maybe_error.GetError()) { + auto res = dba.DetachRemoveVertex(&va); + if (res.HasError()) { + switch (res.GetError()) { case storage::Error::SERIALIZATION_ERROR: throw QueryRuntimeException("Can't serialize due to concurrent operations."); case storage::Error::DELETED_OBJECT: @@ -1857,6 +1873,12 @@ bool Delete::DeleteCursor::Pull(Frame &frame, ExecutionContext &context) { throw QueryRuntimeException("Unexpected error when deleting a node."); } } + if (context.trigger_context && res.GetValue()) { + context.trigger_context->RegisterDeletedObject(res.GetValue()->first); + for (const auto &deleted_edge : res.GetValue()->second) { + context.trigger_context->RegisterDeletedObject(deleted_edge); + } + } } else { auto res = dba.RemoveVertex(&va); if (res.HasError()) { @@ -1873,7 +1895,7 @@ bool Delete::DeleteCursor::Pull(Frame &frame, ExecutionContext &context) { } if (context.trigger_context && res.GetValue()) { - 
context.trigger_context->RegisterDeletedVertex(*res.GetValue()); + context.trigger_context->RegisterDeletedObject(*res.GetValue()); } } break; @@ -1928,12 +1950,24 @@ bool SetProperty::SetPropertyCursor::Pull(Frame &frame, ExecutionContext &contex TypedValue rhs = self_.rhs_->Accept(evaluator); switch (lhs.type()) { - case TypedValue::Type::Vertex: - PropsSetChecked(&lhs.ValueVertex(), self_.property_, rhs); + case TypedValue::Type::Vertex: { + auto old_value = PropsSetChecked(&lhs.ValueVertex(), self_.property_, rhs); + + if (context.trigger_context) { + context.trigger_context->RegisterSetObjectProperty(lhs.ValueVertex(), self_.property_, + TypedValue{std::move(old_value)}, std::move(rhs)); + } break; - case TypedValue::Type::Edge: - PropsSetChecked(&lhs.ValueEdge(), self_.property_, rhs); + } + case TypedValue::Type::Edge: { + auto old_value = PropsSetChecked(&lhs.ValueEdge(), self_.property_, rhs); + + if (context.trigger_context) { + context.trigger_context->RegisterSetObjectProperty(lhs.ValueEdge(), self_.property_, + TypedValue{std::move(old_value)}, std::move(rhs)); + } break; + } case TypedValue::Type::Null: // Skip setting properties on Null (can occur in optional match). break; @@ -1973,16 +2007,26 @@ SetProperties::SetPropertiesCursor::SetPropertiesCursor(const SetProperties &sel namespace { +template +concept AccessorWithProperties = requires(T value, storage::PropertyId property_id, + storage::PropertyValue property_value) { + { value.ClearProperties() } + ->std::same_as>>; + {value.SetProperty(property_id, property_value)}; +}; + /// Helper function that sets the given values on either a Vertex or an Edge. 
/// /// @tparam TRecordAccessor Either RecordAccessor or /// RecordAccessor -template -void SetPropertiesOnRecord(DbAccessor *dba, TRecordAccessor *record, const TypedValue &rhs, SetProperties::Op op) { +template +void SetPropertiesOnRecord(TRecordAccessor *record, const TypedValue &rhs, SetProperties::Op op, + ExecutionContext *context) { + std::optional> old_values; if (op == SetProperties::Op::REPLACE) { - auto maybe_error = record->ClearProperties(); - if (maybe_error.HasError()) { - switch (maybe_error.GetError()) { + auto maybe_value = record->ClearProperties(); + if (maybe_value.HasError()) { + switch (maybe_value.GetError()) { case storage::Error::DELETED_OBJECT: throw QueryRuntimeException("Trying to set properties on a deleted graph element."); case storage::Error::SERIALIZATION_ERROR: @@ -1994,6 +2038,10 @@ void SetPropertiesOnRecord(DbAccessor *dba, TRecordAccessor *record, const Typed throw QueryRuntimeException("Unexpected error when setting properties."); } } + + if (context->trigger_context) { + old_values.emplace(std::move(*maybe_value)); + } } auto get_props = [](const auto &record) { @@ -2013,8 +2061,24 @@ void SetPropertiesOnRecord(DbAccessor *dba, TRecordAccessor *record, const Typed return *maybe_props; }; - auto set_props = [record](const auto &properties) { - for (const auto &kv : properties) { + auto register_set_property = [&](auto returned_old_value, auto key, auto new_value) { + auto old_value = [&]() -> storage::PropertyValue { + if (!old_values) { + return std::move(returned_old_value); + } + + if (auto it = old_values->find(key); it != old_values->end()) { + return std::move(it->second); + } + + return {}; + }(); + context->trigger_context->RegisterSetObjectProperty(*record, key, TypedValue(std::move(old_value)), + TypedValue(std::move(new_value))); + }; + + auto set_props = [&, record](auto properties) { + for (auto &kv : properties) { auto maybe_error = record->SetProperty(kv.first, kv.second); if (maybe_error.HasError()) { switch 
(maybe_error.GetError()) { @@ -2029,6 +2093,10 @@ void SetPropertiesOnRecord(DbAccessor *dba, TRecordAccessor *record, const Typed throw QueryRuntimeException("Unexpected error when setting properties."); } } + + if (context->trigger_context) { + register_set_property(std::move(*maybe_error), kv.first, std::move(kv.second)); + } } }; @@ -2040,7 +2108,13 @@ void SetPropertiesOnRecord(DbAccessor *dba, TRecordAccessor *record, const Typed set_props(get_props(rhs.ValueVertex())); break; case TypedValue::Type::Map: { - for (const auto &kv : rhs.ValueMap()) PropsSetChecked(record, dba->NameToProperty(kv.first), kv.second); + for (const auto &kv : rhs.ValueMap()) { + auto key = context->db_accessor->NameToProperty(kv.first); + auto old_value = PropsSetChecked(record, key, kv.second); + if (context->trigger_context) { + register_set_property(std::move(old_value), key, kv.second); + } + } break; } default: @@ -2048,6 +2122,14 @@ void SetPropertiesOnRecord(DbAccessor *dba, TRecordAccessor *record, const Typed "Right-hand side in SET expression must be a node, an edge or a " "map."); } + + if (context->trigger_context && old_values) { + // register removed properties + for (auto &[property_id, property_value] : *old_values) { + context->trigger_context->RegisterRemovedObjectProperty(*record, property_id, + TypedValue(std::move(property_value))); + } + } } } // namespace @@ -2066,10 +2148,10 @@ bool SetProperties::SetPropertiesCursor::Pull(Frame &frame, ExecutionContext &co switch (lhs.type()) { case TypedValue::Type::Vertex: - SetPropertiesOnRecord(context.db_accessor, &lhs.ValueVertex(), rhs, self_.op_); + SetPropertiesOnRecord(&lhs.ValueVertex(), rhs, self_.op_, &context); break; case TypedValue::Type::Edge: - SetPropertiesOnRecord(context.db_accessor, &lhs.ValueEdge(), rhs, self_.op_); + SetPropertiesOnRecord(&lhs.ValueEdge(), rhs, self_.op_, &context); break; case TypedValue::Type::Null: // Skip setting properties on Null (can occur in optional match). 
@@ -2127,6 +2209,10 @@ bool SetLabels::SetLabelsCursor::Pull(Frame &frame, ExecutionContext &context) { throw QueryRuntimeException("Unexpected error when setting a label."); } } + + if (context.trigger_context) { + context.trigger_context->RegisterSetVertexLabel(vertex, label); + } } return true; @@ -2165,10 +2251,10 @@ bool RemoveProperty::RemovePropertyCursor::Pull(Frame &frame, ExecutionContext & storage::View::NEW); TypedValue lhs = self_.lhs_->expression_->Accept(evaluator); - auto remove_prop = [property = self_.property_](auto *record) { - auto maybe_error = record->RemoveProperty(property); - if (maybe_error.HasError()) { - switch (maybe_error.GetError()) { + auto remove_prop = [property = self_.property_, &context](auto *record) { + auto maybe_old_value = record->RemoveProperty(property); + if (maybe_old_value.HasError()) { + switch (maybe_old_value.GetError()) { case storage::Error::DELETED_OBJECT: throw QueryRuntimeException("Trying to remove a property on a deleted graph element."); case storage::Error::SERIALIZATION_ERROR: @@ -2182,6 +2268,11 @@ bool RemoveProperty::RemovePropertyCursor::Pull(Frame &frame, ExecutionContext & throw QueryRuntimeException("Unexpected error when removing property."); } } + + if (context.trigger_context) { + context.trigger_context->RegisterRemovedObjectProperty(*record, property, + TypedValue(std::move(*maybe_old_value))); + } }; switch (lhs.type()) { @@ -2234,9 +2325,9 @@ bool RemoveLabels::RemoveLabelsCursor::Pull(Frame &frame, ExecutionContext &cont ExpectType(self_.input_symbol_, vertex_value, TypedValue::Type::Vertex); auto &vertex = vertex_value.ValueVertex(); for (auto label : self_.labels_) { - auto maybe_error = vertex.RemoveLabel(label); - if (maybe_error.HasError()) { - switch (maybe_error.GetError()) { + auto maybe_value = vertex.RemoveLabel(label); + if (maybe_value.HasError()) { + switch (maybe_value.GetError()) { case storage::Error::SERIALIZATION_ERROR: throw QueryRuntimeException("Can't serialize due to 
concurrent operations."); case storage::Error::DELETED_OBJECT: @@ -2247,6 +2338,10 @@ bool RemoveLabels::RemoveLabelsCursor::Pull(Frame &frame, ExecutionContext &cont throw QueryRuntimeException("Unexpected error when removing labels from a node."); } } + + if (context.trigger_context && *maybe_value) { + context.trigger_context->RegisterRemovedVertexLabel(vertex, label); + } } return true; diff --git a/src/query/trigger.cpp b/src/query/trigger.cpp index 787bb834b..65ad14dde 100644 --- a/src/query/trigger.cpp +++ b/src/query/trigger.cpp @@ -1,66 +1,297 @@ -#include "query/trigger.hpp" +#include + #include "query/context.hpp" #include "query/cypher_query_interpreter.hpp" #include "query/db_accessor.hpp" #include "query/frontend/ast/ast.hpp" #include "query/interpret/frame.hpp" +#include "query/trigger.hpp" +#include "query/typed_value.hpp" #include "utils/memory.hpp" namespace query { namespace { +// clang-format off std::vector> GetPredefinedIdentifiers() { - return {{{"createdVertices", false}, trigger::IdentifierTag::CREATED_VERTICES}, - {{"deletedVertices", false}, trigger::IdentifierTag::DELETED_VERTICES}}; + return {{{"createdVertices", false}, trigger::IdentifierTag::CREATED_VERTICES }, + {{"createdEdges", false}, trigger::IdentifierTag::CREATED_EDGES }, + {{"deletedVertices", false}, trigger::IdentifierTag::DELETED_VERTICES }, + {{"deletedEdges", false}, trigger::IdentifierTag::DELETED_EDGES }, + {{"assignedVertexProperties", false}, trigger::IdentifierTag::SET_VERTEX_PROPERTIES }, + {{"assignedEdgeProperties", false}, trigger::IdentifierTag::SET_EDGE_PROPERTIES }, + {{"removedVertexProperties", false}, trigger::IdentifierTag::REMOVED_VERTEX_PROPERTIES}, + {{"removedEdgeProperties", false}, trigger::IdentifierTag::REMOVED_EDGE_PROPERTIES }, + {{"assignedVertexLabels", false}, trigger::IdentifierTag::SET_VERTEX_LABELS }, + {{"removedVertexLabels", false}, trigger::IdentifierTag::REMOVED_VERTEX_LABELS }, + {{"updatedVertices", false}, 
trigger::IdentifierTag::UPDATED_VERTICES }, + {{"updatedEdges", false}, trigger::IdentifierTag::UPDATED_EDGES }, + {{"updatedObjects", false}, trigger::IdentifierTag::UPDATED_OBJECTS }}; +} +// clang-format on + +template +concept WithToMap = requires(const T value, DbAccessor *dba) { + { value.ToMap(dba) } + ->std::same_as>; +}; + +template +TypedValue ToTypedValue(const T &value, DbAccessor *dba) { + return TypedValue{value.ToMap(dba)}; +} + +template +TypedValue ToTypedValue(const TriggerContext::CreatedObject &created_object, + [[maybe_unused]] DbAccessor *dba) { + return TypedValue{created_object.object}; +} + +template +TypedValue ToTypedValue(const TriggerContext::DeletedObject &deleted_object, + [[maybe_unused]] DbAccessor *dba) { + return TypedValue{deleted_object.object}; } template -concept ConvertableToTypedValue = requires(T value) { - {TypedValue{value}}; +concept WithIsValid = requires(const T value) { + { value.IsValid() } + ->std::same_as; }; +template +concept ConvertableToTypedValue = requires(T value, DbAccessor *dba) { + { ToTypedValue(value, dba) } + ->std::same_as; +} +&&WithIsValid; + +template +concept LabelUpdateContext = utils::SameAsAnyOf; + +template +TypedValue ToTypedValue(const std::vector &values, DbAccessor *dba) { + std::unordered_map> vertices_by_labels; + + for (const auto &value : values) { + if (value.IsValid()) { + vertices_by_labels[value.label_id].emplace_back(value.object); + } + } + + std::map typed_values; + for (auto &[label_id, vertices] : vertices_by_labels) { + typed_values.emplace(dba->LabelToName(label_id), TypedValue(std::move(vertices))); + } + + return TypedValue(std::move(typed_values)); +} + template -TypedValue ToTypedValue(const std::vector &values) { +TypedValue ToTypedValue(const std::vector &values, DbAccessor *dba) requires(!LabelUpdateContext) { std::vector typed_values; typed_values.reserve(values.size()); - std::transform(std::begin(values), std::end(values), std::back_inserter(typed_values), - 
[](const auto &accessor) { return TypedValue(accessor); }); - return TypedValue(typed_values); + + for (const auto &value : values) { + if (value.IsValid()) { + typed_values.push_back(ToTypedValue(value, dba)); + } + } + + return TypedValue(std::move(typed_values)); +} + +template +const char *TypeToString() { + if constexpr (std::same_as>) { + return "set_vertex_property"; + } else if constexpr (std::same_as>) { + return "set_edge_property"; + } else if constexpr (std::same_as>) { + return "removed_vertex_property"; + } else if constexpr (std::same_as>) { + return "removed_edge_property"; + } else if constexpr (std::same_as) { + return "set_vertex_label"; + } else if constexpr (std::same_as) { + return "removed_vertex_label"; + } +} + +template +concept UpdateContext = WithToMap &&WithIsValid; + +template +TypedValue Updated(DbAccessor *dba, const std::vector &...args) { + const auto size = (args.size() + ...); + std::vector updated; + updated.reserve(size); + + const auto add_to_updated = [&](const std::vector &values) { + for (const auto &value : values) { + if (value.IsValid()) { + auto map = value.ToMap(dba); + map["type"] = TypeToString(); + updated.emplace_back(std::move(map)); + } + } + }; + + (add_to_updated(args), ...); + + return TypedValue(std::move(updated)); } } // namespace -void TriggerContext::RegisterCreatedVertex(const VertexAccessor created_vertex) { - created_vertices_.push_back(created_vertex); +bool TriggerContext::SetVertexLabel::IsValid() const { return object.IsVisible(storage::View::OLD); } + +std::map TriggerContext::SetVertexLabel::ToMap(DbAccessor *dba) const { + return {{"vertex", TypedValue{object}}, {"label", TypedValue{dba->LabelToName(label_id)}}}; } -void TriggerContext::RegisterDeletedVertex(const VertexAccessor deleted_vertex) { - deleted_vertices_.push_back(deleted_vertex); +bool TriggerContext::RemovedVertexLabel::IsValid() const { return object.IsVisible(storage::View::OLD); } + +std::map 
TriggerContext::RemovedVertexLabel::ToMap(DbAccessor *dba) const { + return {{"vertex", TypedValue{object}}, {"label", TypedValue{dba->LabelToName(label_id)}}}; } -TypedValue TriggerContext::GetTypedValue(const trigger::IdentifierTag tag) const { +void TriggerContext::RegisterSetVertexLabel(const VertexAccessor &vertex, const storage::LabelId label_id) { + set_vertex_labels_.emplace_back(vertex, label_id); +} + +void TriggerContext::RegisterRemovedVertexLabel(const VertexAccessor &vertex, const storage::LabelId label_id) { + removed_vertex_labels_.emplace_back(vertex, label_id); +} + +TypedValue TriggerContext::GetTypedValue(const trigger::IdentifierTag tag, DbAccessor *dba) const { + const auto &[created_vertices, deleted_vertices, set_vertex_properties, removed_vertex_properties] = vertex_registry_; + const auto &[created_edges, deleted_edges, set_edge_properties, removed_edge_properties] = edge_registry_; + switch (tag) { case trigger::IdentifierTag::CREATED_VERTICES: - return ToTypedValue(created_vertices_); + return ToTypedValue(created_vertices, dba); + + case trigger::IdentifierTag::CREATED_EDGES: + return ToTypedValue(created_edges, dba); + case trigger::IdentifierTag::DELETED_VERTICES: - return ToTypedValue(deleted_vertices_); + return ToTypedValue(deleted_vertices, dba); + + case trigger::IdentifierTag::DELETED_EDGES: + return ToTypedValue(deleted_edges, dba); + + case trigger::IdentifierTag::SET_VERTEX_PROPERTIES: + return ToTypedValue(set_vertex_properties, dba); + + case trigger::IdentifierTag::SET_EDGE_PROPERTIES: + return ToTypedValue(set_edge_properties, dba); + + case trigger::IdentifierTag::REMOVED_VERTEX_PROPERTIES: + return ToTypedValue(removed_vertex_properties, dba); + + case trigger::IdentifierTag::REMOVED_EDGE_PROPERTIES: + return ToTypedValue(removed_edge_properties, dba); + + case trigger::IdentifierTag::SET_VERTEX_LABELS: + return ToTypedValue(set_vertex_labels_, dba); + + case trigger::IdentifierTag::REMOVED_VERTEX_LABELS: + return 
ToTypedValue(removed_vertex_labels_, dba); + + case trigger::IdentifierTag::UPDATED_VERTICES: + return Updated(dba, set_vertex_properties, removed_vertex_properties, set_vertex_labels_, removed_vertex_labels_); + + case trigger::IdentifierTag::UPDATED_EDGES: + return Updated(dba, set_edge_properties, removed_edge_properties); + + case trigger::IdentifierTag::UPDATED_OBJECTS: + return Updated(dba, set_vertex_properties, set_edge_properties, removed_vertex_properties, + removed_edge_properties, set_vertex_labels_, removed_vertex_labels_); } } void TriggerContext::AdaptForAccessor(DbAccessor *accessor) { + auto &[created_vertices, deleted_vertices, set_vertex_properties, removed_vertex_properties] = vertex_registry_; // adapt created_vertices_ - auto it = created_vertices_.begin(); - for (const auto &created_vertex : created_vertices_) { - if (auto maybe_vertex = accessor->FindVertex(created_vertex.Gid(), storage::View::OLD); maybe_vertex) { - *it = *maybe_vertex; - ++it; + { + auto it = created_vertices.begin(); + for (const auto &created_vertex : created_vertices) { + if (auto maybe_vertex = accessor->FindVertex(created_vertex.object.Gid(), storage::View::OLD); maybe_vertex) { + *it = CreatedObject{*maybe_vertex}; + ++it; + } } + created_vertices.erase(it, created_vertices.end()); } - created_vertices_.erase(it, created_vertices_.end()); // deleted_vertices_ should keep the transaction context of the transaction which deleted it // because no other transaction can modify an object after it's deleted so it should be the // latest state of the object + + const auto adapt_context_with_vertex = [accessor](auto *values) { + auto it = values->begin(); + for (auto &value : *values) { + if (auto maybe_vertex = accessor->FindVertex(value.object.Gid(), storage::View::OLD); maybe_vertex) { + *it = std::move(value); + it->object = *maybe_vertex; + ++it; + } + } + values->erase(it, values->end()); + }; + + adapt_context_with_vertex(&set_vertex_properties); + 
adapt_context_with_vertex(&removed_vertex_properties); + adapt_context_with_vertex(&set_vertex_labels_); + adapt_context_with_vertex(&removed_vertex_labels_); + + auto &[created_edges, deleted_edges, set_edge_properties, removed_edge_properties] = edge_registry_; + // adapt created_edges + { + auto it = created_edges.begin(); + for (const auto &created_edge : created_edges) { + if (auto maybe_vertex = accessor->FindVertex(created_edge.object.From().Gid(), storage::View::OLD); + maybe_vertex) { + auto maybe_out_edges = maybe_vertex->OutEdges(storage::View::OLD); + MG_ASSERT(maybe_out_edges.HasValue()); + for (const auto &edge : *maybe_out_edges) { + if (edge.Gid() == created_edge.object.Gid()) { + *it = CreatedObject{edge}; + ++it; + break; + } + } + } + } + created_edges.erase(it, created_edges.end()); + } + + // deleted_edges_ should keep the transaction context of the transaction which deleted it + // because no other transaction can modify an object after it's deleted so it should be the + // latest state of the object + + const auto adapt_context_with_edge = [accessor](auto *values) { + auto it = values->begin(); + for (const auto &value : *values) { + if (auto maybe_vertex = accessor->FindVertex(value.object.From().Gid(), storage::View::OLD); maybe_vertex) { + auto maybe_out_edges = maybe_vertex->OutEdges(storage::View::OLD); + MG_ASSERT(maybe_out_edges.HasValue()); + for (const auto &edge : *maybe_out_edges) { + if (edge.Gid() == value.object.Gid()) { + *it = std::move(value); + it->object = edge; + ++it; + break; + } + } + } + } + values->erase(it, values->end()); + }; + + adapt_context_with_edge(&set_edge_properties); + adapt_context_with_edge(&removed_edge_properties); } Trigger::Trigger(std::string name, const std::string &query, utils::SkipList *query_cache, @@ -142,7 +373,7 @@ void Trigger::Execute(DbAccessor *dba, utils::MonotonicBufferResource *execution continue; } - frame[plan.symbol_table().at(identifier)] = context.GetTypedValue(tag); + 
frame[plan.symbol_table().at(identifier)] = context.GetTypedValue(tag, dba); } while (cursor->Pull(frame, ctx)) diff --git a/src/query/trigger.hpp b/src/query/trigger.hpp index a2bbd6865..9689b7775 100644 --- a/src/query/trigger.hpp +++ b/src/query/trigger.hpp @@ -1,27 +1,197 @@ #pragma once +#include +#include + #include "query/cypher_query_interpreter.hpp" +#include "query/db_accessor.hpp" #include "query/frontend/ast/ast.hpp" +#include "query/typed_value.hpp" +#include "utils/concepts.hpp" namespace query { namespace trigger { -enum class IdentifierTag : uint8_t { CREATED_VERTICES, DELETED_VERTICES }; +enum class IdentifierTag : uint8_t { + CREATED_VERTICES, + CREATED_EDGES, + DELETED_VERTICES, + DELETED_EDGES, + SET_VERTEX_PROPERTIES, + SET_EDGE_PROPERTIES, + REMOVED_VERTEX_PROPERTIES, + REMOVED_EDGE_PROPERTIES, + SET_VERTEX_LABELS, + REMOVED_VERTEX_LABELS, + UPDATED_VERTICES, + UPDATED_EDGES, + UPDATED_OBJECTS +}; } // namespace trigger +namespace detail { +template +concept ObjectAccessor = utils::SameAsAnyOf; + +template +const char *ObjectString() { + if constexpr (std::same_as) { + return "vertex"; + } else { + return "edge"; + } +} +} // namespace detail + struct TriggerContext { - void RegisterCreatedVertex(VertexAccessor created_vertex); - void RegisterDeletedVertex(VertexAccessor deleted_vertex); + static_assert(std::is_trivially_copy_constructible_v, + "VertexAccessor is not trivially copy constructible, move it where possible and remove this assert"); + static_assert(std::is_trivially_copy_constructible_v, + "EdgeAccessor is not trivially copy constructible, move it where possible and remove this asssert"); + + template + void RegisterCreatedObject(const TAccessor &created_object) { + GetRegistry().created_objects_.emplace_back(created_object); + } + + template + void RegisterDeletedObject(const TAccessor &deleted_object) { + GetRegistry().deleted_objects_.emplace_back(deleted_object); + } + + template + void RegisterSetObjectProperty(const TAccessor 
&object, const storage::PropertyId key, TypedValue old_value, + TypedValue new_value) { + if (new_value.IsNull()) { + RegisterRemovedObjectProperty(object, key, std::move(old_value)); + return; + } + + GetRegistry().set_object_properties_.emplace_back(object, key, std::move(old_value), + std::move(new_value)); + } + + template + void RegisterRemovedObjectProperty(const TAccessor &object, const storage::PropertyId key, TypedValue old_value) { + // property is already removed + if (old_value.IsNull()) { + return; + } + + GetRegistry().removed_object_properties_.emplace_back(object, key, std::move(old_value)); + } + + void RegisterSetVertexLabel(const VertexAccessor &vertex, storage::LabelId label_id); + void RegisterRemovedVertexLabel(const VertexAccessor &vertex, storage::LabelId label_id); // Adapt the TriggerContext object inplace for a different DbAccessor // (each derived accessor, e.g. VertexAccessor, gets adapted // to the sent DbAccessor so they can be used safely) void AdaptForAccessor(DbAccessor *accessor); - TypedValue GetTypedValue(trigger::IdentifierTag tag) const; + TypedValue GetTypedValue(trigger::IdentifierTag tag, DbAccessor *dba) const; + + template + struct CreatedObject { + explicit CreatedObject(const TAccessor &object) : object{object} {} + + bool IsValid() const { return object.IsVisible(storage::View::OLD); } + + TAccessor object; + }; + + template + struct DeletedObject { + explicit DeletedObject(const TAccessor &object) : object{object} {} + + bool IsValid() const { return object.IsVisible(storage::View::OLD); } + + TAccessor object; + }; + + template + struct SetObjectProperty { + explicit SetObjectProperty(const TAccessor &object, storage::PropertyId key, TypedValue old_value, + TypedValue new_value) + : object{object}, key{key}, old_value{std::move(old_value)}, new_value{std::move(new_value)} {} + + std::map ToMap(DbAccessor *dba) const { + return {{detail::ObjectString(), TypedValue{object}}, + {"key", 
TypedValue{dba->PropertyToName(key)}}, + {"old", old_value}, + {"new", new_value}}; + } + + bool IsValid() const { return object.IsVisible(storage::View::OLD); } + + TAccessor object; + storage::PropertyId key; + TypedValue old_value; + TypedValue new_value; + }; + + template + struct RemovedObjectProperty { + explicit RemovedObjectProperty(const TAccessor &object, storage::PropertyId key, TypedValue old_value) + : object{object}, key{key}, old_value{std::move(old_value)} {} + + std::map ToMap(DbAccessor *dba) const { + return {{detail::ObjectString(), TypedValue{object}}, + {"key", TypedValue{dba->PropertyToName(key)}}, + {"old", old_value}}; + } + + bool IsValid() const { return object.IsVisible(storage::View::OLD); } + + TAccessor object; + storage::PropertyId key; + TypedValue old_value; + }; + + struct SetVertexLabel { + explicit SetVertexLabel(const VertexAccessor &vertex, const storage::LabelId label_id) + : object{vertex}, label_id{label_id} {} + + std::map ToMap(DbAccessor *dba) const; + bool IsValid() const; + + VertexAccessor object; + storage::LabelId label_id; + }; + + struct RemovedVertexLabel { + explicit RemovedVertexLabel(const VertexAccessor &vertex, const storage::LabelId label_id) + : object{vertex}, label_id{label_id} {} + + std::map ToMap(DbAccessor *dba) const; + bool IsValid() const; + + VertexAccessor object; + storage::LabelId label_id; + }; private: - std::vector created_vertices_; - std::vector deleted_vertices_; + template + struct Registry { + std::vector> created_objects_; + std::vector> deleted_objects_; + std::vector> set_object_properties_; + std::vector> removed_object_properties_; + }; + + Registry vertex_registry_; + Registry edge_registry_; + + template + Registry &GetRegistry() { + if constexpr (std::same_as) { + return vertex_registry_; + } else { + return edge_registry_; + } + } + + std::vector set_vertex_labels_; + std::vector removed_vertex_labels_; }; struct Trigger { diff --git a/src/storage/v2/edge_accessor.cpp 
b/src/storage/v2/edge_accessor.cpp index 2dd29b759..1b1491f99 100644 --- a/src/storage/v2/edge_accessor.cpp +++ b/src/storage/v2/edge_accessor.cpp @@ -3,11 +3,45 @@ #include #include "storage/v2/mvcc.hpp" +#include "storage/v2/property_value.hpp" #include "storage/v2/vertex_accessor.hpp" #include "utils/memory_tracker.hpp" namespace storage { +bool EdgeAccessor::IsVisible(const View view) const { + bool deleted = true; + bool exists = true; + Delta *delta = nullptr; + { + std::lock_guard guard(edge_.ptr->lock); + deleted = edge_.ptr->deleted; + delta = edge_.ptr->delta; + } + ApplyDeltasForRead(transaction_, delta, view, [&](const Delta &delta) { + switch (delta.action) { + case Delta::Action::ADD_LABEL: + case Delta::Action::REMOVE_LABEL: + case Delta::Action::SET_PROPERTY: + case Delta::Action::ADD_IN_EDGE: + case Delta::Action::ADD_OUT_EDGE: + case Delta::Action::REMOVE_IN_EDGE: + case Delta::Action::REMOVE_OUT_EDGE: + break; + case Delta::Action::RECREATE_OBJECT: { + deleted = false; + break; + } + case Delta::Action::DELETE_OBJECT: { + exists = false; + break; + } + } + }); + + return exists && (for_deleted_ || !deleted); +} + VertexAccessor EdgeAccessor::FromVertex() const { return VertexAccessor{from_vertex_, transaction_, indices_, constraints_, config_}; } @@ -16,7 +50,7 @@ VertexAccessor EdgeAccessor::ToVertex() const { return VertexAccessor{to_vertex_, transaction_, indices_, constraints_, config_}; } -Result EdgeAccessor::SetProperty(PropertyId property, const PropertyValue &value) { +Result EdgeAccessor::SetProperty(PropertyId property, const PropertyValue &value) { utils::MemoryTracker::OutOfMemoryExceptionEnabler oom_exception; if (!config_.properties_on_edges) return Error::PROPERTIES_DISABLED; @@ -27,20 +61,19 @@ Result EdgeAccessor::SetProperty(PropertyId property, const PropertyValue if (edge_.ptr->deleted) return Error::DELETED_OBJECT; auto current_value = edge_.ptr->properties.GetProperty(property); - bool existed = !current_value.IsNull(); // 
We could skip setting the value if the previous one is the same to the new // one. This would save some memory as a delta would not be created as well as // avoid copying the value. The reason we are not doing that is because the // current code always follows the logical pattern of "create a delta" and // "modify in-place". Additionally, the created delta will make other // transactions get a SERIALIZATION_ERROR. - CreateAndLinkDelta(transaction_, edge_.ptr, Delta::SetPropertyTag(), property, std::move(current_value)); + CreateAndLinkDelta(transaction_, edge_.ptr, Delta::SetPropertyTag(), property, current_value); edge_.ptr->properties.SetProperty(property, value); - return !existed; + return std::move(current_value); } -Result EdgeAccessor::ClearProperties() { +Result> EdgeAccessor::ClearProperties() { if (!config_.properties_on_edges) return Error::PROPERTIES_DISABLED; std::lock_guard guard(edge_.ptr->lock); @@ -50,14 +83,13 @@ Result EdgeAccessor::ClearProperties() { if (edge_.ptr->deleted) return Error::DELETED_OBJECT; auto properties = edge_.ptr->properties.Properties(); - bool removed = !properties.empty(); for (const auto &property : properties) { CreateAndLinkDelta(transaction_, edge_.ptr, Delta::SetPropertyTag(), property.first, property.second); } edge_.ptr->properties.ClearProperties(); - return removed; + return std::move(properties); } Result EdgeAccessor::GetProperty(PropertyId property, View view) const { @@ -98,7 +130,7 @@ Result EdgeAccessor::GetProperty(PropertyId property, View view) } }); if (!exists) return Error::NONEXISTENT_OBJECT; - if (deleted) return Error::DELETED_OBJECT; + if (!for_deleted_ && deleted) return Error::DELETED_OBJECT; return std::move(value); } @@ -149,7 +181,7 @@ Result> EdgeAccessor::Properties(View view) } }); if (!exists) return Error::NONEXISTENT_OBJECT; - if (deleted) return Error::DELETED_OBJECT; + if (!for_deleted_ && deleted) return Error::DELETED_OBJECT; return std::move(properties); } diff --git 
a/src/storage/v2/edge_accessor.hpp b/src/storage/v2/edge_accessor.hpp index e90565fcf..261a5a60e 100644 --- a/src/storage/v2/edge_accessor.hpp +++ b/src/storage/v2/edge_accessor.hpp @@ -23,7 +23,7 @@ class EdgeAccessor final { public: EdgeAccessor(EdgeRef edge, EdgeTypeId edge_type, Vertex *from_vertex, Vertex *to_vertex, Transaction *transaction, - Indices *indices, Constraints *constraints, Config::Items config) + Indices *indices, Constraints *constraints, Config::Items config, bool for_deleted = false) : edge_(edge), edge_type_(edge_type), from_vertex_(from_vertex), @@ -31,7 +31,11 @@ class EdgeAccessor final { transaction_(transaction), indices_(indices), constraints_(constraints), - config_(config) {} + config_(config), + for_deleted_(for_deleted) {} + + /// @return true if the object is visible from the current transaction + bool IsVisible(View view) const; VertexAccessor FromVertex() const; @@ -39,15 +43,13 @@ class EdgeAccessor final { EdgeTypeId EdgeType() const { return edge_type_; } - /// Set a property value and return `true` if insertion took place. - /// `false` is returned if assignment took place. + /// Set a property value and return the old value. /// @throw std::bad_alloc - Result SetProperty(PropertyId property, const PropertyValue &value); + Result SetProperty(PropertyId property, const PropertyValue &value); - /// Remove all properties and return `true` if any removal took place. - /// `false` is returned if there were no properties to remove. + /// Remove all properties and return old values for each removed property. /// @throw std::bad_alloc - Result ClearProperties(); + Result> ClearProperties(); /// @throw std::bad_alloc Result GetProperty(PropertyId property, View view) const; @@ -79,6 +81,14 @@ class EdgeAccessor final { Indices *indices_; Constraints *constraints_; Config::Items config_; + + // if the accessor was created for a deleted edge. + // Accessor behaves differently for some methods based on this + // flag. + // E.g. 
If this field is set to true, GetProperty will return the property of the edge + // even though the edge is deleted. + // All the write operations will still return an error if it's called for a deleted edge. + bool for_deleted_{false}; }; } // namespace storage diff --git a/src/storage/v2/storage.cpp b/src/storage/v2/storage.cpp index 0c03a0bdf..8856d6628 100644 --- a/src/storage/v2/storage.cpp +++ b/src/storage/v2/storage.cpp @@ -18,6 +18,7 @@ #include "storage/v2/mvcc.hpp" #include "storage/v2/replication/config.hpp" #include "storage/v2/transaction.hpp" +#include "storage/v2/vertex_accessor.hpp" #include "utils/file.hpp" #include "utils/logging.hpp" #include "utils/memory_tracker.hpp" @@ -465,7 +466,7 @@ Result> Storage::Accessor::DeleteVertex(VertexAcce MG_ASSERT(vertex->transaction_ == &transaction_, "VertexAccessor must be from the same transaction as the storage " "accessor when deleting a vertex!"); - auto vertex_ptr = vertex->vertex_; + auto *vertex_ptr = vertex->vertex_; std::lock_guard guard(vertex_ptr->lock); @@ -484,11 +485,14 @@ Result> Storage::Accessor::DeleteVertex(VertexAcce config_, true); } -Result Storage::Accessor::DetachDeleteVertex(VertexAccessor *vertex) { +Result>>> Storage::Accessor::DetachDeleteVertex( + VertexAccessor *vertex) { + using ReturnType = std::pair>; + MG_ASSERT(vertex->transaction_ == &transaction_, "VertexAccessor must be from the same transaction as the storage " "accessor when deleting a vertex!"); - auto vertex_ptr = vertex->vertex_; + auto *vertex_ptr = vertex->vertex_; std::vector> in_edges; std::vector> out_edges; @@ -498,12 +502,13 @@ Result Storage::Accessor::DetachDeleteVertex(VertexAccessor *vertex) { if (!PrepareForWrite(&transaction_, vertex_ptr)) return Error::SERIALIZATION_ERROR; - if (vertex_ptr->deleted) return false; + if (vertex_ptr->deleted) return std::optional{}; in_edges = vertex_ptr->in_edges; out_edges = vertex_ptr->out_edges; } + std::vector deleted_edges; for (const auto &item : in_edges) { auto 
[edge_type, from_vertex, edge] = item; EdgeAccessor e(edge, edge_type, from_vertex, vertex_ptr, &transaction_, &storage_->indices_, @@ -511,7 +516,11 @@ Result Storage::Accessor::DetachDeleteVertex(VertexAccessor *vertex) { auto ret = DeleteEdge(&e); if (ret.HasError()) { MG_ASSERT(ret.GetError() == Error::SERIALIZATION_ERROR, "Invalid database state!"); - return ret; + return ret.GetError(); + } + + if (ret.GetValue()) { + deleted_edges.push_back(*ret.GetValue()); } } for (const auto &item : out_edges) { @@ -521,7 +530,11 @@ Result Storage::Accessor::DetachDeleteVertex(VertexAccessor *vertex) { auto ret = DeleteEdge(&e); if (ret.HasError()) { MG_ASSERT(ret.GetError() == Error::SERIALIZATION_ERROR, "Invalid database state!"); - return ret; + return ret.GetError(); + } + + if (ret.GetValue()) { + deleted_edges.push_back(*ret.GetValue()); } } @@ -538,7 +551,9 @@ Result Storage::Accessor::DetachDeleteVertex(VertexAccessor *vertex) { CreateAndLinkDelta(&transaction_, vertex_ptr, Delta::RecreateObjectTag()); vertex_ptr->deleted = true; - return true; + return std::make_optional( + VertexAccessor{vertex_ptr, &transaction_, &storage_->indices_, &storage_->constraints_, config_, true}, + std::move(deleted_edges)); } Result Storage::Accessor::CreateEdge(VertexAccessor *from, VertexAccessor *to, EdgeTypeId edge_type) { @@ -668,7 +683,7 @@ Result Storage::Accessor::CreateEdge(VertexAccessor *from, VertexA &storage_->constraints_, config_); } -Result Storage::Accessor::DeleteEdge(EdgeAccessor *edge) { +Result> Storage::Accessor::DeleteEdge(EdgeAccessor *edge) { MG_ASSERT(edge->transaction_ == &transaction_, "EdgeAccessor must be from the same transaction as the storage " "accessor when deleting an edge!"); @@ -682,11 +697,11 @@ Result Storage::Accessor::DeleteEdge(EdgeAccessor *edge) { if (!PrepareForWrite(&transaction_, edge_ptr)) return Error::SERIALIZATION_ERROR; - if (edge_ptr->deleted) return false; + if (edge_ptr->deleted) return std::optional{}; } - auto from_vertex = 
edge->from_vertex_; - auto to_vertex = edge->to_vertex_; + auto *from_vertex = edge->from_vertex_; + auto *to_vertex = edge->to_vertex_; // Obtain the locks by `gid` order to avoid lock cycles. std::unique_lock guard_from(from_vertex->lock, std::defer_lock); @@ -732,12 +747,12 @@ Result Storage::Accessor::DeleteEdge(EdgeAccessor *edge) { MG_ASSERT((op1 && op2) || (!op1 && !op2), "Invalid database state!"); if (!op1 && !op2) { // The edge is already deleted. - return false; + return std::optional{}; } } if (config_.properties_on_edges) { - auto edge_ptr = edge_ref.ptr; + auto *edge_ptr = edge_ref.ptr; CreateAndLinkDelta(&transaction_, edge_ptr, Delta::RecreateObjectTag()); edge_ptr->deleted = true; } @@ -748,7 +763,8 @@ Result Storage::Accessor::DeleteEdge(EdgeAccessor *edge) { // Decrement edge count. storage_->edge_count_.fetch_add(-1, std::memory_order_acq_rel); - return true; + return std::make_optional(edge_ref, edge_type, from_vertex, to_vertex, &transaction_, + &storage_->indices_, &storage_->constraints_, config_, true); } const std::string &Storage::Accessor::LabelToName(LabelId label) const { return storage_->LabelToName(label); } diff --git a/src/storage/v2/storage.hpp b/src/storage/v2/storage.hpp index 7c3192a30..f61ff5546 100644 --- a/src/storage/v2/storage.hpp +++ b/src/storage/v2/storage.hpp @@ -4,6 +4,7 @@ #include #include #include +#include #include "io/network/endpoint.hpp" #include "storage/v2/commit_log.hpp" @@ -249,17 +250,21 @@ class Storage final { return storage_->indices_.label_property_index.ApproximateVertexCount(label, property, lower, upper); } + /// @return Accessor to the deleted vertex if a deletion took place, std::nullopt otherwise /// @throw std::bad_alloc Result> DeleteVertex(VertexAccessor *vertex); + /// @return Accessor to the deleted vertex and deleted edges if a deletion took place, std::nullopt otherwise /// @throw std::bad_alloc - Result DetachDeleteVertex(VertexAccessor *vertex); + Result>>> DetachDeleteVertex( + 
VertexAccessor *vertex); /// @throw std::bad_alloc Result CreateEdge(VertexAccessor *from, VertexAccessor *to, EdgeTypeId edge_type); + /// Accessor to the deleted edge if a deletion took place, std::nullopt otherwise /// @throw std::bad_alloc - Result DeleteEdge(EdgeAccessor *edge); + Result> DeleteEdge(EdgeAccessor *edge); const std::string &LabelToName(LabelId label) const; const std::string &PropertyToName(PropertyId property) const; diff --git a/src/storage/v2/vertex_accessor.cpp b/src/storage/v2/vertex_accessor.cpp index 01c42e202..4b749e9ab 100644 --- a/src/storage/v2/vertex_accessor.cpp +++ b/src/storage/v2/vertex_accessor.cpp @@ -6,21 +6,24 @@ #include "storage/v2/id_types.hpp" #include "storage/v2/indices.hpp" #include "storage/v2/mvcc.hpp" +#include "storage/v2/property_value.hpp" #include "utils/logging.hpp" #include "utils/memory_tracker.hpp" namespace storage { -std::optional VertexAccessor::Create(Vertex *vertex, Transaction *transaction, Indices *indices, - Constraints *constraints, Config::Items config, View view) { - bool is_visible = true; +namespace detail { +namespace { +std::pair IsVisible(Vertex *vertex, Transaction *transaction, View view) { + bool exists = true; + bool deleted = false; Delta *delta = nullptr; { std::lock_guard guard(vertex->lock); - is_visible = !vertex->deleted; + deleted = vertex->deleted; delta = vertex->delta; } - ApplyDeltasForRead(transaction, delta, view, [&is_visible](const Delta &delta) { + ApplyDeltasForRead(transaction, delta, view, [&](const Delta &delta) { switch (delta.action) { case Delta::Action::ADD_LABEL: case Delta::Action::REMOVE_LABEL: @@ -31,19 +34,35 @@ std::optional VertexAccessor::Create(Vertex *vertex, Transaction case Delta::Action::REMOVE_OUT_EDGE: break; case Delta::Action::RECREATE_OBJECT: { - is_visible = true; + deleted = false; break; } case Delta::Action::DELETE_OBJECT: { - is_visible = false; + exists = false; break; } } }); - if (!is_visible) return std::nullopt; + + return {exists, 
deleted}; +} +} // namespace +} // namespace detail + +std::optional VertexAccessor::Create(Vertex *vertex, Transaction *transaction, Indices *indices, + Constraints *constraints, Config::Items config, View view) { + if (const auto [exists, deleted] = detail::IsVisible(vertex, transaction, view); !exists || deleted) { + return std::nullopt; + } + return VertexAccessor{vertex, transaction, indices, constraints, config}; } +bool VertexAccessor::IsVisible(View view) const { + const auto [exists, deleted] = detail::IsVisible(vertex_, transaction_, view); + return exists && (for_deleted_ || !deleted); +} + Result VertexAccessor::AddLabel(LabelId label) { utils::MemoryTracker::OutOfMemoryExceptionEnabler oom_exception; std::lock_guard guard(vertex_->lock); @@ -177,7 +196,7 @@ Result> VertexAccessor::Labels(View view) const { return std::move(labels); } -Result VertexAccessor::SetProperty(PropertyId property, const PropertyValue &value) { +Result VertexAccessor::SetProperty(PropertyId property, const PropertyValue &value) { utils::MemoryTracker::OutOfMemoryExceptionEnabler oom_exception; std::lock_guard guard(vertex_->lock); @@ -186,22 +205,21 @@ Result VertexAccessor::SetProperty(PropertyId property, const PropertyValu if (vertex_->deleted) return Error::DELETED_OBJECT; auto current_value = vertex_->properties.GetProperty(property); - bool existed = !current_value.IsNull(); // We could skip setting the value if the previous one is the same to the new // one. This would save some memory as a delta would not be created as well as // avoid copying the value. The reason we are not doing that is because the // current code always follows the logical pattern of "create a delta" and // "modify in-place". Additionally, the created delta will make other // transactions get a SERIALIZATION_ERROR. 
- CreateAndLinkDelta(transaction_, vertex_, Delta::SetPropertyTag(), property, std::move(current_value)); + CreateAndLinkDelta(transaction_, vertex_, Delta::SetPropertyTag(), property, current_value); vertex_->properties.SetProperty(property, value); UpdateOnSetProperty(indices_, property, value, vertex_, *transaction_); - return !existed; + return std::move(current_value); } -Result VertexAccessor::ClearProperties() { +Result> VertexAccessor::ClearProperties() { std::lock_guard guard(vertex_->lock); if (!PrepareForWrite(transaction_, vertex_)) return Error::SERIALIZATION_ERROR; @@ -209,7 +227,6 @@ Result VertexAccessor::ClearProperties() { if (vertex_->deleted) return Error::DELETED_OBJECT; auto properties = vertex_->properties.Properties(); - bool removed = !properties.empty(); for (const auto &property : properties) { CreateAndLinkDelta(transaction_, vertex_, Delta::SetPropertyTag(), property.first, property.second); UpdateOnSetProperty(indices_, property.first, PropertyValue(), vertex_, *transaction_); @@ -217,7 +234,7 @@ Result VertexAccessor::ClearProperties() { vertex_->properties.ClearProperties(); - return removed; + return std::move(properties); } Result VertexAccessor::GetProperty(PropertyId property, View view) const { diff --git a/src/storage/v2/vertex_accessor.hpp b/src/storage/v2/vertex_accessor.hpp index 4bd6c4cf9..a24f02a23 100644 --- a/src/storage/v2/vertex_accessor.hpp +++ b/src/storage/v2/vertex_accessor.hpp @@ -33,6 +33,9 @@ class VertexAccessor final { static std::optional Create(Vertex *vertex, Transaction *transaction, Indices *indices, Constraints *constraints, Config::Items config, View view); + /// @return true if the object is visible from the current transaction + bool IsVisible(View view) const; + /// Add a label and return `true` if insertion took place. /// `false` is returned if the label already existed. /// @throw std::bad_alloc @@ -50,15 +53,13 @@ class VertexAccessor final { /// std::vector::max_size(). 
Result> Labels(View view) const; - /// Set a property value and return `true` if insertion took place. - /// `false` is returned if assignment took place. + /// Set a property value and return the old value. /// @throw std::bad_alloc - Result SetProperty(PropertyId property, const PropertyValue &value); + Result SetProperty(PropertyId property, const PropertyValue &value); - /// Remove all properties and return `true` if any removal took place. - /// `false` is returned if there were no properties to remove. + /// Remove all properties and return the values of the removed properties. /// @throw std::bad_alloc - Result ClearProperties(); + Result> ClearProperties(); /// @throw std::bad_alloc Result GetProperty(PropertyId property, View view) const; diff --git a/src/utils/concepts.hpp b/src/utils/concepts.hpp new file mode 100644 index 000000000..37365fe98 --- /dev/null +++ b/src/utils/concepts.hpp @@ -0,0 +1,7 @@ +#pragma once +#include + +namespace utils { +template +concept SameAsAnyOf = (std::same_as || ...); +} // namespace utils diff --git a/tests/concurrent/storage_indices.cpp b/tests/concurrent/storage_indices.cpp index 5494fd1ed..792ef2937 100644 --- a/tests/concurrent/storage_indices.cpp +++ b/tests/concurrent/storage_indices.cpp @@ -120,9 +120,9 @@ TEST(Storage, LabelPropertyIndex) { ASSERT_TRUE(*ret); } { - auto ret = vertex.SetProperty(prop, storage::PropertyValue(vertex.Gid().AsInt())); - ASSERT_TRUE(ret.HasValue()); - ASSERT_TRUE(*ret); + auto old_value = vertex.SetProperty(prop, storage::PropertyValue(vertex.Gid().AsInt())); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_TRUE(old_value->IsNull()); } ASSERT_FALSE(acc.Commit().HasError()); } @@ -164,9 +164,9 @@ TEST(Storage, LabelPropertyIndex) { ASSERT_TRUE(*ret); } { - auto ret = vertex.SetProperty(prop, storage::PropertyValue(vertex.Gid().AsInt())); - ASSERT_TRUE(ret.HasValue()); - ASSERT_TRUE(*ret); + auto old_value = vertex.SetProperty(prop, storage::PropertyValue(vertex.Gid().AsInt())); + 
ASSERT_TRUE(old_value.HasValue()); + ASSERT_TRUE(old_value->IsNull()); } ASSERT_FALSE(acc.Commit().HasError()); } diff --git a/tests/unit/storage_v2.cpp b/tests/unit/storage_v2.cpp index 62c3dd76b..db9082bd7 100644 --- a/tests/unit/storage_v2.cpp +++ b/tests/unit/storage_v2.cpp @@ -754,7 +754,7 @@ TEST(StorageV2, VertexDeleteProperty) { ASSERT_EQ(vertex->Properties(storage::View::NEW)->size(), 0); // Set property 5 to "nandare" - ASSERT_TRUE(vertex->SetProperty(property, storage::PropertyValue("nandare")).GetValue()); + ASSERT_TRUE(vertex->SetProperty(property, storage::PropertyValue("nandare"))->IsNull()); // Check whether property 5 exists ASSERT_TRUE(vertex->GetProperty(property, storage::View::OLD)->IsNull()); @@ -801,7 +801,7 @@ TEST(StorageV2, VertexDeleteProperty) { ASSERT_EQ(vertex->Properties(storage::View::NEW)->size(), 0); // Set property 5 to "nandare" - ASSERT_TRUE(vertex->SetProperty(property, storage::PropertyValue("nandare")).GetValue()); + ASSERT_TRUE(vertex->SetProperty(property, storage::PropertyValue("nandare"))->IsNull()); // Check whether property 5 exists ASSERT_TRUE(vertex->GetProperty(property, storage::View::OLD)->IsNull()); @@ -1349,9 +1349,9 @@ TEST(StorageV2, VertexPropertyCommit) { ASSERT_EQ(vertex.Properties(storage::View::NEW)->size(), 0); { - auto res = vertex.SetProperty(property, storage::PropertyValue("temporary")); - ASSERT_TRUE(res.HasValue()); - ASSERT_TRUE(res.GetValue()); + auto old_value = vertex.SetProperty(property, storage::PropertyValue("temporary")); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_TRUE(old_value->IsNull()); } ASSERT_EQ(vertex.GetProperty(property, storage::View::NEW)->ValueString(), "temporary"); @@ -1362,9 +1362,9 @@ TEST(StorageV2, VertexPropertyCommit) { } { - auto res = vertex.SetProperty(property, storage::PropertyValue("nandare")); - ASSERT_TRUE(res.HasValue()); - ASSERT_FALSE(res.GetValue()); + auto old_value = vertex.SetProperty(property, storage::PropertyValue("nandare")); + 
ASSERT_TRUE(old_value.HasValue()); + ASSERT_FALSE(old_value->IsNull()); } ASSERT_EQ(vertex.GetProperty(property, storage::View::NEW)->ValueString(), "nandare"); @@ -1412,9 +1412,9 @@ TEST(StorageV2, VertexPropertyCommit) { auto property = acc.NameToProperty("property5"); { - auto res = vertex->SetProperty(property, storage::PropertyValue()); - ASSERT_TRUE(res.HasValue()); - ASSERT_FALSE(res.GetValue()); + auto old_value = vertex->SetProperty(property, storage::PropertyValue()); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_FALSE(old_value->IsNull()); } ASSERT_EQ(vertex->GetProperty(property, storage::View::OLD)->ValueString(), "nandare"); @@ -1428,9 +1428,9 @@ TEST(StorageV2, VertexPropertyCommit) { ASSERT_EQ(vertex->Properties(storage::View::NEW)->size(), 0); { - auto res = vertex->SetProperty(property, storage::PropertyValue()); - ASSERT_TRUE(res.HasValue()); - ASSERT_TRUE(res.GetValue()); + auto old_value = vertex->SetProperty(property, storage::PropertyValue()); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_TRUE(old_value->IsNull()); } ASSERT_FALSE(acc.Commit().HasError()); @@ -1481,9 +1481,9 @@ TEST(StorageV2, VertexPropertyAbort) { ASSERT_EQ(vertex->Properties(storage::View::NEW)->size(), 0); { - auto res = vertex->SetProperty(property, storage::PropertyValue("temporary")); - ASSERT_TRUE(res.HasValue()); - ASSERT_TRUE(res.GetValue()); + auto old_value = vertex->SetProperty(property, storage::PropertyValue("temporary")); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_TRUE(old_value->IsNull()); } ASSERT_EQ(vertex->GetProperty(property, storage::View::NEW)->ValueString(), "temporary"); @@ -1494,9 +1494,9 @@ TEST(StorageV2, VertexPropertyAbort) { } { - auto res = vertex->SetProperty(property, storage::PropertyValue("nandare")); - ASSERT_TRUE(res.HasValue()); - ASSERT_FALSE(res.GetValue()); + auto old_value = vertex->SetProperty(property, storage::PropertyValue("nandare")); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_FALSE(old_value->IsNull()); } 
ASSERT_EQ(vertex->GetProperty(property, storage::View::NEW)->ValueString(), "nandare"); @@ -1542,9 +1542,9 @@ TEST(StorageV2, VertexPropertyAbort) { ASSERT_EQ(vertex->Properties(storage::View::NEW)->size(), 0); { - auto res = vertex->SetProperty(property, storage::PropertyValue("temporary")); - ASSERT_TRUE(res.HasValue()); - ASSERT_TRUE(res.GetValue()); + auto old_value = vertex->SetProperty(property, storage::PropertyValue("temporary")); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_TRUE(old_value->IsNull()); } ASSERT_EQ(vertex->GetProperty(property, storage::View::NEW)->ValueString(), "temporary"); @@ -1555,9 +1555,9 @@ TEST(StorageV2, VertexPropertyAbort) { } { - auto res = vertex->SetProperty(property, storage::PropertyValue("nandare")); - ASSERT_TRUE(res.HasValue()); - ASSERT_FALSE(res.GetValue()); + auto old_value = vertex->SetProperty(property, storage::PropertyValue("nandare")); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_FALSE(old_value->IsNull()); } ASSERT_EQ(vertex->GetProperty(property, storage::View::NEW)->ValueString(), "nandare"); @@ -1623,9 +1623,9 @@ TEST(StorageV2, VertexPropertyAbort) { } { - auto res = vertex->SetProperty(property, storage::PropertyValue()); - ASSERT_TRUE(res.HasValue()); - ASSERT_FALSE(res.GetValue()); + auto old_value = vertex->SetProperty(property, storage::PropertyValue()); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_FALSE(old_value->IsNull()); } ASSERT_EQ(vertex->GetProperty(property, storage::View::OLD)->ValueString(), "nandare"); @@ -1694,9 +1694,9 @@ TEST(StorageV2, VertexPropertyAbort) { } { - auto res = vertex->SetProperty(property, storage::PropertyValue()); - ASSERT_TRUE(res.HasValue()); - ASSERT_FALSE(res.GetValue()); + auto old_value = vertex->SetProperty(property, storage::PropertyValue()); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_FALSE(old_value->IsNull()); } ASSERT_EQ(vertex->GetProperty(property, storage::View::OLD)->ValueString(), "nandare"); @@ -1764,9 +1764,9 @@ TEST(StorageV2, 
VertexPropertySerializationError) { ASSERT_EQ(vertex->Properties(storage::View::NEW)->size(), 0); { - auto res = vertex->SetProperty(property1, storage::PropertyValue(123)); - ASSERT_TRUE(res.HasValue()); - ASSERT_TRUE(res.GetValue()); + auto old_value = vertex->SetProperty(property1, storage::PropertyValue(123)); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_TRUE(old_value->IsNull()); } ASSERT_TRUE(vertex->GetProperty(property1, storage::View::OLD)->IsNull()); @@ -1886,7 +1886,7 @@ TEST(StorageV2, VertexLabelPropertyMixed) { ASSERT_EQ(vertex.Properties(storage::View::NEW)->size(), 0); // Set property 5 to "nandare" - ASSERT_TRUE(vertex.SetProperty(property, storage::PropertyValue("nandare")).GetValue()); + ASSERT_TRUE(vertex.SetProperty(property, storage::PropertyValue("nandare"))->IsNull()); // Check whether label 5 and property 5 exist ASSERT_TRUE(vertex.HasLabel(label, storage::View::OLD).GetValue()); @@ -1940,7 +1940,7 @@ TEST(StorageV2, VertexLabelPropertyMixed) { } // Set property 5 to "haihai" - ASSERT_FALSE(vertex.SetProperty(property, storage::PropertyValue("haihai")).GetValue()); + ASSERT_FALSE(vertex.SetProperty(property, storage::PropertyValue("haihai"))->IsNull()); // Check whether label 5 and property 5 exist ASSERT_TRUE(vertex.HasLabel(label, storage::View::OLD).GetValue()); @@ -2044,7 +2044,7 @@ TEST(StorageV2, VertexLabelPropertyMixed) { } // Set property 5 to null - ASSERT_FALSE(vertex.SetProperty(property, storage::PropertyValue()).GetValue()); + ASSERT_FALSE(vertex.SetProperty(property, storage::PropertyValue())->IsNull()); // Check whether label 5 and property 5 exist ASSERT_FALSE(vertex.HasLabel(label, storage::View::OLD).GetValue()); @@ -2086,9 +2086,9 @@ TEST(StorageV2, VertexPropertyClear) { auto vertex = acc.CreateVertex(); gid = vertex.Gid(); - auto res = vertex.SetProperty(property1, storage::PropertyValue("value")); - ASSERT_TRUE(res.HasValue()); - ASSERT_TRUE(res.GetValue()); + auto old_value = vertex.SetProperty(property1, 
storage::PropertyValue("value")); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_TRUE(old_value->IsNull()); ASSERT_FALSE(acc.Commit().HasError()); } @@ -2103,9 +2103,9 @@ TEST(StorageV2, VertexPropertyClear) { UnorderedElementsAre(std::pair(property1, storage::PropertyValue("value")))); { - auto ret = vertex->ClearProperties(); - ASSERT_TRUE(ret.HasValue()); - ASSERT_TRUE(ret.GetValue()); + auto old_values = vertex->ClearProperties(); + ASSERT_TRUE(old_values.HasValue()); + ASSERT_FALSE(old_values->empty()); } ASSERT_TRUE(vertex->GetProperty(property1, storage::View::NEW)->IsNull()); @@ -2113,9 +2113,9 @@ TEST(StorageV2, VertexPropertyClear) { ASSERT_EQ(vertex->Properties(storage::View::NEW).GetValue().size(), 0); { - auto ret = vertex->ClearProperties(); - ASSERT_TRUE(ret.HasValue()); - ASSERT_FALSE(ret.GetValue()); + auto old_values = vertex->ClearProperties(); + ASSERT_TRUE(old_values.HasValue()); + ASSERT_TRUE(old_values->empty()); } ASSERT_TRUE(vertex->GetProperty(property1, storage::View::NEW)->IsNull()); @@ -2129,9 +2129,9 @@ TEST(StorageV2, VertexPropertyClear) { auto vertex = acc.FindVertex(gid, storage::View::OLD); ASSERT_TRUE(vertex); - auto res = vertex->SetProperty(property2, storage::PropertyValue(42)); - ASSERT_TRUE(res.HasValue()); - ASSERT_TRUE(res.GetValue()); + auto old_value = vertex->SetProperty(property2, storage::PropertyValue(42)); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_TRUE(old_value->IsNull()); ASSERT_FALSE(acc.Commit().HasError()); } @@ -2147,9 +2147,9 @@ TEST(StorageV2, VertexPropertyClear) { std::pair(property2, storage::PropertyValue(42)))); { - auto ret = vertex->ClearProperties(); - ASSERT_TRUE(ret.HasValue()); - ASSERT_TRUE(ret.GetValue()); + auto old_values = vertex->ClearProperties(); + ASSERT_TRUE(old_values.HasValue()); + ASSERT_FALSE(old_values->empty()); } ASSERT_TRUE(vertex->GetProperty(property1, storage::View::NEW)->IsNull()); @@ -2157,9 +2157,9 @@ TEST(StorageV2, VertexPropertyClear) { 
ASSERT_EQ(vertex->Properties(storage::View::NEW).GetValue().size(), 0); { - auto ret = vertex->ClearProperties(); - ASSERT_TRUE(ret.HasValue()); - ASSERT_FALSE(ret.GetValue()); + auto old_values = vertex->ClearProperties(); + ASSERT_TRUE(old_values.HasValue()); + ASSERT_TRUE(old_values->empty()); } ASSERT_TRUE(vertex->GetProperty(property1, storage::View::NEW)->IsNull()); diff --git a/tests/unit/storage_v2_edge.cpp b/tests/unit/storage_v2_edge.cpp index 2b6bcf421..89eff1e7d 100644 --- a/tests/unit/storage_v2_edge.cpp +++ b/tests/unit/storage_v2_edge.cpp @@ -3324,7 +3324,7 @@ TEST_P(StorageEdgeTest, VertexDetachDeleteSingleCommit) { { auto ret = acc.DetachDeleteVertex(&*vertex_from); ASSERT_TRUE(ret.HasValue()); - ASSERT_TRUE(ret.GetValue()); + ASSERT_TRUE(*ret); } // Check edges @@ -3543,7 +3543,7 @@ TEST_P(StorageEdgeTest, VertexDetachDeleteMultipleCommit) { { auto ret = acc.DetachDeleteVertex(&*vertex1); ASSERT_TRUE(ret.HasValue()); - ASSERT_TRUE(ret.GetValue()); + ASSERT_TRUE(*ret); } // Check edges @@ -3791,7 +3791,7 @@ TEST_P(StorageEdgeTest, VertexDetachDeleteSingleAbort) { { auto ret = acc.DetachDeleteVertex(&*vertex_from); ASSERT_TRUE(ret.HasValue()); - ASSERT_TRUE(ret.GetValue()); + ASSERT_TRUE(*ret); } // Check edges @@ -3895,7 +3895,7 @@ TEST_P(StorageEdgeTest, VertexDetachDeleteSingleAbort) { { auto ret = acc.DetachDeleteVertex(&*vertex_from); ASSERT_TRUE(ret.HasValue()); - ASSERT_TRUE(ret.GetValue()); + ASSERT_TRUE(*ret); } // Check edges @@ -4114,7 +4114,7 @@ TEST_P(StorageEdgeTest, VertexDetachDeleteMultipleAbort) { { auto ret = acc.DetachDeleteVertex(&*vertex1); ASSERT_TRUE(ret.HasValue()); - ASSERT_TRUE(ret.GetValue()); + ASSERT_TRUE(*ret); } // Check edges @@ -4430,7 +4430,7 @@ TEST_P(StorageEdgeTest, VertexDetachDeleteMultipleAbort) { { auto ret = acc.DetachDeleteVertex(&*vertex1); ASSERT_TRUE(ret.HasValue()); - ASSERT_TRUE(ret.GetValue()); + ASSERT_TRUE(*ret); } // Check edges @@ -4622,9 +4622,9 @@ TEST(StorageWithProperties, EdgePropertyCommit) 
{ ASSERT_EQ(edge.Properties(storage::View::NEW)->size(), 0); { - auto res = edge.SetProperty(property, storage::PropertyValue("temporary")); - ASSERT_TRUE(res.HasValue()); - ASSERT_TRUE(res.GetValue()); + auto old_value = edge.SetProperty(property, storage::PropertyValue("temporary")); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_TRUE(old_value->IsNull()); } ASSERT_EQ(edge.GetProperty(property, storage::View::NEW)->ValueString(), "temporary"); @@ -4635,9 +4635,9 @@ TEST(StorageWithProperties, EdgePropertyCommit) { } { - auto res = edge.SetProperty(property, storage::PropertyValue("nandare")); - ASSERT_TRUE(res.HasValue()); - ASSERT_FALSE(res.GetValue()); + auto old_value = edge.SetProperty(property, storage::PropertyValue("nandare")); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_FALSE(old_value->IsNull()); } ASSERT_EQ(edge.GetProperty(property, storage::View::NEW)->ValueString(), "nandare"); @@ -4687,9 +4687,9 @@ TEST(StorageWithProperties, EdgePropertyCommit) { auto property = acc.NameToProperty("property5"); { - auto res = edge.SetProperty(property, storage::PropertyValue()); - ASSERT_TRUE(res.HasValue()); - ASSERT_FALSE(res.GetValue()); + auto old_value = edge.SetProperty(property, storage::PropertyValue()); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_FALSE(old_value->IsNull()); } ASSERT_EQ(edge.GetProperty(property, storage::View::OLD)->ValueString(), "nandare"); @@ -4703,9 +4703,9 @@ TEST(StorageWithProperties, EdgePropertyCommit) { ASSERT_EQ(edge.Properties(storage::View::NEW)->size(), 0); { - auto res = edge.SetProperty(property, storage::PropertyValue()); - ASSERT_TRUE(res.HasValue()); - ASSERT_TRUE(res.GetValue()); + auto old_value = edge.SetProperty(property, storage::PropertyValue()); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_TRUE(old_value->IsNull()); } ASSERT_FALSE(acc.Commit().HasError()); @@ -4763,9 +4763,9 @@ TEST(StorageWithProperties, EdgePropertyAbort) { ASSERT_EQ(edge.Properties(storage::View::NEW)->size(), 0); { - auto res = 
edge.SetProperty(property, storage::PropertyValue("temporary")); - ASSERT_TRUE(res.HasValue()); - ASSERT_TRUE(res.GetValue()); + auto old_value = edge.SetProperty(property, storage::PropertyValue("temporary")); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_TRUE(old_value->IsNull()); } ASSERT_EQ(edge.GetProperty(property, storage::View::NEW)->ValueString(), "temporary"); @@ -4776,9 +4776,9 @@ TEST(StorageWithProperties, EdgePropertyAbort) { } { - auto res = edge.SetProperty(property, storage::PropertyValue("nandare")); - ASSERT_TRUE(res.HasValue()); - ASSERT_FALSE(res.GetValue()); + auto old_value = edge.SetProperty(property, storage::PropertyValue("nandare")); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_FALSE(old_value->IsNull()); } ASSERT_EQ(edge.GetProperty(property, storage::View::NEW)->ValueString(), "nandare"); @@ -4826,9 +4826,9 @@ TEST(StorageWithProperties, EdgePropertyAbort) { ASSERT_EQ(edge.Properties(storage::View::NEW)->size(), 0); { - auto res = edge.SetProperty(property, storage::PropertyValue("temporary")); - ASSERT_TRUE(res.HasValue()); - ASSERT_TRUE(res.GetValue()); + auto old_value = edge.SetProperty(property, storage::PropertyValue("temporary")); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_TRUE(old_value->IsNull()); } ASSERT_EQ(edge.GetProperty(property, storage::View::NEW)->ValueString(), "temporary"); @@ -4839,9 +4839,9 @@ TEST(StorageWithProperties, EdgePropertyAbort) { } { - auto res = edge.SetProperty(property, storage::PropertyValue("nandare")); - ASSERT_TRUE(res.HasValue()); - ASSERT_FALSE(res.GetValue()); + auto old_value = edge.SetProperty(property, storage::PropertyValue("nandare")); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_FALSE(old_value->IsNull()); } ASSERT_EQ(edge.GetProperty(property, storage::View::NEW)->ValueString(), "nandare"); @@ -4909,9 +4909,9 @@ TEST(StorageWithProperties, EdgePropertyAbort) { } { - auto res = edge.SetProperty(property, storage::PropertyValue()); - ASSERT_TRUE(res.HasValue()); - 
ASSERT_FALSE(res.GetValue()); + auto old_value = edge.SetProperty(property, storage::PropertyValue()); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_FALSE(old_value->IsNull()); } ASSERT_EQ(edge.GetProperty(property, storage::View::OLD)->ValueString(), "nandare"); @@ -4982,9 +4982,9 @@ TEST(StorageWithProperties, EdgePropertyAbort) { } { - auto res = edge.SetProperty(property, storage::PropertyValue()); - ASSERT_TRUE(res.HasValue()); - ASSERT_FALSE(res.GetValue()); + auto old_value = edge.SetProperty(property, storage::PropertyValue()); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_FALSE(old_value->IsNull()); } ASSERT_EQ(edge.GetProperty(property, storage::View::OLD)->ValueString(), "nandare"); @@ -5059,9 +5059,9 @@ TEST(StorageWithProperties, EdgePropertySerializationError) { ASSERT_EQ(edge.Properties(storage::View::NEW)->size(), 0); { - auto res = edge.SetProperty(property1, storage::PropertyValue(123)); - ASSERT_TRUE(res.HasValue()); - ASSERT_TRUE(res.GetValue()); + auto old_value = edge.SetProperty(property1, storage::PropertyValue(123)); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_TRUE(old_value->IsNull()); } ASSERT_TRUE(edge.GetProperty(property1, storage::View::OLD)->IsNull()); @@ -5148,9 +5148,9 @@ TEST(StorageWithProperties, EdgePropertyClear) { ASSERT_EQ(edge.FromVertex(), vertex); ASSERT_EQ(edge.ToVertex(), vertex); - auto res = edge.SetProperty(property1, storage::PropertyValue("value")); - ASSERT_TRUE(res.HasValue()); - ASSERT_TRUE(res.GetValue()); + auto old_value = edge.SetProperty(property1, storage::PropertyValue("value")); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_TRUE(old_value->IsNull()); ASSERT_FALSE(acc.Commit().HasError()); } @@ -5166,9 +5166,9 @@ TEST(StorageWithProperties, EdgePropertyClear) { UnorderedElementsAre(std::pair(property1, storage::PropertyValue("value")))); { - auto ret = edge.ClearProperties(); - ASSERT_TRUE(ret.HasValue()); - ASSERT_TRUE(ret.GetValue()); + auto old_values = edge.ClearProperties(); + 
ASSERT_TRUE(old_values.HasValue()); + ASSERT_FALSE(old_values->empty()); } ASSERT_TRUE(edge.GetProperty(property1, storage::View::NEW)->IsNull()); @@ -5176,9 +5176,9 @@ TEST(StorageWithProperties, EdgePropertyClear) { ASSERT_EQ(edge.Properties(storage::View::NEW).GetValue().size(), 0); { - auto ret = edge.ClearProperties(); - ASSERT_TRUE(ret.HasValue()); - ASSERT_FALSE(ret.GetValue()); + auto old_values = edge.ClearProperties(); + ASSERT_TRUE(old_values.HasValue()); + ASSERT_TRUE(old_values->empty()); } ASSERT_TRUE(edge.GetProperty(property1, storage::View::NEW)->IsNull()); @@ -5193,9 +5193,9 @@ TEST(StorageWithProperties, EdgePropertyClear) { ASSERT_TRUE(vertex); auto edge = vertex->OutEdges(storage::View::NEW).GetValue()[0]; - auto res = edge.SetProperty(property2, storage::PropertyValue(42)); - ASSERT_TRUE(res.HasValue()); - ASSERT_TRUE(res.GetValue()); + auto old_value = edge.SetProperty(property2, storage::PropertyValue(42)); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_TRUE(old_value->IsNull()); ASSERT_FALSE(acc.Commit().HasError()); } @@ -5212,9 +5212,9 @@ TEST(StorageWithProperties, EdgePropertyClear) { std::pair(property2, storage::PropertyValue(42)))); { - auto ret = edge.ClearProperties(); - ASSERT_TRUE(ret.HasValue()); - ASSERT_TRUE(ret.GetValue()); + auto old_values = edge.ClearProperties(); + ASSERT_TRUE(old_values.HasValue()); + ASSERT_FALSE(old_values->empty()); } ASSERT_TRUE(edge.GetProperty(property1, storage::View::NEW)->IsNull()); @@ -5222,9 +5222,9 @@ TEST(StorageWithProperties, EdgePropertyClear) { ASSERT_EQ(edge.Properties(storage::View::NEW).GetValue().size(), 0); { - auto ret = edge.ClearProperties(); - ASSERT_TRUE(ret.HasValue()); - ASSERT_FALSE(ret.GetValue()); + auto old_values = edge.ClearProperties(); + ASSERT_TRUE(old_values.HasValue()); + ASSERT_TRUE(old_values->empty()); } ASSERT_TRUE(edge.GetProperty(property1, storage::View::NEW)->IsNull()); @@ -5361,7 +5361,7 @@ TEST(StorageWithProperties, EdgeNonexistentPropertyAPI) { 
ASSERT_EQ(*edge->GetProperty(property, storage::View::NEW), storage::PropertyValue()); // Modify edge. - ASSERT_TRUE(edge->SetProperty(property, storage::PropertyValue("value")).HasValue()); + ASSERT_TRUE(edge->SetProperty(property, storage::PropertyValue("value"))->IsNull()); // Check state after (OLD view). ASSERT_EQ(edge->Properties(storage::View::OLD).GetError(), storage::Error::NONEXISTENT_OBJECT); From 883f9c7ed384911f3c2387dd1a22d5593e43f720 Mon Sep 17 00:00:00 2001 From: antonio2368 Date: Thu, 13 May 2021 15:38:48 +0200 Subject: [PATCH 45/63] Trigger event types (#144) Co-authored-by: Benjamin Antal --- src/query/context.hpp | 4 +- src/query/interpreter.cpp | 124 ++++---- src/query/interpreter.hpp | 2 +- src/query/plan/operator.cpp | 76 ++--- src/query/trigger.cpp | 467 +++++++++++++++++++++-------- src/query/trigger.hpp | 406 +++++++++++++++++-------- tests/unit/CMakeLists.txt | 3 + tests/unit/query_trigger.cpp | 553 +++++++++++++++++++++++++++++++++++ 8 files changed, 1293 insertions(+), 342 deletions(-) create mode 100644 tests/unit/query_trigger.cpp diff --git a/src/query/context.hpp b/src/query/context.hpp index 286e5adf5..2bced7a1a 100644 --- a/src/query/context.hpp +++ b/src/query/context.hpp @@ -57,9 +57,7 @@ struct ExecutionContext { std::chrono::duration profile_execution_time; plan::ProfilingStats stats; plan::ProfilingStats *stats_root{nullptr}; - - // trigger context - TriggerContext *trigger_context{nullptr}; + TriggerContextCollector *trigger_context_collector{nullptr}; }; inline bool MustAbort(const ExecutionContext &context) { diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp index 6881615fd..4d8ca0221 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -18,6 +18,7 @@ #include "query/plan/planner.hpp" #include "query/plan/profile.hpp" #include "query/plan/vertex_count_cache.hpp" +#include "query/trigger.hpp" #include "query/typed_value.hpp" #include "utils/algorithm.hpp" #include 
"utils/csv_parsing.hpp" @@ -467,7 +468,8 @@ struct PullPlanVector { struct PullPlan { explicit PullPlan(std::shared_ptr plan, const Parameters ¶meters, bool is_profile_query, DbAccessor *dba, InterpreterContext *interpreter_context, utils::MemoryResource *execution_memory, - TriggerContext *trigger_context = nullptr, std::optional memory_limit = {}); + TriggerContextCollector *trigger_context_collector = nullptr, + std::optional memory_limit = {}); std::optional Pull(AnyStream *stream, std::optional n, const std::vector &output_symbols, std::map *summary); @@ -495,7 +497,7 @@ struct PullPlan { PullPlan::PullPlan(const std::shared_ptr plan, const Parameters ¶meters, const bool is_profile_query, DbAccessor *dba, InterpreterContext *interpreter_context, utils::MemoryResource *execution_memory, - TriggerContext *trigger_context, const std::optional memory_limit) + TriggerContextCollector *trigger_context_collector, const std::optional memory_limit) : plan_(plan), cursor_(plan->plan().MakeCursor(execution_memory)), frame_(plan->symbol_table().max_position(), execution_memory), @@ -512,7 +514,7 @@ PullPlan::PullPlan(const std::shared_ptr plan, const Parameters &par ctx_.max_execution_time_sec = interpreter_context->execution_timeout_sec; ctx_.is_shutting_down = &interpreter_context->is_shutting_down; ctx_.is_profile_query = is_profile_query; - ctx_.trigger_context = trigger_context; + ctx_.trigger_context_collector = trigger_context_collector; } std::optional PullPlan::Pull(AnyStream *stream, std::optional n, @@ -599,40 +601,54 @@ using RWType = plan::ReadWriteTypeChecker::RWType; Interpreter::Interpreter(InterpreterContext *interpreter_context) : interpreter_context_(interpreter_context) { MG_ASSERT(interpreter_context_, "Interpreter context must not be NULL"); // try { - // { - // auto storage_acc = interpreter_context_->db->Access(); - // DbAccessor dba(&storage_acc); - // auto triggers_acc = interpreter_context_->before_commit_triggers.access(); - // 
triggers_acc.insert(Trigger{"BeforeDelete", - // "UNWIND deletedVertices as u CREATE(:DELETED_VERTEX {id: id(u) + 10})", - // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock}); - // triggers_acc.insert(Trigger{"BeforeDeleteEdge", "UNWIND deletedEdges as u CREATE(:DELETED_EDGE {id: id(u) + - // 10})", - // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock}); - // // triggers_acc.insert(Trigger{"BeforeDelete2", "UNWIND deletedEdges as u SET u.deleted = 0", - // // &interpreter_context_->ast_cache, &dba, - // // &interpreter_context_->antlr_lock}); - // triggers_acc.insert(Trigger{"BeforeDeleteProcedure", "CALL script.procedure(updatedVertices) YIELD * RETURN *", - // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock}); - // triggers_acc.insert(Trigger{"BeforeCreator", "UNWIND createdVertices as u SET u.before = id(u) + 10", - // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock}); - // triggers_acc.insert(Trigger{"BeforeCreatorEdge", "UNWIND createdEdges as u SET u.before = id(u) + 10", - // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock}); - // triggers_acc.insert(Trigger{"BeforeSetLabelProcedure", - // "CALL label.procedure(assignedVertexLabels) YIELD * RETURN *", - // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock}); - // } - // { - // auto storage_acc = interpreter_context->db->Access(); - // DbAccessor dba(&storage_acc); - // auto triggers_acc = interpreter_context->after_commit_triggers.access(); - // triggers_acc.insert(Trigger{"AfterDelete", "UNWIND deletedVertices as u CREATE(:DELETED {id: u.id + 100})", - // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock}); - // triggers_acc.insert(Trigger{"AfterCreator", "UNWIND createdVertices as u SET u.after = u.id + 100", - // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock}); - // 
triggers_acc.insert(Trigger{"AfterUpdateProcedure", "CALL script.procedure(updatedVertices) YIELD * RETURN *", - // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock}); - // } + // { + // auto storage_acc = interpreter_context_->db->Access(); + // DbAccessor dba(&storage_acc); + // auto triggers_acc = interpreter_context_->before_commit_triggers.access(); + // triggers_acc.insert(Trigger{"BeforeDelete", + // "UNWIND deletedVertices as u CREATE(:DELETED_VERTEX {id: id(u) + 10})", + // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock, + // TriggerEventType::VERTEX_DELETE}); + // triggers_acc.insert(Trigger{"BeforeUpdatePropertyi", + // "UNWIND assignedVertexProperties as u SET u.vertex.two = u.new", + // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock, + // TriggerEventType::VERTEX_UPDATE}); + // triggers_acc.insert(Trigger{"BeforeDeleteEdge", "UNWIND deletedEdges as u CREATE(:DELETED_EDGE {id: id(u) +10}) + // ", + // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock, + // TriggerEventType::EDGE_DELETE}); + // // triggers_acc.insert(Trigger{"BeforeDelete2", "UNWIND deletedEdges as u SET u.deleted = 0", + // // &interpreter_context_->ast_cache, &dba, + // // &interpreter_context_->antlr_lock}); + // triggers_acc.insert(Trigger{"BeforeDeleteProcedure", + // "CALL script.procedure('VERTEX_UPDATE', updatedVertices) YIELD * RETURN *", + // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock, + // TriggerEventType::VERTEX_UPDATE}); + // triggers_acc.insert(Trigger{"BeforeCreator", "UNWIND createdVertices as u SET u.before = id(u) + 10", + // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock, + // TriggerEventType::VERTEX_CREATE}); + // triggers_acc.insert(Trigger{"BeforeCreatorEdge", "UNWIND createdEdges as u SET u.before = id(u) + 10", + // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock, + // 
TriggerEventType::EDGE_CREATE}); + // triggers_acc.insert(Trigger{"BeforeSetLabelProcedure", + // "CALL label.procedure('VERTEX_UPDATE', assignedVertexLabels) YIELD * RETURN *", + // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock, + // TriggerEventType::VERTEX_UPDATE}); + // } + // { + // auto storage_acc = interpreter_context->db->Access(); + // DbAccessor dba(&storage_acc); + // auto triggers_acc = interpreter_context->after_commit_triggers.access(); + // triggers_acc.insert(Trigger{"AfterDelete", "UNWIND deletedVertices as u CREATE(:DELETED {id: u.id + 100})", + // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock, + // TriggerEventType::VERTEX_DELETE}); + // triggers_acc.insert(Trigger{"AfterCreator", "UNWIND createdVertices as u SET u.after = u.id + 100", + // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock, + // TriggerEventType::VERTEX_CREATE}); + // triggers_acc.insert(Trigger{ + // "AfterUpdateProcedure", "CALL script.procedure('UPDATE',updatedObjects) YIELD * RETURN *", + // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock, TriggerEventType::UPDATE}); + // } // } catch (const utils::BasicException &e) { // spdlog::critical("Failed to create a trigger because: {}", e.what()); // } @@ -654,7 +670,7 @@ PreparedQuery Interpreter::PrepareTransactionQuery(std::string_view query_upper) if (interpreter_context_->before_commit_triggers.size() > 0 || interpreter_context_->after_commit_triggers.size() > 0) { - trigger_context_.emplace(); + trigger_context_collector_.emplace(); } }; } else if (query_upper == "COMMIT") { @@ -702,7 +718,8 @@ PreparedQuery Interpreter::PrepareTransactionQuery(std::string_view query_upper) PreparedQuery PrepareCypherQuery(ParsedQuery parsed_query, std::map *summary, InterpreterContext *interpreter_context, DbAccessor *dba, - utils::MemoryResource *execution_memory, TriggerContext *trigger_context = nullptr) { + utils::MemoryResource 
*execution_memory, + TriggerContextCollector *trigger_context_collector = nullptr) { auto *cypher_query = utils::Downcast(parsed_query.query); Frame frame(0); @@ -740,7 +757,7 @@ PreparedQuery PrepareCypherQuery(ParsedQuery parsed_query, std::map(plan, parsed_query.parameters, false, dba, interpreter_context, - execution_memory, trigger_context, memory_limit); + execution_memory, trigger_context_collector, memory_limit); return PreparedQuery{std::move(header), std::move(parsed_query.required_privileges), [pull_plan = std::move(pull_plan), output_symbols = std::move(output_symbols), summary]( AnyStream *stream, std::optional n) -> std::optional { @@ -1373,7 +1390,7 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, if (utils::Downcast(parsed_query.query) && (interpreter_context_->before_commit_triggers.size() > 0 || interpreter_context_->after_commit_triggers.size() > 0)) { - trigger_context_.emplace(); + trigger_context_collector_.emplace(); } } @@ -1383,7 +1400,7 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, if (utils::Downcast(parsed_query.query)) { prepared_query = PrepareCypherQuery(std::move(parsed_query), &query_execution->summary, interpreter_context_, &*execution_db_accessor_, &query_execution->execution_memory, - trigger_context_ ? &*trigger_context_ : nullptr); + trigger_context_collector_ ? 
&*trigger_context_collector_ : nullptr); } else if (utils::Downcast(parsed_query.query)) { prepared_query = PrepareExplainQuery(std::move(parsed_query), &query_execution->summary, interpreter_context_, &*execution_db_accessor_, &query_execution->execution_memory); @@ -1451,7 +1468,7 @@ void Interpreter::Abort() { db_accessor_->Abort(); execution_db_accessor_.reset(); db_accessor_.reset(); - trigger_context_.reset(); + trigger_context_collector_.reset(); } namespace { @@ -1459,7 +1476,6 @@ void RunTriggersIndividually(const utils::SkipList &triggers, Interpret TriggerContext trigger_context) { // Run the triggers for (const auto &trigger : triggers.access()) { - spdlog::debug("Executing trigger '{}'", trigger.name()); utils::MonotonicBufferResource execution_memory{kExecutionMemoryBlockSize}; // create a new transaction for each trigger @@ -1512,16 +1528,20 @@ void Interpreter::Commit() { // a query. if (!db_accessor_) return; - if (trigger_context_) { + std::optional trigger_context = std::nullopt; + if (trigger_context_collector_) { + trigger_context.emplace(std::move(*trigger_context_collector_).TransformToTriggerContext()); + } + + if (trigger_context) { // Run the triggers for (const auto &trigger : interpreter_context_->before_commit_triggers.access()) { - spdlog::debug("Executing trigger '{}'", trigger.name()); utils::MonotonicBufferResource execution_memory{kExecutionMemoryBlockSize}; AdvanceCommand(); try { trigger.Execute(&*execution_db_accessor_, &execution_memory, *interpreter_context_->tsc_frequency, interpreter_context_->execution_timeout_sec, &interpreter_context_->is_shutting_down, - *trigger_context_); + *trigger_context); } catch (const utils::BasicException &e) { throw utils::BasicException( fmt::format("Trigger '{}' caused the transaction to fail.\nException: {}", trigger.name(), e.what())); @@ -1540,7 +1560,7 @@ void Interpreter::Commit() { auto property_name = execution_db_accessor_->PropertyToName(*constraint_violation.properties.begin()); 
execution_db_accessor_.reset(); db_accessor_.reset(); - trigger_context_.reset(); + trigger_context_collector_.reset(); throw QueryException("Unable to commit due to existence constraint violation on :{}({})", label_name, property_name); break; @@ -1553,7 +1573,7 @@ void Interpreter::Commit() { [this](auto &stream, const auto &prop) { stream << execution_db_accessor_->PropertyToName(prop); }); execution_db_accessor_.reset(); db_accessor_.reset(); - trigger_context_.reset(); + trigger_context_collector_.reset(); throw QueryException("Unable to commit due to unique constraint violation on :{}({})", label_name, property_names_stream.str()); break; @@ -1561,8 +1581,8 @@ void Interpreter::Commit() { } } - if (trigger_context_) { - background_thread_.AddTask([trigger_context = std::move(*trigger_context_), + if (trigger_context && interpreter_context_->after_commit_triggers.size() > 0) { + background_thread_.AddTask([trigger_context = std::move(*trigger_context), interpreter_context = this->interpreter_context_, user_transaction = std::shared_ptr(std::move(db_accessor_))]() mutable { RunTriggersIndividually(interpreter_context->after_commit_triggers, interpreter_context, @@ -1574,7 +1594,7 @@ void Interpreter::Commit() { execution_db_accessor_.reset(); db_accessor_.reset(); - trigger_context_.reset(); + trigger_context_collector_.reset(); SPDLOG_DEBUG("Finished comitting the transaction"); } diff --git a/src/query/interpreter.hpp b/src/query/interpreter.hpp index 3da93ec6a..8f42c430c 100644 --- a/src/query/interpreter.hpp +++ b/src/query/interpreter.hpp @@ -303,7 +303,7 @@ class Interpreter final { // move this unique_ptr into a shrared_ptr. 
std::unique_ptr db_accessor_; std::optional execution_db_accessor_; - std::optional trigger_context_; + std::optional trigger_context_collector_; bool in_explicit_transaction_{false}; bool expect_rollback_{false}; diff --git a/src/query/plan/operator.cpp b/src/query/plan/operator.cpp index 5e90c013e..49698bc20 100644 --- a/src/query/plan/operator.cpp +++ b/src/query/plan/operator.cpp @@ -209,8 +209,8 @@ bool CreateNode::CreateNodeCursor::Pull(Frame &frame, ExecutionContext &context) if (input_cursor_->Pull(frame, context)) { auto created_vertex = CreateLocalVertex(self_.node_info_, &frame, context); - if (context.trigger_context) { - context.trigger_context->RegisterCreatedObject(created_vertex); + if (context.trigger_context_collector) { + context.trigger_context_collector->RegisterCreatedObject(created_vertex); } return true; } @@ -311,8 +311,8 @@ bool CreateExpand::CreateExpandCursor::Pull(Frame &frame, ExecutionContext &cont } }(); - if (context.trigger_context) { - context.trigger_context->RegisterCreatedObject(created_edge); + if (context.trigger_context_collector) { + context.trigger_context_collector->RegisterCreatedObject(created_edge); } return true; @@ -329,8 +329,8 @@ VertexAccessor &CreateExpand::CreateExpandCursor::OtherVertex(Frame &frame, Exec return dest_node_value.ValueVertex(); } else { auto &created_vertex = CreateLocalVertex(self_.node_info_, &frame, context); - if (context.trigger_context) { - context.trigger_context->RegisterCreatedObject(created_vertex); + if (context.trigger_context_collector) { + context.trigger_context_collector->RegisterCreatedObject(created_vertex); } return created_vertex; } @@ -1848,8 +1848,8 @@ bool Delete::DeleteCursor::Pull(Frame &frame, ExecutionContext &context) { } } - if (context.trigger_context && maybe_value.GetValue()) { - context.trigger_context->RegisterDeletedObject(*maybe_value.GetValue()); + if (context.trigger_context_collector && maybe_value.GetValue()) { + 
context.trigger_context_collector->RegisterDeletedObject(*maybe_value.GetValue()); } } } @@ -1873,10 +1873,10 @@ bool Delete::DeleteCursor::Pull(Frame &frame, ExecutionContext &context) { throw QueryRuntimeException("Unexpected error when deleting a node."); } } - if (context.trigger_context && res.GetValue()) { - context.trigger_context->RegisterDeletedObject(res.GetValue()->first); + if (context.trigger_context_collector && res.GetValue()) { + context.trigger_context_collector->RegisterDeletedObject(res.GetValue()->first); for (const auto &deleted_edge : res.GetValue()->second) { - context.trigger_context->RegisterDeletedObject(deleted_edge); + context.trigger_context_collector->RegisterDeletedObject(deleted_edge); } } } else { @@ -1894,8 +1894,8 @@ bool Delete::DeleteCursor::Pull(Frame &frame, ExecutionContext &context) { } } - if (context.trigger_context && res.GetValue()) { - context.trigger_context->RegisterDeletedObject(*res.GetValue()); + if (context.trigger_context_collector && res.GetValue()) { + context.trigger_context_collector->RegisterDeletedObject(*res.GetValue()); } } break; @@ -1953,18 +1953,20 @@ bool SetProperty::SetPropertyCursor::Pull(Frame &frame, ExecutionContext &contex case TypedValue::Type::Vertex: { auto old_value = PropsSetChecked(&lhs.ValueVertex(), self_.property_, rhs); - if (context.trigger_context) { - context.trigger_context->RegisterSetObjectProperty(lhs.ValueVertex(), self_.property_, - TypedValue{std::move(old_value)}, std::move(rhs)); + if (context.trigger_context_collector) { + // rhs cannot be moved because it was created with the allocator that is only valid during current pull + context.trigger_context_collector->RegisterSetObjectProperty(lhs.ValueVertex(), self_.property_, + TypedValue{std::move(old_value)}, TypedValue{rhs}); } break; } case TypedValue::Type::Edge: { auto old_value = PropsSetChecked(&lhs.ValueEdge(), self_.property_, rhs); - if (context.trigger_context) { - 
context.trigger_context->RegisterSetObjectProperty(lhs.ValueEdge(), self_.property_, - TypedValue{std::move(old_value)}, std::move(rhs)); + if (context.trigger_context_collector) { + // rhs cannot be moved because it was created with the allocator that is only valid during current pull + context.trigger_context_collector->RegisterSetObjectProperty(lhs.ValueEdge(), self_.property_, + TypedValue{std::move(old_value)}, TypedValue{rhs}); } break; } @@ -2039,7 +2041,7 @@ void SetPropertiesOnRecord(TRecordAccessor *record, const TypedValue &rhs, SetPr } } - if (context->trigger_context) { + if (context->trigger_context_collector) { old_values.emplace(std::move(*maybe_value)); } } @@ -2073,8 +2075,8 @@ void SetPropertiesOnRecord(TRecordAccessor *record, const TypedValue &rhs, SetPr return {}; }(); - context->trigger_context->RegisterSetObjectProperty(*record, key, TypedValue(std::move(old_value)), - TypedValue(std::move(new_value))); + context->trigger_context_collector->RegisterSetObjectProperty(*record, key, TypedValue(std::move(old_value)), + TypedValue(std::move(new_value))); }; auto set_props = [&, record](auto properties) { @@ -2094,7 +2096,7 @@ void SetPropertiesOnRecord(TRecordAccessor *record, const TypedValue &rhs, SetPr } } - if (context->trigger_context) { + if (context->trigger_context_collector) { register_set_property(std::move(*maybe_error), kv.first, std::move(kv.second)); } } @@ -2111,7 +2113,7 @@ void SetPropertiesOnRecord(TRecordAccessor *record, const TypedValue &rhs, SetPr for (const auto &kv : rhs.ValueMap()) { auto key = context->db_accessor->NameToProperty(kv.first); auto old_value = PropsSetChecked(record, key, kv.second); - if (context->trigger_context) { + if (context->trigger_context_collector) { register_set_property(std::move(old_value), key, kv.second); } } @@ -2123,11 +2125,11 @@ void SetPropertiesOnRecord(TRecordAccessor *record, const TypedValue &rhs, SetPr "map."); } - if (context->trigger_context && old_values) { + if 
(context->trigger_context_collector && old_values) { // register removed properties for (auto &[property_id, property_value] : *old_values) { - context->trigger_context->RegisterRemovedObjectProperty(*record, property_id, - TypedValue(std::move(property_value))); + context->trigger_context_collector->RegisterRemovedObjectProperty(*record, property_id, + TypedValue(std::move(property_value))); } } } @@ -2196,9 +2198,9 @@ bool SetLabels::SetLabelsCursor::Pull(Frame &frame, ExecutionContext &context) { ExpectType(self_.input_symbol_, vertex_value, TypedValue::Type::Vertex); auto &vertex = vertex_value.ValueVertex(); for (auto label : self_.labels_) { - auto maybe_error = vertex.AddLabel(label); - if (maybe_error.HasError()) { - switch (maybe_error.GetError()) { + auto maybe_value = vertex.AddLabel(label); + if (maybe_value.HasError()) { + switch (maybe_value.GetError()) { case storage::Error::SERIALIZATION_ERROR: throw QueryRuntimeException("Can't serialize due to concurrent operations."); case storage::Error::DELETED_OBJECT: @@ -2210,8 +2212,8 @@ bool SetLabels::SetLabelsCursor::Pull(Frame &frame, ExecutionContext &context) { } } - if (context.trigger_context) { - context.trigger_context->RegisterSetVertexLabel(vertex, label); + if (context.trigger_context_collector && *maybe_value) { + context.trigger_context_collector->RegisterSetVertexLabel(vertex, label); } } @@ -2269,9 +2271,9 @@ bool RemoveProperty::RemovePropertyCursor::Pull(Frame &frame, ExecutionContext & } } - if (context.trigger_context) { - context.trigger_context->RegisterRemovedObjectProperty(*record, property, - TypedValue(std::move(*maybe_old_value))); + if (context.trigger_context_collector) { + context.trigger_context_collector->RegisterRemovedObjectProperty(*record, property, + TypedValue(std::move(*maybe_old_value))); } }; @@ -2339,8 +2341,8 @@ bool RemoveLabels::RemoveLabelsCursor::Pull(Frame &frame, ExecutionContext &cont } } - if (context.trigger_context && *maybe_value) { - 
context.trigger_context->RegisterRemovedVertexLabel(vertex, label); + if (context.trigger_context_collector && *maybe_value) { + context.trigger_context_collector->RegisterRemovedVertexLabel(vertex, label); } } diff --git a/src/query/trigger.cpp b/src/query/trigger.cpp index 65ad14dde..2a1ee4668 100644 --- a/src/query/trigger.cpp +++ b/src/query/trigger.cpp @@ -12,23 +12,112 @@ namespace query { namespace { -// clang-format off -std::vector> GetPredefinedIdentifiers() { - return {{{"createdVertices", false}, trigger::IdentifierTag::CREATED_VERTICES }, - {{"createdEdges", false}, trigger::IdentifierTag::CREATED_EDGES }, - {{"deletedVertices", false}, trigger::IdentifierTag::DELETED_VERTICES }, - {{"deletedEdges", false}, trigger::IdentifierTag::DELETED_EDGES }, - {{"assignedVertexProperties", false}, trigger::IdentifierTag::SET_VERTEX_PROPERTIES }, - {{"assignedEdgeProperties", false}, trigger::IdentifierTag::SET_EDGE_PROPERTIES }, - {{"removedVertexProperties", false}, trigger::IdentifierTag::REMOVED_VERTEX_PROPERTIES}, - {{"removedEdgeProperties", false}, trigger::IdentifierTag::REMOVED_EDGE_PROPERTIES }, - {{"assignedVertexLabels", false}, trigger::IdentifierTag::SET_VERTEX_LABELS }, - {{"removedVertexLabels", false}, trigger::IdentifierTag::REMOVED_VERTEX_LABELS }, - {{"updatedVertices", false}, trigger::IdentifierTag::UPDATED_VERTICES }, - {{"updatedEdges", false}, trigger::IdentifierTag::UPDATED_EDGES }, - {{"updatedObjects", false}, trigger::IdentifierTag::UPDATED_OBJECTS }}; + +auto IdentifierString(const TriggerIdentifierTag tag) noexcept { + switch (tag) { + case TriggerIdentifierTag::CREATED_VERTICES: + return "createdVertices"; + + case TriggerIdentifierTag::CREATED_EDGES: + return "createdEdges"; + + case TriggerIdentifierTag::CREATED_OBJECTS: + return "createdObjects"; + + case TriggerIdentifierTag::DELETED_VERTICES: + return "deletedVertices"; + + case TriggerIdentifierTag::DELETED_EDGES: + return "deletedEdges"; + + case 
TriggerIdentifierTag::DELETED_OBJECTS: + return "deletedObjects"; + + case TriggerIdentifierTag::SET_VERTEX_PROPERTIES: + return "assignedVertexProperties"; + + case TriggerIdentifierTag::SET_EDGE_PROPERTIES: + return "assignedEdgeProperties"; + + case TriggerIdentifierTag::REMOVED_VERTEX_PROPERTIES: + return "removedVertexProperties"; + + case TriggerIdentifierTag::REMOVED_EDGE_PROPERTIES: + return "removedEdgeProperties"; + + case TriggerIdentifierTag::SET_VERTEX_LABELS: + return "assignedVertexLabels"; + + case TriggerIdentifierTag::REMOVED_VERTEX_LABELS: + return "removedVertexLabels"; + + case TriggerIdentifierTag::UPDATED_VERTICES: + return "updatedVertices"; + + case TriggerIdentifierTag::UPDATED_EDGES: + return "updatedEdges"; + + case TriggerIdentifierTag::UPDATED_OBJECTS: + return "updatedObjects"; + } +} + +template +concept SameAsIdentifierTag = std::same_as; + +template +std::vector> TagsToIdentifiers(const TArgs &...args) { + std::vector> identifiers; + identifiers.reserve(sizeof...(args)); + + auto add_identifier = [&identifiers](const auto tag) { + identifiers.emplace_back(Identifier{IdentifierString(tag), false}, tag); + }; + + (add_identifier(args), ...); + + return identifiers; +}; + +std::vector> GetPredefinedIdentifiers(const TriggerEventType event_type) { + using IdentifierTag = TriggerIdentifierTag; + using EventType = TriggerEventType; + + switch (event_type) { + case EventType::ANY: + return {}; + + case EventType::CREATE: + return TagsToIdentifiers(IdentifierTag::CREATED_OBJECTS); + + case EventType::VERTEX_CREATE: + return TagsToIdentifiers(IdentifierTag::CREATED_VERTICES); + + case EventType::EDGE_CREATE: + return TagsToIdentifiers(IdentifierTag::CREATED_EDGES); + + case EventType::DELETE: + return TagsToIdentifiers(IdentifierTag::DELETED_OBJECTS); + + case EventType::VERTEX_DELETE: + return TagsToIdentifiers(IdentifierTag::DELETED_VERTICES); + + case EventType::EDGE_DELETE: + return TagsToIdentifiers(IdentifierTag::DELETED_EDGES); + + 
case EventType::UPDATE: + return TagsToIdentifiers(IdentifierTag::UPDATED_OBJECTS); + + case EventType::VERTEX_UPDATE: + return TagsToIdentifiers(IdentifierTag::SET_VERTEX_PROPERTIES, IdentifierTag::REMOVED_VERTEX_PROPERTIES, + IdentifierTag::SET_VERTEX_LABELS, IdentifierTag::REMOVED_VERTEX_LABELS, + IdentifierTag::UPDATED_VERTICES); + + case EventType::EDGE_UPDATE: + return TagsToIdentifiers(IdentifierTag::SET_EDGE_PROPERTIES, IdentifierTag::REMOVED_EDGE_PROPERTIES, + IdentifierTag::UPDATED_EDGES); + } } -// clang-format on template concept WithToMap = requires(const T value, DbAccessor *dba) { @@ -42,14 +131,12 @@ TypedValue ToTypedValue(const T &value, DbAccessor *dba) { } template -TypedValue ToTypedValue(const TriggerContext::CreatedObject &created_object, - [[maybe_unused]] DbAccessor *dba) { +TypedValue ToTypedValue(const detail::CreatedObject &created_object, [[maybe_unused]] DbAccessor *dba) { return TypedValue{created_object.object}; } template -TypedValue ToTypedValue(const TriggerContext::DeletedObject &deleted_object, - [[maybe_unused]] DbAccessor *dba) { +TypedValue ToTypedValue(const detail::DeletedObject &deleted_object, [[maybe_unused]] DbAccessor *dba) { return TypedValue{deleted_object.object}; } @@ -67,7 +154,7 @@ concept ConvertableToTypedValue = requires(T value, DbAccessor *dba) { &&WithIsValid; template -concept LabelUpdateContext = utils::SameAsAnyOf; +concept LabelUpdateContext = utils::SameAsAnyOf; template TypedValue ToTypedValue(const std::vector &values, DbAccessor *dba) { @@ -79,17 +166,19 @@ TypedValue ToTypedValue(const std::vector &values, DbAccessor *dba) { } } - std::map typed_values; + TypedValue result{std::map{}}; + auto &typed_values = result.ValueMap(); for (auto &[label_id, vertices] : vertices_by_labels) { typed_values.emplace(dba->LabelToName(label_id), TypedValue(std::move(vertices))); } - return TypedValue(std::move(typed_values)); + return result; } template TypedValue ToTypedValue(const std::vector &values, DbAccessor 
*dba) requires(!LabelUpdateContext) { - std::vector typed_values; + TypedValue result{std::vector{}}; + auto &typed_values = result.ValueList(); typed_values.reserve(values.size()); for (const auto &value : values) { @@ -98,131 +187,97 @@ TypedValue ToTypedValue(const std::vector &values, DbAccessor *dba) requires( } } - return TypedValue(std::move(typed_values)); + return result; } template const char *TypeToString() { - if constexpr (std::same_as>) { + if constexpr (std::same_as>) { + return "created_vertex"; + } else if constexpr (std::same_as>) { + return "created_edge"; + } else if constexpr (std::same_as>) { + return "deleted_vertex"; + } else if constexpr (std::same_as>) { + return "deleted_edge"; + } else if constexpr (std::same_as>) { return "set_vertex_property"; - } else if constexpr (std::same_as>) { + } else if constexpr (std::same_as>) { return "set_edge_property"; - } else if constexpr (std::same_as>) { + } else if constexpr (std::same_as>) { return "removed_vertex_property"; - } else if constexpr (std::same_as>) { + } else if constexpr (std::same_as>) { return "removed_edge_property"; - } else if constexpr (std::same_as) { + } else if constexpr (std::same_as) { return "set_vertex_label"; - } else if constexpr (std::same_as) { + } else if constexpr (std::same_as) { return "removed_vertex_label"; } } template -concept UpdateContext = WithToMap &&WithIsValid; +concept ContextInfo = WithToMap &&WithIsValid; -template -TypedValue Updated(DbAccessor *dba, const std::vector &...args) { +template +TypedValue Concatenate(DbAccessor *dba, const std::vector &...args) { const auto size = (args.size() + ...); - std::vector updated; - updated.reserve(size); + TypedValue result{std::vector{}}; + auto &concatenated = result.ValueList(); + concatenated.reserve(size); - const auto add_to_updated = [&](const std::vector &values) { + const auto add_to_concatenated = [&](const std::vector &values) { for (const auto &value : values) { if (value.IsValid()) { auto map = 
value.ToMap(dba); - map["type"] = TypeToString(); - updated.emplace_back(std::move(map)); + map["event_type"] = TypeToString(); + concatenated.emplace_back(std::move(map)); } } }; - (add_to_updated(args), ...); + (add_to_concatenated(args), ...); - return TypedValue(std::move(updated)); + return result; +} + +template +concept WithEmpty = requires(const T value) { + { value.empty() } + ->std::same_as; +}; + +template +bool AnyContainsValue(const TContainer &...value_containers) { + return (!value_containers.empty() || ...); } } // namespace -bool TriggerContext::SetVertexLabel::IsValid() const { return object.IsVisible(storage::View::OLD); } +namespace detail { +bool SetVertexLabel::IsValid() const { return object.IsVisible(storage::View::OLD); } -std::map TriggerContext::SetVertexLabel::ToMap(DbAccessor *dba) const { +std::map SetVertexLabel::ToMap(DbAccessor *dba) const { return {{"vertex", TypedValue{object}}, {"label", TypedValue{dba->LabelToName(label_id)}}}; } -bool TriggerContext::RemovedVertexLabel::IsValid() const { return object.IsVisible(storage::View::OLD); } +bool RemovedVertexLabel::IsValid() const { return object.IsVisible(storage::View::OLD); } -std::map TriggerContext::RemovedVertexLabel::ToMap(DbAccessor *dba) const { +std::map RemovedVertexLabel::ToMap(DbAccessor *dba) const { return {{"vertex", TypedValue{object}}, {"label", TypedValue{dba->LabelToName(label_id)}}}; } - -void TriggerContext::RegisterSetVertexLabel(const VertexAccessor &vertex, const storage::LabelId label_id) { - set_vertex_labels_.emplace_back(vertex, label_id); -} - -void TriggerContext::RegisterRemovedVertexLabel(const VertexAccessor &vertex, const storage::LabelId label_id) { - removed_vertex_labels_.emplace_back(vertex, label_id); -} - -TypedValue TriggerContext::GetTypedValue(const trigger::IdentifierTag tag, DbAccessor *dba) const { - const auto &[created_vertices, deleted_vertices, set_vertex_properties, removed_vertex_properties] = vertex_registry_; - const auto 
&[created_edges, deleted_edges, set_edge_properties, removed_edge_properties] = edge_registry_; - - switch (tag) { - case trigger::IdentifierTag::CREATED_VERTICES: - return ToTypedValue(created_vertices, dba); - - case trigger::IdentifierTag::CREATED_EDGES: - return ToTypedValue(created_edges, dba); - - case trigger::IdentifierTag::DELETED_VERTICES: - return ToTypedValue(deleted_vertices, dba); - - case trigger::IdentifierTag::DELETED_EDGES: - return ToTypedValue(deleted_edges, dba); - - case trigger::IdentifierTag::SET_VERTEX_PROPERTIES: - return ToTypedValue(set_vertex_properties, dba); - - case trigger::IdentifierTag::SET_EDGE_PROPERTIES: - return ToTypedValue(set_edge_properties, dba); - - case trigger::IdentifierTag::REMOVED_VERTEX_PROPERTIES: - return ToTypedValue(removed_vertex_properties, dba); - - case trigger::IdentifierTag::REMOVED_EDGE_PROPERTIES: - return ToTypedValue(removed_edge_properties, dba); - - case trigger::IdentifierTag::SET_VERTEX_LABELS: - return ToTypedValue(set_vertex_labels_, dba); - - case trigger::IdentifierTag::REMOVED_VERTEX_LABELS: - return ToTypedValue(removed_vertex_labels_, dba); - - case trigger::IdentifierTag::UPDATED_VERTICES: - return Updated(dba, set_vertex_properties, removed_vertex_properties, set_vertex_labels_, removed_vertex_labels_); - - case trigger::IdentifierTag::UPDATED_EDGES: - return Updated(dba, set_edge_properties, removed_edge_properties); - - case trigger::IdentifierTag::UPDATED_OBJECTS: - return Updated(dba, set_vertex_properties, set_edge_properties, removed_vertex_properties, - removed_edge_properties, set_vertex_labels_, removed_vertex_labels_); - } -} +} // namespace detail void TriggerContext::AdaptForAccessor(DbAccessor *accessor) { - auto &[created_vertices, deleted_vertices, set_vertex_properties, removed_vertex_properties] = vertex_registry_; - // adapt created_vertices_ { - auto it = created_vertices.begin(); - for (const auto &created_vertex : created_vertices) { + // adapt created_vertices_ + 
auto it = created_vertices_.begin(); + for (auto &created_vertex : created_vertices_) { if (auto maybe_vertex = accessor->FindVertex(created_vertex.object.Gid(), storage::View::OLD); maybe_vertex) { - *it = CreatedObject{*maybe_vertex}; + *it = detail::CreatedObject{*maybe_vertex}; ++it; } } - created_vertices.erase(it, created_vertices.end()); + created_vertices_.erase(it, created_vertices_.end()); } // deleted_vertices_ should keep the transaction context of the transaction which deleted it @@ -241,30 +296,30 @@ void TriggerContext::AdaptForAccessor(DbAccessor *accessor) { values->erase(it, values->end()); }; - adapt_context_with_vertex(&set_vertex_properties); - adapt_context_with_vertex(&removed_vertex_properties); + adapt_context_with_vertex(&set_vertex_properties_); + adapt_context_with_vertex(&removed_vertex_properties_); adapt_context_with_vertex(&set_vertex_labels_); adapt_context_with_vertex(&removed_vertex_labels_); - auto &[created_edges, deleted_edges, set_edge_properties, removed_edge_properties] = edge_registry_; - // adapt created_edges { - auto it = created_edges.begin(); - for (const auto &created_edge : created_edges) { - if (auto maybe_vertex = accessor->FindVertex(created_edge.object.From().Gid(), storage::View::OLD); - maybe_vertex) { - auto maybe_out_edges = maybe_vertex->OutEdges(storage::View::OLD); - MG_ASSERT(maybe_out_edges.HasValue()); - for (const auto &edge : *maybe_out_edges) { - if (edge.Gid() == created_edge.object.Gid()) { - *it = CreatedObject{edge}; - ++it; - break; - } + // adapt created_edges + auto it = created_edges_.begin(); + for (auto &created_edge : created_edges_) { + const auto maybe_from_vertex = accessor->FindVertex(created_edge.object.From().Gid(), storage::View::OLD); + if (!maybe_from_vertex) { + continue; + } + auto maybe_out_edges = maybe_from_vertex->OutEdges(storage::View::OLD); + MG_ASSERT(maybe_out_edges.HasValue()); + const auto edge_gid = created_edge.object.Gid(); + for (const auto &edge : 
*maybe_out_edges) { + if (edge.Gid() == edge_gid) { + *it = detail::CreatedObject{edge}; + ++it; } } } - created_edges.erase(it, created_edges.end()); + created_edges_.erase(it, created_edges_.end()); } // deleted_edges_ should keep the transaction context of the transaction which deleted it @@ -290,13 +345,164 @@ void TriggerContext::AdaptForAccessor(DbAccessor *accessor) { values->erase(it, values->end()); }; - adapt_context_with_edge(&set_edge_properties); - adapt_context_with_edge(&removed_edge_properties); + adapt_context_with_edge(&set_edge_properties_); + adapt_context_with_edge(&removed_edge_properties_); +} + +TypedValue TriggerContext::GetTypedValue(const TriggerIdentifierTag tag, DbAccessor *dba) const { + switch (tag) { + case TriggerIdentifierTag::CREATED_VERTICES: + return ToTypedValue(created_vertices_, dba); + + case TriggerIdentifierTag::CREATED_EDGES: + return ToTypedValue(created_edges_, dba); + + case TriggerIdentifierTag::CREATED_OBJECTS: + return Concatenate(dba, created_vertices_, created_edges_); + + case TriggerIdentifierTag::DELETED_VERTICES: + return ToTypedValue(deleted_vertices_, dba); + + case TriggerIdentifierTag::DELETED_EDGES: + return ToTypedValue(deleted_edges_, dba); + + case TriggerIdentifierTag::DELETED_OBJECTS: + return Concatenate(dba, deleted_vertices_, deleted_edges_); + + case TriggerIdentifierTag::SET_VERTEX_PROPERTIES: + return ToTypedValue(set_vertex_properties_, dba); + + case TriggerIdentifierTag::SET_EDGE_PROPERTIES: + return ToTypedValue(set_edge_properties_, dba); + + case TriggerIdentifierTag::REMOVED_VERTEX_PROPERTIES: + return ToTypedValue(removed_vertex_properties_, dba); + + case TriggerIdentifierTag::REMOVED_EDGE_PROPERTIES: + return ToTypedValue(removed_edge_properties_, dba); + + case TriggerIdentifierTag::SET_VERTEX_LABELS: + return ToTypedValue(set_vertex_labels_, dba); + + case TriggerIdentifierTag::REMOVED_VERTEX_LABELS: + return ToTypedValue(removed_vertex_labels_, dba); + + case 
TriggerIdentifierTag::UPDATED_VERTICES: + return Concatenate(dba, set_vertex_properties_, removed_vertex_properties_, set_vertex_labels_, + removed_vertex_labels_); + + case TriggerIdentifierTag::UPDATED_EDGES: + return Concatenate(dba, set_edge_properties_, removed_edge_properties_); + + case TriggerIdentifierTag::UPDATED_OBJECTS: + return Concatenate(dba, set_vertex_properties_, set_edge_properties_, removed_vertex_properties_, + removed_edge_properties_, set_vertex_labels_, removed_vertex_labels_); + } +} + +bool TriggerContext::ShouldEventTrigger(const TriggerEventType event_type) const { + using EventType = TriggerEventType; + switch (event_type) { + case EventType::ANY: + return true; + + case EventType::CREATE: + return AnyContainsValue(created_vertices_, created_edges_); + + case EventType::VERTEX_CREATE: + return AnyContainsValue(created_vertices_); + + case EventType::EDGE_CREATE: + return AnyContainsValue(created_edges_); + + case EventType::DELETE: + return AnyContainsValue(deleted_vertices_, deleted_edges_); + + case EventType::VERTEX_DELETE: + return AnyContainsValue(deleted_vertices_); + + case EventType::EDGE_DELETE: + return AnyContainsValue(deleted_edges_); + + case EventType::UPDATE: + return AnyContainsValue(set_vertex_properties_, set_edge_properties_, removed_vertex_properties_, + removed_edge_properties_, set_vertex_labels_, removed_vertex_labels_); + + case EventType::VERTEX_UPDATE: + return AnyContainsValue(set_vertex_properties_, removed_vertex_properties_, set_vertex_labels_, + removed_vertex_labels_); + + case EventType::EDGE_UPDATE: + return AnyContainsValue(set_edge_properties_, removed_edge_properties_); + } +} + +void TriggerContextCollector::UpdateLabelMap(const VertexAccessor vertex, const storage::LabelId label_id, + const LabelChange change) { + auto ®istry = GetRegistry(); + if (registry.created_objects_.count(vertex.Gid())) { + return; + } + + if (auto it = label_changes_.find({vertex, label_id}); it != label_changes_.end()) { 
+ it->second = std::clamp(it->second + LabelChangeToInt(change), -1, 1); + return; + } + + label_changes_.emplace(std::make_pair(vertex, label_id), LabelChangeToInt(change)); +} + +void TriggerContextCollector::RegisterSetVertexLabel(const VertexAccessor &vertex, const storage::LabelId label_id) { + UpdateLabelMap(vertex, label_id, LabelChange::ADD); +} + +void TriggerContextCollector::RegisterRemovedVertexLabel(const VertexAccessor &vertex, + const storage::LabelId label_id) { + UpdateLabelMap(vertex, label_id, LabelChange::REMOVE); +} + +int8_t TriggerContextCollector::LabelChangeToInt(LabelChange change) { + static_assert(std::is_same_v, int8_t>, + "The underlying type of LabelChange doesn't match the return type!"); + return static_cast(change); +} + +TriggerContext TriggerContextCollector::TransformToTriggerContext() && { + auto [created_vertices, deleted_vertices, set_vertex_properties, removed_vertex_properties] = + std::move(vertex_registry_).Summarize(); + auto [set_vertex_labels, removed_vertex_labels] = LabelMapToList(std::move(label_changes_)); + auto [created_edges, deleted_edges, set_edge_properties, removed_edge_properties] = + std::move(edge_registry_).Summarize(); + + return {std::move(created_vertices), std::move(deleted_vertices), + std::move(set_vertex_properties), std::move(removed_vertex_properties), + std::move(set_vertex_labels), std::move(removed_vertex_labels), + std::move(created_edges), std::move(deleted_edges), + std::move(set_edge_properties), std::move(removed_edge_properties)}; +} + +TriggerContextCollector::LabelChangesLists TriggerContextCollector::LabelMapToList(LabelChangesMap &&label_changes) { + std::vector set_vertex_labels; + std::vector removed_vertex_labels; + + for (const auto &[key, label_state] : label_changes) { + if (label_state == LabelChangeToInt(LabelChange::ADD)) { + set_vertex_labels.emplace_back(key.first, key.second); + } else if (label_state == LabelChangeToInt(LabelChange::REMOVE)) { + 
removed_vertex_labels.emplace_back(key.first, key.second); + } + } + + label_changes.clear(); + + return {std::move(set_vertex_labels), std::move(removed_vertex_labels)}; } Trigger::Trigger(std::string name, const std::string &query, utils::SkipList *query_cache, - DbAccessor *db_accessor, utils::SpinLock *antlr_lock) - : name_(std::move(name)), parsed_statements_{ParseQuery(query, {}, query_cache, antlr_lock)} { + DbAccessor *db_accessor, utils::SpinLock *antlr_lock, const TriggerEventType event_type) + : name_(std::move(name)), + parsed_statements_{ParseQuery(query, {}, query_cache, antlr_lock)}, + event_type_{event_type} { // We check immediately if the query is valid by trying to create a plan. GetPlan(db_accessor); } @@ -310,7 +516,7 @@ std::shared_ptr Trigger::GetPlan(DbAccessor *db_accessor) return trigger_plan_; } - auto identifiers = GetPredefinedIdentifiers(); + auto identifiers = GetPredefinedIdentifiers(event_type_); AstStorage ast_storage; ast_storage.properties_ = parsed_statements_.ast_storage.properties_; @@ -332,6 +538,11 @@ std::shared_ptr Trigger::GetPlan(DbAccessor *db_accessor) void Trigger::Execute(DbAccessor *dba, utils::MonotonicBufferResource *execution_memory, const double tsc_frequency, const double max_execution_time_sec, std::atomic *is_shutting_down, const TriggerContext &context) const { + if (!context.ShouldEventTrigger(event_type_)) { + return; + } + + spdlog::debug("Executing trigger '{}'", name_); auto trigger_plan = GetPlan(dba); MG_ASSERT(trigger_plan, "Invalid trigger plan received"); auto &[plan, identifiers] = *trigger_plan; diff --git a/src/query/trigger.hpp b/src/query/trigger.hpp index 9689b7775..51119885e 100644 --- a/src/query/trigger.hpp +++ b/src/query/trigger.hpp @@ -1,33 +1,18 @@ #pragma once +#include #include +#include +#include #include +#include #include "query/cypher_query_interpreter.hpp" -#include "query/db_accessor.hpp" #include "query/frontend/ast/ast.hpp" #include "query/typed_value.hpp" #include 
"utils/concepts.hpp" +#include "utils/fnv.hpp" namespace query { - -namespace trigger { -enum class IdentifierTag : uint8_t { - CREATED_VERTICES, - CREATED_EDGES, - DELETED_VERTICES, - DELETED_EDGES, - SET_VERTEX_PROPERTIES, - SET_EDGE_PROPERTIES, - REMOVED_VERTEX_PROPERTIES, - REMOVED_EDGE_PROPERTIES, - SET_VERTEX_LABELS, - REMOVED_VERTEX_LABELS, - UPDATED_VERTICES, - UPDATED_EDGES, - UPDATED_OBJECTS -}; -} // namespace trigger - namespace detail { template concept ObjectAccessor = utils::SameAsAnyOf; @@ -40,34 +25,214 @@ const char *ObjectString() { return "edge"; } } + +template +struct CreatedObject { + explicit CreatedObject(const TAccessor &object) : object{object} {} + + bool IsValid() const { return object.IsVisible(storage::View::OLD); } + std::map ToMap([[maybe_unused]] DbAccessor *dba) const { + return {{ObjectString(), TypedValue{object}}}; + } + + TAccessor object; +}; + +template +struct DeletedObject { + explicit DeletedObject(const TAccessor &object) : object{object} {} + + bool IsValid() const { return object.IsVisible(storage::View::OLD); } + std::map ToMap([[maybe_unused]] DbAccessor *dba) const { + return {{ObjectString(), TypedValue{object}}}; + } + + TAccessor object; +}; + +template +struct SetObjectProperty { + explicit SetObjectProperty(const TAccessor &object, storage::PropertyId key, TypedValue old_value, + TypedValue new_value) + : object{object}, key{key}, old_value{std::move(old_value)}, new_value{std::move(new_value)} {} + + std::map ToMap(DbAccessor *dba) const { + return {{ObjectString(), TypedValue{object}}, + {"key", TypedValue{dba->PropertyToName(key)}}, + {"old", old_value}, + {"new", new_value}}; + } + + bool IsValid() const { return object.IsVisible(storage::View::OLD); } + + TAccessor object; + storage::PropertyId key; + TypedValue old_value; + TypedValue new_value; +}; + +template +struct RemovedObjectProperty { + explicit RemovedObjectProperty(const TAccessor &object, storage::PropertyId key, TypedValue old_value) + : 
object{object}, key{key}, old_value{std::move(old_value)} {} + + std::map ToMap(DbAccessor *dba) const { + return {{ObjectString(), TypedValue{object}}, + {"key", TypedValue{dba->PropertyToName(key)}}, + {"old", old_value}}; + } + + bool IsValid() const { return object.IsVisible(storage::View::OLD); } + + TAccessor object; + storage::PropertyId key; + TypedValue old_value; +}; + +struct SetVertexLabel { + explicit SetVertexLabel(const VertexAccessor &vertex, const storage::LabelId label_id) + : object{vertex}, label_id{label_id} {} + + std::map ToMap(DbAccessor *dba) const; + bool IsValid() const; + + VertexAccessor object; + storage::LabelId label_id; +}; + +struct RemovedVertexLabel { + explicit RemovedVertexLabel(const VertexAccessor &vertex, const storage::LabelId label_id) + : object{vertex}, label_id{label_id} {} + + std::map ToMap(DbAccessor *dba) const; + bool IsValid() const; + + VertexAccessor object; + storage::LabelId label_id; +}; } // namespace detail -struct TriggerContext { - static_assert(std::is_trivially_copy_constructible_v, - "VertexAccessor is not trivially copy constructible, move it where possible and remove this assert"); - static_assert(std::is_trivially_copy_constructible_v, - "EdgeAccessor is not trivially copy constructible, move it where possible and remove this asssert"); +enum class TriggerIdentifierTag : uint8_t { + CREATED_VERTICES, + CREATED_EDGES, + CREATED_OBJECTS, + DELETED_VERTICES, + DELETED_EDGES, + DELETED_OBJECTS, + SET_VERTEX_PROPERTIES, + SET_EDGE_PROPERTIES, + REMOVED_VERTEX_PROPERTIES, + REMOVED_EDGE_PROPERTIES, + SET_VERTEX_LABELS, + REMOVED_VERTEX_LABELS, + UPDATED_VERTICES, + UPDATED_EDGES, + UPDATED_OBJECTS +}; +enum class TriggerEventType : uint8_t { + ANY, // Triggers always + VERTEX_CREATE, + EDGE_CREATE, + CREATE, + VERTEX_DELETE, + EDGE_DELETE, + DELETE, + VERTEX_UPDATE, + EDGE_UPDATE, + UPDATE +}; + +static_assert(std::is_trivially_copy_constructible_v, + "VertexAccessor is not trivially copy constructible, 
move it where possible and remove this assert"); +static_assert(std::is_trivially_copy_constructible_v, + "EdgeAccessor is not trivially copy constructible, move it where possible and remove this asssert"); + +// Holds the information necessary for triggers +class TriggerContext { + public: + TriggerContext() = default; + TriggerContext(std::vector> created_vertices, + std::vector> deleted_vertices, + std::vector> set_vertex_properties, + std::vector> removed_vertex_properties, + std::vector set_vertex_labels, + std::vector removed_vertex_labels, + std::vector> created_edges, + std::vector> deleted_edges, + std::vector> set_edge_properties, + std::vector> removed_edge_properties) + : created_vertices_{std::move(created_vertices)}, + deleted_vertices_{std::move(deleted_vertices)}, + set_vertex_properties_{std::move(set_vertex_properties)}, + removed_vertex_properties_{std::move(removed_vertex_properties)}, + set_vertex_labels_{std::move(set_vertex_labels)}, + removed_vertex_labels_{std::move(removed_vertex_labels)}, + created_edges_{std::move(created_edges)}, + deleted_edges_{std::move(deleted_edges)}, + set_edge_properties_{std::move(set_edge_properties)}, + removed_edge_properties_{std::move(removed_edge_properties)} {} + TriggerContext(const TriggerContext &) = default; + TriggerContext(TriggerContext &&) = default; + TriggerContext &operator=(const TriggerContext &) = default; + TriggerContext &operator=(TriggerContext &&) = default; + + // Adapt the TriggerContext object inplace for a different DbAccessor + // (each derived accessor, e.g. 
VertexAccessor, gets adapted + // to the sent DbAccessor so they can be used safely) + void AdaptForAccessor(DbAccessor *accessor); + + // Get TypedValue for the identifier defined with tag + TypedValue GetTypedValue(TriggerIdentifierTag tag, DbAccessor *dba) const; + bool ShouldEventTrigger(TriggerEventType) const; + + private: + std::vector> created_vertices_; + std::vector> deleted_vertices_; + std::vector> set_vertex_properties_; + std::vector> removed_vertex_properties_; + std::vector set_vertex_labels_; + std::vector removed_vertex_labels_; + + std::vector> created_edges_; + std::vector> deleted_edges_; + std::vector> set_edge_properties_; + std::vector> removed_edge_properties_; +}; + +// Collects the information necessary for triggers during a single transaction run. +class TriggerContextCollector { + public: template void RegisterCreatedObject(const TAccessor &created_object) { - GetRegistry().created_objects_.emplace_back(created_object); + GetRegistry().created_objects_.emplace(created_object.Gid(), detail::CreatedObject{created_object}); } template void RegisterDeletedObject(const TAccessor &deleted_object) { - GetRegistry().deleted_objects_.emplace_back(deleted_object); + auto ®istry = GetRegistry(); + if (registry.created_objects_.count(deleted_object.Gid())) { + return; + } + + registry.deleted_objects_.emplace_back(deleted_object); } template void RegisterSetObjectProperty(const TAccessor &object, const storage::PropertyId key, TypedValue old_value, TypedValue new_value) { - if (new_value.IsNull()) { - RegisterRemovedObjectProperty(object, key, std::move(old_value)); + auto ®istry = GetRegistry(); + if (registry.created_objects_.count(object.Gid())) { return; } - GetRegistry().set_object_properties_.emplace_back(object, key, std::move(old_value), - std::move(new_value)); + if (auto it = registry.property_changes_.find({object, key}); it != registry.property_changes_.end()) { + it->second.new_value = std::move(new_value); + return; + } + + 
registry.property_changes_.emplace(std::make_pair(object, key), + PropertyChangeInfo{std::move(old_value), std::move(new_value)}); } template @@ -77,109 +242,90 @@ struct TriggerContext { return; } - GetRegistry().removed_object_properties_.emplace_back(object, key, std::move(old_value)); + RegisterSetObjectProperty(object, key, std::move(old_value), TypedValue()); } void RegisterSetVertexLabel(const VertexAccessor &vertex, storage::LabelId label_id); void RegisterRemovedVertexLabel(const VertexAccessor &vertex, storage::LabelId label_id); + [[nodiscard]] TriggerContext TransformToTriggerContext() &&; - // Adapt the TriggerContext object inplace for a different DbAccessor - // (each derived accessor, e.g. VertexAccessor, gets adapted - // to the sent DbAccessor so they can be used safely) - void AdaptForAccessor(DbAccessor *accessor); - - TypedValue GetTypedValue(trigger::IdentifierTag tag, DbAccessor *dba) const; - - template - struct CreatedObject { - explicit CreatedObject(const TAccessor &object) : object{object} {} - - bool IsValid() const { return object.IsVisible(storage::View::OLD); } - - TAccessor object; - }; - - template - struct DeletedObject { - explicit DeletedObject(const TAccessor &object) : object{object} {} - - bool IsValid() const { return object.IsVisible(storage::View::OLD); } - - TAccessor object; - }; - - template - struct SetObjectProperty { - explicit SetObjectProperty(const TAccessor &object, storage::PropertyId key, TypedValue old_value, - TypedValue new_value) - : object{object}, key{key}, old_value{std::move(old_value)}, new_value{std::move(new_value)} {} - - std::map ToMap(DbAccessor *dba) const { - return {{detail::ObjectString(), TypedValue{object}}, - {"key", TypedValue{dba->PropertyToName(key)}}, - {"old", old_value}, - {"new", new_value}}; + private: + struct HashPair { + template + size_t operator()(const std::pair &pair) const { + using GidType = decltype(std::declval().Gid()); + return utils::HashCombine{}(pair.first.Gid(), 
pair.second); } + }; - bool IsValid() const { return object.IsVisible(storage::View::OLD); } - - TAccessor object; - storage::PropertyId key; + struct PropertyChangeInfo { TypedValue old_value; TypedValue new_value; }; template - struct RemovedObjectProperty { - explicit RemovedObjectProperty(const TAccessor &object, storage::PropertyId key, TypedValue old_value) - : object{object}, key{key}, old_value{std::move(old_value)} {} + using PropertyChangesMap = + std::unordered_map, PropertyChangeInfo, HashPair>; - std::map ToMap(DbAccessor *dba) const { - return {{detail::ObjectString(), TypedValue{object}}, - {"key", TypedValue{dba->PropertyToName(key)}}, - {"old", old_value}}; - } + template + using PropertyChangesLists = std::pair>, + std::vector>>; - bool IsValid() const { return object.IsVisible(storage::View::OLD); } - - TAccessor object; - storage::PropertyId key; - TypedValue old_value; - }; - - struct SetVertexLabel { - explicit SetVertexLabel(const VertexAccessor &vertex, const storage::LabelId label_id) - : object{vertex}, label_id{label_id} {} - - std::map ToMap(DbAccessor *dba) const; - bool IsValid() const; - - VertexAccessor object; - storage::LabelId label_id; - }; - - struct RemovedVertexLabel { - explicit RemovedVertexLabel(const VertexAccessor &vertex, const storage::LabelId label_id) - : object{vertex}, label_id{label_id} {} - - std::map ToMap(DbAccessor *dba) const; - bool IsValid() const; - - VertexAccessor object; - storage::LabelId label_id; - }; - - private: template struct Registry { - std::vector> created_objects_; - std::vector> deleted_objects_; - std::vector> set_object_properties_; - std::vector> removed_object_properties_; - }; + using ChangesSummary = + std::tuple>, std::vector>, + std::vector>, + std::vector>>; - Registry vertex_registry_; - Registry edge_registry_; + [[nodiscard]] static PropertyChangesLists PropertyMapToList(PropertyChangesMap &&map) { + std::vector> set_object_properties; + std::vector> removed_object_properties; + + 
for (auto it = map.begin(); it != map.end(); it = map.erase(it)) { + const auto &[key, property_change_info] = *it; + if (property_change_info.old_value.IsNull() && property_change_info.new_value.IsNull()) { + // no change happened on the transaction level + continue; + } + + if (const auto is_equal = property_change_info.old_value == property_change_info.new_value; + is_equal.IsBool() && is_equal.ValueBool()) { + // no change happened on the transaction level + continue; + } + + if (property_change_info.new_value.IsNull()) { + removed_object_properties.emplace_back(key.first, key.second /* property_id */, + std::move(property_change_info.old_value)); + } else { + set_object_properties.emplace_back(key.first, key.second, std::move(property_change_info.old_value), + std::move(property_change_info.new_value)); + } + } + + return PropertyChangesLists{std::move(set_object_properties), std::move(removed_object_properties)}; + } + + [[nodiscard]] ChangesSummary Summarize() && { + auto [set_object_properties, removed_object_properties] = PropertyMapToList(std::move(property_changes_)); + std::vector> created_objects_vec; + created_objects_vec.reserve(created_objects_.size()); + std::transform(created_objects_.begin(), created_objects_.end(), std::back_inserter(created_objects_vec), + [](const auto &gid_and_created_object) { return gid_and_created_object.second; }); + created_objects_.clear(); + + return {std::move(created_objects_vec), std::move(deleted_objects_), std::move(set_object_properties), + std::move(removed_object_properties)}; + } + + std::unordered_map> created_objects_; + std::vector> deleted_objects_; + // During the transaction, a single property on a single object could be changed multiple times. + // We want to register only the global change, at the end of the transaction. The change consists of + // the value before the transaction start, and the latest value assigned throughout the transaction. 
+ PropertyChangesMap property_changes_; + }; template Registry &GetRegistry() { @@ -190,13 +336,29 @@ struct TriggerContext { } } - std::vector set_vertex_labels_; - std::vector removed_vertex_labels_; + using LabelChangesMap = std::unordered_map, int8_t, HashPair>; + using LabelChangesLists = std::pair, std::vector>; + + enum class LabelChange : int8_t { REMOVE = -1, ADD = 1 }; + + static int8_t LabelChangeToInt(LabelChange change); + + [[nodiscard]] static LabelChangesLists LabelMapToList(LabelChangesMap &&label_changes); + + void UpdateLabelMap(VertexAccessor vertex, storage::LabelId label_id, LabelChange change); + + Registry vertex_registry_; + Registry edge_registry_; + // During the transaction, a single label on a single vertex could be added and removed multiple times. + // We want to register only the global change, at the end of the transaction. The change consists of + // the state of the label before the transaction start, and the latest state assigned throughout the transaction. 
+ LabelChangesMap label_changes_; }; struct Trigger { explicit Trigger(std::string name, const std::string &query, utils::SkipList *query_cache, - DbAccessor *db_accessor, utils::SpinLock *antlr_lock); + DbAccessor *db_accessor, utils::SpinLock *antlr_lock, + TriggerEventType event_type = TriggerEventType::ANY); void Execute(DbAccessor *dba, utils::MonotonicBufferResource *execution_memory, double tsc_frequency, double max_execution_time_sec, std::atomic *is_shutting_down, const TriggerContext &context) const; @@ -212,7 +374,7 @@ struct Trigger { private: struct TriggerPlan { - using IdentifierInfo = std::pair; + using IdentifierInfo = std::pair; explicit TriggerPlan(std::unique_ptr logical_plan, std::vector identifiers); @@ -224,6 +386,8 @@ struct Trigger { std::string name_; ParsedQuery parsed_statements_; + TriggerEventType event_type_; + mutable utils::SpinLock plan_lock_; mutable std::shared_ptr trigger_plan_; }; diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index b1f4a147e..8f7c2bb67 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -95,6 +95,9 @@ target_link_libraries(${test_prefix}query_plan_v2_create_set_remove_delete mg-qu add_unit_test(query_pretty_print.cpp) target_link_libraries(${test_prefix}query_pretty_print mg-query) +add_unit_test(query_trigger.cpp) +target_link_libraries(${test_prefix}query_trigger mg-query) + # Test query/procedure add_unit_test(query_procedure_mgp_type.cpp) target_link_libraries(${test_prefix}query_procedure_mgp_type mg-query) diff --git a/tests/unit/query_trigger.cpp b/tests/unit/query_trigger.cpp new file mode 100644 index 000000000..44936ef60 --- /dev/null +++ b/tests/unit/query_trigger.cpp @@ -0,0 +1,553 @@ +#include +#include + +#include "query/db_accessor.hpp" +#include "query/interpreter.hpp" +#include "query/trigger.hpp" +#include "query/typed_value.hpp" +#include "utils/memory.hpp" + +class TriggerContextTest : public ::testing::Test { + public: + void SetUp() override { 
db.emplace(); } + + void TearDown() override { + accessors.clear(); + db.reset(); + } + + storage::Storage::Accessor &StartTransaction() { + accessors.push_back(db->Access()); + return accessors.back(); + } + + protected: + std::optional db; + std::list accessors; +}; + +namespace { +void CheckTypedValueSize(const query::TriggerContext &trigger_context, const query::TriggerIdentifierTag tag, + const size_t expected_size, query::DbAccessor &dba) { + auto typed_values = trigger_context.GetTypedValue(tag, &dba); + ASSERT_TRUE(typed_values.IsList()); + ASSERT_EQ(typed_values.ValueList().size(), expected_size); +}; + +void CheckLabelMap(const query::TriggerContext &trigger_context, const query::TriggerIdentifierTag tag, + const size_t expected, query::DbAccessor &dba) { + auto typed_values = trigger_context.GetTypedValue(tag, &dba); + ASSERT_TRUE(typed_values.IsMap()); + auto &typed_values_map = typed_values.ValueMap(); + size_t value_count = 0; + for (const auto &[label, values] : typed_values_map) { + ASSERT_TRUE(values.IsList()); + value_count += values.ValueList().size(); + } + ASSERT_EQ(value_count, expected); +}; +} // namespace + +// Ensure that TriggerContext returns only valid objects. 
+// Returned TypedValue should always contain only objects +// that exist (unless its explicitly created for the deleted object) +TEST_F(TriggerContextTest, ValidObjectsTest) { + query::TriggerContext trigger_context; + query::TriggerContextCollector trigger_context_collector; + + size_t vertex_count = 0; + size_t edge_count = 0; + { + query::DbAccessor dba{&StartTransaction()}; + + auto create_vertex = [&] { + auto created_vertex = dba.InsertVertex(); + trigger_context_collector.RegisterCreatedObject(created_vertex); + ++vertex_count; + return created_vertex; + }; + + // Create vertices and add them to the trigger context as created + std::vector vertices; + for (size_t i = 0; i < 4; ++i) { + vertices.push_back(create_vertex()); + } + + auto create_edge = [&](auto &from, auto &to) { + auto maybe_edge = dba.InsertEdge(&from, &to, dba.NameToEdgeType("EDGE")); + ASSERT_FALSE(maybe_edge.HasError()); + trigger_context_collector.RegisterCreatedObject(*maybe_edge); + ++edge_count; + }; + + // Create edges and add them to the trigger context as created + create_edge(vertices[0], vertices[1]); + create_edge(vertices[1], vertices[2]); + create_edge(vertices[2], vertices[3]); + + dba.AdvanceCommand(); + trigger_context = std::move(trigger_context_collector).TransformToTriggerContext(); + trigger_context_collector = query::TriggerContextCollector{}; + + // Should have all the created objects + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::CREATED_VERTICES, vertex_count, dba); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::CREATED_EDGES, edge_count, dba); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::CREATED_OBJECTS, vertex_count + edge_count, dba); + + // we delete one of the vertices and edges in the same transaction + ASSERT_TRUE(dba.DetachRemoveVertex(&vertices[0]).HasValue()); + --vertex_count; + --edge_count; + + dba.AdvanceCommand(); + + // Should have one less created object for vertex and edge + 
CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::CREATED_VERTICES, vertex_count, dba); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::CREATED_EDGES, edge_count, dba); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::CREATED_OBJECTS, vertex_count + edge_count, dba); + + ASSERT_FALSE(dba.Commit().HasError()); + } + + { + query::DbAccessor dba{&StartTransaction()}; + trigger_context.AdaptForAccessor(&dba); + + // Should have one less created object for vertex and edge + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::CREATED_VERTICES, vertex_count, dba); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::CREATED_EDGES, edge_count, dba); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::CREATED_OBJECTS, vertex_count + edge_count, dba); + } + + size_t deleted_vertex_count = 0; + size_t deleted_edge_count = 0; + { + query::DbAccessor dba{&StartTransaction()}; + + // register each type of change for each object + { + auto vertices = dba.Vertices(storage::View::OLD); + for (auto vertex : vertices) { + trigger_context_collector.RegisterSetObjectProperty(vertex, dba.NameToProperty("PROPERTY1"), + query::TypedValue("Value"), query::TypedValue("ValueNew")); + trigger_context_collector.RegisterRemovedObjectProperty(vertex, dba.NameToProperty("PROPERTY2"), + query::TypedValue("Value")); + trigger_context_collector.RegisterSetVertexLabel(vertex, dba.NameToLabel("LABEL1")); + trigger_context_collector.RegisterRemovedVertexLabel(vertex, dba.NameToLabel("LABEL2")); + + auto out_edges = vertex.OutEdges(storage::View::OLD); + ASSERT_TRUE(out_edges.HasValue()); + + for (auto edge : *out_edges) { + trigger_context_collector.RegisterSetObjectProperty( + edge, dba.NameToProperty("PROPERTY1"), query::TypedValue("Value"), query::TypedValue("ValueNew")); + trigger_context_collector.RegisterRemovedObjectProperty(edge, dba.NameToProperty("PROPERTY2"), + query::TypedValue("Value")); + } 
+ } + } + + // Delete the first vertex with its edge and register the deleted object + { + auto vertices = dba.Vertices(storage::View::OLD); + for (auto vertex : vertices) { + const auto maybe_values = dba.DetachRemoveVertex(&vertex); + ASSERT_TRUE(maybe_values.HasValue()); + ASSERT_TRUE(maybe_values.GetValue()); + const auto &[deleted_vertex, deleted_edges] = *maybe_values.GetValue(); + + trigger_context_collector.RegisterDeletedObject(deleted_vertex); + ++deleted_vertex_count; + --vertex_count; + for (const auto &edge : deleted_edges) { + trigger_context_collector.RegisterDeletedObject(edge); + ++deleted_edge_count; + --edge_count; + } + + break; + } + } + + dba.AdvanceCommand(); + ASSERT_FALSE(dba.Commit().HasError()); + + trigger_context = std::move(trigger_context_collector).TransformToTriggerContext(); + trigger_context_collector = query::TriggerContextCollector{}; + + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::SET_VERTEX_PROPERTIES, vertex_count, dba); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::SET_EDGE_PROPERTIES, edge_count, dba); + + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::REMOVED_VERTEX_PROPERTIES, vertex_count, dba); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::REMOVED_EDGE_PROPERTIES, edge_count, dba); + + CheckLabelMap(trigger_context, query::TriggerIdentifierTag::SET_VERTEX_LABELS, vertex_count, dba); + CheckLabelMap(trigger_context, query::TriggerIdentifierTag::REMOVED_VERTEX_LABELS, vertex_count, dba); + + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::UPDATED_VERTICES, 4 * vertex_count, dba); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::UPDATED_EDGES, 2 * edge_count, dba); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::UPDATED_OBJECTS, + 4 * vertex_count + 2 * edge_count, dba); + + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::DELETED_VERTICES, deleted_vertex_count, 
dba); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::DELETED_EDGES, deleted_edge_count, dba); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::DELETED_OBJECTS, + deleted_vertex_count + deleted_edge_count, dba); + } + + // delete a single vertex with its edges, it should reduce number of typed values returned by the trigger context + // for each update event. + // TypedValue of the deleted objects stay the same as they're bound to the transaction which deleted them. + { + query::DbAccessor dba{&StartTransaction()}; + trigger_context.AdaptForAccessor(&dba); + + auto vertices = dba.Vertices(storage::View::OLD); + for (auto vertex : vertices) { + ASSERT_TRUE(dba.DetachRemoveVertex(&vertex).HasValue()); + break; + } + --vertex_count; + --edge_count; + + ASSERT_FALSE(dba.Commit().HasError()); + } + + { + query::DbAccessor dba{&StartTransaction()}; + trigger_context.AdaptForAccessor(&dba); + + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::SET_VERTEX_PROPERTIES, vertex_count, dba); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::SET_EDGE_PROPERTIES, edge_count, dba); + + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::REMOVED_VERTEX_PROPERTIES, vertex_count, dba); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::REMOVED_EDGE_PROPERTIES, edge_count, dba); + + CheckLabelMap(trigger_context, query::TriggerIdentifierTag::SET_VERTEX_LABELS, vertex_count, dba); + CheckLabelMap(trigger_context, query::TriggerIdentifierTag::REMOVED_VERTEX_LABELS, vertex_count, dba); + + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::UPDATED_VERTICES, 4 * vertex_count, dba); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::UPDATED_EDGES, 2 * edge_count, dba); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::UPDATED_OBJECTS, + 4 * vertex_count + 2 * edge_count, dba); + + CheckTypedValueSize(trigger_context, 
query::TriggerIdentifierTag::DELETED_VERTICES, deleted_vertex_count, dba); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::DELETED_EDGES, deleted_edge_count, dba); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::DELETED_OBJECTS, + deleted_vertex_count + deleted_edge_count, dba); + } +} + +// If the trigger context registered a created object, each future event on the same object will be ignored. +// Binding the trigger context to transaction will mean that creating and updating an object in the same transaction +// will return only the CREATE event. +TEST_F(TriggerContextTest, ReturnCreateOnlyEvent) { + query::TriggerContextCollector trigger_context_collector; + + query::DbAccessor dba{&StartTransaction()}; + + auto create_vertex = [&] { + auto vertex = dba.InsertVertex(); + trigger_context_collector.RegisterCreatedObject(vertex); + trigger_context_collector.RegisterSetObjectProperty(vertex, dba.NameToProperty("PROPERTY1"), + query::TypedValue("Value"), query::TypedValue("ValueNew")); + trigger_context_collector.RegisterRemovedObjectProperty(vertex, dba.NameToProperty("PROPERTY2"), + query::TypedValue("Value")); + trigger_context_collector.RegisterSetVertexLabel(vertex, dba.NameToLabel("LABEL1")); + trigger_context_collector.RegisterRemovedVertexLabel(vertex, dba.NameToLabel("LABEL2")); + return vertex; + }; + + auto v1 = create_vertex(); + auto v2 = create_vertex(); + auto maybe_edge = dba.InsertEdge(&v1, &v2, dba.NameToEdgeType("EDGE")); + ASSERT_FALSE(maybe_edge.HasError()); + trigger_context_collector.RegisterCreatedObject(*maybe_edge); + trigger_context_collector.RegisterSetObjectProperty(*maybe_edge, dba.NameToProperty("PROPERTY1"), + query::TypedValue("Value"), query::TypedValue("ValueNew")); + trigger_context_collector.RegisterRemovedObjectProperty(*maybe_edge, dba.NameToProperty("PROPERTY2"), + query::TypedValue("Value")); + + dba.AdvanceCommand(); + + const auto trigger_context = 
std::move(trigger_context_collector).TransformToTriggerContext(); + + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::CREATED_VERTICES, 2, dba); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::CREATED_EDGES, 1, dba); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::CREATED_OBJECTS, 3, dba); + + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::SET_VERTEX_PROPERTIES, 0, dba); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::SET_EDGE_PROPERTIES, 0, dba); + + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::REMOVED_VERTEX_PROPERTIES, 0, dba); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::REMOVED_EDGE_PROPERTIES, 0, dba); + + CheckLabelMap(trigger_context, query::TriggerIdentifierTag::SET_VERTEX_LABELS, 0, dba); + CheckLabelMap(trigger_context, query::TriggerIdentifierTag::REMOVED_VERTEX_LABELS, 0, dba); + + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::UPDATED_VERTICES, 0, dba); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::UPDATED_EDGES, 0, dba); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::UPDATED_OBJECTS, 0, dba); +} + +namespace { +void EXPECT_PROP_TRUE(const query::TypedValue &a) { + EXPECT_TRUE(a.type() == query::TypedValue::Type::Bool && a.ValueBool()); +} + +void EXPECT_PROP_EQ(const query::TypedValue &a, const query::TypedValue &b) { EXPECT_PROP_TRUE(a == b); } +} // namespace + +// During a transaction, same property for the same object can change multiple times. TriggerContext should ensure +// that only the change on the global value is returned (value before the transaction + latest value after the +// transaction) everything inbetween should be ignored. 
+TEST_F(TriggerContextTest, GlobalPropertyChange) { + query::DbAccessor dba{&StartTransaction()}; + + auto v = dba.InsertVertex(); + dba.AdvanceCommand(); + + { + SPDLOG_DEBUG("SET -> SET"); + query::TriggerContextCollector trigger_context_collector; + trigger_context_collector.RegisterSetObjectProperty(v, dba.NameToProperty("PROPERTY"), query::TypedValue("Value"), + query::TypedValue("ValueNew")); + trigger_context_collector.RegisterSetObjectProperty(v, dba.NameToProperty("PROPERTY"), + query::TypedValue("ValueNew"), query::TypedValue("ValueNewer")); + const auto trigger_context = std::move(trigger_context_collector).TransformToTriggerContext(); + auto updated_vertices = trigger_context.GetTypedValue(query::TriggerIdentifierTag::UPDATED_VERTICES, &dba); + ASSERT_TRUE(updated_vertices.IsList()); + auto &updated_vertices_list = updated_vertices.ValueList(); + ASSERT_EQ(updated_vertices_list.size(), 1); + auto &update = updated_vertices_list[0]; + ASSERT_TRUE(update.IsMap()); + EXPECT_PROP_EQ(update, query::TypedValue{std::map{ + {"event_type", query::TypedValue{"set_vertex_property"}}, + {"vertex", query::TypedValue{v}}, + {"key", query::TypedValue{"PROPERTY"}}, + {"old", query::TypedValue{"Value"}}, + {"new", query::TypedValue{"ValueNewer"}}}}); + } + + { + SPDLOG_DEBUG("SET -> REMOVE"); + query::TriggerContextCollector trigger_context_collector; + trigger_context_collector.RegisterSetObjectProperty(v, dba.NameToProperty("PROPERTY"), query::TypedValue("Value"), + query::TypedValue("ValueNew")); + trigger_context_collector.RegisterRemovedObjectProperty(v, dba.NameToProperty("PROPERTY"), + query::TypedValue("ValueNew")); + const auto trigger_context = std::move(trigger_context_collector).TransformToTriggerContext(); + auto updated_vertices = trigger_context.GetTypedValue(query::TriggerIdentifierTag::UPDATED_VERTICES, &dba); + ASSERT_TRUE(updated_vertices.IsList()); + auto &updated_vertices_list = updated_vertices.ValueList(); + ASSERT_EQ(updated_vertices_list.size(), 
1); + auto &update = updated_vertices_list[0]; + ASSERT_TRUE(update.IsMap()); + EXPECT_PROP_EQ(update, query::TypedValue{std::map{ + {"event_type", query::TypedValue{"removed_vertex_property"}}, + {"vertex", query::TypedValue{v}}, + {"key", query::TypedValue{"PROPERTY"}}, + {"old", query::TypedValue{"Value"}}}}); + } + + { + SPDLOG_DEBUG("REMOVE -> SET"); + query::TriggerContextCollector trigger_context_collector; + trigger_context_collector.RegisterRemovedObjectProperty(v, dba.NameToProperty("PROPERTY"), + query::TypedValue("Value")); + trigger_context_collector.RegisterSetObjectProperty(v, dba.NameToProperty("PROPERTY"), query::TypedValue(), + query::TypedValue("ValueNew")); + const auto trigger_context = std::move(trigger_context_collector).TransformToTriggerContext(); + auto updated_vertices = trigger_context.GetTypedValue(query::TriggerIdentifierTag::UPDATED_VERTICES, &dba); + ASSERT_TRUE(updated_vertices.IsList()); + auto &updated_vertices_list = updated_vertices.ValueList(); + ASSERT_EQ(updated_vertices_list.size(), 1); + auto &update = updated_vertices_list[0]; + ASSERT_TRUE(update.IsMap()); + EXPECT_PROP_EQ(update, query::TypedValue{std::map{ + {"event_type", query::TypedValue{"set_vertex_property"}}, + {"vertex", query::TypedValue{v}}, + {"key", query::TypedValue{"PROPERTY"}}, + {"old", query::TypedValue{"Value"}}, + {"new", query::TypedValue{"ValueNew"}}}}); + } + + { + SPDLOG_DEBUG("REMOVE -> REMOVE"); + query::TriggerContextCollector trigger_context_collector; + trigger_context_collector.RegisterRemovedObjectProperty(v, dba.NameToProperty("PROPERTY"), + query::TypedValue("Value")); + trigger_context_collector.RegisterRemovedObjectProperty(v, dba.NameToProperty("PROPERTY"), query::TypedValue()); + const auto trigger_context = std::move(trigger_context_collector).TransformToTriggerContext(); + auto updated_vertices = trigger_context.GetTypedValue(query::TriggerIdentifierTag::UPDATED_VERTICES, &dba); + ASSERT_TRUE(updated_vertices.IsList()); + auto 
&updated_vertices_list = updated_vertices.ValueList(); + ASSERT_EQ(updated_vertices_list.size(), 1); + auto &update = updated_vertices_list[0]; + ASSERT_TRUE(update.IsMap()); + EXPECT_PROP_EQ(update, query::TypedValue{std::map{ + {"event_type", query::TypedValue{"removed_vertex_property"}}, + {"vertex", query::TypedValue{v}}, + {"key", query::TypedValue{"PROPERTY"}}, + {"old", query::TypedValue{"Value"}}}}); + } + + { + SPDLOG_DEBUG("SET -> SET (no change on transaction level)"); + query::TriggerContextCollector trigger_context_collector; + trigger_context_collector.RegisterSetObjectProperty(v, dba.NameToProperty("PROPERTY"), query::TypedValue("Value"), + query::TypedValue("ValueNew")); + trigger_context_collector.RegisterSetObjectProperty(v, dba.NameToProperty("PROPERTY"), + query::TypedValue("ValueNew"), query::TypedValue("Value")); + const auto trigger_context = std::move(trigger_context_collector).TransformToTriggerContext(); + auto updated_vertices = trigger_context.GetTypedValue(query::TriggerIdentifierTag::UPDATED_VERTICES, &dba); + ASSERT_TRUE(updated_vertices.IsList()); + auto &updated_vertices_list = updated_vertices.ValueList(); + ASSERT_EQ(updated_vertices_list.size(), 0); + } + + { + SPDLOG_DEBUG("SET -> REMOVE (no change on transaction level)"); + query::TriggerContextCollector trigger_context_collector; + trigger_context_collector.RegisterSetObjectProperty(v, dba.NameToProperty("PROPERTY"), query::TypedValue(), + query::TypedValue("ValueNew")); + trigger_context_collector.RegisterRemovedObjectProperty(v, dba.NameToProperty("PROPERTY"), + query::TypedValue("ValueNew")); + const auto trigger_context = std::move(trigger_context_collector).TransformToTriggerContext(); + auto updated_vertices = trigger_context.GetTypedValue(query::TriggerIdentifierTag::UPDATED_VERTICES, &dba); + ASSERT_TRUE(updated_vertices.IsList()); + auto &updated_vertices_list = updated_vertices.ValueList(); + ASSERT_EQ(updated_vertices_list.size(), 0); + } + + { + 
SPDLOG_DEBUG("REMOVE -> SET (no change on transaction level)"); + query::TriggerContextCollector trigger_context_collector; + trigger_context_collector.RegisterRemovedObjectProperty(v, dba.NameToProperty("PROPERTY"), + query::TypedValue("Value")); + trigger_context_collector.RegisterSetObjectProperty(v, dba.NameToProperty("PROPERTY"), query::TypedValue(), + query::TypedValue("Value")); + const auto trigger_context = std::move(trigger_context_collector).TransformToTriggerContext(); + auto updated_vertices = trigger_context.GetTypedValue(query::TriggerIdentifierTag::UPDATED_VERTICES, &dba); + ASSERT_TRUE(updated_vertices.IsList()); + auto &updated_vertices_list = updated_vertices.ValueList(); + ASSERT_EQ(updated_vertices_list.size(), 0); + } + + { + SPDLOG_DEBUG("REMOVE -> REMOVE (no change on transaction level)"); + query::TriggerContextCollector trigger_context_collector; + trigger_context_collector.RegisterRemovedObjectProperty(v, dba.NameToProperty("PROPERTY"), query::TypedValue()); + trigger_context_collector.RegisterRemovedObjectProperty(v, dba.NameToProperty("PROPERTY"), query::TypedValue()); + const auto trigger_context = std::move(trigger_context_collector).TransformToTriggerContext(); + auto updated_vertices = trigger_context.GetTypedValue(query::TriggerIdentifierTag::UPDATED_VERTICES, &dba); + ASSERT_TRUE(updated_vertices.IsList()); + auto &updated_vertices_list = updated_vertices.ValueList(); + ASSERT_EQ(updated_vertices_list.size(), 0); + } + + { + SPDLOG_DEBUG("SET -> REMOVE -> SET -> REMOVE -> SET"); + query::TriggerContextCollector trigger_context_collector; + trigger_context_collector.RegisterSetObjectProperty(v, dba.NameToProperty("PROPERTY"), query::TypedValue("Value0"), + query::TypedValue("Value1")); + trigger_context_collector.RegisterRemovedObjectProperty(v, dba.NameToProperty("PROPERTY"), + query::TypedValue("Value1")); + trigger_context_collector.RegisterSetObjectProperty(v, dba.NameToProperty("PROPERTY"), query::TypedValue(), + 
query::TypedValue("Value2")); + trigger_context_collector.RegisterRemovedObjectProperty(v, dba.NameToProperty("PROPERTY"), + query::TypedValue("Value2")); + trigger_context_collector.RegisterSetObjectProperty(v, dba.NameToProperty("PROPERTY"), query::TypedValue(), + query::TypedValue("Value3")); + const auto trigger_context = std::move(trigger_context_collector).TransformToTriggerContext(); + auto updated_vertices = trigger_context.GetTypedValue(query::TriggerIdentifierTag::UPDATED_VERTICES, &dba); + ASSERT_TRUE(updated_vertices.IsList()); + auto &updated_vertices_list = updated_vertices.ValueList(); + ASSERT_EQ(updated_vertices_list.size(), 1); + auto &update = updated_vertices_list[0]; + ASSERT_TRUE(update.IsMap()); + EXPECT_PROP_EQ(update, query::TypedValue{std::map{ + {"event_type", query::TypedValue{"set_vertex_property"}}, + {"vertex", query::TypedValue{v}}, + {"key", query::TypedValue{"PROPERTY"}}, + {"old", query::TypedValue{"Value0"}}, + {"new", query::TypedValue{"Value3"}}}}); + } +} + +// Same as above, but for label changes +TEST_F(TriggerContextTest, GlobalLabelChange) { + query::DbAccessor dba{&StartTransaction()}; + + auto v = dba.InsertVertex(); + dba.AdvanceCommand(); + + const auto label_id = dba.NameToLabel("LABEL"); + // You cannot add the same label multiple times and you cannot remove non existing labels + // so REMOVE -> REMOVE and SET -> SET doesn't make sense + { + SPDLOG_DEBUG("SET -> REMOVE"); + query::TriggerContextCollector trigger_context_collector; + trigger_context_collector.RegisterSetVertexLabel(v, label_id); + trigger_context_collector.RegisterRemovedVertexLabel(v, label_id); + const auto trigger_context = std::move(trigger_context_collector).TransformToTriggerContext(); + auto updated_vertices = trigger_context.GetTypedValue(query::TriggerIdentifierTag::UPDATED_VERTICES, &dba); + ASSERT_TRUE(updated_vertices.IsList()); + auto &updated_vertices_list = updated_vertices.ValueList(); + ASSERT_EQ(updated_vertices_list.size(), 0); + } 
+ + { + SPDLOG_DEBUG("REMOVE -> SET"); + query::TriggerContextCollector trigger_context_collector; + trigger_context_collector.RegisterRemovedVertexLabel(v, label_id); + trigger_context_collector.RegisterSetVertexLabel(v, label_id); + const auto trigger_context = std::move(trigger_context_collector).TransformToTriggerContext(); + auto updated_vertices = trigger_context.GetTypedValue(query::TriggerIdentifierTag::UPDATED_VERTICES, &dba); + ASSERT_TRUE(updated_vertices.IsList()); + auto &updated_vertices_list = updated_vertices.ValueList(); + ASSERT_EQ(updated_vertices_list.size(), 0); + } + + { + SPDLOG_DEBUG("SET -> REMOVE -> SET -> REMOVE -> SET"); + query::TriggerContextCollector trigger_context_collector; + trigger_context_collector.RegisterSetVertexLabel(v, label_id); + trigger_context_collector.RegisterRemovedVertexLabel(v, label_id); + trigger_context_collector.RegisterSetVertexLabel(v, label_id); + trigger_context_collector.RegisterRemovedVertexLabel(v, label_id); + trigger_context_collector.RegisterSetVertexLabel(v, label_id); + const auto trigger_context = std::move(trigger_context_collector).TransformToTriggerContext(); + auto updated_vertices = trigger_context.GetTypedValue(query::TriggerIdentifierTag::UPDATED_VERTICES, &dba); + ASSERT_TRUE(updated_vertices.IsList()); + auto &updated_vertices_list = updated_vertices.ValueList(); + ASSERT_EQ(updated_vertices_list.size(), 1); + auto &update = updated_vertices_list[0]; + ASSERT_TRUE(update.IsMap()); + EXPECT_PROP_EQ(update, query::TypedValue{std::map{ + {"event_type", query::TypedValue{"set_vertex_label"}}, + {"vertex", query::TypedValue{v}}, + {"label", query::TypedValue{"LABEL"}}}}); + } + + { + SPDLOG_DEBUG("REMOVE -> SET -> REMOVE -> SET -> REMOVE"); + query::TriggerContextCollector trigger_context_collector; + trigger_context_collector.RegisterRemovedVertexLabel(v, label_id); + trigger_context_collector.RegisterSetVertexLabel(v, label_id); + trigger_context_collector.RegisterRemovedVertexLabel(v, 
label_id); + trigger_context_collector.RegisterSetVertexLabel(v, label_id); + trigger_context_collector.RegisterRemovedVertexLabel(v, label_id); + const auto trigger_context = std::move(trigger_context_collector).TransformToTriggerContext(); + auto updated_vertices = trigger_context.GetTypedValue(query::TriggerIdentifierTag::UPDATED_VERTICES, &dba); + ASSERT_TRUE(updated_vertices.IsList()); + auto &updated_vertices_list = updated_vertices.ValueList(); + ASSERT_EQ(updated_vertices_list.size(), 1); + auto &update = updated_vertices_list[0]; + ASSERT_TRUE(update.IsMap()); + EXPECT_PROP_EQ(update, query::TypedValue{std::map{ + {"event_type", query::TypedValue{"removed_vertex_label"}}, + {"vertex", query::TypedValue{v}}, + {"label", query::TypedValue{"LABEL"}}}}); + } +} From 62a628c51f81ddd8948e22caa7ec22340d3a919d Mon Sep 17 00:00:00 2001 From: antonio2368 Date: Fri, 14 May 2021 15:38:59 +0200 Subject: [PATCH 46/63] Add trigger queries and trigger store (#145) Co-authored-by: Benjamin Antal --- .clang-tidy | 3 +- src/auth/models.cpp | 2 + src/auth/models.hpp | 11 +- src/glue/auth.cpp | 2 + src/memgraph.cpp | 2 +- src/query/CMakeLists.txt | 3 +- src/query/exceptions.hpp | 10 +- src/query/frontend/ast/ast.lcp | 32 ++- src/query/frontend/ast/ast_visitor.hpp | 7 +- .../frontend/ast/cypher_main_visitor.cpp | 73 +++++++ .../frontend/ast/cypher_main_visitor.hpp | 20 ++ .../opencypher/grammar/MemgraphCypher.g4 | 29 +++ .../opencypher/grammar/MemgraphCypherLexer.g4 | 9 +- .../frontend/semantic/required_privileges.cpp | 2 + src/query/frontend/stripped.cpp | 14 ++ .../frontend/stripped_lexer_constants.hpp | 20 +- src/query/interpreter.cpp | 145 +++++++++++-- src/query/interpreter.hpp | 6 +- src/query/serialization/property_value.cpp | 94 ++++++++ src/query/serialization/property_value.hpp | 21 ++ src/query/trigger.cpp | 200 +++++++++++++++++- src/query/trigger.hpp | 52 ++++- tests/benchmark/expansion.cpp | 4 +- tests/manual/single_query.cpp | 5 +- tests/unit/CMakeLists.txt | 3 + 
tests/unit/cypher_main_visitor.cpp | 103 +++++++++ tests/unit/interpreter.cpp | 4 +- tests/unit/main.cpp | 2 +- tests/unit/query_dump.cpp | 12 +- tests/unit/query_plan_edge_cases.cpp | 5 +- tests/unit/query_required_privileges.cpp | 5 + .../query_serialization_property_value.cpp | 85 ++++++++ tests/unit/query_trigger.cpp | 187 ++++++++++++++++ 33 files changed, 1107 insertions(+), 65 deletions(-) create mode 100644 src/query/serialization/property_value.cpp create mode 100644 src/query/serialization/property_value.hpp create mode 100644 tests/unit/query_serialization_property_value.cpp diff --git a/.clang-tidy b/.clang-tidy index b0f274372..7a9d32294 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -52,7 +52,8 @@ Checks: '*, -readability-else-after-return, -readability-implicit-bool-conversion, -readability-magic-numbers, - -readability-named-parameter' + -readability-named-parameter, + -misc-no-recursion' WarningsAsErrors: '' HeaderFilterRegex: 'src/.*' AnalyzeTemporaryDtors: false diff --git a/src/auth/models.cpp b/src/auth/models.cpp index 33ebc72fe..be5f28b3e 100644 --- a/src/auth/models.cpp +++ b/src/auth/models.cpp @@ -47,6 +47,8 @@ std::string PermissionToString(Permission permission) { return "READ_FILE"; case Permission::FREE_MEMORY: return "FREE_MEMORY"; + case Permission::TRIGGER: + return "TRIGGER"; case Permission::AUTH: return "AUTH"; } diff --git a/src/auth/models.hpp b/src/auth/models.hpp index f1139e1ca..9e1b54977 100644 --- a/src/auth/models.hpp +++ b/src/auth/models.hpp @@ -25,16 +25,17 @@ enum class Permission : uint64_t { LOCK_PATH = 1U << 11U, READ_FILE = 1U << 12U, FREE_MEMORY = 1U << 13U, + TRIGGER = 1U << 14U, AUTH = 1U << 16U }; // clang-format on // Constant list of all available permissions. 
-const std::vector kPermissionsAll = {Permission::MATCH, Permission::CREATE, Permission::MERGE, - Permission::DELETE, Permission::SET, Permission::REMOVE, - Permission::INDEX, Permission::STATS, Permission::CONSTRAINT, - Permission::DUMP, Permission::AUTH, Permission::REPLICATION, - Permission::LOCK_PATH, Permission::READ_FILE, Permission::FREE_MEMORY}; +const std::vector kPermissionsAll = { + Permission::MATCH, Permission::CREATE, Permission::MERGE, Permission::DELETE, + Permission::SET, Permission::REMOVE, Permission::INDEX, Permission::STATS, + Permission::CONSTRAINT, Permission::DUMP, Permission::AUTH, Permission::REPLICATION, + Permission::LOCK_PATH, Permission::READ_FILE, Permission::FREE_MEMORY, Permission::TRIGGER}; // Function that converts a permission to its string representation. std::string PermissionToString(Permission permission); diff --git a/src/glue/auth.cpp b/src/glue/auth.cpp index 2a9932595..a54894b80 100644 --- a/src/glue/auth.cpp +++ b/src/glue/auth.cpp @@ -32,6 +32,8 @@ auth::Permission PrivilegeToPermission(query::AuthQuery::Privilege privilege) { return auth::Permission::READ_FILE; case query::AuthQuery::Privilege::FREE_MEMORY: return auth::Permission::FREE_MEMORY; + case query::AuthQuery::Privilege::TRIGGER: + return auth::Permission::TRIGGER; case query::AuthQuery::Privilege::AUTH: return auth::Permission::AUTH; } diff --git a/src/memgraph.cpp b/src/memgraph.cpp index c9c17b334..e99ae8f43 100644 --- a/src/memgraph.cpp +++ b/src/memgraph.cpp @@ -980,7 +980,7 @@ int main(int argc, char **argv) { db_config.durability.snapshot_interval = std::chrono::seconds(FLAGS_storage_snapshot_interval_sec); } storage::Storage db(db_config); - query::InterpreterContext interpreter_context{&db}; + query::InterpreterContext interpreter_context{&db, FLAGS_data_directory}; query::SetExecutionTimeout(&interpreter_context, FLAGS_query_execution_timeout_sec); #ifdef MG_ENTERPRISE diff --git a/src/query/CMakeLists.txt b/src/query/CMakeLists.txt index 
8ebe31425..5c692e44e 100644 --- a/src/query/CMakeLists.txt +++ b/src/query/CMakeLists.txt @@ -31,6 +31,7 @@ set(mg_query_sources procedure/mg_procedure_impl.cpp procedure/module.cpp procedure/py_module.cpp + serialization/property_value.cpp trigger.cpp typed_value.cpp) @@ -38,7 +39,7 @@ add_library(mg-query STATIC ${mg_query_sources}) add_dependencies(mg-query generate_lcp_query) target_include_directories(mg-query PUBLIC ${CMAKE_SOURCE_DIR}/include) target_link_libraries(mg-query dl cppitertools) -target_link_libraries(mg-query mg-storage-v2 mg-utils) +target_link_libraries(mg-query mg-storage-v2 mg-utils mg-kvstore) if("${MG_PYTHON_VERSION}" STREQUAL "") find_package(Python3 3.5 REQUIRED COMPONENTS Development) else() diff --git a/src/query/exceptions.hpp b/src/query/exceptions.hpp index cde5433d2..b8ddd7480 100644 --- a/src/query/exceptions.hpp +++ b/src/query/exceptions.hpp @@ -161,12 +161,18 @@ class ReplicationModificationInMulticommandTxException : public QueryException { class LockPathModificationInMulticommandTxException : public QueryException { public: LockPathModificationInMulticommandTxException() - : QueryException("Lock path clause not allowed in multicommand transactions.") {} + : QueryException("Lock path query not allowed in multicommand transactions.") {} }; class FreeMemoryModificationInMulticommandTxException : public QueryException { public: FreeMemoryModificationInMulticommandTxException() - : QueryException("Lock path clause not allowed in multicommand transactions.") {} + : QueryException("Free memory query not allowed in multicommand transactions.") {} +}; + +class TriggerModificationInMulticommandTxException : public QueryException { + public: + TriggerModificationInMulticommandTxException() + : QueryException("Trigger queries not allowed in multicommand transactions.") {} }; } // namespace query diff --git a/src/query/frontend/ast/ast.lcp b/src/query/frontend/ast/ast.lcp index 4f9fdcf4c..c6e46f984 100644 --- 
a/src/query/frontend/ast/ast.lcp +++ b/src/query/frontend/ast/ast.lcp @@ -2193,7 +2193,7 @@ cpp<# (:serialize)) (lcp:define-enum privilege (create delete match merge set remove index stats auth constraint - dump replication lock_path read_file free_memory) + dump replication lock_path read_file free_memory trigger) (:serialize)) #>cpp AuthQuery() = default; @@ -2232,7 +2232,7 @@ const std::vector kPrivilegesAll = { AuthQuery::Privilege::REPLICATION, AuthQuery::Privilege::READ_FILE, AuthQuery::Privilege::LOCK_PATH, - AuthQuery::Privilege::FREE_MEMORY}; + AuthQuery::Privilege::FREE_MEMORY, AuthQuery::Privilege::TRIGGER}; cpp<# (lcp:define-class info-query (query) @@ -2398,7 +2398,7 @@ cpp<# (:serialize (:slk)) (:clone)) - (lcp:define-class free-memory-query (query) () +(lcp:define-class free-memory-query (query) () (:public #>cpp DEFVISITABLE(QueryVisitor); @@ -2406,4 +2406,30 @@ cpp<# (:serialize (:slk)) (:clone)) +(lcp:define-class trigger-query (query) + ((action "Action" :scope :public) + (event_type "EventType" :scope :public) + (trigger_name "std::string" :scope :public) + (before_commit "bool" :scope :public) + (statement "std::string" :scope :public)) + + (:public + (lcp:define-enum action + (create-trigger drop-trigger show-triggers) + (:serialize)) + (lcp:define-enum event-type + (any vertex_create edge_create create vertex_delete edge_delete delete vertex_update edge_update update) + (:serialize)) + #>cpp + TriggerQuery() = default; + + DEFVISITABLE(QueryVisitor); + cpp<#) + (:private + #>cpp + friend class AstStorage; + cpp<#) + (:serialize (:slk)) + (:clone)) + (lcp:pop-namespace) ;; namespace query diff --git a/src/query/frontend/ast/ast_visitor.hpp b/src/query/frontend/ast/ast_visitor.hpp index 1539422e2..4523fd093 100644 --- a/src/query/frontend/ast/ast_visitor.hpp +++ b/src/query/frontend/ast/ast_visitor.hpp @@ -76,6 +76,7 @@ class ReplicationQuery; class LockPathQuery; class LoadCsv; class FreeMemoryQuery; +class TriggerQuery; using 
TreeCompositeVisitor = ::utils::CompositeVisitor< SingleQuery, CypherUnion, NamedExpression, OrOperator, XorOperator, AndOperator, NotOperator, AdditionOperator, @@ -107,8 +108,8 @@ class ExpressionVisitor None, ParameterLookup, Identifier, PrimitiveLiteral, RegexMatch> {}; template -class QueryVisitor - : public ::utils::Visitor {}; +class QueryVisitor : public ::utils::Visitor {}; } // namespace query diff --git a/src/query/frontend/ast/cypher_main_visitor.cpp b/src/query/frontend/ast/cypher_main_visitor.cpp index ea0aa3ccd..51cb93305 100644 --- a/src/query/frontend/ast/cypher_main_visitor.cpp +++ b/src/query/frontend/ast/cypher_main_visitor.cpp @@ -340,6 +340,78 @@ antlrcpp::Any CypherMainVisitor::visitFreeMemoryQuery(MemgraphCypher::FreeMemory return free_memory_query; } +antlrcpp::Any CypherMainVisitor::visitTriggerQuery(MemgraphCypher::TriggerQueryContext *ctx) { + MG_ASSERT(ctx->children.size() == 1, "TriggerQuery should have exactly one child!"); + auto *trigger_query = ctx->children[0]->accept(this).as(); + query_ = trigger_query; + return trigger_query; +} + +antlrcpp::Any CypherMainVisitor::visitCreateTrigger(MemgraphCypher::CreateTriggerContext *ctx) { + auto *trigger_query = storage_->Create(); + trigger_query->action_ = TriggerQuery::Action::CREATE_TRIGGER; + trigger_query->trigger_name_ = ctx->triggerName()->symbolicName()->accept(this).as(); + + auto *statement = ctx->triggerStatement(); + antlr4::misc::Interval interval{statement->start->getStartIndex(), statement->stop->getStopIndex()}; + trigger_query->statement_ = ctx->start->getInputStream()->getText(interval); + + trigger_query->event_type_ = [ctx] { + if (!ctx->ON()) { + return TriggerQuery::EventType::ANY; + } + + if (ctx->CREATE(1)) { + if (ctx->emptyVertex()) { + return TriggerQuery::EventType::VERTEX_CREATE; + } + if (ctx->emptyEdge()) { + return TriggerQuery::EventType::EDGE_CREATE; + } + return TriggerQuery::EventType::CREATE; + } + + if (ctx->DELETE()) { + if (ctx->emptyVertex()) { + 
return TriggerQuery::EventType::VERTEX_DELETE; + } + if (ctx->emptyEdge()) { + return TriggerQuery::EventType::EDGE_DELETE; + } + return TriggerQuery::EventType::DELETE; + } + + if (ctx->UPDATE()) { + if (ctx->emptyVertex()) { + return TriggerQuery::EventType::VERTEX_UPDATE; + } + if (ctx->emptyEdge()) { + return TriggerQuery::EventType::EDGE_UPDATE; + } + return TriggerQuery::EventType::UPDATE; + } + + LOG_FATAL("Invalid token allowed for the query"); + }(); + + trigger_query->before_commit_ = ctx->BEFORE(); + + return trigger_query; +} + +antlrcpp::Any CypherMainVisitor::visitDropTrigger(MemgraphCypher::DropTriggerContext *ctx) { + auto *trigger_query = storage_->Create(); + trigger_query->action_ = TriggerQuery::Action::DROP_TRIGGER; + trigger_query->trigger_name_ = ctx->triggerName()->symbolicName()->accept(this).as(); + return trigger_query; +} + +antlrcpp::Any CypherMainVisitor::visitShowTriggers(MemgraphCypher::ShowTriggersContext *ctx) { + auto *trigger_query = storage_->Create(); + trigger_query->action_ = TriggerQuery::Action::SHOW_TRIGGERS; + return trigger_query; +} + antlrcpp::Any CypherMainVisitor::visitCypherUnion(MemgraphCypher::CypherUnionContext *ctx) { bool distinct = !ctx->ALL(); auto *cypher_union = storage_->Create(distinct); @@ -771,6 +843,7 @@ antlrcpp::Any CypherMainVisitor::visitPrivilege(MemgraphCypher::PrivilegeContext if (ctx->LOCK_PATH()) return AuthQuery::Privilege::LOCK_PATH; if (ctx->READ_FILE()) return AuthQuery::Privilege::READ_FILE; if (ctx->FREE_MEMORY()) return AuthQuery::Privilege::FREE_MEMORY; + if (ctx->TRIGGER()) return AuthQuery::Privilege::TRIGGER; LOG_FATAL("Should not get here - unknown privilege!"); } diff --git a/src/query/frontend/ast/cypher_main_visitor.hpp b/src/query/frontend/ast/cypher_main_visitor.hpp index 03fde1119..ca97ee02a 100644 --- a/src/query/frontend/ast/cypher_main_visitor.hpp +++ b/src/query/frontend/ast/cypher_main_visitor.hpp @@ -218,6 +218,26 @@ class CypherMainVisitor : public 
antlropencypher::MemgraphCypherBaseVisitor { */ antlrcpp::Any visitFreeMemoryQuery(MemgraphCypher::FreeMemoryQueryContext *ctx) override; + /** + * @return TriggerQuery* + */ + antlrcpp::Any visitTriggerQuery(MemgraphCypher::TriggerQueryContext *ctx) override; + + /** + * @return CreateTrigger* + */ + antlrcpp::Any visitCreateTrigger(MemgraphCypher::CreateTriggerContext *ctx) override; + + /** + * @return DropTrigger* + */ + antlrcpp::Any visitDropTrigger(MemgraphCypher::DropTriggerContext *ctx) override; + + /** + * @return ShowTriggers* + */ + antlrcpp::Any visitShowTriggers(MemgraphCypher::ShowTriggersContext *ctx) override; + /** * @return CypherUnion* */ diff --git a/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 b/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 index 8778baf01..a3c578631 100644 --- a/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 +++ b/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 @@ -7,18 +7,22 @@ options { tokenVocab=MemgraphCypherLexer; } import Cypher ; memgraphCypherKeyword : cypherKeyword + | AFTER | ALTER | ASYNC | AUTH | BAD + | BEFORE | CLEAR | CSV + | COMMIT | DATA | DELIMITER | DATABASE | DENY | DROP | DUMP + | EXECUTE | FOR | FREE | FROM @@ -43,9 +47,12 @@ memgraphCypherKeyword : cypherKeyword | QUOTE | STATS | SYNC + | TRIGGER + | TRIGGERS | TIMEOUT | TO | UNLOCK + | UPDATE | USER | USERS ; @@ -66,6 +73,7 @@ query : cypherQuery | replicationQuery | lockPathQuery | freeMemoryQuery + | triggerQuery ; authQuery : createRole @@ -92,6 +100,11 @@ replicationQuery : setReplicationRole | showReplicas ; +triggerQuery : createTrigger + | dropTrigger + | showTriggers + ; + clause : cypherMatch | unwind | merge @@ -161,6 +174,7 @@ privilege : CREATE | LOCK_PATH | READ_FILE | FREE_MEMORY + | TRIGGER ; privilegeList : privilege ( ',' privilege )* ; @@ -193,3 +207,18 @@ showReplicas : SHOW REPLICAS ; lockPathQuery : ( LOCK | UNLOCK ) DATA DIRECTORY ; freeMemoryQuery : FREE MEMORY ; + +triggerName : 
symbolicName ; + +triggerStatement : .*? ; + +emptyVertex : '(' ')' ; + +emptyEdge : dash dash rightArrowHead ; + +createTrigger : CREATE TRIGGER triggerName ( ON ( emptyVertex | emptyEdge ) ? ( CREATE | UPDATE | DELETE ) ) ? + ( AFTER | BEFORE ) COMMIT EXECUTE triggerStatement ; + +dropTrigger : DROP TRIGGER triggerName ; + +showTriggers : SHOW TRIGGERS ; diff --git a/src/query/frontend/opencypher/grammar/MemgraphCypherLexer.g4 b/src/query/frontend/opencypher/grammar/MemgraphCypherLexer.g4 index 9aeec8eec..5fdea9f31 100644 --- a/src/query/frontend/opencypher/grammar/MemgraphCypherLexer.g4 +++ b/src/query/frontend/opencypher/grammar/MemgraphCypherLexer.g4 @@ -12,11 +12,14 @@ import CypherLexer ; UNDERSCORE : '_' ; +AFTER : A F T E R ; ALTER : A L T E R ; ASYNC : A S Y N C ; AUTH : A U T H ; BAD : B A D ; +BEFORE : B E F O R E ; CLEAR : C L E A R ; +COMMIT : C O M M I T ; CSV : C S V ; DATA : D A T A ; DELIMITER : D E L I M I T E R ; @@ -25,9 +28,10 @@ DENY : D E N Y ; DIRECTORY : D I R E C T O R Y ; DROP : D R O P ; DUMP : D U M P ; +EXECUTE : E X E C U T E ; FOR : F O R ; FREE : F R E E ; -FREE_MEMORY : F R E E UNDERSCORE M E M O R Y ; +FREE_MEMORY : F R E E UNDERSCORE M E M O R Y ; FROM : F R O M ; GRANT : G R A N T ; GRANTS : G R A N T S ; @@ -56,6 +60,9 @@ STATS : S T A T S ; SYNC : S Y N C ; TIMEOUT : T I M E O U T ; TO : T O ; +TRIGGER : T R I G G E R ; +TRIGGERS : T R I G G E R S ; UNLOCK : U N L O C K ; +UPDATE : U P D A T E ; USER : U S E R ; USERS : U S E R S ; diff --git a/src/query/frontend/semantic/required_privileges.cpp b/src/query/frontend/semantic/required_privileges.cpp index 4173c7fbc..0f16bff56 100644 --- a/src/query/frontend/semantic/required_privileges.cpp +++ b/src/query/frontend/semantic/required_privileges.cpp @@ -53,6 +53,8 @@ class PrivilegeExtractor : public QueryVisitor, public HierarchicalTreeVis void Visit(FreeMemoryQuery &free_memory_query) override { AddPrivilege(AuthQuery::Privilege::FREE_MEMORY); } + void Visit(TriggerQuery 
&trigger_query) override { AddPrivilege(AuthQuery::Privilege::TRIGGER); } + void Visit(ReplicationQuery &replication_query) override { AddPrivilege(AuthQuery::Privilege::REPLICATION); } bool PreVisit(Create & /*unused*/) override { diff --git a/src/query/frontend/stripped.cpp b/src/query/frontend/stripped.cpp index 8f2a3e67d..9a6c31959 100644 --- a/src/query/frontend/stripped.cpp +++ b/src/query/frontend/stripped.cpp @@ -35,6 +35,7 @@ StrippedQuery::StrippedQuery(const std::string &query) : original_(query) { }; std::vector> tokens; + std::string unstripped_chunk; for (int i = 0; i < static_cast(original_.size());) { Token token = Token::UNMATCHED; int len = 0; @@ -58,6 +59,13 @@ StrippedQuery::StrippedQuery(const std::string &query) : original_(query) { if (token == Token::UNMATCHED) throw LexingException("Invalid query."); tokens.emplace_back(token, original_.substr(i, len)); i += len; + + // if we notice execute, we create a trigger which has defined statements + // the statements will be parsed separately later on so we skip it for now + if (utils::IEquals(tokens.back().second, "execute")) { + unstripped_chunk = original_.substr(i); + break; + } } std::vector token_strings; @@ -79,6 +87,7 @@ StrippedQuery::StrippedQuery(const std::string &query) : original_(query) { // named expressions in return. for (int i = 0; i < static_cast(tokens.size()); ++i) { auto &token = tokens[i]; + // We need to shift token index for every parameter since antlr's parser // thinks of parameter as two tokens. int token_index = token_strings.size() + parameters_.size(); @@ -123,6 +132,10 @@ StrippedQuery::StrippedQuery(const std::string &query) : original_(query) { } } + if (!unstripped_chunk.empty()) { + token_strings.push_back(std::move(unstripped_chunk)); + } + query_ = utils::Join(token_strings, " "); hash_ = utils::Fnv(query_); @@ -156,6 +169,7 @@ StrippedQuery::StrippedQuery(const std::string &query) : original_(query) { } // There is only whitespace, nothing to do... 
if (it == tokens.end()) break; + bool has_as = false; auto last_non_space = it; auto jt = it; diff --git a/src/query/frontend/stripped_lexer_constants.hpp b/src/query/frontend/stripped_lexer_constants.hpp index c08da89ad..be388d708 100644 --- a/src/query/frontend/stripped_lexer_constants.hpp +++ b/src/query/frontend/stripped_lexer_constants.hpp @@ -79,15 +79,17 @@ class Trie { const int kBitsetSize = 65536; const trie::Trie kKeywords = { - "union", "all", "optional", "match", "unwind", "as", "merge", "on", "create", - "set", "detach", "delete", "remove", "with", "distinct", "return", "order", "by", - "skip", "limit", "ascending", "asc", "descending", "desc", "where", "or", "xor", - "and", "not", "in", "starts", "ends", "contains", "is", "null", "case", - "when", "then", "else", "end", "count", "filter", "extract", "any", "none", - "single", "true", "false", "reduce", "coalesce", "user", "password", "alter", "drop", - "show", "stats", "unique", "explain", "profile", "storage", "index", "info", "exists", - "assert", "constraint", "node", "key", "dump", "database", "call", "yield", "memory", - "mb", "kb", "unlimited", "free", "procedure", "query", "free_memory", "read_file", "lock_path"}; + "union", "all", "optional", "match", "unwind", "as", "merge", "on", + "create", "set", "detach", "delete", "remove", "with", "distinct", "return", + "order", "by", "skip", "limit", "ascending", "asc", "descending", "desc", + "where", "or", "xor", "and", "not", "in", "starts", "ends", + "contains", "is", "null", "case", "when", "then", "else", "end", + "count", "filter", "extract", "any", "none", "single", "true", "false", + "reduce", "coalesce", "user", "password", "alter", "drop", "show", "stats", + "unique", "explain", "profile", "storage", "index", "info", "exists", "assert", + "constraint", "node", "key", "dump", "database", "call", "yield", "memory", + "mb", "kb", "unlimited", "free", "procedure", "query", "free_memory", "read_file", + "lock_path", "after", "before", 
"execute", "transaction", "trigger", "triggers", "update"}; // Unicode codepoints that are allowed at the start of the unescaped name. const std::bitset kUnescapedNameAllowedStarts( diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp index 4d8ca0221..dbd8c1828 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -20,6 +20,7 @@ #include "query/plan/vertex_count_cache.hpp" #include "query/trigger.hpp" #include "query/typed_value.hpp" +#include "storage/v2/property_value.hpp" #include "utils/algorithm.hpp" #include "utils/csv_parsing.hpp" #include "utils/event_counter.hpp" @@ -439,7 +440,6 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, ReplQueryHandler * }; return callback; } - return callback; } } @@ -598,6 +598,12 @@ std::optional PullPlan::Pull(AnyStream *stream, std::optional< using RWType = plan::ReadWriteTypeChecker::RWType; } // namespace +InterpreterContext::InterpreterContext(storage::Storage *db, const std::filesystem::path &data_directory) : db(db) { + auto storage_accessor = db->Access(); + DbAccessor dba{&storage_accessor}; + trigger_store.emplace(data_directory / "triggers", &ast_cache, &dba, &antlr_lock); +} + Interpreter::Interpreter(InterpreterContext *interpreter_context) : interpreter_context_(interpreter_context) { MG_ASSERT(interpreter_context_, "Interpreter context must not be NULL"); // try { @@ -668,8 +674,7 @@ PreparedQuery Interpreter::PrepareTransactionQuery(std::string_view query_upper) db_accessor_ = std::make_unique(interpreter_context_->db->Access()); execution_db_accessor_.emplace(db_accessor_.get()); - if (interpreter_context_->before_commit_triggers.size() > 0 || - interpreter_context_->after_commit_triggers.size() > 0) { + if (interpreter_context_->trigger_store->HasTriggers()) { trigger_context_collector_.emplace(); } }; @@ -1095,6 +1100,116 @@ PreparedQuery PrepareFreeMemoryQuery(ParsedQuery parsed_query, const bool in_exp RWType::NONE}; } +TriggerEventType 
ToTriggerEventType(const TriggerQuery::EventType event_type) { + switch (event_type) { + case TriggerQuery::EventType::ANY: + return TriggerEventType::ANY; + + case TriggerQuery::EventType::CREATE: + return TriggerEventType::CREATE; + + case TriggerQuery::EventType::VERTEX_CREATE: + return TriggerEventType::VERTEX_CREATE; + + case TriggerQuery::EventType::EDGE_CREATE: + return TriggerEventType::EDGE_CREATE; + + case TriggerQuery::EventType::DELETE: + return TriggerEventType::DELETE; + + case TriggerQuery::EventType::VERTEX_DELETE: + return TriggerEventType::VERTEX_DELETE; + + case TriggerQuery::EventType::EDGE_DELETE: + return TriggerEventType::EDGE_DELETE; + + case TriggerQuery::EventType::UPDATE: + return TriggerEventType::UPDATE; + + case TriggerQuery::EventType::VERTEX_UPDATE: + return TriggerEventType::VERTEX_UPDATE; + + case TriggerQuery::EventType::EDGE_UPDATE: + return TriggerEventType::EDGE_UPDATE; + } +} + +Callback CreateTrigger(TriggerQuery *trigger_query, + const std::map &user_parameters, + InterpreterContext *interpreter_context, DbAccessor *dba) { + return {{}, [trigger_query, interpreter_context, dba, &user_parameters]() -> std::vector> { + interpreter_context->trigger_store->AddTrigger( + trigger_query->trigger_name_, trigger_query->statement_, user_parameters, + ToTriggerEventType(trigger_query->event_type_), + trigger_query->before_commit_ ? 
TriggerPhase::BEFORE_COMMIT : TriggerPhase::AFTER_COMMIT, + &interpreter_context->ast_cache, dba, &interpreter_context->antlr_lock); + return {}; + }}; +} + +Callback DropTrigger(TriggerQuery *trigger_query, InterpreterContext *interpreter_context) { + return {{}, [trigger_query, interpreter_context]() -> std::vector> { + interpreter_context->trigger_store->DropTrigger(trigger_query->trigger_name_); + return {}; + }}; +} + +Callback ShowTriggers(InterpreterContext *interpreter_context) { + return {{"trigger name", "statement", "event type", "phase"}, [interpreter_context] { + std::vector> results; + auto trigger_infos = interpreter_context->trigger_store->GetTriggerInfo(); + results.reserve(trigger_infos.size()); + for (auto &trigger_info : trigger_infos) { + std::vector typed_trigger_info; + typed_trigger_info.reserve(4); + typed_trigger_info.emplace_back(std::move(trigger_info.name)); + typed_trigger_info.emplace_back(std::move(trigger_info.statement)); + typed_trigger_info.emplace_back(TriggerEventTypeToString(trigger_info.event_type)); + typed_trigger_info.emplace_back(trigger_info.phase == TriggerPhase::BEFORE_COMMIT ? 
"BEFORE COMMIT" + : "AFTER COMMIT"); + results.push_back(std::move(typed_trigger_info)); + } + + return results; + }}; +} + +PreparedQuery PrepareTriggerQuery(ParsedQuery parsed_query, const bool in_explicit_transaction, + InterpreterContext *interpreter_context, DbAccessor *dba, + const std::map &user_parameters) { + if (in_explicit_transaction) { + throw TriggerModificationInMulticommandTxException(); + } + + auto *trigger_query = utils::Downcast(parsed_query.query); + MG_ASSERT(trigger_query); + + auto callback = [trigger_query, interpreter_context, dba, &user_parameters] { + switch (trigger_query->action_) { + case TriggerQuery::Action::CREATE_TRIGGER: + return CreateTrigger(trigger_query, user_parameters, interpreter_context, dba); + case TriggerQuery::Action::DROP_TRIGGER: + return DropTrigger(trigger_query, interpreter_context); + case TriggerQuery::Action::SHOW_TRIGGERS: + return ShowTriggers(interpreter_context); + } + }(); + + auto results = callback.fn(); + + return PreparedQuery{std::move(callback.header), std::move(parsed_query.required_privileges), + [pull_plan = std::make_shared(std::move(results))]( + AnyStream *stream, std::optional n) -> std::optional { + if (pull_plan->Pull(stream, n)) { + return QueryHandlerResult::COMMIT; + } + return std::nullopt; + }, + RWType::NONE}; + // False positive report for the std::make_shared above + // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks) +} + PreparedQuery PrepareInfoQuery(ParsedQuery parsed_query, bool in_explicit_transaction, std::map *summary, InterpreterContext *interpreter_context, storage::Storage *db, utils::MemoryResource *execution_memory) { @@ -1383,13 +1498,12 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, // Some queries require an active transaction in order to be prepared. 
if (!in_explicit_transaction_ && (utils::Downcast(parsed_query.query) || utils::Downcast(parsed_query.query) || - utils::Downcast(parsed_query.query) || utils::Downcast(parsed_query.query))) { + utils::Downcast(parsed_query.query) || utils::Downcast(parsed_query.query) || + utils::Downcast(parsed_query.query))) { db_accessor_ = std::make_unique(interpreter_context_->db->Access()); execution_db_accessor_.emplace(db_accessor_.get()); - if (utils::Downcast(parsed_query.query) && - (interpreter_context_->before_commit_triggers.size() > 0 || - interpreter_context_->after_commit_triggers.size() > 0)) { + if (utils::Downcast(parsed_query.query) && interpreter_context_->trigger_store->HasTriggers()) { trigger_context_collector_.emplace(); } } @@ -1434,6 +1548,9 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, &*execution_db_accessor_); } else if (utils::Downcast(parsed_query.query)) { prepared_query = PrepareFreeMemoryQuery(std::move(parsed_query), in_explicit_transaction_, interpreter_context_); + } else if (utils::Downcast(parsed_query.query)) { + prepared_query = PrepareTriggerQuery(std::move(parsed_query), in_explicit_transaction_, interpreter_context_, + &*execution_db_accessor_, params); } else { LOG_FATAL("Should not get here -- unknown query type!"); } @@ -1488,7 +1605,7 @@ void RunTriggersIndividually(const utils::SkipList &triggers, Interpret interpreter_context->execution_timeout_sec, &interpreter_context->is_shutting_down, trigger_context); } catch (const utils::BasicException &exception) { - spdlog::warn("Trigger '{}' failed with exception:\n{}", trigger.name(), exception.what()); + spdlog::warn("Trigger '{}' failed with exception:\n{}", trigger.Name(), exception.what()); db_accessor.Abort(); continue; } @@ -1501,7 +1618,7 @@ void RunTriggersIndividually(const utils::SkipList &triggers, Interpret const auto &label_name = db_accessor.LabelToName(constraint_violation.label); MG_ASSERT(constraint_violation.properties.size() == 
1U); const auto &property_name = db_accessor.PropertyToName(*constraint_violation.properties.begin()); - spdlog::warn("Trigger '{}' failed to commit due to existence constraint violation on :{}({})", trigger.name(), + spdlog::warn("Trigger '{}' failed to commit due to existence constraint violation on :{}({})", trigger.Name(), label_name, property_name); break; } @@ -1510,7 +1627,7 @@ void RunTriggersIndividually(const utils::SkipList &triggers, Interpret std::stringstream property_names_stream; utils::PrintIterable(property_names_stream, constraint_violation.properties, ", ", [&](auto &stream, const auto &prop) { stream << db_accessor.PropertyToName(prop); }); - spdlog::warn("Trigger '{}' failed to commit due to unique constraint violation on :{}({})", trigger.name(), + spdlog::warn("Trigger '{}' failed to commit due to unique constraint violation on :{}({})", trigger.Name(), label_name, property_names_stream.str()); break; } @@ -1535,7 +1652,7 @@ void Interpreter::Commit() { if (trigger_context) { // Run the triggers - for (const auto &trigger : interpreter_context_->before_commit_triggers.access()) { + for (const auto &trigger : interpreter_context_->trigger_store->BeforeCommitTriggers().access()) { utils::MonotonicBufferResource execution_memory{kExecutionMemoryBlockSize}; AdvanceCommand(); try { @@ -1544,7 +1661,7 @@ void Interpreter::Commit() { *trigger_context); } catch (const utils::BasicException &e) { throw utils::BasicException( - fmt::format("Trigger '{}' caused the transaction to fail.\nException: {}", trigger.name(), e.what())); + fmt::format("Trigger '{}' caused the transaction to fail.\nException: {}", trigger.Name(), e.what())); } } SPDLOG_DEBUG("Finished executing before commit triggers"); @@ -1581,11 +1698,11 @@ void Interpreter::Commit() { } } - if (trigger_context && interpreter_context_->after_commit_triggers.size() > 0) { + if (trigger_context && interpreter_context_->trigger_store->AfterCommitTriggers().size() > 0) { 
background_thread_.AddTask([trigger_context = std::move(*trigger_context), interpreter_context = this->interpreter_context_, user_transaction = std::shared_ptr(std::move(db_accessor_))]() mutable { - RunTriggersIndividually(interpreter_context->after_commit_triggers, interpreter_context, + RunTriggersIndividually(interpreter_context->trigger_store->AfterCommitTriggers(), interpreter_context, std::move(trigger_context)); user_transaction->FinalizeTransaction(); SPDLOG_DEBUG("Finished executing after commit triggers"); // NOLINT(bugprone-lambda-function-name) diff --git a/src/query/interpreter.hpp b/src/query/interpreter.hpp index 8f42c430c..bd76731db 100644 --- a/src/query/interpreter.hpp +++ b/src/query/interpreter.hpp @@ -147,7 +147,7 @@ struct PreparedQuery { * been passed to an `Interpreter` instance. */ struct InterpreterContext { - explicit InterpreterContext(storage::Storage *db) : db(db) {} + explicit InterpreterContext(storage::Storage *db, const std::filesystem::path &data_directory); storage::Storage *db; @@ -168,9 +168,7 @@ struct InterpreterContext { utils::SkipList ast_cache; utils::SkipList plan_cache; - // use a thread safe container - utils::SkipList before_commit_triggers; - utils::SkipList after_commit_triggers; + std::optional trigger_store; }; /// Function that is used to tell all active interpreters that they should stop diff --git a/src/query/serialization/property_value.cpp b/src/query/serialization/property_value.cpp new file mode 100644 index 000000000..262ff3cf0 --- /dev/null +++ b/src/query/serialization/property_value.cpp @@ -0,0 +1,94 @@ +#include "query/serialization/property_value.hpp" +#include "storage/v2/property_value.hpp" +#include "utils/logging.hpp" + +namespace query::serialization { + +nlohmann::json SerializePropertyValue(const storage::PropertyValue &property_value) { + using Type = storage::PropertyValue::Type; + switch (property_value.type()) { + case Type::Null: + return {}; + case Type::Bool: + return 
property_value.ValueBool(); + case Type::Int: + return property_value.ValueInt(); + case Type::Double: + return property_value.ValueDouble(); + case Type::String: + return property_value.ValueString(); + case Type::List: + return SerializePropertyValueVector(property_value.ValueList()); + case Type::Map: + return SerializePropertyValueMap(property_value.ValueMap()); + } +} + +nlohmann::json SerializePropertyValueVector(const std::vector &values) { + nlohmann::json array = nlohmann::json::array(); + for (const auto &value : values) { + array.push_back(SerializePropertyValue(value)); + } + return array; +} + +nlohmann::json SerializePropertyValueMap(const std::map ¶meters) { + nlohmann::json data = nlohmann::json::object(); + + for (const auto &[key, value] : parameters) { + data[key] = SerializePropertyValue(value); + } + + return data; +}; + +storage::PropertyValue DeserializePropertyValue(const nlohmann::json &data) { + if (data.is_null()) { + return storage::PropertyValue(); + } + + if (data.is_boolean()) { + return storage::PropertyValue(data.get()); + } + + if (data.is_number_integer()) { + return storage::PropertyValue(data.get()); + } + + if (data.is_number_float()) { + return storage::PropertyValue(data.get()); + } + + if (data.is_string()) { + return storage::PropertyValue(data.get()); + } + + if (data.is_array()) { + return storage::PropertyValue(DeserializePropertyValueList(data)); + } + + MG_ASSERT(data.is_object(), "Unknown type found in the trigger storage"); + return storage::PropertyValue(DeserializePropertyValueMap(data)); +} + +std::vector DeserializePropertyValueList(const nlohmann::json::array_t &data) { + std::vector property_values; + property_values.reserve(data.size()); + for (const auto &value : data) { + property_values.emplace_back(DeserializePropertyValue(value)); + } + + return property_values; +} + +std::map DeserializePropertyValueMap(const nlohmann::json::object_t &data) { + std::map property_values; + + for (const auto &[key, value] 
: data) { + property_values.emplace(key, DeserializePropertyValue(value)); + } + + return property_values; +} + +} // namespace query::serialization diff --git a/src/query/serialization/property_value.hpp b/src/query/serialization/property_value.hpp new file mode 100644 index 000000000..2ea5892c2 --- /dev/null +++ b/src/query/serialization/property_value.hpp @@ -0,0 +1,21 @@ +#pragma once + +#include + +#include "storage/v2/property_value.hpp" + +namespace query::serialization { + +nlohmann::json SerializePropertyValue(const storage::PropertyValue &property_value); + +nlohmann::json SerializePropertyValueVector(const std::vector &values); + +nlohmann::json SerializePropertyValueMap(const std::map ¶meters); + +storage::PropertyValue DeserializePropertyValue(const nlohmann::json &data); + +std::vector DeserializePropertyValueList(const nlohmann::json::array_t &data); + +std::map DeserializePropertyValueMap(const nlohmann::json::object_t &data); + +} // namespace query::serialization diff --git a/src/query/trigger.cpp b/src/query/trigger.cpp index 2a1ee4668..be46d19e8 100644 --- a/src/query/trigger.cpp +++ b/src/query/trigger.cpp @@ -1,3 +1,5 @@ +#include "query/trigger.hpp" + #include #include "query/context.hpp" @@ -5,12 +7,12 @@ #include "query/db_accessor.hpp" #include "query/frontend/ast/ast.hpp" #include "query/interpret/frame.hpp" -#include "query/trigger.hpp" +#include "query/serialization/property_value.hpp" #include "query/typed_value.hpp" +#include "storage/v2/property_value.hpp" #include "utils/memory.hpp" namespace query { - namespace { auto IdentifierString(const TriggerIdentifierTag tag) noexcept { @@ -250,9 +252,7 @@ template bool AnyContainsValue(const TContainer &...value_containers) { return (!value_containers.empty() || ...); } - } // namespace - namespace detail { bool SetVertexLabel::IsValid() const { return object.IsVisible(storage::View::OLD); } @@ -267,6 +267,40 @@ std::map RemovedVertexLabel::ToMap(DbAccessor *dba) con } } // namespace detail 
+const char *TriggerEventTypeToString(const TriggerEventType event_type) { + switch (event_type) { + case TriggerEventType::ANY: + return "ANY"; + + case TriggerEventType::CREATE: + return "CREATE"; + + case TriggerEventType::VERTEX_CREATE: + return "() CREATE"; + + case TriggerEventType::EDGE_CREATE: + return "--> CREATE"; + + case TriggerEventType::DELETE: + return "DELETE"; + + case TriggerEventType::VERTEX_DELETE: + return "() DELETE"; + + case TriggerEventType::EDGE_DELETE: + return "--> DELETE"; + + case TriggerEventType::UPDATE: + return "UPDATE"; + + case TriggerEventType::VERTEX_UPDATE: + return "() UPDATE"; + + case TriggerEventType::EDGE_UPDATE: + return "--> UPDATE"; + } +} + void TriggerContext::AdaptForAccessor(DbAccessor *accessor) { { // adapt created_vertices_ @@ -498,10 +532,12 @@ TriggerContextCollector::LabelChangesLists TriggerContextCollector::LabelMapToLi return {std::move(set_vertex_labels), std::move(removed_vertex_labels)}; } -Trigger::Trigger(std::string name, const std::string &query, utils::SkipList *query_cache, - DbAccessor *db_accessor, utils::SpinLock *antlr_lock, const TriggerEventType event_type) - : name_(std::move(name)), - parsed_statements_{ParseQuery(query, {}, query_cache, antlr_lock)}, +Trigger::Trigger(std::string name, const std::string &query, + const std::map &user_parameters, + const TriggerEventType event_type, utils::SkipList *query_cache, + DbAccessor *db_accessor, utils::SpinLock *antlr_lock) + : name_{std::move(name)}, + parsed_statements_{ParseQuery(query, user_parameters, query_cache, antlr_lock)}, event_type_{event_type} { // We check immediately if the query is valid by trying to create a plan. 
GetPlan(db_accessor); @@ -512,7 +548,7 @@ Trigger::TriggerPlan::TriggerPlan(std::unique_ptr logical_plan, std std::shared_ptr Trigger::GetPlan(DbAccessor *db_accessor) const { std::lock_guard plan_guard{plan_lock_}; - if (trigger_plan_ && !trigger_plan_->cached_plan.IsExpired()) { + if (parsed_statements_.is_cacheable && trigger_plan_ && !trigger_plan_->cached_plan.IsExpired()) { return trigger_plan_; } @@ -592,4 +628,150 @@ void Trigger::Execute(DbAccessor *dba, utils::MonotonicBufferResource *execution cursor->Shutdown(); } + +namespace { +constexpr uint64_t kVersion{1}; +} // namespace + +TriggerStore::TriggerStore(std::filesystem::path directory, utils::SkipList *query_cache, + DbAccessor *db_accessor, utils::SpinLock *antlr_lock) + : storage_{std::move(directory)} { + spdlog::info("Loading triggers..."); + + for (const auto &[trigger_name, trigger_data] : storage_) { + spdlog::debug("Loading trigger '{}'", trigger_name); + auto json_trigger_data = nlohmann::json::parse(trigger_data); + + if (!json_trigger_data["version"].is_number_unsigned()) { + spdlog::debug("Invalid state of the trigger data."); + continue; + } + if (json_trigger_data["version"] != kVersion) { + spdlog::debug("Invalid version of the trigger data. 
Got {}"); + continue; + } + + if (!json_trigger_data["statement"].is_string()) { + spdlog::debug("Invalid state of the trigger data"); + continue; + } + auto statement = json_trigger_data["statement"].get(); + + if (!json_trigger_data["phase"].is_number_integer()) { + spdlog::debug("Invalid state of the trigger data"); + continue; + } + const auto phase = json_trigger_data["phase"].get(); + + if (!json_trigger_data["event_type"].is_number_integer()) { + spdlog::debug("Invalid state of the trigger data"); + continue; + } + const auto event_type = json_trigger_data["event_type"].get(); + + if (!json_trigger_data["user_parameters"].is_object()) { + spdlog::debug("Invalid state of the trigger data"); + continue; + } + const auto user_parameters = serialization::DeserializePropertyValueMap(json_trigger_data["user_parameters"]); + + std::optional trigger; + try { + trigger.emplace(trigger_name, statement, user_parameters, event_type, query_cache, db_accessor, antlr_lock); + } catch (const utils::BasicException &e) { + spdlog::debug("Failed to create a trigger '{}' because: {}", trigger_name, e.what()); + continue; + } + + auto triggers_acc = + phase == TriggerPhase::BEFORE_COMMIT ? 
before_commit_triggers_.access() : after_commit_triggers_.access(); + triggers_acc.insert(std::move(*trigger)); + + spdlog::debug("Trigger loaded successfully!"); + } +} + +void TriggerStore::AddTrigger(const std::string &name, const std::string &query, + const std::map &user_parameters, + TriggerEventType event_type, TriggerPhase phase, + utils::SkipList *query_cache, DbAccessor *db_accessor, + utils::SpinLock *antlr_lock) { + std::unique_lock store_guard{store_lock_}; + if (storage_.Get(name)) { + throw utils::BasicException("Trigger with the same name already exists."); + } + + std::optional trigger; + try { + trigger.emplace(name, query, user_parameters, event_type, query_cache, db_accessor, antlr_lock); + } catch (const utils::BasicException &e) { + const auto identifiers = GetPredefinedIdentifiers(event_type); + std::stringstream identifier_names_stream; + utils::PrintIterable(identifier_names_stream, identifiers, ", ", + [](auto &stream, const auto &identifier) { stream << identifier.first.name_; }); + + throw utils::BasicException( + "Failed creating the trigger.\nError message: '{}'\nThe error was mostly likely generated because of the wrong " + "statement that this trigger executes.\nMake sure all predefined variables used are present for the specified " + "event.\nAllowed variables for event '{}' are: {}", + e.what(), TriggerEventTypeToString(event_type), identifier_names_stream.str()); + } + + nlohmann::json data = nlohmann::json::object(); + data["statement"] = query; + data["user_parameters"] = serialization::SerializePropertyValueMap(user_parameters); + data["event_type"] = event_type; + data["phase"] = phase; + data["version"] = kVersion; + storage_.Put(name, data.dump()); + store_guard.unlock(); + + auto triggers_acc = + phase == TriggerPhase::BEFORE_COMMIT ? 
before_commit_triggers_.access() : after_commit_triggers_.access(); + triggers_acc.insert(std::move(*trigger)); +} + +void TriggerStore::DropTrigger(const std::string &name) { + std::unique_lock store_guard{store_lock_}; + const auto maybe_trigger_data = storage_.Get(name); + if (!maybe_trigger_data) { + throw utils::BasicException("Trigger with name '{}' doesn't exist", name); + } + + nlohmann::json data; + try { + data = nlohmann::json::parse(*maybe_trigger_data); + } catch (const nlohmann::json::parse_error &e) { + throw utils::BasicException("Couldn't load trigger data!"); + } + + if (!data.is_object()) { + throw utils::BasicException("Couldn't load trigger data!"); + } + + if (!data["phase"].is_number_integer()) { + throw utils::BasicException("Invalid type loaded inside the trigger data!"); + } + + auto triggers_acc = + data["phase"] == TriggerPhase::BEFORE_COMMIT ? before_commit_triggers_.access() : after_commit_triggers_.access(); + triggers_acc.remove(name); + storage_.Delete(name); +} + +std::vector TriggerStore::GetTriggerInfo() const { + std::vector info; + info.reserve(before_commit_triggers_.size() + after_commit_triggers_.size()); + + const auto add_info = [&](const utils::SkipList &trigger_list, const TriggerPhase phase) { + for (const auto &trigger : trigger_list.access()) { + info.push_back({trigger.Name(), trigger.OriginalStatement(), trigger.EventType(), phase}); + } + }; + + add_info(before_commit_triggers_, TriggerPhase::BEFORE_COMMIT); + add_info(after_commit_triggers_, TriggerPhase::AFTER_COMMIT); + + return info; +} } // namespace query diff --git a/src/query/trigger.hpp b/src/query/trigger.hpp index 51119885e..a3e296aa0 100644 --- a/src/query/trigger.hpp +++ b/src/query/trigger.hpp @@ -6,9 +6,11 @@ #include #include +#include "kvstore/kvstore.hpp" #include "query/cypher_query_interpreter.hpp" #include "query/frontend/ast/ast.hpp" #include "query/typed_value.hpp" +#include "storage/v2/property_value.hpp" #include "utils/concepts.hpp" 
#include "utils/fnv.hpp" @@ -77,7 +79,7 @@ struct RemovedObjectProperty { : object{object}, key{key}, old_value{std::move(old_value)} {} std::map ToMap(DbAccessor *dba) const { - return {{ObjectString(), TypedValue{object}}, + return {{detail::ObjectString(), TypedValue{object}}, {"key", TypedValue{dba->PropertyToName(key)}}, {"old", old_value}}; } @@ -143,6 +145,8 @@ enum class TriggerEventType : uint8_t { UPDATE }; +const char *TriggerEventTypeToString(TriggerEventType event_type); + static_assert(std::is_trivially_copy_constructible_v, "VertexAccessor is not trivially copy constructible, move it where possible and remove this assert"); static_assert(std::is_trivially_copy_constructible_v, @@ -356,9 +360,9 @@ class TriggerContextCollector { }; struct Trigger { - explicit Trigger(std::string name, const std::string &query, utils::SkipList *query_cache, - DbAccessor *db_accessor, utils::SpinLock *antlr_lock, - TriggerEventType event_type = TriggerEventType::ANY); + explicit Trigger(std::string name, const std::string &query, + const std::map &user_parameters, TriggerEventType event_type, + utils::SkipList *query_cache, DbAccessor *db_accessor, utils::SpinLock *antlr_lock); void Execute(DbAccessor *dba, utils::MonotonicBufferResource *execution_memory, double tsc_frequency, double max_execution_time_sec, std::atomic *is_shutting_down, const TriggerContext &context) const; @@ -370,7 +374,9 @@ struct Trigger { // NOLINTNEXTLINE (modernize-use-nullptr) bool operator<(const std::string &other) const { return name_ < other; } - const auto &name() const noexcept { return name_; } + const auto &Name() const noexcept { return name_; } + const auto &OriginalStatement() const noexcept { return parsed_statements_.query_string; } + auto EventType() const noexcept { return event_type_; } private: struct TriggerPlan { @@ -391,4 +397,40 @@ struct Trigger { mutable utils::SpinLock plan_lock_; mutable std::shared_ptr trigger_plan_; }; + +enum class TriggerPhase : uint8_t { 
BEFORE_COMMIT, AFTER_COMMIT }; + +struct TriggerStore { + explicit TriggerStore(std::filesystem::path directory, utils::SkipList *query_cache, + DbAccessor *db_accessor, utils::SpinLock *antlr_lock); + + void AddTrigger(const std::string &name, const std::string &query, + const std::map &user_parameters, TriggerEventType event_type, + TriggerPhase phase, utils::SkipList *query_cache, DbAccessor *db_accessor, + utils::SpinLock *antlr_lock); + + void DropTrigger(const std::string &name); + + struct TriggerInfo { + std::string name; + std::string statement; + TriggerEventType event_type; + TriggerPhase phase; + }; + + std::vector GetTriggerInfo() const; + + const auto &BeforeCommitTriggers() const noexcept { return before_commit_triggers_; } + const auto &AfterCommitTriggers() const noexcept { return after_commit_triggers_; } + + bool HasTriggers() const noexcept { return before_commit_triggers_.size() > 0 || after_commit_triggers_.size() > 0; } + + private: + utils::SpinLock store_lock_; + kvstore::KVStore storage_; + + utils::SkipList before_commit_triggers_; + utils::SkipList after_commit_triggers_; +}; + } // namespace query diff --git a/tests/benchmark/expansion.cpp b/tests/benchmark/expansion.cpp index 9bfe2607b..3317a3158 100644 --- a/tests/benchmark/expansion.cpp +++ b/tests/benchmark/expansion.cpp @@ -11,6 +11,7 @@ class ExpansionBenchFixture : public benchmark::Fixture { std::optional db; std::optional interpreter_context; std::optional interpreter; + std::filesystem::path data_directory{std::filesystem::temp_directory_path() / "expansion-benchmark"}; void SetUp(const benchmark::State &state) override { db.emplace(); @@ -34,7 +35,7 @@ class ExpansionBenchFixture : public benchmark::Fixture { MG_ASSERT(db->CreateIndex(label)); - interpreter_context.emplace(&*db); + interpreter_context.emplace(&*db, data_directory); interpreter.emplace(&*interpreter_context); } @@ -42,6 +43,7 @@ class ExpansionBenchFixture : public benchmark::Fixture { interpreter = 
std::nullopt; interpreter_context = std::nullopt; db = std::nullopt; + std::filesystem::remove_all(data_directory); } }; diff --git a/tests/manual/single_query.cpp b/tests/manual/single_query.cpp index e0deff027..ee60006ed 100644 --- a/tests/manual/single_query.cpp +++ b/tests/manual/single_query.cpp @@ -1,6 +1,7 @@ #include "communication/result_stream_faker.hpp" #include "query/interpreter.hpp" #include "storage/v2/storage.hpp" +#include "utils/on_scope_exit.hpp" int main(int argc, char *argv[]) { gflags::ParseCommandLineFlags(&argc, &argv, true); @@ -12,7 +13,9 @@ int main(int argc, char *argv[]) { } storage::Storage db; - query::InterpreterContext interpreter_context{&db}; + auto data_directory = std::filesystem::temp_directory_path() / "single_query_test"; + utils::OnScopeExit([&data_directory] { std::filesystem::remove_all(data_directory); }); + query::InterpreterContext interpreter_context{&db, data_directory}; query::Interpreter interpreter{&interpreter_context}; ResultStreamFaker stream(&db); diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index 8f7c2bb67..1adda7f54 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -98,6 +98,9 @@ target_link_libraries(${test_prefix}query_pretty_print mg-query) add_unit_test(query_trigger.cpp) target_link_libraries(${test_prefix}query_trigger mg-query) +add_unit_test(query_serialization_property_value.cpp) +target_link_libraries(${test_prefix}query_serialization_property_value mg-query) + # Test query/procedure add_unit_test(query_procedure_mgp_type.cpp) target_link_libraries(${test_prefix}query_procedure_mgp_type mg-query) diff --git a/tests/unit/cypher_main_visitor.cpp b/tests/unit/cypher_main_visitor.cpp index e734d1cab..9cb70fd99 100644 --- a/tests/unit/cypher_main_visitor.cpp +++ b/tests/unit/cypher_main_visitor.cpp @@ -25,6 +25,8 @@ #include "query/frontend/stripped.hpp" #include "query/typed_value.hpp" +#include "utils/string.hpp" + namespace { using namespace query; @@ 
-2061,6 +2063,8 @@ TEST_P(CypherMainVisitorTest, GrantPrivilege) { {AuthQuery::Privilege::READ_FILE}); check_auth_query(&ast_generator, "GRANT FREE_MEMORY TO user", AuthQuery::Action::GRANT_PRIVILEGE, "", "", "user", {}, {AuthQuery::Privilege::FREE_MEMORY}); + check_auth_query(&ast_generator, "GRANT TRIGGER TO user", AuthQuery::Action::GRANT_PRIVILEGE, "", "", "user", {}, + {AuthQuery::Privilege::TRIGGER}); } TEST_P(CypherMainVisitorTest, DenyPrivilege) { @@ -3054,4 +3058,103 @@ TEST_P(CypherMainVisitorTest, MemoryLimit) { CheckCallProcedureDefaultMemoryLimit(ast_generator, *call_proc); } } + +namespace { +void TestInvalidQuery(const auto &query, Base &ast_generator) { + ASSERT_THROW(ast_generator.ParseQuery(query), SyntaxException); +} +} // namespace + +TEST_P(CypherMainVisitorTest, DropTrigger) { + auto &ast_generator = *GetParam(); + + TestInvalidQuery("DROP TR", ast_generator); + TestInvalidQuery("DROP TRIGGER", ast_generator); + + auto *parsed_query = dynamic_cast(ast_generator.ParseQuery("DROP TRIGGER trigger")); + EXPECT_EQ(parsed_query->action_, TriggerQuery::Action::DROP_TRIGGER); + EXPECT_EQ(parsed_query->trigger_name_, "trigger"); +} + +TEST_P(CypherMainVisitorTest, ShowTriggers) { + auto &ast_generator = *GetParam(); + + TestInvalidQuery("SHOW TR", ast_generator); + TestInvalidQuery("SHOW TRIGGER", ast_generator); + + auto *parsed_query = dynamic_cast(ast_generator.ParseQuery("SHOW TRIGGERS")); + EXPECT_EQ(parsed_query->action_, TriggerQuery::Action::SHOW_TRIGGERS); +} + +namespace { +void ValidateCreateQuery(Base &ast_generator, const auto &query, const auto &trigger_name, + const query::TriggerQuery::EventType event_type, const auto &phase, const auto &statement) { + auto *parsed_query = dynamic_cast(ast_generator.ParseQuery(query)); + EXPECT_EQ(parsed_query->action_, TriggerQuery::Action::CREATE_TRIGGER); + EXPECT_EQ(parsed_query->trigger_name_, trigger_name); + EXPECT_EQ(parsed_query->event_type_, event_type); + 
EXPECT_EQ(parsed_query->before_commit_, phase == "BEFORE"); + EXPECT_EQ(parsed_query->statement_, statement); +} +} // namespace + +TEST_P(CypherMainVisitorTest, CreateTriggers) { + auto &ast_generator = *GetParam(); + + TestInvalidQuery("CREATE TRIGGER", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON ", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON ()", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON -->", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON CREATE", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON () CREATE", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON --> CREATE", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON DELETE", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON () DELETE", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON --> DELETE", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON UPDATE", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON () UPDATE", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON --> UPDATE", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON CREATE BEFORE", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON CREATE BEFORE COMMIT", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON CREATE AFTER", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON CREATE AFTER COMMIT", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON -> CREATE AFTER COMMIT EXECUTE a", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON ) CREATE AFTER COMMIT EXECUTE a", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON ( CREATE AFTER COMMIT EXECUTE a", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON CRETE AFTER COMMIT EXECUTE a", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON DELET AFTER COMMIT EXECUTE a", ast_generator); + 
TestInvalidQuery("CREATE TRIGGER trigger ON UPDTE AFTER COMMIT EXECUTE a", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON UPDATE COMMIT EXECUTE a", ast_generator); + + const auto *query_template = "CREATE TRIGGER trigger {} {} COMMIT EXECUTE {}"; + + std::array events{std::pair{"", query::TriggerQuery::EventType::ANY}, + std::pair{"ON CREATE", query::TriggerQuery::EventType::CREATE}, + std::pair{"ON () CREATE", query::TriggerQuery::EventType::VERTEX_CREATE}, + std::pair{"ON --> CREATE", query::TriggerQuery::EventType::EDGE_CREATE}, + std::pair{"ON DELETE", query::TriggerQuery::EventType::DELETE}, + std::pair{"ON () DELETE", query::TriggerQuery::EventType::VERTEX_DELETE}, + std::pair{"ON --> DELETE", query::TriggerQuery::EventType::EDGE_DELETE}, + std::pair{"ON UPDATE", query::TriggerQuery::EventType::UPDATE}, + std::pair{"ON () UPDATE", query::TriggerQuery::EventType::VERTEX_UPDATE}, + std::pair{"ON --> UPDATE", query::TriggerQuery::EventType::EDGE_UPDATE}}; + + std::array phases{"BEFORE", "AFTER"}; + + std::array statements{ + "", "SOME SUPER\nSTATEMENT", "Statement with 12312321 3 ", " Statement with 12312321 3 " + + }; + + for (const auto &[event_string, event_type] : events) { + for (const auto &phase : phases) { + for (const auto &statement : statements) { + ValidateCreateQuery(ast_generator, fmt::format(query_template, event_string, phase, statement), "trigger", + event_type, phase, utils::Trim(statement)); + } + } + } +} + } // namespace diff --git a/tests/unit/interpreter.cpp b/tests/unit/interpreter.cpp index 51444ed4f..194c4c815 100644 --- a/tests/unit/interpreter.cpp +++ b/tests/unit/interpreter.cpp @@ -1,4 +1,5 @@ #include +#include #include "communication/bolt/v1/value.hpp" #include "communication/result_stream_faker.hpp" @@ -32,7 +33,8 @@ auto ToEdgeList(const communication::bolt::Value &v) { class InterpreterTest : public ::testing::Test { protected: storage::Storage db_; - query::InterpreterContext interpreter_context_{&db_}; + 
std::filesystem::path data_directory{std::filesystem::temp_directory_path() / "MG_tests_unit_interpreter"}; + query::InterpreterContext interpreter_context_{&db_, data_directory}; query::Interpreter interpreter_{&interpreter_context_}; auto Prepare(const std::string &query, const std::map ¶ms = {}) { diff --git a/tests/unit/main.cpp b/tests/unit/main.cpp index 5b0fa17fb..cb05a4e3b 100644 --- a/tests/unit/main.cpp +++ b/tests/unit/main.cpp @@ -4,6 +4,6 @@ int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); logging::RedirectToStderr(); - spdlog::set_level(spdlog::level::info); + spdlog::set_level(spdlog::level::warn); return RUN_ALL_TESTS(); } diff --git a/tests/unit/query_dump.cpp b/tests/unit/query_dump.cpp index b565de5fe..ee19f8ddf 100644 --- a/tests/unit/query_dump.cpp +++ b/tests/unit/query_dump.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -187,7 +188,8 @@ DatabaseState GetState(storage::Storage *db) { } auto Execute(storage::Storage *db, const std::string &query) { - query::InterpreterContext context(db); + auto data_directory = std::filesystem::temp_directory_path() / "MG_tests_unit_query_dump"; + query::InterpreterContext context(db, data_directory); query::Interpreter interpreter(&context); ResultStreamFaker stream(db); @@ -700,7 +702,8 @@ TEST(DumpTest, ExecuteDumpDatabase) { class StatefulInterpreter { public: - explicit StatefulInterpreter(storage::Storage *db) : db_(db), context_(db_), interpreter_(&context_) {} + explicit StatefulInterpreter(storage::Storage *db) + : db_(db), context_(db_, data_directory_), interpreter_(&context_) {} auto Execute(const std::string &query) { ResultStreamFaker stream(db_); @@ -714,11 +717,16 @@ class StatefulInterpreter { } private: + static const std::filesystem::path data_directory_; + storage::Storage *db_; query::InterpreterContext context_; query::Interpreter interpreter_; }; +const std::filesystem::path 
StatefulInterpreter::data_directory_{std::filesystem::temp_directory_path() / + "MG_tests_unit_query_dump_stateful"}; + // NOLINTNEXTLINE(hicpp-special-member-functions) TEST(DumpTest, ExecuteDumpDatabaseInMulticommandTransaction) { storage::Storage db; diff --git a/tests/unit/query_plan_edge_cases.cpp b/tests/unit/query_plan_edge_cases.cpp index 71e558f17..fe3035240 100644 --- a/tests/unit/query_plan_edge_cases.cpp +++ b/tests/unit/query_plan_edge_cases.cpp @@ -2,6 +2,7 @@ // that's not easily testable with single-phase testing. instead, for // easy testing and latter readability they are tested end-to-end. +#include #include #include "gmock/gmock.h" @@ -19,9 +20,11 @@ class QueryExecution : public testing::Test { std::optional interpreter_context_; std::optional interpreter_; + std::filesystem::path data_directory{std::filesystem::temp_directory_path() / "MG_tests_unit_query_plan_edge_cases"}; + void SetUp() { db_.emplace(); - interpreter_context_.emplace(&*db_); + interpreter_context_.emplace(&*db_, data_directory); interpreter_.emplace(&*interpreter_context_); } diff --git a/tests/unit/query_required_privileges.cpp b/tests/unit/query_required_privileges.cpp index a5afc010c..09fee4c0d 100644 --- a/tests/unit/query_required_privileges.cpp +++ b/tests/unit/query_required_privileges.cpp @@ -149,3 +149,8 @@ TEST_F(TestPrivilegeExtractor, FreeMemoryQuery) { auto *query = storage.Create(); EXPECT_THAT(GetRequiredPrivileges(query), UnorderedElementsAre(AuthQuery::Privilege::FREE_MEMORY)); } + +TEST_F(TestPrivilegeExtractor, TriggerQuery) { + auto *query = storage.Create(); + EXPECT_THAT(GetRequiredPrivileges(query), UnorderedElementsAre(AuthQuery::Privilege::TRIGGER)); +} diff --git a/tests/unit/query_serialization_property_value.cpp b/tests/unit/query_serialization_property_value.cpp new file mode 100644 index 000000000..569d13ec2 --- /dev/null +++ b/tests/unit/query_serialization_property_value.cpp @@ -0,0 +1,85 @@ +#include + +#include 
"query/serialization/property_value.hpp" +#include "utils/logging.hpp" + +namespace { +void ExpectPropEq(const storage::PropertyValue &a, const storage::PropertyValue &b) { + ASSERT_EQ(a.type(), b.type()); + ASSERT_EQ(a, b); +} + +void CheckJsonConversion(const storage::PropertyValue &property_value) { + const auto json_string = query::serialization::SerializePropertyValue(property_value).dump(); + const auto json_object = nlohmann::json::parse(json_string); + ExpectPropEq(property_value, query::serialization::DeserializePropertyValue(json_object)); +} + +} // namespace + +TEST(PropertyValueSerializationTest, Null) { CheckJsonConversion(storage::PropertyValue{}); } + +TEST(PropertyValueSerializationTest, Bool) { + CheckJsonConversion(storage::PropertyValue{true}); + CheckJsonConversion(storage::PropertyValue{false}); +} + +TEST(PropertyValueSerializationTest, Int) { + CheckJsonConversion(storage::PropertyValue{1}); + CheckJsonConversion(storage::PropertyValue{100}); +} + +TEST(PropertyValueSerializationTest, Double) { + CheckJsonConversion(storage::PropertyValue{1.0}); + CheckJsonConversion(storage::PropertyValue{2.321}); +} + +TEST(PropertyValueSerializationTest, String) { + CheckJsonConversion(storage::PropertyValue{"TestString"}); + CheckJsonConversion(storage::PropertyValue{""}); +} + +namespace { + +std::vector GetPropertyValueListWithBasicTypes() { + return {storage::PropertyValue{}, storage::PropertyValue{true}, storage::PropertyValue{"string"}, + storage::PropertyValue{1}, storage::PropertyValue{1.0}}; +} + +std::map GetPropertyValueMapWithBasicTypes() { + return {{"null", storage::PropertyValue{}}, + {"bool", storage::PropertyValue{true}}, + {"int", storage::PropertyValue{1}}, + {"double", storage::PropertyValue{1.0}}, + {"string", storage::PropertyValue{"string"}}}; +} + +} // namespace + +TEST(PropertyValueSerializationTest, List) { + storage::PropertyValue list = storage::PropertyValue{GetPropertyValueListWithBasicTypes()}; + + SPDLOG_DEBUG("Basic 
list"); + CheckJsonConversion(list); + + SPDLOG_DEBUG("Nested list"); + CheckJsonConversion(storage::PropertyValue{std::vector{list, list}}); + + SPDLOG_DEBUG("List with map"); + list.ValueList().emplace_back(GetPropertyValueMapWithBasicTypes()); + CheckJsonConversion(list); +} + +TEST(PropertyValueSerializationTest, Map) { + auto map = GetPropertyValueMapWithBasicTypes(); + SPDLOG_DEBUG("Basic map"); + CheckJsonConversion(storage::PropertyValue{map}); + + SPDLOG_DEBUG("Nested map"); + map.emplace("map", storage::PropertyValue{map}); + CheckJsonConversion(storage::PropertyValue{map}); + + SPDLOG_DEBUG("Map with list"); + map.emplace("list", storage::PropertyValue{GetPropertyValueListWithBasicTypes()}); + CheckJsonConversion(storage::PropertyValue{map}); +} diff --git a/tests/unit/query_trigger.cpp b/tests/unit/query_trigger.cpp index 44936ef60..a5bb3ad91 100644 --- a/tests/unit/query_trigger.cpp +++ b/tests/unit/query_trigger.cpp @@ -551,3 +551,190 @@ TEST_F(TriggerContextTest, GlobalLabelChange) { {"label", query::TypedValue{"LABEL"}}}}); } } + +class TriggerStoreTest : public ::testing::Test { + protected: + const std::filesystem::path testing_directory{std::filesystem::temp_directory_path() / "MG_test_unit_query_trigger"}; + + void SetUp() override { + Clear(); + + storage_accessor.emplace(storage.Access()); + dba.emplace(&*storage_accessor); + } + + void TearDown() override { + Clear(); + + dba.reset(); + storage_accessor.reset(); + } + + std::optional dba; + + utils::SkipList ast_cache; + utils::SpinLock antlr_lock; + + private: + void Clear() { + if (!std::filesystem::exists(testing_directory)) return; + std::filesystem::remove_all(testing_directory); + } + + storage::Storage storage; + std::optional storage_accessor; +}; + +TEST_F(TriggerStoreTest, Load) { + std::optional store; + + store.emplace(testing_directory, &ast_cache, &*dba, &antlr_lock); + + const auto check_empty = [&] { + ASSERT_EQ(store->GetTriggerInfo().size(), 0); + 
ASSERT_EQ(store->BeforeCommitTriggers().size(), 0); + ASSERT_EQ(store->AfterCommitTriggers().size(), 0); + }; + + check_empty(); + + const auto *trigger_name_before = "trigger"; + const auto *trigger_name_after = "trigger_after"; + const auto *trigger_statement = "RETURN $parameter"; + const auto event_type = query::TriggerEventType::VERTEX_CREATE; + store->AddTrigger(trigger_name_before, trigger_statement, + std::map{{"parameter", storage::PropertyValue{1}}}, event_type, + query::TriggerPhase::BEFORE_COMMIT, &ast_cache, &*dba, &antlr_lock); + store->AddTrigger(trigger_name_after, trigger_statement, + std::map{{"parameter", storage::PropertyValue{"value"}}}, + event_type, query::TriggerPhase::AFTER_COMMIT, &ast_cache, &*dba, &antlr_lock); + + const auto check_triggers = [&] { + ASSERT_EQ(store->GetTriggerInfo().size(), 2); + + const auto verify_trigger = [&](const auto &trigger, const auto &name) { + ASSERT_EQ(trigger.Name(), name); + ASSERT_EQ(trigger.OriginalStatement(), trigger_statement); + ASSERT_EQ(trigger.EventType(), event_type); + }; + + const auto before_commit_triggers = store->BeforeCommitTriggers().access(); + ASSERT_EQ(before_commit_triggers.size(), 1); + for (const auto &trigger : before_commit_triggers) { + verify_trigger(trigger, trigger_name_before); + } + + const auto after_commit_triggers = store->AfterCommitTriggers().access(); + ASSERT_EQ(after_commit_triggers.size(), 1); + for (const auto &trigger : after_commit_triggers) { + verify_trigger(trigger, trigger_name_after); + } + }; + + check_triggers(); + + // recreate trigger store, this should reload everything from the disk + store.emplace(testing_directory, &ast_cache, &*dba, &antlr_lock); + check_triggers(); + + ASSERT_NO_THROW(store->DropTrigger(trigger_name_after)); + ASSERT_NO_THROW(store->DropTrigger(trigger_name_before)); + + check_empty(); + + store.emplace(testing_directory, &ast_cache, &*dba, &antlr_lock); + + check_empty(); +} + +TEST_F(TriggerStoreTest, AddTrigger) { + 
query::TriggerStore store{testing_directory, &ast_cache, &*dba, &antlr_lock}; + + // Invalid query in statements + ASSERT_THROW(store.AddTrigger("trigger", "RETUR 1", {}, query::TriggerEventType::VERTEX_CREATE, + query::TriggerPhase::BEFORE_COMMIT, &ast_cache, &*dba, &antlr_lock), + utils::BasicException); + ASSERT_THROW(store.AddTrigger("trigger", "RETURN createdEdges", {}, query::TriggerEventType::VERTEX_CREATE, + query::TriggerPhase::BEFORE_COMMIT, &ast_cache, &*dba, &antlr_lock), + utils::BasicException); + + ASSERT_THROW(store.AddTrigger("trigger", "RETURN $parameter", {}, query::TriggerEventType::VERTEX_CREATE, + query::TriggerPhase::BEFORE_COMMIT, &ast_cache, &*dba, &antlr_lock), + utils::BasicException); + + ASSERT_NO_THROW(store.AddTrigger( + "trigger", "RETURN $parameter", + std::map{{"parameter", storage::PropertyValue{1}}}, + query::TriggerEventType::VERTEX_CREATE, query::TriggerPhase::BEFORE_COMMIT, &ast_cache, &*dba, &antlr_lock)); + + // Inserting with the same name + ASSERT_THROW(store.AddTrigger("trigger", "RETURN 1", {}, query::TriggerEventType::VERTEX_CREATE, + query::TriggerPhase::BEFORE_COMMIT, &ast_cache, &*dba, &antlr_lock), + utils::BasicException); + ASSERT_THROW(store.AddTrigger("trigger", "RETURN 1", {}, query::TriggerEventType::VERTEX_CREATE, + query::TriggerPhase::AFTER_COMMIT, &ast_cache, &*dba, &antlr_lock), + utils::BasicException); + + ASSERT_EQ(store.GetTriggerInfo().size(), 1); + ASSERT_EQ(store.BeforeCommitTriggers().size(), 1); + ASSERT_EQ(store.AfterCommitTriggers().size(), 0); +} + +TEST_F(TriggerStoreTest, DropTrigger) { + query::TriggerStore store{testing_directory, &ast_cache, &*dba, &antlr_lock}; + + ASSERT_THROW(store.DropTrigger("Unknown"), utils::BasicException); + + const auto *trigger_name = "trigger"; + store.AddTrigger(trigger_name, "RETURN 1", {}, query::TriggerEventType::VERTEX_CREATE, + query::TriggerPhase::BEFORE_COMMIT, &ast_cache, &*dba, &antlr_lock); + + ASSERT_THROW(store.DropTrigger("Unknown"), 
utils::BasicException); + ASSERT_NO_THROW(store.DropTrigger(trigger_name)); + ASSERT_EQ(store.GetTriggerInfo().size(), 0); +} + +TEST_F(TriggerStoreTest, TriggerInfo) { + query::TriggerStore store{testing_directory, &ast_cache, &*dba, &antlr_lock}; + + std::vector expected_info; + store.AddTrigger("trigger", "RETURN 1", {}, query::TriggerEventType::VERTEX_CREATE, + query::TriggerPhase::BEFORE_COMMIT, &ast_cache, &*dba, &antlr_lock); + expected_info.push_back( + {"trigger", "RETURN 1", query::TriggerEventType::VERTEX_CREATE, query::TriggerPhase::BEFORE_COMMIT}); + + const auto check_trigger_info = [&] { + const auto trigger_info = store.GetTriggerInfo(); + ASSERT_EQ(expected_info.size(), trigger_info.size()); + // ensure all of the expected trigger infos can be found in the retrieved infos + ASSERT_TRUE(std::all_of(expected_info.begin(), expected_info.end(), [&](const auto &info) { + return std::find_if(trigger_info.begin(), trigger_info.end(), [&](const auto &other) { + return info.name == other.name && info.statement == other.statement && + info.event_type == other.event_type && info.phase == other.phase; + }) != trigger_info.end(); + })); + }; + + check_trigger_info(); + + store.AddTrigger("edge_update_trigger", "RETURN 1", {}, query::TriggerEventType::EDGE_UPDATE, + query::TriggerPhase::AFTER_COMMIT, &ast_cache, &*dba, &antlr_lock); + expected_info.push_back( + {"edge_update_trigger", "RETURN 1", query::TriggerEventType::EDGE_UPDATE, query::TriggerPhase::AFTER_COMMIT}); + + check_trigger_info(); + + store.DropTrigger("edge_update_trigger"); + const auto erase_from_expected = [&](const std::string_view name) { + const auto erase_count = std::erase_if(expected_info, [name](const auto &info) { return info.name == name; }); + ASSERT_EQ(erase_count, 1); + }; + erase_from_expected("edge_update_trigger"); + + check_trigger_info(); + + store.DropTrigger("trigger"); + erase_from_expected("trigger"); + + check_trigger_info(); +} From 
5af3d0ff68b162397f4135803c8b0ebca3de4023 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Tue, 18 May 2021 18:28:17 +0200 Subject: [PATCH 47/63] Add e2e tests for triggers (#152) --- .github/workflows/diff.yaml | 12 +- .github/workflows/release_centos8.yaml | 12 +- .github/workflows/release_debian10.yaml | 12 +- .github/workflows/release_ubuntu2004.yaml | 12 +- src/query/trigger.cpp | 9 +- tests/e2e/CMakeLists.txt | 2 +- tests/e2e/runner.py | 15 +- tests/e2e/triggers/CMakeLists.txt | 11 + tests/e2e/triggers/common.cpp | 94 +++++++ tests/e2e/triggers/common.hpp | 23 ++ tests/e2e/triggers/on_create_triggers.cpp | 107 ++++++++ tests/e2e/triggers/on_delete_triggers.cpp | 133 ++++++++++ tests/e2e/triggers/on_update_triggers.cpp | 286 ++++++++++++++++++++++ tests/e2e/triggers/workloads.yaml | 24 ++ tests/unit/query_trigger.cpp | 34 ++- 15 files changed, 724 insertions(+), 62 deletions(-) create mode 100644 tests/e2e/triggers/CMakeLists.txt create mode 100644 tests/e2e/triggers/common.cpp create mode 100644 tests/e2e/triggers/common.hpp create mode 100644 tests/e2e/triggers/on_create_triggers.cpp create mode 100644 tests/e2e/triggers/on_delete_triggers.cpp create mode 100644 tests/e2e/triggers/on_update_triggers.cpp create mode 100644 tests/e2e/triggers/workloads.yaml diff --git a/.github/workflows/diff.yaml b/.github/workflows/diff.yaml index 2b4ce5ded..c0794c596 100644 --- a/.github/workflows/diff.yaml +++ b/.github/workflows/diff.yaml @@ -248,22 +248,14 @@ jobs: tests/gql_behave/gql_behave_status.csv tests/gql_behave/gql_behave_status.html - - name: Run e2e replication tests + - name: Run e2e tests run: | # TODO(gitbuda): Setup mgclient and pymgclient properly. 
cd tests ./setup.sh source ve3/bin/activate cd e2e - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-path replication/workloads.yaml - - - name: Run e2e memory control tests - run: | - cd tests - ./setup.sh - source ve3/bin/activate - cd e2e - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-path memory/workloads.yaml + LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-root-directory . - name: Run stress test (plain) run: | diff --git a/.github/workflows/release_centos8.yaml b/.github/workflows/release_centos8.yaml index 17df9917c..e58ff5c88 100644 --- a/.github/workflows/release_centos8.yaml +++ b/.github/workflows/release_centos8.yaml @@ -293,22 +293,14 @@ jobs: tests/gql_behave/gql_behave_status.csv tests/gql_behave/gql_behave_status.html - - name: Run e2e replication tests + - name: Run e2e tests run: | # TODO(gitbuda): Setup mgclient and pymgclient properly. cd tests ./setup.sh source ve3/bin/activate cd e2e - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-path replication/workloads.yaml - - - name: Run e2e memory control tests - run: | - cd tests - ./setup.sh - source ve3/bin/activate - cd e2e - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-path memory/workloads.yaml + LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-root-directory . - name: Run stress test (plain) run: | diff --git a/.github/workflows/release_debian10.yaml b/.github/workflows/release_debian10.yaml index 2d5ba607a..30e9b5a85 100644 --- a/.github/workflows/release_debian10.yaml +++ b/.github/workflows/release_debian10.yaml @@ -291,22 +291,14 @@ jobs: tests/gql_behave/gql_behave_status.csv tests/gql_behave/gql_behave_status.html - - name: Run e2e replication tests + - name: Run e2e tests run: | # TODO(gitbuda): Setup mgclient and pymgclient properly. 
cd tests ./setup.sh source ve3/bin/activate cd e2e - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-path replication/workloads.yaml - - - name: Run e2e memory control tests - run: | - cd tests - ./setup.sh - source ve3/bin/activate - cd e2e - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-path memory/workloads.yaml + LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-root-directory . - name: Run stress test (plain) run: | diff --git a/.github/workflows/release_ubuntu2004.yaml b/.github/workflows/release_ubuntu2004.yaml index c54f3117b..d309d1946 100644 --- a/.github/workflows/release_ubuntu2004.yaml +++ b/.github/workflows/release_ubuntu2004.yaml @@ -291,22 +291,14 @@ jobs: tests/gql_behave/gql_behave_status.csv tests/gql_behave/gql_behave_status.html - - name: Run e2e replication tests + - name: Run e2e tests run: | # TODO(gitbuda): Setup mgclient and pymgclient properly. cd tests ./setup.sh source ve3/bin/activate cd e2e - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-path replication/workloads.yaml - - - name: Run e2e memory control tests - run: | - cd tests - ./setup.sh - source ve3/bin/activate - cd e2e - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-path memory/workloads.yaml + LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-root-directory . 
- name: Run stress test (plain) run: | diff --git a/src/query/trigger.cpp b/src/query/trigger.cpp index be46d19e8..9f7edba84 100644 --- a/src/query/trigger.cpp +++ b/src/query/trigger.cpp @@ -168,10 +168,13 @@ TypedValue ToTypedValue(const std::vector &values, DbAccessor *dba) { } } - TypedValue result{std::map{}}; - auto &typed_values = result.ValueMap(); + TypedValue result{std::vector{}}; + auto &typed_values = result.ValueList(); for (auto &[label_id, vertices] : vertices_by_labels) { - typed_values.emplace(dba->LabelToName(label_id), TypedValue(std::move(vertices))); + typed_values.emplace_back(std::map{ + {std::string{"label"}, TypedValue(dba->LabelToName(label_id))}, + {std::string{"vertices"}, TypedValue(std::move(vertices))}, + }); } return result; diff --git a/tests/e2e/CMakeLists.txt b/tests/e2e/CMakeLists.txt index 9d6d7bd8e..6984f3530 100644 --- a/tests/e2e/CMakeLists.txt +++ b/tests/e2e/CMakeLists.txt @@ -1,3 +1,3 @@ add_subdirectory(replication) - add_subdirectory(memory) +add_subdirectory(triggers) diff --git a/tests/e2e/runner.py b/tests/e2e/runner.py index 5d88740d9..36a7a34de 100755 --- a/tests/e2e/runner.py +++ b/tests/e2e/runner.py @@ -2,6 +2,7 @@ from argparse import ArgumentParser import atexit import logging import os +from pathlib import Path import subprocess import yaml @@ -17,18 +18,21 @@ log = logging.getLogger("memgraph.tests.e2e") def load_args(): parser = ArgumentParser() - parser.add_argument("--workloads-path", required=True) + parser.add_argument("--workloads-root-directory", required=True) parser.add_argument("--workload-name", default=None, required=False) return parser.parse_args() -def load_workloads(path): - with open(path, "r") as f: - return yaml.load(f, Loader=yaml.FullLoader)['workloads'] +def load_workloads(root_directory): + workloads = [] + for file in Path(root_directory).rglob('*.yaml'): + with open(file, "r") as f: + workloads.extend(yaml.load(f, Loader=yaml.FullLoader)['workloads']) + return workloads def 
run(args): - workloads = load_workloads(args.workloads_path) + workloads = load_workloads(args.workloads_root_directory) for workload in workloads: workload_name = workload['name'] if args.workload_name is not None and \ @@ -37,6 +41,7 @@ def run(args): log.info("%s STARTED.", workload_name) # Setup. mg_instances = {} + @atexit.register def cleanup(): for mg_instance in mg_instances.values(): diff --git a/tests/e2e/triggers/CMakeLists.txt b/tests/e2e/triggers/CMakeLists.txt new file mode 100644 index 000000000..d1587c56b --- /dev/null +++ b/tests/e2e/triggers/CMakeLists.txt @@ -0,0 +1,11 @@ +add_library(memgraph__e2e__triggers_common STATIC common.hpp common.cpp) +target_link_libraries(memgraph__e2e__triggers_common PUBLIC gflags mgclient mg-utils) + +add_executable(memgraph__e2e__triggers__on_create on_create_triggers.cpp) +target_link_libraries(memgraph__e2e__triggers__on_create memgraph__e2e__triggers_common) + +add_executable(memgraph__e2e__triggers__on_update on_update_triggers.cpp) +target_link_libraries(memgraph__e2e__triggers__on_update memgraph__e2e__triggers_common) + +add_executable(memgraph__e2e__triggers__on_delete on_delete_triggers.cpp) +target_link_libraries(memgraph__e2e__triggers__on_delete memgraph__e2e__triggers_common) diff --git a/tests/e2e/triggers/common.cpp b/tests/e2e/triggers/common.cpp new file mode 100644 index 000000000..17f916217 --- /dev/null +++ b/tests/e2e/triggers/common.cpp @@ -0,0 +1,94 @@ +#include "common.hpp" + +#include +#include + +#include +#include +#include "utils/logging.hpp" +#include "utils/timer.hpp" + +DEFINE_uint64(bolt_port, 7687, "Bolt port"); + +std::unique_ptr Connect() { + auto client = + mg::Client::Connect({.host = "127.0.0.1", .port = static_cast(FLAGS_bolt_port), .use_ssl = false}); + MG_ASSERT(client, "Failed to connect!"); + return client; +} + +void CreateVertex(mg::Client &client, int vertex_id) { + mg::Map parameters{ + {"id", mg::Value{vertex_id}}, + }; + client.Execute(fmt::format("CREATE (n: {} {{ 
id: $id }})", kVertexLabel), mg::ConstMap{parameters.ptr()}); + client.DiscardAll(); +} + +void CreateEdge(mg::Client &client, int from_vertex, int to_vertex, int edge_id) { + mg::Map parameters{ + {"from", mg::Value{from_vertex}}, + {"to", mg::Value{to_vertex}}, + {"id", mg::Value{edge_id}}, + }; + client.Execute(fmt::format("MATCH (from: {} {{ id: $from }}), (to: {} {{id: $to }}) " + "CREATE (from)-[r: {} {{id: $id}}]->(to)", + kVertexLabel, kVertexLabel, kEdgeLabel), + mg::ConstMap{parameters.ptr()}); + client.DiscardAll(); +} + +int GetNumberOfAllVertices(mg::Client &client) { + client.Execute("MATCH (n) RETURN COUNT(*)"); + const auto value = client.FetchOne(); + MG_ASSERT(value, "Unexpected error"); + MG_ASSERT(value->size() == 1, "Unexpected number of columns!"); + client.FetchAll(); + MG_ASSERT(value->at(0).type() == mg::Value::Type::Int, "Unexpected type!"); + return value->at(0).ValueInt(); +} + +void WaitForNumberOfAllVertices(mg::Client &client, int number_of_vertices) { + utils::Timer timer{}; + while ((timer.Elapsed().count() <= 0.5) && GetNumberOfAllVertices(client) != number_of_vertices) { + } + CheckNumberOfAllVertices(client, number_of_vertices); +} + +void CheckNumberOfAllVertices(mg::Client &client, int expected_number_of_vertices) { + const auto number_of_vertices = GetNumberOfAllVertices(client); + MG_ASSERT(number_of_vertices == expected_number_of_vertices, "There are {} vertices, expected {}!", + number_of_vertices, expected_number_of_vertices); +} + +std::optional GetVertex(mg::Client &client, std::string_view label, int vertex_id) { + mg::Map parameters{ + {"id", mg::Value{vertex_id}}, + }; + + client.Execute(fmt::format("MATCH (n: {} {{id: $id}}) RETURN n", label), mg::ConstMap{parameters.ptr()}); + const auto result = client.FetchAll(); + MG_ASSERT(result, "Vertex with label {} and id {} cannot be found!", label, vertex_id); + const auto &rows = *result; + MG_ASSERT(rows.size() <= 1, "Unexpected number of vertices with label {} and id 
{}, found {} vertices", label, + vertex_id, rows.size()); + if (rows.empty()) { + return std::nullopt; + } + + return rows[0][0]; +} + +bool VertexExists(mg::Client &client, std::string_view label, int vertex_id) { + return GetVertex(client, label, vertex_id).has_value(); +} + +void CheckVertexMissing(mg::Client &client, std::string_view label, int vertex_id) { + MG_ASSERT(!VertexExists(client, label, vertex_id), "Not expected vertex exist with label {} and id {}!", label, + vertex_id); +} + +void CheckVertexExists(mg::Client &client, std::string_view label, int vertex_id) { + MG_ASSERT(VertexExists(client, label, vertex_id), "Expected vertex doesn't exist with label {} and id {}!", label, + vertex_id); +} diff --git a/tests/e2e/triggers/common.hpp b/tests/e2e/triggers/common.hpp new file mode 100644 index 000000000..903dd8125 --- /dev/null +++ b/tests/e2e/triggers/common.hpp @@ -0,0 +1,23 @@ +#pragma once + +#include +#include +#include +#include + +#include + +constexpr std::string_view kVertexLabel{"VERTEX"}; +constexpr std::string_view kEdgeLabel{"EDGE"}; + +std::unique_ptr Connect(); +void CreateVertex(mg::Client &client, int vertex_id); +void CreateEdge(mg::Client &client, int from_vertex, int to_vertex, int edge_id); + +int GetNumberOfAllVertices(mg::Client &client); +void WaitForNumberOfAllVertices(mg::Client &client, int number_of_vertices); +void CheckNumberOfAllVertices(mg::Client &client, int expected_number_of_vertices); +std::optional GetVertex(mg::Client &client, std::string_view label, int vertex_id); +bool VertexExists(mg::Client &client, std::string_view label, int vertex_id); +void CheckVertexMissing(mg::Client &client, std::string_view label, int vertex_id); +void CheckVertexExists(mg::Client &client, std::string_view label, int vertex_id); \ No newline at end of file diff --git a/tests/e2e/triggers/on_create_triggers.cpp b/tests/e2e/triggers/on_create_triggers.cpp new file mode 100644 index 000000000..d2699ddcf --- /dev/null +++ 
b/tests/e2e/triggers/on_create_triggers.cpp @@ -0,0 +1,107 @@ +#include +#include + +#include +#include +#include "common.hpp" +#include "utils/logging.hpp" + +constexpr std::string_view kTriggerCreatedVertexLabel{"CREATED_VERTEX"}; +constexpr std::string_view kTriggerCreatedEdgeLabel{"CREATED_EDGE"}; +constexpr std::string_view kTriggerCreatedObjectLabel{"CREATED_OBJECT"}; + +void CreateOnCreateTriggers(mg::Client &client, bool is_before) { + const std::string_view before_or_after = is_before ? "BEFORE" : "AFTER"; + client.Execute( + fmt::format("CREATE TRIGGER CreatedVerticesTrigger ON () CREATE " + "{} COMMIT " + "EXECUTE " + "UNWIND createdVertices as createdVertex " + "CREATE (n: {} {{ id: createdVertex.id }})", + before_or_after, kTriggerCreatedVertexLabel)); + client.DiscardAll(); + client.Execute( + fmt::format("CREATE TRIGGER CreatedEdgesTrigger ON --> CREATE " + "{} COMMIT " + "EXECUTE " + "UNWIND createdEdges as createdEdge " + "CREATE (n: {} {{ id: createdEdge.id }})", + before_or_after, kTriggerCreatedEdgeLabel)); + client.DiscardAll(); + client.Execute( + fmt::format("CREATE TRIGGER CreatedObjectsTrigger ON CREATE " + "{} COMMIT " + "EXECUTE " + "UNWIND createdObjects as createdObjectEvent " + "WITH CASE createdObjectEvent.event_type WHEN \"created_vertex\" THEN createdObjectEvent.vertex.id " + "ELSE createdObjectEvent.edge.id END as id " + "CREATE (n: {} {{ id: id }})", + before_or_after, kTriggerCreatedObjectLabel)); + client.DiscardAll(); +} + +void DropOnCreateTriggers(mg::Client &client) { + client.Execute("DROP TRIGGER CreatedVerticesTrigger"); + client.DiscardAll(); + client.Execute("DROP TRIGGER CreatedEdgesTrigger"); + client.DiscardAll(); + client.Execute("DROP TRIGGER CreatedObjectsTrigger"); + client.DiscardAll(); +} + +int main(int argc, char **argv) { + gflags::SetUsageMessage("Memgraph E2E ON CREATE Triggers"); + gflags::ParseCommandLineFlags(&argc, &argv, true); + logging::RedirectToStderr(); + + mg::Client::Init(); + + auto client = 
Connect(); + + const auto run_create_trigger_tests = [&](bool is_before) { + const std::array vertex_ids{1, 2}; + const int edge_id = 3; + { + CreateOnCreateTriggers(*client, is_before); + client->BeginTransaction(); + for (const auto vertex_id : vertex_ids) { + CreateVertex(*client, vertex_id); + CheckVertexExists(*client, kVertexLabel, vertex_id); + CheckVertexMissing(*client, kTriggerCreatedVertexLabel, vertex_id); + CheckVertexMissing(*client, kTriggerCreatedObjectLabel, vertex_id); + } + CreateEdge(*client, vertex_ids[0], vertex_ids[1], edge_id); + CheckVertexMissing(*client, kTriggerCreatedEdgeLabel, edge_id); + CheckVertexMissing(*client, kTriggerCreatedObjectLabel, edge_id); + client->CommitTransaction(); + + // :VERTEX x 2 + // :CREATED_VERTEX x 2 + // :CREATED_EDGE x 1 + // :CREATED_OBJECT x 3 + constexpr auto kNumberOfExpectedVertices = 8; + + if (is_before) { + CheckNumberOfAllVertices(*client, kNumberOfExpectedVertices); + } else { + WaitForNumberOfAllVertices(*client, kNumberOfExpectedVertices); + } + + for (const auto vertex_id : vertex_ids) { + CheckVertexExists(*client, kTriggerCreatedVertexLabel, vertex_id); + CheckVertexExists(*client, kTriggerCreatedObjectLabel, vertex_id); + } + CheckVertexExists(*client, kTriggerCreatedEdgeLabel, edge_id); + CheckVertexExists(*client, kTriggerCreatedObjectLabel, edge_id); + DropOnCreateTriggers(*client); + client->Execute("MATCH (n) DETACH DELETE n;"); + client->DiscardAll(); + } + }; + constexpr bool kBeforeCommit = true; + constexpr bool kAfterCommit = false; + run_create_trigger_tests(kBeforeCommit); + run_create_trigger_tests(kAfterCommit); + + return 0; +} diff --git a/tests/e2e/triggers/on_delete_triggers.cpp b/tests/e2e/triggers/on_delete_triggers.cpp new file mode 100644 index 000000000..1c0886863 --- /dev/null +++ b/tests/e2e/triggers/on_delete_triggers.cpp @@ -0,0 +1,133 @@ +#include +#include + +#include +#include +#include "common.hpp" +#include "utils/logging.hpp" + +constexpr std::string_view 
kTriggerDeletedVertexLabel{"DELETED_VERTEX"}; +constexpr std::string_view kTriggerDeletedEdgeLabel{"DELETED_EDGE"}; +constexpr std::string_view kTriggerDeletedObjectLabel{"DELETED_OBJECT"}; + +void DetachDeleteVertex(mg::Client &client, int vertex_id) { + mg::Map parameters{{"id", mg::Value{vertex_id}}}; + client.Execute(fmt::format("MATCH (n: {} {{id: $id}}) DETACH DELETE n", kVertexLabel), + mg::ConstMap{parameters.ptr()}); + client.DiscardAll(); +} + +void DeleteEdge(mg::Client &client, int edge_id) { + mg::Map parameters{{"id", mg::Value{edge_id}}}; + client.Execute(fmt::format("MATCH ()-[r: {} {{id: $id}}]->() DELETE r", kEdgeLabel), mg::ConstMap{parameters.ptr()}); + client.DiscardAll(); +} + +void CreateOnDeleteTriggers(mg::Client &client, bool is_before) { + const std::string_view before_or_after = is_before ? "BEFORE" : "AFTER"; + client.Execute( + fmt::format("CREATE TRIGGER DeletedVerticesTrigger ON () DELETE " + "{} COMMIT " + "EXECUTE " + "UNWIND deletedVertices as deletedVertex " + "CREATE (n: {} {{ id: deletedVertex.id }})", + before_or_after, kTriggerDeletedVertexLabel)); + client.DiscardAll(); + client.Execute( + fmt::format("CREATE TRIGGER DeletedEdgesTrigger ON --> DELETE " + "{} COMMIT " + "EXECUTE " + "UNWIND deletedEdges as deletedEdge " + "CREATE (n: {} {{ id: deletedEdge.id }})", + before_or_after, kTriggerDeletedEdgeLabel)); + client.DiscardAll(); + client.Execute( + fmt::format("CREATE TRIGGER DeletedObjectsTrigger ON DELETE " + "{} COMMIT " + "EXECUTE " + "UNWIND deletedObjects as deletedObjectEvent " + "WITH CASE deletedObjectEvent.event_type WHEN \"deleted_vertex\" THEN deletedObjectEvent.vertex.id " + "ELSE deletedObjectEvent.edge.id END as id " + "CREATE (n: {} {{ id: id }})", + before_or_after, kTriggerDeletedObjectLabel)); + client.DiscardAll(); +} + +void DropOnDeleteTriggers(mg::Client &client) { + client.Execute("DROP TRIGGER DeletedVerticesTrigger"); + client.DiscardAll(); + client.Execute("DROP TRIGGER DeletedEdgesTrigger"); + 
client.DiscardAll(); + client.Execute("DROP TRIGGER DeletedObjectsTrigger"); + client.DiscardAll(); +} + +struct EdgeInfo { + int from_vertex; + int to_vertex; + int edge_id; +}; + +int main(int argc, char **argv) { + gflags::SetUsageMessage("Memgraph E2E ON DELETE Triggers"); + gflags::ParseCommandLineFlags(&argc, &argv, true); + logging::RedirectToStderr(); + + mg::Client::Init(); + + auto client = Connect(); + + const auto run_delete_trigger_tests = [&](bool is_before) { + const std::array vertex_ids{1, 2, 3, 4}; + const std::array edges{EdgeInfo{vertex_ids[0], vertex_ids[1], 5}, EdgeInfo{vertex_ids[2], vertex_ids[3], 6}}; + { + CreateOnDeleteTriggers(*client, is_before); + + client->BeginTransaction(); + for (const auto vertex_id : vertex_ids) { + CreateVertex(*client, vertex_id); + } + for (const auto &edge : edges) { + CreateEdge(*client, edge.from_vertex, edge.to_vertex, edge.edge_id); + } + client->CommitTransaction(); + CheckNumberOfAllVertices(*client, vertex_ids.size()); + + client->BeginTransaction(); + DetachDeleteVertex(*client, vertex_ids[0]); + DeleteEdge(*client, edges[1].edge_id); + client->CommitTransaction(); + + // :VERTEX x 4 + // deleted :VERTEX x -1 + // :DELETED_VERTEX x 1 + // :DELETED_EDGE x 2 + // :DELETED_OBJECT x 3 + constexpr auto kNumberOfExpectedVertices = 9; + + if (is_before) { + CheckNumberOfAllVertices(*client, kNumberOfExpectedVertices); + } else { + WaitForNumberOfAllVertices(*client, kNumberOfExpectedVertices); + } + + CheckVertexExists(*client, kTriggerDeletedVertexLabel, vertex_ids[0]); + CheckVertexExists(*client, kTriggerDeletedObjectLabel, vertex_ids[0]); + + for (const auto &edge : edges) { + CheckVertexExists(*client, kTriggerDeletedEdgeLabel, edge.edge_id); + CheckVertexExists(*client, kTriggerDeletedObjectLabel, edge.edge_id); + } + + DropOnDeleteTriggers(*client); + client->Execute("MATCH (n) DETACH DELETE n;"); + client->DiscardAll(); + } + }; + constexpr bool kBeforeCommit = true; + constexpr bool kAfterCommit = 
false; + run_delete_trigger_tests(kBeforeCommit); + run_delete_trigger_tests(kAfterCommit); + + return 0; +} diff --git a/tests/e2e/triggers/on_update_triggers.cpp b/tests/e2e/triggers/on_update_triggers.cpp new file mode 100644 index 000000000..75da49ac8 --- /dev/null +++ b/tests/e2e/triggers/on_update_triggers.cpp @@ -0,0 +1,286 @@ +#include +#include + +#include +#include +#include "common.hpp" +#include "utils/logging.hpp" + +constexpr std::string_view kTriggerUpdatedVertexLabel{"UPDATED_VERTEX"}; +constexpr std::string_view kTriggerUpdatedEdgeLabel{"UPDATED_EDGE"}; +constexpr std::string_view kTriggerUpdatedObjectLabel{"UPDATED_OBJECT"}; +constexpr std::string_view kTriggerSetVertexPropertyLabel{"SET_VERTEX_PROPERTY"}; +constexpr std::string_view kTriggerRemovedVertexPropertyLabel{"REMOVED_VERTEX_PROPERTY"}; +constexpr std::string_view kTriggerSetVertexLabelLabel{"SET_VERTEX_LABEL"}; +constexpr std::string_view kTriggerRemovedVertexLabelLabel{"REMOVED_VERTEX_LABEL"}; +constexpr std::string_view kTriggerSetEdgePropertyLabel{"SET_EDGE_PROPERTY"}; +constexpr std::string_view kTriggerRemovedEdgePropertyLabel{"REMOVED_EDGE_PROPERTY"}; + +void SetVertexProperty(mg::Client &client, int vertex_id, std::string_view property_name, mg::Value value) { + mg::Map parameters{ + {"id", mg::Value{vertex_id}}, + {"value", std::move(value)}, + }; + client.Execute(fmt::format("MATCH (n: {} {{id: $id}}) " + "SET n.{} = $value", + kVertexLabel, property_name), + mg::ConstMap{parameters.ptr()}); + client.DiscardAll(); +} + +void SetEdgeProperty(mg::Client &client, int edge_id, std::string_view property_name, mg::Value value) { + mg::Map parameters{ + {"id", mg::Value{edge_id}}, + {"value", std::move(value)}, + }; + client.Execute(fmt::format("MATCH ()-[r: {} {{id: $id}}]->() " + "SET r.{} = $value", + kEdgeLabel, property_name), + mg::ConstMap{parameters.ptr()}); + client.DiscardAll(); +} + +void DoVertexLabelOperation(mg::Client &client, int vertex_id, std::string_view label, 
std::string_view operation) { + mg::Map parameters{{"id", mg::Value{vertex_id}}}; + client.Execute(fmt::format("MATCH (n: {} {{id: $id}}) " + "{} n:{}", + kVertexLabel, operation, label), + mg::ConstMap{parameters.ptr()}); + client.DiscardAll(); +} + +void AddVertexLabel(mg::Client &client, int vertex_id, std::string_view label) { + DoVertexLabelOperation(client, vertex_id, label, "SET"); +} + +void RemoveVertexLabel(mg::Client &client, int vertex_id, std::string_view label) { + DoVertexLabelOperation(client, vertex_id, label, "REMOVE"); +} + +void CheckVertexProperty(mg::Client &client, std::string_view label, int vertex_id, std::string_view property_name, + const mg::Value &value) { + const auto vertex = GetVertex(client, label, vertex_id); + MG_ASSERT(vertex, "Cannot check property of not existing vertex with label {} and id {}", label, vertex_id); + + const auto properties = vertex->ValueNode().properties(); + const auto prop_it = properties.find(property_name); + MG_ASSERT(prop_it != properties.end(), "Vertex with label {} and id {} doesn't have expected property {}!", label, + vertex_id, property_name); + MG_ASSERT((*prop_it).second == value, "Property {} of vertex with label {} and id {} doesn't have expected value!", + property_name, label, vertex_id); +} + +void CreateOnUpdateTriggers(mg::Client &client, bool is_before) { + const std::string_view before_or_after = is_before ? 
"BEFORE" : "AFTER"; + client.Execute( + fmt::format("CREATE TRIGGER UpdatedVerticesTrigger ON () UPDATE " + "{} COMMIT " + "EXECUTE " + "UNWIND updatedVertices as updateVertexEvent " + "CREATE (n: {} {{ id: updateVertexEvent.vertex.id , event_type: updateVertexEvent.event_type }})", + before_or_after, kTriggerUpdatedVertexLabel)); + client.DiscardAll(); + client.Execute( + fmt::format("CREATE TRIGGER UpdatedEdgesTrigger ON --> UPDATE " + "{} COMMIT " + "EXECUTE " + "UNWIND updatedEdges as updatedEdgeEvent " + "CREATE (n: {} {{ id: updatedEdgeEvent.edge.id, event_type: updatedEdgeEvent.event_type }})", + before_or_after, kTriggerUpdatedEdgeLabel)); + client.DiscardAll(); + client.Execute( + fmt::format("CREATE TRIGGER UpdatedObjectsTrigger ON UPDATE " + "{} COMMIT " + "EXECUTE " + "UNWIND updatedObjects as updatedObject " + "WITH CASE updatedObject.event_type " + "WHEN \"set_edge_property\" THEN updatedObject.edge.id " + "WHEN \"removed_edge_property\" THEN updatedObject.edge.id " + "ELSE updatedObject.vertex.id END as id, updatedObject " + "CREATE (n: {} {{ id: id, event_type: updatedObject.event_type }})", + before_or_after, kTriggerUpdatedObjectLabel)); + client.DiscardAll(); + + client.Execute( + fmt::format("CREATE TRIGGER SetVertexPropertiesTrigger ON () UPDATE " + "{} COMMIT " + "EXECUTE " + "UNWIND assignedVertexProperties as assignedVertexProperty " + "CREATE (n: {} {{ id: assignedVertexProperty.vertex.id }})", + before_or_after, kTriggerSetVertexPropertyLabel)); + client.DiscardAll(); + client.Execute( + fmt::format("CREATE TRIGGER RemovedVertexPropertiesTrigger ON () UPDATE " + "{} COMMIT " + "EXECUTE " + "UNWIND removedVertexProperties as removedVertexProperty " + "CREATE (n: {} {{ id: removedVertexProperty.vertex.id }})", + before_or_after, kTriggerRemovedVertexPropertyLabel)); + client.DiscardAll(); + client.Execute( + fmt::format("CREATE TRIGGER SetVertexLabelsTrigger ON () UPDATE " + "{} COMMIT " + "EXECUTE " + "UNWIND assignedVertexLabels as 
assignedVertexLabel " + "UNWIND assignedVertexLabel.vertices as vertex " + "CREATE (n: {} {{ id: vertex.id }})", + before_or_after, kTriggerSetVertexLabelLabel)); + client.DiscardAll(); + client.Execute( + fmt::format("CREATE TRIGGER RemovedVertexLabelTrigger ON () UPDATE " + "{} COMMIT " + "EXECUTE " + "UNWIND removedVertexLabels as removedVertexLabel " + "UNWIND removedVertexLabel.vertices as vertex " + "CREATE (n: {} {{ id: vertex.id }})", + before_or_after, kTriggerRemovedVertexLabelLabel)); + client.DiscardAll(); + + client.Execute( + fmt::format("CREATE TRIGGER SetEdgePropertiesTrigger ON --> UPDATE " + "{} COMMIT " + "EXECUTE " + "UNWIND assignedEdgeProperties as assignedEdgeProperty " + "CREATE (n: {} {{ id: assignedEdgeProperty.edge.id }})", + before_or_after, kTriggerSetEdgePropertyLabel)); + client.DiscardAll(); + client.Execute( + fmt::format("CREATE TRIGGER RemovedEdgePropertiesTrigger ON --> UPDATE " + "{} COMMIT " + "EXECUTE " + "UNWIND removedEdgeProperties as removedEdgeProperty " + "CREATE (n: {} {{ id: removedEdgeProperty.edge.id }})", + before_or_after, kTriggerRemovedEdgePropertyLabel)); + client.DiscardAll(); +} + +void DropOnUpdateTriggers(mg::Client &client) { + client.Execute("DROP TRIGGER UpdatedVerticesTrigger"); + client.DiscardAll(); + client.Execute("DROP TRIGGER UpdatedEdgesTrigger"); + client.DiscardAll(); + client.Execute("DROP TRIGGER UpdatedObjectsTrigger"); + client.DiscardAll(); + client.Execute("DROP TRIGGER SetVertexPropertiesTrigger"); + client.DiscardAll(); + client.Execute("DROP TRIGGER RemovedVertexPropertiesTrigger"); + client.DiscardAll(); + client.Execute("DROP TRIGGER SetVertexLabelsTrigger"); + client.DiscardAll(); + client.Execute("DROP TRIGGER RemovedVertexLabelTrigger"); + client.DiscardAll(); + client.Execute("DROP TRIGGER SetEdgePropertiesTrigger"); + client.DiscardAll(); + client.Execute("DROP TRIGGER RemovedEdgePropertiesTrigger"); + client.DiscardAll(); +} + +struct EdgeInfo { + int from_vertex; + int 
to_vertex; + int edge_id; +}; + +int main(int argc, char **argv) { + constexpr std::string_view kExtraLabel = "EXTRA_LABEL"; + constexpr std::string_view kUpdatedProperty = "updateProperty"; + gflags::SetUsageMessage("Memgraph E2E ON UPDATE Triggers"); + gflags::ParseCommandLineFlags(&argc, &argv, true); + logging::RedirectToStderr(); + + mg::Client::Init(); + + auto client = Connect(); + + const auto run_update_trigger_tests = [&](bool is_before) { + const std::array vertex_ids{1, 2, 3, 4}; + const std::array edges{EdgeInfo{vertex_ids[0], vertex_ids[1], 5}, EdgeInfo{vertex_ids[2], vertex_ids[3], 6}}; + { + CreateOnUpdateTriggers(*client, is_before); + + client->BeginTransaction(); + for (const auto vertex_id : vertex_ids) { + CreateVertex(*client, vertex_id); + SetVertexProperty(*client, vertex_id, kUpdatedProperty, mg::Value(vertex_id)); + AddVertexLabel(*client, vertex_id, kExtraLabel); + } + for (const auto &edge : edges) { + CreateEdge(*client, edge.from_vertex, edge.to_vertex, edge.edge_id); + SetEdgeProperty(*client, edge.edge_id, kUpdatedProperty, mg::Value(edge.edge_id)); + } + client->CommitTransaction(); + CheckNumberOfAllVertices(*client, vertex_ids.size()); + + client->BeginTransaction(); + SetVertexProperty(*client, vertex_ids[0], kUpdatedProperty, mg::Value(-1)); + SetVertexProperty(*client, vertex_ids[1], kUpdatedProperty, mg::Value()); + AddVertexLabel(*client, vertex_ids[2], "NEW_LABEL"); + RemoveVertexLabel(*client, vertex_ids[3], kExtraLabel); + SetEdgeProperty(*client, edges[0].edge_id, kUpdatedProperty, mg::Value(-1)); + SetEdgeProperty(*client, edges[1].edge_id, kUpdatedProperty, mg::Value()); + CheckNumberOfAllVertices(*client, vertex_ids.size()); + client->CommitTransaction(); + + // :VERTEX x 4 + // :UPDATED_VERTEX x 4 + // :UPDATED_EDGE x 2 + // :UPDATED_OBJECT x 6 + // :SET_VERTEX_PROPERTY x 1 + // :REMOVED_VERTEX_PROPERTY x 1 + // :SET_VERTEX_LABEL x 1 + // :REMOVED_VERTEX_LABEL x 1 + // :SET_EDGE_PROPERTY x 1 + // 
:REMOVED_EDGE_PROPERTY x 1 + constexpr auto kNumberOfExpectedVertices = 22; + + if (is_before) { + CheckNumberOfAllVertices(*client, kNumberOfExpectedVertices); + } else { + WaitForNumberOfAllVertices(*client, kNumberOfExpectedVertices); + } + + CheckVertexProperty(*client, kTriggerUpdatedVertexLabel, vertex_ids[0], "event_type", + mg::Value{"set_vertex_property"}); + CheckVertexProperty(*client, kTriggerUpdatedVertexLabel, vertex_ids[1], "event_type", + mg::Value{"removed_vertex_property"}); + CheckVertexProperty(*client, kTriggerUpdatedVertexLabel, vertex_ids[2], "event_type", + mg::Value{"set_vertex_label"}); + CheckVertexProperty(*client, kTriggerUpdatedVertexLabel, vertex_ids[3], "event_type", + mg::Value{"removed_vertex_label"}); + CheckVertexProperty(*client, kTriggerUpdatedEdgeLabel, edges[0].edge_id, "event_type", + mg::Value{"set_edge_property"}); + CheckVertexProperty(*client, kTriggerUpdatedEdgeLabel, edges[1].edge_id, "event_type", + mg::Value{"removed_edge_property"}); + + CheckVertexProperty(*client, kTriggerUpdatedObjectLabel, vertex_ids[0], "event_type", + mg::Value{"set_vertex_property"}); + CheckVertexProperty(*client, kTriggerUpdatedObjectLabel, vertex_ids[1], "event_type", + mg::Value{"removed_vertex_property"}); + CheckVertexProperty(*client, kTriggerUpdatedObjectLabel, vertex_ids[2], "event_type", + mg::Value{"set_vertex_label"}); + CheckVertexProperty(*client, kTriggerUpdatedObjectLabel, vertex_ids[3], "event_type", + mg::Value{"removed_vertex_label"}); + CheckVertexProperty(*client, kTriggerUpdatedObjectLabel, edges[0].edge_id, "event_type", + mg::Value{"set_edge_property"}); + CheckVertexProperty(*client, kTriggerUpdatedObjectLabel, edges[1].edge_id, "event_type", + mg::Value{"removed_edge_property"}); + + CheckVertexExists(*client, kTriggerSetVertexPropertyLabel, vertex_ids[0]); + CheckVertexExists(*client, kTriggerRemovedVertexPropertyLabel, vertex_ids[1]); + CheckVertexExists(*client, kTriggerSetVertexLabelLabel, vertex_ids[2]); + 
CheckVertexExists(*client, kTriggerRemovedVertexLabelLabel, vertex_ids[3]); + CheckVertexExists(*client, kTriggerSetEdgePropertyLabel, edges[0].edge_id); + CheckVertexExists(*client, kTriggerRemovedEdgePropertyLabel, edges[1].edge_id); + + DropOnUpdateTriggers(*client); + client->Execute("MATCH (n) DETACH DELETE n;"); + client->DiscardAll(); + } + }; + constexpr bool kBeforeCommit = true; + constexpr bool kAfterCommit = false; + run_update_trigger_tests(kBeforeCommit); + run_update_trigger_tests(kAfterCommit); + + return 0; +} diff --git a/tests/e2e/triggers/workloads.yaml b/tests/e2e/triggers/workloads.yaml new file mode 100644 index 000000000..55d82d485 --- /dev/null +++ b/tests/e2e/triggers/workloads.yaml @@ -0,0 +1,24 @@ +bolt_port: &bolt_port "7687" +template_cluster: &template_cluster + cluster: + main: + args: ["--bolt-port", *bolt_port, "--log-level=TRACE"] + log_file: "triggers-e2e.log" + setup_queries: [] + validation_queries: [] + +workloads: + - name: "ON CREATE Triggers" + binary: "tests/e2e/triggers/memgraph__e2e__triggers__on_create" + args: ["--bolt-port", *bolt_port] + <<: *template_cluster + - name: "ON UPDATE Triggers" + binary: "tests/e2e/triggers/memgraph__e2e__triggers__on_update" + args: ["--bolt-port", *bolt_port] + <<: *template_cluster + - name: "ON DELETE Triggers" + binary: "tests/e2e/triggers/memgraph__e2e__triggers__on_delete" + args: ["--bolt-port", *bolt_port] + <<: *template_cluster + + diff --git a/tests/unit/query_trigger.cpp b/tests/unit/query_trigger.cpp index a5bb3ad91..e14822208 100644 --- a/tests/unit/query_trigger.cpp +++ b/tests/unit/query_trigger.cpp @@ -34,15 +34,23 @@ void CheckTypedValueSize(const query::TriggerContext &trigger_context, const que ASSERT_EQ(typed_values.ValueList().size(), expected_size); }; -void CheckLabelMap(const query::TriggerContext &trigger_context, const query::TriggerIdentifierTag tag, - const size_t expected, query::DbAccessor &dba) { +void CheckLabelList(const query::TriggerContext 
&trigger_context, const query::TriggerIdentifierTag tag, + const size_t expected, query::DbAccessor &dba) { auto typed_values = trigger_context.GetTypedValue(tag, &dba); - ASSERT_TRUE(typed_values.IsMap()); - auto &typed_values_map = typed_values.ValueMap(); + ASSERT_TRUE(typed_values.IsList()); + const auto &label_maps = typed_values.ValueList(); size_t value_count = 0; - for (const auto &[label, values] : typed_values_map) { - ASSERT_TRUE(values.IsList()); - value_count += values.ValueList().size(); + for (const auto &label_map : label_maps) { + ASSERT_TRUE(label_map.IsMap()); + const auto &typed_values_map = label_map.ValueMap(); + ASSERT_EQ(typed_values_map.size(), 2); + const auto label_it = typed_values_map.find("label"); + ASSERT_NE(label_it, typed_values_map.end()); + ASSERT_TRUE(label_it->second.IsString()); + const auto vertices_it = typed_values_map.find("vertices"); + ASSERT_NE(vertices_it, typed_values_map.end()); + ASSERT_TRUE(vertices_it->second.IsList()); + value_count += vertices_it->second.ValueList().size(); } ASSERT_EQ(value_count, expected); }; @@ -181,8 +189,8 @@ TEST_F(TriggerContextTest, ValidObjectsTest) { CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::REMOVED_VERTEX_PROPERTIES, vertex_count, dba); CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::REMOVED_EDGE_PROPERTIES, edge_count, dba); - CheckLabelMap(trigger_context, query::TriggerIdentifierTag::SET_VERTEX_LABELS, vertex_count, dba); - CheckLabelMap(trigger_context, query::TriggerIdentifierTag::REMOVED_VERTEX_LABELS, vertex_count, dba); + CheckLabelList(trigger_context, query::TriggerIdentifierTag::SET_VERTEX_LABELS, vertex_count, dba); + CheckLabelList(trigger_context, query::TriggerIdentifierTag::REMOVED_VERTEX_LABELS, vertex_count, dba); CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::UPDATED_VERTICES, 4 * vertex_count, dba); CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::UPDATED_EDGES, 2 * edge_count, dba); @@ 
-223,8 +231,8 @@ TEST_F(TriggerContextTest, ValidObjectsTest) { CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::REMOVED_VERTEX_PROPERTIES, vertex_count, dba); CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::REMOVED_EDGE_PROPERTIES, edge_count, dba); - CheckLabelMap(trigger_context, query::TriggerIdentifierTag::SET_VERTEX_LABELS, vertex_count, dba); - CheckLabelMap(trigger_context, query::TriggerIdentifierTag::REMOVED_VERTEX_LABELS, vertex_count, dba); + CheckLabelList(trigger_context, query::TriggerIdentifierTag::SET_VERTEX_LABELS, vertex_count, dba); + CheckLabelList(trigger_context, query::TriggerIdentifierTag::REMOVED_VERTEX_LABELS, vertex_count, dba); CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::UPDATED_VERTICES, 4 * vertex_count, dba); CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::UPDATED_EDGES, 2 * edge_count, dba); @@ -282,8 +290,8 @@ TEST_F(TriggerContextTest, ReturnCreateOnlyEvent) { CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::REMOVED_VERTEX_PROPERTIES, 0, dba); CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::REMOVED_EDGE_PROPERTIES, 0, dba); - CheckLabelMap(trigger_context, query::TriggerIdentifierTag::SET_VERTEX_LABELS, 0, dba); - CheckLabelMap(trigger_context, query::TriggerIdentifierTag::REMOVED_VERTEX_LABELS, 0, dba); + CheckLabelList(trigger_context, query::TriggerIdentifierTag::SET_VERTEX_LABELS, 0, dba); + CheckLabelList(trigger_context, query::TriggerIdentifierTag::REMOVED_VERTEX_LABELS, 0, dba); CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::UPDATED_VERTICES, 0, dba); CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::UPDATED_EDGES, 0, dba); From 1abee1ed3af8bca4813ddb1b981a14926e8ccaac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Mon, 24 May 2021 09:31:18 +0200 Subject: [PATCH 48/63] Use single thread for after triggers (#154) --- src/query/interpreter.cpp | 21 
+++++++++++++-------- src/query/interpreter.hpp | 3 +-- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp index dbd8c1828..c67b5a91f 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -1698,15 +1698,20 @@ void Interpreter::Commit() { } } + // The ordered execution of after commit triggers is heavily depending on the exclusiveness of db_accessor_->Commit(): + // only one of the transactions can be commiting at the same time, so when the commit is finished, that transaction + // probably will schedule its after commit triggers, because the other transactions that want to commit are still + // waiting for commiting or one of them just started commiting its changes. + // This means the ordered execution of after commit triggers are not guaranteed. if (trigger_context && interpreter_context_->trigger_store->AfterCommitTriggers().size() > 0) { - background_thread_.AddTask([trigger_context = std::move(*trigger_context), - interpreter_context = this->interpreter_context_, - user_transaction = std::shared_ptr(std::move(db_accessor_))]() mutable { - RunTriggersIndividually(interpreter_context->trigger_store->AfterCommitTriggers(), interpreter_context, - std::move(trigger_context)); - user_transaction->FinalizeTransaction(); - SPDLOG_DEBUG("Finished executing after commit triggers"); // NOLINT(bugprone-lambda-function-name) - }); + interpreter_context_->after_commit_trigger_pool.AddTask( + [trigger_context = std::move(*trigger_context), interpreter_context = this->interpreter_context_, + user_transaction = std::shared_ptr(std::move(db_accessor_))]() mutable { + RunTriggersIndividually(interpreter_context->trigger_store->AfterCommitTriggers(), interpreter_context, + std::move(trigger_context)); + user_transaction->FinalizeTransaction(); + SPDLOG_DEBUG("Finished executing after commit triggers"); // NOLINT(bugprone-lambda-function-name) + }); } execution_db_accessor_.reset(); diff --git 
a/src/query/interpreter.hpp b/src/query/interpreter.hpp index bd76731db..f443caf89 100644 --- a/src/query/interpreter.hpp +++ b/src/query/interpreter.hpp @@ -169,6 +169,7 @@ struct InterpreterContext { utils::SkipList plan_cache; std::optional trigger_store; + utils::ThreadPool after_commit_trigger_pool{1}; }; /// Function that is used to tell all active interpreters that they should stop @@ -305,8 +306,6 @@ class Interpreter final { bool in_explicit_transaction_{false}; bool expect_rollback_{false}; - utils::ThreadPool background_thread_{1}; - PreparedQuery PrepareTransactionQuery(std::string_view query_upper); void Commit(); void AdvanceCommand(); From e8a1d15a55580732ba52820859ce8f6cfa7cfb62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Tue, 25 May 2021 14:18:27 +0200 Subject: [PATCH 49/63] Add filtering based on registered event types (#155) * Add filtering to TriggerContextCollector * Add all predefined variable to ANY triggers * Make variable names consistent with event types --- CHANGELOG.md | 1 + src/query/CMakeLists.txt | 1 + src/query/interpreter.cpp | 75 +-- src/query/plan/operator.cpp | 23 +- src/query/trigger.cpp | 442 +---------------- src/query/trigger.hpp | 364 +------------- src/query/trigger_context.cpp | 549 ++++++++++++++++++++++ src/query/trigger_context.hpp | 353 ++++++++++++++ tests/e2e/triggers/on_update_triggers.cpp | 6 +- tests/unit/query_trigger.cpp | 294 +++++++++++- 10 files changed, 1245 insertions(+), 863 deletions(-) create mode 100644 src/query/trigger_context.cpp create mode 100644 src/query/trigger_context.hpp diff --git a/CHANGELOG.md b/CHANGELOG.md index 95fac9381..e281fe50d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ### Major Feature and Improvements +* Added triggers. 
* Replaced mg_client with mgconsole ### Bug Fixes diff --git a/src/query/CMakeLists.txt b/src/query/CMakeLists.txt index 5c692e44e..b41e6380c 100644 --- a/src/query/CMakeLists.txt +++ b/src/query/CMakeLists.txt @@ -33,6 +33,7 @@ set(mg_query_sources procedure/py_module.cpp serialization/property_value.cpp trigger.cpp + trigger_context.cpp typed_value.cpp) add_library(mg-query STATIC ${mg_query_sources}) diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp index c67b5a91f..67fa4b279 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -606,58 +606,6 @@ InterpreterContext::InterpreterContext(storage::Storage *db, const std::filesyst Interpreter::Interpreter(InterpreterContext *interpreter_context) : interpreter_context_(interpreter_context) { MG_ASSERT(interpreter_context_, "Interpreter context must not be NULL"); - // try { - // { - // auto storage_acc = interpreter_context_->db->Access(); - // DbAccessor dba(&storage_acc); - // auto triggers_acc = interpreter_context_->before_commit_triggers.access(); - // triggers_acc.insert(Trigger{"BeforeDelete", - // "UNWIND deletedVertices as u CREATE(:DELETED_VERTEX {id: id(u) + 10})", - // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock, - // TriggerEventType::VERTEX_DELETE}); - // triggers_acc.insert(Trigger{"BeforeUpdatePropertyi", - // "UNWIND assignedVertexProperties as u SET u.vertex.two = u.new", - // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock, - // TriggerEventType::VERTEX_UPDATE}); - // triggers_acc.insert(Trigger{"BeforeDeleteEdge", "UNWIND deletedEdges as u CREATE(:DELETED_EDGE {id: id(u) +10}) - // ", - // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock, - // TriggerEventType::EDGE_DELETE}); - // // triggers_acc.insert(Trigger{"BeforeDelete2", "UNWIND deletedEdges as u SET u.deleted = 0", - // // &interpreter_context_->ast_cache, &dba, - // // &interpreter_context_->antlr_lock}); - // 
triggers_acc.insert(Trigger{"BeforeDeleteProcedure", - // "CALL script.procedure('VERTEX_UPDATE', updatedVertices) YIELD * RETURN *", - // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock, - // TriggerEventType::VERTEX_UPDATE}); - // triggers_acc.insert(Trigger{"BeforeCreator", "UNWIND createdVertices as u SET u.before = id(u) + 10", - // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock, - // TriggerEventType::VERTEX_CREATE}); - // triggers_acc.insert(Trigger{"BeforeCreatorEdge", "UNWIND createdEdges as u SET u.before = id(u) + 10", - // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock, - // TriggerEventType::EDGE_CREATE}); - // triggers_acc.insert(Trigger{"BeforeSetLabelProcedure", - // "CALL label.procedure('VERTEX_UPDATE', assignedVertexLabels) YIELD * RETURN *", - // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock, - // TriggerEventType::VERTEX_UPDATE}); - // } - // { - // auto storage_acc = interpreter_context->db->Access(); - // DbAccessor dba(&storage_acc); - // auto triggers_acc = interpreter_context->after_commit_triggers.access(); - // triggers_acc.insert(Trigger{"AfterDelete", "UNWIND deletedVertices as u CREATE(:DELETED {id: u.id + 100})", - // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock, - // TriggerEventType::VERTEX_DELETE}); - // triggers_acc.insert(Trigger{"AfterCreator", "UNWIND createdVertices as u SET u.after = u.id + 100", - // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock, - // TriggerEventType::VERTEX_CREATE}); - // triggers_acc.insert(Trigger{ - // "AfterUpdateProcedure", "CALL script.procedure('UPDATE',updatedObjects) YIELD * RETURN *", - // &interpreter_context_->ast_cache, &dba, &interpreter_context_->antlr_lock, TriggerEventType::UPDATE}); - // } - // } catch (const utils::BasicException &e) { - // spdlog::critical("Failed to create a trigger because: {}", e.what()); - // } } 
PreparedQuery Interpreter::PrepareTransactionQuery(std::string_view query_upper) { @@ -675,7 +623,7 @@ PreparedQuery Interpreter::PrepareTransactionQuery(std::string_view query_upper) execution_db_accessor_.emplace(db_accessor_.get()); if (interpreter_context_->trigger_store->HasTriggers()) { - trigger_context_collector_.emplace(); + trigger_context_collector_.emplace(interpreter_context_->trigger_store->GetEventTypes()); } }; } else if (query_upper == "COMMIT") { @@ -1504,7 +1452,7 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, execution_db_accessor_.emplace(db_accessor_.get()); if (utils::Downcast(parsed_query.query) && interpreter_context_->trigger_store->HasTriggers()) { - trigger_context_collector_.emplace(); + trigger_context_collector_.emplace(interpreter_context_->trigger_store->GetEventTypes()); } } @@ -1648,6 +1596,7 @@ void Interpreter::Commit() { std::optional trigger_context = std::nullopt; if (trigger_context_collector_) { trigger_context.emplace(std::move(*trigger_context_collector_).TransformToTriggerContext()); + trigger_context_collector_.reset(); } if (trigger_context) { @@ -1667,6 +1616,12 @@ void Interpreter::Commit() { SPDLOG_DEBUG("Finished executing before commit triggers"); } + const auto reset_necessary_members = [this]() { + execution_db_accessor_.reset(); + db_accessor_.reset(); + trigger_context_collector_.reset(); + }; + auto maybe_constraint_violation = db_accessor_->Commit(); if (maybe_constraint_violation.HasError()) { const auto &constraint_violation = maybe_constraint_violation.GetError(); @@ -1675,9 +1630,7 @@ void Interpreter::Commit() { auto label_name = execution_db_accessor_->LabelToName(constraint_violation.label); MG_ASSERT(constraint_violation.properties.size() == 1U); auto property_name = execution_db_accessor_->PropertyToName(*constraint_violation.properties.begin()); - execution_db_accessor_.reset(); - db_accessor_.reset(); - trigger_context_collector_.reset(); + 
reset_necessary_members(); throw QueryException("Unable to commit due to existence constraint violation on :{}({})", label_name, property_name); break; @@ -1688,9 +1641,7 @@ void Interpreter::Commit() { utils::PrintIterable( property_names_stream, constraint_violation.properties, ", ", [this](auto &stream, const auto &prop) { stream << execution_db_accessor_->PropertyToName(prop); }); - execution_db_accessor_.reset(); - db_accessor_.reset(); - trigger_context_collector_.reset(); + reset_necessary_members(); throw QueryException("Unable to commit due to unique constraint violation on :{}({})", label_name, property_names_stream.str()); break; @@ -1714,9 +1665,7 @@ void Interpreter::Commit() { }); } - execution_db_accessor_.reset(); - db_accessor_.reset(); - trigger_context_collector_.reset(); + reset_necessary_members(); SPDLOG_DEBUG("Finished comitting the transaction"); } diff --git a/src/query/plan/operator.cpp b/src/query/plan/operator.cpp index 49698bc20..d29e980b8 100644 --- a/src/query/plan/operator.cpp +++ b/src/query/plan/operator.cpp @@ -1873,7 +1873,8 @@ bool Delete::DeleteCursor::Pull(Frame &frame, ExecutionContext &context) { throw QueryRuntimeException("Unexpected error when deleting a node."); } } - if (context.trigger_context_collector && res.GetValue()) { + if (context.trigger_context_collector && + context.trigger_context_collector->ShouldRegisterDeletedObject() && res.GetValue()) { context.trigger_context_collector->RegisterDeletedObject(res.GetValue()->first); for (const auto &deleted_edge : res.GetValue()->second) { context.trigger_context_collector->RegisterDeletedObject(deleted_edge); @@ -2025,6 +2026,9 @@ template void SetPropertiesOnRecord(TRecordAccessor *record, const TypedValue &rhs, SetProperties::Op op, ExecutionContext *context) { std::optional> old_values; + const bool should_register_change = + context->trigger_context_collector && + context->trigger_context_collector->ShouldRegisterObjectPropertyChange(); if (op == 
SetProperties::Op::REPLACE) { auto maybe_value = record->ClearProperties(); if (maybe_value.HasError()) { @@ -2041,7 +2045,7 @@ void SetPropertiesOnRecord(TRecordAccessor *record, const TypedValue &rhs, SetPr } } - if (context->trigger_context_collector) { + if (should_register_change) { old_values.emplace(std::move(*maybe_value)); } } @@ -2063,10 +2067,10 @@ void SetPropertiesOnRecord(TRecordAccessor *record, const TypedValue &rhs, SetPr return *maybe_props; }; - auto register_set_property = [&](auto returned_old_value, auto key, auto new_value) { + auto register_set_property = [&](auto &&returned_old_value, auto key, auto &&new_value) { auto old_value = [&]() -> storage::PropertyValue { if (!old_values) { - return std::move(returned_old_value); + return std::forward(returned_old_value); } if (auto it = old_values->find(key); it != old_values->end()) { @@ -2075,8 +2079,9 @@ void SetPropertiesOnRecord(TRecordAccessor *record, const TypedValue &rhs, SetPr return {}; }(); - context->trigger_context_collector->RegisterSetObjectProperty(*record, key, TypedValue(std::move(old_value)), - TypedValue(std::move(new_value))); + + context->trigger_context_collector->RegisterSetObjectProperty( + *record, key, TypedValue(std::move(old_value)), TypedValue(std::forward(new_value))); }; auto set_props = [&, record](auto properties) { @@ -2096,7 +2101,7 @@ void SetPropertiesOnRecord(TRecordAccessor *record, const TypedValue &rhs, SetPr } } - if (context->trigger_context_collector) { + if (should_register_change) { register_set_property(std::move(*maybe_error), kv.first, std::move(kv.second)); } } @@ -2113,7 +2118,7 @@ void SetPropertiesOnRecord(TRecordAccessor *record, const TypedValue &rhs, SetPr for (const auto &kv : rhs.ValueMap()) { auto key = context->db_accessor->NameToProperty(kv.first); auto old_value = PropsSetChecked(record, key, kv.second); - if (context->trigger_context_collector) { + if (should_register_change) { register_set_property(std::move(old_value), key, 
kv.second); } } @@ -2125,7 +2130,7 @@ void SetPropertiesOnRecord(TRecordAccessor *record, const TypedValue &rhs, SetPr "map."); } - if (context->trigger_context_collector && old_values) { + if (should_register_change && old_values) { // register removed properties for (auto &[property_id, property_value] : *old_values) { context->trigger_context_collector->RegisterRemovedObjectProperty(*record, property_id, diff --git a/src/query/trigger.cpp b/src/query/trigger.cpp index 9f7edba84..3f4b0ccb4 100644 --- a/src/query/trigger.cpp +++ b/src/query/trigger.cpp @@ -14,7 +14,6 @@ namespace query { namespace { - auto IdentifierString(const TriggerIdentifierTag tag) noexcept { switch (tag) { case TriggerIdentifierTag::CREATED_VERTICES: @@ -36,10 +35,10 @@ auto IdentifierString(const TriggerIdentifierTag tag) noexcept { return "deletedObjects"; case TriggerIdentifierTag::SET_VERTEX_PROPERTIES: - return "assignedVertexProperties"; + return "setVertexProperties"; case TriggerIdentifierTag::SET_EDGE_PROPERTIES: - return "assignedEdgeProperties"; + return "setEdgeProperties"; case TriggerIdentifierTag::REMOVED_VERTEX_PROPERTIES: return "removedVertexProperties"; @@ -48,7 +47,7 @@ auto IdentifierString(const TriggerIdentifierTag tag) noexcept { return "removedEdgeProperties"; case TriggerIdentifierTag::SET_VERTEX_LABELS: - return "assignedVertexLabels"; + return "setVertexLabels"; case TriggerIdentifierTag::REMOVED_VERTEX_LABELS: return "removedVertexLabels"; @@ -87,7 +86,13 @@ std::vector> GetPredefinedIdentifier switch (event_type) { case EventType::ANY: - return {}; + return TagsToIdentifiers( + IdentifierTag::CREATED_VERTICES, IdentifierTag::CREATED_EDGES, IdentifierTag::CREATED_OBJECTS, + IdentifierTag::DELETED_VERTICES, IdentifierTag::DELETED_EDGES, IdentifierTag::DELETED_OBJECTS, + IdentifierTag::SET_VERTEX_PROPERTIES, IdentifierTag::SET_EDGE_PROPERTIES, + IdentifierTag::REMOVED_VERTEX_PROPERTIES, IdentifierTag::REMOVED_EDGE_PROPERTIES, + IdentifierTag::SET_VERTEX_LABELS, 
IdentifierTag::REMOVED_VERTEX_LABELS, IdentifierTag::UPDATED_VERTICES, + IdentifierTag::UPDATED_EDGES, IdentifierTag::UPDATED_OBJECTS); case EventType::CREATE: return TagsToIdentifiers(IdentifierTag::CREATED_OBJECTS); @@ -120,420 +125,7 @@ std::vector> GetPredefinedIdentifier IdentifierTag::UPDATED_EDGES); } } - -template -concept WithToMap = requires(const T value, DbAccessor *dba) { - { value.ToMap(dba) } - ->std::same_as>; -}; - -template -TypedValue ToTypedValue(const T &value, DbAccessor *dba) { - return TypedValue{value.ToMap(dba)}; -} - -template -TypedValue ToTypedValue(const detail::CreatedObject &created_object, [[maybe_unused]] DbAccessor *dba) { - return TypedValue{created_object.object}; -} - -template -TypedValue ToTypedValue(const detail::DeletedObject &deleted_object, [[maybe_unused]] DbAccessor *dba) { - return TypedValue{deleted_object.object}; -} - -template -concept WithIsValid = requires(const T value) { - { value.IsValid() } - ->std::same_as; -}; - -template -concept ConvertableToTypedValue = requires(T value, DbAccessor *dba) { - { ToTypedValue(value, dba) } - ->std::same_as; -} -&&WithIsValid; - -template -concept LabelUpdateContext = utils::SameAsAnyOf; - -template -TypedValue ToTypedValue(const std::vector &values, DbAccessor *dba) { - std::unordered_map> vertices_by_labels; - - for (const auto &value : values) { - if (value.IsValid()) { - vertices_by_labels[value.label_id].emplace_back(value.object); - } - } - - TypedValue result{std::vector{}}; - auto &typed_values = result.ValueList(); - for (auto &[label_id, vertices] : vertices_by_labels) { - typed_values.emplace_back(std::map{ - {std::string{"label"}, TypedValue(dba->LabelToName(label_id))}, - {std::string{"vertices"}, TypedValue(std::move(vertices))}, - }); - } - - return result; -} - -template -TypedValue ToTypedValue(const std::vector &values, DbAccessor *dba) requires(!LabelUpdateContext) { - TypedValue result{std::vector{}}; - auto &typed_values = result.ValueList(); - 
typed_values.reserve(values.size()); - - for (const auto &value : values) { - if (value.IsValid()) { - typed_values.push_back(ToTypedValue(value, dba)); - } - } - - return result; -} - -template -const char *TypeToString() { - if constexpr (std::same_as>) { - return "created_vertex"; - } else if constexpr (std::same_as>) { - return "created_edge"; - } else if constexpr (std::same_as>) { - return "deleted_vertex"; - } else if constexpr (std::same_as>) { - return "deleted_edge"; - } else if constexpr (std::same_as>) { - return "set_vertex_property"; - } else if constexpr (std::same_as>) { - return "set_edge_property"; - } else if constexpr (std::same_as>) { - return "removed_vertex_property"; - } else if constexpr (std::same_as>) { - return "removed_edge_property"; - } else if constexpr (std::same_as) { - return "set_vertex_label"; - } else if constexpr (std::same_as) { - return "removed_vertex_label"; - } -} - -template -concept ContextInfo = WithToMap &&WithIsValid; - -template -TypedValue Concatenate(DbAccessor *dba, const std::vector &...args) { - const auto size = (args.size() + ...); - TypedValue result{std::vector{}}; - auto &concatenated = result.ValueList(); - concatenated.reserve(size); - - const auto add_to_concatenated = [&](const std::vector &values) { - for (const auto &value : values) { - if (value.IsValid()) { - auto map = value.ToMap(dba); - map["event_type"] = TypeToString(); - concatenated.emplace_back(std::move(map)); - } - } - }; - - (add_to_concatenated(args), ...); - - return result; -} - -template -concept WithEmpty = requires(const T value) { - { value.empty() } - ->std::same_as; -}; - -template -bool AnyContainsValue(const TContainer &...value_containers) { - return (!value_containers.empty() || ...); -} } // namespace -namespace detail { -bool SetVertexLabel::IsValid() const { return object.IsVisible(storage::View::OLD); } - -std::map SetVertexLabel::ToMap(DbAccessor *dba) const { - return {{"vertex", TypedValue{object}}, {"label", 
TypedValue{dba->LabelToName(label_id)}}}; -} - -bool RemovedVertexLabel::IsValid() const { return object.IsVisible(storage::View::OLD); } - -std::map RemovedVertexLabel::ToMap(DbAccessor *dba) const { - return {{"vertex", TypedValue{object}}, {"label", TypedValue{dba->LabelToName(label_id)}}}; -} -} // namespace detail - -const char *TriggerEventTypeToString(const TriggerEventType event_type) { - switch (event_type) { - case TriggerEventType::ANY: - return "ANY"; - - case TriggerEventType::CREATE: - return "CREATE"; - - case TriggerEventType::VERTEX_CREATE: - return "() CREATE"; - - case TriggerEventType::EDGE_CREATE: - return "--> CREATE"; - - case TriggerEventType::DELETE: - return "DELETE"; - - case TriggerEventType::VERTEX_DELETE: - return "() DELETE"; - - case TriggerEventType::EDGE_DELETE: - return "--> DELETE"; - - case TriggerEventType::UPDATE: - return "UPDATE"; - - case TriggerEventType::VERTEX_UPDATE: - return "() UPDATE"; - - case TriggerEventType::EDGE_UPDATE: - return "--> UPDATE"; - } -} - -void TriggerContext::AdaptForAccessor(DbAccessor *accessor) { - { - // adapt created_vertices_ - auto it = created_vertices_.begin(); - for (auto &created_vertex : created_vertices_) { - if (auto maybe_vertex = accessor->FindVertex(created_vertex.object.Gid(), storage::View::OLD); maybe_vertex) { - *it = detail::CreatedObject{*maybe_vertex}; - ++it; - } - } - created_vertices_.erase(it, created_vertices_.end()); - } - - // deleted_vertices_ should keep the transaction context of the transaction which deleted it - // because no other transaction can modify an object after it's deleted so it should be the - // latest state of the object - - const auto adapt_context_with_vertex = [accessor](auto *values) { - auto it = values->begin(); - for (auto &value : *values) { - if (auto maybe_vertex = accessor->FindVertex(value.object.Gid(), storage::View::OLD); maybe_vertex) { - *it = std::move(value); - it->object = *maybe_vertex; - ++it; - } - } - values->erase(it, 
values->end()); - }; - - adapt_context_with_vertex(&set_vertex_properties_); - adapt_context_with_vertex(&removed_vertex_properties_); - adapt_context_with_vertex(&set_vertex_labels_); - adapt_context_with_vertex(&removed_vertex_labels_); - - { - // adapt created_edges - auto it = created_edges_.begin(); - for (auto &created_edge : created_edges_) { - const auto maybe_from_vertex = accessor->FindVertex(created_edge.object.From().Gid(), storage::View::OLD); - if (!maybe_from_vertex) { - continue; - } - auto maybe_out_edges = maybe_from_vertex->OutEdges(storage::View::OLD); - MG_ASSERT(maybe_out_edges.HasValue()); - const auto edge_gid = created_edge.object.Gid(); - for (const auto &edge : *maybe_out_edges) { - if (edge.Gid() == edge_gid) { - *it = detail::CreatedObject{edge}; - ++it; - } - } - } - created_edges_.erase(it, created_edges_.end()); - } - - // deleted_edges_ should keep the transaction context of the transaction which deleted it - // because no other transaction can modify an object after it's deleted so it should be the - // latest state of the object - - const auto adapt_context_with_edge = [accessor](auto *values) { - auto it = values->begin(); - for (const auto &value : *values) { - if (auto maybe_vertex = accessor->FindVertex(value.object.From().Gid(), storage::View::OLD); maybe_vertex) { - auto maybe_out_edges = maybe_vertex->OutEdges(storage::View::OLD); - MG_ASSERT(maybe_out_edges.HasValue()); - for (const auto &edge : *maybe_out_edges) { - if (edge.Gid() == value.object.Gid()) { - *it = std::move(value); - it->object = edge; - ++it; - break; - } - } - } - } - values->erase(it, values->end()); - }; - - adapt_context_with_edge(&set_edge_properties_); - adapt_context_with_edge(&removed_edge_properties_); -} - -TypedValue TriggerContext::GetTypedValue(const TriggerIdentifierTag tag, DbAccessor *dba) const { - switch (tag) { - case TriggerIdentifierTag::CREATED_VERTICES: - return ToTypedValue(created_vertices_, dba); - - case 
TriggerIdentifierTag::CREATED_EDGES: - return ToTypedValue(created_edges_, dba); - - case TriggerIdentifierTag::CREATED_OBJECTS: - return Concatenate(dba, created_vertices_, created_edges_); - - case TriggerIdentifierTag::DELETED_VERTICES: - return ToTypedValue(deleted_vertices_, dba); - - case TriggerIdentifierTag::DELETED_EDGES: - return ToTypedValue(deleted_edges_, dba); - - case TriggerIdentifierTag::DELETED_OBJECTS: - return Concatenate(dba, deleted_vertices_, deleted_edges_); - - case TriggerIdentifierTag::SET_VERTEX_PROPERTIES: - return ToTypedValue(set_vertex_properties_, dba); - - case TriggerIdentifierTag::SET_EDGE_PROPERTIES: - return ToTypedValue(set_edge_properties_, dba); - - case TriggerIdentifierTag::REMOVED_VERTEX_PROPERTIES: - return ToTypedValue(removed_vertex_properties_, dba); - - case TriggerIdentifierTag::REMOVED_EDGE_PROPERTIES: - return ToTypedValue(removed_edge_properties_, dba); - - case TriggerIdentifierTag::SET_VERTEX_LABELS: - return ToTypedValue(set_vertex_labels_, dba); - - case TriggerIdentifierTag::REMOVED_VERTEX_LABELS: - return ToTypedValue(removed_vertex_labels_, dba); - - case TriggerIdentifierTag::UPDATED_VERTICES: - return Concatenate(dba, set_vertex_properties_, removed_vertex_properties_, set_vertex_labels_, - removed_vertex_labels_); - - case TriggerIdentifierTag::UPDATED_EDGES: - return Concatenate(dba, set_edge_properties_, removed_edge_properties_); - - case TriggerIdentifierTag::UPDATED_OBJECTS: - return Concatenate(dba, set_vertex_properties_, set_edge_properties_, removed_vertex_properties_, - removed_edge_properties_, set_vertex_labels_, removed_vertex_labels_); - } -} - -bool TriggerContext::ShouldEventTrigger(const TriggerEventType event_type) const { - using EventType = TriggerEventType; - switch (event_type) { - case EventType::ANY: - return true; - - case EventType::CREATE: - return AnyContainsValue(created_vertices_, created_edges_); - - case EventType::VERTEX_CREATE: - return 
AnyContainsValue(created_vertices_); - - case EventType::EDGE_CREATE: - return AnyContainsValue(created_edges_); - - case EventType::DELETE: - return AnyContainsValue(deleted_vertices_, deleted_edges_); - - case EventType::VERTEX_DELETE: - return AnyContainsValue(deleted_vertices_); - - case EventType::EDGE_DELETE: - return AnyContainsValue(deleted_edges_); - - case EventType::UPDATE: - return AnyContainsValue(set_vertex_properties_, set_edge_properties_, removed_vertex_properties_, - removed_edge_properties_, set_vertex_labels_, removed_vertex_labels_); - - case EventType::VERTEX_UPDATE: - return AnyContainsValue(set_vertex_properties_, removed_vertex_properties_, set_vertex_labels_, - removed_vertex_labels_); - - case EventType::EDGE_UPDATE: - return AnyContainsValue(set_edge_properties_, removed_edge_properties_); - } -} - -void TriggerContextCollector::UpdateLabelMap(const VertexAccessor vertex, const storage::LabelId label_id, - const LabelChange change) { - auto ®istry = GetRegistry(); - if (registry.created_objects_.count(vertex.Gid())) { - return; - } - - if (auto it = label_changes_.find({vertex, label_id}); it != label_changes_.end()) { - it->second = std::clamp(it->second + LabelChangeToInt(change), -1, 1); - return; - } - - label_changes_.emplace(std::make_pair(vertex, label_id), LabelChangeToInt(change)); -} - -void TriggerContextCollector::RegisterSetVertexLabel(const VertexAccessor &vertex, const storage::LabelId label_id) { - UpdateLabelMap(vertex, label_id, LabelChange::ADD); -} - -void TriggerContextCollector::RegisterRemovedVertexLabel(const VertexAccessor &vertex, - const storage::LabelId label_id) { - UpdateLabelMap(vertex, label_id, LabelChange::REMOVE); -} - -int8_t TriggerContextCollector::LabelChangeToInt(LabelChange change) { - static_assert(std::is_same_v, int8_t>, - "The underlying type of LabelChange doesn't match the return type!"); - return static_cast(change); -} - -TriggerContext TriggerContextCollector::TransformToTriggerContext() 
&& { - auto [created_vertices, deleted_vertices, set_vertex_properties, removed_vertex_properties] = - std::move(vertex_registry_).Summarize(); - auto [set_vertex_labels, removed_vertex_labels] = LabelMapToList(std::move(label_changes_)); - auto [created_edges, deleted_edges, set_edge_properties, removed_edge_properties] = - std::move(edge_registry_).Summarize(); - - return {std::move(created_vertices), std::move(deleted_vertices), - std::move(set_vertex_properties), std::move(removed_vertex_properties), - std::move(set_vertex_labels), std::move(removed_vertex_labels), - std::move(created_edges), std::move(deleted_edges), - std::move(set_edge_properties), std::move(removed_edge_properties)}; -} - -TriggerContextCollector::LabelChangesLists TriggerContextCollector::LabelMapToList(LabelChangesMap &&label_changes) { - std::vector set_vertex_labels; - std::vector removed_vertex_labels; - - for (const auto &[key, label_state] : label_changes) { - if (label_state == LabelChangeToInt(LabelChange::ADD)) { - set_vertex_labels.emplace_back(key.first, key.second); - } else if (label_state == LabelChangeToInt(LabelChange::REMOVE)) { - removed_vertex_labels.emplace_back(key.first, key.second); - } - } - - label_changes.clear(); - - return {std::move(set_vertex_labels), std::move(removed_vertex_labels)}; -} Trigger::Trigger(std::string name, const std::string &query, const std::map &user_parameters, @@ -777,4 +369,18 @@ std::vector TriggerStore::GetTriggerInfo() const { return info; } + +std::unordered_set TriggerStore::GetEventTypes() const { + std::unordered_set event_types; + + const auto add_event_types = [&](const utils::SkipList &trigger_list) { + for (const auto &trigger : trigger_list.access()) { + event_types.insert(trigger.EventType()); + } + }; + + add_event_types(before_commit_triggers_); + add_event_types(after_commit_triggers_); + return event_types; +} } // namespace query diff --git a/src/query/trigger.hpp b/src/query/trigger.hpp index a3e296aa0..1eb22bd08 100644 
--- a/src/query/trigger.hpp +++ b/src/query/trigger.hpp @@ -1,364 +1,23 @@ #pragma once -#include -#include -#include -#include -#include + +#include +#include +#include +#include +#include #include +#include #include "kvstore/kvstore.hpp" #include "query/cypher_query_interpreter.hpp" +#include "query/db_accessor.hpp" #include "query/frontend/ast/ast.hpp" -#include "query/typed_value.hpp" +#include "query/trigger_context.hpp" #include "storage/v2/property_value.hpp" -#include "utils/concepts.hpp" -#include "utils/fnv.hpp" +#include "utils/skip_list.hpp" +#include "utils/spin_lock.hpp" namespace query { -namespace detail { -template -concept ObjectAccessor = utils::SameAsAnyOf; - -template -const char *ObjectString() { - if constexpr (std::same_as) { - return "vertex"; - } else { - return "edge"; - } -} - -template -struct CreatedObject { - explicit CreatedObject(const TAccessor &object) : object{object} {} - - bool IsValid() const { return object.IsVisible(storage::View::OLD); } - std::map ToMap([[maybe_unused]] DbAccessor *dba) const { - return {{ObjectString(), TypedValue{object}}}; - } - - TAccessor object; -}; - -template -struct DeletedObject { - explicit DeletedObject(const TAccessor &object) : object{object} {} - - bool IsValid() const { return object.IsVisible(storage::View::OLD); } - std::map ToMap([[maybe_unused]] DbAccessor *dba) const { - return {{ObjectString(), TypedValue{object}}}; - } - - TAccessor object; -}; - -template -struct SetObjectProperty { - explicit SetObjectProperty(const TAccessor &object, storage::PropertyId key, TypedValue old_value, - TypedValue new_value) - : object{object}, key{key}, old_value{std::move(old_value)}, new_value{std::move(new_value)} {} - - std::map ToMap(DbAccessor *dba) const { - return {{ObjectString(), TypedValue{object}}, - {"key", TypedValue{dba->PropertyToName(key)}}, - {"old", old_value}, - {"new", new_value}}; - } - - bool IsValid() const { return object.IsVisible(storage::View::OLD); } - - TAccessor object; 
- storage::PropertyId key; - TypedValue old_value; - TypedValue new_value; -}; - -template -struct RemovedObjectProperty { - explicit RemovedObjectProperty(const TAccessor &object, storage::PropertyId key, TypedValue old_value) - : object{object}, key{key}, old_value{std::move(old_value)} {} - - std::map ToMap(DbAccessor *dba) const { - return {{detail::ObjectString(), TypedValue{object}}, - {"key", TypedValue{dba->PropertyToName(key)}}, - {"old", old_value}}; - } - - bool IsValid() const { return object.IsVisible(storage::View::OLD); } - - TAccessor object; - storage::PropertyId key; - TypedValue old_value; -}; - -struct SetVertexLabel { - explicit SetVertexLabel(const VertexAccessor &vertex, const storage::LabelId label_id) - : object{vertex}, label_id{label_id} {} - - std::map ToMap(DbAccessor *dba) const; - bool IsValid() const; - - VertexAccessor object; - storage::LabelId label_id; -}; - -struct RemovedVertexLabel { - explicit RemovedVertexLabel(const VertexAccessor &vertex, const storage::LabelId label_id) - : object{vertex}, label_id{label_id} {} - - std::map ToMap(DbAccessor *dba) const; - bool IsValid() const; - - VertexAccessor object; - storage::LabelId label_id; -}; -} // namespace detail - -enum class TriggerIdentifierTag : uint8_t { - CREATED_VERTICES, - CREATED_EDGES, - CREATED_OBJECTS, - DELETED_VERTICES, - DELETED_EDGES, - DELETED_OBJECTS, - SET_VERTEX_PROPERTIES, - SET_EDGE_PROPERTIES, - REMOVED_VERTEX_PROPERTIES, - REMOVED_EDGE_PROPERTIES, - SET_VERTEX_LABELS, - REMOVED_VERTEX_LABELS, - UPDATED_VERTICES, - UPDATED_EDGES, - UPDATED_OBJECTS -}; - -enum class TriggerEventType : uint8_t { - ANY, // Triggers always - VERTEX_CREATE, - EDGE_CREATE, - CREATE, - VERTEX_DELETE, - EDGE_DELETE, - DELETE, - VERTEX_UPDATE, - EDGE_UPDATE, - UPDATE -}; - -const char *TriggerEventTypeToString(TriggerEventType event_type); - -static_assert(std::is_trivially_copy_constructible_v, - "VertexAccessor is not trivially copy constructible, move it where possible and 
remove this assert"); -static_assert(std::is_trivially_copy_constructible_v, - "EdgeAccessor is not trivially copy constructible, move it where possible and remove this asssert"); - -// Holds the information necessary for triggers -class TriggerContext { - public: - TriggerContext() = default; - TriggerContext(std::vector> created_vertices, - std::vector> deleted_vertices, - std::vector> set_vertex_properties, - std::vector> removed_vertex_properties, - std::vector set_vertex_labels, - std::vector removed_vertex_labels, - std::vector> created_edges, - std::vector> deleted_edges, - std::vector> set_edge_properties, - std::vector> removed_edge_properties) - : created_vertices_{std::move(created_vertices)}, - deleted_vertices_{std::move(deleted_vertices)}, - set_vertex_properties_{std::move(set_vertex_properties)}, - removed_vertex_properties_{std::move(removed_vertex_properties)}, - set_vertex_labels_{std::move(set_vertex_labels)}, - removed_vertex_labels_{std::move(removed_vertex_labels)}, - created_edges_{std::move(created_edges)}, - deleted_edges_{std::move(deleted_edges)}, - set_edge_properties_{std::move(set_edge_properties)}, - removed_edge_properties_{std::move(removed_edge_properties)} {} - TriggerContext(const TriggerContext &) = default; - TriggerContext(TriggerContext &&) = default; - TriggerContext &operator=(const TriggerContext &) = default; - TriggerContext &operator=(TriggerContext &&) = default; - - // Adapt the TriggerContext object inplace for a different DbAccessor - // (each derived accessor, e.g. 
VertexAccessor, gets adapted - // to the sent DbAccessor so they can be used safely) - void AdaptForAccessor(DbAccessor *accessor); - - // Get TypedValue for the identifier defined with tag - TypedValue GetTypedValue(TriggerIdentifierTag tag, DbAccessor *dba) const; - bool ShouldEventTrigger(TriggerEventType) const; - - private: - std::vector> created_vertices_; - std::vector> deleted_vertices_; - std::vector> set_vertex_properties_; - std::vector> removed_vertex_properties_; - std::vector set_vertex_labels_; - std::vector removed_vertex_labels_; - - std::vector> created_edges_; - std::vector> deleted_edges_; - std::vector> set_edge_properties_; - std::vector> removed_edge_properties_; -}; - -// Collects the information necessary for triggers during a single transaction run. -class TriggerContextCollector { - public: - template - void RegisterCreatedObject(const TAccessor &created_object) { - GetRegistry().created_objects_.emplace(created_object.Gid(), detail::CreatedObject{created_object}); - } - - template - void RegisterDeletedObject(const TAccessor &deleted_object) { - auto ®istry = GetRegistry(); - if (registry.created_objects_.count(deleted_object.Gid())) { - return; - } - - registry.deleted_objects_.emplace_back(deleted_object); - } - - template - void RegisterSetObjectProperty(const TAccessor &object, const storage::PropertyId key, TypedValue old_value, - TypedValue new_value) { - auto ®istry = GetRegistry(); - if (registry.created_objects_.count(object.Gid())) { - return; - } - - if (auto it = registry.property_changes_.find({object, key}); it != registry.property_changes_.end()) { - it->second.new_value = std::move(new_value); - return; - } - - registry.property_changes_.emplace(std::make_pair(object, key), - PropertyChangeInfo{std::move(old_value), std::move(new_value)}); - } - - template - void RegisterRemovedObjectProperty(const TAccessor &object, const storage::PropertyId key, TypedValue old_value) { - // property is already removed - if 
(old_value.IsNull()) { - return; - } - - RegisterSetObjectProperty(object, key, std::move(old_value), TypedValue()); - } - - void RegisterSetVertexLabel(const VertexAccessor &vertex, storage::LabelId label_id); - void RegisterRemovedVertexLabel(const VertexAccessor &vertex, storage::LabelId label_id); - [[nodiscard]] TriggerContext TransformToTriggerContext() &&; - - private: - struct HashPair { - template - size_t operator()(const std::pair &pair) const { - using GidType = decltype(std::declval().Gid()); - return utils::HashCombine{}(pair.first.Gid(), pair.second); - } - }; - - struct PropertyChangeInfo { - TypedValue old_value; - TypedValue new_value; - }; - - template - using PropertyChangesMap = - std::unordered_map, PropertyChangeInfo, HashPair>; - - template - using PropertyChangesLists = std::pair>, - std::vector>>; - - template - struct Registry { - using ChangesSummary = - std::tuple>, std::vector>, - std::vector>, - std::vector>>; - - [[nodiscard]] static PropertyChangesLists PropertyMapToList(PropertyChangesMap &&map) { - std::vector> set_object_properties; - std::vector> removed_object_properties; - - for (auto it = map.begin(); it != map.end(); it = map.erase(it)) { - const auto &[key, property_change_info] = *it; - if (property_change_info.old_value.IsNull() && property_change_info.new_value.IsNull()) { - // no change happened on the transaction level - continue; - } - - if (const auto is_equal = property_change_info.old_value == property_change_info.new_value; - is_equal.IsBool() && is_equal.ValueBool()) { - // no change happened on the transaction level - continue; - } - - if (property_change_info.new_value.IsNull()) { - removed_object_properties.emplace_back(key.first, key.second /* property_id */, - std::move(property_change_info.old_value)); - } else { - set_object_properties.emplace_back(key.first, key.second, std::move(property_change_info.old_value), - std::move(property_change_info.new_value)); - } - } - - return 
PropertyChangesLists{std::move(set_object_properties), std::move(removed_object_properties)}; - } - - [[nodiscard]] ChangesSummary Summarize() && { - auto [set_object_properties, removed_object_properties] = PropertyMapToList(std::move(property_changes_)); - std::vector> created_objects_vec; - created_objects_vec.reserve(created_objects_.size()); - std::transform(created_objects_.begin(), created_objects_.end(), std::back_inserter(created_objects_vec), - [](const auto &gid_and_created_object) { return gid_and_created_object.second; }); - created_objects_.clear(); - - return {std::move(created_objects_vec), std::move(deleted_objects_), std::move(set_object_properties), - std::move(removed_object_properties)}; - } - - std::unordered_map> created_objects_; - std::vector> deleted_objects_; - // During the transaction, a single property on a single object could be changed multiple times. - // We want to register only the global change, at the end of the transaction. The change consists of - // the value before the transaction start, and the latest value assigned throughout the transaction. - PropertyChangesMap property_changes_; - }; - - template - Registry &GetRegistry() { - if constexpr (std::same_as) { - return vertex_registry_; - } else { - return edge_registry_; - } - } - - using LabelChangesMap = std::unordered_map, int8_t, HashPair>; - using LabelChangesLists = std::pair, std::vector>; - - enum class LabelChange : int8_t { REMOVE = -1, ADD = 1 }; - - static int8_t LabelChangeToInt(LabelChange change); - - [[nodiscard]] static LabelChangesLists LabelMapToList(LabelChangesMap &&label_changes); - - void UpdateLabelMap(VertexAccessor vertex, storage::LabelId label_id, LabelChange change); - - Registry vertex_registry_; - Registry edge_registry_; - // During the transaction, a single label on a single vertex could be added and removed multiple times. - // We want to register only the global change, at the end of the transaction. 
The change consists of - // the state of the label before the transaction start, and the latest state assigned throughout the transaction. - LabelChangesMap label_changes_; -}; - struct Trigger { explicit Trigger(std::string name, const std::string &query, const std::map &user_parameters, TriggerEventType event_type, @@ -424,6 +83,7 @@ struct TriggerStore { const auto &AfterCommitTriggers() const noexcept { return after_commit_triggers_; } bool HasTriggers() const noexcept { return before_commit_triggers_.size() > 0 || after_commit_triggers_.size() > 0; } + std::unordered_set GetEventTypes() const; private: utils::SpinLock store_lock_; diff --git a/src/query/trigger_context.cpp b/src/query/trigger_context.cpp new file mode 100644 index 000000000..7daca97a5 --- /dev/null +++ b/src/query/trigger_context.cpp @@ -0,0 +1,549 @@ +#include "query/trigger.hpp" + +#include + +#include "query/context.hpp" +#include "query/cypher_query_interpreter.hpp" +#include "query/db_accessor.hpp" +#include "query/frontend/ast/ast.hpp" +#include "query/interpret/frame.hpp" +#include "query/serialization/property_value.hpp" +#include "query/typed_value.hpp" +#include "storage/v2/property_value.hpp" +#include "utils/memory.hpp" + +namespace query { +namespace { +template +concept WithToMap = requires(const T value, DbAccessor *dba) { + { value.ToMap(dba) } + ->std::same_as>; +}; + +template +TypedValue ToTypedValue(const T &value, DbAccessor *dba) { + return TypedValue{value.ToMap(dba)}; +} + +template +TypedValue ToTypedValue(const detail::CreatedObject &created_object, [[maybe_unused]] DbAccessor *dba) { + return TypedValue{created_object.object}; +} + +template +TypedValue ToTypedValue(const detail::DeletedObject &deleted_object, [[maybe_unused]] DbAccessor *dba) { + return TypedValue{deleted_object.object}; +} + +template +concept WithIsValid = requires(const T value) { + { value.IsValid() } + ->std::same_as; +}; + +template +concept ConvertableToTypedValue = requires(T value, 
DbAccessor *dba) { + { ToTypedValue(value, dba) } + ->std::same_as; +} +&&WithIsValid; + +template +concept LabelUpdateContext = utils::SameAsAnyOf; + +template +TypedValue ToTypedValue(const std::vector &values, DbAccessor *dba) { + std::unordered_map> vertices_by_labels; + + for (const auto &value : values) { + if (value.IsValid()) { + vertices_by_labels[value.label_id].emplace_back(value.object); + } + } + + TypedValue result{std::vector{}}; + auto &typed_values = result.ValueList(); + for (auto &[label_id, vertices] : vertices_by_labels) { + typed_values.emplace_back(std::map{ + {std::string{"label"}, TypedValue(dba->LabelToName(label_id))}, + {std::string{"vertices"}, TypedValue(std::move(vertices))}, + }); + } + + return result; +} + +template +TypedValue ToTypedValue(const std::vector &values, DbAccessor *dba) requires(!LabelUpdateContext) { + TypedValue result{std::vector{}}; + auto &typed_values = result.ValueList(); + typed_values.reserve(values.size()); + + for (const auto &value : values) { + if (value.IsValid()) { + typed_values.push_back(ToTypedValue(value, dba)); + } + } + + return result; +} + +template +const char *TypeToString() { + if constexpr (std::same_as>) { + return "created_vertex"; + } else if constexpr (std::same_as>) { + return "created_edge"; + } else if constexpr (std::same_as>) { + return "deleted_vertex"; + } else if constexpr (std::same_as>) { + return "deleted_edge"; + } else if constexpr (std::same_as>) { + return "set_vertex_property"; + } else if constexpr (std::same_as>) { + return "set_edge_property"; + } else if constexpr (std::same_as>) { + return "removed_vertex_property"; + } else if constexpr (std::same_as>) { + return "removed_edge_property"; + } else if constexpr (std::same_as) { + return "set_vertex_label"; + } else if constexpr (std::same_as) { + return "removed_vertex_label"; + } +} + +template +concept ContextInfo = WithToMap &&WithIsValid; + +template +TypedValue Concatenate(DbAccessor *dba, const std::vector 
&...args) { + const auto size = (args.size() + ...); + TypedValue result{std::vector{}}; + auto &concatenated = result.ValueList(); + concatenated.reserve(size); + + const auto add_to_concatenated = [&](const std::vector &values) { + for (const auto &value : values) { + if (value.IsValid()) { + auto map = value.ToMap(dba); + map["event_type"] = TypeToString(); + concatenated.emplace_back(std::move(map)); + } + } + }; + + (add_to_concatenated(args), ...); + + return result; +} + +template +concept WithEmpty = requires(const T value) { + { value.empty() } + ->std::same_as; +}; + +template +bool AnyContainsValue(const TContainer &...value_containers) { + return (!value_containers.empty() || ...); +} + +template +using ChangesSummary = + std::tuple>, std::vector>, + std::vector>, + std::vector>>; + +template +using PropertyChangesLists = + std::pair>, std::vector>>; + +template +[[nodiscard]] PropertyChangesLists PropertyMapToList( + query::TriggerContextCollector::PropertyChangesMap &&map) { + std::vector> set_object_properties; + std::vector> removed_object_properties; + + for (auto it = map.begin(); it != map.end(); it = map.erase(it)) { + const auto &[key, property_change_info] = *it; + if (property_change_info.old_value.IsNull() && property_change_info.new_value.IsNull()) { + // no change happened on the transaction level + continue; + } + + if (const auto is_equal = property_change_info.old_value == property_change_info.new_value; + is_equal.IsBool() && is_equal.ValueBool()) { + // no change happened on the transaction level + continue; + } + + if (property_change_info.new_value.IsNull()) { + removed_object_properties.emplace_back(key.first, key.second /* property_id */, + std::move(property_change_info.old_value)); + } else { + set_object_properties.emplace_back(key.first, key.second, std::move(property_change_info.old_value), + std::move(property_change_info.new_value)); + } + } + + return PropertyChangesLists{std::move(set_object_properties), 
std::move(removed_object_properties)}; +} + +template +[[nodiscard]] ChangesSummary Summarize(query::TriggerContextCollector::Registry &®istry) { + auto [set_object_properties, removed_object_properties] = PropertyMapToList(std::move(registry.property_changes)); + std::vector> created_objects_vec; + created_objects_vec.reserve(registry.created_objects.size()); + std::transform(registry.created_objects.begin(), registry.created_objects.end(), + std::back_inserter(created_objects_vec), + [](const auto &gid_and_created_object) { return gid_and_created_object.second; }); + registry.created_objects.clear(); + + return {std::move(created_objects_vec), std::move(registry.deleted_objects), std::move(set_object_properties), + std::move(removed_object_properties)}; +} +} // namespace + +namespace detail { +bool SetVertexLabel::IsValid() const { return object.IsVisible(storage::View::OLD); } + +std::map SetVertexLabel::ToMap(DbAccessor *dba) const { + return {{"vertex", TypedValue{object}}, {"label", TypedValue{dba->LabelToName(label_id)}}}; +} + +bool RemovedVertexLabel::IsValid() const { return object.IsVisible(storage::View::OLD); } + +std::map RemovedVertexLabel::ToMap(DbAccessor *dba) const { + return {{"vertex", TypedValue{object}}, {"label", TypedValue{dba->LabelToName(label_id)}}}; +} +} // namespace detail + +const char *TriggerEventTypeToString(const TriggerEventType event_type) { + switch (event_type) { + case TriggerEventType::ANY: + return "ANY"; + + case TriggerEventType::CREATE: + return "CREATE"; + + case TriggerEventType::VERTEX_CREATE: + return "() CREATE"; + + case TriggerEventType::EDGE_CREATE: + return "--> CREATE"; + + case TriggerEventType::DELETE: + return "DELETE"; + + case TriggerEventType::VERTEX_DELETE: + return "() DELETE"; + + case TriggerEventType::EDGE_DELETE: + return "--> DELETE"; + + case TriggerEventType::UPDATE: + return "UPDATE"; + + case TriggerEventType::VERTEX_UPDATE: + return "() UPDATE"; + + case TriggerEventType::EDGE_UPDATE: + 
return "--> UPDATE"; + } +} + +void TriggerContext::AdaptForAccessor(DbAccessor *accessor) { + { + // adapt created_vertices_ + auto it = created_vertices_.begin(); + for (auto &created_vertex : created_vertices_) { + if (auto maybe_vertex = accessor->FindVertex(created_vertex.object.Gid(), storage::View::OLD); maybe_vertex) { + *it = detail::CreatedObject{*maybe_vertex}; + ++it; + } + } + created_vertices_.erase(it, created_vertices_.end()); + } + + // deleted_vertices_ should keep the transaction context of the transaction which deleted it + // because no other transaction can modify an object after it's deleted so it should be the + // latest state of the object + + const auto adapt_context_with_vertex = [accessor](auto *values) { + auto it = values->begin(); + for (auto &value : *values) { + if (auto maybe_vertex = accessor->FindVertex(value.object.Gid(), storage::View::OLD); maybe_vertex) { + *it = std::move(value); + it->object = *maybe_vertex; + ++it; + } + } + values->erase(it, values->end()); + }; + + adapt_context_with_vertex(&set_vertex_properties_); + adapt_context_with_vertex(&removed_vertex_properties_); + adapt_context_with_vertex(&set_vertex_labels_); + adapt_context_with_vertex(&removed_vertex_labels_); + + { + // adapt created_edges + auto it = created_edges_.begin(); + for (auto &created_edge : created_edges_) { + const auto maybe_from_vertex = accessor->FindVertex(created_edge.object.From().Gid(), storage::View::OLD); + if (!maybe_from_vertex) { + continue; + } + auto maybe_out_edges = maybe_from_vertex->OutEdges(storage::View::OLD); + MG_ASSERT(maybe_out_edges.HasValue()); + const auto edge_gid = created_edge.object.Gid(); + for (const auto &edge : *maybe_out_edges) { + if (edge.Gid() == edge_gid) { + *it = detail::CreatedObject{edge}; + ++it; + } + } + } + created_edges_.erase(it, created_edges_.end()); + } + + // deleted_edges_ should keep the transaction context of the transaction which deleted it + // because no other transaction can modify 
an object after it's deleted so it should be the + // latest state of the object + + const auto adapt_context_with_edge = [accessor](auto *values) { + auto it = values->begin(); + for (const auto &value : *values) { + if (auto maybe_vertex = accessor->FindVertex(value.object.From().Gid(), storage::View::OLD); maybe_vertex) { + auto maybe_out_edges = maybe_vertex->OutEdges(storage::View::OLD); + MG_ASSERT(maybe_out_edges.HasValue()); + for (const auto &edge : *maybe_out_edges) { + if (edge.Gid() == value.object.Gid()) { + *it = std::move(value); + it->object = edge; + ++it; + break; + } + } + } + } + values->erase(it, values->end()); + }; + + adapt_context_with_edge(&set_edge_properties_); + adapt_context_with_edge(&removed_edge_properties_); +} + +TypedValue TriggerContext::GetTypedValue(const TriggerIdentifierTag tag, DbAccessor *dba) const { + switch (tag) { + case TriggerIdentifierTag::CREATED_VERTICES: + return ToTypedValue(created_vertices_, dba); + + case TriggerIdentifierTag::CREATED_EDGES: + return ToTypedValue(created_edges_, dba); + + case TriggerIdentifierTag::CREATED_OBJECTS: + return Concatenate(dba, created_vertices_, created_edges_); + + case TriggerIdentifierTag::DELETED_VERTICES: + return ToTypedValue(deleted_vertices_, dba); + + case TriggerIdentifierTag::DELETED_EDGES: + return ToTypedValue(deleted_edges_, dba); + + case TriggerIdentifierTag::DELETED_OBJECTS: + return Concatenate(dba, deleted_vertices_, deleted_edges_); + + case TriggerIdentifierTag::SET_VERTEX_PROPERTIES: + return ToTypedValue(set_vertex_properties_, dba); + + case TriggerIdentifierTag::SET_EDGE_PROPERTIES: + return ToTypedValue(set_edge_properties_, dba); + + case TriggerIdentifierTag::REMOVED_VERTEX_PROPERTIES: + return ToTypedValue(removed_vertex_properties_, dba); + + case TriggerIdentifierTag::REMOVED_EDGE_PROPERTIES: + return ToTypedValue(removed_edge_properties_, dba); + + case TriggerIdentifierTag::SET_VERTEX_LABELS: + return ToTypedValue(set_vertex_labels_, dba); + + 
case TriggerIdentifierTag::REMOVED_VERTEX_LABELS: + return ToTypedValue(removed_vertex_labels_, dba); + + case TriggerIdentifierTag::UPDATED_VERTICES: + return Concatenate(dba, set_vertex_properties_, removed_vertex_properties_, set_vertex_labels_, + removed_vertex_labels_); + + case TriggerIdentifierTag::UPDATED_EDGES: + return Concatenate(dba, set_edge_properties_, removed_edge_properties_); + + case TriggerIdentifierTag::UPDATED_OBJECTS: + return Concatenate(dba, set_vertex_properties_, set_edge_properties_, removed_vertex_properties_, + removed_edge_properties_, set_vertex_labels_, removed_vertex_labels_); + } +} + +bool TriggerContext::ShouldEventTrigger(const TriggerEventType event_type) const { + using EventType = TriggerEventType; + switch (event_type) { + case EventType::ANY: + return AnyContainsValue(created_vertices_, created_edges_, deleted_vertices_, deleted_edges_, + set_vertex_properties_, set_edge_properties_, removed_vertex_properties_, + removed_edge_properties_, set_vertex_labels_, removed_vertex_labels_); + + case EventType::CREATE: + return AnyContainsValue(created_vertices_, created_edges_); + + case EventType::VERTEX_CREATE: + return AnyContainsValue(created_vertices_); + + case EventType::EDGE_CREATE: + return AnyContainsValue(created_edges_); + + case EventType::DELETE: + return AnyContainsValue(deleted_vertices_, deleted_edges_); + + case EventType::VERTEX_DELETE: + return AnyContainsValue(deleted_vertices_); + + case EventType::EDGE_DELETE: + return AnyContainsValue(deleted_edges_); + + case EventType::UPDATE: + return AnyContainsValue(set_vertex_properties_, set_edge_properties_, removed_vertex_properties_, + removed_edge_properties_, set_vertex_labels_, removed_vertex_labels_); + + case EventType::VERTEX_UPDATE: + return AnyContainsValue(set_vertex_properties_, removed_vertex_properties_, set_vertex_labels_, + removed_vertex_labels_); + + case EventType::EDGE_UPDATE: + return AnyContainsValue(set_edge_properties_, 
removed_edge_properties_); + } +} + +void TriggerContextCollector::UpdateLabelMap(const VertexAccessor vertex, const storage::LabelId label_id, + const LabelChange change) { + auto ®istry = GetRegistry(); + if (!registry.should_register_updated_objects || registry.created_objects.count(vertex.Gid())) { + return; + } + + if (auto it = label_changes_.find({vertex, label_id}); it != label_changes_.end()) { + it->second = std::clamp(it->second + LabelChangeToInt(change), -1, 1); + return; + } + + label_changes_.emplace(std::make_pair(vertex, label_id), LabelChangeToInt(change)); +} + +TriggerContextCollector::TriggerContextCollector(const std::unordered_set &event_types) { + for (const auto event_type : event_types) { + switch (event_type) { + case TriggerEventType::ANY: + vertex_registry_.should_register_created_objects = true; + edge_registry_.should_register_created_objects = true; + vertex_registry_.should_register_deleted_objects = true; + edge_registry_.should_register_deleted_objects = true; + vertex_registry_.should_register_updated_objects = true; + edge_registry_.should_register_updated_objects = true; + break; + case TriggerEventType::VERTEX_CREATE: + vertex_registry_.should_register_created_objects = true; + break; + case TriggerEventType::EDGE_CREATE: + edge_registry_.should_register_created_objects = true; + break; + case TriggerEventType::CREATE: + vertex_registry_.should_register_created_objects = true; + edge_registry_.should_register_created_objects = true; + break; + case TriggerEventType::VERTEX_DELETE: + vertex_registry_.should_register_deleted_objects = true; + break; + case TriggerEventType::EDGE_DELETE: + edge_registry_.should_register_deleted_objects = true; + break; + case TriggerEventType::DELETE: + vertex_registry_.should_register_deleted_objects = true; + edge_registry_.should_register_deleted_objects = true; + break; + case TriggerEventType::VERTEX_UPDATE: + vertex_registry_.should_register_updated_objects = true; + break; + case 
TriggerEventType::EDGE_UPDATE: + edge_registry_.should_register_updated_objects = true; + break; + case TriggerEventType::UPDATE: + vertex_registry_.should_register_updated_objects = true; + edge_registry_.should_register_updated_objects = true; + break; + } + } + + const auto deduce_if_should_register_created = [](auto ®istry) { + // Registering the created objects is necessary to: + // - eliminate deleted objects that were created in the same transaction + // - eliminate set/removed properties and labels of newly created objects + // because those changes are only relevant for objects that have existed before the transaction. + registry.should_register_created_objects |= + registry.should_register_updated_objects || registry.should_register_deleted_objects; + }; + + deduce_if_should_register_created(vertex_registry_); + deduce_if_should_register_created(edge_registry_); +} + +bool TriggerContextCollector::ShouldRegisterVertexLabelChange() const { + return vertex_registry_.should_register_updated_objects; +} + +void TriggerContextCollector::RegisterSetVertexLabel(const VertexAccessor &vertex, const storage::LabelId label_id) { + UpdateLabelMap(vertex, label_id, LabelChange::ADD); +} + +void TriggerContextCollector::RegisterRemovedVertexLabel(const VertexAccessor &vertex, + const storage::LabelId label_id) { + UpdateLabelMap(vertex, label_id, LabelChange::REMOVE); +} + +int8_t TriggerContextCollector::LabelChangeToInt(LabelChange change) { + static_assert(std::is_same_v, int8_t>, + "The underlying type of LabelChange doesn't match the return type!"); + return static_cast(change); +} + +TriggerContext TriggerContextCollector::TransformToTriggerContext() && { + auto [created_vertices, deleted_vertices, set_vertex_properties, removed_vertex_properties] = + Summarize(std::move(vertex_registry_)); + auto [set_vertex_labels, removed_vertex_labels] = LabelMapToList(std::move(label_changes_)); + auto [created_edges, deleted_edges, set_edge_properties, 
removed_edge_properties] = + Summarize(std::move(edge_registry_)); + + return {std::move(created_vertices), std::move(deleted_vertices), + std::move(set_vertex_properties), std::move(removed_vertex_properties), + std::move(set_vertex_labels), std::move(removed_vertex_labels), + std::move(created_edges), std::move(deleted_edges), + std::move(set_edge_properties), std::move(removed_edge_properties)}; +} + +TriggerContextCollector::LabelChangesLists TriggerContextCollector::LabelMapToList(LabelChangesMap &&label_changes) { + std::vector set_vertex_labels; + std::vector removed_vertex_labels; + + for (const auto &[key, label_state] : label_changes) { + if (label_state == LabelChangeToInt(LabelChange::ADD)) { + set_vertex_labels.emplace_back(key.first, key.second); + } else if (label_state == LabelChangeToInt(LabelChange::REMOVE)) { + removed_vertex_labels.emplace_back(key.first, key.second); + } + } + + label_changes.clear(); + + return {std::move(set_vertex_labels), std::move(removed_vertex_labels)}; +} +} // namespace query diff --git a/src/query/trigger_context.hpp b/src/query/trigger_context.hpp new file mode 100644 index 000000000..8e985e6ce --- /dev/null +++ b/src/query/trigger_context.hpp @@ -0,0 +1,353 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "query/db_accessor.hpp" +#include "query/typed_value.hpp" +#include "storage/v2/property_value.hpp" +#include "storage/v2/view.hpp" +#include "utils/concepts.hpp" +#include "utils/fnv.hpp" + +namespace query { +namespace detail { +template +concept ObjectAccessor = utils::SameAsAnyOf; + +template +const char *ObjectString() { + if constexpr (std::same_as) { + return "vertex"; + } else { + return "edge"; + } +} + +template +struct CreatedObject { + explicit CreatedObject(const TAccessor &object) : object{object} {} + + bool IsValid() const { return object.IsVisible(storage::View::OLD); } + std::map ToMap([[maybe_unused]] DbAccessor *dba) const { + return 
{{ObjectString(), TypedValue{object}}}; + } + + TAccessor object; +}; + +template +struct DeletedObject { + explicit DeletedObject(const TAccessor &object) : object{object} {} + + bool IsValid() const { return object.IsVisible(storage::View::OLD); } + std::map ToMap([[maybe_unused]] DbAccessor *dba) const { + return {{ObjectString(), TypedValue{object}}}; + } + + TAccessor object; +}; + +template +struct SetObjectProperty { + explicit SetObjectProperty(const TAccessor &object, storage::PropertyId key, TypedValue old_value, + TypedValue new_value) + : object{object}, key{key}, old_value{std::move(old_value)}, new_value{std::move(new_value)} {} + + std::map ToMap(DbAccessor *dba) const { + return {{ObjectString(), TypedValue{object}}, + {"key", TypedValue{dba->PropertyToName(key)}}, + {"old", old_value}, + {"new", new_value}}; + } + + bool IsValid() const { return object.IsVisible(storage::View::OLD); } + + TAccessor object; + storage::PropertyId key; + TypedValue old_value; + TypedValue new_value; +}; + +template +struct RemovedObjectProperty { + explicit RemovedObjectProperty(const TAccessor &object, storage::PropertyId key, TypedValue old_value) + : object{object}, key{key}, old_value{std::move(old_value)} {} + + std::map ToMap(DbAccessor *dba) const { + return {{ObjectString(), TypedValue{object}}, + {"key", TypedValue{dba->PropertyToName(key)}}, + {"old", old_value}}; + } + + bool IsValid() const { return object.IsVisible(storage::View::OLD); } + + TAccessor object; + storage::PropertyId key; + TypedValue old_value; +}; + +struct SetVertexLabel { + explicit SetVertexLabel(const VertexAccessor &vertex, const storage::LabelId label_id) + : object{vertex}, label_id{label_id} {} + + std::map ToMap(DbAccessor *dba) const; + bool IsValid() const; + + VertexAccessor object; + storage::LabelId label_id; +}; + +struct RemovedVertexLabel { + explicit RemovedVertexLabel(const VertexAccessor &vertex, const storage::LabelId label_id) + : object{vertex}, label_id{label_id} {} 
+ + std::map ToMap(DbAccessor *dba) const; + bool IsValid() const; + + VertexAccessor object; + storage::LabelId label_id; +}; +} // namespace detail + +enum class TriggerIdentifierTag : uint8_t { + CREATED_VERTICES, + CREATED_EDGES, + CREATED_OBJECTS, + DELETED_VERTICES, + DELETED_EDGES, + DELETED_OBJECTS, + SET_VERTEX_PROPERTIES, + SET_EDGE_PROPERTIES, + REMOVED_VERTEX_PROPERTIES, + REMOVED_EDGE_PROPERTIES, + SET_VERTEX_LABELS, + REMOVED_VERTEX_LABELS, + UPDATED_VERTICES, + UPDATED_EDGES, + UPDATED_OBJECTS +}; + +enum class TriggerEventType : uint8_t { + ANY, // Triggers on any change + VERTEX_CREATE, + EDGE_CREATE, + CREATE, + VERTEX_DELETE, + EDGE_DELETE, + DELETE, + VERTEX_UPDATE, + EDGE_UPDATE, + UPDATE +}; + +const char *TriggerEventTypeToString(TriggerEventType event_type); + +static_assert(std::is_trivially_copy_constructible_v, + "VertexAccessor is not trivially copy constructible, move it where possible and remove this assert"); +static_assert(std::is_trivially_copy_constructible_v, + "EdgeAccessor is not trivially copy constructible, move it where possible and remove this asssert"); + +// Holds the information necessary for triggers +class TriggerContext { + public: + TriggerContext() = default; + TriggerContext(std::vector> created_vertices, + std::vector> deleted_vertices, + std::vector> set_vertex_properties, + std::vector> removed_vertex_properties, + std::vector set_vertex_labels, + std::vector removed_vertex_labels, + std::vector> created_edges, + std::vector> deleted_edges, + std::vector> set_edge_properties, + std::vector> removed_edge_properties) + : created_vertices_{std::move(created_vertices)}, + deleted_vertices_{std::move(deleted_vertices)}, + set_vertex_properties_{std::move(set_vertex_properties)}, + removed_vertex_properties_{std::move(removed_vertex_properties)}, + set_vertex_labels_{std::move(set_vertex_labels)}, + removed_vertex_labels_{std::move(removed_vertex_labels)}, + created_edges_{std::move(created_edges)}, + 
deleted_edges_{std::move(deleted_edges)}, + set_edge_properties_{std::move(set_edge_properties)}, + removed_edge_properties_{std::move(removed_edge_properties)} {} + TriggerContext(const TriggerContext &) = default; + TriggerContext(TriggerContext &&) = default; + TriggerContext &operator=(const TriggerContext &) = default; + TriggerContext &operator=(TriggerContext &&) = default; + + // Adapt the TriggerContext object inplace for a different DbAccessor + // (each derived accessor, e.g. VertexAccessor, gets adapted + // to the sent DbAccessor so they can be used safely) + void AdaptForAccessor(DbAccessor *accessor); + + // Get TypedValue for the identifier defined with tag + TypedValue GetTypedValue(TriggerIdentifierTag tag, DbAccessor *dba) const; + bool ShouldEventTrigger(TriggerEventType) const; + + private: + std::vector> created_vertices_; + std::vector> deleted_vertices_; + std::vector> set_vertex_properties_; + std::vector> removed_vertex_properties_; + std::vector set_vertex_labels_; + std::vector removed_vertex_labels_; + + std::vector> created_edges_; + std::vector> deleted_edges_; + std::vector> set_edge_properties_; + std::vector> removed_edge_properties_; +}; + +// Collects the information necessary for triggers during a single transaction run. 
+class TriggerContextCollector { + public: + struct HashPairWithAccessor { + template + size_t operator()(const std::pair &pair) const { + using GidType = decltype(std::declval().Gid()); + return utils::HashCombine{}(pair.first.Gid(), pair.second); + } + }; + + struct PropertyChangeInfo { + TypedValue old_value; + TypedValue new_value; + }; + + template + using PropertyChangesMap = + std::unordered_map, PropertyChangeInfo, HashPairWithAccessor>; + + template + struct Registry { + bool should_register_created_objects{false}; + bool should_register_deleted_objects{false}; + bool should_register_updated_objects{false}; // Set/removed properties (and labels for vertices) + std::unordered_map> created_objects; + std::vector> deleted_objects; + // During the transaction, a single property on a single object could be changed multiple times. + // We want to register only the global change, at the end of the transaction. The change consists of + // the value before the transaction start, and the latest value assigned throughout the transaction. 
+ PropertyChangesMap property_changes; + }; + + explicit TriggerContextCollector(const std::unordered_set &event_types); + TriggerContextCollector(const TriggerContextCollector &) = default; + TriggerContextCollector(TriggerContextCollector &&) = default; + TriggerContextCollector &operator=(const TriggerContextCollector &) = default; + TriggerContextCollector &operator=(TriggerContextCollector &&) = default; + ~TriggerContextCollector() = default; + + template + bool ShouldRegisterCreatedObject() const { + return GetRegistry().should_register_created_objects; + } + + template + void RegisterCreatedObject(const TAccessor &created_object) { + auto ®istry = GetRegistry(); + if (!registry.should_register_created_objects) { + return; + } + registry.created_objects.emplace(created_object.Gid(), detail::CreatedObject{created_object}); + } + + template + bool ShouldRegisterDeletedObject() const { + return GetRegistry().should_register_deleted_objects; + } + + template + void RegisterDeletedObject(const TAccessor &deleted_object) { + auto ®istry = GetRegistry(); + if (!registry.should_register_deleted_objects || registry.created_objects.count(deleted_object.Gid())) { + return; + } + + registry.deleted_objects.emplace_back(deleted_object); + } + + template + bool ShouldRegisterObjectPropertyChange() const { + return GetRegistry().should_register_updated_objects; + } + + template + void RegisterSetObjectProperty(const TAccessor &object, const storage::PropertyId key, TypedValue old_value, + TypedValue new_value) { + auto ®istry = GetRegistry(); + if (!registry.should_register_updated_objects) { + return; + } + + if (registry.created_objects.count(object.Gid())) { + return; + } + + if (auto it = registry.property_changes.find({object, key}); it != registry.property_changes.end()) { + it->second.new_value = std::move(new_value); + return; + } + + registry.property_changes.emplace(std::make_pair(object, key), + PropertyChangeInfo{std::move(old_value), std::move(new_value)}); + 
} + + template + void RegisterRemovedObjectProperty(const TAccessor &object, const storage::PropertyId key, TypedValue old_value) { + // property is already removed + if (old_value.IsNull()) { + return; + } + + RegisterSetObjectProperty(object, key, std::move(old_value), TypedValue()); + } + + bool ShouldRegisterVertexLabelChange() const; + void RegisterSetVertexLabel(const VertexAccessor &vertex, storage::LabelId label_id); + void RegisterRemovedVertexLabel(const VertexAccessor &vertex, storage::LabelId label_id); + [[nodiscard]] TriggerContext TransformToTriggerContext() &&; + + private: + template + const Registry &GetRegistry() const { + if constexpr (std::same_as) { + return vertex_registry_; + } else { + return edge_registry_; + } + } + + template + Registry &GetRegistry() { + return const_cast &>( + const_cast(this)->GetRegistry()); + } + + using LabelChangesMap = std::unordered_map, int8_t, HashPairWithAccessor>; + using LabelChangesLists = std::pair, std::vector>; + + enum class LabelChange : int8_t { REMOVE = -1, ADD = 1 }; + + static int8_t LabelChangeToInt(LabelChange change); + + [[nodiscard]] static LabelChangesLists LabelMapToList(LabelChangesMap &&label_changes); + + void UpdateLabelMap(VertexAccessor vertex, storage::LabelId label_id, LabelChange change); + + Registry vertex_registry_; + Registry edge_registry_; + // During the transaction, a single label on a single vertex could be added and removed multiple times. + // We want to register only the global change, at the end of the transaction. The change consists of + // the state of the label before the transaction start, and the latest state assigned throughout the transaction. 
+ LabelChangesMap label_changes_; +}; +} // namespace query diff --git a/tests/e2e/triggers/on_update_triggers.cpp b/tests/e2e/triggers/on_update_triggers.cpp index 75da49ac8..3d24b1d28 100644 --- a/tests/e2e/triggers/on_update_triggers.cpp +++ b/tests/e2e/triggers/on_update_triggers.cpp @@ -105,7 +105,7 @@ void CreateOnUpdateTriggers(mg::Client &client, bool is_before) { fmt::format("CREATE TRIGGER SetVertexPropertiesTrigger ON () UPDATE " "{} COMMIT " "EXECUTE " - "UNWIND assignedVertexProperties as assignedVertexProperty " + "UNWIND setVertexProperties as assignedVertexProperty " "CREATE (n: {} {{ id: assignedVertexProperty.vertex.id }})", before_or_after, kTriggerSetVertexPropertyLabel)); client.DiscardAll(); @@ -121,7 +121,7 @@ void CreateOnUpdateTriggers(mg::Client &client, bool is_before) { fmt::format("CREATE TRIGGER SetVertexLabelsTrigger ON () UPDATE " "{} COMMIT " "EXECUTE " - "UNWIND assignedVertexLabels as assignedVertexLabel " + "UNWIND setVertexLabels as assignedVertexLabel " "UNWIND assignedVertexLabel.vertices as vertex " "CREATE (n: {} {{ id: vertex.id }})", before_or_after, kTriggerSetVertexLabelLabel)); @@ -140,7 +140,7 @@ void CreateOnUpdateTriggers(mg::Client &client, bool is_before) { fmt::format("CREATE TRIGGER SetEdgePropertiesTrigger ON --> UPDATE " "{} COMMIT " "EXECUTE " - "UNWIND assignedEdgeProperties as assignedEdgeProperty " + "UNWIND setEdgeProperties as assignedEdgeProperty " "CREATE (n: {} {{ id: assignedEdgeProperty.edge.id }})", before_or_after, kTriggerSetEdgePropertyLabel)); client.DiscardAll(); diff --git a/tests/unit/query_trigger.cpp b/tests/unit/query_trigger.cpp index e14822208..3a6ca4207 100644 --- a/tests/unit/query_trigger.cpp +++ b/tests/unit/query_trigger.cpp @@ -1,12 +1,22 @@ #include #include +#include #include "query/db_accessor.hpp" #include "query/interpreter.hpp" #include "query/trigger.hpp" #include "query/typed_value.hpp" #include "utils/memory.hpp" +namespace { +const std::unordered_set kAllEventTypes{ + 
query::TriggerEventType::ANY, query::TriggerEventType::VERTEX_CREATE, query::TriggerEventType::EDGE_CREATE, + query::TriggerEventType::CREATE, query::TriggerEventType::VERTEX_DELETE, query::TriggerEventType::EDGE_DELETE, + query::TriggerEventType::DELETE, query::TriggerEventType::VERTEX_UPDATE, query::TriggerEventType::EDGE_UPDATE, + query::TriggerEventType::UPDATE, +}; +} // namespace + class TriggerContextTest : public ::testing::Test { public: void SetUp() override { db.emplace(); } @@ -31,7 +41,7 @@ void CheckTypedValueSize(const query::TriggerContext &trigger_context, const que const size_t expected_size, query::DbAccessor &dba) { auto typed_values = trigger_context.GetTypedValue(tag, &dba); ASSERT_TRUE(typed_values.IsList()); - ASSERT_EQ(typed_values.ValueList().size(), expected_size); + ASSERT_EQ(expected_size, typed_values.ValueList().size()); }; void CheckLabelList(const query::TriggerContext &trigger_context, const query::TriggerIdentifierTag tag, @@ -61,7 +71,7 @@ void CheckLabelList(const query::TriggerContext &trigger_context, const query::T // that exist (unless its explicitly created for the deleted object) TEST_F(TriggerContextTest, ValidObjectsTest) { query::TriggerContext trigger_context; - query::TriggerContextCollector trigger_context_collector; + query::TriggerContextCollector trigger_context_collector{kAllEventTypes}; size_t vertex_count = 0; size_t edge_count = 0; @@ -95,7 +105,7 @@ TEST_F(TriggerContextTest, ValidObjectsTest) { dba.AdvanceCommand(); trigger_context = std::move(trigger_context_collector).TransformToTriggerContext(); - trigger_context_collector = query::TriggerContextCollector{}; + trigger_context_collector = query::TriggerContextCollector{kAllEventTypes}; // Should have all the created objects CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::CREATED_VERTICES, vertex_count, dba); @@ -181,7 +191,7 @@ TEST_F(TriggerContextTest, ValidObjectsTest) { ASSERT_FALSE(dba.Commit().HasError()); trigger_context = 
std::move(trigger_context_collector).TransformToTriggerContext(); - trigger_context_collector = query::TriggerContextCollector{}; + trigger_context_collector = query::TriggerContextCollector{kAllEventTypes}; CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::SET_VERTEX_PROPERTIES, vertex_count, dba); CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::SET_EDGE_PROPERTIES, edge_count, dba); @@ -250,7 +260,7 @@ TEST_F(TriggerContextTest, ValidObjectsTest) { // Binding the trigger context to transaction will mean that creating and updating an object in the same transaction // will return only the CREATE event. TEST_F(TriggerContextTest, ReturnCreateOnlyEvent) { - query::TriggerContextCollector trigger_context_collector; + query::TriggerContextCollector trigger_context_collector{kAllEventTypes}; query::DbAccessor dba{&StartTransaction()}; @@ -311,13 +321,14 @@ void EXPECT_PROP_EQ(const query::TypedValue &a, const query::TypedValue &b) { EX // transaction) everything inbetween should be ignored. 
TEST_F(TriggerContextTest, GlobalPropertyChange) { query::DbAccessor dba{&StartTransaction()}; + const std::unordered_set event_types{query::TriggerEventType::VERTEX_UPDATE}; auto v = dba.InsertVertex(); dba.AdvanceCommand(); { SPDLOG_DEBUG("SET -> SET"); - query::TriggerContextCollector trigger_context_collector; + query::TriggerContextCollector trigger_context_collector{event_types}; trigger_context_collector.RegisterSetObjectProperty(v, dba.NameToProperty("PROPERTY"), query::TypedValue("Value"), query::TypedValue("ValueNew")); trigger_context_collector.RegisterSetObjectProperty(v, dba.NameToProperty("PROPERTY"), @@ -339,7 +350,7 @@ TEST_F(TriggerContextTest, GlobalPropertyChange) { { SPDLOG_DEBUG("SET -> REMOVE"); - query::TriggerContextCollector trigger_context_collector; + query::TriggerContextCollector trigger_context_collector{event_types}; trigger_context_collector.RegisterSetObjectProperty(v, dba.NameToProperty("PROPERTY"), query::TypedValue("Value"), query::TypedValue("ValueNew")); trigger_context_collector.RegisterRemovedObjectProperty(v, dba.NameToProperty("PROPERTY"), @@ -360,7 +371,7 @@ TEST_F(TriggerContextTest, GlobalPropertyChange) { { SPDLOG_DEBUG("REMOVE -> SET"); - query::TriggerContextCollector trigger_context_collector; + query::TriggerContextCollector trigger_context_collector{event_types}; trigger_context_collector.RegisterRemovedObjectProperty(v, dba.NameToProperty("PROPERTY"), query::TypedValue("Value")); trigger_context_collector.RegisterSetObjectProperty(v, dba.NameToProperty("PROPERTY"), query::TypedValue(), @@ -382,7 +393,7 @@ TEST_F(TriggerContextTest, GlobalPropertyChange) { { SPDLOG_DEBUG("REMOVE -> REMOVE"); - query::TriggerContextCollector trigger_context_collector; + query::TriggerContextCollector trigger_context_collector{event_types}; trigger_context_collector.RegisterRemovedObjectProperty(v, dba.NameToProperty("PROPERTY"), query::TypedValue("Value")); trigger_context_collector.RegisterRemovedObjectProperty(v, 
dba.NameToProperty("PROPERTY"), query::TypedValue()); @@ -402,7 +413,7 @@ TEST_F(TriggerContextTest, GlobalPropertyChange) { { SPDLOG_DEBUG("SET -> SET (no change on transaction level)"); - query::TriggerContextCollector trigger_context_collector; + query::TriggerContextCollector trigger_context_collector{event_types}; trigger_context_collector.RegisterSetObjectProperty(v, dba.NameToProperty("PROPERTY"), query::TypedValue("Value"), query::TypedValue("ValueNew")); trigger_context_collector.RegisterSetObjectProperty(v, dba.NameToProperty("PROPERTY"), @@ -416,7 +427,7 @@ TEST_F(TriggerContextTest, GlobalPropertyChange) { { SPDLOG_DEBUG("SET -> REMOVE (no change on transaction level)"); - query::TriggerContextCollector trigger_context_collector; + query::TriggerContextCollector trigger_context_collector{event_types}; trigger_context_collector.RegisterSetObjectProperty(v, dba.NameToProperty("PROPERTY"), query::TypedValue(), query::TypedValue("ValueNew")); trigger_context_collector.RegisterRemovedObjectProperty(v, dba.NameToProperty("PROPERTY"), @@ -430,7 +441,7 @@ TEST_F(TriggerContextTest, GlobalPropertyChange) { { SPDLOG_DEBUG("REMOVE -> SET (no change on transaction level)"); - query::TriggerContextCollector trigger_context_collector; + query::TriggerContextCollector trigger_context_collector{event_types}; trigger_context_collector.RegisterRemovedObjectProperty(v, dba.NameToProperty("PROPERTY"), query::TypedValue("Value")); trigger_context_collector.RegisterSetObjectProperty(v, dba.NameToProperty("PROPERTY"), query::TypedValue(), @@ -444,7 +455,7 @@ TEST_F(TriggerContextTest, GlobalPropertyChange) { { SPDLOG_DEBUG("REMOVE -> REMOVE (no change on transaction level)"); - query::TriggerContextCollector trigger_context_collector; + query::TriggerContextCollector trigger_context_collector{event_types}; trigger_context_collector.RegisterRemovedObjectProperty(v, dba.NameToProperty("PROPERTY"), query::TypedValue()); trigger_context_collector.RegisterRemovedObjectProperty(v, 
dba.NameToProperty("PROPERTY"), query::TypedValue()); const auto trigger_context = std::move(trigger_context_collector).TransformToTriggerContext(); @@ -456,7 +467,7 @@ TEST_F(TriggerContextTest, GlobalPropertyChange) { { SPDLOG_DEBUG("SET -> REMOVE -> SET -> REMOVE -> SET"); - query::TriggerContextCollector trigger_context_collector; + query::TriggerContextCollector trigger_context_collector{event_types}; trigger_context_collector.RegisterSetObjectProperty(v, dba.NameToProperty("PROPERTY"), query::TypedValue("Value0"), query::TypedValue("Value1")); trigger_context_collector.RegisterRemovedObjectProperty(v, dba.NameToProperty("PROPERTY"), @@ -486,6 +497,7 @@ TEST_F(TriggerContextTest, GlobalPropertyChange) { // Same as above, but for label changes TEST_F(TriggerContextTest, GlobalLabelChange) { query::DbAccessor dba{&StartTransaction()}; + const std::unordered_set event_types{query::TriggerEventType::VERTEX_UPDATE}; auto v = dba.InsertVertex(); dba.AdvanceCommand(); @@ -495,7 +507,7 @@ TEST_F(TriggerContextTest, GlobalLabelChange) { // so REMOVE -> REMOVE and SET -> SET doesn't make sense { SPDLOG_DEBUG("SET -> REMOVE"); - query::TriggerContextCollector trigger_context_collector; + query::TriggerContextCollector trigger_context_collector{event_types}; trigger_context_collector.RegisterSetVertexLabel(v, label_id); trigger_context_collector.RegisterRemovedVertexLabel(v, label_id); const auto trigger_context = std::move(trigger_context_collector).TransformToTriggerContext(); @@ -507,7 +519,7 @@ TEST_F(TriggerContextTest, GlobalLabelChange) { { SPDLOG_DEBUG("REMOVE -> SET"); - query::TriggerContextCollector trigger_context_collector; + query::TriggerContextCollector trigger_context_collector{event_types}; trigger_context_collector.RegisterRemovedVertexLabel(v, label_id); trigger_context_collector.RegisterSetVertexLabel(v, label_id); const auto trigger_context = std::move(trigger_context_collector).TransformToTriggerContext(); @@ -519,7 +531,7 @@ 
TEST_F(TriggerContextTest, GlobalLabelChange) { { SPDLOG_DEBUG("SET -> REMOVE -> SET -> REMOVE -> SET"); - query::TriggerContextCollector trigger_context_collector; + query::TriggerContextCollector trigger_context_collector{event_types}; trigger_context_collector.RegisterSetVertexLabel(v, label_id); trigger_context_collector.RegisterRemovedVertexLabel(v, label_id); trigger_context_collector.RegisterSetVertexLabel(v, label_id); @@ -540,7 +552,7 @@ TEST_F(TriggerContextTest, GlobalLabelChange) { { SPDLOG_DEBUG("REMOVE -> SET -> REMOVE -> SET -> REMOVE"); - query::TriggerContextCollector trigger_context_collector; + query::TriggerContextCollector trigger_context_collector{event_types}; trigger_context_collector.RegisterRemovedVertexLabel(v, label_id); trigger_context_collector.RegisterSetVertexLabel(v, label_id); trigger_context_collector.RegisterRemovedVertexLabel(v, label_id); @@ -560,6 +572,231 @@ TEST_F(TriggerContextTest, GlobalLabelChange) { } } +namespace { +struct ShouldRegisterExpectation { + bool creation{false}; + bool deletion{false}; + bool update{false}; +}; + +template +void CheckRegisterInfo(const query::TriggerContextCollector &collector, const ShouldRegisterExpectation &expectation) { + EXPECT_EQ(expectation.creation, collector.ShouldRegisterCreatedObject()); + EXPECT_EQ(expectation.deletion, collector.ShouldRegisterDeletedObject()); + EXPECT_EQ(expectation.update, collector.ShouldRegisterObjectPropertyChange()); +} + +size_t BoolToSize(const bool value) { return value ? 
1 : 0; } + +void CheckFilters(const std::unordered_set &event_types, + const ShouldRegisterExpectation &vertex_expectation, + const ShouldRegisterExpectation &edge_expectation, storage::Storage::Accessor *accessor) { + query::TriggerContextCollector collector{event_types}; + { + SCOPED_TRACE("Checking vertex"); + CheckRegisterInfo(collector, vertex_expectation); + } + { + SCOPED_TRACE("Checking edge"); + CheckRegisterInfo(collector, edge_expectation); + } + EXPECT_EQ(collector.ShouldRegisterVertexLabelChange(), vertex_expectation.update); + + query::DbAccessor dba{accessor}; + + auto vertex_to_delete = dba.InsertVertex(); + auto vertex_to_modify = dba.InsertVertex(); + + auto from_vertex = dba.InsertVertex(); + auto to_vertex = dba.InsertVertex(); + auto maybe_edge_to_delete = dba.InsertEdge(&from_vertex, &to_vertex, dba.NameToEdgeType("EDGE")); + auto maybe_edge_to_modify = dba.InsertEdge(&from_vertex, &to_vertex, dba.NameToEdgeType("EDGE")); + auto &edge_to_delete = maybe_edge_to_delete.GetValue(); + auto &edge_to_modify = maybe_edge_to_modify.GetValue(); + + dba.AdvanceCommand(); + + const auto created_vertex = dba.InsertVertex(); + const auto maybe_created_edge = dba.InsertEdge(&from_vertex, &to_vertex, dba.NameToEdgeType("EDGE")); + const auto created_edge = maybe_created_edge.GetValue(); + collector.RegisterCreatedObject(created_vertex); + collector.RegisterCreatedObject(created_edge); + collector.RegisterDeletedObject(dba.RemoveEdge(&edge_to_delete).GetValue().value()); + collector.RegisterDeletedObject(dba.RemoveVertex(&vertex_to_delete).GetValue().value()); + collector.RegisterSetObjectProperty(vertex_to_modify, dba.NameToProperty("UPDATE"), query::TypedValue{1}, + query::TypedValue{2}); + collector.RegisterRemovedObjectProperty(vertex_to_modify, dba.NameToProperty("REMOVE"), query::TypedValue{1}); + collector.RegisterSetObjectProperty(edge_to_modify, dba.NameToProperty("UPDATE"), query::TypedValue{1}, + query::TypedValue{2}); + 
collector.RegisterRemovedObjectProperty(edge_to_modify, dba.NameToProperty("REMOVE"), query::TypedValue{1}); + collector.RegisterSetVertexLabel(vertex_to_modify, dba.NameToLabel("SET")); + collector.RegisterRemovedVertexLabel(vertex_to_modify, dba.NameToLabel("REMOVE")); + dba.AdvanceCommand(); + + const auto trigger_context = std::move(collector).TransformToTriggerContext(); + const auto created_vertices = BoolToSize(vertex_expectation.creation); + { + SCOPED_TRACE("CREATED_VERTICES"); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::CREATED_VERTICES, created_vertices, dba); + } + const auto created_edges = BoolToSize(edge_expectation.creation); + { + SCOPED_TRACE("CREATED_EDGES"); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::CREATED_EDGES, created_edges, dba); + } + { + SCOPED_TRACE("CREATED_OBJECTS"); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::CREATED_OBJECTS, created_vertices + created_edges, + dba); + } + const auto deleted_vertices = BoolToSize(vertex_expectation.deletion); + { + SCOPED_TRACE("DELETED_VERTICES"); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::DELETED_VERTICES, deleted_vertices, dba); + } + const auto deleted_edges = BoolToSize(edge_expectation.deletion); + { + SCOPED_TRACE("DELETED_EDGES"); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::DELETED_EDGES, deleted_edges, dba); + } + { + SCOPED_TRACE("DELETED_OBJECTS"); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::DELETED_OBJECTS, deleted_vertices + deleted_edges, + dba); + } + { + SCOPED_TRACE("SET_VERTEX_PROPERTIES"); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::SET_VERTEX_PROPERTIES, + BoolToSize(vertex_expectation.update), dba); + } + { + SCOPED_TRACE("SET_EDGE_PROPERTIES"); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::SET_EDGE_PROPERTIES, + BoolToSize(edge_expectation.update), dba); + } + { + 
SCOPED_TRACE("REMOVED_VERTEX_PROPERTIES"); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::REMOVED_VERTEX_PROPERTIES, + BoolToSize(vertex_expectation.update), dba); + } + { + SCOPED_TRACE("REMOVED_EDGE_PROPERTIES"); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::REMOVED_EDGE_PROPERTIES, + BoolToSize(edge_expectation.update), dba); + } + const auto set_and_removed_vertex_props_and_labels = BoolToSize(vertex_expectation.update) * 4; + { + SCOPED_TRACE("UPDATED_VERTICES"); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::UPDATED_VERTICES, + set_and_removed_vertex_props_and_labels, dba); + } + const auto set_and_removed_edge_props = BoolToSize(edge_expectation.update) * 2; + { + SCOPED_TRACE("UPDATED_EDGES"); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::UPDATED_EDGES, set_and_removed_edge_props, dba); + } + // sum of the previous + { + SCOPED_TRACE("UPDATED_OBJECTS"); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::UPDATED_OBJECTS, + set_and_removed_vertex_props_and_labels + set_and_removed_edge_props, dba); + } + { + SCOPED_TRACE("SET_VERTEX_LABELS"); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::SET_VERTEX_LABELS, + BoolToSize(vertex_expectation.update), dba); + } + { + SCOPED_TRACE("REMOVED_VERTEX_LABELS"); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::REMOVED_VERTEX_LABELS, + BoolToSize(vertex_expectation.update), dba); + } + + dba.Abort(); +} +} // namespace + +TEST_F(TriggerContextTest, Filtering) { + using TET = query::TriggerEventType; + // Check all event type individually + { + SCOPED_TRACE("TET::ANY"); + CheckFilters({TET::ANY}, ShouldRegisterExpectation{true, true, true}, ShouldRegisterExpectation{true, true, true}, + &StartTransaction()); + } + { + SCOPED_TRACE("TET::VERTEX_CREATE"); + CheckFilters({TET::VERTEX_CREATE}, ShouldRegisterExpectation{true, false, false}, + ShouldRegisterExpectation{false, false, 
false}, &StartTransaction()); + } + { + SCOPED_TRACE("TET::EDGE_CREATE"); + CheckFilters({TET::EDGE_CREATE}, ShouldRegisterExpectation{false, false, false}, + ShouldRegisterExpectation{true, false, false}, &StartTransaction()); + } + { + SCOPED_TRACE("TET::CREATE"); + CheckFilters({TET::CREATE}, ShouldRegisterExpectation{true, false, false}, + ShouldRegisterExpectation{true, false, false}, &StartTransaction()); + } + { + SCOPED_TRACE("TET::VERTEX_DELETE"); + CheckFilters({TET::VERTEX_DELETE}, ShouldRegisterExpectation{true, true, false}, + ShouldRegisterExpectation{false, false, false}, &StartTransaction()); + } + { + SCOPED_TRACE("TET::EDGE_DELETE"); + CheckFilters({TET::EDGE_DELETE}, ShouldRegisterExpectation{false, false, false}, + ShouldRegisterExpectation{true, true, false}, &StartTransaction()); + } + { + SCOPED_TRACE("TET::DELETE"); + CheckFilters({TET::DELETE}, ShouldRegisterExpectation{true, true, false}, + ShouldRegisterExpectation{true, true, false}, &StartTransaction()); + } + { + SCOPED_TRACE("TET::VERTEX_UPDATE"); + CheckFilters({TET::VERTEX_UPDATE}, ShouldRegisterExpectation{true, false, true}, + ShouldRegisterExpectation{false, false, false}, &StartTransaction()); + } + { + SCOPED_TRACE("TET::EDGE_UPDATE"); + CheckFilters({TET::EDGE_UPDATE}, ShouldRegisterExpectation{false, false, false}, + ShouldRegisterExpectation{true, false, true}, &StartTransaction()); + } + { + SCOPED_TRACE("TET::UPDATE"); + CheckFilters({TET::UPDATE}, ShouldRegisterExpectation{true, false, true}, + ShouldRegisterExpectation{true, false, true}, &StartTransaction()); + } + // Some combined versions + { + SCOPED_TRACE("TET::VERTEX_UPDATE, TET::EDGE_UPDATE"); + CheckFilters({TET::VERTEX_UPDATE, TET::EDGE_UPDATE}, ShouldRegisterExpectation{true, false, true}, + ShouldRegisterExpectation{true, false, true}, &StartTransaction()); + } + { + SCOPED_TRACE("TET::VERTEX_UPDATE, TET::EDGE_UPDATE, TET::DELETE"); + CheckFilters({TET::VERTEX_UPDATE, TET::EDGE_UPDATE, TET::DELETE}, 
ShouldRegisterExpectation{true, true, true}, + ShouldRegisterExpectation{true, true, true}, &StartTransaction()); + } + { + SCOPED_TRACE("TET::UPDATE, TET::VERTEX_DELETE, TET::EDGE_DELETE"); + CheckFilters({TET::UPDATE, TET::VERTEX_DELETE, TET::EDGE_DELETE}, ShouldRegisterExpectation{true, true, true}, + ShouldRegisterExpectation{true, true, true}, &StartTransaction()); + } + { + SCOPED_TRACE("TET::VERTEX_CREATE, TET::VERTEX_UPDATE"); + CheckFilters({TET::VERTEX_CREATE, TET::VERTEX_UPDATE}, ShouldRegisterExpectation{true, false, true}, + ShouldRegisterExpectation{false, false, false}, &StartTransaction()); + } + { + SCOPED_TRACE("TET::EDGE_CREATE, TET::EDGE_UPDATE"); + CheckFilters({TET::EDGE_CREATE, TET::EDGE_UPDATE}, ShouldRegisterExpectation{false, false, false}, + ShouldRegisterExpectation{true, false, true}, &StartTransaction()); + } +} + class TriggerStoreTest : public ::testing::Test { protected: const std::filesystem::path testing_directory{std::filesystem::temp_directory_path() / "MG_test_unit_query_trigger"}; @@ -746,3 +983,24 @@ TEST_F(TriggerStoreTest, TriggerInfo) { check_trigger_info(); } + +TEST_F(TriggerStoreTest, AnyTriggerAllKeywords) { + query::TriggerStore store{testing_directory, &ast_cache, &*dba, &antlr_lock}; + + using namespace std::literals; + const std::array keywords = { + "createdVertices"sv, "createdEdges"sv, "createdObjects"sv, + "deletedVertices"sv, "deletedEdges"sv, "deletedObjects"sv, + "setVertexProperties"sv, "setEdgeProperties"sv, "removedVertexProperties"sv, + "removedEdgeProperties"sv, "setVertexLabels"sv, "removedVertexLabels"sv, + "updatedVertices"sv, "updatedEdges"sv, "updatedObjects"sv, + }; + + const auto trigger_name = "trigger"s; + + for (const auto keyword : keywords) { + ASSERT_NO_THROW(store.AddTrigger(trigger_name, fmt::format("RETURN {}", keyword), {}, query::TriggerEventType::ANY, + query::TriggerPhase::BEFORE_COMMIT, &ast_cache, &*dba, &antlr_lock)); + store.DropTrigger(trigger_name); + } +} From 
a3ecc524290f13581f0f65d311df9bb24cf0c620 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Wed, 26 May 2021 13:35:12 +0200 Subject: [PATCH 50/63] Use all available predefined variables (#157) --- src/query/trigger.cpp | 18 ++++--- tests/unit/query_trigger.cpp | 98 ++++++++++++++++++++++++++++++++---- 2 files changed, 99 insertions(+), 17 deletions(-) diff --git a/src/query/trigger.cpp b/src/query/trigger.cpp index 3f4b0ccb4..401161c98 100644 --- a/src/query/trigger.cpp +++ b/src/query/trigger.cpp @@ -89,13 +89,14 @@ std::vector> GetPredefinedIdentifier return TagsToIdentifiers( IdentifierTag::CREATED_VERTICES, IdentifierTag::CREATED_EDGES, IdentifierTag::CREATED_OBJECTS, IdentifierTag::DELETED_VERTICES, IdentifierTag::DELETED_EDGES, IdentifierTag::DELETED_OBJECTS, - IdentifierTag::SET_VERTEX_PROPERTIES, IdentifierTag::SET_EDGE_PROPERTIES, - IdentifierTag::REMOVED_VERTEX_PROPERTIES, IdentifierTag::REMOVED_EDGE_PROPERTIES, + IdentifierTag::SET_VERTEX_PROPERTIES, IdentifierTag::REMOVED_VERTEX_PROPERTIES, IdentifierTag::SET_VERTEX_LABELS, IdentifierTag::REMOVED_VERTEX_LABELS, IdentifierTag::UPDATED_VERTICES, - IdentifierTag::UPDATED_EDGES, IdentifierTag::UPDATED_OBJECTS); + IdentifierTag::SET_EDGE_PROPERTIES, IdentifierTag::REMOVED_EDGE_PROPERTIES, IdentifierTag::UPDATED_EDGES, + IdentifierTag::UPDATED_OBJECTS); case EventType::CREATE: - return TagsToIdentifiers(IdentifierTag::CREATED_OBJECTS); + return TagsToIdentifiers(IdentifierTag::CREATED_VERTICES, IdentifierTag::CREATED_EDGES, + IdentifierTag::CREATED_OBJECTS); case EventType::VERTEX_CREATE: return TagsToIdentifiers(IdentifierTag::CREATED_VERTICES); @@ -104,7 +105,8 @@ std::vector> GetPredefinedIdentifier return TagsToIdentifiers(IdentifierTag::CREATED_EDGES); case EventType::DELETE: - return TagsToIdentifiers(IdentifierTag::DELETED_OBJECTS); + return TagsToIdentifiers(IdentifierTag::DELETED_VERTICES, IdentifierTag::DELETED_EDGES, + IdentifierTag::DELETED_OBJECTS); case 
EventType::VERTEX_DELETE: return TagsToIdentifiers(IdentifierTag::DELETED_VERTICES); @@ -113,7 +115,11 @@ std::vector> GetPredefinedIdentifier return TagsToIdentifiers(IdentifierTag::DELETED_EDGES); case EventType::UPDATE: - return TagsToIdentifiers(IdentifierTag::UPDATED_OBJECTS); + return TagsToIdentifiers(IdentifierTag::SET_VERTEX_PROPERTIES, IdentifierTag::REMOVED_VERTEX_PROPERTIES, + IdentifierTag::SET_VERTEX_LABELS, IdentifierTag::REMOVED_VERTEX_LABELS, + IdentifierTag::UPDATED_VERTICES, IdentifierTag::SET_EDGE_PROPERTIES, + IdentifierTag::REMOVED_EDGE_PROPERTIES, IdentifierTag::UPDATED_EDGES, + IdentifierTag::UPDATED_OBJECTS); case EventType::VERTEX_UPDATE: return TagsToIdentifiers(IdentifierTag::SET_VERTEX_PROPERTIES, IdentifierTag::REMOVED_VERTEX_PROPERTIES, diff --git a/tests/unit/query_trigger.cpp b/tests/unit/query_trigger.cpp index 3a6ca4207..27eac69f6 100644 --- a/tests/unit/query_trigger.cpp +++ b/tests/unit/query_trigger.cpp @@ -988,19 +988,95 @@ TEST_F(TriggerStoreTest, AnyTriggerAllKeywords) { query::TriggerStore store{testing_directory, &ast_cache, &*dba, &antlr_lock}; using namespace std::literals; - const std::array keywords = { - "createdVertices"sv, "createdEdges"sv, "createdObjects"sv, - "deletedVertices"sv, "deletedEdges"sv, "deletedObjects"sv, - "setVertexProperties"sv, "setEdgeProperties"sv, "removedVertexProperties"sv, - "removedEdgeProperties"sv, "setVertexLabels"sv, "removedVertexLabels"sv, - "updatedVertices"sv, "updatedEdges"sv, "updatedObjects"sv, + + const auto created_vertices = "createdVertices"sv; + const auto created_edges = "createdEdges"sv; + const auto created_objects = "createdObjects"sv; + const auto deleted_vertices = "deletedVertices"sv; + const auto deleted_edges = "deletedEdges"sv; + const auto deleted_objects = "deletedObjects"sv; + const auto set_vertex_properties = "setVertexProperties"sv; + const auto set_edge_properties = "setEdgeProperties"sv; + const auto removed_vertex_properties = "removedVertexProperties"sv; 
+ const auto removed_edge_properties = "removedEdgeProperties"sv; + const auto set_vertex_labels = "setVertexLabels"sv; + const auto removed_vertex_labels = "removedVertexLabels"sv; + const auto updated_vertices = "updatedVertices"sv; + const auto updated_edges = "updatedEdges"sv; + const auto updates_objects = "updatedObjects"sv; + + std::array event_types_to_test = { + std::make_pair(query::TriggerEventType::CREATE, std::vector{created_vertices, created_edges, created_objects}), + std::make_pair(query::TriggerEventType::VERTEX_CREATE, std::vector{created_vertices}), + std::make_pair(query::TriggerEventType::EDGE_CREATE, std::vector{created_edges}), + std::make_pair(query::TriggerEventType::UPDATE, + std::vector{ + set_vertex_properties, + set_edge_properties, + removed_vertex_properties, + removed_edge_properties, + set_vertex_labels, + removed_vertex_labels, + updated_vertices, + updated_edges, + updates_objects, + }), + std::make_pair(query::TriggerEventType::VERTEX_UPDATE, + std::vector{ + set_vertex_properties, + removed_vertex_properties, + set_vertex_labels, + removed_vertex_labels, + updated_vertices, + }), + std::make_pair(query::TriggerEventType::EDGE_UPDATE, + std::vector{ + set_edge_properties, + removed_edge_properties, + updated_edges, + }), + std::make_pair(query::TriggerEventType::DELETE, + std::vector{ + deleted_vertices, + deleted_edges, + deleted_objects, + }), + std::make_pair(query::TriggerEventType::VERTEX_DELETE, + std::vector{ + deleted_vertices, + }), + std::make_pair(query::TriggerEventType::EDGE_DELETE, + std::vector{ + deleted_edges, + }), + std::make_pair(query::TriggerEventType::ANY, + std::vector{ + created_vertices, + created_edges, + created_objects, + deleted_vertices, + deleted_edges, + deleted_objects, + set_vertex_properties, + set_edge_properties, + removed_vertex_properties, + removed_edge_properties, + set_vertex_labels, + removed_vertex_labels, + updated_vertices, + updated_edges, + updates_objects, + }), }; const auto 
trigger_name = "trigger"s; - - for (const auto keyword : keywords) { - ASSERT_NO_THROW(store.AddTrigger(trigger_name, fmt::format("RETURN {}", keyword), {}, query::TriggerEventType::ANY, - query::TriggerPhase::BEFORE_COMMIT, &ast_cache, &*dba, &antlr_lock)); - store.DropTrigger(trigger_name); + for (const auto &[event_type, keywords] : event_types_to_test) { + SCOPED_TRACE(query::TriggerEventTypeToString(event_type)); + for (const auto keyword : keywords) { + SCOPED_TRACE(keyword); + EXPECT_NO_THROW(store.AddTrigger(trigger_name, fmt::format("RETURN {}", keyword), {}, event_type, + query::TriggerPhase::BEFORE_COMMIT, &ast_cache, &*dba, &antlr_lock)); + store.DropTrigger(trigger_name); + } } } From 560eb04f67d8fd9f430cc102d217d17693235c97 Mon Sep 17 00:00:00 2001 From: antonio2368 Date: Wed, 26 May 2021 19:57:08 +0200 Subject: [PATCH 51/63] Small trigger fixes (#158) * Fix warning message * Update version * Run query callbacks only on pull * Use warn level for failure of loading a trigger --- CHANGELOG.md | 2 +- src/query/interpreter.cpp | 130 ++++++++++++++++++++------------------ src/query/interpreter.hpp | 8 +-- src/query/trigger.cpp | 21 ++++-- 4 files changed, 86 insertions(+), 75 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e281fe50d..ef4fd5692 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # Change Log -## Future +## v1.5.0 ### Major Feature and Improvements diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp index 67fa4b279..39432f716 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -26,6 +26,7 @@ #include "utils/event_counter.hpp" #include "utils/exceptions.hpp" #include "utils/flag_validation.hpp" +#include "utils/likely.hpp" #include "utils/logging.hpp" #include "utils/memory.hpp" #include "utils/memory_tracker.hpp" @@ -340,8 +341,8 @@ Callback HandleAuthQuery(AuthQuery *auth_query, AuthQueryHandler *auth, const Pa } } -Callback HandleReplicationQuery(ReplicationQuery *repl_query, 
ReplQueryHandler *handler, const Parameters ¶meters, - DbAccessor *db_accessor) { +Callback HandleReplicationQuery(ReplicationQuery *repl_query, const Parameters ¶meters, + InterpreterContext *interpreter_context, DbAccessor *db_accessor) { Frame frame(0); SymbolTable symbol_table; EvaluationContext evaluation_context; @@ -361,16 +362,17 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, ReplQueryHandler * if (port.IsInt()) { maybe_port = port.ValueInt(); } - callback.fn = [handler, role = repl_query->role_, maybe_port] { - handler->SetReplicationRole(role, maybe_port); + callback.fn = [handler = ReplQueryHandler{interpreter_context->db}, role = repl_query->role_, + maybe_port]() mutable { + handler.SetReplicationRole(role, maybe_port); return std::vector>(); }; return callback; } case ReplicationQuery::Action::SHOW_REPLICATION_ROLE: { callback.header = {"replication mode"}; - callback.fn = [handler] { - auto mode = handler->ShowReplicationRole(); + callback.fn = [handler = ReplQueryHandler{interpreter_context->db}] { + auto mode = handler.ShowReplicationRole(); switch (mode) { case ReplicationQuery::ReplicationRole::MAIN: { return std::vector>{{TypedValue("main")}}; @@ -393,24 +395,25 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, ReplQueryHandler * } else if (timeout.IsInt()) { maybe_timeout = static_cast(timeout.ValueInt()); } - callback.fn = [handler, name, socket_address, sync_mode, maybe_timeout] { - handler->RegisterReplica(name, std::string(socket_address.ValueString()), sync_mode, maybe_timeout); + callback.fn = [handler = ReplQueryHandler{interpreter_context->db}, name, socket_address, sync_mode, + maybe_timeout]() mutable { + handler.RegisterReplica(name, std::string(socket_address.ValueString()), sync_mode, maybe_timeout); return std::vector>(); }; return callback; } case ReplicationQuery::Action::DROP_REPLICA: { const auto &name = repl_query->replica_name_; - callback.fn = [handler, name] { - handler->DropReplica(name); 
+ callback.fn = [handler = ReplQueryHandler{interpreter_context->db}, name]() mutable { + handler.DropReplica(name); return std::vector>(); }; return callback; } case ReplicationQuery::Action::SHOW_REPLICAS: { callback.header = {"name", "socket_address", "sync_mode", "timeout"}; - callback.fn = [handler, replica_nfields = callback.header.size()] { - const auto &replicas = handler->ShowReplicas(); + callback.fn = [handler = ReplQueryHandler{interpreter_context->db}, replica_nfields = callback.header.size()] { + const auto &replicas = handler.ShowReplicas(); auto typed_replicas = std::vector>{}; typed_replicas.reserve(replicas.size()); for (const auto &replica : replicas) { @@ -978,12 +981,15 @@ PreparedQuery PrepareReplicationQuery(ParsedQuery parsed_query, const bool in_ex } auto *replication_query = utils::Downcast(parsed_query.query); - ReplQueryHandler handler{interpreter_context->db}; - auto callback = HandleReplicationQuery(replication_query, &handler, parsed_query.parameters, dba); + auto callback = HandleReplicationQuery(replication_query, parsed_query.parameters, interpreter_context, dba); return PreparedQuery{callback.header, std::move(parsed_query.required_privileges), - [pull_plan = std::make_shared(callback.fn())]( - AnyStream *stream, std::optional n) -> std::optional { + [callback_fn = std::move(callback.fn), pull_plan = std::shared_ptr{nullptr}]( + AnyStream *stream, std::optional n) mutable -> std::optional { + if (UNLIKELY(!pull_plan)) { + pull_plan = std::make_shared(callback_fn()); + } + if (pull_plan->Pull(stream, n)) { return QueryHandlerResult::COMMIT; } @@ -1002,31 +1008,22 @@ PreparedQuery PrepareLockPathQuery(ParsedQuery parsed_query, const bool in_expli auto *lock_path_query = utils::Downcast(parsed_query.query); - Frame frame(0); - SymbolTable symbol_table; - EvaluationContext evaluation_context; - evaluation_context.timestamp = - std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()) - .count(); - 
evaluation_context.parameters = parsed_query.parameters; - ExpressionEvaluator evaluator(&frame, symbol_table, evaluation_context, dba, storage::View::OLD); - - Callback callback; - switch (lock_path_query->action_) { - case LockPathQuery::Action::LOCK_PATH: - if (!interpreter_context->db->LockPath()) { - throw QueryRuntimeException("Failed to lock the data directory"); - } - break; - case LockPathQuery::Action::UNLOCK_PATH: - if (!interpreter_context->db->UnlockPath()) { - throw QueryRuntimeException("Failed to unlock the data directory"); - } - break; - } - - return PreparedQuery{callback.header, std::move(parsed_query.required_privileges), - [](AnyStream *stream, std::optional n) -> std::optional { + return PreparedQuery{{}, + std::move(parsed_query.required_privileges), + [interpreter_context, action = lock_path_query->action_]( + AnyStream *stream, std::optional n) -> std::optional { + switch (action) { + case LockPathQuery::Action::LOCK_PATH: + if (!interpreter_context->db->LockPath()) { + throw QueryRuntimeException("Failed to lock the data directory"); + } + break; + case LockPathQuery::Action::UNLOCK_PATH: + if (!interpreter_context->db->UnlockPath()) { + throw QueryRuntimeException("Failed to unlock the data directory"); + } + break; + } return QueryHandlerResult::COMMIT; }, RWType::NONE}; @@ -1038,14 +1035,14 @@ PreparedQuery PrepareFreeMemoryQuery(ParsedQuery parsed_query, const bool in_exp throw FreeMemoryModificationInMulticommandTxException(); } - interpreter_context->db->FreeMemory(); - - return PreparedQuery{{}, - std::move(parsed_query.required_privileges), - [](AnyStream *stream, std::optional n) -> std::optional { - return QueryHandlerResult::COMMIT; - }, - RWType::NONE}; + return PreparedQuery{ + {}, + std::move(parsed_query.required_privileges), + [interpreter_context](AnyStream *stream, std::optional n) -> std::optional { + interpreter_context->db->FreeMemory(); + return QueryHandlerResult::COMMIT; + }, + RWType::NONE}; } TriggerEventType 
ToTriggerEventType(const TriggerQuery::EventType event_type) { @@ -1085,19 +1082,24 @@ TriggerEventType ToTriggerEventType(const TriggerQuery::EventType event_type) { Callback CreateTrigger(TriggerQuery *trigger_query, const std::map &user_parameters, InterpreterContext *interpreter_context, DbAccessor *dba) { - return {{}, [trigger_query, interpreter_context, dba, &user_parameters]() -> std::vector> { - interpreter_context->trigger_store->AddTrigger( - trigger_query->trigger_name_, trigger_query->statement_, user_parameters, - ToTriggerEventType(trigger_query->event_type_), - trigger_query->before_commit_ ? TriggerPhase::BEFORE_COMMIT : TriggerPhase::AFTER_COMMIT, - &interpreter_context->ast_cache, dba, &interpreter_context->antlr_lock); - return {}; - }}; + return { + {}, + [trigger_name = std::move(trigger_query->trigger_name_), trigger_statement = std::move(trigger_query->statement_), + event_type = trigger_query->event_type_, before_commit = trigger_query->before_commit_, interpreter_context, dba, + user_parameters]() -> std::vector> { + interpreter_context->trigger_store->AddTrigger( + trigger_name, trigger_statement, user_parameters, ToTriggerEventType(event_type), + before_commit ? 
TriggerPhase::BEFORE_COMMIT : TriggerPhase::AFTER_COMMIT, &interpreter_context->ast_cache, + dba, &interpreter_context->antlr_lock); + return {}; + }}; } Callback DropTrigger(TriggerQuery *trigger_query, InterpreterContext *interpreter_context) { - return {{}, [trigger_query, interpreter_context]() -> std::vector> { - interpreter_context->trigger_store->DropTrigger(trigger_query->trigger_name_); + return {{}, + [trigger_name = std::move(trigger_query->trigger_name_), + interpreter_context]() -> std::vector> { + interpreter_context->trigger_store->DropTrigger(trigger_name); return {}; }}; } @@ -1143,11 +1145,13 @@ PreparedQuery PrepareTriggerQuery(ParsedQuery parsed_query, const bool in_explic } }(); - auto results = callback.fn(); - return PreparedQuery{std::move(callback.header), std::move(parsed_query.required_privileges), - [pull_plan = std::make_shared(std::move(results))]( - AnyStream *stream, std::optional n) -> std::optional { + [callback_fn = std::move(callback.fn), pull_plan = std::shared_ptr{nullptr}]( + AnyStream *stream, std::optional n) mutable -> std::optional { + if (UNLIKELY(!pull_plan)) { + pull_plan = std::make_shared(callback_fn()); + } + if (pull_plan->Pull(stream, n)) { return QueryHandlerResult::COMMIT; } diff --git a/src/query/interpreter.hpp b/src/query/interpreter.hpp index f443caf89..87bec040d 100644 --- a/src/query/interpreter.hpp +++ b/src/query/interpreter.hpp @@ -99,11 +99,11 @@ class ReplicationQueryHandler { ReplicationQueryHandler() = default; virtual ~ReplicationQueryHandler() = default; - ReplicationQueryHandler(const ReplicationQueryHandler &) = delete; - ReplicationQueryHandler &operator=(const ReplicationQueryHandler &) = delete; + ReplicationQueryHandler(const ReplicationQueryHandler &) = default; + ReplicationQueryHandler &operator=(const ReplicationQueryHandler &) = default; - ReplicationQueryHandler(ReplicationQueryHandler &&) = delete; - ReplicationQueryHandler &operator=(ReplicationQueryHandler &&) = delete; + 
ReplicationQueryHandler(ReplicationQueryHandler &&) = default; + ReplicationQueryHandler &operator=(ReplicationQueryHandler &&) = default; struct Replica { std::string name; diff --git a/src/query/trigger.cpp b/src/query/trigger.cpp index 401161c98..401e43d50 100644 --- a/src/query/trigger.cpp +++ b/src/query/trigger.cpp @@ -240,38 +240,45 @@ TriggerStore::TriggerStore(std::filesystem::path directory, utils::SkipList(); if (!json_trigger_data["phase"].is_number_integer()) { - spdlog::debug("Invalid state of the trigger data"); + spdlog::warn(invalid_state_message); continue; } const auto phase = json_trigger_data["phase"].get(); if (!json_trigger_data["event_type"].is_number_integer()) { - spdlog::debug("Invalid state of the trigger data"); + spdlog::warn(invalid_state_message); continue; } const auto event_type = json_trigger_data["event_type"].get(); if (!json_trigger_data["user_parameters"].is_object()) { - spdlog::debug("Invalid state of the trigger data"); + spdlog::warn(invalid_state_message); continue; } const auto user_parameters = serialization::DeserializePropertyValueMap(json_trigger_data["user_parameters"]); @@ -280,7 +287,7 @@ TriggerStore::TriggerStore(std::filesystem::path directory, utils::SkipList Date: Wed, 2 Jun 2021 11:10:16 +0200 Subject: [PATCH 52/63] Remove louvain and connectivity modules (#160) --- query_modules/CMakeLists.txt | 5 - query_modules/connectivity/CMakeLists.txt | 18 - .../connectivity/src/connectivity_module.cpp | 131 ------- query_modules/louvain/CMakeLists.txt | 33 -- .../louvain/src/algorithms/algorithms.hpp | 18 - .../louvain/src/algorithms/louvain.cpp | 163 -------- .../louvain/src/data_structures/graph.cpp | 99 ----- .../louvain/src/data_structures/graph.hpp | 125 ------- query_modules/louvain/src/louvain_module.cpp | 228 ------------ query_modules/louvain/src/main.cpp | 28 -- query_modules/louvain/test/.clang-tidy | 80 ---- query_modules/louvain/test/CMakeLists.txt | 3 - .../louvain/test/unit/CMakeLists.txt | 28 -- 
query_modules/louvain/test/unit/graph.cpp | 349 ------------------ query_modules/louvain/test/unit/utils.cpp | 32 -- query_modules/louvain/test/unit/utils.hpp | 18 - 16 files changed, 1358 deletions(-) delete mode 100644 query_modules/connectivity/CMakeLists.txt delete mode 100644 query_modules/connectivity/src/connectivity_module.cpp delete mode 100644 query_modules/louvain/CMakeLists.txt delete mode 100644 query_modules/louvain/src/algorithms/algorithms.hpp delete mode 100644 query_modules/louvain/src/algorithms/louvain.cpp delete mode 100644 query_modules/louvain/src/data_structures/graph.cpp delete mode 100644 query_modules/louvain/src/data_structures/graph.hpp delete mode 100644 query_modules/louvain/src/louvain_module.cpp delete mode 100644 query_modules/louvain/src/main.cpp delete mode 100644 query_modules/louvain/test/.clang-tidy delete mode 100644 query_modules/louvain/test/CMakeLists.txt delete mode 100644 query_modules/louvain/test/unit/CMakeLists.txt delete mode 100644 query_modules/louvain/test/unit/graph.cpp delete mode 100644 query_modules/louvain/test/unit/utils.cpp delete mode 100644 query_modules/louvain/test/unit/utils.hpp diff --git a/query_modules/CMakeLists.txt b/query_modules/CMakeLists.txt index d7b6cef9a..7efddb2c6 100644 --- a/query_modules/CMakeLists.txt +++ b/query_modules/CMakeLists.txt @@ -35,8 +35,3 @@ install(FILES graph_analyzer.py DESTINATION lib/memgraph/query_modules) install(FILES mgp_networkx.py DESTINATION lib/memgraph/query_modules) install(FILES nxalg.py DESTINATION lib/memgraph/query_modules) install(FILES wcc.py DESTINATION lib/memgraph/query_modules) - -if (MG_ENTERPRISE) - add_subdirectory(louvain) - add_subdirectory(connectivity) -endif() diff --git a/query_modules/connectivity/CMakeLists.txt b/query_modules/connectivity/CMakeLists.txt deleted file mode 100644 index 185d747a5..000000000 --- a/query_modules/connectivity/CMakeLists.txt +++ /dev/null @@ -1,18 +0,0 @@ -set(MODULE src/connectivity_module.cpp) - 
-include_directories(src) - -add_library(connectivity SHARED ${MODULE}) -target_include_directories(connectivity PRIVATE ${CMAKE_SOURCE_DIR}/include) - -# Strip the library in release build. -string(TOLOWER ${CMAKE_BUILD_TYPE} lower_build_type) -if (lower_build_type STREQUAL "release") - add_custom_command(TARGET connectivity POST_BUILD - COMMAND strip -s $ - COMMENT "Stripping symbols and sections from connectivity module") -endif() - -install(PROGRAMS $ - DESTINATION lib/memgraph/query_modules - RENAME connectivity.so) diff --git a/query_modules/connectivity/src/connectivity_module.cpp b/query_modules/connectivity/src/connectivity_module.cpp deleted file mode 100644 index f87b34e2a..000000000 --- a/query_modules/connectivity/src/connectivity_module.cpp +++ /dev/null @@ -1,131 +0,0 @@ -#include "mg_procedure.h" - -#include -#include - -// Finds weakly connected components of a graph. -// Time complexity: O(|V|+|E|) -static void weak(const mgp_list *args, const mgp_graph *graph, - mgp_result *result, mgp_memory *memory) { - std::unordered_map vertex_component; - mgp_vertices_iterator *vertices_iterator = - mgp_graph_iter_vertices(graph, memory); - if (vertices_iterator == nullptr) { - mgp_result_set_error_msg(result, "Not enough memory"); - return; - } - - int64_t curr_component = 0; - for (const mgp_vertex *vertex = mgp_vertices_iterator_get(vertices_iterator); - vertex != nullptr; - vertex = mgp_vertices_iterator_next(vertices_iterator)) { - mgp_vertex_id vertex_id = mgp_vertex_get_id(vertex); - if (vertex_component.find(vertex_id.as_int) != vertex_component.end()) - continue; - - // run bfs from current vertex - std::queue q; - q.push(vertex_id.as_int); - vertex_component[vertex_id.as_int] = curr_component; - while (!q.empty()) { - mgp_vertex *v = mgp_graph_get_vertex_by_id(graph, {q.front()}, memory); - if (v == nullptr) { - mgp_vertices_iterator_destroy(vertices_iterator); - mgp_result_set_error_msg(result, "Not enough memory"); - return; - } - - q.pop(); - - 
// iterate over inbound edges - mgp_edges_iterator *edges_iterator = mgp_vertex_iter_in_edges(v, memory); - if (edges_iterator == nullptr) { - mgp_vertex_destroy(v); - mgp_vertices_iterator_destroy(vertices_iterator); - mgp_result_set_error_msg(result, "Not enough memory"); - return; - } - - for (const mgp_edge *edge = mgp_edges_iterator_get(edges_iterator); - edge != nullptr; edge = mgp_edges_iterator_next(edges_iterator)) { - mgp_vertex_id next_id = mgp_vertex_get_id(mgp_edge_get_from(edge)); - if (vertex_component.find(next_id.as_int) != vertex_component.end()) - continue; - vertex_component[next_id.as_int] = curr_component; - q.push(next_id.as_int); - } - - // iterate over outbound edges - mgp_edges_iterator_destroy(edges_iterator); - edges_iterator = mgp_vertex_iter_out_edges(v, memory); - if (edges_iterator == nullptr) { - mgp_vertex_destroy(v); - mgp_vertices_iterator_destroy(vertices_iterator); - mgp_result_set_error_msg(result, "Not enough memory"); - return; - } - - for (const mgp_edge *edge = mgp_edges_iterator_get(edges_iterator); - edge != nullptr; edge = mgp_edges_iterator_next(edges_iterator)) { - mgp_vertex_id next_id = mgp_vertex_get_id(mgp_edge_get_to(edge)); - if (vertex_component.find(next_id.as_int) != vertex_component.end()) - continue; - vertex_component[next_id.as_int] = curr_component; - q.push(next_id.as_int); - } - - mgp_vertex_destroy(v); - mgp_edges_iterator_destroy(edges_iterator); - } - - ++curr_component; - } - - mgp_vertices_iterator_destroy(vertices_iterator); - - for (const auto &p : vertex_component) { - mgp_result_record *record = mgp_result_new_record(result); - if (record == nullptr) { - mgp_result_set_error_msg(result, "Not enough memory"); - return; - } - - mgp_value *mem_id_value = mgp_value_make_int(p.first, memory); - if (mem_id_value == nullptr) { - mgp_result_set_error_msg(result, "Not enough memory"); - return; - } - - mgp_value *comp_value = mgp_value_make_int(p.second, memory); - if (comp_value == nullptr) { - 
mgp_value_destroy(mem_id_value); - mgp_result_set_error_msg(result, "Not enough memory"); - return; - } - - int mem_id_inserted = mgp_result_record_insert(record, "id", mem_id_value); - int comp_inserted = - mgp_result_record_insert(record, "component", comp_value); - - mgp_value_destroy(mem_id_value); - mgp_value_destroy(comp_value); - - if (!mem_id_inserted || !comp_inserted) { - mgp_result_set_error_msg(result, "Not enough memory"); - return; - } - } -} - -extern "C" int mgp_init_module(struct mgp_module *module, - struct mgp_memory *memory) { - struct mgp_proc *wcc_proc = - mgp_module_add_read_procedure(module, "weak", weak); - if (!mgp_proc_add_result(wcc_proc, "id", mgp_type_int())) return 1; - if (!mgp_proc_add_result(wcc_proc, "component", mgp_type_int())) return 1; - return 0; -} - -extern "C" int mgp_shutdown_module() { - return 0; -} diff --git a/query_modules/louvain/CMakeLists.txt b/query_modules/louvain/CMakeLists.txt deleted file mode 100644 index aff268e9b..000000000 --- a/query_modules/louvain/CMakeLists.txt +++ /dev/null @@ -1,33 +0,0 @@ -set(MAIN src/main.cpp) -set(MODULE src/louvain_module.cpp) -set(SOURCES src/algorithms/louvain.cpp - src/data_structures/graph.cpp) - -include_directories(src) - -add_library(louvain-core STATIC ${SOURCES}) -set_target_properties(louvain-core PROPERTIES POSITION_INDEPENDENT_CODE ON) - -add_executable(louvain-main ${MAIN}) -target_link_libraries(louvain-main louvain-core) - -enable_testing() -add_subdirectory(test) - -add_library(louvain SHARED ${MODULE}) -target_link_libraries(louvain louvain-core) -target_include_directories(louvain PRIVATE ${CMAKE_SOURCE_DIR}/include) - -# Strip the library in release build. 
-string(TOLOWER ${CMAKE_BUILD_TYPE} lower_build_type) -if (lower_build_type STREQUAL "release") - add_custom_command(TARGET louvain POST_BUILD - COMMAND strip -s $ - COMMENT "Stripping symbols and sections from louvain module") -endif() - -if (NOT MG_COMMUNITY) - install(PROGRAMS $ - DESTINATION lib/memgraph/query_modules - RENAME louvain.so) -endif() diff --git a/query_modules/louvain/src/algorithms/algorithms.hpp b/query_modules/louvain/src/algorithms/algorithms.hpp deleted file mode 100644 index 4aa1f4bb5..000000000 --- a/query_modules/louvain/src/algorithms/algorithms.hpp +++ /dev/null @@ -1,18 +0,0 @@ -/// @file -/// -/// The file contains function declarations of several community-detection -/// graph algorithms. - -#pragma once - -#include "data_structures/graph.hpp" - -namespace algorithms { - /// Detects communities of an unidrected, weighted graph using the Louvain - /// algorithm. The algorithm attempts to maximze the modularity of a weighted - /// graph. - /// - /// @param graph pointer to an undirected, weighted graph which may contain - /// self-loops. - void Louvain(comdata::Graph *graph); -} // namespace algorithms diff --git a/query_modules/louvain/src/algorithms/louvain.cpp b/query_modules/louvain/src/algorithms/louvain.cpp deleted file mode 100644 index 2be08385c..000000000 --- a/query_modules/louvain/src/algorithms/louvain.cpp +++ /dev/null @@ -1,163 +0,0 @@ -#include "algorithms/algorithms.hpp" - -#include -#include -#include -#include - -namespace { - -void OptimizeLocally(comdata::Graph *graph) { - // We will consider local optimizations uniformly at random. 
- std::random_device rd; - std::mt19937 g(rd()); - std::vector p(graph->Size()); - std::iota(p.begin(), p.end(), 0); - std::shuffle(p.begin(), p.end(), g); - - // Modularity of a graph can be expressed as: - // - // Q = 1 / (2m) * sum_over_pairs_of_nodes[(Aij - ki * kj / 2m) * delta(ci, cj)] - // - // where m is the sum of all weights in the graph, - // Aij is the weight on edge that connects i and j (i=j for a self-loop) - // ki is the sum of weights incident to node i - // ci is the community of node i - // delta(a, b) is the Kronecker delta function. - // - // With some simple algebraic manipulations, we can transform the formula into: - // - // Q = sum_over_components[M * ((sum_over_pairs(Aij + M * ki * kj)))] = - // = sum_over_components[M * (sum_over_pairs(Aij) + M * sum_over_nodes^2(ki))] = - // = sum_over_components[M * (w_contrib(ci) + M * k_contrib^2(ci))] - // - // where M = 1 / (2m) - // - // Therefore, we could store for each community the following: - // * Weight contribution (w_contrib) - // * Weighted degree contribution (k_contrib) - // - // This allows us to efficiently remove a node from one community and insert - // it into a community of its neighbour without the need to recalculate - // modularity from scratch. 
- - std::unordered_map w_contrib; - std::unordered_map k_contrib; - - for (uint32_t node_id = 0; node_id < graph->Size(); ++node_id) { - k_contrib[graph->Community(node_id)] += graph->IncidentWeight(node_id); - for (const auto &neigh : graph->Neighbours(node_id)) { - uint32_t nxt_id = neigh.dest; - double w = neigh.weight; - if (graph->Community(node_id) == graph->Community(nxt_id)) - w_contrib[graph->Community(node_id)] += w; - } - } - - bool stable = false; - double total_w = graph->TotalWeight(); - - while (!stable) { - stable = true; - for (uint32_t node_id : p) { - std::unordered_map sum_w; - double self_loop = 0; - sum_w[graph->Community(node_id)] = 0; - for (const auto &neigh : graph->Neighbours(node_id)) { - uint32_t nxt_id = neigh.dest; - double weight = neigh.weight; - if (nxt_id == node_id) { - self_loop += weight; - continue; - } - sum_w[graph->Community(nxt_id)] += weight; - } - - uint32_t my_c = graph->Community(node_id); - - uint32_t best_c = my_c; - double best_dq = 0; - - for (const auto &p : sum_w) { - if (p.first == my_c) continue; - uint32_t nxt_c = p.first; - double dq = 0; - - // contributions before swap (dq = d_after - d_before) - for (uint32_t c : {my_c, nxt_c}) - dq -= w_contrib[c] - k_contrib[c] * k_contrib[c] / (2.0 * total_w); - - // leave the current community - dq += (w_contrib[my_c] - 2.0 * sum_w[my_c] - self_loop) - - (k_contrib[my_c] - graph->IncidentWeight(node_id)) * - (k_contrib[my_c] - graph->IncidentWeight(node_id)) / - (2.0 * total_w); - - // join a new community - dq += (w_contrib[nxt_c] + 2.0 * sum_w[nxt_c] + self_loop) - - (k_contrib[nxt_c] + graph->IncidentWeight(node_id)) * - (k_contrib[nxt_c] + graph->IncidentWeight(node_id)) / - (2.0 * total_w); - - if (dq > best_dq) { - best_dq = dq; - best_c = nxt_c; - } - } - - if (best_c != my_c) { - graph->SetCommunity(node_id, best_c); - w_contrib[my_c] -= 2.0 * sum_w[my_c] + self_loop; - k_contrib[my_c] -= graph->IncidentWeight(node_id); - w_contrib[best_c] += 2.0 * 
sum_w[best_c] + self_loop; - k_contrib[best_c] += graph->IncidentWeight(node_id); - stable = false; - } - } - } -} - -} // anonymous namespace - -namespace algorithms { - -void Louvain(comdata::Graph *graph) { - OptimizeLocally(graph); - - // Collapse the locally optimized graph. - uint32_t collapsed_nodes = graph->NormalizeCommunities(); - if (collapsed_nodes == graph->Size()) return; - comdata::Graph collapsed_graph(collapsed_nodes); - std::map, double> collapsed_edges; - - for (uint32_t node_id = 0; node_id < graph->Size(); ++node_id) { - std::unordered_map edges; - for (const auto &neigh : graph->Neighbours(node_id)) { - uint32_t nxt_id = neigh.dest; - double weight = neigh.weight; - if (graph->Community(nxt_id) < graph->Community(node_id)) continue; - edges[graph->Community(nxt_id)] += weight; - } - for (const auto &neigh : edges) { - uint32_t a = std::min(graph->Community(node_id), neigh.first); - uint32_t b = std::max(graph->Community(node_id), neigh.first); - collapsed_edges[{a, b}] += neigh.second; - } - } - - for (const auto &p : collapsed_edges) - collapsed_graph.AddEdge(p.first.first, p.first.second, p.second); - - // Repeat until no local optimizations can be found. - Louvain(&collapsed_graph); - - // Propagate results from collapsed graph. 
- for (uint32_t node_id = 0; node_id < graph->Size(); ++node_id) { - graph->SetCommunity(node_id, - collapsed_graph.Community(graph->Community(node_id))); - } - - graph->NormalizeCommunities(); -} - -} // namespace algorithms diff --git a/query_modules/louvain/src/data_structures/graph.cpp b/query_modules/louvain/src/data_structures/graph.cpp deleted file mode 100644 index 8f4f16217..000000000 --- a/query_modules/louvain/src/data_structures/graph.cpp +++ /dev/null @@ -1,99 +0,0 @@ -#include "data_structures/graph.hpp" - -#include -#include -#include -#include -#include -#include - -namespace comdata { - -Graph::Graph(uint32_t n_nodes) : n_nodes_(n_nodes), total_w_(0) { - adj_list_.resize(n_nodes, {}); - inc_w_.resize(n_nodes, 0); - - // each node starts as its own separate community. - community_.resize(n_nodes); - std::iota(community_.begin(), community_.end(), 0); -} - -uint32_t Graph::Size() const { return n_nodes_; } - -uint32_t Graph::Community(uint32_t node) const { return community_.at(node); } - -void Graph::SetCommunity(uint32_t node, uint32_t c) { community_.at(node) = c; } - -uint32_t Graph::NormalizeCommunities() { - std::set c_id(community_.begin(), community_.end()); - std::unordered_map cmap; - uint32_t id = 0; - for (uint32_t c : c_id) { - cmap[c] = id; - ++id; - } - for (uint32_t node_id = 0; node_id < n_nodes_; ++node_id) - community_[node_id] = cmap[community_[node_id]]; - return id; -} - -void Graph::AddEdge(uint32_t node1, uint32_t node2, double weight) { - if (node1 >= n_nodes_ || node2 >= n_nodes_) - throw std::out_of_range("Node index out of range"); - if (weight <= 0) throw std::out_of_range("Weights must be positive"); - if (edges_.find({node1, node2}) != edges_.end()) - throw std::invalid_argument("Edge already exists"); - - edges_.emplace(node1, node2); - edges_.emplace(node2, node1); - - total_w_ += weight; - - adj_list_[node1].emplace_back(node2, weight); - inc_w_[node1] += weight; - - if (node1 != node2) { - 
adj_list_[node2].emplace_back(node1, weight); - inc_w_[node2] += weight; - } -} - -uint32_t Graph::Degree(uint32_t node) const { - return static_cast(adj_list_.at(node).size()); -} - -double Graph::IncidentWeight(uint32_t node) const { return inc_w_.at(node); } - -double Graph::TotalWeight() const { return total_w_; } - -double Graph::Modularity() const { - double ret = 0; - // Since all weights should be positive, this implies that our graph has - // no edges. - if (total_w_ == 0) return 0; - - std::unordered_map weight_c; - std::unordered_map degree_c; - - for (uint32_t i = 0; i < n_nodes_; ++i) { - degree_c[Community(i)] += IncidentWeight(i); - for (const auto &neigh : adj_list_[i]) { - uint32_t j = neigh.dest; - double w = neigh.weight; - if (Community(i) != Community(j)) continue; - weight_c[Community(i)] += w; - } - } - - for (const auto &p : degree_c) - ret += weight_c[p.first] - (p.second * p.second) / (2 * total_w_); - - ret /= 2 * total_w_; - return ret; -} - -const std::vector &Graph::Neighbours(uint32_t node) const { - return adj_list_.at(node); -} - -} // namespace comdata diff --git a/query_modules/louvain/src/data_structures/graph.hpp b/query_modules/louvain/src/data_structures/graph.hpp deleted file mode 100644 index a818fd2fd..000000000 --- a/query_modules/louvain/src/data_structures/graph.hpp +++ /dev/null @@ -1,125 +0,0 @@ -/// @file - -#pragma once - -#include -#include -#include - -namespace comdata { - -struct Neighbour { - uint32_t dest; - double weight; - Neighbour(uint32_t d, double w) : dest(d), weight(w) {} -}; - -/// Class which models a weighted, undirected graph with necessary -/// functionalities for community detection algorithms. -class Graph { -public: - /// Constructs a new graph with a given number of nodes and no edges between - /// them. - /// - /// The implementation assumes (and enforces) that all nodes - /// are indexed from 0 to n_nodes. - /// - /// @param n_nodes Number of nodes in the graph. 
- explicit Graph(uint32_t n_nodes); - - /// @return number of nodes in the graph. - uint32_t Size() const; - - /// Adds a bidirectional, weighted edge to the graph between the given - /// nodes. If both given nodes are the same, the method inserts a weighted - /// self-loop. - /// - /// There should be no edges between the given nodes when before invoking - /// this method. - /// - /// @param node1 index of an incident node. - /// @param node2 index of an incident node. - /// @param weight real value which represents the weight of the edge. - /// - /// @throw std::out_of_range - /// @throw std::invalid_argument - void AddEdge(uint32_t node1, uint32_t node2, double weight); - - /// @param node index of node. - /// - /// @return community where the node belongs to. - /// - /// @throw std::out_of_range - uint32_t Community(uint32_t node) const; - - /// Adds a given node to a given community. - /// - /// @param node index of node. - /// @param c community where the given node should go in. - /// - /// @throw std::out_of_range - void SetCommunity(uint32_t node, uint32_t c); - - /// Normalizes the values of communities. More precisely, after invoking this - /// method communities will be indexed by successive integers starting from 0. - /// - /// Note: this method is computationally expensive and takes O(|V|) - /// time, i.e., it traverses all nodes in the graph. - /// - /// @return number of communities in the graph - uint32_t NormalizeCommunities(); - - /// Returns the number of incident edges to a given node. Self-loops - /// contribute a single edge to the degree. - /// - /// @param node index of node. - /// - /// @return degree of given node. - /// - /// @throw std::out_of_range - uint32_t Degree(uint32_t node) const; - - /// Returns the total weight of incident edges to a given node. Weight - /// of a self loop contributes once to the total sum. - /// - /// @param node index of node. - /// - /// @return total incident weight of a given node. 
- /// - /// @throw std::out_of_range - double IncidentWeight(uint32_t node) const; - - /// @return total weight of all edges in a graph. - double TotalWeight() const; - - /// Calculates the modularity of the graph which is defined as a real value - /// between -1 and 1 that measures the density of links inside communities - /// compared to links between communities. - /// - /// Note: this method is computationally expensive and takes O(|V| + |E|) - /// time, i.e., it traverses the entire graph. - /// - /// @return modularity of the graph. - double Modularity() const; - - /// Returns nodes adjacent to a given node. - /// - /// @param node index of node. - /// - /// @return list of neighbouring nodes. - /// - /// @throw std::out_of_range - const std::vector& Neighbours(uint32_t node) const; - -private: - uint32_t n_nodes_; - double total_w_; - - std::vector> adj_list_; - std::set> edges_; - - std::vector inc_w_; - std::vector community_; -}; - -} // namespace comdata diff --git a/query_modules/louvain/src/louvain_module.cpp b/query_modules/louvain/src/louvain_module.cpp deleted file mode 100644 index 110c79bdc..000000000 --- a/query_modules/louvain/src/louvain_module.cpp +++ /dev/null @@ -1,228 +0,0 @@ -#include "mg_procedure.h" - -#include -#include -#include - -#include "algorithms/algorithms.hpp" -#include "data_structures/graph.hpp" - -namespace { - -std::optional> NormalizeVertexIds( - const mgp_graph *graph, mgp_result *result, mgp_memory *memory) { - std::unordered_map mem_to_louv_id; - mgp_vertices_iterator *vertices_iterator = - mgp_graph_iter_vertices(graph, memory); - if (vertices_iterator == nullptr) { - mgp_result_set_error_msg(result, "Not enough memory!"); - return std::nullopt; - } - - uint32_t louv_id = 0; - for (const mgp_vertex *vertex = mgp_vertices_iterator_get(vertices_iterator); - vertex != nullptr; - vertex = mgp_vertices_iterator_next(vertices_iterator)) { - mgp_vertex_id mem_id = mgp_vertex_get_id(vertex); - mem_to_louv_id[mem_id.as_int] = 
louv_id; - ++louv_id; - } - - mgp_vertices_iterator_destroy(vertices_iterator); - return mem_to_louv_id; -} - -std::optional RunLouvain( - const mgp_graph *graph, mgp_result *result, mgp_memory *memory, - const std::unordered_map &mem_to_louv_id) { - comdata::Graph louvain_graph(mem_to_louv_id.size()); - // Extract the graph structure - // TODO(ipaljak): consider filtering nodes and edges by labels. - for (const auto &p : mem_to_louv_id) { - mgp_vertex *vertex = mgp_graph_get_vertex_by_id(graph, {p.first}, memory); - if (!vertex) { - mgp_result_set_error_msg(result, "Not enough memory!"); - return std::nullopt; - } - - // iterate over inbound edges. This is enough because we will eventually - // iterate over outbound edges in another direction. - mgp_edges_iterator *edges_iterator = - mgp_vertex_iter_in_edges(vertex, memory); - if (edges_iterator == nullptr) { - mgp_vertex_destroy(vertex); - mgp_result_set_error_msg(result, "Not enough memory!"); - return std::nullopt; - } - - for (const mgp_edge *edge = mgp_edges_iterator_get(edges_iterator); - edge != nullptr; edge = mgp_edges_iterator_next(edges_iterator)) { - const mgp_vertex *next_vertex = mgp_edge_get_from(edge); - mgp_vertex_id next_mem_id = mgp_vertex_get_id(next_vertex); - uint32_t next_louv_id; - try { - next_louv_id = mem_to_louv_id.at(next_mem_id.as_int); - } catch (const std::exception &e) { - const auto msg = std::string("[Internal error] ") + e.what(); - mgp_result_set_error_msg(result, msg.c_str()); - return std::nullopt; - } - - // retrieve edge weight (default to 1) - mgp_value *weight_prop = mgp_edge_get_property(edge, "weight", memory); - if (!weight_prop) { - mgp_vertex_destroy(vertex); - mgp_edges_iterator_destroy(edges_iterator); - mgp_result_set_error_msg(result, "Not enough memory"); - } - - double weight = 1; - if (mgp_value_is_double(weight_prop)) - weight = mgp_value_get_double(weight_prop); - if (mgp_value_is_int(weight_prop)) - weight = static_cast(mgp_value_get_int(weight_prop)); - - 
mgp_value_destroy(weight_prop); - - try { - louvain_graph.AddEdge(p.second, next_louv_id, weight); - } catch (const std::exception &e) { - mgp_vertex_destroy(vertex); - mgp_edges_iterator_destroy(edges_iterator); - mgp_result_set_error_msg(result, e.what()); - return std::nullopt; - } - } - - mgp_vertex_destroy(vertex); - mgp_edges_iterator_destroy(edges_iterator); - } - - try { - algorithms::Louvain(&louvain_graph); - } catch (const std::exception &e) { - const auto msg = std::string("[Internal error] ") + e.what(); - mgp_result_set_error_msg(result, msg.c_str()); - return std::nullopt; - } - - return louvain_graph; -} - -void communities(const mgp_list *args, const mgp_graph *graph, - mgp_result *result, mgp_memory *memory) { - try { - // Normalize vertex ids - auto mem_to_louv_id = NormalizeVertexIds(graph, result, memory); - if (!mem_to_louv_id) return; - - // Run louvain - auto louvain_graph = RunLouvain(graph, result, memory, *mem_to_louv_id); - if (!louvain_graph) return; - - // Return node ids and their corresponding communities. 
- for (const auto &p : *mem_to_louv_id) { - mgp_result_record *record = mgp_result_new_record(result); - if (record == nullptr) { - mgp_result_set_error_msg(result, "Not enough memory!"); - return; - } - - mgp_value *mem_id_value = mgp_value_make_int(p.first, memory); - if (mem_id_value == nullptr) { - mgp_result_set_error_msg(result, "Not enough memory!"); - return; - } - - mgp_value *com_value = - mgp_value_make_int(louvain_graph->Community(p.second), memory); - if (com_value == nullptr) { - mgp_value_destroy(mem_id_value); - mgp_result_set_error_msg(result, "Not enough memory!"); - return; - } - - int mem_id_inserted = - mgp_result_record_insert(record, "id", mem_id_value); - int com_inserted = - mgp_result_record_insert(record, "community", com_value); - - mgp_value_destroy(mem_id_value); - mgp_value_destroy(com_value); - - if (!mem_id_inserted || !com_inserted) { - mgp_result_set_error_msg(result, "Not enough memory!"); - return; - } - } - } catch (const std::exception &e) { - mgp_result_set_error_msg(result, e.what()); - return; - } -} - -void modularity(const mgp_list *args, const mgp_graph *graph, - mgp_result *result, mgp_memory *memory) { - try { - // Normalize vertex ids - auto mem_to_louv_id = NormalizeVertexIds(graph, result, memory); - if (!mem_to_louv_id) return; - - // Run louvain - auto louvain_graph = RunLouvain(graph, result, memory, *mem_to_louv_id); - if (!louvain_graph) return; - - // Return graph modularity after Louvain - // TODO(ipaljak) - consider allowing the user to specify seed communities - // and - // yield modularity values both before and after running - // louvain. 
- mgp_result_record *record = mgp_result_new_record(result); - if (record == nullptr) { - mgp_result_set_error_msg(result, "Not enough memory!"); - return; - } - - mgp_value *modularity_value = - mgp_value_make_double(louvain_graph->Modularity(), memory); - if (modularity_value == nullptr) { - mgp_result_set_error_msg(result, "Not enough memory!"); - return; - } - - int value_inserted = - mgp_result_record_insert(record, "modularity", modularity_value); - - mgp_value_destroy(modularity_value); - - if (!value_inserted) { - mgp_result_set_error_msg(result, "Not enough memory!"); - return; - } - } catch (const std::exception &e) { - mgp_result_set_error_msg(result, e.what()); - return; - } -} - -} // namespace - -extern "C" int mgp_init_module(struct mgp_module *module, - struct mgp_memory *memory) { - struct mgp_proc *community_proc = - mgp_module_add_read_procedure(module, "communities", communities); - if (!community_proc) return 1; - if (!mgp_proc_add_result(community_proc, "id", mgp_type_int())) return 1; - if (!mgp_proc_add_result(community_proc, "community", mgp_type_int())) - return 1; - - struct mgp_proc *modularity_proc = - mgp_module_add_read_procedure(module, "modularity", modularity); - if (!modularity_proc) return 1; - if (!mgp_proc_add_result(modularity_proc, "modularity", mgp_type_float())) - return 1; - - return 0; -} - -extern "C" int mgp_shutdown_module() { return 0; } diff --git a/query_modules/louvain/src/main.cpp b/query_modules/louvain/src/main.cpp deleted file mode 100644 index 297efe767..000000000 --- a/query_modules/louvain/src/main.cpp +++ /dev/null @@ -1,28 +0,0 @@ -#include - -#include "algorithms/algorithms.hpp" -#include "data_structures/graph.hpp" - -// A simple program that reads the graph from STDIN and -// outputs the detected communities from louvain along with -// its modularity measure on STDOUT. 
-int main() { - int n; - int m; - std::cin >> n >> m; - comdata::Graph graph(n); - for (int i = 0; i < m; ++i) { - int a; - int b; - double c; - std::cin >> a >> b >> c; - graph.AddEdge(a, b, c); - } - - algorithms::Louvain(&graph); - - for (int i = 0; i < n; ++i) - std::cout << i << " " << graph.Community(i) << "\n"; - std::cout << graph.Modularity() << "\n"; - return 0; -} diff --git a/query_modules/louvain/test/.clang-tidy b/query_modules/louvain/test/.clang-tidy deleted file mode 100644 index 559bdb744..000000000 --- a/query_modules/louvain/test/.clang-tidy +++ /dev/null @@ -1,80 +0,0 @@ ---- -Checks: '*, - -android-*, - -cert-err58-cpp, - -cppcoreguidelines-avoid-c-arrays, - -cppcoreguidelines-avoid-goto, - -cppcoreguidelines-avoid-magic-numbers, - -cppcoreguidelines-macro-usage, - -cppcoreguidelines-no-malloc, - -cppcoreguidelines-non-private-member-variables-in-classes, - -cppcoreguidelines-owning-memory, - -cppcoreguidelines-pro-bounds-array-to-pointer-decay, - -cppcoreguidelines-pro-bounds-constant-array-index, - -cppcoreguidelines-pro-bounds-pointer-arithmetic, - -cppcoreguidelines-pro-type-member-init, - -cppcoreguidelines-pro-type-reinterpret-cast, - -cppcoreguidelines-pro-type-static-cast-downcast, - -cppcoreguidelines-pro-type-union-access, - -cppcoreguidelines-pro-type-vararg, - -cppcoreguidelines-special-member-functions, - -fuchsia-default-arguments, - -fuchsia-default-arguments-calls, - -fuchsia-default-arguments-declarations, - -fuchsia-overloaded-operator, - -fuchsia-statically-constructed-objects, - -fuchsia-trailing-return, - -fuchsia-virtual-inheritance, - -google-explicit-constructor, - -google-readability-*, - -hicpp-avoid-c-arrays, - -hicpp-avoid-goto, - -hicpp-braces-around-statements, - -hicpp-member-init, - -hicpp-no-array-decay, - -hicpp-no-assembler, - -hicpp-no-malloc, - -hicpp-special-member-functions, - -hicpp-use-equals-default, - -hicpp-vararg, - -llvm-header-guard, - -misc-non-private-member-variables-in-classes, - 
-misc-unused-parameters, - -modernize-avoid-c-arrays, - -modernize-concat-nested-namespaces, - -modernize-pass-by-value, - -modernize-use-equals-default, - -modernize-use-nodiscard, - -modernize-use-trailing-return-type, - -performance-unnecessary-value-param, - -readability-braces-around-statements, - -readability-else-after-return, - -readability-implicit-bool-conversion, - -readability-magic-numbers, - -readability-named-parameter' -WarningsAsErrors: '' -HeaderFilterRegex: '' -AnalyzeTemporaryDtors: false -FormatStyle: none -CheckOptions: - - key: google-readability-braces-around-statements.ShortStatementLines - value: '1' - - key: google-readability-function-size.StatementThreshold - value: '800' - - key: google-readability-namespace-comments.ShortNamespaceLines - value: '10' - - key: google-readability-namespace-comments.SpacesBeforeComments - value: '2' - - key: modernize-loop-convert.MaxCopySize - value: '16' - - key: modernize-loop-convert.MinConfidence - value: reasonable - - key: modernize-loop-convert.NamingStyle - value: CamelCase - - key: modernize-pass-by-value.IncludeStyle - value: llvm - - key: modernize-replace-auto-ptr.IncludeStyle - value: llvm - - key: modernize-use-nullptr.NullMacros - value: 'NULL' -... 
diff --git a/query_modules/louvain/test/CMakeLists.txt b/query_modules/louvain/test/CMakeLists.txt deleted file mode 100644 index eadc4f19a..000000000 --- a/query_modules/louvain/test/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -include_directories(${GTEST_INCLUDE_DIR}) - -add_subdirectory(unit) diff --git a/query_modules/louvain/test/unit/CMakeLists.txt b/query_modules/louvain/test/unit/CMakeLists.txt deleted file mode 100644 index 778cd21d2..000000000 --- a/query_modules/louvain/test/unit/CMakeLists.txt +++ /dev/null @@ -1,28 +0,0 @@ -set(test_prefix louvain__unit__) - -add_custom_target(louvain__unit) - -add_library(louvain-test STATIC utils.cpp) -set_target_properties(louvain-test PROPERTIES POSITION_INDEPENDENT_CODE ON) - -function(add_unit_test test_cpp) - # get exec name (remove extension from the abs path) - get_filename_component(exec_name ${test_cpp} NAME_WE) - set(target_name ${test_prefix}${exec_name}) - add_executable(${target_name} ${test_cpp}) - # OUTPUT_NAME sets the real name of a target when it is built and can be - # used to help create two targets of the same name even though CMake - # requires unique logical target names - set_target_properties(${target_name} PROPERTIES OUTPUT_NAME ${exec_name}) - # TODO: this is a temporary workaround the test build warnings - target_compile_options(${target_name} PRIVATE -Wno-comment -Wno-sign-compare - -Wno-unused-variable) - target_link_libraries(${target_name} spdlog gflags gtest gtest_main Threads::Threads - louvain-core louvain-test) - # register test - add_test(${target_name} ${exec_name}) - # add to unit target - add_dependencies(louvain__unit ${target_name}) -endfunction(add_unit_test) - -add_unit_test(graph.cpp) diff --git a/query_modules/louvain/test/unit/graph.cpp b/query_modules/louvain/test/unit/graph.cpp deleted file mode 100644 index 989e8cafc..000000000 --- a/query_modules/louvain/test/unit/graph.cpp +++ /dev/null @@ -1,349 +0,0 @@ -#include - -#include "data_structures/graph.hpp" -#include 
"utils.hpp" - -// Checks if commmunities of nodes in graph correspond to a given community -// vector. -bool CommunityCheck(const comdata::Graph &graph, - const std::vector &c) { - if (graph.Size() != c.size()) return false; - for (uint32_t node_id = 0; node_id < graph.Size(); ++node_id) - if (graph.Community(node_id) != c[node_id]) return false; - return true; -} - -// Checks if degrees of nodes in graph correspond to a given degree vector. -bool DegreeCheck(const comdata::Graph &graph, - const std::vector °) { - if (graph.Size() != deg.size()) return false; - for (uint32_t node_id = 0; node_id < graph.Size(); ++node_id) - if (graph.Degree(node_id) != deg[node_id]) return false; - return true; -} - -// Checks if incident weights of nodes in graph correspond to a given weight -// vector. -bool IncidentWeightCheck(const comdata::Graph &graph, - const std::vector &inc_w) { - if (graph.Size() != inc_w.size()) return false; - for (uint32_t node_id = 0; node_id < graph.Size(); ++node_id) - if (std::abs(graph.IncidentWeight(node_id) - inc_w[node_id]) > 1e-6) - return false; - return true; -} - -// Sets communities of nodes in graph. Returns true on success. 
-bool SetCommunities(comdata::Graph *graph, const std::vector &c) { - if (graph->Size() != c.size()) return false; - for (uint32_t node_id = 0; node_id < graph->Size(); ++node_id) - graph->SetCommunity(node_id, c[node_id]); - return true; -} - -TEST(Graph, Constructor) { - uint32_t nodes = 100; - comdata::Graph graph(nodes); - ASSERT_EQ(graph.Size(), nodes); - for (uint32_t node_id = 0; node_id < nodes; ++node_id) { - ASSERT_EQ(graph.IncidentWeight(node_id), 0); - ASSERT_EQ(graph.Community(node_id), node_id); - } -} - -TEST(Graph, Size) { - comdata::Graph graph1 = GenRandomUnweightedGraph(0, 0); - comdata::Graph graph2 = GenRandomUnweightedGraph(42, 41); - comdata::Graph graph3 = GenRandomUnweightedGraph(100, 250); - ASSERT_EQ(graph1.Size(), 0); - ASSERT_EQ(graph2.Size(), 42); - ASSERT_EQ(graph3.Size(), 100); -} - -TEST(Graph, Communities) { - comdata::Graph graph = GenRandomUnweightedGraph(100, 250); - - for (int i = 0; i < 100; ++i) graph.SetCommunity(i, i % 5); - for (int i = 0; i < 100; ++i) ASSERT_EQ(graph.Community(i), i % 5); - - // Try to set communities on non-existing nodes - EXPECT_THROW({ graph.SetCommunity(100, 2); }, std::out_of_range); - EXPECT_THROW({ graph.SetCommunity(150, 0); }, std::out_of_range); - - // Try to get a the community of a non-existing node - EXPECT_THROW({ graph.Community(100); }, std::out_of_range); - EXPECT_THROW({ graph.Community(150); }, std::out_of_range); -} - -TEST(Graph, CommunityNormalization) { - // Communities are already normalized. - comdata::Graph graph = GenRandomUnweightedGraph(5, 10); - std::vector init_c = {0, 2, 1, 3, 4}; - std::vector final_c = {0, 2, 1, 3, 4}; - ASSERT_TRUE(SetCommunities(&graph, init_c)); - graph.NormalizeCommunities(); - ASSERT_TRUE(CommunityCheck(graph, final_c)); - - // Each node in its own community. 
- graph = GenRandomUnweightedGraph(5, 10); - init_c = {20, 30, 10, 40, 50}; - final_c = {1, 2, 0, 3, 4}; - ASSERT_TRUE(SetCommunities(&graph, init_c)); - graph.NormalizeCommunities(); - ASSERT_TRUE(CommunityCheck(graph, final_c)); - - // Multiple nodes in the same community - graph = GenRandomUnweightedGraph(7, 10); - init_c = {13, 99, 13, 13, 1, 99, 1}; - final_c = {1, 2, 1, 1, 0, 2, 0}; - ASSERT_TRUE(SetCommunities(&graph, init_c)); - graph.NormalizeCommunities(); - ASSERT_TRUE(CommunityCheck(graph, final_c)); -} - -TEST(Graph, AddEdge) { - comdata::Graph graph = GenRandomUnweightedGraph(5, 0); - - // Node out of bounds. - EXPECT_THROW({ graph.AddEdge(1, 5, 7); }, std::out_of_range); - - // Repeated edge - graph.AddEdge(1, 2, 1); - EXPECT_THROW({ graph.AddEdge(1, 2, 7); }, std::invalid_argument); - - // Non-positive edge weight - EXPECT_THROW({ graph.AddEdge(2, 3, -7); }, std::out_of_range); - EXPECT_THROW({ graph.AddEdge(3, 4, 0); }, std::out_of_range); -} - -TEST(Graph, Degrees) { - // Graph without edges - comdata::Graph graph = GenRandomUnweightedGraph(5, 0); - std::vector deg = {0, 0, 0, 0, 0}; - ASSERT_TRUE(DegreeCheck(graph, deg)); - - // Chain - // (0)--(1)--(2)--(3)--(4) - graph = BuildGraph(5, {{0, 1, 1}, {1, 2, 1}, {2, 3, 1}, {3, 4, 1}}); - deg = {1, 2, 2, 2, 1}; - ASSERT_TRUE(DegreeCheck(graph, deg)); - - // Tree - // (0)--(3) - // / \ - // (1) (2) - // | / \ - // (4) (5) (6) - graph = BuildGraph( - 7, {{0, 1, 1}, {0, 2, 1}, {0, 3, 1}, {1, 4, 1}, {2, 5, 1}, {2, 6, 1}}); - deg = {3, 2, 3, 1, 1, 1, 1}; - ASSERT_TRUE(DegreeCheck(graph, deg)); - - // Graph without self-loops - // (0)--(1) - // | \ | \ - // | \ | \ - // (2)--(3)-(4) - graph = BuildGraph(5, {{0, 1, 1}, - {0, 2, 1}, - {0, 3, 1}, - {1, 3, 1}, - {1, 4, 1}, - {2, 3, 1}, - {3, 4, 1}}); - deg = {3, 3, 2, 4, 2}; - ASSERT_TRUE(DegreeCheck(graph, deg)); - - // Graph with self loop [*nodes have self loops] - // (0)--(1*) - // | \ | \ - // | \ | \ - // (2*)--(3)-(4*) - graph = BuildGraph(5, {{0, 1, 
1}, - {0, 2, 1}, - {0, 3, 1}, - {1, 3, 1}, - {1, 4, 1}, - {2, 3, 1}, - {3, 4, 1}, - {1, 1, 1}, - {2, 2, 2}, - {4, 4, 4}}); - deg = {3, 4, 3, 4, 3}; - ASSERT_TRUE(DegreeCheck(graph, deg)); - - // Try to get degree of non-existing nodes - EXPECT_THROW({ graph.Degree(5); }, std::out_of_range); - EXPECT_THROW({ graph.Degree(100); }, std::out_of_range); -} - -TEST(Graph, Weights) { - // Graph without edges - comdata::Graph graph = GenRandomUnweightedGraph(5, 0); - std::vector inc_w = {0, 0, 0, 0, 0}; - ASSERT_TRUE(IncidentWeightCheck(graph, inc_w)); - ASSERT_EQ(graph.TotalWeight(), 0); - - // Chain - // (0)--(1)--(2)--(3)--(4) - graph = BuildGraph(5, {{0, 1, 0.1}, {1, 2, 0.5}, {2, 3, 2.3}, {3, 4, 4.2}}); - inc_w = {0.1, 0.6, 2.8, 6.5, 4.2}; - ASSERT_TRUE(IncidentWeightCheck(graph, inc_w)); - ASSERT_NEAR(graph.TotalWeight(), 7.1, 1e-6); - - // Tree - // (0)--(3) - // / \ - // (1) (2) - // | / \ - // (4) (5) (6) - graph = BuildGraph(7, {{0, 1, 1.3}, - {0, 2, 0.2}, - {0, 3, 1}, - {1, 4, 3.2}, - {2, 5, 4.2}, - {2, 6, 0.7}}); - inc_w = {2.5, 4.5, 5.1, 1, 3.2, 4.2, 0.7}; - ASSERT_TRUE(IncidentWeightCheck(graph, inc_w)); - EXPECT_NEAR(graph.TotalWeight(), 10.6, 1e-6); - - // Graph without self-loops - // (0)--(1) - // | \ | \ - // | \ | \ - // (2)--(3)-(4) - graph = BuildGraph(5, {{0, 1, 0.1}, - {0, 2, 0.2}, - {0, 3, 0.3}, - {1, 3, 0.4}, - {1, 4, 0.5}, - {2, 3, 0.6}, - {3, 4, 0.7}}); - inc_w = {0.6, 1, 0.8, 2, 1.2}; - ASSERT_TRUE(IncidentWeightCheck(graph, inc_w)); - EXPECT_NEAR(graph.TotalWeight(), 2.8, 1e-6); - - // Graph with self loop [*nodes have self loops] - // (0)--(1*) - // | \ | \ - // | \ | \ - // (2*)--(3)-(4*) - graph = BuildGraph(5, {{0, 1, 0.1}, - {0, 2, 0.2}, - {0, 3, 0.3}, - {1, 3, 0.4}, - {1, 4, 0.5}, - {2, 3, 0.6}, - {3, 4, 0.7}, - {1, 1, 0.8}, - {2, 2, 0.9}, - {4, 4, 1}}); - inc_w = {0.6, 1.8, 1.7, 2, 2.2}; - ASSERT_TRUE(IncidentWeightCheck(graph, inc_w)); - EXPECT_NEAR(graph.TotalWeight(), 5.5, 1e-6); - - // Try to get incident weight of non-existing node 
- EXPECT_THROW({ graph.IncidentWeight(5); }, std::out_of_range); - EXPECT_THROW({ graph.IncidentWeight(100); }, std::out_of_range); -} - -TEST(Graph, Modularity) { - // Graph without edges - comdata::Graph graph = GenRandomUnweightedGraph(5, 0); - ASSERT_EQ(graph.Modularity(), 0); - - // Chain - // (0)--(1)--(2)--(3)--(4) - graph = BuildGraph(5, {{0, 1, 0.1}, {1, 2, 0.5}, {2, 3, 2.3}, {3, 4, 4.2}}); - std::vector c = {0, 1, 1, 2, 2}; - SetCommunities(&graph, c); - EXPECT_NEAR(graph.Modularity(), 0.036798254314620096, 1e-6); - - // Tree - // (0)--(3) - // / \ - // (1) (2) - // | / \ - // (4) (5) (6) - graph = BuildGraph(7, {{0, 1, 1.3}, - {0, 2, 0.2}, - {0, 3, 1}, - {1, 4, 3.2}, - {2, 5, 4.2}, - {2, 6, 0.7}}); - c = {0, 0, 1, 0, 0, 1, 2}; - SetCommunities(&graph, c); - EXPECT_NEAR(graph.Modularity(), 0.4424617301530794, 1e-6); - - // Graph without self-loops - // (0)--(1) - // | \ | \ - // | \ | \ - // (2)--(3)-(4) - graph = BuildGraph(5, {{0, 1, 0.1}, - {0, 2, 0.2}, - {0, 3, 0.3}, - {1, 3, 0.4}, - {1, 4, 0.5}, - {2, 3, 0.6}, - {3, 4, 0.7}}); - c = {0, 1, 1, 1, 1}; - SetCommunities(&graph, c); - EXPECT_NEAR(graph.Modularity(), -0.022959183673469507, 1e-6); - - // Graph with self loop [*nodes have self loops] - // (0)--(1*) - // | \ | \ - // | \ | \ - // (2*)--(3)-(4*) - graph = BuildGraph(5, {{0, 1, 0.1}, - {0, 2, 0.2}, - {0, 3, 0.3}, - {1, 3, 0.4}, - {1, 4, 0.5}, - {2, 3, 0.6}, - {3, 4, 0.7}, - {1, 1, 0.8}, - {2, 2, 0.9}, - {4, 4, 1}}); - c = {0, 0, 0, 0, 1}; - SetCommunities(&graph, c); - EXPECT_NEAR(graph.Modularity(), 0.188842975206611, 1e-6); - - // Neo4j example graph - // (0)--(1)---(3)--(4) - // \ / \ / - // (2) (5) - graph = BuildGraph(6, {{0, 1, 1}, - {1, 2, 1}, - {0, 2, 1}, - {1, 3, 1}, - {3, 5, 1}, - {5, 4, 1}, - {3, 4, 1}}); - c = {0, 0, 0, 1, 1, 1}; - SetCommunities(&graph, c); - EXPECT_NEAR(graph.Modularity(), 0.3571428571428571, 1e-6); - - // Example graph from wikipedia - // (0)--(1)--(3)--(4)--(5) - // \ / | \ / - // (2) (7) (6) - // / \ - // 
(8)--(9) - graph = BuildGraph(10, {{0, 1, 1}, - {1, 2, 1}, - {0, 2, 1}, - {1, 3, 1}, - {3, 4, 1}, - {4, 5, 1}, - {5, 6, 1}, - {6, 4, 1}, - {3, 7, 1}, - {7, 8, 1}, - {7, 9, 1}, - {8, 9, 1}}); - c = {0, 0, 0, 0, 1, 1, 1, 2, 2, 2}; - SetCommunities(&graph, c); - EXPECT_NEAR(graph.Modularity(), 0.4896, 1e-4); -} diff --git a/query_modules/louvain/test/unit/utils.cpp b/query_modules/louvain/test/unit/utils.cpp deleted file mode 100644 index 681d7e1e8..000000000 --- a/query_modules/louvain/test/unit/utils.cpp +++ /dev/null @@ -1,32 +0,0 @@ -#include "utils.hpp" - -#include - -comdata::Graph BuildGraph( - uint32_t nodes, std::vector> edges) { - comdata::Graph G(nodes); - for (auto &edge : edges) - G.AddEdge(std::get<0>(edge), std::get<1>(edge), std::get<2>(edge)); - return G; -} - -comdata::Graph GenRandomUnweightedGraph(uint32_t nodes, uint32_t edges) { - auto seed = - std::chrono::high_resolution_clock::now().time_since_epoch().count(); - std::mt19937 rng(seed); - std::uniform_int_distribution dist(0, nodes - 1); - std::set> E; - for (uint32_t i = 0; i < edges; ++i) { - int u; - int v; - do { - u = dist(rng); - v = dist(rng); - if (u > v) std::swap(u, v); - } while (u == v || E.find({u, v, 1}) != E.end()); - E.insert({u, v, 1}); - } - return BuildGraph(nodes, std::vector>( - E.begin(), E.end())); -} - diff --git a/query_modules/louvain/test/unit/utils.hpp b/query_modules/louvain/test/unit/utils.hpp deleted file mode 100644 index f7fba53b2..000000000 --- a/query_modules/louvain/test/unit/utils.hpp +++ /dev/null @@ -1,18 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -#include "data_structures/graph.hpp" - -/// Builds the graph from a given number of nodes and a list of edges. -/// Nodes should be 0-indexed and each edge should be provided only once. -comdata::Graph BuildGraph( - uint32_t nodes, std::vector> edges); - -/// Generates random undirected graph with a given number of nodes and edges. 
-/// The generated graph is not picked out of a uniform distribution. All weights -/// are the same and equal to one. -comdata::Graph GenRandomUnweightedGraph(uint32_t nodes, uint32_t edges); From 50f6e348dc909b32dcb7d4afc07f9ed04746cbf2 Mon Sep 17 00:00:00 2001 From: antonio2368 Date: Mon, 7 Jun 2021 10:40:56 +0200 Subject: [PATCH 53/63] Disable google-runtime-references clang-tidy check (#166) --- .clang-tidy | 1 + 1 file changed, 1 insertion(+) diff --git a/.clang-tidy b/.clang-tidy index 7a9d32294..5e357feba 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -26,6 +26,7 @@ Checks: '*, -fuchsia-virtual-inheritance, -google-explicit-constructor, -google-readability-*, + -google-runtime-references, -hicpp-avoid-c-arrays, -hicpp-avoid-goto, -hicpp-braces-around-statements, From 524acb17a191b404c988efd42b2eb010876076d2 Mon Sep 17 00:00:00 2001 From: Kostas Kyrimis Date: Mon, 7 Jun 2021 15:45:05 +0300 Subject: [PATCH 54/63] Add global version allocators for C in query modules (#162) --- include/mg_procedure.h | 133 ++++++++---------- src/query/procedure/mg_procedure_impl.cpp | 37 +++-- src/query/procedure/module.cpp | 3 + src/query/procedure/module.hpp | 4 + tests/e2e/memory/CMakeLists.txt | 9 ++ .../e2e/memory/memory_limit_global_alloc.cpp | 26 ++++ .../memory/memory_limit_global_alloc_proc.cpp | 31 ++++ tests/e2e/memory/procedures/CMakeLists.txt | 5 + .../memory/procedures/global_memory_limit.c | 36 +++++ .../procedures/global_memory_limit_proc.c | 63 +++++++++ tests/e2e/memory/workloads.yaml | 10 ++ tests/e2e/runner.py | 4 + 12 files changed, 279 insertions(+), 82 deletions(-) create mode 100644 tests/e2e/memory/memory_limit_global_alloc.cpp create mode 100644 tests/e2e/memory/memory_limit_global_alloc_proc.cpp create mode 100644 tests/e2e/memory/procedures/CMakeLists.txt create mode 100644 tests/e2e/memory/procedures/global_memory_limit.c create mode 100644 tests/e2e/memory/procedures/global_memory_limit_proc.c diff --git a/include/mg_procedure.h 
b/include/mg_procedure.h index 2a2567aba..3b0168bcb 100644 --- a/include/mg_procedure.h +++ b/include/mg_procedure.h @@ -17,10 +17,11 @@ extern "C" { /// addition to efficiency, Memgraph can set the limit on allowed allocations /// thus providing some safety with regards to memory usage. The allocated /// memory is only valid during the execution of mgp_main. You must not allocate -/// global resources with these functions. None of the functions are +/// global resources with these functions and none of the functions are /// thread-safe, because we provide a single thread of execution when invoking a -/// custom procedure. This allows Memgraph to be more efficient as stated -/// before. +/// custom procedure. For allocating global resources, you can use the _global +/// variations of the aforementioned allocators. This allows Memgraph to be +/// more efficient as explained before. ///@{ /// Provides memory managament access and state. @@ -39,8 +40,7 @@ void *mgp_alloc(struct mgp_memory *memory, size_t size_in_bytes); /// `alignment` must be a power of 2 value. /// The returned pointer must be freed with mgp_free. /// NULL is returned if unable to serve the requested allocation. -void *mgp_aligned_alloc(struct mgp_memory *memory, size_t size_in_bytes, - size_t alignment); +void *mgp_aligned_alloc(struct mgp_memory *memory, size_t size_in_bytes, size_t alignment); /// Deallocate an allocation from mgp_alloc or mgp_aligned_alloc. /// Unlike free, this function is not thread-safe. @@ -48,6 +48,26 @@ void *mgp_aligned_alloc(struct mgp_memory *memory, size_t size_in_bytes, /// The behavior is undefined if `ptr` is not a value returned from a prior /// mgp_alloc or mgp_aligned_alloc call with the corresponding `memory`. void mgp_free(struct mgp_memory *memory, void *ptr); + +/// Allocate a global block of memory with given size in bytes. +/// This function can be used to allocate global memory that persists +/// beyond a single invocation of mgp_main. 
+/// The returned pointer must be freed with mgp_global_free. +/// NULL is returned if unable to serve the requested allocation. +void *mgp_global_alloc(size_t size_in_bytes); + +/// Allocate an aligned global block of memory with given size in bytes. +/// This function can be used to allocate global memory that persists +/// beyond a single invocation of mgp_main. +/// The returned pointer must be freed with mgp_global_free. +/// NULL is returned if unable to serve the requested allocation. +void *mgp_global_aligned_alloc(size_t size_in_bytes, size_t alignment); + +/// Deallocate an allocation from mgp_global_alloc or mgp_global_aligned_alloc. +/// If `ptr` is NULL, this function does nothing. +/// The behavior is undefined if `ptr` is not a value returned from a prior +/// mgp_global_alloc() or mgp_global_aligned_alloc(). +void mgp_global_free(void *p); ///@} /// @name Operations on mgp_value @@ -119,8 +139,7 @@ struct mgp_value *mgp_value_make_double(double val, struct mgp_memory *memory); /// Construct a character string value from a NULL terminated string. /// You need to free the instance through mgp_value_destroy. /// NULL is returned if unable to allocate a mgp_value. -struct mgp_value *mgp_value_make_string(const char *val, - struct mgp_memory *memory); +struct mgp_value *mgp_value_make_string(const char *val, struct mgp_memory *memory); /// Create a mgp_value storing a mgp_list. /// You need to free the instance through mgp_value_destroy. The ownership of @@ -238,8 +257,7 @@ const struct mgp_path *mgp_value_get_path(const struct mgp_value *val); /// of mgp_value, but it will not contain any elements. Therefore, /// mgp_list_size will return 0. /// NULL is returned if unable to allocate a new list. -struct mgp_list *mgp_list_make_empty(size_t capacity, - struct mgp_memory *memory); +struct mgp_list *mgp_list_make_empty(size_t capacity, struct mgp_memory *memory); /// Free the memory used by the given mgp_list and contained elements. 
void mgp_list_destroy(struct mgp_list *list); @@ -288,8 +306,7 @@ void mgp_map_destroy(struct mgp_map *map); /// you still need to free their memory explicitly. /// Return non-zero on success, or 0 if there's no memory to insert a new /// mapping or a previous mapping already exists. -int mgp_map_insert(struct mgp_map *map, const char *key, - const struct mgp_value *value); +int mgp_map_insert(struct mgp_map *map, const char *key, const struct mgp_value *value); /// Return the number of items stored in mgp_map. size_t mgp_map_size(const struct mgp_map *map); @@ -314,8 +331,7 @@ struct mgp_map_items_iterator; /// The returned mgp_map_items_iterator needs to be deallocated with /// mgp_map_items_iterator_destroy. /// NULL is returned if unable to allocate a new iterator. -struct mgp_map_items_iterator *mgp_map_iter_items(const struct mgp_map *map, - struct mgp_memory *memory); +struct mgp_map_items_iterator *mgp_map_iter_items(const struct mgp_map *map, struct mgp_memory *memory); /// Deallocate memory used by mgp_map_items_iterator. void mgp_map_items_iterator_destroy(struct mgp_map_items_iterator *it); @@ -328,27 +344,23 @@ void mgp_map_items_iterator_destroy(struct mgp_map_items_iterator *it); /// as the value before, and use them after invoking /// mgp_map_items_iterator_next. /// NULL is returned if the end of the iteration has been reached. -const struct mgp_map_item *mgp_map_items_iterator_get( - const struct mgp_map_items_iterator *it); +const struct mgp_map_item *mgp_map_items_iterator_get(const struct mgp_map_items_iterator *it); /// Advance the iterator to the next item stored in map and return it. /// The previous pointer obtained through mgp_map_items_iterator_get will /// be invalidated, but the pointers to key and value will remain valid. /// NULL is returned if the end of the iteration has been reached. 
-const struct mgp_map_item *mgp_map_items_iterator_next( - struct mgp_map_items_iterator *it); +const struct mgp_map_item *mgp_map_items_iterator_next(struct mgp_map_items_iterator *it); /// Create a path with the copy of the given starting vertex. /// You need to free the created instance with mgp_path_destroy. /// NULL is returned if unable to allocate a path. -struct mgp_path *mgp_path_make_with_start(const struct mgp_vertex *vertex, - struct mgp_memory *memory); +struct mgp_path *mgp_path_make_with_start(const struct mgp_vertex *vertex, struct mgp_memory *memory); /// Copy a mgp_path. /// Returned pointer must be freed with mgp_path_destroy. /// NULL is returned if unable to allocate a mgp_path. -struct mgp_path *mgp_path_copy(const struct mgp_path *path, - struct mgp_memory *memory); +struct mgp_path *mgp_path_copy(const struct mgp_path *path, struct mgp_memory *memory); /// Free the memory used by the given mgp_path and contained vertices and edges. void mgp_path_destroy(struct mgp_path *path); @@ -370,14 +382,12 @@ size_t mgp_path_size(const struct mgp_path *path); /// Return the vertex from a path at given index. /// The valid index range is [0, mgp_path_size]. /// NULL is returned if index is out of range. -const struct mgp_vertex *mgp_path_vertex_at(const struct mgp_path *path, - size_t index); +const struct mgp_vertex *mgp_path_vertex_at(const struct mgp_path *path, size_t index); /// Return the edge from a path at given index. /// The valid index range is [0, mgp_path_size - 1]. /// NULL is returned if index is out of range. -const struct mgp_edge *mgp_path_edge_at(const struct mgp_path *path, - size_t index); +const struct mgp_edge *mgp_path_edge_at(const struct mgp_path *path, size_t index); /// Return non-zero if given paths are equal, otherwise 0. 
int mgp_path_equal(const struct mgp_path *p1, const struct mgp_path *p2); @@ -408,9 +418,7 @@ struct mgp_result_record *mgp_result_new_record(struct mgp_result *res); /// Return 0 if there's no memory to copy the mgp_value to mgp_result_record or /// if the combination of `field_name` and `val` does not satisfy the /// procedure's result signature. -int mgp_result_record_insert(struct mgp_result_record *record, - const char *field_name, - const struct mgp_value *val); +int mgp_result_record_insert(struct mgp_result_record *record, const char *field_name, const struct mgp_value *val); ///@} /// @name Graph Constructs @@ -446,15 +454,13 @@ struct mgp_property { /// When the mgp_properties_iterator_next is invoked, the previous /// mgp_property is invalidated and its value must not be used. /// NULL is returned if the end of the iteration has been reached. -const struct mgp_property *mgp_properties_iterator_get( - const struct mgp_properties_iterator *it); +const struct mgp_property *mgp_properties_iterator_get(const struct mgp_properties_iterator *it); /// Advance the iterator to the next property and return it. /// The previous mgp_property obtained through mgp_properties_iterator_get /// will be invalidated, and you must not use its value. /// NULL is returned if the end of the iteration has been reached. -const struct mgp_property *mgp_properties_iterator_next( - struct mgp_properties_iterator *it); +const struct mgp_property *mgp_properties_iterator_next(struct mgp_properties_iterator *it); /// Iterator over edges of a vertex. struct mgp_edges_iterator; @@ -475,8 +481,7 @@ struct mgp_vertex_id mgp_vertex_get_id(const struct mgp_vertex *v); /// Copy a mgp_vertex. /// Returned pointer must be freed with mgp_vertex_destroy. /// NULL is returned if unable to allocate a mgp_vertex. 
-struct mgp_vertex *mgp_vertex_copy(const struct mgp_vertex *v, - struct mgp_memory *memory); +struct mgp_vertex *mgp_vertex_copy(const struct mgp_vertex *v, struct mgp_memory *memory); /// Free the memory used by a mgp_vertex. void mgp_vertex_destroy(struct mgp_vertex *v); @@ -495,43 +500,37 @@ struct mgp_label mgp_vertex_label_at(const struct mgp_vertex *v, size_t index); int mgp_vertex_has_label(const struct mgp_vertex *v, struct mgp_label label); /// Return non-zero if the given vertex has a label with given name. -int mgp_vertex_has_label_named(const struct mgp_vertex *v, - const char *label_name); +int mgp_vertex_has_label_named(const struct mgp_vertex *v, const char *label_name); /// Get a copy of a vertex property mapped to a given name. /// Returned value must be freed with mgp_value_destroy. /// NULL is returned if unable to allocate a mgp_value. -struct mgp_value *mgp_vertex_get_property(const struct mgp_vertex *v, - const char *property_name, +struct mgp_value *mgp_vertex_get_property(const struct mgp_vertex *v, const char *property_name, struct mgp_memory *memory); /// Start iterating over properties stored in the given vertex. /// The returned mgp_properties_iterator needs to be deallocated with /// mgp_properties_iterator_destroy. /// NULL is returned if unable to allocate a new iterator. -struct mgp_properties_iterator *mgp_vertex_iter_properties( - const struct mgp_vertex *v, struct mgp_memory *memory); +struct mgp_properties_iterator *mgp_vertex_iter_properties(const struct mgp_vertex *v, struct mgp_memory *memory); /// Start iterating over inbound edges of the given vertex. /// The returned mgp_edges_iterator needs to be deallocated with /// mgp_edges_iterator_destroy. /// NULL is returned if unable to allocate a new iterator. 
-struct mgp_edges_iterator *mgp_vertex_iter_in_edges(const struct mgp_vertex *v, - struct mgp_memory *memory); +struct mgp_edges_iterator *mgp_vertex_iter_in_edges(const struct mgp_vertex *v, struct mgp_memory *memory); /// Start iterating over outbound edges of the given vertex. /// The returned mgp_edges_iterator needs to be deallocated with /// mgp_edges_iterator_destroy. /// NULL is returned if unable to allocate a new iterator. -struct mgp_edges_iterator *mgp_vertex_iter_out_edges(const struct mgp_vertex *v, - struct mgp_memory *memory); +struct mgp_edges_iterator *mgp_vertex_iter_out_edges(const struct mgp_vertex *v, struct mgp_memory *memory); /// Get the current edge pointed to by the iterator. /// When the mgp_edges_iterator_next is invoked, the previous /// mgp_edge is invalidated and its value must not be used. /// NULL is returned if the end of the iteration has been reached. -const struct mgp_edge *mgp_edges_iterator_get( - const struct mgp_edges_iterator *it); +const struct mgp_edge *mgp_edges_iterator_get(const struct mgp_edges_iterator *it); /// Advance the iterator to the next edge and return it. /// The previous mgp_edge obtained through mgp_edges_iterator_get @@ -552,8 +551,7 @@ struct mgp_edge_id mgp_edge_get_id(const struct mgp_edge *e); /// Copy a mgp_edge. /// Returned pointer must be freed with mgp_edge_destroy. /// NULL is returned if unable to allocate a mgp_edge. -struct mgp_edge *mgp_edge_copy(const struct mgp_edge *e, - struct mgp_memory *memory); +struct mgp_edge *mgp_edge_copy(const struct mgp_edge *e, struct mgp_memory *memory); /// Free the memory used by a mgp_edge. void mgp_edge_destroy(struct mgp_edge *e); @@ -573,16 +571,13 @@ const struct mgp_vertex *mgp_edge_get_to(const struct mgp_edge *e); /// Get a copy of a edge property mapped to a given name. /// Returned value must be freed with mgp_value_destroy. /// NULL is returned if unable to allocate a mgp_value. 
-struct mgp_value *mgp_edge_get_property(const struct mgp_edge *e, - const char *property_name, - struct mgp_memory *memory); +struct mgp_value *mgp_edge_get_property(const struct mgp_edge *e, const char *property_name, struct mgp_memory *memory); /// Start iterating over properties stored in the given edge. /// The returned mgp_properties_iterator needs to be deallocated with /// mgp_properties_iterator_destroy. /// NULL is returned if unable to allocate a new iterator. -struct mgp_properties_iterator *mgp_edge_iter_properties( - const struct mgp_edge *e, struct mgp_memory *memory); +struct mgp_properties_iterator *mgp_edge_iter_properties(const struct mgp_edge *e, struct mgp_memory *memory); /// State of the graph database. struct mgp_graph; @@ -590,8 +585,7 @@ struct mgp_graph; /// Return the vertex corresponding to given ID. /// The returned vertex must be freed using mgp_vertex_destroy. /// NULL is returned if unable to allocate the vertex or if ID is not valid. -struct mgp_vertex *mgp_graph_get_vertex_by_id(const struct mgp_graph *g, - struct mgp_vertex_id id, +struct mgp_vertex *mgp_graph_get_vertex_by_id(const struct mgp_graph *g, struct mgp_vertex_id id, struct mgp_memory *memory); /// Iterator over vertices. @@ -604,22 +598,19 @@ void mgp_vertices_iterator_destroy(struct mgp_vertices_iterator *it); /// The returned mgp_vertices_iterator needs to be deallocated with /// mgp_vertices_iterator_destroy. /// NULL is returned if unable to allocate a new iterator. -struct mgp_vertices_iterator *mgp_graph_iter_vertices( - const struct mgp_graph *g, struct mgp_memory *memory); +struct mgp_vertices_iterator *mgp_graph_iter_vertices(const struct mgp_graph *g, struct mgp_memory *memory); /// Get the current vertex pointed to by the iterator. /// When the mgp_vertices_iterator_next is invoked, the previous /// mgp_vertex is invalidated and its value must not be used. /// NULL is returned if the end of the iteration has been reached. 
-const struct mgp_vertex *mgp_vertices_iterator_get( - const struct mgp_vertices_iterator *it); +const struct mgp_vertex *mgp_vertices_iterator_get(const struct mgp_vertices_iterator *it); /// Advance the iterator to the next vertex and return it. /// The previous mgp_vertex obtained through mgp_vertices_iterator_get /// will be invalidated, and you must not use its value. /// NULL is returned if the end of the iteration has been reached. -const struct mgp_vertex *mgp_vertices_iterator_next( - struct mgp_vertices_iterator *it); +const struct mgp_vertex *mgp_vertices_iterator_next(struct mgp_vertices_iterator *it); ///@} /// @name Type System @@ -718,8 +709,8 @@ struct mgp_proc; /// Passed in arguments will not live longer than the callback's execution. /// Therefore, you must not store them globally or use the passed in mgp_memory /// to allocate global resources. -typedef void (*mgp_proc_cb)(const struct mgp_list *, const struct mgp_graph *, - struct mgp_result *, struct mgp_memory *); +typedef void (*mgp_proc_cb)(const struct mgp_list *, const struct mgp_graph *, struct mgp_result *, + struct mgp_memory *); /// Register a read-only procedure with a module. /// @@ -730,9 +721,7 @@ typedef void (*mgp_proc_cb)(const struct mgp_list *, const struct mgp_graph *, /// /// NULL is returned if unable to allocate memory for mgp_proc; if `name` is /// not valid or a procedure with the same name was already registered. -struct mgp_proc *mgp_module_add_read_procedure(struct mgp_module *module, - const char *name, - mgp_proc_cb cb); +struct mgp_proc *mgp_module_add_read_procedure(struct mgp_module *module, const char *name, mgp_proc_cb cb); /// Add a required argument to a procedure. /// @@ -748,8 +737,7 @@ struct mgp_proc *mgp_module_add_read_procedure(struct mgp_module *module, /// 0 is returned if unable to allocate memory for an argument; if invoking this /// function after setting an optional argument or if `name` is not valid. /// Non-zero is returned on success. 
-int mgp_proc_add_arg(struct mgp_proc *proc, const char *name, - const struct mgp_type *type); +int mgp_proc_add_arg(struct mgp_proc *proc, const char *name, const struct mgp_type *type); /// Add an optional argument with a default value to a procedure. /// @@ -772,8 +760,7 @@ int mgp_proc_add_arg(struct mgp_proc *proc, const char *name, /// 0 is returned if unable to allocate memory for an argument; if `name` is /// not valid or `default_value` does not satisfy `type`. Non-zero is returned /// on success. -int mgp_proc_add_opt_arg(struct mgp_proc *proc, const char *name, - const struct mgp_type *type, +int mgp_proc_add_opt_arg(struct mgp_proc *proc, const char *name, const struct mgp_type *type, const struct mgp_value *default_value); /// Add a result field to a procedure. @@ -787,15 +774,13 @@ int mgp_proc_add_opt_arg(struct mgp_proc *proc, const char *name, /// 0 is returned if unable to allocate memory for a result field; if /// `name` is not valid or if a result field with the same name was already /// added. Non-zero is returned on success. -int mgp_proc_add_result(struct mgp_proc *proc, const char *name, - const struct mgp_type *type); +int mgp_proc_add_result(struct mgp_proc *proc, const char *name, const struct mgp_type *type); /// Add a result field to a procedure and mark it as deprecated. /// /// This is the same as mgp_proc_add_result, but the result field will be marked /// as deprecated. 
-int mgp_proc_add_deprecated_result(struct mgp_proc *proc, const char *name, - const struct mgp_type *type); +int mgp_proc_add_deprecated_result(struct mgp_proc *proc, const char *name, const struct mgp_type *type); ///@} /// @name Execution diff --git a/src/query/procedure/mg_procedure_impl.cpp b/src/query/procedure/mg_procedure_impl.cpp index 98e7d66aa..e65f9437a 100644 --- a/src/query/procedure/mg_procedure_impl.cpp +++ b/src/query/procedure/mg_procedure_impl.cpp @@ -6,21 +6,20 @@ #include #include +#include "module.hpp" #include "utils/algorithm.hpp" #include "utils/logging.hpp" #include "utils/math.hpp" +#include "utils/memory.hpp" #include "utils/string.hpp" - // This file contains implementation of top level C API functions, but this is // all actually part of query::procedure. So use that namespace for simplicity. // NOLINTNEXTLINE(google-build-using-namespace) using namespace query::procedure; -void *mgp_alloc(mgp_memory *memory, size_t size_in_bytes) { - return mgp_aligned_alloc(memory, size_in_bytes, alignof(std::max_align_t)); -} +namespace { -void *mgp_aligned_alloc(mgp_memory *memory, const size_t size_in_bytes, const size_t alignment) { +void *MgpAlignedAllocImpl(utils::MemoryResource &memory, const size_t size_in_bytes, const size_t alignment) { if (size_in_bytes == 0U || !utils::IsPow2(alignment)) return nullptr; // Simplify alignment by always using values greater or equal to max_align. 
const size_t alloc_align = std::max(alignment, alignof(std::max_align_t)); @@ -37,7 +36,7 @@ void *mgp_aligned_alloc(mgp_memory *memory, const size_t size_in_bytes, const si const size_t alloc_size = bytes_for_header + size_in_bytes; if (alloc_size < size_in_bytes) return nullptr; try { - void *ptr = memory->impl->Allocate(alloc_size, alloc_align); + void *ptr = memory.Allocate(alloc_size, alloc_align); char *data = reinterpret_cast(ptr) + bytes_for_header; std::memcpy(data - sizeof(size_in_bytes), &size_in_bytes, sizeof(size_in_bytes)); std::memcpy(data - sizeof(size_in_bytes) - sizeof(alloc_align), &alloc_align, sizeof(alloc_align)); @@ -47,7 +46,7 @@ void *mgp_aligned_alloc(mgp_memory *memory, const size_t size_in_bytes, const si } } -void mgp_free(mgp_memory *memory, void *const p) { +void MgpFreeImpl(utils::MemoryResource &memory, void *const p) { if (!p) return; char *const data = reinterpret_cast(p); // Read the header containing size & alignment info. @@ -63,9 +62,31 @@ void mgp_free(mgp_memory *memory, void *const p) { const size_t alloc_size = bytes_for_header + size_in_bytes; // Get the original ptr we allocated. 
void *const original_ptr = data - bytes_for_header; - memory->impl->Deallocate(original_ptr, alloc_size, alloc_align); + memory.Deallocate(original_ptr, alloc_size, alloc_align); } +} // namespace + +void *mgp_alloc(mgp_memory *memory, size_t size_in_bytes) { + return mgp_aligned_alloc(memory, size_in_bytes, alignof(std::max_align_t)); +} + +void *mgp_aligned_alloc(mgp_memory *memory, const size_t size_in_bytes, const size_t alignment) { + return MgpAlignedAllocImpl(*memory->impl, size_in_bytes, alignment); +} + +void mgp_free(mgp_memory *memory, void *const p) { MgpFreeImpl(*memory->impl, p); } + +void *mgp_global_alloc(size_t size_in_bytes) { + return mgp_global_aligned_alloc(size_in_bytes, alignof(std::max_align_t)); +} + +void *mgp_global_aligned_alloc(size_t size_in_bytes, size_t alignment) { + return MgpAlignedAllocImpl(gModuleRegistry.GetSharedMemoryResource(), size_in_bytes, alignment); +} + +void mgp_global_free(void *const p) { MgpFreeImpl(gModuleRegistry.GetSharedMemoryResource(), p); } + namespace { // May throw whatever the constructor of U throws. 
`std::bad_alloc` is handled diff --git a/src/query/procedure/module.cpp b/src/query/procedure/module.cpp index 99bc54dc9..a0c9a4103 100644 --- a/src/query/procedure/module.cpp +++ b/src/query/procedure/module.cpp @@ -1,4 +1,5 @@ #include "query/procedure/module.hpp" +#include "utils/memory.hpp" extern "C" { #include @@ -478,6 +479,8 @@ void ModuleRegistry::UnloadAllModules() { DoUnloadAllModules(); } +utils::MemoryResource &ModuleRegistry::GetSharedMemoryResource() { return *shared_; } + std::optional> FindProcedure( const ModuleRegistry &module_registry, const std::string_view &fully_qualified_procedure_name, utils::MemoryResource *memory) { diff --git a/src/query/procedure/module.hpp b/src/query/procedure/module.hpp index cdae588c8..0a96c7d83 100644 --- a/src/query/procedure/module.hpp +++ b/src/query/procedure/module.hpp @@ -52,6 +52,7 @@ class ModulePtr final { class ModuleRegistry final { std::map, std::less<>> modules_; mutable utils::RWLock lock_{utils::RWLock::Priority::WRITE}; + std::unique_ptr shared_{std::make_unique()}; bool RegisterModule(const std::string_view &name, std::unique_ptr module); @@ -96,6 +97,9 @@ class ModuleRegistry final { /// Takes a write lock. 
void UnloadAllModules(); + /// Returns the shared memory allocator used by modules + utils::MemoryResource &GetSharedMemoryResource(); + private: std::vector modules_dirs_; }; diff --git a/tests/e2e/memory/CMakeLists.txt b/tests/e2e/memory/CMakeLists.txt index 4e258f61a..95f3a145c 100644 --- a/tests/e2e/memory/CMakeLists.txt +++ b/tests/e2e/memory/CMakeLists.txt @@ -1,2 +1,11 @@ +add_subdirectory(procedures) + add_executable(memgraph__e2e__memory__control memory_control.cpp) target_link_libraries(memgraph__e2e__memory__control gflags mgclient mg-utils mg-io Threads::Threads) + +add_executable(memgraph__e2e__memory__limit_global_alloc memory_limit_global_alloc.cpp) +target_link_libraries(memgraph__e2e__memory__limit_global_alloc gflags mgclient mg-utils mg-io Threads::Threads) + +add_executable(memgraph__e2e__memory__limit_global_alloc_proc memory_limit_global_alloc_proc.cpp) +target_link_libraries(memgraph__e2e__memory__limit_global_alloc_proc gflags mgclient mg-utils mg-io Threads::Threads) + diff --git a/tests/e2e/memory/memory_limit_global_alloc.cpp b/tests/e2e/memory/memory_limit_global_alloc.cpp new file mode 100644 index 000000000..0d2094fea --- /dev/null +++ b/tests/e2e/memory/memory_limit_global_alloc.cpp @@ -0,0 +1,26 @@ +#include +#include + +#include "utils/logging.hpp" +#include "utils/timer.hpp" + +DEFINE_uint64(bolt_port, 7687, "Bolt port"); +DEFINE_uint64(timeout, 120, "Timeout seconds"); + +int main(int argc, char **argv) { + google::SetUsageMessage("Memgraph E2E Memory Limit For Global Allocators"); + gflags::ParseCommandLineFlags(&argc, &argv, true); + logging::RedirectToStderr(); + + mg::Client::Init(); + + auto client = + mg::Client::Connect({.host = "127.0.0.1", .port = static_cast(FLAGS_bolt_port), .use_ssl = false}); + if (!client) { + LOG_FATAL("Failed to connect!"); + } + + bool result = client->Execute("CALL libglobal_memory_limit.procedure() YIELD *"); + MG_ASSERT(result == false); + return 0; +} diff --git 
a/tests/e2e/memory/memory_limit_global_alloc_proc.cpp b/tests/e2e/memory/memory_limit_global_alloc_proc.cpp new file mode 100644 index 000000000..2e57da9d6 --- /dev/null +++ b/tests/e2e/memory/memory_limit_global_alloc_proc.cpp @@ -0,0 +1,31 @@ +#include +#include +#include + +#include "utils/logging.hpp" +#include "utils/timer.hpp" + +DEFINE_uint64(bolt_port, 7687, "Bolt port"); +DEFINE_uint64(timeout, 120, "Timeout seconds"); + +int main(int argc, char **argv) { + google::SetUsageMessage("Memgraph E2E Memory Limit For Global Allocators"); + gflags::ParseCommandLineFlags(&argc, &argv, true); + logging::RedirectToStderr(); + + mg::Client::Init(); + + auto client = + mg::Client::Connect({.host = "127.0.0.1", .port = static_cast(FLAGS_bolt_port), .use_ssl = false}); + if (!client) { + LOG_FATAL("Failed to connect!"); + } + bool result = client->Execute("CALL libglobal_memory_limit_proc.error() YIELD *"); + auto result1 = client->FetchAll(); + MG_ASSERT(result1 != std::nullopt && result1->size() == 0); + + result = client->Execute("CALL libglobal_memory_limit_proc.success() YIELD *"); + auto result2 = client->FetchAll(); + MG_ASSERT(result2 != std::nullopt && result2->size() > 0); + return 0; +} diff --git a/tests/e2e/memory/procedures/CMakeLists.txt b/tests/e2e/memory/procedures/CMakeLists.txt new file mode 100644 index 000000000..21201e59b --- /dev/null +++ b/tests/e2e/memory/procedures/CMakeLists.txt @@ -0,0 +1,5 @@ +add_library(global_memory_limit SHARED global_memory_limit.c) +target_include_directories(global_memory_limit PRIVATE ${CMAKE_SOURCE_DIR}/include) + +add_library(global_memory_limit_proc SHARED global_memory_limit_proc.c) +target_include_directories(global_memory_limit_proc PRIVATE ${CMAKE_SOURCE_DIR}/include) diff --git a/tests/e2e/memory/procedures/global_memory_limit.c b/tests/e2e/memory/procedures/global_memory_limit.c new file mode 100644 index 000000000..a2003cb59 --- /dev/null +++ b/tests/e2e/memory/procedures/global_memory_limit.c @@ -0,0 +1,36 
@@ +#include "mg_procedure.h" + +int *gVal = NULL; + +void set_error(struct mgp_result *result) { mgp_result_set_error_msg(result, "Something went wrong"); } + +static void procedure(const struct mgp_list *args, const struct mgp_graph *graph, struct mgp_result *result, + struct mgp_memory *memory) { + struct mgp_result_record *record = mgp_result_new_record(result); + if (record == NULL) return set_error(result); + + struct mgp_value *result_msg = mgp_value_make_string("mgp_init_module allocation works", memory); + if (result_msg == NULL) return set_error(result); + + int result_inserted = mgp_result_record_insert(record, "result", result_msg); + mgp_value_destroy(result_msg); + if (!result_inserted) return set_error(result); +} + +int mgp_init_module(struct mgp_module *module, struct mgp_memory *memory) { + const size_t one_gb = 1 << 30; + gVal = mgp_global_alloc(one_gb); + if (!gVal) return 1; + + struct mgp_proc *proc = mgp_module_add_read_procedure(module, "procedure", procedure); + if (!proc) return 1; + + if (!mgp_proc_add_result(proc, "result", mgp_type_string())) return 1; + + return 0; +} + +int mgp_shutdown_module() { + if (gVal) mgp_global_free(gVal); + return 0; +} diff --git a/tests/e2e/memory/procedures/global_memory_limit_proc.c b/tests/e2e/memory/procedures/global_memory_limit_proc.c new file mode 100644 index 000000000..519f11f05 --- /dev/null +++ b/tests/e2e/memory/procedures/global_memory_limit_proc.c @@ -0,0 +1,63 @@ +#include "mg_procedure.h" + +int *gVal = NULL; + +void set_error(struct mgp_result *result) { mgp_result_set_error_msg(result, "Something went wrong"); } + +void set_out_of_memory_error(struct mgp_result *result) { mgp_result_set_error_msg(result, "Out of memory"); } + +static void error(const struct mgp_list *args, const struct mgp_graph *graph, struct mgp_result *result, + struct mgp_memory *memory) { + const size_t one_gb = 1 << 30; + if (gVal) { + mgp_global_free(gVal); + gVal = NULL; + } + if (!gVal) { + gVal = 
mgp_global_alloc(one_gb); + if (!gVal) return set_out_of_memory_error(result); + } + struct mgp_result_record *record = mgp_result_new_record(result); + if (record == NULL) return set_error(result); + struct mgp_value *error_value = mgp_value_make_string("ERROR", memory); + if (error_value == NULL) return set_error(result); + int result_inserted = mgp_result_record_insert(record, "error_result", error_value); + mgp_value_destroy(error_value); + if (!result_inserted) return set_error(result); +} + +static void success(const struct mgp_list *args, const struct mgp_graph *graph, struct mgp_result *result, + struct mgp_memory *memory) { + const size_t bytes = 1024; + if (!gVal) { + gVal = mgp_global_alloc(bytes); + if (!gVal) set_out_of_memory_error(result); + } + + struct mgp_result_record *record = mgp_result_new_record(result); + if (record == NULL) return set_error(result); + struct mgp_value *success_value = mgp_value_make_string("sucess", memory); + if (success_value == NULL) return set_error(result); + int result_inserted = mgp_result_record_insert(record, "success_result", success_value); + mgp_value_destroy(success_value); + if (!result_inserted) return set_error(result); +} + +int mgp_init_module(struct mgp_module *module, struct mgp_memory *memory) { + struct mgp_proc *error_proc = mgp_module_add_read_procedure(module, "error", error); + if (!error_proc) return 1; + + if (!mgp_proc_add_result(error_proc, "error_result", mgp_type_string())) return 1; + + struct mgp_proc *succ_proc = mgp_module_add_read_procedure(module, "success", success); + if (!succ_proc) return 1; + + if (!mgp_proc_add_result(succ_proc, "success_result", mgp_type_string())) return 1; + + return 0; +} + +int mgp_shutdown_module() { + if (gVal) mgp_global_free(gVal); + return 0; +} diff --git a/tests/e2e/memory/workloads.yaml b/tests/e2e/memory/workloads.yaml index adec01260..bf7ba373e 100644 --- a/tests/e2e/memory/workloads.yaml +++ b/tests/e2e/memory/workloads.yaml @@ -13,4 +13,14 @@ 
workloads: args: ["--bolt-port", *bolt_port, "--timeout", "180"] <<: *template_cluster + - name: "Memory limit for modules upon loading" + binary: "tests/e2e/memory/memgraph__e2e__memory__limit_global_alloc" + args: ["--bolt-port", *bolt_port, "--timeout", "180"] + proc: "tests/e2e/memory/procedures/" + <<: *template_cluster + - name: "Memory limit for modules inside a procedure" + binary: "tests/e2e/memory/memgraph__e2e__memory__limit_global_alloc_proc" + args: ["--bolt-port", *bolt_port, "--timeout", "180"] + proc: "tests/e2e/memory/procedures/" + <<: *template_cluster diff --git a/tests/e2e/runner.py b/tests/e2e/runner.py index 36a7a34de..74b69955e 100755 --- a/tests/e2e/runner.py +++ b/tests/e2e/runner.py @@ -51,6 +51,10 @@ def run(args): mg_instances[name] = mg_instance log_file_path = os.path.join(BUILD_DIR, 'logs', config['log_file']) binary_args = config['args'] + ["--log-file", log_file_path] + if 'proc' in workload: + procdir = "--query-modules-directory=" + os.path.join(BUILD_DIR, workload['proc']) + binary_args.append(procdir) + mg_instance.start(args=binary_args) for query in config['setup_queries']: mg_instance.query(query) From 03cb007339e204601e8dc132e67bfc25501b3679 Mon Sep 17 00:00:00 2001 From: antonio2368 Date: Mon, 7 Jun 2021 16:26:15 +0200 Subject: [PATCH 55/63] CHANGELOG redirects to the docs page (#165) --- CHANGELOG.md | 499 +-------------------------------------------------- 1 file changed, 4 insertions(+), 495 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ef4fd5692..2b9576171 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,496 +1,5 @@ -# Change Log +Change Log for all versions of Memgraph can be found on-line at +https://docs.memgraph.com/memgraph/changelog -## v1.5.0 - -### Major Feature and Improvements - -* Added triggers. -* Replaced mg_client with mgconsole - -### Bug Fixes - -* Fixed parsing of types for Python procedures for types nested in `mgp.List`. - For example, parsing of `mgp.List[mgp.Map]` works now. 
-* Fixed memory tracking issues. Some of the allocation and deallocation weren't - tracked during the query execution. -* Fixed reading CSV files that are using CRLF as the newline symbol. -* Fixed permission issues for `LOAD CSV`, `FREE MEMORY`, `LOCK DATA DIRECTORY`, - and replication queries. - -## v1.4.0 - -### Breaking Changes - -* Changed `MEMORY LIMIT num (KB|MB)` clause in the procedure calls to `PROCEDURE MEMORY LIMIT num (KB|MB)`. - The functionality is still the same. - -### Major Feature and Improvements - -* Added replication to community version. -* Added support for multiple query modules directories at the same time. - You can now define multiple, comma-separated paths to directories from - which the modules will be loaded using the `--query-modules-directory` flag. -* Added support for programatically reading in data from CSV files through the - `LOAD CSV` clause. We support CSV files with and without a header, the - supported dialect being Excel. -* Added a new flag `--memory-limit` which enables the user to set the maximum total amount of memory - memgraph can allocate during its runtime. -* Added `FREE MEMORY` query which tries to free unusued memory chunks in different parts of storage. -* Added the memory limit and amount of currently allocated bytes in the result of `SHOW STORAGE INFO` query. -* Added `QUERY MEMORY LIMIT num (KB|MB)` to Cypher queries which allows you to limit memory allocation for - the entire query. It can be added only at the end of the entire Cypher query. -* Added logs for the different parts of the recovery process. `INFO`, `DEBUG` and `TRACE` level all contain - additional information that is printed out while the recovery is in progress. - -### Bug Fixes - -* Fixed garbage collector by correctly marking the oldest current timestamp - after the database was recovered using the durability files. -* Fixed reloading of the modules with changed result names. 
-* Fixed profile query to show the correct name of the ScanAll operator variant. - -## v1.3.0 - -### Breaking Changes - -* Added extra information in durability files to support replication, making it - incompatible with the durability files generated by older versions of - Memgraph. Even though the replication is an Enterprise feature, the files are - compatible with the Community version. - -### Major Features and Improvements - -* Added support for data replication across a cluster of Memgraph instances. - Supported instance types are MAIN and REPLICA. Supported replication modes - are SYNC (all SYNC REPLICAS have to receive data before the MAIN can commit - the transaction), ASYNC (MAIN doesn't care if data is replicated), SYNC WITH - TIMEOUT (MAIN will wait for REPLICAS within the given timeout period, after - timout, replication isn't aborted but the replication demotes the REPLICA to - the ASYNC mode). -* Added support for query type deduction. Possible query types are `r` (read), - `w` (write), `rw` (read-write). The query type is returned as a part of the - summary. -* Improved logging capabilities by introducing granular logging levels. Added - new flag, `--log-level`, which specifies the minimum log level that will be - printed. E.g., it's possible to print incoming queries or Bolt server states. -* Added ability to lock the storage data directory by executing the `LOCK DATA - DIRECTORY;` query which delays the deletion of the files contained in the - data directory. The data directory can be unlocked again by executing the - `UNLOCK DATA DIRECTORY;` query. - -### Bug Fixes and Other Changes - -* Added cleanup of query executions if not in an explicit transaction. -* Fix RPC dangling reference. - -## v1.2.0 - -### Breaking Changes - -* SSL is disabled by default (`--bolt-cert-file` and `--bolt-key-file` are - empty). This change might only affect the client connection configuration. 
- -### Major Features and Improvements - -* Added support for Bolt v4.0 and v4.1. -* Added `mgp_networkx.py` as an alternative implementation of NetworkX graph - objects, which is useful to use Memgraph data from NetworkX algorithms - optimally. -* Added `nxalg.py` query module as a proxy to NetworkX algorithms. -* Added plan optimization to use a label-property index where the property is - not null. As a result, the query engine, instead of scanning all elements and - applying the filter, performs a label-property index lookup when possible. - -### Bug Fixes and Other Changes - -* Fixed Cypher `ID` function `Null` handling. When the `ID` function receives - `Null`, it will also return `Null`. -* Fixed bug that caused random crashes in SSL communication on platforms - that use older versions of OpenSSL (< 1.1) by adding proper multi-threading - handling. -* Fix `DISCARD` message handling. The query is now executed before discarding - the results. - -## v1.1.0 - -### Major Features and Improvements - -* Properties in nodes and edges are now stored encoded and compressed. This - change significantly reduces memory usage. Depending on the specific dataset, - total memory usage can be reduced up to 50%. -* Added support for rescanning query modules. Previously, the query modules - directory was scanned only upon startup. Now it is scanned each time the user - requests to load a query module. The functions used to load the query modules - were renamed to `mg.load()` and `mg.load_all()` (from `mg.reload()` and - `mg.reload_all()`). -* Improved execution performance of queries that have an IN list filter by - using label+property indices. - Example: `MATCH (n:Label) WHERE n.property IN [] ...` -* Added support for `ANY` and `NONE` openCypher functions. Previously, only - `ALL` and `SINGLE` functions were implemented. 
- -### Bug Fixes and Other Changes - -* Fixed invalid paths returned by variable expansion when the starting node and - destination node used the same symbol. - Example: `MATCH path = (n:Person {name: "John"})-[:KNOWS*]->(n) RETURN path` -* Improved semantics of `ALL` and `SINGLE` functions to be consistent with - openCypher when handling lists with `Null`s. -* `SHOW CONSTRAINT INFO` now returns property names as a list for unique - constraints. -* Escaped label/property/edgetype names in `DUMP DATABASE` to support names - with spaces in them. -* Fixed handling of `DUMP DATABASE` queries in multi-command transactions - (`BEGIN`, ..., `COMMIT`). -* Fixed handling of various query types in explicit transactions. For example, - constraints were allowed to be created in multi-command transactions - (`BEGIN`, ..., `COMMIT`) but that isn't a transactional operation and as such - can't be allowed in multi-command transactions. -* Fixed integer overflow bugs in `COUNT`, `LIMIT` and `SKIP`. -* Fixed integer overflow bugs in weighted shortest path expansions. -* Fixed various other integer overflow bugs in query execution. -* Added Marvel Comic Universe tutorial. -* Added FootballTransfers tutorial. - -## v1.0.0 - -### Major Features and Improvements - -* [Enterprise Ed.] Exposed authentication username/rolename regex as a flag - (`--auth-user-or-role-name-regex`). -* [Enterprise Ed.] Improved auth module error handling and added support for - relative paths. -* Added support for Python query modules. This release of Memgraph supports - query modules written using the already existing C API and the new Python - API. -* Added support for unique constraints. The unique constraint is created with a - label and one or more properties. -* Implemented support for importing CSV files (`mg_import_csv`). The importer - is compatible with the Neo4j batch CSV importer. -* Snapshot and write-ahead log format changed (backward compatible with v0.50). 
-* Vertices looked up by their openCypher ID (`MATCH (n) WHERE ID(n) = ...`) - will now find the node in O(logn) instead of O(n). -* Improved planning of BFS expansion, a faster, specific approach is now - favored instead of a ScanAll+Filter operation. -* Added syntax for limiting memory of `CALL`. -* Exposed server name that should be used for Bolt handshake as flag - (`--bolt-server-name-for-init`). -* Added several more functions to the query module C API. -* Implemented a storage locking mechanism that prevents the user from - concurrently starting two Memgraph instances with the same data directory. - -### Bug Fixes and Other Changes - -* [Enterprise Ed.] Fixed a bug that crashed the database when granting - privileges to a user. -* [Enterprise Ed.] Improved Louvain algorithm for community detection. -* Type of variable expansion is now printed in `EXPLAIN` (e.g. ExpandVariable, - STShortestPath, BFSExpand, WeightedShortestPath). -* Correctly display `CALL` in `EXPLAIN` output. -* Correctly delimit arguments when printing the signature of a query module. -* Fixed a planning issue when `CALL` preceded filtering. -* Fixed spelling mistakes in the storage durability module. -* Fixed storage GC indices/constraints subtle race condition. -* Reduced memory allocations in storage API and indices. -* Memgraph version is now outputted to `stdout` when Memgraph is started. -* Improved RPM packaging. -* Reduced number of errors reported in production log when loading query - modules. -* Removed `early access` wording from the Community Offering license. - -## v0.50.0 - -### Breaking Changes - -* [Enterprise Ed.] Remove support for Kafka streams. -* Snapshot and write-ahead log format changed (not backward compatible). -* Removed support for unique constraints. -* Label indices aren't created automatically, create them explicitly instead. -* Renamed several database flags. Please see the configuration file for a list of current flags. 
- -### Major Features and Improvements - -* [Enterprise Ed.] Add support for auth module. -* [Enterprise Ed.] LDAP support migrated to auth module. -* Implemented new graph storage engine. -* Add support for disabling properties on edges. -* Add support for existence constraints. -* Add support for custom openCypher procedures using a C API. -* Support loading query modules implementing read-only procedures. -* Add `CALL YIELD ` syntax for invoking loaded procedures. -* Add `CREATE INDEX ON :Label` for creating label indices. -* Add `DROP INDEX ON :Label` for dropping label indices. -* Add `DUMP DATABASE` clause to openCypher. -* Add functions for treating character strings as byte strings. - -### Bug Fixes and Other Changes - -* Fix several memory management bugs. -* Reduce memory usage in query execution. -* Fix bug that crashes the database when `EXPLAIN` is used. - -## v0.15.0 - -### Breaking Changes - -* Snapshot and write-ahead log format changed (not backward compatible). -* `indexInfo()` function replaced with `SHOW INDEX INFO` syntax. -* Removed support for unique index. Use unique constraints instead. -* `CREATE UNIQUE INDEX ON :label (property)` replaced with `CREATE CONSTRAINT ON (n:label) ASSERT n.property IS UNIQUE`. -* Changed semantics for `COUNTER` openCypher function. - -### Major Features and Improvements - -* [Enterprise Ed.] Add new privilege, `STATS` for accessing storage info. -* [Enterprise Ed.] LDAP authentication and authorization support. -* [Enterprise Ed.] Add audit logging feature. -* Add multiple properties unique constraint which replace unique indices. -* Add `SHOW STORAGE INFO` feature. -* Add `PROFILE` clause to openCypher. -* Add `CREATE CONSTRAINT` clause to openCypher. -* Add `DROP CONSTRAINT` clause to openCypher. -* Add `SHOW CONSTRAINT INFO` feature. -* Add `uniformSample` function to openCypher. -* Add regex matching to openCypher. - -### Bug Fixes and Other Changes - -* Fix bug in explicit transaction handling. 
-* Fix bug in edge filtering by edge type and destination. -* Fix bug in query comment parsing. -* Fix bug in query symbol table. -* Fix OpenSSL memory leaks. -* Make authentication case insensitive. -* Remove `COALESCE` function. -* Add movie tutorial. -* Add backpacking tutorial. - -## v0.14.0 - -### Breaking Changes - -* Write-ahead log format changed (not backward compatible). - -### Major Features and Improvements - -* [Enterprise Ed.] Reduce memory usage in distributed usage. -* Add `DROP INDEX` feature. -* Improve SSL error messages. - -### Bug Fixes and Other Changes - -* [Enterprise Ed.] Fix issues with reading and writing in a distributed query. -* Correctly handle an edge case with unique constraint checks. -* Fix a minor issue with `mg_import_csv`. -* Fix an issue with `EXPLAIN`. - -## v0.13.0 - -### Breaking Changes - -* Write-ahead log format changed (not backward compatible). -* Snapshot format changed (not backward compatible). - -### Major Features and Improvements - -* [Enterprise Ed.] Authentication and authorization support. -* [Enterprise Ed.] Kafka integration. -* [Enterprise Ed.] Support dynamic worker addition in distributed. -* Reduce memory usage and improve overall performance. -* Add `CREATE UNIQUE INDEX` clause to openCypher. -* Add `EXPLAIN` clause to openCypher. -* Add `inDegree` and `outDegree` functions to openCypher. -* Improve BFS performance when both endpoints are known. -* Add new `node-label`, `relationship-type` and `quote` options to - `mg_import_csv` tool. -* Reduce memory usage of `mg_import_csv`. - -### Bug Fixes and Other Changes - -* [Enterprise Ed.] Fix an edge case in distributed index creation. -* [Enterprise Ed.] Fix issues with Cartesian in distributed queries. -* Correctly handle large messages in Bolt protocol. -* Fix issues when handling explicitly started transactions in queries. -* Allow openCypher keywords to be used as variable names. -* Revise and make user visible error messages consistent. 
-* Improve aborting time consuming execution. - -## v0.12.0 - -### Breaking Changes - -* Snapshot format changed (not backward compatible). - -### Major Features and Improvements - -* Improved Id Cypher function. -* Added string functions to openCypher (`lTrim`, `left`, `rTrim`, `replace`, - `reverse`, `right`, `split`, `substring`, `toLower`, `toUpper`, `trim`). -* Added `timestamp` function to openCypher. -* Added support for dynamic property access with `[]` operator. - -## v0.11.0 - -### Major Features and Improvements - -* [Enterprise Ed.] Improve Cartesian support in distributed queries. -* [Enterprise Ed.] Improve distributed execution of BFS. -* [Enterprise Ed.] Dynamic graph partitioner added. -* Static nodes/edges id generators exposed through the Id Cypher function. -* Properties on disk added. -* Telemetry added. -* SSL support added. -* `toString` function added. - -### Bug Fixes and Other Changes - -* Document issues with Docker on OS X. -* Add BFS and Dijkstra's algorithm examples to documentation. - -## v0.10.0 - -### Breaking Changes - -* Snapshot format changed (not backward compatible). - -### Major Features and Improvements - -* [Enterprise Ed.] Distributed storage and execution. -* `reduce` and `single` functions added to openCypher. -* `wShortest` edge expansion added to openCypher. -* Support packaging RPM on CentOS 7. - -### Bug Fixes and Other Changes - -* Report an error if updating a deleted element. -* Log an error if reading info on available memory fails. -* Fix a bug when `MATCH` would stop matching if a result was empty, but later - results still contain data to be matched. The simplest case of this was the - query: `UNWIND [1,2,3] AS x MATCH (n :Label {prop: x}) RETURN n`. If there - was no node `(:Label {prop: 1})`, then the `MATCH` wouldn't even try to find - for `x` being 2 or 3. -* Report an error if trying to compare a property value with something that - cannot be stored in a property. -* Fix crashes in some obscure cases. 
-* Commit log automatically garbage collected. -* Add minor performance improvements. - -## v0.9.0 - -### Breaking Changes - -* Snapshot format changed (not backward compatible). -* Snapshot configuration flags changed, general durability flags added. - -### Major Features and Improvements - -* Write-ahead log added. -* `nodes` and `relationships` functions added. -* `UNION` and `UNION ALL` is implemented. -* Concurrent index creation is now enabled. - -### Bug Fixes and Other Changes - - -## v0.8.0 - -### Major Features and Improvements - -* CASE construct (without aggregations). -* Named path support added. -* Maps can now be stored as node/edge properties. -* Map indexing supported. -* `rand` function added. -* `assert` function added. -* `counter` and `counterSet` functions added. -* `indexInfo` function added. -* `collect` aggregation now supports Map collection. -* Changed the BFS syntax. - -### Bug Fixes and Other Changes - -* Use \u to specify 4 digit codepoint and \U for 8 digit -* Keywords appearing in header (named expressions) keep original case. -* Our Bolt protocol implementation is now completely compatible with the protocol version 1 specification. (https://boltprotocol.org/v1/) -* Added a log warning when running out of memory and the `memory_warning_threshold` flag -* Edges are no longer additionally filtered after expansion. - -## v0.7.0 - -### Major Features and Improvements - -* Variable length path `MATCH`. -* Explicitly started transactions (multi-query transactions). -* Map literal. -* Query parameters (except for parameters in place of property maps). -* `all` function in openCypher. -* `degree` function in openCypher. -* User specified transaction execution timeout. - -### Bug Fixes and Other Changes - -* Concurrent `BUILD INDEX` deadlock now returns an error to the client. -* A `MATCH` preceeded by `OPTIONAL MATCH` expansion inconsistencies. -* High concurrency Antlr parsing bug. -* Indexing improvements. 
-* Query stripping and caching speedups. - -## v0.6.0 - -### Major Features and Improvements - -* AST caching. -* Label + property index support. -* Different logging setup & format. - -## v0.5.0 - -### Major Features and Improvements - -* Use label indexes to speed up querying. -* Generate multiple query plans and use the cost estimator to select the best. -* Snapshots & Recovery. -* Abandon old yaml configuration and migrate to gflags. -* Query stripping & AST caching support. - -### Bug Fixes and Other Changes - -* Fixed race condition in MVCC. Hints exp+aborted race condition prevented. -* Fixed conceptual bug in MVCC GC. Evaluate old records w.r.t. the oldest. - transaction's id AND snapshot. -* User friendly error messages thrown from the query engine. - -## Build 837 - -### Bug Fixes and Other Changes - -* List indexing supported with preceeding IN (for example in query `RETURN 1 IN [[1,2]][0]`). - -## Build 825 - -### Major Features and Improvements - -* RETURN *, count(*), OPTIONAL MATCH, UNWIND, DISTINCT (except DISTINCT in aggregate functions), list indexing and slicing, escaped labels, IN LIST operator, range function. - -### Bug Fixes and Other Changes - -* TCP_NODELAY -> import should be faster. -* Clear hint bits. - -## Build 783 - -### Major Features and Improvements - -* SKIP, LIMIT, ORDER BY. -* Math functions. -* Initial support for MERGE clause. - -### Bug Fixes and Other Changes - -* Unhandled Lock Timeout Exception. - -## Build 755 - -### Major Features and Improvements - -* MATCH, CREATE, WHERE, SET, REMOVE, DELETE. 
+All the updates to the Change Log can be made in the following repository: +https://github.com/memgraph/docs From be91126134ce6c549ae0502a2de2e3baf52b87fe Mon Sep 17 00:00:00 2001 From: antonio2368 Date: Tue, 8 Jun 2021 08:44:12 +0200 Subject: [PATCH 56/63] Run the benchmarks daily (#164) --- .github/workflows/daily_banchmark.yaml | 68 ++++++++++++++++++++++++++ .github/workflows/diff.yaml | 2 +- 2 files changed, 69 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/daily_banchmark.yaml diff --git a/.github/workflows/daily_banchmark.yaml b/.github/workflows/daily_banchmark.yaml new file mode 100644 index 000000000..4867f7097 --- /dev/null +++ b/.github/workflows/daily_banchmark.yaml @@ -0,0 +1,68 @@ +name: Daily Benchmark + +on: + workflow_dispatch: + schedule: + - cron: "0 1 * * *" + +jobs: + release_benchmarks: + name: "Release benchmarks" + runs-on: [self-hosted, Linux, X64, Diff, Gen7] + env: + THREADS: 24 + + steps: + - name: Set up repository + uses: actions/checkout@v2 + with: + # Number of commits to fetch. `0` indicates all history for all + # branches and tags. (default: 1) + fetch-depth: 0 + + - name: Build release binaries + run: | + # Activate toolchain. + source /opt/toolchain-v2/activate + + # Initialize dependencies. + ./init + + # Build only memgraph release binaries. + cd build + cmake -DCMAKE_BUILD_TYPE=release .. 
+ make -j$THREADS + + - name: Run macro benchmarks + run: | + cd tests/macro_benchmark + ./harness QuerySuite MemgraphRunner \ + --groups aggregation 1000_create unwind_create dense_expand match \ + --no-strict + + - name: Upload macro benchmark results + run: | + cd tools/bench-graph-client + virtualenv -p python3 ve3 + source ve3/bin/activate + pip install -r requirements.txt + ./main.py --benchmark-name "macro_benchmark" \ + --benchmark-results-path "../../tests/macro_benchmark/.harness_summary" \ + --github-run-id "${{ github.run_id }}" \ + --github-run-number "${{ github.run_number }}" + + - name: Run mgbench + run: | + cd tests/mgbench + ./benchmark.py --num-workers-for-benchmark 12 --export-results benchmark_result.json pokec/medium/*/* + + - name: Upload mgbench results + run: | + cd tools/bench-graph-client + virtualenv -p python3 ve3 + source ve3/bin/activate + pip install -r requirements.txt + ./main.py --benchmark-name "mgbench" \ + --benchmark-results-path "../../tests/mgbench/benchmark_result.json" \ + --github-run-id "${{ github.run_id }}" \ + --github-run-number "${{ github.run_number }}" diff --git a/.github/workflows/diff.yaml b/.github/workflows/diff.yaml index c0794c596..4d75c0089 100644 --- a/.github/workflows/diff.yaml +++ b/.github/workflows/diff.yaml @@ -365,7 +365,7 @@ jobs: # Initialize dependencies. ./init - # Build only memgraph release binarie. + # Build only memgraph release binaries. cd build cmake -DCMAKE_BUILD_TYPE=release .. 
make -j$THREADS From cd03e13443b3f44555df497e8b29bf5db62b23ac Mon Sep 17 00:00:00 2001 From: Josip Seljan <62958579+the-joksim@users.noreply.github.com> Date: Wed, 9 Jun 2021 12:50:27 +0200 Subject: [PATCH 57/63] Upgrade Antlr to v4.9.2 (#161) --- libs/CMakeLists.txt | 6 ++---- libs/setup.sh | 10 +++++----- src/query/CMakeLists.txt | 2 +- src/query/frontend/opencypher/parser.hpp | 4 ++-- tests/manual/antlr_parser.cpp | 2 +- 5 files changed, 11 insertions(+), 13 deletions(-) diff --git a/libs/CMakeLists.txt b/libs/CMakeLists.txt index 1cf332e8c..9f7941c78 100644 --- a/libs/CMakeLists.txt +++ b/libs/CMakeLists.txt @@ -91,11 +91,9 @@ import_external_library(antlr4 STATIC CMAKE_ARGS # http://stackoverflow.com/questions/37096062/get-a-basic-c-program-to-compile-using-clang-on-ubuntu-16/38385967#38385967 -DWITH_LIBCXX=OFF # because of debian bug -DCMAKE_SKIP_INSTALL_ALL_DEPENDENCY=true + -DCMAKE_CXX_STANDARD=20 BUILD_COMMAND $(MAKE) antlr4_static - # Make a License.txt out of thin air, so that antlr4.6 knows how to build. - # When we upgrade antlr, this will no longer be needed. - INSTALL_COMMAND touch ${CMAKE_CURRENT_SOURCE_DIR}/antlr4/runtime/Cpp/License.txt - COMMAND $(MAKE) install) + INSTALL_COMMAND $(MAKE) install) # Setup google benchmark. import_external_library(benchmark STATIC diff --git a/libs/setup.sh b/libs/setup.sh index 8f7c3d476..bab12fbfc 100755 --- a/libs/setup.sh +++ b/libs/setup.sh @@ -93,7 +93,7 @@ repo_clone_try_double () { # Download from primary_urls might fail because the cache is not installed. 
declare -A primary_urls=( ["antlr4-code"]="http://$local_cache_host/git/antlr4.git" - ["antlr4-generator"]="http://$local_cache_host/file/antlr-4.6-complete.jar" + ["antlr4-generator"]="http://$local_cache_host/file/antlr-4.9.2-complete.jar" ["cppitertools"]="http://$local_cache_host/git/cppitertools.git" ["fmt"]="http://$local_cache_host/git/fmt.git" ["rapidcheck"]="http://$local_cache_host/git/rapidcheck.git" @@ -119,7 +119,7 @@ declare -A primary_urls=( # should fail. declare -A secondary_urls=( ["antlr4-code"]="https://github.com/antlr/antlr4.git" - ["antlr4-generator"]="http://www.antlr.org/download/antlr-4.6-complete.jar" + ["antlr4-generator"]="http://www.antlr.org/download/antlr-4.9.2-complete.jar" ["cppitertools"]="https://github.com/ryanhaining/cppitertools.git" ["fmt"]="https://github.com/fmtlib/fmt.git" ["rapidcheck"]="https://github.com/emil-e/rapidcheck.git" @@ -142,12 +142,12 @@ declare -A secondary_urls=( # antlr file_get_try_double "${primary_urls[antlr4-generator]}" "${secondary_urls[antlr4-generator]}" -antlr4_tag="aacd2a2c95816d8dc1c05814051d631bfec4cf3e" # v4.6 +antlr4_tag="5e5b6d35b4183fd330102c40947b95c4b5c6abb5" # v4.9.2 repo_clone_try_double "${primary_urls[antlr4-code]}" "${secondary_urls[antlr4-code]}" "antlr4" "$antlr4_tag" -# fix missing include -sed -i 's/^#pragma once/#pragma once\n#include /' antlr4/runtime/Cpp/runtime/src/support/CPPUtils.h # remove shared library from install dependencies sed -i 's/install(TARGETS antlr4_shared/install(TARGETS antlr4_shared OPTIONAL/' antlr4/runtime/Cpp/runtime/CMakeLists.txt +# fix issue https://github.com/antlr/antlr4/issues/3194 - should update Antlr commit once the PR related to the issue gets merged +sed -i 's/std::is_nothrow_copy_constructible/std::is_copy_constructible/' antlr4/runtime/Cpp/runtime/src/support/Any.h # cppitertools v2.0 2019-12-23 cppitertools_ref="cb3635456bdb531121b82b4d2e3afc7ae1f56d47" diff --git a/src/query/CMakeLists.txt b/src/query/CMakeLists.txt index 
b41e6380c..53f9ee8f9 100644 --- a/src/query/CMakeLists.txt +++ b/src/query/CMakeLists.txt @@ -72,7 +72,7 @@ add_custom_command( OUTPUT ${antlr_opencypher_generated_src} ${antlr_opencypher_generated_include} COMMAND ${CMAKE_COMMAND} -E make_directory ${opencypher_generated} COMMAND - java -jar ${CMAKE_SOURCE_DIR}/libs/antlr-4.6-complete.jar + java -jar ${CMAKE_SOURCE_DIR}/libs/antlr-4.9.2-complete.jar -Dlanguage=Cpp -visitor -package antlropencypher -o ${opencypher_generated} ${opencypher_lexer_grammar} ${opencypher_parser_grammar} diff --git a/src/query/frontend/opencypher/parser.hpp b/src/query/frontend/opencypher/parser.hpp index dcdedfa32..0bdaae228 100644 --- a/src/query/frontend/opencypher/parser.hpp +++ b/src/query/frontend/opencypher/parser.hpp @@ -35,7 +35,7 @@ class Parser { private: class FirstMessageErrorListener : public antlr4::BaseErrorListener { - void syntaxError(antlr4::IRecognizer *, antlr4::Token *, size_t line, size_t position, const std::string &message, + void syntaxError(antlr4::Recognizer *, antlr4::Token *, size_t line, size_t position, const std::string &message, std::exception_ptr) override { if (error_.empty()) { error_ = "line " + std::to_string(line) + ":" + std::to_string(position + 1) + " " + message; @@ -48,7 +48,7 @@ class Parser { FirstMessageErrorListener error_listener_; std::string query_; - antlr4::ANTLRInputStream input_{query_.c_str()}; + antlr4::ANTLRInputStream input_{query_}; antlropencypher::MemgraphCypherLexer lexer_{&input_}; antlr4::CommonTokenStream tokens_{&lexer_}; diff --git a/tests/manual/antlr_parser.cpp b/tests/manual/antlr_parser.cpp index d1986c4b1..42ccfba7e 100644 --- a/tests/manual/antlr_parser.cpp +++ b/tests/manual/antlr_parser.cpp @@ -8,7 +8,7 @@ using namespace antlropencypher; using namespace antlr4; int main(int, const char **a) { - const char *query = a[1]; + std::string_view query{a[1]}; ANTLRInputStream input(query); MemgraphCypherLexer lexer(&input); From 542a324c96cb317d8fbfd36563c1d53b5b152f88 
Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20Budiseli=C4=87?= Date: Wed, 9 Jun 2021 10:09:43 -0700 Subject: [PATCH 58/63] Fix typing to cypher type impl inside mgp.py (#159) --- include/mgp.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/mgp.py b/include/mgp.py index a0f7bab50..a7044a77d 100644 --- a/include/mgp.py +++ b/include/mgp.py @@ -627,8 +627,11 @@ def _typing_to_cypher_type(type_): if complex_type == typing.Union: # If we have a Union with NoneType inside, it means we are building # a nullable type. - if isinstance(None, type_args): - types = tuple(t for t in type_args if not isinstance(None, t)) + # isinstance doesn't work here because subscripted generics cannot + # be used with class and instance checks. type comparison should be + # fine because subclasses are not used. + if type(None) in type_args: + types = tuple(t for t in type_args if t is not type(None)) # noqa E721 if len(types) == 1: type_arg, = types else: From 90a093bd952496d2eab3a9b8f76e68af8b5e5d44 Mon Sep 17 00:00:00 2001 From: antonio2368 Date: Thu, 10 Jun 2021 10:40:18 +0200 Subject: [PATCH 59/63] Send py::Object as a reference to callback call (#169) Sending the py::Object by value caused UB because multiple threads could copy the same object at the same time without the GIL. By sending the object by reference we eliminate the unsynchronized copies and thus avoid the UB. 
--- src/query/procedure/py_module.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/query/procedure/py_module.cpp b/src/query/procedure/py_module.cpp index 43fbef989..96ef5f387 100644 --- a/src/query/procedure/py_module.cpp +++ b/src/query/procedure/py_module.cpp @@ -505,7 +505,7 @@ std::optional AddMultipleRecordsFromPython(mgp_result *result return std::nullopt; } -void CallPythonProcedure(py::Object py_cb, const mgp_list *args, const mgp_graph *graph, mgp_result *result, +void CallPythonProcedure(const py::Object &py_cb, const mgp_list *args, const mgp_graph *graph, mgp_result *result, mgp_memory *memory) { auto gil = py::EnsureGIL(); From 8cd9f696cf33d525c2f36a1a72893a36d272df17 Mon Sep 17 00:00:00 2001 From: antonio2368 Date: Mon, 14 Jun 2021 15:47:57 +0200 Subject: [PATCH 60/63] Multiple isolation levels (#171) This PR introduces READ COMMITTED and READ UNCOMMITTED isolation levels. The isolation level can be set with a config or with a query for different scopes. 
--- config/flags.yaml | 4 + src/auth/models.cpp | 2 + src/auth/models.hpp | 12 +- src/glue/auth.cpp | 2 + src/memgraph.cpp | 175 +++++++++---- src/query/exceptions.hpp | 6 + src/query/frontend/ast/ast.lcp | 28 +- src/query/frontend/ast/ast_visitor.hpp | 3 +- .../frontend/ast/cypher_main_visitor.cpp | 28 ++ .../frontend/ast/cypher_main_visitor.hpp | 5 + .../opencypher/grammar/MemgraphCypher.g4 | 19 ++ .../opencypher/grammar/MemgraphCypherLexer.g4 | 11 + .../frontend/semantic/required_privileges.cpp | 2 + .../frontend/stripped_lexer_constants.hpp | 24 +- src/query/interpreter.cpp | 71 +++++- src/query/interpreter.hpp | 8 + src/storage/v2/config.hpp | 6 + src/storage/v2/isolation_level.hpp | 9 + src/storage/v2/mvcc.hpp | 14 +- src/storage/v2/storage.cpp | 16 +- src/storage/v2/storage.hpp | 12 +- src/storage/v2/transaction.hpp | 13 +- tests/benchmark/expansion.cpp | 1 + tests/e2e/CMakeLists.txt | 1 + tests/e2e/isolation_levels/CMakeLists.txt | 2 + .../e2e/isolation_levels/isolation_levels.cpp | 241 ++++++++++++++++++ tests/e2e/isolation_levels/workloads.yaml | 14 + tests/manual/single_query.cpp | 1 + tests/unit/CMakeLists.txt | 3 + tests/unit/cypher_main_visitor.cpp | 65 ++++- tests/unit/interpreter.cpp | 1 + tests/unit/query_required_privileges.cpp | 5 + tests/unit/storage_v2_isolation_level.cpp | 98 +++++++ tests/unit/storage_v2_wal_file.cpp | 4 +- 34 files changed, 810 insertions(+), 96 deletions(-) create mode 100644 src/storage/v2/isolation_level.hpp create mode 100644 tests/e2e/isolation_levels/CMakeLists.txt create mode 100644 tests/e2e/isolation_levels/isolation_levels.cpp create mode 100644 tests/e2e/isolation_levels/workloads.yaml create mode 100644 tests/unit/storage_v2_isolation_level.cpp diff --git a/config/flags.yaml b/config/flags.yaml index c610591d1..93f9c73ed 100644 --- a/config/flags.yaml +++ b/config/flags.yaml @@ -87,6 +87,10 @@ modifications: value: "0" override: true + - name: "isolation_level" + value: "SNAPSHOT_ISOLATION" + override: true + 
undocumented: - "flag_file" - "also_log_to_stderr" diff --git a/src/auth/models.cpp b/src/auth/models.cpp index be5f28b3e..cc34ca410 100644 --- a/src/auth/models.cpp +++ b/src/auth/models.cpp @@ -49,6 +49,8 @@ std::string PermissionToString(Permission permission) { return "FREE_MEMORY"; case Permission::TRIGGER: return "TRIGGER"; + case Permission::CONFIG: + return "CONFIG"; case Permission::AUTH: return "AUTH"; } diff --git a/src/auth/models.hpp b/src/auth/models.hpp index 9e1b54977..56cf897d2 100644 --- a/src/auth/models.hpp +++ b/src/auth/models.hpp @@ -26,16 +26,18 @@ enum class Permission : uint64_t { READ_FILE = 1U << 12U, FREE_MEMORY = 1U << 13U, TRIGGER = 1U << 14U, + CONFIG = 1U << 15U, AUTH = 1U << 16U }; // clang-format on // Constant list of all available permissions. -const std::vector kPermissionsAll = { - Permission::MATCH, Permission::CREATE, Permission::MERGE, Permission::DELETE, - Permission::SET, Permission::REMOVE, Permission::INDEX, Permission::STATS, - Permission::CONSTRAINT, Permission::DUMP, Permission::AUTH, Permission::REPLICATION, - Permission::LOCK_PATH, Permission::READ_FILE, Permission::FREE_MEMORY, Permission::TRIGGER}; +const std::vector kPermissionsAll = {Permission::MATCH, Permission::CREATE, Permission::MERGE, + Permission::DELETE, Permission::SET, Permission::REMOVE, + Permission::INDEX, Permission::STATS, Permission::CONSTRAINT, + Permission::DUMP, Permission::AUTH, Permission::REPLICATION, + Permission::LOCK_PATH, Permission::READ_FILE, Permission::FREE_MEMORY, + Permission::TRIGGER, Permission::CONFIG}; // Function that converts a permission to its string representation. 
std::string PermissionToString(Permission permission); diff --git a/src/glue/auth.cpp b/src/glue/auth.cpp index a54894b80..fe55c6dde 100644 --- a/src/glue/auth.cpp +++ b/src/glue/auth.cpp @@ -34,6 +34,8 @@ auth::Permission PrivilegeToPermission(query::AuthQuery::Privilege privilege) { return auth::Permission::FREE_MEMORY; case query::AuthQuery::Privilege::TRIGGER: return auth::Permission::TRIGGER; + case query::AuthQuery::Privilege::CONFIG: + return auth::Permission::CONFIG; case query::AuthQuery::Privilege::AUTH: return auth::Permission::AUTH; } diff --git a/src/memgraph.cpp b/src/memgraph.cpp index e99ae8f43..d128f1a2e 100644 --- a/src/memgraph.cpp +++ b/src/memgraph.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -28,6 +29,7 @@ #include "query/procedure/module.hpp" #include "query/procedure/py_module.hpp" #include "requests/requests.hpp" +#include "storage/v2/isolation_level.hpp" #include "storage/v2/storage.hpp" #include "storage/v2/view.hpp" #include "telemetry/telemetry.hpp" @@ -67,6 +69,42 @@ #include "glue/auth.hpp" #endif +namespace { +std::string GetAllowedEnumValuesString(const auto &mappings) { + std::vector allowed_values; + allowed_values.reserve(mappings.size()); + std::transform(mappings.begin(), mappings.end(), std::back_inserter(allowed_values), + [](const auto &mapping) { return std::string(mapping.first); }); + return utils::Join(allowed_values, ", "); +} + +enum class ValidationError : uint8_t { EmptyValue, InvalidValue }; + +utils::BasicResult IsValidEnumValueString(const auto &value, const auto &mappings) { + if (value.empty()) { + return ValidationError::EmptyValue; + } + + if (std::find_if(mappings.begin(), mappings.end(), [&](const auto &mapping) { return mapping.first == value; }) == + mappings.cend()) { + return ValidationError::InvalidValue; + } + + return {}; +} + +template +std::optional StringToEnum(const auto &value, const auto &mappings) { + const auto mapping_iter = + std::find_if(mappings.begin(), 
mappings.end(), [&](const auto &mapping) { return mapping.first == value; }); + if (mapping_iter == mappings.cend()) { + return std::nullopt; + } + + return mapping_iter->second; +} +} // namespace + // Bolt server flags. DEFINE_string(bolt_address, "0.0.0.0", "IP address on which the Bolt server should listen."); DEFINE_VALIDATED_int32(bolt_port, 7687, "Port on which the Bolt server should listen.", @@ -140,6 +178,72 @@ DEFINE_uint64(query_execution_timeout_sec, 180, "Maximum allowed query execution time. Queries exceeding this " "limit will be aborted. Value of 0 means no limit."); +// NOLINTNEXTLINE (cppcoreguidelines-avoid-non-const-global-variables) +DEFINE_uint64( + memory_limit, 0, + "Total memory limit in MiB. Set to 0 to use the default values which are 100\% of the phyisical memory if the swap " + "is enabled and 90\% of the physical memory otherwise."); + +namespace { +using namespace std::literals; +constexpr std::array isolation_level_mappings{ + std::pair{"SNAPSHOT_ISOLATION"sv, storage::IsolationLevel::SNAPSHOT_ISOLATION}, + std::pair{"READ_COMMITTED"sv, storage::IsolationLevel::READ_COMMITTED}, + std::pair{"READ_UNCOMMITTED"sv, storage::IsolationLevel::READ_UNCOMMITTED}}; + +const std::string isolation_level_help_string = + fmt::format("Default isolation level used for the transactions. Allowed values: {}", + GetAllowedEnumValuesString(isolation_level_mappings)); +} // namespace + +// NOLINTNEXTLINE (cppcoreguidelines-avoid-non-const-global-variables) +DEFINE_VALIDATED_string(isolation_level, "SNAPSHOT_ISOLATION", isolation_level_help_string.c_str(), { + if (const auto result = IsValidEnumValueString(value, isolation_level_mappings); result.HasError()) { + const auto error = result.GetError(); + switch (error) { + case ValidationError::EmptyValue: { + std::cout << "Isolation level cannot be empty." << std::endl; + break; + } + case ValidationError::InvalidValue: { + std::cout << "Invalid value for isolation level. 
Allowed values: " + << GetAllowedEnumValuesString(isolation_level_mappings) << std::endl; + break; + } + } + return false; + } + + return true; +}); + +namespace { +storage::IsolationLevel ParseIsolationLevel() { + const auto isolation_level = StringToEnum(FLAGS_isolation_level, isolation_level_mappings); + MG_ASSERT(isolation_level, "Invalid isolation level"); + return *isolation_level; +} + +int64_t GetMemoryLimit() { + if (FLAGS_memory_limit == 0) { + auto maybe_total_memory = utils::sysinfo::TotalMemory(); + MG_ASSERT(maybe_total_memory, "Failed to fetch the total physical memory"); + const auto maybe_swap_memory = utils::sysinfo::SwapTotalMemory(); + MG_ASSERT(maybe_swap_memory, "Failed to fetch the total swap memory"); + + if (*maybe_swap_memory == 0) { + // take only 90% of the total memory + *maybe_total_memory *= 9; + *maybe_total_memory /= 10; + } + return *maybe_total_memory * 1024; + } + + // We parse the memory as MiB every time + return FLAGS_memory_limit * 1024 * 1024; +} +} // namespace + namespace { std::vector query_modules_directories; } // namespace @@ -168,37 +272,30 @@ DEFINE_VALIDATED_string(query_modules_directory, "", DEFINE_bool(also_log_to_stderr, false, "Log messages go to stderr in addition to logfiles"); DEFINE_string(log_file, "", "Path to where the log should be stored."); -DEFINE_uint64( - memory_limit, 0, - "Total memory limit in MiB. 
Set to 0 to use the default values which are 100\% of the phyisical memory if the swap " - "is enabled and 90\% of the physical memory otherwise."); namespace { constexpr std::array log_level_mappings{ - std::pair{"TRACE", spdlog::level::trace}, std::pair{"DEBUG", spdlog::level::debug}, - std::pair{"INFO", spdlog::level::info}, std::pair{"WARNING", spdlog::level::warn}, - std::pair{"ERROR", spdlog::level::err}, std::pair{"CRITICAL", spdlog::level::critical}}; - -std::string GetAllowedLogLevelsString() { - std::vector allowed_log_levels; - allowed_log_levels.reserve(log_level_mappings.size()); - std::transform(log_level_mappings.cbegin(), log_level_mappings.cend(), std::back_inserter(allowed_log_levels), - [](const auto &mapping) { return mapping.first; }); - return utils::Join(allowed_log_levels, ", "); -} + std::pair{"TRACE"sv, spdlog::level::trace}, std::pair{"DEBUG"sv, spdlog::level::debug}, + std::pair{"INFO"sv, spdlog::level::info}, std::pair{"WARNING"sv, spdlog::level::warn}, + std::pair{"ERROR"sv, spdlog::level::err}, std::pair{"CRITICAL"sv, spdlog::level::critical}}; const std::string log_level_help_string = - fmt::format("Minimum log level. Allowed values: {}", GetAllowedLogLevelsString()); + fmt::format("Minimum log level. Allowed values: {}", GetAllowedEnumValuesString(log_level_mappings)); } // namespace DEFINE_VALIDATED_string(log_level, "WARNING", log_level_help_string.c_str(), { - if (value.empty()) { - std::cout << "Log level cannot be empty." << std::endl; - return false; - } - - if (std::find_if(log_level_mappings.cbegin(), log_level_mappings.cend(), - [&](const auto &mapping) { return mapping.first == value; }) == log_level_mappings.cend()) { - std::cout << "Invalid value for log level. 
Allowed values: " << GetAllowedLogLevelsString() << std::endl; + if (const auto result = IsValidEnumValueString(value, log_level_mappings); result.HasError()) { + const auto error = result.GetError(); + switch (error) { + case ValidationError::EmptyValue: { + std::cout << "Log level cannot be empty." << std::endl; + break; + } + case ValidationError::InvalidValue: { + std::cout << "Invalid value for log level. Allowed values: " << GetAllowedEnumValuesString(log_level_mappings) + << std::endl; + break; + } + } return false; } @@ -207,11 +304,9 @@ DEFINE_VALIDATED_string(log_level, "WARNING", log_level_help_string.c_str(), { namespace { void ParseLogLevel() { - const auto mapping_iter = std::find_if(log_level_mappings.cbegin(), log_level_mappings.cend(), - [](const auto &mapping) { return mapping.first == FLAGS_log_level; }); - MG_ASSERT(mapping_iter != log_level_mappings.cend(), "Invalid log level"); - - spdlog::set_level(mapping_iter->second); + const auto log_level = StringToEnum(FLAGS_log_level, log_level_mappings); + MG_ASSERT(log_level, "Invalid log level"); + spdlog::set_level(*log_level); } // 5 weeks * 7 days @@ -241,25 +336,6 @@ void ConfigureLogging() { spdlog::flush_on(spdlog::level::trace); ParseLogLevel(); } - -int64_t GetMemoryLimit() { - if (FLAGS_memory_limit == 0) { - auto maybe_total_memory = utils::sysinfo::TotalMemory(); - MG_ASSERT(maybe_total_memory, "Failed to fetch the total physical memory"); - const auto maybe_swap_memory = utils::sysinfo::SwapTotalMemory(); - MG_ASSERT(maybe_swap_memory, "Failed to fetch the total swap memory"); - - if (*maybe_swap_memory == 0) { - // take only 90% of the total memory - *maybe_total_memory *= 9; - *maybe_total_memory /= 10; - } - return *maybe_total_memory * 1024; - } - - // We parse the memory as MiB every time - return FLAGS_memory_limit * 1024 * 1024; -} } // namespace /// Encapsulates Dbms and Interpreter that are passed through the network server @@ -962,7 +1038,8 @@ int main(int argc, char **argv) { 
.snapshot_retention_count = FLAGS_storage_snapshot_retention_count, .wal_file_size_kibibytes = FLAGS_storage_wal_file_size_kib, .wal_file_flush_every_n_tx = FLAGS_storage_wal_file_flush_every_n_tx, - .snapshot_on_exit = FLAGS_storage_snapshot_on_exit}}; + .snapshot_on_exit = FLAGS_storage_snapshot_on_exit}, + .transaction = {.isolation_level = ParseIsolationLevel()}}; if (FLAGS_storage_snapshot_interval_sec == 0) { if (FLAGS_storage_wal_enabled) { LOG_FATAL( diff --git a/src/query/exceptions.hpp b/src/query/exceptions.hpp index b8ddd7480..6c757e91b 100644 --- a/src/query/exceptions.hpp +++ b/src/query/exceptions.hpp @@ -175,4 +175,10 @@ class TriggerModificationInMulticommandTxException : public QueryException { TriggerModificationInMulticommandTxException() : QueryException("Trigger queries not allowed in multicommand transactions.") {} }; + +class IsolationLevelModificationInMulticommandTxException : public QueryException { + public: + IsolationLevelModificationInMulticommandTxException() + : QueryException("Isolation level cannot be modified in multicommand transactions.") {} +}; } // namespace query diff --git a/src/query/frontend/ast/ast.lcp b/src/query/frontend/ast/ast.lcp index c6e46f984..81b6038e5 100644 --- a/src/query/frontend/ast/ast.lcp +++ b/src/query/frontend/ast/ast.lcp @@ -2193,7 +2193,7 @@ cpp<# (:serialize)) (lcp:define-enum privilege (create delete match merge set remove index stats auth constraint - dump replication lock_path read_file free_memory trigger) + dump replication lock_path read_file free_memory trigger config) (:serialize)) #>cpp AuthQuery() = default; @@ -2232,7 +2232,8 @@ const std::vector kPrivilegesAll = { AuthQuery::Privilege::REPLICATION, AuthQuery::Privilege::READ_FILE, AuthQuery::Privilege::LOCK_PATH, - AuthQuery::Privilege::FREE_MEMORY, AuthQuery::Privilege::TRIGGER}; + AuthQuery::Privilege::FREE_MEMORY, AuthQuery::Privilege::TRIGGER, + AuthQuery::Privilege::CONFIG}; cpp<# (lcp:define-class info-query (query) @@ -2432,4 
+2433,27 @@ cpp<# (:serialize (:slk)) (:clone)) +(lcp:define-class isolation-level-query (query) + ((isolation_level "IsolationLevel" :scope :public) + (isolation_level_scope "IsolationLevelScope" :scope :public)) + + (:public + (lcp:define-enum isolation-level + (snapshot-isolation read-committed read-uncommitted) + (:serialize)) + (lcp:define-enum isolation-level-scope + (next session global) + (:serialize)) + #>cpp + IsolationLevelQuery() = default; + + DEFVISITABLE(QueryVisitor); + cpp<#) + (:private + #>cpp + friend class AstStorage; + cpp<#) + (:serialize (:slk)) + (:clone)) + (lcp:pop-namespace) ;; namespace query diff --git a/src/query/frontend/ast/ast_visitor.hpp b/src/query/frontend/ast/ast_visitor.hpp index 4523fd093..9d55f6b12 100644 --- a/src/query/frontend/ast/ast_visitor.hpp +++ b/src/query/frontend/ast/ast_visitor.hpp @@ -77,6 +77,7 @@ class LockPathQuery; class LoadCsv; class FreeMemoryQuery; class TriggerQuery; +class IsolationLevelQuery; using TreeCompositeVisitor = ::utils::CompositeVisitor< SingleQuery, CypherUnion, NamedExpression, OrOperator, XorOperator, AndOperator, NotOperator, AdditionOperator, @@ -110,6 +111,6 @@ class ExpressionVisitor template class QueryVisitor : public ::utils::Visitor {}; + FreeMemoryQuery, TriggerQuery, IsolationLevelQuery> {}; } // namespace query diff --git a/src/query/frontend/ast/cypher_main_visitor.cpp b/src/query/frontend/ast/cypher_main_visitor.cpp index 51cb93305..aaab87a4f 100644 --- a/src/query/frontend/ast/cypher_main_visitor.cpp +++ b/src/query/frontend/ast/cypher_main_visitor.cpp @@ -412,6 +412,33 @@ antlrcpp::Any CypherMainVisitor::visitShowTriggers(MemgraphCypher::ShowTriggersC return trigger_query; } +antlrcpp::Any CypherMainVisitor::visitIsolationLevelQuery(MemgraphCypher::IsolationLevelQueryContext *ctx) { + auto *isolation_level_query = storage_->Create(); + + isolation_level_query->isolation_level_scope_ = [scope = ctx->isolationLevelScope()]() { + if (scope->GLOBAL()) { + return 
IsolationLevelQuery::IsolationLevelScope::GLOBAL; + } + if (scope->SESSION()) { + return IsolationLevelQuery::IsolationLevelScope::SESSION; + } + return IsolationLevelQuery::IsolationLevelScope::NEXT; + }(); + + isolation_level_query->isolation_level_ = [level = ctx->isolationLevel()]() { + if (level->SNAPSHOT()) { + return IsolationLevelQuery::IsolationLevel::SNAPSHOT_ISOLATION; + } + if (level->COMMITTED()) { + return IsolationLevelQuery::IsolationLevel::READ_COMMITTED; + } + return IsolationLevelQuery::IsolationLevel::READ_UNCOMMITTED; + }(); + + query_ = isolation_level_query; + return isolation_level_query; +} + antlrcpp::Any CypherMainVisitor::visitCypherUnion(MemgraphCypher::CypherUnionContext *ctx) { bool distinct = !ctx->ALL(); auto *cypher_union = storage_->Create(distinct); @@ -844,6 +871,7 @@ antlrcpp::Any CypherMainVisitor::visitPrivilege(MemgraphCypher::PrivilegeContext if (ctx->READ_FILE()) return AuthQuery::Privilege::READ_FILE; if (ctx->FREE_MEMORY()) return AuthQuery::Privilege::FREE_MEMORY; if (ctx->TRIGGER()) return AuthQuery::Privilege::TRIGGER; + if (ctx->CONFIG()) return AuthQuery::Privilege::CONFIG; LOG_FATAL("Should not get here - unknown privilege!"); } diff --git a/src/query/frontend/ast/cypher_main_visitor.hpp b/src/query/frontend/ast/cypher_main_visitor.hpp index ca97ee02a..799f74986 100644 --- a/src/query/frontend/ast/cypher_main_visitor.hpp +++ b/src/query/frontend/ast/cypher_main_visitor.hpp @@ -238,6 +238,11 @@ class CypherMainVisitor : public antlropencypher::MemgraphCypherBaseVisitor { */ antlrcpp::Any visitShowTriggers(MemgraphCypher::ShowTriggersContext *ctx) override; + /** + * @return IsolationLevelQuery* + */ + antlrcpp::Any visitIsolationLevelQuery(MemgraphCypher::IsolationLevelQueryContext *ctx) override; + /** * @return CypherUnion* */ diff --git a/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 b/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 index a3c578631..e90157503 100644 --- 
a/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 +++ b/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 @@ -14,8 +14,10 @@ memgraphCypherKeyword : cypherKeyword | BAD | BEFORE | CLEAR + | CONFIG | CSV | COMMIT + | COMMITTED | DATA | DELIMITER | DATABASE @@ -26,17 +28,22 @@ memgraphCypherKeyword : cypherKeyword | FOR | FREE | FROM + | GLOBAL | GRANT | HEADER | IDENTIFIED + | ISOLATION + | LEVEL | LOAD | LOCK | MAIN | MODE + | NEXT | NO | PASSWORD | PORT | PRIVILEGES + | READ | REGISTER | REPLICA | REPLICAS @@ -45,12 +52,16 @@ memgraphCypherKeyword : cypherKeyword | ROLE | ROLES | QUOTE + | SESSION + | SNAPSHOT | STATS | SYNC + | TRANSACTION | TRIGGER | TRIGGERS | TIMEOUT | TO + | UNCOMMITTED | UNLOCK | UPDATE | USER @@ -74,6 +85,7 @@ query : cypherQuery | lockPathQuery | freeMemoryQuery | triggerQuery + | isolationLevelQuery ; authQuery : createRole @@ -175,6 +187,7 @@ privilege : CREATE | READ_FILE | FREE_MEMORY | TRIGGER + | CONFIG ; privilegeList : privilege ( ',' privilege )* ; @@ -222,3 +235,9 @@ createTrigger : CREATE TRIGGER triggerName ( ON ( emptyVertex | emptyEdge ) ? 
( dropTrigger : DROP TRIGGER triggerName ; showTriggers : SHOW TRIGGERS ; + +isolationLevel : SNAPSHOT ISOLATION | READ COMMITTED | READ UNCOMMITTED ; + +isolationLevelScope : GLOBAL | SESSION | NEXT ; + +isolationLevelQuery : SET isolationLevelScope TRANSACTION ISOLATION LEVEL isolationLevel ; diff --git a/src/query/frontend/opencypher/grammar/MemgraphCypherLexer.g4 b/src/query/frontend/opencypher/grammar/MemgraphCypherLexer.g4 index 5fdea9f31..37d8cafc1 100644 --- a/src/query/frontend/opencypher/grammar/MemgraphCypherLexer.g4 +++ b/src/query/frontend/opencypher/grammar/MemgraphCypherLexer.g4 @@ -20,6 +20,8 @@ BAD : B A D ; BEFORE : B E F O R E ; CLEAR : C L E A R ; COMMIT : C O M M I T ; +COMMITTED : C O M M I T T E D ; +CONFIG : C O N F I G ; CSV : C S V ; DATA : D A T A ; DELIMITER : D E L I M I T E R ; @@ -33,20 +35,25 @@ FOR : F O R ; FREE : F R E E ; FREE_MEMORY : F R E E UNDERSCORE M E M O R Y ; FROM : F R O M ; +GLOBAL : G L O B A L ; GRANT : G R A N T ; GRANTS : G R A N T S ; HEADER : H E A D E R ; IDENTIFIED : I D E N T I F I E D ; IGNORE : I G N O R E ; +ISOLATION : I S O L A T I O N ; +LEVEL : L E V E L ; LOAD : L O A D ; LOCK : L O C K ; LOCK_PATH : L O C K UNDERSCORE P A T H ; MAIN : M A I N ; MODE : M O D E ; +NEXT : N E X T ; NO : N O ; PASSWORD : P A S S W O R D ; PORT : P O R T ; PRIVILEGES : P R I V I L E G E S ; +READ : R E A D ; READ_FILE : R E A D UNDERSCORE F I L E ; REGISTER : R E G I S T E R ; REPLICA : R E P L I C A ; @@ -56,12 +63,16 @@ REVOKE : R E V O K E ; ROLE : R O L E ; ROLES : R O L E S ; QUOTE : Q U O T E ; +SESSION : S E S S I O N ; +SNAPSHOT : S N A P S H O T ; STATS : S T A T S ; SYNC : S Y N C ; TIMEOUT : T I M E O U T ; TO : T O ; +TRANSACTION : T R A N S A C T I O N ; TRIGGER : T R I G G E R ; TRIGGERS : T R I G G E R S ; +UNCOMMITTED : U N C O M M I T T E D ; UNLOCK : U N L O C K ; UPDATE : U P D A T E ; USER : U S E R ; diff --git a/src/query/frontend/semantic/required_privileges.cpp 
b/src/query/frontend/semantic/required_privileges.cpp index 0f16bff56..49e78c4bf 100644 --- a/src/query/frontend/semantic/required_privileges.cpp +++ b/src/query/frontend/semantic/required_privileges.cpp @@ -57,6 +57,8 @@ class PrivilegeExtractor : public QueryVisitor, public HierarchicalTreeVis void Visit(ReplicationQuery &replication_query) override { AddPrivilege(AuthQuery::Privilege::REPLICATION); } + void Visit(IsolationLevelQuery &isolation_level_query) override { AddPrivilege(AuthQuery::Privilege::CONFIG); } + bool PreVisit(Create & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::CREATE); return false; diff --git a/src/query/frontend/stripped_lexer_constants.hpp b/src/query/frontend/stripped_lexer_constants.hpp index be388d708..5a03c9cf9 100644 --- a/src/query/frontend/stripped_lexer_constants.hpp +++ b/src/query/frontend/stripped_lexer_constants.hpp @@ -79,17 +79,19 @@ class Trie { const int kBitsetSize = 65536; const trie::Trie kKeywords = { - "union", "all", "optional", "match", "unwind", "as", "merge", "on", - "create", "set", "detach", "delete", "remove", "with", "distinct", "return", - "order", "by", "skip", "limit", "ascending", "asc", "descending", "desc", - "where", "or", "xor", "and", "not", "in", "starts", "ends", - "contains", "is", "null", "case", "when", "then", "else", "end", - "count", "filter", "extract", "any", "none", "single", "true", "false", - "reduce", "coalesce", "user", "password", "alter", "drop", "show", "stats", - "unique", "explain", "profile", "storage", "index", "info", "exists", "assert", - "constraint", "node", "key", "dump", "database", "call", "yield", "memory", - "mb", "kb", "unlimited", "free", "procedure", "query", "free_memory", "read_file", - "lock_path", "after", "before", "execute", "transaction", "trigger", "triggers", "update"}; + "union", "all", "optional", "match", "unwind", "as", "merge", "on", + "create", "set", "detach", "delete", "remove", "with", "distinct", "return", + "order", "by", "skip", 
"limit", "ascending", "asc", "descending", "desc", + "where", "or", "xor", "and", "not", "in", "starts", "ends", + "contains", "is", "null", "case", "when", "then", "else", "end", + "count", "filter", "extract", "any", "none", "single", "true", "false", + "reduce", "coalesce", "user", "password", "alter", "drop", "show", "stats", + "unique", "explain", "profile", "storage", "index", "info", "exists", "assert", + "constraint", "node", "key", "dump", "database", "call", "yield", "memory", + "mb", "kb", "unlimited", "free", "procedure", "query", "free_memory", "read_file", + "lock_path", "after", "before", "execute", "transaction", "trigger", "triggers", "update", + "comitted", "uncomitted", "global", "isolation", "level", "next", "read", "session", + "snapshot", "transaction"}; // Unicode codepoints that are allowed at the start of the unescaped name. const std::bitset kUnescapedNameAllowedStarts( diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp index 39432f716..69bc4e3d6 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -622,7 +622,8 @@ PreparedQuery Interpreter::PrepareTransactionQuery(std::string_view query_upper) in_explicit_transaction_ = true; expect_rollback_ = false; - db_accessor_ = std::make_unique(interpreter_context_->db->Access()); + db_accessor_ = + std::make_unique(interpreter_context_->db->Access(GetIsolationLevelOverride())); execution_db_accessor_.emplace(db_accessor_.get()); if (interpreter_context_->trigger_store->HasTriggers()) { @@ -1162,6 +1163,50 @@ PreparedQuery PrepareTriggerQuery(ParsedQuery parsed_query, const bool in_explic // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks) } +constexpr auto ToStorageIsolationLevel(const IsolationLevelQuery::IsolationLevel isolation_level) noexcept { + switch (isolation_level) { + case IsolationLevelQuery::IsolationLevel::SNAPSHOT_ISOLATION: + return storage::IsolationLevel::SNAPSHOT_ISOLATION; + case IsolationLevelQuery::IsolationLevel::READ_COMMITTED: + 
return storage::IsolationLevel::READ_COMMITTED; + case IsolationLevelQuery::IsolationLevel::READ_UNCOMMITTED: + return storage::IsolationLevel::READ_UNCOMMITTED; + } +} + +PreparedQuery PrepareIsolationLevelQuery(ParsedQuery parsed_query, const bool in_explicit_transaction, + InterpreterContext *interpreter_context, Interpreter *interpreter) { + if (in_explicit_transaction) { + throw IsolationLevelModificationInMulticommandTxException(); + } + + auto *isolation_level_query = utils::Downcast(parsed_query.query); + MG_ASSERT(isolation_level_query); + + const auto isolation_level = ToStorageIsolationLevel(isolation_level_query->isolation_level_); + + auto callback = [isolation_level_query, isolation_level, interpreter_context, + interpreter]() -> std::function { + switch (isolation_level_query->isolation_level_scope_) { + case IsolationLevelQuery::IsolationLevelScope::GLOBAL: + return [interpreter_context, isolation_level] { interpreter_context->db->SetIsolationLevel(isolation_level); }; + case IsolationLevelQuery::IsolationLevelScope::SESSION: + return [interpreter, isolation_level] { interpreter->SetSessionIsolationLevel(isolation_level); }; + case IsolationLevelQuery::IsolationLevelScope::NEXT: + return [interpreter, isolation_level] { interpreter->SetNextTransactionIsolationLevel(isolation_level); }; + } + }(); + + return PreparedQuery{ + {}, + std::move(parsed_query.required_privileges), + [callback = std::move(callback)](AnyStream *stream, std::optional n) -> std::optional { + callback(); + return QueryHandlerResult::COMMIT; + }, + RWType::NONE}; +} + PreparedQuery PrepareInfoQuery(ParsedQuery parsed_query, bool in_explicit_transaction, std::map *summary, InterpreterContext *interpreter_context, storage::Storage *db, utils::MemoryResource *execution_memory) { @@ -1452,7 +1497,8 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, (utils::Downcast(parsed_query.query) || utils::Downcast(parsed_query.query) || 
utils::Downcast(parsed_query.query) || utils::Downcast(parsed_query.query) || utils::Downcast(parsed_query.query))) { - db_accessor_ = std::make_unique(interpreter_context_->db->Access()); + db_accessor_ = + std::make_unique(interpreter_context_->db->Access(GetIsolationLevelOverride())); execution_db_accessor_.emplace(db_accessor_.get()); if (utils::Downcast(parsed_query.query) && interpreter_context_->trigger_store->HasTriggers()) { @@ -1503,6 +1549,9 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, } else if (utils::Downcast(parsed_query.query)) { prepared_query = PrepareTriggerQuery(std::move(parsed_query), in_explicit_transaction_, interpreter_context_, &*execution_db_accessor_, params); + } else if (utils::Downcast(parsed_query.query)) { + prepared_query = + PrepareIsolationLevelQuery(std::move(parsed_query), in_explicit_transaction_, interpreter_context_, this); } else { LOG_FATAL("Should not get here -- unknown query type!"); } @@ -1690,4 +1739,22 @@ void Interpreter::AbortCommand(std::unique_ptr *query_execution) } } +std::optional Interpreter::GetIsolationLevelOverride() { + if (next_transaction_isolation_level) { + const auto isolation_level = *next_transaction_isolation_level; + next_transaction_isolation_level.reset(); + return isolation_level; + } + + return interpreter_isolation_level; +} + +void Interpreter::SetNextTransactionIsolationLevel(const storage::IsolationLevel isolation_level) { + next_transaction_isolation_level.emplace(isolation_level); +} + +void Interpreter::SetSessionIsolationLevel(const storage::IsolationLevel isolation_level) { + interpreter_isolation_level.emplace(isolation_level); +} + } // namespace query diff --git a/src/query/interpreter.hpp b/src/query/interpreter.hpp index 87bec040d..d68d9cee6 100644 --- a/src/query/interpreter.hpp +++ b/src/query/interpreter.hpp @@ -15,6 +15,7 @@ #include "query/stream.hpp" #include "query/trigger.hpp" #include "query/typed_value.hpp" +#include 
"storage/v2/isolation_level.hpp" #include "utils/event_counter.hpp" #include "utils/logging.hpp" #include "utils/memory.hpp" @@ -254,6 +255,9 @@ class Interpreter final { void RollbackTransaction(); + void SetNextTransactionIsolationLevel(storage::IsolationLevel isolation_level); + void SetSessionIsolationLevel(storage::IsolationLevel isolation_level); + /** * Abort the current multicommand transaction. */ @@ -306,10 +310,14 @@ class Interpreter final { bool in_explicit_transaction_{false}; bool expect_rollback_{false}; + std::optional interpreter_isolation_level; + std::optional next_transaction_isolation_level; + PreparedQuery PrepareTransactionQuery(std::string_view query_upper); void Commit(); void AdvanceCommand(); void AbortCommand(std::unique_ptr *query_execution); + std::optional GetIsolationLevelOverride(); size_t ActiveQueryExecutions() { return std::count_if(query_executions_.begin(), query_executions_.end(), diff --git a/src/storage/v2/config.hpp b/src/storage/v2/config.hpp index 42f9ebee1..5c3bab2ca 100644 --- a/src/storage/v2/config.hpp +++ b/src/storage/v2/config.hpp @@ -3,6 +3,8 @@ #include #include #include +#include "storage/v2/isolation_level.hpp" +#include "storage/v2/transaction.hpp" namespace storage { @@ -38,6 +40,10 @@ struct Config { bool snapshot_on_exit{false}; } durability; + + struct Transaction { + IsolationLevel isolation_level{IsolationLevel::SNAPSHOT_ISOLATION}; + } transaction; }; } // namespace storage diff --git a/src/storage/v2/isolation_level.hpp b/src/storage/v2/isolation_level.hpp new file mode 100644 index 000000000..42bf8f421 --- /dev/null +++ b/src/storage/v2/isolation_level.hpp @@ -0,0 +1,9 @@ +#pragma once + +#include + +namespace storage { + +enum class IsolationLevel : std::uint8_t { SNAPSHOT_ISOLATION, READ_COMMITTED, READ_UNCOMMITTED }; + +} // namespace storage diff --git a/src/storage/v2/mvcc.hpp b/src/storage/v2/mvcc.hpp index 211fc87af..1c449fa6b 100644 --- a/src/storage/v2/mvcc.hpp +++ b/src/storage/v2/mvcc.hpp 
@@ -24,8 +24,18 @@ inline void ApplyDeltasForRead(Transaction *transaction, const Delta *delta, Vie auto ts = delta->timestamp->load(std::memory_order_acquire); auto cid = delta->command_id; - // This is a committed change that we see so we shouldn't undo it. - if (ts < transaction->start_timestamp) { + // For SNAPSHOT ISOLATION -> we can only see the changes which were committed before the start of the current + // transaction + // + // For READ COMMITTED -> we can only see the changes which are committed. Commit timestamps of + // uncommitted changes are set to the transaction id of the transaction that made the change. Transaction id is + // always higher than start or commit timestamps so we know if the timestamp is lower than the initial transaction + // id value, that the change is committed. + // + // For READ UNCOMMITTED -> we accept any change. + if ((transaction->isolation_level == IsolationLevel::SNAPSHOT_ISOLATION && ts < transaction->start_timestamp) || + (transaction->isolation_level == IsolationLevel::READ_COMMITTED && ts < kTransactionInitialId) || + (transaction->isolation_level == IsolationLevel::READ_UNCOMMITTED)) { break; } diff --git a/src/storage/v2/storage.cpp b/src/storage/v2/storage.cpp index 8856d6628..fae4c710b 100644 --- a/src/storage/v2/storage.cpp +++ b/src/storage/v2/storage.cpp @@ -286,6 +286,7 @@ bool VerticesIterable::Iterator::operator==(const Iterator &other) const { Storage::Storage(Config config) : indices_(&constraints_, config.items), + isolation_level_(config.transaction.isolation_level), config_(config), snapshot_directory_(config_.durability.storage_directory / durability::kSnapshotDirectory), wal_directory_(config_.durability.storage_directory / durability::kWalDirectory), @@ -394,13 +395,13 @@ Storage::~Storage() { } } -Storage::Accessor::Accessor(Storage *storage) +Storage::Accessor::Accessor(Storage *storage, IsolationLevel isolation_level) : storage_(storage), // The lock must be acquired before creating the 
transaction object to // prevent freshly created transactions from dangling in an active state // during exclusive operations. storage_guard_(storage_->main_lock_), - transaction_(storage->CreateTransaction()), + transaction_(storage->CreateTransaction(isolation_level)), is_transaction_active_(true), config_(storage->config_.items) {} @@ -1227,7 +1228,7 @@ VerticesIterable Storage::Accessor::Vertices(LabelId label, PropertyId property, storage_->indices_.label_property_index.Vertices(label, property, lower_bound, upper_bound, view, &transaction_)); } -Transaction Storage::CreateTransaction() { +Transaction Storage::CreateTransaction(IsolationLevel isolation_level) { // We acquire the transaction engine lock here because we access (and // modify) the transaction engine variables (`transaction_id` and // `timestamp`) below. @@ -1248,7 +1249,7 @@ Transaction Storage::CreateTransaction() { start_timestamp = timestamp_++; } } - return {transaction_id, start_timestamp}; + return {transaction_id, start_timestamp, isolation_level}; } template @@ -1736,7 +1737,7 @@ void Storage::CreateSnapshot() { std::shared_lock storage_guard(main_lock_); // Create the transaction used to create the snapshot. - auto transaction = CreateTransaction(); + auto transaction = CreateTransaction(IsolationLevel::SNAPSHOT_ISOLATION); // Create snapshot. 
durability::CreateSnapshot(&transaction, snapshot_directory_, wal_directory_, @@ -1894,4 +1895,9 @@ std::vector Storage::ReplicasInfo() { }); } +void Storage::SetIsolationLevel(IsolationLevel isolation_level) { + std::unique_lock main_guard{main_lock_}; + isolation_level_ = isolation_level; +} + } // namespace storage diff --git a/src/storage/v2/storage.hpp b/src/storage/v2/storage.hpp index f61ff5546..fcb13c349 100644 --- a/src/storage/v2/storage.hpp +++ b/src/storage/v2/storage.hpp @@ -15,6 +15,7 @@ #include "storage/v2/edge.hpp" #include "storage/v2/edge_accessor.hpp" #include "storage/v2/indices.hpp" +#include "storage/v2/isolation_level.hpp" #include "storage/v2/mvcc.hpp" #include "storage/v2/name_id_mapper.hpp" #include "storage/v2/result.hpp" @@ -184,7 +185,7 @@ class Storage final { private: friend class Storage; - explicit Accessor(Storage *storage); + explicit Accessor(Storage *storage, IsolationLevel isolation_level); public: Accessor(const Accessor &) = delete; @@ -322,7 +323,9 @@ class Storage final { Config::Items config_; }; - Accessor Access() { return Accessor{this}; } + Accessor Access(std::optional override_isolation_level = {}) { + return Accessor{this, override_isolation_level.value_or(isolation_level_)}; + } const std::string &LabelToName(LabelId label) const; const std::string &PropertyToName(PropertyId property) const; @@ -423,8 +426,10 @@ class Storage final { void FreeMemory(); + void SetIsolationLevel(IsolationLevel isolation_level); + private: - Transaction CreateTransaction(); + Transaction CreateTransaction(IsolationLevel isolation_level); /// The force parameter determines the behaviour of the garbage collector. /// If it's set to true, it will behave as a global operation, i.e. 
it can't @@ -485,6 +490,7 @@ class Storage final { std::optional commit_log_; utils::Synchronized, utils::SpinLock> committed_transactions_; + IsolationLevel isolation_level_; Config config_; utils::Scheduler gc_runner_; diff --git a/src/storage/v2/transaction.hpp b/src/storage/v2/transaction.hpp index 979b26da8..dbf029dba 100644 --- a/src/storage/v2/transaction.hpp +++ b/src/storage/v2/transaction.hpp @@ -9,6 +9,7 @@ #include "storage/v2/delta.hpp" #include "storage/v2/edge.hpp" +#include "storage/v2/isolation_level.hpp" #include "storage/v2/property_value.hpp" #include "storage/v2/vertex.hpp" #include "storage/v2/view.hpp" @@ -19,8 +20,12 @@ const uint64_t kTimestampInitialId = 0; const uint64_t kTransactionInitialId = 1ULL << 63U; struct Transaction { - Transaction(uint64_t transaction_id, uint64_t start_timestamp) - : transaction_id(transaction_id), start_timestamp(start_timestamp), command_id(0), must_abort(false) {} + Transaction(uint64_t transaction_id, uint64_t start_timestamp, IsolationLevel isolation_level) + : transaction_id(transaction_id), + start_timestamp(start_timestamp), + command_id(0), + must_abort(false), + isolation_level(isolation_level) {} Transaction(Transaction &&other) noexcept : transaction_id(other.transaction_id), @@ -28,7 +33,8 @@ struct Transaction { commit_timestamp(std::move(other.commit_timestamp)), command_id(other.command_id), deltas(std::move(other.deltas)), - must_abort(other.must_abort) {} + must_abort(other.must_abort), + isolation_level(other.isolation_level) {} Transaction(const Transaction &) = delete; Transaction &operator=(const Transaction &) = delete; @@ -52,6 +58,7 @@ struct Transaction { uint64_t command_id; std::list deltas; bool must_abort; + IsolationLevel isolation_level; }; inline bool operator==(const Transaction &first, const Transaction &second) { diff --git a/tests/benchmark/expansion.cpp b/tests/benchmark/expansion.cpp index 3317a3158..5551f4829 100644 --- a/tests/benchmark/expansion.cpp +++ 
b/tests/benchmark/expansion.cpp @@ -4,6 +4,7 @@ #include "communication/result_stream_faker.hpp" #include "query/interpreter.hpp" #include "query/typed_value.hpp" +#include "storage/v2/isolation_level.hpp" #include "storage/v2/storage.hpp" class ExpansionBenchFixture : public benchmark::Fixture { diff --git a/tests/e2e/CMakeLists.txt b/tests/e2e/CMakeLists.txt index 6984f3530..1a14bd21a 100644 --- a/tests/e2e/CMakeLists.txt +++ b/tests/e2e/CMakeLists.txt @@ -1,3 +1,4 @@ add_subdirectory(replication) add_subdirectory(memory) add_subdirectory(triggers) +add_subdirectory(isolation_levels) diff --git a/tests/e2e/isolation_levels/CMakeLists.txt b/tests/e2e/isolation_levels/CMakeLists.txt new file mode 100644 index 000000000..d2441aced --- /dev/null +++ b/tests/e2e/isolation_levels/CMakeLists.txt @@ -0,0 +1,2 @@ +add_executable(memgraph__e2e__isolation_levels isolation_levels.cpp) +target_link_libraries(memgraph__e2e__isolation_levels gflags mgclient mg-utils mg-io Threads::Threads) diff --git a/tests/e2e/isolation_levels/isolation_levels.cpp b/tests/e2e/isolation_levels/isolation_levels.cpp new file mode 100644 index 000000000..df91c3e70 --- /dev/null +++ b/tests/e2e/isolation_levels/isolation_levels.cpp @@ -0,0 +1,241 @@ +#include +#include + +#include "utils/logging.hpp" +#include "utils/timer.hpp" + +DEFINE_uint64(bolt_port, 7687, "Bolt port"); +DEFINE_uint64(timeout, 120, "Timeout seconds"); + +namespace { + +auto GetClient() { + auto client = + mg::Client::Connect({.host = "127.0.0.1", .port = static_cast(FLAGS_bolt_port), .use_ssl = false}); + MG_ASSERT(client, "Failed to connect!"); + + return client; +} + +auto GetVertexCount(std::unique_ptr &client) { + MG_ASSERT(client->Execute("MATCH (n) RETURN count(n)")); + auto maybe_row = client->FetchOne(); + MG_ASSERT(maybe_row, "Failed to fetch vertex count"); + + const auto &row = *maybe_row; + MG_ASSERT(row.size() == 1, "Got invalid result for vertex count"); + + client->FetchOne(); + return row[0].ValueInt(); +} + 
+void CleanDatabase() { + auto client = GetClient(); + MG_ASSERT(client->Execute("MATCH (n) DETACH DELETE n;")); + client->DiscardAll(); +} + +void TestSnapshotIsolation(std::unique_ptr &client) { + spdlog::info("Verifying SNAPSHOT ISOLATION"); + + auto creator = GetClient(); + + MG_ASSERT(client->BeginTransaction()); + MG_ASSERT(creator->BeginTransaction()); + + constexpr auto vertex_count = 10; + for (size_t i = 0; i < vertex_count; ++i) { + MG_ASSERT(creator->Execute("CREATE ()")); + creator->DiscardAll(); + + auto current_vertex_count = GetVertexCount(client); + MG_ASSERT(current_vertex_count == 0, + "Invalid number of vertices found for SNAPSHOT ISOLATION (found {}, expected {}). Read vertices from a " + "transaction which started " + "at a later point.", + current_vertex_count, 0); + } + + MG_ASSERT(creator->CommitTransaction()); + + auto current_vertex_count = GetVertexCount(client); + MG_ASSERT(current_vertex_count == 0, + "Invalid number of vertices found for SNAPSHOT ISOLATION (found {}, expected {}). Read vertices from a " + "transaction which started " + "at a later point.", + current_vertex_count, 0); + MG_ASSERT(client->CommitTransaction()); + CleanDatabase(); +} + +void TestReadCommitted(std::unique_ptr &client) { + spdlog::info("Verifying READ COMMITTED"); + + auto creator = GetClient(); + + MG_ASSERT(client->BeginTransaction()); + MG_ASSERT(creator->BeginTransaction()); + + constexpr auto vertex_count = 10; + for (size_t i = 0; i < vertex_count; ++i) { + MG_ASSERT(creator->Execute("CREATE ()")); + creator->DiscardAll(); + + auto current_vertex_count = GetVertexCount(client); + MG_ASSERT(current_vertex_count == 0, + "Invalid number of vertices found for READ COMMITTED (found {}, expected {}. 
Read vertices from a " + "transaction which is not " + "committed.", + current_vertex_count, 0); + } + + MG_ASSERT(creator->CommitTransaction()); + + auto current_vertex_count = GetVertexCount(client); + MG_ASSERT(current_vertex_count == vertex_count, + "Invalid number of vertices found for READ COMMITTED (found {}, expected {}). Failed to read vertices " + "from a committed transaction", + current_vertex_count, vertex_count); + MG_ASSERT(client->CommitTransaction()); + CleanDatabase(); +} + +void TestReadUncommitted(std::unique_ptr &client) { + spdlog::info("Verifying READ UNCOMMITTED"); + + auto creator = GetClient(); + + MG_ASSERT(client->BeginTransaction()); + MG_ASSERT(creator->BeginTransaction()); + + constexpr auto vertex_count = 10; + for (size_t i = 1; i <= vertex_count; ++i) { + MG_ASSERT(creator->Execute("CREATE ()")); + creator->DiscardAll(); + + auto current_vertex_count = GetVertexCount(client); + MG_ASSERT(current_vertex_count == i, + "Invalid number of vertices found for READ UNCOMMITTED (found {}, expected {}). Failed to read vertices " + "from a different transaction.", + current_vertex_count, i); + } + + MG_ASSERT(creator->CommitTransaction()); + + auto current_vertex_count = GetVertexCount(client); + MG_ASSERT(current_vertex_count == vertex_count, + "Invalid number of vertices found for READ UNCOMMITTED (found {}, expected {}). 
Failed to read vertices " + "from a different transaction", + current_vertex_count, vertex_count); + MG_ASSERT(client->CommitTransaction()); + CleanDatabase(); +} + +constexpr std::array isolation_levels{std::pair{"SNAPSHOT ISOLATION", &TestSnapshotIsolation}, + std::pair{"READ COMMITTED", &TestReadCommitted}, + std::pair{"READ UNCOMMITTED", &TestReadUncommitted}}; + +void TestGlobalIsolationLevel() { + spdlog::info("\n\n----Test global isolation levels----\n"); + auto first_client = GetClient(); + auto second_client = GetClient(); + + for (const auto &[isolation_level, verification_function] : isolation_levels) { + spdlog::info("--------------------------"); + spdlog::info("Setting global isolation level to {}", isolation_level); + MG_ASSERT(first_client->Execute(fmt::format("SET GLOBAL TRANSACTION ISOLATION LEVEL {}", isolation_level))); + first_client->DiscardAll(); + + verification_function(first_client); + verification_function(second_client); + spdlog::info("--------------------------\n"); + } +} + +void TestSessionIsolationLevel() { + spdlog::info("\n\n----Test session isolation levels----\n"); + + auto global_client = GetClient(); + auto session_client = GetClient(); + for (const auto &[global_isolation_level, global_verification_function] : isolation_levels) { + spdlog::info("Setting global isolation level to {}", global_isolation_level); + MG_ASSERT(global_client->Execute(fmt::format("SET GLOBAL TRANSACTION ISOLATION LEVEL {}", global_isolation_level))); + global_client->DiscardAll(); + + for (const auto &[session_isolation_level, session_verification_function] : isolation_levels) { + spdlog::info("--------------------------"); + spdlog::info("Setting session isolation level to {}", session_isolation_level); + MG_ASSERT( + session_client->Execute(fmt::format("SET SESSION TRANSACTION ISOLATION LEVEL {}", session_isolation_level))); + session_client->DiscardAll(); + + spdlog::info("Verifying client which is using global isolation level"); + 
global_verification_function(global_client); + spdlog::info("Verifying client which is using session isolation level"); + session_verification_function(session_client); + spdlog::info("--------------------------\n"); + } + } +} + +// Priority of applying the isolation level from highest priority NEXT -> SESSION -> GLOBAL +void TestNextIsolationLevel() { + spdlog::info("\n\n----Test next isolation levels----\n"); + + auto global_client = GetClient(); + auto session_client = GetClient(); + for (const auto &[global_isolation_level, global_verification_function] : isolation_levels) { + spdlog::info("Setting global isolation level to {}", global_isolation_level); + MG_ASSERT(global_client->Execute(fmt::format("SET GLOBAL TRANSACTION ISOLATION LEVEL {}", global_isolation_level))); + global_client->DiscardAll(); + + for (const auto &[session_isolation_level, session_verification_function] : isolation_levels) { + spdlog::info("Setting session isolation level to {}", session_isolation_level); + MG_ASSERT( + session_client->Execute(fmt::format("SET SESSION TRANSACTION ISOLATION LEVEL {}", session_isolation_level))); + session_client->DiscardAll(); + + for (const auto &[next_isolation_level, next_verification_function] : isolation_levels) { + spdlog::info("--------------------------"); + spdlog::info("Verifying client which is using global isolation level"); + global_verification_function(global_client); + spdlog::info("Verifying client which is using session isolation level"); + session_verification_function(session_client); + + spdlog::info("Setting isolation level of the next transaction to {}", next_isolation_level); + MG_ASSERT(global_client->Execute(fmt::format("SET NEXT TRANSACTION ISOLATION LEVEL {}", next_isolation_level))); + global_client->DiscardAll(); + MG_ASSERT( + session_client->Execute(fmt::format("SET NEXT TRANSACTION ISOLATION LEVEL {}", next_isolation_level))); + session_client->DiscardAll(); + + spdlog::info("Verifying client which is using global 
isolation level while next isolation level is set"); + next_verification_function(global_client); + spdlog::info("Verifying client which is using session isolation level while next isolation level is set"); + next_verification_function(session_client); + + spdlog::info("Verifying client which is using global isolation level after the next isolation level was used"); + global_verification_function(global_client); + spdlog::info("Verifying client which is using session isolation level after the next isolation level was used"); + session_verification_function(session_client); + spdlog::info("--------------------------\n"); + } + } + } +} + +} // namespace + +int main(int argc, char **argv) { + google::SetUsageMessage("Memgraph E2E Isolation Levels"); + gflags::ParseCommandLineFlags(&argc, &argv, true); + logging::RedirectToStderr(); + + mg::Client::Init(); + + TestGlobalIsolationLevel(); + TestSessionIsolationLevel(); + TestNextIsolationLevel(); + + return 0; +} diff --git a/tests/e2e/isolation_levels/workloads.yaml b/tests/e2e/isolation_levels/workloads.yaml new file mode 100644 index 000000000..5d793396e --- /dev/null +++ b/tests/e2e/isolation_levels/workloads.yaml @@ -0,0 +1,14 @@ +bolt_port: &bolt_port "7687" +template_cluster: &template_cluster + cluster: + main: + args: ["--bolt-port", *bolt_port, "--log-level=TRACE"] + log_file: "isolation-levels-e2e.log" + setup_queries: [] + validation_queries: [] + +workloads: + - name: "Isolation levels" + binary: "tests/e2e/isolation_levels/memgraph__e2e__isolation_levels" + args: ["--bolt-port", *bolt_port] + <<: *template_cluster diff --git a/tests/manual/single_query.cpp b/tests/manual/single_query.cpp index ee60006ed..471b7779a 100644 --- a/tests/manual/single_query.cpp +++ b/tests/manual/single_query.cpp @@ -1,5 +1,6 @@ #include "communication/result_stream_faker.hpp" #include "query/interpreter.hpp" +#include "storage/v2/isolation_level.hpp" #include "storage/v2/storage.hpp" #include "utils/on_scope_exit.hpp" diff 
--git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index 1adda7f54..5f61495a4 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -279,6 +279,9 @@ target_link_libraries(${test_prefix}storage_v2_wal_file mg-storage-v2 fmt) add_unit_test(storage_v2_replication.cpp) target_link_libraries(${test_prefix}storage_v2_replication mg-storage-v2 fmt) +add_unit_test(storage_v2_isolation_level.cpp) +target_link_libraries(${test_prefix}storage_v2_isolation_level mg-storage-v2) + # Test mg-auth if (MG_ENTERPRISE) diff --git a/tests/unit/cypher_main_visitor.cpp b/tests/unit/cypher_main_visitor.cpp index 9cb70fd99..fd673c3f4 100644 --- a/tests/unit/cypher_main_visitor.cpp +++ b/tests/unit/cypher_main_visitor.cpp @@ -2065,6 +2065,8 @@ TEST_P(CypherMainVisitorTest, GrantPrivilege) { {AuthQuery::Privilege::FREE_MEMORY}); check_auth_query(&ast_generator, "GRANT TRIGGER TO user", AuthQuery::Action::GRANT_PRIVILEGE, "", "", "user", {}, {AuthQuery::Privilege::TRIGGER}); + check_auth_query(&ast_generator, "GRANT CONFIG TO user", AuthQuery::Action::GRANT_PRIVILEGE, "", "", "user", {}, + {AuthQuery::Privilege::CONFIG}); } TEST_P(CypherMainVisitorTest, DenyPrivilege) { @@ -3129,20 +3131,20 @@ TEST_P(CypherMainVisitorTest, CreateTriggers) { const auto *query_template = "CREATE TRIGGER trigger {} {} COMMIT EXECUTE {}"; - std::array events{std::pair{"", query::TriggerQuery::EventType::ANY}, - std::pair{"ON CREATE", query::TriggerQuery::EventType::CREATE}, - std::pair{"ON () CREATE", query::TriggerQuery::EventType::VERTEX_CREATE}, - std::pair{"ON --> CREATE", query::TriggerQuery::EventType::EDGE_CREATE}, - std::pair{"ON DELETE", query::TriggerQuery::EventType::DELETE}, - std::pair{"ON () DELETE", query::TriggerQuery::EventType::VERTEX_DELETE}, - std::pair{"ON --> DELETE", query::TriggerQuery::EventType::EDGE_DELETE}, - std::pair{"ON UPDATE", query::TriggerQuery::EventType::UPDATE}, - std::pair{"ON () UPDATE", query::TriggerQuery::EventType::VERTEX_UPDATE}, - 
std::pair{"ON --> UPDATE", query::TriggerQuery::EventType::EDGE_UPDATE}}; + constexpr std::array events{std::pair{"", query::TriggerQuery::EventType::ANY}, + std::pair{"ON CREATE", query::TriggerQuery::EventType::CREATE}, + std::pair{"ON () CREATE", query::TriggerQuery::EventType::VERTEX_CREATE}, + std::pair{"ON --> CREATE", query::TriggerQuery::EventType::EDGE_CREATE}, + std::pair{"ON DELETE", query::TriggerQuery::EventType::DELETE}, + std::pair{"ON () DELETE", query::TriggerQuery::EventType::VERTEX_DELETE}, + std::pair{"ON --> DELETE", query::TriggerQuery::EventType::EDGE_DELETE}, + std::pair{"ON UPDATE", query::TriggerQuery::EventType::UPDATE}, + std::pair{"ON () UPDATE", query::TriggerQuery::EventType::VERTEX_UPDATE}, + std::pair{"ON --> UPDATE", query::TriggerQuery::EventType::EDGE_UPDATE}}; - std::array phases{"BEFORE", "AFTER"}; + constexpr std::array phases{"BEFORE", "AFTER"}; - std::array statements{ + constexpr std::array statements{ "", "SOME SUPER\nSTATEMENT", "Statement with 12312321 3 ", " Statement with 12312321 3 " }; @@ -3157,4 +3159,43 @@ TEST_P(CypherMainVisitorTest, CreateTriggers) { } } +namespace { +void ValidateSetIsolationLevelQuery(Base &ast_generator, const auto &query, const auto scope, + const auto isolation_level) { + auto *parsed_query = dynamic_cast(ast_generator.ParseQuery(query)); + EXPECT_EQ(parsed_query->isolation_level_scope_, scope); + EXPECT_EQ(parsed_query->isolation_level_, isolation_level); +} +} // namespace + +TEST_P(CypherMainVisitorTest, SetIsolationLevelQuery) { + auto &ast_generator = *GetParam(); + TestInvalidQuery("SET ISO", ast_generator); + TestInvalidQuery("SET TRANSACTION ISOLATION", ast_generator); + TestInvalidQuery("SET TRANSACTION ISOLATION LEVEL", ast_generator); + TestInvalidQuery("SET TRANSACTION ISOLATION LEVEL READ COMMITTED", ast_generator); + TestInvalidQuery("SET NEXT TRANSACTION ISOLATION LEVEL", ast_generator); + TestInvalidQuery("SET ISOLATION LEVEL READ COMMITTED", ast_generator); + 
TestInvalidQuery("SET GLOBAL ISOLATION LEVEL READ COMMITTED", ast_generator); + TestInvalidQuery("SET GLOBAL TRANSACTION ISOLATION LEVEL READ COMITTED", ast_generator); + TestInvalidQuery("SET GLOBAL TRANSACTION ISOLATION LEVEL READ_COMITTED", ast_generator); + TestInvalidQuery("SET SESSION TRANSACTION ISOLATION LEVEL READCOMITTED", ast_generator); + + constexpr std::array scopes{std::pair{"GLOBAL", query::IsolationLevelQuery::IsolationLevelScope::GLOBAL}, + std::pair{"SESSION", query::IsolationLevelQuery::IsolationLevelScope::SESSION}, + std::pair{"NEXT", query::IsolationLevelQuery::IsolationLevelScope::NEXT}}; + constexpr std::array isolation_levels{ + std::pair{"READ UNCOMMITTED", query::IsolationLevelQuery::IsolationLevel::READ_UNCOMMITTED}, + std::pair{"READ COMMITTED", query::IsolationLevelQuery::IsolationLevel::READ_COMMITTED}, + std::pair{"SNAPSHOT ISOLATION", query::IsolationLevelQuery::IsolationLevel::SNAPSHOT_ISOLATION}}; + + constexpr const auto *query_template = "SET {} TRANSACTION ISOLATION LEVEL {}"; + + for (const auto &[scope_string, scope] : scopes) { + for (const auto &[isolation_level_string, isolation_level] : isolation_levels) { + ValidateSetIsolationLevelQuery(ast_generator, fmt::format(query_template, scope_string, isolation_level_string), + scope, isolation_level); + } + } +} } // namespace diff --git a/tests/unit/interpreter.cpp b/tests/unit/interpreter.cpp index 194c4c815..70b92fbca 100644 --- a/tests/unit/interpreter.cpp +++ b/tests/unit/interpreter.cpp @@ -11,6 +11,7 @@ #include "query/stream.hpp" #include "query/typed_value.hpp" #include "query_common.hpp" +#include "storage/v2/isolation_level.hpp" #include "storage/v2/property_value.hpp" #include "utils/csv_parsing.hpp" #include "utils/logging.hpp" diff --git a/tests/unit/query_required_privileges.cpp b/tests/unit/query_required_privileges.cpp index 09fee4c0d..8ce38ee55 100644 --- a/tests/unit/query_required_privileges.cpp +++ b/tests/unit/query_required_privileges.cpp @@ -154,3 
+154,8 @@ TEST_F(TestPrivilegeExtractor, TriggerQuery) { auto *query = storage.Create(); EXPECT_THAT(GetRequiredPrivileges(query), UnorderedElementsAre(AuthQuery::Privilege::TRIGGER)); } + +TEST_F(TestPrivilegeExtractor, SetIsolationLevelQuery) { + auto *query = storage.Create(); + EXPECT_THAT(GetRequiredPrivileges(query), UnorderedElementsAre(AuthQuery::Privilege::CONFIG)); +} diff --git a/tests/unit/storage_v2_isolation_level.cpp b/tests/unit/storage_v2_isolation_level.cpp new file mode 100644 index 000000000..f80bc2ef2 --- /dev/null +++ b/tests/unit/storage_v2_isolation_level.cpp @@ -0,0 +1,98 @@ +#include + +#include "storage/v2/isolation_level.hpp" +#include "storage/v2/storage.hpp" + +namespace { +int64_t VerticesCount(storage::Storage::Accessor &accessor) { + int64_t count{0}; + for ([[maybe_unused]] const auto &vertex : accessor.Vertices(storage::View::NEW)) { + ++count; + } + + return count; +} + +constexpr std::array isolation_levels{storage::IsolationLevel::SNAPSHOT_ISOLATION, + storage::IsolationLevel::READ_COMMITTED, + storage::IsolationLevel::READ_UNCOMMITTED}; + +std::string_view IsolationLevelToString(const storage::IsolationLevel isolation_level) { + switch (isolation_level) { + case storage::IsolationLevel::SNAPSHOT_ISOLATION: + return "SNAPSHOT_ISOLATION"; + case storage::IsolationLevel::READ_COMMITTED: + return "READ_COMMITTED"; + case storage::IsolationLevel::READ_UNCOMMITTED: + return "READ_UNCOMMITTED"; + } +} +} // namespace + +class StorageIsolationLevelTest : public ::testing::TestWithParam { + public: + struct PrintToStringParamName { + std::string operator()(const testing::TestParamInfo &info) { + return std::string(IsolationLevelToString(static_cast(info.param))); + } + }; +}; + +TEST_P(StorageIsolationLevelTest, Visibility) { + const auto default_isolation_level = GetParam(); + + for (const auto override_isolation_level : isolation_levels) { + storage::Storage storage{storage::Config{.transaction = {.isolation_level = 
default_isolation_level}}}; + auto creator = storage.Access(); + auto default_isolation_level_reader = storage.Access(); + auto override_isolation_level_reader = storage.Access(override_isolation_level); + + ASSERT_EQ(VerticesCount(default_isolation_level_reader), 0); + ASSERT_EQ(VerticesCount(override_isolation_level_reader), 0); + + constexpr auto iteration_count = 10; + { + SCOPED_TRACE(fmt::format( + "Visibility while the creator transaction is active " + "(default isolation level = {}, override isolation level = {})", + IsolationLevelToString(default_isolation_level), IsolationLevelToString(override_isolation_level))); + for (size_t i = 1; i <= iteration_count; ++i) { + creator.CreateVertex(); + + const auto check_vertices_count = [i](auto &accessor, const auto isolation_level) { + const auto expected_count = isolation_level == storage::IsolationLevel::READ_UNCOMMITTED ? i : 0; + EXPECT_EQ(VerticesCount(accessor), expected_count); + }; + check_vertices_count(default_isolation_level_reader, default_isolation_level); + check_vertices_count(override_isolation_level_reader, override_isolation_level); + } + } + + ASSERT_FALSE(creator.Commit().HasError()); + { + SCOPED_TRACE(fmt::format( + "Visibility after the creator transaction is committed " + "(default isolation level = {}, override isolation level = {})", + IsolationLevelToString(default_isolation_level), IsolationLevelToString(override_isolation_level))); + const auto check_vertices_count = [iteration_count](auto &accessor, const auto isolation_level) { + const auto expected_count = + isolation_level == storage::IsolationLevel::SNAPSHOT_ISOLATION ? 
0 : iteration_count; + ASSERT_EQ(VerticesCount(accessor), expected_count); + }; + + check_vertices_count(default_isolation_level_reader, default_isolation_level); + check_vertices_count(override_isolation_level_reader, override_isolation_level); + } + + ASSERT_FALSE(default_isolation_level_reader.Commit().HasError()); + ASSERT_FALSE(override_isolation_level_reader.Commit().HasError()); + + SCOPED_TRACE("Visibility after a new transaction is started"); + auto verifier = storage.Access(); + ASSERT_EQ(VerticesCount(verifier), iteration_count); + ASSERT_FALSE(verifier.Commit().HasError()); + } +} + +INSTANTIATE_TEST_CASE_P(ParameterizedStorageIsolationLevelTests, StorageIsolationLevelTest, + ::testing::ValuesIn(isolation_levels), StorageIsolationLevelTest::PrintToStringParamName()); diff --git a/tests/unit/storage_v2_wal_file.cpp b/tests/unit/storage_v2_wal_file.cpp index 34b48b77e..79e03f351 100644 --- a/tests/unit/storage_v2_wal_file.cpp +++ b/tests/unit/storage_v2_wal_file.cpp @@ -45,7 +45,9 @@ class DeltaGenerator final { private: friend class DeltaGenerator; - explicit Transaction(DeltaGenerator *gen) : gen_(gen), transaction_(gen->transaction_id_++, gen->timestamp_++) {} + explicit Transaction(DeltaGenerator *gen) + : gen_(gen), + transaction_(gen->transaction_id_++, gen->timestamp_++, storage::IsolationLevel::SNAPSHOT_ISOLATION) {} public: storage::Vertex *CreateVertex() { From 644a3a0b2a4c101b9177a47d45729d97709e458d Mon Sep 17 00:00:00 2001 From: antonio2368 Date: Tue, 15 Jun 2021 13:21:05 +0200 Subject: [PATCH 61/63] Codeowners update (#173) * CODEOWNERS update * clang-tidy changes trigger the diff --- .github/workflows/diff.yaml | 3 ++- CODEOWNERS | 5 +---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/diff.yaml b/.github/workflows/diff.yaml index 4d75c0089..1c88bf766 100644 --- a/.github/workflows/diff.yaml +++ b/.github/workflows/diff.yaml @@ -5,7 +5,8 @@ on: paths-ignore: - 'docs/**' - '**/*.md' - - '.clang-*' + - 
'.clang-format' + - 'CODEOWNERS' jobs: community_build: diff --git a/CODEOWNERS b/CODEOWNERS index f871ca5f9..aed0ff4ed 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -1,4 +1 @@ -/docs/ @gitbuda -/src/communication/ @antonio2368 -/src/query/ @the-joksim -/src/storage/ @antonio2368 +* @gitbuda @antonio2368 @antaljanosbenjamin @kostasrim From cbf826e0c36fdde9f9f20a7364596c36bb67fb2c Mon Sep 17 00:00:00 2001 From: antonio2368 Date: Wed, 16 Jun 2021 13:22:48 +0200 Subject: [PATCH 62/63] Load WAL on replica using transactions (#95) --- .../v2/replication/replication_server.cpp | 415 +++++++++--------- .../v2/replication/replication_server.hpp | 4 +- tests/jepsen/src/jepsen/memgraph/bank.clj | 4 +- .../jepsen/src/jepsen/memgraph/sequential.clj | 39 +- 4 files changed, 226 insertions(+), 236 deletions(-) diff --git a/src/storage/v2/replication/replication_server.cpp b/src/storage/v2/replication/replication_server.cpp index 495b03d04..503898ef1 100644 --- a/src/storage/v2/replication/replication_server.cpp +++ b/src/storage/v2/replication/replication_server.cpp @@ -1,12 +1,33 @@ #include "storage/v2/replication/replication_server.hpp" +#include +#include #include "storage/v2/durability/durability.hpp" +#include "storage/v2/durability/paths.hpp" +#include "storage/v2/durability/serialization.hpp" #include "storage/v2/durability/snapshot.hpp" +#include "storage/v2/durability/version.hpp" +#include "storage/v2/durability/wal.hpp" #include "storage/v2/replication/config.hpp" #include "storage/v2/transaction.hpp" #include "utils/exceptions.hpp" namespace storage { +namespace { +std::pair ReadDelta(durability::BaseDecoder *decoder) { + try { + auto timestamp = ReadWalDeltaHeader(decoder); + SPDLOG_INFO(" Timestamp {}", timestamp); + auto delta = ReadWalDeltaData(decoder); + return {timestamp, delta}; + } catch (const slk::SlkReaderException &) { + throw utils::BasicException("Missing data!"); + } catch (const durability::RecoveryFailure &) { + throw utils::BasicException("Invalid 
data!"); + } +}; +} // namespace + Storage::ReplicationServer::ReplicationServer(Storage *storage, io::network::Endpoint endpoint, const replication::ReplicationServerConfig &config) : storage_(storage) { @@ -68,33 +89,6 @@ void Storage::ReplicationServer::AppendDeltasHandler(slk::Reader *req_reader, sl storage_->epoch_id_ = std::move(*maybe_epoch_id); } - const auto read_delta = [&]() -> std::pair { - try { - auto timestamp = ReadWalDeltaHeader(&decoder); - SPDLOG_INFO(" Timestamp {}", timestamp); - auto delta = ReadWalDeltaData(&decoder); - return {timestamp, delta}; - } catch (const slk::SlkReaderException &) { - throw utils::BasicException("Missing data!"); - } catch (const durability::RecoveryFailure &) { - throw utils::BasicException("Invalid data!"); - } - }; - - if (req.previous_commit_timestamp != storage_->last_commit_timestamp_.load()) { - // Empty the stream - bool transaction_complete = false; - while (!transaction_complete) { - SPDLOG_INFO("Skipping delta"); - const auto [timestamp, delta] = read_delta(); - transaction_complete = durability::IsWalDeltaDataTypeTransactionEnd(delta.type); - } - - AppendDeltasRes res{false, storage_->last_commit_timestamp_.load()}; - slk::Save(res, res_builder); - return; - } - if (storage_->wal_file_) { if (req.seq_num > storage_->wal_file_->SequenceNumber() || *maybe_epoch_id != storage_->epoch_id_) { storage_->wal_file_->FinalizeWal(); @@ -108,6 +102,173 @@ void Storage::ReplicationServer::AppendDeltasHandler(slk::Reader *req_reader, sl storage_->wal_seq_num_ = req.seq_num; } + if (req.previous_commit_timestamp != storage_->last_commit_timestamp_.load()) { + // Empty the stream + bool transaction_complete = false; + while (!transaction_complete) { + SPDLOG_INFO("Skipping delta"); + const auto [timestamp, delta] = ReadDelta(&decoder); + transaction_complete = durability::IsWalDeltaDataTypeTransactionEnd(delta.type); + } + + AppendDeltasRes res{false, storage_->last_commit_timestamp_.load()}; + slk::Save(res, 
res_builder); + return; + } + + ReadAndApplyDelta(&decoder); + + AppendDeltasRes res{true, storage_->last_commit_timestamp_.load()}; + slk::Save(res, res_builder); +} + +void Storage::ReplicationServer::SnapshotHandler(slk::Reader *req_reader, slk::Builder *res_builder) { + SnapshotReq req; + slk::Load(&req, req_reader); + + replication::Decoder decoder(req_reader); + + utils::EnsureDirOrDie(storage_->snapshot_directory_); + + const auto maybe_snapshot_path = decoder.ReadFile(storage_->snapshot_directory_); + MG_ASSERT(maybe_snapshot_path, "Failed to load snapshot!"); + spdlog::info("Received snapshot saved to {}", *maybe_snapshot_path); + + std::unique_lock storage_guard(storage_->main_lock_); + // Clear the database + storage_->vertices_.clear(); + storage_->edges_.clear(); + + storage_->constraints_ = Constraints(); + storage_->indices_.label_index = LabelIndex(&storage_->indices_, &storage_->constraints_, storage_->config_.items); + storage_->indices_.label_property_index = + LabelPropertyIndex(&storage_->indices_, &storage_->constraints_, storage_->config_.items); + try { + spdlog::debug("Loading snapshot"); + auto recovered_snapshot = durability::LoadSnapshot(*maybe_snapshot_path, &storage_->vertices_, &storage_->edges_, + &storage_->epoch_history_, &storage_->name_id_mapper_, + &storage_->edge_count_, storage_->config_.items); + spdlog::debug("Snapshot loaded successfully"); + // If this step is present it should always be the first step of + // the recovery so we use the UUID we read from snasphost + storage_->uuid_ = std::move(recovered_snapshot.snapshot_info.uuid); + storage_->epoch_id_ = std::move(recovered_snapshot.snapshot_info.epoch_id); + const auto &recovery_info = recovered_snapshot.recovery_info; + storage_->vertex_id_ = recovery_info.next_vertex_id; + storage_->edge_id_ = recovery_info.next_edge_id; + storage_->timestamp_ = std::max(storage_->timestamp_, recovery_info.next_timestamp); + + 
durability::RecoverIndicesAndConstraints(recovered_snapshot.indices_constraints, &storage_->indices_, + &storage_->constraints_, &storage_->vertices_); + } catch (const durability::RecoveryFailure &e) { + LOG_FATAL("Couldn't load the snapshot because of: {}", e.what()); + } + storage_guard.unlock(); + + SnapshotRes res{true, storage_->last_commit_timestamp_.load()}; + slk::Save(res, res_builder); + + // Delete other durability files + auto snapshot_files = durability::GetSnapshotFiles(storage_->snapshot_directory_, storage_->uuid_); + for (const auto &[path, uuid, _] : snapshot_files) { + if (path != *maybe_snapshot_path) { + storage_->file_retainer_.DeleteFile(path); + } + } + + auto wal_files = durability::GetWalFiles(storage_->wal_directory_, storage_->uuid_); + if (wal_files) { + for (const auto &wal_file : *wal_files) { + storage_->file_retainer_.DeleteFile(wal_file.path); + } + + storage_->wal_file_.reset(); + } +} + +void Storage::ReplicationServer::WalFilesHandler(slk::Reader *req_reader, slk::Builder *res_builder) { + WalFilesReq req; + slk::Load(&req, req_reader); + + const auto wal_file_number = req.file_number; + spdlog::debug("Received WAL files: {}", wal_file_number); + + replication::Decoder decoder(req_reader); + + utils::EnsureDirOrDie(storage_->wal_directory_); + + for (auto i = 0; i < wal_file_number; ++i) { + LoadWal(&decoder); + } + + WalFilesRes res{true, storage_->last_commit_timestamp_.load()}; + slk::Save(res, res_builder); +} + +void Storage::ReplicationServer::CurrentWalHandler(slk::Reader *req_reader, slk::Builder *res_builder) { + CurrentWalReq req; + slk::Load(&req, req_reader); + + replication::Decoder decoder(req_reader); + + utils::EnsureDirOrDie(storage_->wal_directory_); + + LoadWal(&decoder); + + CurrentWalRes res{true, storage_->last_commit_timestamp_.load()}; + slk::Save(res, res_builder); +} + +void Storage::ReplicationServer::LoadWal(replication::Decoder *decoder) { + const auto temp_wal_directory = 
std::filesystem::temp_directory_path() / "memgraph" / durability::kWalDirectory; + utils::EnsureDir(temp_wal_directory); + auto maybe_wal_path = decoder->ReadFile(temp_wal_directory); + MG_ASSERT(maybe_wal_path, "Failed to load WAL!"); + spdlog::trace("Received WAL saved to {}", *maybe_wal_path); + try { + auto wal_info = durability::ReadWalInfo(*maybe_wal_path); + if (wal_info.seq_num == 0) { + storage_->uuid_ = wal_info.uuid; + } + + if (wal_info.epoch_id != storage_->epoch_id_) { + storage_->epoch_history_.emplace_back(wal_info.epoch_id, storage_->last_commit_timestamp_); + storage_->epoch_id_ = std::move(wal_info.epoch_id); + } + + if (storage_->wal_file_) { + if (storage_->wal_file_->SequenceNumber() != wal_info.seq_num) { + storage_->wal_file_->FinalizeWal(); + storage_->wal_seq_num_ = wal_info.seq_num; + storage_->wal_file_.reset(); + } + } else { + storage_->wal_seq_num_ = wal_info.seq_num; + } + + durability::Decoder wal; + const auto version = wal.Initialize(*maybe_wal_path, durability::kWalMagic); + if (!version) throw durability::RecoveryFailure("Couldn't read WAL magic and/or version!"); + if (!durability::IsVersionSupported(*version)) throw durability::RecoveryFailure("Invalid WAL version!"); + wal.SetPosition(wal_info.offset_deltas); + + for (size_t i = 0; i < wal_info.num_deltas;) { + i += ReadAndApplyDelta(&wal); + } + + spdlog::debug("{} loaded successfully", *maybe_wal_path); + } catch (const durability::RecoveryFailure &e) { + LOG_FATAL("Couldn't recover WAL deltas from {} because of: {}", *maybe_wal_path, e.what()); + } +} + +Storage::ReplicationServer::~ReplicationServer() { + if (rpc_server_) { + rpc_server_->Shutdown(); + rpc_server_->AwaitShutdown(); + } +} +uint64_t Storage::ReplicationServer::ReadAndApplyDelta(durability::BaseDecoder *decoder) { auto edge_acc = storage_->edges_.access(); auto vertex_acc = storage_->vertices_.access(); @@ -121,11 +282,22 @@ void Storage::ReplicationServer::AppendDeltasHandler(slk::Reader *req_reader, sl 
return &commit_timestamp_and_accessor->second; }; - bool transaction_complete = false; - for (uint64_t i = 0; !transaction_complete; ++i) { - SPDLOG_INFO(" Delta {}", i); - const auto [timestamp, delta] = read_delta(); + uint64_t applied_deltas = 0; + auto max_commit_timestamp = storage_->last_commit_timestamp_.load(); + for (bool transaction_complete = false; !transaction_complete; ++applied_deltas) { + const auto [timestamp, delta] = ReadDelta(decoder); + if (timestamp > max_commit_timestamp) { + max_commit_timestamp = timestamp; + } + + transaction_complete = durability::IsWalDeltaDataTypeTransactionEnd(delta.type); + + if (timestamp < storage_->timestamp_) { + continue; + } + + SPDLOG_INFO(" Delta {}", applied_deltas); switch (delta.type) { case durability::WalDeltaData::Type::VERTEX_CREATE: { spdlog::trace(" Create vertex {}", delta.vertex_create_delete.gid.AsUint()); @@ -368,189 +540,12 @@ void Storage::ReplicationServer::AppendDeltasHandler(slk::Reader *req_reader, sl break; } } - transaction_complete = durability::IsWalDeltaDataTypeTransactionEnd(delta.type); } if (commit_timestamp_and_accessor) throw utils::BasicException("Invalid data!"); - AppendDeltasRes res{true, storage_->last_commit_timestamp_.load()}; - slk::Save(res, res_builder); -} + storage_->last_commit_timestamp_ = max_commit_timestamp; -void Storage::ReplicationServer::SnapshotHandler(slk::Reader *req_reader, slk::Builder *res_builder) { - SnapshotReq req; - slk::Load(&req, req_reader); - - replication::Decoder decoder(req_reader); - - utils::EnsureDirOrDie(storage_->snapshot_directory_); - - const auto maybe_snapshot_path = decoder.ReadFile(storage_->snapshot_directory_); - MG_ASSERT(maybe_snapshot_path, "Failed to load snapshot!"); - spdlog::info("Received snapshot saved to {}", *maybe_snapshot_path); - - { - std::unique_lock storage_guard(storage_->main_lock_); - // Clear the database - storage_->vertices_.clear(); - storage_->edges_.clear(); - - storage_->constraints_ = Constraints(); - 
storage_->indices_.label_index = LabelIndex(&storage_->indices_, &storage_->constraints_, storage_->config_.items); - storage_->indices_.label_property_index = - LabelPropertyIndex(&storage_->indices_, &storage_->constraints_, storage_->config_.items); - try { - spdlog::debug("Loading snapshot"); - auto recovered_snapshot = durability::LoadSnapshot(*maybe_snapshot_path, &storage_->vertices_, &storage_->edges_, - &storage_->epoch_history_, &storage_->name_id_mapper_, - &storage_->edge_count_, storage_->config_.items); - spdlog::debug("Snapshot loaded successfully"); - // If this step is present it should always be the first step of - // the recovery so we use the UUID we read from snasphost - storage_->uuid_ = std::move(recovered_snapshot.snapshot_info.uuid); - storage_->epoch_id_ = std::move(recovered_snapshot.snapshot_info.epoch_id); - const auto &recovery_info = recovered_snapshot.recovery_info; - storage_->vertex_id_ = recovery_info.next_vertex_id; - storage_->edge_id_ = recovery_info.next_edge_id; - storage_->timestamp_ = std::max(storage_->timestamp_, recovery_info.next_timestamp); - storage_->commit_log_.emplace(storage_->timestamp_); - - durability::RecoverIndicesAndConstraints(recovered_snapshot.indices_constraints, &storage_->indices_, - &storage_->constraints_, &storage_->vertices_); - } catch (const durability::RecoveryFailure &e) { - LOG_FATAL("Couldn't load the snapshot because of: {}", e.what()); - } - } - - SnapshotRes res{true, storage_->last_commit_timestamp_.load()}; - slk::Save(res, res_builder); - - // Delete other durability files - auto snapshot_files = durability::GetSnapshotFiles(storage_->snapshot_directory_, storage_->uuid_); - for (const auto &[path, uuid, _] : snapshot_files) { - if (path != *maybe_snapshot_path) { - storage_->file_retainer_.DeleteFile(path); - } - } - - auto wal_files = durability::GetWalFiles(storage_->wal_directory_, storage_->uuid_); - if (wal_files) { - for (const auto &wal_file : *wal_files) { - 
storage_->file_retainer_.DeleteFile(wal_file.path); - } - - storage_->wal_file_.reset(); - } -} - -void Storage::ReplicationServer::WalFilesHandler(slk::Reader *req_reader, slk::Builder *res_builder) { - WalFilesReq req; - slk::Load(&req, req_reader); - - const auto wal_file_number = req.file_number; - spdlog::debug("Received WAL files: {}", wal_file_number); - - replication::Decoder decoder(req_reader); - - utils::EnsureDirOrDie(storage_->wal_directory_); - - { - std::unique_lock storage_guard(storage_->main_lock_); - durability::RecoveredIndicesAndConstraints indices_constraints; - auto [wal_info, path] = LoadWal(&decoder, &indices_constraints); - if (wal_info.seq_num == 0) { - storage_->uuid_ = wal_info.uuid; - } - - // Check the seq number of the first wal file to see if it's the - // finalized form of the current wal on replica - if (storage_->wal_file_) { - if (storage_->wal_file_->SequenceNumber() == wal_info.seq_num && storage_->wal_file_->Path() != path) { - storage_->wal_file_->DeleteWal(); - } - storage_->wal_file_.reset(); - } - - for (auto i = 1; i < wal_file_number; ++i) { - LoadWal(&decoder, &indices_constraints); - } - - durability::RecoverIndicesAndConstraints(indices_constraints, &storage_->indices_, &storage_->constraints_, - &storage_->vertices_); - } - - WalFilesRes res{true, storage_->last_commit_timestamp_.load()}; - slk::Save(res, res_builder); -} - -void Storage::ReplicationServer::CurrentWalHandler(slk::Reader *req_reader, slk::Builder *res_builder) { - CurrentWalReq req; - slk::Load(&req, req_reader); - - replication::Decoder decoder(req_reader); - - utils::EnsureDirOrDie(storage_->wal_directory_); - - { - std::unique_lock storage_guard(storage_->main_lock_); - durability::RecoveredIndicesAndConstraints indices_constraints; - auto [wal_info, path] = LoadWal(&decoder, &indices_constraints); - if (wal_info.seq_num == 0) { - storage_->uuid_ = wal_info.uuid; - } - - if (storage_->wal_file_ && storage_->wal_file_->SequenceNumber() == 
wal_info.seq_num && - storage_->wal_file_->Path() != path) { - // Delete the old wal file - storage_->file_retainer_.DeleteFile(storage_->wal_file_->Path()); - } - MG_ASSERT(storage_->config_.durability.snapshot_wal_mode == - Config::Durability::SnapshotWalMode::PERIODIC_SNAPSHOT_WITH_WAL); - storage_->wal_file_.emplace(std::move(path), storage_->config_.items, &storage_->name_id_mapper_, wal_info.seq_num, - wal_info.from_timestamp, wal_info.to_timestamp, wal_info.num_deltas, - &storage_->file_retainer_); - durability::RecoverIndicesAndConstraints(indices_constraints, &storage_->indices_, &storage_->constraints_, - &storage_->vertices_); - } - - CurrentWalRes res{true, storage_->last_commit_timestamp_.load()}; - slk::Save(res, res_builder); -} - -std::pair Storage::ReplicationServer::LoadWal( - replication::Decoder *decoder, durability::RecoveredIndicesAndConstraints *indices_constraints) { - auto maybe_wal_path = decoder->ReadFile(storage_->wal_directory_, "_MAIN"); - MG_ASSERT(maybe_wal_path, "Failed to load WAL!"); - spdlog::trace("Received WAL saved to {}", *maybe_wal_path); - try { - auto wal_info = durability::ReadWalInfo(*maybe_wal_path); - if (wal_info.epoch_id != storage_->epoch_id_) { - storage_->epoch_history_.emplace_back(wal_info.epoch_id, storage_->last_commit_timestamp_); - storage_->epoch_id_ = std::move(wal_info.epoch_id); - } - const auto last_loaded_timestamp = - storage_->timestamp_ == kTimestampInitialId ? 
std::nullopt : std::optional{storage_->timestamp_ - 1}; - auto info = durability::LoadWal(*maybe_wal_path, indices_constraints, last_loaded_timestamp, &storage_->vertices_, - &storage_->edges_, &storage_->name_id_mapper_, &storage_->edge_count_, - storage_->config_.items); - storage_->vertex_id_ = std::max(storage_->vertex_id_.load(), info.next_vertex_id); - storage_->edge_id_ = std::max(storage_->edge_id_.load(), info.next_edge_id); - storage_->timestamp_ = std::max(storage_->timestamp_, info.next_timestamp); - storage_->commit_log_.emplace(storage_->timestamp_); - if (info.last_commit_timestamp) { - storage_->last_commit_timestamp_ = *info.last_commit_timestamp; - } - spdlog::debug("{} loaded successfully", *maybe_wal_path); - return {std::move(wal_info), std::move(*maybe_wal_path)}; - } catch (const durability::RecoveryFailure &e) { - LOG_FATAL("Couldn't recover WAL deltas from {} because of: {}", *maybe_wal_path, e.what()); - } -} - -Storage::ReplicationServer::~ReplicationServer() { - if (rpc_server_) { - rpc_server_->Shutdown(); - rpc_server_->AwaitShutdown(); - } + return applied_deltas; } } // namespace storage diff --git a/src/storage/v2/replication/replication_server.hpp b/src/storage/v2/replication/replication_server.hpp index bff872f80..2d0846ab6 100644 --- a/src/storage/v2/replication/replication_server.hpp +++ b/src/storage/v2/replication/replication_server.hpp @@ -23,8 +23,8 @@ class Storage::ReplicationServer { void WalFilesHandler(slk::Reader *req_reader, slk::Builder *res_builder); void CurrentWalHandler(slk::Reader *req_reader, slk::Builder *res_builder); - std::pair LoadWal( - replication::Decoder *decoder, durability::RecoveredIndicesAndConstraints *indices_constraints); + void LoadWal(replication::Decoder *decoder); + uint64_t ReadAndApplyDelta(durability::BaseDecoder *decoder); std::optional rpc_server_context_; std::optional rpc_server_; diff --git a/tests/jepsen/src/jepsen/memgraph/bank.clj b/tests/jepsen/src/jepsen/memgraph/bank.clj index 
3bfb79409..4b5955903 100644 --- a/tests/jepsen/src/jepsen/memgraph/bank.clj +++ b/tests/jepsen/src/jepsen/memgraph/bank.clj @@ -118,8 +118,10 @@ (filter #(= :ok (:type %))) (filter #(= :read (:f %)))) bad-reads (->> ok-reads + (map #(->> % :value :accounts)) + (filter #(= (count %) 5)) (map (fn [op] - (let [balances (->> op :value :accounts (map :balance)) + (let [balances (map :balance op) expected-total (* account-num starting-balance)] (cond (and (not-empty balances) diff --git a/tests/jepsen/src/jepsen/memgraph/sequential.clj b/tests/jepsen/src/jepsen/memgraph/sequential.clj index b8f772cf2..d50a241cb 100644 --- a/tests/jepsen/src/jepsen/memgraph/sequential.clj +++ b/tests/jepsen/src/jepsen/memgraph/sequential.clj @@ -9,19 +9,7 @@ [jepsen.memgraph.client :as c])) (dbclient/defquery get-all-nodes - "MATCH (n:Node) RETURN n;") - -(dbclient/defquery get-max-id - "MATCH (n:Node) - RETURN n.id AS id - ORDER BY id DESC - LIMIT 1;") - -(dbclient/defquery get-min-id - "MATCH (n:Node) - RETURN n.id AS id - ORDER BY id - LIMIT 1;") + "MATCH (n:Node) RETURN n ORDER BY n.id;") (dbclient/defquery create-node "CREATE (n:Node {id: $id});") @@ -29,19 +17,23 @@ (dbclient/defquery delete-node-with-id "MATCH (n:Node {id: $id}) DELETE n;") +(def next-node-for-add (atom 0)) + (defn add-next-node "Add a new node with its id set to the next highest" [conn] - (dbclient/with-transaction conn tx - (let [max-id (-> (get-max-id tx) first :id)] - (create-node tx {:id (inc max-id)})))) + (when (dbclient/with-transaction conn tx + (create-node tx {:id (swap! next-node-for-add identity)})) + (swap! next-node-for-add inc))) + +(def next-node-for-delete (atom 0)) (defn delete-oldest-node "Delete a node with the lowest id" [conn] - (dbclient/with-transaction conn tx - (let [min-id (-> (get-min-id tx) first :id)] - (delete-node-with-id tx {:id min-id})))) + (when (dbclient/with-transaction conn tx + (delete-node-with-id tx {:id (swap! next-node-for-delete identity)})) + (swap! 
next-node-for-delete inc))) (c/replication-client Client [] (open! [this test node] @@ -123,11 +115,12 @@ (when (not-empty ids) (cond ((complement strictly-increasing) ids) {:type :not-increasing-ids - :op op} - - ((complement increased-by-1) ids) - {:type :ids-missing :op op}))))) + + ;; if there are multiple threads not sure how to guarante that the ids are created in order + ;;((complement increased-by-1) ids) + ;;{:type :ids-missing + ;; :op op}))))) (filter identity) (into [])) empty-nodes (let [all-nodes (->> ok-reads From 15911b64dc1a55ba5c518bd068d592161a8a3af8 Mon Sep 17 00:00:00 2001 From: antonio2368 Date: Wed, 16 Jun 2021 16:03:58 +0200 Subject: [PATCH 63/63] Use timers for query timeout thread (#163) Co-authored-by: Benjamin Antal --- src/query/context.hpp | 19 +++- src/query/interpreter.cpp | 76 ++++++------- src/query/plan/profile.cpp | 13 +-- src/query/plan/profile.hpp | 10 +- src/query/trigger.cpp | 5 +- src/query/trigger.hpp | 4 +- src/utils/CMakeLists.txt | 3 +- src/utils/async_timer.cpp | 187 +++++++++++++++++++++++++++++++ src/utils/async_timer.hpp | 37 ++++++ src/utils/skip_list.hpp | 4 +- tests/unit/CMakeLists.txt | 3 + tests/unit/query_profile.cpp | 8 +- tests/unit/utils_async_timer.cpp | 138 +++++++++++++++++++++++ 13 files changed, 441 insertions(+), 66 deletions(-) create mode 100644 src/utils/async_timer.cpp create mode 100644 src/utils/async_timer.hpp create mode 100644 tests/unit/utils_async_timer.cpp diff --git a/src/query/context.hpp b/src/query/context.hpp index 2bced7a1a..afdd83f5d 100644 --- a/src/query/context.hpp +++ b/src/query/context.hpp @@ -1,11 +1,13 @@ #pragma once +#include + #include "query/common.hpp" #include "query/frontend/semantic/symbol_table.hpp" #include "query/parameters.hpp" #include "query/plan/profile.hpp" #include "query/trigger.hpp" -#include "utils/tsc.hpp" +#include "utils/async_timer.hpp" namespace query { @@ -50,20 +52,25 @@ struct ExecutionContext { DbAccessor *db_accessor{nullptr}; SymbolTable 
symbol_table; EvaluationContext evaluation_context; - utils::TSCTimer execution_tsc_timer; - double max_execution_time_sec{0.0}; std::atomic *is_shutting_down{nullptr}; bool is_profile_query{false}; std::chrono::duration profile_execution_time; plan::ProfilingStats stats; plan::ProfilingStats *stats_root{nullptr}; TriggerContextCollector *trigger_context_collector{nullptr}; + utils::AsyncTimer timer; }; +static_assert(std::is_move_assignable_v, "ExecutionContext must be move assignable!"); +static_assert(std::is_move_constructible_v, "ExecutionContext must be move constructible!"); + inline bool MustAbort(const ExecutionContext &context) { - return (context.is_shutting_down && context.is_shutting_down->load(std::memory_order_acquire)) || - (context.max_execution_time_sec > 0 && - context.execution_tsc_timer.Elapsed() >= context.max_execution_time_sec); + return (context.is_shutting_down != nullptr && context.is_shutting_down->load(std::memory_order_acquire)) || + context.timer.IsExpired(); +} + +inline plan::ProfilingStatsWithTotalTime GetStatsWithTotalTime(const ExecutionContext &context) { + return plan::ProfilingStatsWithTotalTime{context.stats, context.profile_execution_time}; } } // namespace query diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp index 69bc4e3d6..de3a341b5 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -473,9 +473,9 @@ struct PullPlan { DbAccessor *dba, InterpreterContext *interpreter_context, utils::MemoryResource *execution_memory, TriggerContextCollector *trigger_context_collector = nullptr, std::optional memory_limit = {}); - std::optional Pull(AnyStream *stream, std::optional n, - const std::vector &output_symbols, - std::map *summary); + std::optional Pull(AnyStream *stream, std::optional n, + const std::vector &output_symbols, + std::map *summary); private: std::shared_ptr plan_ = nullptr; @@ -513,16 +513,17 @@ PullPlan::PullPlan(const std::shared_ptr plan, const Parameters &par 
ctx_.evaluation_context.parameters = parameters; ctx_.evaluation_context.properties = NamesToProperties(plan->ast_storage().properties_, dba); ctx_.evaluation_context.labels = NamesToLabels(plan->ast_storage().labels_, dba); - ctx_.execution_tsc_timer = utils::TSCTimer(interpreter_context->tsc_frequency); - ctx_.max_execution_time_sec = interpreter_context->execution_timeout_sec; + if (interpreter_context->execution_timeout_sec > 0) { + ctx_.timer = utils::AsyncTimer{interpreter_context->execution_timeout_sec}; + } ctx_.is_shutting_down = &interpreter_context->is_shutting_down; ctx_.is_profile_query = is_profile_query; ctx_.trigger_context_collector = trigger_context_collector; } -std::optional PullPlan::Pull(AnyStream *stream, std::optional n, - const std::vector &output_symbols, - std::map *summary) { +std::optional PullPlan::Pull(AnyStream *stream, std::optional n, + const std::vector &output_symbols, + std::map *summary) { // Set up temporary memory for a single Pull. Initial memory comes from the // stack. 256 KiB should fit on the stack and should be more than enough for a // single `Pull`. 
@@ -595,7 +596,7 @@ std::optional PullPlan::Pull(AnyStream *stream, std::optional< summary->insert_or_assign("plan_execution_time", execution_time_.count()); cursor_->Shutdown(); ctx_.profile_execution_time = execution_time_; - return std::move(ctx_); + return GetStatsWithTotalTime(ctx_); } using RWType = plan::ReadWriteTypeChecker::RWType; @@ -828,32 +829,33 @@ PreparedQuery PrepareProfileQuery(ParsedQuery parsed_query, bool in_explicit_tra auto rw_type_checker = plan::ReadWriteTypeChecker(); rw_type_checker.InferRWType(const_cast(cypher_query_plan->plan())); - return PreparedQuery{ - {"OPERATOR", "ACTUAL HITS", "RELATIVE TIME", "ABSOLUTE TIME"}, - std::move(parsed_query.required_privileges), - [plan = std::move(cypher_query_plan), parameters = std::move(parsed_inner_query.parameters), summary, dba, - interpreter_context, execution_memory, memory_limit, - // We want to execute the query we are profiling lazily, so we delay - // the construction of the corresponding context. - ctx = std::optional{}, pull_plan = std::shared_ptr(nullptr)]( - AnyStream *stream, std::optional n) mutable -> std::optional { - // No output symbols are given so that nothing is streamed. - if (!ctx) { - ctx = PullPlan(plan, parameters, true, dba, interpreter_context, execution_memory, nullptr, memory_limit) - .Pull(stream, {}, {}, summary); - pull_plan = std::make_shared(ProfilingStatsToTable(ctx->stats, ctx->profile_execution_time)); - } + return PreparedQuery{{"OPERATOR", "ACTUAL HITS", "RELATIVE TIME", "ABSOLUTE TIME"}, + std::move(parsed_query.required_privileges), + [plan = std::move(cypher_query_plan), parameters = std::move(parsed_inner_query.parameters), + summary, dba, interpreter_context, execution_memory, memory_limit, + // We want to execute the query we are profiling lazily, so we delay + // the construction of the corresponding context. 
+ stats_and_total_time = std::optional{}, + pull_plan = std::shared_ptr(nullptr)]( + AnyStream *stream, std::optional n) mutable -> std::optional { + // No output symbols are given so that nothing is streamed. + if (!stats_and_total_time) { + stats_and_total_time = PullPlan(plan, parameters, true, dba, interpreter_context, + execution_memory, nullptr, memory_limit) + .Pull(stream, {}, {}, summary); + pull_plan = std::make_shared(ProfilingStatsToTable(*stats_and_total_time)); + } - MG_ASSERT(ctx, "Failed to execute the query!"); + MG_ASSERT(stats_and_total_time, "Failed to execute the query!"); - if (pull_plan->Pull(stream, n)) { - summary->insert_or_assign("profile", ProfilingStatsToJson(ctx->stats, ctx->profile_execution_time).dump()); - return QueryHandlerResult::ABORT; - } + if (pull_plan->Pull(stream, n)) { + summary->insert_or_assign("profile", ProfilingStatsToJson(*stats_and_total_time).dump()); + return QueryHandlerResult::ABORT; + } - return std::nullopt; - }, - rw_type_checker.type}; + return std::nullopt; + }, + rw_type_checker.type}; } PreparedQuery PrepareDumpQuery(ParsedQuery parsed_query, std::map *summary, DbAccessor *dba, @@ -1602,9 +1604,8 @@ void RunTriggersIndividually(const utils::SkipList &triggers, Interpret trigger_context.AdaptForAccessor(&db_accessor); try { - trigger.Execute(&db_accessor, &execution_memory, *interpreter_context->tsc_frequency, - interpreter_context->execution_timeout_sec, &interpreter_context->is_shutting_down, - trigger_context); + trigger.Execute(&db_accessor, &execution_memory, interpreter_context->execution_timeout_sec, + &interpreter_context->is_shutting_down, trigger_context); } catch (const utils::BasicException &exception) { spdlog::warn("Trigger '{}' failed with exception:\n{}", trigger.Name(), exception.what()); db_accessor.Abort(); @@ -1658,9 +1659,8 @@ void Interpreter::Commit() { utils::MonotonicBufferResource execution_memory{kExecutionMemoryBlockSize}; AdvanceCommand(); try { - 
trigger.Execute(&*execution_db_accessor_, &execution_memory, *interpreter_context_->tsc_frequency, - interpreter_context_->execution_timeout_sec, &interpreter_context_->is_shutting_down, - *trigger_context); + trigger.Execute(&*execution_db_accessor_, &execution_memory, interpreter_context_->execution_timeout_sec, + &interpreter_context_->is_shutting_down, *trigger_context); } catch (const utils::BasicException &e) { throw utils::BasicException( fmt::format("Trigger '{}' caused the transaction to fail.\nException: {}", trigger.Name(), e.what())); diff --git a/src/query/plan/profile.cpp b/src/query/plan/profile.cpp index f8afeb03c..bafba17d1 100644 --- a/src/query/plan/profile.cpp +++ b/src/query/plan/profile.cpp @@ -98,10 +98,9 @@ class ProfilingStatsToTableHelper { } // namespace -std::vector> ProfilingStatsToTable(const ProfilingStats &cumulative_stats, - std::chrono::duration total_time) { - ProfilingStatsToTableHelper helper{cumulative_stats.num_cycles, total_time}; - helper.Output(cumulative_stats); +std::vector> ProfilingStatsToTable(const ProfilingStatsWithTotalTime &stats) { + ProfilingStatsToTableHelper helper{stats.cumulative_stats.num_cycles, stats.total_time}; + helper.Output(stats.cumulative_stats); return helper.rows(); } @@ -147,9 +146,9 @@ class ProfilingStatsToJsonHelper { } // namespace -nlohmann::json ProfilingStatsToJson(const ProfilingStats &cumulative_stats, std::chrono::duration total_time) { - ProfilingStatsToJsonHelper helper{cumulative_stats.num_cycles, total_time}; - helper.Output(cumulative_stats); +nlohmann::json ProfilingStatsToJson(const ProfilingStatsWithTotalTime &stats) { + ProfilingStatsToJsonHelper helper{stats.cumulative_stats.num_cycles, stats.total_time}; + helper.Output(stats.cumulative_stats); return helper.ToJson(); } diff --git a/src/query/plan/profile.hpp b/src/query/plan/profile.hpp index bea2536a5..ed88f29e9 100644 --- a/src/query/plan/profile.hpp +++ b/src/query/plan/profile.hpp @@ -23,10 +23,14 @@ struct 
ProfilingStats { std::vector children; }; -std::vector> ProfilingStatsToTable(const ProfilingStats &cumulative_stats, - std::chrono::duration); +struct ProfilingStatsWithTotalTime { + ProfilingStats cumulative_stats{}; + std::chrono::duration total_time{}; +}; -nlohmann::json ProfilingStatsToJson(const ProfilingStats &cumulative_stats, std::chrono::duration); +std::vector> ProfilingStatsToTable(const ProfilingStatsWithTotalTime &stats); + +nlohmann::json ProfilingStatsToJson(const ProfilingStatsWithTotalTime &stats); } // namespace plan } // namespace query diff --git a/src/query/trigger.cpp b/src/query/trigger.cpp index 401e43d50..89fb90681 100644 --- a/src/query/trigger.cpp +++ b/src/query/trigger.cpp @@ -172,7 +172,7 @@ std::shared_ptr Trigger::GetPlan(DbAccessor *db_accessor) return trigger_plan_; } -void Trigger::Execute(DbAccessor *dba, utils::MonotonicBufferResource *execution_memory, const double tsc_frequency, +void Trigger::Execute(DbAccessor *dba, utils::MonotonicBufferResource *execution_memory, const double max_execution_time_sec, std::atomic *is_shutting_down, const TriggerContext &context) const { if (!context.ShouldEventTrigger(event_type_)) { @@ -193,8 +193,7 @@ void Trigger::Execute(DbAccessor *dba, utils::MonotonicBufferResource *execution ctx.evaluation_context.parameters = parsed_statements_.parameters; ctx.evaluation_context.properties = NamesToProperties(plan.ast_storage().properties_, dba); ctx.evaluation_context.labels = NamesToLabels(plan.ast_storage().labels_, dba); - ctx.execution_tsc_timer = utils::TSCTimer(tsc_frequency); - ctx.max_execution_time_sec = max_execution_time_sec; + ctx.timer = utils::AsyncTimer(max_execution_time_sec); ctx.is_shutting_down = is_shutting_down; ctx.is_profile_query = false; diff --git a/src/query/trigger.hpp b/src/query/trigger.hpp index 1eb22bd08..38e9005ef 100644 --- a/src/query/trigger.hpp +++ b/src/query/trigger.hpp @@ -23,8 +23,8 @@ struct Trigger { const std::map &user_parameters, TriggerEventType 
event_type, utils::SkipList *query_cache, DbAccessor *db_accessor, utils::SpinLock *antlr_lock); - void Execute(DbAccessor *dba, utils::MonotonicBufferResource *execution_memory, double tsc_frequency, - double max_execution_time_sec, std::atomic *is_shutting_down, const TriggerContext &context) const; + void Execute(DbAccessor *dba, utils::MonotonicBufferResource *execution_memory, double max_execution_time_sec, + std::atomic *is_shutting_down, const TriggerContext &context) const; bool operator==(const Trigger &other) const { return name_ == other.name_; } // NOLINTNEXTLINE (modernize-use-nullptr) diff --git a/src/utils/CMakeLists.txt b/src/utils/CMakeLists.txt index b63d8e530..bd806e870 100644 --- a/src/utils/CMakeLists.txt +++ b/src/utils/CMakeLists.txt @@ -1,4 +1,5 @@ set(utils_src_files + async_timer.cpp event_counter.cpp csv_parsing.cpp file.cpp @@ -13,7 +14,7 @@ set(utils_src_files uuid.cpp) add_library(mg-utils STATIC ${utils_src_files}) -target_link_libraries(mg-utils stdc++fs Threads::Threads spdlog fmt gflags uuid) +target_link_libraries(mg-utils stdc++fs Threads::Threads spdlog fmt gflags uuid rt) add_library(mg-new-delete STATIC new_delete.cpp) target_link_libraries(mg-new-delete jemalloc fmt) diff --git a/src/utils/async_timer.cpp b/src/utils/async_timer.cpp new file mode 100644 index 000000000..afe90ce7f --- /dev/null +++ b/src/utils/async_timer.cpp @@ -0,0 +1,187 @@ +#include "utils/async_timer.hpp" + +#include + +#include +#include +#include +#include +#include + +#include "utils/skip_list.hpp" +#include "utils/spin_lock.hpp" +#include "utils/synchronized.hpp" + +namespace { + +constexpr uint64_t kInvalidFlagId = 0U; +// std::numeric_limits::max() cannot be represented precisely as a double, so the next smallest value is the +// maximum number of seconds the timer can be used with +const double max_seconds_as_double = std::nexttoward(std::numeric_limits::max(), 0.0); + +// NOLINTNEXTLINE (cppcoreguidelines-avoid-non-const-global-variables) 
+std::atomic expiration_flag_counter{kInvalidFlagId + 1U}; + +struct ExpirationFlagInfo { + uint64_t id{0U}; + std::weak_ptr> flag{}; +}; + +bool operator==(const ExpirationFlagInfo &lhs, const ExpirationFlagInfo &rhs) { return lhs.id == rhs.id; } +bool operator<(const ExpirationFlagInfo &lhs, const ExpirationFlagInfo &rhs) { return lhs.id < rhs.id; } +bool operator==(const ExpirationFlagInfo &flag_info, const uint64_t id) { return flag_info.id == id; } +bool operator<(const ExpirationFlagInfo &flag_info, const uint64_t id) { return flag_info.id < id; } + +// NOLINTNEXTLINE (cppcoreguidelines-avoid-non-const-global-variables) +utils::SkipList expiration_flags{}; + +uint64_t AddFlag(std::weak_ptr> flag) { + const auto id = expiration_flag_counter.fetch_add(1, std::memory_order_relaxed); + expiration_flags.access().insert({id, std::move(flag)}); + return id; +} + +void EraseFlag(uint64_t flag_id) { expiration_flags.access().remove(flag_id); } + +std::weak_ptr> GetFlag(uint64_t flag_id) { + const auto flag_accessor = expiration_flags.access(); + const auto it = flag_accessor.find(flag_id); + if (it == flag_accessor.end()) { + return {}; + } + + return it->flag; +} + +void MarkDone(const uint64_t flag_id) { + const auto weak_flag = GetFlag(flag_id); + if (weak_flag.expired()) { + return; + } + auto flag = weak_flag.lock(); + if (flag != nullptr) { + flag->store(true, std::memory_order_relaxed); + } +} +} // namespace + +namespace utils { + +namespace { +struct ThreadInfo { + pid_t thread_id; + std::atomic setup_done{false}; +}; + +void *TimerBackgroundWorker(void *args) { + auto *thread_info = static_cast(args); + thread_info->thread_id = syscall(SYS_gettid); + thread_info->setup_done.store(true, std::memory_order_release); + + sigset_t ss; + sigemptyset(&ss); + sigaddset(&ss, SIGTIMER); + sigprocmask(SIG_BLOCK, &ss, nullptr); + + while (true) { + siginfo_t si; + int result = sigwaitinfo(&ss, &si); + + if (result <= 0) { + continue; + } + + if (si.si_code == SI_TIMER) 
{ + auto flag_id = kInvalidFlagId; + std::memcpy(&flag_id, &si.si_value.sival_ptr, sizeof(flag_id)); + MarkDone(flag_id); + } else if (si.si_code == SI_TKILL) { + pthread_exit(nullptr); + } + } +} +} // namespace + +AsyncTimer::AsyncTimer() : flag_id_{kInvalidFlagId} {}; + +AsyncTimer::AsyncTimer(double seconds) + : expiration_flag_{std::make_shared>(false)}, flag_id_{kInvalidFlagId}, timer_id_{} { + MG_ASSERT(seconds <= max_seconds_as_double, + "The AsyncTimer cannot handle larger time values than {:f}, the specified value: {:f}", + max_seconds_as_double, seconds); + MG_ASSERT(seconds >= 0.0, "The AsyncTimer cannot handle negative time values: {:f}", seconds); + + static pthread_t background_timer_thread; + static ThreadInfo thread_info; + static std::once_flag timer_thread_setup_flag; + + std::call_once(timer_thread_setup_flag, [] { + pthread_create(&background_timer_thread, nullptr, TimerBackgroundWorker, &thread_info); + while (!thread_info.setup_done.load(std::memory_order_acquire)) + ; + }); + + flag_id_ = AddFlag(std::weak_ptr>{expiration_flag_}); + + sigevent notification_settings{}; + notification_settings.sigev_notify = SIGEV_THREAD_ID; + notification_settings.sigev_signo = SIGTIMER; + notification_settings._sigev_un._tid = thread_info.thread_id; + static_assert(sizeof(void *) == sizeof(flag_id_), "ID size must be equal to pointer size!"); + std::memcpy(¬ification_settings.sigev_value.sival_ptr, &flag_id_, sizeof(flag_id_)); + MG_ASSERT(timer_create(CLOCK_MONOTONIC, ¬ification_settings, &timer_id_) == 0, "Couldn't create timer: ({}) {}", + errno, strerror(errno)); + + constexpr auto kSecondsToNanos = 1000 * 1000 * 1000; + // Casting will truncate down, but that's exactly what we want. 
+ const auto second_as_time_t = static_cast(seconds); + const auto remaining_nano_seconds = static_cast((seconds - second_as_time_t) * kSecondsToNanos); + + struct itimerspec spec; + spec.it_interval.tv_sec = 0; + spec.it_interval.tv_nsec = 0; + spec.it_value.tv_sec = second_as_time_t; + spec.it_value.tv_nsec = remaining_nano_seconds; + + MG_ASSERT(timer_settime(timer_id_, 0, &spec, nullptr) == 0, "Couldn't set timer: ({}) {}", errno, strerror(errno)); +} + +AsyncTimer::~AsyncTimer() { ReleaseResources(); } + +AsyncTimer::AsyncTimer(AsyncTimer &&other) noexcept + : expiration_flag_{std::move(other.expiration_flag_)}, flag_id_{other.flag_id_}, timer_id_{other.timer_id_} { + other.flag_id_ = kInvalidFlagId; +} + +// NOLINTNEXTLINE (hicpp-noexcept-move) +AsyncTimer &AsyncTimer::operator=(AsyncTimer &&other) { + if (this == &other) { + return *this; + } + + ReleaseResources(); + + expiration_flag_ = std::move(other.expiration_flag_); + flag_id_ = std::exchange(other.flag_id_, kInvalidFlagId); + timer_id_ = other.timer_id_; + + return *this; +}; + +bool AsyncTimer::IsExpired() const { + if (expiration_flag_ != nullptr) { + return expiration_flag_->load(std::memory_order_relaxed); + } + return false; +} + +void AsyncTimer::ReleaseResources() { + if (expiration_flag_ != nullptr) { + timer_delete(timer_id_); + EraseFlag(flag_id_); + flag_id_ = kInvalidFlagId; + expiration_flag_ = std::shared_ptr>{}; + } +} + +} // namespace utils diff --git a/src/utils/async_timer.hpp b/src/utils/async_timer.hpp new file mode 100644 index 000000000..4ac2ffc87 --- /dev/null +++ b/src/utils/async_timer.hpp @@ -0,0 +1,37 @@ +#pragma once +#include + +#include + +#include "utils/logging.hpp" + +namespace utils { + +#define SIGTIMER (SIGRTMAX - 2) + +class AsyncTimer { + public: + AsyncTimer(); + explicit AsyncTimer(double seconds); + ~AsyncTimer(); + AsyncTimer(AsyncTimer &&other) noexcept; + // NOLINTNEXTLINE (hicpp-noexcept-move) + AsyncTimer &operator=(AsyncTimer &&other); + + 
AsyncTimer(const AsyncTimer &) = delete; + AsyncTimer &operator=(const AsyncTimer &) = delete; + + // Returns false if the object isn't associated with any timer. + bool IsExpired() const; + + private: + void ReleaseResources(); + + // If the expiration_flag_ is nullptr, then the object is not associated with any timer, therefore no clean up + // is necessary. Furthermore, the the POSIX API doesn't specify any value as "invalid" for timer_t, so the timer_id_ + // cannot be used to determine whether the object is associated with any timer or not. + std::shared_ptr> expiration_flag_; + uint64_t flag_id_; + timer_t timer_id_; +}; +} // namespace utils diff --git a/src/utils/skip_list.hpp b/src/utils/skip_list.hpp index fd2295fea..b5ee81544 100644 --- a/src/utils/skip_list.hpp +++ b/src/utils/skip_list.hpp @@ -666,7 +666,7 @@ class SkipList final { /// @return Iterator to the item in the list, will be equal to `end()` when /// the key isn't found template - Iterator find(const TKey &key) { + Iterator find(const TKey &key) const { return skiplist_->template find(key); } @@ -676,7 +676,7 @@ class SkipList final { /// @return Iterator to the item in the list, will be equal to `end()` when /// no items match the search template - Iterator find_equal_or_greater(const TKey &key) { + Iterator find_equal_or_greater(const TKey &key) const { return skiplist_->template find_equal_or_greater(key); } diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index 5f61495a4..7a2c0e5e7 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -238,6 +238,9 @@ target_link_libraries(${test_prefix}utils_thread_pool mg-utils fmt) add_unit_test(utils_csv_parsing.cpp ${CMAKE_SOURCE_DIR}/src/utils/csv_parsing.cpp) target_link_libraries(${test_prefix}utils_csv_parsing mg-utils fmt) +add_unit_test(utils_async_timer.cpp) +target_link_libraries(${test_prefix}utils_async_timer mg-utils) + # Test mg-storage-v2 add_unit_test(commit_log_v2.cpp) diff --git 
a/tests/unit/query_profile.cpp b/tests/unit/query_profile.cpp index 6f80f3812..09d657201 100644 --- a/tests/unit/query_profile.cpp +++ b/tests/unit/query_profile.cpp @@ -19,7 +19,7 @@ TEST(QueryProfileTest, SimpleQuery) { // | * Once | 2 | 25.000000 % | 0.250000 ms | // +---------------+---------------+---------------+---------------+ // clang-format: on - auto table = ProfilingStatsToTable(produce, total_time); + auto table = ProfilingStatsToTable(ProfilingStatsWithTotalTime{produce, total_time}); EXPECT_EQ(table[0][0].ValueString(), "* Produce"); EXPECT_EQ(table[0][1].ValueInt(), 2); @@ -48,7 +48,7 @@ TEST(QueryProfileTest, SimpleQuery) { // "relative_time": 0.75 // } // clang-format: on - auto json = ProfilingStatsToJson(produce, total_time); + auto json = ProfilingStatsToJson(ProfilingStatsWithTotalTime{produce, total_time}); /* * NOTE: When one of these comparions fails and Google Test tries to report @@ -94,7 +94,7 @@ TEST(QueryProfileTest, ComplicatedQuery) { // | * Once (1) | 2 | 5.000000 % | 0.050000 ms | // +----------------+----------------+----------------+----------------+ // clang-format: on - auto table = ProfilingStatsToTable(produce, total_time); + auto table = ProfilingStatsToTable({produce, total_time}); EXPECT_EQ(table[0][0].ValueString(), "* Produce"); EXPECT_EQ(table[0][1].ValueInt(), 2); @@ -209,7 +209,7 @@ TEST(QueryProfileTest, ComplicatedQuery) { // "relative_time": 0.1, // } // clang-format: on - auto json = ProfilingStatsToJson(produce, total_time); + auto json = ProfilingStatsToJson(ProfilingStatsWithTotalTime{produce, total_time}); EXPECT_EQ(json["actual_hits"], 2); EXPECT_EQ(json["relative_time"], 0.1); diff --git a/tests/unit/utils_async_timer.cpp b/tests/unit/utils_async_timer.cpp new file mode 100644 index 000000000..65fa69dee --- /dev/null +++ b/tests/unit/utils_async_timer.cpp @@ -0,0 +1,138 @@ +#include +#include +#include + +#include "gtest/gtest.h" + +#include "utils/async_timer.hpp" + +using AsyncTimer = utils::AsyncTimer; + 
+constexpr auto kSecondsInMilis = 1000.0; +constexpr auto kIntervalInSeconds = 0.3; +constexpr auto kIntervalInMilis = kIntervalInSeconds * kSecondsInMilis; +constexpr auto kAbsoluteErrorInMilis = 50; + +std::chrono::steady_clock::time_point Now() { return std::chrono::steady_clock::now(); } + +int ElapsedMilis(const std::chrono::steady_clock::time_point &start, const std::chrono::steady_clock::time_point &end) { + return std::chrono::duration_cast(end - start).count(); +} + +void CheckTimeSimple() { + const auto before = Now(); + AsyncTimer timer{kIntervalInSeconds}; + while (!timer.IsExpired()) { + ASSERT_LT(ElapsedMilis(before, Now()), 2 * kIntervalInMilis); + } + + const auto after = Now(); + + EXPECT_NEAR(ElapsedMilis(before, after), kIntervalInMilis, kAbsoluteErrorInMilis); +} + +TEST(AsyncTimer, SimpleWait) { CheckTimeSimple(); } + +TEST(AsyncTimer, DoubleWait) { + CheckTimeSimple(); + CheckTimeSimple(); +} + +TEST(AsyncTimer, MoveConstruct) { + const auto before = Now(); + AsyncTimer timer_1{kIntervalInSeconds}; + AsyncTimer timer_2{std::move(timer_1)}; + + EXPECT_FALSE(timer_1.IsExpired()); + EXPECT_FALSE(timer_2.IsExpired()); + const auto first_check_point = Now(); + + while (!timer_2.IsExpired()) { + ASSERT_LT(ElapsedMilis(before, Now()), 2 * kIntervalInMilis); + } + const auto second_check_point = Now(); + + EXPECT_FALSE(timer_1.IsExpired()); + EXPECT_TRUE(timer_2.IsExpired()); + + EXPECT_LT(ElapsedMilis(before, first_check_point), kIntervalInMilis / 2); + EXPECT_NEAR(ElapsedMilis(before, second_check_point), kIntervalInMilis, kAbsoluteErrorInMilis); +} + +TEST(AsyncTimer, MoveAssign) { + const auto before = Now(); + AsyncTimer timer_1{2 * kIntervalInSeconds}; + AsyncTimer timer_2{kIntervalInSeconds}; + + EXPECT_FALSE(timer_1.IsExpired()); + EXPECT_FALSE(timer_2.IsExpired()); + const auto first_check_point = Now(); + + timer_2 = std::move(timer_1); + EXPECT_FALSE(timer_1.IsExpired()); + EXPECT_FALSE(timer_2.IsExpired()); + + while (!timer_2.IsExpired()) 
{ + ASSERT_LT(ElapsedMilis(before, Now()), 3 * kIntervalInMilis); + } + const auto second_check_point = Now(); + + EXPECT_FALSE(timer_1.IsExpired()); + EXPECT_TRUE(timer_2.IsExpired()); + + EXPECT_LT(ElapsedMilis(before, first_check_point), kIntervalInMilis / 2); + EXPECT_NEAR(ElapsedMilis(before, second_check_point), 2 * kIntervalInMilis, kAbsoluteErrorInMilis); +} + +TEST(AsyncTimer, AssignToExpiredTimer) { + const auto before = Now(); + AsyncTimer timer_1{2 * kIntervalInSeconds}; + AsyncTimer timer_2{kIntervalInSeconds}; + + EXPECT_FALSE(timer_1.IsExpired()); + EXPECT_FALSE(timer_2.IsExpired()); + const auto first_check_point = Now(); + + while (!timer_2.IsExpired()) { + ASSERT_LT(ElapsedMilis(before, Now()), 3 * kIntervalInMilis); + } + + EXPECT_FALSE(timer_1.IsExpired()); + EXPECT_TRUE(timer_2.IsExpired()); + const auto second_check_point = Now(); + + timer_2 = std::move(timer_1); + EXPECT_FALSE(timer_1.IsExpired()); + EXPECT_FALSE(timer_2.IsExpired()); + const auto third_check_point = Now(); + + while (!timer_2.IsExpired()) { + ASSERT_LT(ElapsedMilis(before, Now()), 3 * kIntervalInMilis); + } + + EXPECT_FALSE(timer_1.IsExpired()); + EXPECT_TRUE(timer_2.IsExpired()); + const auto fourth_check_point = Now(); + + EXPECT_LT(ElapsedMilis(before, first_check_point), kIntervalInMilis / 2); + EXPECT_NEAR(ElapsedMilis(before, second_check_point), kIntervalInMilis, kAbsoluteErrorInMilis); + EXPECT_LT(ElapsedMilis(before, third_check_point), 1.5 * kIntervalInMilis); + EXPECT_NEAR(ElapsedMilis(before, fourth_check_point), 2 * kIntervalInMilis, kAbsoluteErrorInMilis); +} + +TEST(AsyncTimer, DestroyTimerWhileItIsStillRunning) { + { AsyncTimer timer_to_destroy{kIntervalInSeconds}; } + const auto before = Now(); + AsyncTimer timer_to_wait{1.5 * kIntervalInSeconds}; + while (!timer_to_wait.IsExpired()) { + ASSERT_LT(ElapsedMilis(before, Now()), 3 * kIntervalInMilis); + } + // At this point the timer_to_destroy has expired, nothing bad happened. 
This doesn't mean the timer cancellation + // works properly, it just means that nothing bad happens if a timer gets cancelled. +} + +TEST(AsyncTimer, TimersWithExtremeValues) { + AsyncTimer timer_with_zero{0}; + const double expected_maximum_value = std::nexttoward(std::numeric_limits::max(), 0.0); + AsyncTimer timer_with_max_value{expected_maximum_value}; +}