From a282542666db13910b897ea2b6b1fdec39a09dc2 Mon Sep 17 00:00:00 2001 From: Gareth Andrew Lloyd Date: Tue, 12 Mar 2024 00:26:11 +0000 Subject: [PATCH 01/16] Optimise ORDER BY, RANGE, UNWIND (#1781) * Optimise frame change * Optimise distinct + orderby memory usage - dispose collections as earlier as possible - move values rather than copy * Better perf, ORDER BY * Optimise RANGE and UNWIND * ConstraintVerificationInfo only if at least one constraint * Optimise TypeValue * Clang-tidy fix --- src/query/common.cpp | 60 +------- src/query/common.hpp | 128 ++++++++++++++---- src/query/frame_change.hpp | 7 +- .../interpret/awesome_memgraph_functions.cpp | 6 + src/query/plan/operator.cpp | 73 ++++++---- src/query/plan/pretty_print.cpp | 2 +- src/query/typed_value.cpp | 40 +++--- src/query/typed_value.hpp | 71 +++++----- .../replication_handler.cpp | 2 +- src/storage/v2/constraints/constraints.hpp | 3 +- .../v2/constraints/existence_constraints.hpp | 4 +- .../v2/constraints/unique_constraints.hpp | 4 +- src/storage/v2/disk/storage.cpp | 3 +- src/storage/v2/disk/unique_constraints.cpp | 3 +- src/storage/v2/disk/unique_constraints.hpp | 3 +- src/storage/v2/inmemory/storage.cpp | 17 ++- .../v2/inmemory/unique_constraints.cpp | 1 + .../v2/inmemory/unique_constraints.hpp | 3 + src/storage/v2/transaction.hpp | 6 +- src/storage/v2/vertex_accessor.cpp | 36 +++-- tests/unit/storage_rocks.cpp | 2 - tests/unit/storage_v2_wal_file.cpp | 2 +- 22 files changed, 270 insertions(+), 206 deletions(-) diff --git a/src/query/common.cpp b/src/query/common.cpp index 3c75ed5ec..94a8d8cdf 100644 --- a/src/query/common.cpp +++ b/src/query/common.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -13,64 +13,6 @@ namespace memgraph::query { -namespace impl { - -bool TypedValueCompare(const TypedValue &a, const TypedValue &b) { - // in ordering null comes after everything else - // at the same time Null is not less that null - // first deal with Null < Whatever case - if (a.IsNull()) return false; - // now deal with NotNull < Null case - if (b.IsNull()) return true; - - // comparisons are from this point legal only between values of - // the same type, or int+float combinations - if ((a.type() != b.type() && !(a.IsNumeric() && b.IsNumeric()))) - throw QueryRuntimeException("Can't compare value of type {} to value of type {}.", a.type(), b.type()); - - switch (a.type()) { - case TypedValue::Type::Bool: - return !a.ValueBool() && b.ValueBool(); - case TypedValue::Type::Int: - if (b.type() == TypedValue::Type::Double) - return a.ValueInt() < b.ValueDouble(); - else - return a.ValueInt() < b.ValueInt(); - case TypedValue::Type::Double: - if (b.type() == TypedValue::Type::Int) - return a.ValueDouble() < b.ValueInt(); - else - return a.ValueDouble() < b.ValueDouble(); - case TypedValue::Type::String: - // NOLINTNEXTLINE(modernize-use-nullptr) - return a.ValueString() < b.ValueString(); - case TypedValue::Type::Date: - // NOLINTNEXTLINE(modernize-use-nullptr) - return a.ValueDate() < b.ValueDate(); - case TypedValue::Type::LocalTime: - // NOLINTNEXTLINE(modernize-use-nullptr) - return a.ValueLocalTime() < b.ValueLocalTime(); - case TypedValue::Type::LocalDateTime: - // NOLINTNEXTLINE(modernize-use-nullptr) - return a.ValueLocalDateTime() < b.ValueLocalDateTime(); - case TypedValue::Type::Duration: - // 
NOLINTNEXTLINE(modernize-use-nullptr) - return a.ValueDuration() < b.ValueDuration(); - case TypedValue::Type::List: - case TypedValue::Type::Map: - case TypedValue::Type::Vertex: - case TypedValue::Type::Edge: - case TypedValue::Type::Path: - case TypedValue::Type::Graph: - case TypedValue::Type::Function: - throw QueryRuntimeException("Comparison is not defined for values of type {}.", a.type()); - case TypedValue::Type::Null: - LOG_FATAL("Invalid type"); - } -} - -} // namespace impl - int64_t QueryTimestamp() { return std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()) .count(); diff --git a/src/query/common.hpp b/src/query/common.hpp index 36ba07791..9f4e01cc0 100644 --- a/src/query/common.hpp +++ b/src/query/common.hpp @@ -23,6 +23,7 @@ #include "query/frontend/ast/ast.hpp" #include "query/frontend/semantic/symbol.hpp" #include "query/typed_value.hpp" +#include "range/v3/all.hpp" #include "storage/v2/id_types.hpp" #include "storage/v2/property_value.hpp" #include "storage/v2/result.hpp" @@ -31,9 +32,91 @@ namespace memgraph::query { -namespace impl { -bool TypedValueCompare(const TypedValue &a, const TypedValue &b); -} // namespace impl +namespace { +std::partial_ordering TypedValueCompare(TypedValue const &a, TypedValue const &b) { + // First assume typical same type comparisons + if (a.type() == b.type()) { + switch (a.type()) { + case TypedValue::Type::Bool: + return a.UnsafeValueBool() <=> b.UnsafeValueBool(); + case TypedValue::Type::Int: + return a.UnsafeValueInt() <=> b.UnsafeValueInt(); + case TypedValue::Type::Double: + return a.UnsafeValueDouble() <=> b.UnsafeValueDouble(); + case TypedValue::Type::String: + return a.UnsafeValueString() <=> b.UnsafeValueString(); + case TypedValue::Type::Date: + return a.UnsafeValueDate() <=> b.UnsafeValueDate(); + case TypedValue::Type::LocalTime: + return a.UnsafeValueLocalTime() <=> b.UnsafeValueLocalTime(); + case TypedValue::Type::LocalDateTime: + return a.UnsafeValueLocalDateTime() <=> b.UnsafeValueLocalDateTime(); + case TypedValue::Type::Duration: + return a.UnsafeValueDuration() <=> b.UnsafeValueDuration(); + case TypedValue::Type::Null: + return std::partial_ordering::equivalent; + case TypedValue::Type::List: + case TypedValue::Type::Map: + case TypedValue::Type::Vertex: + case TypedValue::Type::Edge: + case TypedValue::Type::Path: + case TypedValue::Type::Graph: + case TypedValue::Type::Function: + throw QueryRuntimeException("Comparison is not defined for values of type {}.", a.type()); + } + } else { + // from this point legal only between values of + // int+float combinations or against null + + // in ordering null comes after everything else + // at the same time Null is not less that null + // first deal with Null < Whatever case + if (a.IsNull()) return std::partial_ordering::greater; + // now deal with NotNull < Null case + if (b.IsNull()) return std::partial_ordering::less; + + if (!(a.IsNumeric() && b.IsNumeric())) [[unlikely]] + throw QueryRuntimeException("Can't compare value of type {} to value of type {}.", a.type(), b.type()); + + switch (a.type()) { + case TypedValue::Type::Int: + return a.UnsafeValueInt() <=> b.ValueDouble(); + case TypedValue::Type::Double: + return a.UnsafeValueDouble() <=> b.ValueInt(); + case TypedValue::Type::Bool: + case TypedValue::Type::Null: + case TypedValue::Type::String: + case TypedValue::Type::List: + case TypedValue::Type::Map: + case TypedValue::Type::Vertex: + case TypedValue::Type::Edge: + case TypedValue::Type::Path: + case TypedValue::Type::Date: + 
case TypedValue::Type::LocalTime: + case TypedValue::Type::LocalDateTime: + case TypedValue::Type::Duration: + case TypedValue::Type::Graph: + case TypedValue::Type::Function: + LOG_FATAL("Invalid type"); + } + } +} + +} // namespace + +struct OrderedTypedValueCompare { + OrderedTypedValueCompare(Ordering ordering) : ordering_{ordering}, ascending{ordering == Ordering::ASC} {} + + auto operator()(const TypedValue &lhs, const TypedValue &rhs) const -> std::partial_ordering { + return ascending ? TypedValueCompare(lhs, rhs) : TypedValueCompare(rhs, lhs); + } + + auto ordering() const { return ordering_; } + + private: + Ordering ordering_; + bool ascending = true; +}; /// Custom Comparator type for comparing vectors of TypedValues. /// @@ -43,32 +126,27 @@ bool TypedValueCompare(const TypedValue &a, const TypedValue &b); class TypedValueVectorCompare final { public: TypedValueVectorCompare() = default; - explicit TypedValueVectorCompare(const std::vector &ordering) : ordering_(ordering) {} + explicit TypedValueVectorCompare(std::vector orderings) + : orderings_{std::move(orderings)} {} - template - bool operator()(const std::vector &c1, const std::vector &c2) const { - // ordering is invalid if there are more elements in the collections - // then there are in the ordering_ vector - MG_ASSERT(c1.size() <= ordering_.size() && c2.size() <= ordering_.size(), - "Collections contain more elements then there are orderings"); + const auto &orderings() const { return orderings_; } - auto c1_it = c1.begin(); - auto c2_it = c2.begin(); - auto ordering_it = ordering_.begin(); - for (; c1_it != c1.end() && c2_it != c2.end(); c1_it++, c2_it++, ordering_it++) { - if (impl::TypedValueCompare(*c1_it, *c2_it)) return *ordering_it == Ordering::ASC; - if (impl::TypedValueCompare(*c2_it, *c1_it)) return *ordering_it == Ordering::DESC; - } - - // at least one collection is exhausted - // c1 is less then c2 iff c1 reached the end but c2 didn't - return (c1_it == c1.end()) && (c2_it != c2.end()); + auto lex_cmp() const { + return [orderings = &orderings_](const std::vector &lhs, + const std::vector &rhs) { + auto rng = ranges::views::zip(*orderings, lhs, rhs); + for (auto const &[cmp, l, r] : rng) { + auto res = cmp(l, r); + if (res == std::partial_ordering::less) return true; + if (res == std::partial_ordering::greater) return false; + } + DMG_ASSERT(orderings->size() == lhs.size() && lhs.size() == rhs.size()); + return false; + }; } - // TODO: Remove this, member is public - const auto &ordering() const { return ordering_; } - - std::vector ordering_; + private: + std::vector orderings_; }; /// Raise QueryRuntimeException if the value for symbol isn't of expected type. diff --git a/src/query/frame_change.hpp b/src/query/frame_change.hpp index 7baf1fe41..f51185722 100644 --- a/src/query/frame_change.hpp +++ b/src/query/frame_change.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -126,10 +126,11 @@ class FrameChangeCollector { } bool ResetTrackingValue(const std::string &key) { - if (!tracked_values_.contains(utils::pmr::string(key, utils::NewDeleteResource()))) { + auto const it = tracked_values_.find(utils::pmr::string(key, utils::NewDeleteResource())); + if (it == tracked_values_.cend()) { return false; } - tracked_values_.erase(utils::pmr::string(key, utils::NewDeleteResource())); + tracked_values_.erase(it); AddTrackingKey(key); return true; } diff --git a/src/query/interpret/awesome_memgraph_functions.cpp b/src/query/interpret/awesome_memgraph_functions.cpp index 6be8c4837..a9381f92a 100644 --- a/src/query/interpret/awesome_memgraph_functions.cpp +++ b/src/query/interpret/awesome_memgraph_functions.cpp @@ -761,13 +761,19 @@ TypedValue Range(const TypedValue *args, int64_t nargs, const FunctionContext &c int64_t step = nargs == 3 ? args[2].ValueInt() : 1; TypedValue::TVector list(ctx.memory); if (lbound <= rbound && step > 0) { + int64_t n = ((rbound - lbound + 1) + (step - 1)) / step; + list.reserve(n); for (auto i = lbound; i <= rbound; i += step) { list.emplace_back(i); } + MG_ASSERT(list.size() == n); } else if (lbound >= rbound && step < 0) { + int64_t n = ((lbound - rbound + 1) + (-step - 1)) / -step; + list.reserve(n); for (auto i = lbound; i >= rbound; i += step) { list.emplace_back(i); } + MG_ASSERT(list.size() == n); } return TypedValue(std::move(list)); } diff --git a/src/query/plan/operator.cpp b/src/query/plan/operator.cpp index 7cd506050..bf2194641 100644 --- a/src/query/plan/operator.cpp +++ b/src/query/plan/operator.cpp @@ -47,6 +47,7 @@ #include "query/procedure/mg_procedure_impl.hpp" #include "query/procedure/module.hpp" #include "query/typed_value.hpp" +#include "range/v3/all.hpp" #include "storage/v2/property_value.hpp" #include "storage/v2/view.hpp" #include "utils/algorithm.hpp" @@ -4147,14 +4148,14 @@ OrderBy::OrderBy(const std::shared_ptr &input, const std::vecto const std::vector &output_symbols) : input_(input), output_symbols_(output_symbols) { // split the order_by vector into two vectors of orderings and expressions - std::vector ordering; + std::vector ordering; ordering.reserve(order_by.size()); order_by_.reserve(order_by.size()); for (const auto &ordering_expression_pair : order_by) { ordering.emplace_back(ordering_expression_pair.ordering); order_by_.emplace_back(ordering_expression_pair.expression); } - compare_ = TypedValueVectorCompare(ordering); + compare_ = TypedValueVectorCompare(std::move(ordering)); } ACCEPT_WITH_INPUT(OrderBy) @@ -4175,29 +4176,43 @@ class OrderByCursor : public Cursor { OOMExceptionEnabler oom_exception; SCOPED_PROFILE_OP_BY_REF(self_); - if (!did_pull_all_) { + if (!did_pull_all_) [[unlikely]] { ExpressionEvaluator evaluator(&frame, context.symbol_table, context.evaluation_context, context.db_accessor, storage::View::OLD); - auto *mem = cache_.get_allocator().GetMemoryResource(); + auto *pull_mem = context.evaluation_context.memory; + auto *query_mem = cache_.get_allocator().GetMemoryResource(); + + utils::pmr::vector> order_by(pull_mem); // Not cached, pull memory + utils::pmr::vector> output(query_mem); // Cached, query memory + while (input_cursor_->Pull(frame, context)) { // collect the order_by elements - utils::pmr::vector order_by(mem); - order_by.reserve(self_.order_by_.size()); - for (auto 
expression_ptr : self_.order_by_) { - order_by.emplace_back(expression_ptr->Accept(evaluator)); + utils::pmr::vector order_by_elem(pull_mem); + order_by_elem.reserve(self_.order_by_.size()); + for (auto const &expression_ptr : self_.order_by_) { + order_by_elem.emplace_back(expression_ptr->Accept(evaluator)); } + order_by.emplace_back(std::move(order_by_elem)); // collect the output elements - utils::pmr::vector output(mem); - output.reserve(self_.output_symbols_.size()); - for (const Symbol &output_sym : self_.output_symbols_) output.emplace_back(frame[output_sym]); - - cache_.push_back(Element{std::move(order_by), std::move(output)}); + utils::pmr::vector output_elem(query_mem); + output_elem.reserve(self_.output_symbols_.size()); + for (const Symbol &output_sym : self_.output_symbols_) { + output_elem.emplace_back(frame[output_sym]); + } + output.emplace_back(std::move(output_elem)); } - std::sort(cache_.begin(), cache_.end(), [this](const auto &pair1, const auto &pair2) { - return self_.compare_(pair1.order_by, pair2.order_by); - }); + // sorting with range zip + // we compare on just the projection of the 1st range (order_by) + // this will also permute the 2nd range (output) + ranges::sort( + ranges::views::zip(order_by, output), self_.compare_.lex_cmp(), + [](auto const &value) -> auto const & { return std::get<0>(value); }); + + // no longer need the order_by terms + order_by.clear(); + cache_ = std::move(output); did_pull_all_ = true; cache_it_ = cache_.begin(); @@ -4208,15 +4223,15 @@ class OrderByCursor : public Cursor { AbortCheck(context); // place the output values on the frame - DMG_ASSERT(self_.output_symbols_.size() == cache_it_->remember.size(), + DMG_ASSERT(self_.output_symbols_.size() == cache_it_->size(), "Number of values does not match the number of output symbols " "in OrderBy"); auto output_sym_it = self_.output_symbols_.begin(); - for (const TypedValue &output : cache_it_->remember) { - if (context.frame_change_collector && context.frame_change_collector->IsKeyTracked(output_sym_it->name())) { + for (TypedValue &output : *cache_it_) { + if (context.frame_change_collector) { context.frame_change_collector->ResetTrackingValue(output_sym_it->name()); } - frame[*output_sym_it++] = output; + frame[*output_sym_it++] = std::move(output); } cache_it_++; return true; @@ -4231,17 +4246,12 @@ class OrderByCursor : public Cursor { } private: - struct Element { - utils::pmr::vector order_by; - utils::pmr::vector remember; - }; - const OrderBy &self_; const UniqueCursorPtr input_cursor_; bool did_pull_all_{false}; // a cache of elements pulled from the input - // the cache is filled and sorted (only on first elem) on first Pull - utils::pmr::vector cache_; + // the cache is filled and sorted on first Pull + utils::pmr::vector> cache_; // iterator over the cache_, maintains state between Pulls decltype(cache_.begin()) cache_it_ = cache_.begin(); }; @@ -4445,6 +4455,7 @@ class UnwindCursor : public Cursor { if (input_value.type() != TypedValue::Type::List) throw QueryRuntimeException("Argument of UNWIND must be a list, but '{}' was provided.", input_value.type()); // Copy the evaluted input_value_list to our vector. 
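+          // (The copy is deliberate: the evaluated list lives in the pull's
+          // evaluation memory, while input_value_ and the iterator into it
+          // must stay valid across later Pull() calls.)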
+ // eval memory != query memory input_value_ = input_value.ValueList(); input_value_it_ = input_value_.begin(); } @@ -4452,7 +4463,7 @@ class UnwindCursor : public Cursor { // if we reached the end of our list of values goto back to top if (input_value_it_ == input_value_.end()) continue; - frame[self_.output_symbol_] = *input_value_it_++; + frame[self_.output_symbol_] = std::move(*input_value_it_++); if (context.frame_change_collector && context.frame_change_collector->IsKeyTracked(self_.output_symbol_.name_)) { context.frame_change_collector->ResetTrackingValue(self_.output_symbol_.name_); } @@ -4493,7 +4504,11 @@ class DistinctCursor : public Cursor { SCOPED_PROFILE_OP("Distinct"); while (true) { - if (!input_cursor_->Pull(frame, context)) return false; + if (!input_cursor_->Pull(frame, context)) { + // Nothing left to pull, we can dispose of seen_rows now + seen_rows_.clear(); + return false; + } utils::pmr::vector row(seen_rows_.get_allocator().GetMemoryResource()); row.reserve(self_.value_symbols_.size()); diff --git a/src/query/plan/pretty_print.cpp b/src/query/plan/pretty_print.cpp index eeb0c15b5..2e6fad9d0 100644 --- a/src/query/plan/pretty_print.cpp +++ b/src/query/plan/pretty_print.cpp @@ -769,7 +769,7 @@ bool PlanToJsonVisitor::PreVisit(OrderBy &op) { for (auto i = 0; i < op.order_by_.size(); ++i) { json json; - json["ordering"] = ToString(op.compare_.ordering_[i]); + json["ordering"] = ToString(op.compare_.orderings()[i].ordering()); json["expression"] = ToJson(op.order_by_[i]); self["order_by"].push_back(json); } diff --git a/src/query/typed_value.cpp b/src/query/typed_value.cpp index 86d25f01b..059e1b1ba 100644 --- a/src/query/typed_value.cpp +++ b/src/query/typed_value.cpp @@ -321,6 +321,20 @@ TypedValue::operator storage::PropertyValue() const { throw TypedValueException("Unsupported conversion from TypedValue to PropertyValue"); } +// NOLINTNEXTLINE(cppcoreguidelines-macro-usage) +#define DEFINE_VALUE_AND_TYPE_GETTERS_PRIMITIVE(type_param, type_enum, field) \ + type_param &TypedValue::Value##type_enum() { \ + if (type_ != Type::type_enum) [[unlikely]] \ + throw TypedValueException("TypedValue is of type '{}', not '{}'", type_, Type::type_enum); \ + return field; \ + } \ + type_param TypedValue::Value##type_enum() const { \ + if (type_ != Type::type_enum) [[unlikely]] \ + throw TypedValueException("TypedValue is of type '{}', not '{}'", type_, Type::type_enum); \ + return field; \ + } \ + bool TypedValue::Is##type_enum() const { return type_ == Type::type_enum; } + #define DEFINE_VALUE_AND_TYPE_GETTERS(type_param, type_enum, field) \ type_param &TypedValue::Value##type_enum() { \ if (type_ != Type::type_enum) [[unlikely]] \ @@ -334,9 +348,9 @@ TypedValue::operator storage::PropertyValue() const { } \ bool TypedValue::Is##type_enum() const { return type_ == Type::type_enum; } -DEFINE_VALUE_AND_TYPE_GETTERS(bool, Bool, bool_v) -DEFINE_VALUE_AND_TYPE_GETTERS(int64_t, Int, int_v) -DEFINE_VALUE_AND_TYPE_GETTERS(double, Double, double_v) +DEFINE_VALUE_AND_TYPE_GETTERS_PRIMITIVE(bool, Bool, bool_v) +DEFINE_VALUE_AND_TYPE_GETTERS_PRIMITIVE(int64_t, Int, int_v) +DEFINE_VALUE_AND_TYPE_GETTERS_PRIMITIVE(double, Double, double_v) DEFINE_VALUE_AND_TYPE_GETTERS(TypedValue::TString, String, string_v) DEFINE_VALUE_AND_TYPE_GETTERS(TypedValue::TVector, List, list_v) DEFINE_VALUE_AND_TYPE_GETTERS(TypedValue::TMap, Map, map_v) @@ -348,24 +362,10 @@ DEFINE_VALUE_AND_TYPE_GETTERS(utils::LocalTime, LocalTime, local_time_v) DEFINE_VALUE_AND_TYPE_GETTERS(utils::LocalDateTime, LocalDateTime, 
local_date_time_v) DEFINE_VALUE_AND_TYPE_GETTERS(utils::Duration, Duration, duration_v) DEFINE_VALUE_AND_TYPE_GETTERS(std::function, Function, function_v) - -Graph &TypedValue::ValueGraph() { - if (type_ != Type::Graph) { - throw TypedValueException("TypedValue is of type '{}', not '{}'", type_, Type::Graph); - } - return *graph_v; -} - -const Graph &TypedValue::ValueGraph() const { - if (type_ != Type::Graph) { - throw TypedValueException("TypedValue is of type '{}', not '{}'", type_, Type::Graph); - } - return *graph_v; -} - -bool TypedValue::IsGraph() const { return type_ == Type::Graph; } +DEFINE_VALUE_AND_TYPE_GETTERS(Graph, Graph, *graph_v) #undef DEFINE_VALUE_AND_TYPE_GETTERS +#undef DEFINE_VALUE_AND_TYPE_GETTERS_PRIMITIVE bool TypedValue::ContainsDeleted() const { switch (type_) { @@ -399,8 +399,6 @@ bool TypedValue::ContainsDeleted() const { return false; } -bool TypedValue::IsNull() const { return type_ == Type::Null; } - bool TypedValue::IsNumeric() const { return IsInt() || IsDouble(); } bool TypedValue::IsPropertyValue() const { diff --git a/src/query/typed_value.hpp b/src/query/typed_value.hpp index a1353869a..9b9346a1c 100644 --- a/src/query/typed_value.hpp +++ b/src/query/typed_value.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -475,50 +475,51 @@ class TypedValue { Type type() const { return type_; } - // TODO consider adding getters for primitives by value (and not by ref) +#define DECLARE_VALUE_AND_TYPE_GETTERS_PRIMITIVE(type_param, type_enum, field) \ + /** Gets the value of type field. Throws if value is not field*/ \ + type_param &Value##type_enum(); \ + /** Gets the value of type field. Throws if value is not field*/ \ + type_param Value##type_enum() const; \ + /** Checks if it's the value is of the given type */ \ + bool Is##type_enum() const; \ + /** Get the value of the type field. Unchecked */ \ + type_param UnsafeValue##type_enum() const { return field; } -#define DECLARE_VALUE_AND_TYPE_GETTERS(type_param, field) \ - /** Gets the value of type field. Throws if value is not field*/ \ - type_param &Value##field(); \ - /** Gets the value of type field. Throws if value is not field*/ \ - const type_param &Value##field() const; \ - /** Checks if it's the value is of the given type */ \ - bool Is##field() const; +#define DECLARE_VALUE_AND_TYPE_GETTERS(type_param, type_enum, field) \ + /** Gets the value of type field. Throws if value is not field*/ \ + type_param &Value##type_enum(); \ + /** Gets the value of type field. Throws if value is not field*/ \ + const type_param &Value##type_enum() const; \ + /** Checks if it's the value is of the given type */ \ + bool Is##type_enum() const; \ + /** Get the value of the type field. Unchecked */ \ + type_param const &UnsafeValue##type_enum() const { return field; } - DECLARE_VALUE_AND_TYPE_GETTERS(bool, Bool) - DECLARE_VALUE_AND_TYPE_GETTERS(int64_t, Int) - DECLARE_VALUE_AND_TYPE_GETTERS(double, Double) - DECLARE_VALUE_AND_TYPE_GETTERS(TString, String) + DECLARE_VALUE_AND_TYPE_GETTERS_PRIMITIVE(bool, Bool, bool_v) + DECLARE_VALUE_AND_TYPE_GETTERS_PRIMITIVE(int64_t, Int, int_v) + DECLARE_VALUE_AND_TYPE_GETTERS_PRIMITIVE(double, Double, double_v) + DECLARE_VALUE_AND_TYPE_GETTERS(TString, String, string_v) - /** - * Get the list value. - * @throw TypedValueException if stored value is not a list. 
- */ - TVector &ValueList(); + DECLARE_VALUE_AND_TYPE_GETTERS(TVector, List, list_v) + DECLARE_VALUE_AND_TYPE_GETTERS(TMap, Map, map_v) + DECLARE_VALUE_AND_TYPE_GETTERS(VertexAccessor, Vertex, vertex_v) + DECLARE_VALUE_AND_TYPE_GETTERS(EdgeAccessor, Edge, edge_v) + DECLARE_VALUE_AND_TYPE_GETTERS(Path, Path, path_v) - const TVector &ValueList() const; - - /** Check if the stored value is a list value */ - bool IsList() const; - - DECLARE_VALUE_AND_TYPE_GETTERS(TMap, Map) - DECLARE_VALUE_AND_TYPE_GETTERS(VertexAccessor, Vertex) - DECLARE_VALUE_AND_TYPE_GETTERS(EdgeAccessor, Edge) - DECLARE_VALUE_AND_TYPE_GETTERS(Path, Path) - - DECLARE_VALUE_AND_TYPE_GETTERS(utils::Date, Date) - DECLARE_VALUE_AND_TYPE_GETTERS(utils::LocalTime, LocalTime) - DECLARE_VALUE_AND_TYPE_GETTERS(utils::LocalDateTime, LocalDateTime) - DECLARE_VALUE_AND_TYPE_GETTERS(utils::Duration, Duration) - DECLARE_VALUE_AND_TYPE_GETTERS(Graph, Graph) - DECLARE_VALUE_AND_TYPE_GETTERS(std::function, Function) + DECLARE_VALUE_AND_TYPE_GETTERS(utils::Date, Date, date_v) + DECLARE_VALUE_AND_TYPE_GETTERS(utils::LocalTime, LocalTime, local_time_v) + DECLARE_VALUE_AND_TYPE_GETTERS(utils::LocalDateTime, LocalDateTime, local_date_time_v) + DECLARE_VALUE_AND_TYPE_GETTERS(utils::Duration, Duration, duration_v) + DECLARE_VALUE_AND_TYPE_GETTERS(Graph, Graph, *graph_v) + DECLARE_VALUE_AND_TYPE_GETTERS(std::function, Function, function_v) #undef DECLARE_VALUE_AND_TYPE_GETTERS +#undef DECLARE_VALUE_AND_TYPE_GETTERS_PRIMITIVE bool ContainsDeleted() const; /** Checks if value is a TypedValue::Null. */ - bool IsNull() const; + bool IsNull() const { return type_ == Type::Null; } /** Convenience function for checking if this TypedValue is either * an integer or double */ diff --git a/src/replication_handler/replication_handler.cpp b/src/replication_handler/replication_handler.cpp index 34ccdfc99..4ae4c796e 100644 --- a/src/replication_handler/replication_handler.cpp +++ b/src/replication_handler/replication_handler.cpp @@ -310,7 +310,7 @@ auto ReplicationHandler::ShowReplicas() const -> utils::BasicResultstorage_mode_ != storage::StorageMode::IN_MEMORY_TRANSACTIONAL) return; if (!full_info && storage->name() == dbms::kDefaultDB) return; - auto ok = + [[maybe_unused]] auto ok = storage->repl_storage_state_.WithClient(replica.name_, [&](storage::ReplicationStorageClient &client) { auto ts_info = client.GetTimestampInfo(storage); auto state = client.State(); diff --git a/src/storage/v2/constraints/constraints.hpp b/src/storage/v2/constraints/constraints.hpp index 1f5ef999e..0d2a49875 100644 --- a/src/storage/v2/constraints/constraints.hpp +++ b/src/storage/v2/constraints/constraints.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -33,6 +33,7 @@ struct Constraints { std::unique_ptr existence_constraints_; std::unique_ptr unique_constraints_; + bool empty() const { return existence_constraints_->empty() && unique_constraints_->empty(); } }; } // namespace memgraph::storage diff --git a/src/storage/v2/constraints/existence_constraints.hpp b/src/storage/v2/constraints/existence_constraints.hpp index c3b68828a..a043a9f5b 100644 --- a/src/storage/v2/constraints/existence_constraints.hpp +++ b/src/storage/v2/constraints/existence_constraints.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -40,6 +40,8 @@ class ExistenceConstraints { const LabelId &label, const PropertyId &property); }; + bool empty() const { return constraints_.empty(); } + [[nodiscard]] static std::optional ValidateVertexOnConstraint(const Vertex &vertex, const LabelId &label, const PropertyId &property); diff --git a/src/storage/v2/constraints/unique_constraints.hpp b/src/storage/v2/constraints/unique_constraints.hpp index b9ec04bfc..fcdcf1739 100644 --- a/src/storage/v2/constraints/unique_constraints.hpp +++ b/src/storage/v2/constraints/unique_constraints.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -60,6 +60,8 @@ class UniqueConstraints { virtual void Clear() = 0; + virtual bool empty() const = 0; + protected: static DeletionStatus CheckPropertiesBeforeDeletion(const std::set &properties) { if (properties.empty()) { diff --git a/src/storage/v2/disk/storage.cpp b/src/storage/v2/disk/storage.cpp index 21ae7755e..21fa5ecc7 100644 --- a/src/storage/v2/disk/storage.cpp +++ b/src/storage/v2/disk/storage.cpp @@ -2049,7 +2049,8 @@ Transaction DiskStorage::CreateTransaction(IsolationLevel isolation_level, Stora edge_import_mode_active = edge_import_status_ == EdgeImportMode::ACTIVE; } - return {transaction_id, start_timestamp, isolation_level, storage_mode, edge_import_mode_active}; + return {transaction_id, start_timestamp, isolation_level, + storage_mode, edge_import_mode_active, !constraints_.empty()}; } uint64_t DiskStorage::CommitTimestamp(const std::optional desired_commit_timestamp) { diff --git a/src/storage/v2/disk/unique_constraints.cpp b/src/storage/v2/disk/unique_constraints.cpp index 3c17530c2..04a0c265a 100644 --- a/src/storage/v2/disk/unique_constraints.cpp +++ b/src/storage/v2/disk/unique_constraints.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -347,5 +347,6 @@ void DiskUniqueConstraints::LoadUniqueConstraints(const std::vector constraints_.emplace(label, properties); } } +bool DiskUniqueConstraints::empty() const { return constraints_.empty(); } } // namespace memgraph::storage diff --git a/src/storage/v2/disk/unique_constraints.hpp b/src/storage/v2/disk/unique_constraints.hpp index 0cc5a9586..4e3450ef1 100644 --- a/src/storage/v2/disk/unique_constraints.hpp +++ b/src/storage/v2/disk/unique_constraints.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -59,6 +59,7 @@ class DiskUniqueConstraints : public UniqueConstraints { RocksDBStorage *GetRocksDBStorage() const; void LoadUniqueConstraints(const std::vector &keys); + bool empty() const override; private: utils::Synchronized>>>>> diff --git a/src/storage/v2/inmemory/storage.cpp b/src/storage/v2/inmemory/storage.cpp index 1ea909450..dab56750b 100644 --- a/src/storage/v2/inmemory/storage.cpp +++ b/src/storage/v2/inmemory/storage.cpp @@ -779,9 +779,10 @@ utils::BasicResult InMemoryStorage::InMemoryAcce // This is usually done by the MVCC, but it does not handle the metadata deltas transaction_.EnsureCommitTimestampExists(); - if (transaction_.constraint_verification_info.NeedsExistenceConstraintVerification()) { + if (transaction_.constraint_verification_info && + transaction_.constraint_verification_info->NeedsExistenceConstraintVerification()) { const auto vertices_to_update = - transaction_.constraint_verification_info.GetVerticesForExistenceConstraintChecking(); + transaction_.constraint_verification_info->GetVerticesForExistenceConstraintChecking(); for (auto const *vertex : vertices_to_update) { // No need to take any locks here because we modified this vertex and no // one else can touch it until we commit. @@ -808,12 +809,13 @@ utils::BasicResult InMemoryStorage::InMemoryAcce static_cast(storage_->constraints_.unique_constraints_.get()); commit_timestamp_.emplace(mem_storage->CommitTimestamp(reparg.desired_commit_timestamp)); - if (transaction_.constraint_verification_info.NeedsUniqueConstraintVerification()) { + if (transaction_.constraint_verification_info && + transaction_.constraint_verification_info->NeedsUniqueConstraintVerification()) { // Before committing and validating vertices against unique constraints, // we have to update unique constraints with the vertices that are going // to be validated/committed. 
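+        // (constraint_verification_info is now std::optional and is engaged
+        // only when the storage has at least one constraint; transactions are
+        // created with has_constraints = !constraints_.empty().)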
const auto vertices_to_update = - transaction_.constraint_verification_info.GetVerticesForUniqueConstraintChecking(); + transaction_.constraint_verification_info->GetVerticesForUniqueConstraintChecking(); for (auto const *vertex : vertices_to_update) { mem_unique_constraints->UpdateBeforeCommit(vertex, transaction_); @@ -994,10 +996,11 @@ void InMemoryStorage::InMemoryAccessor::Abort() { // note: this check also saves on unnecessary contention on `engine_lock_` if (!transaction_.deltas.empty()) { // CONSTRAINTS - if (transaction_.constraint_verification_info.NeedsUniqueConstraintVerification()) { + if (transaction_.constraint_verification_info && + transaction_.constraint_verification_info->NeedsUniqueConstraintVerification()) { // Need to remove elements from constraints before handling of the deltas, so the elements match the correct // values - auto vertices_to_check = transaction_.constraint_verification_info.GetVerticesForUniqueConstraintChecking(); + auto vertices_to_check = transaction_.constraint_verification_info->GetVerticesForUniqueConstraintChecking(); auto vertices_to_check_v = std::vector{vertices_to_check.begin(), vertices_to_check.end()}; storage_->constraints_.AbortEntries(vertices_to_check_v, transaction_.start_timestamp); } @@ -1449,7 +1452,7 @@ Transaction InMemoryStorage::CreateTransaction( start_timestamp = timestamp_; } } - return {transaction_id, start_timestamp, isolation_level, storage_mode, false}; + return {transaction_id, start_timestamp, isolation_level, storage_mode, false, !constraints_.empty()}; } void InMemoryStorage::SetStorageMode(StorageMode new_storage_mode) { diff --git a/src/storage/v2/inmemory/unique_constraints.cpp b/src/storage/v2/inmemory/unique_constraints.cpp index e08965eab..dd47a3f68 100644 --- a/src/storage/v2/inmemory/unique_constraints.cpp +++ b/src/storage/v2/inmemory/unique_constraints.cpp @@ -522,5 +522,6 @@ void InMemoryUniqueConstraints::Clear() { constraints_.clear(); constraints_by_label_.clear(); } +bool InMemoryUniqueConstraints::empty() const { return constraints_.empty() && constraints_by_label_.empty(); } } // namespace memgraph::storage diff --git a/src/storage/v2/inmemory/unique_constraints.hpp b/src/storage/v2/inmemory/unique_constraints.hpp index 27fae1b30..40ea0a19e 100644 --- a/src/storage/v2/inmemory/unique_constraints.hpp +++ b/src/storage/v2/inmemory/unique_constraints.hpp @@ -41,6 +41,9 @@ struct FixedCapacityArray { using PropertyIdArray = FixedCapacityArray; class InMemoryUniqueConstraints : public UniqueConstraints { + public: + bool empty() const override; + private: struct Entry { std::vector values; diff --git a/src/storage/v2/transaction.hpp b/src/storage/v2/transaction.hpp index 9f973cbf0..ff1699626 100644 --- a/src/storage/v2/transaction.hpp +++ b/src/storage/v2/transaction.hpp @@ -41,7 +41,7 @@ const uint64_t kTransactionInitialId = 1ULL << 63U; struct Transaction { Transaction(uint64_t transaction_id, uint64_t start_timestamp, IsolationLevel isolation_level, - StorageMode storage_mode, bool edge_import_mode_active) + StorageMode storage_mode, bool edge_import_mode_active, bool has_constraints) : transaction_id(transaction_id), start_timestamp(start_timestamp), command_id(0), @@ -50,6 +50,8 @@ struct Transaction { isolation_level(isolation_level), storage_mode(storage_mode), edge_import_mode_active(edge_import_mode_active), + constraint_verification_info{(has_constraints) ? std::optional{std::in_place} + : std::nullopt}, vertices_{(storage_mode == StorageMode::ON_DISK_TRANSACTIONAL) ? 
std::optional>{std::in_place} : std::nullopt}, @@ -99,7 +101,7 @@ struct Transaction { // Used to speedup getting info about a vertex when there is a long delta // chain involved in rebuilding that info. mutable VertexInfoCache manyDeltasCache{}; - mutable ConstraintVerificationInfo constraint_verification_info{}; + mutable std::optional constraint_verification_info{}; // Store modified edges GID mapped to changed Delta and serialized edge key // Only for disk storage diff --git a/src/storage/v2/vertex_accessor.cpp b/src/storage/v2/vertex_accessor.cpp index ef0a6ab3e..7d78070a8 100644 --- a/src/storage/v2/vertex_accessor.cpp +++ b/src/storage/v2/vertex_accessor.cpp @@ -120,7 +120,7 @@ Result VertexAccessor::AddLabel(LabelId label) { /// TODO: some by pointers, some by reference => not good, make it better storage_->constraints_.unique_constraints_->UpdateOnAddLabel(label, *vertex_, transaction_->start_timestamp); - transaction_->constraint_verification_info.AddedLabel(vertex_); + if (transaction_->constraint_verification_info) transaction_->constraint_verification_info->AddedLabel(vertex_); storage_->indices_.UpdateOnAddLabel(label, vertex_, *transaction_); transaction_->manyDeltasCache.Invalidate(vertex_, label); @@ -276,10 +276,12 @@ Result VertexAccessor::SetProperty(PropertyId property, const Pro }}; std::invoke(atomic_memory_block); - if (!value.IsNull()) { - transaction_->constraint_verification_info.AddedProperty(vertex_); - } else { - transaction_->constraint_verification_info.RemovedProperty(vertex_); + if (transaction_->constraint_verification_info) { + if (!value.IsNull()) { + transaction_->constraint_verification_info->AddedProperty(vertex_); + } else { + transaction_->constraint_verification_info->RemovedProperty(vertex_); + } } storage_->indices_.UpdateOnSetProperty(property, value, vertex_, *transaction_); transaction_->manyDeltasCache.Invalidate(vertex_, property); @@ -309,10 +311,12 @@ Result VertexAccessor::InitProperties(const std::mapindices_.UpdateOnSetProperty(property, value, vertex, *transaction); transaction->manyDeltasCache.Invalidate(vertex, property); - if (!value.IsNull()) { - transaction->constraint_verification_info.AddedProperty(vertex); - } else { - transaction->constraint_verification_info.RemovedProperty(vertex); + if (transaction->constraint_verification_info) { + if (!value.IsNull()) { + transaction->constraint_verification_info->AddedProperty(vertex); + } else { + transaction->constraint_verification_info->RemovedProperty(vertex); + } } } result = true; @@ -347,10 +351,12 @@ Result>> Vertex storage->indices_.UpdateOnSetProperty(id, new_value, vertex, *transaction); CreateAndLinkDelta(transaction, vertex, Delta::SetPropertyTag(), id, std::move(old_value)); transaction->manyDeltasCache.Invalidate(vertex, id); - if (!new_value.IsNull()) { - transaction->constraint_verification_info.AddedProperty(vertex); - } else { - transaction->constraint_verification_info.RemovedProperty(vertex); + if (transaction->constraint_verification_info) { + if (!new_value.IsNull()) { + transaction->constraint_verification_info->AddedProperty(vertex); + } else { + transaction->constraint_verification_info->RemovedProperty(vertex); + } } } }}; @@ -380,9 +386,11 @@ Result> VertexAccessor::ClearProperties() { for (const auto &[property, value] : *properties) { CreateAndLinkDelta(transaction, vertex, Delta::SetPropertyTag(), property, value); storage->indices_.UpdateOnSetProperty(property, PropertyValue(), vertex, *transaction); - 
transaction->constraint_verification_info.RemovedProperty(vertex); transaction->manyDeltasCache.Invalidate(vertex, property); } + if (transaction->constraint_verification_info) { + transaction->constraint_verification_info->RemovedProperty(vertex); + } vertex->properties.ClearProperties(); }}; std::invoke(atomic_memory_block); diff --git a/tests/unit/storage_rocks.cpp b/tests/unit/storage_rocks.cpp index 5cdaf4691..539cf3e0a 100644 --- a/tests/unit/storage_rocks.cpp +++ b/tests/unit/storage_rocks.cpp @@ -17,8 +17,6 @@ #include #include "disk_test_utils.hpp" -#include "query/common.hpp" -#include "query/db_accessor.hpp" #include "storage/v2/delta.hpp" #include "storage/v2/disk/storage.hpp" #include "storage/v2/id_types.hpp" diff --git a/tests/unit/storage_v2_wal_file.cpp b/tests/unit/storage_v2_wal_file.cpp index dcb7d3326..4094090f5 100644 --- a/tests/unit/storage_v2_wal_file.cpp +++ b/tests/unit/storage_v2_wal_file.cpp @@ -74,7 +74,7 @@ class DeltaGenerator final { explicit Transaction(DeltaGenerator *gen) : gen_(gen), transaction_(gen->transaction_id_++, gen->timestamp_++, memgraph::storage::IsolationLevel::SNAPSHOT_ISOLATION, - gen->storage_mode_, false) {} + gen->storage_mode_, false, false) {} public: memgraph::storage::Vertex *CreateVertex() { From de2e2048ef5058a5c0882ed2707fde324d85ffc0 Mon Sep 17 00:00:00 2001 From: DavIvek Date: Tue, 12 Mar 2024 13:55:40 +0100 Subject: [PATCH 02/16] Support label creation via property values (#1762) --- src/query/frontend/ast/ast.hpp | 42 +++++-- .../frontend/ast/cypher_main_visitor.cpp | 25 ++-- .../frontend/opencypher/grammar/Cypher.g4 | 5 +- .../frontend/semantic/symbol_generator.cpp | 47 ++++++++ .../frontend/semantic/symbol_generator.hpp | 6 + src/query/plan/operator.cpp | 94 +++++++++------ src/query/plan/operator.hpp | 21 ++-- src/query/plan/preprocess.cpp | 14 ++- src/query/plan/pretty_print.cpp | 15 ++- src/query/plan/rule_based_planner.hpp | 34 +++--- tests/e2e/load_csv/load_csv.py | 39 ++++++- .../tests/memgraph_V1/features/with.feature | 110 ++++++++++++++++++ tests/unit/plan_pretty_print.cpp | 5 +- tests/unit/query_common.hpp | 4 +- .../query_plan_create_set_remove_delete.cpp | 48 +++++--- tests/unit/query_plan_match_filter_return.cpp | 46 +++++--- tests/unit/query_plan_operator_to_string.cpp | 14 ++- .../unit/query_plan_read_write_typecheck.cpp | 4 +- 18 files changed, 440 insertions(+), 133 deletions(-) diff --git a/src/query/frontend/ast/ast.hpp b/src/query/frontend/ast/ast.hpp index b8d8c9e1a..f136975bc 100644 --- a/src/query/frontend/ast/ast.hpp +++ b/src/query/frontend/ast/ast.hpp @@ -1249,6 +1249,8 @@ class AllPropertiesLookup : public memgraph::query::Expression { friend class AstStorage; }; +using QueryLabelType = std::variant; + class LabelsTest : public memgraph::query::Expression { public: static const utils::TypeInfo kType; @@ -1281,6 +1283,16 @@ class LabelsTest : public memgraph::query::Expression { protected: LabelsTest(Expression *expression, const std::vector &labels) : expression_(expression), labels_(labels) {} + LabelsTest(Expression *expression, const std::vector &labels) : expression_(expression) { + labels_.reserve(labels.size()); + for (const auto &label : labels) { + if (const auto *label_ix = std::get_if(&label)) { + labels_.push_back(*label_ix); + } else { + throw SemanticException("You can't use labels in filter expressions."); + } + } + } private: friend class AstStorage; @@ -1771,7 +1783,7 @@ class NodeAtom : public memgraph::query::PatternAtom { return visitor.PostVisit(*this); } - std::vector labels_; 
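+  // Each labels_ entry is a QueryLabelType: either a pre-resolved LabelIx or
+  // an Expression * evaluated at runtime (a label taken from a property value).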
+ std::vector labels_; std::variant, memgraph::query::ParameterLookup *> properties_; @@ -1781,7 +1793,11 @@ class NodeAtom : public memgraph::query::PatternAtom { object->identifier_ = identifier_ ? identifier_->Clone(storage) : nullptr; object->labels_.resize(labels_.size()); for (auto i = 0; i < object->labels_.size(); ++i) { - object->labels_[i] = storage->GetLabelIx(labels_[i].name); + if (const auto *label = std::get_if(&labels_[i])) { + object->labels_[i] = storage->GetLabelIx(label->name); + } else { + object->labels_[i] = std::get(labels_[i])->Clone(storage); + } } if (const auto *properties = std::get_if>(&properties_)) { auto &new_obj_properties = std::get>(object->properties_); @@ -2657,20 +2673,25 @@ class SetLabels : public memgraph::query::Clause { } memgraph::query::Identifier *identifier_{nullptr}; - std::vector labels_; + std::vector labels_; SetLabels *Clone(AstStorage *storage) const override { SetLabels *object = storage->Create(); object->identifier_ = identifier_ ? identifier_->Clone(storage) : nullptr; object->labels_.resize(labels_.size()); for (auto i = 0; i < object->labels_.size(); ++i) { - object->labels_[i] = storage->GetLabelIx(labels_[i].name); + if (const auto *label = std::get_if(&labels_[i])) { + object->labels_[i] = storage->GetLabelIx(label->name); + } else { + object->labels_[i] = std::get(labels_[i])->Clone(storage); + } } return object; } protected: - SetLabels(Identifier *identifier, const std::vector &labels) : identifier_(identifier), labels_(labels) {} + SetLabels(Identifier *identifier, std::vector labels) + : identifier_(identifier), labels_(std::move(labels)) {} private: friend class AstStorage; @@ -2720,20 +2741,25 @@ class RemoveLabels : public memgraph::query::Clause { } memgraph::query::Identifier *identifier_{nullptr}; - std::vector labels_; + std::vector labels_; RemoveLabels *Clone(AstStorage *storage) const override { RemoveLabels *object = storage->Create(); object->identifier_ = identifier_ ? 
identifier_->Clone(storage) : nullptr; object->labels_.resize(labels_.size()); for (auto i = 0; i < object->labels_.size(); ++i) { - object->labels_[i] = storage->GetLabelIx(labels_[i].name); + if (const auto *label = std::get_if(&labels_[i])) { + object->labels_[i] = storage->GetLabelIx(label->name); + } else { + object->labels_[i] = std::get(labels_[i])->Clone(storage); + } } return object; } protected: - RemoveLabels(Identifier *identifier, const std::vector &labels) : identifier_(identifier), labels_(labels) {} + RemoveLabels(Identifier *identifier, std::vector labels) + : identifier_(identifier), labels_(std::move(labels)) {} private: friend class AstStorage; diff --git a/src/query/frontend/ast/cypher_main_visitor.cpp b/src/query/frontend/ast/cypher_main_visitor.cpp index 467c73125..ceebe2815 100644 --- a/src/query/frontend/ast/cypher_main_visitor.cpp +++ b/src/query/frontend/ast/cypher_main_visitor.cpp @@ -1933,7 +1933,7 @@ antlrcpp::Any CypherMainVisitor::visitNodePattern(MemgraphCypher::NodePatternCon anonymous_identifiers.push_back(&node->identifier_); } if (ctx->nodeLabels()) { - node->labels_ = std::any_cast>(ctx->nodeLabels()->accept(this)); + node->labels_ = std::any_cast>(ctx->nodeLabels()->accept(this)); } if (ctx->properties()) { // This can return either properties or parameters @@ -1947,16 +1947,27 @@ antlrcpp::Any CypherMainVisitor::visitNodePattern(MemgraphCypher::NodePatternCon } antlrcpp::Any CypherMainVisitor::visitNodeLabels(MemgraphCypher::NodeLabelsContext *ctx) { - std::vector labels; + std::vector labels; for (auto *node_label : ctx->nodeLabel()) { - if (node_label->labelName()->symbolicName()) { + auto *label_name = node_label->labelName(); + if (label_name->symbolicName()) { labels.emplace_back(AddLabel(std::any_cast(node_label->accept(this)))); - } else { + } else if (label_name->parameter()) { // If we have a parameter, we have to resolve it. const auto *param_lookup = std::any_cast(node_label->accept(this)); const auto label_name = parameters_->AtTokenPosition(param_lookup->token_position_).ValueString(); labels.emplace_back(storage_->GetLabelIx(label_name)); query_info_.is_cacheable = false; // We can't cache queries with label parameters. 
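+        // (The parameter's value is baked into the plan as a concrete LabelIx,
+        // so a cached plan would be wrong for a different parameter value.)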
+ } else { + auto variable = std::any_cast(label_name->variable()->accept(this)); + users_identifiers.insert(variable); + auto *expression = static_cast(storage_->Create(variable)); + for (auto *lookup : label_name->propertyLookup()) { + auto key = std::any_cast(lookup->accept(this)); + auto *property_lookup = storage_->Create(expression, key); + expression = property_lookup; + } + labels.emplace_back(expression); } } return labels; @@ -2504,7 +2515,7 @@ antlrcpp::Any CypherMainVisitor::visitListIndexingOrSlicing(MemgraphCypher::List antlrcpp::Any CypherMainVisitor::visitExpression2a(MemgraphCypher::Expression2aContext *ctx) { auto *expression = std::any_cast(ctx->expression2b()->accept(this)); if (ctx->nodeLabels()) { - auto labels = std::any_cast>(ctx->nodeLabels()->accept(this)); + auto labels = std::any_cast>(ctx->nodeLabels()->accept(this)); expression = storage_->Create(expression, labels); } return expression; @@ -2830,7 +2841,7 @@ antlrcpp::Any CypherMainVisitor::visitSetItem(MemgraphCypher::SetItemContext *ct // SetLabels auto *set_labels = storage_->Create(); set_labels->identifier_ = storage_->Create(std::any_cast(ctx->variable()->accept(this))); - set_labels->labels_ = std::any_cast>(ctx->nodeLabels()->accept(this)); + set_labels->labels_ = std::any_cast>(ctx->nodeLabels()->accept(this)); return static_cast(set_labels); } @@ -2853,7 +2864,7 @@ antlrcpp::Any CypherMainVisitor::visitRemoveItem(MemgraphCypher::RemoveItemConte // RemoveLabels auto *remove_labels = storage_->Create(); remove_labels->identifier_ = storage_->Create(std::any_cast(ctx->variable()->accept(this))); - remove_labels->labels_ = std::any_cast>(ctx->nodeLabels()->accept(this)); + remove_labels->labels_ = std::any_cast>(ctx->nodeLabels()->accept(this)); return static_cast(remove_labels); } diff --git a/src/query/frontend/opencypher/grammar/Cypher.g4 b/src/query/frontend/opencypher/grammar/Cypher.g4 index 55cb53ef3..7fa218598 100644 --- a/src/query/frontend/opencypher/grammar/Cypher.g4 +++ b/src/query/frontend/opencypher/grammar/Cypher.g4 @@ -193,7 +193,10 @@ nodeLabels : nodeLabel ( nodeLabel )* ; nodeLabel : ':' labelName ; -labelName : symbolicName | parameter; +labelName : symbolicName + | parameter + | variable ( propertyLookup )+ + ; relTypeName : symbolicName ; diff --git a/src/query/frontend/semantic/symbol_generator.cpp b/src/query/frontend/semantic/symbol_generator.cpp index 2cfbee584..c12915634 100644 --- a/src/query/frontend/semantic/symbol_generator.cpp +++ b/src/query/frontend/semantic/symbol_generator.cpp @@ -568,6 +568,44 @@ bool SymbolGenerator::PostVisit(SetProperty & /*set_property*/) { return true; } +bool SymbolGenerator::PreVisit(SetLabels &set_labels) { + auto &scope = scopes_.back(); + scope.in_set_labels = true; + for (auto &label : set_labels.labels_) { + if (auto *expression = std::get_if(&label)) { + (*expression)->Accept(*this); + } + } + + return true; +} + +bool SymbolGenerator::PostVisit(SetLabels & /*set_labels*/) { + auto &scope = scopes_.back(); + scope.in_set_labels = false; + + return true; +} + +bool SymbolGenerator::PreVisit(RemoveLabels &remove_labels) { + auto &scope = scopes_.back(); + scope.in_remove_labels = true; + for (auto &label : remove_labels.labels_) { + if (auto *expression = std::get_if(&label)) { + (*expression)->Accept(*this); + } + } + + return true; +} + +bool SymbolGenerator::PostVisit(RemoveLabels & /*remove_labels*/) { + auto &scope = scopes_.back(); + scope.in_remove_labels = false; + + return true; +} + // Pattern and its subparts. 
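+// (Illustrative sketch of what the SetLabels/RemoveLabels visitors above
+// accept, not a line from this patch: `MATCH (n) SET n:n.kind` takes the
+// label name from a property value, and the expression `n.kind` is visited
+// and symbol-checked here like any other expression.)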
bool SymbolGenerator::PreVisit(Pattern &pattern) { @@ -602,6 +640,15 @@ bool SymbolGenerator::PreVisit(NodeAtom &node_atom) { }; scope.in_node_atom = true; + + if (scope.in_create) { // you can use expressions with labels only in create + for (auto &label : node_atom.labels_) { + if (auto *expression = std::get_if(&label)) { + (*expression)->Accept(*this); + } + } + } + if (auto *properties = std::get_if>(&node_atom.properties_)) { bool props_or_labels = !properties->empty() || !node_atom.labels_.empty(); diff --git a/src/query/frontend/semantic/symbol_generator.hpp b/src/query/frontend/semantic/symbol_generator.hpp index e5b46fbfe..41122625a 100644 --- a/src/query/frontend/semantic/symbol_generator.hpp +++ b/src/query/frontend/semantic/symbol_generator.hpp @@ -68,6 +68,10 @@ class SymbolGenerator : public HierarchicalTreeVisitor { bool PostVisit(Foreach &) override; bool PreVisit(SetProperty & /*set_property*/) override; bool PostVisit(SetProperty & /*set_property*/) override; + bool PreVisit(SetLabels &) override; + bool PostVisit(SetLabels & /*set_labels*/) override; + bool PreVisit(RemoveLabels &) override; + bool PostVisit(RemoveLabels & /*remove_labels*/) override; // Expressions ReturnType Visit(Identifier &) override; @@ -130,6 +134,8 @@ class SymbolGenerator : public HierarchicalTreeVisitor { bool in_set_property{false}; bool in_call_subquery{false}; bool has_return{false}; + bool in_set_labels{false}; + bool in_remove_labels{false}; // True when visiting a pattern atom (node or edge) identifier, which can be // reused or created in the pattern itself. bool in_pattern_atom_identifier{false}; diff --git a/src/query/plan/operator.cpp b/src/query/plan/operator.cpp index bf2194641..29f64f950 100644 --- a/src/query/plan/operator.cpp +++ b/src/query/plan/operator.cpp @@ -48,6 +48,7 @@ #include "query/procedure/module.hpp" #include "query/typed_value.hpp" #include "range/v3/all.hpp" +#include "storage/v2/id_types.hpp" #include "storage/v2/property_value.hpp" #include "storage/v2/view.hpp" #include "utils/algorithm.hpp" @@ -179,6 +180,20 @@ inline void AbortCheck(ExecutionContext const &context) { if (auto const reason = MustAbort(context); reason != AbortReason::NO_ABORT) throw HintedAbortError(reason); } +std::vector EvaluateLabels(const std::vector &labels, + ExpressionEvaluator &evaluator, DbAccessor *dba) { + std::vector result; + result.reserve(labels.size()); + for (const auto &label : labels) { + if (const auto *label_atom = std::get_if(&label)) { + result.emplace_back(*label_atom); + } else { + result.emplace_back(dba->NameToLabel(std::get(label)->Accept(evaluator).ValueString())); + } + } + return result; +} + } // namespace // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) @@ -214,12 +229,13 @@ CreateNode::CreateNode(const std::shared_ptr &input, NodeCreati // Creates a vertex on this GraphDb. Returns a reference to vertex placed on the // frame. 
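+// Labels arrive here already resolved: CreateNodeCursor::Pull runs
+// EvaluateLabels first, so the fine-grained permission check can be made
+// against the concrete LabelIds before any vertex is created.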
-VertexAccessor &CreateLocalVertex(const NodeCreationInfo &node_info, Frame *frame, ExecutionContext &context) { +VertexAccessor &CreateLocalVertex(const NodeCreationInfo &node_info, Frame *frame, ExecutionContext &context, + std::vector &labels, ExpressionEvaluator &evaluator) { auto &dba = *context.db_accessor; auto new_node = dba.InsertVertex(); context.execution_stats[ExecutionStats::Key::CREATED_NODES] += 1; - for (auto label : node_info.labels) { - auto maybe_error = new_node.AddLabel(label); + for (const auto &label : labels) { + auto maybe_error = std::invoke([&] { return new_node.AddLabel(label); }); if (maybe_error.HasError()) { switch (maybe_error.GetError()) { case storage::Error::SERIALIZATION_ERROR: @@ -234,10 +250,6 @@ VertexAccessor &CreateLocalVertex(const NodeCreationInfo &node_info, Frame *fram } context.execution_stats[ExecutionStats::Key::CREATED_LABELS] += 1; } - // Evaluator should use the latest accessors, as modified in this query, when - // setting properties on new nodes. - ExpressionEvaluator evaluator(frame, context.symbol_table, context.evaluation_context, context.db_accessor, - storage::View::NEW); // TODO: PropsSetChecked allocates a PropertyValue, make it use context.memory // when we update PropertyValue with custom allocator. std::map properties; @@ -277,16 +289,21 @@ CreateNode::CreateNodeCursor::CreateNodeCursor(const CreateNode &self, utils::Me bool CreateNode::CreateNodeCursor::Pull(Frame &frame, ExecutionContext &context) { OOMExceptionEnabler oom_exception; SCOPED_PROFILE_OP("CreateNode"); -#ifdef MG_ENTERPRISE - if (license::global_license_checker.IsEnterpriseValidFast() && context.auth_checker && - !context.auth_checker->Has(self_.node_info_.labels, - memgraph::query::AuthQuery::FineGrainedPrivilege::CREATE_DELETE)) { - throw QueryRuntimeException("Vertex not created due to not having enough permission!"); - } -#endif + ExpressionEvaluator evaluator(&frame, context.symbol_table, context.evaluation_context, context.db_accessor, + storage::View::NEW); if (input_cursor_->Pull(frame, context)) { - auto created_vertex = CreateLocalVertex(self_.node_info_, &frame, context); + // we have to resolve the labels before we can check for permissions + auto labels = EvaluateLabels(self_.node_info_.labels, evaluator, context.db_accessor); + +#ifdef MG_ENTERPRISE + if (license::global_license_checker.IsEnterpriseValidFast() && context.auth_checker && + !context.auth_checker->Has(labels, memgraph::query::AuthQuery::FineGrainedPrivilege::CREATE_DELETE)) { + throw QueryRuntimeException("Vertex not created due to not having enough permission!"); + } +#endif + + auto created_vertex = CreateLocalVertex(self_.node_info_, &frame, context, labels, evaluator); if (context.trigger_context_collector) { context.trigger_context_collector->RegisterCreatedObject(created_vertex); } @@ -370,6 +387,9 @@ bool CreateExpand::CreateExpandCursor::Pull(Frame &frame, ExecutionContext &cont SCOPED_PROFILE_OP_BY_REF(self_); if (!input_cursor_->Pull(frame, context)) return false; + ExpressionEvaluator evaluator(&frame, context.symbol_table, context.evaluation_context, context.db_accessor, + storage::View::NEW); + auto labels = EvaluateLabels(self_.node_info_.labels, evaluator, context.db_accessor); #ifdef MG_ENTERPRISE if (license::global_license_checker.IsEnterpriseValidFast()) { @@ -381,7 +401,7 @@ bool CreateExpand::CreateExpandCursor::Pull(Frame &frame, ExecutionContext &cont if (context.auth_checker && !(context.auth_checker->Has(self_.edge_info_.edge_type, 
memgraph::query::AuthQuery::FineGrainedPrivilege::CREATE_DELETE) && - context.auth_checker->Has(self_.node_info_.labels, fine_grained_permission))) { + context.auth_checker->Has(labels, fine_grained_permission))) { throw QueryRuntimeException("Edge not created due to not having enough permission!"); } } @@ -391,14 +411,8 @@ bool CreateExpand::CreateExpandCursor::Pull(Frame &frame, ExecutionContext &cont ExpectType(self_.input_symbol_, vertex_value, TypedValue::Type::Vertex); auto &v1 = vertex_value.ValueVertex(); - // Similarly to CreateNode, newly created edges and nodes should use the - // storage::View::NEW. - // E.g. we pickup new properties: `CREATE (n {p: 42}) -[:r {ep: n.p}]-> ()` - ExpressionEvaluator evaluator(&frame, context.symbol_table, context.evaluation_context, context.db_accessor, - storage::View::NEW); - // get the destination vertex (possibly an existing node) - auto &v2 = OtherVertex(frame, context); + auto &v2 = OtherVertex(frame, context, labels, evaluator); // create an edge between the two nodes auto *dba = context.db_accessor; @@ -429,13 +443,15 @@ void CreateExpand::CreateExpandCursor::Shutdown() { input_cursor_->Shutdown(); } void CreateExpand::CreateExpandCursor::Reset() { input_cursor_->Reset(); } -VertexAccessor &CreateExpand::CreateExpandCursor::OtherVertex(Frame &frame, ExecutionContext &context) { +VertexAccessor &CreateExpand::CreateExpandCursor::OtherVertex(Frame &frame, ExecutionContext &context, + std::vector &labels, + ExpressionEvaluator &evaluator) { if (self_.existing_node_) { TypedValue &dest_node_value = frame[self_.node_info_.symbol]; ExpectType(self_.node_info_.symbol, dest_node_value, TypedValue::Type::Vertex); return dest_node_value.ValueVertex(); } else { - auto &created_vertex = CreateLocalVertex(self_.node_info_, &frame, context); + auto &created_vertex = CreateLocalVertex(self_.node_info_, &frame, context, labels, evaluator); if (context.trigger_context_collector) { context.trigger_context_collector->RegisterCreatedObject(created_vertex); } @@ -3208,8 +3224,8 @@ void SetProperties::SetPropertiesCursor::Shutdown() { input_cursor_->Shutdown(); void SetProperties::SetPropertiesCursor::Reset() { input_cursor_->Reset(); } SetLabels::SetLabels(const std::shared_ptr &input, Symbol input_symbol, - const std::vector &labels) - : input_(input), input_symbol_(std::move(input_symbol)), labels_(labels) {} + std::vector labels) + : input_(input), input_symbol_(std::move(input_symbol)), labels_(std::move(labels)) {} ACCEPT_WITH_INPUT(SetLabels) @@ -3229,16 +3245,18 @@ SetLabels::SetLabelsCursor::SetLabelsCursor(const SetLabels &self, utils::Memory bool SetLabels::SetLabelsCursor::Pull(Frame &frame, ExecutionContext &context) { OOMExceptionEnabler oom_exception; SCOPED_PROFILE_OP("SetLabels"); + ExpressionEvaluator evaluator(&frame, context.symbol_table, context.evaluation_context, context.db_accessor, + storage::View::NEW); + if (!input_cursor_->Pull(frame, context)) return false; + auto labels = EvaluateLabels(self_.labels_, evaluator, context.db_accessor); #ifdef MG_ENTERPRISE if (license::global_license_checker.IsEnterpriseValidFast() && context.auth_checker && - !context.auth_checker->Has(self_.labels_, memgraph::query::AuthQuery::FineGrainedPrivilege::CREATE_DELETE)) { + !context.auth_checker->Has(labels, memgraph::query::AuthQuery::FineGrainedPrivilege::CREATE_DELETE)) { throw QueryRuntimeException("Couldn't set label due to not having enough permission!"); } #endif - if (!input_cursor_->Pull(frame, context)) return false; - TypedValue &vertex_value 
= frame[self_.input_symbol_]; // Skip setting labels on Null (can occur in optional match). if (vertex_value.IsNull()) return true; @@ -3253,7 +3271,7 @@ bool SetLabels::SetLabelsCursor::Pull(Frame &frame, ExecutionContext &context) { } #endif - for (auto label : self_.labels_) { + for (auto label : labels) { auto maybe_value = vertex.AddLabel(label); if (maybe_value.HasError()) { switch (maybe_value.GetError()) { @@ -3368,8 +3386,8 @@ void RemoveProperty::RemovePropertyCursor::Shutdown() { input_cursor_->Shutdown( void RemoveProperty::RemovePropertyCursor::Reset() { input_cursor_->Reset(); } RemoveLabels::RemoveLabels(const std::shared_ptr &input, Symbol input_symbol, - const std::vector &labels) - : input_(input), input_symbol_(std::move(input_symbol)), labels_(labels) {} + std::vector labels) + : input_(input), input_symbol_(std::move(input_symbol)), labels_(std::move(labels)) {} ACCEPT_WITH_INPUT(RemoveLabels) @@ -3389,16 +3407,18 @@ RemoveLabels::RemoveLabelsCursor::RemoveLabelsCursor(const RemoveLabels &self, u bool RemoveLabels::RemoveLabelsCursor::Pull(Frame &frame, ExecutionContext &context) { OOMExceptionEnabler oom_exception; SCOPED_PROFILE_OP("RemoveLabels"); + ExpressionEvaluator evaluator(&frame, context.symbol_table, context.evaluation_context, context.db_accessor, + storage::View::NEW); + if (!input_cursor_->Pull(frame, context)) return false; + auto labels = EvaluateLabels(self_.labels_, evaluator, context.db_accessor); #ifdef MG_ENTERPRISE if (license::global_license_checker.IsEnterpriseValidFast() && context.auth_checker && - !context.auth_checker->Has(self_.labels_, memgraph::query::AuthQuery::FineGrainedPrivilege::CREATE_DELETE)) { + !context.auth_checker->Has(labels, memgraph::query::AuthQuery::FineGrainedPrivilege::CREATE_DELETE)) { throw QueryRuntimeException("Couldn't remove label due to not having enough permission!"); } #endif - if (!input_cursor_->Pull(frame, context)) return false; - TypedValue &vertex_value = frame[self_.input_symbol_]; // Skip removing labels on Null (can occur in optional match). 
if (vertex_value.IsNull()) return true; @@ -3413,7 +3433,7 @@ bool RemoveLabels::RemoveLabelsCursor::Pull(Frame &frame, ExecutionContext &cont } #endif - for (auto label : self_.labels_) { + for (auto label : labels) { auto maybe_value = vertex.RemoveLabel(label); if (maybe_value.HasError()) { switch (maybe_value.GetError()) { diff --git a/src/query/plan/operator.hpp b/src/query/plan/operator.hpp index 6563c2bb0..5a8ef0625 100644 --- a/src/query/plan/operator.hpp +++ b/src/query/plan/operator.hpp @@ -285,6 +285,7 @@ class Once : public memgraph::query::plan::LogicalOperator { }; using PropertiesMapList = std::vector>; +using StorageLabelType = std::variant; struct NodeCreationInfo { static const utils::TypeInfo kType; @@ -292,18 +293,18 @@ struct NodeCreationInfo { NodeCreationInfo() = default; - NodeCreationInfo(Symbol symbol, std::vector labels, + NodeCreationInfo(Symbol symbol, std::vector labels, std::variant properties) : symbol{std::move(symbol)}, labels{std::move(labels)}, properties{std::move(properties)} {}; - NodeCreationInfo(Symbol symbol, std::vector labels, PropertiesMapList properties) + NodeCreationInfo(Symbol symbol, std::vector labels, PropertiesMapList properties) : symbol{std::move(symbol)}, labels{std::move(labels)}, properties{std::move(properties)} {}; - NodeCreationInfo(Symbol symbol, std::vector labels, ParameterLookup *properties) + NodeCreationInfo(Symbol symbol, std::vector labels, ParameterLookup *properties) : symbol{std::move(symbol)}, labels{std::move(labels)}, properties{properties} {}; Symbol symbol; - std::vector labels; + std::vector labels; std::variant properties; NodeCreationInfo Clone(AstStorage *storage) const { @@ -506,7 +507,8 @@ class CreateExpand : public memgraph::query::plan::LogicalOperator { const UniqueCursorPtr input_cursor_; // Get the existing node (if existing_node_ == true), or create a new node - VertexAccessor &OtherVertex(Frame &frame, ExecutionContext &context); + VertexAccessor &OtherVertex(Frame &frame, ExecutionContext &context, + std::vector &labels, ExpressionEvaluator &evaluator); }; }; @@ -1477,8 +1479,7 @@ class SetLabels : public memgraph::query::plan::LogicalOperator { SetLabels() = default; - SetLabels(const std::shared_ptr &input, Symbol input_symbol, - const std::vector &labels); + SetLabels(const std::shared_ptr &input, Symbol input_symbol, std::vector labels); bool Accept(HierarchicalLogicalOperatorVisitor &visitor) override; UniqueCursorPtr MakeCursor(utils::MemoryResource *) const override; std::vector ModifiedSymbols(const SymbolTable &) const override; @@ -1489,7 +1490,7 @@ class SetLabels : public memgraph::query::plan::LogicalOperator { std::shared_ptr input_; Symbol input_symbol_; - std::vector labels_; + std::vector labels_; std::unique_ptr Clone(AstStorage *storage) const override { auto object = std::make_unique(); @@ -1567,7 +1568,7 @@ class RemoveLabels : public memgraph::query::plan::LogicalOperator { RemoveLabels() = default; RemoveLabels(const std::shared_ptr &input, Symbol input_symbol, - const std::vector &labels); + std::vector labels); bool Accept(HierarchicalLogicalOperatorVisitor &visitor) override; UniqueCursorPtr MakeCursor(utils::MemoryResource *) const override; std::vector ModifiedSymbols(const SymbolTable &) const override; @@ -1578,7 +1579,7 @@ class RemoveLabels : public memgraph::query::plan::LogicalOperator { std::shared_ptr input_; Symbol input_symbol_; - std::vector labels_; + std::vector labels_; std::unique_ptr Clone(AstStorage *storage) const override { auto object = 
std::make_unique(); diff --git a/src/query/plan/preprocess.cpp b/src/query/plan/preprocess.cpp index ca605a46a..2c783fa15 100644 --- a/src/query/plan/preprocess.cpp +++ b/src/query/plan/preprocess.cpp @@ -358,11 +358,17 @@ void Filters::CollectPatternFilters(Pattern &pattern, SymbolTable &symbol_table, }; auto add_node_filter = [&](NodeAtom *node) { const auto &node_symbol = symbol_table.at(*node->identifier_); - if (!node->labels_.empty()) { - // Create a LabelsTest and store it. - auto *labels_test = storage.Create(node->identifier_, node->labels_); + std::vector labels; + for (auto label : node->labels_) { + if (const auto *label_node = std::get_if(&label)) { + throw SemanticException("Property lookup not supported in MATCH/MERGE clause!"); + } + labels.push_back(std::get(label)); + } + if (!labels.empty()) { + auto *labels_test = storage.Create(node->identifier_, labels); auto label_filter = FilterInfo{FilterInfo::Type::Label, labels_test, std::unordered_set{node_symbol}}; - label_filter.labels = node->labels_; + label_filter.labels = labels; all_filters_.emplace_back(label_filter); } add_properties(node); diff --git a/src/query/plan/pretty_print.cpp b/src/query/plan/pretty_print.cpp index 2e6fad9d0..5dd272052 100644 --- a/src/query/plan/pretty_print.cpp +++ b/src/query/plan/pretty_print.cpp @@ -340,7 +340,7 @@ json ToJson(NamedExpression *nexpr) { return json; } -json ToJson(const std::vector> &properties, const DbAccessor &dba) { +json ToJson(const PropertiesMapList &properties, const DbAccessor &dba) { json json; for (const auto &prop_pair : properties) { json.emplace(ToJson(prop_pair.first, dba), ToJson(prop_pair.second)); @@ -348,6 +348,18 @@ json ToJson(const std::vector> &pro return json; } +json ToJson(const std::vector &labels, const DbAccessor &dba) { + json json; + for (const auto &label : labels) { + if (const auto *label_node = std::get_if(&label)) { + json.emplace_back(ToJson(*label_node)); + } else { + json.emplace_back(ToJson(std::get(label), dba)); + } + } + return json; +} + json ToJson(const NodeCreationInfo &node_info, const DbAccessor &dba) { json self; self["symbol"] = ToJson(node_info.symbol); @@ -654,7 +666,6 @@ bool PlanToJsonVisitor::PreVisit(SetLabels &op) { self["name"] = "SetLabels"; self["input_symbol"] = ToJson(op.input_symbol_); self["labels"] = ToJson(op.labels_, *dba_); - op.input_->Accept(*this); self["input"] = PopOutput(); diff --git a/src/query/plan/rule_based_planner.hpp b/src/query/plan/rule_based_planner.hpp index 27f46e764..52281de60 100644 --- a/src/query/plan/rule_based_planner.hpp +++ b/src/query/plan/rule_based_planner.hpp @@ -293,6 +293,19 @@ class RuleBasedPlanner { storage::EdgeTypeId GetEdgeType(EdgeTypeIx edge_type) { return context_->db->NameToEdgeType(edge_type.name); } + std::vector GetLabelIds(const std::vector &labels) { + std::vector label_ids; + label_ids.reserve(labels.size()); + for (const auto &label : labels) { + if (const auto *label_atom = std::get_if(&label)) { + label_ids.emplace_back(GetLabel(*label_atom)); + } else { + label_ids.emplace_back(std::get(label)); + } + } + return label_ids; + } + std::unique_ptr HandleMatching(std::unique_ptr last_op, const SingleQueryPart &single_query_part, SymbolTable &symbol_table, std::unordered_set &bound_symbols) { @@ -328,11 +341,6 @@ class RuleBasedPlanner { std::unordered_set &bound_symbols) { auto node_to_creation_info = [&](const NodeAtom &node) { const auto &node_symbol = symbol_table.at(*node.identifier_); - std::vector labels; - labels.reserve(node.labels_.size()); - for 
(const auto &label : node.labels_) { - labels.push_back(GetLabel(label)); - } auto properties = std::invoke([&]() -> std::variant { if (const auto *node_properties = @@ -346,7 +354,7 @@ class RuleBasedPlanner { } return std::get(node.properties_); }); - return NodeCreationInfo{node_symbol, labels, properties}; + return NodeCreationInfo{node_symbol, GetLabelIds(node.labels_), properties}; }; auto base = [&](NodeAtom *node) -> std::unique_ptr { @@ -423,23 +431,13 @@ class RuleBasedPlanner { return std::make_unique(std::move(input_op), input_symbol, set->expression_, op); } else if (auto *set = utils::Downcast(clause)) { const auto &input_symbol = symbol_table.at(*set->identifier_); - std::vector labels; - labels.reserve(set->labels_.size()); - for (const auto &label : set->labels_) { - labels.push_back(GetLabel(label)); - } - return std::make_unique(std::move(input_op), input_symbol, labels); + return std::make_unique(std::move(input_op), input_symbol, GetLabelIds(set->labels_)); } else if (auto *rem = utils::Downcast(clause)) { return std::make_unique(std::move(input_op), GetProperty(rem->property_lookup_->property_), rem->property_lookup_); } else if (auto *rem = utils::Downcast(clause)) { const auto &input_symbol = symbol_table.at(*rem->identifier_); - std::vector labels; - labels.reserve(rem->labels_.size()); - for (const auto &label : rem->labels_) { - labels.push_back(GetLabel(label)); - } - return std::make_unique(std::move(input_op), input_symbol, labels); + return std::make_unique(std::move(input_op), input_symbol, GetLabelIds(rem->labels_)); } return nullptr; } diff --git a/tests/e2e/load_csv/load_csv.py b/tests/e2e/load_csv/load_csv.py index 371803ed1..6483676e6 100644 --- a/tests/e2e/load_csv/load_csv.py +++ b/tests/e2e/load_csv/load_csv.py @@ -53,8 +53,45 @@ def test_given_one_row_in_db_when_load_csv_after_match_then_pass(): assert len(list(results)) == 4 -def test_load_csv_with_parameters(): +def test_creating_labels_with_load_csv_variable(): memgraph = Memgraph("localhost", 7687) + + results = list( + memgraph.execute_and_fetch( + f"""LOAD CSV FROM '{get_file_path(SIMPLE_CSV_FILE)}' WITH HEADER AS row + CREATE (p:row.name) + RETURN p + """ + ) + ) + + assert len(results) == 4 + assert results[0]["p"]._labels == {"Joseph"} + assert results[1]["p"]._labels == {"Peter"} + assert results[2]["p"]._labels == {"Ella"} + assert results[3]["p"]._labels == {"Joe"} + + +def test_create_relationships_with_load_csv_variable2(): + memgraph = Memgraph("localhost", 7687) + + results = list( + memgraph.execute_and_fetch( + f"""LOAD CSV FROM '{get_file_path(SIMPLE_CSV_FILE)}' WITH HEADER AS row + CREATE (p:row.name:Person:row.id) + RETURN p + """ + ) + ) + + assert len(results) == 4 + assert results[0]["p"]._labels == {"Joseph", "Person", "1"} + assert results[1]["p"]._labels == {"Peter", "Person", "2"} + assert results[2]["p"]._labels == {"Ella", "Person", "3"} + assert results[3]["p"]._labels == {"Joe", "Person", "4"} + + +def test_load_csv_with_parameters(): URI = "bolt://localhost:7687" AUTH = ("", "") diff --git a/tests/gql_behave/tests/memgraph_V1/features/with.feature b/tests/gql_behave/tests/memgraph_V1/features/with.feature index f1882e8d7..53c63b5b0 100644 --- a/tests/gql_behave/tests/memgraph_V1/features/with.feature +++ b/tests/gql_behave/tests/memgraph_V1/features/with.feature @@ -264,3 +264,113 @@ Feature: With | id | | 0 | | 1 | + + Scenario: With test 17: + Given an empty graph + And having executed: + """ + CREATE ({name: "node1"}) + """ + When executing query: + """ + MATCH 
(n) WITH n AS node + CREATE (m:node.name) + """ + When executing query: + """ + MATCH (n:node1) RETURN n; + """ + Then the result should be: + | n | + | (:node1) | + + Scenario: With test 18: + Given an empty graph + And having executed: + """ + CREATE ({name: "LabelToAdd"}) + """ + When executing query: + """ + MATCH (n) WITH n AS node + SET node:node.name + """ + When executing query: + """ + MATCH (n) RETURN n; + """ + Then the result should be: + | n | + | (:LabelToAdd {name: 'LabelToAdd'}) | + + Scenario: With test 19: + Given an empty graph + And having executed: + """ + CREATE (:labelToRemove {name: 'labelToRemove'}) + """ + When executing query: + """ + MATCH (n) WITH n AS node + REMOVE node:node.name + """ + When executing query: + """ + MATCH (n) RETURN n; + """ + Then the result should be: + | n | + | ({name: 'labelToRemove'}) | + + Scenario: With test 20: + Given an empty graph + And having executed: + """ + CREATE ({name: 'label1'}) + """ + When executing query: + """ + MATCH (n) WITH n AS node + SET node:node.name:label2 + """ + When executing query: + """ + MATCH (n) RETURN n; + """ + Then the result should be: + | n | + | (:label1:label2 {name: 'label1'}) | + + Scenario: With test 21: + Given an empty graph + And having executed: + """ + CREATE ({name: 'label1'}) + """ + When executing query: + """ + MATCH (n) WITH n AS node + SET node:label2:node.name + """ + When executing query: + """ + MATCH (n) RETURN n; + """ + Then the result should be: + | n | + | (:label2:label1 {name: 'label1'}) | + + Scenario: With test 22: + Given an empty graph + And having executed: + """ + WITH {value: {label: "labelvalue"}} as label + CREATE (n:label.value.label); + """ + When executing query: + """ + MATCH (n) RETURN n; + """ + Then the result should be: + | n | + | (:labelvalue) | diff --git a/tests/unit/plan_pretty_print.cpp b/tests/unit/plan_pretty_print.cpp index ef2395931..0bc7b35cf 100644 --- a/tests/unit/plan_pretty_print.cpp +++ b/tests/unit/plan_pretty_print.cpp @@ -12,6 +12,7 @@ #include #include "disk_test_utils.hpp" +#include "query/frontend/ast/ast.hpp" #include "query/frontend/semantic/symbol_table.hpp" #include "query/plan/operator.hpp" #include "query/plan/pretty_print.hpp" @@ -515,7 +516,7 @@ TYPED_TEST(PrintToJsonTest, SetLabels) { std::shared_ptr last_op = std::make_shared(nullptr, node_sym); last_op = std::make_shared( last_op, node_sym, - std::vector{this->dba.NameToLabel("label1"), this->dba.NameToLabel("label2")}); + std::vector{this->dba.NameToLabel("label1"), this->dba.NameToLabel("label2")}); this->Check(last_op.get(), R"( { @@ -554,7 +555,7 @@ TYPED_TEST(PrintToJsonTest, RemoveLabels) { std::shared_ptr last_op = std::make_shared(nullptr, node_sym); last_op = std::make_shared( last_op, node_sym, - std::vector{this->dba.NameToLabel("label1"), this->dba.NameToLabel("label2")}); + std::vector{this->dba.NameToLabel("label1"), this->dba.NameToLabel("label2")}); this->Check(last_op.get(), R"( { diff --git a/tests/unit/query_common.hpp b/tests/unit/query_common.hpp index c18e06abf..6f9b1260a 100644 --- a/tests/unit/query_common.hpp +++ b/tests/unit/query_common.hpp @@ -425,7 +425,7 @@ auto GetSet(AstStorage &storage, const std::string &name, Expression *expr, bool /// Create a set labels clause for given identifier name and labels. 
auto GetSet(AstStorage &storage, const std::string &name, std::vector label_names) { - std::vector labels; + std::vector labels; labels.reserve(label_names.size()); for (const auto &label : label_names) { labels.push_back(storage.GetLabelIx(label)); @@ -438,7 +438,7 @@ auto GetRemove(AstStorage &storage, PropertyLookup *prop_lookup) { return storag /// Create a remove labels clause for given identifier name and labels. auto GetRemove(AstStorage &storage, const std::string &name, std::vector label_names) { - std::vector labels; + std::vector labels; labels.reserve(label_names.size()); for (const auto &label : label_names) { labels.push_back(storage.GetLabelIx(label)); diff --git a/tests/unit/query_plan_create_set_remove_delete.cpp b/tests/unit/query_plan_create_set_remove_delete.cpp index 1fa400940..b32fa91b1 100644 --- a/tests/unit/query_plan_create_set_remove_delete.cpp +++ b/tests/unit/query_plan_create_set_remove_delete.cpp @@ -497,7 +497,7 @@ class MatchCreateNodeWithAuthFixture : public QueryPlanTest { NodeCreationInfo m{}; m.symbol = symbol_table.CreateSymbol("m", true); - std::vector labels{dba.NameToLabel("l2")}; + std::vector labels{dba.NameToLabel("l2")}; m.labels = labels; // creation op auto create_node = std::make_shared(n_scan_all.op_, m); @@ -627,7 +627,7 @@ class MatchCreateExpandWithAuthFixture : public QueryPlanTest { // data for the second node NodeCreationInfo m; m.symbol = cycle ? n_scan_all.sym_ : symbol_table.CreateSymbol("m", true); - std::vector labels{dba.NameToLabel("l2")}; + std::vector labels{dba.NameToLabel("l2")}; m.labels = labels; EdgeCreationInfo r; @@ -1231,12 +1231,14 @@ TYPED_TEST(QueryPlanTest, SetLabels) { ASSERT_TRUE(dba.InsertVertex().AddLabel(label1).HasValue()); ASSERT_TRUE(dba.InsertVertex().AddLabel(label1).HasValue()); dba.AdvanceCommand(); + std::vector labels; + labels.emplace_back(label2); + labels.emplace_back(label3); SymbolTable symbol_table; auto n = MakeScanAll(this->storage, symbol_table, "n"); - auto label_set = - std::make_shared(n.op_, n.sym_, std::vector{label2, label3}); + auto label_set = std::make_shared(n.op_, n.sym_, labels); auto context = MakeContext(this->storage, symbol_table, &dba); EXPECT_EQ(2, PullAll(*label_set, &context)); @@ -1255,12 +1257,14 @@ TYPED_TEST(QueryPlanTest, SetLabelsWithFineGrained) { ASSERT_TRUE(dba.InsertVertex().AddLabel(labels[0]).HasValue()); ASSERT_TRUE(dba.InsertVertex().AddLabel(labels[0]).HasValue()); dba.AdvanceCommand(); + std::vector labels_variant; + labels_variant.emplace_back(labels[1]); + labels_variant.emplace_back(labels[2]); SymbolTable symbol_table; auto n = MakeScanAll(this->storage, symbol_table, "n"); - auto label_set = - std::make_shared(n.op_, n.sym_, std::vector{labels[1], labels[2]}); + auto label_set = std::make_shared(n.op_, n.sym_, labels_variant); memgraph::glue::FineGrainedAuthChecker auth_checker{user, &dba}; auto context = MakeContextWithFineGrainedChecker(this->storage, symbol_table, &dba, &auth_checker); @@ -1396,12 +1400,14 @@ TYPED_TEST(QueryPlanTest, RemoveLabels) { ASSERT_TRUE(v2.AddLabel(label1).HasValue()); ASSERT_TRUE(v2.AddLabel(label3).HasValue()); dba.AdvanceCommand(); + std::vector labels; + labels.emplace_back(label1); + labels.emplace_back(label2); SymbolTable symbol_table; auto n = MakeScanAll(this->storage, symbol_table, "n"); - auto label_remove = - std::make_shared(n.op_, n.sym_, std::vector{label1, label2}); + auto label_remove = std::make_shared(n.op_, n.sym_, labels); auto context = MakeContext(this->storage, symbol_table, &dba); EXPECT_EQ(2, 
PullAll(*label_remove, &context)); @@ -1425,12 +1431,14 @@ TYPED_TEST(QueryPlanTest, RemoveLabelsFineGrainedFiltering) { ASSERT_TRUE(v2.AddLabel(labels[0]).HasValue()); ASSERT_TRUE(v2.AddLabel(labels[2]).HasValue()); dba.AdvanceCommand(); + std::vector labels_variant; + labels_variant.emplace_back(labels[0]); + labels_variant.emplace_back(labels[1]); SymbolTable symbol_table; auto n = MakeScanAll(this->storage, symbol_table, "n"); - auto label_remove = std::make_shared( - n.op_, n.sym_, std::vector{labels[0], labels[1]}); + auto label_remove = std::make_shared(n.op_, n.sym_, labels_variant); memgraph::glue::FineGrainedAuthChecker auth_checker{user, &dba}; auto context = MakeContextWithFineGrainedChecker(this->storage, symbol_table, &dba, &auth_checker); @@ -1569,15 +1577,16 @@ TYPED_TEST(QueryPlanTest, SetRemove) { auto label1 = dba.NameToLabel("label1"); auto label2 = dba.NameToLabel("label2"); dba.AdvanceCommand(); + std::vector labels; + labels.emplace_back(label1); + labels.emplace_back(label2); // Create operations which match (v) and set and remove v :label. // The expected result is single (v) as it was at the start. SymbolTable symbol_table; // MATCH (n) SET n :label1 :label2 REMOVE n :label1 :label2 auto scan_all = MakeScanAll(this->storage, symbol_table, "n"); - auto set = std::make_shared(scan_all.op_, scan_all.sym_, - std::vector{label1, label2}); - auto rem = - std::make_shared(set, scan_all.sym_, std::vector{label1, label2}); + auto set = std::make_shared(scan_all.op_, scan_all.sym_, labels); + auto rem = std::make_shared(set, scan_all.sym_, labels); auto context = MakeContext(this->storage, symbol_table, &dba); EXPECT_EQ(1, PullAll(*rem, &context)); dba.AdvanceCommand(); @@ -1773,10 +1782,12 @@ TYPED_TEST(QueryPlanTest, SetLabelsOnNull) { auto storage_dba = this->db->Access(ReplicationRole::MAIN); memgraph::query::DbAccessor dba(storage_dba.get()); auto label = dba.NameToLabel("label"); + std::vector labels; + labels.emplace_back(label); SymbolTable symbol_table; auto n = MakeScanAll(this->storage, symbol_table, "n"); auto optional = std::make_shared(nullptr, n.op_, std::vector{n.sym_}); - auto set_op = std::make_shared(optional, n.sym_, std::vector{label}); + auto set_op = std::make_shared(optional, n.sym_, labels); EXPECT_EQ(0, CountIterable(dba.Vertices(memgraph::storage::View::OLD))); auto context = MakeContext(this->storage, symbol_table, &dba); EXPECT_EQ(1, PullAll(*set_op, &context)); @@ -1801,11 +1812,12 @@ TYPED_TEST(QueryPlanTest, RemoveLabelsOnNull) { auto storage_dba = this->db->Access(ReplicationRole::MAIN); memgraph::query::DbAccessor dba(storage_dba.get()); auto label = dba.NameToLabel("label"); + std::vector labels; + labels.emplace_back(label); SymbolTable symbol_table; auto n = MakeScanAll(this->storage, symbol_table, "n"); auto optional = std::make_shared(nullptr, n.op_, std::vector{n.sym_}); - auto remove_op = - std::make_shared(optional, n.sym_, std::vector{label}); + auto remove_op = std::make_shared(optional, n.sym_, labels); EXPECT_EQ(0, CountIterable(dba.Vertices(memgraph::storage::View::OLD))); auto context = MakeContext(this->storage, symbol_table, &dba); EXPECT_EQ(1, PullAll(*remove_op, &context)); @@ -1906,7 +1918,7 @@ TYPED_TEST(QueryPlanTest, DeleteRemoveLabels) { auto n = MakeScanAll(this->storage, symbol_table, "n"); auto n_get = this->storage.template Create("n")->MapTo(n.sym_); auto delete_op = std::make_shared(n.op_, std::vector{n_get}, false); - std::vector labels{dba.NameToLabel("label")}; + std::vector labels{dba.NameToLabel("label")}; 
auto rem_op = std::make_shared(delete_op, n.sym_, labels); auto accumulate_op = std::make_shared(rem_op, rem_op->ModifiedSymbols(symbol_table), true); diff --git a/tests/unit/query_plan_match_filter_return.cpp b/tests/unit/query_plan_match_filter_return.cpp index d5468b6b5..925c90c3f 100644 --- a/tests/unit/query_plan_match_filter_return.cpp +++ b/tests/unit/query_plan_match_filter_return.cpp @@ -315,11 +315,12 @@ TYPED_TEST(QueryPlan, NodeFilterLabelsAndProperties) { // make a scan all auto n = MakeScanAll(this->storage, symbol_table, "n"); - n.node_->labels_.emplace_back(this->storage.GetLabelIx(dba.LabelToName(label))); + std::vector labels; + labels.emplace_back(this->storage.GetLabelIx(dba.LabelToName(label))); std::get<0>(n.node_->properties_)[this->storage.GetPropertyIx(property.first)] = LITERAL(42); // node filtering - auto *filter_expr = AND(this->storage.template Create(n.node_->identifier_, n.node_->labels_), + auto *filter_expr = AND(this->storage.template Create(n.node_->identifier_, labels), EQ(PROPERTY_LOOKUP(dba, n.node_->identifier_, property), LITERAL(42))); auto node_filter = std::make_shared(n.op_, std::vector>{}, filter_expr); @@ -366,11 +367,12 @@ TYPED_TEST(QueryPlan, NodeFilterMultipleLabels) { // make a scan all auto n = MakeScanAll(this->storage, symbol_table, "n"); - n.node_->labels_.emplace_back(this->storage.GetLabelIx(dba.LabelToName(label1))); - n.node_->labels_.emplace_back(this->storage.GetLabelIx(dba.LabelToName(label2))); + std::vector labels; + labels.emplace_back(this->storage.GetLabelIx(dba.LabelToName(label1))); + labels.emplace_back(this->storage.GetLabelIx(dba.LabelToName(label2))); // node filtering - auto *filter_expr = this->storage.template Create(n.node_->identifier_, n.node_->labels_); + auto *filter_expr = this->storage.template Create(n.node_->identifier_, labels); auto node_filter = std::make_shared(n.op_, std::vector>{}, filter_expr); // make a named expression and a produce @@ -2805,9 +2807,10 @@ TYPED_TEST(QueryPlan, OptionalMatchThenExpandToMissingNode) { // OPTIONAL MATCH (n :missing) auto n = MakeScanAll(this->storage, symbol_table, "n"); auto label_missing = "missing"; - n.node_->labels_.emplace_back(this->storage.GetLabelIx(label_missing)); + std::vector labels; + labels.emplace_back(this->storage.GetLabelIx(label_missing)); - auto *filter_expr = this->storage.template Create(n.node_->identifier_, n.node_->labels_); + auto *filter_expr = this->storage.template Create(n.node_->identifier_, labels); auto node_filter = std::make_shared(n.op_, std::vector>{}, filter_expr); auto optional = std::make_shared(nullptr, node_filter, std::vector{n.sym_}); // WITH n @@ -3619,7 +3622,8 @@ class ExistsFixture : public testing::Test { exists_expression->MapTo(symbol_table.CreateAnonymousSymbol()); auto scan_all = MakeScanAll(storage, symbol_table, "n"); - scan_all.node_->labels_.emplace_back(storage.GetLabelIx(match_label)); + std::vector labels; + labels.emplace_back(storage.GetLabelIx(match_label)); std::shared_ptr last_op = std::make_shared( nullptr, scan_all.sym_, dest_sym, edge_sym, direction, edge_types, false, memgraph::storage::View::OLD); @@ -3656,8 +3660,7 @@ class ExistsFixture : public testing::Test { last_op = std::make_shared(std::move(last_op), storage.Create(1)); last_op = std::make_shared(std::move(last_op), symbol_table.at(*exists_expression)); - auto *total_expression = - AND(storage.Create(scan_all.node_->identifier_, scan_all.node_->labels_), exists_expression); + auto *total_expression = 
AND(storage.Create(scan_all.node_->identifier_, labels), exists_expression); auto filter = std::make_shared(scan_all.op_, std::vector>{last_op}, total_expression); @@ -3709,7 +3712,8 @@ class ExistsFixture : public testing::Test { exists_expression2->MapTo(symbol_table.CreateAnonymousSymbol()); auto scan_all = MakeScanAll(storage, symbol_table, "n"); - scan_all.node_->labels_.emplace_back(storage.GetLabelIx(match_label)); + std::vector labels; + labels.emplace_back(storage.GetLabelIx(match_label)); std::shared_ptr last_op = std::make_shared( nullptr, scan_all.sym_, dest_sym, edge_sym, direction, first_edge_type, false, memgraph::storage::View::OLD); @@ -3721,7 +3725,7 @@ class ExistsFixture : public testing::Test { last_op2 = std::make_shared(std::move(last_op2), storage.Create(1)); last_op2 = std::make_shared(std::move(last_op2), symbol_table.at(*exists_expression2)); - Expression *total_expression = storage.Create(scan_all.node_->identifier_, scan_all.node_->labels_); + Expression *total_expression = storage.Create(scan_all.node_->identifier_, labels); if (or_flag) { total_expression = AND(total_expression, OR(exists_expression, exists_expression2)); @@ -3841,7 +3845,11 @@ TYPED_TEST(SubqueriesFeature, BasicCartesianWithFilter) { // MATCH (n) WHERE n.prop = 2 CALL { MATCH (m) RETURN m } RETURN n, m auto n = MakeScanAll(this->storage, this->symbol_table, "n"); - auto *filter_expr = AND(this->storage.template Create(n.node_->identifier_, n.node_->labels_), + std::vector labels; + for (const auto &label : n.node_->labels_) { + labels.emplace_back(std::get(label)); + } + auto *filter_expr = AND(this->storage.template Create(n.node_->identifier_, labels), EQ(PROPERTY_LOOKUP(this->dba, n.node_->identifier_, this->prop), LITERAL(2))); auto filter = std::make_shared(n.op_, std::vector>{}, filter_expr); @@ -3866,11 +3874,15 @@ TYPED_TEST(SubqueriesFeature, BasicCartesianWithFilterInsideSubquery) { // MATCH (n) CALL { MATCH (m) WHERE m.prop = 2 RETURN m } RETURN n, m auto n = MakeScanAll(this->storage, this->symbol_table, "n"); + std::vector labels; + for (const auto &label : n.node_->labels_) { + labels.emplace_back(std::get(label)); + } auto return_n = NEXPR("n", IDENT("n")->MapTo(n.sym_))->MapTo(this->symbol_table.CreateSymbol("named_expression_1", true)); auto m = MakeScanAll(this->storage, this->symbol_table, "m"); - auto *filter_expr = AND(this->storage.template Create(n.node_->identifier_, n.node_->labels_), + auto *filter_expr = AND(this->storage.template Create(n.node_->identifier_, labels), EQ(PROPERTY_LOOKUP(this->dba, n.node_->identifier_, this->prop), LITERAL(2))); auto filter = std::make_shared(m.op_, std::vector>{}, filter_expr); @@ -3891,7 +3903,11 @@ TYPED_TEST(SubqueriesFeature, BasicCartesianWithFilterNoResults) { // MATCH (n) WHERE n.prop = 3 CALL { MATCH (m) RETURN m } RETURN n, m auto n = MakeScanAll(this->storage, this->symbol_table, "n"); - auto *filter_expr = AND(this->storage.template Create(n.node_->identifier_, n.node_->labels_), + std::vector labels; + for (const auto &label : n.node_->labels_) { + labels.emplace_back(std::get(label)); + } + auto *filter_expr = AND(this->storage.template Create(n.node_->identifier_, labels), EQ(PROPERTY_LOOKUP(this->dba, n.node_->identifier_, this->prop), LITERAL(3))); auto filter = std::make_shared(n.op_, std::vector>{}, filter_expr); diff --git a/tests/unit/query_plan_operator_to_string.cpp b/tests/unit/query_plan_operator_to_string.cpp index 9696050f2..d60d38251 100644 --- a/tests/unit/query_plan_operator_to_string.cpp +++ 
b/tests/unit/query_plan_operator_to_string.cpp
@@ -290,9 +290,10 @@ TYPED_TEST(OperatorToStringTest, SetProperties) {
 TYPED_TEST(OperatorToStringTest, SetLabels) {
   auto node_sym = this->GetSymbol("node");
   std::shared_ptr<LogicalOperator> last_op = std::make_shared<ScanAll>(nullptr, node_sym);
-  last_op = std::make_shared<SetLabels>(
-      last_op, node_sym,
-      std::vector<memgraph::storage::LabelId>{this->dba.NameToLabel("label1"), this->dba.NameToLabel("label2")});
+  std::vector<StorageLabelType> labels;
+  labels.emplace_back(this->dba.NameToLabel("label1"));
+  labels.emplace_back(this->dba.NameToLabel("label2"));
+  last_op = std::make_shared<SetLabels>(last_op, node_sym, labels);

   std::string expected_string{"SetLabels"};
   EXPECT_EQ(last_op->ToString(), expected_string);
@@ -311,9 +312,10 @@ TYPED_TEST(OperatorToStringTest, RemoveProperty) {
 TYPED_TEST(OperatorToStringTest, RemoveLabels) {
   auto node_sym = this->GetSymbol("node");
   std::shared_ptr<LogicalOperator> last_op = std::make_shared<ScanAll>(nullptr, node_sym);
-  last_op = std::make_shared<RemoveLabels>(
-      last_op, node_sym,
-      std::vector<memgraph::storage::LabelId>{this->dba.NameToLabel("label1"), this->dba.NameToLabel("label2")});
+  std::vector<StorageLabelType> labels;
+  labels.emplace_back(this->dba.NameToLabel("label1"));
+  labels.emplace_back(this->dba.NameToLabel("label2"));
+  last_op = std::make_shared<RemoveLabels>(last_op, node_sym, labels);

   std::string expected_string{"RemoveLabels"};
   EXPECT_EQ(last_op->ToString(), expected_string);
diff --git a/tests/unit/query_plan_read_write_typecheck.cpp b/tests/unit/query_plan_read_write_typecheck.cpp
index f9f14902b..a6af9a03e 100644
--- a/tests/unit/query_plan_read_write_typecheck.cpp
+++ b/tests/unit/query_plan_read_write_typecheck.cpp
@@ -183,10 +183,10 @@ TYPED_TEST(ReadWriteTypeCheckTest, SetRemovePropertiesLabels) {
                                                  plan::SetProperties::Op::REPLACE);
   last_op = std::make_shared<plan::SetLabels>(
       last_op, node_sym,
-      std::vector<memgraph::storage::LabelId>{this->dba.NameToLabel("label1"), this->dba.NameToLabel("label2")});
+      std::vector<StorageLabelType>{this->dba.NameToLabel("label1"), this->dba.NameToLabel("label2")});
   last_op = std::make_shared<plan::RemoveLabels>(
       last_op, node_sym,
-      std::vector<memgraph::storage::LabelId>{this->dba.NameToLabel("label1"), this->dba.NameToLabel("label2")});
+      std::vector<StorageLabelType>{this->dba.NameToLabel("label1"), this->dba.NameToLabel("label2")});

   this->CheckPlanType(last_op.get(), RWType::RW);
 }

From 2cab07429e4223241ac6dd3b0e44c3ea46e6b663 Mon Sep 17 00:00:00 2001
From: Josipmrden
Date: Wed, 13 Mar 2024 10:09:22 +0100
Subject: [PATCH 03/16] Add new PR template (#1798)

---
 .github/pull_request_template.md | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index 72ab1ea66..7a14cf12e 100644
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -1,14 +1,28 @@
+### Description
+
+Please briefly explain the changes you made here.
+
+
+Please delete either the [master < Epic] or [master < Task] part, depending on your needs.
+
 [master < Epic] PR
-- [ ] Check, and update documentation if necessary
 - [ ] Write E2E tests
 - [ ] Compare the [benchmarking results](https://bench-graph.memgraph.com/) between the master branch and the Epic branch
 - [ ] Provide the full content or a guide for the final git message
+  - [FINAL GIT MESSAGE]

 [master < Task] PR
-- [ ] Check, and update documentation if necessary
 - [ ] Provide the full content or a guide for the final git message
+  - **[FINAL GIT MESSAGE]**

-To keep docs changelog up to date, one more thing to do:
-- [ ] Write a release note here, including added/changed clauses
+### Documentation checklist
+- [ ] Add the documentation label tag
+- [ ] Add the bug / feature label tag
+- [ ] Add the milestone for which this feature is intended
+  - If not known, set for a later milestone
+- [ ] Write a release note, including added/changed clauses
+  - **[Release note text]**
+- [ ] Link the documentation PR here
+  - **[Documentation PR link]**
 - [ ] Tag someone from docs team in the comments

From 24f8a14b43baf4168075ae6ed54cef54c9d98cdd Mon Sep 17 00:00:00 2001
From: Andi
Date: Wed, 13 Mar 2024 14:04:27 +0100
Subject: [PATCH 04/16] Improve registration queries in HA environment (#1809)

---
 src/query/frontend/ast/ast.hpp                |  24 ++--
 .../frontend/ast/cypher_main_visitor.cpp      |  36 ++----
 .../opencypher/grammar/MemgraphCypher.g4      |  18 ++-
 src/query/interpreter.cpp                     |  99 ++++++++++++++---
 .../coord_cluster_registration.py             | 104 +++++++++++++-----
 tests/e2e/high_availability/coordinator.py    |   2 +-
 .../disable_writing_on_main_after_restart.py  |  13 ++-
 .../high_availability/distributed_coords.py   | 101 +++++++++--------
 .../not_replicate_from_old_main.py            |  10 +-
 .../high_availability/single_coordinator.py   |  40 +++----
 tests/e2e/high_availability/workloads.yaml    |   6 +-
 tests/unit/cypher_main_visitor.cpp            |  90 ++++++++++++++-
 12 files changed, 369 insertions(+), 174 deletions(-)

diff --git a/src/query/frontend/ast/ast.hpp b/src/query/frontend/ast/ast.hpp
index f136975bc..29f7be3cf 100644
--- a/src/query/frontend/ast/ast.hpp
+++ b/src/query/frontend/ast/ast.hpp
@@ -26,6 +26,11 @@
 namespace memgraph::query {
+constexpr std::string_view kBoltServer = "bolt_server";
+constexpr std::string_view kReplicationServer = "replication_server";
+constexpr std::string_view kCoordinatorServer = "coordinator_server";
+constexpr std::string_view kManagementServer = "management_server";
+
 struct LabelIx {
   static const utils::TypeInfo kType;
   const utils::TypeInfo &GetTypeInfo() const { return kType; }
@@ -3140,24 +3145,21 @@ class CoordinatorQuery : public memgraph::query::Query {
   DEFVISITABLE(QueryVisitor);

   memgraph::query::CoordinatorQuery::Action action_;
-  std::string instance_name_;
-  memgraph::query::Expression *replication_socket_address_{nullptr};
-  memgraph::query::Expression *coordinator_socket_address_{nullptr};
-  memgraph::query::Expression *raft_socket_address_{nullptr};
-  memgraph::query::Expression *raft_server_id_{nullptr};
+  std::string instance_name_{};
+  std::unordered_map<memgraph::query::Expression *, memgraph::query::Expression *> configs_;
+  memgraph::query::Expression *coordinator_server_id_{nullptr};
   memgraph::query::CoordinatorQuery::SyncMode sync_mode_;

   CoordinatorQuery *Clone(AstStorage *storage) const override {
     auto *object = storage->Create<CoordinatorQuery>();
+
     object->action_ = action_;
     object->instance_name_ = instance_name_;
-    object->replication_socket_address_ =
-        replication_socket_address_ ? replication_socket_address_->Clone(storage) : nullptr;
+    object->coordinator_server_id_ = coordinator_server_id_ ? coordinator_server_id_->Clone(storage) : nullptr;
     object->sync_mode_ = sync_mode_;
-    object->coordinator_socket_address_ =
-        coordinator_socket_address_ ? coordinator_socket_address_->Clone(storage) : nullptr;
-    object->raft_socket_address_ = raft_socket_address_ ? raft_socket_address_->Clone(storage) : nullptr;
-    object->raft_server_id_ = raft_server_id_ ? raft_server_id_->Clone(storage) : nullptr;
+    for (const auto &[key, value] : configs_) {
+      object->configs_[key->Clone(storage)] = value->Clone(storage);
+    }
     return object;
   }

diff --git a/src/query/frontend/ast/cypher_main_visitor.cpp b/src/query/frontend/ast/cypher_main_visitor.cpp
index ceebe2815..6da48c97c 100644
--- a/src/query/frontend/ast/cypher_main_visitor.cpp
+++ b/src/query/frontend/ast/cypher_main_visitor.cpp
@@ -398,24 +398,17 @@ antlrcpp::Any CypherMainVisitor::visitRegisterReplica(MemgraphCypher::RegisterRe
 antlrcpp::Any CypherMainVisitor::visitRegisterInstanceOnCoordinator(
     MemgraphCypher::RegisterInstanceOnCoordinatorContext *ctx) {
   auto *coordinator_query = storage_->Create<CoordinatorQuery>();
-  if (!ctx->replicationSocketAddress()->literal()->StringLiteral()) {
-    throw SemanticException("Replication socket address should be a string literal!");
-  }
-  if (!ctx->coordinatorSocketAddress()->literal()->StringLiteral()) {
-    throw SemanticException("Coordinator socket address should be a string literal!");
-  }
   coordinator_query->action_ = CoordinatorQuery::Action::REGISTER_INSTANCE;
-  coordinator_query->replication_socket_address_ =
-      std::any_cast<Expression *>(ctx->replicationSocketAddress()->accept(this));
-  coordinator_query->coordinator_socket_address_ =
-      std::any_cast<Expression *>(ctx->coordinatorSocketAddress()->accept(this));
   coordinator_query->instance_name_ = std::any_cast<std::string>(ctx->instanceName()->symbolicName()->accept(this));
-  if (ctx->ASYNC()) {
-    coordinator_query->sync_mode_ = memgraph::query::CoordinatorQuery::SyncMode::ASYNC;
-  } else {
-    coordinator_query->sync_mode_ = memgraph::query::CoordinatorQuery::SyncMode::SYNC;
-  }
+  coordinator_query->configs_ =
+      std::any_cast<std::unordered_map<Expression *, Expression *>>(ctx->configsMap->accept(this));
+  coordinator_query->sync_mode_ = [ctx]() {
+    if (ctx->ASYNC()) {
+      return CoordinatorQuery::SyncMode::ASYNC;
+    }
+    return CoordinatorQuery::SyncMode::SYNC;
+  }();

   return coordinator_query;
 }

 antlrcpp::Any CypherMainVisitor::visitUnregisterInstanceOnCoordinator(
     MemgraphCypher::UnregisterInstanceOnCoordinatorContext *ctx) {
@@ -431,17 +424,10 @@ antlrcpp::Any CypherMainVisitor::visitAddCoordinatorInstance(MemgraphCypher::AddCoordinatorInstanceContext *ctx) {
   auto *coordinator_query = storage_->Create<CoordinatorQuery>();
-  if (!ctx->raftSocketAddress()->literal()->StringLiteral()) {
-    throw SemanticException("Raft socket address should be a string literal!");
-  }
-
-  if (!ctx->raftServerId()->literal()->numberLiteral()) {
-    throw SemanticException("Raft server id should be a number literal!");
-  }
-
   coordinator_query->action_ = CoordinatorQuery::Action::ADD_COORDINATOR_INSTANCE;
-  coordinator_query->raft_socket_address_ = std::any_cast<Expression *>(ctx->raftSocketAddress()->accept(this));
-  coordinator_query->raft_server_id_ = std::any_cast<Expression *>(ctx->raftServerId()->accept(this));
+  coordinator_query->coordinator_server_id_ = std::any_cast<Expression *>(ctx->coordinatorServerId()->accept(this));
+  coordinator_query->configs_ =
+      std::any_cast<std::unordered_map<Expression *, Expression *>>(ctx->configsMap->accept(this));

   return coordinator_query;
 }
diff --git a/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 b/src/query/frontend/opencypher/grammar/MemgraphCypher.g4
index 0147bba04..378310c22 100644
--- a/src/query/frontend/opencypher/grammar/MemgraphCypher.g4
+++ b/src/query/frontend/opencypher/grammar/MemgraphCypher.g4
@@ -388,22 +388,22 @@ instanceName : symbolicName ;

 socketAddress : literal ;

-coordinatorSocketAddress : literal ;
-replicationSocketAddress : literal ;
-raftSocketAddress : literal ;
-
 registerReplica : REGISTER REPLICA instanceName ( SYNC | ASYNC ) TO socketAddress ;

-registerInstanceOnCoordinator : REGISTER INSTANCE instanceName ON coordinatorSocketAddress ( AS ASYNC ) ? WITH replicationSocketAddress ;
+configKeyValuePair : literal ':' literal ;
+
+configMap : '{' ( configKeyValuePair ( ',' configKeyValuePair )* )? '}' ;
+
+registerInstanceOnCoordinator : REGISTER INSTANCE instanceName ( AS ASYNC ) ? WITH CONFIG configsMap=configMap ;

 unregisterInstanceOnCoordinator : UNREGISTER INSTANCE instanceName ;

 setInstanceToMain : SET INSTANCE instanceName TO MAIN ;

-raftServerId : literal ;
+coordinatorServerId : literal ;

-addCoordinatorInstance : ADD COORDINATOR raftServerId ON raftSocketAddress ;
+addCoordinatorInstance : ADD COORDINATOR coordinatorServerId WITH CONFIG configsMap=configMap ;

 dropReplica : DROP REPLICA instanceName ;

@@ -457,10 +457,6 @@ commonCreateStreamConfig : TRANSFORM transformationName=procedureName

 createStream : kafkaCreateStream | pulsarCreateStream ;

-configKeyValuePair : literal ':' literal ;
-
-configMap : '{' ( configKeyValuePair ( ',' configKeyValuePair )* )? '}' ;
-
 kafkaCreateStreamConfig : TOPICS topicNames
                         | CONSUMER_GROUP consumerGroup=symbolicNameWithDotsAndMinus
                         | BOOTSTRAP_SERVERS bootstrapServers=literal
diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp
index ce74586d3..a5c81cc72 100644
--- a/src/query/interpreter.cpp
+++ b/src/query/interpreter.cpp
@@ -1146,6 +1146,27 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, const Parameters &
 }

 #ifdef MG_ENTERPRISE
+
+auto ParseConfigMap(std::unordered_map<Expression *, Expression *> const &config_map,
+                    ExpressionVisitor<TypedValue> &evaluator)
+    -> std::optional<std::map<std::string, std::string, std::less<>>> {
+  if (std::ranges::any_of(config_map, [&evaluator](const auto &entry) {
+        auto key_expr = entry.first->Accept(evaluator);
+        auto value_expr = entry.second->Accept(evaluator);
+        return !key_expr.IsString() || !value_expr.IsString();
+      })) {
+    spdlog::error("Config map must contain only string keys and values!");
+    return std::nullopt;
+  }
+
+  return ranges::views::all(config_map) | ranges::views::transform([&evaluator](const auto &entry) {
+           auto key_expr = entry.first->Accept(evaluator);
+           auto value_expr = entry.second->Accept(evaluator);
+           return std::pair{key_expr.ValueString(), value_expr.ValueString()};
+         }) |
+         ranges::to<std::map<std::string, std::string, std::less<>>>;
+}
+
 Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Parameters &parameters,
                                 coordination::CoordinatorState *coordinator_state,
                                 const query::InterpreterConfig &config, std::vector<Notification> *notifications) {
@@ -1173,17 +1194,37 @@ Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param
       EvaluationContext evaluation_context{.timestamp = QueryTimestamp(), .parameters = parameters};
       auto evaluator = PrimitiveLiteralExpressionEvaluator{evaluation_context};

-      auto raft_socket_address_tv = coordinator_query->raft_socket_address_->Accept(evaluator);
-      auto raft_server_id_tv = coordinator_query->raft_server_id_->Accept(evaluator);
-      callback.fn = [handler = CoordQueryHandler{*coordinator_state}, raft_socket_address_tv,
-                     raft_server_id_tv]() mutable {
-        handler.AddCoordinatorInstance(raft_server_id_tv.ValueInt(), std::string(raft_socket_address_tv.ValueString()));
+      auto config_map = ParseConfigMap(coordinator_query->configs_, evaluator);
+      if (!config_map) {
+        throw QueryRuntimeException("Failed to parse config map!");
+      }
+
+      if (config_map->size() != 2) {
+        throw QueryRuntimeException("Config map must contain exactly 2 entries: {} and {}!", kCoordinatorServer,
+                                    kBoltServer);
+      }
+
+      auto const &coordinator_server_it = config_map->find(kCoordinatorServer);
+      if (coordinator_server_it == config_map->end()) {
+        throw QueryRuntimeException("Config map must contain {} entry!", kCoordinatorServer);
+      }
+
+      auto const &bolt_server_it = config_map->find(kBoltServer);
+      if (bolt_server_it == config_map->end()) {
+        throw QueryRuntimeException("Config map must contain {} entry!", kBoltServer);
+      }
+
+      auto coord_server_id = coordinator_query->coordinator_server_id_->Accept(evaluator).ValueInt();
+
+      callback.fn = [handler = CoordQueryHandler{*coordinator_state}, coord_server_id,
+                     coordinator_server = coordinator_server_it->second]() mutable {
+        handler.AddCoordinatorInstance(coord_server_id, coordinator_server);
         return std::vector<std::vector<TypedValue>>();
       };
       notifications->emplace_back(SeverityLevel::INFO, NotificationCode::ADD_COORDINATOR_INSTANCE,
                                   fmt::format("Coordinator has added instance {} on coordinator server {}.",
-                                              coordinator_query->instance_name_, raft_socket_address_tv.ValueString()));
+                                              coordinator_query->instance_name_, coordinator_server_it->second));
       return callback;
     }
     case CoordinatorQuery::Action::REGISTER_INSTANCE: {
@@ -1194,27 +1235,49 @@ Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param
       // the argument to Callback.
       EvaluationContext evaluation_context{.timestamp = QueryTimestamp(), .parameters = parameters};
       auto evaluator = PrimitiveLiteralExpressionEvaluator{evaluation_context};
+      auto config_map = ParseConfigMap(coordinator_query->configs_, evaluator);

-      auto coordinator_socket_address_tv = coordinator_query->coordinator_socket_address_->Accept(evaluator);
-      auto replication_socket_address_tv = coordinator_query->replication_socket_address_->Accept(evaluator);
-      callback.fn = [handler = CoordQueryHandler{*coordinator_state}, coordinator_socket_address_tv,
-                     replication_socket_address_tv,
+      if (!config_map) {
+        throw QueryRuntimeException("Failed to parse config map!");
+      }
+
+      if (config_map->size() != 3) {
+        throw QueryRuntimeException("Config map must contain exactly 3 entries: {}, {} and {}!", kBoltServer,
+                                    kManagementServer, kReplicationServer);
+      }
+
+      auto const &replication_server_it = config_map->find(kReplicationServer);
+      if (replication_server_it == config_map->end()) {
+        throw QueryRuntimeException("Config map must contain {} entry!", kReplicationServer);
+      }
+
+      auto const &management_server_it = config_map->find(kManagementServer);
+      if (management_server_it == config_map->end()) {
+        throw QueryRuntimeException("Config map must contain {} entry!", kManagementServer);
+      }
+
+      auto const &bolt_server_it = config_map->find(kBoltServer);
+      if (bolt_server_it == config_map->end()) {
+        throw QueryRuntimeException("Config map must contain {} entry!", kBoltServer);
+      }
+
+      callback.fn = [handler = CoordQueryHandler{*coordinator_state},
                      instance_health_check_frequency_sec = config.instance_health_check_frequency_sec,
+                     management_server = management_server_it->second,
+                     replication_server = replication_server_it->second, bolt_server = bolt_server_it->second,
                      instance_name = coordinator_query->instance_name_,
                      instance_down_timeout_sec = config.instance_down_timeout_sec,
                      instance_get_uuid_frequency_sec = config.instance_get_uuid_frequency_sec,
                      sync_mode = coordinator_query->sync_mode_]()
mutable { - handler.RegisterReplicationInstance(std::string(coordinator_socket_address_tv.ValueString()), - std::string(replication_socket_address_tv.ValueString()), - instance_health_check_frequency_sec, instance_down_timeout_sec, - instance_get_uuid_frequency_sec, instance_name, sync_mode); + handler.RegisterReplicationInstance(management_server, replication_server, instance_health_check_frequency_sec, + instance_down_timeout_sec, instance_get_uuid_frequency_sec, instance_name, + sync_mode); return std::vector>(); }; - notifications->emplace_back( - SeverityLevel::INFO, NotificationCode::REGISTER_REPLICATION_INSTANCE, - fmt::format("Coordinator has registered coordinator server on {} for instance {}.", - coordinator_socket_address_tv.ValueString(), coordinator_query->instance_name_)); + notifications->emplace_back(SeverityLevel::INFO, NotificationCode::REGISTER_REPLICATION_INSTANCE, + fmt::format("Coordinator has registered replication instance on {} for instance {}.", + bolt_server_it->second, coordinator_query->instance_name_)); return callback; } case CoordinatorQuery::Action::UNREGISTER_INSTANCE: diff --git a/tests/e2e/high_availability/coord_cluster_registration.py b/tests/e2e/high_availability/coord_cluster_registration.py index 774c6dca1..13aaf27fe 100644 --- a/tests/e2e/high_availability/coord_cluster_registration.py +++ b/tests/e2e/high_availability/coord_cluster_registration.py @@ -117,17 +117,26 @@ def test_register_repl_instances_then_coordinators(): coordinator3_cursor = connect(host="localhost", port=7692).cursor() execute_and_fetch_all( - coordinator3_cursor, "REGISTER INSTANCE instance_1 ON '127.0.0.1:10011' WITH '127.0.0.1:10001'" + coordinator3_cursor, + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", ) execute_and_fetch_all( - coordinator3_cursor, "REGISTER INSTANCE instance_2 ON '127.0.0.1:10012' WITH '127.0.0.1:10002'" + coordinator3_cursor, + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", ) execute_and_fetch_all( - coordinator3_cursor, "REGISTER INSTANCE instance_3 ON '127.0.0.1:10013' WITH '127.0.0.1:10003'" + coordinator3_cursor, + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", ) execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN") - assert add_coordinator(coordinator3_cursor, "ADD COORDINATOR 1 ON '127.0.0.1:10111'") - assert add_coordinator(coordinator3_cursor, "ADD COORDINATOR 2 ON '127.0.0.1:10112'") + assert add_coordinator( + coordinator3_cursor, + "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", + ) + assert add_coordinator( + coordinator3_cursor, + "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", + ) def check_coordinator3(): return sorted(list(execute_and_fetch_all(coordinator3_cursor, "SHOW INSTANCES"))) @@ -172,16 +181,25 @@ def test_register_coordinator_then_repl_instances(): coordinator3_cursor = connect(host="localhost", port=7692).cursor() - assert add_coordinator(coordinator3_cursor, "ADD COORDINATOR 1 ON '127.0.0.1:10111'") - assert add_coordinator(coordinator3_cursor, "ADD COORDINATOR 2 ON '127.0.0.1:10112'") - execute_and_fetch_all( - coordinator3_cursor, "REGISTER INSTANCE 
instance_1 ON '127.0.0.1:10011' WITH '127.0.0.1:10001'" + assert add_coordinator( + coordinator3_cursor, + "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", + ) + assert add_coordinator( + coordinator3_cursor, + "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", ) execute_and_fetch_all( - coordinator3_cursor, "REGISTER INSTANCE instance_2 ON '127.0.0.1:10012' WITH '127.0.0.1:10002'" + coordinator3_cursor, + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", ) execute_and_fetch_all( - coordinator3_cursor, "REGISTER INSTANCE instance_3 ON '127.0.0.1:10013' WITH '127.0.0.1:10003'" + coordinator3_cursor, + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", + ) + execute_and_fetch_all( + coordinator3_cursor, + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", ) execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN") @@ -228,16 +246,25 @@ def test_coordinators_communication_with_restarts(): coordinator3_cursor = connect(host="localhost", port=7692).cursor() - assert add_coordinator(coordinator3_cursor, "ADD COORDINATOR 1 ON '127.0.0.1:10111'") - assert add_coordinator(coordinator3_cursor, "ADD COORDINATOR 2 ON '127.0.0.1:10112'") - execute_and_fetch_all( - coordinator3_cursor, "REGISTER INSTANCE instance_1 ON '127.0.0.1:10011' WITH '127.0.0.1:10001'" + assert add_coordinator( + coordinator3_cursor, + "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", + ) + assert add_coordinator( + coordinator3_cursor, + "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", ) execute_and_fetch_all( - coordinator3_cursor, "REGISTER INSTANCE instance_2 ON '127.0.0.1:10012' WITH '127.0.0.1:10002'" + coordinator3_cursor, + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", ) execute_and_fetch_all( - coordinator3_cursor, "REGISTER INSTANCE instance_3 ON '127.0.0.1:10013' WITH '127.0.0.1:10003'" + coordinator3_cursor, + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", + ) + execute_and_fetch_all( + coordinator3_cursor, + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", ) execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN") @@ -295,16 +322,25 @@ def test_unregister_replicas(kill_instance): coordinator2_cursor = connect(host="localhost", port=7691).cursor() coordinator3_cursor = connect(host="localhost", port=7692).cursor() - assert add_coordinator(coordinator3_cursor, "ADD COORDINATOR 1 ON '127.0.0.1:10111'") - assert add_coordinator(coordinator3_cursor, "ADD COORDINATOR 2 ON '127.0.0.1:10112'") - execute_and_fetch_all( - coordinator3_cursor, "REGISTER INSTANCE instance_1 ON '127.0.0.1:10011' WITH '127.0.0.1:10001'" + assert add_coordinator( + coordinator3_cursor, + "ADD COORDINATOR 1 WITH CONFIG 
{'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", + ) + assert add_coordinator( + coordinator3_cursor, + "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", ) execute_and_fetch_all( - coordinator3_cursor, "REGISTER INSTANCE instance_2 ON '127.0.0.1:10012' WITH '127.0.0.1:10002'" + coordinator3_cursor, + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", ) execute_and_fetch_all( - coordinator3_cursor, "REGISTER INSTANCE instance_3 ON '127.0.0.1:10013' WITH '127.0.0.1:10003'" + coordinator3_cursor, + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", + ) + execute_and_fetch_all( + coordinator3_cursor, + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", ) execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN") @@ -429,16 +465,26 @@ def test_unregister_main(): coordinator1_cursor = connect(host="localhost", port=7690).cursor() coordinator2_cursor = connect(host="localhost", port=7691).cursor() coordinator3_cursor = connect(host="localhost", port=7692).cursor() - assert add_coordinator(coordinator3_cursor, "ADD COORDINATOR 1 ON '127.0.0.1:10111'") - assert add_coordinator(coordinator3_cursor, "ADD COORDINATOR 2 ON '127.0.0.1:10112'") - execute_and_fetch_all( - coordinator3_cursor, "REGISTER INSTANCE instance_1 ON '127.0.0.1:10011' WITH '127.0.0.1:10001'" + + assert add_coordinator( + coordinator3_cursor, + "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", + ) + assert add_coordinator( + coordinator3_cursor, + "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", ) execute_and_fetch_all( - coordinator3_cursor, "REGISTER INSTANCE instance_2 ON '127.0.0.1:10012' WITH '127.0.0.1:10002'" + coordinator3_cursor, + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", ) execute_and_fetch_all( - coordinator3_cursor, "REGISTER INSTANCE instance_3 ON '127.0.0.1:10013' WITH '127.0.0.1:10003'" + coordinator3_cursor, + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", + ) + execute_and_fetch_all( + coordinator3_cursor, + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", ) execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN") diff --git a/tests/e2e/high_availability/coordinator.py b/tests/e2e/high_availability/coordinator.py index ed55dff9e..8a6ae1a0a 100644 --- a/tests/e2e/high_availability/coordinator.py +++ b/tests/e2e/high_availability/coordinator.py @@ -79,7 +79,7 @@ def test_main_and_replicas_cannot_register_coord_server(port): with pytest.raises(Exception) as e: execute_and_fetch_all( cursor, - "REGISTER INSTANCE instance_1 ON '127.0.0.1:10001' WITH '127.0.0.1:10011';", + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'management_server': '127.0.0.1:10011', 'replication_server': 
'127.0.0.1:10001'};", ) assert str(e.value) == "Only coordinator can register coordinator server!" diff --git a/tests/e2e/high_availability/disable_writing_on_main_after_restart.py b/tests/e2e/high_availability/disable_writing_on_main_after_restart.py index 363ce1c41..517bf346f 100644 --- a/tests/e2e/high_availability/disable_writing_on_main_after_restart.py +++ b/tests/e2e/high_availability/disable_writing_on_main_after_restart.py @@ -133,11 +133,18 @@ def test_writing_disabled_on_main_restart(): coordinator3_cursor = connect(host="localhost", port=7692).cursor() execute_and_fetch_all( - coordinator3_cursor, "REGISTER INSTANCE instance_3 ON '127.0.0.1:10013' WITH '127.0.0.1:10003'" + coordinator3_cursor, + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", ) execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN") - assert add_coordinator(coordinator3_cursor, "ADD COORDINATOR 1 ON '127.0.0.1:10111'") - assert add_coordinator(coordinator3_cursor, "ADD COORDINATOR 2 ON '127.0.0.1:10112'") + assert add_coordinator( + coordinator3_cursor, + "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", + ) + assert add_coordinator( + coordinator3_cursor, + "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", + ) def check_coordinator3(): return sorted(list(execute_and_fetch_all(coordinator3_cursor, "SHOW INSTANCES"))) diff --git a/tests/e2e/high_availability/distributed_coords.py b/tests/e2e/high_availability/distributed_coords.py index 7dc3ef238..59e083545 100644 --- a/tests/e2e/high_availability/distributed_coords.py +++ b/tests/e2e/high_availability/distributed_coords.py @@ -110,11 +110,11 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { ], "log_file": "coordinator3.log", "setup_queries": [ - "ADD COORDINATOR 1 ON '127.0.0.1:10111'", - "ADD COORDINATOR 2 ON '127.0.0.1:10112'", - "REGISTER INSTANCE instance_1 ON '127.0.0.1:10011' WITH '127.0.0.1:10001'", - "REGISTER INSTANCE instance_2 ON '127.0.0.1:10012' WITH '127.0.0.1:10002'", - "REGISTER INSTANCE instance_3 ON '127.0.0.1:10013' WITH '127.0.0.1:10003'", + "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", + "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", "SET INSTANCE instance_3 TO MAIN", ], }, @@ -221,11 +221,11 @@ def test_old_main_comes_back_on_new_leader_as_replica(): interactive_mg_runner.start_all(inner_instances_description) setup_queries = [ - "ADD COORDINATOR 1 ON '127.0.0.1:10111'", - "ADD COORDINATOR 2 ON '127.0.0.1:10112'", - "REGISTER INSTANCE instance_1 ON '127.0.0.1:10011' WITH '127.0.0.1:10001'", - "REGISTER INSTANCE instance_2 ON '127.0.0.1:10012' WITH '127.0.0.1:10002'", - "REGISTER INSTANCE instance_3 ON '127.0.0.1:10013' WITH '127.0.0.1:10003'", + "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 
'coordinator_server': '127.0.0.1:10111'}", + "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", "SET INSTANCE instance_3 TO MAIN", ] coord_cursor_3 = connect(host="localhost", port=7692).cursor() @@ -416,11 +416,11 @@ def test_distributed_automatic_failover_with_leadership_change(): interactive_mg_runner.start_all(inner_instances_description) setup_queries = [ - "ADD COORDINATOR 1 ON '127.0.0.1:10111'", - "ADD COORDINATOR 2 ON '127.0.0.1:10112'", - "REGISTER INSTANCE instance_1 ON '127.0.0.1:10011' WITH '127.0.0.1:10001'", - "REGISTER INSTANCE instance_2 ON '127.0.0.1:10012' WITH '127.0.0.1:10002'", - "REGISTER INSTANCE instance_3 ON '127.0.0.1:10013' WITH '127.0.0.1:10003'", + "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", + "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", "SET INSTANCE instance_3 TO MAIN", ] coord_cursor_3 = connect(host="localhost", port=7692).cursor() @@ -522,7 +522,10 @@ def test_no_leader_after_leader_and_follower_die(): coord_cursor_1 = connect(host="localhost", port=7690).cursor() with pytest.raises(Exception) as e: - execute_and_fetch_all(coord_cursor_1, "REGISTER INSTANCE instance_1 ON '127.0.0.1:10011' WITH '127.0.0.10001'") + execute_and_fetch_all( + coord_cursor_1, + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", + ) assert str(e) == "Couldn't register replica instance since coordinator is not a leader!" 
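The hunks above and below all make the same mechanical change: the positional ON '<management>' WITH '<replication>' endpoints become a single WITH CONFIG map that also carries the instance's Bolt address. As a minimal sketch of the new flow outside the test harness — the mgconsole invocation, the helper function, and the choice of instance are illustrative, assuming a coordinator serving Bolt on 7692 and the same localhost ports as these tests:

    # Illustrative only: drive the new WITH CONFIG syntax from a shell,
    # assuming mgconsole is on PATH and the coordinator listens on 7692.
    run_coord() { echo "$1" | mgconsole --host 127.0.0.1 --port 7692; }

    run_coord "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'};"
    run_coord "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};"
    run_coord "SET INSTANCE instance_1 TO MAIN;"
    run_coord "SHOW INSTANCES;"  # verify the cluster state after registration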
@@ -541,11 +544,11 @@ def test_old_main_comes_back_on_new_leader_as_main(): coord_cursor_3 = connect(host="localhost", port=7692).cursor() setup_queries = [ - "ADD COORDINATOR 1 ON '127.0.0.1:10111'", - "ADD COORDINATOR 2 ON '127.0.0.1:10112'", - "REGISTER INSTANCE instance_1 ON '127.0.0.1:10011' WITH '127.0.0.1:10001'", - "REGISTER INSTANCE instance_2 ON '127.0.0.1:10012' WITH '127.0.0.1:10002'", - "REGISTER INSTANCE instance_3 ON '127.0.0.1:10013' WITH '127.0.0.1:10003'", + "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", + "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", "SET INSTANCE instance_3 TO MAIN", ] @@ -719,12 +722,12 @@ def test_registering_4_coords(): ], "log_file": "coordinator4.log", "setup_queries": [ - "ADD COORDINATOR 1 ON '127.0.0.1:10111';", - "ADD COORDINATOR 2 ON '127.0.0.1:10112';", - "ADD COORDINATOR 3 ON '127.0.0.1:10113';", - "REGISTER INSTANCE instance_1 ON '127.0.0.1:10011' WITH '127.0.0.1:10001'", - "REGISTER INSTANCE instance_2 ON '127.0.0.1:10012' WITH '127.0.0.1:10002'", - "REGISTER INSTANCE instance_3 ON '127.0.0.1:10013' WITH '127.0.0.1:10003'", + "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", + "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", + "ADD COORDINATOR 3 WITH CONFIG {'bolt_server': '127.0.0.1:7692', 'coordinator_server': '127.0.0.1:10113'}", + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", "SET INSTANCE instance_3 TO MAIN", ], }, @@ -854,12 +857,12 @@ def test_registering_coord_log_store(): ], "log_file": "coordinator4.log", "setup_queries": [ - "ADD COORDINATOR 1 ON '127.0.0.1:10111';", - "ADD COORDINATOR 2 ON '127.0.0.1:10112';", - "ADD COORDINATOR 3 ON '127.0.0.1:10113';", - "REGISTER INSTANCE instance_1 ON '127.0.0.1:10011' WITH '127.0.0.1:10001'", - "REGISTER INSTANCE instance_2 ON '127.0.0.1:10012' WITH '127.0.0.1:10002'", - "REGISTER INSTANCE instance_3 ON '127.0.0.1:10013' WITH '127.0.0.1:10003'", + "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", + "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", + "ADD COORDINATOR 3 WITH CONFIG {'bolt_server': '127.0.0.1:7692', 'coordinator_server': '127.0.0.1:10113'}", + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", + "REGISTER INSTANCE 
instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", ], }, } @@ -896,7 +899,7 @@ def test_registering_coord_log_store(): # 3 instances_ports_added = [10011, 10012, 10013] bolt_port_id = 7700 - coord_port_id = 10014 + manag_port_id = 10014 additional_instances = [] for i in range(4, 7): @@ -908,10 +911,10 @@ def test_registering_coord_log_store(): bolt_port = f"--bolt-port={bolt_port_id}" - coord_server_port = f"--coordinator-server-port={coord_port_id}" + manag_server_port = f"--coordinator-server-port={manag_port_id}" args_desc.append(bolt_port) - args_desc.append(coord_server_port) + args_desc.append(manag_server_port) instance_description = { "args": args_desc, @@ -922,17 +925,23 @@ def test_registering_coord_log_store(): full_instance_desc = {instance_name: instance_description} interactive_mg_runner.start(full_instance_desc, instance_name) - repl_port_id = coord_port_id - 10 + repl_port_id = manag_port_id - 10 assert repl_port_id < 10011, "Wrong test setup, repl port must be smaller than smallest coord port id" + bolt_server = f"127.0.0.1:{bolt_port_id}" + management_server = f"127.0.0.1:{manag_port_id}" + repl_server = f"127.0.0.1:{repl_port_id}" + + config_str = f"{{'bolt_server': '{bolt_server}', 'management_server': '{management_server}', 'replication_server': '{repl_server}'}}" + execute_and_fetch_all( coord_cursor, - f"REGISTER INSTANCE {instance_name} ON '127.0.0.1:{coord_port_id}' WITH '127.0.0.1:{repl_port_id}'", + f"REGISTER INSTANCE {instance_name} WITH CONFIG {config_str}", ) - additional_instances.append((f"{instance_name}", "", f"127.0.0.1:{coord_port_id}", "up", "replica")) - instances_ports_added.append(coord_port_id) - coord_port_id += 1 + additional_instances.append((f"{instance_name}", "", management_server, "up", "replica")) + instances_ports_added.append(manag_port_id) + manag_port_id += 1 bolt_port_id += 1 # 4 @@ -1004,11 +1013,11 @@ def test_multiple_failovers_in_row_no_leadership_change(): coord_cursor_3 = connect(host="localhost", port=7692).cursor() setup_queries = [ - "ADD COORDINATOR 1 ON '127.0.0.1:10111'", - "ADD COORDINATOR 2 ON '127.0.0.1:10112'", - "REGISTER INSTANCE instance_1 ON '127.0.0.1:10011' WITH '127.0.0.1:10001'", - "REGISTER INSTANCE instance_2 ON '127.0.0.1:10012' WITH '127.0.0.1:10002'", - "REGISTER INSTANCE instance_3 ON '127.0.0.1:10013' WITH '127.0.0.1:10003'", + "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", + "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", "SET INSTANCE instance_3 TO MAIN", ] diff --git a/tests/e2e/high_availability/not_replicate_from_old_main.py b/tests/e2e/high_availability/not_replicate_from_old_main.py index 7ffffc04a..d9729f650 100644 --- 
a/tests/e2e/high_availability/not_replicate_from_old_main.py +++ b/tests/e2e/high_availability/not_replicate_from_old_main.py @@ -185,8 +185,8 @@ def test_not_replicate_old_main_register_new_cluster(): ], "log_file": "coordinator.log", "setup_queries": [ - "REGISTER INSTANCE shared_instance ON '127.0.0.1:10011' WITH '127.0.0.1:10001';", - "REGISTER INSTANCE instance_2 ON '127.0.0.1:10012' WITH '127.0.0.1:10002';", + "REGISTER INSTANCE shared_instance WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", "SET INSTANCE instance_2 TO MAIN", ], }, @@ -244,10 +244,12 @@ def test_not_replicate_old_main_register_new_cluster(): interactive_mg_runner.start_all_keep_others(MEMGRAPH_SECOND_COORD_CLUSTER_DESCRIPTION) second_cluster_coord_cursor = connect(host="localhost", port=7691).cursor() execute_and_fetch_all( - second_cluster_coord_cursor, "REGISTER INSTANCE shared_instance ON '127.0.0.1:10011' WITH '127.0.0.1:10001';" + second_cluster_coord_cursor, + "REGISTER INSTANCE shared_instance WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", ) execute_and_fetch_all( - second_cluster_coord_cursor, "REGISTER INSTANCE instance_3 ON '127.0.0.1:10013' WITH '127.0.0.1:10003';" + second_cluster_coord_cursor, + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", ) execute_and_fetch_all(second_cluster_coord_cursor, "SET INSTANCE instance_3 TO MAIN") diff --git a/tests/e2e/high_availability/single_coordinator.py b/tests/e2e/high_availability/single_coordinator.py index 7335d2847..1d839b4fc 100644 --- a/tests/e2e/high_availability/single_coordinator.py +++ b/tests/e2e/high_availability/single_coordinator.py @@ -90,9 +90,9 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { ], "log_file": "coordinator.log", "setup_queries": [ - "REGISTER INSTANCE instance_1 ON '127.0.0.1:10011' WITH '127.0.0.1:10001';", - "REGISTER INSTANCE instance_2 ON '127.0.0.1:10012' WITH '127.0.0.1:10002';", - "REGISTER INSTANCE instance_3 ON '127.0.0.1:10013' WITH '127.0.0.1:10003';", + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", "SET INSTANCE instance_3 TO MAIN", ], }, @@ -185,9 +185,9 @@ def test_replication_works_on_failover_replica_1_epoch_2_commits_away(data_recov ], "log_file": "coordinator.log", "setup_queries": [ - "REGISTER INSTANCE instance_1 ON '127.0.0.1:10011' WITH '127.0.0.1:10001';", - "REGISTER INSTANCE instance_2 ON '127.0.0.1:10012' WITH '127.0.0.1:10002';", - "REGISTER INSTANCE instance_3 ON '127.0.0.1:10013' WITH '127.0.0.1:10003';", + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 
'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", "SET INSTANCE instance_3 TO MAIN", ], }, @@ -415,10 +415,10 @@ def test_replication_works_on_failover_replica_2_epochs_more_commits_away(data_r ], "log_file": "coordinator.log", "setup_queries": [ - "REGISTER INSTANCE instance_1 ON '127.0.0.1:10011' WITH '127.0.0.1:10001';", - "REGISTER INSTANCE instance_2 ON '127.0.0.1:10012' WITH '127.0.0.1:10002';", - "REGISTER INSTANCE instance_3 ON '127.0.0.1:10013' WITH '127.0.0.1:10003';", - "REGISTER INSTANCE instance_4 ON '127.0.0.1:10014' WITH '127.0.0.1:10004';", + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", + "REGISTER INSTANCE instance_4 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'management_server': '127.0.0.1:10014', 'replication_server': '127.0.0.1:10004'};", "SET INSTANCE instance_3 TO MAIN", ], }, @@ -702,10 +702,10 @@ def test_replication_forcefully_works_on_failover_replica_misses_epoch(data_reco ], "log_file": "coordinator.log", "setup_queries": [ - "REGISTER INSTANCE instance_1 ON '127.0.0.1:10011' WITH '127.0.0.1:10001';", - "REGISTER INSTANCE instance_2 ON '127.0.0.1:10012' WITH '127.0.0.1:10002';", - "REGISTER INSTANCE instance_3 ON '127.0.0.1:10013' WITH '127.0.0.1:10003';", - "REGISTER INSTANCE instance_4 ON '127.0.0.1:10014' WITH '127.0.0.1:10004';", + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", + "REGISTER INSTANCE instance_4 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'management_server': '127.0.0.1:10014', 'replication_server': '127.0.0.1:10004'};", "SET INSTANCE instance_3 TO MAIN", ], }, @@ -989,10 +989,10 @@ def test_replication_correct_replica_chosen_up_to_date_data(data_recovery): ], "log_file": "coordinator.log", "setup_queries": [ - "REGISTER INSTANCE instance_1 ON '127.0.0.1:10011' WITH '127.0.0.1:10001';", - "REGISTER INSTANCE instance_2 ON '127.0.0.1:10012' WITH '127.0.0.1:10002';", - "REGISTER INSTANCE instance_3 ON '127.0.0.1:10013' WITH '127.0.0.1:10003';", - "REGISTER INSTANCE instance_4 ON '127.0.0.1:10014' WITH '127.0.0.1:10004';", + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10013', 'replication_server': 
'127.0.0.1:10003'};",
+        "REGISTER INSTANCE instance_4 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'management_server': '127.0.0.1:10014', 'replication_server': '127.0.0.1:10004'};",
         "SET INSTANCE instance_3 TO MAIN",
     ],
 },
@@ -1559,7 +1559,7 @@ def test_registering_replica_fails_name_exists():
     with pytest.raises(Exception) as e:
         execute_and_fetch_all(
             coord_cursor,
-            "REGISTER INSTANCE instance_1 ON '127.0.0.1:10051' WITH '127.0.0.1:10111';",
+            "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7693', 'management_server': '127.0.0.1:10051', 'replication_server': '127.0.0.1:10111'};",
         )
     assert str(e.value) == "Couldn't register replica instance since instance with such name already exists!"
     shutil.rmtree(TEMP_DIR)
@@ -1573,7 +1573,7 @@ def test_registering_replica_fails_endpoint_exists():
     with pytest.raises(Exception) as e:
         execute_and_fetch_all(
             coord_cursor,
-            "REGISTER INSTANCE instance_5 ON '127.0.0.1:10011' WITH '127.0.0.1:10005';",
+            "REGISTER INSTANCE instance_5 WITH CONFIG {'bolt_server': '127.0.0.1:7693', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10005'};",
         )
     assert (
         str(e.value)
diff --git a/tests/e2e/high_availability/workloads.yaml b/tests/e2e/high_availability/workloads.yaml
index 75f17b2f7..aaf76fc6b 100644
--- a/tests/e2e/high_availability/workloads.yaml
+++ b/tests/e2e/high_availability/workloads.yaml
@@ -16,9 +16,9 @@ ha_cluster: &ha_cluster
       args: ["--experimental-enabled=high-availability", "--bolt-port", "7690", "--log-level=TRACE", "--raft-server-id=1", "--raft-server-port=10111"]
       log_file: "replication-e2e-coordinator.log"
       setup_queries: [
-        "REGISTER INSTANCE instance_1 ON '127.0.0.1:10011' WITH '127.0.0.1:10001';",
-        "REGISTER INSTANCE instance_2 ON '127.0.0.1:10012' WITH '127.0.0.1:10002';",
-        "REGISTER INSTANCE instance_3 ON '127.0.0.1:10013' WITH '127.0.0.1:10003';",
+        "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};",
+        "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};",
+        "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};",
        "SET INSTANCE instance_3 TO MAIN;"
      ]

diff --git a/tests/unit/cypher_main_visitor.cpp b/tests/unit/cypher_main_visitor.cpp
index bcc6767f4..33e3af2e3 100644
--- a/tests/unit/cypher_main_visitor.cpp
+++ b/tests/unit/cypher_main_visitor.cpp
@@ -2633,15 +2633,99 @@ TEST_P(CypherMainVisitorTest, TestRegisterReplicationQuery) {
 }
 
 #ifdef MG_ENTERPRISE
+
+TEST_P(CypherMainVisitorTest, TestRegisterSyncInstance) {
+  auto &ast_generator = *GetParam();
+
+  std::string const sync_instance = R"(REGISTER INSTANCE instance_1 WITH CONFIG {"bolt_server": "127.0.0.1:7688",
+  "replication_server": "127.0.0.1:10001", "management_server": "127.0.0.1:10011"
+  })";
+
+  auto *parsed_query = dynamic_cast<CoordinatorQuery *>(ast_generator.ParseQuery(sync_instance));
+
+  EXPECT_EQ(parsed_query->action_, CoordinatorQuery::Action::REGISTER_INSTANCE);
+  EXPECT_EQ(parsed_query->sync_mode_, CoordinatorQuery::SyncMode::SYNC);
+
+  auto const evaluate_config_map = [&ast_generator](std::unordered_map<Expression *, Expression *> const &config_map)
+      -> std::unordered_map<std::string, std::string> {
+    auto const expr_to_str = [&ast_generator](Expression *expression) {
+      return std::string{ast_generator.GetLiteral(expression, ast_generator.context_.is_query_cached).ValueString()};
+    };
+
+    return ranges::views::transform(config_map,
+                                    [&expr_to_str](auto const &expr_pair) {
+                                      return std::pair{expr_to_str(expr_pair.first), expr_to_str(expr_pair.second)};
+                                    }) |
+           ranges::to<std::unordered_map<std::string, std::string>>;
+  };
+
+  auto const config_map = evaluate_config_map(parsed_query->configs_);
+  ASSERT_EQ(config_map.size(), 3);
+  EXPECT_EQ(config_map.at("bolt_server"), "127.0.0.1:7688");
+  EXPECT_EQ(config_map.at("management_server"), "127.0.0.1:10011");
+  EXPECT_EQ(config_map.at("replication_server"), "127.0.0.1:10001");
+}
+
+TEST_P(CypherMainVisitorTest, TestRegisterAsyncInstance) {
+  auto &ast_generator = *GetParam();
+
+  std::string const async_instance =
+      R"(REGISTER INSTANCE instance_1 AS ASYNC WITH CONFIG {"bolt_server": "127.0.0.1:7688",
+      "replication_server": "127.0.0.1:10001",
+      "management_server": "127.0.0.1:10011"})";
+
+  auto *parsed_query = dynamic_cast<CoordinatorQuery *>(ast_generator.ParseQuery(async_instance));
+
+  EXPECT_EQ(parsed_query->action_, CoordinatorQuery::Action::REGISTER_INSTANCE);
+  EXPECT_EQ(parsed_query->sync_mode_, CoordinatorQuery::SyncMode::ASYNC);
+
+  auto const evaluate_config_map = [&ast_generator](std::unordered_map<Expression *, Expression *> const &config_map)
+      -> std::map<std::string, std::string, std::less<>> {
+    auto const expr_to_str = [&ast_generator](Expression *expression) {
+      return std::string{ast_generator.GetLiteral(expression, ast_generator.context_.is_query_cached).ValueString()};
+    };
+
+    return ranges::views::transform(config_map,
+                                    [&expr_to_str](auto const &expr_pair) {
+                                      return std::pair{expr_to_str(expr_pair.first), expr_to_str(expr_pair.second)};
+                                    }) |
+           ranges::to<std::map<std::string, std::string, std::less<>>>;
+  };
+
+  auto const config_map = evaluate_config_map(parsed_query->configs_);
+  ASSERT_EQ(config_map.size(), 3);
+  EXPECT_EQ(config_map.find(memgraph::query::kBoltServer)->second, "127.0.0.1:7688");
+  EXPECT_EQ(config_map.find(memgraph::query::kManagementServer)->second, "127.0.0.1:10011");
+  EXPECT_EQ(config_map.find(memgraph::query::kReplicationServer)->second, "127.0.0.1:10001");
+}
+
 TEST_P(CypherMainVisitorTest, TestAddCoordinatorInstance) {
   auto &ast_generator = *GetParam();
 
-  std::string const correct_query = R"(ADD COORDINATOR 1 ON "127.0.0.1:10111")";
+  std::string const correct_query =
+      R"(ADD COORDINATOR 1 WITH CONFIG {"bolt_server": "127.0.0.1:7688", "coordinator_server": "127.0.0.1:10111"})";
 
   auto *parsed_query = dynamic_cast<CoordinatorQuery *>(ast_generator.ParseQuery(correct_query));
   EXPECT_EQ(parsed_query->action_, CoordinatorQuery::Action::ADD_COORDINATOR_INSTANCE);
-  ast_generator.CheckLiteral(parsed_query->raft_socket_address_, TypedValue("127.0.0.1:10111"));
-  ast_generator.CheckLiteral(parsed_query->raft_server_id_, TypedValue(1));
+  ast_generator.CheckLiteral(parsed_query->coordinator_server_id_, TypedValue(1));
+
+  auto const evaluate_config_map = [&ast_generator](std::unordered_map<Expression *, Expression *> const &config_map)
+      -> std::map<std::string, std::string, std::less<>> {
+    auto const expr_to_str = [&ast_generator](Expression *expression) {
+      return std::string{ast_generator.GetLiteral(expression, ast_generator.context_.is_query_cached).ValueString()};
+    };
+
+    return ranges::views::transform(config_map,
+                                    [&expr_to_str](auto const &expr_pair) {
+                                      return std::pair{expr_to_str(expr_pair.first), expr_to_str(expr_pair.second)};
+                                    }) |
+           ranges::to<std::map<std::string, std::string, std::less<>>>;
+  };
+
+  auto const config_map = evaluate_config_map(parsed_query->configs_);
+  ASSERT_EQ(config_map.size(), 2);
+  EXPECT_EQ(config_map.find(kBoltServer)->second, "127.0.0.1:7688");
+  EXPECT_EQ(config_map.find(kCoordinatorServer)->second, "127.0.0.1:10111");
 }
 #endif
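Since the WITH CONFIG map is ordinary query text, test harnesses build it by string interpolation — test_registering_coord_log_store above does exactly that with a Python f-string. The same shape expressed as a shell helper; the function name and the example endpoints are hypothetical and only illustrate the query format:

    # Hypothetical helper mirroring the f-string in test_registering_coord_log_store:
    # emit a REGISTER INSTANCE query in the new WITH CONFIG form.
    register_instance_query() {
      local name=$1 bolt=$2 mgmt=$3 repl=$4
      printf "REGISTER INSTANCE %s WITH CONFIG {'bolt_server': '%s', 'management_server': '%s', 'replication_server': '%s'};\n" \
        "$name" "$bolt" "$mgmt" "$repl"
    }

    register_instance_query instance_4 127.0.0.1:7700 127.0.0.1:10014 127.0.0.1:10004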
From b0cdcd3483ab7e088d338752fdac64482d40af1a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marko=20Bari=C5=A1i=C4=87?=
 <48765171+MarkoBarisic@users.noreply.github.com>
Date: Thu, 14 Mar 2024 12:19:59 +0100
Subject: [PATCH 05/16] Run CI in mgbuilder containers (#1749)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Update deployment files for mgbuilders because of toolchain upgrade
* Fix args parameter in builder yaml files
* Add fedora 38, 39 and rockylinux 9.3 mgbuilder Dockerfiles
* Change format of ARG TOOLCHAIN_VERSION from toolchain-vX to vX
* Add function to check supported arch, build type, os and toolchain
* Add options to init subcommand
* Add image names to mgbuilders
* Add v2 of the run.sh script
* Add testing to run2.sh
* Add option for threads --thread
* Add options for enterprise license and organization name
* Make stop mgbuild container step run always
* Add --ci flag to init script
* Move init conditionals under build-memgraph flags
* Add --community flag to build-memgraph
* Change target dir inside mgbuild container
* Add node fix to debian 11, ubuntu 20.04 and ubuntu 22.04
* rm memgraph repo after installing deps
* Add mg user in Dockerfile
* Add step to install rust on all OSs
* Chown files copied into mgbuild container
* Add e2e tests
* Add jepsen test
* Bugfix: Using reference in a callback
* Bugfix: Broad target for e2e tests
* Up db info test limit
* Disable e2e streams tests
* Fix default THREADS
* Prioritize docker compose over docker-compose
* Improve selection between docker compose and docker-compose
* Install PyYAML as mg user
* Fix doxygen install for rocky linux 9.3
* Fix rocky-9.3 environment script to properly install sbcl
* Rename all rocky-9 mentions to rocky-9.3
* Add mgdeps-cache and benchgraph-api hostnames to mgbuild images
* Add logic to pull mgbuild image if missing
* Fix build errors on toolchain-v5 (#1806)
* Rename run2 script, remove run script, add small features to mgbuild.sh
* Add --no-copy flag to build-memgraph to resolve TODO
* Add timeouts to diff jobs
* Fix asio flaky clone, try mgdeps-cache first

---------

Co-authored-by: Andreja Tonev
Co-authored-by: Ante Pušić
Co-authored-by: antoniofilipovic
---
 .clang-tidy                                 |   4 +-
 .github/workflows/diff.yaml                 | 553 ++++++++++-----
 environment/os/rocky-9.3.sh                 |  26 +-
 init                                        |  34 +-
 libs/setup.sh                               |   4 +
 release/package/amd-builders-v4.yml         |  73 ++
 release/package/amd-builders-v5.yml         |  81 +++
 release/package/amzn-2/Dockerfile           |  33 +-
 release/package/arm-builders-v4.yml         |  18 +
 release/package/arm-builders-v5.yml         |  18 +
 release/package/arm-builders.yml            |  11 -
 release/package/centos-7/Dockerfile         |  32 +-
 release/package/centos-9/Dockerfile         |  32 +-
 release/package/debian-10/Dockerfile        |  29 +-
 release/package/debian-11-arm/Dockerfile    |  29 +-
 release/package/debian-11/Dockerfile        |  29 +-
 release/package/debian-12-arm/Dockerfile    |  39 +
 release/package/debian-12/Dockerfile        |  39 +
 release/package/docker-compose.yml          |  38 -
 release/package/fedora-36/Dockerfile        |  29 +-
 release/package/fedora-38/Dockerfile        |  37 +
 release/package/fedora-39/Dockerfile        |  37 +
 release/package/mgbuild.sh                  | 665 ++++++++++++++++++
 release/package/rocky-9.3/Dockerfile        |  40 ++
 release/package/run.sh                      | 208 ------
 release/package/ubuntu-18.04/Dockerfile     |  29 +-
 release/package/ubuntu-20.04/Dockerfile     |  29 +-
 release/package/ubuntu-22.04-arm/Dockerfile |  29 +-
 release/package/ubuntu-22.04/Dockerfile     |  29 +-
 src/coordination/raft_state.cpp             |  14 +-
 src/dbms/inmemory/replication_handlers.cpp  |  10 +-
 .../v2/replication/replication_client.cpp   |   7 +-
 tests/code_analysis/clang_tidy.sh           |  23 +
tests/code_analysis/python_code_analysis.sh | 27 + .../module_file_manager.cpp | 8 +- tests/e2e/run.sh | 4 +- tests/e2e/runner.py | 3 + tests/e2e/streams/conftest.py | 4 + tests/e2e/streams/kafka_streams_tests.py | 5 +- tests/e2e/streams/pulsar_streams_tests.py | 2 + tests/setup.sh | 1 + tests/unit/database_get_info.cpp | 4 +- tools/github/clang-tidy/clang-tidy-diff.py | 2 - 43 files changed, 1843 insertions(+), 525 deletions(-) create mode 100644 release/package/amd-builders-v4.yml create mode 100644 release/package/amd-builders-v5.yml create mode 100644 release/package/arm-builders-v4.yml create mode 100644 release/package/arm-builders-v5.yml delete mode 100644 release/package/arm-builders.yml create mode 100644 release/package/debian-12-arm/Dockerfile create mode 100644 release/package/debian-12/Dockerfile delete mode 100644 release/package/docker-compose.yml create mode 100644 release/package/fedora-38/Dockerfile create mode 100644 release/package/fedora-39/Dockerfile create mode 100755 release/package/mgbuild.sh create mode 100644 release/package/rocky-9.3/Dockerfile delete mode 100755 release/package/run.sh create mode 100755 tests/code_analysis/clang_tidy.sh create mode 100755 tests/code_analysis/python_code_analysis.sh diff --git a/.clang-tidy b/.clang-tidy index a30f9e592..c02b7152f 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -64,8 +64,8 @@ Checks: '*, -readability-identifier-length, -misc-no-recursion, -concurrency-mt-unsafe, - -bugprone-easily-swappable-parameters' - + -bugprone-easily-swappable-parameters, + -bugprone-unchecked-optional-access' WarningsAsErrors: '' HeaderFilterRegex: 'src/.*' AnalyzeTemporaryDtors: false diff --git a/.github/workflows/diff.yaml b/.github/workflows/diff.yaml index a2dc0aef2..6c96cda80 100644 --- a/.github/workflows/diff.yaml +++ b/.github/workflows/diff.yaml @@ -4,10 +4,6 @@ concurrency: cancel-in-progress: true on: - push: - branches: - - master - workflow_dispatch: pull_request: paths-ignore: - "docs/**" @@ -19,11 +15,16 @@ on: jobs: community_build: name: "Community build" - runs-on: [self-hosted, Linux, X64, Diff] + runs-on: [self-hosted, Linux, X64, DockerMgBuild] + timeout-minutes: 60 env: THREADS: 24 MEMGRAPH_ENTERPRISE_LICENSE: ${{ secrets.MEMGRAPH_ENTERPRISE_LICENSE }} MEMGRAPH_ORGANIZATION_NAME: ${{ secrets.MEMGRAPH_ORGANIZATION_NAME }} + OS: debian-11 + TOOLCHAIN: v5 + ARCH: amd + BUILD_TYPE: RelWithDebInfo steps: - name: Set up repository @@ -33,35 +34,56 @@ jobs: # branches and tags. (default: 1) fetch-depth: 0 - - name: Build community binaries + - name: Spin up mgbuild container run: | - # Activate toolchain. - source /opt/toolchain-v4/activate + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + run - # Initialize dependencies. - ./init - - # Build community binaries. - cd build - cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo -DMG_ENTERPRISE=OFF .. - make -j$THREADS + - name: Build release binaries + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --build-type $BUILD_TYPE \ + --threads $THREADS \ + build-memgraph --community - name: Run unit tests run: | - # Activate toolchain. - source /opt/toolchain-v4/activate + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --threads $THREADS \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph unit - # Run unit tests. 
- cd build - ctest -R memgraph__unit --output-on-failure -j$THREADS + - name: Stop mgbuild container + if: always() + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + stop --remove code_analysis: name: "Code analysis" - runs-on: [self-hosted, Linux, X64, Diff] + runs-on: [self-hosted, Linux, X64, DockerMgBuild] + timeout-minutes: 60 env: THREADS: 24 MEMGRAPH_ENTERPRISE_LICENSE: ${{ secrets.MEMGRAPH_ENTERPRISE_LICENSE }} MEMGRAPH_ORGANIZATION_NAME: ${{ secrets.MEMGRAPH_ORGANIZATION_NAME }} + OS: debian-11 + TOOLCHAIN: v5 + ARCH: amd + BUILD_TYPE: Debug steps: - name: Set up repository @@ -71,6 +93,14 @@ jobs: # branches and tags. (default: 1) fetch-depth: 0 + - name: Spin up mgbuild container + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + run + # This is also needed if we want do to comparison against other branches # See https://github.community/t/checkout-code-fails-when-it-runs-lerna-run-test-since-master/17920 - name: Fetch all history for all tags and branches @@ -78,11 +108,13 @@ jobs: - name: Initialize deps run: | - # Activate toolchain. - source /opt/toolchain-v4/activate - - # Initialize dependencies. - ./init + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --build-type $BUILD_TYPE \ + --threads $THREADS \ + build-memgraph --init-only - name: Set base branch if: ${{ github.event_name == 'pull_request' }} @@ -96,45 +128,43 @@ jobs: - name: Python code analysis run: | - CHANGED_FILES=$(git diff -U0 ${{ env.BASE_BRANCH }}... --name-only --diff-filter=d) - for file in ${CHANGED_FILES}; do - echo ${file} - if [[ ${file} == *.py ]]; then - python3 -m black --check --diff ${file} - python3 -m isort --profile black --check-only --diff ${file} - fi - done + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph code-analysis --base-branch "${{ env.BASE_BRANCH }}" - name: Build combined ASAN, UBSAN and coverage binaries run: | - # Activate toolchain. - source /opt/toolchain-v4/activate - - cd build - cmake -DTEST_COVERAGE=ON -DASAN=ON -DUBSAN=ON .. - make -j$THREADS memgraph__unit + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --build-type $BUILD_TYPE \ + --threads $THREADS \ + build-memgraph --coverage --asan --ubsan - name: Run unit tests run: | - # Activate toolchain. - source /opt/toolchain-v4/activate - - # Run unit tests. It is restricted to 2 threads intentionally, because higher concurrency makes the timing related tests unstable. - cd build - LSAN_OPTIONS=suppressions=$PWD/../tools/lsan.supp UBSAN_OPTIONS=halt_on_error=1 ctest -R memgraph__unit --output-on-failure -j2 + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph unit-coverage - name: Compute code coverage run: | - # Activate toolchain. - source /opt/toolchain-v4/activate - - # Compute code coverage. - cd tools/github - ./coverage_convert - - # Package code coverage. 
- cd generated - tar -czf code_coverage.tar.gz coverage.json html report.json summary.rmu + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph code-coverage - name: Save code coverage uses: actions/upload-artifact@v4 @@ -144,21 +174,36 @@ jobs: - name: Run clang-tidy run: | - source /opt/toolchain-v4/activate + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --threads $THREADS \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph clang-tidy --base-branch "${{ env.BASE_BRANCH }}" - # Restrict clang-tidy results only to the modified parts - git diff -U0 ${{ env.BASE_BRANCH }}... -- src | ./tools/github/clang-tidy/clang-tidy-diff.py -p 1 -j $THREADS -path build -regex ".+\.cpp" | tee ./build/clang_tidy_output.txt - - # Fail if any warning is reported - ! cat ./build/clang_tidy_output.txt | ./tools/github/clang-tidy/grep_error_lines.sh > /dev/null + - name: Stop mgbuild container + if: always() + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + stop --remove debug_build: name: "Debug build" - runs-on: [self-hosted, Linux, X64, Diff] + runs-on: [self-hosted, Linux, X64, DockerMgBuild] + timeout-minutes: 100 env: THREADS: 24 MEMGRAPH_ENTERPRISE_LICENSE: ${{ secrets.MEMGRAPH_ENTERPRISE_LICENSE }} MEMGRAPH_ORGANIZATION_NAME: ${{ secrets.MEMGRAPH_ORGANIZATION_NAME }} + OS: debian-11 + TOOLCHAIN: v5 + ARCH: amd + BUILD_TYPE: Debug steps: - name: Set up repository @@ -168,58 +213,95 @@ jobs: # branches and tags. (default: 1) fetch-depth: 0 - - name: Build debug binaries + - name: Spin up mgbuild container run: | - # Activate toolchain. - source /opt/toolchain-v4/activate + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + run - # Initialize dependencies. - ./init - - # Build debug binaries. - cd build - cmake .. - make -j$THREADS + - name: Build release binaries + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --build-type $BUILD_TYPE \ + --threads $THREADS \ + build-memgraph - name: Run leftover CTest tests run: | - # Activate toolchain. - source /opt/toolchain-v4/activate - - # Run leftover CTest tests (all except unit and benchmark tests). - cd build - ctest -E "(memgraph__unit|memgraph__benchmark)" --output-on-failure + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --threads $THREADS \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph leftover-CTest - name: Run drivers tests run: | - ./tests/drivers/run.sh + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --threads $THREADS \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph drivers - name: Run integration tests run: | - tests/integration/run.sh + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --threads $THREADS \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph integration - name: Run cppcheck and clang-format run: | - # Activate toolchain. 
- source /opt/toolchain-v4/activate - - # Run cppcheck and clang-format. - cd tools/github - ./cppcheck_and_clang_format diff - + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --threads $THREADS \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph cppcheck-and-clang-format + - name: Save cppcheck and clang-format errors uses: actions/upload-artifact@v4 with: name: "Code coverage(Debug build)" path: tools/github/cppcheck_and_clang_format.txt + - name: Stop mgbuild container + if: always() + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + stop --remove + release_build: name: "Release build" - runs-on: [self-hosted, Linux, X64, Diff] + runs-on: [self-hosted, Linux, X64, DockerMgBuild] + timeout-minutes: 100 env: THREADS: 24 MEMGRAPH_ENTERPRISE_LICENSE: ${{ secrets.MEMGRAPH_ENTERPRISE_LICENSE }} MEMGRAPH_ORGANIZATION_NAME: ${{ secrets.MEMGRAPH_ORGANIZATION_NAME }} + OS: debian-11 + TOOLCHAIN: v5 + ARCH: amd + BUILD_TYPE: Release steps: - name: Set up repository @@ -229,26 +311,33 @@ jobs: # branches and tags. (default: 1) fetch-depth: 0 + - name: Spin up mgbuild container + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + run - name: Build release binaries run: | - # Activate toolchain. - source /opt/toolchain-v4/activate - - # Initialize dependencies. - ./init - - # Build release binaries. - cd build - cmake -DCMAKE_BUILD_TYPE=Release .. - make -j$THREADS + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --build-type $BUILD_TYPE \ + --threads $THREADS \ + build-memgraph - name: Run GQL Behave tests run: | - cd tests - ./setup.sh /opt/toolchain-v4/activate - cd gql_behave - ./continuous_integration + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph gql-behave - name: Save quality assurance status uses: actions/upload-artifact@v4 @@ -260,14 +349,19 @@ jobs: - name: Run unit tests run: | - # Activate toolchain. - source /opt/toolchain-v4/activate - - # Run unit tests. 
- cd build - ctest -R memgraph__unit --output-on-failure -j$THREADS + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --threads $THREADS \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph unit + # This step will be skipped because the e2e stream tests have been disabled + # We need to fix this as soon as possible - name: Ensure Kafka and Pulsar are up + if: false run: | cd tests/e2e/streams/kafka docker-compose up -d @@ -276,13 +370,17 @@ jobs: - name: Run e2e tests run: | - cd tests - ./setup.sh /opt/toolchain-v4/activate - source ve3/bin/activate_e2e - cd e2e - ./run.sh + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph e2e + # Same as two steps prior - name: Ensure Kafka and Pulsar are down + if: false run: | cd tests/e2e/streams/kafka docker-compose down @@ -291,59 +389,92 @@ jobs: - name: Run stress test (plain) run: | - cd tests/stress - source ve3/bin/activate - ./continuous_integration + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph stress-plain - name: Run stress test (SSL) run: | - cd tests/stress - source ve3/bin/activate - ./continuous_integration --use-ssl + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph stress-ssl - name: Run durability test run: | - cd tests/stress - source ve3/bin/activate - python3 durability --num-steps 5 + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph durability - name: Create enterprise DEB package run: | - # Activate toolchain. - source /opt/toolchain-v4/activate - cd build + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + package-memgraph - # create mgconsole - # we use the -B to force the build - make -j$THREADS -B mgconsole - - # Create enterprise DEB package. 
- mkdir output && cd output - cpack -G DEB --config ../CPackConfig.cmake + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + copy --package - name: Save enterprise DEB package uses: actions/upload-artifact@v4 with: name: "Enterprise DEB package" - path: build/output/memgraph*.deb + path: build/output/${{ env.OS }}/memgraph*.deb + + - name: Copy build logs + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + copy --build-logs - name: Save test data uses: actions/upload-artifact@v4 if: always() with: name: "Test data(Release build)" - path: | - # multiple paths could be defined - build/logs + path: build/logs + + - name: Stop mgbuild container + if: always() + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + stop --remove release_jepsen_test: name: "Release Jepsen Test" - runs-on: [self-hosted, Linux, X64, Debian10, JepsenControl] - #continue-on-error: true + runs-on: [self-hosted, Linux, X64, DockerMgBuild] + timeout-minutes: 80 env: THREADS: 24 MEMGRAPH_ENTERPRISE_LICENSE: ${{ secrets.MEMGRAPH_ENTERPRISE_LICENSE }} MEMGRAPH_ORGANIZATION_NAME: ${{ secrets.MEMGRAPH_ORGANIZATION_NAME }} + OS: debian-10 + TOOLCHAIN: v4 + ARCH: amd + BUILD_TYPE: RelWithDebInfo steps: - name: Set up repository @@ -353,16 +484,31 @@ jobs: # branches and tags. (default: 1) fetch-depth: 0 + - name: Spin up mgbuild container + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + run + - name: Build release binaries run: | - # Activate toolchain. - source /opt/toolchain-v4/activate - # Initialize dependencies. - ./init - # Build only memgraph release binarie. - cd build - cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo .. - make -j$THREADS memgraph + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --build-type $BUILD_TYPE \ + --threads $THREADS \ + build-memgraph + + - name: Copy memgraph binary + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + copy --binary - name: Refresh Jepsen Cluster run: | @@ -381,13 +527,27 @@ jobs: name: "Jepsen Report" path: tests/jepsen/Jepsen.tar.gz + - name: Stop mgbuild container + if: always() + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + stop --remove + release_benchmarks: name: "Release benchmarks" - runs-on: [self-hosted, Linux, X64, Diff, Gen7] + runs-on: [self-hosted, Linux, X64, DockerMgBuild, Gen7] + timeout-minutes: 60 env: THREADS: 24 MEMGRAPH_ENTERPRISE_LICENSE: ${{ secrets.MEMGRAPH_ENTERPRISE_LICENSE }} MEMGRAPH_ORGANIZATION_NAME: ${{ secrets.MEMGRAPH_ORGANIZATION_NAME }} + OS: debian-11 + TOOLCHAIN: v5 + ARCH: amd + BUILD_TYPE: Release steps: - name: Set up repository @@ -397,25 +557,33 @@ jobs: # branches and tags. (default: 1) fetch-depth: 0 + - name: Spin up mgbuild container + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + run + - name: Build release binaries run: | - # Activate toolchain. - source /opt/toolchain-v4/activate - - # Initialize dependencies. - ./init - - # Build only memgraph release binaries. - cd build - cmake -DCMAKE_BUILD_TYPE=release .. 
- make -j$THREADS + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --build-type $BUILD_TYPE \ + --threads $THREADS \ + build-memgraph - name: Run macro benchmarks run: | - cd tests/macro_benchmark - ./harness QuerySuite MemgraphRunner \ - --groups aggregation 1000_create unwind_create dense_expand match \ - --no-strict + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph macro-benchmark - name: Get branch name (merge) if: github.event_name != 'pull_request' @@ -429,30 +597,49 @@ jobs: - name: Upload macro benchmark results run: | - cd tools/bench-graph-client - virtualenv -p python3 ve3 - source ve3/bin/activate - pip install -r requirements.txt - ./main.py --benchmark-name "macro_benchmark" \ - --benchmark-results "../../tests/macro_benchmark/.harness_summary" \ - --github-run-id "${{ github.run_id }}" \ - --github-run-number "${{ github.run_number }}" \ - --head-branch-name "${{ env.BRANCH_NAME }}" + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph upload-to-bench-graph \ + --benchmark-name "macro_benchmark" \ + --benchmark-results "../../tests/macro_benchmark/.harness_summary" \ + --github-run-id ${{ github.run_id }} \ + --github-run-number ${{ github.run_number }} \ + --head-branch-name ${{ env.BRANCH_NAME }} - # TODO (andi) No need for path flags and for --disk-storage and --in-memory-analytical - name: Run mgbench run: | - cd tests/mgbench - ./benchmark.py vendor-native --num-workers-for-benchmark 12 --export-results benchmark_result.json pokec/medium/*/* + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph mgbench - name: Upload mgbench results run: | - cd tools/bench-graph-client - virtualenv -p python3 ve3 - source ve3/bin/activate - pip install -r requirements.txt - ./main.py --benchmark-name "mgbench" \ - --benchmark-results "../../tests/mgbench/benchmark_result.json" \ - --github-run-id "${{ github.run_id }}" \ - --github-run-number "${{ github.run_number }}" \ - --head-branch-name "${{ env.BRANCH_NAME }}" + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph upload-to-bench-graph \ + --benchmark-name "mgbench" \ + --benchmark-results "../../tests/mgbench/benchmark_result.json" \ + --github-run-id "${{ github.run_id }}" \ + --github-run-number "${{ github.run_number }}" \ + --head-branch-name "${{ env.BRANCH_NAME }}" + + - name: Stop mgbuild container + if: always() + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + stop --remove diff --git a/environment/os/rocky-9.3.sh b/environment/os/rocky-9.3.sh index 571278654..6ec045b42 100755 --- a/environment/os/rocky-9.3.sh +++ b/environment/os/rocky-9.3.sh @@ -59,7 +59,7 @@ MEMGRAPH_BUILD_DEPS=( doxygen graphviz # source documentation generators which nodejs golang custom-golang1.18.9 # for driver tests zip unzip java-11-openjdk-devel java-17-openjdk java-17-openjdk-devel custom-maven3.9.3 # for driver tests - sbcl # 
for custom Lisp C++ preprocessing + cl-asdf common-lisp-controller sbcl # for custom Lisp C++ preprocessing autoconf # for jemalloc code generation libtool # for protobuf code generation cyrus-sasl-devel @@ -162,6 +162,30 @@ install() { fi continue fi + if [ "$pkg" == doxygen ]; then + if ! dnf list installed doxygen >/dev/null 2>/dev/null; then + dnf install -y https://dl.rockylinux.org/pub/rocky/9/CRB/x86_64/os/Packages/d/doxygen-1.9.1-11.el9.x86_64.rpm + fi + continue + fi + if [ "$pkg" == cl-asdf ]; then + if ! dnf list installed cl-asdf >/dev/null 2>/dev/null; then + dnf install -y https://pkgs.sysadmins.ws/el8/base/x86_64/cl-asdf-20101028-18.el8.noarch.rpm + fi + continue + fi + if [ "$pkg" == common-lisp-controller ]; then + if ! dnf list installed common-lisp-controller >/dev/null 2>/dev/null; then + dnf install -y https://pkgs.sysadmins.ws/el8/base/x86_64/common-lisp-controller-7.4-20.el8.noarch.rpm + fi + continue + fi + if [ "$pkg" == sbcl ]; then + if ! dnf list installed sbcl >/dev/null 2>/dev/null; then + dnf install -y https://pkgs.sysadmins.ws/el8/base/x86_64/sbcl-2.0.1-4.el8.x86_64.rpm + fi + continue + fi if [ "$pkg" == PyYAML ]; then if [ -z ${SUDO_USER+x} ]; then # Running as root (e.g. Docker). pip3 install --user PyYAML diff --git a/init b/init index 9187ee5aa..5d1799237 100755 --- a/init +++ b/init @@ -14,6 +14,7 @@ function print_help () { echo "Optional arguments:" echo -e " -h\tdisplay this help and exit" echo -e " --without-libs-setup\tskip the step for setting up libs" + echo -e " --ci\tscript is being run inside ci" } function setup_virtualenv () { @@ -35,6 +36,7 @@ function setup_virtualenv () { } setup_libs=true +ci=false if [[ $# -eq 1 && "$1" == "-h" ]]; then print_help exit 0 @@ -45,6 +47,10 @@ else shift setup_libs=false ;; + --ci) + shift + ci=true + ;; *) # unknown option echo "Invalid argument provided: $1" @@ -76,11 +82,13 @@ if [[ "$setup_libs" == "true" ]]; then fi # Fix for centos 7 during release -if [ "${DISTRO}" = "centos-7" ] || [ "${DISTRO}" = "debian-11" ] || [ "${DISTRO}" = "amzn-2" ]; then - if python3 -m pip show virtualenv >/dev/null 2>/dev/null; then - python3 -m pip uninstall -y virtualenv +if [[ "$ci" == "false" ]]; then + if [ "${DISTRO}" = "centos-7" ] || [ "${DISTRO}" = "debian-11" ] || [ "${DISTRO}" = "amzn-2" ]; then + if python3 -m pip show virtualenv >/dev/null 2>/dev/null; then + python3 -m pip uninstall -y virtualenv + fi + python3 -m pip install virtualenv fi - python3 -m pip install virtualenv fi # setup gql_behave dependencies @@ -119,14 +127,16 @@ fi # Install precommit hook except on old operating systems because we don't # develop on them -> pre-commit hook not required -> we can use latest # packages. -if [ "${DISTRO}" != "centos-7" ] && [ "$DISTRO" != "debian-10" ] && [ "${DISTRO}" != "ubuntu-18.04" ] && [ "${DISTRO}" != "amzn-2" ]; then - python3 -m pip install pre-commit - python3 -m pre_commit install - # Install py format tools for usage during the development. - echo "Install black formatter" - python3 -m pip install black==23.1.* - echo "Install isort" - python3 -m pip install isort==5.12.* +if [[ "$ci" == "false" ]]; then + if [ "${DISTRO}" != "centos-7" ] && [ "$DISTRO" != "debian-10" ] && [ "${DISTRO}" != "ubuntu-18.04" ] && [ "${DISTRO}" != "amzn-2" ]; then + python3 -m pip install pre-commit + python3 -m pre_commit install + # Install py format tools for usage during the development. 
+ echo "Install black formatter" + python3 -m pip install black==23.1.* + echo "Install isort" + python3 -m pip install isort==5.12.* + fi fi # Link `include/mgp.py` with `release/mgp/mgp.py` diff --git a/libs/setup.sh b/libs/setup.sh index 9c2a38c47..e1e1243af 100755 --- a/libs/setup.sh +++ b/libs/setup.sh @@ -127,6 +127,7 @@ declare -A primary_urls=( ["jemalloc"]="http://$local_cache_host/git/jemalloc.git" ["range-v3"]="http://$local_cache_host/git/range-v3.git" ["nuraft"]="http://$local_cache_host/git/NuRaft.git" + ["asio"]="http://$local_cache_host/git/asio.git" ) # The goal of secondary urls is to have links to the "source of truth" of @@ -157,6 +158,7 @@ declare -A secondary_urls=( ["jemalloc"]="https://github.com/jemalloc/jemalloc.git" ["range-v3"]="https://github.com/ericniebler/range-v3.git" ["nuraft"]="https://github.com/eBay/NuRaft.git" + ["asio"]="https://github.com/chriskohlhoff/asio.git" ) # antlr @@ -286,5 +288,7 @@ nuraft_tag="v2.1.0" repo_clone_try_double "${primary_urls[nuraft]}" "${secondary_urls[nuraft]}" "nuraft" "$nuraft_tag" true pushd nuraft git apply ../nuraft2.1.0.patch +asio_tag="asio-1-29-0" +repo_clone_try_double "${primary_urls[asio]}" "${secondary_urls[asio]}" "asio" "$asio_tag" true ./prepare.sh popd diff --git a/release/package/amd-builders-v4.yml b/release/package/amd-builders-v4.yml new file mode 100644 index 000000000..2e3d4c5d2 --- /dev/null +++ b/release/package/amd-builders-v4.yml @@ -0,0 +1,73 @@ +version: "3" +services: + mgbuild_v4_amzn-2: + image: "memgraph/mgbuild:v4_amzn-2" + build: + context: amzn-2 + args: + TOOLCHAIN_VERSION: "v4" + container_name: "mgbuild_v4_amzn-2" + + mgbuild_v4_centos-7: + image: "memgraph/mgbuild:v4_centos-7" + build: + context: centos-7 + args: + TOOLCHAIN_VERSION: "v4" + container_name: "mgbuild_v4_centos-7" + + mgbuild_v4_centos-9: + image: "memgraph/mgbuild:v4_centos-9" + build: + context: centos-9 + args: + TOOLCHAIN_VERSION: "v4" + container_name: "mgbuild_v4_centos-9" + + mgbuild_v4_debian-10: + image: "memgraph/mgbuild:v4_debian-10" + build: + context: debian-10 + args: + TOOLCHAIN_VERSION: "v4" + container_name: "mgbuild_v4_debian-10" + + mgbuild_v4_debian-11: + image: "memgraph/mgbuild:v4_debian-11" + build: + context: debian-11 + args: + TOOLCHAIN_VERSION: "v4" + container_name: "mgbuild_v4_debian-11" + + mgbuild_v4_fedora-36: + image: "memgraph/mgbuild:v4_fedora-36" + build: + context: fedora-36 + args: + TOOLCHAIN_VERSION: "v4" + container_name: "mgbuild_v4_fedora-36" + + mgbuild_v4_ubuntu-18.04: + image: "memgraph/mgbuild:v4_ubuntu-18.04" + build: + context: ubuntu-18.04 + args: + TOOLCHAIN_VERSION: "v4" + container_name: "mgbuild_v4_ubuntu-18.04" + + mgbuild_v4_ubuntu-20.04: + image: "memgraph/mgbuild:v4_ubuntu-20.04" + build: + context: ubuntu-20.04 + args: + TOOLCHAIN_VERSION: "v4" + container_name: "mgbuild_v4_ubuntu-20.04" + + mgbuild_v4_ubuntu-22.04: + image: "memgraph/mgbuild:v4_ubuntu-22.04" + build: + context: ubuntu-22.04 + args: + TOOLCHAIN_VERSION: "v4" + container_name: "mgbuild_v4_ubuntu-22.04" diff --git a/release/package/amd-builders-v5.yml b/release/package/amd-builders-v5.yml new file mode 100644 index 000000000..d37bc6092 --- /dev/null +++ b/release/package/amd-builders-v5.yml @@ -0,0 +1,81 @@ +version: "3" +services: + mgbuild_v5_amzn-2: + image: "memgraph/mgbuild:v5_amzn-2" + build: + context: amzn-2 + args: + TOOLCHAIN_VERSION: "v5" + container_name: "mgbuild_v5_amzn-2" + + mgbuild_v5_centos-7: + image: "memgraph/mgbuild:v5_centos-7" + build: + context: centos-7 + args: + 
TOOLCHAIN_VERSION: "v5" + container_name: "mgbuild_v5_centos-7" + + mgbuild_v5_centos-9: + image: "memgraph/mgbuild:v5_centos-9" + build: + context: centos-9 + args: + TOOLCHAIN_VERSION: "v5" + container_name: "mgbuild_v5_centos-9" + + mgbuild_v5_debian-11: + image: "memgraph/mgbuild:v5_debian-11" + build: + context: debian-11 + args: + TOOLCHAIN_VERSION: "v5" + container_name: "mgbuild_v5_debian-11" + + mgbuild_v5_debian-12: + image: "memgraph/mgbuild:v5_debian-12" + build: + context: debian-12 + args: + TOOLCHAIN_VERSION: "v5" + container_name: "mgbuild_v5_debian-12" + + mgbuild_v5_fedora-38: + image: "memgraph/mgbuild:v5_fedora-38" + build: + context: fedora-38 + args: + TOOLCHAIN_VERSION: "v5" + container_name: "mgbuild_v5_fedora-38" + + mgbuild_v5_fedora-39: + image: "memgraph/mgbuild:v5_fedora-39" + build: + context: fedora-39 + args: + TOOLCHAIN_VERSION: "v5" + container_name: "mgbuild_v5_fedora-39" + + mgbuild_v5_rocky-9.3: + image: "memgraph/mgbuild:v5_rocky-9.3" + build: + context: rocky-9.3 + args: + TOOLCHAIN_VERSION: "v5" + container_name: "mgbuild_v5_rocky-9.3" + + mgbuild_v5_ubuntu-20.04: + image: "memgraph/mgbuild:v5_ubuntu-20.04" + build: + context: ubuntu-20.04 + args: + TOOLCHAIN_VERSION: "v5" + container_name: "mgbuild_v5_ubuntu-20.04" + + mgbuild_v5_ubuntu-22.04: + image: "memgraph/mgbuild:v5_ubuntu-22.04" + build: + context: ubuntu-22.04 + args: + TOOLCHAIN_VERSION: "v5" + container_name: "mgbuild_v5_ubuntu-22.04" diff --git a/release/package/amzn-2/Dockerfile b/release/package/amzn-2/Dockerfile index ba3ad34b9..80fdb9893 100644 --- a/release/package/amzn-2/Dockerfile +++ b/release/package/amzn-2/Dockerfile @@ -7,9 +7,34 @@ RUN yum -y update \ # Do NOT be smart here and clean the cache because the container is used in the # stateful context. -RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/${TOOLCHAIN_VERSION}/${TOOLCHAIN_VERSION}-binaries-amzn-2-x86_64.tar.gz \ - -O ${TOOLCHAIN_VERSION}-binaries-amzn-2-x86_64.tar.gz \ - && tar xzvf ${TOOLCHAIN_VERSION}-binaries-amzn-2-x86_64.tar.gz -C /opt \ - && rm ${TOOLCHAIN_VERSION}-binaries-amzn-2-x86_64.tar.gz +# Download and install toolchain +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-amzn-2-x86_64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-amzn-2-x86_64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-amzn-2-x86_64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-amzn-2-x86_64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/amzn-2.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/amzn-2.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. 
&& rm -rf memgraph
+
+# Add mgdeps-cache and bench-graph-api hostnames
+RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts
+
+# Create mg user and set as default
+RUN useradd -m -s /bin/bash mg
+USER mg
+
+# Install rust
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
+
+# Fix node
+RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash
+
+# Install PyYAML (only for amzn-2, centos-7, centos-9 and rocky-9.3)
+RUN pip3 install --user PyYAML
 
 ENTRYPOINT ["sleep", "infinity"]
diff --git a/release/package/arm-builders-v4.yml b/release/package/arm-builders-v4.yml
new file mode 100644
index 000000000..18cd5c4e9
--- /dev/null
+++ b/release/package/arm-builders-v4.yml
@@ -0,0 +1,18 @@
+version: "3"
+
+services:
+ mgbuild_v4_debian-11-arm:
+ image: "memgraph/mgbuild:v4_debian-11-arm"
+ build:
+ context: debian-11-arm
+ args:
+ TOOLCHAIN_VERSION: "v4"
+ container_name: "mgbuild_v4_debian-11-arm"
+
+ mgbuild_v4_ubuntu-22.04-arm:
+ image: "memgraph/mgbuild:v4_ubuntu-22.04-arm"
+ build:
+ context: ubuntu-22.04-arm
+ args:
+ TOOLCHAIN_VERSION: "v4"
+ container_name: "mgbuild_v4_ubuntu-22.04-arm"
diff --git a/release/package/arm-builders-v5.yml b/release/package/arm-builders-v5.yml
new file mode 100644
index 000000000..c276a49a8
--- /dev/null
+++ b/release/package/arm-builders-v5.yml
@@ -0,0 +1,18 @@
+version: "3"
+
+services:
+ mgbuild_v5_debian-12-arm:
+ image: "memgraph/mgbuild:v5_debian-12-arm"
+ build:
+ context: debian-12-arm
+ args:
+ TOOLCHAIN_VERSION: "v5"
+ container_name: "mgbuild_v5_debian-12-arm"
+
+ mgbuild_v5_ubuntu-22.04-arm:
+ image: "memgraph/mgbuild:v5_ubuntu-22.04-arm"
+ build:
+ context: ubuntu-22.04-arm
+ args:
+ TOOLCHAIN_VERSION: "v5"
+ container_name: "mgbuild_v5_ubuntu-22.04-arm"
diff --git a/release/package/arm-builders.yml b/release/package/arm-builders.yml
deleted file mode 100644
index d52f3bb26..000000000
--- a/release/package/arm-builders.yml
+++ /dev/null
@@ -1,11 +0,0 @@
-version: "3"
-
-services:
- debian-11-arm:
- build:
- context: debian-11-arm
- container_name: "mgbuild_debian-11-arm"
- ubuntu-2204-arm:
- build:
- context: ubuntu-22.04-arm
- container_name: "mgbuild_ubuntu-22.04-arm"
diff --git a/release/package/centos-7/Dockerfile b/release/package/centos-7/Dockerfile
index ff386c551..d34dfee77 100644
--- a/release/package/centos-7/Dockerfile
+++ b/release/package/centos-7/Dockerfile
@@ -7,9 +7,33 @@ RUN yum -y update \
 # Do NOT be smart here and clean the cache because the container is used in the
 # stateful context.
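The naming scheme above is load-bearing: the mgbuild.sh wrapper added later in this patch derives the compose file name from --arch and --toolchain and the service name as mgbuild_<toolchain>_<os>. A single builder can therefore also be driven by hand with a sketch like this (file and service names taken from the v4 file above; the invocation itself is illustrative):

    cd release/package
    # build and start one builder image without going through mgbuild.sh
    docker compose -f amd-builders-v4.yml build mgbuild_v4_centos-7
    docker compose -f amd-builders-v4.yml up -d mgbuild_v4_centos-7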
-RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/${TOOLCHAIN_VERSION}/${TOOLCHAIN_VERSION}-binaries-centos-7-x86_64.tar.gz \
- -O ${TOOLCHAIN_VERSION}-binaries-centos-7-x86_64.tar.gz \
- && tar xzvf ${TOOLCHAIN_VERSION}-binaries-centos-7-x86_64.tar.gz -C /opt \
- && rm ${TOOLCHAIN_VERSION}-binaries-centos-7-x86_64.tar.gz
+RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-centos-7-x86_64.tar.gz \
+ -O toolchain-${TOOLCHAIN_VERSION}-binaries-centos-7-x86_64.tar.gz \
+ && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-centos-7-x86_64.tar.gz -C /opt \
+ && rm toolchain-${TOOLCHAIN_VERSION}-binaries-centos-7-x86_64.tar.gz
+
+# Install toolchain run deps and memgraph build deps
+SHELL ["/bin/bash", "-c"]
+RUN git clone https://github.com/memgraph/memgraph.git \
+ && cd memgraph \
+ && ./environment/os/centos-7.sh install TOOLCHAIN_RUN_DEPS \
+ && ./environment/os/centos-7.sh install MEMGRAPH_BUILD_DEPS \
+ && cd .. && rm -rf memgraph
+
+# Add mgdeps-cache and bench-graph-api hostnames
+RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts
+
+# Create mg user and set as default
+RUN useradd -m -s /bin/bash mg
+USER mg
+
+# Install rust
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
+
+# Fix node
+RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash
+
+# Install PyYAML (only for amzn-2, centos-7, centos-9 and rocky-9.3)
+RUN pip3 install --user PyYAML
 
 ENTRYPOINT ["sleep", "infinity"]
diff --git a/release/package/centos-9/Dockerfile b/release/package/centos-9/Dockerfile
index 1c4c9541e..7c559a932 100644
--- a/release/package/centos-9/Dockerfile
+++ b/release/package/centos-9/Dockerfile
@@ -7,9 +7,33 @@ RUN yum -y update \
 # Do NOT be smart here and clean the cache because the container is used in the
 # stateful context.
-RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/${TOOLCHAIN_VERSION}/${TOOLCHAIN_VERSION}-binaries-centos-9-x86_64.tar.gz \
- -O ${TOOLCHAIN_VERSION}-binaries-centos-9-x86_64.tar.gz \
- && tar xzvf ${TOOLCHAIN_VERSION}-binaries-centos-9-x86_64.tar.gz -C /opt \
- && rm ${TOOLCHAIN_VERSION}-binaries-centos-9-x86_64.tar.gz
+RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-centos-9-x86_64.tar.gz \
+ -O toolchain-${TOOLCHAIN_VERSION}-binaries-centos-9-x86_64.tar.gz \
+ && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-centos-9-x86_64.tar.gz -C /opt \
+ && rm toolchain-${TOOLCHAIN_VERSION}-binaries-centos-9-x86_64.tar.gz
+
+# Install toolchain run deps and memgraph build deps
+SHELL ["/bin/bash", "-c"]
+RUN git clone https://github.com/memgraph/memgraph.git \
+ && cd memgraph \
+ && ./environment/os/centos-9.sh install TOOLCHAIN_RUN_DEPS \
+ && ./environment/os/centos-9.sh install MEMGRAPH_BUILD_DEPS \
+ && cd ..
&& rm -rf memgraph
+
+# Add mgdeps-cache and bench-graph-api hostnames
+RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts
+
+# Create mg user and set as default
+RUN useradd -m -s /bin/bash mg
+USER mg
+
+# Install rust
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
+
+# Fix node
+RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash
+
+# Install PyYAML (only for amzn-2, centos-7, centos-9 and rocky-9.3)
+RUN pip3 install --user PyYAML
 
 ENTRYPOINT ["sleep", "infinity"]
diff --git a/release/package/debian-10/Dockerfile b/release/package/debian-10/Dockerfile
index abd4a7f0e..a3ef12cc6 100644
--- a/release/package/debian-10/Dockerfile
+++ b/release/package/debian-10/Dockerfile
@@ -10,9 +10,30 @@ RUN apt update && apt install -y \
 # Do NOT be smart here and clean the cache because the container is used in the
 # stateful context.
-RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/${TOOLCHAIN_VERSION}/${TOOLCHAIN_VERSION}-binaries-debian-10-amd64.tar.gz \
- -O ${TOOLCHAIN_VERSION}-binaries-debian-10-amd64.tar.gz \
- && tar xzvf ${TOOLCHAIN_VERSION}-binaries-debian-10-amd64.tar.gz -C /opt \
- && rm ${TOOLCHAIN_VERSION}-binaries-debian-10-amd64.tar.gz
+RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-debian-10-amd64.tar.gz \
+ -O toolchain-${TOOLCHAIN_VERSION}-binaries-debian-10-amd64.tar.gz \
+ && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-debian-10-amd64.tar.gz -C /opt \
+ && rm toolchain-${TOOLCHAIN_VERSION}-binaries-debian-10-amd64.tar.gz
+
+# Install toolchain run deps and memgraph build deps
+SHELL ["/bin/bash", "-c"]
+RUN git clone https://github.com/memgraph/memgraph.git \
+ && cd memgraph \
+ && ./environment/os/debian-10.sh install TOOLCHAIN_RUN_DEPS \
+ && ./environment/os/debian-10.sh install MEMGRAPH_BUILD_DEPS \
+ && cd .. && rm -rf memgraph
+
+# Add mgdeps-cache and bench-graph-api hostnames
+RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts
+
+# Create mg user and set as default
+RUN useradd -m -s /bin/bash mg
+USER mg
+
+# Install rust
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
+
+# Fix node
+RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash
 
 ENTRYPOINT ["sleep", "infinity"]
diff --git a/release/package/debian-11-arm/Dockerfile b/release/package/debian-11-arm/Dockerfile
index 5040d8041..9b3aa3c8c 100644
--- a/release/package/debian-11-arm/Dockerfile
+++ b/release/package/debian-11-arm/Dockerfile
@@ -10,9 +10,30 @@ RUN apt update && apt install -y \
 # Do NOT be smart here and clean the cache because the container is used in the
 # stateful context.
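Every per-OS Dockerfile in this patch follows the same recipe: unpack the prebuilt toolchain into /opt, run the matching environment/os/<os>.sh script to install run-time and build dependencies, then switch to the non-root mg user. Because the entrypoint is just `sleep infinity`, a built image can be smoke-tested along these lines (a sketch; names taken from the compose files above, and assuming the toolchain ships gcc and cmake):

    # start a throwaway builder and verify the toolchain activates
    docker run -d --name mgbuild_v4_centos-9 memgraph/mgbuild:v4_centos-9
    docker exec -u mg mgbuild_v4_centos-9 bash -c \
      "source /opt/toolchain-v4/activate && gcc --version && cmake --version"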
-RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/${TOOLCHAIN_VERSION}/${TOOLCHAIN_VERSION}-binaries-debian-11-arm64.tar.gz \ - -O ${TOOLCHAIN_VERSION}-binaries-debian-11-arm64.tar.gz \ - && tar xzvf ${TOOLCHAIN_VERSION}-binaries-debian-11-arm64.tar.gz -C /opt \ - && rm ${TOOLCHAIN_VERSION}-binaries-debian-11-arm64.tar.gz +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-debian-11-arm64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-debian-11-arm64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-debian-11-arm64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-debian-11-arm64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/debian-11-arm.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/debian-11-arm.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. && rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/debian-11/Dockerfile b/release/package/debian-11/Dockerfile index cf47f253e..62070eccf 100644 --- a/release/package/debian-11/Dockerfile +++ b/release/package/debian-11/Dockerfile @@ -10,9 +10,30 @@ RUN apt update && apt install -y \ # Do NOT be smart here and clean the cache because the container is used in the # stateful context. -RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/${TOOLCHAIN_VERSION}/${TOOLCHAIN_VERSION}-binaries-debian-11-amd64.tar.gz \ - -O ${TOOLCHAIN_VERSION}-binaries-debian-11-amd64.tar.gz \ - && tar xzvf ${TOOLCHAIN_VERSION}-binaries-debian-11-amd64.tar.gz -C /opt \ - && rm ${TOOLCHAIN_VERSION}-binaries-debian-11-amd64.tar.gz +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-debian-11-amd64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-debian-11-amd64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-debian-11-amd64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-debian-11-amd64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/debian-11.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/debian-11.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. 
&& rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/debian-12-arm/Dockerfile b/release/package/debian-12-arm/Dockerfile new file mode 100644 index 000000000..6d4200391 --- /dev/null +++ b/release/package/debian-12-arm/Dockerfile @@ -0,0 +1,39 @@ +FROM debian:12 + +ARG TOOLCHAIN_VERSION + +# Stops tzdata interactive configuration. +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt update && apt install -y \ + ca-certificates wget git +# Do NOT be smart here and clean the cache because the container is used in the +# stateful context. + +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-debian-12-arm64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-debian-12-arm64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-debian-12-arm64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-debian-12-arm64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/debian-12-arm.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/debian-12-arm.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. && rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash + +ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/debian-12/Dockerfile b/release/package/debian-12/Dockerfile new file mode 100644 index 000000000..c35640eb3 --- /dev/null +++ b/release/package/debian-12/Dockerfile @@ -0,0 +1,39 @@ +FROM debian:12 + +ARG TOOLCHAIN_VERSION + +# Stops tzdata interactive configuration. +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt update && apt install -y \ + ca-certificates wget git +# Do NOT be smart here and clean the cache because the container is used in the +# stateful context. + +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-debian-12-amd64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-debian-12-amd64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-debian-12-amd64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-debian-12-amd64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/debian-12.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/debian-12.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. 
&& rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash + +ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/docker-compose.yml b/release/package/docker-compose.yml deleted file mode 100644 index 4da0526ba..000000000 --- a/release/package/docker-compose.yml +++ /dev/null @@ -1,38 +0,0 @@ -version: "3" -services: - mgbuild_centos-7: - build: - context: centos-7 - container_name: "mgbuild_centos-7" - mgbuild_centos-9: - build: - context: centos-9 - container_name: "mgbuild_centos-9" - mgbuild_debian-10: - build: - context: debian-10 - container_name: "mgbuild_debian-10" - mgbuild_debian-11: - build: - context: debian-11 - container_name: "mgbuild_debian-11" - mgbuild_ubuntu-18.04: - build: - context: ubuntu-18.04 - container_name: "mgbuild_ubuntu-18.04" - mgbuild_ubuntu-20.04: - build: - context: ubuntu-20.04 - container_name: "mgbuild_ubuntu-20.04" - mgbuild_ubuntu-22.04: - build: - context: ubuntu-22.04 - container_name: "mgbuild_ubuntu-22.04" - mgbuild_fedora-36: - build: - context: fedora-36 - container_name: "mgbuild_fedora-36" - mgbuild_amzn-2: - build: - context: amzn-2 - container_name: "mgbuild_amzn-2" diff --git a/release/package/fedora-36/Dockerfile b/release/package/fedora-36/Dockerfile index f84af132f..954a5b2bf 100644 --- a/release/package/fedora-36/Dockerfile +++ b/release/package/fedora-36/Dockerfile @@ -8,9 +8,30 @@ RUN yum -y update \ # Do NOT be smart here and clean the cache because the container is used in the # stateful context. -RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/${TOOLCHAIN_VERSION}/${TOOLCHAIN_VERSION}-binaries-fedora-36-x86_64.tar.gz \ - -O ${TOOLCHAIN_VERSION}-binaries-fedora-36-x86_64.tar.gz \ - && tar xzvf ${TOOLCHAIN_VERSION}-binaries-fedora-36-x86_64.tar.gz -C /opt \ - && rm ${TOOLCHAIN_VERSION}-binaries-fedora-36-x86_64.tar.gz +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-36-x86_64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-36-x86_64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-36-x86_64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-36-x86_64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/fedora-36.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/fedora-36.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. 
&& rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/fedora-38/Dockerfile b/release/package/fedora-38/Dockerfile new file mode 100644 index 000000000..cf7454ae4 --- /dev/null +++ b/release/package/fedora-38/Dockerfile @@ -0,0 +1,37 @@ +FROM fedora:38 + +ARG TOOLCHAIN_VERSION + +# Stops tzdata interactive configuration. +RUN yum -y update \ + && yum install -y wget git +# Do NOT be smart here and clean the cache because the container is used in the +# stateful context. + +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-38-amd64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-38-amd64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-38-amd64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-38-amd64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/fedora-38.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/fedora-38.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. && rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash + +ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/fedora-39/Dockerfile b/release/package/fedora-39/Dockerfile new file mode 100644 index 000000000..263017a3d --- /dev/null +++ b/release/package/fedora-39/Dockerfile @@ -0,0 +1,37 @@ +FROM fedora:39 + +ARG TOOLCHAIN_VERSION + +# Stops tzdata interactive configuration. +RUN yum -y update \ + && yum install -y wget git +# Do NOT be smart here and clean the cache because the container is used in the +# stateful context. + +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-39-amd64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-39-amd64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-39-amd64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-39-amd64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/fedora-39.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/fedora-39.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. 
&& rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash + +ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/mgbuild.sh b/release/package/mgbuild.sh new file mode 100755 index 000000000..3ff2f4979 --- /dev/null +++ b/release/package/mgbuild.sh @@ -0,0 +1,665 @@ +#!/bin/bash +set -Eeuo pipefail +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +SCRIPT_NAME=${0##*/} +PROJECT_ROOT="$SCRIPT_DIR/../.." +MGBUILD_HOME_DIR="/home/mg" +MGBUILD_ROOT_DIR="$MGBUILD_HOME_DIR/memgraph" + +DEFAULT_TOOLCHAIN="v5" +SUPPORTED_TOOLCHAINS=( + v4 v5 +) +DEFAULT_OS="all" +SUPPORTED_OS=( + all + amzn-2 + centos-7 centos-9 + debian-10 debian-11 debian-11-arm debian-12 debian-12-arm + fedora-36 fedora-38 fedora-39 + rocky-9.3 + ubuntu-18.04 ubuntu-20.04 ubuntu-22.04 ubuntu-22.04-arm +) +SUPPORTED_OS_V4=( + amzn-2 + centos-7 centos-9 + debian-10 debian-11 debian-11-arm + fedora-36 + ubuntu-18.04 ubuntu-20.04 ubuntu-22.04 ubuntu-22.04-arm +) +SUPPORTED_OS_V5=( + amzn-2 + centos-7 centos-9 + debian-11 debian-11-arm debian-12 debian-12-arm + fedora-38 fedora-39 + rocky-9.3 + ubuntu-20.04 ubuntu-22.04 ubuntu-22.04-arm +) +DEFAULT_BUILD_TYPE="Release" +SUPPORTED_BUILD_TYPES=( + Debug + Release + RelWithDebInfo +) +DEFAULT_ARCH="amd" +SUPPORTED_ARCHS=( + amd + arm +) +SUPPORTED_TESTS=( + clang-tidy cppcheck-and-clang-format code-analysis + code-coverage drivers durability e2e gql-behave + integration leftover-CTest macro-benchmark + mgbench stress-plain stress-ssl + unit unit-coverage upload-to-bench-graph + +) +DEFAULT_THREADS=0 +DEFAULT_ENTERPRISE_LICENSE="" +DEFAULT_ORGANIZATION_NAME="memgraph" + +print_help () { + echo -e "\nUsage: $SCRIPT_NAME [GLOBAL OPTIONS] COMMAND [COMMAND OPTIONS]" + echo -e "\nInteract with mgbuild containers" + + echo -e "\nCommands:" + echo -e " build Build mgbuild image" + echo -e " build-memgraph [OPTIONS] Build memgraph binary inside mgbuild container" + echo -e " copy OPTIONS Copy an artifact from mgbuild container to host" + echo -e " package-memgraph Create memgraph package from built binary inside mgbuild container" + echo -e " pull Pull mgbuild image from dockerhub" + echo -e " push [OPTIONS] Push mgbuild image to dockerhub" + echo -e " run [OPTIONS] Run mgbuild container" + echo -e " stop [OPTIONS] Stop mgbuild container" + echo -e " test-memgraph TEST Run a selected test TEST (see supported tests below) inside mgbuild container" + + echo -e "\nSupported tests:" + echo -e " \"${SUPPORTED_TESTS[*]}\"" + + echo -e "\nGlobal options:" + echo -e " --arch string Specify target architecture (\"${SUPPORTED_ARCHS[*]}\") (default \"$DEFAULT_ARCH\")" + echo -e " --build-type string Specify build type (\"${SUPPORTED_BUILD_TYPES[*]}\") (default \"$DEFAULT_BUILD_TYPE\")" + echo -e " --enterprise-license string Specify the enterprise license (default \"\")" + echo -e " --organization-name string Specify the organization name (default \"memgraph\")" + echo -e " --os string Specify operating system (\"${SUPPORTED_OS[*]}\") (default \"$DEFAULT_OS\")" + echo -e " --threads int Specify the number of threads a command will use (default \"\$(nproc)\" for container)" + echo -e " --toolchain string Specify toolchain 
version (\"${SUPPORTED_TOOLCHAINS[*]}\") (default \"$DEFAULT_TOOLCHAIN\")" + + echo -e "\nbuild-memgraph options:" + echo -e " --asan Build with ASAN" + echo -e " --community Build community version" + echo -e " --coverage Build with code coverage" + echo -e " --for-docker Add flag -DMG_TELEMETRY_ID_OVERRIDE=DOCKER to cmake" + echo -e " --for-platform Add flag -DMG_TELEMETRY_ID_OVERRIDE=DOCKER-PLATFORM to cmake" + echo -e " --init-only Only run init script" + echo -e " --no-copy Don't copy the memgraph repo from host." + echo -e " Use this option with caution, be sure that memgraph source code is in correct location inside mgbuild container" + echo -e " --ubsan Build with UBSAN" + + echo -e "\ncopy options:" + echo -e " --binary Copy memgraph binary from mgbuild container to host" + echo -e " --build-logs Copy build logs from mgbuild container to host" + echo -e " --package Copy memgraph package from mgbuild container to host" + + echo -e "\npush options:" + echo -e " -p, --password string Specify password for docker login" + echo -e " -u, --username string Specify username for docker login" + + echo -e "\nrun options:" + echo -e " --pull Pull the mgbuild image before running" + + echo -e "\nstop options:" + echo -e " --remove Remove the stopped mgbuild container" + + echo -e "\nToolchain v4 supported OSs:" + echo -e " \"${SUPPORTED_OS_V4[*]}\"" + + echo -e "\nToolchain v5 supported OSs:" + echo -e " \"${SUPPORTED_OS_V5[*]}\"" + + echo -e "\nExample usage:" + echo -e " $SCRIPT_NAME --os debian-12 --toolchain v5 --arch amd run" + echo -e " $SCRIPT_NAME --os debian-12 --toolchain v5 --arch amd --build-type RelWithDebInfo build-memgraph --community" + echo -e " $SCRIPT_NAME --os debian-12 --toolchain v5 --arch amd --build-type RelWithDebInfo test-memgraph unit" + echo -e " $SCRIPT_NAME --os debian-12 --toolchain v5 --arch amd package" + echo -e " $SCRIPT_NAME --os debian-12 --toolchain v5 --arch amd copy --package" + echo -e " $SCRIPT_NAME --os debian-12 --toolchain v5 --arch amd stop --remove" +} + +check_support() { + local is_supported=false + case "$1" in + arch) + for e in "${SUPPORTED_ARCHS[@]}"; do + if [[ "$e" == "$2" ]]; then + is_supported=true + break + fi + done + if [[ "$is_supported" == false ]]; then + echo -e "Error: Architecture $2 isn't supported!\nChoose from ${SUPPORTED_ARCHS[*]}" + exit 1 + fi + ;; + build_type) + for e in "${SUPPORTED_BUILD_TYPES[@]}"; do + if [[ "$e" == "$2" ]]; then + is_supported=true + break + fi + done + if [[ "$is_supported" == false ]]; then + echo -e "Error: Build type $2 isn't supported!\nChoose from ${SUPPORTED_BUILD_TYPES[*]}" + exit 1 + fi + ;; + os) + for e in "${SUPPORTED_OS[@]}"; do + if [[ "$e" == "$2" ]]; then + is_supported=true + break + fi + done + if [[ "$is_supported" == false ]]; then + echo -e "Error: OS $2 isn't supported!\nChoose from ${SUPPORTED_OS[*]}" + exit 1 + fi + ;; + toolchain) + for e in "${SUPPORTED_TOOLCHAINS[@]}"; do + if [[ "$e" == "$2" ]]; then + is_supported=true + break + fi + done + if [[ "$is_supported" == false ]]; then + echo -e "TError: oolchain version $2 isn't supported!\nChoose from ${SUPPORTED_TOOLCHAINS[*]}" + exit 1 + fi + ;; + os_toolchain_combo) + if [[ "$3" == "v4" ]]; then + local SUPPORTED_OS_TOOLCHAIN=("${SUPPORTED_OS_V4[@]}") + elif [[ "$3" == "v5" ]]; then + local SUPPORTED_OS_TOOLCHAIN=("${SUPPORTED_OS_V5[@]}") + else + echo -e "Error: $3 isn't a supported toolchain_version!\nChoose from ${SUPPORTED_TOOLCHAINS[*]}" + exit 1 + fi + for e in "${SUPPORTED_OS_TOOLCHAIN[@]}"; do + if [[ "$e" == 
"$2" ]]; then + is_supported=true + break + fi + done + if [[ "$is_supported" == false ]]; then + echo -e "Error: Toolchain version $3 doesn't support OS $2!\nChoose from ${SUPPORTED_OS_TOOLCHAIN[*]}" + exit 1 + fi + ;; + *) + echo -e "Error: This function can only check arch, build_type, os, toolchain version and os toolchain combination" + exit 1 + ;; + esac +} + + +################################################## +######## BUILD, COPY AND PACKAGE MEMGRAPH ######## +################################################## +build_memgraph () { + local build_container="mgbuild_${toolchain_version}_${os}" + local ACTIVATE_TOOLCHAIN="source /opt/toolchain-${toolchain_version}/activate" + local container_build_dir="$MGBUILD_ROOT_DIR/build" + local container_output_dir="$container_build_dir/output" + local arm_flag="" + if [[ "$arch" == "arm" ]] || [[ "$os" =~ "-arm" ]]; then + arm_flag="-DMG_ARCH="ARM64"" + fi + local build_type_flag="-DCMAKE_BUILD_TYPE=$build_type" + local telemetry_id_override_flag="" + local community_flag="" + local coverage_flag="" + local asan_flag="" + local ubsan_flag="" + local init_only=false + local for_docker=false + local for_platform=false + local copy_from_host=true + while [[ "$#" -gt 0 ]]; do + case "$1" in + --community) + community_flag="-DMG_ENTERPRISE=OFF" + shift 1 + ;; + --init-only) + init_only=true + shift 1 + ;; + --for-docker) + for_docker=true + if [[ "$for_platform" == "true" ]]; then + echo "Error: Cannot combine --for-docker and --for-platform flags" + exit 1 + fi + telemetry_id_override_flag=" -DMG_TELEMETRY_ID_OVERRIDE=DOCKER " + shift 1 + ;; + --for-platform) + for_platform=true + if [[ "$for_docker" == "true" ]]; then + echo "Error: Cannot combine --for-docker and --for-platform flags" + exit 1 + fi + telemetry_id_override_flag=" -DMG_TELEMETRY_ID_OVERRIDE=DOCKER-PLATFORM " + shift 1 + ;; + --coverage) + coverage_flag="-DTEST_COVERAGE=ON" + shift 1 + ;; + --asan) + asan_flag="-DASAN=ON" + shift 1 + ;; + --ubsan) + ubsan_flag="-DUBSAN=ON" + shift 1 + ;; + --no-copy) + copy_from_host=false + shift 1 + ;; + *) + echo "Error: Unknown flag '$1'" + exit 1 + ;; + esac + done + + echo "Initializing deps ..." + # If master is not the current branch, fetch it, because the get_version + # script depends on it. If we are on master, the fetch command is going to + # fail so that's why there is the explicit check. + # Required here because Docker build container can't access remote. + cd "$PROJECT_ROOT" + if [[ "$(git rev-parse --abbrev-ref HEAD)" != "master" ]]; then + git fetch origin master:master + fi + + if [[ "$copy_from_host" == "true" ]]; then + # Ensure we have a clean build directory + docker exec -u mg "$build_container" bash -c "rm -rf $MGBUILD_ROOT_DIR && mkdir -p $MGBUILD_ROOT_DIR" + echo "Copying project files..." + docker cp "$PROJECT_ROOT/." "$build_container:$MGBUILD_ROOT_DIR/" + fi + # Change ownership of copied files so the mg user inside container can access them + docker exec -u root $build_container bash -c "chown -R mg:mg $MGBUILD_ROOT_DIR" + + echo "Installing dependencies using '/memgraph/environment/os/$os.sh' script..." + docker exec -u root "$build_container" bash -c "$MGBUILD_ROOT_DIR/environment/os/$os.sh check TOOLCHAIN_RUN_DEPS || /environment/os/$os.sh install TOOLCHAIN_RUN_DEPS" + docker exec -u root "$build_container" bash -c "$MGBUILD_ROOT_DIR/environment/os/$os.sh check MEMGRAPH_BUILD_DEPS || /environment/os/$os.sh install MEMGRAPH_BUILD_DEPS" + + echo "Building targeted package..." 
+ # Fix issue with git marking directory as not safe
+ docker exec -u mg "$build_container" bash -c "cd $MGBUILD_ROOT_DIR && git config --global --add safe.directory '*'"
+ docker exec -u mg "$build_container" bash -c "cd $MGBUILD_ROOT_DIR && $ACTIVATE_TOOLCHAIN && ./init --ci"
+ if [[ "$init_only" == "true" ]]; then
+ return
+ fi
+
+ echo "Building Memgraph for $os on $build_container..."
+ docker exec -u mg "$build_container" bash -c "cd $container_build_dir && rm -rf ./*"
+ # Fix cmake failing locally if remote is clone via ssh
+ docker exec -u mg "$build_container" bash -c "cd $MGBUILD_ROOT_DIR && git remote set-url origin https://github.com/memgraph/memgraph.git"
+
+ # Define cmake command
+ local cmake_cmd="cmake $build_type_flag $arm_flag $community_flag $telemetry_id_override_flag $coverage_flag $asan_flag $ubsan_flag .."
+ docker exec -u mg "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN && $cmake_cmd"
+
+ # ' is used instead of " because we need to run make within the allowed
+ # container resources.
+ # Default value for $threads is 0 instead of $(nproc) because macos
+ # doesn't support the nproc command.
+ # 0 is set for default value and checked here because mgbuild containers
+ # support nproc
+ # Note that $threads is a host-side variable, so the non-default branch
+ # must expand it in double quotes before the command reaches the container.
+ # shellcheck disable=SC2016
+ if [[ "$threads" == 0 ]]; then
+ docker exec -u mg "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN "'&& make -j$(nproc)'
+ docker exec -u mg "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN "'&& make -j$(nproc) -B mgconsole'
+ else
+ docker exec -u mg "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN && make -j$threads"
+ docker exec -u mg "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN && make -j$threads -B mgconsole"
+ fi
+}
+
+package_memgraph() {
+ local ACTIVATE_TOOLCHAIN="source /opt/toolchain-${toolchain_version}/activate"
+ local build_container="mgbuild_${toolchain_version}_${os}"
+ local container_output_dir="$MGBUILD_ROOT_DIR/build/output"
+ local package_command=""
+ if [[ "$os" =~ ^"centos".* ]] || [[ "$os" =~ ^"fedora".* ]] || [[ "$os" =~ ^"amzn".* ]] || [[ "$os" =~ ^"rocky".* ]]; then
+ docker exec -u root "$build_container" bash -c "yum -y update"
+ package_command=" cpack -G RPM --config ../CPackConfig.cmake && rpmlint --file='../../release/rpm/rpmlintrc' memgraph*.rpm "
+ fi
+ if [[ "$os" =~ ^"debian".* ]]; then
+ docker exec -u root "$build_container" bash -c "apt --allow-releaseinfo-change -y update"
+ package_command=" cpack -G DEB --config ../CPackConfig.cmake "
+ fi
+ if [[ "$os" =~ ^"ubuntu".* ]]; then
+ docker exec -u root "$build_container" bash -c "apt update"
+ package_command=" cpack -G DEB --config ../CPackConfig.cmake "
+ fi
+ docker exec -u mg "$build_container" bash -c "mkdir -p $container_output_dir && cd $container_output_dir && $ACTIVATE_TOOLCHAIN && $package_command"
+}
+
+copy_memgraph() {
+ local build_container="mgbuild_${toolchain_version}_${os}"
+ case "$1" in
+ --binary)
+ echo "Copying memgraph binary to host..."
+ local container_output_path="$MGBUILD_ROOT_DIR/build/memgraph"
+ local host_output_path="$PROJECT_ROOT/build/memgraph"
+ mkdir -p "$PROJECT_ROOT/build"
+ docker cp -L $build_container:$container_output_path $host_output_path
+ echo "Binary saved to $host_output_path"
+ ;;
+ --build-logs)
+ echo "Copying memgraph build logs to host..."
+ local container_output_path="$MGBUILD_ROOT_DIR/build/logs"
+ local host_output_path="$PROJECT_ROOT/build/logs"
+ mkdir -p "$PROJECT_ROOT/build"
+ docker cp -L $build_container:$container_output_path $host_output_path
+ echo "Build logs saved to $host_output_path"
+ ;;
+ --package)
+ echo "Copying memgraph package to host..."
+ local container_output_dir="$MGBUILD_ROOT_DIR/build/output"
+ local host_output_dir="$PROJECT_ROOT/build/output/$os"
+ local last_package_name=$(docker exec -u mg "$build_container" bash -c "cd $container_output_dir && ls -t memgraph* | head -1")
+ mkdir -p "$host_output_dir"
+ docker cp "$build_container:$container_output_dir/$last_package_name" "$host_output_dir/$last_package_name"
+ echo "Package saved to $host_output_dir/$last_package_name"
+ ;;
+ *)
+ echo "Error: Unknown flag '$1'"
+ exit 1
+ ;;
+ esac
+}
+
+
+##################################################
+##################### TESTS ######################
+##################################################
+test_memgraph() {
+ local ACTIVATE_TOOLCHAIN="source /opt/toolchain-${toolchain_version}/activate"
+ local ACTIVATE_VENV="./setup.sh /opt/toolchain-${toolchain_version}/activate"
+ local EXPORT_LICENSE="export MEMGRAPH_ENTERPRISE_LICENSE=$enterprise_license"
+ local EXPORT_ORG_NAME="export MEMGRAPH_ORGANIZATION_NAME=$organization_name"
+ local BUILD_DIR="$MGBUILD_ROOT_DIR/build"
+ local build_container="mgbuild_${toolchain_version}_${os}"
+ echo "Running $1 test on $build_container..."
+
+ case "$1" in
+ unit)
+ docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $BUILD_DIR && $ACTIVATE_TOOLCHAIN && ctest -R memgraph__unit --output-on-failure -j$threads"
+ ;;
+ unit-coverage)
+ local setup_lsan_ubsan="export LSAN_OPTIONS=suppressions=$BUILD_DIR/../tools/lsan.supp && export UBSAN_OPTIONS=halt_on_error=1"
+ docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $BUILD_DIR && $ACTIVATE_TOOLCHAIN && $setup_lsan_ubsan "'&& ctest -R memgraph__unit --output-on-failure -j2'
+ ;;
+ leftover-CTest)
+ docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $BUILD_DIR && $ACTIVATE_TOOLCHAIN "'&& ctest -E "(memgraph__unit|memgraph__benchmark)" --output-on-failure'
+ ;;
+ drivers)
+ docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR "'&& ./tests/drivers/run.sh'
+ ;;
+ integration)
+ docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR "'&& tests/integration/run.sh'
+ ;;
+ cppcheck-and-clang-format)
+ local test_output_path="$MGBUILD_ROOT_DIR/tools/github/cppcheck_and_clang_format.txt"
+ local test_output_host_dest="$PROJECT_ROOT/tools/github/cppcheck_and_clang_format.txt"
+ docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR/tools/github && $ACTIVATE_TOOLCHAIN "'&& ./cppcheck_and_clang_format diff'
+ docker cp $build_container:$test_output_path $test_output_host_dest
+ ;;
+ stress-plain)
+ docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR/tests/stress && source ve3/bin/activate "'&& ./continuous_integration'
+ ;;
+ stress-ssl)
+ docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR/tests/stress && source ve3/bin/activate "'&& ./continuous_integration --use-ssl'
+ ;;
+ durability)
+ docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME &&
cd $MGBUILD_ROOT_DIR/tests/stress && source ve3/bin/activate "'&& python3 durability --num-steps 5' + ;; + gql-behave) + local test_output_dir="$MGBUILD_ROOT_DIR/tests/gql_behave" + local test_output_host_dest="$PROJECT_ROOT/tests/gql_behave" + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR/tests && $ACTIVATE_VENV && cd $MGBUILD_ROOT_DIR/tests/gql_behave "'&& ./continuous_integration' + docker cp $build_container:$test_output_dir/gql_behave_status.csv $test_output_host_dest/gql_behave_status.csv + docker cp $build_container:$test_output_dir/gql_behave_status.html $test_output_host_dest/gql_behave_status.html + ;; + macro-benchmark) + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && export USER=mg && export LANG=$(echo $LANG) && cd $MGBUILD_ROOT_DIR/tests/macro_benchmark "'&& ./harness QuerySuite MemgraphRunner --groups aggregation 1000_create unwind_create dense_expand match --no-strict' + ;; + mgbench) + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR/tests/mgbench "'&& ./benchmark.py vendor-native --num-workers-for-benchmark 12 --export-results benchmark_result.json pokec/medium/*/*' + ;; + upload-to-bench-graph) + shift 1 + local SETUP_PASSED_ARGS="export PASSED_ARGS=\"$@\"" + local SETUP_VE3_ENV="virtualenv -p python3 ve3 && source ve3/bin/activate && pip install -r requirements.txt" + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR/tools/bench-graph-client && $SETUP_VE3_ENV && $SETUP_PASSED_ARGS "'&& ./main.py $PASSED_ARGS' + ;; + code-analysis) + shift 1 + local SETUP_PASSED_ARGS="export PASSED_ARGS=\"$@\"" + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR/tests/code_analysis && $SETUP_PASSED_ARGS "'&& ./python_code_analysis.sh $PASSED_ARGS' + ;; + code-coverage) + local test_output_path="$MGBUILD_ROOT_DIR/tools/github/generated/code_coverage.tar.gz" + local test_output_host_dest="$PROJECT_ROOT/tools/github/generated/code_coverage.tar.gz" + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && $ACTIVATE_TOOLCHAIN && cd $MGBUILD_ROOT_DIR/tools/github "'&& ./coverage_convert' + docker exec -u mg $build_container bash -c "cd $MGBUILD_ROOT_DIR/tools/github/generated && tar -czf code_coverage.tar.gz coverage.json html report.json summary.rmu" + mkdir -p $PROJECT_ROOT/tools/github/generated + docker cp $build_container:$test_output_path $test_output_host_dest + ;; + clang-tidy) + shift 1 + local SETUP_PASSED_ARGS="export PASSED_ARGS=\"$@\"" + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && export THREADS=$threads && $ACTIVATE_TOOLCHAIN && cd $MGBUILD_ROOT_DIR/tests/code_analysis && $SETUP_PASSED_ARGS "'&& ./clang_tidy.sh $PASSED_ARGS' + ;; + e2e) + # local kafka_container="kafka_kafka_1" + # local kafka_hostname="kafka" + # local pulsar_container="pulsar_pulsar_1" + # local pulsar_hostname="pulsar" + # local setup_hostnames="export KAFKA_HOSTNAME=$kafka_hostname && PULSAR_HOSTNAME=$pulsar_hostname" + # local build_container_network=$(docker inspect $build_container --format='{{ .HostConfig.NetworkMode }}') + # docker network connect --alias $kafka_hostname $build_container_network $kafka_container > /dev/null 2>&1 || echo "Kafka container already inside correct network or something went wrong ..." 
+ # docker network connect --alias $pulsar_hostname $build_container_network $pulsar_container > /dev/null 2>&1 || echo "Pulsar container already inside correct network or something went wrong ..."
+ docker exec -u mg $build_container bash -c "pip install --user networkx && pip3 install --user networkx"
+ docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR/tests && $ACTIVATE_VENV && source ve3/bin/activate_e2e && cd $MGBUILD_ROOT_DIR/tests/e2e "'&& ./run.sh'
+ ;;
+ *)
+ echo "Error: Unknown test '$1'"
+ exit 1
+ ;;
+ esac
+}
+
+
+##################################################
+################### PARSE ARGS ###################
+##################################################
+if [ "$#" -eq 0 ] || [ "$1" == "-h" ] || [ "$1" == "--help" ]; then
+ print_help
+ exit 0
+fi
+arch=$DEFAULT_ARCH
+build_type=$DEFAULT_BUILD_TYPE
+enterprise_license=$DEFAULT_ENTERPRISE_LICENSE
+organization_name=$DEFAULT_ORGANIZATION_NAME
+os=$DEFAULT_OS
+threads=$DEFAULT_THREADS
+toolchain_version=$DEFAULT_TOOLCHAIN
+command=""
+while [[ $# -gt 0 ]]; do
+ case "$1" in
+ --arch)
+ arch=$2
+ check_support arch $arch
+ shift 2
+ ;;
+ --build-type)
+ build_type=$2
+ check_support build_type $build_type
+ shift 2
+ ;;
+ --enterprise-license)
+ enterprise_license=$2
+ shift 2
+ ;;
+ --organization-name)
+ organization_name=$2
+ shift 2
+ ;;
+ --os)
+ os=$2
+ check_support os $os
+ shift 2
+ ;;
+ --threads)
+ threads=$2
+ shift 2
+ ;;
+ --toolchain)
+ toolchain_version=$2
+ check_support toolchain $toolchain_version
+ shift 2
+ ;;
+ *)
+ if [[ "$1" =~ ^--.* ]]; then
+ echo -e "Error: Unknown option '$1'"
+ exit 1
+ else
+ command=$1
+ shift 1
+ break
+ fi
+ ;;
+ esac
+done
+check_support os_toolchain_combo $os $toolchain_version
+
+if [[ "$command" == "" ]]; then
+ echo -e "Error: Command not provided, please provide command"
+ print_help
+ exit 1
+fi
+
+if docker compose version > /dev/null 2>&1; then
+ docker_compose_cmd="docker compose"
+elif which docker-compose > /dev/null 2>&1; then
+ docker_compose_cmd="docker-compose"
+else
+ echo -e "Missing command: either 'docker compose' or 'docker-compose' must be installed"
+ exit 1
+fi
+echo "Using $docker_compose_cmd"
+
+##################################################
+################# PARSE COMMAND ##################
+##################################################
+case $command in
+ build)
+ cd $SCRIPT_DIR
+ if [[ "$os" == "all" ]]; then
+ $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml build
+ else
+ $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml build mgbuild_${toolchain_version}_${os}
+ fi
+ ;;
+ run)
+ cd $SCRIPT_DIR
+ pull=false
+ if [[ "$#" -gt 0 ]]; then
+ if [[ "$1" == "--pull" ]]; then
+ pull=true
+ else
+ echo "Error: Unknown flag '$1'"
+ exit 1
+ fi
+ fi
+ if [[ "$os" == "all" ]]; then
+ if [[ "$pull" == "true" ]]; then
+ $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml pull --ignore-pull-failures
+ elif [[ "$docker_compose_cmd" == "docker compose" ]]; then
+ $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml pull --ignore-pull-failures --policy missing
+ fi
+ $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml up -d
+ else
+ if [[ "$pull" == "true" ]]; then
+ $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml pull mgbuild_${toolchain_version}_${os}
+ elif !
docker image inspect memgraph/mgbuild:${toolchain_version}_${os} > /dev/null 2>&1; then
+ $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml pull --ignore-pull-failures mgbuild_${toolchain_version}_${os}
+ fi
+ $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml up -d mgbuild_${toolchain_version}_${os}
+ fi
+ ;;
+ stop)
+ cd $SCRIPT_DIR
+ remove=false
+ if [[ "$#" -gt 0 ]]; then
+ if [[ "$1" == "--remove" ]]; then
+ remove=true
+ else
+ echo "Error: Unknown flag '$1'"
+ exit 1
+ fi
+ fi
+ if [[ "$os" == "all" ]]; then
+ $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml down
+ else
+ docker stop mgbuild_${toolchain_version}_${os}
+ if [[ "$remove" == "true" ]]; then
+ docker rm mgbuild_${toolchain_version}_${os}
+ fi
+ fi
+ ;;
+ pull)
+ cd $SCRIPT_DIR
+ if [[ "$os" == "all" ]]; then
+ $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml pull --ignore-pull-failures
+ else
+ $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml pull mgbuild_${toolchain_version}_${os}
+ fi
+ ;;
+ push)
+ docker login "$@"
+ cd $SCRIPT_DIR
+ if [[ "$os" == "all" ]]; then
+ $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml push --ignore-push-failures
+ else
+ $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml push mgbuild_${toolchain_version}_${os}
+ fi
+ ;;
+ build-memgraph)
+ build_memgraph "$@"
+ ;;
+ package-memgraph)
+ package_memgraph
+ ;;
+ test-memgraph)
+ test_memgraph "$@"
+ ;;
+ copy)
+ copy_memgraph "$@"
+ ;;
+ *)
+ echo "Error: Unknown command '$command'"
+ exit 1
+ ;;
+esac
diff --git a/release/package/rocky-9.3/Dockerfile b/release/package/rocky-9.3/Dockerfile
new file mode 100644
index 000000000..4ee3a0d78
--- /dev/null
+++ b/release/package/rocky-9.3/Dockerfile
@@ -0,0 +1,40 @@
+FROM rockylinux:9.3
+
+ARG TOOLCHAIN_VERSION
+
+# Stops tzdata interactive configuration.
+RUN yum -y update \
+ && yum install -y wget git
+# Do NOT be smart here and clean the cache because the container is used in the
+# stateful context.
+
+RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-rocky-9.3-amd64.tar.gz \
+ -O toolchain-${TOOLCHAIN_VERSION}-binaries-rocky-9.3-amd64.tar.gz \
+ && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-rocky-9.3-amd64.tar.gz -C /opt \
+ && rm toolchain-${TOOLCHAIN_VERSION}-binaries-rocky-9.3-amd64.tar.gz
+
+# Install toolchain run deps and memgraph build deps
+SHELL ["/bin/bash", "-c"]
+RUN git clone https://github.com/memgraph/memgraph.git \
+ && cd memgraph \
+ && ./environment/os/rocky-9.3.sh install TOOLCHAIN_RUN_DEPS \
+ && ./environment/os/rocky-9.3.sh install MEMGRAPH_BUILD_DEPS \
+ && cd ..
&& rm -rf memgraph
+
+# Add mgdeps-cache and bench-graph-api hostnames
+RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts
+
+# Create mg user and set as default
+RUN useradd -m -s /bin/bash mg
+USER mg
+
+# Install rust
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
+
+# Fix node
+RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash
+
+# Install PyYAML (only for amzn-2, centos-7, centos-9 and rocky-9.3)
+RUN pip3 install --user PyYAML
+
+ENTRYPOINT ["sleep", "infinity"]
diff --git a/release/package/run.sh b/release/package/run.sh
deleted file mode 100755
index bbd5ff48a..000000000
--- a/release/package/run.sh
+++ /dev/null
@@ -1,208 +0,0 @@
-#!/bin/bash
-
-set -Eeuo pipefail
-
-SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-SUPPORTED_OS=(
- centos-7 centos-9
- debian-10 debian-11 debian-11-arm
- ubuntu-18.04 ubuntu-20.04 ubuntu-22.04 ubuntu-22.04-arm
- fedora-36
- amzn-2
-)
-
-SUPPORTED_BUILD_TYPES=(
- Debug
- Release
- RelWithDebInfo
-)
-
-PROJECT_ROOT="$SCRIPT_DIR/../.."
-TOOLCHAIN_VERSION="toolchain-v4"
-ACTIVATE_TOOLCHAIN="source /opt/${TOOLCHAIN_VERSION}/activate"
-HOST_OUTPUT_DIR="$PROJECT_ROOT/build/output"
-
-print_help () {
- # TODO(gitbuda): Update the release/package/run.sh help
- echo "$0 init|package|docker|test {os} {build_type} [--for-docker|--for-platform]"
- echo ""
- echo " OSs: ${SUPPORTED_OS[*]}"
- echo " Build types: ${SUPPORTED_BUILD_TYPES[*]}"
- exit 1
-}
-
-make_package () {
- os="$1"
- build_type="$2"
-
- build_container="mgbuild_$os"
- echo "Building Memgraph for $os on $build_container..."
-
- package_command=""
- if [[ "$os" =~ ^"centos".* ]] || [[ "$os" =~ ^"fedora".* ]] || [[ "$os" =~ ^"amzn".* ]]; then
- docker exec "$build_container" bash -c "yum -y update"
- package_command=" cpack -G RPM --config ../CPackConfig.cmake && rpmlint --file='../../release/rpm/rpmlintrc' memgraph*.rpm "
- fi
- if [[ "$os" =~ ^"debian".* ]]; then
- docker exec "$build_container" bash -c "apt --allow-releaseinfo-change -y update"
- package_command=" cpack -G DEB --config ../CPackConfig.cmake "
- fi
- if [[ "$os" =~ ^"ubuntu".* ]]; then
- docker exec "$build_container" bash -c "apt update"
- package_command=" cpack -G DEB --config ../CPackConfig.cmake "
- fi
- telemetry_id_override_flag=""
- if [[ "$#" -gt 2 ]]; then
- if [[ "$3" == "--for-docker" ]]; then
- telemetry_id_override_flag=" -DMG_TELEMETRY_ID_OVERRIDE=DOCKER "
- elif [[ "$3" == "--for-platform" ]]; then
- telemetry_id_override_flag=" -DMG_TELEMETRY_ID_OVERRIDE=DOCKER-PLATFORM"
- else
- print_help
- exit
- fi
- fi
-
- echo "Copying project files..."
- # If master is not the current branch, fetch it, because the get_version
- # script depends on it. If we are on master, the fetch command is going to
- # fail so that's why there is the explicit check.
- # Required here because Docker build container can't access remote.
- cd "$PROJECT_ROOT"
- if [[ "$(git rev-parse --abbrev-ref HEAD)" != "master" ]]; then
- git fetch origin master:master
- fi
-
- # Ensure we have a clean build directory
- docker exec "$build_container" rm -rf /memgraph
-
- docker exec "$build_container" mkdir -p /memgraph
- # TODO(gitbuda): Revisit copying the whole repo -> makese sense under CI.
- docker cp "$PROJECT_ROOT/."
"$build_container:/memgraph/" - - container_build_dir="/memgraph/build" - container_output_dir="$container_build_dir/output" - - # TODO(gitbuda): TOOLCHAIN_RUN_DEPS should be installed during the Docker - # image build phase, but that is not easy at this point because the - # environment/os/{os}.sh does not come within the toolchain package. When - # migrating to the next version of toolchain do that, and remove the - # TOOLCHAIN_RUN_DEPS installation from here. - # TODO(gitbuda): On the other side, having this here allows updating deps - # wihout reruning the build containers. - echo "Installing dependencies using '/memgraph/environment/os/$os.sh' script..." - docker exec "$build_container" bash -c "/memgraph/environment/os/$os.sh install TOOLCHAIN_RUN_DEPS" - docker exec "$build_container" bash -c "/memgraph/environment/os/$os.sh install MEMGRAPH_BUILD_DEPS" - - echo "Building targeted package..." - # Fix issue with git marking directory as not safe - docker exec "$build_container" bash -c "cd /memgraph && git config --global --add safe.directory '*'" - docker exec "$build_container" bash -c "cd /memgraph && $ACTIVATE_TOOLCHAIN && ./init" - docker exec "$build_container" bash -c "cd $container_build_dir && rm -rf ./*" - # TODO(gitbuda): cmake fails locally if remote is clone via ssh because of the key -> FIX - if [[ "$os" =~ "-arm" ]]; then - docker exec "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN && cmake -DCMAKE_BUILD_TYPE=$build_type -DMG_ARCH="ARM64" $telemetry_id_override_flag .." - else - docker exec "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN && cmake -DCMAKE_BUILD_TYPE=$build_type $telemetry_id_override_flag .." - fi - # ' is used instead of " because we need to run make within the allowed - # container resources. - # shellcheck disable=SC2016 - docker exec "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN "'&& make -j$(nproc)' - docker exec "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN "'&& make -j$(nproc) -B mgconsole' - docker exec "$build_container" bash -c "mkdir -p $container_output_dir && cd $container_output_dir && $ACTIVATE_TOOLCHAIN && $package_command" - - echo "Copying targeted package to host..." - last_package_name=$(docker exec "$build_container" bash -c "cd $container_output_dir && ls -t memgraph* | head -1") - # The operating system folder is introduced because multiple different - # packages could be preserved during the same build "session". - mkdir -p "$HOST_OUTPUT_DIR/$os" - package_host_destination="$HOST_OUTPUT_DIR/$os/$last_package_name" - docker cp "$build_container:$container_output_dir/$last_package_name" "$package_host_destination" - echo "Package saved to $package_host_destination." -} - -case "$1" in - init) - cd "$SCRIPT_DIR" - if ! which "docker-compose" >/dev/null; then - docker_compose_cmd="docker compose" - else - docker_compose_cmd="docker-compose" - fi - $docker_compose_cmd build --build-arg TOOLCHAIN_VERSION="${TOOLCHAIN_VERSION}" - $docker_compose_cmd up -d - ;; - - docker) - # NOTE: Docker is build on top of Debian 11 package. 
- based_on_os="debian-11" - # shellcheck disable=SC2012 - last_package_name=$(cd "$HOST_OUTPUT_DIR/$based_on_os" && ls -t memgraph* | head -1) - docker_build_folder="$PROJECT_ROOT/release/docker" - cd "$docker_build_folder" - ./package_docker --latest "$HOST_OUTPUT_DIR/$based_on_os/$last_package_name" - # shellcheck disable=SC2012 - docker_image_name=$(cd "$docker_build_folder" && ls -t memgraph* | head -1) - docker_host_folder="$HOST_OUTPUT_DIR/docker" - docker_host_image_path="$docker_host_folder/$docker_image_name" - mkdir -p "$docker_host_folder" - cp "$docker_build_folder/$docker_image_name" "$docker_host_image_path" - echo "Docker images saved to $docker_host_image_path." - ;; - - package) - shift 1 - if [[ "$#" -lt 2 ]]; then - print_help - fi - os="$1" - build_type="$2" - shift 2 - is_os_ok=false - for supported_os in "${SUPPORTED_OS[@]}"; do - if [[ "$supported_os" == "${os}" ]]; then - is_os_ok=true - break - fi - done - is_build_type_ok=false - for supported_build_type in "${SUPPORTED_BUILD_TYPES[@]}"; do - if [[ "$supported_build_type" == "${build_type}" ]]; then - is_build_type_ok=true - break - fi - done - if [[ "$is_os_ok" == true && "$is_build_type_ok" == true ]]; then - make_package "$os" "$build_type" "$@" - else - if [[ "$is_os_ok" == false ]]; then - echo "Unsupported OS: $os" - elif [[ "$is_build_type_ok" == false ]]; then - echo "Unsupported build type: $build_type" - fi - print_help - fi - ;; - - build) - shift 1 - if [[ "$#" -ne 2 ]]; then - print_help - fi - # in the vX format, e.g. v5 - toolchain_version="$1" - # a name of the os folder, e.g. ubuntu-22.04-arm - os="$2" - cd "$SCRIPT_DIR/$os" - docker build -f Dockerfile --build-arg TOOLCHAIN_VERSION="toolchain-$toolchain_version" -t "memgraph/memgraph-builder:${toolchain_version}_$os" . - ;; - - test) - echo "TODO(gitbuda): Test all packages on mgtest containers." - ;; - - *) - print_help - ;; -esac diff --git a/release/package/ubuntu-18.04/Dockerfile b/release/package/ubuntu-18.04/Dockerfile index 97eceead4..b4b090984 100644 --- a/release/package/ubuntu-18.04/Dockerfile +++ b/release/package/ubuntu-18.04/Dockerfile @@ -10,9 +10,30 @@ RUN apt update && apt install -y \ # Do NOT be smart here and clean the cache because the container is used in the # stateful context. -RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/${TOOLCHAIN_VERSION}/${TOOLCHAIN_VERSION}-binaries-ubuntu-18.04-amd64.tar.gz \ - -O ${TOOLCHAIN_VERSION}-binaries-ubuntu-18.04-amd64.tar.gz \ - && tar xzvf ${TOOLCHAIN_VERSION}-binaries-ubuntu-18.04-amd64.tar.gz -C /opt \ - && rm ${TOOLCHAIN_VERSION}-binaries-ubuntu-18.04-amd64.tar.gz +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-18.04-amd64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-18.04-amd64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-18.04-amd64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-18.04-amd64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/ubuntu-18.04.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/ubuntu-18.04.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. 
&& rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/ubuntu-20.04/Dockerfile b/release/package/ubuntu-20.04/Dockerfile index 9fea7fd79..30d6cda8e 100644 --- a/release/package/ubuntu-20.04/Dockerfile +++ b/release/package/ubuntu-20.04/Dockerfile @@ -10,9 +10,30 @@ RUN apt update && apt install -y \ # Do NOT be smart here and clean the cache because the container is used in the # stateful context. -RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/${TOOLCHAIN_VERSION}/${TOOLCHAIN_VERSION}-binaries-ubuntu-20.04-amd64.tar.gz \ - -O ${TOOLCHAIN_VERSION}-binaries-ubuntu-20.04-amd64.tar.gz \ - && tar xzvf ${TOOLCHAIN_VERSION}-binaries-ubuntu-20.04-amd64.tar.gz -C /opt \ - && rm ${TOOLCHAIN_VERSION}-binaries-ubuntu-20.04-amd64.tar.gz +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-20.04-amd64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-20.04-amd64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-20.04-amd64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-20.04-amd64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/ubuntu-20.04.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/ubuntu-20.04.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. && rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/ubuntu-22.04-arm/Dockerfile b/release/package/ubuntu-22.04-arm/Dockerfile index 56cac8f76..aa7b5b63f 100644 --- a/release/package/ubuntu-22.04-arm/Dockerfile +++ b/release/package/ubuntu-22.04-arm/Dockerfile @@ -10,9 +10,30 @@ RUN apt update && apt install -y \ # Do NOT be smart here and clean the cache because the container is used in the # stateful context. 
-RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/${TOOLCHAIN_VERSION}/${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-arm64.tar.gz \ - -O ${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-arm64.tar.gz \ - && tar xzvf ${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-arm64.tar.gz -C /opt \ - && rm ${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-arm64.tar.gz +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-arm64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-arm64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-arm64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-arm64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/ubuntu-22.04-arm.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/ubuntu-22.04-arm.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. && rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/ubuntu-22.04/Dockerfile b/release/package/ubuntu-22.04/Dockerfile index 6bd22589b..652de0f5c 100644 --- a/release/package/ubuntu-22.04/Dockerfile +++ b/release/package/ubuntu-22.04/Dockerfile @@ -10,9 +10,30 @@ RUN apt update && apt install -y \ # Do NOT be smart here and clean the cache because the container is used in the # stateful context. -RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/${TOOLCHAIN_VERSION}/${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-amd64.tar.gz \ - -O ${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-amd64.tar.gz \ - && tar xzvf ${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-amd64.tar.gz -C /opt \ - && rm ${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-amd64.tar.gz +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-amd64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-amd64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-amd64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-amd64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/ubuntu-22.04.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/ubuntu-22.04.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. 
&& rm -rf memgraph
+
+# Add mgdeps-cache and bench-graph-api hostnames
+RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts
+
+# Create mg user and set as default
+RUN useradd -m -s /bin/bash mg
+USER mg
+
+# Install rust
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
+
+# Fix node
+RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash
 
 ENTRYPOINT ["sleep", "infinity"]
diff --git a/src/coordination/raft_state.cpp b/src/coordination/raft_state.cpp
index 38acfd85e..fd93160b6 100644
--- a/src/coordination/raft_state.cpp
+++ b/src/coordination/raft_state.cpp
@@ -12,6 +12,7 @@
 #ifdef MG_ENTERPRISE
 #include
+#include
 #include "coordination/coordinator_config.hpp"
 #include "coordination/coordinator_exceptions.hpp"
 #include "coordination/raft_state.hpp"
@@ -123,7 +124,7 @@ auto RaftState::AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_po
     spdlog::info("Request to add server {} to the cluster accepted", endpoint);
   } else {
     throw RaftAddServerException("Failed to accept request to add server {} to the cluster with error code {}",
-                                 endpoint, cmd_result->get_result_code());
+                                 endpoint, int(cmd_result->get_result_code()));
   }
 
   // Waiting for server to join
@@ -173,7 +174,8 @@ auto RaftState::AppendRegisterReplicationInstanceLog(CoordinatorClientConfig con
   spdlog::info("Request for registering instance {} accepted", config.instance_name);
 
   if (res->get_result_code() != nuraft::cmd_result_code::OK) {
-    spdlog::error("Failed to register instance {} with error code {}", config.instance_name, res->get_result_code());
+    spdlog::error("Failed to register instance {} with error code {}", config.instance_name,
+                  int(res->get_result_code()));
     return false;
   }
 
@@ -194,7 +196,7 @@ auto RaftState::AppendUnregisterReplicationInstanceLog(std::string_view instance
   spdlog::info("Request for unregistering instance {} accepted", instance_name);
 
   if (res->get_result_code() != nuraft::cmd_result_code::OK) {
-    spdlog::error("Failed to unregister instance {} with error code {}", instance_name, res->get_result_code());
+    spdlog::error("Failed to unregister instance {} with error code {}", instance_name, int(res->get_result_code()));
     return false;
   }
   return true;
@@ -214,7 +216,7 @@ auto RaftState::AppendSetInstanceAsMainLog(std::string_view instance_name) -> bo
   spdlog::info("Request for promoting instance {} accepted", instance_name);
 
   if (res->get_result_code() != nuraft::cmd_result_code::OK) {
-    spdlog::error("Failed to promote instance {} with error code {}", instance_name, res->get_result_code());
+    spdlog::error("Failed to promote instance {} with error code {}", instance_name, int(res->get_result_code()));
     return false;
   }
   return true;
@@ -233,7 +235,7 @@ auto RaftState::AppendSetInstanceAsReplicaLog(std::string_view instance_name) ->
   spdlog::info("Request for demoting instance {} accepted", instance_name);
 
   if (res->get_result_code() != nuraft::cmd_result_code::OK) {
-    spdlog::error("Failed to promote instance {} with error code {}", instance_name, res->get_result_code());
+    spdlog::error("Failed to demote instance {} with error code {}", instance_name, int(res->get_result_code()));
     return false;
   }
 
@@ -252,7 +254,7 @@ auto RaftState::AppendUpdateUUIDLog(utils::UUID const &uuid) -> bool {
   spdlog::info("Request for updating UUID accepted");
 
   if (res->get_result_code() != nuraft::cmd_result_code::OK) {
-    spdlog::error("Failed to update UUID with error code {}", res->get_result_code());
+    spdlog::error("Failed to update UUID with
error code {}", int(res->get_result_code())); return false; } diff --git a/src/dbms/inmemory/replication_handlers.cpp b/src/dbms/inmemory/replication_handlers.cpp index 3e4a31884..69f04914c 100644 --- a/src/dbms/inmemory/replication_handlers.cpp +++ b/src/dbms/inmemory/replication_handlers.cpp @@ -589,7 +589,6 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage if (timestamp < storage->timestamp_) { continue; } - SPDLOG_INFO(" Delta {}", applied_deltas); switch (delta.type) { case WalDeltaData::Type::VERTEX_CREATE: { @@ -634,9 +633,10 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage break; } case WalDeltaData::Type::VERTEX_SET_PROPERTY: { - spdlog::trace(" Vertex {} set property {} to {}", delta.vertex_edge_set_property.gid.AsUint(), - delta.vertex_edge_set_property.property, delta.vertex_edge_set_property.value); + spdlog::trace(" Vertex {} set property", delta.vertex_edge_set_property.gid.AsUint()); + // NOLINTNEXTLINE auto *transaction = get_transaction(timestamp); + // NOLINTNEXTLINE auto vertex = transaction->FindVertex(delta.vertex_edge_set_property.gid, View::NEW); if (!vertex) throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__); @@ -684,8 +684,7 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage break; } case WalDeltaData::Type::EDGE_SET_PROPERTY: { - spdlog::trace(" Edge {} set property {} to {}", delta.vertex_edge_set_property.gid.AsUint(), - delta.vertex_edge_set_property.property, delta.vertex_edge_set_property.value); + spdlog::trace(" Edge {} set property", delta.vertex_edge_set_property.gid.AsUint()); if (!storage->config_.salient.items.properties_on_edges) throw utils::BasicException( "Can't set properties on edges because properties on edges " @@ -917,5 +916,4 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage spdlog::debug("Applied {} deltas", applied_deltas); return applied_deltas; } - } // namespace memgraph::dbms diff --git a/src/storage/v2/replication/replication_client.cpp b/src/storage/v2/replication/replication_client.cpp index 3c1081206..a02c1eff0 100644 --- a/src/storage/v2/replication/replication_client.cpp +++ b/src/storage/v2/replication/replication_client.cpp @@ -13,6 +13,7 @@ #include "replication/replication_client.hpp" #include "storage/v2/inmemory/storage.hpp" +#include "storage/v2/replication/enums.hpp" #include "storage/v2/storage.hpp" #include "utils/exceptions.hpp" #include "utils/on_scope_exit.hpp" @@ -247,11 +248,13 @@ bool ReplicationStorageClient::FinalizeTransactionReplication(Storage *storage, MG_ASSERT(replica_stream_, "Missing stream for transaction deltas"); try { auto response = replica_stream_->Finalize(); - return replica_state_.WithLock([storage, &response, db_acc = std::move(db_acc), this](auto &state) mutable { + // NOLINTNEXTLINE + return replica_state_.WithLock([storage, response, db_acc = std::move(db_acc), this](auto &state) mutable { replica_stream_.reset(); if (!response.success || state == replication::ReplicaState::RECOVERY) { state = replication::ReplicaState::RECOVERY; - client_.thread_pool_.AddTask([storage, &response, db_acc = std::move(db_acc), this] { + // NOLINTNEXTLINE + client_.thread_pool_.AddTask([storage, response, db_acc = std::move(db_acc), this] { this->RecoverReplica(response.current_commit_timestamp, storage); }); return false; diff --git a/tests/code_analysis/clang_tidy.sh b/tests/code_analysis/clang_tidy.sh new file mode 100755 
index 000000000..f8bdfc252 --- /dev/null +++ b/tests/code_analysis/clang_tidy.sh @@ -0,0 +1,23 @@ +#!/bin/bash +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +PROJECT_ROOT="$SCRIPT_DIR/../.." +BASE_BRANCH="origin/master" +THREADS=${THREADS:-$(nproc)} + +if [[ "$#" -gt 0 ]]; then + case "$1" in + --base-branch) + BASE_BRANCH=$2 + ;; + *) + echo "Error: Unknown flag '$1'" + exit 1 + ;; + esac +fi + +cd $PROJECT_ROOT +git diff -U0 $BASE_BRANCH... -- src | ./tools/github/clang-tidy/clang-tidy-diff.py -p 1 -j $THREADS -path build -regex ".+\.cpp" | tee ./build/clang_tidy_output.txt +# Fail if any warning is reported +! cat ./build/clang_tidy_output.txt | ./tools/github/clang-tidy/grep_error_lines.sh > /dev/null +cd $SCRIPT_DIR diff --git a/tests/code_analysis/python_code_analysis.sh b/tests/code_analysis/python_code_analysis.sh new file mode 100755 index 000000000..500a873d1 --- /dev/null +++ b/tests/code_analysis/python_code_analysis.sh @@ -0,0 +1,27 @@ +#!/bin/bash +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +PROJECT_ROOT="$SCRIPT_DIR/../.." +BASE_BRANCH="origin/master" + +if [[ "$#" -gt 0 ]]; then + case "$1" in + --base-branch) + BASE_BRANCH=$2 + ;; + *) + echo "Error: Unknown flag '$1'" + exit 1 + ;; + esac +fi + +cd $PROJECT_ROOT +CHANGED_FILES=$(git diff -U0 $BASE_BRANCH... --name-only --diff-filter=d) +for file in ${CHANGED_FILES}; do + echo ${file} + if [[ ${file} == *.py ]]; then + python3 -m black --check --diff ${file} + python3 -m isort --profile black --check-only --diff ${file} + fi +done +cd $SCRIPT_DIR diff --git a/tests/e2e/module_file_manager/module_file_manager.cpp b/tests/e2e/module_file_manager/module_file_manager.cpp index 20641b9d7..73508d81b 100644 --- a/tests/e2e/module_file_manager/module_file_manager.cpp +++ b/tests/e2e/module_file_manager/module_file_manager.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -126,7 +126,7 @@ std::filesystem::path CreateModuleFile(auto &client, const std::string_view file } std::string GetModuleFileQuery(const std::filesystem::path &path) { - return fmt::format("CALL mg.get_module_file({}) YIELD content", path); + return fmt::format("CALL mg.get_module_file('{}') YIELD content", path); } std::string GetModuleFile(auto &client, const std::filesystem::path &path) { @@ -141,7 +141,7 @@ std::string GetModuleFile(auto &client, const std::filesystem::path &path) { } std::string UpdateModuleFileQuery(const std::filesystem::path &path, const std::string_view content) { - return fmt::format("CALL mg.update_module_file({}, '{}')", path, content); + return fmt::format("CALL mg.update_module_file('{}', '{}')", path, content); } void UpdateModuleFile(auto &client, const std::filesystem::path &path, const std::string_view content) { @@ -151,7 +151,7 @@ void UpdateModuleFile(auto &client, const std::filesystem::path &path, const std } std::string DeleteModuleFileQuery(const std::filesystem::path &path) { - return fmt::format("CALL mg.delete_module_file({})", path); + return fmt::format("CALL mg.delete_module_file('{}')", path); } void DeleteModuleFile(auto &client, const std::filesystem::path &path) { diff --git a/tests/e2e/run.sh b/tests/e2e/run.sh index 88b70ae32..9ee4babe9 100755 --- a/tests/e2e/run.sh +++ b/tests/e2e/run.sh @@ -25,7 +25,7 @@ if [ "$#" -eq 0 ]; then # NOTE: If you want to run all tests under specific folder/section just # replace the dot (root directory below) with the folder name, e.g. # `--workloads-root-directory replication`. - python3 runner.py --workloads-root-directory "$SCRIPT_DIR/../../build" + python3 runner.py --workloads-root-directory "$SCRIPT_DIR/../../build/tests/e2e" elif [ "$#" -eq 1 ]; then if [ "$1" == "-h" ] || [ "$1" == "--help" ]; then print_help @@ -34,7 +34,7 @@ elif [ "$#" -eq 1 ]; then # NOTE: --workload-name comes from each individual folder/section # workloads.yaml file. E.g. `streams/workloads.yaml` has a list of # `workloads:` and each workload has it's `-name`. - python3 runner.py --workloads-root-directory "$SCRIPT_DIR/../../build" --workload-name "$1" + python3 runner.py --workloads-root-directory "$SCRIPT_DIR/../../build/tests/e2e" --workload-name "$1" else print_help fi diff --git a/tests/e2e/runner.py b/tests/e2e/runner.py index ae022d4d8..282629b20 100755 --- a/tests/e2e/runner.py +++ b/tests/e2e/runner.py @@ -40,6 +40,9 @@ def load_args(): def load_workloads(root_directory): workloads = [] for file in Path(root_directory).rglob("*.yaml"): + # 8.03.2024. - Skip streams e2e tests + if str(file).endswith("/streams/workloads.yaml"): + continue with open(file, "r") as f: workloads.extend(yaml.load(f, Loader=yaml.FullLoader)["workloads"]) return workloads diff --git a/tests/e2e/streams/conftest.py b/tests/e2e/streams/conftest.py index 1bf3544c2..2dadfcb28 100644 --- a/tests/e2e/streams/conftest.py +++ b/tests/e2e/streams/conftest.py @@ -9,6 +9,7 @@ # by the Apache License, Version 2.0, included in the file # licenses/APL.txt. +# import os import pulsar import pytest from common import NAME, PULSAR_SERVICE_URL, connect, execute_and_fetch_all @@ -20,6 +21,9 @@ import requests # To run these test locally a running Kafka sever is necessery. The test tries # to connect on localhost:9092. 
+# KAFKA_HOSTNAME=os.getenv("KAFKA_HOSTNAME", "localhost") +# PULSAR_HOSTNAME=os.getenv("PULSAR_HOSTNAME", "localhost") +# PULSAR_PORT="6652" if PULSAR_HOSTNAME == "localhost" else "8080" @pytest.fixture(autouse=True) def connection(): diff --git a/tests/e2e/streams/kafka_streams_tests.py b/tests/e2e/streams/kafka_streams_tests.py index b988a6c26..17decdc0f 100755 --- a/tests/e2e/streams/kafka_streams_tests.py +++ b/tests/e2e/streams/kafka_streams_tests.py @@ -11,6 +11,7 @@ # by the Apache License, Version 2.0, included in the file # licenses/APL.txt. +# import os import sys import time from multiprocessing import Process @@ -23,7 +24,7 @@ from mg_utils import mg_sleep_and_assert TRANSFORMATIONS_TO_CHECK_C = ["c_transformations.empty_transformation"] TRANSFORMATIONS_TO_CHECK_PY = ["kafka_transform.simple", "kafka_transform.with_parameters"] KAFKA_PRODUCER_SENDING_MSG_DEFAULT_TIMEOUT = 60 - +# KAFKA_HOSTNAME=os.getenv("KAFKA_HOSTNAME", "localhost") @pytest.mark.parametrize("transformation", TRANSFORMATIONS_TO_CHECK_PY) def test_simple(kafka_producer, kafka_topics, connection, transformation): @@ -162,7 +163,7 @@ def test_show_streams(kafka_topics, connection): complex_values_stream = "complex_values" common.create_stream( - cursor, default_values_stream, kafka_topics[0], "kafka_transform.simple", bootstrap_servers="'localhost:29092'" + cursor, default_values_stream, kafka_topics[0], "kafka_transform.simple", bootstrap_servers="'localhost:29092'" ) common.create_stream( cursor, diff --git a/tests/e2e/streams/pulsar_streams_tests.py b/tests/e2e/streams/pulsar_streams_tests.py index cf52416cb..49aa773e6 100755 --- a/tests/e2e/streams/pulsar_streams_tests.py +++ b/tests/e2e/streams/pulsar_streams_tests.py @@ -11,6 +11,7 @@ # by the Apache License, Version 2.0, included in the file # licenses/APL.txt. +# import os import sys import time from multiprocessing import Process, Value @@ -20,6 +21,7 @@ import mgclient import pytest TRANSFORMATIONS_TO_CHECK = ["pulsar_transform.simple", "pulsar_transform.with_parameters"] +# PULSAR_HOSTNAME=os.getenv("PULSAR_HOSTNAME", "127.0.0.1") def check_vertex_exists_with_topic_and_payload(cursor, topic, payload_byte): diff --git a/tests/setup.sh b/tests/setup.sh index 7cab86db6..b91eea7c3 100755 --- a/tests/setup.sh +++ b/tests/setup.sh @@ -37,6 +37,7 @@ pip --timeout 1000 install "pulsar-client==3.1.0" for pkg in "${PIP_DEPS[@]}"; do pip --timeout 1000 install "$pkg" done +pip --timeout 1000 install "networkx==2.4" # Install mgclient from source becasue of full flexibility. 
pushd "$DIR/../libs/pymgclient" > /dev/null diff --git a/tests/unit/database_get_info.cpp b/tests/unit/database_get_info.cpp index be6885b37..2547378c6 100644 --- a/tests/unit/database_get_info.cpp +++ b/tests/unit/database_get_info.cpp @@ -197,8 +197,8 @@ TYPED_TEST(InfoTest, InfoCheck) { ASSERT_EQ(info.storage_info.vertex_count, 5); ASSERT_EQ(info.storage_info.edge_count, 2); ASSERT_EQ(info.storage_info.average_degree, 0.8); - ASSERT_GT(info.storage_info.memory_res, 10'000'000); // 250MB < > 10MB - ASSERT_LT(info.storage_info.memory_res, 250'000'000); + ASSERT_GT(info.storage_info.memory_res, 10'000'000); // 300MB < > 10MB + ASSERT_LT(info.storage_info.memory_res, 300'000'000); ASSERT_GT(info.storage_info.disk_usage, 100); // 1MB < > 100B ASSERT_LT(info.storage_info.disk_usage, 1000'000); ASSERT_EQ(info.storage_info.label_indices, 1); diff --git a/tools/github/clang-tidy/clang-tidy-diff.py b/tools/github/clang-tidy/clang-tidy-diff.py index 1bdf1da25..609b2eedb 100755 --- a/tools/github/clang-tidy/clang-tidy-diff.py +++ b/tools/github/clang-tidy/clang-tidy-diff.py @@ -250,7 +250,6 @@ def main(): common_clang_tidy_args.append("-extra-arg=%s" % arg) for arg in args.extra_arg_before: common_clang_tidy_args.append("-extra-arg-before=%s" % arg) - for name in lines_by_file: line_filter_json = json.dumps([{"name": name, "lines": lines_by_file[name]}], separators=(",", ":")) @@ -266,7 +265,6 @@ def main(): command.extend(common_clang_tidy_args) command.append(name) command.extend(clang_tidy_args) - task_queue.put(command) # Wait for all threads to be done. From 8bc8e867e48d9706b0519c4c370ed77811f75335 Mon Sep 17 00:00:00 2001 From: Gareth Andrew Lloyd Date: Thu, 14 Mar 2024 18:21:59 +0000 Subject: [PATCH 06/16] Pmr allocator unify (#1801) Query allocator and evaluation allocator were different. After analysis, was determined they should be the same, this will help future development reduce TypeValue copies during queries. 
Changes:
- Common allocator, PoolResource backed by MonotonicResource
- Optimized Pool, now O(1) alloc/dealloc as all chunks in Pool form a single free list
- 2nd PoolResource, using bin sizing, not as perfect for memory usage but O(1) bin selection
- Now have jemalloc's background thread to make sure decay and return to OS happen
- Optimized PropertyValue to be faster at destruction/copy/move
- Fewer temporary memory allocations
- CSV reader now maintains a common line buffer it reuses on line reads
- Writing out bolt values now reuses a values buffer
- Evaluating an int no longer makes temporary strings for errors that it most likely never throws
- ExpandVariable will reuse an existing edge list in the frame if one existed
---
 CMakeLists.txt                       |  13 ++
 libs/setup.sh                        |   4 +-
 src/csv/include/csv/parsing.hpp      |   4 +-
 src/csv/parsing.cpp                  |  37 +++-
 src/glue/SessionHL.cpp               |  21 +-
 src/query/interpret/eval.cpp         |   6 +-
 src/query/interpret/eval.hpp         |   2 +-
 src/query/interpreter.cpp            | 193 +++-------------
 src/query/interpreter.hpp            |  88 +++++---
 src/query/plan/operator.cpp          |  50 +++--
 src/query/plan/operator.hpp          |  19 +-
 src/query/trigger.cpp                |  23 +-
 src/query/trigger.hpp                |   2 +-
 src/storage/v2/property_value.hpp    | 225 +++++++++++++------
 src/utils/memory.cpp                 | 317 ++++++++++++---------------
 src/utils/memory.hpp                 | 298 +++++++++++++++++--------
 src/utils/tag.hpp                    |  32 +++
 tests/benchmark/query/execution.cpp  |   4 +-
 tests/benchmark/skip_list_vs_stl.cpp |  14 +-
 tests/e2e/memory/workloads.yaml      |  20 +-
 tests/mgbench/runners.py             |   2 +
 tests/unit/property_value_v2.cpp     |   4 +-
 tests/unit/utils_memory.cpp          | 130 +----------
 23 files changed, 738 insertions(+), 770 deletions(-)
 create mode 100644 src/utils/tag.hpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 028406447..c02039497 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -300,6 +300,19 @@ endif()
 
 option(ENABLE_JEMALLOC "Use jemalloc" ON)
 
+option(MG_MEMORY_PROFILE "If build should be setup for memory profiling" OFF)
+if (MG_MEMORY_PROFILE AND ENABLE_JEMALLOC)
+  message(STATUS "Jemalloc has been disabled because MG_MEMORY_PROFILE is enabled")
+  set(ENABLE_JEMALLOC OFF)
+endif ()
+if (MG_MEMORY_PROFILE AND ASAN)
+  message(STATUS "ASAN has been disabled because MG_MEMORY_PROFILE is enabled")
+  set(ASAN OFF)
+endif ()
+if (MG_MEMORY_PROFILE)
+  add_compile_definitions(MG_MEMORY_PROFILE)
+endif ()
+
 if (ASAN)
   message(WARNING "Disabling jemalloc as it doesn't work well with ASAN")
   set(ENABLE_JEMALLOC OFF)
diff --git a/libs/setup.sh b/libs/setup.sh
index e1e1243af..e23c5efef 100755
--- a/libs/setup.sh
+++ b/libs/setup.sh
@@ -268,13 +268,13 @@ repo_clone_try_double "${primary_urls[jemalloc]}" "${secondary_urls[jemalloc]}"
 pushd jemalloc
 
 ./autogen.sh
-MALLOC_CONF="retain:false,percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:5000,dirty_decay_ms:5000" \
+MALLOC_CONF="background_thread:true,retain:false,percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:5000,dirty_decay_ms:5000" \
 ./configure \
     --disable-cxx \
     --with-lg-page=12 \
     --with-lg-hugepage=21 \
     --enable-shared=no --prefix=$working_dir \
-    --with-malloc-conf="retain:false,percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:5000,dirty_decay_ms:5000"
+    --with-malloc-conf="background_thread:true,retain:false,percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:5000,dirty_decay_ms:5000"
 
 make -j$CPUS install
 popd
diff --git a/src/csv/include/csv/parsing.hpp b/src/csv/include/csv/parsing.hpp
index 66f2913c8..0accc616d 100644
--- a/src/csv/include/csv/parsing.hpp
+++ b/src/csv/include/csv/parsing.hpp
@@ -1,4 +1,4
@@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -119,6 +119,8 @@ class Reader { auto GetHeader() const -> Header const &; auto GetNextRow(utils::MemoryResource *mem) -> std::optional; + void Reset(); + private: // Some implementation issues that need clearing up, but this is mainly because // I don't want `boost/iostreams/filtering_stream.hpp` included in this header file diff --git a/src/csv/parsing.cpp b/src/csv/parsing.cpp index 6d03dc7fd..6961a42e4 100644 --- a/src/csv/parsing.cpp +++ b/src/csv/parsing.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -34,6 +34,10 @@ struct Reader::impl { [[nodiscard]] bool HasHeader() const { return read_config_.with_header; } [[nodiscard]] auto Header() const -> Header const & { return header_; } + void Reset() { + line_buffer_.clear(); + line_buffer_.shrink_to_fit(); + } auto GetNextRow(utils::MemoryResource *mem) -> std::optional; @@ -42,7 +46,7 @@ struct Reader::impl { void TryInitializeHeader(); - std::optional GetNextLine(utils::MemoryResource *mem); + bool GetNextLine(); ParsingResult ParseHeader(); @@ -55,6 +59,8 @@ struct Reader::impl { Config read_config_; uint64_t line_count_{1}; uint16_t number_of_columns_{0}; + uint64_t estimated_number_of_columns_{0}; + utils::pmr::string line_buffer_{memory_}; Reader::Header header_{memory_}; }; @@ -129,17 +135,16 @@ void Reader::impl::InitializeStream() { MG_ASSERT(csv_stream_.is_complete(), "Should be 'complete' for correct operation"); } -std::optional Reader::impl::GetNextLine(utils::MemoryResource *mem) { - utils::pmr::string line(mem); - if (!std::getline(csv_stream_, line)) { +bool Reader::impl::GetNextLine() { + if (!std::getline(csv_stream_, line_buffer_)) { // reached end of file or an I/0 error occurred if (!csv_stream_.good()) { csv_stream_.reset(); // this will close the file_stream_ and clear the chain } - return std::nullopt; + return false; } ++line_count_; - return std::move(line); + return true; } Reader::ParsingResult Reader::impl::ParseHeader() { @@ -170,6 +175,8 @@ void Reader::impl::TryInitializeHeader() { const Reader::Header &Reader::GetHeader() const { return pimpl->Header(); } +void Reader::Reset() { pimpl->Reset(); } + namespace { enum class CsvParserState : uint8_t { INITIAL_FIELD, NEXT_FIELD, QUOTING, EXPECT_DELIMITER, DONE }; @@ -179,6 +186,8 @@ Reader::ParsingResult Reader::impl::ParseRow(utils::MemoryResource *mem) { utils::pmr::vector row(mem); if (number_of_columns_ != 0) { row.reserve(number_of_columns_); + } else if (estimated_number_of_columns_ != 0) { + row.reserve(estimated_number_of_columns_); } utils::pmr::string column(memory_); @@ -186,13 +195,12 @@ Reader::ParsingResult Reader::impl::ParseRow(utils::MemoryResource *mem) { auto state = CsvParserState::INITIAL_FIELD; do { - const auto maybe_line = GetNextLine(mem); - if (!maybe_line) { + if (!GetNextLine()) { // The whole file was processed. 
break;
     }
 
-    std::string_view line_string_view = *maybe_line;
+    std::string_view line_string_view = line_buffer_;
 
     // remove '\r' from the end in case we have dos file format
     if (line_string_view.back() == '\r') {
@@ -312,6 +320,11 @@ Reader::ParsingResult Reader::impl::ParseRow(utils::MemoryResource *mem) {
                     fmt::format("Expected {:d} columns in row {:d}, but got {:d}", number_of_columns_, line_count_ - 1,
                                 row.size()));
   }
 
+  // To avoid unnecessary dynamic growth of the row, remember the number of
+  // columns for future calls
+  if (number_of_columns_ == 0 && estimated_number_of_columns_ == 0) {
+    estimated_number_of_columns_ = row.size();
+  }
+
   return std::move(row);
 }
 
@@ -319,7 +332,7 @@ Reader::ParsingResult Reader::impl::ParseRow(utils::MemoryResource *mem) {
 std::optional Reader::impl::GetNextRow(utils::MemoryResource *mem) {
   auto row = ParseRow(mem);
 
-  if (row.HasError()) {
+  if (row.HasError()) [[unlikely]] {
     if (!read_config_.ignore_bad) {
       throw CsvReadException("CSV Reader: Bad row at line {:d}: {}", line_count_ - 1, row.GetError().message);
     }
@@ -333,7 +346,7 @@ std::optional Reader::impl::GetNextRow(utils::MemoryResource *mem)
     } while (row.HasError());
   }
 
-  if (row->empty()) {
+  if (row->empty()) [[unlikely]] {
     // reached end of file
     return std::nullopt;
   }
diff --git a/src/glue/SessionHL.cpp b/src/glue/SessionHL.cpp
index 6c901516c..6a48f15ca 100644
--- a/src/glue/SessionHL.cpp
+++ b/src/glue/SessionHL.cpp
@@ -59,12 +59,14 @@ class TypedValueResultStreamBase {
  public:
   explicit TypedValueResultStreamBase(memgraph::storage::Storage *storage);
 
-  std::vector DecodeValues(
-      const std::vector &values) const;
+  void DecodeValues(const std::vector &values);
+
+  auto AccessValues() const -> std::vector const & { return decoded_values_; }
 
  protected:
   // NOTE: Needed only for ToBoltValue conversions
   memgraph::storage::Storage *storage_;
+  std::vector decoded_values_;
 };
 
 /// Wrapper around TEncoder which converts TypedValue to Value
@@ -75,16 +77,18 @@ class TypedValueResultStream : public TypedValueResultStreamBase {
   TypedValueResultStream(TEncoder *encoder, memgraph::storage::Storage *storage)
       : TypedValueResultStreamBase{storage}, encoder_(encoder) {}
 
-  void Result(const std::vector &values) { encoder_->MessageRecord(DecodeValues(values)); }
+  void Result(const std::vector &values) {
+    DecodeValues(values);
+    encoder_->MessageRecord(AccessValues());
+  }
 
  private:
   TEncoder *encoder_;
 };
 
-std::vector TypedValueResultStreamBase::DecodeValues(
-    const std::vector &values) const {
-  std::vector decoded_values;
-  decoded_values.reserve(values.size());
+void TypedValueResultStreamBase::DecodeValues(const std::vector &values) {
+  decoded_values_.reserve(values.size());
+  decoded_values_.clear();
   for (const auto &v : values) {
     auto maybe_value = memgraph::glue::ToBoltValue(v, storage_, memgraph::storage::View::NEW);
     if (maybe_value.HasError()) {
@@ -99,9 +103,8 @@ std::vector TypedValueResultStreamBase::De
         throw memgraph::communication::bolt::ClientError("Unexpected storage error when streaming results.");
       }
     }
-    decoded_values.emplace_back(std::move(*maybe_value));
+    decoded_values_.emplace_back(std::move(*maybe_value));
   }
-  return decoded_values;
 }
 
 TypedValueResultStreamBase::TypedValueResultStreamBase(memgraph::storage::Storage *storage) : storage_(storage) {}
diff --git a/src/query/interpret/eval.cpp b/src/query/interpret/eval.cpp
index 8bd308420..7c5d838a5 100644
--- a/src/query/interpret/eval.cpp
+++ b/src/query/interpret/eval.cpp
@@ -1,4 +1,4 @@
-// Copyright 2023 Memgraph Ltd.
+// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -13,12 +13,12 @@ namespace memgraph::query { -int64_t EvaluateInt(ExpressionEvaluator *evaluator, Expression *expr, const std::string &what) { +int64_t EvaluateInt(ExpressionEvaluator *evaluator, Expression *expr, std::string_view what) { TypedValue value = expr->Accept(*evaluator); try { return value.ValueInt(); } catch (TypedValueException &e) { - throw QueryRuntimeException(what + " must be an int"); + throw QueryRuntimeException(std::string(what) + " must be an int"); } } diff --git a/src/query/interpret/eval.hpp b/src/query/interpret/eval.hpp index 2a9fb289f..07a71412c 100644 --- a/src/query/interpret/eval.hpp +++ b/src/query/interpret/eval.hpp @@ -1209,7 +1209,7 @@ class ExpressionEvaluator : public ExpressionVisitor { /// @param what - Name of what's getting evaluated. Used for user feedback (via /// exception) when the evaluated value is not an int. /// @throw QueryRuntimeException if expression doesn't evaluate to an int. -int64_t EvaluateInt(ExpressionEvaluator *evaluator, Expression *expr, const std::string &what); +int64_t EvaluateInt(ExpressionEvaluator *evaluator, Expression *expr, std::string_view what); std::optional EvaluateMemoryLimit(ExpressionVisitor &eval, Expression *memory_limit, size_t memory_scale); diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp index a5c81cc72..1322a7b99 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -246,27 +246,6 @@ std::optional GetOptionalStringValue(query::Expression *expression, return {}; }; -bool IsAllShortestPathsQuery(const std::vector &clauses) { - for (const auto &clause : clauses) { - if (clause->GetTypeInfo() != Match::kType) { - continue; - } - auto *match_clause = utils::Downcast(clause); - for (const auto &pattern : match_clause->patterns_) { - for (const auto &atom : pattern->atoms_) { - if (atom->GetTypeInfo() != EdgeAtom::kType) { - continue; - } - auto *edge_atom = utils::Downcast(atom); - if (edge_atom->type_ == EdgeAtom::Type::ALL_SHORTEST_PATHS) { - return true; - } - } - } - } - return false; -} - inline auto convertFromCoordinatorToReplicationMode(const CoordinatorQuery::SyncMode &sync_mode) -> replication_coordination_glue::ReplicationMode { switch (sync_mode) { @@ -1733,8 +1712,7 @@ struct PullPlan { std::shared_ptr user_or_role, std::atomic *transaction_status, std::shared_ptr tx_timer, TriggerContextCollector *trigger_context_collector = nullptr, - std::optional memory_limit = {}, bool use_monotonic_memory = true, - FrameChangeCollector *frame_change_collector_ = nullptr); + std::optional memory_limit = {}, FrameChangeCollector *frame_change_collector_ = nullptr); std::optional Pull(AnyStream *stream, std::optional n, const std::vector &output_symbols, @@ -1759,26 +1737,17 @@ struct PullPlan { // we have to keep track of any unsent results from previous `PullPlan::Pull` // manually by using this flag. bool has_unsent_results_ = false; - - // In the case of LOAD CSV, we want to use only PoolResource without MonotonicMemoryResource - // to reuse allocated memory. As LOAD CSV is processing row by row - // it is possible to reduce memory usage significantly if MemoryResource deals with memory allocation - // can reuse memory that was allocated on processing the first row on all subsequent rows. 
- // This flag signals to `PullPlan::Pull` which MemoryResource to use - bool use_monotonic_memory_; }; PullPlan::PullPlan(const std::shared_ptr plan, const Parameters ¶meters, const bool is_profile_query, DbAccessor *dba, InterpreterContext *interpreter_context, utils::MemoryResource *execution_memory, std::shared_ptr user_or_role, std::atomic *transaction_status, std::shared_ptr tx_timer, TriggerContextCollector *trigger_context_collector, - const std::optional memory_limit, bool use_monotonic_memory, - FrameChangeCollector *frame_change_collector) + const std::optional memory_limit, FrameChangeCollector *frame_change_collector) : plan_(plan), cursor_(plan->plan().MakeCursor(execution_memory)), frame_(plan->symbol_table().max_position(), execution_memory), - memory_limit_(memory_limit), - use_monotonic_memory_(use_monotonic_memory) { + memory_limit_(memory_limit) { ctx_.db_accessor = dba; ctx_.symbol_table = plan->symbol_table(); ctx_.evaluation_context.timestamp = QueryTimestamp(); @@ -1804,6 +1773,7 @@ PullPlan::PullPlan(const std::shared_ptr plan, const Parameters &pa ctx_.is_profile_query = is_profile_query; ctx_.trigger_context_collector = trigger_context_collector; ctx_.frame_change_collector = frame_change_collector; + ctx_.evaluation_context.memory = execution_memory; } std::optional PullPlan::Pull(AnyStream *stream, std::optional n, @@ -1827,43 +1797,14 @@ std::optional PullPlan::Pull(AnyStream *strea } }}; - // Set up temporary memory for a single Pull. Initial memory comes from the - // stack. 256 KiB should fit on the stack and should be more than enough for a - // single `Pull`. - static constexpr size_t stack_size = 256UL * 1024UL; - char stack_data[stack_size]; - - utils::ResourceWithOutOfMemoryException resource_with_exception; - utils::MonotonicBufferResource monotonic_memory{&stack_data[0], stack_size, &resource_with_exception}; - std::optional pool_memory; - static constexpr auto kMaxBlockPerChunks = 128; - - if (!use_monotonic_memory_) { - pool_memory.emplace(kMaxBlockPerChunks, kExecutionPoolMaxBlockSize, &resource_with_exception, - &resource_with_exception); - } else { - // We can throw on every query because a simple queries for deleting will use only - // the stack allocated buffer. - // Also, we want to throw only when the query engine requests more memory and not the storage - // so we add the exception to the allocator. - // TODO (mferencevic): Tune the parameters accordingly. - pool_memory.emplace(kMaxBlockPerChunks, 1024, &monotonic_memory, &resource_with_exception); - } - - ctx_.evaluation_context.memory = &*pool_memory; - // Returns true if a result was pulled. const auto pull_result = [&]() -> bool { return cursor_->Pull(frame_, ctx_); }; - const auto stream_values = [&]() { - // TODO: The streamed values should also probably use the above memory. - std::vector values; - values.reserve(output_symbols.size()); - - for (const auto &symbol : output_symbols) { - values.emplace_back(frame_[symbol]); + auto values = std::vector(output_symbols.size()); + const auto stream_values = [&] { + for (auto const i : ranges::views::iota(0UL, output_symbols.size())) { + values[i] = frame_[output_symbols[i]]; } - stream->Result(values); }; @@ -1973,7 +1914,6 @@ PreparedQuery Interpreter::PrepareTransactionQuery(std::string_view query_upper, std::function handler; if (query_upper == "BEGIN") { - ResetInterpreter(); // TODO: Evaluate doing move(extras). Currently the extras is very small, but this will be important if it ever // becomes large. 
handler = [this, extras = extras] { @@ -2051,30 +1991,6 @@ inline static void TryCaching(const AstStorage &ast_storage, FrameChangeCollecto } } -bool IsLoadCsvQuery(const std::vector &clauses) { - return std::any_of(clauses.begin(), clauses.end(), - [](memgraph::query::Clause const *clause) { return clause->GetTypeInfo() == LoadCsv::kType; }); -} - -bool IsCallBatchedProcedureQuery(const std::vector &clauses) { - EvaluationContext evaluation_context; - - return std::ranges::any_of(clauses, [&evaluation_context](memgraph::query::Clause *clause) -> bool { - if (!(clause->GetTypeInfo() == CallProcedure::kType)) return false; - auto *call_procedure_clause = utils::Downcast(clause); - - const auto &maybe_found = memgraph::query::procedure::FindProcedure( - procedure::gModuleRegistry, call_procedure_clause->procedure_name_, evaluation_context.memory); - if (!maybe_found) { - throw QueryRuntimeException("There is no procedure named '{}'.", call_procedure_clause->procedure_name_); - } - const auto &[module, proc] = *maybe_found; - if (!proc->info.is_batched) return false; - spdlog::trace("Using PoolResource for batched query procedure"); - return true; - }); -} - PreparedQuery PrepareCypherQuery(ParsedQuery parsed_query, std::map *summary, InterpreterContext *interpreter_context, CurrentDB ¤t_db, utils::MemoryResource *execution_memory, std::vector *notifications, @@ -2094,7 +2010,6 @@ PreparedQuery PrepareCypherQuery(ParsedQuery parsed_query, std::mapsingle_query_->clauses_; - bool contains_csv = false; if (std::any_of(clauses.begin(), clauses.end(), [](const auto *clause) { return clause->GetTypeInfo() == LoadCsv::kType; })) { notifications->emplace_back( @@ -2102,13 +2017,8 @@ PreparedQuery PrepareCypherQuery(ParsedQuery parsed_query, std::map( plan, parsed_query.parameters, false, dba, interpreter_context, execution_memory, std::move(user_or_role), - transaction_status, std::move(tx_timer), trigger_context_collector, memory_limit, use_monotonic_memory, + transaction_status, std::move(tx_timer), trigger_context_collector, memory_limit, frame_change_collector->IsTrackingValues() ? frame_change_collector : nullptr); return PreparedQuery{std::move(header), std::move(parsed_query.required_privileges), [pull_plan = std::move(pull_plan), output_symbols = std::move(output_symbols), summary]( @@ -2261,18 +2171,6 @@ PreparedQuery PrepareProfileQuery(ParsedQuery parsed_query, bool in_explicit_tra auto *cypher_query = utils::Downcast(parsed_inner_query.query); - bool contains_csv = false; - auto clauses = cypher_query->single_query_->clauses_; - if (std::any_of(clauses.begin(), clauses.end(), - [](const auto *clause) { return clause->GetTypeInfo() == LoadCsv::kType; })) { - contains_csv = true; - } - - // If this is LOAD CSV, BatchedProcedure or AllShortest query, use PoolResource without MonotonicMemoryResource as we - // want to reuse allocated memory - auto use_monotonic_memory = - !contains_csv && !IsCallBatchedProcedureQuery(clauses) && !IsAllShortestPathsQuery(clauses); - MG_ASSERT(cypher_query, "Cypher grammar should not allow other queries in PROFILE"); EvaluationContext evaluation_context; evaluation_context.timestamp = QueryTimestamp(); @@ -2306,14 +2204,14 @@ PreparedQuery PrepareProfileQuery(ParsedQuery parsed_query, bool in_explicit_tra // We want to execute the query we are profiling lazily, so we delay // the construction of the corresponding context. 
stats_and_total_time = std::optional{}, - pull_plan = std::shared_ptr(nullptr), transaction_status, use_monotonic_memory, - frame_change_collector, tx_timer = std::move(tx_timer)]( - AnyStream *stream, std::optional n) mutable -> std::optional { + pull_plan = std::shared_ptr(nullptr), transaction_status, frame_change_collector, + tx_timer = std::move(tx_timer)](AnyStream *stream, + std::optional n) mutable -> std::optional { // No output symbols are given so that nothing is streamed. if (!stats_and_total_time) { stats_and_total_time = PullPlan(plan, parameters, true, dba, interpreter_context, execution_memory, std::move(user_or_role), - transaction_status, std::move(tx_timer), nullptr, memory_limit, use_monotonic_memory, + transaction_status, std::move(tx_timer), nullptr, memory_limit, frame_change_collector->IsTrackingValues() ? frame_change_collector : nullptr) .Pull(stream, {}, {}, summary); pull_plan = std::make_shared(ProfilingStatsToTable(*stats_and_total_time)); @@ -4276,6 +4174,7 @@ PreparedQuery PrepareShowDatabasesQuery(ParsedQuery parsed_query, InterpreterCon std::optional Interpreter::GetTransactionId() const { return current_transaction_; } void Interpreter::BeginTransaction(QueryExtras const &extras) { + ResetInterpreter(); const auto prepared_query = PrepareTransactionQuery("BEGIN", extras); prepared_query.query_handler(nullptr, {}); } @@ -4310,12 +4209,12 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, const auto upper_case_query = utils::ToUpperCase(query_string); const auto trimmed_query = utils::Trim(upper_case_query); if (trimmed_query == "BEGIN" || trimmed_query == "COMMIT" || trimmed_query == "ROLLBACK") { - auto resource = utils::MonotonicBufferResource(kExecutionMemoryBlockSize); - auto prepared_query = PrepareTransactionQuery(trimmed_query, extras); - auto &query_execution = - query_executions_.emplace_back(QueryExecution::Create(std::move(resource), std::move(prepared_query))); - std::optional qid = - in_explicit_transaction_ ? static_cast(query_executions_.size() - 1) : std::optional{}; + if (trimmed_query == "BEGIN") { + ResetInterpreter(); + } + auto &query_execution = query_executions_.emplace_back(QueryExecution::Create()); + query_execution->prepared_query = PrepareTransactionQuery(trimmed_query, extras); + auto qid = in_explicit_transaction_ ? static_cast(query_executions_.size() - 1) : std::optional{}; return {query_execution->prepared_query->header, query_execution->prepared_query->privileges, qid, {}}; } @@ -4345,35 +4244,8 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, ParseQuery(query_string, params, &interpreter_context_->ast_cache, interpreter_context_->config.query); auto parsing_time = parsing_timer.Elapsed().count(); - CypherQuery const *const cypher_query = [&]() -> CypherQuery * { - if (auto *cypher_query = utils::Downcast(parsed_query.query)) { - return cypher_query; - } - if (auto *profile_query = utils::Downcast(parsed_query.query)) { - return profile_query->cypher_query_; - } - return nullptr; - }(); // IILE - - auto const [usePool, hasAllShortestPaths] = [&]() -> std::pair { - if (!cypher_query) { - return {false, false}; - } - auto const &clauses = cypher_query->single_query_->clauses_; - bool hasAllShortestPaths = IsAllShortestPathsQuery(clauses); - // Using PoolResource without MonotonicMemoryResouce for LOAD CSV reduces memory usage. 
- bool usePool = hasAllShortestPaths || IsCallBatchedProcedureQuery(clauses) || IsLoadCsvQuery(clauses); - return {usePool, hasAllShortestPaths}; - }(); // IILE - // Setup QueryExecution - // its MemoryResource is mostly used for allocations done on Frame and storing `row`s - if (usePool) { - query_executions_.emplace_back(QueryExecution::Create(utils::PoolResource(128, kExecutionPoolMaxBlockSize))); - } else { - query_executions_.emplace_back(QueryExecution::Create(utils::MonotonicBufferResource(kExecutionMemoryBlockSize))); - } - + query_executions_.emplace_back(QueryExecution::Create()); auto &query_execution = query_executions_.back(); query_execution_ptr = &query_execution; @@ -4442,9 +4314,7 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, utils::Timer planning_timer; PreparedQuery prepared_query; - utils::MemoryResource *memory_resource = - std::visit([](auto &execution_memory) -> utils::MemoryResource * { return &execution_memory; }, - query_execution->execution_memory); + utils::MemoryResource *memory_resource = query_execution->execution_memory.resource(); frame_change_collector_.reset(); frame_change_collector_.emplace(); if (utils::Downcast(parsed_query.query)) { @@ -4455,10 +4325,10 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, prepared_query = PrepareExplainQuery(std::move(parsed_query), &query_execution->summary, &query_execution->notifications, interpreter_context_, current_db_); } else if (utils::Downcast(parsed_query.query)) { - prepared_query = PrepareProfileQuery(std::move(parsed_query), in_explicit_transaction_, &query_execution->summary, - &query_execution->notifications, interpreter_context_, current_db_, - &query_execution->execution_memory_with_exception, user_or_role_, - &transaction_status_, current_timeout_timer_, &*frame_change_collector_); + prepared_query = + PrepareProfileQuery(std::move(parsed_query), in_explicit_transaction_, &query_execution->summary, + &query_execution->notifications, interpreter_context_, current_db_, memory_resource, + user_or_role_, &transaction_status_, current_timeout_timer_, &*frame_change_collector_); } else if (utils::Downcast(parsed_query.query)) { prepared_query = PrepareDumpQuery(std::move(parsed_query), current_db_); } else if (utils::Downcast(parsed_query.query)) { @@ -4660,7 +4530,7 @@ void RunTriggersAfterCommit(dbms::DatabaseAccess db_acc, InterpreterContext *int std::atomic *transaction_status) { // Run the triggers for (const auto &trigger : db_acc->trigger_store()->AfterCommitTriggers().access()) { - utils::MonotonicBufferResource execution_memory{kExecutionMemoryBlockSize}; + QueryAllocator execution_memory{}; // create a new transaction for each trigger auto tx_acc = db_acc->Access(); @@ -4671,7 +4541,7 @@ void RunTriggersAfterCommit(dbms::DatabaseAccess db_acc, InterpreterContext *int auto trigger_context = original_trigger_context; trigger_context.AdaptForAccessor(&db_accessor); try { - trigger.Execute(&db_accessor, &execution_memory, flags::run_time::GetExecutionTimeout(), + trigger.Execute(&db_accessor, execution_memory.resource(), flags::run_time::GetExecutionTimeout(), &interpreter_context->is_shutting_down, transaction_status, trigger_context); } catch (const utils::BasicException &exception) { spdlog::warn("Trigger '{}' failed with exception:\n{}", trigger.Name(), exception.what()); @@ -4825,11 +4695,12 @@ void Interpreter::Commit() { if (trigger_context) { // Run the triggers for (const auto &trigger : 
db->trigger_store()->BeforeCommitTriggers().access()) { - utils::MonotonicBufferResource execution_memory{kExecutionMemoryBlockSize}; + QueryAllocator execution_memory{}; AdvanceCommand(); try { - trigger.Execute(&*current_db_.execution_db_accessor_, &execution_memory, flags::run_time::GetExecutionTimeout(), - &interpreter_context_->is_shutting_down, &transaction_status_, *trigger_context); + trigger.Execute(&*current_db_.execution_db_accessor_, execution_memory.resource(), + flags::run_time::GetExecutionTimeout(), &interpreter_context_->is_shutting_down, + &transaction_status_, *trigger_context); } catch (const utils::BasicException &e) { throw utils::BasicException( fmt::format("Trigger '{}' caused the transaction to fail.\nException: {}", trigger.Name(), e.what())); diff --git a/src/query/interpreter.hpp b/src/query/interpreter.hpp index f18bd6721..5366b4472 100644 --- a/src/query/interpreter.hpp +++ b/src/query/interpreter.hpp @@ -65,6 +65,54 @@ extern const Event SuccessfulQuery; namespace memgraph::query { +struct QueryAllocator { + QueryAllocator() = default; + QueryAllocator(QueryAllocator const &) = delete; + QueryAllocator &operator=(QueryAllocator const &) = delete; + + // No move addresses to pool & monotonic fields must be stable + QueryAllocator(QueryAllocator &&) = delete; + QueryAllocator &operator=(QueryAllocator &&) = delete; + + auto resource() -> utils::MemoryResource * { +#ifndef MG_MEMORY_PROFILE + return &pool; +#else + return upstream_resource(); +#endif + } + auto resource_without_pool() -> utils::MemoryResource * { +#ifndef MG_MEMORY_PROFILE + return &monotonic; +#else + return upstream_resource(); +#endif + } + auto resource_without_pool_or_mono() -> utils::MemoryResource * { return upstream_resource(); } + + private: + // At least one page to ensure not sharing page with other subsystems + static constexpr auto kMonotonicInitialSize = 4UL * 1024UL; + // TODO: need to profile to check for good defaults, also maybe PoolResource + // needs to be smarter. We expect more reuse of smaller objects than larger + // objects. 64*1024B is maybe wasteful, whereas 256*32B maybe sensible. + // Depends on number of small objects expected. 
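// Aside: a standalone sketch of the monotonic -> pool -> upstream chaining
// that QueryAllocator wires up below, written with std::pmr for brevity.
// Note the difference: std::pmr's pool takes a single upstream, whereas
// utils::PoolResource separates chunk memory from oversize/internal memory.
#include <memory_resource>
#include <vector>
inline void chaining_sketch() {
  std::pmr::monotonic_buffer_resource monotonic{4UL * 1024UL,
                                                std::pmr::new_delete_resource()};
  std::pmr::unsynchronized_pool_resource pool{&monotonic};  // chunks come from monotonic
  std::pmr::vector<int> values{&pool};  // element storage is served by the pool
  values.assign(100, 0);
}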
+ static constexpr auto kPoolBlockPerChunk = 64UL; + static constexpr auto kPoolMaxBlockSize = 1024UL; + + static auto upstream_resource() -> utils::MemoryResource * { + // singleton ResourceWithOutOfMemoryException + // explicitly backed by NewDeleteResource + static auto upstream = utils::ResourceWithOutOfMemoryException{utils::NewDeleteResource()}; + return &upstream; + } + +#ifndef MG_MEMORY_PROFILE + memgraph::utils::MonotonicBufferResource monotonic{kMonotonicInitialSize, upstream_resource()}; + memgraph::utils::PoolResource pool{kPoolBlockPerChunk, &monotonic, upstream_resource()}; +#endif +}; + struct InterpreterContext; inline constexpr size_t kExecutionMemoryBlockSize = 1UL * 1024UL * 1024UL; @@ -304,45 +352,25 @@ class Interpreter final { } struct QueryExecution { - std::variant execution_memory; - utils::ResourceWithOutOfMemoryException execution_memory_with_exception; - std::optional prepared_query; + QueryAllocator execution_memory; // NOTE: before all other fields which uses this memory + std::optional prepared_query; std::map summary; std::vector notifications; - static auto Create(std::variant memory_resource, - std::optional prepared_query = std::nullopt) -> std::unique_ptr { - return std::make_unique(std::move(memory_resource), std::move(prepared_query)); - } + static auto Create() -> std::unique_ptr { return std::make_unique(); } - explicit QueryExecution(std::variant memory_resource, - std::optional prepared_query) - : execution_memory(std::move(memory_resource)), prepared_query{std::move(prepared_query)} { - std::visit( - [&](auto &memory_resource) { - execution_memory_with_exception = utils::ResourceWithOutOfMemoryException(&memory_resource); - }, - execution_memory); - }; + explicit QueryExecution() = default; QueryExecution(const QueryExecution &) = delete; - QueryExecution(QueryExecution &&) = default; + QueryExecution(QueryExecution &&) = delete; QueryExecution &operator=(const QueryExecution &) = delete; - QueryExecution &operator=(QueryExecution &&) = default; + QueryExecution &operator=(QueryExecution &&) = delete; - ~QueryExecution() { - // We should always release the execution memory AFTER we - // destroy the prepared query which is using that instance - // of execution memory. - prepared_query.reset(); - std::visit([](auto &memory_resource) { memory_resource.Release(); }, execution_memory); - } + ~QueryExecution() = default; void CleanRuntimeData() { - if (prepared_query.has_value()) { - prepared_query.reset(); - } + prepared_query.reset(); notifications.clear(); } }; @@ -413,9 +441,7 @@ std::map Interpreter::Pull(TStream *result_stream, std: try { // Wrap the (statically polymorphic) stream type into a common type which // the handler knows. 
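// Aside: the "NOTE: before all other fields" on QueryExecution::execution_memory
// above leans on the rule that non-static data members are destroyed in
// reverse declaration order, so prepared_query dies before the QueryAllocator
// whose memory it uses -- which is why the old hand-written destructor could
// be dropped. A tiny self-contained illustration:
#include <cstdio>
struct Tracer {
  const char *name;
  ~Tracer() { std::printf("destroying %s\n", name); }
};
struct DeclarationOrder {
  Tracer memory{"memory"};      // declared first => destroyed last
  Tracer consumer{"consumer"};  // destroyed first, while memory is still alive
};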
- AnyStream stream{result_stream, - std::visit([](auto &execution_memory) -> utils::MemoryResource * { return &execution_memory; }, - query_execution->execution_memory)}; + AnyStream stream{result_stream, query_execution->execution_memory.resource()}; const auto maybe_res = query_execution->prepared_query->query_handler(&stream, n); // Stream is using execution memory of the query_execution which // can be deleted after its execution so the stream should be cleared diff --git a/src/query/plan/operator.cpp b/src/query/plan/operator.cpp index 29f64f950..8e1b9f529 100644 --- a/src/query/plan/operator.cpp +++ b/src/query/plan/operator.cpp @@ -69,6 +69,7 @@ #include "utils/pmr/vector.hpp" #include "utils/readable_size.hpp" #include "utils/string.hpp" +#include "utils/tag.hpp" #include "utils/temporal.hpp" #include "utils/typeinfo.hpp" @@ -864,17 +865,15 @@ bool Expand::ExpandCursor::Pull(Frame &frame, ExecutionContext &context) { SCOPED_PROFILE_OP_BY_REF(self_); // A helper function for expanding a node from an edge. - auto pull_node = [this, &frame](const EdgeAccessor &new_edge, EdgeAtom::Direction direction) { + auto pull_node = [this, &frame](const EdgeAccessor &new_edge, + utils::tag_value) { if (self_.common_.existing_node) return; - switch (direction) { - case EdgeAtom::Direction::IN: - frame[self_.common_.node_symbol] = new_edge.From(); - break; - case EdgeAtom::Direction::OUT: - frame[self_.common_.node_symbol] = new_edge.To(); - break; - case EdgeAtom::Direction::BOTH: - LOG_FATAL("Must indicate exact expansion direction here"); + if constexpr (direction == EdgeAtom::Direction::IN) { + frame[self_.common_.node_symbol] = new_edge.From(); + } else if constexpr (direction == EdgeAtom::Direction::OUT) { + frame[self_.common_.node_symbol] = new_edge.To(); + } else { + LOG_FATAL("Must indicate exact expansion direction here"); } }; @@ -893,7 +892,7 @@ bool Expand::ExpandCursor::Pull(Frame &frame, ExecutionContext &context) { #endif frame[self_.common_.edge_symbol] = edge; - pull_node(edge, EdgeAtom::Direction::IN); + pull_node(edge, utils::tag_v); return true; } @@ -913,7 +912,7 @@ bool Expand::ExpandCursor::Pull(Frame &frame, ExecutionContext &context) { } #endif frame[self_.common_.edge_symbol] = edge; - pull_node(edge, EdgeAtom::Direction::OUT); + pull_node(edge, utils::tag_v); return true; } @@ -1007,12 +1006,12 @@ bool Expand::ExpandCursor::InitEdges(Frame &frame, ExecutionContext &context) { auto existing_node = *expansion_info_.existing_node; auto edges_result = UnwrapEdgesResult(vertex.InEdges(self_.view_, self_.common_.edge_types, existing_node)); - in_edges_.emplace(edges_result.edges); + in_edges_.emplace(std::move(edges_result.edges)); num_expanded_first = edges_result.expanded_count; } } else { auto edges_result = UnwrapEdgesResult(vertex.InEdges(self_.view_, self_.common_.edge_types)); - in_edges_.emplace(edges_result.edges); + in_edges_.emplace(std::move(edges_result.edges)); num_expanded_first = edges_result.expanded_count; } if (in_edges_) { @@ -1026,12 +1025,12 @@ bool Expand::ExpandCursor::InitEdges(Frame &frame, ExecutionContext &context) { if (expansion_info_.existing_node) { auto existing_node = *expansion_info_.existing_node; auto edges_result = UnwrapEdgesResult(vertex.OutEdges(self_.view_, self_.common_.edge_types, existing_node)); - out_edges_.emplace(edges_result.edges); + out_edges_.emplace(std::move(edges_result.edges)); num_expanded_second = edges_result.expanded_count; } } else { auto edges_result = UnwrapEdgesResult(vertex.OutEdges(self_.view_, 
self_.common_.edge_types)); - out_edges_.emplace(edges_result.edges); + out_edges_.emplace(std::move(edges_result.edges)); num_expanded_second = edges_result.expanded_count; } if (out_edges_) { @@ -1117,14 +1116,14 @@ auto ExpandFromVertex(const VertexAccessor &vertex, EdgeAtom::Direction directio if (direction != EdgeAtom::Direction::OUT) { auto edges = UnwrapEdgesResult(vertex.InEdges(view, edge_types)).edges; - if (edges.begin() != edges.end()) { + if (!edges.empty()) { chain_elements.emplace_back(wrapper(EdgeAtom::Direction::IN, std::move(edges))); } } if (direction != EdgeAtom::Direction::IN) { auto edges = UnwrapEdgesResult(vertex.OutEdges(view, edge_types)).edges; - if (edges.begin() != edges.end()) { + if (!edges.empty()) { chain_elements.emplace_back(wrapper(EdgeAtom::Direction::OUT, std::move(edges))); } } @@ -1244,8 +1243,13 @@ class ExpandVariableCursor : public Cursor { } // reset the frame value to an empty edge list - auto *pull_memory = context.evaluation_context.memory; - frame[self_.common_.edge_symbol] = TypedValue::TVector(pull_memory); + if (frame[self_.common_.edge_symbol].IsList()) { + // Preserve the list capacity if possible + frame[self_.common_.edge_symbol].ValueList().clear(); + } else { + auto *pull_memory = context.evaluation_context.memory; + frame[self_.common_.edge_symbol] = TypedValue::TVector(pull_memory); + } return true; } @@ -4474,9 +4478,8 @@ class UnwindCursor : public Cursor { TypedValue input_value = self_.input_expression_->Accept(evaluator); if (input_value.type() != TypedValue::Type::List) throw QueryRuntimeException("Argument of UNWIND must be a list, but '{}' was provided.", input_value.type()); - // Copy the evaluted input_value_list to our vector. - // eval memory != query memory - input_value_ = input_value.ValueList(); + // Move the evaluted input_value_list to our vector. + input_value_ = std::move(input_value.ValueList()); input_value_it_ = input_value_.begin(); } @@ -5336,6 +5339,7 @@ class LoadCsvCursor : public Cursor { "1"); } did_pull_ = true; + reader_->Reset(); } auto row = reader_->GetNextRow(context.evaluation_context.memory); diff --git a/src/query/plan/operator.hpp b/src/query/plan/operator.hpp index 5a8ef0625..e502fbadd 100644 --- a/src/query/plan/operator.hpp +++ b/src/query/plan/operator.hpp @@ -76,18 +76,13 @@ using UniqueCursorPtr = std::unique_ptr>; template std::unique_ptr> MakeUniqueCursorPtr(utils::Allocator allocator, TArgs &&...args) { - auto *ptr = allocator.allocate(1); - try { - auto *cursor = new (ptr) TCursor(std::forward(args)...); - return std::unique_ptr>(cursor, [allocator](Cursor *base_ptr) mutable { - auto *p = static_cast(base_ptr); - p->~TCursor(); - allocator.deallocate(p, 1); - }); - } catch (...) 
{ - allocator.deallocate(ptr, 1); - throw; - } + auto *cursor = allocator.template new_object(std::forward(args)...); + auto dtr = [allocator](Cursor *base_ptr) mutable { + auto *p = static_cast(base_ptr); + allocator.delete_object(p); + }; + // TODO: not std::function + return std::unique_ptr>(cursor, std::move(dtr)); } class Once; diff --git a/src/query/trigger.cpp b/src/query/trigger.cpp index 437389128..151a33dad 100644 --- a/src/query/trigger.cpp +++ b/src/query/trigger.cpp @@ -191,9 +191,9 @@ std::shared_ptr Trigger::GetPlan(DbAccessor *db_accessor) return trigger_plan_; } -void Trigger::Execute(DbAccessor *dba, utils::MonotonicBufferResource *execution_memory, - const double max_execution_time_sec, std::atomic *is_shutting_down, - std::atomic *transaction_status, const TriggerContext &context) const { +void Trigger::Execute(DbAccessor *dba, utils::MemoryResource *execution_memory, const double max_execution_time_sec, + std::atomic *is_shutting_down, std::atomic *transaction_status, + const TriggerContext &context) const { if (!context.ShouldEventTrigger(event_type_)) { return; } @@ -214,22 +214,7 @@ void Trigger::Execute(DbAccessor *dba, utils::MonotonicBufferResource *execution ctx.is_shutting_down = is_shutting_down; ctx.transaction_status = transaction_status; ctx.is_profile_query = false; - - // Set up temporary memory for a single Pull. Initial memory comes from the - // stack. 256 KiB should fit on the stack and should be more than enough for a - // single `Pull`. - static constexpr size_t stack_size = 256UL * 1024UL; - char stack_data[stack_size]; - - // We can throw on every query because a simple queries for deleting will use only - // the stack allocated buffer. - // Also, we want to throw only when the query engine requests more memory and not the storage - // so we add the exception to the allocator. - utils::ResourceWithOutOfMemoryException resource_with_exception; - utils::MonotonicBufferResource monotonic_memory(&stack_data[0], stack_size, &resource_with_exception); - // TODO (mferencevic): Tune the parameters accordingly. - utils::PoolResource pool_memory(128, 1024, &monotonic_memory); - ctx.evaluation_context.memory = &pool_memory; + ctx.evaluation_context.memory = execution_memory; auto cursor = plan.plan().MakeCursor(execution_memory); Frame frame{plan.symbol_table().max_position(), execution_memory}; diff --git a/src/query/trigger.hpp b/src/query/trigger.hpp index 91c74579e..24bbf50ee 100644 --- a/src/query/trigger.hpp +++ b/src/query/trigger.hpp @@ -39,7 +39,7 @@ struct Trigger { utils::SkipList *query_cache, DbAccessor *db_accessor, const InterpreterConfig::Query &query_config, std::shared_ptr owner); - void Execute(DbAccessor *dba, utils::MonotonicBufferResource *execution_memory, double max_execution_time_sec, + void Execute(DbAccessor *dba, utils::MemoryResource *execution_memory, double max_execution_time_sec, std::atomic *is_shutting_down, std::atomic *transaction_status, const TriggerContext &context) const; diff --git a/src/storage/v2/property_value.hpp b/src/storage/v2/property_value.hpp index e48be008a..161ad151a 100644 --- a/src/storage/v2/property_value.hpp +++ b/src/storage/v2/property_value.hpp @@ -92,7 +92,28 @@ class PropertyValue { // TODO: Implement copy assignment operators for primitive types. // TODO: Implement copy and move assignment operators for non-primitive types. 
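// Aside: a minimal sketch of the allocator-backed unique_ptr pattern that the
// rewritten MakeUniqueCursorPtr above uses, expressed here with std::pmr's
// C++20 new_object/delete_object for brevity (illustrative names only):
#include <memory>
#include <memory_resource>
#include <utility>
template <class T, class... Args>
auto make_pmr_unique(std::pmr::polymorphic_allocator<> alloc, Args &&...args) {
  T *obj = alloc.new_object<T>(std::forward<Args>(args)...);  // allocate + construct
  auto deleter = [alloc](T *p) mutable { alloc.delete_object(p); };  // destroy + deallocate
  return std::unique_ptr<T, decltype(deleter)>(obj, std::move(deleter));
}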
- ~PropertyValue() { DestroyValue(); } + ~PropertyValue() { + switch (type_) { + // destructor for primitive types does nothing + case Type::Null: + case Type::Bool: + case Type::Int: + case Type::Double: + case Type::TemporalData: + return; + + // destructor for non primitive types since we used placement new + case Type::String: + std::destroy_at(&string_v.val_); + return; + case Type::List: + std::destroy_at(&list_v.val_); + return; + case Type::Map: + std::destroy_at(&map_v.val_); + return; + } + } Type type() const { return type_; } @@ -189,8 +210,6 @@ class PropertyValue { } private: - void DestroyValue() noexcept; - // NOTE: this may look strange but it is for better data layout // https://eel.is/c++draft/class.union#general-note-1 union { @@ -357,13 +376,13 @@ inline PropertyValue::PropertyValue(const PropertyValue &other) : type_(other.ty this->double_v.val_ = other.double_v.val_; return; case Type::String: - new (&string_v.val_) std::string(other.string_v.val_); + std::construct_at(&string_v.val_, other.string_v.val_); return; case Type::List: - new (&list_v.val_) std::vector(other.list_v.val_); + std::construct_at(&list_v.val_, other.list_v.val_); return; case Type::Map: - new (&map_v.val_) std::map(other.map_v.val_); + std::construct_at(&map_v.val_, other.map_v.val_); return; case Type::TemporalData: this->temporal_data_v.val_ = other.temporal_data_v.val_; @@ -371,7 +390,7 @@ inline PropertyValue::PropertyValue(const PropertyValue &other) : type_(other.ty } } -inline PropertyValue::PropertyValue(PropertyValue &&other) noexcept : type_(std::exchange(other.type_, Type::Null)) { +inline PropertyValue::PropertyValue(PropertyValue &&other) noexcept : type_(other.type_) { switch (type_) { case Type::Null: break; @@ -386,15 +405,12 @@ inline PropertyValue::PropertyValue(PropertyValue &&other) noexcept : type_(std: break; case Type::String: std::construct_at(&string_v.val_, std::move(other.string_v.val_)); - std::destroy_at(&other.string_v.val_); break; case Type::List: std::construct_at(&list_v.val_, std::move(other.list_v.val_)); - std::destroy_at(&other.list_v.val_); break; case Type::Map: std::construct_at(&map_v.val_, std::move(other.map_v.val_)); - std::destroy_at(&other.map_v.val_); break; case Type::TemporalData: temporal_data_v.val_ = other.temporal_data_v.val_; @@ -403,38 +419,88 @@ inline PropertyValue::PropertyValue(PropertyValue &&other) noexcept : type_(std: } inline PropertyValue &PropertyValue::operator=(const PropertyValue &other) { - if (this == &other) return *this; + if (type_ == other.type_) { + if (this == &other) return *this; + switch (other.type_) { + case Type::Null: + break; + case Type::Bool: + bool_v.val_ = other.bool_v.val_; + break; + case Type::Int: + int_v.val_ = other.int_v.val_; + break; + case Type::Double: + double_v.val_ = other.double_v.val_; + break; + case Type::String: + string_v.val_ = other.string_v.val_; + break; + case Type::List: + list_v.val_ = other.list_v.val_; + break; + case Type::Map: + map_v.val_ = other.map_v.val_; + break; + case Type::TemporalData: + temporal_data_v.val_ = other.temporal_data_v.val_; + break; + } + return *this; + } else { + // destroy + switch (type_) { + case Type::Null: + break; + case Type::Bool: + break; + case Type::Int: + break; + case Type::Double: + break; + case Type::String: + std::destroy_at(&string_v.val_); + break; + case Type::List: + std::destroy_at(&list_v.val_); + break; + case Type::Map: + std::destroy_at(&map_v.val_); + break; + case Type::TemporalData: + break; + } + // construct + auto 
*new_this = std::launder(this); + switch (other.type_) { + case Type::Null: + break; + case Type::Bool: + new_this->bool_v.val_ = other.bool_v.val_; + break; + case Type::Int: + new_this->int_v.val_ = other.int_v.val_; + break; + case Type::Double: + new_this->double_v.val_ = other.double_v.val_; + break; + case Type::String: + std::construct_at(&new_this->string_v.val_, other.string_v.val_); + break; + case Type::List: + std::construct_at(&new_this->list_v.val_, other.list_v.val_); + break; + case Type::Map: + std::construct_at(&new_this->map_v.val_, other.map_v.val_); + break; + case Type::TemporalData: + new_this->temporal_data_v.val_ = other.temporal_data_v.val_; + break; + } - DestroyValue(); - type_ = other.type_; - - switch (other.type_) { - case Type::Null: - break; - case Type::Bool: - this->bool_v.val_ = other.bool_v.val_; - break; - case Type::Int: - this->int_v.val_ = other.int_v.val_; - break; - case Type::Double: - this->double_v.val_ = other.double_v.val_; - break; - case Type::String: - new (&string_v.val_) std::string(other.string_v.val_); - break; - case Type::List: - new (&list_v.val_) std::vector(other.list_v.val_); - break; - case Type::Map: - new (&map_v.val_) std::map(other.map_v.val_); - break; - case Type::TemporalData: - this->temporal_data_v.val_ = other.temporal_data_v.val_; - break; + new_this->type_ = other.type_; + return *new_this; } - - return *this; } inline PropertyValue &PropertyValue::operator=(PropertyValue &&other) noexcept { @@ -456,48 +522,71 @@ inline PropertyValue &PropertyValue::operator=(PropertyValue &&other) noexcept { break; case Type::String: string_v.val_ = std::move(other.string_v.val_); - std::destroy_at(&other.string_v.val_); break; case Type::List: list_v.val_ = std::move(other.list_v.val_); - std::destroy_at(&other.list_v.val_); break; case Type::Map: map_v.val_ = std::move(other.map_v.val_); - std::destroy_at(&other.map_v.val_); break; case Type::TemporalData: temporal_data_v.val_ = other.temporal_data_v.val_; break; } - other.type_ = Type::Null; return *this; } else { - std::destroy_at(this); - return *std::construct_at(std::launder(this), std::move(other)); - } -} + // destroy + switch (type_) { + case Type::Null: + break; + case Type::Bool: + break; + case Type::Int: + break; + case Type::Double: + break; + case Type::String: + std::destroy_at(&string_v.val_); + break; + case Type::List: + std::destroy_at(&list_v.val_); + break; + case Type::Map: + std::destroy_at(&map_v.val_); + break; + case Type::TemporalData: + break; + } + // construct (no need to destroy moved from type) + auto *new_this = std::launder(this); + switch (other.type_) { + case Type::Null: + break; + case Type::Bool: + new_this->bool_v.val_ = other.bool_v.val_; + break; + case Type::Int: + new_this->int_v.val_ = other.int_v.val_; + break; + case Type::Double: + new_this->double_v.val_ = other.double_v.val_; + break; + case Type::String: + std::construct_at(&new_this->string_v.val_, std::move(other.string_v.val_)); + break; + case Type::List: + std::construct_at(&new_this->list_v.val_, std::move(other.list_v.val_)); + break; + case Type::Map: + std::construct_at(&new_this->map_v.val_, std::move(other.map_v.val_)); + break; + case Type::TemporalData: + new_this->temporal_data_v.val_ = other.temporal_data_v.val_; + break; + } -inline void PropertyValue::DestroyValue() noexcept { - switch (std::exchange(type_, Type::Null)) { - // destructor for primitive types does nothing - case Type::Null: - case Type::Bool: - case Type::Int: - case Type::Double: - case 
Type::TemporalData: - return; - - // destructor for non primitive types since we used placement new - case Type::String: - std::destroy_at(&string_v.val_); - return; - case Type::List: - std::destroy_at(&list_v.val_); - return; - case Type::Map: - std::destroy_at(&map_v.val_); - return; + new_this->type_ = other.type_; + return *new_this; } } diff --git a/src/utils/memory.cpp b/src/utils/memory.cpp index d09f70fc3..6b1f26c11 100644 --- a/src/utils/memory.cpp +++ b/src/utils/memory.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -150,128 +150,133 @@ void *MonotonicBufferResource::DoAllocate(size_t bytes, size_t alignment) { namespace impl { -Pool::Pool(size_t block_size, unsigned char blocks_per_chunk, MemoryResource *memory) - : blocks_per_chunk_(blocks_per_chunk), block_size_(block_size), chunks_(memory) {} - -Pool::~Pool() { MG_ASSERT(chunks_.empty(), "You need to call Release before destruction!"); } - -void *Pool::Allocate() { - auto allocate_block_from_chunk = [this](Chunk *chunk) { - unsigned char *available_block = chunk->data + (chunk->first_available_block_ix * block_size_); - // Update free-list pointer (index in our case) by reading "next" from the - // available_block. - chunk->first_available_block_ix = *available_block; - --chunk->blocks_available; - return available_block; - }; - if (last_alloc_chunk_ && last_alloc_chunk_->blocks_available > 0U) - return allocate_block_from_chunk(last_alloc_chunk_); - // Find a Chunk with available memory. - for (auto &chunk : chunks_) { - if (chunk.blocks_available > 0U) { - last_alloc_chunk_ = &chunk; - return allocate_block_from_chunk(last_alloc_chunk_); - } - } - // We haven't found a Chunk with available memory, so allocate a new one. - if (block_size_ > std::numeric_limits::max() / blocks_per_chunk_) throw BadAlloc("Allocation size overflow"); - size_t data_size = blocks_per_chunk_ * block_size_; +Pool::Pool(size_t block_size, unsigned char blocks_per_chunk, MemoryResource *chunk_memory) + : blocks_per_chunk_(blocks_per_chunk), block_size_(block_size), chunks_(chunk_memory) { // Use the next pow2 of block_size_ as alignment, so that we cover alignment // requests between 1 and block_size_. Users of this class should make sure // that requested alignment of particular blocks is never greater than the // block itself. - size_t alignment = Ceil2(block_size_); - if (alignment < block_size_) throw BadAlloc("Allocation alignment overflow"); - auto *data = reinterpret_cast(GetUpstreamResource()->Allocate(data_size, alignment)); - // Form a free-list of blocks in data. - for (unsigned char i = 0U; i < blocks_per_chunk_; ++i) { - *(data + (i * block_size_)) = i + 1U; - } - Chunk chunk{data, 0, blocks_per_chunk_}; - // Insert the big block in the sorted position. - auto it = std::lower_bound(chunks_.begin(), chunks_.end(), chunk, - [](const auto &a, const auto &b) { return a.data < b.data; }); - try { - it = chunks_.insert(it, chunk); - } catch (...) 
{ - GetUpstreamResource()->Deallocate(data, data_size, alignment); - throw; - } + if (block_size_ > std::numeric_limits::max() / blocks_per_chunk_) throw BadAlloc("Allocation size overflow"); +} - last_alloc_chunk_ = &*it; - last_dealloc_chunk_ = &*it; - return allocate_block_from_chunk(last_alloc_chunk_); +Pool::~Pool() { + if (!chunks_.empty()) { + auto *resource = GetUpstreamResource(); + auto const dataSize = blocks_per_chunk_ * block_size_; + auto const alignment = Ceil2(block_size_); + for (auto &chunk : chunks_) { + resource->Deallocate(chunk.raw_data, dataSize, alignment); + } + chunks_.clear(); + } + free_list_ = nullptr; +} + +void *Pool::Allocate() { + if (!free_list_) [[unlikely]] { + // need new chunk + auto const data_size = blocks_per_chunk_ * block_size_; + auto const alignment = Ceil2(block_size_); + auto *resource = GetUpstreamResource(); + auto *data = reinterpret_cast(resource->Allocate(data_size, alignment)); + try { + auto &new_chunk = chunks_.emplace_front(data); + free_list_ = new_chunk.build_freelist(block_size_, blocks_per_chunk_); + } catch (...) { + resource->Deallocate(data, data_size, alignment); + throw; + } + } + return std::exchange(free_list_, *reinterpret_cast(free_list_)); } void Pool::Deallocate(void *p) { - MG_ASSERT(last_dealloc_chunk_, "No chunk to deallocate"); - MG_ASSERT(!chunks_.empty(), - "Expected a call to Deallocate after at least a " - "single Allocate has been done."); - auto is_in_chunk = [this, p](const Chunk &chunk) { - auto ptr = reinterpret_cast(p); - size_t data_size = blocks_per_chunk_ * block_size_; - return reinterpret_cast(chunk.data) <= ptr && ptr < reinterpret_cast(chunk.data + data_size); - }; - auto deallocate_block_from_chunk = [this, p](Chunk *chunk) { - // NOTE: This check is not enough to cover all double-free issues. - MG_ASSERT(chunk->blocks_available < blocks_per_chunk_, - "Deallocating more blocks than a chunk can contain, possibly a " - "double-free situation or we have a bug in the allocator."); - // Link the block into the free-list - auto *block = reinterpret_cast(p); - *block = chunk->first_available_block_ix; - chunk->first_available_block_ix = (block - chunk->data) / block_size_; - chunk->blocks_available++; - }; - if (is_in_chunk(*last_dealloc_chunk_)) { - deallocate_block_from_chunk(last_dealloc_chunk_); - return; - } - - // Find the chunk which served this allocation - Chunk chunk{reinterpret_cast(p) - blocks_per_chunk_ * block_size_, 0, 0}; - auto it = std::lower_bound(chunks_.begin(), chunks_.end(), chunk, - [](const auto &a, const auto &b) { return a.data <= b.data; }); - MG_ASSERT(it != chunks_.end(), "Failed deallocation in utils::Pool"); - MG_ASSERT(is_in_chunk(*it), "Failed deallocation in utils::Pool"); - - // Update last_alloc_chunk_ as well because it now has a free block. - // Additionally this corresponds with C++ pattern of allocations and - // deallocations being done in reverse order. 
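// Aside: the rewritten Pool above keeps an intrusive singly linked free list
// threaded through the blocks themselves; allocation pops the head and
// deallocation (below) pushes it back, each in a single std::exchange.
// A standalone sketch of the trick:
#include <cstddef>
#include <utility>
struct FreeListSketch {
  std::byte *head{nullptr};
  void push(void *block) {  // deallocate: the block becomes the new head
    *static_cast<std::byte **>(block) =
        std::exchange(head, static_cast<std::byte *>(block));
  }
  void *pop() {  // allocate: assumes head != nullptr (chunk refill happens first)
    return std::exchange(head, *reinterpret_cast<std::byte **>(head));
  }
};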
- last_alloc_chunk_ = &*it; - last_dealloc_chunk_ = &*it; - deallocate_block_from_chunk(last_dealloc_chunk_); - // TODO: We could release the Chunk to upstream memory -} - -void Pool::Release() { - for (auto &chunk : chunks_) { - size_t data_size = blocks_per_chunk_ * block_size_; - size_t alignment = Ceil2(block_size_); - GetUpstreamResource()->Deallocate(chunk.data, data_size, alignment); - } - chunks_.clear(); - last_alloc_chunk_ = nullptr; - last_dealloc_chunk_ = nullptr; + *reinterpret_cast(p) = std::exchange(free_list_, reinterpret_cast(p)); } } // namespace impl -PoolResource::PoolResource(size_t max_blocks_per_chunk, size_t max_block_size, MemoryResource *memory_pools, - MemoryResource *memory_unpooled) - : pools_(memory_pools), - unpooled_(memory_unpooled), - max_blocks_per_chunk_(std::min(max_blocks_per_chunk, static_cast(impl::Pool::MaxBlocksInChunk()))), - max_block_size_(max_block_size) { - MG_ASSERT(max_blocks_per_chunk_ > 0U, "Invalid number of blocks per chunk"); - MG_ASSERT(max_block_size_ > 0U, "Invalid size of block"); +struct NullMemoryResourceImpl final : public MemoryResource { + NullMemoryResourceImpl() = default; + NullMemoryResourceImpl(NullMemoryResourceImpl const &) = default; + NullMemoryResourceImpl &operator=(NullMemoryResourceImpl const &) = default; + NullMemoryResourceImpl(NullMemoryResourceImpl &&) = default; + NullMemoryResourceImpl &operator=(NullMemoryResourceImpl &&) = default; + ~NullMemoryResourceImpl() override = default; + + private: + void *DoAllocate(size_t /*bytes*/, size_t /*alignment*/) override { + throw BadAlloc{"NullMemoryResource doesn't allocate"}; + } + void DoDeallocate(void * /*p*/, size_t /*bytes*/, size_t /*alignment*/) override { + throw BadAlloc{"NullMemoryResource doesn't deallocate"}; + } + bool DoIsEqual(MemoryResource const &other) const noexcept override { + return dynamic_cast(&other) != nullptr; + } +}; + +MemoryResource *NullMemoryResource() noexcept { + static auto res = NullMemoryResourceImpl{}; + return &res; } +namespace impl { + +/// 1 bit sensitivity test +static_assert(bin_index<1>(9U) == 0); +static_assert(bin_index<1>(10U) == 0); +static_assert(bin_index<1>(11U) == 0); +static_assert(bin_index<1>(12U) == 0); +static_assert(bin_index<1>(13U) == 0); +static_assert(bin_index<1>(14U) == 0); +static_assert(bin_index<1>(15U) == 0); +static_assert(bin_index<1>(16U) == 0); + +static_assert(bin_index<1>(17U) == 1); +static_assert(bin_index<1>(18U) == 1); +static_assert(bin_index<1>(19U) == 1); +static_assert(bin_index<1>(20U) == 1); +static_assert(bin_index<1>(21U) == 1); +static_assert(bin_index<1>(22U) == 1); +static_assert(bin_index<1>(23U) == 1); +static_assert(bin_index<1>(24U) == 1); +static_assert(bin_index<1>(25U) == 1); +static_assert(bin_index<1>(26U) == 1); +static_assert(bin_index<1>(27U) == 1); +static_assert(bin_index<1>(28U) == 1); +static_assert(bin_index<1>(29U) == 1); +static_assert(bin_index<1>(30U) == 1); +static_assert(bin_index<1>(31U) == 1); +static_assert(bin_index<1>(32U) == 1); + +/// 2 bit sensitivity test + +static_assert(bin_index<2>(9U) == 0); +static_assert(bin_index<2>(10U) == 0); +static_assert(bin_index<2>(11U) == 0); +static_assert(bin_index<2>(12U) == 0); + +static_assert(bin_index<2>(13U) == 1); +static_assert(bin_index<2>(14U) == 1); +static_assert(bin_index<2>(15U) == 1); +static_assert(bin_index<2>(16U) == 1); + +static_assert(bin_index<2>(17U) == 2); +static_assert(bin_index<2>(18U) == 2); +static_assert(bin_index<2>(19U) == 2); +static_assert(bin_index<2>(20U) == 2); 
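// Aside: with 2-bit sensitivity each power-of-two span is split into
// 2^(B-1) == 2 sub-levels, so (for the default lower bound of 8) the bin
// upper bounds run 12, 16, 24, 32, 48, 64, ...; 21 falls in (16, 24] and
// therefore still maps to bin 2, as the remaining asserts check: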
+static_assert(bin_index<2>(21U) == 2); +static_assert(bin_index<2>(22U) == 2); +static_assert(bin_index<2>(23U) == 2); +static_assert(bin_index<2>(24U) == 2); + +} // namespace impl + void *PoolResource::DoAllocate(size_t bytes, size_t alignment) { // Take the max of `bytes` and `alignment` so that we simplify handling // alignment requests. - size_t block_size = std::max(bytes, alignment); + size_t block_size = std::max({bytes, alignment, 1UL}); // Check that we have received a regular allocation request with non-padded // structs/classes in play. These will always have // `sizeof(T) % alignof(T) == 0`. Special requests which don't have that @@ -279,80 +284,36 @@ void *PoolResource::DoAllocate(size_t bytes, size_t alignment) { // have to write a general-purpose allocator which has to behave as complex // as malloc/free. if (block_size % alignment != 0) throw BadAlloc("Requested bytes must be a multiple of alignment"); - if (block_size > max_block_size_) { - // Allocate a big block. - BigBlock big_block{bytes, alignment, GetUpstreamResourceBlocks()->Allocate(bytes, alignment)}; - // Insert the big block in the sorted position. - auto it = std::lower_bound(unpooled_.begin(), unpooled_.end(), big_block, - [](const auto &a, const auto &b) { return a.data < b.data; }); - try { - unpooled_.insert(it, big_block); - } catch (...) { - GetUpstreamResourceBlocks()->Deallocate(big_block.data, bytes, alignment); - throw; - } - return big_block.data; - } - // Allocate a regular block, first check if last_alloc_pool_ is suitable. - if (last_alloc_pool_ && last_alloc_pool_->GetBlockSize() == block_size) { - return last_alloc_pool_->Allocate(); - } - // Find the pool with greater or equal block_size. - impl::Pool pool(block_size, max_blocks_per_chunk_, GetUpstreamResource()); - auto it = std::lower_bound(pools_.begin(), pools_.end(), pool, - [](const auto &a, const auto &b) { return a.GetBlockSize() < b.GetBlockSize(); }); - if (it != pools_.end() && it->GetBlockSize() == block_size) { - last_alloc_pool_ = &*it; - last_dealloc_pool_ = &*it; - return it->Allocate(); - } - // We don't have a pool for this block_size, so insert it in the sorted - // position. - it = pools_.emplace(it, std::move(pool)); - last_alloc_pool_ = &*it; - last_dealloc_pool_ = &*it; - return it->Allocate(); -} + if (block_size <= 64) { + return mini_pools_[(block_size - 1UL) / 8UL].Allocate(); + } + if (block_size <= 128) { + return pools_3bit_.allocate(block_size); + } + if (block_size <= 512) { + return pools_4bit_.allocate(block_size); + } + if (block_size <= 1024) { + return pools_5bit_.allocate(block_size); + } + return unpooled_memory_->Allocate(bytes, alignment); +} void PoolResource::DoDeallocate(void *p, size_t bytes, size_t alignment) { - size_t block_size = std::max(bytes, alignment); - MG_ASSERT(block_size % alignment == 0, - "PoolResource shouldn't serve allocation requests where bytes aren't " - "a multiple of alignment"); - if (block_size > max_block_size_) { - // Deallocate a big block. 
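// Aside: a condensed map of the new size-class routing used by DoAllocate
// above and DoDeallocate below (bounds match the pools declared in
// memory.hpp; block_size = max(bytes, alignment, 1)):
//   block_size <= 64    -> mini_pools_[(block_size - 1) / 8]  (8-byte steps)
//   block_size <= 128   -> pools_3bit_
//   block_size <= 512   -> pools_4bit_
//   block_size <= 1024  -> pools_5bit_
//   otherwise           -> unpooled_memory_ (exact bytes/alignment, upstream)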
- BigBlock big_block{bytes, alignment, p}; - auto it = std::lower_bound(unpooled_.begin(), unpooled_.end(), big_block, - [](const auto &a, const auto &b) { return a.data < b.data; }); - MG_ASSERT(it != unpooled_.end(), "Failed deallocation"); - MG_ASSERT(it->data == p && it->bytes == bytes && it->alignment == alignment, "Failed deallocation"); - unpooled_.erase(it); - GetUpstreamResourceBlocks()->Deallocate(p, bytes, alignment); - return; + size_t block_size = std::max({bytes, alignment, 1UL}); + DMG_ASSERT(block_size % alignment == 0); + + if (block_size <= 64) { + mini_pools_[(block_size - 1UL) / 8UL].Deallocate(p); + } else if (block_size <= 128) { + pools_3bit_.deallocate(p, block_size); + } else if (block_size <= 512) { + pools_4bit_.deallocate(p, block_size); + } else if (block_size <= 1024) { + pools_5bit_.deallocate(p, block_size); + } else { + unpooled_memory_->Deallocate(p, bytes, alignment); } - // Deallocate a regular block, first check if last_dealloc_pool_ is suitable. - if (last_dealloc_pool_ && last_dealloc_pool_->GetBlockSize() == block_size) return last_dealloc_pool_->Deallocate(p); - // Find the pool with equal block_size. - impl::Pool pool(block_size, max_blocks_per_chunk_, GetUpstreamResource()); - auto it = std::lower_bound(pools_.begin(), pools_.end(), pool, - [](const auto &a, const auto &b) { return a.GetBlockSize() < b.GetBlockSize(); }); - MG_ASSERT(it != pools_.end(), "Failed deallocation"); - MG_ASSERT(it->GetBlockSize() == block_size, "Failed deallocation"); - last_alloc_pool_ = &*it; - last_dealloc_pool_ = &*it; - return it->Deallocate(p); } - -void PoolResource::Release() { - for (auto &pool : pools_) pool.Release(); - pools_.clear(); - for (auto &big_block : unpooled_) - GetUpstreamResourceBlocks()->Deallocate(big_block.data, big_block.bytes, big_block.alignment); - unpooled_.clear(); - last_alloc_pool_ = nullptr; - last_dealloc_pool_ = nullptr; -} - -// PoolResource END - +bool PoolResource::DoIsEqual(MemoryResource const &other) const noexcept { return this == &other; } } // namespace memgraph::utils diff --git a/src/utils/memory.hpp b/src/utils/memory.hpp index 225a3b6a1..8ff6c3523 100644 --- a/src/utils/memory.hpp +++ b/src/utils/memory.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -15,7 +15,11 @@ #pragma once +#include #include +#include +#include +#include #include #include #include @@ -248,6 +252,8 @@ bool operator!=(const Allocator &a, const Allocator &b) { return !(a == b); } +auto NullMemoryResource() noexcept -> MemoryResource *; + /// Wraps std::pmr::memory_resource for use with out MemoryResource class StdMemoryResource final : public MemoryResource { public: @@ -380,37 +386,45 @@ class MonotonicBufferResource final : public MemoryResource { namespace impl { +template +using AList = std::forward_list>; + template using AVector = std::vector>; /// Holds a number of Chunks each serving blocks of particular size. When a -/// Chunk runs out of available blocks, a new Chunk is allocated. The naming is -/// taken from `libstdc++` implementation, but the implementation details are -/// more similar to `FixedAllocator` described in "Small Object Allocation" from -/// "Modern C++ Design". +/// Chunk runs out of available blocks, a new Chunk is allocated. 
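// Aside: Chunk::build_freelist (below) threads a "next" pointer through each
// block of a fresh chunk and returns the head. For block_size == 16 and
// three blocks starting at address A, the chunk ends up as:
//   [A + 0]  -> nullptr   (tail of the list)
//   [A + 16] -> A + 0
//   [A + 32] -> A + 16    <- returned head
// so Allocate simply pops heads until the chunk is exhausted.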
class Pool final { /// Holds a pointer into a chunk of memory which consists of equal sized - /// blocks. Each Chunk can handle `std::numeric_limits::max()` - /// number of blocks. Blocks form a "free-list", where each unused block has - /// an embedded index to the next unused block. + /// blocks. Blocks form a "free-list" struct Chunk { - unsigned char *data; - unsigned char first_available_block_ix; - unsigned char blocks_available; + // TODO: make blocks_per_chunk a per chunk thing (ie. allow chunk growth) + std::byte *raw_data; + explicit Chunk(std::byte *rawData) : raw_data(rawData) {} + std::byte *build_freelist(std::size_t block_size, std::size_t blocks_in_chunk) { + auto current = raw_data; + std::byte *prev = nullptr; + auto end = current + (blocks_in_chunk * block_size); + while (current != end) { + std::byte **list_entry = reinterpret_cast(current); + *list_entry = std::exchange(prev, current); + current += block_size; + } + DMG_ASSERT(prev != nullptr); + return prev; + } }; - unsigned char blocks_per_chunk_; - size_t block_size_; - AVector chunks_; - Chunk *last_alloc_chunk_{nullptr}; - Chunk *last_dealloc_chunk_{nullptr}; + std::byte *free_list_{nullptr}; + uint8_t blocks_per_chunk_{}; + std::size_t block_size_{}; + + AList chunks_; // TODO: do ourself so we can do fast Release (detect monotonic, do nothing) public: - static constexpr auto MaxBlocksInChunk() { - return std::numeric_limits::max(); - } + static constexpr auto MaxBlocksInChunk = std::numeric_limits::max(); - Pool(size_t block_size, unsigned char blocks_per_chunk, MemoryResource *memory); + Pool(size_t block_size, unsigned char blocks_per_chunk, MemoryResource *chunk_memory); Pool(const Pool &) = delete; Pool &operator=(const Pool &) = delete; @@ -430,8 +444,147 @@ class Pool final { void *Allocate(); void Deallocate(void *p); +}; - void Release(); +// C++ overloads for clz +constexpr auto clz(unsigned int x) { return __builtin_clz(x); } +constexpr auto clz(unsigned long x) { return __builtin_clzl(x); } +constexpr auto clz(unsigned long long x) { return __builtin_clzll(x); } + +template +constexpr auto bits_sizeof = sizeof(T) * CHAR_BIT; + +/// 0-based bit index of the most significant bit assumed that `n` != 0 +template +constexpr auto msb_index(T n) { + return bits_sizeof - clz(n) - T(1); +} + +/* This function will in O(1) time provide a bin index based on: + * B - the number of most significant bits to be sensitive to + * LB - the value that should be considered below the consideration for bin index of 0 (LB is exclusive) + * + * lets say we were: + * - sensitive to two bits (B == 2) + * - lowest bin is for 8 (LB == 8) + * + * our bin indexes would look like: + * 0 - 0000'1100 12 + * 1 - 0001'0000 16 + * 2 - 0001'1000 24 + * 3 - 0010'0000 32 + * 4 - 0011'0000 48 + * 5 - 0100'0000 64 + * 6 - 0110'0000 96 + * 7 - 1000'0000 128 + * 8 - 1100'0000 192 + * ... + * + * Example: + * Given n == 70, we want to return the bin index to the first value which is + * larger than n. 
+ * bin_index<2,8>(70) => 6, as 64 (index 5) < 70 and 70 <= 96 (index 6) + */ +template +constexpr std::size_t bin_index(std::size_t n) { + static_assert(B >= 1U, "Needs to be sensitive to at least one bit"); + static_assert(LB != 0U, "Lower bound need to be non-zero"); + DMG_ASSERT(n > LB); + + // We will alway be sensitive to at least the MSB + // exponent tells us how many bits we need to use to select within a level + constexpr auto kExponent = B - 1U; + // 2^exponent gives the size of each level + constexpr auto kSize = 1U << kExponent; + // offset help adjust results down to be inline with bin_index(LB) == 0 + constexpr auto kOffset = msb_index(LB); + + auto const msb_idx = msb_index(n); + DMG_ASSERT(msb_idx != 0); + + auto const mask = (1u << msb_idx) - 1u; + auto const under = n & mask; + auto const selector = under >> (msb_idx - kExponent); + + auto const rest = under & (mask >> kExponent); + auto const no_overflow = rest == 0U; + + auto const msb_level = kSize * (msb_idx - kOffset); + return msb_level + selector - no_overflow; +} + +// This is the inverse opperation for bin_index +// bin_size(bin_index(X)-1) < X <= bin_size(bin_index(X)) +template +std::size_t bin_size(std::size_t idx) { + constexpr auto kExponent = B - 1U; + constexpr auto kSize = 1U << kExponent; + constexpr auto kOffset = msb_index(LB); + + // no need to optimise `/` or `%` compiler can see `kSize` is a power of 2 + auto const level = (idx + 1) / kSize; + auto const sub_level = (idx + 1) % kSize; + return (1U << (level + kOffset)) | (sub_level << (level + kOffset - kExponent)); +} + +template +struct MultiPool { + static_assert(LB < UB, "lower bound must be less than upper bound"); + static_assert(IsPow2(LB) && IsPow2(UB), "Design untested for non powers of 2"); + static_assert((LB << Bits) % sizeof(void *) == 0, "Smallest pool must have space and alignment for freelist"); + + // upper bound is inclusive + static bool is_size_handled(std::size_t size) { return LB < size && size <= UB; } + static bool is_above_upper_bound(std::size_t size) { return UB < size; } + + static constexpr auto n_bins = bin_index(UB) + 1U; + + MultiPool(uint8_t blocks_per_chunk, MemoryResource *memory, MemoryResource *internal_memory) + : blocks_per_chunk_{blocks_per_chunk}, memory_{memory}, internal_memory_{internal_memory} {} + + ~MultiPool() { + if (pools_) { + auto pool_alloc = Allocator(internal_memory_); + for (auto i = 0U; i != n_bins; ++i) { + pool_alloc.destroy(&pools_[i]); + } + pool_alloc.deallocate(pools_, n_bins); + } + } + + void *allocate(std::size_t bytes) { + auto idx = bin_index(bytes); + if (!pools_) [[unlikely]] { + initialise_pools(); + } + return pools_[idx].Allocate(); + } + + void deallocate(void *ptr, std::size_t bytes) { + auto idx = bin_index(bytes); + pools_[idx].Deallocate(ptr); + } + + private: + void initialise_pools() { + auto pool_alloc = Allocator(internal_memory_); + auto pools = pool_alloc.allocate(n_bins); + try { + for (auto i = 0U; i != n_bins; ++i) { + auto block_size = bin_size(i); + pool_alloc.construct(&pools[i], block_size, blocks_per_chunk_, memory_); + } + pools_ = pools; + } catch (...) { + pool_alloc.deallocate(pools, n_bins); + throw; + } + } + + Pool *pools_{}; + uint8_t blocks_per_chunk_{}; + MemoryResource *memory_{}; + MemoryResource *internal_memory_{}; }; } // namespace impl @@ -442,8 +595,6 @@ class Pool final { /// /// This class has the following properties with regards to memory management. 
/// -/// * All allocated memory will be freed upon destruction, even if Deallocate -/// has not been called for some of the allocated blocks. /// * It consists of a collection of impl::Pool instances, each serving /// requests for different block sizes. Each impl::Pool manages a collection /// of impl::Pool::Chunk instances which are divided into blocks of uniform @@ -452,91 +603,46 @@ class Pool final { /// arbitrary alignment requests. Each requested block size must be a /// multiple of alignment or smaller than the alignment value. /// * An allocation request within the limits of the maximum block size will -/// find a Pool serving the requested size. If there's no Pool serving such -/// a request, a new one is instantiated. +/// find a Pool serving the requested size. Some requests will share a larger +/// pool size. /// * When a Pool exhausts its Chunk, a new one is allocated with the size for /// the maximum number of blocks. /// * Allocation requests which exceed the maximum block size will be /// forwarded to upstream MemoryResource. -/// * Maximum block size and maximum number of blocks per chunk can be tuned -/// by passing the arguments to the constructor. +/// * Maximum number of blocks per chunk can be tuned by passing the +/// arguments to the constructor. + class PoolResource final : public MemoryResource { public: - /// Construct with given max_blocks_per_chunk, max_block_size and upstream - /// memory. - /// - /// The implementation will use std::min(max_blocks_per_chunk, - /// impl::Pool::MaxBlocksInChunk()) as the real maximum number of blocks per - /// chunk. Allocation requests exceeding max_block_size are simply forwarded - /// to upstream memory. - PoolResource(size_t max_blocks_per_chunk, size_t max_block_size, MemoryResource *memory_pools = NewDeleteResource(), - MemoryResource *memory_unpooled = NewDeleteResource()); - - PoolResource(const PoolResource &) = delete; - PoolResource &operator=(const PoolResource &) = delete; - - PoolResource(PoolResource &&) = default; - PoolResource &operator=(PoolResource &&) = default; - - ~PoolResource() override { Release(); } - - MemoryResource *GetUpstreamResource() const { return pools_.get_allocator().GetMemoryResource(); } - MemoryResource *GetUpstreamResourceBlocks() const { return unpooled_.get_allocator().GetMemoryResource(); } - - /// Release all allocated memory. - void Release(); + PoolResource(uint8_t blocks_per_chunk, MemoryResource *memory = NewDeleteResource(), + MemoryResource *internal_memory = NewDeleteResource()) + : mini_pools_{ + impl::Pool{8, blocks_per_chunk, memory}, + impl::Pool{16, blocks_per_chunk, memory}, + impl::Pool{24, blocks_per_chunk, memory}, + impl::Pool{32, blocks_per_chunk, memory}, + impl::Pool{40, blocks_per_chunk, memory}, + impl::Pool{48, blocks_per_chunk, memory}, + impl::Pool{56, blocks_per_chunk, memory}, + impl::Pool{64, blocks_per_chunk, memory}, + }, + pools_3bit_(blocks_per_chunk, memory, internal_memory), + pools_4bit_(blocks_per_chunk, memory, internal_memory), + pools_5bit_(blocks_per_chunk, memory, internal_memory), + unpooled_memory_{internal_memory} {} + ~PoolResource() override = default; private: - // Big block larger than max_block_size_, doesn't go into a pool. - struct BigBlock { - size_t bytes; - size_t alignment; - void *data; - }; - - // TODO: Potential memory optimization is replacing `std::vector` with our - // custom vector implementation which doesn't store a `MemoryResource *`. 
- // Currently we have vectors for `pools_` and `unpooled_`, as well as each - // `impl::Pool` stores a `chunks_` vector. - - // Pools are sorted by bound_size_, ascending. - impl::AVector pools_; - impl::Pool *last_alloc_pool_{nullptr}; - impl::Pool *last_dealloc_pool_{nullptr}; - // Unpooled BigBlocks are sorted by data pointer. - impl::AVector unpooled_; - size_t max_blocks_per_chunk_; - size_t max_block_size_; - void *DoAllocate(size_t bytes, size_t alignment) override; - void DoDeallocate(void *p, size_t bytes, size_t alignment) override; - - bool DoIsEqual(const MemoryResource &other) const noexcept override { return this == &other; } -}; - -/// Like PoolResource but uses SpinLock for thread safe usage. -class SynchronizedPoolResource final : public MemoryResource { - public: - SynchronizedPoolResource(size_t max_blocks_per_chunk, size_t max_block_size, - MemoryResource *memory = NewDeleteResource()) - : pool_memory_(max_blocks_per_chunk, max_block_size, memory) {} + bool DoIsEqual(MemoryResource const &other) const noexcept override; private: - PoolResource pool_memory_; - SpinLock lock_; - - void *DoAllocate(size_t bytes, size_t alignment) override { - std::lock_guard guard(lock_); - return pool_memory_.Allocate(bytes, alignment); - } - - void DoDeallocate(void *p, size_t bytes, size_t alignment) override { - std::lock_guard guard(lock_); - pool_memory_.Deallocate(p, bytes, alignment); - } - - bool DoIsEqual(const MemoryResource &other) const noexcept override { return this == &other; } + std::array mini_pools_; + impl::MultiPool<3, 64, 128> pools_3bit_; + impl::MultiPool<4, 128, 512> pools_4bit_; + impl::MultiPool<5, 512, 1024> pools_5bit_; + MemoryResource *unpooled_memory_; }; class MemoryTrackingResource final : public utils::MemoryResource { diff --git a/src/utils/tag.hpp b/src/utils/tag.hpp new file mode 100644 index 000000000..dfd8c8f81 --- /dev/null +++ b/src/utils/tag.hpp @@ -0,0 +1,32 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
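// Aside: a minimal usage sketch of the reshaped PoolResource above -- it is
// now configured only by blocks-per-chunk, since the block-size classes are
// fixed (illustrative; any request over 1024 bytes goes straight upstream):
#include <vector>
inline void pool_resource_sketch() {
  memgraph::utils::PoolResource pool{128 /* blocks per chunk */};
  memgraph::utils::Allocator<int> alloc{&pool};
  std::vector<int, memgraph::utils::Allocator<int>> values{alloc};
  values.reserve(100);  // 400 bytes -> served by the 4-bit pool group
}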
+ +#pragma once + +namespace memgraph::utils { + +template +struct tag_type { + using type = T; +}; + +template +struct tag_value { + static constexpr auto value = V; +}; + +template +auto tag_t = tag_type{}; + +template +auto tag_v = tag_value{}; + +} // namespace memgraph::utils diff --git a/tests/benchmark/query/execution.cpp b/tests/benchmark/query/execution.cpp index d49b14fc3..1d65cdb93 100644 --- a/tests/benchmark/query/execution.cpp +++ b/tests/benchmark/query/execution.cpp @@ -55,12 +55,12 @@ class NewDeleteResource final { }; class PoolResource final { - memgraph::utils::PoolResource memory_{128, 4 * 1024}; + memgraph::utils::PoolResource memory_{128}; public: memgraph::utils::MemoryResource *get() { return &memory_; } - void Reset() { memory_.Release(); } + void Reset() {} }; static void AddVertices(memgraph::storage::Storage *db, int vertex_count) { diff --git a/tests/benchmark/skip_list_vs_stl.cpp b/tests/benchmark/skip_list_vs_stl.cpp index 1a17e56e1..9a856822f 100644 --- a/tests/benchmark/skip_list_vs_stl.cpp +++ b/tests/benchmark/skip_list_vs_stl.cpp @@ -1,4 +1,4 @@ -// Copyright 2022 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -101,8 +101,7 @@ class StdSetWithPoolAllocatorInsertFixture : public benchmark::Fixture { } protected: - memgraph::utils::PoolResource memory_{256U /* max_blocks_per_chunk */, 1024U /* max_block_size */, - memgraph::utils::NewDeleteResource()}; + memgraph::utils::PoolResource memory_{128U /* max_blocks_per_chunk */, memgraph::utils::NewDeleteResource()}; std::set, memgraph::utils::Allocator> container{&memory_}; memgraph::utils::SpinLock lock; }; @@ -208,8 +207,7 @@ class StdSetWithPoolAllocatorFindFixture : public benchmark::Fixture { } protected: - memgraph::utils::PoolResource memory_{256U /* max_blocks_per_chunk */, 1024U /* max_block_size */, - memgraph::utils::NewDeleteResource()}; + memgraph::utils::PoolResource memory_{128U /* max_blocks_per_chunk */, memgraph::utils::NewDeleteResource()}; std::set, memgraph::utils::Allocator> container{&memory_}; memgraph::utils::SpinLock lock; }; @@ -325,8 +323,7 @@ class StdMapWithPoolAllocatorInsertFixture : public benchmark::Fixture { } protected: - memgraph::utils::PoolResource memory_{256U /* max_blocks_per_chunk */, 1024U /* max_block_size */, - memgraph::utils::NewDeleteResource()}; + memgraph::utils::PoolResource memory_{128U /* max_blocks_per_chunk */, memgraph::utils::NewDeleteResource()}; std::map, memgraph::utils::Allocator>> container{ &memory_}; memgraph::utils::SpinLock lock; @@ -433,8 +430,7 @@ class StdMapWithPoolAllocatorFindFixture : public benchmark::Fixture { } protected: - memgraph::utils::PoolResource memory_{256U /* max_blocks_per_chunk */, 1024U /* max_block_size */, - memgraph::utils::NewDeleteResource()}; + memgraph::utils::PoolResource memory_{128U /* max_blocks_per_chunk */, memgraph::utils::NewDeleteResource()}; std::map, memgraph::utils::Allocator>> container{ &memory_}; memgraph::utils::SpinLock lock; diff --git a/tests/e2e/memory/workloads.yaml b/tests/e2e/memory/workloads.yaml index bf29e484c..c043e03d8 100644 --- a/tests/e2e/memory/workloads.yaml +++ b/tests/e2e/memory/workloads.yaml @@ -52,26 +52,26 @@ in_memory_query_limit_cluster: &in_memory_query_limit_cluster setup_queries: [] validation_queries: [] -args_450_MiB_limit: &args_450_MiB_limit +args_350_MiB_limit: 
&args_350_MiB_limit - "--bolt-port" - *bolt_port - - "--memory-limit=450" + - "--memory-limit=350" - "--storage-gc-cycle-sec=180" - "--log-level=INFO" -in_memory_450_MiB_limit_cluster: &in_memory_450_MiB_limit_cluster +in_memory_350_MiB_limit_cluster: &in_memory_350_MiB_limit_cluster cluster: main: - args: *args_450_MiB_limit + args: *args_350_MiB_limit log_file: "memory-e2e.log" setup_queries: [] validation_queries: [] -disk_450_MiB_limit_cluster: &disk_450_MiB_limit_cluster +disk_350_MiB_limit_cluster: &disk_350_MiB_limit_cluster cluster: main: - args: *args_450_MiB_limit + args: *args_350_MiB_limit log_file: "memory-e2e.log" setup_queries: [] validation_queries: [] @@ -192,22 +192,22 @@ workloads: - name: "Memory control for accumulation" binary: "tests/e2e/memory/memgraph__e2e__memory__limit_accumulation" args: ["--bolt-port", *bolt_port] - <<: *in_memory_450_MiB_limit_cluster + <<: *in_memory_350_MiB_limit_cluster - name: "Memory control for accumulation on disk storage" binary: "tests/e2e/memory/memgraph__e2e__memory__limit_accumulation" args: ["--bolt-port", *bolt_port] - <<: *disk_450_MiB_limit_cluster + <<: *disk_350_MiB_limit_cluster - name: "Memory control for edge create" binary: "tests/e2e/memory/memgraph__e2e__memory__limit_edge_create" args: ["--bolt-port", *bolt_port] - <<: *in_memory_450_MiB_limit_cluster + <<: *in_memory_350_MiB_limit_cluster - name: "Memory control for edge create on disk storage" binary: "tests/e2e/memory/memgraph__e2e__memory__limit_edge_create" args: ["--bolt-port", *bolt_port] - <<: *disk_450_MiB_limit_cluster + <<: *disk_350_MiB_limit_cluster - name: "Memory control for memory limit global thread alloc" binary: "tests/e2e/memory/memgraph__e2e__memory_limit_global_thread_alloc_proc" diff --git a/tests/mgbench/runners.py b/tests/mgbench/runners.py index e1f52b696..155ceac06 100644 --- a/tests/mgbench/runners.py +++ b/tests/mgbench/runners.py @@ -416,6 +416,7 @@ class Memgraph(BaseRunner): def __init__(self, benchmark_context: BenchmarkContext): super().__init__(benchmark_context=benchmark_context) self._memgraph_binary = benchmark_context.vendor_binary + self._bolt_num_workers = benchmark_context.num_workers_for_benchmark self._performance_tracking = benchmark_context.performance_tracking self._directory = tempfile.TemporaryDirectory(dir=benchmark_context.temporary_directory) self._vendor_args = benchmark_context.vendor_args @@ -440,6 +441,7 @@ class Memgraph(BaseRunner): kwargs["bolt_port"] = self._bolt_port kwargs["data_directory"] = data_directory kwargs["storage_properties_on_edges"] = True + kwargs["bolt_num_workers"] = self._bolt_num_workers for key, value in self._vendor_args.items(): kwargs[key] = value return _convert_args_to_flags(self._memgraph_binary, **kwargs) diff --git a/tests/unit/property_value_v2.cpp b/tests/unit/property_value_v2.cpp index aba322ce7..28937598e 100644 --- a/tests/unit/property_value_v2.cpp +++ b/tests/unit/property_value_v2.cpp @@ -1,4 +1,4 @@ -// Copyright 2022 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -570,7 +570,6 @@ TEST(PropertyValue, MoveConstructor) { for (auto &item : data) { memgraph::storage::PropertyValue copy(item); memgraph::storage::PropertyValue pv(std::move(item)); - ASSERT_EQ(item.type(), memgraph::storage::PropertyValue::Type::Null); ASSERT_EQ(pv.type(), copy.type()); switch (copy.type()) { case memgraph::storage::PropertyValue::Type::Null: @@ -668,7 +667,6 @@ TEST(PropertyValue, MoveAssignment) { memgraph::storage::PropertyValue copy(item); memgraph::storage::PropertyValue pv(123); pv = std::move(item); - ASSERT_EQ(item.type(), memgraph::storage::PropertyValue::Type::Null); ASSERT_EQ(pv.type(), copy.type()); switch (copy.type()) { case memgraph::storage::PropertyValue::Type::Null: diff --git a/tests/unit/utils_memory.cpp b/tests/unit/utils_memory.cpp index 5173a5f7b..e46c6c1f9 100644 --- a/tests/unit/utils_memory.cpp +++ b/tests/unit/utils_memory.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -195,134 +195,6 @@ TEST(MonotonicBufferResource, AllocationWithInitialBufferOnStack) { } } -// NOLINTNEXTLINE(hicpp-special-member-functions) -TEST(PoolResource, SingleSmallBlockAllocations) { - TestMemory test_mem; - const size_t max_blocks_per_chunk = 3U; - const size_t max_block_size = 64U; - memgraph::utils::PoolResource mem(max_blocks_per_chunk, max_block_size, &test_mem); - // Fill the first chunk. - CheckAllocation(&mem, 64U, 1U); - // May allocate more than once due to bookkeeping. - EXPECT_GE(test_mem.new_count_, 1U); - // Reset tracking and continue filling the first chunk. - test_mem.new_count_ = 0U; - CheckAllocation(&mem, 64U, 64U); - CheckAllocation(&mem, 64U); - EXPECT_EQ(test_mem.new_count_, 0U); - // Reset tracking and fill the second chunk - test_mem.new_count_ = 0U; - CheckAllocation(&mem, 64U, 32U); - auto *ptr1 = CheckAllocation(&mem, 32U, 64U); // this will become 64b block - auto *ptr2 = CheckAllocation(&mem, 64U, 32U); - // We expect one allocation for chunk and at most one for bookkeeping. - EXPECT_TRUE(test_mem.new_count_ >= 1U && test_mem.new_count_ <= 2U); - test_mem.delete_count_ = 0U; - mem.Deallocate(ptr1, 32U, 64U); - mem.Deallocate(ptr2, 64U, 32U); - EXPECT_EQ(test_mem.delete_count_, 0U); - mem.Release(); - EXPECT_GE(test_mem.delete_count_, 2U); - CheckAllocation(&mem, 64U, 1U); -} - -// NOLINTNEXTLINE(hicpp-special-member-functions) -TEST(PoolResource, MultipleSmallBlockAllocations) { - TestMemory test_mem; - const size_t max_blocks_per_chunk = 1U; - const size_t max_block_size = 64U; - memgraph::utils::PoolResource mem(max_blocks_per_chunk, max_block_size, &test_mem); - CheckAllocation(&mem, 64U); - CheckAllocation(&mem, 18U, 2U); - CheckAllocation(&mem, 24U, 8U); - // May allocate more than once per chunk due to bookkeeping. - EXPECT_GE(test_mem.new_count_, 3U); - // Reset tracking and fill the second chunk - test_mem.new_count_ = 0U; - CheckAllocation(&mem, 64U); - CheckAllocation(&mem, 18U, 2U); - CheckAllocation(&mem, 24U, 8U); - // We expect one allocation for chunk and at most one for bookkeeping. 
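// Aside: a hedged sketch of a smoke test against the new PoolResource
// interface, which has neither Release() nor a max_block_size parameter
// (illustrative only, reusing this file's gtest conventions):
TEST(PoolResource, FreeListReuse) {
  memgraph::utils::PoolResource mem(128);
  auto *p1 = mem.Allocate(64U, 8U);
  mem.Deallocate(p1, 64U, 8U);
  auto *p2 = mem.Allocate(64U, 8U);
  EXPECT_EQ(p1, p2);  // the intrusive free list hands back the same block
}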
-  EXPECT_TRUE(test_mem.new_count_ >= 3U && test_mem.new_count_ <= 6U);
-  mem.Release();
-  EXPECT_GE(test_mem.delete_count_, 6U);
-  CheckAllocation(&mem, 64U);
-}
-
-// NOLINTNEXTLINE(hicpp-special-member-functions)
-TEST(PoolResource, BigBlockAllocations) {
-  TestMemory test_mem;
-  TestMemory test_mem_unpooled;
-  const size_t max_blocks_per_chunk = 3U;
-  const size_t max_block_size = 64U;
-  memgraph::utils::PoolResource mem(max_blocks_per_chunk, max_block_size, &test_mem, &test_mem_unpooled);
-  CheckAllocation(&mem, max_block_size + 1, 1U);
-  // May allocate more than once per block due to bookkeeping.
-  EXPECT_GE(test_mem_unpooled.new_count_, 1U);
-  CheckAllocation(&mem, max_block_size + 1, 1U);
-  EXPECT_GE(test_mem_unpooled.new_count_, 2U);
-  auto *ptr = CheckAllocation(&mem, max_block_size * 2, 1U);
-  EXPECT_GE(test_mem_unpooled.new_count_, 3U);
-  mem.Deallocate(ptr, max_block_size * 2, 1U);
-  EXPECT_GE(test_mem_unpooled.delete_count_, 1U);
-  mem.Release();
-  EXPECT_GE(test_mem_unpooled.delete_count_, 3U);
-  CheckAllocation(&mem, max_block_size + 1, 1U);
-}
-
-// NOLINTNEXTLINE(hicpp-special-member-functions)
-TEST(PoolResource, BlockSizeIsNotMultipleOfAlignment) {
-  const size_t max_blocks_per_chunk = 3U;
-  const size_t max_block_size = 64U;
-  memgraph::utils::PoolResource mem(max_blocks_per_chunk, max_block_size);
-  EXPECT_THROW(mem.Allocate(64U, 24U), std::bad_alloc);
-  EXPECT_THROW(mem.Allocate(63U), std::bad_alloc);
-  EXPECT_THROW(mem.Allocate(max_block_size + 1, max_block_size), std::bad_alloc);
-}
-
-// NOLINTNEXTLINE(hicpp-special-member-functions)
-TEST(PoolResource, AllocationWithOverflow) {
-  {
-    const size_t max_blocks_per_chunk = 2U;
-    memgraph::utils::PoolResource mem(max_blocks_per_chunk, std::numeric_limits<size_t>::max());
-    EXPECT_THROW(mem.Allocate(std::numeric_limits<size_t>::max(), 1U), std::bad_alloc);
-    // Throws because initial chunk block is aligned to
-    // memgraph::utils::Ceil2(block_size), which wraps in this case.
-    EXPECT_THROW(mem.Allocate((std::numeric_limits<size_t>::max() - 1U) / max_blocks_per_chunk, 1U), std::bad_alloc);
-  }
-  {
-    const size_t max_blocks_per_chunk = memgraph::utils::impl::Pool::MaxBlocksInChunk();
-    memgraph::utils::PoolResource mem(max_blocks_per_chunk, std::numeric_limits<size_t>::max());
-    EXPECT_THROW(mem.Allocate(std::numeric_limits<size_t>::max(), 1U), std::bad_alloc);
-    // Throws because initial chunk block is aligned to
-    // memgraph::utils::Ceil2(block_size), which wraps in this case.
-    EXPECT_THROW(mem.Allocate((std::numeric_limits<size_t>::max() - 1U) / max_blocks_per_chunk, 1U), std::bad_alloc);
-  }
-}
-
-TEST(PoolResource, BlockDeallocation) {
-  TestMemory test_mem;
-  const size_t max_blocks_per_chunk = 2U;
-  const size_t max_block_size = 64U;
-  memgraph::utils::PoolResource mem(max_blocks_per_chunk, max_block_size, &test_mem);
-  auto *ptr = CheckAllocation(&mem, max_block_size);
-  test_mem.new_count_ = 0U;
-  // Do another allocation before deallocating `ptr`, so that we are sure that
-  // the chunk of 2 blocks is still alive and therefore `ptr` may be reused when
-  // it's deallocated. If we deallocate now, the implementation may choose to
-  // free the whole chunk, and we do not want that for the purposes of this
-  // test.
-  CheckAllocation(&mem, max_block_size);
-  EXPECT_EQ(test_mem.new_count_, 0U);
-  EXPECT_EQ(test_mem.delete_count_, 0U);
-  mem.Deallocate(ptr, max_block_size);
-  EXPECT_EQ(test_mem.delete_count_, 0U);
-  // CheckAllocation(&mem, max_block_size) will fail as PoolResource should
-  // reuse free blocks.
-  EXPECT_EQ(ptr, mem.Allocate(max_block_size));
-  EXPECT_EQ(test_mem.new_count_, 0U);
-}
-
 class AllocationTrackingMemory final : public memgraph::utils::MemoryResource {
  public:
  std::vector<size_t> allocated_sizes_;

From 0ed2d18754157ff27f915ab8977c5d78df37405b Mon Sep 17 00:00:00 2001
From: Aidar Samerkhanov
Date: Fri, 15 Mar 2024 11:39:37 +0400
Subject: [PATCH 07/16] Add RollUpApply operator support to edge type index
 rewrite. (#1816)

---
 src/query/plan/rewrite/edge_type_index_lookup.hpp | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/src/query/plan/rewrite/edge_type_index_lookup.hpp b/src/query/plan/rewrite/edge_type_index_lookup.hpp
index ed8666513..893fef970 100644
--- a/src/query/plan/rewrite/edge_type_index_lookup.hpp
+++ b/src/query/plan/rewrite/edge_type_index_lookup.hpp
@@ -465,6 +465,18 @@ class EdgeTypeIndexRewriter final : public HierarchicalLogicalOperatorVisitor {
     return true;
   }

+  bool PreVisit(RollUpApply &op) override {
+    prev_ops_.push_back(&op);
+    op.input()->Accept(*this);
+    RewriteBranch(&op.list_collection_branch_);
+    return false;
+  }
+
+  bool PostVisit(RollUpApply &) override {
+    prev_ops_.pop_back();
+    return true;
+  }
+
  std::shared_ptr<LogicalOperator> new_root_;

  private:

From 082f9a7d9b5d15f6525fd9514f863d56746a73c1 Mon Sep 17 00:00:00 2001
From: Josipmrden
Date: Fri, 15 Mar 2024 14:45:21 +0100
Subject: [PATCH 08/16] Add behaviour of no updates if vertex is updated with
 same value (#1791)

---
 src/flags/general.cpp                     |  4 ++
 src/flags/general.hpp                     |  2 +
 src/memgraph.cpp                          |  3 +-
 src/query/frontend/stripped.cpp           |  2 +-
 src/query/plan/operator.cpp               |  2 +-
 src/storage/v2/config.hpp                 |  1 +
 src/storage/v2/edge_accessor.cpp          | 10 +++-
 src/storage/v2/vertex_accessor.cpp        | 69 ++++++++++++++---------
 src/utils/atomic_memory_block.hpp         | 10 ++--
 src/utils/on_scope_exit.hpp               |  5 +-
 tests/e2e/CMakeLists.txt                  |  1 +
 tests/e2e/concurrency/CMakeLists.txt      |  6 ++
 tests/e2e/concurrency/common.py           | 60 ++++++++++++++++++++
 tests/e2e/concurrency/concurrency.py      | 57 +++++++++++++++++++
 tests/e2e/concurrency/workloads.yaml      | 14 +++++
 tests/e2e/configuration/default_config.py |  5 ++
 16 files changed, 209 insertions(+), 42 deletions(-)
 create mode 100644 tests/e2e/concurrency/CMakeLists.txt
 create mode 100644 tests/e2e/concurrency/common.py
 create mode 100644 tests/e2e/concurrency/concurrency.py
 create mode 100644 tests/e2e/concurrency/workloads.yaml

diff --git a/src/flags/general.cpp b/src/flags/general.cpp
index cd2c95c60..37fa17b36 100644
--- a/src/flags/general.cpp
+++ b/src/flags/general.cpp
@@ -131,6 +131,10 @@ DEFINE_uint64(storage_recovery_thread_count,
 DEFINE_bool(storage_enable_schema_metadata, false,
             "Controls whether metadata should be collected about the resident labels and edge types.");

+// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
+DEFINE_bool(storage_delta_on_identical_property_update, true,
+            "Controls whether updating a property with the same value should create a delta object.");
+
 // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
 DEFINE_bool(telemetry_enabled, false,
             "Set to true to enable telemetry. We collect information about the "
diff --git a/src/flags/general.hpp b/src/flags/general.hpp
index a1e8729ab..52f51471d 100644
--- a/src/flags/general.hpp
+++ b/src/flags/general.hpp
@@ -84,6 +84,8 @@ DECLARE_bool(storage_parallel_schema_recovery);
 DECLARE_uint64(storage_recovery_thread_count);
 // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
 DECLARE_bool(storage_enable_schema_metadata);
+// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
+DECLARE_bool(storage_delta_on_identical_property_update);
 // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
 DECLARE_bool(telemetry_enabled);

diff --git a/src/memgraph.cpp b/src/memgraph.cpp
index d896bcc4c..9bf50131d 100644
--- a/src/memgraph.cpp
+++ b/src/memgraph.cpp
@@ -332,7 +332,8 @@ int main(int argc, char **argv) {
           .durability_directory = FLAGS_data_directory + "/rocksdb_durability",
           .wal_directory = FLAGS_data_directory + "/rocksdb_wal"},
       .salient.items = {.properties_on_edges = FLAGS_storage_properties_on_edges,
-                        .enable_schema_metadata = FLAGS_storage_enable_schema_metadata},
+                        .enable_schema_metadata = FLAGS_storage_enable_schema_metadata,
+                        .delta_on_identical_property_update = FLAGS_storage_delta_on_identical_property_update},
       .salient.storage_mode = memgraph::flags::ParseStorageMode()};
   spdlog::info("config recover on startup {}, flags {} {}", db_config.durability.recover_on_startup,
                FLAGS_storage_recover_on_startup, FLAGS_data_recovery_on_startup);

diff --git a/src/query/frontend/stripped.cpp b/src/query/frontend/stripped.cpp
index 9740cd463..5ea26b041 100644
--- a/src/query/frontend/stripped.cpp
+++ b/src/query/frontend/stripped.cpp
@@ -1,4 +1,4 @@
-// Copyright 2023 Memgraph Ltd.
+// Copyright 2024 Memgraph Ltd.
 //
 // Use of this software is governed by the Business Source License
 // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source

diff --git a/src/query/plan/operator.cpp b/src/query/plan/operator.cpp
index 8e1b9f529..2b970cf49 100644
--- a/src/query/plan/operator.cpp
+++ b/src/query/plan/operator.cpp
@@ -329,7 +329,7 @@ CreateExpand::CreateExpand(NodeCreationInfo node_info, EdgeCreationInfo edge_inf
 ACCEPT_WITH_INPUT(CreateExpand)

 UniqueCursorPtr CreateExpand::MakeCursor(utils::MemoryResource *mem) const {
-  memgraph::metrics::IncrementCounter(memgraph::metrics::CreateNodeOperator);
+  memgraph::metrics::IncrementCounter(memgraph::metrics::CreateExpandOperator);

   return MakeUniqueCursorPtr<CreateExpandCursor>(mem, *this, mem);
 }

diff --git a/src/storage/v2/config.hpp b/src/storage/v2/config.hpp
index b2a55a40a..419f29b85 100644
--- a/src/storage/v2/config.hpp
+++ b/src/storage/v2/config.hpp
@@ -37,6 +37,7 @@ struct SalientConfig {
   struct Items {
     bool properties_on_edges{true};
     bool enable_schema_metadata{false};
+    bool delta_on_identical_property_update{true};
     friend bool operator==(const Items &lrh, const Items &rhs) = default;
   } items;

diff --git a/src/storage/v2/edge_accessor.cpp b/src/storage/v2/edge_accessor.cpp
index 62a9f4bcd..ba354371e 100644
--- a/src/storage/v2/edge_accessor.cpp
+++ b/src/storage/v2/edge_accessor.cpp
@@ -130,9 +130,13 @@ Result<storage::PropertyValue> EdgeAccessor::SetProperty(PropertyId property, co
   if (edge_.ptr->deleted) return Error::DELETED_OBJECT;

   using ReturnType = decltype(edge_.ptr->properties.GetProperty(property));
   std::optional<ReturnType> current_value;
+  const bool skip_duplicate_write = !storage_->config_.salient.items.delta_on_identical_property_update;
   utils::AtomicMemoryBlock atomic_memory_block{
-      [&current_value, &property, &value, transaction = transaction_, edge = edge_]() {
+      [&current_value, &property, &value, transaction = transaction_, edge = edge_, skip_duplicate_write]() {
         current_value.emplace(edge.ptr->properties.GetProperty(property));
+        if (skip_duplicate_write && current_value == value) {
+          return;
+        }
         // We could skip setting the value if the previous one is the same to the new
         // one. This would save some memory as a delta would not be created as well as
         // avoid copying the value. The reason we are not doing that is because the
@@ -184,12 +188,14 @@ Result<std::vector<std::tuple<PropertyId, PropertyValue, PropertyValue>>> EdgeAc
   if (edge_.ptr->deleted) return Error::DELETED_OBJECT;

+  const bool skip_duplicate_write = !storage_->config_.salient.items.delta_on_identical_property_update;
   using ReturnType = decltype(edge_.ptr->properties.UpdateProperties(properties));
   std::optional<ReturnType> id_old_new_change;
   utils::AtomicMemoryBlock atomic_memory_block{
-      [transaction_ = transaction_, edge_ = edge_, &properties, &id_old_new_change]() {
+      [transaction_ = transaction_, edge_ = edge_, &properties, &id_old_new_change, skip_duplicate_write]() {
         id_old_new_change.emplace(edge_.ptr->properties.UpdateProperties(properties));
         for (auto &[property, old_value, new_value] : *id_old_new_change) {
+          if (skip_duplicate_write && old_value == new_value) continue;
           CreateAndLinkDelta(transaction_, edge_.ptr, Delta::SetPropertyTag(), property, std::move(old_value));
         }
       }};

diff --git a/src/storage/v2/vertex_accessor.cpp b/src/storage/v2/vertex_accessor.cpp
index 7d78070a8..83dcc003b 100644
--- a/src/storage/v2/vertex_accessor.cpp
+++ b/src/storage/v2/vertex_accessor.cpp
@@ -261,20 +261,31 @@ Result<storage::PropertyValue> VertexAccessor::SetProperty(PropertyId property, const Pro
   if (vertex_->deleted) return Error::DELETED_OBJECT;

-  auto current_value = vertex_->properties.GetProperty(property);
-  // We could skip setting the value if the previous one is the same to the new
-  // one. This would save some memory as a delta would not be created as well as
-  // avoid copying the value. The reason we are not doing that is because the
-  // current code always follows the logical pattern of "create a delta" and
-  // "modify in-place". Additionally, the created delta will make other
-  // transactions get a SERIALIZATION_ERROR.
-
+  PropertyValue current_value;
+  const bool skip_duplicate_write = !storage_->config_.salient.items.delta_on_identical_property_update;
   utils::AtomicMemoryBlock atomic_memory_block{
-      [transaction = transaction_, vertex = vertex_, &value, &property, &current_value]() {
+      [transaction = transaction_, vertex = vertex_, &value, &property, &current_value, skip_duplicate_write]() {
+        current_value = vertex->properties.GetProperty(property);
+        // We could skip setting the value if the previous one is the same to the new
+        // one. This would save some memory as a delta would not be created as well as
+        // avoid copying the value. The reason we are not doing that is because the
+        // current code always follows the logical pattern of "create a delta" and
+        // "modify in-place". Additionally, the created delta will make other
+        // transactions get a SERIALIZATION_ERROR.
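+        // Note (assumed semantics of the flag introduced by this patch): when
+        // --storage-delta-on-identical-property-update=false, a write of an
+        // identical value takes the early return below instead, so no delta is
+        // linked, no index/constraint bookkeeping runs, and concurrent
+        // same-value writers no longer serialize against each other.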
+        if (skip_duplicate_write && current_value == value) {
+          return true;
+        }
+
+        CreateAndLinkDelta(transaction, vertex, Delta::SetPropertyTag(), property, current_value);
         vertex->properties.SetProperty(property, value);
+
+        return false;
       }};
-  std::invoke(atomic_memory_block);
+  const bool early_exit = std::invoke(atomic_memory_block);
+
+  if (early_exit) {
+    return std::move(current_value);
+  }

   if (transaction_->constraint_verification_info) {
     if (!value.IsNull()) {
@@ -339,27 +350,29 @@ Result<std::vector<std::tuple<PropertyId, PropertyValue, PropertyValue>>> Vertex
   if (vertex_->deleted) return Error::DELETED_OBJECT;

+  const bool skip_duplicate_update = !storage_->config_.salient.items.delta_on_identical_property_update;
   using ReturnType = decltype(vertex_->properties.UpdateProperties(properties));
   std::optional<ReturnType> id_old_new_change;
-  utils::AtomicMemoryBlock atomic_memory_block{
-      [storage = storage_, transaction = transaction_, vertex = vertex_, &properties, &id_old_new_change]() {
-        id_old_new_change.emplace(vertex->properties.UpdateProperties(properties));
-        if (!id_old_new_change.has_value()) {
-          return;
+  utils::AtomicMemoryBlock atomic_memory_block{[storage = storage_, transaction = transaction_, vertex = vertex_,
+                                               &properties, &id_old_new_change, skip_duplicate_update]() {
+    id_old_new_change.emplace(vertex->properties.UpdateProperties(properties));
+    if (!id_old_new_change.has_value()) {
+      return;
+    }
+    for (auto &[id, old_value, new_value] : *id_old_new_change) {
+      storage->indices_.UpdateOnSetProperty(id, new_value, vertex, *transaction);
+      if (skip_duplicate_update && old_value == new_value) continue;
+      CreateAndLinkDelta(transaction, vertex, Delta::SetPropertyTag(), id, std::move(old_value));
+      transaction->manyDeltasCache.Invalidate(vertex, id);
+      if (transaction->constraint_verification_info) {
+        if (!new_value.IsNull()) {
+          transaction->constraint_verification_info->AddedProperty(vertex);
+        } else {
+          transaction->constraint_verification_info->RemovedProperty(vertex);
         }
-        for (auto &[id, old_value, new_value] : *id_old_new_change) {
-          storage->indices_.UpdateOnSetProperty(id, new_value, vertex, *transaction);
-          CreateAndLinkDelta(transaction, vertex, Delta::SetPropertyTag(), id, std::move(old_value));
-          transaction->manyDeltasCache.Invalidate(vertex, id);
-          if (transaction->constraint_verification_info) {
-            if (!new_value.IsNull()) {
-              transaction->constraint_verification_info->AddedProperty(vertex);
-            } else {
-              transaction->constraint_verification_info->RemovedProperty(vertex);
-            }
-          }
-        }
-      }};
+      }
+    }
+  }};
   std::invoke(atomic_memory_block);

   return id_old_new_change.has_value() ? std::move(id_old_new_change.value()) : ReturnType{};
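The AtomicMemoryBlock change in the next file is what lets the lambdas above return a value: the memory check still has to run after the callable finishes, yet the callable's result must be forwarded to the caller. A minimal standalone sketch of that scope-exit pattern, with stand-in names rather than the real utils types:

    #include <utility>

    // Simplified stand-ins for illustration; the real OnScopeExit and
    // total_memory_tracker live under src/utils and src/memory.
    template <typename F>
    struct ScopeExit {
      F f;
      ~ScopeExit() { f(); }  // runs after the return value below is constructed
    };

    template <typename F>
    auto RunChecked(F &&f) -> decltype(f()) {
      ScopeExit check{[] { /* total_memory_tracker.DoCheck() would run here */ }};
      return f();  // the destructor of `check` fires only after this returns
    }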
diff --git a/src/utils/atomic_memory_block.hpp b/src/utils/atomic_memory_block.hpp
index 31a3cf3a9..5ae2aab35 100644
--- a/src/utils/atomic_memory_block.hpp
+++ b/src/utils/atomic_memory_block.hpp
@@ -29,12 +29,10 @@ class [[nodiscard]] AtomicMemoryBlock {
   AtomicMemoryBlock &operator=(AtomicMemoryBlock &&) = delete;
   ~AtomicMemoryBlock() = default;

-  void operator()() {
-    {
-      utils::MemoryTracker::OutOfMemoryExceptionBlocker oom_blocker;
-      function_();
-    }
-    total_memory_tracker.DoCheck();
+  auto operator()() -> std::invoke_result_t<Callable> {
+    auto check_on_exit = OnScopeExit{[&] { total_memory_tracker.DoCheck(); }};
+    utils::MemoryTracker::OutOfMemoryExceptionBlocker oom_blocker;
+    return function_();
   }

  private:
diff --git a/src/utils/on_scope_exit.hpp b/src/utils/on_scope_exit.hpp
index a5398b017..114f1c370 100644
--- a/src/utils/on_scope_exit.hpp
+++ b/src/utils/on_scope_exit.hpp
@@ -35,7 +35,7 @@ namespace memgraph::utils {
  *      // long block of code, might throw an exception
  *    }
  */
-template <typename Callable>
+template <std::invocable Callable>
 class [[nodiscard]] OnScopeExit {
  public:
  template <typename U>
@@ -46,7 +46,7 @@ class [[nodiscard]] OnScopeExit {
   OnScopeExit &operator=(OnScopeExit const &) = delete;
   OnScopeExit &operator=(OnScopeExit &&) = delete;
   ~OnScopeExit() {
-    if (doCall_) function_();
+    if (doCall_) std::invoke(std::move(function_));
   }

   void Disable() { doCall_ = false; }
@@ -57,5 +57,4 @@ class [[nodiscard]] OnScopeExit {

 template <typename Callable>
 OnScopeExit(Callable &&) -> OnScopeExit<Callable>;
-
 } // namespace memgraph::utils
diff --git a/tests/e2e/CMakeLists.txt b/tests/e2e/CMakeLists.txt
index 1876074ee..60743676d 100644
--- a/tests/e2e/CMakeLists.txt
+++ b/tests/e2e/CMakeLists.txt
@@ -77,6 +77,7 @@ add_subdirectory(garbage_collection)
 add_subdirectory(query_planning)
 add_subdirectory(awesome_functions)
 add_subdirectory(high_availability)
+add_subdirectory(concurrency)
 add_subdirectory(replication_experimental)

diff --git a/tests/e2e/concurrency/CMakeLists.txt b/tests/e2e/concurrency/CMakeLists.txt
new file mode 100644
index 000000000..f981a2537
--- /dev/null
+++ b/tests/e2e/concurrency/CMakeLists.txt
@@ -0,0 +1,6 @@
+function(copy_concurrency_e2e_python_files FILE_NAME)
+  copy_e2e_python_files(concurrency ${FILE_NAME})
+endfunction()
+
+copy_concurrency_e2e_python_files(common.py)
+copy_concurrency_e2e_python_files(concurrency.py)
diff --git a/tests/e2e/concurrency/common.py b/tests/e2e/concurrency/common.py
new file mode 100644
index 000000000..208278929
--- /dev/null
+++ b/tests/e2e/concurrency/common.py
@@ -0,0 +1,60 @@
+# Copyright 2023 Memgraph Ltd.
+#
+# Use of this software is governed by the Business Source License
+# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
+# License, and you may not use this file except in compliance with the Business Source License.
+#
+# As of the Change Date specified in that file, in accordance with
+# the Business Source License, use of this software will be governed
+# by the Apache License, Version 2.0, included in the file
+# licenses/APL.txt.
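+#
+# (Note on the fixtures below: each test gets two independent Bolt sessions
+# against the same instance on localhost:7687. Setup statements run with
+# autocommit enabled; autocommit is then switched off so the test body
+# controls transaction boundaries explicitly via commit().)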
+ +import typing + +import mgclient +import pytest + + +def execute_and_fetch_all(cursor: mgclient.Cursor, query: str, params: dict = {}) -> typing.List[tuple]: + cursor.execute(query, params) + return cursor.fetchall() + + +def execute_and_fetch_all_with_commit( + connection: mgclient.Connection, query: str, params: dict = {} +) -> typing.List[tuple]: + cursor = connection.cursor() + cursor.execute(query, params) + results = cursor.fetchall() + connection.commit() + return results + + +@pytest.fixture +def first_connection(**kwargs) -> mgclient.Connection: + connection = mgclient.connect(host="localhost", port=7687, **kwargs) + connection.autocommit = True + cursor = connection.cursor() + execute_and_fetch_all(cursor, "USE DATABASE memgraph") + try: + execute_and_fetch_all(cursor, "DROP DATABASE clean") + except: + pass + execute_and_fetch_all(cursor, "MATCH (n) DETACH DELETE n") + connection.autocommit = False + yield connection + + +@pytest.fixture +def second_connection(**kwargs) -> mgclient.Connection: + connection = mgclient.connect(host="localhost", port=7687, **kwargs) + connection.autocommit = True + cursor = connection.cursor() + execute_and_fetch_all(cursor, "USE DATABASE memgraph") + try: + execute_and_fetch_all(cursor, "DROP DATABASE clean") + except: + pass + execute_and_fetch_all(cursor, "MATCH (n) DETACH DELETE n") + connection.autocommit = False + yield connection diff --git a/tests/e2e/concurrency/concurrency.py b/tests/e2e/concurrency/concurrency.py new file mode 100644 index 000000000..7961c1984 --- /dev/null +++ b/tests/e2e/concurrency/concurrency.py @@ -0,0 +1,57 @@ +# Copyright 2023 Memgraph Ltd. +# +# Use of this software is governed by the Business Source License +# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +# License, and you may not use this file except in compliance with the Business Source License. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0, included in the file +# licenses/APL.txt. 
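+#
+# (Assumed cluster configuration: workloads.yaml starts Memgraph with
+# --storage-delta-on-identical-property-update=false. With that flag, a SET
+# that writes a value identical to the stored one creates no delta, so the
+# overlapping same-value writes below are expected to commit without a
+# serialization error. A hypothetical timeline:
+#
+#   tx1: MATCH (n) SET n.prop = 1    -- no delta created
+#   tx2: MATCH (n) SET n.prop = 1    -- no conflict with tx1 at commit
+# )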
+ +import sys + +import pytest +from common import execute_and_fetch_all, first_connection, second_connection + + +def test_concurrency_if_no_delta_on_same_node_property_update(first_connection, second_connection): + m1c = first_connection.cursor() + m2c = second_connection.cursor() + + execute_and_fetch_all(m1c, "CREATE (:Node {prop: 1})") + first_connection.commit() + + test_has_error = False + try: + m1c.execute("MATCH (n) SET n.prop = 1") + m2c.execute("MATCH (n) SET n.prop = 1") + first_connection.commit() + second_connection.commit() + except Exception as e: + test_has_error = True + + assert test_has_error is False + + +def test_concurrency_if_no_delta_on_same_edge_property_update(first_connection, second_connection): + m1c = first_connection.cursor() + m2c = second_connection.cursor() + + execute_and_fetch_all(m1c, "CREATE ()-[:TYPE {prop: 1}]->()") + first_connection.commit() + + test_has_error = False + try: + m1c.execute("MATCH (n)-[r]->(m) SET r.prop = 1") + m2c.execute("MATCH (n)-[r]->(m) SET n.prop = 1") + first_connection.commit() + second_connection.commit() + except Exception as e: + test_has_error = True + + assert test_has_error is False + + +if __name__ == "__main__": + sys.exit(pytest.main([__file__, "-rA"])) diff --git a/tests/e2e/concurrency/workloads.yaml b/tests/e2e/concurrency/workloads.yaml new file mode 100644 index 000000000..839090538 --- /dev/null +++ b/tests/e2e/concurrency/workloads.yaml @@ -0,0 +1,14 @@ +concurrency_cluster: &concurrency_cluster + cluster: + main: + args: ["--bolt-port", "7687", "--log-level=TRACE", "--storage-delta-on-identical-property-update=false"] + log_file: "concurrency.log" + setup_queries: [] + validation_queries: [] + + +workloads: + - name: "Concurrency" + binary: "tests/e2e/pytest_runner.sh" + args: ["concurrency/concurrency.py"] + <<: *concurrency_cluster diff --git a/tests/e2e/configuration/default_config.py b/tests/e2e/configuration/default_config.py index 65a850f0b..75c211e0f 100644 --- a/tests/e2e/configuration/default_config.py +++ b/tests/e2e/configuration/default_config.py @@ -141,6 +141,11 @@ startup_config_dict = { "1", "The time duration between two replica checks/pings. If < 1, replicas will NOT be checked at all. 
NOTE: The MAIN instance allocates a new thread for each REPLICA.",
     ),
+    "storage_delta_on_identical_property_update": (
+        "true",
+        "true",
+        "Controls whether updating a property with the same value should create a delta object.",
+    ),
     "storage_gc_cycle_sec": ("30", "30", "Storage garbage collector interval (in seconds)."),
     "storage_python_gc_cycle_sec": ("180", "180", "Storage python full garbage collection interval (in seconds)."),
     "storage_items_per_batch": (

From 84fe8531691a56292553e1a99c067e840a4044ee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marko=20Bari=C5=A1i=C4=87?= <48765171+MarkoBarisic@users.noreply.github.com>
Date: Mon, 18 Mar 2024 10:47:59 +0100
Subject: [PATCH 09/16] Fix cargo not found when building in mgbuild container
 (#1825)

* Add source /home/mg/.cargo/env before cmake and make commands in mgbuild.sh

---
 release/package/mgbuild.sh | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/release/package/mgbuild.sh b/release/package/mgbuild.sh
index 3ff2f4979..e24776f60 100755
--- a/release/package/mgbuild.sh
+++ b/release/package/mgbuild.sh
@@ -211,6 +211,7 @@ check_support() {
 build_memgraph () {
   local build_container="mgbuild_${toolchain_version}_${os}"
   local ACTIVATE_TOOLCHAIN="source /opt/toolchain-${toolchain_version}/activate"
+  local ACTIVATE_CARGO="source $MGBUILD_HOME_DIR/.cargo/env"
   local container_build_dir="$MGBUILD_ROOT_DIR/build"
   local container_output_dir="$container_build_dir/output"
   local arm_flag=""
@@ -316,7 +317,7 @@ build_memgraph () {

   # Define cmake command
   local cmake_cmd="cmake $build_type_flag $arm_flag $community_flag $telemetry_id_override_flag $coverage_flag $asan_flag $ubsan_flag .."
-  docker exec -u mg "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN && $cmake_cmd"
+  docker exec -u mg "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN && $ACTIVATE_CARGO && $cmake_cmd"

   # ' is used instead of " because we need to run make within the allowed
   # container resources.
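The fix hinges on one detail: every `docker exec` starts a fresh, non-login shell inside the container, so neither the toolchain nor the cargo environment persists between invocations, and each activation must be chained into the same `bash -c` command as the build step itself. A minimal sketch of the pattern (container name and paths are illustrative only):

    # every docker exec is a fresh shell; re-source the environments inside it
    ACTIVATE_TOOLCHAIN="source /opt/toolchain-v5/activate"
    ACTIVATE_CARGO="source /home/mg/.cargo/env"   # assumes MGBUILD_HOME_DIR=/home/mg
    docker exec -u mg mgbuild_v5_debian-12 \
      bash -c "cd /memgraph/build && $ACTIVATE_TOOLCHAIN && $ACTIVATE_CARGO && cmake .. && make -j8"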
@@ -326,11 +327,11 @@ build_memgraph () { # support nproc # shellcheck disable=SC2016 if [[ "$threads" == 0 ]]; then - docker exec -u mg "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN "'&& make -j$(nproc)' - docker exec -u mg "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN "'&& make -j$(nproc) -B mgconsole' + docker exec -u mg "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN && $ACTIVATE_CARGO "'&& make -j$(nproc)' + docker exec -u mg "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN && $ACTIVATE_CARGO "'&& make -j$(nproc) -B mgconsole' else - docker exec -u mg "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN "'&& make -j$threads' - docker exec -u mg "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN "'&& make -j$threads -B mgconsole' + docker exec -u mg "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN && $ACTIVATE_CARGO "'&& make -j$threads' + docker exec -u mg "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN && $ACTIVATE_CARGO "'&& make -j$threads -B mgconsole' fi } @@ -396,6 +397,7 @@ copy_memgraph() { test_memgraph() { local ACTIVATE_TOOLCHAIN="source /opt/toolchain-${toolchain_version}/activate" local ACTIVATE_VENV="./setup.sh /opt/toolchain-${toolchain_version}/activate" + local ACTIVATE_CARGO="source $MGBUILD_HOME_DIR/.cargo/env" local EXPORT_LICENSE="export MEMGRAPH_ENTERPRISE_LICENSE=$enterprise_license" local EXPORT_ORG_NAME="export MEMGRAPH_ORGANIZATION_NAME=$organization_name" local BUILD_DIR="$MGBUILD_ROOT_DIR/build" @@ -481,7 +483,7 @@ test_memgraph() { # docker network connect --alias $kafka_hostname $build_container_network $kafka_container > /dev/null 2>&1 || echo "Kafka container already inside correct network or something went wrong ..." # docker network connect --alias $pulsar_hostname $build_container_network $pulsar_container > /dev/null 2>&1 || echo "Kafka container already inside correct network or something went wrong ..." 
docker exec -u mg $build_container bash -c "pip install --user networkx && pip3 install --user networkx" - docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR/tests && $ACTIVATE_VENV && source ve3/bin/activate_e2e && cd $MGBUILD_ROOT_DIR/tests/e2e "'&& ./run.sh' + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && $ACTIVATE_CARGO && cd $MGBUILD_ROOT_DIR/tests && $ACTIVATE_VENV && source ve3/bin/activate_e2e && cd $MGBUILD_ROOT_DIR/tests/e2e "'&& ./run.sh' ;; *) echo "Error: Unknown test '$1'" From ec8536e11bf61ac649f03b0d3e9893fe9153d223 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20Bari=C5=A1i=C4=87?= <48765171+MarkoBarisic@users.noreply.github.com> Date: Mon, 18 Mar 2024 11:58:34 +0100 Subject: [PATCH 10/16] Make diff run on push to master again (#1826) * Add workflow dispatch and run on push to master --- .github/workflows/diff.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/diff.yaml b/.github/workflows/diff.yaml index 6c96cda80..4ddba622a 100644 --- a/.github/workflows/diff.yaml +++ b/.github/workflows/diff.yaml @@ -4,6 +4,10 @@ concurrency: cancel-in-progress: true on: + push: + branches: + - master + workflow_dispatch: pull_request: paths-ignore: - "docs/**" From 2ac649f3b5ff4db8e9fb568f0e945539fa4df9d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20Bari=C5=A1i=C4=87?= <48765171+MarkoBarisic@users.noreply.github.com> Date: Mon, 18 Mar 2024 16:38:58 +0100 Subject: [PATCH 11/16] Upgrade jepsen (#1594) * Try with jepsen v0.3.5 * Add a few WIP adjustments * Add replication restore state on startup flag * Fix some run.sh scripts issues * Improve cluster commands * Run Jepsen on debian-12 with toolchain v5 --------- Co-authored-by: Marko Budiselic --- .github/workflows/diff.yaml | 4 +- environment/os/run.sh | 47 +++++++++++++------- tests/jepsen/jepsen_0.3.0.patch | 13 ------ tests/jepsen/project.clj | 8 ++-- tests/jepsen/run.sh | 37 +++++++-------- tests/jepsen/src/jepsen/memgraph/support.clj | 1 + 6 files changed, 54 insertions(+), 56 deletions(-) delete mode 100644 tests/jepsen/jepsen_0.3.0.patch diff --git a/.github/workflows/diff.yaml b/.github/workflows/diff.yaml index 4ddba622a..49b7d4273 100644 --- a/.github/workflows/diff.yaml +++ b/.github/workflows/diff.yaml @@ -475,8 +475,8 @@ jobs: THREADS: 24 MEMGRAPH_ENTERPRISE_LICENSE: ${{ secrets.MEMGRAPH_ENTERPRISE_LICENSE }} MEMGRAPH_ORGANIZATION_NAME: ${{ secrets.MEMGRAPH_ORGANIZATION_NAME }} - OS: debian-10 - TOOLCHAIN: v4 + OS: debian-12 + TOOLCHAIN: v5 ARCH: amd BUILD_TYPE: RelWithDebInfo diff --git a/environment/os/run.sh b/environment/os/run.sh index e7c370f62..654636a5f 100755 --- a/environment/os/run.sh +++ b/environment/os/run.sh @@ -5,17 +5,20 @@ IFS=' ' # NOTE: docker_image_name could be local image build based on release/package images. # NOTE: each line has to be under quotes, docker_container_type, script_name and docker_image_name separate with a space. 
# "docker_container_type script_name docker_image_name" +# docker_container_type OPTIONS: +# * mgrun -> running plain/empty operating system for the purposes of testing native memgraph package +# * mgbuild -> running the builder container to build memgraph inside it -> it's possible create builder images using release/package/run.sh OPERATING_SYSTEMS=( - "mgrun amzn-2 amazonlinux:2" - "mgrun centos-7 centos:7" - "mgrun centos-9 dokken/centos-stream-9" - "mgrun debian-10 debian:10" - "mgrun debian-11 debian:11" - "mgrun fedora-36 fedora:36" - "mgrun ubuntu-18.04 ubuntu:18.04" - "mgrun ubuntu-20.04 ubuntu:20.04" - "mgrun ubuntu-22.04 ubuntu:22.04" - # "mgbuild centos-7 package-mgbuild_centos-7" + # "mgrun amzn-2 amazonlinux:2" + # "mgrun centos-7 centos:7" + # "mgrun centos-9 dokken/centos-stream-9" + # "mgrun debian-10 debian:10" + # "mgrun debian-11 debian:11" + # "mgrun fedora-36 fedora:36" + # "mgrun ubuntu-18.04 ubuntu:18.04" + # "mgrun ubuntu-20.04 ubuntu:20.04" + # "mgrun ubuntu-22.04 ubuntu:22.04" + # "mgbuild debian-12 memgraph/memgraph-builder:v5_debian-12" ) if [ ! "$(docker info)" ]; then @@ -33,14 +36,24 @@ print_help () { # NOTE: This is an idempotent operation! # TODO(gitbuda): Consider making docker_run always delete + start a new container or add a new function. docker_run () { - cnt_name="$1" - cnt_image="$2" + cnt_type="$1" + if [[ "$cnt_type" != "mgbuild" && "$cnt_type" != "mgrun" ]]; then + echo "ERROR: Wrong docker_container_type -> valid options are mgbuild, mgrun" + exit 1 + fi + cnt_name="$2" + cnt_image="$3" if [ ! "$(docker ps -q -f name=$cnt_name)" ]; then if [ "$(docker ps -aq -f status=exited -f name=$cnt_name)" ]; then echo "Cleanup of the old exited container..." docker rm $cnt_name fi - docker run -d --volume "$SCRIPT_DIR/../../:/memgraph" --network host --name "$cnt_name" "$cnt_image" sleep infinity + if [[ "$cnt_type" == "mgbuild" ]]; then + docker run -d --volume "$SCRIPT_DIR/../../:/memgraph" --network host --name "$cnt_name" "$cnt_image" + fi + if [[ "$cnt_type" == "mgrun" ]]; then + docker run -d --volume "$SCRIPT_DIR/../../:/memgraph" --network host --name "$cnt_name" "$cnt_image" sleep infinity + fi fi echo "The $cnt_image container is active under $cnt_name name!" } @@ -55,9 +68,9 @@ docker_stop_and_rm () { cnt_name="$1" if [ "$(docker ps -q -f name=$cnt_name)" ]; then docker stop "$1" - if [ "$(docker ps -aq -f status=exited -f name=$cnt_name)" ]; then - docker rm "$1" - fi + fi + if [ "$(docker ps -aq -f status=exited -f name=$cnt_name)" ]; then + docker rm "$1" fi } @@ -71,7 +84,7 @@ start_all () { docker_name="${docker_container_type}_$script_name" echo "" echo "~~~~ OPERATING ON $docker_image as $docker_name..." - docker_run "$docker_name" "$docker_image" + docker_run "$docker_container_type" "$docker_name" "$docker_image" docker_exec "$docker_name" "/memgraph/environment/os/$script_name.sh install NEW_DEPS" echo "---- DONE EVERYHING FOR $docker_image as $docker_name..." 
echo "" diff --git a/tests/jepsen/jepsen_0.3.0.patch b/tests/jepsen/jepsen_0.3.0.patch deleted file mode 100644 index be47cc8b4..000000000 --- a/tests/jepsen/jepsen_0.3.0.patch +++ /dev/null @@ -1,13 +0,0 @@ -diff --git a/docker/control/Dockerfile b/docker/control/Dockerfile -index 6b2d3c0e..195a7a60 100644 ---- a/docker/control/Dockerfile -+++ b/docker/control/Dockerfile -@@ -7,7 +7,7 @@ ENV LEIN_ROOT true - # Jepsen dependencies - # - RUN apt-get -y -q update && \ -- apt-get install -qy openjdk-17-jdk-headless \ -+ apt-get install -qy ca-certificates-java openjdk-17-jdk-headless \ - libjna-java \ - vim \ - emacs \ diff --git a/tests/jepsen/project.clj b/tests/jepsen/project.clj index 506bcff52..445e57b41 100644 --- a/tests/jepsen/project.clj +++ b/tests/jepsen/project.clj @@ -5,10 +5,10 @@ :url "https://github.com/memgraph/memgraph/blob/master/release/LICENSE_ENTERPRISE.md"} :main jepsen.memgraph.core :dependencies [[org.clojure/clojure "1.10.0"] - ;; 0.2.4-SNAPSHOT but 0.3.0, for more -> https://clojars.org/jepsen/versions - [jepsen "0.2.4-SNAPSHOT"] + ;; Details under https://clojars.org/jepsen/versions. + [jepsen "0.3.5-SNAPSHOT"] [gorillalabs/neo4j-clj "4.1.0"]] :profiles {:test {:dependencies [#_[org.neo4j.test/neo4j-harness "4.1.0"]]}} - ;; required to run 0.3.0 - ; :aot :all + ;; The below line is required to run after Jepsen 0.3.0. + :aot :all :repl-options {:init-ns jepsen.memgraph.core}) diff --git a/tests/jepsen/run.sh b/tests/jepsen/run.sh index a1587c8a1..040491c3f 100755 --- a/tests/jepsen/run.sh +++ b/tests/jepsen/run.sh @@ -2,12 +2,10 @@ set -Eeuo pipefail script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -MEMGRAPH_BINARY_PATH="../../build/memgraph" +MEMGRAPH_BUILD_PATH="$script_dir/../../build" +MEMGRAPH_BINARY_PATH="$MEMGRAPH_BUILD_PATH/memgraph" # NOTE: Jepsen Git tags are not consistent, there are: 0.2.4, v0.3.0, 0.3.2, ... -# NOTE: On Ubuntu 22.04 v0.3.2 uses non-existing docker compose --compatibility flag. -# NOTE: On Ubuntu 22.04 v0.3.0 and v0.3.1 seems to be runnable. -# TODO(gitbuda): Make sure Memgraph can be testes with Jepsen >= 0.3.0 -JEPSEN_VERSION="${JEPSEN_VERSION:-0.2.4}" +JEPSEN_VERSION="${JEPSEN_VERSION:-v0.3.5}" JEPSEN_ACTIVE_NODES_NO=5 CONTROL_LEIN_RUN_ARGS="test-all --node-configs resources/node-config.edn" CONTROL_LEIN_RUN_STDOUT_LOGS=1 @@ -24,7 +22,7 @@ PRINT_CONTEXT() { HELP_EXIT() { echo "" - echo "HELP: $0 help|cluster-up|cluster-refresh|cluster-cleanup|cluster-dealloc|mgbuild|test|test-all-individually [args]" + echo "HELP: $0 help|cluster-up|cluster-refresh|cluster-nodes-cleanup|cluster-dealloc|mgbuild|test|test-all-individually [args]" echo "" echo " test args --binary MEMGRAPH_BINARY_PATH" echo " --ignore-run-stdout-logs Ignore lein run stdout logs." @@ -43,24 +41,18 @@ INFO() { /bin/echo -e "\e[104m\e[97m[INFO]\e[49m\e[39m" "$@" } +if [[ "$#" -lt 1 || "$1" == "-h" || "$1" == "--help" ]]; then + HELP_EXIT +fi + if ! command -v docker > /dev/null 2>&1 || ! command -v docker-compose > /dev/null 2>&1; then ERROR "docker and docker-compose have to be installed." exit 1 fi if [ ! -d "$script_dir/jepsen" ]; then + # TODO(deda): install apt get docker-compose-plugin on all build machines. 
git clone https://github.com/jepsen-io/jepsen.git -b "$JEPSEN_VERSION" "$script_dir/jepsen" - if [ "$JEPSEN_VERSION" == "v0.3.0" ]; then - if [ -f "$script_dir/jepsen_0.3.0.patch" ]; then - cd "$script_dir/jepsen" - git apply "$script_dir/jepsen_0.3.0.patch" - cd "$script_dir" - fi - fi -fi - -if [ "$#" -lt 1 ]; then - HELP_EXIT fi PROCESS_ARGS() { @@ -199,7 +191,7 @@ CLUSTER_UP() { } CLUSTER_DEALLOC() { - ps=$(docker ps --filter name=jepsen* --filter status=running -q) + ps=$(docker ps -a --filter name=jepsen* -q) if [[ ! -z ${ps} ]]; then echo "Killing ${ps}" docker rm -f ${ps} @@ -213,6 +205,7 @@ CLUSTER_DEALLOC() { else echo "No Jepsen containers detected!" fi + echo "Cluster dealloc DONE" } # Initialize testing context by copying source/binary files. Inside CI, @@ -239,7 +232,7 @@ case $1 in CLUSTER_DEALLOC ;; - cluster-cleanup) + cluster-nodes-cleanup) jepsen_control_exec="docker exec jepsen-control bash -c" INFO "Deleting /jepsen/memgraph/store/* on jepsen-control" $jepsen_control_exec "rm -rf /jepsen/memgraph/store/*" @@ -252,9 +245,13 @@ case $1 in ;; mgbuild) + PROCESS_ARGS "$@" PRINT_CONTEXT + # docker cp -L mgbuild_debian-12:/memgraph/build/memgraph "${MEMGRAPH_BUILD_PATH}/" + # NOTE: mgconsole is interesting inside jepsen container to inspect Memgraph state. + # docker cp -L mgbuild_debian-12:/usr/local/bin/mgconsole "${MEMGRAPH_BUILD_PATH}/" echo "" - echo "TODO(gitbuda): Build memgraph for Debian 10 via memgraph/memgraph-builder" + echo "TODO(gitbuda): Build memgraph for Jepsen (on v0.3.5 for Debian 12) via memgraph/memgraph-builder" exit 1 ;; diff --git a/tests/jepsen/src/jepsen/memgraph/support.clj b/tests/jepsen/src/jepsen/memgraph/support.clj index b7846f8ce..aec2aadb1 100644 --- a/tests/jepsen/src/jepsen/memgraph/support.clj +++ b/tests/jepsen/src/jepsen/memgraph/support.clj @@ -25,6 +25,7 @@ :--storage-recover-on-startup :--storage-wal-enabled :--storage-snapshot-interval-sec 300 + :--replication-restore-state-on-startup :--storage-properties-on-edges)) (defn stop-node! 
From 9629f10166255a026aa60a4b80efdad9876d9c37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ante=20Pu=C5=A1i=C4=87?= Date: Wed, 20 Mar 2024 10:29:24 +0100 Subject: [PATCH 12/16] Text search (#1603, #1739) Add text search: * named property search * all-property search * regex search * aggregation over search results Text search works with: * non-parallel transactions * durability (WAL files and snapshots) * multitenancy --- include/_mgp.hpp | 15 + include/mg_procedure.h | 32 +- include/mgp.hpp | 79 +++- libs/CMakeLists.txt | 26 ++ query_modules/CMakeLists.txt | 18 + query_modules/text_search_module.cpp | 149 ++++++ src/CMakeLists.txt | 2 +- src/dbms/dbms_handler.hpp | 2 +- src/dbms/inmemory/replication_handlers.cpp | 11 + src/flags/experimental.cpp | 11 +- src/flags/experimental.hpp | 3 +- src/flags/run_time_configurable.cpp | 12 +- src/flags/run_time_configurable.hpp | 2 +- src/query/db_accessor.hpp | 24 + src/query/dump.cpp | 34 ++ src/query/dump.hpp | 1 + src/query/exceptions.hpp | 13 + src/query/frontend/ast/ast.cpp | 3 + src/query/frontend/ast/ast.hpp | 31 ++ src/query/frontend/ast/ast_visitor.hpp | 13 +- .../frontend/ast/cypher_main_visitor.cpp | 22 + .../frontend/ast/cypher_main_visitor.hpp | 17 +- .../frontend/opencypher/grammar/Cypher.g4 | 9 + .../opencypher/grammar/CypherLexer.g4 | 1 + .../opencypher/grammar/MemgraphCypher.g4 | 1 + .../frontend/semantic/required_privileges.cpp | 2 + src/query/frontend/semantic/symbol.hpp | 2 +- src/query/interpreter.cpp | 86 +++- src/query/plan/operator.cpp | 25 + src/query/plan/vertex_count_cache.hpp | 2 +- src/query/procedure/mg_procedure_impl.cpp | 161 ++++++- src/query/procedure/mg_procedure_impl.hpp | 9 +- src/storage/v2/CMakeLists.txt | 4 +- src/storage/v2/disk/durable_metadata.cpp | 28 +- src/storage/v2/disk/durable_metadata.hpp | 6 +- src/storage/v2/disk/storage.cpp | 27 +- src/storage/v2/durability/durability.cpp | 37 +- src/storage/v2/durability/durability.hpp | 5 +- src/storage/v2/durability/marker.hpp | 4 + src/storage/v2/durability/metadata.hpp | 1 + src/storage/v2/durability/serialization.cpp | 4 + src/storage/v2/durability/snapshot.cpp | 30 ++ .../durability/storage_global_operation.hpp | 2 + src/storage/v2/durability/wal.cpp | 65 ++- src/storage/v2/durability/wal.hpp | 17 +- src/storage/v2/indices/indices.cpp | 1 + src/storage/v2/indices/indices.hpp | 6 +- src/storage/v2/indices/text_index.cpp | 430 ++++++++++++++++++ src/storage/v2/indices/text_index.hpp | 105 +++++ src/storage/v2/inmemory/storage.cpp | 41 +- src/storage/v2/inmemory/storage.hpp | 8 +- src/storage/v2/metadata_delta.hpp | 19 + src/storage/v2/property_store.cpp | 2 +- .../v2/replication/replication_client.cpp | 5 +- src/storage/v2/storage.cpp | 23 + src/storage/v2/storage.hpp | 31 ++ src/utils/event_counter.cpp | 3 +- src/utils/typeinfo.hpp | 1 + tests/e2e/configuration/default_config.py | 2 +- tests/e2e/text_search/CMakeLists.txt | 6 + tests/e2e/text_search/common.py | 87 ++++ tests/e2e/text_search/test_text_search.py | 206 +++++++++ .../text_search/test_text_search_disabled.py | 69 +++ tests/e2e/text_search/workloads.yaml | 33 ++ tests/unit/query_dump.cpp | 21 +- tests/unit/storage_v2_decoder_encoder.cpp | 2 + tests/unit/storage_v2_get_info.cpp | 1 + tests/unit/storage_v2_wal_file.cpp | 10 +- 68 files changed, 2088 insertions(+), 72 deletions(-) create mode 100644 query_modules/text_search_module.cpp create mode 100644 src/storage/v2/indices/text_index.cpp create mode 100644 src/storage/v2/indices/text_index.hpp create mode 100644 tests/e2e/text_search/CMakeLists.txt 
 create mode 100644 tests/e2e/text_search/common.py
 create mode 100644 tests/e2e/text_search/test_text_search.py
 create mode 100644 tests/e2e/text_search/test_text_search_disabled.py
 create mode 100644 tests/e2e/text_search/workloads.yaml

diff --git a/include/_mgp.hpp b/include/_mgp.hpp
index 8b67bc36a..b1d9e26d5 100644
--- a/include/_mgp.hpp
+++ b/include/_mgp.hpp
@@ -326,6 +326,21 @@ inline mgp_vertex *graph_get_vertex_by_id(mgp_graph *g, mgp_vertex_id id, mgp_me
   return MgInvoke<mgp_vertex *>(mgp_graph_get_vertex_by_id, g, id, memory);
 }

+inline bool graph_has_text_index(mgp_graph *graph, const char *index_name) {
+  return MgInvoke<int>(mgp_graph_has_text_index, graph, index_name);
+}
+
+inline mgp_map *graph_search_text_index(mgp_graph *graph, const char *index_name, const char *search_query,
+                                        text_search_mode search_mode, mgp_memory *memory) {
+  return MgInvoke<mgp_map *>(mgp_graph_search_text_index, graph, index_name, search_query, search_mode, memory);
+}
+
+inline mgp_map *graph_aggregate_over_text_index(mgp_graph *graph, const char *index_name, const char *search_query,
+                                                const char *aggregation_query, mgp_memory *memory) {
+  return MgInvoke<mgp_map *>(mgp_graph_aggregate_over_text_index, graph, index_name, search_query, aggregation_query,
+                             memory);
+}
+
 inline mgp_vertices_iterator *graph_iter_vertices(mgp_graph *g, mgp_memory *memory) {
   return MgInvoke<mgp_vertices_iterator *>(mgp_graph_iter_vertices, g, memory);
 }
diff --git a/include/mg_procedure.h b/include/mg_procedure.h
index 93ef241d8..117dc66ab 100644
--- a/include/mg_procedure.h
+++ b/include/mg_procedure.h
@@ -1,4 +1,4 @@
-// Copyright 2023 Memgraph Ltd.
+// Copyright 2024 Memgraph Ltd.
 //
 // Use of this software is governed by the Business Source License
 // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
@@ -891,6 +891,36 @@ enum mgp_error mgp_edge_iter_properties(struct mgp_edge *e, struct mgp_memory *m
 enum mgp_error mgp_graph_get_vertex_by_id(struct mgp_graph *g, struct mgp_vertex_id id, struct mgp_memory *memory,
                                           struct mgp_vertex **result);

+/// Result is non-zero if the index with the given name exists.
+/// The current implementation always returns without errors.
+enum mgp_error mgp_graph_has_text_index(struct mgp_graph *graph, const char *index_name, int *result);
+
+/// Available modes of searching text indices.
+MGP_ENUM_CLASS text_search_mode{
+    SPECIFIED_PROPERTIES,
+    REGEX,
+    ALL_PROPERTIES,
+};
+
+/// Search the named text index for the given query. The result is a map with the "search_results" and "error_msg" keys.
+/// The "search_results" key contains the vertices whose text-indexed properties match the given query.
+/// In case of a Tantivy error, the "search_results" key is absent, and "error_msg" contains the error message.
+/// Return mgp_error::MGP_ERROR_UNABLE_TO_ALLOCATE if there’s an allocation error while constructing the results map.
+/// Return mgp_error::MGP_ERROR_KEY_ALREADY_EXISTS if the same key is being created in the results map more than once.
+enum mgp_error mgp_graph_search_text_index(struct mgp_graph *graph, const char *index_name, const char *search_query,
+                                           enum text_search_mode search_mode, struct mgp_memory *memory,
+                                           struct mgp_map **result);
+
+/// Aggregate over the results of a search over the named text index. The result is a map with the "aggregation_results"
+/// and "error_msg" keys.
+/// The "aggregation_results" key contains the result of aggregating over the vertices whose text-indexed
+/// properties match the given query.
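+/// (Assumed detail: the aggregation query and the returned value follow Tantivy's JSON-based
+/// aggregation syntax, i.e. the value stored under "aggregation_results" is a JSON string.)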
+/// In case of a Tantivy error, the "aggregation_results" key is absent, and "error_msg" contains the error message.
+/// Return mgp_error::MGP_ERROR_UNABLE_TO_ALLOCATE if there’s an allocation error while constructing the results map.
+/// Return mgp_error::MGP_ERROR_KEY_ALREADY_EXISTS if the same key is being created in the results map more than once.
+enum mgp_error mgp_graph_aggregate_over_text_index(struct mgp_graph *graph, const char *index_name,
+                                                   const char *search_query, const char *aggregation_query,
+                                                   struct mgp_memory *memory, struct mgp_map **result);
+
 /// Creates label index for given label.
 /// mgp_error::MGP_ERROR_NO_ERROR is always returned.
 /// if label index already exists, result will be 0, otherwise 1.
diff --git a/include/mgp.hpp b/include/mgp.hpp
index 3f7ed591e..f35231062 100644
--- a/include/mgp.hpp
+++ b/include/mgp.hpp
@@ -1,4 +1,4 @@
-// Copyright 2023 Memgraph Ltd.
+// Copyright 2024 Memgraph Ltd.
 //
 // Use of this software is governed by the Business Source License
 // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
@@ -32,6 +32,15 @@

 namespace mgp {

+class TextSearchException : public std::exception {
+ public:
+  explicit TextSearchException(std::string message) : message_(std::move(message)) {}
+  const char *what() const noexcept override { return message_.c_str(); }
+
+ private:
+  std::string message_;
+};
+
 class IndexException : public std::exception {
  public:
  explicit IndexException(std::string message) : message_(std::move(message)) {}
@@ -4306,12 +4315,12 @@ inline void AddParamsReturnsToProc(mgp_proc *proc, std::vector<Parameter> &param
   }
 }
 }  // namespace detail

-inline bool CreateLabelIndex(mgp_graph *memgaph_graph, const std::string_view label) {
-  return create_label_index(memgaph_graph, label.data());
+inline bool CreateLabelIndex(mgp_graph *memgraph_graph, const std::string_view label) {
+  return create_label_index(memgraph_graph, label.data());
 }

-inline bool DropLabelIndex(mgp_graph *memgaph_graph, const std::string_view label) {
-  return drop_label_index(memgaph_graph, label.data());
+inline bool DropLabelIndex(mgp_graph *memgraph_graph, const std::string_view label) {
+  return drop_label_index(memgraph_graph, label.data());
 }

 inline List ListAllLabelIndices(mgp_graph *memgraph_graph) {
@@ -4322,14 +4331,14 @@ inline List ListAllLabelIndices(mgp_graph *memgraph_graph) {
   return List(label_indices);
 }

-inline bool CreateLabelPropertyIndex(mgp_graph *memgaph_graph, const std::string_view label,
+inline bool CreateLabelPropertyIndex(mgp_graph *memgraph_graph, const std::string_view label,
                                      const std::string_view property) {
-  return create_label_property_index(memgaph_graph, label.data(), property.data());
+  return create_label_property_index(memgraph_graph, label.data(), property.data());
 }

-inline bool DropLabelPropertyIndex(mgp_graph *memgaph_graph, const std::string_view label,
+inline bool DropLabelPropertyIndex(mgp_graph *memgraph_graph, const std::string_view label,
                                    const std::string_view property) {
-  return drop_label_property_index(memgaph_graph, label.data(), property.data());
+  return drop_label_property_index(memgraph_graph, label.data(), property.data());
 }

 inline List ListAllLabelPropertyIndices(mgp_graph *memgraph_graph) {
@@ -4340,6 +4349,58 @@ inline List ListAllLabelPropertyIndices(mgp_graph *memgraph_graph) {
   return List(label_property_indices);
 }

+namespace {
+constexpr std::string_view kErrorMsgKey = "error_msg";
+constexpr std::string_view kSearchResultsKey = "search_results";
"search_results"; +constexpr std::string_view kAggregationResultsKey = "aggregation_results"; +} // namespace + +inline List SearchTextIndex(mgp_graph *memgraph_graph, std::string_view index_name, std::string_view search_query, + text_search_mode search_mode) { + auto results_or_error = Map(mgp::MemHandlerCallback(graph_search_text_index, memgraph_graph, index_name.data(), + search_query.data(), search_mode)); + if (results_or_error.KeyExists(kErrorMsgKey)) { + if (!results_or_error.At(kErrorMsgKey).IsString()) { + throw TextSearchException{"The error message is not a string!"}; + } + throw TextSearchException(results_or_error.At(kErrorMsgKey).ValueString().data()); + } + + if (!results_or_error.KeyExists(kSearchResultsKey)) { + throw TextSearchException{"Incomplete text index search results!"}; + } + + if (!results_or_error.At(kSearchResultsKey).IsList()) { + throw TextSearchException{"Text index search results have wrong type!"}; + } + + return results_or_error.At(kSearchResultsKey).ValueList(); +} + +inline std::string_view AggregateOverTextIndex(mgp_graph *memgraph_graph, std::string_view index_name, + std::string_view search_query, std::string_view aggregation_query) { + auto results_or_error = + Map(mgp::MemHandlerCallback(graph_aggregate_over_text_index, memgraph_graph, index_name.data(), + search_query.data(), aggregation_query.data())); + + if (results_or_error.KeyExists(kErrorMsgKey)) { + if (!results_or_error.At(kErrorMsgKey).IsString()) { + throw TextSearchException{"The error message is not a string!"}; + } + throw TextSearchException(results_or_error.At(kErrorMsgKey).ValueString().data()); + } + + if (!results_or_error.KeyExists(kAggregationResultsKey)) { + throw TextSearchException{"Incomplete text index aggregation results!"}; + } + + if (!results_or_error.At(kAggregationResultsKey).IsString()) { + throw TextSearchException{"Text index aggregation results have wrong type!"}; + } + + return results_or_error.At(kAggregationResultsKey).ValueString(); +} + inline bool CreateExistenceConstraint(mgp_graph *memgraph_graph, const std::string_view label, const std::string_view property) { return create_existence_constraint(memgraph_graph, label.data(), property.data()); diff --git a/libs/CMakeLists.txt b/libs/CMakeLists.txt index 7d568d548..ab6a313f1 100644 --- a/libs/CMakeLists.txt +++ b/libs/CMakeLists.txt @@ -295,6 +295,32 @@ set_path_external_library(jemalloc STATIC import_header_library(rangev3 ${CMAKE_CURRENT_SOURCE_DIR}/rangev3/include) +ExternalProject_Add(mgcxx-proj + PREFIX mgcxx-proj + GIT_REPOSITORY https://github.com/memgraph/mgcxx + GIT_TAG "v0.0.4" + CMAKE_ARGS + "-DCMAKE_INSTALL_PREFIX=" + "-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}" + "-DENABLE_TESTS=OFF" + INSTALL_DIR "${PROJECT_BINARY_DIR}/mgcxx" +) +ExternalProject_Get_Property(mgcxx-proj install_dir) +set(MGCXX_ROOT ${install_dir}) + +add_library(tantivy_text_search STATIC IMPORTED GLOBAL) +add_dependencies(tantivy_text_search mgcxx-proj) +set_property(TARGET tantivy_text_search PROPERTY IMPORTED_LOCATION ${MGCXX_ROOT}/lib/libtantivy_text_search.a) + +add_library(mgcxx_text_search STATIC IMPORTED GLOBAL) +add_dependencies(mgcxx_text_search mgcxx-proj) +set_property(TARGET mgcxx_text_search PROPERTY IMPORTED_LOCATION ${MGCXX_ROOT}/lib/libmgcxx_text_search.a) +# We need to create the include directory first in order to be able to add it +# as an include directory. The header files in the include directory will be +# generated later during the build process. 
+file(MAKE_DIRECTORY ${MGCXX_ROOT}/include)
+set_property(TARGET mgcxx_text_search PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${MGCXX_ROOT}/include)
+
 # Setup NuRaft
 import_external_library(nuraft STATIC
   ${CMAKE_CURRENT_SOURCE_DIR}/nuraft/lib/libnuraft.a
diff --git a/query_modules/CMakeLists.txt b/query_modules/CMakeLists.txt
index 41dbb495c..1336f3eb0 100644
--- a/query_modules/CMakeLists.txt
+++ b/query_modules/CMakeLists.txt
@@ -6,6 +6,8 @@ project(memgraph_query_modules)

 disallow_in_source_build()

+find_package(fmt REQUIRED)
+
 # Everything that is installed here, should be under the "query_modules" component.
 set(CMAKE_INSTALL_DEFAULT_COMPONENT_NAME "query_modules")
 string(TOLOWER ${CMAKE_BUILD_TYPE} lower_build_type)
@@ -58,6 +60,22 @@ install(PROGRAMS $<TARGET_FILE:schema>
 # Also install the source of the example, so user can read it.
 install(FILES schema.cpp DESTINATION lib/memgraph/query_modules/src)

+add_library(text SHARED text_search_module.cpp)
+target_include_directories(text PRIVATE ${CMAKE_SOURCE_DIR}/include)
+target_compile_options(text PRIVATE -Wall)
+target_link_libraries(text PRIVATE -static-libgcc -static-libstdc++ fmt::fmt)
+# Strip C++ example in release build.
+if (lower_build_type STREQUAL "release")
+  add_custom_command(TARGET text POST_BUILD
+    COMMAND strip -s $<TARGET_FILE:text>
+    COMMENT "Stripping symbols and sections from the C++ text_search module")
+endif()
+install(PROGRAMS $<TARGET_FILE:text>
+  DESTINATION lib/memgraph/query_modules
+  RENAME text.so)
+# Also install the source of the example, so user can read it.
+install(FILES text_search_module.cpp DESTINATION lib/memgraph/query_modules/src)
+
 # Install the Python example and modules
 install(FILES example.py DESTINATION lib/memgraph/query_modules RENAME py_example.py)
 install(FILES graph_analyzer.py DESTINATION lib/memgraph/query_modules)
diff --git a/query_modules/text_search_module.cpp b/query_modules/text_search_module.cpp
new file mode 100644
index 000000000..8e4405058
--- /dev/null
+++ b/query_modules/text_search_module.cpp
@@ -0,0 +1,149 @@
+// Copyright 2024 Memgraph Ltd.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
+// License, and you may not use this file except in compliance with the Business Source License.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
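+
+// Example invocations, assuming a pre-existing text index named
+// "complianceDocuments"; the index name, property paths, and aggregation JSON
+// are illustrative only, and the procedure names follow the registrations in
+// mgp_init_module below:
+//
+//   CALL text.search("complianceDocuments", "data.title:Rules2024") YIELD node;
+//   CALL text.regex_search("complianceDocuments", "wor.*s") YIELD node;
+//   CALL text.search_all("complianceDocuments", "Rules2024") YIELD node;
+//   CALL text.aggregate("complianceDocuments", "data.title:Rules2024",
+//                       '{"count": {"value_count": {"field": "metadata.gid"}}}')
+//     YIELD aggregation;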
+
+#include <string>
+#include <string_view>
+
+#include <fmt/format.h>
+
+#include <mgp.hpp>
+
+namespace TextSearch {
+constexpr std::string_view kProcedureSearch = "search";
+constexpr std::string_view kProcedureRegexSearch = "regex_search";
+constexpr std::string_view kProcedureSearchAllProperties = "search_all";
+constexpr std::string_view kProcedureAggregate = "aggregate";
+constexpr std::string_view kParameterIndexName = "index_name";
+constexpr std::string_view kParameterSearchQuery = "search_query";
+constexpr std::string_view kParameterAggregationQuery = "aggregation_query";
+constexpr std::string_view kReturnNode = "node";
+constexpr std::string_view kReturnAggregation = "aggregation";
+const std::string kSearchAllPrefix = "all";
+
+void Search(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory);
+void RegexSearch(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory);
+void SearchAllProperties(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory);
+void Aggregate(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory);
+}  // namespace TextSearch
+
+void TextSearch::Search(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory) {
+  mgp::MemoryDispatcherGuard guard{memory};
+  const auto record_factory = mgp::RecordFactory(result);
+  auto arguments = mgp::List(args);
+
+  try {
+    const auto *index_name = arguments[0].ValueString().data();
+    const auto *search_query = arguments[1].ValueString().data();
+    for (const auto &node :
+         mgp::SearchTextIndex(memgraph_graph, index_name, search_query, text_search_mode::SPECIFIED_PROPERTIES)) {
+      auto record = record_factory.NewRecord();
+      record.Insert(TextSearch::kReturnNode.data(), node.ValueNode());
+    }
+  } catch (const std::exception &e) {
+    record_factory.SetErrorMessage(e.what());
+  }
+}
+
+void TextSearch::RegexSearch(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory) {
+  mgp::MemoryDispatcherGuard guard{memory};
+  const auto record_factory = mgp::RecordFactory(result);
+  auto arguments = mgp::List(args);
+
+  try {
+    const auto *index_name = arguments[0].ValueString().data();
+    const auto *search_query = arguments[1].ValueString().data();
+    for (const auto &node : mgp::SearchTextIndex(memgraph_graph, index_name, search_query, text_search_mode::REGEX)) {
+      auto record = record_factory.NewRecord();
+      record.Insert(TextSearch::kReturnNode.data(), node.ValueNode());
+    }
+  } catch (const std::exception &e) {
+    record_factory.SetErrorMessage(e.what());
+  }
+}
+
+void TextSearch::SearchAllProperties(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result,
+                                     mgp_memory *memory) {
+  mgp::MemoryDispatcherGuard guard{memory};
+  const auto record_factory = mgp::RecordFactory(result);
+  auto arguments = mgp::List(args);
+
+  try {
+    const auto *index_name = arguments[0].ValueString().data();
+    // Keep the formatted query alive for the duration of the search: calling
+    // .data() directly on the fmt::format() temporary would leave a dangling
+    // pointer once the temporary is destroyed.
+    const auto search_query = fmt::format("{}:{}", kSearchAllPrefix, arguments[1].ValueString());
+    for (const auto &node :
+         mgp::SearchTextIndex(memgraph_graph, index_name, search_query, text_search_mode::ALL_PROPERTIES)) {
+      auto record = record_factory.NewRecord();
+      record.Insert(TextSearch::kReturnNode.data(), node.ValueNode());
+    }
+  } catch (const std::exception &e) {
+    record_factory.SetErrorMessage(e.what());
+  }
+}
+
+void TextSearch::Aggregate(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory) {
+  mgp::MemoryDispatcherGuard guard{memory};
+  const auto record_factory =
mgp::RecordFactory(result); + auto arguments = mgp::List(args); + + try { + const auto *index_name = arguments[0].ValueString().data(); + const auto *search_query = arguments[1].ValueString().data(); + const auto *aggregation_query = arguments[2].ValueString().data(); + const auto aggregation_result = + mgp::AggregateOverTextIndex(memgraph_graph, index_name, search_query, aggregation_query); + auto record = record_factory.NewRecord(); + record.Insert(TextSearch::kReturnAggregation.data(), aggregation_result.data()); + } catch (const std::exception &e) { + record_factory.SetErrorMessage(e.what()); + } +} + +extern "C" int mgp_init_module(struct mgp_module *module, struct mgp_memory *memory) { + try { + mgp::MemoryDispatcherGuard guard{memory}; + + AddProcedure(TextSearch::Search, TextSearch::kProcedureSearch, mgp::ProcedureType::Read, + { + mgp::Parameter(TextSearch::kParameterIndexName, mgp::Type::String), + mgp::Parameter(TextSearch::kParameterSearchQuery, mgp::Type::String), + }, + {mgp::Return(TextSearch::kReturnNode, mgp::Type::Node)}, module, memory); + + AddProcedure(TextSearch::RegexSearch, TextSearch::kProcedureRegexSearch, mgp::ProcedureType::Read, + { + mgp::Parameter(TextSearch::kParameterIndexName, mgp::Type::String), + mgp::Parameter(TextSearch::kParameterSearchQuery, mgp::Type::String), + }, + {mgp::Return(TextSearch::kReturnNode, mgp::Type::Node)}, module, memory); + + AddProcedure(TextSearch::SearchAllProperties, TextSearch::kProcedureSearchAllProperties, mgp::ProcedureType::Read, + { + mgp::Parameter(TextSearch::kParameterIndexName, mgp::Type::String), + mgp::Parameter(TextSearch::kParameterSearchQuery, mgp::Type::String), + }, + {mgp::Return(TextSearch::kReturnNode, mgp::Type::Node)}, module, memory); + + AddProcedure(TextSearch::Aggregate, TextSearch::kProcedureAggregate, mgp::ProcedureType::Read, + { + mgp::Parameter(TextSearch::kParameterIndexName, mgp::Type::String), + mgp::Parameter(TextSearch::kParameterSearchQuery, mgp::Type::String), + mgp::Parameter(TextSearch::kParameterAggregationQuery, mgp::Type::String), + }, + {mgp::Return(TextSearch::kReturnAggregation, mgp::Type::String)}, module, memory); + } catch (const std::exception &e) { + std::cerr << "Error while initializing query module: " << e.what() << std::endl; + return 1; + } + + return 0; +} + +extern "C" int mgp_shutdown_module() { return 0; } diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4d5d523c6..af88e624a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -45,7 +45,7 @@ set(mg_single_node_v2_sources add_executable(memgraph ${mg_single_node_v2_sources}) target_include_directories(memgraph PUBLIC ${CMAKE_SOURCE_DIR}/include) target_link_libraries(memgraph stdc++fs Threads::Threads - mg-telemetry mg-communication mg-communication-metrics mg-memory mg-utils mg-license mg-settings mg-glue mg-flags mg::system mg::replication_handler) + mg-telemetry mgcxx_text_search tantivy_text_search mg-communication mg-communication-metrics mg-memory mg-utils mg-license mg-settings mg-glue mg-flags mg::system mg::replication_handler) # NOTE: `include/mg_procedure.syms` describes a pattern match for symbols which # should be dynamically exported, so that `dlopen` can correctly link th diff --git a/src/dbms/dbms_handler.hpp b/src/dbms/dbms_handler.hpp index b0bbd5758..482423ebf 100644 --- a/src/dbms/dbms_handler.hpp +++ b/src/dbms/dbms_handler.hpp @@ -311,7 +311,7 @@ class DbmsHandler { stats.triggers += info.triggers; stats.streams += info.streams; ++stats.num_databases; - stats.indices += 
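
For reference, once the module is loaded and a text index exists, the procedures registered above are invoked from Cypher along these lines (the index name and query strings are illustrative, not part of this patch):

CALL text.search('complianceDocuments', 'data.title:alert') YIELD node RETURN node;
CALL text.regex_search('complianceDocuments', 'wor.*s') YIELD node RETURN node;
CALL text.search_all('complianceDocuments', 'alert') YIELD node RETURN node;
CALL text.aggregate('complianceDocuments', 'data.title:alert', '<aggregation-query-JSON>') YIELD aggregation RETURN aggregation;

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 4d5d523c6..af88e624a 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -45,7 +45,7 @@ set(mg_single_node_v2_sources
 add_executable(memgraph ${mg_single_node_v2_sources})
 target_include_directories(memgraph PUBLIC ${CMAKE_SOURCE_DIR}/include)
 target_link_libraries(memgraph stdc++fs Threads::Threads
-        mg-telemetry mg-communication mg-communication-metrics mg-memory mg-utils mg-license mg-settings mg-glue mg-flags mg::system mg::replication_handler)
+        mg-telemetry mgcxx_text_search tantivy_text_search mg-communication mg-communication-metrics mg-memory mg-utils mg-license mg-settings mg-glue mg-flags mg::system mg::replication_handler)
 
 # NOTE: `include/mg_procedure.syms` describes a pattern match for symbols which
 # should be dynamically exported, so that `dlopen` can correctly link th
diff --git a/src/dbms/dbms_handler.hpp b/src/dbms/dbms_handler.hpp
index b0bbd5758..482423ebf 100644
--- a/src/dbms/dbms_handler.hpp
+++ b/src/dbms/dbms_handler.hpp
@@ -311,7 +311,7 @@ class DbmsHandler {
       stats.triggers += info.triggers;
       stats.streams += info.streams;
       ++stats.num_databases;
-      stats.indices += 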
storage_info.label_indices + storage_info.label_property_indices; + stats.indices += storage_info.label_indices + storage_info.label_property_indices + storage_info.text_indices; stats.constraints += storage_info.existence_constraints + storage_info.unique_constraints; ++stats.storage_modes[(int)storage_info.storage_mode]; ++stats.isolation_levels[(int)storage_info.isolation_level]; diff --git a/src/dbms/inmemory/replication_handlers.cpp b/src/dbms/inmemory/replication_handlers.cpp index 69f04914c..f9ce7a9d8 100644 --- a/src/dbms/inmemory/replication_handlers.cpp +++ b/src/dbms/inmemory/replication_handlers.cpp @@ -615,6 +615,7 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage auto vertex = transaction->FindVertex(delta.vertex_add_remove_label.gid, View::NEW); if (!vertex) throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__); + // NOTE: Text search doesn’t have replication in scope yet (Phases 1 and 2) auto ret = vertex->AddLabel(transaction->NameToLabel(delta.vertex_add_remove_label.label)); if (ret.HasError() || !ret.GetValue()) throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__); @@ -627,6 +628,7 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage auto vertex = transaction->FindVertex(delta.vertex_add_remove_label.gid, View::NEW); if (!vertex) throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__); + // NOTE: Text search doesn’t have replication in scope yet (Phases 1 and 2) auto ret = vertex->RemoveLabel(transaction->NameToLabel(delta.vertex_add_remove_label.label)); if (ret.HasError() || !ret.GetValue()) throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__); @@ -640,6 +642,7 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage auto vertex = transaction->FindVertex(delta.vertex_edge_set_property.gid, View::NEW); if (!vertex) throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__); + // NOTE: Phase 1 of the text search feature doesn't have replication in scope auto ret = vertex->SetProperty(transaction->NameToProperty(delta.vertex_edge_set_property.property), delta.vertex_edge_set_property.value); if (ret.HasError()) @@ -853,6 +856,14 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__); break; } + case WalDeltaData::Type::TEXT_INDEX_CREATE: { + // NOTE: Text search doesn’t have replication in scope yet (Phases 1 and 2) + break; + } + case WalDeltaData::Type::TEXT_INDEX_DROP: { + // NOTE: Text search doesn’t have replication in scope yet (Phases 1 and 2) + break; + } case WalDeltaData::Type::EXISTENCE_CONSTRAINT_CREATE: { spdlog::trace(" Create existence constraint on :{} ({})", delta.operation_label_property.label, delta.operation_label_property.property); diff --git a/src/flags/experimental.cpp b/src/flags/experimental.cpp index 123903c96..8c29142a1 100644 --- a/src/flags/experimental.cpp +++ b/src/flags/experimental.cpp @@ -18,14 +18,15 @@ // Bolt server flags. // NOLINTNEXTLINE (cppcoreguidelines-avoid-non-const-global-variables) -DEFINE_string(experimental_enabled, "", - "Experimental features to be used, comma seperated. 
Options [system-replication, high-availability]");
-
+DEFINE_string(
+    experimental_enabled, "",
+    "Experimental features to be used, comma-separated. Options [system-replication, text-search, high-availability]");
 using namespace std::string_view_literals;
 
 namespace memgraph::flags {
 
 auto const mapping = std::map{std::pair{"system-replication"sv, Experiments::SYSTEM_REPLICATION},
+                              std::pair{"text-search"sv, Experiments::TEXT_SEARCH},
                               std::pair{"high-availability"sv, Experiments::HIGH_AVAILABILITY}};
 
 auto ExperimentsInstance() -> Experiments & {
@@ -45,7 +46,7 @@ bool AreExperimentsEnabled(Experiments experiments) {
 void InitializeExperimental() {
   namespace rv = ranges::views;
 
-  auto const connonicalize_string = [](auto &&rng) {
+  auto const canonicalize_string = [](auto &&rng) {
     auto const is_space = [](auto c) { return c == ' '; };
     auto const to_lower = [](unsigned char c) { return std::tolower(c); };
 
@@ -56,7 +57,7 @@ void InitializeExperimental() {
   auto const mapping_end = mapping.cend();
   using underlying_type = std::underlying_type_t;
   auto to_set = underlying_type{};
-  for (auto &&experiment : FLAGS_experimental_enabled | rv::split(',') | rv::transform(connonicalize_string)) {
+  for (auto &&experiment : FLAGS_experimental_enabled | rv::split(',') | rv::transform(canonicalize_string)) {
     if (auto it = mapping.find(experiment); it != mapping_end) {
       to_set |= static_cast(it->second);
     }
diff --git a/src/flags/experimental.hpp b/src/flags/experimental.hpp
index 5a19889fe..0b209a4e8 100644
--- a/src/flags/experimental.hpp
+++ b/src/flags/experimental.hpp
@@ -23,7 +23,8 @@ namespace memgraph::flags {
 // old experiments can be reused once code cleanup has happened
 enum class Experiments : uint8_t {
   SYSTEM_REPLICATION = 1 << 0,
-  HIGH_AVAILABILITY = 1 << 1,
+  TEXT_SEARCH = 1 << 1,
+  HIGH_AVAILABILITY = 1 << 2,
 };
 
 bool AreExperimentsEnabled(Experiments experiments);
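
Since every text search code path is gated behind this flag, the quickest way to try the feature is to start the server with the experiment enabled, optionally combined with other experiments:

memgraph --experimental-enabled=text-search
memgraph --experimental-enabled=text-search,high-availability

diff --git a/src/flags/run_time_configurable.cpp b/src/flags/run_time_configurable.cpp
index a42ebd3d0..6c0fc54ac 100644
--- a/src/flags/run_time_configurable.cpp
+++ b/src/flags/run_time_configurable.cpp
@@ -1,4 +1,4 @@
-// Copyright 2023 Memgraph Ltd.
+// Copyright 2024 Memgraph Ltd.
 //
 // Use of this software is governed by the Business Source License
 // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
@@ -73,11 +73,11 @@ constexpr auto kLogToStderrGFlagsKey = "also_log_to_stderr";
 constexpr auto kCartesianProductEnabledSettingKey = "cartesian-product-enabled";
 constexpr auto kCartesianProductEnabledGFlagsKey = "cartesian-product-enabled";
 
-// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
-std::atomic execution_timeout_sec_;  // Local cache-like thing
-
-// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
-std::atomic cartesian_product_enabled_{true};  // Local cache-like thing
+// NOLINTBEGIN(cppcoreguidelines-avoid-non-const-global-variables)
+// Local cache-like thing
+std::atomic execution_timeout_sec_;
+std::atomic cartesian_product_enabled_{true};
+// NOLINTEND(cppcoreguidelines-avoid-non-const-global-variables)
 
 auto ToLLEnum(std::string_view val) {
   const auto ll_enum = memgraph::flags::LogLevelToEnum(val);
diff --git a/src/flags/run_time_configurable.hpp b/src/flags/run_time_configurable.hpp
index 944a0539f..b215d6540 100644
--- a/src/flags/run_time_configurable.hpp
+++ b/src/flags/run_time_configurable.hpp
@@ -1,4 +1,4 @@
-// Copyright 2023 Memgraph Ltd.
+// Copyright 2024 Memgraph Ltd.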
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source diff --git a/src/query/db_accessor.hpp b/src/query/db_accessor.hpp index 915ea9936..ee4988e4a 100644 --- a/src/query/db_accessor.hpp +++ b/src/query/db_accessor.hpp @@ -634,6 +634,24 @@ class DbAccessor final { bool EdgeTypeIndexExists(storage::EdgeTypeId edge_type) const { return accessor_->EdgeTypeIndexExists(edge_type); } + bool TextIndexExists(const std::string &index_name) const { return accessor_->TextIndexExists(index_name); } + + void TextIndexAddVertex(const VertexAccessor &vertex) { accessor_->TextIndexAddVertex(vertex.impl_); } + + void TextIndexUpdateVertex(const VertexAccessor &vertex, const std::vector &removed_labels = {}) { + accessor_->TextIndexUpdateVertex(vertex.impl_, removed_labels); + } + + std::vector TextIndexSearch(const std::string &index_name, const std::string &search_query, + text_search_mode search_mode) const { + return accessor_->TextIndexSearch(index_name, search_query, search_mode); + } + + std::string TextIndexAggregate(const std::string &index_name, const std::string &search_query, + const std::string &aggregation_query) const { + return accessor_->TextIndexAggregate(index_name, search_query, aggregation_query); + } + std::optional GetIndexStats(const storage::LabelId &label) const { return accessor_->GetIndexStats(label); } @@ -717,6 +735,12 @@ class DbAccessor final { return accessor_->DropIndex(edge_type); } + void CreateTextIndex(const std::string &index_name, storage::LabelId label) { + accessor_->CreateTextIndex(index_name, label, this); + } + + void DropTextIndex(const std::string &index_name) { accessor_->DropTextIndex(index_name); } + utils::BasicResult CreateExistenceConstraint( storage::LabelId label, storage::PropertyId property) { return accessor_->CreateExistenceConstraint(label, property); diff --git a/src/query/dump.cpp b/src/query/dump.cpp index f1dd08c8d..abc147ee8 100644 --- a/src/query/dump.cpp +++ b/src/query/dump.cpp @@ -252,6 +252,10 @@ void DumpLabelPropertyIndex(std::ostream *os, query::DbAccessor *dba, storage::L << ");"; } +void DumpTextIndex(std::ostream *os, query::DbAccessor *dba, const std::string &index_name, storage::LabelId label) { + *os << "CREATE TEXT INDEX " << EscapeName(index_name) << " ON :" << EscapeName(dba->LabelToName(label)) << ";"; +} + void DumpExistenceConstraint(std::ostream *os, query::DbAccessor *dba, storage::LabelId label, storage::PropertyId property) { *os << "CREATE CONSTRAINT ON (u:" << EscapeName(dba->LabelToName(label)) << ") ASSERT EXISTS (u." 
@@ -286,6 +290,8 @@ PullPlanDump::PullPlanDump(DbAccessor *dba, dbms::DatabaseAccess db_acc) CreateLabelIndicesPullChunk(), // Dump all label property indices CreateLabelPropertyIndicesPullChunk(), + // Dump all text indices + CreateTextIndicesPullChunk(), // Dump all existence constraints CreateExistenceConstraintsPullChunk(), // Dump all unique constraints @@ -412,6 +418,34 @@ PullPlanDump::PullChunk PullPlanDump::CreateLabelPropertyIndicesPullChunk() { }; } +PullPlanDump::PullChunk PullPlanDump::CreateTextIndicesPullChunk() { + // Dump all text indices + return [this, global_index = 0U](AnyStream *stream, std::optional n) mutable -> std::optional { + // Delay the construction of indices vectors + if (!indices_info_) { + indices_info_.emplace(dba_->ListAllIndices()); + } + const auto &text = indices_info_->text_indices; + + size_t local_counter = 0; + while (global_index < text.size() && (!n || local_counter < *n)) { + std::ostringstream os; + const auto &text_index = text[global_index]; + DumpTextIndex(&os, dba_, text_index.first, text_index.second); + stream->Result({TypedValue(os.str())}); + + ++global_index; + ++local_counter; + } + + if (global_index == text.size()) { + return local_counter; + } + + return std::nullopt; + }; +} + PullPlanDump::PullChunk PullPlanDump::CreateExistenceConstraintsPullChunk() { return [this, global_index = 0U](AnyStream *stream, std::optional n) mutable -> std::optional { // Delay the construction of constraint vectors diff --git a/src/query/dump.hpp b/src/query/dump.hpp index 05bd42967..0cf4a82a6 100644 --- a/src/query/dump.hpp +++ b/src/query/dump.hpp @@ -55,6 +55,7 @@ struct PullPlanDump { PullChunk CreateLabelIndicesPullChunk(); PullChunk CreateLabelPropertyIndicesPullChunk(); + PullChunk CreateTextIndicesPullChunk(); PullChunk CreateExistenceConstraintsPullChunk(); PullChunk CreateUniqueConstraintsPullChunk(); PullChunk CreateInternalIndexPullChunk(); diff --git a/src/query/exceptions.hpp b/src/query/exceptions.hpp index 147dc8710..a4c25fbae 100644 --- a/src/query/exceptions.hpp +++ b/src/query/exceptions.hpp @@ -433,4 +433,17 @@ class MultiDatabaseQueryInMulticommandTxException : public QueryException { SPECIALIZE_GET_EXCEPTION_NAME(MultiDatabaseQueryInMulticommandTxException) }; +class TextSearchException : public QueryException { + using QueryException::QueryException; + SPECIALIZE_GET_EXCEPTION_NAME(TextSearchException) +}; + +class TextSearchDisabledException : public TextSearchException { + public: + TextSearchDisabledException() + : TextSearchException( + "To use text indices and text search, start Memgraph with the experimental text search feature enabled.") {} + SPECIALIZE_GET_EXCEPTION_NAME(TextSearchDisabledException) +}; + } // namespace memgraph::query diff --git a/src/query/frontend/ast/ast.cpp b/src/query/frontend/ast/ast.cpp index 7da5c09a0..f0d09d453 100644 --- a/src/query/frontend/ast/ast.cpp +++ b/src/query/frontend/ast/ast.cpp @@ -189,6 +189,9 @@ constexpr utils::TypeInfo query::IndexQuery::kType{utils::TypeId::AST_INDEX_QUER constexpr utils::TypeInfo query::EdgeIndexQuery::kType{utils::TypeId::AST_EDGE_INDEX_QUERY, "EdgeIndexQuery", &query::Query::kType}; +constexpr utils::TypeInfo query::TextIndexQuery::kType{utils::TypeId::AST_TEXT_INDEX_QUERY, "TextIndexQuery", + &query::Query::kType}; + constexpr utils::TypeInfo query::Create::kType{utils::TypeId::AST_CREATE, "Create", &query::Clause::kType}; constexpr utils::TypeInfo query::CallProcedure::kType{utils::TypeId::AST_CALL_PROCEDURE, "CallProcedure", diff --git 
a/src/query/frontend/ast/ast.hpp b/src/query/frontend/ast/ast.hpp index 29f7be3cf..e3d7bc0b2 100644 --- a/src/query/frontend/ast/ast.hpp +++ b/src/query/frontend/ast/ast.hpp @@ -2273,6 +2273,37 @@ class EdgeIndexQuery : public memgraph::query::Query { friend class AstStorage; }; +class TextIndexQuery : public memgraph::query::Query { + public: + static const utils::TypeInfo kType; + const utils::TypeInfo &GetTypeInfo() const override { return kType; } + + enum class Action { CREATE, DROP }; + + TextIndexQuery() = default; + + DEFVISITABLE(QueryVisitor); + + memgraph::query::TextIndexQuery::Action action_; + memgraph::query::LabelIx label_; + std::string index_name_; + + TextIndexQuery *Clone(AstStorage *storage) const override { + TextIndexQuery *object = storage->Create(); + object->action_ = action_; + object->label_ = storage->GetLabelIx(label_.name); + object->index_name_ = index_name_; + return object; + } + + protected: + TextIndexQuery(Action action, LabelIx label, std::string index_name) + : action_(action), label_(std::move(label)), index_name_(index_name) {} + + private: + friend class AstStorage; +}; + class Create : public memgraph::query::Clause { public: static const utils::TypeInfo kType; diff --git a/src/query/frontend/ast/ast_visitor.hpp b/src/query/frontend/ast/ast_visitor.hpp index bf11878da..cc6aed138 100644 --- a/src/query/frontend/ast/ast_visitor.hpp +++ b/src/query/frontend/ast/ast_visitor.hpp @@ -83,6 +83,7 @@ class ExplainQuery; class ProfileQuery; class IndexQuery; class EdgeIndexQuery; +class TextIndexQuery; class DatabaseInfoQuery; class SystemInfoQuery; class ConstraintQuery; @@ -144,11 +145,11 @@ class ExpressionVisitor template class QueryVisitor - : public utils::Visitor {}; + : public utils::Visitor {}; } // namespace memgraph::query diff --git a/src/query/frontend/ast/cypher_main_visitor.cpp b/src/query/frontend/ast/cypher_main_visitor.cpp index 6da48c97c..35ccb3670 100644 --- a/src/query/frontend/ast/cypher_main_visitor.cpp +++ b/src/query/frontend/ast/cypher_main_visitor.cpp @@ -243,6 +243,13 @@ antlrcpp::Any CypherMainVisitor::visitIndexQuery(MemgraphCypher::IndexQueryConte return index_query; } +antlrcpp::Any CypherMainVisitor::visitTextIndexQuery(MemgraphCypher::TextIndexQueryContext *ctx) { + MG_ASSERT(ctx->children.size() == 1, "TextIndexQuery should have exactly one child!"); + auto *text_index_query = std::any_cast(ctx->children[0]->accept(this)); + query_ = text_index_query; + return text_index_query; +} + antlrcpp::Any CypherMainVisitor::visitCreateIndex(MemgraphCypher::CreateIndexContext *ctx) { auto *index_query = storage_->Create(); index_query->action_ = IndexQuery::Action::CREATE; @@ -286,6 +293,21 @@ antlrcpp::Any CypherMainVisitor::visitDropEdgeIndex(MemgraphCypher::DropEdgeInde return index_query; } +antlrcpp::Any CypherMainVisitor::visitCreateTextIndex(MemgraphCypher::CreateTextIndexContext *ctx) { + auto *index_query = storage_->Create(); + index_query->index_name_ = std::any_cast(ctx->indexName()->accept(this)); + index_query->action_ = TextIndexQuery::Action::CREATE; + index_query->label_ = AddLabel(std::any_cast(ctx->labelName()->accept(this))); + return index_query; +} + +antlrcpp::Any CypherMainVisitor::visitDropTextIndex(MemgraphCypher::DropTextIndexContext *ctx) { + auto *index_query = storage_->Create(); + index_query->index_name_ = std::any_cast(ctx->indexName()->accept(this)); + index_query->action_ = TextIndexQuery::Action::DROP; + return index_query; +} + antlrcpp::Any 
CypherMainVisitor::visitAuthQuery(MemgraphCypher::AuthQueryContext *ctx) { MG_ASSERT(ctx->children.size() == 1, "AuthQuery should have exactly one child!"); auto *auth_query = std::any_cast(ctx->children[0]->accept(this)); diff --git a/src/query/frontend/ast/cypher_main_visitor.hpp b/src/query/frontend/ast/cypher_main_visitor.hpp index 8c65345c8..53738af61 100644 --- a/src/query/frontend/ast/cypher_main_visitor.hpp +++ b/src/query/frontend/ast/cypher_main_visitor.hpp @@ -153,6 +153,11 @@ class CypherMainVisitor : public antlropencypher::MemgraphCypherBaseVisitor { */ antlrcpp::Any visitEdgeIndexQuery(MemgraphCypher::EdgeIndexQueryContext *ctx) override; + /** + * @return TextIndexQuery* + */ + antlrcpp::Any visitTextIndexQuery(MemgraphCypher::TextIndexQueryContext *ctx) override; + /** * @return ExplainQuery* */ @@ -500,7 +505,7 @@ class CypherMainVisitor : public antlropencypher::MemgraphCypherBaseVisitor { antlrcpp::Any visitCreateIndex(MemgraphCypher::CreateIndexContext *ctx) override; /** - * @return DropIndex* + * @return IndexQuery* */ antlrcpp::Any visitDropIndex(MemgraphCypher::DropIndexContext *ctx) override; @@ -514,6 +519,16 @@ class CypherMainVisitor : public antlropencypher::MemgraphCypherBaseVisitor { */ antlrcpp::Any visitDropEdgeIndex(MemgraphCypher::DropEdgeIndexContext *ctx) override; + /** + * @return TextIndexQuery* + */ + antlrcpp::Any visitCreateTextIndex(MemgraphCypher::CreateTextIndexContext *ctx) override; + + /** + * @return TextIndexQuery* + */ + antlrcpp::Any visitDropTextIndex(MemgraphCypher::DropTextIndexContext *ctx) override; + /** * @return AuthQuery* */ diff --git a/src/query/frontend/opencypher/grammar/Cypher.g4 b/src/query/frontend/opencypher/grammar/Cypher.g4 index 7fa218598..911615314 100644 --- a/src/query/frontend/opencypher/grammar/Cypher.g4 +++ b/src/query/frontend/opencypher/grammar/Cypher.g4 @@ -25,6 +25,7 @@ statement : query ; query : cypherQuery | indexQuery + | textIndexQuery | explainQuery | profileQuery | databaseInfoQuery @@ -65,6 +66,8 @@ cypherQuery : singleQuery ( cypherUnion )* ( queryMemoryLimit )? ; indexQuery : createIndex | dropIndex; +textIndexQuery : createTextIndex | dropTextIndex; + singleQuery : clause ( clause )* ; cypherUnion : ( UNION ALL singleQuery ) @@ -342,6 +345,12 @@ createIndex : CREATE INDEX ON ':' labelName ( '(' propertyKeyName ')' )? ; dropIndex : DROP INDEX ON ':' labelName ( '(' propertyKeyName ')' )? 
;
+indexName : symbolicName ;
+
+createTextIndex : CREATE TEXT INDEX indexName ON ':' labelName ;
+
+dropTextIndex : DROP TEXT INDEX indexName ;
+
 doubleLiteral : FloatingLiteral ;
 
 cypherKeyword : ALL
diff --git a/src/query/frontend/opencypher/grammar/CypherLexer.g4 b/src/query/frontend/opencypher/grammar/CypherLexer.g4
index 3e3c640d6..fb8a30b0f 100644
--- a/src/query/frontend/opencypher/grammar/CypherLexer.g4
+++ b/src/query/frontend/opencypher/grammar/CypherLexer.g4
@@ -131,6 +131,7 @@ SHOW : S H O W ;
 SINGLE : S I N G L E ;
 STARTS : S T A R T S ;
 STORAGE : S T O R A G E ;
+TEXT : T E X T ;
 THEN : T H E N ;
 TRUE : T R U E ;
 UNION : U N I O N ;
diff --git a/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 b/src/query/frontend/opencypher/grammar/MemgraphCypher.g4
index 378310c22..ad15d6213 100644
--- a/src/query/frontend/opencypher/grammar/MemgraphCypher.g4
+++ b/src/query/frontend/opencypher/grammar/MemgraphCypher.g4
@@ -134,6 +134,7 @@ symbolicName : UnescapedSymbolicName
 query : cypherQuery
       | indexQuery
       | edgeIndexQuery
+      | textIndexQuery
       | explainQuery
       | profileQuery
       | databaseInfoQuery
diff --git a/src/query/frontend/semantic/required_privileges.cpp b/src/query/frontend/semantic/required_privileges.cpp
index 15726e3e2..d87fcb10e 100644
--- a/src/query/frontend/semantic/required_privileges.cpp
+++ b/src/query/frontend/semantic/required_privileges.cpp
@@ -29,6 +29,8 @@ class PrivilegeExtractor : public QueryVisitor, public HierarchicalTreeVis
 
   void Visit(EdgeIndexQuery & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::INDEX); }
 
+  void Visit(TextIndexQuery & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::INDEX); }
+
   void Visit(AnalyzeGraphQuery & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::INDEX); }
 
   void Visit(AuthQuery & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::AUTH); }
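
With the parser rule, the TEXT token, and the INDEX privilege wired up, the new DDL reads as follows (the index and label names are illustrative):

CREATE TEXT INDEX complianceDocuments ON :Report;
DROP TEXT INDEX complianceDocuments;

Note that, unlike label and label-property indices, a text index is addressed by its user-chosen name, which is why dropTextIndex takes only indexName.

diff --git a/src/query/frontend/semantic/symbol.hpp b/src/query/frontend/semantic/symbol.hpp
index 0cfb86608..1a5aa2756 100644
--- a/src/query/frontend/semantic/symbol.hpp
+++ b/src/query/frontend/semantic/symbol.hpp
@@ -1,4 +1,4 @@
-// Copyright 2023 Memgraph Ltd.
+// Copyright 2024 Memgraph Ltd.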
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp index 1322a7b99..332054485 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -39,6 +39,7 @@ #include "dbms/dbms_handler.hpp" #include "dbms/global.hpp" #include "dbms/inmemory/storage_helper.hpp" +#include "flags/experimental.hpp" #include "flags/replication.hpp" #include "flags/run_time_configurable.hpp" #include "glue/communication.hpp" @@ -2709,6 +2710,75 @@ PreparedQuery PrepareEdgeIndexQuery(ParsedQuery parsed_query, bool in_explicit_t RWType::W}; } +PreparedQuery PrepareTextIndexQuery(ParsedQuery parsed_query, bool in_explicit_transaction, + std::vector *notifications, CurrentDB ¤t_db) { + if (in_explicit_transaction) { + throw IndexInMulticommandTxException(); + } + + auto *text_index_query = utils::Downcast(parsed_query.query); + std::function handler; + + // TODO: we will need transaction for replication + MG_ASSERT(current_db.db_acc_, "Text index query expects a current DB"); + auto &db_acc = *current_db.db_acc_; + + MG_ASSERT(current_db.db_transactional_accessor_, "Text index query expects a current DB transaction"); + auto *dba = &*current_db.execution_db_accessor_; + + // Creating an index influences computed plan costs. + auto invalidate_plan_cache = [plan_cache = db_acc->plan_cache()] { + plan_cache->WithLock([&](auto &cache) { cache.reset(); }); + }; + + auto *storage = db_acc->storage(); + auto label = storage->NameToLabel(text_index_query->label_.name); + auto &index_name = text_index_query->index_name_; + + Notification index_notification(SeverityLevel::INFO); + switch (text_index_query->action_) { + case TextIndexQuery::Action::CREATE: { + index_notification.code = NotificationCode::CREATE_INDEX; + index_notification.title = fmt::format("Created text index on label {}.", text_index_query->label_.name); + // TODO: not just storage + invalidate_plan_cache. Need a DB transaction (for replication) + handler = [dba, label, index_name, + invalidate_plan_cache = std::move(invalidate_plan_cache)](Notification &index_notification) { + if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + throw TextSearchDisabledException(); + } + dba->CreateTextIndex(index_name, label); + utils::OnScopeExit invalidator(invalidate_plan_cache); + }; + break; + } + case TextIndexQuery::Action::DROP: { + index_notification.code = NotificationCode::DROP_INDEX; + index_notification.title = fmt::format("Dropped text index on label {}.", text_index_query->label_.name); + // TODO: not just storage + invalidate_plan_cache. 
Need a DB transaction (for replication)
+      handler = [dba, index_name,
+                 invalidate_plan_cache = std::move(invalidate_plan_cache)](Notification &index_notification) {
+        if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
+          throw TextSearchDisabledException();
+        }
+        dba->DropTextIndex(index_name);
+        utils::OnScopeExit invalidator(invalidate_plan_cache);
+      };
+      break;
+    }
+  }
+
+  return PreparedQuery{
+      {},
+      std::move(parsed_query.required_privileges),
+      [handler = std::move(handler), notifications, index_notification = std::move(index_notification)](
+          AnyStream * /*stream*/, std::optional /*unused*/) mutable {
+        handler(index_notification);
+        notifications->push_back(index_notification);
+        return QueryHandlerResult::COMMIT;  // NOTE: Text search doesn't have replication in scope yet (Phases 1 and 2)
+      },
+      RWType::W};
+}
+
 PreparedQuery PrepareAuthQuery(ParsedQuery parsed_query, bool in_explicit_transaction,
                                InterpreterContext *interpreter_context, Interpreter &interpreter) {
   if (in_explicit_transaction) {
@@ -3499,7 +3569,7 @@ PreparedQuery PrepareDatabaseInfoQuery(ParsedQuery parsed_query, bool in_explici
   }
 
   MG_ASSERT(current_db.db_acc_, "Database info query expects a current DB");
-  MG_ASSERT(current_db.db_transactional_accessor_, "Database ifo query expects a current DB transaction");
+  MG_ASSERT(current_db.db_transactional_accessor_, "Database info query expects a current DB transaction");
   auto *dba = &*current_db.execution_db_accessor_;
 
   auto *info_query = utils::Downcast(parsed_query.query);
@@ -3514,10 +3584,11 @@ PreparedQuery PrepareDatabaseInfoQuery(ParsedQuery parsed_query, bool in_explici
       const std::string_view label_index_mark{"label"};
       const std::string_view label_property_index_mark{"label+property"};
       const std::string_view edge_type_index_mark{"edge-type"};
+      const std::string_view text_index_mark{"text"};
       auto info = dba->ListAllIndices();
       auto storage_acc = database->Access();
       std::vector> results;
-      results.reserve(info.label.size() + info.label_property.size());
+      results.reserve(info.label.size() + info.label_property.size() + info.text_indices.size());
      for (const auto &item : info.label) {
         results.push_back({TypedValue(label_index_mark), TypedValue(storage->LabelToName(item)), TypedValue(),
                            TypedValue(static_cast(storage_acc->ApproximateVertexCount(item)))});
@@ -3532,6 +3603,10 @@ PreparedQuery PrepareDatabaseInfoQuery(ParsedQuery parsed_query, bool in_explici
         results.push_back({TypedValue(edge_type_index_mark), TypedValue(storage->EdgeTypeToName(item)), TypedValue(),
                            TypedValue(static_cast(storage_acc->ApproximateEdgeCount(item)))});
       }
+      for (const auto &[index_name, label] : info.text_indices) {
+        results.push_back({TypedValue(fmt::format("{} (name: {})", text_index_mark, index_name)),
+                           TypedValue(storage->LabelToName(label)), TypedValue(), TypedValue()});
+      }
       std::sort(results.begin(), results.end(), [&label_index_mark](const auto &record_1, const auto &record_2) {
         const auto type_1 = record_1[0].ValueString();
         const auto type_2 = record_2[0].ValueString();
@@ -4293,13 +4368,15 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string,
       utils::Downcast(parsed_query.query) || utils::Downcast(parsed_query.query) ||
       utils::Downcast(parsed_query.query) || utils::Downcast(parsed_query.query) ||
       utils::Downcast(parsed_query.query) || utils::Downcast(parsed_query.query) ||
-      utils::Downcast(parsed_query.query) || utils::Downcast(parsed_query.query);
+      utils::Downcast(parsed_query.query) || utils::Downcast(parsed_query.query) ||
+      
utils::Downcast(parsed_query.query); if (!in_explicit_transaction_ && requires_db_transaction) { // TODO: ATM only a single database, will change when we have multiple database transactions bool could_commit = utils::Downcast(parsed_query.query) != nullptr; bool unique = utils::Downcast(parsed_query.query) != nullptr || utils::Downcast(parsed_query.query) != nullptr || + utils::Downcast(parsed_query.query) != nullptr || utils::Downcast(parsed_query.query) != nullptr || upper_case_query.find(kSchemaAssert) != std::string::npos; SetupDatabaseTransaction(could_commit, unique); @@ -4337,6 +4414,9 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, } else if (utils::Downcast(parsed_query.query)) { prepared_query = PrepareEdgeIndexQuery(std::move(parsed_query), in_explicit_transaction_, &query_execution->notifications, current_db_); + } else if (utils::Downcast(parsed_query.query)) { + prepared_query = PrepareTextIndexQuery(std::move(parsed_query), in_explicit_transaction_, + &query_execution->notifications, current_db_); } else if (utils::Downcast(parsed_query.query)) { prepared_query = PrepareAnalyzeGraphQuery(std::move(parsed_query), in_explicit_transaction_, current_db_); } else if (utils::Downcast(parsed_query.query)) { diff --git a/src/query/plan/operator.cpp b/src/query/plan/operator.cpp index 2b970cf49..ff6c1dc9a 100644 --- a/src/query/plan/operator.cpp +++ b/src/query/plan/operator.cpp @@ -32,6 +32,7 @@ #include "spdlog/spdlog.h" #include "csv/parsing.hpp" +#include "flags/experimental.hpp" #include "license/license.hpp" #include "query/auth_checker.hpp" #include "query/context.hpp" @@ -266,6 +267,10 @@ VertexAccessor &CreateLocalVertex(const NodeCreationInfo &node_info, Frame *fram } MultiPropsInitChecked(&new_node, properties); + if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + context.db_accessor->TextIndexAddVertex(new_node); + } + (*frame)[node_info.symbol] = new_node; return (*frame)[node_info.symbol].ValueVertex(); } @@ -2991,6 +2996,9 @@ bool SetProperty::SetPropertyCursor::Pull(Frame &frame, ExecutionContext &contex context.trigger_context_collector->RegisterSetObjectProperty(lhs.ValueVertex(), self_.property_, TypedValue{std::move(old_value)}, TypedValue{rhs}); } + if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + context.db_accessor->TextIndexUpdateVertex(lhs.ValueVertex()); + } break; } case TypedValue::Type::Edge: { @@ -3147,6 +3155,9 @@ void SetPropertiesOnRecord(TRecordAccessor *record, const TypedValue &rhs, SetPr case TypedValue::Type::Vertex: { PropertiesMap new_properties = get_props(rhs.ValueVertex()); update_props(new_properties); + if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + context->db_accessor->TextIndexUpdateVertex(rhs.ValueVertex()); + } break; } case TypedValue::Type::Map: { @@ -3204,6 +3215,9 @@ bool SetProperties::SetPropertiesCursor::Pull(Frame &frame, ExecutionContext &co } #endif SetPropertiesOnRecord(&lhs.ValueVertex(), rhs, self_.op_, &context, cached_name_id_); + if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + context.db_accessor->TextIndexUpdateVertex(lhs.ValueVertex()); + } break; case TypedValue::Type::Edge: #ifdef MG_ENTERPRISE @@ -3295,6 +3309,10 @@ bool SetLabels::SetLabelsCursor::Pull(Frame &frame, ExecutionContext &context) { } } + if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + context.db_accessor->TextIndexUpdateVertex(vertex); + } + return true; } @@ -3366,6 +3384,9 @@ bool 
RemoveProperty::RemovePropertyCursor::Pull(Frame &frame, ExecutionContext & } #endif remove_prop(&lhs.ValueVertex()); + if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + context.db_accessor->TextIndexUpdateVertex(lhs.ValueVertex()); + } break; case TypedValue::Type::Edge: #ifdef MG_ENTERPRISE @@ -3458,6 +3479,10 @@ bool RemoveLabels::RemoveLabelsCursor::Pull(Frame &frame, ExecutionContext &cont } } + if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + context.db_accessor->TextIndexUpdateVertex(vertex, EvaluateLabels(self_.labels_, evaluator, context.db_accessor)); + } + return true; } diff --git a/src/query/plan/vertex_count_cache.hpp b/src/query/plan/vertex_count_cache.hpp index 802f4e09f..69e002c0a 100644 --- a/src/query/plan/vertex_count_cache.hpp +++ b/src/query/plan/vertex_count_cache.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source diff --git a/src/query/procedure/mg_procedure_impl.cpp b/src/query/procedure/mg_procedure_impl.cpp index d6ce3c7b7..a2bc23aa3 100644 --- a/src/query/procedure/mg_procedure_impl.cpp +++ b/src/query/procedure/mg_procedure_impl.cpp @@ -23,6 +23,8 @@ #include #include +#include "flags/experimental.hpp" +#include "flags/run_time_configurable.hpp" #include "license/license.hpp" #include "mg_procedure.h" #include "module.hpp" @@ -32,6 +34,7 @@ #include "query/procedure/fmt.hpp" #include "query/procedure/mg_procedure_helpers.hpp" #include "query/stream/common.hpp" +#include "storage/v2/indices/text_index.hpp" #include "storage/v2/property_value.hpp" #include "storage/v2/storage_mode.hpp" #include "storage/v2/view.hpp" @@ -1843,6 +1846,11 @@ mgp_error mgp_vertex_set_property(struct mgp_vertex *v, const char *property_nam const auto result = std::visit( [prop_key, property_value](auto &impl) { return impl.SetProperty(prop_key, ToPropertyValue(*property_value)); }, v->impl); + if (memgraph::flags::AreExperimentsEnabled(memgraph::flags::Experiments::TEXT_SEARCH) && !result.HasError()) { + auto v_impl = v->getImpl(); + v->graph->getImpl()->TextIndexUpdateVertex(v_impl); + } + if (result.HasError()) { switch (result.GetError()) { case memgraph::storage::Error::DELETED_OBJECT: @@ -1899,6 +1907,11 @@ mgp_error mgp_vertex_set_properties(struct mgp_vertex *v, struct mgp_map *proper } const auto result = v->getImpl().UpdateProperties(props); + if (memgraph::flags::AreExperimentsEnabled(memgraph::flags::Experiments::TEXT_SEARCH) && !result.HasError()) { + auto v_impl = v->getImpl(); + v->graph->getImpl()->TextIndexUpdateVertex(v_impl); + } + if (result.HasError()) { switch (result.GetError()) { case memgraph::storage::Error::DELETED_OBJECT: @@ -1956,6 +1969,10 @@ mgp_error mgp_vertex_add_label(struct mgp_vertex *v, mgp_label label) { } const auto result = std::visit([label_id](auto &impl) { return impl.AddLabel(label_id); }, v->impl); + if (memgraph::flags::AreExperimentsEnabled(memgraph::flags::Experiments::TEXT_SEARCH) && !result.HasError()) { + auto v_impl = v->getImpl(); + v->graph->getImpl()->TextIndexUpdateVertex(v_impl); + } if (result.HasError()) { switch (result.GetError()) { @@ -1998,6 +2015,10 @@ mgp_error mgp_vertex_remove_label(struct mgp_vertex *v, mgp_label label) { throw ImmutableObjectException{"Cannot remove a label from an immutable vertex!"}; } const auto result = std::visit([label_id](auto 
&impl) { return impl.RemoveLabel(label_id); }, v->impl); + if (memgraph::flags::AreExperimentsEnabled(memgraph::flags::Experiments::TEXT_SEARCH) && !result.HasError()) { + auto v_impl = v->getImpl(); + v->graph->getImpl()->TextIndexUpdateVertex(v_impl, {label_id}); + } if (result.HasError()) { switch (result.GetError()) { @@ -2590,7 +2611,7 @@ mgp_error mgp_edge_iter_properties(mgp_edge *e, mgp_memory *memory, mgp_properti mgp_error mgp_graph_get_vertex_by_id(mgp_graph *graph, mgp_vertex_id id, mgp_memory *memory, mgp_vertex **result) { return WrapExceptions( [graph, id, memory]() -> mgp_vertex * { - std::optional maybe_vertex = std::visit( + auto maybe_vertex = std::visit( [graph, id](auto *impl) { return impl->FindVertex(memgraph::storage::Gid::FromInt(id.as_int), graph->view); }, @@ -2967,6 +2988,10 @@ mgp_error mgp_graph_create_vertex(struct mgp_graph *graph, mgp_memory *memory, m } auto *vertex = std::visit( [=](auto *impl) { return NewRawMgpObject(memory, impl->InsertVertex(), graph); }, graph->impl); + if (memgraph::flags::AreExperimentsEnabled(memgraph::flags::Experiments::TEXT_SEARCH)) { + auto v_impl = vertex->getImpl(); + vertex->graph->getImpl()->TextIndexAddVertex(v_impl); + } auto &ctx = graph->ctx; ctx->execution_stats[memgraph::query::ExecutionStats::Key::CREATED_NODES] += 1; @@ -3324,6 +3349,140 @@ mgp_error mgp_graph_delete_edge(struct mgp_graph *graph, mgp_edge *edge) { }); } +mgp_error mgp_graph_has_text_index(mgp_graph *graph, const char *index_name, int *result) { + return WrapExceptions([graph, index_name, result]() { + std::visit(memgraph::utils::Overloaded{ + [&](memgraph::query::DbAccessor *impl) { *result = impl->TextIndexExists(index_name); }, + [&](memgraph::query::SubgraphDbAccessor *impl) { + *result = impl->GetAccessor()->TextIndexExists(index_name); + }}, + graph->impl); + }); +} + +mgp_vertex *GetVertexByGid(mgp_graph *graph, memgraph::storage::Gid id, mgp_memory *memory) { + auto get_vertex_by_gid = memgraph::utils::Overloaded{ + [graph, id, memory](memgraph::query::DbAccessor *impl) -> mgp_vertex * { + auto maybe_vertex = impl->FindVertex(id, graph->view); + if (!maybe_vertex) return nullptr; + return NewRawMgpObject(memory, *maybe_vertex, graph); + }, + [graph, id, memory](memgraph::query::SubgraphDbAccessor *impl) -> mgp_vertex * { + auto maybe_vertex = impl->FindVertex(id, graph->view); + if (!maybe_vertex) return nullptr; + return NewRawMgpObject( + memory, memgraph::query::SubgraphVertexAccessor(*maybe_vertex, impl->getGraph()), graph); + }}; + return std::visit(get_vertex_by_gid, graph->impl); +} + +void WrapTextSearch(mgp_graph *graph, mgp_memory *memory, mgp_map **result, + const std::vector &vertex_ids = {}, + const std::optional &error_msg = std::nullopt) { + if (const auto err = mgp_map_make_empty(memory, result); err != mgp_error::MGP_ERROR_NO_ERROR) { + throw std::logic_error("Retrieving text search results failed during creation of a mgp_map"); + } + + mgp_value *error_value; + if (error_msg.has_value()) { + if (const auto err = mgp_value_make_string(error_msg.value().data(), memory, &error_value); + err != mgp_error::MGP_ERROR_NO_ERROR) { + throw std::logic_error("Retrieving text search results failed during creation of a string mgp_value"); + } + } + + mgp_list *search_results{}; + if (const auto err = mgp_list_make_empty(vertex_ids.size(), memory, &search_results); + err != mgp_error::MGP_ERROR_NO_ERROR) { + throw std::logic_error("Retrieving text search results failed during creation of a mgp_list"); + } + + for (const auto &vertex_id : 
vertex_ids) { + mgp_value *vertex; + if (const auto err = mgp_value_make_vertex(GetVertexByGid(graph, vertex_id, memory), &vertex); + err != mgp_error::MGP_ERROR_NO_ERROR) { + throw std::logic_error("Retrieving text search results failed during creation of a vertex mgp_value"); + } + if (const auto err = mgp_list_append(search_results, vertex); err != mgp_error::MGP_ERROR_NO_ERROR) { + throw std::logic_error( + "Retrieving text search results failed during insertion of the mgp_value into the result list"); + } + } + + mgp_value *search_results_value; + if (const auto err = mgp_value_make_list(search_results, &search_results_value); + err != mgp_error::MGP_ERROR_NO_ERROR) { + throw std::logic_error("Retrieving text search results failed during creation of a list mgp_value"); + } + + if (error_msg.has_value()) { + if (const auto err = mgp_map_insert(*result, "error_msg", error_value); err != mgp_error::MGP_ERROR_NO_ERROR) { + throw std::logic_error("Retrieving text index search error failed during insertion into mgp_map"); + } + return; + } + + if (const auto err = mgp_map_insert(*result, "search_results", search_results_value); + err != mgp_error::MGP_ERROR_NO_ERROR) { + throw std::logic_error("Retrieving text index search results failed during insertion into mgp_map"); + } +} + +void WrapTextIndexAggregation(mgp_memory *memory, mgp_map **result, const std::string &aggregation_result, + const std::optional &error_msg = std::nullopt) { + if (const auto err = mgp_map_make_empty(memory, result); err != mgp_error::MGP_ERROR_NO_ERROR) { + throw std::logic_error("Retrieving text search results failed during creation of a mgp_map"); + } + + mgp_value *aggregation_result_or_error_value; + if (const auto err = mgp_value_make_string(error_msg.value_or(aggregation_result).data(), memory, + &aggregation_result_or_error_value); + err != mgp_error::MGP_ERROR_NO_ERROR) { + throw std::logic_error("Retrieving text search results failed during creation of a string mgp_value"); + } + + if (error_msg.has_value()) { + if (const auto err = mgp_map_insert(*result, "error_msg", aggregation_result_or_error_value); + err != mgp_error::MGP_ERROR_NO_ERROR) { + throw std::logic_error("Retrieving text index aggregation error failed during insertion into mgp_map"); + } + return; + } + + if (const auto err = mgp_map_insert(*result, "aggregation_results", aggregation_result_or_error_value); + err != mgp_error::MGP_ERROR_NO_ERROR) { + throw std::logic_error("Retrieving text index aggregation results failed during insertion into mgp_map"); + } +} + +mgp_error mgp_graph_search_text_index(mgp_graph *graph, const char *index_name, const char *search_query, + text_search_mode search_mode, mgp_memory *memory, mgp_map **result) { + return WrapExceptions([graph, memory, index_name, search_query, search_mode, result]() { + std::vector found_vertices_ids; + std::optional error_msg = std::nullopt; + try { + found_vertices_ids = graph->getImpl()->TextIndexSearch(index_name, search_query, search_mode); + } catch (memgraph::query::QueryException &e) { + error_msg = e.what(); + } + WrapTextSearch(graph, memory, result, found_vertices_ids, error_msg); + }); +} + +mgp_error mgp_graph_aggregate_over_text_index(mgp_graph *graph, const char *index_name, const char *search_query, + const char *aggregation_query, mgp_memory *memory, mgp_map **result) { + return WrapExceptions([graph, memory, index_name, search_query, aggregation_query, result]() { + std::string search_results; + std::optional error_msg = std::nullopt; + try { + search_results = 
graph->getImpl()->TextIndexAggregate(index_name, search_query, aggregation_query); + } catch (memgraph::query::QueryException &e) { + error_msg = e.what(); + } + WrapTextIndexAggregation(memory, result, search_results, error_msg); + }); +} + #ifdef MG_ENTERPRISE namespace { void NextPermitted(mgp_vertices_iterator &it) { diff --git a/src/query/procedure/mg_procedure_impl.hpp b/src/query/procedure/mg_procedure_impl.hpp index 17cac4eca..a91b4386c 100644 --- a/src/query/procedure/mg_procedure_impl.hpp +++ b/src/query/procedure/mg_procedure_impl.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -562,6 +562,13 @@ struct mgp_graph { memgraph::query::ExecutionContext *ctx; memgraph::storage::StorageMode storage_mode; + memgraph::query::DbAccessor *getImpl() const { + return std::visit( + memgraph::utils::Overloaded{[](memgraph::query::DbAccessor *impl) { return impl; }, + [](memgraph::query::SubgraphDbAccessor *impl) { return impl->GetAccessor(); }}, + this->impl); + } + static mgp_graph WritableGraph(memgraph::query::DbAccessor &acc, memgraph::storage::View view, memgraph::query::ExecutionContext &ctx) { return mgp_graph{&acc, view, &ctx, acc.GetStorageMode()}; diff --git a/src/storage/v2/CMakeLists.txt b/src/storage/v2/CMakeLists.txt index ec5108d63..49601eb54 100644 --- a/src/storage/v2/CMakeLists.txt +++ b/src/storage/v2/CMakeLists.txt @@ -20,6 +20,7 @@ add_library(mg-storage-v2 STATIC vertex_info_cache.cpp storage.cpp indices/indices.cpp + indices/text_index.cpp all_vertices_iterable.cpp edges_iterable.cpp vertices_iterable.cpp @@ -45,4 +46,5 @@ add_library(mg-storage-v2 STATIC inmemory/replication/recovery.cpp ) -target_link_libraries(mg-storage-v2 mg::replication Threads::Threads mg-utils gflags absl::flat_hash_map mg-rpc mg-slk mg-events mg-memory) +target_include_directories(mg-storage-v2 PUBLIC ${CMAKE_SOURCE_DIR}/include) +target_link_libraries(mg-storage-v2 mg::replication Threads::Threads mg-utils mg-flags gflags absl::flat_hash_map mg-rpc mg-slk mg-events mg-memory mgcxx_text_search tantivy_text_search) diff --git a/src/storage/v2/disk/durable_metadata.cpp b/src/storage/v2/disk/durable_metadata.cpp index fe2c558ae..13d515af2 100644 --- a/src/storage/v2/disk/durable_metadata.cpp +++ b/src/storage/v2/disk/durable_metadata.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
 //
 // Use of this software is governed by the Business Source License
 // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
@@ -26,6 +26,7 @@ constexpr const char *kVertexCountDescr = "vertex_count";
 constexpr const char *kEdgeDountDescr = "edge_count";
 constexpr const char *kLabelIndexStr = "label_index";
 constexpr const char *kLabelPropertyIndexStr = "label_property_index";
+constexpr const char *kTextIndexStr = "text_index";
 constexpr const char *kExistenceConstraintsStr = "existence_constraints";
 constexpr const char *kUniqueConstraintsStr = "unique_constraints";
 }  // namespace
@@ -144,6 +145,31 @@ bool DurableMetadata::PersistLabelPropertyIndexAndExistenceConstraintDeletion(La
   return true;
 }
 
+bool DurableMetadata::PersistTextIndexCreation(const std::string &index_name, LabelId label) {
+  const std::string index_name_label_pair = index_name + "," + label.ToString();
+  if (auto text_index_store = durability_kvstore_.Get(kTextIndexStr); text_index_store.has_value()) {
+    std::string &value = text_index_store.value();
+    value += "|";
+    value += index_name_label_pair;
+    return durability_kvstore_.Put(kTextIndexStr, value);
+  }
+  return durability_kvstore_.Put(kTextIndexStr, index_name_label_pair);
+}
+
+bool DurableMetadata::PersistTextIndexDeletion(const std::string &index_name, LabelId label) {
+  const std::string index_name_label_pair = index_name + "," + label.ToString();
+  if (auto text_index_store = durability_kvstore_.Get(kTextIndexStr); text_index_store.has_value()) {
+    const std::string &value = text_index_store.value();
+    std::vector text_indices = utils::Split(value, "|");
+    std::erase(text_indices, index_name_label_pair);
+    if (text_indices.empty()) {
+      return durability_kvstore_.Delete(kTextIndexStr);
+    }
+    return durability_kvstore_.Put(kTextIndexStr, utils::Join(text_indices, "|"));
+  }
+  return true;
+}
+
 bool DurableMetadata::PersistUniqueConstraintCreation(LabelId label, const std::set &properties) {
   const std::string entry = utils::GetKeyForUniqueConstraintsDurability(label, properties);
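
In other words, disk storage keeps all text index metadata under a single KV entry whose value is a "|"-separated list of "name,label" pairs; with two indices present, the stored entry might look like this (the names and numeric label IDs are illustrative):

text_index -> complianceDocuments,3|reportTitles,7

Deletion rewrites the list without the removed pair and drops the key once the list becomes empty, so a missing key simply means there are no text indices to recover.

diff --git a/src/storage/v2/disk/durable_metadata.hpp b/src/storage/v2/disk/durable_metadata.hpp
index 168cce469..4aaa8a707 100644
--- a/src/storage/v2/disk/durable_metadata.hpp
+++ b/src/storage/v2/disk/durable_metadata.hpp
@@ -1,4 +1,4 @@
-// Copyright 2023 Memgraph Ltd.
+// Copyright 2024 Memgraph Ltd.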
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -53,6 +53,10 @@ class DurableMetadata { bool PersistLabelPropertyIndexAndExistenceConstraintDeletion(LabelId label, PropertyId property, const std::string &key); + bool PersistTextIndexCreation(const std::string &index_name, LabelId label); + + bool PersistTextIndexDeletion(const std::string &index_name, LabelId label); + bool PersistUniqueConstraintCreation(LabelId label, const std::set &properties); bool PersistUniqueConstraintDeletion(LabelId label, const std::set &properties); diff --git a/src/storage/v2/disk/storage.cpp b/src/storage/v2/disk/storage.cpp index 21fa5ecc7..4dbd248f7 100644 --- a/src/storage/v2/disk/storage.cpp +++ b/src/storage/v2/disk/storage.cpp @@ -29,6 +29,8 @@ #include #include +#include "flags/experimental.hpp" +#include "flags/run_time_configurable.hpp" #include "kvstore/kvstore.hpp" #include "spdlog/spdlog.h" #include "storage/v2/constraints/unique_constraints.hpp" @@ -856,6 +858,7 @@ StorageInfo DiskStorage::GetInfo(memgraph::replication_coordination_glue::Replic const auto &lbl = access->ListAllIndices(); info.label_indices = lbl.label.size(); info.label_property_indices = lbl.label_property.size(); + info.text_indices = lbl.text_indices.size(); const auto &con = access->ListAllConstraints(); info.existence_constraints = con.existence.size(); info.unique_constraints = con.unique.size(); @@ -1670,6 +1673,18 @@ utils::BasicResult DiskStorage::DiskAccessor::Co case MetadataDelta::Action::LABEL_PROPERTY_INDEX_STATS_CLEAR: { throw utils::NotYetImplemented("ClearIndexStats(stats) is not implemented for DiskStorage."); } break; + case MetadataDelta::Action::TEXT_INDEX_CREATE: { + const auto &info = md_delta.text_index; + if (!disk_storage->durable_metadata_.PersistTextIndexCreation(info.index_name, info.label)) { + return StorageManipulationError{PersistenceError{}}; + } + } break; + case MetadataDelta::Action::TEXT_INDEX_DROP: { + const auto &info = md_delta.text_index; + if (!disk_storage->durable_metadata_.PersistTextIndexDeletion(info.index_name, info.label)) { + return StorageManipulationError{PersistenceError{}}; + } + } break; case MetadataDelta::Action::EXISTENCE_CONSTRAINT_CREATE: { const auto &info = md_delta.label_property; if (!disk_storage->durable_metadata_.PersistLabelPropertyIndexAndExistenceConstraintCreation( @@ -1768,6 +1783,9 @@ utils::BasicResult DiskStorage::DiskAccessor::Co return StorageManipulationError{SerializationError{}}; } spdlog::trace("rocksdb: Commit successful"); + if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + disk_storage->indices_.text_index_.Commit(); + } is_transaction_active_ = false; @@ -1886,6 +1904,9 @@ void DiskStorage::DiskAccessor::Abort() { // query_plan_accumulate_aggregate.cpp transaction_.disk_transaction_->Rollback(); transaction_.disk_transaction_->ClearSnapshot(); + if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + storage_->indices_.text_index_.Rollback(); + } delete transaction_.disk_transaction_; transaction_.disk_transaction_ = nullptr; is_transaction_active_ = false; @@ -2092,7 +2113,11 @@ IndicesInfo DiskStorage::DiskAccessor::ListAllIndices() const { auto *disk_label_index = static_cast(on_disk->indices_.label_index_.get()); auto *disk_label_property_index = static_cast(on_disk->indices_.label_property_index_.get()); - return {disk_label_index->ListIndices(), 
disk_label_property_index->ListIndices()}; + auto &text_index = storage_->indices_.text_index_; + return {disk_label_index->ListIndices(), + disk_label_property_index->ListIndices(), + {/* edge type indices */}, + text_index.ListIndices()}; } ConstraintsInfo DiskStorage::DiskAccessor::ListAllConstraints() const { auto *disk_storage = static_cast(storage_); diff --git a/src/storage/v2/durability/durability.cpp b/src/storage/v2/durability/durability.cpp index fbbedbee5..db8bcd93b 100644 --- a/src/storage/v2/durability/durability.cpp +++ b/src/storage/v2/durability/durability.cpp @@ -151,7 +151,8 @@ void RecoverConstraints(const RecoveredIndicesAndConstraints::ConstraintsMetadat void RecoverIndicesAndStats(const RecoveredIndicesAndConstraints::IndicesMetadata &indices_metadata, Indices *indices, utils::SkipList *vertices, NameIdMapper *name_id_mapper, - const std::optional ¶llel_exec_info) { + const std::optional ¶llel_exec_info, + const std::optional &storage_dir) { spdlog::info("Recreating indices from metadata."); // Recover label indices. @@ -211,6 +212,26 @@ void RecoverIndicesAndStats(const RecoveredIndicesAndConstraints::IndicesMetadat } spdlog::info("Edge-type indices are recreated."); + if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + // Recover text indices. + spdlog::info("Recreating {} text indices from metadata.", indices_metadata.text_indices.size()); + auto &mem_text_index = indices->text_index_; + for (const auto &[index_name, label] : indices_metadata.text_indices) { + try { + if (!storage_dir.has_value()) { + throw RecoveryFailure("There must exist a storage directory in order to recover text indices!"); + } + + mem_text_index.RecoverIndex(storage_dir.value(), index_name, label, vertices->access(), name_id_mapper); + } catch (...) 
{ + throw RecoveryFailure("The text index must be created here!"); + } + spdlog::info("Text index {} on :{} is recreated from metadata", index_name, + name_id_mapper->IdToName(label.AsUint())); + } + spdlog::info("Text indices are recreated."); + } + spdlog::info("Indices are recreated."); } @@ -331,8 +352,13 @@ std::optional Recovery::RecoverData(std::string *uuid, Replication repl_storage_state.epoch_.SetEpoch(std::move(recovered_snapshot->snapshot_info.epoch_id)); if (!utils::DirExists(wal_directory_)) { + std::optional storage_dir = std::nullopt; + if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + storage_dir = config.durability.storage_directory; + } + RecoverIndicesAndStats(indices_constraints.indices, indices, vertices, name_id_mapper, - GetParallelExecInfoIndices(recovery_info, config)); + GetParallelExecInfoIndices(recovery_info, config), storage_dir); RecoverConstraints(indices_constraints.constraints, constraints, vertices, name_id_mapper, GetParallelExecInfo(recovery_info, config)); return recovered_snapshot->recovery_info; @@ -467,8 +493,13 @@ std::optional Recovery::RecoverData(std::string *uuid, Replication spdlog::info("All necessary WAL files are loaded successfully."); } + std::optional storage_dir = std::nullopt; + if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + storage_dir = config.durability.storage_directory; + } + RecoverIndicesAndStats(indices_constraints.indices, indices, vertices, name_id_mapper, - GetParallelExecInfoIndices(recovery_info, config)); + GetParallelExecInfoIndices(recovery_info, config), storage_dir); RecoverConstraints(indices_constraints.constraints, constraints, vertices, name_id_mapper, GetParallelExecInfo(recovery_info, config)); diff --git a/src/storage/v2/durability/durability.hpp b/src/storage/v2/durability/durability.hpp index 97e2c7efc..5170b3b04 100644 --- a/src/storage/v2/durability/durability.hpp +++ b/src/storage/v2/durability/durability.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -102,7 +102,8 @@ std::optional> GetWalFiles(const std::filesystem: /// @throw RecoveryFailure void RecoverIndicesAndStats(const RecoveredIndicesAndConstraints::IndicesMetadata &indices_metadata, Indices *indices, utils::SkipList *vertices, NameIdMapper *name_id_mapper, - const std::optional ¶llel_exec_info = std::nullopt); + const std::optional ¶llel_exec_info = std::nullopt, + const std::optional &storage_dir = std::nullopt); // Helper function used to recover all discovered constraints. 
The // constraints must be recovered after the data recovery is done diff --git a/src/storage/v2/durability/marker.hpp b/src/storage/v2/durability/marker.hpp index ac0cc074d..18d693e51 100644 --- a/src/storage/v2/durability/marker.hpp +++ b/src/storage/v2/durability/marker.hpp @@ -64,6 +64,8 @@ enum class Marker : uint8_t { DELTA_LABEL_PROPERTY_INDEX_STATS_CLEAR = 0x64, DELTA_EDGE_TYPE_INDEX_CREATE = 0x65, DELTA_EDGE_TYPE_INDEX_DROP = 0x66, + DELTA_TEXT_INDEX_CREATE = 0x67, + DELTA_TEXT_INDEX_DROP = 0x68, VALUE_FALSE = 0x00, VALUE_TRUE = 0xff, @@ -110,6 +112,8 @@ static const Marker kMarkersAll[] = { Marker::DELTA_LABEL_PROPERTY_INDEX_DROP, Marker::DELTA_EDGE_TYPE_INDEX_CREATE, Marker::DELTA_EDGE_TYPE_INDEX_DROP, + Marker::DELTA_TEXT_INDEX_CREATE, + Marker::DELTA_TEXT_INDEX_DROP, Marker::DELTA_EXISTENCE_CONSTRAINT_CREATE, Marker::DELTA_EXISTENCE_CONSTRAINT_DROP, Marker::DELTA_UNIQUE_CONSTRAINT_CREATE, diff --git a/src/storage/v2/durability/metadata.hpp b/src/storage/v2/durability/metadata.hpp index c8ee27b2f..f36fc068d 100644 --- a/src/storage/v2/durability/metadata.hpp +++ b/src/storage/v2/durability/metadata.hpp @@ -44,6 +44,7 @@ struct RecoveredIndicesAndConstraints { std::vector> label_stats; std::vector>> label_property_stats; std::vector edge; + std::vector> text_indices; } indices; struct ConstraintsMetadata { diff --git a/src/storage/v2/durability/serialization.cpp b/src/storage/v2/durability/serialization.cpp index 28ba64943..becfa7f34 100644 --- a/src/storage/v2/durability/serialization.cpp +++ b/src/storage/v2/durability/serialization.cpp @@ -353,6 +353,8 @@ std::optional Decoder::ReadPropertyValue() { case Marker::DELTA_LABEL_PROPERTY_INDEX_DROP: case Marker::DELTA_EDGE_TYPE_INDEX_CREATE: case Marker::DELTA_EDGE_TYPE_INDEX_DROP: + case Marker::DELTA_TEXT_INDEX_CREATE: + case Marker::DELTA_TEXT_INDEX_DROP: case Marker::DELTA_EXISTENCE_CONSTRAINT_CREATE: case Marker::DELTA_EXISTENCE_CONSTRAINT_DROP: case Marker::DELTA_UNIQUE_CONSTRAINT_CREATE: @@ -459,6 +461,8 @@ bool Decoder::SkipPropertyValue() { case Marker::DELTA_LABEL_PROPERTY_INDEX_DROP: case Marker::DELTA_EDGE_TYPE_INDEX_CREATE: case Marker::DELTA_EDGE_TYPE_INDEX_DROP: + case Marker::DELTA_TEXT_INDEX_CREATE: + case Marker::DELTA_TEXT_INDEX_DROP: case Marker::DELTA_EXISTENCE_CONSTRAINT_CREATE: case Marker::DELTA_EXISTENCE_CONSTRAINT_DROP: case Marker::DELTA_UNIQUE_CONSTRAINT_CREATE: diff --git a/src/storage/v2/durability/snapshot.cpp b/src/storage/v2/durability/snapshot.cpp index 5fea3dfa5..0e3bb96e3 100644 --- a/src/storage/v2/durability/snapshot.cpp +++ b/src/storage/v2/durability/snapshot.cpp @@ -13,6 +13,8 @@ #include +#include "flags/experimental.hpp" +#include "flags/run_time_configurable.hpp" #include "spdlog/spdlog.h" #include "storage/v2/durability/exceptions.hpp" #include "storage/v2/durability/paths.hpp" @@ -2004,6 +2006,24 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis spdlog::info("Metadata of edge-type indices are recovered."); } + // Recover text indices. 
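+ // Only the (index name, label) metadata is stored in the snapshot; the Tantivy files themselves
+ // are rebuilt from the recovered vertices afterwards (see TextIndex::RecoverIndex).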
+ if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + auto size = snapshot.ReadUint(); + if (!size) throw RecoveryFailure("Couldn't recover the number of text indices!"); + spdlog::info("Recovering metadata of {} text indices.", *size); + for (uint64_t i = 0; i < *size; ++i) { + auto index_name = snapshot.ReadString(); + if (!index_name.has_value()) throw RecoveryFailure("Couldn't read text index name!"); + auto label = snapshot.ReadUint(); + if (!label) throw RecoveryFailure("Couldn't read text index label!"); + AddRecoveredIndexConstraint(&indices_constraints.indices.text_indices, + {index_name.value(), get_label_from_id(*label)}, "The text index already exists!"); + SPDLOG_TRACE("Recovered metadata of text index {} for :{}", index_name.value(), + name_id_mapper->IdToName(snapshot_id_map.at(*label))); + } + spdlog::info("Metadata of text indices are recovered."); + } + spdlog::info("Metadata of indices are recovered."); } @@ -2493,6 +2513,16 @@ void CreateSnapshot(Storage *storage, Transaction *transaction, const std::files write_mapping(item); } } + + // Write text indices. + if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + auto text_indices = storage->indices_.text_index_.ListIndices(); + snapshot.WriteUint(text_indices.size()); + for (const auto &[index_name, label] : text_indices) { + snapshot.WriteString(index_name); + write_mapping(label); + } + } } // Write constraints. diff --git a/src/storage/v2/durability/storage_global_operation.hpp b/src/storage/v2/durability/storage_global_operation.hpp index 7dd635e9d..d9c77b6c6 100644 --- a/src/storage/v2/durability/storage_global_operation.hpp +++ b/src/storage/v2/durability/storage_global_operation.hpp @@ -25,6 +25,8 @@ enum class StorageMetadataOperation { LABEL_PROPERTY_INDEX_STATS_CLEAR, EDGE_TYPE_INDEX_CREATE, EDGE_TYPE_INDEX_DROP, + TEXT_INDEX_CREATE, + TEXT_INDEX_DROP, EXISTENCE_CONSTRAINT_CREATE, EXISTENCE_CONSTRAINT_DROP, UNIQUE_CONSTRAINT_CREATE, diff --git a/src/storage/v2/durability/wal.cpp b/src/storage/v2/durability/wal.cpp index 5c40ab1c5..c684d818c 100644 --- a/src/storage/v2/durability/wal.cpp +++ b/src/storage/v2/durability/wal.cpp @@ -99,6 +99,10 @@ Marker OperationToMarker(StorageMetadataOperation operation) { return Marker::DELTA_EDGE_TYPE_INDEX_CREATE; case StorageMetadataOperation::EDGE_TYPE_INDEX_DROP: return Marker::DELTA_EDGE_TYPE_INDEX_DROP; + case StorageMetadataOperation::TEXT_INDEX_CREATE: + return Marker::DELTA_TEXT_INDEX_CREATE; + case StorageMetadataOperation::TEXT_INDEX_DROP: + return Marker::DELTA_TEXT_INDEX_DROP; case StorageMetadataOperation::EXISTENCE_CONSTRAINT_CREATE: return Marker::DELTA_EXISTENCE_CONSTRAINT_CREATE; case StorageMetadataOperation::EXISTENCE_CONSTRAINT_DROP: @@ -172,6 +176,10 @@ WalDeltaData::Type MarkerToWalDeltaDataType(Marker marker) { return WalDeltaData::Type::LABEL_PROPERTY_INDEX_CREATE; case Marker::DELTA_LABEL_PROPERTY_INDEX_DROP: return WalDeltaData::Type::LABEL_PROPERTY_INDEX_DROP; + case Marker::DELTA_TEXT_INDEX_CREATE: + return WalDeltaData::Type::TEXT_INDEX_CREATE; + case Marker::DELTA_TEXT_INDEX_DROP: + return WalDeltaData::Type::TEXT_INDEX_DROP; case Marker::DELTA_LABEL_PROPERTY_INDEX_STATS_SET: return WalDeltaData::Type::LABEL_PROPERTY_INDEX_STATS_SET; case Marker::DELTA_LABEL_PROPERTY_INDEX_STATS_CLEAR: @@ -382,6 +390,21 @@ WalDeltaData ReadSkipWalDeltaData(BaseDecoder *decoder) { if (!decoder->SkipString()) throw RecoveryFailure("Invalid WAL data!"); } } + break; + } + case WalDeltaData::Type::TEXT_INDEX_CREATE: + case 
WalDeltaData::Type::TEXT_INDEX_DROP: { + if constexpr (read_data) { + auto index_name = decoder->ReadString(); + if (!index_name) throw RecoveryFailure("Invalid WAL data!"); + delta.operation_text.index_name = std::move(*index_name); + auto label = decoder->ReadString(); + if (!label) throw RecoveryFailure("Invalid WAL data!"); + delta.operation_text.label = std::move(*label); + } else { + if (!decoder->SkipString() || !decoder->SkipString()) throw RecoveryFailure("Invalid WAL data!"); + } + break; } } @@ -529,6 +552,12 @@ bool operator==(const WalDeltaData &a, const WalDeltaData &b) { case WalDeltaData::Type::LABEL_PROPERTY_INDEX_CREATE: case WalDeltaData::Type::LABEL_PROPERTY_INDEX_DROP: + case WalDeltaData::Type::TEXT_INDEX_CREATE: + return a.operation_text.index_name == b.operation_text.index_name && + a.operation_text.label == b.operation_text.label; + case WalDeltaData::Type::TEXT_INDEX_DROP: + return a.operation_text.index_name == b.operation_text.index_name && + a.operation_text.label == b.operation_text.label; case WalDeltaData::Type::EXISTENCE_CONSTRAINT_CREATE: case WalDeltaData::Type::EXISTENCE_CONSTRAINT_DROP: return a.operation_label_property.label == b.operation_label_property.label && @@ -675,7 +704,8 @@ void EncodeTransactionEnd(BaseEncoder *encoder, uint64_t timestamp) { } void EncodeOperation(BaseEncoder *encoder, NameIdMapper *name_id_mapper, StorageMetadataOperation operation, - LabelId label, const std::set &properties, const LabelIndexStats &stats, + const std::optional text_index_name, LabelId label, + const std::set &properties, const LabelIndexStats &stats, const LabelPropertyIndexStats &property_stats, uint64_t timestamp) { encoder->WriteMarker(Marker::SECTION_DELTA); encoder->WriteUint(timestamp); @@ -731,6 +761,14 @@ void EncodeOperation(BaseEncoder *encoder, NameIdMapper *name_id_mapper, Storage case StorageMetadataOperation::EDGE_TYPE_INDEX_DROP: { MG_ASSERT(false, "Invalid function call!"); } + case StorageMetadataOperation::TEXT_INDEX_CREATE: + case StorageMetadataOperation::TEXT_INDEX_DROP: { + MG_ASSERT(text_index_name.has_value(), "Text indices must be named!"); + encoder->WriteMarker(OperationToMarker(operation)); + encoder->WriteString(text_index_name.value()); + encoder->WriteString(name_id_mapper->IdToName(label.AsUint())); + break; + } } } @@ -752,6 +790,8 @@ void EncodeOperation(BaseEncoder *encoder, NameIdMapper *name_id_mapper, Storage case StorageMetadataOperation::LABEL_INDEX_STATS_SET: case StorageMetadataOperation::LABEL_PROPERTY_INDEX_CREATE: case StorageMetadataOperation::LABEL_PROPERTY_INDEX_DROP: + case StorageMetadataOperation::TEXT_INDEX_CREATE: + case StorageMetadataOperation::TEXT_INDEX_DROP: case StorageMetadataOperation::EXISTENCE_CONSTRAINT_CREATE: case StorageMetadataOperation::EXISTENCE_CONSTRAINT_DROP: case StorageMetadataOperation::LABEL_PROPERTY_INDEX_STATS_SET: @@ -1000,6 +1040,20 @@ RecoveryInfo LoadWal(const std::filesystem::path &path, RecoveredIndicesAndConst "The label index stats doesn't exist!"); break; } + case WalDeltaData::Type::TEXT_INDEX_CREATE: { + auto index_name = delta.operation_text.index_name; + auto label = LabelId::FromUint(name_id_mapper->NameToId(delta.operation_text.label)); + AddRecoveredIndexConstraint(&indices_constraints->indices.text_indices, {index_name, label}, + "The text index already exists!"); + break; + } + case WalDeltaData::Type::TEXT_INDEX_DROP: { + auto index_name = delta.operation_text.index_name; + auto label = LabelId::FromUint(name_id_mapper->NameToId(delta.operation_text.label)); + 
RemoveRecoveredIndexConstraint(&indices_constraints->indices.text_indices, {index_name, label}, + "The text index doesn't exist!"); + break; + } case WalDeltaData::Type::EXISTENCE_CONSTRAINT_CREATE: { auto label_id = LabelId::FromUint(name_id_mapper->NameToId(delta.operation_label_property.label)); auto property_id = PropertyId::FromUint(name_id_mapper->NameToId(delta.operation_label_property.property)); @@ -1148,10 +1202,11 @@ void WalFile::AppendTransactionEnd(uint64_t timestamp) { UpdateStats(timestamp); } -void WalFile::AppendOperation(StorageMetadataOperation operation, LabelId label, const std::set &properties, - const LabelIndexStats &stats, const LabelPropertyIndexStats &property_stats, - uint64_t timestamp) { - EncodeOperation(&wal_, name_id_mapper_, operation, label, properties, stats, property_stats, timestamp); +void WalFile::AppendOperation(StorageMetadataOperation operation, const std::optional text_index_name, + LabelId label, const std::set &properties, const LabelIndexStats &stats, + const LabelPropertyIndexStats &property_stats, uint64_t timestamp) { + EncodeOperation(&wal_, name_id_mapper_, operation, text_index_name, label, properties, stats, property_stats, + timestamp); UpdateStats(timestamp); } diff --git a/src/storage/v2/durability/wal.hpp b/src/storage/v2/durability/wal.hpp index 516487e0d..4990e6979 100644 --- a/src/storage/v2/durability/wal.hpp +++ b/src/storage/v2/durability/wal.hpp @@ -69,6 +69,8 @@ struct WalDeltaData { LABEL_PROPERTY_INDEX_STATS_CLEAR, EDGE_INDEX_CREATE, EDGE_INDEX_DROP, + TEXT_INDEX_CREATE, + TEXT_INDEX_DROP, EXISTENCE_CONSTRAINT_CREATE, EXISTENCE_CONSTRAINT_DROP, UNIQUE_CONSTRAINT_CREATE, @@ -127,6 +129,11 @@ struct WalDeltaData { std::string property; std::string stats; } operation_label_property_stats; + + struct { + std::string index_name; + std::string label; + } operation_text; }; bool operator==(const WalDeltaData &a, const WalDeltaData &b); @@ -163,6 +170,8 @@ constexpr bool IsWalDeltaDataTypeTransactionEndVersion15(const WalDeltaData::Typ case WalDeltaData::Type::LABEL_PROPERTY_INDEX_STATS_CLEAR: case WalDeltaData::Type::EDGE_INDEX_CREATE: case WalDeltaData::Type::EDGE_INDEX_DROP: + case WalDeltaData::Type::TEXT_INDEX_CREATE: + case WalDeltaData::Type::TEXT_INDEX_DROP: case WalDeltaData::Type::EXISTENCE_CONSTRAINT_CREATE: case WalDeltaData::Type::EXISTENCE_CONSTRAINT_DROP: case WalDeltaData::Type::UNIQUE_CONSTRAINT_CREATE: @@ -213,7 +222,8 @@ void EncodeTransactionEnd(BaseEncoder *encoder, uint64_t timestamp); /// Function used to encode non-transactional operation. 
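+/// For TEXT_INDEX_CREATE and TEXT_INDEX_DROP, text_index_name must contain the name of the affected
+/// index; it is written to the WAL alongside the name of the label that scopes the index.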
void EncodeOperation(BaseEncoder *encoder, NameIdMapper *name_id_mapper, StorageMetadataOperation operation, - LabelId label, const std::set &properties, const LabelIndexStats &stats, + const std::optional text_index_name, LabelId label, + const std::set &properties, const LabelIndexStats &stats, const LabelPropertyIndexStats &property_stats, uint64_t timestamp); void EncodeOperation(BaseEncoder *encoder, NameIdMapper *name_id_mapper, StorageMetadataOperation operation, @@ -248,8 +258,9 @@ class WalFile { void AppendTransactionEnd(uint64_t timestamp); - void AppendOperation(StorageMetadataOperation operation, LabelId label, const std::set &properties, - const LabelIndexStats &stats, const LabelPropertyIndexStats &property_stats, uint64_t timestamp); + void AppendOperation(StorageMetadataOperation operation, const std::optional text_index_name, + LabelId label, const std::set &properties, const LabelIndexStats &stats, + const LabelPropertyIndexStats &property_stats, uint64_t timestamp); void AppendOperation(StorageMetadataOperation operation, EdgeTypeId edge_type, uint64_t timestamp); diff --git a/src/storage/v2/indices/indices.cpp b/src/storage/v2/indices/indices.cpp index 6068f888f..1cbde2362 100644 --- a/src/storage/v2/indices/indices.cpp +++ b/src/storage/v2/indices/indices.cpp @@ -16,6 +16,7 @@ #include "storage/v2/inmemory/edge_type_index.hpp" #include "storage/v2/inmemory/label_index.hpp" #include "storage/v2/inmemory/label_property_index.hpp" +#include "storage/v2/storage.hpp" namespace memgraph::storage { diff --git a/src/storage/v2/indices/indices.hpp b/src/storage/v2/indices/indices.hpp index 40cff577f..6f1bc44db 100644 --- a/src/storage/v2/indices/indices.hpp +++ b/src/storage/v2/indices/indices.hpp @@ -18,6 +18,7 @@ #include "storage/v2/indices/edge_type_index.hpp" #include "storage/v2/indices/label_index.hpp" #include "storage/v2/indices/label_property_index.hpp" +#include "storage/v2/indices/text_index.hpp" #include "storage/v2/storage_mode.hpp" namespace memgraph::storage { @@ -31,12 +32,12 @@ struct Indices { Indices &operator=(Indices &&) = delete; ~Indices() = default; - /// This function should be called from garbage collection to clean-up the + /// This function should be called from garbage collection to clean up the /// index. /// TODO: unused in disk indices void RemoveObsoleteEntries(uint64_t oldest_active_start_timestamp, std::stop_token token) const; - /// Surgical removal of entries that was inserted this transaction + /// Surgical removal of entries that were inserted in this transaction /// TODO: unused in disk indices void AbortEntries(LabelId labelId, std::span vertices, uint64_t exact_start_timestamp) const; void AbortEntries(PropertyId property, std::span const> vertices, @@ -71,6 +72,7 @@ struct Indices { std::unique_ptr label_index_; std::unique_ptr label_property_index_; std::unique_ptr edge_type_index_; + mutable TextIndex text_index_; }; } // namespace memgraph::storage diff --git a/src/storage/v2/indices/text_index.cpp b/src/storage/v2/indices/text_index.cpp new file mode 100644 index 000000000..1c9488097 --- /dev/null +++ b/src/storage/v2/indices/text_index.cpp @@ -0,0 +1,430 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. 
+// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#include "storage/v2/indices/text_index.hpp" +#include "flags/experimental.hpp" +#include "flags/run_time_configurable.hpp" +#include "query/db_accessor.hpp" +#include "storage/v2/view.hpp" +#include "text_search.hpp" + +namespace memgraph::storage { + +std::string GetPropertyName(PropertyId prop_id, memgraph::query::DbAccessor *db) { return db->PropertyToName(prop_id); } + +std::string GetPropertyName(PropertyId prop_id, NameIdMapper *name_id_mapper) { + return name_id_mapper->IdToName(prop_id.AsUint()); +} + +inline std::string TextIndex::MakeIndexPath(const std::filesystem::path &storage_dir, const std::string &index_name) { + return (storage_dir / kTextIndicesDirectory / index_name).string(); +} + +void TextIndex::CreateEmptyIndex(const std::filesystem::path &storage_dir, const std::string &index_name, + LabelId label) { + if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + throw query::TextSearchDisabledException(); + } + + if (index_.contains(index_name)) { + throw query::TextSearchException("Text index \"{}\" already exists.", index_name); + } + + try { + nlohmann::json mappings = {}; + mappings["properties"] = {}; + mappings["properties"]["metadata"] = {{"type", "json"}, {"fast", true}, {"stored", true}, {"text", true}}; + mappings["properties"]["data"] = {{"type", "json"}, {"fast", true}, {"stored", true}, {"text", true}}; + mappings["properties"]["all"] = {{"type", "text"}, {"fast", true}, {"stored", true}, {"text", true}}; + + index_.emplace(index_name, TextIndexData{.context_ = mgcxx::text_search::create_index( + MakeIndexPath(storage_dir, index_name), + mgcxx::text_search::IndexConfig{.mappings = mappings.dump()}), + .scope_ = label}); + } catch (const std::exception &e) { + throw query::TextSearchException("Tantivy error: {}", e.what()); + } + label_to_index_.emplace(label, index_name); +} + +template +nlohmann::json TextIndex::SerializeProperties(const std::map &properties, T *name_resolver) { + nlohmann::json serialized_properties = nlohmann::json::value_t::object; + for (const auto &[prop_id, prop_value] : properties) { + switch (prop_value.type()) { + case PropertyValue::Type::Bool: + serialized_properties[GetPropertyName(prop_id, name_resolver)] = prop_value.ValueBool(); + break; + case PropertyValue::Type::Int: + serialized_properties[GetPropertyName(prop_id, name_resolver)] = prop_value.ValueInt(); + break; + case PropertyValue::Type::Double: + serialized_properties[GetPropertyName(prop_id, name_resolver)] = prop_value.ValueDouble(); + break; + case PropertyValue::Type::String: + serialized_properties[GetPropertyName(prop_id, name_resolver)] = prop_value.ValueString(); + break; + case PropertyValue::Type::Null: + case PropertyValue::Type::List: + case PropertyValue::Type::Map: + case PropertyValue::Type::TemporalData: + default: + continue; + } + } + + return serialized_properties; +} + +std::string TextIndex::StringifyProperties(const std::map &properties) { + std::vector indexable_properties_as_string; + for (const auto &[_, prop_value] : properties) { + switch (prop_value.type()) { + case PropertyValue::Type::Bool: + indexable_properties_as_string.push_back(prop_value.ValueBool() ? 
"true" : "false"); + break; + case PropertyValue::Type::Int: + indexable_properties_as_string.push_back(std::to_string(prop_value.ValueInt())); + break; + case PropertyValue::Type::Double: + indexable_properties_as_string.push_back(std::to_string(prop_value.ValueDouble())); + break; + case PropertyValue::Type::String: + indexable_properties_as_string.push_back(prop_value.ValueString()); + break; + // NOTE: As the following types aren‘t indexed in Tantivy, they don’t appear in the property value string either. + case PropertyValue::Type::Null: + case PropertyValue::Type::List: + case PropertyValue::Type::Map: + case PropertyValue::Type::TemporalData: + default: + continue; + } + } + return utils::Join(indexable_properties_as_string, " "); +} + +std::vector TextIndex::GetApplicableTextIndices(const std::vector &labels) { + if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + throw query::TextSearchDisabledException(); + } + + std::vector applicable_text_indices; + for (const auto &label : labels) { + if (label_to_index_.contains(label)) { + applicable_text_indices.push_back(&index_.at(label_to_index_.at(label)).context_); + } + } + return applicable_text_indices; +} + +void TextIndex::LoadNodeToTextIndices(const std::int64_t gid, const nlohmann::json &properties, + const std::string &property_values_as_str, + const std::vector &applicable_text_indices) { + if (applicable_text_indices.empty()) { + return; + } + + // NOTE: Text indexes are presently all-property indices. If we allow text indexes restricted to specific properties, + // an indexable document should be created for each applicable index. + nlohmann::json document = {}; + document["data"] = properties; + document["all"] = property_values_as_str; + document["metadata"] = {}; + document["metadata"]["gid"] = gid; + document["metadata"]["deleted"] = false; + document["metadata"]["is_node"] = true; + + for (auto *index_context : applicable_text_indices) { + try { + mgcxx::text_search::add_document( + *index_context, + mgcxx::text_search::DocumentInput{ + .data = document.dump(-1, ' ', false, nlohmann::json::error_handler_t::replace)}, + kDoSkipCommit); + } catch (const std::exception &e) { + throw query::TextSearchException("Tantivy error: {}", e.what()); + } + } +} + +void TextIndex::CommitLoadedNodes(mgcxx::text_search::Context &index_context) { + // As CREATE TEXT INDEX (...) queries don’t accumulate deltas, db_transactional_accessor_->Commit() does not reach + // the code area where changes to indices are committed. To get around that without needing to commit text indices + // after every such query, we commit here. 
+ try { + mgcxx::text_search::commit(index_context); + } catch (const std::exception &e) { + throw query::TextSearchException("Tantivy error: {}", e.what()); + } +} + +void TextIndex::AddNode( + Vertex *vertex_after_update, NameIdMapper *name_id_mapper, + const std::optional> &maybe_applicable_text_indices) { + if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + throw query::TextSearchDisabledException(); + } + + auto applicable_text_indices = + maybe_applicable_text_indices.value_or(GetApplicableTextIndices(vertex_after_update->labels)); + if (applicable_text_indices.empty()) { + return; + } + + auto vertex_properties = vertex_after_update->properties.Properties(); + LoadNodeToTextIndices(vertex_after_update->gid.AsInt(), SerializeProperties(vertex_properties, name_id_mapper), + StringifyProperties(vertex_properties), applicable_text_indices); +} + +void TextIndex::UpdateNode(Vertex *vertex_after_update, NameIdMapper *name_id_mapper, + const std::vector &removed_labels) { + if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + throw query::TextSearchDisabledException(); + } + + if (!removed_labels.empty()) { + auto indexes_to_remove_node_from = GetApplicableTextIndices(removed_labels); + RemoveNode(vertex_after_update, indexes_to_remove_node_from); + } + + auto applicable_text_indices = GetApplicableTextIndices(vertex_after_update->labels); + if (applicable_text_indices.empty()) return; + RemoveNode(vertex_after_update, applicable_text_indices); + AddNode(vertex_after_update, name_id_mapper, applicable_text_indices); +} + +void TextIndex::RemoveNode( + Vertex *vertex_after_update, + const std::optional> &maybe_applicable_text_indices) { + if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + throw query::TextSearchDisabledException(); + } + + auto search_node_to_be_deleted = + mgcxx::text_search::SearchInput{.search_query = fmt::format("metadata.gid:{}", vertex_after_update->gid.AsInt())}; + + for (auto *index_context : + maybe_applicable_text_indices.value_or(GetApplicableTextIndices(vertex_after_update->labels))) { + try { + mgcxx::text_search::delete_document(*index_context, search_node_to_be_deleted, kDoSkipCommit); + } catch (const std::exception &e) { + throw query::TextSearchException("Tantivy error: {}", e.what()); + } + } +} + +void TextIndex::CreateIndex(const std::filesystem::path &storage_dir, const std::string &index_name, LabelId label, + memgraph::query::DbAccessor *db) { + if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + throw query::TextSearchDisabledException(); + } + + CreateEmptyIndex(storage_dir, index_name, label); + + for (const auto &v : db->Vertices(View::NEW)) { + if (!v.HasLabel(View::NEW, label).GetValue()) { + continue; + } + + auto vertex_properties = v.Properties(View::NEW).GetValue(); + LoadNodeToTextIndices(v.Gid().AsInt(), SerializeProperties(vertex_properties, db), + StringifyProperties(vertex_properties), {&index_.at(index_name).context_}); + } + + CommitLoadedNodes(index_.at(index_name).context_); +} + +void TextIndex::RecoverIndex(const std::filesystem::path &storage_dir, const std::string &index_name, LabelId label, + memgraph::utils::SkipList::Accessor vertices, NameIdMapper *name_id_mapper) { + if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + throw query::TextSearchDisabledException(); + } + + // Clear Tantivy-internal files if they exist from previous sessions + std::filesystem::remove_all(storage_dir / kTextIndicesDirectory / index_name); + + 
CreateEmptyIndex(storage_dir, index_name, label); + + for (const auto &v : vertices) { + if (std::find(v.labels.begin(), v.labels.end(), label) == v.labels.end()) { + continue; + } + + auto vertex_properties = v.properties.Properties(); + LoadNodeToTextIndices(v.gid.AsInt(), SerializeProperties(vertex_properties, name_id_mapper), + StringifyProperties(vertex_properties), {&index_.at(index_name).context_}); + } + + CommitLoadedNodes(index_.at(index_name).context_); +} + +LabelId TextIndex::DropIndex(const std::filesystem::path &storage_dir, const std::string &index_name) { + if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + throw query::TextSearchDisabledException(); + } + + if (!index_.contains(index_name)) { + throw query::TextSearchException("Text index \"{}\" doesn’t exist.", index_name); + } + + try { + mgcxx::text_search::drop_index(MakeIndexPath(storage_dir, index_name)); + } catch (const std::exception &e) { + throw query::TextSearchException("Tantivy error: {}", e.what()); + } + auto deleted_index_label = index_.at(index_name).scope_; + + index_.erase(index_name); + std::erase_if(label_to_index_, [index_name](const auto &item) { return item.second == index_name; }); + + return deleted_index_label; +} + +bool TextIndex::IndexExists(const std::string &index_name) const { return index_.contains(index_name); } + +mgcxx::text_search::SearchOutput TextIndex::SearchGivenProperties(const std::string &index_name, + const std::string &search_query) { + try { + return mgcxx::text_search::search( + index_.at(index_name).context_, + mgcxx::text_search::SearchInput{.search_query = search_query, .return_fields = {"metadata"}}); + } catch (const std::exception &e) { + throw query::TextSearchException("Tantivy error: {}", e.what()); + } + + return mgcxx::text_search::SearchOutput{}; +} + +mgcxx::text_search::SearchOutput TextIndex::RegexSearch(const std::string &index_name, + const std::string &search_query) { + try { + return mgcxx::text_search::regex_search( + index_.at(index_name).context_, + mgcxx::text_search::SearchInput{ + .search_fields = {"all"}, .search_query = search_query, .return_fields = {"metadata"}}); + } catch (const std::exception &e) { + throw query::TextSearchException("Tantivy error: {}", e.what()); + } + + return mgcxx::text_search::SearchOutput{}; +} + +mgcxx::text_search::SearchOutput TextIndex::SearchAllProperties(const std::string &index_name, + const std::string &search_query) { + try { + return mgcxx::text_search::search( + index_.at(index_name).context_, + mgcxx::text_search::SearchInput{ + .search_fields = {"all"}, .search_query = search_query, .return_fields = {"metadata"}}); + } catch (const std::exception &e) { + throw query::TextSearchException("Tantivy error: {}", e.what()); + } + + return mgcxx::text_search::SearchOutput{}; +} + +std::vector TextIndex::Search(const std::string &index_name, const std::string &search_query, + text_search_mode search_mode) { + if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + throw query::TextSearchDisabledException(); + } + + if (!index_.contains(index_name)) { + throw query::TextSearchException("Text index \"{}\" doesn’t exist.", index_name); + } + + mgcxx::text_search::SearchOutput search_results; + switch (search_mode) { + case text_search_mode::SPECIFIED_PROPERTIES: + search_results = SearchGivenProperties(index_name, search_query); + break; + case text_search_mode::REGEX: + search_results = RegexSearch(index_name, search_query); + break; + case text_search_mode::ALL_PROPERTIES: + 
search_results = SearchAllProperties(index_name, search_query); + break; + default: + throw query::TextSearchException( + "Unsupported search mode: please use one of text_search.search, text_search.search_all, or " + "text_search.regex_search."); + } + + std::vector found_nodes; + for (const auto &doc : search_results.docs) { + // The CXX .data() method (https://cxx.rs/binding/string.html) may overestimate string length, causing JSON parsing + // errors downstream. We prevent this by resizing the converted string with the correctly-working .length() method. + std::string doc_string = doc.data.data(); + doc_string.resize(doc.data.length()); + auto doc_json = nlohmann::json::parse(doc_string); + found_nodes.push_back(storage::Gid::FromString(doc_json["metadata"]["gid"].dump())); + } + return found_nodes; +} + +std::string TextIndex::Aggregate(const std::string &index_name, const std::string &search_query, + const std::string &aggregation_query) { + if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + throw query::TextSearchDisabledException(); + } + + if (!index_.contains(index_name)) { + throw query::TextSearchException("Text index \"{}\" doesn’t exist.", index_name); + } + + mgcxx::text_search::DocumentOutput aggregation_result; + try { + aggregation_result = mgcxx::text_search::aggregate( + index_.at(index_name).context_, + mgcxx::text_search::SearchInput{ + .search_fields = {"all"}, .search_query = search_query, .aggregation_query = aggregation_query}); + + } catch (const std::exception &e) { + throw query::TextSearchException("Tantivy error: {}", e.what()); + } + // The CXX .data() method (https://cxx.rs/binding/string.html) may overestimate string length, causing JSON parsing + // errors downstream. We prevent this by resizing the converted string with the correctly-working .length() method. + std::string result_string = aggregation_result.data.data(); + result_string.resize(aggregation_result.data.length()); + return result_string; +} + +void TextIndex::Commit() { + if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + throw query::TextSearchDisabledException(); + } + + for (auto &[_, index_data] : index_) { + mgcxx::text_search::commit(index_data.context_); + } +} + +void TextIndex::Rollback() { + if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + throw query::TextSearchDisabledException(); + } + + for (auto &[_, index_data] : index_) { + mgcxx::text_search::rollback(index_data.context_); + } +} + +std::vector> TextIndex::ListIndices() const { + std::vector> ret; + ret.reserve(index_.size()); + for (const auto &[index_name, index_data] : index_) { + ret.emplace_back(index_name, index_data.scope_); + } + return ret; +} + +} // namespace memgraph::storage diff --git a/src/storage/v2/indices/text_index.hpp b/src/storage/v2/indices/text_index.hpp new file mode 100644 index 000000000..af4748c6e --- /dev/null +++ b/src/storage/v2/indices/text_index.hpp @@ -0,0 +1,105 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
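+// Tantivy-backed text index. Each named index is scoped to a single label; an indexed vertex is
+// stored as a JSON document with three fields: "data" (serialized properties), "all" (stringified
+// property values, used by all-property and regex search) and "metadata" (gid, deleted, is_node).
+// Example, as exercised by the e2e tests below:
+//   CREATE TEXT INDEX complianceDocuments ON :Document;
+//   CALL libtext.search("complianceDocuments", "data.title:Rules2024") YIELD node RETURN node;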
+ +#pragma once + +#include +#include "mg_procedure.h" +#include "storage/v2/id_types.hpp" +#include "storage/v2/name_id_mapper.hpp" +#include "storage/v2/vertex.hpp" +#include "text_search.hpp" + +namespace memgraph::query { +class DbAccessor; +} + +namespace memgraph::storage { +struct TextIndexData { + mgcxx::text_search::Context context_; + LabelId scope_; +}; + +class TextIndex { + private: + static constexpr bool kDoSkipCommit = true; + static constexpr std::string_view kTextIndicesDirectory = "text_indices"; + + inline std::string MakeIndexPath(const std::filesystem::path &storage_dir, const std::string &index_name); + + void CreateEmptyIndex(const std::filesystem::path &storage_dir, const std::string &index_name, LabelId label); + + template + nlohmann::json SerializeProperties(const std::map &properties, T *name_resolver); + + std::string StringifyProperties(const std::map &properties); + + std::vector GetApplicableTextIndices(const std::vector &labels); + + void LoadNodeToTextIndices(const std::int64_t gid, const nlohmann::json &properties, + const std::string &property_values_as_str, + const std::vector &applicable_text_indices); + + void CommitLoadedNodes(mgcxx::text_search::Context &index_context); + + mgcxx::text_search::SearchOutput SearchGivenProperties(const std::string &index_name, + const std::string &search_query); + + mgcxx::text_search::SearchOutput RegexSearch(const std::string &index_name, const std::string &search_query); + + mgcxx::text_search::SearchOutput SearchAllProperties(const std::string &index_name, const std::string &search_query); + + public: + TextIndex() = default; + + TextIndex(const TextIndex &) = delete; + TextIndex(TextIndex &&) = delete; + TextIndex &operator=(const TextIndex &) = delete; + TextIndex &operator=(TextIndex &&) = delete; + + ~TextIndex() = default; + + std::map index_; + std::map label_to_index_; + + void AddNode( + Vertex *vertex, NameIdMapper *name_id_mapper, + const std::optional> &maybe_applicable_text_indices = std::nullopt); + + void UpdateNode(Vertex *vertex, NameIdMapper *name_id_mapper, const std::vector &removed_labels = {}); + + void RemoveNode( + Vertex *vertex, + const std::optional> &maybe_applicable_text_indices = std::nullopt); + + void CreateIndex(const std::filesystem::path &storage_dir, const std::string &index_name, LabelId label, + memgraph::query::DbAccessor *db); + + void RecoverIndex(const std::filesystem::path &storage_dir, const std::string &index_name, LabelId label, + memgraph::utils::SkipList::Accessor vertices, NameIdMapper *name_id_mapper); + + LabelId DropIndex(const std::filesystem::path &storage_dir, const std::string &index_name); + + bool IndexExists(const std::string &index_name) const; + + std::vector Search(const std::string &index_name, const std::string &search_query, text_search_mode search_mode); + + std::string Aggregate(const std::string &index_name, const std::string &search_query, + const std::string &aggregation_query); + + void Commit(); + + void Rollback(); + + std::vector> ListIndices() const; +}; + +} // namespace memgraph::storage diff --git a/src/storage/v2/inmemory/storage.cpp b/src/storage/v2/inmemory/storage.cpp index dab56750b..dbaa56ce2 100644 --- a/src/storage/v2/inmemory/storage.cpp +++ b/src/storage/v2/inmemory/storage.cpp @@ -15,6 +15,8 @@ #include #include #include "dbms/constants.hpp" +#include "flags/experimental.hpp" +#include "flags/run_time_configurable.hpp" #include "memory/global_memory_control.hpp" #include "storage/v2/durability/durability.hpp" #include 
"storage/v2/durability/snapshot.hpp" @@ -890,6 +892,10 @@ utils::BasicResult InMemoryStorage::InMemoryAcce commit_timestamp_.reset(); // We have aborted, hence we have not committed return StorageManipulationError{*unique_constraint_violation}; } + + if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + mem_storage->indices_.text_index_.Commit(); + } } is_transaction_active_ = false; @@ -1213,6 +1219,9 @@ void InMemoryStorage::InMemoryAccessor::Abort() { for (auto const &[property, prop_vertices] : property_cleanup) { storage_->indices_.AbortEntries(property, prop_vertices, transaction_.start_timestamp); } + if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + storage_->indices_.text_index_.Rollback(); + } // VERTICES { @@ -1846,6 +1855,7 @@ StorageInfo InMemoryStorage::GetInfo(memgraph::replication_coordination_glue::Re const auto &lbl = access->ListAllIndices(); info.label_indices = lbl.label.size(); info.label_property_indices = lbl.label_property.size(); + info.text_indices = lbl.text_indices.size(); const auto &con = access->ListAllConstraints(); info.existence_constraints = con.existence.size(); info.unique_constraints = con.unique.size(); @@ -2107,6 +2117,16 @@ bool InMemoryStorage::AppendToWal(const Transaction &transaction, uint64_t final AppendToWalDataDefinition(durability::StorageMetadataOperation::LABEL_PROPERTY_INDEX_STATS_CLEAR, info.label, final_commit_timestamp); } break; + case MetadataDelta::Action::TEXT_INDEX_CREATE: { + const auto &info = md_delta.text_index; + AppendToWalDataDefinition(durability::StorageMetadataOperation::TEXT_INDEX_CREATE, info.index_name, info.label, + final_commit_timestamp); + } break; + case MetadataDelta::Action::TEXT_INDEX_DROP: { + const auto &info = md_delta.text_index; + AppendToWalDataDefinition(durability::StorageMetadataOperation::TEXT_INDEX_DROP, info.index_name, info.label, + final_commit_timestamp); + } break; case MetadataDelta::Action::EXISTENCE_CONSTRAINT_CREATE: { const auto &info = md_delta.label_property; AppendToWalDataDefinition(durability::StorageMetadataOperation::EXISTENCE_CONSTRAINT_CREATE, info.label, @@ -2137,11 +2157,13 @@ bool InMemoryStorage::AppendToWal(const Transaction &transaction, uint64_t final return repl_storage_state_.FinalizeTransaction(final_commit_timestamp, this, std::move(db_acc)); } -void InMemoryStorage::AppendToWalDataDefinition(durability::StorageMetadataOperation operation, LabelId label, +void InMemoryStorage::AppendToWalDataDefinition(durability::StorageMetadataOperation operation, + const std::optional text_index_name, LabelId label, const std::set &properties, LabelIndexStats stats, LabelPropertyIndexStats property_stats, uint64_t final_commit_timestamp) { - wal_file_->AppendOperation(operation, label, properties, stats, property_stats, final_commit_timestamp); + wal_file_->AppendOperation(operation, text_index_name, label, properties, stats, property_stats, + final_commit_timestamp); repl_storage_state_.AppendOperation(operation, label, properties, stats, property_stats, final_commit_timestamp); } @@ -2155,12 +2177,13 @@ void InMemoryStorage::AppendToWalDataDefinition(durability::StorageMetadataOpera const std::set &properties, LabelPropertyIndexStats property_stats, uint64_t final_commit_timestamp) { - return AppendToWalDataDefinition(operation, label, properties, {}, property_stats, final_commit_timestamp); + return AppendToWalDataDefinition(operation, std::nullopt, label, properties, {}, property_stats, + final_commit_timestamp); } void 
InMemoryStorage::AppendToWalDataDefinition(durability::StorageMetadataOperation operation, LabelId label, LabelIndexStats stats, uint64_t final_commit_timestamp) { - return AppendToWalDataDefinition(operation, label, {}, stats, {}, final_commit_timestamp); + return AppendToWalDataDefinition(operation, std::nullopt, label, {}, stats, {}, final_commit_timestamp); } void InMemoryStorage::AppendToWalDataDefinition(durability::StorageMetadataOperation operation, LabelId label, @@ -2174,6 +2197,12 @@ void InMemoryStorage::AppendToWalDataDefinition(durability::StorageMetadataOpera return AppendToWalDataDefinition(operation, label, {}, {}, final_commit_timestamp); } +void InMemoryStorage::AppendToWalDataDefinition(durability::StorageMetadataOperation operation, + const std::optional text_index_name, LabelId label, + uint64_t final_commit_timestamp) { + return AppendToWalDataDefinition(operation, text_index_name, label, {}, {}, {}, final_commit_timestamp); +} + utils::BasicResult InMemoryStorage::CreateSnapshot( memgraph::replication_coordination_glue::ReplicationRole replication_role) { using memgraph::replication_coordination_glue::ReplicationRole; @@ -2301,7 +2330,9 @@ IndicesInfo InMemoryStorage::InMemoryAccessor::ListAllIndices() const { auto *mem_label_property_index = static_cast(in_memory->indices_.label_property_index_.get()); auto *mem_edge_type_index = static_cast(in_memory->indices_.edge_type_index_.get()); - return {mem_label_index->ListIndices(), mem_label_property_index->ListIndices(), mem_edge_type_index->ListIndices()}; + auto &text_index = storage_->indices_.text_index_; + return {mem_label_index->ListIndices(), mem_label_property_index->ListIndices(), mem_edge_type_index->ListIndices(), + text_index.ListIndices()}; } ConstraintsInfo InMemoryStorage::InMemoryAccessor::ListAllConstraints() const { const auto *mem_storage = static_cast(storage_); diff --git a/src/storage/v2/inmemory/storage.hpp b/src/storage/v2/inmemory/storage.hpp index 6d10e0fbd..6532471f3 100644 --- a/src/storage/v2/inmemory/storage.hpp +++ b/src/storage/v2/inmemory/storage.hpp @@ -398,7 +398,7 @@ class InMemoryStorage final : public Storage { StorageInfo GetBaseInfo() override; StorageInfo GetInfo(memgraph::replication_coordination_glue::ReplicationRole replication_role) override; - /// Return true in all cases excepted if any sync replicas have not sent confirmation. + /// Return true in all cases except if any sync replicas have not sent confirmation. 
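+/// Text index create/drop operations reach the WAL through the AppendToWalDataDefinition overloads
+/// below that carry the optional text index name.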
[[nodiscard]] bool AppendToWal(const Transaction &transaction, uint64_t final_commit_timestamp, DatabaseAccessProtector db_acc); void AppendToWalDataDefinition(durability::StorageMetadataOperation operation, LabelId label, @@ -412,9 +412,13 @@ class InMemoryStorage final : public Storage { void AppendToWalDataDefinition(durability::StorageMetadataOperation operation, LabelId label, const std::set &properties, LabelPropertyIndexStats property_stats, uint64_t final_commit_timestamp); - void AppendToWalDataDefinition(durability::StorageMetadataOperation operation, LabelId label, + void AppendToWalDataDefinition(durability::StorageMetadataOperation operation, + const std::optional text_index_name, LabelId label, const std::set &properties, LabelIndexStats stats, LabelPropertyIndexStats property_stats, uint64_t final_commit_timestamp); + void AppendToWalDataDefinition(durability::StorageMetadataOperation operation, + const std::optional text_index_name, LabelId label, + uint64_t final_commit_timestamp); uint64_t CommitTimestamp(std::optional desired_commit_timestamp = {}); diff --git a/src/storage/v2/metadata_delta.hpp b/src/storage/v2/metadata_delta.hpp index b34966a62..e4616161d 100644 --- a/src/storage/v2/metadata_delta.hpp +++ b/src/storage/v2/metadata_delta.hpp @@ -37,6 +37,8 @@ struct MetadataDelta { LABEL_PROPERTY_INDEX_STATS_CLEAR, EDGE_INDEX_CREATE, EDGE_INDEX_DROP, + TEXT_INDEX_CREATE, + TEXT_INDEX_DROP, EXISTENCE_CONSTRAINT_CREATE, EXISTENCE_CONSTRAINT_DROP, UNIQUE_CONSTRAINT_CREATE, @@ -63,6 +65,10 @@ struct MetadataDelta { } edge_index_create; static constexpr struct EdgeIndexDrop { } edge_index_drop; + static constexpr struct TextIndexCreate { + } text_index_create; + static constexpr struct TextIndexDrop { + } text_index_drop; static constexpr struct ExistenceConstraintCreate { } existence_constraint_create; static constexpr struct ExistenceConstraintDrop { @@ -98,6 +104,12 @@ struct MetadataDelta { MetadataDelta(EdgeIndexDrop /*tag*/, EdgeTypeId edge_type) : action(Action::EDGE_INDEX_DROP), edge_type(edge_type) {} + MetadataDelta(TextIndexCreate /*tag*/, std::string index_name, LabelId label) + : action(Action::TEXT_INDEX_CREATE), text_index{index_name, label} {} + + MetadataDelta(TextIndexDrop /*tag*/, std::string index_name, LabelId label) + : action(Action::TEXT_INDEX_DROP), text_index{index_name, label} {} + MetadataDelta(ExistenceConstraintCreate /*tag*/, LabelId label, PropertyId property) : action(Action::EXISTENCE_CONSTRAINT_CREATE), label_property{label, property} {} @@ -127,6 +139,8 @@ struct MetadataDelta { case Action::LABEL_PROPERTY_INDEX_STATS_CLEAR: case Action::EDGE_INDEX_CREATE: case Action::EDGE_INDEX_DROP: + case Action::TEXT_INDEX_CREATE: + case Action::TEXT_INDEX_DROP: case Action::EXISTENCE_CONSTRAINT_CREATE: case Action::EXISTENCE_CONSTRAINT_DROP: break; @@ -164,6 +178,11 @@ struct MetadataDelta { PropertyId property; LabelPropertyIndexStats stats; } label_property_stats; + + struct { + std::string index_name; + LabelId label; + } text_index; }; }; diff --git a/src/storage/v2/property_store.cpp b/src/storage/v2/property_store.cpp index adf3440a2..0cfee0f98 100644 --- a/src/storage/v2/property_store.cpp +++ b/src/storage/v2/property_store.cpp @@ -118,7 +118,7 @@ enum class Type : uint8_t { STRING = 0x50, LIST = 0x60, MAP = 0x70, - TEMPORAL_DATA = 0x80 + TEMPORAL_DATA = 0x80, }; const uint8_t kMaskType = 0xf0; diff --git a/src/storage/v2/replication/replication_client.cpp b/src/storage/v2/replication/replication_client.cpp index a02c1eff0..ee1394fdb 100644 
--- a/src/storage/v2/replication/replication_client.cpp +++ b/src/storage/v2/replication/replication_client.cpp @@ -406,8 +406,9 @@ void ReplicaStream::AppendOperation(durability::StorageMetadataOperation operati const std::set &properties, const LabelIndexStats &stats, const LabelPropertyIndexStats &property_stats, uint64_t timestamp) { replication::Encoder encoder(stream_.GetBuilder()); - EncodeOperation(&encoder, storage_->name_id_mapper_.get(), operation, label, properties, stats, property_stats, - timestamp); + // NOTE: Text search doesn’t have replication in scope yet (Phases 1 and 2) -> text index name not sent here + EncodeOperation(&encoder, storage_->name_id_mapper_.get(), operation, std::nullopt, label, properties, stats, + property_stats, timestamp); } void ReplicaStream::AppendOperation(durability::StorageMetadataOperation operation, EdgeTypeId edge_type, diff --git a/src/storage/v2/storage.cpp b/src/storage/v2/storage.cpp index 536a504a0..db4bec8be 100644 --- a/src/storage/v2/storage.cpp +++ b/src/storage/v2/storage.cpp @@ -13,6 +13,8 @@ #include "absl/container/flat_hash_set.h" #include "spdlog/spdlog.h" +#include "flags/experimental.hpp" +#include "flags/run_time_configurable.hpp" #include "storage/v2/disk/name_id_mapper.hpp" #include "storage/v2/storage.hpp" #include "storage/v2/transaction.hpp" @@ -273,6 +275,12 @@ Storage::Accessor::DetachDelete(std::vector nodes, std::vector return maybe_deleted_vertices.GetError(); } + if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { + for (auto *node : nodes_to_delete) { + storage_->indices_.text_index_.RemoveNode(node); + } + } + auto deleted_vertices = maybe_deleted_vertices.GetValue(); return std::make_optional(std::move(deleted_vertices), std::move(deleted_edges)); @@ -543,4 +551,19 @@ void Storage::Accessor::MarkEdgeAsDeleted(Edge *edge) { } } +void Storage::Accessor::CreateTextIndex(const std::string &index_name, LabelId label, query::DbAccessor *db) { + MG_ASSERT(unique_guard_.owns_lock(), "Creating a text index requires unique access to storage!"); + storage_->indices_.text_index_.CreateIndex(storage_->config_.durability.storage_directory, index_name, label, db); + transaction_.md_deltas.emplace_back(MetadataDelta::text_index_create, index_name, label); + memgraph::metrics::IncrementCounter(memgraph::metrics::ActiveTextIndices); +} + +void Storage::Accessor::DropTextIndex(const std::string &index_name) { + MG_ASSERT(unique_guard_.owns_lock(), "Dropping a text index requires unique access to storage!"); + auto deleted_index_label = + storage_->indices_.text_index_.DropIndex(storage_->config_.durability.storage_directory, index_name); + transaction_.md_deltas.emplace_back(MetadataDelta::text_index_drop, index_name, deleted_index_label); + memgraph::metrics::DecrementCounter(memgraph::metrics::ActiveTextIndices); +} + } // namespace memgraph::storage diff --git a/src/storage/v2/storage.hpp b/src/storage/v2/storage.hpp index 58936bd56..a4436b1b7 100644 --- a/src/storage/v2/storage.hpp +++ b/src/storage/v2/storage.hpp @@ -20,6 +20,7 @@ #include "io/network/endpoint.hpp" #include "kvstore/kvstore.hpp" +#include "mg_procedure.h" #include "query/exceptions.hpp" #include "replication/config.hpp" #include "replication/replication_server.hpp" @@ -53,6 +54,7 @@ extern const Event SnapshotCreationLatency_us; extern const Event ActiveLabelIndices; extern const Event ActiveLabelPropertyIndices; +extern const Event ActiveTextIndices; } // namespace memgraph::metrics namespace memgraph::storage { @@ -63,6 +65,7 @@ struct 
IndicesInfo { std::vector label; std::vector> label_property; std::vector edge_type; + std::vector> text_indices; }; struct ConstraintsInfo { @@ -78,6 +81,7 @@ struct StorageInfo { uint64_t disk_usage; uint64_t label_indices; uint64_t label_property_indices; + uint64_t text_indices; uint64_t existence_constraints; uint64_t unique_constraints; StorageMode storage_mode; @@ -95,6 +99,7 @@ static inline nlohmann::json ToJson(const StorageInfo &info) { res["disk"] = info.disk_usage; res["label_indices"] = info.label_indices; res["label_prop_indices"] = info.label_property_indices; + res["text_indices"] = info.text_indices; res["existence_constraints"] = info.existence_constraints; res["unique_constraints"] = info.unique_constraints; res["storage_mode"] = storage::StorageModeToString(info.storage_mode); @@ -232,6 +237,28 @@ class Storage { virtual bool EdgeTypeIndexExists(EdgeTypeId edge_type) const = 0; + bool TextIndexExists(const std::string &index_name) const { + return storage_->indices_.text_index_.IndexExists(index_name); + } + + void TextIndexAddVertex(const VertexAccessor &vertex) { + storage_->indices_.text_index_.AddNode(vertex.vertex_, storage_->name_id_mapper_.get()); + } + + void TextIndexUpdateVertex(const VertexAccessor &vertex, const std::vector &removed_labels = {}) { + storage_->indices_.text_index_.UpdateNode(vertex.vertex_, storage_->name_id_mapper_.get(), removed_labels); + } + + std::vector TextIndexSearch(const std::string &index_name, const std::string &search_query, + text_search_mode search_mode) const { + return storage_->indices_.text_index_.Search(index_name, search_query, search_mode); + } + + std::string TextIndexAggregate(const std::string &index_name, const std::string &search_query, + const std::string &aggregation_query) const { + return storage_->indices_.text_index_.Aggregate(index_name, search_query, aggregation_query); + } + virtual IndicesInfo ListAllIndices() const = 0; virtual ConstraintsInfo ListAllConstraints() const = 0; @@ -284,6 +311,10 @@ class Storage { virtual utils::BasicResult DropIndex(EdgeTypeId edge_type) = 0; + void CreateTextIndex(const std::string &index_name, LabelId label, query::DbAccessor *db); + + void DropTextIndex(const std::string &index_name); + virtual utils::BasicResult CreateExistenceConstraint( LabelId label, PropertyId property) = 0; diff --git a/src/utils/event_counter.cpp b/src/utils/event_counter.cpp index 54ff4ed5c..7b1579a93 100644 --- a/src/utils/event_counter.cpp +++ b/src/utils/event_counter.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -60,6 +60,7 @@ \ M(ActiveLabelIndices, Index, "Number of active label indices in the system.") \ M(ActiveLabelPropertyIndices, Index, "Number of active label property indices in the system.") \ + M(ActiveTextIndices, Index, "Number of active text indices in the system.") \ \ M(StreamsCreated, Stream, "Number of Streams created.") \ M(MessagesConsumed, Stream, "Number of consumed streamed messages.") \ diff --git a/src/utils/typeinfo.hpp b/src/utils/typeinfo.hpp index 77910f731..aeb62d2c1 100644 --- a/src/utils/typeinfo.hpp +++ b/src/utils/typeinfo.hpp @@ -187,6 +187,7 @@ enum class TypeId : uint64_t { AST_PROFILE_QUERY, AST_INDEX_QUERY, AST_EDGE_INDEX_QUERY, + AST_TEXT_INDEX_QUERY, AST_CREATE, AST_CALL_PROCEDURE, AST_MATCH, diff --git a/tests/e2e/configuration/default_config.py b/tests/e2e/configuration/default_config.py index 75c211e0f..11435da65 100644 --- a/tests/e2e/configuration/default_config.py +++ b/tests/e2e/configuration/default_config.py @@ -226,6 +226,6 @@ startup_config_dict = { "experimental_enabled": ( "", "", - "Experimental features to be used, comma seperated. Options [system-replication, high-availability]", + "Experimental features to be used, comma-separated. Options [system-replication, text-search, high-availability]", ), } diff --git a/tests/e2e/text_search/CMakeLists.txt b/tests/e2e/text_search/CMakeLists.txt new file mode 100644 index 000000000..db2af7a11 --- /dev/null +++ b/tests/e2e/text_search/CMakeLists.txt @@ -0,0 +1,6 @@ +function(copy_text_search_e2e_python_files FILE_NAME) + copy_e2e_python_files(text_search ${FILE_NAME}) +endfunction() + +copy_text_search_e2e_python_files(common.py) +copy_text_search_e2e_python_files(test_text_search.py) diff --git a/tests/e2e/text_search/common.py b/tests/e2e/text_search/common.py new file mode 100644 index 000000000..0f28351d3 --- /dev/null +++ b/tests/e2e/text_search/common.py @@ -0,0 +1,87 @@ +# Copyright 2023 Memgraph Ltd. +# +# Use of this software is governed by the Business Source License +# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +# License, and you may not use this file except in compliance with the Business Source License. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0, included in the file +# licenses/APL.txt. 
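+# Shared fixtures for the text search e2e tests: a raw mgclient connection plus gqlalchemy Memgraph
+# instances, two of which come preloaded with text-indexed example data.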
+ +import typing + +import mgclient +import pytest +from gqlalchemy import Memgraph + + +def execute_and_fetch_all(cursor: mgclient.Cursor, query: str, params: dict = {}) -> typing.List[tuple]: + cursor.execute(query, params) + return cursor.fetchall() + + +@pytest.fixture +def connect(**kwargs) -> mgclient.Connection: + connection = mgclient.connect(host="localhost", port=7687, **kwargs) + connection.autocommit = True + cursor = connection.cursor() + execute_and_fetch_all(cursor, """USE DATABASE memgraph""") + try: + execute_and_fetch_all(cursor, """DROP DATABASE clean""") + except: + pass + execute_and_fetch_all(cursor, """MATCH (n) DETACH DELETE n""") + yield connection + + +@pytest.fixture +def memgraph(**kwargs) -> Memgraph: + memgraph = Memgraph() + + yield memgraph + + memgraph.drop_database() + memgraph.drop_indexes() + + +@pytest.fixture +def memgraph_with_text_indexed_data(**kwargs) -> Memgraph: + memgraph = Memgraph() + + memgraph.execute( + """CREATE (:Document {title: "Rules2024", version: 1, fulltext: "random works", date: date("2023-11-14")});""" + ) + memgraph.execute( + """CREATE (:Document {title: "Rules2023", version: 9, fulltext: "text Rules2024", date: date("2023-11-14")});""" + ) + memgraph.execute( + """CREATE (:Document:Revision {title: "Rules2024", version: 2, fulltext: "random words", date: date("2023-12-15")});""" + ) + memgraph.execute("""CREATE (:Revision {title: "OperationSchema", version: 3, date: date("2023-10-01")});""") + memgraph.execute("""CREATE TEXT INDEX complianceDocuments ON :Document;""") + + yield memgraph + + memgraph.execute("""DROP TEXT INDEX complianceDocuments;""") + memgraph.drop_database() + memgraph.drop_indexes() + + +@pytest.fixture +def memgraph_with_mixed_data(**kwargs) -> Memgraph: + memgraph = Memgraph() + + memgraph.execute( + """CREATE (:Document:Revision {title: "Rules2024", version: 1, date: date("2023-11-14"), contents: "Lorem ipsum dolor sit amet"});""" + ) + memgraph.execute( + """CREATE (:Revision {title: "Rules2024", version: 2, date: date("2023-12-15"), contents: "consectetur adipiscing elit"});""" + ) + memgraph.execute("""CREATE TEXT INDEX complianceDocuments ON :Document;""") + + yield memgraph + + memgraph.execute("""DROP TEXT INDEX complianceDocuments;""") + memgraph.drop_database() + memgraph.drop_indexes() diff --git a/tests/e2e/text_search/test_text_search.py b/tests/e2e/text_search/test_text_search.py new file mode 100644 index 000000000..8d538d464 --- /dev/null +++ b/tests/e2e/text_search/test_text_search.py @@ -0,0 +1,206 @@ +# Copyright 2024 Memgraph Ltd. +# +# Use of this software is governed by the Business Source License +# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +# License, and you may not use this file except in compliance with the Business Source License. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0, included in the file +# licenses/APL.txt. 
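+# End-to-end tests for text search. They assume Memgraph runs with the experimental text-search
+# feature enabled (--experimental-enabled=text-search) and the libtext query module loaded.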
+
+import json
+import re
+import sys
+
+import gqlalchemy
+import mgclient
+import pytest
+from common import memgraph, memgraph_with_mixed_data, memgraph_with_text_indexed_data
+
+GET_RULES_2024_DOCUMENT = """CALL libtext.search("complianceDocuments", "data.title:Rules2024") YIELD node
+                    RETURN node.title AS title, node.version AS version
+                    ORDER BY version ASC, title ASC;"""
+
+
+def test_create_index(memgraph):
+    memgraph.execute("""CREATE TEXT INDEX exampleIndex ON :Document;""")
+
+    index_info = memgraph.execute_and_fetch("""SHOW INDEX INFO""")
+
+    assert list(index_info) == [
+        {"index type": "text (name: exampleIndex)", "label": "Document", "property": None, "count": None}
+    ]
+
+
+def test_drop_index(memgraph):
+    memgraph.execute("""DROP TEXT INDEX exampleIndex;""")
+
+    index_info = memgraph.execute_and_fetch("""SHOW INDEX INFO""")
+
+    assert list(index_info) == []
+
+
+def test_create_existing_index(memgraph):
+    memgraph.execute("""CREATE TEXT INDEX duplicatedIndex ON :Document;""")
+    with pytest.raises(
+        gqlalchemy.exceptions.GQLAlchemyDatabaseError, match='Text index "duplicatedIndex" already exists.'
+    ) as _:
+        memgraph.execute("""CREATE TEXT INDEX duplicatedIndex ON :Document;""")
+    memgraph.execute("""DROP TEXT INDEX duplicatedIndex;""")  # cleanup
+
+
+def test_drop_nonexistent_index(memgraph):
+    with pytest.raises(
+        gqlalchemy.exceptions.GQLAlchemyDatabaseError, match='Text index "noSuchIndex" doesn’t exist.'
+    ) as _:
+        memgraph.execute("""DROP TEXT INDEX noSuchIndex;""")
+
+
+def test_text_search_given_property(memgraph_with_text_indexed_data):
+    result = list(memgraph_with_text_indexed_data.execute_and_fetch(GET_RULES_2024_DOCUMENT))
+
+    assert len(result) == 2 and result == [{"title": "Rules2024", "version": 1}, {"title": "Rules2024", "version": 2}]
+
+
+def test_text_search_all_properties(memgraph_with_text_indexed_data):
+    SEARCH_QUERY = "Rules2024"
+
+    ALL_PROPERTIES_QUERY = f"""CALL libtext.search_all("complianceDocuments", "{SEARCH_QUERY}") YIELD node
+                    RETURN node
+                    ORDER BY node.version ASC, node.title ASC;"""
+
+    result = list(memgraph_with_text_indexed_data.execute_and_fetch(ALL_PROPERTIES_QUERY))
+    result_nodes = [record["node"] for record in result]
+
+    assert len(result) == 3 and (
+        result_nodes[0].title == SEARCH_QUERY
+        and result_nodes[1].title == SEARCH_QUERY
+        and SEARCH_QUERY in result_nodes[2].fulltext
+    )
+
+
+def test_regex_text_search(memgraph_with_text_indexed_data):
+    REGEX_QUERY = """CALL libtext.regex_search("complianceDocuments", "wor.*s") YIELD node
+                    RETURN node
+                    ORDER BY node.version ASC, node.title ASC;"""
+
+    result = list(memgraph_with_text_indexed_data.execute_and_fetch(REGEX_QUERY))
+
+    assert (
+        len(result) == 2
+        and re.search("wor.*s", result[0]["node"].fulltext)
+        and re.search("wor.*s", result[1]["node"].fulltext)
+        # In this test, all values matching the regex are found in the nodes’ `fulltext` property
+    )
+
+
+def test_text_search_aggregate(memgraph_with_text_indexed_data):
+    input_aggregation = json.dumps({"count": {"value_count": {"field": "metadata.gid"}}}, separators=(",", ":"))
+    expected_aggregation = json.dumps({"count": {"value": 2.0}}, separators=(",", ":"))
+
+    AGGREGATION_QUERY = f"""CALL libtext.aggregate("complianceDocuments", "data.title:Rules2024", '{input_aggregation}')
+                    YIELD aggregation
+                    RETURN aggregation;"""
+
+    result = list(memgraph_with_text_indexed_data.execute_and_fetch(AGGREGATION_QUERY))
+
+    assert len(result) == 1 and result[0]["aggregation"] == expected_aggregation
+
+
+def
test_text_search_query_boolean(memgraph_with_text_indexed_data): + BOOLEAN_QUERY = """CALL libtext.search("complianceDocuments", "(data.title:Rules2023 OR data.title:Rules2024) AND data.fulltext:words") YIELD node + RETURN node.title AS title, node.version AS version + ORDER BY version ASC, title ASC;""" + + result = list(memgraph_with_text_indexed_data.execute_and_fetch(BOOLEAN_QUERY)) + + assert len(result) == 1 and result == [{"title": "Rules2024", "version": 2}] + + +def test_create_indexed_node(memgraph_with_text_indexed_data): + memgraph_with_text_indexed_data.execute("""CREATE (:Document {title: "Rules2024", version: 3});""") + + result = list(memgraph_with_text_indexed_data.execute_and_fetch(GET_RULES_2024_DOCUMENT)) + + assert len(result) == 3 and result == [ + {"title": "Rules2024", "version": 1}, + {"title": "Rules2024", "version": 2}, + {"title": "Rules2024", "version": 3}, + ] + + +def test_delete_indexed_node(memgraph_with_text_indexed_data): + memgraph_with_text_indexed_data.execute("""MATCH (n:Document {title: "Rules2024", version: 2}) DETACH DELETE n;""") + + result = list(memgraph_with_text_indexed_data.execute_and_fetch(GET_RULES_2024_DOCUMENT)) + + assert len(result) == 1 and result == [{"title": "Rules2024", "version": 1}] + + +def test_add_indexed_label(memgraph_with_mixed_data): + memgraph_with_mixed_data.execute("""MATCH (n:Revision {version:2}) SET n:Document;""") + + result = list(memgraph_with_mixed_data.execute_and_fetch(GET_RULES_2024_DOCUMENT)) + + assert len(result) == 2 and result == [{"title": "Rules2024", "version": 1}, {"title": "Rules2024", "version": 2}] + + +def test_remove_indexed_label(memgraph_with_mixed_data): + memgraph_with_mixed_data.execute("""MATCH (n:Document {version: 1}) REMOVE n:Document;""") + + result = list(memgraph_with_mixed_data.execute_and_fetch(GET_RULES_2024_DOCUMENT)) + + assert len(result) == 0 + + +def test_update_text_property_of_indexed_node(memgraph_with_text_indexed_data): + memgraph_with_text_indexed_data.execute("""MATCH (n:Document {version:1}) SET n.title = "Rules2030";""") + + result = list( + memgraph_with_text_indexed_data.execute_and_fetch( + """CALL libtext.search("complianceDocuments", "data.title:Rules2030") YIELD node + RETURN node.title AS title, node.version AS version + ORDER BY version ASC, title ASC;""" + ) + ) + + assert len(result) == 1 and result == [{"title": "Rules2030", "version": 1}] + + +def test_add_unindexable_property_to_indexed_node(memgraph_with_text_indexed_data): + try: + memgraph_with_text_indexed_data.execute("""MATCH (n:Document {version:1}) SET n.randomList = [2, 3, 4, 5];""") + except Exception: + assert False + + +def test_remove_indexable_property_from_indexed_node(memgraph_with_text_indexed_data): + try: + memgraph_with_text_indexed_data.execute( + """MATCH (n:Document {version:1}) REMOVE n.title, n.version, n.fulltext, n.date;""" + ) + except Exception: + assert False + + +def test_remove_unindexable_property_from_indexed_node(memgraph_with_text_indexed_data): + try: + memgraph_with_text_indexed_data.execute_and_fetch( + """MATCH (n:Document {date: date("2023-12-15")}) REMOVE n.date;""" + ) + except Exception: + assert False + + +def test_text_search_nonexistent_index(memgraph_with_text_indexed_data): + NONEXISTENT_INDEX_QUERY = """CALL libtext.search("noSuchIndex", "data.fulltext:words") YIELD node + RETURN node.title AS title, node.version AS version + ORDER BY version ASC, title ASC;""" + + with pytest.raises(mgclient.DatabaseError, match='Text index "noSuchIndex" doesn’t exist.') 
as _: + list(memgraph_with_text_indexed_data.execute_and_fetch(NONEXISTENT_INDEX_QUERY)) + + +if __name__ == "__main__": + sys.exit(pytest.main([__file__, "-rA"])) diff --git a/tests/e2e/text_search/test_text_search_disabled.py b/tests/e2e/text_search/test_text_search_disabled.py new file mode 100644 index 000000000..064f7b409 --- /dev/null +++ b/tests/e2e/text_search/test_text_search_disabled.py @@ -0,0 +1,69 @@ +# Copyright 2024 Memgraph Ltd. +# +# Use of this software is governed by the Business Source License +# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +# License, and you may not use this file except in compliance with the Business Source License. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0, included in the file +# licenses/APL.txt. + +import json +import sys + +import gqlalchemy +import pytest +from common import memgraph + +TEXT_SEARCH_DISABLED_ERROR = ( + "To use text indices and text search, start Memgraph with the experimental text search feature enabled." +) + + +def test_create_index(memgraph): + with pytest.raises(gqlalchemy.exceptions.GQLAlchemyDatabaseError, match=TEXT_SEARCH_DISABLED_ERROR) as _: + memgraph.execute("""CREATE TEXT INDEX exampleIndex ON :Document;""") + + +def test_drop_index(memgraph): + with pytest.raises(gqlalchemy.exceptions.GQLAlchemyDatabaseError, match=TEXT_SEARCH_DISABLED_ERROR) as _: + memgraph.execute("""DROP TEXT INDEX exampleIndex;""") + + +def test_text_search_given_property(memgraph): + with pytest.raises(gqlalchemy.exceptions.GQLAlchemyDatabaseError, match=TEXT_SEARCH_DISABLED_ERROR) as _: + memgraph.execute( + """CALL libtext.search("complianceDocuments", "data.title:Rules2024") YIELD node + RETURN node;""" + ) + + +def test_text_search_all_properties(memgraph): + with pytest.raises(gqlalchemy.exceptions.GQLAlchemyDatabaseError, match=TEXT_SEARCH_DISABLED_ERROR) as _: + memgraph.execute( + """CALL libtext.search_all("complianceDocuments", "Rules2024") YIELD node + RETURN node;""" + ) + + +def test_regex_text_search(memgraph): + with pytest.raises(gqlalchemy.exceptions.GQLAlchemyDatabaseError, match=TEXT_SEARCH_DISABLED_ERROR) as _: + memgraph.execute( + """CALL libtext.regex_search("complianceDocuments", "wor.*s") YIELD node + RETURN node;""" + ) + + +def test_text_search_aggregate(memgraph): + with pytest.raises(gqlalchemy.exceptions.GQLAlchemyDatabaseError, match=TEXT_SEARCH_DISABLED_ERROR) as _: + input_aggregation = json.dumps({"count": {"value_count": {"field": "metadata.gid"}}}, separators=(",", ":")) + + memgraph.execute( + f"""CALL libtext.aggregate("complianceDocuments", "wor.*s", '{input_aggregation}') YIELD aggregation + RETURN aggregation;""" + ) + + +if __name__ == "__main__": + sys.exit(pytest.main([__file__, "-rA"])) diff --git a/tests/e2e/text_search/workloads.yaml b/tests/e2e/text_search/workloads.yaml new file mode 100644 index 000000000..5b1640715 --- /dev/null +++ b/tests/e2e/text_search/workloads.yaml @@ -0,0 +1,33 @@ +text_search_cluster: &text_search_cluster + cluster: + main: + args: + [ + "--bolt-port", + "7687", + "--log-level=TRACE", + "--experimental-enabled=text-search", + ] + log_file: "text_search.log" + setup_queries: [] + validation_queries: [] + +text_search_disabled_cluster: &text_search_disabled_cluster + cluster: + main: + args: ["--bolt-port", "7687", "--log-level=TRACE"] + log_file: 
"text_search.log" + setup_queries: [] + validation_queries: [] + +workloads: + - name: "Test behavior of text search in Memgraph" + binary: "tests/e2e/pytest_runner.sh" + proc: "tests/e2e/text_search/query_modules/" + args: ["text_search/test_text_search.py"] + <<: *text_search_cluster + - name: "Test behavior of text search in Memgraph when disabled" + binary: "tests/e2e/pytest_runner.sh" + proc: "tests/e2e/text_search/query_modules/" + args: ["text_search/test_text_search_disabled.py"] + <<: *text_search_disabled_cluster diff --git a/tests/unit/query_dump.cpp b/tests/unit/query_dump.cpp index a2ca2864d..2dd1e7ac7 100644 --- a/tests/unit/query_dump.cpp +++ b/tests/unit/query_dump.cpp @@ -71,6 +71,11 @@ struct DatabaseState { std::string property; }; + struct TextItem { + std::string index_name; + std::string label; + }; + struct LabelPropertiesItem { std::string label; std::set> properties; @@ -80,6 +85,7 @@ struct DatabaseState { std::set edges; std::set label_indices; std::set label_property_indices; + std::set text_indices; std::set existence_constraints; std::set unique_constraints; }; @@ -106,6 +112,10 @@ bool operator<(const DatabaseState::LabelPropertyItem &first, const DatabaseStat return first.property < second.property; } +bool operator<(const DatabaseState::TextItem &first, const DatabaseState::TextItem &second) { + return first.index_name < second.index_name && first.label < second.label; +} + bool operator<(const DatabaseState::LabelPropertiesItem &first, const DatabaseState::LabelPropertiesItem &second) { if (first.label != second.label) return first.label < second.label; return first.properties < second.properties; @@ -128,6 +138,10 @@ bool operator==(const DatabaseState::LabelPropertyItem &first, const DatabaseSta return first.label == second.label && first.property == second.property; } +bool operator==(const DatabaseState::TextItem &first, const DatabaseState::TextItem &second) { + return first.index_name == second.index_name && first.label == second.label; +} + bool operator==(const DatabaseState::LabelPropertiesItem &first, const DatabaseState::LabelPropertiesItem &second) { return first.label == second.label && first.properties == second.properties; } @@ -185,6 +199,7 @@ DatabaseState GetState(memgraph::storage::Storage *db) { // Capture all indices std::set label_indices; std::set label_property_indices; + std::set text_indices; { auto info = dba->ListAllIndices(); for (const auto &item : info.label) { @@ -193,6 +208,9 @@ DatabaseState GetState(memgraph::storage::Storage *db) { for (const auto &item : info.label_property) { label_property_indices.insert({dba->LabelToName(item.first), dba->PropertyToName(item.second)}); } + for (const auto &item : info.text_indices) { + text_indices.insert({item.first, dba->LabelToName(item.second)}); + } } // Capture all constraints @@ -212,7 +230,8 @@ DatabaseState GetState(memgraph::storage::Storage *db) { } } - return {vertices, edges, label_indices, label_property_indices, existence_constraints, unique_constraints}; + return {vertices, edges, label_indices, label_property_indices, text_indices, existence_constraints, + unique_constraints}; } auto Execute(memgraph::query::InterpreterContext *context, memgraph::dbms::DatabaseAccess db, diff --git a/tests/unit/storage_v2_decoder_encoder.cpp b/tests/unit/storage_v2_decoder_encoder.cpp index 15db49b1c..0264e2287 100644 --- a/tests/unit/storage_v2_decoder_encoder.cpp +++ b/tests/unit/storage_v2_decoder_encoder.cpp @@ -358,6 +358,8 @@ TEST_F(DecoderEncoderTest, 
PropertyValueInvalidMarker) { case memgraph::storage::durability::Marker::DELTA_LABEL_PROPERTY_INDEX_STATS_CLEAR: case memgraph::storage::durability::Marker::DELTA_EDGE_TYPE_INDEX_CREATE: case memgraph::storage::durability::Marker::DELTA_EDGE_TYPE_INDEX_DROP: + case memgraph::storage::durability::Marker::DELTA_TEXT_INDEX_CREATE: + case memgraph::storage::durability::Marker::DELTA_TEXT_INDEX_DROP: case memgraph::storage::durability::Marker::DELTA_EXISTENCE_CONSTRAINT_CREATE: case memgraph::storage::durability::Marker::DELTA_EXISTENCE_CONSTRAINT_DROP: case memgraph::storage::durability::Marker::DELTA_UNIQUE_CONSTRAINT_CREATE: diff --git a/tests/unit/storage_v2_get_info.cpp b/tests/unit/storage_v2_get_info.cpp index 71dbc1a8d..ee5c1bb62 100644 --- a/tests/unit/storage_v2_get_info.cpp +++ b/tests/unit/storage_v2_get_info.cpp @@ -146,6 +146,7 @@ TYPED_TEST(InfoTest, InfoCheck) { ASSERT_LT(info.disk_usage, 1000'000); ASSERT_EQ(info.label_indices, 1); ASSERT_EQ(info.label_property_indices, 1); + ASSERT_EQ(info.text_indices, 0); ASSERT_EQ(info.existence_constraints, 0); ASSERT_EQ(info.unique_constraints, 2); ASSERT_EQ(info.storage_mode, this->mode); diff --git a/tests/unit/storage_v2_wal_file.cpp b/tests/unit/storage_v2_wal_file.cpp index 4094090f5..a94b20590 100644 --- a/tests/unit/storage_v2_wal_file.cpp +++ b/tests/unit/storage_v2_wal_file.cpp @@ -53,6 +53,10 @@ memgraph::storage::durability::WalDeltaData::Type StorageMetadataOperationToWalD return memgraph::storage::durability::WalDeltaData::Type::LABEL_PROPERTY_INDEX_STATS_SET; case memgraph::storage::durability::StorageMetadataOperation::LABEL_PROPERTY_INDEX_STATS_CLEAR: return memgraph::storage::durability::WalDeltaData::Type::LABEL_PROPERTY_INDEX_STATS_CLEAR; + case memgraph::storage::durability::StorageMetadataOperation::TEXT_INDEX_CREATE: + return memgraph::storage::durability::WalDeltaData::Type::TEXT_INDEX_CREATE; + case memgraph::storage::durability::StorageMetadataOperation::TEXT_INDEX_DROP: + return memgraph::storage::durability::WalDeltaData::Type::TEXT_INDEX_DROP; case memgraph::storage::durability::StorageMetadataOperation::EXISTENCE_CONSTRAINT_CREATE: return memgraph::storage::durability::WalDeltaData::Type::EXISTENCE_CONSTRAINT_CREATE; case memgraph::storage::durability::StorageMetadataOperation::EXISTENCE_CONSTRAINT_DROP: @@ -252,7 +256,7 @@ class DeltaGenerator final { ASSERT_TRUE(false) << "Unexpected statistics operation!"; } } - wal_file_.AppendOperation(operation, label_id, property_ids, l_stats, lp_stats, timestamp_); + wal_file_.AppendOperation(operation, std::nullopt, label_id, property_ids, l_stats, lp_stats, timestamp_); if (valid_) { UpdateStats(timestamp_, 1); memgraph::storage::durability::WalDeltaData data; @@ -271,6 +275,8 @@ class DeltaGenerator final { break; case memgraph::storage::durability::StorageMetadataOperation::LABEL_PROPERTY_INDEX_CREATE: case memgraph::storage::durability::StorageMetadataOperation::LABEL_PROPERTY_INDEX_DROP: + case memgraph::storage::durability::StorageMetadataOperation::TEXT_INDEX_CREATE: + case memgraph::storage::durability::StorageMetadataOperation::TEXT_INDEX_DROP: case memgraph::storage::durability::StorageMetadataOperation::EXISTENCE_CONSTRAINT_CREATE: case memgraph::storage::durability::StorageMetadataOperation::EXISTENCE_CONSTRAINT_DROP: data.operation_label_property.label = label; @@ -313,6 +319,8 @@ class DeltaGenerator final { case memgraph::storage::durability::StorageMetadataOperation::LABEL_INDEX_STATS_SET: case 
memgraph::storage::durability::StorageMetadataOperation::LABEL_PROPERTY_INDEX_CREATE: case memgraph::storage::durability::StorageMetadataOperation::LABEL_PROPERTY_INDEX_DROP: + case memgraph::storage::durability::StorageMetadataOperation::TEXT_INDEX_CREATE: + case memgraph::storage::durability::StorageMetadataOperation::TEXT_INDEX_DROP: case memgraph::storage::durability::StorageMetadataOperation::EXISTENCE_CONSTRAINT_CREATE: case memgraph::storage::durability::StorageMetadataOperation::EXISTENCE_CONSTRAINT_DROP:; case memgraph::storage::durability::StorageMetadataOperation::LABEL_PROPERTY_INDEX_STATS_SET: From f699c0b37f1f24db27401cbbe0560e96409260d3 Mon Sep 17 00:00:00 2001 From: Andi Date: Thu, 21 Mar 2024 07:41:26 +0100 Subject: [PATCH 13/16] Support bolt+routing (#1796) --- .github/workflows/diff.yaml | 13 +- release/package/mgbuild.sh | 20 +- src/communication/bolt/v1/session.hpp | 8 +- .../bolt/v1/states/executing.hpp | 6 +- src/communication/bolt/v1/states/handlers.hpp | 20 +- src/coordination/CMakeLists.txt | 4 +- src/coordination/coordinator_client.cpp | 13 +- .../coordinator_cluster_state.cpp | 74 +- ...p => coordinator_communication_config.cpp} | 38 +- src/coordination/coordinator_handlers.cpp | 8 +- src/coordination/coordinator_instance.cpp | 67 +- src/coordination/coordinator_server.cpp | 5 +- src/coordination/coordinator_state.cpp | 17 +- .../coordinator_state_machine.cpp | 25 +- .../coordinator_state_manager.cpp | 6 + .../coordination/coordinator_client.hpp | 10 +- ...p => coordinator_communication_config.hpp} | 68 +- .../coordination/coordinator_exceptions.hpp | 11 + .../coordination/coordinator_instance.hpp | 17 +- .../include/coordination/coordinator_rpc.hpp | 11 +- .../coordination/coordinator_server.hpp | 4 +- .../include/coordination/coordinator_slk.hpp | 24 +- .../coordination/coordinator_state.hpp | 6 +- .../include/coordination/raft_state.hpp | 13 +- .../coordination/replication_instance.hpp | 4 +- .../nuraft/coordinator_cluster_state.hpp | 37 +- .../nuraft/coordinator_state_machine.hpp | 12 +- .../include/nuraft/raft_log_action.hpp | 16 +- src/coordination/raft_state.cpp | 45 +- src/coordination/replication_instance.cpp | 4 +- src/dbms/coordinator_handler.cpp | 7 +- src/dbms/coordinator_handler.hpp | 6 +- src/glue/SessionHL.cpp | 34 + src/glue/SessionHL.hpp | 7 + src/io/network/endpoint.cpp | 19 +- src/io/network/endpoint.hpp | 11 +- src/query/interpreter.cpp | 120 ++- src/query/interpreter.hpp | 19 +- tests/drivers/go/v5/docs_quick_start.go | 20 +- tests/drivers/go/v5/go.mod | 2 +- tests/drivers/go/v5/go.sum | 2 + tests/drivers/go/v5/read_routing.go | 51 ++ tests/drivers/go/v5/run.sh | 1 - tests/drivers/go/v5/run_cluster_tests.sh | 21 + tests/drivers/go/v5/write_routing.go | 51 ++ tests/drivers/java/v5_8/pom.xml | 39 + tests/drivers/java/v5_8/run.sh | 1 - tests/drivers/java/v5_8/run_cluster_tests.sh | 37 + .../src/main/java/memgraph/ReadRouting.java | 35 + .../src/main/java/memgraph/WriteRouting.java | 44 + tests/drivers/node/v5_8/read_routing.js | 59 ++ tests/drivers/node/v5_8/run.sh | 1 - tests/drivers/node/v5_8/run_cluster_tests.sh | 17 + tests/drivers/node/v5_8/write_routing.js | 59 ++ tests/drivers/python/v5_8/read_routing.py | 41 + .../drivers/python/v5_8/run_cluster_tests.sh | 25 + tests/drivers/python/v5_8/write_routing.py | 41 + tests/drivers/run_cluster.sh | 203 +++++ tests/e2e/high_availability/common.py | 11 - .../coord_cluster_registration.py | 818 +++++++++--------- .../disable_writing_on_main_after_restart.py | 6 +- tests/e2e/replication/common.hpp 
| 7 +- tests/unit/CMakeLists.txt | 9 +- tests/unit/bolt_session.cpp | 211 +++-- tests/unit/coordinator_cluster_state.cpp | 135 +-- tests/unit/raft_log_serialization.cpp | 54 +- tests/unit/routing_table.cpp | 176 ++++ tests/unit/slk_advanced.cpp | 22 +- 68 files changed, 2131 insertions(+), 897 deletions(-) rename src/coordination/{coordinator_config.cpp => coordinator_communication_config.cpp} (57%) rename src/coordination/include/coordination/{coordinator_config.hpp => coordinator_communication_config.hpp} (51%) create mode 100644 tests/drivers/go/v5/read_routing.go create mode 100755 tests/drivers/go/v5/run_cluster_tests.sh create mode 100644 tests/drivers/go/v5/write_routing.go create mode 100755 tests/drivers/java/v5_8/run_cluster_tests.sh create mode 100644 tests/drivers/java/v5_8/src/main/java/memgraph/ReadRouting.java create mode 100644 tests/drivers/java/v5_8/src/main/java/memgraph/WriteRouting.java create mode 100644 tests/drivers/node/v5_8/read_routing.js create mode 100755 tests/drivers/node/v5_8/run_cluster_tests.sh create mode 100644 tests/drivers/node/v5_8/write_routing.js create mode 100644 tests/drivers/python/v5_8/read_routing.py create mode 100755 tests/drivers/python/v5_8/run_cluster_tests.sh create mode 100644 tests/drivers/python/v5_8/write_routing.py create mode 100755 tests/drivers/run_cluster.sh create mode 100644 tests/unit/routing_table.cpp diff --git a/.github/workflows/diff.yaml b/.github/workflows/diff.yaml index 49b7d4273..8b8ee607f 100644 --- a/.github/workflows/diff.yaml +++ b/.github/workflows/diff.yaml @@ -257,6 +257,17 @@ jobs: --organization-name $MEMGRAPH_ORGANIZATION_NAME \ test-memgraph drivers + - name: Run HA driver tests + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --threads $THREADS \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph drivers-high-availability + - name: Run integration tests run: | ./release/package/mgbuild.sh \ @@ -278,7 +289,7 @@ jobs: --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ --organization-name $MEMGRAPH_ORGANIZATION_NAME \ test-memgraph cppcheck-and-clang-format - + - name: Save cppcheck and clang-format errors uses: actions/upload-artifact@v4 with: diff --git a/release/package/mgbuild.sh b/release/package/mgbuild.sh index e24776f60..934e962b7 100755 --- a/release/package/mgbuild.sh +++ b/release/package/mgbuild.sh @@ -48,9 +48,9 @@ SUPPORTED_ARCHS=( ) SUPPORTED_TESTS=( clang-tidy cppcheck-and-clang-format code-analysis - code-coverage drivers durability e2e gql-behave + code-coverage drivers drivers-high-availability durability e2e gql-behave integration leftover-CTest macro-benchmark - mgbench stress-plain stress-ssl + mgbench stress-plain stress-ssl unit unit-coverage upload-to-bench-graph ) @@ -116,7 +116,7 @@ print_help () { echo -e "\nToolchain v5 supported OSs:" echo -e " \"${SUPPORTED_OS_V5[*]}\"" - + echo -e "\nExample usage:" echo -e " $SCRIPT_NAME --os debian-12 --toolchain v5 --arch amd run" echo -e " $SCRIPT_NAME --os debian-12 --toolchain v5 --arch amd --build-type RelWithDebInfo build-memgraph --community" @@ -296,7 +296,7 @@ build_memgraph () { docker cp "$PROJECT_ROOT/." 
"$build_container:$MGBUILD_ROOT_DIR/" fi # Change ownership of copied files so the mg user inside container can access them - docker exec -u root $build_container bash -c "chown -R mg:mg $MGBUILD_ROOT_DIR" + docker exec -u root $build_container bash -c "chown -R mg:mg $MGBUILD_ROOT_DIR" echo "Installing dependencies using '/memgraph/environment/os/$os.sh' script..." docker exec -u root "$build_container" bash -c "$MGBUILD_ROOT_DIR/environment/os/$os.sh check TOOLCHAIN_RUN_DEPS || /environment/os/$os.sh install TOOLCHAIN_RUN_DEPS" @@ -318,10 +318,9 @@ build_memgraph () { # Define cmake command local cmake_cmd="cmake $build_type_flag $arm_flag $community_flag $telemetry_id_override_flag $coverage_flag $asan_flag $ubsan_flag .." docker exec -u mg "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN && $ACTIVATE_CARGO && $cmake_cmd" - # ' is used instead of " because we need to run make within the allowed # container resources. - # Default value for $threads is 0 instead of $(nproc) because macos + # Default value for $threads is 0 instead of $(nproc) because macos # doesn't support the nproc command. # 0 is set for default value and checked here because mgbuild containers # support nproc @@ -363,7 +362,7 @@ copy_memgraph() { local container_output_path="$MGBUILD_ROOT_DIR/build/memgraph" local host_output_path="$PROJECT_ROOT/build/memgraph" mkdir -p "$PROJECT_ROOT/build" - docker cp -L $build_container:$container_output_path $host_output_path + docker cp -L $build_container:$container_output_path $host_output_path echo "Binary saved to $host_output_path" ;; --build-logs) @@ -371,7 +370,7 @@ copy_memgraph() { local container_output_path="$MGBUILD_ROOT_DIR/build/logs" local host_output_path="$PROJECT_ROOT/build/logs" mkdir -p "$PROJECT_ROOT/build" - docker cp -L $build_container:$container_output_path $host_output_path + docker cp -L $build_container:$container_output_path $host_output_path echo "Build logs saved to $host_output_path" ;; --package) @@ -418,6 +417,9 @@ test_memgraph() { drivers) docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR "'&& ./tests/drivers/run.sh' ;; + drivers-high-availability) + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR "'&& ./tests/drivers/run_cluster.sh' + ;; integration) docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR "'&& tests/integration/run.sh' ;; @@ -664,4 +666,4 @@ case $command in echo "Error: Unknown command '$command'" exit 1 ;; -esac +esac diff --git a/src/communication/bolt/v1/session.hpp b/src/communication/bolt/v1/session.hpp index 2261a3234..55d8a7a54 100644 --- a/src/communication/bolt/v1/session.hpp +++ b/src/communication/bolt/v1/session.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -88,6 +88,12 @@ class Session { virtual void Configure(const std::map &run_time_info) = 0; +#ifdef MG_ENTERPRISE + virtual auto Route(std::map const &routing, + std::vector const &bookmarks, + std::map const &extra) -> std::map = 0; +#endif + /** * Put results of the processed query in the `encoder`. 
* diff --git a/src/communication/bolt/v1/states/executing.hpp b/src/communication/bolt/v1/states/executing.hpp index b58b3c39b..2ab2cacc2 100644 --- a/src/communication/bolt/v1/states/executing.hpp +++ b/src/communication/bolt/v1/states/executing.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -79,9 +79,9 @@ State RunHandlerV4(Signature signature, TSession &session, State state, Marker m } case Signature::Route: { if constexpr (bolt_minor >= 3) { - if (signature == Signature::Route) return HandleRoute(session, marker); + return HandleRoute(session, marker); } else { - spdlog::trace("Supported only in bolt v4.3"); + spdlog::trace("Supported only in bolt versions >= 4.3"); return State::Close; } } diff --git a/src/communication/bolt/v1/states/handlers.hpp b/src/communication/bolt/v1/states/handlers.hpp index 3ffcb6f55..afdc37ad9 100644 --- a/src/communication/bolt/v1/states/handlers.hpp +++ b/src/communication/bolt/v1/states/handlers.hpp @@ -478,9 +478,6 @@ State HandleGoodbye() { template State HandleRoute(TSession &session, const Marker marker) { - // Route message is not implemented since it is Neo4j specific, therefore we will receive it and inform user that - // there is no implementation. Before that, we have to read out the fields from the buffer to leave it in a clean - // state. if (marker != Marker::TinyStruct3) { spdlog::trace("Expected TinyStruct3 marker, but received 0x{:02x}!", utils::UnderlyingCast(marker)); return State::Close; @@ -496,11 +493,27 @@ State HandleRoute(TSession &session, const Marker marker) { spdlog::trace("Couldn't read bookmarks field!"); return State::Close; } + + // TODO: (andi) Fix Bolt versions Value db; if (!session.decoder_.ReadValue(&db)) { spdlog::trace("Couldn't read db field!"); return State::Close; } + +#ifdef MG_ENTERPRISE + try { + auto res = session.Route(routing.ValueMap(), bookmarks.ValueList(), {}); + if (!session.encoder_.MessageSuccess(std::move(res))) { + spdlog::trace("Couldn't send result of routing!"); + return State::Close; + } + return State::Idle; + } catch (const std::exception &e) { + return HandleFailure(session, e); + } + +#else session.encoder_buffer_.Clear(); bool fail_sent = session.encoder_.MessageFailure({{"code", "66"}, {"message", "Route message is not supported in Memgraph!"}}); @@ -509,6 +522,7 @@ State HandleRoute(TSession &session, const Marker marker) { return State::Close; } return State::Error; +#endif } template diff --git a/src/coordination/CMakeLists.txt b/src/coordination/CMakeLists.txt index ef9376a70..d39d3e738 100644 --- a/src/coordination/CMakeLists.txt +++ b/src/coordination/CMakeLists.txt @@ -6,7 +6,7 @@ target_sources(mg-coordination include/coordination/coordinator_state.hpp include/coordination/coordinator_rpc.hpp include/coordination/coordinator_server.hpp - include/coordination/coordinator_config.hpp + include/coordination/coordinator_communication_config.hpp include/coordination/coordinator_exceptions.hpp include/coordination/coordinator_slk.hpp include/coordination/coordinator_instance.hpp @@ -23,7 +23,7 @@ target_sources(mg-coordination include/nuraft/coordinator_state_manager.hpp PRIVATE - coordinator_config.cpp + coordinator_communication_config.cpp coordinator_client.cpp coordinator_state.cpp coordinator_rpc.cpp diff --git a/src/coordination/coordinator_client.cpp 
b/src/coordination/coordinator_client.cpp index 8530faff3..44817ccfe 100644 --- a/src/coordination/coordinator_client.cpp +++ b/src/coordination/coordinator_client.cpp @@ -14,7 +14,7 @@ #include "coordination/coordinator_client.hpp" -#include "coordination/coordinator_config.hpp" +#include "coordination/coordinator_communication_config.hpp" #include "coordination/coordinator_rpc.hpp" #include "replication_coordination_glue/common.hpp" #include "replication_coordination_glue/messages.hpp" @@ -23,18 +23,17 @@ namespace memgraph::coordination { namespace { -auto CreateClientContext(memgraph::coordination::CoordinatorClientConfig const &config) +auto CreateClientContext(memgraph::coordination::CoordinatorToReplicaConfig const &config) -> communication::ClientContext { return (config.ssl) ? communication::ClientContext{config.ssl->key_file, config.ssl->cert_file} : communication::ClientContext{}; } } // namespace -CoordinatorClient::CoordinatorClient(CoordinatorInstance *coord_instance, CoordinatorClientConfig config, +CoordinatorClient::CoordinatorClient(CoordinatorInstance *coord_instance, CoordinatorToReplicaConfig config, HealthCheckClientCallback succ_cb, HealthCheckClientCallback fail_cb) : rpc_context_{CreateClientContext(config)}, - rpc_client_{io::network::Endpoint(io::network::Endpoint::needs_resolving, config.ip_address, config.port), - &rpc_context_}, + rpc_client_{config.mgt_server, &rpc_context_}, config_{std::move(config)}, coord_instance_{coord_instance}, succ_cb_{std::move(succ_cb)}, @@ -86,7 +85,9 @@ void CoordinatorClient::StopFrequentCheck() { instance_checker_.Stop(); } void CoordinatorClient::PauseFrequentCheck() { instance_checker_.Pause(); } void CoordinatorClient::ResumeFrequentCheck() { instance_checker_.Resume(); } -auto CoordinatorClient::ReplicationClientInfo() const -> ReplClientInfo { return config_.replication_client_info; } +auto CoordinatorClient::ReplicationClientInfo() const -> coordination::ReplicationClientInfo { + return config_.replication_client_info; +} auto CoordinatorClient::SendPromoteReplicaToMainRpc(const utils::UUID &uuid, ReplicationClientsInfo replication_clients_info) const -> bool { diff --git a/src/coordination/coordinator_cluster_state.cpp b/src/coordination/coordinator_cluster_state.cpp index cf6e1a574..00bbc1336 100644 --- a/src/coordination/coordinator_cluster_state.cpp +++ b/src/coordination/coordinator_cluster_state.cpp @@ -18,86 +18,88 @@ namespace memgraph::coordination { -void to_json(nlohmann::json &j, InstanceState const &instance_state) { +void to_json(nlohmann::json &j, ReplicationInstanceState const &instance_state) { j = nlohmann::json{{"config", instance_state.config}, {"status", instance_state.status}}; } -void from_json(nlohmann::json const &j, InstanceState &instance_state) { +void from_json(nlohmann::json const &j, ReplicationInstanceState &instance_state) { j.at("config").get_to(instance_state.config); j.at("status").get_to(instance_state.status); } -CoordinatorClusterState::CoordinatorClusterState(std::map> instances) - : instances_{std::move(instances)} {} +CoordinatorClusterState::CoordinatorClusterState(std::map> instances) + : repl_instances_{std::move(instances)} {} -CoordinatorClusterState::CoordinatorClusterState(CoordinatorClusterState const &other) : instances_{other.instances_} {} +CoordinatorClusterState::CoordinatorClusterState(CoordinatorClusterState const &other) + : repl_instances_{other.repl_instances_} {} CoordinatorClusterState &CoordinatorClusterState::operator=(CoordinatorClusterState const &other) { 
if (this == &other) { return *this; } - instances_ = other.instances_; + repl_instances_ = other.repl_instances_; return *this; } CoordinatorClusterState::CoordinatorClusterState(CoordinatorClusterState &&other) noexcept - : instances_{std::move(other.instances_)} {} + : repl_instances_{std::move(other.repl_instances_)} {} CoordinatorClusterState &CoordinatorClusterState::operator=(CoordinatorClusterState &&other) noexcept { if (this == &other) { return *this; } - instances_ = std::move(other.instances_); + repl_instances_ = std::move(other.repl_instances_); return *this; } auto CoordinatorClusterState::MainExists() const -> bool { auto lock = std::shared_lock{log_lock_}; - return std::ranges::any_of(instances_, + return std::ranges::any_of(repl_instances_, [](auto const &entry) { return entry.second.status == ReplicationRole::MAIN; }); } auto CoordinatorClusterState::IsMain(std::string_view instance_name) const -> bool { auto lock = std::shared_lock{log_lock_}; - auto const it = instances_.find(instance_name); - return it != instances_.end() && it->second.status == ReplicationRole::MAIN; + auto const it = repl_instances_.find(instance_name); + return it != repl_instances_.end() && it->second.status == ReplicationRole::MAIN; } auto CoordinatorClusterState::IsReplica(std::string_view instance_name) const -> bool { auto lock = std::shared_lock{log_lock_}; - auto const it = instances_.find(instance_name); - return it != instances_.end() && it->second.status == ReplicationRole::REPLICA; + auto const it = repl_instances_.find(instance_name); + return it != repl_instances_.end() && it->second.status == ReplicationRole::REPLICA; } -auto CoordinatorClusterState::InsertInstance(std::string instance_name, InstanceState instance_state) -> void { +auto CoordinatorClusterState::InsertInstance(std::string instance_name, ReplicationInstanceState instance_state) + -> void { auto lock = std::lock_guard{log_lock_}; - instances_.insert_or_assign(std::move(instance_name), std::move(instance_state)); + repl_instances_.insert_or_assign(std::move(instance_name), std::move(instance_state)); } auto CoordinatorClusterState::DoAction(TRaftLog log_entry, RaftLogAction log_action) -> void { auto lock = std::lock_guard{log_lock_}; switch (log_action) { case RaftLogAction::REGISTER_REPLICATION_INSTANCE: { - auto const &config = std::get(log_entry); - instances_[config.instance_name] = InstanceState{config, ReplicationRole::REPLICA}; + auto const &config = std::get(log_entry); + repl_instances_[config.instance_name] = ReplicationInstanceState{config, ReplicationRole::REPLICA}; break; } case RaftLogAction::UNREGISTER_REPLICATION_INSTANCE: { auto const instance_name = std::get(log_entry); - instances_.erase(instance_name); + repl_instances_.erase(instance_name); break; } case RaftLogAction::SET_INSTANCE_AS_MAIN: { auto const instance_name = std::get(log_entry); - auto it = instances_.find(instance_name); - MG_ASSERT(it != instances_.end(), "Instance does not exist as part of raft state!"); + auto it = repl_instances_.find(instance_name); + MG_ASSERT(it != repl_instances_.end(), "Instance does not exist as part of raft state!"); it->second.status = ReplicationRole::MAIN; break; } case RaftLogAction::SET_INSTANCE_AS_REPLICA: { auto const instance_name = std::get(log_entry); - auto it = instances_.find(instance_name); - MG_ASSERT(it != instances_.end(), "Instance does not exist as part of raft state!"); + auto it = repl_instances_.find(instance_name); + MG_ASSERT(it != repl_instances_.end(), "Instance does not exist as part 
of raft state!"); it->second.status = ReplicationRole::REPLICA; break; } @@ -105,13 +107,18 @@ auto CoordinatorClusterState::DoAction(TRaftLog log_entry, RaftLogAction log_act uuid_ = std::get(log_entry); break; } + case RaftLogAction::ADD_COORDINATOR_INSTANCE: { + auto const &config = std::get(log_entry); + coordinators_.emplace_back(CoordinatorInstanceState{config}); + break; + } } } auto CoordinatorClusterState::Serialize(ptr &data) -> void { auto lock = std::shared_lock{log_lock_}; - auto const log = nlohmann::json(instances_).dump(); + auto const log = nlohmann::json(repl_instances_).dump(); data = buffer::alloc(sizeof(uint32_t) + log.size()); buffer_serializer bs(data); @@ -121,27 +128,22 @@ auto CoordinatorClusterState::Serialize(ptr &data) -> void { auto CoordinatorClusterState::Deserialize(buffer &data) -> CoordinatorClusterState { buffer_serializer bs(data); auto const j = nlohmann::json::parse(bs.get_str()); - auto instances = j.get>>(); + auto instances = j.get>>(); return CoordinatorClusterState{std::move(instances)}; } -auto CoordinatorClusterState::GetInstances() const -> std::vector { +auto CoordinatorClusterState::GetReplicationInstances() const -> std::vector { auto lock = std::shared_lock{log_lock_}; - return instances_ | ranges::views::values | ranges::to>; + return repl_instances_ | ranges::views::values | ranges::to>; +} + +auto CoordinatorClusterState::GetCoordinatorInstances() const -> std::vector { + auto lock = std::shared_lock{log_lock_}; + return coordinators_; } auto CoordinatorClusterState::GetUUID() const -> utils::UUID { return uuid_; } -auto CoordinatorClusterState::FindCurrentMainInstanceName() const -> std::optional { - auto lock = std::shared_lock{log_lock_}; - auto const it = - std::ranges::find_if(instances_, [](auto const &entry) { return entry.second.status == ReplicationRole::MAIN; }); - if (it == instances_.end()) { - return {}; - } - return it->first; -} - } // namespace memgraph::coordination #endif diff --git a/src/coordination/coordinator_config.cpp b/src/coordination/coordinator_communication_config.cpp similarity index 57% rename from src/coordination/coordinator_config.cpp rename to src/coordination/coordinator_communication_config.cpp index a1147d3b6..31ed20fd0 100644 --- a/src/coordination/coordinator_config.cpp +++ b/src/coordination/coordinator_communication_config.cpp @@ -11,43 +11,53 @@ #ifdef MG_ENTERPRISE -#include "coordination/coordinator_config.hpp" +#include "coordination/coordinator_communication_config.hpp" namespace memgraph::coordination { -void to_json(nlohmann::json &j, ReplClientInfo const &config) { +void to_json(nlohmann::json &j, CoordinatorToCoordinatorConfig const &config) { + j = nlohmann::json{{"coordinator_server_id", config.coordinator_server_id}, + {"coordinator_server", config.coordinator_server}, + {"bolt_server", config.bolt_server}}; +} + +void from_json(nlohmann::json const &j, CoordinatorToCoordinatorConfig &config) { + config.coordinator_server_id = j.at("coordinator_server_id").get(); + config.coordinator_server = j.at("coordinator_server").get(); + config.bolt_server = j.at("bolt_server").get(); +} + +void to_json(nlohmann::json &j, ReplicationClientInfo const &config) { j = nlohmann::json{{"instance_name", config.instance_name}, {"replication_mode", config.replication_mode}, - {"replication_ip_address", config.replication_ip_address}, - {"replication_port", config.replication_port}}; + {"replication_server", config.replication_server}}; } -void from_json(nlohmann::json const &j, ReplClientInfo &config) 
{ +void from_json(nlohmann::json const &j, ReplicationClientInfo &config) { config.instance_name = j.at("instance_name").get(); config.replication_mode = j.at("replication_mode").get(); - config.replication_ip_address = j.at("replication_ip_address").get(); - config.replication_port = j.at("replication_port").get(); + config.replication_server = j.at("replication_server").get(); } -void to_json(nlohmann::json &j, CoordinatorClientConfig const &config) { +void to_json(nlohmann::json &j, CoordinatorToReplicaConfig const &config) { j = nlohmann::json{{"instance_name", config.instance_name}, - {"ip_address", config.ip_address}, - {"port", config.port}, + {"mgt_server", config.mgt_server}, + {"bolt_server", config.bolt_server}, {"instance_health_check_frequency_sec", config.instance_health_check_frequency_sec.count()}, {"instance_down_timeout_sec", config.instance_down_timeout_sec.count()}, {"instance_get_uuid_frequency_sec", config.instance_get_uuid_frequency_sec.count()}, {"replication_client_info", config.replication_client_info}}; } -void from_json(nlohmann::json const &j, CoordinatorClientConfig &config) { +void from_json(nlohmann::json const &j, CoordinatorToReplicaConfig &config) { config.instance_name = j.at("instance_name").get(); - config.ip_address = j.at("ip_address").get(); - config.port = j.at("port").get(); + config.mgt_server = j.at("mgt_server").get(); + config.bolt_server = j.at("bolt_server").get(); config.instance_health_check_frequency_sec = std::chrono::seconds{j.at("instance_health_check_frequency_sec").get()}; config.instance_down_timeout_sec = std::chrono::seconds{j.at("instance_down_timeout_sec").get()}; config.instance_get_uuid_frequency_sec = std::chrono::seconds{j.at("instance_get_uuid_frequency_sec").get()}; - config.replication_client_info = j.at("replication_client_info").get(); + config.replication_client_info = j.at("replication_client_info").get(); } } // namespace memgraph::coordination diff --git a/src/coordination/coordinator_handlers.cpp b/src/coordination/coordinator_handlers.cpp index 637360267..e5b7a663f 100644 --- a/src/coordination/coordinator_handlers.cpp +++ b/src/coordination/coordinator_handlers.cpp @@ -95,8 +95,8 @@ void CoordinatorHandlers::DemoteMainToReplicaHandler(replication::ReplicationHan slk::Load(&req, req_reader); const replication::ReplicationServerConfig clients_config{ - .ip_address = req.replication_client_info.replication_ip_address, - .port = req.replication_client_info.replication_port}; + .ip_address = req.replication_client_info.replication_server.address, + .port = req.replication_client_info.replication_server.port}; if (!replication_handler.SetReplicationRoleReplica(clients_config, std::nullopt)) { spdlog::error("Demoting main to replica failed!"); @@ -136,8 +136,8 @@ void CoordinatorHandlers::PromoteReplicaToMainHandler(replication::ReplicationHa return replication::ReplicationClientConfig{ .name = repl_info_config.instance_name, .mode = repl_info_config.replication_mode, - .ip_address = repl_info_config.replication_ip_address, - .port = repl_info_config.replication_port, + .ip_address = repl_info_config.replication_server.address, + .port = repl_info_config.replication_server.port, }; }; diff --git a/src/coordination/coordinator_instance.cpp b/src/coordination/coordinator_instance.cpp index 791ffbc59..2182e2405 100644 --- a/src/coordination/coordinator_instance.cpp +++ b/src/coordination/coordinator_instance.cpp @@ -14,7 +14,6 @@ #include "coordination/coordinator_instance.hpp" #include 
"coordination/coordinator_exceptions.hpp" -#include "coordination/fmt.hpp" #include "dbms/constants.hpp" #include "nuraft/coordinator_state_machine.hpp" #include "nuraft/coordinator_state_manager.hpp" @@ -34,7 +33,7 @@ CoordinatorInstance::CoordinatorInstance() : raft_state_(RaftState::MakeRaftState( [this]() { spdlog::info("Leader changed, starting all replication instances!"); - auto const instances = raft_state_.GetInstances(); + auto const instances = raft_state_.GetReplicationInstances(); auto replicas = instances | ranges::views::filter([](auto const &instance) { return instance.status == ReplicationRole::REPLICA; }); @@ -133,7 +132,7 @@ auto CoordinatorInstance::ShowInstances() const -> std::vector { .health = "unknown"}; }; - std::ranges::transform(raft_state_.GetInstances(), std::back_inserter(instances_status), + std::ranges::transform(raft_state_.GetReplicationInstances(), std::back_inserter(instances_status), process_repl_instance_as_follower); } @@ -288,7 +287,7 @@ auto CoordinatorInstance::SetReplicationInstanceToMain(std::string_view instance return SetInstanceToMainCoordinatorStatus::SUCCESS; } -auto CoordinatorInstance::RegisterReplicationInstance(CoordinatorClientConfig const &config) +auto CoordinatorInstance::RegisterReplicationInstance(CoordinatorToReplicaConfig const &config) -> RegisterInstanceCoordinatorStatus { auto lock = std::lock_guard{coord_instance_lock_}; @@ -382,9 +381,12 @@ auto CoordinatorInstance::UnregisterReplicationInstance(std::string_view instanc return UnregisterInstanceCoordinatorStatus::SUCCESS; } -auto CoordinatorInstance::AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, - std::string_view raft_address) -> void { - raft_state_.AddCoordinatorInstance(raft_server_id, raft_port, raft_address); +auto CoordinatorInstance::AddCoordinatorInstance(coordination::CoordinatorToCoordinatorConfig const &config) -> void { + raft_state_.AddCoordinatorInstance(config); + // NOTE: We ignore error we added coordinator instance to networkign stuff but not in raft log. + if (!raft_state_.AppendAddCoordinatorInstanceLog(config)) { + spdlog::error("Failed to append add coordinator instance log"); + } } void CoordinatorInstance::MainFailCallback(std::string_view repl_instance_name) { @@ -557,5 +559,56 @@ auto CoordinatorInstance::IsReplica(std::string_view instance_name) const -> boo return raft_state_.IsReplica(instance_name); } +auto CoordinatorInstance::GetRoutingTable(std::map const &routing) -> RoutingTable { + auto res = RoutingTable{}; + + auto const repl_instance_to_bolt = [](ReplicationInstanceState const &instance) { + return instance.config.BoltSocketAddress(); + }; + + // TODO: (andi) This is wrong check, Fico will correct in #1819. 
+ auto const is_instance_main = [&](ReplicationInstanceState const &instance) { + return instance.status == ReplicationRole::MAIN; + }; + + auto const is_instance_replica = [&](ReplicationInstanceState const &instance) { + return instance.status == ReplicationRole::REPLICA; + }; + + auto const &raft_log_repl_instances = raft_state_.GetReplicationInstances(); + + auto bolt_mains = raft_log_repl_instances | ranges::views::filter(is_instance_main) | + ranges::views::transform(repl_instance_to_bolt) | ranges::to(); + MG_ASSERT(bolt_mains.size() <= 1, "There can be at most one main instance active!"); + + if (!std::ranges::empty(bolt_mains)) { + res.emplace_back(std::move(bolt_mains), "WRITE"); + } + + auto bolt_replicas = raft_log_repl_instances | ranges::views::filter(is_instance_replica) | + ranges::views::transform(repl_instance_to_bolt) | ranges::to(); + if (!std::ranges::empty(bolt_replicas)) { + res.emplace_back(std::move(bolt_replicas), "READ"); + } + + auto const coord_instance_to_bolt = [](CoordinatorInstanceState const &instance) { + return instance.config.bolt_server.SocketAddress(); + }; + + auto const &raft_log_coord_instances = raft_state_.GetCoordinatorInstances(); + auto bolt_coords = + raft_log_coord_instances | ranges::views::transform(coord_instance_to_bolt) | ranges::to(); + + auto const &local_bolt_coord = routing.find("address"); + if (local_bolt_coord == routing.end()) { + throw InvalidRoutingTableException("No bolt address found in routing table for the current coordinator!"); + } + + bolt_coords.push_back(local_bolt_coord->second); + res.emplace_back(std::move(bolt_coords), "ROUTE"); + + return res; +} + } // namespace memgraph::coordination #endif diff --git a/src/coordination/coordinator_server.cpp b/src/coordination/coordinator_server.cpp index 60dc5e348..327097830 100644 --- a/src/coordination/coordinator_server.cpp +++ b/src/coordination/coordinator_server.cpp @@ -18,8 +18,7 @@ namespace memgraph::coordination { namespace { -auto CreateServerContext(const memgraph::coordination::CoordinatorServerConfig &config) - -> communication::ServerContext { +auto CreateServerContext(const memgraph::coordination::ManagementServerConfig &config) -> communication::ServerContext { return (config.ssl) ? 
communication::ServerContext{config.ssl->key_file, config.ssl->cert_file, config.ssl->ca_file, config.ssl->verify_peer} : communication::ServerContext{}; @@ -32,7 +31,7 @@ constexpr auto kCoordinatorServerThreads = 1; } // namespace -CoordinatorServer::CoordinatorServer(const CoordinatorServerConfig &config) +CoordinatorServer::CoordinatorServer(const ManagementServerConfig &config) : rpc_server_context_{CreateServerContext(config)}, rpc_server_{io::network::Endpoint{config.ip_address, config.port}, &rpc_server_context_, kCoordinatorServerThreads} { diff --git a/src/coordination/coordinator_state.cpp b/src/coordination/coordinator_state.cpp index f429cd5a7..149a9cb97 100644 --- a/src/coordination/coordinator_state.cpp +++ b/src/coordination/coordinator_state.cpp @@ -13,7 +13,7 @@ #include "coordination/coordinator_state.hpp" -#include "coordination/coordinator_config.hpp" +#include "coordination/coordinator_communication_config.hpp" #include "coordination/register_main_replica_coordinator_status.hpp" #include "flags/replication.hpp" #include "spdlog/spdlog.h" @@ -31,7 +31,7 @@ CoordinatorState::CoordinatorState() { spdlog::info("Executing coordinator constructor"); if (FLAGS_coordinator_server_port) { spdlog::info("Coordinator server port set"); - auto const config = CoordinatorServerConfig{ + auto const config = ManagementServerConfig{ .ip_address = kDefaultReplicationServerIp, .port = static_cast(FLAGS_coordinator_server_port), }; @@ -41,7 +41,7 @@ CoordinatorState::CoordinatorState() { } } -auto CoordinatorState::RegisterReplicationInstance(CoordinatorClientConfig const &config) +auto CoordinatorState::RegisterReplicationInstance(CoordinatorToReplicaConfig const &config) -> RegisterInstanceCoordinatorStatus { MG_ASSERT(std::holds_alternative(data_), "Coordinator cannot register replica since variant holds wrong alternative"); @@ -98,11 +98,16 @@ auto CoordinatorState::GetCoordinatorServer() const -> CoordinatorServer & { return *std::get(data_).coordinator_server_; } -auto CoordinatorState::AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, - std::string_view raft_address) -> void { +auto CoordinatorState::AddCoordinatorInstance(coordination::CoordinatorToCoordinatorConfig const &config) -> void { MG_ASSERT(std::holds_alternative(data_), "Coordinator cannot register replica since variant holds wrong alternative"); - return std::get(data_).AddCoordinatorInstance(raft_server_id, raft_port, raft_address); + return std::get(data_).AddCoordinatorInstance(config); +} + +auto CoordinatorState::GetRoutingTable(std::map const &routing) -> RoutingTable { + MG_ASSERT(std::holds_alternative(data_), + "Coordinator cannot get routing table since variant holds wrong alternative"); + return std::get(data_).GetRoutingTable(routing); } } // namespace memgraph::coordination diff --git a/src/coordination/coordinator_state_machine.cpp b/src/coordination/coordinator_state_machine.cpp index 631c3c4d2..789ac2e5e 100644 --- a/src/coordination/coordinator_state_machine.cpp +++ b/src/coordination/coordinator_state_machine.cpp @@ -20,10 +20,6 @@ constexpr int MAX_SNAPSHOTS = 3; namespace memgraph::coordination { -auto CoordinatorStateMachine::FindCurrentMainInstanceName() const -> std::optional { - return cluster_state_.FindCurrentMainInstanceName(); -} - auto CoordinatorStateMachine::MainExists() const -> bool { return cluster_state_.MainExists(); } auto CoordinatorStateMachine::IsMain(std::string_view instance_name) const -> bool { @@ -42,7 +38,7 @@ auto 
CoordinatorStateMachine::CreateLog(nlohmann::json &&log) -> ptr { return log_buf; } -auto CoordinatorStateMachine::SerializeRegisterInstance(CoordinatorClientConfig const &config) -> ptr { +auto CoordinatorStateMachine::SerializeRegisterInstance(CoordinatorToReplicaConfig const &config) -> ptr { return CreateLog({{"action", RaftLogAction::REGISTER_REPLICATION_INSTANCE}, {"info", config}}); } @@ -62,6 +58,11 @@ auto CoordinatorStateMachine::SerializeUpdateUUID(utils::UUID const &uuid) -> pt return CreateLog({{"action", RaftLogAction::UPDATE_UUID}, {"info", uuid}}); } +auto CoordinatorStateMachine::SerializeAddCoordinatorInstance(CoordinatorToCoordinatorConfig const &config) + -> ptr { + return CreateLog({{"action", RaftLogAction::ADD_COORDINATOR_INSTANCE}, {"info", config}}); +} + auto CoordinatorStateMachine::DecodeLog(buffer &data) -> std::pair { buffer_serializer bs(data); auto const json = nlohmann::json::parse(bs.get_str()); @@ -71,7 +72,7 @@ auto CoordinatorStateMachine::DecodeLog(buffer &data) -> std::pair(), action}; + return {info.get(), action}; case RaftLogAction::UPDATE_UUID: return {info.get(), action}; case RaftLogAction::UNREGISTER_REPLICATION_INSTANCE: @@ -79,6 +80,8 @@ auto CoordinatorStateMachine::DecodeLog(buffer &data) -> std::pair(), action}; + case RaftLogAction::ADD_COORDINATOR_INSTANCE: + return {info.get(), action}; } throw std::runtime_error("Unknown action"); } @@ -133,6 +136,7 @@ auto CoordinatorStateMachine::read_logical_snp_obj(snapshot &snapshot, void *& / } else { // Object ID > 0: second object, put actual value. ctx->cluster_state_.Serialize(data_out); + is_last_obj = true; } return 0; @@ -155,6 +159,7 @@ auto CoordinatorStateMachine::save_logical_snp_obj(snapshot &snapshot, ulong &ob DMG_ASSERT(entry != snapshots_.end()); entry->second->cluster_state_ = cluster_state; } + obj_id++; } auto CoordinatorStateMachine::apply_snapshot(snapshot &s) -> bool { @@ -205,8 +210,12 @@ auto CoordinatorStateMachine::create_snapshot_internal(ptr snapshot) - } } -auto CoordinatorStateMachine::GetInstances() const -> std::vector { - return cluster_state_.GetInstances(); +auto CoordinatorStateMachine::GetReplicationInstances() const -> std::vector { + return cluster_state_.GetReplicationInstances(); +} + +auto CoordinatorStateMachine::GetCoordinatorInstances() const -> std::vector { + return cluster_state_.GetCoordinatorInstances(); } auto CoordinatorStateMachine::GetUUID() const -> utils::UUID { return cluster_state_.GetUUID(); } diff --git a/src/coordination/coordinator_state_manager.cpp b/src/coordination/coordinator_state_manager.cpp index b2fb81ea1..db49b1f21 100644 --- a/src/coordination/coordinator_state_manager.cpp +++ b/src/coordination/coordinator_state_manager.cpp @@ -33,6 +33,7 @@ CoordinatorStateManager::CoordinatorStateManager(int srv_id, std::string const & auto CoordinatorStateManager::load_config() -> ptr { // Just return in-memory data in this example. // May require reading from disk here, if it has been written to disk. + spdlog::trace("Loading cluster config"); return cluster_config_; } @@ -41,6 +42,11 @@ auto CoordinatorStateManager::save_config(cluster_config const &config) -> void // Need to write to disk here, if want to make it durable. 
ptr buf = config.serialize(); cluster_config_ = cluster_config::deserialize(*buf); + spdlog::info("Saving cluster config."); + auto servers = cluster_config_->get_servers(); + for (auto const &server : servers) { + spdlog::trace("Server id: {}, endpoint: {}", server->get_id(), server->get_endpoint()); + } } auto CoordinatorStateManager::save_state(srv_state const &state) -> void { diff --git a/src/coordination/include/coordination/coordinator_client.hpp b/src/coordination/include/coordination/coordinator_client.hpp index 5d4795f81..875efaa45 100644 --- a/src/coordination/include/coordination/coordinator_client.hpp +++ b/src/coordination/include/coordination/coordinator_client.hpp @@ -13,7 +13,7 @@ #ifdef MG_ENTERPRISE -#include "coordination/coordinator_config.hpp" +#include "coordination/coordinator_communication_config.hpp" #include "replication_coordination_glue/common.hpp" #include "rpc/client.hpp" #include "rpc_errors.hpp" @@ -25,11 +25,11 @@ namespace memgraph::coordination { class CoordinatorInstance; using HealthCheckClientCallback = std::function; -using ReplicationClientsInfo = std::vector; +using ReplicationClientsInfo = std::vector; class CoordinatorClient { public: - explicit CoordinatorClient(CoordinatorInstance *coord_instance, CoordinatorClientConfig config, + explicit CoordinatorClient(CoordinatorInstance *coord_instance, CoordinatorToReplicaConfig config, HealthCheckClientCallback succ_cb, HealthCheckClientCallback fail_cb); ~CoordinatorClient() = default; @@ -62,7 +62,7 @@ class CoordinatorClient { auto SendGetInstanceUUIDRpc() const -> memgraph::utils::BasicResult>; - auto ReplicationClientInfo() const -> ReplClientInfo; + auto ReplicationClientInfo() const -> ReplicationClientInfo; auto SendGetInstanceTimestampsRpc() const -> utils::BasicResult; @@ -83,7 +83,7 @@ class CoordinatorClient { communication::ClientContext rpc_context_; mutable rpc::Client rpc_client_; - CoordinatorClientConfig config_; + CoordinatorToReplicaConfig config_; CoordinatorInstance *coord_instance_; HealthCheckClientCallback succ_cb_; HealthCheckClientCallback fail_cb_; diff --git a/src/coordination/include/coordination/coordinator_config.hpp b/src/coordination/include/coordination/coordinator_communication_config.hpp similarity index 51% rename from src/coordination/include/coordination/coordinator_config.hpp rename to src/coordination/include/coordination/coordinator_communication_config.hpp index 127a365eb..4f11b188f 100644 --- a/src/coordination/include/coordination/coordinator_config.hpp +++ b/src/coordination/include/coordination/coordinator_communication_config.hpp @@ -13,6 +13,7 @@ #ifdef MG_ENTERPRISE +#include "io/network/endpoint.hpp" #include "replication_coordination_glue/mode.hpp" #include "utils/string.hpp" @@ -28,46 +29,50 @@ namespace memgraph::coordination { inline constexpr auto *kDefaultReplicationServerIp = "0.0.0.0"; -struct CoordinatorClientConfig { - std::string instance_name; - std::string ip_address; - uint16_t port{}; +struct ReplicationClientInfo { + std::string instance_name{}; + replication_coordination_glue::ReplicationMode replication_mode{}; + io::network::Endpoint replication_server; + + friend bool operator==(ReplicationClientInfo const &, ReplicationClientInfo const &) = default; +}; + +struct CoordinatorToReplicaConfig { + auto BoltSocketAddress() const -> std::string { return bolt_server.SocketAddress(); } + auto CoordinatorSocketAddress() const -> std::string { return mgt_server.SocketAddress(); } + auto ReplicationSocketAddress() const -> std::string { + 
return replication_client_info.replication_server.SocketAddress(); + } + + std::string instance_name{}; + io::network::Endpoint mgt_server; + io::network::Endpoint bolt_server; + ReplicationClientInfo replication_client_info; + std::chrono::seconds instance_health_check_frequency_sec{1}; std::chrono::seconds instance_down_timeout_sec{5}; std::chrono::seconds instance_get_uuid_frequency_sec{10}; - auto CoordinatorSocketAddress() const -> std::string { return fmt::format("{}:{}", ip_address, port); } - auto ReplicationSocketAddress() const -> std::string { - return fmt::format("{}:{}", replication_client_info.replication_ip_address, - replication_client_info.replication_port); - } - - struct ReplicationClientInfo { - std::string instance_name; - replication_coordination_glue::ReplicationMode replication_mode{}; - std::string replication_ip_address; - uint16_t replication_port{}; - - friend bool operator==(ReplicationClientInfo const &, ReplicationClientInfo const &) = default; - }; - - ReplicationClientInfo replication_client_info; - struct SSL { std::string key_file; std::string cert_file; - friend bool operator==(const SSL &, const SSL &) = default; }; std::optional ssl; - friend bool operator==(CoordinatorClientConfig const &, CoordinatorClientConfig const &) = default; + friend bool operator==(CoordinatorToReplicaConfig const &, CoordinatorToReplicaConfig const &) = default; }; -using ReplClientInfo = CoordinatorClientConfig::ReplicationClientInfo; +struct CoordinatorToCoordinatorConfig { + uint32_t coordinator_server_id{0}; + io::network::Endpoint bolt_server; + io::network::Endpoint coordinator_server; -struct CoordinatorServerConfig { + friend bool operator==(CoordinatorToCoordinatorConfig const &, CoordinatorToCoordinatorConfig const &) = default; +}; + +struct ManagementServerConfig { std::string ip_address; uint16_t port{}; struct SSL { @@ -80,14 +85,17 @@ struct CoordinatorServerConfig { std::optional ssl; - friend bool operator==(CoordinatorServerConfig const &, CoordinatorServerConfig const &) = default; + friend bool operator==(ManagementServerConfig const &, ManagementServerConfig const &) = default; }; -void to_json(nlohmann::json &j, CoordinatorClientConfig const &config); -void from_json(nlohmann::json const &j, CoordinatorClientConfig &config); +void to_json(nlohmann::json &j, CoordinatorToReplicaConfig const &config); +void from_json(nlohmann::json const &j, CoordinatorToReplicaConfig &config); -void to_json(nlohmann::json &j, ReplClientInfo const &config); -void from_json(nlohmann::json const &j, ReplClientInfo &config); +void to_json(nlohmann::json &j, CoordinatorToCoordinatorConfig const &config); +void from_json(nlohmann::json const &j, CoordinatorToCoordinatorConfig &config); + +void to_json(nlohmann::json &j, ReplicationClientInfo const &config); +void from_json(nlohmann::json const &j, ReplicationClientInfo &config); } // namespace memgraph::coordination #endif diff --git a/src/coordination/include/coordination/coordinator_exceptions.hpp b/src/coordination/include/coordination/coordinator_exceptions.hpp index 7a967f80b..6cff2e8c1 100644 --- a/src/coordination/include/coordination/coordinator_exceptions.hpp +++ b/src/coordination/include/coordination/coordinator_exceptions.hpp @@ -94,5 +94,16 @@ class InvalidRaftLogActionException final : public utils::BasicException { SPECIALIZE_GET_EXCEPTION_NAME(InvalidRaftLogActionException) }; +class InvalidRoutingTableException final : public utils::BasicException { + public: + explicit 
InvalidRoutingTableException(std::string_view what) noexcept : BasicException(what) {} + + template <typename... Args> + explicit InvalidRoutingTableException(fmt::format_string<Args...> fmt, Args &&...args) noexcept + : InvalidRoutingTableException(fmt::format(fmt, std::forward<Args>(args)...)) {} + + SPECIALIZE_GET_EXCEPTION_NAME(InvalidRoutingTableException) +}; + } // namespace memgraph::coordination #endif diff --git a/src/coordination/include/coordination/coordinator_instance.hpp b/src/coordination/include/coordination/coordinator_instance.hpp index 10549f468..a778d1238 100644 --- a/src/coordination/include/coordination/coordinator_instance.hpp +++ b/src/coordination/include/coordination/coordinator_instance.hpp @@ -26,6 +26,8 @@ namespace memgraph::coordination { +using RoutingTable = std::vector<std::pair<std::vector<std::string>, std::string>>; + struct NewMainRes { std::string most_up_to_date_instance; std::string latest_epoch; @@ -36,8 +38,14 @@ using InstanceNameDbHistories = std::pair<std::string, replication_coordination_glue::DatabaseHistories>; class CoordinatorInstance { public: - [[nodiscard]] auto RegisterReplicationInstance(CoordinatorClientConfig const &config) + [[nodiscard]] auto RegisterReplicationInstance(CoordinatorToReplicaConfig const &config) -> RegisterInstanceCoordinatorStatus; [[nodiscard]] auto UnregisterReplicationInstance(std::string_view instance_name) -> UnregisterInstanceCoordinatorStatus; @@ -48,15 +56,15 @@ class CoordinatorInstance { auto TryFailover() -> void; - auto AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, std::string_view raft_address) -> void; + auto AddCoordinatorInstance(coordination::CoordinatorToCoordinatorConfig const &config) -> void; + + auto GetRoutingTable(std::map<std::string, std::string> const &routing) -> RoutingTable; static auto ChooseMostUpToDateInstance(std::span<InstanceNameDbHistories> histories) -> NewMainRes; private: HealthCheckClientCallback client_succ_cb_, client_fail_cb_; - auto OnRaftCommitCallback(TRaftLog const &log_entry, RaftLogAction log_action) -> void; - auto FindReplicationInstance(std::string_view replication_instance_name) -> ReplicationInstance &; void MainFailCallback(std::string_view); @@ -71,7 +79,6 @@ class CoordinatorInstance { auto IsReplica(std::string_view instance_name) const -> bool; // NOTE: Must be std::list because we rely on pointer stability.
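// The NOTE above can be made concrete: frequent-check callbacks hold
// ReplicationInstance pointers across later registrations, which std::list
// guarantees and std::vector does not. A sketch (illustrative only):
//
//   std::list<ReplicationInstance> instances;
//   ReplicationInstance *held = &instances.emplace_back(/* ... */);
//   instances.emplace_back(/* ... */);  // list nodes never relocate, so `held`
//                                       // stays valid; a vector reallocation
//                                       // here would invalidate it.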
- // Leader and followers should both have same view on repl_instances_ std::list repl_instances_; mutable utils::ResourceLock coord_instance_lock_{}; diff --git a/src/coordination/include/coordination/coordinator_rpc.hpp b/src/coordination/include/coordination/coordinator_rpc.hpp index d799b2955..b0b466859 100644 --- a/src/coordination/include/coordination/coordinator_rpc.hpp +++ b/src/coordination/include/coordination/coordinator_rpc.hpp @@ -14,7 +14,7 @@ #include "utils/uuid.hpp" #ifdef MG_ENTERPRISE -#include "coordination/coordinator_config.hpp" +#include "coordination/coordinator_communication_config.hpp" #include "replication_coordination_glue/common.hpp" #include "rpc/messages.hpp" #include "slk/serialization.hpp" @@ -28,14 +28,13 @@ struct PromoteReplicaToMainReq { static void Load(PromoteReplicaToMainReq *self, memgraph::slk::Reader *reader); static void Save(const PromoteReplicaToMainReq &self, memgraph::slk::Builder *builder); - explicit PromoteReplicaToMainReq(const utils::UUID &uuid, - std::vector replication_clients_info) + explicit PromoteReplicaToMainReq(const utils::UUID &uuid, std::vector replication_clients_info) : main_uuid_(uuid), replication_clients_info(std::move(replication_clients_info)) {} PromoteReplicaToMainReq() = default; // get uuid here utils::UUID main_uuid_; - std::vector replication_clients_info; + std::vector replication_clients_info; }; struct PromoteReplicaToMainRes { @@ -60,12 +59,12 @@ struct DemoteMainToReplicaReq { static void Load(DemoteMainToReplicaReq *self, memgraph::slk::Reader *reader); static void Save(const DemoteMainToReplicaReq &self, memgraph::slk::Builder *builder); - explicit DemoteMainToReplicaReq(CoordinatorClientConfig::ReplicationClientInfo replication_client_info) + explicit DemoteMainToReplicaReq(ReplicationClientInfo replication_client_info) : replication_client_info(std::move(replication_client_info)) {} DemoteMainToReplicaReq() = default; - CoordinatorClientConfig::ReplicationClientInfo replication_client_info; + ReplicationClientInfo replication_client_info; }; struct DemoteMainToReplicaRes { diff --git a/src/coordination/include/coordination/coordinator_server.hpp b/src/coordination/include/coordination/coordinator_server.hpp index 2a261bc32..52a0befc5 100644 --- a/src/coordination/include/coordination/coordinator_server.hpp +++ b/src/coordination/include/coordination/coordinator_server.hpp @@ -13,14 +13,14 @@ #ifdef MG_ENTERPRISE -#include "coordination/coordinator_config.hpp" +#include "coordination/coordinator_communication_config.hpp" #include "rpc/server.hpp" namespace memgraph::coordination { class CoordinatorServer { public: - explicit CoordinatorServer(const CoordinatorServerConfig &config); + explicit CoordinatorServer(const ManagementServerConfig &config); CoordinatorServer(const CoordinatorServer &) = delete; CoordinatorServer(CoordinatorServer &&) = delete; CoordinatorServer &operator=(const CoordinatorServer &) = delete; diff --git a/src/coordination/include/coordination/coordinator_slk.hpp b/src/coordination/include/coordination/coordinator_slk.hpp index ee393b7b6..3d809da26 100644 --- a/src/coordination/include/coordination/coordinator_slk.hpp +++ b/src/coordination/include/coordination/coordinator_slk.hpp @@ -13,27 +13,37 @@ #ifdef MG_ENTERPRISE -#include "coordination/coordinator_config.hpp" +#include "coordination/coordinator_communication_config.hpp" #include "replication_coordination_glue/common.hpp" #include "slk/serialization.hpp" #include "slk/streams.hpp" namespace memgraph::slk { -using 
ReplicationClientInfo = coordination::CoordinatorClientConfig::ReplicationClientInfo; +using ReplicationClientInfo = coordination::ReplicationClientInfo; -inline void Save(const ReplicationClientInfo &obj, Builder *builder) { +inline void Save(io::network::Endpoint const &obj, Builder *builder) { + Save(obj.address, builder); + Save(obj.port, builder); + Save(obj.family, builder); +} + +inline void Load(io::network::Endpoint *obj, Reader *reader) { + Load(&obj->address, reader); + Load(&obj->port, reader); + Load(&obj->family, reader); +} + +inline void Save(ReplicationClientInfo const &obj, Builder *builder) { Save(obj.instance_name, builder); Save(obj.replication_mode, builder); - Save(obj.replication_ip_address, builder); - Save(obj.replication_port, builder); + Save(obj.replication_server, builder); } inline void Load(ReplicationClientInfo *obj, Reader *reader) { Load(&obj->instance_name, reader); Load(&obj->replication_mode, reader); - Load(&obj->replication_ip_address, reader); - Load(&obj->replication_port, reader); + Load(&obj->replication_server, reader); } inline void Save(const replication_coordination_glue::DatabaseHistory &obj, Builder *builder) { diff --git a/src/coordination/include/coordination/coordinator_state.hpp b/src/coordination/include/coordination/coordinator_state.hpp index 400c36940..f2a88e9b8 100644 --- a/src/coordination/include/coordination/coordinator_state.hpp +++ b/src/coordination/include/coordination/coordinator_state.hpp @@ -33,7 +33,7 @@ class CoordinatorState { CoordinatorState(CoordinatorState &&) noexcept = delete; CoordinatorState &operator=(CoordinatorState &&) noexcept = delete; - [[nodiscard]] auto RegisterReplicationInstance(CoordinatorClientConfig const &config) + [[nodiscard]] auto RegisterReplicationInstance(CoordinatorToReplicaConfig const &config) -> RegisterInstanceCoordinatorStatus; [[nodiscard]] auto UnregisterReplicationInstance(std::string_view instance_name) -> UnregisterInstanceCoordinatorStatus; @@ -42,11 +42,13 @@ class CoordinatorState { auto ShowInstances() const -> std::vector; - auto AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, std::string_view raft_address) -> void; + auto AddCoordinatorInstance(coordination::CoordinatorToCoordinatorConfig const &config) -> void; // NOTE: The client code must check that the server exists before calling this method. 
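// The NOTE above implies a guard at every call site. A hedged sketch of the
// expected pattern (the flag name matches this patch; Start() and the handler
// registration are assumptions about the startup code, not part of this diff):
//
//   if (FLAGS_coordinator_server_port) {
//     auto &server = coordinator_state.GetCoordinatorServer();  // created in the ctor
//     CoordinatorHandlers::Register(server, replication_handler);
//     MG_ASSERT(server.Start(), "Failed to start coordinator management server");
//   }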
auto GetCoordinatorServer() const -> CoordinatorServer &; + auto GetRoutingTable(std::map<std::string, std::string> const &routing) -> RoutingTable; + private: struct CoordinatorMainReplicaData { std::unique_ptr<CoordinatorServer> coordinator_server_; diff --git a/src/coordination/include/coordination/raft_state.hpp b/src/coordination/include/coordination/raft_state.hpp index 34da3e2a6..6e322ab78 100644 --- a/src/coordination/include/coordination/raft_state.hpp +++ b/src/coordination/include/coordination/raft_state.hpp @@ -23,7 +23,7 @@ namespace memgraph::coordination { class CoordinatorInstance; -struct CoordinatorClientConfig; +struct CoordinatorToReplicaConfig; using BecomeLeaderCb = std::function<void()>; using BecomeFollowerCb = std::function<void()>; @@ -58,24 +58,27 @@ class RaftState { auto InstanceName() const -> std::string; auto RaftSocketAddress() const -> std::string; - auto AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, std::string_view raft_address) -> void; + auto AddCoordinatorInstance(coordination::CoordinatorToCoordinatorConfig const &config) -> void; auto GetAllCoordinators() const -> std::vector<ptr<srv_config>>; auto RequestLeadership() -> bool; auto IsLeader() const -> bool; - auto FindCurrentMainInstanceName() const -> std::optional<std::string>; auto MainExists() const -> bool; auto IsMain(std::string_view instance_name) const -> bool; auto IsReplica(std::string_view instance_name) const -> bool; - auto AppendRegisterReplicationInstanceLog(CoordinatorClientConfig const &config) -> bool; + auto AppendRegisterReplicationInstanceLog(CoordinatorToReplicaConfig const &config) -> bool; auto AppendUnregisterReplicationInstanceLog(std::string_view instance_name) -> bool; auto AppendSetInstanceAsMainLog(std::string_view instance_name) -> bool; auto AppendSetInstanceAsReplicaLog(std::string_view instance_name) -> bool; auto AppendUpdateUUIDLog(utils::UUID const &uuid) -> bool; + auto AppendAddCoordinatorInstanceLog(CoordinatorToCoordinatorConfig const &config) -> bool; + + auto GetReplicationInstances() const -> std::vector<ReplicationInstanceState>; + // TODO: (andi) Do we still need GetAllCoordinators?
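// One way to read the TODO above: RaftState now exposes two views of
// coordinator membership. A sketch of the difference (types as declared in
// this header; the trailing comments are interpretation, not source):
//
//   auto raft_view = raft_state.GetAllCoordinators();       // ptr<srv_config> entries:
//                                                           // what NuRaft's cluster config knows
//   auto app_view = raft_state.GetCoordinatorInstances();   // CoordinatorInstanceState entries:
//                                                           // what the replicated log recorded,
//                                                           // including each bolt_server endpoint
//
// GetRoutingTable needs the second view, since srv_config only carries the
// Raft endpoint.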
+ auto GetCoordinatorInstances() const -> std::vector<CoordinatorInstanceState>; - auto GetInstances() const -> std::vector<InstanceState>; auto GetUUID() const -> utils::UUID; private: diff --git a/src/coordination/include/coordination/replication_instance.hpp b/src/coordination/include/coordination/replication_instance.hpp index 7b5d73b81..1e6c042c5 100644 --- a/src/coordination/include/coordination/replication_instance.hpp +++ b/src/coordination/include/coordination/replication_instance.hpp @@ -32,7 +32,7 @@ using HealthCheckInstanceCallback = void (CoordinatorInstance::*)(std::string_view); class ReplicationInstance { public: - ReplicationInstance(CoordinatorInstance *peer, CoordinatorClientConfig config, HealthCheckClientCallback succ_cb, + ReplicationInstance(CoordinatorInstance *peer, CoordinatorToReplicaConfig config, HealthCheckClientCallback succ_cb, HealthCheckClientCallback fail_cb, HealthCheckInstanceCallback succ_instance_cb, HealthCheckInstanceCallback fail_instance_cb); @@ -67,7 +67,7 @@ class ReplicationInstance { auto PauseFrequentCheck() -> void; auto ResumeFrequentCheck() -> void; - auto ReplicationClientInfo() const -> ReplClientInfo; + auto ReplicationClientInfo() const -> ReplicationClientInfo; auto EnsureReplicaHasCorrectMainUUID(utils::UUID const &curr_main_uuid) -> bool; diff --git a/src/coordination/include/nuraft/coordinator_cluster_state.hpp b/src/coordination/include/nuraft/coordinator_cluster_state.hpp index 11d539a14..5d9afe89e 100644 --- a/src/coordination/include/nuraft/coordinator_cluster_state.hpp +++ b/src/coordination/include/nuraft/coordinator_cluster_state.hpp @@ -13,7 +13,7 @@ #ifdef MG_ENTERPRISE -#include "coordination/coordinator_config.hpp" +#include "coordination/coordinator_communication_config.hpp" #include "nuraft/raft_log_action.hpp" #include "replication_coordination_glue/role.hpp" #include "utils/resource_lock.hpp" @@ -32,19 +32,29 @@ namespace memgraph::coordination { using replication_coordination_glue::ReplicationRole; -struct InstanceState { - CoordinatorClientConfig config; +struct ReplicationInstanceState { + CoordinatorToReplicaConfig config; ReplicationRole status; - friend auto operator==(InstanceState const &lhs, InstanceState const &rhs) -> bool { + friend auto operator==(ReplicationInstanceState const &lhs, ReplicationInstanceState const &rhs) -> bool { return lhs.config == rhs.config && lhs.status == rhs.status; } }; -void to_json(nlohmann::json &j, InstanceState const &instance_state); -void from_json(nlohmann::json const &j, InstanceState &instance_state); +// NOTE: Currently the coordinator instance doesn't change after registration, so we just wrap +// CoordinatorToCoordinatorConfig.
+struct CoordinatorInstanceState { + CoordinatorToCoordinatorConfig config; -using TRaftLog = std::variant; + friend auto operator==(CoordinatorInstanceState const &lhs, CoordinatorInstanceState const &rhs) -> bool { + return lhs.config == rhs.config; + } +}; + +void to_json(nlohmann::json &j, ReplicationInstanceState const &instance_state); +void from_json(nlohmann::json const &j, ReplicationInstanceState &instance_state); + +using TRaftLog = std::variant; using nuraft::buffer; using nuraft::buffer_serializer; @@ -53,7 +63,7 @@ using nuraft::ptr; class CoordinatorClusterState { public: CoordinatorClusterState() = default; - explicit CoordinatorClusterState(std::map> instances); + explicit CoordinatorClusterState(std::map> instances); CoordinatorClusterState(CoordinatorClusterState const &); CoordinatorClusterState &operator=(CoordinatorClusterState const &); @@ -62,15 +72,13 @@ class CoordinatorClusterState { CoordinatorClusterState &operator=(CoordinatorClusterState &&other) noexcept; ~CoordinatorClusterState() = default; - auto FindCurrentMainInstanceName() const -> std::optional; - auto MainExists() const -> bool; auto IsMain(std::string_view instance_name) const -> bool; auto IsReplica(std::string_view instance_name) const -> bool; - auto InsertInstance(std::string instance_name, InstanceState instance_state) -> void; + auto InsertInstance(std::string instance_name, ReplicationInstanceState instance_state) -> void; auto DoAction(TRaftLog log_entry, RaftLogAction log_action) -> void; @@ -78,12 +86,15 @@ class CoordinatorClusterState { static auto Deserialize(buffer &data) -> CoordinatorClusterState; - auto GetInstances() const -> std::vector; + auto GetReplicationInstances() const -> std::vector; + + auto GetCoordinatorInstances() const -> std::vector; auto GetUUID() const -> utils::UUID; private: - std::map> instances_{}; + std::vector coordinators_{}; + std::map> repl_instances_{}; utils::UUID uuid_{}; mutable utils::ResourceLock log_lock_{}; }; diff --git a/src/coordination/include/nuraft/coordinator_state_machine.hpp b/src/coordination/include/nuraft/coordinator_state_machine.hpp index 836ac17a6..6340cf604 100644 --- a/src/coordination/include/nuraft/coordinator_state_machine.hpp +++ b/src/coordination/include/nuraft/coordinator_state_machine.hpp @@ -13,7 +13,7 @@ #ifdef MG_ENTERPRISE -#include "coordination/coordinator_config.hpp" +#include "coordination/coordinator_communication_config.hpp" #include "nuraft/coordinator_cluster_state.hpp" #include "nuraft/raft_log_action.hpp" @@ -42,17 +42,18 @@ class CoordinatorStateMachine : public state_machine { CoordinatorStateMachine &operator=(CoordinatorStateMachine &&) = delete; ~CoordinatorStateMachine() override {} - auto FindCurrentMainInstanceName() const -> std::optional; + // TODO: (andi) Check API of this class. 
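// A minimal sketch of the round-trip contract between the Serialize* helpers
// below and DecodeLog (it leans on the defaulted operator== from
// coordinator_communication_config.hpp; the assertions are illustrative):
//
//   auto entry = CoordinatorStateMachine::SerializeAddCoordinatorInstance(config);
//   auto [payload, action] = CoordinatorStateMachine::DecodeLog(*entry);
//   MG_ASSERT(action == RaftLogAction::ADD_COORDINATOR_INSTANCE);
//   MG_ASSERT(std::get<CoordinatorToCoordinatorConfig>(payload) == config);
//
// Every new RaftLogAction needs both halves; a Serialize* helper without a
// matching DecodeLog case ends in DecodeLog's "Unknown action" throw.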
auto MainExists() const -> bool; auto IsMain(std::string_view instance_name) const -> bool; auto IsReplica(std::string_view instance_name) const -> bool; static auto CreateLog(nlohmann::json &&log) -> ptr; - static auto SerializeRegisterInstance(CoordinatorClientConfig const &config) -> ptr; + static auto SerializeRegisterInstance(CoordinatorToReplicaConfig const &config) -> ptr; static auto SerializeUnregisterInstance(std::string_view instance_name) -> ptr; static auto SerializeSetInstanceAsMain(std::string_view instance_name) -> ptr; static auto SerializeSetInstanceAsReplica(std::string_view instance_name) -> ptr; static auto SerializeUpdateUUID(utils::UUID const &uuid) -> ptr; + static auto SerializeAddCoordinatorInstance(CoordinatorToCoordinatorConfig const &config) -> ptr; static auto DecodeLog(buffer &data) -> std::pair; @@ -80,7 +81,10 @@ class CoordinatorStateMachine : public state_machine { auto create_snapshot(snapshot &s, async_result::handler_type &when_done) -> void override; - auto GetInstances() const -> std::vector; + auto GetReplicationInstances() const -> std::vector; + + auto GetCoordinatorInstances() const -> std::vector; + auto GetUUID() const -> utils::UUID; private: diff --git a/src/coordination/include/nuraft/raft_log_action.hpp b/src/coordination/include/nuraft/raft_log_action.hpp index 3f1b26dfa..b9cdd233a 100644 --- a/src/coordination/include/nuraft/raft_log_action.hpp +++ b/src/coordination/include/nuraft/raft_log_action.hpp @@ -27,16 +27,16 @@ enum class RaftLogAction : uint8_t { UNREGISTER_REPLICATION_INSTANCE, SET_INSTANCE_AS_MAIN, SET_INSTANCE_AS_REPLICA, - UPDATE_UUID + UPDATE_UUID, + ADD_COORDINATOR_INSTANCE }; -NLOHMANN_JSON_SERIALIZE_ENUM(RaftLogAction, { - {RaftLogAction::REGISTER_REPLICATION_INSTANCE, "register"}, - {RaftLogAction::UNREGISTER_REPLICATION_INSTANCE, "unregister"}, - {RaftLogAction::SET_INSTANCE_AS_MAIN, "promote"}, - {RaftLogAction::SET_INSTANCE_AS_REPLICA, "demote"}, - {RaftLogAction::UPDATE_UUID, "update_uuid"}, - }) +NLOHMANN_JSON_SERIALIZE_ENUM(RaftLogAction, {{RaftLogAction::REGISTER_REPLICATION_INSTANCE, "register"}, + {RaftLogAction::UNREGISTER_REPLICATION_INSTANCE, "unregister"}, + {RaftLogAction::SET_INSTANCE_AS_MAIN, "promote"}, + {RaftLogAction::SET_INSTANCE_AS_REPLICA, "demote"}, + {RaftLogAction::UPDATE_UUID, "update_uuid"}, + {RaftLogAction::ADD_COORDINATOR_INSTANCE, "add_coordinator_instance"}}) } // namespace memgraph::coordination #endif diff --git a/src/coordination/raft_state.cpp b/src/coordination/raft_state.cpp index fd93160b6..6175fda4b 100644 --- a/src/coordination/raft_state.cpp +++ b/src/coordination/raft_state.cpp @@ -13,7 +13,7 @@ #include #include -#include "coordination/coordinator_config.hpp" +#include "coordination/coordinator_communication_config.hpp" #include "coordination/coordinator_exceptions.hpp" #include "coordination/raft_state.hpp" #include "utils/counter.hpp" @@ -113,10 +113,9 @@ auto RaftState::InstanceName() const -> std::string { auto RaftState::RaftSocketAddress() const -> std::string { return raft_endpoint_.SocketAddress(); } -auto RaftState::AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, std::string_view raft_address) - -> void { - auto const endpoint = fmt::format("{}:{}", raft_address, raft_port); - srv_config const srv_config_to_add(static_cast(raft_server_id), endpoint); +auto RaftState::AddCoordinatorInstance(coordination::CoordinatorToCoordinatorConfig const &config) -> void { + auto const endpoint = config.coordinator_server.SocketAddress(); + srv_config const 
srv_config_to_add(static_cast(config.coordinator_server_id), endpoint); auto cmd_result = raft_server_->add_srv(srv_config_to_add); @@ -134,9 +133,9 @@ auto RaftState::AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_po bool added{false}; while (!maybe_stop()) { std::this_thread::sleep_for(std::chrono::milliseconds(waiting_period)); - const auto server_config = raft_server_->get_srv_config(static_cast(raft_server_id)); + const auto server_config = raft_server_->get_srv_config(static_cast(config.coordinator_server_id)); if (server_config) { - spdlog::trace("Server with id {} added to cluster", raft_server_id); + spdlog::trace("Server with id {} added to cluster", config.coordinator_server_id); added = true; break; } @@ -158,7 +157,7 @@ auto RaftState::IsLeader() const -> bool { return raft_server_->is_leader(); } auto RaftState::RequestLeadership() -> bool { return raft_server_->is_leader() || raft_server_->request_leadership(); } -auto RaftState::AppendRegisterReplicationInstanceLog(CoordinatorClientConfig const &config) -> bool { +auto RaftState::AppendRegisterReplicationInstanceLog(CoordinatorToReplicaConfig const &config) -> bool { auto new_log = CoordinatorStateMachine::SerializeRegisterInstance(config); auto const res = raft_server_->append_entries({new_log}); @@ -261,8 +260,26 @@ auto RaftState::AppendUpdateUUIDLog(utils::UUID const &uuid) -> bool { return true; } -auto RaftState::FindCurrentMainInstanceName() const -> std::optional { - return state_machine_->FindCurrentMainInstanceName(); +auto RaftState::AppendAddCoordinatorInstanceLog(CoordinatorToCoordinatorConfig const &config) -> bool { + auto new_log = CoordinatorStateMachine::SerializeAddCoordinatorInstance(config); + auto const res = raft_server_->append_entries({new_log}); + if (!res->get_accepted()) { + spdlog::error( + "Failed to accept request for adding coordinator instance {}. 
Most likely the reason is that the instance is " + "not the leader.", + config.coordinator_server_id); + return false; + } + + spdlog::info("Request for adding coordinator instance {} accepted", config.coordinator_server_id); + + if (res->get_result_code() != nuraft::cmd_result_code::OK) { + spdlog::error("Failed to add coordinator instance {} with error code {}", config.coordinator_server_id, + static_cast(res->get_result_code())); + return false; + } + + return true; } auto RaftState::MainExists() const -> bool { return state_machine_->MainExists(); } @@ -273,7 +290,13 @@ auto RaftState::IsReplica(std::string_view instance_name) const -> bool { return state_machine_->IsReplica(instance_name); } -auto RaftState::GetInstances() const -> std::vector { return state_machine_->GetInstances(); } +auto RaftState::GetReplicationInstances() const -> std::vector { + return state_machine_->GetReplicationInstances(); +} + +auto RaftState::GetCoordinatorInstances() const -> std::vector { + return state_machine_->GetCoordinatorInstances(); +} auto RaftState::GetUUID() const -> utils::UUID { return state_machine_->GetUUID(); } diff --git a/src/coordination/replication_instance.cpp b/src/coordination/replication_instance.cpp index ca7572ea7..34d889775 100644 --- a/src/coordination/replication_instance.cpp +++ b/src/coordination/replication_instance.cpp @@ -20,7 +20,7 @@ namespace memgraph::coordination { -ReplicationInstance::ReplicationInstance(CoordinatorInstance *peer, CoordinatorClientConfig config, +ReplicationInstance::ReplicationInstance(CoordinatorInstance *peer, CoordinatorToReplicaConfig config, HealthCheckClientCallback succ_cb, HealthCheckClientCallback fail_cb, HealthCheckInstanceCallback succ_instance_cb, HealthCheckInstanceCallback fail_instance_cb) @@ -82,7 +82,7 @@ auto ReplicationInstance::StopFrequentCheck() -> void { client_.StopFrequentChec auto ReplicationInstance::PauseFrequentCheck() -> void { client_.PauseFrequentCheck(); } auto ReplicationInstance::ResumeFrequentCheck() -> void { client_.ResumeFrequentCheck(); } -auto ReplicationInstance::ReplicationClientInfo() const -> CoordinatorClientConfig::ReplicationClientInfo { +auto ReplicationInstance::ReplicationClientInfo() const -> coordination::ReplicationClientInfo { return client_.ReplicationClientInfo(); } diff --git a/src/dbms/coordinator_handler.cpp b/src/dbms/coordinator_handler.cpp index 292d50d3d..1f64892bc 100644 --- a/src/dbms/coordinator_handler.cpp +++ b/src/dbms/coordinator_handler.cpp @@ -20,7 +20,7 @@ namespace memgraph::dbms { CoordinatorHandler::CoordinatorHandler(coordination::CoordinatorState &coordinator_state) : coordinator_state_(coordinator_state) {} -auto CoordinatorHandler::RegisterReplicationInstance(coordination::CoordinatorClientConfig const &config) +auto CoordinatorHandler::RegisterReplicationInstance(coordination::CoordinatorToReplicaConfig const &config) -> coordination::RegisterInstanceCoordinatorStatus { return coordinator_state_.RegisterReplicationInstance(config); } @@ -39,9 +39,8 @@ auto CoordinatorHandler::ShowInstances() const -> std::vector void { - coordinator_state_.AddCoordinatorInstance(raft_server_id, raft_port, raft_address); +auto CoordinatorHandler::AddCoordinatorInstance(coordination::CoordinatorToCoordinatorConfig const &config) -> void { + coordinator_state_.AddCoordinatorInstance(config); } } // namespace memgraph::dbms diff --git a/src/dbms/coordinator_handler.hpp b/src/dbms/coordinator_handler.hpp index 1c456134d..f3640736a 100644 --- a/src/dbms/coordinator_handler.hpp +++ 
b/src/dbms/coordinator_handler.hpp @@ -13,7 +13,7 @@ #ifdef MG_ENTERPRISE -#include "coordination/coordinator_config.hpp" +#include "coordination/coordinator_communication_config.hpp" #include "coordination/coordinator_state.hpp" #include "coordination/instance_status.hpp" #include "coordination/register_main_replica_coordinator_status.hpp" @@ -30,7 +30,7 @@ class CoordinatorHandler { // TODO: (andi) When moving coordinator state on same instances, rename from RegisterReplicationInstance to // RegisterInstance - auto RegisterReplicationInstance(coordination::CoordinatorClientConfig const &config) + auto RegisterReplicationInstance(coordination::CoordinatorToReplicaConfig const &config) -> coordination::RegisterInstanceCoordinatorStatus; auto UnregisterReplicationInstance(std::string_view instance_name) @@ -40,7 +40,7 @@ class CoordinatorHandler { auto ShowInstances() const -> std::vector; - auto AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, std::string_view raft_address) -> void; + auto AddCoordinatorInstance(coordination::CoordinatorToCoordinatorConfig const &config) -> void; private: coordination::CoordinatorState &coordinator_state_; diff --git a/src/glue/SessionHL.cpp b/src/glue/SessionHL.cpp index 6a48f15ca..51a444a30 100644 --- a/src/glue/SessionHL.cpp +++ b/src/glue/SessionHL.cpp @@ -249,6 +249,40 @@ std::pair, std::optional> SessionHL::Interpret( } } +using memgraph::communication::bolt::Value; + +#ifdef MG_ENTERPRISE +auto SessionHL::Route(std::map const &routing, + std::vector const & /*bookmarks*/, + std::map const & /*extra*/) -> std::map { + auto routing_map = ranges::views::transform( + routing, [](auto const &pair) { return std::pair(pair.first, pair.second.ValueString()); }) | + ranges::to>(); + + auto routing_table_res = interpreter_.Route(routing_map); + + auto create_server = [](auto const &server_info) -> Value { + auto const &[addresses, role] = server_info; + std::map server_map; + auto bolt_addresses = ranges::views::transform(addresses, [](auto const &addr) { return Value{addr}; }) | + ranges::to>(); + + server_map["addresses"] = std::move(bolt_addresses); + server_map["role"] = memgraph::communication::bolt::Value{role}; + return Value{std::move(server_map)}; + }; + + std::map communication_res; + communication_res["ttl"] = Value{routing_table_res.ttl}; + communication_res["db"] = Value{}; + + auto servers = ranges::views::transform(routing_table_res.servers, create_server) | ranges::to>(); + communication_res["servers"] = memgraph::communication::bolt::Value{std::move(servers)}; + + return {{"rt", memgraph::communication::bolt::Value{std::move(communication_res)}}}; +} +#endif + void SessionHL::RollbackTransaction() { try { interpreter_.RollbackTransaction(); diff --git a/src/glue/SessionHL.hpp b/src/glue/SessionHL.hpp index cf0280fcc..9360f96b2 100644 --- a/src/glue/SessionHL.hpp +++ b/src/glue/SessionHL.hpp @@ -55,6 +55,13 @@ class SessionHL final : public memgraph::communication::bolt::Session ¶ms, const std::map &extra) override; +#ifdef MG_ENTERPRISE + auto Route(std::map const &routing, + std::vector const &bookmarks, + std::map const &extra) + -> std::map override; +#endif + std::map Pull(TEncoder *encoder, std::optional n, std::optional qid) override; diff --git a/src/io/network/endpoint.cpp b/src/io/network/endpoint.cpp index 6ed4a6753..c996055ff 100644 --- a/src/io/network/endpoint.cpp +++ b/src/io/network/endpoint.cpp @@ -82,8 +82,7 @@ bool Endpoint::IsResolvableAddress(std::string_view address, uint16_t port) { return status == 0; 
} -std::optional Endpoint::ParseSocketOrAddress(std::string_view address, - std::optional default_port) { +std::optional Endpoint::ParseSocketOrAddress(std::string_view address, std::optional default_port) { auto const parts = utils::SplitView(address, delimiter); if (parts.size() > 2) { @@ -109,13 +108,13 @@ std::optional Endpoint::ParseSocketOrAddress(std::string_view add }(); if (GetIpFamily(addr) == IpFamily::NONE) { - if (IsResolvableAddress(addr, *port)) { // NOLINT - return std::pair{addr, *port}; // NOLINT + if (IsResolvableAddress(addr, *port)) { // NOLINT + return Endpoint{std::string(addr), *port}; // NOLINT } return std::nullopt; } - return std::pair{addr, *port}; // NOLINT + return Endpoint{std::string(addr), *port}; // NOLINT } auto Endpoint::ValidatePort(std::optional port) -> bool { @@ -138,4 +137,14 @@ auto Endpoint::ValidatePort(std::optional port) -> bool { return true; } +void to_json(nlohmann::json &j, Endpoint const &config) { + j = nlohmann::json{{"address", config.address}, {"port", config.port}, {"family", config.family}}; +} + +void from_json(nlohmann::json const &j, Endpoint &config) { + config.address = j.at("address").get(); + config.port = j.at("port").get(); + config.family = j.at("family").get(); +} + } // namespace memgraph::io::network diff --git a/src/io/network/endpoint.hpp b/src/io/network/endpoint.hpp index f46d28ace..c47c736ee 100644 --- a/src/io/network/endpoint.hpp +++ b/src/io/network/endpoint.hpp @@ -17,9 +17,9 @@ #include #include -namespace memgraph::io::network { +#include "json/json.hpp" -using ParsedAddress = std::pair; +namespace memgraph::io::network { struct Endpoint { static const struct needs_resolving_t { @@ -39,8 +39,8 @@ struct Endpoint { enum class IpFamily : std::uint8_t { NONE, IP4, IP6 }; - static std::optional ParseSocketOrAddress(std::string_view address, - std::optional default_port = {}); + static std::optional ParseSocketOrAddress(std::string_view address, + std::optional default_port = {}); std::string SocketAddress() const; @@ -59,4 +59,7 @@ struct Endpoint { static auto ValidatePort(std::optional port) -> bool; }; +void to_json(nlohmann::json &j, Endpoint const &config); +void from_json(nlohmann::json const &j, Endpoint &config); + } // namespace memgraph::io::network diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp index 332054485..2fba0addb 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -328,15 +328,14 @@ class ReplQueryHandler { const auto repl_mode = convertToReplicationMode(sync_mode); - const auto maybe_ip_and_port = + auto maybe_endpoint = io::network::Endpoint::ParseSocketOrAddress(socket_address, memgraph::replication::kDefaultReplicationPort); - if (maybe_ip_and_port) { - const auto [ip, port] = *maybe_ip_and_port; + if (maybe_endpoint) { const auto replication_config = replication::ReplicationClientConfig{.name = name, .mode = repl_mode, - .ip_address = std::string(ip), - .port = port, + .ip_address = std::move(maybe_endpoint->address), + .port = maybe_endpoint->port, .replica_check_frequency = replica_check_frequency, .ssl = std::nullopt}; @@ -413,39 +412,41 @@ class CoordQueryHandler final : public query::CoordinatorQueryHandler { } } - void RegisterReplicationInstance(std::string_view coordinator_socket_address, - std::string_view replication_socket_address, + void RegisterReplicationInstance(std::string_view bolt_server, std::string_view management_server, + std::string_view replication_server, std::chrono::seconds const &instance_check_frequency, 
std::chrono::seconds const &instance_down_timeout, std::chrono::seconds const &instance_get_uuid_frequency, std::string_view instance_name, CoordinatorQuery::SyncMode sync_mode) override { - const auto maybe_replication_ip_port = io::network::Endpoint::ParseSocketOrAddress(replication_socket_address); - if (!maybe_replication_ip_port) { + auto const maybe_bolt_server = io::network::Endpoint::ParseSocketOrAddress(bolt_server); + if (!maybe_bolt_server) { + throw QueryRuntimeException("Invalid bolt socket address!"); + } + + auto const maybe_management_server = io::network::Endpoint::ParseSocketOrAddress(management_server); + if (!maybe_management_server) { + throw QueryRuntimeException("Invalid management socket address!"); + } + + auto const maybe_replication_server = io::network::Endpoint::ParseSocketOrAddress(replication_server); + if (!maybe_replication_server) { throw QueryRuntimeException("Invalid replication socket address!"); } - const auto maybe_coordinator_ip_port = io::network::Endpoint::ParseSocketOrAddress(coordinator_socket_address); - if (!maybe_replication_ip_port) { - throw QueryRuntimeException("Invalid replication socket address!"); - } - - const auto [replication_ip, replication_port] = *maybe_replication_ip_port; - const auto [coordinator_server_ip, coordinator_server_port] = *maybe_coordinator_ip_port; - const auto repl_config = coordination::CoordinatorClientConfig::ReplicationClientInfo{ - .instance_name = std::string(instance_name), - .replication_mode = convertFromCoordinatorToReplicationMode(sync_mode), - .replication_ip_address = std::string(replication_ip), - .replication_port = replication_port}; + auto const repl_config = + coordination::ReplicationClientInfo{.instance_name = std::string(instance_name), + .replication_mode = convertFromCoordinatorToReplicationMode(sync_mode), + .replication_server = *maybe_replication_server}; auto coordinator_client_config = - coordination::CoordinatorClientConfig{.instance_name = std::string(instance_name), - .ip_address = std::string(coordinator_server_ip), - .port = coordinator_server_port, - .instance_health_check_frequency_sec = instance_check_frequency, - .instance_down_timeout_sec = instance_down_timeout, - .instance_get_uuid_frequency_sec = instance_get_uuid_frequency, - .replication_client_info = repl_config, - .ssl = std::nullopt}; + coordination::CoordinatorToReplicaConfig{.instance_name = std::string(instance_name), + .mgt_server = *maybe_management_server, + .bolt_server = *maybe_bolt_server, + .replication_client_info = repl_config, + .instance_health_check_frequency_sec = instance_check_frequency, + .instance_down_timeout_sec = instance_down_timeout, + .instance_get_uuid_frequency_sec = instance_get_uuid_frequency, + .ssl = std::nullopt}; auto status = coordinator_handler_.RegisterReplicationInstance(coordinator_client_config); switch (status) { @@ -473,15 +474,25 @@ class CoordQueryHandler final : public query::CoordinatorQueryHandler { } } - auto AddCoordinatorInstance(uint32_t raft_server_id, std::string_view raft_socket_address) -> void override { - auto const maybe_ip_and_port = io::network::Endpoint::ParseSocketOrAddress(raft_socket_address); - if (maybe_ip_and_port) { - auto const [ip, port] = *maybe_ip_and_port; - spdlog::info("Adding instance {} with raft socket address {}:{}.", raft_server_id, ip, port); - coordinator_handler_.AddCoordinatorInstance(raft_server_id, port, ip); - } else { - spdlog::error("Invalid raft socket address {}.", raft_socket_address); + auto AddCoordinatorInstance(uint32_t 
raft_server_id, std::string_view bolt_server, + std::string_view coordinator_server) -> void override { + auto const maybe_coordinator_server = io::network::Endpoint::ParseSocketOrAddress(coordinator_server); + if (!maybe_coordinator_server) { + throw QueryRuntimeException("Invalid coordinator socket address!"); } + + auto const maybe_bolt_server = io::network::Endpoint::ParseSocketOrAddress(bolt_server); + if (!maybe_bolt_server) { + throw QueryRuntimeException("Invalid bolt socket address!"); + } + + auto const coord_coord_config = + coordination::CoordinatorToCoordinatorConfig{.coordinator_server_id = raft_server_id, + .bolt_server = *maybe_bolt_server, + .coordinator_server = *maybe_coordinator_server}; + + coordinator_handler_.AddCoordinatorInstance(coord_coord_config); + spdlog::info("Added instance on coordinator server {}", maybe_coordinator_server->SocketAddress()); } void SetReplicationInstanceToMain(std::string_view instance_name) override { @@ -1197,8 +1208,9 @@ Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param auto coord_server_id = coordinator_query->coordinator_server_id_->Accept(evaluator).ValueInt(); callback.fn = [handler = CoordQueryHandler{*coordinator_state}, coord_server_id, + bolt_server = bolt_server_it->second, coordinator_server = coordinator_server_it->second]() mutable { - handler.AddCoordinatorInstance(coord_server_id, coordinator_server); + handler.AddCoordinatorInstance(coord_server_id, bolt_server, coordinator_server); return std::vector>(); }; @@ -1243,15 +1255,15 @@ Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param callback.fn = [handler = CoordQueryHandler{*coordinator_state}, instance_health_check_frequency_sec = config.instance_health_check_frequency_sec, - management_server = management_server_it->second, - replication_server = replication_server_it->second, bolt_server = bolt_server_it->second, + bolt_server = bolt_server_it->second, management_server = management_server_it->second, + replication_server = replication_server_it->second, instance_name = coordinator_query->instance_name_, instance_down_timeout_sec = config.instance_down_timeout_sec, instance_get_uuid_frequency_sec = config.instance_get_uuid_frequency_sec, sync_mode = coordinator_query->sync_mode_]() mutable { - handler.RegisterReplicationInstance(management_server, replication_server, instance_health_check_frequency_sec, - instance_down_timeout_sec, instance_get_uuid_frequency_sec, instance_name, - sync_mode); + handler.RegisterReplicationInstance(bolt_server, management_server, replication_server, + instance_health_check_frequency_sec, instance_down_timeout_sec, + instance_get_uuid_frequency_sec, instance_name, sync_mode); return std::vector>(); }; @@ -4266,6 +4278,28 @@ void Interpreter::RollbackTransaction() { ResetInterpreter(); } +#ifdef MG_ENTERPRISE +auto Interpreter::Route(std::map const &routing) -> RouteResult { + // TODO: (andi) Test + if (!FLAGS_raft_server_id) { + auto const &address = routing.find("address"); + if (address == routing.end()) { + throw QueryException("Routing table must contain address field."); + } + + auto result = RouteResult{}; + if (interpreter_context_->repl_state->IsMain()) { + result.servers.emplace_back(std::vector{address->second}, "WRITE"); + } else { + result.servers.emplace_back(std::vector{address->second}, "READ"); + } + return result; + } + + return RouteResult{.servers = interpreter_context_->coordinator_state_->GetRoutingTable(routing)}; +} +#endif + #if MG_ENTERPRISE // Before 
Prepare or during Prepare, but single-threaded. // TODO: Is there any cleanup? diff --git a/src/query/interpreter.hpp b/src/query/interpreter.hpp index 5366b4472..5d10a24de 100644 --- a/src/query/interpreter.hpp +++ b/src/query/interpreter.hpp @@ -143,8 +143,8 @@ class CoordinatorQueryHandler { }; /// @throw QueryRuntimeException if an error occurred. - virtual void RegisterReplicationInstance(std::string_view coordinator_socket_address, - std::string_view replication_socket_address, + virtual void RegisterReplicationInstance(std::string_view bolt_server, std::string_view management_server, + std::string_view replication_server, std::chrono::seconds const &instance_health_check_frequency, std::chrono::seconds const &instance_down_timeout, std::chrono::seconds const &instance_get_uuid_frequency, @@ -160,7 +160,8 @@ class CoordinatorQueryHandler { virtual std::vector<coordination::InstanceStatus> ShowInstances() const = 0; /// @throw QueryRuntimeException if an error occurred. - virtual auto AddCoordinatorInstance(uint32_t raft_server_id, std::string_view coordinator_socket_address) -> void = 0; + virtual auto AddCoordinatorInstance(uint32_t raft_server_id, std::string_view bolt_server, + std::string_view coordinator_server) -> void = 0; }; #endif @@ -247,6 +248,14 @@ class Interpreter final { std::optional<std::string> db; }; +#ifdef MG_ENTERPRISE + struct RouteResult { + int ttl{300}; + std::string db{}; // Currently not used since we don't have any specific replication groups etc. + coordination::RoutingTable servers{}; + }; +#endif + std::shared_ptr<QueryUserOrRole> user_or_role_{}; bool in_explicit_transaction_{false}; CurrentDB current_db_; @@ -272,6 +281,10 @@ class Interpreter final { const std::map<std::string, storage::PropertyValue> &params, QueryExtras const &extras); +#ifdef MG_ENTERPRISE + auto Route(std::map<std::string, std::string> const &routing) -> RouteResult; +#endif + /** * Execute the last prepared query and stream *all* of the results into the * given stream.
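The single-instance branch of Interpreter::Route above is worth pinning down with an example. A sketch of what a caller sees when no --raft-server-id is set (the ttl and roles follow the code in this patch; the address is illustrative):

    // Hypothetical call against a standalone instance:
    auto result = interpreter.Route({{"address", "localhost:7687"}});
    // result.ttl == 300, result.db is currently unused;
    // on MAIN:    result.servers == {{{"localhost:7687"}, "WRITE"}}
    // on REPLICA: result.servers == {{{"localhost:7687"}, "READ"}}
    // With --raft-server-id set, the request is answered from the
    // coordinator's GetRoutingTable instead.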
diff --git a/tests/drivers/go/v5/docs_quick_start.go b/tests/drivers/go/v5/docs_quick_start.go index 69805acc1..5788ed703 100644 --- a/tests/drivers/go/v5/docs_quick_start.go +++ b/tests/drivers/go/v5/docs_quick_start.go @@ -13,12 +13,13 @@ func handle_if_error(err error) { } func main() { - dbUri := "bolt://localhost:7687" - driver, err := neo4j.NewDriver(dbUri, neo4j.BasicAuth("", "", "")) - if err != nil { - log.Fatal("An error occurred opening conn: %s", err) - } - defer driver.Close() + fmt.Println("Started running docs_quick_start.go test") + dbUri := "bolt://localhost:7687" + driver, err := neo4j.NewDriver(dbUri, neo4j.BasicAuth("", "", "")) + if err != nil { + log.Fatal("An error occurred opening conn: %s", err) + } + defer driver.Close() session := driver.NewSession(neo4j.SessionConfig{}) defer session.Close() @@ -33,7 +34,7 @@ func main() { _,err = session.WriteTransaction(testAll) handle_if_error(err) - fmt.Println("All ok!") + fmt.Println("doc_quick_start.go test finished successfully.") } func clearDatabase(tx neo4j.Transaction) (interface{}, error) { @@ -75,15 +76,14 @@ func testAll(tx neo4j.Transaction) (interface{}, error) { handle_if_error(err) age, err := neo4j.GetProperty[int64](node_value, "age") handle_if_error(err) - + if label != "Person" && name != "Alice" && age != 22 { return nil, fmt.Errorf("Data doesn't match.") } - + fmt.Println("Label", label) fmt.Println("name", name) fmt.Println("age", age) return result.Consume() } - diff --git a/tests/drivers/go/v5/go.mod b/tests/drivers/go/v5/go.mod index a44baf405..f05f98dc6 100644 --- a/tests/drivers/go/v5/go.mod +++ b/tests/drivers/go/v5/go.mod @@ -3,6 +3,6 @@ module bolt-test go 1.18 require ( - github.com/neo4j/neo4j-go-driver/v5 v5.13.0 // indirect + github.com/neo4j/neo4j-go-driver/v5 v5.18.0 // indirect golang.org/dl v0.0.0-20230502172222-5216546bad51 // indirect ) diff --git a/tests/drivers/go/v5/go.sum b/tests/drivers/go/v5/go.sum index dc85aef95..1c956d94a 100644 --- a/tests/drivers/go/v5/go.sum +++ b/tests/drivers/go/v5/go.sum @@ -8,5 +8,7 @@ github.com/neo4j/neo4j-go-driver/v5 v5.9.0 h1:TYxT0RSiwnvVFia90V7TLnRXv8HkdQQ6rT github.com/neo4j/neo4j-go-driver/v5 v5.9.0/go.mod h1:Vff8OwT7QpLm7L2yYr85XNWe9Rbqlbeb9asNXJTHO4k= github.com/neo4j/neo4j-go-driver/v5 v5.13.0 h1:NmyUxh4LYTdcJdI6EnazHyUKu1f0/BPiHCYUZUZIGQw= github.com/neo4j/neo4j-go-driver/v5 v5.13.0/go.mod h1:Vff8OwT7QpLm7L2yYr85XNWe9Rbqlbeb9asNXJTHO4k= +github.com/neo4j/neo4j-go-driver/v5 v5.18.0 h1:3dmYsCYt/Fc/bPeSyGRGGfn/T6h06/OmHm72OFQKa3c= +github.com/neo4j/neo4j-go-driver/v5 v5.18.0/go.mod h1:Vff8OwT7QpLm7L2yYr85XNWe9Rbqlbeb9asNXJTHO4k= golang.org/dl v0.0.0-20230502172222-5216546bad51 h1:Bmo/kmR2hzyhGt3jjtl1ghkCqa5LINbB9D3QTkiLJIY= golang.org/dl v0.0.0-20230502172222-5216546bad51/go.mod h1:IUMfjQLJQd4UTqG1Z90tenwKoCX93Gn3MAQJMOSBsDQ= diff --git a/tests/drivers/go/v5/read_routing.go b/tests/drivers/go/v5/read_routing.go new file mode 100644 index 000000000..e8c2ffba2 --- /dev/null +++ b/tests/drivers/go/v5/read_routing.go @@ -0,0 +1,51 @@ +package main + +import ( + "fmt" + "github.com/neo4j/neo4j-go-driver/v5/neo4j" +) + +func read_messages(uri string) { + username := "" + password := "" + + // Connect to Memgraph + driver, err := neo4j.NewDriver(uri, neo4j.BasicAuth(username, password, "")) + if err != nil { + panic(err) + } + defer driver.Close() + + // Use AccessModeRead for read transactions + session := driver.NewSession(neo4j.SessionConfig{AccessMode: neo4j.AccessModeRead}) + defer session.Close() + + greeting, err := 
session.ReadTransaction(func(transaction neo4j.Transaction) (interface{}, error) { + result, err := transaction.Run("MATCH (n:Greeting) RETURN n.message AS message LIMIT 1", nil) + if err != nil { + return nil, err + } + + if result.Next() { + return result.Record().Values[0], nil + } + + return nil, result.Err() + }) + + if err != nil { + panic(err) + } + + fmt.Println(greeting) + +} + +// Test checks that you can use bolt+routing for connecting to main and coordinators for reading. +func main() { + fmt.Println("Started running read_routing.go test") + read_messages("neo4j://localhost:7690") // coordinator_1 + read_messages("neo4j://localhost:7691") // coordinator_2 + read_messages("neo4j://localhost:7692") // coordinator_3 + fmt.Println("Successfully finished running read_routing.go test") +} diff --git a/tests/drivers/go/v5/run.sh b/tests/drivers/go/v5/run.sh index cbe31bd26..344495f15 100755 --- a/tests/drivers/go/v5/run.sh +++ b/tests/drivers/go/v5/run.sh @@ -18,4 +18,3 @@ done go get github.com/neo4j/neo4j-go-driver/v5 go run docs_quick_start.go -# go run parallel_edge_import.go diff --git a/tests/drivers/go/v5/run_cluster_tests.sh b/tests/drivers/go/v5/run_cluster_tests.sh new file mode 100755 index 000000000..9ccd7b0c0 --- /dev/null +++ b/tests/drivers/go/v5/run_cluster_tests.sh @@ -0,0 +1,21 @@ +#!/bin/bash -e + +GO_VERSION="1.18.9" +GO_VERSION_DIR="/opt/go$GO_VERSION" +if [ -f "$GO_VERSION_DIR/go/bin/go" ]; then + export GOROOT="$GO_VERSION_DIR/go" + export GOPATH="$HOME/go$GO_VERSION" + export PATH="$GO_VERSION_DIR/go/bin:$PATH" +fi + +# check if go is installed +for i in go; do + if ! which $i >/dev/null; then + echo "Please install $i!" + exit 1 + fi +done + +go get github.com/neo4j/neo4j-go-driver/v5 +go run write_routing.go +go run read_routing.go diff --git a/tests/drivers/go/v5/write_routing.go b/tests/drivers/go/v5/write_routing.go new file mode 100644 index 000000000..f77dd29ca --- /dev/null +++ b/tests/drivers/go/v5/write_routing.go @@ -0,0 +1,51 @@ +package main + +import ( + "fmt" + "github.com/neo4j/neo4j-go-driver/v5/neo4j" +) + +func create_message(uri string) { + username := "" + password := "" + + // Connect to Memgraph + driver, err := neo4j.NewDriver(uri, neo4j.BasicAuth(username, password, "")) + if err != nil { + panic(err) + } + defer driver.Close() + + session := driver.NewSession(neo4j.SessionConfig{AccessMode: neo4j.AccessModeWrite}) + defer session.Close() + + greeting, err := session.WriteTransaction(func(transaction neo4j.Transaction) (interface{}, error) { + result, err := transaction.Run("CREATE (n:Greeting) SET n.message = $message RETURN n.message", map[string]interface{}{ + "message": "Hello, World!", + }) + if err != nil { + return nil, err + } + + if result.Next() { + return result.Record().Values[0], nil + } + + return nil, result.Err() + }) + + if err != nil { + panic(err) + } + + fmt.Println(greeting) +} + +// Test checks that you can use bolt+routing for connecting to main and coordinators for writing.
+func main() { + fmt.Println("Started running write_routing.go test") + create_message("neo4j://localhost:7690") // coordinator_1 + create_message("neo4j://localhost:7691") // coordinator_2 + create_message("neo4j://localhost:7692") // coordinator_3 + fmt.Println("Successfully finished running write_routing.go test") +} diff --git a/tests/drivers/java/v5_8/pom.xml b/tests/drivers/java/v5_8/pom.xml index 6db6a6ded..6db821683 100644 --- a/tests/drivers/java/v5_8/pom.xml +++ b/tests/drivers/java/v5_8/pom.xml @@ -104,6 +104,45 @@ single + <execution> + <id>build-e</id> + <configuration> + <archive> + <manifest> + <mainClass>memgraph.WriteRouting</mainClass> + </manifest> + </archive> + <descriptorRefs> + <descriptorRef>jar-with-dependencies</descriptorRef> + </descriptorRefs> + <appendAssemblyId>false</appendAssemblyId> + <finalName>WriteRouting</finalName> + </configuration> + <phase>package</phase> + <goals> + <goal>single</goal> + </goals> + </execution> + <execution> + <id>build-f</id> + <configuration> + <archive> + <manifest> + <mainClass>memgraph.ReadRouting</mainClass> + </manifest> + </archive> + <descriptorRefs> + <descriptorRef>jar-with-dependencies</descriptorRef> + </descriptorRefs> + <appendAssemblyId>false</appendAssemblyId> + <finalName>ReadRouting</finalName> + </configuration> + <phase>package</phase> + <goals> + <goal>single</goal> + </goals> + </execution> diff --git a/tests/drivers/java/v5_8/run.sh b/tests/drivers/java/v5_8/run.sh index 03400e385..cb3ebb2ca 100755 --- a/tests/drivers/java/v5_8/run.sh +++ b/tests/drivers/java/v5_8/run.sh @@ -36,4 +36,3 @@ mvn clean package java -jar target/DocsHowToQuery.jar java -jar target/MaxQueryLength.jar java -jar target/Transactions.jar -# java -jar target/ParallelEdgeImport.jar diff --git a/tests/drivers/java/v5_8/run_cluster_tests.sh b/tests/drivers/java/v5_8/run_cluster_tests.sh new file mode 100755 index 000000000..0b01d5de4 --- /dev/null +++ b/tests/drivers/java/v5_8/run_cluster_tests.sh @@ -0,0 +1,37 @@ +#!/bin/bash -e + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +cd "$DIR" + +if [ -d "/usr/lib/jvm/java-17-oracle" ]; then + export JAVA_HOME="/usr/lib/jvm/java-17-oracle" +fi +if [ -d "/usr/lib/jvm/java-17-openjdk-amd64" ]; then + export JAVA_HOME="/usr/lib/jvm/java-17-openjdk-amd64" +fi +if [ -d "/opt/apache-maven-3.9.3" ]; then + export M2_HOME="/opt/apache-maven-3.9.3" +fi +export PATH="$JAVA_HOME/bin:$M2_HOME/bin:$PATH" + +for i in java mvn; do + if ! which $i >/dev/null; then + echo "Please install $i!" + exit 1 + fi +done + +JAVA_VER=$(java -version 2>&1 >/dev/null | grep 'version' | cut -d "\"" -f2 | cut -d "." -f1) +if [ $JAVA_VER -ne 17 ] +then + echo "neo4j-java-driver v5.8 requires Java 17. Please install it!" + exit 1 +fi + +# CentOS 7 doesn't have Java version that supports var keyword +source ../../../../environment/util.sh + +mvn clean package + +java -jar target/WriteRouting.jar +java -jar target/ReadRouting.jar diff --git a/tests/drivers/java/v5_8/src/main/java/memgraph/ReadRouting.java b/tests/drivers/java/v5_8/src/main/java/memgraph/ReadRouting.java new file mode 100644 index 000000000..b8654a890 --- /dev/null +++ b/tests/drivers/java/v5_8/src/main/java/memgraph/ReadRouting.java @@ -0,0 +1,35 @@ +package memgraph; + +import static org.neo4j.driver.Values.parameters; + +import java.util.*; +import java.util.concurrent.TimeUnit; +import org.neo4j.driver.AuthTokens; +import org.neo4j.driver.Driver; +import org.neo4j.driver.GraphDatabase; +import org.neo4j.driver.Session; +import org.neo4j.driver.Transaction; + +public class ReadRouting { + private Driver driver; + + private void readMessage(String uri) { + driver = GraphDatabase.driver(uri, AuthTokens.basic("", "")); + try (Session session = driver.session()) { + String greeting = session.readTransaction(tx -> { + var result = tx.run("MATCH (n:Greeting) RETURN n.message AS message"); + System.out.println("Read txn passed!"); + return "OK"; + }); + } + } + + public static void main(String...
args) { + System.out.println("Started running ReadRoutingTest..."); + ReadRouting greeter = new ReadRouting(); + greeter.readMessage("neo4j://localhost:7690"); // coordinator_1 + greeter.readMessage("neo4j://localhost:7691"); // coordinator_2 + greeter.readMessage("neo4j://localhost:7692"); // coordinator_3 + System.out.println("All good!"); + } +} diff --git a/tests/drivers/java/v5_8/src/main/java/memgraph/WriteRouting.java b/tests/drivers/java/v5_8/src/main/java/memgraph/WriteRouting.java new file mode 100644 index 000000000..df3948558 --- /dev/null +++ b/tests/drivers/java/v5_8/src/main/java/memgraph/WriteRouting.java @@ -0,0 +1,44 @@ +package memgraph; + +import static org.neo4j.driver.Values.parameters; + +import java.util.*; +import java.util.concurrent.TimeUnit; +import org.neo4j.driver.AuthTokens; +import org.neo4j.driver.Config; +import org.neo4j.driver.Driver; +import org.neo4j.driver.GraphDatabase; +import org.neo4j.driver.Result; +import org.neo4j.driver.Session; +import org.neo4j.driver.Transaction; +import org.neo4j.driver.TransactionWork; +import org.neo4j.driver.exceptions.ClientException; +import org.neo4j.driver.exceptions.TransientException; + +public class WriteRouting { + private Driver driver; + + private void createMessage(String uri) { + driver = GraphDatabase.driver(uri, AuthTokens.basic("", "")); + try (Session session = driver.session()) { + String greeting = session.writeTransaction(tx -> { + var result = tx.run("CREATE (n:Greeting) SET n.message = $message RETURN n.message", + parameters("message", "Hello, World!")); + if (result.hasNext()) { + return result.single().get(0).asString(); + } + throw new RuntimeException("No result found."); + }); + System.out.println(greeting); + } + } + + public static void main(String... 
args) {
+    System.out.println("Started running WriteRoutingTest...");
+    WriteRouting greeter = new WriteRouting();
+    greeter.createMessage("neo4j://localhost:7690"); // coordinator_1
+    greeter.createMessage("neo4j://localhost:7691"); // coordinator_2
+    greeter.createMessage("neo4j://localhost:7692"); // coordinator_3
+    System.out.println("All good!");
+  }
+}
diff --git a/tests/drivers/node/v5_8/read_routing.js b/tests/drivers/node/v5_8/read_routing.js
new file mode 100644
index 000000000..905b184d3
--- /dev/null
+++ b/tests/drivers/node/v5_8/read_routing.js
@@ -0,0 +1,58 @@
+const neo4j = require('neo4j-driver');
+
+function die() {
+  // The driver and sessions are closed by the service helpers; just fail the process.
+  process.exit(1);
+}
+
+function Neo4jService(uri) {
+  const driver = neo4j.driver(uri, neo4j.auth.basic("", ""));
+
+  async function readGreeting() {
+    const session = driver.session({ defaultAccessMode: neo4j.session.READ });
+    try {
+      const result = await session.readTransaction(tx =>
+        tx.run('MATCH (n:Greeting) RETURN n.message AS message')
+      );
+      console.log("Read txn finished");
+    } finally {
+      await session.close();
+    }
+  }
+
+  async function close() {
+    await driver.close();
+  }
+
+  return {
+    readGreeting,
+    close
+  };
+}
+
+async function readGreetingsFromUri(uri) {
+  const service = Neo4jService(uri);
+  await service.readGreeting();
+  await service.close();
+}
+
+async function main() {
+  console.log("Started reading route");
+  const uris = [
+    'neo4j://localhost:7690',
+    'neo4j://localhost:7691',
+    'neo4j://localhost:7692'
+  ];
+
+  try {
+    for (const uri of uris) {
+      await readGreetingsFromUri(uri);
+    }
+  } catch (error) {
+    console.error('An error occurred:', error);
+    die();
+  }
+  console.log("Finished reading route");
+}
+
+main().catch(error => console.error(error));
diff --git a/tests/drivers/node/v5_8/run.sh b/tests/drivers/node/v5_8/run.sh
index 276fdbb2b..a24c5110c 100755
--- a/tests/drivers/node/v5_8/run.sh
+++ b/tests/drivers/node/v5_8/run.sh
@@ -15,4 +15,3 @@ fi
 
 node docs_how_to_query.js
 node max_query_length.js
-# node parallel_edge_import.js
diff --git a/tests/drivers/node/v5_8/run_cluster_tests.sh b/tests/drivers/node/v5_8/run_cluster_tests.sh
new file mode 100755
index 000000000..3f4fee5ff
--- /dev/null
+++ b/tests/drivers/node/v5_8/run_cluster_tests.sh
@@ -0,0 +1,17 @@
+#!/bin/bash -e
+
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+cd "$DIR"
+
+if ! which node >/dev/null; then
+  echo "Please install nodejs!"
+  exit 1
+fi
+
+if [ ! 
-d node_modules ]; then
+  # Driver generated with: `npm install neo4j-driver`
+  npm install --no-package-lock --no-save neo4j-driver@5.8.0
+fi
+
+node write_routing.js
+node read_routing.js
diff --git a/tests/drivers/node/v5_8/write_routing.js b/tests/drivers/node/v5_8/write_routing.js
new file mode 100644
index 000000000..fdb4b74d9
--- /dev/null
+++ b/tests/drivers/node/v5_8/write_routing.js
@@ -0,0 +1,58 @@
+const neo4j = require('neo4j-driver');
+
+function die() {
+  // The driver and sessions are closed by the service helpers; just fail the process.
+  process.exit(1);
+}
+
+function Neo4jService(uri) {
+  const driver = neo4j.driver(uri, neo4j.auth.basic("", ""));
+
+  async function createGreeting() {
+    const session = driver.session({ defaultAccessMode: neo4j.session.WRITE });
+    try {
+      const result = await session.writeTransaction(tx =>
+        tx.run('CREATE (n:Greeting {message: "Hello NodeJs"}) RETURN n.message AS message')
+      );
+      console.log("Write txn finished");
+    } finally {
+      await session.close();
+    }
+  }
+
+  async function close() {
+    await driver.close();
+  }
+
+  return {
+    createGreeting,
+    close
+  };
+}
+
+async function createGreetingsFromUri(uri) {
+  const service = Neo4jService(uri);
+  await service.createGreeting();
+  await service.close();
+}
+
+async function main() {
+  console.log("Started writing route");
+  const uris = [
+    'neo4j://localhost:7690',
+    'neo4j://localhost:7691',
+    'neo4j://localhost:7692'
+  ];
+
+  try {
+    for (const uri of uris) {
+      await createGreetingsFromUri(uri);
+    }
+  } catch (error) {
+    console.error('An error occurred:', error);
+    die();
+  }
+  console.log("Finished writing route");
+}
+
+main().catch(error => console.error(error));
diff --git a/tests/drivers/python/v5_8/read_routing.py b/tests/drivers/python/v5_8/read_routing.py
new file mode 100644
index 000000000..b08982aa3
--- /dev/null
+++ b/tests/drivers/python/v5_8/read_routing.py
@@ -0,0 +1,41 @@
+from neo4j import GraphDatabase
+
+
+class Neo4jService:
+    def __init__(self, uri, user="", password=""):
+        self.driver = GraphDatabase.driver(uri, auth=(user, password))
+
+    def close(self):
+        self.driver.close()
+
+    def read_greeting(self):
+        with self.driver.session() as session:
+            session.execute_read(self._read_greeting)
+            print("Read txn passed!")
+
+    @staticmethod
+    def _read_greeting(tx):
+        tx.run("MATCH (n:Greeting) RETURN n.message AS message")
+
+
+def read_greetings_from_uri(uri):
+    service = Neo4jService(uri)
+    service.read_greeting()
+    service.close()
+
+
+def main():
+    print("Started reading route")
+    uris = ["neo4j://localhost:7690", "neo4j://localhost:7691", "neo4j://localhost:7692"]
+
+    try:
+        for uri in uris:
+            read_greetings_from_uri(uri)
+    except Exception as error:
+        print(f"An error occurred: {error}")
+        exit(-1)
+    print("Finished reading route")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/drivers/python/v5_8/run_cluster_tests.sh b/tests/drivers/python/v5_8/run_cluster_tests.sh
new file mode 100755
index 000000000..f22c1a8da
--- /dev/null
+++ b/tests/drivers/python/v5_8/run_cluster_tests.sh
@@ -0,0 +1,25 @@
+#!/bin/bash -e
+
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+cd "$DIR"
+
+# system check
+if ! which virtualenv >/dev/null; then
+  echo "Please install virtualenv!"
+  exit 1
+fi
+
+# setup virtual environment
+if [ ! 
-d "ve3" ]; then + virtualenv -p python3 ve3 || exit 1 + source ve3/bin/activate + python3 -m pip install neo4j==5.8.0 || exit 1 + deactivate +fi + +# activate virtualenv +source ve3/bin/activate + +# execute test +python3 write_routing.py || exit 1 +python3 read_routing.py || exit 1 diff --git a/tests/drivers/python/v5_8/write_routing.py b/tests/drivers/python/v5_8/write_routing.py new file mode 100644 index 000000000..427d6e6f2 --- /dev/null +++ b/tests/drivers/python/v5_8/write_routing.py @@ -0,0 +1,41 @@ +from neo4j import GraphDatabase + + +class Neo4jService: + def __init__(self, uri, user="", password=""): + self.driver = GraphDatabase.driver(uri, auth=(user, password)) + + def close(self): + self.driver.close() + + def create_greeting(self): + with self.driver.session() as session: + session.execute_write(self._create_and_return_greeting) + print("Write txn passed!") + + @staticmethod + def _create_and_return_greeting(tx): + tx.run("CREATE (n:Greeting {message: 'Hello from Python'}) RETURN n.message AS message") + + +def create_greetings_from_uri(uri): + service = Neo4jService(uri) + service.create_greeting() + service.close() + + +def main(): + print("Started writing route") + uris = ["neo4j://localhost:7690", "neo4j://localhost:7691", "neo4j://localhost:7692"] + + try: + for uri in uris: + create_greetings_from_uri(uri) + except Exception as error: + print(f"An error occurred: {error}") + exit(-1) + print("Finished writing route") + + +if __name__ == "__main__": + main() diff --git a/tests/drivers/run_cluster.sh b/tests/drivers/run_cluster.sh new file mode 100755 index 000000000..b5f75f2ef --- /dev/null +++ b/tests/drivers/run_cluster.sh @@ -0,0 +1,203 @@ +#!/bin/bash + +pushd () { command pushd "$@" > /dev/null; } +popd () { command popd "$@" > /dev/null; } + +function wait_for_server { + port=$1 + while ! nc -z -w 1 127.0.0.1 $port; do + sleep 0.1 + done + sleep 1 +} + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +cd "$DIR" + +# create a temporary directory. +tmpdir=/tmp/memgraph_drivers +if [ -d $tmpdir ]; then + rm -rf $tmpdir +fi + +mkdir -p $tmpdir + +# find memgraph binaries. +binary_dir="$DIR/../../build" + +# Start instance_1 +$binary_dir/memgraph \ + --bolt-port=7687 \ + --data-directory=$tmpdir/instance_1/ \ + --query-execution-timeout-sec=5 \ + --bolt-session-inactivity-timeout=10 \ + --bolt-server-name-for-init="Neo4j/1.1" \ + --bolt-cert-file="" \ + --log-file=$tmpdir/logs/instance1.log \ + --also-log-to-stderr \ + --coordinator-server-port=10011 \ + --experimental-enabled=high-availability \ + --log-level ERROR & +pid_instance_1=$! +wait_for_server 7687 + +# Start instance_2 +$binary_dir/memgraph \ + --bolt-port=7688 \ + --data-directory=$tmpdir/instance_2 \ + --query-execution-timeout-sec=5 \ + --bolt-session-inactivity-timeout=10 \ + --bolt-server-name-for-init="Neo4j/1.1" \ + --bolt-cert-file="" \ + --log-file=$tmpdir/logs/instance2.log \ + --also-log-to-stderr \ + --coordinator-server-port=10012 \ + --experimental-enabled=high-availability \ + --log-level ERROR & +pid_instance_2=$! +wait_for_server 7688 + +# Start instance_3 +$binary_dir/memgraph \ + --bolt-port=7689 \ + --data-directory=$tmpdir/instance_3 \ + --query-execution-timeout-sec=5 \ + --bolt-session-inactivity-timeout=10 \ + --bolt-server-name-for-init="Neo4j/1.1" \ + --bolt-cert-file="" \ + --log-file=$tmpdir/logs/instance3.log \ + --also-log-to-stderr \ + --coordinator-server-port=10013 \ + --experimental-enabled=high-availability \ + --log-level ERROR & +pid_instance_3=$! 
+wait_for_server 7689
+
+
+# Start coordinator_1
+$binary_dir/memgraph \
+    --bolt-port=7690 \
+    --data-directory=$tmpdir/coordinator_1 \
+    --query-execution-timeout-sec=5 \
+    --bolt-session-inactivity-timeout=10 \
+    --bolt-server-name-for-init="Neo4j/1.1" \
+    --bolt-cert-file="" \
+    --log-file=$tmpdir/logs/coordinator1.log \
+    --also-log-to-stderr \
+    --raft-server-id=1 \
+    --raft-server-port=10111 \
+    --experimental-enabled=high-availability \
+    --log-level ERROR &
+pid_coordinator_1=$!
+wait_for_server 7690
+
+# Start coordinator_2
+$binary_dir/memgraph \
+    --bolt-port=7691 \
+    --data-directory=$tmpdir/coordinator_2 \
+    --query-execution-timeout-sec=5 \
+    --bolt-session-inactivity-timeout=10 \
+    --bolt-server-name-for-init="Neo4j/1.1" \
+    --bolt-cert-file="" \
+    --log-file=$tmpdir/logs/coordinator2.log \
+    --also-log-to-stderr \
+    --raft-server-id=2 \
+    --raft-server-port=10112 \
+    --experimental-enabled=high-availability \
+    --log-level ERROR &
+pid_coordinator_2=$!
+wait_for_server 7691
+
+# Start coordinator_3
+$binary_dir/memgraph \
+    --bolt-port=7692 \
+    --data-directory=$tmpdir/coordinator_3 \
+    --query-execution-timeout-sec=5 \
+    --bolt-session-inactivity-timeout=10 \
+    --bolt-server-name-for-init="Neo4j/1.1" \
+    --bolt-cert-file="" \
+    --log-file=$tmpdir/logs/coordinator3.log \
+    --also-log-to-stderr \
+    --raft-server-id=3 \
+    --raft-server-port=10113 \
+    --experimental-enabled=high-availability \
+    --log-level ERROR &
+pid_coordinator_3=$!
+wait_for_server 7692
+
+sleep 5
+
+echo 'ADD COORDINATOR 2 WITH CONFIG {"bolt_server": "127.0.0.1:7691", "coordinator_server": "127.0.0.1:10112"};' | $binary_dir/bin/mgconsole --port 7690
+echo 'ADD COORDINATOR 3 WITH CONFIG {"bolt_server": "127.0.0.1:7692", "coordinator_server": "127.0.0.1:10113"};' | $binary_dir/bin/mgconsole --port 7690
+echo 'REGISTER INSTANCE instance_1 WITH CONFIG {"bolt_server": "127.0.0.1:7687", "management_server": "127.0.0.1:10011", "replication_server": "127.0.0.1:10001"};' | $binary_dir/bin/mgconsole --port 7690
+echo 'REGISTER INSTANCE instance_2 WITH CONFIG {"bolt_server": "127.0.0.1:7688", "management_server": "127.0.0.1:10012", "replication_server": "127.0.0.1:10002"};' | $binary_dir/bin/mgconsole --port 7690
+echo 'REGISTER INSTANCE instance_3 WITH CONFIG {"bolt_server": "127.0.0.1:7689", "management_server": "127.0.0.1:10013", "replication_server": "127.0.0.1:10003"};' | $binary_dir/bin/mgconsole --port 7690
+echo 'SET INSTANCE instance_1 TO MAIN;' | $binary_dir/bin/mgconsole --port 7690
+
+
+code_test=0
+for lang in *; do
+    if [ ! -d $lang ]; then continue; fi
+    pushd $lang
+    echo "Running tests for language: $lang"
+    for version in *; do
+        if [ ! -d $version ]; then continue; fi
+        pushd $version
+        if [ -f "run_cluster_tests.sh" ]; then
+            echo "Running version: $version"
+            # Keep any failure sticky so a later passing suite can't reset it,
+            # and avoid break so the pushd/popd stack stays balanced.
+            if ! ./run_cluster_tests.sh; then
+                code_test=1
+                echo "FAILED: $lang-$version"
+            fi
+        fi
+        popd
+    done;
+    popd
+done
+
+
+# Function to stop a process by PID and check its exit code
+stop_process() {
+    local pid=$1 # Capture the PID from the first argument
+
+    # Stop the process
+    kill $pid
+    wait $pid
+    local exit_code=$? # Capture the exit code
+
+    # Check the process's exit code
+    if [ $exit_code -ne 0 ]; then
+        echo "The process with PID $pid didn't terminate properly!"
+        exit $exit_code
+    else
+        echo "Process with PID $pid terminated successfully."
+ fi +} + +echo "Stopping coordinator1" +stop_process $pid_coordinator_1 +echo "Stopping coordinator2" +stop_process $pid_coordinator_2 +echo "Stopping coordinator3" +stop_process $pid_coordinator_3 + +echo "Stopping instance1" +stop_process $pid_instance_1 +echo "Stopping instance2" +stop_process $pid_instance_2 +echo "Stopping instance3" +stop_process $pid_instance_3 + + +# Check test exit code. +if [ $code_test -ne 0 ]; then + echo "One of the tests failed!" + exit $code_test +fi + +# Temporary directory cleanup. +if [ -d $tmpdir ]; then + rm -rf $tmpdir +fi diff --git a/tests/e2e/high_availability/common.py b/tests/e2e/high_availability/common.py index 2157b29ca..adfabd87a 100644 --- a/tests/e2e/high_availability/common.py +++ b/tests/e2e/high_availability/common.py @@ -30,14 +30,3 @@ def safe_execute(function, *args): function(*args) except: pass - - -# NOTE: Repeated execution because it can fail if Raft server is not up -def add_coordinator(cursor, query): - for _ in range(10): - try: - execute_and_fetch_all(cursor, query) - return True - except Exception: - pass - return False diff --git a/tests/e2e/high_availability/coord_cluster_registration.py b/tests/e2e/high_availability/coord_cluster_registration.py index 13aaf27fe..89279b23d 100644 --- a/tests/e2e/high_availability/coord_cluster_registration.py +++ b/tests/e2e/high_availability/coord_cluster_registration.py @@ -16,7 +16,7 @@ import tempfile import interactive_mg_runner import pytest -from common import add_coordinator, connect, execute_and_fetch_all, safe_execute +from common import connect, execute_and_fetch_all, safe_execute from mg_utils import mg_sleep_and_assert interactive_mg_runner.SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -110,134 +110,134 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { } -def test_register_repl_instances_then_coordinators(): - safe_execute(shutil.rmtree, TEMP_DIR) - interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION) - - coordinator3_cursor = connect(host="localhost", port=7692).cursor() - - execute_and_fetch_all( - coordinator3_cursor, - "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", - ) - execute_and_fetch_all( - coordinator3_cursor, - "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", - ) - execute_and_fetch_all( - coordinator3_cursor, - "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", - ) - execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN") - assert add_coordinator( - coordinator3_cursor, - "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", - ) - assert add_coordinator( - coordinator3_cursor, - "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", - ) - - def check_coordinator3(): - return sorted(list(execute_and_fetch_all(coordinator3_cursor, "SHOW INSTANCES"))) - - expected_cluster_coord3 = [ - ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), - ("instance_1", "", "127.0.0.1:10011", "up", "replica"), - ("instance_2", "", "127.0.0.1:10012", "up", "replica"), 
- ("instance_3", "", "127.0.0.1:10013", "up", "main"), - ] - mg_sleep_and_assert(expected_cluster_coord3, check_coordinator3) - - coordinator1_cursor = connect(host="localhost", port=7690).cursor() - - def check_coordinator1(): - return sorted(list(execute_and_fetch_all(coordinator1_cursor, "SHOW INSTANCES"))) - - expected_cluster_shared = [ - ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), - ("instance_1", "", "", "unknown", "replica"), - ("instance_2", "", "", "unknown", "replica"), - ("instance_3", "", "", "unknown", "main"), - ] - - mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) - - coordinator2_cursor = connect(host="localhost", port=7691).cursor() - - def check_coordinator2(): - return sorted(list(execute_and_fetch_all(coordinator2_cursor, "SHOW INSTANCES"))) - - mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) - - -def test_register_coordinator_then_repl_instances(): - safe_execute(shutil.rmtree, TEMP_DIR) - interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION) - - coordinator3_cursor = connect(host="localhost", port=7692).cursor() - - assert add_coordinator( - coordinator3_cursor, - "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", - ) - assert add_coordinator( - coordinator3_cursor, - "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", - ) - execute_and_fetch_all( - coordinator3_cursor, - "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", - ) - execute_and_fetch_all( - coordinator3_cursor, - "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", - ) - execute_and_fetch_all( - coordinator3_cursor, - "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", - ) - execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN") - - def check_coordinator3(): - return sorted(list(execute_and_fetch_all(coordinator3_cursor, "SHOW INSTANCES"))) - - expected_cluster_coord3 = [ - ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), - ("instance_1", "", "127.0.0.1:10011", "up", "replica"), - ("instance_2", "", "127.0.0.1:10012", "up", "replica"), - ("instance_3", "", "127.0.0.1:10013", "up", "main"), - ] - mg_sleep_and_assert(expected_cluster_coord3, check_coordinator3) - - coordinator1_cursor = connect(host="localhost", port=7690).cursor() - - def check_coordinator1(): - return sorted(list(execute_and_fetch_all(coordinator1_cursor, "SHOW INSTANCES"))) - - expected_cluster_shared = [ - ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), - ("instance_1", "", "", "unknown", "replica"), - ("instance_2", "", "", "unknown", "replica"), - ("instance_3", "", "", "unknown", "main"), - ] - - mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) - - 
coordinator2_cursor = connect(host="localhost", port=7691).cursor() - - def check_coordinator2(): - return sorted(list(execute_and_fetch_all(coordinator2_cursor, "SHOW INSTANCES"))) - - mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) +# def test_register_repl_instances_then_coordinators(): +# safe_execute(shutil.rmtree, TEMP_DIR) +# interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION) +# +# coordinator3_cursor = connect(host="localhost", port=7692).cursor() +# +# execute_and_fetch_all( +# coordinator3_cursor, +# "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", +# ) +# execute_and_fetch_all( +# coordinator3_cursor, +# "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", +# ) +# execute_and_fetch_all( +# coordinator3_cursor, +# "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", +# ) +# execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN") +# execute_and_fetch_all( +# coordinator3_cursor, +# "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", +# ) +# execute_and_fetch_all( +# coordinator3_cursor, +# "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", +# ) +# +# def check_coordinator3(): +# return sorted(list(execute_and_fetch_all(coordinator3_cursor, "SHOW INSTANCES"))) +# +# expected_cluster_coord3 = [ +# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), +# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), +# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), +# ("instance_1", "", "127.0.0.1:10011", "up", "replica"), +# ("instance_2", "", "127.0.0.1:10012", "up", "replica"), +# ("instance_3", "", "127.0.0.1:10013", "up", "main"), +# ] +# mg_sleep_and_assert(expected_cluster_coord3, check_coordinator3) +# +# coordinator1_cursor = connect(host="localhost", port=7690).cursor() +# +# def check_coordinator1(): +# return sorted(list(execute_and_fetch_all(coordinator1_cursor, "SHOW INSTANCES"))) +# +# expected_cluster_shared = [ +# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), +# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), +# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), +# ("instance_1", "", "", "unknown", "replica"), +# ("instance_2", "", "", "unknown", "replica"), +# ("instance_3", "", "", "unknown", "main"), +# ] +# +# mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) +# +# coordinator2_cursor = connect(host="localhost", port=7691).cursor() +# +# def check_coordinator2(): +# return sorted(list(execute_and_fetch_all(coordinator2_cursor, "SHOW INSTANCES"))) +# +# mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) +# +# +# def test_register_coordinator_then_repl_instances(): +# safe_execute(shutil.rmtree, TEMP_DIR) +# interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION) +# +# coordinator3_cursor = connect(host="localhost", port=7692).cursor() +# +# execute_and_fetch_all( +# coordinator3_cursor, +# "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", +# ) +# execute_and_fetch_all( +# coordinator3_cursor, 
+# "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", +# ) +# execute_and_fetch_all( +# coordinator3_cursor, +# "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", +# ) +# execute_and_fetch_all( +# coordinator3_cursor, +# "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", +# ) +# execute_and_fetch_all( +# coordinator3_cursor, +# "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", +# ) +# execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN") +# +# def check_coordinator3(): +# return sorted(list(execute_and_fetch_all(coordinator3_cursor, "SHOW INSTANCES"))) +# +# expected_cluster_coord3 = [ +# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), +# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), +# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), +# ("instance_1", "", "127.0.0.1:10011", "up", "replica"), +# ("instance_2", "", "127.0.0.1:10012", "up", "replica"), +# ("instance_3", "", "127.0.0.1:10013", "up", "main"), +# ] +# mg_sleep_and_assert(expected_cluster_coord3, check_coordinator3) +# +# coordinator1_cursor = connect(host="localhost", port=7690).cursor() +# +# def check_coordinator1(): +# return sorted(list(execute_and_fetch_all(coordinator1_cursor, "SHOW INSTANCES"))) +# +# expected_cluster_shared = [ +# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), +# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), +# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), +# ("instance_1", "", "", "unknown", "replica"), +# ("instance_2", "", "", "unknown", "replica"), +# ("instance_3", "", "", "unknown", "main"), +# ] +# +# mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) +# +# coordinator2_cursor = connect(host="localhost", port=7691).cursor() +# +# def check_coordinator2(): +# return sorted(list(execute_and_fetch_all(coordinator2_cursor, "SHOW INSTANCES"))) +# +# mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) def test_coordinators_communication_with_restarts(): @@ -246,11 +246,11 @@ def test_coordinators_communication_with_restarts(): coordinator3_cursor = connect(host="localhost", port=7692).cursor() - assert add_coordinator( + execute_and_fetch_all( coordinator3_cursor, "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", ) - assert add_coordinator( + execute_and_fetch_all( coordinator3_cursor, "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", ) @@ -310,284 +310,284 @@ def test_coordinators_communication_with_restarts(): # # TODO: (andi) Test when dealing with distributed coordinators that you can register on one coordinator and unregister from any other coordinator -@pytest.mark.parametrize( - "kill_instance", - [True, False], -) -def test_unregister_replicas(kill_instance): - safe_execute(shutil.rmtree, TEMP_DIR) - interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION) - - coordinator1_cursor = connect(host="localhost", port=7690).cursor() - coordinator2_cursor = connect(host="localhost", port=7691).cursor() - coordinator3_cursor = 
connect(host="localhost", port=7692).cursor() - - assert add_coordinator( - coordinator3_cursor, - "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", - ) - assert add_coordinator( - coordinator3_cursor, - "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", - ) - execute_and_fetch_all( - coordinator3_cursor, - "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", - ) - execute_and_fetch_all( - coordinator3_cursor, - "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", - ) - execute_and_fetch_all( - coordinator3_cursor, - "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", - ) - execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN") - - def check_coordinator1(): - return sorted(list(execute_and_fetch_all(coordinator1_cursor, "SHOW INSTANCES"))) - - def check_coordinator2(): - return sorted(list(execute_and_fetch_all(coordinator2_cursor, "SHOW INSTANCES"))) - - def check_coordinator3(): - return sorted(list(execute_and_fetch_all(coordinator3_cursor, "SHOW INSTANCES"))) - - main_cursor = connect(host="localhost", port=7689).cursor() - - def check_main(): - return sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS"))) - - expected_cluster = [ - ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), - ("instance_1", "", "127.0.0.1:10011", "up", "replica"), - ("instance_2", "", "127.0.0.1:10012", "up", "replica"), - ("instance_3", "", "127.0.0.1:10013", "up", "main"), - ] - - expected_cluster_shared = [ - ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), - ("instance_1", "", "", "unknown", "replica"), - ("instance_2", "", "", "unknown", "replica"), - ("instance_3", "", "", "unknown", "main"), - ] - - expected_replicas = [ - ( - "instance_1", - "127.0.0.1:10001", - "sync", - {"ts": 0, "behind": None, "status": "ready"}, - {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}}, - ), - ( - "instance_2", - "127.0.0.1:10002", - "sync", - {"ts": 0, "behind": None, "status": "ready"}, - {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}}, - ), - ] - - mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) - mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) - mg_sleep_and_assert(expected_cluster, check_coordinator3) - mg_sleep_and_assert(expected_replicas, check_main) - - if kill_instance: - interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_1") - execute_and_fetch_all(coordinator3_cursor, "UNREGISTER INSTANCE instance_1") - - expected_cluster = [ - ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), - ("instance_2", "", "127.0.0.1:10012", "up", "replica"), - ("instance_3", "", "127.0.0.1:10013", "up", "main"), - ] - - 
expected_cluster_shared = [ - ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), - ("instance_2", "", "", "unknown", "replica"), - ("instance_3", "", "", "unknown", "main"), - ] - - expected_replicas = [ - ( - "instance_2", - "127.0.0.1:10002", - "sync", - {"ts": 0, "behind": None, "status": "ready"}, - {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}}, - ), - ] - - mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) - mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) - mg_sleep_and_assert(expected_cluster, check_coordinator3) - mg_sleep_and_assert(expected_replicas, check_main) - - if kill_instance: - interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_2") - execute_and_fetch_all(coordinator3_cursor, "UNREGISTER INSTANCE instance_2") - - expected_cluster = [ - ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), - ("instance_3", "", "127.0.0.1:10013", "up", "main"), - ] - - expected_cluster_shared = [ - ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), - ("instance_3", "", "", "unknown", "main"), - ] - expected_replicas = [] - - mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) - mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) - mg_sleep_and_assert(expected_cluster, check_coordinator3) - mg_sleep_and_assert(expected_replicas, check_main) - - -def test_unregister_main(): - safe_execute(shutil.rmtree, TEMP_DIR) - interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION) - - coordinator1_cursor = connect(host="localhost", port=7690).cursor() - coordinator2_cursor = connect(host="localhost", port=7691).cursor() - coordinator3_cursor = connect(host="localhost", port=7692).cursor() - - assert add_coordinator( - coordinator3_cursor, - "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", - ) - assert add_coordinator( - coordinator3_cursor, - "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", - ) - execute_and_fetch_all( - coordinator3_cursor, - "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", - ) - execute_and_fetch_all( - coordinator3_cursor, - "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", - ) - execute_and_fetch_all( - coordinator3_cursor, - "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", - ) - execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN") - - def check_coordinator1(): - return sorted(list(execute_and_fetch_all(coordinator1_cursor, "SHOW INSTANCES"))) - - def check_coordinator2(): - return sorted(list(execute_and_fetch_all(coordinator2_cursor, "SHOW INSTANCES"))) - - def check_coordinator3(): - return sorted(list(execute_and_fetch_all(coordinator3_cursor, "SHOW INSTANCES"))) 
- - expected_cluster = [ - ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), - ("instance_1", "", "127.0.0.1:10011", "up", "replica"), - ("instance_2", "", "127.0.0.1:10012", "up", "replica"), - ("instance_3", "", "127.0.0.1:10013", "up", "main"), - ] - - expected_cluster_shared = [ - ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), - ("instance_1", "", "", "unknown", "replica"), - ("instance_2", "", "", "unknown", "replica"), - ("instance_3", "", "", "unknown", "main"), - ] - - mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) - mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) - mg_sleep_and_assert(expected_cluster, check_coordinator3) - - try: - execute_and_fetch_all(coordinator3_cursor, "UNREGISTER INSTANCE instance_3") - except Exception as e: - assert ( - str(e) - == "Alive main instance can't be unregistered! Shut it down to trigger failover and then unregister it!" - ) - - interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3") - - expected_cluster = [ - ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), - ("instance_1", "", "127.0.0.1:10011", "up", "main"), - ("instance_2", "", "127.0.0.1:10012", "up", "replica"), - ("instance_3", "", "127.0.0.1:10013", "down", "unknown"), - ] - - expected_cluster_shared = [ - ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), - ("instance_1", "", "", "unknown", "main"), - ("instance_2", "", "", "unknown", "replica"), - ("instance_3", "", "", "unknown", "main"), - ] - - mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) - mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) - mg_sleep_and_assert(expected_cluster, check_coordinator3) - - execute_and_fetch_all(coordinator3_cursor, "UNREGISTER INSTANCE instance_3") - - expected_cluster = [ - ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), - ("instance_1", "", "127.0.0.1:10011", "up", "main"), - ("instance_2", "", "127.0.0.1:10012", "up", "replica"), - ] - - expected_cluster_shared = [ - ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), - ("instance_1", "", "", "unknown", "main"), - ("instance_2", "", "", "unknown", "replica"), - ] - - expected_replicas = [ - ( - "instance_2", - "127.0.0.1:10002", - "sync", - {"ts": 0, "behind": None, "status": "ready"}, - {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}}, - ), - ] - - main_cursor = connect(host="localhost", port=7687).cursor() - - def check_main(): - return sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS"))) - - mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) - mg_sleep_and_assert(expected_cluster_shared, 
check_coordinator2) - mg_sleep_and_assert(expected_cluster, check_coordinator3) - mg_sleep_and_assert(expected_replicas, check_main) +# @pytest.mark.parametrize( +# "kill_instance", +# [True, False], +# ) +# def test_unregister_replicas(kill_instance): +# safe_execute(shutil.rmtree, TEMP_DIR) +# interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION) +# +# coordinator1_cursor = connect(host="localhost", port=7690).cursor() +# coordinator2_cursor = connect(host="localhost", port=7691).cursor() +# coordinator3_cursor = connect(host="localhost", port=7692).cursor() +# +# execute_and_fetch_all( +# coordinator3_cursor, +# "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", +# ) +# execute_and_fetch_all( +# coordinator3_cursor, +# "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", +# ) +# execute_and_fetch_all( +# coordinator3_cursor, +# "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", +# ) +# execute_and_fetch_all( +# coordinator3_cursor, +# "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", +# ) +# execute_and_fetch_all( +# coordinator3_cursor, +# "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", +# ) +# execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN") +# +# def check_coordinator1(): +# return sorted(list(execute_and_fetch_all(coordinator1_cursor, "SHOW INSTANCES"))) +# +# def check_coordinator2(): +# return sorted(list(execute_and_fetch_all(coordinator2_cursor, "SHOW INSTANCES"))) +# +# def check_coordinator3(): +# return sorted(list(execute_and_fetch_all(coordinator3_cursor, "SHOW INSTANCES"))) +# +# main_cursor = connect(host="localhost", port=7689).cursor() +# +# def check_main(): +# return sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS"))) +# +# expected_cluster = [ +# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), +# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), +# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), +# ("instance_1", "", "127.0.0.1:10011", "up", "replica"), +# ("instance_2", "", "127.0.0.1:10012", "up", "replica"), +# ("instance_3", "", "127.0.0.1:10013", "up", "main"), +# ] +# +# expected_cluster_shared = [ +# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), +# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), +# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), +# ("instance_1", "", "", "unknown", "replica"), +# ("instance_2", "", "", "unknown", "replica"), +# ("instance_3", "", "", "unknown", "main"), +# ] +# +# expected_replicas = [ +# ( +# "instance_1", +# "127.0.0.1:10001", +# "sync", +# {"ts": 0, "behind": None, "status": "ready"}, +# {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}}, +# ), +# ( +# "instance_2", +# "127.0.0.1:10002", +# "sync", +# {"ts": 0, "behind": None, "status": "ready"}, +# {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}}, +# ), +# ] +# +# mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) +# mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) +# mg_sleep_and_assert(expected_cluster, check_coordinator3) +# 
mg_sleep_and_assert(expected_replicas, check_main) +# +# if kill_instance: +# interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_1") +# execute_and_fetch_all(coordinator3_cursor, "UNREGISTER INSTANCE instance_1") +# +# expected_cluster = [ +# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), +# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), +# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), +# ("instance_2", "", "127.0.0.1:10012", "up", "replica"), +# ("instance_3", "", "127.0.0.1:10013", "up", "main"), +# ] +# +# expected_cluster_shared = [ +# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), +# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), +# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), +# ("instance_2", "", "", "unknown", "replica"), +# ("instance_3", "", "", "unknown", "main"), +# ] +# +# expected_replicas = [ +# ( +# "instance_2", +# "127.0.0.1:10002", +# "sync", +# {"ts": 0, "behind": None, "status": "ready"}, +# {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}}, +# ), +# ] +# +# mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) +# mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) +# mg_sleep_and_assert(expected_cluster, check_coordinator3) +# mg_sleep_and_assert(expected_replicas, check_main) +# +# if kill_instance: +# interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_2") +# execute_and_fetch_all(coordinator3_cursor, "UNREGISTER INSTANCE instance_2") +# +# expected_cluster = [ +# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), +# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), +# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), +# ("instance_3", "", "127.0.0.1:10013", "up", "main"), +# ] +# +# expected_cluster_shared = [ +# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), +# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), +# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), +# ("instance_3", "", "", "unknown", "main"), +# ] +# expected_replicas = [] +# +# mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) +# mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) +# mg_sleep_and_assert(expected_cluster, check_coordinator3) +# mg_sleep_and_assert(expected_replicas, check_main) +# +# +# def test_unregister_main(): +# safe_execute(shutil.rmtree, TEMP_DIR) +# interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION) +# +# coordinator1_cursor = connect(host="localhost", port=7690).cursor() +# coordinator2_cursor = connect(host="localhost", port=7691).cursor() +# coordinator3_cursor = connect(host="localhost", port=7692).cursor() +# +# execute_and_fetch_all( +# coordinator3_cursor, +# "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", +# ) +# execute_and_fetch_all( +# coordinator3_cursor, +# "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", +# ) +# execute_and_fetch_all( +# coordinator3_cursor, +# "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", +# ) +# execute_and_fetch_all( +# coordinator3_cursor, +# "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 
'replication_server': '127.0.0.1:10002'};", +# ) +# execute_and_fetch_all( +# coordinator3_cursor, +# "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", +# ) +# execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN") +# +# def check_coordinator1(): +# return sorted(list(execute_and_fetch_all(coordinator1_cursor, "SHOW INSTANCES"))) +# +# def check_coordinator2(): +# return sorted(list(execute_and_fetch_all(coordinator2_cursor, "SHOW INSTANCES"))) +# +# def check_coordinator3(): +# return sorted(list(execute_and_fetch_all(coordinator3_cursor, "SHOW INSTANCES"))) +# +# expected_cluster = [ +# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), +# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), +# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), +# ("instance_1", "", "127.0.0.1:10011", "up", "replica"), +# ("instance_2", "", "127.0.0.1:10012", "up", "replica"), +# ("instance_3", "", "127.0.0.1:10013", "up", "main"), +# ] +# +# expected_cluster_shared = [ +# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), +# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), +# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), +# ("instance_1", "", "", "unknown", "replica"), +# ("instance_2", "", "", "unknown", "replica"), +# ("instance_3", "", "", "unknown", "main"), +# ] +# +# mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) +# mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) +# mg_sleep_and_assert(expected_cluster, check_coordinator3) +# +# try: +# execute_and_fetch_all(coordinator3_cursor, "UNREGISTER INSTANCE instance_3") +# except Exception as e: +# assert ( +# str(e) +# == "Alive main instance can't be unregistered! Shut it down to trigger failover and then unregister it!" 
+# ) +# +# interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3") +# +# expected_cluster = [ +# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), +# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), +# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), +# ("instance_1", "", "127.0.0.1:10011", "up", "main"), +# ("instance_2", "", "127.0.0.1:10012", "up", "replica"), +# ("instance_3", "", "127.0.0.1:10013", "down", "unknown"), +# ] +# +# expected_cluster_shared = [ +# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), +# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), +# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), +# ("instance_1", "", "", "unknown", "main"), +# ("instance_2", "", "", "unknown", "replica"), +# ("instance_3", "", "", "unknown", "main"), +# ] +# +# mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) +# mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) +# mg_sleep_and_assert(expected_cluster, check_coordinator3) +# +# execute_and_fetch_all(coordinator3_cursor, "UNREGISTER INSTANCE instance_3") +# +# expected_cluster = [ +# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), +# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), +# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), +# ("instance_1", "", "127.0.0.1:10011", "up", "main"), +# ("instance_2", "", "127.0.0.1:10012", "up", "replica"), +# ] +# +# expected_cluster_shared = [ +# ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), +# ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), +# ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), +# ("instance_1", "", "", "unknown", "main"), +# ("instance_2", "", "", "unknown", "replica"), +# ] +# +# expected_replicas = [ +# ( +# "instance_2", +# "127.0.0.1:10002", +# "sync", +# {"ts": 0, "behind": None, "status": "ready"}, +# {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}}, +# ), +# ] +# +# main_cursor = connect(host="localhost", port=7687).cursor() +# +# def check_main(): +# return sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS"))) +# +# mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) +# mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) +# mg_sleep_and_assert(expected_cluster, check_coordinator3) +# mg_sleep_and_assert(expected_replicas, check_main) if __name__ == "__main__": diff --git a/tests/e2e/high_availability/disable_writing_on_main_after_restart.py b/tests/e2e/high_availability/disable_writing_on_main_after_restart.py index 517bf346f..e61eb4eb8 100644 --- a/tests/e2e/high_availability/disable_writing_on_main_after_restart.py +++ b/tests/e2e/high_availability/disable_writing_on_main_after_restart.py @@ -16,7 +16,7 @@ import tempfile import interactive_mg_runner import pytest -from common import add_coordinator, connect, execute_and_fetch_all, safe_execute +from common import connect, execute_and_fetch_all, safe_execute from mg_utils import mg_sleep_and_assert interactive_mg_runner.SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -137,11 +137,11 @@ def test_writing_disabled_on_main_restart(): "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", ) execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN") - assert add_coordinator( + 
execute_and_fetch_all(
         coordinator3_cursor,
         "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}",
     )
-    assert add_coordinator(
+    execute_and_fetch_all(
         coordinator3_cursor,
         "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}",
     )
diff --git a/tests/e2e/replication/common.hpp b/tests/e2e/replication/common.hpp
index 1938eb0f3..e2ec43978 100644
--- a/tests/e2e/replication/common.hpp
+++ b/tests/e2e/replication/common.hpp
@@ -37,10 +37,9 @@ auto ParseDatabaseEndpoints(const std::string &database_endpoints_str) {
   const auto db_endpoints_strs = memgraph::utils::SplitView(database_endpoints_str, ",");
   std::vector<memgraph::io::network::Endpoint> database_endpoints;
   for (const auto &db_endpoint_str : db_endpoints_strs) {
-    const auto maybe_host_port = memgraph::io::network::Endpoint::ParseSocketOrAddress(db_endpoint_str, 7687);
-    MG_ASSERT(maybe_host_port);
-    auto const [ip, port] = *maybe_host_port;
-    database_endpoints.emplace_back(std::string(ip), port);
+    auto maybe_endpoint = memgraph::io::network::Endpoint::ParseSocketOrAddress(db_endpoint_str, 7687);
+    MG_ASSERT(maybe_endpoint);
+    database_endpoints.emplace_back(std::move(*maybe_endpoint));
   }
   return database_endpoints;
 }
diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt
index 44b24b6f6..008211af3 100644
--- a/tests/unit/CMakeLists.txt
+++ b/tests/unit/CMakeLists.txt
@@ -446,9 +446,16 @@ target_link_libraries(${test_prefix}raft_log_serialization gflags mg-coordination mg-repl_coord_glue)
 target_include_directories(${test_prefix}raft_log_serialization PRIVATE ${CMAKE_SOURCE_DIR}/include)
 endif()
 
-# Test Raft log serialization
+# Test CoordinatorClusterState
 if(MG_ENTERPRISE)
 add_unit_test(coordinator_cluster_state.cpp)
 target_link_libraries(${test_prefix}coordinator_cluster_state gflags mg-coordination mg-repl_coord_glue)
 target_include_directories(${test_prefix}coordinator_cluster_state PRIVATE ${CMAKE_SOURCE_DIR}/include)
 endif()
+
+# Test routing table
+if(MG_ENTERPRISE)
+add_unit_test(routing_table.cpp)
+target_link_libraries(${test_prefix}routing_table gflags mg-coordination mg-repl_coord_glue)
+target_include_directories(${test_prefix}routing_table PRIVATE ${CMAKE_SOURCE_DIR}/include)
+endif()
diff --git a/tests/unit/bolt_session.cpp b/tests/unit/bolt_session.cpp
index f0f3ae14c..411e13e3d 100644
--- a/tests/unit/bolt_session.cpp
+++ b/tests/unit/bolt_session.cpp
@@ -1,4 +1,4 @@
-// Copyright 2023 Memgraph Ltd.
+// Copyright 2024 Memgraph Ltd.
 //
 // Use of this software is governed by the Business Source License
 // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
@@ -114,6 +114,15 @@ class TestSession final : public Session<TestInputStream, TestOutputStream> {
 
   bool Authenticate(const std::string & /*username*/, const std::string & /*password*/) override { return true; }
 
+#ifdef MG_ENTERPRISE
+  auto Route(std::map<std::string, memgraph::communication::bolt::Value> const & /*routing*/,
+             std::vector<memgraph::communication::bolt::Value> const & /*bookmarks*/,
+             std::map<std::string, memgraph::communication::bolt::Value> const & /*extra*/)
+      -> std::map<std::string, memgraph::communication::bolt::Value> override {
+    return {};
+  }
+#endif
+
   std::optional<std::string> GetServerNameForInit() override { return std::nullopt; }
 
   void Configure(const std::map<std::string, memgraph::communication::bolt::Value> &) override {}
@@ -1027,104 +1035,117 @@ TEST(BoltSession, Noop) {
   }
 }
 
-TEST(BoltSession, Route) {
-  // Memgraph does not support route message, but it handles it
-  {
-    SCOPED_TRACE("v1");
-    INIT_VARS;
+TEST(BoltSession, Route) {
+{
+  SCOPED_TRACE("v1");
+  INIT_VARS;
 
-    ExecuteHandshake(input_stream, session, output);
-    ExecuteInit(input_stream, session, output);
-    ASSERT_THROW(ExecuteCommand(input_stream, session, v4_3::route, sizeof(v4_3::route)), SessionException);
-    EXPECT_EQ(session.state_, State::Close);
-  }
-  {
-    SCOPED_TRACE("v4");
-    INIT_VARS;
+  ExecuteHandshake(input_stream, session, output);
+  ExecuteInit(input_stream, session, output);
+  ASSERT_THROW(ExecuteCommand(input_stream, session, v4_3::route, sizeof(v4_3::route)), SessionException);
+  EXPECT_EQ(session.state_, State::Close);
+}
+#ifdef MG_ENTERPRISE
+{
+  SCOPED_TRACE("v4");
+  INIT_VARS;
 
-    ExecuteHandshake(input_stream, session, output, v4_3::handshake_req, v4_3::handshake_resp);
-    ExecuteInit(input_stream, session, output, true);
-    ASSERT_NO_THROW(ExecuteCommand(input_stream, session, v4_3::route, sizeof(v4_3::route)));
-    static constexpr uint8_t expected_resp[] = {
-        0x00 /*two bytes of chunk header, chunk contains 64 bytes of data*/,
-        0x40,
-        0xb1 /*TinyStruct1*/,
-        0x7f /*Failure*/,
-        0xa2 /*TinyMap with 2 items*/,
-        0x84 /*TinyString with 4 chars*/,
-        'c',
-        'o',
-        'd',
-        'e',
-        0x82 /*TinyString with 2 chars*/,
-        '6',
-        '6',
-        0x87 /*TinyString with 7 chars*/,
-        'm',
-        'e',
-        's',
-        's',
-        'a',
-        'g',
-        'e',
-        0xd0 /*String*/,
-        0x2b /*With 43 chars*/,
-        'R',
-        'o',
-        'u',
-        't',
-        'e',
-        ' ',
-        'm',
-        'e',
-        's',
-        's',
-        'a',
-        'g',
-        'e',
-        ' ',
-        'i',
-        's',
-        ' ',
-        'n',
-        'o',
-        't',
-        ' ',
-        's',
-        'u',
-        'p',
-        'p',
-        'o',
-        'r',
-        't',
-        'e',
-        'd',
-        ' ',
-        'i',
-        'n',
-        ' ',
-        'M',
-        'e',
-        'm',
-        'g',
-        'r',
-        'a',
-        'p',
-        'h',
-        '!',
-        0x00 /*Terminating zeros*/,
-        0x00,
-    };
-    EXPECT_EQ(input_stream.size(), 0U);
-    CheckOutput(output, expected_resp, sizeof(expected_resp));
-    EXPECT_EQ(session.state_, State::Error);
+  ExecuteHandshake(input_stream, session, output, v4_3::handshake_req, v4_3::handshake_resp);
+  ExecuteInit(input_stream, session, output, true);
+  ASSERT_NO_THROW(ExecuteCommand(input_stream, session, v4_3::route, sizeof(v4_3::route)));
 
-    SCOPED_TRACE("Try to reset connection after ROUTE failed");
-    ASSERT_NO_THROW(ExecuteCommand(input_stream, session, v4::reset_req, sizeof(v4::reset_req)));
-    EXPECT_EQ(input_stream.size(), 0U);
-    CheckOutput(output, success_resp, sizeof(success_resp));
-    EXPECT_EQ(session.state_, State::Idle);
-  }
+  EXPECT_EQ(session.state_, State::Idle);
+  CheckSuccessMessage(output);
+}
+#else
+{
+  SCOPED_TRACE("v4");
+  INIT_VARS;
+
+  ExecuteHandshake(input_stream, session, output, v4_3::handshake_req, v4_3::handshake_resp);
+  ExecuteInit(input_stream, session, output, true);
+  ASSERT_NO_THROW(ExecuteCommand(input_stream, session, v4_3::route, 
sizeof(v4_3::route))); + static constexpr uint8_t expected_resp[] = { + 0x00 /*two bytes of chunk header, chunk contains 64 bytes of data*/, + 0x40, + 0xb1 /*TinyStruct1*/, + 0x7f /*Failure*/, + 0xa2 /*TinyMap with 2 items*/, + 0x84 /*TinyString with 4 chars*/, + 'c', + 'o', + 'd', + 'e', + 0x82 /*TinyString with 2 chars*/, + '6', + '6', + 0x87 /*TinyString with 7 chars*/, + 'm', + 'e', + 's', + 's', + 'a', + 'g', + 'e', + 0xd0 /*String*/, + 0x2b /*With 43 chars*/, + 'R', + 'o', + 'u', + 't', + 'e', + ' ', + 'm', + 'e', + 's', + 's', + 'a', + 'g', + 'e', + ' ', + 'i', + 's', + ' ', + 'n', + 'o', + 't', + ' ', + 's', + 'u', + 'p', + 'p', + 'o', + 'r', + 't', + 'e', + 'd', + ' ', + 'i', + 'n', + ' ', + 'M', + 'e', + 'm', + 'g', + 'r', + 'a', + 'p', + 'h', + '!', + 0x00 /*Terminating zeros*/, + 0x00, + }; + EXPECT_EQ(input_stream.size(), 0U); + CheckOutput(output, expected_resp, sizeof(expected_resp)); + EXPECT_EQ(session.state_, State::Error); + + SCOPED_TRACE("Try to reset connection after ROUTE failed"); + ASSERT_NO_THROW(ExecuteCommand(input_stream, session, v4::reset_req, sizeof(v4::reset_req))); + EXPECT_EQ(input_stream.size(), 0U); + CheckOutput(output, success_resp, sizeof(success_resp)); + EXPECT_EQ(session.state_, State::Idle); +} +#endif } TEST(BoltSession, Rollback) { diff --git a/tests/unit/coordinator_cluster_state.cpp b/tests/unit/coordinator_cluster_state.cpp index 8df2797f2..e7ccf2ada 100644 --- a/tests/unit/coordinator_cluster_state.cpp +++ b/tests/unit/coordinator_cluster_state.cpp @@ -10,6 +10,7 @@ // licenses/APL.txt. #include "nuraft/coordinator_cluster_state.hpp" +#include "io/network/endpoint.hpp" #include "nuraft/coordinator_state_machine.hpp" #include "replication_coordination_glue/role.hpp" @@ -21,11 +22,12 @@ #include "libnuraft/nuraft.hxx" -using memgraph::coordination::CoordinatorClientConfig; using memgraph::coordination::CoordinatorClusterState; using memgraph::coordination::CoordinatorStateMachine; -using memgraph::coordination::InstanceState; +using memgraph::coordination::CoordinatorToReplicaConfig; using memgraph::coordination::RaftLogAction; +using memgraph::coordination::ReplicationInstanceState; +using memgraph::io::network::Endpoint; using memgraph::replication_coordination_glue::ReplicationMode; using memgraph::replication_coordination_glue::ReplicationRole; using nuraft::buffer; @@ -42,20 +44,22 @@ class CoordinatorClusterStateTest : public ::testing::Test { "MG_tests_unit_coordinator_cluster_state"}; }; -TEST_F(CoordinatorClusterStateTest, InstanceStateSerialization) { - InstanceState instance_state{ - CoordinatorClientConfig{"instance3", - "127.0.0.1", - 10112, - std::chrono::seconds{1}, - std::chrono::seconds{5}, - std::chrono::seconds{10}, - {"instance_name", ReplicationMode::ASYNC, "replication_ip_address", 10001}, - .ssl = std::nullopt}, +TEST_F(CoordinatorClusterStateTest, ReplicationInstanceStateSerialization) { + ReplicationInstanceState instance_state{ + CoordinatorToReplicaConfig{.instance_name = "instance3", + .mgt_server = Endpoint{"127.0.0.1", 10112}, + .bolt_server = Endpoint{"127.0.0.1", 7687}, + .replication_client_info = {.instance_name = "instance_name", + .replication_mode = ReplicationMode::ASYNC, + .replication_server = Endpoint{"127.0.0.1", 10001}}, + .instance_health_check_frequency_sec = std::chrono::seconds{1}, + .instance_down_timeout_sec = std::chrono::seconds{5}, + .instance_get_uuid_frequency_sec = std::chrono::seconds{10}, + .ssl = std::nullopt}, ReplicationRole::MAIN}; nlohmann::json j = instance_state; - InstanceState 
deserialized_instance_state = j.get(); + ReplicationInstanceState deserialized_instance_state = j.get(); EXPECT_EQ(instance_state.config, deserialized_instance_state.config); EXPECT_EQ(instance_state.status, deserialized_instance_state.status); @@ -65,13 +69,16 @@ TEST_F(CoordinatorClusterStateTest, DoActionRegisterInstances) { auto coordinator_cluster_state = memgraph::coordination::CoordinatorClusterState{}; { - CoordinatorClientConfig config{"instance1", - "127.0.0.1", - 10111, - std::chrono::seconds{1}, - std::chrono::seconds{5}, - std::chrono::seconds{10}, - {"instance_name", ReplicationMode::ASYNC, "replication_ip_address", 10001}, + auto config = + CoordinatorToReplicaConfig{.instance_name = "instance1", + .mgt_server = Endpoint{"127.0.0.1", 10111}, + .bolt_server = Endpoint{"127.0.0.1", 7687}, + .replication_client_info = {.instance_name = "instance1", + .replication_mode = ReplicationMode::ASYNC, + .replication_server = Endpoint{"127.0.0.1", 10001}}, + .instance_health_check_frequency_sec = std::chrono::seconds{1}, + .instance_down_timeout_sec = std::chrono::seconds{5}, + .instance_get_uuid_frequency_sec = std::chrono::seconds{10}, .ssl = std::nullopt}; auto buffer = CoordinatorStateMachine::SerializeRegisterInstance(config); @@ -80,13 +87,16 @@ TEST_F(CoordinatorClusterStateTest, DoActionRegisterInstances) { coordinator_cluster_state.DoAction(payload, action); } { - CoordinatorClientConfig config{"instance2", - "127.0.0.1", - 10112, - std::chrono::seconds{1}, - std::chrono::seconds{5}, - std::chrono::seconds{10}, - {"instance_name", ReplicationMode::ASYNC, "replication_ip_address", 10002}, + auto config = + CoordinatorToReplicaConfig{.instance_name = "instance2", + .mgt_server = Endpoint{"127.0.0.1", 10112}, + .bolt_server = Endpoint{"127.0.0.1", 7688}, + .replication_client_info = {.instance_name = "instance2", + .replication_mode = ReplicationMode::ASYNC, + .replication_server = Endpoint{"127.0.0.1", 10002}}, + .instance_health_check_frequency_sec = std::chrono::seconds{1}, + .instance_down_timeout_sec = std::chrono::seconds{5}, + .instance_get_uuid_frequency_sec = std::chrono::seconds{10}, .ssl = std::nullopt}; auto buffer = CoordinatorStateMachine::SerializeRegisterInstance(config); @@ -95,13 +105,16 @@ TEST_F(CoordinatorClusterStateTest, DoActionRegisterInstances) { coordinator_cluster_state.DoAction(payload, action); } { - CoordinatorClientConfig config{"instance3", - "127.0.0.1", - 10113, - std::chrono::seconds{1}, - std::chrono::seconds{5}, - std::chrono::seconds{10}, - {"instance_name", ReplicationMode::ASYNC, "replication_ip_address", 10003}, + auto config = + CoordinatorToReplicaConfig{.instance_name = "instance3", + .mgt_server = Endpoint{"127.0.0.1", 10113}, + .bolt_server = Endpoint{"127.0.0.1", 7689}, + .replication_client_info = {.instance_name = "instance3", + .replication_mode = ReplicationMode::ASYNC, + .replication_server = Endpoint{"127.0.0.1", 10003}}, + .instance_health_check_frequency_sec = std::chrono::seconds{1}, + .instance_down_timeout_sec = std::chrono::seconds{5}, + .instance_get_uuid_frequency_sec = std::chrono::seconds{10}, .ssl = std::nullopt}; auto buffer = CoordinatorStateMachine::SerializeRegisterInstance(config); @@ -110,13 +123,16 @@ TEST_F(CoordinatorClusterStateTest, DoActionRegisterInstances) { coordinator_cluster_state.DoAction(payload, action); } { - CoordinatorClientConfig config{"instance4", - "127.0.0.1", - 10114, - std::chrono::seconds{1}, - std::chrono::seconds{5}, - std::chrono::seconds{10}, - {"instance_name", 
ReplicationMode::ASYNC, "replication_ip_address", 10004}, + auto config = + CoordinatorToReplicaConfig{.instance_name = "instance4", + .mgt_server = Endpoint{"127.0.0.1", 10114}, + .bolt_server = Endpoint{"127.0.0.1", 7690}, + .replication_client_info = {.instance_name = "instance4", + .replication_mode = ReplicationMode::ASYNC, + .replication_server = Endpoint{"127.0.0.1", 10004}}, + .instance_health_check_frequency_sec = std::chrono::seconds{1}, + .instance_down_timeout_sec = std::chrono::seconds{5}, + .instance_get_uuid_frequency_sec = std::chrono::seconds{10}, .ssl = std::nullopt}; auto buffer = CoordinatorStateMachine::SerializeRegisterInstance(config); @@ -125,13 +141,16 @@ TEST_F(CoordinatorClusterStateTest, DoActionRegisterInstances) { coordinator_cluster_state.DoAction(payload, action); } { - CoordinatorClientConfig config{"instance5", - "127.0.0.1", - 10115, - std::chrono::seconds{1}, - std::chrono::seconds{5}, - std::chrono::seconds{10}, - {"instance_name", ReplicationMode::ASYNC, "replication_ip_address", 10005}, + auto config = + CoordinatorToReplicaConfig{.instance_name = "instance5", + .mgt_server = Endpoint{"127.0.0.1", 10115}, + .bolt_server = Endpoint{"127.0.0.1", 7691}, + .replication_client_info = {.instance_name = "instance5", + .replication_mode = ReplicationMode::ASYNC, + .replication_server = Endpoint{"127.0.0.1", 10005}}, + .instance_health_check_frequency_sec = std::chrono::seconds{1}, + .instance_down_timeout_sec = std::chrono::seconds{5}, + .instance_get_uuid_frequency_sec = std::chrono::seconds{10}, .ssl = std::nullopt}; auto buffer = CoordinatorStateMachine::SerializeRegisterInstance(config); @@ -140,13 +159,16 @@ TEST_F(CoordinatorClusterStateTest, DoActionRegisterInstances) { coordinator_cluster_state.DoAction(payload, action); } { - CoordinatorClientConfig config{"instance6", - "127.0.0.1", - 10116, - std::chrono::seconds{1}, - std::chrono::seconds{5}, - std::chrono::seconds{10}, - {"instance_name", ReplicationMode::ASYNC, "replication_ip_address", 10006}, + auto config = + CoordinatorToReplicaConfig{.instance_name = "instance6", + .mgt_server = Endpoint{"127.0.0.1", 10116}, + .bolt_server = Endpoint{"127.0.0.1", 7692}, + .replication_client_info = {.instance_name = "instance6", + .replication_mode = ReplicationMode::ASYNC, + .replication_server = Endpoint{"127.0.0.1", 10006}}, + .instance_health_check_frequency_sec = std::chrono::seconds{1}, + .instance_down_timeout_sec = std::chrono::seconds{5}, + .instance_get_uuid_frequency_sec = std::chrono::seconds{10}, .ssl = std::nullopt}; auto buffer = CoordinatorStateMachine::SerializeRegisterInstance(config); @@ -159,5 +181,6 @@ TEST_F(CoordinatorClusterStateTest, DoActionRegisterInstances) { coordinator_cluster_state.Serialize(data); auto deserialized_coordinator_cluster_state = CoordinatorClusterState::Deserialize(*data); - ASSERT_EQ(coordinator_cluster_state.GetInstances(), deserialized_coordinator_cluster_state.GetInstances()); + ASSERT_EQ(coordinator_cluster_state.GetReplicationInstances(), + deserialized_coordinator_cluster_state.GetReplicationInstances()); } diff --git a/tests/unit/raft_log_serialization.cpp b/tests/unit/raft_log_serialization.cpp index 8550cf5b8..bda690855 100644 --- a/tests/unit/raft_log_serialization.cpp +++ b/tests/unit/raft_log_serialization.cpp @@ -9,7 +9,8 @@ // by the Apache License, Version 2.0, included in the file // licenses/APL.txt. 
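For reference, the nlohmann round-trip these tests rely on serializes a CoordinatorToReplicaConfig into a JSON document shaped roughly as below. This is a minimal Python sketch: the field names mirror the designated initializers above, but the exact nested layout (endpoints as address/port pairs, the mode as a string) is an assumption, since the to_json/from_json overloads live in coordinator code outside this patch.

    import json

    # Assumed JSON shape for CoordinatorToReplicaConfig; field names follow
    # the designated initializers above, the nesting is illustrative only.
    config = {
        "instance_name": "instance3",
        "mgt_server": {"address": "127.0.0.1", "port": 10112},
        "bolt_server": {"address": "127.0.0.1", "port": 7687},
        "replication_client_info": {
            "instance_name": "instance_name",
            "replication_mode": "ASYNC",
            "replication_server": {"address": "127.0.0.1", "port": 10001},
        },
        "instance_health_check_frequency_sec": 1,
        "instance_down_timeout_sec": 5,
        "instance_get_uuid_frequency_sec": 10,
        "ssl": None,
    }

    # Same invariant the C++ tests assert: serialize, deserialize, compare.
    assert json.loads(json.dumps(config)) == config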
-#include "coordination/coordinator_config.hpp" +#include "coordination/coordinator_communication_config.hpp" +#include "io/network/endpoint.hpp" #include "nuraft/coordinator_state_machine.hpp" #include "nuraft/raft_log_action.hpp" #include "utils/file.hpp" @@ -19,10 +20,11 @@ #include #include "json/json.hpp" -using memgraph::coordination::CoordinatorClientConfig; using memgraph::coordination::CoordinatorStateMachine; +using memgraph::coordination::CoordinatorToReplicaConfig; using memgraph::coordination::RaftLogAction; -using memgraph::coordination::ReplClientInfo; +using memgraph::coordination::ReplicationClientInfo; +using memgraph::io::network::Endpoint; using memgraph::replication_coordination_glue::ReplicationMode; using memgraph::utils::UUID; @@ -36,26 +38,29 @@ class RaftLogSerialization : public ::testing::Test { }; TEST_F(RaftLogSerialization, ReplClientInfo) { - ReplClientInfo info{"instance_name", ReplicationMode::SYNC, "127.0.0.1", 10111}; + ReplicationClientInfo info{.instance_name = "instance_name", + .replication_mode = ReplicationMode::SYNC, + .replication_server = Endpoint{"127.0.0.1", 10111}}; nlohmann::json j = info; - ReplClientInfo info2 = j.get(); + ReplicationClientInfo info2 = j.get(); ASSERT_EQ(info, info2); } -TEST_F(RaftLogSerialization, CoordinatorClientConfig) { - CoordinatorClientConfig config{"instance3", - "127.0.0.1", - 10112, - std::chrono::seconds{1}, - std::chrono::seconds{5}, - std::chrono::seconds{10}, - {"instance_name", ReplicationMode::ASYNC, "replication_ip_address", 10001}, - .ssl = std::nullopt}; +TEST_F(RaftLogSerialization, CoordinatorToReplicaConfig) { + CoordinatorToReplicaConfig config{.instance_name = "instance3", + .mgt_server = Endpoint{"127.0.0.1", 10112}, + .replication_client_info = {.instance_name = "instance_name", + .replication_mode = ReplicationMode::ASYNC, + .replication_server = Endpoint{"127.0.0.1", 10001}}, + .instance_health_check_frequency_sec = std::chrono::seconds{1}, + .instance_down_timeout_sec = std::chrono::seconds{5}, + .instance_get_uuid_frequency_sec = std::chrono::seconds{10}, + .ssl = std::nullopt}; nlohmann::json j = config; - CoordinatorClientConfig config2 = j.get(); + CoordinatorToReplicaConfig config2 = j.get(); ASSERT_EQ(config, config2); } @@ -106,19 +111,20 @@ TEST_F(RaftLogSerialization, RaftLogActionUpdateUUID) { } TEST_F(RaftLogSerialization, RegisterInstance) { - CoordinatorClientConfig config{"instance3", - "127.0.0.1", - 10112, - std::chrono::seconds{1}, - std::chrono::seconds{5}, - std::chrono::seconds{10}, - {"instance_name", ReplicationMode::ASYNC, "replication_ip_address", 10001}, - .ssl = std::nullopt}; + CoordinatorToReplicaConfig config{.instance_name = "instance3", + .mgt_server = Endpoint{"127.0.0.1", 10112}, + .replication_client_info = {.instance_name = "instance_name", + .replication_mode = ReplicationMode::ASYNC, + .replication_server = Endpoint{"127.0.0.1", 10001}}, + .instance_health_check_frequency_sec = std::chrono::seconds{1}, + .instance_down_timeout_sec = std::chrono::seconds{5}, + .instance_get_uuid_frequency_sec = std::chrono::seconds{10}, + .ssl = std::nullopt}; auto buffer = CoordinatorStateMachine::SerializeRegisterInstance(config); auto [payload, action] = CoordinatorStateMachine::DecodeLog(*buffer); ASSERT_EQ(action, RaftLogAction::REGISTER_REPLICATION_INSTANCE); - ASSERT_EQ(config, std::get(payload)); + ASSERT_EQ(config, std::get(payload)); } TEST_F(RaftLogSerialization, UnregisterInstance) { diff --git a/tests/unit/routing_table.cpp b/tests/unit/routing_table.cpp new 
file mode 100644 index 000000000..42815d461 --- /dev/null +++ b/tests/unit/routing_table.cpp @@ -0,0 +1,176 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#include "auth/auth.hpp" +#include "coordination/coordinator_instance.hpp" +#include "flags/run_time_configurable.hpp" +#include "interpreter_faker.hpp" +#include "io/network/endpoint.hpp" +#include "license/license.hpp" +#include "replication_handler/replication_handler.hpp" +#include "storage/v2/config.hpp" + +#include "utils/file.hpp" + +#include +#include +#include "json/json.hpp" + +using memgraph::coordination::CoordinatorInstance; +using memgraph::coordination::CoordinatorToCoordinatorConfig; +using memgraph::coordination::CoordinatorToReplicaConfig; +using memgraph::coordination::RaftState; +using memgraph::coordination::ReplicationClientInfo; +using memgraph::io::network::Endpoint; +using memgraph::replication::ReplicationHandler; +using memgraph::replication_coordination_glue::ReplicationMode; +using memgraph::storage::Config; + +// class MockCoordinatorInstance : CoordinatorInstance { +// auto AddCoordinatorInstance(CoordinatorToCoordinatorConfig const &config) -> void override {} +// }; + +class RoutingTableTest : public ::testing::Test { + protected: + std::filesystem::path main_data_directory{std::filesystem::temp_directory_path() / + "MG_tests_unit_coordinator_cluster_state"}; + std::filesystem::path repl1_data_directory{std::filesystem::temp_directory_path() / + "MG_test_unit_storage_v2_replication_repl"}; + std::filesystem::path repl2_data_directory{std::filesystem::temp_directory_path() / + "MG_test_unit_storage_v2_replication_repl2"}; + void SetUp() override { Clear(); } + + void TearDown() override { Clear(); } + + Config main_conf = [&] { + Config config{ + .durability = + { + .snapshot_wal_mode = Config::Durability::SnapshotWalMode::PERIODIC_SNAPSHOT_WITH_WAL, + }, + .salient.items = {.properties_on_edges = true}, + }; + UpdatePaths(config, main_data_directory); + return config; + }(); + Config repl1_conf = [&] { + Config config{ + .durability = + { + .snapshot_wal_mode = Config::Durability::SnapshotWalMode::PERIODIC_SNAPSHOT_WITH_WAL, + }, + .salient.items = {.properties_on_edges = true}, + }; + UpdatePaths(config, repl1_data_directory); + return config; + }(); + Config repl2_conf = [&] { + Config config{ + .durability = + { + .snapshot_wal_mode = Config::Durability::SnapshotWalMode::PERIODIC_SNAPSHOT_WITH_WAL, + }, + .salient.items = {.properties_on_edges = true}, + }; + UpdatePaths(config, repl2_data_directory); + return config; + }(); + + const std::string local_host = ("127.0.0.1"); + const std::array ports{10000, 20000}; + const std::array replicas = {"REPLICA1", "REPLICA2"}; + + private: + void Clear() { + if (std::filesystem::exists(main_data_directory)) std::filesystem::remove_all(main_data_directory); + if (std::filesystem::exists(repl1_data_directory)) std::filesystem::remove_all(repl1_data_directory); + if (std::filesystem::exists(repl2_data_directory)) std::filesystem::remove_all(repl2_data_directory); 
+ } +}; + +struct MinMemgraph { + MinMemgraph(const memgraph::storage::Config &conf) + : auth{conf.durability.storage_directory / "auth", memgraph::auth::Auth::Config{/* default */}}, + repl_state{ReplicationStateRootPath(conf)}, + dbms{conf, repl_state +#ifdef MG_ENTERPRISE + , + auth, true +#endif + }, + db_acc{dbms.Get()}, + db{*db_acc.get()}, + repl_handler(repl_state, dbms +#ifdef MG_ENTERPRISE + , + system_, auth +#endif + ) { + } + memgraph::auth::SynchedAuth auth; + memgraph::system::System system_; + memgraph::replication::ReplicationState repl_state; + memgraph::dbms::DbmsHandler dbms; + memgraph::dbms::DatabaseAccess db_acc; + memgraph::dbms::Database &db; + ReplicationHandler repl_handler; +}; +; + +TEST_F(RoutingTableTest, GetSingleRouterRoutingTable) { + CoordinatorInstance instance1; + auto routing = std::map{{"address", "localhost:7688"}}; + auto routing_table = instance1.GetRoutingTable(routing); + + ASSERT_EQ(routing_table.size(), 1); + + auto const routers = routing_table[0]; + ASSERT_EQ(routers.first, std::vector{"localhost:7688"}); + ASSERT_EQ(routers.second, "ROUTE"); +} + +TEST_F(RoutingTableTest, GetMixedRoutingTable) { + auto instance1 = RaftState::MakeRaftState([]() {}, []() {}); + auto routing = std::map{{"address", "localhost:7690"}}; + instance1.AppendRegisterReplicationInstanceLog(CoordinatorToReplicaConfig{ + .instance_name = "instance2", + .mgt_server = Endpoint{"127.0.0.1", 10011}, + .bolt_server = Endpoint{"127.0.0.1", 7687}, + .replication_client_info = ReplicationClientInfo{.instance_name = "instance2", + .replication_mode = ReplicationMode::ASYNC, + .replication_server = Endpoint{"127.0.0.1", 10001}}}); + instance1.GetAllCoordinators(); + // auto routing_table = instance1.GetRoutingTable(routing); + + // ASSERT_EQ(routing_table.size(), 1); + // auto const routers = routing_table[0]; + // ASSERT_EQ(routers.second, "ROUTE"); +} + +// TEST_F(RoutingTableTest, GetMultipleRoutersRoutingTable) { +// +// CoordinatorInstance instance1; +// instance1.AddCoordinatorInstance(CoordinatorToCoordinatorConfig{.coordinator_server_id = 1, +// .bolt_server = Endpoint{"127.0.0.1", 7689}, +// .coordinator_server = Endpoint{"127.0.0.1", +// 10111}}); +// +// auto routing = std::map{{"address", "localhost:7688"}}; +// auto routing_table = instance1.GetRoutingTable(routing); +// +// ASSERT_EQ(routing_table.size(), 1); +// +// auto const routers = routing_table[0]; +// ASSERT_EQ(routers.second, "ROUTE"); +// ASSERT_EQ(routers.first.size(), 2); +// auto const expected_routers = std::vector{"localhost:7689", "localhost:7688"}; +// ASSERT_EQ(routers.first, expected_routers); +// } diff --git a/tests/unit/slk_advanced.cpp b/tests/unit/slk_advanced.cpp index f41946388..46254746a 100644 --- a/tests/unit/slk_advanced.cpp +++ b/tests/unit/slk_advanced.cpp @@ -11,8 +11,9 @@ #include -#include "coordination/coordinator_config.hpp" +#include "coordination/coordinator_communication_config.hpp" #include "coordination/coordinator_slk.hpp" +#include "io/network/endpoint.hpp" #include "replication/config.hpp" #include "replication_coordination_glue/mode.hpp" #include "slk_common.hpp" @@ -20,6 +21,8 @@ #include "storage/v2/replication/slk.hpp" #include "storage/v2/temporal.hpp" +using memgraph::io::network::Endpoint; + TEST(SlkAdvanced, PropertyValueList) { std::vector original{ memgraph::storage::PropertyValue("hello world!"), @@ -119,24 +122,19 @@ TEST(SlkAdvanced, PropertyValueComplex) { } TEST(SlkAdvanced, ReplicationClientConfigs) { - using ReplicationClientInfo = 
memgraph::coordination::CoordinatorClientConfig::ReplicationClientInfo; + using ReplicationClientInfo = memgraph::coordination::ReplicationClientInfo; using ReplicationClientInfoVec = std::vector; using ReplicationMode = memgraph::replication_coordination_glue::ReplicationMode; ReplicationClientInfoVec original{ReplicationClientInfo{.instance_name = "replica1", .replication_mode = ReplicationMode::SYNC, - .replication_ip_address = "127.0.0.1", - .replication_port = 10000}, + .replication_server = Endpoint{"127.0.0.1", 10000}}, ReplicationClientInfo{.instance_name = "replica2", .replication_mode = ReplicationMode::ASYNC, - .replication_ip_address = "127.0.1.1", - .replication_port = 10010}, - ReplicationClientInfo{ - .instance_name = "replica3", - .replication_mode = ReplicationMode::ASYNC, - .replication_ip_address = "127.1.1.1", - .replication_port = 1110, - }}; + .replication_server = Endpoint{"127.0.0.1", 10010}}, + ReplicationClientInfo{.instance_name = "replica3", + .replication_mode = ReplicationMode::ASYNC, + .replication_server = Endpoint{"127.0.0.1", 10011}}}; memgraph::slk::Loopback loopback; auto builder = loopback.GetBuilder(); From 0913e951678260a0ede3ef8256085336998aba67 Mon Sep 17 00:00:00 2001 From: Andi Date: Thu, 21 Mar 2024 10:12:28 +0100 Subject: [PATCH 14/16] Rename HA startup flags (#1820) --- src/coordination/coordinator_state.cpp | 6 +- .../include/coordination/raft_state.hpp | 4 +- src/coordination/raft_state.cpp | 14 ++-- src/flags/replication.cpp | 6 +- src/flags/replication.hpp | 6 +- src/memgraph.cpp | 2 +- src/query/interpreter.cpp | 32 ++++---- src/query/interpreter.hpp | 2 +- src/replication/state.cpp | 2 +- .../replication_handler.hpp | 2 +- src/storage/v2/config.hpp | 2 +- .../v2/replication/replication_client.cpp | 2 +- tests/drivers/run_cluster.sh | 18 ++-- tests/e2e/configuration/default_config.py | 6 +- .../coord_cluster_registration.py | 18 ++-- .../disable_writing_on_main_after_restart.py | 18 ++-- .../high_availability/distributed_coords.py | 82 +++++++++---------- .../manual_setting_replicas.py | 2 +- .../not_replicate_from_old_main.py | 14 ++-- .../high_availability/single_coordinator.py | 56 ++++++------- tests/e2e/high_availability/workloads.yaml | 8 +- 21 files changed, 151 insertions(+), 151 deletions(-) diff --git a/src/coordination/coordinator_state.cpp b/src/coordination/coordinator_state.cpp index 149a9cb97..0d6ce17c4 100644 --- a/src/coordination/coordinator_state.cpp +++ b/src/coordination/coordinator_state.cpp @@ -25,15 +25,15 @@ namespace memgraph::coordination { CoordinatorState::CoordinatorState() { - MG_ASSERT(!(FLAGS_raft_server_id && FLAGS_coordinator_server_port), + MG_ASSERT(!(FLAGS_coordinator_id && FLAGS_management_port), "Instance cannot be a coordinator and have registered coordinator server."); spdlog::info("Executing coordinator constructor"); - if (FLAGS_coordinator_server_port) { + if (FLAGS_management_port) { spdlog::info("Coordinator server port set"); auto const config = ManagementServerConfig{ .ip_address = kDefaultReplicationServerIp, - .port = static_cast(FLAGS_coordinator_server_port), + .port = static_cast(FLAGS_management_port), }; spdlog::info("Executing coordinator constructor main replica"); diff --git a/src/coordination/include/coordination/raft_state.hpp b/src/coordination/include/coordination/raft_state.hpp index 6e322ab78..c4958a5ba 100644 --- a/src/coordination/include/coordination/raft_state.hpp +++ b/src/coordination/include/coordination/raft_state.hpp @@ -40,7 +40,7 @@ using raft_result = 
nuraft::cmd_result>; class RaftState { private: - explicit RaftState(BecomeLeaderCb become_leader_cb, BecomeFollowerCb become_follower_cb, uint32_t raft_server_id, + explicit RaftState(BecomeLeaderCb become_leader_cb, BecomeFollowerCb become_follower_cb, uint32_t coordinator_id, uint32_t raft_port, std::string raft_address); auto InitRaftServer() -> void; @@ -84,7 +84,7 @@ class RaftState { private: // TODO: (andi) I think variables below can be abstracted/clean them. io::network::Endpoint raft_endpoint_; - uint32_t raft_server_id_; + uint32_t coordinator_id_; ptr state_machine_; ptr state_manager_; diff --git a/src/coordination/raft_state.cpp b/src/coordination/raft_state.cpp index 6175fda4b..3c1cbd158 100644 --- a/src/coordination/raft_state.cpp +++ b/src/coordination/raft_state.cpp @@ -31,12 +31,12 @@ using nuraft::raft_server; using nuraft::srv_config; using raft_result = cmd_result>; -RaftState::RaftState(BecomeLeaderCb become_leader_cb, BecomeFollowerCb become_follower_cb, uint32_t raft_server_id, +RaftState::RaftState(BecomeLeaderCb become_leader_cb, BecomeFollowerCb become_follower_cb, uint32_t coordinator_id, uint32_t raft_port, std::string raft_address) : raft_endpoint_(raft_address, raft_port), - raft_server_id_(raft_server_id), + coordinator_id_(coordinator_id), state_machine_(cs_new()), - state_manager_(cs_new(raft_server_id_, raft_endpoint_.SocketAddress())), + state_manager_(cs_new(coordinator_id_, raft_endpoint_.SocketAddress())), logger_(nullptr), become_leader_cb_(std::move(become_leader_cb)), become_follower_cb_(std::move(become_follower_cb)) {} @@ -95,11 +95,11 @@ auto RaftState::InitRaftServer() -> void { } auto RaftState::MakeRaftState(BecomeLeaderCb &&become_leader_cb, BecomeFollowerCb &&become_follower_cb) -> RaftState { - uint32_t raft_server_id = FLAGS_raft_server_id; - uint32_t raft_port = FLAGS_raft_server_port; + uint32_t coordinator_id = FLAGS_coordinator_id; + uint32_t raft_port = FLAGS_coordinator_port; auto raft_state = - RaftState(std::move(become_leader_cb), std::move(become_follower_cb), raft_server_id, raft_port, "127.0.0.1"); + RaftState(std::move(become_leader_cb), std::move(become_follower_cb), coordinator_id, raft_port, "127.0.0.1"); raft_state.InitRaftServer(); return raft_state; @@ -108,7 +108,7 @@ auto RaftState::MakeRaftState(BecomeLeaderCb &&become_leader_cb, BecomeFollowerC RaftState::~RaftState() { launcher_.shutdown(); } auto RaftState::InstanceName() const -> std::string { - return fmt::format("coordinator_{}", std::to_string(raft_server_id_)); + return fmt::format("coordinator_{}", std::to_string(coordinator_id_)); } auto RaftState::RaftSocketAddress() const -> std::string { return raft_endpoint_.SocketAddress(); } diff --git a/src/flags/replication.cpp b/src/flags/replication.cpp index e6b71b942..3f8fd2400 100644 --- a/src/flags/replication.cpp +++ b/src/flags/replication.cpp @@ -13,11 +13,11 @@ #ifdef MG_ENTERPRISE // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) -DEFINE_uint32(coordinator_server_port, 0, "Port on which coordinator servers will be started."); +DEFINE_uint32(management_port, 0, "Port on which coordinator servers will be started."); // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) -DEFINE_uint32(raft_server_port, 0, "Port on which raft servers will be started."); +DEFINE_uint32(coordinator_port, 0, "Port on which raft servers will be started."); // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) -DEFINE_uint32(raft_server_id, 0, "Unique ID of the raft server."); 
+DEFINE_uint32(coordinator_id, 0, "Unique ID of the raft server."); // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) DEFINE_uint32(instance_down_timeout_sec, 5, "Time duration after which an instance is considered down."); // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) diff --git a/src/flags/replication.hpp b/src/flags/replication.hpp index 0a4982f12..e0d1aff8c 100644 --- a/src/flags/replication.hpp +++ b/src/flags/replication.hpp @@ -15,11 +15,11 @@ #ifdef MG_ENTERPRISE // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) -DECLARE_uint32(coordinator_server_port); +DECLARE_uint32(management_port); // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) -DECLARE_uint32(raft_server_port); +DECLARE_uint32(coordinator_port); // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) -DECLARE_uint32(raft_server_id); +DECLARE_uint32(coordinator_id); // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) DECLARE_uint32(instance_down_timeout_sec); // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) diff --git a/src/memgraph.cpp b/src/memgraph.cpp index 9bf50131d..107cccb59 100644 --- a/src/memgraph.cpp +++ b/src/memgraph.cpp @@ -429,7 +429,7 @@ int main(int argc, char **argv) { #ifdef MG_ENTERPRISE // MAIN or REPLICA instance - if (FLAGS_coordinator_server_port) { + if (FLAGS_management_port) { memgraph::dbms::CoordinatorHandlers::Register(coordinator_state.GetCoordinatorServer(), replication_handler); MG_ASSERT(coordinator_state.GetCoordinatorServer().Start(), "Failed to start coordinator server!"); } diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp index 2fba0addb..87eccca87 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -474,7 +474,7 @@ class CoordQueryHandler final : public query::CoordinatorQueryHandler { } } - auto AddCoordinatorInstance(uint32_t raft_server_id, std::string_view bolt_server, + auto AddCoordinatorInstance(uint32_t coordinator_id, std::string_view bolt_server, std::string_view coordinator_server) -> void override { auto const maybe_coordinator_server = io::network::Endpoint::ParseSocketOrAddress(coordinator_server); if (!maybe_coordinator_server) { @@ -487,7 +487,7 @@ class CoordQueryHandler final : public query::CoordinatorQueryHandler { } auto const coord_coord_config = - coordination::CoordinatorToCoordinatorConfig{.coordinator_server_id = raft_server_id, + coordination::CoordinatorToCoordinatorConfig{.coordinator_server_id = coordinator_id, .bolt_server = *maybe_bolt_server, .coordinator_server = *maybe_coordinator_server}; @@ -942,10 +942,10 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, const Parameters & switch (repl_query->action_) { case ReplicationQuery::Action::SET_REPLICATION_ROLE: { #ifdef MG_ENTERPRISE - if (FLAGS_raft_server_id) { + if (FLAGS_coordinator_id) { throw QueryRuntimeException("Coordinator can't set roles!"); } - if (FLAGS_coordinator_server_port) { + if (FLAGS_management_port) { throw QueryRuntimeException("Can't set role manually on instance with coordinator server port."); } #endif @@ -972,7 +972,7 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, const Parameters & } case ReplicationQuery::Action::SHOW_REPLICATION_ROLE: { #ifdef MG_ENTERPRISE - if (FLAGS_raft_server_id) { + if (FLAGS_coordinator_id) { throw QueryRuntimeException("Coordinator doesn't have a replication role!"); } #endif @@ -993,7 +993,7 @@ Callback HandleReplicationQuery(ReplicationQuery 
*repl_query, const Parameters & } case ReplicationQuery::Action::REGISTER_REPLICA: { #ifdef MG_ENTERPRISE - if (FLAGS_coordinator_server_port) { + if (FLAGS_management_port) { throw QueryRuntimeException("Can't register replica manually on instance with coordinator server port."); } #endif @@ -1014,7 +1014,7 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, const Parameters & case ReplicationQuery::Action::DROP_REPLICA: { #ifdef MG_ENTERPRISE - if (FLAGS_coordinator_server_port) { + if (FLAGS_management_port) { throw QueryRuntimeException("Can't drop replica manually on instance with coordinator server port."); } #endif @@ -1029,7 +1029,7 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, const Parameters & } case ReplicationQuery::Action::SHOW_REPLICAS: { #ifdef MG_ENTERPRISE - if (FLAGS_raft_server_id) { + if (FLAGS_coordinator_id) { throw QueryRuntimeException("Coordinator cannot call SHOW REPLICAS! Use SHOW INSTANCES instead."); } #endif @@ -1176,7 +1176,7 @@ Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param Callback callback; switch (coordinator_query->action_) { case CoordinatorQuery::Action::ADD_COORDINATOR_INSTANCE: { - if (!FLAGS_raft_server_id) { + if (!FLAGS_coordinator_id) { throw QueryRuntimeException("Only coordinator can add coordinator instance!"); } @@ -1220,7 +1220,7 @@ Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param return callback; } case CoordinatorQuery::Action::REGISTER_INSTANCE: { - if (!FLAGS_raft_server_id) { + if (!FLAGS_coordinator_id) { throw QueryRuntimeException("Only coordinator can register coordinator server!"); } // TODO: MemoryResource for EvaluationContext, it should probably be passed as @@ -1273,7 +1273,7 @@ Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param return callback; } case CoordinatorQuery::Action::UNREGISTER_INSTANCE: - if (!FLAGS_raft_server_id) { + if (!FLAGS_coordinator_id) { throw QueryRuntimeException("Only coordinator can register coordinator server!"); } callback.fn = [handler = CoordQueryHandler{*coordinator_state}, @@ -1288,7 +1288,7 @@ Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param return callback; case CoordinatorQuery::Action::SET_INSTANCE_TO_MAIN: { - if (!FLAGS_raft_server_id) { + if (!FLAGS_coordinator_id) { throw QueryRuntimeException("Only coordinator can register coordinator server!"); } // TODO: MemoryResource for EvaluationContext, it should probably be passed as @@ -1305,7 +1305,7 @@ Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param return callback; } case CoordinatorQuery::Action::SHOW_INSTANCES: { - if (!FLAGS_raft_server_id) { + if (!FLAGS_coordinator_id) { throw QueryRuntimeException("Only coordinator can run SHOW INSTANCES."); } @@ -4281,7 +4281,7 @@ void Interpreter::RollbackTransaction() { #ifdef MG_ENTERPRISE auto Interpreter::Route(std::map const &routing) -> RouteResult { // TODO: (andi) Test - if (!FLAGS_raft_server_id) { + if (!FLAGS_coordinator_id) { auto const &address = routing.find("address"); if (address == routing.end()) { throw QueryException("Routing table must contain address field."); @@ -4417,7 +4417,7 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, } #ifdef MG_ENTERPRISE - if (FLAGS_raft_server_id && !utils::Downcast(parsed_query.query) && + if (FLAGS_coordinator_id && !utils::Downcast(parsed_query.query) && !utils::Downcast(parsed_query.query)) { throw 
QueryRuntimeException("Coordinator can run only coordinator queries!"); } @@ -4548,7 +4548,7 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, throw QueryException("Write query forbidden on the replica!"); } #ifdef MG_ENTERPRISE - if (FLAGS_coordinator_server_port && !interpreter_context_->repl_state->IsMainWriteable()) { + if (FLAGS_management_port && !interpreter_context_->repl_state->IsMainWriteable()) { query_execution = nullptr; throw QueryException( "Write query forbidden on the main! Coordinator needs to enable writing on main by sending RPC message."); diff --git a/src/query/interpreter.hpp b/src/query/interpreter.hpp index 5d10a24de..b6cb869a4 100644 --- a/src/query/interpreter.hpp +++ b/src/query/interpreter.hpp @@ -160,7 +160,7 @@ class CoordinatorQueryHandler { virtual std::vector ShowInstances() const = 0; /// @throw QueryRuntimeException if an error ocurred. - virtual auto AddCoordinatorInstance(uint32_t raft_server_id, std::string_view bolt_server, + virtual auto AddCoordinatorInstance(uint32_t coordinator_id, std::string_view bolt_server, std::string_view coordinator_server) -> void = 0; }; #endif diff --git a/src/replication/state.cpp b/src/replication/state.cpp index 1155fdb51..2e00670ec 100644 --- a/src/replication/state.cpp +++ b/src/replication/state.cpp @@ -56,7 +56,7 @@ ReplicationState::ReplicationState(std::optional durabili } auto replication_data = std::move(fetched_replication_data).GetValue(); #ifdef MG_ENTERPRISE - if (FLAGS_coordinator_server_port && std::holds_alternative(replication_data)) { + if (FLAGS_management_port && std::holds_alternative(replication_data)) { spdlog::trace("Restarted replication uuid for replica"); std::get(replication_data).uuid_.reset(); } diff --git a/src/replication_handler/include/replication_handler/replication_handler.hpp b/src/replication_handler/include/replication_handler/replication_handler.hpp index e1da19bfa..452ccce19 100644 --- a/src/replication_handler/include/replication_handler/replication_handler.hpp +++ b/src/replication_handler/include/replication_handler/replication_handler.hpp @@ -213,7 +213,7 @@ struct ReplicationHandler : public memgraph::query::ReplicationQueryHandler { // We force sync replicas in other situation if (state == storage::replication::ReplicaState::DIVERGED_FROM_MAIN) { #ifdef MG_ENTERPRISE - return FLAGS_coordinator_server_port != 0; + return FLAGS_management_port != 0; #else return false; #endif diff --git a/src/storage/v2/config.hpp b/src/storage/v2/config.hpp index 419f29b85..2d06ffe0d 100644 --- a/src/storage/v2/config.hpp +++ b/src/storage/v2/config.hpp @@ -132,7 +132,7 @@ struct Config { inline auto ReplicationStateRootPath(memgraph::storage::Config const &config) -> std::optional { if (!config.durability.restore_replication_state_on_startup #ifdef MG_ENTERPRISE - && !FLAGS_coordinator_server_port + && !FLAGS_management_port #endif ) { spdlog::warn( diff --git a/src/storage/v2/replication/replication_client.cpp b/src/storage/v2/replication/replication_client.cpp index ee1394fdb..008d4b619 100644 --- a/src/storage/v2/replication/replication_client.cpp +++ b/src/storage/v2/replication/replication_client.cpp @@ -92,7 +92,7 @@ void ReplicationStorageClient::UpdateReplicaState(Storage *storage, DatabaseAcce client_name, client_name, client_name); }; #ifdef MG_ENTERPRISE - if (!FLAGS_coordinator_server_port) { + if (!FLAGS_management_port) { log_error(); return; } diff --git a/tests/drivers/run_cluster.sh b/tests/drivers/run_cluster.sh index 
b5f75f2ef..6931c082b 100755 --- a/tests/drivers/run_cluster.sh +++ b/tests/drivers/run_cluster.sh @@ -35,7 +35,7 @@ $binary_dir/memgraph \ --bolt-cert-file="" \ --log-file=$tmpdir/logs/instance1.log \ --also-log-to-stderr \ - --coordinator-server-port=10011 \ + --management-port=10011 \ --experimental-enabled=high-availability \ --log-level ERROR & pid_instance_1=$! @@ -51,7 +51,7 @@ $binary_dir/memgraph \ --bolt-cert-file="" \ --log-file=$tmpdir/logs/instance2.log \ --also-log-to-stderr \ - --coordinator-server-port=10012 \ + --management-port=10012 \ --experimental-enabled=high-availability \ --log-level ERROR & pid_instance_2=$! @@ -67,7 +67,7 @@ $binary_dir/memgraph \ --bolt-cert-file="" \ --log-file=$tmpdir/logs/instance3.log \ --also-log-to-stderr \ - --coordinator-server-port=10013 \ + --management-port=10013 \ --experimental-enabled=high-availability \ --log-level ERROR & pid_instance_3=$! @@ -84,8 +84,8 @@ $binary_dir/memgraph \ --bolt-cert-file="" \ --log-file=$tmpdir/logs/coordinator1.log \ --also-log-to-stderr \ - --raft-server-id=1 \ - --raft-server-port=10111 \ + --coordinator-id=1 \ + --coordinator-port=10111 \ --experimental-enabled=high-availability \ --log-level ERROR & pid_coordinator_1=$! @@ -101,8 +101,8 @@ $binary_dir/memgraph \ --bolt-cert-file="" \ --log-file=$tmpdir/logs/coordinator2.log \ --also-log-to-stderr \ - --raft-server-id=2 \ - --raft-server-port=10112 \ + --coordinator-id=2 \ + --coordinator-port=10112 \ --experimental-enabled=high-availability \ --log-level ERROR & pid_coordinator_2=$! @@ -118,8 +118,8 @@ $binary_dir/memgraph \ --bolt-cert-file="" \ --log-file=$tmpdir/logs/coordinator3.log \ --also-log-to-stderr \ - --raft-server-id=3 \ - --raft-server-port=10113 \ + --coordinator-id=3 \ + --coordinator-port=10113 \ --experimental-enabled=high-availability \ --log-level ERROR & pid_coordinator_3=$! 
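The remaining hunks apply the same three renames mechanically: --coordinator-server-port becomes --management-port, --raft-server-port becomes --coordinator-port, and --raft-server-id becomes --coordinator-id. A minimal Python sketch of that mapping, handy for upgrading old startup argument lists (the upgrade_args helper is illustrative, not part of the patch):

    # Old flag -> new flag, as renamed in src/flags/replication.cpp above.
    FLAG_RENAMES = {
        "--coordinator-server-port": "--management-port",
        "--raft-server-port": "--coordinator-port",
        "--raft-server-id": "--coordinator-id",
    }

    def upgrade_args(args):
        """Rewrite pre-rename flags; handles '--flag value' and '--flag=value'."""
        out = []
        for arg in args:
            flag, sep, value = arg.partition("=")
            out.append(FLAG_RENAMES.get(flag, flag) + sep + value)
        return out

    assert upgrade_args(["--raft-server-id=1", "--raft-server-port", "10111"]) == [
        "--coordinator-id=1", "--coordinator-port", "10111"]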
diff --git a/tests/e2e/configuration/default_config.py b/tests/e2e/configuration/default_config.py index 11435da65..d2ba5c279 100644 --- a/tests/e2e/configuration/default_config.py +++ b/tests/e2e/configuration/default_config.py @@ -59,9 +59,9 @@ startup_config_dict = { "Time in seconds after which inactive Bolt sessions will be closed.", ), "cartesian_product_enabled": ("true", "true", "Enable cartesian product expansion."), - "coordinator_server_port": ("0", "0", "Port on which coordinator servers will be started."), - "raft_server_port": ("0", "0", "Port on which raft servers will be started."), - "raft_server_id": ("0", "0", "Unique ID of the raft server."), + "management_port": ("0", "0", "Port on which coordinator servers will be started."), + "coordinator_port": ("0", "0", "Port on which raft servers will be started."), + "coordinator_id": ("0", "0", "Unique ID of the raft server."), "instance_down_timeout_sec": ("5", "5", "Time duration after which an instance is considered down."), "instance_health_check_frequency_sec": ("1", "1", "The time duration between two health checks/pings."), "instance_get_uuid_frequency_sec": ("10", "10", "The time duration between two instance uuid checks."), diff --git a/tests/e2e/high_availability/coord_cluster_registration.py b/tests/e2e/high_availability/coord_cluster_registration.py index 89279b23d..16f91214d 100644 --- a/tests/e2e/high_availability/coord_cluster_registration.py +++ b/tests/e2e/high_availability/coord_cluster_registration.py @@ -36,7 +36,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "7687", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10011", ], "log_file": "instance_1.log", @@ -50,7 +50,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "7688", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10012", ], "log_file": "instance_2.log", @@ -64,7 +64,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "7689", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10013", ], "log_file": "instance_3.log", @@ -77,8 +77,8 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "--bolt-port", "7690", "--log-level=TRACE", - "--raft-server-id=1", - "--raft-server-port=10111", + "--coordinator-id=1", + "--coordinator-port=10111", ], "log_file": "coordinator1.log", "setup_queries": [], @@ -89,8 +89,8 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "--bolt-port", "7691", "--log-level=TRACE", - "--raft-server-id=2", - "--raft-server-port=10112", + "--coordinator-id=2", + "--coordinator-port=10112", ], "log_file": "coordinator2.log", "setup_queries": [], @@ -101,8 +101,8 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "--bolt-port", "7692", "--log-level=TRACE", - "--raft-server-id=3", - "--raft-server-port=10113", + "--coordinator-id=3", + "--coordinator-port=10113", ], "log_file": "coordinator3.log", "setup_queries": [], diff --git a/tests/e2e/high_availability/disable_writing_on_main_after_restart.py b/tests/e2e/high_availability/disable_writing_on_main_after_restart.py index e61eb4eb8..66264fe0d 100644 --- a/tests/e2e/high_availability/disable_writing_on_main_after_restart.py +++ b/tests/e2e/high_availability/disable_writing_on_main_after_restart.py @@ -36,7 +36,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "7687", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10011", "--also-log-to-stderr", "--instance-health-check-frequency-sec", @@ -55,7 +55,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "7688", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10012", 
"--also-log-to-stderr", "--instance-health-check-frequency-sec", @@ -74,7 +74,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "7689", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10013", "--also-log-to-stderr", "--instance-health-check-frequency-sec", @@ -92,8 +92,8 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "--bolt-port", "7690", "--log-level=TRACE", - "--raft-server-id=1", - "--raft-server-port=10111", + "--coordinator-id=1", + "--coordinator-port=10111", ], "log_file": "coordinator1.log", "setup_queries": [], @@ -104,8 +104,8 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "--bolt-port", "7691", "--log-level=TRACE", - "--raft-server-id=2", - "--raft-server-port=10112", + "--coordinator-id=2", + "--coordinator-port=10112", ], "log_file": "coordinator2.log", "setup_queries": [], @@ -116,8 +116,8 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "--bolt-port", "7692", "--log-level=TRACE", - "--raft-server-id=3", - "--raft-server-port=10113", + "--coordinator-id=3", + "--coordinator-port=10113", "--also-log-to-stderr", ], "log_file": "coordinator3.log", diff --git a/tests/e2e/high_availability/distributed_coords.py b/tests/e2e/high_availability/distributed_coords.py index 59e083545..b863ca519 100644 --- a/tests/e2e/high_availability/distributed_coords.py +++ b/tests/e2e/high_availability/distributed_coords.py @@ -40,7 +40,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "7687", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10011", ], "log_file": "instance_1.log", @@ -54,7 +54,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "7688", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10012", ], "log_file": "instance_2.log", @@ -68,7 +68,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "7689", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10013", ], "log_file": "instance_3.log", @@ -81,8 +81,8 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "--bolt-port", "7690", "--log-level=TRACE", - "--raft-server-id=1", - "--raft-server-port=10111", + "--coordinator-id=1", + "--coordinator-port=10111", ], "log_file": "coordinator1.log", "setup_queries": [], @@ -93,8 +93,8 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "--bolt-port", "7691", "--log-level=TRACE", - "--raft-server-id=2", - "--raft-server-port=10112", + "--coordinator-id=2", + "--coordinator-port=10112", ], "log_file": "coordinator2.log", "setup_queries": [], @@ -105,8 +105,8 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "--bolt-port", "7692", "--log-level=TRACE", - "--raft-server-id=3", - "--raft-server-port=10113", + "--coordinator-id=3", + "--coordinator-port=10113", ], "log_file": "coordinator3.log", "setup_queries": [ @@ -130,7 +130,7 @@ def get_instances_description_no_setup(): "7687", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10011", ], "log_file": "instance_1.log", @@ -144,7 +144,7 @@ def get_instances_description_no_setup(): "7688", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10012", ], "log_file": "instance_2.log", @@ -158,7 +158,7 @@ def get_instances_description_no_setup(): "7689", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10013", ], "log_file": "instance_3.log", @@ -171,8 +171,8 @@ def get_instances_description_no_setup(): "--bolt-port", "7690", "--log-level=TRACE", - "--raft-server-id=1", - "--raft-server-port=10111", + "--coordinator-id=1", + "--coordinator-port=10111", ], "log_file": "coordinator1.log", "data_directory": f"{TEMP_DIR}/coordinator_1", @@ -184,8 +184,8 @@ def 
get_instances_description_no_setup(): "--bolt-port", "7691", "--log-level=TRACE", - "--raft-server-id=2", - "--raft-server-port=10112", + "--coordinator-id=2", + "--coordinator-port=10112", ], "log_file": "coordinator2.log", "data_directory": f"{TEMP_DIR}/coordinator_2", @@ -197,8 +197,8 @@ def get_instances_description_no_setup(): "--bolt-port", "7692", "--log-level=TRACE", - "--raft-server-id=3", - "--raft-server-port=10113", + "--coordinator-id=3", + "--coordinator-port=10113", ], "log_file": "coordinator3.log", "data_directory": f"{TEMP_DIR}/coordinator_3", @@ -640,7 +640,7 @@ def test_registering_4_coords(): "7687", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10011", ], "log_file": "instance_1.log", @@ -654,7 +654,7 @@ def test_registering_4_coords(): "7688", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10012", ], "log_file": "instance_2.log", @@ -668,7 +668,7 @@ def test_registering_4_coords(): "7689", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10013", ], "log_file": "instance_3.log", @@ -681,8 +681,8 @@ def test_registering_4_coords(): "--bolt-port", "7690", "--log-level=TRACE", - "--raft-server-id=1", - "--raft-server-port=10111", + "--coordinator-id=1", + "--coordinator-port=10111", ], "log_file": "coordinator1.log", "setup_queries": [], @@ -693,8 +693,8 @@ def test_registering_4_coords(): "--bolt-port", "7691", "--log-level=TRACE", - "--raft-server-id=2", - "--raft-server-port=10112", + "--coordinator-id=2", + "--coordinator-port=10112", ], "log_file": "coordinator2.log", "setup_queries": [], @@ -705,8 +705,8 @@ def test_registering_4_coords(): "--bolt-port", "7692", "--log-level=TRACE", - "--raft-server-id=3", - "--raft-server-port=10113", + "--coordinator-id=3", + "--coordinator-port=10113", ], "log_file": "coordinator3.log", "setup_queries": [], @@ -717,8 +717,8 @@ def test_registering_4_coords(): "--bolt-port", "7693", "--log-level=TRACE", - "--raft-server-id=4", - "--raft-server-port=10114", + "--coordinator-id=4", + "--coordinator-port=10114", ], "log_file": "coordinator4.log", "setup_queries": [ @@ -775,7 +775,7 @@ def test_registering_coord_log_store(): "7687", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10011", ], "log_file": "instance_1.log", @@ -789,7 +789,7 @@ def test_registering_coord_log_store(): "7688", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10012", ], "log_file": "instance_2.log", @@ -803,7 +803,7 @@ def test_registering_coord_log_store(): "7689", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10013", ], "log_file": "instance_3.log", @@ -816,8 +816,8 @@ def test_registering_coord_log_store(): "--bolt-port", "7690", "--log-level=TRACE", - "--raft-server-id=1", - "--raft-server-port=10111", + "--coordinator-id=1", + "--coordinator-port=10111", ], "log_file": "coordinator1.log", "setup_queries": [], @@ -828,8 +828,8 @@ def test_registering_coord_log_store(): "--bolt-port", "7691", "--log-level=TRACE", - "--raft-server-id=2", - "--raft-server-port=10112", + "--coordinator-id=2", + "--coordinator-port=10112", ], "log_file": "coordinator2.log", "setup_queries": [], @@ -840,8 +840,8 @@ def test_registering_coord_log_store(): "--bolt-port", "7692", "--log-level=TRACE", - "--raft-server-id=3", - "--raft-server-port=10113", + "--coordinator-id=3", + "--coordinator-port=10113", ], "log_file": "coordinator3.log", "setup_queries": [], @@ -852,8 +852,8 @@ def 
test_registering_coord_log_store(): "--bolt-port", "7693", "--log-level=TRACE", - "--raft-server-id=4", - "--raft-server-port=10114", + "--coordinator-id=4", + "--coordinator-port=10114", ], "log_file": "coordinator4.log", "setup_queries": [ @@ -911,7 +911,7 @@ def test_registering_coord_log_store(): bolt_port = f"--bolt-port={bolt_port_id}" - manag_server_port = f"--coordinator-server-port={manag_port_id}" + manag_server_port = f"--management-port={manag_port_id}" args_desc.append(bolt_port) args_desc.append(manag_server_port) diff --git a/tests/e2e/high_availability/manual_setting_replicas.py b/tests/e2e/high_availability/manual_setting_replicas.py index b0b0965bc..02d0ea4e9 100644 --- a/tests/e2e/high_availability/manual_setting_replicas.py +++ b/tests/e2e/high_availability/manual_setting_replicas.py @@ -31,7 +31,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "7687", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10013", ], "log_file": "main.log", diff --git a/tests/e2e/high_availability/not_replicate_from_old_main.py b/tests/e2e/high_availability/not_replicate_from_old_main.py index d9729f650..3e328a544 100644 --- a/tests/e2e/high_availability/not_replicate_from_old_main.py +++ b/tests/e2e/high_availability/not_replicate_from_old_main.py @@ -153,7 +153,7 @@ def test_not_replicate_old_main_register_new_cluster(): "7688", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10011", ], "log_file": "instance_1.log", @@ -167,7 +167,7 @@ def test_not_replicate_old_main_register_new_cluster(): "7689", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10012", ], "log_file": "instance_2.log", @@ -180,8 +180,8 @@ def test_not_replicate_old_main_register_new_cluster(): "--bolt-port", "7690", "--log-level=TRACE", - "--raft-server-id=1", - "--raft-server-port=10111", + "--coordinator-id=1", + "--coordinator-port=10111", ], "log_file": "coordinator.log", "setup_queries": [ @@ -220,7 +220,7 @@ def test_not_replicate_old_main_register_new_cluster(): "7687", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10013", ], "log_file": "instance_3.log", @@ -233,8 +233,8 @@ def test_not_replicate_old_main_register_new_cluster(): "--bolt-port", "7691", "--log-level=TRACE", - "--raft-server-id=1", - "--raft-server-port=10112", + "--coordinator-id=1", + "--coordinator-port=10112", ], "log_file": "coordinator.log", "setup_queries": [], diff --git a/tests/e2e/high_availability/single_coordinator.py b/tests/e2e/high_availability/single_coordinator.py index 1d839b4fc..6582ddfec 100644 --- a/tests/e2e/high_availability/single_coordinator.py +++ b/tests/e2e/high_availability/single_coordinator.py @@ -35,7 +35,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "7688", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10011", "--replication-restore-state-on-startup=true", "--storage-recover-on-startup=false", @@ -52,7 +52,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "7689", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10012", "--replication-restore-state-on-startup=true", "--storage-recover-on-startup=false", @@ -69,7 +69,7 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "7687", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10013", "--replication-restore-state-on-startup=true", "--storage-recover-on-startup=false", @@ -85,8 +85,8 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "--bolt-port", "7690", "--log-level=TRACE", - "--raft-server-id=1", - 
"--raft-server-port=10111", + "--coordinator-id=1", + "--coordinator-port=10111", ], "log_file": "coordinator.log", "setup_queries": [ @@ -126,7 +126,7 @@ def test_replication_works_on_failover_replica_1_epoch_2_commits_away(data_recov "7688", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10011", "--replication-restore-state-on-startup", "true", @@ -144,7 +144,7 @@ def test_replication_works_on_failover_replica_1_epoch_2_commits_away(data_recov "7689", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10012", "--replication-restore-state-on-startup", "true", @@ -162,7 +162,7 @@ def test_replication_works_on_failover_replica_1_epoch_2_commits_away(data_recov "7687", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10013", "--replication-restore-state-on-startup", "true", @@ -180,8 +180,8 @@ def test_replication_works_on_failover_replica_1_epoch_2_commits_away(data_recov "--bolt-port", "7690", "--log-level=TRACE", - "--raft-server-id=1", - "--raft-server-port=10111", + "--coordinator-id=1", + "--coordinator-port=10111", ], "log_file": "coordinator.log", "setup_queries": [ @@ -337,7 +337,7 @@ def test_replication_works_on_failover_replica_2_epochs_more_commits_away(data_r "7688", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10011", "--replication-restore-state-on-startup", "true", @@ -355,7 +355,7 @@ def test_replication_works_on_failover_replica_2_epochs_more_commits_away(data_r "7689", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10012", "--replication-restore-state-on-startup", "true", @@ -373,7 +373,7 @@ def test_replication_works_on_failover_replica_2_epochs_more_commits_away(data_r "7687", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10013", "--replication-restore-state-on-startup", "true", @@ -392,7 +392,7 @@ def test_replication_works_on_failover_replica_2_epochs_more_commits_away(data_r "7691", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10014", "--replication-restore-state-on-startup", "true", @@ -410,8 +410,8 @@ def test_replication_works_on_failover_replica_2_epochs_more_commits_away(data_r "--bolt-port", "7690", "--log-level=TRACE", - "--raft-server-id=1", - "--raft-server-port=10111", + "--coordinator-id=1", + "--coordinator-port=10111", ], "log_file": "coordinator.log", "setup_queries": [ @@ -624,7 +624,7 @@ def test_replication_forcefully_works_on_failover_replica_misses_epoch(data_reco "7688", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10011", "--replication-restore-state-on-startup", "true", @@ -642,7 +642,7 @@ def test_replication_forcefully_works_on_failover_replica_misses_epoch(data_reco "7689", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10012", "--replication-restore-state-on-startup", "true", @@ -660,7 +660,7 @@ def test_replication_forcefully_works_on_failover_replica_misses_epoch(data_reco "7687", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10013", "--replication-restore-state-on-startup", "true", @@ -679,7 +679,7 @@ def test_replication_forcefully_works_on_failover_replica_misses_epoch(data_reco "7691", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10014", "--replication-restore-state-on-startup", "true", @@ -697,8 +697,8 @@ def test_replication_forcefully_works_on_failover_replica_misses_epoch(data_reco 
"--bolt-port", "7690", "--log-level=TRACE", - "--raft-server-id=1", - "--raft-server-port=10111", + "--coordinator-id=1", + "--coordinator-port=10111", ], "log_file": "coordinator.log", "setup_queries": [ @@ -911,7 +911,7 @@ def test_replication_correct_replica_chosen_up_to_date_data(data_recovery): "7688", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10011", "--replication-restore-state-on-startup", "true", @@ -929,7 +929,7 @@ def test_replication_correct_replica_chosen_up_to_date_data(data_recovery): "7689", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10012", "--replication-restore-state-on-startup", "true", @@ -947,7 +947,7 @@ def test_replication_correct_replica_chosen_up_to_date_data(data_recovery): "7687", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10013", "--replication-restore-state-on-startup", "true", @@ -966,7 +966,7 @@ def test_replication_correct_replica_chosen_up_to_date_data(data_recovery): "7691", "--log-level", "TRACE", - "--coordinator-server-port", + "--management-port", "10014", "--replication-restore-state-on-startup", "true", @@ -984,8 +984,8 @@ def test_replication_correct_replica_chosen_up_to_date_data(data_recovery): "--bolt-port", "7690", "--log-level=TRACE", - "--raft-server-id=1", - "--raft-server-port=10111", + "--coordinator-id=1", + "--coordinator-port=10111", ], "log_file": "coordinator.log", "setup_queries": [ diff --git a/tests/e2e/high_availability/workloads.yaml b/tests/e2e/high_availability/workloads.yaml index aaf76fc6b..9d3bd3126 100644 --- a/tests/e2e/high_availability/workloads.yaml +++ b/tests/e2e/high_availability/workloads.yaml @@ -1,19 +1,19 @@ ha_cluster: &ha_cluster cluster: replica_1: - args: ["--experimental-enabled=high-availability", "--bolt-port", "7688", "--log-level=TRACE", "--coordinator-server-port=10011"] + args: ["--experimental-enabled=high-availability", "--bolt-port", "7688", "--log-level=TRACE", "--management-port=10011"] log_file: "replication-e2e-replica1.log" setup_queries: [] replica_2: - args: ["--experimental-enabled=high-availability", "--bolt-port", "7689", "--log-level=TRACE", "--coordinator-server-port=10012"] + args: ["--experimental-enabled=high-availability", "--bolt-port", "7689", "--log-level=TRACE", "--management-port=10012"] log_file: "replication-e2e-replica2.log" setup_queries: [] main: - args: ["--experimental-enabled=high-availability", "--bolt-port", "7687", "--log-level=TRACE", "--coordinator-server-port=10013"] + args: ["--experimental-enabled=high-availability", "--bolt-port", "7687", "--log-level=TRACE", "--management-port=10013"] log_file: "replication-e2e-main.log" setup_queries: [] coordinator: - args: ["--experimental-enabled=high-availability", "--bolt-port", "7690", "--log-level=TRACE", "--raft-server-id=1", "--raft-server-port=10111"] + args: ["--experimental-enabled=high-availability", "--bolt-port", "7690", "--log-level=TRACE", "--coordinator-id=1", "--coordinator-port=10111"] log_file: "replication-e2e-coordinator.log" setup_queries: [ "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", From a3d2474c5b68a5bbef60667caa8c9e4829a37479 Mon Sep 17 00:00:00 2001 From: DavIvek Date: Thu, 21 Mar 2024 11:50:55 +0100 Subject: [PATCH 15/16] Fix timestamps saving on-disk (#1811) --- src/storage/v2/disk/durable_metadata.cpp | 2 +- src/storage/v2/disk/durable_metadata.hpp | 2 +- 
src/storage/v2/disk/storage.cpp | 7 +++--- src/storage/v2/disk/storage.hpp | 2 ++ tests/unit/clearing_old_disk_data.cpp | 32 ++++++++++++++++++++++++ 5 files changed, 40 insertions(+), 5 deletions(-) diff --git a/src/storage/v2/disk/durable_metadata.cpp b/src/storage/v2/disk/durable_metadata.cpp index 13d515af2..c1f44a587 100644 --- a/src/storage/v2/disk/durable_metadata.cpp +++ b/src/storage/v2/disk/durable_metadata.cpp @@ -42,7 +42,7 @@ DurableMetadata::DurableMetadata(const Config &config) DurableMetadata::DurableMetadata(DurableMetadata &&other) noexcept : durability_kvstore_(std::move(other.durability_kvstore_)), config_(std::move(other.config_)) {} -void DurableMetadata::SaveBeforeClosingDB(uint64_t timestamp, uint64_t vertex_count, uint64_t edge_count) { +void DurableMetadata::UpdateMetaData(uint64_t timestamp, uint64_t vertex_count, uint64_t edge_count) { durability_kvstore_.Put(kLastTransactionStartTimeStamp, std::to_string(timestamp)); durability_kvstore_.Put(kVertexCountDescr, std::to_string(vertex_count)); durability_kvstore_.Put(kEdgeDountDescr, std::to_string(edge_count)); diff --git a/src/storage/v2/disk/durable_metadata.hpp b/src/storage/v2/disk/durable_metadata.hpp index 4aaa8a707..06a26ac15 100644 --- a/src/storage/v2/disk/durable_metadata.hpp +++ b/src/storage/v2/disk/durable_metadata.hpp @@ -41,7 +41,7 @@ class DurableMetadata { std::optional> LoadExistenceConstraintInfoIfExists() const; std::optional> LoadUniqueConstraintInfoIfExists() const; - void SaveBeforeClosingDB(uint64_t timestamp, uint64_t vertex_count, uint64_t edge_count); + void UpdateMetaData(uint64_t timestamp, uint64_t vertex_count, uint64_t edge_count); bool PersistLabelIndexCreation(LabelId label); diff --git a/src/storage/v2/disk/storage.cpp b/src/storage/v2/disk/storage.cpp index 4dbd248f7..9aa6613c7 100644 --- a/src/storage/v2/disk/storage.cpp +++ b/src/storage/v2/disk/storage.cpp @@ -274,8 +274,8 @@ DiskStorage::DiskStorage(Config config) } DiskStorage::~DiskStorage() { - durable_metadata_.SaveBeforeClosingDB(timestamp_, vertex_count_.load(std::memory_order_acquire), - edge_count_.load(std::memory_order_acquire)); + durable_metadata_.UpdateMetaData(timestamp_, vertex_count_.load(std::memory_order_acquire), + edge_count_.load(std::memory_order_acquire)); logging::AssertRocksDBStatus(kvstore_->db_->DestroyColumnFamilyHandle(kvstore_->vertex_chandle)); logging::AssertRocksDBStatus(kvstore_->db_->DestroyColumnFamilyHandle(kvstore_->edge_chandle)); logging::AssertRocksDBStatus(kvstore_->db_->DestroyColumnFamilyHandle(kvstore_->out_edges_chandle)); @@ -1786,7 +1786,8 @@ utils::BasicResult DiskStorage::DiskAccessor::Co if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { disk_storage->indices_.text_index_.Commit(); } - + disk_storage->durable_metadata_.UpdateMetaData(disk_storage->timestamp_, disk_storage->vertex_count_, + disk_storage->edge_count_); is_transaction_active_ = false; return {}; diff --git a/src/storage/v2/disk/storage.hpp b/src/storage/v2/disk/storage.hpp index 349a7454a..74f4f4136 100644 --- a/src/storage/v2/disk/storage.hpp +++ b/src/storage/v2/disk/storage.hpp @@ -301,6 +301,8 @@ class DiskStorage final : public Storage { EdgeImportMode GetEdgeImportMode() const; + DurableMetadata *GetDurableMetadata() { return &durable_metadata_; } + private: void LoadPersistingMetadataInfo(); diff --git a/tests/unit/clearing_old_disk_data.cpp b/tests/unit/clearing_old_disk_data.cpp index 395391e12..58682a845 100644 --- a/tests/unit/clearing_old_disk_data.cpp +++ 
b/tests/unit/clearing_old_disk_data.cpp @@ -179,3 +179,35 @@ TEST_F(ClearingOldDiskDataTest, TestNumOfEntriesWithEdgeValueUpdate) { ASSERT_EQ(disk_test_utils::GetRealNumberOfEntriesInRocksDB(tx_db), 5); } + +TEST_F(ClearingOldDiskDataTest, TestTimestampAfterCommit) { + auto *tx_db = disk_storage->GetRocksDBStorage()->db_; + ASSERT_EQ(disk_test_utils::GetRealNumberOfEntriesInRocksDB(tx_db), 0); + + auto acc1 = disk_storage->Access(ReplicationRole::MAIN); + auto vertex1 = acc1->CreateVertex(); + auto label1 = acc1->NameToLabel("DiskLabel"); + auto property1 = acc1->NameToProperty("DiskProperty"); + ASSERT_TRUE(vertex1.AddLabel(label1).HasValue()); + ASSERT_TRUE(vertex1.SetProperty(property1, memgraph::storage::PropertyValue(10)).HasValue()); + ASSERT_FALSE(acc1->Commit().HasError()); + ASSERT_EQ(disk_test_utils::GetRealNumberOfEntriesInRocksDB(tx_db), 1); + + auto saved_timestamp = disk_storage->GetDurableMetadata()->LoadTimestampIfExists(); + ASSERT_EQ(saved_timestamp.has_value(), true); + ASSERT_EQ(disk_storage->timestamp_, saved_timestamp); + + auto acc2 = disk_storage->Access(ReplicationRole::MAIN); + auto vertex2 = acc2->CreateVertex(); + auto label2 = acc2->NameToLabel("DiskLabel2"); + auto property2 = acc2->NameToProperty("DiskProperty2"); + + ASSERT_TRUE(vertex2.AddLabel(label2).HasValue()); + ASSERT_TRUE(vertex2.SetProperty(property2, memgraph::storage::PropertyValue(10)).HasValue()); + ASSERT_FALSE(acc2->Commit().HasError()); + ASSERT_EQ(disk_test_utils::GetRealNumberOfEntriesInRocksDB(tx_db), 2); + + saved_timestamp = disk_storage->GetDurableMetadata()->LoadTimestampIfExists(); + ASSERT_EQ(saved_timestamp.has_value(), true); + ASSERT_EQ(disk_storage->timestamp_, saved_timestamp); +} From 56be736d30fc9df48b840e421a8ce7afea997947 Mon Sep 17 00:00:00 2001 From: DavIvek Date: Thu, 21 Mar 2024 13:34:59 +0100 Subject: [PATCH 16/16] Fix and update mgbench (#1838) --- tests/mgbench/benchmark.py | 6 +- tests/mgbench/graph_bench.py | 2 - tests/mgbench/mg_ondisk_vs_neo4j_pokec.sh | 99 +++++++++++++++++++ tests/mgbench/runners.py | 2 +- tests/mgbench/workloads/base.py | 30 +++--- tests/mgbench/workloads/disk_pokec.py | 52 +++++++--- .../importers/disk_importer_pokec.py | 2 +- tests/mgbench/workloads/pokec.py | 38 ++++++- 8 files changed, 195 insertions(+), 36 deletions(-) create mode 100644 tests/mgbench/mg_ondisk_vs_neo4j_pokec.sh diff --git a/tests/mgbench/benchmark.py b/tests/mgbench/benchmark.py index cd3fb846f..9c8f1a7d2 100755 --- a/tests/mgbench/benchmark.py +++ b/tests/mgbench/benchmark.py @@ -632,10 +632,12 @@ def run_isolated_workload_without_authorization(vendor_runner, client, queries, def setup_indices_and_import_dataset(client, vendor_runner, generated_queries, workload, storage_mode): - vendor_runner.start_db_init(VENDOR_RUNNER_IMPORT) + if benchmark_context.vendor_name == "memgraph": + # Neo4j will get started just before import -> without this if statement it would try to start it twice + vendor_runner.start_db_init(VENDOR_RUNNER_IMPORT) log.info("Executing database index setup") start_time = time.time() - + import_results = None if generated_queries: client.execute(queries=workload.indexes_generator(), num_workers=1) log.info("Finished setting up indexes.") diff --git a/tests/mgbench/graph_bench.py b/tests/mgbench/graph_bench.py index f329cfcb7..bcba55324 100644 --- a/tests/mgbench/graph_bench.py +++ b/tests/mgbench/graph_bench.py @@ -127,8 +127,6 @@ def run_full_benchmarks( ], ] - assert not realistic or not mixed, "Cannot run both realistic and mixed workload, please 
select one!" - if realistic: # Configurations for full workload for count, write, read, update, analytical in realistic: diff --git a/tests/mgbench/mg_ondisk_vs_neo4j_pokec.sh b/tests/mgbench/mg_ondisk_vs_neo4j_pokec.sh new file mode 100644 index 000000000..0381448fa --- /dev/null +++ b/tests/mgbench/mg_ondisk_vs_neo4j_pokec.sh @@ -0,0 +1,99 @@ +#!/bin/bash + +# Currently only pokec dataset is modified to be used with memgraph on-disk storage + +pushd () { command pushd "$@" > /dev/null; } +popd () { command popd "$@" > /dev/null; } +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +pushd "$SCRIPT_DIR" + +# Help function +function show_help() { + echo "Usage: $0 [OPTIONS]" + echo "Options:" + echo " -n, --neo4j-path Path to Neo4j binary" + echo " -m, --memgraph-path Path to Memgraph binary" + echo " -w, --num-workers Number of workers for benchmark and import" + echo " -d, --dataset_size dataset_size (small, medium, large)" + echo " -h, --help Show this help message" + exit 0 +} + +# Default values +neo4j_path="/usr/share/neo4j/bin/neo4j" +memgraph_path="../../build/memgraph" +num_workers=12 +dataset_size="small" + +# Parse command line arguments +while [[ $# -gt 0 ]]; do + key="$1" + case $key in + -n|--neo4j-path) + neo4j_path="$2" + shift + shift + ;; + -m|--memgraph-path) + memgraph_path="$2" + shift + shift + ;; + -w|--num-workers) + num_workers="$2" + shift + shift + ;; + -d|--dataset_size) + dataset_size="$2" + shift + shift + ;; + -h|--help) + show_help + ;; + *) + echo "Invalid option: $1" + show_help + ;; + esac +done + +if [ ! -d "pokec_${dataset_size}_results" ]; then + mkdir "pokec_${dataset_size}_results" +fi + +# Run Python: Mgbench - Neo4j +echo "Running Python: Mgbench - Neo4j" +python3 benchmark.py vendor-native \ + --vendor-binary "$neo4j_path" \ + --vendor-name neo4j \ + --num-workers-for-benchmark "$num_workers" \ + --num-workers-for-import "$num_workers" \ + --no-load-query-counts \ + --export-results "pokec_${dataset_size}_results/neo4j_${dataset_size}_pokec.json" \ + "pokec_disk/${dataset_size}/*/*" \ + --vendor-specific "config=$neo4j_path/conf/neo4j.conf" \ + --no-authorization + +# Run Python: Mgbench - Memgraph - on-disk +echo "Running Python: Mgbench - Memgraph - on-disk" +python3 benchmark.py vendor-native \ + --vendor-binary "$memgraph_path" \ + --vendor-name memgraph \ + --num-workers-for-benchmark "$num_workers" \ + --num-workers-for-import "$num_workers" \ + --no-load-query-counts \ + --export-results-on-disk-txn "pokec_${dataset_size}_results/on_disk_${dataset_size}_pokec.json" \ + --export-results "pokec_${dataset_size}_results/on_disk_export_${dataset_size}_pokec.json" \ + "pokec_disk/${dataset_size}/*/*" \ + --no-authorization \ + --vendor-specific "data-directory=benchmark_datadir" "storage-mode=ON_DISK_TRANSACTIONAL" + +echo "Comparing results" +python3 compare_results.py --compare \ + "pokec_${dataset_size}_results/neo4j_${dataset_size}_pokec.json" \ + "pokec_${dataset_size}_results/on_disk_${dataset_size}_pokec.json" \ + --output \ + "pokec_${dataset_size}_results/neo4j_vs_mg_ondisk_results.html" \ + --different-vendors diff --git a/tests/mgbench/runners.py b/tests/mgbench/runners.py index 155ceac06..005bcb60f 100644 --- a/tests/mgbench/runners.py +++ b/tests/mgbench/runners.py @@ -634,7 +634,7 @@ class Neo4j(BaseRunner): exit_proc = subprocess.run(args=[self._neo4j_binary, "stop"], capture_output=True, check=True) return exit_proc.returncode, usage else: - return 0 + return 0, 0 def start_db_init(self, workload): if 
self._performance_tracking: diff --git a/tests/mgbench/workloads/base.py b/tests/mgbench/workloads/base.py index 5264dcba9..ab4c21059 100644 --- a/tests/mgbench/workloads/base.py +++ b/tests/mgbench/workloads/base.py @@ -160,12 +160,7 @@ class Workload(ABC): raise ValueError("Vendor does not have INDEX for dataset!") def _set_local_files(self) -> None: - if not self.disk_workload: - if self.LOCAL_FILE is not None: - self._local_file = self.LOCAL_FILE.get(self._variant, None) - else: - self._local_file = None - else: + if self.disk_workload and self._vendor != "neo4j": if self.LOCAL_FILE_NODES is not None: self._local_file_nodes = self.LOCAL_FILE_NODES.get(self._variant, None) else: @@ -175,14 +170,14 @@ class Workload(ABC): self._local_file_edges = self.LOCAL_FILE_EDGES.get(self._variant, None) else: self._local_file_edges = None + else: + if self.LOCAL_FILE is not None: + self._local_file = self.LOCAL_FILE.get(self._variant, None) + else: + self._local_file = None def _set_url_files(self) -> None: - if not self.disk_workload: - if self.URL_FILE is not None: - self._url_file = self.URL_FILE.get(self._variant, None) - else: - self._url_file = None - else: + if self.disk_workload and self._vendor != "neo4j": if self.URL_FILE_NODES is not None: self._url_file_nodes = self.URL_FILE_NODES.get(self._variant, None) else: @@ -191,6 +186,11 @@ class Workload(ABC): self._url_file_edges = self.URL_FILE_EDGES.get(self._variant, None) else: self._url_file_edges = None + else: + if self.URL_FILE is not None: + self._url_file = self.URL_FILE.get(self._variant, None) + else: + self._url_file = None def _set_local_index_file(self) -> None: if self.LOCAL_INDEX_FILE is not None: @@ -205,10 +205,10 @@ class Workload(ABC): self._url_index = None def prepare(self, directory): - if not self.disk_workload: - self._prepare_dataset_for_in_memory_workload(directory) - else: + if self.disk_workload and self._vendor != "neo4j": self._prepare_dataset_for_on_disk_workload(directory) + else: + self._prepare_dataset_for_in_memory_workload(directory) if self._local_index is not None: print("Using local index file:", self._local_index) diff --git a/tests/mgbench/workloads/disk_pokec.py b/tests/mgbench/workloads/disk_pokec.py index f19110a0c..a296e4836 100644 --- a/tests/mgbench/workloads/disk_pokec.py +++ b/tests/mgbench/workloads/disk_pokec.py @@ -13,7 +13,8 @@ import random from benchmark_context import BenchmarkContext from workloads.base import Workload -from workloads.importers.disk_importer_pokec import ImporterPokec +from workloads.importers.disk_importer_pokec import DiskImporterPokec +from workloads.importers.importer_pokec import ImporterPokec class Pokec(Workload): @@ -22,6 +23,12 @@ class Pokec(Workload): DEFAULT_VARIANT = "small" FILE = None + URL_FILE = { + "small": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec/benchmark/pokec_small_import.cypher", + "medium": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec/benchmark/pokec_medium_import.cypher", + "large": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec/benchmark/pokec_large.setup.cypher.gz", + } + URL_FILE_NODES = { "small": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec_disk/benchmark/pokec_small_import_nodes.cypher", "medium": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec_disk/benchmark/pokec_medium_import_nodes.cypher", @@ -42,7 +49,7 @@ class Pokec(Workload): URL_INDEX_FILE = { "memgraph": 
"https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec_disk/benchmark/memgraph.cypher", - "neo4j": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec_disk/benchmark/neo4j.cypher", + "neo4j": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec/benchmark/neo4j.cypher", } PROPERTIES_ON_EDGES = False @@ -51,15 +58,26 @@ class Pokec(Workload): super().__init__(variant, benchmark_context=benchmark_context, disk_workload=True) def custom_import(self) -> bool: - importer = ImporterPokec( - benchmark_context=self.benchmark_context, - dataset_name=self.NAME, - index_file=self._file_index, - dataset_nodes_file=self._node_file, - dataset_edges_file=self._edge_file, - variant=self._variant, - ) - return importer.execute_import() + if self._vendor == "neo4j": + importer = ImporterPokec( + benchmark_context=self.benchmark_context, + dataset_name=self.NAME, + index_file=self._file_index, + dataset_file=self._file, + variant=self._variant, + ) + return importer.execute_import() + + else: + importer = DiskImporterPokec( + benchmark_context=self.benchmark_context, + dataset_name=self.NAME, + index_file=self._file_index, + dataset_nodes_file=self._node_file, + dataset_edges_file=self._edge_file, + variant=self._variant, + ) + return importer.execute_import() # Helpers used to generate the queries def _get_random_vertex(self): @@ -214,12 +232,22 @@ class Pokec(Workload): # OK def benchmark__arango__allshortest_paths(self): vertex_from, vertex_to = self._get_random_from_to() - return ( + memgraph = ( "MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m " "MATCH p=(n)-[*allshortest 2 (r, n | 1) total_weight]->(m) " "RETURN extract(n in nodes(p) | n.id) AS path", {"from": vertex_from, "to": vertex_to}, ) + neo4j = ( + "MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m " + "MATCH p = allShortestPaths((n)-[*..2]->(m)) " + "RETURN [node in nodes(p) | node.id] AS path", + {"from": vertex_from, "to": vertex_to}, + ) + if self._vendor == "neo4j": + return neo4j + else: + return memgraph # Our benchmark queries diff --git a/tests/mgbench/workloads/importers/disk_importer_pokec.py b/tests/mgbench/workloads/importers/disk_importer_pokec.py index 560d7da9e..f487dc8f3 100644 --- a/tests/mgbench/workloads/importers/disk_importer_pokec.py +++ b/tests/mgbench/workloads/importers/disk_importer_pokec.py @@ -17,7 +17,7 @@ from constants import * from runners import BaseRunner -class ImporterPokec: +class DiskImporterPokec: def __init__( self, benchmark_context: BenchmarkContext, diff --git a/tests/mgbench/workloads/pokec.py b/tests/mgbench/workloads/pokec.py index 6733d38f2..4c05796b2 100644 --- a/tests/mgbench/workloads/pokec.py +++ b/tests/mgbench/workloads/pokec.py @@ -167,30 +167,62 @@ class Pokec(Workload): def benchmark__arango__shortest_path(self): vertex_from, vertex_to = self._get_random_from_to() - return ( + memgraph = ( "MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m " "MATCH p=(n)-[*bfs..15]->(m) " "RETURN extract(n in nodes(p) | n.id) AS path", {"from": vertex_from, "to": vertex_to}, ) + neo4j = ( + "MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m " + "MATCH p=shortestPath((n)-[*..15]->(m)) " + "RETURN [n in nodes(p) | n.id] AS path", + {"from": vertex_from, "to": vertex_to}, + ) + if self._vendor == "memgraph": + return memgraph + else: + return neo4j def benchmark__arango__shortest_path_with_filter(self): vertex_from, vertex_to = self._get_random_from_to() - return ( + memgraph = ( "MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m " 
"MATCH p=(n)-[*bfs..15 (e, n | n.age >= 18)]->(m) " "RETURN extract(n in nodes(p) | n.id) AS path", {"from": vertex_from, "to": vertex_to}, ) + neo4j = ( + "MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m " + "MATCH p=shortestPath((n)-[*..15]->(m)) " + "WHERE all(node in nodes(p) WHERE node.age >= 18) " + "RETURN [n in nodes(p) | n.id] AS path", + {"from": vertex_from, "to": vertex_to}, + ) + if self._vendor == "memgraph": + return memgraph + else: + return neo4j + def benchmark__arango__allshortest_paths(self): vertex_from, vertex_to = self._get_random_from_to() - return ( + memgraph = ( "MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m " "MATCH p=(n)-[*allshortest 2 (r, n | 1) total_weight]->(m) " "RETURN extract(n in nodes(p) | n.id) AS path", {"from": vertex_from, "to": vertex_to}, ) + neo4j = ( + "MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m " + "MATCH p = allShortestPaths((n)-[*..2]->(m)) " + "RETURN [node in nodes(p) | node.id] AS path", + {"from": vertex_from, "to": vertex_to}, + ) + if self._vendor == "memgraph": + return memgraph + else: + return neo4j # Our benchmark queries