Text search (#1603, #1739)

Add text search:
* named property search
* all-property search
* regex search
* aggregation over search results

Text search works with:
* non-parallel transactions
* durability (WAL files and snapshots)
* multitenancy
This commit is contained in:
Ante Pušić 2024-03-20 10:29:24 +01:00 committed by GitHub
parent 2ac649f3b5
commit 9629f10166
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
68 changed files with 2088 additions and 72 deletions

View File

@ -326,6 +326,21 @@ inline mgp_vertex *graph_get_vertex_by_id(mgp_graph *g, mgp_vertex_id id, mgp_me
return MgInvoke<mgp_vertex *>(mgp_graph_get_vertex_by_id, g, id, memory);
}
inline bool graph_has_text_index(mgp_graph *graph, const char *index_name) {
return MgInvoke<int>(mgp_graph_has_text_index, graph, index_name);
}
inline mgp_map *graph_search_text_index(mgp_graph *graph, const char *index_name, const char *search_query,
text_search_mode search_mode, mgp_memory *memory) {
return MgInvoke<mgp_map *>(mgp_graph_search_text_index, graph, index_name, search_query, search_mode, memory);
}
inline mgp_map *graph_aggregate_over_text_index(mgp_graph *graph, const char *index_name, const char *search_query,
const char *aggregation_query, mgp_memory *memory) {
return MgInvoke<mgp_map *>(mgp_graph_aggregate_over_text_index, graph, index_name, search_query, aggregation_query,
memory);
}
inline mgp_vertices_iterator *graph_iter_vertices(mgp_graph *g, mgp_memory *memory) {
return MgInvoke<mgp_vertices_iterator *>(mgp_graph_iter_vertices, g, memory);
}

View File

@ -1,4 +1,4 @@
// Copyright 2023 Memgraph Ltd.
// Copyright 2024 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
@ -891,6 +891,36 @@ enum mgp_error mgp_edge_iter_properties(struct mgp_edge *e, struct mgp_memory *m
enum mgp_error mgp_graph_get_vertex_by_id(struct mgp_graph *g, struct mgp_vertex_id id, struct mgp_memory *memory,
struct mgp_vertex **result);
/// Result is non-zero if the index with the given name exists.
/// The current implementation always returns without errors.
enum mgp_error mgp_graph_has_text_index(struct mgp_graph *graph, const char *index_name, int *result);
/// Available modes of searching text indices.
MGP_ENUM_CLASS text_search_mode{
SPECIFIED_PROPERTIES,
REGEX,
ALL_PROPERTIES,
};
/// Search the named text index for the given query. The result is a map with the "search_results" and "error_msg" keys.
/// The "search_results" key contains the vertices whose text-indexed properties match the given query.
/// In case of a Tantivy error, the "search_results" key is absent, and "error_msg" contains the error message.
/// Return mgp_error::MGP_ERROR_UNABLE_TO_ALLOCATE if theres an allocation error while constructing the results map.
/// Return mgp_error::MGP_ERROR_KEY_ALREADY_EXISTS if the same key is being created in the results map more than once.
enum mgp_error mgp_graph_search_text_index(struct mgp_graph *graph, const char *index_name, const char *search_query,
enum text_search_mode search_mode, struct mgp_memory *memory,
struct mgp_map **result);
/// Aggregate over the results of a search over the named text index. The result is a map with the "aggregation_results"
/// and "error_msg" keys.
/// The "aggregation_results" key contains the vertices whose text-indexed properties match the given query.
/// In case of a Tantivy error, the "aggregation_results" key is absent, and "error_msg" contains the error message.
/// Return mgp_error::MGP_ERROR_UNABLE_TO_ALLOCATE if theres an allocation error while constructing the results map.
/// Return mgp_error::MGP_ERROR_KEY_ALREADY_EXISTS if the same key is being created in the results map more than once.
enum mgp_error mgp_graph_aggregate_over_text_index(struct mgp_graph *graph, const char *index_name,
const char *search_query, const char *aggregation_query,
struct mgp_memory *memory, struct mgp_map **result);
/// Creates label index for given label.
/// mgp_error::MGP_ERROR_NO_ERROR is always returned.
/// if label index already exists, result will be 0, otherwise 1.

View File

@ -1,4 +1,4 @@
// Copyright 2023 Memgraph Ltd.
// Copyright 2024 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
@ -32,6 +32,15 @@
namespace mgp {
class TextSearchException : public std::exception {
public:
explicit TextSearchException(std::string message) : message_(std::move(message)) {}
const char *what() const noexcept override { return message_.c_str(); }
private:
std::string message_;
};
class IndexException : public std::exception {
public:
explicit IndexException(std::string message) : message_(std::move(message)) {}
@ -4306,12 +4315,12 @@ inline void AddParamsReturnsToProc(mgp_proc *proc, std::vector<Parameter> &param
}
} // namespace detail
inline bool CreateLabelIndex(mgp_graph *memgaph_graph, const std::string_view label) {
return create_label_index(memgaph_graph, label.data());
inline bool CreateLabelIndex(mgp_graph *memgraph_graph, const std::string_view label) {
return create_label_index(memgraph_graph, label.data());
}
inline bool DropLabelIndex(mgp_graph *memgaph_graph, const std::string_view label) {
return drop_label_index(memgaph_graph, label.data());
inline bool DropLabelIndex(mgp_graph *memgraph_graph, const std::string_view label) {
return drop_label_index(memgraph_graph, label.data());
}
inline List ListAllLabelIndices(mgp_graph *memgraph_graph) {
@ -4322,14 +4331,14 @@ inline List ListAllLabelIndices(mgp_graph *memgraph_graph) {
return List(label_indices);
}
inline bool CreateLabelPropertyIndex(mgp_graph *memgaph_graph, const std::string_view label,
inline bool CreateLabelPropertyIndex(mgp_graph *memgraph_graph, const std::string_view label,
const std::string_view property) {
return create_label_property_index(memgaph_graph, label.data(), property.data());
return create_label_property_index(memgraph_graph, label.data(), property.data());
}
inline bool DropLabelPropertyIndex(mgp_graph *memgaph_graph, const std::string_view label,
inline bool DropLabelPropertyIndex(mgp_graph *memgraph_graph, const std::string_view label,
const std::string_view property) {
return drop_label_property_index(memgaph_graph, label.data(), property.data());
return drop_label_property_index(memgraph_graph, label.data(), property.data());
}
inline List ListAllLabelPropertyIndices(mgp_graph *memgraph_graph) {
@ -4340,6 +4349,58 @@ inline List ListAllLabelPropertyIndices(mgp_graph *memgraph_graph) {
return List(label_property_indices);
}
namespace {
constexpr std::string_view kErrorMsgKey = "error_msg";
constexpr std::string_view kSearchResultsKey = "search_results";
constexpr std::string_view kAggregationResultsKey = "aggregation_results";
} // namespace
inline List SearchTextIndex(mgp_graph *memgraph_graph, std::string_view index_name, std::string_view search_query,
text_search_mode search_mode) {
auto results_or_error = Map(mgp::MemHandlerCallback(graph_search_text_index, memgraph_graph, index_name.data(),
search_query.data(), search_mode));
if (results_or_error.KeyExists(kErrorMsgKey)) {
if (!results_or_error.At(kErrorMsgKey).IsString()) {
throw TextSearchException{"The error message is not a string!"};
}
throw TextSearchException(results_or_error.At(kErrorMsgKey).ValueString().data());
}
if (!results_or_error.KeyExists(kSearchResultsKey)) {
throw TextSearchException{"Incomplete text index search results!"};
}
if (!results_or_error.At(kSearchResultsKey).IsList()) {
throw TextSearchException{"Text index search results have wrong type!"};
}
return results_or_error.At(kSearchResultsKey).ValueList();
}
inline std::string_view AggregateOverTextIndex(mgp_graph *memgraph_graph, std::string_view index_name,
std::string_view search_query, std::string_view aggregation_query) {
auto results_or_error =
Map(mgp::MemHandlerCallback(graph_aggregate_over_text_index, memgraph_graph, index_name.data(),
search_query.data(), aggregation_query.data()));
if (results_or_error.KeyExists(kErrorMsgKey)) {
if (!results_or_error.At(kErrorMsgKey).IsString()) {
throw TextSearchException{"The error message is not a string!"};
}
throw TextSearchException(results_or_error.At(kErrorMsgKey).ValueString().data());
}
if (!results_or_error.KeyExists(kAggregationResultsKey)) {
throw TextSearchException{"Incomplete text index aggregation results!"};
}
if (!results_or_error.At(kAggregationResultsKey).IsString()) {
throw TextSearchException{"Text index aggregation results have wrong type!"};
}
return results_or_error.At(kAggregationResultsKey).ValueString();
}
inline bool CreateExistenceConstraint(mgp_graph *memgraph_graph, const std::string_view label,
const std::string_view property) {
return create_existence_constraint(memgraph_graph, label.data(), property.data());

View File

@ -295,6 +295,32 @@ set_path_external_library(jemalloc STATIC
import_header_library(rangev3 ${CMAKE_CURRENT_SOURCE_DIR}/rangev3/include)
ExternalProject_Add(mgcxx-proj
PREFIX mgcxx-proj
GIT_REPOSITORY https://github.com/memgraph/mgcxx
GIT_TAG "v0.0.4"
CMAKE_ARGS
"-DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>"
"-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}"
"-DENABLE_TESTS=OFF"
INSTALL_DIR "${PROJECT_BINARY_DIR}/mgcxx"
)
ExternalProject_Get_Property(mgcxx-proj install_dir)
set(MGCXX_ROOT ${install_dir})
add_library(tantivy_text_search STATIC IMPORTED GLOBAL)
add_dependencies(tantivy_text_search mgcxx-proj)
set_property(TARGET tantivy_text_search PROPERTY IMPORTED_LOCATION ${MGCXX_ROOT}/lib/libtantivy_text_search.a)
add_library(mgcxx_text_search STATIC IMPORTED GLOBAL)
add_dependencies(mgcxx_text_search mgcxx-proj)
set_property(TARGET mgcxx_text_search PROPERTY IMPORTED_LOCATION ${MGCXX_ROOT}/lib/libmgcxx_text_search.a)
# We need to create the include directory first in order to be able to add it
# as an include directory. The header files in the include directory will be
# generated later during the build process.
file(MAKE_DIRECTORY ${MGCXX_ROOT}/include)
set_property(TARGET mgcxx_text_search PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${MGCXX_ROOT}/include)
# Setup NuRaft
import_external_library(nuraft STATIC
${CMAKE_CURRENT_SOURCE_DIR}/nuraft/lib/libnuraft.a

View File

@ -6,6 +6,8 @@ project(memgraph_query_modules)
disallow_in_source_build()
find_package(fmt REQUIRED)
# Everything that is installed here, should be under the "query_modules" component.
set(CMAKE_INSTALL_DEFAULT_COMPONENT_NAME "query_modules")
string(TOLOWER ${CMAKE_BUILD_TYPE} lower_build_type)
@ -58,6 +60,22 @@ install(PROGRAMS $<TARGET_FILE:schema>
# Also install the source of the example, so user can read it.
install(FILES schema.cpp DESTINATION lib/memgraph/query_modules/src)
add_library(text SHARED text_search_module.cpp)
target_include_directories(text PRIVATE ${CMAKE_SOURCE_DIR}/include)
target_compile_options(text PRIVATE -Wall)
target_link_libraries(text PRIVATE -static-libgcc -static-libstdc++ fmt::fmt)
# Strip C++ example in release build.
if (lower_build_type STREQUAL "release")
add_custom_command(TARGET text POST_BUILD
COMMAND strip -s $<TARGET_FILE:text>
COMMENT "Stripping symbols and sections from the C++ text_search module")
endif()
install(PROGRAMS $<TARGET_FILE:text>
DESTINATION lib/memgraph/query_modules
RENAME text.so)
# Also install the source of the example, so user can read it.
install(FILES text_search_module.cpp DESTINATION lib/memgraph/query_modules/src)
# Install the Python example and modules
install(FILES example.py DESTINATION lib/memgraph/query_modules RENAME py_example.py)
install(FILES graph_analyzer.py DESTINATION lib/memgraph/query_modules)

View File

@ -0,0 +1,149 @@
// Copyright 2024 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
// License, and you may not use this file except in compliance with the Business Source License.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
#include <string>
#include <string_view>
#include <fmt/format.h>
#include <mgp.hpp>
namespace TextSearch {
constexpr std::string_view kProcedureSearch = "search";
constexpr std::string_view kProcedureRegexSearch = "regex_search";
constexpr std::string_view kProcedureSearchAllProperties = "search_all";
constexpr std::string_view kProcedureAggregate = "aggregate";
constexpr std::string_view kParameterIndexName = "index_name";
constexpr std::string_view kParameterSearchQuery = "search_query";
constexpr std::string_view kParameterAggregationQuery = "aggregation_query";
constexpr std::string_view kReturnNode = "node";
constexpr std::string_view kReturnAggregation = "aggregation";
const std::string kSearchAllPrefix = "all";
void Search(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory);
void RegexSearch(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory);
void SearchAllProperties(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory);
void Aggregate(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory);
} // namespace TextSearch
void TextSearch::Search(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory) {
mgp::MemoryDispatcherGuard guard{memory};
const auto record_factory = mgp::RecordFactory(result);
auto arguments = mgp::List(args);
try {
const auto *index_name = arguments[0].ValueString().data();
const auto *search_query = arguments[1].ValueString().data();
for (const auto &node :
mgp::SearchTextIndex(memgraph_graph, index_name, search_query, text_search_mode::SPECIFIED_PROPERTIES)) {
auto record = record_factory.NewRecord();
record.Insert(TextSearch::kReturnNode.data(), node.ValueNode());
}
} catch (const std::exception &e) {
record_factory.SetErrorMessage(e.what());
}
}
void TextSearch::RegexSearch(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory) {
mgp::MemoryDispatcherGuard guard{memory};
const auto record_factory = mgp::RecordFactory(result);
auto arguments = mgp::List(args);
try {
const auto *index_name = arguments[0].ValueString().data();
const auto *search_query = arguments[1].ValueString().data();
for (const auto &node : mgp::SearchTextIndex(memgraph_graph, index_name, search_query, text_search_mode::REGEX)) {
auto record = record_factory.NewRecord();
record.Insert(TextSearch::kReturnNode.data(), node.ValueNode());
}
} catch (const std::exception &e) {
record_factory.SetErrorMessage(e.what());
}
}
void TextSearch::SearchAllProperties(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result,
mgp_memory *memory) {
mgp::MemoryDispatcherGuard guard{memory};
const auto record_factory = mgp::RecordFactory(result);
auto arguments = mgp::List(args);
try {
const auto *index_name = arguments[0].ValueString().data();
const auto *search_query = fmt::format("{}:{}", kSearchAllPrefix, arguments[1].ValueString()).data();
for (const auto &node :
mgp::SearchTextIndex(memgraph_graph, index_name, search_query, text_search_mode::ALL_PROPERTIES)) {
auto record = record_factory.NewRecord();
record.Insert(TextSearch::kReturnNode.data(), node.ValueNode());
}
} catch (const std::exception &e) {
record_factory.SetErrorMessage(e.what());
}
}
void TextSearch::Aggregate(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory) {
mgp::MemoryDispatcherGuard guard{memory};
const auto record_factory = mgp::RecordFactory(result);
auto arguments = mgp::List(args);
try {
const auto *index_name = arguments[0].ValueString().data();
const auto *search_query = arguments[1].ValueString().data();
const auto *aggregation_query = arguments[2].ValueString().data();
const auto aggregation_result =
mgp::AggregateOverTextIndex(memgraph_graph, index_name, search_query, aggregation_query);
auto record = record_factory.NewRecord();
record.Insert(TextSearch::kReturnAggregation.data(), aggregation_result.data());
} catch (const std::exception &e) {
record_factory.SetErrorMessage(e.what());
}
}
extern "C" int mgp_init_module(struct mgp_module *module, struct mgp_memory *memory) {
try {
mgp::MemoryDispatcherGuard guard{memory};
AddProcedure(TextSearch::Search, TextSearch::kProcedureSearch, mgp::ProcedureType::Read,
{
mgp::Parameter(TextSearch::kParameterIndexName, mgp::Type::String),
mgp::Parameter(TextSearch::kParameterSearchQuery, mgp::Type::String),
},
{mgp::Return(TextSearch::kReturnNode, mgp::Type::Node)}, module, memory);
AddProcedure(TextSearch::RegexSearch, TextSearch::kProcedureRegexSearch, mgp::ProcedureType::Read,
{
mgp::Parameter(TextSearch::kParameterIndexName, mgp::Type::String),
mgp::Parameter(TextSearch::kParameterSearchQuery, mgp::Type::String),
},
{mgp::Return(TextSearch::kReturnNode, mgp::Type::Node)}, module, memory);
AddProcedure(TextSearch::SearchAllProperties, TextSearch::kProcedureSearchAllProperties, mgp::ProcedureType::Read,
{
mgp::Parameter(TextSearch::kParameterIndexName, mgp::Type::String),
mgp::Parameter(TextSearch::kParameterSearchQuery, mgp::Type::String),
},
{mgp::Return(TextSearch::kReturnNode, mgp::Type::Node)}, module, memory);
AddProcedure(TextSearch::Aggregate, TextSearch::kProcedureAggregate, mgp::ProcedureType::Read,
{
mgp::Parameter(TextSearch::kParameterIndexName, mgp::Type::String),
mgp::Parameter(TextSearch::kParameterSearchQuery, mgp::Type::String),
mgp::Parameter(TextSearch::kParameterAggregationQuery, mgp::Type::String),
},
{mgp::Return(TextSearch::kReturnAggregation, mgp::Type::String)}, module, memory);
} catch (const std::exception &e) {
std::cerr << "Error while initializing query module: " << e.what() << std::endl;
return 1;
}
return 0;
}
extern "C" int mgp_shutdown_module() { return 0; }

View File

@ -45,7 +45,7 @@ set(mg_single_node_v2_sources
add_executable(memgraph ${mg_single_node_v2_sources})
target_include_directories(memgraph PUBLIC ${CMAKE_SOURCE_DIR}/include)
target_link_libraries(memgraph stdc++fs Threads::Threads
mg-telemetry mg-communication mg-communication-metrics mg-memory mg-utils mg-license mg-settings mg-glue mg-flags mg::system mg::replication_handler)
mg-telemetry mgcxx_text_search tantivy_text_search mg-communication mg-communication-metrics mg-memory mg-utils mg-license mg-settings mg-glue mg-flags mg::system mg::replication_handler)
# NOTE: `include/mg_procedure.syms` describes a pattern match for symbols which
# should be dynamically exported, so that `dlopen` can correctly link th

View File

@ -311,7 +311,7 @@ class DbmsHandler {
stats.triggers += info.triggers;
stats.streams += info.streams;
++stats.num_databases;
stats.indices += storage_info.label_indices + storage_info.label_property_indices;
stats.indices += storage_info.label_indices + storage_info.label_property_indices + storage_info.text_indices;
stats.constraints += storage_info.existence_constraints + storage_info.unique_constraints;
++stats.storage_modes[(int)storage_info.storage_mode];
++stats.isolation_levels[(int)storage_info.isolation_level];

View File

@ -615,6 +615,7 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage
auto vertex = transaction->FindVertex(delta.vertex_add_remove_label.gid, View::NEW);
if (!vertex)
throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__);
// NOTE: Text search doesnt have replication in scope yet (Phases 1 and 2)
auto ret = vertex->AddLabel(transaction->NameToLabel(delta.vertex_add_remove_label.label));
if (ret.HasError() || !ret.GetValue())
throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__);
@ -627,6 +628,7 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage
auto vertex = transaction->FindVertex(delta.vertex_add_remove_label.gid, View::NEW);
if (!vertex)
throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__);
// NOTE: Text search doesnt have replication in scope yet (Phases 1 and 2)
auto ret = vertex->RemoveLabel(transaction->NameToLabel(delta.vertex_add_remove_label.label));
if (ret.HasError() || !ret.GetValue())
throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__);
@ -640,6 +642,7 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage
auto vertex = transaction->FindVertex(delta.vertex_edge_set_property.gid, View::NEW);
if (!vertex)
throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__);
// NOTE: Phase 1 of the text search feature doesn't have replication in scope
auto ret = vertex->SetProperty(transaction->NameToProperty(delta.vertex_edge_set_property.property),
delta.vertex_edge_set_property.value);
if (ret.HasError())
@ -853,6 +856,14 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage
throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__);
break;
}
case WalDeltaData::Type::TEXT_INDEX_CREATE: {
// NOTE: Text search doesnt have replication in scope yet (Phases 1 and 2)
break;
}
case WalDeltaData::Type::TEXT_INDEX_DROP: {
// NOTE: Text search doesnt have replication in scope yet (Phases 1 and 2)
break;
}
case WalDeltaData::Type::EXISTENCE_CONSTRAINT_CREATE: {
spdlog::trace(" Create existence constraint on :{} ({})", delta.operation_label_property.label,
delta.operation_label_property.property);

View File

@ -18,14 +18,15 @@
// Bolt server flags.
// NOLINTNEXTLINE (cppcoreguidelines-avoid-non-const-global-variables)
DEFINE_string(experimental_enabled, "",
"Experimental features to be used, comma seperated. Options [system-replication, high-availability]");
DEFINE_string(
experimental_enabled, "",
"Experimental features to be used, comma-separated. Options [system-replication, text-search, high-availability]");
using namespace std::string_view_literals;
namespace memgraph::flags {
auto const mapping = std::map{std::pair{"system-replication"sv, Experiments::SYSTEM_REPLICATION},
std::pair{"text-search"sv, Experiments::TEXT_SEARCH},
std::pair{"high-availability"sv, Experiments::HIGH_AVAILABILITY}};
auto ExperimentsInstance() -> Experiments & {
@ -45,7 +46,7 @@ bool AreExperimentsEnabled(Experiments experiments) {
void InitializeExperimental() {
namespace rv = ranges::views;
auto const connonicalize_string = [](auto &&rng) {
auto const canonicalize_string = [](auto &&rng) {
auto const is_space = [](auto c) { return c == ' '; };
auto const to_lower = [](unsigned char c) { return std::tolower(c); };
@ -56,7 +57,7 @@ void InitializeExperimental() {
auto const mapping_end = mapping.cend();
using underlying_type = std::underlying_type_t<Experiments>;
auto to_set = underlying_type{};
for (auto &&experiment : FLAGS_experimental_enabled | rv::split(',') | rv::transform(connonicalize_string)) {
for (auto &&experiment : FLAGS_experimental_enabled | rv::split(',') | rv::transform(canonicalize_string)) {
if (auto it = mapping.find(experiment); it != mapping_end) {
to_set |= static_cast<underlying_type>(it->second);
}

View File

@ -23,7 +23,8 @@ namespace memgraph::flags {
// old experiments can be reused once code cleanup has happened
enum class Experiments : uint8_t {
SYSTEM_REPLICATION = 1 << 0,
HIGH_AVAILABILITY = 1 << 1,
TEXT_SEARCH = 1 << 1,
HIGH_AVAILABILITY = 1 << 2,
};
bool AreExperimentsEnabled(Experiments experiments);

View File

@ -1,4 +1,4 @@
// Copyright 2023 Memgraph Ltd.
// Copyright 2024 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
@ -73,11 +73,11 @@ constexpr auto kLogToStderrGFlagsKey = "also_log_to_stderr";
constexpr auto kCartesianProductEnabledSettingKey = "cartesian-product-enabled";
constexpr auto kCartesianProductEnabledGFlagsKey = "cartesian-product-enabled";
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
std::atomic<double> execution_timeout_sec_; // Local cache-like thing
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
std::atomic<bool> cartesian_product_enabled_{true}; // Local cache-like thing
// NOLINTBEGIN(cppcoreguidelines-avoid-non-const-global-variables)
// Local cache-like thing
std::atomic<double> execution_timeout_sec_;
std::atomic<bool> cartesian_product_enabled_{true};
// NOLINTEND(cppcoreguidelines-avoid-non-const-global-variables)
auto ToLLEnum(std::string_view val) {
const auto ll_enum = memgraph::flags::LogLevelToEnum(val);

View File

@ -1,4 +1,4 @@
// Copyright 2023 Memgraph Ltd.
// Copyright 2024 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source

View File

@ -634,6 +634,24 @@ class DbAccessor final {
bool EdgeTypeIndexExists(storage::EdgeTypeId edge_type) const { return accessor_->EdgeTypeIndexExists(edge_type); }
bool TextIndexExists(const std::string &index_name) const { return accessor_->TextIndexExists(index_name); }
void TextIndexAddVertex(const VertexAccessor &vertex) { accessor_->TextIndexAddVertex(vertex.impl_); }
void TextIndexUpdateVertex(const VertexAccessor &vertex, const std::vector<storage::LabelId> &removed_labels = {}) {
accessor_->TextIndexUpdateVertex(vertex.impl_, removed_labels);
}
std::vector<storage::Gid> TextIndexSearch(const std::string &index_name, const std::string &search_query,
text_search_mode search_mode) const {
return accessor_->TextIndexSearch(index_name, search_query, search_mode);
}
std::string TextIndexAggregate(const std::string &index_name, const std::string &search_query,
const std::string &aggregation_query) const {
return accessor_->TextIndexAggregate(index_name, search_query, aggregation_query);
}
std::optional<storage::LabelIndexStats> GetIndexStats(const storage::LabelId &label) const {
return accessor_->GetIndexStats(label);
}
@ -717,6 +735,12 @@ class DbAccessor final {
return accessor_->DropIndex(edge_type);
}
void CreateTextIndex(const std::string &index_name, storage::LabelId label) {
accessor_->CreateTextIndex(index_name, label, this);
}
void DropTextIndex(const std::string &index_name) { accessor_->DropTextIndex(index_name); }
utils::BasicResult<storage::StorageExistenceConstraintDefinitionError, void> CreateExistenceConstraint(
storage::LabelId label, storage::PropertyId property) {
return accessor_->CreateExistenceConstraint(label, property);

View File

@ -252,6 +252,10 @@ void DumpLabelPropertyIndex(std::ostream *os, query::DbAccessor *dba, storage::L
<< ");";
}
void DumpTextIndex(std::ostream *os, query::DbAccessor *dba, const std::string &index_name, storage::LabelId label) {
*os << "CREATE TEXT INDEX " << EscapeName(index_name) << " ON :" << EscapeName(dba->LabelToName(label)) << ";";
}
void DumpExistenceConstraint(std::ostream *os, query::DbAccessor *dba, storage::LabelId label,
storage::PropertyId property) {
*os << "CREATE CONSTRAINT ON (u:" << EscapeName(dba->LabelToName(label)) << ") ASSERT EXISTS (u."
@ -286,6 +290,8 @@ PullPlanDump::PullPlanDump(DbAccessor *dba, dbms::DatabaseAccess db_acc)
CreateLabelIndicesPullChunk(),
// Dump all label property indices
CreateLabelPropertyIndicesPullChunk(),
// Dump all text indices
CreateTextIndicesPullChunk(),
// Dump all existence constraints
CreateExistenceConstraintsPullChunk(),
// Dump all unique constraints
@ -412,6 +418,34 @@ PullPlanDump::PullChunk PullPlanDump::CreateLabelPropertyIndicesPullChunk() {
};
}
PullPlanDump::PullChunk PullPlanDump::CreateTextIndicesPullChunk() {
// Dump all text indices
return [this, global_index = 0U](AnyStream *stream, std::optional<int> n) mutable -> std::optional<size_t> {
// Delay the construction of indices vectors
if (!indices_info_) {
indices_info_.emplace(dba_->ListAllIndices());
}
const auto &text = indices_info_->text_indices;
size_t local_counter = 0;
while (global_index < text.size() && (!n || local_counter < *n)) {
std::ostringstream os;
const auto &text_index = text[global_index];
DumpTextIndex(&os, dba_, text_index.first, text_index.second);
stream->Result({TypedValue(os.str())});
++global_index;
++local_counter;
}
if (global_index == text.size()) {
return local_counter;
}
return std::nullopt;
};
}
PullPlanDump::PullChunk PullPlanDump::CreateExistenceConstraintsPullChunk() {
return [this, global_index = 0U](AnyStream *stream, std::optional<int> n) mutable -> std::optional<size_t> {
// Delay the construction of constraint vectors

View File

@ -55,6 +55,7 @@ struct PullPlanDump {
PullChunk CreateLabelIndicesPullChunk();
PullChunk CreateLabelPropertyIndicesPullChunk();
PullChunk CreateTextIndicesPullChunk();
PullChunk CreateExistenceConstraintsPullChunk();
PullChunk CreateUniqueConstraintsPullChunk();
PullChunk CreateInternalIndexPullChunk();

View File

@ -433,4 +433,17 @@ class MultiDatabaseQueryInMulticommandTxException : public QueryException {
SPECIALIZE_GET_EXCEPTION_NAME(MultiDatabaseQueryInMulticommandTxException)
};
class TextSearchException : public QueryException {
using QueryException::QueryException;
SPECIALIZE_GET_EXCEPTION_NAME(TextSearchException)
};
class TextSearchDisabledException : public TextSearchException {
public:
TextSearchDisabledException()
: TextSearchException(
"To use text indices and text search, start Memgraph with the experimental text search feature enabled.") {}
SPECIALIZE_GET_EXCEPTION_NAME(TextSearchDisabledException)
};
} // namespace memgraph::query

View File

@ -189,6 +189,9 @@ constexpr utils::TypeInfo query::IndexQuery::kType{utils::TypeId::AST_INDEX_QUER
constexpr utils::TypeInfo query::EdgeIndexQuery::kType{utils::TypeId::AST_EDGE_INDEX_QUERY, "EdgeIndexQuery",
&query::Query::kType};
constexpr utils::TypeInfo query::TextIndexQuery::kType{utils::TypeId::AST_TEXT_INDEX_QUERY, "TextIndexQuery",
&query::Query::kType};
constexpr utils::TypeInfo query::Create::kType{utils::TypeId::AST_CREATE, "Create", &query::Clause::kType};
constexpr utils::TypeInfo query::CallProcedure::kType{utils::TypeId::AST_CALL_PROCEDURE, "CallProcedure",

View File

@ -2273,6 +2273,37 @@ class EdgeIndexQuery : public memgraph::query::Query {
friend class AstStorage;
};
class TextIndexQuery : public memgraph::query::Query {
public:
static const utils::TypeInfo kType;
const utils::TypeInfo &GetTypeInfo() const override { return kType; }
enum class Action { CREATE, DROP };
TextIndexQuery() = default;
DEFVISITABLE(QueryVisitor<void>);
memgraph::query::TextIndexQuery::Action action_;
memgraph::query::LabelIx label_;
std::string index_name_;
TextIndexQuery *Clone(AstStorage *storage) const override {
TextIndexQuery *object = storage->Create<TextIndexQuery>();
object->action_ = action_;
object->label_ = storage->GetLabelIx(label_.name);
object->index_name_ = index_name_;
return object;
}
protected:
TextIndexQuery(Action action, LabelIx label, std::string index_name)
: action_(action), label_(std::move(label)), index_name_(index_name) {}
private:
friend class AstStorage;
};
class Create : public memgraph::query::Clause {
public:
static const utils::TypeInfo kType;

View File

@ -83,6 +83,7 @@ class ExplainQuery;
class ProfileQuery;
class IndexQuery;
class EdgeIndexQuery;
class TextIndexQuery;
class DatabaseInfoQuery;
class SystemInfoQuery;
class ConstraintQuery;
@ -144,11 +145,11 @@ class ExpressionVisitor
template <class TResult>
class QueryVisitor
: public utils::Visitor<TResult, CypherQuery, ExplainQuery, ProfileQuery, IndexQuery, EdgeIndexQuery, AuthQuery,
DatabaseInfoQuery, SystemInfoQuery, ConstraintQuery, DumpQuery, ReplicationQuery,
LockPathQuery, FreeMemoryQuery, TriggerQuery, IsolationLevelQuery, CreateSnapshotQuery,
StreamQuery, SettingQuery, VersionQuery, ShowConfigQuery, TransactionQueueQuery,
StorageModeQuery, AnalyzeGraphQuery, MultiDatabaseQuery, ShowDatabasesQuery,
EdgeImportModeQuery, CoordinatorQuery> {};
: public utils::Visitor<TResult, CypherQuery, ExplainQuery, ProfileQuery, IndexQuery, EdgeIndexQuery,
TextIndexQuery, AuthQuery, DatabaseInfoQuery, SystemInfoQuery, ConstraintQuery, DumpQuery,
ReplicationQuery, LockPathQuery, FreeMemoryQuery, TriggerQuery, IsolationLevelQuery,
CreateSnapshotQuery, StreamQuery, SettingQuery, VersionQuery, ShowConfigQuery,
TransactionQueueQuery, StorageModeQuery, AnalyzeGraphQuery, MultiDatabaseQuery,
ShowDatabasesQuery, EdgeImportModeQuery, CoordinatorQuery> {};
} // namespace memgraph::query

View File

@ -243,6 +243,13 @@ antlrcpp::Any CypherMainVisitor::visitIndexQuery(MemgraphCypher::IndexQueryConte
return index_query;
}
antlrcpp::Any CypherMainVisitor::visitTextIndexQuery(MemgraphCypher::TextIndexQueryContext *ctx) {
MG_ASSERT(ctx->children.size() == 1, "TextIndexQuery should have exactly one child!");
auto *text_index_query = std::any_cast<TextIndexQuery *>(ctx->children[0]->accept(this));
query_ = text_index_query;
return text_index_query;
}
antlrcpp::Any CypherMainVisitor::visitCreateIndex(MemgraphCypher::CreateIndexContext *ctx) {
auto *index_query = storage_->Create<IndexQuery>();
index_query->action_ = IndexQuery::Action::CREATE;
@ -286,6 +293,21 @@ antlrcpp::Any CypherMainVisitor::visitDropEdgeIndex(MemgraphCypher::DropEdgeInde
return index_query;
}
antlrcpp::Any CypherMainVisitor::visitCreateTextIndex(MemgraphCypher::CreateTextIndexContext *ctx) {
auto *index_query = storage_->Create<TextIndexQuery>();
index_query->index_name_ = std::any_cast<std::string>(ctx->indexName()->accept(this));
index_query->action_ = TextIndexQuery::Action::CREATE;
index_query->label_ = AddLabel(std::any_cast<std::string>(ctx->labelName()->accept(this)));
return index_query;
}
antlrcpp::Any CypherMainVisitor::visitDropTextIndex(MemgraphCypher::DropTextIndexContext *ctx) {
auto *index_query = storage_->Create<TextIndexQuery>();
index_query->index_name_ = std::any_cast<std::string>(ctx->indexName()->accept(this));
index_query->action_ = TextIndexQuery::Action::DROP;
return index_query;
}
antlrcpp::Any CypherMainVisitor::visitAuthQuery(MemgraphCypher::AuthQueryContext *ctx) {
MG_ASSERT(ctx->children.size() == 1, "AuthQuery should have exactly one child!");
auto *auth_query = std::any_cast<AuthQuery *>(ctx->children[0]->accept(this));

View File

@ -153,6 +153,11 @@ class CypherMainVisitor : public antlropencypher::MemgraphCypherBaseVisitor {
*/
antlrcpp::Any visitEdgeIndexQuery(MemgraphCypher::EdgeIndexQueryContext *ctx) override;
/**
* @return TextIndexQuery*
*/
antlrcpp::Any visitTextIndexQuery(MemgraphCypher::TextIndexQueryContext *ctx) override;
/**
* @return ExplainQuery*
*/
@ -500,7 +505,7 @@ class CypherMainVisitor : public antlropencypher::MemgraphCypherBaseVisitor {
antlrcpp::Any visitCreateIndex(MemgraphCypher::CreateIndexContext *ctx) override;
/**
* @return DropIndex*
* @return IndexQuery*
*/
antlrcpp::Any visitDropIndex(MemgraphCypher::DropIndexContext *ctx) override;
@ -514,6 +519,16 @@ class CypherMainVisitor : public antlropencypher::MemgraphCypherBaseVisitor {
*/
antlrcpp::Any visitDropEdgeIndex(MemgraphCypher::DropEdgeIndexContext *ctx) override;
/**
* @return TextIndexQuery*
*/
antlrcpp::Any visitCreateTextIndex(MemgraphCypher::CreateTextIndexContext *ctx) override;
/**
* @return TextIndexQuery*
*/
antlrcpp::Any visitDropTextIndex(MemgraphCypher::DropTextIndexContext *ctx) override;
/**
* @return AuthQuery*
*/

View File

@ -25,6 +25,7 @@ statement : query ;
query : cypherQuery
| indexQuery
| textIndexQuery
| explainQuery
| profileQuery
| databaseInfoQuery
@ -65,6 +66,8 @@ cypherQuery : singleQuery ( cypherUnion )* ( queryMemoryLimit )? ;
indexQuery : createIndex | dropIndex;
textIndexQuery : createTextIndex | dropTextIndex;
singleQuery : clause ( clause )* ;
cypherUnion : ( UNION ALL singleQuery )
@ -342,6 +345,12 @@ createIndex : CREATE INDEX ON ':' labelName ( '(' propertyKeyName ')' )? ;
dropIndex : DROP INDEX ON ':' labelName ( '(' propertyKeyName ')' )? ;
indexName : symbolicName ;
createTextIndex : CREATE TEXT INDEX indexName ON ':' labelName ;
dropTextIndex : DROP TEXT INDEX indexName ;
doubleLiteral : FloatingLiteral ;
cypherKeyword : ALL

View File

@ -131,6 +131,7 @@ SHOW : S H O W ;
SINGLE : S I N G L E ;
STARTS : S T A R T S ;
STORAGE : S T O R A G E ;
TEXT : T E X T ;
THEN : T H E N ;
TRUE : T R U E ;
UNION : U N I O N ;

View File

@ -134,6 +134,7 @@ symbolicName : UnescapedSymbolicName
query : cypherQuery
| indexQuery
| edgeIndexQuery
| textIndexQuery
| explainQuery
| profileQuery
| databaseInfoQuery

View File

@ -29,6 +29,8 @@ class PrivilegeExtractor : public QueryVisitor<void>, public HierarchicalTreeVis
void Visit(EdgeIndexQuery & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::INDEX); }
void Visit(TextIndexQuery & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::INDEX); }
void Visit(AnalyzeGraphQuery & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::INDEX); }
void Visit(AuthQuery & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::AUTH); }

View File

@ -1,4 +1,4 @@
// Copyright 2023 Memgraph Ltd.
// Copyright 2024 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source

View File

@ -39,6 +39,7 @@
#include "dbms/dbms_handler.hpp"
#include "dbms/global.hpp"
#include "dbms/inmemory/storage_helper.hpp"
#include "flags/experimental.hpp"
#include "flags/replication.hpp"
#include "flags/run_time_configurable.hpp"
#include "glue/communication.hpp"
@ -2709,6 +2710,75 @@ PreparedQuery PrepareEdgeIndexQuery(ParsedQuery parsed_query, bool in_explicit_t
RWType::W};
}
PreparedQuery PrepareTextIndexQuery(ParsedQuery parsed_query, bool in_explicit_transaction,
std::vector<Notification> *notifications, CurrentDB &current_db) {
if (in_explicit_transaction) {
throw IndexInMulticommandTxException();
}
auto *text_index_query = utils::Downcast<TextIndexQuery>(parsed_query.query);
std::function<void(Notification &)> handler;
// TODO: we will need transaction for replication
MG_ASSERT(current_db.db_acc_, "Text index query expects a current DB");
auto &db_acc = *current_db.db_acc_;
MG_ASSERT(current_db.db_transactional_accessor_, "Text index query expects a current DB transaction");
auto *dba = &*current_db.execution_db_accessor_;
// Creating an index influences computed plan costs.
auto invalidate_plan_cache = [plan_cache = db_acc->plan_cache()] {
plan_cache->WithLock([&](auto &cache) { cache.reset(); });
};
auto *storage = db_acc->storage();
auto label = storage->NameToLabel(text_index_query->label_.name);
auto &index_name = text_index_query->index_name_;
Notification index_notification(SeverityLevel::INFO);
switch (text_index_query->action_) {
case TextIndexQuery::Action::CREATE: {
index_notification.code = NotificationCode::CREATE_INDEX;
index_notification.title = fmt::format("Created text index on label {}.", text_index_query->label_.name);
// TODO: not just storage + invalidate_plan_cache. Need a DB transaction (for replication)
handler = [dba, label, index_name,
invalidate_plan_cache = std::move(invalidate_plan_cache)](Notification &index_notification) {
if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
throw TextSearchDisabledException();
}
dba->CreateTextIndex(index_name, label);
utils::OnScopeExit invalidator(invalidate_plan_cache);
};
break;
}
case TextIndexQuery::Action::DROP: {
index_notification.code = NotificationCode::DROP_INDEX;
index_notification.title = fmt::format("Dropped text index on label {}.", text_index_query->label_.name);
// TODO: not just storage + invalidate_plan_cache. Need a DB transaction (for replication)
handler = [dba, index_name,
invalidate_plan_cache = std::move(invalidate_plan_cache)](Notification &index_notification) {
if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
throw TextSearchDisabledException();
}
dba->DropTextIndex(index_name);
utils::OnScopeExit invalidator(invalidate_plan_cache);
};
break;
}
}
return PreparedQuery{
{},
std::move(parsed_query.required_privileges),
[handler = std::move(handler), notifications, index_notification = std::move(index_notification)](
AnyStream * /*stream*/, std::optional<int> /*unused*/) mutable {
handler(index_notification);
notifications->push_back(index_notification);
return QueryHandlerResult::COMMIT; // TODO: Will need to become COMMIT when we fix replication
},
RWType::W};
}
PreparedQuery PrepareAuthQuery(ParsedQuery parsed_query, bool in_explicit_transaction,
InterpreterContext *interpreter_context, Interpreter &interpreter) {
if (in_explicit_transaction) {
@ -3499,7 +3569,7 @@ PreparedQuery PrepareDatabaseInfoQuery(ParsedQuery parsed_query, bool in_explici
}
MG_ASSERT(current_db.db_acc_, "Database info query expects a current DB");
MG_ASSERT(current_db.db_transactional_accessor_, "Database ifo query expects a current DB transaction");
MG_ASSERT(current_db.db_transactional_accessor_, "Database info query expects a current DB transaction");
auto *dba = &*current_db.execution_db_accessor_;
auto *info_query = utils::Downcast<DatabaseInfoQuery>(parsed_query.query);
@ -3514,10 +3584,11 @@ PreparedQuery PrepareDatabaseInfoQuery(ParsedQuery parsed_query, bool in_explici
const std::string_view label_index_mark{"label"};
const std::string_view label_property_index_mark{"label+property"};
const std::string_view edge_type_index_mark{"edge-type"};
const std::string_view text_index_mark{"text"};
auto info = dba->ListAllIndices();
auto storage_acc = database->Access();
std::vector<std::vector<TypedValue>> results;
results.reserve(info.label.size() + info.label_property.size());
results.reserve(info.label.size() + info.label_property.size() + info.text_indices.size());
for (const auto &item : info.label) {
results.push_back({TypedValue(label_index_mark), TypedValue(storage->LabelToName(item)), TypedValue(),
TypedValue(static_cast<int>(storage_acc->ApproximateVertexCount(item)))});
@ -3532,6 +3603,10 @@ PreparedQuery PrepareDatabaseInfoQuery(ParsedQuery parsed_query, bool in_explici
results.push_back({TypedValue(edge_type_index_mark), TypedValue(storage->EdgeTypeToName(item)), TypedValue(),
TypedValue(static_cast<int>(storage_acc->ApproximateEdgeCount(item)))});
}
for (const auto &[index_name, label] : info.text_indices) {
results.push_back({TypedValue(fmt::format("{} (name: {})", text_index_mark, index_name)),
TypedValue(storage->LabelToName(label)), TypedValue(), TypedValue()});
}
std::sort(results.begin(), results.end(), [&label_index_mark](const auto &record_1, const auto &record_2) {
const auto type_1 = record_1[0].ValueString();
const auto type_2 = record_2[0].ValueString();
@ -4293,13 +4368,15 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string,
utils::Downcast<ProfileQuery>(parsed_query.query) || utils::Downcast<DumpQuery>(parsed_query.query) ||
utils::Downcast<TriggerQuery>(parsed_query.query) || utils::Downcast<AnalyzeGraphQuery>(parsed_query.query) ||
utils::Downcast<IndexQuery>(parsed_query.query) || utils::Downcast<EdgeIndexQuery>(parsed_query.query) ||
utils::Downcast<DatabaseInfoQuery>(parsed_query.query) || utils::Downcast<ConstraintQuery>(parsed_query.query);
utils::Downcast<TextIndexQuery>(parsed_query.query) || utils::Downcast<DatabaseInfoQuery>(parsed_query.query) ||
utils::Downcast<ConstraintQuery>(parsed_query.query);
if (!in_explicit_transaction_ && requires_db_transaction) {
// TODO: ATM only a single database, will change when we have multiple database transactions
bool could_commit = utils::Downcast<CypherQuery>(parsed_query.query) != nullptr;
bool unique = utils::Downcast<IndexQuery>(parsed_query.query) != nullptr ||
utils::Downcast<EdgeIndexQuery>(parsed_query.query) != nullptr ||
utils::Downcast<TextIndexQuery>(parsed_query.query) != nullptr ||
utils::Downcast<ConstraintQuery>(parsed_query.query) != nullptr ||
upper_case_query.find(kSchemaAssert) != std::string::npos;
SetupDatabaseTransaction(could_commit, unique);
@ -4337,6 +4414,9 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string,
} else if (utils::Downcast<EdgeIndexQuery>(parsed_query.query)) {
prepared_query = PrepareEdgeIndexQuery(std::move(parsed_query), in_explicit_transaction_,
&query_execution->notifications, current_db_);
} else if (utils::Downcast<TextIndexQuery>(parsed_query.query)) {
prepared_query = PrepareTextIndexQuery(std::move(parsed_query), in_explicit_transaction_,
&query_execution->notifications, current_db_);
} else if (utils::Downcast<AnalyzeGraphQuery>(parsed_query.query)) {
prepared_query = PrepareAnalyzeGraphQuery(std::move(parsed_query), in_explicit_transaction_, current_db_);
} else if (utils::Downcast<AuthQuery>(parsed_query.query)) {

View File

@ -32,6 +32,7 @@
#include "spdlog/spdlog.h"
#include "csv/parsing.hpp"
#include "flags/experimental.hpp"
#include "license/license.hpp"
#include "query/auth_checker.hpp"
#include "query/context.hpp"
@ -266,6 +267,10 @@ VertexAccessor &CreateLocalVertex(const NodeCreationInfo &node_info, Frame *fram
}
MultiPropsInitChecked(&new_node, properties);
if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
context.db_accessor->TextIndexAddVertex(new_node);
}
(*frame)[node_info.symbol] = new_node;
return (*frame)[node_info.symbol].ValueVertex();
}
@ -2991,6 +2996,9 @@ bool SetProperty::SetPropertyCursor::Pull(Frame &frame, ExecutionContext &contex
context.trigger_context_collector->RegisterSetObjectProperty(lhs.ValueVertex(), self_.property_,
TypedValue{std::move(old_value)}, TypedValue{rhs});
}
if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
context.db_accessor->TextIndexUpdateVertex(lhs.ValueVertex());
}
break;
}
case TypedValue::Type::Edge: {
@ -3147,6 +3155,9 @@ void SetPropertiesOnRecord(TRecordAccessor *record, const TypedValue &rhs, SetPr
case TypedValue::Type::Vertex: {
PropertiesMap new_properties = get_props(rhs.ValueVertex());
update_props(new_properties);
if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
context->db_accessor->TextIndexUpdateVertex(rhs.ValueVertex());
}
break;
}
case TypedValue::Type::Map: {
@ -3204,6 +3215,9 @@ bool SetProperties::SetPropertiesCursor::Pull(Frame &frame, ExecutionContext &co
}
#endif
SetPropertiesOnRecord(&lhs.ValueVertex(), rhs, self_.op_, &context, cached_name_id_);
if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
context.db_accessor->TextIndexUpdateVertex(lhs.ValueVertex());
}
break;
case TypedValue::Type::Edge:
#ifdef MG_ENTERPRISE
@ -3295,6 +3309,10 @@ bool SetLabels::SetLabelsCursor::Pull(Frame &frame, ExecutionContext &context) {
}
}
if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
context.db_accessor->TextIndexUpdateVertex(vertex);
}
return true;
}
@ -3366,6 +3384,9 @@ bool RemoveProperty::RemovePropertyCursor::Pull(Frame &frame, ExecutionContext &
}
#endif
remove_prop(&lhs.ValueVertex());
if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
context.db_accessor->TextIndexUpdateVertex(lhs.ValueVertex());
}
break;
case TypedValue::Type::Edge:
#ifdef MG_ENTERPRISE
@ -3458,6 +3479,10 @@ bool RemoveLabels::RemoveLabelsCursor::Pull(Frame &frame, ExecutionContext &cont
}
}
if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
context.db_accessor->TextIndexUpdateVertex(vertex, EvaluateLabels(self_.labels_, evaluator, context.db_accessor));
}
return true;
}

View File

@ -1,4 +1,4 @@
// Copyright 2023 Memgraph Ltd.
// Copyright 2024 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source

View File

@ -23,6 +23,8 @@
#include <utility>
#include <variant>
#include "flags/experimental.hpp"
#include "flags/run_time_configurable.hpp"
#include "license/license.hpp"
#include "mg_procedure.h"
#include "module.hpp"
@ -32,6 +34,7 @@
#include "query/procedure/fmt.hpp"
#include "query/procedure/mg_procedure_helpers.hpp"
#include "query/stream/common.hpp"
#include "storage/v2/indices/text_index.hpp"
#include "storage/v2/property_value.hpp"
#include "storage/v2/storage_mode.hpp"
#include "storage/v2/view.hpp"
@ -1843,6 +1846,11 @@ mgp_error mgp_vertex_set_property(struct mgp_vertex *v, const char *property_nam
const auto result = std::visit(
[prop_key, property_value](auto &impl) { return impl.SetProperty(prop_key, ToPropertyValue(*property_value)); },
v->impl);
if (memgraph::flags::AreExperimentsEnabled(memgraph::flags::Experiments::TEXT_SEARCH) && !result.HasError()) {
auto v_impl = v->getImpl();
v->graph->getImpl()->TextIndexUpdateVertex(v_impl);
}
if (result.HasError()) {
switch (result.GetError()) {
case memgraph::storage::Error::DELETED_OBJECT:
@ -1899,6 +1907,11 @@ mgp_error mgp_vertex_set_properties(struct mgp_vertex *v, struct mgp_map *proper
}
const auto result = v->getImpl().UpdateProperties(props);
if (memgraph::flags::AreExperimentsEnabled(memgraph::flags::Experiments::TEXT_SEARCH) && !result.HasError()) {
auto v_impl = v->getImpl();
v->graph->getImpl()->TextIndexUpdateVertex(v_impl);
}
if (result.HasError()) {
switch (result.GetError()) {
case memgraph::storage::Error::DELETED_OBJECT:
@ -1956,6 +1969,10 @@ mgp_error mgp_vertex_add_label(struct mgp_vertex *v, mgp_label label) {
}
const auto result = std::visit([label_id](auto &impl) { return impl.AddLabel(label_id); }, v->impl);
if (memgraph::flags::AreExperimentsEnabled(memgraph::flags::Experiments::TEXT_SEARCH) && !result.HasError()) {
auto v_impl = v->getImpl();
v->graph->getImpl()->TextIndexUpdateVertex(v_impl);
}
if (result.HasError()) {
switch (result.GetError()) {
@ -1998,6 +2015,10 @@ mgp_error mgp_vertex_remove_label(struct mgp_vertex *v, mgp_label label) {
throw ImmutableObjectException{"Cannot remove a label from an immutable vertex!"};
}
const auto result = std::visit([label_id](auto &impl) { return impl.RemoveLabel(label_id); }, v->impl);
if (memgraph::flags::AreExperimentsEnabled(memgraph::flags::Experiments::TEXT_SEARCH) && !result.HasError()) {
auto v_impl = v->getImpl();
v->graph->getImpl()->TextIndexUpdateVertex(v_impl, {label_id});
}
if (result.HasError()) {
switch (result.GetError()) {
@ -2590,7 +2611,7 @@ mgp_error mgp_edge_iter_properties(mgp_edge *e, mgp_memory *memory, mgp_properti
mgp_error mgp_graph_get_vertex_by_id(mgp_graph *graph, mgp_vertex_id id, mgp_memory *memory, mgp_vertex **result) {
return WrapExceptions(
[graph, id, memory]() -> mgp_vertex * {
std::optional<memgraph::query::VertexAccessor> maybe_vertex = std::visit(
auto maybe_vertex = std::visit(
[graph, id](auto *impl) {
return impl->FindVertex(memgraph::storage::Gid::FromInt(id.as_int), graph->view);
},
@ -2967,6 +2988,10 @@ mgp_error mgp_graph_create_vertex(struct mgp_graph *graph, mgp_memory *memory, m
}
auto *vertex = std::visit(
[=](auto *impl) { return NewRawMgpObject<mgp_vertex>(memory, impl->InsertVertex(), graph); }, graph->impl);
if (memgraph::flags::AreExperimentsEnabled(memgraph::flags::Experiments::TEXT_SEARCH)) {
auto v_impl = vertex->getImpl();
vertex->graph->getImpl()->TextIndexAddVertex(v_impl);
}
auto &ctx = graph->ctx;
ctx->execution_stats[memgraph::query::ExecutionStats::Key::CREATED_NODES] += 1;
@ -3324,6 +3349,140 @@ mgp_error mgp_graph_delete_edge(struct mgp_graph *graph, mgp_edge *edge) {
});
}
mgp_error mgp_graph_has_text_index(mgp_graph *graph, const char *index_name, int *result) {
return WrapExceptions([graph, index_name, result]() {
std::visit(memgraph::utils::Overloaded{
[&](memgraph::query::DbAccessor *impl) { *result = impl->TextIndexExists(index_name); },
[&](memgraph::query::SubgraphDbAccessor *impl) {
*result = impl->GetAccessor()->TextIndexExists(index_name);
}},
graph->impl);
});
}
mgp_vertex *GetVertexByGid(mgp_graph *graph, memgraph::storage::Gid id, mgp_memory *memory) {
auto get_vertex_by_gid = memgraph::utils::Overloaded{
[graph, id, memory](memgraph::query::DbAccessor *impl) -> mgp_vertex * {
auto maybe_vertex = impl->FindVertex(id, graph->view);
if (!maybe_vertex) return nullptr;
return NewRawMgpObject<mgp_vertex>(memory, *maybe_vertex, graph);
},
[graph, id, memory](memgraph::query::SubgraphDbAccessor *impl) -> mgp_vertex * {
auto maybe_vertex = impl->FindVertex(id, graph->view);
if (!maybe_vertex) return nullptr;
return NewRawMgpObject<mgp_vertex>(
memory, memgraph::query::SubgraphVertexAccessor(*maybe_vertex, impl->getGraph()), graph);
}};
return std::visit(get_vertex_by_gid, graph->impl);
}
void WrapTextSearch(mgp_graph *graph, mgp_memory *memory, mgp_map **result,
const std::vector<memgraph::storage::Gid> &vertex_ids = {},
const std::optional<std::string> &error_msg = std::nullopt) {
if (const auto err = mgp_map_make_empty(memory, result); err != mgp_error::MGP_ERROR_NO_ERROR) {
throw std::logic_error("Retrieving text search results failed during creation of a mgp_map");
}
mgp_value *error_value;
if (error_msg.has_value()) {
if (const auto err = mgp_value_make_string(error_msg.value().data(), memory, &error_value);
err != mgp_error::MGP_ERROR_NO_ERROR) {
throw std::logic_error("Retrieving text search results failed during creation of a string mgp_value");
}
}
mgp_list *search_results{};
if (const auto err = mgp_list_make_empty(vertex_ids.size(), memory, &search_results);
err != mgp_error::MGP_ERROR_NO_ERROR) {
throw std::logic_error("Retrieving text search results failed during creation of a mgp_list");
}
for (const auto &vertex_id : vertex_ids) {
mgp_value *vertex;
if (const auto err = mgp_value_make_vertex(GetVertexByGid(graph, vertex_id, memory), &vertex);
err != mgp_error::MGP_ERROR_NO_ERROR) {
throw std::logic_error("Retrieving text search results failed during creation of a vertex mgp_value");
}
if (const auto err = mgp_list_append(search_results, vertex); err != mgp_error::MGP_ERROR_NO_ERROR) {
throw std::logic_error(
"Retrieving text search results failed during insertion of the mgp_value into the result list");
}
}
mgp_value *search_results_value;
if (const auto err = mgp_value_make_list(search_results, &search_results_value);
err != mgp_error::MGP_ERROR_NO_ERROR) {
throw std::logic_error("Retrieving text search results failed during creation of a list mgp_value");
}
if (error_msg.has_value()) {
if (const auto err = mgp_map_insert(*result, "error_msg", error_value); err != mgp_error::MGP_ERROR_NO_ERROR) {
throw std::logic_error("Retrieving text index search error failed during insertion into mgp_map");
}
return;
}
if (const auto err = mgp_map_insert(*result, "search_results", search_results_value);
err != mgp_error::MGP_ERROR_NO_ERROR) {
throw std::logic_error("Retrieving text index search results failed during insertion into mgp_map");
}
}
void WrapTextIndexAggregation(mgp_memory *memory, mgp_map **result, const std::string &aggregation_result,
const std::optional<std::string> &error_msg = std::nullopt) {
if (const auto err = mgp_map_make_empty(memory, result); err != mgp_error::MGP_ERROR_NO_ERROR) {
throw std::logic_error("Retrieving text search results failed during creation of a mgp_map");
}
mgp_value *aggregation_result_or_error_value;
if (const auto err = mgp_value_make_string(error_msg.value_or(aggregation_result).data(), memory,
&aggregation_result_or_error_value);
err != mgp_error::MGP_ERROR_NO_ERROR) {
throw std::logic_error("Retrieving text search results failed during creation of a string mgp_value");
}
if (error_msg.has_value()) {
if (const auto err = mgp_map_insert(*result, "error_msg", aggregation_result_or_error_value);
err != mgp_error::MGP_ERROR_NO_ERROR) {
throw std::logic_error("Retrieving text index aggregation error failed during insertion into mgp_map");
}
return;
}
if (const auto err = mgp_map_insert(*result, "aggregation_results", aggregation_result_or_error_value);
err != mgp_error::MGP_ERROR_NO_ERROR) {
throw std::logic_error("Retrieving text index aggregation results failed during insertion into mgp_map");
}
}
mgp_error mgp_graph_search_text_index(mgp_graph *graph, const char *index_name, const char *search_query,
text_search_mode search_mode, mgp_memory *memory, mgp_map **result) {
return WrapExceptions([graph, memory, index_name, search_query, search_mode, result]() {
std::vector<memgraph::storage::Gid> found_vertices_ids;
std::optional<std::string> error_msg = std::nullopt;
try {
found_vertices_ids = graph->getImpl()->TextIndexSearch(index_name, search_query, search_mode);
} catch (memgraph::query::QueryException &e) {
error_msg = e.what();
}
WrapTextSearch(graph, memory, result, found_vertices_ids, error_msg);
});
}
mgp_error mgp_graph_aggregate_over_text_index(mgp_graph *graph, const char *index_name, const char *search_query,
const char *aggregation_query, mgp_memory *memory, mgp_map **result) {
return WrapExceptions([graph, memory, index_name, search_query, aggregation_query, result]() {
std::string search_results;
std::optional<std::string> error_msg = std::nullopt;
try {
search_results = graph->getImpl()->TextIndexAggregate(index_name, search_query, aggregation_query);
} catch (memgraph::query::QueryException &e) {
error_msg = e.what();
}
WrapTextIndexAggregation(memory, result, search_results, error_msg);
});
}
#ifdef MG_ENTERPRISE
namespace {
void NextPermitted(mgp_vertices_iterator &it) {

View File

@ -1,4 +1,4 @@
// Copyright 2023 Memgraph Ltd.
// Copyright 2024 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
@ -562,6 +562,13 @@ struct mgp_graph {
memgraph::query::ExecutionContext *ctx;
memgraph::storage::StorageMode storage_mode;
memgraph::query::DbAccessor *getImpl() const {
return std::visit(
memgraph::utils::Overloaded{[](memgraph::query::DbAccessor *impl) { return impl; },
[](memgraph::query::SubgraphDbAccessor *impl) { return impl->GetAccessor(); }},
this->impl);
}
static mgp_graph WritableGraph(memgraph::query::DbAccessor &acc, memgraph::storage::View view,
memgraph::query::ExecutionContext &ctx) {
return mgp_graph{&acc, view, &ctx, acc.GetStorageMode()};

View File

@ -20,6 +20,7 @@ add_library(mg-storage-v2 STATIC
vertex_info_cache.cpp
storage.cpp
indices/indices.cpp
indices/text_index.cpp
all_vertices_iterable.cpp
edges_iterable.cpp
vertices_iterable.cpp
@ -45,4 +46,5 @@ add_library(mg-storage-v2 STATIC
inmemory/replication/recovery.cpp
)
target_link_libraries(mg-storage-v2 mg::replication Threads::Threads mg-utils gflags absl::flat_hash_map mg-rpc mg-slk mg-events mg-memory)
target_include_directories(mg-storage-v2 PUBLIC ${CMAKE_SOURCE_DIR}/include)
target_link_libraries(mg-storage-v2 mg::replication Threads::Threads mg-utils mg-flags gflags absl::flat_hash_map mg-rpc mg-slk mg-events mg-memory mgcxx_text_search tantivy_text_search)

View File

@ -1,4 +1,4 @@
// Copyright 2023 Memgraph Ltd.
// Copyright 2024 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
@ -26,6 +26,7 @@ constexpr const char *kVertexCountDescr = "vertex_count";
constexpr const char *kEdgeDountDescr = "edge_count";
constexpr const char *kLabelIndexStr = "label_index";
constexpr const char *kLabelPropertyIndexStr = "label_property_index";
constexpr const char *kTextIndexStr = "text_index";
constexpr const char *kExistenceConstraintsStr = "existence_constraints";
constexpr const char *kUniqueConstraintsStr = "unique_constraints";
} // namespace
@ -144,6 +145,31 @@ bool DurableMetadata::PersistLabelPropertyIndexAndExistenceConstraintDeletion(La
return true;
}
bool DurableMetadata::PersistTextIndexCreation(const std::string &index_name, LabelId label) {
const std::string index_name_label_pair = index_name + "," + label.ToString();
if (auto text_index_store = durability_kvstore_.Get(kTextIndexStr); text_index_store.has_value()) {
std::string &value = text_index_store.value();
value += "|";
value += index_name_label_pair;
return durability_kvstore_.Put(kTextIndexStr, value);
}
return durability_kvstore_.Put(kTextIndexStr, index_name_label_pair);
}
bool DurableMetadata::PersistTextIndexDeletion(const std::string &index_name, LabelId label) {
const std::string index_name_label_pair = index_name + "," + label.ToString();
if (auto text_index_store = durability_kvstore_.Get(kTextIndexStr); text_index_store.has_value()) {
const std::string &value = text_index_store.value();
std::vector<std::string> text_indices = utils::Split(value, "|");
std::erase(text_indices, index_name_label_pair);
if (text_indices.empty()) {
return durability_kvstore_.Delete(kTextIndexStr);
}
return durability_kvstore_.Put(kTextIndexStr, utils::Join(text_indices, "|"));
}
return true;
}
bool DurableMetadata::PersistUniqueConstraintCreation(LabelId label, const std::set<PropertyId> &properties) {
const std::string entry = utils::GetKeyForUniqueConstraintsDurability(label, properties);

View File

@ -1,4 +1,4 @@
// Copyright 2023 Memgraph Ltd.
// Copyright 2024 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
@ -53,6 +53,10 @@ class DurableMetadata {
bool PersistLabelPropertyIndexAndExistenceConstraintDeletion(LabelId label, PropertyId property,
const std::string &key);
bool PersistTextIndexCreation(const std::string &index_name, LabelId label);
bool PersistTextIndexDeletion(const std::string &index_name, LabelId label);
bool PersistUniqueConstraintCreation(LabelId label, const std::set<PropertyId> &properties);
bool PersistUniqueConstraintDeletion(LabelId label, const std::set<PropertyId> &properties);

View File

@ -29,6 +29,8 @@
#include <rocksdb/utilities/transaction.h>
#include <rocksdb/utilities/transaction_db.h>
#include "flags/experimental.hpp"
#include "flags/run_time_configurable.hpp"
#include "kvstore/kvstore.hpp"
#include "spdlog/spdlog.h"
#include "storage/v2/constraints/unique_constraints.hpp"
@ -856,6 +858,7 @@ StorageInfo DiskStorage::GetInfo(memgraph::replication_coordination_glue::Replic
const auto &lbl = access->ListAllIndices();
info.label_indices = lbl.label.size();
info.label_property_indices = lbl.label_property.size();
info.text_indices = lbl.text_indices.size();
const auto &con = access->ListAllConstraints();
info.existence_constraints = con.existence.size();
info.unique_constraints = con.unique.size();
@ -1670,6 +1673,18 @@ utils::BasicResult<StorageManipulationError, void> DiskStorage::DiskAccessor::Co
case MetadataDelta::Action::LABEL_PROPERTY_INDEX_STATS_CLEAR: {
throw utils::NotYetImplemented("ClearIndexStats(stats) is not implemented for DiskStorage.");
} break;
case MetadataDelta::Action::TEXT_INDEX_CREATE: {
const auto &info = md_delta.text_index;
if (!disk_storage->durable_metadata_.PersistTextIndexCreation(info.index_name, info.label)) {
return StorageManipulationError{PersistenceError{}};
}
} break;
case MetadataDelta::Action::TEXT_INDEX_DROP: {
const auto &info = md_delta.text_index;
if (!disk_storage->durable_metadata_.PersistTextIndexDeletion(info.index_name, info.label)) {
return StorageManipulationError{PersistenceError{}};
}
} break;
case MetadataDelta::Action::EXISTENCE_CONSTRAINT_CREATE: {
const auto &info = md_delta.label_property;
if (!disk_storage->durable_metadata_.PersistLabelPropertyIndexAndExistenceConstraintCreation(
@ -1768,6 +1783,9 @@ utils::BasicResult<StorageManipulationError, void> DiskStorage::DiskAccessor::Co
return StorageManipulationError{SerializationError{}};
}
spdlog::trace("rocksdb: Commit successful");
if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
disk_storage->indices_.text_index_.Commit();
}
is_transaction_active_ = false;
@ -1886,6 +1904,9 @@ void DiskStorage::DiskAccessor::Abort() {
// query_plan_accumulate_aggregate.cpp
transaction_.disk_transaction_->Rollback();
transaction_.disk_transaction_->ClearSnapshot();
if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
storage_->indices_.text_index_.Rollback();
}
delete transaction_.disk_transaction_;
transaction_.disk_transaction_ = nullptr;
is_transaction_active_ = false;
@ -2092,7 +2113,11 @@ IndicesInfo DiskStorage::DiskAccessor::ListAllIndices() const {
auto *disk_label_index = static_cast<DiskLabelIndex *>(on_disk->indices_.label_index_.get());
auto *disk_label_property_index =
static_cast<DiskLabelPropertyIndex *>(on_disk->indices_.label_property_index_.get());
return {disk_label_index->ListIndices(), disk_label_property_index->ListIndices()};
auto &text_index = storage_->indices_.text_index_;
return {disk_label_index->ListIndices(),
disk_label_property_index->ListIndices(),
{/* edge type indices */},
text_index.ListIndices()};
}
ConstraintsInfo DiskStorage::DiskAccessor::ListAllConstraints() const {
auto *disk_storage = static_cast<DiskStorage *>(storage_);

View File

@ -151,7 +151,8 @@ void RecoverConstraints(const RecoveredIndicesAndConstraints::ConstraintsMetadat
void RecoverIndicesAndStats(const RecoveredIndicesAndConstraints::IndicesMetadata &indices_metadata, Indices *indices,
utils::SkipList<Vertex> *vertices, NameIdMapper *name_id_mapper,
const std::optional<ParallelizedSchemaCreationInfo> &parallel_exec_info) {
const std::optional<ParallelizedSchemaCreationInfo> &parallel_exec_info,
const std::optional<std::filesystem::path> &storage_dir) {
spdlog::info("Recreating indices from metadata.");
// Recover label indices.
@ -211,6 +212,26 @@ void RecoverIndicesAndStats(const RecoveredIndicesAndConstraints::IndicesMetadat
}
spdlog::info("Edge-type indices are recreated.");
if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
// Recover text indices.
spdlog::info("Recreating {} text indices from metadata.", indices_metadata.text_indices.size());
auto &mem_text_index = indices->text_index_;
for (const auto &[index_name, label] : indices_metadata.text_indices) {
try {
if (!storage_dir.has_value()) {
throw RecoveryFailure("There must exist a storage directory in order to recover text indices!");
}
mem_text_index.RecoverIndex(storage_dir.value(), index_name, label, vertices->access(), name_id_mapper);
} catch (...) {
throw RecoveryFailure("The text index must be created here!");
}
spdlog::info("Text index {} on :{} is recreated from metadata", index_name,
name_id_mapper->IdToName(label.AsUint()));
}
spdlog::info("Text indices are recreated.");
}
spdlog::info("Indices are recreated.");
}
@ -331,8 +352,13 @@ std::optional<RecoveryInfo> Recovery::RecoverData(std::string *uuid, Replication
repl_storage_state.epoch_.SetEpoch(std::move(recovered_snapshot->snapshot_info.epoch_id));
if (!utils::DirExists(wal_directory_)) {
std::optional<std::filesystem::path> storage_dir = std::nullopt;
if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
storage_dir = config.durability.storage_directory;
}
RecoverIndicesAndStats(indices_constraints.indices, indices, vertices, name_id_mapper,
GetParallelExecInfoIndices(recovery_info, config));
GetParallelExecInfoIndices(recovery_info, config), storage_dir);
RecoverConstraints(indices_constraints.constraints, constraints, vertices, name_id_mapper,
GetParallelExecInfo(recovery_info, config));
return recovered_snapshot->recovery_info;
@ -467,8 +493,13 @@ std::optional<RecoveryInfo> Recovery::RecoverData(std::string *uuid, Replication
spdlog::info("All necessary WAL files are loaded successfully.");
}
std::optional<std::filesystem::path> storage_dir = std::nullopt;
if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
storage_dir = config.durability.storage_directory;
}
RecoverIndicesAndStats(indices_constraints.indices, indices, vertices, name_id_mapper,
GetParallelExecInfoIndices(recovery_info, config));
GetParallelExecInfoIndices(recovery_info, config), storage_dir);
RecoverConstraints(indices_constraints.constraints, constraints, vertices, name_id_mapper,
GetParallelExecInfo(recovery_info, config));

View File

@ -1,4 +1,4 @@
// Copyright 2023 Memgraph Ltd.
// Copyright 2024 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
@ -102,7 +102,8 @@ std::optional<std::vector<WalDurabilityInfo>> GetWalFiles(const std::filesystem:
/// @throw RecoveryFailure
void RecoverIndicesAndStats(const RecoveredIndicesAndConstraints::IndicesMetadata &indices_metadata, Indices *indices,
utils::SkipList<Vertex> *vertices, NameIdMapper *name_id_mapper,
const std::optional<ParallelizedSchemaCreationInfo> &parallel_exec_info = std::nullopt);
const std::optional<ParallelizedSchemaCreationInfo> &parallel_exec_info = std::nullopt,
const std::optional<std::filesystem::path> &storage_dir = std::nullopt);
// Helper function used to recover all discovered constraints. The
// constraints must be recovered after the data recovery is done

View File

@ -64,6 +64,8 @@ enum class Marker : uint8_t {
DELTA_LABEL_PROPERTY_INDEX_STATS_CLEAR = 0x64,
DELTA_EDGE_TYPE_INDEX_CREATE = 0x65,
DELTA_EDGE_TYPE_INDEX_DROP = 0x66,
DELTA_TEXT_INDEX_CREATE = 0x67,
DELTA_TEXT_INDEX_DROP = 0x68,
VALUE_FALSE = 0x00,
VALUE_TRUE = 0xff,
@ -110,6 +112,8 @@ static const Marker kMarkersAll[] = {
Marker::DELTA_LABEL_PROPERTY_INDEX_DROP,
Marker::DELTA_EDGE_TYPE_INDEX_CREATE,
Marker::DELTA_EDGE_TYPE_INDEX_DROP,
Marker::DELTA_TEXT_INDEX_CREATE,
Marker::DELTA_TEXT_INDEX_DROP,
Marker::DELTA_EXISTENCE_CONSTRAINT_CREATE,
Marker::DELTA_EXISTENCE_CONSTRAINT_DROP,
Marker::DELTA_UNIQUE_CONSTRAINT_CREATE,

View File

@ -44,6 +44,7 @@ struct RecoveredIndicesAndConstraints {
std::vector<std::pair<LabelId, LabelIndexStats>> label_stats;
std::vector<std::pair<LabelId, std::pair<PropertyId, LabelPropertyIndexStats>>> label_property_stats;
std::vector<EdgeTypeId> edge;
std::vector<std::pair<std::string, LabelId>> text_indices;
} indices;
struct ConstraintsMetadata {

View File

@ -353,6 +353,8 @@ std::optional<PropertyValue> Decoder::ReadPropertyValue() {
case Marker::DELTA_LABEL_PROPERTY_INDEX_DROP:
case Marker::DELTA_EDGE_TYPE_INDEX_CREATE:
case Marker::DELTA_EDGE_TYPE_INDEX_DROP:
case Marker::DELTA_TEXT_INDEX_CREATE:
case Marker::DELTA_TEXT_INDEX_DROP:
case Marker::DELTA_EXISTENCE_CONSTRAINT_CREATE:
case Marker::DELTA_EXISTENCE_CONSTRAINT_DROP:
case Marker::DELTA_UNIQUE_CONSTRAINT_CREATE:
@ -459,6 +461,8 @@ bool Decoder::SkipPropertyValue() {
case Marker::DELTA_LABEL_PROPERTY_INDEX_DROP:
case Marker::DELTA_EDGE_TYPE_INDEX_CREATE:
case Marker::DELTA_EDGE_TYPE_INDEX_DROP:
case Marker::DELTA_TEXT_INDEX_CREATE:
case Marker::DELTA_TEXT_INDEX_DROP:
case Marker::DELTA_EXISTENCE_CONSTRAINT_CREATE:
case Marker::DELTA_EXISTENCE_CONSTRAINT_DROP:
case Marker::DELTA_UNIQUE_CONSTRAINT_CREATE:

View File

@ -13,6 +13,8 @@
#include <thread>
#include "flags/experimental.hpp"
#include "flags/run_time_configurable.hpp"
#include "spdlog/spdlog.h"
#include "storage/v2/durability/exceptions.hpp"
#include "storage/v2/durability/paths.hpp"
@ -2004,6 +2006,24 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis
spdlog::info("Metadata of edge-type indices are recovered.");
}
// Recover text indices.
if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
auto size = snapshot.ReadUint();
if (!size) throw RecoveryFailure("Couldn't recover the number of text indices!");
spdlog::info("Recovering metadata of {} text indices.", *size);
for (uint64_t i = 0; i < *size; ++i) {
auto index_name = snapshot.ReadString();
if (!index_name.has_value()) throw RecoveryFailure("Couldn't read text index name!");
auto label = snapshot.ReadUint();
if (!label) throw RecoveryFailure("Couldn't read text index label!");
AddRecoveredIndexConstraint(&indices_constraints.indices.text_indices,
{index_name.value(), get_label_from_id(*label)}, "The text index already exists!");
SPDLOG_TRACE("Recovered metadata of text index {} for :{}", index_name.value(),
name_id_mapper->IdToName(snapshot_id_map.at(*label)));
}
spdlog::info("Metadata of text indices are recovered.");
}
spdlog::info("Metadata of indices are recovered.");
}
@ -2493,6 +2513,16 @@ void CreateSnapshot(Storage *storage, Transaction *transaction, const std::files
write_mapping(item);
}
}
// Write text indices.
if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
auto text_indices = storage->indices_.text_index_.ListIndices();
snapshot.WriteUint(text_indices.size());
for (const auto &[index_name, label] : text_indices) {
snapshot.WriteString(index_name);
write_mapping(label);
}
}
}
// Write constraints.

View File

@ -25,6 +25,8 @@ enum class StorageMetadataOperation {
LABEL_PROPERTY_INDEX_STATS_CLEAR,
EDGE_TYPE_INDEX_CREATE,
EDGE_TYPE_INDEX_DROP,
TEXT_INDEX_CREATE,
TEXT_INDEX_DROP,
EXISTENCE_CONSTRAINT_CREATE,
EXISTENCE_CONSTRAINT_DROP,
UNIQUE_CONSTRAINT_CREATE,

View File

@ -99,6 +99,10 @@ Marker OperationToMarker(StorageMetadataOperation operation) {
return Marker::DELTA_EDGE_TYPE_INDEX_CREATE;
case StorageMetadataOperation::EDGE_TYPE_INDEX_DROP:
return Marker::DELTA_EDGE_TYPE_INDEX_DROP;
case StorageMetadataOperation::TEXT_INDEX_CREATE:
return Marker::DELTA_TEXT_INDEX_CREATE;
case StorageMetadataOperation::TEXT_INDEX_DROP:
return Marker::DELTA_TEXT_INDEX_DROP;
case StorageMetadataOperation::EXISTENCE_CONSTRAINT_CREATE:
return Marker::DELTA_EXISTENCE_CONSTRAINT_CREATE;
case StorageMetadataOperation::EXISTENCE_CONSTRAINT_DROP:
@ -172,6 +176,10 @@ WalDeltaData::Type MarkerToWalDeltaDataType(Marker marker) {
return WalDeltaData::Type::LABEL_PROPERTY_INDEX_CREATE;
case Marker::DELTA_LABEL_PROPERTY_INDEX_DROP:
return WalDeltaData::Type::LABEL_PROPERTY_INDEX_DROP;
case Marker::DELTA_TEXT_INDEX_CREATE:
return WalDeltaData::Type::TEXT_INDEX_CREATE;
case Marker::DELTA_TEXT_INDEX_DROP:
return WalDeltaData::Type::TEXT_INDEX_DROP;
case Marker::DELTA_LABEL_PROPERTY_INDEX_STATS_SET:
return WalDeltaData::Type::LABEL_PROPERTY_INDEX_STATS_SET;
case Marker::DELTA_LABEL_PROPERTY_INDEX_STATS_CLEAR:
@ -382,6 +390,21 @@ WalDeltaData ReadSkipWalDeltaData(BaseDecoder *decoder) {
if (!decoder->SkipString()) throw RecoveryFailure("Invalid WAL data!");
}
}
break;
}
case WalDeltaData::Type::TEXT_INDEX_CREATE:
case WalDeltaData::Type::TEXT_INDEX_DROP: {
if constexpr (read_data) {
auto index_name = decoder->ReadString();
if (!index_name) throw RecoveryFailure("Invalid WAL data!");
delta.operation_text.index_name = std::move(*index_name);
auto label = decoder->ReadString();
if (!label) throw RecoveryFailure("Invalid WAL data!");
delta.operation_text.label = std::move(*label);
} else {
if (!decoder->SkipString() || !decoder->SkipString()) throw RecoveryFailure("Invalid WAL data!");
}
break;
}
}
@ -529,6 +552,12 @@ bool operator==(const WalDeltaData &a, const WalDeltaData &b) {
case WalDeltaData::Type::LABEL_PROPERTY_INDEX_CREATE:
case WalDeltaData::Type::LABEL_PROPERTY_INDEX_DROP:
case WalDeltaData::Type::TEXT_INDEX_CREATE:
return a.operation_text.index_name == b.operation_text.index_name &&
a.operation_text.label == b.operation_text.label;
case WalDeltaData::Type::TEXT_INDEX_DROP:
return a.operation_text.index_name == b.operation_text.index_name &&
a.operation_text.label == b.operation_text.label;
case WalDeltaData::Type::EXISTENCE_CONSTRAINT_CREATE:
case WalDeltaData::Type::EXISTENCE_CONSTRAINT_DROP:
return a.operation_label_property.label == b.operation_label_property.label &&
@ -675,7 +704,8 @@ void EncodeTransactionEnd(BaseEncoder *encoder, uint64_t timestamp) {
}
void EncodeOperation(BaseEncoder *encoder, NameIdMapper *name_id_mapper, StorageMetadataOperation operation,
LabelId label, const std::set<PropertyId> &properties, const LabelIndexStats &stats,
const std::optional<std::string> text_index_name, LabelId label,
const std::set<PropertyId> &properties, const LabelIndexStats &stats,
const LabelPropertyIndexStats &property_stats, uint64_t timestamp) {
encoder->WriteMarker(Marker::SECTION_DELTA);
encoder->WriteUint(timestamp);
@ -731,6 +761,14 @@ void EncodeOperation(BaseEncoder *encoder, NameIdMapper *name_id_mapper, Storage
case StorageMetadataOperation::EDGE_TYPE_INDEX_DROP: {
MG_ASSERT(false, "Invalid function call!");
}
case StorageMetadataOperation::TEXT_INDEX_CREATE:
case StorageMetadataOperation::TEXT_INDEX_DROP: {
MG_ASSERT(text_index_name.has_value(), "Text indices must be named!");
encoder->WriteMarker(OperationToMarker(operation));
encoder->WriteString(text_index_name.value());
encoder->WriteString(name_id_mapper->IdToName(label.AsUint()));
break;
}
}
}
@ -752,6 +790,8 @@ void EncodeOperation(BaseEncoder *encoder, NameIdMapper *name_id_mapper, Storage
case StorageMetadataOperation::LABEL_INDEX_STATS_SET:
case StorageMetadataOperation::LABEL_PROPERTY_INDEX_CREATE:
case StorageMetadataOperation::LABEL_PROPERTY_INDEX_DROP:
case StorageMetadataOperation::TEXT_INDEX_CREATE:
case StorageMetadataOperation::TEXT_INDEX_DROP:
case StorageMetadataOperation::EXISTENCE_CONSTRAINT_CREATE:
case StorageMetadataOperation::EXISTENCE_CONSTRAINT_DROP:
case StorageMetadataOperation::LABEL_PROPERTY_INDEX_STATS_SET:
@ -1000,6 +1040,20 @@ RecoveryInfo LoadWal(const std::filesystem::path &path, RecoveredIndicesAndConst
"The label index stats doesn't exist!");
break;
}
case WalDeltaData::Type::TEXT_INDEX_CREATE: {
auto index_name = delta.operation_text.index_name;
auto label = LabelId::FromUint(name_id_mapper->NameToId(delta.operation_text.label));
AddRecoveredIndexConstraint(&indices_constraints->indices.text_indices, {index_name, label},
"The text index already exists!");
break;
}
case WalDeltaData::Type::TEXT_INDEX_DROP: {
auto index_name = delta.operation_text.index_name;
auto label = LabelId::FromUint(name_id_mapper->NameToId(delta.operation_text.label));
RemoveRecoveredIndexConstraint(&indices_constraints->indices.text_indices, {index_name, label},
"The text index doesn't exist!");
break;
}
case WalDeltaData::Type::EXISTENCE_CONSTRAINT_CREATE: {
auto label_id = LabelId::FromUint(name_id_mapper->NameToId(delta.operation_label_property.label));
auto property_id = PropertyId::FromUint(name_id_mapper->NameToId(delta.operation_label_property.property));
@ -1148,10 +1202,11 @@ void WalFile::AppendTransactionEnd(uint64_t timestamp) {
UpdateStats(timestamp);
}
void WalFile::AppendOperation(StorageMetadataOperation operation, LabelId label, const std::set<PropertyId> &properties,
const LabelIndexStats &stats, const LabelPropertyIndexStats &property_stats,
uint64_t timestamp) {
EncodeOperation(&wal_, name_id_mapper_, operation, label, properties, stats, property_stats, timestamp);
void WalFile::AppendOperation(StorageMetadataOperation operation, const std::optional<std::string> text_index_name,
LabelId label, const std::set<PropertyId> &properties, const LabelIndexStats &stats,
const LabelPropertyIndexStats &property_stats, uint64_t timestamp) {
EncodeOperation(&wal_, name_id_mapper_, operation, text_index_name, label, properties, stats, property_stats,
timestamp);
UpdateStats(timestamp);
}

View File

@ -69,6 +69,8 @@ struct WalDeltaData {
LABEL_PROPERTY_INDEX_STATS_CLEAR,
EDGE_INDEX_CREATE,
EDGE_INDEX_DROP,
TEXT_INDEX_CREATE,
TEXT_INDEX_DROP,
EXISTENCE_CONSTRAINT_CREATE,
EXISTENCE_CONSTRAINT_DROP,
UNIQUE_CONSTRAINT_CREATE,
@ -127,6 +129,11 @@ struct WalDeltaData {
std::string property;
std::string stats;
} operation_label_property_stats;
struct {
std::string index_name;
std::string label;
} operation_text;
};
bool operator==(const WalDeltaData &a, const WalDeltaData &b);
@ -163,6 +170,8 @@ constexpr bool IsWalDeltaDataTypeTransactionEndVersion15(const WalDeltaData::Typ
case WalDeltaData::Type::LABEL_PROPERTY_INDEX_STATS_CLEAR:
case WalDeltaData::Type::EDGE_INDEX_CREATE:
case WalDeltaData::Type::EDGE_INDEX_DROP:
case WalDeltaData::Type::TEXT_INDEX_CREATE:
case WalDeltaData::Type::TEXT_INDEX_DROP:
case WalDeltaData::Type::EXISTENCE_CONSTRAINT_CREATE:
case WalDeltaData::Type::EXISTENCE_CONSTRAINT_DROP:
case WalDeltaData::Type::UNIQUE_CONSTRAINT_CREATE:
@ -213,7 +222,8 @@ void EncodeTransactionEnd(BaseEncoder *encoder, uint64_t timestamp);
/// Function used to encode non-transactional operation.
void EncodeOperation(BaseEncoder *encoder, NameIdMapper *name_id_mapper, StorageMetadataOperation operation,
LabelId label, const std::set<PropertyId> &properties, const LabelIndexStats &stats,
const std::optional<std::string> text_index_name, LabelId label,
const std::set<PropertyId> &properties, const LabelIndexStats &stats,
const LabelPropertyIndexStats &property_stats, uint64_t timestamp);
void EncodeOperation(BaseEncoder *encoder, NameIdMapper *name_id_mapper, StorageMetadataOperation operation,
@ -248,8 +258,9 @@ class WalFile {
void AppendTransactionEnd(uint64_t timestamp);
void AppendOperation(StorageMetadataOperation operation, LabelId label, const std::set<PropertyId> &properties,
const LabelIndexStats &stats, const LabelPropertyIndexStats &property_stats, uint64_t timestamp);
void AppendOperation(StorageMetadataOperation operation, const std::optional<std::string> text_index_name,
LabelId label, const std::set<PropertyId> &properties, const LabelIndexStats &stats,
const LabelPropertyIndexStats &property_stats, uint64_t timestamp);
void AppendOperation(StorageMetadataOperation operation, EdgeTypeId edge_type, uint64_t timestamp);

View File

@ -16,6 +16,7 @@
#include "storage/v2/inmemory/edge_type_index.hpp"
#include "storage/v2/inmemory/label_index.hpp"
#include "storage/v2/inmemory/label_property_index.hpp"
#include "storage/v2/storage.hpp"
namespace memgraph::storage {

View File

@ -18,6 +18,7 @@
#include "storage/v2/indices/edge_type_index.hpp"
#include "storage/v2/indices/label_index.hpp"
#include "storage/v2/indices/label_property_index.hpp"
#include "storage/v2/indices/text_index.hpp"
#include "storage/v2/storage_mode.hpp"
namespace memgraph::storage {
@ -31,12 +32,12 @@ struct Indices {
Indices &operator=(Indices &&) = delete;
~Indices() = default;
/// This function should be called from garbage collection to clean-up the
/// This function should be called from garbage collection to clean up the
/// index.
/// TODO: unused in disk indices
void RemoveObsoleteEntries(uint64_t oldest_active_start_timestamp, std::stop_token token) const;
/// Surgical removal of entries that was inserted this transaction
/// Surgical removal of entries that were inserted in this transaction
/// TODO: unused in disk indices
void AbortEntries(LabelId labelId, std::span<Vertex *const> vertices, uint64_t exact_start_timestamp) const;
void AbortEntries(PropertyId property, std::span<std::pair<PropertyValue, Vertex *> const> vertices,
@ -71,6 +72,7 @@ struct Indices {
std::unique_ptr<LabelIndex> label_index_;
std::unique_ptr<LabelPropertyIndex> label_property_index_;
std::unique_ptr<EdgeTypeIndex> edge_type_index_;
mutable TextIndex text_index_;
};
} // namespace memgraph::storage

View File

@ -0,0 +1,430 @@
// Copyright 2024 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
// License, and you may not use this file except in compliance with the Business Source License.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
#include "storage/v2/indices/text_index.hpp"
#include "flags/experimental.hpp"
#include "flags/run_time_configurable.hpp"
#include "query/db_accessor.hpp"
#include "storage/v2/view.hpp"
#include "text_search.hpp"
namespace memgraph::storage {
std::string GetPropertyName(PropertyId prop_id, memgraph::query::DbAccessor *db) { return db->PropertyToName(prop_id); }
std::string GetPropertyName(PropertyId prop_id, NameIdMapper *name_id_mapper) {
return name_id_mapper->IdToName(prop_id.AsUint());
}
inline std::string TextIndex::MakeIndexPath(const std::filesystem::path &storage_dir, const std::string &index_name) {
return (storage_dir / kTextIndicesDirectory / index_name).string();
}
void TextIndex::CreateEmptyIndex(const std::filesystem::path &storage_dir, const std::string &index_name,
LabelId label) {
if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
throw query::TextSearchDisabledException();
}
if (index_.contains(index_name)) {
throw query::TextSearchException("Text index \"{}\" already exists.", index_name);
}
try {
nlohmann::json mappings = {};
mappings["properties"] = {};
mappings["properties"]["metadata"] = {{"type", "json"}, {"fast", true}, {"stored", true}, {"text", true}};
mappings["properties"]["data"] = {{"type", "json"}, {"fast", true}, {"stored", true}, {"text", true}};
mappings["properties"]["all"] = {{"type", "text"}, {"fast", true}, {"stored", true}, {"text", true}};
index_.emplace(index_name, TextIndexData{.context_ = mgcxx::text_search::create_index(
MakeIndexPath(storage_dir, index_name),
mgcxx::text_search::IndexConfig{.mappings = mappings.dump()}),
.scope_ = label});
} catch (const std::exception &e) {
throw query::TextSearchException("Tantivy error: {}", e.what());
}
label_to_index_.emplace(label, index_name);
}
template <typename T>
nlohmann::json TextIndex::SerializeProperties(const std::map<PropertyId, PropertyValue> &properties, T *name_resolver) {
nlohmann::json serialized_properties = nlohmann::json::value_t::object;
for (const auto &[prop_id, prop_value] : properties) {
switch (prop_value.type()) {
case PropertyValue::Type::Bool:
serialized_properties[GetPropertyName(prop_id, name_resolver)] = prop_value.ValueBool();
break;
case PropertyValue::Type::Int:
serialized_properties[GetPropertyName(prop_id, name_resolver)] = prop_value.ValueInt();
break;
case PropertyValue::Type::Double:
serialized_properties[GetPropertyName(prop_id, name_resolver)] = prop_value.ValueDouble();
break;
case PropertyValue::Type::String:
serialized_properties[GetPropertyName(prop_id, name_resolver)] = prop_value.ValueString();
break;
case PropertyValue::Type::Null:
case PropertyValue::Type::List:
case PropertyValue::Type::Map:
case PropertyValue::Type::TemporalData:
default:
continue;
}
}
return serialized_properties;
}
std::string TextIndex::StringifyProperties(const std::map<PropertyId, PropertyValue> &properties) {
std::vector<std::string> indexable_properties_as_string;
for (const auto &[_, prop_value] : properties) {
switch (prop_value.type()) {
case PropertyValue::Type::Bool:
indexable_properties_as_string.push_back(prop_value.ValueBool() ? "true" : "false");
break;
case PropertyValue::Type::Int:
indexable_properties_as_string.push_back(std::to_string(prop_value.ValueInt()));
break;
case PropertyValue::Type::Double:
indexable_properties_as_string.push_back(std::to_string(prop_value.ValueDouble()));
break;
case PropertyValue::Type::String:
indexable_properties_as_string.push_back(prop_value.ValueString());
break;
// NOTE: As the following types arent indexed in Tantivy, they dont appear in the property value string either.
case PropertyValue::Type::Null:
case PropertyValue::Type::List:
case PropertyValue::Type::Map:
case PropertyValue::Type::TemporalData:
default:
continue;
}
}
return utils::Join(indexable_properties_as_string, " ");
}
std::vector<mgcxx::text_search::Context *> TextIndex::GetApplicableTextIndices(const std::vector<LabelId> &labels) {
if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
throw query::TextSearchDisabledException();
}
std::vector<mgcxx::text_search::Context *> applicable_text_indices;
for (const auto &label : labels) {
if (label_to_index_.contains(label)) {
applicable_text_indices.push_back(&index_.at(label_to_index_.at(label)).context_);
}
}
return applicable_text_indices;
}
void TextIndex::LoadNodeToTextIndices(const std::int64_t gid, const nlohmann::json &properties,
const std::string &property_values_as_str,
const std::vector<mgcxx::text_search::Context *> &applicable_text_indices) {
if (applicable_text_indices.empty()) {
return;
}
// NOTE: Text indexes are presently all-property indices. If we allow text indexes restricted to specific properties,
// an indexable document should be created for each applicable index.
nlohmann::json document = {};
document["data"] = properties;
document["all"] = property_values_as_str;
document["metadata"] = {};
document["metadata"]["gid"] = gid;
document["metadata"]["deleted"] = false;
document["metadata"]["is_node"] = true;
for (auto *index_context : applicable_text_indices) {
try {
mgcxx::text_search::add_document(
*index_context,
mgcxx::text_search::DocumentInput{
.data = document.dump(-1, ' ', false, nlohmann::json::error_handler_t::replace)},
kDoSkipCommit);
} catch (const std::exception &e) {
throw query::TextSearchException("Tantivy error: {}", e.what());
}
}
}
void TextIndex::CommitLoadedNodes(mgcxx::text_search::Context &index_context) {
// As CREATE TEXT INDEX (...) queries dont accumulate deltas, db_transactional_accessor_->Commit() does not reach
// the code area where changes to indices are committed. To get around that without needing to commit text indices
// after every such query, we commit here.
try {
mgcxx::text_search::commit(index_context);
} catch (const std::exception &e) {
throw query::TextSearchException("Tantivy error: {}", e.what());
}
}
void TextIndex::AddNode(
Vertex *vertex_after_update, NameIdMapper *name_id_mapper,
const std::optional<std::vector<mgcxx::text_search::Context *>> &maybe_applicable_text_indices) {
if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
throw query::TextSearchDisabledException();
}
auto applicable_text_indices =
maybe_applicable_text_indices.value_or(GetApplicableTextIndices(vertex_after_update->labels));
if (applicable_text_indices.empty()) {
return;
}
auto vertex_properties = vertex_after_update->properties.Properties();
LoadNodeToTextIndices(vertex_after_update->gid.AsInt(), SerializeProperties(vertex_properties, name_id_mapper),
StringifyProperties(vertex_properties), applicable_text_indices);
}
void TextIndex::UpdateNode(Vertex *vertex_after_update, NameIdMapper *name_id_mapper,
const std::vector<LabelId> &removed_labels) {
if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
throw query::TextSearchDisabledException();
}
if (!removed_labels.empty()) {
auto indexes_to_remove_node_from = GetApplicableTextIndices(removed_labels);
RemoveNode(vertex_after_update, indexes_to_remove_node_from);
}
auto applicable_text_indices = GetApplicableTextIndices(vertex_after_update->labels);
if (applicable_text_indices.empty()) return;
RemoveNode(vertex_after_update, applicable_text_indices);
AddNode(vertex_after_update, name_id_mapper, applicable_text_indices);
}
void TextIndex::RemoveNode(
Vertex *vertex_after_update,
const std::optional<std::vector<mgcxx::text_search::Context *>> &maybe_applicable_text_indices) {
if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
throw query::TextSearchDisabledException();
}
auto search_node_to_be_deleted =
mgcxx::text_search::SearchInput{.search_query = fmt::format("metadata.gid:{}", vertex_after_update->gid.AsInt())};
for (auto *index_context :
maybe_applicable_text_indices.value_or(GetApplicableTextIndices(vertex_after_update->labels))) {
try {
mgcxx::text_search::delete_document(*index_context, search_node_to_be_deleted, kDoSkipCommit);
} catch (const std::exception &e) {
throw query::TextSearchException("Tantivy error: {}", e.what());
}
}
}
void TextIndex::CreateIndex(const std::filesystem::path &storage_dir, const std::string &index_name, LabelId label,
memgraph::query::DbAccessor *db) {
if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
throw query::TextSearchDisabledException();
}
CreateEmptyIndex(storage_dir, index_name, label);
for (const auto &v : db->Vertices(View::NEW)) {
if (!v.HasLabel(View::NEW, label).GetValue()) {
continue;
}
auto vertex_properties = v.Properties(View::NEW).GetValue();
LoadNodeToTextIndices(v.Gid().AsInt(), SerializeProperties(vertex_properties, db),
StringifyProperties(vertex_properties), {&index_.at(index_name).context_});
}
CommitLoadedNodes(index_.at(index_name).context_);
}
void TextIndex::RecoverIndex(const std::filesystem::path &storage_dir, const std::string &index_name, LabelId label,
memgraph::utils::SkipList<Vertex>::Accessor vertices, NameIdMapper *name_id_mapper) {
if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
throw query::TextSearchDisabledException();
}
// Clear Tantivy-internal files if they exist from previous sessions
std::filesystem::remove_all(storage_dir / kTextIndicesDirectory / index_name);
CreateEmptyIndex(storage_dir, index_name, label);
for (const auto &v : vertices) {
if (std::find(v.labels.begin(), v.labels.end(), label) == v.labels.end()) {
continue;
}
auto vertex_properties = v.properties.Properties();
LoadNodeToTextIndices(v.gid.AsInt(), SerializeProperties(vertex_properties, name_id_mapper),
StringifyProperties(vertex_properties), {&index_.at(index_name).context_});
}
CommitLoadedNodes(index_.at(index_name).context_);
}
LabelId TextIndex::DropIndex(const std::filesystem::path &storage_dir, const std::string &index_name) {
if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
throw query::TextSearchDisabledException();
}
if (!index_.contains(index_name)) {
throw query::TextSearchException("Text index \"{}\" doesnt exist.", index_name);
}
try {
mgcxx::text_search::drop_index(MakeIndexPath(storage_dir, index_name));
} catch (const std::exception &e) {
throw query::TextSearchException("Tantivy error: {}", e.what());
}
auto deleted_index_label = index_.at(index_name).scope_;
index_.erase(index_name);
std::erase_if(label_to_index_, [index_name](const auto &item) { return item.second == index_name; });
return deleted_index_label;
}
bool TextIndex::IndexExists(const std::string &index_name) const { return index_.contains(index_name); }
mgcxx::text_search::SearchOutput TextIndex::SearchGivenProperties(const std::string &index_name,
const std::string &search_query) {
try {
return mgcxx::text_search::search(
index_.at(index_name).context_,
mgcxx::text_search::SearchInput{.search_query = search_query, .return_fields = {"metadata"}});
} catch (const std::exception &e) {
throw query::TextSearchException("Tantivy error: {}", e.what());
}
return mgcxx::text_search::SearchOutput{};
}
mgcxx::text_search::SearchOutput TextIndex::RegexSearch(const std::string &index_name,
const std::string &search_query) {
try {
return mgcxx::text_search::regex_search(
index_.at(index_name).context_,
mgcxx::text_search::SearchInput{
.search_fields = {"all"}, .search_query = search_query, .return_fields = {"metadata"}});
} catch (const std::exception &e) {
throw query::TextSearchException("Tantivy error: {}", e.what());
}
return mgcxx::text_search::SearchOutput{};
}
mgcxx::text_search::SearchOutput TextIndex::SearchAllProperties(const std::string &index_name,
const std::string &search_query) {
try {
return mgcxx::text_search::search(
index_.at(index_name).context_,
mgcxx::text_search::SearchInput{
.search_fields = {"all"}, .search_query = search_query, .return_fields = {"metadata"}});
} catch (const std::exception &e) {
throw query::TextSearchException("Tantivy error: {}", e.what());
}
return mgcxx::text_search::SearchOutput{};
}
std::vector<Gid> TextIndex::Search(const std::string &index_name, const std::string &search_query,
text_search_mode search_mode) {
if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
throw query::TextSearchDisabledException();
}
if (!index_.contains(index_name)) {
throw query::TextSearchException("Text index \"{}\" doesnt exist.", index_name);
}
mgcxx::text_search::SearchOutput search_results;
switch (search_mode) {
case text_search_mode::SPECIFIED_PROPERTIES:
search_results = SearchGivenProperties(index_name, search_query);
break;
case text_search_mode::REGEX:
search_results = RegexSearch(index_name, search_query);
break;
case text_search_mode::ALL_PROPERTIES:
search_results = SearchAllProperties(index_name, search_query);
break;
default:
throw query::TextSearchException(
"Unsupported search mode: please use one of text_search.search, text_search.search_all, or "
"text_search.regex_search.");
}
std::vector<Gid> found_nodes;
for (const auto &doc : search_results.docs) {
// The CXX .data() method (https://cxx.rs/binding/string.html) may overestimate string length, causing JSON parsing
// errors downstream. We prevent this by resizing the converted string with the correctly-working .length() method.
std::string doc_string = doc.data.data();
doc_string.resize(doc.data.length());
auto doc_json = nlohmann::json::parse(doc_string);
found_nodes.push_back(storage::Gid::FromString(doc_json["metadata"]["gid"].dump()));
}
return found_nodes;
}
std::string TextIndex::Aggregate(const std::string &index_name, const std::string &search_query,
const std::string &aggregation_query) {
if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
throw query::TextSearchDisabledException();
}
if (!index_.contains(index_name)) {
throw query::TextSearchException("Text index \"{}\" doesnt exist.", index_name);
}
mgcxx::text_search::DocumentOutput aggregation_result;
try {
aggregation_result = mgcxx::text_search::aggregate(
index_.at(index_name).context_,
mgcxx::text_search::SearchInput{
.search_fields = {"all"}, .search_query = search_query, .aggregation_query = aggregation_query});
} catch (const std::exception &e) {
throw query::TextSearchException("Tantivy error: {}", e.what());
}
// The CXX .data() method (https://cxx.rs/binding/string.html) may overestimate string length, causing JSON parsing
// errors downstream. We prevent this by resizing the converted string with the correctly-working .length() method.
std::string result_string = aggregation_result.data.data();
result_string.resize(aggregation_result.data.length());
return result_string;
}
void TextIndex::Commit() {
if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
throw query::TextSearchDisabledException();
}
for (auto &[_, index_data] : index_) {
mgcxx::text_search::commit(index_data.context_);
}
}
void TextIndex::Rollback() {
if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
throw query::TextSearchDisabledException();
}
for (auto &[_, index_data] : index_) {
mgcxx::text_search::rollback(index_data.context_);
}
}
std::vector<std::pair<std::string, LabelId>> TextIndex::ListIndices() const {
std::vector<std::pair<std::string, LabelId>> ret;
ret.reserve(index_.size());
for (const auto &[index_name, index_data] : index_) {
ret.emplace_back(index_name, index_data.scope_);
}
return ret;
}
} // namespace memgraph::storage

View File

@ -0,0 +1,105 @@
// Copyright 2024 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
// License, and you may not use this file except in compliance with the Business Source License.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
#pragma once
#include <json/json.hpp>
#include "mg_procedure.h"
#include "storage/v2/id_types.hpp"
#include "storage/v2/name_id_mapper.hpp"
#include "storage/v2/vertex.hpp"
#include "text_search.hpp"
namespace memgraph::query {
class DbAccessor;
}
namespace memgraph::storage {
struct TextIndexData {
mgcxx::text_search::Context context_;
LabelId scope_;
};
class TextIndex {
private:
static constexpr bool kDoSkipCommit = true;
static constexpr std::string_view kTextIndicesDirectory = "text_indices";
inline std::string MakeIndexPath(const std::filesystem::path &storage_dir, const std::string &index_name);
void CreateEmptyIndex(const std::filesystem::path &storage_dir, const std::string &index_name, LabelId label);
template <typename T>
nlohmann::json SerializeProperties(const std::map<PropertyId, PropertyValue> &properties, T *name_resolver);
std::string StringifyProperties(const std::map<PropertyId, PropertyValue> &properties);
std::vector<mgcxx::text_search::Context *> GetApplicableTextIndices(const std::vector<LabelId> &labels);
void LoadNodeToTextIndices(const std::int64_t gid, const nlohmann::json &properties,
const std::string &property_values_as_str,
const std::vector<mgcxx::text_search::Context *> &applicable_text_indices);
void CommitLoadedNodes(mgcxx::text_search::Context &index_context);
mgcxx::text_search::SearchOutput SearchGivenProperties(const std::string &index_name,
const std::string &search_query);
mgcxx::text_search::SearchOutput RegexSearch(const std::string &index_name, const std::string &search_query);
mgcxx::text_search::SearchOutput SearchAllProperties(const std::string &index_name, const std::string &search_query);
public:
TextIndex() = default;
TextIndex(const TextIndex &) = delete;
TextIndex(TextIndex &&) = delete;
TextIndex &operator=(const TextIndex &) = delete;
TextIndex &operator=(TextIndex &&) = delete;
~TextIndex() = default;
std::map<std::string, TextIndexData> index_;
std::map<LabelId, std::string> label_to_index_;
void AddNode(
Vertex *vertex, NameIdMapper *name_id_mapper,
const std::optional<std::vector<mgcxx::text_search::Context *>> &maybe_applicable_text_indices = std::nullopt);
void UpdateNode(Vertex *vertex, NameIdMapper *name_id_mapper, const std::vector<LabelId> &removed_labels = {});
void RemoveNode(
Vertex *vertex,
const std::optional<std::vector<mgcxx::text_search::Context *>> &maybe_applicable_text_indices = std::nullopt);
void CreateIndex(const std::filesystem::path &storage_dir, const std::string &index_name, LabelId label,
memgraph::query::DbAccessor *db);
void RecoverIndex(const std::filesystem::path &storage_dir, const std::string &index_name, LabelId label,
memgraph::utils::SkipList<Vertex>::Accessor vertices, NameIdMapper *name_id_mapper);
LabelId DropIndex(const std::filesystem::path &storage_dir, const std::string &index_name);
bool IndexExists(const std::string &index_name) const;
std::vector<Gid> Search(const std::string &index_name, const std::string &search_query, text_search_mode search_mode);
std::string Aggregate(const std::string &index_name, const std::string &search_query,
const std::string &aggregation_query);
void Commit();
void Rollback();
std::vector<std::pair<std::string, LabelId>> ListIndices() const;
};
} // namespace memgraph::storage

View File

@ -15,6 +15,8 @@
#include <functional>
#include <optional>
#include "dbms/constants.hpp"
#include "flags/experimental.hpp"
#include "flags/run_time_configurable.hpp"
#include "memory/global_memory_control.hpp"
#include "storage/v2/durability/durability.hpp"
#include "storage/v2/durability/snapshot.hpp"
@ -890,6 +892,10 @@ utils::BasicResult<StorageManipulationError, void> InMemoryStorage::InMemoryAcce
commit_timestamp_.reset(); // We have aborted, hence we have not committed
return StorageManipulationError{*unique_constraint_violation};
}
if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
mem_storage->indices_.text_index_.Commit();
}
}
is_transaction_active_ = false;
@ -1213,6 +1219,9 @@ void InMemoryStorage::InMemoryAccessor::Abort() {
for (auto const &[property, prop_vertices] : property_cleanup) {
storage_->indices_.AbortEntries(property, prop_vertices, transaction_.start_timestamp);
}
if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
storage_->indices_.text_index_.Rollback();
}
// VERTICES
{
@ -1846,6 +1855,7 @@ StorageInfo InMemoryStorage::GetInfo(memgraph::replication_coordination_glue::Re
const auto &lbl = access->ListAllIndices();
info.label_indices = lbl.label.size();
info.label_property_indices = lbl.label_property.size();
info.text_indices = lbl.text_indices.size();
const auto &con = access->ListAllConstraints();
info.existence_constraints = con.existence.size();
info.unique_constraints = con.unique.size();
@ -2107,6 +2117,16 @@ bool InMemoryStorage::AppendToWal(const Transaction &transaction, uint64_t final
AppendToWalDataDefinition(durability::StorageMetadataOperation::LABEL_PROPERTY_INDEX_STATS_CLEAR, info.label,
final_commit_timestamp);
} break;
case MetadataDelta::Action::TEXT_INDEX_CREATE: {
const auto &info = md_delta.text_index;
AppendToWalDataDefinition(durability::StorageMetadataOperation::TEXT_INDEX_CREATE, info.index_name, info.label,
final_commit_timestamp);
} break;
case MetadataDelta::Action::TEXT_INDEX_DROP: {
const auto &info = md_delta.text_index;
AppendToWalDataDefinition(durability::StorageMetadataOperation::TEXT_INDEX_DROP, info.index_name, info.label,
final_commit_timestamp);
} break;
case MetadataDelta::Action::EXISTENCE_CONSTRAINT_CREATE: {
const auto &info = md_delta.label_property;
AppendToWalDataDefinition(durability::StorageMetadataOperation::EXISTENCE_CONSTRAINT_CREATE, info.label,
@ -2137,11 +2157,13 @@ bool InMemoryStorage::AppendToWal(const Transaction &transaction, uint64_t final
return repl_storage_state_.FinalizeTransaction(final_commit_timestamp, this, std::move(db_acc));
}
void InMemoryStorage::AppendToWalDataDefinition(durability::StorageMetadataOperation operation, LabelId label,
void InMemoryStorage::AppendToWalDataDefinition(durability::StorageMetadataOperation operation,
const std::optional<std::string> text_index_name, LabelId label,
const std::set<PropertyId> &properties, LabelIndexStats stats,
LabelPropertyIndexStats property_stats,
uint64_t final_commit_timestamp) {
wal_file_->AppendOperation(operation, label, properties, stats, property_stats, final_commit_timestamp);
wal_file_->AppendOperation(operation, text_index_name, label, properties, stats, property_stats,
final_commit_timestamp);
repl_storage_state_.AppendOperation(operation, label, properties, stats, property_stats, final_commit_timestamp);
}
@ -2155,12 +2177,13 @@ void InMemoryStorage::AppendToWalDataDefinition(durability::StorageMetadataOpera
const std::set<PropertyId> &properties,
LabelPropertyIndexStats property_stats,
uint64_t final_commit_timestamp) {
return AppendToWalDataDefinition(operation, label, properties, {}, property_stats, final_commit_timestamp);
return AppendToWalDataDefinition(operation, std::nullopt, label, properties, {}, property_stats,
final_commit_timestamp);
}
void InMemoryStorage::AppendToWalDataDefinition(durability::StorageMetadataOperation operation, LabelId label,
LabelIndexStats stats, uint64_t final_commit_timestamp) {
return AppendToWalDataDefinition(operation, label, {}, stats, {}, final_commit_timestamp);
return AppendToWalDataDefinition(operation, std::nullopt, label, {}, stats, {}, final_commit_timestamp);
}
void InMemoryStorage::AppendToWalDataDefinition(durability::StorageMetadataOperation operation, LabelId label,
@ -2174,6 +2197,12 @@ void InMemoryStorage::AppendToWalDataDefinition(durability::StorageMetadataOpera
return AppendToWalDataDefinition(operation, label, {}, {}, final_commit_timestamp);
}
void InMemoryStorage::AppendToWalDataDefinition(durability::StorageMetadataOperation operation,
const std::optional<std::string> text_index_name, LabelId label,
uint64_t final_commit_timestamp) {
return AppendToWalDataDefinition(operation, text_index_name, label, {}, {}, {}, final_commit_timestamp);
}
utils::BasicResult<InMemoryStorage::CreateSnapshotError> InMemoryStorage::CreateSnapshot(
memgraph::replication_coordination_glue::ReplicationRole replication_role) {
using memgraph::replication_coordination_glue::ReplicationRole;
@ -2301,7 +2330,9 @@ IndicesInfo InMemoryStorage::InMemoryAccessor::ListAllIndices() const {
auto *mem_label_property_index =
static_cast<InMemoryLabelPropertyIndex *>(in_memory->indices_.label_property_index_.get());
auto *mem_edge_type_index = static_cast<InMemoryEdgeTypeIndex *>(in_memory->indices_.edge_type_index_.get());
return {mem_label_index->ListIndices(), mem_label_property_index->ListIndices(), mem_edge_type_index->ListIndices()};
auto &text_index = storage_->indices_.text_index_;
return {mem_label_index->ListIndices(), mem_label_property_index->ListIndices(), mem_edge_type_index->ListIndices(),
text_index.ListIndices()};
}
ConstraintsInfo InMemoryStorage::InMemoryAccessor::ListAllConstraints() const {
const auto *mem_storage = static_cast<InMemoryStorage *>(storage_);

View File

@ -398,7 +398,7 @@ class InMemoryStorage final : public Storage {
StorageInfo GetBaseInfo() override;
StorageInfo GetInfo(memgraph::replication_coordination_glue::ReplicationRole replication_role) override;
/// Return true in all cases excepted if any sync replicas have not sent confirmation.
/// Return true in all cases except if any sync replicas have not sent confirmation.
[[nodiscard]] bool AppendToWal(const Transaction &transaction, uint64_t final_commit_timestamp,
DatabaseAccessProtector db_acc);
void AppendToWalDataDefinition(durability::StorageMetadataOperation operation, LabelId label,
@ -412,9 +412,13 @@ class InMemoryStorage final : public Storage {
void AppendToWalDataDefinition(durability::StorageMetadataOperation operation, LabelId label,
const std::set<PropertyId> &properties, LabelPropertyIndexStats property_stats,
uint64_t final_commit_timestamp);
void AppendToWalDataDefinition(durability::StorageMetadataOperation operation, LabelId label,
void AppendToWalDataDefinition(durability::StorageMetadataOperation operation,
const std::optional<std::string> text_index_name, LabelId label,
const std::set<PropertyId> &properties, LabelIndexStats stats,
LabelPropertyIndexStats property_stats, uint64_t final_commit_timestamp);
void AppendToWalDataDefinition(durability::StorageMetadataOperation operation,
const std::optional<std::string> text_index_name, LabelId label,
uint64_t final_commit_timestamp);
uint64_t CommitTimestamp(std::optional<uint64_t> desired_commit_timestamp = {});

View File

@ -37,6 +37,8 @@ struct MetadataDelta {
LABEL_PROPERTY_INDEX_STATS_CLEAR,
EDGE_INDEX_CREATE,
EDGE_INDEX_DROP,
TEXT_INDEX_CREATE,
TEXT_INDEX_DROP,
EXISTENCE_CONSTRAINT_CREATE,
EXISTENCE_CONSTRAINT_DROP,
UNIQUE_CONSTRAINT_CREATE,
@ -63,6 +65,10 @@ struct MetadataDelta {
} edge_index_create;
static constexpr struct EdgeIndexDrop {
} edge_index_drop;
static constexpr struct TextIndexCreate {
} text_index_create;
static constexpr struct TextIndexDrop {
} text_index_drop;
static constexpr struct ExistenceConstraintCreate {
} existence_constraint_create;
static constexpr struct ExistenceConstraintDrop {
@ -98,6 +104,12 @@ struct MetadataDelta {
MetadataDelta(EdgeIndexDrop /*tag*/, EdgeTypeId edge_type) : action(Action::EDGE_INDEX_DROP), edge_type(edge_type) {}
MetadataDelta(TextIndexCreate /*tag*/, std::string index_name, LabelId label)
: action(Action::TEXT_INDEX_CREATE), text_index{index_name, label} {}
MetadataDelta(TextIndexDrop /*tag*/, std::string index_name, LabelId label)
: action(Action::TEXT_INDEX_DROP), text_index{index_name, label} {}
MetadataDelta(ExistenceConstraintCreate /*tag*/, LabelId label, PropertyId property)
: action(Action::EXISTENCE_CONSTRAINT_CREATE), label_property{label, property} {}
@ -127,6 +139,8 @@ struct MetadataDelta {
case Action::LABEL_PROPERTY_INDEX_STATS_CLEAR:
case Action::EDGE_INDEX_CREATE:
case Action::EDGE_INDEX_DROP:
case Action::TEXT_INDEX_CREATE:
case Action::TEXT_INDEX_DROP:
case Action::EXISTENCE_CONSTRAINT_CREATE:
case Action::EXISTENCE_CONSTRAINT_DROP:
break;
@ -164,6 +178,11 @@ struct MetadataDelta {
PropertyId property;
LabelPropertyIndexStats stats;
} label_property_stats;
struct {
std::string index_name;
LabelId label;
} text_index;
};
};

View File

@ -118,7 +118,7 @@ enum class Type : uint8_t {
STRING = 0x50,
LIST = 0x60,
MAP = 0x70,
TEMPORAL_DATA = 0x80
TEMPORAL_DATA = 0x80,
};
const uint8_t kMaskType = 0xf0;

View File

@ -406,8 +406,9 @@ void ReplicaStream::AppendOperation(durability::StorageMetadataOperation operati
const std::set<PropertyId> &properties, const LabelIndexStats &stats,
const LabelPropertyIndexStats &property_stats, uint64_t timestamp) {
replication::Encoder encoder(stream_.GetBuilder());
EncodeOperation(&encoder, storage_->name_id_mapper_.get(), operation, label, properties, stats, property_stats,
timestamp);
// NOTE: Text search doesnt have replication in scope yet (Phases 1 and 2) -> text index name not sent here
EncodeOperation(&encoder, storage_->name_id_mapper_.get(), operation, std::nullopt, label, properties, stats,
property_stats, timestamp);
}
void ReplicaStream::AppendOperation(durability::StorageMetadataOperation operation, EdgeTypeId edge_type,

View File

@ -13,6 +13,8 @@
#include "absl/container/flat_hash_set.h"
#include "spdlog/spdlog.h"
#include "flags/experimental.hpp"
#include "flags/run_time_configurable.hpp"
#include "storage/v2/disk/name_id_mapper.hpp"
#include "storage/v2/storage.hpp"
#include "storage/v2/transaction.hpp"
@ -273,6 +275,12 @@ Storage::Accessor::DetachDelete(std::vector<VertexAccessor *> nodes, std::vector
return maybe_deleted_vertices.GetError();
}
if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
for (auto *node : nodes_to_delete) {
storage_->indices_.text_index_.RemoveNode(node);
}
}
auto deleted_vertices = maybe_deleted_vertices.GetValue();
return std::make_optional<ReturnType>(std::move(deleted_vertices), std::move(deleted_edges));
@ -543,4 +551,19 @@ void Storage::Accessor::MarkEdgeAsDeleted(Edge *edge) {
}
}
void Storage::Accessor::CreateTextIndex(const std::string &index_name, LabelId label, query::DbAccessor *db) {
MG_ASSERT(unique_guard_.owns_lock(), "Creating a text index requires unique access to storage!");
storage_->indices_.text_index_.CreateIndex(storage_->config_.durability.storage_directory, index_name, label, db);
transaction_.md_deltas.emplace_back(MetadataDelta::text_index_create, index_name, label);
memgraph::metrics::IncrementCounter(memgraph::metrics::ActiveTextIndices);
}
void Storage::Accessor::DropTextIndex(const std::string &index_name) {
MG_ASSERT(unique_guard_.owns_lock(), "Dropping a text index requires unique access to storage!");
auto deleted_index_label =
storage_->indices_.text_index_.DropIndex(storage_->config_.durability.storage_directory, index_name);
transaction_.md_deltas.emplace_back(MetadataDelta::text_index_drop, index_name, deleted_index_label);
memgraph::metrics::DecrementCounter(memgraph::metrics::ActiveTextIndices);
}
} // namespace memgraph::storage

View File

@ -20,6 +20,7 @@
#include "io/network/endpoint.hpp"
#include "kvstore/kvstore.hpp"
#include "mg_procedure.h"
#include "query/exceptions.hpp"
#include "replication/config.hpp"
#include "replication/replication_server.hpp"
@ -53,6 +54,7 @@ extern const Event SnapshotCreationLatency_us;
extern const Event ActiveLabelIndices;
extern const Event ActiveLabelPropertyIndices;
extern const Event ActiveTextIndices;
} // namespace memgraph::metrics
namespace memgraph::storage {
@ -63,6 +65,7 @@ struct IndicesInfo {
std::vector<LabelId> label;
std::vector<std::pair<LabelId, PropertyId>> label_property;
std::vector<EdgeTypeId> edge_type;
std::vector<std::pair<std::string, LabelId>> text_indices;
};
struct ConstraintsInfo {
@ -78,6 +81,7 @@ struct StorageInfo {
uint64_t disk_usage;
uint64_t label_indices;
uint64_t label_property_indices;
uint64_t text_indices;
uint64_t existence_constraints;
uint64_t unique_constraints;
StorageMode storage_mode;
@ -95,6 +99,7 @@ static inline nlohmann::json ToJson(const StorageInfo &info) {
res["disk"] = info.disk_usage;
res["label_indices"] = info.label_indices;
res["label_prop_indices"] = info.label_property_indices;
res["text_indices"] = info.text_indices;
res["existence_constraints"] = info.existence_constraints;
res["unique_constraints"] = info.unique_constraints;
res["storage_mode"] = storage::StorageModeToString(info.storage_mode);
@ -232,6 +237,28 @@ class Storage {
virtual bool EdgeTypeIndexExists(EdgeTypeId edge_type) const = 0;
bool TextIndexExists(const std::string &index_name) const {
return storage_->indices_.text_index_.IndexExists(index_name);
}
void TextIndexAddVertex(const VertexAccessor &vertex) {
storage_->indices_.text_index_.AddNode(vertex.vertex_, storage_->name_id_mapper_.get());
}
void TextIndexUpdateVertex(const VertexAccessor &vertex, const std::vector<LabelId> &removed_labels = {}) {
storage_->indices_.text_index_.UpdateNode(vertex.vertex_, storage_->name_id_mapper_.get(), removed_labels);
}
std::vector<Gid> TextIndexSearch(const std::string &index_name, const std::string &search_query,
text_search_mode search_mode) const {
return storage_->indices_.text_index_.Search(index_name, search_query, search_mode);
}
std::string TextIndexAggregate(const std::string &index_name, const std::string &search_query,
const std::string &aggregation_query) const {
return storage_->indices_.text_index_.Aggregate(index_name, search_query, aggregation_query);
}
virtual IndicesInfo ListAllIndices() const = 0;
virtual ConstraintsInfo ListAllConstraints() const = 0;
@ -284,6 +311,10 @@ class Storage {
virtual utils::BasicResult<StorageIndexDefinitionError, void> DropIndex(EdgeTypeId edge_type) = 0;
void CreateTextIndex(const std::string &index_name, LabelId label, query::DbAccessor *db);
void DropTextIndex(const std::string &index_name);
virtual utils::BasicResult<StorageExistenceConstraintDefinitionError, void> CreateExistenceConstraint(
LabelId label, PropertyId property) = 0;

View File

@ -1,4 +1,4 @@
// Copyright 2023 Memgraph Ltd.
// Copyright 2024 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
@ -60,6 +60,7 @@
\
M(ActiveLabelIndices, Index, "Number of active label indices in the system.") \
M(ActiveLabelPropertyIndices, Index, "Number of active label property indices in the system.") \
M(ActiveTextIndices, Index, "Number of active text indices in the system.") \
\
M(StreamsCreated, Stream, "Number of Streams created.") \
M(MessagesConsumed, Stream, "Number of consumed streamed messages.") \

View File

@ -187,6 +187,7 @@ enum class TypeId : uint64_t {
AST_PROFILE_QUERY,
AST_INDEX_QUERY,
AST_EDGE_INDEX_QUERY,
AST_TEXT_INDEX_QUERY,
AST_CREATE,
AST_CALL_PROCEDURE,
AST_MATCH,

View File

@ -226,6 +226,6 @@ startup_config_dict = {
"experimental_enabled": (
"",
"",
"Experimental features to be used, comma seperated. Options [system-replication, high-availability]",
"Experimental features to be used, comma-separated. Options [system-replication, text-search, high-availability]",
),
}

View File

@ -0,0 +1,6 @@
function(copy_text_search_e2e_python_files FILE_NAME)
copy_e2e_python_files(text_search ${FILE_NAME})
endfunction()
copy_text_search_e2e_python_files(common.py)
copy_text_search_e2e_python_files(test_text_search.py)

View File

@ -0,0 +1,87 @@
# Copyright 2023 Memgraph Ltd.
#
# Use of this software is governed by the Business Source License
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
# License, and you may not use this file except in compliance with the Business Source License.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0, included in the file
# licenses/APL.txt.
import typing
import mgclient
import pytest
from gqlalchemy import Memgraph
def execute_and_fetch_all(cursor: mgclient.Cursor, query: str, params: dict = {}) -> typing.List[tuple]:
cursor.execute(query, params)
return cursor.fetchall()
@pytest.fixture
def connect(**kwargs) -> mgclient.Connection:
connection = mgclient.connect(host="localhost", port=7687, **kwargs)
connection.autocommit = True
cursor = connection.cursor()
execute_and_fetch_all(cursor, """USE DATABASE memgraph""")
try:
execute_and_fetch_all(cursor, """DROP DATABASE clean""")
except:
pass
execute_and_fetch_all(cursor, """MATCH (n) DETACH DELETE n""")
yield connection
@pytest.fixture
def memgraph(**kwargs) -> Memgraph:
memgraph = Memgraph()
yield memgraph
memgraph.drop_database()
memgraph.drop_indexes()
@pytest.fixture
def memgraph_with_text_indexed_data(**kwargs) -> Memgraph:
memgraph = Memgraph()
memgraph.execute(
"""CREATE (:Document {title: "Rules2024", version: 1, fulltext: "random works", date: date("2023-11-14")});"""
)
memgraph.execute(
"""CREATE (:Document {title: "Rules2023", version: 9, fulltext: "text Rules2024", date: date("2023-11-14")});"""
)
memgraph.execute(
"""CREATE (:Document:Revision {title: "Rules2024", version: 2, fulltext: "random words", date: date("2023-12-15")});"""
)
memgraph.execute("""CREATE (:Revision {title: "OperationSchema", version: 3, date: date("2023-10-01")});""")
memgraph.execute("""CREATE TEXT INDEX complianceDocuments ON :Document;""")
yield memgraph
memgraph.execute("""DROP TEXT INDEX complianceDocuments;""")
memgraph.drop_database()
memgraph.drop_indexes()
@pytest.fixture
def memgraph_with_mixed_data(**kwargs) -> Memgraph:
memgraph = Memgraph()
memgraph.execute(
"""CREATE (:Document:Revision {title: "Rules2024", version: 1, date: date("2023-11-14"), contents: "Lorem ipsum dolor sit amet"});"""
)
memgraph.execute(
"""CREATE (:Revision {title: "Rules2024", version: 2, date: date("2023-12-15"), contents: "consectetur adipiscing elit"});"""
)
memgraph.execute("""CREATE TEXT INDEX complianceDocuments ON :Document;""")
yield memgraph
memgraph.execute("""DROP TEXT INDEX complianceDocuments;""")
memgraph.drop_database()
memgraph.drop_indexes()

View File

@ -0,0 +1,206 @@
# Copyright 2024 Memgraph Ltd.
#
# Use of this software is governed by the Business Source License
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
# License, and you may not use this file except in compliance with the Business Source License.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0, included in the file
# licenses/APL.txt.
import json
import re
import sys
import gqlalchemy
import mgclient
import pytest
from common import memgraph, memgraph_with_mixed_data, memgraph_with_text_indexed_data
GET_RULES_2024_DOCUMENT = """CALL libtext.search("complianceDocuments", "data.title:Rules2024") YIELD node
RETURN node.title AS title, node.version AS version
ORDER BY version ASC, title ASC;"""
def test_create_index(memgraph):
memgraph.execute("""CREATE TEXT INDEX exampleIndex ON :Document;""")
index_info = memgraph.execute_and_fetch("""SHOW INDEX INFO""")
assert list(index_info) == [
{"index type": "text (name: exampleIndex)", "label": "Document", "property": None, "count": None}
]
def test_drop_index(memgraph):
memgraph.execute("""DROP TEXT INDEX exampleIndex;""")
index_info = memgraph.execute_and_fetch("""SHOW INDEX INFO""")
assert list(index_info) == []
def test_create_existing_index(memgraph):
memgraph.execute("""CREATE TEXT INDEX duplicatedIndex ON :Document;""")
with pytest.raises(
gqlalchemy.exceptions.GQLAlchemyDatabaseError, match='Text index "duplicatedIndex" already exists.'
) as _:
memgraph.execute("""CREATE TEXT INDEX duplicatedIndex ON :Document;""")
memgraph.execute("""DROP TEXT INDEX duplicatedIndex;""") # cleanup
def test_drop_nonexistent_index(memgraph):
with pytest.raises(
gqlalchemy.exceptions.GQLAlchemyDatabaseError, match='Text index "noSuchIndex" doesnt exist.'
) as _:
memgraph.execute("""DROP TEXT INDEX noSuchIndex;""")
def test_text_search_given_property(memgraph_with_text_indexed_data):
result = list(memgraph_with_text_indexed_data.execute_and_fetch(GET_RULES_2024_DOCUMENT))
assert len(result) == 2 and result == [{"title": "Rules2024", "version": 1}, {"title": "Rules2024", "version": 2}]
def test_text_search_all_properties(memgraph_with_text_indexed_data):
SEARCH_QUERY = "Rules2024"
ALL_PROPERTIES_QUERY = f"""CALL libtext.search_all("complianceDocuments", "{SEARCH_QUERY}") YIELD node
RETURN node
ORDER BY node.version ASC, node.title ASC;"""
result = list(memgraph_with_text_indexed_data.execute_and_fetch(ALL_PROPERTIES_QUERY))
result_nodes = [record["node"] for record in result]
assert len(result) == 3 and (
result_nodes[0].title == SEARCH_QUERY
and result_nodes[1].title == SEARCH_QUERY
and SEARCH_QUERY in result_nodes[2].fulltext
)
def test_regex_text_search(memgraph_with_text_indexed_data):
REGEX_QUERY = """CALL libtext.regex_search("complianceDocuments", "wor.*s") YIELD node
RETURN node
ORDER BY node.version ASC, node.title ASC;"""
result = list(memgraph_with_text_indexed_data.execute_and_fetch(REGEX_QUERY))
assert (
len(result) == 2
and re.search("wor.*s", result[0]["node"].fulltext)
and re.search("wor.*s", result[1]["node"].fulltext)
# In this test, all values matching the regex string are found in the .node property only ^
)
def test_text_search_aggregate(memgraph_with_text_indexed_data):
input_aggregation = json.dumps({"count": {"value_count": {"field": "metadata.gid"}}}, separators=(",", ":"))
expected_aggregation = json.dumps({"count": {"value": 2.0}}, separators=(",", ":"))
AGGREGATION_QUERY = f"""CALL libtext.aggregate("complianceDocuments", "data.title:Rules2024", '{input_aggregation}')
YIELD aggregation
RETURN aggregation;"""
result = list(memgraph_with_text_indexed_data.execute_and_fetch(AGGREGATION_QUERY))
assert len(result) == 1 and result[0]["aggregation"] == expected_aggregation
def test_text_search_query_boolean(memgraph_with_text_indexed_data):
BOOLEAN_QUERY = """CALL libtext.search("complianceDocuments", "(data.title:Rules2023 OR data.title:Rules2024) AND data.fulltext:words") YIELD node
RETURN node.title AS title, node.version AS version
ORDER BY version ASC, title ASC;"""
result = list(memgraph_with_text_indexed_data.execute_and_fetch(BOOLEAN_QUERY))
assert len(result) == 1 and result == [{"title": "Rules2024", "version": 2}]
def test_create_indexed_node(memgraph_with_text_indexed_data):
memgraph_with_text_indexed_data.execute("""CREATE (:Document {title: "Rules2024", version: 3});""")
result = list(memgraph_with_text_indexed_data.execute_and_fetch(GET_RULES_2024_DOCUMENT))
assert len(result) == 3 and result == [
{"title": "Rules2024", "version": 1},
{"title": "Rules2024", "version": 2},
{"title": "Rules2024", "version": 3},
]
def test_delete_indexed_node(memgraph_with_text_indexed_data):
memgraph_with_text_indexed_data.execute("""MATCH (n:Document {title: "Rules2024", version: 2}) DETACH DELETE n;""")
result = list(memgraph_with_text_indexed_data.execute_and_fetch(GET_RULES_2024_DOCUMENT))
assert len(result) == 1 and result == [{"title": "Rules2024", "version": 1}]
def test_add_indexed_label(memgraph_with_mixed_data):
memgraph_with_mixed_data.execute("""MATCH (n:Revision {version:2}) SET n:Document;""")
result = list(memgraph_with_mixed_data.execute_and_fetch(GET_RULES_2024_DOCUMENT))
assert len(result) == 2 and result == [{"title": "Rules2024", "version": 1}, {"title": "Rules2024", "version": 2}]
def test_remove_indexed_label(memgraph_with_mixed_data):
memgraph_with_mixed_data.execute("""MATCH (n:Document {version: 1}) REMOVE n:Document;""")
result = list(memgraph_with_mixed_data.execute_and_fetch(GET_RULES_2024_DOCUMENT))
assert len(result) == 0
def test_update_text_property_of_indexed_node(memgraph_with_text_indexed_data):
memgraph_with_text_indexed_data.execute("""MATCH (n:Document {version:1}) SET n.title = "Rules2030";""")
result = list(
memgraph_with_text_indexed_data.execute_and_fetch(
"""CALL libtext.search("complianceDocuments", "data.title:Rules2030") YIELD node
RETURN node.title AS title, node.version AS version
ORDER BY version ASC, title ASC;"""
)
)
assert len(result) == 1 and result == [{"title": "Rules2030", "version": 1}]
def test_add_unindexable_property_to_indexed_node(memgraph_with_text_indexed_data):
try:
memgraph_with_text_indexed_data.execute("""MATCH (n:Document {version:1}) SET n.randomList = [2, 3, 4, 5];""")
except Exception:
assert False
def test_remove_indexable_property_from_indexed_node(memgraph_with_text_indexed_data):
try:
memgraph_with_text_indexed_data.execute(
"""MATCH (n:Document {version:1}) REMOVE n.title, n.version, n.fulltext, n.date;"""
)
except Exception:
assert False
def test_remove_unindexable_property_from_indexed_node(memgraph_with_text_indexed_data):
try:
memgraph_with_text_indexed_data.execute_and_fetch(
"""MATCH (n:Document {date: date("2023-12-15")}) REMOVE n.date;"""
)
except Exception:
assert False
def test_text_search_nonexistent_index(memgraph_with_text_indexed_data):
NONEXISTENT_INDEX_QUERY = """CALL libtext.search("noSuchIndex", "data.fulltext:words") YIELD node
RETURN node.title AS title, node.version AS version
ORDER BY version ASC, title ASC;"""
with pytest.raises(mgclient.DatabaseError, match='Text index "noSuchIndex" doesnt exist.') as _:
list(memgraph_with_text_indexed_data.execute_and_fetch(NONEXISTENT_INDEX_QUERY))
if __name__ == "__main__":
sys.exit(pytest.main([__file__, "-rA"]))

View File

@ -0,0 +1,69 @@
# Copyright 2024 Memgraph Ltd.
#
# Use of this software is governed by the Business Source License
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
# License, and you may not use this file except in compliance with the Business Source License.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0, included in the file
# licenses/APL.txt.
import json
import sys
import gqlalchemy
import pytest
from common import memgraph
TEXT_SEARCH_DISABLED_ERROR = (
"To use text indices and text search, start Memgraph with the experimental text search feature enabled."
)
def test_create_index(memgraph):
with pytest.raises(gqlalchemy.exceptions.GQLAlchemyDatabaseError, match=TEXT_SEARCH_DISABLED_ERROR) as _:
memgraph.execute("""CREATE TEXT INDEX exampleIndex ON :Document;""")
def test_drop_index(memgraph):
with pytest.raises(gqlalchemy.exceptions.GQLAlchemyDatabaseError, match=TEXT_SEARCH_DISABLED_ERROR) as _:
memgraph.execute("""DROP TEXT INDEX exampleIndex;""")
def test_text_search_given_property(memgraph):
with pytest.raises(gqlalchemy.exceptions.GQLAlchemyDatabaseError, match=TEXT_SEARCH_DISABLED_ERROR) as _:
memgraph.execute(
"""CALL libtext.search("complianceDocuments", "data.title:Rules2024") YIELD node
RETURN node;"""
)
def test_text_search_all_properties(memgraph):
with pytest.raises(gqlalchemy.exceptions.GQLAlchemyDatabaseError, match=TEXT_SEARCH_DISABLED_ERROR) as _:
memgraph.execute(
"""CALL libtext.search_all("complianceDocuments", "Rules2024") YIELD node
RETURN node;"""
)
def test_regex_text_search(memgraph):
with pytest.raises(gqlalchemy.exceptions.GQLAlchemyDatabaseError, match=TEXT_SEARCH_DISABLED_ERROR) as _:
memgraph.execute(
"""CALL libtext.regex_search("complianceDocuments", "wor.*s") YIELD node
RETURN node;"""
)
def test_text_search_aggregate(memgraph):
with pytest.raises(gqlalchemy.exceptions.GQLAlchemyDatabaseError, match=TEXT_SEARCH_DISABLED_ERROR) as _:
input_aggregation = json.dumps({"count": {"value_count": {"field": "metadata.gid"}}}, separators=(",", ":"))
memgraph.execute(
f"""CALL libtext.aggregate("complianceDocuments", "wor.*s", '{input_aggregation}') YIELD aggregation
RETURN aggregation;"""
)
if __name__ == "__main__":
sys.exit(pytest.main([__file__, "-rA"]))

View File

@ -0,0 +1,33 @@
text_search_cluster: &text_search_cluster
cluster:
main:
args:
[
"--bolt-port",
"7687",
"--log-level=TRACE",
"--experimental-enabled=text-search",
]
log_file: "text_search.log"
setup_queries: []
validation_queries: []
text_search_disabled_cluster: &text_search_disabled_cluster
cluster:
main:
args: ["--bolt-port", "7687", "--log-level=TRACE"]
log_file: "text_search.log"
setup_queries: []
validation_queries: []
workloads:
- name: "Test behavior of text search in Memgraph"
binary: "tests/e2e/pytest_runner.sh"
proc: "tests/e2e/text_search/query_modules/"
args: ["text_search/test_text_search.py"]
<<: *text_search_cluster
- name: "Test behavior of text search in Memgraph when disabled"
binary: "tests/e2e/pytest_runner.sh"
proc: "tests/e2e/text_search/query_modules/"
args: ["text_search/test_text_search_disabled.py"]
<<: *text_search_disabled_cluster

View File

@ -71,6 +71,11 @@ struct DatabaseState {
std::string property;
};
struct TextItem {
std::string index_name;
std::string label;
};
struct LabelPropertiesItem {
std::string label;
std::set<std::string, std::less<>> properties;
@ -80,6 +85,7 @@ struct DatabaseState {
std::set<Edge> edges;
std::set<LabelItem> label_indices;
std::set<LabelPropertyItem> label_property_indices;
std::set<TextItem> text_indices;
std::set<LabelPropertyItem> existence_constraints;
std::set<LabelPropertiesItem> unique_constraints;
};
@ -106,6 +112,10 @@ bool operator<(const DatabaseState::LabelPropertyItem &first, const DatabaseStat
return first.property < second.property;
}
bool operator<(const DatabaseState::TextItem &first, const DatabaseState::TextItem &second) {
return first.index_name < second.index_name && first.label < second.label;
}
bool operator<(const DatabaseState::LabelPropertiesItem &first, const DatabaseState::LabelPropertiesItem &second) {
if (first.label != second.label) return first.label < second.label;
return first.properties < second.properties;
@ -128,6 +138,10 @@ bool operator==(const DatabaseState::LabelPropertyItem &first, const DatabaseSta
return first.label == second.label && first.property == second.property;
}
bool operator==(const DatabaseState::TextItem &first, const DatabaseState::TextItem &second) {
return first.index_name == second.index_name && first.label == second.label;
}
bool operator==(const DatabaseState::LabelPropertiesItem &first, const DatabaseState::LabelPropertiesItem &second) {
return first.label == second.label && first.properties == second.properties;
}
@ -185,6 +199,7 @@ DatabaseState GetState(memgraph::storage::Storage *db) {
// Capture all indices
std::set<DatabaseState::LabelItem> label_indices;
std::set<DatabaseState::LabelPropertyItem> label_property_indices;
std::set<DatabaseState::TextItem> text_indices;
{
auto info = dba->ListAllIndices();
for (const auto &item : info.label) {
@ -193,6 +208,9 @@ DatabaseState GetState(memgraph::storage::Storage *db) {
for (const auto &item : info.label_property) {
label_property_indices.insert({dba->LabelToName(item.first), dba->PropertyToName(item.second)});
}
for (const auto &item : info.text_indices) {
text_indices.insert({item.first, dba->LabelToName(item.second)});
}
}
// Capture all constraints
@ -212,7 +230,8 @@ DatabaseState GetState(memgraph::storage::Storage *db) {
}
}
return {vertices, edges, label_indices, label_property_indices, existence_constraints, unique_constraints};
return {vertices, edges, label_indices, label_property_indices, text_indices, existence_constraints,
unique_constraints};
}
auto Execute(memgraph::query::InterpreterContext *context, memgraph::dbms::DatabaseAccess db,

View File

@ -358,6 +358,8 @@ TEST_F(DecoderEncoderTest, PropertyValueInvalidMarker) {
case memgraph::storage::durability::Marker::DELTA_LABEL_PROPERTY_INDEX_STATS_CLEAR:
case memgraph::storage::durability::Marker::DELTA_EDGE_TYPE_INDEX_CREATE:
case memgraph::storage::durability::Marker::DELTA_EDGE_TYPE_INDEX_DROP:
case memgraph::storage::durability::Marker::DELTA_TEXT_INDEX_CREATE:
case memgraph::storage::durability::Marker::DELTA_TEXT_INDEX_DROP:
case memgraph::storage::durability::Marker::DELTA_EXISTENCE_CONSTRAINT_CREATE:
case memgraph::storage::durability::Marker::DELTA_EXISTENCE_CONSTRAINT_DROP:
case memgraph::storage::durability::Marker::DELTA_UNIQUE_CONSTRAINT_CREATE:

View File

@ -146,6 +146,7 @@ TYPED_TEST(InfoTest, InfoCheck) {
ASSERT_LT(info.disk_usage, 1000'000);
ASSERT_EQ(info.label_indices, 1);
ASSERT_EQ(info.label_property_indices, 1);
ASSERT_EQ(info.text_indices, 0);
ASSERT_EQ(info.existence_constraints, 0);
ASSERT_EQ(info.unique_constraints, 2);
ASSERT_EQ(info.storage_mode, this->mode);

View File

@ -53,6 +53,10 @@ memgraph::storage::durability::WalDeltaData::Type StorageMetadataOperationToWalD
return memgraph::storage::durability::WalDeltaData::Type::LABEL_PROPERTY_INDEX_STATS_SET;
case memgraph::storage::durability::StorageMetadataOperation::LABEL_PROPERTY_INDEX_STATS_CLEAR:
return memgraph::storage::durability::WalDeltaData::Type::LABEL_PROPERTY_INDEX_STATS_CLEAR;
case memgraph::storage::durability::StorageMetadataOperation::TEXT_INDEX_CREATE:
return memgraph::storage::durability::WalDeltaData::Type::TEXT_INDEX_CREATE;
case memgraph::storage::durability::StorageMetadataOperation::TEXT_INDEX_DROP:
return memgraph::storage::durability::WalDeltaData::Type::TEXT_INDEX_DROP;
case memgraph::storage::durability::StorageMetadataOperation::EXISTENCE_CONSTRAINT_CREATE:
return memgraph::storage::durability::WalDeltaData::Type::EXISTENCE_CONSTRAINT_CREATE;
case memgraph::storage::durability::StorageMetadataOperation::EXISTENCE_CONSTRAINT_DROP:
@ -252,7 +256,7 @@ class DeltaGenerator final {
ASSERT_TRUE(false) << "Unexpected statistics operation!";
}
}
wal_file_.AppendOperation(operation, label_id, property_ids, l_stats, lp_stats, timestamp_);
wal_file_.AppendOperation(operation, std::nullopt, label_id, property_ids, l_stats, lp_stats, timestamp_);
if (valid_) {
UpdateStats(timestamp_, 1);
memgraph::storage::durability::WalDeltaData data;
@ -271,6 +275,8 @@ class DeltaGenerator final {
break;
case memgraph::storage::durability::StorageMetadataOperation::LABEL_PROPERTY_INDEX_CREATE:
case memgraph::storage::durability::StorageMetadataOperation::LABEL_PROPERTY_INDEX_DROP:
case memgraph::storage::durability::StorageMetadataOperation::TEXT_INDEX_CREATE:
case memgraph::storage::durability::StorageMetadataOperation::TEXT_INDEX_DROP:
case memgraph::storage::durability::StorageMetadataOperation::EXISTENCE_CONSTRAINT_CREATE:
case memgraph::storage::durability::StorageMetadataOperation::EXISTENCE_CONSTRAINT_DROP:
data.operation_label_property.label = label;
@ -313,6 +319,8 @@ class DeltaGenerator final {
case memgraph::storage::durability::StorageMetadataOperation::LABEL_INDEX_STATS_SET:
case memgraph::storage::durability::StorageMetadataOperation::LABEL_PROPERTY_INDEX_CREATE:
case memgraph::storage::durability::StorageMetadataOperation::LABEL_PROPERTY_INDEX_DROP:
case memgraph::storage::durability::StorageMetadataOperation::TEXT_INDEX_CREATE:
case memgraph::storage::durability::StorageMetadataOperation::TEXT_INDEX_DROP:
case memgraph::storage::durability::StorageMetadataOperation::EXISTENCE_CONSTRAINT_CREATE:
case memgraph::storage::durability::StorageMetadataOperation::EXISTENCE_CONSTRAINT_DROP:;
case memgraph::storage::durability::StorageMetadataOperation::LABEL_PROPERTY_INDEX_STATS_SET: