Add text search: * named property search * all-property search * regex search * aggregation over search results Text search works with: * non-parallel transactions * durability (WAL files and snapshots) * multitenancy
This commit is contained in:
parent
2ac649f3b5
commit
9629f10166
@ -326,6 +326,21 @@ inline mgp_vertex *graph_get_vertex_by_id(mgp_graph *g, mgp_vertex_id id, mgp_me
|
||||
return MgInvoke<mgp_vertex *>(mgp_graph_get_vertex_by_id, g, id, memory);
|
||||
}
|
||||
|
||||
/// Reports whether a text index called `index_name` exists in `graph`.
/// Forwards to mgp_graph_has_text_index through MgInvoke and converts the int it yields to bool.
inline bool graph_has_text_index(mgp_graph *graph, const char *index_name) {
  const int index_found = MgInvoke<int>(mgp_graph_has_text_index, graph, index_name);
  return index_found != 0;
}
|
||||
|
||||
/// Runs `search_query` against the text index `index_name` using the given `search_mode`.
/// Forwards to mgp_graph_search_text_index through MgInvoke and returns the map it produces.
inline mgp_map *graph_search_text_index(mgp_graph *graph, const char *index_name, const char *search_query,
                                        text_search_mode search_mode, mgp_memory *memory) {
  mgp_map *const search_outcome =
      MgInvoke<mgp_map *>(mgp_graph_search_text_index, graph, index_name, search_query, search_mode, memory);
  return search_outcome;
}
|
||||
|
||||
/// Aggregates, according to `aggregation_query`, over the matches of `search_query` in the text index `index_name`.
/// Forwards to mgp_graph_aggregate_over_text_index through MgInvoke and returns the map it produces.
inline mgp_map *graph_aggregate_over_text_index(mgp_graph *graph, const char *index_name, const char *search_query,
                                                const char *aggregation_query, mgp_memory *memory) {
  mgp_map *const aggregation_outcome = MgInvoke<mgp_map *>(mgp_graph_aggregate_over_text_index, graph, index_name,
                                                           search_query, aggregation_query, memory);
  return aggregation_outcome;
}
|
||||
|
||||
/// Obtains a vertices iterator for graph `g`.
/// Forwards to mgp_graph_iter_vertices through MgInvoke and returns the iterator it produces.
inline mgp_vertices_iterator *graph_iter_vertices(mgp_graph *g, mgp_memory *memory) {
  mgp_vertices_iterator *const vertices_it = MgInvoke<mgp_vertices_iterator *>(mgp_graph_iter_vertices, g, memory);
  return vertices_it;
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright 2023 Memgraph Ltd.
|
||||
// Copyright 2024 Memgraph Ltd.
|
||||
//
|
||||
// Use of this software is governed by the Business Source License
|
||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
@ -891,6 +891,36 @@ enum mgp_error mgp_edge_iter_properties(struct mgp_edge *e, struct mgp_memory *m
|
||||
enum mgp_error mgp_graph_get_vertex_by_id(struct mgp_graph *g, struct mgp_vertex_id id, struct mgp_memory *memory,
|
||||
struct mgp_vertex **result);
|
||||
|
||||
/// Result is non-zero if the index with the given name exists.
|
||||
/// The current implementation always returns without errors.
|
||||
enum mgp_error mgp_graph_has_text_index(struct mgp_graph *graph, const char *index_name, int *result);
|
||||
|
||||
/// Available modes of searching text indices.
/// The chosen mode is passed as the `search_mode` argument of mgp_graph_search_text_index.
|
||||
MGP_ENUM_CLASS text_search_mode{
|
||||
// Mode requested by the `search` procedure of the text search query module.
SPECIFIED_PROPERTIES,
|
||||
// Mode requested by the `regex_search` procedure of the text search query module.
REGEX,
|
||||
// Mode requested by the `search_all` procedure of the text search query module.
ALL_PROPERTIES,
|
||||
};
|
||||
|
||||
/// Search the named text index for the given query. The result is a map holding either the "search_results" key (on success) or the "error_msg" key (on failure).
|
||||
/// The "search_results" key contains the vertices whose text-indexed properties match the given query.
|
||||
/// In case of a Tantivy error, the "search_results" key is absent, and "error_msg" contains the error message.
|
||||
/// Return mgp_error::MGP_ERROR_UNABLE_TO_ALLOCATE if there’s an allocation error while constructing the results map.
|
||||
/// Return mgp_error::MGP_ERROR_KEY_ALREADY_EXISTS if the same key is being created in the results map more than once.
|
||||
enum mgp_error mgp_graph_search_text_index(struct mgp_graph *graph, const char *index_name, const char *search_query,
|
||||
enum text_search_mode search_mode, struct mgp_memory *memory,
|
||||
struct mgp_map **result);
|
||||
|
||||
/// Aggregate over the results of a search over the named text index. The result is a map holding either the
|
||||
/// "aggregation_results" key (on success) or the "error_msg" key (on failure).
|
||||
/// The "aggregation_results" key contains the result of the aggregation as a string.
|
||||
/// In case of a Tantivy error, the "aggregation_results" key is absent, and "error_msg" contains the error message.
|
||||
/// Return mgp_error::MGP_ERROR_UNABLE_TO_ALLOCATE if there’s an allocation error while constructing the results map.
|
||||
/// Return mgp_error::MGP_ERROR_KEY_ALREADY_EXISTS if the same key is being created in the results map more than once.
|
||||
enum mgp_error mgp_graph_aggregate_over_text_index(struct mgp_graph *graph, const char *index_name,
|
||||
const char *search_query, const char *aggregation_query,
|
||||
struct mgp_memory *memory, struct mgp_map **result);
|
||||
|
||||
/// Creates label index for given label.
|
||||
/// mgp_error::MGP_ERROR_NO_ERROR is always returned.
|
||||
/// if label index already exists, result will be 0, otherwise 1.
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright 2023 Memgraph Ltd.
|
||||
// Copyright 2024 Memgraph Ltd.
|
||||
//
|
||||
// Use of this software is governed by the Business Source License
|
||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
@ -32,6 +32,15 @@
|
||||
|
||||
namespace mgp {
|
||||
|
||||
/// Exception type used by the text search helpers to report a failure.
/// It owns a copy of the description and exposes it through what().
class TextSearchException : public std::exception {
  // Owned description; what() hands out a pointer into this string.
  std::string message_;

 public:
  /// Takes ownership of `message`.
  explicit TextSearchException(std::string message) : message_{std::move(message)} {}

  /// Returns the stored description as a C string, valid for the lifetime of this object.
  const char *what() const noexcept override { return message_.c_str(); }
};
|
||||
|
||||
class IndexException : public std::exception {
|
||||
public:
|
||||
explicit IndexException(std::string message) : message_(std::move(message)) {}
|
||||
@ -4306,12 +4315,12 @@ inline void AddParamsReturnsToProc(mgp_proc *proc, std::vector<Parameter> ¶m
|
||||
}
|
||||
} // namespace detail
|
||||
|
||||
inline bool CreateLabelIndex(mgp_graph *memgaph_graph, const std::string_view label) {
|
||||
return create_label_index(memgaph_graph, label.data());
|
||||
inline bool CreateLabelIndex(mgp_graph *memgraph_graph, const std::string_view label) {
|
||||
return create_label_index(memgraph_graph, label.data());
|
||||
}
|
||||
|
||||
inline bool DropLabelIndex(mgp_graph *memgaph_graph, const std::string_view label) {
|
||||
return drop_label_index(memgaph_graph, label.data());
|
||||
inline bool DropLabelIndex(mgp_graph *memgraph_graph, const std::string_view label) {
|
||||
return drop_label_index(memgraph_graph, label.data());
|
||||
}
|
||||
|
||||
inline List ListAllLabelIndices(mgp_graph *memgraph_graph) {
|
||||
@ -4322,14 +4331,14 @@ inline List ListAllLabelIndices(mgp_graph *memgraph_graph) {
|
||||
return List(label_indices);
|
||||
}
|
||||
|
||||
inline bool CreateLabelPropertyIndex(mgp_graph *memgaph_graph, const std::string_view label,
|
||||
inline bool CreateLabelPropertyIndex(mgp_graph *memgraph_graph, const std::string_view label,
|
||||
const std::string_view property) {
|
||||
return create_label_property_index(memgaph_graph, label.data(), property.data());
|
||||
return create_label_property_index(memgraph_graph, label.data(), property.data());
|
||||
}
|
||||
|
||||
inline bool DropLabelPropertyIndex(mgp_graph *memgaph_graph, const std::string_view label,
|
||||
inline bool DropLabelPropertyIndex(mgp_graph *memgraph_graph, const std::string_view label,
|
||||
const std::string_view property) {
|
||||
return drop_label_property_index(memgaph_graph, label.data(), property.data());
|
||||
return drop_label_property_index(memgraph_graph, label.data(), property.data());
|
||||
}
|
||||
|
||||
inline List ListAllLabelPropertyIndices(mgp_graph *memgraph_graph) {
|
||||
@ -4340,6 +4349,58 @@ inline List ListAllLabelPropertyIndices(mgp_graph *memgraph_graph) {
|
||||
return List(label_property_indices);
|
||||
}
|
||||
|
||||
// Keys of the result maps inspected by SearchTextIndex and AggregateOverTextIndex.
// They are `inline constexpr` instead of living in an unnamed namespace: this is a header, and an
// unnamed namespace would give every including translation unit its own private copy of each constant.
inline constexpr std::string_view kErrorMsgKey = "error_msg";
inline constexpr std::string_view kSearchResultsKey = "search_results";
inline constexpr std::string_view kAggregationResultsKey = "aggregation_results";
|
||||
|
||||
/// Searches the text index `index_name` for `search_query` using the given `search_mode`.
///
/// @return the list stored under the "search_results" key of the map produced by graph_search_text_index.
/// @throws TextSearchException if the map has an "error_msg" entry (its text becomes the exception message when
///         it is a string), if the "search_results" entry is missing, or if that entry is not a list.
inline List SearchTextIndex(mgp_graph *memgraph_graph, std::string_view index_name, std::string_view search_query,
                            text_search_mode search_mode) {
  // The C API takes C strings, but std::string_view::data() is not guaranteed to be NUL-terminated
  // (e.g. for a view onto part of a larger string), so pass terminated, owning copies instead.
  const std::string index_name_str(index_name);
  const std::string search_query_str(search_query);

  auto results_or_error = Map(mgp::MemHandlerCallback(graph_search_text_index, memgraph_graph, index_name_str.c_str(),
                                                      search_query_str.c_str(), search_mode));

  if (results_or_error.KeyExists(kErrorMsgKey)) {
    if (!results_or_error.At(kErrorMsgKey).IsString()) {
      throw TextSearchException{"The error message is not a string!"};
    }
    // Build the message from the view's own length rather than relying on a terminating NUL.
    throw TextSearchException(std::string(results_or_error.At(kErrorMsgKey).ValueString()));
  }

  if (!results_or_error.KeyExists(kSearchResultsKey)) {
    throw TextSearchException{"Incomplete text index search results!"};
  }

  if (!results_or_error.At(kSearchResultsKey).IsList()) {
    throw TextSearchException{"Text index search results have wrong type!"};
  }

  return results_or_error.At(kSearchResultsKey).ValueList();
}
|
||||
|
||||
/// Aggregates, according to `aggregation_query`, over the matches of `search_query` in the text index `index_name`.
///
/// @return the string stored under the "aggregation_results" key of the map produced by
///         graph_aggregate_over_text_index.
/// @throws TextSearchException if the map has an "error_msg" entry (its text becomes the exception message when
///         it is a string), if the "aggregation_results" entry is missing, or if that entry is not a string.
///
/// NOTE(review): the returned view is obtained from the local results map; confirm that the underlying string
/// storage stays valid after this function returns, or have callers copy the result immediately.
inline std::string_view AggregateOverTextIndex(mgp_graph *memgraph_graph, std::string_view index_name,
                                               std::string_view search_query, std::string_view aggregation_query) {
  // The C API takes C strings, but std::string_view::data() is not guaranteed to be NUL-terminated
  // (e.g. for a view onto part of a larger string), so pass terminated, owning copies instead.
  const std::string index_name_str(index_name);
  const std::string search_query_str(search_query);
  const std::string aggregation_query_str(aggregation_query);

  auto results_or_error =
      Map(mgp::MemHandlerCallback(graph_aggregate_over_text_index, memgraph_graph, index_name_str.c_str(),
                                  search_query_str.c_str(), aggregation_query_str.c_str()));

  if (results_or_error.KeyExists(kErrorMsgKey)) {
    if (!results_or_error.At(kErrorMsgKey).IsString()) {
      throw TextSearchException{"The error message is not a string!"};
    }
    // Build the message from the view's own length rather than relying on a terminating NUL.
    throw TextSearchException(std::string(results_or_error.At(kErrorMsgKey).ValueString()));
  }

  if (!results_or_error.KeyExists(kAggregationResultsKey)) {
    throw TextSearchException{"Incomplete text index aggregation results!"};
  }

  if (!results_or_error.At(kAggregationResultsKey).IsString()) {
    throw TextSearchException{"Text index aggregation results have wrong type!"};
  }

  return results_or_error.At(kAggregationResultsKey).ValueString();
}
|
||||
|
||||
inline bool CreateExistenceConstraint(mgp_graph *memgraph_graph, const std::string_view label,
|
||||
const std::string_view property) {
|
||||
return create_existence_constraint(memgraph_graph, label.data(), property.data());
|
||||
|
@ -295,6 +295,32 @@ set_path_external_library(jemalloc STATIC
|
||||
|
||||
import_header_library(rangev3 ${CMAKE_CURRENT_SOURCE_DIR}/rangev3/include)
|
||||
|
||||
ExternalProject_Add(mgcxx-proj
|
||||
PREFIX mgcxx-proj
|
||||
GIT_REPOSITORY https://github.com/memgraph/mgcxx
|
||||
GIT_TAG "v0.0.4"
|
||||
CMAKE_ARGS
|
||||
"-DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>"
|
||||
"-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}"
|
||||
"-DENABLE_TESTS=OFF"
|
||||
INSTALL_DIR "${PROJECT_BINARY_DIR}/mgcxx"
|
||||
)
|
||||
ExternalProject_Get_Property(mgcxx-proj install_dir)
|
||||
set(MGCXX_ROOT ${install_dir})
|
||||
|
||||
add_library(tantivy_text_search STATIC IMPORTED GLOBAL)
|
||||
add_dependencies(tantivy_text_search mgcxx-proj)
|
||||
set_property(TARGET tantivy_text_search PROPERTY IMPORTED_LOCATION ${MGCXX_ROOT}/lib/libtantivy_text_search.a)
|
||||
|
||||
add_library(mgcxx_text_search STATIC IMPORTED GLOBAL)
|
||||
add_dependencies(mgcxx_text_search mgcxx-proj)
|
||||
set_property(TARGET mgcxx_text_search PROPERTY IMPORTED_LOCATION ${MGCXX_ROOT}/lib/libmgcxx_text_search.a)
|
||||
# We need to create the include directory first in order to be able to add it
|
||||
# as an include directory. The header files in the include directory will be
|
||||
# generated later during the build process.
|
||||
file(MAKE_DIRECTORY ${MGCXX_ROOT}/include)
|
||||
set_property(TARGET mgcxx_text_search PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${MGCXX_ROOT}/include)
|
||||
|
||||
# Setup NuRaft
|
||||
import_external_library(nuraft STATIC
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/nuraft/lib/libnuraft.a
|
||||
|
@ -6,6 +6,8 @@ project(memgraph_query_modules)
|
||||
|
||||
disallow_in_source_build()
|
||||
|
||||
find_package(fmt REQUIRED)
|
||||
|
||||
# Everything that is installed here, should be under the "query_modules" component.
|
||||
set(CMAKE_INSTALL_DEFAULT_COMPONENT_NAME "query_modules")
|
||||
string(TOLOWER ${CMAKE_BUILD_TYPE} lower_build_type)
|
||||
@ -58,6 +60,22 @@ install(PROGRAMS $<TARGET_FILE:schema>
|
||||
# Also install the source of the example, so user can read it.
|
||||
install(FILES schema.cpp DESTINATION lib/memgraph/query_modules/src)
|
||||
|
||||
add_library(text SHARED text_search_module.cpp)
|
||||
target_include_directories(text PRIVATE ${CMAKE_SOURCE_DIR}/include)
|
||||
target_compile_options(text PRIVATE -Wall)
|
||||
target_link_libraries(text PRIVATE -static-libgcc -static-libstdc++ fmt::fmt)
|
||||
# Strip the text search module in release build.
|
||||
if (lower_build_type STREQUAL "release")
|
||||
add_custom_command(TARGET text POST_BUILD
|
||||
COMMAND strip -s $<TARGET_FILE:text>
|
||||
COMMENT "Stripping symbols and sections from the C++ text_search module")
|
||||
endif()
|
||||
install(PROGRAMS $<TARGET_FILE:text>
|
||||
DESTINATION lib/memgraph/query_modules
|
||||
RENAME text.so)
|
||||
# Also install the source of the text search module, so the user can read it.
|
||||
install(FILES text_search_module.cpp DESTINATION lib/memgraph/query_modules/src)
|
||||
|
||||
# Install the Python example and modules
|
||||
install(FILES example.py DESTINATION lib/memgraph/query_modules RENAME py_example.py)
|
||||
install(FILES graph_analyzer.py DESTINATION lib/memgraph/query_modules)
|
||||
|
149
query_modules/text_search_module.cpp
Normal file
149
query_modules/text_search_module.cpp
Normal file
@ -0,0 +1,149 @@
|
||||
// Copyright 2024 Memgraph Ltd.
|
||||
//
|
||||
// Use of this software is governed by the Business Source License
|
||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
// License, and you may not use this file except in compliance with the Business Source License.
|
||||
//
|
||||
// As of the Change Date specified in that file, in accordance with
|
||||
// the Business Source License, use of this software will be governed
|
||||
// by the Apache License, Version 2.0, included in the file
|
||||
// licenses/APL.txt.
|
||||
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <mgp.hpp>
|
||||
|
||||
namespace TextSearch {
|
||||
constexpr std::string_view kProcedureSearch = "search";
|
||||
constexpr std::string_view kProcedureRegexSearch = "regex_search";
|
||||
constexpr std::string_view kProcedureSearchAllProperties = "search_all";
|
||||
constexpr std::string_view kProcedureAggregate = "aggregate";
|
||||
constexpr std::string_view kParameterIndexName = "index_name";
|
||||
constexpr std::string_view kParameterSearchQuery = "search_query";
|
||||
constexpr std::string_view kParameterAggregationQuery = "aggregation_query";
|
||||
constexpr std::string_view kReturnNode = "node";
|
||||
constexpr std::string_view kReturnAggregation = "aggregation";
|
||||
const std::string kSearchAllPrefix = "all";
|
||||
|
||||
void Search(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory);
|
||||
void RegexSearch(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory);
|
||||
void SearchAllProperties(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory);
|
||||
void Aggregate(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory);
|
||||
} // namespace TextSearch
|
||||
|
||||
/// Procedure `search`: looks up `args[1]` (the search query) in the text index named by `args[0]` using
/// text_search_mode::SPECIFIED_PROPERTIES and emits one record per match, with the node in the "node" field.
/// Any exception is turned into the result's error message instead of propagating.
void TextSearch::Search(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory) {
  mgp::MemoryDispatcherGuard guard{memory};
  const auto record_factory = mgp::RecordFactory(result);
  auto arguments = mgp::List(args);

  try {
    // Keep owning copies of the arguments: a raw pointer taken from ValueString().data() is only valid
    // while the value returned by arguments[i] is alive, which is not guaranteed past that statement.
    const std::string index_name(arguments[0].ValueString());
    const std::string search_query(arguments[1].ValueString());

    for (const auto &node :
         mgp::SearchTextIndex(memgraph_graph, index_name, search_query, text_search_mode::SPECIFIED_PROPERTIES)) {
      auto record = record_factory.NewRecord();
      record.Insert(TextSearch::kReturnNode.data(), node.ValueNode());
    }
  } catch (const std::exception &e) {
    record_factory.SetErrorMessage(e.what());
  }
}
|
||||
|
||||
/// Procedure `regex_search`: looks up `args[1]` (the search query) in the text index named by `args[0]` using
/// text_search_mode::REGEX and emits one record per match, with the node in the "node" field.
/// Any exception is turned into the result's error message instead of propagating.
void TextSearch::RegexSearch(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory) {
  mgp::MemoryDispatcherGuard guard{memory};
  const auto record_factory = mgp::RecordFactory(result);
  auto arguments = mgp::List(args);

  try {
    // Keep owning copies of the arguments: a raw pointer taken from ValueString().data() is only valid
    // while the value returned by arguments[i] is alive, which is not guaranteed past that statement.
    const std::string index_name(arguments[0].ValueString());
    const std::string search_query(arguments[1].ValueString());

    for (const auto &node : mgp::SearchTextIndex(memgraph_graph, index_name, search_query, text_search_mode::REGEX)) {
      auto record = record_factory.NewRecord();
      record.Insert(TextSearch::kReturnNode.data(), node.ValueNode());
    }
  } catch (const std::exception &e) {
    record_factory.SetErrorMessage(e.what());
  }
}
|
||||
|
||||
/// Procedure `search_all`: prefixes `args[1]` (the search query) with kSearchAllPrefix and ':', looks the
/// resulting query up in the text index named by `args[0]` using text_search_mode::ALL_PROPERTIES, and emits
/// one record per match, with the node in the "node" field.
/// Any exception is turned into the result's error message instead of propagating.
void TextSearch::SearchAllProperties(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result,
                                     mgp_memory *memory) {
  mgp::MemoryDispatcherGuard guard{memory};
  const auto record_factory = mgp::RecordFactory(result);
  auto arguments = mgp::List(args);

  try {
    // Keep an owning copy: a raw pointer taken from ValueString().data() is only valid while the value
    // returned by arguments[i] is alive, which is not guaranteed past that statement.
    const std::string index_name(arguments[0].ValueString());
    // The formatted query must be stored in a named std::string. Taking .data() of the temporary returned by
    // fmt::format leaves a pointer to a buffer that is destroyed at the end of that statement.
    const std::string search_query = fmt::format("{}:{}", kSearchAllPrefix, arguments[1].ValueString());

    for (const auto &node :
         mgp::SearchTextIndex(memgraph_graph, index_name, search_query, text_search_mode::ALL_PROPERTIES)) {
      auto record = record_factory.NewRecord();
      record.Insert(TextSearch::kReturnNode.data(), node.ValueNode());
    }
  } catch (const std::exception &e) {
    record_factory.SetErrorMessage(e.what());
  }
}
|
||||
|
||||
/// Procedure `aggregate`: runs the aggregation `args[2]` over the matches of the search query `args[1]` in the
/// text index named by `args[0]` and emits a single record with the outcome in the "aggregation" field.
/// Any exception is turned into the result's error message instead of propagating.
void TextSearch::Aggregate(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory) {
  mgp::MemoryDispatcherGuard guard{memory};
  const auto record_factory = mgp::RecordFactory(result);
  auto arguments = mgp::List(args);

  try {
    // Keep owning copies of the arguments: a raw pointer taken from ValueString().data() is only valid
    // while the value returned by arguments[i] is alive, which is not guaranteed past that statement.
    const std::string index_name(arguments[0].ValueString());
    const std::string search_query(arguments[1].ValueString());
    const std::string aggregation_query(arguments[2].ValueString());

    const auto aggregation_result =
        mgp::AggregateOverTextIndex(memgraph_graph, index_name, search_query, aggregation_query);

    auto record = record_factory.NewRecord();
    record.Insert(TextSearch::kReturnAggregation.data(), aggregation_result.data());
  } catch (const std::exception &e) {
    record_factory.SetErrorMessage(e.what());
  }
}
|
||||
|
||||
extern "C" int mgp_init_module(struct mgp_module *module, struct mgp_memory *memory) {
|
||||
try {
|
||||
mgp::MemoryDispatcherGuard guard{memory};
|
||||
|
||||
AddProcedure(TextSearch::Search, TextSearch::kProcedureSearch, mgp::ProcedureType::Read,
|
||||
{
|
||||
mgp::Parameter(TextSearch::kParameterIndexName, mgp::Type::String),
|
||||
mgp::Parameter(TextSearch::kParameterSearchQuery, mgp::Type::String),
|
||||
},
|
||||
{mgp::Return(TextSearch::kReturnNode, mgp::Type::Node)}, module, memory);
|
||||
|
||||
AddProcedure(TextSearch::RegexSearch, TextSearch::kProcedureRegexSearch, mgp::ProcedureType::Read,
|
||||
{
|
||||
mgp::Parameter(TextSearch::kParameterIndexName, mgp::Type::String),
|
||||
mgp::Parameter(TextSearch::kParameterSearchQuery, mgp::Type::String),
|
||||
},
|
||||
{mgp::Return(TextSearch::kReturnNode, mgp::Type::Node)}, module, memory);
|
||||
|
||||
AddProcedure(TextSearch::SearchAllProperties, TextSearch::kProcedureSearchAllProperties, mgp::ProcedureType::Read,
|
||||
{
|
||||
mgp::Parameter(TextSearch::kParameterIndexName, mgp::Type::String),
|
||||
mgp::Parameter(TextSearch::kParameterSearchQuery, mgp::Type::String),
|
||||
},
|
||||
{mgp::Return(TextSearch::kReturnNode, mgp::Type::Node)}, module, memory);
|
||||
|
||||
AddProcedure(TextSearch::Aggregate, TextSearch::kProcedureAggregate, mgp::ProcedureType::Read,
|
||||
{
|
||||
mgp::Parameter(TextSearch::kParameterIndexName, mgp::Type::String),
|
||||
mgp::Parameter(TextSearch::kParameterSearchQuery, mgp::Type::String),
|
||||
mgp::Parameter(TextSearch::kParameterAggregationQuery, mgp::Type::String),
|
||||
},
|
||||
{mgp::Return(TextSearch::kReturnAggregation, mgp::Type::String)}, module, memory);
|
||||
} catch (const std::exception &e) {
|
||||
std::cerr << "Error while initializing query module: " << e.what() << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// Module teardown hook. It performs no work and always reports success (0).
extern "C" int mgp_shutdown_module() {
  constexpr int kSuccess = 0;
  return kSuccess;
}
|
@ -45,7 +45,7 @@ set(mg_single_node_v2_sources
|
||||
add_executable(memgraph ${mg_single_node_v2_sources})
|
||||
target_include_directories(memgraph PUBLIC ${CMAKE_SOURCE_DIR}/include)
|
||||
target_link_libraries(memgraph stdc++fs Threads::Threads
|
||||
mg-telemetry mg-communication mg-communication-metrics mg-memory mg-utils mg-license mg-settings mg-glue mg-flags mg::system mg::replication_handler)
|
||||
mg-telemetry mgcxx_text_search tantivy_text_search mg-communication mg-communication-metrics mg-memory mg-utils mg-license mg-settings mg-glue mg-flags mg::system mg::replication_handler)
|
||||
|
||||
# NOTE: `include/mg_procedure.syms` describes a pattern match for symbols which
|
||||
# should be dynamically exported, so that `dlopen` can correctly link th
|
||||
|
@ -311,7 +311,7 @@ class DbmsHandler {
|
||||
stats.triggers += info.triggers;
|
||||
stats.streams += info.streams;
|
||||
++stats.num_databases;
|
||||
stats.indices += storage_info.label_indices + storage_info.label_property_indices;
|
||||
stats.indices += storage_info.label_indices + storage_info.label_property_indices + storage_info.text_indices;
|
||||
stats.constraints += storage_info.existence_constraints + storage_info.unique_constraints;
|
||||
++stats.storage_modes[(int)storage_info.storage_mode];
|
||||
++stats.isolation_levels[(int)storage_info.isolation_level];
|
||||
|
@ -615,6 +615,7 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage
|
||||
auto vertex = transaction->FindVertex(delta.vertex_add_remove_label.gid, View::NEW);
|
||||
if (!vertex)
|
||||
throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__);
|
||||
// NOTE: Text search doesn’t have replication in scope yet (Phases 1 and 2)
|
||||
auto ret = vertex->AddLabel(transaction->NameToLabel(delta.vertex_add_remove_label.label));
|
||||
if (ret.HasError() || !ret.GetValue())
|
||||
throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__);
|
||||
@ -627,6 +628,7 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage
|
||||
auto vertex = transaction->FindVertex(delta.vertex_add_remove_label.gid, View::NEW);
|
||||
if (!vertex)
|
||||
throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__);
|
||||
// NOTE: Text search doesn’t have replication in scope yet (Phases 1 and 2)
|
||||
auto ret = vertex->RemoveLabel(transaction->NameToLabel(delta.vertex_add_remove_label.label));
|
||||
if (ret.HasError() || !ret.GetValue())
|
||||
throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__);
|
||||
@ -640,6 +642,7 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage
|
||||
auto vertex = transaction->FindVertex(delta.vertex_edge_set_property.gid, View::NEW);
|
||||
if (!vertex)
|
||||
throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__);
|
||||
// NOTE: Text search doesn't have replication in scope yet (Phases 1 and 2)
|
||||
auto ret = vertex->SetProperty(transaction->NameToProperty(delta.vertex_edge_set_property.property),
|
||||
delta.vertex_edge_set_property.value);
|
||||
if (ret.HasError())
|
||||
@ -853,6 +856,14 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage
|
||||
throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__);
|
||||
break;
|
||||
}
|
||||
case WalDeltaData::Type::TEXT_INDEX_CREATE: {
|
||||
// NOTE: Text search doesn’t have replication in scope yet (Phases 1 and 2)
|
||||
break;
|
||||
}
|
||||
case WalDeltaData::Type::TEXT_INDEX_DROP: {
|
||||
// NOTE: Text search doesn’t have replication in scope yet (Phases 1 and 2)
|
||||
break;
|
||||
}
|
||||
case WalDeltaData::Type::EXISTENCE_CONSTRAINT_CREATE: {
|
||||
spdlog::trace(" Create existence constraint on :{} ({})", delta.operation_label_property.label,
|
||||
delta.operation_label_property.property);
|
||||
|
@ -18,14 +18,15 @@
|
||||
|
||||
// Bolt server flags.
|
||||
// NOLINTNEXTLINE (cppcoreguidelines-avoid-non-const-global-variables)
|
||||
DEFINE_string(experimental_enabled, "",
|
||||
"Experimental features to be used, comma seperated. Options [system-replication, high-availability]");
|
||||
|
||||
DEFINE_string(
|
||||
experimental_enabled, "",
|
||||
"Experimental features to be used, comma-separated. Options [system-replication, text-search, high-availability]");
|
||||
using namespace std::string_view_literals;
|
||||
|
||||
namespace memgraph::flags {
|
||||
|
||||
auto const mapping = std::map{std::pair{"system-replication"sv, Experiments::SYSTEM_REPLICATION},
|
||||
std::pair{"text-search"sv, Experiments::TEXT_SEARCH},
|
||||
std::pair{"high-availability"sv, Experiments::HIGH_AVAILABILITY}};
|
||||
|
||||
auto ExperimentsInstance() -> Experiments & {
|
||||
@ -45,7 +46,7 @@ bool AreExperimentsEnabled(Experiments experiments) {
|
||||
void InitializeExperimental() {
|
||||
namespace rv = ranges::views;
|
||||
|
||||
auto const connonicalize_string = [](auto &&rng) {
|
||||
auto const canonicalize_string = [](auto &&rng) {
|
||||
auto const is_space = [](auto c) { return c == ' '; };
|
||||
auto const to_lower = [](unsigned char c) { return std::tolower(c); };
|
||||
|
||||
@ -56,7 +57,7 @@ void InitializeExperimental() {
|
||||
auto const mapping_end = mapping.cend();
|
||||
using underlying_type = std::underlying_type_t<Experiments>;
|
||||
auto to_set = underlying_type{};
|
||||
for (auto &&experiment : FLAGS_experimental_enabled | rv::split(',') | rv::transform(connonicalize_string)) {
|
||||
for (auto &&experiment : FLAGS_experimental_enabled | rv::split(',') | rv::transform(canonicalize_string)) {
|
||||
if (auto it = mapping.find(experiment); it != mapping_end) {
|
||||
to_set |= static_cast<underlying_type>(it->second);
|
||||
}
|
||||
|
@ -23,7 +23,8 @@ namespace memgraph::flags {
|
||||
// old experiments can be reused once code cleanup has happened
|
||||
enum class Experiments : uint8_t {
|
||||
SYSTEM_REPLICATION = 1 << 0,
|
||||
HIGH_AVAILABILITY = 1 << 1,
|
||||
TEXT_SEARCH = 1 << 1,
|
||||
HIGH_AVAILABILITY = 1 << 2,
|
||||
};
|
||||
|
||||
bool AreExperimentsEnabled(Experiments experiments);
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright 2023 Memgraph Ltd.
|
||||
// Copyright 2024 Memgraph Ltd.
|
||||
//
|
||||
// Use of this software is governed by the Business Source License
|
||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
@ -73,11 +73,11 @@ constexpr auto kLogToStderrGFlagsKey = "also_log_to_stderr";
|
||||
constexpr auto kCartesianProductEnabledSettingKey = "cartesian-product-enabled";
|
||||
constexpr auto kCartesianProductEnabledGFlagsKey = "cartesian-product-enabled";
|
||||
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
|
||||
std::atomic<double> execution_timeout_sec_; // Local cache-like thing
|
||||
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
|
||||
std::atomic<bool> cartesian_product_enabled_{true}; // Local cache-like thing
|
||||
// NOLINTBEGIN(cppcoreguidelines-avoid-non-const-global-variables)
|
||||
// Local cache-like thing
|
||||
std::atomic<double> execution_timeout_sec_;
|
||||
std::atomic<bool> cartesian_product_enabled_{true};
|
||||
// NOLINTEND(cppcoreguidelines-avoid-non-const-global-variables)
|
||||
|
||||
auto ToLLEnum(std::string_view val) {
|
||||
const auto ll_enum = memgraph::flags::LogLevelToEnum(val);
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright 2023 Memgraph Ltd.
|
||||
// Copyright 2024 Memgraph Ltd.
|
||||
//
|
||||
// Use of this software is governed by the Business Source License
|
||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
|
@ -634,6 +634,24 @@ class DbAccessor final {
|
||||
|
||||
bool EdgeTypeIndexExists(storage::EdgeTypeId edge_type) const { return accessor_->EdgeTypeIndexExists(edge_type); }
|
||||
|
||||
/// Tells whether a text index named `index_name` exists, as reported by accessor_->TextIndexExists.
bool TextIndexExists(const std::string &index_name) const {
  const bool index_found = accessor_->TextIndexExists(index_name);
  return index_found;
}
|
||||
|
||||
/// Forwards the vertex's underlying storage accessor (`vertex.impl_`) to accessor_->TextIndexAddVertex.
void TextIndexAddVertex(const VertexAccessor &vertex) {
  const auto &storage_vertex = vertex.impl_;
  accessor_->TextIndexAddVertex(storage_vertex);
}
|
||||
|
||||
/// Forwards the vertex's underlying storage accessor (`vertex.impl_`) and `removed_labels`
/// (empty by default) to accessor_->TextIndexUpdateVertex.
void TextIndexUpdateVertex(const VertexAccessor &vertex, const std::vector<storage::LabelId> &removed_labels = {}) {
  const auto &storage_vertex = vertex.impl_;
  accessor_->TextIndexUpdateVertex(storage_vertex, removed_labels);
}
|
||||
|
||||
std::vector<storage::Gid> TextIndexSearch(const std::string &index_name, const std::string &search_query,
|
||||
text_search_mode search_mode) const {
|
||||
return accessor_->TextIndexSearch(index_name, search_query, search_mode);
|
||||
}
|
||||
|
||||
std::string TextIndexAggregate(const std::string &index_name, const std::string &search_query,
|
||||
const std::string &aggregation_query) const {
|
||||
return accessor_->TextIndexAggregate(index_name, search_query, aggregation_query);
|
||||
}
|
||||
|
||||
std::optional<storage::LabelIndexStats> GetIndexStats(const storage::LabelId &label) const {
|
||||
return accessor_->GetIndexStats(label);
|
||||
}
|
||||
@ -717,6 +735,12 @@ class DbAccessor final {
|
||||
return accessor_->DropIndex(edge_type);
|
||||
}
|
||||
|
||||
/// Creates the text index `index_name` on `label` by forwarding to accessor_->CreateTextIndex,
/// handing this DbAccessor along as the third argument.
void CreateTextIndex(const std::string &index_name, storage::LabelId label) {
  auto *const self = this;
  accessor_->CreateTextIndex(index_name, label, self);
}
|
||||
|
||||
/// Drops the text index `index_name` by forwarding to accessor_->DropTextIndex.
void DropTextIndex(const std::string &index_name) {
  accessor_->DropTextIndex(index_name);
}
|
||||
|
||||
utils::BasicResult<storage::StorageExistenceConstraintDefinitionError, void> CreateExistenceConstraint(
|
||||
storage::LabelId label, storage::PropertyId property) {
|
||||
return accessor_->CreateExistenceConstraint(label, property);
|
||||
|
@ -252,6 +252,10 @@ void DumpLabelPropertyIndex(std::ostream *os, query::DbAccessor *dba, storage::L
|
||||
<< ");";
|
||||
}
|
||||
|
||||
/// Writes the statement `CREATE TEXT INDEX <index_name> ON :<label>;` to `*os`, with both the index name and
/// the label name passed through EscapeName.
void DumpTextIndex(std::ostream *os, query::DbAccessor *dba, const std::string &index_name, storage::LabelId label) {
  std::ostream &out = *os;
  // Emitted piece by piece, in the same left-to-right order as a single chained insertion.
  out << "CREATE TEXT INDEX ";
  out << EscapeName(index_name);
  out << " ON :";
  out << EscapeName(dba->LabelToName(label));
  out << ";";
}
|
||||
|
||||
void DumpExistenceConstraint(std::ostream *os, query::DbAccessor *dba, storage::LabelId label,
|
||||
storage::PropertyId property) {
|
||||
*os << "CREATE CONSTRAINT ON (u:" << EscapeName(dba->LabelToName(label)) << ") ASSERT EXISTS (u."
|
||||
@ -286,6 +290,8 @@ PullPlanDump::PullPlanDump(DbAccessor *dba, dbms::DatabaseAccess db_acc)
|
||||
CreateLabelIndicesPullChunk(),
|
||||
// Dump all label property indices
|
||||
CreateLabelPropertyIndicesPullChunk(),
|
||||
// Dump all text indices
|
||||
CreateTextIndicesPullChunk(),
|
||||
// Dump all existence constraints
|
||||
CreateExistenceConstraintsPullChunk(),
|
||||
// Dump all unique constraints
|
||||
@ -412,6 +418,34 @@ PullPlanDump::PullChunk PullPlanDump::CreateLabelPropertyIndicesPullChunk() {
|
||||
};
|
||||
}
|
||||
|
||||
/// Builds the pull chunk that dumps every text index as a `CREATE TEXT INDEX ...` statement.
///
/// The returned callable remembers how many indices it has already emitted (`global_index`). Each call
/// streams at most `*n` further statements (all remaining ones when `n` is empty) and returns the number
/// emitted by that call once the last index has been reached, or std::nullopt while indices remain.
PullPlanDump::PullChunk PullPlanDump::CreateTextIndicesPullChunk() {
  return [this, global_index = 0U](AnyStream *stream, std::optional<int> n) mutable -> std::optional<size_t> {
    // The index listing is fetched lazily, on first use.
    if (!indices_info_) {
      indices_info_.emplace(dba_->ListAllIndices());
    }
    const auto &text = indices_info_->text_indices;

    size_t local_counter = 0;
    for (; global_index < text.size() && (!n || local_counter < *n); ++global_index, ++local_counter) {
      const auto &text_index = text[global_index];
      std::ostringstream os;
      DumpTextIndex(&os, dba_, text_index.first, text_index.second);
      stream->Result({TypedValue(os.str())});
    }

    // Not all indices have been dumped yet: signal that this chunk must be pulled again.
    if (global_index != text.size()) {
      return std::nullopt;
    }
    return local_counter;
  };
}
|
||||
|
||||
PullPlanDump::PullChunk PullPlanDump::CreateExistenceConstraintsPullChunk() {
|
||||
return [this, global_index = 0U](AnyStream *stream, std::optional<int> n) mutable -> std::optional<size_t> {
|
||||
// Delay the construction of constraint vectors
|
||||
|
@ -55,6 +55,7 @@ struct PullPlanDump {
|
||||
|
||||
PullChunk CreateLabelIndicesPullChunk();
|
||||
PullChunk CreateLabelPropertyIndicesPullChunk();
|
||||
PullChunk CreateTextIndicesPullChunk();
|
||||
PullChunk CreateExistenceConstraintsPullChunk();
|
||||
PullChunk CreateUniqueConstraintsPullChunk();
|
||||
PullChunk CreateInternalIndexPullChunk();
|
||||
|
@ -433,4 +433,17 @@ class MultiDatabaseQueryInMulticommandTxException : public QueryException {
|
||||
SPECIALIZE_GET_EXCEPTION_NAME(MultiDatabaseQueryInMulticommandTxException)
|
||||
};
|
||||
|
||||
/// Query exception type for text index / text search failures; it reuses the QueryException constructors
/// and serves as the base class of TextSearchDisabledException.
class TextSearchException : public QueryException {
|
||||
using QueryException::QueryException;
|
||||
SPECIALIZE_GET_EXCEPTION_NAME(TextSearchException)
|
||||
};
|
||||
|
||||
/// Text search exception with a fixed message telling the user to start Memgraph with the experimental
/// text search feature enabled.
class TextSearchDisabledException : public TextSearchException {
|
||||
public:
|
||||
TextSearchDisabledException()
|
||||
: TextSearchException(
|
||||
"To use text indices and text search, start Memgraph with the experimental text search feature enabled.") {}
|
||||
SPECIALIZE_GET_EXCEPTION_NAME(TextSearchDisabledException)
|
||||
};
|
||||
|
||||
} // namespace memgraph::query
|
||||
|
@ -189,6 +189,9 @@ constexpr utils::TypeInfo query::IndexQuery::kType{utils::TypeId::AST_INDEX_QUER
|
||||
constexpr utils::TypeInfo query::EdgeIndexQuery::kType{utils::TypeId::AST_EDGE_INDEX_QUERY, "EdgeIndexQuery",
|
||||
&query::Query::kType};
|
||||
|
||||
// Type information for the TextIndexQuery AST node; its parent type is query::Query.
constexpr utils::TypeInfo query::TextIndexQuery::kType{utils::TypeId::AST_TEXT_INDEX_QUERY, "TextIndexQuery",
|
||||
&query::Query::kType};
|
||||
|
||||
constexpr utils::TypeInfo query::Create::kType{utils::TypeId::AST_CREATE, "Create", &query::Clause::kType};
|
||||
|
||||
constexpr utils::TypeInfo query::CallProcedure::kType{utils::TypeId::AST_CALL_PROCEDURE, "CallProcedure",
|
||||
|
@ -2273,6 +2273,37 @@ class EdgeIndexQuery : public memgraph::query::Query {
|
||||
friend class AstStorage;
|
||||
};
|
||||
|
||||
class TextIndexQuery : public memgraph::query::Query {
|
||||
public:
|
||||
static const utils::TypeInfo kType;
|
||||
const utils::TypeInfo &GetTypeInfo() const override { return kType; }
|
||||
|
||||
enum class Action { CREATE, DROP };
|
||||
|
||||
TextIndexQuery() = default;
|
||||
|
||||
DEFVISITABLE(QueryVisitor<void>);
|
||||
|
||||
memgraph::query::TextIndexQuery::Action action_;
|
||||
memgraph::query::LabelIx label_;
|
||||
std::string index_name_;
|
||||
|
||||
TextIndexQuery *Clone(AstStorage *storage) const override {
|
||||
TextIndexQuery *object = storage->Create<TextIndexQuery>();
|
||||
object->action_ = action_;
|
||||
object->label_ = storage->GetLabelIx(label_.name);
|
||||
object->index_name_ = index_name_;
|
||||
return object;
|
||||
}
|
||||
|
||||
protected:
|
||||
TextIndexQuery(Action action, LabelIx label, std::string index_name)
|
||||
: action_(action), label_(std::move(label)), index_name_(index_name) {}
|
||||
|
||||
private:
|
||||
friend class AstStorage;
|
||||
};
|
||||
|
||||
class Create : public memgraph::query::Clause {
|
||||
public:
|
||||
static const utils::TypeInfo kType;
|
||||
|
@ -83,6 +83,7 @@ class ExplainQuery;
|
||||
class ProfileQuery;
|
||||
class IndexQuery;
|
||||
class EdgeIndexQuery;
|
||||
class TextIndexQuery;
|
||||
class DatabaseInfoQuery;
|
||||
class SystemInfoQuery;
|
||||
class ConstraintQuery;
|
||||
@ -144,11 +145,11 @@ class ExpressionVisitor
|
||||
|
||||
template <class TResult>
|
||||
class QueryVisitor
|
||||
: public utils::Visitor<TResult, CypherQuery, ExplainQuery, ProfileQuery, IndexQuery, EdgeIndexQuery, AuthQuery,
|
||||
DatabaseInfoQuery, SystemInfoQuery, ConstraintQuery, DumpQuery, ReplicationQuery,
|
||||
LockPathQuery, FreeMemoryQuery, TriggerQuery, IsolationLevelQuery, CreateSnapshotQuery,
|
||||
StreamQuery, SettingQuery, VersionQuery, ShowConfigQuery, TransactionQueueQuery,
|
||||
StorageModeQuery, AnalyzeGraphQuery, MultiDatabaseQuery, ShowDatabasesQuery,
|
||||
EdgeImportModeQuery, CoordinatorQuery> {};
|
||||
: public utils::Visitor<TResult, CypherQuery, ExplainQuery, ProfileQuery, IndexQuery, EdgeIndexQuery,
|
||||
TextIndexQuery, AuthQuery, DatabaseInfoQuery, SystemInfoQuery, ConstraintQuery, DumpQuery,
|
||||
ReplicationQuery, LockPathQuery, FreeMemoryQuery, TriggerQuery, IsolationLevelQuery,
|
||||
CreateSnapshotQuery, StreamQuery, SettingQuery, VersionQuery, ShowConfigQuery,
|
||||
TransactionQueueQuery, StorageModeQuery, AnalyzeGraphQuery, MultiDatabaseQuery,
|
||||
ShowDatabasesQuery, EdgeImportModeQuery, CoordinatorQuery> {};
|
||||
|
||||
} // namespace memgraph::query
|
||||
|
@ -243,6 +243,13 @@ antlrcpp::Any CypherMainVisitor::visitIndexQuery(MemgraphCypher::IndexQueryConte
|
||||
return index_query;
|
||||
}
|
||||
|
||||
/// Visits the single child of a `textIndexQuery` node (create or drop), records the resulting
/// TextIndexQuery as the visitor's current query and returns it.
antlrcpp::Any CypherMainVisitor::visitTextIndexQuery(MemgraphCypher::TextIndexQueryContext *ctx) {
  MG_ASSERT(ctx->children.size() == 1, "TextIndexQuery should have exactly one child!");

  auto visited_child = ctx->children[0]->accept(this);
  TextIndexQuery *parsed_query = std::any_cast<TextIndexQuery *>(visited_child);

  query_ = parsed_query;
  return parsed_query;
}
|
||||
|
||||
antlrcpp::Any CypherMainVisitor::visitCreateIndex(MemgraphCypher::CreateIndexContext *ctx) {
|
||||
auto *index_query = storage_->Create<IndexQuery>();
|
||||
index_query->action_ = IndexQuery::Action::CREATE;
|
||||
@ -286,6 +293,21 @@ antlrcpp::Any CypherMainVisitor::visitDropEdgeIndex(MemgraphCypher::DropEdgeInde
|
||||
return index_query;
|
||||
}
|
||||
|
||||
/// Builds a TextIndexQuery with Action::CREATE from a `createTextIndex` parse node.
/// The index name is read from the `indexName` child and the label from the `labelName` child;
/// the label is registered through AddLabel.
antlrcpp::Any CypherMainVisitor::visitCreateTextIndex(MemgraphCypher::CreateTextIndexContext *ctx) {
  auto *text_index_query = storage_->Create<TextIndexQuery>();
  text_index_query->action_ = TextIndexQuery::Action::CREATE;

  // The children are visited in the same order as they appear in the statement: name first, label second.
  auto visited_name = ctx->indexName()->accept(this);
  text_index_query->index_name_ = std::any_cast<std::string>(visited_name);

  auto visited_label = ctx->labelName()->accept(this);
  text_index_query->label_ = AddLabel(std::any_cast<std::string>(visited_label));

  return text_index_query;
}
|
||||
|
||||
/// Builds a TextIndexQuery with Action::DROP from a `dropTextIndex` parse node.
/// Only the index name is set; the node's label is left untouched.
antlrcpp::Any CypherMainVisitor::visitDropTextIndex(MemgraphCypher::DropTextIndexContext *ctx) {
  auto *text_index_query = storage_->Create<TextIndexQuery>();
  text_index_query->action_ = TextIndexQuery::Action::DROP;

  auto visited_name = ctx->indexName()->accept(this);
  text_index_query->index_name_ = std::any_cast<std::string>(visited_name);

  return text_index_query;
}
|
||||
|
||||
antlrcpp::Any CypherMainVisitor::visitAuthQuery(MemgraphCypher::AuthQueryContext *ctx) {
|
||||
MG_ASSERT(ctx->children.size() == 1, "AuthQuery should have exactly one child!");
|
||||
auto *auth_query = std::any_cast<AuthQuery *>(ctx->children[0]->accept(this));
|
||||
|
@ -153,6 +153,11 @@ class CypherMainVisitor : public antlropencypher::MemgraphCypherBaseVisitor {
|
||||
*/
|
||||
antlrcpp::Any visitEdgeIndexQuery(MemgraphCypher::EdgeIndexQueryContext *ctx) override;
|
||||
|
||||
/**
|
||||
* @return TextIndexQuery*
|
||||
*/
|
||||
antlrcpp::Any visitTextIndexQuery(MemgraphCypher::TextIndexQueryContext *ctx) override;
|
||||
|
||||
/**
|
||||
* @return ExplainQuery*
|
||||
*/
|
||||
@ -500,7 +505,7 @@ class CypherMainVisitor : public antlropencypher::MemgraphCypherBaseVisitor {
|
||||
antlrcpp::Any visitCreateIndex(MemgraphCypher::CreateIndexContext *ctx) override;
|
||||
|
||||
/**
|
||||
* @return DropIndex*
|
||||
* @return IndexQuery*
|
||||
*/
|
||||
antlrcpp::Any visitDropIndex(MemgraphCypher::DropIndexContext *ctx) override;
|
||||
|
||||
@ -514,6 +519,16 @@ class CypherMainVisitor : public antlropencypher::MemgraphCypherBaseVisitor {
|
||||
*/
|
||||
antlrcpp::Any visitDropEdgeIndex(MemgraphCypher::DropEdgeIndexContext *ctx) override;
|
||||
|
||||
/**
|
||||
* @return TextIndexQuery*
|
||||
*/
|
||||
antlrcpp::Any visitCreateTextIndex(MemgraphCypher::CreateTextIndexContext *ctx) override;
|
||||
|
||||
/**
|
||||
* @return TextIndexQuery*
|
||||
*/
|
||||
antlrcpp::Any visitDropTextIndex(MemgraphCypher::DropTextIndexContext *ctx) override;
|
||||
|
||||
/**
|
||||
* @return AuthQuery*
|
||||
*/
|
||||
|
@ -25,6 +25,7 @@ statement : query ;
|
||||
|
||||
query : cypherQuery
|
||||
| indexQuery
|
||||
| textIndexQuery
|
||||
| explainQuery
|
||||
| profileQuery
|
||||
| databaseInfoQuery
|
||||
@ -65,6 +66,8 @@ cypherQuery : singleQuery ( cypherUnion )* ( queryMemoryLimit )? ;
|
||||
|
||||
indexQuery : createIndex | dropIndex;
|
||||
|
||||
textIndexQuery : createTextIndex | dropTextIndex;
|
||||
|
||||
singleQuery : clause ( clause )* ;
|
||||
|
||||
cypherUnion : ( UNION ALL singleQuery )
|
||||
@ -342,6 +345,12 @@ createIndex : CREATE INDEX ON ':' labelName ( '(' propertyKeyName ')' )? ;
|
||||
|
||||
dropIndex : DROP INDEX ON ':' labelName ( '(' propertyKeyName ')' )? ;
|
||||
|
||||
indexName : symbolicName ;
|
||||
|
||||
// Creates a named text index on a label, e.g. `CREATE TEXT INDEX myIndex ON :Label`.
createTextIndex : CREATE TEXT INDEX indexName ON ':' labelName ;
|
||||
|
||||
// Drops a text index by its name only (no label), e.g. `DROP TEXT INDEX myIndex`.
dropTextIndex : DROP TEXT INDEX indexName ;
|
||||
|
||||
doubleLiteral : FloatingLiteral ;
|
||||
|
||||
cypherKeyword : ALL
|
||||
|
@ -131,6 +131,7 @@ SHOW : S H O W ;
|
||||
SINGLE : S I N G L E ;
|
||||
STARTS : S T A R T S ;
|
||||
STORAGE : S T O R A G E ;
|
||||
TEXT : T E X T ;
|
||||
THEN : T H E N ;
|
||||
TRUE : T R U E ;
|
||||
UNION : U N I O N ;
|
||||
|
@ -134,6 +134,7 @@ symbolicName : UnescapedSymbolicName
|
||||
query : cypherQuery
|
||||
| indexQuery
|
||||
| edgeIndexQuery
|
||||
| textIndexQuery
|
||||
| explainQuery
|
||||
| profileQuery
|
||||
| databaseInfoQuery
|
||||
|
@ -29,6 +29,8 @@ class PrivilegeExtractor : public QueryVisitor<void>, public HierarchicalTreeVis
|
||||
|
||||
void Visit(EdgeIndexQuery & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::INDEX); }
|
||||
|
||||
void Visit(TextIndexQuery & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::INDEX); }
|
||||
|
||||
void Visit(AnalyzeGraphQuery & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::INDEX); }
|
||||
|
||||
void Visit(AuthQuery & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::AUTH); }
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright 2023 Memgraph Ltd.
|
||||
// Copyright 2024 Memgraph Ltd.
|
||||
//
|
||||
// Use of this software is governed by the Business Source License
|
||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
|
@ -39,6 +39,7 @@
|
||||
#include "dbms/dbms_handler.hpp"
|
||||
#include "dbms/global.hpp"
|
||||
#include "dbms/inmemory/storage_helper.hpp"
|
||||
#include "flags/experimental.hpp"
|
||||
#include "flags/replication.hpp"
|
||||
#include "flags/run_time_configurable.hpp"
|
||||
#include "glue/communication.hpp"
|
||||
@ -2709,6 +2710,75 @@ PreparedQuery PrepareEdgeIndexQuery(ParsedQuery parsed_query, bool in_explicit_t
|
||||
RWType::W};
|
||||
}
|
||||
|
||||
/// Prepares a `CREATE TEXT INDEX` / `DROP TEXT INDEX` query for execution.
///
/// @param parsed_query            parsed query whose `query` member is a TextIndexQuery
/// @param in_explicit_transaction true when called inside a multicommand transaction
/// @param notifications           output list that receives the index notification once the query ran
/// @param current_db              current database; its DB accessor and execution accessor must be set
/// @return a write (`RWType::W`) PreparedQuery whose handler performs the create/drop and then commits
/// @throws IndexInMulticommandTxException when invoked inside an explicit transaction
///
/// The returned handler throws TextSearchDisabledException at execution time if the experimental
/// text search feature is not enabled.
PreparedQuery PrepareTextIndexQuery(ParsedQuery parsed_query, bool in_explicit_transaction,
                                    std::vector<Notification> *notifications, CurrentDB &current_db) {
  if (in_explicit_transaction) {
    throw IndexInMulticommandTxException();
  }

  auto *text_index_query = utils::Downcast<TextIndexQuery>(parsed_query.query);
  std::function<void(Notification &)> handler;

  // TODO: we will need transaction for replication
  MG_ASSERT(current_db.db_acc_, "Text index query expects a current DB");
  auto &db_acc = *current_db.db_acc_;

  MG_ASSERT(current_db.db_transactional_accessor_, "Text index query expects a current DB transaction");
  auto *dba = &*current_db.execution_db_accessor_;

  // Creating an index influences computed plan costs.
  auto invalidate_plan_cache = [plan_cache = db_acc->plan_cache()] {
    plan_cache->WithLock([&](auto &cache) { cache.reset(); });
  };

  const auto &index_name = text_index_query->index_name_;

  Notification index_notification(SeverityLevel::INFO);
  switch (text_index_query->action_) {
    case TextIndexQuery::Action::CREATE: {
      // Only the CREATE form carries a label, so the label is resolved here rather than up front;
      // resolving it unconditionally would call NameToLabel with an empty name for DROP queries.
      auto *storage = db_acc->storage();
      auto label = storage->NameToLabel(text_index_query->label_.name);

      index_notification.code = NotificationCode::CREATE_INDEX;
      index_notification.title = fmt::format("Created text index on label {}.", text_index_query->label_.name);
      // TODO: not just storage + invalidate_plan_cache. Need a DB transaction (for replication)
      handler = [dba, label, index_name,
                 invalidate_plan_cache = std::move(invalidate_plan_cache)](Notification & /*index_notification*/) {
        if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
          throw TextSearchDisabledException();
        }
        dba->CreateTextIndex(index_name, label);
        // Declared after the create call, so the plan cache is only reset when creation did not throw.
        utils::OnScopeExit invalidator(invalidate_plan_cache);
      };
      break;
    }
    case TextIndexQuery::Action::DROP: {
      index_notification.code = NotificationCode::DROP_INDEX;
      // A DROP query has no label; report the index name instead of an empty label.
      index_notification.title = fmt::format("Dropped text index {}.", index_name);
      // TODO: not just storage + invalidate_plan_cache. Need a DB transaction (for replication)
      handler = [dba, index_name,
                 invalidate_plan_cache = std::move(invalidate_plan_cache)](Notification & /*index_notification*/) {
        if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
          throw TextSearchDisabledException();
        }
        dba->DropTextIndex(index_name);
        // Declared after the drop call, so the plan cache is only reset when dropping did not throw.
        utils::OnScopeExit invalidator(invalidate_plan_cache);
      };
      break;
    }
  }

  return PreparedQuery{
      {},
      std::move(parsed_query.required_privileges),
      [handler = std::move(handler), notifications, index_notification = std::move(index_notification)](
          AnyStream * /*stream*/, std::optional<int> /*unused*/) mutable {
        handler(index_notification);
        notifications->push_back(index_notification);
        return QueryHandlerResult::COMMIT;  // TODO: revisit the handler result once replication is supported
      },
      RWType::W};
}
|
||||
|
||||
PreparedQuery PrepareAuthQuery(ParsedQuery parsed_query, bool in_explicit_transaction,
|
||||
InterpreterContext *interpreter_context, Interpreter &interpreter) {
|
||||
if (in_explicit_transaction) {
|
||||
@ -3499,7 +3569,7 @@ PreparedQuery PrepareDatabaseInfoQuery(ParsedQuery parsed_query, bool in_explici
|
||||
}
|
||||
|
||||
MG_ASSERT(current_db.db_acc_, "Database info query expects a current DB");
|
||||
MG_ASSERT(current_db.db_transactional_accessor_, "Database ifo query expects a current DB transaction");
|
||||
MG_ASSERT(current_db.db_transactional_accessor_, "Database info query expects a current DB transaction");
|
||||
auto *dba = &*current_db.execution_db_accessor_;
|
||||
|
||||
auto *info_query = utils::Downcast<DatabaseInfoQuery>(parsed_query.query);
|
||||
@ -3514,10 +3584,11 @@ PreparedQuery PrepareDatabaseInfoQuery(ParsedQuery parsed_query, bool in_explici
|
||||
const std::string_view label_index_mark{"label"};
|
||||
const std::string_view label_property_index_mark{"label+property"};
|
||||
const std::string_view edge_type_index_mark{"edge-type"};
|
||||
const std::string_view text_index_mark{"text"};
|
||||
auto info = dba->ListAllIndices();
|
||||
auto storage_acc = database->Access();
|
||||
std::vector<std::vector<TypedValue>> results;
|
||||
results.reserve(info.label.size() + info.label_property.size());
|
||||
results.reserve(info.label.size() + info.label_property.size() + info.text_indices.size());
|
||||
for (const auto &item : info.label) {
|
||||
results.push_back({TypedValue(label_index_mark), TypedValue(storage->LabelToName(item)), TypedValue(),
|
||||
TypedValue(static_cast<int>(storage_acc->ApproximateVertexCount(item)))});
|
||||
@ -3532,6 +3603,10 @@ PreparedQuery PrepareDatabaseInfoQuery(ParsedQuery parsed_query, bool in_explici
|
||||
results.push_back({TypedValue(edge_type_index_mark), TypedValue(storage->EdgeTypeToName(item)), TypedValue(),
|
||||
TypedValue(static_cast<int>(storage_acc->ApproximateEdgeCount(item)))});
|
||||
}
|
||||
for (const auto &[index_name, label] : info.text_indices) {
|
||||
results.push_back({TypedValue(fmt::format("{} (name: {})", text_index_mark, index_name)),
|
||||
TypedValue(storage->LabelToName(label)), TypedValue(), TypedValue()});
|
||||
}
|
||||
std::sort(results.begin(), results.end(), [&label_index_mark](const auto &record_1, const auto &record_2) {
|
||||
const auto type_1 = record_1[0].ValueString();
|
||||
const auto type_2 = record_2[0].ValueString();
|
||||
@ -4293,13 +4368,15 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string,
|
||||
utils::Downcast<ProfileQuery>(parsed_query.query) || utils::Downcast<DumpQuery>(parsed_query.query) ||
|
||||
utils::Downcast<TriggerQuery>(parsed_query.query) || utils::Downcast<AnalyzeGraphQuery>(parsed_query.query) ||
|
||||
utils::Downcast<IndexQuery>(parsed_query.query) || utils::Downcast<EdgeIndexQuery>(parsed_query.query) ||
|
||||
utils::Downcast<DatabaseInfoQuery>(parsed_query.query) || utils::Downcast<ConstraintQuery>(parsed_query.query);
|
||||
utils::Downcast<TextIndexQuery>(parsed_query.query) || utils::Downcast<DatabaseInfoQuery>(parsed_query.query) ||
|
||||
utils::Downcast<ConstraintQuery>(parsed_query.query);
|
||||
|
||||
if (!in_explicit_transaction_ && requires_db_transaction) {
|
||||
// TODO: ATM only a single database, will change when we have multiple database transactions
|
||||
bool could_commit = utils::Downcast<CypherQuery>(parsed_query.query) != nullptr;
|
||||
bool unique = utils::Downcast<IndexQuery>(parsed_query.query) != nullptr ||
|
||||
utils::Downcast<EdgeIndexQuery>(parsed_query.query) != nullptr ||
|
||||
utils::Downcast<TextIndexQuery>(parsed_query.query) != nullptr ||
|
||||
utils::Downcast<ConstraintQuery>(parsed_query.query) != nullptr ||
|
||||
upper_case_query.find(kSchemaAssert) != std::string::npos;
|
||||
SetupDatabaseTransaction(could_commit, unique);
|
||||
@ -4337,6 +4414,9 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string,
|
||||
} else if (utils::Downcast<EdgeIndexQuery>(parsed_query.query)) {
|
||||
prepared_query = PrepareEdgeIndexQuery(std::move(parsed_query), in_explicit_transaction_,
|
||||
&query_execution->notifications, current_db_);
|
||||
} else if (utils::Downcast<TextIndexQuery>(parsed_query.query)) {
|
||||
prepared_query = PrepareTextIndexQuery(std::move(parsed_query), in_explicit_transaction_,
|
||||
&query_execution->notifications, current_db_);
|
||||
} else if (utils::Downcast<AnalyzeGraphQuery>(parsed_query.query)) {
|
||||
prepared_query = PrepareAnalyzeGraphQuery(std::move(parsed_query), in_explicit_transaction_, current_db_);
|
||||
} else if (utils::Downcast<AuthQuery>(parsed_query.query)) {
|
||||
|
@ -32,6 +32,7 @@
|
||||
#include "spdlog/spdlog.h"
|
||||
|
||||
#include "csv/parsing.hpp"
|
||||
#include "flags/experimental.hpp"
|
||||
#include "license/license.hpp"
|
||||
#include "query/auth_checker.hpp"
|
||||
#include "query/context.hpp"
|
||||
@ -266,6 +267,10 @@ VertexAccessor &CreateLocalVertex(const NodeCreationInfo &node_info, Frame *fram
|
||||
}
|
||||
MultiPropsInitChecked(&new_node, properties);
|
||||
|
||||
if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
|
||||
context.db_accessor->TextIndexAddVertex(new_node);
|
||||
}
|
||||
|
||||
(*frame)[node_info.symbol] = new_node;
|
||||
return (*frame)[node_info.symbol].ValueVertex();
|
||||
}
|
||||
@ -2991,6 +2996,9 @@ bool SetProperty::SetPropertyCursor::Pull(Frame &frame, ExecutionContext &contex
|
||||
context.trigger_context_collector->RegisterSetObjectProperty(lhs.ValueVertex(), self_.property_,
|
||||
TypedValue{std::move(old_value)}, TypedValue{rhs});
|
||||
}
|
||||
if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
|
||||
context.db_accessor->TextIndexUpdateVertex(lhs.ValueVertex());
|
||||
}
|
||||
break;
|
||||
}
|
||||
case TypedValue::Type::Edge: {
|
||||
@ -3147,6 +3155,9 @@ void SetPropertiesOnRecord(TRecordAccessor *record, const TypedValue &rhs, SetPr
|
||||
case TypedValue::Type::Vertex: {
|
||||
PropertiesMap new_properties = get_props(rhs.ValueVertex());
|
||||
update_props(new_properties);
|
||||
if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
|
||||
context->db_accessor->TextIndexUpdateVertex(rhs.ValueVertex());
|
||||
}
|
||||
break;
|
||||
}
|
||||
case TypedValue::Type::Map: {
|
||||
@ -3204,6 +3215,9 @@ bool SetProperties::SetPropertiesCursor::Pull(Frame &frame, ExecutionContext &co
|
||||
}
|
||||
#endif
|
||||
SetPropertiesOnRecord(&lhs.ValueVertex(), rhs, self_.op_, &context, cached_name_id_);
|
||||
if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
|
||||
context.db_accessor->TextIndexUpdateVertex(lhs.ValueVertex());
|
||||
}
|
||||
break;
|
||||
case TypedValue::Type::Edge:
|
||||
#ifdef MG_ENTERPRISE
|
||||
@ -3295,6 +3309,10 @@ bool SetLabels::SetLabelsCursor::Pull(Frame &frame, ExecutionContext &context) {
|
||||
}
|
||||
}
|
||||
|
||||
if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
|
||||
context.db_accessor->TextIndexUpdateVertex(vertex);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -3366,6 +3384,9 @@ bool RemoveProperty::RemovePropertyCursor::Pull(Frame &frame, ExecutionContext &
|
||||
}
|
||||
#endif
|
||||
remove_prop(&lhs.ValueVertex());
|
||||
if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
|
||||
context.db_accessor->TextIndexUpdateVertex(lhs.ValueVertex());
|
||||
}
|
||||
break;
|
||||
case TypedValue::Type::Edge:
|
||||
#ifdef MG_ENTERPRISE
|
||||
@ -3458,6 +3479,10 @@ bool RemoveLabels::RemoveLabelsCursor::Pull(Frame &frame, ExecutionContext &cont
|
||||
}
|
||||
}
|
||||
|
||||
if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
|
||||
context.db_accessor->TextIndexUpdateVertex(vertex, EvaluateLabels(self_.labels_, evaluator, context.db_accessor));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright 2023 Memgraph Ltd.
|
||||
// Copyright 2024 Memgraph Ltd.
|
||||
//
|
||||
// Use of this software is governed by the Business Source License
|
||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
|
@ -23,6 +23,8 @@
|
||||
#include <utility>
|
||||
#include <variant>
|
||||
|
||||
#include "flags/experimental.hpp"
|
||||
#include "flags/run_time_configurable.hpp"
|
||||
#include "license/license.hpp"
|
||||
#include "mg_procedure.h"
|
||||
#include "module.hpp"
|
||||
@ -32,6 +34,7 @@
|
||||
#include "query/procedure/fmt.hpp"
|
||||
#include "query/procedure/mg_procedure_helpers.hpp"
|
||||
#include "query/stream/common.hpp"
|
||||
#include "storage/v2/indices/text_index.hpp"
|
||||
#include "storage/v2/property_value.hpp"
|
||||
#include "storage/v2/storage_mode.hpp"
|
||||
#include "storage/v2/view.hpp"
|
||||
@ -1843,6 +1846,11 @@ mgp_error mgp_vertex_set_property(struct mgp_vertex *v, const char *property_nam
|
||||
const auto result = std::visit(
|
||||
[prop_key, property_value](auto &impl) { return impl.SetProperty(prop_key, ToPropertyValue(*property_value)); },
|
||||
v->impl);
|
||||
if (memgraph::flags::AreExperimentsEnabled(memgraph::flags::Experiments::TEXT_SEARCH) && !result.HasError()) {
|
||||
auto v_impl = v->getImpl();
|
||||
v->graph->getImpl()->TextIndexUpdateVertex(v_impl);
|
||||
}
|
||||
|
||||
if (result.HasError()) {
|
||||
switch (result.GetError()) {
|
||||
case memgraph::storage::Error::DELETED_OBJECT:
|
||||
@ -1899,6 +1907,11 @@ mgp_error mgp_vertex_set_properties(struct mgp_vertex *v, struct mgp_map *proper
|
||||
}
|
||||
|
||||
const auto result = v->getImpl().UpdateProperties(props);
|
||||
if (memgraph::flags::AreExperimentsEnabled(memgraph::flags::Experiments::TEXT_SEARCH) && !result.HasError()) {
|
||||
auto v_impl = v->getImpl();
|
||||
v->graph->getImpl()->TextIndexUpdateVertex(v_impl);
|
||||
}
|
||||
|
||||
if (result.HasError()) {
|
||||
switch (result.GetError()) {
|
||||
case memgraph::storage::Error::DELETED_OBJECT:
|
||||
@ -1956,6 +1969,10 @@ mgp_error mgp_vertex_add_label(struct mgp_vertex *v, mgp_label label) {
|
||||
}
|
||||
|
||||
const auto result = std::visit([label_id](auto &impl) { return impl.AddLabel(label_id); }, v->impl);
|
||||
if (memgraph::flags::AreExperimentsEnabled(memgraph::flags::Experiments::TEXT_SEARCH) && !result.HasError()) {
|
||||
auto v_impl = v->getImpl();
|
||||
v->graph->getImpl()->TextIndexUpdateVertex(v_impl);
|
||||
}
|
||||
|
||||
if (result.HasError()) {
|
||||
switch (result.GetError()) {
|
||||
@ -1998,6 +2015,10 @@ mgp_error mgp_vertex_remove_label(struct mgp_vertex *v, mgp_label label) {
|
||||
throw ImmutableObjectException{"Cannot remove a label from an immutable vertex!"};
|
||||
}
|
||||
const auto result = std::visit([label_id](auto &impl) { return impl.RemoveLabel(label_id); }, v->impl);
|
||||
if (memgraph::flags::AreExperimentsEnabled(memgraph::flags::Experiments::TEXT_SEARCH) && !result.HasError()) {
|
||||
auto v_impl = v->getImpl();
|
||||
v->graph->getImpl()->TextIndexUpdateVertex(v_impl, {label_id});
|
||||
}
|
||||
|
||||
if (result.HasError()) {
|
||||
switch (result.GetError()) {
|
||||
@ -2590,7 +2611,7 @@ mgp_error mgp_edge_iter_properties(mgp_edge *e, mgp_memory *memory, mgp_properti
|
||||
mgp_error mgp_graph_get_vertex_by_id(mgp_graph *graph, mgp_vertex_id id, mgp_memory *memory, mgp_vertex **result) {
|
||||
return WrapExceptions(
|
||||
[graph, id, memory]() -> mgp_vertex * {
|
||||
std::optional<memgraph::query::VertexAccessor> maybe_vertex = std::visit(
|
||||
auto maybe_vertex = std::visit(
|
||||
[graph, id](auto *impl) {
|
||||
return impl->FindVertex(memgraph::storage::Gid::FromInt(id.as_int), graph->view);
|
||||
},
|
||||
@ -2967,6 +2988,10 @@ mgp_error mgp_graph_create_vertex(struct mgp_graph *graph, mgp_memory *memory, m
|
||||
}
|
||||
auto *vertex = std::visit(
|
||||
[=](auto *impl) { return NewRawMgpObject<mgp_vertex>(memory, impl->InsertVertex(), graph); }, graph->impl);
|
||||
if (memgraph::flags::AreExperimentsEnabled(memgraph::flags::Experiments::TEXT_SEARCH)) {
|
||||
auto v_impl = vertex->getImpl();
|
||||
vertex->graph->getImpl()->TextIndexAddVertex(v_impl);
|
||||
}
|
||||
|
||||
auto &ctx = graph->ctx;
|
||||
ctx->execution_stats[memgraph::query::ExecutionStats::Key::CREATED_NODES] += 1;
|
||||
@ -3324,6 +3349,140 @@ mgp_error mgp_graph_delete_edge(struct mgp_graph *graph, mgp_edge *edge) {
|
||||
});
|
||||
}
|
||||
|
||||
/// C API entry point: stores in `*result` whether a text index named `index_name` exists.
///
/// For a plain DbAccessor the check is made directly; for a SubgraphDbAccessor it is made on the
/// accessor returned by `GetAccessor()`. The call is wrapped in WrapExceptions, whose mgp_error is returned.
mgp_error mgp_graph_has_text_index(mgp_graph *graph, const char *index_name, int *result) {
  return WrapExceptions([graph, index_name, result]() {
    const auto store_existence = memgraph::utils::Overloaded{
        [index_name, result](memgraph::query::DbAccessor *db_accessor) {
          *result = db_accessor->TextIndexExists(index_name);
        },
        [index_name, result](memgraph::query::SubgraphDbAccessor *subgraph_accessor) {
          *result = subgraph_accessor->GetAccessor()->TextIndexExists(index_name);
        }};
    std::visit(store_existence, graph->impl);
  });
}
|
||||
|
||||
/// Looks up the vertex with the given `id` in `graph` (using `graph->view`) and wraps it in a newly
/// allocated mgp_vertex created with `memory`.
///
/// For a subgraph accessor the found vertex is wrapped in a SubgraphVertexAccessor bound to the subgraph.
/// @return the new mgp_vertex, or nullptr when `FindVertex` yields no vertex.
mgp_vertex *GetVertexByGid(mgp_graph *graph, memgraph::storage::Gid id, mgp_memory *memory) {
  return std::visit(
      memgraph::utils::Overloaded{
          [graph, id, memory](memgraph::query::DbAccessor *db_accessor) -> mgp_vertex * {
            if (auto found = db_accessor->FindVertex(id, graph->view); found) {
              return NewRawMgpObject<mgp_vertex>(memory, *found, graph);
            }
            return nullptr;
          },
          [graph, id, memory](memgraph::query::SubgraphDbAccessor *subgraph_accessor) -> mgp_vertex * {
            if (auto found = subgraph_accessor->FindVertex(id, graph->view); found) {
              return NewRawMgpObject<mgp_vertex>(
                  memory, memgraph::query::SubgraphVertexAccessor(*found, subgraph_accessor->getGraph()), graph);
            }
            return nullptr;
          }},
      graph->impl);
}
|
||||
|
||||
/// Packs the outcome of a text index search into a newly created mgp_map stored in `*result`.
///
/// @param graph      graph used to resolve vertex ids into vertices
/// @param memory     memory used for every mgp object created here
/// @param result     output; receives the created map
/// @param vertex_ids ids of the vertices found by the search
/// @param error_msg  when set, the map contains only the key "error_msg" holding this message
///
/// Without an error the map contains the key "search_results" holding a list of vertices.
/// @throws std::logic_error when any of the mgp_* construction or insertion calls reports an error
void WrapTextSearch(mgp_graph *graph, mgp_memory *memory, mgp_map **result,
                    const std::vector<memgraph::storage::Gid> &vertex_ids = {},
                    const std::optional<std::string> &error_msg = std::nullopt) {
  if (const auto err = mgp_map_make_empty(memory, result); err != mgp_error::MGP_ERROR_NO_ERROR) {
    throw std::logic_error("Retrieving text search results failed during creation of a mgp_map");
  }

  // Error path: report only the message. Handling it first avoids building a result list that
  // would never be inserted into the map.
  if (error_msg.has_value()) {
    mgp_value *error_value{nullptr};
    if (const auto err = mgp_value_make_string(error_msg->c_str(), memory, &error_value);
        err != mgp_error::MGP_ERROR_NO_ERROR) {
      throw std::logic_error("Retrieving text search results failed during creation of a string mgp_value");
    }
    if (const auto err = mgp_map_insert(*result, "error_msg", error_value); err != mgp_error::MGP_ERROR_NO_ERROR) {
      throw std::logic_error("Retrieving text index search error failed during insertion into mgp_map");
    }
    return;
  }

  mgp_list *search_results{nullptr};
  if (const auto err = mgp_list_make_empty(vertex_ids.size(), memory, &search_results);
      err != mgp_error::MGP_ERROR_NO_ERROR) {
    throw std::logic_error("Retrieving text search results failed during creation of a mgp_list");
  }

  for (const auto &vertex_id : vertex_ids) {
    // GetVertexByGid returns nullptr when the id cannot be resolved; such ids are skipped instead of
    // passing a null vertex on to mgp_value_make_vertex.
    // NOTE(review): presumably this happens for vertices not visible in the current view/subgraph - confirm.
    mgp_vertex *found_vertex = GetVertexByGid(graph, vertex_id, memory);
    if (found_vertex == nullptr) {
      continue;
    }

    mgp_value *vertex_value{nullptr};
    if (const auto err = mgp_value_make_vertex(found_vertex, &vertex_value); err != mgp_error::MGP_ERROR_NO_ERROR) {
      throw std::logic_error("Retrieving text search results failed during creation of a vertex mgp_value");
    }
    if (const auto err = mgp_list_append(search_results, vertex_value); err != mgp_error::MGP_ERROR_NO_ERROR) {
      throw std::logic_error(
          "Retrieving text search results failed during insertion of the mgp_value into the result list");
    }
  }

  mgp_value *search_results_value{nullptr};
  if (const auto err = mgp_value_make_list(search_results, &search_results_value);
      err != mgp_error::MGP_ERROR_NO_ERROR) {
    throw std::logic_error("Retrieving text search results failed during creation of a list mgp_value");
  }

  if (const auto err = mgp_map_insert(*result, "search_results", search_results_value);
      err != mgp_error::MGP_ERROR_NO_ERROR) {
    throw std::logic_error("Retrieving text index search results failed during insertion into mgp_map");
  }
}
|
||||
|
||||
/// Packs the outcome of a text index aggregation into a newly created mgp_map stored in `*result`.
///
/// @param memory             memory used for every mgp object created here
/// @param result             output; receives the created map
/// @param aggregation_result aggregation output, stored under the key "aggregation_results"
/// @param error_msg          when set, it is stored under the key "error_msg" and the aggregation
///                           output is not stored
/// @throws std::logic_error when any of the mgp_* construction or insertion calls reports an error
void WrapTextIndexAggregation(mgp_memory *memory, mgp_map **result, const std::string &aggregation_result,
                              const std::optional<std::string> &error_msg = std::nullopt) {
  if (mgp_map_make_empty(memory, result) != mgp_error::MGP_ERROR_NO_ERROR) {
    throw std::logic_error("Retrieving text search results failed during creation of a mgp_map");
  }

  // A single string value is produced: the error message if there is one, the aggregation output otherwise.
  const bool failed = error_msg.has_value();
  const std::string &payload = failed ? error_msg.value() : aggregation_result;

  mgp_value *payload_value{nullptr};
  if (mgp_value_make_string(payload.data(), memory, &payload_value) != mgp_error::MGP_ERROR_NO_ERROR) {
    throw std::logic_error("Retrieving text search results failed during creation of a string mgp_value");
  }

  if (!failed) {
    if (mgp_map_insert(*result, "aggregation_results", payload_value) != mgp_error::MGP_ERROR_NO_ERROR) {
      throw std::logic_error("Retrieving text index aggregation results failed during insertion into mgp_map");
    }
    return;
  }

  if (mgp_map_insert(*result, "error_msg", payload_value) != mgp_error::MGP_ERROR_NO_ERROR) {
    throw std::logic_error("Retrieving text index aggregation error failed during insertion into mgp_map");
  }
}
|
||||
|
||||
/// Runs a text index search and returns the outcome as an mgp_map through `result`.
///
/// A QueryException raised by the search is not propagated; its message is handed to WrapTextSearch, which stores
/// it under "error_msg". Otherwise the found vertices are stored under "search_results".
///
/// @param graph        Graph whose accessor performs the search.
/// @param index_name   Name of the text index to search.
/// @param search_query Query passed to the index.
/// @param search_mode  Search mode passed to the index.
/// @param memory       Memory handle used for building the result.
/// @param result       Out-parameter that receives the result map.
/// @return The mgp_error produced by WrapExceptions for the wrapped call.
mgp_error mgp_graph_search_text_index(mgp_graph *graph, const char *index_name, const char *search_query,
                                      text_search_mode search_mode, mgp_memory *memory, mgp_map **result) {
  return WrapExceptions([graph, memory, index_name, search_query, search_mode, result]() {
    std::vector<memgraph::storage::Gid> found_vertices_ids;
    std::optional<std::string> error_msg;  // stays empty unless the search itself reports a query error
    try {
      found_vertices_ids = graph->getImpl()->TextIndexSearch(index_name, search_query, search_mode);
    } catch (const memgraph::query::QueryException &e) {
      // The exception is only read, so it is caught by reference to const.
      error_msg = e.what();
    }
    WrapTextSearch(graph, memory, result, found_vertices_ids, error_msg);
  });
}
|
||||
|
||||
/// Runs an aggregation over text index search results and returns the outcome as an mgp_map through `result`.
///
/// A QueryException raised by the aggregation is not propagated; its message is handed to
/// WrapTextIndexAggregation, which stores it under "error_msg". Otherwise the aggregation output is stored under
/// "aggregation_results".
///
/// @param graph             Graph whose accessor performs the aggregation.
/// @param index_name        Name of the text index to query.
/// @param search_query      Query selecting the documents to aggregate over.
/// @param aggregation_query Aggregation passed to the index.
/// @param memory            Memory handle used for building the result.
/// @param result            Out-parameter that receives the result map.
/// @return The mgp_error produced by WrapExceptions for the wrapped call.
mgp_error mgp_graph_aggregate_over_text_index(mgp_graph *graph, const char *index_name, const char *search_query,
                                              const char *aggregation_query, mgp_memory *memory, mgp_map **result) {
  return WrapExceptions([graph, memory, index_name, search_query, aggregation_query, result]() {
    std::string search_results;
    std::optional<std::string> error_msg;  // stays empty unless the aggregation reports a query error
    try {
      search_results = graph->getImpl()->TextIndexAggregate(index_name, search_query, aggregation_query);
    } catch (const memgraph::query::QueryException &e) {
      // The exception is only read, so it is caught by reference to const.
      error_msg = e.what();
    }
    WrapTextIndexAggregation(memory, result, search_results, error_msg);
  });
}
|
||||
|
||||
#ifdef MG_ENTERPRISE
|
||||
namespace {
|
||||
void NextPermitted(mgp_vertices_iterator &it) {
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright 2023 Memgraph Ltd.
|
||||
// Copyright 2024 Memgraph Ltd.
|
||||
//
|
||||
// Use of this software is governed by the Business Source License
|
||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
@ -562,6 +562,13 @@ struct mgp_graph {
|
||||
memgraph::query::ExecutionContext *ctx;
|
||||
memgraph::storage::StorageMode storage_mode;
|
||||
|
||||
/// Returns a DbAccessor pointer for whichever accessor alternative `impl` currently holds: a DbAccessor is
/// returned as-is, a SubgraphDbAccessor is asked for its accessor via GetAccessor().
memgraph::query::DbAccessor *getImpl() const {
  const auto unwrap = memgraph::utils::Overloaded{
      [](memgraph::query::DbAccessor *accessor) { return accessor; },
      [](memgraph::query::SubgraphDbAccessor *subgraph) { return subgraph->GetAccessor(); }};
  return std::visit(unwrap, this->impl);
}
|
||||
|
||||
static mgp_graph WritableGraph(memgraph::query::DbAccessor &acc, memgraph::storage::View view,
|
||||
memgraph::query::ExecutionContext &ctx) {
|
||||
return mgp_graph{&acc, view, &ctx, acc.GetStorageMode()};
|
||||
|
@ -20,6 +20,7 @@ add_library(mg-storage-v2 STATIC
|
||||
vertex_info_cache.cpp
|
||||
storage.cpp
|
||||
indices/indices.cpp
|
||||
indices/text_index.cpp
|
||||
all_vertices_iterable.cpp
|
||||
edges_iterable.cpp
|
||||
vertices_iterable.cpp
|
||||
@ -45,4 +46,5 @@ add_library(mg-storage-v2 STATIC
|
||||
inmemory/replication/recovery.cpp
|
||||
)
|
||||
|
||||
target_link_libraries(mg-storage-v2 mg::replication Threads::Threads mg-utils gflags absl::flat_hash_map mg-rpc mg-slk mg-events mg-memory)
|
||||
target_include_directories(mg-storage-v2 PUBLIC ${CMAKE_SOURCE_DIR}/include)
|
||||
target_link_libraries(mg-storage-v2 mg::replication Threads::Threads mg-utils mg-flags gflags absl::flat_hash_map mg-rpc mg-slk mg-events mg-memory mgcxx_text_search tantivy_text_search)
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright 2023 Memgraph Ltd.
|
||||
// Copyright 2024 Memgraph Ltd.
|
||||
//
|
||||
// Use of this software is governed by the Business Source License
|
||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
@ -26,6 +26,7 @@ constexpr const char *kVertexCountDescr = "vertex_count";
|
||||
constexpr const char *kEdgeDountDescr = "edge_count";
|
||||
constexpr const char *kLabelIndexStr = "label_index";
|
||||
constexpr const char *kLabelPropertyIndexStr = "label_property_index";
|
||||
constexpr const char *kTextIndexStr = "text_index";
|
||||
constexpr const char *kExistenceConstraintsStr = "existence_constraints";
|
||||
constexpr const char *kUniqueConstraintsStr = "unique_constraints";
|
||||
} // namespace
|
||||
@ -144,6 +145,31 @@ bool DurableMetadata::PersistLabelPropertyIndexAndExistenceConstraintDeletion(La
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DurableMetadata::PersistTextIndexCreation(const std::string &index_name, LabelId label) {
|
||||
const std::string index_name_label_pair = index_name + "," + label.ToString();
|
||||
if (auto text_index_store = durability_kvstore_.Get(kTextIndexStr); text_index_store.has_value()) {
|
||||
std::string &value = text_index_store.value();
|
||||
value += "|";
|
||||
value += index_name_label_pair;
|
||||
return durability_kvstore_.Put(kTextIndexStr, value);
|
||||
}
|
||||
return durability_kvstore_.Put(kTextIndexStr, index_name_label_pair);
|
||||
}
|
||||
|
||||
bool DurableMetadata::PersistTextIndexDeletion(const std::string &index_name, LabelId label) {
|
||||
const std::string index_name_label_pair = index_name + "," + label.ToString();
|
||||
if (auto text_index_store = durability_kvstore_.Get(kTextIndexStr); text_index_store.has_value()) {
|
||||
const std::string &value = text_index_store.value();
|
||||
std::vector<std::string> text_indices = utils::Split(value, "|");
|
||||
std::erase(text_indices, index_name_label_pair);
|
||||
if (text_indices.empty()) {
|
||||
return durability_kvstore_.Delete(kTextIndexStr);
|
||||
}
|
||||
return durability_kvstore_.Put(kTextIndexStr, utils::Join(text_indices, "|"));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DurableMetadata::PersistUniqueConstraintCreation(LabelId label, const std::set<PropertyId> &properties) {
|
||||
const std::string entry = utils::GetKeyForUniqueConstraintsDurability(label, properties);
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright 2023 Memgraph Ltd.
|
||||
// Copyright 2024 Memgraph Ltd.
|
||||
//
|
||||
// Use of this software is governed by the Business Source License
|
||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
@ -53,6 +53,10 @@ class DurableMetadata {
|
||||
bool PersistLabelPropertyIndexAndExistenceConstraintDeletion(LabelId label, PropertyId property,
|
||||
const std::string &key);
|
||||
|
||||
bool PersistTextIndexCreation(const std::string &index_name, LabelId label);
|
||||
|
||||
bool PersistTextIndexDeletion(const std::string &index_name, LabelId label);
|
||||
|
||||
bool PersistUniqueConstraintCreation(LabelId label, const std::set<PropertyId> &properties);
|
||||
|
||||
bool PersistUniqueConstraintDeletion(LabelId label, const std::set<PropertyId> &properties);
|
||||
|
@ -29,6 +29,8 @@
|
||||
#include <rocksdb/utilities/transaction.h>
|
||||
#include <rocksdb/utilities/transaction_db.h>
|
||||
|
||||
#include "flags/experimental.hpp"
|
||||
#include "flags/run_time_configurable.hpp"
|
||||
#include "kvstore/kvstore.hpp"
|
||||
#include "spdlog/spdlog.h"
|
||||
#include "storage/v2/constraints/unique_constraints.hpp"
|
||||
@ -856,6 +858,7 @@ StorageInfo DiskStorage::GetInfo(memgraph::replication_coordination_glue::Replic
|
||||
const auto &lbl = access->ListAllIndices();
|
||||
info.label_indices = lbl.label.size();
|
||||
info.label_property_indices = lbl.label_property.size();
|
||||
info.text_indices = lbl.text_indices.size();
|
||||
const auto &con = access->ListAllConstraints();
|
||||
info.existence_constraints = con.existence.size();
|
||||
info.unique_constraints = con.unique.size();
|
||||
@ -1670,6 +1673,18 @@ utils::BasicResult<StorageManipulationError, void> DiskStorage::DiskAccessor::Co
|
||||
case MetadataDelta::Action::LABEL_PROPERTY_INDEX_STATS_CLEAR: {
|
||||
throw utils::NotYetImplemented("ClearIndexStats(stats) is not implemented for DiskStorage.");
|
||||
} break;
|
||||
case MetadataDelta::Action::TEXT_INDEX_CREATE: {
|
||||
const auto &info = md_delta.text_index;
|
||||
if (!disk_storage->durable_metadata_.PersistTextIndexCreation(info.index_name, info.label)) {
|
||||
return StorageManipulationError{PersistenceError{}};
|
||||
}
|
||||
} break;
|
||||
case MetadataDelta::Action::TEXT_INDEX_DROP: {
|
||||
const auto &info = md_delta.text_index;
|
||||
if (!disk_storage->durable_metadata_.PersistTextIndexDeletion(info.index_name, info.label)) {
|
||||
return StorageManipulationError{PersistenceError{}};
|
||||
}
|
||||
} break;
|
||||
case MetadataDelta::Action::EXISTENCE_CONSTRAINT_CREATE: {
|
||||
const auto &info = md_delta.label_property;
|
||||
if (!disk_storage->durable_metadata_.PersistLabelPropertyIndexAndExistenceConstraintCreation(
|
||||
@ -1768,6 +1783,9 @@ utils::BasicResult<StorageManipulationError, void> DiskStorage::DiskAccessor::Co
|
||||
return StorageManipulationError{SerializationError{}};
|
||||
}
|
||||
spdlog::trace("rocksdb: Commit successful");
|
||||
if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
|
||||
disk_storage->indices_.text_index_.Commit();
|
||||
}
|
||||
|
||||
is_transaction_active_ = false;
|
||||
|
||||
@ -1886,6 +1904,9 @@ void DiskStorage::DiskAccessor::Abort() {
|
||||
// query_plan_accumulate_aggregate.cpp
|
||||
transaction_.disk_transaction_->Rollback();
|
||||
transaction_.disk_transaction_->ClearSnapshot();
|
||||
if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
|
||||
storage_->indices_.text_index_.Rollback();
|
||||
}
|
||||
delete transaction_.disk_transaction_;
|
||||
transaction_.disk_transaction_ = nullptr;
|
||||
is_transaction_active_ = false;
|
||||
@ -2092,7 +2113,11 @@ IndicesInfo DiskStorage::DiskAccessor::ListAllIndices() const {
|
||||
auto *disk_label_index = static_cast<DiskLabelIndex *>(on_disk->indices_.label_index_.get());
|
||||
auto *disk_label_property_index =
|
||||
static_cast<DiskLabelPropertyIndex *>(on_disk->indices_.label_property_index_.get());
|
||||
return {disk_label_index->ListIndices(), disk_label_property_index->ListIndices()};
|
||||
auto &text_index = storage_->indices_.text_index_;
|
||||
return {disk_label_index->ListIndices(),
|
||||
disk_label_property_index->ListIndices(),
|
||||
{/* edge type indices */},
|
||||
text_index.ListIndices()};
|
||||
}
|
||||
ConstraintsInfo DiskStorage::DiskAccessor::ListAllConstraints() const {
|
||||
auto *disk_storage = static_cast<DiskStorage *>(storage_);
|
||||
|
@ -151,7 +151,8 @@ void RecoverConstraints(const RecoveredIndicesAndConstraints::ConstraintsMetadat
|
||||
|
||||
void RecoverIndicesAndStats(const RecoveredIndicesAndConstraints::IndicesMetadata &indices_metadata, Indices *indices,
|
||||
utils::SkipList<Vertex> *vertices, NameIdMapper *name_id_mapper,
|
||||
const std::optional<ParallelizedSchemaCreationInfo> ¶llel_exec_info) {
|
||||
const std::optional<ParallelizedSchemaCreationInfo> ¶llel_exec_info,
|
||||
const std::optional<std::filesystem::path> &storage_dir) {
|
||||
spdlog::info("Recreating indices from metadata.");
|
||||
|
||||
// Recover label indices.
|
||||
@ -211,6 +212,26 @@ void RecoverIndicesAndStats(const RecoveredIndicesAndConstraints::IndicesMetadat
|
||||
}
|
||||
spdlog::info("Edge-type indices are recreated.");
|
||||
|
||||
if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
|
||||
// Recover text indices.
|
||||
spdlog::info("Recreating {} text indices from metadata.", indices_metadata.text_indices.size());
|
||||
auto &mem_text_index = indices->text_index_;
|
||||
for (const auto &[index_name, label] : indices_metadata.text_indices) {
|
||||
try {
|
||||
if (!storage_dir.has_value()) {
|
||||
throw RecoveryFailure("There must exist a storage directory in order to recover text indices!");
|
||||
}
|
||||
|
||||
mem_text_index.RecoverIndex(storage_dir.value(), index_name, label, vertices->access(), name_id_mapper);
|
||||
} catch (...) {
|
||||
throw RecoveryFailure("The text index must be created here!");
|
||||
}
|
||||
spdlog::info("Text index {} on :{} is recreated from metadata", index_name,
|
||||
name_id_mapper->IdToName(label.AsUint()));
|
||||
}
|
||||
spdlog::info("Text indices are recreated.");
|
||||
}
|
||||
|
||||
spdlog::info("Indices are recreated.");
|
||||
}
|
||||
|
||||
@ -331,8 +352,13 @@ std::optional<RecoveryInfo> Recovery::RecoverData(std::string *uuid, Replication
|
||||
repl_storage_state.epoch_.SetEpoch(std::move(recovered_snapshot->snapshot_info.epoch_id));
|
||||
|
||||
if (!utils::DirExists(wal_directory_)) {
|
||||
std::optional<std::filesystem::path> storage_dir = std::nullopt;
|
||||
if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
|
||||
storage_dir = config.durability.storage_directory;
|
||||
}
|
||||
|
||||
RecoverIndicesAndStats(indices_constraints.indices, indices, vertices, name_id_mapper,
|
||||
GetParallelExecInfoIndices(recovery_info, config));
|
||||
GetParallelExecInfoIndices(recovery_info, config), storage_dir);
|
||||
RecoverConstraints(indices_constraints.constraints, constraints, vertices, name_id_mapper,
|
||||
GetParallelExecInfo(recovery_info, config));
|
||||
return recovered_snapshot->recovery_info;
|
||||
@ -467,8 +493,13 @@ std::optional<RecoveryInfo> Recovery::RecoverData(std::string *uuid, Replication
|
||||
spdlog::info("All necessary WAL files are loaded successfully.");
|
||||
}
|
||||
|
||||
std::optional<std::filesystem::path> storage_dir = std::nullopt;
|
||||
if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
|
||||
storage_dir = config.durability.storage_directory;
|
||||
}
|
||||
|
||||
RecoverIndicesAndStats(indices_constraints.indices, indices, vertices, name_id_mapper,
|
||||
GetParallelExecInfoIndices(recovery_info, config));
|
||||
GetParallelExecInfoIndices(recovery_info, config), storage_dir);
|
||||
RecoverConstraints(indices_constraints.constraints, constraints, vertices, name_id_mapper,
|
||||
GetParallelExecInfo(recovery_info, config));
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright 2023 Memgraph Ltd.
|
||||
// Copyright 2024 Memgraph Ltd.
|
||||
//
|
||||
// Use of this software is governed by the Business Source License
|
||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
@ -102,7 +102,8 @@ std::optional<std::vector<WalDurabilityInfo>> GetWalFiles(const std::filesystem:
|
||||
/// @throw RecoveryFailure
|
||||
void RecoverIndicesAndStats(const RecoveredIndicesAndConstraints::IndicesMetadata &indices_metadata, Indices *indices,
|
||||
utils::SkipList<Vertex> *vertices, NameIdMapper *name_id_mapper,
|
||||
const std::optional<ParallelizedSchemaCreationInfo> ¶llel_exec_info = std::nullopt);
|
||||
const std::optional<ParallelizedSchemaCreationInfo> ¶llel_exec_info = std::nullopt,
|
||||
const std::optional<std::filesystem::path> &storage_dir = std::nullopt);
|
||||
|
||||
// Helper function used to recover all discovered constraints. The
|
||||
// constraints must be recovered after the data recovery is done
|
||||
|
@ -64,6 +64,8 @@ enum class Marker : uint8_t {
|
||||
DELTA_LABEL_PROPERTY_INDEX_STATS_CLEAR = 0x64,
|
||||
DELTA_EDGE_TYPE_INDEX_CREATE = 0x65,
|
||||
DELTA_EDGE_TYPE_INDEX_DROP = 0x66,
|
||||
DELTA_TEXT_INDEX_CREATE = 0x67,
|
||||
DELTA_TEXT_INDEX_DROP = 0x68,
|
||||
|
||||
VALUE_FALSE = 0x00,
|
||||
VALUE_TRUE = 0xff,
|
||||
@ -110,6 +112,8 @@ static const Marker kMarkersAll[] = {
|
||||
Marker::DELTA_LABEL_PROPERTY_INDEX_DROP,
|
||||
Marker::DELTA_EDGE_TYPE_INDEX_CREATE,
|
||||
Marker::DELTA_EDGE_TYPE_INDEX_DROP,
|
||||
Marker::DELTA_TEXT_INDEX_CREATE,
|
||||
Marker::DELTA_TEXT_INDEX_DROP,
|
||||
Marker::DELTA_EXISTENCE_CONSTRAINT_CREATE,
|
||||
Marker::DELTA_EXISTENCE_CONSTRAINT_DROP,
|
||||
Marker::DELTA_UNIQUE_CONSTRAINT_CREATE,
|
||||
|
@ -44,6 +44,7 @@ struct RecoveredIndicesAndConstraints {
|
||||
std::vector<std::pair<LabelId, LabelIndexStats>> label_stats;
|
||||
std::vector<std::pair<LabelId, std::pair<PropertyId, LabelPropertyIndexStats>>> label_property_stats;
|
||||
std::vector<EdgeTypeId> edge;
|
||||
std::vector<std::pair<std::string, LabelId>> text_indices;
|
||||
} indices;
|
||||
|
||||
struct ConstraintsMetadata {
|
||||
|
@ -353,6 +353,8 @@ std::optional<PropertyValue> Decoder::ReadPropertyValue() {
|
||||
case Marker::DELTA_LABEL_PROPERTY_INDEX_DROP:
|
||||
case Marker::DELTA_EDGE_TYPE_INDEX_CREATE:
|
||||
case Marker::DELTA_EDGE_TYPE_INDEX_DROP:
|
||||
case Marker::DELTA_TEXT_INDEX_CREATE:
|
||||
case Marker::DELTA_TEXT_INDEX_DROP:
|
||||
case Marker::DELTA_EXISTENCE_CONSTRAINT_CREATE:
|
||||
case Marker::DELTA_EXISTENCE_CONSTRAINT_DROP:
|
||||
case Marker::DELTA_UNIQUE_CONSTRAINT_CREATE:
|
||||
@ -459,6 +461,8 @@ bool Decoder::SkipPropertyValue() {
|
||||
case Marker::DELTA_LABEL_PROPERTY_INDEX_DROP:
|
||||
case Marker::DELTA_EDGE_TYPE_INDEX_CREATE:
|
||||
case Marker::DELTA_EDGE_TYPE_INDEX_DROP:
|
||||
case Marker::DELTA_TEXT_INDEX_CREATE:
|
||||
case Marker::DELTA_TEXT_INDEX_DROP:
|
||||
case Marker::DELTA_EXISTENCE_CONSTRAINT_CREATE:
|
||||
case Marker::DELTA_EXISTENCE_CONSTRAINT_DROP:
|
||||
case Marker::DELTA_UNIQUE_CONSTRAINT_CREATE:
|
||||
|
@ -13,6 +13,8 @@
|
||||
|
||||
#include <thread>
|
||||
|
||||
#include "flags/experimental.hpp"
|
||||
#include "flags/run_time_configurable.hpp"
|
||||
#include "spdlog/spdlog.h"
|
||||
#include "storage/v2/durability/exceptions.hpp"
|
||||
#include "storage/v2/durability/paths.hpp"
|
||||
@ -2004,6 +2006,24 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis
|
||||
spdlog::info("Metadata of edge-type indices are recovered.");
|
||||
}
|
||||
|
||||
// Recover text indices.
|
||||
if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
|
||||
auto size = snapshot.ReadUint();
|
||||
if (!size) throw RecoveryFailure("Couldn't recover the number of text indices!");
|
||||
spdlog::info("Recovering metadata of {} text indices.", *size);
|
||||
for (uint64_t i = 0; i < *size; ++i) {
|
||||
auto index_name = snapshot.ReadString();
|
||||
if (!index_name.has_value()) throw RecoveryFailure("Couldn't read text index name!");
|
||||
auto label = snapshot.ReadUint();
|
||||
if (!label) throw RecoveryFailure("Couldn't read text index label!");
|
||||
AddRecoveredIndexConstraint(&indices_constraints.indices.text_indices,
|
||||
{index_name.value(), get_label_from_id(*label)}, "The text index already exists!");
|
||||
SPDLOG_TRACE("Recovered metadata of text index {} for :{}", index_name.value(),
|
||||
name_id_mapper->IdToName(snapshot_id_map.at(*label)));
|
||||
}
|
||||
spdlog::info("Metadata of text indices are recovered.");
|
||||
}
|
||||
|
||||
spdlog::info("Metadata of indices are recovered.");
|
||||
}
|
||||
|
||||
@ -2493,6 +2513,16 @@ void CreateSnapshot(Storage *storage, Transaction *transaction, const std::files
|
||||
write_mapping(item);
|
||||
}
|
||||
}
|
||||
|
||||
// Write text indices.
|
||||
if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
|
||||
auto text_indices = storage->indices_.text_index_.ListIndices();
|
||||
snapshot.WriteUint(text_indices.size());
|
||||
for (const auto &[index_name, label] : text_indices) {
|
||||
snapshot.WriteString(index_name);
|
||||
write_mapping(label);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Write constraints.
|
||||
|
@ -25,6 +25,8 @@ enum class StorageMetadataOperation {
|
||||
LABEL_PROPERTY_INDEX_STATS_CLEAR,
|
||||
EDGE_TYPE_INDEX_CREATE,
|
||||
EDGE_TYPE_INDEX_DROP,
|
||||
TEXT_INDEX_CREATE,
|
||||
TEXT_INDEX_DROP,
|
||||
EXISTENCE_CONSTRAINT_CREATE,
|
||||
EXISTENCE_CONSTRAINT_DROP,
|
||||
UNIQUE_CONSTRAINT_CREATE,
|
||||
|
@ -99,6 +99,10 @@ Marker OperationToMarker(StorageMetadataOperation operation) {
|
||||
return Marker::DELTA_EDGE_TYPE_INDEX_CREATE;
|
||||
case StorageMetadataOperation::EDGE_TYPE_INDEX_DROP:
|
||||
return Marker::DELTA_EDGE_TYPE_INDEX_DROP;
|
||||
case StorageMetadataOperation::TEXT_INDEX_CREATE:
|
||||
return Marker::DELTA_TEXT_INDEX_CREATE;
|
||||
case StorageMetadataOperation::TEXT_INDEX_DROP:
|
||||
return Marker::DELTA_TEXT_INDEX_DROP;
|
||||
case StorageMetadataOperation::EXISTENCE_CONSTRAINT_CREATE:
|
||||
return Marker::DELTA_EXISTENCE_CONSTRAINT_CREATE;
|
||||
case StorageMetadataOperation::EXISTENCE_CONSTRAINT_DROP:
|
||||
@ -172,6 +176,10 @@ WalDeltaData::Type MarkerToWalDeltaDataType(Marker marker) {
|
||||
return WalDeltaData::Type::LABEL_PROPERTY_INDEX_CREATE;
|
||||
case Marker::DELTA_LABEL_PROPERTY_INDEX_DROP:
|
||||
return WalDeltaData::Type::LABEL_PROPERTY_INDEX_DROP;
|
||||
case Marker::DELTA_TEXT_INDEX_CREATE:
|
||||
return WalDeltaData::Type::TEXT_INDEX_CREATE;
|
||||
case Marker::DELTA_TEXT_INDEX_DROP:
|
||||
return WalDeltaData::Type::TEXT_INDEX_DROP;
|
||||
case Marker::DELTA_LABEL_PROPERTY_INDEX_STATS_SET:
|
||||
return WalDeltaData::Type::LABEL_PROPERTY_INDEX_STATS_SET;
|
||||
case Marker::DELTA_LABEL_PROPERTY_INDEX_STATS_CLEAR:
|
||||
@ -382,6 +390,21 @@ WalDeltaData ReadSkipWalDeltaData(BaseDecoder *decoder) {
|
||||
if (!decoder->SkipString()) throw RecoveryFailure("Invalid WAL data!");
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case WalDeltaData::Type::TEXT_INDEX_CREATE:
|
||||
case WalDeltaData::Type::TEXT_INDEX_DROP: {
|
||||
if constexpr (read_data) {
|
||||
auto index_name = decoder->ReadString();
|
||||
if (!index_name) throw RecoveryFailure("Invalid WAL data!");
|
||||
delta.operation_text.index_name = std::move(*index_name);
|
||||
auto label = decoder->ReadString();
|
||||
if (!label) throw RecoveryFailure("Invalid WAL data!");
|
||||
delta.operation_text.label = std::move(*label);
|
||||
} else {
|
||||
if (!decoder->SkipString() || !decoder->SkipString()) throw RecoveryFailure("Invalid WAL data!");
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -529,6 +552,12 @@ bool operator==(const WalDeltaData &a, const WalDeltaData &b) {
|
||||
|
||||
// Text index deltas carry their own payload (index name + label) and get a dedicated comparison. These labels
// are kept ahead of the label-property index labels so that the latter still fall through to the
// operation_label_property comparison that follows, instead of being compared through operation_text.
case WalDeltaData::Type::TEXT_INDEX_CREATE:
case WalDeltaData::Type::TEXT_INDEX_DROP:
  return a.operation_text.index_name == b.operation_text.index_name &&
         a.operation_text.label == b.operation_text.label;
case WalDeltaData::Type::LABEL_PROPERTY_INDEX_CREATE:
case WalDeltaData::Type::LABEL_PROPERTY_INDEX_DROP:
|
||||
case WalDeltaData::Type::EXISTENCE_CONSTRAINT_CREATE:
|
||||
case WalDeltaData::Type::EXISTENCE_CONSTRAINT_DROP:
|
||||
return a.operation_label_property.label == b.operation_label_property.label &&
|
||||
@ -675,7 +704,8 @@ void EncodeTransactionEnd(BaseEncoder *encoder, uint64_t timestamp) {
|
||||
}
|
||||
|
||||
void EncodeOperation(BaseEncoder *encoder, NameIdMapper *name_id_mapper, StorageMetadataOperation operation,
|
||||
LabelId label, const std::set<PropertyId> &properties, const LabelIndexStats &stats,
|
||||
const std::optional<std::string> text_index_name, LabelId label,
|
||||
const std::set<PropertyId> &properties, const LabelIndexStats &stats,
|
||||
const LabelPropertyIndexStats &property_stats, uint64_t timestamp) {
|
||||
encoder->WriteMarker(Marker::SECTION_DELTA);
|
||||
encoder->WriteUint(timestamp);
|
||||
@ -731,6 +761,14 @@ void EncodeOperation(BaseEncoder *encoder, NameIdMapper *name_id_mapper, Storage
|
||||
case StorageMetadataOperation::EDGE_TYPE_INDEX_DROP: {
|
||||
MG_ASSERT(false, "Invalid function call!");
|
||||
}
|
||||
case StorageMetadataOperation::TEXT_INDEX_CREATE:
|
||||
case StorageMetadataOperation::TEXT_INDEX_DROP: {
|
||||
MG_ASSERT(text_index_name.has_value(), "Text indices must be named!");
|
||||
encoder->WriteMarker(OperationToMarker(operation));
|
||||
encoder->WriteString(text_index_name.value());
|
||||
encoder->WriteString(name_id_mapper->IdToName(label.AsUint()));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -752,6 +790,8 @@ void EncodeOperation(BaseEncoder *encoder, NameIdMapper *name_id_mapper, Storage
|
||||
case StorageMetadataOperation::LABEL_INDEX_STATS_SET:
|
||||
case StorageMetadataOperation::LABEL_PROPERTY_INDEX_CREATE:
|
||||
case StorageMetadataOperation::LABEL_PROPERTY_INDEX_DROP:
|
||||
case StorageMetadataOperation::TEXT_INDEX_CREATE:
|
||||
case StorageMetadataOperation::TEXT_INDEX_DROP:
|
||||
case StorageMetadataOperation::EXISTENCE_CONSTRAINT_CREATE:
|
||||
case StorageMetadataOperation::EXISTENCE_CONSTRAINT_DROP:
|
||||
case StorageMetadataOperation::LABEL_PROPERTY_INDEX_STATS_SET:
|
||||
@ -1000,6 +1040,20 @@ RecoveryInfo LoadWal(const std::filesystem::path &path, RecoveredIndicesAndConst
|
||||
"The label index stats doesn't exist!");
|
||||
break;
|
||||
}
|
||||
case WalDeltaData::Type::TEXT_INDEX_CREATE: {
|
||||
auto index_name = delta.operation_text.index_name;
|
||||
auto label = LabelId::FromUint(name_id_mapper->NameToId(delta.operation_text.label));
|
||||
AddRecoveredIndexConstraint(&indices_constraints->indices.text_indices, {index_name, label},
|
||||
"The text index already exists!");
|
||||
break;
|
||||
}
|
||||
case WalDeltaData::Type::TEXT_INDEX_DROP: {
|
||||
auto index_name = delta.operation_text.index_name;
|
||||
auto label = LabelId::FromUint(name_id_mapper->NameToId(delta.operation_text.label));
|
||||
RemoveRecoveredIndexConstraint(&indices_constraints->indices.text_indices, {index_name, label},
|
||||
"The text index doesn't exist!");
|
||||
break;
|
||||
}
|
||||
case WalDeltaData::Type::EXISTENCE_CONSTRAINT_CREATE: {
|
||||
auto label_id = LabelId::FromUint(name_id_mapper->NameToId(delta.operation_label_property.label));
|
||||
auto property_id = PropertyId::FromUint(name_id_mapper->NameToId(delta.operation_label_property.property));
|
||||
@ -1148,10 +1202,11 @@ void WalFile::AppendTransactionEnd(uint64_t timestamp) {
|
||||
UpdateStats(timestamp);
|
||||
}
|
||||
|
||||
void WalFile::AppendOperation(StorageMetadataOperation operation, LabelId label, const std::set<PropertyId> &properties,
|
||||
const LabelIndexStats &stats, const LabelPropertyIndexStats &property_stats,
|
||||
uint64_t timestamp) {
|
||||
EncodeOperation(&wal_, name_id_mapper_, operation, label, properties, stats, property_stats, timestamp);
|
||||
void WalFile::AppendOperation(StorageMetadataOperation operation, const std::optional<std::string> text_index_name,
|
||||
LabelId label, const std::set<PropertyId> &properties, const LabelIndexStats &stats,
|
||||
const LabelPropertyIndexStats &property_stats, uint64_t timestamp) {
|
||||
EncodeOperation(&wal_, name_id_mapper_, operation, text_index_name, label, properties, stats, property_stats,
|
||||
timestamp);
|
||||
UpdateStats(timestamp);
|
||||
}
|
||||
|
||||
|
@ -69,6 +69,8 @@ struct WalDeltaData {
|
||||
LABEL_PROPERTY_INDEX_STATS_CLEAR,
|
||||
EDGE_INDEX_CREATE,
|
||||
EDGE_INDEX_DROP,
|
||||
TEXT_INDEX_CREATE,
|
||||
TEXT_INDEX_DROP,
|
||||
EXISTENCE_CONSTRAINT_CREATE,
|
||||
EXISTENCE_CONSTRAINT_DROP,
|
||||
UNIQUE_CONSTRAINT_CREATE,
|
||||
@ -127,6 +129,11 @@ struct WalDeltaData {
|
||||
std::string property;
|
||||
std::string stats;
|
||||
} operation_label_property_stats;
|
||||
|
||||
struct {
|
||||
std::string index_name;
|
||||
std::string label;
|
||||
} operation_text;
|
||||
};
|
||||
|
||||
bool operator==(const WalDeltaData &a, const WalDeltaData &b);
|
||||
@ -163,6 +170,8 @@ constexpr bool IsWalDeltaDataTypeTransactionEndVersion15(const WalDeltaData::Typ
|
||||
case WalDeltaData::Type::LABEL_PROPERTY_INDEX_STATS_CLEAR:
|
||||
case WalDeltaData::Type::EDGE_INDEX_CREATE:
|
||||
case WalDeltaData::Type::EDGE_INDEX_DROP:
|
||||
case WalDeltaData::Type::TEXT_INDEX_CREATE:
|
||||
case WalDeltaData::Type::TEXT_INDEX_DROP:
|
||||
case WalDeltaData::Type::EXISTENCE_CONSTRAINT_CREATE:
|
||||
case WalDeltaData::Type::EXISTENCE_CONSTRAINT_DROP:
|
||||
case WalDeltaData::Type::UNIQUE_CONSTRAINT_CREATE:
|
||||
@ -213,7 +222,8 @@ void EncodeTransactionEnd(BaseEncoder *encoder, uint64_t timestamp);
|
||||
|
||||
/// Function used to encode non-transactional operation.
|
||||
void EncodeOperation(BaseEncoder *encoder, NameIdMapper *name_id_mapper, StorageMetadataOperation operation,
|
||||
LabelId label, const std::set<PropertyId> &properties, const LabelIndexStats &stats,
|
||||
const std::optional<std::string> text_index_name, LabelId label,
|
||||
const std::set<PropertyId> &properties, const LabelIndexStats &stats,
|
||||
const LabelPropertyIndexStats &property_stats, uint64_t timestamp);
|
||||
|
||||
void EncodeOperation(BaseEncoder *encoder, NameIdMapper *name_id_mapper, StorageMetadataOperation operation,
|
||||
@ -248,8 +258,9 @@ class WalFile {
|
||||
|
||||
void AppendTransactionEnd(uint64_t timestamp);
|
||||
|
||||
void AppendOperation(StorageMetadataOperation operation, LabelId label, const std::set<PropertyId> &properties,
|
||||
const LabelIndexStats &stats, const LabelPropertyIndexStats &property_stats, uint64_t timestamp);
|
||||
void AppendOperation(StorageMetadataOperation operation, const std::optional<std::string> text_index_name,
|
||||
LabelId label, const std::set<PropertyId> &properties, const LabelIndexStats &stats,
|
||||
const LabelPropertyIndexStats &property_stats, uint64_t timestamp);
|
||||
|
||||
void AppendOperation(StorageMetadataOperation operation, EdgeTypeId edge_type, uint64_t timestamp);
|
||||
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "storage/v2/inmemory/edge_type_index.hpp"
|
||||
#include "storage/v2/inmemory/label_index.hpp"
|
||||
#include "storage/v2/inmemory/label_property_index.hpp"
|
||||
#include "storage/v2/storage.hpp"
|
||||
|
||||
namespace memgraph::storage {
|
||||
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include "storage/v2/indices/edge_type_index.hpp"
|
||||
#include "storage/v2/indices/label_index.hpp"
|
||||
#include "storage/v2/indices/label_property_index.hpp"
|
||||
#include "storage/v2/indices/text_index.hpp"
|
||||
#include "storage/v2/storage_mode.hpp"
|
||||
|
||||
namespace memgraph::storage {
|
||||
@ -31,12 +32,12 @@ struct Indices {
|
||||
Indices &operator=(Indices &&) = delete;
|
||||
~Indices() = default;
|
||||
|
||||
/// This function should be called from garbage collection to clean-up the
|
||||
/// This function should be called from garbage collection to clean up the
|
||||
/// index.
|
||||
/// TODO: unused in disk indices
|
||||
void RemoveObsoleteEntries(uint64_t oldest_active_start_timestamp, std::stop_token token) const;
|
||||
|
||||
/// Surgical removal of entries that was inserted this transaction
|
||||
/// Surgical removal of entries that were inserted in this transaction
|
||||
/// TODO: unused in disk indices
|
||||
void AbortEntries(LabelId labelId, std::span<Vertex *const> vertices, uint64_t exact_start_timestamp) const;
|
||||
void AbortEntries(PropertyId property, std::span<std::pair<PropertyValue, Vertex *> const> vertices,
|
||||
@ -71,6 +72,7 @@ struct Indices {
|
||||
std::unique_ptr<LabelIndex> label_index_;
|
||||
std::unique_ptr<LabelPropertyIndex> label_property_index_;
|
||||
std::unique_ptr<EdgeTypeIndex> edge_type_index_;
|
||||
mutable TextIndex text_index_;
|
||||
};
|
||||
|
||||
} // namespace memgraph::storage
|
||||
|
430
src/storage/v2/indices/text_index.cpp
Normal file
430
src/storage/v2/indices/text_index.cpp
Normal file
@ -0,0 +1,430 @@
|
||||
// Copyright 2024 Memgraph Ltd.
|
||||
//
|
||||
// Use of this software is governed by the Business Source License
|
||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
// License, and you may not use this file except in compliance with the Business Source License.
|
||||
//
|
||||
// As of the Change Date specified in that file, in accordance with
|
||||
// the Business Source License, use of this software will be governed
|
||||
// by the Apache License, Version 2.0, included in the file
|
||||
// licenses/APL.txt.
|
||||
|
||||
#include "storage/v2/indices/text_index.hpp"
|
||||
#include "flags/experimental.hpp"
|
||||
#include "flags/run_time_configurable.hpp"
|
||||
#include "query/db_accessor.hpp"
|
||||
#include "storage/v2/view.hpp"
|
||||
#include "text_search.hpp"
|
||||
|
||||
namespace memgraph::storage {
|
||||
|
||||
/// Resolves a property ID to its name through the query-level database accessor.
std::string GetPropertyName(PropertyId prop_id, memgraph::query::DbAccessor *db) {
  return db->PropertyToName(prop_id);
}
|
||||
|
||||
/// Resolves a property ID to its name through the storage-level name/ID mapper.
std::string GetPropertyName(PropertyId prop_id, NameIdMapper *name_id_mapper) {
  const auto raw_id = prop_id.AsUint();
  return name_id_mapper->IdToName(raw_id);
}
|
||||
|
||||
inline std::string TextIndex::MakeIndexPath(const std::filesystem::path &storage_dir, const std::string &index_name) {
|
||||
return (storage_dir / kTextIndicesDirectory / index_name).string();
|
||||
}
|
||||
|
||||
void TextIndex::CreateEmptyIndex(const std::filesystem::path &storage_dir, const std::string &index_name,
|
||||
LabelId label) {
|
||||
if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
|
||||
throw query::TextSearchDisabledException();
|
||||
}
|
||||
|
||||
if (index_.contains(index_name)) {
|
||||
throw query::TextSearchException("Text index \"{}\" already exists.", index_name);
|
||||
}
|
||||
|
||||
try {
|
||||
nlohmann::json mappings = {};
|
||||
mappings["properties"] = {};
|
||||
mappings["properties"]["metadata"] = {{"type", "json"}, {"fast", true}, {"stored", true}, {"text", true}};
|
||||
mappings["properties"]["data"] = {{"type", "json"}, {"fast", true}, {"stored", true}, {"text", true}};
|
||||
mappings["properties"]["all"] = {{"type", "text"}, {"fast", true}, {"stored", true}, {"text", true}};
|
||||
|
||||
index_.emplace(index_name, TextIndexData{.context_ = mgcxx::text_search::create_index(
|
||||
MakeIndexPath(storage_dir, index_name),
|
||||
mgcxx::text_search::IndexConfig{.mappings = mappings.dump()}),
|
||||
.scope_ = label});
|
||||
} catch (const std::exception &e) {
|
||||
throw query::TextSearchException("Tantivy error: {}", e.what());
|
||||
}
|
||||
label_to_index_.emplace(label, index_name);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
nlohmann::json TextIndex::SerializeProperties(const std::map<PropertyId, PropertyValue> &properties, T *name_resolver) {
|
||||
nlohmann::json serialized_properties = nlohmann::json::value_t::object;
|
||||
for (const auto &[prop_id, prop_value] : properties) {
|
||||
switch (prop_value.type()) {
|
||||
case PropertyValue::Type::Bool:
|
||||
serialized_properties[GetPropertyName(prop_id, name_resolver)] = prop_value.ValueBool();
|
||||
break;
|
||||
case PropertyValue::Type::Int:
|
||||
serialized_properties[GetPropertyName(prop_id, name_resolver)] = prop_value.ValueInt();
|
||||
break;
|
||||
case PropertyValue::Type::Double:
|
||||
serialized_properties[GetPropertyName(prop_id, name_resolver)] = prop_value.ValueDouble();
|
||||
break;
|
||||
case PropertyValue::Type::String:
|
||||
serialized_properties[GetPropertyName(prop_id, name_resolver)] = prop_value.ValueString();
|
||||
break;
|
||||
case PropertyValue::Type::Null:
|
||||
case PropertyValue::Type::List:
|
||||
case PropertyValue::Type::Map:
|
||||
case PropertyValue::Type::TemporalData:
|
||||
default:
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
return serialized_properties;
|
||||
}
|
||||
|
||||
std::string TextIndex::StringifyProperties(const std::map<PropertyId, PropertyValue> &properties) {
|
||||
std::vector<std::string> indexable_properties_as_string;
|
||||
for (const auto &[_, prop_value] : properties) {
|
||||
switch (prop_value.type()) {
|
||||
case PropertyValue::Type::Bool:
|
||||
indexable_properties_as_string.push_back(prop_value.ValueBool() ? "true" : "false");
|
||||
break;
|
||||
case PropertyValue::Type::Int:
|
||||
indexable_properties_as_string.push_back(std::to_string(prop_value.ValueInt()));
|
||||
break;
|
||||
case PropertyValue::Type::Double:
|
||||
indexable_properties_as_string.push_back(std::to_string(prop_value.ValueDouble()));
|
||||
break;
|
||||
case PropertyValue::Type::String:
|
||||
indexable_properties_as_string.push_back(prop_value.ValueString());
|
||||
break;
|
||||
// NOTE: As the following types aren‘t indexed in Tantivy, they don’t appear in the property value string either.
|
||||
case PropertyValue::Type::Null:
|
||||
case PropertyValue::Type::List:
|
||||
case PropertyValue::Type::Map:
|
||||
case PropertyValue::Type::TemporalData:
|
||||
default:
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return utils::Join(indexable_properties_as_string, " ");
|
||||
}
|
||||
|
||||
std::vector<mgcxx::text_search::Context *> TextIndex::GetApplicableTextIndices(const std::vector<LabelId> &labels) {
|
||||
if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
|
||||
throw query::TextSearchDisabledException();
|
||||
}
|
||||
|
||||
std::vector<mgcxx::text_search::Context *> applicable_text_indices;
|
||||
for (const auto &label : labels) {
|
||||
if (label_to_index_.contains(label)) {
|
||||
applicable_text_indices.push_back(&index_.at(label_to_index_.at(label)).context_);
|
||||
}
|
||||
}
|
||||
return applicable_text_indices;
|
||||
}
|
||||
|
||||
/// Adds one node, as a single document, to every index in `applicable_text_indices` without committing.
///
/// The document has three top-level fields: `data` (the serialized properties), `all` (the space-joined property
/// values) and `metadata` (`gid`, `deleted` = false, `is_node` = true).
///
/// @param gid                     ID of the node being indexed.
/// @param properties              JSON object with the node's indexable properties.
/// @param property_values_as_str  the node's indexable property values joined into one string.
/// @param applicable_text_indices contexts of the indices to add the document to; nothing happens if empty.
/// @throws query::TextSearchException if serializing or adding the document fails.
void TextIndex::LoadNodeToTextIndices(const std::int64_t gid, const nlohmann::json &properties,
                                      const std::string &property_values_as_str,
                                      const std::vector<mgcxx::text_search::Context *> &applicable_text_indices) {
  if (applicable_text_indices.empty()) {
    return;
  }

  // NOTE: Text indexes are presently all-property indices. If we allow text indexes restricted to specific properties,
  // an indexable document should be created for each applicable index.
  nlohmann::json document = {};
  document["data"] = properties;
  document["all"] = property_values_as_str;
  document["metadata"] = {};
  document["metadata"]["gid"] = gid;
  document["metadata"]["deleted"] = false;
  document["metadata"]["is_node"] = true;

  try {
    // Every index receives the same document, so it is serialized once. Invalid UTF-8 is replaced, not rejected.
    const std::string serialized_document = document.dump(-1, ' ', false, nlohmann::json::error_handler_t::replace);

    for (auto *index_context : applicable_text_indices) {
      mgcxx::text_search::add_document(*index_context, mgcxx::text_search::DocumentInput{.data = serialized_document},
                                       kDoSkipCommit);
    }
  } catch (const std::exception &e) {
    throw query::TextSearchException("Tantivy error: {}", e.what());
  }
}
|
||||
|
||||
/// Commits the documents loaded into `index_context`.
///
/// CREATE TEXT INDEX (...) queries don't accumulate deltas, so db_transactional_accessor_->Commit() never reaches
/// the code that commits index changes. Committing here avoids having to commit text indices after every such
/// query.
///
/// @throws query::TextSearchException if the commit fails.
void TextIndex::CommitLoadedNodes(mgcxx::text_search::Context &index_context) {
  try {
    mgcxx::text_search::commit(index_context);
  } catch (const std::exception &commit_error) {
    throw query::TextSearchException("Tantivy error: {}", commit_error.what());
  }
}
|
||||
|
||||
/// Adds a vertex to the text indices that apply to it (without committing).
///
/// @param vertex_after_update           vertex whose current labels and properties are indexed.
/// @param name_id_mapper                used to resolve property IDs to property names.
/// @param maybe_applicable_text_indices indices to add the vertex to; when not given, they are derived from the
///                                      vertex's labels.
/// @throws query::TextSearchDisabledException if the text search experiment is not enabled.
/// @throws query::TextSearchException if adding the document fails.
void TextIndex::AddNode(
    Vertex *vertex_after_update, NameIdMapper *name_id_mapper,
    const std::optional<std::vector<mgcxx::text_search::Context *>> &maybe_applicable_text_indices) {
  if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
    throw query::TextSearchDisabledException();
  }

  // Derive the indices from the labels only when the caller did not supply them. `value_or` would evaluate the
  // fallback unconditionally and copy the supplied vector.
  std::vector<mgcxx::text_search::Context *> derived_text_indices;
  if (!maybe_applicable_text_indices.has_value()) {
    derived_text_indices = GetApplicableTextIndices(vertex_after_update->labels);
  }
  const auto &applicable_text_indices =
      maybe_applicable_text_indices.has_value() ? *maybe_applicable_text_indices : derived_text_indices;
  if (applicable_text_indices.empty()) {
    return;
  }

  auto vertex_properties = vertex_after_update->properties.Properties();
  LoadNodeToTextIndices(vertex_after_update->gid.AsInt(), SerializeProperties(vertex_properties, name_id_mapper),
                        StringifyProperties(vertex_properties), applicable_text_indices);
}
|
||||
|
||||
/// Refreshes a vertex in the text indices after its labels or properties changed (without committing).
///
/// The vertex is first removed from the indices of `removed_labels`, then removed from and re-added to the
/// indices of the labels it currently has.
///
/// @throws query::TextSearchDisabledException if the text search experiment is not enabled.
/// @throws query::TextSearchException if an index operation fails.
void TextIndex::UpdateNode(Vertex *vertex_after_update, NameIdMapper *name_id_mapper,
                           const std::vector<LabelId> &removed_labels) {
  const bool text_search_enabled = flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH);
  if (!text_search_enabled) {
    throw query::TextSearchDisabledException();
  }

  // Labels the vertex no longer has: drop it from their indices.
  if (!removed_labels.empty()) {
    RemoveNode(vertex_after_update, GetApplicableTextIndices(removed_labels));
  }

  // Labels the vertex currently has: replace its document in their indices.
  const auto current_text_indices = GetApplicableTextIndices(vertex_after_update->labels);
  if (!current_text_indices.empty()) {
    RemoveNode(vertex_after_update, current_text_indices);
    AddNode(vertex_after_update, name_id_mapper, current_text_indices);
  }
}
|
||||
|
||||
/// Deletes a vertex's document from the text indices that apply to it (without committing).
///
/// The document is located by the query `metadata.gid:<gid>`.
///
/// @param vertex_after_update           vertex to remove.
/// @param maybe_applicable_text_indices indices to remove the vertex from; when not given, they are derived from
///                                      the vertex's labels.
/// @throws query::TextSearchDisabledException if the text search experiment is not enabled.
/// @throws query::TextSearchException if deleting the document fails.
void TextIndex::RemoveNode(
    Vertex *vertex_after_update,
    const std::optional<std::vector<mgcxx::text_search::Context *>> &maybe_applicable_text_indices) {
  if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
    throw query::TextSearchDisabledException();
  }

  auto search_node_to_be_deleted =
      mgcxx::text_search::SearchInput{.search_query = fmt::format("metadata.gid:{}", vertex_after_update->gid.AsInt())};

  // Derive the indices from the labels only when the caller did not supply them. `value_or` would evaluate the
  // fallback unconditionally and copy the supplied vector.
  std::vector<mgcxx::text_search::Context *> derived_text_indices;
  if (!maybe_applicable_text_indices.has_value()) {
    derived_text_indices = GetApplicableTextIndices(vertex_after_update->labels);
  }
  const auto &applicable_text_indices =
      maybe_applicable_text_indices.has_value() ? *maybe_applicable_text_indices : derived_text_indices;

  for (auto *index_context : applicable_text_indices) {
    try {
      mgcxx::text_search::delete_document(*index_context, search_node_to_be_deleted, kDoSkipCommit);
    } catch (const std::exception &e) {
      throw query::TextSearchException("Tantivy error: {}", e.what());
    }
  }
}
|
||||
|
||||
void TextIndex::CreateIndex(const std::filesystem::path &storage_dir, const std::string &index_name, LabelId label,
|
||||
memgraph::query::DbAccessor *db) {
|
||||
if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
|
||||
throw query::TextSearchDisabledException();
|
||||
}
|
||||
|
||||
CreateEmptyIndex(storage_dir, index_name, label);
|
||||
|
||||
for (const auto &v : db->Vertices(View::NEW)) {
|
||||
if (!v.HasLabel(View::NEW, label).GetValue()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto vertex_properties = v.Properties(View::NEW).GetValue();
|
||||
LoadNodeToTextIndices(v.Gid().AsInt(), SerializeProperties(vertex_properties, db),
|
||||
StringifyProperties(vertex_properties), {&index_.at(index_name).context_});
|
||||
}
|
||||
|
||||
CommitLoadedNodes(index_.at(index_name).context_);
|
||||
}
|
||||
|
||||
void TextIndex::RecoverIndex(const std::filesystem::path &storage_dir, const std::string &index_name, LabelId label,
|
||||
memgraph::utils::SkipList<Vertex>::Accessor vertices, NameIdMapper *name_id_mapper) {
|
||||
if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
|
||||
throw query::TextSearchDisabledException();
|
||||
}
|
||||
|
||||
// Clear Tantivy-internal files if they exist from previous sessions
|
||||
std::filesystem::remove_all(storage_dir / kTextIndicesDirectory / index_name);
|
||||
|
||||
CreateEmptyIndex(storage_dir, index_name, label);
|
||||
|
||||
for (const auto &v : vertices) {
|
||||
if (std::find(v.labels.begin(), v.labels.end(), label) == v.labels.end()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto vertex_properties = v.properties.Properties();
|
||||
LoadNodeToTextIndices(v.gid.AsInt(), SerializeProperties(vertex_properties, name_id_mapper),
|
||||
StringifyProperties(vertex_properties), {&index_.at(index_name).context_});
|
||||
}
|
||||
|
||||
CommitLoadedNodes(index_.at(index_name).context_);
|
||||
}
|
||||
|
||||
/// Drops the text index `index_name` and unregisters it from `index_` and `label_to_index_`.
///
/// @return the label the dropped index was created for.
/// @throws query::TextSearchDisabledException if the text search experiment is not enabled.
/// @throws query::TextSearchException if no such index exists or dropping it fails.
LabelId TextIndex::DropIndex(const std::filesystem::path &storage_dir, const std::string &index_name) {
  const bool text_search_enabled = flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH);
  if (!text_search_enabled) {
    throw query::TextSearchDisabledException();
  }

  const auto index_it = index_.find(index_name);
  if (index_it == index_.end()) {
    throw query::TextSearchException("Text index \"{}\" doesn’t exist.", index_name);
  }

  try {
    mgcxx::text_search::drop_index(MakeIndexPath(storage_dir, index_name));
  } catch (const std::exception &drop_error) {
    throw query::TextSearchException("Tantivy error: {}", drop_error.what());
  }

  // Remember the label before the entry disappears.
  const auto dropped_index_label = index_it->second.scope_;
  index_.erase(index_it);
  std::erase_if(label_to_index_, [&index_name](const auto &mapping) { return mapping.second == index_name; });

  return dropped_index_label;
}
|
||||
|
||||
/// Tells whether a text index with the given name is registered.
bool TextIndex::IndexExists(const std::string &index_name) const {
  return index_.find(index_name) != index_.end();
}
|
||||
|
||||
/// Runs `search_query` against the index `index_name` without specifying search fields, returning only the
/// `metadata` field of each hit.
///
/// @throws query::TextSearchException if the index lookup or the search fails.
mgcxx::text_search::SearchOutput TextIndex::SearchGivenProperties(const std::string &index_name,
                                                                  const std::string &search_query) {
  // Both paths leave the function (return or throw), so no trailing return is needed.
  try {
    return mgcxx::text_search::search(
        index_.at(index_name).context_,
        mgcxx::text_search::SearchInput{.search_query = search_query, .return_fields = {"metadata"}});
  } catch (const std::exception &e) {
    throw query::TextSearchException("Tantivy error: {}", e.what());
  }
}
|
||||
|
||||
/// Runs the regex `search_query` against the `all` field of the index `index_name`, returning only the `metadata`
/// field of each hit.
///
/// @throws query::TextSearchException if the index lookup or the search fails.
mgcxx::text_search::SearchOutput TextIndex::RegexSearch(const std::string &index_name,
                                                        const std::string &search_query) {
  // Both paths leave the function (return or throw), so no trailing return is needed.
  try {
    return mgcxx::text_search::regex_search(
        index_.at(index_name).context_,
        mgcxx::text_search::SearchInput{
            .search_fields = {"all"}, .search_query = search_query, .return_fields = {"metadata"}});
  } catch (const std::exception &e) {
    throw query::TextSearchException("Tantivy error: {}", e.what());
  }
}
|
||||
|
||||
/// Runs `search_query` against the `all` field of the index `index_name`, returning only the `metadata` field of
/// each hit.
///
/// @throws query::TextSearchException if the index lookup or the search fails.
mgcxx::text_search::SearchOutput TextIndex::SearchAllProperties(const std::string &index_name,
                                                                const std::string &search_query) {
  // Both paths leave the function (return or throw), so no trailing return is needed.
  try {
    return mgcxx::text_search::search(
        index_.at(index_name).context_,
        mgcxx::text_search::SearchInput{
            .search_fields = {"all"}, .search_query = search_query, .return_fields = {"metadata"}});
  } catch (const std::exception &e) {
    throw query::TextSearchException("Tantivy error: {}", e.what());
  }
}
|
||||
|
||||
std::vector<Gid> TextIndex::Search(const std::string &index_name, const std::string &search_query,
|
||||
text_search_mode search_mode) {
|
||||
if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
|
||||
throw query::TextSearchDisabledException();
|
||||
}
|
||||
|
||||
if (!index_.contains(index_name)) {
|
||||
throw query::TextSearchException("Text index \"{}\" doesn’t exist.", index_name);
|
||||
}
|
||||
|
||||
mgcxx::text_search::SearchOutput search_results;
|
||||
switch (search_mode) {
|
||||
case text_search_mode::SPECIFIED_PROPERTIES:
|
||||
search_results = SearchGivenProperties(index_name, search_query);
|
||||
break;
|
||||
case text_search_mode::REGEX:
|
||||
search_results = RegexSearch(index_name, search_query);
|
||||
break;
|
||||
case text_search_mode::ALL_PROPERTIES:
|
||||
search_results = SearchAllProperties(index_name, search_query);
|
||||
break;
|
||||
default:
|
||||
throw query::TextSearchException(
|
||||
"Unsupported search mode: please use one of text_search.search, text_search.search_all, or "
|
||||
"text_search.regex_search.");
|
||||
}
|
||||
|
||||
std::vector<Gid> found_nodes;
|
||||
for (const auto &doc : search_results.docs) {
|
||||
// The CXX .data() method (https://cxx.rs/binding/string.html) may overestimate string length, causing JSON parsing
|
||||
// errors downstream. We prevent this by resizing the converted string with the correctly-working .length() method.
|
||||
std::string doc_string = doc.data.data();
|
||||
doc_string.resize(doc.data.length());
|
||||
auto doc_json = nlohmann::json::parse(doc_string);
|
||||
found_nodes.push_back(storage::Gid::FromString(doc_json["metadata"]["gid"].dump()));
|
||||
}
|
||||
return found_nodes;
|
||||
}
|
||||
|
||||
std::string TextIndex::Aggregate(const std::string &index_name, const std::string &search_query,
|
||||
const std::string &aggregation_query) {
|
||||
if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
|
||||
throw query::TextSearchDisabledException();
|
||||
}
|
||||
|
||||
if (!index_.contains(index_name)) {
|
||||
throw query::TextSearchException("Text index \"{}\" doesn’t exist.", index_name);
|
||||
}
|
||||
|
||||
mgcxx::text_search::DocumentOutput aggregation_result;
|
||||
try {
|
||||
aggregation_result = mgcxx::text_search::aggregate(
|
||||
index_.at(index_name).context_,
|
||||
mgcxx::text_search::SearchInput{
|
||||
.search_fields = {"all"}, .search_query = search_query, .aggregation_query = aggregation_query});
|
||||
|
||||
} catch (const std::exception &e) {
|
||||
throw query::TextSearchException("Tantivy error: {}", e.what());
|
||||
}
|
||||
// The CXX .data() method (https://cxx.rs/binding/string.html) may overestimate string length, causing JSON parsing
|
||||
// errors downstream. We prevent this by resizing the converted string with the correctly-working .length() method.
|
||||
std::string result_string = aggregation_result.data.data();
|
||||
result_string.resize(aggregation_result.data.length());
|
||||
return result_string;
|
||||
}
|
||||
|
||||
void TextIndex::Commit() {
|
||||
if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
|
||||
throw query::TextSearchDisabledException();
|
||||
}
|
||||
|
||||
for (auto &[_, index_data] : index_) {
|
||||
mgcxx::text_search::commit(index_data.context_);
|
||||
}
|
||||
}
|
||||
|
||||
void TextIndex::Rollback() {
|
||||
if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
|
||||
throw query::TextSearchDisabledException();
|
||||
}
|
||||
|
||||
for (auto &[_, index_data] : index_) {
|
||||
mgcxx::text_search::rollback(index_data.context_);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::pair<std::string, LabelId>> TextIndex::ListIndices() const {
|
||||
std::vector<std::pair<std::string, LabelId>> ret;
|
||||
ret.reserve(index_.size());
|
||||
for (const auto &[index_name, index_data] : index_) {
|
||||
ret.emplace_back(index_name, index_data.scope_);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
} // namespace memgraph::storage
|
105
src/storage/v2/indices/text_index.hpp
Normal file
105
src/storage/v2/indices/text_index.hpp
Normal file
@ -0,0 +1,105 @@
|
||||
// Copyright 2024 Memgraph Ltd.
|
||||
//
|
||||
// Use of this software is governed by the Business Source License
|
||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
// License, and you may not use this file except in compliance with the Business Source License.
|
||||
//
|
||||
// As of the Change Date specified in that file, in accordance with
|
||||
// the Business Source License, use of this software will be governed
|
||||
// by the Apache License, Version 2.0, included in the file
|
||||
// licenses/APL.txt.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <json/json.hpp>
|
||||
#include "mg_procedure.h"
|
||||
#include "storage/v2/id_types.hpp"
|
||||
#include "storage/v2/name_id_mapper.hpp"
|
||||
#include "storage/v2/vertex.hpp"
|
||||
#include "text_search.hpp"
|
||||
|
||||
namespace memgraph::query {
|
||||
class DbAccessor;
|
||||
}
|
||||
|
||||
namespace memgraph::storage {
|
||||
struct TextIndexData {
|
||||
mgcxx::text_search::Context context_;
|
||||
LabelId scope_;
|
||||
};
|
||||
|
||||
class TextIndex {
|
||||
private:
|
||||
static constexpr bool kDoSkipCommit = true;
|
||||
static constexpr std::string_view kTextIndicesDirectory = "text_indices";
|
||||
|
||||
inline std::string MakeIndexPath(const std::filesystem::path &storage_dir, const std::string &index_name);
|
||||
|
||||
void CreateEmptyIndex(const std::filesystem::path &storage_dir, const std::string &index_name, LabelId label);
|
||||
|
||||
template <typename T>
|
||||
nlohmann::json SerializeProperties(const std::map<PropertyId, PropertyValue> &properties, T *name_resolver);
|
||||
|
||||
std::string StringifyProperties(const std::map<PropertyId, PropertyValue> &properties);
|
||||
|
||||
std::vector<mgcxx::text_search::Context *> GetApplicableTextIndices(const std::vector<LabelId> &labels);
|
||||
|
||||
void LoadNodeToTextIndices(const std::int64_t gid, const nlohmann::json &properties,
|
||||
const std::string &property_values_as_str,
|
||||
const std::vector<mgcxx::text_search::Context *> &applicable_text_indices);
|
||||
|
||||
void CommitLoadedNodes(mgcxx::text_search::Context &index_context);
|
||||
|
||||
mgcxx::text_search::SearchOutput SearchGivenProperties(const std::string &index_name,
|
||||
const std::string &search_query);
|
||||
|
||||
mgcxx::text_search::SearchOutput RegexSearch(const std::string &index_name, const std::string &search_query);
|
||||
|
||||
mgcxx::text_search::SearchOutput SearchAllProperties(const std::string &index_name, const std::string &search_query);
|
||||
|
||||
public:
|
||||
TextIndex() = default;
|
||||
|
||||
TextIndex(const TextIndex &) = delete;
|
||||
TextIndex(TextIndex &&) = delete;
|
||||
TextIndex &operator=(const TextIndex &) = delete;
|
||||
TextIndex &operator=(TextIndex &&) = delete;
|
||||
|
||||
~TextIndex() = default;
|
||||
|
||||
std::map<std::string, TextIndexData> index_;
|
||||
std::map<LabelId, std::string> label_to_index_;
|
||||
|
||||
void AddNode(
|
||||
Vertex *vertex, NameIdMapper *name_id_mapper,
|
||||
const std::optional<std::vector<mgcxx::text_search::Context *>> &maybe_applicable_text_indices = std::nullopt);
|
||||
|
||||
void UpdateNode(Vertex *vertex, NameIdMapper *name_id_mapper, const std::vector<LabelId> &removed_labels = {});
|
||||
|
||||
void RemoveNode(
|
||||
Vertex *vertex,
|
||||
const std::optional<std::vector<mgcxx::text_search::Context *>> &maybe_applicable_text_indices = std::nullopt);
|
||||
|
||||
void CreateIndex(const std::filesystem::path &storage_dir, const std::string &index_name, LabelId label,
|
||||
memgraph::query::DbAccessor *db);
|
||||
|
||||
void RecoverIndex(const std::filesystem::path &storage_dir, const std::string &index_name, LabelId label,
|
||||
memgraph::utils::SkipList<Vertex>::Accessor vertices, NameIdMapper *name_id_mapper);
|
||||
|
||||
LabelId DropIndex(const std::filesystem::path &storage_dir, const std::string &index_name);
|
||||
|
||||
bool IndexExists(const std::string &index_name) const;
|
||||
|
||||
std::vector<Gid> Search(const std::string &index_name, const std::string &search_query, text_search_mode search_mode);
|
||||
|
||||
std::string Aggregate(const std::string &index_name, const std::string &search_query,
|
||||
const std::string &aggregation_query);
|
||||
|
||||
void Commit();
|
||||
|
||||
void Rollback();
|
||||
|
||||
std::vector<std::pair<std::string, LabelId>> ListIndices() const;
|
||||
};
|
||||
|
||||
} // namespace memgraph::storage
|
@ -15,6 +15,8 @@
|
||||
#include <functional>
|
||||
#include <optional>
|
||||
#include "dbms/constants.hpp"
|
||||
#include "flags/experimental.hpp"
|
||||
#include "flags/run_time_configurable.hpp"
|
||||
#include "memory/global_memory_control.hpp"
|
||||
#include "storage/v2/durability/durability.hpp"
|
||||
#include "storage/v2/durability/snapshot.hpp"
|
||||
@ -890,6 +892,10 @@ utils::BasicResult<StorageManipulationError, void> InMemoryStorage::InMemoryAcce
|
||||
commit_timestamp_.reset(); // We have aborted, hence we have not committed
|
||||
return StorageManipulationError{*unique_constraint_violation};
|
||||
}
|
||||
|
||||
if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
|
||||
mem_storage->indices_.text_index_.Commit();
|
||||
}
|
||||
}
|
||||
|
||||
is_transaction_active_ = false;
|
||||
@ -1213,6 +1219,9 @@ void InMemoryStorage::InMemoryAccessor::Abort() {
|
||||
for (auto const &[property, prop_vertices] : property_cleanup) {
|
||||
storage_->indices_.AbortEntries(property, prop_vertices, transaction_.start_timestamp);
|
||||
}
|
||||
if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
|
||||
storage_->indices_.text_index_.Rollback();
|
||||
}
|
||||
|
||||
// VERTICES
|
||||
{
|
||||
@ -1846,6 +1855,7 @@ StorageInfo InMemoryStorage::GetInfo(memgraph::replication_coordination_glue::Re
|
||||
const auto &lbl = access->ListAllIndices();
|
||||
info.label_indices = lbl.label.size();
|
||||
info.label_property_indices = lbl.label_property.size();
|
||||
info.text_indices = lbl.text_indices.size();
|
||||
const auto &con = access->ListAllConstraints();
|
||||
info.existence_constraints = con.existence.size();
|
||||
info.unique_constraints = con.unique.size();
|
||||
@ -2107,6 +2117,16 @@ bool InMemoryStorage::AppendToWal(const Transaction &transaction, uint64_t final
|
||||
AppendToWalDataDefinition(durability::StorageMetadataOperation::LABEL_PROPERTY_INDEX_STATS_CLEAR, info.label,
|
||||
final_commit_timestamp);
|
||||
} break;
|
||||
case MetadataDelta::Action::TEXT_INDEX_CREATE: {
|
||||
const auto &info = md_delta.text_index;
|
||||
AppendToWalDataDefinition(durability::StorageMetadataOperation::TEXT_INDEX_CREATE, info.index_name, info.label,
|
||||
final_commit_timestamp);
|
||||
} break;
|
||||
case MetadataDelta::Action::TEXT_INDEX_DROP: {
|
||||
const auto &info = md_delta.text_index;
|
||||
AppendToWalDataDefinition(durability::StorageMetadataOperation::TEXT_INDEX_DROP, info.index_name, info.label,
|
||||
final_commit_timestamp);
|
||||
} break;
|
||||
case MetadataDelta::Action::EXISTENCE_CONSTRAINT_CREATE: {
|
||||
const auto &info = md_delta.label_property;
|
||||
AppendToWalDataDefinition(durability::StorageMetadataOperation::EXISTENCE_CONSTRAINT_CREATE, info.label,
|
||||
@ -2137,11 +2157,13 @@ bool InMemoryStorage::AppendToWal(const Transaction &transaction, uint64_t final
|
||||
return repl_storage_state_.FinalizeTransaction(final_commit_timestamp, this, std::move(db_acc));
|
||||
}
|
||||
|
||||
void InMemoryStorage::AppendToWalDataDefinition(durability::StorageMetadataOperation operation, LabelId label,
|
||||
void InMemoryStorage::AppendToWalDataDefinition(durability::StorageMetadataOperation operation,
|
||||
const std::optional<std::string> text_index_name, LabelId label,
|
||||
const std::set<PropertyId> &properties, LabelIndexStats stats,
|
||||
LabelPropertyIndexStats property_stats,
|
||||
uint64_t final_commit_timestamp) {
|
||||
wal_file_->AppendOperation(operation, label, properties, stats, property_stats, final_commit_timestamp);
|
||||
wal_file_->AppendOperation(operation, text_index_name, label, properties, stats, property_stats,
|
||||
final_commit_timestamp);
|
||||
repl_storage_state_.AppendOperation(operation, label, properties, stats, property_stats, final_commit_timestamp);
|
||||
}
|
||||
|
||||
@ -2155,12 +2177,13 @@ void InMemoryStorage::AppendToWalDataDefinition(durability::StorageMetadataOpera
|
||||
const std::set<PropertyId> &properties,
|
||||
LabelPropertyIndexStats property_stats,
|
||||
uint64_t final_commit_timestamp) {
|
||||
return AppendToWalDataDefinition(operation, label, properties, {}, property_stats, final_commit_timestamp);
|
||||
return AppendToWalDataDefinition(operation, std::nullopt, label, properties, {}, property_stats,
|
||||
final_commit_timestamp);
|
||||
}
|
||||
|
||||
void InMemoryStorage::AppendToWalDataDefinition(durability::StorageMetadataOperation operation, LabelId label,
|
||||
LabelIndexStats stats, uint64_t final_commit_timestamp) {
|
||||
return AppendToWalDataDefinition(operation, label, {}, stats, {}, final_commit_timestamp);
|
||||
return AppendToWalDataDefinition(operation, std::nullopt, label, {}, stats, {}, final_commit_timestamp);
|
||||
}
|
||||
|
||||
void InMemoryStorage::AppendToWalDataDefinition(durability::StorageMetadataOperation operation, LabelId label,
|
||||
@ -2174,6 +2197,12 @@ void InMemoryStorage::AppendToWalDataDefinition(durability::StorageMetadataOpera
|
||||
return AppendToWalDataDefinition(operation, label, {}, {}, final_commit_timestamp);
|
||||
}
|
||||
|
||||
/// Appends a text index metadata operation by forwarding to the general overload with empty properties and
/// default-constructed statistics.
void InMemoryStorage::AppendToWalDataDefinition(durability::StorageMetadataOperation operation,
                                                const std::optional<std::string> text_index_name, LabelId label,
                                                uint64_t final_commit_timestamp) {
  AppendToWalDataDefinition(operation, text_index_name, label, /*properties=*/{}, /*stats=*/{},
                            /*property_stats=*/{}, final_commit_timestamp);
}
|
||||
|
||||
utils::BasicResult<InMemoryStorage::CreateSnapshotError> InMemoryStorage::CreateSnapshot(
|
||||
memgraph::replication_coordination_glue::ReplicationRole replication_role) {
|
||||
using memgraph::replication_coordination_glue::ReplicationRole;
|
||||
@ -2301,7 +2330,9 @@ IndicesInfo InMemoryStorage::InMemoryAccessor::ListAllIndices() const {
|
||||
auto *mem_label_property_index =
|
||||
static_cast<InMemoryLabelPropertyIndex *>(in_memory->indices_.label_property_index_.get());
|
||||
auto *mem_edge_type_index = static_cast<InMemoryEdgeTypeIndex *>(in_memory->indices_.edge_type_index_.get());
|
||||
return {mem_label_index->ListIndices(), mem_label_property_index->ListIndices(), mem_edge_type_index->ListIndices()};
|
||||
auto &text_index = storage_->indices_.text_index_;
|
||||
return {mem_label_index->ListIndices(), mem_label_property_index->ListIndices(), mem_edge_type_index->ListIndices(),
|
||||
text_index.ListIndices()};
|
||||
}
|
||||
ConstraintsInfo InMemoryStorage::InMemoryAccessor::ListAllConstraints() const {
|
||||
const auto *mem_storage = static_cast<InMemoryStorage *>(storage_);
|
||||
|
@ -398,7 +398,7 @@ class InMemoryStorage final : public Storage {
|
||||
StorageInfo GetBaseInfo() override;
|
||||
StorageInfo GetInfo(memgraph::replication_coordination_glue::ReplicationRole replication_role) override;
|
||||
|
||||
/// Return true in all cases excepted if any sync replicas have not sent confirmation.
|
||||
/// Return true in all cases except if any sync replicas have not sent confirmation.
|
||||
[[nodiscard]] bool AppendToWal(const Transaction &transaction, uint64_t final_commit_timestamp,
|
||||
DatabaseAccessProtector db_acc);
|
||||
void AppendToWalDataDefinition(durability::StorageMetadataOperation operation, LabelId label,
|
||||
@ -412,9 +412,13 @@ class InMemoryStorage final : public Storage {
|
||||
void AppendToWalDataDefinition(durability::StorageMetadataOperation operation, LabelId label,
|
||||
const std::set<PropertyId> &properties, LabelPropertyIndexStats property_stats,
|
||||
uint64_t final_commit_timestamp);
|
||||
void AppendToWalDataDefinition(durability::StorageMetadataOperation operation, LabelId label,
|
||||
void AppendToWalDataDefinition(durability::StorageMetadataOperation operation,
|
||||
const std::optional<std::string> text_index_name, LabelId label,
|
||||
const std::set<PropertyId> &properties, LabelIndexStats stats,
|
||||
LabelPropertyIndexStats property_stats, uint64_t final_commit_timestamp);
|
||||
void AppendToWalDataDefinition(durability::StorageMetadataOperation operation,
|
||||
const std::optional<std::string> text_index_name, LabelId label,
|
||||
uint64_t final_commit_timestamp);
|
||||
|
||||
uint64_t CommitTimestamp(std::optional<uint64_t> desired_commit_timestamp = {});
|
||||
|
||||
|
@ -37,6 +37,8 @@ struct MetadataDelta {
|
||||
LABEL_PROPERTY_INDEX_STATS_CLEAR,
|
||||
EDGE_INDEX_CREATE,
|
||||
EDGE_INDEX_DROP,
|
||||
TEXT_INDEX_CREATE,
|
||||
TEXT_INDEX_DROP,
|
||||
EXISTENCE_CONSTRAINT_CREATE,
|
||||
EXISTENCE_CONSTRAINT_DROP,
|
||||
UNIQUE_CONSTRAINT_CREATE,
|
||||
@ -63,6 +65,10 @@ struct MetadataDelta {
|
||||
} edge_index_create;
|
||||
static constexpr struct EdgeIndexDrop {
|
||||
} edge_index_drop;
|
||||
/// Tag object selecting the text-index-create constructor overload.
static constexpr struct TextIndexCreate {} text_index_create;
|
||||
/// Tag object selecting the text-index-drop constructor overload.
static constexpr struct TextIndexDrop {} text_index_drop;
|
||||
static constexpr struct ExistenceConstraintCreate {
|
||||
} existence_constraint_create;
|
||||
static constexpr struct ExistenceConstraintDrop {
|
||||
@ -98,6 +104,12 @@ struct MetadataDelta {
|
||||
|
||||
MetadataDelta(EdgeIndexDrop /*tag*/, EdgeTypeId edge_type) : action(Action::EDGE_INDEX_DROP), edge_type(edge_type) {}
|
||||
|
||||
/// Builds a TEXT_INDEX_CREATE delta holding the text index name and its label.
/// `index_name` is received by value and copied into the `text_index` payload.
MetadataDelta(TextIndexCreate /*tag*/, std::string index_name, LabelId label)
|
||||
: action(Action::TEXT_INDEX_CREATE), text_index{index_name, label} {}
|
||||
|
||||
/// Builds a TEXT_INDEX_DROP delta holding the text index name and its label.
/// `index_name` is received by value and copied into the `text_index` payload.
MetadataDelta(TextIndexDrop /*tag*/, std::string index_name, LabelId label)
|
||||
: action(Action::TEXT_INDEX_DROP), text_index{index_name, label} {}
|
||||
|
||||
MetadataDelta(ExistenceConstraintCreate /*tag*/, LabelId label, PropertyId property)
|
||||
: action(Action::EXISTENCE_CONSTRAINT_CREATE), label_property{label, property} {}
|
||||
|
||||
@ -127,6 +139,8 @@ struct MetadataDelta {
|
||||
case Action::LABEL_PROPERTY_INDEX_STATS_CLEAR:
|
||||
case Action::EDGE_INDEX_CREATE:
|
||||
case Action::EDGE_INDEX_DROP:
|
||||
case Action::TEXT_INDEX_CREATE:
|
||||
case Action::TEXT_INDEX_DROP:
|
||||
case Action::EXISTENCE_CONSTRAINT_CREATE:
|
||||
case Action::EXISTENCE_CONSTRAINT_DROP:
|
||||
break;
|
||||
@ -164,6 +178,11 @@ struct MetadataDelta {
|
||||
PropertyId property;
|
||||
LabelPropertyIndexStats stats;
|
||||
} label_property_stats;
|
||||
|
||||
// Payload filled in by the TEXT_INDEX_CREATE / TEXT_INDEX_DROP constructors.
// NOTE(review): index_name is a std::string. If this struct is a union member, its lifetime
// has to be managed explicitly — confirm that the destructor (and any copy/move paths)
// handle the TEXT_INDEX_* actions instead of treating them as trivially destructible.
struct {
|
||||
std::string index_name;
|
||||
LabelId label;
|
||||
} text_index;
|
||||
};
|
||||
};
|
||||
|
||||
|
@ -118,7 +118,7 @@ enum class Type : uint8_t {
|
||||
STRING = 0x50,
|
||||
LIST = 0x60,
|
||||
MAP = 0x70,
|
||||
TEMPORAL_DATA = 0x80
|
||||
TEMPORAL_DATA = 0x80,
|
||||
};
|
||||
|
||||
const uint8_t kMaskType = 0xf0;
|
||||
|
@ -406,8 +406,9 @@ void ReplicaStream::AppendOperation(durability::StorageMetadataOperation operati
|
||||
const std::set<PropertyId> &properties, const LabelIndexStats &stats,
|
||||
const LabelPropertyIndexStats &property_stats, uint64_t timestamp) {
|
||||
replication::Encoder encoder(stream_.GetBuilder());
|
||||
EncodeOperation(&encoder, storage_->name_id_mapper_.get(), operation, label, properties, stats, property_stats,
|
||||
timestamp);
|
||||
// NOTE: Text search doesn’t have replication in scope yet (Phases 1 and 2) -> text index name not sent here
|
||||
EncodeOperation(&encoder, storage_->name_id_mapper_.get(), operation, std::nullopt, label, properties, stats,
|
||||
property_stats, timestamp);
|
||||
}
|
||||
|
||||
void ReplicaStream::AppendOperation(durability::StorageMetadataOperation operation, EdgeTypeId edge_type,
|
||||
|
@ -13,6 +13,8 @@
|
||||
#include "absl/container/flat_hash_set.h"
|
||||
#include "spdlog/spdlog.h"
|
||||
|
||||
#include "flags/experimental.hpp"
|
||||
#include "flags/run_time_configurable.hpp"
|
||||
#include "storage/v2/disk/name_id_mapper.hpp"
|
||||
#include "storage/v2/storage.hpp"
|
||||
#include "storage/v2/transaction.hpp"
|
||||
@ -273,6 +275,12 @@ Storage::Accessor::DetachDelete(std::vector<VertexAccessor *> nodes, std::vector
|
||||
return maybe_deleted_vertices.GetError();
|
||||
}
|
||||
|
||||
if (flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
|
||||
for (auto *node : nodes_to_delete) {
|
||||
storage_->indices_.text_index_.RemoveNode(node);
|
||||
}
|
||||
}
|
||||
|
||||
auto deleted_vertices = maybe_deleted_vertices.GetValue();
|
||||
|
||||
return std::make_optional<ReturnType>(std::move(deleted_vertices), std::move(deleted_edges));
|
||||
@ -543,4 +551,19 @@ void Storage::Accessor::MarkEdgeAsDeleted(Edge *edge) {
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates the text index `index_name` for `label`, appends the matching metadata delta to the
/// current transaction and increments the ActiveTextIndices metric.
/// Asserts that the accessor holds unique access to the storage.
void Storage::Accessor::CreateTextIndex(const std::string &index_name, LabelId label, query::DbAccessor *db) {
  MG_ASSERT(unique_guard_.owns_lock(), "Creating a text index requires unique access to storage!");

  auto &text_index = storage_->indices_.text_index_;
  auto &storage_directory = storage_->config_.durability.storage_directory;
  text_index.CreateIndex(storage_directory, index_name, label, db);

  transaction_.md_deltas.emplace_back(MetadataDelta::text_index_create, index_name, label);
  memgraph::metrics::IncrementCounter(memgraph::metrics::ActiveTextIndices);
}
|
||||
|
||||
void Storage::Accessor::DropTextIndex(const std::string &index_name) {
|
||||
MG_ASSERT(unique_guard_.owns_lock(), "Dropping a text index requires unique access to storage!");
|
||||
auto deleted_index_label =
|
||||
storage_->indices_.text_index_.DropIndex(storage_->config_.durability.storage_directory, index_name);
|
||||
transaction_.md_deltas.emplace_back(MetadataDelta::text_index_drop, index_name, deleted_index_label);
|
||||
memgraph::metrics::DecrementCounter(memgraph::metrics::ActiveTextIndices);
|
||||
}
|
||||
|
||||
} // namespace memgraph::storage
|
||||
|
@ -20,6 +20,7 @@
|
||||
|
||||
#include "io/network/endpoint.hpp"
|
||||
#include "kvstore/kvstore.hpp"
|
||||
#include "mg_procedure.h"
|
||||
#include "query/exceptions.hpp"
|
||||
#include "replication/config.hpp"
|
||||
#include "replication/replication_server.hpp"
|
||||
@ -53,6 +54,7 @@ extern const Event SnapshotCreationLatency_us;
|
||||
|
||||
extern const Event ActiveLabelIndices;
|
||||
extern const Event ActiveLabelPropertyIndices;
|
||||
extern const Event ActiveTextIndices;
|
||||
} // namespace memgraph::metrics
|
||||
|
||||
namespace memgraph::storage {
|
||||
@ -63,6 +65,7 @@ struct IndicesInfo {
|
||||
std::vector<LabelId> label;
|
||||
std::vector<std::pair<LabelId, PropertyId>> label_property;
|
||||
std::vector<EdgeTypeId> edge_type;
|
||||
std::vector<std::pair<std::string, LabelId>> text_indices;
|
||||
};
|
||||
|
||||
struct ConstraintsInfo {
|
||||
@ -78,6 +81,7 @@ struct StorageInfo {
|
||||
uint64_t disk_usage;
|
||||
uint64_t label_indices;
|
||||
uint64_t label_property_indices;
|
||||
uint64_t text_indices;
|
||||
uint64_t existence_constraints;
|
||||
uint64_t unique_constraints;
|
||||
StorageMode storage_mode;
|
||||
@ -95,6 +99,7 @@ static inline nlohmann::json ToJson(const StorageInfo &info) {
|
||||
res["disk"] = info.disk_usage;
|
||||
res["label_indices"] = info.label_indices;
|
||||
res["label_prop_indices"] = info.label_property_indices;
|
||||
res["text_indices"] = info.text_indices;
|
||||
res["existence_constraints"] = info.existence_constraints;
|
||||
res["unique_constraints"] = info.unique_constraints;
|
||||
res["storage_mode"] = storage::StorageModeToString(info.storage_mode);
|
||||
@ -232,6 +237,28 @@ class Storage {
|
||||
|
||||
virtual bool EdgeTypeIndexExists(EdgeTypeId edge_type) const = 0;
|
||||
|
||||
bool TextIndexExists(const std::string &index_name) const {
|
||||
return storage_->indices_.text_index_.IndexExists(index_name);
|
||||
}
|
||||
|
||||
/// Hands the vertex behind `vertex` to the text index, together with the storage's name-id mapper.
void TextIndexAddVertex(const VertexAccessor &vertex) {
  auto &text_index = storage_->indices_.text_index_;
  auto *name_id_mapper = storage_->name_id_mapper_.get();
  text_index.AddNode(vertex.vertex_, name_id_mapper);
}
|
||||
|
||||
/// Asks the text index to update the vertex behind `vertex`; `removed_labels` (empty by default)
/// is passed through unchanged.
void TextIndexUpdateVertex(const VertexAccessor &vertex, const std::vector<LabelId> &removed_labels = {}) {
  auto &text_index = storage_->indices_.text_index_;
  auto *name_id_mapper = storage_->name_id_mapper_.get();
  text_index.UpdateNode(vertex.vertex_, name_id_mapper, removed_labels);
}
|
||||
|
||||
std::vector<Gid> TextIndexSearch(const std::string &index_name, const std::string &search_query,
|
||||
text_search_mode search_mode) const {
|
||||
return storage_->indices_.text_index_.Search(index_name, search_query, search_mode);
|
||||
}
|
||||
|
||||
std::string TextIndexAggregate(const std::string &index_name, const std::string &search_query,
|
||||
const std::string &aggregation_query) const {
|
||||
return storage_->indices_.text_index_.Aggregate(index_name, search_query, aggregation_query);
|
||||
}
|
||||
|
||||
virtual IndicesInfo ListAllIndices() const = 0;
|
||||
|
||||
virtual ConstraintsInfo ListAllConstraints() const = 0;
|
||||
@ -284,6 +311,10 @@ class Storage {
|
||||
|
||||
virtual utils::BasicResult<StorageIndexDefinitionError, void> DropIndex(EdgeTypeId edge_type) = 0;
|
||||
|
||||
void CreateTextIndex(const std::string &index_name, LabelId label, query::DbAccessor *db);
|
||||
|
||||
void DropTextIndex(const std::string &index_name);
|
||||
|
||||
virtual utils::BasicResult<StorageExistenceConstraintDefinitionError, void> CreateExistenceConstraint(
|
||||
LabelId label, PropertyId property) = 0;
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright 2023 Memgraph Ltd.
|
||||
// Copyright 2024 Memgraph Ltd.
|
||||
//
|
||||
// Use of this software is governed by the Business Source License
|
||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
@ -60,6 +60,7 @@
|
||||
\
|
||||
M(ActiveLabelIndices, Index, "Number of active label indices in the system.") \
|
||||
M(ActiveLabelPropertyIndices, Index, "Number of active label property indices in the system.") \
|
||||
M(ActiveTextIndices, Index, "Number of active text indices in the system.") \
|
||||
\
|
||||
M(StreamsCreated, Stream, "Number of Streams created.") \
|
||||
M(MessagesConsumed, Stream, "Number of consumed streamed messages.") \
|
||||
|
@ -187,6 +187,7 @@ enum class TypeId : uint64_t {
|
||||
AST_PROFILE_QUERY,
|
||||
AST_INDEX_QUERY,
|
||||
AST_EDGE_INDEX_QUERY,
|
||||
AST_TEXT_INDEX_QUERY,
|
||||
AST_CREATE,
|
||||
AST_CALL_PROCEDURE,
|
||||
AST_MATCH,
|
||||
|
@ -226,6 +226,6 @@ startup_config_dict = {
|
||||
"experimental_enabled": (
|
||||
"",
|
||||
"",
|
||||
"Experimental features to be used, comma seperated. Options [system-replication, high-availability]",
|
||||
"Experimental features to be used, comma-separated. Options [system-replication, text-search, high-availability]",
|
||||
),
|
||||
}
|
||||
|
6
tests/e2e/text_search/CMakeLists.txt
Normal file
6
tests/e2e/text_search/CMakeLists.txt
Normal file
@ -0,0 +1,6 @@
|
||||
# Copies a single Python file of the text search e2e suite using the shared
# copy_e2e_python_files helper.
function(copy_text_search_e2e_python_files FILE_NAME)
    copy_e2e_python_files(text_search ${FILE_NAME})
endfunction()

copy_text_search_e2e_python_files(common.py)
copy_text_search_e2e_python_files(test_text_search.py)
# The "text search disabled" workload runs this file, so it has to be copied too.
copy_text_search_e2e_python_files(test_text_search_disabled.py)
|
87
tests/e2e/text_search/common.py
Normal file
87
tests/e2e/text_search/common.py
Normal file
@ -0,0 +1,87 @@
|
||||
# Copyright 2023 Memgraph Ltd.
|
||||
#
|
||||
# Use of this software is governed by the Business Source License
|
||||
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
# License, and you may not use this file except in compliance with the Business Source License.
|
||||
#
|
||||
# As of the Change Date specified in that file, in accordance with
|
||||
# the Business Source License, use of this software will be governed
|
||||
# by the Apache License, Version 2.0, included in the file
|
||||
# licenses/APL.txt.
|
||||
|
||||
import typing
|
||||
|
||||
import mgclient
|
||||
import pytest
|
||||
from gqlalchemy import Memgraph
|
||||
|
||||
|
||||
def execute_and_fetch_all(
    cursor: mgclient.Cursor, query: str, params: typing.Optional[dict] = None
) -> typing.List[tuple]:
    """Execute `query` on `cursor` and return all fetched rows.

    Args:
        cursor: cursor the query is executed on.
        query: query text.
        params: query parameters; an empty dict is used when omitted.

    Returns:
        The result of `cursor.fetchall()`.
    """
    # A fresh dict per call avoids sharing one mutable default between calls.
    cursor.execute(query, {} if params is None else params)
    return cursor.fetchall()
|
||||
|
||||
|
||||
@pytest.fixture
def connect(**kwargs) -> typing.Iterator[mgclient.Connection]:
    """Yield an autocommit connection to localhost:7687 with the graph emptied.

    Switches to the `memgraph` database, attempts to drop the `clean` database
    and deletes every node before handing the connection to the test.
    """
    connection = mgclient.connect(host="localhost", port=7687, **kwargs)
    connection.autocommit = True
    cursor = connection.cursor()
    execute_and_fetch_all(cursor, """USE DATABASE memgraph""")
    try:
        execute_and_fetch_all(cursor, """DROP DATABASE clean""")
    except Exception:
        # Best effort: a failed drop is ignored. `Exception` (rather than a bare
        # `except`) keeps KeyboardInterrupt/SystemExit from being swallowed.
        pass
    execute_and_fetch_all(cursor, """MATCH (n) DETACH DELETE n""")
    yield connection
|
||||
|
||||
|
||||
@pytest.fixture
def memgraph(**kwargs) -> Memgraph:
    """Yield a Memgraph client; afterwards drop the database contents and indexes."""
    db = Memgraph()
    yield db
    # Teardown.
    db.drop_database()
    db.drop_indexes()
|
||||
|
||||
|
||||
@pytest.fixture
def memgraph_with_text_indexed_data(**kwargs) -> Memgraph:
    """Yield a Memgraph client preloaded with four nodes and the `complianceDocuments` text index.

    Teardown drops the text index, the database contents and the remaining indexes.
    """
    setup_queries = (
        """CREATE (:Document {title: "Rules2024", version: 1, fulltext: "random works", date: date("2023-11-14")});""",
        """CREATE (:Document {title: "Rules2023", version: 9, fulltext: "text Rules2024", date: date("2023-11-14")});""",
        """CREATE (:Document:Revision {title: "Rules2024", version: 2, fulltext: "random words", date: date("2023-12-15")});""",
        """CREATE (:Revision {title: "OperationSchema", version: 3, date: date("2023-10-01")});""",
        """CREATE TEXT INDEX complianceDocuments ON :Document;""",
    )

    db = Memgraph()
    for setup_query in setup_queries:
        db.execute(setup_query)

    yield db

    db.execute("""DROP TEXT INDEX complianceDocuments;""")
    db.drop_database()
    db.drop_indexes()
|
||||
|
||||
|
||||
@pytest.fixture
def memgraph_with_mixed_data(**kwargs) -> Memgraph:
    """Yield a Memgraph client with one :Document:Revision node, one :Revision node and the
    `complianceDocuments` text index on :Document.

    Teardown drops the text index, the database contents and the remaining indexes.
    """
    setup_queries = (
        """CREATE (:Document:Revision {title: "Rules2024", version: 1, date: date("2023-11-14"), contents: "Lorem ipsum dolor sit amet"});""",
        """CREATE (:Revision {title: "Rules2024", version: 2, date: date("2023-12-15"), contents: "consectetur adipiscing elit"});""",
        """CREATE TEXT INDEX complianceDocuments ON :Document;""",
    )

    db = Memgraph()
    for setup_query in setup_queries:
        db.execute(setup_query)

    yield db

    db.execute("""DROP TEXT INDEX complianceDocuments;""")
    db.drop_database()
    db.drop_indexes()
|
206
tests/e2e/text_search/test_text_search.py
Normal file
206
tests/e2e/text_search/test_text_search.py
Normal file
@ -0,0 +1,206 @@
|
||||
# Copyright 2024 Memgraph Ltd.
|
||||
#
|
||||
# Use of this software is governed by the Business Source License
|
||||
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
# License, and you may not use this file except in compliance with the Business Source License.
|
||||
#
|
||||
# As of the Change Date specified in that file, in accordance with
|
||||
# the Business Source License, use of this software will be governed
|
||||
# by the Apache License, Version 2.0, included in the file
|
||||
# licenses/APL.txt.
|
||||
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
|
||||
import gqlalchemy
|
||||
import mgclient
|
||||
import pytest
|
||||
from common import memgraph, memgraph_with_mixed_data, memgraph_with_text_indexed_data
|
||||
|
||||
GET_RULES_2024_DOCUMENT = """CALL libtext.search("complianceDocuments", "data.title:Rules2024") YIELD node
|
||||
RETURN node.title AS title, node.version AS version
|
||||
ORDER BY version ASC, title ASC;"""
|
||||
|
||||
|
||||
def test_create_index(memgraph):
    """A created text index is the only row reported by SHOW INDEX INFO."""
    memgraph.execute("""CREATE TEXT INDEX exampleIndex ON :Document;""")

    expected_rows = [
        {"index type": "text (name: exampleIndex)", "label": "Document", "property": None, "count": None}
    ]
    reported_rows = list(memgraph.execute_and_fetch("""SHOW INDEX INFO"""))

    assert reported_rows == expected_rows
|
||||
|
||||
|
||||
def test_drop_index(memgraph):
    """After dropping `exampleIndex`, SHOW INDEX INFO reports nothing.

    NOTE(review): the index is not created here — presumably it is left over from
    test_create_index; confirm the intended test ordering.
    """
    memgraph.execute("""DROP TEXT INDEX exampleIndex;""")

    remaining_rows = list(memgraph.execute_and_fetch("""SHOW INDEX INFO"""))

    assert remaining_rows == []
|
||||
|
||||
|
||||
def test_create_existing_index(memgraph):
    """Creating the same text index twice raises an "already exists" database error."""
    memgraph.execute("""CREATE TEXT INDEX duplicatedIndex ON :Document;""")

    expected_error = gqlalchemy.exceptions.GQLAlchemyDatabaseError
    with pytest.raises(expected_error, match='Text index "duplicatedIndex" already exists.'):
        memgraph.execute("""CREATE TEXT INDEX duplicatedIndex ON :Document;""")

    # Cleanup so the index does not leak into later tests.
    memgraph.execute("""DROP TEXT INDEX duplicatedIndex;""")
|
||||
|
||||
|
||||
def test_drop_nonexistent_index(memgraph):
    """Dropping an unknown text index raises a "doesn’t exist" database error."""
    expected_error = gqlalchemy.exceptions.GQLAlchemyDatabaseError
    with pytest.raises(expected_error, match='Text index "noSuchIndex" doesn’t exist.'):
        memgraph.execute("""DROP TEXT INDEX noSuchIndex;""")
|
||||
|
||||
|
||||
def test_text_search_given_property(memgraph_with_text_indexed_data):
    """Searching `data.title:Rules2024` returns versions 1 and 2 of Rules2024."""
    rows = list(memgraph_with_text_indexed_data.execute_and_fetch(GET_RULES_2024_DOCUMENT))

    assert len(rows) == 2
    assert rows == [{"title": "Rules2024", "version": 1}, {"title": "Rules2024", "version": 2}]
|
||||
|
||||
|
||||
def test_text_search_all_properties(memgraph_with_text_indexed_data):
    """`search_all` finds the term in any property: two title hits and one fulltext hit."""
    search_term = "Rules2024"

    query = f"""CALL libtext.search_all("complianceDocuments", "{search_term}") YIELD node
RETURN node
ORDER BY node.version ASC, node.title ASC;"""

    rows = list(memgraph_with_text_indexed_data.execute_and_fetch(query))
    nodes = [row["node"] for row in rows]

    assert len(rows) == 3
    assert nodes[0].title == search_term
    assert nodes[1].title == search_term
    assert search_term in nodes[2].fulltext
|
||||
|
||||
|
||||
def test_regex_text_search(memgraph_with_text_indexed_data):
    """`regex_search` with "wor.*s" returns exactly the two nodes whose fulltext matches."""
    query = """CALL libtext.regex_search("complianceDocuments", "wor.*s") YIELD node
RETURN node
ORDER BY node.version ASC, node.title ASC;"""

    rows = list(memgraph_with_text_indexed_data.execute_and_fetch(query))

    assert len(rows) == 2
    # In this dataset every value matching the regex lives in the `fulltext` property.
    assert re.search("wor.*s", rows[0]["node"].fulltext)
    assert re.search("wor.*s", rows[1]["node"].fulltext)
|
||||
|
||||
|
||||
def test_text_search_aggregate(memgraph_with_text_indexed_data):
    """A value_count aggregation over the Rules2024 title search yields a count of 2.0."""
    compact = (",", ":")
    input_aggregation = json.dumps({"count": {"value_count": {"field": "metadata.gid"}}}, separators=compact)
    expected_aggregation = json.dumps({"count": {"value": 2.0}}, separators=compact)

    query = f"""CALL libtext.aggregate("complianceDocuments", "data.title:Rules2024", '{input_aggregation}')
YIELD aggregation
RETURN aggregation;"""

    rows = list(memgraph_with_text_indexed_data.execute_and_fetch(query))

    assert len(rows) == 1
    assert rows[0]["aggregation"] == expected_aggregation
|
||||
|
||||
|
||||
def test_text_search_query_boolean(memgraph_with_text_indexed_data):
    """A boolean (OR/AND) search query narrows the result down to Rules2024 version 2."""
    query = """CALL libtext.search("complianceDocuments", "(data.title:Rules2023 OR data.title:Rules2024) AND data.fulltext:words") YIELD node
RETURN node.title AS title, node.version AS version
ORDER BY version ASC, title ASC;"""

    rows = list(memgraph_with_text_indexed_data.execute_and_fetch(query))

    assert len(rows) == 1
    assert rows == [{"title": "Rules2024", "version": 2}]
|
||||
|
||||
|
||||
def test_create_indexed_node(memgraph_with_text_indexed_data):
    """A :Document node created after the index exists is returned by the search."""
    memgraph_with_text_indexed_data.execute("""CREATE (:Document {title: "Rules2024", version: 3});""")

    expected_rows = [{"title": "Rules2024", "version": version} for version in (1, 2, 3)]
    rows = list(memgraph_with_text_indexed_data.execute_and_fetch(GET_RULES_2024_DOCUMENT))

    assert len(rows) == 3
    assert rows == expected_rows
|
||||
|
||||
|
||||
def test_delete_indexed_node(memgraph_with_text_indexed_data):
    """A deleted :Document node no longer appears in the search results."""
    memgraph_with_text_indexed_data.execute("""MATCH (n:Document {title: "Rules2024", version: 2}) DETACH DELETE n;""")

    rows = list(memgraph_with_text_indexed_data.execute_and_fetch(GET_RULES_2024_DOCUMENT))

    assert len(rows) == 1
    assert rows == [{"title": "Rules2024", "version": 1}]
|
||||
|
||||
|
||||
def test_add_indexed_label(memgraph_with_mixed_data):
    """Adding the indexed label to an existing node makes that node searchable."""
    memgraph_with_mixed_data.execute("""MATCH (n:Revision {version:2}) SET n:Document;""")

    rows = list(memgraph_with_mixed_data.execute_and_fetch(GET_RULES_2024_DOCUMENT))

    assert len(rows) == 2
    assert rows == [{"title": "Rules2024", "version": 1}, {"title": "Rules2024", "version": 2}]
|
||||
|
||||
|
||||
def test_remove_indexed_label(memgraph_with_mixed_data):
    """Removing the indexed label from the only :Document node empties the search result."""
    memgraph_with_mixed_data.execute("""MATCH (n:Document {version: 1}) REMOVE n:Document;""")

    rows = list(memgraph_with_mixed_data.execute_and_fetch(GET_RULES_2024_DOCUMENT))

    assert len(rows) == 0
|
||||
|
||||
|
||||
def test_update_text_property_of_indexed_node(memgraph_with_text_indexed_data):
    """Changing an indexed node's title is reflected when searching for the new title."""
    memgraph_with_text_indexed_data.execute("""MATCH (n:Document {version:1}) SET n.title = "Rules2030";""")

    query = """CALL libtext.search("complianceDocuments", "data.title:Rules2030") YIELD node
RETURN node.title AS title, node.version AS version
ORDER BY version ASC, title ASC;"""
    rows = list(memgraph_with_text_indexed_data.execute_and_fetch(query))

    assert len(rows) == 1
    assert rows == [{"title": "Rules2030", "version": 1}]
|
||||
|
||||
|
||||
def test_add_unindexable_property_to_indexed_node(memgraph_with_text_indexed_data):
    """Setting a list-valued property on an indexed node must not raise.

    No try/except wrapper: an unexpected exception fails the test with its own
    type, message and traceback instead of a bare `assert False`.
    """
    memgraph_with_text_indexed_data.execute("""MATCH (n:Document {version:1}) SET n.randomList = [2, 3, 4, 5];""")
|
||||
|
||||
|
||||
def test_remove_indexable_property_from_indexed_node(memgraph_with_text_indexed_data):
    """Removing title, version, fulltext and date from an indexed node must not raise.

    No try/except wrapper: an unexpected exception fails the test with its own
    type, message and traceback instead of a bare `assert False`.
    """
    memgraph_with_text_indexed_data.execute(
        """MATCH (n:Document {version:1}) REMOVE n.title, n.version, n.fulltext, n.date;"""
    )
|
||||
|
||||
|
||||
def test_remove_unindexable_property_from_indexed_node(memgraph_with_text_indexed_data):
    """Removing the `date` property from an indexed node must not raise.

    Uses `execute` (as the sibling tests do) so the query is actually sent:
    `execute_and_fetch` returns a lazy iterator, and without consuming it the
    statement would never run. Any exception propagates and fails the test
    with its own traceback.
    """
    memgraph_with_text_indexed_data.execute("""MATCH (n:Document {date: date("2023-12-15")}) REMOVE n.date;""")
|
||||
|
||||
|
||||
def test_text_search_nonexistent_index(memgraph_with_text_indexed_data):
    """Searching an unknown text index raises a "doesn’t exist" database error."""
    query = """CALL libtext.search("noSuchIndex", "data.fulltext:words") YIELD node
RETURN node.title AS title, node.version AS version
ORDER BY version ASC, title ASC;"""

    with pytest.raises(mgclient.DatabaseError, match='Text index "noSuchIndex" doesn’t exist.'):
        # list() forces the lazy result to be consumed so the query actually runs.
        list(memgraph_with_text_indexed_data.execute_and_fetch(query))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(pytest.main([__file__, "-rA"]))
|
69
tests/e2e/text_search/test_text_search_disabled.py
Normal file
69
tests/e2e/text_search/test_text_search_disabled.py
Normal file
@ -0,0 +1,69 @@
|
||||
# Copyright 2024 Memgraph Ltd.
|
||||
#
|
||||
# Use of this software is governed by the Business Source License
|
||||
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
# License, and you may not use this file except in compliance with the Business Source License.
|
||||
#
|
||||
# As of the Change Date specified in that file, in accordance with
|
||||
# the Business Source License, use of this software will be governed
|
||||
# by the Apache License, Version 2.0, included in the file
|
||||
# licenses/APL.txt.
|
||||
|
||||
import json
|
||||
import sys
|
||||
|
||||
import gqlalchemy
|
||||
import pytest
|
||||
from common import memgraph
|
||||
|
||||
TEXT_SEARCH_DISABLED_ERROR = (
|
||||
"To use text indices and text search, start Memgraph with the experimental text search feature enabled."
|
||||
)
|
||||
|
||||
|
||||
def test_create_index(memgraph):
    """CREATE TEXT INDEX is rejected while the text search experiment is disabled."""
    expected_error = gqlalchemy.exceptions.GQLAlchemyDatabaseError
    with pytest.raises(expected_error, match=TEXT_SEARCH_DISABLED_ERROR):
        memgraph.execute("""CREATE TEXT INDEX exampleIndex ON :Document;""")
|
||||
|
||||
|
||||
def test_drop_index(memgraph):
    """DROP TEXT INDEX is rejected while the text search experiment is disabled."""
    expected_error = gqlalchemy.exceptions.GQLAlchemyDatabaseError
    with pytest.raises(expected_error, match=TEXT_SEARCH_DISABLED_ERROR):
        memgraph.execute("""DROP TEXT INDEX exampleIndex;""")
|
||||
|
||||
|
||||
def test_text_search_given_property(memgraph):
    """libtext.search is rejected while the text search experiment is disabled."""
    query = """CALL libtext.search("complianceDocuments", "data.title:Rules2024") YIELD node
RETURN node;"""

    with pytest.raises(gqlalchemy.exceptions.GQLAlchemyDatabaseError, match=TEXT_SEARCH_DISABLED_ERROR):
        memgraph.execute(query)
|
||||
|
||||
|
||||
def test_text_search_all_properties(memgraph):
    """libtext.search_all is rejected while the text search experiment is disabled."""
    query = """CALL libtext.search_all("complianceDocuments", "Rules2024") YIELD node
RETURN node;"""

    with pytest.raises(gqlalchemy.exceptions.GQLAlchemyDatabaseError, match=TEXT_SEARCH_DISABLED_ERROR):
        memgraph.execute(query)
|
||||
|
||||
|
||||
def test_regex_text_search(memgraph):
    """libtext.regex_search is rejected while the text search experiment is disabled."""
    query = """CALL libtext.regex_search("complianceDocuments", "wor.*s") YIELD node
RETURN node;"""

    with pytest.raises(gqlalchemy.exceptions.GQLAlchemyDatabaseError, match=TEXT_SEARCH_DISABLED_ERROR):
        memgraph.execute(query)
|
||||
|
||||
|
||||
def test_text_search_aggregate(memgraph):
    """libtext.aggregate is rejected while the text search experiment is disabled."""
    input_aggregation = json.dumps({"count": {"value_count": {"field": "metadata.gid"}}}, separators=(",", ":"))
    query = f"""CALL libtext.aggregate("complianceDocuments", "wor.*s", '{input_aggregation}') YIELD aggregation
RETURN aggregation;"""

    with pytest.raises(gqlalchemy.exceptions.GQLAlchemyDatabaseError, match=TEXT_SEARCH_DISABLED_ERROR):
        memgraph.execute(query)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(pytest.main([__file__, "-rA"]))
|
33
tests/e2e/text_search/workloads.yaml
Normal file
33
tests/e2e/text_search/workloads.yaml
Normal file
@ -0,0 +1,33 @@
|
||||
text_search_cluster: &text_search_cluster
|
||||
cluster:
|
||||
main:
|
||||
args:
|
||||
[
|
||||
"--bolt-port",
|
||||
"7687",
|
||||
"--log-level=TRACE",
|
||||
"--experimental-enabled=text-search",
|
||||
]
|
||||
log_file: "text_search.log"
|
||||
setup_queries: []
|
||||
validation_queries: []
|
||||
|
||||
text_search_disabled_cluster: &text_search_disabled_cluster
|
||||
cluster:
|
||||
main:
|
||||
args: ["--bolt-port", "7687", "--log-level=TRACE"]
|
||||
log_file: "text_search.log"
|
||||
setup_queries: []
|
||||
validation_queries: []
|
||||
|
||||
workloads:
|
||||
- name: "Test behavior of text search in Memgraph"
|
||||
binary: "tests/e2e/pytest_runner.sh"
|
||||
proc: "tests/e2e/text_search/query_modules/"
|
||||
args: ["text_search/test_text_search.py"]
|
||||
<<: *text_search_cluster
|
||||
- name: "Test behavior of text search in Memgraph when disabled"
|
||||
binary: "tests/e2e/pytest_runner.sh"
|
||||
proc: "tests/e2e/text_search/query_modules/"
|
||||
args: ["text_search/test_text_search_disabled.py"]
|
||||
<<: *text_search_disabled_cluster
|
@ -71,6 +71,11 @@ struct DatabaseState {
|
||||
std::string property;
|
||||
};
|
||||
|
||||
// One text index captured in a DatabaseState snapshot: the index name and the
// name of the label it is defined on.
struct TextItem {
|
||||
std::string index_name;
|
||||
std::string label;
|
||||
};
|
||||
|
||||
struct LabelPropertiesItem {
|
||||
std::string label;
|
||||
std::set<std::string, std::less<>> properties;
|
||||
@ -80,6 +85,7 @@ struct DatabaseState {
|
||||
std::set<Edge> edges;
|
||||
std::set<LabelItem> label_indices;
|
||||
std::set<LabelPropertyItem> label_property_indices;
|
||||
std::set<TextItem> text_indices;
|
||||
std::set<LabelPropertyItem> existence_constraints;
|
||||
std::set<LabelPropertiesItem> unique_constraints;
|
||||
};
|
||||
@ -106,6 +112,10 @@ bool operator<(const DatabaseState::LabelPropertyItem &first, const DatabaseStat
|
||||
return first.property < second.property;
|
||||
}
|
||||
|
||||
/// Orders text index items lexicographically: by index name first, then by label.
///
/// The items are stored in a std::set, which requires a strict weak ordering. Requiring
/// *both* fields to be smaller does not provide one: items differing in only one field, or
/// with the fields ordered in opposite directions, would be treated as equivalent and
/// silently dropped on insertion.
bool operator<(const DatabaseState::TextItem &first, const DatabaseState::TextItem &second) {
  if (first.index_name != second.index_name) return first.index_name < second.index_name;
  return first.label < second.label;
}
|
||||
|
||||
bool operator<(const DatabaseState::LabelPropertiesItem &first, const DatabaseState::LabelPropertiesItem &second) {
|
||||
if (first.label != second.label) return first.label < second.label;
|
||||
return first.properties < second.properties;
|
||||
@ -128,6 +138,10 @@ bool operator==(const DatabaseState::LabelPropertyItem &first, const DatabaseSta
|
||||
return first.label == second.label && first.property == second.property;
|
||||
}
|
||||
|
||||
/// Two text index items are equal when both the index name and the label match.
bool operator==(const DatabaseState::TextItem &first, const DatabaseState::TextItem &second) {
  if (first.index_name != second.index_name) return false;
  return first.label == second.label;
}
|
||||
|
||||
bool operator==(const DatabaseState::LabelPropertiesItem &first, const DatabaseState::LabelPropertiesItem &second) {
|
||||
return first.label == second.label && first.properties == second.properties;
|
||||
}
|
||||
@ -185,6 +199,7 @@ DatabaseState GetState(memgraph::storage::Storage *db) {
|
||||
// Capture all indices
|
||||
std::set<DatabaseState::LabelItem> label_indices;
|
||||
std::set<DatabaseState::LabelPropertyItem> label_property_indices;
|
||||
std::set<DatabaseState::TextItem> text_indices;
|
||||
{
|
||||
auto info = dba->ListAllIndices();
|
||||
for (const auto &item : info.label) {
|
||||
@ -193,6 +208,9 @@ DatabaseState GetState(memgraph::storage::Storage *db) {
|
||||
for (const auto &item : info.label_property) {
|
||||
label_property_indices.insert({dba->LabelToName(item.first), dba->PropertyToName(item.second)});
|
||||
}
|
||||
for (const auto &item : info.text_indices) {
|
||||
text_indices.insert({item.first, dba->LabelToName(item.second)});
|
||||
}
|
||||
}
|
||||
|
||||
// Capture all constraints
|
||||
@ -212,7 +230,8 @@ DatabaseState GetState(memgraph::storage::Storage *db) {
|
||||
}
|
||||
}
|
||||
|
||||
return {vertices, edges, label_indices, label_property_indices, existence_constraints, unique_constraints};
|
||||
return {vertices, edges, label_indices, label_property_indices, text_indices, existence_constraints,
|
||||
unique_constraints};
|
||||
}
|
||||
|
||||
auto Execute(memgraph::query::InterpreterContext *context, memgraph::dbms::DatabaseAccess db,
|
||||
|
@ -358,6 +358,8 @@ TEST_F(DecoderEncoderTest, PropertyValueInvalidMarker) {
|
||||
case memgraph::storage::durability::Marker::DELTA_LABEL_PROPERTY_INDEX_STATS_CLEAR:
|
||||
case memgraph::storage::durability::Marker::DELTA_EDGE_TYPE_INDEX_CREATE:
|
||||
case memgraph::storage::durability::Marker::DELTA_EDGE_TYPE_INDEX_DROP:
|
||||
case memgraph::storage::durability::Marker::DELTA_TEXT_INDEX_CREATE:
|
||||
case memgraph::storage::durability::Marker::DELTA_TEXT_INDEX_DROP:
|
||||
case memgraph::storage::durability::Marker::DELTA_EXISTENCE_CONSTRAINT_CREATE:
|
||||
case memgraph::storage::durability::Marker::DELTA_EXISTENCE_CONSTRAINT_DROP:
|
||||
case memgraph::storage::durability::Marker::DELTA_UNIQUE_CONSTRAINT_CREATE:
|
||||
|
@ -146,6 +146,7 @@ TYPED_TEST(InfoTest, InfoCheck) {
|
||||
ASSERT_LT(info.disk_usage, 1000'000);
|
||||
ASSERT_EQ(info.label_indices, 1);
|
||||
ASSERT_EQ(info.label_property_indices, 1);
|
||||
ASSERT_EQ(info.text_indices, 0);
|
||||
ASSERT_EQ(info.existence_constraints, 0);
|
||||
ASSERT_EQ(info.unique_constraints, 2);
|
||||
ASSERT_EQ(info.storage_mode, this->mode);
|
||||
|
@ -53,6 +53,10 @@ memgraph::storage::durability::WalDeltaData::Type StorageMetadataOperationToWalD
|
||||
return memgraph::storage::durability::WalDeltaData::Type::LABEL_PROPERTY_INDEX_STATS_SET;
|
||||
case memgraph::storage::durability::StorageMetadataOperation::LABEL_PROPERTY_INDEX_STATS_CLEAR:
|
||||
return memgraph::storage::durability::WalDeltaData::Type::LABEL_PROPERTY_INDEX_STATS_CLEAR;
|
||||
case memgraph::storage::durability::StorageMetadataOperation::TEXT_INDEX_CREATE:
|
||||
return memgraph::storage::durability::WalDeltaData::Type::TEXT_INDEX_CREATE;
|
||||
case memgraph::storage::durability::StorageMetadataOperation::TEXT_INDEX_DROP:
|
||||
return memgraph::storage::durability::WalDeltaData::Type::TEXT_INDEX_DROP;
|
||||
case memgraph::storage::durability::StorageMetadataOperation::EXISTENCE_CONSTRAINT_CREATE:
|
||||
return memgraph::storage::durability::WalDeltaData::Type::EXISTENCE_CONSTRAINT_CREATE;
|
||||
case memgraph::storage::durability::StorageMetadataOperation::EXISTENCE_CONSTRAINT_DROP:
|
||||
@ -252,7 +256,7 @@ class DeltaGenerator final {
|
||||
ASSERT_TRUE(false) << "Unexpected statistics operation!";
|
||||
}
|
||||
}
|
||||
wal_file_.AppendOperation(operation, label_id, property_ids, l_stats, lp_stats, timestamp_);
|
||||
wal_file_.AppendOperation(operation, std::nullopt, label_id, property_ids, l_stats, lp_stats, timestamp_);
|
||||
if (valid_) {
|
||||
UpdateStats(timestamp_, 1);
|
||||
memgraph::storage::durability::WalDeltaData data;
|
||||
@ -271,6 +275,8 @@ class DeltaGenerator final {
|
||||
break;
|
||||
case memgraph::storage::durability::StorageMetadataOperation::LABEL_PROPERTY_INDEX_CREATE:
|
||||
case memgraph::storage::durability::StorageMetadataOperation::LABEL_PROPERTY_INDEX_DROP:
|
||||
case memgraph::storage::durability::StorageMetadataOperation::TEXT_INDEX_CREATE:
|
||||
case memgraph::storage::durability::StorageMetadataOperation::TEXT_INDEX_DROP:
|
||||
case memgraph::storage::durability::StorageMetadataOperation::EXISTENCE_CONSTRAINT_CREATE:
|
||||
case memgraph::storage::durability::StorageMetadataOperation::EXISTENCE_CONSTRAINT_DROP:
|
||||
data.operation_label_property.label = label;
|
||||
@ -313,6 +319,8 @@ class DeltaGenerator final {
|
||||
case memgraph::storage::durability::StorageMetadataOperation::LABEL_INDEX_STATS_SET:
|
||||
case memgraph::storage::durability::StorageMetadataOperation::LABEL_PROPERTY_INDEX_CREATE:
|
||||
case memgraph::storage::durability::StorageMetadataOperation::LABEL_PROPERTY_INDEX_DROP:
|
||||
case memgraph::storage::durability::StorageMetadataOperation::TEXT_INDEX_CREATE:
|
||||
case memgraph::storage::durability::StorageMetadataOperation::TEXT_INDEX_DROP:
|
||||
case memgraph::storage::durability::StorageMetadataOperation::EXISTENCE_CONSTRAINT_CREATE:
|
||||
case memgraph::storage::durability::StorageMetadataOperation::EXISTENCE_CONSTRAINT_DROP:;
|
||||
case memgraph::storage::durability::StorageMetadataOperation::LABEL_PROPERTY_INDEX_STATS_SET:
|
||||
|
Loading…
Reference in New Issue
Block a user