diff --git a/src/storage/v2/CMakeLists.txt b/src/storage/v2/CMakeLists.txt index 150a02cc7..9df453571 100644 --- a/src/storage/v2/CMakeLists.txt +++ b/src/storage/v2/CMakeLists.txt @@ -20,6 +20,7 @@ add_library(mg-storage-v2 STATIC vertex_info_cache.cpp storage.cpp indices/indices.cpp + indices/text_index.cpp all_vertices_iterable.cpp vertices_iterable.cpp inmemory/storage.cpp @@ -42,4 +43,4 @@ add_library(mg-storage-v2 STATIC inmemory/replication/recovery.cpp ) -target_link_libraries(mg-storage-v2 mg::replication Threads::Threads mg-utils gflags absl::flat_hash_map mg-rpc mg-slk mg-events mg-memory) +target_link_libraries(mg-storage-v2 mg::replication Threads::Threads mg-utils mg-flags gflags absl::flat_hash_map mg-rpc mg-slk mg-events mg-memory mgcxx_text_search tantivy_text_search) diff --git a/src/storage/v2/indices/text_index.cpp b/src/storage/v2/indices/text_index.cpp new file mode 100644 index 000000000..d97206687 --- /dev/null +++ b/src/storage/v2/indices/text_index.cpp @@ -0,0 +1,81 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +#include "storage/v2/indices/text_index.hpp" +#include "query/db_accessor.hpp" +#include "storage/v2/mgcxx_mock.hpp" + +namespace memgraph::storage { + +void TextIndex::UpdateOnAddLabel(LabelId added_label, Vertex *vertex_after_update, const Transaction &tx) const {} + +void TextIndex::UpdateOnRemoveLabel(LabelId removed_label, Vertex *vertex_after_update, const Transaction &tx) const {} + +void TextIndex::UpdateOnSetProperty(PropertyId property, const PropertyValue &value, Vertex *vertex, + const Transaction &tx) const {} + +std::vector TextIndex::GetApplicableTextIndices(Vertex *vertex) { + std::vector applicable_text_indices; + for (const auto &label : vertex->labels) { + if (label_to_index_.contains(label)) { + applicable_text_indices.push_back(&index_.at(label_to_index_.at(label))); + } + } + return applicable_text_indices; +} + +bool TextIndex::CreateIndex(std::string index_name, LabelId label, memgraph::query::DbAccessor *db) { + auto index_config = mgcxx_mock::text_search::IndexConfig{ + .mappings = "TODO devise the mapping by reading the indexable nodes' properties"}; + auto new_index = mgcxx_mock::text_search::Mock::create_index(index_name, index_config); + index_[index_name] = new_index; + label_to_index_[label] = index_name; + return true; + + // TODO add documents (indexable nodes) to index +} + +bool TextIndex::DropIndex(std::string index_name) { + memcxx::text_search::drop_index(index_name); + + mgcxx_mock::text_search::Mock::drop_index(index_name); + index_.erase(index_name); + std::erase_if(label_to_index_, [index_name](const auto &item) { return item.second == index_name; }); + return true; +} + +bool TextIndex::IndexExists(std::string index_name) const { return index_.contains(index_name); } + +std::vector TextIndex::Search(std::string index_name, std::string search_query) const { + auto input = mgcxx_mock::text_search::SearchInput{.search_query = search_query, .return_fields = {"metadata.gid"}}; + // Basic check for search fields in the query 
(Tantivy syntax delimits them with a `:` to the right) + if (search_query.find(":") == std::string::npos) { + input.search_fields = {"data"}; + } + + std::vector found_nodes; + for (const auto &doc : mgcxx_mock::text_search::Mock::search(index_.at(index_name), input).docs) { + found_nodes.push_back(storage::Gid::FromString(doc.data)); + } + return found_nodes; +} + +std::vector TextIndex::ListIndices() const { + std::vector ret; + ret.reserve(index_.size()); + for (const auto &item : index_) { + ret.push_back(item.first); + } + return ret; +} +uint64_t TextIndex::ApproximateVertexCount(std::string index_name) const { return 10; } + +} // namespace memgraph::storage diff --git a/src/storage/v2/indices/text_index.hpp b/src/storage/v2/indices/text_index.hpp index 6b51bbcf0..c567db9b3 100644 --- a/src/storage/v2/indices/text_index.hpp +++ b/src/storage/v2/indices/text_index.hpp @@ -9,8 +9,17 @@ // by the Apache License, Version 2.0, included in the file // licenses/APL.txt. -#include "query/db_accessor.hpp" +#pragma once + +#include "storage/v2/id_types.hpp" #include "storage/v2/mgcxx_mock.hpp" +#include "storage/v2/transaction.hpp" +#include "storage/v2/vertex.hpp" +#include "text_search.hpp" + +namespace memgraph::query { +class DbAccessor; +} namespace memgraph::storage { @@ -25,70 +34,29 @@ class TextIndex { ~TextIndex() = default; - void UpdateOnAddLabel(LabelId added_label, Vertex *vertex_after_update, const Transaction &tx) {} + std::map index_; + std::map label_to_index_; - void UpdateOnRemoveLabel(LabelId removed_label, Vertex *vertex_after_update, const Transaction &tx) {} + void UpdateOnAddLabel(LabelId added_label, Vertex *vertex_after_update, const Transaction &tx) const; - void UpdateOnSetProperty(PropertyId property, const PropertyValue &value, Vertex *vertex, const Transaction &tx) {} + void UpdateOnRemoveLabel(LabelId removed_label, Vertex *vertex_after_update, const Transaction &tx) const; - std::vector GetApplicableTextIndices(Vertex *vertex, 
Storage *storage) { - std::vector applicable_text_indices; - for (const auto &label : vertex->labels) { - if (label_to_index_.contains(label)) { - applicable_text_indices.push_back(&index_.at(label_to_index_.at(label))); - } - } - return applicable_text_indices; - } + void UpdateOnSetProperty(PropertyId property, const PropertyValue &value, Vertex *vertex, + const Transaction &tx) const; - bool CreateIndex(std::string index_name, LabelId label, memgraph::query::DbAccessor *db) { - auto index_config = mgcxx_mock::text_search::IndexConfig{ - .mappings = "TODO devise the mapping by reading the indexable nodes' properties"}; - auto new_index = mgcxx_mock::text_search::Mock::create_index(index_name, index_config); - index_[index_name] = new_index; - label_to_index_[label] = index_name; - return true; + std::vector GetApplicableTextIndices(Vertex *vertex); - // TODO add documents (indexable nodes) to index - } + bool CreateIndex(std::string index_name, LabelId label, memgraph::query::DbAccessor *db); - bool DropIndex(std::string index_name) { - mgcxx_mock::text_search::Mock::drop_index(index_name); - index_.erase(index_name); - std::erase_if(label_to_index_, [index_name](const auto &item) { return item.second == index_name; }); - return true; - } + bool DropIndex(std::string index_name); - bool IndexExists(std::string index_name) { return index_.contains(index_name); } + bool IndexExists(std::string index_name) const; - std::vector Search(std::string index_name, std::string search_query) { - auto input = mgcxx_mock::text_search::SearchInput { - .search_query = search_query, .return_fields = {{"metadata.gid"}}; - // Basic check for search fields in the query (Tantivy syntax delimits them with a `:` to the right) - if (search_query.find(":") == std::string::npos) { - input.search_fields = {"data"}; - } + std::vector Search(std::string index_name, std::string search_query) const; - std::vector found_nodes; - for (const auto &doc : 
mgcxx_mock::text_search::Mock::search(index_.at(index_name), input).docs) { - found_nodes.push_back(storage::Gid::FromString(doc.data)); - } - return found_nodes; - } + std::vector ListIndices() const; - std::vector ListIndices() { - std::vector ret; - ret.reserve(index_.size()); - for (const auto &item : index_) { - ret.push_back(item.first); - } - return ret; - } - - uint64_t ApproximateVertexCount(std::string index_name) { return 10; } - - std::map index_; - std::map label_to_index_; - }; + uint64_t ApproximateVertexCount(std::string index_name) const; +}; } // namespace memgraph::storage diff --git a/src/storage/v2/storage.hpp b/src/storage/v2/storage.hpp index 2c44b717c..1e0466d34 100644 --- a/src/storage/v2/storage.hpp +++ b/src/storage/v2/storage.hpp @@ -269,7 +269,7 @@ class Storage { virtual utils::BasicResult DropIndex(LabelId label, PropertyId property) = 0; virtual utils::BasicResult CreateTextIndex(std::string index_name, LabelId label, - DbAccessor *db) { + query::DbAccessor *db) { storage_->indices_.text_index_->CreateIndex(index_name, label, db); return {}; } diff --git a/src/storage/v2/vertex_accessor.cpp b/src/storage/v2/vertex_accessor.cpp index 009624f93..59eb7a79d 100644 --- a/src/storage/v2/vertex_accessor.cpp +++ b/src/storage/v2/vertex_accessor.cpp @@ -277,7 +277,7 @@ Result VertexAccessor::SetProperty(PropertyId property, const Pro [transaction = transaction_, storage = storage_, vertex = vertex_, &value, &property, &current_value]() { CreateAndLinkDelta(transaction, vertex, Delta::SetPropertyTag(), property, current_value); if (flags::run_time::GetTextSearchEnabled()) { - for (const auto *index_context : storage->indices_.text_index_->GetApplicableTextIndices(vertex, storage)) { + for (const auto *index_context : storage->indices_.text_index_->GetApplicableTextIndices(vertex)) { auto search_input = mgcxx_mock::text_search::SearchInput{ .search_query = fmt::format("metadata.gid:{}", vertex->gid.AsInt()), .return_fields = {"data"}}; @@ -323,7 
+323,7 @@ Result VertexAccessor::InitProperties(const std::mapindices_.text_index_->GetApplicableTextIndices(vertex, storage)) { + for (const auto *index_context : storage->indices_.text_index_->GetApplicableTextIndices(vertex)) { auto new_properties_document = mgcxx_mock::text_search::DocumentInput{}; // TODO (pending real Tantivy operation): create a JSON, set // properties and convert to string @@ -366,7 +366,7 @@ Result>> Vertex [storage = storage_, transaction = transaction_, vertex = vertex_, &properties, &id_old_new_change]() { id_old_new_change.emplace(vertex->properties.UpdateProperties(properties)); if (flags::run_time::GetTextSearchEnabled()) { - for (const auto *index_context : storage->indices_.text_index_->GetApplicableTextIndices(vertex, storage)) { + for (const auto *index_context : storage->indices_.text_index_->GetApplicableTextIndices(vertex)) { auto search_input = mgcxx_mock::text_search::SearchInput{ .search_query = fmt::format("metadata.gid:{}", vertex->gid.AsInt()), .return_fields = {"data"}}; @@ -425,7 +425,7 @@ Result> VertexAccessor::ClearProperties() { vertex->properties.ClearProperties(); if (flags::run_time::GetTextSearchEnabled()) { - for (const auto *index_context : storage->indices_.text_index_->GetApplicableTextIndices(vertex, storage)) { + for (const auto *index_context : storage->indices_.text_index_->GetApplicableTextIndices(vertex)) { auto search_input = mgcxx_mock::text_search::SearchInput{.search_query = fmt::format("metadata.gid:{}", vertex->gid.AsInt())}; mgcxx_mock::text_search::Mock::delete_document(*index_context, search_input, true);