Integrate mgcxx into text search

This commit is contained in:
Ante Pušić 2024-01-15 17:12:03 +01:00
parent 5fac8de106
commit 1d6fa4281d
3 changed files with 52 additions and 53 deletions

View File

@ -12,6 +12,7 @@
#include "storage/v2/indices/text_index.hpp"
#include "query/db_accessor.hpp"
#include "storage/v2/mgcxx_mock.hpp"
#include "text_search.hpp"
namespace memgraph::storage {
@ -22,8 +23,8 @@ void TextIndex::UpdateOnRemoveLabel(LabelId removed_label, Vertex *vertex_after_
// Hook invoked with the property that was set, its value, the affected vertex and the transaction.
// The body is empty, so calling it currently has no effect on any text index.
// NOTE(review): presumably a placeholder until property changes are forwarded to the text-search
// backend -- confirm against the callers before relying on it.
void TextIndex::UpdateOnSetProperty(PropertyId property, const PropertyValue &value, Vertex *vertex,
const Transaction &tx) const {}
std::vector<mgcxx_mock::text_search::IndexContext *> TextIndex::GetApplicableTextIndices(Vertex *vertex) {
std::vector<mgcxx_mock::text_search::IndexContext *> applicable_text_indices;
std::vector<memcxx::text_search::Context *> TextIndex::GetApplicableTextIndices(Vertex *vertex) {
std::vector<memcxx::text_search::Context *> applicable_text_indices;
for (const auto &label : vertex->labels) {
if (label_to_index_.contains(label)) {
applicable_text_indices.push_back(&index_.at(label_to_index_.at(label)));
@ -33,11 +34,10 @@ std::vector<mgcxx_mock::text_search::IndexContext *> TextIndex::GetApplicableTex
}
bool TextIndex::CreateIndex(std::string index_name, LabelId label, memgraph::query::DbAccessor *db) {
auto index_config = mgcxx_mock::text_search::IndexConfig{
auto index_config = memcxx::text_search::IndexConfig{
.mappings = "TODO devise the mapping by reading the indexable nodes' properties"};
auto new_index = mgcxx_mock::text_search::Mock::create_index(index_name, index_config);
index_[index_name] = new_index;
label_to_index_[label] = index_name;
index_.emplace(index_name, memcxx::text_search::create_index(index_name, index_config));
label_to_index_.emplace(label, index_name);
return true;
// TODO add documents (indexable nodes) to index
@ -45,8 +45,6 @@ bool TextIndex::CreateIndex(std::string index_name, LabelId label, memgraph::que
bool TextIndex::DropIndex(std::string index_name) {
memcxx::text_search::drop_index(index_name);
mgcxx_mock::text_search::Mock::drop_index(index_name);
index_.erase(index_name);
std::erase_if(label_to_index_, [index_name](const auto &item) { return item.second == index_name; });
return true;
@ -54,16 +52,16 @@ bool TextIndex::DropIndex(std::string index_name) {
// Returns true when `index_` holds an entry keyed by `index_name`, false otherwise.
// Only the name -> context map is consulted; `label_to_index_` is not checked here.
bool TextIndex::IndexExists(std::string index_name) const { return index_.contains(index_name); }
// Runs `search_query` against the text index registered under `index_name` and returns the Gids
// parsed from the matching documents.
//
// NOTE(review): this listing interleaves two variants of several statements -- one using
// `mgcxx_mock::text_search::...` and one using `memcxx::text_search::...`. They look like the
// before/after forms of the same line from a diff; only one of each pair is expected to be live.
//
// - The index context is fetched with `index_.at(index_name)`; `index_` is a std::map, so an unknown
//   name raises std::out_of_range rather than returning an empty result.
// - The `memcxx` variant of the signature drops the trailing `const`.
//   NOTE(review): presumably because the memcxx search call needs a mutable context -- confirm.
std::vector<Gid> TextIndex::Search(std::string index_name, std::string search_query) const {
auto input = mgcxx_mock::text_search::SearchInput{.search_query = search_query, .return_fields = {"metadata.gid"}};
// Basic check for search fields in the query (Tantivy syntax delimits them with a `:` to the right)I
std::vector<Gid> TextIndex::Search(std::string index_name, std::string search_query) {
// Only the "metadata.gid" field is requested back from the backend.
auto input = memcxx::text_search::SearchInput{.search_query = search_query, .return_fields = {"metadata.gid"}};
// Basic check for search fields in the query (Tantivy syntax delimits them with a `:` to the right)
// When the query contains no `:` at all, the search is restricted to the "data" field.
if (search_query.find(":") == std::string::npos) {
input.search_fields = {"data"};
}
std::vector<Gid> found_nodes;
// Each returned document's `data` payload is converted to a Gid via Gid::FromString.
for (const auto &doc : mgcxx_mock::text_search::Mock::search(index_.at(index_name), input).docs) {
found_nodes.push_back(storage::Gid::FromString(doc.data));
for (const auto &doc : memcxx::text_search::search(index_.at(index_name), input).docs) {
// NOTE(review): `doc.data.data()` hands FromString a raw character pointer; this assumes the
// payload is exactly the textual gid and is NUL-terminated -- TODO confirm against the backend type.
found_nodes.push_back(storage::Gid::FromString(doc.data.data()));
}
return found_nodes;
}

View File

@ -34,7 +34,7 @@ class TextIndex {
~TextIndex() = default;
std::map<std::string, mgcxx_mock::text_search::IndexContext> index_;
std::map<std::string, memcxx::text_search::Context> index_;
std::map<LabelId, std::string> label_to_index_;
void UpdateOnAddLabel(LabelId added_label, Vertex *vertex_after_update, const Transaction &tx) const;
@ -44,7 +44,7 @@ class TextIndex {
void UpdateOnSetProperty(PropertyId property, const PropertyValue &value, Vertex *vertex,
const Transaction &tx) const;
std::vector<mgcxx_mock::text_search::IndexContext *> GetApplicableTextIndices(Vertex *vertex);
std::vector<memcxx::text_search::Context *> GetApplicableTextIndices(Vertex *vertex);
bool CreateIndex(std::string index_name, LabelId label, memgraph::query::DbAccessor *db);
@ -52,7 +52,7 @@ class TextIndex {
bool IndexExists(std::string index_name) const;
std::vector<Gid> Search(std::string index_name, std::string search_query) const;
std::vector<Gid> Search(std::string index_name, std::string search_query);
std::vector<std::string> ListIndices() const;

View File

@ -30,6 +30,7 @@
#include "storage/v2/storage.hpp"
#include "storage/v2/vertex_info_cache.hpp"
#include "storage/v2/vertex_info_helpers.hpp"
#include "text_search.hpp"
#include "utils/atomic_memory_block.hpp"
#include "utils/logging.hpp"
#include "utils/memory_tracker.hpp"
@ -277,16 +278,16 @@ Result<PropertyValue> VertexAccessor::SetProperty(PropertyId property, const Pro
[transaction = transaction_, storage = storage_, vertex = vertex_, &value, &property, &current_value]() {
CreateAndLinkDelta(transaction, vertex, Delta::SetPropertyTag(), property, current_value);
if (flags::run_time::GetTextSearchEnabled()) {
for (const auto *index_context : storage->indices_.text_index_->GetApplicableTextIndices(vertex)) {
auto search_input = mgcxx_mock::text_search::SearchInput{
for (auto *index_context : storage->indices_.text_index_->GetApplicableTextIndices(vertex)) {
auto search_input = memcxx::text_search::SearchInput{
.search_query = fmt::format("metadata.gid:{}", vertex->gid.AsInt()), .return_fields = {"data"}};
auto search_result = mgcxx_mock::text_search::Mock::search(*index_context, search_input);
mgcxx_mock::text_search::Mock::delete_document(*index_context, search_input, true);
auto search_result = memcxx::text_search::search(*index_context, search_input);
// memcxx::text_search::delete_document(*index_context, search_input, true);
auto new_properties = search_result.docs[0].data; // TODO (pending real Tantivy results): parse result to
// JSON, set property and convert back to string
auto new_properties_document = mgcxx_mock::text_search::DocumentInput{.data = new_properties};
mgcxx_mock::text_search::Mock::add(*index_context, new_properties_document, true);
auto new_properties_document = memcxx::text_search::DocumentInput{.data = new_properties};
memcxx::text_search::add(*index_context, new_properties_document, true);
}
}
vertex->properties.SetProperty(property, value);
@ -323,11 +324,11 @@ Result<bool> VertexAccessor::InitProperties(const std::map<storage::PropertyId,
return;
}
if (flags::run_time::GetTextSearchEnabled()) {
for (const auto *index_context : storage->indices_.text_index_->GetApplicableTextIndices(vertex)) {
for (auto *index_context : storage->indices_.text_index_->GetApplicableTextIndices(vertex)) {
auto new_properties_document =
mgcxx_mock::text_search::DocumentInput{}; // TODO (pending real Tantivy operation): create a JSON, set
// properties and convert to string
mgcxx_mock::text_search::Mock::add(*index_context, new_properties_document, true);
memcxx::text_search::DocumentInput{}; // TODO (pending real Tantivy operation): create a JSON, set
// properties and convert to string
memcxx::text_search::add(*index_context, new_properties_document, true);
}
}
for (const auto &[property, value] : properties) {
@ -366,16 +367,16 @@ Result<std::vector<std::tuple<PropertyId, PropertyValue, PropertyValue>>> Vertex
[storage = storage_, transaction = transaction_, vertex = vertex_, &properties, &id_old_new_change]() {
id_old_new_change.emplace(vertex->properties.UpdateProperties(properties));
if (flags::run_time::GetTextSearchEnabled()) {
for (const auto *index_context : storage->indices_.text_index_->GetApplicableTextIndices(vertex)) {
auto search_input = mgcxx_mock::text_search::SearchInput{
for (auto *index_context : storage->indices_.text_index_->GetApplicableTextIndices(vertex)) {
auto search_input = memcxx::text_search::SearchInput{
.search_query = fmt::format("metadata.gid:{}", vertex->gid.AsInt()), .return_fields = {"data"}};
auto search_result = mgcxx_mock::text_search::Mock::search(*index_context, search_input);
mgcxx_mock::text_search::Mock::delete_document(*index_context, search_input, true);
auto search_result = memcxx::text_search::search(*index_context, search_input);
// memcxx::text_search::delete_document(*index_context, search_input, true);
auto new_properties = search_result.docs[0].data; // TODO (pending real Tantivy results): parse result to
// JSON, set property and convert back to string
auto new_properties_document = mgcxx_mock::text_search::DocumentInput{.data = new_properties};
mgcxx_mock::text_search::Mock::add(*index_context, new_properties_document, true);
auto new_properties_document = memcxx::text_search::DocumentInput{.data = new_properties};
memcxx::text_search::add(*index_context, new_properties_document, true);
}
}
@ -410,28 +411,28 @@ Result<std::map<PropertyId, PropertyValue>> VertexAccessor::ClearProperties() {
using ReturnType = decltype(vertex_->properties.Properties());
std::optional<ReturnType> properties;
utils::AtomicMemoryBlock atomic_memory_block{[storage = storage_, transaction = transaction_, vertex = vertex_,
&properties]() {
properties.emplace(vertex->properties.Properties());
if (!properties.has_value()) {
return;
}
for (const auto &[property, value] : *properties) {
CreateAndLinkDelta(transaction, vertex, Delta::SetPropertyTag(), property, value);
storage->indices_.UpdateOnSetProperty(property, PropertyValue(), vertex, *transaction);
transaction->constraint_verification_info.RemovedProperty(vertex);
transaction->manyDeltasCache.Invalidate(vertex, property);
}
utils::AtomicMemoryBlock atomic_memory_block{
[storage = storage_, transaction = transaction_, vertex = vertex_, &properties]() {
properties.emplace(vertex->properties.Properties());
if (!properties.has_value()) {
return;
}
for (const auto &[property, value] : *properties) {
CreateAndLinkDelta(transaction, vertex, Delta::SetPropertyTag(), property, value);
storage->indices_.UpdateOnSetProperty(property, PropertyValue(), vertex, *transaction);
transaction->constraint_verification_info.RemovedProperty(vertex);
transaction->manyDeltasCache.Invalidate(vertex, property);
}
vertex->properties.ClearProperties();
if (flags::run_time::GetTextSearchEnabled()) {
for (const auto *index_context : storage->indices_.text_index_->GetApplicableTextIndices(vertex)) {
auto search_input =
mgcxx_mock::text_search::SearchInput{.search_query = fmt::format("metadata.gid:{}", vertex->gid.AsInt())};
mgcxx_mock::text_search::Mock::delete_document(*index_context, search_input, true);
}
}
}};
vertex->properties.ClearProperties();
if (flags::run_time::GetTextSearchEnabled()) {
for (auto *index_context : storage->indices_.text_index_->GetApplicableTextIndices(vertex)) {
auto search_input =
memcxx::text_search::SearchInput{.search_query = fmt::format("metadata.gid:{}", vertex->gid.AsInt())};
// memcxx::text_search::delete_document(*index_context, search_input, true);
}
}
}};
std::invoke(atomic_memory_block);
return properties.has_value() ? std::move(properties.value()) : ReturnType{};