Address review suggestions

This commit is contained in:
Ante Pušić 2024-02-26 15:33:45 +01:00
parent 6851e71540
commit 4841ee1573
9 changed files with 89 additions and 49 deletions

View File

@ -331,7 +331,7 @@ inline bool graph_has_text_index(mgp_graph *graph, const char *index_name) {
} }
inline mgp_map *graph_search_text_index(mgp_graph *graph, const char *index_name, const char *search_query, inline mgp_map *graph_search_text_index(mgp_graph *graph, const char *index_name, const char *search_query,
const char *search_mode, mgp_memory *memory) { int search_mode, mgp_memory *memory) {
return MgInvoke<mgp_map *>(mgp_graph_search_text_index, graph, index_name, search_query, search_mode, memory); return MgInvoke<mgp_map *>(mgp_graph_search_text_index, graph, index_name, search_query, search_mode, memory);
} }

View File

@ -900,7 +900,7 @@ enum mgp_error mgp_graph_has_text_index(struct mgp_graph *graph, const char *ind
/// Return mgp_error::MGP_ERROR_UNABLE_TO_ALLOCATE if theres an allocation error while constructing the results map. /// Return mgp_error::MGP_ERROR_UNABLE_TO_ALLOCATE if theres an allocation error while constructing the results map.
/// Return mgp_error::MGP_ERROR_KEY_ALREADY_EXISTS if the same key is being created in the results map more than once. /// Return mgp_error::MGP_ERROR_KEY_ALREADY_EXISTS if the same key is being created in the results map more than once.
enum mgp_error mgp_graph_search_text_index(struct mgp_graph *graph, const char *index_name, const char *search_query, enum mgp_error mgp_graph_search_text_index(struct mgp_graph *graph, const char *index_name, const char *search_query,
const char *search_mode, struct mgp_memory *memory, struct mgp_map **result); int search_mode, struct mgp_memory *memory, struct mgp_map **result);
/// Search the named text index for the given query and aggregate over the search results. /// Search the named text index for the given query and aggregate over the search results.
/// Return mgp_error::MGP_ERROR_UNABLE_TO_ALLOCATE if theres an allocation error while constructing the results map. /// Return mgp_error::MGP_ERROR_UNABLE_TO_ALLOCATE if theres an allocation error while constructing the results map.

View File

@ -4349,28 +4349,55 @@ inline List ListAllLabelPropertyIndices(mgp_graph *memgraph_graph) {
return List(label_property_indices); return List(label_property_indices);
} }
enum class TextSearchMode : uint8_t {
SPECIFIED_PROPERTIES = 0,
REGEX = 1,
ALL_PROPERTIES = 2,
};
namespace {
constexpr std::string_view kErrorMsgKey = "error_msg";
constexpr std::string_view kSearchResultsKey = "search_results";
constexpr std::string_view kAggregationResultsKey = "aggregation_results";
} // namespace
inline List SearchTextIndex(mgp_graph *memgraph_graph, std::string_view index_name, std::string_view search_query, inline List SearchTextIndex(mgp_graph *memgraph_graph, std::string_view index_name, std::string_view search_query,
std::string_view search_mode) { TextSearchMode search_mode) {
auto results_or_error = Map(mgp::MemHandlerCallback(graph_search_text_index, memgraph_graph, index_name.data(), auto results_or_error = Map(mgp::MemHandlerCallback(graph_search_text_index, memgraph_graph, index_name.data(),
search_query.data(), search_mode.data())); search_query.data(), static_cast<int>(search_mode)));
auto maybe_error = results_or_error["error_msg"].ValueString(); if (!results_or_error.KeyExists(kErrorMsgKey) || !results_or_error.KeyExists(kSearchResultsKey)) {
throw TextSearchException{"Incomplete text index search results!"};
}
if (!results_or_error.At(kErrorMsgKey).IsString() || !results_or_error.At(kSearchResultsKey).IsList()) {
throw TextSearchException{"Text index search results have wrong type!"};
}
auto maybe_error = results_or_error[kErrorMsgKey].ValueString();
if (!maybe_error.empty()) { if (!maybe_error.empty()) {
throw TextSearchException(maybe_error.data()); throw TextSearchException(maybe_error.data());
} }
return results_or_error["search_results"].ValueList(); return results_or_error[kSearchResultsKey].ValueList();
} }
inline std::string AggregateOverTextIndex(mgp_graph *memgraph_graph, std::string_view index_name, inline std::string_view AggregateOverTextIndex(mgp_graph *memgraph_graph, std::string_view index_name,
std::string_view search_query, std::string_view search_mode) { std::string_view search_query, std::string_view aggregation_query) {
auto results_or_error = Map(mgp::MemHandlerCallback(graph_aggregate_over_text_index, memgraph_graph, auto results_or_error =
index_name.data(), search_query.data(), search_mode.data())); Map(mgp::MemHandlerCallback(graph_aggregate_over_text_index, memgraph_graph, index_name.data(),
auto maybe_error = results_or_error["error_msg"].ValueString(); search_query.data(), aggregation_query.data()));
if (!results_or_error.KeyExists(kErrorMsgKey) || !results_or_error.KeyExists(kAggregationResultsKey)) {
throw TextSearchException{"Incomplete text index aggregation results!"};
}
if (!results_or_error.At(kErrorMsgKey).IsString() || !results_or_error.At(kAggregationResultsKey).IsString()) {
throw TextSearchException{"Text index aggregation results have wrong type!"};
}
auto maybe_error = results_or_error[kErrorMsgKey].ValueString();
if (!maybe_error.empty()) { if (!maybe_error.empty()) {
throw TextSearchException(maybe_error.data()); throw TextSearchException(maybe_error.data());
} }
return results_or_error["aggregation_results"].ValueString().data(); return results_or_error[kAggregationResultsKey].ValueString();
} }
inline bool CreateExistenceConstraint(mgp_graph *memgraph_graph, const std::string_view label, inline bool CreateExistenceConstraint(mgp_graph *memgraph_graph, const std::string_view label,

View File

@ -42,7 +42,8 @@ void TextSearch::Search(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *r
try { try {
const auto *index_name = arguments[0].ValueString().data(); const auto *index_name = arguments[0].ValueString().data();
const auto *search_query = arguments[1].ValueString().data(); const auto *search_query = arguments[1].ValueString().data();
for (const auto &node : mgp::SearchTextIndex(memgraph_graph, index_name, search_query, "specify_property")) { for (const auto &node :
mgp::SearchTextIndex(memgraph_graph, index_name, search_query, mgp::TextSearchMode::SPECIFIED_PROPERTIES)) {
auto record = record_factory.NewRecord(); auto record = record_factory.NewRecord();
record.Insert(TextSearch::kReturnNode.data(), node.ValueNode()); record.Insert(TextSearch::kReturnNode.data(), node.ValueNode());
} }
@ -59,7 +60,8 @@ void TextSearch::RegexSearch(mgp_list *args, mgp_graph *memgraph_graph, mgp_resu
try { try {
const auto *index_name = arguments[0].ValueString().data(); const auto *index_name = arguments[0].ValueString().data();
const auto *search_query = arguments[1].ValueString().data(); const auto *search_query = arguments[1].ValueString().data();
for (const auto &node : mgp::SearchTextIndex(memgraph_graph, index_name, search_query, "regex")) { for (const auto &node :
mgp::SearchTextIndex(memgraph_graph, index_name, search_query, mgp::TextSearchMode::REGEX)) {
auto record = record_factory.NewRecord(); auto record = record_factory.NewRecord();
record.Insert(TextSearch::kReturnNode.data(), node.ValueNode()); record.Insert(TextSearch::kReturnNode.data(), node.ValueNode());
} }
@ -77,7 +79,8 @@ void TextSearch::SearchAllProperties(mgp_list *args, mgp_graph *memgraph_graph,
try { try {
const auto *index_name = arguments[0].ValueString().data(); const auto *index_name = arguments[0].ValueString().data();
const auto search_query = kSearchAllPrefix + std::string(arguments[1].ValueString()); const auto search_query = kSearchAllPrefix + std::string(arguments[1].ValueString());
for (const auto &node : mgp::SearchTextIndex(memgraph_graph, index_name, search_query, "all_properties")) { for (const auto &node :
mgp::SearchTextIndex(memgraph_graph, index_name, search_query, mgp::TextSearchMode::ALL_PROPERTIES)) {
auto record = record_factory.NewRecord(); auto record = record_factory.NewRecord();
record.Insert(TextSearch::kReturnNode.data(), node.ValueNode()); record.Insert(TextSearch::kReturnNode.data(), node.ValueNode());
} }
@ -95,9 +98,10 @@ void TextSearch::Aggregate(mgp_list *args, mgp_graph *memgraph_graph, mgp_result
const auto *index_name = arguments[0].ValueString().data(); const auto *index_name = arguments[0].ValueString().data();
const auto *search_query = arguments[1].ValueString().data(); const auto *search_query = arguments[1].ValueString().data();
const auto *aggregation_query = arguments[2].ValueString().data(); const auto *aggregation_query = arguments[2].ValueString().data();
const auto result = mgp::AggregateOverTextIndex(memgraph_graph, index_name, search_query, aggregation_query); const auto aggregation_result =
mgp::AggregateOverTextIndex(memgraph_graph, index_name, search_query, aggregation_query);
auto record = record_factory.NewRecord(); auto record = record_factory.NewRecord();
record.Insert(TextSearch::kReturnAggregation.data(), result); record.Insert(TextSearch::kReturnAggregation.data(), aggregation_result.data());
} catch (const std::exception &e) { } catch (const std::exception &e) {
record_factory.SetErrorMessage(e.what()); record_factory.SetErrorMessage(e.what());
} }

View File

@ -581,8 +581,8 @@ class DbAccessor final {
} }
std::vector<storage::Gid> TextIndexSearch(const std::string &index_name, const std::string &search_query, std::vector<storage::Gid> TextIndexSearch(const std::string &index_name, const std::string &search_query,
const std::string &search_mode) const { uint8_t search_mode) const {
return accessor_->TextIndexSearch(index_name, search_query, search_mode); return accessor_->TextIndexSearch(index_name, search_query, static_cast<storage::TextSearchMode>(search_mode));
} }
std::string TextIndexAggregate(const std::string &index_name, const std::string &search_query, std::string TextIndexAggregate(const std::string &index_name, const std::string &search_query,

View File

@ -34,6 +34,7 @@
#include "query/procedure/fmt.hpp" #include "query/procedure/fmt.hpp"
#include "query/procedure/mg_procedure_helpers.hpp" #include "query/procedure/mg_procedure_helpers.hpp"
#include "query/stream/common.hpp" #include "query/stream/common.hpp"
#include "storage/v2/indices/text_index.hpp"
#include "storage/v2/property_value.hpp" #include "storage/v2/property_value.hpp"
#include "storage/v2/storage_mode.hpp" #include "storage/v2/storage_mode.hpp"
#include "storage/v2/view.hpp" #include "storage/v2/view.hpp"
@ -3450,7 +3451,7 @@ void WrapTextIndexAggregation(mgp_memory *memory, mgp_map **result, const std::s
} }
mgp_error mgp_graph_search_text_index(mgp_graph *graph, const char *index_name, const char *search_query, mgp_error mgp_graph_search_text_index(mgp_graph *graph, const char *index_name, const char *search_query,
const char *search_mode, mgp_memory *memory, mgp_map **result) { int search_mode, mgp_memory *memory, mgp_map **result) {
return WrapExceptions([graph, memory, index_name, search_query, search_mode, result]() { return WrapExceptions([graph, memory, index_name, search_query, search_mode, result]() {
std::vector<memgraph::storage::Gid> search_results; std::vector<memgraph::storage::Gid> search_results;
std::string error_msg; std::string error_msg;

View File

@ -79,7 +79,7 @@ nlohmann::json TextIndex::SerializeProperties(const std::map<PropertyId, Propert
return serialized_properties; return serialized_properties;
} }
std::string TextIndex::CopyPropertyValuesToString(const std::map<PropertyId, PropertyValue> &properties) { std::string TextIndex::StringifyProperties(const std::map<PropertyId, PropertyValue> &properties) {
std::vector<std::string> indexable_properties_as_string; std::vector<std::string> indexable_properties_as_string;
for (const auto &[_, prop_value] : properties) { for (const auto &[_, prop_value] : properties) {
switch (prop_value.type()) { switch (prop_value.type()) {
@ -122,7 +122,7 @@ std::vector<mgcxx::text_search::Context *> TextIndex::GetApplicableTextIndices(c
} }
void TextIndex::LoadNodeToTextIndices(const std::int64_t gid, const nlohmann::json &properties, void TextIndex::LoadNodeToTextIndices(const std::int64_t gid, const nlohmann::json &properties,
const std::string &all_property_values_string, const std::string &property_values_as_str,
const std::vector<mgcxx::text_search::Context *> &applicable_text_indices) { const std::vector<mgcxx::text_search::Context *> &applicable_text_indices) {
if (applicable_text_indices.empty()) { if (applicable_text_indices.empty()) {
return; return;
@ -132,7 +132,7 @@ void TextIndex::LoadNodeToTextIndices(const std::int64_t gid, const nlohmann::js
// an indexable document should be created for each applicable index. // an indexable document should be created for each applicable index.
nlohmann::json document = {}; nlohmann::json document = {};
document["data"] = properties; document["data"] = properties;
document["all"] = all_property_values_string; document["all"] = property_values_as_str;
document["metadata"] = {}; document["metadata"] = {};
document["metadata"]["gid"] = gid; document["metadata"]["gid"] = gid;
document["metadata"]["deleted"] = false; document["metadata"]["deleted"] = false;
@ -176,7 +176,7 @@ void TextIndex::AddNode(Vertex *vertex_after_update, NameIdMapper *name_id_mappe
auto vertex_properties = vertex_after_update->properties.Properties(); auto vertex_properties = vertex_after_update->properties.Properties();
LoadNodeToTextIndices(vertex_after_update->gid.AsInt(), SerializeProperties(vertex_properties, name_id_mapper), LoadNodeToTextIndices(vertex_after_update->gid.AsInt(), SerializeProperties(vertex_properties, name_id_mapper),
CopyPropertyValuesToString(vertex_properties), applicable_text_indices); StringifyProperties(vertex_properties), applicable_text_indices);
} }
void TextIndex::UpdateNode(Vertex *vertex_after_update, NameIdMapper *name_id_mapper, void TextIndex::UpdateNode(Vertex *vertex_after_update, NameIdMapper *name_id_mapper,
@ -228,7 +228,7 @@ void TextIndex::CreateIndex(const std::string &index_name, LabelId label, memgra
auto vertex_properties = v.Properties(View::NEW).GetValue(); auto vertex_properties = v.Properties(View::NEW).GetValue();
LoadNodeToTextIndices(v.Gid().AsInt(), SerializeProperties(vertex_properties, db), LoadNodeToTextIndices(v.Gid().AsInt(), SerializeProperties(vertex_properties, db),
CopyPropertyValuesToString(vertex_properties), {&index_.at(index_name).context_}); StringifyProperties(vertex_properties), {&index_.at(index_name).context_});
} }
CommitLoadedNodes(index_.at(index_name).context_); CommitLoadedNodes(index_.at(index_name).context_);
@ -249,7 +249,7 @@ void TextIndex::RecoverIndex(const std::string &index_name, LabelId label,
auto vertex_properties = v.properties.Properties(); auto vertex_properties = v.properties.Properties();
LoadNodeToTextIndices(v.gid.AsInt(), SerializeProperties(vertex_properties, name_id_mapper), LoadNodeToTextIndices(v.gid.AsInt(), SerializeProperties(vertex_properties, name_id_mapper),
CopyPropertyValuesToString(vertex_properties), {&index_.at(index_name).context_}); StringifyProperties(vertex_properties), {&index_.at(index_name).context_});
} }
CommitLoadedNodes(index_.at(index_name).context_); CommitLoadedNodes(index_.at(index_name).context_);
@ -282,14 +282,13 @@ bool TextIndex::IndexExists(const std::string &index_name) const { return index_
mgcxx::text_search::SearchOutput TextIndex::SearchGivenProperties(const std::string &index_name, mgcxx::text_search::SearchOutput TextIndex::SearchGivenProperties(const std::string &index_name,
const std::string &search_query) { const std::string &search_query) {
auto input = mgcxx::text_search::SearchInput{.search_query = search_query, .return_fields = {"data", "metadata"}}; auto input = mgcxx::text_search::SearchInput{.search_query = search_query, .return_fields = {"data", "metadata"}};
mgcxx::text_search::SearchOutput search_results;
try { try {
search_results = mgcxx::text_search::search(index_.at(index_name).context_, input); return mgcxx::text_search::search(index_.at(index_name).context_, input);
} catch (const std::exception &e) { } catch (const std::exception &e) {
throw query::TextSearchException("Tantivy error: {}", e.what()); throw query::TextSearchException("Tantivy error: {}", e.what());
} }
return search_results; return mgcxx::text_search::SearchOutput{};
} }
mgcxx::text_search::SearchOutput TextIndex::RegexSearch(const std::string &index_name, mgcxx::text_search::SearchOutput TextIndex::RegexSearch(const std::string &index_name,
@ -299,12 +298,12 @@ mgcxx::text_search::SearchOutput TextIndex::RegexSearch(const std::string &index
mgcxx::text_search::SearchOutput search_results; mgcxx::text_search::SearchOutput search_results;
try { try {
search_results = mgcxx::text_search::regex_search(index_.at(index_name).context_, input); return mgcxx::text_search::regex_search(index_.at(index_name).context_, input);
} catch (const std::exception &e) { } catch (const std::exception &e) {
throw query::TextSearchException("Tantivy error: {}", e.what()); throw query::TextSearchException("Tantivy error: {}", e.what());
} }
return search_results; return mgcxx::text_search::SearchOutput{};
} }
mgcxx::text_search::SearchOutput TextIndex::SearchAllProperties(const std::string &index_name, mgcxx::text_search::SearchOutput TextIndex::SearchAllProperties(const std::string &index_name,
@ -315,16 +314,16 @@ mgcxx::text_search::SearchOutput TextIndex::SearchAllProperties(const std::strin
mgcxx::text_search::SearchOutput search_results; mgcxx::text_search::SearchOutput search_results;
try { try {
search_results = mgcxx::text_search::search(index_.at(index_name).context_, input); return mgcxx::text_search::search(index_.at(index_name).context_, input);
} catch (const std::exception &e) { } catch (const std::exception &e) {
throw query::TextSearchException("Tantivy error: {}", e.what()); throw query::TextSearchException("Tantivy error: {}", e.what());
} }
return search_results; return mgcxx::text_search::SearchOutput{};
} }
std::vector<Gid> TextIndex::Search(const std::string &index_name, const std::string &search_query, std::vector<Gid> TextIndex::Search(const std::string &index_name, const std::string &search_query,
const std::string &search_mode) { TextSearchMode search_mode) {
if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) { if (!flags::AreExperimentsEnabled(flags::Experiments::TEXT_SEARCH)) {
throw query::TextSearchDisabledException(); throw query::TextSearchDisabledException();
} }
@ -334,16 +333,20 @@ std::vector<Gid> TextIndex::Search(const std::string &index_name, const std::str
} }
mgcxx::text_search::SearchOutput search_results; mgcxx::text_search::SearchOutput search_results;
if (search_mode == "specify_property") { switch (search_mode) {
search_results = SearchGivenProperties(index_name, search_query); case TextSearchMode::SPECIFIED_PROPERTIES:
} else if (search_mode == "regex") { search_results = SearchGivenProperties(index_name, search_query);
search_results = RegexSearch(index_name, search_query); break;
} else if (search_mode == "all_properties") { case TextSearchMode::REGEX:
search_results = SearchAllProperties(index_name, search_query); search_results = RegexSearch(index_name, search_query);
} else { break;
throw query::TextSearchException( case TextSearchMode::ALL_PROPERTIES:
"Unsupported search mode: please use one of text_search.search, text_search.search_all, or " search_results = SearchAllProperties(index_name, search_query);
"text_search.regex_search."); break;
default:
throw query::TextSearchException(
"Unsupported search mode: please use one of text_search.search, text_search.search_all, or "
"text_search.regex_search.");
} }
std::vector<Gid> found_nodes; std::vector<Gid> found_nodes;

View File

@ -27,6 +27,12 @@ struct TextIndexData {
LabelId scope_; LabelId scope_;
}; };
enum class TextSearchMode : uint8_t {
SPECIFIED_PROPERTIES = 0,
REGEX = 1,
ALL_PROPERTIES = 2,
};
class TextIndex { class TextIndex {
private: private:
static constexpr bool kDoSkipCommit = true; static constexpr bool kDoSkipCommit = true;
@ -36,12 +42,12 @@ class TextIndex {
template <typename T> template <typename T>
nlohmann::json SerializeProperties(const std::map<PropertyId, PropertyValue> &properties, T *name_resolver); nlohmann::json SerializeProperties(const std::map<PropertyId, PropertyValue> &properties, T *name_resolver);
std::string CopyPropertyValuesToString(const std::map<PropertyId, PropertyValue> &properties); std::string StringifyProperties(const std::map<PropertyId, PropertyValue> &properties);
std::vector<mgcxx::text_search::Context *> GetApplicableTextIndices(const std::vector<LabelId> &labels); std::vector<mgcxx::text_search::Context *> GetApplicableTextIndices(const std::vector<LabelId> &labels);
void LoadNodeToTextIndices(const std::int64_t gid, const nlohmann::json &properties, void LoadNodeToTextIndices(const std::int64_t gid, const nlohmann::json &properties,
const std::string &all_property_values_string, const std::string &property_values_as_str,
const std::vector<mgcxx::text_search::Context *> &applicable_text_indices); const std::vector<mgcxx::text_search::Context *> &applicable_text_indices);
void CommitLoadedNodes(mgcxx::text_search::Context &index_context); void CommitLoadedNodes(mgcxx::text_search::Context &index_context);
@ -83,8 +89,7 @@ class TextIndex {
bool IndexExists(const std::string &index_name) const; bool IndexExists(const std::string &index_name) const;
std::vector<Gid> Search(const std::string &index_name, const std::string &search_query, std::vector<Gid> Search(const std::string &index_name, const std::string &search_query, TextSearchMode search_mode);
const std::string &search_mode);
std::string Aggregate(const std::string &index_name, const std::string &search_query, std::string Aggregate(const std::string &index_name, const std::string &search_query,
const std::string &aggregation_query); const std::string &aggregation_query);

View File

@ -241,7 +241,7 @@ class Storage {
} }
std::vector<Gid> TextIndexSearch(const std::string &index_name, const std::string &search_query, std::vector<Gid> TextIndexSearch(const std::string &index_name, const std::string &search_query,
const std::string &search_mode) const { TextSearchMode search_mode) const {
return storage_->indices_.text_index_.Search(index_name, search_query, search_mode); return storage_->indices_.text_index_.Search(index_name, search_query, search_mode);
} }