Remove rough regex search

This commit is contained in:
Ante Pušić 2024-02-18 00:21:33 +01:00
parent 147c36b07c
commit 1d191aa693
9 changed files with 5 additions and 159 deletions

View File

@ -335,11 +335,6 @@ inline mgp_map *graph_search_text_index(mgp_graph *graph, const char *index_name
return MgInvoke<mgp_map *>(mgp_graph_search_text_index, graph, index_name, search_query, memory);
}
/// Thin C++ wrapper over the C API call `mgp_graph_regex_search_text_index`.
/// All arguments are forwarded unchanged through `MgInvoke`, and the map it yields is returned to the caller.
/// NOTE(review): error translation is handled inside `MgInvoke`, which is not visible here — confirm its behavior.
inline mgp_map *graph_regex_search_text_index(mgp_graph *graph, const char *index_name, const char *search_query,
                                              mgp_memory *memory) {
  mgp_map *search_result =
      MgInvoke<mgp_map *>(mgp_graph_regex_search_text_index, graph, index_name, search_query, memory);
  return search_result;
}
/// Thin C++ wrapper over the C API call `mgp_graph_iter_vertices`.
/// Forwards the graph and memory handles through `MgInvoke` and returns the resulting vertices iterator.
inline mgp_vertices_iterator *graph_iter_vertices(mgp_graph *g, mgp_memory *memory) {
  mgp_vertices_iterator *vertices_it = MgInvoke<mgp_vertices_iterator *>(mgp_graph_iter_vertices, g, memory);
  return vertices_it;
}

View File

@ -901,13 +901,6 @@ enum mgp_error mgp_graph_has_text_index(struct mgp_graph *graph, const char *ind
enum mgp_error mgp_graph_search_text_index(struct mgp_graph *graph, const char *index_name, const char *search_query,
struct mgp_memory *memory, struct mgp_map **result);
/// Search the text index named `index_name` for the regex given in `search_query`.
/// The outcome is written to `result` as a map (not a bare list). The C++ wrapper that consumes this call reads two
/// entries from it: "search_results" (the vertices whose text properties match the query) and "error_msg" (a
/// message that is non-empty when the search itself failed).
/// Return mgp_error::MGP_ERROR_UNABLE_TO_ALLOCATE if unable to allocate search result vertices.
enum mgp_error mgp_graph_regex_search_text_index(struct mgp_graph *graph, const char *index_name,
const char *search_query, struct mgp_memory *memory,
struct mgp_map **result);
/// Creates label index for given label.
/// mgp_error::MGP_ERROR_NO_ERROR is always returned.
/// if label index already exists, result will be 0, otherwise 1.

View File

@ -4351,18 +4351,6 @@ inline List RunTextSearchQuery(mgp_graph *memgraph_graph, std::string_view index
return results_or_error["search_results"].ValueList();
}
/// Runs a regex search against the text index `index_name` and returns the matching entries as a List.
///
/// The underlying call yields a map carrying an "error_msg" string and a "search_results" list.
/// @throws std::runtime_error carrying the reported message when "error_msg" is not empty.
inline List RunTextRegexSearchQuery(mgp_graph *memgraph_graph, std::string_view index_name,
                                    std::string_view search_query) {
  auto response = Map(
      mgp::MemHandlerCallback(graph_regex_search_text_index, memgraph_graph, index_name.data(), search_query.data()));

  // A non-empty error message means the search failed; surface it to the caller as an exception.
  if (auto error_text = response["error_msg"].ValueString(); !error_text.empty()) {
    throw std::runtime_error{error_text.data()};
  }

  return response["search_results"].ValueList();
}
inline bool CreateExistenceConstraint(mgp_graph *memgraph_graph, const std::string_view label,
const std::string_view property) {
return create_existence_constraint(memgraph_graph, label.data(), property.data());

View File

@ -18,13 +18,11 @@
// Names and callbacks used to register the text search procedures of this query module.
namespace TextSearch {
// Names under which the procedures are registered.
constexpr std::string_view kProcedureSearch = "search";
constexpr std::string_view kProcedureRegexSearch = "regex_search";
// Names of the two string parameters the procedures take.
constexpr std::string_view kParameterIndexName = "index_name";
constexpr std::string_view kParameterSearchString = "search_query";
// Name of the result field that holds each matching node.
constexpr std::string_view kReturnNode = "node";
// Procedure callbacks; both are defined later in this file.
void Search(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory);
void RegexSearch(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory);
} // namespace TextSearch
void TextSearch::Search(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory) {
@ -52,31 +50,6 @@ void TextSearch::Search(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *r
}
}
/// Query-module procedure: regex search over a text index.
///
/// Expects two string arguments, the index name followed by the regex query. Emits one record per matching
/// node under the `kReturnNode` field. If the index does not exist, or any exception is raised while
/// searching, the failure is reported through the record factory's error message instead of being rethrown.
void TextSearch::RegexSearch(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory) {
  mgp::MemoryDispatcherGuard guard{memory};
  const auto record_factory = mgp::RecordFactory(result);
  auto arguments = mgp::List(args);

  try {
    const auto *text_index_name = arguments[0].ValueString().data();
    const auto *regex_query = arguments[1].ValueString().data();

    // Bail out early with a descriptive error when there is no text index under the given name.
    if (!mgp::graph_has_text_index(memgraph_graph, text_index_name)) {
      record_factory.SetErrorMessage(fmt::format("Text index \"{}\" doesnt exist.", text_index_name));
      return;
    }

    // Query the index and emit one record per matching node.
    for (const auto &match : mgp::RunTextRegexSearchQuery(memgraph_graph, text_index_name, regex_query)) {
      auto row = record_factory.NewRecord();
      row.Insert(TextSearch::kReturnNode.data(), match.ValueNode());
    }
  } catch (const std::exception &err) {
    record_factory.SetErrorMessage(err.what());
  }
}
extern "C" int mgp_init_module(struct mgp_module *module, struct mgp_memory *memory) {
try {
mgp::MemoryDispatcherGuard guard{memory};
@ -87,13 +60,6 @@ extern "C" int mgp_init_module(struct mgp_module *module, struct mgp_memory *mem
mgp::Parameter(TextSearch::kParameterSearchString, mgp::Type::String),
},
{mgp::Return(TextSearch::kReturnNode, mgp::Type::Node)}, module, memory);
AddProcedure(TextSearch::RegexSearch, TextSearch::kProcedureRegexSearch, mgp::ProcedureType::Read,
{
mgp::Parameter(TextSearch::kParameterIndexName, mgp::Type::String),
mgp::Parameter(TextSearch::kParameterSearchString, mgp::Type::String),
},
{mgp::Return(TextSearch::kReturnNode, mgp::Type::Node)}, module, memory);
} catch (const std::exception &e) {
std::cerr << "Error while initializing query module: " << e.what() << std::endl;
return 1;

View File

@ -570,10 +570,6 @@ class DbAccessor final {
return accessor_->TextIndexSearch(index_name, search_query);
}
/// Regex-searches the text index `index_name` with `search_query` by delegating to `accessor_`,
/// and returns the Gids it reports.
std::vector<storage::Gid> TextIndexRegexSearch(const std::string &index_name, const std::string &search_query) const {
  auto matching_gids = accessor_->TextIndexRegexSearch(index_name, search_query);
  return matching_gids;
}
/// Returns the statistics `accessor_` reports for the label index on `label`; the optional is passed through as-is.
std::optional<storage::LabelIndexStats> GetIndexStats(const storage::LabelId &label) const {
  auto label_index_stats = accessor_->GetIndexStats(label);
  return label_index_stats;
}

View File

@ -3438,34 +3438,6 @@ mgp_error mgp_graph_search_text_index(mgp_graph *graph, const char *index_name,
});
}
/// C API entry point: regex-search the text index `index_name` with `search_query`.
///
/// Works for both accessor kinds held in `graph->impl`. A `QueryException` raised by the search is not
/// propagated; its message is handed to `WrapTextSearch` together with the (then empty) list of Gids, which
/// fills `*result`. Any other exception is left to `WrapExceptions`.
mgp_error mgp_graph_regex_search_text_index(mgp_graph *graph, const char *index_name, const char *search_query,
                                            mgp_memory *memory, mgp_map **result) {
  return WrapExceptions([graph, memory, index_name, search_query, result]() {
    // Shared by both visitor branches: run the search on the given accessor, turn a query failure into an
    // error message, and wrap whichever outcome we got into the result map.
    const auto search_and_wrap = [&](auto &&accessor) {
      std::vector<memgraph::storage::Gid> search_results;
      std::string error_msg;
      try {
        search_results = accessor->TextIndexRegexSearch(index_name, search_query);
      } catch (const memgraph::query::QueryException &e) {
        error_msg = e.what();
      }
      WrapTextSearch(search_results, error_msg, graph, memory, result);
    };

    std::visit(memgraph::utils::Overloaded{
                   [&](memgraph::query::DbAccessor *impl) { search_and_wrap(impl); },
                   // A subgraph accessor searches through the accessor it wraps.
                   [&](memgraph::query::SubgraphDbAccessor *impl) { search_and_wrap(impl->GetAccessor()); }},
               graph->impl);
  });
}
#ifdef MG_ENTERPRISE
namespace {
void NextPermitted(mgp_vertices_iterator &it) {

View File

@ -14,7 +14,6 @@
#include "query/db_accessor.hpp"
#include "storage/v2/view.hpp"
#include "text_search.hpp"
#include "utils/string.hpp"
namespace memgraph::storage {
@ -33,7 +32,6 @@ void TextIndex::CreateEmptyIndex(const std::string &index_name, LabelId label) {
nlohmann::json mappings = {};
mappings["properties"] = {};
mappings["properties"]["metadata"] = {{"type", "json"}, {"fast", true}, {"stored", true}, {"text", true}};
mappings["properties"]["all"] = {{"type", "text"}, {"fast", true}, {"stored", true}, {"text", true}};
mappings["properties"]["data"] = {{"type", "json"}, {"fast", true}, {"stored", true}, {"text", true}};
index_.emplace(index_name,
@ -46,27 +44,6 @@ void TextIndex::CreateEmptyIndex(const std::string &index_name, LabelId label) {
label_to_index_.emplace(label, index_name);
}
/// Builds the space-separated text representation of a vertex's indexable properties.
///
/// Bool, Int, Double and String values are included (in the iteration order of `properties`); Null, List,
/// Map, TemporalData and any other type are skipped.
///
/// Each value is read through the accessor matching its type. Calling `ValueString()` on a Bool, Int or
/// Double value (as was done previously) is a type mismatch rather than a conversion.
std::string TextIndex::PropertiesToString(const std::map<PropertyId, PropertyValue> &properties) {
  std::vector<std::string> indexable_properties_as_string;
  for (const auto &[_, prop_value] : properties) {
    switch (prop_value.type()) {
      case PropertyValue::Type::Bool:
        indexable_properties_as_string.emplace_back(prop_value.ValueBool() ? "true" : "false");
        break;
      case PropertyValue::Type::Int:
        indexable_properties_as_string.push_back(std::to_string(prop_value.ValueInt()));
        break;
      case PropertyValue::Type::Double:
        indexable_properties_as_string.push_back(std::to_string(prop_value.ValueDouble()));
        break;
      case PropertyValue::Type::String:
        indexable_properties_as_string.push_back(prop_value.ValueString());
        break;
      case PropertyValue::Type::Null:
      case PropertyValue::Type::List:
      case PropertyValue::Type::Map:
      case PropertyValue::Type::TemporalData:
      default:
        // Not representable as plain searchable text; leave it out.
        continue;
    }
  }
  return utils::Join(indexable_properties_as_string, " ");
}
template <typename T>
nlohmann::json TextIndex::SerializeProperties(const std::map<PropertyId, PropertyValue> &properties, T *name_resolver) {
nlohmann::json serialized_properties = nlohmann::json::value_t::object;
@ -115,13 +92,11 @@ std::vector<mgcxx::text_search::Context *> TextIndex::GetApplicableTextIndices(V
}
void TextIndex::LoadNodeToTextIndices(const std::int64_t gid, const nlohmann::json &properties,
const std::string &indexable_properties_as_string,
const std::vector<mgcxx::text_search::Context *> &applicable_text_indices) {
// NOTE: Text indexes are presently all-property indices. If we allow text indexes restricted to specific properties,
// an indexable document should be created for each applicable index.
nlohmann::json document = {};
document["data"] = properties;
document["all"] = indexable_properties_as_string;
document["metadata"] = {};
document["metadata"]["gid"] = gid;
document["metadata"]["deleted"] = false;
@ -159,7 +134,7 @@ void TextIndex::AddNode(Vertex *vertex_after_update, NameIdMapper *name_id_mappe
auto vertex_properties = vertex_after_update->properties.Properties();
LoadNodeToTextIndices(vertex_after_update->gid.AsInt(), SerializeProperties(vertex_properties, name_id_mapper),
PropertiesToString(vertex_properties), applicable_text_indices);
applicable_text_indices);
}
void TextIndex::AddNode(Vertex *vertex_after_update, NameIdMapper *name_id_mapper) {
@ -179,7 +154,8 @@ void TextIndex::UpdateNode(Vertex *vertex_after_update, NameIdMapper *name_id_ma
}
if (!removed_labels.empty()) {
RemoveNode(vertex_after_update, GetApplicableTextIndices(removed_labels));
auto indexes_to_remove_node_from = GetApplicableTextIndices(removed_labels);
RemoveNode(vertex_after_update, indexes_to_remove_node_from);
}
auto applicable_text_indices = GetApplicableTextIndices(vertex_after_update);
@ -226,7 +202,7 @@ void TextIndex::CreateIndex(const std::string &index_name, LabelId label, memgra
auto vertex_properties = v.Properties(View::NEW).GetValue();
LoadNodeToTextIndices(v.Gid().AsInt(), SerializeProperties(vertex_properties, db),
PropertiesToString(vertex_properties), {&index_.at(index_name).context_});
{&index_.at(index_name).context_});
}
CommitLoadedNodes(index_.at(index_name).context_);
@ -244,7 +220,7 @@ void TextIndex::RecoverIndex(const std::string &index_name, LabelId label,
nlohmann::json document = {};
auto vertex_properties = v.properties.Properties();
LoadNodeToTextIndices(v.gid.AsInt(), SerializeProperties(vertex_properties, name_id_mapper),
PropertiesToString(vertex_properties), {&index_.at(index_name).context_});
{&index_.at(index_name).context_});
}
CommitLoadedNodes(index_.at(index_name).context_);
@ -300,37 +276,6 @@ std::vector<Gid> TextIndex::Search(const std::string &index_name, const std::str
return found_nodes;
}
std::vector<Gid> TextIndex::RegexSearch(const std::string &index_name, const std::string &search_query) {
if (!flags::run_time::GetExperimentalTextSearchEnabled()) {
throw query::TextSearchDisabledException();
}
if (!index_.contains(index_name)) {
throw query::TextSearchException("Text index \"{}\" doesnt exist.", index_name);
}
auto input = mgcxx::text_search::SearchInput{
.search_fields = {"all"}, .search_query = search_query, .return_fields = {"data", "metadata"}};
std::vector<Gid> found_nodes;
mgcxx::text_search::SearchOutput search_results;
try {
search_results = mgcxx::text_search::regex_search(index_.at(index_name).context_, input);
} catch (const std::exception &e) {
throw query::TextSearchException("Tantivy error: {}", e.what());
}
for (const auto &doc : search_results.docs) {
// The CXX .data() method (https://cxx.rs/binding/string.html) may overestimate string length, causing JSON parsing
// errors downstream. We prevent this by resizing the converted string with the correctly-working .length() method.
std::string doc_string = doc.data.data();
doc_string.resize(doc.data.length());
auto doc_json = nlohmann::json::parse(doc_string);
found_nodes.push_back(storage::Gid::FromString(doc_json["metadata"]["gid"].dump()));
}
return found_nodes;
}
void TextIndex::Commit() {
for (auto &[_, index_data] : index_) {
mgcxx::text_search::commit(index_data.context_);

View File

@ -36,8 +36,6 @@ class TextIndex {
private:
void CreateEmptyIndex(const std::string &index_name, LabelId label);
std::string PropertiesToString(const std::map<PropertyId, PropertyValue> &properties);
template <typename T>
nlohmann::json SerializeProperties(const std::map<PropertyId, PropertyValue> &properties, T *name_resolver);
@ -46,7 +44,6 @@ class TextIndex {
std::vector<mgcxx::text_search::Context *> GetApplicableTextIndices(Vertex *vertex);
void LoadNodeToTextIndices(const std::int64_t gid, const nlohmann::json &properties,
const std::string &indexable_properties_as_string,
const std::vector<mgcxx::text_search::Context *> &applicable_text_indices);
void CommitLoadedNodes(mgcxx::text_search::Context &index_context);
@ -86,8 +83,6 @@ class TextIndex {
std::vector<Gid> Search(const std::string &index_name, const std::string &search_query);
std::vector<Gid> RegexSearch(const std::string &index_name, const std::string &search_query);
void Commit();
void Rollback();

View File

@ -247,10 +247,6 @@ class Storage {
return storage_->indices_.text_index_.Search(index_name, search_query);
}
/// Regex-searches the text index `index_name` with `search_query` using the storage's text index,
/// and returns the Gids it finds.
std::vector<Gid> TextIndexRegexSearch(const std::string &index_name, const std::string &search_query) const {
  auto &text_index = storage_->indices_.text_index_;
  return text_index.RegexSearch(index_name, search_query);
}
virtual IndicesInfo ListAllIndices() const = 0;
virtual ConstraintsInfo ListAllConstraints() const = 0;