Remove rough regex search
This commit is contained in:
parent
147c36b07c
commit
1d191aa693
@ -335,11 +335,6 @@ inline mgp_map *graph_search_text_index(mgp_graph *graph, const char *index_name
|
||||
return MgInvoke<mgp_map *>(mgp_graph_search_text_index, graph, index_name, search_query, memory);
|
||||
}
|
||||
|
||||
/// C++ convenience wrapper around the C API function `mgp_graph_regex_search_text_index`.
///
/// Forwards the graph, index name, search query and memory handle to the C function through `MgInvoke`
/// and returns the map pointer that `MgInvoke` produces.
inline mgp_map *graph_regex_search_text_index(mgp_graph *graph, const char *index_name, const char *search_query,
                                              mgp_memory *memory) {
  mgp_map *const search_output =
      MgInvoke<mgp_map *>(mgp_graph_regex_search_text_index, graph, index_name, search_query, memory);
  return search_output;
}
|
||||
|
||||
/// C++ convenience wrapper around the C API function `mgp_graph_iter_vertices`.
///
/// Forwards the graph and memory handle to the C function through `MgInvoke` and returns the
/// vertices iterator pointer that `MgInvoke` produces.
inline mgp_vertices_iterator *graph_iter_vertices(mgp_graph *g, mgp_memory *memory) {
  mgp_vertices_iterator *const vertices_it = MgInvoke<mgp_vertices_iterator *>(mgp_graph_iter_vertices, g, memory);
  return vertices_it;
}
|
||||
|
@ -901,13 +901,6 @@ enum mgp_error mgp_graph_has_text_index(struct mgp_graph *graph, const char *ind
|
||||
enum mgp_error mgp_graph_search_text_index(struct mgp_graph *graph, const char *index_name, const char *search_query,
|
||||
struct mgp_memory *memory, struct mgp_map **result);
|
||||
|
||||
/// Search the named text index for vertices whose text properties match the given regex.
/// The result is a map: the matching vertices are stored under "search_results" and any error text under
/// "error_msg".
/// Return mgp_error::MGP_ERROR_UNABLE_TO_ALLOCATE if unable to allocate search result vertices.
|
||||
enum mgp_error mgp_graph_regex_search_text_index(struct mgp_graph *graph, const char *index_name,
|
||||
const char *search_query, struct mgp_memory *memory,
|
||||
struct mgp_map **result);
|
||||
|
||||
/// Creates label index for given label.
|
||||
/// mgp_error::MGP_ERROR_NO_ERROR is always returned.
|
||||
/// If the label index already exists, result will be 0, otherwise 1.
|
||||
|
@ -4351,18 +4351,6 @@ inline List RunTextSearchQuery(mgp_graph *memgraph_graph, std::string_view index
|
||||
return results_or_error["search_results"].ValueList();
|
||||
}
|
||||
|
||||
/// Runs a regex search against the named text index and returns the matching nodes.
///
/// @param memgraph_graph Graph handle the search is executed on.
/// @param index_name     Name of the text index to search.
/// @param search_query   Regex query forwarded to the index.
/// @return The list stored under "search_results" in the map returned by the C API.
/// @throws std::runtime_error carrying the "error_msg" entry of that map when it is non-empty.
inline List RunTextRegexSearchQuery(mgp_graph *memgraph_graph, std::string_view index_name,
                                    std::string_view search_query) {
  // The C API takes null-terminated `const char *` arguments, but std::string_view::data() is not guaranteed
  // to be null-terminated (the view may refer to a substring). Copy into std::string to get a proper C string.
  const std::string index_name_str{index_name};
  const std::string search_query_str{search_query};

  auto results_or_error = Map(mgp::MemHandlerCallback(graph_regex_search_text_index, memgraph_graph,
                                                      index_name_str.c_str(), search_query_str.c_str()));

  const auto maybe_error = results_or_error["error_msg"].ValueString();
  if (!maybe_error.empty()) {
    // Build the message from the full value (pointer + length) rather than relying on a terminator.
    throw std::runtime_error{std::string(maybe_error)};
  }

  return results_or_error["search_results"].ValueList();
}
|
||||
|
||||
inline bool CreateExistenceConstraint(mgp_graph *memgraph_graph, const std::string_view label,
|
||||
const std::string_view property) {
|
||||
return create_existence_constraint(memgraph_graph, label.data(), property.data());
|
||||
|
@ -18,13 +18,11 @@
|
||||
|
||||
namespace TextSearch {
|
||||
constexpr std::string_view kProcedureSearch = "search";
|
||||
constexpr std::string_view kProcedureRegexSearch = "regex_search";
|
||||
constexpr std::string_view kParameterIndexName = "index_name";
|
||||
constexpr std::string_view kParameterSearchString = "search_query";
|
||||
constexpr std::string_view kReturnNode = "node";
|
||||
|
||||
void Search(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory);
|
||||
void RegexSearch(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory);
|
||||
} // namespace TextSearch
|
||||
|
||||
void TextSearch::Search(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory) {
|
||||
@ -52,31 +50,6 @@ void TextSearch::Search(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *r
|
||||
}
|
||||
}
|
||||
|
||||
/// Procedure callback: regex-searches a text index and emits one record per matching node.
///
/// Reads the index name from argument 0 and the search query from argument 1. If the index does not exist,
/// an error message is set on the result instead of emitting records. Any std::exception raised while
/// searching is reported through the result's error message as well.
void TextSearch::RegexSearch(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory) {
  mgp::MemoryDispatcherGuard guard{memory};
  const auto record_factory = mgp::RecordFactory(result);
  auto arguments = mgp::List(args);

  try {
    const auto *requested_index = arguments[0].ValueString().data();
    const auto *regex_query = arguments[1].ValueString().data();

    const bool index_exists = mgp::graph_has_text_index(memgraph_graph, requested_index);
    if (index_exists) {
      // The index is present: run the regex search and emit every hit under the "node" field.
      const auto matches = mgp::RunTextRegexSearchQuery(memgraph_graph, requested_index, regex_query);
      for (const auto &match : matches) {
        auto record = record_factory.NewRecord();
        record.Insert(TextSearch::kReturnNode.data(), match.ValueNode());
      }
    } else {
      // Nothing to search: report the missing index to the caller.
      record_factory.SetErrorMessage(fmt::format("Text index \"{}\" doesn’t exist.", requested_index));
    }
  } catch (const std::exception &e) {
    record_factory.SetErrorMessage(e.what());
  }
}
|
||||
|
||||
extern "C" int mgp_init_module(struct mgp_module *module, struct mgp_memory *memory) {
|
||||
try {
|
||||
mgp::MemoryDispatcherGuard guard{memory};
|
||||
@ -87,13 +60,6 @@ extern "C" int mgp_init_module(struct mgp_module *module, struct mgp_memory *mem
|
||||
mgp::Parameter(TextSearch::kParameterSearchString, mgp::Type::String),
|
||||
},
|
||||
{mgp::Return(TextSearch::kReturnNode, mgp::Type::Node)}, module, memory);
|
||||
|
||||
AddProcedure(TextSearch::RegexSearch, TextSearch::kProcedureRegexSearch, mgp::ProcedureType::Read,
|
||||
{
|
||||
mgp::Parameter(TextSearch::kParameterIndexName, mgp::Type::String),
|
||||
mgp::Parameter(TextSearch::kParameterSearchString, mgp::Type::String),
|
||||
},
|
||||
{mgp::Return(TextSearch::kReturnNode, mgp::Type::Node)}, module, memory);
|
||||
} catch (const std::exception &e) {
|
||||
std::cerr << "Error while initializing query module: " << e.what() << std::endl;
|
||||
return 1;
|
||||
|
@ -570,10 +570,6 @@ class DbAccessor final {
|
||||
return accessor_->TextIndexSearch(index_name, search_query);
|
||||
}
|
||||
|
||||
std::vector<storage::Gid> TextIndexRegexSearch(const std::string &index_name, const std::string &search_query) const {
|
||||
return accessor_->TextIndexRegexSearch(index_name, search_query);
|
||||
}
|
||||
|
||||
/// Fetches the label index statistics for `label` by delegating to the wrapped storage accessor.
/// The optional is passed through unchanged.
std::optional<storage::LabelIndexStats> GetIndexStats(const storage::LabelId &label) const {
  std::optional<storage::LabelIndexStats> label_stats = accessor_->GetIndexStats(label);
  return label_stats;
}
|
||||
|
@ -3438,34 +3438,6 @@ mgp_error mgp_graph_search_text_index(mgp_graph *graph, const char *index_name,
|
||||
});
|
||||
}
|
||||
|
||||
/// C API entry point: regex-searches the named text index and stores the outcome in `*result`.
///
/// The search is dispatched on the accessor kind held by `graph->impl` (a plain DbAccessor, or the accessor
/// underlying a SubgraphDbAccessor). A QueryException thrown by the search is not propagated; its message is
/// passed to WrapTextSearch together with the (then empty) result vector. Everything runs inside
/// WrapExceptions, whose return value becomes the returned mgp_error.
mgp_error mgp_graph_regex_search_text_index(mgp_graph *graph, const char *index_name, const char *search_query,
                                            mgp_memory *memory, mgp_map **result) {
  return WrapExceptions([graph, memory, index_name, search_query, result]() {
    // Shared by both accessor kinds: run the search, capture a query error as text, and wrap the outcome.
    const auto search_and_wrap = [&](auto &&accessor) {
      std::vector<memgraph::storage::Gid> search_results;
      std::string error_msg;
      try {
        search_results = accessor->TextIndexRegexSearch(index_name, search_query);
      } catch (const memgraph::query::QueryException &e) {
        error_msg = e.what();
      }
      WrapTextSearch(search_results, error_msg, graph, memory, result);
    };

    std::visit(
        memgraph::utils::Overloaded{
            [&](memgraph::query::DbAccessor *impl) { search_and_wrap(impl); },
            [&](memgraph::query::SubgraphDbAccessor *impl) { search_and_wrap(impl->GetAccessor()); }},
        graph->impl);
  });
}
|
||||
|
||||
#ifdef MG_ENTERPRISE
|
||||
namespace {
|
||||
void NextPermitted(mgp_vertices_iterator &it) {
|
||||
|
@ -14,7 +14,6 @@
|
||||
#include "query/db_accessor.hpp"
|
||||
#include "storage/v2/view.hpp"
|
||||
#include "text_search.hpp"
|
||||
#include "utils/string.hpp"
|
||||
|
||||
namespace memgraph::storage {
|
||||
|
||||
@ -33,7 +32,6 @@ void TextIndex::CreateEmptyIndex(const std::string &index_name, LabelId label) {
|
||||
nlohmann::json mappings = {};
|
||||
mappings["properties"] = {};
|
||||
mappings["properties"]["metadata"] = {{"type", "json"}, {"fast", true}, {"stored", true}, {"text", true}};
|
||||
mappings["properties"]["all"] = {{"type", "text"}, {"fast", true}, {"stored", true}, {"text", true}};
|
||||
mappings["properties"]["data"] = {{"type", "json"}, {"fast", true}, {"stored", true}, {"text", true}};
|
||||
|
||||
index_.emplace(index_name,
|
||||
@ -46,27 +44,6 @@ void TextIndex::CreateEmptyIndex(const std::string &index_name, LabelId label) {
|
||||
label_to_index_.emplace(label, index_name);
|
||||
}
|
||||
|
||||
std::string TextIndex::PropertiesToString(const std::map<PropertyId, PropertyValue> &properties) {
|
||||
std::vector<std::string> indexable_properties_as_string;
|
||||
for (const auto &[_, prop_value] : properties) {
|
||||
switch (prop_value.type()) {
|
||||
case PropertyValue::Type::Bool:
|
||||
case PropertyValue::Type::Int:
|
||||
case PropertyValue::Type::Double:
|
||||
case PropertyValue::Type::String:
|
||||
indexable_properties_as_string.push_back(prop_value.ValueString());
|
||||
break;
|
||||
case PropertyValue::Type::Null:
|
||||
case PropertyValue::Type::List:
|
||||
case PropertyValue::Type::Map:
|
||||
case PropertyValue::Type::TemporalData:
|
||||
default:
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return utils::Join(indexable_properties_as_string, " ");
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
nlohmann::json TextIndex::SerializeProperties(const std::map<PropertyId, PropertyValue> &properties, T *name_resolver) {
|
||||
nlohmann::json serialized_properties = nlohmann::json::value_t::object;
|
||||
@ -115,13 +92,11 @@ std::vector<mgcxx::text_search::Context *> TextIndex::GetApplicableTextIndices(V
|
||||
}
|
||||
|
||||
void TextIndex::LoadNodeToTextIndices(const std::int64_t gid, const nlohmann::json &properties,
|
||||
const std::string &indexable_properties_as_string,
|
||||
const std::vector<mgcxx::text_search::Context *> &applicable_text_indices) {
|
||||
// NOTE: Text indexes are presently all-property indices. If we allow text indexes restricted to specific properties,
|
||||
// an indexable document should be created for each applicable index.
|
||||
nlohmann::json document = {};
|
||||
document["data"] = properties;
|
||||
document["all"] = indexable_properties_as_string;
|
||||
document["metadata"] = {};
|
||||
document["metadata"]["gid"] = gid;
|
||||
document["metadata"]["deleted"] = false;
|
||||
@ -159,7 +134,7 @@ void TextIndex::AddNode(Vertex *vertex_after_update, NameIdMapper *name_id_mappe
|
||||
|
||||
auto vertex_properties = vertex_after_update->properties.Properties();
|
||||
LoadNodeToTextIndices(vertex_after_update->gid.AsInt(), SerializeProperties(vertex_properties, name_id_mapper),
|
||||
PropertiesToString(vertex_properties), applicable_text_indices);
|
||||
applicable_text_indices);
|
||||
}
|
||||
|
||||
void TextIndex::AddNode(Vertex *vertex_after_update, NameIdMapper *name_id_mapper) {
|
||||
@ -179,7 +154,8 @@ void TextIndex::UpdateNode(Vertex *vertex_after_update, NameIdMapper *name_id_ma
|
||||
}
|
||||
|
||||
if (!removed_labels.empty()) {
|
||||
RemoveNode(vertex_after_update, GetApplicableTextIndices(removed_labels));
|
||||
auto indexes_to_remove_node_from = GetApplicableTextIndices(removed_labels);
|
||||
RemoveNode(vertex_after_update, indexes_to_remove_node_from);
|
||||
}
|
||||
|
||||
auto applicable_text_indices = GetApplicableTextIndices(vertex_after_update);
|
||||
@ -226,7 +202,7 @@ void TextIndex::CreateIndex(const std::string &index_name, LabelId label, memgra
|
||||
|
||||
auto vertex_properties = v.Properties(View::NEW).GetValue();
|
||||
LoadNodeToTextIndices(v.Gid().AsInt(), SerializeProperties(vertex_properties, db),
|
||||
PropertiesToString(vertex_properties), {&index_.at(index_name).context_});
|
||||
{&index_.at(index_name).context_});
|
||||
}
|
||||
|
||||
CommitLoadedNodes(index_.at(index_name).context_);
|
||||
@ -244,7 +220,7 @@ void TextIndex::RecoverIndex(const std::string &index_name, LabelId label,
|
||||
nlohmann::json document = {};
|
||||
auto vertex_properties = v.properties.Properties();
|
||||
LoadNodeToTextIndices(v.gid.AsInt(), SerializeProperties(vertex_properties, name_id_mapper),
|
||||
PropertiesToString(vertex_properties), {&index_.at(index_name).context_});
|
||||
{&index_.at(index_name).context_});
|
||||
}
|
||||
|
||||
CommitLoadedNodes(index_.at(index_name).context_);
|
||||
@ -300,37 +276,6 @@ std::vector<Gid> TextIndex::Search(const std::string &index_name, const std::str
|
||||
return found_nodes;
|
||||
}
|
||||
|
||||
std::vector<Gid> TextIndex::RegexSearch(const std::string &index_name, const std::string &search_query) {
|
||||
if (!flags::run_time::GetExperimentalTextSearchEnabled()) {
|
||||
throw query::TextSearchDisabledException();
|
||||
}
|
||||
|
||||
if (!index_.contains(index_name)) {
|
||||
throw query::TextSearchException("Text index \"{}\" doesn’t exist.", index_name);
|
||||
}
|
||||
|
||||
auto input = mgcxx::text_search::SearchInput{
|
||||
.search_fields = {"all"}, .search_query = search_query, .return_fields = {"data", "metadata"}};
|
||||
|
||||
std::vector<Gid> found_nodes;
|
||||
mgcxx::text_search::SearchOutput search_results;
|
||||
|
||||
try {
|
||||
search_results = mgcxx::text_search::regex_search(index_.at(index_name).context_, input);
|
||||
} catch (const std::exception &e) {
|
||||
throw query::TextSearchException("Tantivy error: {}", e.what());
|
||||
}
|
||||
for (const auto &doc : search_results.docs) {
|
||||
// The CXX .data() method (https://cxx.rs/binding/string.html) may overestimate string length, causing JSON parsing
|
||||
// errors downstream. We prevent this by resizing the converted string with the correctly-working .length() method.
|
||||
std::string doc_string = doc.data.data();
|
||||
doc_string.resize(doc.data.length());
|
||||
auto doc_json = nlohmann::json::parse(doc_string);
|
||||
found_nodes.push_back(storage::Gid::FromString(doc_json["metadata"]["gid"].dump()));
|
||||
}
|
||||
return found_nodes;
|
||||
}
|
||||
|
||||
void TextIndex::Commit() {
|
||||
for (auto &[_, index_data] : index_) {
|
||||
mgcxx::text_search::commit(index_data.context_);
|
||||
|
@ -36,8 +36,6 @@ class TextIndex {
|
||||
private:
|
||||
void CreateEmptyIndex(const std::string &index_name, LabelId label);
|
||||
|
||||
std::string PropertiesToString(const std::map<PropertyId, PropertyValue> &properties);
|
||||
|
||||
template <typename T>
|
||||
nlohmann::json SerializeProperties(const std::map<PropertyId, PropertyValue> &properties, T *name_resolver);
|
||||
|
||||
@ -46,7 +44,6 @@ class TextIndex {
|
||||
std::vector<mgcxx::text_search::Context *> GetApplicableTextIndices(Vertex *vertex);
|
||||
|
||||
void LoadNodeToTextIndices(const std::int64_t gid, const nlohmann::json &properties,
|
||||
const std::string &indexable_properties_as_string,
|
||||
const std::vector<mgcxx::text_search::Context *> &applicable_text_indices);
|
||||
|
||||
void CommitLoadedNodes(mgcxx::text_search::Context &index_context);
|
||||
@ -86,8 +83,6 @@ class TextIndex {
|
||||
|
||||
std::vector<Gid> Search(const std::string &index_name, const std::string &search_query);
|
||||
|
||||
std::vector<Gid> RegexSearch(const std::string &index_name, const std::string &search_query);
|
||||
|
||||
void Commit();
|
||||
|
||||
void Rollback();
|
||||
|
@ -247,10 +247,6 @@ class Storage {
|
||||
return storage_->indices_.text_index_.Search(index_name, search_query);
|
||||
}
|
||||
|
||||
std::vector<Gid> TextIndexRegexSearch(const std::string &index_name, const std::string &search_query) const {
|
||||
return storage_->indices_.text_index_.RegexSearch(index_name, search_query);
|
||||
}
|
||||
|
||||
virtual IndicesInfo ListAllIndices() const = 0;
|
||||
|
||||
virtual ConstraintsInfo ListAllConstraints() const = 0;
|
||||
|
Loading…
Reference in New Issue
Block a user