Implement the text search query module

This commit is contained in:
Ante Pušić 2024-01-10 00:47:55 +01:00
parent bed5651716
commit 00275f5736
8 changed files with 91 additions and 34 deletions

View File

@ -330,9 +330,9 @@ inline bool graph_has_text_index(mgp_graph *graph, const char *index_name) {
return MgInvoke<int>(mgp_graph_has_text_index, graph, index_name);
}
// TODO antepusic change result type
inline bool graph_search_text_index(mgp_graph *graph, const char *index_name, const char *search_string) {
return MgInvoke<int>(mgp_graph_has_text_index, graph, index_name, search_string);
// Runs `search_query` against the text index named `index_name` and returns the resulting list.
//
// Thin wrapper: all work is done by the C API function mgp_graph_search_text_index, invoked through MgInvoke with
// `memory` as the allocation handle for the result.
inline mgp_list *graph_search_text_index(mgp_graph *graph, mgp_memory *memory, const char *index_name,
                                         const char *search_query) {
  // The callee must be the C API entry point (mgp_ prefix); naming this wrapper itself here would never reach it.
  return MgInvoke<mgp_list *>(mgp_graph_search_text_index, graph, memory, index_name, search_query);
}
inline mgp_vertices_iterator *graph_iter_vertices(mgp_graph *g, mgp_memory *memory) {

View File

@ -891,11 +891,15 @@ enum mgp_error mgp_edge_iter_properties(struct mgp_edge *e, struct mgp_memory *m
enum mgp_error mgp_graph_get_vertex_by_id(struct mgp_graph *g, struct mgp_vertex_id id, struct mgp_memory *memory,
struct mgp_vertex **result);
enum mgp_error mgp_graph_has_text_index(mgp_graph *graph, const char *label, int *result);
/// Result is non-zero if the index with the given name exists.
/// Current implementation always returns without errors.
enum mgp_error mgp_graph_has_text_index(mgp_graph *graph, const char *index_name, int *result);
// TODO antepusic change result type
enum mgp_error mgp_graph_search_text_index(mgp_graph *graph, const char *index_name, const char *search_string,
int *result);
/// Search the named text index for the given query. The result is a list of the vertices whose text properties match
/// the given query.
///
/// @param graph        Graph whose text index is searched.
/// @param memory       Memory handle used to allocate the result list and the vertices placed in it.
/// @param index_name   Name of the text index to search.
/// @param search_query Query to run against the index.
/// @param result       Out-parameter that receives the list of matching vertices.
///
/// Return mgp_error::MGP_ERROR_UNABLE_TO_ALLOCATE if unable to allocate search result vertices.
/// NOTE(review): the error code actually reported on failure depends on how the implementation's exceptions are
/// translated into mgp_error values — confirm the statement above against the definition.
enum mgp_error mgp_graph_search_text_index(mgp_graph *graph, mgp_memory *memory, const char *index_name,
const char *search_query, struct mgp_list **result);
/// Creates label index for given label.
/// mgp_error::MGP_ERROR_NO_ERROR is always returned.

View File

@ -17,34 +17,30 @@
// String constants and the callback declaration of the text search query module.
// Presumably the procedure and parameter names are consumed when the module registers itself; that code is not
// part of this view — TODO confirm.
namespace TextSearch {
constexpr std::string_view kProcedureSearch = "search";
constexpr std::string_view kParameterLabel = "label";
// NOTE(review): the identifier still says "SearchString" while the value names the parameter "search_query";
// consider aligning the two once all users of the constant are known.
constexpr std::string_view kParameterSearchString = "search_string";
constexpr std::string_view kParameterSearchString = "search_query";
// Field name under which Search inserts each matching node into a result record.
constexpr std::string_view kReturnNode = "node";
// Procedure callback: reads its inputs from `args` and writes result records through `result`.
void Search(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory);
} // namespace TextSearch
// Procedure callback for text search.
//
// Reads argument 0 as the label (used as the text index name) and argument 1 as the search query. When
// graph_has_text_index reports no index for that label, the function returns without producing any record.
// Otherwise every element of the list returned by graph_search_text_index is inserted into its own record under
// TextSearch::kReturnNode.
void Search(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory) {
// CALL text_search.search("Label", "someQuery", searchFields, returnFields) RETURN node, score
// NOTE(review): only two arguments are read below and only the node field is written to records — confirm whether
// the call shape above (searchFields, returnFields, score) is still intended.
mgp::MemoryDispatcherGuard guard{memory};
const auto record_factory = mgp::RecordFactory(result);
auto arguments = mgp::List(args);
auto label = arguments[0].ValueString();
auto search_string = arguments[1].ValueString();
auto search_query = arguments[1].ValueString();
// 1. See if the given label is text-indexed
if (!mgp::graph_has_text_index(memgraph_graph, label.data())) {
return;
}
// 2. Run text search of that index
mgp::graph_search_text_index(memgraph_graph, label.data(), search_string);
// text_index.search(label, search_string);
// 3. Get the graph elements from their IDs in the search results
// 4. Return records (one per element)
// 2. Run a text search of that index and return the search results
// NOTE(review): the raw list returned by graph_search_text_index is handed straight to mgp::List — verify who is
// responsible for releasing it.
for (const auto &node :
mgp::List(mgp::graph_search_text_index(memgraph_graph, memory, label.data(), search_query.data()))) {
// One record per search hit.
auto record = record_factory.NewRecord();
record.Insert(TextSearch::kReturnNode.data(), node);
}
}
extern "C" int mgp_init_module(struct mgp_module *module, struct mgp_memory *memory) {

View File

@ -555,8 +555,8 @@ class DbAccessor final {
bool TextIndexExists(std::string index_name) const { return accessor_->TextIndexExists(index_name); }
mgcxx_mock::text_search::SearchOutput SearchTextIndex(std::string index_name, std::string search_string) const {
return accessor_->SearchTextIndex(index_name, search_string);
std::vector<storage::Gid> SearchTextIndex(std::string index_name, std::string search_query) const {
return accessor_->SearchTextIndex(index_name, search_query);
}
std::optional<storage::LabelIndexStats> GetIndexStats(const storage::LabelId &label) const {

View File

@ -3333,9 +3333,56 @@ mgp_error mgp_graph_has_text_index(mgp_graph *graph, const char *index_name, int
});
}
mgp_error mgp_graph_search_text_index(mgp_graph *graph, const char *index_name, const char *search_string,
int *result) {
return WrapExceptions([graph, index_name, result]() { *result = 1; });
// Resolves `id` to a vertex through whichever accessor `graph->impl` holds and wraps it in a new mgp_vertex
// allocated via `memory`. Returns nullptr when FindVertex finds nothing for `id` under graph->view.
mgp_vertex *GetVertexByGid(mgp_graph *graph, memgraph::storage::Gid id, mgp_memory *memory) {
  const auto find_vertex = [graph, id](auto *accessor) { return accessor->FindVertex(id, graph->view); };
  const std::optional<memgraph::query::VertexAccessor> found = std::visit(find_vertex, graph->impl);
  if (!found) {
    return nullptr;
  }

  // The plain accessor wraps the vertex as-is; the subgraph accessor first pairs it with its subgraph.
  const auto wrap_vertex = memgraph::utils::Overloaded{
      [memory, graph, &found](memgraph::query::DbAccessor *) {
        return NewRawMgpObject<mgp_vertex>(memory, *found, graph);
      },
      [memory, graph, &found](memgraph::query::SubgraphDbAccessor *subgraph) {
        return NewRawMgpObject<mgp_vertex>(
            memory, memgraph::query::SubgraphVertexAccessor(*found, subgraph->getGraph()), graph);
      }};
  return std::visit(wrap_vertex, graph->impl);
}
// Creates the result list in `*result` and fills it with one vertex value per ID in `vertex_ids`.
//
// IDs for which GetVertexByGid returns nullptr are skipped instead of being wrapped. Throws std::logic_error when
// the list cannot be created, a vertex value cannot be created, or a value cannot be appended; in the latter two
// cases the partially filled list is destroyed and `*result` is reset to nullptr before throwing.
void WrapIntoVertexList(std::vector<memgraph::storage::Gid> vertex_ids, mgp_graph *graph, mgp_memory *memory,
                        mgp_list **result) {
  if (const auto err = mgp_list_make_empty(vertex_ids.size(), memory, result); err != mgp_error::MGP_ERROR_NO_ERROR) {
    throw std::logic_error("Retrieving text search results failed during creation of the result list");
  }

  // Releases the half-built list before reporting a failure so that the caller never receives a partial result.
  const auto fail = [result](const char *message) {
    mgp_list_destroy(*result);
    *result = nullptr;
    throw std::logic_error(message);
  };

  for (const auto &vertex_id : vertex_ids) {
    mgp_vertex *vertex = GetVertexByGid(graph, vertex_id, memory);
    if (vertex == nullptr) {
      // The index reported an ID that does not resolve to a vertex in this graph view; there is nothing to wrap.
      continue;
    }

    mgp_value *vertex_value = nullptr;
    if (const auto err = mgp_value_make_vertex(vertex, &vertex_value); err != mgp_error::MGP_ERROR_NO_ERROR) {
      // Ownership of the vertex only moves into the value on success (mgp C API contract), so free it here.
      mgp_vertex_destroy(vertex);
      fail("Retrieving text search results failed during creation of a vertex mgp_value");
    }

    // mgp_list_append is documented to store a copy of the value, so the original is released in either outcome.
    const auto err_list = mgp_list_append(*result, vertex_value);
    mgp_value_destroy(vertex_value);
    if (err_list != mgp_error::MGP_ERROR_NO_ERROR) {
      fail("Retrieving text search results failed during insertion of the mgp_value into the result list");
    }
  }
}
// C API entry point: searches the text index `index_name` with `search_query` and stores the list of matching
// vertices in `*result`. The body runs inside WrapExceptions, whose return value is returned to the caller.
mgp_error mgp_graph_search_text_index(mgp_graph *graph, mgp_memory *memory, const char *index_name,
                                      const char *search_query, mgp_list **result) {
  return WrapExceptions([graph, memory, index_name, search_query, result]() {
    // Both accessor kinds yield the same ID list; the subgraph accessor goes through its underlying accessor.
    const auto search_index = memgraph::utils::Overloaded{
        [&](memgraph::query::DbAccessor *accessor) { return accessor->SearchTextIndex(index_name, search_query); },
        [&](memgraph::query::SubgraphDbAccessor *subgraph) {
          return subgraph->GetAccessor()->SearchTextIndex(index_name, search_query);
        }};
    WrapIntoVertexList(std::visit(search_index, graph->impl), graph, memory, result);
  });
}
#ifdef MG_ENTERPRISE

View File

@ -57,10 +57,18 @@ class TextIndex {
// True when index_ holds an entry under `index_name`. The name is only used for the lookup, so it is taken by const
// reference rather than copied.
bool IndexExists(const std::string &index_name) { return index_.contains(index_name); }
mgcxx_mock::text_search::SearchOutput Search(std::string index_name, std::string search_string) {
// TODO antepusic: Add metadata to the return fields before search
auto input = mgcxx_mock::text_search::SearchInput{};
return mgcxx_mock::text_search::Mock::search(index_.at(index_name), input);
std::vector<Gid> Search(std::string index_name, std::string search_query) {
auto input = mgcxx_mock::text_search::SearchInput{.search_query = search_query, .return_fields = {"metadata.gid"}};
// Basic check for search fields in the query (Tantivy syntax delimits them with `:` to the right)
if (search_query.find(":") == std::string::npos) {
input.search_fields = {"data"};
}
std::vector<Gid> found_nodes;
for (const auto &doc : mgcxx_mock::text_search::Mock::search(index_.at(index_name), input).docs) {
found_nodes.push_back(storage::Gid::FromString(doc.data));
}
return found_nodes;
}
std::vector<std::string> ListIndices() {

View File

@ -43,7 +43,9 @@ struct SearchOutput {
std::vector<DocumentOutput> docs;
};
// NOTE: The function names don't follow the style guide in order to be uniform with the mgcxx API
// NOTE:
// * The function names don't follow the style guide in order to be uniform with the mgcxx API
// * All methods are static in order to avoid having to make a Mock object that's globally available
class Mock {
public:
// Mock stand-in for initialization: the body is empty and the log level argument is not used.
static void init(std::string _log_level) {}
@ -59,7 +61,7 @@ class Mock {
// Mock stand-in for rollback: the body is empty and the given context is not used.
static void rollback(IndexContext context) {}
// Mock stand-in for search: neither `context` nor `input` is read; the result is a SearchOutput holding a single
// hard-coded DocumentOutput.
// NOTE(review): callers parse `data` as a Gid string, so the hard-coded value has to stay parseable — confirm.
static SearchOutput search(IndexContext context, SearchInput input) {
return SearchOutput{.docs = {DocumentOutput{.data = ""}}};
return SearchOutput{.docs = {DocumentOutput{.data = "0"}}};
}
// Mock stand-in for aggregation: both arguments are ignored and a default-constructed DocumentOutput is returned.
static DocumentOutput aggregate(IndexContext context, SearchInput input) { return DocumentOutput(); }

View File

@ -216,8 +216,8 @@ class Storage {
return storage_->indices_.text_index_->IndexExists(index_name);
}
mgcxx_mock::text_search::SearchOutput SearchTextIndex(std::string index_name, std::string search_string) const {
return storage_->indices_.text_index_->Search(index_name, search_string);
std::vector<Gid> SearchTextIndex(std::string index_name, std::string search_query) const {
return storage_->indices_.text_index_->Search(index_name, search_query);
}
virtual IndicesInfo ListAllIndices() const = 0;
@ -256,8 +256,8 @@ class Storage {
std::vector<EdgeTypeId> ListAllPossiblyPresentEdgeTypes() const;
mgcxx_mock::text_search::SearchOutput TextSearch(std::string index_name, std::string &search_string) const {
return storage_->indices_.text_index_->Search(index_name, search_string);
std::vector<Gid> TextSearch(std::string index_name, std::string &search_query) const {
return storage_->indices_.text_index_->Search(index_name, search_query);
}
virtual utils::BasicResult<StorageIndexDefinitionError, void> CreateIndex(LabelId label) = 0;