Add text index information to DUMP DATABASE and make it extensible

This commit is contained in:
Ante Pušić 2024-01-29 12:07:09 +01:00
parent b24afcde0a
commit 3e7e0d896c
6 changed files with 69 additions and 22 deletions

View File

@ -248,6 +248,10 @@ void DumpLabelPropertyIndex(std::ostream *os, query::DbAccessor *dba, storage::L
<< ");";
}
// Writes the `CREATE TEXT INDEX <name> ON :<label>;` statement for one text index to `os`.
//
// os         - output stream that receives the statement.
// dba        - accessor used to translate `label` into its name.
// index_name - name of the text index; passed through EscapeName before being written.
// label      - label the index is defined on; its name is passed through EscapeName as well.
//
// The name is only read here, so it is taken by const reference to avoid copying it for every dumped index.
void DumpTextIndex(std::ostream *os, query::DbAccessor *dba, const std::string &index_name, storage::LabelId label) {
  *os << "CREATE TEXT INDEX " << EscapeName(index_name) << " ON :" << EscapeName(dba->LabelToName(label)) << ";";
}
void DumpExistenceConstraint(std::ostream *os, query::DbAccessor *dba, storage::LabelId label,
storage::PropertyId property) {
*os << "CREATE CONSTRAINT ON (u:" << EscapeName(dba->LabelToName(label)) << ") ASSERT EXISTS (u."
@ -282,6 +286,8 @@ PullPlanDump::PullPlanDump(DbAccessor *dba, dbms::DatabaseAccess db_acc)
CreateLabelIndicesPullChunk(),
// Dump all label property indices
CreateLabelPropertyIndicesPullChunk(),
// Dump all text indices
CreateTextIndicesPullChunk(),
// Dump all existence constraints
CreateExistenceConstraintsPullChunk(),
// Dump all unique constraints
@ -379,6 +385,34 @@ PullPlanDump::PullChunk PullPlanDump::CreateLabelPropertyIndicesPullChunk() {
};
}
PullPlanDump::PullChunk PullPlanDump::CreateTextIndicesPullChunk() {
  // Builds the chunk that streams one dump statement per text index.
  // `global_index` lives in the closure, so a later pull resumes after the last index already streamed.
  return [this, global_index = 0U](AnyStream *stream, std::optional<int> n) mutable -> std::optional<size_t> {
    // The index listing is fetched only when a pull first needs it.
    if (!indices_info_.has_value()) {
      indices_info_.emplace(dba_->ListAllIndices());
    }
    const auto &text_indices = indices_info_->text;

    // Stream statements until either all indices are done or the caller's limit `n` (if any) is reached.
    size_t streamed = 0;
    for (; global_index < text_indices.size() && (!n || streamed < *n); ++global_index, ++streamed) {
      const auto &entry = text_indices[global_index];
      std::ostringstream statement;
      DumpTextIndex(&statement, dba_, entry.first, entry.second);
      stream->Result({TypedValue(statement.str())});
    }

    // A count is returned only once every text index has been streamed; otherwise nullopt is returned.
    if (global_index != text_indices.size()) {
      return std::nullopt;
    }
    return streamed;
  };
}
PullPlanDump::PullChunk PullPlanDump::CreateExistenceConstraintsPullChunk() {
return [this, global_index = 0U](AnyStream *stream, std::optional<int> n) mutable -> std::optional<size_t> {
// Delay the construction of constraint vectors

View File

@ -55,6 +55,7 @@ struct PullPlanDump {
PullChunk CreateLabelIndicesPullChunk();
PullChunk CreateLabelPropertyIndicesPullChunk();
PullChunk CreateTextIndicesPullChunk();
PullChunk CreateExistenceConstraintsPullChunk();
PullChunk CreateUniqueConstraintsPullChunk();
PullChunk CreateInternalIndexPullChunk();

View File

@ -3015,7 +3015,7 @@ PreparedQuery PrepareDatabaseInfoQuery(ParsedQuery parsed_query, bool in_explici
TypedValue(static_cast<int>(storage_acc->ApproximateVertexCount(item.first, item.second)))});
}
for (const auto &item : info.text) {
results.push_back({TypedValue(text_index_mark), TypedValue(item), TypedValue(), TypedValue()});
results.push_back({TypedValue(text_index_mark), TypedValue(item.first), TypedValue(), TypedValue()});
}
std::sort(results.begin(), results.end(), [&label_index_mark](const auto &record_1, const auto &record_2) {
const auto type_1 = record_1[0].ValueString();

View File

@ -17,7 +17,7 @@
namespace memgraph::storage {
void TextIndex::AddNode(Vertex *vertex_after_update, Storage *storage, const std::uint64_t transaction_start_timestamp,
const std::vector<mgcxx::text_search::Context *> &applicable_text_indices) {
const std::vector<mgcxx::text_search::Context *> &applicable_text_indices, bool skip_commit) {
// NOTE: Text indexes are presently all-property indices. If we allow text indexes restricted to specific properties,
// an indexable document should be created for each applicable index.
nlohmann::json document = {};
@ -65,11 +65,11 @@ void TextIndex::AddNode(Vertex *vertex_after_update, Storage *storage, const std
}
}
// Adds a vertex to every text index that applies to it.
// The applicable indices are looked up with GetApplicableTextIndices; when there are none the function returns
// without doing anything, otherwise it delegates to the AddNode overload that takes the list of index contexts.
// NOTE(review): this span shows both the earlier signature/call (without `skip_commit`) and the later one (with it).
void TextIndex::AddNode(Vertex *vertex_after_update, Storage *storage,
const std::uint64_t transaction_start_timestamp) {
void TextIndex::AddNode(Vertex *vertex_after_update, Storage *storage, const std::uint64_t transaction_start_timestamp,
bool skip_commit) {
auto applicable_text_indices = GetApplicableTextIndices(vertex_after_update);
// Nothing to index when no text index covers this vertex.
if (applicable_text_indices.empty()) return;
AddNode(vertex_after_update, storage, transaction_start_timestamp, applicable_text_indices);
// `skip_commit` is forwarded unchanged; its effect is decided by the overload being called (not visible here).
AddNode(vertex_after_update, storage, transaction_start_timestamp, applicable_text_indices, skip_commit);
}
void TextIndex::UpdateNode(Vertex *vertex_after_update, Storage *storage,
@ -117,7 +117,8 @@ void TextIndex::UpdateOnAddLabel(LabelId added_label, Vertex *vertex_after_updat
if (!label_to_index_.contains(added_label)) {
return;
}
AddNode(vertex_after_update, storage, transaction_start_timestamp, {&index_.at(label_to_index_.at(added_label))});
AddNode(vertex_after_update, storage, transaction_start_timestamp,
std::vector<mgcxx::text_search::Context *>{&index_.at(label_to_index_.at(added_label)).context_});
}
void TextIndex::UpdateOnRemoveLabel(LabelId removed_label, Vertex *vertex_after_update,
@ -125,7 +126,7 @@ void TextIndex::UpdateOnRemoveLabel(LabelId removed_label, Vertex *vertex_after_
if (!label_to_index_.contains(removed_label)) {
return;
}
RemoveNode(vertex_after_update, {&index_.at(label_to_index_.at(removed_label))});
RemoveNode(vertex_after_update, {&index_.at(label_to_index_.at(removed_label)).context_});
}
void TextIndex::UpdateOnSetProperty(Vertex *vertex_after_update, Storage *storage,
@ -137,7 +138,7 @@ std::vector<mgcxx::text_search::Context *> TextIndex::GetApplicableTextIndices(c
std::vector<mgcxx::text_search::Context *> applicable_text_indices;
for (const auto &label : labels) {
if (label_to_index_.contains(label)) {
applicable_text_indices.push_back(&index_.at(label_to_index_.at(label)));
applicable_text_indices.push_back(&index_.at(label_to_index_.at(label)).context_);
}
}
return applicable_text_indices;
@ -147,7 +148,7 @@ std::vector<mgcxx::text_search::Context *> TextIndex::GetApplicableTextIndices(V
std::vector<mgcxx::text_search::Context *> applicable_text_indices;
for (const auto &label : vertex->labels) {
if (label_to_index_.contains(label)) {
applicable_text_indices.push_back(&index_.at(label_to_index_.at(label)));
applicable_text_indices.push_back(&index_.at(label_to_index_.at(label)).context_);
}
}
return applicable_text_indices;
@ -160,8 +161,10 @@ bool TextIndex::CreateIndex(std::string index_name, LabelId label, memgraph::que
mappings["properties"]["data"] = {{"type", "json"}, {"fast", true}, {"stored", true}, {"text", true}};
try {
index_.emplace(index_name, mgcxx::text_search::create_index(
index_name, mgcxx::text_search::IndexConfig{.mappings = mappings.dump()}));
index_.emplace(index_name,
TextIndexData{.context_ = mgcxx::text_search::create_index(
index_name, mgcxx::text_search::IndexConfig{.mappings = mappings.dump()}),
.scope_ = label});
} catch (const std::exception &e) {
throw query::QueryException(fmt::format("Tantivy error: {}", e.what()));
}
@ -169,7 +172,7 @@ bool TextIndex::CreateIndex(std::string index_name, LabelId label, memgraph::que
bool has_schema = false;
std::vector<std::pair<PropertyId, std::string>> indexed_properties{};
auto &index_context = index_.at(index_name);
auto &index_context = index_.at(index_name).context_;
for (const auto &v : db->Vertices(View::OLD)) {
if (!v.HasLabel(View::OLD, label).GetValue()) {
continue;
@ -259,8 +262,11 @@ std::vector<Gid> TextIndex::Search(std::string index_name, std::string search_qu
std::vector<Gid> found_nodes;
mgcxx::text_search::SearchOutput search_results;
// if (!index_.contains(index_name)) throw InvalidArgumentException("InvalidArgumentException");
try {
search_results = mgcxx::text_search::search(index_.at(index_name), input);
search_results = mgcxx::text_search::search(index_.at(index_name).context_, input);
} catch (const std::exception &e) {
throw query::QueryException(fmt::format("Tantivy error: {}", e.what()));
}
@ -277,11 +283,11 @@ std::vector<Gid> TextIndex::Search(std::string index_name, std::string search_qu
return found_nodes;
}
// Lists the text indices stored in `index_`.
// NOTE(review): this span shows two variants of the function: the earlier one returns only index names, the later
// one returns (index name, label) pairs, taking the label from each entry's `scope_`.
std::vector<std::string> TextIndex::ListIndices() const {
std::vector<std::string> ret;
std::vector<std::pair<std::string, LabelId>> TextIndex::ListIndices() const {
std::vector<std::pair<std::string, LabelId>> ret;
// One output element is produced per stored index, so the final size is known up front.
ret.reserve(index_.size());
for (const auto &item : index_) {
ret.push_back(item.first);
for (const auto &[index_name, index_data] : index_) {
ret.push_back({index_name, index_data.scope_});
}
return ret;
}

View File

@ -23,10 +23,15 @@ class DbAccessor;
namespace memgraph::storage {
class Storage;
// Per-index record stored in TextIndex::index_, keyed there by index name.
struct TextIndexData {
// Text search context of the index, as returned by mgcxx::text_search::create_index.
mgcxx::text_search::Context context_;
// Label the index was created for.
LabelId scope_;
};
class TextIndex {
private:
void AddNode(Vertex *vertex, Storage *storage, const std::uint64_t transaction_start_timestamp,
const std::vector<mgcxx::text_search::Context *> &applicable_text_indices);
const std::vector<mgcxx::text_search::Context *> &applicable_text_indices, bool skip_commit = false);
std::vector<mgcxx::text_search::Context *> GetApplicableTextIndices(const std::vector<LabelId> &labels);
@ -42,10 +47,11 @@ class TextIndex {
~TextIndex() = default;
std::map<std::string, mgcxx::text_search::Context> index_;
std::map<std::string, TextIndexData> index_;
std::map<LabelId, std::string> label_to_index_;
void AddNode(Vertex *vertex, Storage *storage, const std::uint64_t transaction_start_timestamp);
void AddNode(Vertex *vertex, Storage *storage, const std::uint64_t transaction_start_timestamp,
bool skip_commit = false);
void UpdateNode(Vertex *vertex, Storage *storage, const std::uint64_t transaction_start_timestamp);
@ -73,7 +79,7 @@ class TextIndex {
std::vector<Gid> Search(std::string index_name, std::string search_query);
std::vector<std::string> ListIndices() const;
std::vector<std::pair<std::string, LabelId>> ListIndices() const;
std::uint64_t ApproximateVertexCount(std::string index_name) const;
};

View File

@ -59,7 +59,7 @@ class EdgeAccessor;
// Listing of the indices known to the storage, grouped by index kind.
// NOTE(review): this span shows both the earlier `text` member (names only) and the later one (name/label pairs).
struct IndicesInfo {
// Label ids; presumably one per label index (confirm against the code that fills this struct).
std::vector<LabelId> label;
// (label, property) pairs; presumably one per label+property index.
std::vector<std::pair<LabelId, PropertyId>> label_property;
std::vector<std::string> text;
// Text indices as (index name, label) pairs.
std::vector<std::pair<std::string, LabelId>> text;
};
struct ConstraintsInfo {