Add text index information to DUMP DATABASE and make it extensible
This commit is contained in:
parent
b24afcde0a
commit
3e7e0d896c
@ -248,6 +248,10 @@ void DumpLabelPropertyIndex(std::ostream *os, query::DbAccessor *dba, storage::L
|
|||||||
<< ");";
|
<< ");";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Writes the `CREATE TEXT INDEX` statement that recreates a single text index.
///
/// @param os          Stream the statement is appended to.
/// @param dba         Accessor used to resolve `label` to its name.
/// @param index_name  Name of the text index; escaped before being written.
/// @param label       Label the text index is defined on; its name is escaped as well.
void DumpTextIndex(std::ostream *os, query::DbAccessor *dba, const std::string &index_name, storage::LabelId label) {
  // The name is only read here, so it is taken by const reference to avoid copying the string on every call.
  *os << "CREATE TEXT INDEX " << EscapeName(index_name) << " ON :" << EscapeName(dba->LabelToName(label)) << ";";
}
|
||||||
|
|
||||||
void DumpExistenceConstraint(std::ostream *os, query::DbAccessor *dba, storage::LabelId label,
|
void DumpExistenceConstraint(std::ostream *os, query::DbAccessor *dba, storage::LabelId label,
|
||||||
storage::PropertyId property) {
|
storage::PropertyId property) {
|
||||||
*os << "CREATE CONSTRAINT ON (u:" << EscapeName(dba->LabelToName(label)) << ") ASSERT EXISTS (u."
|
*os << "CREATE CONSTRAINT ON (u:" << EscapeName(dba->LabelToName(label)) << ") ASSERT EXISTS (u."
|
||||||
@ -282,6 +286,8 @@ PullPlanDump::PullPlanDump(DbAccessor *dba, dbms::DatabaseAccess db_acc)
|
|||||||
CreateLabelIndicesPullChunk(),
|
CreateLabelIndicesPullChunk(),
|
||||||
// Dump all label property indices
|
// Dump all label property indices
|
||||||
CreateLabelPropertyIndicesPullChunk(),
|
CreateLabelPropertyIndicesPullChunk(),
|
||||||
|
// Dump all text indices
|
||||||
|
CreateTextIndicesPullChunk(),
|
||||||
// Dump all existence constraints
|
// Dump all existence constraints
|
||||||
CreateExistenceConstraintsPullChunk(),
|
CreateExistenceConstraintsPullChunk(),
|
||||||
// Dump all unique constraints
|
// Dump all unique constraints
|
||||||
@ -379,6 +385,34 @@ PullPlanDump::PullChunk PullPlanDump::CreateLabelPropertyIndicesPullChunk() {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PullPlanDump::PullChunk PullPlanDump::CreateTextIndicesPullChunk() {
  // Builds the chunk that emits one `CREATE TEXT INDEX` statement per text index.
  // `global_index` lives in the closure, so a later call continues where the previous one stopped.
  return [this, global_index = 0U](AnyStream *stream, std::optional<int> n) mutable -> std::optional<size_t> {
    // The index listing is only fetched when it has not been populated yet.
    if (!indices_info_) {
      indices_info_.emplace(dba_->ListAllIndices());
    }
    const auto &text_indices = indices_info_->text;

    // Emit statements until the list is exhausted or, when `n` is given, `n` rows were produced in this call.
    size_t emitted = 0;
    for (; global_index < text_indices.size(); ++global_index, ++emitted) {
      if (n && emitted >= *n) {
        break;
      }

      const auto &[index_name, label] = text_indices[global_index];
      std::ostringstream statement;
      DumpTextIndex(&statement, dba_, index_name, label);
      stream->Result({TypedValue(statement.str())});
    }

    // Entries are still pending: report no final count for this call.
    if (global_index != text_indices.size()) {
      return std::nullopt;
    }

    // Every text index has been dumped: report how many rows this call produced.
    return emitted;
  };
}
|
||||||
|
|
||||||
PullPlanDump::PullChunk PullPlanDump::CreateExistenceConstraintsPullChunk() {
|
PullPlanDump::PullChunk PullPlanDump::CreateExistenceConstraintsPullChunk() {
|
||||||
return [this, global_index = 0U](AnyStream *stream, std::optional<int> n) mutable -> std::optional<size_t> {
|
return [this, global_index = 0U](AnyStream *stream, std::optional<int> n) mutable -> std::optional<size_t> {
|
||||||
// Delay the construction of constraint vectors
|
// Delay the construction of constraint vectors
|
||||||
|
@ -55,6 +55,7 @@ struct PullPlanDump {
|
|||||||
|
|
||||||
PullChunk CreateLabelIndicesPullChunk();
|
PullChunk CreateLabelIndicesPullChunk();
|
||||||
PullChunk CreateLabelPropertyIndicesPullChunk();
|
PullChunk CreateLabelPropertyIndicesPullChunk();
|
||||||
|
PullChunk CreateTextIndicesPullChunk();
|
||||||
PullChunk CreateExistenceConstraintsPullChunk();
|
PullChunk CreateExistenceConstraintsPullChunk();
|
||||||
PullChunk CreateUniqueConstraintsPullChunk();
|
PullChunk CreateUniqueConstraintsPullChunk();
|
||||||
PullChunk CreateInternalIndexPullChunk();
|
PullChunk CreateInternalIndexPullChunk();
|
||||||
|
@ -3015,7 +3015,7 @@ PreparedQuery PrepareDatabaseInfoQuery(ParsedQuery parsed_query, bool in_explici
|
|||||||
TypedValue(static_cast<int>(storage_acc->ApproximateVertexCount(item.first, item.second)))});
|
TypedValue(static_cast<int>(storage_acc->ApproximateVertexCount(item.first, item.second)))});
|
||||||
}
|
}
|
||||||
for (const auto &item : info.text) {
|
for (const auto &item : info.text) {
|
||||||
results.push_back({TypedValue(text_index_mark), TypedValue(item), TypedValue(), TypedValue()});
|
results.push_back({TypedValue(text_index_mark), TypedValue(item.first), TypedValue(), TypedValue()});
|
||||||
}
|
}
|
||||||
std::sort(results.begin(), results.end(), [&label_index_mark](const auto &record_1, const auto &record_2) {
|
std::sort(results.begin(), results.end(), [&label_index_mark](const auto &record_1, const auto &record_2) {
|
||||||
const auto type_1 = record_1[0].ValueString();
|
const auto type_1 = record_1[0].ValueString();
|
||||||
|
@ -17,7 +17,7 @@
|
|||||||
namespace memgraph::storage {
|
namespace memgraph::storage {
|
||||||
|
|
||||||
void TextIndex::AddNode(Vertex *vertex_after_update, Storage *storage, const std::uint64_t transaction_start_timestamp,
|
void TextIndex::AddNode(Vertex *vertex_after_update, Storage *storage, const std::uint64_t transaction_start_timestamp,
|
||||||
const std::vector<mgcxx::text_search::Context *> &applicable_text_indices) {
|
const std::vector<mgcxx::text_search::Context *> &applicable_text_indices, bool skip_commit) {
|
||||||
// NOTE: Text indexes are presently all-property indices. If we allow text indexes restricted to specific properties,
|
// NOTE: Text indexes are presently all-property indices. If we allow text indexes restricted to specific properties,
|
||||||
// an indexable document should be created for each applicable index.
|
// an indexable document should be created for each applicable index.
|
||||||
nlohmann::json document = {};
|
nlohmann::json document = {};
|
||||||
@ -65,11 +65,11 @@ void TextIndex::AddNode(Vertex *vertex_after_update, Storage *storage, const std
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void TextIndex::AddNode(Vertex *vertex_after_update, Storage *storage,
|
void TextIndex::AddNode(Vertex *vertex_after_update, Storage *storage, const std::uint64_t transaction_start_timestamp,
|
||||||
const std::uint64_t transaction_start_timestamp) {
|
bool skip_commit) {
|
||||||
auto applicable_text_indices = GetApplicableTextIndices(vertex_after_update);
|
auto applicable_text_indices = GetApplicableTextIndices(vertex_after_update);
|
||||||
if (applicable_text_indices.empty()) return;
|
if (applicable_text_indices.empty()) return;
|
||||||
AddNode(vertex_after_update, storage, transaction_start_timestamp, applicable_text_indices);
|
AddNode(vertex_after_update, storage, transaction_start_timestamp, applicable_text_indices, skip_commit);
|
||||||
}
|
}
|
||||||
|
|
||||||
void TextIndex::UpdateNode(Vertex *vertex_after_update, Storage *storage,
|
void TextIndex::UpdateNode(Vertex *vertex_after_update, Storage *storage,
|
||||||
@ -117,7 +117,8 @@ void TextIndex::UpdateOnAddLabel(LabelId added_label, Vertex *vertex_after_updat
|
|||||||
if (!label_to_index_.contains(added_label)) {
|
if (!label_to_index_.contains(added_label)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
AddNode(vertex_after_update, storage, transaction_start_timestamp, {&index_.at(label_to_index_.at(added_label))});
|
AddNode(vertex_after_update, storage, transaction_start_timestamp,
|
||||||
|
std::vector<mgcxx::text_search::Context *>{&index_.at(label_to_index_.at(added_label)).context_});
|
||||||
}
|
}
|
||||||
|
|
||||||
void TextIndex::UpdateOnRemoveLabel(LabelId removed_label, Vertex *vertex_after_update,
|
void TextIndex::UpdateOnRemoveLabel(LabelId removed_label, Vertex *vertex_after_update,
|
||||||
@ -125,7 +126,7 @@ void TextIndex::UpdateOnRemoveLabel(LabelId removed_label, Vertex *vertex_after_
|
|||||||
if (!label_to_index_.contains(removed_label)) {
|
if (!label_to_index_.contains(removed_label)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
RemoveNode(vertex_after_update, {&index_.at(label_to_index_.at(removed_label))});
|
RemoveNode(vertex_after_update, {&index_.at(label_to_index_.at(removed_label)).context_});
|
||||||
}
|
}
|
||||||
|
|
||||||
void TextIndex::UpdateOnSetProperty(Vertex *vertex_after_update, Storage *storage,
|
void TextIndex::UpdateOnSetProperty(Vertex *vertex_after_update, Storage *storage,
|
||||||
@ -137,7 +138,7 @@ std::vector<mgcxx::text_search::Context *> TextIndex::GetApplicableTextIndices(c
|
|||||||
std::vector<mgcxx::text_search::Context *> applicable_text_indices;
|
std::vector<mgcxx::text_search::Context *> applicable_text_indices;
|
||||||
for (const auto &label : labels) {
|
for (const auto &label : labels) {
|
||||||
if (label_to_index_.contains(label)) {
|
if (label_to_index_.contains(label)) {
|
||||||
applicable_text_indices.push_back(&index_.at(label_to_index_.at(label)));
|
applicable_text_indices.push_back(&index_.at(label_to_index_.at(label)).context_);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return applicable_text_indices;
|
return applicable_text_indices;
|
||||||
@ -147,7 +148,7 @@ std::vector<mgcxx::text_search::Context *> TextIndex::GetApplicableTextIndices(V
|
|||||||
std::vector<mgcxx::text_search::Context *> applicable_text_indices;
|
std::vector<mgcxx::text_search::Context *> applicable_text_indices;
|
||||||
for (const auto &label : vertex->labels) {
|
for (const auto &label : vertex->labels) {
|
||||||
if (label_to_index_.contains(label)) {
|
if (label_to_index_.contains(label)) {
|
||||||
applicable_text_indices.push_back(&index_.at(label_to_index_.at(label)));
|
applicable_text_indices.push_back(&index_.at(label_to_index_.at(label)).context_);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return applicable_text_indices;
|
return applicable_text_indices;
|
||||||
@ -160,8 +161,10 @@ bool TextIndex::CreateIndex(std::string index_name, LabelId label, memgraph::que
|
|||||||
mappings["properties"]["data"] = {{"type", "json"}, {"fast", true}, {"stored", true}, {"text", true}};
|
mappings["properties"]["data"] = {{"type", "json"}, {"fast", true}, {"stored", true}, {"text", true}};
|
||||||
|
|
||||||
try {
|
try {
|
||||||
index_.emplace(index_name, mgcxx::text_search::create_index(
|
index_.emplace(index_name,
|
||||||
index_name, mgcxx::text_search::IndexConfig{.mappings = mappings.dump()}));
|
TextIndexData{.context_ = mgcxx::text_search::create_index(
|
||||||
|
index_name, mgcxx::text_search::IndexConfig{.mappings = mappings.dump()}),
|
||||||
|
.scope_ = label});
|
||||||
} catch (const std::exception &e) {
|
} catch (const std::exception &e) {
|
||||||
throw query::QueryException(fmt::format("Tantivy error: {}", e.what()));
|
throw query::QueryException(fmt::format("Tantivy error: {}", e.what()));
|
||||||
}
|
}
|
||||||
@ -169,7 +172,7 @@ bool TextIndex::CreateIndex(std::string index_name, LabelId label, memgraph::que
|
|||||||
|
|
||||||
bool has_schema = false;
|
bool has_schema = false;
|
||||||
std::vector<std::pair<PropertyId, std::string>> indexed_properties{};
|
std::vector<std::pair<PropertyId, std::string>> indexed_properties{};
|
||||||
auto &index_context = index_.at(index_name);
|
auto &index_context = index_.at(index_name).context_;
|
||||||
for (const auto &v : db->Vertices(View::OLD)) {
|
for (const auto &v : db->Vertices(View::OLD)) {
|
||||||
if (!v.HasLabel(View::OLD, label).GetValue()) {
|
if (!v.HasLabel(View::OLD, label).GetValue()) {
|
||||||
continue;
|
continue;
|
||||||
@ -259,8 +262,11 @@ std::vector<Gid> TextIndex::Search(std::string index_name, std::string search_qu
|
|||||||
std::vector<Gid> found_nodes;
|
std::vector<Gid> found_nodes;
|
||||||
|
|
||||||
mgcxx::text_search::SearchOutput search_results;
|
mgcxx::text_search::SearchOutput search_results;
|
||||||
|
|
||||||
|
// if (!index_.contains(index_name)) throw InvalidArgumentException("InvalidArgumentException");
|
||||||
|
|
||||||
try {
|
try {
|
||||||
search_results = mgcxx::text_search::search(index_.at(index_name), input);
|
search_results = mgcxx::text_search::search(index_.at(index_name).context_, input);
|
||||||
} catch (const std::exception &e) {
|
} catch (const std::exception &e) {
|
||||||
throw query::QueryException(fmt::format("Tantivy error: {}", e.what()));
|
throw query::QueryException(fmt::format("Tantivy error: {}", e.what()));
|
||||||
}
|
}
|
||||||
@ -277,11 +283,11 @@ std::vector<Gid> TextIndex::Search(std::string index_name, std::string search_qu
|
|||||||
return found_nodes;
|
return found_nodes;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::string> TextIndex::ListIndices() const {
|
std::vector<std::pair<std::string, LabelId>> TextIndex::ListIndices() const {
|
||||||
std::vector<std::string> ret;
|
std::vector<std::pair<std::string, LabelId>> ret;
|
||||||
ret.reserve(index_.size());
|
ret.reserve(index_.size());
|
||||||
for (const auto &item : index_) {
|
for (const auto &[index_name, index_data] : index_) {
|
||||||
ret.push_back(item.first);
|
ret.push_back({index_name, index_data.scope_});
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -23,10 +23,15 @@ class DbAccessor;
|
|||||||
namespace memgraph::storage {
|
namespace memgraph::storage {
|
||||||
class Storage;
|
class Storage;
|
||||||
|
|
||||||
|
// Per-index record stored in TextIndex::index_ under the index name: the text search context together with the
// label the index was created for.
struct TextIndexData {
|
||||||
|
// Context obtained from mgcxx::text_search::create_index; it is what gets passed to mgcxx::text_search::search.
mgcxx::text_search::Context context_;
|
||||||
|
// Label given to CreateIndex for this index; ListIndices reports it next to the index name.
LabelId scope_;
|
||||||
|
};
|
||||||
|
|
||||||
class TextIndex {
|
class TextIndex {
|
||||||
private:
|
private:
|
||||||
void AddNode(Vertex *vertex, Storage *storage, const std::uint64_t transaction_start_timestamp,
|
void AddNode(Vertex *vertex, Storage *storage, const std::uint64_t transaction_start_timestamp,
|
||||||
const std::vector<mgcxx::text_search::Context *> &applicable_text_indices);
|
const std::vector<mgcxx::text_search::Context *> &applicable_text_indices, bool skip_commit = false);
|
||||||
|
|
||||||
std::vector<mgcxx::text_search::Context *> GetApplicableTextIndices(const std::vector<LabelId> &labels);
|
std::vector<mgcxx::text_search::Context *> GetApplicableTextIndices(const std::vector<LabelId> &labels);
|
||||||
|
|
||||||
@ -42,10 +47,11 @@ class TextIndex {
|
|||||||
|
|
||||||
~TextIndex() = default;
|
~TextIndex() = default;
|
||||||
|
|
||||||
std::map<std::string, mgcxx::text_search::Context> index_;
|
std::map<std::string, TextIndexData> index_;
|
||||||
std::map<LabelId, std::string> label_to_index_;
|
std::map<LabelId, std::string> label_to_index_;
|
||||||
|
|
||||||
void AddNode(Vertex *vertex, Storage *storage, const std::uint64_t transaction_start_timestamp);
|
void AddNode(Vertex *vertex, Storage *storage, const std::uint64_t transaction_start_timestamp,
|
||||||
|
bool skip_commit = false);
|
||||||
|
|
||||||
void UpdateNode(Vertex *vertex, Storage *storage, const std::uint64_t transaction_start_timestamp);
|
void UpdateNode(Vertex *vertex, Storage *storage, const std::uint64_t transaction_start_timestamp);
|
||||||
|
|
||||||
@ -73,7 +79,7 @@ class TextIndex {
|
|||||||
|
|
||||||
std::vector<Gid> Search(std::string index_name, std::string search_query);
|
std::vector<Gid> Search(std::string index_name, std::string search_query);
|
||||||
|
|
||||||
std::vector<std::string> ListIndices() const;
|
std::vector<std::pair<std::string, LabelId>> ListIndices() const;
|
||||||
|
|
||||||
std::uint64_t ApproximateVertexCount(std::string index_name) const;
|
std::uint64_t ApproximateVertexCount(std::string index_name) const;
|
||||||
};
|
};
|
||||||
|
@ -59,7 +59,7 @@ class EdgeAccessor;
|
|||||||
struct IndicesInfo {
|
struct IndicesInfo {
|
||||||
std::vector<LabelId> label;
|
std::vector<LabelId> label;
|
||||||
std::vector<std::pair<LabelId, PropertyId>> label_property;
|
std::vector<std::pair<LabelId, PropertyId>> label_property;
|
||||||
std::vector<std::string> text;
|
std::vector<std::pair<std::string, LabelId>> text;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ConstraintsInfo {
|
struct ConstraintsInfo {
|
||||||
|
Loading…
Reference in New Issue
Block a user