Add vertex degree to index statistics (#1026)
Add graph analysis of vertex degrees when doing ANALYZE GRAPH.
This commit is contained in:
parent
261aa4f49b
commit
84721f7e0a
@ -15,5 +15,5 @@
|
|||||||
|
|
||||||
namespace memgraph::query {
|
namespace memgraph::query {
|
||||||
inline const std::string kAsterisk = "*";
|
inline const std::string kAsterisk = "*";
|
||||||
inline constexpr uint16_t kDeleteStatisticsNumResults = 6;
|
inline constexpr uint16_t kComputeStatisticsNumResults = 7;
|
||||||
} // namespace memgraph::query
|
} // namespace memgraph::query
|
||||||
|
@ -430,22 +430,36 @@ class DbAccessor final {
|
|||||||
return accessor_->LabelPropertyIndexExists(label, prop);
|
return accessor_->LabelPropertyIndexExists(label, prop);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::optional<storage::IndexStats> GetIndexStats(const storage::LabelId &label,
|
std::optional<storage::LabelIndexStats> GetIndexStats(const storage::LabelId &label) const {
|
||||||
const storage::PropertyId &property) const {
|
return accessor_->GetIndexStats(label);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::optional<storage::LabelPropertyIndexStats> GetIndexStats(const storage::LabelId &label,
|
||||||
|
const storage::PropertyId &property) const {
|
||||||
return accessor_->GetIndexStats(label, property);
|
return accessor_->GetIndexStats(label, property);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::pair<storage::LabelId, storage::PropertyId>> ClearIndexStats() {
|
std::vector<std::pair<storage::LabelId, storage::PropertyId>> ClearLabelPropertyIndexStats() {
|
||||||
return accessor_->ClearIndexStats();
|
return accessor_->ClearLabelPropertyIndexStats();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::pair<storage::LabelId, storage::PropertyId>> DeleteIndexStatsForLabels(
|
std::vector<storage::LabelId> ClearLabelIndexStats() { return accessor_->ClearLabelIndexStats(); }
|
||||||
|
|
||||||
|
std::vector<std::pair<storage::LabelId, storage::PropertyId>> DeleteLabelPropertyIndexStats(
|
||||||
const std::span<std::string> labels) {
|
const std::span<std::string> labels) {
|
||||||
return accessor_->DeleteIndexStatsForLabels(labels);
|
return accessor_->DeleteLabelPropertyIndexStats(labels);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<storage::LabelId> DeleteLabelIndexStats(const std::span<std::string> labels) {
|
||||||
|
return accessor_->DeleteLabelIndexStats(labels);
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetIndexStats(const storage::LabelId &label, const storage::LabelIndexStats &stats) {
|
||||||
|
accessor_->SetIndexStats(label, stats);
|
||||||
}
|
}
|
||||||
|
|
||||||
void SetIndexStats(const storage::LabelId &label, const storage::PropertyId &property,
|
void SetIndexStats(const storage::LabelId &label, const storage::PropertyId &property,
|
||||||
const storage::IndexStats &stats) {
|
const storage::LabelPropertyIndexStats &stats) {
|
||||||
accessor_->SetIndexStats(label, property, stats);
|
accessor_->SetIndexStats(label, property, stats);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -157,25 +157,34 @@ void SymbolGenerator::VisitReturnBody(ReturnBody &body, Where *where) {
|
|||||||
// Query
|
// Query
|
||||||
|
|
||||||
bool SymbolGenerator::PreVisit(SingleQuery &) {
|
bool SymbolGenerator::PreVisit(SingleQuery &) {
|
||||||
prev_return_names_ = curr_return_names_;
|
auto &scope = scopes_.back();
|
||||||
curr_return_names_.clear();
|
|
||||||
|
scope.prev_return_names = scope.curr_return_names;
|
||||||
|
scope.curr_return_names.clear();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Union
|
// Union
|
||||||
|
|
||||||
bool SymbolGenerator::PreVisit(CypherUnion &) {
|
bool SymbolGenerator::PreVisit(CypherUnion &) {
|
||||||
scopes_.back() = Scope();
|
auto next_scope = Scope();
|
||||||
|
next_scope.curr_return_names = scopes_.back().curr_return_names;
|
||||||
|
|
||||||
|
scopes_.pop_back();
|
||||||
|
scopes_.push_back(next_scope);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool SymbolGenerator::PostVisit(CypherUnion &cypher_union) {
|
bool SymbolGenerator::PostVisit(CypherUnion &cypher_union) {
|
||||||
if (prev_return_names_ != curr_return_names_) {
|
auto &scope = scopes_.back();
|
||||||
|
|
||||||
|
if (scope.prev_return_names != scope.curr_return_names) {
|
||||||
throw SemanticException("All subqueries in an UNION must have the same column names.");
|
throw SemanticException("All subqueries in an UNION must have the same column names.");
|
||||||
}
|
}
|
||||||
|
|
||||||
// create new symbols for the result of the union
|
// create new symbols for the result of the union
|
||||||
for (const auto &name : curr_return_names_) {
|
for (const auto &name : scope.curr_return_names) {
|
||||||
auto symbol = CreateSymbol(name, false);
|
auto symbol = CreateSymbol(name, false);
|
||||||
cypher_union.union_symbols_.push_back(symbol);
|
cypher_union.union_symbols_.push_back(symbol);
|
||||||
}
|
}
|
||||||
@ -259,7 +268,9 @@ bool SymbolGenerator::PreVisit(Return &ret) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool SymbolGenerator::PostVisit(Return &) {
|
bool SymbolGenerator::PostVisit(Return &) {
|
||||||
for (const auto &name_symbol : scopes_.back().symbols) curr_return_names_.insert(name_symbol.first);
|
auto &scope = scopes_.back();
|
||||||
|
|
||||||
|
for (const auto &name_symbol : scope.symbols) scope.curr_return_names.insert(name_symbol.first);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -140,6 +140,8 @@ class SymbolGenerator : public HierarchicalTreeVisitor {
|
|||||||
std::vector<Identifier *> identifiers_in_match;
|
std::vector<Identifier *> identifiers_in_match;
|
||||||
// Number of nested IfOperators.
|
// Number of nested IfOperators.
|
||||||
int num_if_operators{0};
|
int num_if_operators{0};
|
||||||
|
std::unordered_set<std::string> prev_return_names{};
|
||||||
|
std::unordered_set<std::string> curr_return_names{};
|
||||||
};
|
};
|
||||||
|
|
||||||
static std::optional<Symbol> FindSymbolInScope(const std::string &name, const Scope &scope, Symbol::Type type);
|
static std::optional<Symbol> FindSymbolInScope(const std::string &name, const Scope &scope, Symbol::Type type);
|
||||||
@ -171,8 +173,6 @@ class SymbolGenerator : public HierarchicalTreeVisitor {
|
|||||||
// is mapped by its name.
|
// is mapped by its name.
|
||||||
std::unordered_map<std::string, Identifier *> predefined_identifiers_;
|
std::unordered_map<std::string, Identifier *> predefined_identifiers_;
|
||||||
std::vector<Scope> scopes_;
|
std::vector<Scope> scopes_;
|
||||||
std::unordered_set<std::string> prev_return_names_;
|
|
||||||
std::unordered_set<std::string> curr_return_names_;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
inline SymbolTable MakeSymbolTable(CypherQuery *query, const std::vector<Identifier *> &predefined_identifiers = {}) {
|
inline SymbolTable MakeSymbolTable(CypherQuery *query, const std::vector<Identifier *> &predefined_identifiers = {}) {
|
||||||
|
@ -1545,74 +1545,181 @@ PreparedQuery PrepareDumpQuery(ParsedQuery parsed_query, std::map<std::string, T
|
|||||||
std::vector<std::vector<TypedValue>> AnalyzeGraphQueryHandler::AnalyzeGraphCreateStatistics(
|
std::vector<std::vector<TypedValue>> AnalyzeGraphQueryHandler::AnalyzeGraphCreateStatistics(
|
||||||
const std::span<std::string> labels, DbAccessor *execution_db_accessor) {
|
const std::span<std::string> labels, DbAccessor *execution_db_accessor) {
|
||||||
using LPIndex = std::pair<storage::LabelId, storage::PropertyId>;
|
using LPIndex = std::pair<storage::LabelId, storage::PropertyId>;
|
||||||
|
auto view = storage::View::OLD;
|
||||||
|
|
||||||
std::vector<std::vector<TypedValue>> results;
|
auto erase_not_specified_label_indices = [&labels, execution_db_accessor](auto &index_info) {
|
||||||
std::map<LPIndex, std::map<storage::PropertyValue, int64_t>> counter;
|
if (labels[0] == kAsterisk) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// Preprocess labels to avoid later checks
|
for (auto it = index_info.cbegin(); it != index_info.cend();) {
|
||||||
std::vector<LPIndex> indices_info = execution_db_accessor->ListAllIndices().label_property;
|
if (std::find(labels.begin(), labels.end(), execution_db_accessor->LabelToName(*it)) == labels.end()) {
|
||||||
if (labels[0] != kAsterisk) {
|
it = index_info.erase(it);
|
||||||
for (auto it = indices_info.cbegin(); it != indices_info.cend();) {
|
|
||||||
if (std::find(labels.begin(), labels.end(), execution_db_accessor->LabelToName(it->first)) == labels.end()) {
|
|
||||||
it = indices_info.erase(it);
|
|
||||||
} else {
|
} else {
|
||||||
++it;
|
++it;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
};
|
||||||
// Iterate over all indexed vertices
|
|
||||||
std::for_each(indices_info.begin(), indices_info.end(), [execution_db_accessor, &counter](const LPIndex &index_info) {
|
|
||||||
auto vertices = execution_db_accessor->Vertices(storage::View::OLD, index_info.first, index_info.second);
|
|
||||||
std::for_each(vertices.begin(), vertices.end(), [&index_info, &counter](const auto &vertex) {
|
|
||||||
counter[index_info][*vertex.GetProperty(storage::View::OLD, index_info.second)]++;
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
results.reserve(counter.size());
|
auto erase_not_specified_label_property_indices = [&labels, execution_db_accessor](auto &index_info) {
|
||||||
std::for_each(counter.begin(), counter.end(), [&results, execution_db_accessor](const auto &counter_entry) {
|
if (labels[0] == kAsterisk) {
|
||||||
const auto &[label_property, values_map] = counter_entry;
|
return;
|
||||||
std::vector<TypedValue> result;
|
}
|
||||||
result.reserve(kDeleteStatisticsNumResults);
|
|
||||||
// Extract info
|
for (auto it = index_info.cbegin(); it != index_info.cend();) {
|
||||||
int64_t count_property_value = std::accumulate(
|
if (std::find(labels.begin(), labels.end(), execution_db_accessor->LabelToName(it->first)) == labels.end()) {
|
||||||
values_map.begin(), values_map.end(), 0,
|
it = index_info.erase(it);
|
||||||
[](int64_t prev_value, const auto &prop_value_count) { return prev_value + prop_value_count.second; });
|
} else {
|
||||||
// num_distinct_values will never be 0
|
++it;
|
||||||
double avg_group_size = static_cast<double>(count_property_value) / static_cast<double>(values_map.size());
|
}
|
||||||
double chi_squared_stat = std::accumulate(
|
}
|
||||||
values_map.begin(), values_map.end(), 0.0, [avg_group_size](double prev_result, const auto &value_entry) {
|
};
|
||||||
return prev_result + utils::ChiSquaredValue(value_entry.second, avg_group_size);
|
|
||||||
|
auto populate_label_stats = [execution_db_accessor, view](auto index_info) {
|
||||||
|
std::vector<std::pair<storage::LabelId, storage::LabelIndexStats>> label_stats;
|
||||||
|
label_stats.reserve(index_info.size());
|
||||||
|
std::for_each(index_info.begin(), index_info.end(),
|
||||||
|
[execution_db_accessor, view, &label_stats](const storage::LabelId &label_id) {
|
||||||
|
auto vertices = execution_db_accessor->Vertices(view, label_id);
|
||||||
|
uint64_t no_vertices{0};
|
||||||
|
uint64_t total_degree{0};
|
||||||
|
std::for_each(vertices.begin(), vertices.end(),
|
||||||
|
[&total_degree, &no_vertices, &view](const auto &vertex) {
|
||||||
|
no_vertices++;
|
||||||
|
total_degree += *vertex.OutDegree(view) + *vertex.InDegree(view);
|
||||||
|
});
|
||||||
|
|
||||||
|
auto average_degree =
|
||||||
|
no_vertices > 0 ? static_cast<double>(total_degree) / static_cast<double>(no_vertices) : 0;
|
||||||
|
auto index_stats = storage::LabelIndexStats{.count = no_vertices, .avg_degree = average_degree};
|
||||||
|
execution_db_accessor->SetIndexStats(label_id, index_stats);
|
||||||
|
label_stats.emplace_back(label_id, index_stats);
|
||||||
|
});
|
||||||
|
|
||||||
|
return label_stats;
|
||||||
|
};
|
||||||
|
|
||||||
|
auto populate_label_property_stats = [execution_db_accessor, view](auto &index_info) {
|
||||||
|
std::map<LPIndex, std::map<storage::PropertyValue, int64_t>> label_property_counter;
|
||||||
|
std::map<LPIndex, uint64_t> vertex_degree_counter;
|
||||||
|
// Iterate over all label property indexed vertices
|
||||||
|
std::for_each(
|
||||||
|
index_info.begin(), index_info.end(),
|
||||||
|
[execution_db_accessor, &label_property_counter, &vertex_degree_counter, view](const LPIndex &index_info) {
|
||||||
|
auto vertices = execution_db_accessor->Vertices(view, index_info.first, index_info.second);
|
||||||
|
std::for_each(vertices.begin(), vertices.end(),
|
||||||
|
[&index_info, &label_property_counter, &vertex_degree_counter, &view](const auto &vertex) {
|
||||||
|
label_property_counter[index_info][*vertex.GetProperty(view, index_info.second)]++;
|
||||||
|
vertex_degree_counter[index_info] += *vertex.OutDegree(view) + *vertex.InDegree(view);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
execution_db_accessor->SetIndexStats(
|
|
||||||
label_property.first, label_property.second,
|
std::vector<std::pair<LPIndex, storage::LabelPropertyIndexStats>> label_property_stats;
|
||||||
storage::IndexStats{.statistic = chi_squared_stat, .avg_group_size = avg_group_size});
|
label_property_stats.reserve(label_property_counter.size());
|
||||||
// Save result
|
std::for_each(
|
||||||
result.emplace_back(execution_db_accessor->LabelToName(label_property.first));
|
label_property_counter.begin(), label_property_counter.end(),
|
||||||
result.emplace_back(execution_db_accessor->PropertyToName(label_property.second));
|
[execution_db_accessor, &vertex_degree_counter, &label_property_stats](const auto &counter_entry) {
|
||||||
result.emplace_back(count_property_value);
|
const auto &[label_property, values_map] = counter_entry;
|
||||||
result.emplace_back(static_cast<int64_t>(values_map.size()));
|
// Extract info
|
||||||
result.emplace_back(avg_group_size);
|
uint64_t count_property_value = std::accumulate(
|
||||||
result.emplace_back(chi_squared_stat);
|
values_map.begin(), values_map.end(), 0,
|
||||||
|
[](uint64_t prev_value, const auto &prop_value_count) { return prev_value + prop_value_count.second; });
|
||||||
|
// num_distinct_values will never be 0
|
||||||
|
double avg_group_size = static_cast<double>(count_property_value) / static_cast<double>(values_map.size());
|
||||||
|
double chi_squared_stat = std::accumulate(
|
||||||
|
values_map.begin(), values_map.end(), 0.0, [avg_group_size](double prev_result, const auto &value_entry) {
|
||||||
|
return prev_result + utils::ChiSquaredValue(value_entry.second, avg_group_size);
|
||||||
|
});
|
||||||
|
|
||||||
|
double average_degree = count_property_value > 0
|
||||||
|
? static_cast<double>(vertex_degree_counter[label_property]) /
|
||||||
|
static_cast<double>(count_property_value)
|
||||||
|
: 0;
|
||||||
|
|
||||||
|
auto index_stats =
|
||||||
|
storage::LabelPropertyIndexStats{.count = count_property_value,
|
||||||
|
.distinct_values_count = static_cast<uint64_t>(values_map.size()),
|
||||||
|
.statistic = chi_squared_stat,
|
||||||
|
.avg_group_size = avg_group_size,
|
||||||
|
.avg_degree = average_degree};
|
||||||
|
execution_db_accessor->SetIndexStats(label_property.first, label_property.second, index_stats);
|
||||||
|
label_property_stats.push_back(std::make_pair(label_property, index_stats));
|
||||||
|
});
|
||||||
|
|
||||||
|
return label_property_stats;
|
||||||
|
};
|
||||||
|
|
||||||
|
auto index_info = execution_db_accessor->ListAllIndices();
|
||||||
|
|
||||||
|
std::vector<storage::LabelId> label_indices_info = index_info.label;
|
||||||
|
erase_not_specified_label_indices(label_indices_info);
|
||||||
|
auto label_stats = populate_label_stats(label_indices_info);
|
||||||
|
|
||||||
|
std::vector<LPIndex> label_property_indices_info = index_info.label_property;
|
||||||
|
erase_not_specified_label_property_indices(label_property_indices_info);
|
||||||
|
auto label_property_stats = populate_label_property_stats(label_property_indices_info);
|
||||||
|
|
||||||
|
std::vector<std::vector<TypedValue>> results;
|
||||||
|
results.reserve(label_stats.size() + label_property_stats.size());
|
||||||
|
|
||||||
|
std::for_each(label_stats.begin(), label_stats.end(), [execution_db_accessor, &results](const auto &stat_entry) {
|
||||||
|
std::vector<TypedValue> result;
|
||||||
|
result.reserve(kComputeStatisticsNumResults);
|
||||||
|
|
||||||
|
result.emplace_back(execution_db_accessor->LabelToName(stat_entry.first));
|
||||||
|
result.emplace_back(TypedValue());
|
||||||
|
result.emplace_back(static_cast<int64_t>(stat_entry.second.count));
|
||||||
|
result.emplace_back(TypedValue());
|
||||||
|
result.emplace_back(TypedValue());
|
||||||
|
result.emplace_back(TypedValue());
|
||||||
|
result.emplace_back(stat_entry.second.avg_degree);
|
||||||
results.push_back(std::move(result));
|
results.push_back(std::move(result));
|
||||||
});
|
});
|
||||||
|
|
||||||
|
std::for_each(label_property_stats.begin(), label_property_stats.end(),
|
||||||
|
[execution_db_accessor, &results](const auto &stat_entry) {
|
||||||
|
std::vector<TypedValue> result;
|
||||||
|
result.reserve(kComputeStatisticsNumResults);
|
||||||
|
|
||||||
|
result.emplace_back(execution_db_accessor->LabelToName(stat_entry.first.first));
|
||||||
|
result.emplace_back(execution_db_accessor->PropertyToName(stat_entry.first.second));
|
||||||
|
result.emplace_back(static_cast<int64_t>(stat_entry.second.count));
|
||||||
|
result.emplace_back(static_cast<int64_t>(stat_entry.second.distinct_values_count));
|
||||||
|
result.emplace_back(stat_entry.second.avg_group_size);
|
||||||
|
result.emplace_back(stat_entry.second.statistic);
|
||||||
|
result.emplace_back(stat_entry.second.avg_degree);
|
||||||
|
results.push_back(std::move(result));
|
||||||
|
});
|
||||||
|
|
||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::vector<TypedValue>> AnalyzeGraphQueryHandler::AnalyzeGraphDeleteStatistics(
|
std::vector<std::vector<TypedValue>> AnalyzeGraphQueryHandler::AnalyzeGraphDeleteStatistics(
|
||||||
const std::span<std::string> labels, DbAccessor *execution_db_accessor) {
|
const std::span<std::string> labels, DbAccessor *execution_db_accessor) {
|
||||||
std::vector<std::pair<storage::LabelId, storage::PropertyId>> loc_results;
|
std::vector<std::pair<storage::LabelId, storage::PropertyId>> label_prop_results;
|
||||||
|
std::vector<storage::LabelId> label_results;
|
||||||
if (labels[0] == kAsterisk) {
|
if (labels[0] == kAsterisk) {
|
||||||
loc_results = execution_db_accessor->ClearIndexStats();
|
label_prop_results = execution_db_accessor->ClearLabelPropertyIndexStats();
|
||||||
|
label_results = execution_db_accessor->ClearLabelIndexStats();
|
||||||
} else {
|
} else {
|
||||||
loc_results = execution_db_accessor->DeleteIndexStatsForLabels(labels);
|
label_prop_results = execution_db_accessor->DeleteLabelPropertyIndexStats(labels);
|
||||||
|
label_results = execution_db_accessor->DeleteLabelIndexStats(labels);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::vector<TypedValue>> results;
|
std::vector<std::vector<TypedValue>> results;
|
||||||
std::transform(loc_results.begin(), loc_results.end(), std::back_inserter(results),
|
results.reserve(label_prop_results.size() + label_results.size());
|
||||||
|
std::transform(label_prop_results.begin(), label_prop_results.end(), std::back_inserter(results),
|
||||||
[execution_db_accessor](const auto &label_property_index) {
|
[execution_db_accessor](const auto &label_property_index) {
|
||||||
return std::vector<TypedValue>{
|
return std::vector<TypedValue>{
|
||||||
TypedValue(execution_db_accessor->LabelToName(label_property_index.first)),
|
TypedValue(execution_db_accessor->LabelToName(label_property_index.first)),
|
||||||
TypedValue(execution_db_accessor->PropertyToName(label_property_index.second))};
|
TypedValue(execution_db_accessor->PropertyToName(label_property_index.second))};
|
||||||
});
|
});
|
||||||
|
|
||||||
|
std::transform(
|
||||||
|
label_results.begin(), label_results.end(), std::back_inserter(results),
|
||||||
|
[execution_db_accessor](const auto &label_index) {
|
||||||
|
return std::vector<TypedValue>{TypedValue(execution_db_accessor->LabelToName(label_index)), TypedValue("")};
|
||||||
|
});
|
||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1621,7 +1728,8 @@ Callback HandleAnalyzeGraphQuery(AnalyzeGraphQuery *analyze_graph_query, DbAcces
|
|||||||
switch (analyze_graph_query->action_) {
|
switch (analyze_graph_query->action_) {
|
||||||
case AnalyzeGraphQuery::Action::ANALYZE: {
|
case AnalyzeGraphQuery::Action::ANALYZE: {
|
||||||
callback.header = {"label", "property", "num estimation nodes",
|
callback.header = {"label", "property", "num estimation nodes",
|
||||||
"num groups", "avg group size", "chi-squared value"};
|
"num groups", "avg group size", "chi-squared value",
|
||||||
|
"avg degree"};
|
||||||
callback.fn = [handler = AnalyzeGraphQueryHandler(), labels = analyze_graph_query->labels_,
|
callback.fn = [handler = AnalyzeGraphQueryHandler(), labels = analyze_graph_query->labels_,
|
||||||
execution_db_accessor]() mutable {
|
execution_db_accessor]() mutable {
|
||||||
return handler.AnalyzeGraphCreateStatistics(labels, execution_db_accessor);
|
return handler.AnalyzeGraphCreateStatistics(labels, execution_db_accessor);
|
||||||
|
@ -15,9 +15,29 @@
|
|||||||
#include "query/parameters.hpp"
|
#include "query/parameters.hpp"
|
||||||
#include "query/plan/operator.hpp"
|
#include "query/plan/operator.hpp"
|
||||||
#include "query/typed_value.hpp"
|
#include "query/typed_value.hpp"
|
||||||
|
#include "utils/algorithm.hpp"
|
||||||
|
#include "utils/math.hpp"
|
||||||
|
|
||||||
namespace memgraph::query::plan {
|
namespace memgraph::query::plan {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The symbol statistics specify essential DB statistics which
|
||||||
|
* help the query planner (namely here the cost estimator), to decide
|
||||||
|
* how to do expands and other types of Cypher manipulations.
|
||||||
|
*/
|
||||||
|
struct SymbolStatistics {
|
||||||
|
uint64_t count;
|
||||||
|
double degree;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Scope of the statistics for every scanned symbol in
|
||||||
|
* the operator tree.
|
||||||
|
*/
|
||||||
|
struct Scope {
|
||||||
|
std::unordered_map<std::string, SymbolStatistics> symbol_stats;
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Query plan execution time cost estimator, for comparing and choosing optimal
|
* Query plan execution time cost estimator, for comparing and choosing optimal
|
||||||
* execution plans.
|
* execution plans.
|
||||||
@ -81,8 +101,11 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor {
|
|||||||
using HierarchicalLogicalOperatorVisitor::PostVisit;
|
using HierarchicalLogicalOperatorVisitor::PostVisit;
|
||||||
using HierarchicalLogicalOperatorVisitor::PreVisit;
|
using HierarchicalLogicalOperatorVisitor::PreVisit;
|
||||||
|
|
||||||
CostEstimator(TDbAccessor *db_accessor, const Parameters &parameters)
|
CostEstimator(TDbAccessor *db_accessor, const SymbolTable &table, const Parameters &parameters)
|
||||||
: db_accessor_(db_accessor), parameters(parameters) {}
|
: db_accessor_(db_accessor), table_(table), parameters(parameters), scopes_{Scope()} {}
|
||||||
|
|
||||||
|
CostEstimator(TDbAccessor *db_accessor, const SymbolTable &table, const Parameters &parameters, Scope scope)
|
||||||
|
: db_accessor_(db_accessor), table_(table), parameters(parameters), scopes_{scope} {}
|
||||||
|
|
||||||
bool PostVisit(ScanAll &) override {
|
bool PostVisit(ScanAll &) override {
|
||||||
cardinality_ *= db_accessor_->VerticesCount();
|
cardinality_ *= db_accessor_->VerticesCount();
|
||||||
@ -92,6 +115,11 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool PostVisit(ScanAllByLabel &scan_all_by_label) override {
|
bool PostVisit(ScanAllByLabel &scan_all_by_label) override {
|
||||||
|
auto index_stats = db_accessor_->GetIndexStats(scan_all_by_label.label_);
|
||||||
|
if (index_stats.has_value()) {
|
||||||
|
SaveStatsFor(scan_all_by_label.output_symbol_, index_stats.value());
|
||||||
|
}
|
||||||
|
|
||||||
cardinality_ *= db_accessor_->VerticesCount(scan_all_by_label.label_);
|
cardinality_ *= db_accessor_->VerticesCount(scan_all_by_label.label_);
|
||||||
// ScanAll performs some work for every element that is produced
|
// ScanAll performs some work for every element that is produced
|
||||||
IncrementCost(CostParam::kScanAllByLabel);
|
IncrementCost(CostParam::kScanAllByLabel);
|
||||||
@ -102,6 +130,11 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor {
|
|||||||
// This cardinality estimation depends on the property value (expression).
|
// This cardinality estimation depends on the property value (expression).
|
||||||
// If it's a constant, we can evaluate cardinality exactly, otherwise
|
// If it's a constant, we can evaluate cardinality exactly, otherwise
|
||||||
// we estimate
|
// we estimate
|
||||||
|
auto index_stats = db_accessor_->GetIndexStats(logical_op.label_, logical_op.property_);
|
||||||
|
if (index_stats.has_value()) {
|
||||||
|
SaveStatsFor(logical_op.output_symbol_, index_stats.value());
|
||||||
|
}
|
||||||
|
|
||||||
auto property_value = ConstPropertyValue(logical_op.expression_);
|
auto property_value = ConstPropertyValue(logical_op.expression_);
|
||||||
double factor = 1.0;
|
double factor = 1.0;
|
||||||
if (property_value)
|
if (property_value)
|
||||||
@ -119,6 +152,11 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool PostVisit(ScanAllByLabelPropertyRange &logical_op) override {
|
bool PostVisit(ScanAllByLabelPropertyRange &logical_op) override {
|
||||||
|
auto index_stats = db_accessor_->GetIndexStats(logical_op.label_, logical_op.property_);
|
||||||
|
if (index_stats.has_value()) {
|
||||||
|
SaveStatsFor(logical_op.output_symbol_, index_stats.value());
|
||||||
|
}
|
||||||
|
|
||||||
// this cardinality estimation depends on Bound expressions.
|
// this cardinality estimation depends on Bound expressions.
|
||||||
// if they are literals we can evaluate cardinality properly
|
// if they are literals we can evaluate cardinality properly
|
||||||
auto lower = BoundToPropertyValue(logical_op.lower_bound_);
|
auto lower = BoundToPropertyValue(logical_op.lower_bound_);
|
||||||
@ -144,6 +182,11 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool PostVisit(ScanAllByLabelProperty &logical_op) override {
|
bool PostVisit(ScanAllByLabelProperty &logical_op) override {
|
||||||
|
auto index_stats = db_accessor_->GetIndexStats(logical_op.label_, logical_op.property_);
|
||||||
|
if (index_stats.has_value()) {
|
||||||
|
SaveStatsFor(logical_op.output_symbol_, index_stats.value());
|
||||||
|
}
|
||||||
|
|
||||||
const auto factor = db_accessor_->VerticesCount(logical_op.label_, logical_op.property_);
|
const auto factor = db_accessor_->VerticesCount(logical_op.label_, logical_op.property_);
|
||||||
cardinality_ *= factor;
|
cardinality_ *= factor;
|
||||||
IncrementCost(CostParam::MakeScanAllByLabelProperty);
|
IncrementCost(CostParam::MakeScanAllByLabelProperty);
|
||||||
@ -152,6 +195,20 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor {
|
|||||||
|
|
||||||
// TODO: Cost estimate ScanAllById?
|
// TODO: Cost estimate ScanAllById?
|
||||||
|
|
||||||
|
bool PostVisit(Expand &expand) override {
|
||||||
|
auto card_param = CardParam::kExpand;
|
||||||
|
auto stats = GetStatsFor(expand.input_symbol_);
|
||||||
|
|
||||||
|
if (stats.has_value()) {
|
||||||
|
card_param = stats.value().degree;
|
||||||
|
}
|
||||||
|
|
||||||
|
cardinality_ *= card_param;
|
||||||
|
IncrementCost(CostParam::kExpand);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
// For the given op first increments the cardinality and then cost.
|
// For the given op first increments the cardinality and then cost.
|
||||||
#define POST_VISIT_CARD_FIRST(NAME) \
|
#define POST_VISIT_CARD_FIRST(NAME) \
|
||||||
bool PostVisit(NAME &) override { \
|
bool PostVisit(NAME &) override { \
|
||||||
@ -160,7 +217,6 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor {
|
|||||||
return true; \
|
return true; \
|
||||||
}
|
}
|
||||||
|
|
||||||
POST_VISIT_CARD_FIRST(Expand);
|
|
||||||
POST_VISIT_CARD_FIRST(ExpandVariable);
|
POST_VISIT_CARD_FIRST(ExpandVariable);
|
||||||
|
|
||||||
#undef POST_VISIT_CARD_FIRST
|
#undef POST_VISIT_CARD_FIRST
|
||||||
@ -225,20 +281,42 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool PostVisit(Produce &op) override {
|
||||||
|
auto scope = Scope();
|
||||||
|
|
||||||
|
// translate all the stats to the scope outside the return
|
||||||
|
for (const auto &symbol : op.ModifiedSymbols(table_)) {
|
||||||
|
auto stats = GetStatsFor(symbol);
|
||||||
|
if (stats.has_value()) {
|
||||||
|
scope.symbol_stats[symbol.name()] =
|
||||||
|
SymbolStatistics{.count = stats.value().count, .degree = stats.value().degree};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
scopes_.push_back(std::move(scope));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
bool PreVisit(Apply &op) override {
|
bool PreVisit(Apply &op) override {
|
||||||
double input_cost = EstimateCostOnBranch(&op.input_);
|
// Get the cost of the main branch
|
||||||
double subquery_cost = EstimateCostOnBranch(&op.subquery_);
|
op.input_->Accept(*this);
|
||||||
|
|
||||||
// if the query is a unit subquery, we don't want the cost to be zero but 1xN
|
// Estimate cost on the subquery branch independently, use a copy
|
||||||
input_cost = input_cost == 0 ? 1 : input_cost;
|
auto &last_scope = scopes_.back();
|
||||||
subquery_cost = subquery_cost == 0 ? 1 : subquery_cost;
|
double subquery_cost = EstimateCostOnBranch(&op.subquery_, last_scope);
|
||||||
|
subquery_cost = !utils::ApproxEqualDecimal(subquery_cost, 0.0) ? subquery_cost : 1;
|
||||||
|
cardinality_ *= subquery_cost;
|
||||||
|
|
||||||
cardinality_ *= input_cost * subquery_cost;
|
|
||||||
IncrementCost(CostParam::kSubquery);
|
IncrementCost(CostParam::kSubquery);
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool PostVisit(EmptyResult & /*op*/) override {
|
||||||
|
scopes_.emplace_back();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
bool Visit(Once &) override { return true; }
|
bool Visit(Once &) override { return true; }
|
||||||
|
|
||||||
auto cost() const { return cost_; }
|
auto cost() const { return cost_; }
|
||||||
@ -255,12 +333,20 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor {
|
|||||||
|
|
||||||
// accessor used for cardinality estimates in ScanAll and ScanAllByLabel
|
// accessor used for cardinality estimates in ScanAll and ScanAllByLabel
|
||||||
TDbAccessor *db_accessor_;
|
TDbAccessor *db_accessor_;
|
||||||
|
const SymbolTable &table_;
|
||||||
const Parameters &parameters;
|
const Parameters &parameters;
|
||||||
|
std::vector<Scope> scopes_;
|
||||||
|
|
||||||
void IncrementCost(double param) { cost_ += param * cardinality_; }
|
void IncrementCost(double param) { cost_ += param * cardinality_; }
|
||||||
|
|
||||||
double EstimateCostOnBranch(std::shared_ptr<LogicalOperator> *branch) {
|
double EstimateCostOnBranch(std::shared_ptr<LogicalOperator> *branch) {
|
||||||
CostEstimator<TDbAccessor> cost_estimator(db_accessor_, parameters);
|
CostEstimator<TDbAccessor> cost_estimator(db_accessor_, table_, parameters);
|
||||||
|
(*branch)->Accept(cost_estimator);
|
||||||
|
return cost_estimator.cost();
|
||||||
|
}
|
||||||
|
|
||||||
|
double EstimateCostOnBranch(std::shared_ptr<LogicalOperator> *branch, Scope scope) {
|
||||||
|
CostEstimator<TDbAccessor> cost_estimator(db_accessor_, table_, parameters, scope);
|
||||||
(*branch)->Accept(cost_estimator);
|
(*branch)->Accept(cost_estimator);
|
||||||
return cost_estimator.cost();
|
return cost_estimator.cost();
|
||||||
}
|
}
|
||||||
@ -287,12 +373,32 @@ class CostEstimator : public HierarchicalLogicalOperatorVisitor {
|
|||||||
}
|
}
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns true when the last scope in scopes_ already holds statistics stored under the symbol's name.
bool HasStatsFor(const Symbol &symbol) const { return utils::Contains(scopes_.back().symbol_stats, symbol.name()); }
|
||||||
|
|
||||||
|
std::optional<SymbolStatistics> GetStatsFor(const Symbol &symbol) {
|
||||||
|
if (!HasStatsFor(symbol)) {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto &scope = scopes_.back();
|
||||||
|
return scope.symbol_stats[symbol.name()];
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void SaveStatsFor(const Symbol &symbol, T index_stats) {
|
||||||
|
scopes_.back().symbol_stats[symbol.name()] = SymbolStatistics{
|
||||||
|
.count = index_stats.count,
|
||||||
|
.degree = index_stats.avg_degree,
|
||||||
|
};
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/** Returns the estimated cost of the given plan. */
|
/** Returns the estimated cost of the given plan. */
|
||||||
template <class TDbAccessor>
|
template <class TDbAccessor>
|
||||||
double EstimatePlanCost(TDbAccessor *db, const Parameters ¶meters, LogicalOperator &plan) {
|
double EstimatePlanCost(TDbAccessor *db, const SymbolTable &table, const Parameters ¶meters,
|
||||||
CostEstimator<TDbAccessor> estimator(db, parameters);
|
LogicalOperator &plan) {
|
||||||
|
CostEstimator<TDbAccessor> estimator(db, table, parameters);
|
||||||
plan.Accept(estimator);
|
plan.Accept(estimator);
|
||||||
return estimator.cost();
|
return estimator.cost();
|
||||||
}
|
}
|
||||||
|
@ -47,8 +47,9 @@ class PostProcessor final {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class TVertexCounts>
|
template <class TVertexCounts>
|
||||||
double EstimatePlanCost(const std::unique_ptr<LogicalOperator> &plan, TVertexCounts *vertex_counts) {
|
double EstimatePlanCost(const std::unique_ptr<LogicalOperator> &plan, TVertexCounts *vertex_counts,
|
||||||
return query::plan::EstimatePlanCost(vertex_counts, parameters_, *plan);
|
const SymbolTable &table) {
|
||||||
|
return query::plan::EstimatePlanCost(vertex_counts, table, parameters_, *plan);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -97,7 +98,7 @@ auto MakeLogicalPlan(TPlanningContext *context, TPlanPostProcess *post_process,
|
|||||||
// Plans are generated lazily and the current plan will disappear, so
|
// Plans are generated lazily and the current plan will disappear, so
|
||||||
// it's ok to move it.
|
// it's ok to move it.
|
||||||
auto rewritten_plan = post_process->Rewrite(std::move(plan), context);
|
auto rewritten_plan = post_process->Rewrite(std::move(plan), context);
|
||||||
double cost = post_process->EstimatePlanCost(rewritten_plan, &vertex_counts);
|
double cost = post_process->EstimatePlanCost(rewritten_plan, &vertex_counts, *context->symbol_table);
|
||||||
if (!curr_plan || cost < total_cost) {
|
if (!curr_plan || cost < total_cost) {
|
||||||
curr_plan.emplace(std::move(rewritten_plan));
|
curr_plan.emplace(std::move(rewritten_plan));
|
||||||
total_cost = cost;
|
total_cost = cost;
|
||||||
@ -106,7 +107,7 @@ auto MakeLogicalPlan(TPlanningContext *context, TPlanPostProcess *post_process,
|
|||||||
} else {
|
} else {
|
||||||
auto plan = MakeLogicalPlanForSingleQuery<RuleBasedPlanner>(query_parts, context);
|
auto plan = MakeLogicalPlanForSingleQuery<RuleBasedPlanner>(query_parts, context);
|
||||||
auto rewritten_plan = post_process->Rewrite(std::move(plan), context);
|
auto rewritten_plan = post_process->Rewrite(std::move(plan), context);
|
||||||
total_cost = post_process->EstimatePlanCost(rewritten_plan, &vertex_counts);
|
total_cost = post_process->EstimatePlanCost(rewritten_plan, &vertex_counts, *context->symbol_table);
|
||||||
curr_plan.emplace(std::move(rewritten_plan));
|
curr_plan.emplace(std::move(rewritten_plan));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -505,7 +505,7 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
|
|||||||
// FilterInfo with PropertyFilter.
|
// FilterInfo with PropertyFilter.
|
||||||
FilterInfo filter;
|
FilterInfo filter;
|
||||||
int64_t vertex_count;
|
int64_t vertex_count;
|
||||||
std::optional<storage::IndexStats> index_stats;
|
std::optional<storage::LabelPropertyIndexStats> index_stats;
|
||||||
};
|
};
|
||||||
|
|
||||||
bool DefaultPreVisit() override { throw utils::NotYetImplemented("optimizing index lookup"); }
|
bool DefaultPreVisit() override { throw utils::NotYetImplemented("optimizing index lookup"); }
|
||||||
@ -572,8 +572,8 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
|
|||||||
* @param vertex_count: New index's number of vertices.
|
* @param vertex_count: New index's number of vertices.
|
||||||
* @return -1 if the new index is better, 0 if they are equal and 1 if the existing one is better.
|
* @return -1 if the new index is better, 0 if they are equal and 1 if the existing one is better.
|
||||||
*/
|
*/
|
||||||
auto compare_indices = [](std::optional<LabelPropertyIndex> &found, std::optional<storage::IndexStats> &new_stats,
|
auto compare_indices = [](std::optional<LabelPropertyIndex> &found,
|
||||||
int vertex_count) {
|
std::optional<storage::LabelPropertyIndexStats> &new_stats, int vertex_count) {
|
||||||
if (!new_stats.has_value()) {
|
if (!new_stats.has_value()) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -610,7 +610,8 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
|
|||||||
};
|
};
|
||||||
|
|
||||||
int64_t vertex_count = db_->VerticesCount(GetLabel(label), GetProperty(property));
|
int64_t vertex_count = db_->VerticesCount(GetLabel(label), GetProperty(property));
|
||||||
std::optional<storage::IndexStats> new_stats = db_->GetIndexStats(GetLabel(label), GetProperty(property));
|
std::optional<storage::LabelPropertyIndexStats> new_stats =
|
||||||
|
db_->GetIndexStats(GetLabel(label), GetProperty(property));
|
||||||
|
|
||||||
// Conditions, from more to less important:
|
// Conditions, from more to less important:
|
||||||
// the index with 10x less vertices is better.
|
// the index with 10x less vertices is better.
|
||||||
|
@ -78,8 +78,12 @@ class VertexCountCache {
|
|||||||
return db_->LabelPropertyIndexExists(label, property);
|
return db_->LabelPropertyIndexExists(label, property);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::optional<storage::IndexStats> GetIndexStats(const storage::LabelId &label,
|
std::optional<storage::LabelIndexStats> GetIndexStats(const storage::LabelId &label) const {
|
||||||
const storage::PropertyId &property) const {
|
return db_->GetIndexStats(label);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::optional<storage::LabelPropertyIndexStats> GetIndexStats(const storage::LabelId &label,
|
||||||
|
const storage::PropertyId &property) const {
|
||||||
return db_->GetIndexStats(label, property);
|
return db_->GetIndexStats(label, property);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -478,6 +478,40 @@ void LabelIndex::RunGC() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Records `stats` as the statistics for `label`, replacing any entry already stored for it.
void LabelIndex::SetIndexStats(const storage::LabelId &label, const storage::LabelIndexStats &stats) {
  stats_.insert_or_assign(label, stats);
}
|
||||||
|
|
||||||
|
/// Returns the statistics stored for `label`, or std::nullopt when none are stored.
std::optional<LabelIndexStats> LabelIndex::GetIndexStats(const storage::LabelId &label) const {
  const auto found = stats_.find(label);
  if (found == stats_.end()) {
    return std::nullopt;
  }
  return found->second;
}
|
||||||
|
|
||||||
|
std::vector<LabelId> LabelIndex::ClearIndexStats() {
|
||||||
|
std::vector<LabelId> deleted_indexes;
|
||||||
|
deleted_indexes.reserve(stats_.size());
|
||||||
|
std::transform(stats_.begin(), stats_.end(), std::back_inserter(deleted_indexes),
|
||||||
|
[](const auto &elem) { return elem.first; });
|
||||||
|
stats_.clear();
|
||||||
|
return deleted_indexes;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Removes the statistics stored for `label`, if any.
///
/// @param label label whose statistics should be dropped.
/// @return a vector containing `label` when an entry was removed, otherwise an empty vector.
std::vector<LabelId> LabelIndex::DeleteIndexStats(const storage::LabelId &label) {
  std::vector<LabelId> deleted_indexes;
  // stats_ is a map keyed by label, so at most one entry can match. Erasing by
  // key replaces the former walk over every stored entry.
  if (stats_.erase(label) > 0) {
    deleted_indexes.push_back(label);
  }
  return deleted_indexes;
}
|
||||||
|
|
||||||
bool LabelPropertyIndex::Entry::operator<(const Entry &rhs) {
|
bool LabelPropertyIndex::Entry::operator<(const Entry &rhs) {
|
||||||
if (value < rhs.value) {
|
if (value < rhs.value) {
|
||||||
return true;
|
return true;
|
||||||
@ -814,8 +848,7 @@ int64_t LabelPropertyIndex::ApproximateVertexCount(LabelId label, PropertyId pro
|
|||||||
/*
|
/*
|
||||||
Iterate over all property-label pairs and deletes if label from the index is equal to label parameter.
|
Iterate over all property-label pairs and deletes if label from the index is equal to label parameter.
|
||||||
*/
|
*/
|
||||||
std::vector<std::pair<LabelId, PropertyId>> LabelPropertyIndex::DeleteIndexStatsForLabel(
|
std::vector<std::pair<LabelId, PropertyId>> LabelPropertyIndex::DeleteIndexStats(const storage::LabelId &label) {
|
||||||
const storage::LabelId &label) {
|
|
||||||
std::vector<std::pair<LabelId, PropertyId>> deleted_indexes;
|
std::vector<std::pair<LabelId, PropertyId>> deleted_indexes;
|
||||||
for (auto it = stats_.cbegin(); it != stats_.cend();) {
|
for (auto it = stats_.cbegin(); it != stats_.cend();) {
|
||||||
if (it->first.first == label) {
|
if (it->first.first == label) {
|
||||||
@ -837,14 +870,14 @@ std::vector<std::pair<LabelId, PropertyId>> LabelPropertyIndex::ClearIndexStats(
|
|||||||
return deleted_indexes;
|
return deleted_indexes;
|
||||||
}
|
}
|
||||||
|
|
||||||
void LabelPropertyIndex::SetIndexStats(const storage::LabelId &label, const storage::PropertyId &property,
|
void LabelPropertyIndex::SetIndexStats(const std::pair<storage::LabelId, storage::PropertyId> &key,
|
||||||
const IndexStats &stats) {
|
const storage::LabelPropertyIndexStats &stats) {
|
||||||
stats_[{label, property}] = stats;
|
stats_[key] = stats;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::optional<IndexStats> LabelPropertyIndex::GetIndexStats(const storage::LabelId &label,
|
std::optional<storage::LabelPropertyIndexStats> LabelPropertyIndex::GetIndexStats(
|
||||||
const storage::PropertyId &property) const {
|
const std::pair<storage::LabelId, storage::PropertyId> &key) const {
|
||||||
if (auto it = stats_.find({label, property}); it != stats_.end()) {
|
if (auto it = stats_.find(key); it != stats_.end()) {
|
||||||
return it->second;
|
return it->second;
|
||||||
}
|
}
|
||||||
return {};
|
return {};
|
||||||
|
@ -31,6 +31,11 @@ struct Constraints;
|
|||||||
using ParalellizedIndexCreationInfo =
|
using ParalellizedIndexCreationInfo =
|
||||||
std::pair<std::vector<std::pair<Gid, uint64_t>> /*vertex_recovery_info*/, uint64_t /*thread_count*/>;
|
std::pair<std::vector<std::pair<Gid, uint64_t>> /*vertex_recovery_info*/, uint64_t /*thread_count*/>;
|
||||||
|
|
||||||
|
struct LabelIndexStats {
|
||||||
|
uint64_t count;
|
||||||
|
double avg_degree;
|
||||||
|
};
|
||||||
|
|
||||||
class LabelIndex {
|
class LabelIndex {
|
||||||
private:
|
private:
|
||||||
struct Entry {
|
struct Entry {
|
||||||
@ -124,19 +129,29 @@ class LabelIndex {
|
|||||||
return it->second.size();
|
return it->second.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void SetIndexStats(const storage::LabelId &label, const storage::LabelIndexStats &stats);
|
||||||
|
|
||||||
|
std::optional<storage::LabelIndexStats> GetIndexStats(const storage::LabelId &label) const;
|
||||||
|
|
||||||
|
std::vector<LabelId> ClearIndexStats();
|
||||||
|
|
||||||
|
std::vector<LabelId> DeleteIndexStats(const storage::LabelId &label);
|
||||||
|
|
||||||
void Clear() { index_.clear(); }
|
void Clear() { index_.clear(); }
|
||||||
|
|
||||||
void RunGC();
|
void RunGC();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::map<LabelId, utils::SkipList<Entry>> index_;
|
std::map<LabelId, utils::SkipList<Entry>> index_;
|
||||||
|
std::map<LabelId, storage::LabelIndexStats> stats_;
|
||||||
Indices *indices_;
|
Indices *indices_;
|
||||||
Constraints *constraints_;
|
Constraints *constraints_;
|
||||||
Config::Items config_;
|
Config::Items config_;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct IndexStats {
|
struct LabelPropertyIndexStats {
|
||||||
double statistic, avg_group_size;
|
uint64_t count, distinct_values_count;
|
||||||
|
double statistic, avg_group_size, avg_degree;
|
||||||
};
|
};
|
||||||
|
|
||||||
class LabelPropertyIndex {
|
class LabelPropertyIndex {
|
||||||
@ -248,13 +263,13 @@ class LabelPropertyIndex {
|
|||||||
|
|
||||||
std::vector<std::pair<LabelId, PropertyId>> ClearIndexStats();
|
std::vector<std::pair<LabelId, PropertyId>> ClearIndexStats();
|
||||||
|
|
||||||
std::vector<std::pair<LabelId, PropertyId>> DeleteIndexStatsForLabel(const storage::LabelId &label);
|
std::vector<std::pair<LabelId, PropertyId>> DeleteIndexStats(const storage::LabelId &label);
|
||||||
|
|
||||||
void SetIndexStats(const storage::LabelId &label, const storage::PropertyId &property,
|
void SetIndexStats(const std::pair<storage::LabelId, storage::PropertyId> &key,
|
||||||
const storage::IndexStats &stats);
|
const storage::LabelPropertyIndexStats &stats);
|
||||||
|
|
||||||
std::optional<storage::IndexStats> GetIndexStats(const storage::LabelId &label,
|
std::optional<storage::LabelPropertyIndexStats> GetIndexStats(
|
||||||
const storage::PropertyId &property) const;
|
const std::pair<storage::LabelId, storage::PropertyId> &key) const;
|
||||||
|
|
||||||
void Clear() { index_.clear(); }
|
void Clear() { index_.clear(); }
|
||||||
|
|
||||||
@ -262,7 +277,7 @@ class LabelPropertyIndex {
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
std::map<std::pair<LabelId, PropertyId>, utils::SkipList<Entry>> index_;
|
std::map<std::pair<LabelId, PropertyId>, utils::SkipList<Entry>> index_;
|
||||||
std::map<std::pair<LabelId, PropertyId>, storage::IndexStats> stats_;
|
std::map<std::pair<LabelId, PropertyId>, storage::LabelPropertyIndexStats> stats_;
|
||||||
Indices *indices_;
|
Indices *indices_;
|
||||||
Constraints *constraints_;
|
Constraints *constraints_;
|
||||||
Config::Items config_;
|
Config::Items config_;
|
||||||
|
@ -267,28 +267,66 @@ class Storage final {
|
|||||||
return storage_->indices_.label_property_index.ApproximateVertexCount(label, property, lower, upper);
|
return storage_->indices_.label_property_index.ApproximateVertexCount(label, property, lower, upper);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::optional<storage::IndexStats> GetIndexStats(const storage::LabelId &label,
|
template <typename TResult, typename TIndex, typename TIndexKey>
|
||||||
const storage::PropertyId &property) const {
|
std::optional<TResult> GetIndexStatsForIndex(TIndex &index, TIndexKey &&key) const {
|
||||||
return storage_->indices_.label_property_index.GetIndexStats(label, property);
|
return index.GetIndexStats(key);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::pair<LabelId, PropertyId>> ClearIndexStats() {
|
std::optional<storage::LabelIndexStats> GetIndexStats(const storage::LabelId &label) const {
|
||||||
return storage_->indices_.label_property_index.ClearIndexStats();
|
return GetIndexStatsForIndex<storage::LabelIndexStats>(storage_->indices_.label_index, label);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::pair<LabelId, PropertyId>> DeleteIndexStatsForLabels(const std::span<std::string> labels) {
|
std::optional<storage::LabelPropertyIndexStats> GetIndexStats(const storage::LabelId &label,
|
||||||
std::vector<std::pair<LabelId, PropertyId>> deleted_indexes;
|
const storage::PropertyId &property) const {
|
||||||
std::for_each(labels.begin(), labels.end(), [this, &deleted_indexes](const auto &label_str) {
|
return GetIndexStatsForIndex<storage::LabelPropertyIndexStats>(storage_->indices_.label_property_index,
|
||||||
std::vector<std::pair<LabelId, PropertyId>> loc_results =
|
std::make_pair(label, property));
|
||||||
storage_->indices_.label_property_index.DeleteIndexStatsForLabel(NameToLabel(label_str));
|
}
|
||||||
|
|
||||||
|
/// Generic helper that stores statistics in `index` by calling index.SetIndexStats(key, stats).
/// The SetIndexStats overloads use it so they differ only in the index and key they pass.
template <typename TIndex, typename TIndexKey, typename TIndexStats>
|
||||||
|
void SetIndexStatsForIndex(TIndex &index, TIndexKey &&key, TIndexStats &stats) const {
|
||||||
|
index.SetIndexStats(key, stats);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Stores `stats` for `label` in the storage's label index.
void SetIndexStats(const storage::LabelId &label, const LabelIndexStats &stats) {
|
||||||
|
SetIndexStatsForIndex(storage_->indices_.label_index, label, stats);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Stores `stats` in the storage's label-property index under the (label, property) pair.
void SetIndexStats(const storage::LabelId &label, const storage::PropertyId &property,
|
||||||
|
const LabelPropertyIndexStats &stats) {
|
||||||
|
SetIndexStatsForIndex(storage_->indices_.label_property_index, std::make_pair(label, property), stats);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Generic helper that returns whatever index.ClearIndexStats() returns, as a vector of TResult.
template <typename TResult, typename TIndex>
|
||||||
|
std::vector<TResult> ClearIndexStatsForIndex(TIndex &index) const {
|
||||||
|
return index.ClearIndexStats();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Clears the statistics of the storage's label-property index and returns the
/// (label, property) pairs reported by that index's ClearIndexStats().
std::vector<std::pair<LabelId, PropertyId>> ClearLabelPropertyIndexStats() {
|
||||||
|
return ClearIndexStatsForIndex<std::pair<LabelId, PropertyId>>(storage_->indices_.label_property_index);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Clears the statistics of the storage's label index and returns the labels
/// reported by that index's ClearIndexStats().
std::vector<LabelId> ClearLabelIndexStats() {
|
||||||
|
return ClearIndexStatsForIndex<LabelId>(storage_->indices_.label_index);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename TResult, typename TIndex>
|
||||||
|
std::vector<TResult> DeleteIndexStatsForIndex(TIndex &index, const std::span<std::string> labels) {
|
||||||
|
std::vector<TResult> deleted_indexes;
|
||||||
|
|
||||||
|
for (const auto &label : labels) {
|
||||||
|
std::vector<TResult> loc_results = index.DeleteIndexStats(NameToLabel(label));
|
||||||
deleted_indexes.insert(deleted_indexes.end(), std::make_move_iterator(loc_results.begin()),
|
deleted_indexes.insert(deleted_indexes.end(), std::make_move_iterator(loc_results.begin()),
|
||||||
std::make_move_iterator(loc_results.end()));
|
std::make_move_iterator(loc_results.end()));
|
||||||
});
|
}
|
||||||
return deleted_indexes;
|
return deleted_indexes;
|
||||||
}
|
}
|
||||||
|
|
||||||
void SetIndexStats(const storage::LabelId &label, const storage::PropertyId &property, const IndexStats &stats) {
|
std::vector<std::pair<LabelId, PropertyId>> DeleteLabelPropertyIndexStats(const std::span<std::string> labels) {
|
||||||
storage_->indices_.label_property_index.SetIndexStats(label, property, stats);
|
return DeleteIndexStatsForIndex<std::pair<LabelId, PropertyId>>(storage_->indices_.label_property_index, labels);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<LabelId> DeleteLabelIndexStats(const std::span<std::string> labels) {
|
||||||
|
return DeleteIndexStatsForIndex<LabelId>(storage_->indices_.label_index, labels);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// @return Accessor to the deleted vertex if a deletion took place, std::nullopt otherwise
|
/// @return Accessor to the deleted vertex if a deletion took place, std::nullopt otherwise
|
||||||
|
@ -131,7 +131,7 @@ static void BM_PlanAndEstimateIndexedMatching(benchmark::State &state) {
|
|||||||
auto plans = memgraph::query::plan::MakeLogicalPlanForSingleQuery<memgraph::query::plan::VariableStartPlanner>(
|
auto plans = memgraph::query::plan::MakeLogicalPlanForSingleQuery<memgraph::query::plan::VariableStartPlanner>(
|
||||||
query_parts, &ctx);
|
query_parts, &ctx);
|
||||||
for (auto plan : plans) {
|
for (auto plan : plans) {
|
||||||
memgraph::query::plan::EstimatePlanCost(&dba, parameters, *plan);
|
memgraph::query::plan::EstimatePlanCost(&dba, symbol_table, parameters, *plan);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -161,7 +161,7 @@ static void BM_PlanAndEstimateIndexedMatchingWithCachedCounts(benchmark::State &
|
|||||||
auto plans = memgraph::query::plan::MakeLogicalPlanForSingleQuery<memgraph::query::plan::VariableStartPlanner>(
|
auto plans = memgraph::query::plan::MakeLogicalPlanForSingleQuery<memgraph::query::plan::VariableStartPlanner>(
|
||||||
query_parts, &ctx);
|
query_parts, &ctx);
|
||||||
for (auto plan : plans) {
|
for (auto plan : plans) {
|
||||||
memgraph::query::plan::EstimatePlanCost(&vertex_counts, parameters, *plan);
|
memgraph::query::plan::EstimatePlanCost(&vertex_counts, symbol_table, parameters, *plan);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -13,6 +13,7 @@ import typing
|
|||||||
|
|
||||||
import mgclient
|
import mgclient
|
||||||
import pytest
|
import pytest
|
||||||
|
from gqlalchemy import Memgraph
|
||||||
|
|
||||||
|
|
||||||
def execute_and_fetch_all(cursor: mgclient.Cursor, query: str, params: dict = {}) -> typing.List[tuple]:
|
def execute_and_fetch_all(cursor: mgclient.Cursor, query: str, params: dict = {}) -> typing.List[tuple]:
|
||||||
@ -27,3 +28,14 @@ def connect(**kwargs) -> mgclient.Connection:
|
|||||||
yield connection
|
yield connection
|
||||||
cursor = connection.cursor()
|
cursor = connection.cursor()
|
||||||
execute_and_fetch_all(cursor, "MATCH (n) DETACH DELETE n")
|
execute_and_fetch_all(cursor, "MATCH (n) DETACH DELETE n")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def memgraph(**kwargs) -> Memgraph:
    """Yield a gqlalchemy ``Memgraph`` client to the test and clean up afterwards.

    After the test finishes, the database is dropped, the graph statistics are
    deleted and all indexes are dropped, in that order.
    """
    client = Memgraph()
    yield client

    # Teardown: runs once the test using this fixture has finished.
    client.drop_database()
    client.execute("analyze graph delete statistics;")
    client.drop_indexes()
|
||||||
|
@ -12,7 +12,10 @@
|
|||||||
import sys
|
import sys
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from common import connect, execute_and_fetch_all
|
from common import connect, execute_and_fetch_all, memgraph
|
||||||
|
|
||||||
|
QUERY_PLAN = "QUERY PLAN"
|
||||||
|
|
||||||
|
|
||||||
# E2E tests for checking query semantic
|
# E2E tests for checking query semantic
|
||||||
# ------------------------------------
|
# ------------------------------------
|
||||||
@ -96,8 +99,8 @@ def test_analyze_full_graph(analyze_query, connect):
|
|||||||
else:
|
else:
|
||||||
first_index = 1
|
first_index = 1
|
||||||
# Check results
|
# Check results
|
||||||
assert analyze_graph_results[first_index] == ("Label", "id1", 100, 100, 1, 0)
|
assert analyze_graph_results[first_index] == ("Label", "id1", 100, 100, 1, 0, 0)
|
||||||
assert analyze_graph_results[1 - first_index] == ("Label", "id2", 50, 5, 10, 0)
|
assert analyze_graph_results[1 - first_index] == ("Label", "id2", 50, 5, 10, 0, 0)
|
||||||
# After analyzing graph, id1 index should be chosen because it has smaller average group size
|
# After analyzing graph, id1 index should be chosen because it has smaller average group size
|
||||||
expected_explain_after_analysis = [
|
expected_explain_after_analysis = [
|
||||||
(f" * Produce {{n}}",),
|
(f" * Produce {{n}}",),
|
||||||
@ -131,8 +134,8 @@ def test_cardinality_different_avg_group_size_uniform_dist(connect):
|
|||||||
else:
|
else:
|
||||||
first_index = 1
|
first_index = 1
|
||||||
# Check results
|
# Check results
|
||||||
assert analyze_graph_results[first_index] == ("Label", "id1", 100, 100, 1, 0)
|
assert analyze_graph_results[first_index] == ("Label", "id1", 100, 100, 1, 0, 0)
|
||||||
assert analyze_graph_results[1 - first_index] == ("Label", "id2", 100, 20, 5, 0)
|
assert analyze_graph_results[1 - first_index] == ("Label", "id2", 100, 20, 5, 0, 0)
|
||||||
expected_explain_after_analysis = [
|
expected_explain_after_analysis = [
|
||||||
(f" * Produce {{n}}",),
|
(f" * Produce {{n}}",),
|
||||||
(f" * Filter",),
|
(f" * Filter",),
|
||||||
@ -161,8 +164,8 @@ def test_cardinality_same_avg_group_size_uniform_dist_diff_vertex_count(connect)
|
|||||||
else:
|
else:
|
||||||
first_index = 1
|
first_index = 1
|
||||||
# Check results
|
# Check results
|
||||||
assert analyze_graph_results[first_index] == ("Label", "id1", 100, 100, 1, 0)
|
assert analyze_graph_results[first_index] == ("Label", "id1", 100, 100, 1, 0, 0)
|
||||||
assert analyze_graph_results[1 - first_index] == ("Label", "id2", 50, 50, 1, 0)
|
assert analyze_graph_results[1 - first_index] == ("Label", "id2", 50, 50, 1, 0, 0)
|
||||||
expected_explain_after_analysis = [
|
expected_explain_after_analysis = [
|
||||||
(f" * Produce {{n}}",),
|
(f" * Produce {{n}}",),
|
||||||
(f" * Filter",),
|
(f" * Filter",),
|
||||||
@ -191,8 +194,8 @@ def test_large_diff_in_num_vertices_v1(connect):
|
|||||||
else:
|
else:
|
||||||
first_index = 1
|
first_index = 1
|
||||||
# Check results
|
# Check results
|
||||||
assert analyze_graph_results[first_index] == ("Label", "id1", 1000, 1000, 1, 0)
|
assert analyze_graph_results[first_index] == ("Label", "id1", 1000, 1000, 1, 0, 0)
|
||||||
assert analyze_graph_results[1 - first_index] == ("Label", "id2", 99, 1, 99, 0)
|
assert analyze_graph_results[1 - first_index] == ("Label", "id2", 99, 1, 99, 0, 0)
|
||||||
expected_explain_after_analysis = [
|
expected_explain_after_analysis = [
|
||||||
(f" * Produce {{n}}",),
|
(f" * Produce {{n}}",),
|
||||||
(f" * Filter",),
|
(f" * Filter",),
|
||||||
@ -221,8 +224,8 @@ def test_large_diff_in_num_vertices_v2(connect):
|
|||||||
else:
|
else:
|
||||||
first_index = 1
|
first_index = 1
|
||||||
# Check results
|
# Check results
|
||||||
assert analyze_graph_results[first_index] == ("Label", "id1", 99, 1, 99, 0)
|
assert analyze_graph_results[first_index] == ("Label", "id1", 99, 1, 99, 0, 0)
|
||||||
assert analyze_graph_results[1 - first_index] == ("Label", "id2", 1000, 1000, 1, 0)
|
assert analyze_graph_results[1 - first_index] == ("Label", "id2", 1000, 1000, 1, 0, 0)
|
||||||
expected_explain_after_analysis = [
|
expected_explain_after_analysis = [
|
||||||
(f" * Produce {{n}}",),
|
(f" * Produce {{n}}",),
|
||||||
(f" * Filter",),
|
(f" * Filter",),
|
||||||
@ -261,8 +264,8 @@ def test_same_avg_group_size_diff_distribution(connect):
|
|||||||
else:
|
else:
|
||||||
first_index = 1
|
first_index = 1
|
||||||
# Check results
|
# Check results
|
||||||
assert analyze_graph_results[first_index] == ("Label", "id1", 100, 5, 20, 32.5)
|
assert analyze_graph_results[first_index] == ("Label", "id1", 100, 5, 20, 32.5, 0)
|
||||||
assert analyze_graph_results[1 - first_index] == ("Label", "id2", 100, 5, 20, 0)
|
assert analyze_graph_results[1 - first_index] == ("Label", "id2", 100, 5, 20, 0, 0)
|
||||||
expected_explain_after_analysis = [
|
expected_explain_after_analysis = [
|
||||||
(f" * Produce {{n}}",),
|
(f" * Produce {{n}}",),
|
||||||
(f" * Filter",),
|
(f" * Filter",),
|
||||||
@ -278,5 +281,194 @@ def test_same_avg_group_size_diff_distribution(connect):
|
|||||||
execute_and_fetch_all(cursor, "DROP INDEX ON :Label(id2);")
|
execute_and_fetch_all(cursor, "DROP INDEX ON :Label(id2);")
|
||||||
|
|
||||||
|
|
||||||
|
def test_given_supernode_when_expanding_then_expand_other_way_around(memgraph):
    """Check that ANALYZE GRAPH changes which side of the MERGE pattern is expanded.

    Builds :Node vertices connected to one :SuperNode, then compares the EXPLAIN
    output of the same MERGE query before and after ``analyze graph``. Before the
    analysis the expected plan expands from ``s``; afterwards it expands from ``n``.
    """
    setup_queries = (
        "FOREACH (i in range(1, 1000) | CREATE (:Node {id: i}));",
        "CREATE (:SuperNode {id: 1});",
        "CREATE INDEX ON :SuperNode(id);",
        "CREATE INDEX ON :SuperNode;",
        "CREATE INDEX ON :Node(id);",
        "CREATE INDEX ON :Node;",
        "match (n:Node) match (s:SuperNode {id: 1}) merge (n)<-[:HAS_REL_TO]-(s);",
    )
    for setup_query in setup_queries:
        memgraph.execute(setup_query)

    query = "explain match (n:Node) match (s:SuperNode {id: 1}) merge (n)<-[:HAS_REL_TO]-(s);"

    def fetch_plan():
        # One plan line per returned row, read from the QUERY PLAN column.
        return [row[QUERY_PLAN] for row in memgraph.execute_and_fetch(query)]

    expected_explain = [
        f" * EmptyResult",
        f" * Merge",
        f" |\\ On Match",
        f" | * Expand (s)-[anon3:HAS_REL_TO]->(n)",
        f" | * Once",
        f" |\\ On Create",
        f" | * CreateExpand (n)<-[anon3:HAS_REL_TO]-(s)",
        f" | * Once",
        f" * ScanAllByLabel (n :Node)",
        f" * ScanAllByLabelPropertyValue (s :SuperNode {{id}})",
        f" * Once",
    ]

    plan_before_analysis = fetch_plan()
    assert expected_explain == plan_before_analysis

    memgraph.execute("analyze graph;")

    # Only the Expand line is expected to differ once statistics exist.
    expand_from_supernode = f" | * Expand (s)-[anon3:HAS_REL_TO]->(n)"
    expand_from_node = f" | * Expand (n)<-[anon3:HAS_REL_TO]-(s)"
    expected_explain = [line.replace(expand_from_supernode, expand_from_node) for line in expected_explain]

    plan_after_analysis = fetch_plan()
    assert expected_explain == plan_after_analysis
|
||||||
|
|
||||||
|
|
||||||
|
def test_given_supernode_when_subquery_then_carry_information_to_subquery(memgraph):
    """Check that the Expand direction inside a CALL subquery also changes after ANALYZE GRAPH.

    Builds :Node and :Node2 vertices connected to one :SuperNode, then compares
    the EXPLAIN output of a query whose MERGE sits inside ``call { with n, s ... }``
    before and after ``analyze graph``. Only the Expand line inside the subquery
    branch is expected to change, from expanding out of ``s`` to expanding out of ``n``.
    """
    setup_queries = (
        "FOREACH (i in range(1, 1000) | CREATE (:Node {id: i}));",
        "FOREACH (i in range(1, 1000) | CREATE (:Node2 {id: i}));",
        "CREATE (:SuperNode {id: 1});",
        "CREATE INDEX ON :SuperNode(id);",
        "CREATE INDEX ON :SuperNode;",
        "CREATE INDEX ON :Node(id);",
        "CREATE INDEX ON :Node;",
        "CREATE INDEX ON :Node2(id);",
        "CREATE INDEX ON :Node2;",
        "match (n:Node) match (s:SuperNode {id: 1}) merge (n)<-[:HAS_REL_TO]-(s);",
        "match (n:Node2) match (s:SuperNode {id: 1}) merge (n)<-[:HAS_REL_TO]-(s);",
    )
    for setup_query in setup_queries:
        memgraph.execute(setup_query)

    query = (
        "explain match (n:Node) match (s:SuperNode {id: 1}) call { with n, s merge (n)<-[:HAS_REL_TO]-(s) } return 1"
    )

    def fetch_plan():
        # One plan line per returned row, read from the QUERY PLAN column.
        return [row[QUERY_PLAN] for row in memgraph.execute_and_fetch(query)]

    expected_explain = [
        f" * Produce {{0}}",
        f" * Accumulate",
        f" * Accumulate",
        f" * Apply",
        f" |\\ ",
        f" | * EmptyResult",
        f" | * Merge",
        f" | |\\ On Match",
        f" | | * Expand (s)-[anon3:HAS_REL_TO]->(n)",
        f" | | * Once",
        f" | |\\ On Create",
        f" | | * CreateExpand (n)<-[anon3:HAS_REL_TO]-(s)",
        f" | | * Once",
        f" | * Produce {{n, s}}",
        f" | * Once",
        f" * ScanAllByLabel (n :Node)",
        f" * ScanAllByLabelPropertyValue (s :SuperNode {{id}})",
        f" * Once",
    ]

    plan_before_analysis = fetch_plan()
    assert expected_explain == plan_before_analysis

    memgraph.execute("analyze graph;")

    # Only the Expand line inside the subquery branch is expected to differ.
    expand_from_supernode = f" | | * Expand (s)-[anon3:HAS_REL_TO]->(n)"
    expand_from_node = f" | | * Expand (n)<-[anon3:HAS_REL_TO]-(s)"
    expected_explain = [line.replace(expand_from_supernode, expand_from_node) for line in expected_explain]

    plan_after_analysis = fetch_plan()
    assert expected_explain == plan_after_analysis
|
||||||
|
|
||||||
|
|
||||||
|
def test_given_supernode_when_subquery_and_union_then_carry_information(memgraph):
    """Check the EXPLAIN output of a UNION ALL of two identical subquery branches.

    The graph is populated with :Node and :Node2 vertices, a single
    :SuperNode {id: 1}, label and label+property indexes on all three labels,
    and a HAS_REL_TO edge from the super node to every :Node / :Node2 vertex.

    The plan is compared twice: once before ``analyze graph`` and once after.
    After the analysis the test expects the ``Expand`` under ``On Match`` to be
    written as ``(n)<-[...]-(s)`` instead of ``(s)-[...]->(n)`` in BOTH union
    branches (anon3 and anon7); every other plan line must stay the same.
    """
    setup_statements = (
        "FOREACH (i in range(1, 1000) | CREATE (:Node {id: i}));",
        "FOREACH (i in range(1, 1000) | CREATE (:Node2 {id: i}));",
        "CREATE (:SuperNode {id: 1});",
        "CREATE INDEX ON :SuperNode(id);",
        "CREATE INDEX ON :SuperNode;",
        "CREATE INDEX ON :Node(id);",
        "CREATE INDEX ON :Node;",
        "CREATE INDEX ON :Node2(id);",
        "CREATE INDEX ON :Node2;",
        "match (n:Node) match (s:SuperNode {id: 1}) merge (n)<-[:HAS_REL_TO]-(s);",
        "match (n:Node2) match (s:SuperNode {id: 1}) merge (n)<-[:HAS_REL_TO]-(s);",
    )
    for statement in setup_statements:
        memgraph.execute(statement)

    # Both sides of the UNION ALL are the very same single query.
    union_branch = (
        "match (n:Node) match (s:SuperNode {id: 1}) "
        "call { with n, s merge (n)<-[:HAS_REL_TO]-(s) } return s"
    )
    query = "explain " + union_branch + " union all " + union_branch + ";"

    # Plan expected while no index statistics exist yet.
    plan_before_analysis = [
        " * Union {s : s}",
        " |\\ ",
        " | * Produce {s}",
        " | * Accumulate",
        " | * Accumulate",
        " | * Apply",
        " | |\\ ",
        " | | * EmptyResult",
        " | | * Merge",
        " | | |\\ On Match",
        " | | | * Expand (s)-[anon7:HAS_REL_TO]->(n)",
        " | | | * Once",
        " | | |\\ On Create",
        " | | | * CreateExpand (n)<-[anon7:HAS_REL_TO]-(s)",
        " | | | * Once",
        " | | * Produce {n, s}",
        " | | * Once",
        " | * ScanAllByLabel (n :Node)",
        " | * ScanAllByLabelPropertyValue (s :SuperNode {id})",
        " | * Once",
        " * Produce {s}",
        " * Accumulate",
        " * Accumulate",
        " * Apply",
        " |\\ ",
        " | * EmptyResult",
        " | * Merge",
        " | |\\ On Match",
        " | | * Expand (s)-[anon3:HAS_REL_TO]->(n)",
        " | | * Once",
        " | |\\ On Create",
        " | | * CreateExpand (n)<-[anon3:HAS_REL_TO]-(s)",
        " | | * Once",
        " | * Produce {n, s}",
        " | * Once",
        " * ScanAllByLabel (n :Node)",
        " * ScanAllByLabelPropertyValue (s :SuperNode {id})",
        " * Once",
    ]

    plan_rows = [row[QUERY_PLAN] for row in memgraph.execute_and_fetch(query)]
    assert plan_before_analysis == plan_rows

    memgraph.execute("analyze graph;")

    # (old, new) substitutions describing the only two lines expected to change.
    expand_rewrites = (
        (" | | * Expand (s)-[anon3:HAS_REL_TO]->(n)", " | | * Expand (n)<-[anon3:HAS_REL_TO]-(s)"),
        (" | | | * Expand (s)-[anon7:HAS_REL_TO]->(n)", " | | | * Expand (n)<-[anon7:HAS_REL_TO]-(s)"),
    )
    plan_after_analysis = []
    for plan_line in plan_before_analysis:
        for old_expand, new_expand in expand_rewrites:
            plan_line = plan_line.replace(old_expand, new_expand)
        plan_after_analysis.append(plan_line)

    plan_rows = [row[QUERY_PLAN] for row in memgraph.execute_and_fetch(query)]

    assert plan_after_analysis == plan_rows
|
||||||
|
|
||||||
|
|
||||||
|
def test_given_empty_graph_when_analyzing_graph_return_zero_degree(memgraph):
|
||||||
|
memgraph.execute("CREATE INDEX ON :Node;")
|
||||||
|
|
||||||
|
label_stats = next(memgraph.execute_and_fetch("analyze graph;"))
|
||||||
|
|
||||||
|
expected_analysis = {
|
||||||
|
"label": "Node",
|
||||||
|
"property": None,
|
||||||
|
"num estimation nodes": 0,
|
||||||
|
"num groups": None,
|
||||||
|
"avg group size": None,
|
||||||
|
"chi-squared value": None,
|
||||||
|
"avg degree": 0.0,
|
||||||
|
}
|
||||||
|
|
||||||
|
assert set(label_stats) == set(expected_analysis)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
sys.exit(pytest.main([__file__, "-rA"]))
|
sys.exit(pytest.main([__file__, "-rA"]))
|
||||||
|
@ -27,6 +27,7 @@
|
|||||||
#include "query/plan/planner.hpp"
|
#include "query/plan/planner.hpp"
|
||||||
#include "query/plan/pretty_print.hpp"
|
#include "query/plan/pretty_print.hpp"
|
||||||
#include "query/typed_value.hpp"
|
#include "query/typed_value.hpp"
|
||||||
|
#include "storage/v2/indices.hpp"
|
||||||
#include "storage/v2/property_value.hpp"
|
#include "storage/v2/property_value.hpp"
|
||||||
#include "utils/string.hpp"
|
#include "utils/string.hpp"
|
||||||
|
|
||||||
@ -213,8 +214,12 @@ class InteractiveDbAccessor {
|
|||||||
return label_property_index_.at(key);
|
return label_property_index_.at(key);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::optional<memgraph::storage::IndexStats> GetIndexStats(memgraph::storage::LabelId label,
|
std::optional<memgraph::storage::LabelIndexStats> GetIndexStats(const memgraph::storage::LabelId label) const {
|
||||||
memgraph::storage::PropertyId property) const {
|
return dba_->GetIndexStats(label);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::optional<memgraph::storage::LabelPropertyIndexStats> GetIndexStats(
|
||||||
|
const memgraph::storage::LabelId label, const memgraph::storage::PropertyId property) const {
|
||||||
return dba_->GetIndexStats(label, property);
|
return dba_->GetIndexStats(label, property);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -458,7 +463,7 @@ auto MakeLogicalPlans(memgraph::query::CypherQuery *query, memgraph::query::AstS
|
|||||||
memgraph::query::AstStorage ast_copy;
|
memgraph::query::AstStorage ast_copy;
|
||||||
auto unoptimized_plan = plan->Clone(&ast_copy);
|
auto unoptimized_plan = plan->Clone(&ast_copy);
|
||||||
auto rewritten_plan = post_process.Rewrite(std::move(plan), &ctx);
|
auto rewritten_plan = post_process.Rewrite(std::move(plan), &ctx);
|
||||||
double cost = post_process.EstimatePlanCost(rewritten_plan, dba);
|
double cost = post_process.EstimatePlanCost(rewritten_plan, dba, symbol_table);
|
||||||
interactive_plans.push_back(
|
interactive_plans.push_back(
|
||||||
InteractivePlan{std::move(unoptimized_plan), std::move(ast_copy), std::move(rewritten_plan), cost});
|
InteractivePlan{std::move(unoptimized_plan), std::move(ast_copy), std::move(rewritten_plan), cost});
|
||||||
}
|
}
|
||||||
|
@ -74,7 +74,7 @@ class QueryCostEstimator : public ::testing::Test {
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto Cost() {
|
auto Cost() {
|
||||||
CostEstimator<memgraph::query::DbAccessor> cost_estimator(&*dba, parameters_);
|
CostEstimator<memgraph::query::DbAccessor> cost_estimator(&*dba, symbol_table_, parameters_);
|
||||||
last_op_->Accept(cost_estimator);
|
last_op_->Accept(cost_estimator);
|
||||||
return cost_estimator.cost();
|
return cost_estimator.cost();
|
||||||
}
|
}
|
||||||
@ -201,7 +201,7 @@ TEST_F(QueryCostEstimator, SubqueryCartesian) {
|
|||||||
std::shared_ptr<LogicalOperator> input = std::make_shared<ScanAll>(std::make_shared<Once>(), NextSymbol());
|
std::shared_ptr<LogicalOperator> input = std::make_shared<ScanAll>(std::make_shared<Once>(), NextSymbol());
|
||||||
std::shared_ptr<LogicalOperator> subquery = std::make_shared<ScanAll>(std::make_shared<Once>(), NextSymbol());
|
std::shared_ptr<LogicalOperator> subquery = std::make_shared<ScanAll>(std::make_shared<Once>(), NextSymbol());
|
||||||
MakeOp<memgraph::query::plan::Apply>(input, subquery, true);
|
MakeOp<memgraph::query::plan::Apply>(input, subquery, true);
|
||||||
EXPECT_COST(CostParam::kSubquery * no_vertices * no_vertices);
|
EXPECT_COST(CostParam::kSubquery * no_vertices * no_vertices + no_vertices);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(QueryCostEstimator, UnitSubquery) {
|
TEST_F(QueryCostEstimator, UnitSubquery) {
|
||||||
|
@ -500,9 +500,13 @@ class FakeDbAccessor {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
memgraph::storage::IndexStats GetIndexStats(memgraph::storage::LabelId label,
|
std::optional<memgraph::storage::LabelPropertyIndexStats> GetIndexStats(
|
||||||
memgraph::storage::PropertyId property) const {
|
const memgraph::storage::LabelId label, const memgraph::storage::PropertyId property) const {
|
||||||
return memgraph::storage::IndexStats{.statistic = 0, .avg_group_size = 1}; // unique id
|
return memgraph::storage::LabelPropertyIndexStats{.statistic = 0, .avg_group_size = 1}; // unique id
|
||||||
|
}
|
||||||
|
|
||||||
|
std::optional<memgraph::storage::LabelIndexStats> GetIndexStats(const memgraph::storage::LabelId label) const {
|
||||||
|
return memgraph::storage::LabelIndexStats{.count = 0, .avg_degree = 0}; // unique id
|
||||||
}
|
}
|
||||||
|
|
||||||
void SetIndexCount(memgraph::storage::LabelId label, int64_t count) { label_index_[label] = count; }
|
void SetIndexCount(memgraph::storage::LabelId label, int64_t count) { label_index_[label] = count; }
|
||||||
|
@ -1252,4 +1252,11 @@ TEST_F(TestSymbolGenerator, Subqueries) {
|
|||||||
query = QUERY(SINGLE_QUERY(MATCH(PATTERN(NODE("n"))), CALL_SUBQUERY(subquery), RETURN("n", "m")));
|
query = QUERY(SINGLE_QUERY(MATCH(PATTERN(NODE("n"))), CALL_SUBQUERY(subquery), RETURN("n", "m")));
|
||||||
symbol_table = MakeSymbolTable(query);
|
symbol_table = MakeSymbolTable(query);
|
||||||
ASSERT_EQ(symbol_table.max_position(), 11);
|
ASSERT_EQ(symbol_table.max_position(), 11);
|
||||||
|
|
||||||
|
// MATCH (n) CALL { MATCH (s) RETURN s } RETURN n UNION MATCH (n) CALL { MATCH (s) RETURN s } RETURN n
|
||||||
|
subquery = QUERY(SINGLE_QUERY(MATCH(PATTERN(NODE("s"))), RETURN("s")));
|
||||||
|
query = QUERY(SINGLE_QUERY(MATCH(PATTERN(NODE("n"))), CALL_SUBQUERY(subquery), RETURN("n")),
|
||||||
|
UNION(SINGLE_QUERY(MATCH(PATTERN(NODE("n"))), CALL_SUBQUERY(subquery), RETURN("n"))));
|
||||||
|
symbol_table = MakeSymbolTable(query);
|
||||||
|
ASSERT_EQ(symbol_table.max_position(), 13);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user