Summary: Union query combinator implementation consists of: * adjustments to the AST and `cypher_main_visitor` * enabling `QueryStripper` to parse multiple `return` statements (not stopping after first) * symbol generation for union results * union logical operator * query plan generator adjustments Reviewers: teon.banek, mislav.bradac Reviewed By: teon.banek Subscribers: pullbot, buda Differential Revision: https://phabricator.memgraph.io/D1038
310 lines
12 KiB
C++
310 lines
12 KiB
C++
#include "query/plan/variable_start_planner.hpp"
|
|
|
|
#include <limits>
|
|
#include <queue>
|
|
|
|
#include "glog/logging.h"
|
|
|
|
#include "utils/flag_validation.hpp"
|
|
|
|
// Command line flag `query_max_plans`: maximum number of generated plans for a
// query. Defaults to 1000.
// NOTE(review): FLAG_IN_RANGE presumably rejects values outside
// [1, uint64 max] -- confirm against utils/flag_validation.hpp.
DEFINE_VALIDATED_HIDDEN_uint64(
    query_max_plans, 1000U, "Maximum number of generated plans for a query",
    FLAG_IN_RANGE(1, std::numeric_limits<std::uint64_t>::max()));
|
|
|
|
namespace query::plan::impl {
|
|
|
|
namespace {
|
|
|
|
// Add applicable expansions for `node_symbol` to `next_expansions`. These
|
|
// expansions are removed from `node_symbol_to_expansions`, while
|
|
// `seen_expansions` and `expanded_symbols` are populated with new data.
|
|
void AddNextExpansions(
|
|
const Symbol &node_symbol, const Matching &matching,
|
|
const SymbolTable &symbol_table,
|
|
std::unordered_set<Symbol> &expanded_symbols,
|
|
std::unordered_map<Symbol, std::set<int>> &node_symbol_to_expansions,
|
|
std::unordered_set<int> &seen_expansions,
|
|
std::queue<Expansion> &next_expansions) {
|
|
auto node_to_expansions_it = node_symbol_to_expansions.find(node_symbol);
|
|
if (node_to_expansions_it == node_symbol_to_expansions.end()) {
|
|
return;
|
|
}
|
|
// Returns true if the expansion is a regular expand or if it is a variable
|
|
// path expand, but with bound symbols used inside the range expression.
|
|
auto can_expand = [&](auto &expansion) {
|
|
for (const auto &range_symbol : expansion.symbols_in_range) {
|
|
// If the symbols used in range need to be bound during this whole
|
|
// expansion, we must check whether they have already been expanded and
|
|
// therefore bound. If the symbols are not found in the whole expansion,
|
|
// then the semantic analysis should guarantee that the symbols have been
|
|
// bound long before we expand.
|
|
if (matching.expansion_symbols.find(range_symbol) !=
|
|
matching.expansion_symbols.end() &&
|
|
expanded_symbols.find(range_symbol) == expanded_symbols.end()) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
};
|
|
auto &node_expansions = node_to_expansions_it->second;
|
|
auto node_expansions_it = node_expansions.begin();
|
|
while (node_expansions_it != node_to_expansions_it->second.end()) {
|
|
auto expansion_id = *node_expansions_it;
|
|
if (seen_expansions.find(expansion_id) != seen_expansions.end()) {
|
|
// Skip and erase seen (already expanded) expansions.
|
|
node_expansions_it = node_expansions.erase(node_expansions_it);
|
|
continue;
|
|
}
|
|
auto expansion = matching.expansions[expansion_id];
|
|
if (!can_expand(expansion)) {
|
|
// Skip but save expansions which need other symbols for later.
|
|
++node_expansions_it;
|
|
continue;
|
|
}
|
|
if (symbol_table.at(*expansion.node1->identifier_) != node_symbol) {
|
|
// We are not expanding from node1, so flip the expansion.
|
|
DCHECK(expansion.node2 &&
|
|
symbol_table.at(*expansion.node2->identifier_) == node_symbol)
|
|
<< "Expected node_symbol to be bound in node2";
|
|
if (expansion.edge->type_ != EdgeAtom::Type::BREADTH_FIRST) {
|
|
// BFS must *not* be flipped. Doing that changes the BFS results.
|
|
std::swap(expansion.node1, expansion.node2);
|
|
expansion.is_flipped = true;
|
|
if (expansion.direction != EdgeAtom::Direction::BOTH) {
|
|
expansion.direction = expansion.direction == EdgeAtom::Direction::IN
|
|
? EdgeAtom::Direction::OUT
|
|
: EdgeAtom::Direction::IN;
|
|
}
|
|
}
|
|
}
|
|
seen_expansions.insert(expansion_id);
|
|
expanded_symbols.insert(symbol_table.at(*expansion.node1->identifier_));
|
|
if (expansion.edge) {
|
|
expanded_symbols.insert(symbol_table.at(*expansion.edge->identifier_));
|
|
expanded_symbols.insert(symbol_table.at(*expansion.node2->identifier_));
|
|
}
|
|
next_expansions.emplace(std::move(expansion));
|
|
node_expansions_it = node_expansions.erase(node_expansions_it);
|
|
}
|
|
if (node_expansions.empty()) {
|
|
node_symbol_to_expansions.erase(node_to_expansions_it);
|
|
}
|
|
}
|
|
|
|
// Generates expansions emanating from the start_node by forming a chain. When
|
|
// the chain can no longer be continued, a different starting node is picked
|
|
// among remaining expansions and the process continues. This is done until all
|
|
// matching.expansions are used.
|
|
std::vector<Expansion> ExpansionsFrom(const NodeAtom *start_node,
|
|
const Matching &matching,
|
|
const SymbolTable &symbol_table) {
|
|
// Make a copy of node_symbol_to_expansions, because we will modify it as
|
|
// expansions are chained.
|
|
auto node_symbol_to_expansions = matching.node_symbol_to_expansions;
|
|
std::unordered_set<int> seen_expansions;
|
|
std::queue<Expansion> next_expansions;
|
|
std::unordered_set<Symbol> expanded_symbols(
|
|
{symbol_table.at(*start_node->identifier_)});
|
|
auto add_next_expansions = [&](const auto *node) {
|
|
AddNextExpansions(symbol_table.at(*node->identifier_), matching,
|
|
symbol_table, expanded_symbols, node_symbol_to_expansions,
|
|
seen_expansions, next_expansions);
|
|
};
|
|
add_next_expansions(start_node);
|
|
// Potential optimization: expansions and next_expansions could be merge into
|
|
// a single vector and an index could be used to determine from which should
|
|
// additional expansions be added.
|
|
std::vector<Expansion> expansions;
|
|
while (!next_expansions.empty()) {
|
|
auto expansion = next_expansions.front();
|
|
next_expansions.pop();
|
|
expansions.emplace_back(expansion);
|
|
add_next_expansions(expansion.node1);
|
|
if (expansion.node2) {
|
|
add_next_expansions(expansion.node2);
|
|
}
|
|
}
|
|
if (!node_symbol_to_expansions.empty()) {
|
|
// We could pick a new starting expansion, but to avoid runtime
|
|
// complexity, simply append the remaining expansions. They should have the
|
|
// correct order, since the original expansions were verified during
|
|
// semantic analysis.
|
|
for (int i = 0; i < matching.expansions.size(); ++i) {
|
|
if (seen_expansions.find(i) != seen_expansions.end()) {
|
|
continue;
|
|
}
|
|
expansions.emplace_back(matching.expansions[i]);
|
|
}
|
|
}
|
|
return expansions;
|
|
}
|
|
|
|
// Collect all unique nodes from expansions. Uniqueness is determined by
|
|
// symbol uniqueness.
|
|
auto ExpansionNodes(const std::vector<Expansion> &expansions,
|
|
const SymbolTable &symbol_table) {
|
|
std::unordered_set<NodeAtom *, NodeSymbolHash, NodeSymbolEqual> nodes(
|
|
expansions.size(), NodeSymbolHash(symbol_table),
|
|
NodeSymbolEqual(symbol_table));
|
|
for (const auto &expansion : expansions) {
|
|
// TODO: Handle labels and properties from different node atoms.
|
|
nodes.insert(expansion.node1);
|
|
if (expansion.node2) {
|
|
nodes.insert(expansion.node2);
|
|
}
|
|
}
|
|
return nodes;
|
|
}
|
|
|
|
} // namespace
|
|
|
|
// Stores a copy of `matching` and a reference to `symbol_table`, and collects
// the unique nodes of the matching's expansions; those nodes are the
// candidate start nodes used by the iterator.
VaryMatchingStart::VaryMatchingStart(Matching matching,
                                     const SymbolTable &symbol_table)
    : matching_(matching),
      symbol_table_(symbol_table),
      nodes_(ExpansionNodes(matching.expansions, symbol_table)) {}
|
|
|
|
// Creates an iterator over the matching variations of `self`. When `is_done`
// is true, the iterator is positioned at the end of the start nodes.
VaryMatchingStart::iterator::iterator(VaryMatchingStart *self, bool is_done)
    : self_(self),
      // Begin with the original matching. Only the expansions part is ever
      // replaced, so the remaining fields stay the same. This also yields a
      // matching for the case when there are no nodes at all.
      current_matching_(self->matching_) {
  if (!self_->nodes_.empty()) {
    // Replace the original expansions with the ones generated from the first
    // start node.
    start_nodes_it_ = self_->nodes_.begin();
    current_matching_.expansions = ExpansionsFrom(
        **start_nodes_it_, self_->matching_, self_->symbol_table_);
  }
  DCHECK(start_nodes_it_ || self_->nodes_.empty())
      << "start_nodes_it_ should only be nullopt when self_->nodes_ is empty";
  if (is_done) {
    // Position the iterator at the end.
    start_nodes_it_ = self_->nodes_.end();
  }
}
|
|
|
|
// Moves on to the next start node and regenerates the expansions of the
// current matching from it. Incrementing an iterator which is already at the
// end is a no-op.
VaryMatchingStart::iterator &VaryMatchingStart::iterator::operator++() {
  if (!start_nodes_it_) {
    DCHECK(self_->nodes_.empty())
        << "start_nodes_it_ should only be nullopt when self_->nodes_ is empty";
    start_nodes_it_ = self_->nodes_.end();
  }
  auto &node_it = *start_nodes_it_;
  // Bail out if we are already at the end, or if advancing puts us there; the
  // iterator must not be dereferenced in either case.
  if (node_it == self_->nodes_.end() || ++node_it == self_->nodes_.end()) {
    return *this;
  }
  current_matching_.expansions =
      ExpansionsFrom(*node_it, self_->matching_, self_->symbol_table_);
  return *this;
}
|
|
|
|
// Creates a VaryMatchingStart for each of the given `matchings` and combines
// them into a cartesian product, so that iterating the result yields every
// combination of start variations across all the matchings.
CartesianProduct<VaryMatchingStart> VaryMultiMatchingStarts(
    const std::vector<Matching> &matchings, const SymbolTable &symbol_table) {
  std::vector<VaryMatchingStart> variants;
  variants.reserve(matchings.size());
  for (const auto &matching : matchings) {
    // Forward the constructor arguments so the element is built in place,
    // instead of constructing a temporary and then moving/copying it.
    variants.emplace_back(matching, symbol_table);
  }
  return MakeCartesianProduct(std::move(variants));
}
|
|
|
|
// Takes ownership of `query_part` and prepares the start variations for its
// regular, optional and merge matchings.
// NOTE: the variations are built from the `query_part_` member (the parameter
// has been moved from), which relies on `query_part_` being declared before
// the other members -- confirm in the header when reordering members.
VaryQueryPartMatching::VaryQueryPartMatching(SingleQueryPart query_part,
                                             const SymbolTable &symbol_table)
    : query_part_(std::move(query_part)),
      matchings_(VaryMatchingStart(query_part_.matching, symbol_table)),
      optional_matchings_(
          VaryMultiMatchingStarts(query_part_.optional_matching, symbol_table)),
      merge_matchings_(
          VaryMultiMatchingStarts(query_part_.merge_matching, symbol_table)) {}
|
|
|
|
// Builds an iterator over all combinations of the given matching variations.
// The begin iterators of the optional and merge variations are remembered so
// that those ranges can be restarted while the regular matchings advance.
VaryQueryPartMatching::iterator::iterator(
    const SingleQueryPart &query_part,
    VaryMatchingStart::iterator matchings_begin,
    VaryMatchingStart::iterator matchings_end,
    CartesianProduct<VaryMatchingStart>::iterator optional_begin,
    CartesianProduct<VaryMatchingStart>::iterator optional_end,
    CartesianProduct<VaryMatchingStart>::iterator merge_begin,
    CartesianProduct<VaryMatchingStart>::iterator merge_end)
    : current_query_part_(query_part),
      matchings_it_(matchings_begin),
      matchings_end_(matchings_end),
      optional_it_(optional_begin),
      optional_begin_(optional_begin),
      optional_end_(optional_end),
      merge_it_(merge_begin),
      merge_begin_(merge_begin),
      merge_end_(merge_end) {
  // An exhausted iterator has no variation to expose.
  if (matchings_it_ == matchings_end_) return;
  // Fill the query part with the first variation of matchings.
  SetCurrentQueryPart();
}
|
|
|
|
// Advances to the next combination of matching variations. The iteration
// behaves like nested loops: merge variations change fastest, then optional
// ones, then the regular matchings. For example, with
//   * matchings (m1) and (m2)
//   * optional matchings (o1) and (o2)
//   * merge matching (g1)
// the produced parts are:
//   * (m1), (o1), (g1)
//   * (m1), (o2), (g1)
//   * (m2), (o1), (g1)
//   * (m2), (o2), (g1)
VaryQueryPartMatching::iterator &VaryQueryPartMatching::iterator::operator++() {
  // Innermost level: step the merge variation.
  if (merge_it_ != merge_end_) {
    ++merge_it_;
  }
  // Merge variations exhausted: restart them and step the optional variation.
  if (merge_it_ == merge_end_) {
    merge_it_ = merge_begin_;
    if (optional_it_ != optional_end_) {
      ++optional_it_;
    }
  }
  // Optional variations exhausted (after the merge variations wrapped
  // around): restart them and step the regular matching variation.
  if (optional_it_ == optional_end_ && merge_it_ == merge_begin_) {
    optional_it_ = optional_begin_;
    if (matchings_it_ != matchings_end_) {
      ++matchings_it_;
    }
  }
  // Only refresh the query part while there is still a variation to show.
  if (matchings_it_ != matchings_end_) {
    SetCurrentQueryPart();
  }
  return *this;
}
|
|
|
|
// Copies the variations the three iterators currently point at into
// `current_query_part_`. The optional and merge parts are only overwritten
// when their respective iterator is not at the end.
void VaryQueryPartMatching::iterator::SetCurrentQueryPart() {
  current_query_part_.matching = *matchings_it_;

  // Optional matchings.
  DCHECK(optional_it_ != optional_end_ || optional_begin_ == optional_end_)
      << "Either there are no optional matchings or we can always "
         "generate a variation";
  if (optional_it_ != optional_end_) {
    current_query_part_.optional_matching = *optional_it_;
  }

  // Merge matchings.
  DCHECK(merge_it_ != merge_end_ || merge_begin_ == merge_end_)
      << "Either there are no merge matchings or we can always generate "
         "a variation";
  if (merge_it_ != merge_end_) {
    current_query_part_.merge_matching = *merge_it_;
  }
}
|
|
|
|
// Two iterators are equal when they point at the same combination of
// variations. `matchings_it_` is the primary iterator: once both iterators
// have it at the end, the positions of the other iterators are irrelevant.
bool VaryQueryPartMatching::iterator::operator==(const iterator &other) const {
  if (!(matchings_it_ == other.matchings_it_)) {
    return false;
  }
  if (matchings_it_ == matchings_end_) {
    // Both are at the end; other iterators can be at any position.
    return true;
  }
  return optional_it_ == other.optional_it_ && merge_it_ == other.merge_it_;
}
|
|
|
|
} // namespace query::plan::impl
|