query::plan - Ops use vector instead of list
Summary: Replaced std::list with std::vector in all plan operators. Performance increase in harness tests is not visible. Defined a custom test: ``` unwind range(0, 1000000) as x create ({a: tointeger(rand() * 100), b: tointeger(rand() * 100), c: tointeger(rand() * 100), d: tointeger(rand() * 10), e: tointeger(rand() * 10), f: tointeger(rand() * 10)}); match (n) return min(n.a), max(n.b), sum(n.c), n.d, n.e, n.f match (n) with distinct n.a AS a, n.b AS b, n.c AS c, n.d AS d, n.e AS e, n.f AS f return count(*) ``` In that test performance gains are 9.8% on the aggregation query (mean 0.83s vs 0.92s) and 34% (mean 2.15s vs 3.25s) on the distinct query. Doubt we'll see much on any of the LDBC tests because they don't stress those operators nearly as much. Reviewers: buda, teon.banek, mislav.bradac Reviewed By: teon.banek Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D778
This commit is contained in:
parent
4601f6c368
commit
682fced81b
@ -1626,10 +1626,11 @@ bool Accumulate::AccumulateCursor::Pull(Frame &frame,
|
||||
// cache all the input
|
||||
if (!pulled_all_input_) {
|
||||
while (input_cursor_->Pull(frame, symbol_table)) {
|
||||
cache_.emplace_back();
|
||||
auto &row = cache_.back();
|
||||
std::vector<TypedValue> row;
|
||||
row.reserve(self_.symbols_.size());
|
||||
for (const Symbol &symbol : self_.symbols_)
|
||||
row.emplace_back(frame[symbol]);
|
||||
cache_.emplace_back(std::move(row));
|
||||
}
|
||||
pulled_all_input_ = true;
|
||||
cache_it_ = cache_.begin();
|
||||
@ -1752,8 +1753,8 @@ void Aggregate::AggregateCursor::ProcessAll(Frame &frame,
|
||||
void Aggregate::AggregateCursor::ProcessOne(Frame &frame,
|
||||
const SymbolTable &symbol_table,
|
||||
ExpressionEvaluator &evaluator) {
|
||||
// create the group-by list of values
|
||||
std::list<TypedValue> group_by;
|
||||
std::vector<TypedValue> group_by;
|
||||
group_by.reserve(self_.group_by_.size());
|
||||
for (Expression *expression : self_.group_by_) {
|
||||
group_by.emplace_back(expression->Accept(evaluator));
|
||||
}
|
||||
@ -1922,8 +1923,12 @@ void Aggregate::AggregateCursor::EnsureOkForAvgSum(
|
||||
}
|
||||
}
|
||||
|
||||
bool TypedValueListEqual::operator()(const std::list<TypedValue> &left,
|
||||
const std::list<TypedValue> &right) const {
|
||||
bool TypedValueVectorEqual::operator()(
|
||||
const std::vector<TypedValue> &left,
|
||||
const std::vector<TypedValue> &right) const {
|
||||
debug_assert(left.size() == right.size(),
|
||||
"TypedValueVector comparison should only be done over vectors "
|
||||
"of the same size");
|
||||
return std::equal(left.begin(), left.end(), right.begin(),
|
||||
TypedValue::BoolEqual{});
|
||||
}
|
||||
@ -2367,7 +2372,8 @@ bool Distinct::DistinctCursor::Pull(Frame &frame,
|
||||
while (true) {
|
||||
if (!input_cursor_->Pull(frame, symbol_table)) return false;
|
||||
|
||||
std::list<TypedValue> row;
|
||||
std::vector<TypedValue> row;
|
||||
row.reserve(self_.value_symbols_.size());
|
||||
for (const auto &symbol : self_.value_symbols_)
|
||||
row.emplace_back(frame[symbol]);
|
||||
if (seen_rows_.insert(std::move(row)).second) return true;
|
||||
|
@ -1109,19 +1109,19 @@ class Accumulate : public LogicalOperator {
|
||||
const Accumulate &self_;
|
||||
GraphDbAccessor &db_;
|
||||
const std::unique_ptr<Cursor> input_cursor_;
|
||||
std::list<std::list<TypedValue>> cache_;
|
||||
std::vector<std::vector<TypedValue>> cache_;
|
||||
decltype(cache_.begin()) cache_it_ = cache_.begin();
|
||||
bool pulled_all_input_{false};
|
||||
};
|
||||
};
|
||||
|
||||
/**
|
||||
* Custom equality function for a list of typed values.
|
||||
* Custom equality function for a vector of typed values.
|
||||
* Used in unordered_maps in Aggregate and Distinct operators.
|
||||
*/
|
||||
struct TypedValueListEqual {
|
||||
bool operator()(const std::list<TypedValue> &left,
|
||||
const std::list<TypedValue> &right) const;
|
||||
struct TypedValueVectorEqual {
|
||||
bool operator()(const std::vector<TypedValue> &left,
|
||||
const std::vector<TypedValue> &right) const;
|
||||
};
|
||||
|
||||
/** @brief Performs an arbitrary number of aggregations of data
|
||||
@ -1194,14 +1194,14 @@ class Aggregate : public LogicalOperator {
|
||||
GraphDbAccessor &db_;
|
||||
const std::unique_ptr<Cursor> input_cursor_;
|
||||
// storage for aggregated data
|
||||
// map key is the list of group-by values
|
||||
// map key is the vector of group-by values
|
||||
// map value is an AggregationValue struct
|
||||
std::unordered_map<
|
||||
std::list<TypedValue>, AggregationValue,
|
||||
// use FNV collection hashing specialized for a list of TypedValues
|
||||
FnvCollection<std::list<TypedValue>, TypedValue, TypedValue::Hash>,
|
||||
std::vector<TypedValue>, AggregationValue,
|
||||
// use FNV collection hashing specialized for a vector of TypedValues
|
||||
FnvCollection<std::vector<TypedValue>, TypedValue, TypedValue::Hash>,
|
||||
// custom equality
|
||||
TypedValueListEqual>
|
||||
TypedValueVectorEqual>
|
||||
aggregation_;
|
||||
// iterator over the accumulated cache
|
||||
decltype(aggregation_.begin()) aggregation_it_ = aggregation_.begin();
|
||||
@ -1393,8 +1393,8 @@ class OrderBy : public LogicalOperator {
|
||||
const std::unique_ptr<Cursor> input_cursor_;
|
||||
bool did_pull_all_{false};
|
||||
// a cache of elements pulled from the input
|
||||
// first pair element is the order-by list
|
||||
// second pair is the remember list
|
||||
// first pair element is the order-by vector
|
||||
// second pair is the remember vector
|
||||
// the cache is filled and sorted (only on first pair elem) on first Pull
|
||||
std::vector<std::pair<std::vector<TypedValue>, std::vector<TypedValue>>>
|
||||
cache_;
|
||||
@ -1566,10 +1566,10 @@ class Distinct : public LogicalOperator {
|
||||
const std::unique_ptr<Cursor> input_cursor_;
|
||||
// a set of already seen rows
|
||||
std::unordered_set<
|
||||
std::list<TypedValue>,
|
||||
// use FNV collection hashing specialized for a list of TypedValues
|
||||
FnvCollection<std::list<TypedValue>, TypedValue, TypedValue::Hash>,
|
||||
TypedValueListEqual>
|
||||
std::vector<TypedValue>,
|
||||
// use FNV collection hashing specialized for a vector of TypedValues
|
||||
FnvCollection<std::vector<TypedValue>, TypedValue, TypedValue::Hash>,
|
||||
TypedValueVectorEqual>
|
||||
seen_rows_;
|
||||
};
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user