query::plan - Ops use vector instead of list

Summary:
Replaced std::list with std::vector in all plan operators. No performance increase is visible in the harness tests, so a custom test was defined:

```
unwind range(0, 1000000) as x
create ({a: tointeger(rand() * 100), b: tointeger(rand() * 100), c: tointeger(rand() * 100), d: tointeger(rand() * 10), e: tointeger(rand() * 10), f: tointeger(rand() * 10)});

match (n) return min(n.a), max(n.b), sum(n.c), n.d, n.e, n.f

match (n) with distinct n.a AS a, n.b AS b, n.c AS c, n.d AS d, n.e AS e, n.f AS f return count(*)
```

In that test, the performance gains are 9.8% on the aggregation query (mean 0.83s vs 0.92s) and 34% on the distinct query (mean 2.15s vs 3.25s). We probably won't see much of a gain on any of the LDBC tests because they don't stress those operators nearly as much.

Reviewers: buda, teon.banek, mislav.bradac

Reviewed By: teon.banek

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D778
This commit is contained in:
florijan 2017-09-12 10:08:58 +02:00
parent 4601f6c368
commit 682fced81b
2 changed files with 29 additions and 23 deletions

View File

@ -1626,10 +1626,11 @@ bool Accumulate::AccumulateCursor::Pull(Frame &frame,
// cache all the input // cache all the input
if (!pulled_all_input_) { if (!pulled_all_input_) {
while (input_cursor_->Pull(frame, symbol_table)) { while (input_cursor_->Pull(frame, symbol_table)) {
cache_.emplace_back(); std::vector<TypedValue> row;
auto &row = cache_.back(); row.reserve(self_.symbols_.size());
for (const Symbol &symbol : self_.symbols_) for (const Symbol &symbol : self_.symbols_)
row.emplace_back(frame[symbol]); row.emplace_back(frame[symbol]);
cache_.emplace_back(std::move(row));
} }
pulled_all_input_ = true; pulled_all_input_ = true;
cache_it_ = cache_.begin(); cache_it_ = cache_.begin();
@ -1752,8 +1753,8 @@ void Aggregate::AggregateCursor::ProcessAll(Frame &frame,
void Aggregate::AggregateCursor::ProcessOne(Frame &frame, void Aggregate::AggregateCursor::ProcessOne(Frame &frame,
const SymbolTable &symbol_table, const SymbolTable &symbol_table,
ExpressionEvaluator &evaluator) { ExpressionEvaluator &evaluator) {
// create the group-by list of values std::vector<TypedValue> group_by;
std::list<TypedValue> group_by; group_by.reserve(self_.group_by_.size());
for (Expression *expression : self_.group_by_) { for (Expression *expression : self_.group_by_) {
group_by.emplace_back(expression->Accept(evaluator)); group_by.emplace_back(expression->Accept(evaluator));
} }
@ -1922,8 +1923,12 @@ void Aggregate::AggregateCursor::EnsureOkForAvgSum(
} }
} }
bool TypedValueListEqual::operator()(const std::list<TypedValue> &left, bool TypedValueVectorEqual::operator()(
const std::list<TypedValue> &right) const { const std::vector<TypedValue> &left,
const std::vector<TypedValue> &right) const {
debug_assert(left.size() == right.size(),
"TypedValueVector comparison should only be done over vectors "
"of the same size");
return std::equal(left.begin(), left.end(), right.begin(), return std::equal(left.begin(), left.end(), right.begin(),
TypedValue::BoolEqual{}); TypedValue::BoolEqual{});
} }
@ -2367,7 +2372,8 @@ bool Distinct::DistinctCursor::Pull(Frame &frame,
while (true) { while (true) {
if (!input_cursor_->Pull(frame, symbol_table)) return false; if (!input_cursor_->Pull(frame, symbol_table)) return false;
std::list<TypedValue> row; std::vector<TypedValue> row;
row.reserve(self_.value_symbols_.size());
for (const auto &symbol : self_.value_symbols_) for (const auto &symbol : self_.value_symbols_)
row.emplace_back(frame[symbol]); row.emplace_back(frame[symbol]);
if (seen_rows_.insert(std::move(row)).second) return true; if (seen_rows_.insert(std::move(row)).second) return true;

View File

@ -1109,19 +1109,19 @@ class Accumulate : public LogicalOperator {
const Accumulate &self_; const Accumulate &self_;
GraphDbAccessor &db_; GraphDbAccessor &db_;
const std::unique_ptr<Cursor> input_cursor_; const std::unique_ptr<Cursor> input_cursor_;
std::list<std::list<TypedValue>> cache_; std::vector<std::vector<TypedValue>> cache_;
decltype(cache_.begin()) cache_it_ = cache_.begin(); decltype(cache_.begin()) cache_it_ = cache_.begin();
bool pulled_all_input_{false}; bool pulled_all_input_{false};
}; };
}; };
/** /**
* Custom equality function for a list of typed values. * Custom equality function for a vector of typed values.
* Used in unordered_maps in Aggregate and Distinct operators. * Used in unordered_maps in Aggregate and Distinct operators.
*/ */
struct TypedValueListEqual { struct TypedValueVectorEqual {
bool operator()(const std::list<TypedValue> &left, bool operator()(const std::vector<TypedValue> &left,
const std::list<TypedValue> &right) const; const std::vector<TypedValue> &right) const;
}; };
/** @brief Performs an arbitrary number of aggregations of data /** @brief Performs an arbitrary number of aggregations of data
@ -1194,14 +1194,14 @@ class Aggregate : public LogicalOperator {
GraphDbAccessor &db_; GraphDbAccessor &db_;
const std::unique_ptr<Cursor> input_cursor_; const std::unique_ptr<Cursor> input_cursor_;
// storage for aggregated data // storage for aggregated data
// map key is the list of group-by values // map key is the vector of group-by values
// map value is an AggregationValue struct // map value is an AggregationValue struct
std::unordered_map< std::unordered_map<
std::list<TypedValue>, AggregationValue, std::vector<TypedValue>, AggregationValue,
// use FNV collection hashing specialized for a list of TypedValues // use FNV collection hashing specialized for a vector of TypedValues
FnvCollection<std::list<TypedValue>, TypedValue, TypedValue::Hash>, FnvCollection<std::vector<TypedValue>, TypedValue, TypedValue::Hash>,
// custom equality // custom equality
TypedValueListEqual> TypedValueVectorEqual>
aggregation_; aggregation_;
// iterator over the accumulated cache // iterator over the accumulated cache
decltype(aggregation_.begin()) aggregation_it_ = aggregation_.begin(); decltype(aggregation_.begin()) aggregation_it_ = aggregation_.begin();
@ -1393,8 +1393,8 @@ class OrderBy : public LogicalOperator {
const std::unique_ptr<Cursor> input_cursor_; const std::unique_ptr<Cursor> input_cursor_;
bool did_pull_all_{false}; bool did_pull_all_{false};
// a cache of elements pulled from the input // a cache of elements pulled from the input
// first pair element is the order-by list // first pair element is the order-by vector
// second pair is the remember list // second pair is the remember vector
// the cache is filled and sorted (only on first pair elem) on first Pull // the cache is filled and sorted (only on first pair elem) on first Pull
std::vector<std::pair<std::vector<TypedValue>, std::vector<TypedValue>>> std::vector<std::pair<std::vector<TypedValue>, std::vector<TypedValue>>>
cache_; cache_;
@ -1566,10 +1566,10 @@ class Distinct : public LogicalOperator {
const std::unique_ptr<Cursor> input_cursor_; const std::unique_ptr<Cursor> input_cursor_;
// a set of already seen rows // a set of already seen rows
std::unordered_set< std::unordered_set<
std::list<TypedValue>, std::vector<TypedValue>,
// use FNV collection hashing specialized for a list of TypedValues // use FNV collection hashing specialized for a vector of TypedValues
FnvCollection<std::list<TypedValue>, TypedValue, TypedValue::Hash>, FnvCollection<std::vector<TypedValue>, TypedValue, TypedValue::Hash>,
TypedValueListEqual> TypedValueVectorEqual>
seen_rows_; seen_rows_;
}; };
}; };