query::plan - Ops use vector instead of list

Summary:
Replaced std::list with std::vector in all plan operators. No performance increase is visible in the harness tests, so I defined a custom test:

```
unwind range(0, 1000000) as x
create ({a: tointeger(rand() * 100), b: tointeger(rand() * 100), c: tointeger(rand() * 100), d: tointeger(rand() * 10), e: tointeger(rand() * 10), f: tointeger(rand() * 10)});

match (n) return min(n.a), max(n.b), sum(n.c), n.d, n.e, n.f

match (n) with distinct n.a AS a, n.b AS b, n.c AS c, n.d AS d, n.e AS e, n.f AS f return count(*)
```

In that test performance gains are 9.8% on the aggregation query (mean 0.83s vs 0.92s) and 34% (mean 2.15s vs 3.25s) on the distinct query. Doubt we'll see much on any of the LDBC tests because they don't stress those operators nearly as much.

Reviewers: buda, teon.banek, mislav.bradac

Reviewed By: teon.banek

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D778
This commit is contained in:
florijan 2017-09-12 10:08:58 +02:00
parent 4601f6c368
commit 682fced81b
2 changed files with 29 additions and 23 deletions

View File

@ -1626,10 +1626,11 @@ bool Accumulate::AccumulateCursor::Pull(Frame &frame,
// cache all the input
if (!pulled_all_input_) {
while (input_cursor_->Pull(frame, symbol_table)) {
cache_.emplace_back();
auto &row = cache_.back();
std::vector<TypedValue> row;
row.reserve(self_.symbols_.size());
for (const Symbol &symbol : self_.symbols_)
row.emplace_back(frame[symbol]);
cache_.emplace_back(std::move(row));
}
pulled_all_input_ = true;
cache_it_ = cache_.begin();
@ -1752,8 +1753,8 @@ void Aggregate::AggregateCursor::ProcessAll(Frame &frame,
void Aggregate::AggregateCursor::ProcessOne(Frame &frame,
const SymbolTable &symbol_table,
ExpressionEvaluator &evaluator) {
// create the group-by list of values
std::list<TypedValue> group_by;
std::vector<TypedValue> group_by;
group_by.reserve(self_.group_by_.size());
for (Expression *expression : self_.group_by_) {
group_by.emplace_back(expression->Accept(evaluator));
}
@ -1922,8 +1923,12 @@ void Aggregate::AggregateCursor::EnsureOkForAvgSum(
}
}
bool TypedValueListEqual::operator()(const std::list<TypedValue> &left,
const std::list<TypedValue> &right) const {
// Element-wise equality of two TypedValue vectors.
// Returns true when every pair of elements at the same index compares equal
// under TypedValue::BoolEqual.
bool TypedValueVectorEqual::operator()(
const std::vector<TypedValue> &left,
const std::vector<TypedValue> &right) const {
// The three-iterator std::equal below reads left.size() elements from
// `right` without checking right.end(), so the sizes must match.
// NOTE(review): debug_assert is presumably compiled out in release builds,
// so the size precondition would then be unchecked -- confirm.
debug_assert(left.size() == right.size(),
"TypedValueVector comparison should only be done over vectors "
"of the same size");
return std::equal(left.begin(), left.end(), right.begin(),
TypedValue::BoolEqual{});
}
@ -2367,7 +2372,8 @@ bool Distinct::DistinctCursor::Pull(Frame &frame,
while (true) {
if (!input_cursor_->Pull(frame, symbol_table)) return false;
std::list<TypedValue> row;
std::vector<TypedValue> row;
row.reserve(self_.value_symbols_.size());
for (const auto &symbol : self_.value_symbols_)
row.emplace_back(frame[symbol]);
if (seen_rows_.insert(std::move(row)).second) return true;

View File

@ -1109,19 +1109,19 @@ class Accumulate : public LogicalOperator {
const Accumulate &self_;
GraphDbAccessor &db_;
const std::unique_ptr<Cursor> input_cursor_;
std::list<std::list<TypedValue>> cache_;
std::vector<std::vector<TypedValue>> cache_;
decltype(cache_.begin()) cache_it_ = cache_.begin();
bool pulled_all_input_{false};
};
};
/**
* Custom equality function for a list of typed values.
* Custom equality function for a vector of typed values.
* Used in unordered_maps in Aggregate and Distinct operators.
*/
struct TypedValueListEqual {
bool operator()(const std::list<TypedValue> &left,
const std::list<TypedValue> &right) const;
struct TypedValueVectorEqual {
bool operator()(const std::vector<TypedValue> &left,
const std::vector<TypedValue> &right) const;
};
/** @brief Performs an arbitrary number of aggregations of data
@ -1194,14 +1194,14 @@ class Aggregate : public LogicalOperator {
GraphDbAccessor &db_;
const std::unique_ptr<Cursor> input_cursor_;
// storage for aggregated data
// map key is the list of group-by values
// map key is the vector of group-by values
// map value is an AggregationValue struct
std::unordered_map<
std::list<TypedValue>, AggregationValue,
// use FNV collection hashing specialized for a list of TypedValues
FnvCollection<std::list<TypedValue>, TypedValue, TypedValue::Hash>,
std::vector<TypedValue>, AggregationValue,
// use FNV collection hashing specialized for a vector of TypedValues
FnvCollection<std::vector<TypedValue>, TypedValue, TypedValue::Hash>,
// custom equality
TypedValueListEqual>
TypedValueVectorEqual>
aggregation_;
// iterator over the accumulated cache
decltype(aggregation_.begin()) aggregation_it_ = aggregation_.begin();
@ -1393,8 +1393,8 @@ class OrderBy : public LogicalOperator {
const std::unique_ptr<Cursor> input_cursor_;
bool did_pull_all_{false};
// a cache of elements pulled from the input
// first pair element is the order-by list
// second pair is the remember list
// first pair element is the order-by vector
// second pair is the remember vector
// the cache is filled and sorted (only on first pair elem) on first Pull
std::vector<std::pair<std::vector<TypedValue>, std::vector<TypedValue>>>
cache_;
@ -1566,10 +1566,10 @@ class Distinct : public LogicalOperator {
const std::unique_ptr<Cursor> input_cursor_;
// a set of already seen rows
std::unordered_set<
std::list<TypedValue>,
// use FNV collection hashing specialized for a list of TypedValues
FnvCollection<std::list<TypedValue>, TypedValue, TypedValue::Hash>,
TypedValueListEqual>
std::vector<TypedValue>,
// use FNV collection hashing specialized for a vector of TypedValues
FnvCollection<std::vector<TypedValue>, TypedValue, TypedValue::Hash>,
TypedValueVectorEqual>
seen_rows_;
};
};