plan::OrderBy - use vector instead of list

Summary: Reduces latency on LDBC query 9 from 7.9 sec to 6.8 sec (14%). That query has 650k rows in ORDER BY, 3 ordering elements, and about 10 values get returned (both the ordering elements and the returned values are now accumulated into vectors).

Reviewers: buda, mislav.bradac

Reviewed By: mislav.bradac

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D775
This commit is contained in:
florijan 2017-09-11 12:05:19 +02:00
parent 35f726dfd2
commit 14064926b6
2 changed files with 25 additions and 22 deletions

View File

@ -1,6 +1,7 @@
#include <algorithm>
#include <limits>
#include <type_traits>
#include <utility>
#include "query/plan/operator.hpp"
@ -2018,7 +2019,7 @@ OrderBy::OrderBy(const std::shared_ptr<LogicalOperator> &input,
ordering.emplace_back(ordering_expression_pair.first);
order_by_.emplace_back(ordering_expression_pair.second);
}
compare_ = TypedValueListCompare(ordering);
compare_ = TypedValueVectorCompare(ordering);
}
ACCEPT_WITH_INPUT(OrderBy)
@ -2041,17 +2042,19 @@ bool OrderBy::OrderByCursor::Pull(Frame &frame,
ExpressionEvaluator evaluator(frame, symbol_table, db_);
while (input_cursor_->Pull(frame, symbol_table)) {
// collect the order_by elements
std::list<TypedValue> order_by;
std::vector<TypedValue> order_by;
order_by.reserve(self_.order_by_.size());
for (auto expression_ptr : self_.order_by_) {
order_by.emplace_back(expression_ptr->Accept(evaluator));
}
// collect the output elements
std::list<TypedValue> output;
std::vector<TypedValue> output;
output.reserve(self_.output_symbols_.size());
for (const Symbol &output_sym : self_.output_symbols_)
output.emplace_back(frame[output_sym]);
cache_.emplace_back(order_by, output);
cache_.emplace_back(std::move(order_by), std::move(output));
}
std::sort(cache_.begin(), cache_.end(),
@ -2126,16 +2129,16 @@ bool OrderBy::TypedValueCompare(const TypedValue &a, const TypedValue &b) {
}
}
bool OrderBy::TypedValueListCompare::operator()(
const std::list<TypedValue> &c1, const std::list<TypedValue> &c2) const {
auto c1_it = c1.begin();
auto c2_it = c2.begin();
bool OrderBy::TypedValueVectorCompare::operator()(
const std::vector<TypedValue> &c1,
const std::vector<TypedValue> &c2) const {
// ordering is invalid if there are more elements in the collections
// than there are in the ordering_ vector
debug_assert(std::distance(c1_it, c1.end()) <= ordering_.size() &&
std::distance(c2_it, c2.end()) <= ordering_.size(),
debug_assert(c1.size() <= ordering_.size() && c2.size() <= ordering_.size(),
"Collections contain more elements then there are orderings");
auto c1_it = c1.begin();
auto c2_it = c2.begin();
auto ordering_it = ordering_.begin();
for (; c1_it != c1.end() && c2_it != c2.end();
c1_it++, c2_it++, ordering_it++) {

View File

@ -1351,24 +1351,24 @@ class OrderBy : public LogicalOperator {
const auto &output_symbols() const { return output_symbols_; }
private:
// custom Comparator type for comparing lists of TypedValues
// custom Comparator type for comparing vectors of TypedValues
// does lexicographical ordering of elements based on the above
// defined TypedValueCompare, and also accepts a vector of Orderings
// that define how respective elements compare
class TypedValueListCompare {
class TypedValueVectorCompare {
public:
TypedValueListCompare() {}
TypedValueListCompare(const std::vector<Ordering> &ordering)
TypedValueVectorCompare() {}
TypedValueVectorCompare(const std::vector<Ordering> &ordering)
: ordering_(ordering) {}
bool operator()(const std::list<TypedValue> &c1,
const std::list<TypedValue> &c2) const;
bool operator()(const std::vector<TypedValue> &c1,
const std::vector<TypedValue> &c2) const;
private:
std::vector<Ordering> ordering_;
};
const std::shared_ptr<LogicalOperator> input_;
TypedValueListCompare compare_;
TypedValueVectorCompare compare_;
std::vector<Expression *> order_by_;
const std::vector<Symbol> output_symbols_;
@ -1394,7 +1394,7 @@ class OrderBy : public LogicalOperator {
// first pair element is the order-by list
// second pair element is the remember list (the collected output values)
// the cache is filled and sorted (only on first pair elem) on first Pull
std::vector<std::pair<std::list<TypedValue>, std::list<TypedValue>>> cache_;
std::vector<std::pair<std::vector<TypedValue>, std::vector<TypedValue>>> cache_;
// iterator over the cache_, maintains state between Pulls
decltype(cache_.begin()) cache_it_ = cache_.begin();
};