optimize MergingIterator with sorted children

In the current implementation, the children of MergingIterator are
unordered. Thus FindSmallest() and FindLargest() have to scan all
children to find the proper current_, which costs n_ comparisons per
call. When an application iterates with the iterator, many of these
comparisons are duplicated.

In this patch, we keep the children ordered, so that FindSmallest() and
FindLargest() don't need to scan all children but just pick the first
valid child from the beginning and the end respectively. When current_
is advanced by Next() or Prev(), we move current_ forward or backward
among the children to preserve their order.

This method improves readseq performance by about 23% when iterating
a db immediately after a huge number of records have been randomly
inserted into it.

Performance without patch

LevelDB:    version 1.22
Date:       Tue Sep 10 23:38:20 2019
CPU:        24 * Intel(R) Xeon(R) CPU E5-2620 v2 @ 2.10GHz
CPUCache:   15360 KB
Keys:       16 bytes each
Values:     100 bytes each (50 bytes after compression)
Entries:    1048576
RawSize:    116.0 MB (estimated)
FileSize:   66.0 MB (estimated)
------------------------------------------------
fillrandom   :       4.537 micros/op;   24.4 MB/s
readseq      :       0.569 micros/op;  194.5 MB/s
compact      : 1318885.000 micros/op;
readseq      :       0.260 micros/op;  426.0 MB/s

Performance with this patch

LevelDB:    version 1.22
Date:       Tue Sep 10 23:37:36 2019
CPU:        24 * Intel(R) Xeon(R) CPU E5-2620 v2 @ 2.10GHz
CPUCache:   15360 KB
Keys:       16 bytes each
Values:     100 bytes each (50 bytes after compression)
Entries:    1048576
RawSize:    116.0 MB (estimated)
FileSize:   66.0 MB (estimated)
------------------------------------------------
fillrandom   :       4.446 micros/op;   24.9 MB/s
readseq      :       0.460 micros/op;  240.6 MB/s
compact      : 1570193.000 micros/op;
readseq      :       0.259 micros/op;  426.8 MB/s

Signed-off-by: Kyle Zhang <kyle@smartx.com>
This commit is contained in:
Kyle Zhang 2019-09-10 22:35:30 +08:00
parent 21304d41f7
commit fe201146d7

View File

@ -4,6 +4,8 @@
#include "table/merger.h"
#include <vector>
#include "leveldb/comparator.h"
#include "leveldb/iterator.h"
#include "table/iterator_wrapper.h"
@ -15,39 +17,49 @@ class MergingIterator : public Iterator {
public:
// Builds a merging iterator over the n iterators in `children`, ordered by
// `comparator`. Starts with no current child (current_ == nullptr) and
// direction_ == kForward.
// NOTE(review): this listing looks like a diff with the +/- markers stripped.
// The array-based lines (new IteratorWrapper[n], .Set()) and the vector-based
// lines (children_(n, nullptr), new IteratorWrapper(...)) are presumably the
// old and new versions of the same statements -- confirm against the real file.
MergingIterator(const Comparator* comparator, Iterator** children, int n)
: comparator_(comparator),
children_(new IteratorWrapper[n]),
children_(n, nullptr),
n_(n),
current_(nullptr),
direction_(kForward) {
for (int i = 0; i < n; i++) {
children_[i].Set(children[i]);
for (int i = 0; i < n_; i++) {
// Each child gets its own heap-allocated wrapper, stored by pointer.
children_[i] = new IteratorWrapper(children[i]);
}
}
// Releases the child wrappers.
// NOTE(review): two versions appear interleaved here (diff markers stripped):
// a one-line form that delete[]s an array of wrappers, and a loop form that
// deletes each of the n_ wrapper pointers individually.
~MergingIterator() override { delete[] children_; }
~MergingIterator() override {
for (int i = 0; i < n_; i++) {
delete children_[i];
}
}
// The iterator is valid exactly when a current child has been selected.
bool Valid() const override {
  return current_ != nullptr;
}
// Positions every child at its first entry, re-sorts the children, selects
// the smallest one as current_, and switches to forward iteration.
// NOTE(review): the `.SeekToFirst()` / `->SeekToFirst()` pair looks like the
// old and new form of one statement in a diff whose +/- markers were stripped.
void SeekToFirst() override {
for (int i = 0; i < n_; i++) {
children_[i].SeekToFirst();
children_[i]->SeekToFirst();
}
// Children must be ordered before FindSmallest() picks the first valid one.
SortChildren();
FindSmallest();
direction_ = kForward;
}
// Positions every child at its last entry, re-sorts the children, selects
// the largest one as current_, and switches to reverse iteration.
// NOTE(review): the `.SeekToLast()` / `->SeekToLast()` pair looks like the
// old and new form of one statement in a diff whose +/- markers were stripped.
void SeekToLast() override {
for (int i = 0; i < n_; i++) {
children_[i].SeekToLast();
children_[i]->SeekToLast();
}
// Children must be ordered before FindLargest() picks the last valid one.
SortChildren();
FindLargest();
direction_ = kReverse;
}
// Seeks every child to `target`, re-sorts the children, selects the smallest
// one as current_, and switches to forward iteration.
// NOTE(review): the `.Seek(target)` / `->Seek(target)` pair looks like the
// old and new form of one statement in a diff whose +/- markers were stripped.
void Seek(const Slice& target) override {
for (int i = 0; i < n_; i++) {
children_[i].Seek(target);
children_[i]->Seek(target);
}
// Children must be ordered before FindSmallest() picks the first valid one.
SortChildren();
FindSmallest();
direction_ = kForward;
}
@ -55,14 +67,17 @@ class MergingIterator : public Iterator {
void Next() override {
assert(Valid());
bool need_sort = false;
// Ensure that all children are positioned after key().
// If we are moving in the forward direction, it is already
// true for all of the non-current_ children since current_ is
// the smallest child and key() == current_->key(). Otherwise,
// we explicitly position the non-current_ children.
if (direction_ != kForward) {
need_sort = true;
for (int i = 0; i < n_; i++) {
IteratorWrapper* child = &children_[i];
IteratorWrapper* child = children_[i];
if (child != current_) {
child->Seek(key());
if (child->Valid() &&
@ -75,20 +90,30 @@ class MergingIterator : public Iterator {
}
current_->Next();
if (need_sort) {
SortChildren();
} else {
AdjustCurrentByNext();
}
FindSmallest();
}
void Prev() override {
assert(Valid());
bool need_sort = false;
// Ensure that all children are positioned before key().
// If we are moving in the reverse direction, it is already
// true for all of the non-current_ children since current_ is
// the largest child and key() == current_->key(). Otherwise,
// we explicitly position the non-current_ children.
if (direction_ != kReverse) {
need_sort = true;
for (int i = 0; i < n_; i++) {
IteratorWrapper* child = &children_[i];
IteratorWrapper* child = children_[i];
if (child != current_) {
child->Seek(key());
if (child->Valid()) {
@ -104,6 +129,13 @@ class MergingIterator : public Iterator {
}
current_->Prev();
if (need_sort) {
SortChildren();
} else {
AdjustCurrentByPrev();
}
FindLargest();
}
@ -120,7 +152,7 @@ class MergingIterator : public Iterator {
Status status() const override {
Status status;
for (int i = 0; i < n_; i++) {
status = children_[i].status();
status = children_[i]->status();
if (!status.ok()) {
break;
}
@ -135,44 +167,92 @@ class MergingIterator : public Iterator {
// Select current_ as the first valid child scanning from the front
// (FindSmallest) or from the back (FindLargest) of children_.
void FindSmallest();
void FindLargest();
// Sorts children_ by key, with invalid children placed last.
void SortChildren();
// After current_ has stepped forward/backward, move it within children_
// until the valid children are ordered again.
void AdjustCurrentByNext();
void AdjustCurrentByPrev();
// We might want to use a heap in case there are lots of children.
// For now we use a simple array since we expect a very small number
// of children in leveldb.
const Comparator* comparator_;
// NOTE(review): two declarations of children_ appear here -- presumably the
// old (array) and new (vector of pointers) versions from a diff whose +/-
// markers were stripped.
IteratorWrapper* children_;
std::vector<IteratorWrapper*> children_;
int n_;
IteratorWrapper* current_;
// Index of current_ within children_. Written by FindSmallest()/FindLargest()
// and the AdjustCurrentBy*() helpers; the constructor's initializer list does
// not set it, so it is meaningful only while current_ is non-null.
int current_idx_;
Direction direction_;
};
// Sets current_ to the child holding the smallest key, or nullptr if no child
// is valid.
// NOTE(review): this listing appears to be a diff with the +/- markers
// stripped, so two implementations are interleaved: one scans every child and
// tracks `smallest` via comparator_, the other takes the first valid child in
// index order (relying on children_ being sorted) and records its position in
// current_idx_. Presumably these are the old and new bodies respectively --
// confirm against the real file.
void MergingIterator::FindSmallest() {
IteratorWrapper* smallest = nullptr;
current_ = nullptr;
for (int i = 0; i < n_; i++) {
IteratorWrapper* child = &children_[i];
IteratorWrapper* child = children_[i];
if (child->Valid()) {
if (smallest == nullptr) {
smallest = child;
} else if (comparator_->Compare(child->key(), smallest->key()) < 0) {
smallest = child;
}
// First valid child from the front wins; remember where it sits.
current_ = child;
current_idx_ = i;
return;
}
}
current_ = smallest;
}
// Sets current_ to the child holding the largest key, or nullptr if no child
// is valid. Children are visited from index n_ - 1 down to 0.
// NOTE(review): this listing appears to be a diff with the +/- markers
// stripped, so two implementations are interleaved: one scans every child and
// tracks `largest` via comparator_, the other takes the first valid child seen
// from the back (relying on children_ being sorted) and records its position
// in current_idx_. Presumably these are the old and new bodies respectively --
// confirm against the real file.
void MergingIterator::FindLargest() {
IteratorWrapper* largest = nullptr;
current_ = nullptr;
for (int i = n_ - 1; i >= 0; i--) {
IteratorWrapper* child = &children_[i];
IteratorWrapper* child = children_[i];
if (child->Valid()) {
if (largest == nullptr) {
largest = child;
} else if (comparator_->Compare(child->key(), largest->key()) > 0) {
largest = child;
}
// First valid child from the back wins; remember where it sits.
current_ = child;
current_idx_ = i;
return;
}
}
}
// Reorders children_ so that the valid children come first, in ascending key
// order according to comparator_, followed by all invalid children.
void MergingIterator::SortChildren() {
  const auto by_key = [this](const IteratorWrapper* lhs,
                             const IteratorWrapper* rhs) {
    if (lhs->Valid() && rhs->Valid()) {
      return comparator_->Compare(lhs->key(), rhs->key()) < 0;
    }
    // At least one side is invalid. Invalid children are skipped by the
    // Find*() scans, so their relative order is irrelevant; a valid child
    // sorts ahead of an invalid one, and an invalid child never sorts ahead.
    return lhs->Valid();
  };
  std::sort(children_.begin(), children_.end(), by_key);
}
void MergingIterator::AdjustCurrentByNext() {
if (!current_->Valid()) return;
for (int next_idx = current_idx_ + 1; next_idx < n_; next_idx++) {
IteratorWrapper* next_child = children_[next_idx];
if (!next_child->Valid()) continue;
if (comparator_->Compare(current_->key(), next_child->key()) > 0) {
children_[current_idx_] = next_child;
current_idx_ = next_idx;
children_[next_idx] = current_;
} else {
break;
}
}
}
// Restores the ordering of children_ after current_ has been moved backward.
// Walks from current_idx_ - 1 down to 0 and swaps current_ with each valid
// child whose key is greater than current_->key(), stopping at the first
// valid child that is not greater. Invalid children are stepped over.
void MergingIterator::AdjustCurrentByPrev() {
// A current_ that ran off the front has no key to order by.
if (!current_->Valid()) return;
for (int next_idx = current_idx_ - 1; next_idx >= 0 ; next_idx--) {
IteratorWrapper* next_child = children_[next_idx];
if (!next_child->Valid()) continue;
if (comparator_->Compare(current_->key(), next_child->key()) < 0) {
// Exchange the two slots and keep current_idx_ tracking current_.
children_[current_idx_] = next_child;
current_idx_ = next_idx;
children_[next_idx] = current_;
} else {
break;
}
}
// NOTE(review): `largest` is not declared in this function. The next line
// looks like a leftover from the removed FindLargest() body in a diff whose
// +/- markers were stripped -- confirm against the real file.
current_ = largest;
}
} // namespace