mirror of
https://github.com/google/leveldb.git
synced 2025-01-27 06:30:07 +08:00
optimize MergingIterator with sorted children
In the current implementation, the children of MergingIterator are unordered. Thus FindSmallest() and FindLargest() have to scan all children to find the proper current_, which costs n_ comparisons per call. When an application iterates with the iterator, many of these comparisons are duplicated. In this patch, we keep the children ordered, so that FindSmallest() and FindLargest() don't need to scan all children but just pick the first valid child from the beginning and the end respectively. When current_ is changed by Next() or Prev(), we move current_ forward or backward to preserve the order of the children. This method improves performance by about 23% when iterating, immediately after loading, over a db that was populated by random inserts of a huge number of records. Performance without patch LevelDB: version 1.22 Date: Tue Sep 10 23:38:20 2019 CPU: 24 * Intel(R) Xeon(R) CPU E5-2620 v2 @ 2.10GHz CPUCache: 15360 KB Keys: 16 bytes each Values: 100 bytes each (50 bytes after compression) Entries: 1048576 RawSize: 116.0 MB (estimated) FileSize: 66.0 MB (estimated) ------------------------------------------------ fillrandom : 4.537 micros/op; 24.4 MB/s readseq : 0.569 micros/op; 194.5 MB/s compact : 1318885.000 micros/op; readseq : 0.260 micros/op; 426.0 MB/s Performance with this patch LevelDB: version 1.22 Date: Tue Sep 10 23:37:36 2019 CPU: 24 * Intel(R) Xeon(R) CPU E5-2620 v2 @ 2.10GHz CPUCache: 15360 KB Keys: 16 bytes each Values: 100 bytes each (50 bytes after compression) Entries: 1048576 RawSize: 116.0 MB (estimated) FileSize: 66.0 MB (estimated) ------------------------------------------------ fillrandom : 4.446 micros/op; 24.9 MB/s readseq : 0.460 micros/op; 240.6 MB/s compact : 1570193.000 micros/op; readseq : 0.259 micros/op; 426.8 MB/s Signed-off-by: Kyle Zhang <kyle@smartx.com>
This commit is contained in:
parent
21304d41f7
commit
fe201146d7
134
table/merger.cc
134
table/merger.cc
@ -4,6 +4,8 @@
|
||||
|
||||
#include "table/merger.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "leveldb/comparator.h"
|
||||
#include "leveldb/iterator.h"
|
||||
#include "table/iterator_wrapper.h"
|
||||
@ -15,39 +17,49 @@ class MergingIterator : public Iterator {
|
||||
public:
|
||||
MergingIterator(const Comparator* comparator, Iterator** children, int n)
|
||||
: comparator_(comparator),
|
||||
children_(new IteratorWrapper[n]),
|
||||
children_(n, nullptr),
|
||||
n_(n),
|
||||
current_(nullptr),
|
||||
direction_(kForward) {
|
||||
for (int i = 0; i < n; i++) {
|
||||
children_[i].Set(children[i]);
|
||||
for (int i = 0; i < n_; i++) {
|
||||
children_[i] = new IteratorWrapper(children[i]);
|
||||
}
|
||||
}
|
||||
|
||||
~MergingIterator() override { delete[] children_; }
|
||||
~MergingIterator() override {
|
||||
for (int i = 0; i < n_; i++) {
|
||||
delete children_[i];
|
||||
}
|
||||
}
|
||||
|
||||
bool Valid() const override { return (current_ != nullptr); }
|
||||
|
||||
void SeekToFirst() override {
|
||||
for (int i = 0; i < n_; i++) {
|
||||
children_[i].SeekToFirst();
|
||||
children_[i]->SeekToFirst();
|
||||
}
|
||||
|
||||
SortChildren();
|
||||
FindSmallest();
|
||||
direction_ = kForward;
|
||||
}
|
||||
|
||||
void SeekToLast() override {
|
||||
for (int i = 0; i < n_; i++) {
|
||||
children_[i].SeekToLast();
|
||||
children_[i]->SeekToLast();
|
||||
}
|
||||
|
||||
SortChildren();
|
||||
FindLargest();
|
||||
direction_ = kReverse;
|
||||
}
|
||||
|
||||
void Seek(const Slice& target) override {
|
||||
for (int i = 0; i < n_; i++) {
|
||||
children_[i].Seek(target);
|
||||
children_[i]->Seek(target);
|
||||
}
|
||||
|
||||
SortChildren();
|
||||
FindSmallest();
|
||||
direction_ = kForward;
|
||||
}
|
||||
@ -55,14 +67,17 @@ class MergingIterator : public Iterator {
|
||||
void Next() override {
|
||||
assert(Valid());
|
||||
|
||||
bool need_sort = false;
|
||||
|
||||
// Ensure that all children are positioned after key().
|
||||
// If we are moving in the forward direction, it is already
|
||||
// true for all of the non-current_ children since current_ is
|
||||
// the smallest child and key() == current_->key(). Otherwise,
|
||||
// we explicitly position the non-current_ children.
|
||||
if (direction_ != kForward) {
|
||||
need_sort = true;
|
||||
for (int i = 0; i < n_; i++) {
|
||||
IteratorWrapper* child = &children_[i];
|
||||
IteratorWrapper* child = children_[i];
|
||||
if (child != current_) {
|
||||
child->Seek(key());
|
||||
if (child->Valid() &&
|
||||
@ -75,20 +90,30 @@ class MergingIterator : public Iterator {
|
||||
}
|
||||
|
||||
current_->Next();
|
||||
|
||||
if (need_sort) {
|
||||
SortChildren();
|
||||
} else {
|
||||
AdjustCurrentByNext();
|
||||
}
|
||||
|
||||
FindSmallest();
|
||||
}
|
||||
|
||||
void Prev() override {
|
||||
assert(Valid());
|
||||
|
||||
bool need_sort = false;
|
||||
|
||||
// Ensure that all children are positioned before key().
|
||||
// If we are moving in the reverse direction, it is already
|
||||
// true for all of the non-current_ children since current_ is
|
||||
// the largest child and key() == current_->key(). Otherwise,
|
||||
// we explicitly position the non-current_ children.
|
||||
if (direction_ != kReverse) {
|
||||
need_sort = true;
|
||||
for (int i = 0; i < n_; i++) {
|
||||
IteratorWrapper* child = &children_[i];
|
||||
IteratorWrapper* child = children_[i];
|
||||
if (child != current_) {
|
||||
child->Seek(key());
|
||||
if (child->Valid()) {
|
||||
@ -104,6 +129,13 @@ class MergingIterator : public Iterator {
|
||||
}
|
||||
|
||||
current_->Prev();
|
||||
|
||||
if (need_sort) {
|
||||
SortChildren();
|
||||
} else {
|
||||
AdjustCurrentByPrev();
|
||||
}
|
||||
|
||||
FindLargest();
|
||||
}
|
||||
|
||||
@ -120,7 +152,7 @@ class MergingIterator : public Iterator {
|
||||
Status status() const override {
|
||||
Status status;
|
||||
for (int i = 0; i < n_; i++) {
|
||||
status = children_[i].status();
|
||||
status = children_[i]->status();
|
||||
if (!status.ok()) {
|
||||
break;
|
||||
}
|
||||
@ -135,44 +167,92 @@ class MergingIterator : public Iterator {
|
||||
void FindSmallest();
|
||||
void FindLargest();
|
||||
|
||||
void SortChildren();
|
||||
void AdjustCurrentByNext();
|
||||
void AdjustCurrentByPrev();
|
||||
|
||||
// We might want to use a heap in case there are lots of children.
|
||||
// For now we use a simple array since we expect a very small number
|
||||
// of children in leveldb.
|
||||
const Comparator* comparator_;
|
||||
IteratorWrapper* children_;
|
||||
std::vector<IteratorWrapper*> children_;
|
||||
int n_;
|
||||
IteratorWrapper* current_;
|
||||
int current_idx_;
|
||||
Direction direction_;
|
||||
};
|
||||
|
||||
void MergingIterator::FindSmallest() {
|
||||
IteratorWrapper* smallest = nullptr;
|
||||
current_ = nullptr;
|
||||
|
||||
for (int i = 0; i < n_; i++) {
|
||||
IteratorWrapper* child = &children_[i];
|
||||
IteratorWrapper* child = children_[i];
|
||||
if (child->Valid()) {
|
||||
if (smallest == nullptr) {
|
||||
smallest = child;
|
||||
} else if (comparator_->Compare(child->key(), smallest->key()) < 0) {
|
||||
smallest = child;
|
||||
}
|
||||
current_ = child;
|
||||
current_idx_ = i;
|
||||
return;
|
||||
}
|
||||
}
|
||||
current_ = smallest;
|
||||
}
|
||||
|
||||
void MergingIterator::FindLargest() {
|
||||
IteratorWrapper* largest = nullptr;
|
||||
current_ = nullptr;
|
||||
|
||||
for (int i = n_ - 1; i >= 0; i--) {
|
||||
IteratorWrapper* child = &children_[i];
|
||||
IteratorWrapper* child = children_[i];
|
||||
if (child->Valid()) {
|
||||
if (largest == nullptr) {
|
||||
largest = child;
|
||||
} else if (comparator_->Compare(child->key(), largest->key()) > 0) {
|
||||
largest = child;
|
||||
}
|
||||
current_ = child;
|
||||
current_idx_ = i;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void MergingIterator::SortChildren() {
|
||||
std::sort(children_.begin(), children_.end(),
|
||||
[this](const IteratorWrapper* a, const IteratorWrapper* b) {
|
||||
// Order of invalid children are not important. They are just
|
||||
// skipped.
|
||||
if (!a->Valid()) return false;
|
||||
if (!b->Valid()) return true;
|
||||
return comparator_->Compare(a->key(), b->key()) < 0;
|
||||
});
|
||||
}
|
||||
|
||||
void MergingIterator::AdjustCurrentByNext() {
|
||||
if (!current_->Valid()) return;
|
||||
|
||||
for (int next_idx = current_idx_ + 1; next_idx < n_; next_idx++) {
|
||||
IteratorWrapper* next_child = children_[next_idx];
|
||||
|
||||
if (!next_child->Valid()) continue;
|
||||
|
||||
if (comparator_->Compare(current_->key(), next_child->key()) > 0) {
|
||||
children_[current_idx_] = next_child;
|
||||
current_idx_ = next_idx;
|
||||
children_[next_idx] = current_;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void MergingIterator::AdjustCurrentByPrev() {
|
||||
if (!current_->Valid()) return;
|
||||
|
||||
for (int next_idx = current_idx_ - 1; next_idx >= 0 ; next_idx--) {
|
||||
IteratorWrapper* next_child = children_[next_idx];
|
||||
|
||||
if (!next_child->Valid()) continue;
|
||||
|
||||
if (comparator_->Compare(current_->key(), next_child->key()) < 0) {
|
||||
children_[current_idx_] = next_child;
|
||||
current_idx_ = next_idx;
|
||||
children_[next_idx] = current_;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
current_ = largest;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user