avoid costly string operations in Block::Iter::ParseNextKey

In the current implementation, key_ is resized to 'shared' bytes and then
a string of 'non_shared' bytes is appended to it. This causes key_ to be
shrunk and then expanded again.

This patch fixes the problem by introducing IterKey, which is just a
wrapper around a raw buffer. Its core method is TrimAppend(), which
behaves like string::resize() followed by string::append(), but without
the wasted operations.

Without this patch

./db_bench --benchmarks=readseq --num=$((1<<20)) --db=/tmp/db --use_existing_db=1
LevelDB:    version 1.22
Date:       Thu Sep 12 17:32:49 2019
CPU:        24 * Intel(R) Xeon(R) CPU E5-2620 v2 @ 2.10GHz
CPUCache:   15360 KB
Keys:       16 bytes each
Values:     100 bytes each (50 bytes after compression)
Entries:    1048576
RawSize:    116.0 MB (estimated)
FileSize:   66.0 MB (estimated)
------------------------------------------------
readseq      :       0.266 micros/op;  416.1 MB/s

With this patch

./db_bench --benchmarks=readseq --num=$((1<<20)) --db=/tmp/db --use_existing_db=1
LevelDB:    version 1.22
Date:       Thu Sep 12 17:33:22 2019
CPU:        24 * Intel(R) Xeon(R) CPU E5-2620 v2 @ 2.10GHz
CPUCache:   15360 KB
Keys:       16 bytes each
Values:     100 bytes each (50 bytes after compression)
Entries:    1048576
RawSize:    116.0 MB (estimated)
FileSize:   66.0 MB (estimated)
------------------------------------------------
readseq      :       0.248 micros/op;  446.0 MB/s

Signed-off-by: Kyle Zhang <kyle@smartx.com>
This commit is contained in:
Kyle Zhang 2019-09-11 19:03:35 +08:00
parent 21304d41f7
commit 2a35c6d429

View File

@ -81,10 +81,61 @@ class Block::Iter : public Iterator {
uint32_t const restarts_; // Offset of restart array (list of fixed32)
uint32_t const num_restarts_; // Number of uint32_t entries in restart array
// IterKey is a minimal growable byte buffer. Its one mutating operation,
// TrimAppend(), keeps a prefix of the current contents and appends new bytes
// after it in a single step.
//
// The object owns buf_ and releases it in the destructor, so copying and
// assignment are disabled.
class IterKey {
 private:
  char* buf_;        // Owned storage; nullptr until the first TrimAppend().
  size_t size_;      // Number of valid bytes at the start of buf_.
  size_t capacity_;  // Number of bytes allocated for buf_.

 public:
  IterKey() : buf_(nullptr), size_(0), capacity_(0) {}
  IterKey(const IterKey&) = delete;
  IterKey& operator=(const IterKey&) = delete;
  ~IterKey() { delete[] buf_; }

  // Keeps the first pos bytes of the current contents and appends the len
  // bytes starting at s after them, so that afterwards size() == pos + len.
  //
  // REQUIRES: pos <= size() (i.e. pos must not exceed the current size).
  // REQUIRES: s points to at least len readable bytes that are not part of
  //           this object's own buffer.
  //
  // The buffer is reallocated only when pos + len exceeds the current
  // capacity (or on the very first call); otherwise the bytes are written in
  // place.
  void TrimAppend(size_t pos, const char* s, size_t len) {
    assert(pos <= size_);
    const size_t new_size = pos + len;
    if (buf_ == nullptr || new_size > capacity_) {
      // Grow geometrically so that a run of ever-longer keys does not
      // trigger a reallocation on every call.
      size_t new_capacity = capacity_ * 2;
      if (new_capacity < new_size) {
        new_capacity = new_size;
      }
      // Allocate before touching any member: if new[] throws, the object is
      // left exactly as it was.
      char* new_buf = new char[new_capacity];
      // pos > 0 implies size_ > 0, which implies buf_ != nullptr, so memcpy
      // is never handed a null source here.
      if (pos > 0) {
        memcpy(new_buf, buf_, pos);
      }
      delete[] buf_;
      buf_ = new_buf;
      capacity_ = new_capacity;
    }
    if (len > 0) {
      memcpy(buf_ + pos, s, len);
    }
    size_ = new_size;
  }

  // Discards the contents but keeps the allocation for reuse.
  void clear() { size_ = 0; }

  // Pointer to the first byte of the contents; the data is not
  // NUL-terminated. Returns nullptr if TrimAppend() has never been called.
  const char* data() const { return buf_; }

  // Number of valid bytes reachable through data().
  size_t size() const { return size_; }
};
// current_ is offset in data_ of current entry. >= restarts_ if !Valid
uint32_t current_;
uint32_t restart_index_; // Index of restart block in which current_ falls
std::string key_;
IterKey key_;
Slice value_;
Status status_;
@ -128,7 +179,7 @@ class Block::Iter : public Iterator {
Status status() const override { return status_; }
Slice key() const override {
assert(Valid());
return key_;
return Slice(key_.data(), key_.size());
}
Slice value() const override {
assert(Valid());
@ -195,7 +246,7 @@ class Block::Iter : public Iterator {
if (!ParseNextKey()) {
return;
}
if (Compare(key_, target) >= 0) {
if (Compare(Slice(key_.data(), key_.size()), target) >= 0) {
return;
}
}
@ -240,8 +291,7 @@ class Block::Iter : public Iterator {
CorruptionError();
return false;
} else {
key_.resize(shared);
key_.append(p, non_shared);
key_.TrimAppend(shared, p, non_shared);
value_ = Slice(p + non_shared, value_length);
while (restart_index_ + 1 < num_restarts_ &&
GetRestartPoint(restart_index_ + 1) < current_) {