mirror of
https://github.com/google/leveldb.git
synced 2025-01-27 06:30:07 +08:00
avoid costly string operations in Block::Iter::ParseNextKey
In the current implementation, key_ is resized to 'shared' and then a string of length 'non_shared' is appended to it. This causes key_ to be shrunk and then expanded again. This patch fixes the problem by introducing IterKey, which is just a wrapper around a raw buffer. The core method is TrimAppend(), which behaves like string::resize() followed by string::append(), but without the wasted operations. Without this patch ./db_bench --benchmarks=readseq --num=$((1<<20)) --db=/tmp/db --use_existing_db=1 LevelDB: version 1.22 Date: Thu Sep 12 17:32:49 2019 CPU: 24 * Intel(R) Xeon(R) CPU E5-2620 v2 @ 2.10GHz CPUCache: 15360 KB Keys: 16 bytes each Values: 100 bytes each (50 bytes after compression) Entries: 1048576 RawSize: 116.0 MB (estimated) FileSize: 66.0 MB (estimated) ------------------------------------------------ readseq : 0.266 micros/op; 416.1 MB/s With this patch ./db_bench --benchmarks=readseq --num=$((1<<20)) --db=/tmp/db --use_existing_db=1 LevelDB: version 1.22 Date: Thu Sep 12 17:33:22 2019 CPU: 24 * Intel(R) Xeon(R) CPU E5-2620 v2 @ 2.10GHz CPUCache: 15360 KB Keys: 16 bytes each Values: 100 bytes each (50 bytes after compression) Entries: 1048576 RawSize: 116.0 MB (estimated) FileSize: 66.0 MB (estimated) ------------------------------------------------ readseq : 0.248 micros/op; 446.0 MB/s Signed-off-by: Kyle Zhang <kyle@smartx.com>
This commit is contained in:
parent
21304d41f7
commit
2a35c6d429
@ -81,10 +81,61 @@ class Block::Iter : public Iterator {
|
||||
uint32_t const restarts_; // Offset of restart array (list of fixed32)
|
||||
uint32_t const num_restarts_; // Number of uint32_t entries in restart array
|
||||
|
||||
// IterKey is a small, non-copyable, growable byte buffer that holds the
// iterator's current key. It exists so that a key can be rebuilt from a
// retained prefix plus a new suffix in one step (see TrimAppend()) instead of
// a resize() followed by an append().
class IterKey {
 public:
  // Creates an empty key that owns no storage yet.
  IterKey() : buf_(nullptr), size_(0), capacity_(0) {}

  IterKey(const IterKey&) = delete;
  IterKey& operator=(const IterKey&) = delete;

  ~IterKey() { delete[] buf_; }

  // Keeps the first `pos` bytes of the current contents and replaces
  // everything after them with the `len` bytes starting at `s`. Afterwards
  // size() == pos + len.
  //
  // REQUIRES: pos <= size() (only existing bytes can be retained).
  // REQUIRES: `s` points to at least `len` readable bytes that do not alias
  //           this key's own buffer.
  //
  // The buffer is reallocated only when pos + len exceeds the current
  // capacity.
  void TrimAppend(size_t pos, const char* s, size_t len) {
    assert(pos <= size_);

    const size_t needed = pos + len;
    if (buf_ == nullptr || needed > capacity_) {
      // Grow geometrically so that a run of slightly longer keys does not
      // trigger a reallocation on every call.
      const size_t doubled = capacity_ * 2;
      const size_t new_capacity = needed > doubled ? needed : doubled;

      // Allocate before touching any member so that a failed allocation
      // leaves buf_ and capacity_ consistent with each other.
      char* new_buf = new char[new_capacity];
      if (pos > 0) {
        memcpy(new_buf, buf_, pos);  // carry over the retained prefix
      }
      delete[] buf_;
      buf_ = new_buf;
      capacity_ = new_capacity;
    }

    // Skip the copy for an empty suffix; memcpy must not be given a null
    // source even when the length is zero.
    if (len > 0) {
      memcpy(buf_ + pos, s, len);
    }
    size_ = needed;
  }

  // Makes the key empty; the allocated storage is kept for reuse.
  void clear() { size_ = 0; }

  // Pointer to the first byte of the key. Not NUL-terminated; valid until the
  // next TrimAppend() call. nullptr if nothing has ever been appended.
  const char* data() const { return buf_; }

  // Number of valid bytes reachable through data().
  size_t size() const { return size_; }

 private:
  char* buf_;        // Owned heap buffer, or nullptr before the first append
  size_t size_;      // Bytes of buf_ currently in use
  size_t capacity_;  // Bytes allocated for buf_
};
|
||||
|
||||
// current_ is offset in data_ of current entry. >= restarts_ if !Valid
|
||||
uint32_t current_;
|
||||
uint32_t restart_index_; // Index of restart block in which current_ falls
|
||||
std::string key_;
|
||||
IterKey key_;
|
||||
Slice value_;
|
||||
Status status_;
|
||||
|
||||
@ -128,7 +179,7 @@ class Block::Iter : public Iterator {
|
||||
Status status() const override { return status_; }
|
||||
Slice key() const override {
|
||||
assert(Valid());
|
||||
return key_;
|
||||
return Slice(key_.data(), key_.size());
|
||||
}
|
||||
Slice value() const override {
|
||||
assert(Valid());
|
||||
@ -195,7 +246,7 @@ class Block::Iter : public Iterator {
|
||||
if (!ParseNextKey()) {
|
||||
return;
|
||||
}
|
||||
if (Compare(key_, target) >= 0) {
|
||||
if (Compare(Slice(key_.data(), key_.size()), target) >= 0) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -240,8 +291,7 @@ class Block::Iter : public Iterator {
|
||||
CorruptionError();
|
||||
return false;
|
||||
} else {
|
||||
key_.resize(shared);
|
||||
key_.append(p, non_shared);
|
||||
key_.TrimAppend(shared, p, non_shared);
|
||||
value_ = Slice(p + non_shared, value_length);
|
||||
while (restart_index_ + 1 < num_restarts_ &&
|
||||
GetRestartPoint(restart_index_ + 1) < current_) {
|
||||
|
Loading…
Reference in New Issue
Block a user