mirror of
https://github.com/google/leveldb.git
synced 2024-12-26 03:50:10 +08:00
reverting disastrous MOE commit, returning to r21
git-svn-id: https://leveldb.googlecode.com/svn/trunk@23 62dab493-f737-651d-591e-8d6aee1b9529
This commit is contained in:
parent
b743906eea
commit
69c6d38342
@ -27,6 +27,7 @@ LIBOBJECTS = \
|
||||
./db/version_set.o \
|
||||
./db/write_batch.o \
|
||||
./port/port_posix.o \
|
||||
./port/sha1_portable.o \
|
||||
./table/block.o \
|
||||
./table/block_builder.o \
|
||||
./table/format.o \
|
||||
@ -62,6 +63,7 @@ TESTS = \
|
||||
env_test \
|
||||
filename_test \
|
||||
log_test \
|
||||
sha1_test \
|
||||
skiplist_test \
|
||||
table_test \
|
||||
version_edit_test \
|
||||
@ -113,6 +115,9 @@ log_test: db/log_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
table_test: table/table_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CC) $(LDFLAGS) table/table_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@
|
||||
|
||||
sha1_test: port/sha1_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CC) $(LDFLAGS) port/sha1_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@
|
||||
|
||||
skiplist_test: db/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CC) $(LDFLAGS) db/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@
|
||||
|
@ -2,10 +2,10 @@ leveldb: A key-value store
|
||||
Authors: Sanjay Ghemawat (sanjay@google.com) and Jeff Dean (jeff@google.com)
|
||||
|
||||
The code under this directory implements a system for maintaining a
|
||||
persistent key/value store.
|
||||
persistent key/value store.
|
||||
|
||||
See doc/index.html for more explanation.
|
||||
See doc/impl.html for a brief overview of the implementation.
|
||||
See doc/db_layout.txt for a brief overview of the implementation.
|
||||
|
||||
The public interface is in include/*.h. Callers should not include or
|
||||
rely on the details of any other header files in this package. Those
|
@ -8,7 +8,7 @@ db
|
||||
object stores, etc. can be done in the background anyway, so
|
||||
probably not that important.
|
||||
|
||||
api changes:
|
||||
- Make it wrappable
|
||||
api changes?
|
||||
- Efficient large value reading and writing
|
||||
|
||||
Faster Get implementation
|
@ -38,6 +38,15 @@ Status BuildTable(const std::string& dbname,
|
||||
for (; iter->Valid(); iter->Next()) {
|
||||
Slice key = iter->key();
|
||||
meta->largest.DecodeFrom(key);
|
||||
if (ExtractValueType(key) == kTypeLargeValueRef) {
|
||||
if (iter->value().size() != LargeValueRef::ByteSize()) {
|
||||
s = Status::Corruption("invalid indirect reference hash value (L0)");
|
||||
break;
|
||||
}
|
||||
edit->AddLargeValueRef(LargeValueRef::FromRef(iter->value()),
|
||||
meta->number,
|
||||
iter->key());
|
||||
}
|
||||
builder->Add(key, iter->value());
|
||||
}
|
||||
|
@ -20,9 +20,9 @@ class VersionEdit;
|
||||
// Build a Table file from the contents of *iter. The generated file
|
||||
// will be named according to meta->number. On success, the rest of
|
||||
// *meta will be filled with metadata about the generated table, and
|
||||
// the file information will be added to *edit. If no data is present
|
||||
// in *iter, meta->file_size will be set to zero, and no Table file
|
||||
// will be produced.
|
||||
// large value refs and the added file information will be added to
|
||||
// *edit. If no data is present in *iter, meta->file_size will be set
|
||||
// to zero, and no Table file will be produced.
|
||||
extern Status BuildTable(const std::string& dbname,
|
||||
Env* env,
|
||||
const Options& options,
|
@ -121,10 +121,11 @@ class CorruptionTest {
|
||||
std::vector<std::string> filenames;
|
||||
ASSERT_OK(env_.GetChildren(dbname_, &filenames));
|
||||
uint64_t number;
|
||||
LargeValueRef large_ref;
|
||||
FileType type;
|
||||
std::vector<std::string> candidates;
|
||||
for (int i = 0; i < filenames.size(); i++) {
|
||||
if (ParseFileName(filenames[i], &number, &type) &&
|
||||
if (ParseFileName(filenames[i], &number, &large_ref, &type) &&
|
||||
type == filetype) {
|
||||
candidates.push_back(dbname_ + "/" + filenames[i]);
|
||||
}
|
||||
@ -275,6 +276,29 @@ TEST(CorruptionTest, SequenceNumberRecovery) {
|
||||
ASSERT_EQ("v6", v);
|
||||
}
|
||||
|
||||
TEST(CorruptionTest, LargeValueRecovery) {
|
||||
Options options;
|
||||
options.large_value_threshold = 10000;
|
||||
Reopen(&options);
|
||||
|
||||
Random rnd(301);
|
||||
std::string big;
|
||||
ASSERT_OK(db_->Put(WriteOptions(),
|
||||
"foo", test::RandomString(&rnd, 100000, &big)));
|
||||
std::string v;
|
||||
ASSERT_OK(db_->Get(ReadOptions(), "foo", &v));
|
||||
ASSERT_EQ(big, v);
|
||||
|
||||
RepairDB();
|
||||
Reopen();
|
||||
ASSERT_OK(db_->Get(ReadOptions(), "foo", &v));
|
||||
ASSERT_EQ(big, v);
|
||||
|
||||
Reopen();
|
||||
ASSERT_OK(db_->Get(ReadOptions(), "foo", &v));
|
||||
ASSERT_EQ(big, v);
|
||||
}
|
||||
|
||||
TEST(CorruptionTest, CorruptedDescriptor) {
|
||||
ASSERT_OK(db_->Put(WriteOptions(), "foo", "hello"));
|
||||
DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
|
@ -28,6 +28,7 @@
|
||||
// readreverse -- read N values in reverse order
|
||||
// readrandom -- read N values in random order
|
||||
// crc32c -- repeated crc32c of 4K of data
|
||||
// sha1 -- repeated SHA1 computation over 4K of data
|
||||
// Meta operations:
|
||||
// compact -- Compact the entire DB
|
||||
// stats -- Print DB stats
|
||||
@ -47,6 +48,7 @@ static const char* FLAGS_benchmarks =
|
||||
"readreverse,"
|
||||
"fill100K,"
|
||||
"crc32c,"
|
||||
"sha1,"
|
||||
"snappycomp,"
|
||||
"snappyuncomp,"
|
||||
;
|
||||
@ -364,6 +366,8 @@ class Benchmark {
|
||||
Compact();
|
||||
} else if (name == Slice("crc32c")) {
|
||||
Crc32c(4096, "(4K per op)");
|
||||
} else if (name == Slice("sha1")) {
|
||||
SHA1(4096, "(4K per op)");
|
||||
} else if (name == Slice("snappycomp")) {
|
||||
SnappyCompress();
|
||||
} else if (name == Slice("snappyuncomp")) {
|
||||
@ -402,6 +406,24 @@ class Benchmark {
|
||||
message_ = label;
|
||||
}
|
||||
|
||||
void SHA1(int size, const char* label) {
|
||||
// SHA1 about 100MB of data total
|
||||
std::string data(size, 'x');
|
||||
int64_t bytes = 0;
|
||||
char sha1[20];
|
||||
while (bytes < 100 * 1048576) {
|
||||
port::SHA1_Hash(data.data(), size, sha1);
|
||||
FinishedSingleOp();
|
||||
bytes += size;
|
||||
}
|
||||
|
||||
// Print so result is not dead
|
||||
fprintf(stderr, "... sha1=%02x...\r", static_cast<unsigned int>(sha1[0]));
|
||||
|
||||
bytes_ = bytes;
|
||||
message_ = label;
|
||||
}
|
||||
|
||||
void SnappyCompress() {
|
||||
Slice input = gen_.Generate(Options().block_size);
|
||||
int64_t bytes = 0;
|
@ -81,8 +81,8 @@ class NullWritableFile : public WritableFile {
|
||||
// Fix user-supplied options to be reasonable
|
||||
template <class T,class V>
|
||||
static void ClipToRange(T* ptr, V minvalue, V maxvalue) {
|
||||
if (static_cast<V>(*ptr) > maxvalue) *ptr = maxvalue;
|
||||
if (static_cast<V>(*ptr) < minvalue) *ptr = minvalue;
|
||||
if (*ptr > maxvalue) *ptr = maxvalue;
|
||||
if (*ptr < minvalue) *ptr = minvalue;
|
||||
}
|
||||
Options SanitizeOptions(const std::string& dbname,
|
||||
const InternalKeyComparator* icmp,
|
||||
@ -91,6 +91,7 @@ Options SanitizeOptions(const std::string& dbname,
|
||||
result.comparator = icmp;
|
||||
ClipToRange(&result.max_open_files, 20, 50000);
|
||||
ClipToRange(&result.write_buffer_size, 64<<10, 1<<30);
|
||||
ClipToRange(&result.large_value_threshold, 16<<10, 1<<30);
|
||||
ClipToRange(&result.block_size, 1<<10, 4<<20);
|
||||
if (result.info_log == NULL) {
|
||||
// Open a log file in the same directory as the db
|
||||
@ -212,12 +213,15 @@ void DBImpl::DeleteObsoleteFiles() {
|
||||
std::set<uint64_t> live = pending_outputs_;
|
||||
versions_->AddLiveFiles(&live);
|
||||
|
||||
versions_->CleanupLargeValueRefs(live);
|
||||
|
||||
std::vector<std::string> filenames;
|
||||
env_->GetChildren(dbname_, &filenames); // Ignoring errors on purpose
|
||||
uint64_t number;
|
||||
LargeValueRef large_ref;
|
||||
FileType type;
|
||||
for (size_t i = 0; i < filenames.size(); i++) {
|
||||
if (ParseFileName(filenames[i], &number, &type)) {
|
||||
for (int i = 0; i < filenames.size(); i++) {
|
||||
if (ParseFileName(filenames[i], &number, &large_ref, &type)) {
|
||||
bool keep = true;
|
||||
switch (type) {
|
||||
case kLogFile:
|
||||
@ -237,6 +241,9 @@ void DBImpl::DeleteObsoleteFiles() {
|
||||
// be recorded in pending_outputs_, which is inserted into "live"
|
||||
keep = (live.find(number) != live.end());
|
||||
break;
|
||||
case kLargeValueFile:
|
||||
keep = versions_->LargeValueIsLive(large_ref);
|
||||
break;
|
||||
case kCurrentFile:
|
||||
case kDBLockFile:
|
||||
case kInfoLogFile:
|
||||
@ -592,7 +599,7 @@ void DBImpl::CleanupCompaction(CompactionState* compact) {
|
||||
assert(compact->outfile == NULL);
|
||||
}
|
||||
delete compact->outfile;
|
||||
for (size_t i = 0; i < compact->outputs.size(); i++) {
|
||||
for (int i = 0; i < compact->outputs.size(); i++) {
|
||||
const CompactionState::Output& out = compact->outputs[i];
|
||||
pending_outputs_.erase(out.number);
|
||||
}
|
||||
@ -688,7 +695,7 @@ Status DBImpl::InstallCompactionResults(CompactionState* compact) {
|
||||
// Add compaction outputs
|
||||
compact->compaction->AddInputDeletions(compact->compaction->edit());
|
||||
const int level = compact->compaction->level();
|
||||
for (size_t i = 0; i < compact->outputs.size(); i++) {
|
||||
for (int i = 0; i < compact->outputs.size(); i++) {
|
||||
const CompactionState::Output& out = compact->outputs[i];
|
||||
compact->compaction->edit()->AddFile(
|
||||
level + 1,
|
||||
@ -703,7 +710,7 @@ Status DBImpl::InstallCompactionResults(CompactionState* compact) {
|
||||
DeleteObsoleteFiles();
|
||||
} else {
|
||||
// Discard any files we may have created during this failed compaction
|
||||
for (size_t i = 0; i < compact->outputs.size(); i++) {
|
||||
for (int i = 0; i < compact->outputs.size(); i++) {
|
||||
env_->DeleteFile(TableFileName(dbname_, compact->outputs[i].number));
|
||||
}
|
||||
}
|
||||
@ -804,7 +811,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) {
|
||||
" Compact: %s, seq %d, type: %d %d, drop: %d, is_base: %d, "
|
||||
"%d smallest_snapshot: %d",
|
||||
ikey.user_key.ToString().c_str(),
|
||||
(int)ikey.sequence, ikey.type, kTypeValue, drop,
|
||||
(int)ikey.sequence, ikey.type, kTypeLargeValueRef, drop,
|
||||
compact->compaction->IsBaseLevelForKey(ikey.user_key),
|
||||
(int)last_sequence_for_key, (int)compact->smallest_snapshot);
|
||||
#endif
|
||||
@ -821,7 +828,26 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) {
|
||||
compact->current_output()->smallest.DecodeFrom(key);
|
||||
}
|
||||
compact->current_output()->largest.DecodeFrom(key);
|
||||
compact->builder->Add(key, input->value());
|
||||
|
||||
if (ikey.type == kTypeLargeValueRef) {
|
||||
if (input->value().size() != LargeValueRef::ByteSize()) {
|
||||
if (options_.paranoid_checks) {
|
||||
status = Status::Corruption("invalid large value ref");
|
||||
break;
|
||||
} else {
|
||||
Log(env_, options_.info_log,
|
||||
"compaction found invalid large value ref");
|
||||
}
|
||||
} else {
|
||||
compact->compaction->edit()->AddLargeValueRef(
|
||||
LargeValueRef::FromRef(input->value()),
|
||||
compact->current_output()->number,
|
||||
input->key());
|
||||
compact->builder->Add(key, input->value());
|
||||
}
|
||||
} else {
|
||||
compact->builder->Add(key, input->value());
|
||||
}
|
||||
|
||||
// Close output file if it is big enough
|
||||
if (compact->builder->FileSize() >=
|
||||
@ -855,7 +881,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) {
|
||||
stats.bytes_read += compact->compaction->input(which, i)->file_size;
|
||||
}
|
||||
}
|
||||
for (size_t i = 0; i < compact->outputs.size(); i++) {
|
||||
for (int i = 0; i < compact->outputs.size(); i++) {
|
||||
stats.bytes_written += compact->outputs[i].file_size;
|
||||
}
|
||||
|
||||
@ -959,27 +985,40 @@ Status DBImpl::Delete(const WriteOptions& options, const Slice& key) {
|
||||
|
||||
Status DBImpl::Write(const WriteOptions& options, WriteBatch* updates) {
|
||||
Status status;
|
||||
MutexLock l(&mutex_);
|
||||
status = MakeRoomForWrite(false); // May temporarily release lock and wait
|
||||
uint64_t last_sequence = versions_->LastSequence();
|
||||
if (status.ok()) {
|
||||
WriteBatchInternal::SetSequence(updates, last_sequence + 1);
|
||||
last_sequence += WriteBatchInternal::Count(updates);
|
||||
versions_->SetLastSequence(last_sequence);
|
||||
|
||||
// Add to log and apply to memtable
|
||||
status = log_->AddRecord(WriteBatchInternal::Contents(updates));
|
||||
if (status.ok() && options.sync) {
|
||||
status = logfile_->Sync();
|
||||
WriteBatch* final = NULL;
|
||||
{
|
||||
MutexLock l(&mutex_);
|
||||
status = MakeRoomForWrite(false); // May temporarily release lock and wait
|
||||
|
||||
uint64_t last_sequence = versions_->LastSequence();
|
||||
if (status.ok()) {
|
||||
status = HandleLargeValues(last_sequence + 1, updates, &final);
|
||||
}
|
||||
if (status.ok()) {
|
||||
status = WriteBatchInternal::InsertInto(updates, mem_);
|
||||
WriteBatchInternal::SetSequence(final, last_sequence + 1);
|
||||
last_sequence += WriteBatchInternal::Count(final);
|
||||
versions_->SetLastSequence(last_sequence);
|
||||
|
||||
// Add to log and apply to memtable
|
||||
status = log_->AddRecord(WriteBatchInternal::Contents(final));
|
||||
if (status.ok() && options.sync) {
|
||||
status = logfile_->Sync();
|
||||
}
|
||||
if (status.ok()) {
|
||||
status = WriteBatchInternal::InsertInto(final, mem_);
|
||||
}
|
||||
}
|
||||
|
||||
if (options.post_write_snapshot != NULL) {
|
||||
*options.post_write_snapshot =
|
||||
status.ok() ? snapshots_.New(last_sequence) : NULL;
|
||||
}
|
||||
}
|
||||
if (options.post_write_snapshot != NULL) {
|
||||
*options.post_write_snapshot =
|
||||
status.ok() ? snapshots_.New(last_sequence) : NULL;
|
||||
if (final != updates) {
|
||||
delete final;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
@ -1031,6 +1070,124 @@ Status DBImpl::MakeRoomForWrite(bool force) {
|
||||
return s;
|
||||
}
|
||||
|
||||
bool DBImpl::HasLargeValues(const WriteBatch& batch) const {
|
||||
if (WriteBatchInternal::ByteSize(&batch) >= options_.large_value_threshold) {
|
||||
for (WriteBatchInternal::Iterator it(batch); !it.Done(); it.Next()) {
|
||||
if (it.op() == kTypeValue &&
|
||||
it.value().size() >= options_.large_value_threshold) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Given "raw_value", determines the appropriate compression format to use
|
||||
// and stores the data that should be written to the large value file in
|
||||
// "*file_bytes", and sets "*ref" to the appropriate large value reference.
|
||||
// May use "*scratch" as backing store for "*file_bytes".
|
||||
void DBImpl::MaybeCompressLargeValue(
|
||||
const Slice& raw_value,
|
||||
Slice* file_bytes,
|
||||
std::string* scratch,
|
||||
LargeValueRef* ref) {
|
||||
switch (options_.compression) {
|
||||
case kSnappyCompression: {
|
||||
if (port::Snappy_Compress(raw_value.data(), raw_value.size(), scratch) &&
|
||||
(scratch->size() < (raw_value.size() / 8) * 7)) {
|
||||
*file_bytes = *scratch;
|
||||
*ref = LargeValueRef::Make(raw_value, kSnappyCompression);
|
||||
return;
|
||||
}
|
||||
|
||||
// Less than 12.5% compression: just leave as uncompressed data
|
||||
break;
|
||||
}
|
||||
case kNoCompression:
|
||||
// Use default code outside of switch
|
||||
break;
|
||||
}
|
||||
// Store as uncompressed data
|
||||
*file_bytes = raw_value;
|
||||
*ref = LargeValueRef::Make(raw_value, kNoCompression);
|
||||
}
|
||||
|
||||
Status DBImpl::HandleLargeValues(SequenceNumber assigned_seq,
|
||||
WriteBatch* updates,
|
||||
WriteBatch** final) {
|
||||
if (!HasLargeValues(*updates)) {
|
||||
// Fast path: no large values found
|
||||
*final = updates;
|
||||
} else {
|
||||
// Copy *updates to a new WriteBatch, replacing the references to
|
||||
*final = new WriteBatch;
|
||||
SequenceNumber seq = assigned_seq;
|
||||
for (WriteBatchInternal::Iterator it(*updates); !it.Done(); it.Next()) {
|
||||
switch (it.op()) {
|
||||
case kTypeValue:
|
||||
if (it.value().size() < options_.large_value_threshold) {
|
||||
(*final)->Put(it.key(), it.value());
|
||||
} else {
|
||||
std::string scratch;
|
||||
Slice file_bytes;
|
||||
LargeValueRef large_ref;
|
||||
MaybeCompressLargeValue(
|
||||
it.value(), &file_bytes, &scratch, &large_ref);
|
||||
InternalKey ikey(it.key(), seq, kTypeLargeValueRef);
|
||||
if (versions_->RegisterLargeValueRef(
|
||||
large_ref, versions_->LogNumber(), ikey)) {
|
||||
// TODO(opt): avoid holding the lock here (but be careful about
|
||||
// another thread doing a Write and switching logs or
|
||||
// having us get a different "assigned_seq" value).
|
||||
|
||||
uint64_t tmp_number = versions_->NewFileNumber();
|
||||
pending_outputs_.insert(tmp_number);
|
||||
std::string tmp = TempFileName(dbname_, tmp_number);
|
||||
WritableFile* file;
|
||||
Status s = env_->NewWritableFile(tmp, &file);
|
||||
if (!s.ok()) {
|
||||
return s; // Caller will delete *final
|
||||
}
|
||||
|
||||
file->Append(file_bytes);
|
||||
|
||||
s = file->Close();
|
||||
delete file;
|
||||
|
||||
if (s.ok()) {
|
||||
const std::string fname =
|
||||
LargeValueFileName(dbname_, large_ref);
|
||||
s = env_->RenameFile(tmp, fname);
|
||||
} else {
|
||||
Log(env_, options_.info_log, "Write large value: %s",
|
||||
s.ToString().c_str());
|
||||
}
|
||||
pending_outputs_.erase(tmp_number);
|
||||
|
||||
if (!s.ok()) {
|
||||
env_->DeleteFile(tmp); // Cleanup; intentionally ignoring error
|
||||
return s; // Caller will delete *final
|
||||
}
|
||||
}
|
||||
|
||||
// Put an indirect reference in the write batch in place
|
||||
// of large value
|
||||
WriteBatchInternal::PutLargeValueRef(*final, it.key(), large_ref);
|
||||
}
|
||||
break;
|
||||
case kTypeLargeValueRef:
|
||||
return Status::Corruption("Corrupted write batch");
|
||||
break;
|
||||
case kTypeDeletion:
|
||||
(*final)->Delete(it.key());
|
||||
break;
|
||||
}
|
||||
seq = seq + 1;
|
||||
}
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
bool DBImpl::GetProperty(const Slice& property, std::string* value) {
|
||||
value->clear();
|
||||
|
||||
@ -1048,8 +1205,7 @@ bool DBImpl::GetProperty(const Slice& property, std::string* value) {
|
||||
return false;
|
||||
} else {
|
||||
char buf[100];
|
||||
snprintf(buf, sizeof(buf), "%d",
|
||||
versions_->NumLevelFiles(static_cast<int>(level)));
|
||||
snprintf(buf, sizeof(buf), "%d", versions_->NumLevelFiles(level));
|
||||
*value = buf;
|
||||
return true;
|
||||
}
|
||||
@ -1169,9 +1325,10 @@ Status DestroyDB(const std::string& dbname, const Options& options) {
|
||||
Status result = env->LockFile(LockFileName(dbname), &lock);
|
||||
if (result.ok()) {
|
||||
uint64_t number;
|
||||
LargeValueRef large_ref;
|
||||
FileType type;
|
||||
for (size_t i = 0; i < filenames.size(); i++) {
|
||||
if (ParseFileName(filenames[i], &number, &type)) {
|
||||
for (int i = 0; i < filenames.size(); i++) {
|
||||
if (ParseFileName(filenames[i], &number, &large_ref, &type)) {
|
||||
Status del = env->DeleteFile(dbname + "/" + filenames[i]);
|
||||
if (result.ok() && !del.ok()) {
|
||||
result = del;
|
@ -92,6 +92,29 @@ class DBImpl : public DB {
|
||||
Status WriteLevel0Table(MemTable* mem, VersionEdit* edit);
|
||||
|
||||
Status MakeRoomForWrite(bool force /* compact even if there is room? */);
|
||||
bool HasLargeValues(const WriteBatch& batch) const;
|
||||
|
||||
// Process data in "*updates" and return a status. "assigned_seq"
|
||||
// is the sequence number assigned to the first mod in "*updates".
|
||||
// If no large values are encountered, "*final" is set to "updates".
|
||||
// If large values were encountered, registers the references of the
|
||||
// large values with the VersionSet, writes the large values to
|
||||
// files (if appropriate), and allocates a new WriteBatch with the
|
||||
// large values replaced with indirect references and stores a
|
||||
// pointer to the new WriteBatch in *final. If *final != updates on
|
||||
// return, then the client should delete *final when no longer
|
||||
// needed. Returns OK on success, and an appropriate error
|
||||
// otherwise.
|
||||
Status HandleLargeValues(SequenceNumber assigned_seq,
|
||||
WriteBatch* updates,
|
||||
WriteBatch** final);
|
||||
|
||||
// Helper routine for HandleLargeValues
|
||||
void MaybeCompressLargeValue(
|
||||
const Slice& raw_value,
|
||||
Slice* file_bytes,
|
||||
std::string* scratch,
|
||||
LargeValueRef* ref);
|
||||
|
||||
struct CompactionState;
|
||||
|
@ -53,11 +53,13 @@ class DBIter: public Iterator {
|
||||
user_comparator_(cmp),
|
||||
iter_(iter),
|
||||
sequence_(s),
|
||||
large_(NULL),
|
||||
direction_(kForward),
|
||||
valid_(false) {
|
||||
}
|
||||
virtual ~DBIter() {
|
||||
delete iter_;
|
||||
delete large_;
|
||||
}
|
||||
virtual bool Valid() const { return valid_; }
|
||||
virtual Slice key() const {
|
||||
@ -66,10 +68,20 @@ class DBIter: public Iterator {
|
||||
}
|
||||
virtual Slice value() const {
|
||||
assert(valid_);
|
||||
return (direction_ == kForward) ? iter_->value() : saved_value_;
|
||||
Slice raw_value = (direction_ == kForward) ? iter_->value() : saved_value_;
|
||||
if (large_ == NULL) {
|
||||
return raw_value;
|
||||
} else {
|
||||
MutexLock l(&large_->mutex);
|
||||
if (!large_->produced) {
|
||||
ReadIndirectValue(raw_value);
|
||||
}
|
||||
return large_->value;
|
||||
}
|
||||
}
|
||||
virtual Status status() const {
|
||||
if (status_.ok()) {
|
||||
if (large_ != NULL && !large_->status.ok()) return large_->status;
|
||||
return iter_->status();
|
||||
} else {
|
||||
return status_;
|
||||
@ -83,14 +95,29 @@ class DBIter: public Iterator {
|
||||
virtual void SeekToLast();
|
||||
|
||||
private:
|
||||
struct Large {
|
||||
port::Mutex mutex;
|
||||
std::string value;
|
||||
bool produced;
|
||||
Status status;
|
||||
};
|
||||
|
||||
void FindNextUserEntry(bool skipping, std::string* skip);
|
||||
void FindPrevUserEntry();
|
||||
bool ParseKey(ParsedInternalKey* key);
|
||||
void ReadIndirectValue(Slice ref) const;
|
||||
|
||||
inline void SaveKey(const Slice& k, std::string* dst) {
|
||||
dst->assign(k.data(), k.size());
|
||||
}
|
||||
|
||||
inline void ForgetLargeValue() {
|
||||
if (large_ != NULL) {
|
||||
delete large_;
|
||||
large_ = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
inline void ClearSavedValue() {
|
||||
if (saved_value_.capacity() > 1048576) {
|
||||
std::string empty;
|
||||
@ -109,6 +136,7 @@ class DBIter: public Iterator {
|
||||
Status status_;
|
||||
std::string saved_key_; // == current key when direction_==kReverse
|
||||
std::string saved_value_; // == current raw value when direction_==kReverse
|
||||
Large* large_; // Non-NULL if value is an indirect reference
|
||||
Direction direction_;
|
||||
bool valid_;
|
||||
|
||||
@ -128,6 +156,7 @@ inline bool DBIter::ParseKey(ParsedInternalKey* ikey) {
|
||||
|
||||
void DBIter::Next() {
|
||||
assert(valid_);
|
||||
ForgetLargeValue();
|
||||
|
||||
if (direction_ == kReverse) { // Switch directions?
|
||||
direction_ = kForward;
|
||||
@ -156,6 +185,7 @@ void DBIter::FindNextUserEntry(bool skipping, std::string* skip) {
|
||||
// Loop until we hit an acceptable entry to yield
|
||||
assert(iter_->Valid());
|
||||
assert(direction_ == kForward);
|
||||
assert(large_ == NULL);
|
||||
do {
|
||||
ParsedInternalKey ikey;
|
||||
if (ParseKey(&ikey) && ikey.sequence <= sequence_) {
|
||||
@ -167,12 +197,17 @@ void DBIter::FindNextUserEntry(bool skipping, std::string* skip) {
|
||||
skipping = true;
|
||||
break;
|
||||
case kTypeValue:
|
||||
case kTypeLargeValueRef:
|
||||
if (skipping &&
|
||||
user_comparator_->Compare(ikey.user_key, *skip) <= 0) {
|
||||
// Entry hidden
|
||||
} else {
|
||||
valid_ = true;
|
||||
saved_key_.clear();
|
||||
if (ikey.type == kTypeLargeValueRef) {
|
||||
large_ = new Large;
|
||||
large_->produced = false;
|
||||
}
|
||||
return;
|
||||
}
|
||||
break;
|
||||
@ -186,6 +221,7 @@ void DBIter::FindNextUserEntry(bool skipping, std::string* skip) {
|
||||
|
||||
void DBIter::Prev() {
|
||||
assert(valid_);
|
||||
ForgetLargeValue();
|
||||
|
||||
if (direction_ == kForward) { // Switch directions?
|
||||
// iter_ is pointing at the current entry. Scan backwards until
|
||||
@ -213,6 +249,7 @@ void DBIter::Prev() {
|
||||
|
||||
void DBIter::FindPrevUserEntry() {
|
||||
assert(direction_ == kReverse);
|
||||
assert(large_ == NULL);
|
||||
|
||||
ValueType value_type = kTypeDeletion;
|
||||
if (iter_->Valid()) {
|
||||
@ -249,11 +286,16 @@ void DBIter::FindPrevUserEntry() {
|
||||
direction_ = kForward;
|
||||
} else {
|
||||
valid_ = true;
|
||||
if (value_type == kTypeLargeValueRef) {
|
||||
large_ = new Large;
|
||||
large_->produced = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void DBIter::Seek(const Slice& target) {
|
||||
direction_ = kForward;
|
||||
ForgetLargeValue();
|
||||
ClearSavedValue();
|
||||
saved_key_.clear();
|
||||
AppendInternalKey(
|
||||
@ -268,6 +310,7 @@ void DBIter::Seek(const Slice& target) {
|
||||
|
||||
void DBIter::SeekToFirst() {
|
||||
direction_ = kForward;
|
||||
ForgetLargeValue();
|
||||
ClearSavedValue();
|
||||
iter_->SeekToFirst();
|
||||
if (iter_->Valid()) {
|
||||
@ -279,11 +322,67 @@ void DBIter::SeekToFirst() {
|
||||
|
||||
void DBIter::SeekToLast() {
|
||||
direction_ = kReverse;
|
||||
ForgetLargeValue();
|
||||
ClearSavedValue();
|
||||
iter_->SeekToLast();
|
||||
FindPrevUserEntry();
|
||||
}
|
||||
|
||||
void DBIter::ReadIndirectValue(Slice ref) const {
|
||||
assert(!large_->produced);
|
||||
large_->produced = true;
|
||||
LargeValueRef large_ref;
|
||||
if (ref.size() != LargeValueRef::ByteSize()) {
|
||||
large_->status = Status::Corruption("malformed large value reference");
|
||||
return;
|
||||
}
|
||||
memcpy(large_ref.data, ref.data(), LargeValueRef::ByteSize());
|
||||
std::string fname = LargeValueFileName(*dbname_, large_ref);
|
||||
RandomAccessFile* file;
|
||||
Status s = env_->NewRandomAccessFile(fname, &file);
|
||||
uint64_t file_size = 0;
|
||||
if (s.ok()) {
|
||||
s = env_->GetFileSize(fname, &file_size);
|
||||
}
|
||||
if (s.ok()) {
|
||||
uint64_t value_size = large_ref.ValueSize();
|
||||
large_->value.resize(value_size);
|
||||
Slice result;
|
||||
s = file->Read(0, file_size, &result,
|
||||
const_cast<char*>(large_->value.data()));
|
||||
if (s.ok()) {
|
||||
if (result.size() == file_size) {
|
||||
switch (large_ref.compression_type()) {
|
||||
case kNoCompression: {
|
||||
if (result.data() != large_->value.data()) {
|
||||
large_->value.assign(result.data(), result.size());
|
||||
}
|
||||
break;
|
||||
}
|
||||
case kSnappyCompression: {
|
||||
std::string uncompressed;
|
||||
if (port::Snappy_Uncompress(result.data(), result.size(),
|
||||
&uncompressed) &&
|
||||
uncompressed.size() == large_ref.ValueSize()) {
|
||||
swap(uncompressed, large_->value);
|
||||
} else {
|
||||
s = Status::Corruption(
|
||||
"Unable to read entire compressed large value file");
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
s = Status::Corruption("Unable to read entire large value file");
|
||||
}
|
||||
}
|
||||
delete file; // Ignore errors on closing
|
||||
}
|
||||
if (!s.ok()) {
|
||||
large_->value.clear();
|
||||
large_->status = s;
|
||||
}
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
Iterator* NewDBIterator(
|
@ -119,6 +119,9 @@ class DBTest {
|
||||
case kTypeValue:
|
||||
result += iter->value().ToString();
|
||||
break;
|
||||
case kTypeLargeValueRef:
|
||||
result += "LARGEVALUE(" + EscapeString(iter->value()) + ")";
|
||||
break;
|
||||
case kTypeDeletion:
|
||||
result += "DEL";
|
||||
break;
|
||||
@ -150,6 +153,26 @@ class DBTest {
|
||||
return size;
|
||||
}
|
||||
|
||||
std::set<LargeValueRef> LargeValueFiles() const {
|
||||
// Return the set of large value files that exist in the database
|
||||
std::vector<std::string> filenames;
|
||||
env_->GetChildren(dbname_, &filenames); // Ignoring errors on purpose
|
||||
uint64_t number;
|
||||
LargeValueRef large_ref;
|
||||
FileType type;
|
||||
std::set<LargeValueRef> live;
|
||||
for (int i = 0; i < filenames.size(); i++) {
|
||||
if (ParseFileName(filenames[i], &number, &large_ref, &type) &&
|
||||
type == kLargeValueFile) {
|
||||
fprintf(stderr, " live: %s\n",
|
||||
LargeValueRefToFilenameString(large_ref).c_str());
|
||||
live.insert(large_ref);
|
||||
}
|
||||
}
|
||||
fprintf(stderr, "Found %d live large value files\n", (int)live.size());
|
||||
return live;
|
||||
}
|
||||
|
||||
void Compact(const Slice& start, const Slice& limit) {
|
||||
dbfull()->TEST_CompactMemTable();
|
||||
int max_level_with_files = 1;
|
||||
@ -448,6 +471,7 @@ TEST(DBTest, MinorCompactionsHappen) {
|
||||
TEST(DBTest, RecoverWithLargeLog) {
|
||||
{
|
||||
Options options;
|
||||
options.large_value_threshold = 1048576;
|
||||
Reopen(&options);
|
||||
ASSERT_OK(Put("big1", std::string(200000, '1')));
|
||||
ASSERT_OK(Put("big2", std::string(200000, '2')));
|
||||
@ -460,6 +484,7 @@ TEST(DBTest, RecoverWithLargeLog) {
|
||||
// we flush table files in the middle of a large log file.
|
||||
Options options;
|
||||
options.write_buffer_size = 100000;
|
||||
options.large_value_threshold = 1048576;
|
||||
Reopen(&options);
|
||||
ASSERT_EQ(NumTableFilesAtLevel(0), 3);
|
||||
ASSERT_EQ(std::string(200000, '1'), Get("big1"));
|
||||
@ -472,6 +497,7 @@ TEST(DBTest, RecoverWithLargeLog) {
|
||||
TEST(DBTest, CompactionsGenerateMultipleFiles) {
|
||||
Options options;
|
||||
options.write_buffer_size = 100000000; // Large write buffer
|
||||
options.large_value_threshold = 1048576;
|
||||
Reopen(&options);
|
||||
|
||||
Random rnd(301);
|
||||
@ -544,53 +570,65 @@ static bool Between(uint64_t val, uint64_t low, uint64_t high) {
|
||||
}
|
||||
|
||||
TEST(DBTest, ApproximateSizes) {
|
||||
Options options;
|
||||
options.write_buffer_size = 100000000; // Large write buffer
|
||||
options.compression = kNoCompression;
|
||||
DestroyAndReopen();
|
||||
for (int test = 0; test < 2; test++) {
|
||||
// test==0: default large_value_threshold
|
||||
// test==1: 1 MB large_value_threshold
|
||||
Options options;
|
||||
options.large_value_threshold = (test == 0) ? 65536 : 1048576;
|
||||
options.write_buffer_size = 100000000; // Large write buffer
|
||||
options.compression = kNoCompression;
|
||||
DestroyAndReopen();
|
||||
|
||||
ASSERT_TRUE(Between(Size("", "xyz"), 0, 0));
|
||||
Reopen(&options);
|
||||
ASSERT_TRUE(Between(Size("", "xyz"), 0, 0));
|
||||
|
||||
// Write 8MB (80 values, each 100K)
|
||||
ASSERT_EQ(NumTableFilesAtLevel(0), 0);
|
||||
const int N = 80;
|
||||
Random rnd(301);
|
||||
for (int i = 0; i < N; i++) {
|
||||
ASSERT_OK(Put(Key(i), RandomString(&rnd, 100000)));
|
||||
}
|
||||
|
||||
// 0 because GetApproximateSizes() does not account for memtable space
|
||||
ASSERT_TRUE(Between(Size("", Key(50)), 0, 0));
|
||||
|
||||
// Check sizes across recovery by reopening a few times
|
||||
for (int run = 0; run < 3; run++) {
|
||||
ASSERT_TRUE(Between(Size("", "xyz"), 0, 0));
|
||||
Reopen(&options);
|
||||
ASSERT_TRUE(Between(Size("", "xyz"), 0, 0));
|
||||
|
||||
for (int compact_start = 0; compact_start < N; compact_start += 10) {
|
||||
for (int i = 0; i < N; i += 10) {
|
||||
ASSERT_TRUE(Between(Size("", Key(i)), 100000*i, 100000*i + 10000));
|
||||
ASSERT_TRUE(Between(Size("", Key(i)+".suffix"),
|
||||
100000 * (i+1), 100000 * (i+1) + 10000));
|
||||
ASSERT_TRUE(Between(Size(Key(i), Key(i+10)),
|
||||
100000 * 10, 100000 * 10 + 10000));
|
||||
}
|
||||
ASSERT_TRUE(Between(Size("", Key(50)), 5000000, 5010000));
|
||||
ASSERT_TRUE(Between(Size("", Key(50)+".suffix"), 5100000, 5110000));
|
||||
|
||||
dbfull()->TEST_CompactRange(0,
|
||||
Key(compact_start),
|
||||
Key(compact_start + 9));
|
||||
// Write 8MB (80 values, each 100K)
|
||||
ASSERT_EQ(NumTableFilesAtLevel(0), 0);
|
||||
const int N = 80;
|
||||
Random rnd(301);
|
||||
for (int i = 0; i < N; i++) {
|
||||
ASSERT_OK(Put(Key(i), RandomString(&rnd, 100000)));
|
||||
}
|
||||
if (test == 1) {
|
||||
// 0 because GetApproximateSizes() does not account for memtable space for
|
||||
// non-large values
|
||||
ASSERT_TRUE(Between(Size("", Key(50)), 0, 0));
|
||||
} else {
|
||||
ASSERT_TRUE(Between(Size("", Key(50)), 100000*50, 100000*50 + 10000));
|
||||
ASSERT_TRUE(Between(Size(Key(20), Key(30)),
|
||||
100000*10, 100000*10 + 10000));
|
||||
}
|
||||
|
||||
ASSERT_EQ(NumTableFilesAtLevel(0), 0);
|
||||
ASSERT_GT(NumTableFilesAtLevel(1), 0);
|
||||
// Check sizes across recovery by reopening a few times
|
||||
for (int run = 0; run < 3; run++) {
|
||||
Reopen(&options);
|
||||
|
||||
for (int compact_start = 0; compact_start < N; compact_start += 10) {
|
||||
for (int i = 0; i < N; i += 10) {
|
||||
ASSERT_TRUE(Between(Size("", Key(i)), 100000*i, 100000*i + 10000));
|
||||
ASSERT_TRUE(Between(Size("", Key(i)+".suffix"),
|
||||
100000 * (i+1), 100000 * (i+1) + 10000));
|
||||
ASSERT_TRUE(Between(Size(Key(i), Key(i+10)),
|
||||
100000 * 10, 100000 * 10 + 10000));
|
||||
}
|
||||
ASSERT_TRUE(Between(Size("", Key(50)), 5000000, 5010000));
|
||||
ASSERT_TRUE(Between(Size("", Key(50)+".suffix"), 5100000, 5110000));
|
||||
|
||||
dbfull()->TEST_CompactRange(0,
|
||||
Key(compact_start),
|
||||
Key(compact_start + 9));
|
||||
}
|
||||
|
||||
ASSERT_EQ(NumTableFilesAtLevel(0), 0);
|
||||
ASSERT_GT(NumTableFilesAtLevel(1), 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(DBTest, ApproximateSizes_MixOfSmallAndLarge) {
|
||||
Options options;
|
||||
options.large_value_threshold = 65536;
|
||||
options.compression = kNoCompression;
|
||||
Reopen();
|
||||
|
||||
@ -763,6 +801,146 @@ TEST(DBTest, ComparatorCheck) {
|
||||
<< s.ToString();
|
||||
}
|
||||
|
||||
static bool LargeValuesOK(DBTest* db,
|
||||
const std::set<LargeValueRef>& expected) {
|
||||
std::set<LargeValueRef> actual = db->LargeValueFiles();
|
||||
if (actual.size() != expected.size()) {
|
||||
fprintf(stderr, "Sets differ in size: %d vs %d\n",
|
||||
(int)actual.size(), (int)expected.size());
|
||||
return false;
|
||||
}
|
||||
for (std::set<LargeValueRef>::const_iterator it = expected.begin();
|
||||
it != expected.end();
|
||||
++it) {
|
||||
if (actual.count(*it) != 1) {
|
||||
fprintf(stderr, " key '%s' not found in actual set\n",
|
||||
LargeValueRefToFilenameString(*it).c_str());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
TEST(DBTest, LargeValues1) {
|
||||
Options options;
|
||||
options.large_value_threshold = 10000;
|
||||
Reopen(&options);
|
||||
|
||||
Random rnd(301);
|
||||
|
||||
std::string big1;
|
||||
test::CompressibleString(&rnd, 1.0, 100000, &big1); // Not compressible
|
||||
std::set<LargeValueRef> expected;
|
||||
|
||||
ASSERT_OK(Put("big1", big1));
|
||||
expected.insert(LargeValueRef::Make(big1, kNoCompression));
|
||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||
|
||||
ASSERT_OK(Delete("big1"));
|
||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||
ASSERT_OK(dbfull()->TEST_CompactMemTable());
|
||||
// No handling of deletion markers on memtable compactions, so big1 remains
|
||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||
|
||||
dbfull()->TEST_CompactRange(0, "", "z");
|
||||
expected.erase(LargeValueRef::Make(big1, kNoCompression));
|
||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||
}
|
||||
|
||||
static bool SnappyCompressionSupported() {
|
||||
std::string out;
|
||||
Slice in = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
|
||||
return port::Snappy_Compress(in.data(), in.size(), &out);
|
||||
}
|
||||
|
||||
TEST(DBTest, LargeValues2) {
|
||||
Options options;
|
||||
options.large_value_threshold = 10000;
|
||||
Reopen(&options);
|
||||
|
||||
Random rnd(301);
|
||||
|
||||
std::string big1, big2;
|
||||
test::CompressibleString(&rnd, 1.0, 20000, &big1); // Not compressible
|
||||
test::CompressibleString(&rnd, 0.6, 40000, &big2); // Compressible
|
||||
std::set<LargeValueRef> expected;
|
||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||
|
||||
ASSERT_OK(Put("big1", big1));
|
||||
expected.insert(LargeValueRef::Make(big1, kNoCompression));
|
||||
ASSERT_EQ(big1, Get("big1"));
|
||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||
|
||||
ASSERT_OK(Put("big2", big2));
|
||||
ASSERT_EQ(big2, Get("big2"));
|
||||
if (SnappyCompressionSupported()) {
|
||||
expected.insert(LargeValueRef::Make(big2, kSnappyCompression));
|
||||
} else {
|
||||
expected.insert(LargeValueRef::Make(big2, kNoCompression));
|
||||
}
|
||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||
|
||||
ASSERT_OK(dbfull()->TEST_CompactMemTable());
|
||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||
|
||||
dbfull()->TEST_CompactRange(0, "", "z");
|
||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||
|
||||
ASSERT_OK(Put("big2", big2));
|
||||
ASSERT_OK(Put("big2_b", big2));
|
||||
ASSERT_EQ(big1, Get("big1"));
|
||||
ASSERT_EQ(big2, Get("big2"));
|
||||
ASSERT_EQ(big2, Get("big2_b"));
|
||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||
|
||||
ASSERT_OK(Delete("big1"));
|
||||
ASSERT_EQ("NOT_FOUND", Get("big1"));
|
||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||
|
||||
ASSERT_OK(dbfull()->TEST_CompactMemTable());
|
||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||
dbfull()->TEST_CompactRange(0, "", "z");
|
||||
expected.erase(LargeValueRef::Make(big1, kNoCompression));
|
||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||
dbfull()->TEST_CompactRange(1, "", "z");
|
||||
|
||||
ASSERT_OK(Delete("big2"));
|
||||
ASSERT_EQ("NOT_FOUND", Get("big2"));
|
||||
ASSERT_EQ(big2, Get("big2_b"));
|
||||
ASSERT_OK(dbfull()->TEST_CompactMemTable());
|
||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||
dbfull()->TEST_CompactRange(0, "", "z");
|
||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||
|
||||
// Make sure the large value refs survive a reload and compactions after
|
||||
// the reload.
|
||||
Reopen();
|
||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||
ASSERT_OK(Put("foo", "bar"));
|
||||
ASSERT_OK(dbfull()->TEST_CompactMemTable());
|
||||
dbfull()->TEST_CompactRange(0, "", "z");
|
||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||
}
|
||||
|
||||
TEST(DBTest, LargeValues3) {
|
||||
// Make sure we don't compress values if
|
||||
Options options;
|
||||
options.large_value_threshold = 10000;
|
||||
options.compression = kNoCompression;
|
||||
Reopen(&options);
|
||||
|
||||
Random rnd(301);
|
||||
|
||||
std::string big1 = std::string(100000, 'x'); // Very compressible
|
||||
std::set<LargeValueRef> expected;
|
||||
|
||||
ASSERT_OK(Put("big1", big1));
|
||||
ASSERT_EQ(big1, Get("big1"));
|
||||
expected.insert(LargeValueRef::Make(big1, kNoCompression));
|
||||
ASSERT_TRUE(LargeValuesOK(this, expected));
|
||||
}
|
||||
|
||||
|
||||
TEST(DBTest, DBOpen_Options) {
|
||||
std::string dbname = test::TmpDir() + "/db_options_test";
|
||||
DestroyDB(dbname, Options());
|
||||
@ -847,6 +1025,9 @@ class ModelDB: public DB {
|
||||
case kTypeValue:
|
||||
map_[it.key().ToString()] = it.value().ToString();
|
||||
break;
|
||||
case kTypeLargeValueRef:
|
||||
assert(false); // Should not occur
|
||||
break;
|
||||
case kTypeDeletion:
|
||||
map_.erase(it.key().ToString());
|
||||
break;
|
@ -84,4 +84,69 @@ void InternalKeyComparator::FindShortSuccessor(std::string* key) const {
|
||||
}
|
||||
}
|
||||
|
||||
LargeValueRef LargeValueRef::Make(const Slice& value, CompressionType ctype) {
|
||||
LargeValueRef result;
|
||||
port::SHA1_Hash(value.data(), value.size(), &result.data[0]);
|
||||
EncodeFixed64(&result.data[20], value.size());
|
||||
result.data[28] = static_cast<unsigned char>(ctype);
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string LargeValueRefToFilenameString(const LargeValueRef& h) {
|
||||
assert(sizeof(h.data) == LargeValueRef::ByteSize());
|
||||
assert(sizeof(h.data) == 29); // So we can hardcode the array size of buf
|
||||
static const char tohex[] = "0123456789abcdef";
|
||||
char buf[20*2];
|
||||
for (int i = 0; i < 20; i++) {
|
||||
buf[2*i] = tohex[(h.data[i] >> 4) & 0xf];
|
||||
buf[2*i+1] = tohex[h.data[i] & 0xf];
|
||||
}
|
||||
std::string result = std::string(buf, sizeof(buf));
|
||||
result += "-";
|
||||
result += NumberToString(h.ValueSize());
|
||||
result += "-";
|
||||
result += NumberToString(static_cast<uint64_t>(h.compression_type()));
|
||||
return result;
|
||||
}
|
||||
|
||||
static uint32_t hexvalue(char c) {
|
||||
if (c >= '0' && c <= '9') {
|
||||
return c - '0';
|
||||
} else if (c >= 'A' && c <= 'F') {
|
||||
return 10 + c - 'A';
|
||||
} else {
|
||||
assert(c >= 'a' && c <= 'f');
|
||||
return 10 + c - 'a';
|
||||
}
|
||||
}
|
||||
|
||||
bool FilenameStringToLargeValueRef(const Slice& s, LargeValueRef* h) {
|
||||
Slice in = s;
|
||||
if (in.size() < 40) {
|
||||
return false;
|
||||
}
|
||||
for (int i = 0; i < 20; i++) {
|
||||
if (!isxdigit(in[i*2]) || !isxdigit(in[i*2+1])) {
|
||||
return false;
|
||||
}
|
||||
unsigned char c = (hexvalue(in[i*2])<<4) | hexvalue(in[i*2+1]);
|
||||
h->data[i] = c;
|
||||
}
|
||||
in.remove_prefix(40);
|
||||
uint64_t value_size, ctype;
|
||||
|
||||
if (ConsumeChar(&in, '-') &&
|
||||
ConsumeDecimalNumber(&in, &value_size) &&
|
||||
ConsumeChar(&in, '-') &&
|
||||
ConsumeDecimalNumber(&in, &ctype) &&
|
||||
in.empty() &&
|
||||
(ctype <= kSnappyCompression)) {
|
||||
EncodeFixed64(&h->data[20], value_size);
|
||||
h->data[28] = static_cast<unsigned char>(ctype);
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -29,6 +29,7 @@ class InternalKey;
|
||||
enum ValueType {
|
||||
kTypeDeletion = 0x0,
|
||||
kTypeValue = 0x1,
|
||||
kTypeLargeValueRef = 0x2,
|
||||
};
|
||||
// kValueTypeForSeek defines the ValueType that should be passed when
|
||||
// constructing a ParsedInternalKey object for seeking to a particular
|
||||
@ -36,7 +37,7 @@ enum ValueType {
|
||||
// and the value type is embedded as the low 8 bits in the sequence
|
||||
// number in internal keys, we need to use the highest-numbered
|
||||
// ValueType, not the lowest).
|
||||
static const ValueType kValueTypeForSeek = kTypeValue;
|
||||
static const ValueType kValueTypeForSeek = kTypeLargeValueRef;
|
||||
|
||||
typedef uint64_t SequenceNumber;
|
||||
|
||||
@ -138,6 +139,54 @@ inline int InternalKeyComparator::Compare(
|
||||
return Compare(a.Encode(), b.Encode());
|
||||
}
|
||||
|
||||
// LargeValueRef is a 160-bit hash value (20 bytes), plus an 8 byte
|
||||
// uncompressed size, and a 1 byte CompressionType code. An
|
||||
// encoded form of it is embedded in the filenames of large value
|
||||
// files stored in the database, and the raw binary form is stored as
|
||||
// the iter->value() result for values of type kTypeLargeValueRef in
|
||||
// the table and log files that make up the database.
|
||||
struct LargeValueRef {
|
||||
char data[29];
|
||||
|
||||
// Initialize a large value ref for the given data
|
||||
static LargeValueRef Make(const Slice& data,
|
||||
CompressionType compression_type);
|
||||
|
||||
// Initialize a large value ref from a serialized, 29-byte reference value
|
||||
static LargeValueRef FromRef(const Slice& ref) {
|
||||
LargeValueRef result;
|
||||
assert(ref.size() == sizeof(result.data));
|
||||
memcpy(result.data, ref.data(), sizeof(result.data));
|
||||
return result;
|
||||
}
|
||||
|
||||
// Return the number of bytes in a LargeValueRef (not the
|
||||
// number of bytes in the value referenced).
|
||||
static size_t ByteSize() { return sizeof(LargeValueRef().data); }
|
||||
|
||||
// Return the number of bytes in the value referenced by "*this".
|
||||
uint64_t ValueSize() const { return DecodeFixed64(&data[20]); }
|
||||
|
||||
CompressionType compression_type() const {
|
||||
return static_cast<CompressionType>(data[28]);
|
||||
}
|
||||
|
||||
bool operator==(const LargeValueRef& b) const {
|
||||
return memcmp(data, b.data, sizeof(data)) == 0;
|
||||
}
|
||||
bool operator<(const LargeValueRef& b) const {
|
||||
return memcmp(data, b.data, sizeof(data)) < 0;
|
||||
}
|
||||
};
|
||||
|
||||
// Convert the large value ref to a human-readable string suitable
|
||||
// for embedding in a large value filename.
|
||||
extern std::string LargeValueRefToFilenameString(const LargeValueRef& h);
|
||||
|
||||
// Parse the large value filename string in "input" and store it in
|
||||
// "*h". If successful, returns true. Otherwise returns false.
|
||||
extern bool FilenameStringToLargeValueRef(const Slice& in, LargeValueRef* ref);
|
||||
|
||||
inline bool ParseInternalKey(const Slice& internal_key,
|
||||
ParsedInternalKey* result) {
|
||||
const size_t n = internal_key.size();
|
||||
@ -147,7 +196,7 @@ inline bool ParseInternalKey(const Slice& internal_key,
|
||||
result->sequence = num >> 8;
|
||||
result->type = static_cast<ValueType>(c);
|
||||
result->user_key = Slice(internal_key.data(), n - 8);
|
||||
return (c <= static_cast<unsigned char>(kTypeValue));
|
||||
return (c <= static_cast<unsigned char>(kTypeLargeValueRef));
|
||||
}
|
||||
|
||||
}
|
@ -76,6 +76,9 @@ TEST(FormatTest, InternalKeyShortSeparator) {
|
||||
ASSERT_EQ(IKey("foo", 100, kTypeValue),
|
||||
Shorten(IKey("foo", 100, kTypeValue),
|
||||
IKey("foo", 100, kTypeDeletion)));
|
||||
ASSERT_EQ(IKey("foo", 100, kTypeValue),
|
||||
Shorten(IKey("foo", 100, kTypeValue),
|
||||
IKey("foo", 100, kTypeLargeValueRef)));
|
||||
|
||||
// When user keys are misordered
|
||||
ASSERT_EQ(IKey("foo", 100, kTypeValue),
|
||||
@ -105,6 +108,18 @@ TEST(FormatTest, InternalKeyShortestSuccessor) {
|
||||
ShortSuccessor(IKey("\xff\xff", 100, kTypeValue)));
|
||||
}
|
||||
|
||||
TEST(FormatTest, SHA1) {
|
||||
// Check that we are computing the same value as sha1.
|
||||
// Note that the last two numbers are the length of the input and the
|
||||
// compression type.
|
||||
ASSERT_EQ("aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d-5-0", // SHA1, uncompr
|
||||
LargeValueRefToFilenameString(
|
||||
LargeValueRef::Make("hello", kNoCompression)));
|
||||
ASSERT_EQ("aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d-5-1", // SHA1, lwcompr
|
||||
LargeValueRefToFilenameString(
|
||||
LargeValueRef::Make("hello", kSnappyCompression)));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
@ -30,6 +30,14 @@ std::string TableFileName(const std::string& name, uint64_t number) {
|
||||
return MakeFileName(name, number, "sst");
|
||||
}
|
||||
|
||||
std::string LargeValueFileName(const std::string& name,
|
||||
const LargeValueRef& large_ref) {
|
||||
std::string result = name + "/";
|
||||
result += LargeValueRefToFilenameString(large_ref);
|
||||
result += ".val";
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string DescriptorFileName(const std::string& dbname, uint64_t number) {
|
||||
assert(number > 0);
|
||||
char buf[100];
|
||||
@ -67,9 +75,11 @@ std::string OldInfoLogFileName(const std::string& dbname) {
|
||||
// dbname/LOG
|
||||
// dbname/LOG.old
|
||||
// dbname/MANIFEST-[0-9]+
|
||||
// dbname/[0-9a-f]{20}-[0-9]+-[0-9]+.val
|
||||
// dbname/[0-9]+.(log|sst)
|
||||
bool ParseFileName(const std::string& fname,
|
||||
uint64_t* number,
|
||||
LargeValueRef* large_ref,
|
||||
FileType* type) {
|
||||
Slice rest(fname);
|
||||
if (rest == "CURRENT") {
|
||||
@ -81,6 +91,15 @@ bool ParseFileName(const std::string& fname,
|
||||
} else if (rest == "LOG" || rest == "LOG.old") {
|
||||
*number = 0;
|
||||
*type = kInfoLogFile;
|
||||
} else if (rest.size() >= 4 &&
|
||||
Slice(rest.data() + rest.size() - 4, 4) == ".val") {
|
||||
LargeValueRef h;
|
||||
if (!FilenameStringToLargeValueRef(Slice(rest.data(), rest.size() - 4),
|
||||
&h)) {
|
||||
return false;
|
||||
}
|
||||
*large_ref = h;
|
||||
*type = kLargeValueFile;
|
||||
} else if (rest.starts_with("MANIFEST-")) {
|
||||
rest.remove_prefix(strlen("MANIFEST-"));
|
||||
uint64_t num;
|
@ -16,11 +16,13 @@
|
||||
namespace leveldb {
|
||||
|
||||
class Env;
|
||||
struct LargeValueRef;
|
||||
|
||||
enum FileType {
|
||||
kLogFile,
|
||||
kDBLockFile,
|
||||
kTableFile,
|
||||
kLargeValueFile,
|
||||
kDescriptorFile,
|
||||
kCurrentFile,
|
||||
kTempFile,
|
||||
@ -37,6 +39,12 @@ extern std::string LogFileName(const std::string& dbname, uint64_t number);
|
||||
// "dbname".
|
||||
extern std::string TableFileName(const std::string& dbname, uint64_t number);
|
||||
|
||||
// Return the name of the large value file with the specified large
|
||||
// value reference in the db named by "dbname". The result will be
|
||||
// prefixed with "dbname".
|
||||
extern std::string LargeValueFileName(const std::string& dbname,
|
||||
const LargeValueRef& large_ref);
|
||||
|
||||
// Return the name of the descriptor file for the db named by
|
||||
// "dbname" and the specified incarnation number. The result will be
|
||||
// prefixed with "dbname".
|
||||
@ -63,10 +71,14 @@ extern std::string InfoLogFileName(const std::string& dbname);
|
||||
extern std::string OldInfoLogFileName(const std::string& dbname);
|
||||
|
||||
// If filename is a leveldb file, store the type of the file in *type.
|
||||
// The number encoded in the filename is stored in *number. If the
|
||||
// filename was successfully parsed, returns true. Else return false.
|
||||
// If *type is kLargeValueFile, then the large value reference data
|
||||
// from the filename is stored in "*large_ref. For all other types of
|
||||
// files, the number encoded in the filename is stored in *number. If
|
||||
// the filename was successfully parsed, returns true. Else return
|
||||
// false.
|
||||
extern bool ParseFileName(const std::string& filename,
|
||||
uint64_t* number,
|
||||
LargeValueRef* large_ref,
|
||||
FileType* type);
|
||||
|
||||
// Make the CURRENT file point to the descriptor file with the
|
156
db/filename_test.cc
Normal file
156
db/filename_test.cc
Normal file
@ -0,0 +1,156 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "db/filename.h"
|
||||
|
||||
#include "db/dbformat.h"
|
||||
#include "port/port.h"
|
||||
#include "util/logging.h"
|
||||
#include "util/testharness.h"
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
class FileNameTest { };
|
||||
|
||||
TEST(FileNameTest, Parse) {
|
||||
Slice db;
|
||||
FileType type;
|
||||
uint64_t number;
|
||||
LargeValueRef large_ref;
|
||||
|
||||
// Successful parses
|
||||
static struct {
|
||||
const char* fname;
|
||||
uint64_t number;
|
||||
const char* large_ref;
|
||||
FileType type;
|
||||
} cases[] = {
|
||||
{ "100.log", 100, "", kLogFile },
|
||||
{ "0.log", 0, "", kLogFile },
|
||||
{ "0.sst", 0, "", kTableFile },
|
||||
{ "CURRENT", 0, "", kCurrentFile },
|
||||
{ "LOCK", 0, "", kDBLockFile },
|
||||
{ "MANIFEST-2", 2, "", kDescriptorFile },
|
||||
{ "MANIFEST-7", 7, "", kDescriptorFile },
|
||||
{ "LOG", 0, "", kInfoLogFile },
|
||||
{ "LOG.old", 0, "", kInfoLogFile },
|
||||
{ "18446744073709551615.log", 18446744073709551615ull, "",
|
||||
kLogFile },
|
||||
{ "2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2323-1234-0.val", 0,
|
||||
"2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2323-1234-0", kLargeValueFile },
|
||||
{ "2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2323-10000000000-0.val", 0,
|
||||
"2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2323-10000000000-0",
|
||||
kLargeValueFile },
|
||||
};
|
||||
for (int i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
|
||||
std::string f = cases[i].fname;
|
||||
ASSERT_TRUE(ParseFileName(f, &number, &large_ref, &type)) << f;
|
||||
ASSERT_EQ(cases[i].type, type) << f;
|
||||
if (type == kLargeValueFile) {
|
||||
ASSERT_EQ(cases[i].large_ref, LargeValueRefToFilenameString(large_ref))
|
||||
<< f;
|
||||
} else {
|
||||
ASSERT_EQ(cases[i].number, number) << f;
|
||||
}
|
||||
}
|
||||
|
||||
// Errors
|
||||
static const char* errors[] = {
|
||||
"",
|
||||
"foo",
|
||||
"foo-dx-100.log",
|
||||
".log",
|
||||
"",
|
||||
"manifest",
|
||||
"CURREN",
|
||||
"CURRENTX",
|
||||
"MANIFES",
|
||||
"MANIFEST",
|
||||
"MANIFEST-",
|
||||
"XMANIFEST-3",
|
||||
"MANIFEST-3x",
|
||||
"LOC",
|
||||
"LOCKx",
|
||||
"LO",
|
||||
"LOGx",
|
||||
"18446744073709551616.log",
|
||||
"184467440737095516150.log",
|
||||
"100",
|
||||
"100.",
|
||||
"100.lop",
|
||||
"100.val",
|
||||
".val",
|
||||
"123456789012345678901234567890123456789-12340.val",
|
||||
"1234567890123456789012345678901234567-123-0.val",
|
||||
"12345678901234567890123456789012345678902-100-1-.val",
|
||||
// Overflow on value size
|
||||
"2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2323-100000000000000000000-1.val",
|
||||
// '03.val' is a bad compression type
|
||||
"2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2323-100000-3.val" };
|
||||
for (int i = 0; i < sizeof(errors) / sizeof(errors[0]); i++) {
|
||||
std::string f = errors[i];
|
||||
ASSERT_TRUE(!ParseFileName(f, &number, &large_ref, &type)) << f;
|
||||
};
|
||||
}
|
||||
|
||||
TEST(FileNameTest, Construction) {
|
||||
uint64_t number;
|
||||
FileType type;
|
||||
LargeValueRef large_ref;
|
||||
std::string fname;
|
||||
|
||||
fname = CurrentFileName("foo");
|
||||
ASSERT_EQ("foo/", std::string(fname.data(), 4));
|
||||
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &large_ref, &type));
|
||||
ASSERT_EQ(0, number);
|
||||
ASSERT_EQ(kCurrentFile, type);
|
||||
|
||||
fname = LockFileName("foo");
|
||||
ASSERT_EQ("foo/", std::string(fname.data(), 4));
|
||||
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &large_ref, &type));
|
||||
ASSERT_EQ(0, number);
|
||||
ASSERT_EQ(kDBLockFile, type);
|
||||
|
||||
fname = LogFileName("foo", 192);
|
||||
ASSERT_EQ("foo/", std::string(fname.data(), 4));
|
||||
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &large_ref, &type));
|
||||
ASSERT_EQ(192, number);
|
||||
ASSERT_EQ(kLogFile, type);
|
||||
|
||||
fname = TableFileName("bar", 200);
|
||||
ASSERT_EQ("bar/", std::string(fname.data(), 4));
|
||||
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &large_ref, &type));
|
||||
ASSERT_EQ(200, number);
|
||||
ASSERT_EQ(kTableFile, type);
|
||||
|
||||
fname = DescriptorFileName("bar", 100);
|
||||
ASSERT_EQ("bar/", std::string(fname.data(), 4));
|
||||
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &large_ref, &type));
|
||||
ASSERT_EQ(100, number);
|
||||
ASSERT_EQ(kDescriptorFile, type);
|
||||
|
||||
fname = TempFileName("tmp", 999);
|
||||
ASSERT_EQ("tmp/", std::string(fname.data(), 4));
|
||||
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &large_ref, &type));
|
||||
ASSERT_EQ(999, number);
|
||||
ASSERT_EQ(kTempFile, type);
|
||||
|
||||
for (int i = 0; i <= kSnappyCompression; i++) {
|
||||
CompressionType ctype = static_cast<CompressionType>(i);
|
||||
std::string value = "abcdef";
|
||||
LargeValueRef real_large_ref = LargeValueRef::Make(Slice(value), ctype);
|
||||
fname = LargeValueFileName("tmp", real_large_ref);
|
||||
ASSERT_EQ("tmp/", std::string(fname.data(), 4));
|
||||
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &large_ref, &type));
|
||||
ASSERT_TRUE(real_large_ref == large_ref);
|
||||
ASSERT_EQ(kLargeValueFile, type);
|
||||
ASSERT_EQ(large_ref.compression_type(), ctype);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
return leveldb::test::RunAllTests();
|
||||
}
|
@ -46,9 +46,9 @@ Status Writer::AddRecord(const Slice& slice) {
|
||||
}
|
||||
|
||||
// Invariant: we never leave < kHeaderSize bytes in a block.
|
||||
assert(kBlockSize - block_offset_ - kHeaderSize >= 0);
|
||||
const int avail = kBlockSize - block_offset_ - kHeaderSize;
|
||||
assert(avail >= 0);
|
||||
|
||||
const size_t avail = kBlockSize - block_offset_ - kHeaderSize;
|
||||
const size_t fragment_length = (left < avail) ? left : avail;
|
||||
|
||||
RecordType type;
|
@ -6,7 +6,8 @@
|
||||
// (1) Any log files are first converted to tables
|
||||
// (2) We scan every table to compute
|
||||
// (a) smallest/largest for the table
|
||||
// (b) largest sequence number in the table
|
||||
// (b) large value refs from the table
|
||||
// (c) largest sequence number in the table
|
||||
// (3) We generate descriptor contents:
|
||||
// - log number is set to zero
|
||||
// - next-file-number is set to 1 + largest file number we found
|
||||
@ -21,8 +22,9 @@
|
||||
// (c) For each table: if it overlaps earlier table, place in level-0,
|
||||
// else place in level-M.
|
||||
// Possible optimization 2:
|
||||
// Store per-table metadata (smallest, largest, largest-seq#, ...)
|
||||
// in the table's meta section to speed up ScanTable.
|
||||
// Store per-table metadata (smallest, largest, largest-seq#,
|
||||
// large-value-refs, ...) in the table's meta section to speed up
|
||||
// ScanTable.
|
||||
|
||||
#include "db/builder.h"
|
||||
#include "db/db_impl.h"
|
||||
@ -71,7 +73,7 @@ class Repairer {
|
||||
}
|
||||
if (status.ok()) {
|
||||
unsigned long long bytes = 0;
|
||||
for (size_t i = 0; i < tables_.size(); i++) {
|
||||
for (int i = 0; i < tables_.size(); i++) {
|
||||
bytes += tables_[i].meta.file_size;
|
||||
}
|
||||
Log(env_, options_.info_log,
|
||||
@ -117,10 +119,13 @@ class Repairer {
|
||||
}
|
||||
|
||||
uint64_t number;
|
||||
LargeValueRef large_ref;
|
||||
FileType type;
|
||||
for (size_t i = 0; i < filenames.size(); i++) {
|
||||
if (ParseFileName(filenames[i], &number, &type)) {
|
||||
if (type == kDescriptorFile) {
|
||||
for (int i = 0; i < filenames.size(); i++) {
|
||||
if (ParseFileName(filenames[i], &number, &large_ref, &type)) {
|
||||
if (type == kLargeValueFile) {
|
||||
// Will be picked up when we process a Table that points to it
|
||||
} else if (type == kDescriptorFile) {
|
||||
manifests_.push_back(filenames[i]);
|
||||
} else {
|
||||
if (number + 1 > next_file_number_) {
|
||||
@ -140,7 +145,7 @@ class Repairer {
|
||||
}
|
||||
|
||||
void ConvertLogFilesToTables() {
|
||||
for (size_t i = 0; i < logs_.size(); i++) {
|
||||
for (int i = 0; i < logs_.size(); i++) {
|
||||
std::string logname = LogFileName(dbname_, logs_[i]);
|
||||
Status status = ConvertLogToTable(logs_[i]);
|
||||
if (!status.ok()) {
|
||||
@ -234,7 +239,7 @@ class Repairer {
|
||||
|
||||
void ExtractMetaData() {
|
||||
std::vector<TableInfo> kept;
|
||||
for (size_t i = 0; i < table_numbers_.size(); i++) {
|
||||
for (int i = 0; i < table_numbers_.size(); i++) {
|
||||
TableInfo t;
|
||||
t.meta.number = table_numbers_[i];
|
||||
Status status = ScanTable(&t);
|
||||
@ -278,6 +283,17 @@ class Repairer {
|
||||
if (parsed.sequence > t->max_sequence) {
|
||||
t->max_sequence = parsed.sequence;
|
||||
}
|
||||
|
||||
if (ExtractValueType(key) == kTypeLargeValueRef) {
|
||||
if (iter->value().size() != LargeValueRef::ByteSize()) {
|
||||
Log(env_, options_.info_log, "Table #%llu: bad large value ref",
|
||||
(unsigned long long) t->meta.number);
|
||||
} else {
|
||||
edit_.AddLargeValueRef(LargeValueRef::FromRef(iter->value()),
|
||||
t->meta.number,
|
||||
key);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!iter->status().ok()) {
|
||||
status = iter->status();
|
||||
@ -300,7 +316,7 @@ class Repairer {
|
||||
}
|
||||
|
||||
SequenceNumber max_sequence = 0;
|
||||
for (size_t i = 0; i < tables_.size(); i++) {
|
||||
for (int i = 0; i < tables_.size(); i++) {
|
||||
if (max_sequence < tables_[i].max_sequence) {
|
||||
max_sequence = tables_[i].max_sequence;
|
||||
}
|
||||
@ -311,7 +327,7 @@ class Repairer {
|
||||
edit_.SetNextFile(next_file_number_);
|
||||
edit_.SetLastSequence(max_sequence);
|
||||
|
||||
for (size_t i = 0; i < tables_.size(); i++) {
|
||||
for (int i = 0; i < tables_.size(); i++) {
|
||||
// TODO(opt): separate out into multiple levels
|
||||
const TableInfo& t = tables_[i];
|
||||
edit_.AddFile(0, t.meta.number, t.meta.file_size,
|
||||
@ -335,7 +351,7 @@ class Repairer {
|
||||
env_->DeleteFile(tmp);
|
||||
} else {
|
||||
// Discard older manifests
|
||||
for (size_t i = 0; i < manifests_.size(); i++) {
|
||||
for (int i = 0; i < manifests_.size(); i++) {
|
||||
ArchiveFile(dbname_ + "/" + manifests_[i]);
|
||||
}
|
||||
|
@ -19,7 +19,7 @@ enum Tag {
|
||||
kCompactPointer = 5,
|
||||
kDeletedFile = 6,
|
||||
kNewFile = 7,
|
||||
// 8 was used for large value refs
|
||||
kLargeValueRef = 8,
|
||||
kPrevLogNumber = 9,
|
||||
};
|
||||
|
||||
@ -36,6 +36,7 @@ void VersionEdit::Clear() {
|
||||
has_last_sequence_ = false;
|
||||
deleted_files_.clear();
|
||||
new_files_.clear();
|
||||
large_refs_added_.clear();
|
||||
}
|
||||
|
||||
void VersionEdit::EncodeTo(std::string* dst) const {
|
||||
@ -60,7 +61,7 @@ void VersionEdit::EncodeTo(std::string* dst) const {
|
||||
PutVarint64(dst, last_sequence_);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < compact_pointers_.size(); i++) {
|
||||
for (int i = 0; i < compact_pointers_.size(); i++) {
|
||||
PutVarint32(dst, kCompactPointer);
|
||||
PutVarint32(dst, compact_pointers_[i].first); // level
|
||||
PutLengthPrefixedSlice(dst, compact_pointers_[i].second.Encode());
|
||||
@ -74,7 +75,7 @@ void VersionEdit::EncodeTo(std::string* dst) const {
|
||||
PutVarint64(dst, iter->second); // file number
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < new_files_.size(); i++) {
|
||||
for (int i = 0; i < new_files_.size(); i++) {
|
||||
const FileMetaData& f = new_files_[i].second;
|
||||
PutVarint32(dst, kNewFile);
|
||||
PutVarint32(dst, new_files_[i].first); // level
|
||||
@ -83,6 +84,15 @@ void VersionEdit::EncodeTo(std::string* dst) const {
|
||||
PutLengthPrefixedSlice(dst, f.smallest.Encode());
|
||||
PutLengthPrefixedSlice(dst, f.largest.Encode());
|
||||
}
|
||||
|
||||
for (int i = 0; i < large_refs_added_.size(); i++) {
|
||||
const VersionEdit::Large& l = large_refs_added_[i];
|
||||
PutVarint32(dst, kLargeValueRef);
|
||||
PutLengthPrefixedSlice(dst,
|
||||
Slice(l.large_ref.data, LargeValueRef::ByteSize()));
|
||||
PutVarint64(dst, l.fnum);
|
||||
PutLengthPrefixedSlice(dst, l.internal_key.Encode());
|
||||
}
|
||||
}
|
||||
|
||||
static bool GetInternalKey(Slice* input, InternalKey* dst) {
|
||||
@ -117,6 +127,7 @@ Status VersionEdit::DecodeFrom(const Slice& src) {
|
||||
uint64_t number;
|
||||
FileMetaData f;
|
||||
Slice str;
|
||||
Large large;
|
||||
InternalKey key;
|
||||
|
||||
while (msg == NULL && GetVarint32(&input, &tag)) {
|
||||
@ -192,6 +203,18 @@ Status VersionEdit::DecodeFrom(const Slice& src) {
|
||||
}
|
||||
break;
|
||||
|
||||
case kLargeValueRef:
|
||||
if (GetLengthPrefixedSlice(&input, &str) &&
|
||||
(str.size() == LargeValueRef::ByteSize()) &&
|
||||
GetVarint64(&input, &large.fnum) &&
|
||||
GetInternalKey(&input, &large.internal_key)) {
|
||||
large.large_ref = LargeValueRef::FromRef(str);
|
||||
large_refs_added_.push_back(large);
|
||||
} else {
|
||||
msg = "large ref";
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
msg = "unknown tag";
|
||||
break;
|
||||
@ -232,7 +255,7 @@ std::string VersionEdit::DebugString() const {
|
||||
r.append("\n LastSeq: ");
|
||||
AppendNumberTo(&r, last_sequence_);
|
||||
}
|
||||
for (size_t i = 0; i < compact_pointers_.size(); i++) {
|
||||
for (int i = 0; i < compact_pointers_.size(); i++) {
|
||||
r.append("\n CompactPointer: ");
|
||||
AppendNumberTo(&r, compact_pointers_[i].first);
|
||||
r.append(" '");
|
||||
@ -247,7 +270,7 @@ std::string VersionEdit::DebugString() const {
|
||||
r.append(" ");
|
||||
AppendNumberTo(&r, iter->second);
|
||||
}
|
||||
for (size_t i = 0; i < new_files_.size(); i++) {
|
||||
for (int i = 0; i < new_files_.size(); i++) {
|
||||
const FileMetaData& f = new_files_[i].second;
|
||||
r.append("\n AddFile: ");
|
||||
AppendNumberTo(&r, new_files_[i].first);
|
||||
@ -261,6 +284,16 @@ std::string VersionEdit::DebugString() const {
|
||||
AppendEscapedStringTo(&r, f.largest.Encode());
|
||||
r.append("'");
|
||||
}
|
||||
for (int i = 0; i < large_refs_added_.size(); i++) {
|
||||
const VersionEdit::Large& l = large_refs_added_[i];
|
||||
r.append("\n LargeRef: ");
|
||||
AppendNumberTo(&r, l.fnum);
|
||||
r.append(" ");
|
||||
r.append(LargeValueRefToFilenameString(l.large_ref));
|
||||
r.append(" '");
|
||||
AppendEscapedStringTo(&r, l.internal_key.Encode());
|
||||
r.append("'");
|
||||
}
|
||||
r.append("\n}\n");
|
||||
return r;
|
||||
}
|
@ -75,6 +75,18 @@ class VersionEdit {
|
||||
deleted_files_.insert(std::make_pair(level, file));
|
||||
}
|
||||
|
||||
// Record that a large value with the specified large_ref was
|
||||
// written to the output file numbered "fnum"
|
||||
void AddLargeValueRef(const LargeValueRef& large_ref,
|
||||
uint64_t fnum,
|
||||
const Slice& internal_key) {
|
||||
large_refs_added_.resize(large_refs_added_.size() + 1);
|
||||
Large* large = &(large_refs_added_.back());
|
||||
large->large_ref = large_ref;
|
||||
large->fnum = fnum;
|
||||
large->internal_key.DecodeFrom(internal_key);
|
||||
}
|
||||
|
||||
void EncodeTo(std::string* dst) const;
|
||||
Status DecodeFrom(const Slice& src);
|
||||
|
||||
@ -99,6 +111,12 @@ class VersionEdit {
|
||||
std::vector< std::pair<int, InternalKey> > compact_pointers_;
|
||||
DeletedFileSet deleted_files_;
|
||||
std::vector< std::pair<int, FileMetaData> > new_files_;
|
||||
struct Large {
|
||||
LargeValueRef large_ref;
|
||||
uint64_t fnum;
|
||||
InternalKey internal_key;
|
||||
};
|
||||
std::vector<Large> large_refs_added_;
|
||||
};
|
||||
|
||||
}
|
@ -26,9 +26,13 @@ TEST(VersionEditTest, EncodeDecode) {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
TestEncodeDecode(edit);
|
||||
edit.AddFile(3, kBig + 300 + i, kBig + 400 + i,
|
||||
InternalKey("foo", kBig + 500 + i, kTypeValue),
|
||||
InternalKey("foo", kBig + 500 + i, kTypeLargeValueRef),
|
||||
InternalKey("zoo", kBig + 600 + i, kTypeDeletion));
|
||||
edit.DeleteFile(4, kBig + 700 + i);
|
||||
edit.AddLargeValueRef(LargeValueRef::Make("big", kNoCompression),
|
||||
kBig + 800 + i, "foobar");
|
||||
edit.AddLargeValueRef(LargeValueRef::Make("big2", kSnappyCompression),
|
||||
kBig + 801 + i, "baz");
|
||||
edit.SetCompactPointer(i, InternalKey("x", kBig + 900 + i, kTypeValue));
|
||||
}
|
||||
|
@ -58,7 +58,7 @@ std::string IntSetToString(const std::set<uint64_t>& s) {
|
||||
Version::~Version() {
|
||||
assert(refs_ == 0);
|
||||
for (int level = 0; level < config::kNumLevels; level++) {
|
||||
for (size_t i = 0; i < files_[level].size(); i++) {
|
||||
for (int i = 0; i < files_[level].size(); i++) {
|
||||
FileMetaData* f = files_[level][i];
|
||||
assert(f->refs >= 0);
|
||||
f->refs--;
|
||||
@ -134,7 +134,7 @@ class Version::LevelFileNumIterator : public Iterator {
|
||||
private:
|
||||
const InternalKeyComparator icmp_;
|
||||
const std::vector<FileMetaData*>* const flist_;
|
||||
uint32_t index_;
|
||||
int index_;
|
||||
|
||||
// Backing store for value(). Holds the file number and size.
|
||||
mutable char value_buf_[16];
|
||||
@ -164,7 +164,7 @@ Iterator* Version::NewConcatenatingIterator(const ReadOptions& options,
|
||||
void Version::AddIterators(const ReadOptions& options,
|
||||
std::vector<Iterator*>* iters) {
|
||||
// Merge all level zero files together since they may overlap
|
||||
for (size_t i = 0; i < files_[0].size(); i++) {
|
||||
for (int i = 0; i < files_[0].size(); i++) {
|
||||
iters->push_back(
|
||||
vset_->table_cache_->NewIterator(
|
||||
options, files_[0][i]->number, files_[0][i]->file_size));
|
||||
@ -201,7 +201,7 @@ std::string Version::DebugString() const {
|
||||
AppendNumberTo(&r, level);
|
||||
r.push_back(':');
|
||||
const std::vector<FileMetaData*>& files = files_[level];
|
||||
for (size_t i = 0; i < files.size(); i++) {
|
||||
for (int i = 0; i < files.size(); i++) {
|
||||
r.push_back(' ');
|
||||
AppendNumberTo(&r, files[i]->number);
|
||||
r.push_back(':');
|
||||
@ -232,7 +232,7 @@ class VersionSet::Builder {
|
||||
: vset_(vset) {
|
||||
for (int level = 0; level < config::kNumLevels; level++) {
|
||||
const std::vector<FileMetaData*>& files = base->files_[level];
|
||||
for (size_t i = 0; i < files.size(); i++) {
|
||||
for (int i = 0; i < files.size(); i++) {
|
||||
FileMetaData* f = files[i];
|
||||
f->refs++;
|
||||
files_[level].insert(std::make_pair(f->number, f));
|
||||
@ -258,7 +258,7 @@ class VersionSet::Builder {
|
||||
// Apply all of the edits in *edit to the current state.
|
||||
void Apply(VersionEdit* edit) {
|
||||
// Update compaction pointers
|
||||
for (size_t i = 0; i < edit->compact_pointers_.size(); i++) {
|
||||
for (int i = 0; i < edit->compact_pointers_.size(); i++) {
|
||||
const int level = edit->compact_pointers_[i].first;
|
||||
vset_->compact_pointer_[level] =
|
||||
edit->compact_pointers_[i].second.Encode().ToString();
|
||||
@ -284,13 +284,19 @@ class VersionSet::Builder {
|
||||
}
|
||||
|
||||
// Add new files
|
||||
for (size_t i = 0; i < edit->new_files_.size(); i++) {
|
||||
for (int i = 0; i < edit->new_files_.size(); i++) {
|
||||
const int level = edit->new_files_[i].first;
|
||||
FileMetaData* f = new FileMetaData(edit->new_files_[i].second);
|
||||
f->refs = 1;
|
||||
assert(files_[level].count(f->number) == 0);
|
||||
files_[level].insert(std::make_pair(f->number, f));
|
||||
}
|
||||
|
||||
// Add large value refs
|
||||
for (int i = 0; i < edit->large_refs_added_.size(); i++) {
|
||||
const VersionEdit::Large& l = edit->large_refs_added_[i];
|
||||
vset_->RegisterLargeValueRef(l.large_ref, l.fnum, l.internal_key);
|
||||
}
|
||||
}
|
||||
|
||||
// Save the current state in *v.
|
||||
@ -539,7 +545,7 @@ Status VersionSet::Recover() {
|
||||
|
||||
static int64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
|
||||
int64_t sum = 0;
|
||||
for (size_t i = 0; i < files.size(); i++) {
|
||||
for (int i = 0; i < files.size(); i++) {
|
||||
sum += files[i]->file_size;
|
||||
}
|
||||
return sum;
|
||||
@ -604,12 +610,25 @@ Status VersionSet::WriteSnapshot(log::Writer* log) {
|
||||
// Save files
|
||||
for (int level = 0; level < config::kNumLevels; level++) {
|
||||
const std::vector<FileMetaData*>& files = current_->files_[level];
|
||||
for (size_t i = 0; i < files.size(); i++) {
|
||||
for (int i = 0; i < files.size(); i++) {
|
||||
const FileMetaData* f = files[i];
|
||||
edit.AddFile(level, f->number, f->file_size, f->smallest, f->largest);
|
||||
}
|
||||
}
|
||||
|
||||
// Save large value refs
|
||||
for (LargeValueMap::const_iterator it = large_value_refs_.begin();
|
||||
it != large_value_refs_.end();
|
||||
++it) {
|
||||
const LargeValueRef& ref = it->first;
|
||||
const LargeReferencesSet& pointers = it->second;
|
||||
for (LargeReferencesSet::const_iterator j = pointers.begin();
|
||||
j != pointers.end();
|
||||
++j) {
|
||||
edit.AddLargeValueRef(ref, j->first, j->second);
|
||||
}
|
||||
}
|
||||
|
||||
std::string record;
|
||||
edit.EncodeTo(&record);
|
||||
return log->AddRecord(record);
|
||||
@ -632,7 +651,7 @@ Status VersionSet::SortLevel(Version* v, uint64_t level) {
|
||||
|
||||
if (result.ok() && level > 0) {
|
||||
// There should be no overlap
|
||||
for (size_t i = 1; i < v->files_[level].size(); i++) {
|
||||
for (int i = 1; i < v->files_[level].size(); i++) {
|
||||
const InternalKey& prev_end = v->files_[level][i-1]->largest;
|
||||
const InternalKey& this_begin = v->files_[level][i]->smallest;
|
||||
if (icmp_.Compare(prev_end, this_begin) >= 0) {
|
||||
@ -657,7 +676,7 @@ uint64_t VersionSet::ApproximateOffsetOf(Version* v, const InternalKey& ikey) {
|
||||
uint64_t result = 0;
|
||||
for (int level = 0; level < config::kNumLevels; level++) {
|
||||
const std::vector<FileMetaData*>& files = v->files_[level];
|
||||
for (size_t i = 0; i < files.size(); i++) {
|
||||
for (int i = 0; i < files.size(); i++) {
|
||||
if (icmp_.Compare(files[i]->largest, ikey) <= 0) {
|
||||
// Entire file is before "ikey", so just add the file size
|
||||
result += files[i]->file_size;
|
||||
@ -682,9 +701,83 @@ uint64_t VersionSet::ApproximateOffsetOf(Version* v, const InternalKey& ikey) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add in large value files which are references from internal keys
|
||||
// stored in the table files
|
||||
//
|
||||
// TODO(opt): this is O(# large values in db). If this becomes too slow,
|
||||
// we could store an auxiliary data structure indexed by internal key
|
||||
for (LargeValueMap::const_iterator it = large_value_refs_.begin();
|
||||
it != large_value_refs_.end();
|
||||
++it) {
|
||||
const LargeValueRef& lref = it->first;
|
||||
for (LargeReferencesSet::const_iterator it2 = it->second.begin();
|
||||
it2 != it->second.end();
|
||||
++it2) {
|
||||
if (icmp_.Compare(it2->second, ikey.Encode()) <= 0) {
|
||||
// Internal key for large value is before our key of interest
|
||||
result += lref.ValueSize();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
bool VersionSet::RegisterLargeValueRef(const LargeValueRef& large_ref,
|
||||
uint64_t fnum,
|
||||
const InternalKey& internal_key) {
|
||||
LargeReferencesSet* refs = &large_value_refs_[large_ref];
|
||||
bool is_first = refs->empty();
|
||||
refs->insert(make_pair(fnum, internal_key.Encode().ToString()));
|
||||
return is_first;
|
||||
}
|
||||
|
||||
void VersionSet::CleanupLargeValueRefs(const std::set<uint64_t>& live_tables) {
|
||||
for (LargeValueMap::iterator it = large_value_refs_.begin();
|
||||
it != large_value_refs_.end();
|
||||
) {
|
||||
LargeReferencesSet* refs = &it->second;
|
||||
for (LargeReferencesSet::iterator ref_it = refs->begin();
|
||||
ref_it != refs->end();
|
||||
) {
|
||||
if (ref_it->first != log_number_ && // Not in log file
|
||||
ref_it->first != prev_log_number_ && // Not in prev log
|
||||
live_tables.count(ref_it->first) == 0) { // Not in a live table
|
||||
// No longer live: erase
|
||||
LargeReferencesSet::iterator to_erase = ref_it;
|
||||
++ref_it;
|
||||
refs->erase(to_erase);
|
||||
} else {
|
||||
// Still live: leave this reference alone
|
||||
++ref_it;
|
||||
}
|
||||
}
|
||||
if (refs->empty()) {
|
||||
// No longer any live references to this large value: remove from
|
||||
// large_value_refs
|
||||
Log(env_, options_->info_log, "large value is dead: '%s'",
|
||||
LargeValueRefToFilenameString(it->first).c_str());
|
||||
LargeValueMap::iterator to_erase = it;
|
||||
++it;
|
||||
large_value_refs_.erase(to_erase);
|
||||
} else {
|
||||
++it;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool VersionSet::LargeValueIsLive(const LargeValueRef& large_ref) {
|
||||
LargeValueMap::iterator it = large_value_refs_.find(large_ref);
|
||||
if (it == large_value_refs_.end()) {
|
||||
return false;
|
||||
} else {
|
||||
assert(!it->second.empty());
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
void VersionSet::MaybeDeleteOldVersions() {
|
||||
// Note: it is important to delete versions in order since a newer
|
||||
// version with zero refs may be holding a pointer to a memtable
|
||||
@ -700,7 +793,7 @@ void VersionSet::AddLiveFiles(std::set<uint64_t>* live) {
|
||||
for (Version* v = oldest_; v != NULL; v = v->next_) {
|
||||
for (int level = 0; level < config::kNumLevels; level++) {
|
||||
const std::vector<FileMetaData*>& files = v->files_[level];
|
||||
for (size_t i = 0; i < files.size(); i++) {
|
||||
for (int i = 0; i < files.size(); i++) {
|
||||
live->insert(files[i]->number);
|
||||
}
|
||||
}
|
||||
@ -717,7 +810,7 @@ int64_t VersionSet::MaxNextLevelOverlappingBytes() {
|
||||
int64_t result = 0;
|
||||
std::vector<FileMetaData*> overlaps;
|
||||
for (int level = 0; level < config::kNumLevels - 1; level++) {
|
||||
for (size_t i = 0; i < current_->files_[level].size(); i++) {
|
||||
for (int i = 0; i < current_->files_[level].size(); i++) {
|
||||
const FileMetaData* f = current_->files_[level][i];
|
||||
GetOverlappingInputs(level+1, f->smallest, f->largest, &overlaps);
|
||||
const int64_t sum = TotalFileSize(overlaps);
|
||||
@ -739,7 +832,7 @@ void VersionSet::GetOverlappingInputs(
|
||||
Slice user_begin = begin.user_key();
|
||||
Slice user_end = end.user_key();
|
||||
const Comparator* user_cmp = icmp_.user_comparator();
|
||||
for (size_t i = 0; i < current_->files_[level].size(); i++) {
|
||||
for (int i = 0; i < current_->files_[level].size(); i++) {
|
||||
FileMetaData* f = current_->files_[level][i];
|
||||
if (user_cmp->Compare(f->largest.user_key(), user_begin) < 0 ||
|
||||
user_cmp->Compare(f->smallest.user_key(), user_end) > 0) {
|
||||
@ -759,7 +852,7 @@ void VersionSet::GetRange(const std::vector<FileMetaData*>& inputs,
|
||||
assert(!inputs.empty());
|
||||
smallest->Clear();
|
||||
largest->Clear();
|
||||
for (size_t i = 0; i < inputs.size(); i++) {
|
||||
for (int i = 0; i < inputs.size(); i++) {
|
||||
FileMetaData* f = inputs[i];
|
||||
if (i == 0) {
|
||||
*smallest = f->smallest;
|
||||
@ -802,7 +895,7 @@ Iterator* VersionSet::MakeInputIterator(Compaction* c) {
|
||||
if (!c->inputs_[which].empty()) {
|
||||
if (c->level() + which == 0) {
|
||||
const std::vector<FileMetaData*>& files = c->inputs_[which];
|
||||
for (size_t i = 0; i < files.size(); i++) {
|
||||
for (int i = 0; i < files.size(); i++) {
|
||||
list[num++] = table_cache_->NewIterator(
|
||||
options, files[i]->number, files[i]->file_size);
|
||||
}
|
||||
@ -834,7 +927,7 @@ Compaction* VersionSet::PickCompaction() {
|
||||
c->input_version_->Ref();
|
||||
|
||||
// Pick the first file that comes after compact_pointer_[level]
|
||||
for (size_t i = 0; i < current_->files_[level].size(); i++) {
|
||||
for (int i = 0; i < current_->files_[level].size(); i++) {
|
||||
FileMetaData* f = current_->files_[level][i];
|
||||
if (compact_pointer_[level].empty() ||
|
||||
icmp_.Compare(f->largest.Encode(), compact_pointer_[level]) > 0) {
|
||||
@ -969,7 +1062,7 @@ bool Compaction::IsTrivialMove() const {
|
||||
|
||||
void Compaction::AddInputDeletions(VersionEdit* edit) {
|
||||
for (int which = 0; which < 2; which++) {
|
||||
for (size_t i = 0; i < inputs_[which].size(); i++) {
|
||||
for (int i = 0; i < inputs_[which].size(); i++) {
|
||||
edit->DeleteFile(level_ + which, inputs_[which][i]->number);
|
||||
}
|
||||
}
|
@ -171,6 +171,22 @@ class VersionSet {
|
||||
// "key" as of version "v".
|
||||
uint64_t ApproximateOffsetOf(Version* v, const InternalKey& key);
|
||||
|
||||
// Register a reference to a large value with the specified
|
||||
// large_ref from the specified file number. Returns "true" if this
|
||||
// is the first recorded reference to the "large_ref" value in the
|
||||
// database, and false otherwise.
|
||||
bool RegisterLargeValueRef(const LargeValueRef& large_ref,
|
||||
uint64_t filenum,
|
||||
const InternalKey& internal_key);
|
||||
|
||||
// Cleanup the large value reference state by eliminating any
|
||||
// references from files that are not includes in either "live_tables"
|
||||
// or the current log.
|
||||
void CleanupLargeValueRefs(const std::set<uint64_t>& live_tables);
|
||||
|
||||
// Returns true if a large value with the given reference is live.
|
||||
bool LargeValueIsLive(const LargeValueRef& large_ref);
|
||||
|
||||
private:
|
||||
class Builder;
|
||||
|
||||
@ -221,6 +237,14 @@ class VersionSet {
|
||||
Version* current_; // Pointer to the last (newest) list entry
|
||||
Version* oldest_; // Pointer to the first (oldest) list entry
|
||||
|
||||
// Map from large value reference to the set of <file numbers,internal_key>
|
||||
// values containing references to the value. We keep the
|
||||
// internal key as a std::string rather than as an InternalKey because
|
||||
// we want to be able to easily use a set.
|
||||
typedef std::set<std::pair<uint64_t, std::string> > LargeReferencesSet;
|
||||
typedef std::map<LargeValueRef, LargeReferencesSet> LargeValueMap;
|
||||
LargeValueMap large_value_refs_;
|
||||
|
||||
// Per-level key at which the next compaction at that level should start.
|
||||
// Either an empty string, or a valid InternalKey.
|
||||
std::string compact_pointer_[config::kNumLevels];
|
||||
@ -289,7 +313,7 @@ class Compaction {
|
||||
// State used to check for number of of overlapping grandparent files
|
||||
// (parent == level_ + 1, grandparent == level_ + 2)
|
||||
std::vector<FileMetaData*> grandparents_;
|
||||
size_t grandparent_index_; // Index in grandparent_starts_
|
||||
int grandparent_index_; // Index in grandparent_starts_
|
||||
bool seen_key_; // Some output key has been seen
|
||||
int64_t overlapped_bytes_; // Bytes of overlap between current output
|
||||
// and grandparent files
|
||||
@ -300,7 +324,7 @@ class Compaction {
|
||||
// is that we are positioned at one of the file ranges for each
|
||||
// higher level than the ones involved in this compaction (i.e. for
|
||||
// all L >= level_ + 2).
|
||||
size_t level_ptrs_[config::kNumLevels];
|
||||
int level_ptrs_[config::kNumLevels];
|
||||
};
|
||||
|
||||
}
|
@ -8,6 +8,7 @@
|
||||
// data: record[count]
|
||||
// record :=
|
||||
// kTypeValue varstring varstring |
|
||||
// kTypeLargeValueRef varstring varstring |
|
||||
// kTypeDeletion varstring
|
||||
// varstring :=
|
||||
// len: varint32
|
||||
@ -57,6 +58,16 @@ void WriteBatch::Put(const Slice& key, const Slice& value) {
|
||||
PutLengthPrefixedSlice(&rep_, value);
|
||||
}
|
||||
|
||||
void WriteBatchInternal::PutLargeValueRef(WriteBatch* b,
|
||||
const Slice& key,
|
||||
const LargeValueRef& large_ref) {
|
||||
WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1);
|
||||
b->rep_.push_back(static_cast<char>(kTypeLargeValueRef));
|
||||
PutLengthPrefixedSlice(&b->rep_, key);
|
||||
PutLengthPrefixedSlice(&b->rep_,
|
||||
Slice(large_ref.data, sizeof(large_ref.data)));
|
||||
}
|
||||
|
||||
void WriteBatch::Delete(const Slice& key) {
|
||||
WriteBatchInternal::SetCount(this, WriteBatchInternal::Count(this) + 1);
|
||||
rep_.push_back(static_cast<char>(kTypeDeletion));
|
||||
@ -76,6 +87,10 @@ Status WriteBatchInternal::InsertInto(const WriteBatch* b,
|
||||
case kTypeValue:
|
||||
memtable->Add(it.sequence_number(), kTypeValue, it.key(), it.value());
|
||||
break;
|
||||
case kTypeLargeValueRef:
|
||||
memtable->Add(it.sequence_number(), kTypeLargeValueRef,
|
||||
it.key(), it.value());
|
||||
break;
|
||||
}
|
||||
found++;
|
||||
}
|
||||
@ -119,6 +134,7 @@ void WriteBatchInternal::Iterator::GetNextEntry() {
|
||||
input_.remove_prefix(1);
|
||||
switch (tag) {
|
||||
case kTypeValue:
|
||||
case kTypeLargeValueRef:
|
||||
if (GetLengthPrefixedSlice(&input_, &key_) &&
|
||||
GetLengthPrefixedSlice(&input_, &value_)) {
|
||||
op_ = static_cast<ValueType>(tag);
|
@ -13,6 +13,10 @@ namespace leveldb {
|
||||
// WriteBatch that we don't want in the public WriteBatch interface.
|
||||
class WriteBatchInternal {
|
||||
public:
|
||||
static void PutLargeValueRef(WriteBatch* batch,
|
||||
const Slice& key,
|
||||
const LargeValueRef& large_ref);
|
||||
|
||||
// Return the number of entries in the batch.
|
||||
static int Count(const WriteBatch* batch);
|
||||
|
@ -29,6 +29,13 @@ static std::string PrintContents(WriteBatch* b) {
|
||||
state.append(iter->value().ToString());
|
||||
state.append(")");
|
||||
break;
|
||||
case kTypeLargeValueRef:
|
||||
state.append("PutRef(");
|
||||
state.append(ikey.user_key.ToString());
|
||||
state.append(", ");
|
||||
state.append(iter->value().ToString());
|
||||
state.append(")");
|
||||
break;
|
||||
case kTypeDeletion:
|
||||
state.append("Delete(");
|
||||
state.append(ikey.user_key.ToString());
|
||||
@ -67,6 +74,22 @@ TEST(WriteBatchTest, Multiple) {
|
||||
PrintContents(&batch));
|
||||
}
|
||||
|
||||
TEST(WriteBatchTest, PutIndirect) {
|
||||
WriteBatch batch;
|
||||
batch.Put(Slice("baz"), Slice("boo"));
|
||||
LargeValueRef h;
|
||||
for (int i = 0; i < LargeValueRef::ByteSize(); i++) {
|
||||
h.data[i] = (i < 20) ? 'a' : 'b';
|
||||
}
|
||||
WriteBatchInternal::PutLargeValueRef(&batch, Slice("foo"), h);
|
||||
WriteBatchInternal::SetSequence(&batch, 100);
|
||||
ASSERT_EQ(100, WriteBatchInternal::Sequence(&batch));
|
||||
ASSERT_EQ(2, WriteBatchInternal::Count(&batch));
|
||||
ASSERT_EQ("Put(baz, boo)@100"
|
||||
"PutRef(foo, aaaaaaaaaaaaaaaaaaaabbbbbbbbb)@101",
|
||||
PrintContents(&batch));
|
||||
}
|
||||
|
||||
TEST(WriteBatchTest, Corruption) {
|
||||
WriteBatch batch;
|
||||
batch.Put(Slice("foo"), Slice("bar"));
|
@ -57,6 +57,15 @@ These merges have the effect of gradually migrating new updates from
|
||||
the young level to the largest level using only bulk reads and writes
|
||||
(i.e., minimizing expensive seeks).
|
||||
|
||||
<h2>Large value files</h2>
|
||||
<p>
|
||||
Each large value (greater than 64KB by default) is placed in a large
|
||||
value file (*.val) of its own. An entry is maintained in the log
|
||||
and/or sorted tables that maps from the corresponding key to the
|
||||
name of this large value file. The name of the large value file
|
||||
is derived from a SHA1 hash of the value and its length so that
|
||||
identical values share the same file.
|
||||
<p>
|
||||
<h2>Manifest</h2>
|
||||
<p>
|
||||
A MANIFEST file lists the set of sorted tables that make up each
|
||||
@ -211,7 +220,9 @@ So maybe even the sharding is not necessary on modern filesystems?
|
||||
compaction and at the end of recovery. It finds the names of all
|
||||
files in the database. It deletes all log files that are not the
|
||||
current log file. It deletes all table files that are not referenced
|
||||
from some level and are not the output of an active compaction.
|
||||
from some level and are not the output of an active compaction. It
|
||||
deletes all large value files that are not referenced from any live
|
||||
table or log file.
|
||||
|
||||
</body>
|
||||
</html>
|
@ -412,6 +412,17 @@ We might want to prefix <code>filename</code> keys with one letter (say '/') and
|
||||
over just the metadata do not force us to fetch and cache bulky file
|
||||
contents.
|
||||
<p>
|
||||
<h2>Large Values</h2>
|
||||
<p>
|
||||
<code>leveldb</code> has special treatment of large values (by default, a value
|
||||
of length greater than or equal to 64K is considered large, though a
|
||||
field in Options can be used to adjust this threshold). Each such
|
||||
large value is placed in a separate operating system file, and the
|
||||
normal database blocks just contain pointers to such files.
|
||||
<p>
|
||||
Furthermore, if the same large value occurs multiple times in a single
|
||||
database, it will be stored just once.
|
||||
<p>
|
||||
<h1>Checksums</h1>
|
||||
<p>
|
||||
<code>leveldb</code> associates checksums with all data it stores in the file system.
|
@ -86,6 +86,16 @@ struct Options {
|
||||
// Default: 1000
|
||||
int max_open_files;
|
||||
|
||||
// Handle values larger than "large_value_threshold" bytes
|
||||
// specially, by writing them into their own files (to avoid
|
||||
// compaction overhead) and doing content-based elimination of
|
||||
// duplicate values to save space.
|
||||
//
|
||||
// We recommend against changing this value.
|
||||
//
|
||||
// Default: 64K
|
||||
size_t large_value_threshold;
|
||||
|
||||
// Control over blocks (user data is stored in a set of blocks, and
|
||||
// a block is the unit of reading from disk).
|
||||
|
||||
@ -100,7 +110,7 @@ struct Options {
|
||||
// compression is enabled. This parameter can be changed dynamically.
|
||||
//
|
||||
// Default: 4K
|
||||
size_t block_size;
|
||||
int block_size;
|
||||
|
||||
// Number of keys between restart points for delta encoding of keys.
|
||||
// This parameter can be changed dynamically. Most clients should
|
@ -96,6 +96,8 @@
|
||||
'port/port_example.h',
|
||||
'port/port_posix.cc',
|
||||
'port/port_posix.h',
|
||||
'port/sha1_portable.cc',
|
||||
'port/sha1_portable.h',
|
||||
'table/block.cc',
|
||||
'table/block.h',
|
||||
'table/block_builder.cc',
|
||||
@ -265,6 +267,16 @@
|
||||
'db/log_test.cc',
|
||||
],
|
||||
},
|
||||
{
|
||||
'target_name': 'leveldb_sha1_test',
|
||||
'type': 'executable',
|
||||
'dependencies': [
|
||||
'leveldb_testutil',
|
||||
],
|
||||
'sources': [
|
||||
'port/sha1_test.cc',
|
||||
],
|
||||
},
|
||||
{
|
||||
'target_name': 'leveldb_skiplist_test',
|
||||
'type': 'executable',
|
@ -1,122 +0,0 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "db/filename.h"
|
||||
|
||||
#include "db/dbformat.h"
|
||||
#include "port/port.h"
|
||||
#include "util/logging.h"
|
||||
#include "util/testharness.h"
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
class FileNameTest { };
|
||||
|
||||
TEST(FileNameTest, Parse) {
|
||||
Slice db;
|
||||
FileType type;
|
||||
uint64_t number;
|
||||
|
||||
// Successful parses
|
||||
static struct {
|
||||
const char* fname;
|
||||
uint64_t number;
|
||||
FileType type;
|
||||
} cases[] = {
|
||||
{ "100.log", 100, kLogFile },
|
||||
{ "0.log", 0, kLogFile },
|
||||
{ "0.sst", 0, kTableFile },
|
||||
{ "CURRENT", 0, kCurrentFile },
|
||||
{ "LOCK", 0, kDBLockFile },
|
||||
{ "MANIFEST-2", 2, kDescriptorFile },
|
||||
{ "MANIFEST-7", 7, kDescriptorFile },
|
||||
{ "LOG", 0, kInfoLogFile },
|
||||
{ "LOG.old", 0, kInfoLogFile },
|
||||
{ "18446744073709551615.log", 18446744073709551615ull, kLogFile },
|
||||
};
|
||||
for (int i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
|
||||
std::string f = cases[i].fname;
|
||||
ASSERT_TRUE(ParseFileName(f, &number, &type)) << f;
|
||||
ASSERT_EQ(cases[i].type, type) << f;
|
||||
ASSERT_EQ(cases[i].number, number) << f;
|
||||
}
|
||||
|
||||
// Errors
|
||||
static const char* errors[] = {
|
||||
"",
|
||||
"foo",
|
||||
"foo-dx-100.log",
|
||||
".log",
|
||||
"",
|
||||
"manifest",
|
||||
"CURREN",
|
||||
"CURRENTX",
|
||||
"MANIFES",
|
||||
"MANIFEST",
|
||||
"MANIFEST-",
|
||||
"XMANIFEST-3",
|
||||
"MANIFEST-3x",
|
||||
"LOC",
|
||||
"LOCKx",
|
||||
"LO",
|
||||
"LOGx",
|
||||
"18446744073709551616.log",
|
||||
"184467440737095516150.log",
|
||||
"100",
|
||||
"100.",
|
||||
"100.lop"
|
||||
};
|
||||
for (int i = 0; i < sizeof(errors) / sizeof(errors[0]); i++) {
|
||||
std::string f = errors[i];
|
||||
ASSERT_TRUE(!ParseFileName(f, &number, &type)) << f;
|
||||
};
|
||||
}
|
||||
|
||||
TEST(FileNameTest, Construction) {
|
||||
uint64_t number;
|
||||
FileType type;
|
||||
std::string fname;
|
||||
|
||||
fname = CurrentFileName("foo");
|
||||
ASSERT_EQ("foo/", std::string(fname.data(), 4));
|
||||
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
|
||||
ASSERT_EQ(0, number);
|
||||
ASSERT_EQ(kCurrentFile, type);
|
||||
|
||||
fname = LockFileName("foo");
|
||||
ASSERT_EQ("foo/", std::string(fname.data(), 4));
|
||||
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
|
||||
ASSERT_EQ(0, number);
|
||||
ASSERT_EQ(kDBLockFile, type);
|
||||
|
||||
fname = LogFileName("foo", 192);
|
||||
ASSERT_EQ("foo/", std::string(fname.data(), 4));
|
||||
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
|
||||
ASSERT_EQ(192, number);
|
||||
ASSERT_EQ(kLogFile, type);
|
||||
|
||||
fname = TableFileName("bar", 200);
|
||||
ASSERT_EQ("bar/", std::string(fname.data(), 4));
|
||||
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
|
||||
ASSERT_EQ(200, number);
|
||||
ASSERT_EQ(kTableFile, type);
|
||||
|
||||
fname = DescriptorFileName("bar", 100);
|
||||
ASSERT_EQ("bar/", std::string(fname.data(), 4));
|
||||
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
|
||||
ASSERT_EQ(100, number);
|
||||
ASSERT_EQ(kDescriptorFile, type);
|
||||
|
||||
fname = TempFileName("tmp", 999);
|
||||
ASSERT_EQ("tmp/", std::string(fname.data(), 4));
|
||||
ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type));
|
||||
ASSERT_EQ(999, number);
|
||||
ASSERT_EQ(kTempFile, type);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
return leveldb::test::RunAllTests();
|
||||
}
|
@ -10,6 +10,7 @@
|
||||
#include <endian.h>
|
||||
#include <pthread.h>
|
||||
#include <stdint.h>
|
||||
#include <sha1.h>
|
||||
#include <cstdatomic>
|
||||
#include <string>
|
||||
#include <cctype>
|
||||
@ -133,6 +134,13 @@ inline bool Snappy_Uncompress(
|
||||
return false;
|
||||
}
|
||||
|
||||
inline void SHA1_Hash(const char* data, size_t len, char* hash_array) {
|
||||
SHA1_CTX sha1_ctx;
|
||||
SHA1Init(&sha1_ctx);
|
||||
SHA1Update(&sha1_ctx, (const u_char*)data, len);
|
||||
SHA1Final((u_char*)hash_array, &sha1_ctx);
|
||||
}
|
||||
|
||||
inline uint64_t ThreadIdentifier() {
|
||||
pthread_t tid = pthread_self();
|
||||
uint64_t r = 0;
|
@ -13,6 +13,7 @@
|
||||
#include "base/atomicops.h"
|
||||
#include "base/basictypes.h"
|
||||
#include "base/logging.h"
|
||||
#include "base/sha1.h"
|
||||
#include "base/synchronization/condition_variable.h"
|
||||
#include "base/synchronization/lock.h"
|
||||
|
||||
@ -82,6 +83,12 @@ class AtomicPointer {
|
||||
}
|
||||
};
|
||||
|
||||
inline void SHA1_Hash(const char* data, size_t len, char* hash_array) {
|
||||
return ::base::SHA1HashBytes(reinterpret_cast<const unsigned char*>(data),
|
||||
len,
|
||||
reinterpret_cast<unsigned char*>(hash_array));
|
||||
}
|
||||
|
||||
bool Snappy_Compress(const char* input, size_t input_length,
|
||||
std::string* output);
|
||||
bool Snappy_Uncompress(const char* input_data, size_t input_length,
|
@ -89,6 +89,11 @@ class AtomicPointer {
|
||||
void NoBarrier_Store(void* v);
|
||||
};
|
||||
|
||||
// ------------------ Checksumming -------------------
|
||||
|
||||
// Store a 160-bit hash of "data[0..len-1]" in "hash_array[0]..hash_array[19]"
|
||||
extern void SHA1_Hash(const char* data, size_t len, char* hash_array);
|
||||
|
||||
// ------------------ Compression -------------------
|
||||
|
||||
// Store the snappy compression of "input[0,input_length-1]" in *output.
|
@ -13,6 +13,7 @@
|
||||
#include <string>
|
||||
#include <cstdatomic>
|
||||
#include <cstring>
|
||||
#include "port/sha1_portable.h"
|
||||
|
||||
namespace leveldb {
|
||||
namespace port {
|
||||
@ -72,6 +73,10 @@ class AtomicPointer {
|
||||
}
|
||||
};
|
||||
|
||||
inline void SHA1_Hash(const char* data, size_t len, char* hash_array) {
|
||||
SHA1_Hash_Portable(data, len, hash_array);
|
||||
}
|
||||
|
||||
// TODO(gabor): Implement actual compress
|
||||
inline bool Snappy_Compress(const char* input, size_t input_length,
|
||||
std::string* output) {
|
298
port/sha1_portable.cc
Normal file
298
port/sha1_portable.cc
Normal file
@ -0,0 +1,298 @@
|
||||
// Portions copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
//
|
||||
// This module provides a slow but portable implementation of
|
||||
// the SHA1 hash function.
|
||||
//
|
||||
// It is adapted from free code written by Paul E. Jones
|
||||
// <paulej@packetizer.com>. See http://www.packetizer.com/security/sha1/
|
||||
//
|
||||
// The license for the original code is:
|
||||
/*
|
||||
Copyright (C) 1998, 2009
|
||||
Paul E. Jones <paulej@packetizer.com>
|
||||
|
||||
Freeware Public License (FPL)
|
||||
|
||||
This software is licensed as "freeware." Permission to distribute
|
||||
this software in source and binary forms, including incorporation
|
||||
into other products, is hereby granted without a fee. THIS SOFTWARE
|
||||
IS PROVIDED 'AS IS' AND WITHOUT ANY EXPRESSED OR IMPLIED WARRANTIES,
|
||||
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
|
||||
AND FITNESS FOR A PARTICULAR PURPOSE. THE AUTHOR SHALL NOT BE HELD
|
||||
LIABLE FOR ANY DAMAGES RESULTING FROM THE USE OF THIS SOFTWARE, EITHER
|
||||
DIRECTLY OR INDIRECTLY, INCLUDING, BUT NOT LIMITED TO, LOSS OF DATA
|
||||
OR DATA BEING RENDERED INACCURATE.
|
||||
*/
|
||||
|
||||
#include "port/sha1_portable.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
namespace leveldb {
|
||||
namespace port {
|
||||
|
||||
/*
|
||||
* Description:
|
||||
* This class implements the Secure Hashing Standard as defined
|
||||
* in FIPS PUB 180-1 published April 17, 1995.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This structure will hold context information for the hashing
|
||||
* operation
|
||||
*/
|
||||
typedef struct SHA1Context {
|
||||
unsigned Message_Digest[5]; /* Message Digest (output) */
|
||||
|
||||
unsigned Length_Low; /* Message length in bits */
|
||||
unsigned Length_High; /* Message length in bits */
|
||||
|
||||
unsigned char Message_Block[64]; /* 512-bit message blocks */
|
||||
int Message_Block_Index; /* Index into message block array */
|
||||
|
||||
bool Computed; /* Is the digest computed? */
|
||||
bool Corrupted; /* Is the message digest corruped? */
|
||||
} SHA1Context;
|
||||
|
||||
/*
|
||||
* Portability Issues:
|
||||
* SHA-1 is defined in terms of 32-bit "words". This code was
|
||||
* written with the expectation that the processor has at least
|
||||
* a 32-bit machine word size. If the machine word size is larger,
|
||||
* the code should still function properly. One caveat to that
|
||||
* is that the input functions taking characters and character
|
||||
* arrays assume that only 8 bits of information are stored in each
|
||||
* character.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Define the circular shift macro
|
||||
*/
|
||||
#define SHA1CircularShift(bits,word) \
|
||||
((((word) << (bits)) & 0xFFFFFFFF) | \
|
||||
((word) >> (32-(bits))))
|
||||
|
||||
/* Function prototypes */
|
||||
static void SHA1ProcessMessageBlock(SHA1Context *);
|
||||
static void SHA1PadMessage(SHA1Context *);
|
||||
|
||||
// Initialize the SHA1Context in preparation for computing a new
|
||||
// message digest.
|
||||
static void SHA1Reset(SHA1Context* context) {
|
||||
context->Length_Low = 0;
|
||||
context->Length_High = 0;
|
||||
context->Message_Block_Index = 0;
|
||||
|
||||
context->Message_Digest[0] = 0x67452301;
|
||||
context->Message_Digest[1] = 0xEFCDAB89;
|
||||
context->Message_Digest[2] = 0x98BADCFE;
|
||||
context->Message_Digest[3] = 0x10325476;
|
||||
context->Message_Digest[4] = 0xC3D2E1F0;
|
||||
|
||||
context->Computed = false;
|
||||
context->Corrupted = false;
|
||||
}
|
||||
|
||||
// This function will return the 160-bit message digest into the
|
||||
// Message_Digest array within the SHA1Context provided
|
||||
static bool SHA1Result(SHA1Context *context) {
|
||||
if (context->Corrupted) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!context->Computed) {
|
||||
SHA1PadMessage(context);
|
||||
context->Computed = true;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// This function accepts an array of bytes as the next portion of
|
||||
// the message.
|
||||
static void SHA1Input(SHA1Context *context,
|
||||
const unsigned char *message_array,
|
||||
unsigned length) {
|
||||
if (!length) return;
|
||||
|
||||
if (context->Computed || context->Corrupted) {
|
||||
context->Corrupted = true;
|
||||
return;
|
||||
}
|
||||
|
||||
while(length-- && !context->Corrupted) {
|
||||
context->Message_Block[context->Message_Block_Index++] =
|
||||
(*message_array & 0xFF);
|
||||
|
||||
context->Length_Low += 8;
|
||||
/* Force it to 32 bits */
|
||||
context->Length_Low &= 0xFFFFFFFF;
|
||||
if (context->Length_Low == 0) {
|
||||
context->Length_High++;
|
||||
/* Force it to 32 bits */
|
||||
context->Length_High &= 0xFFFFFFFF;
|
||||
if (context->Length_High == 0)
|
||||
{
|
||||
/* Message is too long */
|
||||
context->Corrupted = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (context->Message_Block_Index == 64)
|
||||
{
|
||||
SHA1ProcessMessageBlock(context);
|
||||
}
|
||||
|
||||
message_array++;
|
||||
}
|
||||
}
|
||||
|
||||
// This function will process the next 512 bits of the message stored
|
||||
// in the Message_Block array.
|
||||
static void SHA1ProcessMessageBlock(SHA1Context *context) {
|
||||
const unsigned K[] = // Constants defined in SHA-1
|
||||
{
|
||||
0x5A827999,
|
||||
0x6ED9EBA1,
|
||||
0x8F1BBCDC,
|
||||
0xCA62C1D6
|
||||
};
|
||||
int t; // Loop counter
|
||||
unsigned temp; // Temporary word value
|
||||
unsigned W[80]; // Word sequence
|
||||
unsigned A, B, C, D, E; // Word buffers
|
||||
|
||||
// Initialize the first 16 words in the array W
|
||||
for(t = 0; t < 16; t++) {
|
||||
W[t] = ((unsigned) context->Message_Block[t * 4]) << 24;
|
||||
W[t] |= ((unsigned) context->Message_Block[t * 4 + 1]) << 16;
|
||||
W[t] |= ((unsigned) context->Message_Block[t * 4 + 2]) << 8;
|
||||
W[t] |= ((unsigned) context->Message_Block[t * 4 + 3]);
|
||||
}
|
||||
|
||||
for(t = 16; t < 80; t++) {
|
||||
W[t] = SHA1CircularShift(1,W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16]);
|
||||
}
|
||||
|
||||
A = context->Message_Digest[0];
|
||||
B = context->Message_Digest[1];
|
||||
C = context->Message_Digest[2];
|
||||
D = context->Message_Digest[3];
|
||||
E = context->Message_Digest[4];
|
||||
|
||||
for(t = 0; t < 20; t++) {
|
||||
temp = SHA1CircularShift(5,A) +
|
||||
((B & C) | ((~B) & D)) + E + W[t] + K[0];
|
||||
temp &= 0xFFFFFFFF;
|
||||
E = D;
|
||||
D = C;
|
||||
C = SHA1CircularShift(30,B);
|
||||
B = A;
|
||||
A = temp;
|
||||
}
|
||||
|
||||
for(t = 20; t < 40; t++) {
|
||||
temp = SHA1CircularShift(5,A) + (B ^ C ^ D) + E + W[t] + K[1];
|
||||
temp &= 0xFFFFFFFF;
|
||||
E = D;
|
||||
D = C;
|
||||
C = SHA1CircularShift(30,B);
|
||||
B = A;
|
||||
A = temp;
|
||||
}
|
||||
|
||||
for(t = 40; t < 60; t++) {
|
||||
temp = SHA1CircularShift(5,A) +
|
||||
((B & C) | (B & D) | (C & D)) + E + W[t] + K[2];
|
||||
temp &= 0xFFFFFFFF;
|
||||
E = D;
|
||||
D = C;
|
||||
C = SHA1CircularShift(30,B);
|
||||
B = A;
|
||||
A = temp;
|
||||
}
|
||||
|
||||
for(t = 60; t < 80; t++) {
|
||||
temp = SHA1CircularShift(5,A) + (B ^ C ^ D) + E + W[t] + K[3];
|
||||
temp &= 0xFFFFFFFF;
|
||||
E = D;
|
||||
D = C;
|
||||
C = SHA1CircularShift(30,B);
|
||||
B = A;
|
||||
A = temp;
|
||||
}
|
||||
|
||||
context->Message_Digest[0] = (context->Message_Digest[0] + A) & 0xFFFFFFFF;
|
||||
context->Message_Digest[1] = (context->Message_Digest[1] + B) & 0xFFFFFFFF;
|
||||
context->Message_Digest[2] = (context->Message_Digest[2] + C) & 0xFFFFFFFF;
|
||||
context->Message_Digest[3] = (context->Message_Digest[3] + D) & 0xFFFFFFFF;
|
||||
context->Message_Digest[4] = (context->Message_Digest[4] + E) & 0xFFFFFFFF;
|
||||
|
||||
context->Message_Block_Index = 0;
|
||||
}
|
||||
|
||||
// According to the standard, the message must be padded to an even
|
||||
// 512 bits. The first padding bit must be a '1'. The last 64 bits
|
||||
// represent the length of the original message. All bits in between
|
||||
// should be 0. This function will pad the message according to those
|
||||
// rules by filling the Message_Block array accordingly. It will also
|
||||
// call SHA1ProcessMessageBlock() appropriately. When it returns, it
|
||||
// can be assumed that the message digest has been computed.
|
||||
static void SHA1PadMessage(SHA1Context *context) {
|
||||
// Check to see if the current message block is too small to hold
|
||||
// the initial padding bits and length. If so, we will pad the
|
||||
// block, process it, and then continue padding into a second block.
|
||||
if (context->Message_Block_Index > 55) {
|
||||
context->Message_Block[context->Message_Block_Index++] = 0x80;
|
||||
while(context->Message_Block_Index < 64) {
|
||||
context->Message_Block[context->Message_Block_Index++] = 0;
|
||||
}
|
||||
|
||||
SHA1ProcessMessageBlock(context);
|
||||
|
||||
while(context->Message_Block_Index < 56) {
|
||||
context->Message_Block[context->Message_Block_Index++] = 0;
|
||||
}
|
||||
} else {
|
||||
context->Message_Block[context->Message_Block_Index++] = 0x80;
|
||||
while(context->Message_Block_Index < 56) {
|
||||
context->Message_Block[context->Message_Block_Index++] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Store the message length as the last 8 octets
|
||||
context->Message_Block[56] = (context->Length_High >> 24) & 0xFF;
|
||||
context->Message_Block[57] = (context->Length_High >> 16) & 0xFF;
|
||||
context->Message_Block[58] = (context->Length_High >> 8) & 0xFF;
|
||||
context->Message_Block[59] = (context->Length_High) & 0xFF;
|
||||
context->Message_Block[60] = (context->Length_Low >> 24) & 0xFF;
|
||||
context->Message_Block[61] = (context->Length_Low >> 16) & 0xFF;
|
||||
context->Message_Block[62] = (context->Length_Low >> 8) & 0xFF;
|
||||
context->Message_Block[63] = (context->Length_Low) & 0xFF;
|
||||
|
||||
SHA1ProcessMessageBlock(context);
|
||||
}
|
||||
|
||||
|
||||
void SHA1_Hash_Portable(const char* data, size_t len, char* hash_array) {
|
||||
SHA1Context context;
|
||||
SHA1Reset(&context);
|
||||
SHA1Input(&context, reinterpret_cast<const unsigned char*>(data), len);
|
||||
bool ok = SHA1Result(&context);
|
||||
if (!ok) {
|
||||
fprintf(stderr, "Unexpected error in SHA1_Hash_Portable code\n");
|
||||
exit(1);
|
||||
}
|
||||
for (int i = 0; i < 5; i++) {
|
||||
uint32_t value = context.Message_Digest[i];
|
||||
hash_array[i*4 + 0] = (value >> 24) & 0xff;
|
||||
hash_array[i*4 + 1] = (value >> 16) & 0xff;
|
||||
hash_array[i*4 + 2] = (value >> 8) & 0xff;
|
||||
hash_array[i*4 + 3] = value & 0xff;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
25
port/sha1_portable.h
Normal file
25
port/sha1_portable.h
Normal file
@ -0,0 +1,25 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#ifndef STORAGE_LEVELDB_PORT_SHA1_PORTABLE_H_
|
||||
#define STORAGE_LEVELDB_PORT_SHA1_PORTABLE_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
namespace leveldb {
|
||||
namespace port {
|
||||
|
||||
// Compute the SHA1 hash value of "data[0..len-1]" and store it in
|
||||
// "hash_array[0..19]". hash_array must have 20 bytes of space available.
|
||||
//
|
||||
// This function is portable but may not be as fast as a version
|
||||
// optimized for your platform. It is provided as a default method
|
||||
// that can be used when porting leveldb to a new platform if no
|
||||
// better SHA1 hash implementation is available.
|
||||
void SHA1_Hash_Portable(const char* data, size_t len, char* hash_array);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif // STORAGE_LEVELDB_PORT_SHA1_PORTABLE_H_
|
39
port/sha1_test.cc
Normal file
39
port/sha1_test.cc
Normal file
@ -0,0 +1,39 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "port/port.h"
|
||||
#include "util/testharness.h"
|
||||
|
||||
namespace leveldb {
|
||||
namespace port {
|
||||
|
||||
class SHA1 { };
|
||||
|
||||
static std::string TestSHA1(const char* data, size_t len) {
|
||||
char hash_val[20];
|
||||
SHA1_Hash(data, len, hash_val);
|
||||
char buf[41];
|
||||
for (int i = 0; i < 20; i++) {
|
||||
snprintf(buf + i * 2, 41 - i * 2,
|
||||
"%02x",
|
||||
static_cast<unsigned int>(static_cast<unsigned char>(
|
||||
hash_val[i])));
|
||||
}
|
||||
return std::string(buf, 40);
|
||||
}
|
||||
|
||||
TEST(SHA1, Simple) {
|
||||
ASSERT_EQ("da39a3ee5e6b4b0d3255bfef95601890afd80709", TestSHA1("", 0));
|
||||
ASSERT_EQ("aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d", TestSHA1("hello", 5));
|
||||
std::string x(10000, 'x');
|
||||
ASSERT_EQ("f8c5cde791c5056cf515881e701c8a9ecb439a75",
|
||||
TestSHA1(x.data(), x.size()));
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
return leveldb::test::RunAllTests();
|
||||
}
|
@ -62,9 +62,7 @@ static inline const char* DecodeEntry(const char* p, const char* limit,
|
||||
if ((p = GetVarint32Ptr(p, limit, value_length)) == NULL) return NULL;
|
||||
}
|
||||
|
||||
if (static_cast<uint32>(limit - p) < (*non_shared + *value_length)) {
|
||||
return NULL;
|
||||
}
|
||||
if (limit - p < (*non_shared + *value_length)) return NULL;
|
||||
return p;
|
||||
}
|
||||
|
@ -62,7 +62,7 @@ size_t BlockBuilder::CurrentSizeEstimate() const {
|
||||
|
||||
Slice BlockBuilder::Finish() {
|
||||
// Append restart array
|
||||
for (size_t i = 0; i < restarts_.size(); i++) {
|
||||
for (int i = 0; i < restarts_.size(); i++) {
|
||||
PutFixed32(&buffer_, restarts_[i]);
|
||||
}
|
||||
PutFixed32(&buffer_, restarts_.size());
|
@ -36,7 +36,7 @@ void Footer::EncodeTo(std::string* dst) const {
|
||||
metaindex_handle_.EncodeTo(dst);
|
||||
index_handle_.EncodeTo(dst);
|
||||
dst->resize(2 * BlockHandle::kMaxEncodedLength); // Padding
|
||||
PutFixed32(dst, static_cast<uint32_t>(kTableMagicNumber & 0xffffffffu));
|
||||
PutFixed32(dst, static_cast<uint32_t>(kTableMagicNumber));
|
||||
PutFixed32(dst, static_cast<uint32_t>(kTableMagicNumber >> 32));
|
||||
assert(dst->size() == original_size + kEncodedLength);
|
||||
}
|
||||
@ -71,7 +71,7 @@ Status ReadBlock(RandomAccessFile* file,
|
||||
|
||||
// Read the block contents as well as the type/crc footer.
|
||||
// See table_builder.cc for the code that built this structure.
|
||||
size_t n = static_cast<size_t>(handle.size());
|
||||
size_t n = handle.size();
|
||||
char* buf = new char[n + kBlockTrailerSize];
|
||||
Slice contents;
|
||||
Status s = file->Read(handle.offset(), n + kBlockTrailerSize, &contents, buf);
|
@ -16,7 +16,7 @@ Arena::Arena() {
|
||||
}
|
||||
|
||||
Arena::~Arena() {
|
||||
for (size_t i = 0; i < blocks_.size(); i++) {
|
||||
for (int i = 0; i < blocks_.size(); i++) {
|
||||
delete[] blocks_[i];
|
||||
}
|
||||
}
|
@ -85,7 +85,7 @@ char* EncodeVarint64(char* dst, uint64_t v) {
|
||||
*(ptr++) = (v & (B-1)) | B;
|
||||
v >>= 7;
|
||||
}
|
||||
*(ptr++) = static_cast<unsigned char>(v);
|
||||
*(ptr++) = v;
|
||||
return reinterpret_cast<char*>(ptr);
|
||||
}
|
||||
|
@ -51,7 +51,7 @@ class BytewiseComparatorImpl : public Comparator {
|
||||
virtual void FindShortSuccessor(std::string* key) const {
|
||||
// Find first character that can be incremented
|
||||
size_t n = key->size();
|
||||
for (size_t i = 0; i < n; i++) {
|
||||
for (int i = 0; i < n; i++) {
|
||||
const uint8_t byte = (*key)[i];
|
||||
if (byte != static_cast<uint8_t>(0xff)) {
|
||||
(*key)[i] = byte + 1;
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user