Make GC + snapshot main-lock friendly (#1759)

- Only IN_MEMORY_ANALYTICAL requires the unique lock during snapshot creation
- In some cases GC will be provided with a unique lock
  - That fact can be used for optimisations
  - In all other cases, optimisations should rely on an alternative check
    rather than on acquiring the unique lock (see the sketch below)
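
A minimal standalone sketch of that alternative check (illustrative names, not
Memgraph's actual types): if the next commit timestamp equals the oldest active
start timestamp, no transaction is in flight, so the garbage undo buffers can be
dropped wholesale without ever taking the unique main lock.

    #include <cstdint>
    #include <deque>

    struct GarbageEntry {
      uint64_t mark_timestamp;  // timestamp at which this undo data became garbage
    };

    void CollectGarbageSketch(std::deque<GarbageEntry> &garbage, uint64_t next_commit_timestamp,
                              uint64_t oldest_active_start_timestamp, bool aggressive) {
      if (aggressive || next_commit_timestamp == oldest_active_start_timestamp) {
        // No transaction can still be reading these undo buffers; drop them all at once.
        garbage.clear();
      } else {
        // Otherwise only drop entries that no active transaction can still observe.
        while (!garbage.empty() && garbage.front().mark_timestamp <= oldest_active_start_timestamp) {
          garbage.pop_front();
        }
      }
    }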

Also:
- Faster property lookup (early-exit scan; see the sketch below)
- Faster index iteration (better conditional branching)
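
The faster property lookup comes from an early-exit scan over the id-sorted
property buffer. A rough self-contained sketch of the idea (names here are
illustrative, not the PropertyStore API):

    #include <cstdint>
    #include <optional>
    #include <vector>

    struct EncodedProperty {
      uint32_t id;     // property id; entries are kept sorted by id
      uint64_t begin;  // offset where the encoded value starts
      uint64_t end;    // offset one past the encoded value
    };

    std::optional<EncodedProperty> FindPropertySketch(const std::vector<EncodedProperty> &sorted_props,
                                                      uint32_t wanted_id) {
      for (const auto &prop : sorted_props) {
        if (prop.id == wanted_id) return prop;         // exact hit
        if (prop.id > wanted_id) return std::nullopt;  // sorted order: it cannot appear later, stop early
      }
      return std::nullopt;  // ran out of data
    }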
Gareth Andrew Lloyd 2024-02-27 14:45:08 +00:00 committed by GitHub
parent e88c7a0aa5
commit a6fcdfd905
7 changed files with 138 additions and 94 deletions


@@ -310,7 +310,7 @@ class DiskStorage final : public Storage {
   StorageInfo GetBaseInfo() override;
   StorageInfo GetInfo(memgraph::replication_coordination_glue::ReplicationRole replication_role) override;
-  void FreeMemory(std::unique_lock<utils::ResourceLock> /*lock*/) override {}
+  void FreeMemory(std::unique_lock<utils::ResourceLock> /*lock*/, bool /*periodic*/) override {}
   void PrepareForNewEpoch() override { throw utils::BasicException("Disk storage mode does not support replication."); }


@@ -144,27 +144,30 @@ void InMemoryLabelPropertyIndex::RemoveObsoleteEntries(uint64_t oldest_active_st
   auto maybe_stop = utils::ResettableCounter<2048>();
   for (auto &[label_property, index] : index_) {
+    auto [label_id, prop_id] = label_property;
     // before starting index, check if stop_requested
     if (token.stop_requested()) return;
     auto index_acc = index.access();
-    for (auto it = index_acc.begin(); it != index_acc.end();) {
+    auto it = index_acc.begin();
+    auto end_it = index_acc.end();
+    if (it == end_it) continue;
+    while (true) {
       // Hot loop, don't check stop_requested every time
       if (maybe_stop() && token.stop_requested()) return;
       auto next_it = it;
       ++next_it;
-      if (it->timestamp >= oldest_active_start_timestamp) {
-        it = next_it;
-        continue;
-      }
-      if ((next_it != index_acc.end() && it->vertex == next_it->vertex && it->value == next_it->value) ||
-          !AnyVersionHasLabelProperty(*it->vertex, label_property.first, label_property.second, it->value,
-                                      oldest_active_start_timestamp)) {
-        index_acc.remove(*it);
+      bool has_next = next_it != end_it;
+      if (it->timestamp < oldest_active_start_timestamp) {
+        bool redundant_duplicate = has_next && it->vertex == next_it->vertex && it->value == next_it->value;
+        if (redundant_duplicate ||
+            !AnyVersionHasLabelProperty(*it->vertex, label_id, prop_id, it->value, oldest_active_start_timestamp)) {
+          index_acc.remove(*it);
+        }
       }
+      if (!has_next) break;
       it = next_it;
     }
   }


@@ -143,9 +143,7 @@ InMemoryStorage::InMemoryStorage(Config config)
   if (config_.gc.type == Config::Gc::Type::PERIODIC) {
     // TODO: move out of storage have one global gc_runner_
-    gc_runner_.Run("Storage GC", config_.gc.interval, [this] {
-      this->FreeMemory(std::unique_lock<utils::ResourceLock>{main_lock_, std::defer_lock});
-    });
+    gc_runner_.Run("Storage GC", config_.gc.interval, [this] { this->FreeMemory({}, true); });
   }
   if (timestamp_ == kTimestampInitialId) {
     commit_log_.emplace();
@@ -1425,28 +1423,27 @@ void InMemoryStorage::SetStorageMode(StorageMode new_storage_mode) {
     }
     storage_mode_ = new_storage_mode;
-    FreeMemory(std::move(main_guard));
+    FreeMemory(std::move(main_guard), false);
   }
 }

-template <bool force>
-void InMemoryStorage::CollectGarbage(std::unique_lock<utils::ResourceLock> main_guard) {
+template <bool aggressive = true>
+void InMemoryStorage::CollectGarbage(std::unique_lock<utils::ResourceLock> main_guard, bool periodic) {
   // NOTE: You do not need to consider cleanup of deleted object that occurred in
   // different storage modes within the same CollectGarbage call. This is because
   // SetStorageMode will ensure CollectGarbage is called before any new transactions
   // with the new storage mode can start.
   // SetStorageMode will pass its unique_lock of main_lock_. We will use that lock,
   // as reacquiring the lock would cause deadlock. Otherwise, we need to get our own
   // lock.
   if (!main_guard.owns_lock()) {
-    if constexpr (force) {
-      // We take the unique lock on the main storage lock, so we can forcefully clean
-      // everything we can
-      if (!main_lock_.try_lock()) {
-        CollectGarbage<false>();
-        return;
-      }
+    if constexpr (aggressive) {
+      // We tried to be aggressive, but we do not already hold the main lock; continue as not aggressive.
+      // Perf note: do not try to get the unique lock if it was not already passed in. GC may be expensive,
+      // so do not assume it is fast; a unique lock would block all new storage transactions.
+      CollectGarbage<false>({}, periodic);
+      return;
     } else {
       // Because the garbage collector iterates through the indices and constraints
       // to clean them up, it must take the main lock for reading to make sure that
@@ -1458,17 +1455,24 @@ void InMemoryStorage::CollectGarbage(std::unique_lock<utils::ResourceLock> main_
   }

   utils::OnScopeExit lock_releaser{[&] {
-    if (!main_guard.owns_lock()) {
-      if constexpr (force) {
-        main_lock_.unlock();
-      } else {
-        main_lock_.unlock_shared();
-      }
-    } else {
+    if (main_guard.owns_lock()) {
       main_guard.unlock();
+    } else {
+      main_lock_.unlock_shared();
     }
   }};

+  // Only one gc run at a time
+  std::unique_lock<std::mutex> gc_guard(gc_lock_, std::try_to_lock);
+  if (!gc_guard.owns_lock()) {
+    return;
+  }
+
+  // Diagnostic trace
+  spdlog::trace("Storage GC on '{}' started [{}]", name(), periodic ? "periodic" : "forced");
+  auto trace_on_exit = utils::OnScopeExit{
+      [&] { spdlog::trace("Storage GC on '{}' finished [{}]", name(), periodic ? "periodic" : "forced"); }};
+
   // Garbage collection must be performed in two phases. In the first phase,
   // deltas that won't be applied by any transaction anymore are unlinked from
   // the version chains. They cannot be deleted immediately, because there
@@ -1476,27 +1480,29 @@ void InMemoryStorage::CollectGarbage(std::unique_lock<utils::ResourceLock> main_
   // chain traversal. They are instead marked for deletion and will be deleted
   // in the second GC phase in this GC iteration or some of the following
   // ones.
-  std::unique_lock<std::mutex> gc_guard(gc_lock_, std::try_to_lock);
-  if (!gc_guard.owns_lock()) {
-    return;
-  }

   uint64_t oldest_active_start_timestamp = commit_log_->OldestActive();
-  // Deltas from previous GC runs or from aborts can be cleaned up here
-  garbage_undo_buffers_.WithLock([&](auto &garbage_undo_buffers) {
-    if constexpr (force) {
-      // if force is set to true we can simply delete all the leftover undos because
-      // no transaction is active
-      garbage_undo_buffers.clear();
-    } else {
-      // garbage_undo_buffers is ordered, pop until we can't
-      while (!garbage_undo_buffers.empty() &&
-             garbage_undo_buffers.front().mark_timestamp_ <= oldest_active_start_timestamp) {
-        garbage_undo_buffers.pop_front();
+
+  {
+    std::unique_lock<utils::SpinLock> guard(engine_lock_);
+    uint64_t mark_timestamp = timestamp_;  // a timestamp no active transaction can currently have
+
+    // Deltas from previous GC runs or from aborts can be cleaned up here
+    garbage_undo_buffers_.WithLock([&](auto &garbage_undo_buffers) {
+      guard.unlock();
+      if (aggressive or mark_timestamp == oldest_active_start_timestamp) {
+        // We know no transaction is active, it is safe to simply delete all the garbage undos
+        // Nothing can be reading them
+        garbage_undo_buffers.clear();
+      } else {
+        // garbage_undo_buffers is ordered, pop until we can't
+        while (!garbage_undo_buffers.empty() &&
+               garbage_undo_buffers.front().mark_timestamp_ <= oldest_active_start_timestamp) {
+          garbage_undo_buffers.pop_front();
+        }
       }
-    }
-  });
+    });
+  }

   // We don't move undo buffers of unlinked transactions to garbage_undo_buffers
   // list immediately, because we would have to repeatedly take
@@ -1694,7 +1700,8 @@ void InMemoryStorage::CollectGarbage(std::unique_lock<utils::ResourceLock> main_
     std::unique_lock<utils::SpinLock> guard(engine_lock_);
     uint64_t mark_timestamp = timestamp_;  // a timestamp no active transaction can currently have
-    if (force or mark_timestamp == oldest_active_start_timestamp) {
+    if (aggressive or mark_timestamp == oldest_active_start_timestamp) {
+      guard.unlock();
       // if lucky, there are no active transactions, hence nothing looking at the deltas
       // remove them now
       unlinked_undo_buffers.clear();
@@ -1756,8 +1763,8 @@ void InMemoryStorage::CollectGarbage(std::unique_lock<utils::ResourceLock> main_
 }

 // tell the linker he can find the CollectGarbage definitions here
-template void InMemoryStorage::CollectGarbage<true>(std::unique_lock<utils::ResourceLock>);
-template void InMemoryStorage::CollectGarbage<false>(std::unique_lock<utils::ResourceLock>);
+template void InMemoryStorage::CollectGarbage<true>(std::unique_lock<utils::ResourceLock> main_guard, bool periodic);
+template void InMemoryStorage::CollectGarbage<false>(std::unique_lock<utils::ResourceLock> main_guard, bool periodic);

 StorageInfo InMemoryStorage::GetBaseInfo() {
   StorageInfo info{};
@@ -2108,50 +2115,35 @@ void InMemoryStorage::AppendToWalDataDefinition(durability::StorageMetadataOpera
 utils::BasicResult<InMemoryStorage::CreateSnapshotError> InMemoryStorage::CreateSnapshot(
     memgraph::replication_coordination_glue::ReplicationRole replication_role) {
-  if (replication_role == memgraph::replication_coordination_glue::ReplicationRole::REPLICA) {
+  using memgraph::replication_coordination_glue::ReplicationRole;
+  if (replication_role == ReplicationRole::REPLICA) {
     return InMemoryStorage::CreateSnapshotError::DisabledForReplica;
   }

-  auto const &epoch = repl_storage_state_.epoch_;
-  auto snapshot_creator = [this, &epoch]() {
-    utils::Timer timer;
-    auto transaction = CreateTransaction(IsolationLevel::SNAPSHOT_ISOLATION, storage_mode_,
-                                         memgraph::replication_coordination_glue::ReplicationRole::MAIN);
-    durability::CreateSnapshot(this, &transaction, recovery_.snapshot_directory_, recovery_.wal_directory_, &vertices_,
-                               &edges_, uuid_, epoch, repl_storage_state_.history, &file_retainer_);
-    // Finalize snapshot transaction.
-    commit_log_->MarkFinished(transaction.start_timestamp);
-    memgraph::metrics::Measure(memgraph::metrics::SnapshotCreationLatency_us,
-                               std::chrono::duration_cast<std::chrono::microseconds>(timer.Elapsed()).count());
-  };

   std::lock_guard snapshot_guard(snapshot_lock_);
-  auto should_try_shared{true};
-  auto max_num_tries{10};
-  while (max_num_tries) {
-    if (should_try_shared) {
-      std::shared_lock storage_guard(main_lock_);
-      if (storage_mode_ == memgraph::storage::StorageMode::IN_MEMORY_TRANSACTIONAL) {
-        snapshot_creator();
-        return {};
-      }
+  auto accessor = std::invoke([&]() {
+    if (storage_mode_ == StorageMode::IN_MEMORY_ANALYTICAL) {
+      // For analytical no other txn can be in play
+      return UniqueAccess(ReplicationRole::MAIN, IsolationLevel::SNAPSHOT_ISOLATION);
     } else {
-      std::unique_lock main_guard{main_lock_};
-      if (storage_mode_ == memgraph::storage::StorageMode::IN_MEMORY_ANALYTICAL) {
-        snapshot_creator();
-        return {};
-      }
+      return Access(ReplicationRole::MAIN, IsolationLevel::SNAPSHOT_ISOLATION);
     }
-    should_try_shared = !should_try_shared;
-    max_num_tries--;
-  }
+  });

-  return CreateSnapshotError::ReachedMaxNumTries;
+  utils::Timer timer;
+  Transaction *transaction = accessor->GetTransaction();
+  auto const &epoch = repl_storage_state_.epoch_;
+  durability::CreateSnapshot(this, transaction, recovery_.snapshot_directory_, recovery_.wal_directory_, &vertices_,
+                             &edges_, uuid_, epoch, repl_storage_state_.history, &file_retainer_);
+  memgraph::metrics::Measure(memgraph::metrics::SnapshotCreationLatency_us,
+                             std::chrono::duration_cast<std::chrono::microseconds>(timer.Elapsed()).count());
+  return {};
 }

-void InMemoryStorage::FreeMemory(std::unique_lock<utils::ResourceLock> main_guard) {
-  CollectGarbage<true>(std::move(main_guard));
+void InMemoryStorage::FreeMemory(std::unique_lock<utils::ResourceLock> main_guard, bool periodic) {
+  CollectGarbage(std::move(main_guard), periodic);

   static_cast<InMemoryLabelIndex *>(indices_.label_index_.get())->RunGC();
   static_cast<InMemoryLabelPropertyIndex *>(indices_.label_property_index_.get())->RunGC();


@@ -332,7 +332,7 @@ class InMemoryStorage final : public Storage {
   std::unique_ptr<Accessor> UniqueAccess(memgraph::replication_coordination_glue::ReplicationRole replication_role,
                                          std::optional<IsolationLevel> override_isolation_level) override;
-  void FreeMemory(std::unique_lock<utils::ResourceLock> main_guard) override;
+  void FreeMemory(std::unique_lock<utils::ResourceLock> main_guard, bool periodic) override;

   utils::FileRetainer::FileLockerAccessor::ret_type IsPathLocked();
   utils::FileRetainer::FileLockerAccessor::ret_type LockPath();
@@ -363,7 +363,7 @@ class InMemoryStorage final : public Storage {
   /// @throw std::system_error
   /// @throw std::bad_alloc
   template <bool force>
-  void CollectGarbage(std::unique_lock<utils::ResourceLock> main_guard = {});
+  void CollectGarbage(std::unique_lock<utils::ResourceLock> main_guard, bool periodic);

   bool InitializeWalFile(memgraph::replication::ReplicationEpoch &epoch);
   void FinalizeWalFile();


@@ -1051,6 +1051,14 @@ struct SpecificPropertyAndBufferInfo {
   uint64_t all_size;
 };

+// Struct used to return info about the property position
+struct SpecificPropertyAndBufferInfoMinimal {
+  uint64_t property_begin;
+  uint64_t property_end;
+
+  auto property_size() const { return property_end - property_begin; }
+};
+
 // Function used to find the position where the property should be in the data
 // buffer. It keeps the properties in the buffer sorted by `PropertyId` and
 // returns the positions in the buffer where the seeked property starts and
@@ -1083,6 +1091,27 @@ SpecificPropertyAndBufferInfo FindSpecificPropertyAndBufferInfo(Reader *reader,
   return {property_begin, property_end, property_end - property_begin, all_begin, all_end, all_end - all_begin};
 }

+// Like FindSpecificPropertyAndBufferInfo, but will early exit. No need to find the "all" information
+SpecificPropertyAndBufferInfoMinimal FindSpecificPropertyAndBufferInfoMinimal(Reader *reader, PropertyId property) {
+  uint64_t property_begin = reader->GetPosition();
+  while (true) {
+    switch (HasExpectedProperty(reader, property)) {
+      case ExpectedPropertyStatus::MISSING_DATA:
+        [[fallthrough]];
+      case ExpectedPropertyStatus::GREATER: {
+        return {0, 0};
+      }
+      case ExpectedPropertyStatus::EQUAL: {
+        return {property_begin, reader->GetPosition()};
+      }
+      case ExpectedPropertyStatus::SMALLER: {
+        property_begin = reader->GetPosition();
+        break;
+      }
+    }
+  }
+}
+
 // All data buffers will be allocated to a power of 8 size.
 uint64_t ToPowerOf8(uint64_t size) {
   uint64_t mod = size % 8;
@@ -1254,11 +1283,12 @@ bool PropertyStore::IsPropertyEqual(PropertyId property, const PropertyValue &va
   BufferInfo buffer_info = GetBufferInfo(buffer_);
   Reader reader(buffer_info.data, buffer_info.size);

-  auto info = FindSpecificPropertyAndBufferInfo(&reader, property);
-  if (info.property_size == 0) return value.IsNull();
-  Reader prop_reader(buffer_info.data + info.property_begin, info.property_size);
+  auto info = FindSpecificPropertyAndBufferInfoMinimal(&reader, property);
+  auto property_size = info.property_size();
+  if (property_size == 0) return value.IsNull();
+  Reader prop_reader(buffer_info.data + info.property_begin, property_size);
   if (!CompareExpectedProperty(&prop_reader, property, value)) return false;
-  return prop_reader.GetPosition() == info.property_size;
+  return prop_reader.GetPosition() == property_size;
 }

 std::map<PropertyId, PropertyValue> PropertyStore::Properties() const {


@@ -284,6 +284,8 @@ class Storage {
     virtual UniqueConstraints::DeletionStatus DropUniqueConstraint(LabelId label,
                                                                    const std::set<PropertyId> &properties) = 0;

+    auto GetTransaction() -> Transaction * { return std::addressof(transaction_); }
+
    protected:
     Storage *storage_;
     std::shared_lock<utils::ResourceLock> storage_guard_;
@@ -336,9 +338,15 @@ class Storage {
   StorageMode GetStorageMode() const noexcept;

-  virtual void FreeMemory(std::unique_lock<utils::ResourceLock> main_guard) = 0;
-  void FreeMemory() { FreeMemory({}); }
+  virtual void FreeMemory(std::unique_lock<utils::ResourceLock> main_guard, bool periodic) = 0;
+  void FreeMemory() {
+    if (storage_mode_ == StorageMode::IN_MEMORY_ANALYTICAL) {
+      FreeMemory(std::unique_lock{main_lock_}, false);
+    } else {
+      FreeMemory({}, false);
+    }
+  }

   virtual std::unique_ptr<Accessor> Access(memgraph::replication_coordination_glue::ReplicationRole replication_role,
                                            std::optional<IsolationLevel> override_isolation_level) = 0;


@@ -66,6 +66,17 @@ struct ResourceLock {
     }
     return false;
   }
+  template <typename Rep, typename Period>
+  bool try_lock_shared_for(std::chrono::duration<Rep, Period> const &time) {
+    auto lock = std::unique_lock{mtx};
+    // block until available
+    if (!cv.wait_for(lock, time, [this] { return state != UNIQUE; })) return false;
+    state = SHARED;
+    ++count;
+    return true;
+  }
   void unlock() {
     auto lock = std::unique_lock{mtx};
     state = UNLOCKED;