Implement deferred vertex/edge deletion

Summary: this will make GC for indices easier

Reviewers: teon.banek, mferencevic

Reviewed By: teon.banek, mferencevic

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D2223
This commit is contained in:
Marin Tomic 2019-07-22 17:05:00 +02:00
parent 08bf993a47
commit ffa76e59c9
2 changed files with 108 additions and 81 deletions

View File

@ -318,18 +318,19 @@ void Storage::Accessor::Commit() {
// Take committed_transactions lock while holding the engine lock to // Take committed_transactions lock while holding the engine lock to
// make sure that committed transactions are sorted by the commit // make sure that committed transactions are sorted by the commit
// timestamp in the list. // timestamp in the list.
std::lock_guard<utils::SpinLock> committed_transactions_guard( storage_->committed_transactions_.WithLock(
storage_->committed_transactions_lock_); [&](auto &committed_transactions) {
// TODO: release lock, and update all deltas to have a local copy // TODO: release lock, and update all deltas to have a local copy
// of the commit timestamp // of the commit timestamp
CHECK(transaction_.commit_timestamp != nullptr) CHECK(transaction_.commit_timestamp != nullptr)
<< "Invalid database state!"; << "Invalid database state!";
transaction_.commit_timestamp->store(commit_timestamp, transaction_.commit_timestamp->store(commit_timestamp,
std::memory_order_release); std::memory_order_release);
// Release engine lock because we don't have to hold it anymore and // Release engine lock because we don't have to hold it anymore and
// emplace back could take a long time. // emplace back could take a long time.
engine_guard.unlock(); engine_guard.unlock();
storage_->committed_transactions_.emplace_back(std::move(transaction_)); committed_transactions.emplace_back(std::move(transaction_));
});
} }
storage_->commit_log_.MarkFinished(start_timestamp); storage_->commit_log_.MarkFinished(start_timestamp);
@ -344,12 +345,11 @@ void Storage::Accessor::Commit() {
void Storage::Accessor::Abort() { void Storage::Accessor::Abort() {
CHECK(is_transaction_active_) << "The transaction is already terminated!"; CHECK(is_transaction_active_) << "The transaction is already terminated!";
// We acquire accessors to the storage `SkipList`s so that we can safely // We collect vertices and edges we've created here and then splice them into
// delete objects in the lists. While the accessor is alive, all objects (even // `deleted_vertices_` and `deleted_edges_` lists, instead of adding them one
// deleted ones) are guaranteed to still exist and pointers to them are still // by one and acquiring lock every time.
// valid. std::list<Gid> my_deleted_vertices;
auto vertex_acc = storage_->vertices_.access(); std::list<Gid> my_deleted_edges;
auto edge_acc = storage_->edges_.access();
for (const auto &delta : transaction_.deltas) { for (const auto &delta : transaction_.deltas) {
auto prev = delta.prev.Get(); auto prev = delta.prev.Get();
@ -436,8 +436,8 @@ void Storage::Accessor::Abort() {
break; break;
} }
case Delta::Action::DELETE_OBJECT: { case Delta::Action::DELETE_OBJECT: {
CHECK(vertex_acc.remove(vertex->gid)) vertex->deleted = true;
<< "Invalid database state!"; my_deleted_vertices.push_back(vertex->gid);
break; break;
} }
case Delta::Action::RECREATE_OBJECT: { case Delta::Action::RECREATE_OBJECT: {
@ -479,7 +479,8 @@ void Storage::Accessor::Abort() {
break; break;
} }
case Delta::Action::DELETE_OBJECT: { case Delta::Action::DELETE_OBJECT: {
CHECK(edge_acc.remove(edge->gid)) << "Invalid database state!"; edge->deleted = true;
my_deleted_edges.push_back(edge->gid);
break; break;
} }
case Delta::Action::RECREATE_OBJECT: { case Delta::Action::RECREATE_OBJECT: {
@ -513,15 +514,21 @@ void Storage::Accessor::Abort() {
{ {
std::unique_lock<utils::SpinLock> engine_guard(storage_->engine_lock_); std::unique_lock<utils::SpinLock> engine_guard(storage_->engine_lock_);
uint64_t mark_timestamp = storage_->timestamp_; uint64_t mark_timestamp = storage_->timestamp_;
// Take aborted_undo_buffers lock while holding the engine lock to make // Take garbage_undo_buffers lock while holding the engine lock to make
// sure that entries are sorted by mark timestamp in the list. // sure that entries are sorted by mark timestamp in the list.
std::lock_guard<utils::SpinLock> aborted_undo_buffers_guard( storage_->garbage_undo_buffers_.WithLock([&](auto &garbage_undo_buffers) {
storage_->aborted_undo_buffers_lock_); // Release engine lock because we don't have to hold it anymore and
// Release engine lock because we don't have to hold it anymore and // emplace back could take a long time.
// emplace back could take a long time. engine_guard.unlock();
engine_guard.unlock(); garbage_undo_buffers.emplace_back(mark_timestamp,
storage_->aborted_undo_buffers_.emplace_back( std::move(transaction_.deltas));
mark_timestamp, std::move(transaction_.deltas)); });
storage_->deleted_vertices_.WithLock([&](auto &deleted_vertices) {
deleted_vertices.splice(deleted_vertices.begin(), my_deleted_vertices);
});
storage_->deleted_edges_.WithLock([&](auto &deleted_edges) {
deleted_edges.splice(deleted_edges.begin(), my_deleted_edges);
});
} }
storage_->commit_log_.MarkFinished(transaction_.start_timestamp); storage_->commit_log_.MarkFinished(transaction_.start_timestamp);
@ -559,23 +566,29 @@ void Storage::CollectGarbage() {
} }
uint64_t oldest_active_start_timestamp = commit_log_.OldestActive(); uint64_t oldest_active_start_timestamp = commit_log_.OldestActive();
// We don't move undo buffers of unlinked transactions to // We don't move undo buffers of unlinked transactions to garbage_undo_buffers
// marked_undo_buffers list immediately, because we would have to repeatedly // list immediately, because we would have to repeatedly take
// take transaction engine lock. // garbage_undo_buffers lock.
std::list<Transaction> unlinked; std::list<std::pair<uint64_t, std::list<Delta>>> unlinked_undo_buffers;
// We will free only vertices deleted up until now in this GC cycle.
// Otherwise, GC cycle might be prolonged by aborted transactions adding new
// edges and vertices to the lists over and over again.
std::list<Gid> current_deleted_edges;
std::list<Gid> current_deleted_vertices;
deleted_vertices_->swap(current_deleted_vertices);
deleted_edges_->swap(current_deleted_edges);
while (true) { while (true) {
// We don't want to hold the lock on commited transactions for too long, // We don't want to hold the lock on commited transactions for too long,
// because that prevents other transactions from committing. // because that prevents other transactions from committing.
Transaction *transaction; Transaction *transaction;
{ {
std::lock_guard<utils::SpinLock> guard(committed_transactions_lock_); auto committed_transactions_ptr = committed_transactions_.Lock();
if (committed_transactions_.empty()) { if (committed_transactions_ptr->empty()) {
break; break;
} }
transaction = &committed_transactions_ptr->front();
transaction = &committed_transactions_.front();
} }
if (transaction->commit_timestamp->load(std::memory_order_acquire) >= if (transaction->commit_timestamp->load(std::memory_order_acquire) >=
@ -583,7 +596,7 @@ void Storage::CollectGarbage() {
break; break;
} }
// When unlinking a delta which is a first delta in its version chain, // When unlinking a delta which is the first delta in its version chain,
// special care has to be taken to avoid the following race condition: // special care has to be taken to avoid the following race condition:
// //
// [Vertex] --> [Delta A] // [Vertex] --> [Delta A]
@ -616,11 +629,7 @@ void Storage::CollectGarbage() {
} }
vertex->delta = nullptr; vertex->delta = nullptr;
if (vertex->deleted) { if (vertex->deleted) {
// We must unlock the guard now because the lock it is holding current_deleted_vertices.push_back(vertex->gid);
// might be destroyed when vertex is deleted.
vertex_guard.unlock();
auto acc = vertices_.access();
acc.remove(vertex->gid);
} }
break; break;
} }
@ -633,12 +642,7 @@ void Storage::CollectGarbage() {
continue; continue;
} }
if (edge->deleted) { if (edge->deleted) {
// We must unlock the guard now because the lock it is holding current_deleted_edges.push_back(edge->gid);
// might be destroyed when vertex is deleted.
edge_guard.unlock();
auto acc = edges_.access();
acc.remove(edge->gid);
} }
break; break;
} }
@ -652,38 +656,54 @@ void Storage::CollectGarbage() {
} }
} }
{ committed_transactions_.WithLock([&](auto &committed_transactions) {
std::lock_guard<utils::SpinLock> guard(committed_transactions_lock_); unlinked_undo_buffers.emplace_back(0, std::move(transaction->deltas));
// We use splice here to avoid allocating storage for new list nodes. committed_transactions.pop_front();
unlinked.splice(unlinked.begin(), committed_transactions_, });
committed_transactions_.begin());
}
} }
uint64_t mark_timestamp;
{ {
std::lock_guard<utils::SpinLock> guard(engine_lock_); uint64_t mark_timestamp;
std::unique_lock<utils::SpinLock> guard(engine_lock_);
mark_timestamp = timestamp_; mark_timestamp = timestamp_;
} // Take garbage_undo_buffers lock while holding the engine lock to make
// sure that entries are sorted by mark timestamp in the list.
for (auto &transaction : unlinked) { garbage_undo_buffers_.WithLock([&](auto &garbage_undo_buffers) {
marked_undo_buffers_.emplace_back(mark_timestamp, // Release engine lock because we don't have to hold it anymore and
std::move(transaction.deltas)); // this could take a long time.
} guard.unlock();
// TODO(mtomic): holding garbage_undo_buffers_ lock here prevents
while (!marked_undo_buffers_.empty() && // transactions from aborting until we're done marking, maybe we should
marked_undo_buffers_.front().first < oldest_active_start_timestamp) { // add them one-by-one or something
marked_undo_buffers_.pop_front(); for (auto &[timestamp, undo_buffer] : unlinked_undo_buffers) {
timestamp = mark_timestamp;
}
garbage_undo_buffers.splice(garbage_undo_buffers.end(),
unlinked_undo_buffers);
});
} }
while (true) { while (true) {
std::lock_guard<utils::SpinLock> aborted_undo_buffers_guard( auto garbage_undo_buffers_ptr = garbage_undo_buffers_.Lock();
aborted_undo_buffers_lock_); if (garbage_undo_buffers_ptr->empty() ||
if (aborted_undo_buffers_.empty() || garbage_undo_buffers_ptr->front().first >=
aborted_undo_buffers_.front().first >= oldest_active_start_timestamp) { oldest_active_start_timestamp) {
break; break;
} }
aborted_undo_buffers_.pop_front(); garbage_undo_buffers_ptr->pop_front();
}
{
auto vertex_acc = vertices_.access();
for (auto vertex : current_deleted_vertices) {
CHECK(vertex_acc.remove(vertex)) << "Invalid database state!";
}
}
{
auto edge_acc = edges_.access();
for (auto edge : current_deleted_edges) {
CHECK(edge_acc.remove(edge)) << "Invalid database state!";
}
} }
} }

View File

@ -15,6 +15,7 @@
#include "utils/rw_lock.hpp" #include "utils/rw_lock.hpp"
#include "utils/scheduler.hpp" #include "utils/scheduler.hpp"
#include "utils/skip_list.hpp" #include "utils/skip_list.hpp"
#include "utils/synchronized.hpp"
namespace storage { namespace storage {
@ -124,6 +125,8 @@ class Storage final {
std::atomic<uint64_t> vertex_id_{0}; std::atomic<uint64_t> vertex_id_{0};
std::atomic<uint64_t> edge_id_{0}; std::atomic<uint64_t> edge_id_{0};
NameIdMapper name_id_mapper_;
// Transaction engine // Transaction engine
utils::SpinLock engine_lock_; utils::SpinLock engine_lock_;
uint64_t timestamp_{kTimestampInitialId}; uint64_t timestamp_{kTimestampInitialId};
@ -134,18 +137,22 @@ class Storage final {
// whatever. // whatever.
CommitLog commit_log_; CommitLog commit_log_;
NameIdMapper name_id_mapper_; utils::Synchronized<std::list<Transaction>, utils::SpinLock>
committed_transactions_;
utils::SpinLock committed_transactions_lock_;
std::list<Transaction> committed_transactions_;
utils::SpinLock aborted_undo_buffers_lock_;
std::list<std::pair<uint64_t, std::list<Delta>>> aborted_undo_buffers_;
StorageGcConfig gc_config_; StorageGcConfig gc_config_;
utils::Scheduler gc_runner_; utils::Scheduler gc_runner_;
std::mutex gc_lock_; std::mutex gc_lock_;
std::list<std::pair<uint64_t, std::list<Delta>>> marked_undo_buffers_;
// Undo buffers that were unlinked and now are waiting to be freed.
utils::Synchronized<std::list<std::pair<uint64_t, std::list<Delta>>>,
utils::SpinLock>
garbage_undo_buffers_;
// Vertices that are logically deleted and now are waiting to be removed from
// the main storage.
utils::Synchronized<std::list<Gid>, utils::SpinLock> deleted_vertices_;
utils::Synchronized<std::list<Gid>, utils::SpinLock> deleted_edges_;
}; };
} // namespace storage } // namespace storage