Remove old HA implementation

Reviewers: teon.banek, ipaljak

Reviewed By: teon.banek

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D2686
This commit is contained in:
Matej Ferencevic 2020-02-25 16:19:27 +01:00
parent 4e5a91e7fb
commit bfbace8168
87 changed files with 0 additions and 10779 deletions

View File

@ -17,112 +17,6 @@ if (MG_ENTERPRISE)
add_subdirectory(rpc)
endif()
## ----------------------------------------------------------------------------
## Common LCP files
## ----------------------------------------------------------------------------
#
#define_add_lcp(add_lcp_common lcp_common_cpp_files generated_lcp_common_files)
#
#add_lcp_common(query/frontend/ast/ast.lcp)
#add_lcp_common(query/frontend/semantic/symbol.lcp)
#add_lcp_common(query/plan/operator.lcp)
#
#add_custom_target(generate_lcp_common DEPENDS ${generated_lcp_common_files})
#
## ----------------------------------------------------------------------------
## END Common LCP files
## ----------------------------------------------------------------------------
#
## ----------------------------------------------------------------------------
## Memgraph Single Node High Availability
## ----------------------------------------------------------------------------
#set(mg_single_node_ha_sources
# ${lcp_common_cpp_files}
# data_structures/concurrent/skiplist_gc.cpp
# database/single_node_ha/config.cpp
# database/single_node_ha/graph_db.cpp
# database/single_node_ha/graph_db_accessor.cpp
# durability/single_node_ha/state_delta.cpp
# durability/single_node_ha/paths.cpp
# durability/single_node_ha/snapshooter.cpp
# durability/single_node_ha/recovery.cpp
# glue/communication.cpp
# raft/coordination.cpp
# raft/raft_server.cpp
# raft/storage_info.cpp
# query/common.cpp
# query/frontend/ast/cypher_main_visitor.cpp
# query/frontend/ast/pretty_print.cpp
# query/frontend/parsing.cpp
# query/frontend/semantic/required_privileges.cpp
# query/frontend/semantic/symbol_generator.cpp
# query/frontend/stripped.cpp
# query/interpret/awesome_memgraph_functions.cpp
# query/interpreter.cpp
# query/plan/operator.cpp
# query/plan/preprocess.cpp
# query/plan/pretty_print.cpp
# query/plan/profile.cpp
# query/plan/rewrite/index_lookup.cpp
# query/plan/rule_based_planner.cpp
# query/plan/variable_start_planner.cpp
# query/procedure/mg_procedure_impl.cpp
# query/procedure/module.cpp
# query/typed_value.cpp
# storage/common/constraints/record.cpp
# storage/common/constraints/unique_constraints.cpp
# storage/common/types/slk.cpp
# storage/common/types/property_value_store.cpp
# storage/common/locking/record_lock.cpp
# storage/single_node_ha/edge_accessor.cpp
# storage/single_node_ha/record_accessor.cpp
# storage/single_node_ha/vertex_accessor.cpp
# transactions/single_node_ha/engine.cpp
# memgraph_init.cpp
#)
#if (MG_ENTERPRISE)
# set(mg_single_node_ha_sources
# ${mg_single_node_ha_sources}
# glue/auth.cpp)
#endif()
#
#define_add_lcp(add_lcp_single_node_ha mg_single_node_ha_sources generated_lcp_single_node_ha_files)
#
#add_lcp_single_node_ha(durability/single_node_ha/state_delta.lcp)
#add_lcp_single_node_ha(database/single_node_ha/serialization.lcp SLK_SERIALIZE
# DEPENDS durability/single_node_ha/state_delta.lcp)
#add_lcp_single_node_ha(raft/raft_rpc_messages.lcp SLK_SERIALIZE)
#add_lcp_single_node_ha(raft/log_entry.lcp SLK_SERIALIZE)
#add_lcp_single_node_ha(raft/snapshot_metadata.lcp SLK_SERIALIZE)
#add_lcp_single_node_ha(raft/storage_info_rpc_messages.lcp SLK_SERIALIZE)
#
#add_custom_target(generate_lcp_single_node_ha DEPENDS generate_lcp_common ${generated_lcp_single_node_ha_files})
#
#set(MG_SINGLE_NODE_HA_LIBS stdc++fs Threads::Threads fmt cppitertools
# antlr_opencypher_parser_lib dl glog gflags
# mg-utils mg-io mg-requests mg-communication mg-rpc
# mg-auth mg-audit)
#
#add_library(mg-single-node-ha STATIC ${mg_single_node_ha_sources})
#target_include_directories(mg-single-node-ha PUBLIC ${CMAKE_SOURCE_DIR}/include)
#target_link_libraries(mg-single-node-ha ${MG_SINGLE_NODE_HA_LIBS})
#add_dependencies(mg-single-node-ha generate_opencypher_parser)
#add_dependencies(mg-single-node-ha generate_lcp_single_node_ha)
#target_compile_definitions(mg-single-node-ha PUBLIC MG_SINGLE_NODE_HA)
## TODO: Make these symbols visible once we add support for custom procedure
## modules in HA.
## NOTE: `include/mg_procedure.syms` describes a pattern match for symbols which
## should be dynamically exported, so that `dlopen` can correctly link the
## symbols in custom procedure module libraries.
## target_link_libraries(mg-single-node-ha "-Wl,--dynamic-list=${CMAKE_SOURCE_DIR}/include/mg_procedure.syms")
#
## ----------------------------------------------------------------------------
## END Memgraph Single Node High Availability
## ----------------------------------------------------------------------------
#
#add_custom_target(generate_lcp)
#add_dependencies(generate_lcp generate_lcp_single_node_ha)
string(TOLOWER ${CMAKE_BUILD_TYPE} lower_build_type)
# Generate a version.hpp file
@ -218,18 +112,3 @@ endif()
# Create empty directories for default location of lib and log.
install(CODE "file(MAKE_DIRECTORY \$ENV{DESTDIR}/var/log/memgraph
                                  \$ENV{DESTDIR}/var/lib/memgraph)")
## memgraph single node high availability executable
#add_executable(memgraph_ha memgraph_ha.cpp)
#target_link_libraries(memgraph_ha mg-single-node-ha mg-kvstore telemetry_lib)
#set_target_properties(memgraph_ha PROPERTIES
# # Set the executable output name to include version information.
# OUTPUT_NAME "memgraph_ha-${MEMGRAPH_VERSION}_${CMAKE_BUILD_TYPE}"
# # Output the executable in main binary dir.
# RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
## Create symlink to the built executable.
#add_custom_command(TARGET memgraph_ha POST_BUILD
# COMMAND ${CMAKE_COMMAND} -E create_symlink $<TARGET_FILE:memgraph_ha> ${CMAKE_BINARY_DIR}/memgraph_ha
# BYPRODUCTS ${CMAKE_BINARY_DIR}/memgraph_ha
# COMMENT "Creating symlink to memgraph single node high availability executable")

View File

@ -1,5 +0,0 @@
// Forwarding header: exposes the single-node HA GraphDb implementation only
// when the MG_SINGLE_NODE_HA build flavor is enabled.
#pragma once
#ifdef MG_SINGLE_NODE_HA
#include "database/single_node_ha/graph_db.hpp"
#endif

View File

@ -1,5 +0,0 @@
// Forwarding header: exposes the single-node HA GraphDbAccessor implementation
// only when the MG_SINGLE_NODE_HA build flavor is enabled.
#pragma once
#ifdef MG_SINGLE_NODE_HA
#include "database/single_node_ha/graph_db_accessor.hpp"
#endif

View File

@ -1,51 +0,0 @@
// Implementation of database::Config for single-node HA: declares the gflags
// command-line flags and copies their values into a Config instance.
#include "database/single_node_ha/config.hpp"
#include <limits>
#include <thread>
#include "utils/flag_validation.hpp"
#include "utils/string.hpp"
// Durability flags.
DEFINE_string(
durability_directory, "durability",
"Path to directory in which to save snapshots and write-ahead log files.");
DEFINE_bool(db_recover_on_startup, true, "Recover database on startup.");
// Misc flags
DEFINE_int32(query_execution_time_sec, 180,
"Maximum allowed query execution time. Queries exceeding this "
"limit will be aborted. Value of -1 means no limit.");
DEFINE_int32(gc_cycle_sec, 30,
"Amount of time between starts of two cleaning cycles in seconds. "
"-1 to turn off.");
// Data location.
DEFINE_string(properties_on_disk, "",
"Property names of properties which will be stored on available "
"disk. Property names have to be separated with comma (,).");
// High availability.
DEFINE_string(
coordination_config_file, "coordination.json",
"Path to the file containing coordination configuration in JSON format");
DEFINE_string(raft_config_file, "raft.json",
"Path to the file containing raft configuration in JSON format");
// Range-validated: server_id is narrowed to uint16_t in the constructor below,
// so the flag must fit in [1, 65535].
DEFINE_VALIDATED_int32(
server_id, 1U, "Id used in the coordination configuration for this machine",
FLAG_IN_RANGE(1, std::numeric_limits<uint16_t>::max()));
// Default constructor: snapshots the current flag values. properties_on_disk
// is parsed from the comma-separated flag into a vector of property names.
database::Config::Config()
// Durability flags.
: durability_directory{FLAGS_durability_directory},
db_recover_on_startup{FLAGS_db_recover_on_startup},
// Misc flags.
gc_cycle_sec{FLAGS_gc_cycle_sec},
query_execution_time_sec{FLAGS_query_execution_time_sec},
// Data location.
properties_on_disk(utils::Split(FLAGS_properties_on_disk, ",")),
// High availability.
coordination_config_file{FLAGS_coordination_config_file},
raft_config_file{FLAGS_raft_config_file},
server_id{static_cast<uint16_t>(FLAGS_server_id)} {}

View File

@ -1,31 +0,0 @@
/// @file
#pragma once
#include <cstdint>
#include <string>
#include <vector>
namespace database {
/// Database configuration. Initialized from flags, but modifiable.
/// The default constructor (defined in config.cpp) copies the values of the
/// corresponding gflags command-line flags.
struct Config {
Config();
// Durability flags.
// Directory holding snapshots and write-ahead log files.
std::string durability_directory;
// Whether to run recovery from the durability directory on startup.
bool db_recover_on_startup;
// Misc flags.
// Seconds between garbage-collection cycles; -1 disables GC.
int gc_cycle_sec;
// Maximum allowed query execution time in seconds; -1 means no limit.
int query_execution_time_sec;
// set of properties which will be stored on disk
std::vector<std::string> properties_on_disk;
// HA flags.
// Path to the JSON coordination configuration file.
std::string coordination_config_file;
// Path to the JSON Raft configuration file.
std::string raft_config_file;
// This machine's id in the coordination configuration.
uint16_t server_id;
};
} // namespace database

View File

@ -1,103 +0,0 @@
// Implementation of database::GraphDb for single-node HA. Ties together the
// Raft server, coordination, transaction engine, storage and GC.
#include "database/single_node_ha/graph_db.hpp"
#include <optional>
#include <glog/logging.h>
#include "database/single_node_ha/graph_db_accessor.hpp"
#include "storage/single_node_ha/concurrent_id_mapper.hpp"
#include "storage/single_node_ha/storage_gc.hpp"
#include "transactions/single_node_ha/engine.hpp"
namespace database {
GraphDb::GraphDb(Config config) : config_(config) {}
// Brings up the durability directory, Raft server, storage info and
// coordination (in that order), then optionally starts the periodic
// "transaction killer" that aborts long-running transactions.
void GraphDb::Start() {
utils::EnsureDirOrDie(config_.durability_directory);
raft_server_.Start();
storage_info_.Start();
CHECK(coordination_.Start()) << "Couldn't start coordination!";
// Start transaction killer.
if (config_.query_execution_time_sec != -1) {
// Poll interval is a quarter of the limit, clamped to [1, 5] seconds.
transaction_killer_.Run(
"TX killer",
std::chrono::seconds(
std::max(1, std::min(5, config_.query_execution_time_sec / 4))),
[this]() {
tx_engine_.LocalForEachActiveTransaction([this](tx::Transaction &t) {
if (t.creation_time() +
std::chrono::seconds(config_.query_execution_time_sec) <
std::chrono::steady_clock::now()) {
t.set_should_abort();
};
});
});
}
}
// Blocks until coordination shuts down; on shutdown, flags all active
// transactions for abort, runs the supplied callback, then stops Raft.
void GraphDb::AwaitShutdown(std::function<void(void)> call_before_shutdown) {
coordination_.AwaitShutdown([this, &call_before_shutdown]() {
tx_engine_.LocalForEachActiveTransaction(
[](auto &t) { t.set_should_abort(); });
call_before_shutdown();
raft_server_.Shutdown();
});
}
// Initiates shutdown by stopping coordination; AwaitShutdown observes it.
void GraphDb::Shutdown() { coordination_.Shutdown(); }
// Accessor factories — see GraphDbAccessor for transaction ownership rules.
GraphDbAccessor GraphDb::Access() {
return GraphDbAccessor(this);
}
GraphDbAccessor GraphDb::Access(tx::TransactionId tx_id) {
return GraphDbAccessor(this, tx_id);
}
GraphDbAccessor GraphDb::AccessBlocking(
std::optional<tx::TransactionId> parent_tx) {
return GraphDbAccessor(this, parent_tx);
}
// Trivial component getters.
Storage &GraphDb::storage() { return *storage_; }
raft::RaftInterface *GraphDb::raft() { return &raft_server_; }
raft::StorageInfo *GraphDb::storage_info() { return &storage_info_; }
tx::Engine &GraphDb::tx_engine() { return tx_engine_; }
storage::StateDeltaBuffer *GraphDb::sd_buffer() { return &sd_buffer_; }
storage::ConcurrentIdMapper<storage::Label> &GraphDb::label_mapper() {
return label_mapper_;
}
storage::ConcurrentIdMapper<storage::EdgeType> &GraphDb::edge_type_mapper() {
return edge_mapper_;
}
storage::ConcurrentIdMapper<storage::Property> &GraphDb::property_mapper() {
return property_mapper_;
}
void GraphDb::CollectGarbage() { storage_gc_->CollectGarbage(); }
// Tears down and recreates storage + GC and resets the tx engine. Order
// matters: the GC scheduler must be released before storage is replaced.
void GraphDb::Reset() {
// Release gc scheduler to stop it from touching storage.
storage_gc_ = nullptr;
// This will make all active transactions to abort and reset the internal
// state.
tx_engine_.Reset();
storage_ = std::make_unique<Storage>(config_.properties_on_disk);
storage_gc_ = std::make_unique<StorageGc>(
*storage_, tx_engine_, &raft_server_, config_.gc_cycle_sec);
}
} // namespace database

View File

@ -1,156 +0,0 @@
/// @file
#pragma once
#include <atomic>
#include <memory>
#include <optional>
#include <vector>
#include "database/single_node_ha/config.hpp"
#include "io/network/endpoint.hpp"
#include "raft/coordination.hpp"
#include "raft/raft_server.hpp"
#include "raft/storage_info.hpp"
#include "storage/common/types/types.hpp"
#include "storage/single_node_ha/concurrent_id_mapper.hpp"
#include "storage/single_node_ha/state_delta_buffer.hpp"
#include "storage/single_node_ha/storage.hpp"
#include "storage/single_node_ha/storage_gc.hpp"
#include "transactions/single_node_ha/engine.hpp"
#include "utils/scheduler.hpp"
#include "utils/stat.hpp"
namespace database {
/// Struct containing basic statistics about storage.
struct Stat {
// std::atomic<int64_t> is needed as reference to stat is passed to
// other threads. If there were no std::atomic we couldn't guarantee
// that a change to any member will be visible to other threads.
/// Vertex count is number of `VersionList<Vertex>` physically stored.
std::atomic<int64_t> vertex_count{0};
/// Edge count is number of `VersionList<Edge>` physically stored.
std::atomic<int64_t> edge_count{0};
/// Average in/out degree of a vertex.
/// `avg_degree` is calculated as 2 * `edges_count` / `vertex_count`.
std::atomic<double> avg_degree{0};
};
class GraphDbAccessor;
/// An abstract base class providing the interface for a graph database.
///
/// Always be sure that GraphDb object is destructed before main exits, i.e.
/// GraphDb object shouldn't be part of global/static variable, except if its
/// destructor is explicitly called before main exits. Consider code:
///
/// GraphDb db; // KeyIndex is created as a part of database::Storage
/// int main() {
/// GraphDbAccessor dba(db);
/// auto v = dba.InsertVertex();
/// v.add_label(dba.Label(
/// "Start")); // New SkipList is created in KeyIndex for LabelIndex.
/// // That SkipList creates SkipListGc which
/// // initialises static Executor object.
/// return 0;
/// }
///
/// After main exits: 1. Executor is destructed, 2. KeyIndex is destructed.
/// Destructor of KeyIndex calls delete on created SkipLists which destroy
/// SkipListGc that tries to use an Executor object that doesn't exist anymore.
/// -> CRASH
class GraphDb {
public:
explicit GraphDb(Config config = Config());
// Not copyable or movable: members hold back-pointers to this object.
GraphDb(const GraphDb &) = delete;
GraphDb(GraphDb &&) = delete;
GraphDb &operator=(const GraphDb &) = delete;
GraphDb &operator=(GraphDb &&) = delete;
/// Starts Raft, storage info, coordination and the transaction killer.
void Start();
/// Blocks until shutdown; runs `call_before_shutdown` before stopping Raft.
void AwaitShutdown(std::function<void(void)> call_before_shutdown);
/// Initiates shutdown by stopping coordination.
void Shutdown();
/// Create a new accessor by starting a new transaction.
GraphDbAccessor Access();
GraphDbAccessor AccessBlocking(std::optional<tx::TransactionId> parent_tx);
/// Create an accessor for a running transaction.
GraphDbAccessor Access(tx::TransactionId);
Storage &storage();
raft::RaftInterface *raft();
raft::StorageInfo *storage_info();
tx::Engine &tx_engine();
storage::StateDeltaBuffer *sd_buffer();
storage::ConcurrentIdMapper<storage::Label> &label_mapper();
storage::ConcurrentIdMapper<storage::EdgeType> &edge_type_mapper();
storage::ConcurrentIdMapper<storage::Property> &property_mapper();
void CollectGarbage();
/// Releases the storage object safely and creates a new object, resets the tx
/// engine.
///
/// This is needed in HA during the leader -> follower transition where we
/// might end up with some stale transactions on the leader.
void Reset();
/// Get live view of storage stats. Gets updated on RefreshStat.
const Stat &GetStat() const { return stat_; }
/// Updates storage stats.
void RefreshStat() {
auto vertex_count = storage().vertices_.access().size();
auto edge_count = storage().edges_.access().size();
stat_.vertex_count = vertex_count;
stat_.edge_count = edge_count;
if (vertex_count != 0) {
stat_.avg_degree = 2 * static_cast<double>(edge_count) / vertex_count;
} else {
// Avoid division by zero on an empty storage.
stat_.avg_degree = 0;
}
}
/// Returns the number of bytes used by the durability directory on disk.
uint64_t GetDurabilityDirDiskUsage() const {
return utils::GetDirDiskUsage(config_.durability_directory);
}
protected:
Stat stat_;
utils::Scheduler transaction_killer_;
Config config_;
// NOTE: member initialization order below is significant — coordination_,
// raft_server_, tx_engine_ and storage_gc_ depend on earlier members.
std::unique_ptr<Storage> storage_ =
std::make_unique<Storage>(config_.properties_on_disk);
raft::Coordination coordination_{
config_.server_id,
raft::LoadNodesFromFile(config_.coordination_config_file)};
raft::RaftServer raft_server_{
config_.server_id,
config_.durability_directory,
config_.db_recover_on_startup,
raft::Config::LoadFromFile(config_.raft_config_file),
&coordination_,
this};
raft::StorageInfo storage_info_{this, &coordination_, config_.server_id};
storage::StateDeltaBuffer sd_buffer_;
tx::Engine tx_engine_{&raft_server_, &sd_buffer_};
std::unique_ptr<StorageGc> storage_gc_ = std::make_unique<StorageGc>(
*storage_, tx_engine_, &raft_server_, config_.gc_cycle_sec);
storage::ConcurrentIdMapper<storage::Label> label_mapper_{
storage_->PropertiesOnDisk()};
storage::ConcurrentIdMapper<storage::EdgeType> edge_mapper_{
storage_->PropertiesOnDisk()};
storage::ConcurrentIdMapper<storage::Property> property_mapper_{
storage_->PropertiesOnDisk()};
};
} // namespace database

View File

@ -1,556 +0,0 @@
#include "database/single_node_ha/graph_db_accessor.hpp"
#include <chrono>
#include <thread>
#include <glog/logging.h>
#include "durability/single_node_ha/state_delta.hpp"
#include "storage/common/constraints/exceptions.hpp"
#include "storage/single_node_ha/edge.hpp"
#include "storage/single_node_ha/edge_accessor.hpp"
#include "storage/single_node_ha/vertex.hpp"
#include "storage/single_node_ha/vertex_accessor.hpp"
#include "utils/cast.hpp"
#include "utils/on_scope_exit.hpp"
#include "utils/stat.hpp"
namespace database {
GraphDbAccessor::GraphDbAccessor(GraphDb *db)
: db_(db),
transaction_(db->tx_engine().Begin()),
transaction_starter_{true} {}
GraphDbAccessor::GraphDbAccessor(GraphDb *db, tx::TransactionId tx_id)
: db_(db),
transaction_(db->tx_engine().RunningTransaction(tx_id)),
transaction_starter_{false} {}
GraphDbAccessor::GraphDbAccessor(GraphDb *db,
std::optional<tx::TransactionId> parent_tx)
: db_(db),
transaction_(db->tx_engine().BeginBlocking(parent_tx)),
transaction_starter_{true} {}
GraphDbAccessor::GraphDbAccessor(GraphDbAccessor &&other)
: db_(other.db_),
transaction_(other.transaction_),
transaction_starter_(other.transaction_starter_),
commited_(other.commited_),
aborted_(other.aborted_) {
// Make sure that the other transaction isn't a transaction starter so that
// its destructor doesn't close the transaction.
other.transaction_starter_ = false;
}
GraphDbAccessor &GraphDbAccessor::operator=(GraphDbAccessor &&other) {
db_ = other.db_;
transaction_ = other.transaction_;
transaction_starter_ = other.transaction_starter_;
commited_ = other.commited_;
aborted_ = other.aborted_;
// Make sure that the other transaction isn't a transaction starter so that
// its destructor doesn't close the transaction.
other.transaction_starter_ = false;
return *this;
}
GraphDbAccessor::~GraphDbAccessor() {
if (transaction_starter_ && !commited_ && !aborted_) {
this->Abort();
}
}
tx::TransactionId GraphDbAccessor::transaction_id() const {
return transaction_->id_;
}
void GraphDbAccessor::AdvanceCommand() {
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
db_->tx_engine().Advance(transaction_->id_);
}
void GraphDbAccessor::Commit() {
DCHECK(!commited_ && !aborted_) << "Already aborted or commited transaction.";
db_->tx_engine().Commit(*transaction_);
commited_ = true;
}
void GraphDbAccessor::Abort() {
DCHECK(!commited_ && !aborted_) << "Already aborted or commited transaction.";
db_->tx_engine().Abort(*transaction_);
aborted_ = true;
}
bool GraphDbAccessor::should_abort() const {
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
return transaction_->should_abort();
}
raft::RaftInterface *GraphDbAccessor::raft() {
return db_->raft();
}
storage::StateDeltaBuffer *GraphDbAccessor::sd_buffer() {
return db_->sd_buffer();
}
VertexAccessor GraphDbAccessor::InsertVertex(
std::optional<storage::Gid> requested_gid) {
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
auto gid = db_->storage().vertex_generator_.Next(requested_gid);
auto vertex_vlist = new mvcc::VersionList<Vertex>(*transaction_, gid);
bool success =
db_->storage().vertices_.access().insert(gid, vertex_vlist).second;
CHECK(success) << "Attempting to insert a vertex with an existing GID: "
<< gid.AsUint();
sd_buffer()->Emplace(
database::StateDelta::CreateVertex(transaction_->id_, vertex_vlist->gid_));
auto va = VertexAccessor(vertex_vlist, *this);
return va;
}
std::optional<VertexAccessor> GraphDbAccessor::FindVertexOptional(
storage::Gid gid, bool current_state) {
VertexAccessor record_accessor(db_->storage().LocalAddress<Vertex>(gid),
*this);
if (!record_accessor.Visible(transaction(), current_state))
return std::nullopt;
return record_accessor;
}
VertexAccessor GraphDbAccessor::FindVertex(storage::Gid gid,
bool current_state) {
auto found = FindVertexOptional(gid, current_state);
CHECK(found) << "Unable to find vertex for id: " << gid.AsUint();
return *found;
}
std::optional<EdgeAccessor> GraphDbAccessor::FindEdgeOptional(
storage::Gid gid, bool current_state) {
EdgeAccessor record_accessor(db_->storage().LocalAddress<Edge>(gid), *this);
if (!record_accessor.Visible(transaction(), current_state))
return std::nullopt;
return record_accessor;
}
EdgeAccessor GraphDbAccessor::FindEdge(storage::Gid gid, bool current_state) {
auto found = FindEdgeOptional(gid, current_state);
CHECK(found) << "Unable to find edge for id: " << gid.AsUint();
return *found;
}
void GraphDbAccessor::BuildIndex(storage::Label label,
storage::Property property) {
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
// Create the index
const LabelPropertyIndex::Key key(label, property);
if (db_->storage().label_property_index_.CreateIndex(key) == false) {
throw IndexExistsException(
"Index is either being created by another transaction or already "
"exists.");
}
try {
auto dba = db_->AccessBlocking(std::make_optional(transaction_->id_));
dba.PopulateIndex(key);
dba.EnableIndex(key);
dba.Commit();
} catch (const tx::TransactionEngineError &e) {
db_->storage().label_property_index_.DeleteIndex(key);
throw TransactionException(e.what());
}
}
void GraphDbAccessor::EnableIndex(const LabelPropertyIndex::Key &key) {
// Commit transaction as we finished applying method on newest visible
// records. Write that transaction's ID to the RaftServer as the index has
// been built at this point even if this DBA's transaction aborts for some
// reason.
sd_buffer()->Emplace(database::StateDelta::BuildIndex(
transaction_id(), key.label_, LabelName(key.label_), key.property_,
PropertyName(key.property_)));
}
void GraphDbAccessor::PopulateIndex(const LabelPropertyIndex::Key &key) {
for (auto vertex : Vertices(key.label_, false)) {
if (vertex.PropsAt(key.property_).type() == PropertyValue::Type::Null)
continue;
db_->storage().label_property_index_.UpdateOnLabelProperty(vertex.address(),
vertex.current_);
}
}
void GraphDbAccessor::DeleteIndex(storage::Label label,
storage::Property property) {
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
LabelPropertyIndex::Key key(label, property);
try {
auto dba = db_->AccessBlocking(std::make_optional(transaction_->id_));
db_->storage().label_property_index_.DeleteIndex(key);
dba.sd_buffer()->Emplace(database::StateDelta::DropIndex(
dba.transaction_id(), key.label_, LabelName(key.label_), key.property_,
PropertyName(key.property_)));
dba.Commit();
} catch (const tx::TransactionEngineError &e) {
throw TransactionException(e.what());
}
}
void GraphDbAccessor::BuildUniqueConstraint(
storage::Label label, const std::vector<storage::Property> &properties) {
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
storage::constraints::ConstraintEntry entry{label, properties};
if (!db_->storage().unique_constraints_.AddConstraint(entry)) {
// Already exists
return;
}
try {
auto dba = db_->AccessBlocking(std::make_optional(transaction().id_));
for (auto v : dba.Vertices(false)) {
if (std::find(v.labels().begin(), v.labels().end(), label) !=
v.labels().end()) {
db_->storage().unique_constraints_.Update(v, dba.transaction());
}
}
std::vector<std::string> property_names(properties.size());
std::transform(properties.begin(), properties.end(), property_names.begin(),
[&dba](storage::Property property) {
return dba.PropertyName(property);
});
dba.sd_buffer()->Emplace(database::StateDelta::BuildUniqueConstraint(
dba.transaction().id_, label, dba.LabelName(label), properties,
property_names));
dba.Commit();
} catch (const tx::TransactionEngineError &e) {
db_->storage().unique_constraints_.RemoveConstraint(entry);
throw TransactionException(e.what());
} catch (const storage::constraints::ViolationException &e) {
db_->storage().unique_constraints_.RemoveConstraint(entry);
throw ConstraintViolationException(e.what());
} catch (const storage::constraints::SerializationException &e) {
db_->storage().unique_constraints_.RemoveConstraint(entry);
throw mvcc::SerializationError();
} catch (...) {
db_->storage().unique_constraints_.RemoveConstraint(entry);
throw;
}
}
void GraphDbAccessor::DeleteUniqueConstraint(
storage::Label label, const std::vector<storage::Property> &properties) {
storage::constraints::ConstraintEntry entry{label, properties};
try {
auto dba = db_->AccessBlocking(std::make_optional(transaction().id_));
if (!db_->storage().unique_constraints_.RemoveConstraint(entry)) {
// Nothing was deleted
return;
}
std::vector<std::string> property_names(properties.size());
std::transform(properties.begin(), properties.end(), property_names.begin(),
[&dba](storage::Property property) {
return dba.PropertyName(property);
});
dba.sd_buffer()->Emplace(database::StateDelta::DropUniqueConstraint(
dba.transaction().id_, label, dba.LabelName(label), properties,
property_names));
dba.Commit();
} catch (const tx::TransactionEngineError &e) {
throw TransactionException(e.what());
}
}
std::vector<storage::constraints::ConstraintEntry>
GraphDbAccessor::ListUniqueConstraints() const {
return db_->storage().unique_constraints_.ListConstraints();
}
void GraphDbAccessor::UpdateOnAddLabel(storage::Label label,
const VertexAccessor &vertex_accessor,
const Vertex *vertex) {
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
auto *vlist_ptr = vertex_accessor.address();
try {
db_->storage().unique_constraints_.UpdateOnAddLabel(label, vertex_accessor,
transaction());
} catch (const storage::constraints::SerializationException &e) {
throw mvcc::SerializationError();
} catch (const storage::constraints::ViolationException &e) {
throw ConstraintViolationException(e.what());
}
db_->storage().label_property_index_.UpdateOnLabel(label, vlist_ptr, vertex);
db_->storage().labels_index_.Update(label, vlist_ptr, vertex);
}
void GraphDbAccessor::UpdateOnRemoveLabel(
storage::Label label, const RecordAccessor<Vertex> &accessor) {
db_->storage().unique_constraints_.UpdateOnRemoveLabel(label, accessor,
transaction());
}
void GraphDbAccessor::UpdateOnAddProperty(
storage::Property property, const PropertyValue &previous_value,
const PropertyValue &new_value,
const RecordAccessor<Vertex> &vertex_accessor, const Vertex *vertex) {
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
try {
db_->storage().unique_constraints_.UpdateOnAddProperty(
property, previous_value, new_value, vertex_accessor, transaction());
} catch (const storage::constraints::SerializationException &e) {
throw mvcc::SerializationError();
} catch (const storage::constraints::ViolationException &e) {
throw ConstraintViolationException(e.what());
}
db_->storage().label_property_index_.UpdateOnProperty(
property, vertex_accessor.address(), vertex);
}
void GraphDbAccessor::UpdateOnRemoveProperty(
storage::Property property, const PropertyValue &previous_value,
const RecordAccessor<Vertex> &accessor, const Vertex *vertex) {
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
try {
db_->storage().unique_constraints_.UpdateOnRemoveProperty(
property, previous_value, accessor, transaction());
} catch (const storage::constraints::SerializationException &e) {
throw mvcc::SerializationError();
}
}
int64_t GraphDbAccessor::VerticesCount() const {
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
return db_->storage().vertices_.access().size();
}
int64_t GraphDbAccessor::VerticesCount(storage::Label label) const {
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
return db_->storage().labels_index_.Count(label);
}
int64_t GraphDbAccessor::VerticesCount(storage::Label label,
storage::Property property) const {
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
const LabelPropertyIndex::Key key(label, property);
DCHECK(db_->storage().label_property_index_.IndexExists(key))
<< "Index doesn't exist.";
return db_->storage().label_property_index_.Count(key);
}
int64_t GraphDbAccessor::VerticesCount(storage::Label label,
storage::Property property,
const PropertyValue &value) const {
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
const LabelPropertyIndex::Key key(label, property);
DCHECK(db_->storage().label_property_index_.IndexExists(key))
<< "Index doesn't exist.";
return db_->storage()
.label_property_index_.PositionAndCount(key, value)
.second;
}
int64_t GraphDbAccessor::VerticesCount(
storage::Label label, storage::Property property,
const std::optional<utils::Bound<PropertyValue>> lower,
const std::optional<utils::Bound<PropertyValue>> upper) const {
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
const LabelPropertyIndex::Key key(label, property);
DCHECK(db_->storage().label_property_index_.IndexExists(key))
<< "Index doesn't exist.";
CHECK(lower || upper) << "At least one bound must be provided";
CHECK(!lower || lower.value().value().type() != PropertyValue::Type::Null)
<< "Null value is not a valid index bound";
CHECK(!upper || upper.value().value().type() != PropertyValue::Type::Null)
<< "Null value is not a valid index bound";
if (!upper) {
auto lower_pac = db_->storage().label_property_index_.PositionAndCount(
key, lower.value().value());
int64_t size = db_->storage().label_property_index_.Count(key);
return std::max(0l,
size - lower_pac.first -
(lower.value().IsInclusive() ? 0l : lower_pac.second));
} else if (!lower) {
auto upper_pac = db_->storage().label_property_index_.PositionAndCount(
key, upper.value().value());
return upper.value().IsInclusive() ? upper_pac.first + upper_pac.second
: upper_pac.first;
} else {
auto lower_pac = db_->storage().label_property_index_.PositionAndCount(
key, lower.value().value());
auto upper_pac = db_->storage().label_property_index_.PositionAndCount(
key, upper.value().value());
auto result = upper_pac.first - lower_pac.first;
if (lower.value().IsExclusive()) result -= lower_pac.second;
if (upper.value().IsInclusive()) result += upper_pac.second;
return std::max(0l, result);
}
}
bool GraphDbAccessor::RemoveVertex(VertexAccessor &vertex_accessor,
bool check_empty) {
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
vertex_accessor.SwitchNew();
// it's possible the vertex was removed already in this transaction
// due to it getting matched multiple times by some patterns
// we can only delete it once, so check if it's already deleted
if (vertex_accessor.current().is_expired_by(*transaction_)) return true;
if (check_empty &&
vertex_accessor.out_degree() + vertex_accessor.in_degree() > 0)
return false;
auto *vlist_ptr = vertex_accessor.address();
sd_buffer()->Emplace(database::StateDelta::RemoveVertex(
transaction_->id_, vlist_ptr->gid_, check_empty));
vlist_ptr->remove(vertex_accessor.current_, *transaction_);
return true;
}
// Removes a vertex together with all of its incoming and outgoing edges.
void GraphDbAccessor::DetachRemoveVertex(VertexAccessor &vertex_accessor) {
  DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
  vertex_accessor.SwitchNew();
  // Note that when we call RemoveEdge we must take care not to delete from the
  // collection we are iterating over. This invalidates the iterator in a subtle
  // way that does not fail in tests, but is NOT correct.
  for (auto edge_accessor : vertex_accessor.in())
    RemoveEdge(edge_accessor, true, false);
  // Re-acquire the newest version: removing the in-edges above may have
  // produced a newer version of this vertex record.
  vertex_accessor.SwitchNew();
  for (auto edge_accessor : vertex_accessor.out())
    RemoveEdge(edge_accessor, false, true);
  // The vertex is now disconnected, so skip the emptiness check.
  RemoveVertex(vertex_accessor, false);
}
// Creates a new edge between `from` and `to`. `requested_gid` should only be
// set when recovering from durability (see the header for details).
EdgeAccessor GraphDbAccessor::InsertEdge(
    VertexAccessor &from, VertexAccessor &to, storage::EdgeType edge_type,
    std::optional<storage::Gid> requested_gid) {
  DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
  // Use the requested GID (durability recovery) or generate a fresh one.
  auto gid = db_->storage().edge_generator_.Next(requested_gid);
  auto edge_vlist = new mvcc::VersionList<Edge>(
      *transaction_, gid, from.address(), to.address(), edge_type);
  // We need to insert edge_vlist to edges_ before calling update since update
  // can throw and edge_vlist will not be garbage collected if it is not in
  // edges_ skiplist.
  bool success = db_->storage().edges_.access().insert(gid, edge_vlist).second;
  CHECK(success) << "Attempting to insert an edge with an existing GID: "
                 << gid.AsUint();
  // ensure that the "from" accessor has the latest version
  from.SwitchNew();
  from.update().out_.emplace(to.address(), edge_vlist, edge_type);
  // ensure that the "to" accessor has the latest version (Switch new)
  // WARNING: must do that after the above "from.update()" for cases when
  // we are creating a cycle and "from" and "to" are the same vlist
  to.SwitchNew();
  to.update().in_.emplace(from.address(), edge_vlist, edge_type);
  // Record the creation in the state delta buffer.
  sd_buffer()->Emplace(database::StateDelta::CreateEdge(
      transaction_->id_, edge_vlist->gid_, from.gid(), to.gid(), edge_type,
      EdgeTypeName(edge_type)));
  return EdgeAccessor(edge_vlist, *this, from.address(), to.address(),
                      edge_type);
}
// Returns an approximate (over-estimated) count of all edges, taken from the
// size of the edges skiplist.
int64_t GraphDbAccessor::EdgesCount() const {
  DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
  auto edge_access = db_->storage().edges_.access();
  return edge_access.size();
}
// Removes an edge. `remove_out_edge` / `remove_in_edge` control whether the
// edge is also unlinked from its endpoint vertices; both are `true` except
// when called from DetachRemoveVertex.
void GraphDbAccessor::RemoveEdge(EdgeAccessor &edge, bool remove_out_edge,
                                 bool remove_in_edge) {
  DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
  // it's possible the edge was removed already in this transaction
  // due to it getting matched multiple times by some patterns
  // we can only delete it once, so check if it's already deleted
  edge.SwitchNew();
  if (edge.current().is_expired_by(*transaction_)) return;
  if (remove_out_edge) edge.from().RemoveOutEdge(edge.address());
  if (remove_in_edge) edge.to().RemoveInEdge(edge.address());
  // Expire the edge's version list entry, then record the state delta.
  edge.address()->remove(edge.current_, *transaction_);
  sd_buffer()->Emplace(
      database::StateDelta::RemoveEdge(transaction_->id_, edge.gid()));
}
// Translates a label name into its storage::Label id via the db's mapper.
storage::Label GraphDbAccessor::Label(const std::string &label_name) {
  DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
  auto &&mapper = db_->label_mapper();
  return mapper.value_to_id(label_name);
}
// Translates a storage::Label id back into its name via the db's mapper.
const std::string &GraphDbAccessor::LabelName(storage::Label label) const {
  DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
  auto &&mapper = db_->label_mapper();
  return mapper.id_to_value(label);
}
// Translates an edge-type name into its storage::EdgeType id.
storage::EdgeType GraphDbAccessor::EdgeType(const std::string &edge_type_name) {
  DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
  auto &&mapper = db_->edge_type_mapper();
  return mapper.value_to_id(edge_type_name);
}
// Translates a storage::EdgeType id back into its name.
const std::string &GraphDbAccessor::EdgeTypeName(
    storage::EdgeType edge_type) const {
  DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
  auto &&mapper = db_->edge_type_mapper();
  return mapper.id_to_value(edge_type);
}
// Translates a property name into its storage::Property id.
storage::Property GraphDbAccessor::Property(const std::string &property_name) {
  DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
  auto &&mapper = db_->property_mapper();
  return mapper.value_to_id(property_name);
}
// Translates a storage::Property id back into its name.
const std::string &GraphDbAccessor::PropertyName(
    storage::Property property) const {
  DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
  auto &&mapper = db_->property_mapper();
  return mapper.id_to_value(property);
}
// Builds a human-readable list of all indexes: ":Label" entries for label
// indexes and ":Label(property)" entries for label+property indexes.
std::vector<std::string> GraphDbAccessor::IndexInfo() const {
  std::vector<std::string> index_names;
  for (const auto &label : db_->storage().labels_index_.Keys()) {
    index_names.push_back(":" + LabelName(label));
  }
  for (const auto &key : db_->storage().label_property_index_.Keys()) {
    index_names.push_back(fmt::format(":{}({})", LabelName(key.label_),
                                      PropertyName(key.property_)));
  }
  return index_names;
}
// Returns, for each Raft cluster member, a list of (stat name, value) pairs
// describing its storage (vertex/edge counts, memory and disk usage).
std::map<std::string, std::vector<std::pair<std::string, std::string>>>
GraphDbAccessor::StorageInfo() const {
  return db_->storage_info()->GetStorageInfo();
}
} // namespace database

View File

@ -1,691 +0,0 @@
/// @file
#pragma once
#include <map>
#include <optional>
#include <string>
#include <vector>
#include <glog/logging.h>
#include <cppitertools/filter.hpp>
#include <cppitertools/imap.hpp>
#include "database/single_node_ha/graph_db.hpp"
#include "raft/raft_interface.hpp"
#include "storage/common/types/types.hpp"
#include "storage/single_node_ha/edge_accessor.hpp"
#include "storage/single_node_ha/state_delta_buffer.hpp"
#include "storage/single_node_ha/vertex_accessor.hpp"
#include "transactions/transaction.hpp"
#include "transactions/type.hpp"
#include "utils/bound.hpp"
#include "utils/exceptions.hpp"
namespace storage::constraints {
struct ConstraintEntry;
} // namespace storage::constraints
namespace database {
/** Thrown when inserting into an index would violate a constraint. */
class ConstraintViolationException : public utils::BasicException {
  using utils::BasicException::BasicException;
};
/** Thrown when creating an index which already exists (even one that is
 * still being built by a concurrent transaction). */
class IndexExistsException : public utils::BasicException {
  using utils::BasicException::BasicException;
};
/// Thrown on concurrent index creation when the transaction engine fails to
/// start a new transaction.
class TransactionException : public utils::BasicException {
  using utils::BasicException::BasicException;
};
/**
 * Base accessor for the database object: exposes functions for operating on the
 * database. All the functions in this class should be self-sufficient: for
 * example the function for creating a new Vertex should take care of all the
 * book-keeping around the creation.
 */
class GraphDbAccessor {
  // We need to make friends with these guys since they need to access private
  // methods for updating indices.
  // TODO: Rethink this, we have too much long-distance friendship complicating
  // the code.
  friend class ::RecordAccessor<Vertex>;
  friend class ::VertexAccessor;
  friend class GraphDb;
 protected:
  // Construction should only be done through GraphDb::Access function and
  // concrete GraphDbAccessor type.
  /// Creates a new accessor by starting a new transaction.
  explicit GraphDbAccessor(GraphDb *db);
  /// Creates an accessor for a running transaction.
  GraphDbAccessor(GraphDb *db, tx::TransactionId tx_id);
  /// Creates an accessor, optionally for an existing (parent) transaction.
  GraphDbAccessor(GraphDb *db, std::optional<tx::TransactionId> parent_tx);
 public:
  ~GraphDbAccessor();
  // Copying is forbidden; moving transfers transaction ownership.
  GraphDbAccessor(const GraphDbAccessor &other) = delete;
  GraphDbAccessor &operator=(const GraphDbAccessor &other) = delete;
  GraphDbAccessor(GraphDbAccessor &&other);
  GraphDbAccessor &operator=(GraphDbAccessor &&other);
  /**
   * Creates a new Vertex and returns an accessor to it. If the ID is
   * provided, the created Vertex will have that local ID, and the ID counter
   * will be increased to it so collisions are avoided. This should only be used
   * by durability recovery, normal vertex creation should not provide the ID.
   *
   * You should NOT make interleaved recovery and normal DB op calls to this
   * function. Doing so will likely mess up the ID generation and crash MG.
   * Always perform recovery only once, immediately when the database is
   * created, before any transactional ops start.
   *
   * @param requested_gid The requested GID. Should only be provided when
   * recovering from durability.
   *
   * @return See above.
   */
  VertexAccessor InsertVertex(
      std::optional<storage::Gid> requested_gid = std::nullopt);
  /**
   * Removes the vertex of the given accessor. If the vertex has any outgoing or
   * incoming edges, it is not deleted. See `DetachRemoveVertex` if you want to
   * remove a vertex regardless of connectivity.
   *
   * If the vertex has already been deleted by the current transaction+command,
   * this function will not do anything and will return true.
   *
   * @param vertex_accessor Accessor to vertex.
   * @param check_empty If the vertex should be checked for existing edges
   * before deletion.
   * @return Whether or not the vertex was deleted.
   */
  bool RemoveVertex(VertexAccessor &vertex_accessor, bool check_empty = true);
  /**
   * Removes the vertex of the given accessor along with all it's outgoing
   * and incoming connections.
   *
   * @param vertex_accessor Accessor to a vertex.
   */
  void DetachRemoveVertex(VertexAccessor &vertex_accessor);
  /**
   * Obtains the vertex for the given ID. If there is no vertex for the given
   * ID, or it's not visible to this accessor's transaction, nullopt is
   * returned.
   *
   * @param gid - The GID of the sought vertex.
   * @param current_state If true then the graph state for the
   * current transaction+command is returned (insertions, updates and
   * deletions performed in the current transaction+command are not
   * ignored).
   */
  std::optional<VertexAccessor> FindVertexOptional(storage::Gid gid,
                                                   bool current_state);
  /**
   * Obtains the vertex for the given ID. If there is no vertex for the given
   * ID, or it's not visible to this accessor's transaction, MG is crashed
   * using a CHECK.
   *
   * @param gid - The GID of the sought vertex.
   * @param current_state If true then the graph state for the
   * current transaction+command is returned (insertions, updates and
   * deletions performed in the current transaction+command are not
   * ignored).
   */
  VertexAccessor FindVertex(storage::Gid gid, bool current_state);
  /**
   * Returns iterable over accessors to all the vertices in the graph
   * visible to the current transaction.
   *
   * @param current_state If true then the graph state for the
   * current transaction+command is returned (insertions, updates and
   * deletions performed in the current transaction+command are not
   * ignored).
   */
  auto Vertices(bool current_state) {
    DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
    // wrap version lists into accessors, which will look for visible versions
    auto accessors = iter::imap(
        [this](auto id_vlist) {
          return VertexAccessor(id_vlist.second, *this);
        },
        db_->storage().vertices_.access());
    // filter out the accessors not visible to the current transaction
    return iter::filter(
        [this, current_state](const VertexAccessor &accessor) {
          return accessor.Visible(transaction(), current_state);
        },
        std::move(accessors));
  }
  /**
   * Return VertexAccessors which contain the current label for the current
   * transaction visibility.
   * @param label - label for which to return VertexAccessors
   * @param current_state If true then the graph state for the
   * current transaction+command is returned (insertions, updates and
   * deletions performed in the current transaction+command are not
   * ignored).
   * @return iterable collection
   */
  auto Vertices(storage::Label label, bool current_state) {
    DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
    return iter::imap(
        [this](auto vlist) { return VertexAccessor(vlist, *this); },
        db_->storage().labels_index_.GetVlists(label, *transaction_,
                                               current_state));
  }
  /**
   * Return VertexAccessors which contain the current label and property for the
   * given transaction visibility.
   *
   * @param label - label for which to return VertexAccessors
   * @param property - property for which to return VertexAccessors
   * @param current_state If true then the graph state for the
   * current transaction+command is returned (insertions, updates and
   * deletions performed in the current transaction+command are not
   * ignored).
   * @return iterable collection
   */
  auto Vertices(storage::Label label, storage::Property property,
                bool current_state) {
    DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
    DCHECK(db_->storage().label_property_index_.IndexExists(
        LabelPropertyIndex::Key(label, property)))
        << "Label+property index doesn't exist.";
    return iter::imap(
        [this](auto vlist) { return VertexAccessor(vlist, *this); },
        db_->storage().label_property_index_.GetVlists(
            LabelPropertyIndex::Key(label, property), *transaction_,
            current_state));
  }
  /**
   * Return VertexAccessors which contain the current label + property, and
   * those properties are equal to this 'value' for the given transaction
   * visibility.
   * @param label - label for which to return VertexAccessors
   * @param property - property for which to return VertexAccessors
   * @param value - property value for which to return VertexAccessors
   * @param current_state If true then the graph state for the
   * current transaction+command is returned (insertions, updates and
   * deletions performed in the current transaction+command are not
   * ignored).
   * @return iterable collection
   */
  auto Vertices(storage::Label label, storage::Property property,
                const PropertyValue &value, bool current_state) {
    DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
    DCHECK(db_->storage().label_property_index_.IndexExists(
        LabelPropertyIndex::Key(label, property)))
        << "Label+property index doesn't exist.";
    CHECK(value.type() != PropertyValue::Type::Null)
        << "Can't query index for propery value type null.";
    return iter::imap(
        [this](auto vlist) { return VertexAccessor(vlist, *this); },
        db_->storage().label_property_index_.GetVlists(
            LabelPropertyIndex::Key(label, property), value, *transaction_,
            current_state));
  }
  /**
   * Return an iterable over VertexAccessors which contain the
   * given label and whose property value (for the given property)
   * falls within the given (lower, upper) @c Bound.
   *
   * The returned iterator will only contain
   * vertices/edges whose property value is comparable with the
   * given bounds (w.r.t. type). This has implications on Cypher
   * query execution semantics which have not been resolved yet.
   *
   * At least one of the bounds must be specified. Bounds can't be
   * @c PropertyValue::Null. If both bounds are
   * specified, their PropertyValue elements must be of comparable
   * types.
   *
   * @param label - label for which to return VertexAccessors
   * @param property - property for which to return VertexAccessors
   * @param lower - Lower bound of the interval.
   * @param upper - Upper bound of the interval.
   * @param value - property value for which to return VertexAccessors
   * @param current_state If true then the graph state for the
   * current transaction+command is returned (insertions, updates and
   * deletions performed in the current transaction+command are not
   * ignored).
   * @return iterable collection of record accessors
   * satisfy the bounds and are visible to the current transaction.
   */
  auto Vertices(storage::Label label, storage::Property property,
                const std::optional<utils::Bound<PropertyValue>> lower,
                const std::optional<utils::Bound<PropertyValue>> upper,
                bool current_state) {
    DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
    DCHECK(db_->storage().label_property_index_.IndexExists(
        LabelPropertyIndex::Key(label, property)))
        << "Label+property index doesn't exist.";
    return iter::imap(
        [this](auto vlist) { return VertexAccessor(vlist, *this); },
        db_->storage().label_property_index_.GetVlists(
            LabelPropertyIndex::Key(label, property), lower, upper,
            *transaction_, current_state));
  }
  /**
   * Creates a new Edge and returns an accessor to it. If the ID is
   * provided, the created Edge will have that ID, and the ID counter will be
   * increased to it so collisions are avoided. This should only be used by
   * durability recovery, normal edge creation should not provide the ID.
   *
   * You should NOT make interleaved recovery and normal DB op calls to this
   * function. Doing so will likely mess up the ID generation and crash MG.
   * Always perform recovery only once, immediately when the database is
   * created, before any transactional ops start.
   *
   * @param from The 'from' vertex.
   * @param to The 'to' vertex.
   * @param type Edge type.
   * @param requested_gid The requested GID. Should only be provided when
   * recovering from durability.
   *
   * @return An accessor to the edge.
   */
  EdgeAccessor InsertEdge(
      VertexAccessor & from, VertexAccessor & to, storage::EdgeType type,
      std::optional<storage::Gid> requested_gid = std::nullopt);
  /**
   * Removes an edge from the graph. Parameters can indicate if the edge should
   * be removed from data structures in vertices it connects. When removing an
   * edge both arguments should be `true`. `false` is only used when
   * detach-deleting a vertex.
   *
   * @param edge The accessor to an edge.
   * @param remove_out_edge If the edge should be removed from its origin
   * side.
   * @param remove_in_edge If the edge should be removed from its
   * destination side.
   */
  void RemoveEdge(EdgeAccessor &edge, bool remove_out_edge = true,
                  bool remove_in_edge = true);
  /**
   * Obtains the edge for the given ID. If there is no edge for the given
   * ID, or it's not visible to this accessor's transaction, nullopt is
   * returned.
   *
   * @param gid - The GID of the sought edge.
   * @param current_state If true then the graph state for the
   * current transaction+command is returned (insertions, updates and
   * deletions performed in the current transaction+command are not
   * ignored).
   */
  std::optional<EdgeAccessor> FindEdgeOptional(storage::Gid gid,
                                               bool current_state);
  /**
   * Obtains the edge for the given ID. If there is no edge for the given
   * ID, or it's not visible to this accessor's transaction, MG is crashed
   * using a CHECK.
   *
   * @param gid - The GID of the sought edge.
   * @param current_state If true then the graph state for the
   * current transaction+command is returned (insertions, updates and
   * deletions performed in the current transaction+command are not
   * ignored).
   */
  EdgeAccessor FindEdge(storage::Gid gid, bool current_state);
  /**
   * Returns iterable over accessors to all the edges in the graph
   * visible to the current transaction.
   *
   * @param current_state If true then the graph state for the
   * current transaction+command is returned (insertions, updates and
   * deletions performed in the current transaction+command are not
   * ignored).
   */
  auto Edges(bool current_state) {
    DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
    // wrap version lists into accessors, which will look for visible versions
    auto accessors = iter::imap(
        [this](auto id_vlist) { return EdgeAccessor(id_vlist.second, *this); },
        db_->storage().edges_.access());
    // filter out the accessors not visible to the current transaction
    return iter::filter(
        [this, current_state](const EdgeAccessor &accessor) {
          return accessor.Visible(transaction(), current_state);
        },
        std::move(accessors));
  }
  /**
   * Creates and returns a new accessor that represents the same graph element
   * (node / version) as the given `accessor`, but in this `GraphDbAccessor`.
   *
   * It is possible that the given `accessor` graph element is not visible in
   * this `GraphDbAccessor`'s transaction. If that is the case, a `nullopt` is
   * returned.
   *
   * The returned accessor does NOT have the same `current_` set as the given
   * `accessor`. It has default post-construction `current_` set (`old` if
   * available, otherwise `new`).
   *
   * @param accessor The [Vertex/Edge]Accessor whose underlying graph element we
   * want in this GraphDbAccessor.
   * @return See above.
   * @tparam TAccessor Either VertexAccessor or EdgeAccessor
   */
  template <typename TAccessor>
  std::optional<TAccessor> Transfer(const TAccessor &accessor) {
    if (accessor.db_accessor_ == this) return std::make_optional(accessor);
    TAccessor accessor_in_this(accessor.address(), *this);
    if (accessor_in_this.current_)
      return std::make_optional(std::move(accessor_in_this));
    else
      return std::nullopt;
  }
  /**
   * Adds an index for the given (label, property) and populates it with
   * existing vertices that belong to it.
   *
   * You should never call BuildIndex on a GraphDbAccessor (transaction) on
   * which new vertices have been inserted or existing ones updated. Do it
   * in a new accessor instead.
   *
   * Build index throws if an index for the given (label, property) already
   * exists (even if it's being built by a concurrent transaction and is not yet
   * ready for use).
   *
   * It also throws if there is another index being built concurrently on the
   * same database this accessor is for.
   *
   * @param label - label to build for
   * @param property - property to build for
   */
  void BuildIndex(storage::Label label, storage::Property property);
  /// Deletes the index responsible for (label, property).
  ///
  /// @throws TransactionException if it can't obtain a blocking
  /// transaction.
  void DeleteIndex(storage::Label label, storage::Property property);
  /// Populates index with vertices containing the key
  void PopulateIndex(const LabelPropertyIndex::Key &key);
  /// Writes Index (key) creation to Raft, marks it as ready for usage
  void EnableIndex(const LabelPropertyIndex::Key &key);
  /**
   * Creates new unique constraint that consists of a label and multiple
   * properties.
   * If the constraint already exists, this method does nothing.
   *
   * @throws ConstraintViolationException if constraint couldn't be built
   * due to existing constraint violation.
   * @throws TransactionEngineError if the engine doesn't accept transactions.
   * @throws mvcc::SerializationError on serialization errors.
   */
  void BuildUniqueConstraint(storage::Label label,
                             const std::vector<storage::Property> &properties);
  /**
   * Deletes existing unique constraint.
   * If the constraint doesn't exist, this method does nothing.
   */
  void DeleteUniqueConstraint(storage::Label label,
                              const std::vector<storage::Property> &properties);
  /**
   * Returns a list of currently active unique constraints.
   */
  std::vector<storage::constraints::ConstraintEntry> ListUniqueConstraints()
      const;
  /**
   * @brief - Returns true if the given label+property index already exists and
   * is ready for use.
   */
  bool LabelPropertyIndexExists(storage::Label label,
                                storage::Property property) const {
    DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
    return db_->storage().label_property_index_.IndexExists(
        LabelPropertyIndex::Key(label, property));
  }
  /**
   * @brief - Returns vector of keys of label-property indices.
   */
  std::vector<LabelPropertyIndex::Key> GetIndicesKeys() {
    DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
    return db_->storage().label_property_index_.Keys();
  }
  /**
   * Return approximate number of all vertices in the database.
   * Note that this is always an over-estimate and never an under-estimate.
   */
  int64_t VerticesCount() const;
  /**
   * Return approximate number of all edges in the database.
   * Note that this is always an over-estimate and never an under-estimate.
   */
  int64_t EdgesCount() const;
  /**
   * Return approximate number of vertices under indexes with the given label.
   * Note that this is always an over-estimate and never an under-estimate.
   *
   * @param label - label to check for
   * @return number of vertices with the given label
   */
  int64_t VerticesCount(storage::Label label) const;
  /**
   * Return approximate number of vertices under indexes with the given label
   * and property. Note that this is always an over-estimate and never an
   * under-estimate.
   *
   * @param label - label to check for
   * @param property - property to check for
   * @return number of vertices with the given label, fails if no such
   * label+property index exists.
   */
  int64_t VerticesCount(storage::Label label, storage::Property property) const;
  /**
   * Returns approximate number of vertices that have the given label
   * and the given value for the given property.
   *
   * Assumes that an index for that (label, property) exists.
   */
  int64_t VerticesCount(storage::Label label, storage::Property property,
                        const PropertyValue &value) const;
  /**
   * Returns approximate number of vertices that have the given label
   * and whose value is in the range defined by upper and lower @c Bound.
   *
   * At least one bound must be specified. Neither can be
   * PropertyValue::Null.
   *
   * Assumes that an index for that (label, property) exists.
   */
  int64_t VerticesCount(
      storage::Label label, storage::Property property,
      const std::optional<utils::Bound<PropertyValue>> lower,
      const std::optional<utils::Bound<PropertyValue>> upper) const;
  /**
   * Obtains the Label for the label's name.
   * @return See above.
   */
  storage::Label Label(const std::string &label_name);
  /**
   * Obtains the label name (a string) for the given label.
   *
   * @param label a Label.
   * @return See above.
   */
  const std::string &LabelName(storage::Label label) const;
  /**
   * Obtains the EdgeType for its name.
   * @return See above.
   */
  storage::EdgeType EdgeType(const std::string &edge_type_name);
  /**
   * Obtains the edge type name (a string) for the given edge type.
   *
   * @param edge_type an EdgeType.
   * @return See above.
   */
  const std::string &EdgeTypeName(storage::EdgeType edge_type) const;
  /**
   * Obtains the Property for its name.
   * @return See above.
   */
  storage::Property Property(const std::string &property_name);
  /**
   * Obtains the property name (a string) for the given property.
   *
   * @param property a Property.
   * @return See above.
   */
  const std::string &PropertyName(storage::Property property) const;
  /** Returns the id of this accessor's transaction */
  tx::TransactionId transaction_id() const;
  /** Advances transaction's command id by 1. */
  void AdvanceCommand();
  /** Commit transaction. */
  void Commit();
  /** Abort transaction. */
  void Abort();
  /** Return true if transaction is hinted to abort. */
  bool should_abort() const;
  /** Returns the underlying transaction object. */
  const tx::Transaction &transaction() const { return *transaction_; }
  /** Returns the Raft interface of the underlying database. */
  raft::RaftInterface *raft();
  /** Returns the state delta buffer used to record changes. */
  storage::StateDeltaBuffer *sd_buffer();
  auto &db() { return db_; }
  const auto &db() const { return db_; }
  /** Returns a list of index names present in the database. */
  std::vector<std::string> IndexInfo() const;
  /**
   * Returns a map containing storage information for each Raft cluster member.
   *
   * Inside the vector, the following storage stats will exist:
   * - vertex_count
   * - edge_count
   * - average_degree
   * - memory_usage
   * - disk_usage
   **/
  std::map<std::string, std::vector<std::pair<std::string, std::string>>>
  StorageInfo() const;
  /**
   * Insert this vertex into corresponding label and label+property (if it
   * exists) index.
   *
   * @param label - label with which to insert vertex label record
   * @param vertex_accessor - vertex_accessor to insert
   * @param vertex - vertex record to insert
   */
  void UpdateLabelIndices(storage::Label label,
                          const VertexAccessor &vertex_accessor,
                          const Vertex *const vertex);
 private:
  GraphDb *db_;
  tx::Transaction *transaction_;
  // Indicates if this db-accessor started the transaction and should Abort it
  // upon destruction.
  bool transaction_starter_;
  // Set by Commit()/Abort(); most member functions DCHECK that neither flag
  // has been set yet.
  bool commited_{false};
  bool aborted_{false};
  /**
   * Notifies storage about label addition.
   *
   * @param label - label that was added
   * @param vertex_accessor - vertex_accessor that was updated
   * @param vertex - vertex that was updated
   */
  void UpdateOnAddLabel(storage::Label label,
                        const VertexAccessor &vertex_accessor,
                        const Vertex *vertex);
  /**
   * Notifies storage about label removal.
   *
   * @param label - label that was removed
   * @param vertex_accessor - vertex_accessor that was updated
   */
  void UpdateOnRemoveLabel(storage::Label label,
                           const RecordAccessor<Vertex> &accessor);
  /**
   * Notifies storage about a property removal.
   *
   * @param property - property that was removed
   * @param previous_value - previous value of the property
   * @param vertex_accessor - vertex_accessor that was updated
   * @param vertex - vertex that was updated
   */
  void UpdateOnRemoveProperty(storage::Property property,
                              const PropertyValue &previous_value,
                              const RecordAccessor<Vertex> &accessor,
                              const Vertex *vertex);
  /**
   * Notifies storage about a property addition.
   *
   * @param property - property that was added
   * @param previous_value - previous value of the property
   * @param new_value - new value of the property
   * @param vertex_accessor - vertex accessor that was updated
   * @param vertex - vertex that was updated
   */
  void UpdateOnAddProperty(storage::Property property,
                           const PropertyValue &previous_value,
                           const PropertyValue &new_value,
                           const RecordAccessor<Vertex> &vertex_accessor,
                           const Vertex *vertex);
};
} // namespace database

View File

@ -1,9 +0,0 @@
#>cpp
#pragma once
#include "durability/single_node_ha/state_delta.hpp"
#include "storage/common/types/slk.hpp"
cpp<#
;; Generate serialization code for state-delta by re-processing its LCP
;; definitions (presumably SLK serialization, given the slk.hpp include above).
(load "durability/single_node_ha/state_delta.lcp")

View File

@ -1,74 +0,0 @@
#pragma once
#include <fstream>
#include "hasher.hpp"
#include "utils/endian.hpp"
/**
 * Buffer reads data from file and calculates hash of read data. Implements
 * template param Buffer interface from BaseDecoder class.
 */
class HashedFileReader {
 public:
  /** Opens the file for reading (binary mode) and resets the running hash.
   * Returns true if successful. */
  bool Open(const std::string &file) {
    input_stream_.open(file, std::ios::in | std::ios::binary);
    hasher_ = Hasher();
    return !input_stream_.fail();
  }
  /** Closes ifstream. Returns false if closing fails. */
  bool Close() {
    input_stream_.close();
    return !input_stream_.fail();
  }
  /**
   * Reads raw data from stream.
   *
   * @param data - pointer to where data should be stored.
   * @param n - data length.
   * @param hash - If the read should be included in the hash calculation.
   * @return - True if exactly `n` bytes were read.
   */
  bool Read(uint8_t *data, size_t n, bool hash = true) {
    input_stream_.read(reinterpret_cast<char *>(data), n);
    if (input_stream_.fail()) return false;
    if (hash) hasher_.Update(data, n);
    return true;
  }
  /**
   * Reads a TValue value from the stream and converts it from big-endian
   * (the on-disk byte order) to host byte order.
   *
   * @param val - The value to read into.
   * @param hash - If the read should be included in the hash calculation.
   * @tparam TValue - Type of value being read.
   * @return - If the read was successful.
   */
  template <typename TValue>
  bool ReadType(TValue &val, bool hash = true) {
    if (!Read(reinterpret_cast<uint8_t *>(&val), sizeof(TValue), hash))
      return false;
    val = utils::BigEndianToHost(val);
    return true;
  }
  /** Repositions the read head relative to `way` (begin/cur/end). */
  void Seek(std::streamoff offset, std::ios_base::seekdir way) {
    input_stream_.seekg(offset, way);
  }
  /** Repositions the read head to an absolute position. */
  void Seek(std::streampos pos) { input_stream_.seekg(pos); }
  /** Returns the current read position. */
  auto Tellg() { return input_stream_.tellg(); }
  /** Returns the hash of the data read so far from the stream. */
  uint64_t hash() const { return hasher_.hash(); }
  /** Checks whether the end of file is reached. */
  bool EndOfFile() const { return input_stream_.eof(); }
 private:
  Hasher hasher_;
  std::ifstream input_stream_;
};

View File

@ -1,74 +0,0 @@
#pragma once
#include <fstream>
#include "hasher.hpp"
#include "utils/endian.hpp"
/**
 * Buffer that writes data to file and calculates hash of written data.
 * Implements template param Buffer interface from BaseEncoder class.
 *
 * All of the methods on a HashedFileWriter can throw an exception, because the
 * underlying stream is configured to throw on failure.
 */
class HashedFileWriter {
 public:
  /** Constructor, initialize ofstream to throw exception on fail. */
  HashedFileWriter() {
    output_stream_.exceptions(std::ifstream::failbit | std::ifstream::badbit);
  }
  /** Constructor which also takes a file path and opens it immediately. */
  explicit HashedFileWriter(const std::string &path) : HashedFileWriter() {
    output_stream_.open(path, std::ios::out | std::ios::binary);
  }
  /** Opens the writer (binary mode) and resets the running hash. */
  void Open(const std::string &path) {
    output_stream_.open(path, std::ios::out | std::ios::binary);
    hasher_ = Hasher();
  }
  /** Closes the writer. */
  void Close() { output_stream_.close(); }
  /**
   * Writes data to stream.
   *
   * @param data - Pointer to data to write.
   * @param n - Data length.
   * @param hash - If writing should update the hash.
   * @throws std::ios_base::failure on a write error.
   */
  void Write(const uint8_t *data, size_t n, bool hash = true) {
    output_stream_.write(reinterpret_cast<const char *>(data), n);
    if (hash) hasher_.Update(data, n);
  }
  /**
   * Writes a TValue to the stream, converted to big-endian (the on-disk byte
   * order).
   *
   * @param val - The value to write.
   * @param hash - If writing should update the hash.
   * @throws std::ios_base::failure on a write error.
   */
  template <typename TValue>
  void WriteValue(const TValue &val, bool hash = true) {
    TValue val_big = utils::HostToBigEndian(val);
    Write(reinterpret_cast<const uint8_t *>(&val_big), sizeof(TValue), hash);
  }
  // TODO try to remove before diff
  /** Does nothing. Just for API compatibility with the bolt buffer. */
  void Chunk() {}
  /** Flushes data to stream. */
  void Flush() { output_stream_.flush(); }
  /** Returns the hash of the data written so far to the stream. */
  uint64_t hash() const { return hasher_.hash(); }
 private:
  std::ofstream output_stream_;
  Hasher hasher_;
};

View File

@ -1,31 +0,0 @@
#pragma once
#include <cstdint>
#include <cstdlib>
// NOTE: simple polynomial hash; replacing it with a stronger function is a
// known TODO (would break compatibility with existing on-disk hashes).
/**
 * Incrementally computes a hash over a stream of bytes.
 */
class Hasher {
  /** Prime multiplier used by the polynomial hash. */
  static constexpr uint64_t kPrime = 3137;

 public:
  /**
   * Folds the given bytes into the running hash.
   *
   * @param data - Bytes to incorporate into the hash.
   * @param n - Number of bytes.
   */
  void Update(const uint8_t *data, size_t n) {
    const uint8_t *const end = data + n;
    while (data != end) {
      hash_ = hash_ * kPrime + *data + 1;
      ++data;
    }
  }

  /** Returns the current hash value. */
  uint64_t hash() const { return hash_; }

 private:
  uint64_t hash_ = 0;
};

View File

@ -1,26 +0,0 @@
#include "durability/single_node_ha/paths.hpp"
#include "utils/string.hpp"
#include "utils/timestamp.hpp"
namespace durability {

namespace fs = std::filesystem;

// Prefix format used for WAL and Snapshot filenames; it is a timestamp that
// renders as: YYYYmmddHHMMSSffffff
const std::string kTimestampFormat =
    "{:04d}{:02d}{:02d}{:02d}{:02d}{:02d}{:06d}";

std::string GetSnapshotFilename(uint64_t last_included_term,
                                uint64_t last_included_index) {
  // Sortable timestamp first, then the Raft term/index the snapshot covers.
  const auto timestamp = utils::Timestamp::Now().ToString(kTimestampFormat);
  return timestamp + "_term_" + std::to_string(last_included_term) +
         "_index_" + std::to_string(last_included_index);
}

fs::path MakeSnapshotPath(const fs::path &durability_dir,
                          const std::string &snapshot_filename) {
  return durability_dir / kSnapshotDir / snapshot_filename;
}

}  // namespace durability

View File

@ -1,20 +0,0 @@
#pragma once
#include <filesystem>
#include <string>
namespace durability {

/// Name of the subdirectory (under the durability directory) that stores
/// snapshot files.
const std::string kSnapshotDir = "snapshots";
/// Name of the subdirectory used for backups.
const std::string kBackupDir = ".backup";

/// Generates a filename for a DB snapshot in the given folder in a well-defined
/// sortable format with last included term and last included index from which
/// the snapshot is created appended to the file name.
std::string GetSnapshotFilename(uint64_t last_included_term,
                                uint64_t last_included_index);

/// Generates a full path for a DB snapshot (durability_dir/snapshots/name).
std::filesystem::path MakeSnapshotPath(
    const std::filesystem::path &durability_dir,
    const std::string &snapshot_filename);
}  // namespace durability

View File

@ -1,172 +0,0 @@
#include "durability/single_node_ha/recovery.hpp"
#include <filesystem>
#include <limits>
#include <optional>
#include <unordered_map>
#include "communication/bolt/v1/decoder/decoder.hpp"
#include "database/single_node_ha/graph_db_accessor.hpp"
#include "durability/hashed_file_reader.hpp"
#include "durability/single_node_ha/paths.hpp"
#include "durability/single_node_ha/version.hpp"
#include "glue/communication.hpp"
#include "storage/single_node_ha/indexes/label_property_index.hpp"
#include "transactions/type.hpp"
#include "utils/algorithm.hpp"
#include "utils/file.hpp"
namespace fs = std::filesystem;
namespace durability {
using communication::bolt::Value;
/// Reads the snapshot summary (vertex count, edge count, hash) stored at the
/// very end of the snapshot file without folding those bytes into the
/// reader's hash. The read position is restored before returning.
bool ReadSnapshotSummary(HashedFileReader &buffer, int64_t &vertex_count,
                         int64_t &edge_count, uint64_t &hash) {
  auto pos = buffer.Tellg();
  // Keep the offset signed: the previous code negated a size_t (the sizeof
  // sum), relying on unsigned wraparound converting back into a negative
  // stream offset.
  const int64_t offset =
      sizeof(vertex_count) + sizeof(edge_count) + sizeof(hash);
  buffer.Seek(-offset, std::ios_base::end);
  // `false` = do not update the reader's hash with the summary bytes.
  bool r_val = buffer.ReadType(vertex_count, false) &&
               buffer.ReadType(edge_count, false) &&
               buffer.ReadType(hash, false);
  buffer.Seek(pos);
  return r_val;
}
namespace {

using communication::bolt::Value;

// Closes `reader` and returns false from the enclosing function when the
// condition does not hold. Relies on a `reader` being in scope at the call
// site and the enclosing function returning bool.
#define RETURN_IF_NOT(condition) \
  if (!(condition)) {            \
    reader.Close();              \
    return false;                \
  }

// Decodes a single snapshot file into `db`, collecting index metadata into
// `recovery_data`. Returns false (after aborting the recovery transaction,
// where one was started) on any decoding error or hash mismatch.
bool RecoverSnapshot(const fs::path &snapshot_file, database::GraphDb *db,
                     RecoveryData *recovery_data) {
  HashedFileReader reader;
  communication::bolt::Decoder<HashedFileReader> decoder(reader);

  RETURN_IF_NOT(reader.Open(snapshot_file));

  // The file must begin with the snapshot magic bytes.
  auto magic_number = durability::kSnapshotMagic;
  reader.Read(magic_number.data(), magic_number.size());
  RETURN_IF_NOT(magic_number == durability::kSnapshotMagic);

  // Read the vertex and edge count, and the hash, from the end of the snapshot.
  int64_t vertex_count;
  int64_t edge_count;
  uint64_t hash;
  RETURN_IF_NOT(
      durability::ReadSnapshotSummary(reader, vertex_count, edge_count, hash));

  Value dv;
  // The snapshot version must match exactly; no cross-version recovery.
  RETURN_IF_NOT(decoder.ReadValue(&dv, Value::Type::Int) &&
                dv.ValueInt() == durability::kVersion);

  // A list of label+property indexes, stored as a flat list of alternating
  // label / property name strings.
  RETURN_IF_NOT(decoder.ReadValue(&dv, Value::Type::List));
  auto index_value = dv.ValueList();
  for (auto it = index_value.begin(); it != index_value.end();) {
    auto label = *it++;
    RETURN_IF_NOT(it != index_value.end());
    auto property = *it++;
    RETURN_IF_NOT(label.IsString() && property.IsString());
    // NOTE: aggregate init leaves IndexRecoveryData::unique value-initialized.
    recovery_data->indexes.emplace_back(
        IndexRecoveryData{label.ValueString(), property.ValueString(),
                          /*create = */ true});
  }

  auto dba = db->Access();
  // Maps snapshot gid -> created vertex accessor, used to connect edges below.
  std::unordered_map<uint64_t, VertexAccessor> vertices;
  for (int64_t i = 0; i < vertex_count; ++i) {
    Value vertex_dv;
    RETURN_IF_NOT(decoder.ReadValue(&vertex_dv, Value::Type::Vertex));
    auto &vertex = vertex_dv.ValueVertex();
    // Vertices are re-created with the gid they had when snapshotted.
    auto vertex_accessor =
        dba.InsertVertex(storage::Gid::FromUint(vertex.id.AsUint()));
    for (const auto &label : vertex.labels) {
      vertex_accessor.add_label(dba.Label(label));
    }
    for (const auto &property_pair : vertex.properties) {
      vertex_accessor.PropsSet(dba.Property(property_pair.first),
                               glue::ToPropertyValue(property_pair.second));
    }
    vertices.insert({vertex.id.AsUint(), vertex_accessor});
  }
  for (int64_t i = 0; i < edge_count; ++i) {
    Value edge_dv;
    RETURN_IF_NOT(decoder.ReadValue(&edge_dv, Value::Type::Edge));
    auto &edge = edge_dv.ValueEdge();
    // Both endpoints must have been created in the vertex pass above.
    auto it_from = vertices.find(edge.from.AsUint());
    auto it_to = vertices.find(edge.to.AsUint());
    RETURN_IF_NOT(it_from != vertices.end() && it_to != vertices.end());
    auto edge_accessor =
        dba.InsertEdge(it_from->second, it_to->second, dba.EdgeType(edge.type),
                       storage::Gid::FromUint(edge.id.AsUint()));
    for (const auto &property_pair : edge.properties)
      edge_accessor.PropsSet(dba.Property(property_pair.first),
                             glue::ToPropertyValue(property_pair.second));
  }

  // Vertex and edge counts are included in the hash. Re-read them to update the
  // hash.
  reader.ReadType(vertex_count);
  reader.ReadType(edge_count);
  if (!reader.Close() || reader.hash() != hash) {
    dba.Abort();
    return false;
  }

  dba.Commit();
  return true;
}

#undef RETURN_IF_NOT

}  // anonymous namespace
/// Recovers the database from the named snapshot inside the durability
/// directory. Logs and returns false when the snapshot directory, the file,
/// or the decoding step fails; returns true on success.
bool RecoverSnapshot(database::GraphDb *db, RecoveryData *recovery_data,
                     const fs::path &durability_dir,
                     const std::string &snapshot_filename) {
  const auto snapshot_dir = durability_dir / kSnapshotDir;
  if (!fs::exists(snapshot_dir) || !fs::is_directory(snapshot_dir)) {
    LOG(WARNING) << "Missing snapshot directory!";
    return false;
  }
  const auto snapshot_path = snapshot_dir / snapshot_filename;
  if (!fs::exists(snapshot_path)) {
    LOG(WARNING) << "Missing snapshot file!";
    return false;
  }
  LOG(INFO) << "Starting snapshot recovery from: " << snapshot_path;
  const bool recovered = RecoverSnapshot(snapshot_path, db, recovery_data);
  if (!recovered) {
    LOG(WARNING) << "Snapshot recovery failed.";
    return false;
  }
  LOG(INFO) << "Snapshot recovery successful.";
  return true;
}
/// Applies recovered index metadata: builds or drops each label+property
/// index, committing all of them in a single transaction.
void RecoverIndexes(database::GraphDb *db,
                    const std::vector<IndexRecoveryData> &indexes) {
  auto dba = db->Access();
  for (const auto &index : indexes) {
    const auto label_id = dba.Label(index.label);
    const auto property_id = dba.Property(index.property);
    if (!index.create) {
      dba.DeleteIndex(label_id, property_id);
    } else {
      dba.BuildIndex(label_id, property_id);
    }
  }
  dba.Commit();
}
} // namespace durability

View File

@ -1,57 +0,0 @@
#pragma once
#include <filesystem>
#include <optional>
#include <unordered_map>
#include <vector>
#include "durability/hashed_file_reader.hpp"
#include "durability/single_node_ha/state_delta.hpp"
#include "transactions/type.hpp"
namespace database {
class GraphDb;
}  // namespace database

namespace durability {

/// Metadata describing a single label+property index gathered while reading a
/// snapshot, to be applied after the data itself has been recovered.
struct IndexRecoveryData {
  std::string label;
  std::string property;
  // Distinguish between creating and dropping an index. Explicitly
  // default-initialized so instances built with fewer aggregate initializers
  // hold a defined value (matches the previous value-initialization).
  bool create = false;
  // Used only when creating an index.
  bool unique = false;
};

/// Data structure for exchanging info between main recovery function and
/// snapshot recovery functions.
struct RecoveryData {
  // A collection into which the indexes should be added so they
  // can be rebuilt at the end of the recovery transaction.
  std::vector<IndexRecoveryData> indexes;
};

/// Reads snapshot metadata from the end of the file without messing up the
/// hash.
bool ReadSnapshotSummary(HashedFileReader &buffer, int64_t &vertex_count,
                         int64_t &edge_count, uint64_t &hash);

/**
 * Recovers database from the given snapshot. If recovering fails, false is
 * returned and db_accessor aborts transaction, else true is returned and
 * transaction is commited.
 *
 * @param db - The database to recover into.
 * @param recovery_data - Struct that will contain additional recovery data.
 * @param durability_dir - Path to durability directory.
 * @param snapshot_filename - Snapshot filename.
 * @return - recovery info
 */
bool RecoverSnapshot(database::GraphDb *db,
                     durability::RecoveryData *recovery_data,
                     const std::filesystem::path &durability_dir,
                     const std::string &snapshot_filename);

/// Builds or drops the given indexes inside a single transaction.
void RecoverIndexes(database::GraphDb *db,
                    const std::vector<IndexRecoveryData> &indexes);

}  // namespace durability

View File

@ -1,108 +0,0 @@
#include "durability/single_node_ha/snapshooter.hpp"
#include <algorithm>
#include <glog/logging.h>
#include "communication/bolt/v1/encoder/base_encoder.hpp"
#include "database/single_node_ha/graph_db_accessor.hpp"
#include "durability/hashed_file_writer.hpp"
#include "durability/single_node_ha/paths.hpp"
#include "durability/single_node_ha/version.hpp"
#include "glue/communication.hpp"
#include "storage/v2/view.hpp"
#include "utils/file.hpp"
namespace fs = std::filesystem;
namespace durability {
// Snapshot layout is described in durability/version.hpp
static_assert(durability::kVersion == 9,
"Wrong snapshot version, please update!");
namespace {
// Serializes the graph visible through `dba` into `snapshot_file` using the
// Bolt encoding, followed by the vertex/edge counts and the running hash as
// the snapshot summary. On I/O failure the partial file is removed and false
// is returned.
bool Encode(const fs::path &snapshot_file, database::GraphDb &db,
            database::GraphDbAccessor &dba) {
  try {
    HashedFileWriter buffer(snapshot_file);
    communication::bolt::BaseEncoder<HashedFileWriter> encoder(buffer);
    int64_t vertex_num = 0, edge_num = 0;

    // Header: magic bytes + snapshot format version.
    encoder.WriteRAW(durability::kSnapshotMagic.data(),
                     durability::kSnapshotMagic.size());
    encoder.WriteInt(durability::kVersion);

    // Write label+property indexes as list ["label", "property", ...]
    {
      std::vector<communication::bolt::Value> index_vec;
      for (const auto &key : dba.GetIndicesKeys()) {
        index_vec.emplace_back(dba.LabelName(key.label_));
        index_vec.emplace_back(dba.PropertyName(key.property_));
      }
      encoder.WriteList(index_vec);
    }

    // All vertices first, then all edges (decode relies on this order).
    for (const auto &vertex : dba.Vertices(false)) {
      encoder.WriteVertex(glue::ToBoltVertex(vertex, storage::View::OLD));
      vertex_num++;
    }
    for (const auto &edge : dba.Edges(false)) {
      encoder.WriteEdge(glue::ToBoltEdge(edge, storage::View::OLD));
      edge_num++;
    }

    // Snapshot summary: counts (hashed) followed by the hash itself.
    buffer.WriteValue(vertex_num);
    buffer.WriteValue(edge_num);
    buffer.WriteValue(buffer.hash());
    buffer.Close();
  } catch (const std::ifstream::failure &) {
    // Don't leave a corrupted snapshot file behind.
    if (fs::exists(snapshot_file) && !fs::remove(snapshot_file)) {
      LOG(ERROR) << "Error while removing corrupted snapshot file: "
                 << snapshot_file;
    }
    return false;
  }
  return true;
}
/// Remove old snapshots but leave at most `keep` number of latest ones.
void RemoveOldSnapshots(const fs::path &snapshot_dir, uint16_t keep) {
  std::vector<fs::path> files;
  for (const auto &file : fs::directory_iterator(snapshot_dir))
    files.push_back(file.path());
  // Nothing to remove while we hold at most `keep` snapshots.
  if (files.size() <= keep) return;
  // Snapshot filenames start with a sortable timestamp, so a lexicographic
  // sort orders them oldest-first.
  std::sort(files.begin(), files.end());
  // Use size_t arithmetic: the previous code truncated the file count to
  // uint16_t, which would misbehave with more than 65535 files present.
  const size_t remove_count = files.size() - keep;
  for (size_t i = 0; i < remove_count; ++i) {
    if (!fs::remove(files[i])) {
      LOG(ERROR) << "Error while removing file: " << files[i];
    }
  }
}
} // namespace
/// Creates a snapshot named `snapshot_filename` inside the snapshots folder
/// of `durability_dir`, then prunes everything except the latest snapshot.
/// Returns true only when the snapshot was fully written.
bool MakeSnapshot(database::GraphDb &db, database::GraphDbAccessor &dba,
                  const fs::path &durability_dir,
                  const std::string &snapshot_filename) {
  const auto snapshot_dir = durability_dir / kSnapshotDir;
  if (!utils::EnsureDir(snapshot_dir)) return false;
  const auto snapshot_file =
      MakeSnapshotPath(durability_dir, snapshot_filename);
  // Never overwrite an existing snapshot.
  if (fs::exists(snapshot_file)) return false;
  if (!Encode(snapshot_file, db, dba)) {
    std::error_code error_code;  // Just for exception suppression.
    fs::remove(snapshot_file, error_code);
    return false;
  }
  // Only keep the latest snapshot.
  RemoveOldSnapshots(snapshot_dir, 1);
  return true;
}
/// Remove every snapshot inside the snapshot durability directory.
void RemoveAllSnapshots(const fs::path &durability_dir) {
  const auto snapshot_dir = durability_dir / kSnapshotDir;
  if (!utils::EnsureDir(snapshot_dir)) return;
  RemoveOldSnapshots(snapshot_dir, /*keep=*/0);
}
} // namespace durability

View File

@ -1,22 +0,0 @@
#pragma once
#include <filesystem>
#include "database/single_node_ha/graph_db.hpp"
namespace durability {
/// Make snapshot and save it in snapshots folder. Returns true if successful.
/// Note: older snapshots are pruned, keeping only the most recent one.
/// @param db - database for which we are creating a snapshot
/// @param dba - db accessor with which we are creating a snapshot (reading
/// data)
/// @param durability_dir - directory where durability data is stored.
/// @param snapshot_filename - filename for the snapshot.
bool MakeSnapshot(database::GraphDb &db, database::GraphDbAccessor &dba,
                  const std::filesystem::path &durability_dir,
                  const std::string &snapshot_filename);

/// Remove all snapshots inside the snapshot durability directory.
void RemoveAllSnapshots(const std::filesystem::path &durability_dir);
}  // namespace durability

View File

@ -1,453 +0,0 @@
#include "durability/single_node_ha/state_delta.hpp"
#include <string>
#include "communication/bolt/v1/value.hpp"
#include "database/single_node_ha/graph_db_accessor.hpp"
#include "glue/communication.hpp"
namespace database {
// Factory for a delta marking the start of a transaction.
StateDelta StateDelta::TxBegin(tx::TransactionId tx_id) {
  StateDelta delta(StateDelta::Type::TRANSACTION_BEGIN, tx_id);
  return delta;
}

// Factory for a delta marking a transaction commit.
StateDelta StateDelta::TxCommit(tx::TransactionId tx_id) {
  StateDelta delta(StateDelta::Type::TRANSACTION_COMMIT, tx_id);
  return delta;
}

// Factory for a delta marking a transaction abort.
StateDelta StateDelta::TxAbort(tx::TransactionId tx_id) {
  StateDelta delta(StateDelta::Type::TRANSACTION_ABORT, tx_id);
  return delta;
}
// Factory for a vertex-creation delta; records only the new vertex's id.
StateDelta StateDelta::CreateVertex(tx::TransactionId tx_id,
                                    storage::Gid vertex_id) {
  StateDelta delta(StateDelta::Type::CREATE_VERTEX, tx_id);
  delta.vertex_id = vertex_id;
  return delta;
}

// Factory for an edge-creation delta; records the edge id, its endpoints and
// the edge type both as a value and by name.
StateDelta StateDelta::CreateEdge(tx::TransactionId tx_id, storage::Gid edge_id,
                                  storage::Gid vertex_from_id,
                                  storage::Gid vertex_to_id,
                                  storage::EdgeType edge_type,
                                  const std::string &edge_type_name) {
  StateDelta delta(StateDelta::Type::CREATE_EDGE, tx_id);
  delta.edge_id = edge_id;
  delta.vertex_from_id = vertex_from_id;
  delta.vertex_to_id = vertex_to_id;
  delta.edge_type = edge_type;
  delta.edge_type_name = edge_type_name;
  return delta;
}
// Factory for a vertex property-set delta. Removing a property is expressed
// as setting a Null PropertyValue.
StateDelta StateDelta::PropsSetVertex(tx::TransactionId tx_id,
                                      storage::Gid vertex_id,
                                      storage::Property property,
                                      const std::string &property_name,
                                      const PropertyValue &value) {
  StateDelta delta(StateDelta::Type::SET_PROPERTY_VERTEX, tx_id);
  delta.vertex_id = vertex_id;
  delta.property = property;
  delta.property_name = property_name;
  delta.value = value;
  return delta;
}

// Factory for an edge property-set delta.
StateDelta StateDelta::PropsSetEdge(tx::TransactionId tx_id,
                                    storage::Gid edge_id,
                                    storage::Property property,
                                    const std::string &property_name,
                                    const PropertyValue &value) {
  StateDelta delta(StateDelta::Type::SET_PROPERTY_EDGE, tx_id);
  delta.edge_id = edge_id;
  delta.property = property;
  delta.property_name = property_name;
  delta.value = value;
  return delta;
}
// Factory for a delta adding `label` to the vertex with `vertex_id`.
StateDelta StateDelta::AddLabel(tx::TransactionId tx_id, storage::Gid vertex_id,
                                storage::Label label,
                                const std::string &label_name) {
  StateDelta delta(StateDelta::Type::ADD_LABEL, tx_id);
  delta.vertex_id = vertex_id;
  delta.label = label;
  delta.label_name = label_name;
  return delta;
}

// Factory for a delta removing `label` from the vertex with `vertex_id`.
StateDelta StateDelta::RemoveLabel(tx::TransactionId tx_id,
                                   storage::Gid vertex_id, storage::Label label,
                                   const std::string &label_name) {
  StateDelta delta(StateDelta::Type::REMOVE_LABEL, tx_id);
  delta.vertex_id = vertex_id;
  delta.label = label;
  delta.label_name = label_name;
  return delta;
}
// Factory for a vertex-removal delta; carries the `check_empty` flag through
// to the consumer.
StateDelta StateDelta::RemoveVertex(tx::TransactionId tx_id,
                                    storage::Gid vertex_id, bool check_empty) {
  StateDelta delta(StateDelta::Type::REMOVE_VERTEX, tx_id);
  delta.vertex_id = vertex_id;
  delta.check_empty = check_empty;
  return delta;
}

// Factory for an edge-removal delta.
StateDelta StateDelta::RemoveEdge(tx::TransactionId tx_id,
                                  storage::Gid edge_id) {
  StateDelta delta(StateDelta::Type::REMOVE_EDGE, tx_id);
  delta.edge_id = edge_id;
  return delta;
}
// Factory for an index-creation delta over (label, property).
StateDelta StateDelta::BuildIndex(tx::TransactionId tx_id, storage::Label label,
                                  const std::string &label_name,
                                  storage::Property property,
                                  const std::string &property_name) {
  StateDelta delta(StateDelta::Type::BUILD_INDEX, tx_id);
  delta.label = label;
  delta.label_name = label_name;
  delta.property = property;
  delta.property_name = property_name;
  return delta;
}

// Factory for an index-drop delta over (label, property).
StateDelta StateDelta::DropIndex(tx::TransactionId tx_id, storage::Label label,
                                 const std::string &label_name,
                                 storage::Property property,
                                 const std::string &property_name) {
  StateDelta delta(StateDelta::Type::DROP_INDEX, tx_id);
  delta.label = label;
  delta.label_name = label_name;
  delta.property = property;
  delta.property_name = property_name;
  return delta;
}

// Factory for the no-op delta required by the Raft protocol.
StateDelta StateDelta::NoOp(tx::TransactionId tx_id) {
  StateDelta delta(StateDelta::Type::NO_OP, tx_id);
  return delta;
}
// Factory for a unique-constraint creation delta over (label, properties).
// Properties are stored both as ids and as names (parallel vectors).
StateDelta StateDelta::BuildUniqueConstraint(
    tx::TransactionId tx_id, storage::Label label,
    const std::string &label_name,
    const std::vector<storage::Property> &properties,
    const std::vector<std::string> &property_names) {
  StateDelta delta(StateDelta::Type::BUILD_UNIQUE_CONSTRAINT, tx_id);
  delta.label = label;
  delta.label_name = label_name;
  delta.properties = properties;
  delta.property_names = property_names;
  return delta;
}

// Factory for a unique-constraint drop delta over (label, properties).
StateDelta StateDelta::DropUniqueConstraint(
    tx::TransactionId tx_id, storage::Label label,
    const std::string &label_name,
    const std::vector<storage::Property> &properties,
    const std::vector<std::string> &property_names) {
  StateDelta delta(StateDelta::Type::DROP_UNIQUE_CONSTRAINT, tx_id);
  delta.label = label;
  delta.label_name = label_name;
  delta.properties = properties;
  delta.property_names = property_names;
  return delta;
}
// Serializes this delta (type, transaction id and the type-specific payload)
// through `encoder`, then appends the writer's running hash so Decode can
// verify stream integrity. The field order here must stay in lockstep with
// StateDelta::Decode.
void StateDelta::Encode(
    HashedFileWriter &writer,
    communication::bolt::BaseEncoder<HashedFileWriter> &encoder) const {
  encoder.WriteInt(static_cast<int64_t>(type));
  encoder.WriteInt(static_cast<int64_t>(transaction_id));

  switch (type) {
    case Type::TRANSACTION_BEGIN:
    case Type::TRANSACTION_COMMIT:
    case Type::TRANSACTION_ABORT:
    case Type::NO_OP:
      // No payload beyond type + transaction id.
      break;
    case Type::CREATE_VERTEX:
      encoder.WriteInt(vertex_id.AsInt());
      break;
    case Type::CREATE_EDGE:
      encoder.WriteInt(edge_id.AsInt());
      encoder.WriteInt(vertex_from_id.AsInt());
      encoder.WriteInt(vertex_to_id.AsInt());
      encoder.WriteInt(edge_type.Id());
      encoder.WriteString(edge_type_name);
      break;
    case Type::SET_PROPERTY_VERTEX:
      encoder.WriteInt(vertex_id.AsInt());
      encoder.WriteInt(property.Id());
      encoder.WriteString(property_name);
      encoder.WriteValue(glue::ToBoltValue(value));
      break;
    case Type::SET_PROPERTY_EDGE:
      encoder.WriteInt(edge_id.AsInt());
      encoder.WriteInt(property.Id());
      encoder.WriteString(property_name);
      encoder.WriteValue(glue::ToBoltValue(value));
      break;
    case Type::ADD_LABEL:
    case Type::REMOVE_LABEL:
      encoder.WriteInt(vertex_id.AsInt());
      encoder.WriteInt(label.Id());
      encoder.WriteString(label_name);
      break;
    case Type::REMOVE_VERTEX:
      // NOTE: check_empty is intentionally not serialized here.
      encoder.WriteInt(vertex_id.AsInt());
      break;
    case Type::REMOVE_EDGE:
      encoder.WriteInt(edge_id.AsInt());
      break;
    case Type::BUILD_INDEX:
      encoder.WriteInt(label.Id());
      encoder.WriteString(label_name);
      encoder.WriteInt(property.Id());
      encoder.WriteString(property_name);
      break;
    case Type::DROP_INDEX:
      encoder.WriteInt(label.Id());
      encoder.WriteString(label_name);
      encoder.WriteInt(property.Id());
      encoder.WriteString(property_name);
      break;
    case Type::BUILD_UNIQUE_CONSTRAINT:
      encoder.WriteInt(label.Id());
      encoder.WriteString(label_name);
      // Count written once; property ids first, then the matching names.
      encoder.WriteInt(properties.size());
      for (auto prop : properties) {
        encoder.WriteInt(prop.Id());
      }
      for (auto &name : property_names) {
        encoder.WriteString(name);
      }
      break;
    case Type::DROP_UNIQUE_CONSTRAINT:
      encoder.WriteInt(label.Id());
      encoder.WriteString(label_name);
      encoder.WriteInt(properties.size());
      for (auto prop : properties) {
        encoder.WriteInt(prop.Id());
      }
      for (auto &name : property_names) {
        encoder.WriteString(name);
      }
      break;
  }
  // Append the running hash so the reader can validate the delta.
  writer.WriteValue(writer.hash());
}
// Helper macros for Decode below: each reads the next value from the decoder
// into `dv` and stores it into the named member of `r_val`, returning nullopt
// from the enclosing function on a failed read.
#define DECODE_MEMBER(member, value_f)         \
  if (!decoder.ReadValue(&dv)) return nullopt; \
  r_val.member = dv.value_f();

#define DECODE_GID_MEMBER(member)              \
  if (!decoder.ReadValue(&dv)) return nullopt; \
  r_val.member = storage::Gid::FromInt(dv.ValueInt());

#define DECODE_MEMBER_CAST(member, value_f, type) \
  if (!decoder.ReadValue(&dv)) return nullopt;    \
  r_val.member = static_cast<type>(dv.value_f());

// Attempts to decode a StateDelta from the given decoder; the field order
// mirrors StateDelta::Encode. Returns the decoded value if successful and the
// trailing hash matches the reader's running hash, otherwise nullopt.
std::optional<StateDelta> StateDelta::Decode(
    HashedFileReader &reader,
    communication::bolt::Decoder<HashedFileReader> &decoder) {
  using std::nullopt;

  StateDelta r_val;
  // The decoded value used as a temporary while decoding.
  communication::bolt::Value dv;

  try {
    if (!decoder.ReadValue(&dv)) return nullopt;
    r_val.type = static_cast<enum StateDelta::Type>(dv.ValueInt());
    DECODE_MEMBER(transaction_id, ValueInt)
    switch (r_val.type) {
      case Type::TRANSACTION_BEGIN:
      case Type::TRANSACTION_COMMIT:
      case Type::TRANSACTION_ABORT:
      case Type::NO_OP:
        break;
      case Type::CREATE_VERTEX:
        DECODE_GID_MEMBER(vertex_id)
        break;
      case Type::CREATE_EDGE:
        DECODE_GID_MEMBER(edge_id)
        DECODE_GID_MEMBER(vertex_from_id)
        DECODE_GID_MEMBER(vertex_to_id)
        DECODE_MEMBER_CAST(edge_type, ValueInt, storage::EdgeType)
        DECODE_MEMBER(edge_type_name, ValueString)
        break;
      case Type::SET_PROPERTY_VERTEX:
        DECODE_GID_MEMBER(vertex_id)
        DECODE_MEMBER_CAST(property, ValueInt, storage::Property)
        DECODE_MEMBER(property_name, ValueString)
        if (!decoder.ReadValue(&dv)) return nullopt;
        r_val.value = glue::ToPropertyValue(dv);
        break;
      case Type::SET_PROPERTY_EDGE:
        DECODE_GID_MEMBER(edge_id)
        DECODE_MEMBER_CAST(property, ValueInt, storage::Property)
        DECODE_MEMBER(property_name, ValueString)
        if (!decoder.ReadValue(&dv)) return nullopt;
        r_val.value = glue::ToPropertyValue(dv);
        break;
      case Type::ADD_LABEL:
      case Type::REMOVE_LABEL:
        DECODE_GID_MEMBER(vertex_id)
        DECODE_MEMBER_CAST(label, ValueInt, storage::Label)
        DECODE_MEMBER(label_name, ValueString)
        break;
      case Type::REMOVE_VERTEX:
        DECODE_GID_MEMBER(vertex_id)
        break;
      case Type::REMOVE_EDGE:
        DECODE_GID_MEMBER(edge_id)
        break;
      case Type::BUILD_INDEX:
        DECODE_MEMBER_CAST(label, ValueInt, storage::Label)
        DECODE_MEMBER(label_name, ValueString)
        DECODE_MEMBER_CAST(property, ValueInt, storage::Property)
        DECODE_MEMBER(property_name, ValueString)
        break;
      case Type::DROP_INDEX:
        DECODE_MEMBER_CAST(label, ValueInt, storage::Label)
        DECODE_MEMBER(label_name, ValueString)
        DECODE_MEMBER_CAST(property, ValueInt, storage::Property)
        DECODE_MEMBER(property_name, ValueString)
        break;
      case Type::BUILD_UNIQUE_CONSTRAINT: {
        DECODE_MEMBER_CAST(label, ValueInt, storage::Label)
        DECODE_MEMBER(label_name, ValueString)
        if (!decoder.ReadValue(&dv)) return nullopt;
        // Use a signed 64-bit loop index: the previous code stored the count
        // in an `int` and compared it against a size_t index, which on a
        // corrupted negative count would convert to a huge unsigned bound.
        const int64_t size = dv.ValueInt();
        for (int64_t i = 0; i < size; ++i) {
          if (!decoder.ReadValue(&dv)) return nullopt;
          r_val.properties.push_back(
              static_cast<storage::Property>(dv.ValueInt()));
        }
        for (int64_t i = 0; i < size; ++i) {
          if (!decoder.ReadValue(&dv)) return nullopt;
          r_val.property_names.push_back(dv.ValueString());
        }
        break;
      }
      case Type::DROP_UNIQUE_CONSTRAINT: {
        DECODE_MEMBER_CAST(label, ValueInt, storage::Label)
        DECODE_MEMBER(label_name, ValueString)
        if (!decoder.ReadValue(&dv)) return nullopt;
        const int64_t size = dv.ValueInt();
        for (int64_t i = 0; i < size; ++i) {
          if (!decoder.ReadValue(&dv)) return nullopt;
          r_val.properties.push_back(
              static_cast<storage::Property>(dv.ValueInt()));
        }
        for (int64_t i = 0; i < size; ++i) {
          if (!decoder.ReadValue(&dv)) return nullopt;
          r_val.property_names.push_back(dv.ValueString());
        }
        break;
      }
    }

    // Integrity check: the hash computed while reading must match the hash
    // stored immediately after the delta.
    auto decoder_hash = reader.hash();
    uint64_t encoded_hash;
    if (!reader.ReadType(encoded_hash, true)) return nullopt;
    if (decoder_hash != encoded_hash) return nullopt;

    return r_val;
  } catch (communication::bolt::ValueException &) {
    return nullopt;
  } catch (std::ifstream::failure &) {
    return nullopt;
  }
}

// Undefine all decode helpers (the previous code leaked DECODE_GID_MEMBER and
// DECODE_MEMBER_CAST past this translation-unit section).
#undef DECODE_MEMBER
#undef DECODE_GID_MEMBER
#undef DECODE_MEMBER_CAST
// Applies a CRUD delta to the given database accessor. Transactional deltas
// (begin/commit/abort) must never reach this function and are rejected with
// LOG(FATAL).
void StateDelta::Apply(GraphDbAccessor &dba) const {
  switch (type) {
    // Transactional state is not recovered.
    case Type::TRANSACTION_BEGIN:
    case Type::TRANSACTION_COMMIT:
    case Type::TRANSACTION_ABORT:
      LOG(FATAL) << "Transaction handling not handled in Apply";
      break;
    case Type::CREATE_VERTEX:
      dba.InsertVertex(vertex_id);
      break;
    case Type::CREATE_EDGE: {
      // NOTE(review): the boolean passed to FindVertex/FindEdge presumably
      // selects the current-state view -- confirm against GraphDbAccessor.
      auto from = dba.FindVertex(vertex_from_id, true);
      auto to = dba.FindVertex(vertex_to_id, true);
      dba.InsertEdge(from, to, dba.EdgeType(edge_type_name), edge_id);
      break;
    }
    case Type::SET_PROPERTY_VERTEX: {
      // Names (not stored ids) are used so applying works even if the
      // name<->id mapping changed; see the StateDelta documentation.
      auto vertex = dba.FindVertex(vertex_id, true);
      vertex.PropsSet(dba.Property(property_name), value);
      break;
    }
    case Type::SET_PROPERTY_EDGE: {
      auto edge = dba.FindEdge(edge_id, true);
      edge.PropsSet(dba.Property(property_name), value);
      break;
    }
    case Type::ADD_LABEL: {
      auto vertex = dba.FindVertex(vertex_id, true);
      vertex.add_label(dba.Label(label_name));
      break;
    }
    case Type::REMOVE_LABEL: {
      auto vertex = dba.FindVertex(vertex_id, true);
      vertex.remove_label(dba.Label(label_name));
      break;
    }
    case Type::REMOVE_VERTEX: {
      // The check_empty member is not consulted here; the vertex is always
      // detach-removed.
      auto vertex = dba.FindVertex(vertex_id, true);
      dba.DetachRemoveVertex(vertex);
      break;
    }
    case Type::REMOVE_EDGE: {
      auto edge = dba.FindEdge(edge_id, true);
      dba.RemoveEdge(edge);
      break;
    }
    case Type::BUILD_INDEX: {
      dba.BuildIndex(dba.Label(label_name), dba.Property(property_name));
      break;
    }
    case Type::DROP_INDEX: {
      dba.DeleteIndex(dba.Label(label_name), dba.Property(property_name));
      break;
    }
    case Type::NO_OP:
      break;
    case Type::BUILD_UNIQUE_CONSTRAINT: {
      // Resolve property ids from their recovered names.
      std::vector<storage::Property> properties;
      properties.reserve(property_names.size());
      for (auto &p : property_names) {
        properties.push_back(dba.Property(p));
      }
      dba.BuildUniqueConstraint(dba.Label(label_name), properties);
    } break;
    case Type::DROP_UNIQUE_CONSTRAINT: {
      std::vector<storage::Property> properties;
      properties.reserve(property_names.size());
      for (auto &p : property_names) {
        properties.push_back(dba.Property(p));
      }
      dba.DeleteUniqueConstraint(dba.Label(label_name), properties);
    } break;
  }
}
}; // namespace database

View File

@ -1,149 +0,0 @@
#>cpp
#pragma once
#include "communication/bolt/v1/decoder/decoder.hpp"
#include "communication/bolt/v1/encoder/base_encoder.hpp"
#include "durability/hashed_file_reader.hpp"
#include "durability/hashed_file_writer.hpp"
#include "storage/single_node_ha/mvcc/version_list.hpp"
#include "storage/common/types/property_value.hpp"
#include "storage/common/types/types.hpp"
#include "utils/typeinfo.hpp"
class Vertex;
class Edge;
cpp<#
(lcp:namespace database)
#>cpp
class GraphDbAccessor;
cpp<#
;; NOTE: LCP definition -- the C++ StateDelta struct and its serialization
;; code are generated from this file.
(lcp:define-struct state-delta ()
  (
   ;; Members valid for every delta.
   (type "Type")
   (transaction-id "::tx::TransactionId")
   ;; Members valid only for some deltas, see StateDelta::Type comments above.
   (vertex-id "::storage::Gid")
   (edge-id "::storage::Gid")
   (vertex-from-id "::storage::Gid")
   (vertex-to-id "::storage::Gid")
   (edge-type "::storage::EdgeType")
   (edge-type-name "std::string")
   (property "::storage::Property")
   (property-name "std::string")
   (properties "std::vector<storage::Property>")
   (property-names "std::vector<std::string>")
   (value "PropertyValue")
   (label "::storage::Label")
   (label-name "std::string")
   (check-empty :bool))
  (:documentation
   "Describes single change to the database state. Used for state communication
over network in HA.
Labels, Properties and EdgeTypes are stored both as values (integers) and
strings (their names). The values are used when applying deltas in a running
database. Names are used when recovering the database as it's not guaranteed
that after recovery the old name<->value mapping will be preserved.
TODO: ensure the mapping is preserved after recovery and don't save strings
in StateDeltas.")
  (:public
   (lcp:define-enum type
     (transaction-begin
      transaction-commit
      transaction-abort
      create-vertex ;; vertex_id
      create-edge ;; edge_id, from_vertex_id, to_vertex_id, edge_type, edge_type_name
      set-property-vertex ;; vertex_id, property, property_name, property_value
      set-property-edge ;; edge_id, property, property_name, property_value
      ;; remove property is done by setting a PropertyValue to Null
      add-label ;; vertex_id, label, label_name
      remove-label ;; vertex_id, label, label_name
      remove-vertex ;; vertex_id, check_empty
      remove-edge ;; edge_id
      build-index ;; label, label_name, property, property_name
      drop-index ;; label, label_name, property, property_name
      no-op ;; no-op state delta required by Raft protocol
      build-unique_constraint ;; label, label_name, properties, property_names
      drop-unique_constraint ;; label, label_name, properties, property_names
      )
     (:documentation
      "Defines StateDelta type. For each type the comment indicates which values
need to be stored. All deltas have the transaction_id member, so that's
omitted in the comment.")
     (:serialize))
   #>cpp
   StateDelta() = default;
   StateDelta(const enum Type &type, tx::TransactionId tx_id)
       : type(type), transaction_id(tx_id) {}

   /** Attempts to decode a StateDelta from the given decoder. Returns the
    * decoded value if successful, otherwise returns nullopt. */
   static std::optional<StateDelta> Decode(
       HashedFileReader &reader,
       communication::bolt::Decoder<HashedFileReader> &decoder);

   /** Encodes the delta using primitive encoder, and writes out the new hash
    * with delta to the writer */
   void Encode(
       HashedFileWriter &writer,
       communication::bolt::BaseEncoder<HashedFileWriter> &encoder) const;

   static StateDelta TxBegin(tx::TransactionId tx_id);
   static StateDelta TxCommit(tx::TransactionId tx_id);
   static StateDelta TxAbort(tx::TransactionId tx_id);
   static StateDelta CreateVertex(tx::TransactionId tx_id,
                                  storage::Gid vertex_id);
   static StateDelta CreateEdge(tx::TransactionId tx_id, storage::Gid edge_id,
                                storage::Gid vertex_from_id,
                                storage::Gid vertex_to_id,
                                storage::EdgeType edge_type,
                                const std::string &edge_type_name);
   static StateDelta PropsSetVertex(tx::TransactionId tx_id,
                                    storage::Gid vertex_id,
                                    storage::Property property,
                                    const std::string &property_name,
                                    const PropertyValue &value);
   static StateDelta PropsSetEdge(tx::TransactionId tx_id, storage::Gid edge_id,
                                  storage::Property property,
                                  const std::string &property_name,
                                  const PropertyValue &value);
   static StateDelta AddLabel(tx::TransactionId tx_id, storage::Gid vertex_id,
                              storage::Label label,
                              const std::string &label_name);
   static StateDelta RemoveLabel(tx::TransactionId tx_id, storage::Gid vertex_id,
                                 storage::Label label,
                                 const std::string &label_name);
   static StateDelta RemoveVertex(tx::TransactionId tx_id, storage::Gid vertex_id,
                                  bool check_empty);
   static StateDelta RemoveEdge(tx::TransactionId tx_id, storage::Gid edge_id);
   static StateDelta BuildIndex(tx::TransactionId tx_id, storage::Label label,
                                const std::string &label_name,
                                storage::Property property,
                                const std::string &property_name);
   static StateDelta DropIndex(tx::TransactionId tx_id, storage::Label label,
                               const std::string &label_name,
                               storage::Property property,
                               const std::string &property_name);
   static StateDelta NoOp(tx::TransactionId tx_id);
   static StateDelta BuildUniqueConstraint(
       tx::TransactionId tx_id, storage::Label label,
       const std::string &label_name,
       const std::vector<storage::Property> &properties,
       const std::vector<std::string> &property_names);
   // NOTE(review): this parameter is named `property` here but `properties`
   // in the corresponding .cpp definition; the generated declaration is
   // unaffected, but the names should be kept consistent.
   static StateDelta DropUniqueConstraint(
       tx::TransactionId tx_id, storage::Label label,
       const std::string &label_name,
       const std::vector<storage::Property> &property,
       const std::vector<std::string> &property_names);

   /// Applies CRUD delta to database accessor. Fails on other types of deltas
   void Apply(GraphDbAccessor &dba) const;
   cpp<#)
  (:serialize (:slk)))
(lcp:pop-namespace) ;; database

View File

@ -1,34 +0,0 @@
#pragma once
///
///
/// IMPORTANT: Please update this file for every snapshot format change!!!
/// TODO (buda): This is not rock solid.
///
#include <array>
#include <cstdint>
namespace durability {

// Magic bytes written at the very beginning of every HA snapshot file
// ("MGHAsn").
constexpr std::array<uint8_t, 6> kSnapshotMagic{{'M', 'G', 'H', 'A', 's', 'n'}};

// The current default version of snapshot and WAL encoding / decoding.
constexpr int64_t kVersion{9};

// Snapshot format (version 9):
// 1) Magic number + snapshot version
//
// 2) A list of label+property indices.
//
// 3) Bolt encoded nodes. Each node is written in the following format:
//    * gid, labels, properties
// 4) Bolt encoded edges. Each edge is written in the following format:
//    * gid
//    * from, to
//    * edge_type
//    * properties
//
// 5) Snapshot summary (number of nodes, number of edges, hash)
}  // namespace durability

View File

@ -1,79 +0,0 @@
#include <algorithm>
#include <chrono>
#include <cstdint>
#include <exception>
#include <functional>
#include <limits>
#include <thread>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include "communication/server.hpp"
#include "database/single_node_ha/graph_db.hpp"
#include "memgraph_init.hpp"
#include "query/exceptions.hpp"
#include "utils/flag_validation.hpp"
// General purpose flags.
DEFINE_string(bolt_address, "0.0.0.0",
              "IP address on which the Bolt server should listen.");
DEFINE_VALIDATED_int32(bolt_port, 7687,
                       "Port on which the Bolt server should listen.",
                       FLAG_IN_RANGE(0, std::numeric_limits<uint16_t>::max()));
// std::max(..., 1U) guarantees at least one worker even when
// hardware_concurrency() can't detect the core count (it may return 0).
DEFINE_VALIDATED_int32(
    bolt_num_workers, std::max(std::thread::hardware_concurrency(), 1U),
    "Number of workers used by the Bolt server. By default, this will be the "
    "number of processing units available on the machine.",
    FLAG_IN_RANGE(1, INT32_MAX));
DEFINE_VALIDATED_int32(
    bolt_session_inactivity_timeout, 1800,
    "Time in seconds after which inactive Bolt sessions will be "
    "closed.",
    FLAG_IN_RANGE(1, INT32_MAX));
// Both the certificate and the key must be set to enable TLS on the Bolt
// endpoint (see SingleNodeHAMain).
DEFINE_string(bolt_cert_file, "",
              "Certificate file which should be used for the Bolt server.");
DEFINE_string(bolt_key_file, "",
              "Key file which should be used for the Bolt server.");

// Bolt server type used by this binary.
using ServerT = communication::Server<BoltSession, SessionData>;
using communication::ServerContext;
/// Entry point for the single-node HA flavour of Memgraph.
///
/// Creates the database and interpreter context, starts the Bolt server
/// (plain "Bolt" or TLS "BoltS" depending on --bolt-key-file /
/// --bolt-cert-file), installs shutdown signal handlers and blocks until the
/// database finishes shutting down.
void SingleNodeHAMain() {
  // NOTE(review): the original computed
  //   auto durability_directory = std::filesystem::path(FLAGS_durability_directory);
  // and never used it; the dead local has been removed.
  database::GraphDb db;
  query::InterpreterContext interpreter_context{&db};
  SessionData session_data{&db, &interpreter_context, nullptr, nullptr};

  // Use TLS only when both the key and the certificate are provided.
  ServerContext context;
  std::string service_name = "Bolt";
  if (!FLAGS_bolt_key_file.empty() && !FLAGS_bolt_cert_file.empty()) {
    context = ServerContext(FLAGS_bolt_key_file, FLAGS_bolt_cert_file);
    service_name = "BoltS";
  }
  ServerT server({FLAGS_bolt_address, static_cast<uint16_t>(FLAGS_bolt_port)},
                 &session_data, &context, FLAGS_bolt_session_inactivity_timeout,
                 service_name, FLAGS_bolt_num_workers);

  // Handler for regular termination signals.
  auto shutdown = [&db] { db.Shutdown(); };
  InitSignalHandlers(shutdown);

  // Start the database.
  db.Start();
  // Start the Bolt server.
  CHECK(server.Start()) << "Couldn't start the Bolt server!";

  // Block until shutdown; the Bolt server is stopped before the database
  // finishes its own shutdown.
  db.AwaitShutdown([&server] {
    server.Shutdown();
    server.AwaitShutdown();
  });
}
// Program entry point; WithInit performs common process initialization
// (flags, logging) and then runs SingleNodeHAMain.
int main(int argc, char **argv) {
  google::SetUsageMessage("Memgraph high availability database server");
  return WithInit(argc, argv, SingleNodeHAMain);
}

View File

@ -2,8 +2,6 @@
#include <gflags/gflags.h>
#include "database/graph_db.hpp"
#include "database/graph_db_accessor.hpp"
#include "query/context.hpp"
#include "query/db_accessor.hpp"
#include "query/frontend/ast/ast.hpp"

View File

@ -1,60 +0,0 @@
/// @file
#pragma once
#include <chrono>
#include <filesystem>
#include <ratio>
#include <json/json.hpp>
#include "raft/exceptions.hpp"
#include "utils/file.hpp"
#include "utils/string.hpp"
namespace raft {
/// Configurable Raft parameters.
struct Config {
std::chrono::milliseconds election_timeout_min;
std::chrono::milliseconds election_timeout_max;
std::chrono::milliseconds heartbeat_interval;
std::chrono::milliseconds replication_timeout;
int64_t log_size_snapshot_threshold;
static Config LoadFromFile(const std::string &raft_config_file) {
if (!std::filesystem::exists(raft_config_file))
throw RaftConfigException(raft_config_file);
nlohmann::json data;
try {
data = nlohmann::json::parse(
utils::Join(utils::ReadLines(raft_config_file), ""));
} catch (const nlohmann::json::parse_error &e) {
throw RaftConfigException(raft_config_file);
}
if (!data.is_object()) throw RaftConfigException(raft_config_file);
if (!data["election_timeout_min"].is_number())
throw RaftConfigException(raft_config_file);
if (!data["election_timeout_max"].is_number())
throw RaftConfigException(raft_config_file);
if (!data["heartbeat_interval"].is_number())
throw RaftConfigException(raft_config_file);
if (!data["replication_timeout"].is_number())
throw RaftConfigException(raft_config_file);
if (!data["log_size_snapshot_threshold"].is_number())
throw RaftConfigException(raft_config_file);
return Config{
std::chrono::duration<int64_t, std::milli>(
data["election_timeout_min"]),
std::chrono::duration<int64_t, std::milli>(
data["election_timeout_max"]),
std::chrono::duration<int64_t, std::milli>(data["heartbeat_interval"]),
std::chrono::duration<int64_t, std::milli>(data["replication_timeout"]),
data["log_size_snapshot_threshold"]};
}
};
} // namespace raft

View File

@ -1,137 +0,0 @@
#include "raft/coordination.hpp"
#include <gflags/gflags.h>
#include <json/json.hpp>
#include "utils/file.hpp"
#include "utils/string.hpp"
DEFINE_string(rpc_cert_file, "", "Certificate file to use (RPC).");
DEFINE_string(rpc_key_file, "", "Key file to use (RPC).");
namespace raft {

namespace fs = std::filesystem;

// Parses the coordination config file into a node_id -> endpoint map.
// Expected format: [[node_id, "address", port], ...].
//
// Throws RaftCoordinationConfigException on a missing file, malformed JSON,
// or an entry that isn't exactly [unsigned, string, unsigned].
std::unordered_map<uint16_t, io::network::Endpoint> LoadNodesFromFile(
    const std::string &coordination_config_file) {
  if (!fs::exists(coordination_config_file))
    throw RaftCoordinationConfigException("file (" + coordination_config_file +
                                          ") doesn't exist");
  std::unordered_map<uint16_t, io::network::Endpoint> nodes;
  nlohmann::json data;
  try {
    data = nlohmann::json::parse(
        utils::Join(utils::ReadLines(coordination_config_file), ""));
  } catch (const nlohmann::json::parse_error &e) {
    throw RaftCoordinationConfigException("invalid json");
  }
  if (!data.is_array()) throw RaftCoordinationConfigException("not an array");
  for (auto &it : data) {
    if (!it.is_array())
      throw RaftCoordinationConfigException("element not an array");
    if (it.size() != 3)
      throw RaftCoordinationConfigException("invalid number of subelements");
    if (!it[0].is_number_unsigned() || !it[1].is_string() ||
        !it[2].is_number_unsigned())
      throw RaftCoordinationConfigException("subelement data is invalid");
    nodes[it[0]] = io::network::Endpoint{it[1], it[2]};
  }
  return nodes;
}

Coordination::Coordination(
    uint16_t node_id,
    std::unordered_map<uint16_t, io::network::Endpoint> all_nodes)
    : node_id_(node_id), cluster_size_(all_nodes.size()) {
  // Create and initialize all server elements.
  // The RPC server uses TLS only when both the key and certificate flags are
  // set.
  if (!FLAGS_rpc_cert_file.empty() && !FLAGS_rpc_key_file.empty()) {
    server_context_.emplace(FLAGS_rpc_key_file, FLAGS_rpc_cert_file);
  } else {
    server_context_.emplace();
  }
  // NOTE(review): all_nodes[node_id_] uses map operator[], which would
  // default-construct an endpoint if node_id_ is absent; the validation loop
  // below only covers ids 1..cluster_size_ -- confirm callers always pass a
  // node_id contained in all_nodes.
  server_.emplace(all_nodes[node_id_], &server_context_.value(),
                  all_nodes.size() * 2);
  // Create all client elements. Per-node slots are indexed by node_id - 1
  // (node ids are 1-based).
  endpoints_.resize(cluster_size_);
  clients_.resize(cluster_size_);
  client_locks_.resize(cluster_size_);
  // Initialize all client elements. Clients themselves are connected lazily
  // in ExecuteOnOtherNode; here we only record endpoints and create locks.
  client_context_.emplace(server_context_->use_ssl());
  for (uint16_t i = 1; i <= cluster_size_; ++i) {
    auto it = all_nodes.find(i);
    if (it == all_nodes.end()) {
      throw RaftCoordinationConfigException("missing endpoint for node " +
                                            std::to_string(i));
    }
    endpoints_[i - 1] = it->second;
    client_locks_[i - 1] = std::make_unique<std::mutex>();
  }
}

Coordination::~Coordination() {
  CHECK(!alive_) << "You must call Shutdown and AwaitShutdown on Coordination!";
}

// Node ids are the contiguous range [1, cluster_size_].
std::vector<uint16_t> Coordination::GetAllNodeIds() {
  std::vector<uint16_t> ret;
  ret.reserve(cluster_size_);
  for (uint16_t i = 1; i <= cluster_size_; ++i) {
    ret.push_back(i);
  }
  return ret;
}

// Same as GetAllNodeIds but with this node's id removed.
std::vector<uint16_t> Coordination::GetOtherNodeIds() {
  std::vector<uint16_t> ret;
  ret.reserve(cluster_size_ - 1);
  for (uint16_t i = 1; i <= cluster_size_; ++i) {
    if (i == node_id_) continue;
    ret.push_back(i);
  }
  return ret;
}

uint16_t Coordination::GetAllNodeCount() { return cluster_size_; }

uint16_t Coordination::GetOtherNodeCount() { return cluster_size_ - 1; }

io::network::Endpoint Coordination::GetOtherNodeEndpoint(uint16_t other_id) {
  CHECK(other_id != node_id_) << "Trying to execute RPC on self!";
  CHECK(other_id >= 1 && other_id <= cluster_size_) << "Invalid node id!";
  return endpoints_[other_id - 1];
}

communication::ClientContext *Coordination::GetRpcClientContext() {
  return &client_context_.value();
}

bool Coordination::Start() { return server_->Start(); }

// Polls the alive_ flag every 100 ms; Shutdown() flips it, after which the
// callback runs and the RPC server is stopped.
void Coordination::AwaitShutdown(
    std::function<void(void)> call_before_shutdown) {
  // Wait for a shutdown notification.
  while (alive_) {
    std::this_thread::sleep_for(std::chrono::milliseconds(100));
  }
  // Call the before shutdown callback.
  call_before_shutdown();
  // Shutdown our RPC server.
  server_->Shutdown();
  server_->AwaitShutdown();
}

void Coordination::Shutdown() { alive_.store(false); }

}  // namespace raft

View File

@ -1,142 +0,0 @@
/// @file
#pragma once
#include <atomic>
#include <functional>
#include <memory>
#include <mutex>
#include <optional>
#include <thread>
#include <unordered_map>
#include <vector>
#include <glog/logging.h>
#include "io/network/endpoint.hpp"
#include "raft/exceptions.hpp"
#include "rpc/client.hpp"
#include "rpc/server.hpp"
namespace raft {

/// Loads raft cluster configuration from file.
///
/// File format:
/// [[node_id, "node_address", node_port], ...]
std::unordered_map<uint16_t, io::network::Endpoint> LoadNodesFromFile(
    const std::string &coordination_config_file);

/// This class is responsible for coordination between nodes within the Raft
/// cluster. Its implementation is quite similar to coordination in distributed
/// Memgraph apart from slight modifications which align more closely to Raft.
///
/// It should be noted that, in the context of communication, all nodes within
/// the Raft cluster are considered equivalent and are henceforth known simply
/// as nodes.
///
/// This class is thread safe.
class Coordination final {
 public:
  /// Class constructor
  ///
  /// @param node_id ID of Raft node on this machine.
  /// @param node mapping from node_id to endpoint information (for the whole
  ///        cluster).
  Coordination(uint16_t node_id,
               std::unordered_map<uint16_t, io::network::Endpoint> all_nodes);
  ~Coordination();

  // Non-copyable and non-movable: owns a running RPC server and mutexes.
  Coordination(const Coordination &) = delete;
  Coordination(Coordination &&) = delete;
  Coordination &operator=(const Coordination &) = delete;
  Coordination &operator=(Coordination &&) = delete;

  /// Returns all node IDs.
  std::vector<uint16_t> GetAllNodeIds();

  /// Returns other node IDs (excluding this node).
  std::vector<uint16_t> GetOtherNodeIds();

  /// Returns total number of nodes.
  uint16_t GetAllNodeCount();

  /// Returns number of other nodes.
  uint16_t GetOtherNodeCount();

  /// Returns endpoint of other node.
  io::network::Endpoint GetOtherNodeEndpoint(uint16_t other_id);

  /// Returns the currently used RPC client context.
  communication::ClientContext *GetRpcClientContext();

  /// Executes a RPC on another node in the cluster. If the RPC execution
  /// fails (because of underlying network issues) it returns a `std::nullopt`.
  ///
  /// The per-node client is created lazily on first use and invalidated on
  /// any failure so the next call reconnects; a per-node mutex serializes
  /// calls to the same peer.
  template <class TRequestResponse, class... Args>
  std::optional<typename TRequestResponse::Response> ExecuteOnOtherNode(
      uint16_t other_id, Args &&... args) {
    CHECK(other_id != node_id_) << "Trying to execute RPC on self!";
    CHECK(other_id >= 1 && other_id <= cluster_size_) << "Invalid node id!";
    auto &lock = *client_locks_[other_id - 1].get();
    auto &client = clients_[other_id - 1];
    std::lock_guard<std::mutex> guard(lock);
    if (!client) {
      const auto &endpoint = endpoints_[other_id - 1];
      client =
          std::make_unique<rpc::Client>(endpoint, &client_context_.value());
    }
    try {
      return client->Call<TRequestResponse>(std::forward<Args>(args)...);
    } catch (...) {
      // Invalidate the client so that we reconnect next time.
      client = nullptr;
      return std::nullopt;
    }
  }

  /// Registers a RPC call on this node.
  template <class TRequestResponse>
  void Register(std::function<void(slk::Reader *, slk::Builder *)> callback) {
    server_->Register<TRequestResponse>(callback);
  }

  /// Registers an extended RPC call on this node.
  template <class TRequestResponse>
  void Register(std::function<void(const io::network::Endpoint &, slk::Reader *,
                                   slk::Builder *)>
                    callback) {
    server_->Register<TRequestResponse>(callback);
  }

  /// Starts the coordination and its servers.
  bool Start();

  /// Blocks until the coordination is shut down. Accepts a callback function
  /// that is called to clean up all services that should be stopped before the
  /// coordination.
  void AwaitShutdown(std::function<void(void)> call_before_shutdown);

  /// Hints that the coordination should start shutting down the whole cluster.
  void Shutdown();

 private:
  uint16_t node_id_;        // This node's 1-based id within the cluster.
  uint16_t cluster_size_;   // Total number of nodes in the cluster.

  std::optional<communication::ServerContext> server_context_;
  std::optional<rpc::Server> server_;

  // Per-node state below is indexed by node_id - 1.
  std::optional<communication::ClientContext> client_context_;
  std::vector<io::network::Endpoint> endpoints_;
  std::vector<std::unique_ptr<rpc::Client>> clients_;
  std::vector<std::unique_ptr<std::mutex>> client_locks_;

  // Cleared by Shutdown(); polled by AwaitShutdown().
  std::atomic<bool> alive_{true};
};

}  // namespace raft

View File

@ -1,111 +0,0 @@
/// @file
#pragma once
#include "communication/bolt/v1/exceptions.hpp"
namespace raft {

/// Base exception class used for all exceptions that can occur within the
/// Raft protocol.
///
/// Built on the Bolt `VerboseError` with the DATABASE_ERROR classification,
/// so these messages can propagate to Bolt clients.
class RaftException : public communication::bolt::VerboseError {
 public:
  template <class... Args>
  RaftException(const std::string &format, Args &&... args)
      : communication::bolt::VerboseError(
            communication::bolt::VerboseError::Classification::DATABASE_ERROR,
            "Raft", "Error", format, std::forward<Args>(args)...) {}
};

/// This exception should be thrown when attempting to transition between
/// incompatible states, e.g. from `FOLLOWER` to `LEADER`.
class InvalidTransitionException : public RaftException {
 public:
  using RaftException::RaftException;
  InvalidTransitionException(const std::string &old_mode,
                             const std::string &new_mode)
      : RaftException("Invalid transition from " + old_mode + " to " +
                      new_mode) {}
};

/// Exception used to indicate something is wrong with the raft config provided
/// by the user.
class RaftConfigException : public RaftException {
 public:
  using RaftException::RaftException;
  explicit RaftConfigException(const std::string &path)
      : RaftException("Unable to parse raft config file " + path) {}
};

/// Exception used to indicate something is wrong with the coordination config
/// provided by the user.
class RaftCoordinationConfigException : public RaftException {
 public:
  using RaftException::RaftException;
  explicit RaftCoordinationConfigException(const std::string &msg)
      : RaftException("Unable to parse raft coordination config file: " + msg +
                      "!") {}
};

/// This exception should be thrown when a `RaftServer` instance attempts
/// to read data from persistent storage which is missing.
class MissingPersistentDataException : public RaftException {
 public:
  using RaftException::RaftException;
  explicit MissingPersistentDataException(const std::string &key)
      : RaftException(
            "Attempting to read non-existing persistent data under key: " +
            key) {}
};

/// This exception should be thrown when a `RaftServer` instance attempts to
/// read from replication log for a garbage collected transaction or a
/// transaction that didn't begin.
class InvalidReplicationLogLookup : public RaftException {
 public:
  using RaftException::RaftException;
  InvalidReplicationLogLookup()
      : RaftException("Replication log lookup for invalid transaction.") {}
};

/// This exception is thrown when a transaction is taking too long to replicate.
/// We're throwing this to reduce the number of threads that are in an infinite
/// loop during a network partition.
class ReplicationTimeoutException : public RaftException {
 public:
  using RaftException::RaftException;
  ReplicationTimeoutException()
      : RaftException("Raft Log replication is taking too long. ") {}
};

/// This exception is thrown when a client tries to execute a query on a server
/// that isn't a leader.
class CantExecuteQueries : public RaftException {
 public:
  using RaftException::RaftException;
  CantExecuteQueries()
      : RaftException(
            "Memgraph High Availability: Can't execute queries if not "
            "leader.") {}
};

/// This exception is thrown when leader re-election takes place during
/// transaction commit. We're throwing this exception to inform the client that
/// transaction failed.
class UnexpectedLeaderChangeException : public RaftException {
 public:
  using RaftException::RaftException;
  UnexpectedLeaderChangeException()
      : RaftException(
            "Leader change happened during transaction commit. Aborting.") {}
};

/// This exception is thrown when the machine is in the process of shutting down
/// and Raft API is being used.
class RaftShutdownException : public RaftException {
 public:
  using RaftException::RaftException;
  RaftShutdownException() : RaftException("Raft Server is shutting down.") {}
};

}  // namespace raft

View File

@ -1,19 +0,0 @@
#>cpp
#pragma once

#include "database/single_node_ha/serialization.hpp"
#include "durability/single_node_ha/state_delta.hpp"
cpp<#

(lcp:namespace raft)

;; A single entry of the replicated Raft log: the term in which the entry was
;; created plus the batch of database state deltas it carries. Serialized with
;; SLK for transfer in AppendEntries RPCs and for on-disk persistence.
(lcp:define-struct log-entry ()
  ((term :uint64_t)
   (deltas "std::vector<database::StateDelta>"))
  (:public #>cpp
  LogEntry() = default;
  LogEntry(uint64_t _term, std::vector<database::StateDelta> _deltas): term(_term), deltas(_deltas) {}
  cpp<#)
  (:serialize (:slk)))

(lcp:pop-namespace) ;; raft

View File

@ -1,96 +0,0 @@
/// @file
#pragma once
#include <mutex>
#include "durability/single_node_ha/state_delta.hpp"
#include "transactions/type.hpp"
namespace raft {
/// Replication state of a single Raft log entry.
enum class ReplicationStatus { REPLICATED, WAITING, ABORTED, INVALID };

/// Returns the human-readable name of a ReplicationStatus value.
inline std::string ReplicationStatusToString(
    const ReplicationStatus &replication_status) {
  switch (replication_status) {
    case ReplicationStatus::REPLICATED:
      return "REPLICATED";
    case ReplicationStatus::WAITING:
      return "WAITING";
    case ReplicationStatus::ABORTED:
      return "ABORTED";
    case ReplicationStatus::INVALID:
      return "INVALID";
  }
  // The switch is exhaustive for valid enumerators, but without a trailing
  // return the function falls off the end (undefined behaviour, and compilers
  // warn with -Wreturn-type) for out-of-range values. Treat those as INVALID.
  return "INVALID";
}
/// Structure which describes the status of a newly created LogEntry after the
/// execution of RaftServer's Emplace method.
///
/// It consists of two unsigned 64-bit integers which uniquely describe
/// the emplaced LogEntry:
///     1) Term when the LogEntry was emplaced to the Raft log.
///     2) Index of the entry within the Raft log.
///
/// In the case an entry was not successfully emplaced (e.g. unexpected
/// leadership change), the values will have a std::nullopt value instead.
struct LogEntryStatus {
  uint64_t term_id;
  uint64_t log_index;
};

/// Exposes only functionality that other parts of Memgraph can interact with.
class RaftInterface {
 public:
  /// Emplace a new LogEntry in the raft log and start its replication. This
  /// entry is created from a given batched set of StateDelta objects.
  ///
  /// It is possible that the entry was not successfully emplaced. In that case,
  /// the method returns std::nullopt and the caller is responsible for handling
  /// situation correctly (e.g. aborting the corresponding transaction).
  ///
  /// @returns an optional LogEntryStatus object as result.
  virtual std::optional<LogEntryStatus> Emplace(
      const std::vector<database::StateDelta> &) = 0;

  /// Returns true if the current servers mode is LEADER. False otherwise.
  virtual bool IsLeader() = 0;

  /// Returns the term ID of the current leader.
  virtual uint64_t TermId() = 0;

  /// Returns the replication status of LogEntry which began its replication in
  /// a given term ID and was emplaced in the raft log at the given index.
  ///
  /// Replication status can be one of the following
  ///   1) REPLICATED -- LogEntry was successfully replicated across
  ///                    the Raft cluster
  ///   2) WAITING    -- LogEntry was successfully emplaced in the Raft
  ///                    log and is currently being replicated.
  ///   3) ABORTED    -- LogEntry will not be replicated.
  ///   4) INVALID    -- the request for the LogEntry was invalid, most
  ///                    likely either term_id or log_index were out of range.
  virtual ReplicationStatus GetReplicationStatus(uint64_t term_id,
                                                 uint64_t log_index) = 0;

  /// Checks if the LogEntry with the give term id and log index can safely be
  /// committed in local storage.
  ///
  /// @param term_id term when the LogEntry was created
  /// @param log_index index of the LogEntry in the Raft log
  ///
  /// @return bool True if the transaction is safe to commit, false otherwise.
  ///
  /// @throws ReplicationTimeoutException
  /// @throws RaftShutdownException
  /// @throws InvalidReplicationLogLookup
  virtual bool SafeToCommit(uint64_t term_id, uint64_t log_index) = 0;

  /// Returns a reference to the implementation's internal mutex.
  /// NOTE(review): the intended locking discipline isn't visible from this
  /// header -- confirm against RaftServer before relying on it.
  virtual std::mutex &WithLock() = 0;

 protected:
  // Protected non-virtual destructor: implementations are never destroyed
  // through a RaftInterface pointer.
  ~RaftInterface() {}
};

}  // namespace raft

View File

@ -1,43 +0,0 @@
#>cpp
#pragma once

#include <cstring>
#include <vector>

#include "raft/log_entry.hpp"
#include "rpc/messages.hpp"
cpp<#

(lcp:namespace raft)

;; RequestVote RPC: sent by candidates to gather votes; the response carries
;; the voter's decision and its current term.
(lcp:define-rpc request-vote
  (:request
   ((candidate-id :uint16_t)
    (term :uint64_t)
    (last-log-index :uint64_t)
    (last-log-term :uint64_t)))
  (:response
   ((vote-granted :bool)
    (term :uint64_t))))

;; AppendEntries RPC: sent by the leader to replicate log entries; `entries`
;; may be empty (see RaftServer's handler, which treats that as a heartbeat).
(lcp:define-rpc append-entries
  (:request
   ((leader-id :uint16_t)
    (leader-commit :uint64_t)
    (term :uint64_t)
    (prev-log-index :uint64_t)
    (prev-log-term :uint64_t)
    (entries "std::vector<raft::LogEntry>")))
  (:response
   ((success :bool)
    (term :uint64_t))))

;; Heartbeat RPC: a dedicated lightweight message that refreshes the
;; follower's election timeout without carrying log entries.
(lcp:define-rpc heartbeat
  (:request
   ((leader-id :uint16_t)
    (term :uint64_t)))
  (:response
   ((success :bool)
    (term :uint64_t))))

(lcp:pop-namespace) ;; raft

View File

@ -1,961 +0,0 @@
#include "raft/raft_server.hpp"
#include <algorithm>
#include <chrono>
#include <iostream>
#include <memory>
#include <optional>
#include <fmt/format.h>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include "database/graph_db_accessor.hpp"
#include "durability/single_node_ha/paths.hpp"
#include "raft/exceptions.hpp"
#include "rpc/client.hpp"
#include "slk/streams.hpp"
#include "utils/cast.hpp"
#include "utils/exceptions.hpp"
#include "utils/on_scope_exit.hpp"
#include "utils/thread.hpp"
namespace raft {

using namespace std::literals::chrono_literals;
namespace fs = std::filesystem;

// Keys under which RaftServer persists its state in disk_storage_.
const std::string kCurrentTermKey = "current_term";
const std::string kVotedForKey = "voted_for";
const std::string kLogSizeKey = "log_size";
const std::string kLogEntryPrefix = "log_entry_";
// Subdirectory of the durability directory used for Raft persistent data.
const std::string kRaftDir = "raft";

// Constructs the server in FOLLOWER mode. No threads are started and no
// recovery happens until Start() is called.
RaftServer::RaftServer(uint16_t server_id, const std::string &durability_dir,
                       bool db_recover_on_startup, const Config &config,
                       Coordination *coordination, database::GraphDb *db)
    : config_(config),
      coordination_(coordination),
      db_(db),
      mode_(Mode::FOLLOWER),
      server_id_(server_id),
      durability_dir_(fs::path(durability_dir)),
      db_recover_on_startup_(db_recover_on_startup),
      commit_index_(0),
      last_applied_(0),
      last_entry_term_(0),
      issue_hb_(false),
      replication_timeout_(config.replication_timeout),
      disk_storage_(fs::path(durability_dir) / kRaftDir) {}
// Initializes persistent state (wiping or recovering it), sets up per-peer
// bookkeeping, registers the Raft RPC handlers (RequestVote, AppendEntries,
// Heartbeat) and spawns the election, per-peer replication/heartbeat and
// no-op issuer threads.
void RaftServer::Start() {
  if (!db_recover_on_startup_) {
    // We need to clear persisted data if we don't want any recovery.
    disk_storage_.DeletePrefix("");
  }

  // Persistent storage initialization
  if (!disk_storage_.Get(kLogSizeKey)) {
    // Fresh (or wiped) storage: start at term 0 with a single empty entry at
    // log index 0.
    SetCurrentTerm(0);
    SetLogSize(0);
    LogEntry empty_log_entry(0, {});
    AppendLogEntries(0, 0, {empty_log_entry});
  } else {
    RecoverPersistentData();
  }

  // Peer state initialization
  // NOTE(review): sized GetAllNodeCount() + 1, which suggests these vectors
  // are indexed directly by 1-based node id with slot 0 unused -- confirm.
  auto cluster_size = coordination_->GetAllNodeCount() + 1;
  next_index_.resize(cluster_size);
  index_offset_.resize(cluster_size);
  match_index_.resize(cluster_size);
  next_replication_.resize(cluster_size);
  next_heartbeat_.resize(cluster_size);

  // RPC registration
  coordination_->Register<RequestVoteRpc>(
      [this](auto *req_reader, auto *res_builder) {
        std::lock_guard<std::mutex> guard(lock_);
        RequestVoteReq req;
        slk::Load(&req, req_reader);

        // [Raft paper 5.1]
        // "If a server receives a request with a stale term,
        // it rejects the request"
        if (exiting_ || req.term < current_term_) {
          RequestVoteRes res(false, current_term_);
          slk::Save(res, res_builder);
          return;
        }

        // [Raft paper figure 2]
        // If RPC request or response contains term T > currentTerm,
        // set currentTerm = T and convert to follower.
        if (req.term > current_term_) {
          SetCurrentTerm(req.term);
          if (mode_ != Mode::FOLLOWER) Transition(Mode::FOLLOWER);
        }

        // Already voted in this term: only re-grant to the same candidate;
        // a granted vote also refreshes our election timeout.
        if (voted_for_) {
          bool grant_vote = voted_for_.value() == req.candidate_id;
          if (grant_vote) SetNextElectionTimePoint();
          RequestVoteRes res(grant_vote, current_term_);
          slk::Save(res, res_builder);
          return;
        }

        // [Raft paper 5.2, 5.4]
        // "Each server will vote for at most one candidate in a given
        // term, on a first-come-first-serve basis with an additional
        // restriction on votes"
        // Restriction: "The voter denies its vote if its own log is more
        // up-to-date than that of the candidate"
        auto last_entry_data = LastEntryData();
        bool grant_vote =
            AtLeastUpToDate(req.last_log_index, req.last_log_term,
                            last_entry_data.first, last_entry_data.second);
        if (grant_vote) {
          SetVotedFor(req.candidate_id);
          SetNextElectionTimePoint();
        }
        RequestVoteRes res(grant_vote, current_term_);
        slk::Save(res, res_builder);
      });

  coordination_->Register<AppendEntriesRpc>([this](auto *req_reader,
                                                   auto *res_builder) {
    std::lock_guard<std::mutex> guard(lock_);
    AppendEntriesReq req;
    slk::Load(&req, req_reader);

    // [Raft paper 5.1]
    // "If a server receives a request with a stale term, it rejects the
    // request"
    if (exiting_ || req.term < current_term_) {
      AppendEntriesRes res(false, current_term_);
      slk::Save(res, res_builder);
      return;
    }

    // Everything below is considered to be a valid RPC. This will ensure that
    // after we finish processing the current request, the election timeout will
    // be extended. During this process we will prevent the timeout from
    // occurring.
    next_election_ = TimePoint::max();
    election_change_.notify_all();
    utils::OnScopeExit extend_election_timeout([this] {
      // [Raft thesis 3.4]
      // A server remains in follower state as long as it receives valid RPCs
      // from a leader or candidate.
      SetNextElectionTimePoint();
      election_change_.notify_all();
    });

    // [Raft paper figure 2]
    // If RPC request or response contains term T > currentTerm,
    // set currentTerm = T and convert to follower.
    if (req.term > current_term_) {
      SetCurrentTerm(req.term);
      if (mode_ != Mode::FOLLOWER) Transition(Mode::FOLLOWER);
    }

    // [Raft paper 5.3]
    // "If a follower's log is inconsistent with the leader's, the
    // consistency check will fail in the AppendEntries RPC."
    //
    // Consistency checking assures the Log Matching Property:
    //   - If two entries in different logs have the same index and
    //     term, then they store the same command.
    //   - If two entries in different logs have the same index and term,
    //     then the logs are identical in all preceding entries.
    if (log_size_ <= req.prev_log_index ||
        GetLogEntry(req.prev_log_index).term != req.prev_log_term) {
      AppendEntriesRes res(false, current_term_);
      slk::Save(res, res_builder);
      return;
    }

    // No need to call this function for a heartbeat
    if (!req.entries.empty()) {
      AppendLogEntries(req.leader_commit, req.prev_log_index + 1, req.entries);
    }

    // [Raft paper 5.3]
    // "Once a follower learns that a log entry is committed, it applies
    // the entry to its state machine (in log order)
    while (req.leader_commit > last_applied_ && last_applied_ + 1 < log_size_) {
      ++last_applied_;
      ApplyStateDeltas(GetLogEntry(last_applied_).deltas);
    }

    // Respond positively to a heartbeat.
    if (req.entries.empty()) {
      AppendEntriesRes res(true, current_term_);
      slk::Save(res, res_builder);
      if (mode_ != Mode::FOLLOWER) Transition(Mode::FOLLOWER);
      return;
    }

    AppendEntriesRes res(true, current_term_);
    slk::Save(res, res_builder);
  });

  coordination_->Register<HeartbeatRpc>(
      [this](auto *req_reader, auto *res_builder) {
        std::lock_guard<std::mutex> guard(lock_);
        HeartbeatReq req;
        slk::Load(&req, req_reader);

        // Reject heartbeats from stale terms.
        if (exiting_ || req.term < current_term_) {
          HeartbeatRes res(false, current_term_);
          slk::Save(res, res_builder);
          return;
        }

        if (req.term > current_term_) {
          SetCurrentTerm(req.term);
          if (mode_ != Mode::FOLLOWER) Transition(Mode::FOLLOWER);
        }

        // A valid heartbeat refreshes the election timeout.
        SetNextElectionTimePoint();
        election_change_.notify_all();
        HeartbeatRes res(true, current_term_);
        slk::Save(res, res_builder);
      });

  // start threads
  SetNextElectionTimePoint();
  election_thread_ = std::thread(&RaftServer::ElectionThreadMain, this);
  for (auto peer_id : coordination_->GetOtherNodeIds()) {
    peer_threads_.emplace_back(&RaftServer::PeerThreadMain, this, peer_id);
    hb_threads_.emplace_back(&RaftServer::HBThreadMain, this, peer_id);
  }
  no_op_issuer_thread_ = std::thread(&RaftServer::NoOpIssuerThreadMain, this);
}
// Signals all worker threads to exit (via exiting_ plus waking every
// condition variable) and joins them. Safe to call once; Start() must have
// been called first for the threads to exist.
void RaftServer::Shutdown() {
  exiting_ = true;
  {
    std::lock_guard<std::mutex> guard(lock_);
    state_changed_.notify_all();
    election_change_.notify_all();
    leader_changed_.notify_all();
    hb_condition_.notify_all();
  }
  for (auto &peer_thread : peer_threads_) {
    if (peer_thread.joinable()) peer_thread.join();
  }
  for (auto &hb_thread : hb_threads_) {
    if (hb_thread.joinable()) hb_thread.join();
  }
  if (election_thread_.joinable()) election_thread_.join();
  if (no_op_issuer_thread_.joinable()) no_op_issuer_thread_.join();
}
// Sets and persists the current term. The recorded vote is cleared because a
// vote is only meaningful within a single term (Raft paper, figure 2).
void RaftServer::SetCurrentTerm(uint64_t new_current_term) {
  current_term_ = new_current_term;
  disk_storage_.Put(kCurrentTermKey, std::to_string(new_current_term));
  SetVotedFor(std::nullopt);
}

// Sets and persists the candidate this server voted for in the current term;
// std::nullopt erases the persisted vote.
void RaftServer::SetVotedFor(std::optional<uint16_t> new_voted_for) {
  voted_for_ = new_voted_for;
  if (new_voted_for)
    disk_storage_.Put(kVotedForKey, std::to_string(new_voted_for.value()));
  else
    disk_storage_.Delete(kVotedForKey);
}

// Sets and persists the Raft log size.
void RaftServer::SetLogSize(uint64_t new_log_size) {
  log_size_ = new_log_size;
  disk_storage_.Put(kLogSizeKey, std::to_string(new_log_size));
}
// Appends a new LogEntry built from `deltas` to the in-memory and persisted
// log and kicks off its replication. Returns std::nullopt when this server
// isn't the leader; otherwise returns the entry's {term, log index}.
std::optional<LogEntryStatus> RaftServer::Emplace(
    const std::vector<database::StateDelta> &deltas) {
  std::unique_lock<std::mutex> lock(lock_);
  if (mode_ != Mode::LEADER) {
    return std::nullopt;
  }
  LogEntry new_entry(current_term_, deltas);
  log_[log_size_] = new_entry;
  disk_storage_.Put(LogEntryKey(log_size_), SerializeLogEntry(new_entry));
  last_entry_term_ = new_entry.term;
  SetLogSize(log_size_ + 1);
  // Force replication: make every peer due for replication right now.
  TimePoint now = Clock::now();
  for (auto &peer_replication : next_replication_) peer_replication = now;
  // From this point on, we can say that the replication of a LogEntry started.
  replication_timeout_.Insert(new_entry.term, log_size_ - 1);
  state_changed_.notify_all();
  return {{new_entry.term, log_size_ - 1}};
}

// Leadership only counts while the server isn't shutting down.
bool RaftServer::IsLeader() { return !exiting_ && mode_ == Mode::LEADER; }

uint64_t RaftServer::TermId() { return current_term_; }
// Classifies the log entry created in `term_id` at `log_index`:
// INVALID (out-of-range request), ABORTED (entry at that index belongs to a
// different term, i.e. it was overwritten), WAITING (not yet applied) or
// REPLICATED (already applied).
ReplicationStatus RaftServer::GetReplicationStatus(uint64_t term_id,
                                                   uint64_t log_index) {
  std::unique_lock<std::mutex> lock(lock_);
  if (term_id > current_term_ || log_index >= log_size_)
    return ReplicationStatus::INVALID;
  auto log_entry = GetLogEntry(log_index);
  // This is correct because the leader can only append to the log and no two
  // workers can be leaders in the same term.
  if (log_entry.term != term_id) return ReplicationStatus::ABORTED;
  if (last_applied_ < log_index) return ReplicationStatus::WAITING;
  return ReplicationStatus::REPLICATED;
}

// Returns true once the entry is replicated; returns false while it is still
// waiting (throwing ReplicationTimeoutException if it has waited too long).
// Throws RaftShutdownException during shutdown and
// InvalidReplicationLogLookup for entries with no replication information.
bool RaftServer::SafeToCommit(uint64_t term_id, uint64_t log_index) {
  auto replication_status = GetReplicationStatus(term_id, log_index);
  // If we are shutting down, but we know that the Raft Log replicated
  // successfully, we return true. This will eventually commit since we
  // replicate NoOp on leader election.
  if (replication_status == ReplicationStatus::REPLICATED) return true;
  // Only if the log entry isn't replicated, throw an exception to inform
  // the client.
  if (exiting_) throw RaftShutdownException();
  if (replication_status == ReplicationStatus::WAITING) {
    if (replication_timeout_.CheckTimeout(term_id, log_index)) {
      throw ReplicationTimeoutException();
    }
    return false;
  }
  // TODO(ipaljak): Fix the old naming.
  // The only possibility left is that our ReplicationLog doesn't contain
  // information about that tx.
  throw InvalidReplicationLogLookup();
}
// Restores current_term_, voted_for_, log_size_ and last_entry_term_ from
// disk_storage_. Called from Start() only when a log size key is present
// (i.e. there is something to recover).
void RaftServer::RecoverPersistentData() {
  auto opt_term = disk_storage_.Get(kCurrentTermKey);
  if (opt_term) current_term_ = std::stoull(opt_term.value());
  auto opt_voted_for = disk_storage_.Get(kVotedForKey);
  if (!opt_voted_for) {
    voted_for_ = std::nullopt;
  } else {
    voted_for_ = {std::stoul(opt_voted_for.value())};
  }
  auto opt_log_size = disk_storage_.Get(kLogSizeKey);
  if (opt_log_size) log_size_ = std::stoull(opt_log_size.value());
  // The last entry must exist if the recovered log is non-empty; its term is
  // cached for fast LastEntryData lookups.
  if (log_size_ != 0) {
    auto opt_last_log_entry = disk_storage_.Get(LogEntryKey(log_size_ - 1));
    DCHECK(opt_last_log_entry != std::nullopt)
        << "Log size is equal to " << log_size_
        << ", but there is no log entry on index: " << log_size_ - 1;
    last_entry_term_ = DeserializeLogEntry(opt_last_log_entry.value()).term;
  }
}
// Moves the server into `new_mode` and (re)initializes the volatile state
// associated with that mode. The statement order inside each case matters
// (e.g. heartbeats are stopped before the mode flips); callers hold `lock_`.
void RaftServer::Transition(const Mode &new_mode) {
  switch (new_mode) {
    case Mode::FOLLOWER: {
      LOG(INFO) << "Server " << server_id_
                << ": Transition to FOLLOWER (Term: " << current_term_ << ")";

      // Stepping down from leadership requires wiping leader-only state.
      bool reset = mode_ == Mode::LEADER;
      issue_hb_ = false;
      mode_ = Mode::FOLLOWER;

      if (reset) {
        VLOG(40) << "Resetting internal state";
        // Temporarily freeze the election timer while we do the reset.
        next_election_ = TimePoint::max();

        db_->Reset();
        replication_timeout_.Clear();

        // Re-apply the committed prefix of the Raft log onto the freshly
        // reset database.
        uint64_t starting_index = 1;
        for (uint64_t i = starting_index; i <= commit_index_; ++i) {
          ApplyStateDeltas(GetLogEntry(i).deltas);
        }
        last_applied_ = commit_index_;
      }

      SetNextElectionTimePoint();
      election_change_.notify_all();
      state_changed_.notify_all();
      break;
    }

    case Mode::CANDIDATE: {
      LOG(INFO) << "Server " << server_id_
                << ": Transition to CANDIDATE (Term: " << current_term_ << ")";

      // [Raft thesis, section 3.4]
      // "Each candidate restarts its randomized election timeout at the start
      // of an election, and it waits for that timeout to elapse before
      // starting the next election; this reduces the likelihood of another
      // split vote in the new election."
      SetNextElectionTimePoint();
      election_change_.notify_all();

      // [Raft thesis, section 3.4]
      // "To begin an election, a follower increments its current term and
      // transitions to candidate state. It then votes for itself and issues
      // RequestVote RPCs in parallel to each of the other servers in the
      // cluster."
      SetCurrentTerm(current_term_ + 1);
      SetVotedFor(server_id_);

      // The candidate's own vote; `vote_requested_` is 1-indexed by node ID.
      granted_votes_ = 1;
      vote_requested_.assign(coordination_->GetAllNodeCount() + 1, false);

      issue_hb_ = false;
      mode_ = Mode::CANDIDATE;
      state_changed_.notify_all();
      break;
    }

    case Mode::LEADER: {
      LOG(INFO) << "Server " << server_id_
                << ": Transition to LEADER (Term: " << current_term_ << ")";
      // Freeze election timer: a leader never times out into an election.
      next_election_ = TimePoint::max();
      election_change_.notify_all();

      // Set next heartbeat and replication to correct values
      TimePoint now = Clock::now();
      for (auto &peer_replication : next_replication_)
        peer_replication = now + config_.heartbeat_interval;
      for (auto &peer_heartbeat : next_heartbeat_)
        peer_heartbeat = now + config_.heartbeat_interval;

      issue_hb_ = true;
      hb_condition_.notify_all();

      // [Raft paper figure 2]
      // "For each server, index of the next log entry to send to that server
      // is initialized to leader's last log index + 1"
      for (int i = 1; i <= coordination_->GetAllNodeCount(); ++i) {
        next_index_[i] = log_size_;
        index_offset_[i] = 1;
        match_index_[i] = 0;
      }

      // Raft guarantees the Leader Append-Only property [Raft paper 5.2]
      // so it's safe to apply everything from our log into our state machine
      for (int i = last_applied_ + 1; i < log_size_; ++i)
        ApplyStateDeltas(GetLogEntry(i).deltas);
      last_applied_ = log_size_ - 1;

      mode_ = Mode::LEADER;
      leader_changed_.notify_all();
      break;
    }
  }
}
// Advances `commit_index_` to the highest index known to be replicated on a
// majority of the cluster, then clears replication timeouts for the newly
// committed entries. Leader-only; callers hold `lock_`.
void RaftServer::AdvanceCommitIndex() {
  DCHECK(mode_ == Mode::LEADER)
      << "Commit index can only be advanced by the leader";

  // Collect, per server, the highest index known to be replicated there. Our
  // own log trivially contains everything up to `log_size_ - 1`.
  std::vector<uint64_t> known_replication_indices;
  for (int i = 1; i <= coordination_->GetAllNodeCount(); ++i) {
    if (i != server_id_)
      known_replication_indices.push_back(match_index_[i]);
    else
      known_replication_indices.push_back(log_size_ - 1);
  }

  // After sorting, the element at position (n - 1) / 2 is the largest index
  // replicated on a majority of the n servers.
  std::sort(known_replication_indices.begin(), known_replication_indices.end());
  uint64_t new_commit_index =
      known_replication_indices[(coordination_->GetAllNodeCount() - 1) / 2];

  // This can happen because we reset `match_index` vector to 0 after a
  // new leader has been elected.
  if (commit_index_ >= new_commit_index) return;

  // [Raft thesis, section 3.6.2]
  // "(...) Raft never commits log entries from previous terms by counting
  // replicas. Only log entries from the leader's current term are committed by
  // counting replicas; once an entry from the current term has been committed
  // in this way, then all prior entries are committed indirectly because of the
  // Log Matching Property."
  if (GetLogEntry(new_commit_index).term != current_term_) {
    VLOG(40) << "Server " << server_id_
             << ": cannot commit log entry from "
                "previous term based on "
                "replication count.";
    return;
  }

  VLOG(40) << "Begin applying commited transactions";

  // Release the replication-timeout bookkeeping for every newly committed
  // entry so clients waiting in SafeToCommit are not timed out.
  for (int i = commit_index_ + 1; i <= new_commit_index; ++i) {
    auto log_entry = GetLogEntry(i);
    DCHECK(log_entry.deltas.size() > 2)
        << "Log entry should consist of at least three state deltas.";
    replication_timeout_.Remove(log_entry.term, i);
  }

  commit_index_ = new_commit_index;
  last_applied_ = new_commit_index;
}
// Dispatches replication traffic towards `peer_id`. In the current
// implementation only log entries are ever sent from here; the snapshot path
// is not taken.
void RaftServer::SendEntries(uint16_t peer_id,
                             std::unique_lock<std::mutex> *lock) {
  SendLogEntries(peer_id, lock);
}
// Sends an AppendEntries RPC to `peer_id` and processes the reply. The given
// `lock` (on `lock_`) is released for the duration of the RPC and reacquired
// afterwards, so all state read before the call is snapshotted into locals
// and all state is revalidated after the call. Leader-only.
void RaftServer::SendLogEntries(uint16_t peer_id,
                                std::unique_lock<std::mutex> *lock) {
  uint64_t request_term = current_term_;
  uint64_t request_prev_log_index = next_index_[peer_id] - 1;
  uint64_t request_prev_log_term;
  request_prev_log_term = GetLogEntry(next_index_[peer_id] - 1).term;

  // Entries from `next_index_[peer_id]` to the end of the log, if any.
  std::vector<LogEntry> request_entries;
  if (next_index_[peer_id] <= log_size_ - 1)
    GetLogSuffix(next_index_[peer_id], request_entries);

  // Copy all internal variables before releasing the lock.
  auto server_id = server_id_;
  auto commit_index = commit_index_;

  VLOG(40) << "Server " << server_id_
           << ": Sending Entries RPC to server " << peer_id
           << " (Term: " << current_term_ << ")";
  VLOG(40) << "Entries size: " << request_entries.size();

  // Execute the RPC without holding the lock.
  lock->unlock();
  auto reply = coordination_->ExecuteOnOtherNode<AppendEntriesRpc>(
      peer_id, server_id, commit_index, request_term, request_prev_log_index,
      request_prev_log_term, request_entries);
  lock->lock();

  if (!reply) {
    // Peer unreachable; retry after the regular heartbeat interval.
    next_replication_[peer_id] = Clock::now() + config_.heartbeat_interval;
    return;
  }

  // We can't early exit if the `exiting_` flag is true just yet. It is possible
  // that the response we handle here carries the last confirmation that the logs
  // have been replicated. We need to handle the response so the client doesn't
  // retry the query because he thinks the query failed.
  if (current_term_ != request_term || mode_ != Mode::LEADER) {
    return;
  }

  if (OutOfSync(reply->term)) {
    state_changed_.notify_all();
    return;
  }

  DCHECK(mode_ == Mode::LEADER)
      << "Elected leader for term should never change.";

  if (reply->term != current_term_) {
    VLOG(40) << "Server " << server_id_
             << ": Ignoring stale AppendEntriesRPC reply from " << peer_id;
    return;
  }

  if (!reply->success) {
    // Replication can fail for the first log entry if the peer that we're
    // sending the entry is in the process of shutting down.
    if (next_index_[peer_id] > index_offset_[peer_id]) {
      // Back off exponentially ("binary lifting") to find the match point in
      // O(log n) round trips.
      next_index_[peer_id] -= index_offset_[peer_id];
      // Overflow should be prevented by snapshot threshold constant.
      index_offset_[peer_id] <<= 1UL;
    } else {
      next_index_[peer_id] = 1UL;
    }
  } else {
    uint64_t new_match_index = request_prev_log_index + request_entries.size();
    DCHECK(match_index_[peer_id] <= new_match_index)
        << "`match_index` should increase monotonically within a term";
    match_index_[peer_id] = new_match_index;
    // A successful replication of real entries may unlock a commit.
    if (request_entries.size() > 0) AdvanceCommitIndex();
    next_index_[peer_id] = match_index_[peer_id] + 1;
    index_offset_[peer_id] = 1;
    next_replication_[peer_id] = Clock::now() + config_.heartbeat_interval;
  }

  if (exiting_) return;
  state_changed_.notify_all();
}
// Timer thread: triggers an election (transition to CANDIDATE) whenever the
// randomized election deadline passes without being pushed forward.
void RaftServer::ElectionThreadMain() {
  utils::ThreadSetName("ElectionThread");
  std::unique_lock<std::mutex> lock(lock_);
  while (!exiting_) {
    if (Clock::now() >= next_election_) {
      VLOG(40) << "Server " << server_id_
               << ": Election timeout exceeded (Term: " << current_term_ << ")";
      Transition(Mode::CANDIDATE);
      state_changed_.notify_all();
    }
    // Sleep until the (possibly updated) deadline; spurious or early wakeups
    // are harmless because the deadline is re-checked at the top of the loop.
    election_change_.wait_until(lock, next_election_);
  }
}
// Per-peer worker thread: issues RequestVote RPCs while this server is a
// candidate and replication RPCs while it is the leader.
void RaftServer::PeerThreadMain(uint16_t peer_id) {
  utils::ThreadSetName(fmt::format("RaftPeer{}", peer_id));
  std::unique_lock<std::mutex> lock(lock_);

  /* This loop will either call a function that issues an RPC or wait on the
   * condition variable. It must not do both! Lock on `mutex_` is released
   * while waiting for RPC response, which might cause us to miss a
   * notification on `state_changed_` conditional variable and wait
   * indefinitely. The safest thing to do is to assume some important part of
   * state was modified while we were waiting for the response and loop around
   * to check. */
  while (!exiting_) {
    TimePoint now = Clock::now();
    TimePoint wait_until;

    switch (mode_) {
      case Mode::FOLLOWER: {
        // Followers issue no RPCs; sleep until the state changes.
        wait_until = TimePoint::max();
        break;
      }

      case Mode::CANDIDATE: {
        // Each peer is asked for its vote at most once per candidacy.
        if (vote_requested_[peer_id]) {
          wait_until = TimePoint::max();
          break;
        }

        // TODO(ipaljak): Consider backoff.
        wait_until = TimePoint::max();

        // Copy all internal variables before releasing the lock.
        auto server_id = server_id_;
        auto request_term = current_term_.load();
        auto last_entry_data = LastEntryData();

        vote_requested_[peer_id] = true;

        // Execute the RPC.
        lock.unlock();  // Release lock while waiting for response
        auto reply = coordination_->ExecuteOnOtherNode<RequestVoteRpc>(
            peer_id, server_id, request_term, last_entry_data.first,
            last_entry_data.second);
        lock.lock();

        // If the peer isn't reachable, it is the same as if he didn't grant
        // us his vote.
        if (!reply) {
          reply = RequestVoteRes(false, request_term);
        }

        // The world may have moved on while the lock was released; discard
        // replies that no longer apply to the current candidacy.
        if (current_term_ != request_term || mode_ != Mode::CANDIDATE ||
            exiting_) {
          VLOG(40) << "Server " << server_id_
                   << ": Ignoring RequestVoteRPC reply from " << peer_id;
          break;
        }

        if (OutOfSync(reply->term)) {
          state_changed_.notify_all();
          continue;
        }

        if (reply->vote_granted) {
          VLOG(40) << "Server " << server_id_ << ": Got vote from "
                   << peer_id;
          ++granted_votes_;
          if (HasMajorityVote()) Transition(Mode::LEADER);
        } else {
          VLOG(40) << "Server " << server_id_ << ": Denied vote from "
                   << peer_id;
        }

        state_changed_.notify_all();
        continue;
      }

      case Mode::LEADER: {
        // Replicate as soon as this peer's replication deadline passes.
        if (now >= next_replication_[peer_id]) {
          SendEntries(peer_id, &lock);
          continue;
        }
        wait_until = next_replication_[peer_id];
        break;
      }
    }

    if (exiting_) break;
    state_changed_.wait_until(lock, wait_until);
  }
}
// Per-peer heartbeat thread: periodically sends HeartbeatRpc to `peer_id`
// while `issue_hb_` is set (i.e. while this server acts as the leader).
void RaftServer::HBThreadMain(uint16_t peer_id) {
  utils::ThreadSetName(fmt::format("HBThread{}", peer_id));
  std::unique_lock<std::mutex> lock(heartbeat_lock_);

  // The heartbeat thread uses a dedicated RPC client for its peer so that it
  // can issue heartbeats in parallel with other RPC requests that are being
  // issued to the peer (replication, voting, etc.)
  std::unique_ptr<rpc::Client> rpc_client;

  while (!exiting_) {
    TimePoint wait_until;

    if (!issue_hb_) {
      // Not the leader: sleep until `hb_condition_` wakes us.
      wait_until = TimePoint::max();
    } else {
      TimePoint now = Clock::now();
      if (now < next_heartbeat_[peer_id]) {
        wait_until = next_heartbeat_[peer_id];
      } else {
        VLOG(40) << "Server " << server_id_ << ": Sending HB to server "
                 << peer_id << " (Term: " << current_term_ << ")";

        // The RPC is issued without holding `heartbeat_lock_`.
        lock.unlock();

        // Lazily (re)connect the dedicated client.
        if (!rpc_client) {
          rpc_client = std::make_unique<rpc::Client>(
              coordination_->GetOtherNodeEndpoint(peer_id),
              coordination_->GetRpcClientContext());
        }

        try {
          rpc_client->Call<HeartbeatRpc>(server_id_, current_term_);
        } catch (...) {
          // Invalidate the client so that we reconnect next time.
          rpc_client = nullptr;
        }

        lock.lock();

        // This is ok even if we don't receive a reply.
        next_heartbeat_[peer_id] = now + config_.heartbeat_interval;
        wait_until = next_heartbeat_[peer_id];
      }
    }

    if (exiting_) break;
    hb_condition_.wait_until(lock, wait_until);
  }
}
// Waits for leadership changes and issues a no-op log entry each time this
// server becomes the leader, forcing replication (and hence eventual commit)
// of entries from previous terms.
void RaftServer::NoOpIssuerThreadMain() {
  utils::ThreadSetName(fmt::format("NoOpIssuer"));
  // This private mutex exists only so we can wait on `leader_changed_`; it
  // guards no shared state.
  std::mutex m;
  auto lock = std::unique_lock<std::mutex>(m);
  while (!exiting_) {
    // NOTE(review): this wait has no predicate, so a notification that fires
    // while NoOpCreate() is running could be missed — confirm that a missed
    // wakeup here is acceptable.
    leader_changed_.wait(lock);
    // NoOpCreate() will create a new transaction that has a NO_OP
    // StateDelta. This will trigger the whole procedure of replicating logs
    // in our implementation of Raft.
    if (!exiting_) NoOpCreate();
  }
}
void RaftServer::SetNextElectionTimePoint() {
// [Raft thesis, section 3.4]
// "Raft uses randomized election timeouts to ensure that split votes are
// rare and that they are resolved quickly. To prevent split votes in the
// first place, election timeouts are chosen randomly from a fixed interval
// (e.g., 150-300 ms)."
std::uniform_int_distribution<uint64_t> distribution(
config_.election_timeout_min.count(),
config_.election_timeout_max.count());
Clock::duration wait_interval = std::chrono::milliseconds(distribution(rng_));
next_election_ = Clock::now() + wait_interval;
}
// True when the votes granted so far form a strict majority of the cluster.
bool RaftServer::HasMajorityVote() {
  const bool majority = 2 * granted_votes_ > coordination_->GetAllNodeCount();
  if (majority) {
    VLOG(40) << "Server " << server_id_
             << ": Obtained majority vote (Term: " << current_term_ << ")";
  }
  return majority;
}
// Metadata of the last log entry: {index of last entry, term of last entry}.
std::pair<uint64_t, uint64_t> RaftServer::LastEntryData() {
  return std::make_pair(log_size_, last_entry_term_);
}
// Log A is at least as up-to-date as log B (Raft paper 5.4.1): compare the
// terms of the last entries first, break ties on log length.
bool RaftServer::AtLeastUpToDate(uint64_t last_log_index_a,
                                 uint64_t last_log_term_a,
                                 uint64_t last_log_index_b,
                                 uint64_t last_log_term_b) {
  if (last_log_term_a != last_log_term_b)
    return last_log_term_a > last_log_term_b;
  return last_log_index_a >= last_log_index_b;
}
// Handles a reply carrying a newer term than ours: persist the new term,
// forget our vote and step down to follower. Returns true iff that happened.
bool RaftServer::OutOfSync(uint64_t reply_term) {
  DCHECK(mode_ != Mode::FOLLOWER) << "`OutOfSync` called from FOLLOWER mode";

  // [Raft thesis, Section 3.3]
  // "Current terms are exchanged whenever servers communicate; if one
  // server's current term is smaller than the other's, then it updates
  // its current term to the larger value. If a candidate or leader
  // discovers that its term is out of date, it immediately reverts to
  // follower state."
  if (reply_term <= current_term_) return false;

  // NOTE(review): only the on-disk copy of the term is updated here — the
  // in-memory `current_term_` is presumably refreshed elsewhere; confirm.
  disk_storage_.Put(kCurrentTermKey, std::to_string(reply_term));
  disk_storage_.Delete(kVotedForKey);
  granted_votes_ = 0;
  Transition(Mode::FOLLOWER);
  return true;
}
// Fetches the log entry at `index`, preferring the in-memory cache and
// falling back to the persisted copy on disk.
LogEntry RaftServer::GetLogEntry(uint64_t index) {
  const auto it = log_.find(index);
  if (it != log_.end()) return it->second;  // retrieve in-mem if possible
  const auto opt_value = disk_storage_.Get(LogEntryKey(index));
  DCHECK(opt_value != std::nullopt)
      << "Log index (" << index << ") out of bounds.";
  return DeserializeLogEntry(opt_value.value());
}
// Removes every log entry with index >= `starting_index` from both the
// in-memory cache and disk, then shrinks the persisted log size.
void RaftServer::DeleteLogSuffix(int starting_index) {
  DCHECK(0 <= starting_index && starting_index < log_size_)
      << "Log index out of bounds.";
  for (auto index = starting_index; index < log_size_; ++index) {
    log_.erase(index);
    disk_storage_.Delete(LogEntryKey(index));
  }
  SetLogSize(starting_index);
}
// Appends every log entry with index >= `starting_index` to `entries`.
void RaftServer::GetLogSuffix(int starting_index,
                              std::vector<raft::LogEntry> &entries) {
  DCHECK(0 <= starting_index && starting_index < log_size_)
      << "Log index out of bounds.";
  for (auto index = starting_index; index < log_size_; ++index)
    entries.emplace_back(GetLogEntry(index));
}
void RaftServer::AppendLogEntries(uint64_t leader_commit_index,
uint64_t starting_index,
const std::vector<LogEntry> &new_entries) {
for (int i = 0; i < new_entries.size(); ++i) {
// If existing entry conflicts with new one, we need to delete the
// existing entry and all that follow it.
int current_index = i + starting_index;
if (log_size_ > current_index &&
GetLogEntry(current_index).term != new_entries[i].term) {
DeleteLogSuffix(current_index);
}
DCHECK(log_size_ >= current_index) << "Current Log index out of bounds.";
if (log_size_ == current_index) {
log_[log_size_] = new_entries[i];
disk_storage_.Put(LogEntryKey(log_size_),
SerializeLogEntry(new_entries[i]));
last_entry_term_ = new_entries[i].term;
SetLogSize(log_size_ + 1);
}
}
// See Raft paper 5.3
if (leader_commit_index > commit_index_) {
commit_index_ = std::min(leader_commit_index, log_size_ - 1);
}
}
// Disk-storage key for the log entry at `index`: the shared log-entry prefix
// followed by the decimal index.
std::string RaftServer::LogEntryKey(uint64_t index) {
  std::string key(kLogEntryPrefix);
  key += std::to_string(index);
  return key;
}
/// Serializes a Raft log entry into a binary `std::string` via SLK. The
/// builder's flush callback collects every produced chunk into the stream.
std::string RaftServer::SerializeLogEntry(const LogEntry &log_entry) {
  std::stringstream stream(std::ios_base::in | std::ios_base::out |
                           std::ios_base::binary);
  slk::Builder builder(
      [&stream](const uint8_t *data, size_t size, bool have_more) {
        // Write each flushed chunk in one bulk operation instead of pushing
        // one byte at a time through operator<<.
        stream.write(reinterpret_cast<const char *>(data), size);
      });
  slk::Save(log_entry, &builder);
  builder.Finalize();
  return stream.str();
}
// Reconstructs a `LogEntry` from its SLK-serialized binary form (the inverse
// of SerializeLogEntry). A payload that fails to load is treated as an
// unrecoverable durability error and aborts the process via LOG(FATAL).
LogEntry RaftServer::DeserializeLogEntry(
    const std::string &serialized_log_entry) {
  slk::Reader reader(
      reinterpret_cast<const uint8_t *>(serialized_log_entry.data()),
      serialized_log_entry.size());
  LogEntry deserialized;
  try {
    slk::Load(&deserialized, &reader);
    reader.Finalize();
  } catch (const slk::SlkReaderException &) {
    LOG(FATAL) << "Couldn't load log from disk storage!";
  }
  return deserialized;
}
// Starts a transaction whose only payload is a NO_OP StateDelta and commits
// it, pushing a fresh entry from the current term through the normal
// replication machinery.
void RaftServer::NoOpCreate() {
  // TODO(ipaljak): Review this after implementing RaftDelta object.
  auto dba = db_->Access();
  db_->sd_buffer()->Emplace(database::StateDelta::NoOp(dba.transaction_id()));
  try {
    dba.Commit();
  } catch (const RaftException &) {
    // NoOp failure can be ignored.
    return;
  }
}
// Applies a batch of state deltas representing exactly one transaction to the
// database: the batch must open with TRANSACTION_BEGIN, close with
// TRANSACTION_COMMIT, and never contain TRANSACTION_ABORT.
void RaftServer::ApplyStateDeltas(
    const std::vector<database::StateDelta> &deltas) {
  // Holds the accessor of the transaction currently being replayed.
  std::optional<database::GraphDbAccessor> dba;
  for (auto &delta : deltas) {
    switch (delta.type) {
      case database::StateDelta::Type::NO_OP:
        break;
      case database::StateDelta::Type::TRANSACTION_BEGIN:
        CHECK(!dba) << "Double transaction start";
        dba = db_->Access();
        break;
      case database::StateDelta::Type::TRANSACTION_COMMIT:
        CHECK(dba) << "Missing accessor for transaction"
                   << delta.transaction_id;
        dba->Commit();
        dba = std::nullopt;
        break;
      case database::StateDelta::Type::TRANSACTION_ABORT:
        LOG(FATAL) << "ApplyStateDeltas shouldn't know about aborted "
                      "transactions";
        break;
      default:
        // Every other delta kind mutates data and is applied through the
        // open accessor.
        CHECK(dba) << "Missing accessor for transaction"
                   << delta.transaction_id;
        delta.Apply(*dba);
    }
  }
  CHECK(!dba) << "StateDeltas missing commit command";
}
// Exposes the internal state mutex (via RaftInterface) so callers can
// synchronize external operations with the Raft server's state.
std::mutex &RaftServer::WithLock() { return lock_; }
} // namespace raft

View File

@ -1,393 +0,0 @@
/// @file
#pragma once
#include <atomic>
#include <filesystem>
#include <mutex>
#include <unordered_map>
#include <vector>
#include "durability/single_node_ha/state_delta.hpp"
#include "kvstore/kvstore.hpp"
#include "raft/config.hpp"
#include "raft/coordination.hpp"
#include "raft/log_entry.hpp"
#include "raft/raft_interface.hpp"
#include "raft/raft_rpc_messages.hpp"
#include "raft/replication_log.hpp"
#include "raft/replication_timeout_map.hpp"
#include "transactions/type.hpp"
#include "utils/scheduler.hpp"
// Forward declaration
namespace database {
class GraphDb;
} // namespace database
namespace raft {
using Clock = std::chrono::system_clock;
using TimePoint = std::chrono::system_clock::time_point;

/// Operating modes of a Raft server.
enum class Mode { FOLLOWER, CANDIDATE, LEADER };

/// Returns a human-readable name for the given Raft mode.
inline std::string ModeToString(const Mode &mode) {
  switch (mode) {
    case Mode::FOLLOWER:
      return "FOLLOWER";
    case Mode::CANDIDATE:
      return "CANDIDATE";
    case Mode::LEADER:
      return "LEADER";
  }
  // All enumerators are handled above; this return silences compilers that
  // cannot prove the switch exhaustive and avoids undefined behavior
  // (falling off the end of a non-void function) for an invalid enum value.
  return "UNKNOWN";
}
/// Class which models the behaviour of a single server within the Raft
/// cluster. The class is responsible for storing both volatile and
/// persistent internal state of the corresponding state machine as well
/// as performing operations that comply with the Raft protocol.
class RaftServer final : public RaftInterface {
 public:
  RaftServer() = delete;

  /// The implementation assumes that server IDs are unique integers ranging
  /// from 1 to cluster_size.
  ///
  /// @param server_id ID of the current server.
  /// @param durability_dir directory for persisted data.
  /// @param db_recover_on_startup flag indicating if recovery should happen at
  ///                              startup.
  /// @param config raft configuration.
  /// @param coordination Abstraction for coordination between Raft servers.
  /// @param db The current DB object.
  RaftServer(uint16_t server_id, const std::string &durability_dir,
             bool db_recover_on_startup, const Config &config,
             raft::Coordination *coordination, database::GraphDb *db);

  /// Starts the RPC servers and starts mechanisms inside Raft protocol.
  void Start();

  /// Stops all threads responsible for the Raft protocol.
  void Shutdown();

  /// Setter for the current term. It updates the persistent storage as well
  /// as its in-memory copy.
  void SetCurrentTerm(uint64_t new_current_term);

  /// Setter for `voted for` member. It updates the persistent storage as well
  /// as its in-memory copy.
  void SetVotedFor(std::optional<uint16_t> new_voted_for);

  /// Setter for `log size` member. It updates the persistent storage as well
  /// as its in-memory copy.
  void SetLogSize(uint64_t new_log_size);

  /// Emplace a new LogEntry in the raft log and start its replication. This
  /// entry is created from a given batched set of StateDelta objects.
  ///
  /// It is possible that the entry was not successfully emplaced. In that case,
  /// the method returns std::nullopt and the caller is responsible for handling
  /// the situation correctly (e.g. aborting the corresponding transaction).
  ///
  /// @returns an optional LogEntryStatus object as result.
  std::optional<LogEntryStatus> Emplace(
      const std::vector<database::StateDelta> &deltas) override;

  /// Returns true if the current server's mode is LEADER. False otherwise.
  bool IsLeader() override;

  /// Returns the term ID of the current leader.
  uint64_t TermId() override;

  /// Returns the replication status of LogEntry which began its replication in
  /// a given term ID and was emplaced in the raft log at the given index.
  ///
  /// Replication status can be one of the following
  ///   1) REPLICATED -- LogEntry was successfully replicated across
  ///                    the Raft cluster
  ///   2) WAITING    -- LogEntry was successfully emplaced in the Raft
  ///                    log and is currently being replicated.
  ///   3) ABORTED    -- LogEntry will not be replicated.
  ///   4) INVALID    -- the request for the LogEntry was invalid, most
  ///                    likely either term_id or log_index were out of range.
  ReplicationStatus GetReplicationStatus(uint64_t term_id,
                                         uint64_t log_index) override;

  /// Checks if the LogEntry with the given term id and log index can safely be
  /// committed in local storage.
  ///
  /// @param term_id term when the LogEntry was created
  /// @param log_index index of the LogEntry in the Raft log
  ///
  /// @return bool True if the transaction is safe to commit, false otherwise.
  ///
  /// @throws ReplicationTimeoutException
  /// @throws RaftShutdownException
  /// @throws InvalidReplicationLogLookup
  bool SafeToCommit(uint64_t term_id, uint64_t log_index) override;

 private:
  mutable std::mutex lock_;            ///< Guards all internal state.
  mutable std::mutex heartbeat_lock_;  ///< Guards HB issuing

  //////////////////////////////////////////////////////////////////////////////
  // volatile state on all servers
  //////////////////////////////////////////////////////////////////////////////

  Config config_;                        ///< Raft config.
  Coordination *coordination_{nullptr};  ///< Cluster coordination.
  database::GraphDb *db_{nullptr};

  std::atomic<Mode> mode_;                ///< Server's current mode.
  uint16_t server_id_;                    ///< ID of the current server.
  std::filesystem::path durability_dir_;  ///< Durability directory.
  bool db_recover_on_startup_;  ///< Flag indicating if recovery should happen
                                ///< on startup.
  uint64_t commit_index_;       ///< Index of the highest known committed entry.
  uint64_t last_applied_;       ///< Index of the highest applied entry to SM.
  uint64_t last_entry_term_;    ///< Term of the last entry in Raft log

  std::atomic<bool> issue_hb_;  ///< Flag which signals if the current server
                                ///< should send HBs to the rest of the cluster.

  std::vector<std::thread> peer_threads_;  ///< One thread per peer which
                                           ///< handles outgoing RPCs.

  std::vector<std::thread> hb_threads_;  ///< One thread per peer which is
                                         ///< responsible for sending periodic
                                         ///< heartbeats.

  std::condition_variable state_changed_;  ///< Notifies all peer threads on
                                           ///< relevant state change.

  std::thread no_op_issuer_thread_;  ///< Thread responsible for issuing no-op
                                     ///< command on leader change.

  std::condition_variable leader_changed_;  ///< Notifies the
                                            ///< no_op_issuer_thread that a new
                                            ///< leader has been elected.

  std::condition_variable hb_condition_;  ///< Notifies the HBIssuer thread
                                          ///< that it should start sending
                                          ///< heartbeats.

  std::atomic<bool> exiting_{false};  ///< True on server shutdown.

  //////////////////////////////////////////////////////////////////////////////
  // volatile state on followers and candidates
  //////////////////////////////////////////////////////////////////////////////

  std::thread election_thread_;  ///< Timer thread for triggering elections.
  TimePoint next_election_;      ///< Next election `TimePoint`.

  std::condition_variable election_change_;  ///> Used to notify election_thread
                                             ///> on next_election_ change.

  std::mt19937_64 rng_ = std::mt19937_64(std::random_device{}());

  //////////////////////////////////////////////////////////////////////////////
  // volatile state on candidates
  //////////////////////////////////////////////////////////////////////////////

  uint16_t granted_votes_;
  std::vector<bool> vote_requested_;

  //////////////////////////////////////////////////////////////////////////////
  // volatile state on leaders
  //////////////////////////////////////////////////////////////////////////////

  std::vector<uint64_t> next_index_;  ///< for each server, index of the next
                                      ///< log entry to send to that server.

  std::vector<uint64_t> index_offset_;  ///< for each server, the offset for
                                        ///< which we reduce the next_index_
                                        ///< field if the AppendEntries request
                                        ///< is denied. We use "binary lifting"
                                        ///< style technique to achieve at most
                                        ///< O(logn) requests.

  std::vector<uint64_t> match_index_;  ///< for each server, index of the
                                       ///< highest log entry known to be
                                       ///< replicated on server.

  std::vector<TimePoint> next_replication_;  ///< for each server, time point
                                             ///< for the next replication.

  std::vector<TimePoint> next_heartbeat_;  ///< for each server, time point for
                                           ///< the next heartbeat.

  // Tracks timepoints until a transactions is allowed to be in the replication
  // process.
  ReplicationTimeoutMap replication_timeout_;

  //////////////////////////////////////////////////////////////////////////////
  // persistent state on all servers
  //
  // Persistent data consists of:
  //   - uint64_t current_term -- latest term server has seen.
  //   - uint16_t voted_for    -- candidate_id that received vote in current
  //                              term (null if none).
  //   - uint64_t log_size     -- Number of stored entries within the log.
  //   - vector<LogEntry> log  -- log entries. Each log entry is stored under
  //                              a separate key within KVStore.
  //////////////////////////////////////////////////////////////////////////////

  kvstore::KVStore disk_storage_;
  std::optional<uint16_t> voted_for_;
  std::atomic<uint64_t> current_term_;
  uint64_t log_size_;
  std::map<uint64_t, LogEntry> log_;

  /// Recovers persistent data from disk and stores its in-memory copies
  /// that ensure faster read-only operations. This method should be called
  /// on start-up. If parts of persistent data are missing, the method won't
  /// make a copy of that data, i.e. no exception is thrown and the caller
  /// should check whether persistent data actually exists.
  void RecoverPersistentData();

  /// Makes a transition to a new `raft::Mode`.
  ///
  /// throws InvalidTransitionException when transitioning between incompatible
  ///        `raft::Mode`s.
  void Transition(const raft::Mode &new_mode);

  /// Tries to advance the commit index on a leader.
  void AdvanceCommitIndex();

  /// Decides whether to send Log Entries or Snapshot to the given peer.
  ///
  /// @param peer_id ID of the peer which receives entries.
  /// @param lock Lock from the peer thread (released while waiting for
  ///             response)
  void SendEntries(uint16_t peer_id, std::unique_lock<std::mutex> *lock);

  /// Sends Log Entries to peer. This function should only be called in leader
  /// mode.
  ///
  /// @param peer_id ID of the peer which receives entries.
  /// @param lock Lock from the peer thread (released while waiting for
  ///             response)
  void SendLogEntries(uint16_t peer_id,
                      std::unique_lock<std::mutex> *lock);

  /// Send Snapshot to peer. This function should only be called in leader
  /// mode.
  ///
  /// @param peer_id ID of the peer which receives entries.
  /// @param lock Lock from the peer thread (released while waiting for
  ///             response)
  void SendSnapshot(uint16_t peer_id, std::unique_lock<std::mutex> *lock);

  /// Main function of the `election_thread_`. It is responsible for
  /// transition to CANDIDATE mode when election timeout elapses.
  void ElectionThreadMain();

  /// Main function of the thread that handles outgoing RPCs towards a
  /// specified node within the Raft cluster.
  ///
  /// @param peer_id - ID of a receiving node in the cluster.
  void PeerThreadMain(uint16_t peer_id);

  /// Main function of the thread that handles issuing heartbeats towards
  /// other peers. At the moment, this function is ignorant about the status
  /// of LogEntry replication. Therefore, it might issue unnecessary
  /// heartbeats, but we can live with that at this point.
  ///
  /// @param peer_id - ID of a receiving node in the cluster.
  void HBThreadMain(uint16_t peer_id);

  /// Issues no-op command when a new leader is elected. This is done to
  /// force the Raft protocol to commit logs from previous terms that
  /// have been replicated on a majority of peers.
  void NoOpIssuerThreadMain();

  /// Sets the `TimePoint` for next election.
  void SetNextElectionTimePoint();

  /// Checks if the current server obtained enough votes to become a leader.
  bool HasMajorityVote();

  /// Returns relevant metadata about the last entry in this server's Raft Log.
  /// More precisely, returns a pair consisting of an index of the last entry
  /// in the log and the term of the last entry in the log.
  ///
  /// @return std::pair<last_log_index, last_log_term>
  std::pair<uint64_t, uint64_t> LastEntryData();

  /// Checks whether Raft log of server A is at least as up-to-date as the Raft
  /// log of server B. This is strictly defined in Raft paper 5.4.
  ///
  /// @param last_log_index_a - Index of server A's last log entry.
  /// @param last_log_term_a  - Term of server A's last log entry.
  /// @param last_log_index_b - Index of server B's last log entry.
  /// @param last_log_term_b  - Term of server B's last log entry.
  bool AtLeastUpToDate(uint64_t last_log_index_a, uint64_t last_log_term_a,
                       uint64_t last_log_index_b, uint64_t last_log_term_b);

  /// Checks whether the current server got a reply from "future", i.e. reply
  /// with a higher term. If so, the current server falls back to follower mode
  /// and updates its current term.
  ///
  /// @param reply_term Term from RPC response.
  /// @return true if the current server's term lags behind.
  bool OutOfSync(uint64_t reply_term);

  /// Retrieves a log entry from the log at a given index.
  ///
  /// @param index Index of the log entry to be retrieved.
  LogEntry GetLogEntry(uint64_t index);

  /// Deletes log entries with indexes that are greater or equal to the given
  /// starting index.
  ///
  /// @param starting_index Smallest index which will be deleted from the Log.
  ///                       Also, a friendly reminder that log entries are
  ///                       1-indexed.
  void DeleteLogSuffix(int starting_index);

  /// Stores log entries with indexes that are greater or equal to the given
  /// starting index into a provided container. If the starting index is
  /// greater than the log size, nothing will be stored in the provided
  /// container.
  ///
  /// @param starting_index Smallest index which will be stored.
  /// @param entries The container which will store the wanted suffix.
  void GetLogSuffix(int starting_index, std::vector<raft::LogEntry> &entries);

  /// Appends new log entries to Raft log. Note that this function is not
  /// smart in any way, i.e. the caller should make sure that it's safe
  /// to call this function. This function also updates this server's commit
  /// index if necessary.
  ///
  /// @param leader_commit_index - Used to update local commit index.
  /// @param starting_index - Index in the log from which we start to append.
  /// @param new_entries - New `LogEntry` instances to be appended in the log.
  void AppendLogEntries(uint64_t leader_commit_index, uint64_t starting_index,
                        const std::vector<LogEntry> &new_entries);

  /// Generates the key under which the `LogEntry` with a given index should
  /// be stored on our disk storage.
  ///
  /// @param index - Index of the `LogEntry` for which we generate the key.
  std::string LogEntryKey(uint64_t index);

  /// Serializes Raft log entry into `std::string`
  std::string SerializeLogEntry(const LogEntry &log_entry);

  /// Deserializes a Raft log entry from `std::string`
  LogEntry DeserializeLogEntry(const std::string &serialized_log_entry);

  /// Start a new transaction with a NO-OP StateDelta.
  void NoOpCreate();

  /// Applies the given batch of state deltas that are representing a
  /// transaction to the db.
  void ApplyStateDeltas(const std::vector<database::StateDelta> &deltas);

  std::mutex &WithLock() override;
};
} // namespace raft

View File

@ -1,86 +0,0 @@
/// @file
#pragma once
#include <atomic>
#include "data_structures/bitset/dynamic_bitset.hpp"
#include "transactions/type.hpp"
namespace raft {
/// Tracks information about replicated and active logs for high availability.
///
/// The main difference between ReplicationLog and CommitLog is that
/// ReplicationLog doesn't throw when looking up garbage collected transaction
/// ids.
class ReplicationLog final {
 public:
  // Block size of the underlying DynamicBitset.
  static constexpr int kBitsetBlockSize = 32768;
  ReplicationLog() = default;
  ReplicationLog(const ReplicationLog &) = delete;
  ReplicationLog(ReplicationLog &&) = delete;
  ReplicationLog &operator=(const ReplicationLog &) = delete;
  ReplicationLog &operator=(ReplicationLog &&) = delete;
  // Each transaction id occupies two consecutive bits in `log`: bit `2 * id`
  // marks the transaction as active, bit `2 * id + 1` as replicated.
  bool is_active(tx::TransactionId id) const {
    return fetch_info(id).is_active();
  }
  void set_active(tx::TransactionId id) { log.set(2 * id); }
  bool is_replicated(tx::TransactionId id) const {
    return fetch_info(id).is_replicated();
  }
  void set_replicated(tx::TransactionId id) { log.set(2 * id + 1); }
  // Clears the replication log from bits associated with transactions with an
  // id lower than `id`.
  void garbage_collect_older(tx::TransactionId id) {
    // We keep track of the valid prefix in order to avoid the `CHECK` inside
    // the `DynamicBitset`.
    valid_prefix = 2 * id;
    log.delete_prefix(2 * id);
  }
  // Decoded view of a transaction's two status bits. REPLICATED takes
  // precedence over ACTIVE when both bits happen to be set.
  class Info final {
   public:
    enum Status {
      UNKNOWN = 0,     // 00
      ACTIVE = 1,      // 01
      REPLICATED = 2,  // 10
    };
    explicit Info(uint8_t flags) {
      if (flags & REPLICATED) {
        flags_ = REPLICATED;
      } else if (flags & ACTIVE) {
        flags_ = ACTIVE;
      } else {
        flags_ = UNKNOWN;
      }
    }
    bool is_active() const { return flags_ & ACTIVE; }
    bool is_replicated() const { return flags_ & REPLICATED; }
    operator uint8_t() const { return flags_; }

   private:
    uint8_t flags_{0};
  };
  // Reads the two status bits for `id`. Ids below the garbage-collected
  // prefix report Info{0} (UNKNOWN) instead of touching the bitset.
  Info fetch_info(tx::TransactionId id) const {
    if (valid_prefix > 2 * id) return Info{0};
    return Info{log.at(2 * id, 2)};
  }

 private:
  DynamicBitset<uint8_t, kBitsetBlockSize> log;
  std::atomic<tx::TransactionId> valid_prefix{0};
};
} // namespace raft

View File

@ -1,71 +0,0 @@
/// @file
#pragma once
#include <chrono>
#include <map>
#include <mutex>
namespace raft {

using Clock = std::chrono::system_clock;
using TimePoint = std::chrono::system_clock::time_point;

/// A wrapper around a std::map whose reads/writes are protected with a
/// lock. It's also specialized to serve the sole purpose of tracking
/// replication timeout: entries are keyed by (term id, log index) and map
/// to the deadline by which the entry must be replicated.
class ReplicationTimeoutMap final {
 public:
  ReplicationTimeoutMap() = delete;
  ReplicationTimeoutMap(const ReplicationTimeoutMap &) = delete;
  ReplicationTimeoutMap(ReplicationTimeoutMap &&) = delete;
  // Fix: deleted assignment operators now use the canonical
  // reference-returning signature (they previously returned by value).
  ReplicationTimeoutMap &operator=(const ReplicationTimeoutMap &) = delete;
  ReplicationTimeoutMap &operator=(ReplicationTimeoutMap &&) = delete;

  /// @param replication_timeout How long an entry may stay unreplicated
  ///        before CheckTimeout reports it as timed out.
  explicit ReplicationTimeoutMap(std::chrono::milliseconds replication_timeout)
      : replication_timeout_(replication_timeout) {}

  /// Remove all entries from the map.
  void Clear() {
    std::lock_guard<std::mutex> guard(lock_);
    timeout_.clear();
  }

  /// Remove a single entry from the map. No-op if the entry doesn't exist.
  void Remove(const uint64_t term_id, const uint64_t log_index) {
    std::lock_guard<std::mutex> guard(lock_);
    timeout_.erase({term_id, log_index});
  }

  /// Inserts an entry in the map by setting a point in time until which it
  /// needs to be replicated.
  void Insert(const uint64_t term_id, const uint64_t log_index) {
    std::lock_guard<std::mutex> guard(lock_);
    timeout_[{term_id, log_index}] = replication_timeout_ + Clock::now();
  }

  /// Checks if the given entry has timed out.
  /// @returns bool True if it exceeded timeout, false otherwise (including
  ///          when the entry was never inserted or was already removed).
  bool CheckTimeout(const uint64_t term_id, const uint64_t log_index) {
    std::lock_guard<std::mutex> guard(lock_);
    auto found = timeout_.find({term_id, log_index});
    // If we didn't set the timeout yet, or we already deleted it, we didn't
    // time out.
    if (found == timeout_.end()) return false;
    return found->second < Clock::now();
  }

 private:
  std::chrono::milliseconds replication_timeout_;
  mutable std::mutex lock_;
  // TODO(ipaljak): Consider using unordered_map if we encounter any
  // performance issues.
  std::map<std::pair<uint64_t, uint64_t>, TimePoint> timeout_;
};

}  // namespace raft

View File

@ -1,74 +0,0 @@
#include "raft/storage_info.hpp"
#include <chrono>
#include "database/single_node_ha/graph_db.hpp"
#include "raft/coordination.hpp"
#include "raft/storage_info_rpc_messages.hpp"
#include "utils/future.hpp"
#include "utils/stat.hpp"
namespace raft {
using namespace std::literals::chrono_literals;
using Clock = std::chrono::system_clock;
using TimePoint = std::chrono::system_clock::time_point;
/// @param db Graph database whose stats are exposed; must not be null.
/// @param coordination Raft coordination layer used for RPC; must not be
///        null.
/// @param server_id Id of this node within the Raft cluster.
StorageInfo::StorageInfo(database::GraphDb *db, Coordination *coordination,
                         uint16_t server_id)
    : db_(db), coordination_(coordination), server_id_(server_id) {
  CHECK(db) << "Graph DB can't be nullptr";
  CHECK(coordination) << "Coordination can't be nullptr";
}

StorageInfo::~StorageInfo() {}
// Registers the handler for StorageInfoRpc so that other cluster nodes can
// query this node for its locally gathered storage statistics.
void StorageInfo::Start() {
  coordination_->Register<StorageInfoRpc>(
      [this](auto *req_reader, auto *res_builder) {
        StorageInfoReq req;
        slk::Load(&req, req_reader);
        // Respond with this node's id and its local stats.
        StorageInfoRes res(this->server_id_, this->GetLocalStorageInfo());
        slk::Save(res, res_builder);
      });
}
/// Collects storage statistics for this node only: graph counts, memory and
/// disk usage. Entries are returned as (name, value) string pairs.
std::vector<std::pair<std::string, std::string>>
StorageInfo::GetLocalStorageInfo() const {
  // Make sure the cached statistics are up to date before reading them.
  db_->RefreshStat();
  const auto &stat = db_->GetStat();
  std::vector<std::pair<std::string, std::string>> result;
  result.reserve(5);
  result.emplace_back("vertex_count", std::to_string(stat.vertex_count));
  result.emplace_back("edge_count", std::to_string(stat.edge_count));
  result.emplace_back("average_degree", std::to_string(stat.avg_degree));
  result.emplace_back("memory_usage", std::to_string(utils::GetMemoryUsage()));
  result.emplace_back("disk_usage",
                      std::to_string(db_->GetDurabilityDirDiskUsage()));
  return result;
}
/// Gathers storage info for every node in the Raft cluster, keyed by the
/// string form of the node id. Local info is collected directly; every other
/// node is queried over RPC. Nodes that don't reply get an empty info list.
std::map<std::string, std::vector<std::pair<std::string, std::string>>>
StorageInfo::GetStorageInfo() const {
  std::map<std::string, std::vector<std::pair<std::string, std::string>>> info;
  for (auto node_id : coordination_->GetAllNodeIds()) {
    auto key = std::to_string(node_id);
    if (node_id == server_id_) {
      // No RPC round-trip needed for our own statistics.
      info.emplace(key, GetLocalStorageInfo());
      continue;
    }
    auto reply = coordination_->ExecuteOnOtherNode<StorageInfoRpc>(node_id);
    info[key] = reply ? std::move(reply->storage_info)
                      : std::vector<std::pair<std::string, std::string>>{};
  }
  return info;
}
} // namespace raft

View File

@ -1,47 +0,0 @@
/// @file
#pragma once
#include <map>
#include <vector>
// Forward declaration
namespace database {
class GraphDb;
} // namespace database
namespace raft {
// Forward declaration
class Coordination;
/// StorageInfo takes care of the Raft cluster storage info retrieval.
class StorageInfo final {
 public:
  StorageInfo() = delete;
  /// @param db Graph database whose stats are exposed; must not be null.
  /// @param coordination Raft coordination layer used for RPC; must not be
  ///        null.
  /// @param server_id Id of this node within the Raft cluster.
  StorageInfo(database::GraphDb *db, Coordination *coordination,
              uint16_t server_id);
  StorageInfo(const StorageInfo &) = delete;
  StorageInfo(StorageInfo &&) = delete;
  // Fix: deleted assignment operators now use the canonical
  // reference-returning signature (they previously returned by value).
  StorageInfo &operator=(const StorageInfo &) = delete;
  StorageInfo &operator=(StorageInfo &&) = delete;
  ~StorageInfo();

  /// Registers the storage-info RPC handler with the coordination layer.
  void Start();

  /// Returns storage info for the local storage only.
  std::vector<std::pair<std::string, std::string>> GetLocalStorageInfo() const;

  /// Returns storage info for each peer in the Raft cluster, keyed by the
  /// string form of the node id.
  std::map<std::string, std::vector<std::pair<std::string, std::string>>>
  GetStorageInfo() const;

 private:
  database::GraphDb *db_{nullptr};
  Coordination *coordination_{nullptr};
  uint16_t server_id_;
};
} // namespace raft

View File

@ -1,19 +0,0 @@
#>cpp
#pragma once
#include <vector>
#include <string>
#include "rpc/messages.hpp"
#include "slk/serialization.hpp"
cpp<#
(lcp:namespace raft)
;; RPC used by StorageInfo to fetch (name, value) storage statistics from a
;; single node in the Raft cluster. The request carries no payload; the
;; response carries the responder's server id and its local stats.
(lcp:define-rpc storage-info
  (:request ())
  (:response
   ((server-id :uint16_t)
    (storage-info "std::vector<std::pair<std::string, std::string>>"))))
(lcp:pop-namespace) ;; raft

View File

@ -1,30 +0,0 @@
/// @file
#pragma once
#include <mutex>
#include "storage/common/types/property_value_store.hpp"
#include "transactions/engine.hpp"
#include "transactions/snapshot.hpp"
namespace storage::constraints::common {
/// Garbage-collects stale version pairs from every constraint in
/// `constraints`. A version pair is removed when it can no longer be seen by
/// any running or future transaction: either it expired before the oldest
/// active transaction and its expiring transaction committed, or its
/// creating transaction aborted.
///
/// @param snapshot The GC snapshot; `snapshot.back()` is the oldest active
///        transaction id.
/// @param engine Transaction engine used to query commit/abort status.
/// @param constraints Container of constraints, each holding a
///        `version_pairs` list.
/// @param lock Mutex guarding `constraints`; held for the whole sweep.
template <typename TConstraints>
void UniqueConstraintRefresh(const tx::Snapshot &snapshot,
                             const tx::Engine &engine,
                             TConstraints &constraints, std::mutex &lock) {
  std::lock_guard<std::mutex> guard(lock);
  for (auto &constraint : constraints) {
    for (auto p = constraint.version_pairs.begin();
         p != constraint.version_pairs.end();) {
      auto exp_id = p->record.tx_id_exp;
      auto cre_id = p->record.tx_id_cre;
      if ((exp_id != 0 && exp_id < snapshot.back() &&
           engine.Info(exp_id).is_committed() && !snapshot.contains(exp_id)) ||
          (cre_id < snapshot.back() && engine.Info(cre_id).is_aborted())) {
        // Fix: erase() invalidates `p`, so advance via its return value. The
        // original loop erased and then incremented the invalidated iterator
        // (undefined behavior); UniqueConstraints::Refresh already does this
        // correctly.
        p = constraint.version_pairs.erase(p);
      } else {
        ++p;
      }
    }
  }
}
} // namespace storage::constraints::common

View File

@ -1,19 +0,0 @@
/// @file
#pragma once
#include "utils/exceptions.hpp"
namespace storage::constraints {
/// Thrown when a violation of a constraint occurs.
class ViolationException : public utils::BasicException {
  using utils::BasicException::BasicException;
};
/// Thrown when multiple transactions alter the same constraint.
class SerializationException : public utils::BasicException {
  using utils::BasicException::BasicException;
};
}  // namespace storage::constraints

View File

@ -1,66 +0,0 @@
#include "storage/common/constraints/record.hpp"
#include "storage/common/constraints/exceptions.hpp"
#include "storage/common/mvcc/exceptions.hpp"
#include "transactions/engine.hpp"
#include "transactions/transaction.hpp"
namespace storage::constraints::impl {
// Creates a constraint record for `gid`, created (and not yet expired) by
// transaction `t`.
Record::Record(storage::Gid gid, const tx::Transaction &t)
    : curr_gid(gid), tx_id_cre(t.id_) {}
// Marks this record as (re)created by transaction `t` for vertex `gid`.
void Record::Insert(storage::Gid gid, const tx::Transaction &t) {
  // Insert
  //  - delete before or in this transaction and not aborted
  //  - insert before and aborted
  // Throw SerializationException
  //  - deleted or inserted after this transaction
  // Throw ViolationException
  //  - insert before or in this transaction and not aborted
  //  - delete before and aborted
  t.TakeLock(lock_);
  // A newer transaction already touched this record -> serialization error.
  if (t.id_ < tx_id_cre || (tx_id_exp != 0 && t.id_ < tx_id_exp)) {
    throw SerializationException(
        "Node couldn't be updated due to unique constraint serialization "
        "error!");
  }
  // `has_entry` means the record was never expired, i.e. the constraint
  // entry is still logically present.
  bool has_entry = tx_id_exp == 0;
  bool is_aborted = has_entry ? t.engine_.Info(tx_id_cre).is_aborted()
                              : t.engine_.Info(tx_id_exp).is_aborted();
  if ((has_entry && !is_aborted) || (!has_entry && is_aborted)) {
    throw ViolationException(
        "Node couldn't be updated due to unique constraint violation!");
  }
  curr_gid = gid;
  tx_id_cre = t.id_;
  tx_id_exp = 0;
}
// Marks this record as removed by transaction `t`.
void Record::Remove(storage::Gid gid, const tx::Transaction &t) {
  // Remove
  //  - insert before or in this transaction and not aborted
  //  - remove before and aborted
  // Nothing
  //  - remove before or in this transaction and not aborted
  //  - insert before and aborted
  // Throw SerializationException
  //  - delete or insert after this transaction
  t.TakeLock(lock_);
  DCHECK(gid == curr_gid);
  // NOTE(review): this path throws mvcc::SerializationError while Insert
  // throws storage::constraints::SerializationException for the analogous
  // conflict -- confirm whether the asymmetry is intentional.
  if (t.id_ < tx_id_cre || (tx_id_exp != 0 && t.id_ < tx_id_exp))
    throw mvcc::SerializationError();
  bool has_entry = tx_id_exp == 0;
  bool is_aborted = has_entry ? t.engine_.Info(tx_id_cre).is_aborted()
                              : t.engine_.Info(tx_id_exp).is_aborted();
  // Already removed, or the insert never logically happened -> nothing to do.
  if ((!has_entry && !is_aborted) || (has_entry && is_aborted)) return;
  tx_id_exp = t.id_;
}
} // namespace storage::constraints::impl

View File

@ -1,25 +0,0 @@
/// @file
#pragma once
#include "storage/common/locking/record_lock.hpp"
#include "storage/common/types/types.hpp"
#include "transactions/type.hpp"
namespace tx {
class Transaction;
} // namespace tx
namespace storage::constraints::impl {
/// Contains records of creation and deletion of entry in a constraint.
struct Record {
  Record(storage::Gid gid, const tx::Transaction &t);
  /// Marks the entry as created by `t`; throws on constraint violation or
  /// serialization conflict.
  void Insert(storage::Gid gid, const tx::Transaction &t);
  /// Marks the entry as removed by `t`; throws on serialization conflict.
  void Remove(storage::Gid gid, const tx::Transaction &t);
  // Gid of the vertex currently holding the constrained values.
  storage::Gid curr_gid;
  // Id of the transaction that created this entry.
  tx::TransactionId tx_id_cre;
  // Id of the transaction that removed this entry; 0 while still present.
  tx::TransactionId tx_id_exp{0};
  // Per-record lock taken by transactions before mutating the fields above.
  RecordLock lock_;
};
} // namespace storage::constraints::impl

View File

@ -1,258 +0,0 @@
#include "storage/common/constraints/unique_constraints.hpp"
#include <algorithm>
#include "storage/vertex_accessor.hpp"
namespace storage::constraints {
namespace {
// Returns an iterator to the constraint in `constraints` defined over
// `label` and exactly the given `properties` (order-insensitive thanks to
// std::is_permutation), or `constraints.end()` if there is none.
auto FindIn(storage::Label label,
            const std::vector<storage::Property> &properties,
            const std::list<impl::LabelPropertiesEntry> &constraints) {
  return std::find_if(
      constraints.begin(), constraints.end(), [label, properties](auto &c) {
        return c.label == label &&
               std::is_permutation(properties.begin(), properties.end(),
                                   c.properties.begin(), c.properties.end());
      });
}
}  // anonymous namespace
bool UniqueConstraints::AddConstraint(const ConstraintEntry &entry) {
auto constraint = FindIn(entry.label, entry.properties, constraints_);
if (constraint == constraints_.end()) {
constraints_.emplace_back(entry.label, entry.properties);
return true;
}
return false;
}
bool UniqueConstraints::RemoveConstraint(const ConstraintEntry &entry) {
auto constraint = FindIn(entry.label, entry.properties, constraints_);
if (constraint != constraints_.end()) {
constraints_.erase(constraint);
return true;
}
return false;
}
/// Checks whether a unique constraint over `label` and `properties` exists.
bool UniqueConstraints::Exists(
    storage::Label label,
    const std::vector<storage::Property> &properties) const {
  auto it = FindIn(label, properties, constraints_);
  return it != constraints_.end();
}
std::vector<ConstraintEntry> UniqueConstraints::ListConstraints() const {
std::vector<ConstraintEntry> constraints(constraints_.size());
std::transform(constraints_.begin(), constraints_.end(), constraints.begin(),
[](auto &c) {
return ConstraintEntry{c.label, c.properties};
});
return constraints;
}
// NOTE(review): Update and UpdateOnAddLabel below are identical except for
// the constraint filter; consider extracting a shared helper.
/// Inserts the vertex's current property-value combinations into every
/// constraint whose label the vertex carries.
void UniqueConstraints::Update(const RecordAccessor<Vertex> &accessor,
                               const tx::Transaction &t) {
  auto &vertex = accessor.current();
  std::lock_guard<std::mutex> guard(lock_);
  for (auto &constraint : constraints_) {
    // Only constraints whose label the vertex carries are relevant.
    if (!utils::Contains(vertex.labels_, constraint.label)) continue;
    std::vector<PropertyValue> values;
    for (auto p : constraint.properties) {
      auto value = vertex.properties_.at(p);
      // A null property means the constraint doesn't apply to this vertex.
      if (value.IsNull()) break;
      values.emplace_back(value);
    }
    if (values.size() != constraint.properties.size()) continue;
    auto entry = std::find_if(constraint.version_pairs.begin(),
                              constraint.version_pairs.end(),
                              [values](const impl::LabelPropertyPair &p) {
                                return p.values == values;
                              });
    if (entry != constraint.version_pairs.end()) {
      // Combination already tracked; record the (re)insert on it.
      entry->record.Insert(accessor.gid(), t);
    } else {
      constraint.version_pairs.emplace_back(accessor.gid(), values, t);
    }
  }
}
/// Same as Update, but only touches constraints defined over the newly
/// added `label`.
void UniqueConstraints::UpdateOnAddLabel(storage::Label label,
                                         const RecordAccessor<Vertex> &accessor,
                                         const tx::Transaction &t) {
  auto &vertex = accessor.current();
  std::lock_guard<std::mutex> guard(lock_);
  for (auto &constraint : constraints_) {
    if (constraint.label != label) continue;
    std::vector<PropertyValue> values;
    for (auto p : constraint.properties) {
      auto value = vertex.properties_.at(p);
      if (value.IsNull()) break;
      values.emplace_back(value);
    }
    if (values.size() != constraint.properties.size()) continue;
    auto entry = std::find_if(constraint.version_pairs.begin(),
                              constraint.version_pairs.end(),
                              [values](const impl::LabelPropertyPair &p) {
                                return p.values == values;
                              });
    if (entry != constraint.version_pairs.end()) {
      entry->record.Insert(accessor.gid(), t);
    } else {
      constraint.version_pairs.emplace_back(accessor.gid(), values, t);
    }
  }
}
/// Removes the vertex's value combinations from constraints defined over
/// the removed `label`.
void UniqueConstraints::UpdateOnRemoveLabel(
    storage::Label label, const RecordAccessor<Vertex> &accessor,
    const tx::Transaction &t) {
  auto &vertex = accessor.current();
  std::lock_guard<std::mutex> guard(lock_);
  for (auto &constraint : constraints_) {
    if (constraint.label != label) continue;
    std::vector<PropertyValue> values;
    for (auto p : constraint.properties) {
      auto value = vertex.properties_.at(p);
      if (value.IsNull()) break;
      values.emplace_back(value);
    }
    if (values.size() != constraint.properties.size()) continue;
    auto entry = std::find_if(constraint.version_pairs.begin(),
                              constraint.version_pairs.end(),
                              [values](const impl::LabelPropertyPair &p) {
                                return p.values == values;
                              });
    if (entry != constraint.version_pairs.end())
      entry->record.Remove(accessor.gid(), t);
  }
}
/// Moves the vertex from its old value combination to the new one in every
/// affected constraint when a property value changes.
void UniqueConstraints::UpdateOnAddProperty(
    storage::Property property, const PropertyValue &previous_value,
    const PropertyValue &new_value, const RecordAccessor<Vertex> &accessor,
    const tx::Transaction &t) {
  auto &vertex = accessor.current();
  std::lock_guard<std::mutex> guard(lock_);
  for (auto &constraint : constraints_) {
    if (!utils::Contains(vertex.labels_, constraint.label)) continue;
    if (!utils::Contains(constraint.properties, property)) continue;
    // Build both the pre-change and post-change value combinations; a null
    // anywhere means that combination is incomplete and is skipped below.
    std::vector<PropertyValue> old_values;
    std::vector<PropertyValue> new_values;
    for (auto p : constraint.properties) {
      auto value = vertex.properties_.at(p);
      if (p == property) {
        if (!previous_value.IsNull()) old_values.emplace_back(previous_value);
        if (!new_value.IsNull()) new_values.emplace_back(new_value);
      } else {
        if (value.IsNull()) break;
        old_values.emplace_back(value);
        new_values.emplace_back(value);
      }
    }
    // First we need to remove the old entry if there was one.
    if (old_values.size() == constraint.properties.size()) {
      auto entry = std::find_if(constraint.version_pairs.begin(),
                                constraint.version_pairs.end(),
                                [old_values](const impl::LabelPropertyPair &p) {
                                  return p.values == old_values;
                                });
      if (entry != constraint.version_pairs.end())
        entry->record.Remove(accessor.gid(), t);
    }
    if (new_values.size() != constraint.properties.size()) continue;
    auto entry = std::find_if(constraint.version_pairs.begin(),
                              constraint.version_pairs.end(),
                              [new_values](const impl::LabelPropertyPair &p) {
                                return p.values == new_values;
                              });
    if (entry != constraint.version_pairs.end()) {
      entry->record.Insert(accessor.gid(), t);
    } else {
      constraint.version_pairs.emplace_back(accessor.gid(), new_values, t);
    }
  }
}
/// Removes the vertex's value combination (reconstructed with the removed
/// property's `previous_value`) from every affected constraint.
void UniqueConstraints::UpdateOnRemoveProperty(
    storage::Property property, const PropertyValue &previous_value,
    const RecordAccessor<Vertex> &accessor, const tx::Transaction &t) {
  auto &vertex = accessor.current();
  std::lock_guard<std::mutex> guard(lock_);
  for (auto &constraint : constraints_) {
    if (!utils::Contains(vertex.labels_, constraint.label)) continue;
    if (!utils::Contains(constraint.properties, property)) continue;
    std::vector<PropertyValue> values;
    for (auto p : constraint.properties) {
      auto value = vertex.properties_.at(p);
      if (p == property) {
        // Use the pre-removal value to locate the tracked combination.
        values.emplace_back(previous_value);
      } else {
        if (value.IsNull()) break;
        values.emplace_back(value);
      }
    }
    if (values.size() != constraint.properties.size()) continue;
    auto entry = std::find_if(constraint.version_pairs.begin(),
                              constraint.version_pairs.end(),
                              [values](const impl::LabelPropertyPair &p) {
                                return p.values == values;
                              });
    if (entry != constraint.version_pairs.end()) {
      entry->record.Remove(accessor.gid(), t);
    }
  }
}
/// Removes all of the vertex's value combinations from every constraint
/// when the vertex itself is removed.
void UniqueConstraints::UpdateOnRemoveVertex(
    const RecordAccessor<Vertex> &accessor, const tx::Transaction &t) {
  auto &vertex = accessor.current();
  std::lock_guard<std::mutex> guard(lock_);
  for (auto &constraint : constraints_) {
    if (!utils::Contains(vertex.labels_, constraint.label)) continue;
    std::vector<PropertyValue> values;
    for (auto p : constraint.properties) {
      auto value = vertex.properties_.at(p);
      if (value.IsNull()) break;
      values.emplace_back(value);
    }
    if (values.size() != constraint.properties.size()) continue;
    auto entry = std::find_if(constraint.version_pairs.begin(),
                              constraint.version_pairs.end(),
                              [values](const impl::LabelPropertyPair &p) {
                                return p.values == values;
                              });
    if (entry != constraint.version_pairs.end()) {
      entry->record.Remove(accessor.gid(), t);
    }
  }
}
/// Garbage-collects version pairs that are no longer visible to any running
/// or future transaction.
void UniqueConstraints::Refresh(const tx::Snapshot &snapshot,
                                const tx::Engine &engine) {
  std::lock_guard<std::mutex> guard(lock_);
  for (auto &constraint : constraints_) {
    for (auto p = constraint.version_pairs.begin();
         p != constraint.version_pairs.end();) {
      auto exp_id = p->record.tx_id_exp;
      auto cre_id = p->record.tx_id_cre;
      // Erase entries that either expired before the oldest active
      // transaction (and the expiring tx committed), or whose creating tx
      // aborted. erase() returns the next valid iterator.
      if ((exp_id != 0 && exp_id < snapshot.back() &&
           engine.Info(exp_id).is_committed() && !snapshot.contains(exp_id)) ||
          (cre_id < snapshot.back() && engine.Info(cre_id).is_aborted())) {
        p = constraint.version_pairs.erase(p);
      } else {
        ++p;
      }
    }
  }
}
} // namespace storage::constraints

View File

@ -1,166 +0,0 @@
/// @file
#pragma once
#include <list>
#include <mutex>
#include "storage/common/types/property_value.hpp"
#include "storage/common/types/types.hpp"
#include "storage/common/constraints/record.hpp"
namespace tx {
class Snapshot;
}; // namespace tx
class Vertex;
template <typename TRecord>
class RecordAccessor;
namespace storage::constraints {
namespace impl {
/// A single tracked combination of property values together with its MVCC
/// visibility record.
struct LabelPropertyPair {
  LabelPropertyPair(storage::Gid gid, const std::vector<PropertyValue> &v,
                    const tx::Transaction &t)
      : values(v), record(gid, t) {}
  // Property values, ordered as the owning constraint's `properties`.
  std::vector<PropertyValue> values;
  // Tracks which transactions created/removed this value combination.
  Record record;
};
/// One unique constraint: a label, a set of properties, and all value
/// combinations currently (or recently) present for it.
struct LabelPropertiesEntry {
  LabelPropertiesEntry(storage::Label l,
                       const std::vector<storage::Property> &p)
      : label(l), properties(p) {}
  storage::Label label;
  std::vector<storage::Property> properties;
  std::list<LabelPropertyPair> version_pairs;
};
}  // namespace impl
/// A plain (label, properties) description of a single unique constraint.
struct ConstraintEntry {
  // This struct is used by ListConstraints method in order to avoid using
  // std::pair or something like that.
  storage::Label label;
  std::vector<storage::Property> properties;
};
/// UniqueConstraints contains all unique constraints defined by both label and
/// a set of properties. To create or delete unique constraint, caller must
/// ensure that there are no other transactions running in parallel.
/// Additionally, for adding unique constraint caller must first call
/// AddConstraint to create unique constraint and then call Update for every
/// existing Vertex. If there is a unique constraint violation, the caller must
/// manually handle that by catching exceptions and calling RemoveConstraint
/// method. This is needed to ensure logical correctness of transactions. Once
/// created, client uses UpdateOn* methods to notify UniqueConstraint about
/// changes. In case of violation UpdateOn* methods throw ViolationException.
/// Methods can also throw SerializationException or mvcc::SerializationError.
/// This class is thread safe.
class UniqueConstraints {
 public:
  UniqueConstraints() = default;
  UniqueConstraints(const UniqueConstraints &) = delete;
  UniqueConstraints(UniqueConstraints &&) = delete;
  UniqueConstraints &operator=(const UniqueConstraints &) = delete;
  UniqueConstraints &operator=(UniqueConstraints &&) = delete;
  ~UniqueConstraints() = default;
  /// Add new unique constraint, if constraint already exists this method does
  /// nothing. This method doesn't check if any of the existing vertices breaks
  /// this constraint. Caller must do that instead. Caller must also ensure that
  /// no other transaction is running in parallel.
  ///
  /// @return true if the constraint doesn't exists and was added.
  bool AddConstraint(const ConstraintEntry &entry);
  /// Removes existing unique constraint, if the constraint doesn't exist this
  /// method does nothing. Caller must ensure that no other transaction is
  /// running in parallel.
  ///
  /// @return true if the constraint existed and was removed.
  bool RemoveConstraint(const ConstraintEntry &entry);
  /// Checks whether given unique constraint is visible.
  bool Exists(storage::Label label,
              const std::vector<storage::Property> &properties) const;
  /// Returns list of unique constraints.
  std::vector<ConstraintEntry> ListConstraints() const;
  /// Updates unique constraint versions when adding new constraint rule.
  ///
  /// @throws ViolationException
  /// @throws SerializationError
  void Update(const RecordAccessor<Vertex> &accessor, const tx::Transaction &t);
  /// Updates unique constraint versions when adding label.
  /// @param label - label that was added
  /// @param accessor - accessor that was updated
  /// @param t - current transaction
  ///
  /// @throws ViolationException
  /// @throws SerializationError
  void UpdateOnAddLabel(storage::Label label,
                        const RecordAccessor<Vertex> &accessor,
                        const tx::Transaction &t);
  /// Updates unique constraint versions when removing label.
  /// @param label - label that was removed
  /// @param accessor - accessor that was updated
  /// @param t - current transaction
  ///
  /// @throws SerializationError
  void UpdateOnRemoveLabel(storage::Label label,
                           const RecordAccessor<Vertex> &accessor,
                           const tx::Transaction &t);
  /// Updates unique constraint versions when adding property.
  /// @param property - property that was added
  /// @param previous_value - previous value of the property
  /// @param new_value - new value of the property
  /// @param accessor - accessor that was updated
  /// @param t - current transaction
  ///
  /// @throws ViolationException
  /// @throws SerializationError
  void UpdateOnAddProperty(storage::Property property,
                           const PropertyValue &previous_value,
                           const PropertyValue &new_value,
                           const RecordAccessor<Vertex> &accessor,
                           const tx::Transaction &t);
  /// Updates unique constraint versions when removing property.
  /// @param property - property that was removed
  /// @param previous_value - previous value of the property
  /// @param accessor - accessor that was updated
  /// @param t - current transaction
  ///
  /// @throws SerializationError
  void UpdateOnRemoveProperty(storage::Property property,
                              const PropertyValue &previous_value,
                              const RecordAccessor<Vertex> &accessor,
                              const tx::Transaction &t);
  /// Updates unique constraint versions when removing a vertex.
  /// @param accessor - accessor that was updated
  /// @param t - current transaction
  ///
  /// @throws SerializationError
  void UpdateOnRemoveVertex(const RecordAccessor<Vertex> &accessor,
                            const tx::Transaction &t);
  /// Removes records that are no longer visible.
  /// @param snapshot - the GC snapshot.
  /// @param engine - current transaction engine.
  void Refresh(const tx::Snapshot &snapshot, const tx::Engine &engine);

 private:
  // Guards `constraints_` in all UpdateOn* methods and Refresh.
  std::mutex lock_;
  std::list<impl::LabelPropertiesEntry> constraints_;
};
} // namespace storage::constraints

View File

@ -1,183 +0,0 @@
#pragma once
#include "cppitertools/filter.hpp"
#include "cppitertools/imap.hpp"
#include "cppitertools/takewhile.hpp"
#include "glog/logging.h"
#include "data_structures/concurrent/concurrent_map.hpp"
#include "data_structures/concurrent/skiplist.hpp"
#include "transactions/transaction.hpp"
namespace database::index {
/**
 * @brief - Wrap beginning iterator to iterable object. This provides us with
 * begin and end iterator, and allows us to iterate from the iterator given in
 * constructor till the end of the collection over which we are really
 * iterating, i.e. it allows us to iterate over the suffix of some skiplist
 * hence the name SkipListSuffix.
 */
template <class TIterator, class TValue, typename TAccessor>
class SkipListSuffix {
 public:
  /// Minimal forward-iterator wrapper around the skiplist iterator.
  class Iterator {
   public:
    explicit Iterator(TIterator current) : current_(current) {}
    TValue &operator*() { return *current_; }
    bool operator!=(Iterator other) const {
      return this->current_ != other.current_;
    }
    Iterator &operator++() {
      ++current_;
      return *this;
    }

   private:
    TIterator current_;
  };
  /// Takes ownership of `accessor` so the underlying skiplist stays
  /// accessible for the whole lifetime of this suffix view.
  explicit SkipListSuffix(
      const TIterator begin,
      typename SkipList<TValue>::template Accessor<TAccessor> &&accessor)
      : begin_(begin), accessor_(std::move(accessor)) {}
  Iterator begin() const { return Iterator(begin_); }
  Iterator end() { return Iterator(accessor_.end()); }
  // First element of the suffix.
  TIterator begin_;
  // Accessor guarding the skiplist; its end() terminates the iteration.
  typename SkipList<TValue>::template Accessor<TAccessor> accessor_;
};
/**
* @brief - Get all inserted vlists in TKey specific storage which
* still return true for the 'exists' function.
* @param skiplist_accessor - accessor used to get begin iterator, and that
* should be used to get end iterator as well.
* @param begin - starting iterator for vlist iteration.
* @param predicate - function which checks if TIndexEntry has a value that we
* are looking for
* @param t - current transaction, which determines visibility.
* @param exists - method which determines visibility of entry and version
* (record) of the underlying objects (vertex/edge)
* @param current_state If true then the graph state for the
* current transaction+command is returned (insertions, updates and
* deletions performed in the current transaction+command are not
* ignored).
* @Tparam TIndexEntry - index entry inside skiplist
* @Tparam TRecord - type of record under index (edge/vertex usually.)
* @Tparam TAccessor - type of accessor to use (const skiplist/non const
* skiplist).
* @return iterable collection of distinct vlist records<TRecord> for which
* exists function evaluates as true
*/
template <class TIterator, class TIndexEntry, class TRecord, typename TAccessor>
static auto GetVlists(
    typename SkipList<TIndexEntry>::template Accessor<TAccessor>
        &&skiplist_accessor,
    TIterator begin,
    const std::function<bool(const TIndexEntry &entry)> &predicate,
    const tx::Transaction &t,
    const std::function<bool(const TIndexEntry &, const TRecord *)> &exists,
    bool current_state = false) {
  TIndexEntry *prev = nullptr;
  auto range = iter::takewhile(
      predicate, SkipListSuffix<TIterator, TIndexEntry, TAccessor>(
                     begin, std::move(skiplist_accessor)));
  // `prev` is captured by value into the mutable lambda, so the
  // deduplication state lives inside the closure for the whole iteration.
  auto filtered = iter::filter(
      [&t, exists, prev, current_state](TIndexEntry &entry) mutable {
        // Check if the current entry could offer new possible return value
        // with respect to the previous entry we evaluated.
        // We do this to guarantee uniqueness, and also as an optimization to
        // avoid checking same vlist twice when we can.
        if (prev && entry.IsAlreadyChecked(*prev)) return false;
        prev = &entry;
        // TODO when refactoring MVCC reconsider the return-value-arg idiom
        // here
        TRecord *old_record, *new_record;
        entry.vlist_->find_set_old_new(t, &old_record, &new_record);
        // filtering out records not visible to the current
        // transaction+command
        // taking into account the current_state flag
        bool visible =
            (old_record && !(current_state && old_record->is_expired_by(t))) ||
            (current_state && new_record && !new_record->is_expired_by(t));
        if (!visible) return false;
        // if current_state is true and we have the new record, then that's
        // the reference value, and that needs to be compared with the index
        // predicate
        return (current_state && new_record) ? exists(entry, new_record)
                                             : exists(entry, old_record);
      },
      std::move(range));
  // Project each surviving index entry onto its version list pointer.
  return iter::imap([](auto entry) { return entry.vlist_; },
                    std::move(filtered));
}
/**
 * @brief - Removes from the index all entries for which records don't contain
 * the given label/edge type/label + property anymore. Also update (remove)
 * all records which are not visible for any transaction in the given
 * 'snapshot'. This method assumes that the MVCC GC has been run with the
 * same 'snapshot'.
 *
 * @param indices - map of index entries (TIndexKey, skiplist<TIndexEntry>)
 * @param snapshot - the GC snapshot. Consists of the oldest active
 *  transaction's snapshot, with that transaction's id appended as last.
 * @param engine - transaction engine to see which records are committed
 * @param exists - function which checks 'key' and 'entry' if the entry still
 * contains required properties (key + optional value (in case of
 * label_property index))
 * @Tparam Tkey - index key
 * @Tparam TIndexEntry - index entry inside skiplist
 * @Tparam TRecord - type of record under index (edge/vertex usually.)
 */
template <class TKey, class TIndexEntry, class TRecord>
static void Refresh(
    ConcurrentMap<TKey, std::unique_ptr<SkipList<TIndexEntry>>> &indices,
    const tx::Snapshot &snapshot, tx::Engine &engine,
    const std::function<bool(const TKey &, const TIndexEntry &)> &exists) {
  // iterate over all the indices
  for (auto &key_indices_pair : indices.access()) {
    // iterate over index entries
    auto indices_entries_accessor = key_indices_pair.second->access();
    for (auto indices_entry : indices_entries_accessor) {
      if (indices_entry.record_->is_not_visible_from(snapshot, engine)) {
        // be careful when deleting the record which is not visible anymore.
        // its newer copy could be visible, and might still logically belong
        // to index (it satisfies the `exists` function). that's why we can't
        // just remove the index entry, but also re-insert the oldest visible
        // record to the index. if that record does not satisfy `exists`, it
        // will be cleaned up in the next Refresh. first insert and then
        // remove, otherwise there is a timeframe during which the record is
        // not present in the index
        auto new_record = indices_entry.vlist_->Oldest();
        if (new_record != nullptr)
          indices_entries_accessor.insert(
              TIndexEntry(indices_entry, new_record));
        [[gnu::unused]] auto success =
            indices_entries_accessor.remove(indices_entry);
        DCHECK(success) << "Unable to delete entry.";
      }
      // if the record is still visible,
      // check if it satisfies the `exists` function. if not
      // it does not belong in index anymore.
      // be careful when using the `exists` function
      // because its creator transaction could still be modifying it,
      // and modify+read is not thread-safe. for that reason we need to
      // first see if the transaction that created it has ended
      // (tx().cre < oldest active transaction).
      else if (indices_entry.record_->tx().cre < snapshot.back() &&
               !exists(key_indices_pair.first, indices_entry)) {
        indices_entries_accessor.remove(indices_entry);
      }
    }
  }
}
}; // namespace database::index

View File

@ -1,3 +0,0 @@
#pragma once
// Result of a lock-acquisition attempt: the lock was either newly acquired
// by the calling transaction or was already held by it.
enum class LockStatus { Acquired, AlreadyHeld };

View File

@ -1,131 +0,0 @@
#include "storage/common/locking/record_lock.hpp"

#include <fmt/format.h>
#include <glog/logging.h>

#include <algorithm>
#include <optional>
#include <stack>
#include <unordered_set>

#include "transactions/engine.hpp"
#include "utils/on_scope_exit.hpp"
#include "utils/thread/sync.hpp"
#include "utils/timer.hpp"
namespace {

// Finds the lock cycle that the start transaction is a part of and returns
// the id of the oldest transaction in that cycle. If the start transaction
// is not in a cycle, nullopt is returned.
//
// @param start - transaction from which the wait-for walk begins.
// @param graph_accessor - accessor over the wait-for graph
//                         (map of waiter id -> owner id).
// @return id of the oldest (smallest id) transaction in the cycle that
//         contains `start`, or std::nullopt.
template <typename TAccessor>
std::optional<tx::TransactionId> FindOldestTxInLockCycle(
    tx::TransactionId start, TAccessor &graph_accessor) {
  std::vector<tx::TransactionId> path;
  std::unordered_set<tx::TransactionId> visited;
  auto current = start;
  // Follow wait-for edges until we revisit a node or fall off the graph.
  do {
    visited.insert(current);
    path.push_back(current);
    auto it = graph_accessor.find(current);
    if (it == graph_accessor.end()) return std::nullopt;
    current = it->second;
  } while (visited.find(current) == visited.end());

  if (current == start) {
    // start is a part of the cycle, return the oldest transaction.
    CHECK(path.size() >= 2U) << "Cycle must have at least two nodes";
    // BUG FIX: the original used std::min(path.begin(), path.end()), which
    // compares the *iterators* themselves and therefore always yielded
    // *path.begin(), i.e. `start`, regardless of the path contents. We need
    // the smallest element in the range, i.e. std::min_element.
    return *std::min_element(path.begin(), path.end());
  }

  // There is a cycle, but start is not a part of it. Some transaction that
  // is in the cycle will find it and abort the oldest transaction.
  return std::nullopt;
}

}  // namespace
// Single atomic acquisition attempt: succeeds only when the lock is
// currently unowned (owner id 0).
bool RecordLock::TryLock(tx::TransactionId tx_id) {
  tx::TransactionId expected{0};
  return owner_.compare_exchange_strong(expected, tx_id);
}
/// Acquires this record's lock for transaction `tx`, spinning for at most
/// kTimeout while running deadlock detection over the engine's local lock
/// graph.
///
/// @return LockStatus::Acquired if the lock was taken, or
///         LockStatus::AlreadyHeld if `tx` already owns it.
/// @throw utils::LockTimeoutException if `tx` was chosen as the deadlock
///        victim or the timeout expired.
LockStatus RecordLock::Lock(const tx::Transaction &tx, tx::Engine &engine) {
  if (TryLock(tx.id_)) {
    return LockStatus::Acquired;
  }
  tx::TransactionId owner = owner_;
  if (owner_ == tx.id_) return LockStatus::AlreadyHeld;

  // In a distributed worker the transaction objects (and the locks they own)
  // are not destructed at the same time as on the master. Consequently a lock
  // might be active for a dead transaction. By asking the transaction engine
  // for transaction info, we make the worker refresh its knowledge about
  // live transactions and release obsolete locks.
  if (owner == 0 || !engine.Info(owner).is_active()) {
    if (TryLock(tx.id_)) {
      return LockStatus::Acquired;
    }
  }

  // Insert the wait-for edge (tx -> owner) into the local lock graph.
  auto accessor = engine.local_lock_graph().access();
  auto it = accessor.insert(tx.id_, owner).first;

  auto abort_oldest_tx_in_lock_cycle = [&tx, &accessor, &engine]() {
    // Find the oldest transaction in the lock cycle, if a cycle exists, and
    // notify that transaction that it should abort.
    // TODO: maybe we can be smarter and abort some other transaction and not
    // the oldest one.
    auto oldest = FindOldestTxInLockCycle(tx.id_, accessor);
    if (oldest) {
      engine.LocalForEachActiveTransaction([&](tx::Transaction &t) {
        if (t.id_ == oldest) {
          t.set_should_abort();
        }
      });
    }
  };

  abort_oldest_tx_in_lock_cycle();

  // Make sure to erase the edge on function exit. Either this function will
  // throw and the transaction will be killed, so the edge must go because the
  // transaction won't exist anymore, or the owner will finish and we will be
  // able to acquire the lock.
  utils::OnScopeExit cleanup{[&tx, &accessor] { accessor.remove(tx.id_); }};

  utils::Timer t;
  while (t.Elapsed() < kTimeout) {
    if (tx.should_abort()) {
      // The message could be inaccurate: the transaction could also have been
      // aborted because it was running for too long, but that is unlikely and
      // it is not very important which exception (and message) we throw here.
      throw utils::LockTimeoutException(
          "Transaction was aborted since it was oldest in a lock cycle");
    }
    if (TryLock(tx.id_)) {
      return LockStatus::Acquired;
    }

    if (owner != owner_) {
      // The owner changed while we were spinlocking. Update the edge and
      // rerun the cycle-resolution routine.
      // TODO: we should make sure that the first transaction that tries to
      // acquire an already held lock succeeds in acquiring the lock once the
      // transaction that was the lock owner finishes. That would probably
      // reduce the number of aborted transactions.
      owner = owner_;
      it->second = owner;
      abort_oldest_tx_in_lock_cycle();
    }
    utils::CpuRelax();
  }

  throw utils::LockTimeoutException(fmt::format(
      "Transaction locked for more than {} seconds", kTimeout.count()));
}
// Releases the lock unconditionally by resetting the owner id to "unowned".
void RecordLock::Unlock() { owner_ = 0; }

// Out-of-line definition required to odr-use the static constexpr member
// before C++17 inline variables.
constexpr std::chrono::duration<double> RecordLock::kTimeout;

View File

@ -1,30 +0,0 @@
#pragma once
#include <atomic>
#include <chrono>
#include <unordered_set>
#include "storage/common/locking/lock_status.hpp"
#include "transactions/type.hpp"
namespace tx {
class Engine;
class Transaction;
}; // namespace tx
/// Per-record mutual-exclusion lock keyed by transaction id. Lock() performs
/// deadlock detection through the transaction engine's local lock graph.
class RecordLock {
 public:
  /// Acquires the lock for the given transaction, or reports that the
  /// transaction already holds it.
  /// @throw utils::LockTimeoutException
  LockStatus Lock(const tx::Transaction &id, tx::Engine &engine);

  /// Releases the lock unconditionally.
  void Unlock();

 private:
  /// Single atomic acquisition attempt; succeeds only if currently unowned.
  bool TryLock(tx::TransactionId tx_id);

  // Arbitrarily chosen constant; PostgreSQL uses 1 second, so do we.
  constexpr static std::chrono::duration<double> kTimeout{
      std::chrono::seconds(1)};

  // Id of the owning transaction; 0 means the lock is free.
  std::atomic<tx::TransactionId> owner_{0};
};

View File

@ -1,15 +0,0 @@
#pragma once
#include "utils/exceptions.hpp"
namespace mvcc {

/// Thrown when an MVCC record can't be updated because a concurrent
/// transaction modified it first (write-write conflict).
class SerializationError : public utils::BasicException {
  static constexpr const char *default_message =
      "Can't serialize due to concurrent operations.";

 public:
  using utils::BasicException::BasicException;
  SerializationError() : BasicException(default_message) {}
};

}  // namespace mvcc

View File

@ -1,42 +0,0 @@
#pragma once
#include <atomic>
namespace mvcc {

/// A single link in a singly-linked chain of record versions, ordered
/// newest-to-oldest via the atomic `older_` pointer.
template <class T>
class Version {
 public:
  Version() = default;
  explicit Version(T *older) : older_(older) {}

  // Destroying a version must also destroy all the older versions. The chain
  // is unlinked and deleted iteratively (instead of recursing through the
  // destructors) to avoid deep recursion on long version chains.
  virtual ~Version() {
    auto curr = next();
    while (curr != nullptr) {
      auto next = curr->next();
      // Remove the link to the older version so deleting `curr` does not
      // recurse into the rest of the chain.
      curr->older_.store(nullptr);
      delete curr;
      curr = next;
    }
  }

  /// Returns a pointer to the older version stored in this record.
  T *next(std::memory_order order = std::memory_order_seq_cst) {
    return older_.load(order);
  }

  const T *next(std::memory_order order = std::memory_order_seq_cst) const {
    return older_.load(order);
  }

  /// Sets the older version of this record.
  void next(T *value, std::memory_order order = std::memory_order_seq_cst) {
    older_.store(value, order);
  }

 private:
  std::atomic<T *> older_{nullptr};
};

}  // namespace mvcc

View File

@ -1,49 +0,0 @@
#pragma once
#include <cstdint>
#include <cstring>
#include <string>
#include <vector>
namespace storage {
/**
* Buffer used for serialization of disk properties. The buffer
* implements a template parameter Buffer interface from BaseEncoder
* and Decoder classes for bolt serialization.
*/
/**
 * Buffer used for serialization of disk properties. The buffer
 * implements a template parameter Buffer interface from BaseEncoder
 * and Decoder classes for bolt serialization.
 */
class PODBuffer {
 public:
  PODBuffer() = default;

  /// Initializes the buffer with a copy of the given string's bytes.
  explicit PODBuffer(const std::string &s) : buffer(s.begin(), s.end()) {}

  /**
   * Writes data to buffer.
   *
   * @param data - Pointer to data to be written.
   * @param len - Data length.
   */
  void Write(const uint8_t *data, size_t len) {
    // Bulk range insert instead of per-byte push_back: a single
    // growth/copy operation instead of len of them.
    buffer.insert(buffer.end(), data, data + len);
  }

  /**
   * Reads raw data from buffer.
   *
   * @param data - pointer to where data should be stored.
   * @param len - data length
   * @return - True if successful, False otherwise.
   */
  bool Read(uint8_t *data, size_t len) {
    if (len > buffer.size()) return false;
    memcpy(data, buffer.data(), len);
    // NOTE: erasing from the front is O(remaining bytes); acceptable for the
    // small property payloads this buffer is used for.
    buffer.erase(buffer.begin(), buffer.begin() + len);
    return true;
  }

  std::vector<uint8_t> buffer;
};
} // namespace storage

View File

@ -1,6 +0,0 @@
#pragma once
#include "storage/v2/property_value.hpp"
using storage::PropertyValue;
using storage::PropertyValueException;

View File

@ -1,240 +0,0 @@
#include "storage/common/types/property_value_store.hpp"
#include <filesystem>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include "communication/bolt/v1/decoder/decoder.hpp"
#include "communication/bolt/v1/encoder/base_encoder.hpp"
#include "glue/communication.hpp"
#include "storage/common/pod_buffer.hpp"
namespace fs = std::filesystem;
using namespace communication::bolt;
const std::string kDiskKeySeparator = "_";
std::atomic<uint64_t> PropertyValueStore::global_key_cnt_ = {0};
// properties on disk are stored in a directory named properties within the
// durability directory
DECLARE_string(durability_directory);
DECLARE_string(properties_on_disk);
// Returns the on-disk key prefix shared by all properties of the record
// version identified by `version_key`.
std::string DiskKeyPrefix(const std::string &version_key) {
  return version_key + kDiskKeySeparator;
}

// Builds the full on-disk key for a single property:
// "<version_key><separator><property_id>".
std::string DiskKey(const std::string &version_key,
                    const std::string &property_id) {
  return DiskKeyPrefix(version_key) + property_id;
}
// Copy construction must not share the on-disk key namespace: each instance
// owns its own version_key_, so a fresh key is allocated and all of the
// source's disk-resident properties are copied under it.
PropertyValueStore::PropertyValueStore(const PropertyValueStore &old)
    : props_(old.props_) {
  // We need to update the disk key and the disk key counter when calling the
  // copy constructor due to MVCC (record versions each keep a disk copy).
  if (!FLAGS_properties_on_disk.empty()) {
    version_key_ = global_key_cnt_++;
    kvstore::KVStore::iterator old_disk_it(
        &DiskStorage(), DiskKeyPrefix(std::to_string(old.version_key_)));
    iterator it(&old, old.props_.end(), std::move(old_disk_it));
    while (it != old.end()) {
      this->set(it->first, it->second);
      ++it;
    }
  }
}

// Drops all of this store's disk-resident properties by key prefix.
PropertyValueStore::~PropertyValueStore() {
  if (!FLAGS_properties_on_disk.empty()) {
    DiskStorage().DeletePrefix(DiskKeyPrefix(std::to_string(version_key_)));
  }
}
// Returns the value stored under `key`, looking in memory or on disk
// depending on the key's Location; returns a Null PropertyValue if absent.
PropertyValue PropertyValueStore::at(const Property &key) const {
  auto GetValue = [&key](const auto &props) {
    for (const auto &kv : props)
      if (kv.first == key) return kv.second;
    return PropertyValue();
  };

  if (key.Location() == Location::Memory) return GetValue(props_);

  CHECK(!FLAGS_properties_on_disk.empty()) << "Trying to read property from "
                                              "disk storage with properties on "
                                              "disk disabled!";
  std::string disk_key =
      DiskKey(std::to_string(version_key_), std::to_string(key.Id()));
  auto serialized_prop = DiskStorage().Get(disk_key);
  if (serialized_prop) return DeserializeProp(serialized_prop.value());
  return PropertyValue();
}
// Set overriding for character constants. Forces conversion to std::string,
// otherwise templating might cast the pointer to something else (bool) and
// mess things up.
void PropertyValueStore::set(const Property &key, const char *value) {
  set(key, PropertyValue(value));
}

// Stores `value` under `key`, in memory or on disk depending on the key's
// Location. Setting a Null value erases the property instead of storing it.
void PropertyValueStore::set(const Property &key, const PropertyValue &value) {
  if (value.type() == PropertyValue::Type::Null) {
    erase(key);
    return;
  }

  auto SetValue = [&key, &value](auto &props) {
    for (auto &kv : props)
      if (kv.first == key) {
        kv.second = value;
        return;
      }
    props.emplace_back(key, value);
  };

  if (key.Location() == Location::Memory) {
    SetValue(props_);
  } else {
    // Fixed copy-pasted CHECK message from at(): this path writes, it does
    // not read.
    CHECK(!FLAGS_properties_on_disk.empty()) << "Trying to set property in "
                                                "disk storage with properties "
                                                "on disk disabled!";
    std::string disk_key =
        DiskKey(std::to_string(version_key_), std::to_string(key.Id()));
    DiskStorage().Put(disk_key, SerializeProp(value));
  }
}
// Removes the property stored under `key` (memory or disk, per the key's
// Location).
//
// @return true if nothing is stored under the key after this operation.
bool PropertyValueStore::erase(const Property &key) {
  auto EraseKey = [&key](auto &props) {
    auto found = std::find_if(props.begin(), props.end(),
                              [&key](std::pair<Property, PropertyValue> &kv) {
                                return kv.first == key;
                              });
    if (found != props.end()) props.erase(found);
    return true;
  };

  if (key.Location() == Location::Memory) return EraseKey(props_);

  // Fixed copy-pasted CHECK message from at(): this path deletes, it does
  // not read.
  CHECK(!FLAGS_properties_on_disk.empty()) << "Trying to erase property from "
                                              "disk storage with properties on "
                                              "disk disabled!";
  std::string disk_key =
      DiskKey(std::to_string(version_key_), std::to_string(key.Id()));
  return DiskStorage().Delete(disk_key);
}
// Removes all properties, both in-memory and (when enabled) on-disk.
void PropertyValueStore::clear() {
  props_.clear();
  if (!FLAGS_properties_on_disk.empty()) {
    DiskStorage().DeletePrefix(DiskKeyPrefix(std::to_string(version_key_)));
  }
}

// Returns the process-wide key-value store used for on-disk properties.
// A function-local static is used because the durability_directory flag is
// not available at static-initialization time (see the header comment).
kvstore::KVStore &PropertyValueStore::DiskStorage() const {
  static auto disk_storage = ConstructDiskStorage();
  return disk_storage;
}
// Constructs a memory-only iterator (used when on-disk properties are
// disabled).
PropertyValueStore::iterator::iterator(
    const PropertyValueStore *pvs,
    std::vector<std::pair<Property, PropertyValue>>::const_iterator memory_it)
    : pvs_(pvs), memory_it_(memory_it) {}

// Constructs an iterator that walks the in-memory properties first, then the
// on-disk ones.
PropertyValueStore::iterator::iterator(
    const PropertyValueStore *pvs,
    std::vector<std::pair<Property, PropertyValue>>::const_iterator memory_it,
    kvstore::KVStore::iterator disk_it)
    : pvs_(pvs), memory_it_(memory_it), disk_it_(std::move(disk_it)) {}

// Advances through the in-memory range first; once exhausted, advances the
// disk iterator (if one is present).
PropertyValueStore::iterator &PropertyValueStore::iterator::operator++() {
  if (memory_it_ != pvs_->props_.end()) {
    ++memory_it_;
  } else if (disk_it_) {
    ++(*disk_it_);
  }
  return *this;
}

bool PropertyValueStore::iterator::operator==(const iterator &other) const {
  return pvs_ == other.pvs_ && memory_it_ == other.memory_it_ &&
         disk_it_ == other.disk_it_;
}

bool PropertyValueStore::iterator::operator!=(const iterator &other) const {
  return !(*this == other);
}

// Yields the current (Property, PropertyValue) pair. Disk entries are decoded
// lazily: the property id is parsed out of the disk key
// ("<version_key><separator><prop_id>") and the value deserialized; the
// result is cached in disk_prop_ so the returned reference stays valid.
PropertyValueStore::iterator::reference PropertyValueStore::iterator::operator
*() {
  if (memory_it_ != pvs_->props_.end() || !disk_it_) return *memory_it_;
  std::pair<std::string, std::string> kv = *(*disk_it_);
  std::string prop_id = kv.first.substr(kv.first.find(kDiskKeySeparator) + 1);
  disk_prop_ = {Property(std::stoi(prop_id), Location::Disk),
                pvs_->DeserializeProp(kv.second)};
  return disk_prop_.value();
}

PropertyValueStore::iterator::pointer PropertyValueStore::iterator::
operator->() {
  return &**this;
}
// Total number of properties: in-memory entries plus (when enabled) on-disk
// entries stored under this instance's key prefix.
size_t PropertyValueStore::size() const {
  if (FLAGS_properties_on_disk.empty()) {
    return props_.size();
  } else {
    return props_.size() +
           DiskStorage().Size(DiskKeyPrefix(std::to_string(version_key_)));
  }
}

// Iterator over the in-memory properties, chained with the on-disk ones when
// disk storage is enabled.
PropertyValueStore::iterator PropertyValueStore::begin() const {
  if (FLAGS_properties_on_disk.empty()) {
    return iterator(this, props_.begin());
  } else {
    return iterator(
        this, props_.begin(),
        DiskStorage().begin(DiskKeyPrefix(std::to_string(version_key_))));
  }
}

// Matching past-the-end iterator for begin().
PropertyValueStore::iterator PropertyValueStore::end() const {
  if (FLAGS_properties_on_disk.empty()) {
    return iterator(this, props_.end());
  } else {
    return iterator(
        this, props_.end(),
        DiskStorage().end(DiskKeyPrefix(std::to_string(version_key_))));
  }
}
// Serializes a single PropertyValue into a string using the Bolt encoder.
std::string PropertyValueStore::SerializeProp(const PropertyValue &prop) const {
  storage::PODBuffer pod_buffer;
  BaseEncoder<storage::PODBuffer> encoder{pod_buffer};
  encoder.WriteValue(glue::ToBoltValue(prop));
  return std::string(reinterpret_cast<char *>(pod_buffer.buffer.data()),
                     pod_buffer.buffer.size());
}

// Deserializes a single PropertyValue previously produced by SerializeProp.
// Returns a Null value when decoding fails.
PropertyValue PropertyValueStore::DeserializeProp(
    const std::string &serialized_prop) const {
  storage::PODBuffer pod_buffer{serialized_prop};
  communication::bolt::Decoder<storage::PODBuffer> decoder{pod_buffer};
  Value dv;
  if (!decoder.ReadValue(&dv)) {
    DLOG(WARNING) << "Unable to read property value";
    return PropertyValue();
  }
  return glue::ToPropertyValue(dv);
}

// Creates the on-disk property store under <durability_directory>/properties,
// wiping any stale contents left over from a previous run.
kvstore::KVStore PropertyValueStore::ConstructDiskStorage() const {
  auto storage_path = fs::path() / FLAGS_durability_directory / "properties";
  if (fs::exists(storage_path)) fs::remove_all(storage_path);
  return kvstore::KVStore(storage_path);
}

View File

@ -1,164 +0,0 @@
#pragma once
#include <atomic>
#include <optional>
#include <string>
#include <vector>
#include "kvstore/kvstore.hpp"
#include "storage/common/types/property_value.hpp"
#include "storage/common/types/types.hpp"
/**
 * A collection of properties accessed in a map-like way using a key of type
 * storage::Property.
 *
 * PropertyValueStore handles storage on disk or in memory. The property key
 * defines where the corresponding property should be stored. Each instance of
 * PropertyValueStore contains a version_key_ member which specifies where on
 * disk the properties should be stored. That key is drawn from a static
 * global counter global_key_cnt_.
 *
 * The underlying implementation of in-memory storage is not necessarily
 * std::map.
 */
class PropertyValueStore {
  using Property = storage::Property;
  using Location = storage::Location;

 public:
  // Property name which will be used to store vertex/edge ids inside the
  // property value store.
  static constexpr char IdPropertyName[] = "__id__";

  PropertyValueStore() = default;
  // Copies the in-memory properties and clones the on-disk ones under a
  // freshly allocated version key (see the .cpp for details).
  PropertyValueStore(const PropertyValueStore &old);
  // Deletes this store's on-disk properties (by key prefix).
  ~PropertyValueStore();

  /**
   * Returns a PropertyValue (by reference) at the given key.
   * If the key does not exist, the Null property is returned.
   *
   * This is NOT thread-safe, the reference might not be valid
   * when used in a multithreaded scenario.
   *
   * @param key The key for which a PropertyValue is sought.
   * @return See above.
   */
  PropertyValue at(const Property &key) const;

  /**
   * Set overriding for character constants. Forces conversion
   * to std::string, otherwise templating might cast the pointer
   * to something else (bool) and mess things up.
   */
  void set(const Property &key, const char *value);

  /**
   * Set overriding for PropertyValue. When setting a Null value it
   * calls 'erase' instead of inserting the Null into storage.
   */
  void set(const Property &key, const PropertyValue &value);

  /**
   * Removes the PropertyValue for the given key.
   *
   * @param key - The key for which to remove the property.
   *
   * @return true if the operation was successful and there is nothing stored
   * under given key after this operation.
   */
  bool erase(const Property &key);

  /** Removes all the properties (both in-mem and on-disk) from this store. */
  void clear();

  /**
   * Returns a static kvstore::KVStore instance used for storing properties on
   * disk. This hack is needed due to statics that are internal to RocksDB and
   * availability of durability_directory flag.
   */
  kvstore::KVStore &DiskStorage() const;

  /**
   * Custom PVS iterator behaves as if all properties are stored in a single
   * iterable collection of std::pair<Property, PropertyValue>. In-memory
   * properties are visited first, then (when enabled) the on-disk ones.
   * Single-pass input iterator; disk values are decoded lazily.
   *
   * NOTE(review): std::iterator is deprecated since C++17; consider replacing
   * the base class with the five member type aliases.
   */
  class iterator final
      : public std::iterator<
            std::input_iterator_tag,                     // iterator_category
            std::pair<Property, PropertyValue>,          // value_type
            long,                                        // difference_type
            const std::pair<Property, PropertyValue> *,  // pointer
            const std::pair<Property, PropertyValue> &   // reference
            > {
   public:
    iterator() = delete;
    iterator(const PropertyValueStore *pvs,
             std::vector<std::pair<Property, PropertyValue>>::const_iterator
                 memory_it);
    iterator(const PropertyValueStore *pvs,
             std::vector<std::pair<Property, PropertyValue>>::const_iterator
                 memory_it,
             kvstore::KVStore::iterator disk_it);
    iterator(const iterator &other) = delete;
    iterator(iterator &&other) = default;
    iterator &operator=(iterator &&other) = default;
    iterator &operator=(const iterator &other) = delete;
    iterator &operator++();
    bool operator==(const iterator &other) const;
    bool operator!=(const iterator &other) const;
    reference operator*();
    pointer operator->();

   private:
    const PropertyValueStore *pvs_;
    // Position within the in-memory property vector.
    std::vector<std::pair<Property, PropertyValue>>::const_iterator memory_it_;
    // Engaged only when on-disk properties are enabled.
    std::optional<kvstore::KVStore::iterator> disk_it_;
    // Cache for the lazily decoded current on-disk pair; keeps the reference
    // returned by operator* alive.
    std::optional<std::pair<Property, PropertyValue>> disk_prop_;
  };

  size_t size() const;
  iterator begin() const;
  iterator end() const;

 private:
  // Source of unique per-instance on-disk key prefixes.
  static std::atomic<uint64_t> global_key_cnt_;
  uint64_t version_key_ = global_key_cnt_++;

  std::vector<std::pair<Property, PropertyValue>> props_;

  /**
   * Serializes a single PropertyValue into std::string.
   *
   * @param prop - Property to be serialized.
   *
   * @return Serialized property.
   */
  std::string SerializeProp(const PropertyValue &prop) const;

  /**
   * Deserializes a single PropertyValue from std::string.
   *
   * @param serialized_prop - Serialized property.
   *
   * @return Deserialized property.
   */
  PropertyValue DeserializeProp(const std::string &serialized_prop) const;

  kvstore::KVStore ConstructDiskStorage() const;
};

View File

@ -1,126 +0,0 @@
#include "storage/common/types/slk.hpp"
namespace slk {
// Serializes a PropertyValue as a one-byte type tag followed by the payload.
// Tags: 0=Null, 1=Bool, 2=Int, 3=Double, 4=String, 5=List, 6=Map.
// Must be kept in sync with Load below.
void Save(const PropertyValue &value, slk::Builder *builder) {
  switch (value.type()) {
    case PropertyValue::Type::Null:
      slk::Save(static_cast<uint8_t>(0), builder);
      return;
    case PropertyValue::Type::Bool:
      slk::Save(static_cast<uint8_t>(1), builder);
      slk::Save(value.ValueBool(), builder);
      return;
    case PropertyValue::Type::Int:
      slk::Save(static_cast<uint8_t>(2), builder);
      slk::Save(value.ValueInt(), builder);
      return;
    case PropertyValue::Type::Double:
      slk::Save(static_cast<uint8_t>(3), builder);
      slk::Save(value.ValueDouble(), builder);
      return;
    case PropertyValue::Type::String:
      slk::Save(static_cast<uint8_t>(4), builder);
      slk::Save(value.ValueString(), builder);
      return;
    case PropertyValue::Type::List: {
      // Size-prefixed sequence of recursively encoded elements.
      slk::Save(static_cast<uint8_t>(5), builder);
      const auto &values = value.ValueList();
      size_t size = values.size();
      slk::Save(size, builder);
      for (const auto &v : values) {
        slk::Save(v, builder);
      }
      return;
    }
    case PropertyValue::Type::Map: {
      // Size-prefixed sequence of (key, value) pairs.
      slk::Save(static_cast<uint8_t>(6), builder);
      const auto &map = value.ValueMap();
      size_t size = map.size();
      slk::Save(size, builder);
      for (const auto &kv : map) {
        slk::Save(kv, builder);
      }
      return;
    }
  }
}
// Deserializes a PropertyValue written by Save above: reads the one-byte
// type tag, then the corresponding payload.
//
// @throw slk::SlkDecodeException on an unknown type tag.
void Load(PropertyValue *value, slk::Reader *reader) {
  uint8_t type;
  slk::Load(&type, reader);
  switch (type) {
    case static_cast<uint8_t>(0):
      *value = PropertyValue();
      return;
    case static_cast<uint8_t>(1): {
      bool v;
      slk::Load(&v, reader);
      *value = PropertyValue(v);
      return;
    }
    case static_cast<uint8_t>(2): {
      int64_t v;
      slk::Load(&v, reader);
      *value = PropertyValue(v);
      return;
    }
    case static_cast<uint8_t>(3): {
      double v;
      slk::Load(&v, reader);
      *value = PropertyValue(v);
      return;
    }
    case static_cast<uint8_t>(4): {
      std::string v;
      slk::Load(&v, reader);
      *value = PropertyValue(std::move(v));
      return;
    }
    case static_cast<uint8_t>(5): {
      // Size-prefixed list of recursively encoded values.
      size_t size;
      slk::Load(&size, reader);
      std::vector<PropertyValue> list(size);
      for (size_t i = 0; i < size; ++i) {
        slk::Load(&list[i], reader);
      }
      *value = PropertyValue(std::move(list));
      return;
    }
    case static_cast<uint8_t>(6): {
      // Size-prefixed sequence of (key, value) pairs.
      size_t size;
      slk::Load(&size, reader);
      std::map<std::string, PropertyValue> map;
      for (size_t i = 0; i < size; ++i) {
        std::pair<std::string, PropertyValue> kv;
        slk::Load(&kv, reader);
        map.insert(kv);
      }
      *value = PropertyValue(std::move(map));
      return;
    }
    default:
      throw slk::SlkDecodeException("Trying to load unknown PropertyValue!");
  }
}
// Serializes the entire property store as a size-prefixed sequence of
// (Property, PropertyValue) pairs.
void Save(const PropertyValueStore &properties, slk::Builder *builder) {
  size_t size = properties.size();
  slk::Save(size, builder);
  for (const auto &kv : properties) {
    slk::Save(kv, builder);
  }
}

// Inverse of Save above: clears the store and re-inserts the decoded pairs.
void Load(PropertyValueStore *properties, slk::Reader *reader) {
  properties->clear();
  size_t size;
  slk::Load(&size, reader);
  for (size_t i = 0; i < size; ++i) {
    std::pair<storage::Property, PropertyValue> kv;
    slk::Load(&kv, reader);
    properties->set(kv.first, kv.second);
  }
}
} // namespace slk

View File

@ -1,52 +0,0 @@
#pragma once
#include "slk/serialization.hpp"
#include "storage/common/types/property_value.hpp"
#include "storage/common/types/property_value_store.hpp"
#include "storage/common/types/types.hpp"
namespace slk {

// SLK (de)serialization for storage id types. Only the raw id_ field is
// transferred; it also carries the Location bit.
inline void Save(const storage::Label &common, slk::Builder *builder) {
  slk::Save(common.id_, builder);
}

inline void Load(storage::Label *common, slk::Reader *reader) {
  slk::Load(&common->id_, reader);
}

inline void Save(const storage::EdgeType &common, slk::Builder *builder) {
  slk::Save(common.id_, builder);
}

inline void Load(storage::EdgeType *common, slk::Reader *reader) {
  slk::Load(&common->id_, reader);
}

inline void Save(const storage::Property &common, slk::Builder *builder) {
  slk::Save(common.id_, builder);
}

inline void Load(storage::Property *common, slk::Reader *reader) {
  slk::Load(&common->id_, reader);
}

// Gid is transferred through its unsigned representation.
inline void Save(const storage::Gid &gid, slk::Builder *builder) {
  slk::Save(gid.AsUint(), builder);
}

inline void Load(storage::Gid *gid, slk::Reader *reader) {
  uint64_t id;
  slk::Load(&id, reader);
  *gid = storage::Gid::FromUint(id);
}

// Implemented in the accompanying .cpp file.
void Save(const PropertyValue &value, slk::Builder *builder);
void Load(PropertyValue *value, slk::Reader *reader);
void Save(const PropertyValueStore &properties, slk::Builder *builder);
void Load(PropertyValueStore *properties, slk::Reader *reader);

}  // namespace slk

View File

@ -1,249 +0,0 @@
#pragma once
#include <atomic>
#include <cstdint>
#include <functional>
#include <limits>
#include <optional>
#include <glog/logging.h>
#include "utils/atomic.hpp"
#include "utils/cast.hpp"
namespace storage {
using IdT = uint16_t;

// The top bit of an id encodes the storage Location; the remaining 15 bits
// are the numeric id itself.
const IdT IdMask = std::numeric_limits<IdT>::max() >> 1;
const IdT IdNotMask = ~IdMask;

// In case of a new location the Mask value has to be updated.
//
// |-------------|--------------|
// |---location--|------id------|
// |-Memory|Disk-|-----2^15-----|
enum class Location : IdT { Memory = 0x8000, Disk = 0x0000 };

// Typed label id. Comparison operators use only Id(); the Location bit is
// intentionally ignored.
class Label final {
 public:
  Label() = default;
  explicit Label(const IdT id, const Location location = Location::Memory)
      : id_((id & IdMask) | static_cast<IdT>(location)) {
    // TODO(ipaljak): A better way would be to throw an exception
    // and send a message to the user that a new Id can't be created.
    // By doing that, the database instance will continue to work and the user
    // has a chance to take an appropriate action.
    // CHECK isn't user friendly at all because it will immediately
    // terminate the whole process.
    // TODO implement throw and error handling
    CHECK(id <= IdMask) << "Number of used ids overflowed!";
  }
  IdT Id() const { return static_cast<IdT>(id_ & IdMask); }
  storage::Location Location() const {
    return static_cast<storage::Location>(id_ & IdNotMask);
  }
  friend bool operator==(const Label &a, const Label &b) {
    return a.Id() == b.Id();
  }
  friend bool operator!=(const Label &a, const Label &b) {
    return a.Id() != b.Id();
  }
  friend bool operator<(const Label &a, const Label &b) {
    return a.Id() < b.Id();
  }
  friend bool operator>(const Label &a, const Label &b) {
    return a.Id() > b.Id();
  }
  friend bool operator<=(const Label &a, const Label &b) {
    return a.Id() <= b.Id();
  }
  friend bool operator>=(const Label &a, const Label &b) {
    return a.Id() >= b.Id();
  }
  IdT id_{0};
};

// Typed edge-type id; same encoding and comparison semantics as Label.
class EdgeType final {
 public:
  EdgeType() = default;
  explicit EdgeType(const IdT id, const Location location = Location::Memory)
      : id_((id & IdMask) | static_cast<IdT>(location)) {
    // TODO(ipaljak): A better way would be to throw an exception
    // and send a message to the user that a new Id can't be created.
    // By doing that, the database instance will continue to work and the user
    // has a chance to take an appropriate action.
    // CHECK isn't user friendly at all because it will immediately
    // terminate the whole process.
    // TODO implement throw and error handling
    CHECK(id <= IdMask) << "Number of used ids overflowed!";
  }
  IdT Id() const { return static_cast<IdT>(id_ & IdMask); }
  storage::Location Location() const {
    return static_cast<storage::Location>(id_ & IdNotMask);
  }
  friend bool operator==(const EdgeType &a, const EdgeType &b) {
    return a.Id() == b.Id();
  }
  friend bool operator!=(const EdgeType &a, const EdgeType &b) {
    return a.Id() != b.Id();
  }
  friend bool operator<(const EdgeType &a, const EdgeType &b) {
    return a.Id() < b.Id();
  }
  friend bool operator>(const EdgeType &a, const EdgeType &b) {
    return a.Id() > b.Id();
  }
  friend bool operator<=(const EdgeType &a, const EdgeType &b) {
    return a.Id() <= b.Id();
  }
  friend bool operator>=(const EdgeType &a, const EdgeType &b) {
    return a.Id() >= b.Id();
  }
  IdT id_{0};
};

// Typed property id; the Location bit decides whether the property value
// lives in memory or on disk. Comparisons use only Id().
class Property final {
 public:
  Property() = default;
  explicit Property(const IdT id, const Location location = Location::Memory)
      : id_((id & IdMask) | static_cast<IdT>(location)) {
    // TODO(ipaljak): A better way would be to throw an exception
    // and send a message to the user that a new Id can't be created.
    // By doing that, the database instance will continue to work and the user
    // has a chance to take an appropriate action.
    // CHECK isn't user friendly at all because it will immediately
    // terminate the whole process.
    // TODO implement throw and error handling
    CHECK(id <= IdMask) << "Number of used ids overflowed!";
  }
  IdT Id() const { return static_cast<IdT>(id_ & IdMask); }
  storage::Location Location() const {
    return static_cast<storage::Location>(id_ & IdNotMask);
  }
  friend bool operator==(const Property &a, const Property &b) {
    return a.Id() == b.Id();
  }
  friend bool operator!=(const Property &a, const Property &b) {
    return a.Id() != b.Id();
  }
  friend bool operator<(const Property &a, const Property &b) {
    return a.Id() < b.Id();
  }
  friend bool operator>(const Property &a, const Property &b) {
    return a.Id() > b.Id();
  }
  friend bool operator<=(const Property &a, const Property &b) {
    return a.Id() <= b.Id();
  }
  friend bool operator>=(const Property &a, const Property &b) {
    return a.Id() >= b.Id();
  }
  IdT id_{0};
};
/** Global ID of a record in the database. */
class Gid final {
 private:
  explicit Gid(uint64_t id) : id_(id) {}

 public:
  Gid() = default;
  // Named constructors keep the signed/unsigned interpretation of the raw
  // bits explicit.
  static Gid FromUint(uint64_t id) { return Gid{id}; }
  static Gid FromInt(int64_t id) {
    return Gid{utils::MemcpyCast<uint64_t>(id)};
  }
  uint64_t AsUint() const { return id_; }
  int64_t AsInt() const { return utils::MemcpyCast<int64_t>(id_); }

 private:
  uint64_t id_;
};

// Comparison operators are defined on the unsigned representation.
inline bool operator==(const Gid &first, const Gid &second) {
  return first.AsUint() == second.AsUint();
}
inline bool operator!=(const Gid &first, const Gid &second) {
  return first.AsUint() != second.AsUint();
}
inline bool operator<(const Gid &first, const Gid &second) {
  return first.AsUint() < second.AsUint();
}
inline bool operator>(const Gid &first, const Gid &second) {
  return first.AsUint() > second.AsUint();
}
inline bool operator<=(const Gid &first, const Gid &second) {
  return first.AsUint() <= second.AsUint();
}
inline bool operator>=(const Gid &first, const Gid &second) {
  return first.AsUint() >= second.AsUint();
}

/** Threadsafe generation of new global IDs. */
class GidGenerator {
 public:
  /**
   * Returns a globally unique identifier.
   *
   * @param requested_gid - The desired gid. If given, it will be returned and
   * this generator's state updated accordingly.
   */
  Gid Next(std::optional<Gid> requested_gid = std::nullopt) {
    if (requested_gid) {
      // Bump the counter past the requested id so that subsequently generated
      // ids can't collide with it.
      utils::EnsureAtomicGe(next_local_id_, requested_gid->AsUint() + 1U);
      return *requested_gid;
    } else {
      return Gid::FromUint(next_local_id_++);
    }
  }

 private:
  std::atomic<uint64_t> next_local_id_{0};
};
} // namespace storage
namespace std {

// Hash specializations delegate to the numeric id; consistent with the
// equality operators, which also compare only Id().
template <>
struct hash<storage::Label> {
  size_t operator()(const storage::Label &k) const {
    return hash<storage::IdT>()(k.Id());
  }
};

template <>
struct hash<storage::EdgeType> {
  size_t operator()(const storage::EdgeType &k) const {
    return hash<storage::IdT>()(k.Id());
  }
};

template <>
struct hash<storage::Property> {
  size_t operator()(const storage::Property &k) const {
    return hash<storage::IdT>()(k.Id());
  }
};

// Gid hashes through its unsigned representation.
template <>
struct hash<storage::Gid> {
  size_t operator()(const storage::Gid &gid) const {
    return hash<uint64_t>()(gid.AsUint());
  }
};

}  // namespace std

View File

@ -1,7 +0,0 @@
#pragma once
#ifdef MG_SINGLE_NODE_HA
#include "storage/single_node_ha/edge_accessor.hpp"
#endif
// TODO: write documentation for the interface here!

View File

@ -1,68 +0,0 @@
#pragma once
#include <mutex>
#include "data_structures/concurrent/concurrent_map.hpp"
#include "storage/common/types/types.hpp"
#include "utils/algorithm.hpp"
namespace storage {
/** SingleNode implementation of ConcurrentIdMapper: a bidirectional, lazily
 * populated mapping between names (strings) and typed ids. */
template <typename TId>
class ConcurrentIdMapper {
  using StorageT = IdT;

 public:
  ConcurrentIdMapper() = default;
  // `properties_on_disk` lists property names whose values live on disk;
  // it is only consulted when TId is Property.
  explicit ConcurrentIdMapper(
      const std::vector<std::string> &properties_on_disk)
      : properties_on_disk_(properties_on_disk) {}

  // Returns the id mapped to `value`, allocating a new id on first use.
  // Guarantees that both directions of the mapping exist before returning.
  TId value_to_id(const std::string &value) {
    auto value_to_id_acc = value_to_id_.access();
    auto found = value_to_id_acc.find(value);
    TId inserted_id(0);
    if (found == value_to_id_acc.end()) {
      StorageT new_id = id_.fetch_add(1);
      // After we tried to insert the value with our id we either got our id,
      // or the id created by the thread which successfully inserted the
      // (value, id) pair, because that's ConcurrentMap's behaviour.
      if (std::is_same<TId, Property>::value)
        inserted_id =
            value_to_id_acc.insert(value, TId(new_id, PropertyLocation(value)))
                .first->second;
      else
        inserted_id = value_to_id_acc.insert(value, TId(new_id)).first->second;
    } else {
      inserted_id = found->second;
    }
    auto id_to_value_acc = id_to_value_.access();
    // We have to try to insert the inserted_id and value even if we are not
    // the one who assigned the id, because we have to make sure that after
    // this method returns both mappings, id->value and value->id, exist.
    id_to_value_acc.insert(inserted_id, value);
    return inserted_id;
  }

  // Reverse lookup; the id must have been produced by value_to_id.
  const std::string &id_to_value(const TId &id) {
    auto id_to_value_acc = id_to_value_.access();
    auto result = id_to_value_acc.find(id);
    DCHECK(result != id_to_value_acc.end());
    return result->second;
  }

 private:
  ConcurrentMap<std::string, TId> value_to_id_;
  ConcurrentMap<TId, std::string> id_to_value_;
  std::atomic<StorageT> id_{0};
  std::vector<std::string> properties_on_disk_;
  // Guards the location lookup over properties_on_disk_.
  std::mutex mutex_;

  // Decides whether a property with the given name is stored on disk or in
  // memory, based on the configured properties_on_disk_ list.
  Location PropertyLocation(const std::string &name) {
    std::unique_lock<std::mutex> lock(mutex_);
    if (utils::Contains(properties_on_disk_, name)) return Location::Disk;
    return Location::Memory;
  }
};
} // namespace storage

View File

@ -1,78 +0,0 @@
#pragma once
#include <malloc.h>
#include <limits>
#include <list>
#include "glog/logging.h"
#include "storage/single_node_ha/mvcc/record.hpp"
#include "transactions/transaction.hpp"
/**
 * @brief - Implements deferred deletion.
 * @tparam T - type of object to delete (Vertex/Edge/VersionList...)
 * This is NOT a thread-safe class.
 */
template <typename T>
class DeferredDeleter {
 public:
  /**
   * @brief - keep track of what object was deleted at which time.
   */
  struct DeletedObject {
    const T *object;
    const tx::TransactionId deleted_at;
    DeletedObject(const T *object, tx::TransactionId deleted_at)
        : object(object), deleted_at(deleted_at) {}
  };

  /**
   * @brief - check if everything is freed
   */
  ~DeferredDeleter() {
    CHECK(objects_.size() == 0U)
        << "Objects are not freed when calling the destructor.";
  }

  /**
   * @brief - Add objects to this deleter. This method assumes that it will
   * always be called with a non-decreasing sequence of `deleted_at`.
   * @param objects - vector of objects to add
   */
  void AddObjects(const std::vector<DeletedObject> &objects) {
    // Objects must arrive sorted so `objects_` stays ordered by `deleted_at`;
    // FreeExpiredObjects relies on that ordering to stop early.
    auto previous_tx_id = objects_.empty()
                              ? std::numeric_limits<tx::TransactionId>::min()
                              : objects_.back().deleted_at;
    for (auto object : objects) {
      CHECK(previous_tx_id <= object.deleted_at)
          << "deleted_at must be non-decreasing";
      previous_tx_id = object.deleted_at;
      objects_.push_back(object);
    }
  }

  /**
   * @brief - Free memory of objects deleted before the id.
   * @param id - delete before this id
   */
  void FreeExpiredObjects(tx::TransactionId id) {
    auto it = objects_.begin();
    // `objects_` is sorted ascendingly by `deleted_at`, so we can stop at the
    // first object that is not yet expired.
    while (it != objects_.end() && it->deleted_at < id) {
      delete it->object;
      ++it;
    }
    objects_.erase(objects_.begin(), it);
  }

  /**
   * @brief - Return number of stored objects.
   */
  size_t Count() { return objects_.size(); }

 private:
  // Ascendingly sorted list of deleted objects by `deleted_at`.
  std::list<DeletedObject> objects_;
};

View File

@ -1,32 +0,0 @@
#pragma once
#include "storage/single_node_ha/mvcc/record.hpp"
#include "storage/single_node_ha/mvcc/version_list.hpp"
#include "storage/common/types/property_value_store.hpp"
#include "storage/common/types/types.hpp"
class Vertex;
// An MVCC record holding a single edge: its two endpoint version lists, its
// type and its properties.
class Edge : public mvcc::Record<Edge> {
 public:
  Edge(mvcc::VersionList<Vertex> *from, mvcc::VersionList<Vertex> *to,
       storage::EdgeType edge_type)
      : from_(from), to_(to), edge_type_(edge_type) {}

  // Returns new Edge with copy of data stored in this Edge, but without
  // copying superclass' members.
  Edge *CloneData() { return new Edge(*this); }

  mvcc::VersionList<Vertex> *from_;  // source vertex
  mvcc::VersionList<Vertex> *to_;    // destination vertex
  storage::EdgeType edge_type_;
  PropertyValueStore properties_;

 private:
  // Private copy constructor used only by CloneData; default-initializes the
  // mvcc::Record base instead of copying it.
  Edge(const Edge &other)
      : mvcc::Record<Edge>(),
        from_(other.from_),
        to_(other.to_),
        edge_type_(other.edge_type_),
        properties_(other.properties_) {}
};

View File

@ -1,59 +0,0 @@
#include "storage/edge_accessor.hpp"
#include "database/single_node_ha/graph_db_accessor.hpp"
#include "storage/vertex_accessor.hpp"
#include "utils/algorithm.hpp"
// Constructs an accessor that reads the edge endpoints and type from the
// MVCC record (the slower path; see the class docs in the header).
EdgeAccessor::EdgeAccessor(mvcc::VersionList<Edge> *address,
                           database::GraphDbAccessor &db_accessor)
    : RecordAccessor(address, db_accessor),
      from_(nullptr),
      to_(nullptr),
      edge_type_() {
  RecordAccessor::Reconstruct();
  // If Reconstruct() produced no record (current_ stays null — presumably no
  // version is visible; confirm against RecordAccessor), the endpoint/type
  // members keep their default values.
  if (current_ != nullptr) {
    from_ = current_->from_;
    to_ = current_->to_;
    edge_type_ = current_->edge_type_;
  }
}
// Constructs an accessor from caller-supplied endpoints and type, avoiding a
// read of the MVCC record (the faster path; see the class docs in the header).
EdgeAccessor::EdgeAccessor(mvcc::VersionList<Edge> *address,
                           database::GraphDbAccessor &db_accessor,
                           mvcc::VersionList<Vertex> *from,
                           mvcc::VersionList<Vertex> *to,
                           storage::EdgeType edge_type)
    : RecordAccessor(address, db_accessor),
      from_(from),
      to_(to),
      edge_type_(edge_type) {}
// Returns the edge's type, cached in the accessor at construction.
storage::EdgeType EdgeAccessor::EdgeType() const { return edge_type_; }
// Returns an accessor to the source vertex of this edge.
VertexAccessor EdgeAccessor::from() const {
  return VertexAccessor(from_, db_accessor());
}
// Checks whether `v` is the source vertex, by address comparison only (no
// extra accessor is created).
bool EdgeAccessor::from_is(const VertexAccessor &v) const {
  return v.address() == from_;
}
// Returns an accessor to the destination vertex of this edge.
VertexAccessor EdgeAccessor::to() const {
  return VertexAccessor(to_, db_accessor());
}
// Checks whether `v` is the destination vertex, by address comparison only
// (no extra accessor is created).
bool EdgeAccessor::to_is(const VertexAccessor &v) const {
  return v.address() == to_;
}
// An edge is a cycle when both endpoints are the same vertex (by address).
bool EdgeAccessor::is_cycle() const { return to_ == from_; }
// Streams a human-readable representation of an edge in the form
// "E[<type> {<prop>: <value>, ...}]".
std::ostream &operator<<(std::ostream &os, const EdgeAccessor &ea) {
  auto &dba = ea.db_accessor();
  os << "E[" << dba.EdgeTypeName(ea.EdgeType()) << " {";
  utils::PrintIterable(os, ea.Properties(), ", ",
                       [&](auto &out, const auto &prop) {
                         out << dba.PropertyName(prop.first) << ": "
                             << prop.second;
                       });
  os << "}]";
  return os;
}

View File

@ -1,77 +0,0 @@
#pragma once
#include "storage/single_node_ha/edge.hpp"
#include "storage/single_node_ha/record_accessor.hpp"
// forward declaring the VertexAccessor because it's returned
// by some functions
class VertexAccessor;
/**
* Provides ways for the client programmer (i.e. code generated by the compiler)
* to interact with an Edge.
*
* Note that EdgeAccessors do not necessary read versioned (MVCC) data. This is
* possible because edge endpoints (from and to), as well as the edge type, are
* all immutable. These are the most often used aspects of an edge, and are
* stored also in the vertex endpoints of the edge. Using them when creating an
* EdgeAccessor means that data does not have to be read from a random memory
* location, which is often a performance bottleneck in traversals.
*/
class EdgeAccessor final : public RecordAccessor<Edge> {
 public:
  /** Constructor that reads data from the random memory location (lower
   * performance, see class docs). */
  EdgeAccessor(mvcc::VersionList<Edge> *address,
               database::GraphDbAccessor &db_accessor);

  /**
   * Constructor that does NOT read data from the random memory location
   * (better performance, see class docs).
   */
  EdgeAccessor(mvcc::VersionList<Edge> *address,
               database::GraphDbAccessor &db_accessor,
               mvcc::VersionList<Vertex> *from, mvcc::VersionList<Vertex> *to,
               storage::EdgeType edge_type);

  /** Returns the type of this edge. */
  storage::EdgeType EdgeType() const;

  /** Returns an accessor to the originating Vertex of this edge. */
  VertexAccessor from() const;

  /** Returns the address of the originating Vertex of this edge. */
  auto from_addr() const { return from_; }

  /** Checks if the given vertex is the source of this edge, without
   * creating an additional accessor to perform the check. */
  bool from_is(const VertexAccessor &v) const;

  /** Returns an accessor to the destination Vertex of this edge. */
  VertexAccessor to() const;

  /** Returns the address of the destination Vertex of this edge. */
  auto to_addr() const { return to_; }

  /** Checks if the given vertex is the destination of this edge, without
   * creating an additional accessor to perform the check. */
  bool to_is(const VertexAccessor &v) const;

  /** Returns true if this edge is a cycle (start and end node are
   * the same). */
  bool is_cycle() const;

 private:
  // Cached endpoints and type, filled either from the MVCC record or from
  // constructor arguments (see the two constructors above).
  mvcc::VersionList<Vertex> *from_;
  mvcc::VersionList<Vertex> *to_;
  storage::EdgeType edge_type_;
};
std::ostream &operator<<(std::ostream &, const EdgeAccessor &);
// hash function for the edge accessor
namespace std {
template <>
struct hash<EdgeAccessor> {
  // Hash an edge accessor by its globally unique id.
  // (Dropped the stray semicolon after the member-function body.)
  size_t operator()(const EdgeAccessor &e) const { return e.gid().AsUint(); }
};
}  // namespace std

View File

@ -1,156 +0,0 @@
#pragma once
#include <optional>
#include <utility>
#include <vector>
#include "glog/logging.h"
#include "storage/common/types/types.hpp"
#include "storage/single_node_ha/mvcc/version_list.hpp"
#include "utils/algorithm.hpp"
/**
 * A data structure that holds a number of edges. This implementation assumes
 * that separate Edges instances are used for incoming and outgoing edges in a
 * vertex (and consequently that edge Addresses are unique in it).
 */
class Edges {
 private:
  // A single stored edge: destination vertex, the edge itself and its type.
  struct Element {
    mvcc::VersionList<Vertex> *vertex;
    mvcc::VersionList<Edge> *edge;
    storage::EdgeType edge_type;
  };

  /** Custom iterator that takes care of skipping edges when the destination
   * vertex or edge types are known. */
  class Iterator {
   public:
    /** Ctor that just sets the position. Used for normal iteration (that does
     * not skip any edges), and for end-iterator creation in both normal and
     * skipping iteration.
     *
     * @param iterator - Iterator in the underlying storage.
     */
    explicit Iterator(std::vector<Element>::const_iterator iterator)
        : position_(iterator) {}

    /** Ctor used for creating the beginning iterator with known destination
     * vertex.
     *
     * @param position - Iterator in the underlying storage.
     * @param end - End iterator in the underlying storage.
     * @param vertex - The destination vertex address. If empty the
     * edges are not filtered on destination.
     * @param edge_types - The edge types at least one of which must be matched.
     * If nullptr edges are not filtered on type.
     */
    Iterator(std::vector<Element>::const_iterator position,
             std::vector<Element>::const_iterator end,
             mvcc::VersionList<Vertex> *vertex,
             const std::vector<storage::EdgeType> *edge_types)
        : position_(position),
          end_(end),
          vertex_(vertex),
          edge_types_(edge_types) {
      // Skip ahead to the first element satisfying the predicates.
      update_position();
    }

    Iterator &operator++() {
      ++position_;
      update_position();
      return *this;
    }

    const Element &operator*() const { return *position_; }
    const Element *operator->() const { return &(*position_); }

    bool operator==(const Iterator &other) const {
      return position_ == other.position_;
    }

    bool operator!=(const Iterator &other) const { return !(*this == other); }

   private:
    std::vector<Element>::const_iterator position_;
    // end_ is used only in update_position() to limit find.
    std::vector<Element>::const_iterator end_;
    // Optional predicates. If set they define which edges are skipped by the
    // iterator.
    mvcc::VersionList<Vertex> *vertex_{nullptr};
    // For edge types we use a vector pointer because it's optional.
    const std::vector<storage::EdgeType> *edge_types_ = nullptr;

    /** Helper function that skips edges that don't satisfy the predicate
     * present in this iterator. */
    void update_position() {
      if (vertex_ && edge_types_) {
        position_ = std::find_if(position_, end_, [this](const Element &e) {
          return e.vertex == this->vertex_ &&
                 utils::Contains(*this->edge_types_, e.edge_type);
        });
      } else if (vertex_) {
        position_ = std::find_if(position_, end_, [this](const Element &e) {
          return e.vertex == this->vertex_;
        });
      } else if (edge_types_) {
        position_ = std::find_if(position_, end_, [this](const Element &e) {
          return utils::Contains(*this->edge_types_, e.edge_type);
        });
      }
    }
  };

 public:
  /**
   * Adds an edge to this structure.
   *
   * @param vertex - The destination vertex of the edge. That's the one
   * opposite from the vertex that contains this `Edges` instance.
   * @param edge - The edge.
   * @param edge_type - Type of the edge.
   */
  void emplace(mvcc::VersionList<Vertex> *vertex, mvcc::VersionList<Edge> *edge,
               storage::EdgeType edge_type) {
    storage_.emplace_back(Element{vertex, edge, edge_type});
  }

  /**
   * Removes an edge from this structure.
   */
  void RemoveEdge(mvcc::VersionList<Edge> *edge) {
    auto found = std::find_if(
        storage_.begin(), storage_.end(),
        [edge](const Element &element) { return edge == element.edge; });
    // If the edge is not in the structure we don't care and can simply return
    if (found == storage_.end()) return;
    // Swap-with-last removal: element order is not preserved.
    *found = std::move(storage_.back());
    storage_.pop_back();
  }

  auto size() const { return storage_.size(); }
  auto begin() const { return Iterator(storage_.begin()); }
  auto end() const { return Iterator(storage_.end()); }
  auto &storage() { return storage_; }

  /**
   * Creates a beginning iterator that will skip edges whose destination
   * vertex is not equal to the given vertex.
   *
   * @param vertex - The destination vertex Address. If empty the
   * edges are not filtered on destination.
   * @param edge_types - The edge types at least one of which must be matched.
   * If nullptr edges are not filtered on type.
   */
  auto begin(mvcc::VersionList<Vertex> *vertex,
             const std::vector<storage::EdgeType> *edge_types) const {
    // An empty edge-type filter is equivalent to no filter at all.
    if (edge_types && edge_types->empty()) edge_types = nullptr;
    return Iterator(storage_.begin(), storage_.end(), vertex, edge_types);
  }

 private:
  std::vector<Element> storage_;
};

View File

@ -1,70 +0,0 @@
#pragma once
#include <glog/logging.h>
#include "data_structures/concurrent/skiplist.hpp"
#include "storage/single_node_ha/mvcc/version_list.hpp"
#include "storage/single_node_ha/deferred_deleter.hpp"
#include "transactions/single_node_ha/engine.hpp"
/**
 * @brief - Garbage collects deleted records.
 * @tparam TCollection - type of collection. Must have a SkipList-like API
 * (accessors).
 * @tparam TRecord - type of underlying record in mvcc.
 */
template <typename TCollection, typename TRecord>
class GarbageCollector {
 public:
  GarbageCollector(
      TCollection &collection, DeferredDeleter<TRecord> &record_deleter,
      DeferredDeleter<mvcc::VersionList<TRecord>> &version_list_deleter)
      : collection_(collection),
        record_deleter_(record_deleter),
        version_list_deleter_(version_list_deleter) {}

  /**
   * @brief - Runs garbage collector. Populates deferred deleters with version
   * lists and records.
   *
   * @param snapshot - the GC snapshot. Consists of the oldest active
   * transaction's snapshot, with that transaction's id appended as last.
   * @param engine - reference to engine object
   */
  void Run(const tx::Snapshot &snapshot, const tx::Engine &engine) {
    auto collection_accessor = collection_.access();
    uint64_t count = 0;
    std::vector<typename DeferredDeleter<TRecord>::DeletedObject>
        deleted_records;
    std::vector<
        typename DeferredDeleter<mvcc::VersionList<TRecord>>::DeletedObject>
        deleted_version_lists;
    for (auto id_vlist : collection_accessor) {
      mvcc::VersionList<TRecord> *vlist = id_vlist.second;
      // If the version_list is empty, i.e. there is nothing else to be read
      // from it we can delete it.
      auto ret = vlist->GcDeleted(snapshot, engine);
      if (ret.first) {
        deleted_version_lists.emplace_back(vlist, engine.LocalLast());
        count += collection_accessor.remove(id_vlist.first);
      }
      if (ret.second != nullptr)
        deleted_records.emplace_back(ret.second, engine.LocalLast());
    }
    DLOG_IF(INFO, count > 0)
        << "GC started cleaning with snapshot: " << snapshot;
    DLOG_IF(INFO, count > 0) << "Destroyed: " << count;
    // Add records to deleter, with the id larger or equal than the last active
    // transaction.
    record_deleter_.AddObjects(deleted_records);
    // Add version_lists to deleter, with the id larger or equal than the last
    // active transaction.
    version_list_deleter_.AddObjects(deleted_version_lists);
  }

 private:
  TCollection &collection_;
  DeferredDeleter<TRecord> &record_deleter_;
  DeferredDeleter<mvcc::VersionList<TRecord>> &version_list_deleter_;
};

View File

@ -1,188 +0,0 @@
#pragma once
#include "glog/logging.h"
#include "data_structures/concurrent/concurrent_map.hpp"
#include "storage/single_node_ha/mvcc/version_list.hpp"
#include "storage/common/index.hpp"
#include "storage/common/types/types.hpp"
#include "storage/single_node_ha/edge.hpp"
#include "storage/single_node_ha/vertex.hpp"
#include "transactions/transaction.hpp"
#include "utils/total_ordering.hpp"
namespace database {
/**
 * @brief Implements index update and acquire.
 * @tparam TKey - underlying type by which to key objects
 * @tparam TRecord - object stored under the given key
 */
template <typename TKey, typename TRecord>
class KeyIndex {
 public:
  KeyIndex() {}
  // Non-copyable and non-movable: the index is shared infrastructure.
  KeyIndex(const KeyIndex &other) = delete;
  KeyIndex(KeyIndex &&other) = delete;
  KeyIndex &operator=(const KeyIndex &other) = delete;
  KeyIndex &operator=(KeyIndex &&other) = delete;

  /**
   * @brief - Add record, vlist, if new, to TKey specific storage.
   * @param key - TKey index to update.
   * @param vlist - pointer to vlist entry to add
   * @param record - pointer to record entry to add (contained in vlist)
   */
  void Update(const TKey &key, mvcc::VersionList<TRecord> *vlist,
              const TRecord *const record) {
    GetKeyStorage(key)->access().insert(IndexEntry(vlist, record));
  }

  /**
   * @brief - Get all the inserted vlists in TKey specific storage which
   * still have that label visible in this transaction.
   * @param key - key to query.
   * @param t - current transaction, which determines visibility.
   * @param current_state If true then the graph state for the
   * current transaction+command is returned (insertions, updates and
   * deletions performed in the current transaction+command are not
   * ignored).
   * @return iterable collection of vlists records<TRecord> with the requested
   * TKey.
   */
  auto GetVlists(const TKey &key, tx::Transaction &t, bool current_state) {
    auto access = GetKeyStorage(key)->access();
    auto begin = access.begin();
    return index::GetVlists<typename SkipList<IndexEntry>::Iterator, IndexEntry,
                            TRecord>(
        std::move(access), begin, [](const IndexEntry &) { return true; }, t,
        [key](const IndexEntry &, const TRecord *record) {
          return KeyIndex::Exists(key, record);
        },
        current_state);
  }

  /**
   * @brief - Return number of items in skiplist associated with the given
   * TKey. This number could be imprecise because of the underlying skiplist
   * storage. Use this as a hint, and not as a rule.
   * Moreover, some transaction probably sees only part of the skiplist since
   * not all versions are visible for it. Also, garbage collection might not
   * have been run for some time so the index might have accumulated garbage.
   * @param key - key to query for.
   * @return number of items
   */
  auto Count(const TKey &key) { return GetKeyStorage(key)->access().size(); }

  /**
   * @brief - Removes from the index all entries for which records don't contain
   * the given label anymore. Update all record which are not visible for any
   * transaction with an id larger or equal to `id`.
   *
   * @param snapshot - the GC snapshot. Consists of the oldest active
   * transaction's snapshot, with that transaction's id appended as last.
   * @param engine - transaction engine to see which records are committed
   */
  void Refresh(const tx::Snapshot &snapshot, tx::Engine &engine) {
    return index::Refresh<TKey, IndexEntry, TRecord>(
        indices_, snapshot, engine,
        [](const TKey &key, const IndexEntry &entry) {
          return KeyIndex::Exists(key, entry.record_);
        });
  }

  /**
   * Returns a vector of keys present in this index.
   */
  std::vector<TKey> Keys() {
    std::vector<TKey> keys;
    for (auto &kv : indices_.access()) keys.push_back(kv.first);
    return keys;
  }

 private:
  /**
   * @brief - Contains vlist and record pointers.
   */
  class IndexEntry : public utils::TotalOrdering<IndexEntry> {
   public:
    IndexEntry(const IndexEntry &entry, const TRecord *const new_record)
        : IndexEntry(entry.vlist_, new_record) {}
    IndexEntry(mvcc::VersionList<TRecord> *const vlist,
               const TRecord *const record)
        : vlist_(vlist), record_(record) {}

    // Comparison operators - we need them to keep this sorted inside
    // skiplist.
    // This needs to be sorted first by vlist and second record because we
    // want to keep same vlists close together since we need to filter them to
    // get only the unique ones.
    bool operator<(const IndexEntry &other) const {
      if (this->vlist_ != other.vlist_) return this->vlist_ < other.vlist_;
      return this->record_ < other.record_;
    }

    bool operator==(const IndexEntry &other) const {
      return this->vlist_ == other.vlist_ && this->record_ == other.record_;
    }

    /**
     * @brief - Checks if previous IndexEntry has the same vlist as this
     * IndexEntry.
     * @return - true if the vlists match.
     */
    bool IsAlreadyChecked(const IndexEntry &previous) const {
      return previous.vlist_ == this->vlist_;
    }

    mvcc::VersionList<TRecord> *const vlist_;
    const TRecord *const record_;
  };

  /**
   * @brief - Get storage for this label. Creates new
   * storage if this key is not yet indexed.
   * @param key - key for which to access storage.
   * @return pointer to skiplist of version list records<T>.
   */
  auto GetKeyStorage(const TKey &key) {
    auto access = indices_.access();
    // Avoid excessive new/delete by first checking if it exists.
    auto iter = access.find(key);
    if (iter == access.end()) {
      auto ret = access.insert(key, std::make_unique<SkipList<IndexEntry>>());
      return ret.first->second.get();
    }
    return iter->second.get();
  }

  /**
   * @brief - Check if Vertex contains label.
   * @param label - label to check for.
   * @return true if it contains, false otherwise.
   */
  static bool Exists(storage::Label label, const Vertex *const v) {
    DCHECK(v != nullptr) << "Vertex is nullptr.";
    // We have to check for existence of label because the transaction
    // might not see the label, or the label was deleted and not yet
    // removed from the index.
    const auto &labels = v->labels_;
    return std::find(labels.begin(), labels.end(), label) != labels.end();
  }

  /**
   * @brief - Check if Edge has edge_type.
   * @param edge_type - edge_type to check for.
   * @return true if it has that edge_type, false otherwise.
   */
  static bool Exists(storage::EdgeType edge_type, const Edge *const e) {
    DCHECK(e != nullptr) << "Edge is nullptr.";
    // We have to check for equality of edge types because the transaction
    // might not see the edge type, or the edge type was deleted and not yet
    // removed from the index.
    return e->edge_type_ == edge_type;
  }

  // Map from key to its skiplist of index entries.
  ConcurrentMap<TKey, std::unique_ptr<SkipList<IndexEntry>>> indices_;
};
} // namespace database

View File

@ -1,533 +0,0 @@
#pragma once
#include <optional>
#include "data_structures/concurrent/concurrent_map.hpp"
#include "data_structures/concurrent/skiplist.hpp"
#include "storage/common/index.hpp"
#include "storage/common/types/types.hpp"
#include "storage/single_node_ha/edge.hpp"
#include "storage/single_node_ha/mvcc/version_list.hpp"
#include "storage/single_node_ha/vertex.hpp"
#include "transactions/transaction.hpp"
#include "utils/bound.hpp"
#include "utils/total_ordering.hpp"
namespace database {
/**
* @brief Implements LabelPropertyIndex.
* Currently this provides implementation for:
* acquiring all entries which contain the given label, and a given property
* sorted by the property value
* acquiring all non-unique entries with the given label, and property, with
* exactly one property value
*/
class LabelPropertyIndex {
public:
LabelPropertyIndex(){};
LabelPropertyIndex(const LabelPropertyIndex &other) = delete;
LabelPropertyIndex(LabelPropertyIndex &&other) = delete;
LabelPropertyIndex &operator=(const LabelPropertyIndex &other) = delete;
LabelPropertyIndex &operator=(LabelPropertyIndex &&other) = delete;
/**
* @brief - Contain Label + property, to be used as an index key.
*/
class Key : public utils::TotalOrdering<Key> {
public:
const storage::Label label_;
const storage::Property property_;
Key(storage::Label label, storage::Property property)
: label_(label), property_(property) {}
// Comparison operators - we need them to keep this sorted inside skiplist.
bool operator<(const Key &other) const {
if (this->label_ != other.label_) return this->label_ < other.label_;
return this->property_ < other.property_;
}
bool operator==(const Key &other) const {
return this->label_ == other.label_ && this->property_ == other.property_;
}
};
/**
* @brief - Creates index with the given key if it doesn't exist. Note that
* you still need to populate the index with existing records.
* @return - True if it created the index, false if it already exists.
*/
bool CreateIndex(const Key &key) {
auto access = indices_.access();
// Avoid creation if it already exists.
auto iter = access.find(key);
if (iter != access.end()) return false;
auto ret = access.insert(key, std::make_unique<SkipList<IndexEntry>>());
return ret.second;
}
/**
* Returns if it succeeded in deleting the index and freeing the index memory
*/
void DeleteIndex(const Key &key) { indices_.access().remove(key); }
/**
* @brief - Updates all indexes which should contain this vertex.
* @param vlist - pointer to vlist entry to add
* @param vertex - pointer to vertex record entry to add (contained in vlist)
*/
void UpdateOnLabelProperty(mvcc::VersionList<Vertex> *const vlist,
const Vertex *const vertex) {
const auto &labels = vertex->labels_;
// We need to check if the given vertex can be inserted in all indexes
for (auto &index : indices_.access()) {
// Vertex has the given label
if (std::find(labels.begin(), labels.end(), index.first.label_) ==
labels.end())
continue;
auto prop = vertex->properties_.at(index.first.property_);
if (prop.type() != PropertyValue::Type::Null) {
Insert(*index.second, prop, vlist, vertex);
}
}
}
/**
* @brief - Updates all indexes with `label` and any property in `vertex` that
* exists.
* @param label - indexes with this label might be updated if vertex contains
* the corresponding property.
* @param vlist - pointer to vlist entry to add
* @param vertex - pointer to vertex record entry to add (contained in vlist)
*/
void UpdateOnLabel(storage::Label label,
mvcc::VersionList<Vertex> *const vlist,
const Vertex *const vertex) {
// We need to check if the given vertex can be inserted in all indexes
for (auto &index : indices_.access()) {
if (index.first.label_ != label) continue;
auto prop = vertex->properties_.at(index.first.property_);
if (prop.type() != PropertyValue::Type::Null) {
// Property exists and vertex should be added to skiplist.
Insert(*index.second, prop, vlist, vertex);
}
}
}
/**
* @brief - Updates all indexes with `property` and any label in `vertex` that
* exists.
* @param property - indexes with this property might be updated if vertex
* contains the corresponding label.
* @param vlist - pointer to vlist entry to add
* @param vertex - pointer to vertex record entry to add (contained in vlist)
*/
void UpdateOnProperty(storage::Property property,
mvcc::VersionList<Vertex> *const vlist,
const Vertex *const vertex) {
const auto &labels = vertex->labels_;
for (auto &index : indices_.access()) {
if (index.first.property_ != property) continue;
if (std::find(labels.begin(), labels.end(), index.first.label_) !=
labels.end()) {
// Label exists and vertex should be added to skiplist.
Insert(*index.second, vertex->properties_.at(property), vlist, vertex);
}
}
}
/**
* @brief - Get all the inserted vlists in key specific storage which still
* have that label and property visible in this transaction.
* @param key - Label+Property to query.
* @param t - current transaction, which determines visibility.
* @param current_state If true then the graph state for the
* current transaction+command is returned (insertions, updates and
* deletions performed in the current transaction+command are not
* ignored).
* @return iterable collection of vlists of vertex records with the requested
* key sorted ascendingly by the property value.
*/
auto GetVlists(const Key &key, const tx::Transaction &t, bool current_state) {
DCHECK(IndexExists(key)) << "Index not yet ready.";
auto access = GetKeyStorage(key)->access();
auto begin = access.begin();
return index::GetVlists<typename SkipList<IndexEntry>::Iterator, IndexEntry,
Vertex, SkipList<IndexEntry>>(
std::move(access), begin, [](const IndexEntry &) { return true; }, t,
[key](const IndexEntry &entry, const Vertex *const vertex) {
return LabelPropertyIndex::Exists(key, entry.value_, vertex);
},
current_state);
}
/**
* @brief - Get all the inserted vlists in key specific storage which still
* have that label and property visible in this transaction with property
* value equal to 'value'.
* @param key - Label+Property to query.
* @param value - vlists with this value will be returned
* @param t - current transaction, which determines visibility.
* @param current_state If true then the graph state for the
* current transaction+command is returned (insertions, updates and
* deletions performed in the current transaction+command are not
* ignored).
* @return iterable collection of vlists of vertex records with the requested
* key and value
*/
auto GetVlists(const Key &key, const PropertyValue &value,
const tx::Transaction &t, bool current_state) {
DCHECK(IndexExists(key)) << "Index not yet ready.";
auto access = GetKeyStorage(key)->access();
auto min_ptr = std::numeric_limits<std::uintptr_t>::min();
auto start_iter = access.find_or_larger(IndexEntry(
value, reinterpret_cast<mvcc::VersionList<Vertex> *>(min_ptr),
reinterpret_cast<const Vertex *>(min_ptr)));
return index::GetVlists<typename SkipList<IndexEntry>::Iterator, IndexEntry,
Vertex>(
std::move(access), start_iter,
[value](const IndexEntry &entry) {
return !IndexEntry::Less(value, entry.value_) &&
!IndexEntry::Less(entry.value_, value);
},
t,
[key](const IndexEntry &entry, const Vertex *const vertex) {
return LabelPropertyIndex::Exists(key, entry.value_, vertex);
},
current_state);
}
/**
* Get an iterable over all mvcc::VersionLists that are contained in this
* index and satisfy the given bounds.
*
* The returned iterator will only contain vertices/edges whose property value
* is comparable with the given bounds (w.r.t. type). This has implications on
   * Cypher query execution semantics which have not been resolved yet.
*
* At least one of the bounds must be specified. Bounds can't be @c
* PropertyValue::Null. If both bounds are specified, their PropertyValue
* elements must be of comparable types.
*
* @param key - Label+Property to query.
* @param lower - Lower bound of the interval.
* @param upper - Upper bound of the interval.
* @param t - current transaction, which determines visibility.
* @param current_state If true then the graph state for the
* current transaction+command is returned (insertions, updates and
* deletions performed in the current transaction+command are not
* ignored).
* @return iterable collection of mvcc:VersionLists pointers that
* satisfy the bounds and are visible to the given transaction.
*/
auto GetVlists(const Key &key,
const std::optional<utils::Bound<PropertyValue>> lower,
const std::optional<utils::Bound<PropertyValue>> upper,
const tx::Transaction &transaction, bool current_state) {
DCHECK(IndexExists(key)) << "Index not yet ready.";
auto type = [](const auto &bound) { return bound.value().value().type(); };
CHECK(lower || upper) << "At least one bound must be provided";
CHECK(!lower || type(lower) != PropertyValue::Type::Null)
<< "Null value is not a valid index bound";
CHECK(!upper || type(upper) != PropertyValue::Type::Null)
<< "Null value is not a valid index bound";
// helper function for creating a bound with an IndexElement
auto make_index_bound = [](const auto &optional_bound, bool bottom) {
std::uintptr_t ptr_bound =
bottom ? std::numeric_limits<std::uintptr_t>::min()
: std::numeric_limits<std::uintptr_t>::max();
return IndexEntry(
optional_bound.value().value(),
reinterpret_cast<mvcc::VersionList<Vertex> *>(ptr_bound),
reinterpret_cast<const Vertex *>(ptr_bound));
};
auto access = GetKeyStorage(key)->access();
// create the iterator startpoint based on the lower bound
auto start_iter = lower
? access.find_or_larger(make_index_bound(
lower, lower.value().IsInclusive()))
: access.begin();
    // a function that defines if an entry satisfies the filtering predicate.
// since we already handled the lower bound, we only need to deal with the
// upper bound and value type
std::function<bool(const IndexEntry &entry)> predicate;
if (lower && upper &&
!AreComparablePropertyValueTypes(type(lower), type(upper)))
predicate = [](const IndexEntry &) { return false; };
else if (upper) {
auto upper_index_entry =
make_index_bound(upper, upper.value().IsExclusive());
predicate = [upper_index_entry](const IndexEntry &entry) {
return AreComparablePropertyValueTypes(
entry.value_.type(), upper_index_entry.value_.type()) &&
entry < upper_index_entry;
};
} else {
auto lower_type = type(lower);
make_index_bound(lower, lower.value().IsExclusive());
predicate = [lower_type](const IndexEntry &entry) {
return AreComparablePropertyValueTypes(entry.value_.type(), lower_type);
};
}
return index::GetVlists<typename SkipList<IndexEntry>::Iterator, IndexEntry,
Vertex>(
std::move(access), start_iter, predicate, transaction,
[key](const IndexEntry &entry, const Vertex *const vertex) {
return LabelPropertyIndex::Exists(key, entry.value_, vertex);
},
current_state);
}
/**
* @brief - Check for existance of index.
* @param key - Index key
* @return true if the index with that key exists
*/
bool IndexExists(const Key &key) {
auto access = indices_.access();
return access.find(key) != access.end();
}
/**
* @brief - Return number of items in skiplist associated with the given
* key. This number could be imprecise because of the underlying skiplist
* storage. Use this as a hint, and not as a rule. Fails if index doesn't
* exist.
* Moreover, some transaction probably sees only part of the skiplist since
* not all versions are visible for it. Also, garbage collection might now
* have been run for some time so the index might have accumulated garbage.
* @param key - key to query for.
* @return number of items
*/
int64_t Count(const Key &key) {
auto index = GetKeyStorage(key);
CHECK(index != nullptr) << "Index doesn't exist.";
return index->access().size();
}
/**
* Returns the approximate position and count of the given value in the
* index for the given Key.
*
* Both are approximations for several reasons. Initially the position
* and count are obtained from the skipist (the index) and as such are
* not exact for perfromance reasons. At the same time the position
* and count are calculated based on property value comparison: an
* additional error is accumulated because the index could contain
* the same vertex with the same value multiple times,
* as well as the same vertex with different values.
*/
auto PositionAndCount(const Key &key, const PropertyValue &value) {
auto access = GetKeyStorage(key)->access();
return access.position_and_count(
value,
// the 'less' function
[](const PropertyValue &a, const IndexEntry &b) {
return IndexEntry::Less(a, b.value_);
},
// the 'equal_to' function
[](const PropertyValue &a, const IndexEntry &b) {
return !(IndexEntry::Less(a, b.value_) ||
IndexEntry::Less(b.value_, a));
});
}
/**
* @brief - Removes from the index all entries for which records don't contain
* the given label anymore, or the record was deleted before this transaction
* id.
*
* @param snapshot - the GC snapshot. Consists of the oldest active
* transaction's snapshot, with that transaction's id appened as last.
*/
void Refresh(const tx::Snapshot &snapshot, tx::Engine &engine) {
return index::Refresh<Key, IndexEntry, Vertex>(
indices_, snapshot, engine,
[](const Key &key, const IndexEntry &entry) {
return LabelPropertyIndex::Exists(key, entry.value_, entry.record_);
});
}
/**
* Returns a vector of keys present in this index.
*/
std::vector<Key> Keys() {
std::vector<Key> keys;
for (auto &kv : indices_.access()) keys.push_back(kv.first);
return keys;
}
private:
static bool AreComparablePropertyValueTypes(PropertyValue::Type a,
PropertyValue::Type b) {
auto is_numeric = [](const PropertyValue::Type t) {
return t == PropertyValue::Type::Int || t == PropertyValue::Type::Double;
};
return a == b || (is_numeric(a) && is_numeric(b));
}
  /**
   * @brief - Contains value, vlist and vertex record to distinguish between
   * index entries.
   */
  class IndexEntry : public utils::TotalOrdering<IndexEntry> {
   public:
    // Clones an existing entry, rebinding it to a newer record version.
    IndexEntry(const IndexEntry &entry, const Vertex *new_record)
        : IndexEntry(entry.value_, entry.vlist_, new_record) {}
    IndexEntry(const PropertyValue &value, mvcc::VersionList<Vertex> *vlist,
               const Vertex *record)
        : value_(value), vlist_(vlist), record_(record) {}

    // Comparision operators - we need them to keep this sorted inside
    // skiplist.
    bool operator<(const IndexEntry &other) const {
      // Order primarily by property value; ties are broken by the vlist and
      // record pointers so that distinct entries never compare equal.
      bool this_value_smaller = Less(this->value_, other.value_);
      if (this_value_smaller || Less(other.value_, this->value_))
        return this_value_smaller;
      if (this->vlist_ != other.vlist_) return this->vlist_ < other.vlist_;
      return this->record_ < other.record_;
    }

    bool operator==(const IndexEntry &other) const {
      return !(*this < other) && !(other < *this);
    }

    /**
     * @brief - For two property values - orders the records by type and then
     * by value. Except for integers and doubles - those are both converted to
     * double and then compared.
     * @return true if the first property value is smaller (should be before)
     * than the second one
     */
    static bool Less(const PropertyValue &a, const PropertyValue &b) {
      // Values of incomparable types are ordered by their type tag.
      if (!AreComparablePropertyValueTypes(a.type(), b.type()))
        return a.type() < b.type();

      if (a.type() == b.type()) {
        switch (a.type()) {
          case PropertyValue::Type::Null:
            return false;
          case PropertyValue::Type::String:
            return a.ValueString() < b.ValueString();
          case PropertyValue::Type::Bool:
            return a.ValueBool() < b.ValueBool();
          case PropertyValue::Type::Int:
            return a.ValueInt() < b.ValueInt();
          case PropertyValue::Type::Double:
            return a.ValueDouble() < b.ValueDouble();
          case PropertyValue::Type::List: {
            // Shorter lists order first; equal-length lists compare
            // element-wise, recursively via Less.
            auto va = a.ValueList();
            auto vb = b.ValueList();
            if (va.size() != vb.size()) return va.size() < vb.size();
            return lexicographical_compare(va.begin(), va.end(), vb.begin(),
                                           vb.end(), Less);
          }
          case PropertyValue::Type::Map: {
            // Smaller maps order first; equal-size maps compare entry-wise
            // by key, then recursively by value.
            auto ma = a.ValueMap();
            auto mb = b.ValueMap();
            if (ma.size() != mb.size()) return ma.size() < mb.size();
            const auto cmp = [](const auto &a, const auto &b) {
              if (a.first != b.first)
                return a.first < b.first;
              else
                return Less(a.second, b.second);
            };
            return lexicographical_compare(ma.begin(), ma.end(), mb.begin(),
                                           mb.end(), cmp);
          }
        }
      }

      // Only reached for the mixed Int/Double case: the switch above returns
      // for every same-type pair, and incomparable types were handled first.
      // helper for getting a double from PropertyValue, if possible
      auto get_double = [](const PropertyValue &value) {
        DCHECK(value.type() == PropertyValue::Type::Int ||
               value.type() == PropertyValue::Type::Double)
            << "Invalid data type.";
        if (value.type() == PropertyValue::Type::Int)
          return static_cast<double>(value.ValueInt());
        return value.ValueDouble();
      };

      // Types are int and double - convert int to double
      return get_double(a) < get_double(b);
    }

    /**
     * @brief - Check if previous IndexEntry represents the same vlist/value
     * pair.
     * @return - true if IndexEntries are equal by the vlist/value pair.
     */
    bool IsAlreadyChecked(const IndexEntry &previous) const {
      return previous.vlist_ == this->vlist_ &&
             !Less(previous.value_, this->value_) &&
             !Less(this->value_, previous.value_);
    }

    // Entries are immutable once inserted into the skiplist.
    const PropertyValue value_;
    mvcc::VersionList<Vertex> *const vlist_{nullptr};
    const Vertex *const record_{nullptr};
  };
/**
* @brief - Insert value, vlist, vertex into corresponding index (key) if
* the index exists.
* @param index - into which index to add
* @param value - value which to add
* @param vlist - pointer to vlist entry to add
* @param vertex - pointer to vertex record entry to add (contained in
* vlist)
*/
void Insert(SkipList<IndexEntry> &index, const PropertyValue &value,
mvcc::VersionList<Vertex> *const vlist,
const Vertex *const vertex) {
index.access().insert(IndexEntry{value, vlist, vertex});
}
/**
* @brief - Get storage for this key.
* @param key - Label and and property for which to query.
* @return pointer to skiplist of IndexEntries, if none which matches key
* exists return nullptr
*/
SkipList<IndexEntry> *GetKeyStorage(const Key &key) {
auto access = indices_.access();
auto iter = access.find(key);
if (iter == access.end()) return nullptr;
return iter->second.get();
}
/**
* @brief - Check if Vertex contains label and property with the given
* value.
* @param key - label and property to check for.
* @param value - value of property to compare
* @return true if it contains, false otherwise.
*/
static bool Exists(const Key &key, const PropertyValue &value,
const Vertex *const v) {
DCHECK(v != nullptr) << "Vertex is nullptr.";
// We have to check for existance of label because the transaction
// might not see the label, or the label was deleted and not yet
// removed from the index.
const auto &labels = v->labels_;
if (std::find(labels.begin(), labels.end(), key.label_) == labels.end())
return false;
auto prop = v->properties_.at(key.property_);
// Property doesn't exists.
if (prop.type() == PropertyValue::Type::Null) return false;
// Property value is the same as expected.
return !IndexEntry::Less(prop, value) && !IndexEntry::Less(value, prop);
}
ConcurrentMap<Key, std::unique_ptr<SkipList<IndexEntry>>> indices_;
};
} // namespace database

View File

@ -1,334 +0,0 @@
#pragma once
#include <atomic>
#include <iostream>
#include <optional>
#include "transactions/commit_log.hpp"
#include "transactions/single_node_ha/engine.hpp"
#include "transactions/transaction.hpp"
#include "storage/common/locking/record_lock.hpp"
#include "storage/common/mvcc/version.hpp"
// the mvcc implementation used here is very much like postgresql's
// more info: https://momjian.us/main/writings/pgsql/mvcc.pdf
namespace mvcc {
template <class T>
// A single MVCC version of a vertex/edge. Carries the creating/expiring
// transaction and command ids plus commit/abort hint bits, and implements
// the visibility rules for transactions and for garbage collection.
class Record : public Version<T> {
 public:
  Record() = default;
  // A record's address is its identity inside a VersionList chain, so
  // records must never be copied or moved.
  Record(const Record &) = delete;
  Record &operator=(const Record &) = delete;
  Record(Record &&) = delete;
  Record &operator=(Record &&) = delete;

  // check if this record is visible to the transaction t
  bool visible(const tx::Transaction &t) {
    // Mike Olson says 17 march 1993: the tests in this routine are correct;
    // if you think they're not, you're wrong, and you should think about it
    // again. i know, it happened to me.

    // fetch expiration info in a safe way (see fetch_exp for details)
    tx::TransactionId tx_exp;
    tx::CommandId cmd_exp;
    std::tie(tx_exp, cmd_exp) = fetch_exp();

    return ((tx_.cre == t.id_ &&     // inserted by the current transaction
             cmd_.cre < t.cid() &&   // before this command, and
             (tx_exp == 0 ||         // the row has not been deleted, or
              (tx_exp == t.id_ &&    // it was deleted by the current
                                     // transaction
               cmd_exp >= t.cid())))  // but not before this command,
            ||                        // or
            (visible_from(Hints::kCre, tx_.cre,
                          t) &&  // the record was inserted by a
                                 // committed transaction, and
             (tx_exp == 0 ||  // the record has not been deleted, or
              (tx_exp == t.id_ &&  // the row is being deleted by this
                                   // transaction
               cmd_exp >= t.cid()) ||  // but it's not deleted "yet", or
              (tx_exp != t.id_ &&  // the row was deleted by another
                                   // transaction
               !visible_from(Hints::kExp, tx_exp,
                             t)  // that has not been committed
               ))));
  }

  // Stamps this record as created by `t`'s current command. May only be
  // called once per record.
  void mark_created(const tx::Transaction &t) {
    DCHECK(tx_.cre == 0) << "Marking node as created twice.";
    tx_.cre = t.id_;
    cmd_.cre = t.cid();
  }

  // Stamps this record as expired (deleted) by `t`'s current command.
  void mark_expired(const tx::Transaction &t) {
    tx_.exp = t.id_;
    cmd_.exp = t.cid();
  }

  // True if the transaction that expired this record has committed.
  bool exp_committed(tx::Engine &engine) {
    return committed(Hints::kExp, engine);
  }

  /**
   * Check if this record is visible w.r.t. to the given garbage collection
   * snapshot. See source comments for exact logic.
   *
   * @param snapshot - the GC snapshot. Consists of the oldest active
   * transaction's snapshot, with that transaction's id appended as last.
   */
  bool is_not_visible_from(const tx::Snapshot &snapshot,
                           const tx::Engine &engine) const {
    // first get tx.exp so that all the subsequent checks operate on
    // the same id. otherwise there could be a race condition
    auto exp_id = tx_.exp.load();

    // a record is NOT visible if:
    // 1. it creating transaction aborted (last check), and is also older than
    // the current oldest active transaction (optimization) OR
    // 2. a) it's expiration is not 0 (some transaction expired it)
    //    AND
    //    b) the expiring transaction is older than latest active
    //    AND
    //    c) that transaction committed (as opposed to aborted)
    //    AND
    //    d) that transaction is not in oldest active transaction's
    //    snapshot (consequently also not in the snapshots of
    //    newer transactions)
    return (exp_id != 0 && exp_id < snapshot.back() &&
            committed(Hints::kExp, engine) && !snapshot.contains(exp_id)) ||
           (tx_.cre.load() < snapshot.back() && cre_aborted(engine));
  }

  // TODO: Test this
  // True if this record is visible for write.
  // Note that this logic is different from the one above
  // in the sense that a record is visible if created before
  // OR DURING this command. this is done to support cypher's
  // queries which can match, update and return in the same query
  bool is_visible_write(const tx::Transaction &t) {
    // fetch expiration info in a safe way (see fetch_exp for details)
    tx::TransactionId tx_exp;
    tx::CommandId cmd_exp;
    std::tie(tx_exp, cmd_exp) = fetch_exp();

    return (tx_.cre == t.id_ &&     // inserted by the current transaction
            cmd_.cre <= t.cid() &&  // before OR DURING this command, and
            (tx_exp == 0 ||         // the row has not been deleted, or
             (tx_exp == t.id_ &&    // it was deleted by the current
                                    // transaction
              cmd_exp >= t.cid())));  // but not before this command,
  }

  /**
   * True if this record is created in the current command
   * of the given transaction.
   */
  bool is_created_by(const tx::Transaction &t) {
    return tx_.cre == t.id_ && cmd_.cre == t.cid();
  }

  /**
   * True if this record is expired in the current command
   * of the given transaction.
   */
  bool is_expired_by(const tx::Transaction &t) const {
    return std::make_pair(t.id_, t.cid()) == fetch_exp();
  }

  const auto &tx() const { return tx_; }
  const auto &cmd() const { return cmd_; }

  /**
   * Makes sure that create and expiry are in sync with hints if they are
   * committed or aborted and are before the `tx_cutoff`.
   * `tx_cutoff` exists as a performance optimization to avoid setting hint
   * bits on records for which we don't need to have a guarantee that they are
   * set as part of GC hints setting procedure
   */
  void populate_hints(const tx::Engine &engine, tx::TransactionId tx_cutoff) {
    populate_hint_if_possible(engine, Hints::kCre, tx_cutoff);
    if (!populate_hint_if_possible(engine, Hints::kExp, tx_cutoff)) {
      // Exp is aborted and we can't set the hint, this way we don't have to
      // set the hint because an aborted transaction which expires a record is
      // the same thing as a non-expired record
      tx::TransactionId expected;
      do {
        expected = tx_.exp;
        // If the transaction expiry is no longer aborted we don't need to
        // update it anymore, and hints can't be set since it's obviously an
        // active transaction - there might be a case where this transaction
        // gets finished and committed in the meantime and hints could be set,
        // but since we are not going to delete info for this transaction from
        // the commit log since it wasn't older than the oldest active
        // transaction at the time, or before the invocation of this method;
        // we are in the clear
        if (!engine.Info(expected).is_aborted()) break;
      } while (!tx_.exp.compare_exchange_weak(expected, 0));
      // Ideally we should set the command id as well, but by setting it we
      // can't guarantee that some new update won't change the transaction id
      // and command id before we had a chance to set it, and just leaving it
      // unchanged and relying on all methods to operate on [tx_id: 0, cmd_id:
      // some cmd] as a non-transaction doesn't seem too crazy
    }
  }

 private:
  /**
   * Fast indicators if a transaction has committed or aborted. It is possible
   * the hints do not have that information, in which case the commit log
   * needs to be consulted (a slower operation).
   */
  class Hints {
   public:
    /// Masks for the creation/expiration and commit/abort positions.
    static constexpr uint8_t kCre = 0b0011;
    static constexpr uint8_t kExp = 0b1100;
    static constexpr uint8_t kCmt = 0b0101;
    static constexpr uint8_t kAbt = 0b1010;

    /** Returns true if any bit under the given mask is set. */
    bool Get(uint8_t mask) const { return bits_ & mask; }

    /** Sets all the bits under the given mask. */
    void Set(uint8_t mask) { bits_.fetch_or(mask); }

    /** Clears all the bits under the given mask. */
    void Clear(uint8_t mask) { bits_.fetch_and(~mask); }

   private:
    std::atomic<uint8_t> bits_{0};
  };

  // Pair of atomically updated creation/expiration ids (transaction or
  // command scope, depending on TId).
  template <typename TId>
  struct CreExp {
    std::atomic<TId> cre{0};
    std::atomic<TId> exp{0};
  };

  // tx.cre is the id of the transaction that created the record
  // and tx.exp is the id of the transaction that deleted the record
  // These values are used to determine the visibility of the record
  // to the current transaction.
  CreExp<tx::TransactionId> tx_;

  // cmd.cre is the id of the command in this transaction that created the
  // record and cmd.exp is the id of the command in this transaction that
  // deleted the record. These values are used to determine the visibility
  // of the record to the current command in the running transaction.
  CreExp<tx::CommandId> cmd_;

  // mutable so that const visibility checks can still cache hint bits.
  mutable Hints hints_;

  /** Fetch the (transaction, command) expiration before the check
   * because they can be concurrently modified by multiple transactions.
   * Do it in a loop to ensure that command is consistent with transaction.
   */
  auto fetch_exp() const {
    tx::TransactionId tx_exp;
    tx::CommandId cmd_exp;
    do {
      tx_exp = tx_.exp;
      cmd_exp = cmd_.exp;
    } while (tx_exp != tx_.exp);
    return std::make_pair(tx_exp, cmd_exp);
  }

  /**
   * Populates hint if it is not set for the given create/expiry mask and is
   * before the `tx_cutoff` if specified. Note that it doesn't set hint bits
   * for expiry transactions which abort because it's too expensive to
   * maintain correctness of those hints with regards to race conditions
   * @returns - true if hints are now equal to transaction status
   * (committed/aborted), will only be false if we are trying to set hint for
   * aborted transaction which is this records expiry
   */
  bool populate_hint_if_possible(
      const tx::Engine &engine, const uint8_t mask,
      const std::optional<tx::TransactionId> tx_cutoff = std::nullopt) const {
    DCHECK(mask == Hints::kCre || mask == Hints::kExp)
        << "Mask should be either for creation or expiration";
    if (hints_.Get(mask)) return true;
    auto id = mask == Hints::kCre ? tx_.cre.load() : tx_.exp.load();
    // Nothing to do here if there is no id or id is larger than tx_cutoff
    if (!id || (tx_cutoff && id >= *tx_cutoff)) return true;
    auto info = engine.Info(id);
    if (info.is_committed()) {
      hints_.Set(mask & Hints::kCmt);
    } else if (info.is_aborted()) {
      // Abort hints can only be updated for creation hints because only one
      // transaction can be creating a single record, so there is no races
      if (mask == Hints::kCre)
        hints_.Set(mask & Hints::kAbt);
      else
        return false;
    }
    return true;
  }

  /**
   * @brief - Check if the transaction `id` has committed before `t` started
   * (that means that edits done by transaction `id` are visible in `t`)
   *
   * Evaluates to true if that transaction has committed,
   * it started before `t` and it's not in it's snapshot.
   *
   * @param mask - Hint bits mask (either Hints::kCre or Hints::kExp).
   * @param id - id to check if it's committed and visible
   * @return true if the id is committed and visible for the transaction t.
   */
  bool visible_from(uint8_t mask, tx::TransactionId id,
                    const tx::Transaction &t) {
    DCHECK(mask == Hints::kCre || mask == Hints::kExp)
        << "Mask must be either kCre or kExp";
    // Dominik Gleich says 4 april 2017: the tests in this routine are
    // correct; if you think they're not, you're wrong, and you should think
    // about it again. I know, it happened to me (and also to Matej Gradicek).

    // You certainly can't see the transaction with id greater than yours as
    // that means it started after this transaction and if it commited, it
    // commited after this transaction has started.
    if (id >= t.id_) return false;

    // The creating transaction is still in progress (examine snapshot)
    if (t.snapshot().contains(id)) return false;

    return committed(mask, t.engine_);
  }

  /**
   * @brief - Check if the transaction for the given mask is committed.
   *
   * @param mask - Hint bits mask (either Hints::kCre or Hints::kExp).
   * @param engine - engine instance with information about transaction
   * statuses
   * @return true if it's committed, false otherwise
   */
  bool committed(uint8_t mask, const tx::Engine &engine) const {
    DCHECK(mask == Hints::kCre || mask == Hints::kExp)
        << "Mask must be either kCre or kExp";
    populate_hint_if_possible(engine, mask);
    return hints_.Get(Hints::kCmt & mask);
  }

  /**
   * @brief - Check if tx_.cre is aborted. If you need to check for exp
   * transaction do it manually by looking at commit log. This function can't
   * do that for you since hints can't be used for exp transaction (reason is
   * described in function above).
   *
   * @param engine - engine instance with information about transaction
   * statuses
   * @return true if it's aborted, false otherwise
   */
  bool cre_aborted(const tx::Engine &engine) const {
    // Populate hints if not set and return result from hints
    DCHECK(populate_hint_if_possible(engine, Hints::kCre))
        << "Hints not populated";
    return hints_.Get(Hints::kAbt & Hints::kCre);
  }
};
} // namespace mvcc

View File

@ -1,261 +0,0 @@
#pragma once
#include "storage/common/locking/record_lock.hpp"
#include "storage/common/mvcc/exceptions.hpp"
#include "storage/common/types/types.hpp"
#include "transactions/transaction.hpp"
#include "utils/exceptions.hpp"
namespace mvcc {
template <class T>
// Owns the singly-linked chain of MVCC record versions for one vertex/edge.
// The newest version is at head_; older versions are reached via next().
class VersionList {
 public:
  /**
   * @brief Constructor that is used to insert one item into VersionList.
   *
   * @param t - transaction
   * @param gid - Version list identifier. Uniqueness guaranteed by the code
   * creating this version list.
   * @param args - args forwarded to constructor of item T (for
   * creating the first Record (Version) in this VersionList.
   */
  template <typename... Args>
  VersionList(const tx::Transaction &t, storage::Gid gid, Args &&... args)
      : gid_(gid) {
    // TODO replace 'new' with something better
    auto *v1 = new T(std::forward<Args>(args)...);
    v1->mark_created(t);
    head_ = v1;
  }

  VersionList() = delete;
  VersionList(const VersionList &) = delete;
  VersionList &operator=(const VersionList &) = delete;
  // We do a lot of raw-pointer ops with VLists, and these ops assume that a
  // VList's address identifies a vertex/edge absolutely and during its whole
  // lifetime. We also assume that the VList owner is the database and that
  // ownership is also handled via raw pointers so this shouldn't be moved or
  // move assigned.
  VersionList(VersionList &&other) = delete;
  VersionList &operator=(VersionList &&other) = delete;

  // Deleting the head destroys the whole version chain (record destructors
  // delete their successors recursively).
  ~VersionList() { delete head_.load(); }

  friend std::ostream &operator<<(std::ostream &stream,
                                  const VersionList<T> &vlist) {
    stream << "VersionList" << std::endl;

    T *record = vlist.head_;

    while (record != nullptr) {
      stream << "-- " << *record << std::endl;
      record = record->next();
    }

    return stream;
  }

  /**
   * Garbage collects records that are not reachable/visible anymore.
   *
   * Relinks this version-list so that garbage collected records are no
   * longer reachable through this version list.
   * Visibility is defined in mvcc::Record::is_not_visible_from,
   * to which the given `snapshot` is passed.
   *
   * This method is NOT thread-safe.
   *
   * @param snapshot - the GC snapshot. Consists of the oldest active
   * transaction's snapshot, with that transaction's id appended as last.
   * @param engine - transaction engine to use - we need it to check which
   * records were committed and which weren't
   * @return pair<status, to_delete>; status is true - If version list is
   * empty after garbage collection. to_delete points to the newest record
   * that is not visible anymore. If none exists to_delete will point to
   * nullptr.
   */
  std::pair<bool, T *> GcDeleted(const tx::Snapshot &snapshot,
                                 const tx::Engine &engine) {
    //    nullptr
    //       |
    //     [v1]      ...  all of this gets deleted!
    //       |
    //     [v2] <------+  head_of_deletable_records
    //       |         |
    //     [v3] <------+  oldest_visible_record
    //       |         |  Jump backwards until you find the oldest visible
    //   [VerList] ----+  record, or you reach the end of the list
    //
    T *head = head_;
    T *current = head;
    T *oldest_visible_record = nullptr;
    while (current) {
      // Populate hints only when needed to avoid excessive rpc calls on
      // workers.
      // snapshot.back() corresponds to the oldest active transaction,
      // and this makes it set only hint bits when the creating or expiring
      // transaction of a record is older than that)
      current->populate_hints(engine, snapshot.back());
      if (!current->is_not_visible_from(snapshot, engine))
        oldest_visible_record = current;
      current = current->next();
    }

    if (oldest_visible_record) {
      T *head_of_deletable_records = oldest_visible_record->next();
      // oldest_visible_record might be visible to some transaction but
      // head_of_deletable_records is not and will never be visited by the
      // find function and as such doesn't represent pointer invalidation
      // race-condition risk.
      oldest_visible_record->next(nullptr);  // No transaction will look
                                             // further than this record and
                                             // that's why it's safe to set
                                             // next to nullptr.
      // Calling destructor of head_of_deletable_records will clean everything
      // older than this record since they are called recursively.
      return std::make_pair(false, head_of_deletable_records);
    }

    // This can happen only if the head points to a expired record. Since
    // there is no visible records in this version_list we can remove it.
    head_ = nullptr;
    // This is safe to return as ready for deletion since we unlinked head
    // above and this will only be deleted after the last active transaction
    // ends.
    return std::make_pair(true, head);
  }

  /**
   * @brief - returns oldest record
   * @return nullptr if none exist
   */
  T *Oldest() {
    T *r = head_;
    while (r && r->next(std::memory_order_seq_cst))
      r = r->next(std::memory_order_seq_cst);
    return r;
  }

  // Returns the newest version visible to transaction `t`, or nullptr.
  T *find(const tx::Transaction &t) {
    T *r = head_;

    //    nullptr
    //       |
    //     [v1]      ...
    //       |
    //     [v2] <------+
    //       |         |
    //     [v3] <------+
    //       |         |  Jump backwards until you find a first visible
    //   [VerList] ----+  version, or you reach the end of the list
    //
    while (r != nullptr && !r->visible(t))
      r = r->next(std::memory_order_seq_cst);

    return r;
  }

  /**
   * Looks for and sets two versions. The 'old' version is the
   * newest version that is visible by the current transaction+command,
   * but has not been created by it. The 'new' version is the version
   * that has been created by current transaction+command.
   *
   * It is possible that both, either or neither are found:
   *     - both are found when an existing record has been modified
   *     - only old is found when an existing record has not been modified
   *     - only new is found when the whole vlist was created
   *     - neither is found when for example the record has been deleted but
   *       not garbage collected yet
   *
   * @param t The transaction
   */
  void find_set_old_new(const tx::Transaction &t, T **old_ref, T **new_ref) {
    // assume that the sought old record is further down the list
    // from new record, so that if we found old we can stop looking
    *new_ref = nullptr;
    *old_ref = head_;
    while (*old_ref != nullptr && !(*old_ref)->visible(t)) {
      if (!*new_ref && (*old_ref)->is_created_by(t)) *new_ref = *old_ref;
      *old_ref = (*old_ref)->next(std::memory_order_seq_cst);
    }
  }

  /**
   * Looks for the first visible record seen by this transaction. If the
   * current transaction has already created new record in the current command
   * then that record is returned, else first older visible record is updated.
   * New record becomes head of the version list and it is returned. There
   * should always be older visible record when this update is called.
   *
   * @param t The transaction
   */
  T *update(const tx::Transaction &t) {
    DCHECK(head_ != nullptr) << "Head is nullptr on update.";
    T *old_record = nullptr;
    T *new_record = nullptr;
    find_set_old_new(t, &old_record, &new_record);

    // check if current transaction in current cmd has
    // already updated version list
    if (new_record) return new_record;

    // check if we found any visible records
    CHECK(old_record != nullptr) << "Updating nullptr record";

    return update(old_record, t);
  }

  /** Makes the given record as being expired by the given transaction. */
  void remove(T *record, const tx::Transaction &t) {
    DCHECK(record != nullptr) << "Record is nullptr on removal.";
    lock_and_validate(record, t);
    record->mark_expired(t);
  }

  const storage::Gid gid_;

  // The Cypher-visible id is derived directly from the gid.
  int64_t cypher_id() { return gid_.AsInt(); }

 private:
  // Acquires the per-vlist lock for `t` and throws SerializationError when a
  // concurrent committed transaction already expired `record`.
  void lock_and_validate(T *record, const tx::Transaction &t) {
    DCHECK(record != nullptr) << "Record is nullptr on lock and validation.";

    // take a lock on this node
    t.TakeLock(lock_);

    // if the record hasn't been deleted yet or the deleting transaction
    // has aborted, it's ok to modify it
    if (!record->tx().exp || !record->exp_committed(t.engine_)) return;

    // if it committed, then we have a serialization conflict
    throw SerializationError();
  }

  // Clones `record` into a new head version created by `t` and expires the
  // old record.
  T *update(T *record, const tx::Transaction &t) {
    DCHECK(record != nullptr) << "Record is nullptr on update.";
    lock_and_validate(record, t);

    // It could be done with unique_ptr but while this could mean memory
    // leak on exception, unique_ptr could mean use after free. Memory
    // leak is less dangerous.
    auto *updated = record->CloneData();

    updated->mark_created(t);
    record->mark_expired(t);

    // Updated version should point to the latest available version. Older
    // versions that can be deleted will be removed during the GC phase.
    updated->next(head_.load(), std::memory_order_seq_cst);

    // Store the updated version as the first version point to by head.
    head_.store(updated, std::memory_order_seq_cst);

    return updated;
  }

  std::atomic<T *> head_{nullptr};
  RecordLock lock_;
};
} // namespace mvcc

View File

@ -1,174 +0,0 @@
#include "storage/single_node_ha/record_accessor.hpp"
#include <glog/logging.h>
#include "database/single_node_ha/graph_db_accessor.hpp"
#include "durability/single_node_ha/state_delta.hpp"
#include "storage/single_node_ha/edge.hpp"
#include "storage/single_node_ha/vertex.hpp"
using database::StateDelta;
// Binds the accessor to a version list and the accessor's owning
// transaction context; version pointers are resolved lazily later.
template <typename TRecord>
RecordAccessor<TRecord>::RecordAccessor(mvcc::VersionList<TRecord> *address,
                                        database::GraphDbAccessor &db_accessor)
    : db_accessor_(&db_accessor), address_(address) {}
// Returns the value of property `key` on the currently switched-to version.
template <typename TRecord>
PropertyValue RecordAccessor<TRecord>::PropsAt(storage::Property key) const {
  const auto &props = current().properties_;
  return props.at(key);
}
// Sets a vertex property: records a state delta, updates the writable
// version and keeps label/property indexes in sync.
template <>
void RecordAccessor<Vertex>::PropsSet(storage::Property key,
                                      PropertyValue value) {
  auto &dba = db_accessor();
  // The old value is needed so indexes can be updated correctly.
  auto old_value = PropsAt(key);
  auto state_delta = StateDelta::PropsSetVertex(
      dba.transaction_id(), gid(), key, dba.PropertyName(key), value);
  update().properties_.set(key, value);
  dba.UpdateOnAddProperty(key, old_value, value, *this, &update());
  dba.sd_buffer()->Emplace(state_delta);
}
// Sets an edge property: records a state delta and updates the writable
// version (edges need no index maintenance here).
template <>
void RecordAccessor<Edge>::PropsSet(storage::Property key,
                                    PropertyValue value) {
  auto &dba = db_accessor();
  auto state_delta = StateDelta::PropsSetEdge(
      dba.transaction_id(), gid(), key, dba.PropertyName(key), value);
  update().properties_.set(key, value);
  dba.sd_buffer()->Emplace(state_delta);
}
// Erases a vertex property by setting it to a Null PropertyValue; emits the
// corresponding delta and notifies the index machinery.
template <>
void RecordAccessor<Vertex>::PropsErase(storage::Property key) {
  auto &dba = db_accessor();
  // Capture the value being removed before overwriting it.
  auto old_value = PropsAt(key);
  auto state_delta = StateDelta::PropsSetVertex(
      dba.transaction_id(), gid(), key, dba.PropertyName(key), PropertyValue());
  update().properties_.set(key, PropertyValue());
  dba.UpdateOnRemoveProperty(key, old_value, *this, &update());
  dba.sd_buffer()->Emplace(state_delta);
}
// Erases an edge property by setting it to a Null PropertyValue and emits
// the corresponding delta.
template <>
void RecordAccessor<Edge>::PropsErase(storage::Property key) {
  auto &dba = db_accessor();
  auto state_delta = StateDelta::PropsSetEdge(
      dba.transaction_id(), gid(), key, dba.PropertyName(key), PropertyValue());
  update().properties_.set(key, PropertyValue());
  dba.sd_buffer()->Emplace(state_delta);
}
// Removes every property from the record, one PropsErase per key.
template <typename TRecord>
void RecordAccessor<TRecord>::PropsClear() {
  // Collect keys first so erasing doesn't interfere with iteration.
  std::vector<storage::Property> keys;
  for (const auto &kv : update().properties_) {
    keys.emplace_back(kv.first);
  }
  for (const auto &key : keys) PropsErase(key);
}
// Read-only view of the currently switched-to version's property store.
template <typename TRecord>
const PropertyValueStore &RecordAccessor<TRecord>::Properties() const {
  const auto &record = current();
  return record.properties_;
}
// Accessors are equal when they point at the same version list (identity
// comparison); both must belong to the same transaction.
template <typename TRecord>
bool RecordAccessor<TRecord>::operator==(const RecordAccessor &other) const {
  DCHECK(db_accessor_->transaction_id() == other.db_accessor_->transaction_id())
      << "Not in the same transaction.";
  return other.address_ == address_;
}
// Returns the GraphDbAccessor this record accessor was created with.
template <typename TRecord>
database::GraphDbAccessor &RecordAccessor<TRecord>::db_accessor() const {
  return *db_accessor_;
}
// Returns the globally-unique id stored on the version list. Vertices and
// edges have separate gid domains (see the class documentation).
template <typename TRecord>
storage::Gid RecordAccessor<TRecord>::gid() const {
  return address_->gid_;
}
// Returns the MVCC version list this accessor points at.
template <typename TRecord>
typename mvcc::VersionList<TRecord> *RecordAccessor<TRecord>::address() const {
  return address_;
}
// Switches the accessor to read from the version created/updated by the
// current transaction+command, when one exists; otherwise it stays on the
// old version.
template <typename TRecord>
RecordAccessor<TRecord> &RecordAccessor<TRecord>::SwitchNew() {
  // A null new_ may simply mean we never looked for it. Reconstruct
  // refreshes both pointers (old_ ends up with the same value it has now)
  // at the same cost as a direct lookup of new_.
  if (new_ == nullptr && !Reconstruct()) {
    DLOG(FATAL)
        << "RecordAccessor::SwitchNew - accessor invalid after Reconstruct";
  }
  current_ = new_ != nullptr ? new_ : old_;
  return *this;
}
// Switches the accessor to read from the version not touched by the current
// transaction+command; falls back to new_ when no such version exists
// (i.e. the record was created by this transaction+command).
template <typename TRecord>
RecordAccessor<TRecord> &RecordAccessor<TRecord>::SwitchOld() {
  current_ = old_ != nullptr ? old_ : new_;
  return *this;
}
// Re-resolves the old_/new_ pointers against the version list for the
// current transaction+command. Returns true when at least one version is
// visible, i.e. the accessor is still valid.
template <typename TRecord>
bool RecordAccessor<TRecord>::Reconstruct() const {
  auto &accessor = db_accessor();
  address()->find_set_old_new(accessor.transaction(), &old_, &new_);
  current_ = old_ != nullptr ? old_ : new_;
  return old_ != nullptr || new_ != nullptr;
}
// Returns a mutable version of the record, creating one in the version
// list if the current transaction+command hasn't made one yet.
//
// @throws RecordDeletedError when the record is expired in this
//         transaction.
template <typename TRecord>
TRecord &RecordAccessor<TRecord>::update() const {
  auto &accessor = db_accessor();
  // Edge records resolve their versioned data lazily, so the version
  // pointers may not be populated yet.
  if (std::is_same<TRecord, Edge>::value && current_ == nullptr) {
    const bool reconstructed = Reconstruct();
    DCHECK(reconstructed) << "Unable to initialize record";
  }
  const auto &tx = accessor.transaction();
  const bool expired =
      new_ != nullptr ? new_->is_expired_by(tx) : old_->is_expired_by(tx);
  if (expired) throw RecordDeletedError();
  if (new_ != nullptr) return *new_;
  new_ = address()->update(accessor.transaction());
  DCHECK(new_ != nullptr) << "RecordAccessor.new_ is null after update";
  return *new_;
}
// Returns the Cypher id stored on the version list for this record.
template <typename TRecord>
int64_t RecordAccessor<TRecord>::CypherId() const {
  return address()->cypher_id();
}
// Returns the version (old_ or new_) this accessor currently reads from.
template <typename TRecord>
const TRecord &RecordAccessor<TRecord>::current() const {
  // Edge records resolve their versioned data lazily, so current_ may still
  // be unset on first access; reconstruct to populate it.
  if (std::is_same<TRecord, Edge>::value && current_ == nullptr) {
    const bool reconstructed = Reconstruct();
    DCHECK(reconstructed) << "Unable to initialize record";
  }
  DCHECK(current_ != nullptr) << "RecordAccessor.current_ pointer is nullptr";
  return *current_;
}
template class RecordAccessor<Vertex>;
template class RecordAccessor<Edge>;

View File

@ -1,204 +0,0 @@
/// @file
#pragma once
#include <glog/logging.h>
#include "storage/single_node_ha/mvcc/version_list.hpp"
#include "storage/common/types/property_value.hpp"
#include "storage/common/types/property_value_store.hpp"
#include "storage/common/types/types.hpp"
namespace database {
class GraphDbAccessor;
struct StateDelta;
}; // namespace database
/**
* An accessor to a database record (an Edge or a Vertex).
*
* Exposes view and update functions to the client programmer.
* Assumes responsibility of doing all the relevant book-keeping
* (such as index updates etc).
*
* @tparam TRecord Type of record (MVCC Version) of the accessor.
*/
template <typename TRecord>
class RecordAccessor {
 protected:
  /**
   * The database::GraphDbAccessor is friend to this accessor so it can
   * operate on its data (mvcc version-list and the record itself).
   * This is legitimate because database::GraphDbAccessor creates
   * RecordAccessors
   * and is semantically their parent/owner. It is necessary because
   * the database::GraphDbAccessor handles insertions and deletions, and these
   * operations modify data intensively.
   */
  friend database::GraphDbAccessor;
 public:
  /**
   * @param address Address (local or global) of the Vertex/Edge of this
   * accessor.
   * @param db_accessor The DB accessor that "owns" this record accessor.
   */
  RecordAccessor(mvcc::VersionList<TRecord> *address, database::GraphDbAccessor &db_accessor);
  // this class is default copyable, movable and assignable
  RecordAccessor(const RecordAccessor &other) = default;
  RecordAccessor(RecordAccessor &&other) = default;
  RecordAccessor &operator=(const RecordAccessor &other) = default;
  RecordAccessor &operator=(RecordAccessor &&other) = default;
  /** Gets the property for the given key. */
  PropertyValue PropsAt(storage::Property key) const;
  /** Sets a value on the record for the given property. */
  void PropsSet(storage::Property key, PropertyValue value);
  /** Erases the property for the given key. */
  void PropsErase(storage::Property key);
  /** Removes all the properties from this record. */
  void PropsClear();
  /** Returns the properties of this record. */
  const PropertyValueStore &Properties() const;
  /** Equality is version-list (address) equality; both accessors must belong
   * to the same transaction. */
  bool operator==(const RecordAccessor &other) const;
  bool operator!=(const RecordAccessor &other) const {
    return !(*this == other);
  }
  /** Returns a GraphDB accessor of this record accessor. */
  database::GraphDbAccessor &db_accessor() const;
  /**
   * Returns a globally-unique ID of this vertex or edge. Note that vertices
   * and edges have separate ID domains, there can be a vertex with ID X and an
   * edge with the same id.
   */
  storage::Gid gid() const;
  /** Returns the MVCC version list this accessor points at. */
  mvcc::VersionList<TRecord> *address() const;
  /*
   * Switches this record accessor to use the latest version visible to the
   * current transaction+command. Possibly the one that was created by this
   * transaction+command.
   *
   * @return A reference to this.
   */
  RecordAccessor<TRecord> &SwitchNew();
  /** Returns the new record pointer. */
  TRecord *GetNew() const { return new_; }
  /**
   * Attempts to switch this accessor to use the latest version not updated by
   * the current transaction+command. If that is not possible (vertex/edge was
   * created by the current transaction/command), it does nothing (current
   * remains pointing to the new version).
   *
   * @return A reference to this.
   */
  RecordAccessor<TRecord> &SwitchOld();
  /** Returns the old record pointer. */
  TRecord *GetOld() const { return old_; }
  /**
   * Reconstructs the internal state of the record accessor so it uses the
   * versions appropriate to this transaction+command.
   *
   * @return True if this accessor is valid after reconstruction. This means
   * that at least one record pointer was found (either new_ or old_), possibly
   * both.
   */
  bool Reconstruct() const;
  /**
   * Ensures there is an updateable version of the record in the version_list,
   * and that the `new_` pointer points to it. Returns a reference to that
   * version.
   *
   * It is not legal to call this function on a Vertex/Edge that has been
   * deleted in the current transaction+command.
   *
   * @throws RecordDeletedError
   */
  TRecord &update() const;
  /**
   * Returns true if the given accessor is visible to the given transaction.
   *
   * @param current_state If true then the graph state for the
   * current transaction+command is returned (insertions, updates and
   * deletions performed in the current transaction+command are not
   * ignored).
   */
  bool Visible(const tx::Transaction &t, bool current_state) const {
    return (old_ && !(current_state && old_->is_expired_by(t))) ||
           (current_state && new_ && !new_->is_expired_by(t));
  }
  /**
   * Returns Cypher Id of this record.
   */
  int64_t CypherId() const;
  /** Returns the current version (either new_ or old_) set on this
   * RecordAccessor. */
  const TRecord &current() const;
 protected:
  /**
   * Pointer to the version (either old_ or new_) that READ operations
   * in the accessor should take data from. Note that WRITE operations
   * should always use new_.
   *
   * This pointer can be null if created by an accessor which lazily reads from
   * mvcc.
   */
  mutable TRecord *current_{nullptr};
 private:
  // The database accessor for which this record accessor is created
  // Provides means of getting to the transaction and database functions.
  // Immutable, set in the constructor and never changed.
  database::GraphDbAccessor *db_accessor_;
  // The MVCC version list of the record; identity of the record.
  mvcc::VersionList<TRecord> *address_;
  /**
   * Latest version which is visible to the current transaction+command
   * but has not been created nor modified by the current transaction+command.
   *
   * Can be null only when the record itself (the version-list) has
   * been created by the current transaction+command.
   */
  mutable TRecord *old_{nullptr};
  /**
   * Version that has been modified (created or updated) by the current
   * transaction+command.
   *
   * Can be null when the record has not been modified in the current
   * transaction+command. It is also possible that the modification
   * has happened, but this RecordAccessor does not know this. To
   * ensure correctness, the `SwitchNew` function must check if this
   * is null, and if it is it must check with the vlist_ if there is
   * an update.
   */
  mutable TRecord *new_{nullptr};
};
/** Error when trying to update a record deleted in the current
 * transaction+command. Thrown by RecordAccessor::update(). */
class RecordDeletedError : public utils::BasicException {
 public:
  RecordDeletedError()
      : utils::BasicException(
            // Fixed typo: "commad" -> "command".
            "Can't update a record deleted in the current "
            "transaction+command") {}
};

View File

@ -1,47 +0,0 @@
#pragma once
#include "durability/single_node_ha/state_delta.hpp"
namespace storage {
/// Buffer of StateDeltas, grouped by the id of the transaction that
/// produced them. All operations hold `buffer_lock_` for their full
/// duration: the original implementation released the lock before
/// appending (Emplace) and before copying the per-transaction vector
/// (GetDeltas), which races when two threads touch the same transaction id
/// concurrently (vector reallocation during read/append).
class StateDeltaBuffer final {
 public:
  /// Inserts a new StateDelta in buffer.
  void Emplace(const database::StateDelta &delta) {
    tx::TransactionId tx_id = delta.transaction_id;
    std::lock_guard<std::mutex> lock(buffer_lock_);
    // operator[] creates the per-transaction vector on first use.
    buffer_[tx_id].emplace_back(delta);
  }
  /// Retrieves a copy of all buffered StateDeltas for a given transaction
  /// id. If there are no such StateDeltas, the returned vector is empty.
  std::vector<database::StateDelta> GetDeltas(
      const tx::TransactionId &tx_id) {
    std::lock_guard<std::mutex> lock(buffer_lock_);
    auto it = buffer_.find(tx_id);
    if (it == buffer_.end()) return {};
    // Copy under the lock so a concurrent Emplace can't reallocate the
    // vector while it is being read.
    return it->second;
  }
  /// Deletes all buffered StateDeltas for a given transaction id.
  void Erase(const tx::TransactionId &tx_id) {
    std::lock_guard<std::mutex> lock(buffer_lock_);
    buffer_.erase(tx_id);
  }
 private:
  mutable std::mutex buffer_lock_;
  std::unordered_map<tx::TransactionId, std::vector<database::StateDelta>>
      buffer_;
};
} // namespace storage

View File

@ -1,102 +0,0 @@
#pragma once
#include <filesystem>
#include <optional>
#include "data_structures/concurrent/concurrent_map.hpp"
#include "kvstore/kvstore.hpp"
#include "storage/common/constraints/unique_constraints.hpp"
#include "storage/common/types/types.hpp"
#include "storage/single_node_ha/edge.hpp"
#include "storage/single_node_ha/indexes/key_index.hpp"
#include "storage/single_node_ha/indexes/label_property_index.hpp"
#include "storage/single_node_ha/mvcc/version_list.hpp"
#include "storage/single_node_ha/vertex.hpp"
#include "transactions/type.hpp"
namespace database {
class GraphDb;
};
namespace database {
/** A data structure containing the main data members of a graph database:
 * the vertex/edge version-list maps, gid generators, indexes and
 * constraints. Owned by GraphDb; garbage collected by StorageGc. */
class Storage {
 public:
  explicit Storage(const std::vector<std::string> &properties_on_disk)
      : properties_on_disk_{properties_on_disk} {}
 public:
  ~Storage() {
    // Delete vertices and edges which weren't collected before, also deletes
    // records inside version list
    for (auto &id_vlist : vertices_.access()) delete id_vlist.second;
    for (auto &id_vlist : edges_.access()) delete id_vlist.second;
  }
  Storage(const Storage &) = delete;
  Storage(Storage &&) = delete;
  Storage &operator=(const Storage &) = delete;
  Storage &operator=(Storage &&) = delete;
  // Generators handing out gids for newly created vertices/edges.
  storage::GidGenerator &VertexGenerator() { return vertex_generator_; }
  storage::GidGenerator &EdgeGenerator() { return edge_generator_; }
  LabelPropertyIndex &label_property_index() { return label_property_index_; }
  /// Gets the local address for the given gid. Fails if not present.
  template <typename TRecord>
  mvcc::VersionList<TRecord> *LocalAddress(storage::Gid gid) const {
    const auto &map = GetMap<TRecord>();
    auto access = map.access();
    auto found = access.find(gid);
    CHECK(found != access.end())
        << "Failed to find "
        << (std::is_same<TRecord, Vertex>::value ? "vertex" : "edge")
        << " for gid: " << gid.AsUint();
    return found->second;
  }
  /// Gets names of properties stored on disk
  std::vector<std::string> &PropertiesOnDisk() { return properties_on_disk_; }
 private:
  friend class GraphDbAccessor;
  // Needed for GraphDb::RefreshStat.
  friend class GraphDb;
  friend class StorageGc;
  storage::GidGenerator vertex_generator_;
  storage::GidGenerator edge_generator_;
  // main storage for the graph
  ConcurrentMap<storage::Gid, mvcc::VersionList<Vertex> *> vertices_;
  ConcurrentMap<storage::Gid, mvcc::VersionList<Edge> *> edges_;
  // indexes
  KeyIndex<storage::Label, Vertex> labels_index_;
  LabelPropertyIndex label_property_index_;
  // unique constraints
  storage::constraints::UniqueConstraints unique_constraints_;
  std::vector<std::string> properties_on_disk_;
  /// Gets the Vertex/Edge main storage map.
  template <typename TRecord>
  const ConcurrentMap<storage::Gid, mvcc::VersionList<TRecord> *> &GetMap()
      const;
};
// GetMap specialization: Vertex records live in the vertices_ map.
template <>
inline const ConcurrentMap<storage::Gid, mvcc::VersionList<Vertex> *>
    &Storage::GetMap() const {
  return vertices_;
}
// GetMap specialization: Edge records live in the edges_ map.
template <>
inline const ConcurrentMap<storage::Gid, mvcc::VersionList<Edge> *>
    &Storage::GetMap() const {
  return edges_;
}
} // namespace database

View File

@ -1,174 +0,0 @@
#pragma once
#include <chrono>
#include <queue>
#include "data_structures/concurrent/concurrent_map.hpp"
#include "raft/raft_server.hpp"
#include "storage/common/types/types.hpp"
#include "storage/single_node_ha/deferred_deleter.hpp"
#include "storage/single_node_ha/edge.hpp"
#include "storage/single_node_ha/garbage_collector.hpp"
#include "storage/single_node_ha/mvcc/version_list.hpp"
#include "storage/single_node_ha/storage.hpp"
#include "storage/single_node_ha/vertex.hpp"
#include "transactions/single_node_ha/engine.hpp"
#include "utils/exceptions.hpp"
#include "utils/scheduler.hpp"
#include "utils/timer.hpp"
namespace database {
/** Garbage collection capabilities for database::Storage. Extracted into a
* separate class for better code organization, and because the GC requires a
* tx::Engine, while the Storage itself can exist without it. Even though, a
* database::Storage is always acompanied by a Gc.
*/
class StorageGc {
  // Bundles the deferred deleters and the garbage collector for one record
  // type (Vertex or Edge).
  template <typename TRecord>
  class MvccDeleter {
    using VlistT = mvcc::VersionList<TRecord>;
   public:
    explicit MvccDeleter(ConcurrentMap<storage::Gid, VlistT *> &collection)
        : gc_(collection, record_deleter_, version_list_deleter_) {}
    DeferredDeleter<TRecord> record_deleter_;
    DeferredDeleter<mvcc::VersionList<TRecord>> version_list_deleter_;
    GarbageCollector<ConcurrentMap<storage::Gid, VlistT *>, TRecord> gc_;
  };
 public:
  /** Creates a garbage collector for the given storage that uses the given
   * tx::Engine. If `pause_sec` is greater than zero, then GC gets triggered
   * periodically. */
  StorageGc(Storage &storage, tx::Engine &tx_engine,
            raft::RaftServer *raft_server, int pause_sec)
      : tx_engine_(tx_engine),
        raft_server_(raft_server),
        storage_(storage),
        vertices_(storage.vertices_),
        edges_(storage.edges_) {
    if (pause_sec > 0)
      scheduler_.Run(
          "Storage GC", std::chrono::seconds(pause_sec), [this] {
            try {
              CollectGarbage();
            } catch (const utils::BasicException &e) {
              // Best-effort: a failed GC run is logged and retried on the
              // next tick.
              DLOG(WARNING)
                  << "Couldn't perform storage garbage collection due to: "
                  << e.what();
            }
          });
  }
  ~StorageGc() {
    // We have to stop the scheduler before destroying this class.
    scheduler_.Stop();
    // Free everything that is still pending, regardless of transaction
    // visibility (MaxId acts as "expired for everyone").
    edges_.record_deleter_.FreeExpiredObjects(tx::Transaction::MaxId());
    vertices_.record_deleter_.FreeExpiredObjects(tx::Transaction::MaxId());
    edges_.version_list_deleter_.FreeExpiredObjects(tx::Transaction::MaxId());
    vertices_.version_list_deleter_.FreeExpiredObjects(
        tx::Transaction::MaxId());
  }
  StorageGc(const StorageGc &) = delete;
  StorageGc(StorageGc &&) = delete;
  StorageGc &operator=(const StorageGc &) = delete;
  StorageGc &operator=(StorageGc &&) = delete;
  // Trims the commit log up to the largest transaction id that is provably
  // no longer needed (see GetClogSafeTransaction).
  void CollectLogGarbage(tx::TransactionId oldest_active) {
    auto safe_to_delete = GetClogSafeTransaction(oldest_active);
    if (safe_to_delete) {
      tx_engine_.GarbageCollectCommitLog(*safe_to_delete);
    }
  }
  void CollectGarbage() {
    // main garbage collection logic
    // see wiki documentation for logic explanation
    VLOG(21) << "Garbage collector started";
    const auto snapshot_gc = tx_engine_.GlobalGcSnapshot();
    {
      // This can be run concurrently
      utils::Timer x;
      vertices_.gc_.Run(snapshot_gc, tx_engine_);
      edges_.gc_.Run(snapshot_gc, tx_engine_);
      storage_.unique_constraints_.Refresh(snapshot_gc, tx_engine_);
      VLOG(21) << "Garbage collector mvcc phase time: " << x.Elapsed().count();
    }
    // This has to be run sequentially after gc because gc modifies
    // version_lists and changes the oldest visible record, on which Refresh
    // depends.
    {
      // Index refresh phase (must follow the mvcc phase above).
      utils::Timer x;
      storage_.labels_index_.Refresh(snapshot_gc, tx_engine_);
      storage_.label_property_index_.Refresh(snapshot_gc, tx_engine_);
      VLOG(21) << "Garbage collector index phase time: " << x.Elapsed().count();
    }
    {
      // We free expired objects with snapshot.back(), which is
      // the ID of the oldest active transaction (or next active, if there
      // are no currently active). That's legal because that was the
      // last possible transaction that could have obtained pointers
      // to those records. New snapshot can be used, different than one used for
      // first two phases of gc.
      utils::Timer x;
      const auto snapshot_gc = tx_engine_.GlobalGcSnapshot();
      edges_.record_deleter_.FreeExpiredObjects(snapshot_gc.back());
      vertices_.record_deleter_.FreeExpiredObjects(snapshot_gc.back());
      edges_.version_list_deleter_.FreeExpiredObjects(snapshot_gc.back());
      vertices_.version_list_deleter_.FreeExpiredObjects(snapshot_gc.back());
      VLOG(21) << "Garbage collector deferred deletion phase time: "
               << x.Elapsed().count();
    }
    CollectLogGarbage(snapshot_gc.back());
    // Remember <oldest active, global last> so a future run can decide when
    // the commit log entries below this range become unreachable.
    gc_txid_ranges_.emplace(snapshot_gc.back(), tx_engine_.GlobalLast());
    VLOG(21) << "gc snapshot: " << snapshot_gc;
    VLOG(21) << "edge_record_deleter_ size: " << edges_.record_deleter_.Count();
    VLOG(21) << "vertex record deleter_ size: "
             << vertices_.record_deleter_.Count();
    VLOG(21) << "edge_version_list_deleter_ size: "
             << edges_.version_list_deleter_.Count();
    VLOG(21) << "vertex_version_list_deleter_ size: "
             << vertices_.version_list_deleter_.Count();
    VLOG(21) << "vertices_ size: " << storage_.vertices_.access().size();
    VLOG(21) << "edges_ size: " << storage_.edges_.access().size();
    VLOG(21) << "Garbage collector finished.";
  }
 protected:
  // Find the largest transaction from which everything older is safe to
  // delete, ones for which the hints have been set in the gc phase, and no
  // alive transaction from the time before the hints were set is still alive
  // (otherwise that transaction could still be waiting for a resolution of
  // the query to the commit log about some old transaction)
  std::optional<tx::TransactionId> GetClogSafeTransaction(
      tx::TransactionId oldest_active) {
    std::optional<tx::TransactionId> safe_to_delete;
    while (!gc_txid_ranges_.empty() &&
           gc_txid_ranges_.front().second < oldest_active) {
      safe_to_delete = gc_txid_ranges_.front().first;
      gc_txid_ranges_.pop();
    }
    return safe_to_delete;
  }
  tx::Engine &tx_engine_;
  raft::RaftServer *raft_server_;
  utils::Scheduler scheduler_;
 private:
  Storage &storage_;
  MvccDeleter<Vertex> vertices_;
  MvccDeleter<Edge> edges_;
  // History of <oldest active transaction, next transaction to be run> ranges
  // that gc operated on at some previous time - used to clear commit log
  std::queue<std::pair<tx::TransactionId, tx::TransactionId>> gc_txid_ranges_;
};
} // namespace database

View File

@ -1,28 +0,0 @@
#pragma once
#include "storage/single_node_ha/mvcc/record.hpp"
#include "storage/single_node_ha/mvcc/version_list.hpp"
#include "storage/common/types/property_value_store.hpp"
#include "storage/common/types/types.hpp"
#include "storage/single_node_ha/edges.hpp"
// An MVCC-versioned vertex record: outgoing/incoming edges, labels and
// properties. Instances live inside mvcc::VersionList<Vertex>.
class Vertex : public mvcc::Record<Vertex> {
 public:
  Vertex() = default;
  // Returns new Vertex with copy of data stored in this Vertex, but without
  // copying superclass' members.
  Vertex *CloneData() { return new Vertex(*this); }
  Edges out_;
  Edges in_;
  std::vector<storage::Label> labels_;
  PropertyValueStore properties_;
 private:
  // Private: cloning must go through CloneData() so the mvcc::Record base
  // is freshly default-constructed instead of copied.
  Vertex(const Vertex &other)
      : mvcc::Record<Vertex>(),
        out_(other.out_),
        in_(other.in_),
        labels_(other.labels_),
        properties_(other.properties_) {}
};

View File

@ -1,86 +0,0 @@
#include "storage/single_node_ha/vertex_accessor.hpp"
#include <algorithm>
#include "database/single_node_ha/graph_db_accessor.hpp"
#include "durability/single_node_ha/state_delta.hpp"
#include "utils/algorithm.hpp"
// Constructs an accessor over `address` and immediately resolves the record
// versions visible to `db_accessor`'s transaction+command.
VertexAccessor::VertexAccessor(mvcc::VersionList<Vertex> *address,
                               database::GraphDbAccessor &db_accessor)
    : RecordAccessor(address, db_accessor) {
  Reconstruct();
}
// Number of outgoing / incoming edges on the currently visible version.
size_t VertexAccessor::out_degree() const { return current().out_.size(); }
size_t VertexAccessor::in_degree() const { return current().in_.size(); }
// Adds `label` to the vertex. Adding an already-present label is a no-op;
// otherwise the label is appended, a StateDelta is buffered and
// UpdateOnAddLabel is invoked.
void VertexAccessor::add_label(storage::Label label) {
  auto &accessor = db_accessor();
  auto delta = database::StateDelta::AddLabel(
      accessor.transaction_id(), gid(), label, accessor.LabelName(label));
  Vertex &vertex = update();
  if (utils::Contains(vertex.labels_, label)) return;
  vertex.labels_.emplace_back(label);
  accessor.sd_buffer()->Emplace(delta);
  accessor.UpdateOnAddLabel(label, *this, &vertex);
}
// Removes `label` from the vertex; a missing label is a no-op. On actual
// removal a StateDelta is buffered and UpdateOnRemoveLabel is invoked.
void VertexAccessor::remove_label(storage::Label label) {
  auto &accessor = db_accessor();
  auto delta = database::StateDelta::RemoveLabel(
      accessor.transaction_id(), gid(), label, accessor.LabelName(label));
  Vertex &vertex = update();
  if (!utils::Contains(vertex.labels_, label)) return;
  auto &labels = vertex.labels_;
  // Swap-and-pop erase: label order is not preserved.
  auto found = std::find(labels.begin(), labels.end(), delta.label);
  std::swap(*found, labels.back());
  labels.pop_back();
  accessor.sd_buffer()->Emplace(delta);
  accessor.UpdateOnRemoveLabel(label, *this);
}
// True when the currently visible version of the vertex carries `label`.
// Uses utils::Contains for consistency with add_label/remove_label.
bool VertexAccessor::has_label(storage::Label label) const {
  return utils::Contains(this->current().labels_, label);
}
// Returns the labels of the currently visible version of the vertex.
const std::vector<storage::Label> &VertexAccessor::labels() const {
  return this->current().labels_;
}
// Detaches `edge` from this vertex's outgoing edge list. Does nothing when
// the vertex is already expired in the current transaction.
void VertexAccessor::RemoveOutEdge(mvcc::VersionList<Edge> *edge) {
  SwitchNew();
  const auto &tx = db_accessor().transaction();
  if (current().is_expired_by(tx)) return;
  update().out_.RemoveEdge(edge);
}
// Detaches `edge` from this vertex's incoming edge list. Does nothing when
// the vertex is already expired in the current transaction.
void VertexAccessor::RemoveInEdge(mvcc::VersionList<Edge> *edge) {
  SwitchNew();
  const auto &tx = db_accessor().transaction();
  if (current().is_expired_by(tx)) return;
  update().in_.RemoveEdge(edge);
}
std::ostream &operator<<(std::ostream &os, const VertexAccessor &va) {
os << "V(";
utils::PrintIterable(os, va.labels(), ":", [&](auto &stream, auto label) {
stream << va.db_accessor().LabelName(label);
});
os << " {";
utils::PrintIterable(os, va.Properties(), ", ",
[&](auto &stream, const auto &pair) {
stream << va.db_accessor().PropertyName(pair.first)
<< ": " << pair.second;
});
return os << "})";
}

View File

@ -1,158 +0,0 @@
#pragma once
#include <limits>
#include <set>
#include <vector>
#include <cppitertools/chain.hpp>
#include <cppitertools/imap.hpp>
#include "storage/single_node_ha/edge_accessor.hpp"
#include "storage/single_node_ha/record_accessor.hpp"
#include "storage/single_node_ha/vertex.hpp"
#include "utils/algorithm.hpp"
/**
* Provides ways for the client programmer (i.e. code generated
* by the compiler) to interact with a Vertex.
*
* This class indirectly inherits MVCC data structures and
* takes care of MVCC versioning.
*/
class VertexAccessor final : public RecordAccessor<Vertex> {
  // Helper function for creating an iterator over edges.
  // @param begin - begin iterator
  // @param end - end iterator
  // @param from - if true specifies that the vertex represents `from` part of
  // the edge, otherwise it specifies `to` part of the edge
  // @param vertex - one endpoint of every edge
  // @param db_accessor - database accessor
  // @return - Iterator over EdgeAccessors
  template <typename TIterator>
  static inline auto MakeAccessorIterator(
      TIterator &&begin, TIterator &&end, bool from,
      mvcc::VersionList<Vertex> *vertex,
      database::GraphDbAccessor &db_accessor) {
    return iter::imap(
        [from, vertex, &db_accessor](auto &edges_element) {
          // `vertex` is the fixed endpoint; the stored element supplies the
          // other endpoint and the edge type.
          if (from) {
            return EdgeAccessor(edges_element.edge, db_accessor, vertex,
                                edges_element.vertex, edges_element.edge_type);
          } else {
            return EdgeAccessor(edges_element.edge, db_accessor,
                                edges_element.vertex, vertex,
                                edges_element.edge_type);
          }
        },
        utils::Iterable<TIterator>(std::forward<TIterator>(begin),
                                   std::forward<TIterator>(end)));
  }
 public:
  VertexAccessor(mvcc::VersionList<Vertex> *address,
                 database::GraphDbAccessor &db_accessor);
  /** Returns the number of outgoing edges. */
  size_t out_degree() const;
  /** Returns the number of incoming edges. */
  size_t in_degree() const;
  /** Adds a label to the Vertex. If the Vertex already has that label the call
   * has no effect. */
  void add_label(storage::Label label);
  /** Removes a label from the Vertex. */
  void remove_label(storage::Label label);
  /** Indicates if the Vertex has the given label. */
  bool has_label(storage::Label label) const;
  /** Returns all the Labels of the Vertex. */
  const std::vector<storage::Label> &labels() const;
  /** Returns EdgeAccessors for all incoming edges. */
  auto in() const {
    return MakeAccessorIterator(current().in_.begin(), current().in_.end(),
                                false, address(), db_accessor());
  }
  /**
   * Returns EdgeAccessors for all incoming edges.
   *
   * @param dest - The destination vertex filter.
   * @param edge_types - Edge types filter. At least one must be matched. If
   * nullptr or empty, the parameter is ignored.
   */
  auto in(const VertexAccessor &dest,
          const std::vector<storage::EdgeType> *edge_types = nullptr) const {
    return MakeAccessorIterator(current().in_.begin(dest.address(), edge_types),
                                current().in_.end(), false, address(),
                                db_accessor());
  }
  /**
   * Returns EdgeAccessors for all incoming edges.
   *
   * @param edge_types - Edge types filter. At least one must be matched. If
   * nullptr or empty, the parameter is ignored.
   */
  auto in(const std::vector<storage::EdgeType> *edge_types) const {
    return MakeAccessorIterator(current().in_.begin(nullptr, edge_types),
                                current().in_.end(), false, address(),
                                db_accessor());
  }
  /** Returns EdgeAccessors for all outgoing edges. */
  auto out() const {
    return MakeAccessorIterator(current().out_.begin(), current().out_.end(),
                                true, address(), db_accessor());
  }
  /**
   * Returns EdgeAccessors for all outgoing edges whose destination is the given
   * vertex.
   *
   * @param dest - The destination vertex filter.
   * @param edge_types - Edge types filter. At least one must be matched. If
   * nullptr or empty, the parameter is ignored.
   */
  auto out(const VertexAccessor &dest,
           const std::vector<storage::EdgeType> *edge_types = nullptr) const {
    return MakeAccessorIterator(
        current().out_.begin(dest.address(), edge_types), current().out_.end(),
        true, address(), db_accessor());
  }
  /**
   * Returns EdgeAccessors for all outgoing edges.
   *
   * @param edge_types - Edge types filter. At least one must be matched. If
   * nullptr or empty, the parameter is ignored.
   */
  auto out(const std::vector<storage::EdgeType> *edge_types) const {
    return MakeAccessorIterator(current().out_.begin(nullptr, edge_types),
                                current().out_.end(), true, address(),
                                db_accessor());
  }
  /** Removes the given edge from the outgoing edges of this vertex. Note that
   * this operation should always be accompanied by the removal of the edge from
   * the incoming edges on the other side and edge deletion. */
  void RemoveOutEdge(mvcc::VersionList<Edge> *edge);
  /** Removes the given edge from the incoming edges of this vertex. Note that
   * this operation should always be accompanied by the removal of the edge from
   * the outgoing edges on the other side and edge deletion. */
  void RemoveInEdge(mvcc::VersionList<Edge> *edge);
};
std::ostream &operator<<(std::ostream &, const VertexAccessor &);
// hash function for the vertex accessor
namespace std {
// Hashes a vertex accessor by its gid only; the gid uniquely identifies a
// vertex (see RecordAccessor::gid), so equal vertices hash equally.
template <>
struct hash<VertexAccessor> {
  size_t operator()(const VertexAccessor &v) const { return v.gid().AsUint(); };
};
}  // namespace std

View File

@ -1,7 +0,0 @@
#pragma once
#ifdef MG_SINGLE_NODE_HA
#include "storage/single_node_ha/vertex_accessor.hpp"
#endif
// TODO: write documentation for the interface here!

View File

@ -1,80 +0,0 @@
#pragma once
#include "data_structures/bitset/dynamic_bitset.hpp"
#include "transactions/type.hpp"
namespace tx {
// This class is lock free. There is no need to acquire any lock when accessing
// this class and this class doesn't acquire any lock on method calls.
// Tracks the outcome of every transaction using two adjacent bits per
// transaction id in a dynamic bitset: bit 2*id is the "committed" flag and
// bit 2*id+1 the "aborted" flag (neither set means the transaction is
// still active).
class CommitLog final {
 public:
  static constexpr int kBitsetBlockSize = 32768;
  CommitLog() = default;
  CommitLog(const CommitLog &) = delete;
  CommitLog(CommitLog &&) = delete;
  CommitLog &operator=(const CommitLog &) = delete;
  CommitLog &operator=(CommitLog &&) = delete;
  bool is_active(TransactionId id) const {
    return fetch_info(id).is_active();
  }
  bool is_committed(TransactionId id) const {
    return fetch_info(id).is_committed();
  }
  // Sets the committed bit (bit 2*id) for the transaction.
  void set_committed(TransactionId id) { log.set(2 * id); }
  bool is_aborted(TransactionId id) const {
    return fetch_info(id).is_aborted();
  }
  // Sets the aborted bit (bit 2*id+1) for the transaction.
  void set_aborted(TransactionId id) { log.set(2 * id + 1); }
  // Clears the commit log from bits associated with transactions with an id
  // lower than `id`.
  void garbage_collect_older(TransactionId id) { log.delete_prefix(2 * id); }
  // Decoded per-transaction status read out of the bitset.
  class Info final {
   public:
    Info() {} // Needed for serialization.
    enum Status {
      ACTIVE = 0,     // 00
      COMMITTED = 1,  // 01
      ABORTED = 2,    // 10
    };
    // Normalizes raw bitset flags: ABORTED wins over COMMITTED if somehow
    // both bits are set.
    explicit Info(uint8_t flags) {
      if (flags & ABORTED) {
        flags_ = ABORTED;
      } else if (flags & COMMITTED) {
        flags_ = COMMITTED;
      } else {
        flags_ = ACTIVE;
      }
    }
    bool is_active() const { return flags_ == ACTIVE; }
    bool is_committed() const {
      if (flags_ & ABORTED) return false;
      return flags_ & COMMITTED;
    }
    bool is_aborted() const { return flags_ & ABORTED; }
    operator uint8_t() const { return flags_; }
   private:
    uint8_t flags_{0};
  };
  // Reads the two status bits for `id` and decodes them into an Info.
  Info fetch_info(TransactionId id) const { return Info{log.at(2 * id, 2)}; }
 private:
  DynamicBitset<uint8_t, kBitsetBlockSize> log;
};
} // namespace tx

View File

@ -1,5 +0,0 @@
#pragma once
#ifdef MG_SINGLE_NODE_HA
#include "transactions/single_node_ha/engine.hpp"
#endif

View File

@ -1,85 +0,0 @@
#pragma once
#include <memory>
#include <mutex>
#include <vector>
#include "glog/logging.h"
#include "storage/common/locking/lock_status.hpp"
#include "storage/common/locking/record_lock.hpp"
#include "transactions/type.hpp"
#include "utils/spin_lock.hpp"
namespace tx {
class Engine;
class Transaction;
// Collects the record locks acquired by one transaction; each lock is
// released (via LockHolder's destructor) when the store is destroyed.
class LockStore {
  // RAII wrapper around one acquired RecordLock; movable but not copyable.
  class LockHolder {
   public:
    LockHolder() = default;
    /// @throw utils::LockTimeoutException
    LockHolder(RecordLock *lock, const Transaction &tx, tx::Engine &engine)
        : lock_(lock) {
      DCHECK(lock != nullptr) << "Lock is nullptr.";
      auto status = lock_->Lock(tx, engine);
      // A holder that failed to acquire stays inactive (lock_ == nullptr)
      // and will not Unlock in the destructor.
      if (status != LockStatus::Acquired) {
        lock_ = nullptr;
      }
    }
    LockHolder(const LockHolder &) = delete;
    LockHolder &operator=(const LockHolder &) = delete;
    LockHolder(LockHolder &&other) : lock_(other.lock_) {
      other.lock_ = nullptr;
    }
    LockHolder &operator=(LockHolder &&other) {
      if (this == &other) return *this;
      // NOTE(review): an active lock_ is overwritten here without Unlock();
      // presumably move-assignment only ever targets empty holders — verify
      // at call sites.
      lock_ = other.lock_;
      other.lock_ = nullptr;
      return *this;
    }
    ~LockHolder() {
      if (lock_ != nullptr) {
        lock_->Unlock();
      }
    }
    bool active() const { return lock_ != nullptr; }
   private:
    RecordLock *lock_{nullptr};
  };
 public:
  /// @throw utils::LockTimeoutException
  void Take(RecordLock *lock, const tx::Transaction &tx, tx::Engine &engine) {
    // Creating a lock holder locks the version list to the given transaction.
    // Note that it's an op that can take a long time (if there are multiple
    // transactions trying to lock.
    LockHolder holder{lock, tx, engine};
    // This guard prevents the same transaction from concurrent modificaton of
    // locks_. This can only happen in distributed memgraph, when there are
    // multiple edits coming to the same worker in the same transaction at the
    // same time. IMPORTANT: This guard must come after LockHolder construction,
    // as that potentially takes a long time and this guard only needs to
    // protect locks_ update.
    std::lock_guard<utils::SpinLock> guard{locks_lock_};
    locks_.emplace_back(std::move(holder));
    // Drop holders that never acquired their lock.
    if (!locks_.back().active()) {
      locks_.pop_back();
    }
  }
 private:
  utils::SpinLock locks_lock_;
  std::vector<LockHolder> locks_;
};
} // namespace tx

View File

@ -1,305 +0,0 @@
#include "transactions/single_node_ha/engine.hpp"
#include <limits>
#include <mutex>
#include "glog/logging.h"
#include "durability/single_node_ha/state_delta.hpp"
#include "raft/exceptions.hpp"
namespace tx {
// Constructs the HA transaction engine. Both collaborators are mandatory:
// `raft` is used to replicate committed state deltas, `delta_buffer` is where
// per-transaction state deltas accumulate until commit/abort.
Engine::Engine(raft::RaftInterface *raft,
               storage::StateDeltaBuffer *delta_buffer)
    : clog_(std::make_unique<CommitLog>()),
      raft_(raft),
      delta_buffer_(delta_buffer) {
  CHECK(raft) << "Raft can't be nullptr in HA";
  CHECK(delta_buffer) << "State delta buffer can't be nullptr in HA";
}
// Starts a new regular (non-blocking) transaction.
// Throws TransactionEngineError while the engine is closed for new
// transactions or while a replication error is outstanding.
Transaction *Engine::Begin() {
  VLOG(11) << "[Tx] Starting transaction " << counter_ + 1;
  std::lock_guard<utils::SpinLock> engine_guard(lock_);
  const bool accepting =
      accepting_transactions_.load() && replication_errors_.empty();
  if (!accepting) {
    throw TransactionEngineError(
        "The transaction engine currently isn't accepting new transactions.");
  }
  return BeginTransaction(false);
}
// Starts a blocking transaction: closes the engine to new transactions,
// hints every currently active transaction (except the optional parent and
// the caller) to abort, then busy-waits until they all finish before the new
// transaction is created. The engine is reopened when the blocking
// transaction commits or aborts.
// @throw TransactionEngineError if the engine isn't accepting transactions.
Transaction *Engine::BeginBlocking(std::optional<TransactionId> parent_tx) {
  Snapshot wait_for_txs;
  {
    std::lock_guard<utils::SpinLock> guard(lock_);
    if (!accepting_transactions_.load() || !replication_errors_.empty())
      throw TransactionEngineError(
          "The transaction engine currently isn't accepting new transactions.");

    // Block the engine from accepting new transactions.
    accepting_transactions_.store(false);

    // Set active transactions to abort ASAP.
    for (auto transaction : active_) {
      store_.find(transaction)->second->set_should_abort();
    }

    // Snapshot of the active set, examined below without holding the lock.
    wait_for_txs = active_;
  }

  // Wait for all active transactions except the parent (optional) and ourselves
  // to end.
  for (auto id : wait_for_txs) {
    if (parent_tx && *parent_tx == id) continue;
    while (Info(id).is_active()) {
      // TODO reconsider this constant, currently rule-of-thumb chosen
      std::this_thread::sleep_for(std::chrono::microseconds(100));
    }
  }

  // Only after all transactions have finished, start the blocking transaction.
  std::lock_guard<utils::SpinLock> guard(lock_);
  return BeginTransaction(true);
}
// Advances the command counter of transaction `id` and returns the new
// command id.
CommandId Engine::Advance(TransactionId id) {
  std::lock_guard<utils::SpinLock> engine_guard(lock_);
  auto found = store_.find(id);
  DCHECK(found != store_.end())
      << "Transaction::advance on non-existing transaction";
  return found->second->AdvanceCommand();
}
// Returns the current command id of transaction `id` without advancing it.
CommandId Engine::UpdateCommand(TransactionId id) {
  std::lock_guard<utils::SpinLock> guard(lock_);
  auto it = store_.find(id);
  // BUGFIX: the assertion message previously said "Transaction::advance"
  // (copy-pasted from Advance), which pointed debugging at the wrong method.
  DCHECK(it != store_.end())
      << "Transaction::UpdateCommand on non-existing transaction";
  return it->second->cid();
}
// Commits transaction `t`.
//
// Fast path: a transaction whose buffer holds exactly two deltas
// (TxBegin + TxCommit, i.e. read-only) or a transaction running on a
// non-leader peer is committed locally without replication. Otherwise the
// deltas are emplaced into the Raft log and this call blocks until the entry
// is safely replicated (or aborts the transaction if emplacement fails).
// Note the ordering: the delta buffer is always cleared before `lock_` is
// taken, and a blocking transaction reopens the engine on completion.
void Engine::Commit(const Transaction &t) {
  VLOG(11) << "[Tx] Committing transaction " << t.id_;
  delta_buffer_->Emplace(database::StateDelta::TxCommit(t.id_));
  auto deltas = delta_buffer_->GetDeltas(t.id_);

  // If we have only two state deltas in our transaction, that means we are
  // dealing with a read-only transaction which does not need to be replicated
  // throughout the cluster, so we simply commit it in our storage.
  //
  // Also, when the current server is not in the leader mode, the following
  // holds:
  //
  //   1) In CANDIDATE mode we need to be able to commit because Raft is
  //      initialzed in that mode and needs to perform recovery.
  //
  //   2) In FOLLOWER mode, Raft will only try to apply state deltas from logs
  //      that are behind the current commit index and are therefore safe to
  //      apply.
  if (deltas.size() == 2 || !raft_->IsLeader()) {
    delta_buffer_->Erase(t.id_);
    std::lock_guard<utils::SpinLock> guard(lock_);
    clog_->set_committed(t.id_);
    active_.remove(t.id_);
    store_.erase(store_.find(t.id_));
    if (t.blocking()) {
      accepting_transactions_.store(true);
    }
    return;
  }

  auto log_entry_status = raft_->Emplace(deltas);

  // Log Entry was not successfully emplaced and the transaction should be
  // aborted
  if (!log_entry_status) {
    Abort(t);
    return;
  }

  // It is important to note the following situation.  If our cluster ends up
  // with a network partition where the current leader can't communicate with
  // the majority of the peers, and the client is still sending queries to it,
  // all of the transaction will end up waiting here until the network
  // partition is resolved.  The problem that can occur afterwards is bad.
  // When the machine transitions from leader to follower mode,
  // `ReplicationInfo` method will start returning `is_replicated=true`. This
  // might lead to a problem where we suddenly want to alter the state of the
  // transaction engine that isn't valid anymore, because the current machine
  // isn't the leader anymore. This is all handled in the `Transition` method
  // where once the transition from leader to follower occurs, the mode will
  // be set to follower first, then the `Reset` method on the transaction
  // engine will wait for all transactions to finish, and even though we
  // change the transaction engine state here, the engine will perform a
  // `Reset` and start recovering from zero, and the invalid changes won't
  // matter.

  // Wait for Raft to receive confirmation from the majority of followers.
  while (true) {
    try {
      if (raft_->SafeToCommit(log_entry_status->term_id,
                              log_entry_status->log_index))
        break;
    } catch (const raft::ReplicationTimeoutException &e) {
      std::lock_guard<utils::SpinLock> guard(lock_);
      // Warn only once per transaction; while any replication error is
      // recorded here the engine refuses new transactions (see Begin).
      if (replication_errors_.insert(t.id_).second) {
        LOG(WARNING) << e.what();
      }
    }
    std::this_thread::sleep_for(std::chrono::microseconds(100));
  }

  std::unique_lock<std::mutex> raft_lock(raft_->WithLock(), std::defer_lock);
  // We need to acquire the Raft lock so we don't end up racing with a Raft
  // thread that can reset the engine state. If we can't acquire the lock, and
  // we end up with reseting the engine, we throw
  // UnexpectedLeaderChangeException.
  while (true) {
    if (raft_lock.try_lock()) {
      break;
    }
    // This is the case when we've lost our leader status due to another peer
    // requesting election.
    if (reset_active_.load()) throw raft::UnexpectedLeaderChangeException();
    // This is the case when we're shutting down and we're no longer a valid
    // leader. `SafeToCommit` will throw `RaftShutdownException` if the
    // transaction wasn't replicated and the client will receive a negative
    // response. Otherwise, we'll end up here, and since the transaction was
    // replciated, we need to inform the client that the query succeeded.
    if (!raft_->IsLeader()) break;
    std::this_thread::sleep_for(std::chrono::microseconds(100));
  }

  delta_buffer_->Erase(t.id_);
  std::lock_guard<utils::SpinLock> guard(lock_);
  replication_errors_.erase(t.id_);
  clog_->set_committed(t.id_);
  active_.remove(t.id_);
  store_.erase(store_.find(t.id_));
  if (t.blocking()) {
    accepting_transactions_.store(true);
  }
}
// Aborts transaction `t`: discards its buffered state deltas, marks it
// aborted in the commit log and removes it from the active set / store.
// The delta buffer is erased before `lock_` is taken (same ordering as in
// Commit).
void Engine::Abort(const Transaction &t) {
  VLOG(11) << "[Tx] Aborting transaction " << t.id_;
  delta_buffer_->Erase(t.id_);
  std::lock_guard<utils::SpinLock> guard(lock_);
  clog_->set_aborted(t.id_);
  active_.remove(t.id_);
  store_.erase(store_.find(t.id_));
  // A finished blocking transaction reopens the engine for new transactions.
  if (t.blocking()) {
    accepting_transactions_.store(true);
  }
}
// Returns the commit log entry (active/committed/aborted status) for `tx`.
CommitLog::Info Engine::Info(TransactionId tx) const {
  return clog_->fetch_info(tx);
}
// Builds the snapshot used for garbage collection purposes.
Snapshot Engine::GlobalGcSnapshot() {
  std::lock_guard<utils::SpinLock> engine_guard(lock_);

  // No active transactions.
  if (active_.empty()) {
    Snapshot result = active_;
    result.insert(counter_ + 1);
    return result;
  }

  // There are active transactions: use the snapshot of the oldest one,
  // extended with that transaction's own id.
  Snapshot result = store_.find(active_.front())->second->snapshot();
  result.insert(active_.front());
  return result;
}
// Returns a copy of the set of currently active transactions.
Snapshot Engine::GlobalActiveTransactions() {
  std::lock_guard<utils::SpinLock> engine_guard(lock_);
  return active_;
}
// Returns the id of the last transaction started locally.
TransactionId Engine::LocalLast() const {
  std::lock_guard<utils::SpinLock> guard(lock_);
  return counter_;
}

// Single-node engine: the global last transaction is just the local one.
TransactionId Engine::GlobalLast() const { return LocalLast(); }

// Returns the oldest active transaction id, or `counter_ + 1` when nothing
// is active.
TransactionId Engine::LocalOldestActive() const {
  std::lock_guard<utils::SpinLock> guard(lock_);
  return active_.empty() ? counter_ + 1 : active_.front();
}
// Drops commit log info for transactions older than `tx_id`.
void Engine::GarbageCollectCommitLog(TransactionId tx_id) {
  clog_->garbage_collect_older(tx_id);
}
// Applies `f` to every currently active transaction. The engine lock is
// held for the whole iteration.
void Engine::LocalForEachActiveTransaction(
    std::function<void(Transaction &)> f) {
  std::lock_guard<utils::SpinLock> engine_guard(lock_);
  for (auto tx_id : active_) {
    auto it = store_.find(tx_id);
    f(*it->second);
  }
}
// Returns the running transaction with id `tx_id`. Crashes (CHECK) if the
// transaction is not active.
Transaction *Engine::RunningTransaction(TransactionId tx_id) {
  std::lock_guard<utils::SpinLock> engine_guard(lock_);
  auto it = store_.find(tx_id);
  CHECK(it != store_.end())
      << "Can't return snapshot for an inactive transaction";
  return it->second.get();
}
// Resets the engine to its initial state: stops accepting new transactions,
// hints all active transactions to abort, waits for them to finish, then
// clears the counter, the store, the active set, the replication-error set
// and rebuilds the commit log. `reset_active_` is raised for the duration so
// committers stuck in `Commit` can bail out (see the raft-lock loop there).
void Engine::Reset() {
  Snapshot wait_for_txs;
  {
    std::lock_guard<utils::SpinLock> guard(lock_);

    // Block the engine from accepting new transactions.
    accepting_transactions_.store(false);

    // Set active transactions to abort ASAP.
    for (auto transaction : active_) {
      store_.find(transaction)->second->set_should_abort();
    }

    wait_for_txs = active_;
    reset_active_.store(true);
  }

  // Wait for all active transactions to end.
  for (auto id : wait_for_txs) {
    while (Info(id).is_active()) {
      // TODO reconsider this constant, currently rule-of-thumb chosen
      std::this_thread::sleep_for(std::chrono::microseconds(100));
    }
  }

  // Only after all transactions have finished, reset the engine.
  std::lock_guard<utils::SpinLock> guard(lock_);
  counter_ = 0;
  replication_errors_.clear();
  store_.clear();
  active_.clear();
  {
    // Destroy the old commit log before building a fresh one.
    clog_ = nullptr;
    clog_ = std::make_unique<CommitLog>();
  }
  accepting_transactions_.store(true);
  reset_active_.store(false);
}
// Helper for Begin/BeginBlocking: allocates the next transaction id, creates
// the transaction with a snapshot of the currently active set (taken BEFORE
// the new id is inserted), registers it and records a TxBegin delta.
// Callers must hold `lock_`.
Transaction *Engine::BeginTransaction(bool blocking) {
  TransactionId id{++counter_};
  // Own the transaction via unique_ptr immediately so it can't leak if
  // `store_.emplace` throws (the old code passed a raw `new` pointer).
  std::unique_ptr<Transaction> t(
      new Transaction(id, active_, *this, blocking));
  Transaction *ptr = t.get();
  active_.insert(id);
  store_.emplace(id, std::move(t));
  delta_buffer_->Emplace(database::StateDelta::TxBegin(id));
  return ptr;
}
} // namespace tx

View File

@ -1,88 +0,0 @@
/// @file
#pragma once
#include <atomic>
#include <optional>
#include <unordered_map>
#include <unordered_set>
#include "raft/raft_interface.hpp"
#include "storage/single_node_ha/state_delta_buffer.hpp"
#include "transactions/commit_log.hpp"
#include "transactions/transaction.hpp"
#include "utils/spin_lock.hpp"
namespace tx {
/// Thrown when the engine can't accept new transactions, e.g. while a
/// blocking transaction is running or a replication error is outstanding.
class TransactionEngineError : public utils::BasicException {
  using utils::BasicException::BasicException;
};
/// High availability single node transaction engine.
///
/// Requires RaftInterface where it stores StateDeltas containing transaction
/// information needed for raft followers when replicating logs.
class Engine final {
 public:
  Engine(raft::RaftInterface *raft, storage::StateDeltaBuffer *delta_buffer);

  Engine(const Engine &) = delete;
  Engine(Engine &&) = delete;
  Engine &operator=(const Engine &) = delete;
  Engine &operator=(Engine &&) = delete;

  /// Starts a regular transaction.
  /// @throw TransactionEngineError if the engine isn't accepting
  /// transactions.
  Transaction *Begin();

  /// Blocking transactions are used when we can't allow any other transaction
  /// to run (besides this one). This is the reason why this transactions blocks
  /// the engine from creating new transactions and waits for the existing ones
  /// to finish.
  Transaction *BeginBlocking(std::optional<TransactionId> parent_tx);

  /// Advances the command counter of the given transaction.
  CommandId Advance(TransactionId id);
  /// Returns the current command id of the given transaction.
  CommandId UpdateCommand(TransactionId id);

  /// Commits the transaction; may block on Raft replication (see .cpp).
  void Commit(const Transaction &t);
  /// Aborts the transaction and discards its buffered state deltas.
  void Abort(const Transaction &t);

  /// Commit log status (active/committed/aborted) for the given transaction.
  CommitLog::Info Info(TransactionId tx) const;

  /// Snapshot used for garbage collection decisions.
  Snapshot GlobalGcSnapshot();
  /// Copy of the currently active transaction set.
  Snapshot GlobalActiveTransactions();
  TransactionId GlobalLast() const;
  TransactionId LocalLast() const;
  TransactionId LocalOldestActive() const;
  void LocalForEachActiveTransaction(std::function<void(Transaction &)> f);
  Transaction *RunningTransaction(TransactionId tx_id);
  void GarbageCollectCommitLog(TransactionId tx_id);

  auto &local_lock_graph() { return local_lock_graph_; }
  const auto &local_lock_graph() const { return local_lock_graph_; }

  /// Reset the internal state of the engine. Use with caution as this will
  /// block the engine from receiving any new transaction and will hint all
  /// transactions to abort and will wait for them to finish before reseting
  /// engines internal state.
  void Reset();

 private:
  // Map lock dependencies. Each entry maps (tx_that_wants_lock,
  // tx_that_holds_lock). Used for local deadlock resolution.
  // TODO consider global deadlock resolution.
  ConcurrentMap<TransactionId, TransactionId> local_lock_graph_;

  TransactionId counter_{0};
  std::unique_ptr<CommitLog> clog_{nullptr};
  std::unordered_map<TransactionId, std::unique_ptr<Transaction>> store_;
  Snapshot active_;
  // Guards counter_, store_, active_ and replication_errors_.
  mutable utils::SpinLock lock_;
  raft::RaftInterface *raft_{nullptr};
  storage::StateDeltaBuffer *delta_buffer_{nullptr};
  std::atomic<bool> accepting_transactions_{true};
  // Raised while Reset() is in progress so blocked committers can bail out.
  std::atomic<bool> reset_active_{false};

  // Keep track of transaction that experienced a replication error.
  // While there is a replication error known to the engine, the engine won't
  // accept new transactions.
  std::unordered_set<TransactionId> replication_errors_;

  // Helper method for transaction begin.
  Transaction *BeginTransaction(bool blocking);
};
} // namespace tx

View File

@ -1,98 +0,0 @@
/// @file
#pragma once
#include <algorithm>
#include <iostream>
#include <vector>
#include "glog/logging.h"
#include "transactions/type.hpp"
#include "utils/algorithm.hpp"
namespace tx {
/// Ascendingly sorted collection of transaction ids.
///
/// Represents the transactions that were active at
/// some point in the discrete transaction time.
class Snapshot final {
public:
Snapshot() = default;
Snapshot(std::vector<TransactionId> &&active)
: transaction_ids_(std::move(active)) {}
Snapshot(const Snapshot &) = default;
Snapshot(Snapshot &&) = default;
Snapshot &operator=(const Snapshot &) = default;
Snapshot &operator=(Snapshot &&) = default;
/// Returns true if this snapshot contains the given
/// transaction id.
///
/// @param xid - The transcation id in question
bool contains(TransactionId id) const {
return std::binary_search(transaction_ids_.begin(), transaction_ids_.end(),
id);
}
/// Adds the given transaction id to the end of this Snapshot.
/// The given id must be greater then all the existing ones,
/// to maintain ascending sort order.
///
/// @param id - the transaction id to add
void insert(TransactionId id) {
transaction_ids_.push_back(id);
DCHECK(std::is_sorted(transaction_ids_.begin(), transaction_ids_.end()))
<< "Snapshot must be sorted";
}
/// Removes the given transaction id from this Snapshot.
///
/// @param id - the transaction id to remove
void remove(TransactionId id) {
auto last =
std::remove(transaction_ids_.begin(), transaction_ids_.end(), id);
transaction_ids_.erase(last, transaction_ids_.end());
}
/// Removes all transactions from this Snapshot.
void clear() {
transaction_ids_.clear();
}
TransactionId front() const {
DCHECK(transaction_ids_.size()) << "Snapshot.front() on empty Snapshot";
return transaction_ids_.front();
}
TransactionId back() const {
DCHECK(transaction_ids_.size()) << "Snapshot.back() on empty Snapshot";
return transaction_ids_.back();
}
size_t size() const { return transaction_ids_.size(); }
bool empty() const { return transaction_ids_.empty(); }
bool operator==(const Snapshot &other) const {
return transaction_ids_ == other.transaction_ids_;
}
auto begin() { return transaction_ids_.begin(); }
auto end() { return transaction_ids_.end(); }
auto begin() const { return transaction_ids_.cbegin(); }
auto end() const { return transaction_ids_.cend(); }
friend std::ostream &operator<<(std::ostream &stream,
const Snapshot &snapshot) {
stream << "Snapshot(";
utils::PrintIterable(stream, snapshot.transaction_ids_);
stream << ")";
return stream;
}
const auto &transaction_ids() const { return transaction_ids_; }
private:
std::vector<TransactionId> transaction_ids_;
};
} // namespace tx

View File

@ -1,118 +0,0 @@
/// @file
#pragma once
#include <chrono>
#include <cstdint>
#include <cstdlib>
#include <unordered_set>
#include <vector>
#include "data_structures/concurrent/concurrent_map.hpp"
#include "storage/common/locking/record_lock.hpp"
#include "transactions/lock_store.hpp"
#include "transactions/snapshot.hpp"
#include "transactions/type.hpp"
#include "utils/exceptions.hpp"
namespace tx {
/// Indicates an error in transaction handling (currently
/// only command id overflow, thrown by Transaction::AdvanceCommand).
class TransactionError : public utils::BasicException {
 public:
  using utils::BasicException::BasicException;
};
/// A database transaction. Encapsulates an atomic, abortable unit of work. Also
/// defines that all db ops are single-threaded within a single transaction
class Transaction final {
 public:
  /// Returns the maximum possible transaction id
  static TransactionId MaxId() {
    return std::numeric_limits<TransactionId>::max();
  }

 private:
  friend class Engine;

  // The constructor is private, only the Engine ever uses it.
  Transaction(TransactionId id, const Snapshot &snapshot, Engine &engine,
              bool blocking)
      : id_(id),
        engine_(engine),
        snapshot_(snapshot),
        blocking_(blocking) {}

  // A transaction can't be moved nor copied. It's owned by the transaction
  // engine, and its lifetime is managed by it.
  Transaction(const Transaction &) = delete;
  Transaction(Transaction &&) = delete;
  Transaction &operator=(const Transaction &) = delete;
  Transaction &operator=(Transaction &&) = delete;

 public:
  /// Acquires the lock over the given RecordLock, preventing other transactions
  /// from doing the same
  /// @throw utils::LockTimeoutException
  void TakeLock(RecordLock &lock) const { locks_.Take(&lock, *this, engine_); }

  /// Transaction's id. Unique in the engine that owns it
  const TransactionId id_;

  /// The transaction engine to which this transaction belongs
  Engine &engine_;

  /// Returns the current transaction's current command id
  // TODO rename to cmd_id (variable and function
  auto cid() const { return cid_; }

  /// Returns this transaction's snapshot.
  const Snapshot &snapshot() const { return snapshot_; }

  /// Signal to transaction that it should abort. It doesn't really enforce that
  /// transaction will abort, but it merely hints to the transaction that it is
  /// preferable to stop its execution.
  void set_should_abort() { should_abort_ = true; }

  bool should_abort() const { return should_abort_; }

  auto creation_time() const { return creation_time_; }
  auto blocking() const { return blocking_; }

 private:
  /// Function used to advance the command.
  /// @throw TransactionError
  CommandId AdvanceCommand() {
    if (cid_ == std::numeric_limits<CommandId>::max()) {
      throw TransactionError(
          "Reached maximum number of commands in this "
          "transaction.");
    }
    return ++cid_;
  }

  // Function used to set the command.
  void SetCommand(CommandId cid) { cid_ = cid; }

  // Index of the current command in the current transaction. Starts at 1.
  CommandId cid_{1};

  // A snapshot of currently active transactions.
  const Snapshot snapshot_;

  // Record locks held by this transaction. Mutable so TakeLock can stay
  // const.
  mutable LockStore locks_;

  // True if transaction should abort. Used to signal query executor that it
  // should stop execution, it is only a hint, transaction can disobey.
  std::atomic<bool> should_abort_{false};

  // Creation time.
  const std::chrono::time_point<std::chrono::steady_clock> creation_time_{
      std::chrono::steady_clock::now()};

  bool blocking_{false};
};
} // namespace tx

View File

@ -1,14 +0,0 @@
/// @file
/// Transaction and command id types, defined in a separate header to avoid
/// cyclic dependencies.
// BUGFIX: this header lacked an include guard; every sibling header in this
// module uses `#pragma once` right after `/// @file`.
#pragma once

#include <cstdint>

namespace tx {

/// Type of a tx::Transaction's id member
using TransactionId = uint64_t;

/// Type of a tx::Transaction's command id member
using CommandId = uint32_t;

}  // namespace tx

View File

@ -1,6 +1,5 @@
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include "database/graph_db.hpp"
#include "query/frontend/semantic/symbol_table.hpp" #include "query/frontend/semantic/symbol_table.hpp"
#include "query/plan/operator.hpp" #include "query/plan/operator.hpp"
#include "query/plan/pretty_print.hpp" #include "query/plan/pretty_print.hpp"