From bfbace81683b76bcd766043d171e2df8b0eb0a5a Mon Sep 17 00:00:00 2001 From: Matej Ferencevic Date: Tue, 25 Feb 2020 16:19:27 +0100 Subject: [PATCH] Remove old HA implementation Reviewers: teon.banek, ipaljak Reviewed By: teon.banek Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D2686 --- src/CMakeLists.txt | 121 --- src/database/graph_db.hpp | 5 - src/database/graph_db_accessor.hpp | 5 - src/database/single_node_ha/config.cpp | 51 - src/database/single_node_ha/config.hpp | 31 - src/database/single_node_ha/graph_db.cpp | 103 -- src/database/single_node_ha/graph_db.hpp | 156 --- .../single_node_ha/graph_db_accessor.cpp | 556 ---------- .../single_node_ha/graph_db_accessor.hpp | 691 ------------- src/database/single_node_ha/serialization.lcp | 9 - src/durability/hashed_file_reader.hpp | 74 -- src/durability/hashed_file_writer.hpp | 74 -- src/durability/hasher.hpp | 31 - src/durability/single_node_ha/paths.cpp | 26 - src/durability/single_node_ha/paths.hpp | 20 - src/durability/single_node_ha/recovery.cpp | 172 ---- src/durability/single_node_ha/recovery.hpp | 57 -- src/durability/single_node_ha/snapshooter.cpp | 108 -- src/durability/single_node_ha/snapshooter.hpp | 22 - src/durability/single_node_ha/state_delta.cpp | 453 --------- src/durability/single_node_ha/state_delta.lcp | 149 --- src/durability/single_node_ha/version.hpp | 34 - src/memgraph_ha.cpp | 79 -- src/query/interpreter.hpp | 2 - src/raft/config.hpp | 60 -- src/raft/coordination.cpp | 137 --- src/raft/coordination.hpp | 142 --- src/raft/exceptions.hpp | 111 -- src/raft/log_entry.lcp | 19 - src/raft/raft_interface.hpp | 96 -- src/raft/raft_rpc_messages.lcp | 43 - src/raft/raft_server.cpp | 961 ------------------ src/raft/raft_server.hpp | 393 ------- src/raft/replication_log.hpp | 86 -- src/raft/replication_timeout_map.hpp | 71 -- src/raft/storage_info.cpp | 74 -- src/raft/storage_info.hpp | 47 - src/raft/storage_info_rpc_messages.lcp | 19 - src/storage/common/constraints/common.hpp | 30 - src/storage/common/constraints/exceptions.hpp | 19 - src/storage/common/constraints/record.cpp | 66 -- src/storage/common/constraints/record.hpp | 25 - .../common/constraints/unique_constraints.cpp | 258 ----- .../common/constraints/unique_constraints.hpp | 166 --- src/storage/common/index.hpp | 183 ---- src/storage/common/locking/lock_status.hpp | 3 - src/storage/common/locking/record_lock.cpp | 131 --- src/storage/common/locking/record_lock.hpp | 30 - src/storage/common/mvcc/exceptions.hpp | 15 - src/storage/common/mvcc/version.hpp | 42 - src/storage/common/pod_buffer.hpp | 49 - src/storage/common/types/property_value.hpp | 6 - .../common/types/property_value_store.cpp | 240 ----- .../common/types/property_value_store.hpp | 164 --- src/storage/common/types/slk.cpp | 126 --- src/storage/common/types/slk.hpp | 52 - src/storage/common/types/types.hpp | 249 ----- src/storage/edge_accessor.hpp | 7 - .../single_node_ha/concurrent_id_mapper.hpp | 68 -- .../single_node_ha/deferred_deleter.hpp | 78 -- src/storage/single_node_ha/edge.hpp | 32 - src/storage/single_node_ha/edge_accessor.cpp | 59 -- src/storage/single_node_ha/edge_accessor.hpp | 77 -- src/storage/single_node_ha/edges.hpp | 156 --- .../single_node_ha/garbage_collector.hpp | 70 -- .../single_node_ha/indexes/key_index.hpp | 188 ---- .../indexes/label_property_index.hpp | 533 ---------- src/storage/single_node_ha/mvcc/record.hpp | 334 ------ .../single_node_ha/mvcc/version_list.hpp | 261 ----- .../single_node_ha/record_accessor.cpp | 174 ---- .../single_node_ha/record_accessor.hpp | 204 ---- .../single_node_ha/state_delta_buffer.hpp | 47 - src/storage/single_node_ha/storage.hpp | 102 -- src/storage/single_node_ha/storage_gc.hpp | 174 ---- src/storage/single_node_ha/vertex.hpp | 28 - .../single_node_ha/vertex_accessor.cpp | 86 -- .../single_node_ha/vertex_accessor.hpp | 158 --- src/storage/vertex_accessor.hpp | 7 - src/transactions/commit_log.hpp | 80 -- src/transactions/engine.hpp | 5 - src/transactions/lock_store.hpp | 85 -- src/transactions/single_node_ha/engine.cpp | 305 ------ src/transactions/single_node_ha/engine.hpp | 88 -- src/transactions/snapshot.hpp | 98 -- src/transactions/transaction.hpp | 118 --- src/transactions/type.hpp | 14 - tests/unit/plan_pretty_print.cpp | 1 - 87 files changed, 10779 deletions(-) delete mode 100644 src/database/graph_db.hpp delete mode 100644 src/database/graph_db_accessor.hpp delete mode 100644 src/database/single_node_ha/config.cpp delete mode 100644 src/database/single_node_ha/config.hpp delete mode 100644 src/database/single_node_ha/graph_db.cpp delete mode 100644 src/database/single_node_ha/graph_db.hpp delete mode 100644 src/database/single_node_ha/graph_db_accessor.cpp delete mode 100644 src/database/single_node_ha/graph_db_accessor.hpp delete mode 100644 src/database/single_node_ha/serialization.lcp delete mode 100644 src/durability/hashed_file_reader.hpp delete mode 100644 src/durability/hashed_file_writer.hpp delete mode 100644 src/durability/hasher.hpp delete mode 100644 src/durability/single_node_ha/paths.cpp delete mode 100644 src/durability/single_node_ha/paths.hpp delete mode 100644 src/durability/single_node_ha/recovery.cpp delete mode 100644 src/durability/single_node_ha/recovery.hpp delete mode 100644 src/durability/single_node_ha/snapshooter.cpp delete mode 100644 src/durability/single_node_ha/snapshooter.hpp delete mode 100644 src/durability/single_node_ha/state_delta.cpp delete mode 100644 src/durability/single_node_ha/state_delta.lcp delete mode 100644 src/durability/single_node_ha/version.hpp delete mode 100644 src/memgraph_ha.cpp delete mode 100644 src/raft/config.hpp delete mode 100644 src/raft/coordination.cpp delete mode 100644 src/raft/coordination.hpp delete mode 100644 src/raft/exceptions.hpp delete mode 100644 src/raft/log_entry.lcp delete mode 100644 src/raft/raft_interface.hpp delete mode 100644 src/raft/raft_rpc_messages.lcp delete mode 100644 src/raft/raft_server.cpp delete mode 100644 src/raft/raft_server.hpp delete mode 100644 src/raft/replication_log.hpp delete mode 100644 src/raft/replication_timeout_map.hpp delete mode 100644 src/raft/storage_info.cpp delete mode 100644 src/raft/storage_info.hpp delete mode 100644 src/raft/storage_info_rpc_messages.lcp delete mode 100644 src/storage/common/constraints/common.hpp delete mode 100644 src/storage/common/constraints/exceptions.hpp delete mode 100644 src/storage/common/constraints/record.cpp delete mode 100644 src/storage/common/constraints/record.hpp delete mode 100644 src/storage/common/constraints/unique_constraints.cpp delete mode 100644 src/storage/common/constraints/unique_constraints.hpp delete mode 100644 src/storage/common/index.hpp delete mode 100644 src/storage/common/locking/lock_status.hpp delete mode 100644 src/storage/common/locking/record_lock.cpp delete mode 100644 src/storage/common/locking/record_lock.hpp delete mode 100644 src/storage/common/mvcc/exceptions.hpp delete mode 100644 src/storage/common/mvcc/version.hpp delete mode 100644 src/storage/common/pod_buffer.hpp delete mode 100644 src/storage/common/types/property_value.hpp delete mode 100644 src/storage/common/types/property_value_store.cpp delete mode 100644 src/storage/common/types/property_value_store.hpp delete mode 100644 src/storage/common/types/slk.cpp delete mode 100644 src/storage/common/types/slk.hpp delete mode 100644 src/storage/common/types/types.hpp delete mode 100644 src/storage/edge_accessor.hpp delete mode 100644 src/storage/single_node_ha/concurrent_id_mapper.hpp delete mode 100644 src/storage/single_node_ha/deferred_deleter.hpp delete mode 100644 src/storage/single_node_ha/edge.hpp delete mode 100644 src/storage/single_node_ha/edge_accessor.cpp delete mode 100644 src/storage/single_node_ha/edge_accessor.hpp delete mode 100644 src/storage/single_node_ha/edges.hpp delete mode 100644 src/storage/single_node_ha/garbage_collector.hpp delete mode 100644 src/storage/single_node_ha/indexes/key_index.hpp delete mode 100644 src/storage/single_node_ha/indexes/label_property_index.hpp delete mode 100644 src/storage/single_node_ha/mvcc/record.hpp delete mode 100644 src/storage/single_node_ha/mvcc/version_list.hpp delete mode 100644 src/storage/single_node_ha/record_accessor.cpp delete mode 100644 src/storage/single_node_ha/record_accessor.hpp delete mode 100644 src/storage/single_node_ha/state_delta_buffer.hpp delete mode 100644 src/storage/single_node_ha/storage.hpp delete mode 100644 src/storage/single_node_ha/storage_gc.hpp delete mode 100644 src/storage/single_node_ha/vertex.hpp delete mode 100644 src/storage/single_node_ha/vertex_accessor.cpp delete mode 100644 src/storage/single_node_ha/vertex_accessor.hpp delete mode 100644 src/storage/vertex_accessor.hpp delete mode 100644 src/transactions/commit_log.hpp delete mode 100644 src/transactions/engine.hpp delete mode 100644 src/transactions/lock_store.hpp delete mode 100644 src/transactions/single_node_ha/engine.cpp delete mode 100644 src/transactions/single_node_ha/engine.hpp delete mode 100644 src/transactions/snapshot.hpp delete mode 100644 src/transactions/transaction.hpp delete mode 100644 src/transactions/type.hpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b47b00299..71954a6bd 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -17,112 +17,6 @@ if (MG_ENTERPRISE) add_subdirectory(rpc) endif() -## ---------------------------------------------------------------------------- -## Common LCP files -## ---------------------------------------------------------------------------- -# -#define_add_lcp(add_lcp_common lcp_common_cpp_files generated_lcp_common_files) -# -#add_lcp_common(query/frontend/ast/ast.lcp) -#add_lcp_common(query/frontend/semantic/symbol.lcp) -#add_lcp_common(query/plan/operator.lcp) -# -#add_custom_target(generate_lcp_common DEPENDS ${generated_lcp_common_files}) -# -## ---------------------------------------------------------------------------- -## END Common LCP files -## ---------------------------------------------------------------------------- -# -## ---------------------------------------------------------------------------- -## Memgraph Single Node High Availability -## ---------------------------------------------------------------------------- -#set(mg_single_node_ha_sources -# ${lcp_common_cpp_files} -# data_structures/concurrent/skiplist_gc.cpp -# database/single_node_ha/config.cpp -# database/single_node_ha/graph_db.cpp -# database/single_node_ha/graph_db_accessor.cpp -# durability/single_node_ha/state_delta.cpp -# durability/single_node_ha/paths.cpp -# durability/single_node_ha/snapshooter.cpp -# durability/single_node_ha/recovery.cpp -# glue/communication.cpp -# raft/coordination.cpp -# raft/raft_server.cpp -# raft/storage_info.cpp -# query/common.cpp -# query/frontend/ast/cypher_main_visitor.cpp -# query/frontend/ast/pretty_print.cpp -# query/frontend/parsing.cpp -# query/frontend/semantic/required_privileges.cpp -# query/frontend/semantic/symbol_generator.cpp -# query/frontend/stripped.cpp -# query/interpret/awesome_memgraph_functions.cpp -# query/interpreter.cpp -# query/plan/operator.cpp -# query/plan/preprocess.cpp -# query/plan/pretty_print.cpp -# query/plan/profile.cpp -# query/plan/rewrite/index_lookup.cpp -# query/plan/rule_based_planner.cpp -# query/plan/variable_start_planner.cpp -# query/procedure/mg_procedure_impl.cpp -# query/procedure/module.cpp -# query/typed_value.cpp -# storage/common/constraints/record.cpp -# storage/common/constraints/unique_constraints.cpp -# storage/common/types/slk.cpp -# storage/common/types/property_value_store.cpp -# storage/common/locking/record_lock.cpp -# storage/single_node_ha/edge_accessor.cpp -# storage/single_node_ha/record_accessor.cpp -# storage/single_node_ha/vertex_accessor.cpp -# transactions/single_node_ha/engine.cpp -# memgraph_init.cpp -#) -#if (MG_ENTERPRISE) -# set(mg_single_node_ha_sources -# ${mg_single_node_ha_sources} -# glue/auth.cpp) -#endif() -# -#define_add_lcp(add_lcp_single_node_ha mg_single_node_ha_sources generated_lcp_single_node_ha_files) -# -#add_lcp_single_node_ha(durability/single_node_ha/state_delta.lcp) -#add_lcp_single_node_ha(database/single_node_ha/serialization.lcp SLK_SERIALIZE -# DEPENDS durability/single_node_ha/state_delta.lcp) -#add_lcp_single_node_ha(raft/raft_rpc_messages.lcp SLK_SERIALIZE) -#add_lcp_single_node_ha(raft/log_entry.lcp SLK_SERIALIZE) -#add_lcp_single_node_ha(raft/snapshot_metadata.lcp SLK_SERIALIZE) -#add_lcp_single_node_ha(raft/storage_info_rpc_messages.lcp SLK_SERIALIZE) -# -#add_custom_target(generate_lcp_single_node_ha DEPENDS generate_lcp_common ${generated_lcp_single_node_ha_files}) -# -#set(MG_SINGLE_NODE_HA_LIBS stdc++fs Threads::Threads fmt cppitertools -# antlr_opencypher_parser_lib dl glog gflags -# mg-utils mg-io mg-requests mg-communication mg-rpc -# mg-auth mg-audit) -# -#add_library(mg-single-node-ha STATIC ${mg_single_node_ha_sources}) -#target_include_directories(mg-single-node-ha PUBLIC ${CMAKE_SOURCE_DIR}/include) -#target_link_libraries(mg-single-node-ha ${MG_SINGLE_NODE_HA_LIBS}) -#add_dependencies(mg-single-node-ha generate_opencypher_parser) -#add_dependencies(mg-single-node-ha generate_lcp_single_node_ha) -#target_compile_definitions(mg-single-node-ha PUBLIC MG_SINGLE_NODE_HA) -## TODO: Make these symbols visible once we add support for custom procedure -## modules in HA. -## NOTE: `include/mg_procedure.syms` describes a pattern match for symbols which -## should be dynamically exported, so that `dlopen` can correctly link the -## symbols in custom procedure module libraries. -## target_link_libraries(mg-single-node-ha "-Wl,--dynamic-list=${CMAKE_SOURCE_DIR}/include/mg_procedure.syms") -# -## ---------------------------------------------------------------------------- -## END Memgraph Single Node High Availability -## ---------------------------------------------------------------------------- -# -#add_custom_target(generate_lcp) -#add_dependencies(generate_lcp generate_lcp_single_node_ha) - string(TOLOWER ${CMAKE_BUILD_TYPE} lower_build_type) # Generate a version.hpp file @@ -218,18 +112,3 @@ endif() # Create empty directories for default location of lib and log. install(CODE "file(MAKE_DIRECTORY \$ENV{DESTDIR}/var/log/memgraph \$ENV{DESTDIR}/var/lib/memgraph)") - - -## memgraph single node high availability executable -#add_executable(memgraph_ha memgraph_ha.cpp) -#target_link_libraries(memgraph_ha mg-single-node-ha mg-kvstore telemetry_lib) -#set_target_properties(memgraph_ha PROPERTIES -# # Set the executable output name to include version information. -# OUTPUT_NAME "memgraph_ha-${MEMGRAPH_VERSION}_${CMAKE_BUILD_TYPE}" -# # Output the executable in main binary dir. -# RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) -## Create symlink to the built executable. -#add_custom_command(TARGET memgraph_ha POST_BUILD -# COMMAND ${CMAKE_COMMAND} -E create_symlink $ ${CMAKE_BINARY_DIR}/memgraph_ha -# BYPRODUCTS ${CMAKE_BINARY_DIR}/memgraph_ha -# COMMENT "Creating symlink to memgraph single node high availability executable") diff --git a/src/database/graph_db.hpp b/src/database/graph_db.hpp deleted file mode 100644 index 849512678..000000000 --- a/src/database/graph_db.hpp +++ /dev/null @@ -1,5 +0,0 @@ -#pragma once - -#ifdef MG_SINGLE_NODE_HA -#include "database/single_node_ha/graph_db.hpp" -#endif diff --git a/src/database/graph_db_accessor.hpp b/src/database/graph_db_accessor.hpp deleted file mode 100644 index 72c48c723..000000000 --- a/src/database/graph_db_accessor.hpp +++ /dev/null @@ -1,5 +0,0 @@ -#pragma once - -#ifdef MG_SINGLE_NODE_HA -#include "database/single_node_ha/graph_db_accessor.hpp" -#endif diff --git a/src/database/single_node_ha/config.cpp b/src/database/single_node_ha/config.cpp deleted file mode 100644 index 69d9585f8..000000000 --- a/src/database/single_node_ha/config.cpp +++ /dev/null @@ -1,51 +0,0 @@ -#include "database/single_node_ha/config.hpp" - -#include -#include - -#include "utils/flag_validation.hpp" -#include "utils/string.hpp" - -// Durability flags. -DEFINE_string( - durability_directory, "durability", - "Path to directory in which to save snapshots and write-ahead log files."); -DEFINE_bool(db_recover_on_startup, true, "Recover database on startup."); - -// Misc flags -DEFINE_int32(query_execution_time_sec, 180, - "Maximum allowed query execution time. Queries exceeding this " - "limit will be aborted. Value of -1 means no limit."); -DEFINE_int32(gc_cycle_sec, 30, - "Amount of time between starts of two cleaning cycles in seconds. " - "-1 to turn off."); -// Data location. -DEFINE_string(properties_on_disk, "", - "Property names of properties which will be stored on available " - "disk. Property names have to be separated with comma (,)."); - -// High availability. -DEFINE_string( - coordination_config_file, "coordination.json", - "Path to the file containing coordination configuration in JSON format"); - -DEFINE_string(raft_config_file, "raft.json", - "Path to the file containing raft configuration in JSON format"); - -DEFINE_VALIDATED_int32( - server_id, 1U, "Id used in the coordination configuration for this machine", - FLAG_IN_RANGE(1, std::numeric_limits::max())); - -database::Config::Config() - // Durability flags. - : durability_directory{FLAGS_durability_directory}, - db_recover_on_startup{FLAGS_db_recover_on_startup}, - // Misc flags. - gc_cycle_sec{FLAGS_gc_cycle_sec}, - query_execution_time_sec{FLAGS_query_execution_time_sec}, - // Data location. - properties_on_disk(utils::Split(FLAGS_properties_on_disk, ",")), - // High availability. - coordination_config_file{FLAGS_coordination_config_file}, - raft_config_file{FLAGS_raft_config_file}, - server_id{static_cast(FLAGS_server_id)} {} diff --git a/src/database/single_node_ha/config.hpp b/src/database/single_node_ha/config.hpp deleted file mode 100644 index ef123c24d..000000000 --- a/src/database/single_node_ha/config.hpp +++ /dev/null @@ -1,31 +0,0 @@ -/// @file - -#pragma once - -#include -#include -#include - -namespace database { - -/// Database configuration. Initialized from flags, but modifiable. -struct Config { - Config(); - - // Durability flags. - std::string durability_directory; - bool db_recover_on_startup; - - // Misc flags. - int gc_cycle_sec; - int query_execution_time_sec; - - // set of properties which will be stored on disk - std::vector properties_on_disk; - - // HA flags. - std::string coordination_config_file; - std::string raft_config_file; - uint16_t server_id; -}; -} // namespace database diff --git a/src/database/single_node_ha/graph_db.cpp b/src/database/single_node_ha/graph_db.cpp deleted file mode 100644 index 9c1fb9f15..000000000 --- a/src/database/single_node_ha/graph_db.cpp +++ /dev/null @@ -1,103 +0,0 @@ -#include "database/single_node_ha/graph_db.hpp" - -#include - -#include - -#include "database/single_node_ha/graph_db_accessor.hpp" -#include "storage/single_node_ha/concurrent_id_mapper.hpp" -#include "storage/single_node_ha/storage_gc.hpp" -#include "transactions/single_node_ha/engine.hpp" - -namespace database { - -GraphDb::GraphDb(Config config) : config_(config) {} - -void GraphDb::Start() { - utils::EnsureDirOrDie(config_.durability_directory); - raft_server_.Start(); - storage_info_.Start(); - CHECK(coordination_.Start()) << "Couldn't start coordination!"; - - // Start transaction killer. - if (config_.query_execution_time_sec != -1) { - transaction_killer_.Run( - "TX killer", - std::chrono::seconds( - std::max(1, std::min(5, config_.query_execution_time_sec / 4))), - [this]() { - tx_engine_.LocalForEachActiveTransaction([this](tx::Transaction &t) { - if (t.creation_time() + - std::chrono::seconds(config_.query_execution_time_sec) < - std::chrono::steady_clock::now()) { - t.set_should_abort(); - }; - }); - }); - } -} - -void GraphDb::AwaitShutdown(std::function call_before_shutdown) { - coordination_.AwaitShutdown([this, &call_before_shutdown]() { - tx_engine_.LocalForEachActiveTransaction( - [](auto &t) { t.set_should_abort(); }); - - call_before_shutdown(); - - raft_server_.Shutdown(); - }); -} - -void GraphDb::Shutdown() { coordination_.Shutdown(); } - -GraphDbAccessor GraphDb::Access() { - return GraphDbAccessor(this); -} - -GraphDbAccessor GraphDb::Access(tx::TransactionId tx_id) { - return GraphDbAccessor(this, tx_id); -} - -GraphDbAccessor GraphDb::AccessBlocking( - std::optional parent_tx) { - return GraphDbAccessor(this, parent_tx); -} - -Storage &GraphDb::storage() { return *storage_; } - -raft::RaftInterface *GraphDb::raft() { return &raft_server_; } - -raft::StorageInfo *GraphDb::storage_info() { return &storage_info_; } - -tx::Engine &GraphDb::tx_engine() { return tx_engine_; } - -storage::StateDeltaBuffer *GraphDb::sd_buffer() { return &sd_buffer_; } - -storage::ConcurrentIdMapper &GraphDb::label_mapper() { - return label_mapper_; -} - -storage::ConcurrentIdMapper &GraphDb::edge_type_mapper() { - return edge_mapper_; -} - -storage::ConcurrentIdMapper &GraphDb::property_mapper() { - return property_mapper_; -} - -void GraphDb::CollectGarbage() { storage_gc_->CollectGarbage(); } - -void GraphDb::Reset() { - // Release gc scheduler to stop it from touching storage. - storage_gc_ = nullptr; - - // This will make all active transactions to abort and reset the internal - // state. - tx_engine_.Reset(); - - storage_ = std::make_unique(config_.properties_on_disk); - storage_gc_ = std::make_unique( - *storage_, tx_engine_, &raft_server_, config_.gc_cycle_sec); -} - -} // namespace database diff --git a/src/database/single_node_ha/graph_db.hpp b/src/database/single_node_ha/graph_db.hpp deleted file mode 100644 index e110a31dc..000000000 --- a/src/database/single_node_ha/graph_db.hpp +++ /dev/null @@ -1,156 +0,0 @@ -/// @file -#pragma once - -#include -#include -#include -#include - -#include "database/single_node_ha/config.hpp" -#include "io/network/endpoint.hpp" -#include "raft/coordination.hpp" -#include "raft/raft_server.hpp" -#include "raft/storage_info.hpp" -#include "storage/common/types/types.hpp" -#include "storage/single_node_ha/concurrent_id_mapper.hpp" -#include "storage/single_node_ha/state_delta_buffer.hpp" -#include "storage/single_node_ha/storage.hpp" -#include "storage/single_node_ha/storage_gc.hpp" -#include "transactions/single_node_ha/engine.hpp" -#include "utils/scheduler.hpp" -#include "utils/stat.hpp" - -namespace database { - -/// Struct containing basic statistics about storage. -struct Stat { - // std::atomic is needed as reference to stat is passed to - // other threads. If there were no std::atomic we couldn't guarantee - // that a change to any member will be visible to other threads. - - /// Vertex count is number of `VersionList` physically stored. - std::atomic vertex_count{0}; - - /// Vertex count is number of `VersionList` physically stored. - std::atomic edge_count{0}; - - /// Average in/out degree of a vertex. - /// `avg_degree` is calculated as 2 * `edges_count` / `vertex_count`. - std::atomic avg_degree{0}; -}; - -class GraphDbAccessor; - -/// An abstract base class providing the interface for a graph database. -/// -/// Always be sure that GraphDb object is destructed before main exits, i. e. -/// GraphDb object shouldn't be part of global/static variable, except if its -/// destructor is explicitly called before main exits. Consider code: -/// -/// GraphDb db; // KeyIndex is created as a part of database::Storage -/// int main() { -/// GraphDbAccessor dba(db); -/// auto v = dba.InsertVertex(); -/// v.add_label(dba.Label( -/// "Start")); // New SkipList is created in KeyIndex for LabelIndex. -/// // That SkipList creates SkipListGc which -/// // initialises static Executor object. -/// return 0; -/// } -/// -/// After main exits: 1. Executor is destructed, 2. KeyIndex is destructed. -/// Destructor of KeyIndex calls delete on created SkipLists which destroy -/// SkipListGc that tries to use Excutioner object that doesn't exist anymore. -/// -> CRASH -class GraphDb { - public: - explicit GraphDb(Config config = Config()); - - GraphDb(const GraphDb &) = delete; - GraphDb(GraphDb &&) = delete; - GraphDb &operator=(const GraphDb &) = delete; - GraphDb &operator=(GraphDb &&) = delete; - - void Start(); - void AwaitShutdown(std::function call_before_shutdown); - void Shutdown(); - - /// Create a new accessor by starting a new transaction. - GraphDbAccessor Access(); - GraphDbAccessor AccessBlocking(std::optional parent_tx); - /// Create an accessor for a running transaction. - GraphDbAccessor Access(tx::TransactionId); - - Storage &storage(); - raft::RaftInterface *raft(); - raft::StorageInfo *storage_info(); - tx::Engine &tx_engine(); - storage::StateDeltaBuffer *sd_buffer(); - storage::ConcurrentIdMapper &label_mapper(); - storage::ConcurrentIdMapper &edge_type_mapper(); - storage::ConcurrentIdMapper &property_mapper(); - void CollectGarbage(); - - /// Releases the storage object safely and creates a new object, resets the tx - /// engine. - /// - /// This is needed in HA during the leader -> follower transition where we - /// might end up with some stale transactions on the leader. - void Reset(); - - /// Get live view of storage stats. Gets updated on RefreshStat. - const Stat &GetStat() const { return stat_; } - - /// Updates storage stats. - void RefreshStat() { - auto vertex_count = storage().vertices_.access().size(); - auto edge_count = storage().edges_.access().size(); - - stat_.vertex_count = vertex_count; - stat_.edge_count = edge_count; - - if (vertex_count != 0) { - stat_.avg_degree = 2 * static_cast(edge_count) / vertex_count; - } else { - stat_.avg_degree = 0; - } - } - - /// Returns the number of bytes used by the durability directory on disk. - uint64_t GetDurabilityDirDiskUsage() const { - return utils::GetDirDiskUsage(config_.durability_directory); - } - - protected: - Stat stat_; - - utils::Scheduler transaction_killer_; - - Config config_; - std::unique_ptr storage_ = - std::make_unique(config_.properties_on_disk); - raft::Coordination coordination_{ - config_.server_id, - raft::LoadNodesFromFile(config_.coordination_config_file)}; - raft::RaftServer raft_server_{ - config_.server_id, - config_.durability_directory, - config_.db_recover_on_startup, - raft::Config::LoadFromFile(config_.raft_config_file), - &coordination_, - this}; - raft::StorageInfo storage_info_{this, &coordination_, config_.server_id}; - storage::StateDeltaBuffer sd_buffer_; - - tx::Engine tx_engine_{&raft_server_, &sd_buffer_}; - std::unique_ptr storage_gc_ = std::make_unique( - *storage_, tx_engine_, &raft_server_, config_.gc_cycle_sec); - storage::ConcurrentIdMapper label_mapper_{ - storage_->PropertiesOnDisk()}; - storage::ConcurrentIdMapper edge_mapper_{ - storage_->PropertiesOnDisk()}; - storage::ConcurrentIdMapper property_mapper_{ - storage_->PropertiesOnDisk()}; -}; - -} // namespace database diff --git a/src/database/single_node_ha/graph_db_accessor.cpp b/src/database/single_node_ha/graph_db_accessor.cpp deleted file mode 100644 index d7a9fd6a2..000000000 --- a/src/database/single_node_ha/graph_db_accessor.cpp +++ /dev/null @@ -1,556 +0,0 @@ -#include "database/single_node_ha/graph_db_accessor.hpp" - -#include -#include - -#include - -#include "durability/single_node_ha/state_delta.hpp" -#include "storage/common/constraints/exceptions.hpp" -#include "storage/single_node_ha/edge.hpp" -#include "storage/single_node_ha/edge_accessor.hpp" -#include "storage/single_node_ha/vertex.hpp" -#include "storage/single_node_ha/vertex_accessor.hpp" -#include "utils/cast.hpp" -#include "utils/on_scope_exit.hpp" -#include "utils/stat.hpp" - -namespace database { - -GraphDbAccessor::GraphDbAccessor(GraphDb *db) - : db_(db), - transaction_(db->tx_engine().Begin()), - transaction_starter_{true} {} - -GraphDbAccessor::GraphDbAccessor(GraphDb *db, tx::TransactionId tx_id) - : db_(db), - transaction_(db->tx_engine().RunningTransaction(tx_id)), - transaction_starter_{false} {} - -GraphDbAccessor::GraphDbAccessor(GraphDb *db, - std::optional parent_tx) - : db_(db), - transaction_(db->tx_engine().BeginBlocking(parent_tx)), - transaction_starter_{true} {} - -GraphDbAccessor::GraphDbAccessor(GraphDbAccessor &&other) - : db_(other.db_), - transaction_(other.transaction_), - transaction_starter_(other.transaction_starter_), - commited_(other.commited_), - aborted_(other.aborted_) { - // Make sure that the other transaction isn't a transaction starter so that - // its destructor doesn't close the transaction. - other.transaction_starter_ = false; -} - -GraphDbAccessor &GraphDbAccessor::operator=(GraphDbAccessor &&other) { - db_ = other.db_; - transaction_ = other.transaction_; - transaction_starter_ = other.transaction_starter_; - commited_ = other.commited_; - aborted_ = other.aborted_; - - // Make sure that the other transaction isn't a transaction starter so that - // its destructor doesn't close the transaction. - other.transaction_starter_ = false; - - return *this; -} - -GraphDbAccessor::~GraphDbAccessor() { - if (transaction_starter_ && !commited_ && !aborted_) { - this->Abort(); - } -} - -tx::TransactionId GraphDbAccessor::transaction_id() const { - return transaction_->id_; -} - -void GraphDbAccessor::AdvanceCommand() { - DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; - db_->tx_engine().Advance(transaction_->id_); -} - -void GraphDbAccessor::Commit() { - DCHECK(!commited_ && !aborted_) << "Already aborted or commited transaction."; - db_->tx_engine().Commit(*transaction_); - commited_ = true; -} - -void GraphDbAccessor::Abort() { - DCHECK(!commited_ && !aborted_) << "Already aborted or commited transaction."; - db_->tx_engine().Abort(*transaction_); - aborted_ = true; -} - -bool GraphDbAccessor::should_abort() const { - DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; - return transaction_->should_abort(); -} - -raft::RaftInterface *GraphDbAccessor::raft() { - return db_->raft(); -} - -storage::StateDeltaBuffer *GraphDbAccessor::sd_buffer() { - return db_->sd_buffer(); -} - -VertexAccessor GraphDbAccessor::InsertVertex( - std::optional requested_gid) { - DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; - - auto gid = db_->storage().vertex_generator_.Next(requested_gid); - auto vertex_vlist = new mvcc::VersionList(*transaction_, gid); - - bool success = - db_->storage().vertices_.access().insert(gid, vertex_vlist).second; - CHECK(success) << "Attempting to insert a vertex with an existing GID: " - << gid.AsUint(); - sd_buffer()->Emplace( - database::StateDelta::CreateVertex(transaction_->id_, vertex_vlist->gid_)); - auto va = VertexAccessor(vertex_vlist, *this); - return va; -} - -std::optional GraphDbAccessor::FindVertexOptional( - storage::Gid gid, bool current_state) { - VertexAccessor record_accessor(db_->storage().LocalAddress(gid), - *this); - if (!record_accessor.Visible(transaction(), current_state)) - return std::nullopt; - return record_accessor; -} - -VertexAccessor GraphDbAccessor::FindVertex(storage::Gid gid, - bool current_state) { - auto found = FindVertexOptional(gid, current_state); - CHECK(found) << "Unable to find vertex for id: " << gid.AsUint(); - return *found; -} - -std::optional GraphDbAccessor::FindEdgeOptional( - storage::Gid gid, bool current_state) { - EdgeAccessor record_accessor(db_->storage().LocalAddress(gid), *this); - if (!record_accessor.Visible(transaction(), current_state)) - return std::nullopt; - return record_accessor; -} - -EdgeAccessor GraphDbAccessor::FindEdge(storage::Gid gid, bool current_state) { - auto found = FindEdgeOptional(gid, current_state); - CHECK(found) << "Unable to find edge for id: " << gid.AsUint(); - return *found; -} - -void GraphDbAccessor::BuildIndex(storage::Label label, - storage::Property property) { - DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; - - // Create the index - const LabelPropertyIndex::Key key(label, property); - if (db_->storage().label_property_index_.CreateIndex(key) == false) { - throw IndexExistsException( - "Index is either being created by another transaction or already " - "exists."); - } - - try { - auto dba = db_->AccessBlocking(std::make_optional(transaction_->id_)); - - dba.PopulateIndex(key); - dba.EnableIndex(key); - dba.Commit(); - } catch (const tx::TransactionEngineError &e) { - db_->storage().label_property_index_.DeleteIndex(key); - throw TransactionException(e.what()); - } -} - -void GraphDbAccessor::EnableIndex(const LabelPropertyIndex::Key &key) { - // Commit transaction as we finished applying method on newest visible - // records. Write that transaction's ID to the RaftServer as the index has - // been built at this point even if this DBA's transaction aborts for some - // reason. - sd_buffer()->Emplace(database::StateDelta::BuildIndex( - transaction_id(), key.label_, LabelName(key.label_), key.property_, - PropertyName(key.property_))); -} - -void GraphDbAccessor::PopulateIndex(const LabelPropertyIndex::Key &key) { - for (auto vertex : Vertices(key.label_, false)) { - if (vertex.PropsAt(key.property_).type() == PropertyValue::Type::Null) - continue; - db_->storage().label_property_index_.UpdateOnLabelProperty(vertex.address(), - vertex.current_); - } -} - -void GraphDbAccessor::DeleteIndex(storage::Label label, - storage::Property property) { - DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; - - LabelPropertyIndex::Key key(label, property); - try { - auto dba = db_->AccessBlocking(std::make_optional(transaction_->id_)); - - db_->storage().label_property_index_.DeleteIndex(key); - dba.sd_buffer()->Emplace(database::StateDelta::DropIndex( - dba.transaction_id(), key.label_, LabelName(key.label_), key.property_, - PropertyName(key.property_))); - - dba.Commit(); - } catch (const tx::TransactionEngineError &e) { - throw TransactionException(e.what()); - } -} - -void GraphDbAccessor::BuildUniqueConstraint( - storage::Label label, const std::vector &properties) { - DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; - - storage::constraints::ConstraintEntry entry{label, properties}; - if (!db_->storage().unique_constraints_.AddConstraint(entry)) { - // Already exists - return; - } - - try { - auto dba = db_->AccessBlocking(std::make_optional(transaction().id_)); - - for (auto v : dba.Vertices(false)) { - if (std::find(v.labels().begin(), v.labels().end(), label) != - v.labels().end()) { - db_->storage().unique_constraints_.Update(v, dba.transaction()); - } - } - - std::vector property_names(properties.size()); - std::transform(properties.begin(), properties.end(), property_names.begin(), - [&dba](storage::Property property) { - return dba.PropertyName(property); - }); - - dba.sd_buffer()->Emplace(database::StateDelta::BuildUniqueConstraint( - dba.transaction().id_, label, dba.LabelName(label), properties, - property_names)); - - dba.Commit(); - - } catch (const tx::TransactionEngineError &e) { - db_->storage().unique_constraints_.RemoveConstraint(entry); - throw TransactionException(e.what()); - } catch (const storage::constraints::ViolationException &e) { - db_->storage().unique_constraints_.RemoveConstraint(entry); - throw ConstraintViolationException(e.what()); - } catch (const storage::constraints::SerializationException &e) { - db_->storage().unique_constraints_.RemoveConstraint(entry); - throw mvcc::SerializationError(); - } catch (...) { - db_->storage().unique_constraints_.RemoveConstraint(entry); - throw; - } -} - -void GraphDbAccessor::DeleteUniqueConstraint( - storage::Label label, const std::vector &properties) { - storage::constraints::ConstraintEntry entry{label, properties}; - try { - auto dba = db_->AccessBlocking(std::make_optional(transaction().id_)); - - if (!db_->storage().unique_constraints_.RemoveConstraint(entry)) { - // Nothing was deleted - return; - } - - std::vector property_names(properties.size()); - std::transform(properties.begin(), properties.end(), property_names.begin(), - [&dba](storage::Property property) { - return dba.PropertyName(property); - }); - - dba.sd_buffer()->Emplace(database::StateDelta::DropUniqueConstraint( - dba.transaction().id_, label, dba.LabelName(label), properties, - property_names)); - - dba.Commit(); - } catch (const tx::TransactionEngineError &e) { - throw TransactionException(e.what()); - } -} - -std::vector -GraphDbAccessor::ListUniqueConstraints() const { - return db_->storage().unique_constraints_.ListConstraints(); -} - -void GraphDbAccessor::UpdateOnAddLabel(storage::Label label, - const VertexAccessor &vertex_accessor, - const Vertex *vertex) { - DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; - auto *vlist_ptr = vertex_accessor.address(); - - try { - db_->storage().unique_constraints_.UpdateOnAddLabel(label, vertex_accessor, - transaction()); - } catch (const storage::constraints::SerializationException &e) { - throw mvcc::SerializationError(); - } catch (const storage::constraints::ViolationException &e) { - throw ConstraintViolationException(e.what()); - } - - db_->storage().label_property_index_.UpdateOnLabel(label, vlist_ptr, vertex); - db_->storage().labels_index_.Update(label, vlist_ptr, vertex); -} - -void GraphDbAccessor::UpdateOnRemoveLabel( - storage::Label label, const RecordAccessor &accessor) { - db_->storage().unique_constraints_.UpdateOnRemoveLabel(label, accessor, - transaction()); -} - -void GraphDbAccessor::UpdateOnAddProperty( - storage::Property property, const PropertyValue &previous_value, - const PropertyValue &new_value, - const RecordAccessor &vertex_accessor, const Vertex *vertex) { - DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; - - try { - db_->storage().unique_constraints_.UpdateOnAddProperty( - property, previous_value, new_value, vertex_accessor, transaction()); - } catch (const storage::constraints::SerializationException &e) { - throw mvcc::SerializationError(); - } catch (const storage::constraints::ViolationException &e) { - throw ConstraintViolationException(e.what()); - } - - db_->storage().label_property_index_.UpdateOnProperty( - property, vertex_accessor.address(), vertex); -} - -void GraphDbAccessor::UpdateOnRemoveProperty( - storage::Property property, const PropertyValue &previous_value, - const RecordAccessor &accessor, const Vertex *vertex) { - DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; - - try { - db_->storage().unique_constraints_.UpdateOnRemoveProperty( - property, previous_value, accessor, transaction()); - } catch (const storage::constraints::SerializationException &e) { - throw mvcc::SerializationError(); - } -} - -int64_t GraphDbAccessor::VerticesCount() const { - DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; - return db_->storage().vertices_.access().size(); -} - -int64_t GraphDbAccessor::VerticesCount(storage::Label label) const { - DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; - return db_->storage().labels_index_.Count(label); -} - -int64_t GraphDbAccessor::VerticesCount(storage::Label label, - storage::Property property) const { - DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; - const LabelPropertyIndex::Key key(label, property); - DCHECK(db_->storage().label_property_index_.IndexExists(key)) - << "Index doesn't exist."; - return db_->storage().label_property_index_.Count(key); -} - -int64_t GraphDbAccessor::VerticesCount(storage::Label label, - storage::Property property, - const PropertyValue &value) const { - DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; - const LabelPropertyIndex::Key key(label, property); - DCHECK(db_->storage().label_property_index_.IndexExists(key)) - << "Index doesn't exist."; - return db_->storage() - .label_property_index_.PositionAndCount(key, value) - .second; -} - -int64_t GraphDbAccessor::VerticesCount( - storage::Label label, storage::Property property, - const std::optional> lower, - const std::optional> upper) const { - DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; - const LabelPropertyIndex::Key key(label, property); - DCHECK(db_->storage().label_property_index_.IndexExists(key)) - << "Index doesn't exist."; - CHECK(lower || upper) << "At least one bound must be provided"; - CHECK(!lower || lower.value().value().type() != PropertyValue::Type::Null) - << "Null value is not a valid index bound"; - CHECK(!upper || upper.value().value().type() != PropertyValue::Type::Null) - << "Null value is not a valid index bound"; - - if (!upper) { - auto lower_pac = db_->storage().label_property_index_.PositionAndCount( - key, lower.value().value()); - int64_t size = db_->storage().label_property_index_.Count(key); - return std::max(0l, - size - lower_pac.first - - (lower.value().IsInclusive() ? 0l : lower_pac.second)); - - } else if (!lower) { - auto upper_pac = db_->storage().label_property_index_.PositionAndCount( - key, upper.value().value()); - return upper.value().IsInclusive() ? upper_pac.first + upper_pac.second - : upper_pac.first; - - } else { - auto lower_pac = db_->storage().label_property_index_.PositionAndCount( - key, lower.value().value()); - auto upper_pac = db_->storage().label_property_index_.PositionAndCount( - key, upper.value().value()); - auto result = upper_pac.first - lower_pac.first; - if (lower.value().IsExclusive()) result -= lower_pac.second; - if (upper.value().IsInclusive()) result += upper_pac.second; - return std::max(0l, result); - } -} - -bool GraphDbAccessor::RemoveVertex(VertexAccessor &vertex_accessor, - bool check_empty) { - DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; - vertex_accessor.SwitchNew(); - // it's possible the vertex was removed already in this transaction - // due to it getting matched multiple times by some patterns - // we can only delete it once, so check if it's already deleted - if (vertex_accessor.current().is_expired_by(*transaction_)) return true; - if (check_empty && - vertex_accessor.out_degree() + vertex_accessor.in_degree() > 0) - return false; - - auto *vlist_ptr = vertex_accessor.address(); - sd_buffer()->Emplace(database::StateDelta::RemoveVertex( - transaction_->id_, vlist_ptr->gid_, check_empty)); - vlist_ptr->remove(vertex_accessor.current_, *transaction_); - return true; -} - -void GraphDbAccessor::DetachRemoveVertex(VertexAccessor &vertex_accessor) { - DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; - - vertex_accessor.SwitchNew(); - - // Note that when we call RemoveEdge we must take care not to delete from the - // collection we are iterating over. This invalidates the iterator in a subtle - // way that does not fail in tests, but is NOT correct. - for (auto edge_accessor : vertex_accessor.in()) - RemoveEdge(edge_accessor, true, false); - vertex_accessor.SwitchNew(); - for (auto edge_accessor : vertex_accessor.out()) - RemoveEdge(edge_accessor, false, true); - - RemoveVertex(vertex_accessor, false); -} - -EdgeAccessor GraphDbAccessor::InsertEdge( - VertexAccessor &from, VertexAccessor &to, storage::EdgeType edge_type, - std::optional requested_gid) { - DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; - auto gid = db_->storage().edge_generator_.Next(requested_gid); - auto edge_vlist = new mvcc::VersionList( - *transaction_, gid, from.address(), to.address(), edge_type); - // We need to insert edge_vlist to edges_ before calling update since update - // can throw and edge_vlist will not be garbage collected if it is not in - // edges_ skiplist. - bool success = db_->storage().edges_.access().insert(gid, edge_vlist).second; - CHECK(success) << "Attempting to insert an edge with an existing GID: " - << gid.AsUint(); - - // ensure that the "from" accessor has the latest version - from.SwitchNew(); - from.update().out_.emplace(to.address(), edge_vlist, edge_type); - - // ensure that the "to" accessor has the latest version (Switch new) - // WARNING: must do that after the above "from.update()" for cases when - // we are creating a cycle and "from" and "to" are the same vlist - to.SwitchNew(); - to.update().in_.emplace(from.address(), edge_vlist, edge_type); - - sd_buffer()->Emplace(database::StateDelta::CreateEdge( - transaction_->id_, edge_vlist->gid_, from.gid(), to.gid(), edge_type, - EdgeTypeName(edge_type))); - - return EdgeAccessor(edge_vlist, *this, from.address(), to.address(), - edge_type); -} - -int64_t GraphDbAccessor::EdgesCount() const { - DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; - return db_->storage().edges_.access().size(); -} - -void GraphDbAccessor::RemoveEdge(EdgeAccessor &edge, bool remove_out_edge, - bool remove_in_edge) { - DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; - // it's possible the edge was removed already in this transaction - // due to it getting matched multiple times by some patterns - // we can only delete it once, so check if it's already deleted - edge.SwitchNew(); - if (edge.current().is_expired_by(*transaction_)) return; - if (remove_out_edge) edge.from().RemoveOutEdge(edge.address()); - if (remove_in_edge) edge.to().RemoveInEdge(edge.address()); - - edge.address()->remove(edge.current_, *transaction_); - sd_buffer()->Emplace( - database::StateDelta::RemoveEdge(transaction_->id_, edge.gid())); -} - -storage::Label GraphDbAccessor::Label(const std::string &label_name) { - DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; - return db_->label_mapper().value_to_id(label_name); -} - -const std::string &GraphDbAccessor::LabelName(storage::Label label) const { - DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; - return db_->label_mapper().id_to_value(label); -} - -storage::EdgeType GraphDbAccessor::EdgeType(const std::string &edge_type_name) { - DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; - return db_->edge_type_mapper().value_to_id(edge_type_name); -} - -const std::string &GraphDbAccessor::EdgeTypeName( - storage::EdgeType edge_type) const { - DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; - return db_->edge_type_mapper().id_to_value(edge_type); -} - -storage::Property GraphDbAccessor::Property(const std::string &property_name) { - DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; - return db_->property_mapper().value_to_id(property_name); -} - -const std::string &GraphDbAccessor::PropertyName( - storage::Property property) const { - DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; - return db_->property_mapper().id_to_value(property); -} - -std::vector GraphDbAccessor::IndexInfo() const { - std::vector info; - for (storage::Label label : db_->storage().labels_index_.Keys()) { - info.emplace_back(":" + LabelName(label)); - } - for (LabelPropertyIndex::Key key : - db_->storage().label_property_index_.Keys()) { - info.emplace_back(fmt::format(":{}({})", LabelName(key.label_), - PropertyName(key.property_))); - } - return info; -} - -std::map>> -GraphDbAccessor::StorageInfo() const { - return db_->storage_info()->GetStorageInfo(); -} - -} // namespace database diff --git a/src/database/single_node_ha/graph_db_accessor.hpp b/src/database/single_node_ha/graph_db_accessor.hpp deleted file mode 100644 index 65f35f321..000000000 --- a/src/database/single_node_ha/graph_db_accessor.hpp +++ /dev/null @@ -1,691 +0,0 @@ -/// @file - -#pragma once - -#include -#include -#include -#include - -#include -#include -#include - -#include "database/single_node_ha/graph_db.hpp" -#include "raft/raft_interface.hpp" -#include "storage/common/types/types.hpp" -#include "storage/single_node_ha/edge_accessor.hpp" -#include "storage/single_node_ha/state_delta_buffer.hpp" -#include "storage/single_node_ha/vertex_accessor.hpp" -#include "transactions/transaction.hpp" -#include "transactions/type.hpp" -#include "utils/bound.hpp" -#include "utils/exceptions.hpp" - -namespace storage::constraints { -struct ConstraintEntry; -} // namespace storage::constraints - -namespace database { - -/** Thrown when inserting in an index with constraint. */ -class ConstraintViolationException : public utils::BasicException { - using utils::BasicException::BasicException; -}; - -/** Thrown when creating an index which already exists. */ -class IndexExistsException : public utils::BasicException { - using utils::BasicException::BasicException; -}; - -/// Thrown on concurrent index creation when the transaction engine fails to -/// start a new transaction. -class TransactionException : public utils::BasicException { - using utils::BasicException::BasicException; -}; - -/** - * Base accessor for the database object: exposes functions for operating on the - * database. All the functions in this class should be self-sufficient: for - * example the function for creating a new Vertex should take care of all the - * book-keeping around the creation. - */ -class GraphDbAccessor { - // We need to make friends with this guys since they need to access private - // methods for updating indices. - // TODO: Rethink this, we have too much long-distance friendship complicating - // the code. - friend class ::RecordAccessor; - friend class ::VertexAccessor; - friend class GraphDb; - - protected: - // Construction should only be done through GraphDb::Access function and - // concrete GraphDbAccessor type. - - /// Creates a new accessor by starting a new transaction. - explicit GraphDbAccessor(GraphDb *db); - /// Creates an accessor for a running transaction. - GraphDbAccessor(GraphDb *db, tx::TransactionId tx_id); - - GraphDbAccessor(GraphDb *db, std::optional parent_tx); - - public: - ~GraphDbAccessor(); - - GraphDbAccessor(const GraphDbAccessor &other) = delete; - GraphDbAccessor &operator=(const GraphDbAccessor &other) = delete; - - GraphDbAccessor(GraphDbAccessor &&other); - GraphDbAccessor &operator=(GraphDbAccessor &&other); - - /** - * Creates a new Vertex and returns an accessor to it. If the ID is - * provided, the created Vertex will have that local ID, and the ID counter - * will be increased to it so collisions are avoided. This should only be used - * by durability recovery, normal vertex creation should not provide the ID. - * - * You should NOT make interleaved recovery and normal DB op calls to this - * function. Doing so will likely mess up the ID generation and crash MG. - * Always perform recovery only once, immediately when the database is - * created, before any transactional ops start. - * - * @param requested_gid The requested GID. Should only be provided when - * recovering from durability. - * - * @return See above. - */ - VertexAccessor InsertVertex( - std::optional requested_gid = std::nullopt); - - /** - * Removes the vertex of the given accessor. If the vertex has any outgoing or - * incoming edges, it is not deleted. See `DetachRemoveVertex` if you want to - * remove a vertex regardless of connectivity. - * - * If the vertex has already been deleted by the current transaction+command, - * this function will not do anything and will return true. - * - * @param vertex_accessor Accessor to vertex. - * @param check_empty If the vertex should be checked for existing edges - * before deletion. - * @return If or not the vertex was deleted. - */ - bool RemoveVertex(VertexAccessor &vertex_accessor, bool check_empty = true); - - /** - * Removes the vertex of the given accessor along with all it's outgoing - * and incoming connections. - * - * @param vertex_accessor Accessor to a vertex. - */ - void DetachRemoveVertex(VertexAccessor &vertex_accessor); - - /** - * Obtains the vertex for the given ID. If there is no vertex for the given - * ID, or it's not visible to this accessor's transaction, nullopt is - * returned. - * - * @param gid - The GID of the sought vertex. - * @param current_state If true then the graph state for the - * current transaction+command is returned (insertions, updates and - * deletions performed in the current transaction+command are not - * ignored). - */ - std::optional FindVertexOptional(storage::Gid gid, - bool current_state); - - /** - * Obtains the vertex for the given ID. If there is no vertex for the given - * ID, or it's not visible to this accessor's transaction, MG is crashed - * using a CHECK. - * - * @param gid - The GID of the sought vertex. - * @param current_state If true then the graph state for the - * current transaction+command is returned (insertions, updates and - * deletions performed in the current transaction+command are not - * ignored). - */ - VertexAccessor FindVertex(storage::Gid gid, bool current_state); - - /** - * Returns iterable over accessors to all the vertices in the graph - * visible to the current transaction. - * - * @param current_state If true then the graph state for the - * current transaction+command is returned (insertions, updates and - * deletions performed in the current transaction+command are not - * ignored). - */ - auto Vertices(bool current_state) { - DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; - // wrap version lists into accessors, which will look for visible versions - auto accessors = iter::imap( - [this](auto id_vlist) { - return VertexAccessor(id_vlist.second, *this); - }, - db_->storage().vertices_.access()); - - // filter out the accessors not visible to the current transaction - return iter::filter( - [this, current_state](const VertexAccessor &accessor) { - return accessor.Visible(transaction(), current_state); - }, - std::move(accessors)); - } - - /** - * Return VertexAccessors which contain the current label for the current - * transaction visibilty. - * @param label - label for which to return VertexAccessors - * @param current_state If true then the graph state for the - * current transaction+command is returned (insertions, updates and - * deletions performed in the current transaction+command are not - * ignored). - * @return iterable collection - */ - auto Vertices(storage::Label label, bool current_state) { - DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; - return iter::imap( - [this](auto vlist) { return VertexAccessor(vlist, *this); }, - db_->storage().labels_index_.GetVlists(label, *transaction_, - current_state)); - } - - /** - * Return VertexAccessors which contain the current label and property for the - * given transaction visibility. - * - * @param label - label for which to return VertexAccessors - * @param property - property for which to return VertexAccessors - * @param current_state If true then the graph state for the - * current transaction+command is returned (insertions, updates and - * deletions performed in the current transaction+command are not - * ignored). - * @return iterable collection - */ - auto Vertices(storage::Label label, storage::Property property, - bool current_state) { - DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; - DCHECK(db_->storage().label_property_index_.IndexExists( - LabelPropertyIndex::Key(label, property))) - << "Label+property index doesn't exist."; - return iter::imap( - [this](auto vlist) { return VertexAccessor(vlist, *this); }, - db_->storage().label_property_index_.GetVlists( - LabelPropertyIndex::Key(label, property), *transaction_, - current_state)); - } - - /** - * Return VertexAccessors which contain the current label + property, and - * those properties are equal to this 'value' for the given transaction - * visibility. - * @param label - label for which to return VertexAccessors - * @param property - property for which to return VertexAccessors - * @param value - property value for which to return VertexAccessors - * @param current_state If true then the graph state for the - * current transaction+command is returned (insertions, updates and - * deletions performed in the current transaction+command are not - * ignored). - * @return iterable collection - */ - auto Vertices(storage::Label label, storage::Property property, - const PropertyValue &value, bool current_state) { - DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; - DCHECK(db_->storage().label_property_index_.IndexExists( - LabelPropertyIndex::Key(label, property))) - << "Label+property index doesn't exist."; - CHECK(value.type() != PropertyValue::Type::Null) - << "Can't query index for propery value type null."; - return iter::imap( - [this](auto vlist) { return VertexAccessor(vlist, *this); }, - db_->storage().label_property_index_.GetVlists( - LabelPropertyIndex::Key(label, property), value, *transaction_, - current_state)); - } - - /** - * Return an iterable over VertexAccessors which contain the - * given label and whose property value (for the given property) - * falls within the given (lower, upper) @c Bound. - * - * The returned iterator will only contain - * vertices/edges whose property value is comparable with the - * given bounds (w.r.t. type). This has implications on Cypher - * query execuction semantics which have not been resovled yet. - * - * At least one of the bounds must be specified. Bonds can't be - * @c PropertyValue::Null. If both bounds are - * specified, their PropertyValue elments must be of comparable - * types. - * - * @param label - label for which to return VertexAccessors - * @param property - property for which to return VertexAccessors - * @param lower - Lower bound of the interval. - * @param upper - Upper bound of the interval. - * @param value - property value for which to return VertexAccessors - * @param current_state If true then the graph state for the - * current transaction+command is returned (insertions, updates and - * deletions performed in the current transaction+command are not - * ignored). - * @return iterable collection of record accessors - * satisfy the bounds and are visible to the current transaction. - */ - auto Vertices(storage::Label label, storage::Property property, - const std::optional> lower, - const std::optional> upper, - bool current_state) { - DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; - DCHECK(db_->storage().label_property_index_.IndexExists( - LabelPropertyIndex::Key(label, property))) - << "Label+property index doesn't exist."; - return iter::imap( - [this](auto vlist) { return VertexAccessor(vlist, *this); }, - db_->storage().label_property_index_.GetVlists( - LabelPropertyIndex::Key(label, property), lower, upper, - *transaction_, current_state)); - } - - /** - * Creates a new Edge and returns an accessor to it. If the ID is - * provided, the created Edge will have that ID, and the ID counter will be - * increased to it so collisions are avoided. This should only be used by - * durability recovery, normal edge creation should not provide the ID. - * - * You should NOT make interleaved recovery and normal DB op calls to this - * function. Doing so will likely mess up the ID generation and crash MG. - * Always perform recovery only once, immediately when the database is - * created, before any transactional ops start. - * - * @param from The 'from' vertex. - * @param to The 'to' vertex' - * @param type Edge type. - * @param requested_gid The requested GID. Should only be provided when - * recovering from durability. - * - * @return An accessor to the edge. - */ - EdgeAccessor InsertEdge( - VertexAccessor & from, VertexAccessor & to, storage::EdgeType type, - std::optional requested_gid = std::nullopt); - - /** - * Removes an edge from the graph. Parameters can indicate if the edge should - * be removed from data structures in vertices it connects. When removing an - * edge both arguments should be `true`. `false` is only used when - * detach-deleting a vertex. - * - * @param edge The accessor to an edge. - * @param remove_out_edge If the edge should be removed from the its origin - * side. - * @param remove_in_edge If the edge should be removed from the its - * destination side. - */ - void RemoveEdge(EdgeAccessor &edge, bool remove_out_edge = true, - bool remove_in_edge = true); - - /** - * Obtains the edge for the given ID. If there is no edge for the given - * ID, or it's not visible to this accessor's transaction, nullopt is - * returned. - * - * @param gid - The GID of the sought edge. - * @param current_state If true then the graph state for the - * current transaction+command is returned (insertions, updates and - * deletions performed in the current transaction+command are not - * ignored). - */ - std::optional FindEdgeOptional(storage::Gid gid, - bool current_state); - - /** - * Obtains the edge for the given ID. If there is no edge for the given - * ID, or it's not visible to this accessor's transaction, MG is crashed - * using a CHECK. - * - * @param gid - The GID of the sought edge. - * @param current_state If true then the graph state for the - * current transaction+command is returned (insertions, updates and - * deletions performed in the current transaction+command are not - * ignored). - */ - EdgeAccessor FindEdge(storage::Gid gid, bool current_state); - - /** - * Returns iterable over accessors to all the edges in the graph - * visible to the current transaction. - * - * @param current_state If true then the graph state for the - * current transaction+command is returned (insertions, updates and - * deletions performed in the current transaction+command are not - * ignored). - */ - auto Edges(bool current_state) { - DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; - - // wrap version lists into accessors, which will look for visible versions - auto accessors = iter::imap( - [this](auto id_vlist) { return EdgeAccessor(id_vlist.second, *this); }, - db_->storage().edges_.access()); - - // filter out the accessors not visible to the current transaction - return iter::filter( - [this, current_state](const EdgeAccessor &accessor) { - return accessor.Visible(transaction(), current_state); - }, - std::move(accessors)); - } - - /** - * Creates and returns a new accessor that represents the same graph element - * (node / version) as the given `accessor`, but in this `GraphDbAccessor`. - * - * It is possible that the given `accessor` graph element is not visible in - * this `GraphDbAccessor`'s transaction. If that is the case, a `nullopt` is - * returned. - * - * The returned accessor does NOT have the same `current_` set as the given - * `accessor`. It has default post-construction `current_` set (`old` if - * available, otherwise `new`). - * - * @param accessor The [Vertex/Edge]Accessor whose underlying graph element we - * want in this GraphDbAccessor. - * @return See above. - * @tparam TAccessor Either VertexAccessor or EdgeAccessor - */ - template - std::optional Transfer(const TAccessor &accessor) { - if (accessor.db_accessor_ == this) return std::make_optional(accessor); - - TAccessor accessor_in_this(accessor.address(), *this); - if (accessor_in_this.current_) - return std::make_optional(std::move(accessor_in_this)); - else - return std::nullopt; - } - - /** - * Adds an index for the given (label, property) and populates it with - * existing vertices that belong to it. - * - * You should never call BuildIndex on a GraphDbAccessor (transaction) on - * which new vertices have been inserted or existing ones updated. Do it - * in a new accessor instead. - * - * Build index throws if an index for the given (label, property) already - * exists (even if it's being built by a concurrent transaction and is not yet - * ready for use). - * - * It also throws if there is another index being built concurrently on the - * same database this accessor is for. - * - * @param label - label to build for - * @param property - property to build for - */ - void BuildIndex(storage::Label label, storage::Property property); - - /// Deletes the index responisble for (label, property). - /// - /// @throws IndexTransactionException if it can't obtain a blocking - /// transaction. - void DeleteIndex(storage::Label label, storage::Property property); - - /// Populates index with vertices containing the key - void PopulateIndex(const LabelPropertyIndex::Key &key); - - /// Writes Index (key) creation to Raft, marks it as ready for usage - void EnableIndex(const LabelPropertyIndex::Key &key); - - /** - * Creates new unique constraint that consists of a label and multiple - * properties. - * If the constraint already exists, this method does nothing. - * - * @throws ConstraintViolationException if constraint couldn't be build - * due to existing constraint violation. - * @throws TransactionEngineError if the engine doesn't accept transactions. - * @throws mvcc::SerializationError on serialization errors. - */ - void BuildUniqueConstraint(storage::Label label, - const std::vector &properties); - - /** - * Deletes existing unique constraint. - * If the constraint doesn't exist, this method does nothing. - */ - void DeleteUniqueConstraint(storage::Label label, - const std::vector &properties); - - /** - * Returns a list of currently active unique constraints. - */ - std::vector ListUniqueConstraints() - const; - - /** - * @brief - Returns true if the given label+property index already exists and - * is ready for use. - */ - bool LabelPropertyIndexExists(storage::Label label, - storage::Property property) const { - DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; - return db_->storage().label_property_index_.IndexExists( - LabelPropertyIndex::Key(label, property)); - } - - /** - * @brief - Returns vector of keys of label-property indices. - */ - std::vector GetIndicesKeys() { - DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted"; - return db_->storage().label_property_index_.Keys(); - } - - /** - * Return approximate number of all vertices in the database. - * Note that this is always an over-estimate and never an under-estimate. - */ - int64_t VerticesCount() const; - - /* - * Return approximate number of all edges in the database. - * Note that this is always an over-estimate and never an under-estimate. - */ - int64_t EdgesCount() const; - - /** - * Return approximate number of vertices under indexes with the given label. - * Note that this is always an over-estimate and never an under-estimate. - * - * @param label - label to check for - * @return number of vertices with the given label - */ - int64_t VerticesCount(storage::Label label) const; - - /** - * Return approximate number of vertices under indexes with the given label - * and property. Note that this is always an over-estimate and never an - * under-estimate. - * - * @param label - label to check for - * @param property - property to check for - * @return number of vertices with the given label, fails if no such - * label+property index exists. - */ - int64_t VerticesCount(storage::Label label, storage::Property property) const; - - /** - * Returns approximate number of vertices that have the given label - * and the given value for the given property. - * - * Assumes that an index for that (label, property) exists. - */ - int64_t VerticesCount(storage::Label label, storage::Property property, - const PropertyValue &value) const; - - /** - * Returns approximate number of vertices that have the given label - * and whose vaue is in the range defined by upper and lower @c Bound. - * - * At least one bound must be specified. Neither can be - * PropertyValue::Null. - * - * Assumes that an index for that (label, property) exists. - */ - int64_t VerticesCount( - storage::Label label, storage::Property property, - const std::optional> lower, - const std::optional> upper) const; - - /** - * Obtains the Label for the label's name. - * @return See above. - */ - storage::Label Label(const std::string &label_name); - - /** - * Obtains the label name (a string) for the given label. - * - * @param label a Label. - * @return See above. - */ - const std::string &LabelName(storage::Label label) const; - - /** - * Obtains the EdgeType for it's name. - * @return See above. - */ - storage::EdgeType EdgeType(const std::string &edge_type_name); - - /** - * Obtains the edge type name (a string) for the given edge type. - * - * @param edge_type an EdgeType. - * @return See above. - */ - const std::string &EdgeTypeName(storage::EdgeType edge_type) const; - - /** - * Obtains the Property for it's name. - * @return See above. - */ - storage::Property Property(const std::string &property_name); - - /** - * Obtains the property name (a string) for the given property. - * - * @param property a Property. - * @return See above. - */ - const std::string &PropertyName(storage::Property property) const; - - /** Returns the id of this accessor's transaction */ - tx::TransactionId transaction_id() const; - - /** Advances transaction's command id by 1. */ - void AdvanceCommand(); - - /** Commit transaction. */ - void Commit(); - - /** Abort transaction. */ - void Abort(); - - /** Return true if transaction is hinted to abort. */ - bool should_abort() const; - - const tx::Transaction &transaction() const { return *transaction_; } - raft::RaftInterface *raft(); - storage::StateDeltaBuffer *sd_buffer(); - auto &db() { return db_; } - const auto &db() const { return db_; } - - /* Returns a list of index names present in the database. */ - std::vector IndexInfo() const; - - /** - * Returns a map containing storage information for each Raft cluster member. - * - * Inside the vector, the following storage stats will exist: - * - vertex_count - * - edge_count - * - average_degree - * - memory_usage - * - disk_usage - **/ - std::map>> - StorageInfo() const; - - /** - * Insert this vertex into corresponding label and label+property (if it - * exists) index. - * - * @param label - label with which to insert vertex label record - * @param vertex_accessor - vertex_accessor to insert - * @param vertex - vertex record to insert - */ - void UpdateLabelIndices(storage::Label label, - const VertexAccessor &vertex_accessor, - const Vertex *const vertex); - - private: - GraphDb *db_; - tx::Transaction *transaction_; - // Indicates if this db-accessor started the transaction and should Abort it - // upon destruction. - bool transaction_starter_; - - bool commited_{false}; - bool aborted_{false}; - - /** - * Notifies storage about label addition. - * - * @param label - label that was added - * @param vertex_accessor - vertex_accessor that was updated - * @param vertex - vertex that was updated - */ - void UpdateOnAddLabel(storage::Label label, - const VertexAccessor &vertex_accessor, - const Vertex *vertex); - - /** - * Notifies storage about label removal. - * - * @param label - label that was removed - * @param vertex_accessor - vertex_accessor that was updated - */ - void UpdateOnRemoveLabel(storage::Label label, - const RecordAccessor &accessor); - - /** - * Notifies storage about a property removal. - * - * @param property - property that was removed - * @param previous_value - previous value of the property - * @param vertex_accessor - vertex_accessor that was updated - * @param vertex - vertex that was updated - */ - void UpdateOnRemoveProperty(storage::Property property, - const PropertyValue &previous_value, - const RecordAccessor &accessor, - const Vertex *vertex); - - /** - * Notifies storage about a property addition. - * - * @param property - property that was added - * @param previous_value - previous value of the property - * @param new_value - new value of the property - * @param vertex_accessor - vertex accessor that was updated - * @param vertex - vertex that was updated - */ - void UpdateOnAddProperty(storage::Property property, - const PropertyValue &previous_value, - const PropertyValue &new_value, - const RecordAccessor &vertex_accessor, - const Vertex *vertex); -}; - -} // namespace database diff --git a/src/database/single_node_ha/serialization.lcp b/src/database/single_node_ha/serialization.lcp deleted file mode 100644 index 970b2585f..000000000 --- a/src/database/single_node_ha/serialization.lcp +++ /dev/null @@ -1,9 +0,0 @@ -#>cpp -#pragma once - -#include "durability/single_node_ha/state_delta.hpp" -#include "storage/common/types/slk.hpp" -cpp<# - -;; Generate serialization of state-delta -(load "durability/single_node_ha/state_delta.lcp") diff --git a/src/durability/hashed_file_reader.hpp b/src/durability/hashed_file_reader.hpp deleted file mode 100644 index e012a051a..000000000 --- a/src/durability/hashed_file_reader.hpp +++ /dev/null @@ -1,74 +0,0 @@ -#pragma once - -#include - -#include "hasher.hpp" -#include "utils/endian.hpp" - -/** - * Buffer reads data from file and calculates hash of read data. Implements - * template param Buffer interface from BaseDecoder class. - */ -class HashedFileReader { - public: - /** Opens the file for reading. Returns true if successful. */ - bool Open(const std::string &file) { - input_stream_.open(file, std::ios::in | std::ios::binary); - hasher_ = Hasher(); - return !input_stream_.fail(); - } - - /** Closes ifstream. Returns false if closing fails. */ - bool Close() { - input_stream_.close(); - return !input_stream_.fail(); - } - - /** - * Reads raw data from stream. - * - * @param data - pointer to where data should be stored. - * @param n - data length. - * @param hash - If the read should be included in the hash calculation. - */ - bool Read(uint8_t *data, size_t n, bool hash = true) { - input_stream_.read(reinterpret_cast(data), n); - if (input_stream_.fail()) return false; - if (hash) hasher_.Update(data, n); - return true; - } - - /** - * Reads a TValue value from the stream. - * - * @param val - The value to read into. - * @param hash - If the read should be included in the hash calculation. - * @tparam TValue - Type of value being read. - * @return - If the read was successful. - */ - template - bool ReadType(TValue &val, bool hash = true) { - if (!Read(reinterpret_cast(&val), sizeof(TValue), hash)) - return false; - val = utils::BigEndianToHost(val); - return true; - } - - void Seek(std::streamoff offset, std::ios_base::seekdir way) { - input_stream_.seekg(offset, way); - } - - void Seek(std::streampos pos) { input_stream_.seekg(pos); } - - auto Tellg() { return input_stream_.tellg(); } - - /** Returns the hash of the data read so far from the stream. */ - uint64_t hash() const { return hasher_.hash(); } - - /** Checks whether the end of file is reached. */ - bool EndOfFile() const { return input_stream_.eof(); } - - private: - Hasher hasher_; - std::ifstream input_stream_; -}; diff --git a/src/durability/hashed_file_writer.hpp b/src/durability/hashed_file_writer.hpp deleted file mode 100644 index ad9dd7be4..000000000 --- a/src/durability/hashed_file_writer.hpp +++ /dev/null @@ -1,74 +0,0 @@ -#pragma once - -#include - -#include "hasher.hpp" -#include "utils/endian.hpp" - -/** - * Buffer that writes data to file and calculates hash of written data. - * Implements template param Buffer interface from BaseEncoder class. - * - * All of the methods on a HashedFileWriter can throw an exception. - */ -class HashedFileWriter { - public: - /** Constructor, initialize ofstream to throw exception on fail. */ - HashedFileWriter() { - output_stream_.exceptions(std::ifstream::failbit | std::ifstream::badbit); - } - - /** Constructor which also takes a file path and opens it immediately. */ - explicit HashedFileWriter(const std::string &path) : HashedFileWriter() { - output_stream_.open(path, std::ios::out | std::ios::binary); - } - - /** Opens the writer */ - void Open(const std::string &path) { - output_stream_.open(path, std::ios::out | std::ios::binary); - hasher_ = Hasher(); - } - - /** Closes the writer. */ - void Close() { output_stream_.close(); } - - /** - * Writes data to stream. - * - * @param data - Pointer to data to write. - * @param n - Data length. - * @param hash - If writing should update the hash. - * @return - True if succesful. - */ - void Write(const uint8_t *data, size_t n, bool hash = true) { - output_stream_.write(reinterpret_cast(data), n); - if (hash) hasher_.Update(data, n); - } - - /** - * Writes a TValue to the stream. - * - * @param val - The value to write. - * @param hash - If writing should update the hash. - * @return - True if succesful. - */ - template - void WriteValue(const TValue &val, bool hash = true) { - TValue val_big = utils::HostToBigEndian(val); - Write(reinterpret_cast(&val_big), sizeof(TValue), hash); - } - - // TODO try to remove before diff - /** Does nothing. Just for API compatibility with the bolt buffer. */ - void Chunk() {} - - /** Flushes data to stream. */ - void Flush() { output_stream_.flush(); } - - /** Returns the hash of the data written so far to the stream. */ - uint64_t hash() const { return hasher_.hash(); } - - private: - std::ofstream output_stream_; - Hasher hasher_; -}; diff --git a/src/durability/hasher.hpp b/src/durability/hasher.hpp deleted file mode 100644 index 0eeda5513..000000000 --- a/src/durability/hasher.hpp +++ /dev/null @@ -1,31 +0,0 @@ -#pragma once - -#include -#include - -// TODO: implement better hash function - -/** - * Class calculates hash of the data dynamically. - */ -class Hasher { - /** Prime number used in calculating hash. */ - static constexpr uint64_t kPrime = 3137; - - public: - /** - * Updates hash from given data. - * - * @param data - Data from which hash will be updated. - * @param n - Length of the data. - */ - void Update(const uint8_t *data, size_t n) { - for (size_t i = 0; i < n; ++i) hash_ = hash_ * kPrime + data[i] + 1; - } - - /** Returns current hash value. */ - uint64_t hash() const { return hash_; } - - private: - uint64_t hash_ = 0; -}; diff --git a/src/durability/single_node_ha/paths.cpp b/src/durability/single_node_ha/paths.cpp deleted file mode 100644 index fb2b6f4a8..000000000 --- a/src/durability/single_node_ha/paths.cpp +++ /dev/null @@ -1,26 +0,0 @@ -#include "durability/single_node_ha/paths.hpp" - -#include "utils/string.hpp" -#include "utils/timestamp.hpp" - -namespace durability { - -namespace fs = std::filesystem; - -// This is the prefix used for WAL and Snapshot filenames. It is a timestamp -// format that equals to: YYYYmmddHHMMSSffffff -const std::string kTimestampFormat = - "{:04d}{:02d}{:02d}{:02d}{:02d}{:02d}{:06d}"; - -std::string GetSnapshotFilename(uint64_t last_included_term, - uint64_t last_included_index) { - std::string date_str = utils::Timestamp::Now().ToString(kTimestampFormat); - return date_str + "_term_" + std::to_string(last_included_term) + "_index_" + - std::to_string(last_included_index); -} - -fs::path MakeSnapshotPath(const fs::path &durability_dir, - const std::string &snapshot_filename) { - return durability_dir / kSnapshotDir / snapshot_filename; -} -} // namespace durability diff --git a/src/durability/single_node_ha/paths.hpp b/src/durability/single_node_ha/paths.hpp deleted file mode 100644 index ec2d36f19..000000000 --- a/src/durability/single_node_ha/paths.hpp +++ /dev/null @@ -1,20 +0,0 @@ -#pragma once - -#include -#include - -namespace durability { -const std::string kSnapshotDir = "snapshots"; -const std::string kBackupDir = ".backup"; - -/// Generates a filename for a DB snapshot in the given folder in a well-defined -/// sortable format with last included term and last included index from which -/// the snapshot is created appended to the file name. -std::string GetSnapshotFilename(uint64_t last_included_term, - uint64_t last_included_index); - -/// Generates a full path for a DB snapshot. -std::filesystem::path MakeSnapshotPath( - const std::filesystem::path &durability_dir, - const std::string &snapshot_filename); -} // namespace durability diff --git a/src/durability/single_node_ha/recovery.cpp b/src/durability/single_node_ha/recovery.cpp deleted file mode 100644 index 10a188eec..000000000 --- a/src/durability/single_node_ha/recovery.cpp +++ /dev/null @@ -1,172 +0,0 @@ -#include "durability/single_node_ha/recovery.hpp" - -#include -#include -#include -#include - -#include "communication/bolt/v1/decoder/decoder.hpp" -#include "database/single_node_ha/graph_db_accessor.hpp" -#include "durability/hashed_file_reader.hpp" -#include "durability/single_node_ha/paths.hpp" -#include "durability/single_node_ha/version.hpp" -#include "glue/communication.hpp" -#include "storage/single_node_ha/indexes/label_property_index.hpp" -#include "transactions/type.hpp" -#include "utils/algorithm.hpp" -#include "utils/file.hpp" - -namespace fs = std::filesystem; - -namespace durability { - -using communication::bolt::Value; -bool ReadSnapshotSummary(HashedFileReader &buffer, int64_t &vertex_count, - int64_t &edge_count, uint64_t &hash) { - auto pos = buffer.Tellg(); - auto offset = sizeof(vertex_count) + sizeof(edge_count) + sizeof(hash); - buffer.Seek(-offset, std::ios_base::end); - bool r_val = buffer.ReadType(vertex_count, false) && - buffer.ReadType(edge_count, false) && - buffer.ReadType(hash, false); - buffer.Seek(pos); - return r_val; -} - -namespace { -using communication::bolt::Value; - -#define RETURN_IF_NOT(condition) \ - if (!(condition)) { \ - reader.Close(); \ - return false; \ - } - -bool RecoverSnapshot(const fs::path &snapshot_file, database::GraphDb *db, - RecoveryData *recovery_data) { - HashedFileReader reader; - communication::bolt::Decoder decoder(reader); - - RETURN_IF_NOT(reader.Open(snapshot_file)); - - auto magic_number = durability::kSnapshotMagic; - reader.Read(magic_number.data(), magic_number.size()); - RETURN_IF_NOT(magic_number == durability::kSnapshotMagic); - - // Read the vertex and edge count, and the hash, from the end of the snapshot. - int64_t vertex_count; - int64_t edge_count; - uint64_t hash; - RETURN_IF_NOT( - durability::ReadSnapshotSummary(reader, vertex_count, edge_count, hash)); - - Value dv; - RETURN_IF_NOT(decoder.ReadValue(&dv, Value::Type::Int) && - dv.ValueInt() == durability::kVersion); - - // A list of label+property indexes. - RETURN_IF_NOT(decoder.ReadValue(&dv, Value::Type::List)); - auto index_value = dv.ValueList(); - for (auto it = index_value.begin(); it != index_value.end();) { - auto label = *it++; - RETURN_IF_NOT(it != index_value.end()); - auto property = *it++; - RETURN_IF_NOT(label.IsString() && property.IsString()); - recovery_data->indexes.emplace_back( - IndexRecoveryData{label.ValueString(), property.ValueString(), - /*create = */ true}); - } - - auto dba = db->Access(); - std::unordered_map vertices; - for (int64_t i = 0; i < vertex_count; ++i) { - Value vertex_dv; - RETURN_IF_NOT(decoder.ReadValue(&vertex_dv, Value::Type::Vertex)); - auto &vertex = vertex_dv.ValueVertex(); - auto vertex_accessor = - dba.InsertVertex(storage::Gid::FromUint(vertex.id.AsUint())); - - for (const auto &label : vertex.labels) { - vertex_accessor.add_label(dba.Label(label)); - } - for (const auto &property_pair : vertex.properties) { - vertex_accessor.PropsSet(dba.Property(property_pair.first), - glue::ToPropertyValue(property_pair.second)); - } - vertices.insert({vertex.id.AsUint(), vertex_accessor}); - } - - for (int64_t i = 0; i < edge_count; ++i) { - Value edge_dv; - RETURN_IF_NOT(decoder.ReadValue(&edge_dv, Value::Type::Edge)); - auto &edge = edge_dv.ValueEdge(); - auto it_from = vertices.find(edge.from.AsUint()); - auto it_to = vertices.find(edge.to.AsUint()); - RETURN_IF_NOT(it_from != vertices.end() && it_to != vertices.end()); - auto edge_accessor = - dba.InsertEdge(it_from->second, it_to->second, dba.EdgeType(edge.type), - storage::Gid::FromUint(edge.id.AsUint())); - - for (const auto &property_pair : edge.properties) - edge_accessor.PropsSet(dba.Property(property_pair.first), - glue::ToPropertyValue(property_pair.second)); - } - - // Vertex and edge counts are included in the hash. Re-read them to update the - // hash. - reader.ReadType(vertex_count); - reader.ReadType(edge_count); - if (!reader.Close() || reader.hash() != hash) { - dba.Abort(); - return false; - } - - dba.Commit(); - return true; -} - -#undef RETURN_IF_NOT - -} // anonymous namespace - -bool RecoverSnapshot(database::GraphDb *db, RecoveryData *recovery_data, - const fs::path &durability_dir, - const std::string &snapshot_filename) { - const auto snapshot_dir = durability_dir / kSnapshotDir; - if (!fs::exists(snapshot_dir) || !fs::is_directory(snapshot_dir)) { - LOG(WARNING) << "Missing snapshot directory!"; - return false; - } - - const auto snapshot = snapshot_dir / snapshot_filename; - if (!fs::exists(snapshot)) { - LOG(WARNING) << "Missing snapshot file!"; - return false; - } - - LOG(INFO) << "Starting snapshot recovery from: " << snapshot; - if (!RecoverSnapshot(snapshot, db, recovery_data)) { - LOG(WARNING) << "Snapshot recovery failed."; - return false; - } - - LOG(INFO) << "Snapshot recovery successful."; - return true; -} - -void RecoverIndexes(database::GraphDb *db, - const std::vector &indexes) { - auto dba = db->Access(); - for (const auto &index : indexes) { - auto label = dba.Label(index.label); - auto property = dba.Property(index.property); - if (index.create) { - dba.BuildIndex(label, property); - } else { - dba.DeleteIndex(label, property); - } - } - dba.Commit(); -} - -} // namespace durability diff --git a/src/durability/single_node_ha/recovery.hpp b/src/durability/single_node_ha/recovery.hpp deleted file mode 100644 index 8177b8a42..000000000 --- a/src/durability/single_node_ha/recovery.hpp +++ /dev/null @@ -1,57 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -#include "durability/hashed_file_reader.hpp" -#include "durability/single_node_ha/state_delta.hpp" -#include "transactions/type.hpp" - -namespace database { -class GraphDb; -}; - -namespace durability { - -struct IndexRecoveryData { - std::string label; - std::string property; - bool create; // distinguish between creating and dropping index - bool unique; // used only when creating an index -}; - -/// Data structure for exchanging info between main recovery function and -/// snapshot recovery functions. -struct RecoveryData { - // A collection into which the indexes should be added so they - // can be rebuilt at the end of the recovery transaction. - std::vector indexes; -}; - -/// Reads snapshot metadata from the end of the file without messing up the -/// hash. -bool ReadSnapshotSummary(HashedFileReader &buffer, int64_t &vertex_count, - int64_t &edge_count, uint64_t &hash); - -/** - * Recovers database from the given snapshot. If recovering fails, false is - * returned and db_accessor aborts transaction, else true is returned and - * transaction is commited. - * - * @param db - The database to recover into. - * @param recovery_data - Struct that will contain additional recovery data. - * @param durability_dir - Path to durability directory. - * @param snapshot_filename - Snapshot filename. - * @return - recovery info - */ -bool RecoverSnapshot(database::GraphDb *db, - durability::RecoveryData *recovery_data, - const std::filesystem::path &durability_dir, - const std::string &snapshot_filename); - -void RecoverIndexes(database::GraphDb *db, - const std::vector &indexes); - -} // namespace durability diff --git a/src/durability/single_node_ha/snapshooter.cpp b/src/durability/single_node_ha/snapshooter.cpp deleted file mode 100644 index 232c3049e..000000000 --- a/src/durability/single_node_ha/snapshooter.cpp +++ /dev/null @@ -1,108 +0,0 @@ -#include "durability/single_node_ha/snapshooter.hpp" - -#include - -#include - -#include "communication/bolt/v1/encoder/base_encoder.hpp" -#include "database/single_node_ha/graph_db_accessor.hpp" -#include "durability/hashed_file_writer.hpp" -#include "durability/single_node_ha/paths.hpp" -#include "durability/single_node_ha/version.hpp" -#include "glue/communication.hpp" -#include "storage/v2/view.hpp" -#include "utils/file.hpp" - -namespace fs = std::filesystem; - -namespace durability { - -// Snapshot layout is described in durability/version.hpp -static_assert(durability::kVersion == 9, - "Wrong snapshot version, please update!"); - -namespace { -bool Encode(const fs::path &snapshot_file, database::GraphDb &db, - database::GraphDbAccessor &dba) { - try { - HashedFileWriter buffer(snapshot_file); - communication::bolt::BaseEncoder encoder(buffer); - int64_t vertex_num = 0, edge_num = 0; - - encoder.WriteRAW(durability::kSnapshotMagic.data(), - durability::kSnapshotMagic.size()); - encoder.WriteInt(durability::kVersion); - - // Write label+property indexes as list ["label", "property", ...] - { - std::vector index_vec; - for (const auto &key : dba.GetIndicesKeys()) { - index_vec.emplace_back(dba.LabelName(key.label_)); - index_vec.emplace_back(dba.PropertyName(key.property_)); - } - encoder.WriteList(index_vec); - } - - for (const auto &vertex : dba.Vertices(false)) { - encoder.WriteVertex(glue::ToBoltVertex(vertex, storage::View::OLD)); - vertex_num++; - } - for (const auto &edge : dba.Edges(false)) { - encoder.WriteEdge(glue::ToBoltEdge(edge, storage::View::OLD)); - edge_num++; - } - buffer.WriteValue(vertex_num); - buffer.WriteValue(edge_num); - buffer.WriteValue(buffer.hash()); - buffer.Close(); - } catch (const std::ifstream::failure &) { - if (fs::exists(snapshot_file) && !fs::remove(snapshot_file)) { - LOG(ERROR) << "Error while removing corrupted snapshot file: " - << snapshot_file; - } - return false; - } - return true; -} - -/// Remove old snapshots but leave at most `keep` number of latest ones. -void RemoveOldSnapshots(const fs::path &snapshot_dir, uint16_t keep) { - std::vector files; - for (auto &file : fs::directory_iterator(snapshot_dir)) - files.push_back(file.path()); - if (static_cast(files.size()) <= keep) return; - sort(files.begin(), files.end()); - for (int i = 0; i < static_cast(files.size()) - keep; ++i) { - if (!fs::remove(files[i])) { - LOG(ERROR) << "Error while removing file: " << files[i]; - } - } -} - -} // namespace - -bool MakeSnapshot(database::GraphDb &db, database::GraphDbAccessor &dba, - const fs::path &durability_dir, - const std::string &snapshot_filename) { - if (!utils::EnsureDir(durability_dir / kSnapshotDir)) return false; - const auto snapshot_file = - MakeSnapshotPath(durability_dir, snapshot_filename); - if (fs::exists(snapshot_file)) return false; - if (Encode(snapshot_file, db, dba)) { - // Only keep the latest snapshot. - RemoveOldSnapshots(durability_dir / kSnapshotDir, 1); - return true; - } else { - std::error_code error_code; // Just for exception suppression. - fs::remove(snapshot_file, error_code); - return false; - } -} - -void RemoveAllSnapshots(const fs::path &durability_dir) { - auto snapshot_dir = durability_dir / kSnapshotDir; - if (!utils::EnsureDir(snapshot_dir)) return; - RemoveOldSnapshots(snapshot_dir, 0); -} - -} // namespace durability diff --git a/src/durability/single_node_ha/snapshooter.hpp b/src/durability/single_node_ha/snapshooter.hpp deleted file mode 100644 index dda8802a7..000000000 --- a/src/durability/single_node_ha/snapshooter.hpp +++ /dev/null @@ -1,22 +0,0 @@ -#pragma once - -#include - -#include "database/single_node_ha/graph_db.hpp" - -namespace durability { - -/// Make snapshot and save it in snapshots folder. Returns true if successful. -/// @param db - database for which we are creating a snapshot -/// @param dba - db accessor with which we are creating a snapshot (reading -/// data) -/// @param durability_dir - directory where durability data is stored. -/// @param snapshot_filename - filename for the snapshot. -bool MakeSnapshot(database::GraphDb &db, database::GraphDbAccessor &dba, - const std::filesystem::path &durability_dir, - const std::string &snapshot_filename); - -/// Remove all snapshots inside the snapshot durability directory. -void RemoveAllSnapshots(const std::filesystem::path &durability_dir); - -} // namespace durability diff --git a/src/durability/single_node_ha/state_delta.cpp b/src/durability/single_node_ha/state_delta.cpp deleted file mode 100644 index 50c041b8a..000000000 --- a/src/durability/single_node_ha/state_delta.cpp +++ /dev/null @@ -1,453 +0,0 @@ -#include "durability/single_node_ha/state_delta.hpp" - -#include - -#include "communication/bolt/v1/value.hpp" -#include "database/single_node_ha/graph_db_accessor.hpp" -#include "glue/communication.hpp" - -namespace database { - -StateDelta StateDelta::TxBegin(tx::TransactionId tx_id) { - return {StateDelta::Type::TRANSACTION_BEGIN, tx_id}; -} - -StateDelta StateDelta::TxCommit(tx::TransactionId tx_id) { - return {StateDelta::Type::TRANSACTION_COMMIT, tx_id}; -} - -StateDelta StateDelta::TxAbort(tx::TransactionId tx_id) { - return {StateDelta::Type::TRANSACTION_ABORT, tx_id}; -} - -StateDelta StateDelta::CreateVertex(tx::TransactionId tx_id, - storage::Gid vertex_id) { - StateDelta op(StateDelta::Type::CREATE_VERTEX, tx_id); - op.vertex_id = vertex_id; - return op; -} - -StateDelta StateDelta::CreateEdge(tx::TransactionId tx_id, storage::Gid edge_id, - storage::Gid vertex_from_id, - storage::Gid vertex_to_id, - storage::EdgeType edge_type, - const std::string &edge_type_name) { - StateDelta op(StateDelta::Type::CREATE_EDGE, tx_id); - op.edge_id = edge_id; - op.vertex_from_id = vertex_from_id; - op.vertex_to_id = vertex_to_id; - op.edge_type = edge_type; - op.edge_type_name = edge_type_name; - return op; -} - -StateDelta StateDelta::PropsSetVertex(tx::TransactionId tx_id, - storage::Gid vertex_id, - storage::Property property, - const std::string &property_name, - const PropertyValue &value) { - StateDelta op(StateDelta::Type::SET_PROPERTY_VERTEX, tx_id); - op.vertex_id = vertex_id; - op.property = property; - op.property_name = property_name; - op.value = value; - return op; -} - -StateDelta StateDelta::PropsSetEdge(tx::TransactionId tx_id, - storage::Gid edge_id, - storage::Property property, - const std::string &property_name, - const PropertyValue &value) { - StateDelta op(StateDelta::Type::SET_PROPERTY_EDGE, tx_id); - op.edge_id = edge_id; - op.property = property; - op.property_name = property_name; - op.value = value; - return op; -} - -StateDelta StateDelta::AddLabel(tx::TransactionId tx_id, storage::Gid vertex_id, - storage::Label label, - const std::string &label_name) { - StateDelta op(StateDelta::Type::ADD_LABEL, tx_id); - op.vertex_id = vertex_id; - op.label = label; - op.label_name = label_name; - return op; -} - -StateDelta StateDelta::RemoveLabel(tx::TransactionId tx_id, - storage::Gid vertex_id, storage::Label label, - const std::string &label_name) { - StateDelta op(StateDelta::Type::REMOVE_LABEL, tx_id); - op.vertex_id = vertex_id; - op.label = label; - op.label_name = label_name; - return op; -} - -StateDelta StateDelta::RemoveVertex(tx::TransactionId tx_id, - storage::Gid vertex_id, bool check_empty) { - StateDelta op(StateDelta::Type::REMOVE_VERTEX, tx_id); - op.vertex_id = vertex_id; - op.check_empty = check_empty; - return op; -} - -StateDelta StateDelta::RemoveEdge(tx::TransactionId tx_id, - storage::Gid edge_id) { - StateDelta op(StateDelta::Type::REMOVE_EDGE, tx_id); - op.edge_id = edge_id; - return op; -} - -StateDelta StateDelta::BuildIndex(tx::TransactionId tx_id, storage::Label label, - const std::string &label_name, - storage::Property property, - const std::string &property_name) { - StateDelta op(StateDelta::Type::BUILD_INDEX, tx_id); - op.label = label; - op.label_name = label_name; - op.property = property; - op.property_name = property_name; - return op; -} - -StateDelta StateDelta::DropIndex(tx::TransactionId tx_id, storage::Label label, - const std::string &label_name, - storage::Property property, - const std::string &property_name) { - StateDelta op(StateDelta::Type::DROP_INDEX, tx_id); - op.label = label; - op.label_name = label_name; - op.property = property; - op.property_name = property_name; - return op; -} - -StateDelta StateDelta::NoOp(tx::TransactionId tx_id) { - StateDelta op(StateDelta::Type::NO_OP, tx_id); - return op; -} - -StateDelta StateDelta::BuildUniqueConstraint( - tx::TransactionId tx_id, storage::Label label, - const std::string &label_name, - const std::vector &properties, - const std::vector &property_names) { - StateDelta op(StateDelta::Type::BUILD_UNIQUE_CONSTRAINT, tx_id); - op.label = label; - op.label_name = label_name; - op.properties = properties; - op.property_names = property_names; - return op; -} - -StateDelta StateDelta::DropUniqueConstraint( - tx::TransactionId tx_id, storage::Label label, - const std::string &label_name, - const std::vector &properties, - const std::vector &property_names) { - StateDelta op(StateDelta::Type::DROP_UNIQUE_CONSTRAINT, tx_id); - op.label = label; - op.label_name = label_name; - op.properties = properties; - op.property_names = property_names; - return op; -} - -void StateDelta::Encode( - HashedFileWriter &writer, - communication::bolt::BaseEncoder &encoder) const { - encoder.WriteInt(static_cast(type)); - encoder.WriteInt(static_cast(transaction_id)); - - switch (type) { - case Type::TRANSACTION_BEGIN: - case Type::TRANSACTION_COMMIT: - case Type::TRANSACTION_ABORT: - case Type::NO_OP: - break; - case Type::CREATE_VERTEX: - encoder.WriteInt(vertex_id.AsInt()); - break; - case Type::CREATE_EDGE: - encoder.WriteInt(edge_id.AsInt()); - encoder.WriteInt(vertex_from_id.AsInt()); - encoder.WriteInt(vertex_to_id.AsInt()); - encoder.WriteInt(edge_type.Id()); - encoder.WriteString(edge_type_name); - break; - case Type::SET_PROPERTY_VERTEX: - encoder.WriteInt(vertex_id.AsInt()); - encoder.WriteInt(property.Id()); - encoder.WriteString(property_name); - encoder.WriteValue(glue::ToBoltValue(value)); - break; - case Type::SET_PROPERTY_EDGE: - encoder.WriteInt(edge_id.AsInt()); - encoder.WriteInt(property.Id()); - encoder.WriteString(property_name); - encoder.WriteValue(glue::ToBoltValue(value)); - break; - case Type::ADD_LABEL: - case Type::REMOVE_LABEL: - encoder.WriteInt(vertex_id.AsInt()); - encoder.WriteInt(label.Id()); - encoder.WriteString(label_name); - break; - case Type::REMOVE_VERTEX: - encoder.WriteInt(vertex_id.AsInt()); - break; - case Type::REMOVE_EDGE: - encoder.WriteInt(edge_id.AsInt()); - break; - case Type::BUILD_INDEX: - encoder.WriteInt(label.Id()); - encoder.WriteString(label_name); - encoder.WriteInt(property.Id()); - encoder.WriteString(property_name); - break; - case Type::DROP_INDEX: - encoder.WriteInt(label.Id()); - encoder.WriteString(label_name); - encoder.WriteInt(property.Id()); - encoder.WriteString(property_name); - break; - case Type::BUILD_UNIQUE_CONSTRAINT: - encoder.WriteInt(label.Id()); - encoder.WriteString(label_name); - encoder.WriteInt(properties.size()); - for (auto prop : properties) { - encoder.WriteInt(prop.Id()); - } - for (auto &name : property_names) { - encoder.WriteString(name); - } - break; - case Type::DROP_UNIQUE_CONSTRAINT: - encoder.WriteInt(label.Id()); - encoder.WriteString(label_name); - encoder.WriteInt(properties.size()); - for (auto prop : properties) { - encoder.WriteInt(prop.Id()); - } - for (auto &name : property_names) { - encoder.WriteString(name); - } - break; - } - - writer.WriteValue(writer.hash()); -} - -#define DECODE_MEMBER(member, value_f) \ - if (!decoder.ReadValue(&dv)) return nullopt; \ - r_val.member = dv.value_f(); - -#define DECODE_GID_MEMBER(member) \ - if (!decoder.ReadValue(&dv)) return nullopt; \ - r_val.member = storage::Gid::FromInt(dv.ValueInt()); - -#define DECODE_MEMBER_CAST(member, value_f, type) \ - if (!decoder.ReadValue(&dv)) return nullopt; \ - r_val.member = static_cast(dv.value_f()); - -std::optional StateDelta::Decode( - HashedFileReader &reader, - communication::bolt::Decoder &decoder) { - using std::nullopt; - - StateDelta r_val; - // The decoded value used as a temporary while decoding. - communication::bolt::Value dv; - - try { - if (!decoder.ReadValue(&dv)) return nullopt; - r_val.type = static_cast(dv.ValueInt()); - DECODE_MEMBER(transaction_id, ValueInt) - - switch (r_val.type) { - case Type::TRANSACTION_BEGIN: - case Type::TRANSACTION_COMMIT: - case Type::TRANSACTION_ABORT: - case Type::NO_OP: - break; - case Type::CREATE_VERTEX: - DECODE_GID_MEMBER(vertex_id) - break; - case Type::CREATE_EDGE: - DECODE_GID_MEMBER(edge_id) - DECODE_GID_MEMBER(vertex_from_id) - DECODE_GID_MEMBER(vertex_to_id) - DECODE_MEMBER_CAST(edge_type, ValueInt, storage::EdgeType) - DECODE_MEMBER(edge_type_name, ValueString) - break; - case Type::SET_PROPERTY_VERTEX: - DECODE_GID_MEMBER(vertex_id) - DECODE_MEMBER_CAST(property, ValueInt, storage::Property) - DECODE_MEMBER(property_name, ValueString) - if (!decoder.ReadValue(&dv)) return nullopt; - r_val.value = glue::ToPropertyValue(dv); - break; - case Type::SET_PROPERTY_EDGE: - DECODE_GID_MEMBER(edge_id) - DECODE_MEMBER_CAST(property, ValueInt, storage::Property) - DECODE_MEMBER(property_name, ValueString) - if (!decoder.ReadValue(&dv)) return nullopt; - r_val.value = glue::ToPropertyValue(dv); - break; - case Type::ADD_LABEL: - case Type::REMOVE_LABEL: - DECODE_GID_MEMBER(vertex_id) - DECODE_MEMBER_CAST(label, ValueInt, storage::Label) - DECODE_MEMBER(label_name, ValueString) - break; - case Type::REMOVE_VERTEX: - DECODE_GID_MEMBER(vertex_id) - break; - case Type::REMOVE_EDGE: - DECODE_GID_MEMBER(edge_id) - break; - case Type::BUILD_INDEX: - DECODE_MEMBER_CAST(label, ValueInt, storage::Label) - DECODE_MEMBER(label_name, ValueString) - DECODE_MEMBER_CAST(property, ValueInt, storage::Property) - DECODE_MEMBER(property_name, ValueString) - break; - case Type::DROP_INDEX: - DECODE_MEMBER_CAST(label, ValueInt, storage::Label) - DECODE_MEMBER(label_name, ValueString) - DECODE_MEMBER_CAST(property, ValueInt, storage::Property) - DECODE_MEMBER(property_name, ValueString) - break; - case Type::BUILD_UNIQUE_CONSTRAINT: { - DECODE_MEMBER_CAST(label, ValueInt, storage::Label) - DECODE_MEMBER(label_name, ValueString) - if (!decoder.ReadValue(&dv)) return nullopt; - int size = dv.ValueInt(); - for (size_t i = 0; i < size; ++i) { - if (!decoder.ReadValue(&dv)) return nullopt; - r_val.properties.push_back( - static_cast(dv.ValueInt())); - } - for (size_t i = 0; i < size; ++i) { - if (!decoder.ReadValue(&dv)) return nullopt; - r_val.property_names.push_back(dv.ValueString()); - } - break; - } - case Type::DROP_UNIQUE_CONSTRAINT: { - DECODE_MEMBER_CAST(label, ValueInt, storage::Label) - DECODE_MEMBER(label_name, ValueString) - if (!decoder.ReadValue(&dv)) return nullopt; - int size = dv.ValueInt(); - for (size_t i = 0; i < size; ++i) { - if (!decoder.ReadValue(&dv)) return nullopt; - r_val.properties.push_back( - static_cast(dv.ValueInt())); - } - for (size_t i = 0; i < size; ++i) { - if (!decoder.ReadValue(&dv)) return nullopt; - r_val.property_names.push_back(dv.ValueString()); - } - break; - } - } - - auto decoder_hash = reader.hash(); - uint64_t encoded_hash; - if (!reader.ReadType(encoded_hash, true)) return nullopt; - if (decoder_hash != encoded_hash) return nullopt; - - return r_val; - } catch (communication::bolt::ValueException &) { - return nullopt; - } catch (std::ifstream::failure &) { - return nullopt; - } -} - -#undef DECODE_MEMBER - -void StateDelta::Apply(GraphDbAccessor &dba) const { - switch (type) { - // Transactional state is not recovered. - case Type::TRANSACTION_BEGIN: - case Type::TRANSACTION_COMMIT: - case Type::TRANSACTION_ABORT: - LOG(FATAL) << "Transaction handling not handled in Apply"; - break; - case Type::CREATE_VERTEX: - dba.InsertVertex(vertex_id); - break; - case Type::CREATE_EDGE: { - auto from = dba.FindVertex(vertex_from_id, true); - auto to = dba.FindVertex(vertex_to_id, true); - dba.InsertEdge(from, to, dba.EdgeType(edge_type_name), edge_id); - break; - } - case Type::SET_PROPERTY_VERTEX: { - auto vertex = dba.FindVertex(vertex_id, true); - vertex.PropsSet(dba.Property(property_name), value); - break; - } - case Type::SET_PROPERTY_EDGE: { - auto edge = dba.FindEdge(edge_id, true); - edge.PropsSet(dba.Property(property_name), value); - break; - } - case Type::ADD_LABEL: { - auto vertex = dba.FindVertex(vertex_id, true); - vertex.add_label(dba.Label(label_name)); - break; - } - case Type::REMOVE_LABEL: { - auto vertex = dba.FindVertex(vertex_id, true); - vertex.remove_label(dba.Label(label_name)); - break; - } - case Type::REMOVE_VERTEX: { - auto vertex = dba.FindVertex(vertex_id, true); - dba.DetachRemoveVertex(vertex); - break; - } - case Type::REMOVE_EDGE: { - auto edge = dba.FindEdge(edge_id, true); - dba.RemoveEdge(edge); - break; - } - case Type::BUILD_INDEX: { - dba.BuildIndex(dba.Label(label_name), dba.Property(property_name)); - break; - } - case Type::DROP_INDEX: { - dba.DeleteIndex(dba.Label(label_name), dba.Property(property_name)); - break; - } - case Type::NO_OP: - break; - case Type::BUILD_UNIQUE_CONSTRAINT: { - std::vector properties; - properties.reserve(property_names.size()); - for (auto &p : property_names) { - properties.push_back(dba.Property(p)); - } - - dba.BuildUniqueConstraint(dba.Label(label_name), properties); - } break; - case Type::DROP_UNIQUE_CONSTRAINT: { - std::vector properties; - properties.reserve(property_names.size()); - for (auto &p : property_names) { - properties.push_back(dba.Property(p)); - } - - dba.DeleteUniqueConstraint(dba.Label(label_name), properties); - } break; - - } -} - -}; // namespace database diff --git a/src/durability/single_node_ha/state_delta.lcp b/src/durability/single_node_ha/state_delta.lcp deleted file mode 100644 index b85654464..000000000 --- a/src/durability/single_node_ha/state_delta.lcp +++ /dev/null @@ -1,149 +0,0 @@ -#>cpp -#pragma once - -#include "communication/bolt/v1/decoder/decoder.hpp" -#include "communication/bolt/v1/encoder/base_encoder.hpp" -#include "durability/hashed_file_reader.hpp" -#include "durability/hashed_file_writer.hpp" -#include "storage/single_node_ha/mvcc/version_list.hpp" -#include "storage/common/types/property_value.hpp" -#include "storage/common/types/types.hpp" -#include "utils/typeinfo.hpp" - -class Vertex; -class Edge; -cpp<# - -(lcp:namespace database) - -#>cpp -class GraphDbAccessor; -cpp<# - -(lcp:define-struct state-delta () - ( - ;; Members valid for every delta. - (type "Type") - (transaction-id "::tx::TransactionId") - ;; Members valid only for some deltas, see StateDelta::Type comments above. - (vertex-id "::storage::Gid") - (edge-id "::storage::Gid") - (vertex-from-id "::storage::Gid") - (vertex-to-id "::storage::Gid") - (edge-type "::storage::EdgeType") - (edge-type-name "std::string") - (property "::storage::Property") - (property-name "std::string") - (properties "std::vector") - (property-names "std::vector") - (value "PropertyValue") - (label "::storage::Label") - (label-name "std::string") - (check-empty :bool)) - (:documentation - "Describes single change to the database state. Used for state communication -over network in HA. - -Labels, Properties and EdgeTypes are stored both as values (integers) and -strings (their names). The values are used when applying deltas in a running -database. Names are used when recovering the database as it's not guaranteed -that after recovery the old name<->value mapping will be preserved. - -TODO: ensure the mapping is preserved after recovery and don't save strings -in StateDeltas.") - (:public - (lcp:define-enum type - (transaction-begin - transaction-commit - transaction-abort - create-vertex ;; vertex_id - create-edge ;; edge_id, from_vertex_id, to_vertex_id, edge_type, edge_type_name - set-property-vertex ;; vertex_id, property, property_name, property_value - set-property-edge ;; edge_id, property, property_name, property_value - ;; remove property is done by setting a PropertyValue to Null - add-label ;; vertex_id, label, label_name - remove-label ;; vertex_id, label, label_name - remove-vertex ;; vertex_id, check_empty - remove-edge ;; edge_id - build-index ;; label, label_name, property, property_name - drop-index ;; label, label_name, property, property_name - no-op ;; no-op state delta required by Raft protocol - build-unique_constraint ;; label, label_name, properties, property_names - drop-unique_constraint ;; label, label_name, properties, property_names - ) - (:documentation - "Defines StateDelta type. For each type the comment indicates which values -need to be stored. All deltas have the transaction_id member, so that's -omitted in the comment.") - (:serialize)) - #>cpp - StateDelta() = default; - StateDelta(const enum Type &type, tx::TransactionId tx_id) - : type(type), transaction_id(tx_id) {} - - /** Attempts to decode a StateDelta from the given decoder. Returns the - * decoded value if successful, otherwise returns nullopt. */ - static std::optional Decode( - HashedFileReader &reader, - communication::bolt::Decoder &decoder); - - /** Encodes the delta using primitive encoder, and writes out the new hash - * with delta to the writer */ - void Encode( - HashedFileWriter &writer, - communication::bolt::BaseEncoder &encoder) const; - - static StateDelta TxBegin(tx::TransactionId tx_id); - static StateDelta TxCommit(tx::TransactionId tx_id); - static StateDelta TxAbort(tx::TransactionId tx_id); - static StateDelta CreateVertex(tx::TransactionId tx_id, - storage::Gid vertex_id); - static StateDelta CreateEdge(tx::TransactionId tx_id, storage::Gid edge_id, - storage::Gid vertex_from_id, - storage::Gid vertex_to_id, - storage::EdgeType edge_type, - const std::string &edge_type_name); - static StateDelta PropsSetVertex(tx::TransactionId tx_id, - storage::Gid vertex_id, - storage::Property property, - const std::string &property_name, - const PropertyValue &value); - static StateDelta PropsSetEdge(tx::TransactionId tx_id, storage::Gid edge_id, - storage::Property property, - const std::string &property_name, - const PropertyValue &value); - static StateDelta AddLabel(tx::TransactionId tx_id, storage::Gid vertex_id, - storage::Label label, - const std::string &label_name); - static StateDelta RemoveLabel(tx::TransactionId tx_id, storage::Gid vertex_id, - storage::Label label, - const std::string &label_name); - static StateDelta RemoveVertex(tx::TransactionId tx_id, storage::Gid vertex_id, - bool check_empty); - static StateDelta RemoveEdge(tx::TransactionId tx_id, storage::Gid edge_id); - static StateDelta BuildIndex(tx::TransactionId tx_id, storage::Label label, - const std::string &label_name, - storage::Property property, - const std::string &property_name); - static StateDelta DropIndex(tx::TransactionId tx_id, storage::Label label, - const std::string &label_name, - storage::Property property, - const std::string &property_name); - static StateDelta NoOp(tx::TransactionId tx_id); - static StateDelta BuildUniqueConstraint( - tx::TransactionId tx_id, storage::Label label, - const std::string &label_name, - const std::vector &properties, - const std::vector &property_names); - static StateDelta DropUniqueConstraint( - tx::TransactionId tx_id, storage::Label label, - const std::string &label_name, - const std::vector &property, - const std::vector &property_names); - - /// Applies CRUD delta to database accessor. Fails on other types of deltas - void Apply(GraphDbAccessor &dba) const; - cpp<#) - (:serialize (:slk))) - -(lcp:pop-namespace) ;; database diff --git a/src/durability/single_node_ha/version.hpp b/src/durability/single_node_ha/version.hpp deleted file mode 100644 index 99aa14807..000000000 --- a/src/durability/single_node_ha/version.hpp +++ /dev/null @@ -1,34 +0,0 @@ -#pragma once - -/// -/// -/// IMPORTANT: Please update this file for every snapshot format change!!! -/// TODO (buda): This is not rock solid. -/// - -#include -#include - -namespace durability { - -constexpr std::array kSnapshotMagic{{'M', 'G', 'H', 'A', 's', 'n'}}; - -// The current default version of snapshot and WAL encoding / decoding. -constexpr int64_t kVersion{9}; - -// Snapshot format (version 9): -// 1) Magic number + snapshot version -// -// 2) A list of label+property indices. -// -// 3) Bolt encoded nodes. Each node is written in the following format: -// * gid, labels, properties -// 4) Bolt encoded edges. Each edge is written in the following format: -// * gid -// * from, to -// * edge_type -// * properties -// -// 5) Snapshot summary (number of nodes, number of edges, hash) - -} // namespace durability diff --git a/src/memgraph_ha.cpp b/src/memgraph_ha.cpp deleted file mode 100644 index 70faf8f62..000000000 --- a/src/memgraph_ha.cpp +++ /dev/null @@ -1,79 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "communication/server.hpp" -#include "database/single_node_ha/graph_db.hpp" -#include "memgraph_init.hpp" -#include "query/exceptions.hpp" -#include "utils/flag_validation.hpp" - -// General purpose flags. -DEFINE_string(bolt_address, "0.0.0.0", - "IP address on which the Bolt server should listen."); -DEFINE_VALIDATED_int32(bolt_port, 7687, - "Port on which the Bolt server should listen.", - FLAG_IN_RANGE(0, std::numeric_limits::max())); -DEFINE_VALIDATED_int32( - bolt_num_workers, std::max(std::thread::hardware_concurrency(), 1U), - "Number of workers used by the Bolt server. By default, this will be the " - "number of processing units available on the machine.", - FLAG_IN_RANGE(1, INT32_MAX)); -DEFINE_VALIDATED_int32( - bolt_session_inactivity_timeout, 1800, - "Time in seconds after which inactive Bolt sessions will be " - "closed.", - FLAG_IN_RANGE(1, INT32_MAX)); -DEFINE_string(bolt_cert_file, "", - "Certificate file which should be used for the Bolt server."); -DEFINE_string(bolt_key_file, "", - "Key file which should be used for the Bolt server."); - -using ServerT = communication::Server; -using communication::ServerContext; - -void SingleNodeHAMain() { - auto durability_directory = std::filesystem::path(FLAGS_durability_directory); - - database::GraphDb db; - query::InterpreterContext interpreter_context{&db}; - SessionData session_data{&db, &interpreter_context, nullptr, nullptr}; - - ServerContext context; - std::string service_name = "Bolt"; - if (!FLAGS_bolt_key_file.empty() && !FLAGS_bolt_cert_file.empty()) { - context = ServerContext(FLAGS_bolt_key_file, FLAGS_bolt_cert_file); - service_name = "BoltS"; - } - - ServerT server({FLAGS_bolt_address, static_cast(FLAGS_bolt_port)}, - &session_data, &context, FLAGS_bolt_session_inactivity_timeout, - service_name, FLAGS_bolt_num_workers); - - // Handler for regular termination signals - auto shutdown = [&db] { db.Shutdown(); }; - - InitSignalHandlers(shutdown); - - // Start the database. - db.Start(); - // Start the Bolt server. - CHECK(server.Start()) << "Couldn't start the Bolt server!"; - - db.AwaitShutdown([&server] { - server.Shutdown(); - server.AwaitShutdown(); - }); -} - -int main(int argc, char **argv) { - google::SetUsageMessage("Memgraph high availability database server"); - return WithInit(argc, argv, SingleNodeHAMain); -} diff --git a/src/query/interpreter.hpp b/src/query/interpreter.hpp index f183c5dbb..242a05228 100644 --- a/src/query/interpreter.hpp +++ b/src/query/interpreter.hpp @@ -2,8 +2,6 @@ #include -#include "database/graph_db.hpp" -#include "database/graph_db_accessor.hpp" #include "query/context.hpp" #include "query/db_accessor.hpp" #include "query/frontend/ast/ast.hpp" diff --git a/src/raft/config.hpp b/src/raft/config.hpp deleted file mode 100644 index 60fc3fad9..000000000 --- a/src/raft/config.hpp +++ /dev/null @@ -1,60 +0,0 @@ -/// @file - -#pragma once - -#include -#include -#include - -#include - -#include "raft/exceptions.hpp" -#include "utils/file.hpp" -#include "utils/string.hpp" - -namespace raft { - -/// Configurable Raft parameters. -struct Config { - std::chrono::milliseconds election_timeout_min; - std::chrono::milliseconds election_timeout_max; - std::chrono::milliseconds heartbeat_interval; - std::chrono::milliseconds replication_timeout; - int64_t log_size_snapshot_threshold; - - static Config LoadFromFile(const std::string &raft_config_file) { - if (!std::filesystem::exists(raft_config_file)) - throw RaftConfigException(raft_config_file); - - nlohmann::json data; - try { - data = nlohmann::json::parse( - utils::Join(utils::ReadLines(raft_config_file), "")); - } catch (const nlohmann::json::parse_error &e) { - throw RaftConfigException(raft_config_file); - } - - if (!data.is_object()) throw RaftConfigException(raft_config_file); - if (!data["election_timeout_min"].is_number()) - throw RaftConfigException(raft_config_file); - if (!data["election_timeout_max"].is_number()) - throw RaftConfigException(raft_config_file); - if (!data["heartbeat_interval"].is_number()) - throw RaftConfigException(raft_config_file); - if (!data["replication_timeout"].is_number()) - throw RaftConfigException(raft_config_file); - if (!data["log_size_snapshot_threshold"].is_number()) - throw RaftConfigException(raft_config_file); - - return Config{ - std::chrono::duration( - data["election_timeout_min"]), - std::chrono::duration( - data["election_timeout_max"]), - std::chrono::duration(data["heartbeat_interval"]), - std::chrono::duration(data["replication_timeout"]), - data["log_size_snapshot_threshold"]}; - } -}; - -} // namespace raft diff --git a/src/raft/coordination.cpp b/src/raft/coordination.cpp deleted file mode 100644 index 2ac1578fa..000000000 --- a/src/raft/coordination.cpp +++ /dev/null @@ -1,137 +0,0 @@ -#include "raft/coordination.hpp" - -#include -#include - -#include "utils/file.hpp" -#include "utils/string.hpp" - -DEFINE_string(rpc_cert_file, "", "Certificate file to use (RPC)."); -DEFINE_string(rpc_key_file, "", "Key file to use (RPC)."); - -namespace raft { - -namespace fs = std::filesystem; - -std::unordered_map LoadNodesFromFile( - const std::string &coordination_config_file) { - if (!fs::exists(coordination_config_file)) - throw RaftCoordinationConfigException("file (" + coordination_config_file + - ") doesn't exist"); - - std::unordered_map nodes; - nlohmann::json data; - try { - data = nlohmann::json::parse( - utils::Join(utils::ReadLines(coordination_config_file), "")); - } catch (const nlohmann::json::parse_error &e) { - throw RaftCoordinationConfigException("invalid json"); - } - - if (!data.is_array()) throw RaftCoordinationConfigException("not an array"); - - for (auto &it : data) { - if (!it.is_array()) - throw RaftCoordinationConfigException("element not an array"); - - if (it.size() != 3) - throw RaftCoordinationConfigException("invalid number of subelements"); - - if (!it[0].is_number_unsigned() || !it[1].is_string() || - !it[2].is_number_unsigned()) - throw RaftCoordinationConfigException("subelement data is invalid"); - - nodes[it[0]] = io::network::Endpoint{it[1], it[2]}; - } - - return nodes; -} - -Coordination::Coordination( - uint16_t node_id, - std::unordered_map all_nodes) - : node_id_(node_id), cluster_size_(all_nodes.size()) { - // Create and initialize all server elements. - if (!FLAGS_rpc_cert_file.empty() && !FLAGS_rpc_key_file.empty()) { - server_context_.emplace(FLAGS_rpc_key_file, FLAGS_rpc_cert_file); - } else { - server_context_.emplace(); - } - server_.emplace(all_nodes[node_id_], &server_context_.value(), - all_nodes.size() * 2); - - // Create all client elements. - endpoints_.resize(cluster_size_); - clients_.resize(cluster_size_); - client_locks_.resize(cluster_size_); - - // Initialize all client elements. - client_context_.emplace(server_context_->use_ssl()); - for (uint16_t i = 1; i <= cluster_size_; ++i) { - auto it = all_nodes.find(i); - if (it == all_nodes.end()) { - throw RaftCoordinationConfigException("missing endpoint for node " + - std::to_string(i)); - } - endpoints_[i - 1] = it->second; - client_locks_[i - 1] = std::make_unique(); - } -} - -Coordination::~Coordination() { - CHECK(!alive_) << "You must call Shutdown and AwaitShutdown on Coordination!"; -} - -std::vector Coordination::GetAllNodeIds() { - std::vector ret; - ret.reserve(cluster_size_); - for (uint16_t i = 1; i <= cluster_size_; ++i) { - ret.push_back(i); - } - return ret; -} - -std::vector Coordination::GetOtherNodeIds() { - std::vector ret; - ret.reserve(cluster_size_ - 1); - for (uint16_t i = 1; i <= cluster_size_; ++i) { - if (i == node_id_) continue; - ret.push_back(i); - } - return ret; -} - -uint16_t Coordination::GetAllNodeCount() { return cluster_size_; } - -uint16_t Coordination::GetOtherNodeCount() { return cluster_size_ - 1; } - -io::network::Endpoint Coordination::GetOtherNodeEndpoint(uint16_t other_id) { - CHECK(other_id != node_id_) << "Trying to execute RPC on self!"; - CHECK(other_id >= 1 && other_id <= cluster_size_) << "Invalid node id!"; - return endpoints_[other_id - 1]; -} - -communication::ClientContext *Coordination::GetRpcClientContext() { - return &client_context_.value(); -} - -bool Coordination::Start() { return server_->Start(); } - -void Coordination::AwaitShutdown( - std::function call_before_shutdown) { - // Wait for a shutdown notification. - while (alive_) { - std::this_thread::sleep_for(std::chrono::milliseconds(100)); - } - - // Call the before shutdown callback. - call_before_shutdown(); - - // Shutdown our RPC server. - server_->Shutdown(); - server_->AwaitShutdown(); -} - -void Coordination::Shutdown() { alive_.store(false); } - -} // namespace raft diff --git a/src/raft/coordination.hpp b/src/raft/coordination.hpp deleted file mode 100644 index ef36605f8..000000000 --- a/src/raft/coordination.hpp +++ /dev/null @@ -1,142 +0,0 @@ -/// @file - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "io/network/endpoint.hpp" -#include "raft/exceptions.hpp" -#include "rpc/client.hpp" -#include "rpc/server.hpp" - -namespace raft { - -/// Loads raft cluster configuration from file. -/// -/// File format: -/// [[node_id, "node_address", node_port], ...] -std::unordered_map LoadNodesFromFile( - const std::string &coordination_config_file); - -/// This class is responsible for coordination between nodes within the Raft -/// cluster. Its implementation is quite similar to coordination in distributed -/// Memgraph apart from slight modifications which align more closely to Raft. -/// -/// It should be noted that, in the context of communication, all nodes within -/// the Raft cluster are considered equivalent and are henceforth known simply -/// as nodes. -/// -/// This class is thread safe. -class Coordination final { - public: - /// Class constructor - /// - /// @param node_id ID of Raft node on this machine. - /// @param node mapping from node_id to endpoint information (for the whole - /// cluster). - Coordination(uint16_t node_id, - std::unordered_map all_nodes); - - ~Coordination(); - - Coordination(const Coordination &) = delete; - Coordination(Coordination &&) = delete; - Coordination &operator=(const Coordination &) = delete; - Coordination &operator=(Coordination &&) = delete; - - /// Returns all node IDs. - std::vector GetAllNodeIds(); - - /// Returns other node IDs (excluding this node). - std::vector GetOtherNodeIds(); - - /// Returns total number of nodes. - uint16_t GetAllNodeCount(); - - /// Returns number of other nodes. - uint16_t GetOtherNodeCount(); - - /// Returns endpoint of other node. - io::network::Endpoint GetOtherNodeEndpoint(uint16_t other_id); - - /// Returns the currently used RPC client context. - communication::ClientContext *GetRpcClientContext(); - - /// Executes a RPC on another node in the cluster. If the RPC execution - /// fails (because of underlying network issues) it returns a `std::nullopt`. - template - std::optional ExecuteOnOtherNode( - uint16_t other_id, Args &&... args) { - CHECK(other_id != node_id_) << "Trying to execute RPC on self!"; - CHECK(other_id >= 1 && other_id <= cluster_size_) << "Invalid node id!"; - - auto &lock = *client_locks_[other_id - 1].get(); - auto &client = clients_[other_id - 1]; - - std::lock_guard guard(lock); - - if (!client) { - const auto &endpoint = endpoints_[other_id - 1]; - client = - std::make_unique(endpoint, &client_context_.value()); - } - - try { - return client->Call(std::forward(args)...); - } catch (...) { - // Invalidate the client so that we reconnect next time. - client = nullptr; - return std::nullopt; - } - } - - /// Registers a RPC call on this node. - template - void Register(std::function callback) { - server_->Register(callback); - } - - /// Registers an extended RPC call on this node. - template - void Register(std::function - callback) { - server_->Register(callback); - } - - /// Starts the coordination and its servers. - bool Start(); - - /// Blocks until the coordination is shut down. Accepts a callback function - /// that is called to clean up all services that should be stopped before the - /// coordination. - void AwaitShutdown(std::function call_before_shutdown); - - /// Hints that the coordination should start shutting down the whole cluster. - void Shutdown(); - - private: - uint16_t node_id_; - uint16_t cluster_size_; - - std::optional server_context_; - std::optional server_; - - std::optional client_context_; - std::vector endpoints_; - std::vector> clients_; - std::vector> client_locks_; - - std::atomic alive_{true}; -}; - -} // namespace raft diff --git a/src/raft/exceptions.hpp b/src/raft/exceptions.hpp deleted file mode 100644 index fafebae0e..000000000 --- a/src/raft/exceptions.hpp +++ /dev/null @@ -1,111 +0,0 @@ -/// @file - -#pragma once - -#include "communication/bolt/v1/exceptions.hpp" - -namespace raft { - -/// Base exception class used for all exceptions that can occur within the -/// Raft protocol. -class RaftException : public communication::bolt::VerboseError { - public: - template - RaftException(const std::string &format, Args &&... args) - : communication::bolt::VerboseError( - communication::bolt::VerboseError::Classification::DATABASE_ERROR, - "Raft", "Error", format, std::forward(args)...) {} -}; - -/// This exception should be thrown when attempting to transition between -/// incompatible states, e.g. from `FOLLOWER` to `LEADER`. -class InvalidTransitionException : public RaftException { - public: - using RaftException::RaftException; - InvalidTransitionException(const std::string &old_mode, - const std::string &new_mode) - : RaftException("Invalid transition from " + old_mode + " to " + - new_mode) {} -}; - -/// Exception used to indicate something is wrong with the raft config provided -/// by the user. -class RaftConfigException : public RaftException { - public: - using RaftException::RaftException; - explicit RaftConfigException(const std::string &path) - : RaftException("Unable to parse raft config file " + path) {} -}; - -/// Exception used to indicate something is wrong with the coordination config -/// provided by the user. -class RaftCoordinationConfigException : public RaftException { - public: - using RaftException::RaftException; - explicit RaftCoordinationConfigException(const std::string &msg) - : RaftException("Unable to parse raft coordination config file: " + msg + - "!") {} -}; - -/// This exception should be thrown when a `RaftServer` instance attempts -/// to read data from persistent storage which is missing. -class MissingPersistentDataException : public RaftException { - public: - using RaftException::RaftException; - explicit MissingPersistentDataException(const std::string &key) - : RaftException( - "Attempting to read non-existing persistent data under key: " + - key) {} -}; - -/// This exception should be thrown when a `RaftServer` instance attempts to -/// read from replication log for a garbage collected transaction or a -/// transaction that didn't begin. -class InvalidReplicationLogLookup : public RaftException { - public: - using RaftException::RaftException; - InvalidReplicationLogLookup() - : RaftException("Replication log lookup for invalid transaction.") {} -}; - -/// This exception is thrown when a transaction is taking too long to replicate. -/// We're throwing this to reduce the number of threads that are in an infinite -/// loop during a network partition. -class ReplicationTimeoutException : public RaftException { - public: - using RaftException::RaftException; - ReplicationTimeoutException() - : RaftException("Raft Log replication is taking too long. ") {} -}; - -/// This exception is thrown when a client tries to execute a query on a server -/// that isn't a leader. -class CantExecuteQueries : public RaftException { - public: - using RaftException::RaftException; - CantExecuteQueries() - : RaftException( - "Memgraph High Availability: Can't execute queries if not " - "leader.") {} -}; - -/// This exception is thrown when leader re-election takes place during -/// transaction commit. We're throwing this exception to inform the client that -/// transaction failed. -class UnexpectedLeaderChangeException : public RaftException { - public: - using RaftException::RaftException; - UnexpectedLeaderChangeException() - : RaftException( - "Leader change happened during transaction commit. Aborting.") {} -}; - -/// This exception is thrown when the machine is in the process of shutting down -/// and Raft API is being used. -class RaftShutdownException : public RaftException { - public: - using RaftException::RaftException; - RaftShutdownException() : RaftException("Raft Server is shutting down.") {} -}; - -} // namespace raft diff --git a/src/raft/log_entry.lcp b/src/raft/log_entry.lcp deleted file mode 100644 index ba1504e47..000000000 --- a/src/raft/log_entry.lcp +++ /dev/null @@ -1,19 +0,0 @@ -#>cpp -#pragma once - -#include "database/single_node_ha/serialization.hpp" -#include "durability/single_node_ha/state_delta.hpp" -cpp<# - -(lcp:namespace raft) - -(lcp:define-struct log-entry () - ((term :uint64_t) - (deltas "std::vector")) - (:public #>cpp - LogEntry() = default; - LogEntry(uint64_t _term, std::vector _deltas): term(_term), deltas(_deltas) {} - cpp<#) - (:serialize (:slk))) - -(lcp:pop-namespace) ;; raft diff --git a/src/raft/raft_interface.hpp b/src/raft/raft_interface.hpp deleted file mode 100644 index 29e66c7aa..000000000 --- a/src/raft/raft_interface.hpp +++ /dev/null @@ -1,96 +0,0 @@ -/// @file - -#pragma once - -#include - -#include "durability/single_node_ha/state_delta.hpp" -#include "transactions/type.hpp" - -namespace raft { - -enum class ReplicationStatus { REPLICATED, WAITING, ABORTED, INVALID }; - -inline std::string ReplicationStatusToString( - const ReplicationStatus &replication_status) { - switch (replication_status) { - case ReplicationStatus::REPLICATED: - return "REPLICATED"; - case ReplicationStatus::WAITING: - return "WAITING"; - case ReplicationStatus::ABORTED: - return "ABORTED"; - case ReplicationStatus::INVALID: - return "INVALID"; - } -} - -/// Structure which describes the status of a newly created LogEntry after the -/// execution of RaftServer's Emplace method. -/// -/// It consists of two unsigned 64-bit integers which uniquely describe -/// the emplaced LogEntry: -/// 1) Term when the LogEntry was emplaced to the Raft log. -/// 2) Index of the entry within the Raft log. -/// -/// In the case an entry was not successfully emplaced (e.g. unexpected -/// leadership change), the values will have a std::nullopt value instead. -struct LogEntryStatus { - uint64_t term_id; - uint64_t log_index; -}; - -/// Exposes only functionality that other parts of Memgraph can interact with. -class RaftInterface { - public: - /// Emplace a new LogEntry in the raft log and start its replication. This - /// entry is created from a given batched set of StateDelta objects. - /// - /// It is possible that the entry was not successfully emplaced. In that case, - /// the method returns std::nullopt and the caller is responsible for handling - /// situation correctly (e.g. aborting the corresponding transaction). - /// - /// @returns an optional LogEntryStatus object as result. - virtual std::optional Emplace( - const std::vector &) = 0; - - /// Returns true if the current servers mode is LEADER. False otherwise. - virtual bool IsLeader() = 0; - - /// Returns the term ID of the current leader. - virtual uint64_t TermId() = 0; - - /// Returns the replication status of LogEntry which began its replication in - /// a given term ID and was emplaced in the raft log at the given index. - /// - /// Replication status can be one of the following - /// 1) REPLICATED -- LogEntry was successfully replicated across - /// the Raft cluster - /// 2) WAITING -- LogEntry was successfully emplaced in the Raft - /// log and is currently being replicated. - /// 3) ABORTED -- LogEntry will not be replicated. - /// 4) INVALID -- the request for the LogEntry was invalid, most - /// likely either term_id or log_index were out of range. - virtual ReplicationStatus GetReplicationStatus(uint64_t term_id, - uint64_t log_index) = 0; - - /// Checks if the LogEntry with the give term id and log index can safely be - /// committed in local storage. - /// - /// @param term_id term when the LogEntry was created - /// @param log_index index of the LogEntry in the Raft log - /// - /// @return bool True if the transaction is safe to commit, false otherwise. - /// - /// @throws ReplicationTimeoutException - /// @throws RaftShutdownException - /// @throws InvalidReplicationLogLookup - virtual bool SafeToCommit(uint64_t term_id, uint64_t log_index) = 0; - - virtual std::mutex &WithLock() = 0; - - protected: - ~RaftInterface() {} -}; - -} // namespace raft diff --git a/src/raft/raft_rpc_messages.lcp b/src/raft/raft_rpc_messages.lcp deleted file mode 100644 index dd64f645b..000000000 --- a/src/raft/raft_rpc_messages.lcp +++ /dev/null @@ -1,43 +0,0 @@ -#>cpp -#pragma once - -#include -#include - -#include "raft/log_entry.hpp" -#include "rpc/messages.hpp" -cpp<# - -(lcp:namespace raft) - -(lcp:define-rpc request-vote - (:request - ((candidate-id :uint16_t) - (term :uint64_t) - (last-log-index :uint64_t) - (last-log-term :uint64_t))) - (:response - ((vote-granted :bool) - (term :uint64_t)))) - -(lcp:define-rpc append-entries - (:request - ((leader-id :uint16_t) - (leader-commit :uint64_t) - (term :uint64_t) - (prev-log-index :uint64_t) - (prev-log-term :uint64_t) - (entries "std::vector"))) - (:response - ((success :bool) - (term :uint64_t)))) - -(lcp:define-rpc heartbeat - (:request - ((leader-id :uint16_t) - (term :uint64_t))) - (:response - ((success :bool) - (term :uint64_t)))) - -(lcp:pop-namespace) ;; raft diff --git a/src/raft/raft_server.cpp b/src/raft/raft_server.cpp deleted file mode 100644 index 285c09937..000000000 --- a/src/raft/raft_server.cpp +++ /dev/null @@ -1,961 +0,0 @@ -#include "raft/raft_server.hpp" - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "database/graph_db_accessor.hpp" -#include "durability/single_node_ha/paths.hpp" -#include "raft/exceptions.hpp" -#include "rpc/client.hpp" -#include "slk/streams.hpp" -#include "utils/cast.hpp" -#include "utils/exceptions.hpp" -#include "utils/on_scope_exit.hpp" -#include "utils/thread.hpp" - -namespace raft { - -using namespace std::literals::chrono_literals; -namespace fs = std::filesystem; - -const std::string kCurrentTermKey = "current_term"; -const std::string kVotedForKey = "voted_for"; -const std::string kLogSizeKey = "log_size"; -const std::string kLogEntryPrefix = "log_entry_"; -const std::string kRaftDir = "raft"; - -RaftServer::RaftServer(uint16_t server_id, const std::string &durability_dir, - bool db_recover_on_startup, const Config &config, - Coordination *coordination, database::GraphDb *db) - : config_(config), - coordination_(coordination), - db_(db), - mode_(Mode::FOLLOWER), - server_id_(server_id), - durability_dir_(fs::path(durability_dir)), - db_recover_on_startup_(db_recover_on_startup), - commit_index_(0), - last_applied_(0), - last_entry_term_(0), - issue_hb_(false), - replication_timeout_(config.replication_timeout), - disk_storage_(fs::path(durability_dir) / kRaftDir) {} - -void RaftServer::Start() { - if (!db_recover_on_startup_) { - // We need to clear persisted data if we don't want any recovery. - disk_storage_.DeletePrefix(""); - } - - // Persistent storage initialization - if (!disk_storage_.Get(kLogSizeKey)) { - SetCurrentTerm(0); - SetLogSize(0); - LogEntry empty_log_entry(0, {}); - AppendLogEntries(0, 0, {empty_log_entry}); - } else { - RecoverPersistentData(); - } - - // Peer state initialization - auto cluster_size = coordination_->GetAllNodeCount() + 1; - next_index_.resize(cluster_size); - index_offset_.resize(cluster_size); - match_index_.resize(cluster_size); - next_replication_.resize(cluster_size); - next_heartbeat_.resize(cluster_size); - - // RPC registration - coordination_->Register( - [this](auto *req_reader, auto *res_builder) { - std::lock_guard guard(lock_); - RequestVoteReq req; - slk::Load(&req, req_reader); - - // [Raft paper 5.1] - // "If a server recieves a request with a stale term, - // it rejects the request" - if (exiting_ || req.term < current_term_) { - RequestVoteRes res(false, current_term_); - slk::Save(res, res_builder); - return; - } - - // [Raft paper figure 2] - // If RPC request or response contains term T > currentTerm, - // set currentTerm = T and convert to follower. - if (req.term > current_term_) { - SetCurrentTerm(req.term); - if (mode_ != Mode::FOLLOWER) Transition(Mode::FOLLOWER); - } - - if (voted_for_) { - bool grant_vote = voted_for_.value() == req.candidate_id; - if (grant_vote) SetNextElectionTimePoint(); - RequestVoteRes res(grant_vote, current_term_); - slk::Save(res, res_builder); - return; - } - - // [Raft paper 5.2, 5.4] - // "Each server will vote for at most one candidate in a given - // term, on a first-come-first-serve basis with an additional - // restriction on votes" - // Restriction: "The voter denies its vote if its own log is more - // up-to-date than that of the candidate" - auto last_entry_data = LastEntryData(); - bool grant_vote = - AtLeastUpToDate(req.last_log_index, req.last_log_term, - last_entry_data.first, last_entry_data.second); - if (grant_vote) { - SetVotedFor(req.candidate_id); - SetNextElectionTimePoint(); - } - RequestVoteRes res(grant_vote, current_term_); - slk::Save(res, res_builder); - }); - - coordination_->Register([this](auto *req_reader, - auto *res_builder) { - std::lock_guard guard(lock_); - AppendEntriesReq req; - slk::Load(&req, req_reader); - - // [Raft paper 5.1] - // "If a server receives a request with a stale term, it rejects the - // request" - if (exiting_ || req.term < current_term_) { - AppendEntriesRes res(false, current_term_); - slk::Save(res, res_builder); - return; - } - - // Everything below is considered to be a valid RPC. This will ensure that - // after we finish processing the current request, the election timeout will - // be extended. During this process we will prevent the timeout from - // occuring. - next_election_ = TimePoint::max(); - election_change_.notify_all(); - utils::OnScopeExit extend_election_timeout([this] { - // [Raft thesis 3.4] - // A server remains in follower state as long as it receives valid RPCs - // from a leader or candidate. - SetNextElectionTimePoint(); - election_change_.notify_all(); - }); - - // [Raft paper figure 2] - // If RPC request or response contains term T > currentTerm, - // set currentTerm = T and convert to follower. - if (req.term > current_term_) { - SetCurrentTerm(req.term); - if (mode_ != Mode::FOLLOWER) Transition(Mode::FOLLOWER); - } - - // [Raft paper 5.3] - // "If a follower's log is inconsistent with the leader's, the - // consistency check will fail in the AppendEntries RPC." - // - // Consistency checking assures the Log Matching Property: - // - If two entries in different logs have the same index and - // term, then they store the same command. - // - If two entries in different logs have the same index and term, - // then the logs are identical in all preceding entries. - if (log_size_ <= req.prev_log_index || - GetLogEntry(req.prev_log_index).term != req.prev_log_term) { - AppendEntriesRes res(false, current_term_); - slk::Save(res, res_builder); - return; - } - - // No need to call this function for a heartbeat - if (!req.entries.empty()) { - AppendLogEntries(req.leader_commit, req.prev_log_index + 1, req.entries); - } - - // [Raft paper 5.3] - // "Once a follower learns that a log entry is committed, it applies - // the entry to its state machine (in log order) - while (req.leader_commit > last_applied_ && last_applied_ + 1 < log_size_) { - ++last_applied_; - ApplyStateDeltas(GetLogEntry(last_applied_).deltas); - } - - // Respond positively to a heartbeat. - if (req.entries.empty()) { - AppendEntriesRes res(true, current_term_); - slk::Save(res, res_builder); - if (mode_ != Mode::FOLLOWER) Transition(Mode::FOLLOWER); - return; - } - - AppendEntriesRes res(true, current_term_); - slk::Save(res, res_builder); - }); - - coordination_->Register( - [this](auto *req_reader, auto *res_builder) { - std::lock_guard guard(lock_); - HeartbeatReq req; - slk::Load(&req, req_reader); - - if (exiting_ || req.term < current_term_) { - HeartbeatRes res(false, current_term_); - slk::Save(res, res_builder); - return; - } - - if (req.term > current_term_) { - SetCurrentTerm(req.term); - if (mode_ != Mode::FOLLOWER) Transition(Mode::FOLLOWER); - } - - SetNextElectionTimePoint(); - election_change_.notify_all(); - - HeartbeatRes res(true, current_term_); - slk::Save(res, res_builder); - }); - - // start threads - - SetNextElectionTimePoint(); - election_thread_ = std::thread(&RaftServer::ElectionThreadMain, this); - - for (auto peer_id : coordination_->GetOtherNodeIds()) { - peer_threads_.emplace_back(&RaftServer::PeerThreadMain, this, peer_id); - hb_threads_.emplace_back(&RaftServer::HBThreadMain, this, peer_id); - } - - no_op_issuer_thread_ = std::thread(&RaftServer::NoOpIssuerThreadMain, this); -} - -void RaftServer::Shutdown() { - exiting_ = true; - { - std::lock_guard guard(lock_); - - state_changed_.notify_all(); - election_change_.notify_all(); - leader_changed_.notify_all(); - hb_condition_.notify_all(); - } - - for (auto &peer_thread : peer_threads_) { - if (peer_thread.joinable()) peer_thread.join(); - } - - for (auto &hb_thread : hb_threads_) { - if (hb_thread.joinable()) hb_thread.join(); - } - - if (election_thread_.joinable()) election_thread_.join(); - if (no_op_issuer_thread_.joinable()) no_op_issuer_thread_.join(); -} - -void RaftServer::SetCurrentTerm(uint64_t new_current_term) { - current_term_ = new_current_term; - disk_storage_.Put(kCurrentTermKey, std::to_string(new_current_term)); - SetVotedFor(std::nullopt); -} - -void RaftServer::SetVotedFor(std::optional new_voted_for) { - voted_for_ = new_voted_for; - if (new_voted_for) - disk_storage_.Put(kVotedForKey, std::to_string(new_voted_for.value())); - else - disk_storage_.Delete(kVotedForKey); -} - -void RaftServer::SetLogSize(uint64_t new_log_size) { - log_size_ = new_log_size; - disk_storage_.Put(kLogSizeKey, std::to_string(new_log_size)); -} - -std::optional RaftServer::Emplace( - const std::vector &deltas) { - std::unique_lock lock(lock_); - if (mode_ != Mode::LEADER) { - return std::nullopt; - } - - LogEntry new_entry(current_term_, deltas); - - log_[log_size_] = new_entry; - disk_storage_.Put(LogEntryKey(log_size_), SerializeLogEntry(new_entry)); - last_entry_term_ = new_entry.term; - SetLogSize(log_size_ + 1); - - // Force replication - TimePoint now = Clock::now(); - for (auto &peer_replication : next_replication_) peer_replication = now; - - // From this point on, we can say that the replication of a LogEntry started. - replication_timeout_.Insert(new_entry.term, log_size_ - 1); - - state_changed_.notify_all(); - return {{new_entry.term, log_size_ - 1}}; -} - -bool RaftServer::IsLeader() { return !exiting_ && mode_ == Mode::LEADER; } - -uint64_t RaftServer::TermId() { return current_term_; } - -ReplicationStatus RaftServer::GetReplicationStatus(uint64_t term_id, - uint64_t log_index) { - std::unique_lock lock(lock_); - if (term_id > current_term_ || log_index >= log_size_) - return ReplicationStatus::INVALID; - - auto log_entry = GetLogEntry(log_index); - - // This is correct because the leader can only append to the log and no two - // workers can be leaders in the same term. - if (log_entry.term != term_id) return ReplicationStatus::ABORTED; - - if (last_applied_ < log_index) return ReplicationStatus::WAITING; - return ReplicationStatus::REPLICATED; -} - -bool RaftServer::SafeToCommit(uint64_t term_id, uint64_t log_index) { - auto replication_status = GetReplicationStatus(term_id, log_index); - - // If we are shutting down, but we know that the Raft Log replicated - // successfully, we return true. This will eventually commit since we - // replicate NoOp on leader election. - if (replication_status == ReplicationStatus::REPLICATED) return true; - - // Only if the log entry isn't replicated, throw an exception to inform - // the client. - if (exiting_) throw RaftShutdownException(); - - if (replication_status == ReplicationStatus::WAITING) { - if (replication_timeout_.CheckTimeout(term_id, log_index)) { - throw ReplicationTimeoutException(); - } - return false; - } - - // TODO(ipaljak): Fix the old naming. - // The only possibility left is that our ReplicationLog doesn't contain - // information about that tx. - throw InvalidReplicationLogLookup(); -} - -void RaftServer::RecoverPersistentData() { - auto opt_term = disk_storage_.Get(kCurrentTermKey); - if (opt_term) current_term_ = std::stoull(opt_term.value()); - - auto opt_voted_for = disk_storage_.Get(kVotedForKey); - if (!opt_voted_for) { - voted_for_ = std::nullopt; - } else { - voted_for_ = {std::stoul(opt_voted_for.value())}; - } - - auto opt_log_size = disk_storage_.Get(kLogSizeKey); - if (opt_log_size) log_size_ = std::stoull(opt_log_size.value()); - - if (log_size_ != 0) { - auto opt_last_log_entry = disk_storage_.Get(LogEntryKey(log_size_ - 1)); - DCHECK(opt_last_log_entry != std::nullopt) - << "Log size is equal to " << log_size_ - << ", but there is no log entry on index: " << log_size_ - 1; - last_entry_term_ = DeserializeLogEntry(opt_last_log_entry.value()).term; - } -} - -void RaftServer::Transition(const Mode &new_mode) { - switch (new_mode) { - case Mode::FOLLOWER: { - LOG(INFO) << "Server " << server_id_ - << ": Transition to FOLLOWER (Term: " << current_term_ << ")"; - - bool reset = mode_ == Mode::LEADER; - issue_hb_ = false; - mode_ = Mode::FOLLOWER; - - if (reset) { - VLOG(40) << "Resetting internal state"; - // Temporary freeze election timer while we do the reset. - next_election_ = TimePoint::max(); - - db_->Reset(); - replication_timeout_.Clear(); - - // Re-apply raft log. - uint64_t starting_index = 1; - for (uint64_t i = starting_index; i <= commit_index_; ++i) { - ApplyStateDeltas(GetLogEntry(i).deltas); - } - - last_applied_ = commit_index_; - } - - SetNextElectionTimePoint(); - election_change_.notify_all(); - state_changed_.notify_all(); - break; - } - - case Mode::CANDIDATE: { - LOG(INFO) << "Server " << server_id_ - << ": Transition to CANDIDATE (Term: " << current_term_ << ")"; - - // [Raft thesis, section 3.4] - // "Each candidate restarts its randomized election timeout at the start - // of an election, and it waits for that timeout to elapse before - // starting the next election; this reduces the likelihood of another - // split vote in the new election." - SetNextElectionTimePoint(); - election_change_.notify_all(); - - // [Raft thesis, section 3.4] - // "To begin an election, a follower increments its current term and - // transitions to candidate state. It then votes for itself and issues - // RequestVote RPCs in parallel to each of the other servers in the - // cluster." - SetCurrentTerm(current_term_ + 1); - SetVotedFor(server_id_); - - granted_votes_ = 1; - vote_requested_.assign(coordination_->GetAllNodeCount() + 1, false); - - issue_hb_ = false; - mode_ = Mode::CANDIDATE; - state_changed_.notify_all(); - - break; - } - - case Mode::LEADER: { - LOG(INFO) << "Server " << server_id_ - << ": Transition to LEADER (Term: " << current_term_ << ")"; - // Freeze election timer - next_election_ = TimePoint::max(); - election_change_.notify_all(); - - // Set next heartbeat and replication to correct values - TimePoint now = Clock::now(); - for (auto &peer_replication : next_replication_) - peer_replication = now + config_.heartbeat_interval; - for (auto &peer_heartbeat : next_heartbeat_) - peer_heartbeat = now + config_.heartbeat_interval; - - issue_hb_ = true; - hb_condition_.notify_all(); - - // [Raft paper figure 2] - // "For each server, index of the next log entry to send to that server - // is initialized to leader's last log index + 1" - for (int i = 1; i <= coordination_->GetAllNodeCount(); ++i) { - next_index_[i] = log_size_; - index_offset_[i] = 1; - match_index_[i] = 0; - } - - // Raft guarantees the Leader Append-Only property [Raft paper 5.2] - // so its safe to apply everything from our log into our state machine - for (int i = last_applied_ + 1; i < log_size_; ++i) - ApplyStateDeltas(GetLogEntry(i).deltas); - last_applied_ = log_size_ - 1; - - mode_ = Mode::LEADER; - - leader_changed_.notify_all(); - break; - } - } -} - -void RaftServer::AdvanceCommitIndex() { - DCHECK(mode_ == Mode::LEADER) - << "Commit index can only be advanced by the leader"; - - std::vector known_replication_indices; - for (int i = 1; i <= coordination_->GetAllNodeCount(); ++i) { - if (i != server_id_) - known_replication_indices.push_back(match_index_[i]); - else - known_replication_indices.push_back(log_size_ - 1); - } - - std::sort(known_replication_indices.begin(), known_replication_indices.end()); - uint64_t new_commit_index = - known_replication_indices[(coordination_->GetAllNodeCount() - 1) / 2]; - - // This can happen because we reset `match_index` vector to 0 after a - // new leader has been elected. - if (commit_index_ >= new_commit_index) return; - - // [Raft thesis, section 3.6.2] - // "(...) Raft never commits log entries from previous terms by counting - // replicas. Only log entries from the leader's current term are committed by - // counting replicas; once an entry from the current term has been committed - // in this way, then all prior entries are committed indirectly because of the - // Log Matching Property." - if (GetLogEntry(new_commit_index).term != current_term_) { - VLOG(40) << "Server " << server_id_ - << ": cannot commit log entry from " - "previous term based on " - "replication count."; - return; - } - - VLOG(40) << "Begin applying commited transactions"; - - for (int i = commit_index_ + 1; i <= new_commit_index; ++i) { - auto log_entry = GetLogEntry(i); - DCHECK(log_entry.deltas.size() > 2) - << "Log entry should consist of at least three state deltas."; - replication_timeout_.Remove(log_entry.term, i); - } - - commit_index_ = new_commit_index; - last_applied_ = new_commit_index; -} - -void RaftServer::SendEntries(uint16_t peer_id, - std::unique_lock *lock) { - SendLogEntries(peer_id, lock); -} - -void RaftServer::SendLogEntries(uint16_t peer_id, - std::unique_lock *lock) { - uint64_t request_term = current_term_; - uint64_t request_prev_log_index = next_index_[peer_id] - 1; - uint64_t request_prev_log_term; - - request_prev_log_term = GetLogEntry(next_index_[peer_id] - 1).term; - - std::vector request_entries; - if (next_index_[peer_id] <= log_size_ - 1) - GetLogSuffix(next_index_[peer_id], request_entries); - - // Copy all internal variables before releasing the lock. - auto server_id = server_id_; - auto commit_index = commit_index_; - - VLOG(40) << "Server " << server_id_ - << ": Sending Entries RPC to server " << peer_id - << " (Term: " << current_term_ << ")"; - VLOG(40) << "Entries size: " << request_entries.size(); - - // Execute the RPC. - lock->unlock(); - auto reply = coordination_->ExecuteOnOtherNode( - peer_id, server_id, commit_index, request_term, request_prev_log_index, - request_prev_log_term, request_entries); - lock->lock(); - - if (!reply) { - next_replication_[peer_id] = Clock::now() + config_.heartbeat_interval; - return; - } - - // We can't early exit if the `exiting_` flag is true just yet. It is possible - // that the response we handle here carries the last confirmation that the logs - // have been replicated. We need to handle the response so the client doesn't - // retry the query because he thinks the query failed. - if (current_term_ != request_term || mode_ != Mode::LEADER) { - return; - } - - if (OutOfSync(reply->term)) { - state_changed_.notify_all(); - return; - } - - DCHECK(mode_ == Mode::LEADER) - << "Elected leader for term should never change."; - - if (reply->term != current_term_) { - VLOG(40) << "Server " << server_id_ - << ": Ignoring stale AppendEntriesRPC reply from " << peer_id; - return; - } - - if (!reply->success) { - // Replication can fail for the first log entry if the peer that we're - // sending the entry is in the process of shutting down. - if (next_index_[peer_id] > index_offset_[peer_id]) { - next_index_[peer_id] -= index_offset_[peer_id]; - // Overflow should be prevented by snapshot threshold constant. - index_offset_[peer_id] <<= 1UL; - } else { - next_index_[peer_id] = 1UL; - } - } else { - uint64_t new_match_index = request_prev_log_index + request_entries.size(); - DCHECK(match_index_[peer_id] <= new_match_index) - << "`match_index` should increase monotonically within a term"; - match_index_[peer_id] = new_match_index; - if (request_entries.size() > 0) AdvanceCommitIndex(); - next_index_[peer_id] = match_index_[peer_id] + 1; - index_offset_[peer_id] = 1; - next_replication_[peer_id] = Clock::now() + config_.heartbeat_interval; - } - - if (exiting_) return; - state_changed_.notify_all(); -} - -void RaftServer::ElectionThreadMain() { - utils::ThreadSetName("ElectionThread"); - std::unique_lock lock(lock_); - while (!exiting_) { - if (Clock::now() >= next_election_) { - VLOG(40) << "Server " << server_id_ - << ": Election timeout exceeded (Term: " << current_term_ << ")"; - Transition(Mode::CANDIDATE); - state_changed_.notify_all(); - } - election_change_.wait_until(lock, next_election_); - } -} - -void RaftServer::PeerThreadMain(uint16_t peer_id) { - utils::ThreadSetName(fmt::format("RaftPeer{}", peer_id)); - std::unique_lock lock(lock_); - - /* This loop will either call a function that issues an RPC or wait on the - * condition variable. It must not do both! Lock on `mutex_` is released - * while waiting for RPC response, which might cause us to miss a - * notification on `state_changed_` conditional variable and wait - * indefinitely. The safest thing to do is to assume some important part of - * state was modified while we were waiting for the response and loop around - * to check. */ - while (!exiting_) { - TimePoint now = Clock::now(); - TimePoint wait_until; - - switch (mode_) { - case Mode::FOLLOWER: { - wait_until = TimePoint::max(); - break; - } - - case Mode::CANDIDATE: { - if (vote_requested_[peer_id]) { - wait_until = TimePoint::max(); - break; - } - - // TODO(ipaljak): Consider backoff. - wait_until = TimePoint::max(); - - // Copy all internal variables before releasing the lock. - auto server_id = server_id_; - auto request_term = current_term_.load(); - auto last_entry_data = LastEntryData(); - - vote_requested_[peer_id] = true; - - // Execute the RPC. - lock.unlock(); // Release lock while waiting for response - auto reply = coordination_->ExecuteOnOtherNode( - peer_id, server_id, request_term, last_entry_data.first, - last_entry_data.second); - lock.lock(); - - // If the peer isn't reachable, it is the same as if he didn't grant - // us his vote. - if (!reply) { - reply = RequestVoteRes(false, request_term); - } - - if (current_term_ != request_term || mode_ != Mode::CANDIDATE || - exiting_) { - VLOG(40) << "Server " << server_id_ - << ": Ignoring RequestVoteRPC reply from " << peer_id; - break; - } - - if (OutOfSync(reply->term)) { - state_changed_.notify_all(); - continue; - } - - if (reply->vote_granted) { - VLOG(40) << "Server " << server_id_ << ": Got vote from " - << peer_id; - ++granted_votes_; - if (HasMajorityVote()) Transition(Mode::LEADER); - } else { - VLOG(40) << "Server " << server_id_ << ": Denied vote from " - << peer_id; - } - - state_changed_.notify_all(); - continue; - } - - case Mode::LEADER: { - if (now >= next_replication_[peer_id]) { - SendEntries(peer_id, &lock); - continue; - } - wait_until = next_replication_[peer_id]; - break; - } - } - - if (exiting_) break; - state_changed_.wait_until(lock, wait_until); - } -} - -void RaftServer::HBThreadMain(uint16_t peer_id) { - utils::ThreadSetName(fmt::format("HBThread{}", peer_id)); - std::unique_lock lock(heartbeat_lock_); - - // The heartbeat thread uses a dedicated RPC client for its peer so that it - // can issue heartbeats in parallel with other RPC requests that are being - // issued to the peer (replication, voting, etc.) - std::unique_ptr rpc_client; - - while (!exiting_) { - TimePoint wait_until; - - if (!issue_hb_) { - wait_until = TimePoint::max(); - } else { - TimePoint now = Clock::now(); - if (now < next_heartbeat_[peer_id]) { - wait_until = next_heartbeat_[peer_id]; - } else { - VLOG(40) << "Server " << server_id_ << ": Sending HB to server " - << peer_id << " (Term: " << current_term_ << ")"; - - lock.unlock(); - if (!rpc_client) { - rpc_client = std::make_unique( - coordination_->GetOtherNodeEndpoint(peer_id), - coordination_->GetRpcClientContext()); - } - try { - rpc_client->Call(server_id_, current_term_); - } catch (...) { - // Invalidate the client so that we reconnect next time. - rpc_client = nullptr; - } - lock.lock(); - - // This is ok even if we don't receive a reply. - next_heartbeat_[peer_id] = now + config_.heartbeat_interval; - wait_until = next_heartbeat_[peer_id]; - } - } - - if (exiting_) break; - hb_condition_.wait_until(lock, wait_until); - } -} - -void RaftServer::NoOpIssuerThreadMain() { - utils::ThreadSetName(fmt::format("NoOpIssuer")); - std::mutex m; - auto lock = std::unique_lock(m); - while (!exiting_) { - leader_changed_.wait(lock); - // no_op_create_callback_ will create a new transaction that has a NO_OP - // StateDelta. This will trigger the whole procedure of replicating logs - // in our implementation of Raft. - if (!exiting_) NoOpCreate(); - } -} - -void RaftServer::SetNextElectionTimePoint() { - // [Raft thesis, section 3.4] - // "Raft uses randomized election timeouts to ensure that split votes are - // rare and that they are resolved quickly. To prevent split votes in the - // first place, election timeouts are chosen randomly from a fixed interval - // (e.g., 150-300 ms)." - std::uniform_int_distribution distribution( - config_.election_timeout_min.count(), - config_.election_timeout_max.count()); - Clock::duration wait_interval = std::chrono::milliseconds(distribution(rng_)); - next_election_ = Clock::now() + wait_interval; -} - -bool RaftServer::HasMajorityVote() { - if (2 * granted_votes_ > coordination_->GetAllNodeCount()) { - VLOG(40) << "Server " << server_id_ - << ": Obtained majority vote (Term: " << current_term_ << ")"; - return true; - } - return false; -} - -std::pair RaftServer::LastEntryData() { - return {log_size_, last_entry_term_}; -} - -bool RaftServer::AtLeastUpToDate(uint64_t last_log_index_a, - uint64_t last_log_term_a, - uint64_t last_log_index_b, - uint64_t last_log_term_b) { - if (last_log_term_a == last_log_term_b) - return last_log_index_a >= last_log_index_b; - return last_log_term_a > last_log_term_b; -} - -bool RaftServer::OutOfSync(uint64_t reply_term) { - DCHECK(mode_ != Mode::FOLLOWER) << "`OutOfSync` called from FOLLOWER mode"; - - // [Raft thesis, Section 3.3] - // "Current terms are exchanged whenever servers communicate; if one - // server's current term is smaller than the other's, then it updates - // its current term to the larger value. If a candidate or leader - // discovers that its term is out of date, it immediately reverts to - // follower state." - if (current_term_ < reply_term) { - disk_storage_.Put(kCurrentTermKey, std::to_string(reply_term)); - disk_storage_.Delete(kVotedForKey); - granted_votes_ = 0; - Transition(Mode::FOLLOWER); - return true; - } - return false; -} - -LogEntry RaftServer::GetLogEntry(uint64_t index) { - auto it = log_.find(index); - if (it != log_.end()) - return it->second; // retrieve in-mem if possible - auto opt_value = disk_storage_.Get(LogEntryKey(index)); - DCHECK(opt_value != std::nullopt) - << "Log index (" << index << ") out of bounds."; - return DeserializeLogEntry(opt_value.value()); -} - -void RaftServer::DeleteLogSuffix(int starting_index) { - DCHECK(0 <= starting_index && starting_index < log_size_) - << "Log index out of bounds."; - for (int i = starting_index; i < log_size_; ++i) { - log_.erase(i); - disk_storage_.Delete(LogEntryKey(i)); - } - SetLogSize(starting_index); -} - -void RaftServer::GetLogSuffix(int starting_index, - std::vector &entries) { - DCHECK(0 <= starting_index && starting_index < log_size_) - << "Log index out of bounds."; - for (int i = starting_index; i < log_size_; ++i) - entries.push_back(GetLogEntry(i)); -} - -void RaftServer::AppendLogEntries(uint64_t leader_commit_index, - uint64_t starting_index, - const std::vector &new_entries) { - for (int i = 0; i < new_entries.size(); ++i) { - // If existing entry conflicts with new one, we need to delete the - // existing entry and all that follow it. - int current_index = i + starting_index; - if (log_size_ > current_index && - GetLogEntry(current_index).term != new_entries[i].term) { - DeleteLogSuffix(current_index); - } - DCHECK(log_size_ >= current_index) << "Current Log index out of bounds."; - if (log_size_ == current_index) { - log_[log_size_] = new_entries[i]; - disk_storage_.Put(LogEntryKey(log_size_), - SerializeLogEntry(new_entries[i])); - last_entry_term_ = new_entries[i].term; - SetLogSize(log_size_ + 1); - } - } - - // See Raft paper 5.3 - if (leader_commit_index > commit_index_) { - commit_index_ = std::min(leader_commit_index, log_size_ - 1); - } -} - -std::string RaftServer::LogEntryKey(uint64_t index) { - return kLogEntryPrefix + std::to_string(index); -} - -std::string RaftServer::SerializeLogEntry(const LogEntry &log_entry) { - std::stringstream stream(std::ios_base::in | std::ios_base::out | - std::ios_base::binary); - slk::Builder builder( - [&stream](const uint8_t *data, size_t size, bool have_more) { - for (size_t i = 0; i < size; ++i) { - stream << utils::MemcpyCast(data[i]); - } - }); - slk::Save(log_entry, &builder); - builder.Finalize(); - return stream.str(); -} - -LogEntry RaftServer::DeserializeLogEntry( - const std::string &serialized_log_entry) { - slk::Reader reader( - reinterpret_cast(serialized_log_entry.data()), - serialized_log_entry.size()); - LogEntry deserialized; - try { - slk::Load(&deserialized, &reader); - reader.Finalize(); - } catch (const slk::SlkReaderException &) { - LOG(FATAL) << "Couldn't load log from disk storage!"; - } - return deserialized; -} - -void RaftServer::NoOpCreate() { - // TODO(ipaljak): Review this after implementing RaftDelta object. - auto dba = db_->Access(); - db_->sd_buffer()->Emplace(database::StateDelta::NoOp(dba.transaction_id())); - try { - dba.Commit(); - } catch (const RaftException &) { - // NoOp failure can be ignored. - return; - } -} - -void RaftServer::ApplyStateDeltas( - const std::vector &deltas) { - std::optional dba; - for (auto &delta : deltas) { - switch (delta.type) { - case database::StateDelta::Type::NO_OP: - break; - case database::StateDelta::Type::TRANSACTION_BEGIN: - CHECK(!dba) << "Double transaction start"; - dba = db_->Access(); - break; - case database::StateDelta::Type::TRANSACTION_COMMIT: - CHECK(dba) << "Missing accessor for transaction" - << delta.transaction_id; - dba->Commit(); - dba = std::nullopt; - break; - case database::StateDelta::Type::TRANSACTION_ABORT: - LOG(FATAL) << "ApplyStateDeltas shouldn't know about aborted " - "transactions"; - break; - default: - CHECK(dba) << "Missing accessor for transaction" - << delta.transaction_id; - delta.Apply(*dba); - } - } - CHECK(!dba) << "StateDeltas missing commit command"; -} - -std::mutex &RaftServer::WithLock() { return lock_; } - -} // namespace raft diff --git a/src/raft/raft_server.hpp b/src/raft/raft_server.hpp deleted file mode 100644 index e430cd74f..000000000 --- a/src/raft/raft_server.hpp +++ /dev/null @@ -1,393 +0,0 @@ -/// @file - -#pragma once - -#include -#include -#include -#include -#include - -#include "durability/single_node_ha/state_delta.hpp" -#include "kvstore/kvstore.hpp" -#include "raft/config.hpp" -#include "raft/coordination.hpp" -#include "raft/log_entry.hpp" -#include "raft/raft_interface.hpp" -#include "raft/raft_rpc_messages.hpp" -#include "raft/replication_log.hpp" -#include "raft/replication_timeout_map.hpp" -#include "transactions/type.hpp" -#include "utils/scheduler.hpp" - -// Forward declaration -namespace database { -class GraphDb; -} // namespace database - -namespace raft { - -using Clock = std::chrono::system_clock; -using TimePoint = std::chrono::system_clock::time_point; - -enum class Mode { FOLLOWER, CANDIDATE, LEADER }; - -inline std::string ModeToString(const Mode &mode) { - switch (mode) { - case Mode::FOLLOWER: - return "FOLLOWER"; - case Mode::CANDIDATE: - return "CANDIDATE"; - case Mode::LEADER: - return "LEADER"; - } -} - -/// Class which models the behaviour of a single server within the Raft -/// cluster. The class is responsible for storing both volatile and -/// persistent internal state of the corresponding state machine as well -/// as performing operations that comply with the Raft protocol. -class RaftServer final : public RaftInterface { - public: - RaftServer() = delete; - - /// The implementation assumes that server IDs are unique integers between - /// ranging from 1 to cluster_size. - /// - /// @param server_id ID of the current server. - /// @param durbility_dir directory for persisted data. - /// @param db_recover_on_startup flag indicating if recovery should happen at - /// startup. - /// @param config raft configuration. - /// @param coordination Abstraction for coordination between Raft servers. - /// @param db The current DB object. - RaftServer(uint16_t server_id, const std::string &durability_dir, - bool db_recover_on_startup, const Config &config, - raft::Coordination *coordination, database::GraphDb *db); - - /// Starts the RPC servers and starts mechanisms inside Raft protocol. - void Start(); - - /// Stops all threads responsible for the Raft protocol. - void Shutdown(); - - /// Setter for the current term. It updates the persistent storage as well - /// as its in-memory copy. - void SetCurrentTerm(uint64_t new_current_term); - - /// Setter for `voted for` member. It updates the persistent storage as well - /// as its in-memory copy. - void SetVotedFor(std::optional new_voted_for); - - /// Setter for `log size` member. It updates the persistent storage as well - /// as its in-memory copy. - void SetLogSize(uint64_t new_log_size); - - /// Emplace a new LogEntry in the raft log and start its replication. This - /// entry is created from a given batched set of StateDelta objects. - /// - /// It is possible that the entry was not successfully emplaced. In that case, - /// the method returns std::nullopt and the caller is responsible for handling - /// situation correctly (e.g. aborting the corresponding transaction). - /// - /// @returns an optional LogEntryStatus object as result. - std::optional Emplace( - const std::vector &deltas) override; - - /// Returns true if the current servers mode is LEADER. False otherwise. - bool IsLeader() override; - - /// Returns the term ID of the current leader. - uint64_t TermId() override; - - /// Returns the replication status of LogEntry which began its replication in - /// a given term ID and was emplaced in the raft log at the given index. - /// - /// Replication status can be one of the following - /// 1) REPLICATED -- LogEntry was successfully replicated across - /// the Raft cluster - /// 2) WAITING -- LogEntry was successfully emplaced in the Raft - /// log and is currently being replicated. - /// 3) ABORTED -- LogEntry will not be replicated. - /// 4) INVALID -- the request for the LogEntry was invalid, most - /// likely either term_id or log_index were out of range. - ReplicationStatus GetReplicationStatus(uint64_t term_id, - uint64_t log_index) override; - - /// Checks if the LogEntry with the give term id and log index can safely be - /// committed in local storage. - /// - /// @param term_id term when the LogEntry was created - /// @param log_index index of the LogEntry in the Raft log - /// - /// @return bool True if the transaction is safe to commit, false otherwise. - /// - /// @throws ReplicationTimeoutException - /// @throws RaftShutdownException - /// @throws InvalidReplicationLogLookup - bool SafeToCommit(uint64_t term_id, uint64_t log_index) override; - - private: - mutable std::mutex lock_; ///< Guards all internal state. - mutable std::mutex heartbeat_lock_; ///< Guards HB issuing - - ////////////////////////////////////////////////////////////////////////////// - // volatile state on all servers - ////////////////////////////////////////////////////////////////////////////// - - Config config_; ///< Raft config. - Coordination *coordination_{nullptr}; ///< Cluster coordination. - database::GraphDb *db_{nullptr}; - - std::atomic mode_; ///< Server's current mode. - uint16_t server_id_; ///< ID of the current server. - std::filesystem::path durability_dir_; ///< Durability directory. - bool db_recover_on_startup_; ///< Flag indicating if recovery should happen - ///< on startup. - uint64_t commit_index_; ///< Index of the highest known committed entry. - uint64_t last_applied_; ///< Index of the highest applied entry to SM. - uint64_t last_entry_term_; ///< Term of the last entry in Raft log - - std::atomic issue_hb_; ///< Flag which signalizes if the current server - ///< should send HBs to the rest of the cluster. - - std::vector peer_threads_; ///< One thread per peer which - ///< handles outgoing RPCs. - - std::vector hb_threads_; ///< One thread per peer which is - ///< responsible for sending periodic - ///< heartbeats. - - std::condition_variable state_changed_; ///< Notifies all peer threads on - ///< relevant state change. - - std::thread no_op_issuer_thread_; ///< Thread responsible for issuing no-op - ///< command on leader change. - - std::condition_variable leader_changed_; ///< Notifies the - ///< no_op_issuer_thread that a new - ///< leader has been elected. - - std::condition_variable hb_condition_; ///< Notifies the HBIssuer thread - ///< that it should start sending - ///< heartbeats. - - std::atomic exiting_{false}; ///< True on server shutdown. - - ////////////////////////////////////////////////////////////////////////////// - // volatile state on followers and candidates - ////////////////////////////////////////////////////////////////////////////// - - std::thread election_thread_; ///< Timer thread for triggering elections. - TimePoint next_election_; ///< Next election `TimePoint`. - - std::condition_variable election_change_; ///> Used to notify election_thread - ///> on next_election_ change. - - std::mt19937_64 rng_ = std::mt19937_64(std::random_device{}()); - - ////////////////////////////////////////////////////////////////////////////// - // volatile state on candidates - ////////////////////////////////////////////////////////////////////////////// - - uint16_t granted_votes_; - std::vector vote_requested_; - - ////////////////////////////////////////////////////////////////////////////// - // volatile state on leaders - ////////////////////////////////////////////////////////////////////////////// - - std::vector next_index_; ///< for each server, index of the next - ///< log entry to send to that server. - - std::vector index_offset_; ///< for each server, the offset for - ///< which we reduce the next_index_ - ///< field if the AppendEntries request - ///< is denied. We use "binary lifting" - ///< style technique to achieve at most - ///< O(logn) requests. - - std::vector match_index_; ///< for each server, index of the - ///< highest log entry known to be - ///< replicated on server. - - std::vector next_replication_; ///< for each server, time point - ///< for the next replication. - - std::vector next_heartbeat_; ///< for each server, time point for - ///< the next heartbeat. - - // Tracks timepoints until a transactions is allowed to be in the replication - // process. - ReplicationTimeoutMap replication_timeout_; - - ////////////////////////////////////////////////////////////////////////////// - // persistent state on all servers - // - // Persistent data consists of: - // - uint64_t current_term -- latest term server has seen. - // - uint16_t voted_for -- candidate_id that received vote in current - // term (null if none). - // - uint64_t log_size -- Number of stored entries within the log. - // - vector log -- log entries. Each log entry is stored under - // a separate key within KVStore. - ////////////////////////////////////////////////////////////////////////////// - - kvstore::KVStore disk_storage_; - - std::optional voted_for_; - - std::atomic current_term_; - uint64_t log_size_; - - std::map log_; - - /// Recovers persistent data from disk and stores its in-memory copies - /// that insure faster read-only operations. This method should be called - /// on start-up. If parts of persistent data are missing, the method won't - /// make a copy of that data, i.e. no exception is thrown and the caller - /// should check whether persistent data actually exists. - void RecoverPersistentData(); - - /// Makes a transition to a new `raft::Mode`. - /// - /// throws InvalidTransitionException when transitioning between incompatible - /// `raft::Mode`s. - void Transition(const raft::Mode &new_mode); - - /// Tries to advance the commit index on a leader. - void AdvanceCommitIndex(); - - /// Decides whether to send Log Entires or Snapshot to the given peer. - /// - /// @param peer_id ID of the peer which receives entries. - /// @param lock Lock from the peer thread (released while waiting for - /// response) - void SendEntries(uint16_t peer_id, std::unique_lock *lock); - - /// Sends Log Entries to peer. This function should only be called in leader - /// mode. - /// - /// @param peer_id ID of the peer which receives entries. - /// @param lock Lock from the peer thread (released while waiting for - /// response) - void SendLogEntries(uint16_t peer_id, - std::unique_lock *lock); - - /// Send Snapshot to peer. This function should only be called in leader - /// mode. - /// - /// @param peer_id ID of the peer which receives entries. - /// @param lock Lock from the peer thread (released while waiting for - /// response) - void SendSnapshot(uint16_t peer_id, std::unique_lock *lock); - - /// Main function of the `election_thread_`. It is responsible for - /// transition to CANDIDATE mode when election timeout elapses. - void ElectionThreadMain(); - - /// Main function of the thread that handles outgoing RPCs towards a - /// specified node within the Raft cluster. - /// - /// @param peer_id - ID of a receiving node in the cluster. - void PeerThreadMain(uint16_t peer_id); - - /// Main function of the thread that handles issuing heartbeats towards - /// other peers. At the moment, this function is ignorant about the status - /// of LogEntry replication. Therefore, it might issue unnecessary - /// heartbeats, but we can live with that at this point. - /// - /// @param peer_id - ID of a receiving node in the cluster. - void HBThreadMain(uint16_t peer_id); - - /// Issues no-op command when a new leader is elected. This is done to - /// force the Raft protocol to commit logs from previous terms that - /// have been replicated on a majority of peers. - void NoOpIssuerThreadMain(); - - /// Sets the `TimePoint` for next election. - void SetNextElectionTimePoint(); - - /// Checks if the current server obtained enough votes to become a leader. - bool HasMajorityVote(); - - /// Returns relevant metadata about the last entry in this server's Raft Log. - /// More precisely, returns a pair consisting of an index of the last entry - /// in the log and the term of the last entry in the log. - /// - /// @return std::pair - std::pair LastEntryData(); - - /// Checks whether Raft log of server A is at least as up-to-date as the Raft - /// log of server B. This is strictly defined in Raft paper 5.4. - /// - /// @param last_log_index_a - Index of server A's last log entry. - /// @param last_log_term_a - Term of server A's last log entry. - /// @param last_log_index_b - Index of server B's last log entry. - /// @param last_log_term_b - Term of server B's last log entry. - bool AtLeastUpToDate(uint64_t last_log_index_a, uint64_t last_log_term_a, - uint64_t last_log_index_b, uint64_t last_log_term_b); - - /// Checks whether the current server got a reply from "future", i.e. reply - /// with a higher term. If so, the current server falls back to follower mode - /// and updates its current term. - /// - /// @param reply_term Term from RPC response. - /// @return true if the current server's term lags behind. - bool OutOfSync(uint64_t reply_term); - - /// Retrieves a log entry from the log at a given index. - /// - /// @param index Index of the log entry to be retrieved. - LogEntry GetLogEntry(uint64_t index); - - /// Deletes log entries with indexes that are greater or equal to the given - /// starting index. - /// - /// @param starting_index Smallest index which will be deleted from the Log. - /// Also, a friendly remainder that log entries are - /// 1-indexed. - void DeleteLogSuffix(int starting_index); - - /// Stores log entries with indexes that are greater or equal to the given - /// starting index into a provided container. If the starting index is - /// greater than the log size, nothing will be stored in the provided - /// container. - /// - /// @param starting_index Smallest index which will be stored. - /// @param entries The container which will store the wanted suffix. - void GetLogSuffix(int starting_index, std::vector &entries); - - /// Appends new log entries to Raft log. Note that this function is not - /// smart in any way, i.e. the caller should make sure that it's safe - /// to call this function. This function also updates this server's commit - /// index if necessary. - /// - /// @param leader_commit_index - Used to update local commit index. - /// @param starting_index - Index in the log from which we start to append. - /// @param new_entries - New `LogEntry` instances to be appended in the log. - void AppendLogEntries(uint64_t leader_commit_index, uint64_t starting_index, - const std::vector &new_entries); - - /// Generates the key under which the `LogEntry` with a given index should - /// be stored on our disk storage. - /// - /// @param index - Index of the `LogEntry` for which we generate the key. - std::string LogEntryKey(uint64_t index); - - /// Serializes Raft log entry into `std::string` - std::string SerializeLogEntry(const LogEntry &log_entry); - - /// Deserialized Raft log entry from `std::string` - LogEntry DeserializeLogEntry(const std::string &serialized_log_entry); - - /// Start a new transaction with a NO-OP StateDelta. - void NoOpCreate(); - - /// Applies the given batch of state deltas that are representing a transacton - /// to the db. - void ApplyStateDeltas(const std::vector &deltas); - - std::mutex &WithLock() override; -}; -} // namespace raft diff --git a/src/raft/replication_log.hpp b/src/raft/replication_log.hpp deleted file mode 100644 index bfd70a83c..000000000 --- a/src/raft/replication_log.hpp +++ /dev/null @@ -1,86 +0,0 @@ -/// @file -#pragma once - -#include - -#include "data_structures/bitset/dynamic_bitset.hpp" -#include "transactions/type.hpp" - -namespace raft { - -/// Tracks information about replicated and active logs for high availability. -/// -/// The main difference between ReplicationLog and CommitLog is that -/// ReplicationLog doesn't throw when looking up garbage collected transaction -/// ids. -class ReplicationLog final { - public: - static constexpr int kBitsetBlockSize = 32768; - - ReplicationLog() = default; - ReplicationLog(const ReplicationLog &) = delete; - ReplicationLog(ReplicationLog &&) = delete; - ReplicationLog &operator=(const ReplicationLog &) = delete; - ReplicationLog &operator=(ReplicationLog &&) = delete; - - bool is_active(tx::TransactionId id) const { - return fetch_info(id).is_active(); - } - - void set_active(tx::TransactionId id) { log.set(2 * id); } - - bool is_replicated(tx::TransactionId id) const { - return fetch_info(id).is_replicated(); - } - - void set_replicated(tx::TransactionId id) { log.set(2 * id + 1); } - - // Clears the replication log from bits associated with transactions with an - // id lower than `id`. - void garbage_collect_older(tx::TransactionId id) { - // We keep track of the valid prefix in order to avoid the `CHECK` inside - // the `DynamicBitset`. - valid_prefix = 2 * id; - log.delete_prefix(2 * id); - } - - class Info final { - public: - enum Status { - UNKNOWN = 0, // 00 - ACTIVE = 1, // 01 - REPLICATED = 2, // 10 - }; - - explicit Info(uint8_t flags) { - if (flags & REPLICATED) { - flags_ = REPLICATED; - } else if (flags & ACTIVE) { - flags_ = ACTIVE; - } else { - flags_ = UNKNOWN; - } - } - - bool is_active() const { return flags_ & ACTIVE; } - - bool is_replicated() const { return flags_ & REPLICATED; } - - operator uint8_t() const { return flags_; } - - private: - uint8_t flags_{0}; - }; - - Info fetch_info(tx::TransactionId id) const { - if (valid_prefix > 2 * id) return Info{0}; - - return Info{log.at(2 * id, 2)}; - } - - private: - DynamicBitset log; - std::atomic valid_prefix{0}; -}; - -} // namespace raft diff --git a/src/raft/replication_timeout_map.hpp b/src/raft/replication_timeout_map.hpp deleted file mode 100644 index 447c66d77..000000000 --- a/src/raft/replication_timeout_map.hpp +++ /dev/null @@ -1,71 +0,0 @@ -/// @file -#pragma once - -#include -#include -#include - -namespace raft { - -using Clock = std::chrono::system_clock; -using TimePoint = std::chrono::system_clock::time_point; - -/// A wrapper around an unordered_map whose reads/writes are protected with a -/// lock. It's also specialized to serve the sole purpose of tracking -/// replication timeout. -class ReplicationTimeoutMap final { - public: - ReplicationTimeoutMap() = delete; - - ReplicationTimeoutMap(const ReplicationTimeoutMap &) = delete; - ReplicationTimeoutMap(ReplicationTimeoutMap &&) = delete; - ReplicationTimeoutMap operator=(const ReplicationTimeoutMap &) = delete; - ReplicationTimeoutMap operator=(ReplicationTimeoutMap &&) = delete; - - explicit ReplicationTimeoutMap(std::chrono::milliseconds replication_timeout) - : replication_timeout_(replication_timeout) {} - - /// Remove all entries from the map. - void Clear() { - std::lock_guard guard(lock_); - timeout_.clear(); - } - - /// Remove a single entry from the map. - void Remove(const uint64_t term_id, const uint64_t log_index) { - std::lock_guard guard(lock_); - timeout_.erase({term_id, log_index}); - } - - /// Inserts and entry in the map by setting a point in time until it needs to - /// replicated. - void Insert(const uint64_t term_id, const uint64_t log_index) { - std::lock_guard guard(lock_); - timeout_[{term_id, log_index}] = replication_timeout_ + Clock::now(); - } - - /// Checks if the given entry has timed out. - /// @returns bool True if it exceeded timeout, false otherwise. - bool CheckTimeout(const uint64_t term_id, const uint64_t log_index) { - std::lock_guard guard(lock_); - auto found = timeout_.find({term_id, log_index}); - // If we didn't set the timeout yet, or we already deleted it, we didn't - // time out. - if (found == timeout_.end()) return false; - if (found->second < Clock::now()) { - return true; - } else { - return false; - } - } - - private: - std::chrono::milliseconds replication_timeout_; - - mutable std::mutex lock_; - // TODO(ipaljak): Consider using unordered_map if we encounter any performance - // issues. - std::map, TimePoint> timeout_; -}; - -} // namespace raft diff --git a/src/raft/storage_info.cpp b/src/raft/storage_info.cpp deleted file mode 100644 index 31122b955..000000000 --- a/src/raft/storage_info.cpp +++ /dev/null @@ -1,74 +0,0 @@ -#include "raft/storage_info.hpp" - -#include - -#include "database/single_node_ha/graph_db.hpp" -#include "raft/coordination.hpp" -#include "raft/storage_info_rpc_messages.hpp" -#include "utils/future.hpp" -#include "utils/stat.hpp" - -namespace raft { - -using namespace std::literals::chrono_literals; -using Clock = std::chrono::system_clock; -using TimePoint = std::chrono::system_clock::time_point; - -StorageInfo::StorageInfo(database::GraphDb *db, Coordination *coordination, - uint16_t server_id) - : db_(db), coordination_(coordination), server_id_(server_id) { - CHECK(db) << "Graph DB can't be nullptr"; - CHECK(coordination) << "Coordination can't be nullptr"; -} - -StorageInfo::~StorageInfo() {} - -void StorageInfo::Start() { - coordination_->Register( - [this](auto *req_reader, auto *res_builder) { - StorageInfoReq req; - slk::Load(&req, req_reader); - - StorageInfoRes res(this->server_id_, this->GetLocalStorageInfo()); - slk::Save(res, res_builder); - }); -} - -std::vector> -StorageInfo::GetLocalStorageInfo() const { - std::vector> info; - - db_->RefreshStat(); - auto &stat = db_->GetStat(); - - info.emplace_back("vertex_count", std::to_string(stat.vertex_count)); - info.emplace_back("edge_count", std::to_string(stat.edge_count)); - info.emplace_back("average_degree", std::to_string(stat.avg_degree)); - info.emplace_back("memory_usage", std::to_string(utils::GetMemoryUsage())); - info.emplace_back("disk_usage", - std::to_string(db_->GetDurabilityDirDiskUsage())); - - return info; -} - -std::map>> -StorageInfo::GetStorageInfo() const { - std::map>> info; - - for (auto id : coordination_->GetAllNodeIds()) { - if (id == server_id_) { - info.emplace(std::to_string(id), GetLocalStorageInfo()); - } else { - auto reply = coordination_->ExecuteOnOtherNode(id); - if (reply) { - info[std::to_string(id)] = std::move(reply->storage_info); - } else { - info[std::to_string(id)] = {}; - } - } - } - - return info; -} - -} // namespace raft diff --git a/src/raft/storage_info.hpp b/src/raft/storage_info.hpp deleted file mode 100644 index 57dc72730..000000000 --- a/src/raft/storage_info.hpp +++ /dev/null @@ -1,47 +0,0 @@ -/// @file - -#pragma once - -#include -#include - -// Forward declaration -namespace database { -class GraphDb; -} // namespace database - -namespace raft { - -// Forward declaration -class Coordination; - -/// StorageInfo takes care of the Raft cluster storage info retrieval. -class StorageInfo final { - public: - StorageInfo() = delete; - StorageInfo(database::GraphDb *db, Coordination *coordination, - uint16_t server_id); - - StorageInfo(const StorageInfo &) = delete; - StorageInfo(StorageInfo &&) = delete; - StorageInfo operator=(const StorageInfo &) = delete; - StorageInfo operator=(StorageInfo &&) = delete; - - ~StorageInfo(); - - void Start(); - - /// Returns storage info for the local storage only. - std::vector> GetLocalStorageInfo() const; - - /// Returns storage info for each peer in the Raft cluster. - std::map>> - GetStorageInfo() const; - - private: - database::GraphDb *db_{nullptr}; - Coordination *coordination_{nullptr}; - uint16_t server_id_; -}; - -} // namespace raft diff --git a/src/raft/storage_info_rpc_messages.lcp b/src/raft/storage_info_rpc_messages.lcp deleted file mode 100644 index c9c7edf94..000000000 --- a/src/raft/storage_info_rpc_messages.lcp +++ /dev/null @@ -1,19 +0,0 @@ -#>cpp -#pragma once - -#include -#include - -#include "rpc/messages.hpp" -#include "slk/serialization.hpp" -cpp<# - -(lcp:namespace raft) - -(lcp:define-rpc storage-info - (:request ()) - (:response - ((server-id :uint16_t) - (storage-info "std::vector>")))) - -(lcp:pop-namespace) ;; raft diff --git a/src/storage/common/constraints/common.hpp b/src/storage/common/constraints/common.hpp deleted file mode 100644 index 011acd289..000000000 --- a/src/storage/common/constraints/common.hpp +++ /dev/null @@ -1,30 +0,0 @@ -/// @file - -#pragma once - -#include - -#include "storage/common/types/property_value_store.hpp" -#include "transactions/engine.hpp" -#include "transactions/snapshot.hpp" - -namespace storage::constraints::common { -template -void UniqueConstraintRefresh(const tx::Snapshot &snapshot, - const tx::Engine &engine, - TConstraints &constraints, std::mutex &lock) { - std::lock_guard guard(lock); - for (auto &constraint : constraints) { - for (auto p = constraint.version_pairs.begin(); - p != constraint.version_pairs.end(); ++p) { - auto exp_id = p->record.tx_id_exp; - auto cre_id = p->record.tx_id_cre; - if ((exp_id != 0 && exp_id < snapshot.back() && - engine.Info(exp_id).is_committed() && !snapshot.contains(exp_id)) || - (cre_id < snapshot.back() && engine.Info(cre_id).is_aborted())) { - constraint.version_pairs.erase(p); - } - } - } -} -} // namespace storage::constraints::common diff --git a/src/storage/common/constraints/exceptions.hpp b/src/storage/common/constraints/exceptions.hpp deleted file mode 100644 index 4e216b23f..000000000 --- a/src/storage/common/constraints/exceptions.hpp +++ /dev/null @@ -1,19 +0,0 @@ -/// @file - -#pragma once - -#include "utils/exceptions.hpp" - -namespace storage::constraints { - -/// Thrown when a violation of a constraint occurs. -class ViolationException : public utils::BasicException { - using utils::BasicException::BasicException; -}; - -/// Thrown when multiple transactions alter the same constraint. -class SerializationException : public utils::BasicException { - using utils::BasicException::BasicException; -}; - -} // namespace database diff --git a/src/storage/common/constraints/record.cpp b/src/storage/common/constraints/record.cpp deleted file mode 100644 index 60c4cf6d0..000000000 --- a/src/storage/common/constraints/record.cpp +++ /dev/null @@ -1,66 +0,0 @@ -#include "storage/common/constraints/record.hpp" - -#include "storage/common/constraints/exceptions.hpp" -#include "storage/common/mvcc/exceptions.hpp" -#include "transactions/engine.hpp" -#include "transactions/transaction.hpp" - -namespace storage::constraints::impl { -Record::Record(storage::Gid gid, const tx::Transaction &t) - : curr_gid(gid), tx_id_cre(t.id_) {} - -void Record::Insert(storage::Gid gid, const tx::Transaction &t) { - // Insert - // - delete before or in this transaction and not aborted - // - insert before and aborted - // Throw SerializationException - // - delted of inserted after this transaction - // Throw ViolationException - // - insert before or in this transaction and not aborted - // - delete before and aborted - - t.TakeLock(lock_); - if (t.id_ < tx_id_cre || (tx_id_exp != 0 && t.id_ < tx_id_exp)) { - throw SerializationException( - "Node couldn't be updated due to unique constraint serialization " - "error!"); - } - - bool has_entry = tx_id_exp == 0; - bool is_aborted = has_entry ? t.engine_.Info(tx_id_cre).is_aborted() - : t.engine_.Info(tx_id_exp).is_aborted(); - - if ((has_entry && !is_aborted) || (!has_entry && is_aborted)) { - throw ViolationException( - "Node couldn't be updated due to unique constraint violation!"); - } - - curr_gid = gid; - tx_id_cre = t.id_; - tx_id_exp = 0; -} - -void Record::Remove(storage::Gid gid, const tx::Transaction &t) { - // Remove - // - insert before or in this transaction and not aborted - // - remove before and aborted - // Nothing - // - remove before or in this transaction and not aborted - // - insert before and aborted - // Throw SerializationException - // - delete or insert after this transaction - - t.TakeLock(lock_); - DCHECK(gid == curr_gid); - if (t.id_ < tx_id_cre || (tx_id_exp != 0 && t.id_ < tx_id_exp)) - throw mvcc::SerializationError(); - - bool has_entry = tx_id_exp == 0; - bool is_aborted = has_entry ? t.engine_.Info(tx_id_cre).is_aborted() - : t.engine_.Info(tx_id_exp).is_aborted(); - - if ((!has_entry && !is_aborted) || (has_entry && is_aborted)) return; - - tx_id_exp = t.id_; -} -} // namespace storage::constraints::impl diff --git a/src/storage/common/constraints/record.hpp b/src/storage/common/constraints/record.hpp deleted file mode 100644 index 70c526a5c..000000000 --- a/src/storage/common/constraints/record.hpp +++ /dev/null @@ -1,25 +0,0 @@ -/// @file - -#pragma once - -#include "storage/common/locking/record_lock.hpp" -#include "storage/common/types/types.hpp" -#include "transactions/type.hpp" - -namespace tx { -class Transaction; -} // namespace tx - -namespace storage::constraints::impl { -/// Contains records of creation and deletion of entry in a constraint. -struct Record { - Record(storage::Gid gid, const tx::Transaction &t); - void Insert(storage::Gid gid, const tx::Transaction &t); - void Remove(storage::Gid gid, const tx::Transaction &t); - - storage::Gid curr_gid; - tx::TransactionId tx_id_cre; - tx::TransactionId tx_id_exp{0}; - RecordLock lock_; -}; -} // namespace storage::constraints::impl diff --git a/src/storage/common/constraints/unique_constraints.cpp b/src/storage/common/constraints/unique_constraints.cpp deleted file mode 100644 index 4d9367062..000000000 --- a/src/storage/common/constraints/unique_constraints.cpp +++ /dev/null @@ -1,258 +0,0 @@ -#include "storage/common/constraints/unique_constraints.hpp" - -#include - -#include "storage/vertex_accessor.hpp" - -namespace storage::constraints { - -namespace { -auto FindIn(storage::Label label, - const std::vector &properties, - const std::list &constraints) { - return std::find_if( - constraints.begin(), constraints.end(), [label, properties](auto &c) { - return c.label == label && - std::is_permutation(properties.begin(), properties.end(), - c.properties.begin(), c.properties.end()); - }); -} -} // anonymous namespace - -bool UniqueConstraints::AddConstraint(const ConstraintEntry &entry) { - auto constraint = FindIn(entry.label, entry.properties, constraints_); - if (constraint == constraints_.end()) { - constraints_.emplace_back(entry.label, entry.properties); - return true; - } - return false; -} - -bool UniqueConstraints::RemoveConstraint(const ConstraintEntry &entry) { - auto constraint = FindIn(entry.label, entry.properties, constraints_); - if (constraint != constraints_.end()) { - constraints_.erase(constraint); - return true; - } - return false; -} - -bool UniqueConstraints::Exists( - storage::Label label, - const std::vector &properties) const { - return FindIn(label, properties, constraints_) != constraints_.end(); -} - -std::vector UniqueConstraints::ListConstraints() const { - std::vector constraints(constraints_.size()); - std::transform(constraints_.begin(), constraints_.end(), constraints.begin(), - [](auto &c) { - return ConstraintEntry{c.label, c.properties}; - }); - return constraints; -} - -void UniqueConstraints::Update(const RecordAccessor &accessor, - const tx::Transaction &t) { - auto &vertex = accessor.current(); - std::lock_guard guard(lock_); - for (auto &constraint : constraints_) { - if (!utils::Contains(vertex.labels_, constraint.label)) continue; - std::vector values; - for (auto p : constraint.properties) { - auto value = vertex.properties_.at(p); - if (value.IsNull()) break; - values.emplace_back(value); - } - if (values.size() != constraint.properties.size()) continue; - auto entry = std::find_if(constraint.version_pairs.begin(), - constraint.version_pairs.end(), - [values](const impl::LabelPropertyPair &p) { - return p.values == values; - }); - if (entry != constraint.version_pairs.end()) { - entry->record.Insert(accessor.gid(), t); - } else { - constraint.version_pairs.emplace_back(accessor.gid(), values, t); - } - } -} - -void UniqueConstraints::UpdateOnAddLabel(storage::Label label, - const RecordAccessor &accessor, - const tx::Transaction &t) { - auto &vertex = accessor.current(); - std::lock_guard guard(lock_); - for (auto &constraint : constraints_) { - if (constraint.label != label) continue; - std::vector values; - for (auto p : constraint.properties) { - auto value = vertex.properties_.at(p); - if (value.IsNull()) break; - values.emplace_back(value); - } - if (values.size() != constraint.properties.size()) continue; - auto entry = std::find_if(constraint.version_pairs.begin(), - constraint.version_pairs.end(), - [values](const impl::LabelPropertyPair &p) { - return p.values == values; - }); - if (entry != constraint.version_pairs.end()) { - entry->record.Insert(accessor.gid(), t); - } else { - constraint.version_pairs.emplace_back(accessor.gid(), values, t); - } - } -} - -void UniqueConstraints::UpdateOnRemoveLabel( - storage::Label label, const RecordAccessor &accessor, - const tx::Transaction &t) { - auto &vertex = accessor.current(); - std::lock_guard guard(lock_); - for (auto &constraint : constraints_) { - if (constraint.label != label) continue; - std::vector values; - for (auto p : constraint.properties) { - auto value = vertex.properties_.at(p); - if (value.IsNull()) break; - values.emplace_back(value); - } - if (values.size() != constraint.properties.size()) continue; - auto entry = std::find_if(constraint.version_pairs.begin(), - constraint.version_pairs.end(), - [values](const impl::LabelPropertyPair &p) { - return p.values == values; - }); - if (entry != constraint.version_pairs.end()) - entry->record.Remove(accessor.gid(), t); - } -} - -void UniqueConstraints::UpdateOnAddProperty( - storage::Property property, const PropertyValue &previous_value, - const PropertyValue &new_value, const RecordAccessor &accessor, - const tx::Transaction &t) { - auto &vertex = accessor.current(); - std::lock_guard guard(lock_); - for (auto &constraint : constraints_) { - if (!utils::Contains(vertex.labels_, constraint.label)) continue; - if (!utils::Contains(constraint.properties, property)) continue; - - std::vector old_values; - std::vector new_values; - for (auto p : constraint.properties) { - auto value = vertex.properties_.at(p); - - if (p == property) { - if (!previous_value.IsNull()) old_values.emplace_back(previous_value); - if (!new_value.IsNull()) new_values.emplace_back(new_value); - } else { - if (value.IsNull()) break; - old_values.emplace_back(value); - new_values.emplace_back(value); - } - } - - // First we need to remove the old entry if there was one. - if (old_values.size() == constraint.properties.size()) { - auto entry = std::find_if(constraint.version_pairs.begin(), - constraint.version_pairs.end(), - [old_values](const impl::LabelPropertyPair &p) { - return p.values == old_values; - }); - if (entry != constraint.version_pairs.end()) - entry->record.Remove(accessor.gid(), t); - } - - if (new_values.size() != constraint.properties.size()) continue; - auto entry = std::find_if(constraint.version_pairs.begin(), - constraint.version_pairs.end(), - [new_values](const impl::LabelPropertyPair &p) { - return p.values == new_values; - }); - if (entry != constraint.version_pairs.end()) { - entry->record.Insert(accessor.gid(), t); - } else { - constraint.version_pairs.emplace_back(accessor.gid(), new_values, t); - } - } -} - -void UniqueConstraints::UpdateOnRemoveProperty( - storage::Property property, const PropertyValue &previous_value, - const RecordAccessor &accessor, const tx::Transaction &t) { - auto &vertex = accessor.current(); - std::lock_guard guard(lock_); - for (auto &constraint : constraints_) { - if (!utils::Contains(vertex.labels_, constraint.label)) continue; - if (!utils::Contains(constraint.properties, property)) continue; - - std::vector values; - for (auto p : constraint.properties) { - auto value = vertex.properties_.at(p); - if (p == property) { - values.emplace_back(previous_value); - } else { - if (value.IsNull()) break; - values.emplace_back(value); - } - } - - if (values.size() != constraint.properties.size()) continue; - auto entry = std::find_if(constraint.version_pairs.begin(), - constraint.version_pairs.end(), - [values](const impl::LabelPropertyPair &p) { - return p.values == values; - }); - if (entry != constraint.version_pairs.end()) { - entry->record.Remove(accessor.gid(), t); - } - } -} - -void UniqueConstraints::UpdateOnRemoveVertex( - const RecordAccessor &accessor, const tx::Transaction &t) { - auto &vertex = accessor.current(); - std::lock_guard guard(lock_); - for (auto &constraint : constraints_) { - if (!utils::Contains(vertex.labels_, constraint.label)) continue; - - std::vector values; - for (auto p : constraint.properties) { - auto value = vertex.properties_.at(p); - if (value.IsNull()) break; - values.emplace_back(value); - } - - if (values.size() != constraint.properties.size()) continue; - auto entry = std::find_if(constraint.version_pairs.begin(), - constraint.version_pairs.end(), - [values](const impl::LabelPropertyPair &p) { - return p.values == values; - }); - if (entry != constraint.version_pairs.end()) { - entry->record.Remove(accessor.gid(), t); - } - } -} - -void UniqueConstraints::Refresh(const tx::Snapshot &snapshot, - const tx::Engine &engine) { - std::lock_guard guard(lock_); - for (auto &constraint : constraints_) { - for (auto p = constraint.version_pairs.begin(); - p != constraint.version_pairs.end();) { - auto exp_id = p->record.tx_id_exp; - auto cre_id = p->record.tx_id_cre; - if ((exp_id != 0 && exp_id < snapshot.back() && - engine.Info(exp_id).is_committed() && !snapshot.contains(exp_id)) || - (cre_id < snapshot.back() && engine.Info(cre_id).is_aborted())) { - p = constraint.version_pairs.erase(p); - } else { - ++p; - } - } - } -} -} // namespace storage::constraints diff --git a/src/storage/common/constraints/unique_constraints.hpp b/src/storage/common/constraints/unique_constraints.hpp deleted file mode 100644 index 893bf9cf5..000000000 --- a/src/storage/common/constraints/unique_constraints.hpp +++ /dev/null @@ -1,166 +0,0 @@ -/// @file - -#pragma once - -#include -#include - -#include "storage/common/types/property_value.hpp" -#include "storage/common/types/types.hpp" -#include "storage/common/constraints/record.hpp" - -namespace tx { -class Snapshot; -}; // namespace tx - -class Vertex; - -template -class RecordAccessor; - -namespace storage::constraints { -namespace impl { -struct LabelPropertyPair { - LabelPropertyPair(storage::Gid gid, const std::vector &v, - const tx::Transaction &t) - : values(v), record(gid, t) {} - - std::vector values; - Record record; -}; - -struct LabelPropertiesEntry { - LabelPropertiesEntry(storage::Label l, - const std::vector &p) - : label(l), properties(p) {} - - storage::Label label; - std::vector properties; - std::list version_pairs; -}; -} // namespace impl - -struct ConstraintEntry { - // This struct is used by ListConstraints method in order to avoid using - // std::pair or something like that. - storage::Label label; - std::vector properties; -}; - -/// UniqueConstraints contains all unique constraints defined by both label and -/// a set of properties. To create or delete unique constraint, caller must -/// ensure that there are no other transactions running in parallel. -/// Additionally, for adding unique constraint caller must first call -/// AddConstraint to create unique constraint and then call Update for every -/// existing Vertex. If there is a unique constraint violation, the caller must -/// manually handle that by catching exceptions and calling RemoveConstraint -/// method. This is needed to ensure logical correctness of transactions. Once -/// created, client uses UpdateOn* methods to notify UniqueConstraint about -/// changes. In case of violation UpdateOn* methods throw -/// ConstraintViolationException exception. Methods can also throw -/// SerializationError. This class is thread safe. -class UniqueConstraints { - public: - UniqueConstraints() = default; - UniqueConstraints(const UniqueConstraints &) = delete; - UniqueConstraints(UniqueConstraints &&) = delete; - UniqueConstraints &operator=(const UniqueConstraints &) = delete; - UniqueConstraints &operator=(UniqueConstraints &&) = delete; - - ~UniqueConstraints() = default; - - /// Add new unique constraint, if constraint already exists this method does - /// nothing. This method doesn't check if any of the existing vertices breaks - /// this constraint. Caller must do that instead. Caller must also ensure that - /// no other transaction is running in parallel. - /// - /// @return true if the constraint doesn't exists and was added. - bool AddConstraint(const ConstraintEntry &entry); - - /// Removes existing unique constraint, if the constraint doesn't exist this - /// method does nothing. Caller must ensure that no other transaction is - /// running in parallel. - /// - /// @return true if the constraint existed and was removed. - bool RemoveConstraint(const ConstraintEntry &entry); - - /// Checks whether given unique constraint is visible. - bool Exists(storage::Label label, - const std::vector &properties) const; - - /// Returns list of unique constraints. - std::vector ListConstraints() const; - - /// Updates unique constraint versions when adding new constraint rule. - /// - /// @throws ConstraintViolationException - /// @throws SerializationError - void Update(const RecordAccessor &accessor, const tx::Transaction &t); - - /// Updates unique constraint versions when adding label. - /// @param label - label that was added - /// @param accessor - accessor that was updated - /// @param t - current transaction - /// - /// @throws ConstraintViolationException - /// @throws SerializationError - void UpdateOnAddLabel(storage::Label label, - const RecordAccessor &accessor, - const tx::Transaction &t); - - /// Updates unique constraint versions when removing label. - /// @param label - label that was removed - /// @param accessor - accessor that was updated - /// @param t - current transaction - /// - /// @throws SerializationError - void UpdateOnRemoveLabel(storage::Label label, - const RecordAccessor &accessor, - const tx::Transaction &t); - - /// Updates unique constraint versions when adding property. - /// @param property - property that was added - /// @param previous_value - previous value of the property - /// @param new_value - new value of the property - /// @param accessor - accessor that was updated - /// @param t - current transaction - /// - /// @throws ConstraintViolationException - /// @throws SerializationError - void UpdateOnAddProperty(storage::Property property, - const PropertyValue &previous_value, - const PropertyValue &new_value, - const RecordAccessor &accessor, - const tx::Transaction &t); - - /// Updates unique constraint versions when removing property. - /// @param property - property that was removed - /// @param previous_value - previous value of the property - /// @param accessor - accessor that was updated - /// @param t - current transaction - /// - /// @throws SerializationError - void UpdateOnRemoveProperty(storage::Property property, - const PropertyValue &previous_value, - const RecordAccessor &accessor, - const tx::Transaction &t); - - /// Updates unique constraint versions when removing a vertex. - /// @param accessor - accessor that was updated - /// @param t - current transaction - /// - /// @throws SerializationError - void UpdateOnRemoveVertex(const RecordAccessor &accessor, - const tx::Transaction &t); - - /// Removes records that are no longer visible. - /// @param snapshot - the GC snapshot. - /// @param engine - current transaction engine. - void Refresh(const tx::Snapshot &snapshot, const tx::Engine &engine); - - private: - std::mutex lock_; - - std::list constraints_; -}; -} // namespace storage::constraints diff --git a/src/storage/common/index.hpp b/src/storage/common/index.hpp deleted file mode 100644 index 3c42c0465..000000000 --- a/src/storage/common/index.hpp +++ /dev/null @@ -1,183 +0,0 @@ -#pragma once - -#include "cppitertools/filter.hpp" -#include "cppitertools/imap.hpp" -#include "cppitertools/takewhile.hpp" -#include "glog/logging.h" - -#include "data_structures/concurrent/concurrent_map.hpp" -#include "data_structures/concurrent/skiplist.hpp" -#include "transactions/transaction.hpp" - -namespace database::index { -/** - * @brief - Wrap beginning iterator to iterable object. This provides us with - * begin and end iterator, and allows us to iterate from the iterator given in - * constructor till the end of the collection over which we are really - * iterating, i.e. it allows us to iterate over the suffix of some skiplist - * hence the name SkipListSuffix. - */ -template -class SkipListSuffix { - public: - class Iterator { - public: - explicit Iterator(TIterator current) : current_(current) {} - - TValue &operator*() { return *current_; } - - bool operator!=(Iterator other) const { - return this->current_ != other.current_; - } - - Iterator &operator++() { - ++current_; - return *this; - } - - private: - TIterator current_; - }; - - explicit SkipListSuffix( - const TIterator begin, - typename SkipList::template Accessor &&accessor) - : begin_(begin), accessor_(std::move(accessor)) {} - - Iterator begin() const { return Iterator(begin_); } - Iterator end() { return Iterator(accessor_.end()); } - - TIterator begin_; - typename SkipList::template Accessor accessor_; -}; - -/** - * @brief - Get all inserted vlists in TKey specific storage which - * still return true for the 'exists' function. - * @param skiplist_accessor - accessor used to get begin iterator, and that - * should be used to get end iterator as well. - * @param begin - starting iterator for vlist iteration. - * @param predicate - function which checks if TIndexEntry has a value that we - * are looking for - * @param t - current transaction, which determines visibility. - * @param exists - method which determines visibility of entry and version - * (record) of the underlying objects (vertex/edge) - * @param current_state If true then the graph state for the - * current transaction+command is returned (insertions, updates and - * deletions performed in the current transaction+command are not - * ignored). - * @Tparam TIndexEntry - index entry inside skiplist - * @Tparam TRecord - type of record under index (edge/vertex usually.) - * @Tparam TAccessor - type of accessor to use (const skiplist/non const - * skiplist). - * @return iterable collection of distinct vlist records for which - * exists function evaluates as true - */ -template -static auto GetVlists( - typename SkipList::template Accessor - &&skiplist_accessor, - TIterator begin, - const std::function &predicate, - const tx::Transaction &t, - const std::function &exists, - bool current_state = false) { - TIndexEntry *prev = nullptr; - auto range = iter::takewhile( - predicate, SkipListSuffix( - begin, std::move(skiplist_accessor))); - auto filtered = iter::filter( - [&t, exists, prev, current_state](TIndexEntry &entry) mutable { - // Check if the current entry could offer new possible return value - // with respect to the previous entry we evaluated. - // We do this to guarantee uniqueness, and also as an optimization to - // avoid checking same vlist twice when we can. - if (prev && entry.IsAlreadyChecked(*prev)) return false; - prev = &entry; - - // TODO when refactoring MVCC reconsider the return-value-arg idiom - // here - TRecord *old_record, *new_record; - entry.vlist_->find_set_old_new(t, &old_record, &new_record); - // filtering out records not visible to the current - // transaction+command - // taking into account the current_state flag - bool visible = - (old_record && !(current_state && old_record->is_expired_by(t))) || - (current_state && new_record && !new_record->is_expired_by(t)); - if (!visible) return false; - // if current_state is true and we have the new record, then that's - // the reference value, and that needs to be compared with the index - // predicate - - return (current_state && new_record) ? exists(entry, new_record) - : exists(entry, old_record); - }, - std::move(range)); - return iter::imap([](auto entry) { return entry.vlist_; }, - std::move(filtered)); -} - -/** - * @brief - Removes from the index all entries for which records don't contain - * the given label/edge type/label + property anymore. Also update (remove) - * all records which are not visible for any transaction in the given - * 'snapshot'. This method assumes that the MVCC GC has been run with the - * same 'snapshot'. - * - * @param indices - map of index entries (TIndexKey, skiplist) - * @param snapshot - the GC snapshot. Consists of the oldest active - * transaction's snapshot, with that transaction's id appened as last. - * @param engine - transaction engine to see which records are commited - * @param exists - function which checks 'key' and 'entry' if the entry still - * contains required properties (key + optional value (in case of label_property - * index)) - * @Tparam Tkey - index key - * @Tparam TIndexEntry - index entry inside skiplist - * @Tparam TRecord - type of record under index (edge/vertex usually.) - */ -template -static void Refresh( - ConcurrentMap>> &indices, - const tx::Snapshot &snapshot, tx::Engine &engine, - const std::function &exists) { - // iterate over all the indices - for (auto &key_indices_pair : indices.access()) { - // iterate over index entries - auto indices_entries_accessor = key_indices_pair.second->access(); - for (auto indices_entry : indices_entries_accessor) { - if (indices_entry.record_->is_not_visible_from(snapshot, engine)) { - // be careful when deleting the record which is not visible anymore. - // it's newer copy could be visible, and might still logically belong to - // index (it satisfies the `exists` function). that's why we can't just - // remove the index entry, but also re-insert the oldest visible record - // to the index. if that record does not satisfy `exists`, it will be - // cleaned up in the next Refresh first insert and then remove, - // otherwise there is a timeframe during which the record is not present - // in the index - auto new_record = indices_entry.vlist_->Oldest(); - if (new_record != nullptr) - indices_entries_accessor.insert( - TIndexEntry(indices_entry, new_record)); - - [[gnu::unused]] auto success = - indices_entries_accessor.remove(indices_entry); - DCHECK(success) << "Unable to delete entry."; - } - - // if the record is still visible, - // check if it satisfies the `exists` function. if not - // it does not belong in index anymore. - // be careful when using the `exists` function - // because it's creator transaction could still be modifying it, - // and modify+read is not thread-safe. for that reason we need to - // first see if the the transaction that created it has ended - // (tx().cre < oldest active trancsation). - else if (indices_entry.record_->tx().cre < snapshot.back() && - !exists(key_indices_pair.first, indices_entry)) { - indices_entries_accessor.remove(indices_entry); - } - } - } -} -}; // namespace database::index diff --git a/src/storage/common/locking/lock_status.hpp b/src/storage/common/locking/lock_status.hpp deleted file mode 100644 index bbd6de10c..000000000 --- a/src/storage/common/locking/lock_status.hpp +++ /dev/null @@ -1,3 +0,0 @@ -#pragma once - -enum class LockStatus { Acquired, AlreadyHeld }; diff --git a/src/storage/common/locking/record_lock.cpp b/src/storage/common/locking/record_lock.cpp deleted file mode 100644 index 042d94d52..000000000 --- a/src/storage/common/locking/record_lock.cpp +++ /dev/null @@ -1,131 +0,0 @@ -#include "storage/common/locking/record_lock.hpp" - -#include -#include -#include -#include -#include - -#include "transactions/engine.hpp" -#include "utils/on_scope_exit.hpp" -#include "utils/thread/sync.hpp" -#include "utils/timer.hpp" - -namespace { - -// Finds lock cycle that start transaction is a part of and returns id of oldest -// transaction in that cycle. If start transaction is not in a cycle nullopt is -// returned. -template -std::optional FindOldestTxInLockCycle( - tx::TransactionId start, TAccessor &graph_accessor) { - std::vector path; - std::unordered_set visited; - - auto current = start; - - do { - visited.insert(current); - path.push_back(current); - auto it = graph_accessor.find(current); - if (it == graph_accessor.end()) return std::nullopt; - current = it->second; - } while (visited.find(current) == visited.end()); - - if (current == start) { - // start is a part of the cycle, return oldest transaction. - CHECK(path.size() >= 2U) << "Cycle must have at least two nodes"; - return *std::min(path.begin(), path.end()); - } - - // There is a cycle, but start is not a part of it. Some transaction that is - // in a cycle will find it and abort oldest transaction. - return std::nullopt; -} - -} // namespace - -bool RecordLock::TryLock(tx::TransactionId tx_id) { - tx::TransactionId unlocked{0}; - return owner_.compare_exchange_strong(unlocked, tx_id); -} - -LockStatus RecordLock::Lock(const tx::Transaction &tx, tx::Engine &engine) { - if (TryLock(tx.id_)) { - return LockStatus::Acquired; - } - - tx::TransactionId owner = owner_; - if (owner_ == tx.id_) return LockStatus::AlreadyHeld; - - // In a distributed worker the transaction objects (and the locks they own) - // are not destructed at the same time like on the master. Consequently a lock - // might be active for a dead transaction. By asking the transaction engine - // for transaction info, we'll make the worker refresh it's knowledge about - // live transactions and release obsolete locks. - if (owner == 0 || !engine.Info(owner).is_active()) { - if (TryLock(tx.id_)) { - return LockStatus::Acquired; - } - } - - // Insert edge into local lock_graph. - auto accessor = engine.local_lock_graph().access(); - auto it = accessor.insert(tx.id_, owner).first; - - auto abort_oldest_tx_in_lock_cycle = [&tx, &accessor, &engine]() { - // Find oldest transaction in lock cycle if cycle exists and notify that - // transaction that it should abort. - // TODO: maybe we can be smarter and abort some other transaction and not - // the oldest one. - auto oldest = FindOldestTxInLockCycle(tx.id_, accessor); - if (oldest) { - engine.LocalForEachActiveTransaction([&](tx::Transaction &t) { - if (t.id_ == oldest) { - t.set_should_abort(); - } - }); - } - }; - - abort_oldest_tx_in_lock_cycle(); - - // Make sure to erase edge on function exit. Either function will throw and - // transaction will be killed so we should erase the edge because transaction - // won't exist anymore or owner_ will finish and we will be able to acquire - // the lock. - utils::OnScopeExit cleanup{[&tx, &accessor] { accessor.remove(tx.id_); }}; - - utils::Timer t; - while (t.Elapsed() < kTimeout) { - if (tx.should_abort()) { - // Message could be incorrect. Transaction could be aborted because it was - // running for too long time, but that is unlikely and it is not very - // important which exception (and message) we throw here. - throw utils::LockTimeoutException( - "Transaction was aborted since it was oldest in a lock cycle"); - } - if (TryLock(tx.id_)) { - return LockStatus::Acquired; - } - if (owner != owner_) { - // Owner changed while we were spinlocking. Update the edge and rerun - // cycle resolution routine. - // TODO: we should make sure that first transaction that tries to acquire - // already held lock succeeds in acquiring the lock once transaction that - // was lock owner finishes. That would probably reduce number of aborted - // transactions. - owner = owner_; - it->second = owner; - abort_oldest_tx_in_lock_cycle(); - } - utils::CpuRelax(); - } - - throw utils::LockTimeoutException(fmt::format( - "Transaction locked for more than {} seconds", kTimeout.count())); -} - -void RecordLock::Unlock() { owner_ = 0; } - -constexpr std::chrono::duration RecordLock::kTimeout; diff --git a/src/storage/common/locking/record_lock.hpp b/src/storage/common/locking/record_lock.hpp deleted file mode 100644 index a814c4a34..000000000 --- a/src/storage/common/locking/record_lock.hpp +++ /dev/null @@ -1,30 +0,0 @@ -#pragma once - -#include -#include -#include - -#include "storage/common/locking/lock_status.hpp" -#include "transactions/type.hpp" - -namespace tx { -class Engine; -class Transaction; -}; // namespace tx - -class RecordLock { - public: - /// @throw utils::LockTimeoutException - LockStatus Lock(const tx::Transaction &id, tx::Engine &engine); - - void Unlock(); - - private: - bool TryLock(tx::TransactionId tx_id); - - // Arbitrary choosen constant, postgresql uses 1 second so do we. - constexpr static std::chrono::duration kTimeout{ - std::chrono::seconds(1)}; - - std::atomic owner_{0}; -}; diff --git a/src/storage/common/mvcc/exceptions.hpp b/src/storage/common/mvcc/exceptions.hpp deleted file mode 100644 index 7332287f5..000000000 --- a/src/storage/common/mvcc/exceptions.hpp +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once - -#include "utils/exceptions.hpp" - -namespace mvcc { -class SerializationError : public utils::BasicException { - static constexpr const char *default_message = - "Can't serialize due to concurrent operations."; - - public: - using utils::BasicException::BasicException; - SerializationError() : BasicException(default_message) {} -}; - -} // namespace mvcc diff --git a/src/storage/common/mvcc/version.hpp b/src/storage/common/mvcc/version.hpp deleted file mode 100644 index a903d4cc3..000000000 --- a/src/storage/common/mvcc/version.hpp +++ /dev/null @@ -1,42 +0,0 @@ -#pragma once - -#include - -namespace mvcc { - -template -class Version { - public: - Version() = default; - explicit Version(T *older) : older_(older) {} - - // this must also destroy all the older versions - virtual ~Version() { - auto curr = next(); - while (curr != nullptr) { - auto next = curr->next(); - // remove link to older version to avoid recursion - curr->older_.store(nullptr); - delete curr; - curr = next; - } - } - - // return a pointer to an older version stored in this record - T *next(std::memory_order order = std::memory_order_seq_cst) { - return older_.load(order); - } - - const T *next(std::memory_order order = std::memory_order_seq_cst) const { - return older_.load(order); - } - - // set the older version of this record - void next(T *value, std::memory_order order = std::memory_order_seq_cst) { - older_.store(value, order); - } - - private: - std::atomic older_{nullptr}; -}; -} // namespace mvcc diff --git a/src/storage/common/pod_buffer.hpp b/src/storage/common/pod_buffer.hpp deleted file mode 100644 index 0326331e4..000000000 --- a/src/storage/common/pod_buffer.hpp +++ /dev/null @@ -1,49 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -namespace storage { - -/** - * Buffer used for serialization of disk properties. The buffer - * implements a template parameter Buffer interface from BaseEncoder - * and Decoder classes for bolt serialization. - */ -class PODBuffer { - public: - PODBuffer() = default; - explicit PODBuffer(const std::string &s) { - buffer = std::vector{s.begin(), s.end()}; - } - - /** - * Writes data to buffer - * - * @param data - Pointer to data to be written. - * @param len - Data length. - */ - void Write(const uint8_t *data, size_t len) { - for (size_t i = 0; i < len; ++i) buffer.push_back(data[i]); - } - - /** - * Reads raw data from buffer. - * - * @param data - pointer to where data should be stored. - * @param len - data length - * @return - True if successful, False otherwise. - */ - bool Read(uint8_t *data, size_t len) { - if (len > buffer.size()) return false; - memcpy(data, buffer.data(), len); - buffer.erase(buffer.begin(), buffer.begin() + len); - return true; - } - - std::vector buffer; -}; - -} // namespace storage diff --git a/src/storage/common/types/property_value.hpp b/src/storage/common/types/property_value.hpp deleted file mode 100644 index cbec54e44..000000000 --- a/src/storage/common/types/property_value.hpp +++ /dev/null @@ -1,6 +0,0 @@ -#pragma once - -#include "storage/v2/property_value.hpp" - -using storage::PropertyValue; -using storage::PropertyValueException; diff --git a/src/storage/common/types/property_value_store.cpp b/src/storage/common/types/property_value_store.cpp deleted file mode 100644 index 27bfb13fb..000000000 --- a/src/storage/common/types/property_value_store.cpp +++ /dev/null @@ -1,240 +0,0 @@ -#include "storage/common/types/property_value_store.hpp" - -#include - -#include -#include - -#include "communication/bolt/v1/decoder/decoder.hpp" -#include "communication/bolt/v1/encoder/base_encoder.hpp" -#include "glue/communication.hpp" -#include "storage/common/pod_buffer.hpp" - -namespace fs = std::filesystem; - -using namespace communication::bolt; - -const std::string kDiskKeySeparator = "_"; - -std::atomic PropertyValueStore::global_key_cnt_ = {0}; - -// properties on disk are stored in a directory named properties within the -// durability directory -DECLARE_string(durability_directory); -DECLARE_string(properties_on_disk); - -std::string DiskKeyPrefix(const std::string &version_key) { - return version_key + kDiskKeySeparator; -} - -std::string DiskKey(const std::string &version_key, - const std::string &property_id) { - return DiskKeyPrefix(version_key) + property_id; -} - -PropertyValueStore::PropertyValueStore(const PropertyValueStore &old) - : props_(old.props_) { - // We need to update disk key and disk key counter when calling a copy - // constructor due to mvcc. - if (!FLAGS_properties_on_disk.empty()) { - version_key_ = global_key_cnt_++; - kvstore::KVStore::iterator old_disk_it( - &DiskStorage(), DiskKeyPrefix(std::to_string(old.version_key_))); - iterator it(&old, old.props_.end(), std::move(old_disk_it)); - - while (it != old.end()) { - this->set(it->first, it->second); - ++it; - } - } -} - -PropertyValueStore::~PropertyValueStore() { - if (!FLAGS_properties_on_disk.empty()) { - DiskStorage().DeletePrefix(DiskKeyPrefix(std::to_string(version_key_))); - } -} - -PropertyValue PropertyValueStore::at(const Property &key) const { - auto GetValue = [&key](const auto &props) { - for (const auto &kv : props) - if (kv.first == key) return kv.second; - return PropertyValue(); - }; - - if (key.Location() == Location::Memory) return GetValue(props_); - - CHECK(!FLAGS_properties_on_disk.empty()) << "Trying to read property from " - "disk storage with properties on " - "disk disabled!"; - - std::string disk_key = - DiskKey(std::to_string(version_key_), std::to_string(key.Id())); - auto serialized_prop = DiskStorage().Get(disk_key); - if (serialized_prop) return DeserializeProp(serialized_prop.value()); - return PropertyValue(); -} - -void PropertyValueStore::set(const Property &key, const char *value) { - set(key, PropertyValue(value)); -} - -void PropertyValueStore::set(const Property &key, const PropertyValue &value) { - if (value.type() == PropertyValue::Type::Null) { - erase(key); - return; - } - - auto SetValue = [&key, &value](auto &props) { - for (auto &kv : props) - if (kv.first == key) { - kv.second = value; - return; - } - props.emplace_back(key, value); - }; - - if (key.Location() == Location::Memory) { - SetValue(props_); - } else { - CHECK(!FLAGS_properties_on_disk.empty()) << "Trying to read property from " - "disk storage with properties " - "on disk disabled!"; - std::string disk_key = - DiskKey(std::to_string(version_key_), std::to_string(key.Id())); - DiskStorage().Put(disk_key, SerializeProp(value)); - } -} - -bool PropertyValueStore::erase(const Property &key) { - auto EraseKey = [&key](auto &props) { - auto found = std::find_if(props.begin(), props.end(), - [&key](std::pair &kv) { - return kv.first == key; - }); - if (found != props.end()) props.erase(found); - return true; - }; - - if (key.Location() == Location::Memory) return EraseKey(props_); - - CHECK(!FLAGS_properties_on_disk.empty()) << "Trying to read property from " - "disk storage with properties on " - "disk disabled!"; - - std::string disk_key = - DiskKey(std::to_string(version_key_), std::to_string(key.Id())); - return DiskStorage().Delete(disk_key); -} - -void PropertyValueStore::clear() { - props_.clear(); - if (!FLAGS_properties_on_disk.empty()) { - DiskStorage().DeletePrefix(DiskKeyPrefix(std::to_string(version_key_))); - } -} - -kvstore::KVStore &PropertyValueStore::DiskStorage() const { - static auto disk_storage = ConstructDiskStorage(); - return disk_storage; -} - -PropertyValueStore::iterator::iterator( - const PropertyValueStore *pvs, - std::vector>::const_iterator memory_it) - : pvs_(pvs), memory_it_(memory_it) {} - -PropertyValueStore::iterator::iterator( - const PropertyValueStore *pvs, - std::vector>::const_iterator memory_it, - kvstore::KVStore::iterator disk_it) - : pvs_(pvs), memory_it_(memory_it), disk_it_(std::move(disk_it)) {} - -PropertyValueStore::iterator &PropertyValueStore::iterator::operator++() { - if (memory_it_ != pvs_->props_.end()) { - ++memory_it_; - } else if (disk_it_) { - ++(*disk_it_); - } - return *this; -} - -bool PropertyValueStore::iterator::operator==(const iterator &other) const { - return pvs_ == other.pvs_ && memory_it_ == other.memory_it_ && - disk_it_ == other.disk_it_; -} - -bool PropertyValueStore::iterator::operator!=(const iterator &other) const { - return !(*this == other); -} - -PropertyValueStore::iterator::reference PropertyValueStore::iterator::operator - *() { - if (memory_it_ != pvs_->props_.end() || !disk_it_) return *memory_it_; - std::pair kv = *(*disk_it_); - std::string prop_id = kv.first.substr(kv.first.find(kDiskKeySeparator) + 1); - disk_prop_ = {Property(std::stoi(prop_id), Location::Disk), - pvs_->DeserializeProp(kv.second)}; - return disk_prop_.value(); -} - -PropertyValueStore::iterator::pointer PropertyValueStore::iterator:: -operator->() { - return &**this; -} - -size_t PropertyValueStore::size() const { - if (FLAGS_properties_on_disk.empty()) { - return props_.size(); - } else { - return props_.size() + - DiskStorage().Size(DiskKeyPrefix(std::to_string(version_key_))); - } -} - -PropertyValueStore::iterator PropertyValueStore::begin() const { - if (FLAGS_properties_on_disk.empty()) { - return iterator(this, props_.begin()); - } else { - return iterator( - this, props_.begin(), - DiskStorage().begin(DiskKeyPrefix(std::to_string(version_key_)))); - } -} - -PropertyValueStore::iterator PropertyValueStore::end() const { - if (FLAGS_properties_on_disk.empty()) { - return iterator(this, props_.end()); - } else { - return iterator( - this, props_.end(), - DiskStorage().end(DiskKeyPrefix(std::to_string(version_key_)))); - } -} - -std::string PropertyValueStore::SerializeProp(const PropertyValue &prop) const { - storage::PODBuffer pod_buffer; - BaseEncoder encoder{pod_buffer}; - encoder.WriteValue(glue::ToBoltValue(prop)); - return std::string(reinterpret_cast(pod_buffer.buffer.data()), - pod_buffer.buffer.size()); -} - -PropertyValue PropertyValueStore::DeserializeProp( - const std::string &serialized_prop) const { - storage::PODBuffer pod_buffer{serialized_prop}; - communication::bolt::Decoder decoder{pod_buffer}; - - Value dv; - if (!decoder.ReadValue(&dv)) { - DLOG(WARNING) << "Unable to read property value"; - return PropertyValue(); - } - return glue::ToPropertyValue(dv); -} - -kvstore::KVStore PropertyValueStore::ConstructDiskStorage() const { - auto storage_path = fs::path() / FLAGS_durability_directory / "properties"; - if (fs::exists(storage_path)) fs::remove_all(storage_path); - return kvstore::KVStore(storage_path); -} diff --git a/src/storage/common/types/property_value_store.hpp b/src/storage/common/types/property_value_store.hpp deleted file mode 100644 index efef3baea..000000000 --- a/src/storage/common/types/property_value_store.hpp +++ /dev/null @@ -1,164 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -#include "kvstore/kvstore.hpp" -#include "storage/common/types/property_value.hpp" -#include "storage/common/types/types.hpp" - -/** - * A collection of properties accessed in a map-like way using a key of type - * Storage::Property. - * - * PropertyValueStore handles storage on disk or in memory. Property key defines - * where the corresponding property should be stored. Each instance of - * PropertyValueStore contains a version_key_ member which specifies where on - * disk should the properties be stored. That key is inferred from a static - * global counter global_key_cnt_. - * - * The underlying implementation of in-memory storage is not necessarily - * std::map. - */ -class PropertyValueStore { - using Property = storage::Property; - using Location = storage::Location; - - public: - // Property name which will be used to store vertex/edge ids inside property - // value store - static constexpr char IdPropertyName[] = "__id__"; - - PropertyValueStore() = default; - PropertyValueStore(const PropertyValueStore &old); - - ~PropertyValueStore(); - - /** - * Returns a PropertyValue (by reference) at the given key. - * If the key does not exist, the Null property is returned. - * - * This is NOT thread-safe, the reference might not be valid - * when used in a multithreaded scenario. - * - * @param key The key for which a PropertyValue is sought. - * @return See above. - */ - PropertyValue at(const Property &key) const; - - /** - * Set overriding for character constants. Forces conversion - * to std::string, otherwise templating might cast the pointer - * to something else (bool) and mess things up. - */ - void set(const Property &key, const char *value); - - /** - * Set overriding for PropertyValue. When setting a Null value it - * calls 'erase' instead of inserting the Null into storage. - */ - void set(const Property &key, const PropertyValue &value); - - /** - * Removes the PropertyValue for the given key. - * - * @param key - The key for which to remove the property. - * - * @return true if the operation was successful and there is nothing stored - * under given key after this operation. - */ - bool erase(const Property &key); - - /** Removes all the properties (both in-mem and on-disk) from this store. */ - void clear(); - - /** - * Returns a static kvstore::KVStore instance used for storing properties on - * disk. This hack is needed due to statics that are internal to RocksDB and - * availability of durability_directory flag. - */ - kvstore::KVStore &DiskStorage() const; - - /** - * Custom PVS iterator behaves as if all properties are stored in a single - * iterable collection of std::pair. - */ - class iterator final - : public std::iterator< - std::input_iterator_tag, // iterator_category - std::pair, // value_type - long, // difference_type - const std::pair *, // pointer - const std::pair & // reference - > { - public: - iterator() = delete; - - iterator(const PropertyValueStore *pvs, - std::vector>::const_iterator - memory_it); - - iterator(const PropertyValueStore *pvs, - std::vector>::const_iterator - memory_it, - kvstore::KVStore::iterator disk_it); - - iterator(const iterator &other) = delete; - - iterator(iterator &&other) = default; - - iterator &operator=(iterator &&other) = default; - - iterator &operator=(const iterator &other) = delete; - - iterator &operator++(); - - bool operator==(const iterator &other) const; - - bool operator!=(const iterator &other) const; - - reference operator*(); - - pointer operator->(); - - private: - const PropertyValueStore *pvs_; - std::vector>::const_iterator memory_it_; - std::optional disk_it_; - std::optional> disk_prop_; - }; - - size_t size() const; - - iterator begin() const; - - iterator end() const; - - private: - static std::atomic global_key_cnt_; - uint64_t version_key_ = global_key_cnt_++; - - std::vector> props_; - - /** - * Serializes a single PropertyValue into std::string. - * - * @param prop - Property to be serialized. - * - * @return Serialized property. - */ - std::string SerializeProp(const PropertyValue &prop) const; - - /** - * Deserializes a single PropertyValue from std::string. - * - * @param serialized_prop - Serialized property. - * - * @return Deserialized property. - */ - PropertyValue DeserializeProp(const std::string &serialized_prop) const; - - kvstore::KVStore ConstructDiskStorage() const; -}; diff --git a/src/storage/common/types/slk.cpp b/src/storage/common/types/slk.cpp deleted file mode 100644 index d2aa4d177..000000000 --- a/src/storage/common/types/slk.cpp +++ /dev/null @@ -1,126 +0,0 @@ -#include "storage/common/types/slk.hpp" - -namespace slk { - -void Save(const PropertyValue &value, slk::Builder *builder) { - switch (value.type()) { - case PropertyValue::Type::Null: - slk::Save(static_cast(0), builder); - return; - case PropertyValue::Type::Bool: - slk::Save(static_cast(1), builder); - slk::Save(value.ValueBool(), builder); - return; - case PropertyValue::Type::Int: - slk::Save(static_cast(2), builder); - slk::Save(value.ValueInt(), builder); - return; - case PropertyValue::Type::Double: - slk::Save(static_cast(3), builder); - slk::Save(value.ValueDouble(), builder); - return; - case PropertyValue::Type::String: - slk::Save(static_cast(4), builder); - slk::Save(value.ValueString(), builder); - return; - case PropertyValue::Type::List: { - slk::Save(static_cast(5), builder); - const auto &values = value.ValueList(); - size_t size = values.size(); - slk::Save(size, builder); - for (const auto &v : values) { - slk::Save(v, builder); - } - return; - } - case PropertyValue::Type::Map: { - slk::Save(static_cast(6), builder); - const auto &map = value.ValueMap(); - size_t size = map.size(); - slk::Save(size, builder); - for (const auto &kv : map) { - slk::Save(kv, builder); - } - return; - } - } -} - -void Load(PropertyValue *value, slk::Reader *reader) { - uint8_t type; - slk::Load(&type, reader); - switch (type) { - case static_cast(0): - *value = PropertyValue(); - return; - case static_cast(1): { - bool v; - slk::Load(&v, reader); - *value = PropertyValue(v); - return; - } - case static_cast(2): { - int64_t v; - slk::Load(&v, reader); - *value = PropertyValue(v); - return; - } - case static_cast(3): { - double v; - slk::Load(&v, reader); - *value = PropertyValue(v); - return; - } - case static_cast(4): { - std::string v; - slk::Load(&v, reader); - *value = PropertyValue(std::move(v)); - return; - } - case static_cast(5): { - size_t size; - slk::Load(&size, reader); - std::vector list(size); - for (size_t i = 0; i < size; ++i) { - slk::Load(&list[i], reader); - } - *value = PropertyValue(std::move(list)); - return; - } - case static_cast(6): { - size_t size; - slk::Load(&size, reader); - std::map map; - for (size_t i = 0; i < size; ++i) { - std::pair kv; - slk::Load(&kv, reader); - map.insert(kv); - } - *value = PropertyValue(std::move(map)); - return; - } - default: - throw slk::SlkDecodeException("Trying to load unknown PropertyValue!"); - } -} - -void Save(const PropertyValueStore &properties, slk::Builder *builder) { - size_t size = properties.size(); - slk::Save(size, builder); - for (const auto &kv : properties) { - slk::Save(kv, builder); - } -} - -void Load(PropertyValueStore *properties, slk::Reader *reader) { - properties->clear(); - size_t size; - slk::Load(&size, reader); - for (size_t i = 0; i < size; ++i) { - std::pair kv; - slk::Load(&kv, reader); - properties->set(kv.first, kv.second); - } -} - -} // namespace slk diff --git a/src/storage/common/types/slk.hpp b/src/storage/common/types/slk.hpp deleted file mode 100644 index f51a44123..000000000 --- a/src/storage/common/types/slk.hpp +++ /dev/null @@ -1,52 +0,0 @@ -#pragma once - -#include "slk/serialization.hpp" -#include "storage/common/types/property_value.hpp" -#include "storage/common/types/property_value_store.hpp" -#include "storage/common/types/types.hpp" - -namespace slk { - -inline void Save(const storage::Label &common, slk::Builder *builder) { - slk::Save(common.id_, builder); -} - -inline void Load(storage::Label *common, slk::Reader *reader) { - slk::Load(&common->id_, reader); -} - -inline void Save(const storage::EdgeType &common, slk::Builder *builder) { - slk::Save(common.id_, builder); -} - -inline void Load(storage::EdgeType *common, slk::Reader *reader) { - slk::Load(&common->id_, reader); -} - -inline void Save(const storage::Property &common, slk::Builder *builder) { - slk::Save(common.id_, builder); -} - -inline void Load(storage::Property *common, slk::Reader *reader) { - slk::Load(&common->id_, reader); -} - -inline void Save(const storage::Gid &gid, slk::Builder *builder) { - slk::Save(gid.AsUint(), builder); -} - -inline void Load(storage::Gid *gid, slk::Reader *reader) { - uint64_t id; - slk::Load(&id, reader); - *gid = storage::Gid::FromUint(id); -} - -void Save(const PropertyValue &value, slk::Builder *builder); - -void Load(PropertyValue *value, slk::Reader *reader); - -void Save(const PropertyValueStore &properties, slk::Builder *builder); - -void Load(PropertyValueStore *properties, slk::Reader *reader); - -} // namespace slk diff --git a/src/storage/common/types/types.hpp b/src/storage/common/types/types.hpp deleted file mode 100644 index b86aee9e1..000000000 --- a/src/storage/common/types/types.hpp +++ /dev/null @@ -1,249 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include - -#include - -#include "utils/atomic.hpp" -#include "utils/cast.hpp" - -namespace storage { - -using IdT = uint16_t; - -const IdT IdMask = std::numeric_limits::max() >> 1; -const IdT IdNotMask = ~IdMask; - -// In case of a new location Mask value has to be updated. -// -// |-------------|--------------| -// |---location--|------id------| -// |-Memory|Disk-|-----2^15-----| -enum class Location : IdT { Memory = 0x8000, Disk = 0x0000 }; - -class Label final { - public: - Label() = default; - explicit Label(const IdT id, const Location location = Location::Memory) - : id_((id & IdMask) | static_cast(location)) { - // TODO(ipaljak): A better way would be to throw an exception - // and send a message to the user that a new Id can't be created. - // By doing that, database instance will continue to work and user - // has a chance to make an appropriate action. - // CHECK isn't user friendly at all because it will immediately - // terminate the whole process. - // TODO implement throw and error handling - CHECK(id <= IdMask) << "Number of used ids overflowed!"; - } - - IdT Id() const { return static_cast(id_ & IdMask); } - storage::Location Location() const { - return static_cast(id_ & IdNotMask); - } - - friend bool operator==(const Label &a, const Label &b) { - return a.Id() == b.Id(); - } - friend bool operator!=(const Label &a, const Label &b) { - return a.Id() != b.Id(); - } - friend bool operator<(const Label &a, const Label &b) { - return a.Id() < b.Id(); - } - friend bool operator>(const Label &a, const Label &b) { - return a.Id() > b.Id(); - } - friend bool operator<=(const Label &a, const Label &b) { - return a.Id() <= b.Id(); - } - friend bool operator>=(const Label &a, const Label &b) { - return a.Id() >= b.Id(); - } - - IdT id_{0}; -}; - -class EdgeType final { - public: - EdgeType() = default; - explicit EdgeType(const IdT id, const Location location = Location::Memory) - : id_((id & IdMask) | static_cast(location)) { - // TODO(ipaljak): A better way would be to throw an exception - // and send a message to the user that a new Id can't be created. - // By doing that, database instance will continue to work and user - // has a chance to make an appropriate action. - // CHECK isn't user friendly at all because it will immediately - // terminate the whole process. - // TODO implement throw and error handling - CHECK(id <= IdMask) << "Number of used ids overflowed!"; - } - - IdT Id() const { return static_cast(id_ & IdMask); } - storage::Location Location() const { - return static_cast(id_ & IdNotMask); - } - - friend bool operator==(const EdgeType &a, const EdgeType &b) { - return a.Id() == b.Id(); - } - friend bool operator!=(const EdgeType &a, const EdgeType &b) { - return a.Id() != b.Id(); - } - friend bool operator<(const EdgeType &a, const EdgeType &b) { - return a.Id() < b.Id(); - } - friend bool operator>(const EdgeType &a, const EdgeType &b) { - return a.Id() > b.Id(); - } - friend bool operator<=(const EdgeType &a, const EdgeType &b) { - return a.Id() <= b.Id(); - } - friend bool operator>=(const EdgeType &a, const EdgeType &b) { - return a.Id() >= b.Id(); - } - - IdT id_{0}; -}; - -class Property final { - public: - Property() = default; - explicit Property(const IdT id, const Location location = Location::Memory) - : id_((id & IdMask) | static_cast(location)) { - // TODO(ipaljak): A better way would be to throw an exception - // and send a message to the user that a new Id can't be created. - // By doing that, database instance will continue to work and user - // has a chance to make an appropriate action. - // CHECK isn't user friendly at all because it will immediately - // terminate the whole process. - // TODO implement throw and error handling - CHECK(id <= IdMask) << "Number of used ids overflowed!"; - } - - IdT Id() const { return static_cast(id_ & IdMask); } - storage::Location Location() const { - return static_cast(id_ & IdNotMask); - } - - friend bool operator==(const Property &a, const Property &b) { - return a.Id() == b.Id(); - } - friend bool operator!=(const Property &a, const Property &b) { - return a.Id() != b.Id(); - } - friend bool operator<(const Property &a, const Property &b) { - return a.Id() < b.Id(); - } - friend bool operator>(const Property &a, const Property &b) { - return a.Id() > b.Id(); - } - friend bool operator<=(const Property &a, const Property &b) { - return a.Id() <= b.Id(); - } - friend bool operator>=(const Property &a, const Property &b) { - return a.Id() >= b.Id(); - } - - IdT id_{0}; -}; - -/** Global ID of a record in the database. */ -class Gid final { - private: - explicit Gid(uint64_t id) : id_(id) {} - - public: - Gid() = default; - - static Gid FromUint(uint64_t id) { return Gid{id}; } - static Gid FromInt(int64_t id) { - return Gid{utils::MemcpyCast(id)}; - } - uint64_t AsUint() const { return id_; } - int64_t AsInt() const { return utils::MemcpyCast(id_); } - - private: - uint64_t id_; -}; - -inline bool operator==(const Gid &first, const Gid &second) { - return first.AsUint() == second.AsUint(); -} - -inline bool operator!=(const Gid &first, const Gid &second) { - return first.AsUint() != second.AsUint(); -} - -inline bool operator<(const Gid &first, const Gid &second) { - return first.AsUint() < second.AsUint(); -} - -inline bool operator>(const Gid &first, const Gid &second) { - return first.AsUint() > second.AsUint(); -} - -inline bool operator<=(const Gid &first, const Gid &second) { - return first.AsUint() <= second.AsUint(); -} - -inline bool operator>=(const Gid &first, const Gid &second) { - return first.AsUint() >= second.AsUint(); -} - -/** Threadsafe generation of new global IDs. */ -class GidGenerator { - public: - /** - * Returns a globally unique identifier. - * - * @param requested_gid - The desired gid. If given, it will be returned and - * this generator's state updated accordingly. - */ - Gid Next(std::optional requested_gid = std::nullopt) { - if (requested_gid) { - utils::EnsureAtomicGe(next_local_id_, requested_gid->AsUint() + 1U); - return *requested_gid; - } else { - return Gid::FromUint(next_local_id_++); - } - } - - private: - std::atomic next_local_id_{0}; -}; - -} // namespace storage - -namespace std { -template <> -struct hash { - size_t operator()(const storage::Label &k) const { - return hash()(k.Id()); - } -}; - -template <> -struct hash { - size_t operator()(const storage::EdgeType &k) const { - return hash()(k.Id()); - } -}; - -template <> -struct hash { - size_t operator()(const storage::Property &k) const { - return hash()(k.Id()); - } -}; - -template <> -struct hash { - size_t operator()(const storage::Gid &gid) const { - return hash()(gid.AsUint()); - } -}; -} // namespace std diff --git a/src/storage/edge_accessor.hpp b/src/storage/edge_accessor.hpp deleted file mode 100644 index 38eaea94c..000000000 --- a/src/storage/edge_accessor.hpp +++ /dev/null @@ -1,7 +0,0 @@ -#pragma once - -#ifdef MG_SINGLE_NODE_HA -#include "storage/single_node_ha/edge_accessor.hpp" -#endif - -// TODO: write documentation for the interface here! diff --git a/src/storage/single_node_ha/concurrent_id_mapper.hpp b/src/storage/single_node_ha/concurrent_id_mapper.hpp deleted file mode 100644 index 347c26ae4..000000000 --- a/src/storage/single_node_ha/concurrent_id_mapper.hpp +++ /dev/null @@ -1,68 +0,0 @@ -#pragma once - -#include - -#include "data_structures/concurrent/concurrent_map.hpp" -#include "storage/common/types/types.hpp" -#include "utils/algorithm.hpp" - -namespace storage { - -/** SingleNode implementation of ConcurrentIdMapper. */ -template -class ConcurrentIdMapper { - using StorageT = IdT; - - public: - ConcurrentIdMapper() = default; - explicit ConcurrentIdMapper( - const std::vector &properties_on_disk) - : properties_on_disk_(properties_on_disk) {} - - TId value_to_id(const std::string &value) { - auto value_to_id_acc = value_to_id_.access(); - auto found = value_to_id_acc.find(value); - TId inserted_id(0); - if (found == value_to_id_acc.end()) { - StorageT new_id = id_.fetch_add(1); - // After we tried to insert value with our id we either got our id, or the - // id created by the thread which succesfully inserted (value, id) pair - // because that's ConcurrentMap's behaviour - if (std::is_same::value) - inserted_id = - value_to_id_acc.insert(value, TId(new_id, PropertyLocation(value))) - .first->second; - else - inserted_id = value_to_id_acc.insert(value, TId(new_id)).first->second; - } else { - inserted_id = found->second; - } - auto id_to_value_acc = id_to_value_.access(); - // We have to try to insert the inserted_id and value even if we are not the - // one who assigned id because we have to make sure that after this method - // returns that both mappings between id->value and value->id exist. - id_to_value_acc.insert(inserted_id, value); - return inserted_id; - } - - const std::string &id_to_value(const TId &id) { - auto id_to_value_acc = id_to_value_.access(); - auto result = id_to_value_acc.find(id); - DCHECK(result != id_to_value_acc.end()); - return result->second; - } - - private: - ConcurrentMap value_to_id_; - ConcurrentMap id_to_value_; - std::atomic id_{0}; - std::vector properties_on_disk_; - std::mutex mutex_; - - Location PropertyLocation(const std::string &name) { - std::unique_lock lock(mutex_); - if (utils::Contains(properties_on_disk_, name)) return Location::Disk; - return Location::Memory; - } -}; -} // namespace storage diff --git a/src/storage/single_node_ha/deferred_deleter.hpp b/src/storage/single_node_ha/deferred_deleter.hpp deleted file mode 100644 index 0b11ac7ec..000000000 --- a/src/storage/single_node_ha/deferred_deleter.hpp +++ /dev/null @@ -1,78 +0,0 @@ -#pragma once - -#include - -#include -#include - -#include "glog/logging.h" -#include "storage/single_node_ha/mvcc/record.hpp" -#include "transactions/transaction.hpp" - -/** - * @brief - Implements deferred deletion. - * @Tparam T - type of object to delete (Vertex/Edge/VersionList...) - * This is NOT a thread-safe class. - */ -template -class DeferredDeleter { - public: - /** - * @brief - keep track of what object was deleted at which time. - */ - struct DeletedObject { - const T *object; - const tx::TransactionId deleted_at; - DeletedObject(const T *object, tx::TransactionId deleted_at) - : object(object), deleted_at(deleted_at) {} - }; - - /** - * @brief - check if everything is freed - */ - ~DeferredDeleter() { - CHECK(objects_.size() == 0U) - << "Objects are not freed when calling the destructor."; - } - - /** - * @brief - Add objects to this deleter. This method assumes that it will - * always be called with a non-decreasing sequence of `deleted_at`. - * @param objects - vector of objects to add - * @param last_transaction - nothing newer or equal to it can see these - * objects - */ - void AddObjects(const std::vector &objects) { - auto previous_tx_id = objects_.empty() - ? std::numeric_limits::min() - : objects_.back().deleted_at; - for (auto object : objects) { - CHECK(previous_tx_id <= object.deleted_at) - << "deleted_at must be non-decreasing"; - previous_tx_id = object.deleted_at; - objects_.push_back(object); - } - } - - /** - * @brief - Free memory of objects deleted before the id. - * @param id - delete before this id - */ - void FreeExpiredObjects(tx::TransactionId id) { - auto it = objects_.begin(); - while (it != objects_.end() && it->deleted_at < id) { - delete it->object; - ++it; - } - objects_.erase(objects_.begin(), it); - } - - /** - * @brief - Return number of stored objects. - */ - size_t Count() { return objects_.size(); } - - private: - // Ascendingly sorted list of deleted objects by `deleted_at`. - std::list objects_; -}; diff --git a/src/storage/single_node_ha/edge.hpp b/src/storage/single_node_ha/edge.hpp deleted file mode 100644 index 150d39b08..000000000 --- a/src/storage/single_node_ha/edge.hpp +++ /dev/null @@ -1,32 +0,0 @@ -#pragma once - -#include "storage/single_node_ha/mvcc/record.hpp" -#include "storage/single_node_ha/mvcc/version_list.hpp" -#include "storage/common/types/property_value_store.hpp" -#include "storage/common/types/types.hpp" - -class Vertex; - -class Edge : public mvcc::Record { - public: - Edge(mvcc::VersionList *from, mvcc::VersionList *to, - storage::EdgeType edge_type) - : from_(from), to_(to), edge_type_(edge_type) {} - - // Returns new Edge with copy of data stored in this Edge, but without - // copying superclass' members. - Edge *CloneData() { return new Edge(*this); } - - mvcc::VersionList *from_; - mvcc::VersionList *to_; - storage::EdgeType edge_type_; - PropertyValueStore properties_; - - private: - Edge(const Edge &other) - : mvcc::Record(), - from_(other.from_), - to_(other.to_), - edge_type_(other.edge_type_), - properties_(other.properties_) {} -}; diff --git a/src/storage/single_node_ha/edge_accessor.cpp b/src/storage/single_node_ha/edge_accessor.cpp deleted file mode 100644 index 4ce69f984..000000000 --- a/src/storage/single_node_ha/edge_accessor.cpp +++ /dev/null @@ -1,59 +0,0 @@ -#include "storage/edge_accessor.hpp" - -#include "database/single_node_ha/graph_db_accessor.hpp" -#include "storage/vertex_accessor.hpp" -#include "utils/algorithm.hpp" - -EdgeAccessor::EdgeAccessor(mvcc::VersionList *address, - database::GraphDbAccessor &db_accessor) - : RecordAccessor(address, db_accessor), - from_(nullptr), - to_(nullptr), - edge_type_() { - RecordAccessor::Reconstruct(); - if (current_ != nullptr) { - from_ = current_->from_; - to_ = current_->to_; - edge_type_ = current_->edge_type_; - } -} - -EdgeAccessor::EdgeAccessor(mvcc::VersionList *address, - database::GraphDbAccessor &db_accessor, - mvcc::VersionList *from, - mvcc::VersionList *to, - storage::EdgeType edge_type) - : RecordAccessor(address, db_accessor), - from_(from), - to_(to), - edge_type_(edge_type) {} - -storage::EdgeType EdgeAccessor::EdgeType() const { return edge_type_; } - -VertexAccessor EdgeAccessor::from() const { - return VertexAccessor(from_, db_accessor()); -} - -bool EdgeAccessor::from_is(const VertexAccessor &v) const { - return v.address() == from_; -} - -VertexAccessor EdgeAccessor::to() const { - return VertexAccessor(to_, db_accessor()); -} - -bool EdgeAccessor::to_is(const VertexAccessor &v) const { - return v.address() == to_; -} - -bool EdgeAccessor::is_cycle() const { return to_ == from_; } - -std::ostream &operator<<(std::ostream &os, const EdgeAccessor &ea) { - os << "E[" << ea.db_accessor().EdgeTypeName(ea.EdgeType()); - os << " {"; - utils::PrintIterable(os, ea.Properties(), ", ", [&](auto &stream, - const auto &pair) { - stream << ea.db_accessor().PropertyName(pair.first) << ": " << pair.second; - }); - return os << "}]"; -} diff --git a/src/storage/single_node_ha/edge_accessor.hpp b/src/storage/single_node_ha/edge_accessor.hpp deleted file mode 100644 index dc1326ad1..000000000 --- a/src/storage/single_node_ha/edge_accessor.hpp +++ /dev/null @@ -1,77 +0,0 @@ -#pragma once - -#include "storage/single_node_ha/edge.hpp" -#include "storage/single_node_ha/record_accessor.hpp" - -// forward declaring the VertexAccessor because it's returned -// by some functions -class VertexAccessor; - -/** - * Provides ways for the client programmer (i.e. code generated by the compiler) - * to interact with an Edge. - * - * Note that EdgeAccessors do not necessary read versioned (MVCC) data. This is - * possible because edge endpoints (from and to), as well as the edge type, are - * all immutable. These are the most often used aspects of an edge, and are - * stored also in the vertex endpoints of the edge. Using them when creating an - * EdgeAccessor means that data does not have to be read from a random memory - * location, which is often a performance bottleneck in traversals. - */ -class EdgeAccessor final : public RecordAccessor { - public: - /** Constructor that reads data from the random memory location (lower - * performance, see class docs). */ - EdgeAccessor(mvcc::VersionList *address, - database::GraphDbAccessor &db_accessor); - - /** - * Constructor that does NOT read data from the random memory location - * (better performance, see class docs). - */ - EdgeAccessor(mvcc::VersionList *address, - database::GraphDbAccessor &db_accessor, - mvcc::VersionList *from, mvcc::VersionList *to, - storage::EdgeType edge_type); - - storage::EdgeType EdgeType() const; - - /** Returns an accessor to the originating Vertex of this edge. */ - VertexAccessor from() const; - - /** Returns the address of the originating Vertex of this edge. */ - auto from_addr() const { return from_; } - - /** Checks if the given vertex is the source of this edge, without - * creating an additional accessor to perform the check. */ - bool from_is(const VertexAccessor &v) const; - - /** Returns an accessor to the destination Vertex of this edge. */ - VertexAccessor to() const; - - /** Returns the address of the destination Vertex of this edge. */ - auto to_addr() const { return to_; } - - /** Checks if the given vertex is the destination of this edge, without - * creating an additional accessor to perform the check. */ - bool to_is(const VertexAccessor &v) const; - - /** Returns true if this edge is a cycle (start and end node are - * the same. */ - bool is_cycle() const; - - private: - mvcc::VersionList *from_; - mvcc::VersionList *to_; - storage::EdgeType edge_type_; -}; - -std::ostream &operator<<(std::ostream &, const EdgeAccessor &); - -// hash function for the edge accessor -namespace std { -template <> -struct hash { - size_t operator()(const EdgeAccessor &e) const { return e.gid().AsUint(); }; -}; -} // namespace std diff --git a/src/storage/single_node_ha/edges.hpp b/src/storage/single_node_ha/edges.hpp deleted file mode 100644 index 249add09c..000000000 --- a/src/storage/single_node_ha/edges.hpp +++ /dev/null @@ -1,156 +0,0 @@ -#pragma once - -#include -#include -#include - -#include "glog/logging.h" - -#include "storage/common/types/types.hpp" -#include "storage/single_node_ha/mvcc/version_list.hpp" -#include "utils/algorithm.hpp" - -/** - * A data stucture that holds a number of edges. This implementation assumes - * that separate Edges instances are used for incoming and outgoing edges in a - * vertex (and consequently that edge Addresses are unique in it). - */ -class Edges { - private: - struct Element { - mvcc::VersionList *vertex; - mvcc::VersionList *edge; - storage::EdgeType edge_type; - }; - - /** Custom iterator that takes care of skipping edges when the destination - * vertex or edge types are known. */ - class Iterator { - public: - /** Ctor that just sets the position. Used for normal iteration (that does - * not skip any edges), and for end-iterator creation in both normal and - * skipping iteration. - * - * @param iterator - Iterator in the underlying storage. - */ - explicit Iterator(std::vector::const_iterator iterator) - : position_(iterator) {} - - /** Ctor used for creating the beginning iterator with known destination - * vertex. - * - * @param iterator - Iterator in the underlying storage. - * @param end - End iterator in the underlying storage. - * @param vertex - The destination vertex address. If empty the - * edges are not filtered on destination. - * @param edge_types - The edge types at least one of which must be matched. - * If nullptr edges are not filtered on type. - */ - Iterator(std::vector::const_iterator position, - std::vector::const_iterator end, - mvcc::VersionList *vertex, - const std::vector *edge_types) - : position_(position), - end_(end), - vertex_(vertex), - edge_types_(edge_types) { - update_position(); - } - - Iterator &operator++() { - ++position_; - update_position(); - return *this; - } - - const Element &operator*() const { return *position_; } - const Element *operator->() const { return &(*position_); } - - bool operator==(const Iterator &other) const { - return position_ == other.position_; - } - - bool operator!=(const Iterator &other) const { return !(*this == other); } - - private: - std::vector::const_iterator position_; - // end_ is used only in update_position() to limit find. - std::vector::const_iterator end_; - - // Optional predicates. If set they define which edges are skipped by the - // iterator. - mvcc::VersionList *vertex_{nullptr}; - // For edge types we use a vector pointer because it's optional. - const std::vector *edge_types_ = nullptr; - - /** Helper function that skips edges that don't satisfy the predicate - * present in this iterator. */ - void update_position() { - if (vertex_ && edge_types_) { - position_ = std::find_if(position_, end_, [this](const Element &e) { - return e.vertex == this->vertex_ && - utils::Contains(*this->edge_types_, e.edge_type); - }); - } else if (vertex_) { - position_ = std::find_if(position_, end_, [this](const Element &e) { - return e.vertex == this->vertex_; - }); - } else if (edge_types_) { - position_ = std::find_if(position_, end_, [this](const Element &e) { - return utils::Contains(*this->edge_types_, e.edge_type); - }); - } - } - }; - - public: - /** - * Adds an edge to this structure. - * - * @param vertex - The destination vertex of the edge. That's the one - * opposite from the vertex that contains this `Edges` instance. - * @param edge - The edge. - * @param edge_type - Type of the edge. - */ - void emplace(mvcc::VersionList *vertex, mvcc::VersionList *edge, - storage::EdgeType edge_type) { - storage_.emplace_back(Element{vertex, edge, edge_type}); - } - - /** - * Removes an edge from this structure. - */ - void RemoveEdge(mvcc::VersionList *edge) { - auto found = std::find_if( - storage_.begin(), storage_.end(), - [edge](const Element &element) { return edge == element.edge; }); - // If the edge is not in the structure we don't care and can simply return - if (found == storage_.end()) return; - *found = std::move(storage_.back()); - storage_.pop_back(); - } - - auto size() const { return storage_.size(); } - auto begin() const { return Iterator(storage_.begin()); } - auto end() const { return Iterator(storage_.end()); } - - auto &storage() { return storage_; } - - /** - * Creates a beginning iterator that will skip edges whose destination - * vertex is not equal to the given vertex. - * - * @param vertex - The destination vertex Address. If empty the - * edges are not filtered on destination. - * @param edge_types - The edge types at least one of which must be matched. - * If nullptr edges are not filtered on type. - */ - auto begin(mvcc::VersionList *vertex, - const std::vector *edge_types) const { - if (edge_types && edge_types->empty()) edge_types = nullptr; - return Iterator(storage_.begin(), storage_.end(), vertex, edge_types); - } - - private: - std::vector storage_; -}; diff --git a/src/storage/single_node_ha/garbage_collector.hpp b/src/storage/single_node_ha/garbage_collector.hpp deleted file mode 100644 index 59ba5f129..000000000 --- a/src/storage/single_node_ha/garbage_collector.hpp +++ /dev/null @@ -1,70 +0,0 @@ -#pragma once - -#include - -#include "data_structures/concurrent/skiplist.hpp" -#include "storage/single_node_ha/mvcc/version_list.hpp" -#include "storage/single_node_ha/deferred_deleter.hpp" -#include "transactions/single_node_ha/engine.hpp" - -/** - * @brief - Garbage collects deleted records. - * @tparam TCollection - type of collection. Must have a SkipList-like API - * (accessors). - * @tparam TRecord - type of underlying record in mvcc. - */ -template -class GarbageCollector { - public: - GarbageCollector( - TCollection &collection, DeferredDeleter &record_deleter, - DeferredDeleter> &version_list_deleter) - : collection_(collection), - record_deleter_(record_deleter), - version_list_deleter_(version_list_deleter) {} - - /** - * @brief - Runs garbage collector. Populates deferred deleters with version - * lists and records. - * - * @param snapshot - the GC snapshot. Consists of the oldest active - * transaction's snapshot, with that transaction's id appened as last. - * @param engine - reference to engine object - */ - void Run(const tx::Snapshot &snapshot, const tx::Engine &engine) { - auto collection_accessor = collection_.access(); - uint64_t count = 0; - std::vector::DeletedObject> - deleted_records; - std::vector< - typename DeferredDeleter>::DeletedObject> - deleted_version_lists; - for (auto id_vlist : collection_accessor) { - mvcc::VersionList *vlist = id_vlist.second; - // If the version_list is empty, i.e. there is nothing else to be read - // from it we can delete it. - auto ret = vlist->GcDeleted(snapshot, engine); - if (ret.first) { - deleted_version_lists.emplace_back(vlist, engine.LocalLast()); - count += collection_accessor.remove(id_vlist.first); - } - if (ret.second != nullptr) - deleted_records.emplace_back(ret.second, engine.LocalLast()); - } - DLOG_IF(INFO, count > 0) - << "GC started cleaning with snapshot: " << snapshot; - DLOG_IF(INFO, count > 0) << "Destroyed: " << count; - - // Add records to deleter, with the id larger or equal than the last active - // transaction. - record_deleter_.AddObjects(deleted_records); - // Add version_lists to deleter, with the id larger or equal than the last - // active transaction. - version_list_deleter_.AddObjects(deleted_version_lists); - } - - private: - TCollection &collection_; - DeferredDeleter &record_deleter_; - DeferredDeleter> &version_list_deleter_; -}; diff --git a/src/storage/single_node_ha/indexes/key_index.hpp b/src/storage/single_node_ha/indexes/key_index.hpp deleted file mode 100644 index 80743031e..000000000 --- a/src/storage/single_node_ha/indexes/key_index.hpp +++ /dev/null @@ -1,188 +0,0 @@ -#pragma once - -#include "glog/logging.h" - -#include "data_structures/concurrent/concurrent_map.hpp" -#include "storage/single_node_ha/mvcc/version_list.hpp" -#include "storage/common/index.hpp" -#include "storage/common/types/types.hpp" -#include "storage/single_node_ha/edge.hpp" -#include "storage/single_node_ha/vertex.hpp" -#include "transactions/transaction.hpp" -#include "utils/total_ordering.hpp" - -namespace database { - -/** - * @brief Implements index update and acquire. - * @Tparam TKey - underlying type by which to key objects - * @Tparam TRecord - object stored under the given key - */ -template -class KeyIndex { - public: - KeyIndex() {} - KeyIndex(const KeyIndex &other) = delete; - KeyIndex(KeyIndex &&other) = delete; - KeyIndex &operator=(const KeyIndex &other) = delete; - KeyIndex &operator=(KeyIndex &&other) = delete; - - /** - * @brief - Add record, vlist, if new, to TKey specific storage. - * @param key - TKey index to update. - * @param vlist - pointer to vlist entry to add - * @param record - pointer to record entry to add (contained in vlist) - */ - void Update(const TKey &key, mvcc::VersionList *vlist, - const TRecord *const record) { - GetKeyStorage(key)->access().insert(IndexEntry(vlist, record)); - } - - /** - * @brief - Get all the inserted vlists in TKey specific storage which - * still have that label visible in this transaction. - * @param key - key to query. - * @param t - current transaction, which determines visibility. - * @param current_state If true then the graph state for the - * current transaction+command is returned (insertions, updates and - * deletions performed in the current transaction+command are not - * ignored). - * @return iterable collection of vlists records with the requested - * TKey. - */ - auto GetVlists(const TKey &key, tx::Transaction &t, bool current_state) { - auto access = GetKeyStorage(key)->access(); - auto begin = access.begin(); - return index::GetVlists::Iterator, IndexEntry, - TRecord>( - std::move(access), begin, [](const IndexEntry &) { return true; }, t, - [key](const IndexEntry &, const TRecord *record) { - return KeyIndex::Exists(key, record); - }, - current_state); - } - - /** - * @brief - Return number of items in skiplist associated with the given - * TKey. This number could be imprecise because of the underlying skiplist - * storage. Use this as a hint, and not as a rule. - * Moreover, some transaction probably sees only part of the skiplist since - * not all versions are visible for it. Also, garbage collection might now - * have been run for some time so the index might have accumulated garbage. - * @param key - key to query for. - * @return number of items - */ - auto Count(const TKey &key) { return GetKeyStorage(key)->access().size(); } - - /** - * @brief - Removes from the index all entries for which records don't contain - * the given label anymore. Update all record which are not visible for any - * transaction with an id larger or equal to `id`. - * - * @param snapshot - the GC snapshot. Consists of the oldest active - * transaction's snapshot, with that transaction's id appened as last. - * @param engine - transaction engine to see which records are commited - */ - void Refresh(const tx::Snapshot &snapshot, tx::Engine &engine) { - return index::Refresh( - indices_, snapshot, engine, - [](const TKey &key, const IndexEntry &entry) { - return KeyIndex::Exists(key, entry.record_); - }); - } - - /** - * Returns a vector of keys present in this index. - */ - std::vector Keys() { - std::vector keys; - for (auto &kv : indices_.access()) keys.push_back(kv.first); - return keys; - } - - private: - /** - * @brief - Contains vlist and record pointers. - */ - class IndexEntry : public utils::TotalOrdering { - public: - IndexEntry(const IndexEntry &entry, const TRecord *const new_record) - : IndexEntry(entry.vlist_, new_record) {} - IndexEntry(mvcc::VersionList *const vlist, - const TRecord *const record) - : vlist_(vlist), record_(record) {} - - // Comparision operators - we need them to keep this sorted inside - // skiplist. - // This needs to be sorted first by vlist and second record because we - // want to keep same vlists close together since we need to filter them to - // get only the unique ones. - bool operator<(const IndexEntry &other) const { - if (this->vlist_ != other.vlist_) return this->vlist_ < other.vlist_; - return this->record_ < other.record_; - } - - bool operator==(const IndexEntry &other) const { - return this->vlist_ == other.vlist_ && this->record_ == other.record_; - } - - /** - * @brief - Checks if previous IndexEntry has the same vlist as this - * IndexEntry. - * @return - true if the vlists match. - */ - bool IsAlreadyChecked(const IndexEntry &previous) const { - return previous.vlist_ == this->vlist_; - } - - mvcc::VersionList *const vlist_; - const TRecord *const record_; - }; - - /** - * @brief - Get storage for this label. Creates new - * storage if this key is not yet indexed. - * @param key - key for which to access storage. - * @return pointer to skiplist of version list records. - */ - auto GetKeyStorage(const TKey &key) { - auto access = indices_.access(); - // Avoid excessive new/delete by first checking if it exists. - auto iter = access.find(key); - if (iter == access.end()) { - auto ret = access.insert(key, std::make_unique>()); - return ret.first->second.get(); - } - return iter->second.get(); - } - - /** - * @brief - Check if Vertex contains label. - * @param label - label to check for. - * @return true if it contains, false otherwise. - */ - static bool Exists(storage::Label label, const Vertex *const v) { - DCHECK(v != nullptr) << "Vertex is nullptr."; - // We have to check for existance of label because the transaction - // might not see the label, or the label was deleted and not yet - // removed from the index. - const auto &labels = v->labels_; - return std::find(labels.begin(), labels.end(), label) != labels.end(); - } - - /** - * @brief - Check if Edge has edge_type. - * @param edge_type - edge_type to check for. - * @return true if it has that edge_type, false otherwise. - */ - static bool Exists(storage::EdgeType edge_type, const Edge *const e) { - DCHECK(e != nullptr) << "Edge is nullptr."; - // We have to check for equality of edge types because the transaction - // might not see the edge type, or the edge type was deleted and not yet - // removed from the index. - return e->edge_type_ == edge_type; - } - - ConcurrentMap>> indices_; -}; -} // namespace database diff --git a/src/storage/single_node_ha/indexes/label_property_index.hpp b/src/storage/single_node_ha/indexes/label_property_index.hpp deleted file mode 100644 index 2361b0206..000000000 --- a/src/storage/single_node_ha/indexes/label_property_index.hpp +++ /dev/null @@ -1,533 +0,0 @@ -#pragma once - -#include - -#include "data_structures/concurrent/concurrent_map.hpp" -#include "data_structures/concurrent/skiplist.hpp" -#include "storage/common/index.hpp" -#include "storage/common/types/types.hpp" -#include "storage/single_node_ha/edge.hpp" -#include "storage/single_node_ha/mvcc/version_list.hpp" -#include "storage/single_node_ha/vertex.hpp" -#include "transactions/transaction.hpp" -#include "utils/bound.hpp" -#include "utils/total_ordering.hpp" - -namespace database { - -/** - * @brief Implements LabelPropertyIndex. - * Currently this provides implementation for: - * acquiring all entries which contain the given label, and a given property - * sorted by the property value - * acquiring all non-unique entries with the given label, and property, with - * exactly one property value - */ -class LabelPropertyIndex { - public: - LabelPropertyIndex(){}; - LabelPropertyIndex(const LabelPropertyIndex &other) = delete; - LabelPropertyIndex(LabelPropertyIndex &&other) = delete; - LabelPropertyIndex &operator=(const LabelPropertyIndex &other) = delete; - LabelPropertyIndex &operator=(LabelPropertyIndex &&other) = delete; - - /** - * @brief - Contain Label + property, to be used as an index key. - */ - class Key : public utils::TotalOrdering { - public: - const storage::Label label_; - const storage::Property property_; - - Key(storage::Label label, storage::Property property) - : label_(label), property_(property) {} - - // Comparison operators - we need them to keep this sorted inside skiplist. - bool operator<(const Key &other) const { - if (this->label_ != other.label_) return this->label_ < other.label_; - return this->property_ < other.property_; - } - - bool operator==(const Key &other) const { - return this->label_ == other.label_ && this->property_ == other.property_; - } - }; - - /** - * @brief - Creates index with the given key if it doesn't exist. Note that - * you still need to populate the index with existing records. - * @return - True if it created the index, false if it already exists. - */ - bool CreateIndex(const Key &key) { - auto access = indices_.access(); - // Avoid creation if it already exists. - auto iter = access.find(key); - if (iter != access.end()) return false; - - auto ret = access.insert(key, std::make_unique>()); - return ret.second; - } - - /** - * Returns if it succeeded in deleting the index and freeing the index memory - */ - void DeleteIndex(const Key &key) { indices_.access().remove(key); } - - /** - * @brief - Updates all indexes which should contain this vertex. - * @param vlist - pointer to vlist entry to add - * @param vertex - pointer to vertex record entry to add (contained in vlist) - */ - void UpdateOnLabelProperty(mvcc::VersionList *const vlist, - const Vertex *const vertex) { - const auto &labels = vertex->labels_; - // We need to check if the given vertex can be inserted in all indexes - for (auto &index : indices_.access()) { - // Vertex has the given label - if (std::find(labels.begin(), labels.end(), index.first.label_) == - labels.end()) - continue; - auto prop = vertex->properties_.at(index.first.property_); - if (prop.type() != PropertyValue::Type::Null) { - Insert(*index.second, prop, vlist, vertex); - } - } - } - - /** - * @brief - Updates all indexes with `label` and any property in `vertex` that - * exists. - * @param label - indexes with this label might be updated if vertex contains - * the corresponding property. - * @param vlist - pointer to vlist entry to add - * @param vertex - pointer to vertex record entry to add (contained in vlist) - */ - void UpdateOnLabel(storage::Label label, - mvcc::VersionList *const vlist, - const Vertex *const vertex) { - // We need to check if the given vertex can be inserted in all indexes - for (auto &index : indices_.access()) { - if (index.first.label_ != label) continue; - auto prop = vertex->properties_.at(index.first.property_); - if (prop.type() != PropertyValue::Type::Null) { - // Property exists and vertex should be added to skiplist. - Insert(*index.second, prop, vlist, vertex); - } - } - } - - /** - * @brief - Updates all indexes with `property` and any label in `vertex` that - * exists. - * @param property - indexes with this property might be updated if vertex - * contains the corresponding label. - * @param vlist - pointer to vlist entry to add - * @param vertex - pointer to vertex record entry to add (contained in vlist) - */ - void UpdateOnProperty(storage::Property property, - mvcc::VersionList *const vlist, - const Vertex *const vertex) { - const auto &labels = vertex->labels_; - for (auto &index : indices_.access()) { - if (index.first.property_ != property) continue; - if (std::find(labels.begin(), labels.end(), index.first.label_) != - labels.end()) { - // Label exists and vertex should be added to skiplist. - Insert(*index.second, vertex->properties_.at(property), vlist, vertex); - } - } - } - - /** - * @brief - Get all the inserted vlists in key specific storage which still - * have that label and property visible in this transaction. - * @param key - Label+Property to query. - * @param t - current transaction, which determines visibility. - * @param current_state If true then the graph state for the - * current transaction+command is returned (insertions, updates and - * deletions performed in the current transaction+command are not - * ignored). - * @return iterable collection of vlists of vertex records with the requested - * key sorted ascendingly by the property value. - */ - auto GetVlists(const Key &key, const tx::Transaction &t, bool current_state) { - DCHECK(IndexExists(key)) << "Index not yet ready."; - auto access = GetKeyStorage(key)->access(); - auto begin = access.begin(); - return index::GetVlists::Iterator, IndexEntry, - Vertex, SkipList>( - std::move(access), begin, [](const IndexEntry &) { return true; }, t, - [key](const IndexEntry &entry, const Vertex *const vertex) { - return LabelPropertyIndex::Exists(key, entry.value_, vertex); - }, - current_state); - } - - /** - * @brief - Get all the inserted vlists in key specific storage which still - * have that label and property visible in this transaction with property - * value equal to 'value'. - * @param key - Label+Property to query. - * @param value - vlists with this value will be returned - * @param t - current transaction, which determines visibility. - * @param current_state If true then the graph state for the - * current transaction+command is returned (insertions, updates and - * deletions performed in the current transaction+command are not - * ignored). - * @return iterable collection of vlists of vertex records with the requested - * key and value - */ - auto GetVlists(const Key &key, const PropertyValue &value, - const tx::Transaction &t, bool current_state) { - DCHECK(IndexExists(key)) << "Index not yet ready."; - auto access = GetKeyStorage(key)->access(); - auto min_ptr = std::numeric_limits::min(); - auto start_iter = access.find_or_larger(IndexEntry( - value, reinterpret_cast *>(min_ptr), - reinterpret_cast(min_ptr))); - return index::GetVlists::Iterator, IndexEntry, - Vertex>( - std::move(access), start_iter, - [value](const IndexEntry &entry) { - return !IndexEntry::Less(value, entry.value_) && - !IndexEntry::Less(entry.value_, value); - }, - t, - [key](const IndexEntry &entry, const Vertex *const vertex) { - return LabelPropertyIndex::Exists(key, entry.value_, vertex); - }, - current_state); - } - - /** - * Get an iterable over all mvcc::VersionLists that are contained in this - * index and satisfy the given bounds. - * - * The returned iterator will only contain vertices/edges whose property value - * is comparable with the given bounds (w.r.t. type). This has implications on - * Cypher query execuction semantics which have not been resolved yet. - * - * At least one of the bounds must be specified. Bounds can't be @c - * PropertyValue::Null. If both bounds are specified, their PropertyValue - * elements must be of comparable types. - * - * @param key - Label+Property to query. - * @param lower - Lower bound of the interval. - * @param upper - Upper bound of the interval. - * @param t - current transaction, which determines visibility. - * @param current_state If true then the graph state for the - * current transaction+command is returned (insertions, updates and - * deletions performed in the current transaction+command are not - * ignored). - * @return iterable collection of mvcc:VersionLists pointers that - * satisfy the bounds and are visible to the given transaction. - */ - auto GetVlists(const Key &key, - const std::optional> lower, - const std::optional> upper, - const tx::Transaction &transaction, bool current_state) { - DCHECK(IndexExists(key)) << "Index not yet ready."; - - auto type = [](const auto &bound) { return bound.value().value().type(); }; - CHECK(lower || upper) << "At least one bound must be provided"; - CHECK(!lower || type(lower) != PropertyValue::Type::Null) - << "Null value is not a valid index bound"; - CHECK(!upper || type(upper) != PropertyValue::Type::Null) - << "Null value is not a valid index bound"; - - // helper function for creating a bound with an IndexElement - auto make_index_bound = [](const auto &optional_bound, bool bottom) { - std::uintptr_t ptr_bound = - bottom ? std::numeric_limits::min() - : std::numeric_limits::max(); - return IndexEntry( - optional_bound.value().value(), - reinterpret_cast *>(ptr_bound), - reinterpret_cast(ptr_bound)); - }; - - auto access = GetKeyStorage(key)->access(); - - // create the iterator startpoint based on the lower bound - auto start_iter = lower - ? access.find_or_larger(make_index_bound( - lower, lower.value().IsInclusive())) - : access.begin(); - - // a function that defines if an entry staisfies the filtering predicate. - // since we already handled the lower bound, we only need to deal with the - // upper bound and value type - std::function predicate; - if (lower && upper && - !AreComparablePropertyValueTypes(type(lower), type(upper))) - predicate = [](const IndexEntry &) { return false; }; - else if (upper) { - auto upper_index_entry = - make_index_bound(upper, upper.value().IsExclusive()); - predicate = [upper_index_entry](const IndexEntry &entry) { - return AreComparablePropertyValueTypes( - entry.value_.type(), upper_index_entry.value_.type()) && - entry < upper_index_entry; - }; - } else { - auto lower_type = type(lower); - make_index_bound(lower, lower.value().IsExclusive()); - predicate = [lower_type](const IndexEntry &entry) { - return AreComparablePropertyValueTypes(entry.value_.type(), lower_type); - }; - } - - return index::GetVlists::Iterator, IndexEntry, - Vertex>( - std::move(access), start_iter, predicate, transaction, - [key](const IndexEntry &entry, const Vertex *const vertex) { - return LabelPropertyIndex::Exists(key, entry.value_, vertex); - }, - current_state); - } - - /** - * @brief - Check for existance of index. - * @param key - Index key - * @return true if the index with that key exists - */ - bool IndexExists(const Key &key) { - auto access = indices_.access(); - return access.find(key) != access.end(); - } - - /** - * @brief - Return number of items in skiplist associated with the given - * key. This number could be imprecise because of the underlying skiplist - * storage. Use this as a hint, and not as a rule. Fails if index doesn't - * exist. - * Moreover, some transaction probably sees only part of the skiplist since - * not all versions are visible for it. Also, garbage collection might now - * have been run for some time so the index might have accumulated garbage. - * @param key - key to query for. - * @return number of items - */ - int64_t Count(const Key &key) { - auto index = GetKeyStorage(key); - CHECK(index != nullptr) << "Index doesn't exist."; - return index->access().size(); - } - - /** - * Returns the approximate position and count of the given value in the - * index for the given Key. - * - * Both are approximations for several reasons. Initially the position - * and count are obtained from the skipist (the index) and as such are - * not exact for perfromance reasons. At the same time the position - * and count are calculated based on property value comparison: an - * additional error is accumulated because the index could contain - * the same vertex with the same value multiple times, - * as well as the same vertex with different values. - */ - auto PositionAndCount(const Key &key, const PropertyValue &value) { - auto access = GetKeyStorage(key)->access(); - return access.position_and_count( - value, - // the 'less' function - [](const PropertyValue &a, const IndexEntry &b) { - return IndexEntry::Less(a, b.value_); - }, - // the 'equal_to' function - [](const PropertyValue &a, const IndexEntry &b) { - return !(IndexEntry::Less(a, b.value_) || - IndexEntry::Less(b.value_, a)); - }); - } - - /** - * @brief - Removes from the index all entries for which records don't contain - * the given label anymore, or the record was deleted before this transaction - * id. - * - * @param snapshot - the GC snapshot. Consists of the oldest active - * transaction's snapshot, with that transaction's id appened as last. - */ - void Refresh(const tx::Snapshot &snapshot, tx::Engine &engine) { - return index::Refresh( - indices_, snapshot, engine, - [](const Key &key, const IndexEntry &entry) { - return LabelPropertyIndex::Exists(key, entry.value_, entry.record_); - }); - } - - /** - * Returns a vector of keys present in this index. - */ - std::vector Keys() { - std::vector keys; - for (auto &kv : indices_.access()) keys.push_back(kv.first); - return keys; - } - - private: - static bool AreComparablePropertyValueTypes(PropertyValue::Type a, - PropertyValue::Type b) { - auto is_numeric = [](const PropertyValue::Type t) { - return t == PropertyValue::Type::Int || t == PropertyValue::Type::Double; - }; - - return a == b || (is_numeric(a) && is_numeric(b)); - } - - /** - * @brief - Contains value, vlist and vertex record to distinguish between - * index entries. - */ - class IndexEntry : public utils::TotalOrdering { - public: - IndexEntry(const IndexEntry &entry, const Vertex *new_record) - : IndexEntry(entry.value_, entry.vlist_, new_record) {} - IndexEntry(const PropertyValue &value, mvcc::VersionList *vlist, - const Vertex *record) - : value_(value), vlist_(vlist), record_(record) {} - - // Comparision operators - we need them to keep this sorted inside - // skiplist. - bool operator<(const IndexEntry &other) const { - bool this_value_smaller = Less(this->value_, other.value_); - if (this_value_smaller || Less(other.value_, this->value_)) - return this_value_smaller; - if (this->vlist_ != other.vlist_) return this->vlist_ < other.vlist_; - return this->record_ < other.record_; - } - - bool operator==(const IndexEntry &other) const { - return !(*this < other) && !(other < *this); - } - - /** - * @brief - For two property values - orders the records by type and then by - * value. Except for integers and doubles - those are both converted to - * double and then compared. - * @return true if the first property value is smaller( should be before) - * than the second one - */ - static bool Less(const PropertyValue &a, const PropertyValue &b) { - if (!AreComparablePropertyValueTypes(a.type(), b.type())) - return a.type() < b.type(); - - if (a.type() == b.type()) { - switch (a.type()) { - case PropertyValue::Type::Null: - return false; - case PropertyValue::Type::String: - return a.ValueString() < b.ValueString(); - case PropertyValue::Type::Bool: - return a.ValueBool() < b.ValueBool(); - case PropertyValue::Type::Int: - return a.ValueInt() < b.ValueInt(); - case PropertyValue::Type::Double: - return a.ValueDouble() < b.ValueDouble(); - case PropertyValue::Type::List: { - auto va = a.ValueList(); - auto vb = b.ValueList(); - if (va.size() != vb.size()) return va.size() < vb.size(); - return lexicographical_compare(va.begin(), va.end(), vb.begin(), - vb.end(), Less); - } - case PropertyValue::Type::Map: { - auto ma = a.ValueMap(); - auto mb = b.ValueMap(); - if (ma.size() != mb.size()) return ma.size() < mb.size(); - const auto cmp = [](const auto &a, const auto &b) { - if (a.first != b.first) - return a.first < b.first; - else - return Less(a.second, b.second); - }; - return lexicographical_compare(ma.begin(), ma.end(), mb.begin(), - mb.end(), cmp); - } - } - } - - // helper for getting a double from PropertyValue, if possible - auto get_double = [](const PropertyValue &value) { - DCHECK(value.type() == PropertyValue::Type::Int || - value.type() == PropertyValue::Type::Double) - << "Invalid data type."; - if (value.type() == PropertyValue::Type::Int) - return static_cast(value.ValueInt()); - return value.ValueDouble(); - }; - - // Types are int and double - convert int to double - return get_double(a) < get_double(b); - } - - /** - * @brief - Check if previous IndexEntry represents the same vlist/value - * pair. - * @return - true if IndexEntries are equal by the vlist/value pair. - */ - bool IsAlreadyChecked(const IndexEntry &previous) const { - return previous.vlist_ == this->vlist_ && - !Less(previous.value_, this->value_) && - !Less(this->value_, previous.value_); - } - - const PropertyValue value_; - mvcc::VersionList *const vlist_{nullptr}; - const Vertex *const record_{nullptr}; - }; - - /** - * @brief - Insert value, vlist, vertex into corresponding index (key) if - * the index exists. - * @param index - into which index to add - * @param value - value which to add - * @param vlist - pointer to vlist entry to add - * @param vertex - pointer to vertex record entry to add (contained in - * vlist) - */ - void Insert(SkipList &index, const PropertyValue &value, - mvcc::VersionList *const vlist, - const Vertex *const vertex) { - index.access().insert(IndexEntry{value, vlist, vertex}); - } - - /** - * @brief - Get storage for this key. - * @param key - Label and and property for which to query. - * @return pointer to skiplist of IndexEntries, if none which matches key - * exists return nullptr - */ - SkipList *GetKeyStorage(const Key &key) { - auto access = indices_.access(); - auto iter = access.find(key); - if (iter == access.end()) return nullptr; - return iter->second.get(); - } - - /** - * @brief - Check if Vertex contains label and property with the given - * value. - * @param key - label and property to check for. - * @param value - value of property to compare - * @return true if it contains, false otherwise. - */ - static bool Exists(const Key &key, const PropertyValue &value, - const Vertex *const v) { - DCHECK(v != nullptr) << "Vertex is nullptr."; - // We have to check for existance of label because the transaction - // might not see the label, or the label was deleted and not yet - // removed from the index. - const auto &labels = v->labels_; - if (std::find(labels.begin(), labels.end(), key.label_) == labels.end()) - return false; - auto prop = v->properties_.at(key.property_); - // Property doesn't exists. - if (prop.type() == PropertyValue::Type::Null) return false; - // Property value is the same as expected. - return !IndexEntry::Less(prop, value) && !IndexEntry::Less(value, prop); - } - - ConcurrentMap>> indices_; -}; -} // namespace database diff --git a/src/storage/single_node_ha/mvcc/record.hpp b/src/storage/single_node_ha/mvcc/record.hpp deleted file mode 100644 index 3d81626ee..000000000 --- a/src/storage/single_node_ha/mvcc/record.hpp +++ /dev/null @@ -1,334 +0,0 @@ -#pragma once - -#include -#include -#include - -#include "transactions/commit_log.hpp" -#include "transactions/single_node_ha/engine.hpp" -#include "transactions/transaction.hpp" - -#include "storage/common/locking/record_lock.hpp" -#include "storage/common/mvcc/version.hpp" - -// the mvcc implementation used here is very much like postgresql's -// more info: https://momjian.us/main/writings/pgsql/mvcc.pdf - -namespace mvcc { - -template -class Record : public Version { - public: - Record() = default; - Record(const Record &) = delete; - Record &operator=(const Record &) = delete; - Record(Record &&) = delete; - Record &operator=(Record &&) = delete; - - // check if this record is visible to the transaction t - bool visible(const tx::Transaction &t) { - // Mike Olson says 17 march 1993: the tests in this routine are correct; - // if you think they're not, you're wrong, and you should think about it - // again. i know, it happened to me. - - // fetch expiration info in a safe way (see fetch_exp for details) - tx::TransactionId tx_exp; - tx::CommandId cmd_exp; - std::tie(tx_exp, cmd_exp) = fetch_exp(); - - return ((tx_.cre == t.id_ && // inserted by the current transaction - cmd_.cre < t.cid() && // before this command, and - (tx_exp == 0 || // the row has not been deleted, or - (tx_exp == t.id_ && // it was deleted by the current - // transaction - cmd_exp >= t.cid()))) // but not before this command, - || // or - (visible_from(Hints::kCre, tx_.cre, - t) && // the record was inserted by a - // committed transaction, and - (tx_exp == 0 || // the record has not been deleted, or - (tx_exp == t.id_ && // the row is being deleted by this - // transaction - cmd_exp >= t.cid()) || // but it's not deleted "yet", or - (tx_exp != t.id_ && // the row was deleted by another - // transaction - !visible_from(Hints::kExp, tx_exp, - t) // that has not been committed - )))); - } - - void mark_created(const tx::Transaction &t) { - DCHECK(tx_.cre == 0) << "Marking node as created twice."; - tx_.cre = t.id_; - cmd_.cre = t.cid(); - } - - void mark_expired(const tx::Transaction &t) { - tx_.exp = t.id_; - cmd_.exp = t.cid(); - } - - bool exp_committed(tx::Engine &engine) { - return committed(Hints::kExp, engine); - } - - /** - * Check if this record is visible w.r.t. to the given garbage collection - * snapshot. See source comments for exact logic. - * - * @param snapshot - the GC snapshot. Consists of the oldest active - * transaction's snapshot, with that transaction's id appened as last. - */ - bool is_not_visible_from(const tx::Snapshot &snapshot, - const tx::Engine &engine) const { - // first get tx.exp so that all the subsequent checks operate on - // the same id. otherwise there could be a race condition - auto exp_id = tx_.exp.load(); - - // a record is NOT visible if: - // 1. it creating transaction aborted (last check), and is also older than - // the current oldest active transaction (optimization) OR - // 2. a) it's expiration is not 0 (some transaction expired it) - // AND - // b) the expiring transaction is older than latest active - // AND - // c) that transaction committed (as opposed to aborted) - // AND - // d) that transaction is not in oldest active transaction's - // snapshot (consequently also not in the snapshots of - // newer transactions) - return (exp_id != 0 && exp_id < snapshot.back() && - committed(Hints::kExp, engine) && !snapshot.contains(exp_id)) || - (tx_.cre.load() < snapshot.back() && cre_aborted(engine)); - } - - // TODO: Test this - // True if this record is visible for write. - // Note that this logic is different from the one above - // in the sense that a record is visible if created before - // OR DURING this command. this is done to support cypher's - // queries which can match, update and return in the same query - bool is_visible_write(const tx::Transaction &t) { - // fetch expiration info in a safe way (see fetch_exp for details) - tx::TransactionId tx_exp; - tx::CommandId cmd_exp; - std::tie(tx_exp, cmd_exp) = fetch_exp(); - - return (tx_.cre == t.id_ && // inserted by the current transaction - cmd_.cre <= t.cid() && // before OR DURING this command, and - (tx_exp == 0 || // the row has not been deleted, or - (tx_exp == t.id_ && // it was deleted by the current - // transaction - cmd_exp >= t.cid()))); // but not before this command, - } - - /** - * True if this record is created in the current command - * of the given transaction. - */ - bool is_created_by(const tx::Transaction &t) { - return tx_.cre == t.id_ && cmd_.cre == t.cid(); - } - - /** - * True if this record is expired in the current command - * of the given transaction. - */ - bool is_expired_by(const tx::Transaction &t) const { - return std::make_pair(t.id_, t.cid()) == fetch_exp(); - } - - const auto &tx() const { return tx_; } - const auto &cmd() const { return cmd_; } - - /** - * Makes sure that create and expiry are in sync with hints if they are - * committed or aborted and are before the `tx_cutoff`. - * `tx_cutoff` exists as a performance optimization to avoid setting hint bits - * on records for which we don't need to have a guarantee that they are set as - * part of GC hints setting procedure - */ - void populate_hints(const tx::Engine &engine, tx::TransactionId tx_cutoff) { - populate_hint_if_possible(engine, Hints::kCre, tx_cutoff); - if (!populate_hint_if_possible(engine, Hints::kExp, tx_cutoff)) { - // Exp is aborted and we can't set the hint, this way we don't have to set - // the hint because an aborted transaction which expires a record is the - // same thing as a non-expired record - tx::TransactionId expected; - do { - expected = tx_.exp; - // If the transaction expiry is no longer aborted we don't need to - // update it anymore, and hints can't be set since it's obviously an - // active transaction - there might be a case where this transaction - // gets finished and committed in the meantime and hints could be set, - // but since we are not going to delete info for this transaction from - // the commit log since it wasn't older than the oldest active - // transaction at the time, or before the invocation of this method; - // we are in the clear - if (!engine.Info(expected).is_aborted()) break; - } while (!tx_.exp.compare_exchange_weak(expected, 0)); - // Ideally we should set the command id as well, but by setting it we - // can't guarantee that some new update won't change the transaction id - // and command id before we had a chance to set it, and just leaving it - // unchanged and relying on all methods to operate on [tx_id: 0, cmd_id: - // some cmd] as a non-transaction doesn't seem too crazy - } - } - - private: - /** - * Fast indicators if a transaction has committed or aborted. It is possible - * the hints do not have that information, in which case the commit log needs - * to be consulted (a slower operation). - */ - class Hints { - public: - /// Masks for the creation/expration and commit/abort positions. - static constexpr uint8_t kCre = 0b0011; - static constexpr uint8_t kExp = 0b1100; - static constexpr uint8_t kCmt = 0b0101; - static constexpr uint8_t kAbt = 0b1010; - - /** Returns true if any bit under the given mask is set. */ - bool Get(uint8_t mask) const { return bits_ & mask; } - - /** Sets all the bits under the given mask. */ - void Set(uint8_t mask) { bits_.fetch_or(mask); } - - /** Clears all the bits under the given mask. */ - void Clear(uint8_t mask) { bits_.fetch_and(~mask); } - - private: - std::atomic bits_{0}; - }; - - template - struct CreExp { - std::atomic cre{0}; - std::atomic exp{0}; - }; - - // tx.cre is the id of the transaction that created the record - // and tx.exp is the id of the transaction that deleted the record - // These values are used to determine the visibility of the record - // to the current transaction. - CreExp tx_; - - // cmd.cre is the id of the command in this transaction that created the - // record and cmd.exp is the id of the command in this transaction that - // deleted the record. These values are used to determine the visibility - // of the record to the current command in the running transaction. - CreExp cmd_; - - mutable Hints hints_; - /** Fetch the (transaction, command) expiration before the check - * because they can be concurrently modified by multiple transactions. - * Do it in a loop to ensure that command is consistent with transaction. - */ - auto fetch_exp() const { - tx::TransactionId tx_exp; - tx::CommandId cmd_exp; - do { - tx_exp = tx_.exp; - cmd_exp = cmd_.exp; - } while (tx_exp != tx_.exp); - return std::make_pair(tx_exp, cmd_exp); - } - - /** - * Populates hint if it is not set for the given create/expiry mask and is - * before the `tx_cutoff` if specified. Note that it doesn't set hint bits for - * expiry transactions which abort because it's too expensive to maintain - * correctness of those hints with regards to race conditions - * @returns - true if hints are now equal to transaction status - * (committed/aborted), will only be false if we are trying to set hint for - * aborted transaction which is this records expiry - */ - bool populate_hint_if_possible( - const tx::Engine &engine, const uint8_t mask, - const std::optional tx_cutoff = std::nullopt) const { - DCHECK(mask == Hints::kCre || mask == Hints::kExp) - << "Mask should be either for creation or expiration"; - if (hints_.Get(mask)) return true; - auto id = mask == Hints::kCre ? tx_.cre.load() : tx_.exp.load(); - // Nothing to do here if there is no id or id is larger than tx_cutoff - if (!id || (tx_cutoff && id >= *tx_cutoff)) return true; - auto info = engine.Info(id); - if (info.is_committed()) { - hints_.Set(mask & Hints::kCmt); - } else if (info.is_aborted()) { - // Abort hints can only be updated for creation hints because only one - // transaction can be creating a single record, so there is no races - if (mask == Hints::kCre) - hints_.Set(mask & Hints::kAbt); - else - return false; - } - return true; - } - - /** - * @brief - Check if the transaciton `id` has comitted before `t` started - * (that means that edits done by transaction `id` are visible in `t`) - * - * Evaluates to true if that transaction has committed, - * it started before `t` and it's not in it's snapshot. - * - * about transactions commit/abort status - * @param mask - Hint bits mask (either Hints::kCre or Hints::kExp). - * @param id - id to check if it's commited and visible - * @return true if the id is commited and visible for the transaction t. - */ - bool visible_from(uint8_t mask, tx::TransactionId id, - const tx::Transaction &t) { - DCHECK(mask == Hints::kCre || mask == Hints::kExp) - << "Mask must be either kCre or kExp"; - // Dominik Gleich says 4 april 2017: the tests in this routine are correct; - // if you think they're not, you're wrong, and you should think about it - // again. I know, it happened to me (and also to Matej Gradicek). - - // You certainly can't see the transaction with id greater than yours as - // that means it started after this transaction and if it commited, it - // commited after this transaction has started. - if (id >= t.id_) return false; - - // The creating transaction is still in progress (examine snapshot) - if (t.snapshot().contains(id)) return false; - - return committed(mask, t.engine_); - } - - /** - * @brief - Check if the transaction with the given `id` is committed. - * - * @param mask - Hint bits mask (either Hints::kCre or Hints::kExp). - * @param id - id to check if commited - - * statuses - * @return true if it's commited, false otherwise - */ - bool committed(uint8_t mask, const tx::Engine &engine) const { - DCHECK(mask == Hints::kCre || mask == Hints::kExp) - << "Mask must be either kCre or kExp"; - populate_hint_if_possible(engine, mask); - return hints_.Get(Hints::kCmt & mask); - } - - /** - * @brief - Check if tx_.cre is aborted. If you need to check for exp - * transaction do it manually by looking at commit log. This function can't do - * that for you since hints can't be used for exp transaction (reason is - * described in function above). - * - * @param engine - engine instance with information about transaction - * statuses - * @return true if it's aborted, false otherwise - */ - bool cre_aborted(const tx::Engine &engine) const { - // Populate hints if not set and return result from hints - DCHECK(populate_hint_if_possible(engine, Hints::kCre)) - << "Hints not populated"; - return hints_.Get(Hints::kAbt & Hints::kCre); - } -}; -} // namespace mvcc diff --git a/src/storage/single_node_ha/mvcc/version_list.hpp b/src/storage/single_node_ha/mvcc/version_list.hpp deleted file mode 100644 index e44e6dda6..000000000 --- a/src/storage/single_node_ha/mvcc/version_list.hpp +++ /dev/null @@ -1,261 +0,0 @@ -#pragma once - -#include "storage/common/locking/record_lock.hpp" -#include "storage/common/mvcc/exceptions.hpp" -#include "storage/common/types/types.hpp" -#include "transactions/transaction.hpp" -#include "utils/exceptions.hpp" - -namespace mvcc { - -template -class VersionList { - public: - /** - * @brief Constructor that is used to insert one item into VersionList. - * - * @param t - transaction - * @param gid - Version list identifier. Uniqueness guaranteed by the code - * creating this version list. - * @param args - args forwarded to constructor of item T (for - * creating the first Record (Version) in this VersionList. - */ - template - VersionList(const tx::Transaction &t, storage::Gid gid, Args &&... args) - : gid_(gid) { - // TODO replace 'new' with something better - auto *v1 = new T(std::forward(args)...); - v1->mark_created(t); - head_ = v1; - } - - VersionList() = delete; - VersionList(const VersionList &) = delete; - VersionList &operator=(const VersionList &) = delete; - // We do a lot of raw-pointer ops with VLists, and these ops assume that a - // VList's address identifies a vertex/edge absolutely and during it's whole - // lifteme. We also assume that the VList owner is the database and that - // ownership is also handled via raw pointers so this shouldn't be moved or - // move assigned. - VersionList(VersionList &&other) = delete; - VersionList &operator=(VersionList &&other) = delete; - - ~VersionList() { delete head_.load(); } - - friend std::ostream &operator<<(std::ostream &stream, - const VersionList &vlist) { - stream << "VersionList" << std::endl; - - T *record = vlist.head_; - - while (record != nullptr) { - stream << "-- " << *record << std::endl; - record = record->next(); - } - - return stream; - } - - /** - * Garbage collects records that are not reachable/visible anymore. - * - * Relinks this version-list so that garbage collected records are no - * longer reachable through this version list. - * Visibility is defined in mvcc::Record::is_not_visible_from, - * to which the given `snapshot` is passed. - * - * This method is NOT thread-safe. - * - * @param snapshot - the GC snapshot. Consists of the oldest active - * transaction's snapshot, with that transaction's id appened as last. - * @param engine - transaction engine to use - we need it to check which - * records were commited and which weren't - * @return pair; status is true - If version list is empty - * after garbage collection. to_delete points to the newest record that is not - * visible anymore. If none exists to_delete will point to nullptr. - */ - std::pair GcDeleted(const tx::Snapshot &snapshot, - const tx::Engine &engine) { - // nullptr - // | - // [v1] ... all of this gets deleted! - // | - // [v2] <------+ head_of_deletable_records - // | | - // [v3] <------+ oldest_visible_record - // | | Jump backwards until you find the oldest visible - // [VerList] ----+ record, or you reach the end of the list - // - - T *head = head_; - T *current = head; - T *oldest_visible_record = nullptr; - while (current) { - // Populate hints only when needed to avoid excessive rpc calls on - // workers. - // snapshot.back() corresponds to the oldest active transaction, - // and this makes it set only hint bits when the creating or expiring - // transaction of a record is older than that) - current->populate_hints(engine, snapshot.back()); - if (!current->is_not_visible_from(snapshot, engine)) - oldest_visible_record = current; - current = current->next(); - } - - if (oldest_visible_record) { - T *head_of_deletable_records = oldest_visible_record->next(); - // oldest_visible_record might be visible to some transaction but - // head_of_deletable_records is not and will never be visted by the find - // function and as such doesn't represent pointer invalidation - // race-condition risk. - oldest_visible_record->next(nullptr); // No transaction will look - // further than this record and - // that's why it's safe to set - // next to nullptr. - // Calling destructor of head_of_deletable_records will clean everything - // older than this record since they are called recursively. - return std::make_pair(false, head_of_deletable_records); - } - - // This can happen only if the head points to a expired record. Since there - // is no visible records in this version_list we can remove it. - head_ = nullptr; - // This is safe to return as ready for deletion since we unlinked head - // above and this will only be deleted after the last active transaction - // ends. - return std::make_pair(true, head); - } - - /** - * @brief - returns oldest record - * @return nullptr if none exist - */ - T *Oldest() { - T *r = head_; - while (r && r->next(std::memory_order_seq_cst)) - r = r->next(std::memory_order_seq_cst); - return r; - } - - T *find(const tx::Transaction &t) { - T *r = head_; - - // nullptr - // | - // [v1] ... - // | - // [v2] <------+ - // | | - // [v3] <------+ - // | | Jump backwards until you find a first visible - // [VerList] ----+ version, or you reach the end of the list - // - while (r != nullptr && !r->visible(t)) - r = r->next(std::memory_order_seq_cst); - - return r; - } - - /** - * Looks for and sets two versions. The 'old' version is the - * newest version that is visible by the current transaction+command, - * but has not been created by it. The 'new' version is the version - * that has been created by current transaction+command. - * - * It is possible that both, either or neither are found: - * - both are found when an existing record has been modified - * - only old is found when an existing record has not been modified - * - only new is found when the whole vlist was created - * - neither is found when for example the record has been deleted but not - * garbage collected yet - * - * @param t The transaction - */ - void find_set_old_new(const tx::Transaction &t, T **old_ref, T **new_ref) { - // assume that the sought old record is further down the list - // from new record, so that if we found old we can stop looking - *new_ref = nullptr; - *old_ref = head_; - while (*old_ref != nullptr && !(*old_ref)->visible(t)) { - if (!*new_ref && (*old_ref)->is_created_by(t)) *new_ref = *old_ref; - *old_ref = (*old_ref)->next(std::memory_order_seq_cst); - } - } - - /** - * Looks for the first visible record seen by this transaction. If the current - * transaction has already created new record in the current command then that - * record is returned, else first older visible record is updated. New record - * becomes head of the version list and it is returned. There should always be - * older visible record when this update is called. - * - * @param t The transaction - */ - T *update(const tx::Transaction &t) { - DCHECK(head_ != nullptr) << "Head is nullptr on update."; - T *old_record = nullptr; - T *new_record = nullptr; - find_set_old_new(t, &old_record, &new_record); - - // check if current transaction in current cmd has - // already updated version list - if (new_record) return new_record; - - // check if we found any visible records - CHECK(old_record != nullptr) << "Updating nullptr record"; - - return update(old_record, t); - } - - /** Makes the given record as being expired by the given transaction. */ - void remove(T *record, const tx::Transaction &t) { - DCHECK(record != nullptr) << "Record is nullptr on removal."; - lock_and_validate(record, t); - record->mark_expired(t); - } - - const storage::Gid gid_; - - int64_t cypher_id() { return gid_.AsInt(); } - - private: - void lock_and_validate(T *record, const tx::Transaction &t) { - DCHECK(record != nullptr) << "Record is nullptr on lock and validation."; - - // take a lock on this node - t.TakeLock(lock_); - - // if the record hasn't been deleted yet or the deleting transaction - // has aborted, it's ok to modify it - if (!record->tx().exp || !record->exp_committed(t.engine_)) return; - - // if it committed, then we have a serialization conflict - throw SerializationError(); - } - - T *update(T *record, const tx::Transaction &t) { - DCHECK(record != nullptr) << "Record is nullptr on update."; - lock_and_validate(record, t); - - // It could be done with unique_ptr but while this could mean memory - // leak on exception, unique_ptr could mean use after free. Memory - // leak is less dangerous. - auto *updated = record->CloneData(); - - updated->mark_created(t); - record->mark_expired(t); - - // Updated version should point to the latest available version. Older - // versions that can be deleted will be removed during the GC phase. - updated->next(head_.load(), std::memory_order_seq_cst); - - // Store the updated version as the first version point to by head. - head_.store(updated, std::memory_order_seq_cst); - - return updated; - } - - std::atomic head_{nullptr}; - RecordLock lock_; -}; -} // namespace mvcc diff --git a/src/storage/single_node_ha/record_accessor.cpp b/src/storage/single_node_ha/record_accessor.cpp deleted file mode 100644 index 010983e68..000000000 --- a/src/storage/single_node_ha/record_accessor.cpp +++ /dev/null @@ -1,174 +0,0 @@ -#include "storage/single_node_ha/record_accessor.hpp" - -#include - -#include "database/single_node_ha/graph_db_accessor.hpp" -#include "durability/single_node_ha/state_delta.hpp" -#include "storage/single_node_ha/edge.hpp" -#include "storage/single_node_ha/vertex.hpp" - -using database::StateDelta; - -template -RecordAccessor::RecordAccessor(mvcc::VersionList *address, - database::GraphDbAccessor &db_accessor) - : db_accessor_(&db_accessor), address_(address) {} - -template -PropertyValue RecordAccessor::PropsAt(storage::Property key) const { - return current().properties_.at(key); -} - -template <> -void RecordAccessor::PropsSet(storage::Property key, - PropertyValue value) { - auto &dba = db_accessor(); - auto delta = StateDelta::PropsSetVertex(dba.transaction_id(), gid(), key, - dba.PropertyName(key), value); - auto previous_value = PropsAt(key); - update().properties_.set(key, value); - dba.UpdateOnAddProperty(key, previous_value, value, *this, &update()); - dba.sd_buffer()->Emplace(delta); -} - -template <> -void RecordAccessor::PropsSet(storage::Property key, - PropertyValue value) { - auto &dba = db_accessor(); - auto delta = StateDelta::PropsSetEdge(dba.transaction_id(), gid(), key, - dba.PropertyName(key), value); - - update().properties_.set(key, value); - dba.sd_buffer()->Emplace(delta); -} - -template <> -void RecordAccessor::PropsErase(storage::Property key) { - auto &dba = db_accessor(); - auto delta = - StateDelta::PropsSetVertex(dba.transaction_id(), gid(), key, - dba.PropertyName(key), PropertyValue()); - auto previous_value = PropsAt(key); - update().properties_.set(key, PropertyValue()); - dba.UpdateOnRemoveProperty(key, previous_value, *this, &update()); - dba.sd_buffer()->Emplace(delta); -} - -template <> -void RecordAccessor::PropsErase(storage::Property key) { - auto &dba = db_accessor(); - auto delta = - StateDelta::PropsSetEdge(dba.transaction_id(), gid(), key, - dba.PropertyName(key), PropertyValue()); - update().properties_.set(key, PropertyValue()); - dba.sd_buffer()->Emplace(delta); -} - -template -void RecordAccessor::PropsClear() { - std::vector to_remove; - for (const auto &kv : update().properties_) to_remove.emplace_back(kv.first); - for (const auto &prop : to_remove) { - PropsErase(prop); - } -} - -template -const PropertyValueStore &RecordAccessor::Properties() const { - return current().properties_; -} - -template -bool RecordAccessor::operator==(const RecordAccessor &other) const { - DCHECK(db_accessor_->transaction_id() == other.db_accessor_->transaction_id()) - << "Not in the same transaction."; - return address_ == other.address_; -} - -template -database::GraphDbAccessor &RecordAccessor::db_accessor() const { - return *db_accessor_; -} - -template -storage::Gid RecordAccessor::gid() const { - return address_->gid_; -} - -template -typename mvcc::VersionList *RecordAccessor::address() const { - return address_; -} - -template -RecordAccessor &RecordAccessor::SwitchNew() { - if (!new_) { - // if new_ is not set yet, look for it - // we can just Reconstruct the pointers, old_ will get initialized - // to the same value as it has now, and the amount of work is the - // same as just looking for a new_ record - if (!Reconstruct()) - DLOG(FATAL) - << "RecordAccessor::SwitchNew - accessor invalid after Reconstruct"; - } - current_ = new_ ? new_ : old_; - return *this; -} - -template -RecordAccessor &RecordAccessor::SwitchOld() { - current_ = old_ ? old_ : new_; - return *this; -} - -template -bool RecordAccessor::Reconstruct() const { - auto &dba = db_accessor(); - const auto &addr = address(); - addr->find_set_old_new(dba.transaction(), &old_, &new_); - current_ = old_ ? old_ : new_; - return old_ != nullptr || new_ != nullptr; -} - -template -TRecord &RecordAccessor::update() const { - auto &dba = db_accessor(); - // Edges have lazily initialize mutable, versioned data (properties). - if (std::is_same::value && current_ == nullptr) { - bool reconstructed = Reconstruct(); - DCHECK(reconstructed) << "Unable to initialize record"; - } - - const auto &t = dba.transaction(); - if (!new_ && old_->is_expired_by(t)) - throw RecordDeletedError(); - else if (new_ && new_->is_expired_by(t)) - throw RecordDeletedError(); - - if (new_) return *new_; - - const auto &addr = address(); - new_ = addr->update(dba.transaction()); - - DCHECK(new_ != nullptr) << "RecordAccessor.new_ is null after update"; - return *new_; -} - -template -int64_t RecordAccessor::CypherId() const { - return address()->cypher_id(); -} - -template -const TRecord &RecordAccessor::current() const { - // Edges have lazily initialize mutable, versioned data (properties). - if (std::is_same::value && current_ == nullptr) { - bool reconstructed = Reconstruct(); - DCHECK(reconstructed) << "Unable to initialize record"; - } - DCHECK(current_ != nullptr) << "RecordAccessor.current_ pointer is nullptr"; - return *current_; -} - -template class RecordAccessor; -template class RecordAccessor; diff --git a/src/storage/single_node_ha/record_accessor.hpp b/src/storage/single_node_ha/record_accessor.hpp deleted file mode 100644 index 4abfe6d57..000000000 --- a/src/storage/single_node_ha/record_accessor.hpp +++ /dev/null @@ -1,204 +0,0 @@ -/// @file -#pragma once - -#include - -#include "storage/single_node_ha/mvcc/version_list.hpp" -#include "storage/common/types/property_value.hpp" -#include "storage/common/types/property_value_store.hpp" -#include "storage/common/types/types.hpp" - -namespace database { -class GraphDbAccessor; -struct StateDelta; -}; // namespace database - -/** - * An accessor to a database record (an Edge or a Vertex). - * - * Exposes view and update functions to the client programmer. - * Assumes responsibility of doing all the relevant book-keeping - * (such as index updates etc). - * - * @tparam TRecord Type of record (MVCC Version) of the accessor. - */ -template -class RecordAccessor { - protected: - /** - * The database::GraphDbAccessor is friend to this accessor so it can - * operate on it's data (mvcc version-list and the record itself). - * This is legitimate because database::GraphDbAccessor creates - * RecordAccessors - * and is semantically their parent/owner. It is necessary because - * the database::GraphDbAccessor handles insertions and deletions, and these - * operations modify data intensively. - */ - friend database::GraphDbAccessor; - - public: - /** - * @param address Address (local or global) of the Vertex/Edge of this - * accessor. - * @param db_accessor The DB accessor that "owns" this record accessor. - */ - RecordAccessor(mvcc::VersionList *address, database::GraphDbAccessor &db_accessor); - - // this class is default copyable, movable and assignable - RecordAccessor(const RecordAccessor &other) = default; - RecordAccessor(RecordAccessor &&other) = default; - RecordAccessor &operator=(const RecordAccessor &other) = default; - RecordAccessor &operator=(RecordAccessor &&other) = default; - - /** Gets the property for the given key. */ - PropertyValue PropsAt(storage::Property key) const; - - /** Sets a value on the record for the given property. */ - void PropsSet(storage::Property key, PropertyValue value); - - /** Erases the property for the given key. */ - void PropsErase(storage::Property key); - - /** Removes all the properties from this record. */ - void PropsClear(); - - /** Returns the properties of this record. */ - const PropertyValueStore &Properties() const; - - bool operator==(const RecordAccessor &other) const; - - bool operator!=(const RecordAccessor &other) const { - return !(*this == other); - } - - /** Returns a GraphDB accessor of this record accessor. */ - database::GraphDbAccessor &db_accessor() const; - - /** - * Returns a globally-unique ID of this vertex or edge. Note that vertices - * and edges have separate ID domains, there can be a vertex with ID X and an - * edge with the same id. - */ - storage::Gid gid() const; - - mvcc::VersionList *address() const; - - /* - * Switches this record accessor to use the latest version visible to the - * current transaction+command. Possibly the one that was created by this - * transaction+command. - * - * @return A reference to this. - */ - RecordAccessor &SwitchNew(); - - /** Returns the new record pointer. */ - TRecord *GetNew() const { return new_; } - - /** - * Attempts to switch this accessor to use the latest version not updated by - * the current transaction+command. If that is not possible (vertex/edge was - * created by the current transaction/command), it does nothing (current - * remains pointing to the new version). - * - * @return A reference to this. - */ - RecordAccessor &SwitchOld(); - - /** Returns the old record pointer. */ - TRecord *GetOld() const { return old_; } - - /** - * Reconstructs the internal state of the record accessor so it uses the - * versions appropriate to this transaction+command. - * - * @return True if this accessor is valid after reconstruction. This means - * that at least one record pointer was found (either new_ or old_), possibly - * both. - */ - bool Reconstruct() const; - - /** - * Ensures there is an updateable version of the record in the version_list, - * and that the `new_` pointer points to it. Returns a reference to that - * version. - * - * It is not legal to call this function on a Vertex/Edge that has been - * deleted in the current transaction+command. - * - * @throws RecordDeletedError - */ - TRecord &update() const; - - /** - * Returns true if the given accessor is visible to the given transaction. - * - * @param current_state If true then the graph state for the - * current transaction+command is returned (insertions, updates and - * deletions performed in the current transaction+command are not - * ignored). - */ - bool Visible(const tx::Transaction &t, bool current_state) const { - return (old_ && !(current_state && old_->is_expired_by(t))) || - (current_state && new_ && !new_->is_expired_by(t)); - } - - /** - * Returns Cypher Id of this record. - */ - int64_t CypherId() const; - - /** Returns the current version (either new_ or old_) set on this - * RecordAccessor. */ - const TRecord ¤t() const; - - protected: - /** - * Pointer to the version (either old_ or new_) that READ operations - * in the accessor should take data from. Note that WRITE operations - * should always use new_. - * - * This pointer can be null if created by an accessor which lazily reads from - * mvcc. - */ - mutable TRecord *current_{nullptr}; - - private: - // The database accessor for which this record accessor is created - // Provides means of getting to the transaction and database functions. - // Immutable, set in the constructor and never changed. - database::GraphDbAccessor *db_accessor_; - - mvcc::VersionList *address_; - - /** - * Latest version which is visible to the current transaction+command - * but has not been created nor modified by the current transaction+command. - * - * Can be null only when the record itself (the version-list) has - * been created by the current transaction+command. - */ - mutable TRecord *old_{nullptr}; - - /** - * Version that has been modified (created or updated) by the current - * transaction+command. - * - * Can be null when the record has not been modified in the current - * transaction+command. It is also possible that the modification - * has happened, but this RecordAccessor does not know this. To - * ensure correctness, the `SwitchNew` function must check if this - * is null, and if it is it must check with the vlist_ if there is - * an update. - */ - mutable TRecord *new_{nullptr}; -}; - -/** Error when trying to update a deleted record */ -class RecordDeletedError : public utils::BasicException { - public: - RecordDeletedError() - : utils::BasicException( - "Can't update a record deleted in the current transaction+commad") { - } -}; diff --git a/src/storage/single_node_ha/state_delta_buffer.hpp b/src/storage/single_node_ha/state_delta_buffer.hpp deleted file mode 100644 index abc747839..000000000 --- a/src/storage/single_node_ha/state_delta_buffer.hpp +++ /dev/null @@ -1,47 +0,0 @@ -#pragma once - -#include "durability/single_node_ha/state_delta.hpp" - -namespace storage { - -class StateDeltaBuffer final { - public: - /// Inserts a new StateDelta in buffer. - void Emplace(const database::StateDelta &delta) { - tx::TransactionId tx_id = delta.transaction_id; - std::vector *curr_buffer; - { - // We only need the lock when we're inserting a new key into the buffer. - std::lock_guard lock(buffer_lock_); - curr_buffer = &buffer_[tx_id]; - } - curr_buffer->emplace_back(delta); - } - - /// Retrieves all buffered StateDeltas for a given transaction id. - /// If there are no such StateDeltas, the return vector is empty. - std::vector GetDeltas( - const tx::TransactionId &tx_id) { - std::vector *curr_buffer; - { - std::lock_guard lock(buffer_lock_); - auto it = buffer_.find(tx_id); - if (it == buffer_.end()) return {}; - curr_buffer = &it->second; - } - return *curr_buffer; - } - - /// Deletes all buffered StateDeltas for a given transaction id. - void Erase(const tx::TransactionId &tx_id) { - std::lock_guard lock(buffer_lock_); - buffer_.erase(tx_id); - } - - private: - mutable std::mutex buffer_lock_; - std::unordered_map> - buffer_; -}; - -} // namespace storage diff --git a/src/storage/single_node_ha/storage.hpp b/src/storage/single_node_ha/storage.hpp deleted file mode 100644 index d88064c51..000000000 --- a/src/storage/single_node_ha/storage.hpp +++ /dev/null @@ -1,102 +0,0 @@ -#pragma once - -#include -#include - -#include "data_structures/concurrent/concurrent_map.hpp" -#include "kvstore/kvstore.hpp" -#include "storage/common/constraints/unique_constraints.hpp" -#include "storage/common/types/types.hpp" -#include "storage/single_node_ha/edge.hpp" -#include "storage/single_node_ha/indexes/key_index.hpp" -#include "storage/single_node_ha/indexes/label_property_index.hpp" -#include "storage/single_node_ha/mvcc/version_list.hpp" -#include "storage/single_node_ha/vertex.hpp" -#include "transactions/type.hpp" - -namespace database { -class GraphDb; -}; - -namespace database { - -/** A data structure containing the main data members of a graph database. */ -class Storage { - public: - explicit Storage(const std::vector &properties_on_disk) - : properties_on_disk_{properties_on_disk} {} - - public: - ~Storage() { - // Delete vertices and edges which weren't collected before, also deletes - // records inside version list - for (auto &id_vlist : vertices_.access()) delete id_vlist.second; - for (auto &id_vlist : edges_.access()) delete id_vlist.second; - } - - Storage(const Storage &) = delete; - Storage(Storage &&) = delete; - Storage &operator=(const Storage &) = delete; - Storage &operator=(Storage &&) = delete; - - storage::GidGenerator &VertexGenerator() { return vertex_generator_; } - storage::GidGenerator &EdgeGenerator() { return edge_generator_; } - LabelPropertyIndex &label_property_index() { return label_property_index_; } - - /// Gets the local address for the given gid. Fails if not present. - template - mvcc::VersionList *LocalAddress(storage::Gid gid) const { - const auto &map = GetMap(); - auto access = map.access(); - auto found = access.find(gid); - CHECK(found != access.end()) - << "Failed to find " - << (std::is_same::value ? "vertex" : "edge") - << " for gid: " << gid.AsUint(); - return found->second; - } - - /// Gets names of properties stored on disk - std::vector &PropertiesOnDisk() { return properties_on_disk_; } - - private: - friend class GraphDbAccessor; - // Needed for GraphDb::RefreshStat. - friend class GraphDb; - friend class StorageGc; - - storage::GidGenerator vertex_generator_; - storage::GidGenerator edge_generator_; - - // main storage for the graph - ConcurrentMap *> vertices_; - ConcurrentMap *> edges_; - - // indexes - KeyIndex labels_index_; - LabelPropertyIndex label_property_index_; - - // unique constraints - storage::constraints::UniqueConstraints unique_constraints_; - - std::vector properties_on_disk_; - - /// Gets the Vertex/Edge main storage map. - template - const ConcurrentMap *> &GetMap() - const; -}; - -template <> -inline const ConcurrentMap *> - &Storage::GetMap() const { - return vertices_; -} - -template <> -inline const ConcurrentMap *> - &Storage::GetMap() const { - return edges_; -} - -} // namespace database diff --git a/src/storage/single_node_ha/storage_gc.hpp b/src/storage/single_node_ha/storage_gc.hpp deleted file mode 100644 index b87a5295e..000000000 --- a/src/storage/single_node_ha/storage_gc.hpp +++ /dev/null @@ -1,174 +0,0 @@ -#pragma once - -#include -#include - -#include "data_structures/concurrent/concurrent_map.hpp" -#include "raft/raft_server.hpp" -#include "storage/common/types/types.hpp" -#include "storage/single_node_ha/deferred_deleter.hpp" -#include "storage/single_node_ha/edge.hpp" -#include "storage/single_node_ha/garbage_collector.hpp" -#include "storage/single_node_ha/mvcc/version_list.hpp" -#include "storage/single_node_ha/storage.hpp" -#include "storage/single_node_ha/vertex.hpp" -#include "transactions/single_node_ha/engine.hpp" -#include "utils/exceptions.hpp" -#include "utils/scheduler.hpp" -#include "utils/timer.hpp" - -namespace database { - -/** Garbage collection capabilities for database::Storage. Extracted into a - * separate class for better code organization, and because the GC requires a - * tx::Engine, while the Storage itself can exist without it. Even though, a - * database::Storage is always acompanied by a Gc. - */ -class StorageGc { - template - class MvccDeleter { - using VlistT = mvcc::VersionList; - - public: - explicit MvccDeleter(ConcurrentMap &collection) - : gc_(collection, record_deleter_, version_list_deleter_) {} - DeferredDeleter record_deleter_; - DeferredDeleter> version_list_deleter_; - GarbageCollector, TRecord> gc_; - }; - - public: - /** Creates a garbage collector for the given storage that uses the given - * tx::Engine. If `pause_sec` is greater then zero, then GC gets triggered - * periodically. */ - StorageGc(Storage &storage, tx::Engine &tx_engine, - raft::RaftServer *raft_server, int pause_sec) - : tx_engine_(tx_engine), - raft_server_(raft_server), - storage_(storage), - vertices_(storage.vertices_), - edges_(storage.edges_) { - if (pause_sec > 0) - scheduler_.Run( - "Storage GC", std::chrono::seconds(pause_sec), [this] { - try { - CollectGarbage(); - } catch (const utils::BasicException &e) { - DLOG(WARNING) - << "Couldn't perform storage garbage collection due to: " - << e.what(); - } - }); - } - - ~StorageGc() { - // We have to stop the scheduler before destroying this class. - scheduler_.Stop(); - - edges_.record_deleter_.FreeExpiredObjects(tx::Transaction::MaxId()); - vertices_.record_deleter_.FreeExpiredObjects(tx::Transaction::MaxId()); - edges_.version_list_deleter_.FreeExpiredObjects(tx::Transaction::MaxId()); - vertices_.version_list_deleter_.FreeExpiredObjects( - tx::Transaction::MaxId()); - } - - StorageGc(const StorageGc &) = delete; - StorageGc(StorageGc &&) = delete; - StorageGc &operator=(const StorageGc &) = delete; - StorageGc &operator=(StorageGc &&) = delete; - - void CollectLogGarbage(tx::TransactionId oldest_active) { - auto safe_to_delete = GetClogSafeTransaction(oldest_active); - if (safe_to_delete) { - tx_engine_.GarbageCollectCommitLog(*safe_to_delete); - } - } - - void CollectGarbage() { - // main garbage collection logic - // see wiki documentation for logic explanation - VLOG(21) << "Garbage collector started"; - const auto snapshot_gc = tx_engine_.GlobalGcSnapshot(); - { - // This can be run concurrently - utils::Timer x; - - vertices_.gc_.Run(snapshot_gc, tx_engine_); - edges_.gc_.Run(snapshot_gc, tx_engine_); - storage_.unique_constraints_.Refresh(snapshot_gc, tx_engine_); - VLOG(21) << "Garbage collector mvcc phase time: " << x.Elapsed().count(); - } - // This has to be run sequentially after gc because gc modifies - // version_lists and changes the oldest visible record, on which Refresh - // depends. - { - // This can be run concurrently - utils::Timer x; - storage_.labels_index_.Refresh(snapshot_gc, tx_engine_); - storage_.label_property_index_.Refresh(snapshot_gc, tx_engine_); - VLOG(21) << "Garbage collector index phase time: " << x.Elapsed().count(); - } - { - // We free expired objects with snapshot.back(), which is - // the ID of the oldest active transaction (or next active, if there - // are no currently active). That's legal because that was the - // last possible transaction that could have obtained pointers - // to those records. New snapshot can be used, different than one used for - // first two phases of gc. - utils::Timer x; - const auto snapshot_gc = tx_engine_.GlobalGcSnapshot(); - edges_.record_deleter_.FreeExpiredObjects(snapshot_gc.back()); - vertices_.record_deleter_.FreeExpiredObjects(snapshot_gc.back()); - edges_.version_list_deleter_.FreeExpiredObjects(snapshot_gc.back()); - vertices_.version_list_deleter_.FreeExpiredObjects(snapshot_gc.back()); - VLOG(21) << "Garbage collector deferred deletion phase time: " - << x.Elapsed().count(); - } - - CollectLogGarbage(snapshot_gc.back()); - gc_txid_ranges_.emplace(snapshot_gc.back(), tx_engine_.GlobalLast()); - - VLOG(21) << "gc snapshot: " << snapshot_gc; - VLOG(21) << "edge_record_deleter_ size: " << edges_.record_deleter_.Count(); - VLOG(21) << "vertex record deleter_ size: " - << vertices_.record_deleter_.Count(); - VLOG(21) << "edge_version_list_deleter_ size: " - << edges_.version_list_deleter_.Count(); - VLOG(21) << "vertex_version_list_deleter_ size: " - << vertices_.version_list_deleter_.Count(); - VLOG(21) << "vertices_ size: " << storage_.vertices_.access().size(); - VLOG(21) << "edges_ size: " << storage_.edges_.access().size(); - VLOG(21) << "Garbage collector finished."; - } - - protected: - // Find the largest transaction from which everything older is safe to - // delete, ones for which the hints have been set in the gc phase, and no - // alive transaction from the time before the hints were set is still alive - // (otherwise that transaction could still be waiting for a resolution of - // the query to the commit log about some old transaction) - std::optional GetClogSafeTransaction( - tx::TransactionId oldest_active) { - std::optional safe_to_delete; - while (!gc_txid_ranges_.empty() && - gc_txid_ranges_.front().second < oldest_active) { - safe_to_delete = gc_txid_ranges_.front().first; - gc_txid_ranges_.pop(); - } - return safe_to_delete; - } - - tx::Engine &tx_engine_; - raft::RaftServer *raft_server_; - utils::Scheduler scheduler_; - - private: - Storage &storage_; - MvccDeleter vertices_; - MvccDeleter edges_; - - // History of ranges - // that gc operated on at some previous time - used to clear commit log - std::queue> gc_txid_ranges_; -}; -} // namespace database diff --git a/src/storage/single_node_ha/vertex.hpp b/src/storage/single_node_ha/vertex.hpp deleted file mode 100644 index 9e1c653cd..000000000 --- a/src/storage/single_node_ha/vertex.hpp +++ /dev/null @@ -1,28 +0,0 @@ -#pragma once - -#include "storage/single_node_ha/mvcc/record.hpp" -#include "storage/single_node_ha/mvcc/version_list.hpp" -#include "storage/common/types/property_value_store.hpp" -#include "storage/common/types/types.hpp" -#include "storage/single_node_ha/edges.hpp" - -class Vertex : public mvcc::Record { - public: - Vertex() = default; - // Returns new Vertex with copy of data stored in this Vertex, but without - // copying superclass' members. - Vertex *CloneData() { return new Vertex(*this); } - - Edges out_; - Edges in_; - std::vector labels_; - PropertyValueStore properties_; - - private: - Vertex(const Vertex &other) - : mvcc::Record(), - out_(other.out_), - in_(other.in_), - labels_(other.labels_), - properties_(other.properties_) {} -}; diff --git a/src/storage/single_node_ha/vertex_accessor.cpp b/src/storage/single_node_ha/vertex_accessor.cpp deleted file mode 100644 index ad1c5bc33..000000000 --- a/src/storage/single_node_ha/vertex_accessor.cpp +++ /dev/null @@ -1,86 +0,0 @@ -#include "storage/single_node_ha/vertex_accessor.hpp" - -#include - -#include "database/single_node_ha/graph_db_accessor.hpp" -#include "durability/single_node_ha/state_delta.hpp" -#include "utils/algorithm.hpp" - -VertexAccessor::VertexAccessor(mvcc::VersionList *address, - database::GraphDbAccessor &db_accessor) - : RecordAccessor(address, db_accessor) { - Reconstruct(); -} - -size_t VertexAccessor::out_degree() const { return current().out_.size(); } - -size_t VertexAccessor::in_degree() const { return current().in_.size(); } - -void VertexAccessor::add_label(storage::Label label) { - auto &dba = db_accessor(); - auto delta = database::StateDelta::AddLabel(dba.transaction_id(), gid(), - label, dba.LabelName(label)); - Vertex &vertex = update(); - // not a duplicate label, add it - if (!utils::Contains(vertex.labels_, label)) { - vertex.labels_.emplace_back(label); - dba.sd_buffer()->Emplace(delta); - dba.UpdateOnAddLabel(label, *this, &vertex); - } -} - -void VertexAccessor::remove_label(storage::Label label) { - auto &dba = db_accessor(); - auto delta = database::StateDelta::RemoveLabel(dba.transaction_id(), gid(), - label, dba.LabelName(label)); - Vertex &vertex = update(); - if (utils::Contains(vertex.labels_, label)) { - auto &labels = vertex.labels_; - auto found = std::find(labels.begin(), labels.end(), delta.label); - std::swap(*found, labels.back()); - labels.pop_back(); - dba.sd_buffer()->Emplace(delta); - dba.UpdateOnRemoveLabel(label, *this); - } -} - -bool VertexAccessor::has_label(storage::Label label) const { - auto &labels = this->current().labels_; - return std::find(labels.begin(), labels.end(), label) != labels.end(); -} - -const std::vector &VertexAccessor::labels() const { - return this->current().labels_; -} - -void VertexAccessor::RemoveOutEdge(mvcc::VersionList *edge) { - auto &dba = db_accessor(); - - SwitchNew(); - if (current().is_expired_by(dba.transaction())) return; - - update().out_.RemoveEdge(edge); -} - -void VertexAccessor::RemoveInEdge(mvcc::VersionList *edge) { - auto &dba = db_accessor(); - - SwitchNew(); - if (current().is_expired_by(dba.transaction())) return; - - update().in_.RemoveEdge(edge); -} - -std::ostream &operator<<(std::ostream &os, const VertexAccessor &va) { - os << "V("; - utils::PrintIterable(os, va.labels(), ":", [&](auto &stream, auto label) { - stream << va.db_accessor().LabelName(label); - }); - os << " {"; - utils::PrintIterable(os, va.Properties(), ", ", - [&](auto &stream, const auto &pair) { - stream << va.db_accessor().PropertyName(pair.first) - << ": " << pair.second; - }); - return os << "})"; -} diff --git a/src/storage/single_node_ha/vertex_accessor.hpp b/src/storage/single_node_ha/vertex_accessor.hpp deleted file mode 100644 index 472a96063..000000000 --- a/src/storage/single_node_ha/vertex_accessor.hpp +++ /dev/null @@ -1,158 +0,0 @@ -#pragma once - -#include -#include -#include - -#include -#include - -#include "storage/single_node_ha/edge_accessor.hpp" -#include "storage/single_node_ha/record_accessor.hpp" -#include "storage/single_node_ha/vertex.hpp" -#include "utils/algorithm.hpp" - -/** - * Provides ways for the client programmer (i.e. code generated - * by the compiler) to interact with a Vertex. - * - * This class indirectly inherits MVCC data structures and - * takes care of MVCC versioning. - */ -class VertexAccessor final : public RecordAccessor { - // Helper function for creating an iterator over edges. - // @param begin - begin iterator - // @param end - end iterator - // @param from - if true specifies that the vertex represents `from` part of - // the edge, otherwise it specifies `to` part of the edge - // @param vertex - one endpoint of every edge - // @param db_accessor - database accessor - // @return - Iterator over EdgeAccessors - template - static inline auto MakeAccessorIterator( - TIterator &&begin, TIterator &&end, bool from, - mvcc::VersionList *vertex, - database::GraphDbAccessor &db_accessor) { - return iter::imap( - [from, vertex, &db_accessor](auto &edges_element) { - if (from) { - return EdgeAccessor(edges_element.edge, db_accessor, vertex, - edges_element.vertex, edges_element.edge_type); - } else { - return EdgeAccessor(edges_element.edge, db_accessor, - edges_element.vertex, vertex, - edges_element.edge_type); - } - }, - utils::Iterable(std::forward(begin), - std::forward(end))); - } - - public: - VertexAccessor(mvcc::VersionList *address, - database::GraphDbAccessor &db_accessor); - - /** Returns the number of outgoing edges. */ - size_t out_degree() const; - - /** Returns the number of incoming edges. */ - size_t in_degree() const; - - /** Adds a label to the Vertex. If the Vertex already has that label the call - * has no effect. */ - void add_label(storage::Label label); - - /** Removes a label from the Vertex. */ - void remove_label(storage::Label label); - - /** Indicates if the Vertex has the given label. */ - bool has_label(storage::Label label) const; - - /** Returns all the Labels of the Vertex. */ - const std::vector &labels() const; - - /** Returns EdgeAccessors for all incoming edges. */ - auto in() const { - return MakeAccessorIterator(current().in_.begin(), current().in_.end(), - false, address(), db_accessor()); - } - - /** - * Returns EdgeAccessors for all incoming edges. - * - * @param dest - The destination vertex filter. - * @param edge_types - Edge types filter. At least one be matched. If nullptr - * or empty, the parameter is ignored. - */ - auto in(const VertexAccessor &dest, - const std::vector *edge_types = nullptr) const { - return MakeAccessorIterator(current().in_.begin(dest.address(), edge_types), - current().in_.end(), false, address(), - db_accessor()); - } - - /** - * Returns EdgeAccessors for all incoming edges. - * - * @param edge_types - Edge types filter. At least one be matched. If nullptr - * or empty, the parameter is ignored. - */ - auto in(const std::vector *edge_types) const { - return MakeAccessorIterator(current().in_.begin(nullptr, edge_types), - current().in_.end(), false, address(), - db_accessor()); - } - - /** Returns EdgeAccessors for all outgoing edges. */ - auto out() const { - return MakeAccessorIterator(current().out_.begin(), current().out_.end(), - true, address(), db_accessor()); - } - - /** - * Returns EdgeAccessors for all outgoing edges whose destination is the given - * vertex. - * - * @param dest - The destination vertex filter. - * @param edge_types - Edge types filter. At least one be matched. If nullptr - * or empty, the parameter is ignored. - */ - auto out(const VertexAccessor &dest, - const std::vector *edge_types = nullptr) const { - return MakeAccessorIterator( - current().out_.begin(dest.address(), edge_types), current().out_.end(), - true, address(), db_accessor()); - } - - /** - * Returns EdgeAccessors for all outgoing edges. - * - * @param edge_types - Edge types filter. At least one be matched. If nullptr - * or empty, the parameter is ignored. - */ - auto out(const std::vector *edge_types) const { - return MakeAccessorIterator(current().out_.begin(nullptr, edge_types), - current().out_.end(), true, address(), - db_accessor()); - } - - /** Removes the given edge from the outgoing edges of this vertex. Note that - * this operation should always be accompanied by the removal of the edge from - * the incoming edges on the other side and edge deletion. */ - void RemoveOutEdge(mvcc::VersionList *edge); - - /** Removes the given edge from the incoming edges of this vertex. Note that - * this operation should always be accompanied by the removal of the edge from - * the outgoing edges on the other side and edge deletion. */ - void RemoveInEdge(mvcc::VersionList *edge); -}; - -std::ostream &operator<<(std::ostream &, const VertexAccessor &); - -// hash function for the vertex accessor -namespace std { -template <> -struct hash { - size_t operator()(const VertexAccessor &v) const { return v.gid().AsUint(); }; -}; -} // namespace std diff --git a/src/storage/vertex_accessor.hpp b/src/storage/vertex_accessor.hpp deleted file mode 100644 index 13f0afc93..000000000 --- a/src/storage/vertex_accessor.hpp +++ /dev/null @@ -1,7 +0,0 @@ -#pragma once - -#ifdef MG_SINGLE_NODE_HA -#include "storage/single_node_ha/vertex_accessor.hpp" -#endif - -// TODO: write documentation for the interface here! diff --git a/src/transactions/commit_log.hpp b/src/transactions/commit_log.hpp deleted file mode 100644 index b99a55ce4..000000000 --- a/src/transactions/commit_log.hpp +++ /dev/null @@ -1,80 +0,0 @@ -#pragma once - -#include "data_structures/bitset/dynamic_bitset.hpp" -#include "transactions/type.hpp" - -namespace tx { - -// This class is lock free. There is no need to acquire any lock when accessing -// this class and this class doesn't acquire any lock on method calls. -class CommitLog final { - public: - static constexpr int kBitsetBlockSize = 32768; - - CommitLog() = default; - CommitLog(const CommitLog &) = delete; - CommitLog(CommitLog &&) = delete; - CommitLog &operator=(const CommitLog &) = delete; - CommitLog &operator=(CommitLog &&) = delete; - - bool is_active(TransactionId id) const { - return fetch_info(id).is_active(); - } - - bool is_committed(TransactionId id) const { - return fetch_info(id).is_committed(); - } - - void set_committed(TransactionId id) { log.set(2 * id); } - - bool is_aborted(TransactionId id) const { - return fetch_info(id).is_aborted(); - } - - void set_aborted(TransactionId id) { log.set(2 * id + 1); } - - // Clears the commit log from bits associated with transactions with an id - // lower than `id`. - void garbage_collect_older(TransactionId id) { log.delete_prefix(2 * id); } - - class Info final { - public: - Info() {} // Needed for serialization. - enum Status { - ACTIVE = 0, // 00 - COMMITTED = 1, // 01 - ABORTED = 2, // 10 - }; - - explicit Info(uint8_t flags) { - if (flags & ABORTED) { - flags_ = ABORTED; - } else if (flags & COMMITTED) { - flags_ = COMMITTED; - } else { - flags_ = ACTIVE; - } - } - - bool is_active() const { return flags_ == ACTIVE; } - - bool is_committed() const { - if (flags_ & ABORTED) return false; - return flags_ & COMMITTED; - } - - bool is_aborted() const { return flags_ & ABORTED; } - - operator uint8_t() const { return flags_; } - - private: - uint8_t flags_{0}; - }; - - Info fetch_info(TransactionId id) const { return Info{log.at(2 * id, 2)}; } - - private: - DynamicBitset log; -}; - -} // namespace tx diff --git a/src/transactions/engine.hpp b/src/transactions/engine.hpp deleted file mode 100644 index e480d1aa8..000000000 --- a/src/transactions/engine.hpp +++ /dev/null @@ -1,5 +0,0 @@ -#pragma once - -#ifdef MG_SINGLE_NODE_HA -#include "transactions/single_node_ha/engine.hpp" -#endif diff --git a/src/transactions/lock_store.hpp b/src/transactions/lock_store.hpp deleted file mode 100644 index fd5a92001..000000000 --- a/src/transactions/lock_store.hpp +++ /dev/null @@ -1,85 +0,0 @@ -#pragma once - -#include -#include -#include - -#include "glog/logging.h" -#include "storage/common/locking/lock_status.hpp" -#include "storage/common/locking/record_lock.hpp" -#include "transactions/type.hpp" -#include "utils/spin_lock.hpp" - -namespace tx { - -class Engine; -class Transaction; - -class LockStore { - class LockHolder { - public: - LockHolder() = default; - - /// @throw utils::LockTimeoutException - LockHolder(RecordLock *lock, const Transaction &tx, tx::Engine &engine) - : lock_(lock) { - DCHECK(lock != nullptr) << "Lock is nullptr."; - auto status = lock_->Lock(tx, engine); - - if (status != LockStatus::Acquired) { - lock_ = nullptr; - } - } - - LockHolder(const LockHolder &) = delete; - LockHolder &operator=(const LockHolder &) = delete; - - LockHolder(LockHolder &&other) : lock_(other.lock_) { - other.lock_ = nullptr; - } - - LockHolder &operator=(LockHolder &&other) { - if (this == &other) return *this; - lock_ = other.lock_; - other.lock_ = nullptr; - return *this; - } - - ~LockHolder() { - if (lock_ != nullptr) { - lock_->Unlock(); - } - } - - bool active() const { return lock_ != nullptr; } - - private: - RecordLock *lock_{nullptr}; - }; - - public: - /// @throw utils::LockTimeoutException - void Take(RecordLock *lock, const tx::Transaction &tx, tx::Engine &engine) { - // Creating a lock holder locks the version list to the given transaction. - // Note that it's an op that can take a long time (if there are multiple - // transactions trying to lock. - LockHolder holder{lock, tx, engine}; - - // This guard prevents the same transaction from concurrent modificaton of - // locks_. This can only happen in distributed memgraph, when there are - // multiple edits coming to the same worker in the same transaction at the - // same time. IMPORTANT: This guard must come after LockHolder construction, - // as that potentially takes a long time and this guard only needs to - // protect locks_ update. - std::lock_guard guard{locks_lock_}; - locks_.emplace_back(std::move(holder)); - if (!locks_.back().active()) { - locks_.pop_back(); - } - } - - private: - utils::SpinLock locks_lock_; - std::vector locks_; -}; -} // namespace tx diff --git a/src/transactions/single_node_ha/engine.cpp b/src/transactions/single_node_ha/engine.cpp deleted file mode 100644 index 6f9565f56..000000000 --- a/src/transactions/single_node_ha/engine.cpp +++ /dev/null @@ -1,305 +0,0 @@ -#include "transactions/single_node_ha/engine.hpp" - -#include -#include - -#include "glog/logging.h" - -#include "durability/single_node_ha/state_delta.hpp" -#include "raft/exceptions.hpp" - -namespace tx { - -Engine::Engine(raft::RaftInterface *raft, - storage::StateDeltaBuffer *delta_buffer) - : clog_(std::make_unique()), - raft_(raft), - delta_buffer_(delta_buffer) { - CHECK(raft) << "Raft can't be nullptr in HA"; - CHECK(delta_buffer) << "State delta buffer can't be nullptr in HA"; -} - -Transaction *Engine::Begin() { - VLOG(11) << "[Tx] Starting transaction " << counter_ + 1; - std::lock_guard guard(lock_); - if (!accepting_transactions_.load() || !replication_errors_.empty()) - throw TransactionEngineError( - "The transaction engine currently isn't accepting new transactions."); - - return BeginTransaction(false); -} - -Transaction *Engine::BeginBlocking(std::optional parent_tx) { - Snapshot wait_for_txs; - { - std::lock_guard guard(lock_); - if (!accepting_transactions_.load() || !replication_errors_.empty()) - throw TransactionEngineError( - "The transaction engine currently isn't accepting new transactions."); - - // Block the engine from accepting new transactions. - accepting_transactions_.store(false); - - // Set active transactions to abort ASAP. - for (auto transaction : active_) { - store_.find(transaction)->second->set_should_abort(); - } - - wait_for_txs = active_; - } - - // Wait for all active transactions except the parent (optional) and ourselves - // to end. - for (auto id : wait_for_txs) { - if (parent_tx && *parent_tx == id) continue; - while (Info(id).is_active()) { - // TODO reconsider this constant, currently rule-of-thumb chosen - std::this_thread::sleep_for(std::chrono::microseconds(100)); - } - } - - // Only after all transactions have finished, start the blocking transaction. - std::lock_guard guard(lock_); - return BeginTransaction(true); -} - -CommandId Engine::Advance(TransactionId id) { - std::lock_guard guard(lock_); - - auto it = store_.find(id); - DCHECK(it != store_.end()) - << "Transaction::advance on non-existing transaction"; - - return it->second.get()->AdvanceCommand(); -} - -CommandId Engine::UpdateCommand(TransactionId id) { - std::lock_guard guard(lock_); - auto it = store_.find(id); - DCHECK(it != store_.end()) - << "Transaction::advance on non-existing transaction"; - return it->second->cid(); -} - -void Engine::Commit(const Transaction &t) { - VLOG(11) << "[Tx] Committing transaction " << t.id_; - delta_buffer_->Emplace(database::StateDelta::TxCommit(t.id_)); - auto deltas = delta_buffer_->GetDeltas(t.id_); - - // If we have only two state deltas in our transaction, that means we are - // dealing with a read-only transaction which does not need to be replicated - // throughout the cluster, so we simply commit it in our storage. - // - // Also, when the current server is not in the leader mode, the following - // holds: - // - // 1) In CANDIDATE mode we need to be able to commit because Raft is - // initialzed in that mode and needs to perform recovery. - // - // 2) In FOLLOWER mode, Raft will only try to apply state deltas from logs - // that are behind the current commit index and are therefore safe to - // apply. - if (deltas.size() == 2 || !raft_->IsLeader()) { - delta_buffer_->Erase(t.id_); - std::lock_guard guard(lock_); - clog_->set_committed(t.id_); - active_.remove(t.id_); - store_.erase(store_.find(t.id_)); - if (t.blocking()) { - accepting_transactions_.store(true); - } - return; - } - - auto log_entry_status = raft_->Emplace(deltas); - - // Log Entry was not successfully emplaced and the transaction should be - // aborted - if (!log_entry_status) { - Abort(t); - return; - } - - // It is important to note the following situation. If our cluster ends up - // with a network partition where the current leader can't communicate with - // the majority of the peers, and the client is still sending queries to it, - // all of the transaction will end up waiting here until the network - // partition is resolved. The problem that can occur afterwards is bad. - // When the machine transitions from leader to follower mode, - // `ReplicationInfo` method will start returning `is_replicated=true`. This - // might lead to a problem where we suddenly want to alter the state of the - // transaction engine that isn't valid anymore, because the current machine - // isn't the leader anymore. This is all handled in the `Transition` method - // where once the transition from leader to follower occurs, the mode will - // be set to follower first, then the `Reset` method on the transaction - // engine will wait for all transactions to finish, and even though we - // change the transaction engine state here, the engine will perform a - // `Reset` and start recovering from zero, and the invalid changes won't - // matter. - - // Wait for Raft to receive confirmation from the majority of followers. - while (true) { - try { - if (raft_->SafeToCommit(log_entry_status->term_id, - log_entry_status->log_index)) - break; - } catch (const raft::ReplicationTimeoutException &e) { - std::lock_guard guard(lock_); - if (replication_errors_.insert(t.id_).second) { - LOG(WARNING) << e.what(); - } - } - std::this_thread::sleep_for(std::chrono::microseconds(100)); - } - - std::unique_lock raft_lock(raft_->WithLock(), std::defer_lock); - // We need to acquire the Raft lock so we don't end up racing with a Raft - // thread that can reset the engine state. If we can't acquire the lock, and - // we end up with reseting the engine, we throw - // UnexpectedLeaderChangeException. - while (true) { - if (raft_lock.try_lock()) { - break; - } - // This is the case when we've lost our leader status due to another peer - // requesting election. - if (reset_active_.load()) throw raft::UnexpectedLeaderChangeException(); - // This is the case when we're shutting down and we're no longer a valid - // leader. `SafeToCommit` will throw `RaftShutdownException` if the - // transaction wasn't replicated and the client will receive a negative - // response. Otherwise, we'll end up here, and since the transaction was - // replciated, we need to inform the client that the query succeeded. - if (!raft_->IsLeader()) break; - std::this_thread::sleep_for(std::chrono::microseconds(100)); - } - - delta_buffer_->Erase(t.id_); - std::lock_guard guard(lock_); - replication_errors_.erase(t.id_); - clog_->set_committed(t.id_); - active_.remove(t.id_); - store_.erase(store_.find(t.id_)); - if (t.blocking()) { - accepting_transactions_.store(true); - } -} - -void Engine::Abort(const Transaction &t) { - VLOG(11) << "[Tx] Aborting transaction " << t.id_; - delta_buffer_->Erase(t.id_); - std::lock_guard guard(lock_); - clog_->set_aborted(t.id_); - active_.remove(t.id_); - store_.erase(store_.find(t.id_)); - if (t.blocking()) { - accepting_transactions_.store(true); - } -} - -CommitLog::Info Engine::Info(TransactionId tx) const { - return clog_->fetch_info(tx); -} - -Snapshot Engine::GlobalGcSnapshot() { - std::lock_guard guard(lock_); - - // No active transactions. - if (active_.size() == 0) { - auto snapshot_copy = active_; - snapshot_copy.insert(counter_ + 1); - return snapshot_copy; - } - - // There are active transactions. - auto snapshot_copy = store_.find(active_.front())->second->snapshot(); - snapshot_copy.insert(active_.front()); - return snapshot_copy; -} - -Snapshot Engine::GlobalActiveTransactions() { - std::lock_guard guard(lock_); - Snapshot active_transactions = active_; - return active_transactions; -} - -TransactionId Engine::LocalLast() const { - std::lock_guard guard(lock_); - return counter_; -} - -TransactionId Engine::GlobalLast() const { return LocalLast(); } - -TransactionId Engine::LocalOldestActive() const { - std::lock_guard guard(lock_); - return active_.empty() ? counter_ + 1 : active_.front(); -} - -void Engine::GarbageCollectCommitLog(TransactionId tx_id) { - clog_->garbage_collect_older(tx_id); -} - -void Engine::LocalForEachActiveTransaction( - std::function f) { - std::lock_guard guard(lock_); - for (auto transaction : active_) { - f(*store_.find(transaction)->second); - } -} - -Transaction *Engine::RunningTransaction(TransactionId tx_id) { - std::lock_guard guard(lock_); - auto found = store_.find(tx_id); - CHECK(found != store_.end()) - << "Can't return snapshot for an inactive transaction"; - return found->second.get(); -} - -void Engine::Reset() { - Snapshot wait_for_txs; - { - std::lock_guard guard(lock_); - - // Block the engine from accepting new transactions. - accepting_transactions_.store(false); - - // Set active transactions to abort ASAP. - for (auto transaction : active_) { - store_.find(transaction)->second->set_should_abort(); - } - - wait_for_txs = active_; - reset_active_.store(true); - } - - // Wait for all active transactions to end. - for (auto id : wait_for_txs) { - while (Info(id).is_active()) { - // TODO reconsider this constant, currently rule-of-thumb chosen - std::this_thread::sleep_for(std::chrono::microseconds(100)); - } - } - - // Only after all transactions have finished, reset the engine. - std::lock_guard guard(lock_); - counter_ = 0; - replication_errors_.clear(); - store_.clear(); - active_.clear(); - { - clog_ = nullptr; - clog_ = std::make_unique(); - } - accepting_transactions_.store(true); - reset_active_.store(false); -} - -Transaction *Engine::BeginTransaction(bool blocking) { - TransactionId id{++counter_}; - Transaction *t = new Transaction(id, active_, *this, blocking); - active_.insert(id); - store_.emplace(id, t); - delta_buffer_->Emplace(database::StateDelta::TxBegin(id)); - return t; -} - -} // namespace tx diff --git a/src/transactions/single_node_ha/engine.hpp b/src/transactions/single_node_ha/engine.hpp deleted file mode 100644 index ad5056e0c..000000000 --- a/src/transactions/single_node_ha/engine.hpp +++ /dev/null @@ -1,88 +0,0 @@ -/// @file - -#pragma once - -#include -#include -#include -#include - -#include "raft/raft_interface.hpp" -#include "storage/single_node_ha/state_delta_buffer.hpp" -#include "transactions/commit_log.hpp" -#include "transactions/transaction.hpp" -#include "utils/spin_lock.hpp" - -namespace tx { - -class TransactionEngineError : public utils::BasicException { - using utils::BasicException::BasicException; -}; - -/// High availability single node transaction engine. -/// -/// Requires RaftInterface where it stores StateDeltas containing transaction -/// information needed for raft followers when replicating logs. -class Engine final { - public: - Engine(raft::RaftInterface *raft, storage::StateDeltaBuffer *delta_buffer); - - Engine(const Engine &) = delete; - Engine(Engine &&) = delete; - Engine &operator=(const Engine &) = delete; - Engine &operator=(Engine &&) = delete; - - Transaction *Begin(); - /// Blocking transactions are used when we can't allow any other transaction - /// to run (besides this one). This is the reason why this transactions blocks - /// the engine from creating new transactions and waits for the existing ones - /// to finish. - Transaction *BeginBlocking(std::optional parent_tx); - CommandId Advance(TransactionId id); - CommandId UpdateCommand(TransactionId id); - void Commit(const Transaction &t); - void Abort(const Transaction &t); - CommitLog::Info Info(TransactionId tx) const; - Snapshot GlobalGcSnapshot(); - Snapshot GlobalActiveTransactions(); - TransactionId GlobalLast() const; - TransactionId LocalLast() const; - TransactionId LocalOldestActive() const; - void LocalForEachActiveTransaction(std::function f); - Transaction *RunningTransaction(TransactionId tx_id); - void GarbageCollectCommitLog(TransactionId tx_id); - - auto &local_lock_graph() { return local_lock_graph_; } - const auto &local_lock_graph() const { return local_lock_graph_; } - - /// Reset the internal state of the engine. Use with caution as this will - /// block the engine from receiving any new transaction and will hint all - /// transactions to abort and will wait for them to finish before reseting - /// engines internal state. - void Reset(); - - private: - // Map lock dependencies. Each entry maps (tx_that_wants_lock, - // tx_that_holds_lock). Used for local deadlock resolution. - // TODO consider global deadlock resolution. - ConcurrentMap local_lock_graph_; - - TransactionId counter_{0}; - std::unique_ptr clog_{nullptr}; - std::unordered_map> store_; - Snapshot active_; - mutable utils::SpinLock lock_; - raft::RaftInterface *raft_{nullptr}; - storage::StateDeltaBuffer *delta_buffer_{nullptr}; - std::atomic accepting_transactions_{true}; - std::atomic reset_active_{false}; - - // Keep track of transaction that experienced a replication error. - // While there is a replication error known to the engine, the engine won't - // accept new transactions. - std::unordered_set replication_errors_; - - // Helper method for transaction begin. - Transaction *BeginTransaction(bool blocking); -}; -} // namespace tx diff --git a/src/transactions/snapshot.hpp b/src/transactions/snapshot.hpp deleted file mode 100644 index 80c0776c4..000000000 --- a/src/transactions/snapshot.hpp +++ /dev/null @@ -1,98 +0,0 @@ -/// @file - -#pragma once - -#include -#include -#include - -#include "glog/logging.h" -#include "transactions/type.hpp" -#include "utils/algorithm.hpp" - -namespace tx { - -/// Ascendingly sorted collection of transaction ids. -/// -/// Represents the transactions that were active at -/// some point in the discrete transaction time. -class Snapshot final { - public: - Snapshot() = default; - Snapshot(std::vector &&active) - : transaction_ids_(std::move(active)) {} - - Snapshot(const Snapshot &) = default; - Snapshot(Snapshot &&) = default; - Snapshot &operator=(const Snapshot &) = default; - Snapshot &operator=(Snapshot &&) = default; - - /// Returns true if this snapshot contains the given - /// transaction id. - /// - /// @param xid - The transcation id in question - bool contains(TransactionId id) const { - return std::binary_search(transaction_ids_.begin(), transaction_ids_.end(), - id); - } - - /// Adds the given transaction id to the end of this Snapshot. - /// The given id must be greater then all the existing ones, - /// to maintain ascending sort order. - /// - /// @param id - the transaction id to add - void insert(TransactionId id) { - transaction_ids_.push_back(id); - DCHECK(std::is_sorted(transaction_ids_.begin(), transaction_ids_.end())) - << "Snapshot must be sorted"; - } - - /// Removes the given transaction id from this Snapshot. - /// - /// @param id - the transaction id to remove - void remove(TransactionId id) { - auto last = - std::remove(transaction_ids_.begin(), transaction_ids_.end(), id); - transaction_ids_.erase(last, transaction_ids_.end()); - } - - /// Removes all transactions from this Snapshot. - void clear() { - transaction_ids_.clear(); - } - - TransactionId front() const { - DCHECK(transaction_ids_.size()) << "Snapshot.front() on empty Snapshot"; - return transaction_ids_.front(); - } - - TransactionId back() const { - DCHECK(transaction_ids_.size()) << "Snapshot.back() on empty Snapshot"; - return transaction_ids_.back(); - } - - size_t size() const { return transaction_ids_.size(); } - bool empty() const { return transaction_ids_.empty(); } - bool operator==(const Snapshot &other) const { - return transaction_ids_ == other.transaction_ids_; - } - auto begin() { return transaction_ids_.begin(); } - auto end() { return transaction_ids_.end(); } - auto begin() const { return transaction_ids_.cbegin(); } - auto end() const { return transaction_ids_.cend(); } - - friend std::ostream &operator<<(std::ostream &stream, - const Snapshot &snapshot) { - stream << "Snapshot("; - utils::PrintIterable(stream, snapshot.transaction_ids_); - stream << ")"; - return stream; - } - - const auto &transaction_ids() const { return transaction_ids_; } - - private: - std::vector transaction_ids_; -}; - -} // namespace tx diff --git a/src/transactions/transaction.hpp b/src/transactions/transaction.hpp deleted file mode 100644 index 51bdb4d79..000000000 --- a/src/transactions/transaction.hpp +++ /dev/null @@ -1,118 +0,0 @@ -/// @file - -#pragma once - -#include -#include -#include -#include -#include - -#include "data_structures/concurrent/concurrent_map.hpp" -#include "storage/common/locking/record_lock.hpp" -#include "transactions/lock_store.hpp" -#include "transactions/snapshot.hpp" -#include "transactions/type.hpp" -#include "utils/exceptions.hpp" - -namespace tx { - -/// Indicates an error in transaction handling (currently -/// only command id overflow). -class TransactionError : public utils::BasicException { - public: - using utils::BasicException::BasicException; -}; - -/// A database transaction. Encapsulates an atomic, abortable unit of work. Also -/// defines that all db ops are single-threaded within a single transaction -class Transaction final { - public: - /// Returns the maximum possible transcation id - static TransactionId MaxId() { - return std::numeric_limits::max(); - } - - private: - friend class Engine; - - // The constructor is private, only the Engine ever uses it. - Transaction(TransactionId id, const Snapshot &snapshot, Engine &engine, - bool blocking) - : id_(id), - engine_(engine), - snapshot_(snapshot), - blocking_(blocking) {} - - // A transaction can't be moved nor copied. it's owned by the transaction - // engine, and it's lifetime is managed by it. - Transaction(const Transaction &) = delete; - Transaction(Transaction &&) = delete; - Transaction &operator=(const Transaction &) = delete; - Transaction &operator=(Transaction &&) = delete; - - public: - /// Acquires the lock over the given RecordLock, preventing other transactions - /// from doing the same - /// @throw utils::LockTimeoutException - void TakeLock(RecordLock &lock) const { locks_.Take(&lock, *this, engine_); } - - /// Transaction's id. Unique in the engine that owns it - const TransactionId id_; - - /// The transaction engine to which this transaction belongs - Engine &engine_; - - /// Returns the current transaction's current command id - // TODO rename to cmd_id (variable and function - auto cid() const { return cid_; } - - /// Returns this transaction's snapshot. - const Snapshot &snapshot() const { return snapshot_; } - - /// Signal to transaction that it should abort. It doesn't really enforce that - /// transaction will abort, but it merely hints too the transaction that it is - /// preferable to stop its execution. - void set_should_abort() { should_abort_ = true; } - - bool should_abort() const { return should_abort_; } - - auto creation_time() const { return creation_time_; } - - auto blocking() const { return blocking_; } - - private: - /// Function used to advance the command. - /// @throw TransactionError - CommandId AdvanceCommand() { - if (cid_ == std::numeric_limits::max()) { - throw TransactionError( - "Reached maximum number of commands in this " - "transaction."); - } - return ++cid_; - } - - // Function used to set the command. - void SetCommand(CommandId cid) { cid_ = cid; } - - // Index of the current command in the current transaction. - CommandId cid_{1}; - - // A snapshot of currently active transactions. - const Snapshot snapshot_; - - // Record locks held by this transaction. - mutable LockStore locks_; - - // True if transaction should abort. Used to signal query executor that it - // should stop execution, it is only a hint, transaction can disobey. - std::atomic should_abort_{false}; - - // Creation time. - const std::chrono::time_point creation_time_{ - std::chrono::steady_clock::now()}; - - bool blocking_{false}; -}; -} // namespace tx diff --git a/src/transactions/type.hpp b/src/transactions/type.hpp deleted file mode 100644 index 9dd1a6617..000000000 --- a/src/transactions/type.hpp +++ /dev/null @@ -1,14 +0,0 @@ -/// @file - -#include - -// transcation and command types defined -// in a separate header to avoid cyclic dependencies -namespace tx { - - /// Type of a tx::Transcation's id member - using TransactionId = uint64_t; - - /// Type of a tx::Transcation's command id member - using CommandId = uint32_t; -} diff --git a/tests/unit/plan_pretty_print.cpp b/tests/unit/plan_pretty_print.cpp index 9c4741ee1..5b1a184b6 100644 --- a/tests/unit/plan_pretty_print.cpp +++ b/tests/unit/plan_pretty_print.cpp @@ -1,6 +1,5 @@ #include -#include "database/graph_db.hpp" #include "query/frontend/semantic/symbol_table.hpp" #include "query/plan/operator.hpp" #include "query/plan/pretty_print.hpp"