Remove old HA implementation
Reviewers: teon.banek, ipaljak Reviewed By: teon.banek Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D2686
This commit is contained in:
parent
4e5a91e7fb
commit
bfbace8168
@ -17,112 +17,6 @@ if (MG_ENTERPRISE)
|
|||||||
add_subdirectory(rpc)
|
add_subdirectory(rpc)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
## ----------------------------------------------------------------------------
|
|
||||||
## Common LCP files
|
|
||||||
## ----------------------------------------------------------------------------
|
|
||||||
#
|
|
||||||
#define_add_lcp(add_lcp_common lcp_common_cpp_files generated_lcp_common_files)
|
|
||||||
#
|
|
||||||
#add_lcp_common(query/frontend/ast/ast.lcp)
|
|
||||||
#add_lcp_common(query/frontend/semantic/symbol.lcp)
|
|
||||||
#add_lcp_common(query/plan/operator.lcp)
|
|
||||||
#
|
|
||||||
#add_custom_target(generate_lcp_common DEPENDS ${generated_lcp_common_files})
|
|
||||||
#
|
|
||||||
## ----------------------------------------------------------------------------
|
|
||||||
## END Common LCP files
|
|
||||||
## ----------------------------------------------------------------------------
|
|
||||||
#
|
|
||||||
## ----------------------------------------------------------------------------
|
|
||||||
## Memgraph Single Node High Availability
|
|
||||||
## ----------------------------------------------------------------------------
|
|
||||||
#set(mg_single_node_ha_sources
|
|
||||||
# ${lcp_common_cpp_files}
|
|
||||||
# data_structures/concurrent/skiplist_gc.cpp
|
|
||||||
# database/single_node_ha/config.cpp
|
|
||||||
# database/single_node_ha/graph_db.cpp
|
|
||||||
# database/single_node_ha/graph_db_accessor.cpp
|
|
||||||
# durability/single_node_ha/state_delta.cpp
|
|
||||||
# durability/single_node_ha/paths.cpp
|
|
||||||
# durability/single_node_ha/snapshooter.cpp
|
|
||||||
# durability/single_node_ha/recovery.cpp
|
|
||||||
# glue/communication.cpp
|
|
||||||
# raft/coordination.cpp
|
|
||||||
# raft/raft_server.cpp
|
|
||||||
# raft/storage_info.cpp
|
|
||||||
# query/common.cpp
|
|
||||||
# query/frontend/ast/cypher_main_visitor.cpp
|
|
||||||
# query/frontend/ast/pretty_print.cpp
|
|
||||||
# query/frontend/parsing.cpp
|
|
||||||
# query/frontend/semantic/required_privileges.cpp
|
|
||||||
# query/frontend/semantic/symbol_generator.cpp
|
|
||||||
# query/frontend/stripped.cpp
|
|
||||||
# query/interpret/awesome_memgraph_functions.cpp
|
|
||||||
# query/interpreter.cpp
|
|
||||||
# query/plan/operator.cpp
|
|
||||||
# query/plan/preprocess.cpp
|
|
||||||
# query/plan/pretty_print.cpp
|
|
||||||
# query/plan/profile.cpp
|
|
||||||
# query/plan/rewrite/index_lookup.cpp
|
|
||||||
# query/plan/rule_based_planner.cpp
|
|
||||||
# query/plan/variable_start_planner.cpp
|
|
||||||
# query/procedure/mg_procedure_impl.cpp
|
|
||||||
# query/procedure/module.cpp
|
|
||||||
# query/typed_value.cpp
|
|
||||||
# storage/common/constraints/record.cpp
|
|
||||||
# storage/common/constraints/unique_constraints.cpp
|
|
||||||
# storage/common/types/slk.cpp
|
|
||||||
# storage/common/types/property_value_store.cpp
|
|
||||||
# storage/common/locking/record_lock.cpp
|
|
||||||
# storage/single_node_ha/edge_accessor.cpp
|
|
||||||
# storage/single_node_ha/record_accessor.cpp
|
|
||||||
# storage/single_node_ha/vertex_accessor.cpp
|
|
||||||
# transactions/single_node_ha/engine.cpp
|
|
||||||
# memgraph_init.cpp
|
|
||||||
#)
|
|
||||||
#if (MG_ENTERPRISE)
|
|
||||||
# set(mg_single_node_ha_sources
|
|
||||||
# ${mg_single_node_ha_sources}
|
|
||||||
# glue/auth.cpp)
|
|
||||||
#endif()
|
|
||||||
#
|
|
||||||
#define_add_lcp(add_lcp_single_node_ha mg_single_node_ha_sources generated_lcp_single_node_ha_files)
|
|
||||||
#
|
|
||||||
#add_lcp_single_node_ha(durability/single_node_ha/state_delta.lcp)
|
|
||||||
#add_lcp_single_node_ha(database/single_node_ha/serialization.lcp SLK_SERIALIZE
|
|
||||||
# DEPENDS durability/single_node_ha/state_delta.lcp)
|
|
||||||
#add_lcp_single_node_ha(raft/raft_rpc_messages.lcp SLK_SERIALIZE)
|
|
||||||
#add_lcp_single_node_ha(raft/log_entry.lcp SLK_SERIALIZE)
|
|
||||||
#add_lcp_single_node_ha(raft/snapshot_metadata.lcp SLK_SERIALIZE)
|
|
||||||
#add_lcp_single_node_ha(raft/storage_info_rpc_messages.lcp SLK_SERIALIZE)
|
|
||||||
#
|
|
||||||
#add_custom_target(generate_lcp_single_node_ha DEPENDS generate_lcp_common ${generated_lcp_single_node_ha_files})
|
|
||||||
#
|
|
||||||
#set(MG_SINGLE_NODE_HA_LIBS stdc++fs Threads::Threads fmt cppitertools
|
|
||||||
# antlr_opencypher_parser_lib dl glog gflags
|
|
||||||
# mg-utils mg-io mg-requests mg-communication mg-rpc
|
|
||||||
# mg-auth mg-audit)
|
|
||||||
#
|
|
||||||
#add_library(mg-single-node-ha STATIC ${mg_single_node_ha_sources})
|
|
||||||
#target_include_directories(mg-single-node-ha PUBLIC ${CMAKE_SOURCE_DIR}/include)
|
|
||||||
#target_link_libraries(mg-single-node-ha ${MG_SINGLE_NODE_HA_LIBS})
|
|
||||||
#add_dependencies(mg-single-node-ha generate_opencypher_parser)
|
|
||||||
#add_dependencies(mg-single-node-ha generate_lcp_single_node_ha)
|
|
||||||
#target_compile_definitions(mg-single-node-ha PUBLIC MG_SINGLE_NODE_HA)
|
|
||||||
## TODO: Make these symbols visible once we add support for custom procedure
|
|
||||||
## modules in HA.
|
|
||||||
## NOTE: `include/mg_procedure.syms` describes a pattern match for symbols which
|
|
||||||
## should be dynamically exported, so that `dlopen` can correctly link the
|
|
||||||
## symbols in custom procedure module libraries.
|
|
||||||
## target_link_libraries(mg-single-node-ha "-Wl,--dynamic-list=${CMAKE_SOURCE_DIR}/include/mg_procedure.syms")
|
|
||||||
#
|
|
||||||
## ----------------------------------------------------------------------------
|
|
||||||
## END Memgraph Single Node High Availability
|
|
||||||
## ----------------------------------------------------------------------------
|
|
||||||
#
|
|
||||||
#add_custom_target(generate_lcp)
|
|
||||||
#add_dependencies(generate_lcp generate_lcp_single_node_ha)
|
|
||||||
|
|
||||||
string(TOLOWER ${CMAKE_BUILD_TYPE} lower_build_type)
|
string(TOLOWER ${CMAKE_BUILD_TYPE} lower_build_type)
|
||||||
|
|
||||||
# Generate a version.hpp file
|
# Generate a version.hpp file
|
||||||
@ -218,18 +112,3 @@ endif()
|
|||||||
# Create empty directories for default location of lib and log.
|
# Create empty directories for default location of lib and log.
|
||||||
install(CODE "file(MAKE_DIRECTORY \$ENV{DESTDIR}/var/log/memgraph
|
install(CODE "file(MAKE_DIRECTORY \$ENV{DESTDIR}/var/log/memgraph
|
||||||
\$ENV{DESTDIR}/var/lib/memgraph)")
|
\$ENV{DESTDIR}/var/lib/memgraph)")
|
||||||
|
|
||||||
|
|
||||||
## memgraph single node high availability executable
|
|
||||||
#add_executable(memgraph_ha memgraph_ha.cpp)
|
|
||||||
#target_link_libraries(memgraph_ha mg-single-node-ha mg-kvstore telemetry_lib)
|
|
||||||
#set_target_properties(memgraph_ha PROPERTIES
|
|
||||||
# # Set the executable output name to include version information.
|
|
||||||
# OUTPUT_NAME "memgraph_ha-${MEMGRAPH_VERSION}_${CMAKE_BUILD_TYPE}"
|
|
||||||
# # Output the executable in main binary dir.
|
|
||||||
# RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
|
|
||||||
## Create symlink to the built executable.
|
|
||||||
#add_custom_command(TARGET memgraph_ha POST_BUILD
|
|
||||||
# COMMAND ${CMAKE_COMMAND} -E create_symlink $<TARGET_FILE:memgraph_ha> ${CMAKE_BINARY_DIR}/memgraph_ha
|
|
||||||
# BYPRODUCTS ${CMAKE_BINARY_DIR}/memgraph_ha
|
|
||||||
# COMMENT "Creating symlink to memgraph single node high availability executable")
|
|
||||||
|
@ -1,5 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#ifdef MG_SINGLE_NODE_HA
|
|
||||||
#include "database/single_node_ha/graph_db.hpp"
|
|
||||||
#endif
|
|
@ -1,5 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#ifdef MG_SINGLE_NODE_HA
|
|
||||||
#include "database/single_node_ha/graph_db_accessor.hpp"
|
|
||||||
#endif
|
|
@ -1,51 +0,0 @@
|
|||||||
#include "database/single_node_ha/config.hpp"
|
|
||||||
|
|
||||||
#include <limits>
|
|
||||||
#include <thread>
|
|
||||||
|
|
||||||
#include "utils/flag_validation.hpp"
|
|
||||||
#include "utils/string.hpp"
|
|
||||||
|
|
||||||
// Durability flags.
|
|
||||||
DEFINE_string(
|
|
||||||
durability_directory, "durability",
|
|
||||||
"Path to directory in which to save snapshots and write-ahead log files.");
|
|
||||||
DEFINE_bool(db_recover_on_startup, true, "Recover database on startup.");
|
|
||||||
|
|
||||||
// Misc flags
|
|
||||||
DEFINE_int32(query_execution_time_sec, 180,
|
|
||||||
"Maximum allowed query execution time. Queries exceeding this "
|
|
||||||
"limit will be aborted. Value of -1 means no limit.");
|
|
||||||
DEFINE_int32(gc_cycle_sec, 30,
|
|
||||||
"Amount of time between starts of two cleaning cycles in seconds. "
|
|
||||||
"-1 to turn off.");
|
|
||||||
// Data location.
|
|
||||||
DEFINE_string(properties_on_disk, "",
|
|
||||||
"Property names of properties which will be stored on available "
|
|
||||||
"disk. Property names have to be separated with comma (,).");
|
|
||||||
|
|
||||||
// High availability.
|
|
||||||
DEFINE_string(
|
|
||||||
coordination_config_file, "coordination.json",
|
|
||||||
"Path to the file containing coordination configuration in JSON format");
|
|
||||||
|
|
||||||
DEFINE_string(raft_config_file, "raft.json",
|
|
||||||
"Path to the file containing raft configuration in JSON format");
|
|
||||||
|
|
||||||
DEFINE_VALIDATED_int32(
|
|
||||||
server_id, 1U, "Id used in the coordination configuration for this machine",
|
|
||||||
FLAG_IN_RANGE(1, std::numeric_limits<uint16_t>::max()));
|
|
||||||
|
|
||||||
database::Config::Config()
|
|
||||||
// Durability flags.
|
|
||||||
: durability_directory{FLAGS_durability_directory},
|
|
||||||
db_recover_on_startup{FLAGS_db_recover_on_startup},
|
|
||||||
// Misc flags.
|
|
||||||
gc_cycle_sec{FLAGS_gc_cycle_sec},
|
|
||||||
query_execution_time_sec{FLAGS_query_execution_time_sec},
|
|
||||||
// Data location.
|
|
||||||
properties_on_disk(utils::Split(FLAGS_properties_on_disk, ",")),
|
|
||||||
// High availability.
|
|
||||||
coordination_config_file{FLAGS_coordination_config_file},
|
|
||||||
raft_config_file{FLAGS_raft_config_file},
|
|
||||||
server_id{static_cast<uint16_t>(FLAGS_server_id)} {}
|
|
@ -1,31 +0,0 @@
|
|||||||
/// @file
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include <cstdint>
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
namespace database {
|
|
||||||
|
|
||||||
/// Database configuration. Initialized from flags, but modifiable.
|
|
||||||
struct Config {
|
|
||||||
Config();
|
|
||||||
|
|
||||||
// Durability flags.
|
|
||||||
std::string durability_directory;
|
|
||||||
bool db_recover_on_startup;
|
|
||||||
|
|
||||||
// Misc flags.
|
|
||||||
int gc_cycle_sec;
|
|
||||||
int query_execution_time_sec;
|
|
||||||
|
|
||||||
// set of properties which will be stored on disk
|
|
||||||
std::vector<std::string> properties_on_disk;
|
|
||||||
|
|
||||||
// HA flags.
|
|
||||||
std::string coordination_config_file;
|
|
||||||
std::string raft_config_file;
|
|
||||||
uint16_t server_id;
|
|
||||||
};
|
|
||||||
} // namespace database
|
|
@ -1,103 +0,0 @@
|
|||||||
#include "database/single_node_ha/graph_db.hpp"
|
|
||||||
|
|
||||||
#include <optional>
|
|
||||||
|
|
||||||
#include <glog/logging.h>
|
|
||||||
|
|
||||||
#include "database/single_node_ha/graph_db_accessor.hpp"
|
|
||||||
#include "storage/single_node_ha/concurrent_id_mapper.hpp"
|
|
||||||
#include "storage/single_node_ha/storage_gc.hpp"
|
|
||||||
#include "transactions/single_node_ha/engine.hpp"
|
|
||||||
|
|
||||||
namespace database {
|
|
||||||
|
|
||||||
GraphDb::GraphDb(Config config) : config_(config) {}
|
|
||||||
|
|
||||||
void GraphDb::Start() {
|
|
||||||
utils::EnsureDirOrDie(config_.durability_directory);
|
|
||||||
raft_server_.Start();
|
|
||||||
storage_info_.Start();
|
|
||||||
CHECK(coordination_.Start()) << "Couldn't start coordination!";
|
|
||||||
|
|
||||||
// Start transaction killer.
|
|
||||||
if (config_.query_execution_time_sec != -1) {
|
|
||||||
transaction_killer_.Run(
|
|
||||||
"TX killer",
|
|
||||||
std::chrono::seconds(
|
|
||||||
std::max(1, std::min(5, config_.query_execution_time_sec / 4))),
|
|
||||||
[this]() {
|
|
||||||
tx_engine_.LocalForEachActiveTransaction([this](tx::Transaction &t) {
|
|
||||||
if (t.creation_time() +
|
|
||||||
std::chrono::seconds(config_.query_execution_time_sec) <
|
|
||||||
std::chrono::steady_clock::now()) {
|
|
||||||
t.set_should_abort();
|
|
||||||
};
|
|
||||||
});
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void GraphDb::AwaitShutdown(std::function<void(void)> call_before_shutdown) {
|
|
||||||
coordination_.AwaitShutdown([this, &call_before_shutdown]() {
|
|
||||||
tx_engine_.LocalForEachActiveTransaction(
|
|
||||||
[](auto &t) { t.set_should_abort(); });
|
|
||||||
|
|
||||||
call_before_shutdown();
|
|
||||||
|
|
||||||
raft_server_.Shutdown();
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
void GraphDb::Shutdown() { coordination_.Shutdown(); }
|
|
||||||
|
|
||||||
GraphDbAccessor GraphDb::Access() {
|
|
||||||
return GraphDbAccessor(this);
|
|
||||||
}
|
|
||||||
|
|
||||||
GraphDbAccessor GraphDb::Access(tx::TransactionId tx_id) {
|
|
||||||
return GraphDbAccessor(this, tx_id);
|
|
||||||
}
|
|
||||||
|
|
||||||
GraphDbAccessor GraphDb::AccessBlocking(
|
|
||||||
std::optional<tx::TransactionId> parent_tx) {
|
|
||||||
return GraphDbAccessor(this, parent_tx);
|
|
||||||
}
|
|
||||||
|
|
||||||
Storage &GraphDb::storage() { return *storage_; }
|
|
||||||
|
|
||||||
raft::RaftInterface *GraphDb::raft() { return &raft_server_; }
|
|
||||||
|
|
||||||
raft::StorageInfo *GraphDb::storage_info() { return &storage_info_; }
|
|
||||||
|
|
||||||
tx::Engine &GraphDb::tx_engine() { return tx_engine_; }
|
|
||||||
|
|
||||||
storage::StateDeltaBuffer *GraphDb::sd_buffer() { return &sd_buffer_; }
|
|
||||||
|
|
||||||
storage::ConcurrentIdMapper<storage::Label> &GraphDb::label_mapper() {
|
|
||||||
return label_mapper_;
|
|
||||||
}
|
|
||||||
|
|
||||||
storage::ConcurrentIdMapper<storage::EdgeType> &GraphDb::edge_type_mapper() {
|
|
||||||
return edge_mapper_;
|
|
||||||
}
|
|
||||||
|
|
||||||
storage::ConcurrentIdMapper<storage::Property> &GraphDb::property_mapper() {
|
|
||||||
return property_mapper_;
|
|
||||||
}
|
|
||||||
|
|
||||||
void GraphDb::CollectGarbage() { storage_gc_->CollectGarbage(); }
|
|
||||||
|
|
||||||
void GraphDb::Reset() {
|
|
||||||
// Release gc scheduler to stop it from touching storage.
|
|
||||||
storage_gc_ = nullptr;
|
|
||||||
|
|
||||||
// This will make all active transactions to abort and reset the internal
|
|
||||||
// state.
|
|
||||||
tx_engine_.Reset();
|
|
||||||
|
|
||||||
storage_ = std::make_unique<Storage>(config_.properties_on_disk);
|
|
||||||
storage_gc_ = std::make_unique<StorageGc>(
|
|
||||||
*storage_, tx_engine_, &raft_server_, config_.gc_cycle_sec);
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace database
|
|
@ -1,156 +0,0 @@
|
|||||||
/// @file
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include <atomic>
|
|
||||||
#include <memory>
|
|
||||||
#include <optional>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include "database/single_node_ha/config.hpp"
|
|
||||||
#include "io/network/endpoint.hpp"
|
|
||||||
#include "raft/coordination.hpp"
|
|
||||||
#include "raft/raft_server.hpp"
|
|
||||||
#include "raft/storage_info.hpp"
|
|
||||||
#include "storage/common/types/types.hpp"
|
|
||||||
#include "storage/single_node_ha/concurrent_id_mapper.hpp"
|
|
||||||
#include "storage/single_node_ha/state_delta_buffer.hpp"
|
|
||||||
#include "storage/single_node_ha/storage.hpp"
|
|
||||||
#include "storage/single_node_ha/storage_gc.hpp"
|
|
||||||
#include "transactions/single_node_ha/engine.hpp"
|
|
||||||
#include "utils/scheduler.hpp"
|
|
||||||
#include "utils/stat.hpp"
|
|
||||||
|
|
||||||
namespace database {
|
|
||||||
|
|
||||||
/// Struct containing basic statistics about storage.
|
|
||||||
struct Stat {
|
|
||||||
// std::atomic<int64_t> is needed as reference to stat is passed to
|
|
||||||
// other threads. If there were no std::atomic we couldn't guarantee
|
|
||||||
// that a change to any member will be visible to other threads.
|
|
||||||
|
|
||||||
/// Vertex count is number of `VersionList<Vertex>` physically stored.
|
|
||||||
std::atomic<int64_t> vertex_count{0};
|
|
||||||
|
|
||||||
/// Vertex count is number of `VersionList<Edge>` physically stored.
|
|
||||||
std::atomic<int64_t> edge_count{0};
|
|
||||||
|
|
||||||
/// Average in/out degree of a vertex.
|
|
||||||
/// `avg_degree` is calculated as 2 * `edges_count` / `vertex_count`.
|
|
||||||
std::atomic<double> avg_degree{0};
|
|
||||||
};
|
|
||||||
|
|
||||||
class GraphDbAccessor;
|
|
||||||
|
|
||||||
/// An abstract base class providing the interface for a graph database.
|
|
||||||
///
|
|
||||||
/// Always be sure that GraphDb object is destructed before main exits, i. e.
|
|
||||||
/// GraphDb object shouldn't be part of global/static variable, except if its
|
|
||||||
/// destructor is explicitly called before main exits. Consider code:
|
|
||||||
///
|
|
||||||
/// GraphDb db; // KeyIndex is created as a part of database::Storage
|
|
||||||
/// int main() {
|
|
||||||
/// GraphDbAccessor dba(db);
|
|
||||||
/// auto v = dba.InsertVertex();
|
|
||||||
/// v.add_label(dba.Label(
|
|
||||||
/// "Start")); // New SkipList is created in KeyIndex for LabelIndex.
|
|
||||||
/// // That SkipList creates SkipListGc which
|
|
||||||
/// // initialises static Executor object.
|
|
||||||
/// return 0;
|
|
||||||
/// }
|
|
||||||
///
|
|
||||||
/// After main exits: 1. Executor is destructed, 2. KeyIndex is destructed.
|
|
||||||
/// Destructor of KeyIndex calls delete on created SkipLists which destroy
|
|
||||||
/// SkipListGc that tries to use Excutioner object that doesn't exist anymore.
|
|
||||||
/// -> CRASH
|
|
||||||
class GraphDb {
|
|
||||||
public:
|
|
||||||
explicit GraphDb(Config config = Config());
|
|
||||||
|
|
||||||
GraphDb(const GraphDb &) = delete;
|
|
||||||
GraphDb(GraphDb &&) = delete;
|
|
||||||
GraphDb &operator=(const GraphDb &) = delete;
|
|
||||||
GraphDb &operator=(GraphDb &&) = delete;
|
|
||||||
|
|
||||||
void Start();
|
|
||||||
void AwaitShutdown(std::function<void(void)> call_before_shutdown);
|
|
||||||
void Shutdown();
|
|
||||||
|
|
||||||
/// Create a new accessor by starting a new transaction.
|
|
||||||
GraphDbAccessor Access();
|
|
||||||
GraphDbAccessor AccessBlocking(std::optional<tx::TransactionId> parent_tx);
|
|
||||||
/// Create an accessor for a running transaction.
|
|
||||||
GraphDbAccessor Access(tx::TransactionId);
|
|
||||||
|
|
||||||
Storage &storage();
|
|
||||||
raft::RaftInterface *raft();
|
|
||||||
raft::StorageInfo *storage_info();
|
|
||||||
tx::Engine &tx_engine();
|
|
||||||
storage::StateDeltaBuffer *sd_buffer();
|
|
||||||
storage::ConcurrentIdMapper<storage::Label> &label_mapper();
|
|
||||||
storage::ConcurrentIdMapper<storage::EdgeType> &edge_type_mapper();
|
|
||||||
storage::ConcurrentIdMapper<storage::Property> &property_mapper();
|
|
||||||
void CollectGarbage();
|
|
||||||
|
|
||||||
/// Releases the storage object safely and creates a new object, resets the tx
|
|
||||||
/// engine.
|
|
||||||
///
|
|
||||||
/// This is needed in HA during the leader -> follower transition where we
|
|
||||||
/// might end up with some stale transactions on the leader.
|
|
||||||
void Reset();
|
|
||||||
|
|
||||||
/// Get live view of storage stats. Gets updated on RefreshStat.
|
|
||||||
const Stat &GetStat() const { return stat_; }
|
|
||||||
|
|
||||||
/// Updates storage stats.
|
|
||||||
void RefreshStat() {
|
|
||||||
auto vertex_count = storage().vertices_.access().size();
|
|
||||||
auto edge_count = storage().edges_.access().size();
|
|
||||||
|
|
||||||
stat_.vertex_count = vertex_count;
|
|
||||||
stat_.edge_count = edge_count;
|
|
||||||
|
|
||||||
if (vertex_count != 0) {
|
|
||||||
stat_.avg_degree = 2 * static_cast<double>(edge_count) / vertex_count;
|
|
||||||
} else {
|
|
||||||
stat_.avg_degree = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns the number of bytes used by the durability directory on disk.
|
|
||||||
uint64_t GetDurabilityDirDiskUsage() const {
|
|
||||||
return utils::GetDirDiskUsage(config_.durability_directory);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected:
|
|
||||||
Stat stat_;
|
|
||||||
|
|
||||||
utils::Scheduler transaction_killer_;
|
|
||||||
|
|
||||||
Config config_;
|
|
||||||
std::unique_ptr<Storage> storage_ =
|
|
||||||
std::make_unique<Storage>(config_.properties_on_disk);
|
|
||||||
raft::Coordination coordination_{
|
|
||||||
config_.server_id,
|
|
||||||
raft::LoadNodesFromFile(config_.coordination_config_file)};
|
|
||||||
raft::RaftServer raft_server_{
|
|
||||||
config_.server_id,
|
|
||||||
config_.durability_directory,
|
|
||||||
config_.db_recover_on_startup,
|
|
||||||
raft::Config::LoadFromFile(config_.raft_config_file),
|
|
||||||
&coordination_,
|
|
||||||
this};
|
|
||||||
raft::StorageInfo storage_info_{this, &coordination_, config_.server_id};
|
|
||||||
storage::StateDeltaBuffer sd_buffer_;
|
|
||||||
|
|
||||||
tx::Engine tx_engine_{&raft_server_, &sd_buffer_};
|
|
||||||
std::unique_ptr<StorageGc> storage_gc_ = std::make_unique<StorageGc>(
|
|
||||||
*storage_, tx_engine_, &raft_server_, config_.gc_cycle_sec);
|
|
||||||
storage::ConcurrentIdMapper<storage::Label> label_mapper_{
|
|
||||||
storage_->PropertiesOnDisk()};
|
|
||||||
storage::ConcurrentIdMapper<storage::EdgeType> edge_mapper_{
|
|
||||||
storage_->PropertiesOnDisk()};
|
|
||||||
storage::ConcurrentIdMapper<storage::Property> property_mapper_{
|
|
||||||
storage_->PropertiesOnDisk()};
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace database
|
|
@ -1,556 +0,0 @@
|
|||||||
#include "database/single_node_ha/graph_db_accessor.hpp"
|
|
||||||
|
|
||||||
#include <chrono>
|
|
||||||
#include <thread>
|
|
||||||
|
|
||||||
#include <glog/logging.h>
|
|
||||||
|
|
||||||
#include "durability/single_node_ha/state_delta.hpp"
|
|
||||||
#include "storage/common/constraints/exceptions.hpp"
|
|
||||||
#include "storage/single_node_ha/edge.hpp"
|
|
||||||
#include "storage/single_node_ha/edge_accessor.hpp"
|
|
||||||
#include "storage/single_node_ha/vertex.hpp"
|
|
||||||
#include "storage/single_node_ha/vertex_accessor.hpp"
|
|
||||||
#include "utils/cast.hpp"
|
|
||||||
#include "utils/on_scope_exit.hpp"
|
|
||||||
#include "utils/stat.hpp"
|
|
||||||
|
|
||||||
namespace database {
|
|
||||||
|
|
||||||
GraphDbAccessor::GraphDbAccessor(GraphDb *db)
|
|
||||||
: db_(db),
|
|
||||||
transaction_(db->tx_engine().Begin()),
|
|
||||||
transaction_starter_{true} {}
|
|
||||||
|
|
||||||
GraphDbAccessor::GraphDbAccessor(GraphDb *db, tx::TransactionId tx_id)
|
|
||||||
: db_(db),
|
|
||||||
transaction_(db->tx_engine().RunningTransaction(tx_id)),
|
|
||||||
transaction_starter_{false} {}
|
|
||||||
|
|
||||||
GraphDbAccessor::GraphDbAccessor(GraphDb *db,
|
|
||||||
std::optional<tx::TransactionId> parent_tx)
|
|
||||||
: db_(db),
|
|
||||||
transaction_(db->tx_engine().BeginBlocking(parent_tx)),
|
|
||||||
transaction_starter_{true} {}
|
|
||||||
|
|
||||||
GraphDbAccessor::GraphDbAccessor(GraphDbAccessor &&other)
|
|
||||||
: db_(other.db_),
|
|
||||||
transaction_(other.transaction_),
|
|
||||||
transaction_starter_(other.transaction_starter_),
|
|
||||||
commited_(other.commited_),
|
|
||||||
aborted_(other.aborted_) {
|
|
||||||
// Make sure that the other transaction isn't a transaction starter so that
|
|
||||||
// its destructor doesn't close the transaction.
|
|
||||||
other.transaction_starter_ = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
GraphDbAccessor &GraphDbAccessor::operator=(GraphDbAccessor &&other) {
|
|
||||||
db_ = other.db_;
|
|
||||||
transaction_ = other.transaction_;
|
|
||||||
transaction_starter_ = other.transaction_starter_;
|
|
||||||
commited_ = other.commited_;
|
|
||||||
aborted_ = other.aborted_;
|
|
||||||
|
|
||||||
// Make sure that the other transaction isn't a transaction starter so that
|
|
||||||
// its destructor doesn't close the transaction.
|
|
||||||
other.transaction_starter_ = false;
|
|
||||||
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
GraphDbAccessor::~GraphDbAccessor() {
|
|
||||||
if (transaction_starter_ && !commited_ && !aborted_) {
|
|
||||||
this->Abort();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
tx::TransactionId GraphDbAccessor::transaction_id() const {
|
|
||||||
return transaction_->id_;
|
|
||||||
}
|
|
||||||
|
|
||||||
void GraphDbAccessor::AdvanceCommand() {
|
|
||||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
|
||||||
db_->tx_engine().Advance(transaction_->id_);
|
|
||||||
}
|
|
||||||
|
|
||||||
void GraphDbAccessor::Commit() {
|
|
||||||
DCHECK(!commited_ && !aborted_) << "Already aborted or commited transaction.";
|
|
||||||
db_->tx_engine().Commit(*transaction_);
|
|
||||||
commited_ = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
void GraphDbAccessor::Abort() {
|
|
||||||
DCHECK(!commited_ && !aborted_) << "Already aborted or commited transaction.";
|
|
||||||
db_->tx_engine().Abort(*transaction_);
|
|
||||||
aborted_ = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool GraphDbAccessor::should_abort() const {
|
|
||||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
|
||||||
return transaction_->should_abort();
|
|
||||||
}
|
|
||||||
|
|
||||||
raft::RaftInterface *GraphDbAccessor::raft() {
|
|
||||||
return db_->raft();
|
|
||||||
}
|
|
||||||
|
|
||||||
storage::StateDeltaBuffer *GraphDbAccessor::sd_buffer() {
|
|
||||||
return db_->sd_buffer();
|
|
||||||
}
|
|
||||||
|
|
||||||
VertexAccessor GraphDbAccessor::InsertVertex(
|
|
||||||
std::optional<storage::Gid> requested_gid) {
|
|
||||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
|
||||||
|
|
||||||
auto gid = db_->storage().vertex_generator_.Next(requested_gid);
|
|
||||||
auto vertex_vlist = new mvcc::VersionList<Vertex>(*transaction_, gid);
|
|
||||||
|
|
||||||
bool success =
|
|
||||||
db_->storage().vertices_.access().insert(gid, vertex_vlist).second;
|
|
||||||
CHECK(success) << "Attempting to insert a vertex with an existing GID: "
|
|
||||||
<< gid.AsUint();
|
|
||||||
sd_buffer()->Emplace(
|
|
||||||
database::StateDelta::CreateVertex(transaction_->id_, vertex_vlist->gid_));
|
|
||||||
auto va = VertexAccessor(vertex_vlist, *this);
|
|
||||||
return va;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::optional<VertexAccessor> GraphDbAccessor::FindVertexOptional(
|
|
||||||
storage::Gid gid, bool current_state) {
|
|
||||||
VertexAccessor record_accessor(db_->storage().LocalAddress<Vertex>(gid),
|
|
||||||
*this);
|
|
||||||
if (!record_accessor.Visible(transaction(), current_state))
|
|
||||||
return std::nullopt;
|
|
||||||
return record_accessor;
|
|
||||||
}
|
|
||||||
|
|
||||||
VertexAccessor GraphDbAccessor::FindVertex(storage::Gid gid,
|
|
||||||
bool current_state) {
|
|
||||||
auto found = FindVertexOptional(gid, current_state);
|
|
||||||
CHECK(found) << "Unable to find vertex for id: " << gid.AsUint();
|
|
||||||
return *found;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::optional<EdgeAccessor> GraphDbAccessor::FindEdgeOptional(
|
|
||||||
storage::Gid gid, bool current_state) {
|
|
||||||
EdgeAccessor record_accessor(db_->storage().LocalAddress<Edge>(gid), *this);
|
|
||||||
if (!record_accessor.Visible(transaction(), current_state))
|
|
||||||
return std::nullopt;
|
|
||||||
return record_accessor;
|
|
||||||
}
|
|
||||||
|
|
||||||
EdgeAccessor GraphDbAccessor::FindEdge(storage::Gid gid, bool current_state) {
|
|
||||||
auto found = FindEdgeOptional(gid, current_state);
|
|
||||||
CHECK(found) << "Unable to find edge for id: " << gid.AsUint();
|
|
||||||
return *found;
|
|
||||||
}
|
|
||||||
|
|
||||||
void GraphDbAccessor::BuildIndex(storage::Label label,
|
|
||||||
storage::Property property) {
|
|
||||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
|
||||||
|
|
||||||
// Create the index
|
|
||||||
const LabelPropertyIndex::Key key(label, property);
|
|
||||||
if (db_->storage().label_property_index_.CreateIndex(key) == false) {
|
|
||||||
throw IndexExistsException(
|
|
||||||
"Index is either being created by another transaction or already "
|
|
||||||
"exists.");
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
auto dba = db_->AccessBlocking(std::make_optional(transaction_->id_));
|
|
||||||
|
|
||||||
dba.PopulateIndex(key);
|
|
||||||
dba.EnableIndex(key);
|
|
||||||
dba.Commit();
|
|
||||||
} catch (const tx::TransactionEngineError &e) {
|
|
||||||
db_->storage().label_property_index_.DeleteIndex(key);
|
|
||||||
throw TransactionException(e.what());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void GraphDbAccessor::EnableIndex(const LabelPropertyIndex::Key &key) {
|
|
||||||
// Commit transaction as we finished applying method on newest visible
|
|
||||||
// records. Write that transaction's ID to the RaftServer as the index has
|
|
||||||
// been built at this point even if this DBA's transaction aborts for some
|
|
||||||
// reason.
|
|
||||||
sd_buffer()->Emplace(database::StateDelta::BuildIndex(
|
|
||||||
transaction_id(), key.label_, LabelName(key.label_), key.property_,
|
|
||||||
PropertyName(key.property_)));
|
|
||||||
}
|
|
||||||
|
|
||||||
void GraphDbAccessor::PopulateIndex(const LabelPropertyIndex::Key &key) {
|
|
||||||
for (auto vertex : Vertices(key.label_, false)) {
|
|
||||||
if (vertex.PropsAt(key.property_).type() == PropertyValue::Type::Null)
|
|
||||||
continue;
|
|
||||||
db_->storage().label_property_index_.UpdateOnLabelProperty(vertex.address(),
|
|
||||||
vertex.current_);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void GraphDbAccessor::DeleteIndex(storage::Label label,
|
|
||||||
storage::Property property) {
|
|
||||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
|
||||||
|
|
||||||
LabelPropertyIndex::Key key(label, property);
|
|
||||||
try {
|
|
||||||
auto dba = db_->AccessBlocking(std::make_optional(transaction_->id_));
|
|
||||||
|
|
||||||
db_->storage().label_property_index_.DeleteIndex(key);
|
|
||||||
dba.sd_buffer()->Emplace(database::StateDelta::DropIndex(
|
|
||||||
dba.transaction_id(), key.label_, LabelName(key.label_), key.property_,
|
|
||||||
PropertyName(key.property_)));
|
|
||||||
|
|
||||||
dba.Commit();
|
|
||||||
} catch (const tx::TransactionEngineError &e) {
|
|
||||||
throw TransactionException(e.what());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void GraphDbAccessor::BuildUniqueConstraint(
|
|
||||||
storage::Label label, const std::vector<storage::Property> &properties) {
|
|
||||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
|
||||||
|
|
||||||
storage::constraints::ConstraintEntry entry{label, properties};
|
|
||||||
if (!db_->storage().unique_constraints_.AddConstraint(entry)) {
|
|
||||||
// Already exists
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
auto dba = db_->AccessBlocking(std::make_optional(transaction().id_));
|
|
||||||
|
|
||||||
for (auto v : dba.Vertices(false)) {
|
|
||||||
if (std::find(v.labels().begin(), v.labels().end(), label) !=
|
|
||||||
v.labels().end()) {
|
|
||||||
db_->storage().unique_constraints_.Update(v, dba.transaction());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::string> property_names(properties.size());
|
|
||||||
std::transform(properties.begin(), properties.end(), property_names.begin(),
|
|
||||||
[&dba](storage::Property property) {
|
|
||||||
return dba.PropertyName(property);
|
|
||||||
});
|
|
||||||
|
|
||||||
dba.sd_buffer()->Emplace(database::StateDelta::BuildUniqueConstraint(
|
|
||||||
dba.transaction().id_, label, dba.LabelName(label), properties,
|
|
||||||
property_names));
|
|
||||||
|
|
||||||
dba.Commit();
|
|
||||||
|
|
||||||
} catch (const tx::TransactionEngineError &e) {
|
|
||||||
db_->storage().unique_constraints_.RemoveConstraint(entry);
|
|
||||||
throw TransactionException(e.what());
|
|
||||||
} catch (const storage::constraints::ViolationException &e) {
|
|
||||||
db_->storage().unique_constraints_.RemoveConstraint(entry);
|
|
||||||
throw ConstraintViolationException(e.what());
|
|
||||||
} catch (const storage::constraints::SerializationException &e) {
|
|
||||||
db_->storage().unique_constraints_.RemoveConstraint(entry);
|
|
||||||
throw mvcc::SerializationError();
|
|
||||||
} catch (...) {
|
|
||||||
db_->storage().unique_constraints_.RemoveConstraint(entry);
|
|
||||||
throw;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void GraphDbAccessor::DeleteUniqueConstraint(
|
|
||||||
storage::Label label, const std::vector<storage::Property> &properties) {
|
|
||||||
storage::constraints::ConstraintEntry entry{label, properties};
|
|
||||||
try {
|
|
||||||
auto dba = db_->AccessBlocking(std::make_optional(transaction().id_));
|
|
||||||
|
|
||||||
if (!db_->storage().unique_constraints_.RemoveConstraint(entry)) {
|
|
||||||
// Nothing was deleted
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::string> property_names(properties.size());
|
|
||||||
std::transform(properties.begin(), properties.end(), property_names.begin(),
|
|
||||||
[&dba](storage::Property property) {
|
|
||||||
return dba.PropertyName(property);
|
|
||||||
});
|
|
||||||
|
|
||||||
dba.sd_buffer()->Emplace(database::StateDelta::DropUniqueConstraint(
|
|
||||||
dba.transaction().id_, label, dba.LabelName(label), properties,
|
|
||||||
property_names));
|
|
||||||
|
|
||||||
dba.Commit();
|
|
||||||
} catch (const tx::TransactionEngineError &e) {
|
|
||||||
throw TransactionException(e.what());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<storage::constraints::ConstraintEntry>
|
|
||||||
GraphDbAccessor::ListUniqueConstraints() const {
|
|
||||||
return db_->storage().unique_constraints_.ListConstraints();
|
|
||||||
}
|
|
||||||
|
|
||||||
void GraphDbAccessor::UpdateOnAddLabel(storage::Label label,
|
|
||||||
const VertexAccessor &vertex_accessor,
|
|
||||||
const Vertex *vertex) {
|
|
||||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
|
||||||
auto *vlist_ptr = vertex_accessor.address();
|
|
||||||
|
|
||||||
try {
|
|
||||||
db_->storage().unique_constraints_.UpdateOnAddLabel(label, vertex_accessor,
|
|
||||||
transaction());
|
|
||||||
} catch (const storage::constraints::SerializationException &e) {
|
|
||||||
throw mvcc::SerializationError();
|
|
||||||
} catch (const storage::constraints::ViolationException &e) {
|
|
||||||
throw ConstraintViolationException(e.what());
|
|
||||||
}
|
|
||||||
|
|
||||||
db_->storage().label_property_index_.UpdateOnLabel(label, vlist_ptr, vertex);
|
|
||||||
db_->storage().labels_index_.Update(label, vlist_ptr, vertex);
|
|
||||||
}
|
|
||||||
|
|
||||||
void GraphDbAccessor::UpdateOnRemoveLabel(
|
|
||||||
storage::Label label, const RecordAccessor<Vertex> &accessor) {
|
|
||||||
db_->storage().unique_constraints_.UpdateOnRemoveLabel(label, accessor,
|
|
||||||
transaction());
|
|
||||||
}
|
|
||||||
|
|
||||||
void GraphDbAccessor::UpdateOnAddProperty(
|
|
||||||
storage::Property property, const PropertyValue &previous_value,
|
|
||||||
const PropertyValue &new_value,
|
|
||||||
const RecordAccessor<Vertex> &vertex_accessor, const Vertex *vertex) {
|
|
||||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
|
||||||
|
|
||||||
try {
|
|
||||||
db_->storage().unique_constraints_.UpdateOnAddProperty(
|
|
||||||
property, previous_value, new_value, vertex_accessor, transaction());
|
|
||||||
} catch (const storage::constraints::SerializationException &e) {
|
|
||||||
throw mvcc::SerializationError();
|
|
||||||
} catch (const storage::constraints::ViolationException &e) {
|
|
||||||
throw ConstraintViolationException(e.what());
|
|
||||||
}
|
|
||||||
|
|
||||||
db_->storage().label_property_index_.UpdateOnProperty(
|
|
||||||
property, vertex_accessor.address(), vertex);
|
|
||||||
}
|
|
||||||
|
|
||||||
void GraphDbAccessor::UpdateOnRemoveProperty(
|
|
||||||
storage::Property property, const PropertyValue &previous_value,
|
|
||||||
const RecordAccessor<Vertex> &accessor, const Vertex *vertex) {
|
|
||||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
|
||||||
|
|
||||||
try {
|
|
||||||
db_->storage().unique_constraints_.UpdateOnRemoveProperty(
|
|
||||||
property, previous_value, accessor, transaction());
|
|
||||||
} catch (const storage::constraints::SerializationException &e) {
|
|
||||||
throw mvcc::SerializationError();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int64_t GraphDbAccessor::VerticesCount() const {
|
|
||||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
|
||||||
return db_->storage().vertices_.access().size();
|
|
||||||
}
|
|
||||||
|
|
||||||
int64_t GraphDbAccessor::VerticesCount(storage::Label label) const {
|
|
||||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
|
||||||
return db_->storage().labels_index_.Count(label);
|
|
||||||
}
|
|
||||||
|
|
||||||
int64_t GraphDbAccessor::VerticesCount(storage::Label label,
|
|
||||||
storage::Property property) const {
|
|
||||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
|
||||||
const LabelPropertyIndex::Key key(label, property);
|
|
||||||
DCHECK(db_->storage().label_property_index_.IndexExists(key))
|
|
||||||
<< "Index doesn't exist.";
|
|
||||||
return db_->storage().label_property_index_.Count(key);
|
|
||||||
}
|
|
||||||
|
|
||||||
int64_t GraphDbAccessor::VerticesCount(storage::Label label,
|
|
||||||
storage::Property property,
|
|
||||||
const PropertyValue &value) const {
|
|
||||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
|
||||||
const LabelPropertyIndex::Key key(label, property);
|
|
||||||
DCHECK(db_->storage().label_property_index_.IndexExists(key))
|
|
||||||
<< "Index doesn't exist.";
|
|
||||||
return db_->storage()
|
|
||||||
.label_property_index_.PositionAndCount(key, value)
|
|
||||||
.second;
|
|
||||||
}
|
|
||||||
|
|
||||||
int64_t GraphDbAccessor::VerticesCount(
|
|
||||||
storage::Label label, storage::Property property,
|
|
||||||
const std::optional<utils::Bound<PropertyValue>> lower,
|
|
||||||
const std::optional<utils::Bound<PropertyValue>> upper) const {
|
|
||||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
|
||||||
const LabelPropertyIndex::Key key(label, property);
|
|
||||||
DCHECK(db_->storage().label_property_index_.IndexExists(key))
|
|
||||||
<< "Index doesn't exist.";
|
|
||||||
CHECK(lower || upper) << "At least one bound must be provided";
|
|
||||||
CHECK(!lower || lower.value().value().type() != PropertyValue::Type::Null)
|
|
||||||
<< "Null value is not a valid index bound";
|
|
||||||
CHECK(!upper || upper.value().value().type() != PropertyValue::Type::Null)
|
|
||||||
<< "Null value is not a valid index bound";
|
|
||||||
|
|
||||||
if (!upper) {
|
|
||||||
auto lower_pac = db_->storage().label_property_index_.PositionAndCount(
|
|
||||||
key, lower.value().value());
|
|
||||||
int64_t size = db_->storage().label_property_index_.Count(key);
|
|
||||||
return std::max(0l,
|
|
||||||
size - lower_pac.first -
|
|
||||||
(lower.value().IsInclusive() ? 0l : lower_pac.second));
|
|
||||||
|
|
||||||
} else if (!lower) {
|
|
||||||
auto upper_pac = db_->storage().label_property_index_.PositionAndCount(
|
|
||||||
key, upper.value().value());
|
|
||||||
return upper.value().IsInclusive() ? upper_pac.first + upper_pac.second
|
|
||||||
: upper_pac.first;
|
|
||||||
|
|
||||||
} else {
|
|
||||||
auto lower_pac = db_->storage().label_property_index_.PositionAndCount(
|
|
||||||
key, lower.value().value());
|
|
||||||
auto upper_pac = db_->storage().label_property_index_.PositionAndCount(
|
|
||||||
key, upper.value().value());
|
|
||||||
auto result = upper_pac.first - lower_pac.first;
|
|
||||||
if (lower.value().IsExclusive()) result -= lower_pac.second;
|
|
||||||
if (upper.value().IsInclusive()) result += upper_pac.second;
|
|
||||||
return std::max(0l, result);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool GraphDbAccessor::RemoveVertex(VertexAccessor &vertex_accessor,
|
|
||||||
bool check_empty) {
|
|
||||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
|
||||||
vertex_accessor.SwitchNew();
|
|
||||||
// it's possible the vertex was removed already in this transaction
|
|
||||||
// due to it getting matched multiple times by some patterns
|
|
||||||
// we can only delete it once, so check if it's already deleted
|
|
||||||
if (vertex_accessor.current().is_expired_by(*transaction_)) return true;
|
|
||||||
if (check_empty &&
|
|
||||||
vertex_accessor.out_degree() + vertex_accessor.in_degree() > 0)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
auto *vlist_ptr = vertex_accessor.address();
|
|
||||||
sd_buffer()->Emplace(database::StateDelta::RemoveVertex(
|
|
||||||
transaction_->id_, vlist_ptr->gid_, check_empty));
|
|
||||||
vlist_ptr->remove(vertex_accessor.current_, *transaction_);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
void GraphDbAccessor::DetachRemoveVertex(VertexAccessor &vertex_accessor) {
|
|
||||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
|
||||||
|
|
||||||
vertex_accessor.SwitchNew();
|
|
||||||
|
|
||||||
// Note that when we call RemoveEdge we must take care not to delete from the
|
|
||||||
// collection we are iterating over. This invalidates the iterator in a subtle
|
|
||||||
// way that does not fail in tests, but is NOT correct.
|
|
||||||
for (auto edge_accessor : vertex_accessor.in())
|
|
||||||
RemoveEdge(edge_accessor, true, false);
|
|
||||||
vertex_accessor.SwitchNew();
|
|
||||||
for (auto edge_accessor : vertex_accessor.out())
|
|
||||||
RemoveEdge(edge_accessor, false, true);
|
|
||||||
|
|
||||||
RemoveVertex(vertex_accessor, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
EdgeAccessor GraphDbAccessor::InsertEdge(
|
|
||||||
VertexAccessor &from, VertexAccessor &to, storage::EdgeType edge_type,
|
|
||||||
std::optional<storage::Gid> requested_gid) {
|
|
||||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
|
||||||
auto gid = db_->storage().edge_generator_.Next(requested_gid);
|
|
||||||
auto edge_vlist = new mvcc::VersionList<Edge>(
|
|
||||||
*transaction_, gid, from.address(), to.address(), edge_type);
|
|
||||||
// We need to insert edge_vlist to edges_ before calling update since update
|
|
||||||
// can throw and edge_vlist will not be garbage collected if it is not in
|
|
||||||
// edges_ skiplist.
|
|
||||||
bool success = db_->storage().edges_.access().insert(gid, edge_vlist).second;
|
|
||||||
CHECK(success) << "Attempting to insert an edge with an existing GID: "
|
|
||||||
<< gid.AsUint();
|
|
||||||
|
|
||||||
// ensure that the "from" accessor has the latest version
|
|
||||||
from.SwitchNew();
|
|
||||||
from.update().out_.emplace(to.address(), edge_vlist, edge_type);
|
|
||||||
|
|
||||||
// ensure that the "to" accessor has the latest version (Switch new)
|
|
||||||
// WARNING: must do that after the above "from.update()" for cases when
|
|
||||||
// we are creating a cycle and "from" and "to" are the same vlist
|
|
||||||
to.SwitchNew();
|
|
||||||
to.update().in_.emplace(from.address(), edge_vlist, edge_type);
|
|
||||||
|
|
||||||
sd_buffer()->Emplace(database::StateDelta::CreateEdge(
|
|
||||||
transaction_->id_, edge_vlist->gid_, from.gid(), to.gid(), edge_type,
|
|
||||||
EdgeTypeName(edge_type)));
|
|
||||||
|
|
||||||
return EdgeAccessor(edge_vlist, *this, from.address(), to.address(),
|
|
||||||
edge_type);
|
|
||||||
}
|
|
||||||
|
|
||||||
int64_t GraphDbAccessor::EdgesCount() const {
|
|
||||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
|
||||||
return db_->storage().edges_.access().size();
|
|
||||||
}
|
|
||||||
|
|
||||||
void GraphDbAccessor::RemoveEdge(EdgeAccessor &edge, bool remove_out_edge,
|
|
||||||
bool remove_in_edge) {
|
|
||||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
|
||||||
// it's possible the edge was removed already in this transaction
|
|
||||||
// due to it getting matched multiple times by some patterns
|
|
||||||
// we can only delete it once, so check if it's already deleted
|
|
||||||
edge.SwitchNew();
|
|
||||||
if (edge.current().is_expired_by(*transaction_)) return;
|
|
||||||
if (remove_out_edge) edge.from().RemoveOutEdge(edge.address());
|
|
||||||
if (remove_in_edge) edge.to().RemoveInEdge(edge.address());
|
|
||||||
|
|
||||||
edge.address()->remove(edge.current_, *transaction_);
|
|
||||||
sd_buffer()->Emplace(
|
|
||||||
database::StateDelta::RemoveEdge(transaction_->id_, edge.gid()));
|
|
||||||
}
|
|
||||||
|
|
||||||
storage::Label GraphDbAccessor::Label(const std::string &label_name) {
|
|
||||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
|
||||||
return db_->label_mapper().value_to_id(label_name);
|
|
||||||
}
|
|
||||||
|
|
||||||
const std::string &GraphDbAccessor::LabelName(storage::Label label) const {
|
|
||||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
|
||||||
return db_->label_mapper().id_to_value(label);
|
|
||||||
}
|
|
||||||
|
|
||||||
storage::EdgeType GraphDbAccessor::EdgeType(const std::string &edge_type_name) {
|
|
||||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
|
||||||
return db_->edge_type_mapper().value_to_id(edge_type_name);
|
|
||||||
}
|
|
||||||
|
|
||||||
const std::string &GraphDbAccessor::EdgeTypeName(
|
|
||||||
storage::EdgeType edge_type) const {
|
|
||||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
|
||||||
return db_->edge_type_mapper().id_to_value(edge_type);
|
|
||||||
}
|
|
||||||
|
|
||||||
storage::Property GraphDbAccessor::Property(const std::string &property_name) {
|
|
||||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
|
||||||
return db_->property_mapper().value_to_id(property_name);
|
|
||||||
}
|
|
||||||
|
|
||||||
const std::string &GraphDbAccessor::PropertyName(
|
|
||||||
storage::Property property) const {
|
|
||||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
|
||||||
return db_->property_mapper().id_to_value(property);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::string> GraphDbAccessor::IndexInfo() const {
|
|
||||||
std::vector<std::string> info;
|
|
||||||
for (storage::Label label : db_->storage().labels_index_.Keys()) {
|
|
||||||
info.emplace_back(":" + LabelName(label));
|
|
||||||
}
|
|
||||||
for (LabelPropertyIndex::Key key :
|
|
||||||
db_->storage().label_property_index_.Keys()) {
|
|
||||||
info.emplace_back(fmt::format(":{}({})", LabelName(key.label_),
|
|
||||||
PropertyName(key.property_)));
|
|
||||||
}
|
|
||||||
return info;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::map<std::string, std::vector<std::pair<std::string, std::string>>>
|
|
||||||
GraphDbAccessor::StorageInfo() const {
|
|
||||||
return db_->storage_info()->GetStorageInfo();
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace database
|
|
@ -1,691 +0,0 @@
|
|||||||
/// @file
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include <map>
|
|
||||||
#include <optional>
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include <glog/logging.h>
|
|
||||||
#include <cppitertools/filter.hpp>
|
|
||||||
#include <cppitertools/imap.hpp>
|
|
||||||
|
|
||||||
#include "database/single_node_ha/graph_db.hpp"
|
|
||||||
#include "raft/raft_interface.hpp"
|
|
||||||
#include "storage/common/types/types.hpp"
|
|
||||||
#include "storage/single_node_ha/edge_accessor.hpp"
|
|
||||||
#include "storage/single_node_ha/state_delta_buffer.hpp"
|
|
||||||
#include "storage/single_node_ha/vertex_accessor.hpp"
|
|
||||||
#include "transactions/transaction.hpp"
|
|
||||||
#include "transactions/type.hpp"
|
|
||||||
#include "utils/bound.hpp"
|
|
||||||
#include "utils/exceptions.hpp"
|
|
||||||
|
|
||||||
namespace storage::constraints {
|
|
||||||
struct ConstraintEntry;
|
|
||||||
} // namespace storage::constraints
|
|
||||||
|
|
||||||
namespace database {
|
|
||||||
|
|
||||||
/** Thrown when inserting in an index with constraint. */
|
|
||||||
class ConstraintViolationException : public utils::BasicException {
|
|
||||||
using utils::BasicException::BasicException;
|
|
||||||
};
|
|
||||||
|
|
||||||
/** Thrown when creating an index which already exists. */
|
|
||||||
class IndexExistsException : public utils::BasicException {
|
|
||||||
using utils::BasicException::BasicException;
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Thrown on concurrent index creation when the transaction engine fails to
|
|
||||||
/// start a new transaction.
|
|
||||||
class TransactionException : public utils::BasicException {
|
|
||||||
using utils::BasicException::BasicException;
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Base accessor for the database object: exposes functions for operating on the
|
|
||||||
* database. All the functions in this class should be self-sufficient: for
|
|
||||||
* example the function for creating a new Vertex should take care of all the
|
|
||||||
* book-keeping around the creation.
|
|
||||||
*/
|
|
||||||
class GraphDbAccessor {
|
|
||||||
// We need to make friends with this guys since they need to access private
|
|
||||||
// methods for updating indices.
|
|
||||||
// TODO: Rethink this, we have too much long-distance friendship complicating
|
|
||||||
// the code.
|
|
||||||
friend class ::RecordAccessor<Vertex>;
|
|
||||||
friend class ::VertexAccessor;
|
|
||||||
friend class GraphDb;
|
|
||||||
|
|
||||||
protected:
|
|
||||||
// Construction should only be done through GraphDb::Access function and
|
|
||||||
// concrete GraphDbAccessor type.
|
|
||||||
|
|
||||||
/// Creates a new accessor by starting a new transaction.
|
|
||||||
explicit GraphDbAccessor(GraphDb *db);
|
|
||||||
/// Creates an accessor for a running transaction.
|
|
||||||
GraphDbAccessor(GraphDb *db, tx::TransactionId tx_id);
|
|
||||||
|
|
||||||
GraphDbAccessor(GraphDb *db, std::optional<tx::TransactionId> parent_tx);
|
|
||||||
|
|
||||||
public:
|
|
||||||
~GraphDbAccessor();
|
|
||||||
|
|
||||||
GraphDbAccessor(const GraphDbAccessor &other) = delete;
|
|
||||||
GraphDbAccessor &operator=(const GraphDbAccessor &other) = delete;
|
|
||||||
|
|
||||||
GraphDbAccessor(GraphDbAccessor &&other);
|
|
||||||
GraphDbAccessor &operator=(GraphDbAccessor &&other);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates a new Vertex and returns an accessor to it. If the ID is
|
|
||||||
* provided, the created Vertex will have that local ID, and the ID counter
|
|
||||||
* will be increased to it so collisions are avoided. This should only be used
|
|
||||||
* by durability recovery, normal vertex creation should not provide the ID.
|
|
||||||
*
|
|
||||||
* You should NOT make interleaved recovery and normal DB op calls to this
|
|
||||||
* function. Doing so will likely mess up the ID generation and crash MG.
|
|
||||||
* Always perform recovery only once, immediately when the database is
|
|
||||||
* created, before any transactional ops start.
|
|
||||||
*
|
|
||||||
* @param requested_gid The requested GID. Should only be provided when
|
|
||||||
* recovering from durability.
|
|
||||||
*
|
|
||||||
* @return See above.
|
|
||||||
*/
|
|
||||||
VertexAccessor InsertVertex(
|
|
||||||
std::optional<storage::Gid> requested_gid = std::nullopt);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Removes the vertex of the given accessor. If the vertex has any outgoing or
|
|
||||||
* incoming edges, it is not deleted. See `DetachRemoveVertex` if you want to
|
|
||||||
* remove a vertex regardless of connectivity.
|
|
||||||
*
|
|
||||||
* If the vertex has already been deleted by the current transaction+command,
|
|
||||||
* this function will not do anything and will return true.
|
|
||||||
*
|
|
||||||
* @param vertex_accessor Accessor to vertex.
|
|
||||||
* @param check_empty If the vertex should be checked for existing edges
|
|
||||||
* before deletion.
|
|
||||||
* @return If or not the vertex was deleted.
|
|
||||||
*/
|
|
||||||
bool RemoveVertex(VertexAccessor &vertex_accessor, bool check_empty = true);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Removes the vertex of the given accessor along with all it's outgoing
|
|
||||||
* and incoming connections.
|
|
||||||
*
|
|
||||||
* @param vertex_accessor Accessor to a vertex.
|
|
||||||
*/
|
|
||||||
void DetachRemoveVertex(VertexAccessor &vertex_accessor);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Obtains the vertex for the given ID. If there is no vertex for the given
|
|
||||||
* ID, or it's not visible to this accessor's transaction, nullopt is
|
|
||||||
* returned.
|
|
||||||
*
|
|
||||||
* @param gid - The GID of the sought vertex.
|
|
||||||
* @param current_state If true then the graph state for the
|
|
||||||
* current transaction+command is returned (insertions, updates and
|
|
||||||
* deletions performed in the current transaction+command are not
|
|
||||||
* ignored).
|
|
||||||
*/
|
|
||||||
std::optional<VertexAccessor> FindVertexOptional(storage::Gid gid,
|
|
||||||
bool current_state);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Obtains the vertex for the given ID. If there is no vertex for the given
|
|
||||||
* ID, or it's not visible to this accessor's transaction, MG is crashed
|
|
||||||
* using a CHECK.
|
|
||||||
*
|
|
||||||
* @param gid - The GID of the sought vertex.
|
|
||||||
* @param current_state If true then the graph state for the
|
|
||||||
* current transaction+command is returned (insertions, updates and
|
|
||||||
* deletions performed in the current transaction+command are not
|
|
||||||
* ignored).
|
|
||||||
*/
|
|
||||||
VertexAccessor FindVertex(storage::Gid gid, bool current_state);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns iterable over accessors to all the vertices in the graph
|
|
||||||
* visible to the current transaction.
|
|
||||||
*
|
|
||||||
* @param current_state If true then the graph state for the
|
|
||||||
* current transaction+command is returned (insertions, updates and
|
|
||||||
* deletions performed in the current transaction+command are not
|
|
||||||
* ignored).
|
|
||||||
*/
|
|
||||||
auto Vertices(bool current_state) {
|
|
||||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
|
||||||
// wrap version lists into accessors, which will look for visible versions
|
|
||||||
auto accessors = iter::imap(
|
|
||||||
[this](auto id_vlist) {
|
|
||||||
return VertexAccessor(id_vlist.second, *this);
|
|
||||||
},
|
|
||||||
db_->storage().vertices_.access());
|
|
||||||
|
|
||||||
// filter out the accessors not visible to the current transaction
|
|
||||||
return iter::filter(
|
|
||||||
[this, current_state](const VertexAccessor &accessor) {
|
|
||||||
return accessor.Visible(transaction(), current_state);
|
|
||||||
},
|
|
||||||
std::move(accessors));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Return VertexAccessors which contain the current label for the current
|
|
||||||
* transaction visibilty.
|
|
||||||
* @param label - label for which to return VertexAccessors
|
|
||||||
* @param current_state If true then the graph state for the
|
|
||||||
* current transaction+command is returned (insertions, updates and
|
|
||||||
* deletions performed in the current transaction+command are not
|
|
||||||
* ignored).
|
|
||||||
* @return iterable collection
|
|
||||||
*/
|
|
||||||
auto Vertices(storage::Label label, bool current_state) {
|
|
||||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
|
||||||
return iter::imap(
|
|
||||||
[this](auto vlist) { return VertexAccessor(vlist, *this); },
|
|
||||||
db_->storage().labels_index_.GetVlists(label, *transaction_,
|
|
||||||
current_state));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Return VertexAccessors which contain the current label and property for the
|
|
||||||
* given transaction visibility.
|
|
||||||
*
|
|
||||||
* @param label - label for which to return VertexAccessors
|
|
||||||
* @param property - property for which to return VertexAccessors
|
|
||||||
* @param current_state If true then the graph state for the
|
|
||||||
* current transaction+command is returned (insertions, updates and
|
|
||||||
* deletions performed in the current transaction+command are not
|
|
||||||
* ignored).
|
|
||||||
* @return iterable collection
|
|
||||||
*/
|
|
||||||
auto Vertices(storage::Label label, storage::Property property,
|
|
||||||
bool current_state) {
|
|
||||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
|
||||||
DCHECK(db_->storage().label_property_index_.IndexExists(
|
|
||||||
LabelPropertyIndex::Key(label, property)))
|
|
||||||
<< "Label+property index doesn't exist.";
|
|
||||||
return iter::imap(
|
|
||||||
[this](auto vlist) { return VertexAccessor(vlist, *this); },
|
|
||||||
db_->storage().label_property_index_.GetVlists(
|
|
||||||
LabelPropertyIndex::Key(label, property), *transaction_,
|
|
||||||
current_state));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Return VertexAccessors which contain the current label + property, and
|
|
||||||
* those properties are equal to this 'value' for the given transaction
|
|
||||||
* visibility.
|
|
||||||
* @param label - label for which to return VertexAccessors
|
|
||||||
* @param property - property for which to return VertexAccessors
|
|
||||||
* @param value - property value for which to return VertexAccessors
|
|
||||||
* @param current_state If true then the graph state for the
|
|
||||||
* current transaction+command is returned (insertions, updates and
|
|
||||||
* deletions performed in the current transaction+command are not
|
|
||||||
* ignored).
|
|
||||||
* @return iterable collection
|
|
||||||
*/
|
|
||||||
auto Vertices(storage::Label label, storage::Property property,
|
|
||||||
const PropertyValue &value, bool current_state) {
|
|
||||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
|
||||||
DCHECK(db_->storage().label_property_index_.IndexExists(
|
|
||||||
LabelPropertyIndex::Key(label, property)))
|
|
||||||
<< "Label+property index doesn't exist.";
|
|
||||||
CHECK(value.type() != PropertyValue::Type::Null)
|
|
||||||
<< "Can't query index for propery value type null.";
|
|
||||||
return iter::imap(
|
|
||||||
[this](auto vlist) { return VertexAccessor(vlist, *this); },
|
|
||||||
db_->storage().label_property_index_.GetVlists(
|
|
||||||
LabelPropertyIndex::Key(label, property), value, *transaction_,
|
|
||||||
current_state));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Return an iterable over VertexAccessors which contain the
|
|
||||||
* given label and whose property value (for the given property)
|
|
||||||
* falls within the given (lower, upper) @c Bound.
|
|
||||||
*
|
|
||||||
* The returned iterator will only contain
|
|
||||||
* vertices/edges whose property value is comparable with the
|
|
||||||
* given bounds (w.r.t. type). This has implications on Cypher
|
|
||||||
* query execuction semantics which have not been resovled yet.
|
|
||||||
*
|
|
||||||
* At least one of the bounds must be specified. Bonds can't be
|
|
||||||
* @c PropertyValue::Null. If both bounds are
|
|
||||||
* specified, their PropertyValue elments must be of comparable
|
|
||||||
* types.
|
|
||||||
*
|
|
||||||
* @param label - label for which to return VertexAccessors
|
|
||||||
* @param property - property for which to return VertexAccessors
|
|
||||||
* @param lower - Lower bound of the interval.
|
|
||||||
* @param upper - Upper bound of the interval.
|
|
||||||
* @param value - property value for which to return VertexAccessors
|
|
||||||
* @param current_state If true then the graph state for the
|
|
||||||
* current transaction+command is returned (insertions, updates and
|
|
||||||
* deletions performed in the current transaction+command are not
|
|
||||||
* ignored).
|
|
||||||
* @return iterable collection of record accessors
|
|
||||||
* satisfy the bounds and are visible to the current transaction.
|
|
||||||
*/
|
|
||||||
auto Vertices(storage::Label label, storage::Property property,
|
|
||||||
const std::optional<utils::Bound<PropertyValue>> lower,
|
|
||||||
const std::optional<utils::Bound<PropertyValue>> upper,
|
|
||||||
bool current_state) {
|
|
||||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
|
||||||
DCHECK(db_->storage().label_property_index_.IndexExists(
|
|
||||||
LabelPropertyIndex::Key(label, property)))
|
|
||||||
<< "Label+property index doesn't exist.";
|
|
||||||
return iter::imap(
|
|
||||||
[this](auto vlist) { return VertexAccessor(vlist, *this); },
|
|
||||||
db_->storage().label_property_index_.GetVlists(
|
|
||||||
LabelPropertyIndex::Key(label, property), lower, upper,
|
|
||||||
*transaction_, current_state));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates a new Edge and returns an accessor to it. If the ID is
|
|
||||||
* provided, the created Edge will have that ID, and the ID counter will be
|
|
||||||
* increased to it so collisions are avoided. This should only be used by
|
|
||||||
* durability recovery, normal edge creation should not provide the ID.
|
|
||||||
*
|
|
||||||
* You should NOT make interleaved recovery and normal DB op calls to this
|
|
||||||
* function. Doing so will likely mess up the ID generation and crash MG.
|
|
||||||
* Always perform recovery only once, immediately when the database is
|
|
||||||
* created, before any transactional ops start.
|
|
||||||
*
|
|
||||||
* @param from The 'from' vertex.
|
|
||||||
* @param to The 'to' vertex'
|
|
||||||
* @param type Edge type.
|
|
||||||
* @param requested_gid The requested GID. Should only be provided when
|
|
||||||
* recovering from durability.
|
|
||||||
*
|
|
||||||
* @return An accessor to the edge.
|
|
||||||
*/
|
|
||||||
EdgeAccessor InsertEdge(
|
|
||||||
VertexAccessor & from, VertexAccessor & to, storage::EdgeType type,
|
|
||||||
std::optional<storage::Gid> requested_gid = std::nullopt);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Removes an edge from the graph. Parameters can indicate if the edge should
|
|
||||||
* be removed from data structures in vertices it connects. When removing an
|
|
||||||
* edge both arguments should be `true`. `false` is only used when
|
|
||||||
* detach-deleting a vertex.
|
|
||||||
*
|
|
||||||
* @param edge The accessor to an edge.
|
|
||||||
* @param remove_out_edge If the edge should be removed from the its origin
|
|
||||||
* side.
|
|
||||||
* @param remove_in_edge If the edge should be removed from the its
|
|
||||||
* destination side.
|
|
||||||
*/
|
|
||||||
void RemoveEdge(EdgeAccessor &edge, bool remove_out_edge = true,
|
|
||||||
bool remove_in_edge = true);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Obtains the edge for the given ID. If there is no edge for the given
|
|
||||||
* ID, or it's not visible to this accessor's transaction, nullopt is
|
|
||||||
* returned.
|
|
||||||
*
|
|
||||||
* @param gid - The GID of the sought edge.
|
|
||||||
* @param current_state If true then the graph state for the
|
|
||||||
* current transaction+command is returned (insertions, updates and
|
|
||||||
* deletions performed in the current transaction+command are not
|
|
||||||
* ignored).
|
|
||||||
*/
|
|
||||||
std::optional<EdgeAccessor> FindEdgeOptional(storage::Gid gid,
|
|
||||||
bool current_state);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Obtains the edge for the given ID. If there is no edge for the given
|
|
||||||
* ID, or it's not visible to this accessor's transaction, MG is crashed
|
|
||||||
* using a CHECK.
|
|
||||||
*
|
|
||||||
* @param gid - The GID of the sought edge.
|
|
||||||
* @param current_state If true then the graph state for the
|
|
||||||
* current transaction+command is returned (insertions, updates and
|
|
||||||
* deletions performed in the current transaction+command are not
|
|
||||||
* ignored).
|
|
||||||
*/
|
|
||||||
EdgeAccessor FindEdge(storage::Gid gid, bool current_state);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns iterable over accessors to all the edges in the graph
|
|
||||||
* visible to the current transaction.
|
|
||||||
*
|
|
||||||
* @param current_state If true then the graph state for the
|
|
||||||
* current transaction+command is returned (insertions, updates and
|
|
||||||
* deletions performed in the current transaction+command are not
|
|
||||||
* ignored).
|
|
||||||
*/
|
|
||||||
auto Edges(bool current_state) {
|
|
||||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
|
||||||
|
|
||||||
// wrap version lists into accessors, which will look for visible versions
|
|
||||||
auto accessors = iter::imap(
|
|
||||||
[this](auto id_vlist) { return EdgeAccessor(id_vlist.second, *this); },
|
|
||||||
db_->storage().edges_.access());
|
|
||||||
|
|
||||||
// filter out the accessors not visible to the current transaction
|
|
||||||
return iter::filter(
|
|
||||||
[this, current_state](const EdgeAccessor &accessor) {
|
|
||||||
return accessor.Visible(transaction(), current_state);
|
|
||||||
},
|
|
||||||
std::move(accessors));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates and returns a new accessor that represents the same graph element
|
|
||||||
* (node / version) as the given `accessor`, but in this `GraphDbAccessor`.
|
|
||||||
*
|
|
||||||
* It is possible that the given `accessor` graph element is not visible in
|
|
||||||
* this `GraphDbAccessor`'s transaction. If that is the case, a `nullopt` is
|
|
||||||
* returned.
|
|
||||||
*
|
|
||||||
* The returned accessor does NOT have the same `current_` set as the given
|
|
||||||
* `accessor`. It has default post-construction `current_` set (`old` if
|
|
||||||
* available, otherwise `new`).
|
|
||||||
*
|
|
||||||
* @param accessor The [Vertex/Edge]Accessor whose underlying graph element we
|
|
||||||
* want in this GraphDbAccessor.
|
|
||||||
* @return See above.
|
|
||||||
* @tparam TAccessor Either VertexAccessor or EdgeAccessor
|
|
||||||
*/
|
|
||||||
template <typename TAccessor>
|
|
||||||
std::optional<TAccessor> Transfer(const TAccessor &accessor) {
|
|
||||||
if (accessor.db_accessor_ == this) return std::make_optional(accessor);
|
|
||||||
|
|
||||||
TAccessor accessor_in_this(accessor.address(), *this);
|
|
||||||
if (accessor_in_this.current_)
|
|
||||||
return std::make_optional(std::move(accessor_in_this));
|
|
||||||
else
|
|
||||||
return std::nullopt;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Adds an index for the given (label, property) and populates it with
|
|
||||||
* existing vertices that belong to it.
|
|
||||||
*
|
|
||||||
* You should never call BuildIndex on a GraphDbAccessor (transaction) on
|
|
||||||
* which new vertices have been inserted or existing ones updated. Do it
|
|
||||||
* in a new accessor instead.
|
|
||||||
*
|
|
||||||
* Build index throws if an index for the given (label, property) already
|
|
||||||
* exists (even if it's being built by a concurrent transaction and is not yet
|
|
||||||
* ready for use).
|
|
||||||
*
|
|
||||||
* It also throws if there is another index being built concurrently on the
|
|
||||||
* same database this accessor is for.
|
|
||||||
*
|
|
||||||
* @param label - label to build for
|
|
||||||
* @param property - property to build for
|
|
||||||
*/
|
|
||||||
void BuildIndex(storage::Label label, storage::Property property);
|
|
||||||
|
|
||||||
/// Deletes the index responisble for (label, property).
|
|
||||||
///
|
|
||||||
/// @throws IndexTransactionException if it can't obtain a blocking
|
|
||||||
/// transaction.
|
|
||||||
void DeleteIndex(storage::Label label, storage::Property property);
|
|
||||||
|
|
||||||
/// Populates index with vertices containing the key
|
|
||||||
void PopulateIndex(const LabelPropertyIndex::Key &key);
|
|
||||||
|
|
||||||
/// Writes Index (key) creation to Raft, marks it as ready for usage
|
|
||||||
void EnableIndex(const LabelPropertyIndex::Key &key);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates new unique constraint that consists of a label and multiple
|
|
||||||
* properties.
|
|
||||||
* If the constraint already exists, this method does nothing.
|
|
||||||
*
|
|
||||||
* @throws ConstraintViolationException if constraint couldn't be build
|
|
||||||
* due to existing constraint violation.
|
|
||||||
* @throws TransactionEngineError if the engine doesn't accept transactions.
|
|
||||||
* @throws mvcc::SerializationError on serialization errors.
|
|
||||||
*/
|
|
||||||
void BuildUniqueConstraint(storage::Label label,
|
|
||||||
const std::vector<storage::Property> &properties);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Deletes existing unique constraint.
|
|
||||||
* If the constraint doesn't exist, this method does nothing.
|
|
||||||
*/
|
|
||||||
void DeleteUniqueConstraint(storage::Label label,
|
|
||||||
const std::vector<storage::Property> &properties);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns a list of currently active unique constraints.
|
|
||||||
*/
|
|
||||||
std::vector<storage::constraints::ConstraintEntry> ListUniqueConstraints()
|
|
||||||
const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief - Returns true if the given label+property index already exists and
|
|
||||||
* is ready for use.
|
|
||||||
*/
|
|
||||||
bool LabelPropertyIndexExists(storage::Label label,
|
|
||||||
storage::Property property) const {
|
|
||||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
|
||||||
return db_->storage().label_property_index_.IndexExists(
|
|
||||||
LabelPropertyIndex::Key(label, property));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief - Returns vector of keys of label-property indices.
|
|
||||||
*/
|
|
||||||
std::vector<LabelPropertyIndex::Key> GetIndicesKeys() {
|
|
||||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
|
||||||
return db_->storage().label_property_index_.Keys();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Return approximate number of all vertices in the database.
|
|
||||||
* Note that this is always an over-estimate and never an under-estimate.
|
|
||||||
*/
|
|
||||||
int64_t VerticesCount() const;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Return approximate number of all edges in the database.
|
|
||||||
* Note that this is always an over-estimate and never an under-estimate.
|
|
||||||
*/
|
|
||||||
int64_t EdgesCount() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Return approximate number of vertices under indexes with the given label.
|
|
||||||
* Note that this is always an over-estimate and never an under-estimate.
|
|
||||||
*
|
|
||||||
* @param label - label to check for
|
|
||||||
* @return number of vertices with the given label
|
|
||||||
*/
|
|
||||||
int64_t VerticesCount(storage::Label label) const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Return approximate number of vertices under indexes with the given label
|
|
||||||
* and property. Note that this is always an over-estimate and never an
|
|
||||||
* under-estimate.
|
|
||||||
*
|
|
||||||
* @param label - label to check for
|
|
||||||
* @param property - property to check for
|
|
||||||
* @return number of vertices with the given label, fails if no such
|
|
||||||
* label+property index exists.
|
|
||||||
*/
|
|
||||||
int64_t VerticesCount(storage::Label label, storage::Property property) const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns approximate number of vertices that have the given label
|
|
||||||
* and the given value for the given property.
|
|
||||||
*
|
|
||||||
* Assumes that an index for that (label, property) exists.
|
|
||||||
*/
|
|
||||||
int64_t VerticesCount(storage::Label label, storage::Property property,
|
|
||||||
const PropertyValue &value) const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns approximate number of vertices that have the given label
|
|
||||||
* and whose vaue is in the range defined by upper and lower @c Bound.
|
|
||||||
*
|
|
||||||
* At least one bound must be specified. Neither can be
|
|
||||||
* PropertyValue::Null.
|
|
||||||
*
|
|
||||||
* Assumes that an index for that (label, property) exists.
|
|
||||||
*/
|
|
||||||
int64_t VerticesCount(
|
|
||||||
storage::Label label, storage::Property property,
|
|
||||||
const std::optional<utils::Bound<PropertyValue>> lower,
|
|
||||||
const std::optional<utils::Bound<PropertyValue>> upper) const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Obtains the Label for the label's name.
|
|
||||||
* @return See above.
|
|
||||||
*/
|
|
||||||
storage::Label Label(const std::string &label_name);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Obtains the label name (a string) for the given label.
|
|
||||||
*
|
|
||||||
* @param label a Label.
|
|
||||||
* @return See above.
|
|
||||||
*/
|
|
||||||
const std::string &LabelName(storage::Label label) const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Obtains the EdgeType for it's name.
|
|
||||||
* @return See above.
|
|
||||||
*/
|
|
||||||
storage::EdgeType EdgeType(const std::string &edge_type_name);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Obtains the edge type name (a string) for the given edge type.
|
|
||||||
*
|
|
||||||
* @param edge_type an EdgeType.
|
|
||||||
* @return See above.
|
|
||||||
*/
|
|
||||||
const std::string &EdgeTypeName(storage::EdgeType edge_type) const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Obtains the Property for it's name.
|
|
||||||
* @return See above.
|
|
||||||
*/
|
|
||||||
storage::Property Property(const std::string &property_name);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Obtains the property name (a string) for the given property.
|
|
||||||
*
|
|
||||||
* @param property a Property.
|
|
||||||
* @return See above.
|
|
||||||
*/
|
|
||||||
const std::string &PropertyName(storage::Property property) const;
|
|
||||||
|
|
||||||
/** Returns the id of this accessor's transaction */
|
|
||||||
tx::TransactionId transaction_id() const;
|
|
||||||
|
|
||||||
/** Advances transaction's command id by 1. */
|
|
||||||
void AdvanceCommand();
|
|
||||||
|
|
||||||
/** Commit transaction. */
|
|
||||||
void Commit();
|
|
||||||
|
|
||||||
/** Abort transaction. */
|
|
||||||
void Abort();
|
|
||||||
|
|
||||||
/** Return true if transaction is hinted to abort. */
|
|
||||||
bool should_abort() const;
|
|
||||||
|
|
||||||
const tx::Transaction &transaction() const { return *transaction_; }
|
|
||||||
raft::RaftInterface *raft();
|
|
||||||
storage::StateDeltaBuffer *sd_buffer();
|
|
||||||
auto &db() { return db_; }
|
|
||||||
const auto &db() const { return db_; }
|
|
||||||
|
|
||||||
/* Returns a list of index names present in the database. */
|
|
||||||
std::vector<std::string> IndexInfo() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns a map containing storage information for each Raft cluster member.
|
|
||||||
*
|
|
||||||
* Inside the vector, the following storage stats will exist:
|
|
||||||
* - vertex_count
|
|
||||||
* - edge_count
|
|
||||||
* - average_degree
|
|
||||||
* - memory_usage
|
|
||||||
* - disk_usage
|
|
||||||
**/
|
|
||||||
std::map<std::string, std::vector<std::pair<std::string, std::string>>>
|
|
||||||
StorageInfo() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Insert this vertex into corresponding label and label+property (if it
|
|
||||||
* exists) index.
|
|
||||||
*
|
|
||||||
* @param label - label with which to insert vertex label record
|
|
||||||
* @param vertex_accessor - vertex_accessor to insert
|
|
||||||
* @param vertex - vertex record to insert
|
|
||||||
*/
|
|
||||||
void UpdateLabelIndices(storage::Label label,
|
|
||||||
const VertexAccessor &vertex_accessor,
|
|
||||||
const Vertex *const vertex);
|
|
||||||
|
|
||||||
private:
|
|
||||||
GraphDb *db_;
|
|
||||||
tx::Transaction *transaction_;
|
|
||||||
// Indicates if this db-accessor started the transaction and should Abort it
|
|
||||||
// upon destruction.
|
|
||||||
bool transaction_starter_;
|
|
||||||
|
|
||||||
bool commited_{false};
|
|
||||||
bool aborted_{false};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Notifies storage about label addition.
|
|
||||||
*
|
|
||||||
* @param label - label that was added
|
|
||||||
* @param vertex_accessor - vertex_accessor that was updated
|
|
||||||
* @param vertex - vertex that was updated
|
|
||||||
*/
|
|
||||||
void UpdateOnAddLabel(storage::Label label,
|
|
||||||
const VertexAccessor &vertex_accessor,
|
|
||||||
const Vertex *vertex);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Notifies storage about label removal.
|
|
||||||
*
|
|
||||||
* @param label - label that was removed
|
|
||||||
* @param vertex_accessor - vertex_accessor that was updated
|
|
||||||
*/
|
|
||||||
void UpdateOnRemoveLabel(storage::Label label,
|
|
||||||
const RecordAccessor<Vertex> &accessor);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Notifies storage about a property removal.
|
|
||||||
*
|
|
||||||
* @param property - property that was removed
|
|
||||||
* @param previous_value - previous value of the property
|
|
||||||
* @param vertex_accessor - vertex_accessor that was updated
|
|
||||||
* @param vertex - vertex that was updated
|
|
||||||
*/
|
|
||||||
void UpdateOnRemoveProperty(storage::Property property,
|
|
||||||
const PropertyValue &previous_value,
|
|
||||||
const RecordAccessor<Vertex> &accessor,
|
|
||||||
const Vertex *vertex);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Notifies storage about a property addition.
|
|
||||||
*
|
|
||||||
* @param property - property that was added
|
|
||||||
* @param previous_value - previous value of the property
|
|
||||||
* @param new_value - new value of the property
|
|
||||||
* @param vertex_accessor - vertex accessor that was updated
|
|
||||||
* @param vertex - vertex that was updated
|
|
||||||
*/
|
|
||||||
void UpdateOnAddProperty(storage::Property property,
|
|
||||||
const PropertyValue &previous_value,
|
|
||||||
const PropertyValue &new_value,
|
|
||||||
const RecordAccessor<Vertex> &vertex_accessor,
|
|
||||||
const Vertex *vertex);
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace database
|
|
@ -1,9 +0,0 @@
|
|||||||
#>cpp
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include "durability/single_node_ha/state_delta.hpp"
|
|
||||||
#include "storage/common/types/slk.hpp"
|
|
||||||
cpp<#
|
|
||||||
|
|
||||||
;; Generate serialization of state-delta
|
|
||||||
(load "durability/single_node_ha/state_delta.lcp")
|
|
@ -1,74 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <fstream>
|
|
||||||
|
|
||||||
#include "hasher.hpp"
|
|
||||||
#include "utils/endian.hpp"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Buffer reads data from file and calculates hash of read data. Implements
|
|
||||||
* template param Buffer interface from BaseDecoder class.
|
|
||||||
*/
|
|
||||||
class HashedFileReader {
|
|
||||||
public:
|
|
||||||
/** Opens the file for reading. Returns true if successful. */
|
|
||||||
bool Open(const std::string &file) {
|
|
||||||
input_stream_.open(file, std::ios::in | std::ios::binary);
|
|
||||||
hasher_ = Hasher();
|
|
||||||
return !input_stream_.fail();
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Closes ifstream. Returns false if closing fails. */
|
|
||||||
bool Close() {
|
|
||||||
input_stream_.close();
|
|
||||||
return !input_stream_.fail();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Reads raw data from stream.
|
|
||||||
*
|
|
||||||
* @param data - pointer to where data should be stored.
|
|
||||||
* @param n - data length.
|
|
||||||
* @param hash - If the read should be included in the hash calculation.
|
|
||||||
*/
|
|
||||||
bool Read(uint8_t *data, size_t n, bool hash = true) {
|
|
||||||
input_stream_.read(reinterpret_cast<char *>(data), n);
|
|
||||||
if (input_stream_.fail()) return false;
|
|
||||||
if (hash) hasher_.Update(data, n);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Reads a TValue value from the stream.
|
|
||||||
*
|
|
||||||
* @param val - The value to read into.
|
|
||||||
* @param hash - If the read should be included in the hash calculation.
|
|
||||||
* @tparam TValue - Type of value being read.
|
|
||||||
* @return - If the read was successful.
|
|
||||||
*/
|
|
||||||
template <typename TValue>
|
|
||||||
bool ReadType(TValue &val, bool hash = true) {
|
|
||||||
if (!Read(reinterpret_cast<uint8_t *>(&val), sizeof(TValue), hash))
|
|
||||||
return false;
|
|
||||||
val = utils::BigEndianToHost(val);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
void Seek(std::streamoff offset, std::ios_base::seekdir way) {
|
|
||||||
input_stream_.seekg(offset, way);
|
|
||||||
}
|
|
||||||
|
|
||||||
void Seek(std::streampos pos) { input_stream_.seekg(pos); }
|
|
||||||
|
|
||||||
auto Tellg() { return input_stream_.tellg(); }
|
|
||||||
|
|
||||||
/** Returns the hash of the data read so far from the stream. */
|
|
||||||
uint64_t hash() const { return hasher_.hash(); }
|
|
||||||
|
|
||||||
/** Checks whether the end of file is reached. */
|
|
||||||
bool EndOfFile() const { return input_stream_.eof(); }
|
|
||||||
|
|
||||||
private:
|
|
||||||
Hasher hasher_;
|
|
||||||
std::ifstream input_stream_;
|
|
||||||
};
|
|
@ -1,74 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <fstream>
|
|
||||||
|
|
||||||
#include "hasher.hpp"
|
|
||||||
#include "utils/endian.hpp"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Buffer that writes data to file and calculates hash of written data.
|
|
||||||
* Implements template param Buffer interface from BaseEncoder class.
|
|
||||||
*
|
|
||||||
* All of the methods on a HashedFileWriter can throw an exception.
|
|
||||||
*/
|
|
||||||
class HashedFileWriter {
|
|
||||||
public:
|
|
||||||
/** Constructor, initialize ofstream to throw exception on fail. */
|
|
||||||
HashedFileWriter() {
|
|
||||||
output_stream_.exceptions(std::ifstream::failbit | std::ifstream::badbit);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Constructor which also takes a file path and opens it immediately. */
|
|
||||||
explicit HashedFileWriter(const std::string &path) : HashedFileWriter() {
|
|
||||||
output_stream_.open(path, std::ios::out | std::ios::binary);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Opens the writer */
|
|
||||||
void Open(const std::string &path) {
|
|
||||||
output_stream_.open(path, std::ios::out | std::ios::binary);
|
|
||||||
hasher_ = Hasher();
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Closes the writer. */
|
|
||||||
void Close() { output_stream_.close(); }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Writes data to stream.
|
|
||||||
*
|
|
||||||
* @param data - Pointer to data to write.
|
|
||||||
* @param n - Data length.
|
|
||||||
* @param hash - If writing should update the hash.
|
|
||||||
* @return - True if succesful.
|
|
||||||
*/
|
|
||||||
void Write(const uint8_t *data, size_t n, bool hash = true) {
|
|
||||||
output_stream_.write(reinterpret_cast<const char *>(data), n);
|
|
||||||
if (hash) hasher_.Update(data, n);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Writes a TValue to the stream.
|
|
||||||
*
|
|
||||||
* @param val - The value to write.
|
|
||||||
* @param hash - If writing should update the hash.
|
|
||||||
* @return - True if succesful.
|
|
||||||
*/
|
|
||||||
template <typename TValue>
|
|
||||||
void WriteValue(const TValue &val, bool hash = true) {
|
|
||||||
TValue val_big = utils::HostToBigEndian(val);
|
|
||||||
Write(reinterpret_cast<const uint8_t *>(&val_big), sizeof(TValue), hash);
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO try to remove before diff
|
|
||||||
/** Does nothing. Just for API compatibility with the bolt buffer. */
|
|
||||||
void Chunk() {}
|
|
||||||
|
|
||||||
/** Flushes data to stream. */
|
|
||||||
void Flush() { output_stream_.flush(); }
|
|
||||||
|
|
||||||
/** Returns the hash of the data written so far to the stream. */
|
|
||||||
uint64_t hash() const { return hasher_.hash(); }
|
|
||||||
|
|
||||||
private:
|
|
||||||
std::ofstream output_stream_;
|
|
||||||
Hasher hasher_;
|
|
||||||
};
|
|
@ -1,31 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <cstdint>
|
|
||||||
#include <cstdlib>
|
|
||||||
|
|
||||||
// TODO: implement better hash function
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Class calculates hash of the data dynamically.
|
|
||||||
*/
|
|
||||||
class Hasher {
|
|
||||||
/** Prime number used in calculating hash. */
|
|
||||||
static constexpr uint64_t kPrime = 3137;
|
|
||||||
|
|
||||||
public:
|
|
||||||
/**
|
|
||||||
* Updates hash from given data.
|
|
||||||
*
|
|
||||||
* @param data - Data from which hash will be updated.
|
|
||||||
* @param n - Length of the data.
|
|
||||||
*/
|
|
||||||
void Update(const uint8_t *data, size_t n) {
|
|
||||||
for (size_t i = 0; i < n; ++i) hash_ = hash_ * kPrime + data[i] + 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Returns current hash value. */
|
|
||||||
uint64_t hash() const { return hash_; }
|
|
||||||
|
|
||||||
private:
|
|
||||||
uint64_t hash_ = 0;
|
|
||||||
};
|
|
@ -1,26 +0,0 @@
|
|||||||
#include "durability/single_node_ha/paths.hpp"
|
|
||||||
|
|
||||||
#include "utils/string.hpp"
|
|
||||||
#include "utils/timestamp.hpp"
|
|
||||||
|
|
||||||
namespace durability {
|
|
||||||
|
|
||||||
namespace fs = std::filesystem;
|
|
||||||
|
|
||||||
// This is the prefix used for WAL and Snapshot filenames. It is a timestamp
|
|
||||||
// format that equals to: YYYYmmddHHMMSSffffff
|
|
||||||
const std::string kTimestampFormat =
|
|
||||||
"{:04d}{:02d}{:02d}{:02d}{:02d}{:02d}{:06d}";
|
|
||||||
|
|
||||||
std::string GetSnapshotFilename(uint64_t last_included_term,
|
|
||||||
uint64_t last_included_index) {
|
|
||||||
std::string date_str = utils::Timestamp::Now().ToString(kTimestampFormat);
|
|
||||||
return date_str + "_term_" + std::to_string(last_included_term) + "_index_" +
|
|
||||||
std::to_string(last_included_index);
|
|
||||||
}
|
|
||||||
|
|
||||||
fs::path MakeSnapshotPath(const fs::path &durability_dir,
|
|
||||||
const std::string &snapshot_filename) {
|
|
||||||
return durability_dir / kSnapshotDir / snapshot_filename;
|
|
||||||
}
|
|
||||||
} // namespace durability
|
|
@ -1,20 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <filesystem>
|
|
||||||
#include <string>
|
|
||||||
|
|
||||||
namespace durability {
|
|
||||||
const std::string kSnapshotDir = "snapshots";
|
|
||||||
const std::string kBackupDir = ".backup";
|
|
||||||
|
|
||||||
/// Generates a filename for a DB snapshot in the given folder in a well-defined
|
|
||||||
/// sortable format with last included term and last included index from which
|
|
||||||
/// the snapshot is created appended to the file name.
|
|
||||||
std::string GetSnapshotFilename(uint64_t last_included_term,
|
|
||||||
uint64_t last_included_index);
|
|
||||||
|
|
||||||
/// Generates a full path for a DB snapshot.
|
|
||||||
std::filesystem::path MakeSnapshotPath(
|
|
||||||
const std::filesystem::path &durability_dir,
|
|
||||||
const std::string &snapshot_filename);
|
|
||||||
} // namespace durability
|
|
@ -1,172 +0,0 @@
|
|||||||
#include "durability/single_node_ha/recovery.hpp"
|
|
||||||
|
|
||||||
#include <filesystem>
|
|
||||||
#include <limits>
|
|
||||||
#include <optional>
|
|
||||||
#include <unordered_map>
|
|
||||||
|
|
||||||
#include "communication/bolt/v1/decoder/decoder.hpp"
|
|
||||||
#include "database/single_node_ha/graph_db_accessor.hpp"
|
|
||||||
#include "durability/hashed_file_reader.hpp"
|
|
||||||
#include "durability/single_node_ha/paths.hpp"
|
|
||||||
#include "durability/single_node_ha/version.hpp"
|
|
||||||
#include "glue/communication.hpp"
|
|
||||||
#include "storage/single_node_ha/indexes/label_property_index.hpp"
|
|
||||||
#include "transactions/type.hpp"
|
|
||||||
#include "utils/algorithm.hpp"
|
|
||||||
#include "utils/file.hpp"
|
|
||||||
|
|
||||||
namespace fs = std::filesystem;
|
|
||||||
|
|
||||||
namespace durability {
|
|
||||||
|
|
||||||
using communication::bolt::Value;
|
|
||||||
bool ReadSnapshotSummary(HashedFileReader &buffer, int64_t &vertex_count,
|
|
||||||
int64_t &edge_count, uint64_t &hash) {
|
|
||||||
auto pos = buffer.Tellg();
|
|
||||||
auto offset = sizeof(vertex_count) + sizeof(edge_count) + sizeof(hash);
|
|
||||||
buffer.Seek(-offset, std::ios_base::end);
|
|
||||||
bool r_val = buffer.ReadType(vertex_count, false) &&
|
|
||||||
buffer.ReadType(edge_count, false) &&
|
|
||||||
buffer.ReadType(hash, false);
|
|
||||||
buffer.Seek(pos);
|
|
||||||
return r_val;
|
|
||||||
}
|
|
||||||
|
|
||||||
namespace {
|
|
||||||
using communication::bolt::Value;
|
|
||||||
|
|
||||||
#define RETURN_IF_NOT(condition) \
|
|
||||||
if (!(condition)) { \
|
|
||||||
reader.Close(); \
|
|
||||||
return false; \
|
|
||||||
}
|
|
||||||
|
|
||||||
bool RecoverSnapshot(const fs::path &snapshot_file, database::GraphDb *db,
|
|
||||||
RecoveryData *recovery_data) {
|
|
||||||
HashedFileReader reader;
|
|
||||||
communication::bolt::Decoder<HashedFileReader> decoder(reader);
|
|
||||||
|
|
||||||
RETURN_IF_NOT(reader.Open(snapshot_file));
|
|
||||||
|
|
||||||
auto magic_number = durability::kSnapshotMagic;
|
|
||||||
reader.Read(magic_number.data(), magic_number.size());
|
|
||||||
RETURN_IF_NOT(magic_number == durability::kSnapshotMagic);
|
|
||||||
|
|
||||||
// Read the vertex and edge count, and the hash, from the end of the snapshot.
|
|
||||||
int64_t vertex_count;
|
|
||||||
int64_t edge_count;
|
|
||||||
uint64_t hash;
|
|
||||||
RETURN_IF_NOT(
|
|
||||||
durability::ReadSnapshotSummary(reader, vertex_count, edge_count, hash));
|
|
||||||
|
|
||||||
Value dv;
|
|
||||||
RETURN_IF_NOT(decoder.ReadValue(&dv, Value::Type::Int) &&
|
|
||||||
dv.ValueInt() == durability::kVersion);
|
|
||||||
|
|
||||||
// A list of label+property indexes.
|
|
||||||
RETURN_IF_NOT(decoder.ReadValue(&dv, Value::Type::List));
|
|
||||||
auto index_value = dv.ValueList();
|
|
||||||
for (auto it = index_value.begin(); it != index_value.end();) {
|
|
||||||
auto label = *it++;
|
|
||||||
RETURN_IF_NOT(it != index_value.end());
|
|
||||||
auto property = *it++;
|
|
||||||
RETURN_IF_NOT(label.IsString() && property.IsString());
|
|
||||||
recovery_data->indexes.emplace_back(
|
|
||||||
IndexRecoveryData{label.ValueString(), property.ValueString(),
|
|
||||||
/*create = */ true});
|
|
||||||
}
|
|
||||||
|
|
||||||
auto dba = db->Access();
|
|
||||||
std::unordered_map<uint64_t, VertexAccessor> vertices;
|
|
||||||
for (int64_t i = 0; i < vertex_count; ++i) {
|
|
||||||
Value vertex_dv;
|
|
||||||
RETURN_IF_NOT(decoder.ReadValue(&vertex_dv, Value::Type::Vertex));
|
|
||||||
auto &vertex = vertex_dv.ValueVertex();
|
|
||||||
auto vertex_accessor =
|
|
||||||
dba.InsertVertex(storage::Gid::FromUint(vertex.id.AsUint()));
|
|
||||||
|
|
||||||
for (const auto &label : vertex.labels) {
|
|
||||||
vertex_accessor.add_label(dba.Label(label));
|
|
||||||
}
|
|
||||||
for (const auto &property_pair : vertex.properties) {
|
|
||||||
vertex_accessor.PropsSet(dba.Property(property_pair.first),
|
|
||||||
glue::ToPropertyValue(property_pair.second));
|
|
||||||
}
|
|
||||||
vertices.insert({vertex.id.AsUint(), vertex_accessor});
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int64_t i = 0; i < edge_count; ++i) {
|
|
||||||
Value edge_dv;
|
|
||||||
RETURN_IF_NOT(decoder.ReadValue(&edge_dv, Value::Type::Edge));
|
|
||||||
auto &edge = edge_dv.ValueEdge();
|
|
||||||
auto it_from = vertices.find(edge.from.AsUint());
|
|
||||||
auto it_to = vertices.find(edge.to.AsUint());
|
|
||||||
RETURN_IF_NOT(it_from != vertices.end() && it_to != vertices.end());
|
|
||||||
auto edge_accessor =
|
|
||||||
dba.InsertEdge(it_from->second, it_to->second, dba.EdgeType(edge.type),
|
|
||||||
storage::Gid::FromUint(edge.id.AsUint()));
|
|
||||||
|
|
||||||
for (const auto &property_pair : edge.properties)
|
|
||||||
edge_accessor.PropsSet(dba.Property(property_pair.first),
|
|
||||||
glue::ToPropertyValue(property_pair.second));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Vertex and edge counts are included in the hash. Re-read them to update the
|
|
||||||
// hash.
|
|
||||||
reader.ReadType(vertex_count);
|
|
||||||
reader.ReadType(edge_count);
|
|
||||||
if (!reader.Close() || reader.hash() != hash) {
|
|
||||||
dba.Abort();
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
dba.Commit();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
#undef RETURN_IF_NOT
|
|
||||||
|
|
||||||
} // anonymous namespace
|
|
||||||
|
|
||||||
bool RecoverSnapshot(database::GraphDb *db, RecoveryData *recovery_data,
|
|
||||||
const fs::path &durability_dir,
|
|
||||||
const std::string &snapshot_filename) {
|
|
||||||
const auto snapshot_dir = durability_dir / kSnapshotDir;
|
|
||||||
if (!fs::exists(snapshot_dir) || !fs::is_directory(snapshot_dir)) {
|
|
||||||
LOG(WARNING) << "Missing snapshot directory!";
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
const auto snapshot = snapshot_dir / snapshot_filename;
|
|
||||||
if (!fs::exists(snapshot)) {
|
|
||||||
LOG(WARNING) << "Missing snapshot file!";
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
LOG(INFO) << "Starting snapshot recovery from: " << snapshot;
|
|
||||||
if (!RecoverSnapshot(snapshot, db, recovery_data)) {
|
|
||||||
LOG(WARNING) << "Snapshot recovery failed.";
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
LOG(INFO) << "Snapshot recovery successful.";
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
void RecoverIndexes(database::GraphDb *db,
|
|
||||||
const std::vector<IndexRecoveryData> &indexes) {
|
|
||||||
auto dba = db->Access();
|
|
||||||
for (const auto &index : indexes) {
|
|
||||||
auto label = dba.Label(index.label);
|
|
||||||
auto property = dba.Property(index.property);
|
|
||||||
if (index.create) {
|
|
||||||
dba.BuildIndex(label, property);
|
|
||||||
} else {
|
|
||||||
dba.DeleteIndex(label, property);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
dba.Commit();
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace durability
|
|
@ -1,57 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <filesystem>
|
|
||||||
#include <optional>
|
|
||||||
#include <unordered_map>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include "durability/hashed_file_reader.hpp"
|
|
||||||
#include "durability/single_node_ha/state_delta.hpp"
|
|
||||||
#include "transactions/type.hpp"
|
|
||||||
|
|
||||||
namespace database {
|
|
||||||
class GraphDb;
|
|
||||||
};
|
|
||||||
|
|
||||||
namespace durability {
|
|
||||||
|
|
||||||
struct IndexRecoveryData {
|
|
||||||
std::string label;
|
|
||||||
std::string property;
|
|
||||||
bool create; // distinguish between creating and dropping index
|
|
||||||
bool unique; // used only when creating an index
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Data structure for exchanging info between main recovery function and
|
|
||||||
/// snapshot recovery functions.
|
|
||||||
struct RecoveryData {
|
|
||||||
// A collection into which the indexes should be added so they
|
|
||||||
// can be rebuilt at the end of the recovery transaction.
|
|
||||||
std::vector<IndexRecoveryData> indexes;
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Reads snapshot metadata from the end of the file without messing up the
|
|
||||||
/// hash.
|
|
||||||
bool ReadSnapshotSummary(HashedFileReader &buffer, int64_t &vertex_count,
|
|
||||||
int64_t &edge_count, uint64_t &hash);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Recovers database from the given snapshot. If recovering fails, false is
|
|
||||||
* returned and db_accessor aborts transaction, else true is returned and
|
|
||||||
* transaction is commited.
|
|
||||||
*
|
|
||||||
* @param db - The database to recover into.
|
|
||||||
* @param recovery_data - Struct that will contain additional recovery data.
|
|
||||||
* @param durability_dir - Path to durability directory.
|
|
||||||
* @param snapshot_filename - Snapshot filename.
|
|
||||||
* @return - recovery info
|
|
||||||
*/
|
|
||||||
bool RecoverSnapshot(database::GraphDb *db,
|
|
||||||
durability::RecoveryData *recovery_data,
|
|
||||||
const std::filesystem::path &durability_dir,
|
|
||||||
const std::string &snapshot_filename);
|
|
||||||
|
|
||||||
void RecoverIndexes(database::GraphDb *db,
|
|
||||||
const std::vector<IndexRecoveryData> &indexes);
|
|
||||||
|
|
||||||
} // namespace durability
|
|
@ -1,108 +0,0 @@
|
|||||||
#include "durability/single_node_ha/snapshooter.hpp"
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
|
|
||||||
#include <glog/logging.h>
|
|
||||||
|
|
||||||
#include "communication/bolt/v1/encoder/base_encoder.hpp"
|
|
||||||
#include "database/single_node_ha/graph_db_accessor.hpp"
|
|
||||||
#include "durability/hashed_file_writer.hpp"
|
|
||||||
#include "durability/single_node_ha/paths.hpp"
|
|
||||||
#include "durability/single_node_ha/version.hpp"
|
|
||||||
#include "glue/communication.hpp"
|
|
||||||
#include "storage/v2/view.hpp"
|
|
||||||
#include "utils/file.hpp"
|
|
||||||
|
|
||||||
namespace fs = std::filesystem;
|
|
||||||
|
|
||||||
namespace durability {
|
|
||||||
|
|
||||||
// Snapshot layout is described in durability/version.hpp
|
|
||||||
static_assert(durability::kVersion == 9,
|
|
||||||
"Wrong snapshot version, please update!");
|
|
||||||
|
|
||||||
namespace {
|
|
||||||
bool Encode(const fs::path &snapshot_file, database::GraphDb &db,
|
|
||||||
database::GraphDbAccessor &dba) {
|
|
||||||
try {
|
|
||||||
HashedFileWriter buffer(snapshot_file);
|
|
||||||
communication::bolt::BaseEncoder<HashedFileWriter> encoder(buffer);
|
|
||||||
int64_t vertex_num = 0, edge_num = 0;
|
|
||||||
|
|
||||||
encoder.WriteRAW(durability::kSnapshotMagic.data(),
|
|
||||||
durability::kSnapshotMagic.size());
|
|
||||||
encoder.WriteInt(durability::kVersion);
|
|
||||||
|
|
||||||
// Write label+property indexes as list ["label", "property", ...]
|
|
||||||
{
|
|
||||||
std::vector<communication::bolt::Value> index_vec;
|
|
||||||
for (const auto &key : dba.GetIndicesKeys()) {
|
|
||||||
index_vec.emplace_back(dba.LabelName(key.label_));
|
|
||||||
index_vec.emplace_back(dba.PropertyName(key.property_));
|
|
||||||
}
|
|
||||||
encoder.WriteList(index_vec);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (const auto &vertex : dba.Vertices(false)) {
|
|
||||||
encoder.WriteVertex(glue::ToBoltVertex(vertex, storage::View::OLD));
|
|
||||||
vertex_num++;
|
|
||||||
}
|
|
||||||
for (const auto &edge : dba.Edges(false)) {
|
|
||||||
encoder.WriteEdge(glue::ToBoltEdge(edge, storage::View::OLD));
|
|
||||||
edge_num++;
|
|
||||||
}
|
|
||||||
buffer.WriteValue(vertex_num);
|
|
||||||
buffer.WriteValue(edge_num);
|
|
||||||
buffer.WriteValue(buffer.hash());
|
|
||||||
buffer.Close();
|
|
||||||
} catch (const std::ifstream::failure &) {
|
|
||||||
if (fs::exists(snapshot_file) && !fs::remove(snapshot_file)) {
|
|
||||||
LOG(ERROR) << "Error while removing corrupted snapshot file: "
|
|
||||||
<< snapshot_file;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Remove old snapshots but leave at most `keep` number of latest ones.
|
|
||||||
void RemoveOldSnapshots(const fs::path &snapshot_dir, uint16_t keep) {
|
|
||||||
std::vector<fs::path> files;
|
|
||||||
for (auto &file : fs::directory_iterator(snapshot_dir))
|
|
||||||
files.push_back(file.path());
|
|
||||||
if (static_cast<uint16_t>(files.size()) <= keep) return;
|
|
||||||
sort(files.begin(), files.end());
|
|
||||||
for (int i = 0; i < static_cast<uint16_t>(files.size()) - keep; ++i) {
|
|
||||||
if (!fs::remove(files[i])) {
|
|
||||||
LOG(ERROR) << "Error while removing file: " << files[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace
|
|
||||||
|
|
||||||
bool MakeSnapshot(database::GraphDb &db, database::GraphDbAccessor &dba,
|
|
||||||
const fs::path &durability_dir,
|
|
||||||
const std::string &snapshot_filename) {
|
|
||||||
if (!utils::EnsureDir(durability_dir / kSnapshotDir)) return false;
|
|
||||||
const auto snapshot_file =
|
|
||||||
MakeSnapshotPath(durability_dir, snapshot_filename);
|
|
||||||
if (fs::exists(snapshot_file)) return false;
|
|
||||||
if (Encode(snapshot_file, db, dba)) {
|
|
||||||
// Only keep the latest snapshot.
|
|
||||||
RemoveOldSnapshots(durability_dir / kSnapshotDir, 1);
|
|
||||||
return true;
|
|
||||||
} else {
|
|
||||||
std::error_code error_code; // Just for exception suppression.
|
|
||||||
fs::remove(snapshot_file, error_code);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void RemoveAllSnapshots(const fs::path &durability_dir) {
|
|
||||||
auto snapshot_dir = durability_dir / kSnapshotDir;
|
|
||||||
if (!utils::EnsureDir(snapshot_dir)) return;
|
|
||||||
RemoveOldSnapshots(snapshot_dir, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace durability
|
|
@ -1,22 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <filesystem>
|
|
||||||
|
|
||||||
#include "database/single_node_ha/graph_db.hpp"
|
|
||||||
|
|
||||||
namespace durability {
|
|
||||||
|
|
||||||
/// Make snapshot and save it in snapshots folder. Returns true if successful.
|
|
||||||
/// @param db - database for which we are creating a snapshot
|
|
||||||
/// @param dba - db accessor with which we are creating a snapshot (reading
|
|
||||||
/// data)
|
|
||||||
/// @param durability_dir - directory where durability data is stored.
|
|
||||||
/// @param snapshot_filename - filename for the snapshot.
|
|
||||||
bool MakeSnapshot(database::GraphDb &db, database::GraphDbAccessor &dba,
|
|
||||||
const std::filesystem::path &durability_dir,
|
|
||||||
const std::string &snapshot_filename);
|
|
||||||
|
|
||||||
/// Remove all snapshots inside the snapshot durability directory.
|
|
||||||
void RemoveAllSnapshots(const std::filesystem::path &durability_dir);
|
|
||||||
|
|
||||||
} // namespace durability
|
|
@ -1,453 +0,0 @@
|
|||||||
#include "durability/single_node_ha/state_delta.hpp"
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
|
|
||||||
#include "communication/bolt/v1/value.hpp"
|
|
||||||
#include "database/single_node_ha/graph_db_accessor.hpp"
|
|
||||||
#include "glue/communication.hpp"
|
|
||||||
|
|
||||||
namespace database {
|
|
||||||
|
|
||||||
StateDelta StateDelta::TxBegin(tx::TransactionId tx_id) {
|
|
||||||
return {StateDelta::Type::TRANSACTION_BEGIN, tx_id};
|
|
||||||
}
|
|
||||||
|
|
||||||
StateDelta StateDelta::TxCommit(tx::TransactionId tx_id) {
|
|
||||||
return {StateDelta::Type::TRANSACTION_COMMIT, tx_id};
|
|
||||||
}
|
|
||||||
|
|
||||||
StateDelta StateDelta::TxAbort(tx::TransactionId tx_id) {
|
|
||||||
return {StateDelta::Type::TRANSACTION_ABORT, tx_id};
|
|
||||||
}
|
|
||||||
|
|
||||||
StateDelta StateDelta::CreateVertex(tx::TransactionId tx_id,
|
|
||||||
storage::Gid vertex_id) {
|
|
||||||
StateDelta op(StateDelta::Type::CREATE_VERTEX, tx_id);
|
|
||||||
op.vertex_id = vertex_id;
|
|
||||||
return op;
|
|
||||||
}
|
|
||||||
|
|
||||||
StateDelta StateDelta::CreateEdge(tx::TransactionId tx_id, storage::Gid edge_id,
|
|
||||||
storage::Gid vertex_from_id,
|
|
||||||
storage::Gid vertex_to_id,
|
|
||||||
storage::EdgeType edge_type,
|
|
||||||
const std::string &edge_type_name) {
|
|
||||||
StateDelta op(StateDelta::Type::CREATE_EDGE, tx_id);
|
|
||||||
op.edge_id = edge_id;
|
|
||||||
op.vertex_from_id = vertex_from_id;
|
|
||||||
op.vertex_to_id = vertex_to_id;
|
|
||||||
op.edge_type = edge_type;
|
|
||||||
op.edge_type_name = edge_type_name;
|
|
||||||
return op;
|
|
||||||
}
|
|
||||||
|
|
||||||
StateDelta StateDelta::PropsSetVertex(tx::TransactionId tx_id,
|
|
||||||
storage::Gid vertex_id,
|
|
||||||
storage::Property property,
|
|
||||||
const std::string &property_name,
|
|
||||||
const PropertyValue &value) {
|
|
||||||
StateDelta op(StateDelta::Type::SET_PROPERTY_VERTEX, tx_id);
|
|
||||||
op.vertex_id = vertex_id;
|
|
||||||
op.property = property;
|
|
||||||
op.property_name = property_name;
|
|
||||||
op.value = value;
|
|
||||||
return op;
|
|
||||||
}
|
|
||||||
|
|
||||||
StateDelta StateDelta::PropsSetEdge(tx::TransactionId tx_id,
|
|
||||||
storage::Gid edge_id,
|
|
||||||
storage::Property property,
|
|
||||||
const std::string &property_name,
|
|
||||||
const PropertyValue &value) {
|
|
||||||
StateDelta op(StateDelta::Type::SET_PROPERTY_EDGE, tx_id);
|
|
||||||
op.edge_id = edge_id;
|
|
||||||
op.property = property;
|
|
||||||
op.property_name = property_name;
|
|
||||||
op.value = value;
|
|
||||||
return op;
|
|
||||||
}
|
|
||||||
|
|
||||||
StateDelta StateDelta::AddLabel(tx::TransactionId tx_id, storage::Gid vertex_id,
|
|
||||||
storage::Label label,
|
|
||||||
const std::string &label_name) {
|
|
||||||
StateDelta op(StateDelta::Type::ADD_LABEL, tx_id);
|
|
||||||
op.vertex_id = vertex_id;
|
|
||||||
op.label = label;
|
|
||||||
op.label_name = label_name;
|
|
||||||
return op;
|
|
||||||
}
|
|
||||||
|
|
||||||
StateDelta StateDelta::RemoveLabel(tx::TransactionId tx_id,
|
|
||||||
storage::Gid vertex_id, storage::Label label,
|
|
||||||
const std::string &label_name) {
|
|
||||||
StateDelta op(StateDelta::Type::REMOVE_LABEL, tx_id);
|
|
||||||
op.vertex_id = vertex_id;
|
|
||||||
op.label = label;
|
|
||||||
op.label_name = label_name;
|
|
||||||
return op;
|
|
||||||
}
|
|
||||||
|
|
||||||
StateDelta StateDelta::RemoveVertex(tx::TransactionId tx_id,
|
|
||||||
storage::Gid vertex_id, bool check_empty) {
|
|
||||||
StateDelta op(StateDelta::Type::REMOVE_VERTEX, tx_id);
|
|
||||||
op.vertex_id = vertex_id;
|
|
||||||
op.check_empty = check_empty;
|
|
||||||
return op;
|
|
||||||
}
|
|
||||||
|
|
||||||
StateDelta StateDelta::RemoveEdge(tx::TransactionId tx_id,
|
|
||||||
storage::Gid edge_id) {
|
|
||||||
StateDelta op(StateDelta::Type::REMOVE_EDGE, tx_id);
|
|
||||||
op.edge_id = edge_id;
|
|
||||||
return op;
|
|
||||||
}
|
|
||||||
|
|
||||||
StateDelta StateDelta::BuildIndex(tx::TransactionId tx_id, storage::Label label,
|
|
||||||
const std::string &label_name,
|
|
||||||
storage::Property property,
|
|
||||||
const std::string &property_name) {
|
|
||||||
StateDelta op(StateDelta::Type::BUILD_INDEX, tx_id);
|
|
||||||
op.label = label;
|
|
||||||
op.label_name = label_name;
|
|
||||||
op.property = property;
|
|
||||||
op.property_name = property_name;
|
|
||||||
return op;
|
|
||||||
}
|
|
||||||
|
|
||||||
StateDelta StateDelta::DropIndex(tx::TransactionId tx_id, storage::Label label,
|
|
||||||
const std::string &label_name,
|
|
||||||
storage::Property property,
|
|
||||||
const std::string &property_name) {
|
|
||||||
StateDelta op(StateDelta::Type::DROP_INDEX, tx_id);
|
|
||||||
op.label = label;
|
|
||||||
op.label_name = label_name;
|
|
||||||
op.property = property;
|
|
||||||
op.property_name = property_name;
|
|
||||||
return op;
|
|
||||||
}
|
|
||||||
|
|
||||||
StateDelta StateDelta::NoOp(tx::TransactionId tx_id) {
|
|
||||||
StateDelta op(StateDelta::Type::NO_OP, tx_id);
|
|
||||||
return op;
|
|
||||||
}
|
|
||||||
|
|
||||||
StateDelta StateDelta::BuildUniqueConstraint(
|
|
||||||
tx::TransactionId tx_id, storage::Label label,
|
|
||||||
const std::string &label_name,
|
|
||||||
const std::vector<storage::Property> &properties,
|
|
||||||
const std::vector<std::string> &property_names) {
|
|
||||||
StateDelta op(StateDelta::Type::BUILD_UNIQUE_CONSTRAINT, tx_id);
|
|
||||||
op.label = label;
|
|
||||||
op.label_name = label_name;
|
|
||||||
op.properties = properties;
|
|
||||||
op.property_names = property_names;
|
|
||||||
return op;
|
|
||||||
}
|
|
||||||
|
|
||||||
StateDelta StateDelta::DropUniqueConstraint(
|
|
||||||
tx::TransactionId tx_id, storage::Label label,
|
|
||||||
const std::string &label_name,
|
|
||||||
const std::vector<storage::Property> &properties,
|
|
||||||
const std::vector<std::string> &property_names) {
|
|
||||||
StateDelta op(StateDelta::Type::DROP_UNIQUE_CONSTRAINT, tx_id);
|
|
||||||
op.label = label;
|
|
||||||
op.label_name = label_name;
|
|
||||||
op.properties = properties;
|
|
||||||
op.property_names = property_names;
|
|
||||||
return op;
|
|
||||||
}
|
|
||||||
|
|
||||||
void StateDelta::Encode(
|
|
||||||
HashedFileWriter &writer,
|
|
||||||
communication::bolt::BaseEncoder<HashedFileWriter> &encoder) const {
|
|
||||||
encoder.WriteInt(static_cast<int64_t>(type));
|
|
||||||
encoder.WriteInt(static_cast<int64_t>(transaction_id));
|
|
||||||
|
|
||||||
switch (type) {
|
|
||||||
case Type::TRANSACTION_BEGIN:
|
|
||||||
case Type::TRANSACTION_COMMIT:
|
|
||||||
case Type::TRANSACTION_ABORT:
|
|
||||||
case Type::NO_OP:
|
|
||||||
break;
|
|
||||||
case Type::CREATE_VERTEX:
|
|
||||||
encoder.WriteInt(vertex_id.AsInt());
|
|
||||||
break;
|
|
||||||
case Type::CREATE_EDGE:
|
|
||||||
encoder.WriteInt(edge_id.AsInt());
|
|
||||||
encoder.WriteInt(vertex_from_id.AsInt());
|
|
||||||
encoder.WriteInt(vertex_to_id.AsInt());
|
|
||||||
encoder.WriteInt(edge_type.Id());
|
|
||||||
encoder.WriteString(edge_type_name);
|
|
||||||
break;
|
|
||||||
case Type::SET_PROPERTY_VERTEX:
|
|
||||||
encoder.WriteInt(vertex_id.AsInt());
|
|
||||||
encoder.WriteInt(property.Id());
|
|
||||||
encoder.WriteString(property_name);
|
|
||||||
encoder.WriteValue(glue::ToBoltValue(value));
|
|
||||||
break;
|
|
||||||
case Type::SET_PROPERTY_EDGE:
|
|
||||||
encoder.WriteInt(edge_id.AsInt());
|
|
||||||
encoder.WriteInt(property.Id());
|
|
||||||
encoder.WriteString(property_name);
|
|
||||||
encoder.WriteValue(glue::ToBoltValue(value));
|
|
||||||
break;
|
|
||||||
case Type::ADD_LABEL:
|
|
||||||
case Type::REMOVE_LABEL:
|
|
||||||
encoder.WriteInt(vertex_id.AsInt());
|
|
||||||
encoder.WriteInt(label.Id());
|
|
||||||
encoder.WriteString(label_name);
|
|
||||||
break;
|
|
||||||
case Type::REMOVE_VERTEX:
|
|
||||||
encoder.WriteInt(vertex_id.AsInt());
|
|
||||||
break;
|
|
||||||
case Type::REMOVE_EDGE:
|
|
||||||
encoder.WriteInt(edge_id.AsInt());
|
|
||||||
break;
|
|
||||||
case Type::BUILD_INDEX:
|
|
||||||
encoder.WriteInt(label.Id());
|
|
||||||
encoder.WriteString(label_name);
|
|
||||||
encoder.WriteInt(property.Id());
|
|
||||||
encoder.WriteString(property_name);
|
|
||||||
break;
|
|
||||||
case Type::DROP_INDEX:
|
|
||||||
encoder.WriteInt(label.Id());
|
|
||||||
encoder.WriteString(label_name);
|
|
||||||
encoder.WriteInt(property.Id());
|
|
||||||
encoder.WriteString(property_name);
|
|
||||||
break;
|
|
||||||
case Type::BUILD_UNIQUE_CONSTRAINT:
|
|
||||||
encoder.WriteInt(label.Id());
|
|
||||||
encoder.WriteString(label_name);
|
|
||||||
encoder.WriteInt(properties.size());
|
|
||||||
for (auto prop : properties) {
|
|
||||||
encoder.WriteInt(prop.Id());
|
|
||||||
}
|
|
||||||
for (auto &name : property_names) {
|
|
||||||
encoder.WriteString(name);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case Type::DROP_UNIQUE_CONSTRAINT:
|
|
||||||
encoder.WriteInt(label.Id());
|
|
||||||
encoder.WriteString(label_name);
|
|
||||||
encoder.WriteInt(properties.size());
|
|
||||||
for (auto prop : properties) {
|
|
||||||
encoder.WriteInt(prop.Id());
|
|
||||||
}
|
|
||||||
for (auto &name : property_names) {
|
|
||||||
encoder.WriteString(name);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
writer.WriteValue(writer.hash());
|
|
||||||
}
|
|
||||||
|
|
||||||
#define DECODE_MEMBER(member, value_f) \
|
|
||||||
if (!decoder.ReadValue(&dv)) return nullopt; \
|
|
||||||
r_val.member = dv.value_f();
|
|
||||||
|
|
||||||
#define DECODE_GID_MEMBER(member) \
|
|
||||||
if (!decoder.ReadValue(&dv)) return nullopt; \
|
|
||||||
r_val.member = storage::Gid::FromInt(dv.ValueInt());
|
|
||||||
|
|
||||||
#define DECODE_MEMBER_CAST(member, value_f, type) \
|
|
||||||
if (!decoder.ReadValue(&dv)) return nullopt; \
|
|
||||||
r_val.member = static_cast<type>(dv.value_f());
|
|
||||||
|
|
||||||
std::optional<StateDelta> StateDelta::Decode(
|
|
||||||
HashedFileReader &reader,
|
|
||||||
communication::bolt::Decoder<HashedFileReader> &decoder) {
|
|
||||||
using std::nullopt;
|
|
||||||
|
|
||||||
StateDelta r_val;
|
|
||||||
// The decoded value used as a temporary while decoding.
|
|
||||||
communication::bolt::Value dv;
|
|
||||||
|
|
||||||
try {
|
|
||||||
if (!decoder.ReadValue(&dv)) return nullopt;
|
|
||||||
r_val.type = static_cast<enum StateDelta::Type>(dv.ValueInt());
|
|
||||||
DECODE_MEMBER(transaction_id, ValueInt)
|
|
||||||
|
|
||||||
switch (r_val.type) {
|
|
||||||
case Type::TRANSACTION_BEGIN:
|
|
||||||
case Type::TRANSACTION_COMMIT:
|
|
||||||
case Type::TRANSACTION_ABORT:
|
|
||||||
case Type::NO_OP:
|
|
||||||
break;
|
|
||||||
case Type::CREATE_VERTEX:
|
|
||||||
DECODE_GID_MEMBER(vertex_id)
|
|
||||||
break;
|
|
||||||
case Type::CREATE_EDGE:
|
|
||||||
DECODE_GID_MEMBER(edge_id)
|
|
||||||
DECODE_GID_MEMBER(vertex_from_id)
|
|
||||||
DECODE_GID_MEMBER(vertex_to_id)
|
|
||||||
DECODE_MEMBER_CAST(edge_type, ValueInt, storage::EdgeType)
|
|
||||||
DECODE_MEMBER(edge_type_name, ValueString)
|
|
||||||
break;
|
|
||||||
case Type::SET_PROPERTY_VERTEX:
|
|
||||||
DECODE_GID_MEMBER(vertex_id)
|
|
||||||
DECODE_MEMBER_CAST(property, ValueInt, storage::Property)
|
|
||||||
DECODE_MEMBER(property_name, ValueString)
|
|
||||||
if (!decoder.ReadValue(&dv)) return nullopt;
|
|
||||||
r_val.value = glue::ToPropertyValue(dv);
|
|
||||||
break;
|
|
||||||
case Type::SET_PROPERTY_EDGE:
|
|
||||||
DECODE_GID_MEMBER(edge_id)
|
|
||||||
DECODE_MEMBER_CAST(property, ValueInt, storage::Property)
|
|
||||||
DECODE_MEMBER(property_name, ValueString)
|
|
||||||
if (!decoder.ReadValue(&dv)) return nullopt;
|
|
||||||
r_val.value = glue::ToPropertyValue(dv);
|
|
||||||
break;
|
|
||||||
case Type::ADD_LABEL:
|
|
||||||
case Type::REMOVE_LABEL:
|
|
||||||
DECODE_GID_MEMBER(vertex_id)
|
|
||||||
DECODE_MEMBER_CAST(label, ValueInt, storage::Label)
|
|
||||||
DECODE_MEMBER(label_name, ValueString)
|
|
||||||
break;
|
|
||||||
case Type::REMOVE_VERTEX:
|
|
||||||
DECODE_GID_MEMBER(vertex_id)
|
|
||||||
break;
|
|
||||||
case Type::REMOVE_EDGE:
|
|
||||||
DECODE_GID_MEMBER(edge_id)
|
|
||||||
break;
|
|
||||||
case Type::BUILD_INDEX:
|
|
||||||
DECODE_MEMBER_CAST(label, ValueInt, storage::Label)
|
|
||||||
DECODE_MEMBER(label_name, ValueString)
|
|
||||||
DECODE_MEMBER_CAST(property, ValueInt, storage::Property)
|
|
||||||
DECODE_MEMBER(property_name, ValueString)
|
|
||||||
break;
|
|
||||||
case Type::DROP_INDEX:
|
|
||||||
DECODE_MEMBER_CAST(label, ValueInt, storage::Label)
|
|
||||||
DECODE_MEMBER(label_name, ValueString)
|
|
||||||
DECODE_MEMBER_CAST(property, ValueInt, storage::Property)
|
|
||||||
DECODE_MEMBER(property_name, ValueString)
|
|
||||||
break;
|
|
||||||
case Type::BUILD_UNIQUE_CONSTRAINT: {
|
|
||||||
DECODE_MEMBER_CAST(label, ValueInt, storage::Label)
|
|
||||||
DECODE_MEMBER(label_name, ValueString)
|
|
||||||
if (!decoder.ReadValue(&dv)) return nullopt;
|
|
||||||
int size = dv.ValueInt();
|
|
||||||
for (size_t i = 0; i < size; ++i) {
|
|
||||||
if (!decoder.ReadValue(&dv)) return nullopt;
|
|
||||||
r_val.properties.push_back(
|
|
||||||
static_cast<storage::Property>(dv.ValueInt()));
|
|
||||||
}
|
|
||||||
for (size_t i = 0; i < size; ++i) {
|
|
||||||
if (!decoder.ReadValue(&dv)) return nullopt;
|
|
||||||
r_val.property_names.push_back(dv.ValueString());
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case Type::DROP_UNIQUE_CONSTRAINT: {
|
|
||||||
DECODE_MEMBER_CAST(label, ValueInt, storage::Label)
|
|
||||||
DECODE_MEMBER(label_name, ValueString)
|
|
||||||
if (!decoder.ReadValue(&dv)) return nullopt;
|
|
||||||
int size = dv.ValueInt();
|
|
||||||
for (size_t i = 0; i < size; ++i) {
|
|
||||||
if (!decoder.ReadValue(&dv)) return nullopt;
|
|
||||||
r_val.properties.push_back(
|
|
||||||
static_cast<storage::Property>(dv.ValueInt()));
|
|
||||||
}
|
|
||||||
for (size_t i = 0; i < size; ++i) {
|
|
||||||
if (!decoder.ReadValue(&dv)) return nullopt;
|
|
||||||
r_val.property_names.push_back(dv.ValueString());
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
auto decoder_hash = reader.hash();
|
|
||||||
uint64_t encoded_hash;
|
|
||||||
if (!reader.ReadType(encoded_hash, true)) return nullopt;
|
|
||||||
if (decoder_hash != encoded_hash) return nullopt;
|
|
||||||
|
|
||||||
return r_val;
|
|
||||||
} catch (communication::bolt::ValueException &) {
|
|
||||||
return nullopt;
|
|
||||||
} catch (std::ifstream::failure &) {
|
|
||||||
return nullopt;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#undef DECODE_MEMBER
|
|
||||||
|
|
||||||
void StateDelta::Apply(GraphDbAccessor &dba) const {
|
|
||||||
switch (type) {
|
|
||||||
// Transactional state is not recovered.
|
|
||||||
case Type::TRANSACTION_BEGIN:
|
|
||||||
case Type::TRANSACTION_COMMIT:
|
|
||||||
case Type::TRANSACTION_ABORT:
|
|
||||||
LOG(FATAL) << "Transaction handling not handled in Apply";
|
|
||||||
break;
|
|
||||||
case Type::CREATE_VERTEX:
|
|
||||||
dba.InsertVertex(vertex_id);
|
|
||||||
break;
|
|
||||||
case Type::CREATE_EDGE: {
|
|
||||||
auto from = dba.FindVertex(vertex_from_id, true);
|
|
||||||
auto to = dba.FindVertex(vertex_to_id, true);
|
|
||||||
dba.InsertEdge(from, to, dba.EdgeType(edge_type_name), edge_id);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case Type::SET_PROPERTY_VERTEX: {
|
|
||||||
auto vertex = dba.FindVertex(vertex_id, true);
|
|
||||||
vertex.PropsSet(dba.Property(property_name), value);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case Type::SET_PROPERTY_EDGE: {
|
|
||||||
auto edge = dba.FindEdge(edge_id, true);
|
|
||||||
edge.PropsSet(dba.Property(property_name), value);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case Type::ADD_LABEL: {
|
|
||||||
auto vertex = dba.FindVertex(vertex_id, true);
|
|
||||||
vertex.add_label(dba.Label(label_name));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case Type::REMOVE_LABEL: {
|
|
||||||
auto vertex = dba.FindVertex(vertex_id, true);
|
|
||||||
vertex.remove_label(dba.Label(label_name));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case Type::REMOVE_VERTEX: {
|
|
||||||
auto vertex = dba.FindVertex(vertex_id, true);
|
|
||||||
dba.DetachRemoveVertex(vertex);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case Type::REMOVE_EDGE: {
|
|
||||||
auto edge = dba.FindEdge(edge_id, true);
|
|
||||||
dba.RemoveEdge(edge);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case Type::BUILD_INDEX: {
|
|
||||||
dba.BuildIndex(dba.Label(label_name), dba.Property(property_name));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case Type::DROP_INDEX: {
|
|
||||||
dba.DeleteIndex(dba.Label(label_name), dba.Property(property_name));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case Type::NO_OP:
|
|
||||||
break;
|
|
||||||
case Type::BUILD_UNIQUE_CONSTRAINT: {
|
|
||||||
std::vector<storage::Property> properties;
|
|
||||||
properties.reserve(property_names.size());
|
|
||||||
for (auto &p : property_names) {
|
|
||||||
properties.push_back(dba.Property(p));
|
|
||||||
}
|
|
||||||
|
|
||||||
dba.BuildUniqueConstraint(dba.Label(label_name), properties);
|
|
||||||
} break;
|
|
||||||
case Type::DROP_UNIQUE_CONSTRAINT: {
|
|
||||||
std::vector<storage::Property> properties;
|
|
||||||
properties.reserve(property_names.size());
|
|
||||||
for (auto &p : property_names) {
|
|
||||||
properties.push_back(dba.Property(p));
|
|
||||||
}
|
|
||||||
|
|
||||||
dba.DeleteUniqueConstraint(dba.Label(label_name), properties);
|
|
||||||
} break;
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}; // namespace database
|
|
@ -1,149 +0,0 @@
|
|||||||
#>cpp
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include "communication/bolt/v1/decoder/decoder.hpp"
|
|
||||||
#include "communication/bolt/v1/encoder/base_encoder.hpp"
|
|
||||||
#include "durability/hashed_file_reader.hpp"
|
|
||||||
#include "durability/hashed_file_writer.hpp"
|
|
||||||
#include "storage/single_node_ha/mvcc/version_list.hpp"
|
|
||||||
#include "storage/common/types/property_value.hpp"
|
|
||||||
#include "storage/common/types/types.hpp"
|
|
||||||
#include "utils/typeinfo.hpp"
|
|
||||||
|
|
||||||
class Vertex;
|
|
||||||
class Edge;
|
|
||||||
cpp<#
|
|
||||||
|
|
||||||
(lcp:namespace database)
|
|
||||||
|
|
||||||
#>cpp
|
|
||||||
class GraphDbAccessor;
|
|
||||||
cpp<#
|
|
||||||
|
|
||||||
(lcp:define-struct state-delta ()
|
|
||||||
(
|
|
||||||
;; Members valid for every delta.
|
|
||||||
(type "Type")
|
|
||||||
(transaction-id "::tx::TransactionId")
|
|
||||||
;; Members valid only for some deltas, see StateDelta::Type comments above.
|
|
||||||
(vertex-id "::storage::Gid")
|
|
||||||
(edge-id "::storage::Gid")
|
|
||||||
(vertex-from-id "::storage::Gid")
|
|
||||||
(vertex-to-id "::storage::Gid")
|
|
||||||
(edge-type "::storage::EdgeType")
|
|
||||||
(edge-type-name "std::string")
|
|
||||||
(property "::storage::Property")
|
|
||||||
(property-name "std::string")
|
|
||||||
(properties "std::vector<storage::Property>")
|
|
||||||
(property-names "std::vector<std::string>")
|
|
||||||
(value "PropertyValue")
|
|
||||||
(label "::storage::Label")
|
|
||||||
(label-name "std::string")
|
|
||||||
(check-empty :bool))
|
|
||||||
(:documentation
|
|
||||||
"Describes single change to the database state. Used for state communication
|
|
||||||
over network in HA.
|
|
||||||
|
|
||||||
Labels, Properties and EdgeTypes are stored both as values (integers) and
|
|
||||||
strings (their names). The values are used when applying deltas in a running
|
|
||||||
database. Names are used when recovering the database as it's not guaranteed
|
|
||||||
that after recovery the old name<->value mapping will be preserved.
|
|
||||||
|
|
||||||
TODO: ensure the mapping is preserved after recovery and don't save strings
|
|
||||||
in StateDeltas.")
|
|
||||||
(:public
|
|
||||||
(lcp:define-enum type
|
|
||||||
(transaction-begin
|
|
||||||
transaction-commit
|
|
||||||
transaction-abort
|
|
||||||
create-vertex ;; vertex_id
|
|
||||||
create-edge ;; edge_id, from_vertex_id, to_vertex_id, edge_type, edge_type_name
|
|
||||||
set-property-vertex ;; vertex_id, property, property_name, property_value
|
|
||||||
set-property-edge ;; edge_id, property, property_name, property_value
|
|
||||||
;; remove property is done by setting a PropertyValue to Null
|
|
||||||
add-label ;; vertex_id, label, label_name
|
|
||||||
remove-label ;; vertex_id, label, label_name
|
|
||||||
remove-vertex ;; vertex_id, check_empty
|
|
||||||
remove-edge ;; edge_id
|
|
||||||
build-index ;; label, label_name, property, property_name
|
|
||||||
drop-index ;; label, label_name, property, property_name
|
|
||||||
no-op ;; no-op state delta required by Raft protocol
|
|
||||||
build-unique_constraint ;; label, label_name, properties, property_names
|
|
||||||
drop-unique_constraint ;; label, label_name, properties, property_names
|
|
||||||
)
|
|
||||||
(:documentation
|
|
||||||
"Defines StateDelta type. For each type the comment indicates which values
|
|
||||||
need to be stored. All deltas have the transaction_id member, so that's
|
|
||||||
omitted in the comment.")
|
|
||||||
(:serialize))
|
|
||||||
#>cpp
|
|
||||||
StateDelta() = default;
|
|
||||||
StateDelta(const enum Type &type, tx::TransactionId tx_id)
|
|
||||||
: type(type), transaction_id(tx_id) {}
|
|
||||||
|
|
||||||
/** Attempts to decode a StateDelta from the given decoder. Returns the
|
|
||||||
* decoded value if successful, otherwise returns nullopt. */
|
|
||||||
static std::optional<StateDelta> Decode(
|
|
||||||
HashedFileReader &reader,
|
|
||||||
communication::bolt::Decoder<HashedFileReader> &decoder);
|
|
||||||
|
|
||||||
/** Encodes the delta using primitive encoder, and writes out the new hash
|
|
||||||
* with delta to the writer */
|
|
||||||
void Encode(
|
|
||||||
HashedFileWriter &writer,
|
|
||||||
communication::bolt::BaseEncoder<HashedFileWriter> &encoder) const;
|
|
||||||
|
|
||||||
static StateDelta TxBegin(tx::TransactionId tx_id);
|
|
||||||
static StateDelta TxCommit(tx::TransactionId tx_id);
|
|
||||||
static StateDelta TxAbort(tx::TransactionId tx_id);
|
|
||||||
static StateDelta CreateVertex(tx::TransactionId tx_id,
|
|
||||||
storage::Gid vertex_id);
|
|
||||||
static StateDelta CreateEdge(tx::TransactionId tx_id, storage::Gid edge_id,
|
|
||||||
storage::Gid vertex_from_id,
|
|
||||||
storage::Gid vertex_to_id,
|
|
||||||
storage::EdgeType edge_type,
|
|
||||||
const std::string &edge_type_name);
|
|
||||||
static StateDelta PropsSetVertex(tx::TransactionId tx_id,
|
|
||||||
storage::Gid vertex_id,
|
|
||||||
storage::Property property,
|
|
||||||
const std::string &property_name,
|
|
||||||
const PropertyValue &value);
|
|
||||||
static StateDelta PropsSetEdge(tx::TransactionId tx_id, storage::Gid edge_id,
|
|
||||||
storage::Property property,
|
|
||||||
const std::string &property_name,
|
|
||||||
const PropertyValue &value);
|
|
||||||
static StateDelta AddLabel(tx::TransactionId tx_id, storage::Gid vertex_id,
|
|
||||||
storage::Label label,
|
|
||||||
const std::string &label_name);
|
|
||||||
static StateDelta RemoveLabel(tx::TransactionId tx_id, storage::Gid vertex_id,
|
|
||||||
storage::Label label,
|
|
||||||
const std::string &label_name);
|
|
||||||
static StateDelta RemoveVertex(tx::TransactionId tx_id, storage::Gid vertex_id,
|
|
||||||
bool check_empty);
|
|
||||||
static StateDelta RemoveEdge(tx::TransactionId tx_id, storage::Gid edge_id);
|
|
||||||
static StateDelta BuildIndex(tx::TransactionId tx_id, storage::Label label,
|
|
||||||
const std::string &label_name,
|
|
||||||
storage::Property property,
|
|
||||||
const std::string &property_name);
|
|
||||||
static StateDelta DropIndex(tx::TransactionId tx_id, storage::Label label,
|
|
||||||
const std::string &label_name,
|
|
||||||
storage::Property property,
|
|
||||||
const std::string &property_name);
|
|
||||||
static StateDelta NoOp(tx::TransactionId tx_id);
|
|
||||||
static StateDelta BuildUniqueConstraint(
|
|
||||||
tx::TransactionId tx_id, storage::Label label,
|
|
||||||
const std::string &label_name,
|
|
||||||
const std::vector<storage::Property> &properties,
|
|
||||||
const std::vector<std::string> &property_names);
|
|
||||||
static StateDelta DropUniqueConstraint(
|
|
||||||
tx::TransactionId tx_id, storage::Label label,
|
|
||||||
const std::string &label_name,
|
|
||||||
const std::vector<storage::Property> &property,
|
|
||||||
const std::vector<std::string> &property_names);
|
|
||||||
|
|
||||||
/// Applies CRUD delta to database accessor. Fails on other types of deltas
|
|
||||||
void Apply(GraphDbAccessor &dba) const;
|
|
||||||
cpp<#)
|
|
||||||
(:serialize (:slk)))
|
|
||||||
|
|
||||||
(lcp:pop-namespace) ;; database
|
|
@ -1,34 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
///
|
|
||||||
///
|
|
||||||
/// IMPORTANT: Please update this file for every snapshot format change!!!
|
|
||||||
/// TODO (buda): This is not rock solid.
|
|
||||||
///
|
|
||||||
|
|
||||||
#include <array>
|
|
||||||
#include <cstdint>
|
|
||||||
|
|
||||||
namespace durability {
|
|
||||||
|
|
||||||
constexpr std::array<uint8_t, 6> kSnapshotMagic{{'M', 'G', 'H', 'A', 's', 'n'}};
|
|
||||||
|
|
||||||
// The current default version of snapshot and WAL encoding / decoding.
|
|
||||||
constexpr int64_t kVersion{9};
|
|
||||||
|
|
||||||
// Snapshot format (version 9):
|
|
||||||
// 1) Magic number + snapshot version
|
|
||||||
//
|
|
||||||
// 2) A list of label+property indices.
|
|
||||||
//
|
|
||||||
// 3) Bolt encoded nodes. Each node is written in the following format:
|
|
||||||
// * gid, labels, properties
|
|
||||||
// 4) Bolt encoded edges. Each edge is written in the following format:
|
|
||||||
// * gid
|
|
||||||
// * from, to
|
|
||||||
// * edge_type
|
|
||||||
// * properties
|
|
||||||
//
|
|
||||||
// 5) Snapshot summary (number of nodes, number of edges, hash)
|
|
||||||
|
|
||||||
} // namespace durability
|
|
@ -1,79 +0,0 @@
|
|||||||
#include <algorithm>
|
|
||||||
#include <chrono>
|
|
||||||
#include <cstdint>
|
|
||||||
#include <exception>
|
|
||||||
#include <functional>
|
|
||||||
#include <limits>
|
|
||||||
#include <thread>
|
|
||||||
|
|
||||||
#include <gflags/gflags.h>
|
|
||||||
#include <glog/logging.h>
|
|
||||||
|
|
||||||
#include "communication/server.hpp"
|
|
||||||
#include "database/single_node_ha/graph_db.hpp"
|
|
||||||
#include "memgraph_init.hpp"
|
|
||||||
#include "query/exceptions.hpp"
|
|
||||||
#include "utils/flag_validation.hpp"
|
|
||||||
|
|
||||||
// General purpose flags.
|
|
||||||
DEFINE_string(bolt_address, "0.0.0.0",
|
|
||||||
"IP address on which the Bolt server should listen.");
|
|
||||||
DEFINE_VALIDATED_int32(bolt_port, 7687,
|
|
||||||
"Port on which the Bolt server should listen.",
|
|
||||||
FLAG_IN_RANGE(0, std::numeric_limits<uint16_t>::max()));
|
|
||||||
DEFINE_VALIDATED_int32(
|
|
||||||
bolt_num_workers, std::max(std::thread::hardware_concurrency(), 1U),
|
|
||||||
"Number of workers used by the Bolt server. By default, this will be the "
|
|
||||||
"number of processing units available on the machine.",
|
|
||||||
FLAG_IN_RANGE(1, INT32_MAX));
|
|
||||||
DEFINE_VALIDATED_int32(
|
|
||||||
bolt_session_inactivity_timeout, 1800,
|
|
||||||
"Time in seconds after which inactive Bolt sessions will be "
|
|
||||||
"closed.",
|
|
||||||
FLAG_IN_RANGE(1, INT32_MAX));
|
|
||||||
DEFINE_string(bolt_cert_file, "",
|
|
||||||
"Certificate file which should be used for the Bolt server.");
|
|
||||||
DEFINE_string(bolt_key_file, "",
|
|
||||||
"Key file which should be used for the Bolt server.");
|
|
||||||
|
|
||||||
using ServerT = communication::Server<BoltSession, SessionData>;
|
|
||||||
using communication::ServerContext;
|
|
||||||
|
|
||||||
void SingleNodeHAMain() {
|
|
||||||
auto durability_directory = std::filesystem::path(FLAGS_durability_directory);
|
|
||||||
|
|
||||||
database::GraphDb db;
|
|
||||||
query::InterpreterContext interpreter_context{&db};
|
|
||||||
SessionData session_data{&db, &interpreter_context, nullptr, nullptr};
|
|
||||||
|
|
||||||
ServerContext context;
|
|
||||||
std::string service_name = "Bolt";
|
|
||||||
if (!FLAGS_bolt_key_file.empty() && !FLAGS_bolt_cert_file.empty()) {
|
|
||||||
context = ServerContext(FLAGS_bolt_key_file, FLAGS_bolt_cert_file);
|
|
||||||
service_name = "BoltS";
|
|
||||||
}
|
|
||||||
|
|
||||||
ServerT server({FLAGS_bolt_address, static_cast<uint16_t>(FLAGS_bolt_port)},
|
|
||||||
&session_data, &context, FLAGS_bolt_session_inactivity_timeout,
|
|
||||||
service_name, FLAGS_bolt_num_workers);
|
|
||||||
|
|
||||||
// Handler for regular termination signals
|
|
||||||
auto shutdown = [&db] { db.Shutdown(); };
|
|
||||||
|
|
||||||
InitSignalHandlers(shutdown);
|
|
||||||
|
|
||||||
// Start the database.
|
|
||||||
db.Start();
|
|
||||||
// Start the Bolt server.
|
|
||||||
CHECK(server.Start()) << "Couldn't start the Bolt server!";
|
|
||||||
|
|
||||||
db.AwaitShutdown([&server] {
|
|
||||||
server.Shutdown();
|
|
||||||
server.AwaitShutdown();
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
int main(int argc, char **argv) {
|
|
||||||
google::SetUsageMessage("Memgraph high availability database server");
|
|
||||||
return WithInit(argc, argv, SingleNodeHAMain);
|
|
||||||
}
|
|
@ -2,8 +2,6 @@
|
|||||||
|
|
||||||
#include <gflags/gflags.h>
|
#include <gflags/gflags.h>
|
||||||
|
|
||||||
#include "database/graph_db.hpp"
|
|
||||||
#include "database/graph_db_accessor.hpp"
|
|
||||||
#include "query/context.hpp"
|
#include "query/context.hpp"
|
||||||
#include "query/db_accessor.hpp"
|
#include "query/db_accessor.hpp"
|
||||||
#include "query/frontend/ast/ast.hpp"
|
#include "query/frontend/ast/ast.hpp"
|
||||||
|
@ -1,60 +0,0 @@
|
|||||||
/// @file
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include <chrono>
|
|
||||||
#include <filesystem>
|
|
||||||
#include <ratio>
|
|
||||||
|
|
||||||
#include <json/json.hpp>
|
|
||||||
|
|
||||||
#include "raft/exceptions.hpp"
|
|
||||||
#include "utils/file.hpp"
|
|
||||||
#include "utils/string.hpp"
|
|
||||||
|
|
||||||
namespace raft {
|
|
||||||
|
|
||||||
/// Configurable Raft parameters.
|
|
||||||
struct Config {
|
|
||||||
std::chrono::milliseconds election_timeout_min;
|
|
||||||
std::chrono::milliseconds election_timeout_max;
|
|
||||||
std::chrono::milliseconds heartbeat_interval;
|
|
||||||
std::chrono::milliseconds replication_timeout;
|
|
||||||
int64_t log_size_snapshot_threshold;
|
|
||||||
|
|
||||||
static Config LoadFromFile(const std::string &raft_config_file) {
|
|
||||||
if (!std::filesystem::exists(raft_config_file))
|
|
||||||
throw RaftConfigException(raft_config_file);
|
|
||||||
|
|
||||||
nlohmann::json data;
|
|
||||||
try {
|
|
||||||
data = nlohmann::json::parse(
|
|
||||||
utils::Join(utils::ReadLines(raft_config_file), ""));
|
|
||||||
} catch (const nlohmann::json::parse_error &e) {
|
|
||||||
throw RaftConfigException(raft_config_file);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!data.is_object()) throw RaftConfigException(raft_config_file);
|
|
||||||
if (!data["election_timeout_min"].is_number())
|
|
||||||
throw RaftConfigException(raft_config_file);
|
|
||||||
if (!data["election_timeout_max"].is_number())
|
|
||||||
throw RaftConfigException(raft_config_file);
|
|
||||||
if (!data["heartbeat_interval"].is_number())
|
|
||||||
throw RaftConfigException(raft_config_file);
|
|
||||||
if (!data["replication_timeout"].is_number())
|
|
||||||
throw RaftConfigException(raft_config_file);
|
|
||||||
if (!data["log_size_snapshot_threshold"].is_number())
|
|
||||||
throw RaftConfigException(raft_config_file);
|
|
||||||
|
|
||||||
return Config{
|
|
||||||
std::chrono::duration<int64_t, std::milli>(
|
|
||||||
data["election_timeout_min"]),
|
|
||||||
std::chrono::duration<int64_t, std::milli>(
|
|
||||||
data["election_timeout_max"]),
|
|
||||||
std::chrono::duration<int64_t, std::milli>(data["heartbeat_interval"]),
|
|
||||||
std::chrono::duration<int64_t, std::milli>(data["replication_timeout"]),
|
|
||||||
data["log_size_snapshot_threshold"]};
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace raft
|
|
@ -1,137 +0,0 @@
|
|||||||
#include "raft/coordination.hpp"
|
|
||||||
|
|
||||||
#include <gflags/gflags.h>
|
|
||||||
#include <json/json.hpp>
|
|
||||||
|
|
||||||
#include "utils/file.hpp"
|
|
||||||
#include "utils/string.hpp"
|
|
||||||
|
|
||||||
DEFINE_string(rpc_cert_file, "", "Certificate file to use (RPC).");
|
|
||||||
DEFINE_string(rpc_key_file, "", "Key file to use (RPC).");
|
|
||||||
|
|
||||||
namespace raft {
|
|
||||||
|
|
||||||
namespace fs = std::filesystem;
|
|
||||||
|
|
||||||
std::unordered_map<uint16_t, io::network::Endpoint> LoadNodesFromFile(
|
|
||||||
const std::string &coordination_config_file) {
|
|
||||||
if (!fs::exists(coordination_config_file))
|
|
||||||
throw RaftCoordinationConfigException("file (" + coordination_config_file +
|
|
||||||
") doesn't exist");
|
|
||||||
|
|
||||||
std::unordered_map<uint16_t, io::network::Endpoint> nodes;
|
|
||||||
nlohmann::json data;
|
|
||||||
try {
|
|
||||||
data = nlohmann::json::parse(
|
|
||||||
utils::Join(utils::ReadLines(coordination_config_file), ""));
|
|
||||||
} catch (const nlohmann::json::parse_error &e) {
|
|
||||||
throw RaftCoordinationConfigException("invalid json");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!data.is_array()) throw RaftCoordinationConfigException("not an array");
|
|
||||||
|
|
||||||
for (auto &it : data) {
|
|
||||||
if (!it.is_array())
|
|
||||||
throw RaftCoordinationConfigException("element not an array");
|
|
||||||
|
|
||||||
if (it.size() != 3)
|
|
||||||
throw RaftCoordinationConfigException("invalid number of subelements");
|
|
||||||
|
|
||||||
if (!it[0].is_number_unsigned() || !it[1].is_string() ||
|
|
||||||
!it[2].is_number_unsigned())
|
|
||||||
throw RaftCoordinationConfigException("subelement data is invalid");
|
|
||||||
|
|
||||||
nodes[it[0]] = io::network::Endpoint{it[1], it[2]};
|
|
||||||
}
|
|
||||||
|
|
||||||
return nodes;
|
|
||||||
}
|
|
||||||
|
|
||||||
Coordination::Coordination(
|
|
||||||
uint16_t node_id,
|
|
||||||
std::unordered_map<uint16_t, io::network::Endpoint> all_nodes)
|
|
||||||
: node_id_(node_id), cluster_size_(all_nodes.size()) {
|
|
||||||
// Create and initialize all server elements.
|
|
||||||
if (!FLAGS_rpc_cert_file.empty() && !FLAGS_rpc_key_file.empty()) {
|
|
||||||
server_context_.emplace(FLAGS_rpc_key_file, FLAGS_rpc_cert_file);
|
|
||||||
} else {
|
|
||||||
server_context_.emplace();
|
|
||||||
}
|
|
||||||
server_.emplace(all_nodes[node_id_], &server_context_.value(),
|
|
||||||
all_nodes.size() * 2);
|
|
||||||
|
|
||||||
// Create all client elements.
|
|
||||||
endpoints_.resize(cluster_size_);
|
|
||||||
clients_.resize(cluster_size_);
|
|
||||||
client_locks_.resize(cluster_size_);
|
|
||||||
|
|
||||||
// Initialize all client elements.
|
|
||||||
client_context_.emplace(server_context_->use_ssl());
|
|
||||||
for (uint16_t i = 1; i <= cluster_size_; ++i) {
|
|
||||||
auto it = all_nodes.find(i);
|
|
||||||
if (it == all_nodes.end()) {
|
|
||||||
throw RaftCoordinationConfigException("missing endpoint for node " +
|
|
||||||
std::to_string(i));
|
|
||||||
}
|
|
||||||
endpoints_[i - 1] = it->second;
|
|
||||||
client_locks_[i - 1] = std::make_unique<std::mutex>();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Coordination::~Coordination() {
|
|
||||||
CHECK(!alive_) << "You must call Shutdown and AwaitShutdown on Coordination!";
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<uint16_t> Coordination::GetAllNodeIds() {
|
|
||||||
std::vector<uint16_t> ret;
|
|
||||||
ret.reserve(cluster_size_);
|
|
||||||
for (uint16_t i = 1; i <= cluster_size_; ++i) {
|
|
||||||
ret.push_back(i);
|
|
||||||
}
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<uint16_t> Coordination::GetOtherNodeIds() {
|
|
||||||
std::vector<uint16_t> ret;
|
|
||||||
ret.reserve(cluster_size_ - 1);
|
|
||||||
for (uint16_t i = 1; i <= cluster_size_; ++i) {
|
|
||||||
if (i == node_id_) continue;
|
|
||||||
ret.push_back(i);
|
|
||||||
}
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint16_t Coordination::GetAllNodeCount() { return cluster_size_; }
|
|
||||||
|
|
||||||
uint16_t Coordination::GetOtherNodeCount() { return cluster_size_ - 1; }
|
|
||||||
|
|
||||||
io::network::Endpoint Coordination::GetOtherNodeEndpoint(uint16_t other_id) {
|
|
||||||
CHECK(other_id != node_id_) << "Trying to execute RPC on self!";
|
|
||||||
CHECK(other_id >= 1 && other_id <= cluster_size_) << "Invalid node id!";
|
|
||||||
return endpoints_[other_id - 1];
|
|
||||||
}
|
|
||||||
|
|
||||||
communication::ClientContext *Coordination::GetRpcClientContext() {
|
|
||||||
return &client_context_.value();
|
|
||||||
}
|
|
||||||
|
|
||||||
bool Coordination::Start() { return server_->Start(); }
|
|
||||||
|
|
||||||
void Coordination::AwaitShutdown(
|
|
||||||
std::function<void(void)> call_before_shutdown) {
|
|
||||||
// Wait for a shutdown notification.
|
|
||||||
while (alive_) {
|
|
||||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Call the before shutdown callback.
|
|
||||||
call_before_shutdown();
|
|
||||||
|
|
||||||
// Shutdown our RPC server.
|
|
||||||
server_->Shutdown();
|
|
||||||
server_->AwaitShutdown();
|
|
||||||
}
|
|
||||||
|
|
||||||
void Coordination::Shutdown() { alive_.store(false); }
|
|
||||||
|
|
||||||
} // namespace raft
|
|
@ -1,142 +0,0 @@
|
|||||||
/// @file
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include <atomic>
|
|
||||||
#include <functional>
|
|
||||||
#include <memory>
|
|
||||||
#include <mutex>
|
|
||||||
#include <optional>
|
|
||||||
#include <thread>
|
|
||||||
#include <unordered_map>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include <glog/logging.h>
|
|
||||||
|
|
||||||
#include "io/network/endpoint.hpp"
|
|
||||||
#include "raft/exceptions.hpp"
|
|
||||||
#include "rpc/client.hpp"
|
|
||||||
#include "rpc/server.hpp"
|
|
||||||
|
|
||||||
namespace raft {
|
|
||||||
|
|
||||||
/// Loads raft cluster configuration from file.
|
|
||||||
///
|
|
||||||
/// File format:
|
|
||||||
/// [[node_id, "node_address", node_port], ...]
|
|
||||||
std::unordered_map<uint16_t, io::network::Endpoint> LoadNodesFromFile(
|
|
||||||
const std::string &coordination_config_file);
|
|
||||||
|
|
||||||
/// This class is responsible for coordination between nodes within the Raft
|
|
||||||
/// cluster. Its implementation is quite similar to coordination in distributed
|
|
||||||
/// Memgraph apart from slight modifications which align more closely to Raft.
|
|
||||||
///
|
|
||||||
/// It should be noted that, in the context of communication, all nodes within
|
|
||||||
/// the Raft cluster are considered equivalent and are henceforth known simply
|
|
||||||
/// as nodes.
|
|
||||||
///
|
|
||||||
/// This class is thread safe.
|
|
||||||
class Coordination final {
|
|
||||||
public:
|
|
||||||
/// Class constructor
|
|
||||||
///
|
|
||||||
/// @param node_id ID of Raft node on this machine.
|
|
||||||
/// @param node mapping from node_id to endpoint information (for the whole
|
|
||||||
/// cluster).
|
|
||||||
Coordination(uint16_t node_id,
|
|
||||||
std::unordered_map<uint16_t, io::network::Endpoint> all_nodes);
|
|
||||||
|
|
||||||
~Coordination();
|
|
||||||
|
|
||||||
Coordination(const Coordination &) = delete;
|
|
||||||
Coordination(Coordination &&) = delete;
|
|
||||||
Coordination &operator=(const Coordination &) = delete;
|
|
||||||
Coordination &operator=(Coordination &&) = delete;
|
|
||||||
|
|
||||||
/// Returns all node IDs.
|
|
||||||
std::vector<uint16_t> GetAllNodeIds();
|
|
||||||
|
|
||||||
/// Returns other node IDs (excluding this node).
|
|
||||||
std::vector<uint16_t> GetOtherNodeIds();
|
|
||||||
|
|
||||||
/// Returns total number of nodes.
|
|
||||||
uint16_t GetAllNodeCount();
|
|
||||||
|
|
||||||
/// Returns number of other nodes.
|
|
||||||
uint16_t GetOtherNodeCount();
|
|
||||||
|
|
||||||
/// Returns endpoint of other node.
|
|
||||||
io::network::Endpoint GetOtherNodeEndpoint(uint16_t other_id);
|
|
||||||
|
|
||||||
/// Returns the currently used RPC client context.
|
|
||||||
communication::ClientContext *GetRpcClientContext();
|
|
||||||
|
|
||||||
/// Executes a RPC on another node in the cluster. If the RPC execution
|
|
||||||
/// fails (because of underlying network issues) it returns a `std::nullopt`.
|
|
||||||
template <class TRequestResponse, class... Args>
|
|
||||||
std::optional<typename TRequestResponse::Response> ExecuteOnOtherNode(
|
|
||||||
uint16_t other_id, Args &&... args) {
|
|
||||||
CHECK(other_id != node_id_) << "Trying to execute RPC on self!";
|
|
||||||
CHECK(other_id >= 1 && other_id <= cluster_size_) << "Invalid node id!";
|
|
||||||
|
|
||||||
auto &lock = *client_locks_[other_id - 1].get();
|
|
||||||
auto &client = clients_[other_id - 1];
|
|
||||||
|
|
||||||
std::lock_guard<std::mutex> guard(lock);
|
|
||||||
|
|
||||||
if (!client) {
|
|
||||||
const auto &endpoint = endpoints_[other_id - 1];
|
|
||||||
client =
|
|
||||||
std::make_unique<rpc::Client>(endpoint, &client_context_.value());
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
return client->Call<TRequestResponse>(std::forward<Args>(args)...);
|
|
||||||
} catch (...) {
|
|
||||||
// Invalidate the client so that we reconnect next time.
|
|
||||||
client = nullptr;
|
|
||||||
return std::nullopt;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Registers a RPC call on this node.
|
|
||||||
template <class TRequestResponse>
|
|
||||||
void Register(std::function<void(slk::Reader *, slk::Builder *)> callback) {
|
|
||||||
server_->Register<TRequestResponse>(callback);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Registers an extended RPC call on this node.
|
|
||||||
template <class TRequestResponse>
|
|
||||||
void Register(std::function<void(const io::network::Endpoint &, slk::Reader *,
|
|
||||||
slk::Builder *)>
|
|
||||||
callback) {
|
|
||||||
server_->Register<TRequestResponse>(callback);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Starts the coordination and its servers.
|
|
||||||
bool Start();
|
|
||||||
|
|
||||||
/// Blocks until the coordination is shut down. Accepts a callback function
|
|
||||||
/// that is called to clean up all services that should be stopped before the
|
|
||||||
/// coordination.
|
|
||||||
void AwaitShutdown(std::function<void(void)> call_before_shutdown);
|
|
||||||
|
|
||||||
/// Hints that the coordination should start shutting down the whole cluster.
|
|
||||||
void Shutdown();
|
|
||||||
|
|
||||||
private:
|
|
||||||
uint16_t node_id_;
|
|
||||||
uint16_t cluster_size_;
|
|
||||||
|
|
||||||
std::optional<communication::ServerContext> server_context_;
|
|
||||||
std::optional<rpc::Server> server_;
|
|
||||||
|
|
||||||
std::optional<communication::ClientContext> client_context_;
|
|
||||||
std::vector<io::network::Endpoint> endpoints_;
|
|
||||||
std::vector<std::unique_ptr<rpc::Client>> clients_;
|
|
||||||
std::vector<std::unique_ptr<std::mutex>> client_locks_;
|
|
||||||
|
|
||||||
std::atomic<bool> alive_{true};
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace raft
|
|
@ -1,111 +0,0 @@
|
|||||||
/// @file
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include "communication/bolt/v1/exceptions.hpp"
|
|
||||||
|
|
||||||
namespace raft {
|
|
||||||
|
|
||||||
/// Base exception class used for all exceptions that can occur within the
|
|
||||||
/// Raft protocol.
|
|
||||||
class RaftException : public communication::bolt::VerboseError {
|
|
||||||
public:
|
|
||||||
template <class... Args>
|
|
||||||
RaftException(const std::string &format, Args &&... args)
|
|
||||||
: communication::bolt::VerboseError(
|
|
||||||
communication::bolt::VerboseError::Classification::DATABASE_ERROR,
|
|
||||||
"Raft", "Error", format, std::forward<Args>(args)...) {}
|
|
||||||
};
|
|
||||||
|
|
||||||
/// This exception should be thrown when attempting to transition between
|
|
||||||
/// incompatible states, e.g. from `FOLLOWER` to `LEADER`.
|
|
||||||
class InvalidTransitionException : public RaftException {
|
|
||||||
public:
|
|
||||||
using RaftException::RaftException;
|
|
||||||
InvalidTransitionException(const std::string &old_mode,
|
|
||||||
const std::string &new_mode)
|
|
||||||
: RaftException("Invalid transition from " + old_mode + " to " +
|
|
||||||
new_mode) {}
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Exception used to indicate something is wrong with the raft config provided
|
|
||||||
/// by the user.
|
|
||||||
class RaftConfigException : public RaftException {
|
|
||||||
public:
|
|
||||||
using RaftException::RaftException;
|
|
||||||
explicit RaftConfigException(const std::string &path)
|
|
||||||
: RaftException("Unable to parse raft config file " + path) {}
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Exception used to indicate something is wrong with the coordination config
|
|
||||||
/// provided by the user.
|
|
||||||
class RaftCoordinationConfigException : public RaftException {
|
|
||||||
public:
|
|
||||||
using RaftException::RaftException;
|
|
||||||
explicit RaftCoordinationConfigException(const std::string &msg)
|
|
||||||
: RaftException("Unable to parse raft coordination config file: " + msg +
|
|
||||||
"!") {}
|
|
||||||
};
|
|
||||||
|
|
||||||
/// This exception should be thrown when a `RaftServer` instance attempts
|
|
||||||
/// to read data from persistent storage which is missing.
|
|
||||||
class MissingPersistentDataException : public RaftException {
|
|
||||||
public:
|
|
||||||
using RaftException::RaftException;
|
|
||||||
explicit MissingPersistentDataException(const std::string &key)
|
|
||||||
: RaftException(
|
|
||||||
"Attempting to read non-existing persistent data under key: " +
|
|
||||||
key) {}
|
|
||||||
};
|
|
||||||
|
|
||||||
/// This exception should be thrown when a `RaftServer` instance attempts to
|
|
||||||
/// read from replication log for a garbage collected transaction or a
|
|
||||||
/// transaction that didn't begin.
|
|
||||||
class InvalidReplicationLogLookup : public RaftException {
|
|
||||||
public:
|
|
||||||
using RaftException::RaftException;
|
|
||||||
InvalidReplicationLogLookup()
|
|
||||||
: RaftException("Replication log lookup for invalid transaction.") {}
|
|
||||||
};
|
|
||||||
|
|
||||||
/// This exception is thrown when a transaction is taking too long to replicate.
|
|
||||||
/// We're throwing this to reduce the number of threads that are in an infinite
|
|
||||||
/// loop during a network partition.
|
|
||||||
class ReplicationTimeoutException : public RaftException {
|
|
||||||
public:
|
|
||||||
using RaftException::RaftException;
|
|
||||||
ReplicationTimeoutException()
|
|
||||||
: RaftException("Raft Log replication is taking too long. ") {}
|
|
||||||
};
|
|
||||||
|
|
||||||
/// This exception is thrown when a client tries to execute a query on a server
|
|
||||||
/// that isn't a leader.
|
|
||||||
class CantExecuteQueries : public RaftException {
|
|
||||||
public:
|
|
||||||
using RaftException::RaftException;
|
|
||||||
CantExecuteQueries()
|
|
||||||
: RaftException(
|
|
||||||
"Memgraph High Availability: Can't execute queries if not "
|
|
||||||
"leader.") {}
|
|
||||||
};
|
|
||||||
|
|
||||||
/// This exception is thrown when leader re-election takes place during
|
|
||||||
/// transaction commit. We're throwing this exception to inform the client that
|
|
||||||
/// transaction failed.
|
|
||||||
class UnexpectedLeaderChangeException : public RaftException {
|
|
||||||
public:
|
|
||||||
using RaftException::RaftException;
|
|
||||||
UnexpectedLeaderChangeException()
|
|
||||||
: RaftException(
|
|
||||||
"Leader change happened during transaction commit. Aborting.") {}
|
|
||||||
};
|
|
||||||
|
|
||||||
/// This exception is thrown when the machine is in the process of shutting down
|
|
||||||
/// and Raft API is being used.
|
|
||||||
class RaftShutdownException : public RaftException {
|
|
||||||
public:
|
|
||||||
using RaftException::RaftException;
|
|
||||||
RaftShutdownException() : RaftException("Raft Server is shutting down.") {}
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace raft
|
|
@ -1,19 +0,0 @@
|
|||||||
#>cpp
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include "database/single_node_ha/serialization.hpp"
|
|
||||||
#include "durability/single_node_ha/state_delta.hpp"
|
|
||||||
cpp<#
|
|
||||||
|
|
||||||
(lcp:namespace raft)
|
|
||||||
|
|
||||||
(lcp:define-struct log-entry ()
|
|
||||||
((term :uint64_t)
|
|
||||||
(deltas "std::vector<database::StateDelta>"))
|
|
||||||
(:public #>cpp
|
|
||||||
LogEntry() = default;
|
|
||||||
LogEntry(uint64_t _term, std::vector<database::StateDelta> _deltas): term(_term), deltas(_deltas) {}
|
|
||||||
cpp<#)
|
|
||||||
(:serialize (:slk)))
|
|
||||||
|
|
||||||
(lcp:pop-namespace) ;; raft
|
|
@ -1,96 +0,0 @@
|
|||||||
/// @file
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include <mutex>
|
|
||||||
|
|
||||||
#include "durability/single_node_ha/state_delta.hpp"
|
|
||||||
#include "transactions/type.hpp"
|
|
||||||
|
|
||||||
namespace raft {
|
|
||||||
|
|
||||||
enum class ReplicationStatus { REPLICATED, WAITING, ABORTED, INVALID };
|
|
||||||
|
|
||||||
inline std::string ReplicationStatusToString(
|
|
||||||
const ReplicationStatus &replication_status) {
|
|
||||||
switch (replication_status) {
|
|
||||||
case ReplicationStatus::REPLICATED:
|
|
||||||
return "REPLICATED";
|
|
||||||
case ReplicationStatus::WAITING:
|
|
||||||
return "WAITING";
|
|
||||||
case ReplicationStatus::ABORTED:
|
|
||||||
return "ABORTED";
|
|
||||||
case ReplicationStatus::INVALID:
|
|
||||||
return "INVALID";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Structure which describes the status of a newly created LogEntry after the
|
|
||||||
/// execution of RaftServer's Emplace method.
|
|
||||||
///
|
|
||||||
/// It consists of two unsigned 64-bit integers which uniquely describe
|
|
||||||
/// the emplaced LogEntry:
|
|
||||||
/// 1) Term when the LogEntry was emplaced to the Raft log.
|
|
||||||
/// 2) Index of the entry within the Raft log.
|
|
||||||
///
|
|
||||||
/// In the case an entry was not successfully emplaced (e.g. unexpected
|
|
||||||
/// leadership change), the values will have a std::nullopt value instead.
|
|
||||||
struct LogEntryStatus {
|
|
||||||
uint64_t term_id;
|
|
||||||
uint64_t log_index;
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Exposes only functionality that other parts of Memgraph can interact with.
|
|
||||||
class RaftInterface {
|
|
||||||
public:
|
|
||||||
/// Emplace a new LogEntry in the raft log and start its replication. This
|
|
||||||
/// entry is created from a given batched set of StateDelta objects.
|
|
||||||
///
|
|
||||||
/// It is possible that the entry was not successfully emplaced. In that case,
|
|
||||||
/// the method returns std::nullopt and the caller is responsible for handling
|
|
||||||
/// situation correctly (e.g. aborting the corresponding transaction).
|
|
||||||
///
|
|
||||||
/// @returns an optional LogEntryStatus object as result.
|
|
||||||
virtual std::optional<LogEntryStatus> Emplace(
|
|
||||||
const std::vector<database::StateDelta> &) = 0;
|
|
||||||
|
|
||||||
/// Returns true if the current servers mode is LEADER. False otherwise.
|
|
||||||
virtual bool IsLeader() = 0;
|
|
||||||
|
|
||||||
/// Returns the term ID of the current leader.
|
|
||||||
virtual uint64_t TermId() = 0;
|
|
||||||
|
|
||||||
/// Returns the replication status of LogEntry which began its replication in
|
|
||||||
/// a given term ID and was emplaced in the raft log at the given index.
|
|
||||||
///
|
|
||||||
/// Replication status can be one of the following
|
|
||||||
/// 1) REPLICATED -- LogEntry was successfully replicated across
|
|
||||||
/// the Raft cluster
|
|
||||||
/// 2) WAITING -- LogEntry was successfully emplaced in the Raft
|
|
||||||
/// log and is currently being replicated.
|
|
||||||
/// 3) ABORTED -- LogEntry will not be replicated.
|
|
||||||
/// 4) INVALID -- the request for the LogEntry was invalid, most
|
|
||||||
/// likely either term_id or log_index were out of range.
|
|
||||||
virtual ReplicationStatus GetReplicationStatus(uint64_t term_id,
|
|
||||||
uint64_t log_index) = 0;
|
|
||||||
|
|
||||||
/// Checks if the LogEntry with the give term id and log index can safely be
|
|
||||||
/// committed in local storage.
|
|
||||||
///
|
|
||||||
/// @param term_id term when the LogEntry was created
|
|
||||||
/// @param log_index index of the LogEntry in the Raft log
|
|
||||||
///
|
|
||||||
/// @return bool True if the transaction is safe to commit, false otherwise.
|
|
||||||
///
|
|
||||||
/// @throws ReplicationTimeoutException
|
|
||||||
/// @throws RaftShutdownException
|
|
||||||
/// @throws InvalidReplicationLogLookup
|
|
||||||
virtual bool SafeToCommit(uint64_t term_id, uint64_t log_index) = 0;
|
|
||||||
|
|
||||||
virtual std::mutex &WithLock() = 0;
|
|
||||||
|
|
||||||
protected:
|
|
||||||
~RaftInterface() {}
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace raft
|
|
@ -1,43 +0,0 @@
|
|||||||
#>cpp
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include <cstring>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include "raft/log_entry.hpp"
|
|
||||||
#include "rpc/messages.hpp"
|
|
||||||
cpp<#
|
|
||||||
|
|
||||||
(lcp:namespace raft)
|
|
||||||
|
|
||||||
(lcp:define-rpc request-vote
|
|
||||||
(:request
|
|
||||||
((candidate-id :uint16_t)
|
|
||||||
(term :uint64_t)
|
|
||||||
(last-log-index :uint64_t)
|
|
||||||
(last-log-term :uint64_t)))
|
|
||||||
(:response
|
|
||||||
((vote-granted :bool)
|
|
||||||
(term :uint64_t))))
|
|
||||||
|
|
||||||
(lcp:define-rpc append-entries
|
|
||||||
(:request
|
|
||||||
((leader-id :uint16_t)
|
|
||||||
(leader-commit :uint64_t)
|
|
||||||
(term :uint64_t)
|
|
||||||
(prev-log-index :uint64_t)
|
|
||||||
(prev-log-term :uint64_t)
|
|
||||||
(entries "std::vector<raft::LogEntry>")))
|
|
||||||
(:response
|
|
||||||
((success :bool)
|
|
||||||
(term :uint64_t))))
|
|
||||||
|
|
||||||
(lcp:define-rpc heartbeat
|
|
||||||
(:request
|
|
||||||
((leader-id :uint16_t)
|
|
||||||
(term :uint64_t)))
|
|
||||||
(:response
|
|
||||||
((success :bool)
|
|
||||||
(term :uint64_t))))
|
|
||||||
|
|
||||||
(lcp:pop-namespace) ;; raft
|
|
@ -1,961 +0,0 @@
|
|||||||
#include "raft/raft_server.hpp"
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
#include <chrono>
|
|
||||||
#include <iostream>
|
|
||||||
#include <memory>
|
|
||||||
#include <optional>
|
|
||||||
|
|
||||||
#include <fmt/format.h>
|
|
||||||
#include <gflags/gflags.h>
|
|
||||||
#include <glog/logging.h>
|
|
||||||
|
|
||||||
#include "database/graph_db_accessor.hpp"
|
|
||||||
#include "durability/single_node_ha/paths.hpp"
|
|
||||||
#include "raft/exceptions.hpp"
|
|
||||||
#include "rpc/client.hpp"
|
|
||||||
#include "slk/streams.hpp"
|
|
||||||
#include "utils/cast.hpp"
|
|
||||||
#include "utils/exceptions.hpp"
|
|
||||||
#include "utils/on_scope_exit.hpp"
|
|
||||||
#include "utils/thread.hpp"
|
|
||||||
|
|
||||||
namespace raft {
|
|
||||||
|
|
||||||
using namespace std::literals::chrono_literals;
|
|
||||||
namespace fs = std::filesystem;
|
|
||||||
|
|
||||||
const std::string kCurrentTermKey = "current_term";
|
|
||||||
const std::string kVotedForKey = "voted_for";
|
|
||||||
const std::string kLogSizeKey = "log_size";
|
|
||||||
const std::string kLogEntryPrefix = "log_entry_";
|
|
||||||
const std::string kRaftDir = "raft";
|
|
||||||
|
|
||||||
RaftServer::RaftServer(uint16_t server_id, const std::string &durability_dir,
|
|
||||||
bool db_recover_on_startup, const Config &config,
|
|
||||||
Coordination *coordination, database::GraphDb *db)
|
|
||||||
: config_(config),
|
|
||||||
coordination_(coordination),
|
|
||||||
db_(db),
|
|
||||||
mode_(Mode::FOLLOWER),
|
|
||||||
server_id_(server_id),
|
|
||||||
durability_dir_(fs::path(durability_dir)),
|
|
||||||
db_recover_on_startup_(db_recover_on_startup),
|
|
||||||
commit_index_(0),
|
|
||||||
last_applied_(0),
|
|
||||||
last_entry_term_(0),
|
|
||||||
issue_hb_(false),
|
|
||||||
replication_timeout_(config.replication_timeout),
|
|
||||||
disk_storage_(fs::path(durability_dir) / kRaftDir) {}
|
|
||||||
|
|
||||||
/// Brings the server online: initializes (or recovers) persistent state,
/// sizes per-peer bookkeeping vectors, registers the RequestVote /
/// AppendEntries / Heartbeat RPC handlers, and spawns the election, per-peer
/// replication, heartbeat, and no-op issuer threads.
void RaftServer::Start() {
  if (!db_recover_on_startup_) {
    // We need to clear persisted data if we don't want any recovery.
    disk_storage_.DeletePrefix("");
  }

  // Persistent storage initialization. A fresh server (no persisted log size)
  // starts at term 0 with a single empty sentinel entry at index 0; otherwise
  // term/vote/log metadata is read back from disk.
  if (!disk_storage_.Get(kLogSizeKey)) {
    SetCurrentTerm(0);
    SetLogSize(0);
    LogEntry empty_log_entry(0, {});
    AppendLogEntries(0, 0, {empty_log_entry});
  } else {
    RecoverPersistentData();
  }

  // Peer state initialization. Vectors are sized node-count + 1 so they can
  // be indexed directly by 1-based server ids.
  auto cluster_size = coordination_->GetAllNodeCount() + 1;
  next_index_.resize(cluster_size);
  index_offset_.resize(cluster_size);
  match_index_.resize(cluster_size);
  next_replication_.resize(cluster_size);
  next_heartbeat_.resize(cluster_size);

  // RPC registration. All handlers run under `lock_`.
  coordination_->Register<RequestVoteRpc>(
      [this](auto *req_reader, auto *res_builder) {
        std::lock_guard<std::mutex> guard(lock_);
        RequestVoteReq req;
        slk::Load(&req, req_reader);

        // [Raft paper 5.1]
        // "If a server receives a request with a stale term,
        // it rejects the request"
        if (exiting_ || req.term < current_term_) {
          RequestVoteRes res(false, current_term_);
          slk::Save(res, res_builder);
          return;
        }

        // [Raft paper figure 2]
        // If RPC request or response contains term T > currentTerm,
        // set currentTerm = T and convert to follower.
        if (req.term > current_term_) {
          SetCurrentTerm(req.term);
          if (mode_ != Mode::FOLLOWER) Transition(Mode::FOLLOWER);
        }

        // Already voted this term: only re-grant to the same candidate.
        if (voted_for_) {
          bool grant_vote = voted_for_.value() == req.candidate_id;
          if (grant_vote) SetNextElectionTimePoint();
          RequestVoteRes res(grant_vote, current_term_);
          slk::Save(res, res_builder);
          return;
        }

        // [Raft paper 5.2, 5.4]
        // "Each server will vote for at most one candidate in a given
        // term, on a first-come-first-serve basis with an additional
        // restriction on votes"
        // Restriction: "The voter denies its vote if its own log is more
        // up-to-date than that of the candidate"
        auto last_entry_data = LastEntryData();
        bool grant_vote =
            AtLeastUpToDate(req.last_log_index, req.last_log_term,
                            last_entry_data.first, last_entry_data.second);
        if (grant_vote) {
          SetVotedFor(req.candidate_id);
          SetNextElectionTimePoint();
        }
        RequestVoteRes res(grant_vote, current_term_);
        slk::Save(res, res_builder);
      });

  coordination_->Register<AppendEntriesRpc>([this](auto *req_reader,
                                                   auto *res_builder) {
    std::lock_guard<std::mutex> guard(lock_);
    AppendEntriesReq req;
    slk::Load(&req, req_reader);

    // [Raft paper 5.1]
    // "If a server receives a request with a stale term, it rejects the
    // request"
    if (exiting_ || req.term < current_term_) {
      AppendEntriesRes res(false, current_term_);
      slk::Save(res, res_builder);
      return;
    }

    // Everything below is considered to be a valid RPC. This will ensure that
    // after we finish processing the current request, the election timeout will
    // be extended. During this process we will prevent the timeout from
    // occurring.
    next_election_ = TimePoint::max();
    election_change_.notify_all();
    utils::OnScopeExit extend_election_timeout([this] {
      // [Raft thesis 3.4]
      // A server remains in follower state as long as it receives valid RPCs
      // from a leader or candidate.
      SetNextElectionTimePoint();
      election_change_.notify_all();
    });

    // [Raft paper figure 2]
    // If RPC request or response contains term T > currentTerm,
    // set currentTerm = T and convert to follower.
    if (req.term > current_term_) {
      SetCurrentTerm(req.term);
      if (mode_ != Mode::FOLLOWER) Transition(Mode::FOLLOWER);
    }

    // [Raft paper 5.3]
    // "If a follower's log is inconsistent with the leader's, the
    // consistency check will fail in the AppendEntries RPC."
    //
    // Consistency checking assures the Log Matching Property:
    //   - If two entries in different logs have the same index and
    //     term, then they store the same command.
    //   - If two entries in different logs have the same index and term,
    //     then the logs are identical in all preceding entries.
    if (log_size_ <= req.prev_log_index ||
        GetLogEntry(req.prev_log_index).term != req.prev_log_term) {
      AppendEntriesRes res(false, current_term_);
      slk::Save(res, res_builder);
      return;
    }

    // No need to call this function for a heartbeat
    if (!req.entries.empty()) {
      AppendLogEntries(req.leader_commit, req.prev_log_index + 1, req.entries);
    }

    // [Raft paper 5.3]
    // "Once a follower learns that a log entry is committed, it applies
    // the entry to its state machine (in log order)
    while (req.leader_commit > last_applied_ && last_applied_ + 1 < log_size_) {
      ++last_applied_;
      ApplyStateDeltas(GetLogEntry(last_applied_).deltas);
    }

    // Respond positively to a heartbeat.
    if (req.entries.empty()) {
      AppendEntriesRes res(true, current_term_);
      slk::Save(res, res_builder);
      if (mode_ != Mode::FOLLOWER) Transition(Mode::FOLLOWER);
      return;
    }

    AppendEntriesRes res(true, current_term_);
    slk::Save(res, res_builder);
  });

  coordination_->Register<HeartbeatRpc>(
      [this](auto *req_reader, auto *res_builder) {
        std::lock_guard<std::mutex> guard(lock_);
        HeartbeatReq req;
        slk::Load(&req, req_reader);

        // Stale-term heartbeats are rejected, same as the other RPCs.
        if (exiting_ || req.term < current_term_) {
          HeartbeatRes res(false, current_term_);
          slk::Save(res, res_builder);
          return;
        }

        if (req.term > current_term_) {
          SetCurrentTerm(req.term);
          if (mode_ != Mode::FOLLOWER) Transition(Mode::FOLLOWER);
        }

        // A valid heartbeat postpones the next election.
        SetNextElectionTimePoint();
        election_change_.notify_all();

        HeartbeatRes res(true, current_term_);
        slk::Save(res, res_builder);
      });

  // start threads

  SetNextElectionTimePoint();
  election_thread_ = std::thread(&RaftServer::ElectionThreadMain, this);

  for (auto peer_id : coordination_->GetOtherNodeIds()) {
    peer_threads_.emplace_back(&RaftServer::PeerThreadMain, this, peer_id);
    hb_threads_.emplace_back(&RaftServer::HBThreadMain, this, peer_id);
  }

  no_op_issuer_thread_ = std::thread(&RaftServer::NoOpIssuerThreadMain, this);
}
|
|
||||||
|
|
||||||
/// Stops the server: raises `exiting_`, wakes every condition variable so the
/// worker threads observe the flag, then joins all of them. Safe to call even
/// if some threads were never started (joinable() guards each join).
void RaftServer::Shutdown() {
  exiting_ = true;
  {
    std::lock_guard<std::mutex> guard(lock_);

    // Wake all sleepers so they re-check `exiting_`.
    // NOTE(review): `hb_condition_` is waited on under `heartbeat_lock_`, not
    // `lock_` — notifying it here still wakes the HB threads, but the flag
    // hand-off relies on `exiting_` being re-checked in their loops.
    state_changed_.notify_all();
    election_change_.notify_all();
    leader_changed_.notify_all();
    hb_condition_.notify_all();
  }

  for (auto &peer_thread : peer_threads_) {
    if (peer_thread.joinable()) peer_thread.join();
  }

  for (auto &hb_thread : hb_threads_) {
    if (hb_thread.joinable()) hb_thread.join();
  }

  if (election_thread_.joinable()) election_thread_.join();
  if (no_op_issuer_thread_.joinable()) no_op_issuer_thread_.join();
}
|
|
||||||
|
|
||||||
void RaftServer::SetCurrentTerm(uint64_t new_current_term) {
|
|
||||||
current_term_ = new_current_term;
|
|
||||||
disk_storage_.Put(kCurrentTermKey, std::to_string(new_current_term));
|
|
||||||
SetVotedFor(std::nullopt);
|
|
||||||
}
|
|
||||||
|
|
||||||
void RaftServer::SetVotedFor(std::optional<uint16_t> new_voted_for) {
|
|
||||||
voted_for_ = new_voted_for;
|
|
||||||
if (new_voted_for)
|
|
||||||
disk_storage_.Put(kVotedForKey, std::to_string(new_voted_for.value()));
|
|
||||||
else
|
|
||||||
disk_storage_.Delete(kVotedForKey);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Updates the cached log size and persists it under `kLogSizeKey`.
void RaftServer::SetLogSize(uint64_t size) {
  log_size_ = size;
  disk_storage_.Put(kLogSizeKey, std::to_string(size));
}
|
|
||||||
|
|
||||||
/// Appends a new log entry holding `deltas` to the leader's log and schedules
/// its replication. Returns the (term, index) pair identifying the entry, or
/// `std::nullopt` if this server is not currently the leader.
std::optional<LogEntryStatus> RaftServer::Emplace(
    const std::vector<database::StateDelta> &deltas) {
  std::unique_lock<std::mutex> lock(lock_);
  if (mode_ != Mode::LEADER) {
    return std::nullopt;
  }

  LogEntry new_entry(current_term_, deltas);

  // Append in memory and on disk, then bump the persisted log size.
  log_[log_size_] = new_entry;
  disk_storage_.Put(LogEntryKey(log_size_), SerializeLogEntry(new_entry));
  last_entry_term_ = new_entry.term;
  SetLogSize(log_size_ + 1);

  // Force replication
  TimePoint now = Clock::now();
  for (auto &peer_replication : next_replication_) peer_replication = now;

  // From this point on, we can say that the replication of a LogEntry started.
  // Note: `log_size_ - 1` is the index of the entry just appended.
  replication_timeout_.Insert(new_entry.term, log_size_ - 1);

  state_changed_.notify_all();
  return {{new_entry.term, log_size_ - 1}};
}
|
|
||||||
|
|
||||||
/// True while this server is the acting leader and not shutting down.
bool RaftServer::IsLeader() {
  if (exiting_) return false;
  return mode_ == Mode::LEADER;
}
|
|
||||||
|
|
||||||
/// Returns this server's current Raft term.
uint64_t RaftServer::TermId() {
  return current_term_;
}
|
|
||||||
|
|
||||||
/// Reports the replication state of the log entry identified by
/// (`term_id`, `log_index`):
///   INVALID    - the pair cannot refer to an existing entry,
///   ABORTED    - an entry with a different term occupies that index,
///   WAITING    - the entry exists but is not yet applied here,
///   REPLICATED - the entry has been applied.
ReplicationStatus RaftServer::GetReplicationStatus(uint64_t term_id,
                                                   uint64_t log_index) {
  std::unique_lock<std::mutex> lock(lock_);
  if (term_id > current_term_ || log_index >= log_size_)
    return ReplicationStatus::INVALID;

  auto log_entry = GetLogEntry(log_index);

  // This is correct because the leader can only append to the log and no two
  // workers can be leaders in the same term.
  if (log_entry.term != term_id) return ReplicationStatus::ABORTED;

  if (last_applied_ < log_index) return ReplicationStatus::WAITING;
  return ReplicationStatus::REPLICATED;
}
|
|
||||||
|
|
||||||
/// Decides whether the transaction identified by (`term_id`, `log_index`) may
/// commit. Returns true when the entry is replicated, false while replication
/// is still in progress.
///
/// @throws RaftShutdownException        if the server is exiting before the
///                                      entry was confirmed replicated
/// @throws ReplicationTimeoutException  if the entry waited too long
/// @throws InvalidReplicationLogLookup  for INVALID/ABORTED lookups
bool RaftServer::SafeToCommit(uint64_t term_id, uint64_t log_index) {
  auto replication_status = GetReplicationStatus(term_id, log_index);

  // If we are shutting down, but we know that the Raft Log replicated
  // successfully, we return true. This will eventually commit since we
  // replicate NoOp on leader election.
  if (replication_status == ReplicationStatus::REPLICATED) return true;

  // Only if the log entry isn't replicated, throw an exception to inform
  // the client.
  if (exiting_) throw RaftShutdownException();

  if (replication_status == ReplicationStatus::WAITING) {
    if (replication_timeout_.CheckTimeout(term_id, log_index)) {
      throw ReplicationTimeoutException();
    }
    return false;
  }

  // TODO(ipaljak): Fix the old naming.
  // The only possibility left is that our ReplicationLog doesn't contain
  // information about that tx (status INVALID or ABORTED).
  throw InvalidReplicationLogLookup();
}
|
|
||||||
|
|
||||||
/// Restores term, vote, log size and last entry term from `disk_storage_`.
/// Called from `Start()` when persisted state exists.
void RaftServer::RecoverPersistentData() {
  auto opt_term = disk_storage_.Get(kCurrentTermKey);
  if (opt_term) current_term_ = std::stoull(opt_term.value());

  auto opt_voted_for = disk_storage_.Get(kVotedForKey);
  if (!opt_voted_for) {
    voted_for_ = std::nullopt;
  } else {
    // NOTE(review): std::stoul yields unsigned long, which is narrowed into
    // the uint16_t optional here; server ids are written via SetVotedFor from
    // a uint16_t so the value fits, but the conversion is implicit.
    voted_for_ = {std::stoul(opt_voted_for.value())};
  }

  auto opt_log_size = disk_storage_.Get(kLogSizeKey);
  if (opt_log_size) log_size_ = std::stoull(opt_log_size.value());

  // Recover the term of the last entry so vote comparisons work immediately.
  if (log_size_ != 0) {
    auto opt_last_log_entry = disk_storage_.Get(LogEntryKey(log_size_ - 1));
    DCHECK(opt_last_log_entry != std::nullopt)
        << "Log size is equal to " << log_size_
        << ", but there is no log entry on index: " << log_size_ - 1;
    last_entry_term_ = DeserializeLogEntry(opt_last_log_entry.value()).term;
  }
}
|
|
||||||
|
|
||||||
/// Moves the server into `new_mode`, performing the bookkeeping each Raft
/// role requires. Must be called with `lock_` held (all call sites in this
/// file hold it). Statement order within each case is significant.
void RaftServer::Transition(const Mode &new_mode) {
  switch (new_mode) {
    case Mode::FOLLOWER: {
      LOG(INFO) << "Server " << server_id_
                << ": Transition to FOLLOWER (Term: " << current_term_ << ")";

      // Stepping down from LEADER requires rebuilding local state from the
      // committed prefix of the log.
      bool reset = mode_ == Mode::LEADER;
      issue_hb_ = false;
      mode_ = Mode::FOLLOWER;

      if (reset) {
        VLOG(40) << "Resetting internal state";
        // Temporary freeze election timer while we do the reset.
        next_election_ = TimePoint::max();

        db_->Reset();
        replication_timeout_.Clear();

        // Re-apply raft log. Index 0 holds the empty sentinel entry, so
        // replay starts at 1.
        uint64_t starting_index = 1;
        for (uint64_t i = starting_index; i <= commit_index_; ++i) {
          ApplyStateDeltas(GetLogEntry(i).deltas);
        }

        last_applied_ = commit_index_;
      }

      SetNextElectionTimePoint();
      election_change_.notify_all();
      state_changed_.notify_all();
      break;
    }

    case Mode::CANDIDATE: {
      LOG(INFO) << "Server " << server_id_
                << ": Transition to CANDIDATE (Term: " << current_term_ << ")";

      // [Raft thesis, section 3.4]
      // "Each candidate restarts its randomized election timeout at the start
      // of an election, and it waits for that timeout to elapse before
      // starting the next election; this reduces the likelihood of another
      // split vote in the new election."
      SetNextElectionTimePoint();
      election_change_.notify_all();

      // [Raft thesis, section 3.4]
      // "To begin an election, a follower increments its current term and
      // transitions to candidate state. It then votes for itself and issues
      // RequestVote RPCs in parallel to each of the other servers in the
      // cluster."
      SetCurrentTerm(current_term_ + 1);
      SetVotedFor(server_id_);

      // Own vote counts; per-peer request flags are cleared for the new term.
      granted_votes_ = 1;
      vote_requested_.assign(coordination_->GetAllNodeCount() + 1, false);

      issue_hb_ = false;
      mode_ = Mode::CANDIDATE;
      state_changed_.notify_all();

      break;
    }

    case Mode::LEADER: {
      LOG(INFO) << "Server " << server_id_
                << ": Transition to LEADER (Term: " << current_term_ << ")";
      // Freeze election timer
      next_election_ = TimePoint::max();
      election_change_.notify_all();

      // Set next heartbeat and replication to correct values
      TimePoint now = Clock::now();
      for (auto &peer_replication : next_replication_)
        peer_replication = now + config_.heartbeat_interval;
      for (auto &peer_heartbeat : next_heartbeat_)
        peer_heartbeat = now + config_.heartbeat_interval;

      issue_hb_ = true;
      hb_condition_.notify_all();

      // [Raft paper figure 2]
      // "For each server, index of the next log entry to send to that server
      // is initialized to leader's last log index + 1"
      for (int i = 1; i <= coordination_->GetAllNodeCount(); ++i) {
        next_index_[i] = log_size_;
        index_offset_[i] = 1;
        match_index_[i] = 0;
      }

      // Raft guarantees the Leader Append-Only property [Raft paper 5.2]
      // so its safe to apply everything from our log into our state machine
      for (int i = last_applied_ + 1; i < log_size_; ++i)
        ApplyStateDeltas(GetLogEntry(i).deltas);
      last_applied_ = log_size_ - 1;

      mode_ = Mode::LEADER;

      // Wakes NoOpIssuerThreadMain, which replicates a no-op entry to commit
      // entries from previous terms.
      leader_changed_.notify_all();
      break;
    }
  }
}
|
|
||||||
|
|
||||||
/// Leader-only: recomputes `commit_index_` as the median replication index
/// across the cluster (the highest index stored on a majority) and, if it
/// advanced, clears replication timeouts for the newly committed entries.
void RaftServer::AdvanceCommitIndex() {
  DCHECK(mode_ == Mode::LEADER)
      << "Commit index can only be advanced by the leader";

  // Collect the highest replicated index per server; our own log counts too.
  std::vector<uint64_t> known_replication_indices;
  for (int i = 1; i <= coordination_->GetAllNodeCount(); ++i) {
    if (i != server_id_)
      known_replication_indices.push_back(match_index_[i]);
    else
      known_replication_indices.push_back(log_size_ - 1);
  }

  // The element at position (n-1)/2 of the sorted list is replicated on a
  // majority of the cluster.
  std::sort(known_replication_indices.begin(), known_replication_indices.end());
  uint64_t new_commit_index =
      known_replication_indices[(coordination_->GetAllNodeCount() - 1) / 2];

  // This can happen because we reset `match_index` vector to 0 after a
  // new leader has been elected.
  if (commit_index_ >= new_commit_index) return;

  // [Raft thesis, section 3.6.2]
  // "(...) Raft never commits log entries from previous terms by counting
  // replicas. Only log entries from the leader's current term are committed by
  // counting replicas; once an entry from the current term has been committed
  // in this way, then all prior entries are committed indirectly because of the
  // Log Matching Property."
  if (GetLogEntry(new_commit_index).term != current_term_) {
    VLOG(40) << "Server " << server_id_
             << ": cannot commit log entry from "
                "previous term based on "
                "replication count.";
    return;
  }

  VLOG(40) << "Begin applying commited transactions";

  // NOTE(review): this loop only releases replication timeouts; the deltas
  // themselves appear to have been applied on the leader earlier (on the
  // LEADER transition / at Emplace time) — confirm against callers.
  for (int i = commit_index_ + 1; i <= new_commit_index; ++i) {
    auto log_entry = GetLogEntry(i);
    DCHECK(log_entry.deltas.size() > 2)
        << "Log entry should consist of at least three state deltas.";
    replication_timeout_.Remove(log_entry.term, i);
  }

  commit_index_ = new_commit_index;
  last_applied_ = new_commit_index;
}
|
|
||||||
|
|
||||||
/// Replication dispatch point for `peer_id`; currently always ships log
/// entries directly. `lock` must be held on entry — the callee releases it
/// around the RPC and re-acquires it before returning.
void RaftServer::SendEntries(uint16_t peer_id,
                             std::unique_lock<std::mutex> *lock) {
  SendLogEntries(peer_id, lock);
}
|
|
||||||
|
|
||||||
/// Sends an AppendEntries RPC to `peer_id` carrying the log suffix starting
/// at `next_index_[peer_id]` (empty for a pure consistency check), then
/// processes the reply: on success advances `match_index_`/`next_index_` and
/// possibly the commit index; on failure backs `next_index_` off
/// exponentially via `index_offset_`.
///
/// `lock` (over `lock_`) must be held on entry; it is released for the
/// duration of the RPC and re-acquired afterwards, so all cached request
/// state is copied out first.
void RaftServer::SendLogEntries(uint16_t peer_id,
                                std::unique_lock<std::mutex> *lock) {
  uint64_t request_term = current_term_;
  uint64_t request_prev_log_index = next_index_[peer_id] - 1;
  uint64_t request_prev_log_term;

  request_prev_log_term = GetLogEntry(next_index_[peer_id] - 1).term;

  std::vector<LogEntry> request_entries;
  if (next_index_[peer_id] <= log_size_ - 1)
    GetLogSuffix(next_index_[peer_id], request_entries);

  // Copy all internal variables before releasing the lock.
  auto server_id = server_id_;
  auto commit_index = commit_index_;

  VLOG(40) << "Server " << server_id_
           << ": Sending Entries RPC to server " << peer_id
           << " (Term: " << current_term_ << ")";
  VLOG(40) << "Entries size: " << request_entries.size();

  // Execute the RPC.
  lock->unlock();
  auto reply = coordination_->ExecuteOnOtherNode<AppendEntriesRpc>(
      peer_id, server_id, commit_index, request_term, request_prev_log_index,
      request_prev_log_term, request_entries);
  lock->lock();

  // Unreachable peer: retry after one heartbeat interval.
  if (!reply) {
    next_replication_[peer_id] = Clock::now() + config_.heartbeat_interval;
    return;
  }

  // We can't early exit if the `exiting_` flag is true just yet. It is possible
  // that the response we handle here carries the last confirmation that the logs
  // have been replicated. We need to handle the response so the client doesn't
  // retry the query because he thinks the query failed.
  if (current_term_ != request_term || mode_ != Mode::LEADER) {
    return;
  }

  if (OutOfSync(reply->term)) {
    state_changed_.notify_all();
    return;
  }

  DCHECK(mode_ == Mode::LEADER)
      << "Elected leader for term should never change.";

  if (reply->term != current_term_) {
    VLOG(40) << "Server " << server_id_
             << ": Ignoring stale AppendEntriesRPC reply from " << peer_id;
    return;
  }

  if (!reply->success) {
    // Replication can fail for the first log entry if the peer that we're
    // sending the entry is in the process of shutting down.
    if (next_index_[peer_id] > index_offset_[peer_id]) {
      next_index_[peer_id] -= index_offset_[peer_id];
      // Overflow should be prevented by snapshot threshold constant.
      index_offset_[peer_id] <<= 1UL;
    } else {
      next_index_[peer_id] = 1UL;
    }
  } else {
    uint64_t new_match_index = request_prev_log_index + request_entries.size();
    DCHECK(match_index_[peer_id] <= new_match_index)
        << "`match_index` should increase monotonically within a term";
    match_index_[peer_id] = new_match_index;
    if (request_entries.size() > 0) AdvanceCommitIndex();
    next_index_[peer_id] = match_index_[peer_id] + 1;
    index_offset_[peer_id] = 1;
    next_replication_[peer_id] = Clock::now() + config_.heartbeat_interval;
  }

  if (exiting_) return;
  state_changed_.notify_all();
}
|
|
||||||
|
|
||||||
/// Election timer thread: sleeps until `next_election_` and, when the
/// deadline passes, starts a new election by transitioning to CANDIDATE.
/// `next_election_` is pushed back by valid leader traffic elsewhere.
void RaftServer::ElectionThreadMain() {
  utils::ThreadSetName("ElectionThread");
  std::unique_lock<std::mutex> lock(lock_);
  while (!exiting_) {
    if (Clock::now() >= next_election_) {
      VLOG(40) << "Server " << server_id_
               << ": Election timeout exceeded (Term: " << current_term_ << ")";
      Transition(Mode::CANDIDATE);
      state_changed_.notify_all();
    }
    // Re-checks whenever the deadline is moved (election_change_ notify).
    election_change_.wait_until(lock, next_election_);
  }
}
|
|
||||||
|
|
||||||
/// Per-peer worker thread: as CANDIDATE it issues a single RequestVote RPC to
/// `peer_id` per term; as LEADER it replicates log entries whenever
/// `next_replication_[peer_id]` comes due; as FOLLOWER it just sleeps.
void RaftServer::PeerThreadMain(uint16_t peer_id) {
  utils::ThreadSetName(fmt::format("RaftPeer{}", peer_id));
  std::unique_lock<std::mutex> lock(lock_);

  /* This loop will either call a function that issues an RPC or wait on the
   * condition variable. It must not do both! Lock on `mutex_` is released
   * while waiting for RPC response, which might cause us to miss a
   * notification on `state_changed_` conditional variable and wait
   * indefinitely. The safest thing to do is to assume some important part of
   * state was modified while we were waiting for the response and loop around
   * to check. */
  while (!exiting_) {
    TimePoint now = Clock::now();
    TimePoint wait_until;

    switch (mode_) {
      case Mode::FOLLOWER: {
        // Nothing to do for peers; wait for a state change.
        wait_until = TimePoint::max();
        break;
      }

      case Mode::CANDIDATE: {
        // The vote for this term was already requested from this peer.
        if (vote_requested_[peer_id]) {
          wait_until = TimePoint::max();
          break;
        }

        // TODO(ipaljak): Consider backoff.
        wait_until = TimePoint::max();

        // Copy all internal variables before releasing the lock.
        auto server_id = server_id_;
        auto request_term = current_term_.load();
        auto last_entry_data = LastEntryData();

        vote_requested_[peer_id] = true;

        // Execute the RPC.
        lock.unlock();  // Release lock while waiting for response
        auto reply = coordination_->ExecuteOnOtherNode<RequestVoteRpc>(
            peer_id, server_id, request_term, last_entry_data.first,
            last_entry_data.second);
        lock.lock();

        // If the peer isn't reachable, it is the same as if he didn't grant
        // us his vote.
        if (!reply) {
          reply = RequestVoteRes(false, request_term);
        }

        // State moved on while the lock was released: the reply is stale.
        if (current_term_ != request_term || mode_ != Mode::CANDIDATE ||
            exiting_) {
          VLOG(40) << "Server " << server_id_
                   << ": Ignoring RequestVoteRPC reply from " << peer_id;
          break;
        }

        if (OutOfSync(reply->term)) {
          state_changed_.notify_all();
          continue;
        }

        if (reply->vote_granted) {
          VLOG(40) << "Server " << server_id_ << ": Got vote from "
                   << peer_id;
          ++granted_votes_;
          if (HasMajorityVote()) Transition(Mode::LEADER);
        } else {
          VLOG(40) << "Server " << server_id_ << ": Denied vote from "
                   << peer_id;
        }

        state_changed_.notify_all();
        continue;
      }

      case Mode::LEADER: {
        if (now >= next_replication_[peer_id]) {
          SendEntries(peer_id, &lock);
          continue;
        }
        wait_until = next_replication_[peer_id];
        break;
      }
    }

    if (exiting_) break;
    state_changed_.wait_until(lock, wait_until);
  }
}
|
|
||||||
|
|
||||||
/// Heartbeat thread for `peer_id`: while `issue_hb_` is set (i.e. we are the
/// leader), sends a HeartbeatRpc every `config_.heartbeat_interval` using a
/// lazily created, dedicated RPC client. Runs under `heartbeat_lock_`, not
/// the main `lock_`.
void RaftServer::HBThreadMain(uint16_t peer_id) {
  utils::ThreadSetName(fmt::format("HBThread{}", peer_id));
  std::unique_lock<std::mutex> lock(heartbeat_lock_);

  // The heartbeat thread uses a dedicated RPC client for its peer so that it
  // can issue heartbeats in parallel with other RPC requests that are being
  // issued to the peer (replication, voting, etc.)
  std::unique_ptr<rpc::Client> rpc_client;

  while (!exiting_) {
    TimePoint wait_until;

    if (!issue_hb_) {
      // Not leader: sleep until woken via hb_condition_.
      wait_until = TimePoint::max();
    } else {
      TimePoint now = Clock::now();
      if (now < next_heartbeat_[peer_id]) {
        wait_until = next_heartbeat_[peer_id];
      } else {
        VLOG(40) << "Server " << server_id_ << ": Sending HB to server "
                 << peer_id << " (Term: " << current_term_ << ")";

        // Release heartbeat_lock_ for the blocking network call.
        lock.unlock();
        if (!rpc_client) {
          rpc_client = std::make_unique<rpc::Client>(
              coordination_->GetOtherNodeEndpoint(peer_id),
              coordination_->GetRpcClientContext());
        }
        try {
          rpc_client->Call<HeartbeatRpc>(server_id_, current_term_);
        } catch (...) {
          // Invalidate the client so that we reconnect next time.
          rpc_client = nullptr;
        }
        lock.lock();

        // This is ok even if we don't receive a reply.
        next_heartbeat_[peer_id] = now + config_.heartbeat_interval;
        wait_until = next_heartbeat_[peer_id];
      }
    }

    if (exiting_) break;
    hb_condition_.wait_until(lock, wait_until);
  }
}
|
|
||||||
|
|
||||||
/// Waits for leadership changes and replicates a no-op entry each time this
/// server becomes leader, which lets entries from previous terms commit
/// ([Raft thesis 3.6.2]). Uses a private mutex purely to drive the wait.
void RaftServer::NoOpIssuerThreadMain() {
  utils::ThreadSetName(fmt::format("NoOpIssuer"));
  std::mutex m;
  auto lock = std::unique_lock<std::mutex>(m);
  while (!exiting_) {
    leader_changed_.wait(lock);
    // no_op_create_callback_ will create a new transaction that has a NO_OP
    // StateDelta. This will trigger the whole procedure of replicating logs
    // in our implementation of Raft.
    if (!exiting_) NoOpCreate();
  }
}
|
|
||||||
|
|
||||||
void RaftServer::SetNextElectionTimePoint() {
|
|
||||||
// [Raft thesis, section 3.4]
|
|
||||||
// "Raft uses randomized election timeouts to ensure that split votes are
|
|
||||||
// rare and that they are resolved quickly. To prevent split votes in the
|
|
||||||
// first place, election timeouts are chosen randomly from a fixed interval
|
|
||||||
// (e.g., 150-300 ms)."
|
|
||||||
std::uniform_int_distribution<uint64_t> distribution(
|
|
||||||
config_.election_timeout_min.count(),
|
|
||||||
config_.election_timeout_max.count());
|
|
||||||
Clock::duration wait_interval = std::chrono::milliseconds(distribution(rng_));
|
|
||||||
next_election_ = Clock::now() + wait_interval;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool RaftServer::HasMajorityVote() {
|
|
||||||
if (2 * granted_votes_ > coordination_->GetAllNodeCount()) {
|
|
||||||
VLOG(40) << "Server " << server_id_
|
|
||||||
<< ": Obtained majority vote (Term: " << current_term_ << ")";
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::pair<uint64_t, uint64_t> RaftServer::LastEntryData() {
|
|
||||||
return {log_size_, last_entry_term_};
|
|
||||||
}
|
|
||||||
|
|
||||||
/// [Raft paper 5.4.1] Log-freshness comparison used when granting votes:
/// log A is at least as up-to-date as log B if its last term is greater, or
/// the terms are equal and its last index is not smaller.
bool RaftServer::AtLeastUpToDate(uint64_t last_log_index_a,
                                 uint64_t last_log_term_a,
                                 uint64_t last_log_index_b,
                                 uint64_t last_log_term_b) {
  return last_log_term_a != last_log_term_b
             ? last_log_term_a > last_log_term_b
             : last_log_index_a >= last_log_index_b;
}
|
|
||||||
|
|
||||||
bool RaftServer::OutOfSync(uint64_t reply_term) {
|
|
||||||
DCHECK(mode_ != Mode::FOLLOWER) << "`OutOfSync` called from FOLLOWER mode";
|
|
||||||
|
|
||||||
// [Raft thesis, Section 3.3]
|
|
||||||
// "Current terms are exchanged whenever servers communicate; if one
|
|
||||||
// server's current term is smaller than the other's, then it updates
|
|
||||||
// its current term to the larger value. If a candidate or leader
|
|
||||||
// discovers that its term is out of date, it immediately reverts to
|
|
||||||
// follower state."
|
|
||||||
if (current_term_ < reply_term) {
|
|
||||||
disk_storage_.Put(kCurrentTermKey, std::to_string(reply_term));
|
|
||||||
disk_storage_.Delete(kVotedForKey);
|
|
||||||
granted_votes_ = 0;
|
|
||||||
Transition(Mode::FOLLOWER);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Fetches the log entry at `index`, preferring the in-memory cache and
/// falling back to a disk read + deserialization. `index` must be within
/// the log (DCHECK'd via the disk lookup).
LogEntry RaftServer::GetLogEntry(uint64_t index) {
  auto it = log_.find(index);
  if (it != log_.end())
    return it->second;  // retrieve in-mem if possible
  auto opt_value = disk_storage_.Get(LogEntryKey(index));
  DCHECK(opt_value != std::nullopt)
      << "Log index (" << index << ") out of bounds.";
  return DeserializeLogEntry(opt_value.value());
}
|
|
||||||
|
|
||||||
void RaftServer::DeleteLogSuffix(int starting_index) {
|
|
||||||
DCHECK(0 <= starting_index && starting_index < log_size_)
|
|
||||||
<< "Log index out of bounds.";
|
|
||||||
for (int i = starting_index; i < log_size_; ++i) {
|
|
||||||
log_.erase(i);
|
|
||||||
disk_storage_.Delete(LogEntryKey(i));
|
|
||||||
}
|
|
||||||
SetLogSize(starting_index);
|
|
||||||
}
|
|
||||||
|
|
||||||
void RaftServer::GetLogSuffix(int starting_index,
|
|
||||||
std::vector<raft::LogEntry> &entries) {
|
|
||||||
DCHECK(0 <= starting_index && starting_index < log_size_)
|
|
||||||
<< "Log index out of bounds.";
|
|
||||||
for (int i = starting_index; i < log_size_; ++i)
|
|
||||||
entries.push_back(GetLogEntry(i));
|
|
||||||
}
|
|
||||||
|
|
||||||
void RaftServer::AppendLogEntries(uint64_t leader_commit_index,
|
|
||||||
uint64_t starting_index,
|
|
||||||
const std::vector<LogEntry> &new_entries) {
|
|
||||||
for (int i = 0; i < new_entries.size(); ++i) {
|
|
||||||
// If existing entry conflicts with new one, we need to delete the
|
|
||||||
// existing entry and all that follow it.
|
|
||||||
int current_index = i + starting_index;
|
|
||||||
if (log_size_ > current_index &&
|
|
||||||
GetLogEntry(current_index).term != new_entries[i].term) {
|
|
||||||
DeleteLogSuffix(current_index);
|
|
||||||
}
|
|
||||||
DCHECK(log_size_ >= current_index) << "Current Log index out of bounds.";
|
|
||||||
if (log_size_ == current_index) {
|
|
||||||
log_[log_size_] = new_entries[i];
|
|
||||||
disk_storage_.Put(LogEntryKey(log_size_),
|
|
||||||
SerializeLogEntry(new_entries[i]));
|
|
||||||
last_entry_term_ = new_entries[i].term;
|
|
||||||
SetLogSize(log_size_ + 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// See Raft paper 5.3
|
|
||||||
if (leader_commit_index > commit_index_) {
|
|
||||||
commit_index_ = std::min(leader_commit_index, log_size_ - 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string RaftServer::LogEntryKey(uint64_t index) {
|
|
||||||
return kLogEntryPrefix + std::to_string(index);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string RaftServer::SerializeLogEntry(const LogEntry &log_entry) {
|
|
||||||
std::stringstream stream(std::ios_base::in | std::ios_base::out |
|
|
||||||
std::ios_base::binary);
|
|
||||||
slk::Builder builder(
|
|
||||||
[&stream](const uint8_t *data, size_t size, bool have_more) {
|
|
||||||
for (size_t i = 0; i < size; ++i) {
|
|
||||||
stream << utils::MemcpyCast<char>(data[i]);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
slk::Save(log_entry, &builder);
|
|
||||||
builder.Finalize();
|
|
||||||
return stream.str();
|
|
||||||
}
|
|
||||||
|
|
||||||
LogEntry RaftServer::DeserializeLogEntry(
|
|
||||||
const std::string &serialized_log_entry) {
|
|
||||||
slk::Reader reader(
|
|
||||||
reinterpret_cast<const uint8_t *>(serialized_log_entry.data()),
|
|
||||||
serialized_log_entry.size());
|
|
||||||
LogEntry deserialized;
|
|
||||||
try {
|
|
||||||
slk::Load(&deserialized, &reader);
|
|
||||||
reader.Finalize();
|
|
||||||
} catch (const slk::SlkReaderException &) {
|
|
||||||
LOG(FATAL) << "Couldn't load log from disk storage!";
|
|
||||||
}
|
|
||||||
return deserialized;
|
|
||||||
}
|
|
||||||
|
|
||||||
void RaftServer::NoOpCreate() {
|
|
||||||
// TODO(ipaljak): Review this after implementing RaftDelta object.
|
|
||||||
auto dba = db_->Access();
|
|
||||||
db_->sd_buffer()->Emplace(database::StateDelta::NoOp(dba.transaction_id()));
|
|
||||||
try {
|
|
||||||
dba.Commit();
|
|
||||||
} catch (const RaftException &) {
|
|
||||||
// NoOp failure can be ignored.
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void RaftServer::ApplyStateDeltas(
|
|
||||||
const std::vector<database::StateDelta> &deltas) {
|
|
||||||
std::optional<database::GraphDbAccessor> dba;
|
|
||||||
for (auto &delta : deltas) {
|
|
||||||
switch (delta.type) {
|
|
||||||
case database::StateDelta::Type::NO_OP:
|
|
||||||
break;
|
|
||||||
case database::StateDelta::Type::TRANSACTION_BEGIN:
|
|
||||||
CHECK(!dba) << "Double transaction start";
|
|
||||||
dba = db_->Access();
|
|
||||||
break;
|
|
||||||
case database::StateDelta::Type::TRANSACTION_COMMIT:
|
|
||||||
CHECK(dba) << "Missing accessor for transaction"
|
|
||||||
<< delta.transaction_id;
|
|
||||||
dba->Commit();
|
|
||||||
dba = std::nullopt;
|
|
||||||
break;
|
|
||||||
case database::StateDelta::Type::TRANSACTION_ABORT:
|
|
||||||
LOG(FATAL) << "ApplyStateDeltas shouldn't know about aborted "
|
|
||||||
"transactions";
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
CHECK(dba) << "Missing accessor for transaction"
|
|
||||||
<< delta.transaction_id;
|
|
||||||
delta.Apply(*dba);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
CHECK(!dba) << "StateDeltas missing commit command";
|
|
||||||
}
|
|
||||||
|
|
||||||
std::mutex &RaftServer::WithLock() { return lock_; }
|
|
||||||
|
|
||||||
} // namespace raft
|
|
@ -1,393 +0,0 @@
|
|||||||
/// @file
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include <atomic>
|
|
||||||
#include <filesystem>
|
|
||||||
#include <mutex>
|
|
||||||
#include <unordered_map>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include "durability/single_node_ha/state_delta.hpp"
|
|
||||||
#include "kvstore/kvstore.hpp"
|
|
||||||
#include "raft/config.hpp"
|
|
||||||
#include "raft/coordination.hpp"
|
|
||||||
#include "raft/log_entry.hpp"
|
|
||||||
#include "raft/raft_interface.hpp"
|
|
||||||
#include "raft/raft_rpc_messages.hpp"
|
|
||||||
#include "raft/replication_log.hpp"
|
|
||||||
#include "raft/replication_timeout_map.hpp"
|
|
||||||
#include "transactions/type.hpp"
|
|
||||||
#include "utils/scheduler.hpp"
|
|
||||||
|
|
||||||
// Forward declaration
|
|
||||||
namespace database {
|
|
||||||
class GraphDb;
|
|
||||||
} // namespace database
|
|
||||||
|
|
||||||
namespace raft {
|
|
||||||
|
|
||||||
using Clock = std::chrono::system_clock;
|
|
||||||
using TimePoint = std::chrono::system_clock::time_point;
|
|
||||||
|
|
||||||
enum class Mode { FOLLOWER, CANDIDATE, LEADER };
|
|
||||||
|
|
||||||
inline std::string ModeToString(const Mode &mode) {
|
|
||||||
switch (mode) {
|
|
||||||
case Mode::FOLLOWER:
|
|
||||||
return "FOLLOWER";
|
|
||||||
case Mode::CANDIDATE:
|
|
||||||
return "CANDIDATE";
|
|
||||||
case Mode::LEADER:
|
|
||||||
return "LEADER";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Class which models the behaviour of a single server within the Raft
|
|
||||||
/// cluster. The class is responsible for storing both volatile and
|
|
||||||
/// persistent internal state of the corresponding state machine as well
|
|
||||||
/// as performing operations that comply with the Raft protocol.
|
|
||||||
class RaftServer final : public RaftInterface {
|
|
||||||
public:
|
|
||||||
RaftServer() = delete;
|
|
||||||
|
|
||||||
/// The implementation assumes that server IDs are unique integers between
|
|
||||||
/// ranging from 1 to cluster_size.
|
|
||||||
///
|
|
||||||
/// @param server_id ID of the current server.
|
|
||||||
/// @param durbility_dir directory for persisted data.
|
|
||||||
/// @param db_recover_on_startup flag indicating if recovery should happen at
|
|
||||||
/// startup.
|
|
||||||
/// @param config raft configuration.
|
|
||||||
/// @param coordination Abstraction for coordination between Raft servers.
|
|
||||||
/// @param db The current DB object.
|
|
||||||
RaftServer(uint16_t server_id, const std::string &durability_dir,
|
|
||||||
bool db_recover_on_startup, const Config &config,
|
|
||||||
raft::Coordination *coordination, database::GraphDb *db);
|
|
||||||
|
|
||||||
/// Starts the RPC servers and starts mechanisms inside Raft protocol.
|
|
||||||
void Start();
|
|
||||||
|
|
||||||
/// Stops all threads responsible for the Raft protocol.
|
|
||||||
void Shutdown();
|
|
||||||
|
|
||||||
/// Setter for the current term. It updates the persistent storage as well
|
|
||||||
/// as its in-memory copy.
|
|
||||||
void SetCurrentTerm(uint64_t new_current_term);
|
|
||||||
|
|
||||||
/// Setter for `voted for` member. It updates the persistent storage as well
|
|
||||||
/// as its in-memory copy.
|
|
||||||
void SetVotedFor(std::optional<uint16_t> new_voted_for);
|
|
||||||
|
|
||||||
/// Setter for `log size` member. It updates the persistent storage as well
|
|
||||||
/// as its in-memory copy.
|
|
||||||
void SetLogSize(uint64_t new_log_size);
|
|
||||||
|
|
||||||
/// Emplace a new LogEntry in the raft log and start its replication. This
|
|
||||||
/// entry is created from a given batched set of StateDelta objects.
|
|
||||||
///
|
|
||||||
/// It is possible that the entry was not successfully emplaced. In that case,
|
|
||||||
/// the method returns std::nullopt and the caller is responsible for handling
|
|
||||||
/// situation correctly (e.g. aborting the corresponding transaction).
|
|
||||||
///
|
|
||||||
/// @returns an optional LogEntryStatus object as result.
|
|
||||||
std::optional<LogEntryStatus> Emplace(
|
|
||||||
const std::vector<database::StateDelta> &deltas) override;
|
|
||||||
|
|
||||||
/// Returns true if the current servers mode is LEADER. False otherwise.
|
|
||||||
bool IsLeader() override;
|
|
||||||
|
|
||||||
/// Returns the term ID of the current leader.
|
|
||||||
uint64_t TermId() override;
|
|
||||||
|
|
||||||
/// Returns the replication status of LogEntry which began its replication in
|
|
||||||
/// a given term ID and was emplaced in the raft log at the given index.
|
|
||||||
///
|
|
||||||
/// Replication status can be one of the following
|
|
||||||
/// 1) REPLICATED -- LogEntry was successfully replicated across
|
|
||||||
/// the Raft cluster
|
|
||||||
/// 2) WAITING -- LogEntry was successfully emplaced in the Raft
|
|
||||||
/// log and is currently being replicated.
|
|
||||||
/// 3) ABORTED -- LogEntry will not be replicated.
|
|
||||||
/// 4) INVALID -- the request for the LogEntry was invalid, most
|
|
||||||
/// likely either term_id or log_index were out of range.
|
|
||||||
ReplicationStatus GetReplicationStatus(uint64_t term_id,
|
|
||||||
uint64_t log_index) override;
|
|
||||||
|
|
||||||
/// Checks if the LogEntry with the give term id and log index can safely be
|
|
||||||
/// committed in local storage.
|
|
||||||
///
|
|
||||||
/// @param term_id term when the LogEntry was created
|
|
||||||
/// @param log_index index of the LogEntry in the Raft log
|
|
||||||
///
|
|
||||||
/// @return bool True if the transaction is safe to commit, false otherwise.
|
|
||||||
///
|
|
||||||
/// @throws ReplicationTimeoutException
|
|
||||||
/// @throws RaftShutdownException
|
|
||||||
/// @throws InvalidReplicationLogLookup
|
|
||||||
bool SafeToCommit(uint64_t term_id, uint64_t log_index) override;
|
|
||||||
|
|
||||||
private:
|
|
||||||
mutable std::mutex lock_; ///< Guards all internal state.
|
|
||||||
mutable std::mutex heartbeat_lock_; ///< Guards HB issuing
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
|
||||||
// volatile state on all servers
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
Config config_; ///< Raft config.
|
|
||||||
Coordination *coordination_{nullptr}; ///< Cluster coordination.
|
|
||||||
database::GraphDb *db_{nullptr};
|
|
||||||
|
|
||||||
std::atomic<Mode> mode_; ///< Server's current mode.
|
|
||||||
uint16_t server_id_; ///< ID of the current server.
|
|
||||||
std::filesystem::path durability_dir_; ///< Durability directory.
|
|
||||||
bool db_recover_on_startup_; ///< Flag indicating if recovery should happen
|
|
||||||
///< on startup.
|
|
||||||
uint64_t commit_index_; ///< Index of the highest known committed entry.
|
|
||||||
uint64_t last_applied_; ///< Index of the highest applied entry to SM.
|
|
||||||
uint64_t last_entry_term_; ///< Term of the last entry in Raft log
|
|
||||||
|
|
||||||
std::atomic<bool> issue_hb_; ///< Flag which signalizes if the current server
|
|
||||||
///< should send HBs to the rest of the cluster.
|
|
||||||
|
|
||||||
std::vector<std::thread> peer_threads_; ///< One thread per peer which
|
|
||||||
///< handles outgoing RPCs.
|
|
||||||
|
|
||||||
std::vector<std::thread> hb_threads_; ///< One thread per peer which is
|
|
||||||
///< responsible for sending periodic
|
|
||||||
///< heartbeats.
|
|
||||||
|
|
||||||
std::condition_variable state_changed_; ///< Notifies all peer threads on
|
|
||||||
///< relevant state change.
|
|
||||||
|
|
||||||
std::thread no_op_issuer_thread_; ///< Thread responsible for issuing no-op
|
|
||||||
///< command on leader change.
|
|
||||||
|
|
||||||
std::condition_variable leader_changed_; ///< Notifies the
|
|
||||||
///< no_op_issuer_thread that a new
|
|
||||||
///< leader has been elected.
|
|
||||||
|
|
||||||
std::condition_variable hb_condition_; ///< Notifies the HBIssuer thread
|
|
||||||
///< that it should start sending
|
|
||||||
///< heartbeats.
|
|
||||||
|
|
||||||
std::atomic<bool> exiting_{false}; ///< True on server shutdown.
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
|
||||||
// volatile state on followers and candidates
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
std::thread election_thread_; ///< Timer thread for triggering elections.
|
|
||||||
TimePoint next_election_; ///< Next election `TimePoint`.
|
|
||||||
|
|
||||||
std::condition_variable election_change_; ///> Used to notify election_thread
|
|
||||||
///> on next_election_ change.
|
|
||||||
|
|
||||||
std::mt19937_64 rng_ = std::mt19937_64(std::random_device{}());
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
|
||||||
// volatile state on candidates
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
uint16_t granted_votes_;
|
|
||||||
std::vector<bool> vote_requested_;
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
|
||||||
// volatile state on leaders
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
std::vector<uint64_t> next_index_; ///< for each server, index of the next
|
|
||||||
///< log entry to send to that server.
|
|
||||||
|
|
||||||
std::vector<uint64_t> index_offset_; ///< for each server, the offset for
|
|
||||||
///< which we reduce the next_index_
|
|
||||||
///< field if the AppendEntries request
|
|
||||||
///< is denied. We use "binary lifting"
|
|
||||||
///< style technique to achieve at most
|
|
||||||
///< O(logn) requests.
|
|
||||||
|
|
||||||
std::vector<uint64_t> match_index_; ///< for each server, index of the
|
|
||||||
///< highest log entry known to be
|
|
||||||
///< replicated on server.
|
|
||||||
|
|
||||||
std::vector<TimePoint> next_replication_; ///< for each server, time point
|
|
||||||
///< for the next replication.
|
|
||||||
|
|
||||||
std::vector<TimePoint> next_heartbeat_; ///< for each server, time point for
|
|
||||||
///< the next heartbeat.
|
|
||||||
|
|
||||||
// Tracks timepoints until a transactions is allowed to be in the replication
|
|
||||||
// process.
|
|
||||||
ReplicationTimeoutMap replication_timeout_;
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
|
||||||
// persistent state on all servers
|
|
||||||
//
|
|
||||||
// Persistent data consists of:
|
|
||||||
// - uint64_t current_term -- latest term server has seen.
|
|
||||||
// - uint16_t voted_for -- candidate_id that received vote in current
|
|
||||||
// term (null if none).
|
|
||||||
// - uint64_t log_size -- Number of stored entries within the log.
|
|
||||||
// - vector<LogEntry> log -- log entries. Each log entry is stored under
|
|
||||||
// a separate key within KVStore.
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
kvstore::KVStore disk_storage_;
|
|
||||||
|
|
||||||
std::optional<uint16_t> voted_for_;
|
|
||||||
|
|
||||||
std::atomic<uint64_t> current_term_;
|
|
||||||
uint64_t log_size_;
|
|
||||||
|
|
||||||
std::map<uint64_t, LogEntry> log_;
|
|
||||||
|
|
||||||
/// Recovers persistent data from disk and stores its in-memory copies
|
|
||||||
/// that insure faster read-only operations. This method should be called
|
|
||||||
/// on start-up. If parts of persistent data are missing, the method won't
|
|
||||||
/// make a copy of that data, i.e. no exception is thrown and the caller
|
|
||||||
/// should check whether persistent data actually exists.
|
|
||||||
void RecoverPersistentData();
|
|
||||||
|
|
||||||
/// Makes a transition to a new `raft::Mode`.
|
|
||||||
///
|
|
||||||
/// throws InvalidTransitionException when transitioning between incompatible
|
|
||||||
/// `raft::Mode`s.
|
|
||||||
void Transition(const raft::Mode &new_mode);
|
|
||||||
|
|
||||||
/// Tries to advance the commit index on a leader.
|
|
||||||
void AdvanceCommitIndex();
|
|
||||||
|
|
||||||
/// Decides whether to send Log Entires or Snapshot to the given peer.
|
|
||||||
///
|
|
||||||
/// @param peer_id ID of the peer which receives entries.
|
|
||||||
/// @param lock Lock from the peer thread (released while waiting for
|
|
||||||
/// response)
|
|
||||||
void SendEntries(uint16_t peer_id, std::unique_lock<std::mutex> *lock);
|
|
||||||
|
|
||||||
/// Sends Log Entries to peer. This function should only be called in leader
|
|
||||||
/// mode.
|
|
||||||
///
|
|
||||||
/// @param peer_id ID of the peer which receives entries.
|
|
||||||
/// @param lock Lock from the peer thread (released while waiting for
|
|
||||||
/// response)
|
|
||||||
void SendLogEntries(uint16_t peer_id,
|
|
||||||
std::unique_lock<std::mutex> *lock);
|
|
||||||
|
|
||||||
/// Send Snapshot to peer. This function should only be called in leader
|
|
||||||
/// mode.
|
|
||||||
///
|
|
||||||
/// @param peer_id ID of the peer which receives entries.
|
|
||||||
/// @param lock Lock from the peer thread (released while waiting for
|
|
||||||
/// response)
|
|
||||||
void SendSnapshot(uint16_t peer_id, std::unique_lock<std::mutex> *lock);
|
|
||||||
|
|
||||||
/// Main function of the `election_thread_`. It is responsible for
|
|
||||||
/// transition to CANDIDATE mode when election timeout elapses.
|
|
||||||
void ElectionThreadMain();
|
|
||||||
|
|
||||||
/// Main function of the thread that handles outgoing RPCs towards a
|
|
||||||
/// specified node within the Raft cluster.
|
|
||||||
///
|
|
||||||
/// @param peer_id - ID of a receiving node in the cluster.
|
|
||||||
void PeerThreadMain(uint16_t peer_id);
|
|
||||||
|
|
||||||
/// Main function of the thread that handles issuing heartbeats towards
|
|
||||||
/// other peers. At the moment, this function is ignorant about the status
|
|
||||||
/// of LogEntry replication. Therefore, it might issue unnecessary
|
|
||||||
/// heartbeats, but we can live with that at this point.
|
|
||||||
///
|
|
||||||
/// @param peer_id - ID of a receiving node in the cluster.
|
|
||||||
void HBThreadMain(uint16_t peer_id);
|
|
||||||
|
|
||||||
/// Issues no-op command when a new leader is elected. This is done to
|
|
||||||
/// force the Raft protocol to commit logs from previous terms that
|
|
||||||
/// have been replicated on a majority of peers.
|
|
||||||
void NoOpIssuerThreadMain();
|
|
||||||
|
|
||||||
/// Sets the `TimePoint` for next election.
|
|
||||||
void SetNextElectionTimePoint();
|
|
||||||
|
|
||||||
/// Checks if the current server obtained enough votes to become a leader.
|
|
||||||
bool HasMajorityVote();
|
|
||||||
|
|
||||||
/// Returns relevant metadata about the last entry in this server's Raft Log.
|
|
||||||
/// More precisely, returns a pair consisting of an index of the last entry
|
|
||||||
/// in the log and the term of the last entry in the log.
|
|
||||||
///
|
|
||||||
/// @return std::pair<last_log_index, last_log_term>
|
|
||||||
std::pair<uint64_t, uint64_t> LastEntryData();
|
|
||||||
|
|
||||||
/// Checks whether Raft log of server A is at least as up-to-date as the Raft
|
|
||||||
/// log of server B. This is strictly defined in Raft paper 5.4.
|
|
||||||
///
|
|
||||||
/// @param last_log_index_a - Index of server A's last log entry.
|
|
||||||
/// @param last_log_term_a - Term of server A's last log entry.
|
|
||||||
/// @param last_log_index_b - Index of server B's last log entry.
|
|
||||||
/// @param last_log_term_b - Term of server B's last log entry.
|
|
||||||
bool AtLeastUpToDate(uint64_t last_log_index_a, uint64_t last_log_term_a,
|
|
||||||
uint64_t last_log_index_b, uint64_t last_log_term_b);
|
|
||||||
|
|
||||||
/// Checks whether the current server got a reply from "future", i.e. reply
|
|
||||||
/// with a higher term. If so, the current server falls back to follower mode
|
|
||||||
/// and updates its current term.
|
|
||||||
///
|
|
||||||
/// @param reply_term Term from RPC response.
|
|
||||||
/// @return true if the current server's term lags behind.
|
|
||||||
bool OutOfSync(uint64_t reply_term);
|
|
||||||
|
|
||||||
/// Retrieves a log entry from the log at a given index.
|
|
||||||
///
|
|
||||||
/// @param index Index of the log entry to be retrieved.
|
|
||||||
LogEntry GetLogEntry(uint64_t index);
|
|
||||||
|
|
||||||
/// Deletes log entries with indexes that are greater or equal to the given
|
|
||||||
/// starting index.
|
|
||||||
///
|
|
||||||
/// @param starting_index Smallest index which will be deleted from the Log.
|
|
||||||
/// Also, a friendly remainder that log entries are
|
|
||||||
/// 1-indexed.
|
|
||||||
void DeleteLogSuffix(int starting_index);
|
|
||||||
|
|
||||||
/// Stores log entries with indexes that are greater or equal to the given
|
|
||||||
/// starting index into a provided container. If the starting index is
|
|
||||||
/// greater than the log size, nothing will be stored in the provided
|
|
||||||
/// container.
|
|
||||||
///
|
|
||||||
/// @param starting_index Smallest index which will be stored.
|
|
||||||
/// @param entries The container which will store the wanted suffix.
|
|
||||||
void GetLogSuffix(int starting_index, std::vector<raft::LogEntry> &entries);
|
|
||||||
|
|
||||||
/// Appends new log entries to Raft log. Note that this function is not
|
|
||||||
/// smart in any way, i.e. the caller should make sure that it's safe
|
|
||||||
/// to call this function. This function also updates this server's commit
|
|
||||||
/// index if necessary.
|
|
||||||
///
|
|
||||||
/// @param leader_commit_index - Used to update local commit index.
|
|
||||||
/// @param starting_index - Index in the log from which we start to append.
|
|
||||||
/// @param new_entries - New `LogEntry` instances to be appended in the log.
|
|
||||||
void AppendLogEntries(uint64_t leader_commit_index, uint64_t starting_index,
|
|
||||||
const std::vector<LogEntry> &new_entries);
|
|
||||||
|
|
||||||
/// Generates the key under which the `LogEntry` with a given index should
|
|
||||||
/// be stored on our disk storage.
|
|
||||||
///
|
|
||||||
/// @param index - Index of the `LogEntry` for which we generate the key.
|
|
||||||
std::string LogEntryKey(uint64_t index);
|
|
||||||
|
|
||||||
/// Serializes Raft log entry into `std::string`
|
|
||||||
std::string SerializeLogEntry(const LogEntry &log_entry);
|
|
||||||
|
|
||||||
/// Deserialized Raft log entry from `std::string`
|
|
||||||
LogEntry DeserializeLogEntry(const std::string &serialized_log_entry);
|
|
||||||
|
|
||||||
/// Start a new transaction with a NO-OP StateDelta.
|
|
||||||
void NoOpCreate();
|
|
||||||
|
|
||||||
/// Applies the given batch of state deltas that are representing a transacton
|
|
||||||
/// to the db.
|
|
||||||
void ApplyStateDeltas(const std::vector<database::StateDelta> &deltas);
|
|
||||||
|
|
||||||
std::mutex &WithLock() override;
|
|
||||||
};
|
|
||||||
} // namespace raft
|
|
@ -1,86 +0,0 @@
|
|||||||
/// @file
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include <atomic>
|
|
||||||
|
|
||||||
#include "data_structures/bitset/dynamic_bitset.hpp"
|
|
||||||
#include "transactions/type.hpp"
|
|
||||||
|
|
||||||
namespace raft {
|
|
||||||
|
|
||||||
/// Tracks information about replicated and active logs for high availability.
|
|
||||||
///
|
|
||||||
/// The main difference between ReplicationLog and CommitLog is that
|
|
||||||
/// ReplicationLog doesn't throw when looking up garbage collected transaction
|
|
||||||
/// ids.
|
|
||||||
class ReplicationLog final {
|
|
||||||
public:
|
|
||||||
static constexpr int kBitsetBlockSize = 32768;
|
|
||||||
|
|
||||||
ReplicationLog() = default;
|
|
||||||
ReplicationLog(const ReplicationLog &) = delete;
|
|
||||||
ReplicationLog(ReplicationLog &&) = delete;
|
|
||||||
ReplicationLog &operator=(const ReplicationLog &) = delete;
|
|
||||||
ReplicationLog &operator=(ReplicationLog &&) = delete;
|
|
||||||
|
|
||||||
bool is_active(tx::TransactionId id) const {
|
|
||||||
return fetch_info(id).is_active();
|
|
||||||
}
|
|
||||||
|
|
||||||
void set_active(tx::TransactionId id) { log.set(2 * id); }
|
|
||||||
|
|
||||||
bool is_replicated(tx::TransactionId id) const {
|
|
||||||
return fetch_info(id).is_replicated();
|
|
||||||
}
|
|
||||||
|
|
||||||
void set_replicated(tx::TransactionId id) { log.set(2 * id + 1); }
|
|
||||||
|
|
||||||
// Clears the replication log from bits associated with transactions with an
|
|
||||||
// id lower than `id`.
|
|
||||||
void garbage_collect_older(tx::TransactionId id) {
|
|
||||||
// We keep track of the valid prefix in order to avoid the `CHECK` inside
|
|
||||||
// the `DynamicBitset`.
|
|
||||||
valid_prefix = 2 * id;
|
|
||||||
log.delete_prefix(2 * id);
|
|
||||||
}
|
|
||||||
|
|
||||||
class Info final {
|
|
||||||
public:
|
|
||||||
enum Status {
|
|
||||||
UNKNOWN = 0, // 00
|
|
||||||
ACTIVE = 1, // 01
|
|
||||||
REPLICATED = 2, // 10
|
|
||||||
};
|
|
||||||
|
|
||||||
explicit Info(uint8_t flags) {
|
|
||||||
if (flags & REPLICATED) {
|
|
||||||
flags_ = REPLICATED;
|
|
||||||
} else if (flags & ACTIVE) {
|
|
||||||
flags_ = ACTIVE;
|
|
||||||
} else {
|
|
||||||
flags_ = UNKNOWN;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool is_active() const { return flags_ & ACTIVE; }
|
|
||||||
|
|
||||||
bool is_replicated() const { return flags_ & REPLICATED; }
|
|
||||||
|
|
||||||
operator uint8_t() const { return flags_; }
|
|
||||||
|
|
||||||
private:
|
|
||||||
uint8_t flags_{0};
|
|
||||||
};
|
|
||||||
|
|
||||||
Info fetch_info(tx::TransactionId id) const {
|
|
||||||
if (valid_prefix > 2 * id) return Info{0};
|
|
||||||
|
|
||||||
return Info{log.at(2 * id, 2)};
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
DynamicBitset<uint8_t, kBitsetBlockSize> log;
|
|
||||||
std::atomic<tx::TransactionId> valid_prefix{0};
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace raft
|
|
@ -1,71 +0,0 @@
|
|||||||
/// @file
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include <chrono>
|
|
||||||
#include <map>
|
|
||||||
#include <mutex>
|
|
||||||
|
|
||||||
namespace raft {
|
|
||||||
|
|
||||||
using Clock = std::chrono::system_clock;
|
|
||||||
using TimePoint = std::chrono::system_clock::time_point;
|
|
||||||
|
|
||||||
/// A wrapper around an unordered_map whose reads/writes are protected with a
|
|
||||||
/// lock. It's also specialized to serve the sole purpose of tracking
|
|
||||||
/// replication timeout.
|
|
||||||
class ReplicationTimeoutMap final {
|
|
||||||
public:
|
|
||||||
ReplicationTimeoutMap() = delete;
|
|
||||||
|
|
||||||
ReplicationTimeoutMap(const ReplicationTimeoutMap &) = delete;
|
|
||||||
ReplicationTimeoutMap(ReplicationTimeoutMap &&) = delete;
|
|
||||||
ReplicationTimeoutMap operator=(const ReplicationTimeoutMap &) = delete;
|
|
||||||
ReplicationTimeoutMap operator=(ReplicationTimeoutMap &&) = delete;
|
|
||||||
|
|
||||||
explicit ReplicationTimeoutMap(std::chrono::milliseconds replication_timeout)
|
|
||||||
: replication_timeout_(replication_timeout) {}
|
|
||||||
|
|
||||||
/// Remove all entries from the map.
|
|
||||||
void Clear() {
|
|
||||||
std::lock_guard<std::mutex> guard(lock_);
|
|
||||||
timeout_.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Remove a single entry from the map.
|
|
||||||
void Remove(const uint64_t term_id, const uint64_t log_index) {
|
|
||||||
std::lock_guard<std::mutex> guard(lock_);
|
|
||||||
timeout_.erase({term_id, log_index});
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Inserts and entry in the map by setting a point in time until it needs to
|
|
||||||
/// replicated.
|
|
||||||
void Insert(const uint64_t term_id, const uint64_t log_index) {
|
|
||||||
std::lock_guard<std::mutex> guard(lock_);
|
|
||||||
timeout_[{term_id, log_index}] = replication_timeout_ + Clock::now();
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Checks if the given entry has timed out.
|
|
||||||
/// @returns bool True if it exceeded timeout, false otherwise.
|
|
||||||
bool CheckTimeout(const uint64_t term_id, const uint64_t log_index) {
|
|
||||||
std::lock_guard<std::mutex> guard(lock_);
|
|
||||||
auto found = timeout_.find({term_id, log_index});
|
|
||||||
// If we didn't set the timeout yet, or we already deleted it, we didn't
|
|
||||||
// time out.
|
|
||||||
if (found == timeout_.end()) return false;
|
|
||||||
if (found->second < Clock::now()) {
|
|
||||||
return true;
|
|
||||||
} else {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
std::chrono::milliseconds replication_timeout_;
|
|
||||||
|
|
||||||
mutable std::mutex lock_;
|
|
||||||
// TODO(ipaljak): Consider using unordered_map if we encounter any performance
|
|
||||||
// issues.
|
|
||||||
std::map<std::pair<uint64_t, uint64_t>, TimePoint> timeout_;
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace raft
|
|
@ -1,74 +0,0 @@
|
|||||||
#include "raft/storage_info.hpp"
|
|
||||||
|
|
||||||
#include <chrono>
|
|
||||||
|
|
||||||
#include "database/single_node_ha/graph_db.hpp"
|
|
||||||
#include "raft/coordination.hpp"
|
|
||||||
#include "raft/storage_info_rpc_messages.hpp"
|
|
||||||
#include "utils/future.hpp"
|
|
||||||
#include "utils/stat.hpp"
|
|
||||||
|
|
||||||
namespace raft {
|
|
||||||
|
|
||||||
using namespace std::literals::chrono_literals;
|
|
||||||
using Clock = std::chrono::system_clock;
|
|
||||||
using TimePoint = std::chrono::system_clock::time_point;
|
|
||||||
|
|
||||||
StorageInfo::StorageInfo(database::GraphDb *db, Coordination *coordination,
|
|
||||||
uint16_t server_id)
|
|
||||||
: db_(db), coordination_(coordination), server_id_(server_id) {
|
|
||||||
CHECK(db) << "Graph DB can't be nullptr";
|
|
||||||
CHECK(coordination) << "Coordination can't be nullptr";
|
|
||||||
}
|
|
||||||
|
|
||||||
StorageInfo::~StorageInfo() {}
|
|
||||||
|
|
||||||
void StorageInfo::Start() {
|
|
||||||
coordination_->Register<StorageInfoRpc>(
|
|
||||||
[this](auto *req_reader, auto *res_builder) {
|
|
||||||
StorageInfoReq req;
|
|
||||||
slk::Load(&req, req_reader);
|
|
||||||
|
|
||||||
StorageInfoRes res(this->server_id_, this->GetLocalStorageInfo());
|
|
||||||
slk::Save(res, res_builder);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::pair<std::string, std::string>>
|
|
||||||
StorageInfo::GetLocalStorageInfo() const {
|
|
||||||
std::vector<std::pair<std::string, std::string>> info;
|
|
||||||
|
|
||||||
db_->RefreshStat();
|
|
||||||
auto &stat = db_->GetStat();
|
|
||||||
|
|
||||||
info.emplace_back("vertex_count", std::to_string(stat.vertex_count));
|
|
||||||
info.emplace_back("edge_count", std::to_string(stat.edge_count));
|
|
||||||
info.emplace_back("average_degree", std::to_string(stat.avg_degree));
|
|
||||||
info.emplace_back("memory_usage", std::to_string(utils::GetMemoryUsage()));
|
|
||||||
info.emplace_back("disk_usage",
|
|
||||||
std::to_string(db_->GetDurabilityDirDiskUsage()));
|
|
||||||
|
|
||||||
return info;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::map<std::string, std::vector<std::pair<std::string, std::string>>>
|
|
||||||
StorageInfo::GetStorageInfo() const {
|
|
||||||
std::map<std::string, std::vector<std::pair<std::string, std::string>>> info;
|
|
||||||
|
|
||||||
for (auto id : coordination_->GetAllNodeIds()) {
|
|
||||||
if (id == server_id_) {
|
|
||||||
info.emplace(std::to_string(id), GetLocalStorageInfo());
|
|
||||||
} else {
|
|
||||||
auto reply = coordination_->ExecuteOnOtherNode<StorageInfoRpc>(id);
|
|
||||||
if (reply) {
|
|
||||||
info[std::to_string(id)] = std::move(reply->storage_info);
|
|
||||||
} else {
|
|
||||||
info[std::to_string(id)] = {};
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return info;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace raft
|
|
@ -1,47 +0,0 @@
|
|||||||
/// @file
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include <map>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
// Forward declaration
|
|
||||||
namespace database {
|
|
||||||
class GraphDb;
|
|
||||||
} // namespace database
|
|
||||||
|
|
||||||
namespace raft {
|
|
||||||
|
|
||||||
// Forward declaration
|
|
||||||
class Coordination;
|
|
||||||
|
|
||||||
/// StorageInfo takes care of the Raft cluster storage info retrieval.
|
|
||||||
class StorageInfo final {
|
|
||||||
public:
|
|
||||||
StorageInfo() = delete;
|
|
||||||
StorageInfo(database::GraphDb *db, Coordination *coordination,
|
|
||||||
uint16_t server_id);
|
|
||||||
|
|
||||||
StorageInfo(const StorageInfo &) = delete;
|
|
||||||
StorageInfo(StorageInfo &&) = delete;
|
|
||||||
StorageInfo operator=(const StorageInfo &) = delete;
|
|
||||||
StorageInfo operator=(StorageInfo &&) = delete;
|
|
||||||
|
|
||||||
~StorageInfo();
|
|
||||||
|
|
||||||
void Start();
|
|
||||||
|
|
||||||
/// Returns storage info for the local storage only.
|
|
||||||
std::vector<std::pair<std::string, std::string>> GetLocalStorageInfo() const;
|
|
||||||
|
|
||||||
/// Returns storage info for each peer in the Raft cluster.
|
|
||||||
std::map<std::string, std::vector<std::pair<std::string, std::string>>>
|
|
||||||
GetStorageInfo() const;
|
|
||||||
|
|
||||||
private:
|
|
||||||
database::GraphDb *db_{nullptr};
|
|
||||||
Coordination *coordination_{nullptr};
|
|
||||||
uint16_t server_id_;
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace raft
|
|
@ -1,19 +0,0 @@
|
|||||||
#>cpp
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include <vector>
|
|
||||||
#include <string>
|
|
||||||
|
|
||||||
#include "rpc/messages.hpp"
|
|
||||||
#include "slk/serialization.hpp"
|
|
||||||
cpp<#
|
|
||||||
|
|
||||||
(lcp:namespace raft)
|
|
||||||
|
|
||||||
(lcp:define-rpc storage-info
|
|
||||||
(:request ())
|
|
||||||
(:response
|
|
||||||
((server-id :uint16_t)
|
|
||||||
(storage-info "std::vector<std::pair<std::string, std::string>>"))))
|
|
||||||
|
|
||||||
(lcp:pop-namespace) ;; raft
|
|
@ -1,30 +0,0 @@
|
|||||||
/// @file
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include <mutex>
|
|
||||||
|
|
||||||
#include "storage/common/types/property_value_store.hpp"
|
|
||||||
#include "transactions/engine.hpp"
|
|
||||||
#include "transactions/snapshot.hpp"
|
|
||||||
|
|
||||||
namespace storage::constraints::common {
|
|
||||||
template <typename TConstraints>
|
|
||||||
void UniqueConstraintRefresh(const tx::Snapshot &snapshot,
|
|
||||||
const tx::Engine &engine,
|
|
||||||
TConstraints &constraints, std::mutex &lock) {
|
|
||||||
std::lock_guard<std::mutex> guard(lock);
|
|
||||||
for (auto &constraint : constraints) {
|
|
||||||
for (auto p = constraint.version_pairs.begin();
|
|
||||||
p != constraint.version_pairs.end(); ++p) {
|
|
||||||
auto exp_id = p->record.tx_id_exp;
|
|
||||||
auto cre_id = p->record.tx_id_cre;
|
|
||||||
if ((exp_id != 0 && exp_id < snapshot.back() &&
|
|
||||||
engine.Info(exp_id).is_committed() && !snapshot.contains(exp_id)) ||
|
|
||||||
(cre_id < snapshot.back() && engine.Info(cre_id).is_aborted())) {
|
|
||||||
constraint.version_pairs.erase(p);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} // namespace storage::constraints::common
|
|
@ -1,19 +0,0 @@
|
|||||||
/// @file
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include "utils/exceptions.hpp"
|
|
||||||
|
|
||||||
namespace storage::constraints {
|
|
||||||
|
|
||||||
/// Thrown when a violation of a constraint occurs.
|
|
||||||
class ViolationException : public utils::BasicException {
|
|
||||||
using utils::BasicException::BasicException;
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Thrown when multiple transactions alter the same constraint.
|
|
||||||
class SerializationException : public utils::BasicException {
|
|
||||||
using utils::BasicException::BasicException;
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace database
|
|
@ -1,66 +0,0 @@
|
|||||||
#include "storage/common/constraints/record.hpp"
|
|
||||||
|
|
||||||
#include "storage/common/constraints/exceptions.hpp"
|
|
||||||
#include "storage/common/mvcc/exceptions.hpp"
|
|
||||||
#include "transactions/engine.hpp"
|
|
||||||
#include "transactions/transaction.hpp"
|
|
||||||
|
|
||||||
namespace storage::constraints::impl {
|
|
||||||
Record::Record(storage::Gid gid, const tx::Transaction &t)
|
|
||||||
: curr_gid(gid), tx_id_cre(t.id_) {}
|
|
||||||
|
|
||||||
void Record::Insert(storage::Gid gid, const tx::Transaction &t) {
|
|
||||||
// Insert
|
|
||||||
// - delete before or in this transaction and not aborted
|
|
||||||
// - insert before and aborted
|
|
||||||
// Throw SerializationException
|
|
||||||
// - delted of inserted after this transaction
|
|
||||||
// Throw ViolationException
|
|
||||||
// - insert before or in this transaction and not aborted
|
|
||||||
// - delete before and aborted
|
|
||||||
|
|
||||||
t.TakeLock(lock_);
|
|
||||||
if (t.id_ < tx_id_cre || (tx_id_exp != 0 && t.id_ < tx_id_exp)) {
|
|
||||||
throw SerializationException(
|
|
||||||
"Node couldn't be updated due to unique constraint serialization "
|
|
||||||
"error!");
|
|
||||||
}
|
|
||||||
|
|
||||||
bool has_entry = tx_id_exp == 0;
|
|
||||||
bool is_aborted = has_entry ? t.engine_.Info(tx_id_cre).is_aborted()
|
|
||||||
: t.engine_.Info(tx_id_exp).is_aborted();
|
|
||||||
|
|
||||||
if ((has_entry && !is_aborted) || (!has_entry && is_aborted)) {
|
|
||||||
throw ViolationException(
|
|
||||||
"Node couldn't be updated due to unique constraint violation!");
|
|
||||||
}
|
|
||||||
|
|
||||||
curr_gid = gid;
|
|
||||||
tx_id_cre = t.id_;
|
|
||||||
tx_id_exp = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
void Record::Remove(storage::Gid gid, const tx::Transaction &t) {
|
|
||||||
// Remove
|
|
||||||
// - insert before or in this transaction and not aborted
|
|
||||||
// - remove before and aborted
|
|
||||||
// Nothing
|
|
||||||
// - remove before or in this transaction and not aborted
|
|
||||||
// - insert before and aborted
|
|
||||||
// Throw SerializationException
|
|
||||||
// - delete or insert after this transaction
|
|
||||||
|
|
||||||
t.TakeLock(lock_);
|
|
||||||
DCHECK(gid == curr_gid);
|
|
||||||
if (t.id_ < tx_id_cre || (tx_id_exp != 0 && t.id_ < tx_id_exp))
|
|
||||||
throw mvcc::SerializationError();
|
|
||||||
|
|
||||||
bool has_entry = tx_id_exp == 0;
|
|
||||||
bool is_aborted = has_entry ? t.engine_.Info(tx_id_cre).is_aborted()
|
|
||||||
: t.engine_.Info(tx_id_exp).is_aborted();
|
|
||||||
|
|
||||||
if ((!has_entry && !is_aborted) || (has_entry && is_aborted)) return;
|
|
||||||
|
|
||||||
tx_id_exp = t.id_;
|
|
||||||
}
|
|
||||||
} // namespace storage::constraints::impl
|
|
@ -1,25 +0,0 @@
|
|||||||
/// @file
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include "storage/common/locking/record_lock.hpp"
|
|
||||||
#include "storage/common/types/types.hpp"
|
|
||||||
#include "transactions/type.hpp"
|
|
||||||
|
|
||||||
namespace tx {
|
|
||||||
class Transaction;
|
|
||||||
} // namespace tx
|
|
||||||
|
|
||||||
namespace storage::constraints::impl {
|
|
||||||
/// Contains records of creation and deletion of entry in a constraint.
|
|
||||||
struct Record {
|
|
||||||
Record(storage::Gid gid, const tx::Transaction &t);
|
|
||||||
void Insert(storage::Gid gid, const tx::Transaction &t);
|
|
||||||
void Remove(storage::Gid gid, const tx::Transaction &t);
|
|
||||||
|
|
||||||
storage::Gid curr_gid;
|
|
||||||
tx::TransactionId tx_id_cre;
|
|
||||||
tx::TransactionId tx_id_exp{0};
|
|
||||||
RecordLock lock_;
|
|
||||||
};
|
|
||||||
} // namespace storage::constraints::impl
|
|
@ -1,258 +0,0 @@
|
|||||||
#include "storage/common/constraints/unique_constraints.hpp"
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
|
|
||||||
#include "storage/vertex_accessor.hpp"
|
|
||||||
|
|
||||||
namespace storage::constraints {
|
|
||||||
|
|
||||||
namespace {
|
|
||||||
auto FindIn(storage::Label label,
|
|
||||||
const std::vector<storage::Property> &properties,
|
|
||||||
const std::list<impl::LabelPropertiesEntry> &constraints) {
|
|
||||||
return std::find_if(
|
|
||||||
constraints.begin(), constraints.end(), [label, properties](auto &c) {
|
|
||||||
return c.label == label &&
|
|
||||||
std::is_permutation(properties.begin(), properties.end(),
|
|
||||||
c.properties.begin(), c.properties.end());
|
|
||||||
});
|
|
||||||
}
|
|
||||||
} // anonymous namespace
|
|
||||||
|
|
||||||
bool UniqueConstraints::AddConstraint(const ConstraintEntry &entry) {
|
|
||||||
auto constraint = FindIn(entry.label, entry.properties, constraints_);
|
|
||||||
if (constraint == constraints_.end()) {
|
|
||||||
constraints_.emplace_back(entry.label, entry.properties);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool UniqueConstraints::RemoveConstraint(const ConstraintEntry &entry) {
|
|
||||||
auto constraint = FindIn(entry.label, entry.properties, constraints_);
|
|
||||||
if (constraint != constraints_.end()) {
|
|
||||||
constraints_.erase(constraint);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool UniqueConstraints::Exists(
|
|
||||||
storage::Label label,
|
|
||||||
const std::vector<storage::Property> &properties) const {
|
|
||||||
return FindIn(label, properties, constraints_) != constraints_.end();
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<ConstraintEntry> UniqueConstraints::ListConstraints() const {
|
|
||||||
std::vector<ConstraintEntry> constraints(constraints_.size());
|
|
||||||
std::transform(constraints_.begin(), constraints_.end(), constraints.begin(),
|
|
||||||
[](auto &c) {
|
|
||||||
return ConstraintEntry{c.label, c.properties};
|
|
||||||
});
|
|
||||||
return constraints;
|
|
||||||
}
|
|
||||||
|
|
||||||
void UniqueConstraints::Update(const RecordAccessor<Vertex> &accessor,
|
|
||||||
const tx::Transaction &t) {
|
|
||||||
auto &vertex = accessor.current();
|
|
||||||
std::lock_guard<std::mutex> guard(lock_);
|
|
||||||
for (auto &constraint : constraints_) {
|
|
||||||
if (!utils::Contains(vertex.labels_, constraint.label)) continue;
|
|
||||||
std::vector<PropertyValue> values;
|
|
||||||
for (auto p : constraint.properties) {
|
|
||||||
auto value = vertex.properties_.at(p);
|
|
||||||
if (value.IsNull()) break;
|
|
||||||
values.emplace_back(value);
|
|
||||||
}
|
|
||||||
if (values.size() != constraint.properties.size()) continue;
|
|
||||||
auto entry = std::find_if(constraint.version_pairs.begin(),
|
|
||||||
constraint.version_pairs.end(),
|
|
||||||
[values](const impl::LabelPropertyPair &p) {
|
|
||||||
return p.values == values;
|
|
||||||
});
|
|
||||||
if (entry != constraint.version_pairs.end()) {
|
|
||||||
entry->record.Insert(accessor.gid(), t);
|
|
||||||
} else {
|
|
||||||
constraint.version_pairs.emplace_back(accessor.gid(), values, t);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void UniqueConstraints::UpdateOnAddLabel(storage::Label label,
|
|
||||||
const RecordAccessor<Vertex> &accessor,
|
|
||||||
const tx::Transaction &t) {
|
|
||||||
auto &vertex = accessor.current();
|
|
||||||
std::lock_guard<std::mutex> guard(lock_);
|
|
||||||
for (auto &constraint : constraints_) {
|
|
||||||
if (constraint.label != label) continue;
|
|
||||||
std::vector<PropertyValue> values;
|
|
||||||
for (auto p : constraint.properties) {
|
|
||||||
auto value = vertex.properties_.at(p);
|
|
||||||
if (value.IsNull()) break;
|
|
||||||
values.emplace_back(value);
|
|
||||||
}
|
|
||||||
if (values.size() != constraint.properties.size()) continue;
|
|
||||||
auto entry = std::find_if(constraint.version_pairs.begin(),
|
|
||||||
constraint.version_pairs.end(),
|
|
||||||
[values](const impl::LabelPropertyPair &p) {
|
|
||||||
return p.values == values;
|
|
||||||
});
|
|
||||||
if (entry != constraint.version_pairs.end()) {
|
|
||||||
entry->record.Insert(accessor.gid(), t);
|
|
||||||
} else {
|
|
||||||
constraint.version_pairs.emplace_back(accessor.gid(), values, t);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void UniqueConstraints::UpdateOnRemoveLabel(
|
|
||||||
storage::Label label, const RecordAccessor<Vertex> &accessor,
|
|
||||||
const tx::Transaction &t) {
|
|
||||||
auto &vertex = accessor.current();
|
|
||||||
std::lock_guard<std::mutex> guard(lock_);
|
|
||||||
for (auto &constraint : constraints_) {
|
|
||||||
if (constraint.label != label) continue;
|
|
||||||
std::vector<PropertyValue> values;
|
|
||||||
for (auto p : constraint.properties) {
|
|
||||||
auto value = vertex.properties_.at(p);
|
|
||||||
if (value.IsNull()) break;
|
|
||||||
values.emplace_back(value);
|
|
||||||
}
|
|
||||||
if (values.size() != constraint.properties.size()) continue;
|
|
||||||
auto entry = std::find_if(constraint.version_pairs.begin(),
|
|
||||||
constraint.version_pairs.end(),
|
|
||||||
[values](const impl::LabelPropertyPair &p) {
|
|
||||||
return p.values == values;
|
|
||||||
});
|
|
||||||
if (entry != constraint.version_pairs.end())
|
|
||||||
entry->record.Remove(accessor.gid(), t);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void UniqueConstraints::UpdateOnAddProperty(
|
|
||||||
storage::Property property, const PropertyValue &previous_value,
|
|
||||||
const PropertyValue &new_value, const RecordAccessor<Vertex> &accessor,
|
|
||||||
const tx::Transaction &t) {
|
|
||||||
auto &vertex = accessor.current();
|
|
||||||
std::lock_guard<std::mutex> guard(lock_);
|
|
||||||
for (auto &constraint : constraints_) {
|
|
||||||
if (!utils::Contains(vertex.labels_, constraint.label)) continue;
|
|
||||||
if (!utils::Contains(constraint.properties, property)) continue;
|
|
||||||
|
|
||||||
std::vector<PropertyValue> old_values;
|
|
||||||
std::vector<PropertyValue> new_values;
|
|
||||||
for (auto p : constraint.properties) {
|
|
||||||
auto value = vertex.properties_.at(p);
|
|
||||||
|
|
||||||
if (p == property) {
|
|
||||||
if (!previous_value.IsNull()) old_values.emplace_back(previous_value);
|
|
||||||
if (!new_value.IsNull()) new_values.emplace_back(new_value);
|
|
||||||
} else {
|
|
||||||
if (value.IsNull()) break;
|
|
||||||
old_values.emplace_back(value);
|
|
||||||
new_values.emplace_back(value);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// First we need to remove the old entry if there was one.
|
|
||||||
if (old_values.size() == constraint.properties.size()) {
|
|
||||||
auto entry = std::find_if(constraint.version_pairs.begin(),
|
|
||||||
constraint.version_pairs.end(),
|
|
||||||
[old_values](const impl::LabelPropertyPair &p) {
|
|
||||||
return p.values == old_values;
|
|
||||||
});
|
|
||||||
if (entry != constraint.version_pairs.end())
|
|
||||||
entry->record.Remove(accessor.gid(), t);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (new_values.size() != constraint.properties.size()) continue;
|
|
||||||
auto entry = std::find_if(constraint.version_pairs.begin(),
|
|
||||||
constraint.version_pairs.end(),
|
|
||||||
[new_values](const impl::LabelPropertyPair &p) {
|
|
||||||
return p.values == new_values;
|
|
||||||
});
|
|
||||||
if (entry != constraint.version_pairs.end()) {
|
|
||||||
entry->record.Insert(accessor.gid(), t);
|
|
||||||
} else {
|
|
||||||
constraint.version_pairs.emplace_back(accessor.gid(), new_values, t);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void UniqueConstraints::UpdateOnRemoveProperty(
|
|
||||||
storage::Property property, const PropertyValue &previous_value,
|
|
||||||
const RecordAccessor<Vertex> &accessor, const tx::Transaction &t) {
|
|
||||||
auto &vertex = accessor.current();
|
|
||||||
std::lock_guard<std::mutex> guard(lock_);
|
|
||||||
for (auto &constraint : constraints_) {
|
|
||||||
if (!utils::Contains(vertex.labels_, constraint.label)) continue;
|
|
||||||
if (!utils::Contains(constraint.properties, property)) continue;
|
|
||||||
|
|
||||||
std::vector<PropertyValue> values;
|
|
||||||
for (auto p : constraint.properties) {
|
|
||||||
auto value = vertex.properties_.at(p);
|
|
||||||
if (p == property) {
|
|
||||||
values.emplace_back(previous_value);
|
|
||||||
} else {
|
|
||||||
if (value.IsNull()) break;
|
|
||||||
values.emplace_back(value);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (values.size() != constraint.properties.size()) continue;
|
|
||||||
auto entry = std::find_if(constraint.version_pairs.begin(),
|
|
||||||
constraint.version_pairs.end(),
|
|
||||||
[values](const impl::LabelPropertyPair &p) {
|
|
||||||
return p.values == values;
|
|
||||||
});
|
|
||||||
if (entry != constraint.version_pairs.end()) {
|
|
||||||
entry->record.Remove(accessor.gid(), t);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void UniqueConstraints::UpdateOnRemoveVertex(
|
|
||||||
const RecordAccessor<Vertex> &accessor, const tx::Transaction &t) {
|
|
||||||
auto &vertex = accessor.current();
|
|
||||||
std::lock_guard<std::mutex> guard(lock_);
|
|
||||||
for (auto &constraint : constraints_) {
|
|
||||||
if (!utils::Contains(vertex.labels_, constraint.label)) continue;
|
|
||||||
|
|
||||||
std::vector<PropertyValue> values;
|
|
||||||
for (auto p : constraint.properties) {
|
|
||||||
auto value = vertex.properties_.at(p);
|
|
||||||
if (value.IsNull()) break;
|
|
||||||
values.emplace_back(value);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (values.size() != constraint.properties.size()) continue;
|
|
||||||
auto entry = std::find_if(constraint.version_pairs.begin(),
|
|
||||||
constraint.version_pairs.end(),
|
|
||||||
[values](const impl::LabelPropertyPair &p) {
|
|
||||||
return p.values == values;
|
|
||||||
});
|
|
||||||
if (entry != constraint.version_pairs.end()) {
|
|
||||||
entry->record.Remove(accessor.gid(), t);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void UniqueConstraints::Refresh(const tx::Snapshot &snapshot,
|
|
||||||
const tx::Engine &engine) {
|
|
||||||
std::lock_guard<std::mutex> guard(lock_);
|
|
||||||
for (auto &constraint : constraints_) {
|
|
||||||
for (auto p = constraint.version_pairs.begin();
|
|
||||||
p != constraint.version_pairs.end();) {
|
|
||||||
auto exp_id = p->record.tx_id_exp;
|
|
||||||
auto cre_id = p->record.tx_id_cre;
|
|
||||||
if ((exp_id != 0 && exp_id < snapshot.back() &&
|
|
||||||
engine.Info(exp_id).is_committed() && !snapshot.contains(exp_id)) ||
|
|
||||||
(cre_id < snapshot.back() && engine.Info(cre_id).is_aborted())) {
|
|
||||||
p = constraint.version_pairs.erase(p);
|
|
||||||
} else {
|
|
||||||
++p;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} // namespace storage::constraints
|
|
@ -1,166 +0,0 @@
|
|||||||
/// @file
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include <list>
|
|
||||||
#include <mutex>
|
|
||||||
|
|
||||||
#include "storage/common/types/property_value.hpp"
|
|
||||||
#include "storage/common/types/types.hpp"
|
|
||||||
#include "storage/common/constraints/record.hpp"
|
|
||||||
|
|
||||||
namespace tx {
|
|
||||||
class Snapshot;
|
|
||||||
}; // namespace tx
|
|
||||||
|
|
||||||
class Vertex;
|
|
||||||
|
|
||||||
template <typename TRecord>
|
|
||||||
class RecordAccessor;
|
|
||||||
|
|
||||||
namespace storage::constraints {
|
|
||||||
namespace impl {
|
|
||||||
struct LabelPropertyPair {
|
|
||||||
LabelPropertyPair(storage::Gid gid, const std::vector<PropertyValue> &v,
|
|
||||||
const tx::Transaction &t)
|
|
||||||
: values(v), record(gid, t) {}
|
|
||||||
|
|
||||||
std::vector<PropertyValue> values;
|
|
||||||
Record record;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct LabelPropertiesEntry {
|
|
||||||
LabelPropertiesEntry(storage::Label l,
|
|
||||||
const std::vector<storage::Property> &p)
|
|
||||||
: label(l), properties(p) {}
|
|
||||||
|
|
||||||
storage::Label label;
|
|
||||||
std::vector<storage::Property> properties;
|
|
||||||
std::list<LabelPropertyPair> version_pairs;
|
|
||||||
};
|
|
||||||
} // namespace impl
|
|
||||||
|
|
||||||
struct ConstraintEntry {
|
|
||||||
// This struct is used by ListConstraints method in order to avoid using
|
|
||||||
// std::pair or something like that.
|
|
||||||
storage::Label label;
|
|
||||||
std::vector<storage::Property> properties;
|
|
||||||
};
|
|
||||||
|
|
||||||
/// UniqueConstraints contains all unique constraints defined by both label and
|
|
||||||
/// a set of properties. To create or delete unique constraint, caller must
|
|
||||||
/// ensure that there are no other transactions running in parallel.
|
|
||||||
/// Additionally, for adding unique constraint caller must first call
|
|
||||||
/// AddConstraint to create unique constraint and then call Update for every
|
|
||||||
/// existing Vertex. If there is a unique constraint violation, the caller must
|
|
||||||
/// manually handle that by catching exceptions and calling RemoveConstraint
|
|
||||||
/// method. This is needed to ensure logical correctness of transactions. Once
|
|
||||||
/// created, client uses UpdateOn* methods to notify UniqueConstraint about
|
|
||||||
/// changes. In case of violation UpdateOn* methods throw
|
|
||||||
/// ConstraintViolationException exception. Methods can also throw
|
|
||||||
/// SerializationError. This class is thread safe.
|
|
||||||
class UniqueConstraints {
|
|
||||||
public:
|
|
||||||
UniqueConstraints() = default;
|
|
||||||
UniqueConstraints(const UniqueConstraints &) = delete;
|
|
||||||
UniqueConstraints(UniqueConstraints &&) = delete;
|
|
||||||
UniqueConstraints &operator=(const UniqueConstraints &) = delete;
|
|
||||||
UniqueConstraints &operator=(UniqueConstraints &&) = delete;
|
|
||||||
|
|
||||||
~UniqueConstraints() = default;
|
|
||||||
|
|
||||||
/// Add new unique constraint, if constraint already exists this method does
|
|
||||||
/// nothing. This method doesn't check if any of the existing vertices breaks
|
|
||||||
/// this constraint. Caller must do that instead. Caller must also ensure that
|
|
||||||
/// no other transaction is running in parallel.
|
|
||||||
///
|
|
||||||
/// @return true if the constraint doesn't exists and was added.
|
|
||||||
bool AddConstraint(const ConstraintEntry &entry);
|
|
||||||
|
|
||||||
/// Removes existing unique constraint, if the constraint doesn't exist this
|
|
||||||
/// method does nothing. Caller must ensure that no other transaction is
|
|
||||||
/// running in parallel.
|
|
||||||
///
|
|
||||||
/// @return true if the constraint existed and was removed.
|
|
||||||
bool RemoveConstraint(const ConstraintEntry &entry);
|
|
||||||
|
|
||||||
/// Checks whether given unique constraint is visible.
|
|
||||||
bool Exists(storage::Label label,
|
|
||||||
const std::vector<storage::Property> &properties) const;
|
|
||||||
|
|
||||||
/// Returns list of unique constraints.
|
|
||||||
std::vector<ConstraintEntry> ListConstraints() const;
|
|
||||||
|
|
||||||
/// Updates unique constraint versions when adding new constraint rule.
|
|
||||||
///
|
|
||||||
/// @throws ConstraintViolationException
|
|
||||||
/// @throws SerializationError
|
|
||||||
void Update(const RecordAccessor<Vertex> &accessor, const tx::Transaction &t);
|
|
||||||
|
|
||||||
/// Updates unique constraint versions when adding label.
|
|
||||||
/// @param label - label that was added
|
|
||||||
/// @param accessor - accessor that was updated
|
|
||||||
/// @param t - current transaction
|
|
||||||
///
|
|
||||||
/// @throws ConstraintViolationException
|
|
||||||
/// @throws SerializationError
|
|
||||||
void UpdateOnAddLabel(storage::Label label,
|
|
||||||
const RecordAccessor<Vertex> &accessor,
|
|
||||||
const tx::Transaction &t);
|
|
||||||
|
|
||||||
/// Updates unique constraint versions when removing label.
|
|
||||||
/// @param label - label that was removed
|
|
||||||
/// @param accessor - accessor that was updated
|
|
||||||
/// @param t - current transaction
|
|
||||||
///
|
|
||||||
/// @throws SerializationError
|
|
||||||
void UpdateOnRemoveLabel(storage::Label label,
|
|
||||||
const RecordAccessor<Vertex> &accessor,
|
|
||||||
const tx::Transaction &t);
|
|
||||||
|
|
||||||
/// Updates unique constraint versions when adding property.
|
|
||||||
/// @param property - property that was added
|
|
||||||
/// @param previous_value - previous value of the property
|
|
||||||
/// @param new_value - new value of the property
|
|
||||||
/// @param accessor - accessor that was updated
|
|
||||||
/// @param t - current transaction
|
|
||||||
///
|
|
||||||
/// @throws ConstraintViolationException
|
|
||||||
/// @throws SerializationError
|
|
||||||
void UpdateOnAddProperty(storage::Property property,
|
|
||||||
const PropertyValue &previous_value,
|
|
||||||
const PropertyValue &new_value,
|
|
||||||
const RecordAccessor<Vertex> &accessor,
|
|
||||||
const tx::Transaction &t);
|
|
||||||
|
|
||||||
/// Updates unique constraint versions when removing property.
|
|
||||||
/// @param property - property that was removed
|
|
||||||
/// @param previous_value - previous value of the property
|
|
||||||
/// @param accessor - accessor that was updated
|
|
||||||
/// @param t - current transaction
|
|
||||||
///
|
|
||||||
/// @throws SerializationError
|
|
||||||
void UpdateOnRemoveProperty(storage::Property property,
|
|
||||||
const PropertyValue &previous_value,
|
|
||||||
const RecordAccessor<Vertex> &accessor,
|
|
||||||
const tx::Transaction &t);
|
|
||||||
|
|
||||||
/// Updates unique constraint versions when removing a vertex.
|
|
||||||
/// @param accessor - accessor that was updated
|
|
||||||
/// @param t - current transaction
|
|
||||||
///
|
|
||||||
/// @throws SerializationError
|
|
||||||
void UpdateOnRemoveVertex(const RecordAccessor<Vertex> &accessor,
|
|
||||||
const tx::Transaction &t);
|
|
||||||
|
|
||||||
/// Removes records that are no longer visible.
|
|
||||||
/// @param snapshot - the GC snapshot.
|
|
||||||
/// @param engine - current transaction engine.
|
|
||||||
void Refresh(const tx::Snapshot &snapshot, const tx::Engine &engine);
|
|
||||||
|
|
||||||
private:
|
|
||||||
std::mutex lock_;
|
|
||||||
|
|
||||||
std::list<impl::LabelPropertiesEntry> constraints_;
|
|
||||||
};
|
|
||||||
} // namespace storage::constraints
|
|
@ -1,183 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include "cppitertools/filter.hpp"
|
|
||||||
#include "cppitertools/imap.hpp"
|
|
||||||
#include "cppitertools/takewhile.hpp"
|
|
||||||
#include "glog/logging.h"
|
|
||||||
|
|
||||||
#include "data_structures/concurrent/concurrent_map.hpp"
|
|
||||||
#include "data_structures/concurrent/skiplist.hpp"
|
|
||||||
#include "transactions/transaction.hpp"
|
|
||||||
|
|
||||||
namespace database::index {
|
|
||||||
/**
|
|
||||||
* @brief - Wrap beginning iterator to iterable object. This provides us with
|
|
||||||
* begin and end iterator, and allows us to iterate from the iterator given in
|
|
||||||
* constructor till the end of the collection over which we are really
|
|
||||||
* iterating, i.e. it allows us to iterate over the suffix of some skiplist
|
|
||||||
* hence the name SkipListSuffix.
|
|
||||||
*/
|
|
||||||
template <class TIterator, class TValue, typename TAccessor>
|
|
||||||
class SkipListSuffix {
|
|
||||||
public:
|
|
||||||
class Iterator {
|
|
||||||
public:
|
|
||||||
explicit Iterator(TIterator current) : current_(current) {}
|
|
||||||
|
|
||||||
TValue &operator*() { return *current_; }
|
|
||||||
|
|
||||||
bool operator!=(Iterator other) const {
|
|
||||||
return this->current_ != other.current_;
|
|
||||||
}
|
|
||||||
|
|
||||||
Iterator &operator++() {
|
|
||||||
++current_;
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
TIterator current_;
|
|
||||||
};
|
|
||||||
|
|
||||||
explicit SkipListSuffix(
|
|
||||||
const TIterator begin,
|
|
||||||
typename SkipList<TValue>::template Accessor<TAccessor> &&accessor)
|
|
||||||
: begin_(begin), accessor_(std::move(accessor)) {}
|
|
||||||
|
|
||||||
Iterator begin() const { return Iterator(begin_); }
|
|
||||||
Iterator end() { return Iterator(accessor_.end()); }
|
|
||||||
|
|
||||||
TIterator begin_;
|
|
||||||
typename SkipList<TValue>::template Accessor<TAccessor> accessor_;
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief - Get all inserted vlists in TKey specific storage which
|
|
||||||
* still return true for the 'exists' function.
|
|
||||||
* @param skiplist_accessor - accessor used to get begin iterator, and that
|
|
||||||
* should be used to get end iterator as well.
|
|
||||||
* @param begin - starting iterator for vlist iteration.
|
|
||||||
* @param predicate - function which checks if TIndexEntry has a value that we
|
|
||||||
* are looking for
|
|
||||||
* @param t - current transaction, which determines visibility.
|
|
||||||
* @param exists - method which determines visibility of entry and version
|
|
||||||
* (record) of the underlying objects (vertex/edge)
|
|
||||||
* @param current_state If true then the graph state for the
|
|
||||||
* current transaction+command is returned (insertions, updates and
|
|
||||||
* deletions performed in the current transaction+command are not
|
|
||||||
* ignored).
|
|
||||||
* @Tparam TIndexEntry - index entry inside skiplist
|
|
||||||
* @Tparam TRecord - type of record under index (edge/vertex usually.)
|
|
||||||
* @Tparam TAccessor - type of accessor to use (const skiplist/non const
|
|
||||||
* skiplist).
|
|
||||||
* @return iterable collection of distinct vlist records<TRecord> for which
|
|
||||||
* exists function evaluates as true
|
|
||||||
*/
|
|
||||||
template <class TIterator, class TIndexEntry, class TRecord, typename TAccessor>
|
|
||||||
static auto GetVlists(
|
|
||||||
typename SkipList<TIndexEntry>::template Accessor<TAccessor>
|
|
||||||
&&skiplist_accessor,
|
|
||||||
TIterator begin,
|
|
||||||
const std::function<bool(const TIndexEntry &entry)> &predicate,
|
|
||||||
const tx::Transaction &t,
|
|
||||||
const std::function<bool(const TIndexEntry &, const TRecord *)> &exists,
|
|
||||||
bool current_state = false) {
|
|
||||||
TIndexEntry *prev = nullptr;
|
|
||||||
auto range = iter::takewhile(
|
|
||||||
predicate, SkipListSuffix<TIterator, TIndexEntry, TAccessor>(
|
|
||||||
begin, std::move(skiplist_accessor)));
|
|
||||||
auto filtered = iter::filter(
|
|
||||||
[&t, exists, prev, current_state](TIndexEntry &entry) mutable {
|
|
||||||
// Check if the current entry could offer new possible return value
|
|
||||||
// with respect to the previous entry we evaluated.
|
|
||||||
// We do this to guarantee uniqueness, and also as an optimization to
|
|
||||||
// avoid checking same vlist twice when we can.
|
|
||||||
if (prev && entry.IsAlreadyChecked(*prev)) return false;
|
|
||||||
prev = &entry;
|
|
||||||
|
|
||||||
// TODO when refactoring MVCC reconsider the return-value-arg idiom
|
|
||||||
// here
|
|
||||||
TRecord *old_record, *new_record;
|
|
||||||
entry.vlist_->find_set_old_new(t, &old_record, &new_record);
|
|
||||||
// filtering out records not visible to the current
|
|
||||||
// transaction+command
|
|
||||||
// taking into account the current_state flag
|
|
||||||
bool visible =
|
|
||||||
(old_record && !(current_state && old_record->is_expired_by(t))) ||
|
|
||||||
(current_state && new_record && !new_record->is_expired_by(t));
|
|
||||||
if (!visible) return false;
|
|
||||||
// if current_state is true and we have the new record, then that's
|
|
||||||
// the reference value, and that needs to be compared with the index
|
|
||||||
// predicate
|
|
||||||
|
|
||||||
return (current_state && new_record) ? exists(entry, new_record)
|
|
||||||
: exists(entry, old_record);
|
|
||||||
},
|
|
||||||
std::move(range));
|
|
||||||
return iter::imap([](auto entry) { return entry.vlist_; },
|
|
||||||
std::move(filtered));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief - Removes from the index all entries for which records don't contain
|
|
||||||
* the given label/edge type/label + property anymore. Also update (remove)
|
|
||||||
* all records which are not visible for any transaction in the given
|
|
||||||
* 'snapshot'. This method assumes that the MVCC GC has been run with the
|
|
||||||
* same 'snapshot'.
|
|
||||||
*
|
|
||||||
* @param indices - map of index entries (TIndexKey, skiplist<TIndexEntry>)
|
|
||||||
* @param snapshot - the GC snapshot. Consists of the oldest active
|
|
||||||
* transaction's snapshot, with that transaction's id appened as last.
|
|
||||||
* @param engine - transaction engine to see which records are commited
|
|
||||||
* @param exists - function which checks 'key' and 'entry' if the entry still
|
|
||||||
* contains required properties (key + optional value (in case of label_property
|
|
||||||
* index))
|
|
||||||
* @Tparam Tkey - index key
|
|
||||||
* @Tparam TIndexEntry - index entry inside skiplist
|
|
||||||
* @Tparam TRecord - type of record under index (edge/vertex usually.)
|
|
||||||
*/
|
|
||||||
template <class TKey, class TIndexEntry, class TRecord>
|
|
||||||
static void Refresh(
|
|
||||||
ConcurrentMap<TKey, std::unique_ptr<SkipList<TIndexEntry>>> &indices,
|
|
||||||
const tx::Snapshot &snapshot, tx::Engine &engine,
|
|
||||||
const std::function<bool(const TKey &, const TIndexEntry &)> &exists) {
|
|
||||||
// iterate over all the indices
|
|
||||||
for (auto &key_indices_pair : indices.access()) {
|
|
||||||
// iterate over index entries
|
|
||||||
auto indices_entries_accessor = key_indices_pair.second->access();
|
|
||||||
for (auto indices_entry : indices_entries_accessor) {
|
|
||||||
if (indices_entry.record_->is_not_visible_from(snapshot, engine)) {
|
|
||||||
// be careful when deleting the record which is not visible anymore.
|
|
||||||
// it's newer copy could be visible, and might still logically belong to
|
|
||||||
// index (it satisfies the `exists` function). that's why we can't just
|
|
||||||
// remove the index entry, but also re-insert the oldest visible record
|
|
||||||
// to the index. if that record does not satisfy `exists`, it will be
|
|
||||||
// cleaned up in the next Refresh first insert and then remove,
|
|
||||||
// otherwise there is a timeframe during which the record is not present
|
|
||||||
// in the index
|
|
||||||
auto new_record = indices_entry.vlist_->Oldest();
|
|
||||||
if (new_record != nullptr)
|
|
||||||
indices_entries_accessor.insert(
|
|
||||||
TIndexEntry(indices_entry, new_record));
|
|
||||||
|
|
||||||
[[gnu::unused]] auto success =
|
|
||||||
indices_entries_accessor.remove(indices_entry);
|
|
||||||
DCHECK(success) << "Unable to delete entry.";
|
|
||||||
}
|
|
||||||
|
|
||||||
// if the record is still visible,
|
|
||||||
// check if it satisfies the `exists` function. if not
|
|
||||||
// it does not belong in index anymore.
|
|
||||||
// be careful when using the `exists` function
|
|
||||||
// because it's creator transaction could still be modifying it,
|
|
||||||
// and modify+read is not thread-safe. for that reason we need to
|
|
||||||
// first see if the the transaction that created it has ended
|
|
||||||
// (tx().cre < oldest active trancsation).
|
|
||||||
else if (indices_entry.record_->tx().cre < snapshot.back() &&
|
|
||||||
!exists(key_indices_pair.first, indices_entry)) {
|
|
||||||
indices_entries_accessor.remove(indices_entry);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}; // namespace database::index
|
|
@ -1,3 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
enum class LockStatus { Acquired, AlreadyHeld };
|
|
@ -1,131 +0,0 @@
|
|||||||
#include "storage/common/locking/record_lock.hpp"
|
|
||||||
|
|
||||||
#include <fmt/format.h>
|
|
||||||
#include <glog/logging.h>
|
|
||||||
#include <optional>
|
|
||||||
#include <stack>
|
|
||||||
#include <unordered_set>
|
|
||||||
|
|
||||||
#include "transactions/engine.hpp"
|
|
||||||
#include "utils/on_scope_exit.hpp"
|
|
||||||
#include "utils/thread/sync.hpp"
|
|
||||||
#include "utils/timer.hpp"
|
|
||||||
|
|
||||||
namespace {
|
|
||||||
|
|
||||||
// Finds lock cycle that start transaction is a part of and returns id of oldest
|
|
||||||
// transaction in that cycle. If start transaction is not in a cycle nullopt is
|
|
||||||
// returned.
|
|
||||||
template <typename TAccessor>
|
|
||||||
std::optional<tx::TransactionId> FindOldestTxInLockCycle(
|
|
||||||
tx::TransactionId start, TAccessor &graph_accessor) {
|
|
||||||
std::vector<tx::TransactionId> path;
|
|
||||||
std::unordered_set<tx::TransactionId> visited;
|
|
||||||
|
|
||||||
auto current = start;
|
|
||||||
|
|
||||||
do {
|
|
||||||
visited.insert(current);
|
|
||||||
path.push_back(current);
|
|
||||||
auto it = graph_accessor.find(current);
|
|
||||||
if (it == graph_accessor.end()) return std::nullopt;
|
|
||||||
current = it->second;
|
|
||||||
} while (visited.find(current) == visited.end());
|
|
||||||
|
|
||||||
if (current == start) {
|
|
||||||
// start is a part of the cycle, return oldest transaction.
|
|
||||||
CHECK(path.size() >= 2U) << "Cycle must have at least two nodes";
|
|
||||||
return *std::min(path.begin(), path.end());
|
|
||||||
}
|
|
||||||
|
|
||||||
// There is a cycle, but start is not a part of it. Some transaction that is
|
|
||||||
// in a cycle will find it and abort oldest transaction.
|
|
||||||
return std::nullopt;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace
|
|
||||||
|
|
||||||
bool RecordLock::TryLock(tx::TransactionId tx_id) {
|
|
||||||
tx::TransactionId unlocked{0};
|
|
||||||
return owner_.compare_exchange_strong(unlocked, tx_id);
|
|
||||||
}
|
|
||||||
|
|
||||||
LockStatus RecordLock::Lock(const tx::Transaction &tx, tx::Engine &engine) {
|
|
||||||
if (TryLock(tx.id_)) {
|
|
||||||
return LockStatus::Acquired;
|
|
||||||
}
|
|
||||||
|
|
||||||
tx::TransactionId owner = owner_;
|
|
||||||
if (owner_ == tx.id_) return LockStatus::AlreadyHeld;
|
|
||||||
|
|
||||||
// In a distributed worker the transaction objects (and the locks they own)
|
|
||||||
// are not destructed at the same time like on the master. Consequently a lock
|
|
||||||
// might be active for a dead transaction. By asking the transaction engine
|
|
||||||
// for transaction info, we'll make the worker refresh it's knowledge about
|
|
||||||
// live transactions and release obsolete locks.
|
|
||||||
if (owner == 0 || !engine.Info(owner).is_active()) {
|
|
||||||
if (TryLock(tx.id_)) {
|
|
||||||
return LockStatus::Acquired;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Insert edge into local lock_graph.
|
|
||||||
auto accessor = engine.local_lock_graph().access();
|
|
||||||
auto it = accessor.insert(tx.id_, owner).first;
|
|
||||||
|
|
||||||
auto abort_oldest_tx_in_lock_cycle = [&tx, &accessor, &engine]() {
|
|
||||||
// Find oldest transaction in lock cycle if cycle exists and notify that
|
|
||||||
// transaction that it should abort.
|
|
||||||
// TODO: maybe we can be smarter and abort some other transaction and not
|
|
||||||
// the oldest one.
|
|
||||||
auto oldest = FindOldestTxInLockCycle(tx.id_, accessor);
|
|
||||||
if (oldest) {
|
|
||||||
engine.LocalForEachActiveTransaction([&](tx::Transaction &t) {
|
|
||||||
if (t.id_ == oldest) {
|
|
||||||
t.set_should_abort();
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
abort_oldest_tx_in_lock_cycle();
|
|
||||||
|
|
||||||
// Make sure to erase edge on function exit. Either function will throw and
|
|
||||||
// transaction will be killed so we should erase the edge because transaction
|
|
||||||
// won't exist anymore or owner_ will finish and we will be able to acquire
|
|
||||||
// the lock.
|
|
||||||
utils::OnScopeExit cleanup{[&tx, &accessor] { accessor.remove(tx.id_); }};
|
|
||||||
|
|
||||||
utils::Timer t;
|
|
||||||
while (t.Elapsed() < kTimeout) {
|
|
||||||
if (tx.should_abort()) {
|
|
||||||
// Message could be incorrect. Transaction could be aborted because it was
|
|
||||||
// running for too long time, but that is unlikely and it is not very
|
|
||||||
// important which exception (and message) we throw here.
|
|
||||||
throw utils::LockTimeoutException(
|
|
||||||
"Transaction was aborted since it was oldest in a lock cycle");
|
|
||||||
}
|
|
||||||
if (TryLock(tx.id_)) {
|
|
||||||
return LockStatus::Acquired;
|
|
||||||
}
|
|
||||||
if (owner != owner_) {
|
|
||||||
// Owner changed while we were spinlocking. Update the edge and rerun
|
|
||||||
// cycle resolution routine.
|
|
||||||
// TODO: we should make sure that first transaction that tries to acquire
|
|
||||||
// already held lock succeeds in acquiring the lock once transaction that
|
|
||||||
// was lock owner finishes. That would probably reduce number of aborted
|
|
||||||
// transactions.
|
|
||||||
owner = owner_;
|
|
||||||
it->second = owner;
|
|
||||||
abort_oldest_tx_in_lock_cycle();
|
|
||||||
}
|
|
||||||
utils::CpuRelax();
|
|
||||||
}
|
|
||||||
|
|
||||||
throw utils::LockTimeoutException(fmt::format(
|
|
||||||
"Transaction locked for more than {} seconds", kTimeout.count()));
|
|
||||||
}
|
|
||||||
|
|
||||||
void RecordLock::Unlock() { owner_ = 0; }
|
|
||||||
|
|
||||||
constexpr std::chrono::duration<double> RecordLock::kTimeout;
|
|
@ -1,30 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <atomic>
|
|
||||||
#include <chrono>
|
|
||||||
#include <unordered_set>
|
|
||||||
|
|
||||||
#include "storage/common/locking/lock_status.hpp"
|
|
||||||
#include "transactions/type.hpp"
|
|
||||||
|
|
||||||
namespace tx {
|
|
||||||
class Engine;
|
|
||||||
class Transaction;
|
|
||||||
}; // namespace tx
|
|
||||||
|
|
||||||
class RecordLock {
|
|
||||||
public:
|
|
||||||
/// @throw utils::LockTimeoutException
|
|
||||||
LockStatus Lock(const tx::Transaction &id, tx::Engine &engine);
|
|
||||||
|
|
||||||
void Unlock();
|
|
||||||
|
|
||||||
private:
|
|
||||||
bool TryLock(tx::TransactionId tx_id);
|
|
||||||
|
|
||||||
// Arbitrary choosen constant, postgresql uses 1 second so do we.
|
|
||||||
constexpr static std::chrono::duration<double> kTimeout{
|
|
||||||
std::chrono::seconds(1)};
|
|
||||||
|
|
||||||
std::atomic<tx::TransactionId> owner_{0};
|
|
||||||
};
|
|
@ -1,15 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include "utils/exceptions.hpp"
|
|
||||||
|
|
||||||
namespace mvcc {
|
|
||||||
class SerializationError : public utils::BasicException {
|
|
||||||
static constexpr const char *default_message =
|
|
||||||
"Can't serialize due to concurrent operations.";
|
|
||||||
|
|
||||||
public:
|
|
||||||
using utils::BasicException::BasicException;
|
|
||||||
SerializationError() : BasicException(default_message) {}
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace mvcc
|
|
@ -1,42 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <atomic>
|
|
||||||
|
|
||||||
namespace mvcc {
|
|
||||||
|
|
||||||
template <class T>
|
|
||||||
class Version {
|
|
||||||
public:
|
|
||||||
Version() = default;
|
|
||||||
explicit Version(T *older) : older_(older) {}
|
|
||||||
|
|
||||||
// this must also destroy all the older versions
|
|
||||||
virtual ~Version() {
|
|
||||||
auto curr = next();
|
|
||||||
while (curr != nullptr) {
|
|
||||||
auto next = curr->next();
|
|
||||||
// remove link to older version to avoid recursion
|
|
||||||
curr->older_.store(nullptr);
|
|
||||||
delete curr;
|
|
||||||
curr = next;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// return a pointer to an older version stored in this record
|
|
||||||
T *next(std::memory_order order = std::memory_order_seq_cst) {
|
|
||||||
return older_.load(order);
|
|
||||||
}
|
|
||||||
|
|
||||||
const T *next(std::memory_order order = std::memory_order_seq_cst) const {
|
|
||||||
return older_.load(order);
|
|
||||||
}
|
|
||||||
|
|
||||||
// set the older version of this record
|
|
||||||
void next(T *value, std::memory_order order = std::memory_order_seq_cst) {
|
|
||||||
older_.store(value, order);
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
std::atomic<T *> older_{nullptr};
|
|
||||||
};
|
|
||||||
} // namespace mvcc
|
|
@ -1,49 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <cstdint>
|
|
||||||
#include <cstring>
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
namespace storage {
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Buffer used for serialization of disk properties. The buffer
|
|
||||||
* implements a template parameter Buffer interface from BaseEncoder
|
|
||||||
* and Decoder classes for bolt serialization.
|
|
||||||
*/
|
|
||||||
class PODBuffer {
|
|
||||||
public:
|
|
||||||
PODBuffer() = default;
|
|
||||||
explicit PODBuffer(const std::string &s) {
|
|
||||||
buffer = std::vector<uint8_t>{s.begin(), s.end()};
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Writes data to buffer
|
|
||||||
*
|
|
||||||
* @param data - Pointer to data to be written.
|
|
||||||
* @param len - Data length.
|
|
||||||
*/
|
|
||||||
void Write(const uint8_t *data, size_t len) {
|
|
||||||
for (size_t i = 0; i < len; ++i) buffer.push_back(data[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Reads raw data from buffer.
|
|
||||||
*
|
|
||||||
* @param data - pointer to where data should be stored.
|
|
||||||
* @param len - data length
|
|
||||||
* @return - True if successful, False otherwise.
|
|
||||||
*/
|
|
||||||
bool Read(uint8_t *data, size_t len) {
|
|
||||||
if (len > buffer.size()) return false;
|
|
||||||
memcpy(data, buffer.data(), len);
|
|
||||||
buffer.erase(buffer.begin(), buffer.begin() + len);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<uint8_t> buffer;
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace storage
|
|
@ -1,6 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include "storage/v2/property_value.hpp"
|
|
||||||
|
|
||||||
using storage::PropertyValue;
|
|
||||||
using storage::PropertyValueException;
|
|
@ -1,240 +0,0 @@
|
|||||||
#include "storage/common/types/property_value_store.hpp"
|
|
||||||
|
|
||||||
#include <filesystem>
|
|
||||||
|
|
||||||
#include <gflags/gflags.h>
|
|
||||||
#include <glog/logging.h>
|
|
||||||
|
|
||||||
#include "communication/bolt/v1/decoder/decoder.hpp"
|
|
||||||
#include "communication/bolt/v1/encoder/base_encoder.hpp"
|
|
||||||
#include "glue/communication.hpp"
|
|
||||||
#include "storage/common/pod_buffer.hpp"
|
|
||||||
|
|
||||||
namespace fs = std::filesystem;
|
|
||||||
|
|
||||||
using namespace communication::bolt;
|
|
||||||
|
|
||||||
const std::string kDiskKeySeparator = "_";
|
|
||||||
|
|
||||||
std::atomic<uint64_t> PropertyValueStore::global_key_cnt_ = {0};
|
|
||||||
|
|
||||||
// properties on disk are stored in a directory named properties within the
|
|
||||||
// durability directory
|
|
||||||
DECLARE_string(durability_directory);
|
|
||||||
DECLARE_string(properties_on_disk);
|
|
||||||
|
|
||||||
std::string DiskKeyPrefix(const std::string &version_key) {
|
|
||||||
return version_key + kDiskKeySeparator;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string DiskKey(const std::string &version_key,
|
|
||||||
const std::string &property_id) {
|
|
||||||
return DiskKeyPrefix(version_key) + property_id;
|
|
||||||
}
|
|
||||||
|
|
||||||
PropertyValueStore::PropertyValueStore(const PropertyValueStore &old)
|
|
||||||
: props_(old.props_) {
|
|
||||||
// We need to update disk key and disk key counter when calling a copy
|
|
||||||
// constructor due to mvcc.
|
|
||||||
if (!FLAGS_properties_on_disk.empty()) {
|
|
||||||
version_key_ = global_key_cnt_++;
|
|
||||||
kvstore::KVStore::iterator old_disk_it(
|
|
||||||
&DiskStorage(), DiskKeyPrefix(std::to_string(old.version_key_)));
|
|
||||||
iterator it(&old, old.props_.end(), std::move(old_disk_it));
|
|
||||||
|
|
||||||
while (it != old.end()) {
|
|
||||||
this->set(it->first, it->second);
|
|
||||||
++it;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
PropertyValueStore::~PropertyValueStore() {
|
|
||||||
if (!FLAGS_properties_on_disk.empty()) {
|
|
||||||
DiskStorage().DeletePrefix(DiskKeyPrefix(std::to_string(version_key_)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
PropertyValue PropertyValueStore::at(const Property &key) const {
|
|
||||||
auto GetValue = [&key](const auto &props) {
|
|
||||||
for (const auto &kv : props)
|
|
||||||
if (kv.first == key) return kv.second;
|
|
||||||
return PropertyValue();
|
|
||||||
};
|
|
||||||
|
|
||||||
if (key.Location() == Location::Memory) return GetValue(props_);
|
|
||||||
|
|
||||||
CHECK(!FLAGS_properties_on_disk.empty()) << "Trying to read property from "
|
|
||||||
"disk storage with properties on "
|
|
||||||
"disk disabled!";
|
|
||||||
|
|
||||||
std::string disk_key =
|
|
||||||
DiskKey(std::to_string(version_key_), std::to_string(key.Id()));
|
|
||||||
auto serialized_prop = DiskStorage().Get(disk_key);
|
|
||||||
if (serialized_prop) return DeserializeProp(serialized_prop.value());
|
|
||||||
return PropertyValue();
|
|
||||||
}
|
|
||||||
|
|
||||||
void PropertyValueStore::set(const Property &key, const char *value) {
|
|
||||||
set(key, PropertyValue(value));
|
|
||||||
}
|
|
||||||
|
|
||||||
void PropertyValueStore::set(const Property &key, const PropertyValue &value) {
|
|
||||||
if (value.type() == PropertyValue::Type::Null) {
|
|
||||||
erase(key);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto SetValue = [&key, &value](auto &props) {
|
|
||||||
for (auto &kv : props)
|
|
||||||
if (kv.first == key) {
|
|
||||||
kv.second = value;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
props.emplace_back(key, value);
|
|
||||||
};
|
|
||||||
|
|
||||||
if (key.Location() == Location::Memory) {
|
|
||||||
SetValue(props_);
|
|
||||||
} else {
|
|
||||||
CHECK(!FLAGS_properties_on_disk.empty()) << "Trying to read property from "
|
|
||||||
"disk storage with properties "
|
|
||||||
"on disk disabled!";
|
|
||||||
std::string disk_key =
|
|
||||||
DiskKey(std::to_string(version_key_), std::to_string(key.Id()));
|
|
||||||
DiskStorage().Put(disk_key, SerializeProp(value));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool PropertyValueStore::erase(const Property &key) {
|
|
||||||
auto EraseKey = [&key](auto &props) {
|
|
||||||
auto found = std::find_if(props.begin(), props.end(),
|
|
||||||
[&key](std::pair<Property, PropertyValue> &kv) {
|
|
||||||
return kv.first == key;
|
|
||||||
});
|
|
||||||
if (found != props.end()) props.erase(found);
|
|
||||||
return true;
|
|
||||||
};
|
|
||||||
|
|
||||||
if (key.Location() == Location::Memory) return EraseKey(props_);
|
|
||||||
|
|
||||||
CHECK(!FLAGS_properties_on_disk.empty()) << "Trying to read property from "
|
|
||||||
"disk storage with properties on "
|
|
||||||
"disk disabled!";
|
|
||||||
|
|
||||||
std::string disk_key =
|
|
||||||
DiskKey(std::to_string(version_key_), std::to_string(key.Id()));
|
|
||||||
return DiskStorage().Delete(disk_key);
|
|
||||||
}
|
|
||||||
|
|
||||||
void PropertyValueStore::clear() {
|
|
||||||
props_.clear();
|
|
||||||
if (!FLAGS_properties_on_disk.empty()) {
|
|
||||||
DiskStorage().DeletePrefix(DiskKeyPrefix(std::to_string(version_key_)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
kvstore::KVStore &PropertyValueStore::DiskStorage() const {
|
|
||||||
static auto disk_storage = ConstructDiskStorage();
|
|
||||||
return disk_storage;
|
|
||||||
}
|
|
||||||
|
|
||||||
PropertyValueStore::iterator::iterator(
|
|
||||||
const PropertyValueStore *pvs,
|
|
||||||
std::vector<std::pair<Property, PropertyValue>>::const_iterator memory_it)
|
|
||||||
: pvs_(pvs), memory_it_(memory_it) {}
|
|
||||||
|
|
||||||
PropertyValueStore::iterator::iterator(
|
|
||||||
const PropertyValueStore *pvs,
|
|
||||||
std::vector<std::pair<Property, PropertyValue>>::const_iterator memory_it,
|
|
||||||
kvstore::KVStore::iterator disk_it)
|
|
||||||
: pvs_(pvs), memory_it_(memory_it), disk_it_(std::move(disk_it)) {}
|
|
||||||
|
|
||||||
PropertyValueStore::iterator &PropertyValueStore::iterator::operator++() {
|
|
||||||
if (memory_it_ != pvs_->props_.end()) {
|
|
||||||
++memory_it_;
|
|
||||||
} else if (disk_it_) {
|
|
||||||
++(*disk_it_);
|
|
||||||
}
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool PropertyValueStore::iterator::operator==(const iterator &other) const {
|
|
||||||
return pvs_ == other.pvs_ && memory_it_ == other.memory_it_ &&
|
|
||||||
disk_it_ == other.disk_it_;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool PropertyValueStore::iterator::operator!=(const iterator &other) const {
|
|
||||||
return !(*this == other);
|
|
||||||
}
|
|
||||||
|
|
||||||
PropertyValueStore::iterator::reference PropertyValueStore::iterator::operator
|
|
||||||
*() {
|
|
||||||
if (memory_it_ != pvs_->props_.end() || !disk_it_) return *memory_it_;
|
|
||||||
std::pair<std::string, std::string> kv = *(*disk_it_);
|
|
||||||
std::string prop_id = kv.first.substr(kv.first.find(kDiskKeySeparator) + 1);
|
|
||||||
disk_prop_ = {Property(std::stoi(prop_id), Location::Disk),
|
|
||||||
pvs_->DeserializeProp(kv.second)};
|
|
||||||
return disk_prop_.value();
|
|
||||||
}
|
|
||||||
|
|
||||||
PropertyValueStore::iterator::pointer PropertyValueStore::iterator::
|
|
||||||
operator->() {
|
|
||||||
return &**this;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t PropertyValueStore::size() const {
|
|
||||||
if (FLAGS_properties_on_disk.empty()) {
|
|
||||||
return props_.size();
|
|
||||||
} else {
|
|
||||||
return props_.size() +
|
|
||||||
DiskStorage().Size(DiskKeyPrefix(std::to_string(version_key_)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
PropertyValueStore::iterator PropertyValueStore::begin() const {
|
|
||||||
if (FLAGS_properties_on_disk.empty()) {
|
|
||||||
return iterator(this, props_.begin());
|
|
||||||
} else {
|
|
||||||
return iterator(
|
|
||||||
this, props_.begin(),
|
|
||||||
DiskStorage().begin(DiskKeyPrefix(std::to_string(version_key_))));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
PropertyValueStore::iterator PropertyValueStore::end() const {
|
|
||||||
if (FLAGS_properties_on_disk.empty()) {
|
|
||||||
return iterator(this, props_.end());
|
|
||||||
} else {
|
|
||||||
return iterator(
|
|
||||||
this, props_.end(),
|
|
||||||
DiskStorage().end(DiskKeyPrefix(std::to_string(version_key_))));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string PropertyValueStore::SerializeProp(const PropertyValue &prop) const {
|
|
||||||
storage::PODBuffer pod_buffer;
|
|
||||||
BaseEncoder<storage::PODBuffer> encoder{pod_buffer};
|
|
||||||
encoder.WriteValue(glue::ToBoltValue(prop));
|
|
||||||
return std::string(reinterpret_cast<char *>(pod_buffer.buffer.data()),
|
|
||||||
pod_buffer.buffer.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
PropertyValue PropertyValueStore::DeserializeProp(
|
|
||||||
const std::string &serialized_prop) const {
|
|
||||||
storage::PODBuffer pod_buffer{serialized_prop};
|
|
||||||
communication::bolt::Decoder<storage::PODBuffer> decoder{pod_buffer};
|
|
||||||
|
|
||||||
Value dv;
|
|
||||||
if (!decoder.ReadValue(&dv)) {
|
|
||||||
DLOG(WARNING) << "Unable to read property value";
|
|
||||||
return PropertyValue();
|
|
||||||
}
|
|
||||||
return glue::ToPropertyValue(dv);
|
|
||||||
}
|
|
||||||
|
|
||||||
kvstore::KVStore PropertyValueStore::ConstructDiskStorage() const {
|
|
||||||
auto storage_path = fs::path() / FLAGS_durability_directory / "properties";
|
|
||||||
if (fs::exists(storage_path)) fs::remove_all(storage_path);
|
|
||||||
return kvstore::KVStore(storage_path);
|
|
||||||
}
|
|
@ -1,164 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <atomic>
|
|
||||||
#include <optional>
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include "kvstore/kvstore.hpp"
|
|
||||||
#include "storage/common/types/property_value.hpp"
|
|
||||||
#include "storage/common/types/types.hpp"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A collection of properties accessed in a map-like way using a key of type
|
|
||||||
* Storage::Property.
|
|
||||||
*
|
|
||||||
* PropertyValueStore handles storage on disk or in memory. Property key defines
|
|
||||||
* where the corresponding property should be stored. Each instance of
|
|
||||||
* PropertyValueStore contains a version_key_ member which specifies where on
|
|
||||||
* disk should the properties be stored. That key is inferred from a static
|
|
||||||
* global counter global_key_cnt_.
|
|
||||||
*
|
|
||||||
* The underlying implementation of in-memory storage is not necessarily
|
|
||||||
* std::map.
|
|
||||||
*/
|
|
||||||
class PropertyValueStore {
|
|
||||||
using Property = storage::Property;
|
|
||||||
using Location = storage::Location;
|
|
||||||
|
|
||||||
public:
|
|
||||||
// Property name which will be used to store vertex/edge ids inside property
|
|
||||||
// value store
|
|
||||||
static constexpr char IdPropertyName[] = "__id__";
|
|
||||||
|
|
||||||
PropertyValueStore() = default;
|
|
||||||
PropertyValueStore(const PropertyValueStore &old);
|
|
||||||
|
|
||||||
~PropertyValueStore();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns a PropertyValue (by reference) at the given key.
|
|
||||||
* If the key does not exist, the Null property is returned.
|
|
||||||
*
|
|
||||||
* This is NOT thread-safe, the reference might not be valid
|
|
||||||
* when used in a multithreaded scenario.
|
|
||||||
*
|
|
||||||
* @param key The key for which a PropertyValue is sought.
|
|
||||||
* @return See above.
|
|
||||||
*/
|
|
||||||
PropertyValue at(const Property &key) const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Set overriding for character constants. Forces conversion
|
|
||||||
* to std::string, otherwise templating might cast the pointer
|
|
||||||
* to something else (bool) and mess things up.
|
|
||||||
*/
|
|
||||||
void set(const Property &key, const char *value);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Set overriding for PropertyValue. When setting a Null value it
|
|
||||||
* calls 'erase' instead of inserting the Null into storage.
|
|
||||||
*/
|
|
||||||
void set(const Property &key, const PropertyValue &value);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Removes the PropertyValue for the given key.
|
|
||||||
*
|
|
||||||
* @param key - The key for which to remove the property.
|
|
||||||
*
|
|
||||||
* @return true if the operation was successful and there is nothing stored
|
|
||||||
* under given key after this operation.
|
|
||||||
*/
|
|
||||||
bool erase(const Property &key);
|
|
||||||
|
|
||||||
/** Removes all the properties (both in-mem and on-disk) from this store. */
|
|
||||||
void clear();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns a static kvstore::KVStore instance used for storing properties on
|
|
||||||
* disk. This hack is needed due to statics that are internal to RocksDB and
|
|
||||||
* availability of durability_directory flag.
|
|
||||||
*/
|
|
||||||
kvstore::KVStore &DiskStorage() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Custom PVS iterator behaves as if all properties are stored in a single
|
|
||||||
* iterable collection of std::pair<Property, PropertyValue>.
|
|
||||||
*/
|
|
||||||
class iterator final
|
|
||||||
: public std::iterator<
|
|
||||||
std::input_iterator_tag, // iterator_category
|
|
||||||
std::pair<Property, PropertyValue>, // value_type
|
|
||||||
long, // difference_type
|
|
||||||
const std::pair<Property, PropertyValue> *, // pointer
|
|
||||||
const std::pair<Property, PropertyValue> & // reference
|
|
||||||
> {
|
|
||||||
public:
|
|
||||||
iterator() = delete;
|
|
||||||
|
|
||||||
iterator(const PropertyValueStore *pvs,
|
|
||||||
std::vector<std::pair<Property, PropertyValue>>::const_iterator
|
|
||||||
memory_it);
|
|
||||||
|
|
||||||
iterator(const PropertyValueStore *pvs,
|
|
||||||
std::vector<std::pair<Property, PropertyValue>>::const_iterator
|
|
||||||
memory_it,
|
|
||||||
kvstore::KVStore::iterator disk_it);
|
|
||||||
|
|
||||||
iterator(const iterator &other) = delete;
|
|
||||||
|
|
||||||
iterator(iterator &&other) = default;
|
|
||||||
|
|
||||||
iterator &operator=(iterator &&other) = default;
|
|
||||||
|
|
||||||
iterator &operator=(const iterator &other) = delete;
|
|
||||||
|
|
||||||
iterator &operator++();
|
|
||||||
|
|
||||||
bool operator==(const iterator &other) const;
|
|
||||||
|
|
||||||
bool operator!=(const iterator &other) const;
|
|
||||||
|
|
||||||
reference operator*();
|
|
||||||
|
|
||||||
pointer operator->();
|
|
||||||
|
|
||||||
private:
|
|
||||||
const PropertyValueStore *pvs_;
|
|
||||||
std::vector<std::pair<Property, PropertyValue>>::const_iterator memory_it_;
|
|
||||||
std::optional<kvstore::KVStore::iterator> disk_it_;
|
|
||||||
std::optional<std::pair<Property, PropertyValue>> disk_prop_;
|
|
||||||
};
|
|
||||||
|
|
||||||
size_t size() const;
|
|
||||||
|
|
||||||
iterator begin() const;
|
|
||||||
|
|
||||||
iterator end() const;
|
|
||||||
|
|
||||||
private:
|
|
||||||
static std::atomic<uint64_t> global_key_cnt_;
|
|
||||||
uint64_t version_key_ = global_key_cnt_++;
|
|
||||||
|
|
||||||
std::vector<std::pair<Property, PropertyValue>> props_;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Serializes a single PropertyValue into std::string.
|
|
||||||
*
|
|
||||||
* @param prop - Property to be serialized.
|
|
||||||
*
|
|
||||||
* @return Serialized property.
|
|
||||||
*/
|
|
||||||
std::string SerializeProp(const PropertyValue &prop) const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Deserializes a single PropertyValue from std::string.
|
|
||||||
*
|
|
||||||
* @param serialized_prop - Serialized property.
|
|
||||||
*
|
|
||||||
* @return Deserialized property.
|
|
||||||
*/
|
|
||||||
PropertyValue DeserializeProp(const std::string &serialized_prop) const;
|
|
||||||
|
|
||||||
kvstore::KVStore ConstructDiskStorage() const;
|
|
||||||
};
|
|
@ -1,126 +0,0 @@
|
|||||||
#include "storage/common/types/slk.hpp"

namespace slk {

// Serializes a `PropertyValue` by writing a one-byte type tag followed by the
// payload. The tag values (0-6) are the on-wire format and must stay in sync
// with `Load(PropertyValue *, slk::Reader *)` below.
void Save(const PropertyValue &value, slk::Builder *builder) {
  switch (value.type()) {
    case PropertyValue::Type::Null:
      slk::Save(static_cast<uint8_t>(0), builder);
      return;
    case PropertyValue::Type::Bool:
      slk::Save(static_cast<uint8_t>(1), builder);
      slk::Save(value.ValueBool(), builder);
      return;
    case PropertyValue::Type::Int:
      slk::Save(static_cast<uint8_t>(2), builder);
      slk::Save(value.ValueInt(), builder);
      return;
    case PropertyValue::Type::Double:
      slk::Save(static_cast<uint8_t>(3), builder);
      slk::Save(value.ValueDouble(), builder);
      return;
    case PropertyValue::Type::String:
      slk::Save(static_cast<uint8_t>(4), builder);
      slk::Save(value.ValueString(), builder);
      return;
    case PropertyValue::Type::List: {
      // Lists are encoded as a size followed by each element recursively.
      slk::Save(static_cast<uint8_t>(5), builder);
      const auto &values = value.ValueList();
      size_t size = values.size();
      slk::Save(size, builder);
      for (const auto &v : values) {
        slk::Save(v, builder);
      }
      return;
    }
    case PropertyValue::Type::Map: {
      // Maps are encoded as a size followed by each (key, value) pair.
      slk::Save(static_cast<uint8_t>(6), builder);
      const auto &map = value.ValueMap();
      size_t size = map.size();
      slk::Save(size, builder);
      for (const auto &kv : map) {
        slk::Save(kv, builder);
      }
      return;
    }
  }
}

// Deserializes a `PropertyValue` previously written by `Save` above.
// @throws slk::SlkDecodeException if the type tag is not recognized.
void Load(PropertyValue *value, slk::Reader *reader) {
  uint8_t type;
  slk::Load(&type, reader);
  switch (type) {
    case static_cast<uint8_t>(0):
      *value = PropertyValue();
      return;
    case static_cast<uint8_t>(1): {
      bool v;
      slk::Load(&v, reader);
      *value = PropertyValue(v);
      return;
    }
    case static_cast<uint8_t>(2): {
      int64_t v;
      slk::Load(&v, reader);
      *value = PropertyValue(v);
      return;
    }
    case static_cast<uint8_t>(3): {
      double v;
      slk::Load(&v, reader);
      *value = PropertyValue(v);
      return;
    }
    case static_cast<uint8_t>(4): {
      std::string v;
      slk::Load(&v, reader);
      *value = PropertyValue(std::move(v));
      return;
    }
    case static_cast<uint8_t>(5): {
      size_t size;
      slk::Load(&size, reader);
      std::vector<PropertyValue> list(size);
      for (size_t i = 0; i < size; ++i) {
        slk::Load(&list[i], reader);
      }
      *value = PropertyValue(std::move(list));
      return;
    }
    case static_cast<uint8_t>(6): {
      size_t size;
      slk::Load(&size, reader);
      std::map<std::string, PropertyValue> map;
      for (size_t i = 0; i < size; ++i) {
        std::pair<std::string, PropertyValue> kv;
        slk::Load(&kv, reader);
        // Move the deserialized pair into the map; the previous code copied
        // the key string and the (potentially nested) value.
        map.insert(std::move(kv));
      }
      *value = PropertyValue(std::move(map));
      return;
    }
    default:
      throw slk::SlkDecodeException("Trying to load unknown PropertyValue!");
  }
}

// Serializes a whole property store as a size followed by each
// (storage::Property, PropertyValue) pair.
void Save(const PropertyValueStore &properties, slk::Builder *builder) {
  size_t size = properties.size();
  slk::Save(size, builder);
  for (const auto &kv : properties) {
    slk::Save(kv, builder);
  }
}

// Deserializes a property store written by `Save` above. Any existing
// contents of `properties` are discarded first.
void Load(PropertyValueStore *properties, slk::Reader *reader) {
  properties->clear();
  size_t size;
  slk::Load(&size, reader);
  for (size_t i = 0; i < size; ++i) {
    std::pair<storage::Property, PropertyValue> kv;
    slk::Load(&kv, reader);
    properties->set(kv.first, kv.second);
  }
}

}  // namespace slk
|
|
@ -1,52 +0,0 @@
|
|||||||
#pragma once

#include "slk/serialization.hpp"
#include "storage/common/types/property_value.hpp"
#include "storage/common/types/property_value_store.hpp"
#include "storage/common/types/types.hpp"

namespace slk {

// SLK (de)serialization for the storage id types. Each id is encoded as its
// underlying integer representation, location bit included.

inline void Save(const storage::Label &label, slk::Builder *builder) {
  slk::Save(label.id_, builder);
}

inline void Load(storage::Label *label, slk::Reader *reader) {
  slk::Load(&label->id_, reader);
}

inline void Save(const storage::EdgeType &edge_type, slk::Builder *builder) {
  slk::Save(edge_type.id_, builder);
}

inline void Load(storage::EdgeType *edge_type, slk::Reader *reader) {
  slk::Load(&edge_type->id_, reader);
}

inline void Save(const storage::Property &property, slk::Builder *builder) {
  slk::Save(property.id_, builder);
}

inline void Load(storage::Property *property, slk::Reader *reader) {
  slk::Load(&property->id_, reader);
}

// Global ids are encoded as their raw 64-bit representation.
inline void Save(const storage::Gid &gid, slk::Builder *builder) {
  slk::Save(gid.AsUint(), builder);
}

inline void Load(storage::Gid *gid, slk::Reader *reader) {
  uint64_t raw_id;
  slk::Load(&raw_id, reader);
  *gid = storage::Gid::FromUint(raw_id);
}

// Property value (de)serialization; implemented in the accompanying .cpp.
void Save(const PropertyValue &value, slk::Builder *builder);

void Load(PropertyValue *value, slk::Reader *reader);

void Save(const PropertyValueStore &properties, slk::Builder *builder);

void Load(PropertyValueStore *properties, slk::Reader *reader);

}  // namespace slk
|
|
@ -1,249 +0,0 @@
|
|||||||
#pragma once

#include <atomic>
#include <cstdint>
#include <functional>
#include <limits>
#include <optional>

#include <glog/logging.h>

#include "utils/atomic.hpp"
#include "utils/cast.hpp"

namespace storage {

using IdT = uint16_t;

// The top bit of an id encodes its storage location; the remaining 15 bits
// encode the numeric id itself. `constexpr` (rather than `const`) makes these
// usable in constant expressions and avoids per-TU storage.
constexpr IdT IdMask = std::numeric_limits<IdT>::max() >> 1;
constexpr IdT IdNotMask = ~IdMask;

// In case of a new location Mask value has to be updated.
//
// |-------------|--------------|
// |---location--|------id------|
// |-Memory|Disk-|-----2^15-----|
enum class Location : IdT { Memory = 0x8000, Disk = 0x0000 };

// NOTE(ipaljak): In the constructors below, a better approach than CHECK
// would be to throw an exception and inform the user that a new id can't be
// created; the database instance would then keep working and the user could
// take an appropriate action. CHECK immediately terminates the whole process
// and isn't user friendly at all.
// TODO: implement throw and error handling.

/** Id of a vertex label; the storage location is encoded in the top bit. */
class Label final {
 public:
  Label() = default;
  explicit Label(const IdT id, const Location location = Location::Memory)
      : id_((id & IdMask) | static_cast<IdT>(location)) {
    // See the NOTE above about CHECK vs. exceptions.
    CHECK(id <= IdMask) << "Number of used ids overflowed!";
  }

  /** Returns the numeric id with the location bit stripped. */
  IdT Id() const { return static_cast<IdT>(id_ & IdMask); }
  /** Returns where the data for this id is stored (memory or disk). */
  storage::Location Location() const {
    return static_cast<storage::Location>(id_ & IdNotMask);
  }

  // Comparisons intentionally ignore the location bit: two ids denote the
  // same entity regardless of where the data lives.
  friend bool operator==(const Label &a, const Label &b) {
    return a.Id() == b.Id();
  }
  friend bool operator!=(const Label &a, const Label &b) {
    return a.Id() != b.Id();
  }
  friend bool operator<(const Label &a, const Label &b) {
    return a.Id() < b.Id();
  }
  friend bool operator>(const Label &a, const Label &b) {
    return a.Id() > b.Id();
  }
  friend bool operator<=(const Label &a, const Label &b) {
    return a.Id() <= b.Id();
  }
  friend bool operator>=(const Label &a, const Label &b) {
    return a.Id() >= b.Id();
  }

  IdT id_{0};
};

/** Id of an edge type; the storage location is encoded in the top bit. */
class EdgeType final {
 public:
  EdgeType() = default;
  explicit EdgeType(const IdT id, const Location location = Location::Memory)
      : id_((id & IdMask) | static_cast<IdT>(location)) {
    // See the NOTE above about CHECK vs. exceptions.
    CHECK(id <= IdMask) << "Number of used ids overflowed!";
  }

  /** Returns the numeric id with the location bit stripped. */
  IdT Id() const { return static_cast<IdT>(id_ & IdMask); }
  /** Returns where the data for this id is stored (memory or disk). */
  storage::Location Location() const {
    return static_cast<storage::Location>(id_ & IdNotMask);
  }

  // Comparisons intentionally ignore the location bit.
  friend bool operator==(const EdgeType &a, const EdgeType &b) {
    return a.Id() == b.Id();
  }
  friend bool operator!=(const EdgeType &a, const EdgeType &b) {
    return a.Id() != b.Id();
  }
  friend bool operator<(const EdgeType &a, const EdgeType &b) {
    return a.Id() < b.Id();
  }
  friend bool operator>(const EdgeType &a, const EdgeType &b) {
    return a.Id() > b.Id();
  }
  friend bool operator<=(const EdgeType &a, const EdgeType &b) {
    return a.Id() <= b.Id();
  }
  friend bool operator>=(const EdgeType &a, const EdgeType &b) {
    return a.Id() >= b.Id();
  }

  IdT id_{0};
};

/** Id of a property; the storage location is encoded in the top bit. */
class Property final {
 public:
  Property() = default;
  explicit Property(const IdT id, const Location location = Location::Memory)
      : id_((id & IdMask) | static_cast<IdT>(location)) {
    // See the NOTE above about CHECK vs. exceptions.
    CHECK(id <= IdMask) << "Number of used ids overflowed!";
  }

  /** Returns the numeric id with the location bit stripped. */
  IdT Id() const { return static_cast<IdT>(id_ & IdMask); }
  /** Returns where the data for this id is stored (memory or disk). */
  storage::Location Location() const {
    return static_cast<storage::Location>(id_ & IdNotMask);
  }

  // Comparisons intentionally ignore the location bit.
  friend bool operator==(const Property &a, const Property &b) {
    return a.Id() == b.Id();
  }
  friend bool operator!=(const Property &a, const Property &b) {
    return a.Id() != b.Id();
  }
  friend bool operator<(const Property &a, const Property &b) {
    return a.Id() < b.Id();
  }
  friend bool operator>(const Property &a, const Property &b) {
    return a.Id() > b.Id();
  }
  friend bool operator<=(const Property &a, const Property &b) {
    return a.Id() <= b.Id();
  }
  friend bool operator>=(const Property &a, const Property &b) {
    return a.Id() >= b.Id();
  }

  IdT id_{0};
};

/** Global ID of a record in the database. */
class Gid final {
 private:
  explicit Gid(uint64_t id) : id_(id) {}

 public:
  Gid() = default;

  static Gid FromUint(uint64_t id) { return Gid{id}; }
  static Gid FromInt(int64_t id) {
    return Gid{utils::MemcpyCast<uint64_t>(id)};
  }
  uint64_t AsUint() const { return id_; }
  int64_t AsInt() const { return utils::MemcpyCast<int64_t>(id_); }

 private:
  // Brace-initialized so a default-constructed Gid holds a defined value;
  // previously this was left uninitialized, making reads of a
  // default-constructed Gid undefined behaviour.
  uint64_t id_{0};
};

inline bool operator==(const Gid &first, const Gid &second) {
  return first.AsUint() == second.AsUint();
}

inline bool operator!=(const Gid &first, const Gid &second) {
  return first.AsUint() != second.AsUint();
}

inline bool operator<(const Gid &first, const Gid &second) {
  return first.AsUint() < second.AsUint();
}

inline bool operator>(const Gid &first, const Gid &second) {
  return first.AsUint() > second.AsUint();
}

inline bool operator<=(const Gid &first, const Gid &second) {
  return first.AsUint() <= second.AsUint();
}

inline bool operator>=(const Gid &first, const Gid &second) {
  return first.AsUint() >= second.AsUint();
}

/** Threadsafe generation of new global IDs. */
class GidGenerator {
 public:
  /**
   * Returns a globally unique identifier.
   *
   * @param requested_gid - The desired gid. If given, it will be returned and
   * this generator's state updated accordingly.
   */
  Gid Next(std::optional<Gid> requested_gid = std::nullopt) {
    if (requested_gid) {
      // Make sure subsequently generated ids don't collide with the
      // explicitly requested one.
      utils::EnsureAtomicGe(next_local_id_, requested_gid->AsUint() + 1U);
      return *requested_gid;
    } else {
      return Gid::FromUint(next_local_id_++);
    }
  }

 private:
  std::atomic<uint64_t> next_local_id_{0};
};

}  // namespace storage

// Hash support so the id types can be used as keys in unordered containers.
// Hashes, like comparisons, ignore the location bit.
namespace std {
template <>
struct hash<storage::Label> {
  size_t operator()(const storage::Label &k) const {
    return hash<storage::IdT>()(k.Id());
  }
};

template <>
struct hash<storage::EdgeType> {
  size_t operator()(const storage::EdgeType &k) const {
    return hash<storage::IdT>()(k.Id());
  }
};

template <>
struct hash<storage::Property> {
  size_t operator()(const storage::Property &k) const {
    return hash<storage::IdT>()(k.Id());
  }
};

template <>
struct hash<storage::Gid> {
  size_t operator()(const storage::Gid &gid) const {
    return hash<uint64_t>()(gid.AsUint());
  }
};
}  // namespace std
|
|
@ -1,7 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#ifdef MG_SINGLE_NODE_HA
|
|
||||||
#include "storage/single_node_ha/edge_accessor.hpp"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// TODO: write documentation for the interface here!
|
|
@ -1,68 +0,0 @@
|
|||||||
#pragma once

#include <mutex>

#include "data_structures/concurrent/concurrent_map.hpp"
#include "storage/common/types/types.hpp"
#include "utils/algorithm.hpp"

namespace storage {

/** SingleNode implementation of ConcurrentIdMapper.
 *
 * Maintains a thread-safe bidirectional mapping between names (strings) and
 * typed ids (Label / EdgeType / Property). Ids are assigned from a single
 * atomic counter the first time a name is seen.
 */
template <typename TId>
class ConcurrentIdMapper {
  using StorageT = IdT;

 public:
  ConcurrentIdMapper() = default;
  /** @param properties_on_disk - names of properties whose values should be
   * stored on disk; only consulted when TId == Property. */
  explicit ConcurrentIdMapper(
      const std::vector<std::string> &properties_on_disk)
      : properties_on_disk_(properties_on_disk) {}

  /** Returns the id mapped to `value`, assigning a fresh id if the value has
   * not been seen before. Also ensures the reverse (id -> value) mapping
   * exists before returning. */
  TId value_to_id(const std::string &value) {
    auto value_to_id_acc = value_to_id_.access();
    auto found = value_to_id_acc.find(value);
    TId inserted_id(0);
    if (found == value_to_id_acc.end()) {
      StorageT new_id = id_.fetch_add(1);
      // After we try to insert the value with our id we either get our id, or
      // the id created by the thread which successfully inserted the
      // (value, id) pair, because that's ConcurrentMap's behaviour.
      // `if constexpr` (instead of the previous runtime `if` on a
      // compile-time condition) avoids instantiating the Property-only
      // branch for Label/EdgeType mappers.
      if constexpr (std::is_same<TId, Property>::value) {
        // Properties additionally carry a storage location (memory/disk).
        inserted_id =
            value_to_id_acc.insert(value, TId(new_id, PropertyLocation(value)))
                .first->second;
      } else {
        inserted_id = value_to_id_acc.insert(value, TId(new_id)).first->second;
      }
    } else {
      inserted_id = found->second;
    }
    auto id_to_value_acc = id_to_value_.access();
    // We have to try to insert the inserted_id and value even if we are not
    // the one who assigned the id, because we must guarantee that both
    // mappings (id -> value and value -> id) exist when this method returns.
    id_to_value_acc.insert(inserted_id, value);
    return inserted_id;
  }

  /** Returns the name for a previously assigned id. The id must exist. */
  const std::string &id_to_value(const TId &id) {
    auto id_to_value_acc = id_to_value_.access();
    auto result = id_to_value_acc.find(id);
    DCHECK(result != id_to_value_acc.end());
    return result->second;
  }

 private:
  ConcurrentMap<std::string, TId> value_to_id_;
  ConcurrentMap<TId, std::string> id_to_value_;
  std::atomic<StorageT> id_{0};
  std::vector<std::string> properties_on_disk_;
  std::mutex mutex_;

  // Decides whether a property's values live in memory or on disk, based on
  // the configured `properties_on_disk_` list.
  Location PropertyLocation(const std::string &name) {
    std::unique_lock<std::mutex> lock(mutex_);
    if (utils::Contains(properties_on_disk_, name)) return Location::Disk;
    return Location::Memory;
  }
};
}  // namespace storage
|
|
@ -1,78 +0,0 @@
|
|||||||
#pragma once

#include <malloc.h>

#include <limits>
#include <list>

#include "glog/logging.h"
#include "storage/single_node_ha/mvcc/record.hpp"
#include "transactions/transaction.hpp"

/**
 * @brief - Implements deferred deletion.
 * @Tparam T - type of object to delete (Vertex/Edge/VersionList...)
 * This is NOT a thread-safe class.
 */
template <typename T>
class DeferredDeleter {
 public:
  /**
   * @brief - keep track of what object was deleted at which time.
   */
  struct DeletedObject {
    const T *object;
    const tx::TransactionId deleted_at;
    DeletedObject(const T *object, tx::TransactionId deleted_at)
        : object(object), deleted_at(deleted_at) {}
  };

  /**
   * @brief - verify everything was freed before destruction.
   */
  ~DeferredDeleter() {
    CHECK(objects_.size() == 0U)
        << "Objects are not freed when calling the destructor.";
  }

  /**
   * @brief - Add objects to this deleter. This method assumes that it will
   * always be called with a non-decreasing sequence of `deleted_at`.
   * @param objects - vector of objects to add
   */
  void AddObjects(const std::vector<DeletedObject> &objects) {
    auto previous_tx_id = objects_.empty()
                              ? std::numeric_limits<tx::TransactionId>::min()
                              : objects_.back().deleted_at;
    for (const auto &deleted : objects) {
      CHECK(previous_tx_id <= deleted.deleted_at)
          << "deleted_at must be non-decreasing";
      previous_tx_id = deleted.deleted_at;
      objects_.push_back(deleted);
    }
  }

  /**
   * @brief - Free memory of objects deleted strictly before the given id.
   * @param id - delete before this id
   */
  void FreeExpiredObjects(tx::TransactionId id) {
    // The list is sorted by `deleted_at`, so expired objects form a prefix.
    auto first_alive = objects_.begin();
    while (first_alive != objects_.end() && first_alive->deleted_at < id) {
      delete first_alive->object;
      ++first_alive;
    }
    objects_.erase(objects_.begin(), first_alive);
  }

  /**
   * @brief - Return number of stored objects.
   */
  size_t Count() { return objects_.size(); }

 private:
  // Ascendingly sorted list of deleted objects by `deleted_at`.
  std::list<DeletedObject> objects_;
};
|
|
@ -1,32 +0,0 @@
|
|||||||
#pragma once

#include "storage/single_node_ha/mvcc/record.hpp"
#include "storage/single_node_ha/mvcc/version_list.hpp"
#include "storage/common/types/property_value_store.hpp"
#include "storage/common/types/types.hpp"

class Vertex;

// An edge record: its two endpoint vertices, its type and its properties,
// versioned through the MVCC `Record` base class.
class Edge : public mvcc::Record<Edge> {
 public:
  Edge(mvcc::VersionList<Vertex> *from, mvcc::VersionList<Vertex> *to,
       storage::EdgeType edge_type)
      : from_(from), to_(to), edge_type_(edge_type) {}

  // Returns new Edge with copy of data stored in this Edge, but without
  // copying superclass' members.
  Edge *CloneData() { return new Edge(*this); }

  // Edge data is public; endpoints and type are set once at construction.
  mvcc::VersionList<Vertex> *from_;
  mvcc::VersionList<Vertex> *to_;
  storage::EdgeType edge_type_;
  PropertyValueStore properties_;

 private:
  // Copy construction is private on purpose: it deliberately skips copying
  // the MVCC superclass state and is reachable only through `CloneData`.
  Edge(const Edge &other)
      : mvcc::Record<Edge>(),
        from_(other.from_),
        to_(other.to_),
        edge_type_(other.edge_type_),
        properties_(other.properties_) {}
};
|
|
@ -1,59 +0,0 @@
|
|||||||
#include "storage/edge_accessor.hpp"

#include "database/single_node_ha/graph_db_accessor.hpp"
#include "storage/vertex_accessor.hpp"
#include "utils/algorithm.hpp"

// Slow-path constructor: reconstructs the record and, when a visible version
// exists, fills in the endpoints and edge type from it.
EdgeAccessor::EdgeAccessor(mvcc::VersionList<Edge> *address,
                           database::GraphDbAccessor &db_accessor)
    : RecordAccessor(address, db_accessor),
      from_(nullptr),
      to_(nullptr),
      edge_type_() {
  RecordAccessor::Reconstruct();
  if (current_ == nullptr) return;
  from_ = current_->from_;
  to_ = current_->to_;
  edge_type_ = current_->edge_type_;
}

// Fast-path constructor: endpoints and type are supplied by the caller, so no
// record read is needed.
EdgeAccessor::EdgeAccessor(mvcc::VersionList<Edge> *address,
                           database::GraphDbAccessor &db_accessor,
                           mvcc::VersionList<Vertex> *from,
                           mvcc::VersionList<Vertex> *to,
                           storage::EdgeType edge_type)
    : RecordAccessor(address, db_accessor),
      from_(from),
      to_(to),
      edge_type_(edge_type) {}

storage::EdgeType EdgeAccessor::EdgeType() const { return edge_type_; }

VertexAccessor EdgeAccessor::from() const {
  return VertexAccessor(from_, db_accessor());
}

bool EdgeAccessor::from_is(const VertexAccessor &v) const {
  return from_ == v.address();
}

VertexAccessor EdgeAccessor::to() const {
  return VertexAccessor(to_, db_accessor());
}

bool EdgeAccessor::to_is(const VertexAccessor &v) const {
  return to_ == v.address();
}

bool EdgeAccessor::is_cycle() const { return from_ == to_; }

// Streams the edge as "E[<type> {prop: value, ...}]".
std::ostream &operator<<(std::ostream &os, const EdgeAccessor &ea) {
  os << "E[" << ea.db_accessor().EdgeTypeName(ea.EdgeType());
  os << " {";
  auto print_property = [&ea](auto &stream, const auto &pair) {
    stream << ea.db_accessor().PropertyName(pair.first) << ": " << pair.second;
  };
  utils::PrintIterable(os, ea.Properties(), ", ", print_property);
  return os << "}]";
}
|
|
@ -1,77 +0,0 @@
|
|||||||
#pragma once

#include "storage/single_node_ha/edge.hpp"
#include "storage/single_node_ha/record_accessor.hpp"

// Forward declaration; VertexAccessor is returned by some functions below.
class VertexAccessor;

/**
 * Provides ways for the client programmer (i.e. code generated by the compiler)
 * to interact with an Edge.
 *
 * Note that EdgeAccessors do not necessary read versioned (MVCC) data. This is
 * possible because edge endpoints (from and to), as well as the edge type, are
 * all immutable. These are the most often used aspects of an edge, and are
 * stored also in the vertex endpoints of the edge. Using them when creating an
 * EdgeAccessor means that data does not have to be read from a random memory
 * location, which is often a performance bottleneck in traversals.
 */
class EdgeAccessor final : public RecordAccessor<Edge> {
 public:
  /** Constructor that reads data from the random memory location (lower
   * performance, see class docs). */
  EdgeAccessor(mvcc::VersionList<Edge> *address,
               database::GraphDbAccessor &db_accessor);

  /**
   * Constructor that does NOT read data from the random memory location
   * (better performance, see class docs).
   */
  EdgeAccessor(mvcc::VersionList<Edge> *address,
               database::GraphDbAccessor &db_accessor,
               mvcc::VersionList<Vertex> *from, mvcc::VersionList<Vertex> *to,
               storage::EdgeType edge_type);

  /** Returns the type of this edge. */
  storage::EdgeType EdgeType() const;

  /** Returns an accessor to the originating Vertex of this edge. */
  VertexAccessor from() const;

  /** Returns the address of the originating Vertex of this edge. */
  auto from_addr() const { return from_; }

  /** Checks if the given vertex is the source of this edge, without
   * creating an additional accessor to perform the check. */
  bool from_is(const VertexAccessor &v) const;

  /** Returns an accessor to the destination Vertex of this edge. */
  VertexAccessor to() const;

  /** Returns the address of the destination Vertex of this edge. */
  auto to_addr() const { return to_; }

  /** Checks if the given vertex is the destination of this edge, without
   * creating an additional accessor to perform the check. */
  bool to_is(const VertexAccessor &v) const;

  /** Returns true if this edge is a cycle (start and end node are
   * the same). */
  bool is_cycle() const;

 private:
  mvcc::VersionList<Vertex> *from_;
  mvcc::VersionList<Vertex> *to_;
  storage::EdgeType edge_type_;
};

std::ostream &operator<<(std::ostream &, const EdgeAccessor &);

// Hash function for the edge accessor; hashes by the edge's global id.
namespace std {
template <>
struct hash<EdgeAccessor> {
  size_t operator()(const EdgeAccessor &e) const { return e.gid().AsUint(); }
};
}  // namespace std
|
|
@ -1,156 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <optional>
|
|
||||||
#include <utility>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include "glog/logging.h"
|
|
||||||
|
|
||||||
#include "storage/common/types/types.hpp"
|
|
||||||
#include "storage/single_node_ha/mvcc/version_list.hpp"
|
|
||||||
#include "utils/algorithm.hpp"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A data stucture that holds a number of edges. This implementation assumes
|
|
||||||
* that separate Edges instances are used for incoming and outgoing edges in a
|
|
||||||
* vertex (and consequently that edge Addresses are unique in it).
|
|
||||||
*/
|
|
||||||
class Edges {
|
|
||||||
private:
|
|
||||||
struct Element {
|
|
||||||
mvcc::VersionList<Vertex> *vertex;
|
|
||||||
mvcc::VersionList<Edge> *edge;
|
|
||||||
storage::EdgeType edge_type;
|
|
||||||
};
|
|
||||||
|
|
||||||
/** Custom iterator that takes care of skipping edges when the destination
|
|
||||||
* vertex or edge types are known. */
|
|
||||||
class Iterator {
|
|
||||||
public:
|
|
||||||
/** Ctor that just sets the position. Used for normal iteration (that does
|
|
||||||
* not skip any edges), and for end-iterator creation in both normal and
|
|
||||||
* skipping iteration.
|
|
||||||
*
|
|
||||||
* @param iterator - Iterator in the underlying storage.
|
|
||||||
*/
|
|
||||||
explicit Iterator(std::vector<Element>::const_iterator iterator)
|
|
||||||
: position_(iterator) {}
|
|
||||||
|
|
||||||
/** Ctor used for creating the beginning iterator with known destination
|
|
||||||
* vertex.
|
|
||||||
*
|
|
||||||
* @param iterator - Iterator in the underlying storage.
|
|
||||||
* @param end - End iterator in the underlying storage.
|
|
||||||
* @param vertex - The destination vertex address. If empty the
|
|
||||||
* edges are not filtered on destination.
|
|
||||||
* @param edge_types - The edge types at least one of which must be matched.
|
|
||||||
* If nullptr edges are not filtered on type.
|
|
||||||
*/
|
|
||||||
Iterator(std::vector<Element>::const_iterator position,
|
|
||||||
std::vector<Element>::const_iterator end,
|
|
||||||
mvcc::VersionList<Vertex> *vertex,
|
|
||||||
const std::vector<storage::EdgeType> *edge_types)
|
|
||||||
: position_(position),
|
|
||||||
end_(end),
|
|
||||||
vertex_(vertex),
|
|
||||||
edge_types_(edge_types) {
|
|
||||||
update_position();
|
|
||||||
}
|
|
||||||
|
|
||||||
Iterator &operator++() {
|
|
||||||
++position_;
|
|
||||||
update_position();
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
const Element &operator*() const { return *position_; }
|
|
||||||
const Element *operator->() const { return &(*position_); }
|
|
||||||
|
|
||||||
bool operator==(const Iterator &other) const {
|
|
||||||
return position_ == other.position_;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool operator!=(const Iterator &other) const { return !(*this == other); }
|
|
||||||
|
|
||||||
private:
|
|
||||||
std::vector<Element>::const_iterator position_;
|
|
||||||
// end_ is used only in update_position() to limit find.
|
|
||||||
std::vector<Element>::const_iterator end_;
|
|
||||||
|
|
||||||
// Optional predicates. If set they define which edges are skipped by the
|
|
||||||
// iterator.
|
|
||||||
mvcc::VersionList<Vertex> *vertex_{nullptr};
|
|
||||||
// For edge types we use a vector pointer because it's optional.
|
|
||||||
const std::vector<storage::EdgeType> *edge_types_ = nullptr;
|
|
||||||
|
|
||||||
/** Helper function that skips edges that don't satisfy the predicate
|
|
||||||
* present in this iterator. */
|
|
||||||
void update_position() {
|
|
||||||
if (vertex_ && edge_types_) {
|
|
||||||
position_ = std::find_if(position_, end_, [this](const Element &e) {
|
|
||||||
return e.vertex == this->vertex_ &&
|
|
||||||
utils::Contains(*this->edge_types_, e.edge_type);
|
|
||||||
});
|
|
||||||
} else if (vertex_) {
|
|
||||||
position_ = std::find_if(position_, end_, [this](const Element &e) {
|
|
||||||
return e.vertex == this->vertex_;
|
|
||||||
});
|
|
||||||
} else if (edge_types_) {
|
|
||||||
position_ = std::find_if(position_, end_, [this](const Element &e) {
|
|
||||||
return utils::Contains(*this->edge_types_, e.edge_type);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
public:
|
|
||||||
/**
|
|
||||||
* Adds an edge to this structure.
|
|
||||||
*
|
|
||||||
* @param vertex - The destination vertex of the edge. That's the one
|
|
||||||
* opposite from the vertex that contains this `Edges` instance.
|
|
||||||
* @param edge - The edge.
|
|
||||||
* @param edge_type - Type of the edge.
|
|
||||||
*/
|
|
||||||
void emplace(mvcc::VersionList<Vertex> *vertex, mvcc::VersionList<Edge> *edge,
|
|
||||||
storage::EdgeType edge_type) {
|
|
||||||
storage_.emplace_back(Element{vertex, edge, edge_type});
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Removes an edge from this structure.
|
|
||||||
*/
|
|
||||||
void RemoveEdge(mvcc::VersionList<Edge> *edge) {
|
|
||||||
auto found = std::find_if(
|
|
||||||
storage_.begin(), storage_.end(),
|
|
||||||
[edge](const Element &element) { return edge == element.edge; });
|
|
||||||
// If the edge is not in the structure we don't care and can simply return
|
|
||||||
if (found == storage_.end()) return;
|
|
||||||
*found = std::move(storage_.back());
|
|
||||||
storage_.pop_back();
|
|
||||||
}
|
|
||||||
|
|
||||||
  // Number of stored edge entries (no visibility filtering applied).
  auto size() const { return storage_.size(); }
  // Unfiltered iteration over all stored entries.
  auto begin() const { return Iterator(storage_.begin()); }
  auto end() const { return Iterator(storage_.end()); }

  // Mutable access to the underlying element vector.
  auto &storage() { return storage_; }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates a beginning iterator that will skip edges whose destination
|
|
||||||
* vertex is not equal to the given vertex.
|
|
||||||
*
|
|
||||||
* @param vertex - The destination vertex Address. If empty the
|
|
||||||
* edges are not filtered on destination.
|
|
||||||
* @param edge_types - The edge types at least one of which must be matched.
|
|
||||||
* If nullptr edges are not filtered on type.
|
|
||||||
*/
|
|
||||||
auto begin(mvcc::VersionList<Vertex> *vertex,
|
|
||||||
const std::vector<storage::EdgeType> *edge_types) const {
|
|
||||||
if (edge_types && edge_types->empty()) edge_types = nullptr;
|
|
||||||
return Iterator(storage_.begin(), storage_.end(), vertex, edge_types);
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
std::vector<Element> storage_;
|
|
||||||
};
|
|
@ -1,70 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <glog/logging.h>
|
|
||||||
|
|
||||||
#include "data_structures/concurrent/skiplist.hpp"
|
|
||||||
#include "storage/single_node_ha/mvcc/version_list.hpp"
|
|
||||||
#include "storage/single_node_ha/deferred_deleter.hpp"
|
|
||||||
#include "transactions/single_node_ha/engine.hpp"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief - Garbage collects deleted records.
|
|
||||||
* @tparam TCollection - type of collection. Must have a SkipList-like API
|
|
||||||
* (accessors).
|
|
||||||
* @tparam TRecord - type of underlying record in mvcc.
|
|
||||||
*/
|
|
||||||
template <typename TCollection, typename TRecord>
|
|
||||||
class GarbageCollector {
|
|
||||||
public:
|
|
||||||
GarbageCollector(
|
|
||||||
TCollection &collection, DeferredDeleter<TRecord> &record_deleter,
|
|
||||||
DeferredDeleter<mvcc::VersionList<TRecord>> &version_list_deleter)
|
|
||||||
: collection_(collection),
|
|
||||||
record_deleter_(record_deleter),
|
|
||||||
version_list_deleter_(version_list_deleter) {}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief - Runs garbage collector. Populates deferred deleters with version
|
|
||||||
* lists and records.
|
|
||||||
*
|
|
||||||
* @param snapshot - the GC snapshot. Consists of the oldest active
|
|
||||||
* transaction's snapshot, with that transaction's id appened as last.
|
|
||||||
* @param engine - reference to engine object
|
|
||||||
*/
|
|
||||||
void Run(const tx::Snapshot &snapshot, const tx::Engine &engine) {
|
|
||||||
auto collection_accessor = collection_.access();
|
|
||||||
uint64_t count = 0;
|
|
||||||
std::vector<typename DeferredDeleter<TRecord>::DeletedObject>
|
|
||||||
deleted_records;
|
|
||||||
std::vector<
|
|
||||||
typename DeferredDeleter<mvcc::VersionList<TRecord>>::DeletedObject>
|
|
||||||
deleted_version_lists;
|
|
||||||
for (auto id_vlist : collection_accessor) {
|
|
||||||
mvcc::VersionList<TRecord> *vlist = id_vlist.second;
|
|
||||||
// If the version_list is empty, i.e. there is nothing else to be read
|
|
||||||
// from it we can delete it.
|
|
||||||
auto ret = vlist->GcDeleted(snapshot, engine);
|
|
||||||
if (ret.first) {
|
|
||||||
deleted_version_lists.emplace_back(vlist, engine.LocalLast());
|
|
||||||
count += collection_accessor.remove(id_vlist.first);
|
|
||||||
}
|
|
||||||
if (ret.second != nullptr)
|
|
||||||
deleted_records.emplace_back(ret.second, engine.LocalLast());
|
|
||||||
}
|
|
||||||
DLOG_IF(INFO, count > 0)
|
|
||||||
<< "GC started cleaning with snapshot: " << snapshot;
|
|
||||||
DLOG_IF(INFO, count > 0) << "Destroyed: " << count;
|
|
||||||
|
|
||||||
// Add records to deleter, with the id larger or equal than the last active
|
|
||||||
// transaction.
|
|
||||||
record_deleter_.AddObjects(deleted_records);
|
|
||||||
// Add version_lists to deleter, with the id larger or equal than the last
|
|
||||||
// active transaction.
|
|
||||||
version_list_deleter_.AddObjects(deleted_version_lists);
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
TCollection &collection_;
|
|
||||||
DeferredDeleter<TRecord> &record_deleter_;
|
|
||||||
DeferredDeleter<mvcc::VersionList<TRecord>> &version_list_deleter_;
|
|
||||||
};
|
|
@ -1,188 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include "glog/logging.h"
|
|
||||||
|
|
||||||
#include "data_structures/concurrent/concurrent_map.hpp"
|
|
||||||
#include "storage/single_node_ha/mvcc/version_list.hpp"
|
|
||||||
#include "storage/common/index.hpp"
|
|
||||||
#include "storage/common/types/types.hpp"
|
|
||||||
#include "storage/single_node_ha/edge.hpp"
|
|
||||||
#include "storage/single_node_ha/vertex.hpp"
|
|
||||||
#include "transactions/transaction.hpp"
|
|
||||||
#include "utils/total_ordering.hpp"
|
|
||||||
|
|
||||||
namespace database {
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Implements index update and acquire.
|
|
||||||
* @Tparam TKey - underlying type by which to key objects
|
|
||||||
* @Tparam TRecord - object stored under the given key
|
|
||||||
*/
|
|
||||||
template <typename TKey, typename TRecord>
|
|
||||||
class KeyIndex {
|
|
||||||
public:
|
|
||||||
KeyIndex() {}
|
|
||||||
KeyIndex(const KeyIndex &other) = delete;
|
|
||||||
KeyIndex(KeyIndex &&other) = delete;
|
|
||||||
KeyIndex &operator=(const KeyIndex &other) = delete;
|
|
||||||
KeyIndex &operator=(KeyIndex &&other) = delete;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief - Add record, vlist, if new, to TKey specific storage.
|
|
||||||
* @param key - TKey index to update.
|
|
||||||
* @param vlist - pointer to vlist entry to add
|
|
||||||
* @param record - pointer to record entry to add (contained in vlist)
|
|
||||||
*/
|
|
||||||
void Update(const TKey &key, mvcc::VersionList<TRecord> *vlist,
|
|
||||||
const TRecord *const record) {
|
|
||||||
GetKeyStorage(key)->access().insert(IndexEntry(vlist, record));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief - Get all the inserted vlists in TKey specific storage which
|
|
||||||
* still have that label visible in this transaction.
|
|
||||||
* @param key - key to query.
|
|
||||||
* @param t - current transaction, which determines visibility.
|
|
||||||
* @param current_state If true then the graph state for the
|
|
||||||
* current transaction+command is returned (insertions, updates and
|
|
||||||
* deletions performed in the current transaction+command are not
|
|
||||||
* ignored).
|
|
||||||
* @return iterable collection of vlists records<TRecord> with the requested
|
|
||||||
* TKey.
|
|
||||||
*/
|
|
||||||
auto GetVlists(const TKey &key, tx::Transaction &t, bool current_state) {
|
|
||||||
auto access = GetKeyStorage(key)->access();
|
|
||||||
auto begin = access.begin();
|
|
||||||
return index::GetVlists<typename SkipList<IndexEntry>::Iterator, IndexEntry,
|
|
||||||
TRecord>(
|
|
||||||
std::move(access), begin, [](const IndexEntry &) { return true; }, t,
|
|
||||||
[key](const IndexEntry &, const TRecord *record) {
|
|
||||||
return KeyIndex::Exists(key, record);
|
|
||||||
},
|
|
||||||
current_state);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief - Return number of items in skiplist associated with the given
|
|
||||||
* TKey. This number could be imprecise because of the underlying skiplist
|
|
||||||
* storage. Use this as a hint, and not as a rule.
|
|
||||||
* Moreover, some transaction probably sees only part of the skiplist since
|
|
||||||
* not all versions are visible for it. Also, garbage collection might now
|
|
||||||
* have been run for some time so the index might have accumulated garbage.
|
|
||||||
* @param key - key to query for.
|
|
||||||
* @return number of items
|
|
||||||
*/
|
|
||||||
auto Count(const TKey &key) { return GetKeyStorage(key)->access().size(); }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief - Removes from the index all entries for which records don't contain
|
|
||||||
* the given label anymore. Update all record which are not visible for any
|
|
||||||
* transaction with an id larger or equal to `id`.
|
|
||||||
*
|
|
||||||
* @param snapshot - the GC snapshot. Consists of the oldest active
|
|
||||||
* transaction's snapshot, with that transaction's id appened as last.
|
|
||||||
* @param engine - transaction engine to see which records are commited
|
|
||||||
*/
|
|
||||||
void Refresh(const tx::Snapshot &snapshot, tx::Engine &engine) {
|
|
||||||
return index::Refresh<TKey, IndexEntry, TRecord>(
|
|
||||||
indices_, snapshot, engine,
|
|
||||||
[](const TKey &key, const IndexEntry &entry) {
|
|
||||||
return KeyIndex::Exists(key, entry.record_);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns a vector of keys present in this index.
|
|
||||||
*/
|
|
||||||
std::vector<TKey> Keys() {
|
|
||||||
std::vector<TKey> keys;
|
|
||||||
for (auto &kv : indices_.access()) keys.push_back(kv.first);
|
|
||||||
return keys;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
/**
|
|
||||||
* @brief - Contains vlist and record pointers.
|
|
||||||
*/
|
|
||||||
class IndexEntry : public utils::TotalOrdering<IndexEntry> {
|
|
||||||
public:
|
|
||||||
IndexEntry(const IndexEntry &entry, const TRecord *const new_record)
|
|
||||||
: IndexEntry(entry.vlist_, new_record) {}
|
|
||||||
IndexEntry(mvcc::VersionList<TRecord> *const vlist,
|
|
||||||
const TRecord *const record)
|
|
||||||
: vlist_(vlist), record_(record) {}
|
|
||||||
|
|
||||||
// Comparision operators - we need them to keep this sorted inside
|
|
||||||
// skiplist.
|
|
||||||
// This needs to be sorted first by vlist and second record because we
|
|
||||||
// want to keep same vlists close together since we need to filter them to
|
|
||||||
// get only the unique ones.
|
|
||||||
bool operator<(const IndexEntry &other) const {
|
|
||||||
if (this->vlist_ != other.vlist_) return this->vlist_ < other.vlist_;
|
|
||||||
return this->record_ < other.record_;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool operator==(const IndexEntry &other) const {
|
|
||||||
return this->vlist_ == other.vlist_ && this->record_ == other.record_;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief - Checks if previous IndexEntry has the same vlist as this
|
|
||||||
* IndexEntry.
|
|
||||||
* @return - true if the vlists match.
|
|
||||||
*/
|
|
||||||
bool IsAlreadyChecked(const IndexEntry &previous) const {
|
|
||||||
return previous.vlist_ == this->vlist_;
|
|
||||||
}
|
|
||||||
|
|
||||||
mvcc::VersionList<TRecord> *const vlist_;
|
|
||||||
const TRecord *const record_;
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief - Get storage for this label. Creates new
|
|
||||||
* storage if this key is not yet indexed.
|
|
||||||
* @param key - key for which to access storage.
|
|
||||||
* @return pointer to skiplist of version list records<T>.
|
|
||||||
*/
|
|
||||||
auto GetKeyStorage(const TKey &key) {
|
|
||||||
auto access = indices_.access();
|
|
||||||
// Avoid excessive new/delete by first checking if it exists.
|
|
||||||
auto iter = access.find(key);
|
|
||||||
if (iter == access.end()) {
|
|
||||||
auto ret = access.insert(key, std::make_unique<SkipList<IndexEntry>>());
|
|
||||||
return ret.first->second.get();
|
|
||||||
}
|
|
||||||
return iter->second.get();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief - Check if Vertex contains label.
|
|
||||||
* @param label - label to check for.
|
|
||||||
* @return true if it contains, false otherwise.
|
|
||||||
*/
|
|
||||||
static bool Exists(storage::Label label, const Vertex *const v) {
|
|
||||||
DCHECK(v != nullptr) << "Vertex is nullptr.";
|
|
||||||
// We have to check for existance of label because the transaction
|
|
||||||
// might not see the label, or the label was deleted and not yet
|
|
||||||
// removed from the index.
|
|
||||||
const auto &labels = v->labels_;
|
|
||||||
return std::find(labels.begin(), labels.end(), label) != labels.end();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief - Check if Edge has edge_type.
|
|
||||||
* @param edge_type - edge_type to check for.
|
|
||||||
* @return true if it has that edge_type, false otherwise.
|
|
||||||
*/
|
|
||||||
static bool Exists(storage::EdgeType edge_type, const Edge *const e) {
|
|
||||||
DCHECK(e != nullptr) << "Edge is nullptr.";
|
|
||||||
// We have to check for equality of edge types because the transaction
|
|
||||||
// might not see the edge type, or the edge type was deleted and not yet
|
|
||||||
// removed from the index.
|
|
||||||
return e->edge_type_ == edge_type;
|
|
||||||
}
|
|
||||||
|
|
||||||
ConcurrentMap<TKey, std::unique_ptr<SkipList<IndexEntry>>> indices_;
|
|
||||||
};
|
|
||||||
} // namespace database
|
|
@ -1,533 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <optional>
|
|
||||||
|
|
||||||
#include "data_structures/concurrent/concurrent_map.hpp"
|
|
||||||
#include "data_structures/concurrent/skiplist.hpp"
|
|
||||||
#include "storage/common/index.hpp"
|
|
||||||
#include "storage/common/types/types.hpp"
|
|
||||||
#include "storage/single_node_ha/edge.hpp"
|
|
||||||
#include "storage/single_node_ha/mvcc/version_list.hpp"
|
|
||||||
#include "storage/single_node_ha/vertex.hpp"
|
|
||||||
#include "transactions/transaction.hpp"
|
|
||||||
#include "utils/bound.hpp"
|
|
||||||
#include "utils/total_ordering.hpp"
|
|
||||||
|
|
||||||
namespace database {
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Implements LabelPropertyIndex.
|
|
||||||
* Currently this provides implementation for:
|
|
||||||
* acquiring all entries which contain the given label, and a given property
|
|
||||||
* sorted by the property value
|
|
||||||
* acquiring all non-unique entries with the given label, and property, with
|
|
||||||
* exactly one property value
|
|
||||||
*/
|
|
||||||
class LabelPropertyIndex {
|
|
||||||
public:
|
|
||||||
LabelPropertyIndex(){};
|
|
||||||
LabelPropertyIndex(const LabelPropertyIndex &other) = delete;
|
|
||||||
LabelPropertyIndex(LabelPropertyIndex &&other) = delete;
|
|
||||||
LabelPropertyIndex &operator=(const LabelPropertyIndex &other) = delete;
|
|
||||||
LabelPropertyIndex &operator=(LabelPropertyIndex &&other) = delete;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief - Contain Label + property, to be used as an index key.
|
|
||||||
*/
|
|
||||||
class Key : public utils::TotalOrdering<Key> {
|
|
||||||
public:
|
|
||||||
const storage::Label label_;
|
|
||||||
const storage::Property property_;
|
|
||||||
|
|
||||||
Key(storage::Label label, storage::Property property)
|
|
||||||
: label_(label), property_(property) {}
|
|
||||||
|
|
||||||
// Comparison operators - we need them to keep this sorted inside skiplist.
|
|
||||||
bool operator<(const Key &other) const {
|
|
||||||
if (this->label_ != other.label_) return this->label_ < other.label_;
|
|
||||||
return this->property_ < other.property_;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool operator==(const Key &other) const {
|
|
||||||
return this->label_ == other.label_ && this->property_ == other.property_;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief - Creates index with the given key if it doesn't exist. Note that
|
|
||||||
* you still need to populate the index with existing records.
|
|
||||||
* @return - True if it created the index, false if it already exists.
|
|
||||||
*/
|
|
||||||
bool CreateIndex(const Key &key) {
|
|
||||||
auto access = indices_.access();
|
|
||||||
// Avoid creation if it already exists.
|
|
||||||
auto iter = access.find(key);
|
|
||||||
if (iter != access.end()) return false;
|
|
||||||
|
|
||||||
auto ret = access.insert(key, std::make_unique<SkipList<IndexEntry>>());
|
|
||||||
return ret.second;
|
|
||||||
}
|
|
||||||
|
|
||||||
  /**
   * Deletes the index (and frees its memory) for the given key, if present.
   * NOTE(review): the previous comment claimed a success value is returned,
   * but the function is void — the result of `remove` is discarded.
   */
  void DeleteIndex(const Key &key) { indices_.access().remove(key); }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief - Updates all indexes which should contain this vertex.
|
|
||||||
* @param vlist - pointer to vlist entry to add
|
|
||||||
* @param vertex - pointer to vertex record entry to add (contained in vlist)
|
|
||||||
*/
|
|
||||||
void UpdateOnLabelProperty(mvcc::VersionList<Vertex> *const vlist,
|
|
||||||
const Vertex *const vertex) {
|
|
||||||
const auto &labels = vertex->labels_;
|
|
||||||
// We need to check if the given vertex can be inserted in all indexes
|
|
||||||
for (auto &index : indices_.access()) {
|
|
||||||
// Vertex has the given label
|
|
||||||
if (std::find(labels.begin(), labels.end(), index.first.label_) ==
|
|
||||||
labels.end())
|
|
||||||
continue;
|
|
||||||
auto prop = vertex->properties_.at(index.first.property_);
|
|
||||||
if (prop.type() != PropertyValue::Type::Null) {
|
|
||||||
Insert(*index.second, prop, vlist, vertex);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief - Updates all indexes with `label` and any property in `vertex` that
|
|
||||||
* exists.
|
|
||||||
* @param label - indexes with this label might be updated if vertex contains
|
|
||||||
* the corresponding property.
|
|
||||||
* @param vlist - pointer to vlist entry to add
|
|
||||||
* @param vertex - pointer to vertex record entry to add (contained in vlist)
|
|
||||||
*/
|
|
||||||
void UpdateOnLabel(storage::Label label,
|
|
||||||
mvcc::VersionList<Vertex> *const vlist,
|
|
||||||
const Vertex *const vertex) {
|
|
||||||
// We need to check if the given vertex can be inserted in all indexes
|
|
||||||
for (auto &index : indices_.access()) {
|
|
||||||
if (index.first.label_ != label) continue;
|
|
||||||
auto prop = vertex->properties_.at(index.first.property_);
|
|
||||||
if (prop.type() != PropertyValue::Type::Null) {
|
|
||||||
// Property exists and vertex should be added to skiplist.
|
|
||||||
Insert(*index.second, prop, vlist, vertex);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief - Updates all indexes with `property` and any label in `vertex` that
|
|
||||||
* exists.
|
|
||||||
* @param property - indexes with this property might be updated if vertex
|
|
||||||
* contains the corresponding label.
|
|
||||||
* @param vlist - pointer to vlist entry to add
|
|
||||||
* @param vertex - pointer to vertex record entry to add (contained in vlist)
|
|
||||||
*/
|
|
||||||
void UpdateOnProperty(storage::Property property,
|
|
||||||
mvcc::VersionList<Vertex> *const vlist,
|
|
||||||
const Vertex *const vertex) {
|
|
||||||
const auto &labels = vertex->labels_;
|
|
||||||
for (auto &index : indices_.access()) {
|
|
||||||
if (index.first.property_ != property) continue;
|
|
||||||
if (std::find(labels.begin(), labels.end(), index.first.label_) !=
|
|
||||||
labels.end()) {
|
|
||||||
// Label exists and vertex should be added to skiplist.
|
|
||||||
Insert(*index.second, vertex->properties_.at(property), vlist, vertex);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief - Get all the inserted vlists in key specific storage which still
|
|
||||||
* have that label and property visible in this transaction.
|
|
||||||
* @param key - Label+Property to query.
|
|
||||||
* @param t - current transaction, which determines visibility.
|
|
||||||
* @param current_state If true then the graph state for the
|
|
||||||
* current transaction+command is returned (insertions, updates and
|
|
||||||
* deletions performed in the current transaction+command are not
|
|
||||||
* ignored).
|
|
||||||
* @return iterable collection of vlists of vertex records with the requested
|
|
||||||
* key sorted ascendingly by the property value.
|
|
||||||
*/
|
|
||||||
auto GetVlists(const Key &key, const tx::Transaction &t, bool current_state) {
|
|
||||||
DCHECK(IndexExists(key)) << "Index not yet ready.";
|
|
||||||
auto access = GetKeyStorage(key)->access();
|
|
||||||
auto begin = access.begin();
|
|
||||||
return index::GetVlists<typename SkipList<IndexEntry>::Iterator, IndexEntry,
|
|
||||||
Vertex, SkipList<IndexEntry>>(
|
|
||||||
std::move(access), begin, [](const IndexEntry &) { return true; }, t,
|
|
||||||
[key](const IndexEntry &entry, const Vertex *const vertex) {
|
|
||||||
return LabelPropertyIndex::Exists(key, entry.value_, vertex);
|
|
||||||
},
|
|
||||||
current_state);
|
|
||||||
}
|
|
||||||
|
|
||||||
  /**
   * @brief - Get all the inserted vlists in key specific storage which still
   * have that label and property visible in this transaction with property
   * value equal to 'value'.
   * @param key - Label+Property to query.
   * @param value - vlists with this value will be returned
   * @param t - current transaction, which determines visibility.
   * @param current_state If true then the graph state for the
   * current transaction+command is returned (insertions, updates and
   * deletions performed in the current transaction+command are not
   * ignored).
   * @return iterable collection of vlists of vertex records with the
   * requested key and value
   */
  auto GetVlists(const Key &key, const PropertyValue &value,
                 const tx::Transaction &t, bool current_state) {
    DCHECK(IndexExists(key)) << "Index not yet ready.";
    auto access = GetKeyStorage(key)->access();
    // Entries are ordered by (value, vlist, record) — see
    // IndexEntry::operator<. Seeding the probe with the minimal pointer
    // value yields the smallest possible entry for `value`, so
    // find_or_larger lands on the first entry with that value.
    auto min_ptr = std::numeric_limits<std::uintptr_t>::min();
    auto start_iter = access.find_or_larger(IndexEntry(
        value, reinterpret_cast<mvcc::VersionList<Vertex> *>(min_ptr),
        reinterpret_cast<const Vertex *>(min_ptr)));
    return index::GetVlists<typename SkipList<IndexEntry>::Iterator, IndexEntry,
                            Vertex>(
        std::move(access), start_iter,
        // Keep iterating while the entry's value compares equal to `value`;
        // equality is expressed via Less to match the index ordering.
        [value](const IndexEntry &entry) {
          return !IndexEntry::Less(value, entry.value_) &&
                 !IndexEntry::Less(entry.value_, value);
        },
        t,
        // Post-filter: the vertex must still match the key/value.
        [key](const IndexEntry &entry, const Vertex *const vertex) {
          return LabelPropertyIndex::Exists(key, entry.value_, vertex);
        },
        current_state);
  }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get an iterable over all mvcc::VersionLists that are contained in this
|
|
||||||
* index and satisfy the given bounds.
|
|
||||||
*
|
|
||||||
* The returned iterator will only contain vertices/edges whose property value
|
|
||||||
* is comparable with the given bounds (w.r.t. type). This has implications on
|
|
||||||
* Cypher query execuction semantics which have not been resolved yet.
|
|
||||||
*
|
|
||||||
* At least one of the bounds must be specified. Bounds can't be @c
|
|
||||||
* PropertyValue::Null. If both bounds are specified, their PropertyValue
|
|
||||||
* elements must be of comparable types.
|
|
||||||
*
|
|
||||||
* @param key - Label+Property to query.
|
|
||||||
* @param lower - Lower bound of the interval.
|
|
||||||
* @param upper - Upper bound of the interval.
|
|
||||||
* @param t - current transaction, which determines visibility.
|
|
||||||
* @param current_state If true then the graph state for the
|
|
||||||
* current transaction+command is returned (insertions, updates and
|
|
||||||
* deletions performed in the current transaction+command are not
|
|
||||||
* ignored).
|
|
||||||
* @return iterable collection of mvcc:VersionLists pointers that
|
|
||||||
* satisfy the bounds and are visible to the given transaction.
|
|
||||||
*/
|
|
||||||
auto GetVlists(const Key &key,
|
|
||||||
const std::optional<utils::Bound<PropertyValue>> lower,
|
|
||||||
const std::optional<utils::Bound<PropertyValue>> upper,
|
|
||||||
const tx::Transaction &transaction, bool current_state) {
|
|
||||||
DCHECK(IndexExists(key)) << "Index not yet ready.";
|
|
||||||
|
|
||||||
auto type = [](const auto &bound) { return bound.value().value().type(); };
|
|
||||||
CHECK(lower || upper) << "At least one bound must be provided";
|
|
||||||
CHECK(!lower || type(lower) != PropertyValue::Type::Null)
|
|
||||||
<< "Null value is not a valid index bound";
|
|
||||||
CHECK(!upper || type(upper) != PropertyValue::Type::Null)
|
|
||||||
<< "Null value is not a valid index bound";
|
|
||||||
|
|
||||||
// helper function for creating a bound with an IndexElement
|
|
||||||
auto make_index_bound = [](const auto &optional_bound, bool bottom) {
|
|
||||||
std::uintptr_t ptr_bound =
|
|
||||||
bottom ? std::numeric_limits<std::uintptr_t>::min()
|
|
||||||
: std::numeric_limits<std::uintptr_t>::max();
|
|
||||||
return IndexEntry(
|
|
||||||
optional_bound.value().value(),
|
|
||||||
reinterpret_cast<mvcc::VersionList<Vertex> *>(ptr_bound),
|
|
||||||
reinterpret_cast<const Vertex *>(ptr_bound));
|
|
||||||
};
|
|
||||||
|
|
||||||
auto access = GetKeyStorage(key)->access();
|
|
||||||
|
|
||||||
// create the iterator startpoint based on the lower bound
|
|
||||||
auto start_iter = lower
|
|
||||||
? access.find_or_larger(make_index_bound(
|
|
||||||
lower, lower.value().IsInclusive()))
|
|
||||||
: access.begin();
|
|
||||||
|
|
||||||
// a function that defines if an entry staisfies the filtering predicate.
|
|
||||||
// since we already handled the lower bound, we only need to deal with the
|
|
||||||
// upper bound and value type
|
|
||||||
std::function<bool(const IndexEntry &entry)> predicate;
|
|
||||||
if (lower && upper &&
|
|
||||||
!AreComparablePropertyValueTypes(type(lower), type(upper)))
|
|
||||||
predicate = [](const IndexEntry &) { return false; };
|
|
||||||
else if (upper) {
|
|
||||||
auto upper_index_entry =
|
|
||||||
make_index_bound(upper, upper.value().IsExclusive());
|
|
||||||
predicate = [upper_index_entry](const IndexEntry &entry) {
|
|
||||||
return AreComparablePropertyValueTypes(
|
|
||||||
entry.value_.type(), upper_index_entry.value_.type()) &&
|
|
||||||
entry < upper_index_entry;
|
|
||||||
};
|
|
||||||
} else {
|
|
||||||
auto lower_type = type(lower);
|
|
||||||
make_index_bound(lower, lower.value().IsExclusive());
|
|
||||||
predicate = [lower_type](const IndexEntry &entry) {
|
|
||||||
return AreComparablePropertyValueTypes(entry.value_.type(), lower_type);
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
return index::GetVlists<typename SkipList<IndexEntry>::Iterator, IndexEntry,
|
|
||||||
Vertex>(
|
|
||||||
std::move(access), start_iter, predicate, transaction,
|
|
||||||
[key](const IndexEntry &entry, const Vertex *const vertex) {
|
|
||||||
return LabelPropertyIndex::Exists(key, entry.value_, vertex);
|
|
||||||
},
|
|
||||||
current_state);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief - Check for existance of index.
|
|
||||||
* @param key - Index key
|
|
||||||
* @return true if the index with that key exists
|
|
||||||
*/
|
|
||||||
bool IndexExists(const Key &key) {
|
|
||||||
auto access = indices_.access();
|
|
||||||
return access.find(key) != access.end();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief - Return number of items in skiplist associated with the given
|
|
||||||
* key. This number could be imprecise because of the underlying skiplist
|
|
||||||
* storage. Use this as a hint, and not as a rule. Fails if index doesn't
|
|
||||||
* exist.
|
|
||||||
* Moreover, some transaction probably sees only part of the skiplist since
|
|
||||||
* not all versions are visible for it. Also, garbage collection might now
|
|
||||||
* have been run for some time so the index might have accumulated garbage.
|
|
||||||
* @param key - key to query for.
|
|
||||||
* @return number of items
|
|
||||||
*/
|
|
||||||
int64_t Count(const Key &key) {
|
|
||||||
auto index = GetKeyStorage(key);
|
|
||||||
CHECK(index != nullptr) << "Index doesn't exist.";
|
|
||||||
return index->access().size();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the approximate position and count of the given value in the
|
|
||||||
* index for the given Key.
|
|
||||||
*
|
|
||||||
* Both are approximations for several reasons. Initially the position
|
|
||||||
* and count are obtained from the skipist (the index) and as such are
|
|
||||||
* not exact for perfromance reasons. At the same time the position
|
|
||||||
* and count are calculated based on property value comparison: an
|
|
||||||
* additional error is accumulated because the index could contain
|
|
||||||
* the same vertex with the same value multiple times,
|
|
||||||
* as well as the same vertex with different values.
|
|
||||||
*/
|
|
||||||
auto PositionAndCount(const Key &key, const PropertyValue &value) {
|
|
||||||
auto access = GetKeyStorage(key)->access();
|
|
||||||
return access.position_and_count(
|
|
||||||
value,
|
|
||||||
// the 'less' function
|
|
||||||
[](const PropertyValue &a, const IndexEntry &b) {
|
|
||||||
return IndexEntry::Less(a, b.value_);
|
|
||||||
},
|
|
||||||
// the 'equal_to' function
|
|
||||||
[](const PropertyValue &a, const IndexEntry &b) {
|
|
||||||
return !(IndexEntry::Less(a, b.value_) ||
|
|
||||||
IndexEntry::Less(b.value_, a));
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief - Removes from the index all entries for which records don't contain
|
|
||||||
* the given label anymore, or the record was deleted before this transaction
|
|
||||||
* id.
|
|
||||||
*
|
|
||||||
* @param snapshot - the GC snapshot. Consists of the oldest active
|
|
||||||
* transaction's snapshot, with that transaction's id appened as last.
|
|
||||||
*/
|
|
||||||
void Refresh(const tx::Snapshot &snapshot, tx::Engine &engine) {
|
|
||||||
return index::Refresh<Key, IndexEntry, Vertex>(
|
|
||||||
indices_, snapshot, engine,
|
|
||||||
[](const Key &key, const IndexEntry &entry) {
|
|
||||||
return LabelPropertyIndex::Exists(key, entry.value_, entry.record_);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns a vector of keys present in this index.
|
|
||||||
*/
|
|
||||||
std::vector<Key> Keys() {
|
|
||||||
std::vector<Key> keys;
|
|
||||||
for (auto &kv : indices_.access()) keys.push_back(kv.first);
|
|
||||||
return keys;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
static bool AreComparablePropertyValueTypes(PropertyValue::Type a,
|
|
||||||
PropertyValue::Type b) {
|
|
||||||
auto is_numeric = [](const PropertyValue::Type t) {
|
|
||||||
return t == PropertyValue::Type::Int || t == PropertyValue::Type::Double;
|
|
||||||
};
|
|
||||||
|
|
||||||
return a == b || (is_numeric(a) && is_numeric(b));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * @brief - Contains value, vlist and vertex record to distinguish between
 * index entries.
 */
class IndexEntry : public utils::TotalOrdering<IndexEntry> {
 public:
  /// Re-keys an existing entry onto a newer record of the same vertex,
  /// keeping the value and version-list pointer.
  IndexEntry(const IndexEntry &entry, const Vertex *new_record)
      : IndexEntry(entry.value_, entry.vlist_, new_record) {}
  IndexEntry(const PropertyValue &value, mvcc::VersionList<Vertex> *vlist,
             const Vertex *record)
      : value_(value), vlist_(vlist), record_(record) {}

  // Comparision operators - we need them to keep this sorted inside
  // skiplist.
  bool operator<(const IndexEntry &other) const {
    bool this_value_smaller = Less(this->value_, other.value_);
    if (this_value_smaller || Less(other.value_, this->value_))
      return this_value_smaller;
    // Values are equivalent under Less - fall back to pointer order so
    // entries for distinct vertices/records remain totally ordered.
    if (this->vlist_ != other.vlist_) return this->vlist_ < other.vlist_;
    return this->record_ < other.record_;
  }

  // Equality derived from operator< so it matches the skiplist ordering
  // exactly (two entries are equal iff neither orders before the other).
  bool operator==(const IndexEntry &other) const {
    return !(*this < other) && !(other < *this);
  }

  /**
   * @brief - For two property values - orders the records by type and then by
   * value. Except for integers and doubles - those are both converted to
   * double and then compared.
   * @return true if the first property value is smaller (should be before)
   * than the second one
   */
  static bool Less(const PropertyValue &a, const PropertyValue &b) {
    // Incomparable types order by the type enum itself.
    if (!AreComparablePropertyValueTypes(a.type(), b.type()))
      return a.type() < b.type();

    if (a.type() == b.type()) {
      switch (a.type()) {
        case PropertyValue::Type::Null:
          return false;
        case PropertyValue::Type::String:
          return a.ValueString() < b.ValueString();
        case PropertyValue::Type::Bool:
          return a.ValueBool() < b.ValueBool();
        case PropertyValue::Type::Int:
          return a.ValueInt() < b.ValueInt();
        case PropertyValue::Type::Double:
          return a.ValueDouble() < b.ValueDouble();
        case PropertyValue::Type::List: {
          // Shorter lists order first; equal sizes compare element-wise.
          auto va = a.ValueList();
          auto vb = b.ValueList();
          if (va.size() != vb.size()) return va.size() < vb.size();
          return lexicographical_compare(va.begin(), va.end(), vb.begin(),
                                         vb.end(), Less);
        }
        case PropertyValue::Type::Map: {
          // Smaller maps order first; equal sizes compare (key, value)
          // pairs lexicographically.
          auto ma = a.ValueMap();
          auto mb = b.ValueMap();
          if (ma.size() != mb.size()) return ma.size() < mb.size();
          const auto cmp = [](const auto &a, const auto &b) {
            if (a.first != b.first)
              return a.first < b.first;
            else
              return Less(a.second, b.second);
          };
          return lexicographical_compare(ma.begin(), ma.end(), mb.begin(),
                                         mb.end(), cmp);
        }
      }
    }

    // helper for getting a double from PropertyValue, if possible
    auto get_double = [](const PropertyValue &value) {
      DCHECK(value.type() == PropertyValue::Type::Int ||
             value.type() == PropertyValue::Type::Double)
          << "Invalid data type.";
      if (value.type() == PropertyValue::Type::Int)
        return static_cast<double>(value.ValueInt());
      return value.ValueDouble();
    };

    // Types are int and double - convert int to double
    return get_double(a) < get_double(b);
  }

  /**
   * @brief - Check if previous IndexEntry represents the same vlist/value
   * pair.
   * @return - true if IndexEntries are equal by the vlist/value pair.
   */
  bool IsAlreadyChecked(const IndexEntry &previous) const {
    return previous.vlist_ == this->vlist_ &&
           !Less(previous.value_, this->value_) &&
           !Less(this->value_, previous.value_);
  }

  const PropertyValue value_;
  mvcc::VersionList<Vertex> *const vlist_{nullptr};
  const Vertex *const record_{nullptr};
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief - Insert value, vlist, vertex into corresponding index (key) if
|
|
||||||
* the index exists.
|
|
||||||
* @param index - into which index to add
|
|
||||||
* @param value - value which to add
|
|
||||||
* @param vlist - pointer to vlist entry to add
|
|
||||||
* @param vertex - pointer to vertex record entry to add (contained in
|
|
||||||
* vlist)
|
|
||||||
*/
|
|
||||||
void Insert(SkipList<IndexEntry> &index, const PropertyValue &value,
|
|
||||||
mvcc::VersionList<Vertex> *const vlist,
|
|
||||||
const Vertex *const vertex) {
|
|
||||||
index.access().insert(IndexEntry{value, vlist, vertex});
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief - Get storage for this key.
|
|
||||||
* @param key - Label and and property for which to query.
|
|
||||||
* @return pointer to skiplist of IndexEntries, if none which matches key
|
|
||||||
* exists return nullptr
|
|
||||||
*/
|
|
||||||
SkipList<IndexEntry> *GetKeyStorage(const Key &key) {
|
|
||||||
auto access = indices_.access();
|
|
||||||
auto iter = access.find(key);
|
|
||||||
if (iter == access.end()) return nullptr;
|
|
||||||
return iter->second.get();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief - Check if Vertex contains label and property with the given
|
|
||||||
* value.
|
|
||||||
* @param key - label and property to check for.
|
|
||||||
* @param value - value of property to compare
|
|
||||||
* @return true if it contains, false otherwise.
|
|
||||||
*/
|
|
||||||
static bool Exists(const Key &key, const PropertyValue &value,
|
|
||||||
const Vertex *const v) {
|
|
||||||
DCHECK(v != nullptr) << "Vertex is nullptr.";
|
|
||||||
// We have to check for existance of label because the transaction
|
|
||||||
// might not see the label, or the label was deleted and not yet
|
|
||||||
// removed from the index.
|
|
||||||
const auto &labels = v->labels_;
|
|
||||||
if (std::find(labels.begin(), labels.end(), key.label_) == labels.end())
|
|
||||||
return false;
|
|
||||||
auto prop = v->properties_.at(key.property_);
|
|
||||||
// Property doesn't exists.
|
|
||||||
if (prop.type() == PropertyValue::Type::Null) return false;
|
|
||||||
// Property value is the same as expected.
|
|
||||||
return !IndexEntry::Less(prop, value) && !IndexEntry::Less(value, prop);
|
|
||||||
}
|
|
||||||
|
|
||||||
ConcurrentMap<Key, std::unique_ptr<SkipList<IndexEntry>>> indices_;
|
|
||||||
};
|
|
||||||
} // namespace database
|
|
@ -1,334 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <atomic>
|
|
||||||
#include <iostream>
|
|
||||||
#include <optional>
|
|
||||||
|
|
||||||
#include "transactions/commit_log.hpp"
|
|
||||||
#include "transactions/single_node_ha/engine.hpp"
|
|
||||||
#include "transactions/transaction.hpp"
|
|
||||||
|
|
||||||
#include "storage/common/locking/record_lock.hpp"
|
|
||||||
#include "storage/common/mvcc/version.hpp"
|
|
||||||
|
|
||||||
// the mvcc implementation used here is very much like postgresql's
|
|
||||||
// more info: https://momjian.us/main/writings/pgsql/mvcc.pdf
|
|
||||||
|
|
||||||
namespace mvcc {
|
|
||||||
|
|
||||||
// A single MVCC version of a record (vertex/edge data). Stores the
// transaction/command ids that created and expired this version, plus
// commit/abort hint bits that cache transaction outcomes so the commit log
// does not have to be consulted on every visibility check.
template <class T>
class Record : public Version<T> {
 public:
  Record() = default;
  // Records are referenced by raw pointers from version lists and indexes,
  // so they must never be copied or moved.
  Record(const Record &) = delete;
  Record &operator=(const Record &) = delete;
  Record(Record &&) = delete;
  Record &operator=(Record &&) = delete;

  // check if this record is visible to the transaction t
  bool visible(const tx::Transaction &t) {
    // Mike Olson says 17 march 1993: the tests in this routine are correct;
    // if you think they're not, you're wrong, and you should think about it
    // again. i know, it happened to me.

    // fetch expiration info in a safe way (see fetch_exp for details)
    tx::TransactionId tx_exp;
    tx::CommandId cmd_exp;
    std::tie(tx_exp, cmd_exp) = fetch_exp();

    return ((tx_.cre == t.id_ &&    // inserted by the current transaction
             cmd_.cre < t.cid() &&  // before this command, and
             (tx_exp == 0 ||        // the row has not been deleted, or
              (tx_exp == t.id_ &&   // it was deleted by the current
                                    // transaction
               cmd_exp >= t.cid())))  // but not before this command,
            ||                        // or
            (visible_from(Hints::kCre, tx_.cre,
                          t) &&  // the record was inserted by a
                                 // committed transaction, and
             (tx_exp == 0 ||     // the record has not been deleted, or
              (tx_exp == t.id_ &&  // the row is being deleted by this
                                   // transaction
               cmd_exp >= t.cid()) ||  // but it's not deleted "yet", or
              (tx_exp != t.id_ &&      // the row was deleted by another
                                       // transaction
               !visible_from(Hints::kExp, tx_exp,
                             t)  // that has not been committed
               ))));
  }

  // Stamps this record as created by transaction `t`'s current command.
  // May be called only once per record.
  void mark_created(const tx::Transaction &t) {
    DCHECK(tx_.cre == 0) << "Marking node as created twice.";
    tx_.cre = t.id_;
    cmd_.cre = t.cid();
  }

  // Stamps this record as expired (deleted) by transaction `t`'s current
  // command. Unlike mark_created, this may legitimately overwrite a previous
  // expiration (e.g. when the earlier expirer aborted).
  void mark_expired(const tx::Transaction &t) {
    tx_.exp = t.id_;
    cmd_.exp = t.cid();
  }

  // True if the transaction that expired this record has committed.
  bool exp_committed(tx::Engine &engine) {
    return committed(Hints::kExp, engine);
  }

  /**
   * Check if this record is visible w.r.t. to the given garbage collection
   * snapshot. See source comments for exact logic.
   *
   * @param snapshot - the GC snapshot. Consists of the oldest active
   * transaction's snapshot, with that transaction's id appened as last.
   * @param engine - transaction engine used to check commit/abort status.
   */
  bool is_not_visible_from(const tx::Snapshot &snapshot,
                           const tx::Engine &engine) const {
    // first get tx.exp so that all the subsequent checks operate on
    // the same id. otherwise there could be a race condition
    auto exp_id = tx_.exp.load();

    // a record is NOT visible if:
    // 1. it creating transaction aborted (last check), and is also older than
    //    the current oldest active transaction (optimization) OR
    // 2. a) it's expiration is not 0 (some transaction expired it)
    //    AND
    //    b) the expiring transaction is older than latest active
    //    AND
    //    c) that transaction committed (as opposed to aborted)
    //    AND
    //    d) that transaction is not in oldest active transaction's
    //       snapshot (consequently also not in the snapshots of
    //       newer transactions)
    return (exp_id != 0 && exp_id < snapshot.back() &&
            committed(Hints::kExp, engine) && !snapshot.contains(exp_id)) ||
           (tx_.cre.load() < snapshot.back() && cre_aborted(engine));
  }

  // TODO: Test this
  // True if this record is visible for write.
  // Note that this logic is different from the one above
  // in the sense that a record is visible if created before
  // OR DURING this command. this is done to support cypher's
  // queries which can match, update and return in the same query
  bool is_visible_write(const tx::Transaction &t) {
    // fetch expiration info in a safe way (see fetch_exp for details)
    tx::TransactionId tx_exp;
    tx::CommandId cmd_exp;
    std::tie(tx_exp, cmd_exp) = fetch_exp();

    return (tx_.cre == t.id_ &&     // inserted by the current transaction
            cmd_.cre <= t.cid() &&  // before OR DURING this command, and
            (tx_exp == 0 ||         // the row has not been deleted, or
             (tx_exp == t.id_ &&    // it was deleted by the current
                                    // transaction
              cmd_exp >= t.cid())));  // but not before this command,
  }

  /**
   * True if this record is created in the current command
   * of the given transaction.
   */
  bool is_created_by(const tx::Transaction &t) {
    return tx_.cre == t.id_ && cmd_.cre == t.cid();
  }

  /**
   * True if this record is expired in the current command
   * of the given transaction.
   */
  bool is_expired_by(const tx::Transaction &t) const {
    return std::make_pair(t.id_, t.cid()) == fetch_exp();
  }

  // Read-only access to creation/expiration transaction ids.
  const auto &tx() const { return tx_; }
  // Read-only access to creation/expiration command ids.
  const auto &cmd() const { return cmd_; }

  /**
   * Makes sure that create and expiry are in sync with hints if they are
   * committed or aborted and are before the `tx_cutoff`.
   * `tx_cutoff` exists as a performance optimization to avoid setting hint bits
   * on records for which we don't need to have a guarantee that they are set as
   * part of GC hints setting procedure
   */
  void populate_hints(const tx::Engine &engine, tx::TransactionId tx_cutoff) {
    populate_hint_if_possible(engine, Hints::kCre, tx_cutoff);
    if (!populate_hint_if_possible(engine, Hints::kExp, tx_cutoff)) {
      // Exp is aborted and we can't set the hint, this way we don't have to set
      // the hint because an aborted transaction which expires a record is the
      // same thing as a non-expired record
      tx::TransactionId expected;
      do {
        expected = tx_.exp;
        // If the transaction expiry is no longer aborted we don't need to
        // update it anymore, and hints can't be set since it's obviously an
        // active transaction - there might be a case where this transaction
        // gets finished and committed in the meantime and hints could be set,
        // but since we are not going to delete info for this transaction from
        // the commit log since it wasn't older than the oldest active
        // transaction at the time, or before the invocation of this method;
        // we are in the clear
        if (!engine.Info(expected).is_aborted()) break;
      } while (!tx_.exp.compare_exchange_weak(expected, 0));
      // Ideally we should set the command id as well, but by setting it we
      // can't guarantee that some new update won't change the transaction id
      // and command id before we had a chance to set it, and just leaving it
      // unchanged and relying on all methods to operate on [tx_id: 0, cmd_id:
      // some cmd] as a non-transaction doesn't seem too crazy
    }
  }

 private:
  /**
   * Fast indicators if a transaction has committed or aborted. It is possible
   * the hints do not have that information, in which case the commit log needs
   * to be consulted (a slower operation).
   */
  class Hints {
   public:
    /// Masks for the creation/expration and commit/abort positions.
    /// Each (cre/exp) side owns two bits: one for "committed", one for
    /// "aborted"; combining e.g. kCre & kCmt isolates a single bit.
    static constexpr uint8_t kCre = 0b0011;
    static constexpr uint8_t kExp = 0b1100;
    static constexpr uint8_t kCmt = 0b0101;
    static constexpr uint8_t kAbt = 0b1010;

    /** Returns true if any bit under the given mask is set. */
    bool Get(uint8_t mask) const { return bits_ & mask; }

    /** Sets all the bits under the given mask. */
    void Set(uint8_t mask) { bits_.fetch_or(mask); }

    /** Clears all the bits under the given mask. */
    void Clear(uint8_t mask) { bits_.fetch_and(~mask); }

   private:
    std::atomic<uint8_t> bits_{0};
  };

  // Creation/expiration id pair. Both fields are atomic because concurrent
  // transactions can race on expiration.
  template <typename TId>
  struct CreExp {
    std::atomic<TId> cre{0};
    std::atomic<TId> exp{0};
  };

  // tx.cre is the id of the transaction that created the record
  // and tx.exp is the id of the transaction that deleted the record
  // These values are used to determine the visibility of the record
  // to the current transaction.
  CreExp<tx::TransactionId> tx_;

  // cmd.cre is the id of the command in this transaction that created the
  // record and cmd.exp is the id of the command in this transaction that
  // deleted the record. These values are used to determine the visibility
  // of the record to the current command in the running transaction.
  CreExp<tx::CommandId> cmd_;

  // mutable: visibility checks on logically-const records still cache
  // commit/abort outcomes here.
  mutable Hints hints_;

  /** Fetch the (transaction, command) expiration before the check
   * because they can be concurrently modified by multiple transactions.
   * Do it in a loop to ensure that command is consistent with transaction.
   */
  auto fetch_exp() const {
    tx::TransactionId tx_exp;
    tx::CommandId cmd_exp;
    do {
      tx_exp = tx_.exp;
      cmd_exp = cmd_.exp;
    } while (tx_exp != tx_.exp);
    return std::make_pair(tx_exp, cmd_exp);
  }

  /**
   * Populates hint if it is not set for the given create/expiry mask and is
   * before the `tx_cutoff` if specified. Note that it doesn't set hint bits for
   * expiry transactions which abort because it's too expensive to maintain
   * correctness of those hints with regards to race conditions
   * @returns - true if hints are now equal to transaction status
   * (committed/aborted), will only be false if we are trying to set hint for
   * aborted transaction which is this records expiry
   */
  bool populate_hint_if_possible(
      const tx::Engine &engine, const uint8_t mask,
      const std::optional<tx::TransactionId> tx_cutoff = std::nullopt) const {
    DCHECK(mask == Hints::kCre || mask == Hints::kExp)
        << "Mask should be either for creation or expiration";
    if (hints_.Get(mask)) return true;
    auto id = mask == Hints::kCre ? tx_.cre.load() : tx_.exp.load();
    // Nothing to do here if there is no id or id is larger than tx_cutoff
    if (!id || (tx_cutoff && id >= *tx_cutoff)) return true;
    auto info = engine.Info(id);
    if (info.is_committed()) {
      hints_.Set(mask & Hints::kCmt);
    } else if (info.is_aborted()) {
      // Abort hints can only be updated for creation hints because only one
      // transaction can be creating a single record, so there is no races
      if (mask == Hints::kCre)
        hints_.Set(mask & Hints::kAbt);
      else
        return false;
    }
    return true;
  }

  /**
   * @brief - Check if the transaciton `id` has comitted before `t` started
   * (that means that edits done by transaction `id` are visible in `t`)
   *
   * Evaluates to true if that transaction has committed,
   * it started before `t` and it's not in it's snapshot.
   *
   * @param mask - Hint bits mask (either Hints::kCre or Hints::kExp).
   * @param id - id to check if it's commited and visible
   * @param t - transaction from whose viewpoint visibility is evaluated.
   * @return true if the id is commited and visible for the transaction t.
   */
  bool visible_from(uint8_t mask, tx::TransactionId id,
                    const tx::Transaction &t) {
    DCHECK(mask == Hints::kCre || mask == Hints::kExp)
        << "Mask must be either kCre or kExp";
    // Dominik Gleich says 4 april 2017: the tests in this routine are correct;
    // if you think they're not, you're wrong, and you should think about it
    // again. I know, it happened to me (and also to Matej Gradicek).

    // You certainly can't see the transaction with id greater than yours as
    // that means it started after this transaction and if it commited, it
    // commited after this transaction has started.
    if (id >= t.id_) return false;

    // The creating transaction is still in progress (examine snapshot)
    if (t.snapshot().contains(id)) return false;

    return committed(mask, t.engine_);
  }

  /**
   * @brief - Check if the transaction selected by `mask` is committed.
   *
   * @param mask - Hint bits mask (either Hints::kCre or Hints::kExp).
   * @param engine - engine instance with information about transaction
   * statuses
   * @return true if it's commited, false otherwise
   */
  bool committed(uint8_t mask, const tx::Engine &engine) const {
    DCHECK(mask == Hints::kCre || mask == Hints::kExp)
        << "Mask must be either kCre or kExp";
    populate_hint_if_possible(engine, mask);
    return hints_.Get(Hints::kCmt & mask);
  }

  /**
   * @brief - Check if tx_.cre is aborted. If you need to check for exp
   * transaction do it manually by looking at commit log. This function can't do
   * that for you since hints can't be used for exp transaction (reason is
   * described in function above).
   *
   * @param engine - engine instance with information about transaction
   * statuses
   * @return true if it's aborted, false otherwise
   */
  bool cre_aborted(const tx::Engine &engine) const {
    // Populate hints if not set and return result from hints
    DCHECK(populate_hint_if_possible(engine, Hints::kCre))
        << "Hints not populated";
    return hints_.Get(Hints::kAbt & Hints::kCre);
  }
};
|
|
||||||
} // namespace mvcc
|
|
@ -1,261 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include "storage/common/locking/record_lock.hpp"
|
|
||||||
#include "storage/common/mvcc/exceptions.hpp"
|
|
||||||
#include "storage/common/types/types.hpp"
|
|
||||||
#include "transactions/transaction.hpp"
|
|
||||||
#include "utils/exceptions.hpp"
|
|
||||||
|
|
||||||
namespace mvcc {
|
|
||||||
|
|
||||||
// A linked list of MVCC record versions for a single vertex/edge, newest
// version first. The list's address identifies the vertex/edge to the rest
// of the storage layer for its entire lifetime.
template <class T>
class VersionList {
 public:
  /**
   * @brief Constructor that is used to insert one item into VersionList.
   *
   * @param t - transaction
   * @param gid - Version list identifier. Uniqueness guaranteed by the code
   * creating this version list.
   * @param args - args forwarded to constructor of item T (for
   * creating the first Record (Version) in this VersionList.
   */
  template <typename... Args>
  VersionList(const tx::Transaction &t, storage::Gid gid, Args &&... args)
      : gid_(gid) {
    // TODO replace 'new' with something better
    auto *v1 = new T(std::forward<Args>(args)...);
    v1->mark_created(t);
    head_ = v1;
  }

  VersionList() = delete;
  VersionList(const VersionList &) = delete;
  VersionList &operator=(const VersionList &) = delete;
  // We do a lot of raw-pointer ops with VLists, and these ops assume that a
  // VList's address identifies a vertex/edge absolutely and during it's whole
  // lifteme. We also assume that the VList owner is the database and that
  // ownership is also handled via raw pointers so this shouldn't be moved or
  // move assigned.
  VersionList(VersionList &&other) = delete;
  VersionList &operator=(VersionList &&other) = delete;

  // Deleting head_ releases the whole chain; see GcDeleted for the note on
  // recursive destruction of older versions.
  ~VersionList() { delete head_.load(); }

  // Debug/diagnostic dump of every version in the list, newest first.
  friend std::ostream &operator<<(std::ostream &stream,
                                  const VersionList<T> &vlist) {
    stream << "VersionList" << std::endl;

    T *record = vlist.head_;

    while (record != nullptr) {
      stream << "-- " << *record << std::endl;
      record = record->next();
    }

    return stream;
  }

  /**
   * Garbage collects records that are not reachable/visible anymore.
   *
   * Relinks this version-list so that garbage collected records are no
   * longer reachable through this version list.
   * Visibility is defined in mvcc::Record::is_not_visible_from,
   * to which the given `snapshot` is passed.
   *
   * This method is NOT thread-safe.
   *
   * @param snapshot - the GC snapshot. Consists of the oldest active
   * transaction's snapshot, with that transaction's id appened as last.
   * @param engine - transaction engine to use - we need it to check which
   * records were commited and which weren't
   * @return pair<status, to_delete>; status is true - If version list is empty
   * after garbage collection. to_delete points to the newest record that is not
   * visible anymore. If none exists to_delete will point to nullptr.
   */
  std::pair<bool, T *> GcDeleted(const tx::Snapshot &snapshot,
                                 const tx::Engine &engine) {
    //    nullptr
    //       |
    //     [v1]      ...  all of this gets deleted!
    //       |
    //     [v2] <------+  head_of_deletable_records
    //       |         |
    //     [v3] <------+  oldest_visible_record
    //       |         |  Jump backwards until you find the oldest visible
    //   [VerList] ----+  record, or you reach the end of the list
    //

    T *head = head_;
    T *current = head;
    T *oldest_visible_record = nullptr;
    while (current) {
      // Populate hints only when needed to avoid excessive rpc calls on
      // workers.
      // snapshot.back() corresponds to the oldest active transaction,
      // and this makes it set only hint bits when the creating or expiring
      // transaction of a record is older than that)
      current->populate_hints(engine, snapshot.back());
      if (!current->is_not_visible_from(snapshot, engine))
        oldest_visible_record = current;
      current = current->next();
    }

    if (oldest_visible_record) {
      T *head_of_deletable_records = oldest_visible_record->next();
      // oldest_visible_record might be visible to some transaction but
      // head_of_deletable_records is not and will never be visted by the find
      // function and as such doesn't represent pointer invalidation
      // race-condition risk.
      oldest_visible_record->next(nullptr);  // No transaction will look
                                             // further than this record and
                                             // that's why it's safe to set
                                             // next to nullptr.
      // Calling destructor of head_of_deletable_records will clean everything
      // older than this record since they are called recursively.
      return std::make_pair(false, head_of_deletable_records);
    }

    // This can happen only if the head points to a expired record. Since there
    // is no visible records in this version_list we can remove it.
    head_ = nullptr;
    // This is safe to return as ready for deletion since we unlinked head
    // above and this will only be deleted after the last active transaction
    // ends.
    return std::make_pair(true, head);
  }

  /**
   * @brief - returns oldest record
   * @return nullptr if none exist
   */
  T *Oldest() {
    T *r = head_;
    while (r && r->next(std::memory_order_seq_cst))
      r = r->next(std::memory_order_seq_cst);
    return r;
  }

  // Returns the newest version visible to transaction `t`, or nullptr if
  // no version is visible.
  T *find(const tx::Transaction &t) {
    T *r = head_;

    //    nullptr
    //       |
    //     [v1]      ...
    //       |
    //     [v2] <------+
    //       |         |
    //     [v3] <------+
    //       |         |  Jump backwards until you find a first visible
    //   [VerList] ----+  version, or you reach the end of the list
    //
    while (r != nullptr && !r->visible(t))
      r = r->next(std::memory_order_seq_cst);

    return r;
  }

  /**
   * Looks for and sets two versions. The 'old' version is the
   * newest version that is visible by the current transaction+command,
   * but has not been created by it. The 'new' version is the version
   * that has been created by current transaction+command.
   *
   * It is possible that both, either or neither are found:
   *     - both are found when an existing record has been modified
   *     - only old is found when an existing record has not been modified
   *     - only new is found when the whole vlist was created
   *     - neither is found when for example the record has been deleted but not
   *       garbage collected yet
   *
   * @param t The transaction
   */
  void find_set_old_new(const tx::Transaction &t, T **old_ref, T **new_ref) {
    // assume that the sought old record is further down the list
    // from new record, so that if we found old we can stop looking
    *new_ref = nullptr;
    *old_ref = head_;
    while (*old_ref != nullptr && !(*old_ref)->visible(t)) {
      if (!*new_ref && (*old_ref)->is_created_by(t)) *new_ref = *old_ref;
      *old_ref = (*old_ref)->next(std::memory_order_seq_cst);
    }
  }

  /**
   * Looks for the first visible record seen by this transaction. If the current
   * transaction has already created new record in the current command then that
   * record is returned, else first older visible record is updated. New record
   * becomes head of the version list and it is returned. There should always be
   * older visible record when this update is called.
   *
   * @param t The transaction
   */
  T *update(const tx::Transaction &t) {
    DCHECK(head_ != nullptr) << "Head is nullptr on update.";
    T *old_record = nullptr;
    T *new_record = nullptr;
    find_set_old_new(t, &old_record, &new_record);

    // check if current transaction in current cmd has
    // already updated version list
    if (new_record) return new_record;

    // check if we found any visible records
    CHECK(old_record != nullptr) << "Updating nullptr record";

    return update(old_record, t);
  }

  /** Makes the given record as being expired by the given transaction. */
  void remove(T *record, const tx::Transaction &t) {
    DCHECK(record != nullptr) << "Record is nullptr on removal.";
    lock_and_validate(record, t);
    record->mark_expired(t);
  }

  const storage::Gid gid_;

  // The Cypher-visible id of this vertex/edge, derived from the gid.
  int64_t cypher_id() { return gid_.AsInt(); }

 private:
  // Takes this vlist's lock for `t` and throws SerializationError if a
  // concurrent committed transaction already expired `record`.
  void lock_and_validate(T *record, const tx::Transaction &t) {
    DCHECK(record != nullptr) << "Record is nullptr on lock and validation.";

    // take a lock on this node
    t.TakeLock(lock_);

    // if the record hasn't been deleted yet or the deleting transaction
    // has aborted, it's ok to modify it
    if (!record->tx().exp || !record->exp_committed(t.engine_)) return;

    // if it committed, then we have a serialization conflict
    throw SerializationError();
  }

  // Clones `record` into a new version created by `t`, expires the old one,
  // and pushes the clone to the front of the list.
  T *update(T *record, const tx::Transaction &t) {
    DCHECK(record != nullptr) << "Record is nullptr on update.";
    lock_and_validate(record, t);

    // It could be done with unique_ptr but while this could mean memory
    // leak on exception, unique_ptr could mean use after free. Memory
    // leak is less dangerous.
    auto *updated = record->CloneData();

    updated->mark_created(t);
    record->mark_expired(t);

    // Updated version should point to the latest available version. Older
    // versions that can be deleted will be removed during the GC phase.
    updated->next(head_.load(), std::memory_order_seq_cst);

    // Store the updated version as the first version point to by head.
    head_.store(updated, std::memory_order_seq_cst);

    return updated;
  }

  std::atomic<T *> head_{nullptr};
  RecordLock lock_;
};
|
|
||||||
} // namespace mvcc
|
|
@ -1,174 +0,0 @@
|
|||||||
#include "storage/single_node_ha/record_accessor.hpp"
|
|
||||||
|
|
||||||
#include <glog/logging.h>
|
|
||||||
|
|
||||||
#include "database/single_node_ha/graph_db_accessor.hpp"
|
|
||||||
#include "durability/single_node_ha/state_delta.hpp"
|
|
||||||
#include "storage/single_node_ha/edge.hpp"
|
|
||||||
#include "storage/single_node_ha/vertex.hpp"
|
|
||||||
|
|
||||||
using database::StateDelta;
|
|
||||||
|
|
||||||
template <typename TRecord>
|
|
||||||
RecordAccessor<TRecord>::RecordAccessor(mvcc::VersionList<TRecord> *address,
|
|
||||||
database::GraphDbAccessor &db_accessor)
|
|
||||||
: db_accessor_(&db_accessor), address_(address) {}
|
|
||||||
|
|
||||||
template <typename TRecord>
|
|
||||||
PropertyValue RecordAccessor<TRecord>::PropsAt(storage::Property key) const {
|
|
||||||
return current().properties_.at(key);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void RecordAccessor<Vertex>::PropsSet(storage::Property key,
|
|
||||||
PropertyValue value) {
|
|
||||||
auto &dba = db_accessor();
|
|
||||||
auto delta = StateDelta::PropsSetVertex(dba.transaction_id(), gid(), key,
|
|
||||||
dba.PropertyName(key), value);
|
|
||||||
auto previous_value = PropsAt(key);
|
|
||||||
update().properties_.set(key, value);
|
|
||||||
dba.UpdateOnAddProperty(key, previous_value, value, *this, &update());
|
|
||||||
dba.sd_buffer()->Emplace(delta);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void RecordAccessor<Edge>::PropsSet(storage::Property key,
|
|
||||||
PropertyValue value) {
|
|
||||||
auto &dba = db_accessor();
|
|
||||||
auto delta = StateDelta::PropsSetEdge(dba.transaction_id(), gid(), key,
|
|
||||||
dba.PropertyName(key), value);
|
|
||||||
|
|
||||||
update().properties_.set(key, value);
|
|
||||||
dba.sd_buffer()->Emplace(delta);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void RecordAccessor<Vertex>::PropsErase(storage::Property key) {
|
|
||||||
auto &dba = db_accessor();
|
|
||||||
auto delta =
|
|
||||||
StateDelta::PropsSetVertex(dba.transaction_id(), gid(), key,
|
|
||||||
dba.PropertyName(key), PropertyValue());
|
|
||||||
auto previous_value = PropsAt(key);
|
|
||||||
update().properties_.set(key, PropertyValue());
|
|
||||||
dba.UpdateOnRemoveProperty(key, previous_value, *this, &update());
|
|
||||||
dba.sd_buffer()->Emplace(delta);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void RecordAccessor<Edge>::PropsErase(storage::Property key) {
|
|
||||||
auto &dba = db_accessor();
|
|
||||||
auto delta =
|
|
||||||
StateDelta::PropsSetEdge(dba.transaction_id(), gid(), key,
|
|
||||||
dba.PropertyName(key), PropertyValue());
|
|
||||||
update().properties_.set(key, PropertyValue());
|
|
||||||
dba.sd_buffer()->Emplace(delta);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename TRecord>
|
|
||||||
void RecordAccessor<TRecord>::PropsClear() {
|
|
||||||
std::vector<storage::Property> to_remove;
|
|
||||||
for (const auto &kv : update().properties_) to_remove.emplace_back(kv.first);
|
|
||||||
for (const auto &prop : to_remove) {
|
|
||||||
PropsErase(prop);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename TRecord>
|
|
||||||
const PropertyValueStore &RecordAccessor<TRecord>::Properties() const {
|
|
||||||
return current().properties_;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename TRecord>
|
|
||||||
bool RecordAccessor<TRecord>::operator==(const RecordAccessor &other) const {
|
|
||||||
DCHECK(db_accessor_->transaction_id() == other.db_accessor_->transaction_id())
|
|
||||||
<< "Not in the same transaction.";
|
|
||||||
return address_ == other.address_;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename TRecord>
|
|
||||||
database::GraphDbAccessor &RecordAccessor<TRecord>::db_accessor() const {
|
|
||||||
return *db_accessor_;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename TRecord>
|
|
||||||
storage::Gid RecordAccessor<TRecord>::gid() const {
|
|
||||||
return address_->gid_;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename TRecord>
|
|
||||||
typename mvcc::VersionList<TRecord> *RecordAccessor<TRecord>::address() const {
|
|
||||||
return address_;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename TRecord>
|
|
||||||
RecordAccessor<TRecord> &RecordAccessor<TRecord>::SwitchNew() {
|
|
||||||
if (!new_) {
|
|
||||||
// if new_ is not set yet, look for it
|
|
||||||
// we can just Reconstruct the pointers, old_ will get initialized
|
|
||||||
// to the same value as it has now, and the amount of work is the
|
|
||||||
// same as just looking for a new_ record
|
|
||||||
if (!Reconstruct())
|
|
||||||
DLOG(FATAL)
|
|
||||||
<< "RecordAccessor::SwitchNew - accessor invalid after Reconstruct";
|
|
||||||
}
|
|
||||||
current_ = new_ ? new_ : old_;
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename TRecord>
|
|
||||||
RecordAccessor<TRecord> &RecordAccessor<TRecord>::SwitchOld() {
|
|
||||||
current_ = old_ ? old_ : new_;
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename TRecord>
|
|
||||||
bool RecordAccessor<TRecord>::Reconstruct() const {
|
|
||||||
auto &dba = db_accessor();
|
|
||||||
const auto &addr = address();
|
|
||||||
addr->find_set_old_new(dba.transaction(), &old_, &new_);
|
|
||||||
current_ = old_ ? old_ : new_;
|
|
||||||
return old_ != nullptr || new_ != nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename TRecord>
|
|
||||||
TRecord &RecordAccessor<TRecord>::update() const {
|
|
||||||
auto &dba = db_accessor();
|
|
||||||
// Edges have lazily initialize mutable, versioned data (properties).
|
|
||||||
if (std::is_same<TRecord, Edge>::value && current_ == nullptr) {
|
|
||||||
bool reconstructed = Reconstruct();
|
|
||||||
DCHECK(reconstructed) << "Unable to initialize record";
|
|
||||||
}
|
|
||||||
|
|
||||||
const auto &t = dba.transaction();
|
|
||||||
if (!new_ && old_->is_expired_by(t))
|
|
||||||
throw RecordDeletedError();
|
|
||||||
else if (new_ && new_->is_expired_by(t))
|
|
||||||
throw RecordDeletedError();
|
|
||||||
|
|
||||||
if (new_) return *new_;
|
|
||||||
|
|
||||||
const auto &addr = address();
|
|
||||||
new_ = addr->update(dba.transaction());
|
|
||||||
|
|
||||||
DCHECK(new_ != nullptr) << "RecordAccessor.new_ is null after update";
|
|
||||||
return *new_;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename TRecord>
|
|
||||||
int64_t RecordAccessor<TRecord>::CypherId() const {
|
|
||||||
return address()->cypher_id();
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename TRecord>
|
|
||||||
const TRecord &RecordAccessor<TRecord>::current() const {
|
|
||||||
// Edges have lazily initialize mutable, versioned data (properties).
|
|
||||||
if (std::is_same<TRecord, Edge>::value && current_ == nullptr) {
|
|
||||||
bool reconstructed = Reconstruct();
|
|
||||||
DCHECK(reconstructed) << "Unable to initialize record";
|
|
||||||
}
|
|
||||||
DCHECK(current_ != nullptr) << "RecordAccessor.current_ pointer is nullptr";
|
|
||||||
return *current_;
|
|
||||||
}
|
|
||||||
|
|
||||||
template class RecordAccessor<Vertex>;
|
|
||||||
template class RecordAccessor<Edge>;
|
|
@ -1,204 +0,0 @@
|
|||||||
/// @file
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include <glog/logging.h>
|
|
||||||
|
|
||||||
#include "storage/single_node_ha/mvcc/version_list.hpp"
|
|
||||||
#include "storage/common/types/property_value.hpp"
|
|
||||||
#include "storage/common/types/property_value_store.hpp"
|
|
||||||
#include "storage/common/types/types.hpp"
|
|
||||||
|
|
||||||
namespace database {
|
|
||||||
class GraphDbAccessor;
|
|
||||||
struct StateDelta;
|
|
||||||
}; // namespace database
|
|
||||||
|
|
||||||
/**
|
|
||||||
* An accessor to a database record (an Edge or a Vertex).
|
|
||||||
*
|
|
||||||
* Exposes view and update functions to the client programmer.
|
|
||||||
* Assumes responsibility of doing all the relevant book-keeping
|
|
||||||
* (such as index updates etc).
|
|
||||||
*
|
|
||||||
* @tparam TRecord Type of record (MVCC Version) of the accessor.
|
|
||||||
*/
|
|
||||||
template <typename TRecord>
|
|
||||||
class RecordAccessor {
|
|
||||||
protected:
|
|
||||||
/**
|
|
||||||
* The database::GraphDbAccessor is friend to this accessor so it can
|
|
||||||
* operate on it's data (mvcc version-list and the record itself).
|
|
||||||
* This is legitimate because database::GraphDbAccessor creates
|
|
||||||
* RecordAccessors
|
|
||||||
* and is semantically their parent/owner. It is necessary because
|
|
||||||
* the database::GraphDbAccessor handles insertions and deletions, and these
|
|
||||||
* operations modify data intensively.
|
|
||||||
*/
|
|
||||||
friend database::GraphDbAccessor;
|
|
||||||
|
|
||||||
public:
|
|
||||||
/**
|
|
||||||
* @param address Address (local or global) of the Vertex/Edge of this
|
|
||||||
* accessor.
|
|
||||||
* @param db_accessor The DB accessor that "owns" this record accessor.
|
|
||||||
*/
|
|
||||||
RecordAccessor(mvcc::VersionList<TRecord> *address, database::GraphDbAccessor &db_accessor);
|
|
||||||
|
|
||||||
// this class is default copyable, movable and assignable
|
|
||||||
RecordAccessor(const RecordAccessor &other) = default;
|
|
||||||
RecordAccessor(RecordAccessor &&other) = default;
|
|
||||||
RecordAccessor &operator=(const RecordAccessor &other) = default;
|
|
||||||
RecordAccessor &operator=(RecordAccessor &&other) = default;
|
|
||||||
|
|
||||||
/** Gets the property for the given key. */
|
|
||||||
PropertyValue PropsAt(storage::Property key) const;
|
|
||||||
|
|
||||||
/** Sets a value on the record for the given property. */
|
|
||||||
void PropsSet(storage::Property key, PropertyValue value);
|
|
||||||
|
|
||||||
/** Erases the property for the given key. */
|
|
||||||
void PropsErase(storage::Property key);
|
|
||||||
|
|
||||||
/** Removes all the properties from this record. */
|
|
||||||
void PropsClear();
|
|
||||||
|
|
||||||
/** Returns the properties of this record. */
|
|
||||||
const PropertyValueStore &Properties() const;
|
|
||||||
|
|
||||||
bool operator==(const RecordAccessor &other) const;
|
|
||||||
|
|
||||||
bool operator!=(const RecordAccessor &other) const {
|
|
||||||
return !(*this == other);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Returns a GraphDB accessor of this record accessor. */
|
|
||||||
database::GraphDbAccessor &db_accessor() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns a globally-unique ID of this vertex or edge. Note that vertices
|
|
||||||
* and edges have separate ID domains, there can be a vertex with ID X and an
|
|
||||||
* edge with the same id.
|
|
||||||
*/
|
|
||||||
storage::Gid gid() const;
|
|
||||||
|
|
||||||
mvcc::VersionList<TRecord> *address() const;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Switches this record accessor to use the latest version visible to the
|
|
||||||
* current transaction+command. Possibly the one that was created by this
|
|
||||||
* transaction+command.
|
|
||||||
*
|
|
||||||
* @return A reference to this.
|
|
||||||
*/
|
|
||||||
RecordAccessor<TRecord> &SwitchNew();
|
|
||||||
|
|
||||||
/** Returns the new record pointer. */
|
|
||||||
TRecord *GetNew() const { return new_; }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Attempts to switch this accessor to use the latest version not updated by
|
|
||||||
* the current transaction+command. If that is not possible (vertex/edge was
|
|
||||||
* created by the current transaction/command), it does nothing (current
|
|
||||||
* remains pointing to the new version).
|
|
||||||
*
|
|
||||||
* @return A reference to this.
|
|
||||||
*/
|
|
||||||
RecordAccessor<TRecord> &SwitchOld();
|
|
||||||
|
|
||||||
/** Returns the old record pointer. */
|
|
||||||
TRecord *GetOld() const { return old_; }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Reconstructs the internal state of the record accessor so it uses the
|
|
||||||
* versions appropriate to this transaction+command.
|
|
||||||
*
|
|
||||||
* @return True if this accessor is valid after reconstruction. This means
|
|
||||||
* that at least one record pointer was found (either new_ or old_), possibly
|
|
||||||
* both.
|
|
||||||
*/
|
|
||||||
bool Reconstruct() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Ensures there is an updateable version of the record in the version_list,
|
|
||||||
* and that the `new_` pointer points to it. Returns a reference to that
|
|
||||||
* version.
|
|
||||||
*
|
|
||||||
* It is not legal to call this function on a Vertex/Edge that has been
|
|
||||||
* deleted in the current transaction+command.
|
|
||||||
*
|
|
||||||
* @throws RecordDeletedError
|
|
||||||
*/
|
|
||||||
TRecord &update() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns true if the given accessor is visible to the given transaction.
|
|
||||||
*
|
|
||||||
* @param current_state If true then the graph state for the
|
|
||||||
* current transaction+command is returned (insertions, updates and
|
|
||||||
* deletions performed in the current transaction+command are not
|
|
||||||
* ignored).
|
|
||||||
*/
|
|
||||||
bool Visible(const tx::Transaction &t, bool current_state) const {
|
|
||||||
return (old_ && !(current_state && old_->is_expired_by(t))) ||
|
|
||||||
(current_state && new_ && !new_->is_expired_by(t));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns Cypher Id of this record.
|
|
||||||
*/
|
|
||||||
int64_t CypherId() const;
|
|
||||||
|
|
||||||
/** Returns the current version (either new_ or old_) set on this
|
|
||||||
* RecordAccessor. */
|
|
||||||
const TRecord ¤t() const;
|
|
||||||
|
|
||||||
protected:
|
|
||||||
/**
|
|
||||||
* Pointer to the version (either old_ or new_) that READ operations
|
|
||||||
* in the accessor should take data from. Note that WRITE operations
|
|
||||||
* should always use new_.
|
|
||||||
*
|
|
||||||
* This pointer can be null if created by an accessor which lazily reads from
|
|
||||||
* mvcc.
|
|
||||||
*/
|
|
||||||
mutable TRecord *current_{nullptr};
|
|
||||||
|
|
||||||
private:
|
|
||||||
// The database accessor for which this record accessor is created
|
|
||||||
// Provides means of getting to the transaction and database functions.
|
|
||||||
// Immutable, set in the constructor and never changed.
|
|
||||||
database::GraphDbAccessor *db_accessor_;
|
|
||||||
|
|
||||||
mvcc::VersionList<TRecord> *address_;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Latest version which is visible to the current transaction+command
|
|
||||||
* but has not been created nor modified by the current transaction+command.
|
|
||||||
*
|
|
||||||
* Can be null only when the record itself (the version-list) has
|
|
||||||
* been created by the current transaction+command.
|
|
||||||
*/
|
|
||||||
mutable TRecord *old_{nullptr};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Version that has been modified (created or updated) by the current
|
|
||||||
* transaction+command.
|
|
||||||
*
|
|
||||||
* Can be null when the record has not been modified in the current
|
|
||||||
* transaction+command. It is also possible that the modification
|
|
||||||
* has happened, but this RecordAccessor does not know this. To
|
|
||||||
* ensure correctness, the `SwitchNew` function must check if this
|
|
||||||
* is null, and if it is it must check with the vlist_ if there is
|
|
||||||
* an update.
|
|
||||||
*/
|
|
||||||
mutable TRecord *new_{nullptr};
|
|
||||||
};
|
|
||||||
|
|
||||||
/** Error when trying to update a deleted record */
|
|
||||||
class RecordDeletedError : public utils::BasicException {
|
|
||||||
public:
|
|
||||||
RecordDeletedError()
|
|
||||||
: utils::BasicException(
|
|
||||||
"Can't update a record deleted in the current transaction+commad") {
|
|
||||||
}
|
|
||||||
};
|
|
@ -1,47 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include "durability/single_node_ha/state_delta.hpp"
|
|
||||||
|
|
||||||
namespace storage {
|
|
||||||
|
|
||||||
class StateDeltaBuffer final {
|
|
||||||
public:
|
|
||||||
/// Inserts a new StateDelta in buffer.
|
|
||||||
void Emplace(const database::StateDelta &delta) {
|
|
||||||
tx::TransactionId tx_id = delta.transaction_id;
|
|
||||||
std::vector<database::StateDelta> *curr_buffer;
|
|
||||||
{
|
|
||||||
// We only need the lock when we're inserting a new key into the buffer.
|
|
||||||
std::lock_guard<std::mutex> lock(buffer_lock_);
|
|
||||||
curr_buffer = &buffer_[tx_id];
|
|
||||||
}
|
|
||||||
curr_buffer->emplace_back(delta);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Retrieves all buffered StateDeltas for a given transaction id.
|
|
||||||
/// If there are no such StateDeltas, the return vector is empty.
|
|
||||||
std::vector<database::StateDelta> GetDeltas(
|
|
||||||
const tx::TransactionId &tx_id) {
|
|
||||||
std::vector<database::StateDelta> *curr_buffer;
|
|
||||||
{
|
|
||||||
std::lock_guard<std::mutex> lock(buffer_lock_);
|
|
||||||
auto it = buffer_.find(tx_id);
|
|
||||||
if (it == buffer_.end()) return {};
|
|
||||||
curr_buffer = &it->second;
|
|
||||||
}
|
|
||||||
return *curr_buffer;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Deletes all buffered StateDeltas for a given transaction id.
|
|
||||||
void Erase(const tx::TransactionId &tx_id) {
|
|
||||||
std::lock_guard<std::mutex> lock(buffer_lock_);
|
|
||||||
buffer_.erase(tx_id);
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
mutable std::mutex buffer_lock_;
|
|
||||||
std::unordered_map<tx::TransactionId, std::vector<database::StateDelta>>
|
|
||||||
buffer_;
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace storage
|
|
@ -1,102 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <filesystem>
|
|
||||||
#include <optional>
|
|
||||||
|
|
||||||
#include "data_structures/concurrent/concurrent_map.hpp"
|
|
||||||
#include "kvstore/kvstore.hpp"
|
|
||||||
#include "storage/common/constraints/unique_constraints.hpp"
|
|
||||||
#include "storage/common/types/types.hpp"
|
|
||||||
#include "storage/single_node_ha/edge.hpp"
|
|
||||||
#include "storage/single_node_ha/indexes/key_index.hpp"
|
|
||||||
#include "storage/single_node_ha/indexes/label_property_index.hpp"
|
|
||||||
#include "storage/single_node_ha/mvcc/version_list.hpp"
|
|
||||||
#include "storage/single_node_ha/vertex.hpp"
|
|
||||||
#include "transactions/type.hpp"
|
|
||||||
|
|
||||||
namespace database {
|
|
||||||
class GraphDb;
|
|
||||||
};
|
|
||||||
|
|
||||||
namespace database {
|
|
||||||
|
|
||||||
/** A data structure containing the main data members of a graph database. */
|
|
||||||
class Storage {
|
|
||||||
public:
|
|
||||||
explicit Storage(const std::vector<std::string> &properties_on_disk)
|
|
||||||
: properties_on_disk_{properties_on_disk} {}
|
|
||||||
|
|
||||||
public:
|
|
||||||
~Storage() {
|
|
||||||
// Delete vertices and edges which weren't collected before, also deletes
|
|
||||||
// records inside version list
|
|
||||||
for (auto &id_vlist : vertices_.access()) delete id_vlist.second;
|
|
||||||
for (auto &id_vlist : edges_.access()) delete id_vlist.second;
|
|
||||||
}
|
|
||||||
|
|
||||||
Storage(const Storage &) = delete;
|
|
||||||
Storage(Storage &&) = delete;
|
|
||||||
Storage &operator=(const Storage &) = delete;
|
|
||||||
Storage &operator=(Storage &&) = delete;
|
|
||||||
|
|
||||||
storage::GidGenerator &VertexGenerator() { return vertex_generator_; }
|
|
||||||
storage::GidGenerator &EdgeGenerator() { return edge_generator_; }
|
|
||||||
LabelPropertyIndex &label_property_index() { return label_property_index_; }
|
|
||||||
|
|
||||||
/// Gets the local address for the given gid. Fails if not present.
|
|
||||||
template <typename TRecord>
|
|
||||||
mvcc::VersionList<TRecord> *LocalAddress(storage::Gid gid) const {
|
|
||||||
const auto &map = GetMap<TRecord>();
|
|
||||||
auto access = map.access();
|
|
||||||
auto found = access.find(gid);
|
|
||||||
CHECK(found != access.end())
|
|
||||||
<< "Failed to find "
|
|
||||||
<< (std::is_same<TRecord, Vertex>::value ? "vertex" : "edge")
|
|
||||||
<< " for gid: " << gid.AsUint();
|
|
||||||
return found->second;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Gets names of properties stored on disk
|
|
||||||
std::vector<std::string> &PropertiesOnDisk() { return properties_on_disk_; }
|
|
||||||
|
|
||||||
private:
|
|
||||||
friend class GraphDbAccessor;
|
|
||||||
// Needed for GraphDb::RefreshStat.
|
|
||||||
friend class GraphDb;
|
|
||||||
friend class StorageGc;
|
|
||||||
|
|
||||||
storage::GidGenerator vertex_generator_;
|
|
||||||
storage::GidGenerator edge_generator_;
|
|
||||||
|
|
||||||
// main storage for the graph
|
|
||||||
ConcurrentMap<storage::Gid, mvcc::VersionList<Vertex> *> vertices_;
|
|
||||||
ConcurrentMap<storage::Gid, mvcc::VersionList<Edge> *> edges_;
|
|
||||||
|
|
||||||
// indexes
|
|
||||||
KeyIndex<storage::Label, Vertex> labels_index_;
|
|
||||||
LabelPropertyIndex label_property_index_;
|
|
||||||
|
|
||||||
// unique constraints
|
|
||||||
storage::constraints::UniqueConstraints unique_constraints_;
|
|
||||||
|
|
||||||
std::vector<std::string> properties_on_disk_;
|
|
||||||
|
|
||||||
/// Gets the Vertex/Edge main storage map.
|
|
||||||
template <typename TRecord>
|
|
||||||
const ConcurrentMap<storage::Gid, mvcc::VersionList<TRecord> *> &GetMap()
|
|
||||||
const;
|
|
||||||
};
|
|
||||||
|
|
||||||
template <>
|
|
||||||
inline const ConcurrentMap<storage::Gid, mvcc::VersionList<Vertex> *>
|
|
||||||
&Storage::GetMap() const {
|
|
||||||
return vertices_;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
inline const ConcurrentMap<storage::Gid, mvcc::VersionList<Edge> *>
|
|
||||||
&Storage::GetMap() const {
|
|
||||||
return edges_;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace database
|
|
@ -1,174 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <chrono>
|
|
||||||
#include <queue>
|
|
||||||
|
|
||||||
#include "data_structures/concurrent/concurrent_map.hpp"
|
|
||||||
#include "raft/raft_server.hpp"
|
|
||||||
#include "storage/common/types/types.hpp"
|
|
||||||
#include "storage/single_node_ha/deferred_deleter.hpp"
|
|
||||||
#include "storage/single_node_ha/edge.hpp"
|
|
||||||
#include "storage/single_node_ha/garbage_collector.hpp"
|
|
||||||
#include "storage/single_node_ha/mvcc/version_list.hpp"
|
|
||||||
#include "storage/single_node_ha/storage.hpp"
|
|
||||||
#include "storage/single_node_ha/vertex.hpp"
|
|
||||||
#include "transactions/single_node_ha/engine.hpp"
|
|
||||||
#include "utils/exceptions.hpp"
|
|
||||||
#include "utils/scheduler.hpp"
|
|
||||||
#include "utils/timer.hpp"
|
|
||||||
|
|
||||||
namespace database {
|
|
||||||
|
|
||||||
/** Garbage collection capabilities for database::Storage. Extracted into a
|
|
||||||
* separate class for better code organization, and because the GC requires a
|
|
||||||
* tx::Engine, while the Storage itself can exist without it. Even though, a
|
|
||||||
* database::Storage is always acompanied by a Gc.
|
|
||||||
*/
|
|
||||||
class StorageGc {
|
|
||||||
template <typename TRecord>
|
|
||||||
class MvccDeleter {
|
|
||||||
using VlistT = mvcc::VersionList<TRecord>;
|
|
||||||
|
|
||||||
public:
|
|
||||||
explicit MvccDeleter(ConcurrentMap<storage::Gid, VlistT *> &collection)
|
|
||||||
: gc_(collection, record_deleter_, version_list_deleter_) {}
|
|
||||||
DeferredDeleter<TRecord> record_deleter_;
|
|
||||||
DeferredDeleter<mvcc::VersionList<TRecord>> version_list_deleter_;
|
|
||||||
GarbageCollector<ConcurrentMap<storage::Gid, VlistT *>, TRecord> gc_;
|
|
||||||
};
|
|
||||||
|
|
||||||
public:
|
|
||||||
/** Creates a garbage collector for the given storage that uses the given
|
|
||||||
* tx::Engine. If `pause_sec` is greater then zero, then GC gets triggered
|
|
||||||
* periodically. */
|
|
||||||
StorageGc(Storage &storage, tx::Engine &tx_engine,
|
|
||||||
raft::RaftServer *raft_server, int pause_sec)
|
|
||||||
: tx_engine_(tx_engine),
|
|
||||||
raft_server_(raft_server),
|
|
||||||
storage_(storage),
|
|
||||||
vertices_(storage.vertices_),
|
|
||||||
edges_(storage.edges_) {
|
|
||||||
if (pause_sec > 0)
|
|
||||||
scheduler_.Run(
|
|
||||||
"Storage GC", std::chrono::seconds(pause_sec), [this] {
|
|
||||||
try {
|
|
||||||
CollectGarbage();
|
|
||||||
} catch (const utils::BasicException &e) {
|
|
||||||
DLOG(WARNING)
|
|
||||||
<< "Couldn't perform storage garbage collection due to: "
|
|
||||||
<< e.what();
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
~StorageGc() {
|
|
||||||
// We have to stop the scheduler before destroying this class.
|
|
||||||
scheduler_.Stop();
|
|
||||||
|
|
||||||
edges_.record_deleter_.FreeExpiredObjects(tx::Transaction::MaxId());
|
|
||||||
vertices_.record_deleter_.FreeExpiredObjects(tx::Transaction::MaxId());
|
|
||||||
edges_.version_list_deleter_.FreeExpiredObjects(tx::Transaction::MaxId());
|
|
||||||
vertices_.version_list_deleter_.FreeExpiredObjects(
|
|
||||||
tx::Transaction::MaxId());
|
|
||||||
}
|
|
||||||
|
|
||||||
StorageGc(const StorageGc &) = delete;
|
|
||||||
StorageGc(StorageGc &&) = delete;
|
|
||||||
StorageGc &operator=(const StorageGc &) = delete;
|
|
||||||
StorageGc &operator=(StorageGc &&) = delete;
|
|
||||||
|
|
||||||
void CollectLogGarbage(tx::TransactionId oldest_active) {
|
|
||||||
auto safe_to_delete = GetClogSafeTransaction(oldest_active);
|
|
||||||
if (safe_to_delete) {
|
|
||||||
tx_engine_.GarbageCollectCommitLog(*safe_to_delete);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void CollectGarbage() {
|
|
||||||
// main garbage collection logic
|
|
||||||
// see wiki documentation for logic explanation
|
|
||||||
VLOG(21) << "Garbage collector started";
|
|
||||||
const auto snapshot_gc = tx_engine_.GlobalGcSnapshot();
|
|
||||||
{
|
|
||||||
// This can be run concurrently
|
|
||||||
utils::Timer x;
|
|
||||||
|
|
||||||
vertices_.gc_.Run(snapshot_gc, tx_engine_);
|
|
||||||
edges_.gc_.Run(snapshot_gc, tx_engine_);
|
|
||||||
storage_.unique_constraints_.Refresh(snapshot_gc, tx_engine_);
|
|
||||||
VLOG(21) << "Garbage collector mvcc phase time: " << x.Elapsed().count();
|
|
||||||
}
|
|
||||||
// This has to be run sequentially after gc because gc modifies
|
|
||||||
// version_lists and changes the oldest visible record, on which Refresh
|
|
||||||
// depends.
|
|
||||||
{
|
|
||||||
// This can be run concurrently
|
|
||||||
utils::Timer x;
|
|
||||||
storage_.labels_index_.Refresh(snapshot_gc, tx_engine_);
|
|
||||||
storage_.label_property_index_.Refresh(snapshot_gc, tx_engine_);
|
|
||||||
VLOG(21) << "Garbage collector index phase time: " << x.Elapsed().count();
|
|
||||||
}
|
|
||||||
{
|
|
||||||
// We free expired objects with snapshot.back(), which is
|
|
||||||
// the ID of the oldest active transaction (or next active, if there
|
|
||||||
// are no currently active). That's legal because that was the
|
|
||||||
// last possible transaction that could have obtained pointers
|
|
||||||
// to those records. New snapshot can be used, different than one used for
|
|
||||||
// first two phases of gc.
|
|
||||||
utils::Timer x;
|
|
||||||
const auto snapshot_gc = tx_engine_.GlobalGcSnapshot();
|
|
||||||
edges_.record_deleter_.FreeExpiredObjects(snapshot_gc.back());
|
|
||||||
vertices_.record_deleter_.FreeExpiredObjects(snapshot_gc.back());
|
|
||||||
edges_.version_list_deleter_.FreeExpiredObjects(snapshot_gc.back());
|
|
||||||
vertices_.version_list_deleter_.FreeExpiredObjects(snapshot_gc.back());
|
|
||||||
VLOG(21) << "Garbage collector deferred deletion phase time: "
|
|
||||||
<< x.Elapsed().count();
|
|
||||||
}
|
|
||||||
|
|
||||||
CollectLogGarbage(snapshot_gc.back());
|
|
||||||
gc_txid_ranges_.emplace(snapshot_gc.back(), tx_engine_.GlobalLast());
|
|
||||||
|
|
||||||
VLOG(21) << "gc snapshot: " << snapshot_gc;
|
|
||||||
VLOG(21) << "edge_record_deleter_ size: " << edges_.record_deleter_.Count();
|
|
||||||
VLOG(21) << "vertex record deleter_ size: "
|
|
||||||
<< vertices_.record_deleter_.Count();
|
|
||||||
VLOG(21) << "edge_version_list_deleter_ size: "
|
|
||||||
<< edges_.version_list_deleter_.Count();
|
|
||||||
VLOG(21) << "vertex_version_list_deleter_ size: "
|
|
||||||
<< vertices_.version_list_deleter_.Count();
|
|
||||||
VLOG(21) << "vertices_ size: " << storage_.vertices_.access().size();
|
|
||||||
VLOG(21) << "edges_ size: " << storage_.edges_.access().size();
|
|
||||||
VLOG(21) << "Garbage collector finished.";
|
|
||||||
}
|
|
||||||
|
|
||||||
protected:
|
|
||||||
// Find the largest transaction from which everything older is safe to
|
|
||||||
// delete, ones for which the hints have been set in the gc phase, and no
|
|
||||||
// alive transaction from the time before the hints were set is still alive
|
|
||||||
// (otherwise that transaction could still be waiting for a resolution of
|
|
||||||
// the query to the commit log about some old transaction)
|
|
||||||
std::optional<tx::TransactionId> GetClogSafeTransaction(
|
|
||||||
tx::TransactionId oldest_active) {
|
|
||||||
std::optional<tx::TransactionId> safe_to_delete;
|
|
||||||
while (!gc_txid_ranges_.empty() &&
|
|
||||||
gc_txid_ranges_.front().second < oldest_active) {
|
|
||||||
safe_to_delete = gc_txid_ranges_.front().first;
|
|
||||||
gc_txid_ranges_.pop();
|
|
||||||
}
|
|
||||||
return safe_to_delete;
|
|
||||||
}
|
|
||||||
|
|
||||||
tx::Engine &tx_engine_;
|
|
||||||
raft::RaftServer *raft_server_;
|
|
||||||
utils::Scheduler scheduler_;
|
|
||||||
|
|
||||||
private:
|
|
||||||
Storage &storage_;
|
|
||||||
MvccDeleter<Vertex> vertices_;
|
|
||||||
MvccDeleter<Edge> edges_;
|
|
||||||
|
|
||||||
// History of <oldest active transaction, next transaction to be ran> ranges
|
|
||||||
// that gc operated on at some previous time - used to clear commit log
|
|
||||||
std::queue<std::pair<tx::TransactionId, tx::TransactionId>> gc_txid_ranges_;
|
|
||||||
};
|
|
||||||
} // namespace database
|
|
@ -1,28 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include "storage/single_node_ha/mvcc/record.hpp"
|
|
||||||
#include "storage/single_node_ha/mvcc/version_list.hpp"
|
|
||||||
#include "storage/common/types/property_value_store.hpp"
|
|
||||||
#include "storage/common/types/types.hpp"
|
|
||||||
#include "storage/single_node_ha/edges.hpp"
|
|
||||||
|
|
||||||
class Vertex : public mvcc::Record<Vertex> {
|
|
||||||
public:
|
|
||||||
Vertex() = default;
|
|
||||||
// Returns new Vertex with copy of data stored in this Vertex, but without
|
|
||||||
// copying superclass' members.
|
|
||||||
Vertex *CloneData() { return new Vertex(*this); }
|
|
||||||
|
|
||||||
Edges out_;
|
|
||||||
Edges in_;
|
|
||||||
std::vector<storage::Label> labels_;
|
|
||||||
PropertyValueStore properties_;
|
|
||||||
|
|
||||||
private:
|
|
||||||
Vertex(const Vertex &other)
|
|
||||||
: mvcc::Record<Vertex>(),
|
|
||||||
out_(other.out_),
|
|
||||||
in_(other.in_),
|
|
||||||
labels_(other.labels_),
|
|
||||||
properties_(other.properties_) {}
|
|
||||||
};
|
|
@ -1,86 +0,0 @@
|
|||||||
#include "storage/single_node_ha/vertex_accessor.hpp"
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
|
|
||||||
#include "database/single_node_ha/graph_db_accessor.hpp"
|
|
||||||
#include "durability/single_node_ha/state_delta.hpp"
|
|
||||||
#include "utils/algorithm.hpp"
|
|
||||||
|
|
||||||
VertexAccessor::VertexAccessor(mvcc::VersionList<Vertex> *address,
|
|
||||||
database::GraphDbAccessor &db_accessor)
|
|
||||||
: RecordAccessor(address, db_accessor) {
|
|
||||||
Reconstruct();
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t VertexAccessor::out_degree() const { return current().out_.size(); }
|
|
||||||
|
|
||||||
size_t VertexAccessor::in_degree() const { return current().in_.size(); }
|
|
||||||
|
|
||||||
void VertexAccessor::add_label(storage::Label label) {
|
|
||||||
auto &dba = db_accessor();
|
|
||||||
auto delta = database::StateDelta::AddLabel(dba.transaction_id(), gid(),
|
|
||||||
label, dba.LabelName(label));
|
|
||||||
Vertex &vertex = update();
|
|
||||||
// not a duplicate label, add it
|
|
||||||
if (!utils::Contains(vertex.labels_, label)) {
|
|
||||||
vertex.labels_.emplace_back(label);
|
|
||||||
dba.sd_buffer()->Emplace(delta);
|
|
||||||
dba.UpdateOnAddLabel(label, *this, &vertex);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void VertexAccessor::remove_label(storage::Label label) {
|
|
||||||
auto &dba = db_accessor();
|
|
||||||
auto delta = database::StateDelta::RemoveLabel(dba.transaction_id(), gid(),
|
|
||||||
label, dba.LabelName(label));
|
|
||||||
Vertex &vertex = update();
|
|
||||||
if (utils::Contains(vertex.labels_, label)) {
|
|
||||||
auto &labels = vertex.labels_;
|
|
||||||
auto found = std::find(labels.begin(), labels.end(), delta.label);
|
|
||||||
std::swap(*found, labels.back());
|
|
||||||
labels.pop_back();
|
|
||||||
dba.sd_buffer()->Emplace(delta);
|
|
||||||
dba.UpdateOnRemoveLabel(label, *this);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool VertexAccessor::has_label(storage::Label label) const {
|
|
||||||
auto &labels = this->current().labels_;
|
|
||||||
return std::find(labels.begin(), labels.end(), label) != labels.end();
|
|
||||||
}
|
|
||||||
|
|
||||||
const std::vector<storage::Label> &VertexAccessor::labels() const {
|
|
||||||
return this->current().labels_;
|
|
||||||
}
|
|
||||||
|
|
||||||
void VertexAccessor::RemoveOutEdge(mvcc::VersionList<Edge> *edge) {
|
|
||||||
auto &dba = db_accessor();
|
|
||||||
|
|
||||||
SwitchNew();
|
|
||||||
if (current().is_expired_by(dba.transaction())) return;
|
|
||||||
|
|
||||||
update().out_.RemoveEdge(edge);
|
|
||||||
}
|
|
||||||
|
|
||||||
void VertexAccessor::RemoveInEdge(mvcc::VersionList<Edge> *edge) {
|
|
||||||
auto &dba = db_accessor();
|
|
||||||
|
|
||||||
SwitchNew();
|
|
||||||
if (current().is_expired_by(dba.transaction())) return;
|
|
||||||
|
|
||||||
update().in_.RemoveEdge(edge);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::ostream &operator<<(std::ostream &os, const VertexAccessor &va) {
|
|
||||||
os << "V(";
|
|
||||||
utils::PrintIterable(os, va.labels(), ":", [&](auto &stream, auto label) {
|
|
||||||
stream << va.db_accessor().LabelName(label);
|
|
||||||
});
|
|
||||||
os << " {";
|
|
||||||
utils::PrintIterable(os, va.Properties(), ", ",
|
|
||||||
[&](auto &stream, const auto &pair) {
|
|
||||||
stream << va.db_accessor().PropertyName(pair.first)
|
|
||||||
<< ": " << pair.second;
|
|
||||||
});
|
|
||||||
return os << "})";
|
|
||||||
}
|
|
@ -1,158 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <limits>
|
|
||||||
#include <set>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include <cppitertools/chain.hpp>
|
|
||||||
#include <cppitertools/imap.hpp>
|
|
||||||
|
|
||||||
#include "storage/single_node_ha/edge_accessor.hpp"
|
|
||||||
#include "storage/single_node_ha/record_accessor.hpp"
|
|
||||||
#include "storage/single_node_ha/vertex.hpp"
|
|
||||||
#include "utils/algorithm.hpp"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Provides ways for the client programmer (i.e. code generated
|
|
||||||
* by the compiler) to interact with a Vertex.
|
|
||||||
*
|
|
||||||
* This class indirectly inherits MVCC data structures and
|
|
||||||
* takes care of MVCC versioning.
|
|
||||||
*/
|
|
||||||
class VertexAccessor final : public RecordAccessor<Vertex> {
|
|
||||||
// Helper function for creating an iterator over edges.
|
|
||||||
// @param begin - begin iterator
|
|
||||||
// @param end - end iterator
|
|
||||||
// @param from - if true specifies that the vertex represents `from` part of
|
|
||||||
// the edge, otherwise it specifies `to` part of the edge
|
|
||||||
// @param vertex - one endpoint of every edge
|
|
||||||
// @param db_accessor - database accessor
|
|
||||||
// @return - Iterator over EdgeAccessors
|
|
||||||
template <typename TIterator>
|
|
||||||
static inline auto MakeAccessorIterator(
|
|
||||||
TIterator &&begin, TIterator &&end, bool from,
|
|
||||||
mvcc::VersionList<Vertex> *vertex,
|
|
||||||
database::GraphDbAccessor &db_accessor) {
|
|
||||||
return iter::imap(
|
|
||||||
[from, vertex, &db_accessor](auto &edges_element) {
|
|
||||||
if (from) {
|
|
||||||
return EdgeAccessor(edges_element.edge, db_accessor, vertex,
|
|
||||||
edges_element.vertex, edges_element.edge_type);
|
|
||||||
} else {
|
|
||||||
return EdgeAccessor(edges_element.edge, db_accessor,
|
|
||||||
edges_element.vertex, vertex,
|
|
||||||
edges_element.edge_type);
|
|
||||||
}
|
|
||||||
},
|
|
||||||
utils::Iterable<TIterator>(std::forward<TIterator>(begin),
|
|
||||||
std::forward<TIterator>(end)));
|
|
||||||
}
|
|
||||||
|
|
||||||
public:
|
|
||||||
VertexAccessor(mvcc::VersionList<Vertex> *address,
|
|
||||||
database::GraphDbAccessor &db_accessor);
|
|
||||||
|
|
||||||
/** Returns the number of outgoing edges. */
|
|
||||||
size_t out_degree() const;
|
|
||||||
|
|
||||||
/** Returns the number of incoming edges. */
|
|
||||||
size_t in_degree() const;
|
|
||||||
|
|
||||||
/** Adds a label to the Vertex. If the Vertex already has that label the call
|
|
||||||
* has no effect. */
|
|
||||||
void add_label(storage::Label label);
|
|
||||||
|
|
||||||
/** Removes a label from the Vertex. */
|
|
||||||
void remove_label(storage::Label label);
|
|
||||||
|
|
||||||
/** Indicates if the Vertex has the given label. */
|
|
||||||
bool has_label(storage::Label label) const;
|
|
||||||
|
|
||||||
/** Returns all the Labels of the Vertex. */
|
|
||||||
const std::vector<storage::Label> &labels() const;
|
|
||||||
|
|
||||||
/** Returns EdgeAccessors for all incoming edges. */
|
|
||||||
auto in() const {
|
|
||||||
return MakeAccessorIterator(current().in_.begin(), current().in_.end(),
|
|
||||||
false, address(), db_accessor());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns EdgeAccessors for all incoming edges.
|
|
||||||
*
|
|
||||||
* @param dest - The destination vertex filter.
|
|
||||||
* @param edge_types - Edge types filter. At least one be matched. If nullptr
|
|
||||||
* or empty, the parameter is ignored.
|
|
||||||
*/
|
|
||||||
auto in(const VertexAccessor &dest,
|
|
||||||
const std::vector<storage::EdgeType> *edge_types = nullptr) const {
|
|
||||||
return MakeAccessorIterator(current().in_.begin(dest.address(), edge_types),
|
|
||||||
current().in_.end(), false, address(),
|
|
||||||
db_accessor());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns EdgeAccessors for all incoming edges.
|
|
||||||
*
|
|
||||||
* @param edge_types - Edge types filter. At least one be matched. If nullptr
|
|
||||||
* or empty, the parameter is ignored.
|
|
||||||
*/
|
|
||||||
auto in(const std::vector<storage::EdgeType> *edge_types) const {
|
|
||||||
return MakeAccessorIterator(current().in_.begin(nullptr, edge_types),
|
|
||||||
current().in_.end(), false, address(),
|
|
||||||
db_accessor());
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Returns EdgeAccessors for all outgoing edges. */
|
|
||||||
auto out() const {
|
|
||||||
return MakeAccessorIterator(current().out_.begin(), current().out_.end(),
|
|
||||||
true, address(), db_accessor());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns EdgeAccessors for all outgoing edges whose destination is the given
|
|
||||||
* vertex.
|
|
||||||
*
|
|
||||||
* @param dest - The destination vertex filter.
|
|
||||||
* @param edge_types - Edge types filter. At least one be matched. If nullptr
|
|
||||||
* or empty, the parameter is ignored.
|
|
||||||
*/
|
|
||||||
auto out(const VertexAccessor &dest,
|
|
||||||
const std::vector<storage::EdgeType> *edge_types = nullptr) const {
|
|
||||||
return MakeAccessorIterator(
|
|
||||||
current().out_.begin(dest.address(), edge_types), current().out_.end(),
|
|
||||||
true, address(), db_accessor());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns EdgeAccessors for all outgoing edges.
|
|
||||||
*
|
|
||||||
* @param edge_types - Edge types filter. At least one be matched. If nullptr
|
|
||||||
* or empty, the parameter is ignored.
|
|
||||||
*/
|
|
||||||
auto out(const std::vector<storage::EdgeType> *edge_types) const {
|
|
||||||
return MakeAccessorIterator(current().out_.begin(nullptr, edge_types),
|
|
||||||
current().out_.end(), true, address(),
|
|
||||||
db_accessor());
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Removes the given edge from the outgoing edges of this vertex. Note that
|
|
||||||
* this operation should always be accompanied by the removal of the edge from
|
|
||||||
* the incoming edges on the other side and edge deletion. */
|
|
||||||
void RemoveOutEdge(mvcc::VersionList<Edge> *edge);
|
|
||||||
|
|
||||||
/** Removes the given edge from the incoming edges of this vertex. Note that
|
|
||||||
* this operation should always be accompanied by the removal of the edge from
|
|
||||||
* the outgoing edges on the other side and edge deletion. */
|
|
||||||
void RemoveInEdge(mvcc::VersionList<Edge> *edge);
|
|
||||||
};
|
|
||||||
|
|
||||||
std::ostream &operator<<(std::ostream &, const VertexAccessor &);
|
|
||||||
|
|
||||||
// hash function for the vertex accessor
|
|
||||||
namespace std {
|
|
||||||
template <>
|
|
||||||
struct hash<VertexAccessor> {
|
|
||||||
size_t operator()(const VertexAccessor &v) const { return v.gid().AsUint(); };
|
|
||||||
};
|
|
||||||
} // namespace std
|
|
@ -1,7 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#ifdef MG_SINGLE_NODE_HA
|
|
||||||
#include "storage/single_node_ha/vertex_accessor.hpp"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// TODO: write documentation for the interface here!
|
|
@ -1,80 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include "data_structures/bitset/dynamic_bitset.hpp"
|
|
||||||
#include "transactions/type.hpp"
|
|
||||||
|
|
||||||
namespace tx {
|
|
||||||
|
|
||||||
// This class is lock free. There is no need to acquire any lock when accessing
|
|
||||||
// this class and this class doesn't acquire any lock on method calls.
|
|
||||||
class CommitLog final {
|
|
||||||
public:
|
|
||||||
static constexpr int kBitsetBlockSize = 32768;
|
|
||||||
|
|
||||||
CommitLog() = default;
|
|
||||||
CommitLog(const CommitLog &) = delete;
|
|
||||||
CommitLog(CommitLog &&) = delete;
|
|
||||||
CommitLog &operator=(const CommitLog &) = delete;
|
|
||||||
CommitLog &operator=(CommitLog &&) = delete;
|
|
||||||
|
|
||||||
bool is_active(TransactionId id) const {
|
|
||||||
return fetch_info(id).is_active();
|
|
||||||
}
|
|
||||||
|
|
||||||
bool is_committed(TransactionId id) const {
|
|
||||||
return fetch_info(id).is_committed();
|
|
||||||
}
|
|
||||||
|
|
||||||
void set_committed(TransactionId id) { log.set(2 * id); }
|
|
||||||
|
|
||||||
bool is_aborted(TransactionId id) const {
|
|
||||||
return fetch_info(id).is_aborted();
|
|
||||||
}
|
|
||||||
|
|
||||||
void set_aborted(TransactionId id) { log.set(2 * id + 1); }
|
|
||||||
|
|
||||||
// Clears the commit log from bits associated with transactions with an id
|
|
||||||
// lower than `id`.
|
|
||||||
void garbage_collect_older(TransactionId id) { log.delete_prefix(2 * id); }
|
|
||||||
|
|
||||||
class Info final {
|
|
||||||
public:
|
|
||||||
Info() {} // Needed for serialization.
|
|
||||||
enum Status {
|
|
||||||
ACTIVE = 0, // 00
|
|
||||||
COMMITTED = 1, // 01
|
|
||||||
ABORTED = 2, // 10
|
|
||||||
};
|
|
||||||
|
|
||||||
explicit Info(uint8_t flags) {
|
|
||||||
if (flags & ABORTED) {
|
|
||||||
flags_ = ABORTED;
|
|
||||||
} else if (flags & COMMITTED) {
|
|
||||||
flags_ = COMMITTED;
|
|
||||||
} else {
|
|
||||||
flags_ = ACTIVE;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool is_active() const { return flags_ == ACTIVE; }
|
|
||||||
|
|
||||||
bool is_committed() const {
|
|
||||||
if (flags_ & ABORTED) return false;
|
|
||||||
return flags_ & COMMITTED;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool is_aborted() const { return flags_ & ABORTED; }
|
|
||||||
|
|
||||||
operator uint8_t() const { return flags_; }
|
|
||||||
|
|
||||||
private:
|
|
||||||
uint8_t flags_{0};
|
|
||||||
};
|
|
||||||
|
|
||||||
Info fetch_info(TransactionId id) const { return Info{log.at(2 * id, 2)}; }
|
|
||||||
|
|
||||||
private:
|
|
||||||
DynamicBitset<uint8_t, kBitsetBlockSize> log;
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace tx
|
|
@ -1,5 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#ifdef MG_SINGLE_NODE_HA
|
|
||||||
#include "transactions/single_node_ha/engine.hpp"
|
|
||||||
#endif
|
|
@ -1,85 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <memory>
|
|
||||||
#include <mutex>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include "glog/logging.h"
|
|
||||||
#include "storage/common/locking/lock_status.hpp"
|
|
||||||
#include "storage/common/locking/record_lock.hpp"
|
|
||||||
#include "transactions/type.hpp"
|
|
||||||
#include "utils/spin_lock.hpp"
|
|
||||||
|
|
||||||
namespace tx {
|
|
||||||
|
|
||||||
class Engine;
|
|
||||||
class Transaction;
|
|
||||||
|
|
||||||
class LockStore {
|
|
||||||
class LockHolder {
|
|
||||||
public:
|
|
||||||
LockHolder() = default;
|
|
||||||
|
|
||||||
/// @throw utils::LockTimeoutException
|
|
||||||
LockHolder(RecordLock *lock, const Transaction &tx, tx::Engine &engine)
|
|
||||||
: lock_(lock) {
|
|
||||||
DCHECK(lock != nullptr) << "Lock is nullptr.";
|
|
||||||
auto status = lock_->Lock(tx, engine);
|
|
||||||
|
|
||||||
if (status != LockStatus::Acquired) {
|
|
||||||
lock_ = nullptr;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
LockHolder(const LockHolder &) = delete;
|
|
||||||
LockHolder &operator=(const LockHolder &) = delete;
|
|
||||||
|
|
||||||
LockHolder(LockHolder &&other) : lock_(other.lock_) {
|
|
||||||
other.lock_ = nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
LockHolder &operator=(LockHolder &&other) {
|
|
||||||
if (this == &other) return *this;
|
|
||||||
lock_ = other.lock_;
|
|
||||||
other.lock_ = nullptr;
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
~LockHolder() {
|
|
||||||
if (lock_ != nullptr) {
|
|
||||||
lock_->Unlock();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool active() const { return lock_ != nullptr; }
|
|
||||||
|
|
||||||
private:
|
|
||||||
RecordLock *lock_{nullptr};
|
|
||||||
};
|
|
||||||
|
|
||||||
public:
|
|
||||||
/// @throw utils::LockTimeoutException
|
|
||||||
void Take(RecordLock *lock, const tx::Transaction &tx, tx::Engine &engine) {
|
|
||||||
// Creating a lock holder locks the version list to the given transaction.
|
|
||||||
// Note that it's an op that can take a long time (if there are multiple
|
|
||||||
// transactions trying to lock.
|
|
||||||
LockHolder holder{lock, tx, engine};
|
|
||||||
|
|
||||||
// This guard prevents the same transaction from concurrent modificaton of
|
|
||||||
// locks_. This can only happen in distributed memgraph, when there are
|
|
||||||
// multiple edits coming to the same worker in the same transaction at the
|
|
||||||
// same time. IMPORTANT: This guard must come after LockHolder construction,
|
|
||||||
// as that potentially takes a long time and this guard only needs to
|
|
||||||
// protect locks_ update.
|
|
||||||
std::lock_guard<utils::SpinLock> guard{locks_lock_};
|
|
||||||
locks_.emplace_back(std::move(holder));
|
|
||||||
if (!locks_.back().active()) {
|
|
||||||
locks_.pop_back();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
utils::SpinLock locks_lock_;
|
|
||||||
std::vector<LockHolder> locks_;
|
|
||||||
};
|
|
||||||
} // namespace tx
|
|
@ -1,305 +0,0 @@
|
|||||||
#include "transactions/single_node_ha/engine.hpp"
|
|
||||||
|
|
||||||
#include <limits>
|
|
||||||
#include <mutex>
|
|
||||||
|
|
||||||
#include "glog/logging.h"
|
|
||||||
|
|
||||||
#include "durability/single_node_ha/state_delta.hpp"
|
|
||||||
#include "raft/exceptions.hpp"
|
|
||||||
|
|
||||||
namespace tx {
|
|
||||||
|
|
||||||
Engine::Engine(raft::RaftInterface *raft,
|
|
||||||
storage::StateDeltaBuffer *delta_buffer)
|
|
||||||
: clog_(std::make_unique<CommitLog>()),
|
|
||||||
raft_(raft),
|
|
||||||
delta_buffer_(delta_buffer) {
|
|
||||||
CHECK(raft) << "Raft can't be nullptr in HA";
|
|
||||||
CHECK(delta_buffer) << "State delta buffer can't be nullptr in HA";
|
|
||||||
}
|
|
||||||
|
|
||||||
Transaction *Engine::Begin() {
|
|
||||||
VLOG(11) << "[Tx] Starting transaction " << counter_ + 1;
|
|
||||||
std::lock_guard<utils::SpinLock> guard(lock_);
|
|
||||||
if (!accepting_transactions_.load() || !replication_errors_.empty())
|
|
||||||
throw TransactionEngineError(
|
|
||||||
"The transaction engine currently isn't accepting new transactions.");
|
|
||||||
|
|
||||||
return BeginTransaction(false);
|
|
||||||
}
|
|
||||||
|
|
||||||
Transaction *Engine::BeginBlocking(std::optional<TransactionId> parent_tx) {
|
|
||||||
Snapshot wait_for_txs;
|
|
||||||
{
|
|
||||||
std::lock_guard<utils::SpinLock> guard(lock_);
|
|
||||||
if (!accepting_transactions_.load() || !replication_errors_.empty())
|
|
||||||
throw TransactionEngineError(
|
|
||||||
"The transaction engine currently isn't accepting new transactions.");
|
|
||||||
|
|
||||||
// Block the engine from accepting new transactions.
|
|
||||||
accepting_transactions_.store(false);
|
|
||||||
|
|
||||||
// Set active transactions to abort ASAP.
|
|
||||||
for (auto transaction : active_) {
|
|
||||||
store_.find(transaction)->second->set_should_abort();
|
|
||||||
}
|
|
||||||
|
|
||||||
wait_for_txs = active_;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Wait for all active transactions except the parent (optional) and ourselves
|
|
||||||
// to end.
|
|
||||||
for (auto id : wait_for_txs) {
|
|
||||||
if (parent_tx && *parent_tx == id) continue;
|
|
||||||
while (Info(id).is_active()) {
|
|
||||||
// TODO reconsider this constant, currently rule-of-thumb chosen
|
|
||||||
std::this_thread::sleep_for(std::chrono::microseconds(100));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Only after all transactions have finished, start the blocking transaction.
|
|
||||||
std::lock_guard<utils::SpinLock> guard(lock_);
|
|
||||||
return BeginTransaction(true);
|
|
||||||
}
|
|
||||||
|
|
||||||
CommandId Engine::Advance(TransactionId id) {
|
|
||||||
std::lock_guard<utils::SpinLock> guard(lock_);
|
|
||||||
|
|
||||||
auto it = store_.find(id);
|
|
||||||
DCHECK(it != store_.end())
|
|
||||||
<< "Transaction::advance on non-existing transaction";
|
|
||||||
|
|
||||||
return it->second.get()->AdvanceCommand();
|
|
||||||
}
|
|
||||||
|
|
||||||
CommandId Engine::UpdateCommand(TransactionId id) {
|
|
||||||
std::lock_guard<utils::SpinLock> guard(lock_);
|
|
||||||
auto it = store_.find(id);
|
|
||||||
DCHECK(it != store_.end())
|
|
||||||
<< "Transaction::advance on non-existing transaction";
|
|
||||||
return it->second->cid();
|
|
||||||
}
|
|
||||||
|
|
||||||
void Engine::Commit(const Transaction &t) {
|
|
||||||
VLOG(11) << "[Tx] Committing transaction " << t.id_;
|
|
||||||
delta_buffer_->Emplace(database::StateDelta::TxCommit(t.id_));
|
|
||||||
auto deltas = delta_buffer_->GetDeltas(t.id_);
|
|
||||||
|
|
||||||
// If we have only two state deltas in our transaction, that means we are
|
|
||||||
// dealing with a read-only transaction which does not need to be replicated
|
|
||||||
// throughout the cluster, so we simply commit it in our storage.
|
|
||||||
//
|
|
||||||
// Also, when the current server is not in the leader mode, the following
|
|
||||||
// holds:
|
|
||||||
//
|
|
||||||
// 1) In CANDIDATE mode we need to be able to commit because Raft is
|
|
||||||
// initialzed in that mode and needs to perform recovery.
|
|
||||||
//
|
|
||||||
// 2) In FOLLOWER mode, Raft will only try to apply state deltas from logs
|
|
||||||
// that are behind the current commit index and are therefore safe to
|
|
||||||
// apply.
|
|
||||||
if (deltas.size() == 2 || !raft_->IsLeader()) {
|
|
||||||
delta_buffer_->Erase(t.id_);
|
|
||||||
std::lock_guard<utils::SpinLock> guard(lock_);
|
|
||||||
clog_->set_committed(t.id_);
|
|
||||||
active_.remove(t.id_);
|
|
||||||
store_.erase(store_.find(t.id_));
|
|
||||||
if (t.blocking()) {
|
|
||||||
accepting_transactions_.store(true);
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto log_entry_status = raft_->Emplace(deltas);
|
|
||||||
|
|
||||||
// Log Entry was not successfully emplaced and the transaction should be
|
|
||||||
// aborted
|
|
||||||
if (!log_entry_status) {
|
|
||||||
Abort(t);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// It is important to note the following situation. If our cluster ends up
|
|
||||||
// with a network partition where the current leader can't communicate with
|
|
||||||
// the majority of the peers, and the client is still sending queries to it,
|
|
||||||
// all of the transaction will end up waiting here until the network
|
|
||||||
// partition is resolved. The problem that can occur afterwards is bad.
|
|
||||||
// When the machine transitions from leader to follower mode,
|
|
||||||
// `ReplicationInfo` method will start returning `is_replicated=true`. This
|
|
||||||
// might lead to a problem where we suddenly want to alter the state of the
|
|
||||||
// transaction engine that isn't valid anymore, because the current machine
|
|
||||||
// isn't the leader anymore. This is all handled in the `Transition` method
|
|
||||||
// where once the transition from leader to follower occurs, the mode will
|
|
||||||
// be set to follower first, then the `Reset` method on the transaction
|
|
||||||
// engine will wait for all transactions to finish, and even though we
|
|
||||||
// change the transaction engine state here, the engine will perform a
|
|
||||||
// `Reset` and start recovering from zero, and the invalid changes won't
|
|
||||||
// matter.
|
|
||||||
|
|
||||||
// Wait for Raft to receive confirmation from the majority of followers.
|
|
||||||
while (true) {
|
|
||||||
try {
|
|
||||||
if (raft_->SafeToCommit(log_entry_status->term_id,
|
|
||||||
log_entry_status->log_index))
|
|
||||||
break;
|
|
||||||
} catch (const raft::ReplicationTimeoutException &e) {
|
|
||||||
std::lock_guard<utils::SpinLock> guard(lock_);
|
|
||||||
if (replication_errors_.insert(t.id_).second) {
|
|
||||||
LOG(WARNING) << e.what();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
std::this_thread::sleep_for(std::chrono::microseconds(100));
|
|
||||||
}
|
|
||||||
|
|
||||||
std::unique_lock<std::mutex> raft_lock(raft_->WithLock(), std::defer_lock);
|
|
||||||
// We need to acquire the Raft lock so we don't end up racing with a Raft
|
|
||||||
// thread that can reset the engine state. If we can't acquire the lock, and
|
|
||||||
// we end up with reseting the engine, we throw
|
|
||||||
// UnexpectedLeaderChangeException.
|
|
||||||
while (true) {
|
|
||||||
if (raft_lock.try_lock()) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
// This is the case when we've lost our leader status due to another peer
|
|
||||||
// requesting election.
|
|
||||||
if (reset_active_.load()) throw raft::UnexpectedLeaderChangeException();
|
|
||||||
// This is the case when we're shutting down and we're no longer a valid
|
|
||||||
// leader. `SafeToCommit` will throw `RaftShutdownException` if the
|
|
||||||
// transaction wasn't replicated and the client will receive a negative
|
|
||||||
// response. Otherwise, we'll end up here, and since the transaction was
|
|
||||||
// replciated, we need to inform the client that the query succeeded.
|
|
||||||
if (!raft_->IsLeader()) break;
|
|
||||||
std::this_thread::sleep_for(std::chrono::microseconds(100));
|
|
||||||
}
|
|
||||||
|
|
||||||
delta_buffer_->Erase(t.id_);
|
|
||||||
std::lock_guard<utils::SpinLock> guard(lock_);
|
|
||||||
replication_errors_.erase(t.id_);
|
|
||||||
clog_->set_committed(t.id_);
|
|
||||||
active_.remove(t.id_);
|
|
||||||
store_.erase(store_.find(t.id_));
|
|
||||||
if (t.blocking()) {
|
|
||||||
accepting_transactions_.store(true);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void Engine::Abort(const Transaction &t) {
|
|
||||||
VLOG(11) << "[Tx] Aborting transaction " << t.id_;
|
|
||||||
delta_buffer_->Erase(t.id_);
|
|
||||||
std::lock_guard<utils::SpinLock> guard(lock_);
|
|
||||||
clog_->set_aborted(t.id_);
|
|
||||||
active_.remove(t.id_);
|
|
||||||
store_.erase(store_.find(t.id_));
|
|
||||||
if (t.blocking()) {
|
|
||||||
accepting_transactions_.store(true);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
CommitLog::Info Engine::Info(TransactionId tx) const {
|
|
||||||
return clog_->fetch_info(tx);
|
|
||||||
}
|
|
||||||
|
|
||||||
Snapshot Engine::GlobalGcSnapshot() {
|
|
||||||
std::lock_guard<utils::SpinLock> guard(lock_);
|
|
||||||
|
|
||||||
// No active transactions.
|
|
||||||
if (active_.size() == 0) {
|
|
||||||
auto snapshot_copy = active_;
|
|
||||||
snapshot_copy.insert(counter_ + 1);
|
|
||||||
return snapshot_copy;
|
|
||||||
}
|
|
||||||
|
|
||||||
// There are active transactions.
|
|
||||||
auto snapshot_copy = store_.find(active_.front())->second->snapshot();
|
|
||||||
snapshot_copy.insert(active_.front());
|
|
||||||
return snapshot_copy;
|
|
||||||
}
|
|
||||||
|
|
||||||
Snapshot Engine::GlobalActiveTransactions() {
|
|
||||||
std::lock_guard<utils::SpinLock> guard(lock_);
|
|
||||||
Snapshot active_transactions = active_;
|
|
||||||
return active_transactions;
|
|
||||||
}
|
|
||||||
|
|
||||||
TransactionId Engine::LocalLast() const {
|
|
||||||
std::lock_guard<utils::SpinLock> guard(lock_);
|
|
||||||
return counter_;
|
|
||||||
}
|
|
||||||
|
|
||||||
TransactionId Engine::GlobalLast() const { return LocalLast(); }
|
|
||||||
|
|
||||||
TransactionId Engine::LocalOldestActive() const {
|
|
||||||
std::lock_guard<utils::SpinLock> guard(lock_);
|
|
||||||
return active_.empty() ? counter_ + 1 : active_.front();
|
|
||||||
}
|
|
||||||
|
|
||||||
void Engine::GarbageCollectCommitLog(TransactionId tx_id) {
|
|
||||||
clog_->garbage_collect_older(tx_id);
|
|
||||||
}
|
|
||||||
|
|
||||||
void Engine::LocalForEachActiveTransaction(
|
|
||||||
std::function<void(Transaction &)> f) {
|
|
||||||
std::lock_guard<utils::SpinLock> guard(lock_);
|
|
||||||
for (auto transaction : active_) {
|
|
||||||
f(*store_.find(transaction)->second);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Transaction *Engine::RunningTransaction(TransactionId tx_id) {
|
|
||||||
std::lock_guard<utils::SpinLock> guard(lock_);
|
|
||||||
auto found = store_.find(tx_id);
|
|
||||||
CHECK(found != store_.end())
|
|
||||||
<< "Can't return snapshot for an inactive transaction";
|
|
||||||
return found->second.get();
|
|
||||||
}
|
|
||||||
|
|
||||||
void Engine::Reset() {
|
|
||||||
Snapshot wait_for_txs;
|
|
||||||
{
|
|
||||||
std::lock_guard<utils::SpinLock> guard(lock_);
|
|
||||||
|
|
||||||
// Block the engine from accepting new transactions.
|
|
||||||
accepting_transactions_.store(false);
|
|
||||||
|
|
||||||
// Set active transactions to abort ASAP.
|
|
||||||
for (auto transaction : active_) {
|
|
||||||
store_.find(transaction)->second->set_should_abort();
|
|
||||||
}
|
|
||||||
|
|
||||||
wait_for_txs = active_;
|
|
||||||
reset_active_.store(true);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Wait for all active transactions to end.
|
|
||||||
for (auto id : wait_for_txs) {
|
|
||||||
while (Info(id).is_active()) {
|
|
||||||
// TODO reconsider this constant, currently rule-of-thumb chosen
|
|
||||||
std::this_thread::sleep_for(std::chrono::microseconds(100));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Only after all transactions have finished, reset the engine.
|
|
||||||
std::lock_guard<utils::SpinLock> guard(lock_);
|
|
||||||
counter_ = 0;
|
|
||||||
replication_errors_.clear();
|
|
||||||
store_.clear();
|
|
||||||
active_.clear();
|
|
||||||
{
|
|
||||||
clog_ = nullptr;
|
|
||||||
clog_ = std::make_unique<CommitLog>();
|
|
||||||
}
|
|
||||||
accepting_transactions_.store(true);
|
|
||||||
reset_active_.store(false);
|
|
||||||
}
|
|
||||||
|
|
||||||
Transaction *Engine::BeginTransaction(bool blocking) {
|
|
||||||
TransactionId id{++counter_};
|
|
||||||
Transaction *t = new Transaction(id, active_, *this, blocking);
|
|
||||||
active_.insert(id);
|
|
||||||
store_.emplace(id, t);
|
|
||||||
delta_buffer_->Emplace(database::StateDelta::TxBegin(id));
|
|
||||||
return t;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace tx
|
|
@ -1,88 +0,0 @@
|
|||||||
/// @file
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include <atomic>
|
|
||||||
#include <optional>
|
|
||||||
#include <unordered_map>
|
|
||||||
#include <unordered_set>
|
|
||||||
|
|
||||||
#include "raft/raft_interface.hpp"
|
|
||||||
#include "storage/single_node_ha/state_delta_buffer.hpp"
|
|
||||||
#include "transactions/commit_log.hpp"
|
|
||||||
#include "transactions/transaction.hpp"
|
|
||||||
#include "utils/spin_lock.hpp"
|
|
||||||
|
|
||||||
namespace tx {
|
|
||||||
|
|
||||||
class TransactionEngineError : public utils::BasicException {
|
|
||||||
using utils::BasicException::BasicException;
|
|
||||||
};
|
|
||||||
|
|
||||||
/// High availability single node transaction engine.
|
|
||||||
///
|
|
||||||
/// Requires RaftInterface where it stores StateDeltas containing transaction
|
|
||||||
/// information needed for raft followers when replicating logs.
|
|
||||||
class Engine final {
|
|
||||||
public:
|
|
||||||
Engine(raft::RaftInterface *raft, storage::StateDeltaBuffer *delta_buffer);
|
|
||||||
|
|
||||||
Engine(const Engine &) = delete;
|
|
||||||
Engine(Engine &&) = delete;
|
|
||||||
Engine &operator=(const Engine &) = delete;
|
|
||||||
Engine &operator=(Engine &&) = delete;
|
|
||||||
|
|
||||||
Transaction *Begin();
|
|
||||||
/// Blocking transactions are used when we can't allow any other transaction
|
|
||||||
/// to run (besides this one). This is the reason why this transactions blocks
|
|
||||||
/// the engine from creating new transactions and waits for the existing ones
|
|
||||||
/// to finish.
|
|
||||||
Transaction *BeginBlocking(std::optional<TransactionId> parent_tx);
|
|
||||||
CommandId Advance(TransactionId id);
|
|
||||||
CommandId UpdateCommand(TransactionId id);
|
|
||||||
void Commit(const Transaction &t);
|
|
||||||
void Abort(const Transaction &t);
|
|
||||||
CommitLog::Info Info(TransactionId tx) const;
|
|
||||||
Snapshot GlobalGcSnapshot();
|
|
||||||
Snapshot GlobalActiveTransactions();
|
|
||||||
TransactionId GlobalLast() const;
|
|
||||||
TransactionId LocalLast() const;
|
|
||||||
TransactionId LocalOldestActive() const;
|
|
||||||
void LocalForEachActiveTransaction(std::function<void(Transaction &)> f);
|
|
||||||
Transaction *RunningTransaction(TransactionId tx_id);
|
|
||||||
void GarbageCollectCommitLog(TransactionId tx_id);
|
|
||||||
|
|
||||||
auto &local_lock_graph() { return local_lock_graph_; }
|
|
||||||
const auto &local_lock_graph() const { return local_lock_graph_; }
|
|
||||||
|
|
||||||
/// Reset the internal state of the engine. Use with caution as this will
|
|
||||||
/// block the engine from receiving any new transaction and will hint all
|
|
||||||
/// transactions to abort and will wait for them to finish before reseting
|
|
||||||
/// engines internal state.
|
|
||||||
void Reset();
|
|
||||||
|
|
||||||
private:
|
|
||||||
// Map lock dependencies. Each entry maps (tx_that_wants_lock,
|
|
||||||
// tx_that_holds_lock). Used for local deadlock resolution.
|
|
||||||
// TODO consider global deadlock resolution.
|
|
||||||
ConcurrentMap<TransactionId, TransactionId> local_lock_graph_;
|
|
||||||
|
|
||||||
TransactionId counter_{0};
|
|
||||||
std::unique_ptr<CommitLog> clog_{nullptr};
|
|
||||||
std::unordered_map<TransactionId, std::unique_ptr<Transaction>> store_;
|
|
||||||
Snapshot active_;
|
|
||||||
mutable utils::SpinLock lock_;
|
|
||||||
raft::RaftInterface *raft_{nullptr};
|
|
||||||
storage::StateDeltaBuffer *delta_buffer_{nullptr};
|
|
||||||
std::atomic<bool> accepting_transactions_{true};
|
|
||||||
std::atomic<bool> reset_active_{false};
|
|
||||||
|
|
||||||
// Keep track of transaction that experienced a replication error.
|
|
||||||
// While there is a replication error known to the engine, the engine won't
|
|
||||||
// accept new transactions.
|
|
||||||
std::unordered_set<TransactionId> replication_errors_;
|
|
||||||
|
|
||||||
// Helper method for transaction begin.
|
|
||||||
Transaction *BeginTransaction(bool blocking);
|
|
||||||
};
|
|
||||||
} // namespace tx
|
|
@ -1,98 +0,0 @@
|
|||||||
/// @file
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
#include <iostream>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include "glog/logging.h"
|
|
||||||
#include "transactions/type.hpp"
|
|
||||||
#include "utils/algorithm.hpp"
|
|
||||||
|
|
||||||
namespace tx {
|
|
||||||
|
|
||||||
/// Ascendingly sorted collection of transaction ids.
|
|
||||||
///
|
|
||||||
/// Represents the transactions that were active at
|
|
||||||
/// some point in the discrete transaction time.
|
|
||||||
class Snapshot final {
|
|
||||||
public:
|
|
||||||
Snapshot() = default;
|
|
||||||
Snapshot(std::vector<TransactionId> &&active)
|
|
||||||
: transaction_ids_(std::move(active)) {}
|
|
||||||
|
|
||||||
Snapshot(const Snapshot &) = default;
|
|
||||||
Snapshot(Snapshot &&) = default;
|
|
||||||
Snapshot &operator=(const Snapshot &) = default;
|
|
||||||
Snapshot &operator=(Snapshot &&) = default;
|
|
||||||
|
|
||||||
/// Returns true if this snapshot contains the given
|
|
||||||
/// transaction id.
|
|
||||||
///
|
|
||||||
/// @param xid - The transcation id in question
|
|
||||||
bool contains(TransactionId id) const {
|
|
||||||
return std::binary_search(transaction_ids_.begin(), transaction_ids_.end(),
|
|
||||||
id);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Adds the given transaction id to the end of this Snapshot.
|
|
||||||
/// The given id must be greater then all the existing ones,
|
|
||||||
/// to maintain ascending sort order.
|
|
||||||
///
|
|
||||||
/// @param id - the transaction id to add
|
|
||||||
void insert(TransactionId id) {
|
|
||||||
transaction_ids_.push_back(id);
|
|
||||||
DCHECK(std::is_sorted(transaction_ids_.begin(), transaction_ids_.end()))
|
|
||||||
<< "Snapshot must be sorted";
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Removes the given transaction id from this Snapshot.
|
|
||||||
///
|
|
||||||
/// @param id - the transaction id to remove
|
|
||||||
void remove(TransactionId id) {
|
|
||||||
auto last =
|
|
||||||
std::remove(transaction_ids_.begin(), transaction_ids_.end(), id);
|
|
||||||
transaction_ids_.erase(last, transaction_ids_.end());
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Removes all transactions from this Snapshot.
|
|
||||||
void clear() {
|
|
||||||
transaction_ids_.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
TransactionId front() const {
|
|
||||||
DCHECK(transaction_ids_.size()) << "Snapshot.front() on empty Snapshot";
|
|
||||||
return transaction_ids_.front();
|
|
||||||
}
|
|
||||||
|
|
||||||
TransactionId back() const {
|
|
||||||
DCHECK(transaction_ids_.size()) << "Snapshot.back() on empty Snapshot";
|
|
||||||
return transaction_ids_.back();
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t size() const { return transaction_ids_.size(); }
|
|
||||||
bool empty() const { return transaction_ids_.empty(); }
|
|
||||||
bool operator==(const Snapshot &other) const {
|
|
||||||
return transaction_ids_ == other.transaction_ids_;
|
|
||||||
}
|
|
||||||
auto begin() { return transaction_ids_.begin(); }
|
|
||||||
auto end() { return transaction_ids_.end(); }
|
|
||||||
auto begin() const { return transaction_ids_.cbegin(); }
|
|
||||||
auto end() const { return transaction_ids_.cend(); }
|
|
||||||
|
|
||||||
friend std::ostream &operator<<(std::ostream &stream,
|
|
||||||
const Snapshot &snapshot) {
|
|
||||||
stream << "Snapshot(";
|
|
||||||
utils::PrintIterable(stream, snapshot.transaction_ids_);
|
|
||||||
stream << ")";
|
|
||||||
return stream;
|
|
||||||
}
|
|
||||||
|
|
||||||
const auto &transaction_ids() const { return transaction_ids_; }
|
|
||||||
|
|
||||||
private:
|
|
||||||
std::vector<TransactionId> transaction_ids_;
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace tx
|
|
@ -1,118 +0,0 @@
|
|||||||
/// @file
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include <chrono>
|
|
||||||
#include <cstdint>
|
|
||||||
#include <cstdlib>
|
|
||||||
#include <unordered_set>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include "data_structures/concurrent/concurrent_map.hpp"
|
|
||||||
#include "storage/common/locking/record_lock.hpp"
|
|
||||||
#include "transactions/lock_store.hpp"
|
|
||||||
#include "transactions/snapshot.hpp"
|
|
||||||
#include "transactions/type.hpp"
|
|
||||||
#include "utils/exceptions.hpp"
|
|
||||||
|
|
||||||
namespace tx {
|
|
||||||
|
|
||||||
/// Indicates an error in transaction handling (currently
/// only command id overflow).
///
/// Inherits all of utils::BasicException's constructors, so it is thrown
/// with a plain message string.
class TransactionError : public utils::BasicException {
 public:
  using utils::BasicException::BasicException;
};
|
|
||||||
|
|
||||||
/// A database transaction. Encapsulates an atomic, abortable unit of work. Also
|
|
||||||
/// defines that all db ops are single-threaded within a single transaction
|
|
||||||
class Transaction final {
|
|
||||||
public:
|
|
||||||
/// Returns the maximum possible transcation id
|
|
||||||
static TransactionId MaxId() {
|
|
||||||
return std::numeric_limits<TransactionId>::max();
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
friend class Engine;
|
|
||||||
|
|
||||||
// The constructor is private, only the Engine ever uses it.
|
|
||||||
Transaction(TransactionId id, const Snapshot &snapshot, Engine &engine,
|
|
||||||
bool blocking)
|
|
||||||
: id_(id),
|
|
||||||
engine_(engine),
|
|
||||||
snapshot_(snapshot),
|
|
||||||
blocking_(blocking) {}
|
|
||||||
|
|
||||||
// A transaction can't be moved nor copied. it's owned by the transaction
|
|
||||||
// engine, and it's lifetime is managed by it.
|
|
||||||
Transaction(const Transaction &) = delete;
|
|
||||||
Transaction(Transaction &&) = delete;
|
|
||||||
Transaction &operator=(const Transaction &) = delete;
|
|
||||||
Transaction &operator=(Transaction &&) = delete;
|
|
||||||
|
|
||||||
public:
|
|
||||||
/// Acquires the lock over the given RecordLock, preventing other transactions
|
|
||||||
/// from doing the same
|
|
||||||
/// @throw utils::LockTimeoutException
|
|
||||||
void TakeLock(RecordLock &lock) const { locks_.Take(&lock, *this, engine_); }
|
|
||||||
|
|
||||||
/// Transaction's id. Unique in the engine that owns it
|
|
||||||
const TransactionId id_;
|
|
||||||
|
|
||||||
/// The transaction engine to which this transaction belongs
|
|
||||||
Engine &engine_;
|
|
||||||
|
|
||||||
/// Returns the current transaction's current command id
|
|
||||||
// TODO rename to cmd_id (variable and function
|
|
||||||
auto cid() const { return cid_; }
|
|
||||||
|
|
||||||
/// Returns this transaction's snapshot.
|
|
||||||
const Snapshot &snapshot() const { return snapshot_; }
|
|
||||||
|
|
||||||
/// Signal to transaction that it should abort. It doesn't really enforce that
|
|
||||||
/// transaction will abort, but it merely hints too the transaction that it is
|
|
||||||
/// preferable to stop its execution.
|
|
||||||
void set_should_abort() { should_abort_ = true; }
|
|
||||||
|
|
||||||
bool should_abort() const { return should_abort_; }
|
|
||||||
|
|
||||||
auto creation_time() const { return creation_time_; }
|
|
||||||
|
|
||||||
auto blocking() const { return blocking_; }
|
|
||||||
|
|
||||||
private:
|
|
||||||
/// Function used to advance the command.
|
|
||||||
/// @throw TransactionError
|
|
||||||
CommandId AdvanceCommand() {
|
|
||||||
if (cid_ == std::numeric_limits<CommandId>::max()) {
|
|
||||||
throw TransactionError(
|
|
||||||
"Reached maximum number of commands in this "
|
|
||||||
"transaction.");
|
|
||||||
}
|
|
||||||
return ++cid_;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Function used to set the command.
|
|
||||||
void SetCommand(CommandId cid) { cid_ = cid; }
|
|
||||||
|
|
||||||
// Index of the current command in the current transaction.
|
|
||||||
CommandId cid_{1};
|
|
||||||
|
|
||||||
// A snapshot of currently active transactions.
|
|
||||||
const Snapshot snapshot_;
|
|
||||||
|
|
||||||
// Record locks held by this transaction.
|
|
||||||
mutable LockStore locks_;
|
|
||||||
|
|
||||||
// True if transaction should abort. Used to signal query executor that it
|
|
||||||
// should stop execution, it is only a hint, transaction can disobey.
|
|
||||||
std::atomic<bool> should_abort_{false};
|
|
||||||
|
|
||||||
// Creation time.
|
|
||||||
const std::chrono::time_point<std::chrono::steady_clock> creation_time_{
|
|
||||||
std::chrono::steady_clock::now()};
|
|
||||||
|
|
||||||
bool blocking_{false};
|
|
||||||
};
|
|
||||||
} // namespace tx
|
|
@ -1,14 +0,0 @@
|
|||||||
/// @file
|
|
||||||
|
|
||||||
#include <cstdint>
|
|
||||||
|
|
||||||
// transaction and command types defined
// in a separate header to avoid cyclic dependencies
|
|
||||||
// NOTE(review): no #pragma once (or include guard) is visible above in this
// header — confirm it is guarded; the sibling headers in this module all use
// #pragma once.
namespace tx {

/// Type of a tx::Transaction's id member
using TransactionId = uint64_t;

/// Type of a tx::Transaction's command id member
using CommandId = uint32_t;
}  // namespace tx
|
|
@ -1,6 +1,5 @@
|
|||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
#include "database/graph_db.hpp"
|
|
||||||
#include "query/frontend/semantic/symbol_table.hpp"
|
#include "query/frontend/semantic/symbol_table.hpp"
|
||||||
#include "query/plan/operator.hpp"
|
#include "query/plan/operator.hpp"
|
||||||
#include "query/plan/pretty_print.hpp"
|
#include "query/plan/pretty_print.hpp"
|
||||||
|
Loading…
Reference in New Issue
Block a user