Remove distributed
Reviewers: teon.banek
Reviewed By: teon.banek
Subscribers: pullbot
Differential Revision: https://phabricator.memgraph.io/D2213
parent 59b3f84eb9
commit 111dd8bf19
@@ -1,9 +1,9 @@
- name: Binaries
  archive:
    - build_debug/memgraph
    - build_debug/memgraph_distributed
    - build_debug/memgraph_ha
    - build_release/memgraph
    - build_release/memgraph_distributed
    - build_release/memgraph_ha
    - build_release/tools/src/mg_client
    - build_release/tools/src/mg_import_csv
    - build_release/tools/src/mg_statsd
@@ -25,7 +25,7 @@

# Build coverage binaries.
cd ..
# TODO: uncomment this build once single node, ha and distributed are split
# TODO: uncomment this build once single node and ha are split
# mkdir build_coverage
# cd build_coverage
# cmake -DTEST_COVERAGE=ON ..
@@ -37,12 +37,8 @@
mkdir build_release
cd build_release
cmake -DCMAKE_BUILD_TYPE=release ..
TIMEOUT=1200 make -j$THREADS memgraph memgraph_distributed memgraph_ha tools memgraph__macro_benchmark memgraph__stress memgraph__manual__card_fraud_generate_snapshot memgraph__feature_benchmark__kafka__benchmark memgraph__feature_benchmark__ha__read__benchmark memgraph__feature_benchmark__ha__write__benchmark

# Generate distributed card fraud dataset.
cd ../tests/distributed/card_fraud
./generate_dataset.sh
cd ../../..
TIMEOUT=1200 make -j$THREADS
cd ..

# Checkout to parent commit and initialize.
cd ../parent
@@ -78,7 +74,7 @@

# Build coverage binaries.
cd ..
# TODO: uncomment this build once single node, ha and distributed are split
# TODO: uncomment this build once single node and ha are split
# mkdir build_coverage
# cd build_coverage
# cmake -DTEST_COVERAGE=ON ..
@@ -96,7 +92,3 @@
mkdir output
cd output
cpack -G DEB --config ../CPackConfig.cmake

# Generate distributed card fraud dataset.
cd ../../tests/distributed/card_fraud
./generate_dataset.sh
@@ -109,135 +109,6 @@ target_compile_definitions(mg-single-node PUBLIC MG_SINGLE_NODE)
# END Memgraph Single Node
# ----------------------------------------------------------------------------

# ----------------------------------------------------------------------------
# Memgraph Distributed
# ----------------------------------------------------------------------------

set(mg_distributed_sources
    ${lcp_common_cpp_files}
    audit/log.cpp
    database/distributed/distributed_graph_db.cpp
    distributed/bfs_rpc_clients.cpp
    distributed/bfs_subcursor.cpp
    distributed/cluster_discovery_master.cpp
    distributed/cluster_discovery_worker.cpp
    distributed/coordination.cpp
    distributed/coordination_master.cpp
    distributed/coordination_worker.cpp
    distributed/data_manager.cpp
    distributed/data_rpc_clients.cpp
    distributed/data_rpc_server.cpp
    distributed/dgp/partitioner.cpp
    distributed/dgp/vertex_migrator.cpp
    distributed/durability_rpc_master.cpp
    distributed/durability_rpc_worker.cpp
    distributed/dynamic_worker.cpp
    distributed/index_rpc_server.cpp
    distributed/plan_consumer.cpp
    distributed/plan_dispatcher.cpp
    distributed/produce_rpc_server.cpp
    distributed/pull_rpc_clients.cpp
    distributed/updates_rpc_clients.cpp
    distributed/updates_rpc_server.cpp
    query/distributed/interpreter.cpp
    query/distributed/plan/planner.cpp
    query/distributed/plan/ops.cpp
    query/distributed/plan/pretty_print.cpp
    storage/distributed/concurrent_id_mapper_master.cpp
    storage/distributed/concurrent_id_mapper_worker.cpp
    transactions/distributed/engine_master.cpp
    transactions/distributed/engine_worker.cpp
    data_structures/concurrent/skiplist_gc.cpp
    database/distributed/config.cpp
    database/distributed/graph_db_accessor.cpp
    durability/distributed/state_delta.cpp
    durability/distributed/paths.cpp
    durability/distributed/recovery.cpp
    durability/distributed/snapshooter.cpp
    durability/distributed/wal.cpp
    glue/auth.cpp
    glue/communication.cpp
    query/common.cpp
    query/frontend/ast/pretty_print.cpp
    query/frontend/ast/cypher_main_visitor.cpp
    query/frontend/parsing.cpp
    query/frontend/semantic/required_privileges.cpp
    query/frontend/semantic/symbol_generator.cpp
    query/frontend/stripped.cpp
    query/interpret/awesome_memgraph_functions.cpp
    query/interpreter.cpp
    query/plan/operator.cpp
    query/plan/preprocess.cpp
    query/plan/pretty_print.cpp
    query/plan/profile.cpp
    query/plan/rewrite/index_lookup.cpp
    query/plan/rule_based_planner.cpp
    query/plan/variable_start_planner.cpp
    query/repl.cpp
    query/distributed/serialization.cpp
    query/typed_value.cpp
    storage/common/locking/record_lock.cpp
    storage/common/types/property_value.cpp
    storage/common/types/property_value_store.cpp
    storage/common/types/slk.cpp
    storage/distributed/edge_accessor.cpp
    storage/distributed/edges_iterator.cpp
    storage/distributed/record_accessor.cpp
    storage/distributed/rpc/serialization.cpp
    storage/distributed/vertex_accessor.cpp
    memgraph_init.cpp
    transactions/distributed/engine_single_node.cpp
)

# -----------------------------------------------------------------------------

define_add_lcp(add_lcp_distributed mg_distributed_sources generated_lcp_distributed_files)

add_lcp_distributed(durability/distributed/state_delta.lcp)
add_lcp_distributed(database/distributed/serialization.lcp SLK_SERIALIZE
                    DEPENDS durability/distributed/state_delta.lcp)
add_lcp_distributed(distributed/bfs_rpc_messages.lcp SLK_SERIALIZE)
add_lcp_distributed(distributed/coordination_rpc_messages.lcp SLK_SERIALIZE)
add_lcp_distributed(distributed/data_rpc_messages.lcp SLK_SERIALIZE)
add_lcp_distributed(distributed/durability_rpc_messages.lcp SLK_SERIALIZE)
add_lcp_distributed(distributed/index_rpc_messages.lcp SLK_SERIALIZE)
add_lcp_distributed(distributed/plan_rpc_messages.lcp SLK_SERIALIZE)
add_lcp_distributed(distributed/pull_produce_rpc_messages.lcp SLK_SERIALIZE
                    DEPENDS transactions/distributed/serialization.lcp)
add_lcp_distributed(distributed/storage_gc_rpc_messages.lcp SLK_SERIALIZE)
add_lcp_distributed(distributed/token_sharing_rpc_messages.lcp SLK_SERIALIZE)
add_lcp_distributed(distributed/updates_rpc_messages.lcp SLK_SERIALIZE)
add_lcp_distributed(distributed/dynamic_worker_rpc_messages.lcp SLK_SERIALIZE)

add_lcp_distributed(query/distributed/frontend/ast/ast_serialization.lcp SLK_SERIALIZE
                    DEPENDS query/frontend/ast/ast.lcp)
add_lcp_distributed(query/distributed/frontend/semantic/symbol_serialization.lcp SLK_SERIALIZE
                    DEPENDS query/frontend/semantic/symbol.lcp)
add_lcp_distributed(query/distributed/plan/ops.lcp SLK_SERIALIZE
                    DEPENDS query/plan/operator.lcp)

add_lcp_distributed(storage/distributed/rpc/concurrent_id_mapper_rpc_messages.lcp SLK_SERIALIZE)
add_lcp_distributed(transactions/distributed/engine_rpc_messages.lcp SLK_SERIALIZE
                    DEPENDS transactions/distributed/serialization.lcp)

add_custom_target(generate_lcp_distributed DEPENDS generate_lcp_common ${generated_lcp_distributed_files})

set(MG_DISTRIBUTED_LIBS stdc++fs Threads::Threads fmt cppitertools
    antlr_opencypher_parser_lib dl glog gflags
    mg-utils mg-io mg-integrations-kafka mg-requests
    mg-communication mg-comm-rpc mg-auth)

# STATIC library used by memgraph executables
add_library(mg-distributed STATIC ${mg_distributed_sources})
target_link_libraries(mg-distributed ${MG_DISTRIBUTED_LIBS})
add_dependencies(mg-distributed generate_opencypher_parser)
add_dependencies(mg-distributed generate_lcp_distributed)
target_compile_definitions(mg-distributed PUBLIC MG_DISTRIBUTED)

# ----------------------------------------------------------------------------
# END Memgraph Distributed
# ----------------------------------------------------------------------------

# ----------------------------------------------------------------------------
# Memgraph Single Node High Availability
# ----------------------------------------------------------------------------
@@ -326,7 +197,7 @@ target_compile_definitions(mg-single-node-ha PUBLIC MG_SINGLE_NODE_HA)
# ----------------------------------------------------------------------------

add_custom_target(generate_lcp)
add_dependencies(generate_lcp generate_lcp_single_node generate_lcp_single_node_ha generate_lcp_distributed)
add_dependencies(generate_lcp generate_lcp_single_node generate_lcp_single_node_ha)

string(TOLOWER ${CMAKE_BUILD_TYPE} lower_build_type)

@@ -405,20 +276,6 @@ install(
install(DIRECTORY ${examples}/build/ DESTINATION share/memgraph/examples)


# memgraph distributed main executable
add_executable(memgraph_distributed memgraph_distributed.cpp)
target_link_libraries(memgraph_distributed mg-distributed kvstore_lib telemetry_lib)
set_target_properties(memgraph_distributed PROPERTIES
                      # Set the executable output name to include version information.
                      OUTPUT_NAME "memgraph_distributed-${memgraph_VERSION}-${COMMIT_HASH}_${CMAKE_BUILD_TYPE}"
                      # Output the executable in main binary dir.
                      RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
# Create symlink to the built executable.
add_custom_command(TARGET memgraph_distributed POST_BUILD
                   COMMAND ${CMAKE_COMMAND} -E create_symlink $<TARGET_FILE:memgraph_distributed> ${CMAKE_BINARY_DIR}/memgraph_distributed
                   BYPRODUCTS ${CMAKE_BINARY_DIR}/memgraph_distributed
                   COMMENT Creating symlink to memgraph distributed executable)

# memgraph single node high availability executable
add_executable(memgraph_ha memgraph_ha.cpp)
target_link_libraries(memgraph_ha mg-single-node-ha kvstore_lib telemetry_lib)
@@ -1,114 +0,0 @@
#include <limits>

#include "database/distributed/graph_db.hpp"
#include "storage/distributed/gid.hpp"
#include "utils/flag_validation.hpp"
#include "utils/string.hpp"

// Durability flags.
DEFINE_bool(durability_enabled, false,
            "If durability (database persistence) should be enabled");
DEFINE_string(
    durability_directory, "durability",
    "Path to directory in which to save snapshots and write-ahead log files.");
DEFINE_bool(db_recover_on_startup, false, "Recover database on startup.");
DEFINE_VALIDATED_int32(
    snapshot_cycle_sec, 3600,
    "Amount of time between two snapshots, in seconds (min 60).",
    FLAG_IN_RANGE(1, std::numeric_limits<int32_t>::max()));
DEFINE_int32(snapshot_max_retained, -1,
             "Number of retained snapshots, -1 means without limit.");
DEFINE_bool(snapshot_on_exit, false, "Snapshot on exiting the database.");

// Misc flags
DEFINE_int32(query_execution_time_sec, 180,
             "Maximum allowed query execution time. Queries exceeding this "
             "limit will be aborted. Value of -1 means no limit.");
DEFINE_int32(gc_cycle_sec, 30,
             "Amount of time between starts of two cleaning cycles in seconds. "
             "-1 to turn off.");
// Data location.
DEFINE_string(properties_on_disk, "",
              "Property names of properties which will be stored on available "
              "disk. Property names have to be separated with comma (,).");

// Full durability.
DEFINE_bool(synchronous_commit, false,
            "Should a transaction end wait for WAL records to be written to "
            "disk before the transaction finishes.");

// Distributed master/worker flags.
DEFINE_VALIDATED_HIDDEN_int32(worker_id, 0,
                              "ID of a worker in a distributed system. Igored "
                              "in single-node.",
                              FLAG_IN_RANGE(0, 1 << gid::kWorkerIdSize));
DEFINE_HIDDEN_string(master_host, "0.0.0.0",
                     "For master node indicates the host served on. For worker "
                     "node indicates the master location.");
DEFINE_VALIDATED_HIDDEN_int32(
    master_port, 0,
    "For master node the port on which to serve. For "
    "worker node indicates the master's port.",
    FLAG_IN_RANGE(0, std::numeric_limits<uint16_t>::max()));
DEFINE_HIDDEN_string(worker_host, "0.0.0.0",
                     "For worker node indicates the host served on. For master "
                     "node this flag is not used.");
DEFINE_VALIDATED_HIDDEN_int32(
    worker_port, 0,
    "For master node it's unused. For worker node "
    "indicates the port on which to serve. If zero (default value), a port is "
    "chosen at random. Sent to the master when registring worker node.",
    FLAG_IN_RANGE(0, std::numeric_limits<uint16_t>::max()));
DEFINE_VALIDATED_HIDDEN_int32(rpc_num_client_workers,
                              std::max(std::thread::hardware_concurrency(), 1U),
                              "Number of client workers (RPC)",
                              FLAG_IN_RANGE(1, INT32_MAX));
DEFINE_VALIDATED_HIDDEN_int32(rpc_num_server_workers,
                              std::max(std::thread::hardware_concurrency(), 1U),
                              "Number of server workers (RPC)",
                              FLAG_IN_RANGE(1, INT32_MAX));
DEFINE_VALIDATED_int32(recovering_cluster_size, 0,
                       "Number of workers (including master) in the "
                       "previously snapshooted/wal cluster.",
                       FLAG_IN_RANGE(0, INT32_MAX));
// TODO (buda): Implement openCypher query because it completely make sense
// to being able to start and stop DGP on the fly.
// The implementation should be straightforward.
DEFINE_bool(dynamic_graph_partitioner_enabled, false,
            "If the dynamic graph partitioner should be enabled.");

DEFINE_VALIDATED_uint64(vertex_cache_size, 5000,
                        "Size of cache used for storing remote vertices",
                        FLAG_IN_RANGE(1, std::numeric_limits<uint64_t>::max()));

DEFINE_VALIDATED_uint64(edge_cache_size, 5000,
                        "Size of cache used for storing remote edges",
                        FLAG_IN_RANGE(1, std::numeric_limits<uint64_t>::max()));

database::Config::Config()
    // Durability flags.
    : durability_enabled{FLAGS_durability_enabled},
      durability_directory{FLAGS_durability_directory},
      db_recover_on_startup{FLAGS_db_recover_on_startup},
      snapshot_cycle_sec{FLAGS_snapshot_cycle_sec},
      snapshot_max_retained{FLAGS_snapshot_max_retained},
      snapshot_on_exit{FLAGS_snapshot_on_exit},
      synchronous_commit{FLAGS_synchronous_commit},
      // Misc flags.
      gc_cycle_sec{FLAGS_gc_cycle_sec},
      query_execution_time_sec{FLAGS_query_execution_time_sec},
      // Data location.
      properties_on_disk(utils::Split(FLAGS_properties_on_disk, ",")),
      // Distributed flags.
      dynamic_graph_partitioner_enabled{
          FLAGS_dynamic_graph_partitioner_enabled},
      rpc_num_client_workers{FLAGS_rpc_num_client_workers},
      rpc_num_server_workers{FLAGS_rpc_num_server_workers},
      worker_id{FLAGS_worker_id},
      master_endpoint{FLAGS_master_host,
                      static_cast<uint16_t>(FLAGS_master_port)},
      worker_endpoint{FLAGS_worker_host,
                      static_cast<uint16_t>(FLAGS_worker_port)},
      recovering_cluster_size{FLAGS_recovering_cluster_size},
      vertex_cache_size{FLAGS_vertex_cache_size},
      edge_cache_size{FLAGS_edge_cache_size} {}
@@ -1,909 +0,0 @@
#include "database/distributed/distributed_graph_db.hpp"

#include "distributed/bfs_rpc_clients.hpp"
#include "distributed/bfs_rpc_server.hpp"
#include "distributed/bfs_subcursor.hpp"
#include "distributed/cluster_discovery_master.hpp"
#include "distributed/cluster_discovery_worker.hpp"
#include "distributed/coordination_master.hpp"
#include "distributed/coordination_worker.hpp"
#include "distributed/data_manager.hpp"
#include "distributed/data_rpc_server.hpp"
#include "distributed/durability_rpc_master.hpp"
#include "distributed/durability_rpc_worker.hpp"
#include "distributed/dynamic_worker.hpp"
#include "distributed/index_rpc_messages.hpp"
#include "distributed/index_rpc_server.hpp"
#include "distributed/plan_consumer.hpp"
#include "distributed/plan_dispatcher.hpp"
#include "distributed/produce_rpc_server.hpp"
#include "distributed/pull_rpc_clients.hpp"
#include "distributed/token_sharing_rpc_server.hpp"
#include "distributed/updates_rpc_clients.hpp"
#include "distributed/updates_rpc_server.hpp"
#include "durability/distributed/snapshooter.hpp"
#include "storage/distributed/concurrent_id_mapper.hpp"
#include "storage/distributed/concurrent_id_mapper_master.hpp"
#include "storage/distributed/concurrent_id_mapper_worker.hpp"
#include "storage/distributed/storage_gc_master.hpp"
#include "storage/distributed/storage_gc_worker.hpp"
#include "transactions/distributed/engine_master.hpp"
#include "transactions/distributed/engine_worker.hpp"
#include "utils/file.hpp"

using namespace std::literals::chrono_literals;

namespace database {

namespace {
//////////////////////////////////////////////////////////////////////
// GraphDbAccessor implementations
//////////////////////////////////////////////////////////////////////
class MasterAccessor final : public GraphDbAccessor {
  distributed::Coordination *coordination_;
  distributed::PullRpcClients *pull_clients_;
  int worker_id_{0};

 public:
  MasterAccessor(Master *db, distributed::Coordination *coordination,
                 distributed::PullRpcClients *pull_clients_)
      : GraphDbAccessor(*db),
        coordination_(coordination),
        pull_clients_(pull_clients_),
        worker_id_(db->WorkerId()) {}

  MasterAccessor(Master *db, tx::TransactionId tx_id,
                 distributed::Coordination *coordination,
                 distributed::PullRpcClients *pull_clients_)
      : GraphDbAccessor(*db, tx_id),
        coordination_(coordination),
        pull_clients_(pull_clients_),
        worker_id_(db->WorkerId()) {}

  void PostCreateIndex(const LabelPropertyIndex::Key &key) override {
    std::optional<std::vector<utils::Future<bool>>> index_rpc_completions;

    // Notify all workers to create the index
    index_rpc_completions.emplace(coordination_->ExecuteOnWorkers<bool>(
        worker_id_,
        [&key](int worker_id, communication::rpc::ClientPool &client_pool) {
          try {
            client_pool.Call<distributed::CreateIndexRpc>(key.label_,
                                                          key.property_);
            return true;
          } catch (const communication::rpc::RpcFailedException &) {
            return false;
          }
        }));

    if (index_rpc_completions) {
      // Wait first, check later - so that every thread finishes and none
      // terminates - this can probably be optimized in case we fail early so
      // that we notify other workers to stop building indexes
      for (auto &index_built : *index_rpc_completions) index_built.wait();
      for (auto &index_built : *index_rpc_completions) {
        // TODO: `get()` can throw an exception, should we delete the index when
        // it throws?
        if (!index_built.get()) {
          db().storage().label_property_index().DeleteIndex(key);
          throw IndexCreationOnWorkerException("Index exists on a worker");
        }
      }
    }
  }

  void PopulateIndexFromBuildIndex(
      const LabelPropertyIndex::Key &key) override {
    // Notify all workers to start populating an index if we are the master
    // since they don't have to wait anymore
    std::optional<std::vector<utils::Future<bool>>> index_rpc_completions;
    index_rpc_completions.emplace(coordination_->ExecuteOnWorkers<bool>(
        worker_id_, [this, &key](int worker_id,
                                 communication::rpc::ClientPool &client_pool) {
          try {
            client_pool.Call<distributed::PopulateIndexRpc>(
                key.label_, key.property_, transaction_id());
            return true;
          } catch (const communication::rpc::RpcFailedException &) {
            return false;
          }
        }));

    // Populate our own storage
    GraphDbAccessor::PopulateIndexFromBuildIndex(key);

    // Check if all workers successfully built their indexes and after this we
    // can set the index as built
    if (index_rpc_completions) {
      // Wait first, check later - so that every thread finishes and none
      // terminates - this can probably be optimized in case we fail early so
      // that we notify other workers to stop building indexes
      for (auto &index_built : *index_rpc_completions) index_built.wait();
      for (auto &index_built : *index_rpc_completions) {
        // TODO: `get()` can throw an exception, should we delete the index when
        // it throws?
        if (!index_built.get()) {
          db().storage().label_property_index().DeleteIndex(key);
          throw IndexCreationOnWorkerException("Index exists on a worker");
        }
      }
    }
  }

  // TODO (mferencevic): Move this logic into the transaction engine.
  void AdvanceCommand() override {
    GraphDbAccessor::AdvanceCommand();
    auto tx_id = transaction_id();
    auto futures = pull_clients_->NotifyAllTransactionCommandAdvanced(tx_id);
    for (auto &future : futures) future.get();
  }
};

class WorkerAccessor final : public GraphDbAccessor {
 public:
  explicit WorkerAccessor(Worker *db)
      : GraphDbAccessor(*db) {}

  WorkerAccessor(Worker *db, tx::TransactionId tx_id)
      : GraphDbAccessor(*db, tx_id) {}

  void BuildIndex(storage::Label, storage::Property) override {
    // TODO: Rethink BuildIndex API or inheritance. It's rather strange that a
    // derived type blocks this functionality.
    LOG(FATAL) << "BuildIndex invoked on worker.";
  }
};

//////////////////////////////////////////////////////////////////////
// RecoveryTransactions implementations
//////////////////////////////////////////////////////////////////////

class DistributedRecoveryTransactions
    : public durability::RecoveryTransactions {
 public:
  explicit DistributedRecoveryTransactions(GraphDb *db) : db_(db) {}

  void Commit(const tx::TransactionId &tx_id) final {
    GetAccessor(tx_id)->Commit();
    accessors_.erase(accessors_.find(tx_id));
  }

  void Apply(const database::StateDelta &delta) final {
    delta.Apply(*GetAccessor(delta.transaction_id));
  }

 protected:
  virtual GraphDbAccessor *GetAccessor(const tx::TransactionId &tx_id) = 0;

  GraphDb *db_;
  std::unordered_map<tx::TransactionId, std::unique_ptr<GraphDbAccessor>>
      accessors_;
};

class MasterRecoveryTransactions final
    : public DistributedRecoveryTransactions {
 public:
  explicit MasterRecoveryTransactions(Master *db)
      : DistributedRecoveryTransactions(db) {}

  void Begin(const tx::TransactionId &tx_id) final {
    CHECK(accessors_.find(tx_id) == accessors_.end())
        << "Double transaction start";
    accessors_.emplace(tx_id, db_->Access());
  }

  void Abort(const tx::TransactionId &tx_id) final {
    GetAccessor(tx_id)->Abort();
    accessors_.erase(accessors_.find(tx_id));
  }

 protected:
  virtual GraphDbAccessor *GetAccessor(
      const tx::TransactionId &tx_id) override {
    auto found = accessors_.find(tx_id);
    CHECK(found != accessors_.end())
        << "Accessor does not exist for transaction: " << tx_id;
    return found->second.get();
  }
};

class WorkerRecoveryTransactions final
    : public DistributedRecoveryTransactions {
 public:
  explicit WorkerRecoveryTransactions(Worker *db)
      : DistributedRecoveryTransactions(db) {}

  void Begin(const tx::TransactionId &tx_id) override {
    LOG(FATAL) << "Unexpected transaction begin on worker recovery.";
  }

  void Abort(const tx::TransactionId &tx_id) override {
    LOG(FATAL) << "Unexpected transaction abort on worker recovery.";
  }

 protected:
  GraphDbAccessor *GetAccessor(const tx::TransactionId &tx_id) override {
    auto found = accessors_.find(tx_id);
    // Currently accessors are created on transaction_begin, but since workers
    // don't have a transaction begin, the accessors are not created.
    if (found == accessors_.end()) {
      std::tie(found, std::ignore) = accessors_.emplace(tx_id, db_->Access());
    }
    return found->second.get();
  }
};

}  // namespace

//////////////////////////////////////////////////////////////////////
// GraphDb implementations
//////////////////////////////////////////////////////////////////////

namespace impl {

template <template <typename TId> class TMapper>
struct TypemapPack {
  template <typename... TMapperArgs>
  explicit TypemapPack(TMapperArgs ... args)
      : label(args...), edge_type(args...), property(args...) {}
  // TODO this should also be garbage collected
  TMapper<storage::Label> label;
  TMapper<storage::EdgeType> edge_type;
  TMapper<storage::Property> property;
};

//////////////////////////////////////////////////////////////////////
// Master
//////////////////////////////////////////////////////////////////////

class Master {
 public:
  explicit Master(const Config &config, database::Master *self)
      : config_(config), self_(self) {}

  Config config_;
  std::unique_ptr<Storage> storage_ =
      std::make_unique<Storage>(config_.worker_id, config_.properties_on_disk);
  durability::WriteAheadLog wal_{
      config_.worker_id, config_.durability_directory,
      config_.durability_enabled, config_.synchronous_commit};

  // TODO: Some things may depend on order of construction/destruction. We also
  // have a lot of circular pointers among members. It would be a good idea to
  // clean the mess. Also, be careful of virtual calls to `self_` in
  // constructors of members.
  database::Master *self_{nullptr};
  distributed::MasterCoordination coordination_{config_.master_endpoint,
                                                config_.rpc_num_server_workers,
                                                config_.rpc_num_client_workers};
  tx::EngineMaster tx_engine_{&coordination_, &wal_};
  std::unique_ptr<StorageGcMaster> storage_gc_ =
      std::make_unique<StorageGcMaster>(storage_.get(), &tx_engine_,
                                        config_.gc_cycle_sec, &coordination_);
  TypemapPack<storage::MasterConcurrentIdMapper> typemap_pack_{&coordination_};
  distributed::BfsSubcursorStorage subcursor_storage_{&bfs_subcursor_clients_};
  distributed::BfsRpcServer bfs_subcursor_server_{self_, &coordination_,
                                                  &subcursor_storage_};
  distributed::BfsRpcClients bfs_subcursor_clients_{
      self_, &subcursor_storage_, &coordination_, &data_manager_};
  distributed::DurabilityRpcMaster durability_rpc_{&coordination_};
  distributed::DataRpcServer data_server_{self_, &coordination_};
  distributed::DataRpcClients data_clients_{&coordination_};
  distributed::PlanDispatcher plan_dispatcher_{&coordination_};
  distributed::PullRpcClients pull_clients_{&coordination_, &data_manager_};
  distributed::UpdatesRpcServer updates_server_{self_, &coordination_};
  distributed::UpdatesRpcClients updates_clients_{&coordination_};
  distributed::DataManager data_manager_{*self_, data_clients_,
                                         config_.vertex_cache_size,
                                         config_.edge_cache_size};
  distributed::ClusterDiscoveryMaster cluster_discovery_{
      &coordination_, config_.durability_directory};
  distributed::TokenSharingRpcServer token_sharing_server_{
      self_, config_.worker_id, &coordination_};
  distributed::DynamicWorkerAddition dynamic_worker_addition_{self_, &coordination_};
};

}  // namespace impl

Master::Master(Config config)
    : impl_(std::make_unique<impl::Master>(config, this)) {
  // Register all transaction based caches for cleanup.
  impl_->tx_engine_.RegisterForTransactionalCacheCleanup(
      impl_->updates_server_);
  impl_->tx_engine_.RegisterForTransactionalCacheCleanup(impl_->data_manager_);
  impl_->tx_engine_.RegisterForTransactionalCacheCleanup(
      impl_->subcursor_storage_);
  impl_->tx_engine_.RegisterForTransactionalCacheCleanup(
      impl_->bfs_subcursor_server_);
}

Master::~Master() {}

std::unique_ptr<GraphDbAccessor> Master::Access() {
  return std::make_unique<MasterAccessor>(
      this, &impl_->coordination_, &impl_->pull_clients_);
}

std::unique_ptr<GraphDbAccessor> Master::Access(tx::TransactionId tx_id) {
  return std::make_unique<MasterAccessor>(
      this, tx_id, &impl_->coordination_, &impl_->pull_clients_);
}

Storage &Master::storage() { return *impl_->storage_; }

durability::WriteAheadLog &Master::wal() { return impl_->wal_; }

tx::Engine &Master::tx_engine() { return impl_->tx_engine_; }

storage::ConcurrentIdMapper<storage::Label> &Master::label_mapper() {
  return impl_->typemap_pack_.label;
}

storage::ConcurrentIdMapper<storage::EdgeType> &Master::edge_type_mapper() {
  return impl_->typemap_pack_.edge_type;
}

storage::ConcurrentIdMapper<storage::Property> &Master::property_mapper() {
  return impl_->typemap_pack_.property;
}

void Master::CollectGarbage() { impl_->storage_gc_->CollectGarbage(); }

int Master::WorkerId() const { return impl_->config_.worker_id; }

std::vector<int> Master::GetWorkerIds() const {
  return impl_->coordination_.GetWorkerIds();
}

// Makes a local snapshot and forces the workers to do the same. Snapshot is
// written here only if workers sucesfully created their own snapshot
bool Master::MakeSnapshot(GraphDbAccessor &accessor) {
  auto workers_snapshot =
      impl_->durability_rpc_.MakeSnapshot(accessor.transaction_id());
  if (!workers_snapshot.get()) return false;
  // This can be further optimized by creating master snapshot at the same
  // time as workers snapshots but this forces us to delete the master
  // snapshot if we succeed in creating it and workers somehow fail. Because
  // we have an assumption that every snapshot that exists on master with
  // some tx_id visibility also exists on workers
  const bool status =
      durability::MakeSnapshot(*this, accessor, impl_->config_.worker_id,
                               impl_->config_.durability_directory,
                               impl_->config_.snapshot_max_retained);
  if (status) {
    LOG(INFO) << "Snapshot created successfully.";
  } else {
    LOG(ERROR) << "Snapshot creation failed!";
  }
  return status;
}

void Master::ReinitializeStorage() {
  impl_->storage_gc_->Stop();
  impl_->storage_ = std::make_unique<Storage>(
      impl_->config_.worker_id, impl_->config_.properties_on_disk);
  impl_->storage_gc_->Reinitialize(impl_->storage_.get(), &impl_->tx_engine_);
}

io::network::Endpoint Master::endpoint() const {
  return impl_->coordination_.GetServerEndpoint();
}

io::network::Endpoint Master::GetEndpoint(int worker_id) {
  return impl_->coordination_.GetEndpoint(worker_id);
}

void Master::Start() {
  // Start coordination.
  CHECK(impl_->coordination_.Start()) << "Couldn't start master coordination!";

  // Start transactional cache cleanup.
  impl_->tx_engine_.StartTransactionalCacheCleanup();

  if (impl_->config_.durability_enabled)
    utils::EnsureDirOrDie(impl_->config_.durability_directory);

  // Durability recovery.
  {
    // What we recover.
    std::optional<durability::RecoveryInfo> recovery_info;

    durability::RecoveryData recovery_data;
    // Recover only if necessary.
    if (impl_->config_.db_recover_on_startup) {
      CHECK(durability::VersionConsistency(impl_->config_.durability_directory))
          << "Contents of durability directory are not compatible with the "
             "current version of Memgraph binary!";
      recovery_info = durability::RecoverOnlySnapshot(
          impl_->config_.durability_directory, this, &recovery_data,
          std::nullopt, impl_->config_.worker_id);
    }

    // Post-recovery setup and checking.
    impl_->coordination_.SetRecoveredSnapshot(
        recovery_info ? std::make_optional(
                            std::make_pair(recovery_info->durability_version,
                                           recovery_info->snapshot_tx_id))
                      : std::nullopt);

    // Wait till workers report back their recoverable wal txs
    if (recovery_info) {
      CHECK(impl_->config_.recovering_cluster_size > 0)
          << "Invalid cluster recovery size flag. Recovered cluster size "
             "should be at least 1";
      while (impl_->coordination_.CountRecoveredWorkers() !=
             impl_->config_.recovering_cluster_size - 1) {
        LOG(INFO) << "Waiting for workers to finish recovering..";
        std::this_thread::sleep_for(2s);
      }

      // Get the intersection of recoverable transactions from wal on
      // workers and on master
      recovery_data.wal_tx_to_recover =
          impl_->coordination_.CommonWalTransactions(*recovery_info);
      MasterRecoveryTransactions recovery_transactions(this);
      durability::RecoverWal(impl_->config_.durability_directory, this,
                             &recovery_data, &recovery_transactions);
      durability::RecoverIndexes(this, recovery_data.indexes);
      auto workers_recovered_wal =
          impl_->durability_rpc_.RecoverWalAndIndexes(&recovery_data);
      workers_recovered_wal.get();
    }

    impl_->dynamic_worker_addition_.Enable();
  }

  // Start the dynamic graph partitioner inside token sharing server
  if (impl_->config_.dynamic_graph_partitioner_enabled) {
    impl_->token_sharing_server_.Start();
  }

  if (impl_->config_.durability_enabled) {
    // move any existing snapshots or wal files to a deprecated folder.
    if (!impl_->config_.db_recover_on_startup &&
        durability::ContainsDurabilityFiles(
            impl_->config_.durability_directory)) {
      durability::MoveToBackup(impl_->config_.durability_directory);
      LOG(WARNING) << "Since Memgraph was not supposed to recover on startup "
                      "and durability is enabled, your current durability "
                      "files will likely be overriden. To prevent important "
                      "data loss, Memgraph has stored those files into a "
                      ".backup directory inside durability directory";
    }
    impl_->wal_.Init();
    snapshot_creator_ = std::make_unique<utils::Scheduler>();
    snapshot_creator_->Run(
        "Snapshot", std::chrono::seconds(impl_->config_.snapshot_cycle_sec),
        [this] {
          auto dba = this->Access();
          MakeSnapshot(*dba);
        });
  }

  // Start transaction killer.
  if (impl_->config_.query_execution_time_sec != -1) {
    transaction_killer_.Run(
        "TX killer",
        std::chrono::seconds(std::max(
            1, std::min(5, impl_->config_.query_execution_time_sec / 4))),
        [this]() {
          impl_->tx_engine_.LocalForEachActiveTransaction(
              [this](tx::Transaction &t) {
                if (t.creation_time() +
                        std::chrono::seconds(
                            impl_->config_.query_execution_time_sec) <
                    std::chrono::steady_clock::now()) {
                  t.set_should_abort();
                };
              });
        });
  }
}

bool Master::AwaitShutdown(std::function<void(void)> call_before_shutdown) {
  bool ret =
      impl_->coordination_.AwaitShutdown(
          [this, &call_before_shutdown](bool is_cluster_alive) -> bool {
            snapshot_creator_ = nullptr;

            // Stop all running transactions. This will allow all shutdowns in
            // the callback that depend on query execution to be aborted and
            // cleaned up.
            // TODO (mferencevic): When we have full cluster management
            // (detection of failure and automatic failure recovery) this should
            // this be done directly through the transaction engine (eg. using
            // cluster degraded/operational hooks and callbacks).
            is_accepting_transactions_ = false;
            impl_->tx_engine_.LocalForEachActiveTransaction(
                [](auto &t) { t.set_should_abort(); });

            // Call the toplevel callback to stop everything that the caller
            // wants us to stop.
            call_before_shutdown();

            // Now we stop everything that calls RPCs (garbage collection, etc.)

            // Stop the storage garbage collector.
            impl_->storage_gc_->Stop();

            // Transactional cache cleanup must be stopped before all of the
            // objects that were registered for cleanup are destructed.
            impl_->tx_engine_.StopTransactionalCacheCleanup();

            // We are not a worker, so we can do a snapshot on exit if it's
            // enabled. Doing this on the master forces workers to do the same
            // through RPCs. If the cluster is in a degraded state then don't
            // attempt to do a snapshot because the snapshot can't be created on
            // all workers. The cluster will have to recover from a previous
            // snapshot and WALs.
            if (impl_->config_.snapshot_on_exit) {
              if (is_cluster_alive) {
                auto dba = Access();
                // Here we make the snapshot and return the snapshot creation
                // success to the caller.
                return MakeSnapshot(*dba);
              } else {
                LOG(WARNING)
                    << "Because the cluster is in a degraded state we can't "
                       "create a snapshot. The cluster will be recovered from "
                       "previous snapshots and WALs.";
              }
            }

            // The shutdown was completed successfully.
            return true;
          });

  // Return the shutdown success status.
  return ret;
}

void Master::Shutdown() { return impl_->coordination_.Shutdown(); }

distributed::BfsRpcClients &Master::bfs_subcursor_clients() {
  return impl_->bfs_subcursor_clients_;
}

distributed::DataRpcClients &Master::data_clients() {
  return impl_->data_clients_;
}

distributed::UpdatesRpcServer &Master::updates_server() {
  return impl_->updates_server_;
}

distributed::UpdatesRpcClients &Master::updates_clients() {
  return impl_->updates_clients_;
}

distributed::DataManager &Master::data_manager() {
  return impl_->data_manager_;
}

distributed::PullRpcClients &Master::pull_clients() {
  return impl_->pull_clients_;
}

distributed::PlanDispatcher &Master::plan_dispatcher() {
  return impl_->plan_dispatcher_;
}

VertexAccessor InsertVertexIntoRemote(
    GraphDbAccessor *dba, int worker_id,
    const std::vector<storage::Label> &labels,
    const std::unordered_map<storage::Property, PropertyValue> &properties,
    std::optional<int64_t> cypher_id) {
  auto *db = &dba->db();
  CHECK(db);
  CHECK(worker_id != db->WorkerId())
      << "Not allowed to call InsertVertexIntoRemote for local worker";
  auto *updates_clients = &db->updates_clients();
  auto *data_manager = &db->data_manager();
  CHECK(updates_clients && data_manager);
  auto created_vertex_info = updates_clients->CreateVertex(
      worker_id, dba->transaction_id(), labels, properties, cypher_id);
  auto vertex = std::make_unique<Vertex>();
  vertex->labels_ = labels;
  for (auto &kv : properties) vertex->properties_.set(kv.first, kv.second);
  data_manager->Emplace<Vertex>(
      dba->transaction_id(), created_vertex_info.gid,
      distributed::CachedRecordData<Vertex>(created_vertex_info.cypher_id,
                                            nullptr, std::move(vertex)));
  return VertexAccessor({created_vertex_info.gid, worker_id}, *dba);
}

//////////////////////////////////////////////////////////////////////
// Worker
//////////////////////////////////////////////////////////////////////

namespace impl {

class Worker {
 public:
  Config config_;
  std::unique_ptr<Storage> storage_ =
      std::make_unique<Storage>(config_.worker_id, config_.properties_on_disk);
  durability::WriteAheadLog wal_{
      config_.worker_id, config_.durability_directory,
      config_.durability_enabled, config_.synchronous_commit};

  Worker(const Config &config, database::Worker *self)
      : config_(config), self_(self) {}

  // TODO: Some things may depend on order of construction/destruction. We also
  // have a lot of circular pointers among members. It would be a good idea to
  // clean the mess. Also, be careful of virtual calls to `self_` in
  // constructors of members.
  database::Worker *self_{nullptr};
  distributed::WorkerCoordination coordination_{
      config_.worker_endpoint, config_.worker_id, config_.master_endpoint,
      config_.rpc_num_server_workers, config_.rpc_num_client_workers};
  tx::EngineWorker tx_engine_{&coordination_, &wal_};
  std::unique_ptr<StorageGcWorker> storage_gc_ =
      std::make_unique<StorageGcWorker>(
          storage_.get(), &tx_engine_, config_.gc_cycle_sec,
          coordination_.GetClientPool(0), config_.worker_id);
  TypemapPack<storage::WorkerConcurrentIdMapper> typemap_pack_{
      coordination_.GetClientPool(0)};
  distributed::BfsSubcursorStorage subcursor_storage_{&bfs_subcursor_clients_};
  distributed::BfsRpcServer bfs_subcursor_server_{self_, &coordination_,
                                                  &subcursor_storage_};
  distributed::BfsRpcClients bfs_subcursor_clients_{
      self_, &subcursor_storage_, &coordination_, &data_manager_};
  distributed::DataRpcServer data_server_{self_, &coordination_};
  distributed::DataRpcClients data_clients_{&coordination_};
  distributed::PlanConsumer plan_consumer_{&coordination_};
  distributed::ProduceRpcServer produce_server_{self_, &tx_engine_, &coordination_,
                                                plan_consumer_, &data_manager_};
  distributed::IndexRpcServer index_rpc_server_{self_, &coordination_};
  distributed::UpdatesRpcServer updates_server_{self_, &coordination_};
  distributed::UpdatesRpcClients updates_clients_{&coordination_};
  distributed::DataManager data_manager_{*self_, data_clients_,
                                         config_.vertex_cache_size,
                                         config_.edge_cache_size};
  distributed::DurabilityRpcWorker durability_rpc_{self_, &coordination_};
  distributed::ClusterDiscoveryWorker cluster_discovery_{
      &coordination_};
  distributed::TokenSharingRpcServer token_sharing_server_{
      self_, config_.worker_id, &coordination_};
  distributed::DynamicWorkerRegistration dynamic_worker_registration_{
      coordination_.GetClientPool(0)};
};

}  // namespace impl

Worker::Worker(Config config)
    : impl_(std::make_unique<impl::Worker>(config, this)) {
  // Register all transaction based caches for cleanup.
  impl_->tx_engine_.RegisterForTransactionalCacheCleanup(
      impl_->updates_server_);
  impl_->tx_engine_.RegisterForTransactionalCacheCleanup(impl_->data_manager_);
  impl_->tx_engine_.RegisterForTransactionalCacheCleanup(
      impl_->produce_server_);
  impl_->tx_engine_.RegisterForTransactionalCacheCleanup(
      impl_->subcursor_storage_);
  impl_->tx_engine_.RegisterForTransactionalCacheCleanup(
      impl_->bfs_subcursor_server_);
}

Worker::~Worker() {}

std::unique_ptr<GraphDbAccessor> Worker::Access() {
  return std::make_unique<WorkerAccessor>(this);
}

std::unique_ptr<GraphDbAccessor> Worker::Access(tx::TransactionId tx_id) {
  return std::make_unique<WorkerAccessor>(this, tx_id);
}

Storage &Worker::storage() { return *impl_->storage_; }

durability::WriteAheadLog &Worker::wal() { return impl_->wal_; }

tx::Engine &Worker::tx_engine() { return impl_->tx_engine_; }

storage::ConcurrentIdMapper<storage::Label> &Worker::label_mapper() {
  return impl_->typemap_pack_.label;
}

storage::ConcurrentIdMapper<storage::EdgeType> &Worker::edge_type_mapper() {
  return impl_->typemap_pack_.edge_type;
}

storage::ConcurrentIdMapper<storage::Property> &Worker::property_mapper() {
  return impl_->typemap_pack_.property;
}

void Worker::CollectGarbage() { return impl_->storage_gc_->CollectGarbage(); }

int Worker::WorkerId() const { return impl_->config_.worker_id; }

std::vector<int> Worker::GetWorkerIds() const {
  return impl_->coordination_.GetWorkerIds();
}

bool Worker::MakeSnapshot(GraphDbAccessor &accessor) {
  // Makes a local snapshot from the visibility of accessor
  const bool status =
      durability::MakeSnapshot(*this, accessor, impl_->config_.worker_id,
                               impl_->config_.durability_directory,
                               impl_->config_.snapshot_max_retained);
  if (status) {
    LOG(INFO) << "Snapshot created successfully.";
  } else {
    LOG(ERROR) << "Snapshot creation failed!";
  }
  return status;
}

void Worker::ReinitializeStorage() {
  impl_->storage_gc_->Stop();
  impl_->storage_ = std::make_unique<Storage>(
      impl_->config_.worker_id, impl_->config_.properties_on_disk);
  impl_->storage_gc_->Reinitialize(impl_->storage_.get(), &impl_->tx_engine_);
}

void Worker::RecoverWalAndIndexes(durability::RecoveryData *recovery_data) {
  WorkerRecoveryTransactions recovery_transactions(this);
  durability::RecoverWal(impl_->config_.durability_directory, this,
                         recovery_data, &recovery_transactions);
  durability::RecoverIndexes(this, recovery_data->indexes);
}

io::network::Endpoint Worker::endpoint() const {
  return impl_->coordination_.GetServerEndpoint();
}

io::network::Endpoint Worker::GetEndpoint(int worker_id) {
  return impl_->coordination_.GetEndpoint(worker_id);
}

void Worker::Start() {
  // Start coordination.
  CHECK(impl_->coordination_.Start()) << "Couldn't start worker coordination!";

  // Register to the master.
  impl_->cluster_discovery_.RegisterWorker(impl_->config_.worker_id,
                                           impl_->config_.durability_directory);

  // Start transactional cache cleanup.
  impl_->tx_engine_.StartTransactionalCacheCleanup();

  if (impl_->config_.durability_enabled)
    utils::EnsureDirOrDie(impl_->config_.durability_directory);

  // Durability recovery. We need to check this flag for workers that are added
  // after the "main" cluster recovery.
  if (impl_->config_.db_recover_on_startup) {
    // What we should recover (version, transaction_id) pair.
    auto snapshot_to_recover = impl_->cluster_discovery_.snapshot_to_recover();

    // What we recover.
    std::optional<durability::RecoveryInfo> recovery_info;

    durability::RecoveryData recovery_data;
    // Recover only if necessary.
    if (snapshot_to_recover) {
      // check version consistency.
      if (!durability::DistributedVersionConsistency(
              snapshot_to_recover->first))
        LOG(FATAL) << "Memgraph worker failed to recover due to version "
                      "inconsistency with the master.";
      if (!durability::VersionConsistency(impl_->config_.durability_directory))
        LOG(FATAL)
            << "Contents of durability directory are not compatible with the "
               "current version of Memgraph binary!";
      recovery_info = durability::RecoverOnlySnapshot(
          impl_->config_.durability_directory, this, &recovery_data,
          snapshot_to_recover->second, impl_->config_.worker_id);
    }

    // Post-recovery setup and checking.
    if (snapshot_to_recover &&
        (!recovery_info ||
         snapshot_to_recover->second != recovery_info->snapshot_tx_id))
      LOG(FATAL) << "Memgraph worker failed to recover the database state "
                    "recovered on the master";
    impl_->cluster_discovery_.NotifyWorkerRecovered(recovery_info);
  } else {
    // Check with master if we're a dynamically added worker and need to update
    // our indices.
    auto indexes = impl_->dynamic_worker_registration_.GetIndicesToCreate();
    if (!indexes.empty()) {
      durability::RecoverIndexes(this, indexes);
    }
  }

  if (impl_->config_.durability_enabled) {
    // move any existing snapshots or wal files to a deprecated folder.
    if (!impl_->config_.db_recover_on_startup &&
        durability::ContainsDurabilityFiles(
            impl_->config_.durability_directory)) {
      durability::MoveToBackup(impl_->config_.durability_directory);
      LOG(WARNING) << "Since Memgraph was not supposed to recover on startup "
                      "and durability is enabled, your current durability "
                      "files will likely be overriden. To prevent important "
                      "data loss, Memgraph has stored those files into a "
                      ".backup directory inside durability directory";
    }
    impl_->wal_.Init();
  }

  // Start transaction killer.
  if (impl_->config_.query_execution_time_sec != -1) {
    transaction_killer_.Run(
        "TX killer",
        std::chrono::seconds(std::max(
            1, std::min(5, impl_->config_.query_execution_time_sec / 4))),
        [this]() {
          impl_->tx_engine_.LocalForEachActiveTransaction(
              [this](tx::Transaction &t) {
                if (t.creation_time() +
                        std::chrono::seconds(
                            impl_->config_.query_execution_time_sec) <
                    std::chrono::steady_clock::now()) {
                  t.set_should_abort();
                };
              });
        });
  }
}

bool Worker::AwaitShutdown(std::function<void(void)> call_before_shutdown) {
  bool ret = impl_->coordination_.AwaitShutdown(
      [this, &call_before_shutdown](bool is_cluster_alive) -> bool {
        // Stop all running transactions. This will allow all shutdowns in the
        // callback that depend on query execution to be aborted and cleaned up.
        // TODO (mferencevic): See the note for this same code for the `Master`.
        is_accepting_transactions_ = false;
        impl_->tx_engine_.LocalForEachActiveTransaction(
            [](auto &t) { t.set_should_abort(); });

        // Call the toplevel callback to stop everything that the caller wants
        // us to stop.
        call_before_shutdown();

        // Now we stop everything that calls RPCs (garbage collection, etc.)

        // Stop the storage garbage collector.
        impl_->storage_gc_->Stop();

        // Transactional cache cleanup must be stopped before all of the objects
        // that were registered for cleanup are destructed.
        impl_->tx_engine_.StopTransactionalCacheCleanup();

        // The worker shutdown always succeeds.
        return true;
      });

  // Return the shutdown success status.
  return ret;
}

void Worker::Shutdown() { return impl_->coordination_.Shutdown(); }

distributed::BfsRpcClients &Worker::bfs_subcursor_clients() {
  return impl_->bfs_subcursor_clients_;
}

distributed::DataRpcClients &Worker::data_clients() {
  return impl_->data_clients_;
}

distributed::UpdatesRpcServer &Worker::updates_server() {
  return impl_->updates_server_;
}

distributed::UpdatesRpcClients &Worker::updates_clients() {
  return impl_->updates_clients_;
}

distributed::DataManager &Worker::data_manager() {
  return impl_->data_manager_;
}

distributed::PlanConsumer &Worker::plan_consumer() {
  return impl_->plan_consumer_;
}

}  // namespace database
@@ -1,109 +0,0 @@
/// @file

#pragma once

#include "database/distributed/graph_db.hpp"
#include "durability/distributed/version.hpp"

namespace database {
class Master final : public GraphDb {
 public:
  explicit Master(Config config = Config());
  ~Master();

  std::unique_ptr<GraphDbAccessor> Access() override;
  std::unique_ptr<GraphDbAccessor> Access(tx::TransactionId) override;

  Storage &storage() override;
  durability::WriteAheadLog &wal() override;
  tx::Engine &tx_engine() override;
  storage::ConcurrentIdMapper<storage::Label> &label_mapper() override;
  storage::ConcurrentIdMapper<storage::EdgeType> &edge_type_mapper() override;
  storage::ConcurrentIdMapper<storage::Property> &property_mapper() override;
  void CollectGarbage() override;
  int WorkerId() const override;
  std::vector<int> GetWorkerIds() const override;
  bool MakeSnapshot(GraphDbAccessor &accessor) override;
  void ReinitializeStorage() override;

  /** Gets this master's endpoint. */
  io::network::Endpoint endpoint() const;
  /** Gets the endpoint of the worker with the given id. */
  // TODO make const once Coordination::GetEndpoint is const.
  io::network::Endpoint GetEndpoint(int worker_id);

  void Start();
  bool AwaitShutdown(std::function<void(void)> call_before_shutdown = [] {});
  void Shutdown();

  distributed::BfsRpcClients &bfs_subcursor_clients() override;
  distributed::DataRpcClients &data_clients() override;
  distributed::UpdatesRpcServer &updates_server() override;
  distributed::UpdatesRpcClients &updates_clients() override;
  distributed::DataManager &data_manager() override;

  distributed::PullRpcClients &pull_clients();
  distributed::PlanDispatcher &plan_dispatcher();
  distributed::IndexRpcClients &index_rpc_clients();

 private:
  std::unique_ptr<impl::Master> impl_;

  utils::Scheduler transaction_killer_;
  std::unique_ptr<utils::Scheduler> snapshot_creator_;
};

class Worker final : public GraphDb {
 public:
  explicit Worker(Config config = Config());
  ~Worker();

  std::unique_ptr<GraphDbAccessor> Access() override;
  std::unique_ptr<GraphDbAccessor> Access(tx::TransactionId) override;

  Storage &storage() override;
  durability::WriteAheadLog &wal() override;
  tx::Engine &tx_engine() override;
  storage::ConcurrentIdMapper<storage::Label> &label_mapper() override;
  storage::ConcurrentIdMapper<storage::EdgeType> &edge_type_mapper() override;
  storage::ConcurrentIdMapper<storage::Property> &property_mapper() override;
  void CollectGarbage() override;
  int WorkerId() const override;
  std::vector<int> GetWorkerIds() const override;
  bool MakeSnapshot(GraphDbAccessor &accessor) override;
  void ReinitializeStorage() override;
  void RecoverWalAndIndexes(durability::RecoveryData *recovery_data);

  /** Gets this worker's endpoint. */
  io::network::Endpoint endpoint() const;
  /** Gets the endpoint of the worker with the given id. */
  // TODO make const once Coordination::GetEndpoint is const.
  io::network::Endpoint GetEndpoint(int worker_id);

  void Start();
  bool AwaitShutdown(std::function<void(void)> call_before_shutdown = [] {});
  void Shutdown();

  distributed::BfsRpcClients &bfs_subcursor_clients() override;
  distributed::DataRpcClients &data_clients() override;
  distributed::UpdatesRpcServer &updates_server() override;
  distributed::UpdatesRpcClients &updates_clients() override;
  distributed::DataManager &data_manager() override;

  distributed::PlanConsumer &plan_consumer();

 private:
  std::unique_ptr<impl::Worker> impl_;

  utils::Scheduler transaction_killer_;
};

/// Creates a new Vertex on the given worker.
/// It is NOT allowed to call this function with this worker's id.
VertexAccessor InsertVertexIntoRemote(
    GraphDbAccessor *dba, int worker_id,
    const std::vector<storage::Label> &labels,
    const std::unordered_map<storage::Property, PropertyValue> &properties,
    std::optional<int64_t> cypher_id);

}  // namespace database
@ -1,146 +0,0 @@
|
||||
/// @file
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "durability/distributed/recovery.hpp"
|
||||
#include "durability/distributed/wal.hpp"
|
||||
#include "io/network/endpoint.hpp"
|
||||
#include "storage/common/types/types.hpp"
|
||||
#include "storage/distributed/concurrent_id_mapper.hpp"
|
||||
#include "storage/distributed/storage.hpp"
|
||||
#include "storage/distributed/storage_gc.hpp"
|
||||
#include "storage/distributed/vertex_accessor.hpp"
|
||||
#include "transactions/distributed/engine.hpp"
|
||||
#include "utils/scheduler.hpp"
|
||||
|
||||
namespace distributed {
|
||||
class BfsRpcServer;
|
||||
class BfsRpcClients;
|
||||
class DataRpcServer;
|
||||
class DataRpcClients;
|
||||
class PlanDispatcher;
|
||||
class PlanConsumer;
|
||||
class PullRpcClients;
|
||||
class ProduceRpcServer;
|
||||
class UpdatesRpcServer;
|
||||
class UpdatesRpcClients;
|
||||
class DataManager;
|
||||
class IndexRpcClients;
|
||||
} // namespace distributed
|
||||
|
||||
namespace database {
|
||||
namespace impl {
|
||||
class Master;
|
||||
class Worker;
|
||||
} // namespace impl
|
||||
|
||||
/// Database configuration. Initialized from flags, but modifiable.
|
||||
struct Config {
|
||||
Config();
|
||||
|
||||
// Durability flags.
|
||||
bool durability_enabled;
|
||||
std::string durability_directory;
|
||||
bool db_recover_on_startup;
|
||||
int snapshot_cycle_sec;
|
||||
int snapshot_max_retained;
|
||||
int snapshot_on_exit;
|
||||
bool synchronous_commit;
|
||||
|
||||
// Misc flags.
|
||||
int gc_cycle_sec;
|
||||
int query_execution_time_sec;
|
||||
|
||||
// set of properties which will be stored on disk
|
||||
std::vector<std::string> properties_on_disk;
|
||||
|
||||
// Distributed master/worker flags.
|
||||
bool dynamic_graph_partitioner_enabled{false};
|
||||
int rpc_num_client_workers{0};
|
||||
int rpc_num_server_workers{0};
|
||||
int worker_id{0};
|
||||
io::network::Endpoint master_endpoint{"0.0.0.0", 0};
|
||||
io::network::Endpoint worker_endpoint{"0.0.0.0", 0};
|
||||
int recovering_cluster_size{0};
|
||||
|
||||
// Sizes of caches that hold remote data
|
||||
// Default value is same as in config.cpp
|
||||
size_t vertex_cache_size{5000};
|
||||
size_t edge_cache_size{5000};
|
||||
};
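A rough sketch (not part of the original sources) of how a worker-side Config like the one above might be filled in; the field names come from the struct itself, while every concrete value is an invented placeholder.

// Illustrative only: populates the Config declared above for a hypothetical
// worker node. All values are placeholders, not defaults from the codebase.
inline database::Config MakeExampleWorkerConfig() {
  database::Config config;
  config.durability_enabled = true;
  config.durability_directory = "/var/lib/memgraph/durability";
  config.worker_id = 1;
  config.master_endpoint = io::network::Endpoint("192.168.0.10", 10000);
  config.worker_endpoint = io::network::Endpoint("0.0.0.0", 10001);
  config.vertex_cache_size = 5000;  // same as the declared default
  config.edge_cache_size = 5000;
  return config;
}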
|
||||
|
||||
class GraphDbAccessor;
|
||||
|
||||
/// An abstract base class providing the interface for a graph database.
|
||||
///
|
||||
/// Always be sure that the GraphDb object is destructed before main exits, i.e.
|
||||
/// the GraphDb object shouldn't be a global/static variable, except if its
|
||||
/// destructor is explicitly called before main exits. Consider code:
|
||||
///
|
||||
/// GraphDb db; // KeyIndex is created as a part of database::Storage
|
||||
/// int main() {
|
||||
/// GraphDbAccessor dba(db);
|
||||
/// auto v = dba.InsertVertex();
|
||||
/// v.add_label(dba.Label(
|
||||
/// "Start")); // New SkipList is created in KeyIndex for LabelIndex.
|
||||
/// // That SkipList creates SkipListGc which
|
||||
/// // initialises static Executor object.
|
||||
/// return 0;
|
||||
/// }
|
||||
///
|
||||
/// After main exits: 1. Executor is destructed, 2. KeyIndex is destructed.
|
||||
/// Destructor of KeyIndex calls delete on created SkipLists which destroy
|
||||
/// SkipListGc that tries to use the Executor object that doesn't exist anymore.
|
||||
/// -> CRASH
|
||||
class GraphDb {
|
||||
public:
|
||||
GraphDb() {}
|
||||
GraphDb(const GraphDb &) = delete;
|
||||
GraphDb(GraphDb &&) = delete;
|
||||
GraphDb &operator=(const GraphDb &) = delete;
|
||||
GraphDb &operator=(GraphDb &&) = delete;
|
||||
|
||||
virtual ~GraphDb() {}
|
||||
|
||||
/// Create a new accessor by starting a new transaction.
|
||||
virtual std::unique_ptr<GraphDbAccessor> Access() = 0;
|
||||
/// Create an accessor for a running transaction.
|
||||
virtual std::unique_ptr<GraphDbAccessor> Access(tx::TransactionId) = 0;
|
||||
|
||||
virtual Storage &storage() = 0;
|
||||
virtual durability::WriteAheadLog &wal() = 0;
|
||||
virtual tx::Engine &tx_engine() = 0;
|
||||
virtual storage::ConcurrentIdMapper<storage::Label> &label_mapper() = 0;
|
||||
virtual storage::ConcurrentIdMapper<storage::EdgeType>
|
||||
&edge_type_mapper() = 0;
|
||||
virtual storage::ConcurrentIdMapper<storage::Property> &property_mapper() = 0;
|
||||
virtual void CollectGarbage() = 0;
|
||||
|
||||
/// Makes a snapshot from the visibility of the given accessor
|
||||
virtual bool MakeSnapshot(GraphDbAccessor &accessor) = 0;
|
||||
|
||||
/// Releases the storage object safely and creates a new object.
|
||||
/// This is needed because of recovery, otherwise we might try to recover into
|
||||
/// a storage which has already been polluted because of a failed previous
|
||||
/// recovery
|
||||
virtual void ReinitializeStorage() = 0;
|
||||
|
||||
virtual int WorkerId() const = 0;
|
||||
virtual std::vector<int> GetWorkerIds() const = 0;
|
||||
|
||||
virtual distributed::BfsRpcClients &bfs_subcursor_clients() = 0;
|
||||
virtual distributed::DataRpcClients &data_clients() = 0;
|
||||
virtual distributed::UpdatesRpcServer &updates_server() = 0;
|
||||
virtual distributed::UpdatesRpcClients &updates_clients() = 0;
|
||||
virtual distributed::DataManager &data_manager() = 0;
|
||||
|
||||
/// When this is false, no new transactions should be created.
|
||||
bool is_accepting_transactions() const { return is_accepting_transactions_; }
|
||||
|
||||
protected:
|
||||
std::atomic<bool> is_accepting_transactions_{true};
|
||||
};
|
||||
} // namespace database
|
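To make the lifetime warning in the GraphDb comment above concrete, here is a minimal usage sketch (illustrative only; it assumes the headers shown in this diff and a concrete subclass such as the Master class removed earlier):

// Sketch: keep the database an automatic variable inside main so that it is
// destructed before static objects (e.g. the SkipListGc executor) go away,
// exactly as the GraphDb class comment above recommends.
int main() {
  database::Master db;            // any concrete GraphDb would do
  auto dba = db.Access();         // one accessor == one transaction
  auto v = dba->InsertVertex();
  v.add_label(dba->Label("Start"));
  dba->Commit();
  return 0;                       // db destructed at the end of main, before static teardown
}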
@ -1,560 +0,0 @@
|
||||
#include "database/distributed/graph_db_accessor.hpp"
|
||||
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
|
||||
#include <glog/logging.h>
|
||||
|
||||
#include "distributed/data_manager.hpp"
|
||||
#include "distributed/updates_rpc_clients.hpp"
|
||||
#include "durability/distributed/state_delta.hpp"
|
||||
#include "storage/distributed/address_types.hpp"
|
||||
#include "storage/distributed/edge.hpp"
|
||||
#include "storage/distributed/edge_accessor.hpp"
|
||||
#include "storage/distributed/vertex.hpp"
|
||||
#include "storage/distributed/vertex_accessor.hpp"
|
||||
#include "utils/cast.hpp"
|
||||
#include "utils/on_scope_exit.hpp"
|
||||
|
||||
namespace database {
|
||||
|
||||
GraphDbAccessor::GraphDbAccessor(GraphDb &db)
|
||||
: db_(db),
|
||||
transaction_(*db.tx_engine().Begin()),
|
||||
transaction_starter_{true} {}
|
||||
|
||||
GraphDbAccessor::GraphDbAccessor(GraphDb &db, tx::TransactionId tx_id)
|
||||
: db_(db),
|
||||
transaction_(*db.tx_engine().RunningTransaction(tx_id)),
|
||||
transaction_starter_{false} {}
|
||||
|
||||
GraphDbAccessor::~GraphDbAccessor() {
|
||||
if (transaction_starter_ && !commited_ && !aborted_) {
|
||||
this->Abort();
|
||||
}
|
||||
}
|
||||
|
||||
int16_t GraphDbAccessor::worker_id() const {
|
||||
return db_.WorkerId();
|
||||
}
|
||||
|
||||
distributed::DataManager &GraphDbAccessor::data_manager() {
|
||||
return db_.data_manager();
|
||||
}
|
||||
|
||||
distributed::UpdatesRpcClients &GraphDbAccessor::updates_clients() {
|
||||
return db_.updates_clients();
|
||||
}
|
||||
|
||||
tx::TransactionId GraphDbAccessor::transaction_id() const {
|
||||
return transaction_.id_;
|
||||
}
|
||||
|
||||
void GraphDbAccessor::AdvanceCommand() {
|
||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
||||
db_.tx_engine().Advance(transaction_.id_);
|
||||
}
|
||||
|
||||
void GraphDbAccessor::Commit() {
|
||||
DCHECK(!commited_ && !aborted_) << "Already aborted or commited transaction.";
|
||||
db_.tx_engine().Commit(transaction_);
|
||||
commited_ = true;
|
||||
}
|
||||
|
||||
void GraphDbAccessor::Abort() {
|
||||
DCHECK(!commited_ && !aborted_) << "Already aborted or commited transaction.";
|
||||
db_.tx_engine().Abort(transaction_);
|
||||
aborted_ = true;
|
||||
}
|
||||
|
||||
bool GraphDbAccessor::should_abort() const {
|
||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
||||
return transaction_.should_abort();
|
||||
}
|
||||
|
||||
durability::WriteAheadLog &GraphDbAccessor::wal() { return db_.wal(); }
|
||||
|
||||
VertexAccessor GraphDbAccessor::InsertVertex(
|
||||
std::optional<gid::Gid> requested_gid, std::optional<int64_t> cypher_id) {
|
||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
||||
|
||||
auto gid = db_.storage().vertex_generator_.Next(requested_gid);
|
||||
if (!cypher_id) cypher_id = utils::MemcpyCast<int64_t>(gid);
|
||||
auto vertex_vlist =
|
||||
new mvcc::VersionList<Vertex>(transaction_, gid, *cypher_id);
|
||||
|
||||
bool success =
|
||||
db_.storage().vertices_.access().insert(gid, vertex_vlist).second;
|
||||
CHECK(success) << "Attempting to insert a vertex with an existing GID: "
|
||||
<< gid;
|
||||
wal().Emplace(database::StateDelta::CreateVertex(
|
||||
transaction_.id_, vertex_vlist->gid_, vertex_vlist->cypher_id()));
|
||||
auto va = VertexAccessor(storage::VertexAddress(vertex_vlist), *this);
|
||||
return va;
|
||||
}
|
||||
|
||||
std::optional<VertexAccessor> GraphDbAccessor::FindVertexOptional(
|
||||
gid::Gid gid, bool current_state) {
|
||||
auto record_accessor = FindVertexRaw(gid);
|
||||
if (!record_accessor.Visible(transaction(), current_state))
|
||||
return std::nullopt;
|
||||
return record_accessor;
|
||||
}
|
||||
|
||||
VertexAccessor GraphDbAccessor::FindVertexRaw(gid::Gid gid) {
|
||||
return VertexAccessor(
|
||||
storage::VertexAddress(db_.storage().LocalAddress<Vertex>(gid)), *this);
|
||||
}
|
||||
|
||||
VertexAccessor GraphDbAccessor::FindVertex(gid::Gid gid, bool current_state) {
|
||||
auto found = FindVertexOptional(gid, current_state);
|
||||
CHECK(found) << "Unable to find vertex for id: " << gid;
|
||||
return *found;
|
||||
}
|
||||
|
||||
std::optional<EdgeAccessor> GraphDbAccessor::FindEdgeOptional(
|
||||
gid::Gid gid, bool current_state) {
|
||||
auto record_accessor = FindEdgeRaw(gid);
|
||||
if (!record_accessor.Visible(transaction(), current_state))
|
||||
return std::nullopt;
|
||||
return record_accessor;
|
||||
}
|
||||
|
||||
EdgeAccessor GraphDbAccessor::FindEdgeRaw(gid::Gid gid) {
|
||||
return EdgeAccessor(
|
||||
storage::EdgeAddress(db_.storage().LocalAddress<Edge>(gid)), *this);
|
||||
}
|
||||
|
||||
EdgeAccessor GraphDbAccessor::FindEdge(gid::Gid gid, bool current_state) {
|
||||
auto found = FindEdgeOptional(gid, current_state);
|
||||
CHECK(found) << "Unable to find edge for id: " << gid;
|
||||
return *found;
|
||||
}
|
||||
|
||||
void GraphDbAccessor::BuildIndex(storage::Label label,
|
||||
storage::Property property) {
|
||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
||||
db_.storage().index_build_tx_in_progress_.access().insert(transaction_.id_);
|
||||
|
||||
// on function exit remove the create index transaction from
|
||||
// build_tx_in_progress
|
||||
utils::OnScopeExit on_exit_1([this] {
|
||||
auto removed = db_.storage().index_build_tx_in_progress_.access().remove(
|
||||
transaction_.id_);
|
||||
DCHECK(removed) << "Index creation transaction should be inside set";
|
||||
});
|
||||
|
||||
// Create the index
|
||||
const LabelPropertyIndex::Key key(label, property);
|
||||
if (db_.storage().label_property_index_.CreateIndex(key) == false) {
|
||||
throw IndexExistsException(
|
||||
"Index is either being created by another transaction or already "
|
||||
"exists.");
|
||||
}
|
||||
// Call the hook for inherited classes.
|
||||
PostCreateIndex(key);
|
||||
|
||||
// Everything that happens after the line above ended will be added to the
|
||||
// index automatically, but we still have to add to index everything that
|
||||
// happened earlier. We have to first wait for every transaction that
|
||||
  // happened before, or a bit later than, CreateIndex to end.
|
||||
{
|
||||
auto wait_transactions = transaction_.engine_.GlobalActiveTransactions();
|
||||
auto active_index_creation_transactions =
|
||||
db_.storage().index_build_tx_in_progress_.access();
|
||||
for (auto id : wait_transactions) {
|
||||
if (active_index_creation_transactions.contains(id)) continue;
|
||||
while (transaction_.engine_.Info(id).is_active()) {
|
||||
// Active index creation set could only now start containing that id,
|
||||
        // since that thread might not have written to the set yet, and to avoid
|
||||
        // deadlock we need to make sure we keep track of that.
|
||||
if (active_index_creation_transactions.contains(id)) continue;
|
||||
// TODO reconsider this constant, currently rule-of-thumb chosen
|
||||
std::this_thread::sleep_for(std::chrono::microseconds(100));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// This accessor's transaction surely sees everything that happened before
|
||||
// CreateIndex.
|
||||
auto dba = db_.Access();
|
||||
|
||||
// Add transaction to the build_tx_in_progress as this transaction doesn't
|
||||
// change data and shouldn't block other parallel index creations
|
||||
auto read_transaction_id = dba->transaction().id_;
|
||||
db_.storage().index_build_tx_in_progress_.access().insert(
|
||||
read_transaction_id);
|
||||
// on function exit remove the read transaction from build_tx_in_progress
|
||||
utils::OnScopeExit on_exit_2([read_transaction_id, this] {
|
||||
auto removed = db_.storage().index_build_tx_in_progress_.access().remove(
|
||||
read_transaction_id);
|
||||
DCHECK(removed) << "Index building (read) transaction should be inside set";
|
||||
});
|
||||
|
||||
dba->PopulateIndexFromBuildIndex(key);
|
||||
|
||||
dba->EnableIndex(key);
|
||||
dba->Commit();
|
||||
}
|
||||
|
||||
void GraphDbAccessor::DeleteIndex(storage::Label, storage::Property) {
|
||||
throw utils::NotYetImplemented("Distributed drop index");
|
||||
}
|
||||
|
||||
void GraphDbAccessor::EnableIndex(const LabelPropertyIndex::Key &key) {
|
||||
// Commit transaction as we finished applying method on newest visible
|
||||
// records. Write that transaction's ID to the WAL as the index has been
|
||||
// built at this point even if this DBA's transaction aborts for some
|
||||
// reason.
|
||||
auto wal_build_index_tx_id = transaction_id();
|
||||
wal().Emplace(database::StateDelta::BuildIndex(
|
||||
wal_build_index_tx_id, key.label_, LabelName(key.label_), key.property_,
|
||||
PropertyName(key.property_)));
|
||||
|
||||
// After these two operations we are certain that everything is contained in
|
||||
// the index under the assumption that the original index creation transaction
|
||||
// contained no vertex/edge insert/update before this method was invoked.
|
||||
db_.storage().label_property_index_.IndexFinishedBuilding(key);
|
||||
}
|
||||
|
||||
void GraphDbAccessor::PopulateIndex(const LabelPropertyIndex::Key &key) {
|
||||
for (auto vertex : Vertices(key.label_, false)) {
|
||||
if (vertex.PropsAt(key.property_).type() == PropertyValue::Type::Null)
|
||||
continue;
|
||||
db_.storage().label_property_index_.UpdateOnLabelProperty(
|
||||
vertex.address().local(), vertex.GetCurrent());
|
||||
}
|
||||
}
|
||||
|
||||
void GraphDbAccessor::UpdateLabelIndices(storage::Label label,
|
||||
const VertexAccessor &vertex_accessor,
|
||||
const Vertex *const vertex) {
|
||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
||||
DCHECK(vertex_accessor.is_local()) << "Only local vertices belong in indexes";
|
||||
auto *vlist_ptr = vertex_accessor.address().local();
|
||||
db_.storage().labels_index_.Update(label, vlist_ptr, vertex);
|
||||
db_.storage().label_property_index_.UpdateOnLabel(label, vlist_ptr, vertex);
|
||||
}
|
||||
|
||||
void GraphDbAccessor::UpdatePropertyIndex(
|
||||
storage::Property property, const RecordAccessor<Vertex> &vertex_accessor,
|
||||
const Vertex *const vertex) {
|
||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
||||
DCHECK(vertex_accessor.is_local()) << "Only local vertices belong in indexes";
|
||||
db_.storage().label_property_index_.UpdateOnProperty(
|
||||
property, vertex_accessor.address().local(), vertex);
|
||||
}
|
||||
|
||||
int64_t GraphDbAccessor::VerticesCount() const {
|
||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
||||
return db_.storage().vertices_.access().size();
|
||||
}
|
||||
|
||||
int64_t GraphDbAccessor::VerticesCount(storage::Label label) const {
|
||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
||||
return db_.storage().labels_index_.Count(label);
|
||||
}
|
||||
|
||||
int64_t GraphDbAccessor::VerticesCount(storage::Label label,
|
||||
storage::Property property) const {
|
||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
||||
const LabelPropertyIndex::Key key(label, property);
|
||||
DCHECK(db_.storage().label_property_index_.IndexExists(key))
|
||||
<< "Index doesn't exist.";
|
||||
return db_.storage().label_property_index_.Count(key);
|
||||
}
|
||||
|
||||
int64_t GraphDbAccessor::VerticesCount(storage::Label label,
|
||||
storage::Property property,
|
||||
const PropertyValue &value) const {
|
||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
||||
const LabelPropertyIndex::Key key(label, property);
|
||||
DCHECK(db_.storage().label_property_index_.IndexExists(key))
|
||||
<< "Index doesn't exist.";
|
||||
return db_.storage()
|
||||
.label_property_index_.PositionAndCount(key, value)
|
||||
.second;
|
||||
}
|
||||
|
||||
int64_t GraphDbAccessor::VerticesCount(
|
||||
storage::Label label, storage::Property property,
|
||||
const std::optional<utils::Bound<PropertyValue>> lower,
|
||||
const std::optional<utils::Bound<PropertyValue>> upper) const {
|
||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
||||
const LabelPropertyIndex::Key key(label, property);
|
||||
DCHECK(db_.storage().label_property_index_.IndexExists(key))
|
||||
<< "Index doesn't exist.";
|
||||
CHECK(lower || upper) << "At least one bound must be provided";
|
||||
CHECK(!lower || lower.value().value().type() != PropertyValue::Type::Null)
|
||||
<< "Null value is not a valid index bound";
|
||||
CHECK(!upper || upper.value().value().type() != PropertyValue::Type::Null)
|
||||
<< "Null value is not a valid index bound";
|
||||
|
||||
if (!upper) {
|
||||
auto lower_pac = db_.storage().label_property_index_.PositionAndCount(
|
||||
key, lower.value().value());
|
||||
int64_t size = db_.storage().label_property_index_.Count(key);
|
||||
return std::max(0l,
|
||||
size - lower_pac.first -
|
||||
(lower.value().IsInclusive() ? 0l : lower_pac.second));
|
||||
|
||||
} else if (!lower) {
|
||||
auto upper_pac = db_.storage().label_property_index_.PositionAndCount(
|
||||
key, upper.value().value());
|
||||
return upper.value().IsInclusive() ? upper_pac.first + upper_pac.second
|
||||
: upper_pac.first;
|
||||
|
||||
} else {
|
||||
auto lower_pac = db_.storage().label_property_index_.PositionAndCount(
|
||||
key, lower.value().value());
|
||||
auto upper_pac = db_.storage().label_property_index_.PositionAndCount(
|
||||
key, upper.value().value());
|
||||
auto result = upper_pac.first - lower_pac.first;
|
||||
if (lower.value().IsExclusive()) result -= lower_pac.second;
|
||||
if (upper.value().IsInclusive()) result += upper_pac.second;
|
||||
return std::max(0l, result);
|
||||
}
|
||||
}
|
||||
|
||||
bool GraphDbAccessor::RemoveVertex(VertexAccessor &vertex_accessor,
|
||||
bool check_empty) {
|
||||
if (vertex_accessor.is_local()) {
|
||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
||||
vertex_accessor.SwitchNew();
|
||||
// it's possible the vertex was removed already in this transaction
|
||||
// due to it getting matched multiple times by some patterns
|
||||
// we can only delete it once, so check if it's already deleted
|
||||
if (vertex_accessor.GetCurrent()->is_expired_by(transaction_)) return true;
|
||||
if (check_empty &&
|
||||
vertex_accessor.out_degree() + vertex_accessor.in_degree() > 0)
|
||||
return false;
|
||||
|
||||
auto *vlist_ptr = vertex_accessor.address().local();
|
||||
wal().Emplace(database::StateDelta::RemoveVertex(
|
||||
transaction_.id_, vlist_ptr->gid_, check_empty));
|
||||
vlist_ptr->remove(vertex_accessor.GetCurrent(), transaction_);
|
||||
return true;
|
||||
|
||||
} else {
|
||||
auto address = vertex_accessor.address();
|
||||
updates_clients().RemoveVertex(worker_id(), address.worker_id(),
|
||||
transaction_id(), address.gid(),
|
||||
check_empty);
|
||||
// We can't know if we are going to be able to remove vertex until
|
||||
// deferred updates on a remote worker are executed
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
void GraphDbAccessor::DetachRemoveVertex(VertexAccessor &vertex_accessor) {
|
||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
||||
|
||||
vertex_accessor.SwitchNew();
|
||||
|
||||
// Note that when we call RemoveEdge we must take care not to delete from the
|
||||
// collection we are iterating over. This invalidates the iterator in a subtle
|
||||
// way that does not fail in tests, but is NOT correct.
|
||||
for (auto edge_accessor : vertex_accessor.in())
|
||||
RemoveEdge(edge_accessor, true, false);
|
||||
vertex_accessor.SwitchNew();
|
||||
for (auto edge_accessor : vertex_accessor.out())
|
||||
RemoveEdge(edge_accessor, false, true);
|
||||
|
||||
RemoveVertex(vertex_accessor, false);
|
||||
}
|
||||
|
||||
EdgeAccessor GraphDbAccessor::InsertEdge(VertexAccessor &from,
|
||||
VertexAccessor &to,
|
||||
storage::EdgeType edge_type,
|
||||
std::optional<gid::Gid> requested_gid,
|
||||
std::optional<int64_t> cypher_id) {
|
||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
||||
|
||||
auto edge_address =
|
||||
InsertEdgeOnFrom(&from, &to, edge_type, requested_gid, cypher_id);
|
||||
|
||||
InsertEdgeOnTo(&from, &to, edge_type, edge_address);
|
||||
return EdgeAccessor(edge_address, *this, from.address(), to.address(),
|
||||
edge_type);
|
||||
}
|
||||
|
||||
storage::EdgeAddress GraphDbAccessor::InsertEdgeOnFrom(
|
||||
VertexAccessor *from, VertexAccessor *to,
|
||||
const storage::EdgeType &edge_type,
|
||||
const std::optional<gid::Gid> &requested_gid,
|
||||
const std::optional<int64_t> &cypher_id) {
|
||||
if (from->is_local()) {
|
||||
auto edge_accessor = InsertOnlyEdge(from->address(), to->address(),
|
||||
edge_type, requested_gid, cypher_id);
|
||||
auto edge_address = edge_accessor.address();
|
||||
|
||||
from->SwitchNew();
|
||||
from->update();
|
||||
auto from_updated = from->GetNew();
|
||||
|
||||
// TODO when preparing WAL for distributed, most likely never use
|
||||
// `CREATE_EDGE`, but always have it split into 3 parts (edge insertion,
|
||||
// in/out modification).
|
||||
wal().Emplace(database::StateDelta::CreateEdge(
|
||||
transaction_.id_, edge_accessor.gid(), edge_accessor.CypherId(),
|
||||
from->gid(), to->gid(), edge_type, EdgeTypeName(edge_type)));
|
||||
|
||||
from_updated->out_.emplace(
|
||||
db_.storage().LocalizedAddressIfPossible(to->address()), edge_address,
|
||||
edge_type);
|
||||
return edge_address;
|
||||
} else {
|
||||
auto created_edge_info = updates_clients().CreateEdge(
|
||||
worker_id(), transaction_id(), *from, *to, edge_type, cypher_id);
|
||||
auto edge_address = created_edge_info.edge_address;
|
||||
|
||||
auto guard = storage::GetDataLock(*from);
|
||||
from->update();
|
||||
from->GetNew()->out_.emplace(
|
||||
db().storage().LocalizedAddressIfPossible(to->address()), edge_address,
|
||||
edge_type);
|
||||
data_manager().Emplace<Edge>(
|
||||
transaction_id(), edge_address.gid(),
|
||||
distributed::CachedRecordData<Edge>(
|
||||
created_edge_info.cypher_id, nullptr,
|
||||
std::make_unique<Edge>(from->address(), to->address(), edge_type)));
|
||||
return edge_address;
|
||||
}
|
||||
}
|
||||
|
||||
void GraphDbAccessor::InsertEdgeOnTo(VertexAccessor *from, VertexAccessor *to,
|
||||
const storage::EdgeType &edge_type,
|
||||
const storage::EdgeAddress &edge_address) {
|
||||
if (to->is_local()) {
|
||||
// Ensure that the "to" accessor has the latest version (switch new).
|
||||
// WARNING: Must do that after the above "from->update()" for cases when
|
||||
// we are creating a cycle and "from" and "to" are the same vlist.
|
||||
to->SwitchNew();
|
||||
to->update();
|
||||
auto *to_updated = to->GetNew();
|
||||
to_updated->in_.emplace(
|
||||
db_.storage().LocalizedAddressIfPossible(from->address()), edge_address,
|
||||
edge_type);
|
||||
} else {
|
||||
// The RPC call for the `to` side is already handled if `from` is not
|
||||
// local.
|
||||
if (from->is_local() ||
|
||||
from->address().worker_id() != to->address().worker_id()) {
|
||||
updates_clients().AddInEdge(
|
||||
worker_id(), transaction_id(), *from,
|
||||
db().storage().GlobalizedAddress(edge_address), *to, edge_type);
|
||||
}
|
||||
|
||||
auto guard = storage::GetDataLock(*to);
|
||||
to->update();
|
||||
to->GetNew()->in_.emplace(
|
||||
db().storage().LocalizedAddressIfPossible(from->address()),
|
||||
edge_address, edge_type);
|
||||
}
|
||||
}
|
||||
|
||||
EdgeAccessor GraphDbAccessor::InsertOnlyEdge(
|
||||
storage::VertexAddress from, storage::VertexAddress to,
|
||||
storage::EdgeType edge_type, std::optional<gid::Gid> requested_gid,
|
||||
std::optional<int64_t> cypher_id) {
|
||||
CHECK(from.is_local())
|
||||
<< "`from` address should be local when calling InsertOnlyEdge";
|
||||
auto gid = db_.storage().edge_generator_.Next(requested_gid);
|
||||
if (!cypher_id) cypher_id = utils::MemcpyCast<int64_t>(gid);
|
||||
auto edge_vlist = new mvcc::VersionList<Edge>(transaction_, gid, *cypher_id,
|
||||
from, to, edge_type);
|
||||
// We need to insert edge_vlist to edges_ before calling update since update
|
||||
// can throw and edge_vlist will not be garbage collected if it is not in
|
||||
// edges_ skiplist.
|
||||
bool success = db_.storage().edges_.access().insert(gid, edge_vlist).second;
|
||||
CHECK(success) << "Attempting to insert an edge with an existing GID: "
|
||||
<< gid;
|
||||
auto ea = EdgeAccessor(storage::EdgeAddress(edge_vlist), *this, from, to,
|
||||
edge_type);
|
||||
return ea;
|
||||
}
|
||||
|
||||
int64_t GraphDbAccessor::EdgesCount() const {
|
||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
||||
return db_.storage().edges_.access().size();
|
||||
}
|
||||
|
||||
void GraphDbAccessor::RemoveEdge(EdgeAccessor &edge, bool remove_out_edge,
|
||||
bool remove_in_edge) {
|
||||
if (edge.is_local()) {
|
||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
||||
// it's possible the edge was removed already in this transaction
|
||||
// due to it getting matched multiple times by some patterns
|
||||
// we can only delete it once, so check if it's already deleted
|
||||
edge.SwitchNew();
|
||||
if (edge.GetCurrent()->is_expired_by(transaction_)) return;
|
||||
if (remove_out_edge) edge.from().RemoveOutEdge(edge.address());
|
||||
if (remove_in_edge) edge.to().RemoveInEdge(edge.address());
|
||||
|
||||
edge.address().local()->remove(edge.GetCurrent(), transaction_);
|
||||
wal().Emplace(
|
||||
database::StateDelta::RemoveEdge(transaction_.id_, edge.gid()));
|
||||
} else {
|
||||
auto edge_addr = edge.GlobalAddress();
|
||||
auto from_addr = db().storage().GlobalizedAddress(edge.from_addr());
|
||||
CHECK(edge_addr.worker_id() == from_addr.worker_id())
|
||||
<< "Edge and it's 'from' vertex not on the same worker";
|
||||
auto to_addr = db().storage().GlobalizedAddress(edge.to_addr());
|
||||
updates_clients().RemoveEdge(worker_id(), edge_addr.worker_id(),
|
||||
transaction_id(), edge_addr.gid(),
|
||||
from_addr.gid(), to_addr);
|
||||
// Another RPC is necessary only if the first did not handle vertices on
|
||||
// both sides.
|
||||
if (edge_addr.worker_id() != to_addr.worker_id()) {
|
||||
updates_clients().RemoveInEdge(worker_id(), to_addr.worker_id(),
|
||||
transaction_id(), to_addr.gid(),
|
||||
edge_addr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
storage::Label GraphDbAccessor::Label(const std::string &label_name) {
|
||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
||||
return db_.label_mapper().value_to_id(label_name);
|
||||
}
|
||||
|
||||
const std::string &GraphDbAccessor::LabelName(storage::Label label) const {
|
||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
||||
return db_.label_mapper().id_to_value(label);
|
||||
}
|
||||
|
||||
storage::EdgeType GraphDbAccessor::EdgeType(const std::string &edge_type_name) {
|
||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
||||
return db_.edge_type_mapper().value_to_id(edge_type_name);
|
||||
}
|
||||
|
||||
const std::string &GraphDbAccessor::EdgeTypeName(
|
||||
storage::EdgeType edge_type) const {
|
||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
||||
return db_.edge_type_mapper().id_to_value(edge_type);
|
||||
}
|
||||
|
||||
storage::Property GraphDbAccessor::Property(const std::string &property_name) {
|
||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
||||
return db_.property_mapper().value_to_id(property_name);
|
||||
}
|
||||
|
||||
const std::string &GraphDbAccessor::PropertyName(
|
||||
storage::Property property) const {
|
||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
||||
return db_.property_mapper().id_to_value(property);
|
||||
}
|
||||
|
||||
std::vector<std::string> GraphDbAccessor::IndexInfo() const {
|
||||
std::vector<std::string> info;
|
||||
for (storage::Label label : db_.storage().labels_index_.Keys()) {
|
||||
info.emplace_back(":" + LabelName(label));
|
||||
}
|
||||
for (LabelPropertyIndex::Key key :
|
||||
db_.storage().label_property_index_.Keys()) {
|
||||
info.emplace_back(fmt::format(":{}({})", LabelName(key.label_),
|
||||
PropertyName(key.property_)));
|
||||
}
|
||||
return info;
|
||||
}
|
||||
} // namespace database
|
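The lower/upper bound handling in VerticesCount above boils down to simple arithmetic over the (position, count) pairs returned by PositionAndCount; a self-contained sketch with invented numbers, just to make that formula concrete:

// Stand-alone sketch of the (position, count) arithmetic used by
// VerticesCount(label, property, lower, upper) above. The numbers are
// invented purely for illustration.
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>

int main() {
  // Suppose the index holds 10 entries for the key.
  const int64_t size = 10;
  // PositionAndCount(lower) -> 3 entries strictly before, 2 equal to lower.
  const std::pair<int64_t, int64_t> lower_pac{3, 2};
  // PositionAndCount(upper) -> 7 entries strictly before, 1 equal to upper.
  const std::pair<int64_t, int64_t> upper_pac{7, 1};

  // Only a lower bound, inclusive: everything from lower's position onwards.
  assert(std::max<int64_t>(0, size - lower_pac.first) == 7);

  // Both bounds, [lower, upper]: entries between the two positions, plus the
  // ones equal to upper because the upper bound is inclusive.
  int64_t result = upper_pac.first - lower_pac.first;  // 7 - 3 = 4
  result += upper_pac.second;                          // + 1 (upper inclusive)
  assert(std::max<int64_t>(0, result) == 5);
  return 0;
}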
@ -1,686 +0,0 @@
|
||||
/// @file
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <glog/logging.h>
|
||||
#include <cppitertools/filter.hpp>
|
||||
#include <cppitertools/imap.hpp>
|
||||
|
||||
#include "database/distributed/graph_db.hpp"
|
||||
#include "storage/common/types/types.hpp"
|
||||
#include "storage/distributed/address_types.hpp"
|
||||
#include "storage/distributed/edge_accessor.hpp"
|
||||
#include "storage/distributed/vertex_accessor.hpp"
|
||||
#include "transactions/transaction.hpp"
|
||||
#include "transactions/type.hpp"
|
||||
#include "utils/bound.hpp"
|
||||
#include "utils/exceptions.hpp"
|
||||
|
||||
namespace distributed {
|
||||
class DataManager;
|
||||
class UpdatesRpcClients;
|
||||
|
||||
} // namespace distributed
|
||||
|
||||
namespace database {
|
||||
|
||||
/** Thrown when inserting in an index with constraint. */
|
||||
class ConstraintViolationException : public utils::BasicException {
|
||||
using utils::BasicException::BasicException;
|
||||
};
|
||||
|
||||
/** Thrown when creating an index which already exists. */
|
||||
class IndexExistsException : public utils::BasicException {
|
||||
using utils::BasicException::BasicException;
|
||||
};
|
||||
|
||||
/** Thrown when creating an index which already exists. */
|
||||
class IndexCreationOnWorkerException : public utils::BasicException {
|
||||
using utils::BasicException::BasicException;
|
||||
};
|
||||
|
||||
/// Thrown on concurrent index creation when the transaction engine fails to
|
||||
/// start a new transaction.
|
||||
class TransactionException : public utils::BasicException {
|
||||
using utils::BasicException::BasicException;
|
||||
};
|
||||
|
||||
/**
|
||||
* Base accessor for the database object: exposes functions for operating on the
|
||||
* database. All the functions in this class should be self-sufficient: for
|
||||
* example the function for creating a new Vertex should take care of all the
|
||||
* book-keeping around the creation.
|
||||
*/
|
||||
class GraphDbAccessor {
|
||||
  // We need to make friends with these classes since they need to access private
|
||||
// methods for updating indices.
|
||||
// TODO: Rethink this, we have too much long-distance friendship complicating
|
||||
// the code.
|
||||
friend class ::RecordAccessor<Vertex>;
|
||||
friend class ::VertexAccessor;
|
||||
|
||||
protected:
|
||||
// Construction should only be done through GraphDb::Access function and
|
||||
// concrete GraphDbAccessor type.
|
||||
|
||||
/// Creates a new accessor by starting a new transaction.
|
||||
explicit GraphDbAccessor(GraphDb &db);
|
||||
/// Creates an accessor for a running transaction.
|
||||
GraphDbAccessor(GraphDb &db, tx::TransactionId tx_id);
|
||||
|
||||
public:
|
||||
virtual ~GraphDbAccessor();
|
||||
|
||||
GraphDbAccessor(const GraphDbAccessor &other) = delete;
|
||||
GraphDbAccessor(GraphDbAccessor &&other) = delete;
|
||||
GraphDbAccessor &operator=(const GraphDbAccessor &other) = delete;
|
||||
GraphDbAccessor &operator=(GraphDbAccessor &&other) = delete;
|
||||
|
||||
int16_t worker_id() const;
|
||||
distributed::DataManager &data_manager();
|
||||
distributed::UpdatesRpcClients &updates_clients();
|
||||
|
||||
/**
|
||||
* Creates a new Vertex and returns an accessor to it. If the ID is
|
||||
* provided, the created Vertex will have that local ID, and the ID counter
|
||||
* will be increased to it so collisions are avoided. This should only be used
|
||||
* by durability recovery, normal vertex creation should not provide the ID.
|
||||
*
|
||||
* You should NOT make interleaved recovery and normal DB op calls to this
|
||||
* function. Doing so will likely mess up the ID generation and crash MG.
|
||||
* Always perform recovery only once, immediately when the database is
|
||||
* created, before any transactional ops start.
|
||||
*
|
||||
* @param requested_gid The requested GID. Should only be provided when
|
||||
* recovering from durability.
|
||||
* @param cypher_id Take a look under mvcc::VersionList::cypher_id
|
||||
*
|
||||
* @return See above.
|
||||
*/
|
||||
VertexAccessor InsertVertex(
|
||||
std::optional<gid::Gid> requested_gid = std::nullopt,
|
||||
std::optional<int64_t> cypher_id = std::nullopt);
|
||||
|
||||
/**
|
||||
* Removes the vertex of the given accessor. If the vertex has any outgoing or
|
||||
* incoming edges, it is not deleted. See `DetachRemoveVertex` if you want to
|
||||
* remove a vertex regardless of connectivity.
|
||||
*
|
||||
* If the vertex has already been deleted by the current transaction+command,
|
||||
* this function will not do anything and will return true.
|
||||
*
|
||||
* @param vertex_accessor Accessor to vertex.
|
||||
* @param check_empty If the vertex should be checked for existing edges
|
||||
* before deletion.
|
||||
   * @return Whether or not the vertex was deleted.
|
||||
*/
|
||||
bool RemoveVertex(VertexAccessor &vertex_accessor,
|
||||
bool check_empty = true);
|
||||
|
||||
/**
|
||||
   * Removes the vertex of the given accessor along with all its outgoing
|
||||
* and incoming connections.
|
||||
*
|
||||
* @param vertex_accessor Accessor to a vertex.
|
||||
*/
|
||||
void DetachRemoveVertex(VertexAccessor &vertex_accessor);
|
||||
|
||||
/**
|
||||
* Obtains the vertex for the given ID. If there is no vertex for the given
|
||||
* ID, or it's not visible to this accessor's transaction, nullopt is
|
||||
* returned.
|
||||
*
|
||||
* @param gid - The GID of the sought vertex.
|
||||
* @param current_state If true then the graph state for the
|
||||
* current transaction+command is returned (insertions, updates and
|
||||
* deletions performed in the current transaction+command are not
|
||||
* ignored).
|
||||
*/
|
||||
std::optional<VertexAccessor> FindVertexOptional(gid::Gid gid,
|
||||
bool current_state);
|
||||
|
||||
/**
|
||||
* Obtains the vertex accessor for given id without checking if the
|
||||
* vertex is visible.
|
||||
*/
|
||||
VertexAccessor FindVertexRaw(gid::Gid gid);
|
||||
|
||||
/**
|
||||
* Obtains the vertex for the given ID. If there is no vertex for the given
|
||||
* ID, or it's not visible to this accessor's transaction, MG is crashed
|
||||
* using a CHECK.
|
||||
*
|
||||
* @param gid - The GID of the sought vertex.
|
||||
* @param current_state If true then the graph state for the
|
||||
* current transaction+command is returned (insertions, updates and
|
||||
* deletions performed in the current transaction+command are not
|
||||
* ignored).
|
||||
*/
|
||||
VertexAccessor FindVertex(gid::Gid gid, bool current_state);
|
||||
|
||||
/**
|
||||
* Returns iterable over accessors to all the vertices in the graph
|
||||
* visible to the current transaction.
|
||||
*
|
||||
* @param current_state If true then the graph state for the
|
||||
* current transaction+command is returned (insertions, updates and
|
||||
* deletions performed in the current transaction+command are not
|
||||
* ignored).
|
||||
*/
|
||||
auto Vertices(bool current_state) {
|
||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
||||
// wrap version lists into accessors, which will look for visible versions
|
||||
auto accessors = iter::imap(
|
||||
[this](auto id_vlist) {
|
||||
return VertexAccessor(storage::VertexAddress(id_vlist.second), *this);
|
||||
},
|
||||
db_.storage().vertices_.access());
|
||||
|
||||
// filter out the accessors not visible to the current transaction
|
||||
return iter::filter(
|
||||
[this, current_state](const VertexAccessor &accessor) {
|
||||
return accessor.Visible(transaction(), current_state);
|
||||
},
|
||||
std::move(accessors));
|
||||
}
|
||||
|
||||
/**
|
||||
   * Return VertexAccessors which have the given label, for the current
|
||||
   * transaction visibility.
|
||||
* @param label - label for which to return VertexAccessors
|
||||
* @param current_state If true then the graph state for the
|
||||
* current transaction+command is returned (insertions, updates and
|
||||
* deletions performed in the current transaction+command are not
|
||||
* ignored).
|
||||
* @return iterable collection
|
||||
*/
|
||||
auto Vertices(storage::Label label, bool current_state) {
|
||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
||||
return iter::imap(
|
||||
[this](auto vlist) {
|
||||
return VertexAccessor(storage::VertexAddress(vlist), *this);
|
||||
},
|
||||
db_.storage().labels_index_.GetVlists(label, transaction_,
|
||||
current_state));
|
||||
}
|
||||
|
||||
/**
|
||||
   * Return VertexAccessors which have the given label and property, for the
|
||||
* given transaction visibility.
|
||||
*
|
||||
* @param label - label for which to return VertexAccessors
|
||||
* @param property - property for which to return VertexAccessors
|
||||
* @param current_state If true then the graph state for the
|
||||
* current transaction+command is returned (insertions, updates and
|
||||
* deletions performed in the current transaction+command are not
|
||||
* ignored).
|
||||
* @return iterable collection
|
||||
*/
|
||||
auto Vertices(storage::Label label, storage::Property property,
|
||||
bool current_state) {
|
||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
||||
DCHECK(db_.storage().label_property_index_.IndexExists(
|
||||
LabelPropertyIndex::Key(label, property)))
|
||||
<< "Label+property index doesn't exist.";
|
||||
return iter::imap(
|
||||
[this](auto vlist) {
|
||||
return VertexAccessor(storage::VertexAddress(vlist), *this);
|
||||
},
|
||||
db_.storage().label_property_index_.GetVlists(
|
||||
LabelPropertyIndex::Key(label, property), transaction_,
|
||||
current_state));
|
||||
}
|
||||
|
||||
/**
|
||||
   * Return VertexAccessors which have the given label + property, and
|
||||
   * whose property value equals the given 'value', for the given transaction
|
||||
* visibility.
|
||||
* @param label - label for which to return VertexAccessors
|
||||
* @param property - property for which to return VertexAccessors
|
||||
* @param value - property value for which to return VertexAccessors
|
||||
* @param current_state If true then the graph state for the
|
||||
* current transaction+command is returned (insertions, updates and
|
||||
* deletions performed in the current transaction+command are not
|
||||
* ignored).
|
||||
* @return iterable collection
|
||||
*/
|
||||
auto Vertices(storage::Label label, storage::Property property,
|
||||
const PropertyValue &value, bool current_state) {
|
||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
||||
DCHECK(db_.storage().label_property_index_.IndexExists(
|
||||
LabelPropertyIndex::Key(label, property)))
|
||||
<< "Label+property index doesn't exist.";
|
||||
CHECK(value.type() != PropertyValue::Type::Null)
|
||||
<< "Can't query index for propery value type null.";
|
||||
return iter::imap(
|
||||
[this](auto vlist) {
|
||||
return VertexAccessor(storage::VertexAddress(vlist), *this);
|
||||
},
|
||||
db_.storage().label_property_index_.GetVlists(
|
||||
LabelPropertyIndex::Key(label, property), value, transaction_,
|
||||
current_state));
|
||||
}
|
||||
|
||||
/**
|
||||
* Return an iterable over VertexAccessors which contain the
|
||||
* given label and whose property value (for the given property)
|
||||
* falls within the given (lower, upper) @c Bound.
|
||||
*
|
||||
* The returned iterator will only contain
|
||||
* vertices/edges whose property value is comparable with the
|
||||
* given bounds (w.r.t. type). This has implications on Cypher
|
||||
   * query execution semantics which have not been resolved yet.
|
||||
*
|
||||
   * At least one of the bounds must be specified. Bounds can't be
|
||||
* @c PropertyValue::Null. If both bounds are
|
||||
   * specified, their PropertyValue elements must be of comparable
|
||||
* types.
|
||||
*
|
||||
* @param label - label for which to return VertexAccessors
|
||||
* @param property - property for which to return VertexAccessors
|
||||
* @param lower - Lower bound of the interval.
|
||||
* @param upper - Upper bound of the interval.
|
||||
* @param value - property value for which to return VertexAccessors
|
||||
* @param current_state If true then the graph state for the
|
||||
* current transaction+command is returned (insertions, updates and
|
||||
* deletions performed in the current transaction+command are not
|
||||
* ignored).
|
||||
* @return iterable collection of record accessors
|
||||
* satisfy the bounds and are visible to the current transaction.
|
||||
*/
|
||||
auto Vertices(storage::Label label, storage::Property property,
|
||||
const std::optional<utils::Bound<PropertyValue>> lower,
|
||||
const std::optional<utils::Bound<PropertyValue>> upper,
|
||||
bool current_state) {
|
||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
||||
DCHECK(db_.storage().label_property_index_.IndexExists(
|
||||
LabelPropertyIndex::Key(label, property)))
|
||||
<< "Label+property index doesn't exist.";
|
||||
return iter::imap(
|
||||
[this](auto vlist) {
|
||||
return VertexAccessor(storage::VertexAddress(vlist), *this);
|
||||
},
|
||||
db_.storage().label_property_index_.GetVlists(
|
||||
LabelPropertyIndex::Key(label, property), lower, upper,
|
||||
transaction_, current_state));
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new Edge and returns an accessor to it. If the ID is
|
||||
* provided, the created Edge will have that ID, and the ID counter will be
|
||||
* increased to it so collisions are avoided. This should only be used by
|
||||
* durability recovery, normal edge creation should not provide the ID.
|
||||
*
|
||||
* You should NOT make interleaved recovery and normal DB op calls to this
|
||||
* function. Doing so will likely mess up the ID generation and crash MG.
|
||||
* Always perform recovery only once, immediately when the database is
|
||||
* created, before any transactional ops start.
|
||||
*
|
||||
* @param from The 'from' vertex.
|
||||
   * @param to The 'to' vertex.
|
||||
* @param type Edge type.
|
||||
* @param requested_gid The requested GID. Should only be provided when
|
||||
* recovering from durability.
|
||||
* @param cypher_id Take a look under mvcc::VersionList::cypher_id
|
||||
*
|
||||
* @return An accessor to the edge.
|
||||
*/
|
||||
EdgeAccessor InsertEdge(VertexAccessor &from, VertexAccessor &to,
|
||||
storage::EdgeType type,
|
||||
std::optional<gid::Gid> requested_gid = std::nullopt,
|
||||
std::optional<int64_t> cypher_id = std::nullopt);
|
||||
|
||||
/**
|
||||
* Insert edge into main storage, but don't insert it into from and to
|
||||
* vertices edge lists.
|
||||
*
|
||||
* @param cypher_id Take a look under mvcc::VersionList::cypher_id
|
||||
*/
|
||||
EdgeAccessor InsertOnlyEdge(
|
||||
storage::VertexAddress from, storage::VertexAddress to,
|
||||
storage::EdgeType edge_type,
|
||||
std::optional<gid::Gid> requested_gid = std::nullopt,
|
||||
std::optional<int64_t> cypher_id = std::nullopt);
|
||||
|
||||
/**
|
||||
* Removes an edge from the graph. Parameters can indicate if the edge should
|
||||
* be removed from data structures in vertices it connects. When removing an
|
||||
* edge both arguments should be `true`. `false` is only used when
|
||||
* detach-deleting a vertex.
|
||||
*
|
||||
* @param edge The accessor to an edge.
|
||||
   * @param remove_out_edge If the edge should be removed from its origin
|
||||
* side.
|
||||
   * @param remove_in_edge If the edge should be removed from its
|
||||
* destination side.
|
||||
*/
|
||||
void RemoveEdge(EdgeAccessor &edge, bool remove_out_edge = true,
|
||||
bool remove_in_edge = true);
|
||||
|
||||
/**
|
||||
* Obtains the edge for the given ID. If there is no edge for the given
|
||||
* ID, or it's not visible to this accessor's transaction, nullopt is
|
||||
* returned.
|
||||
*
|
||||
* @param gid - The GID of the sought edge.
|
||||
* @param current_state If true then the graph state for the
|
||||
* current transaction+command is returned (insertions, updates and
|
||||
* deletions performed in the current transaction+command are not
|
||||
* ignored).
|
||||
*/
|
||||
std::optional<EdgeAccessor> FindEdgeOptional(gid::Gid gid,
|
||||
bool current_state);
|
||||
|
||||
/**
|
||||
* Obtains the edge accessor for the given id without checking if the edge
|
||||
* is visible.
|
||||
*/
|
||||
EdgeAccessor FindEdgeRaw(gid::Gid gid);
|
||||
|
||||
/**
|
||||
* Obtains the edge for the given ID. If there is no edge for the given
|
||||
* ID, or it's not visible to this accessor's transaction, MG is crashed
|
||||
* using a CHECK.
|
||||
*
|
||||
* @param gid - The GID of the sought edge.
|
||||
* @param current_state If true then the graph state for the
|
||||
* current transaction+command is returned (insertions, updates and
|
||||
* deletions performed in the current transaction+command are not
|
||||
* ignored).
|
||||
*/
|
||||
EdgeAccessor FindEdge(gid::Gid gid, bool current_state);
|
||||
|
||||
/**
|
||||
* Returns iterable over accessors to all the edges in the graph
|
||||
* visible to the current transaction.
|
||||
*
|
||||
* @param current_state If true then the graph state for the
|
||||
* current transaction+command is returned (insertions, updates and
|
||||
* deletions performed in the current transaction+command are not
|
||||
* ignored).
|
||||
*/
|
||||
auto Edges(bool current_state) {
|
||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
||||
|
||||
// wrap version lists into accessors, which will look for visible versions
|
||||
auto accessors = iter::imap(
|
||||
[this](auto id_vlist) {
|
||||
return EdgeAccessor(storage::EdgeAddress(id_vlist.second), *this);
|
||||
},
|
||||
db_.storage().edges_.access());
|
||||
|
||||
// filter out the accessors not visible to the current transaction
|
||||
return iter::filter(
|
||||
[this, current_state](const EdgeAccessor &accessor) {
|
||||
return accessor.Visible(transaction(), current_state);
|
||||
},
|
||||
std::move(accessors));
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates and returns a new accessor that represents the same graph element
|
||||
* (node / version) as the given `accessor`, but in this `GraphDbAccessor`.
|
||||
*
|
||||
* It is possible that the given `accessor` graph element is not visible in
|
||||
* this `GraphDbAccessor`'s transaction. If that is the case, a `nullopt` is
|
||||
* returned.
|
||||
*
|
||||
* The returned accessor does NOT have the same `current_` set as the given
|
||||
* `accessor`. It has default post-construction `current_` set (`old` if
|
||||
* available, otherwise `new`).
|
||||
*
|
||||
* @param accessor The [Vertex/Edge]Accessor whose underlying graph element we
|
||||
* want in this GraphDbAccessor.
|
||||
* @return See above.
|
||||
* @tparam TAccessor Either VertexAccessor or EdgeAccessor
|
||||
*/
|
||||
template <typename TAccessor>
|
||||
std::optional<TAccessor> Transfer(const TAccessor &accessor) {
|
||||
if (accessor.db_accessor_ == this) return std::make_optional(accessor);
|
||||
|
||||
TAccessor accessor_in_this(accessor.address(), *this);
|
||||
if (accessor_in_this.current_)
|
||||
return std::make_optional(std::move(accessor_in_this));
|
||||
else
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds an index for the given (label, property) and populates it with
|
||||
* existing vertices that belong to it.
|
||||
*
|
||||
* You should never call BuildIndex on a GraphDbAccessor (transaction) on
|
||||
* which new vertices have been inserted or existing ones updated. Do it
|
||||
* in a new accessor instead.
|
||||
*
|
||||
* Build index throws if an index for the given (label, property) already
|
||||
* exists (even if it's being built by a concurrent transaction and is not yet
|
||||
* ready for use).
|
||||
*
|
||||
* It also throws if there is another index being built concurrently on the
|
||||
* same database this accessor is for.
|
||||
*
|
||||
* @param label - label to build for
|
||||
* @param property - property to build for
|
||||
*/
|
||||
virtual void BuildIndex(storage::Label label, storage::Property property);
|
||||
|
||||
  /// Deletes the index responsible for (label, property).
|
||||
/// At the moment this isn't implemented in distributed.
|
||||
///
|
||||
/// @throws NotYetImplemented
|
||||
void DeleteIndex(storage::Label, storage::Property);
|
||||
|
||||
/// Populates index with vertices containing the key
|
||||
void PopulateIndex(const LabelPropertyIndex::Key &key);
|
||||
|
||||
/// Writes Index (key) creation to wal, marks it as ready for usage
|
||||
void EnableIndex(const LabelPropertyIndex::Key &key);
|
||||
|
||||
/**
|
||||
* @brief - Returns true if the given label+property index already exists and
|
||||
* is ready for use.
|
||||
*/
|
||||
bool LabelPropertyIndexExists(storage::Label label,
|
||||
storage::Property property) const {
|
||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
||||
return db_.storage().label_property_index_.IndexExists(
|
||||
LabelPropertyIndex::Key(label, property));
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief - Returns vector of keys of label-property indices.
|
||||
*/
|
||||
std::vector<LabelPropertyIndex::Key> GetIndicesKeys() {
|
||||
DCHECK(!commited_ && !aborted_) << "Accessor committed or aborted";
|
||||
return db_.storage().label_property_index_.Keys();
|
||||
}
|
||||
|
||||
/**
|
||||
* Return approximate number of all vertices in the database.
|
||||
* Note that this is always an over-estimate and never an under-estimate.
|
||||
*/
|
||||
int64_t VerticesCount() const;
|
||||
|
||||
  /**
|
||||
* Return approximate number of all edges in the database.
|
||||
* Note that this is always an over-estimate and never an under-estimate.
|
||||
*/
|
||||
int64_t EdgesCount() const;
|
||||
|
||||
/**
|
||||
* Return approximate number of vertices under indexes with the given label.
|
||||
* Note that this is always an over-estimate and never an under-estimate.
|
||||
*
|
||||
* @param label - label to check for
|
||||
* @return number of vertices with the given label
|
||||
*/
|
||||
int64_t VerticesCount(storage::Label label) const;
|
||||
|
||||
/**
|
||||
* Return approximate number of vertices under indexes with the given label
|
||||
* and property. Note that this is always an over-estimate and never an
|
||||
* under-estimate.
|
||||
*
|
||||
* @param label - label to check for
|
||||
* @param property - property to check for
|
||||
* @return number of vertices with the given label, fails if no such
|
||||
* label+property index exists.
|
||||
*/
|
||||
int64_t VerticesCount(storage::Label label, storage::Property property) const;
|
||||
|
||||
/**
|
||||
* Returns approximate number of vertices that have the given label
|
||||
* and the given value for the given property.
|
||||
*
|
||||
* Assumes that an index for that (label, property) exists.
|
||||
*/
|
||||
int64_t VerticesCount(storage::Label label, storage::Property property,
|
||||
const PropertyValue &value) const;
|
||||
|
||||
/**
|
||||
* Returns approximate number of vertices that have the given label
|
||||
   * and whose value is in the range defined by upper and lower @c Bound.
|
||||
*
|
||||
* At least one bound must be specified. Neither can be
|
||||
* PropertyValue::Null.
|
||||
*
|
||||
* Assumes that an index for that (label, property) exists.
|
||||
*/
|
||||
int64_t VerticesCount(
|
||||
storage::Label label, storage::Property property,
|
||||
const std::optional<utils::Bound<PropertyValue>> lower,
|
||||
const std::optional<utils::Bound<PropertyValue>> upper) const;
|
||||
|
||||
/**
|
||||
* Obtains the Label for the label's name.
|
||||
* @return See above.
|
||||
*/
|
||||
storage::Label Label(const std::string &label_name);
|
||||
|
||||
/**
|
||||
* Obtains the label name (a string) for the given label.
|
||||
*
|
||||
* @param label a Label.
|
||||
* @return See above.
|
||||
*/
|
||||
const std::string &LabelName(storage::Label label) const;
|
||||
|
||||
/**
|
||||
   * Obtains the EdgeType for its name.
|
||||
* @return See above.
|
||||
*/
|
||||
storage::EdgeType EdgeType(const std::string &edge_type_name);
|
||||
|
||||
/**
|
||||
* Obtains the edge type name (a string) for the given edge type.
|
||||
*
|
||||
* @param edge_type an EdgeType.
|
||||
* @return See above.
|
||||
*/
|
||||
const std::string &EdgeTypeName(storage::EdgeType edge_type) const;
|
||||
|
||||
/**
|
||||
   * Obtains the Property for its name.
|
||||
* @return See above.
|
||||
*/
|
||||
storage::Property Property(const std::string &property_name);
|
||||
|
||||
/**
|
||||
* Obtains the property name (a string) for the given property.
|
||||
*
|
||||
* @param property a Property.
|
||||
* @return See above.
|
||||
*/
|
||||
const std::string &PropertyName(storage::Property property) const;
|
||||
|
||||
/** Returns the id of this accessor's transaction */
|
||||
tx::TransactionId transaction_id() const;
|
||||
|
||||
/** Advances transaction's command id by 1. */
|
||||
virtual void AdvanceCommand();
|
||||
|
||||
/** Commit transaction. */
|
||||
void Commit();
|
||||
|
||||
/** Abort transaction. */
|
||||
void Abort();
|
||||
|
||||
/** Return true if transaction is hinted to abort. */
|
||||
bool should_abort() const;
|
||||
|
||||
const tx::Transaction &transaction() const { return transaction_; }
|
||||
durability::WriteAheadLog &wal();
|
||||
auto &db() { return db_; }
|
||||
const auto &db() const { return db_; }
|
||||
|
||||
/** Returns a list of index names present in the database. */
|
||||
std::vector<std::string> IndexInfo() const;
|
||||
|
||||
/**
|
||||
 * Insert this vertex into the corresponding label and label+property (if it
|
||||
* exists) index.
|
||||
*
|
||||
* @param label - label with which to insert vertex label record
|
||||
* @param vertex_accessor - vertex_accessor to insert
|
||||
* @param vertex - vertex record to insert
|
||||
*/
|
||||
void UpdateLabelIndices(storage::Label label,
|
||||
const VertexAccessor &vertex_accessor,
|
||||
const Vertex *const vertex);
|
||||
|
||||
protected:
|
||||
/** Called in `BuildIndex` after creating an index, but before populating. */
|
||||
virtual void PostCreateIndex(const LabelPropertyIndex::Key &key) {}
|
||||
|
||||
/** Populates the index from a *new* transaction after creating the index. */
|
||||
virtual void PopulateIndexFromBuildIndex(const LabelPropertyIndex::Key &key) {
|
||||
PopulateIndex(key);
|
||||
}
|
||||
|
||||
/**
|
||||
* Insert a new edge to `from` vertex and return the address.
|
||||
* Called from `InsertEdge` as the first step in edge insertion.
|
||||
 */
|
||||
storage::EdgeAddress InsertEdgeOnFrom(
|
||||
VertexAccessor *from, VertexAccessor *to,
|
||||
const storage::EdgeType &edge_type,
|
||||
const std::optional<gid::Gid> &requested_gid,
|
||||
const std::optional<int64_t> &cypher_id);
|
||||
|
||||
/**
|
||||
* Set the newly created edge on `to` vertex.
|
||||
* Called after `InsertEdgeOnFrom` in `InsertEdge`. The given `edge_address`
|
||||
* is from the created edge, returned by `InsertEdgeOnFrom`.
|
||||
*/
|
||||
void InsertEdgeOnTo(VertexAccessor *from, VertexAccessor *to,
|
||||
const storage::EdgeType &edge_type,
|
||||
const storage::EdgeAddress &edge_address);
|
||||
|
||||
private:
|
||||
GraphDb &db_;
|
||||
tx::Transaction &transaction_;
|
||||
// Indicates if this db-accessor started the transaction and should Abort it
|
||||
// upon destruction.
|
||||
bool transaction_starter_;
|
||||
|
||||
bool commited_{false};
|
||||
bool aborted_{false};
|
||||
|
||||
/**
|
||||
 * Insert this vertex into any corresponding label + 'property' index.
|
||||
* @param property - vertex will be inserted into indexes which contain this
|
||||
* property
|
||||
* @param vertex_accessor - vertex accessor to insert
|
||||
* @param vertex - vertex to insert
|
||||
*/
|
||||
void UpdatePropertyIndex(storage::Property property,
|
||||
const RecordAccessor<Vertex> &vertex_accessor,
|
||||
const Vertex *const vertex);
|
||||
};
|
||||
|
||||
} // namespace database
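// Illustrative usage sketch of the accessor API above (not part of the
// removed file). It assumes an already-open database::GraphDbAccessor `dba`
// and an existing label+property index on (Person, age); "Person" and "age"
// are hypothetical names.
void ApproximateCountsSketch(database::GraphDbAccessor &dba) {
  auto label = dba.Label("Person");
  auto property = dba.Property("age");

  // Index-backed counts; the header documents these as over-estimates.
  int64_t labeled = dba.VerticesCount(label);
  int64_t labeled_with_property = dba.VerticesCount(label, property);
  int64_t with_value = dba.VerticesCount(label, property, PropertyValue(42));
  LOG(INFO) << labeled << " " << labeled_with_property << " " << with_value;

  // Typical lifecycle: honour the abort hint, then commit or abort.
  if (dba.should_abort()) {
    dba.Abort();
  } else {
    dba.Commit();
  }
}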
|
@ -1,9 +0,0 @@
|
||||
#>cpp
|
||||
#pragma once
|
||||
|
||||
#include "durability/distributed/state_delta.hpp"
|
||||
#include "storage/distributed/rpc/serialization.hpp"
|
||||
cpp<#
|
||||
|
||||
;; Generate serialization of state-delta
|
||||
(load "durability/distributed/state_delta.lcp")
|
@ -7,7 +7,3 @@
|
||||
#ifdef MG_SINGLE_NODE_HA
|
||||
#include "database/single_node_ha/graph_db.hpp"
|
||||
#endif
|
||||
|
||||
#ifdef MG_DISTRIBUTED
|
||||
#include "database/distributed/graph_db.hpp"
|
||||
#endif
|
||||
|
@ -7,7 +7,3 @@
|
||||
#ifdef MG_SINGLE_NODE_HA
|
||||
#include "database/single_node_ha/graph_db_accessor.hpp"
|
||||
#endif
|
||||
|
||||
#ifdef MG_DISTRIBUTED
|
||||
#include "database/distributed/graph_db_accessor.hpp"
|
||||
#endif
|
||||
|
@ -1,197 +0,0 @@
|
||||
#include "bfs_rpc_clients.hpp"
|
||||
|
||||
#include "database/distributed/graph_db.hpp"
|
||||
#include "distributed/bfs_rpc_messages.hpp"
|
||||
#include "distributed/data_manager.hpp"
|
||||
|
||||
namespace distributed {
|
||||
|
||||
BfsRpcClients::BfsRpcClients(database::GraphDb *db,
|
||||
BfsSubcursorStorage *subcursor_storage,
|
||||
Coordination *coordination,
|
||||
DataManager *data_manager)
|
||||
: db_(db),
|
||||
subcursor_storage_(subcursor_storage),
|
||||
coordination_(coordination),
|
||||
data_manager_(data_manager) {}
|
||||
|
||||
std::unordered_map<int16_t, int64_t> BfsRpcClients::CreateBfsSubcursors(
|
||||
database::GraphDbAccessor *dba, query::EdgeAtom::Direction direction,
|
||||
const std::vector<storage::EdgeType> &edge_types,
|
||||
const query::plan::ExpansionLambda &filter_lambda,
|
||||
const query::SymbolTable &symbol_table,
|
||||
const query::EvaluationContext &evaluation_context) {
|
||||
auto futures = coordination_->ExecuteOnWorkers<std::pair<int16_t, int64_t>>(
|
||||
db_->WorkerId(), [&](int worker_id, auto &client) {
|
||||
auto res = client.template Call<CreateBfsSubcursorRpc>(
|
||||
dba->transaction_id(), direction, edge_types, filter_lambda,
|
||||
symbol_table, evaluation_context.timestamp,
|
||||
evaluation_context.parameters);
|
||||
return std::make_pair(worker_id, res.member);
|
||||
});
|
||||
std::unordered_map<int16_t, int64_t> subcursor_ids;
|
||||
subcursor_ids.emplace(
|
||||
db_->WorkerId(),
|
||||
subcursor_storage_->Create(dba, direction, edge_types, symbol_table,
|
||||
nullptr, filter_lambda, evaluation_context));
|
||||
for (auto &future : futures) {
|
||||
auto got = subcursor_ids.emplace(future.get());
|
||||
CHECK(got.second) << "CreateBfsSubcursors failed: duplicate worker id";
|
||||
}
|
||||
return subcursor_ids;
|
||||
}
|
||||
|
||||
void BfsRpcClients::RegisterSubcursors(
|
||||
const std::unordered_map<int16_t, int64_t> &subcursor_ids) {
|
||||
auto futures = coordination_->ExecuteOnWorkers<void>(
|
||||
db_->WorkerId(), [&subcursor_ids](int worker_id, auto &client) {
|
||||
client.template Call<RegisterSubcursorsRpc>(subcursor_ids);
|
||||
});
|
||||
subcursor_storage_->Get(subcursor_ids.at(db_->WorkerId()))
|
||||
->RegisterSubcursors(subcursor_ids);
|
||||
// Wait and get all of the replies.
|
||||
for (auto &future : futures) {
|
||||
if (future.valid()) future.get();
|
||||
}
|
||||
}
|
||||
|
||||
void BfsRpcClients::ResetSubcursors(
|
||||
const std::unordered_map<int16_t, int64_t> &subcursor_ids) {
|
||||
auto futures = coordination_->ExecuteOnWorkers<void>(
|
||||
db_->WorkerId(), [&subcursor_ids](int worker_id, auto &client) {
|
||||
client.template Call<ResetSubcursorRpc>(subcursor_ids.at(worker_id));
|
||||
});
|
||||
subcursor_storage_->Get(subcursor_ids.at(db_->WorkerId()))->Reset();
|
||||
// Wait and get all of the replies.
|
||||
for (auto &future : futures) {
|
||||
if (future.valid()) future.get();
|
||||
}
|
||||
}
|
||||
|
||||
std::optional<VertexAccessor> BfsRpcClients::Pull(
|
||||
int16_t worker_id, int64_t subcursor_id, database::GraphDbAccessor *dba) {
|
||||
if (worker_id == db_->WorkerId()) {
|
||||
return subcursor_storage_->Get(subcursor_id)->Pull();
|
||||
}
|
||||
|
||||
auto res =
|
||||
coordination_->GetClientPool(worker_id)->CallWithLoad<SubcursorPullRpc>(
|
||||
[this, dba](auto *res_reader) {
|
||||
SubcursorPullRes res;
|
||||
slk::Load(&res, res_reader, dba, this->data_manager_);
|
||||
return res;
|
||||
},
|
||||
subcursor_id);
|
||||
return res.vertex;
|
||||
}
|
||||
|
||||
bool BfsRpcClients::ExpandLevel(
|
||||
const std::unordered_map<int16_t, int64_t> &subcursor_ids) {
|
||||
auto futures = coordination_->ExecuteOnWorkers<bool>(
|
||||
db_->WorkerId(), [&subcursor_ids](int worker_id, auto &client) {
|
||||
auto res =
|
||||
client.template Call<ExpandLevelRpc>(subcursor_ids.at(worker_id));
|
||||
switch (res.result) {
|
||||
case ExpandResult::SUCCESS:
|
||||
return true;
|
||||
case ExpandResult::FAILURE:
|
||||
return false;
|
||||
case ExpandResult::LAMBDA_ERROR:
|
||||
throw query::QueryRuntimeException(
|
||||
"Expansion condition must evaluate to boolean or null");
|
||||
}
|
||||
});
|
||||
bool expanded =
|
||||
subcursor_storage_->Get(subcursor_ids.at(db_->WorkerId()))->ExpandLevel();
|
||||
for (auto &future : futures) {
|
||||
expanded |= future.get();
|
||||
}
|
||||
return expanded;
|
||||
}
|
||||
|
||||
void BfsRpcClients::SetSource(
|
||||
const std::unordered_map<int16_t, int64_t> &subcursor_ids,
|
||||
storage::VertexAddress source_address) {
|
||||
CHECK(source_address.is_remote())
|
||||
<< "SetSource should be called with global address";
|
||||
|
||||
int worker_id = source_address.worker_id();
|
||||
if (worker_id == db_->WorkerId()) {
|
||||
subcursor_storage_->Get(subcursor_ids.at(db_->WorkerId()))
|
||||
->SetSource(source_address);
|
||||
} else {
|
||||
coordination_->GetClientPool(worker_id)->Call<SetSourceRpc>(
|
||||
subcursor_ids.at(worker_id), source_address);
|
||||
}
|
||||
}
|
||||
|
||||
bool BfsRpcClients::ExpandToRemoteVertex(
|
||||
const std::unordered_map<int16_t, int64_t> &subcursor_ids,
|
||||
EdgeAccessor edge, VertexAccessor vertex) {
|
||||
CHECK(!vertex.is_local())
|
||||
<< "ExpandToRemoteVertex should not be called with local vertex";
|
||||
int worker_id = vertex.address().worker_id();
|
||||
auto res =
|
||||
coordination_->GetClientPool(worker_id)->Call<ExpandToRemoteVertexRpc>(
|
||||
subcursor_ids.at(worker_id), edge.GlobalAddress(),
|
||||
vertex.GlobalAddress());
|
||||
return res.member;
|
||||
}
|
||||
|
||||
PathSegment BfsRpcClients::ReconstructPath(
|
||||
const std::unordered_map<int16_t, int64_t> &subcursor_ids,
|
||||
storage::VertexAddress vertex, database::GraphDbAccessor *dba) {
|
||||
int worker_id = vertex.worker_id();
|
||||
if (worker_id == db_->WorkerId()) {
|
||||
return subcursor_storage_->Get(subcursor_ids.at(worker_id))
|
||||
->ReconstructPath(vertex);
|
||||
}
|
||||
|
||||
auto res =
|
||||
coordination_->GetClientPool(worker_id)->CallWithLoad<ReconstructPathRpc>(
|
||||
[this, dba](auto *res_reader) {
|
||||
ReconstructPathRes res;
|
||||
slk::Load(&res, res_reader, dba, this->data_manager_);
|
||||
return res;
|
||||
},
|
||||
subcursor_ids.at(worker_id), vertex);
|
||||
return PathSegment{res.edges, res.next_vertex, res.next_edge};
|
||||
}
|
||||
|
||||
PathSegment BfsRpcClients::ReconstructPath(
|
||||
const std::unordered_map<int16_t, int64_t> &subcursor_ids,
|
||||
storage::EdgeAddress edge, database::GraphDbAccessor *dba) {
|
||||
int worker_id = edge.worker_id();
|
||||
if (worker_id == db_->WorkerId()) {
|
||||
return subcursor_storage_->Get(subcursor_ids.at(worker_id))
|
||||
->ReconstructPath(edge);
|
||||
}
|
||||
auto res =
|
||||
coordination_->GetClientPool(worker_id)->CallWithLoad<ReconstructPathRpc>(
|
||||
[this, dba](auto *res_reader) {
|
||||
ReconstructPathRes res;
|
||||
slk::Load(&res, res_reader, dba, this->data_manager_);
|
||||
return res;
|
||||
},
|
||||
subcursor_ids.at(worker_id), edge);
|
||||
return PathSegment{res.edges, res.next_vertex, res.next_edge};
|
||||
}
|
||||
|
||||
void BfsRpcClients::PrepareForExpand(
|
||||
const std::unordered_map<int16_t, int64_t> &subcursor_ids, bool clear,
|
||||
const std::vector<query::TypedValue> &frame) {
|
||||
auto futures = coordination_->ExecuteOnWorkers<void>(
|
||||
db_->WorkerId(),
|
||||
[this, clear, &frame, &subcursor_ids](int worker_id, auto &client) {
|
||||
client.template Call<PrepareForExpandRpc>(
|
||||
subcursor_ids.at(worker_id), clear, frame, db_->WorkerId());
|
||||
});
|
||||
subcursor_storage_->Get(subcursor_ids.at(db_->WorkerId()))
|
||||
->PrepareForExpand(clear, frame);
|
||||
// Wait and get all of the replies.
|
||||
for (auto &future : futures) {
|
||||
if (future.valid()) future.get();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace distributed
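// Illustrative master-side driver for the client API above (not part of the
// removed files). It assumes a constructed BfsRpcClients `clients`, an open
// accessor `dba`, a global `source` address and an already-prepared `frame`;
// error handling and traversal depth limits are omitted.
void DriveDistributedBfsSketch(distributed::BfsRpcClients *clients,
                               database::GraphDbAccessor *dba,
                               storage::VertexAddress source,
                               const std::vector<query::TypedValue> &frame,
                               query::EdgeAtom::Direction direction,
                               const std::vector<storage::EdgeType> &edge_types,
                               const query::plan::ExpansionLambda &filter,
                               const query::SymbolTable &symbol_table,
                               const query::EvaluationContext &ctx) {
  // One subcursor per worker, then let them know about each other.
  auto subcursors = clients->CreateBfsSubcursors(dba, direction, edge_types,
                                                 filter, symbol_table, ctx);
  clients->RegisterSubcursors(subcursors);

  // Reset state, install the frame and the source, then expand level by
  // level until no worker managed to reach a new vertex.
  clients->PrepareForExpand(subcursors, /*clear=*/true, frame);
  clients->SetSource(subcursors, source);
  while (clients->ExpandLevel(subcursors)) {
    for (const auto &[worker_id, subcursor_id] : subcursors) {
      while (auto vertex = clients->Pull(worker_id, subcursor_id, dba)) {
        // `*vertex` is a vertex first reached at the current depth.
      }
    }
    clients->PrepareForExpand(subcursors, /*clear=*/false, frame);
  }
}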
|
@ -1,73 +0,0 @@
|
||||
/// @file
|
||||
#pragma once
|
||||
|
||||
#include "distributed/bfs_subcursor.hpp"
|
||||
#include "distributed/coordination.hpp"
|
||||
#include "transactions/transaction.hpp"
|
||||
|
||||
namespace database {
|
||||
class GraphDb;
|
||||
}
|
||||
|
||||
namespace distributed {
|
||||
|
||||
class DataManager;
|
||||
|
||||
/// Along with `BfsRpcServer`, this class is used to expose `BfsSubcursor`
|
||||
/// interface over the network so that subcursors can communicate during the
|
||||
/// traversal. It is just a thin wrapper making RPC calls that also takes
|
||||
/// care of storing remote data into the cache upon receipt. Special care is taken
|
||||
/// to avoid sending local RPCs. Instead, subcursor storage is accessed
|
||||
/// directly.
|
||||
class BfsRpcClients {
|
||||
public:
|
||||
BfsRpcClients(database::GraphDb *db,
|
||||
BfsSubcursorStorage *subcursor_storage,
|
||||
Coordination *coordination, DataManager *data_manager);
|
||||
|
||||
std::unordered_map<int16_t, int64_t> CreateBfsSubcursors(
|
||||
database::GraphDbAccessor *dba, query::EdgeAtom::Direction direction,
|
||||
const std::vector<storage::EdgeType> &edge_types,
|
||||
const query::plan::ExpansionLambda &filter_lambda,
|
||||
const query::SymbolTable &symbol_table,
|
||||
const query::EvaluationContext &evaluation_context);
|
||||
|
||||
|
||||
void RegisterSubcursors(
|
||||
const std::unordered_map<int16_t, int64_t> &subcursor_ids);
|
||||
|
||||
void ResetSubcursors(
|
||||
const std::unordered_map<int16_t, int64_t> &subcursor_ids);
|
||||
|
||||
std::optional<VertexAccessor> Pull(int16_t worker_id, int64_t subcursor_id,
|
||||
database::GraphDbAccessor *dba);
|
||||
|
||||
bool ExpandLevel(const std::unordered_map<int16_t, int64_t> &subcursor_ids);
|
||||
|
||||
void SetSource(const std::unordered_map<int16_t, int64_t> &subcursor_ids,
|
||||
storage::VertexAddress source_address);
|
||||
|
||||
bool ExpandToRemoteVertex(
|
||||
const std::unordered_map<int16_t, int64_t> &subcursor_ids,
|
||||
EdgeAccessor edge, VertexAccessor vertex);
|
||||
|
||||
PathSegment ReconstructPath(
|
||||
const std::unordered_map<int16_t, int64_t> &subcursor_ids,
|
||||
storage::EdgeAddress edge, database::GraphDbAccessor *dba);
|
||||
|
||||
PathSegment ReconstructPath(
|
||||
const std::unordered_map<int16_t, int64_t> &subcursor_ids,
|
||||
storage::VertexAddress vertex, database::GraphDbAccessor *dba);
|
||||
|
||||
void PrepareForExpand(
|
||||
const std::unordered_map<int16_t, int64_t> &subcursor_ids, bool clear,
|
||||
const std::vector<query::TypedValue> &frame);
|
||||
|
||||
private:
|
||||
database::GraphDb *db_;
|
||||
distributed::BfsSubcursorStorage *subcursor_storage_;
|
||||
distributed::Coordination *coordination_;
|
||||
distributed::DataManager *data_manager_;
|
||||
};
|
||||
|
||||
} // namespace distributed
|
@ -1,170 +0,0 @@
|
||||
#>cpp
|
||||
#pragma once
|
||||
|
||||
#include <tuple>
|
||||
|
||||
#include "communication/rpc/messages.hpp"
|
||||
#include "distributed/bfs_subcursor.hpp"
|
||||
#include "query/frontend/semantic/symbol_table.hpp"
|
||||
#include "query/distributed/plan/ops.hpp"
|
||||
#include "query/distributed/serialization.hpp"
|
||||
#include "storage/distributed/rpc/serialization.hpp"
|
||||
#include "transactions/type.hpp"
|
||||
cpp<#
|
||||
|
||||
(lcp:namespace distributed)
|
||||
|
||||
(lcp:define-rpc create-bfs-subcursor
|
||||
(:request
|
||||
((tx-id "::tx::TransactionId")
|
||||
(direction "::query::EdgeAtom::Direction")
|
||||
(edge-types "std::vector<storage::EdgeType>")
|
||||
(filter-lambda "::query::plan::ExpansionLambda"
|
||||
:slk-load (lambda (member)
|
||||
#>cpp
|
||||
slk::Load(&self->${member}, reader, ast_storage);
|
||||
cpp<#))
|
||||
(symbol-table "::query::SymbolTable")
|
||||
(timestamp :int64_t)
|
||||
(parameters "::query::Parameters"))
|
||||
(:serialize (:slk :load-args '((ast-storage "::query::AstStorage *")))))
|
||||
(:response ((member :int64_t))))
|
||||
|
||||
(lcp:define-rpc register-subcursors
|
||||
(:request ((subcursor-ids "std::unordered_map<int16_t, int64_t>")))
|
||||
(:response ()))
|
||||
|
||||
(lcp:define-rpc reset-subcursor
|
||||
(:request ((subcursor-id :int64_t)))
|
||||
(:response ()))
|
||||
|
||||
(lcp:define-enum expand-result
|
||||
(success failure lambda-error)
|
||||
(:serialize))
|
||||
|
||||
(lcp:define-rpc expand-level
|
||||
(:request ((member :int64_t)))
|
||||
(:response ((result "ExpandResult"))))
|
||||
|
||||
(lcp:define-rpc subcursor-pull
|
||||
(:request ((member :int64_t)))
|
||||
(:response
|
||||
((vertex "std::optional<VertexAccessor>"
|
||||
:slk-save (lambda (member)
|
||||
#>cpp
|
||||
slk::Save(static_cast<bool>(self.${member}), builder);
|
||||
if (self.${member}) {
|
||||
slk::Save(*self.${member}, builder,
|
||||
storage::SendVersions::BOTH, worker_id);
|
||||
}
|
||||
cpp<#)
|
||||
:slk-load (lambda (member)
|
||||
#>cpp
|
||||
bool has_value;
|
||||
slk::Load(&has_value, reader);
|
||||
if (has_value) {
|
||||
self->${member} = slk::LoadVertexAccessor(reader, dba, data_manager);
|
||||
}
|
||||
cpp<#)))
|
||||
(:serialize (:slk :save-args '((worker-id :int16_t))
|
||||
:load-args '((dba "::database::GraphDbAccessor *")
|
||||
(data-manager "::distributed::DataManager *"))))))
|
||||
|
||||
(lcp:define-rpc set-source
|
||||
(:request
|
||||
((subcursor-id :int64_t)
|
||||
(source "::storage::VertexAddress")))
|
||||
(:response ()))
|
||||
|
||||
(lcp:define-rpc expand-to-remote-vertex
|
||||
(:request
|
||||
((subcursor-id :int64_t)
|
||||
(edge "::storage::EdgeAddress")
|
||||
(vertex "::storage::VertexAddress")))
|
||||
(:response ((member :bool))))
|
||||
|
||||
(lcp:define-rpc reconstruct-path
|
||||
(:request
|
||||
((subcursor-id :int64_t)
|
||||
(vertex "std::optional<storage::VertexAddress>")
|
||||
(edge "std::optional<storage::EdgeAddress>"))
|
||||
(:public
|
||||
#>cpp
|
||||
ReconstructPathReq(int64_t subcursor_id, storage::VertexAddress vertex)
|
||||
: subcursor_id(subcursor_id),
|
||||
vertex(vertex),
|
||||
edge(std::nullopt) {}
|
||||
|
||||
ReconstructPathReq(int64_t subcursor_id, storage::EdgeAddress edge)
|
||||
: subcursor_id(subcursor_id),
|
||||
vertex(std::nullopt),
|
||||
edge(edge) {}
|
||||
cpp<#))
|
||||
(:response
|
||||
((edges "std::vector<EdgeAccessor>"
|
||||
:slk-save (lambda (member)
|
||||
#>cpp
|
||||
size_t size = self.${member}.size();
|
||||
slk::Save(size, builder);
|
||||
for (const auto &v : self.${member}) {
|
||||
slk::Save(v, builder, storage::SendVersions::BOTH, worker_id);
|
||||
}
|
||||
cpp<#)
|
||||
:slk-load (lambda (member)
|
||||
#>cpp
|
||||
size_t size;
|
||||
slk::Load(&size, reader);
|
||||
self->${member}.reserve(size);
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
self->${member}.push_back(slk::LoadEdgeAccessor(reader, dba, data_manager));
|
||||
}
|
||||
cpp<#))
|
||||
(next-vertex "std::optional<storage::VertexAddress>")
|
||||
(next-edge "std::optional<storage::EdgeAddress>"))
|
||||
(:serialize (:slk :save-args '((worker-id :int16_t))
|
||||
:load-args '((dba "database::GraphDbAccessor *")
|
||||
(data-manager "distributed::DataManager *"))))
|
||||
(:ctor nil)
|
||||
(:public
|
||||
#>cpp
|
||||
ReconstructPathRes() {}
|
||||
|
||||
ReconstructPathRes(
|
||||
const std::vector<EdgeAccessor> &edges,
|
||||
std::optional<storage::VertexAddress> next_vertex,
|
||||
std::optional<storage::EdgeAddress> next_edge)
|
||||
: edges(edges), next_vertex(std::move(next_vertex)), next_edge(std::move(next_edge)) {
|
||||
CHECK(!static_cast<bool>(next_vertex) || !static_cast<bool>(next_edge))
|
||||
<< "At most one of `next_vertex` and `next_edge` should be set";
|
||||
}
|
||||
cpp<#)))
|
||||
|
||||
(lcp:define-rpc prepare-for-expand
|
||||
(:request
|
||||
((subcursor-id :int64_t)
|
||||
(clear :bool)
|
||||
(frame "std::vector<query::TypedValue>"
|
||||
:slk-save (lambda (member)
|
||||
#>cpp
|
||||
size_t size = self.${member}.size();
|
||||
slk::Save(size, builder);
|
||||
for (const auto &v : self.${member}) {
|
||||
slk::Save(v, builder, storage::SendVersions::ONLY_OLD, self.worker_id);
|
||||
}
|
||||
cpp<#)
|
||||
:slk-load (lambda (member)
|
||||
#>cpp
|
||||
auto *subcursor = subcursor_storage->Get(self->subcursor_id);
|
||||
size_t size;
|
||||
slk::Load(&size, reader);
|
||||
self->${member}.resize(size);
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
slk::Load(&self->${member}[i], reader, subcursor->db_accessor(), data_manager);
|
||||
}
|
||||
cpp<#))
|
||||
(worker-id :int16_t :dont-save t))
|
||||
(:serialize (:slk :load-args '((subcursor_storage "distributed::BfsSubcursorStorage *")
|
||||
(data-manager "distributed::DataManager *")))))
|
||||
(:response ()))
|
||||
|
||||
(lcp:pop-namespace) ;; distributed
|
@ -1,166 +0,0 @@
|
||||
/// @file
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
|
||||
#include "distributed/bfs_rpc_messages.hpp"
|
||||
#include "distributed/bfs_subcursor.hpp"
|
||||
#include "distributed/coordination.hpp"
|
||||
|
||||
namespace distributed {
|
||||
|
||||
/// Along with `BfsRpcClients`, this class is used to expose `BfsSubcursor`
|
||||
/// interface over the network so that subcursors can communicate during the
|
||||
/// traversal. It is just a thin wrapper forwarding RPC calls to subcursors in
|
||||
/// subcursor storage.
|
||||
class BfsRpcServer {
|
||||
public:
|
||||
BfsRpcServer(database::GraphDb *db,
|
||||
distributed::Coordination *coordination,
|
||||
BfsSubcursorStorage *subcursor_storage)
|
||||
: db_(db), subcursor_storage_(subcursor_storage) {
|
||||
coordination->Register<CreateBfsSubcursorRpc>([this](auto *req_reader,
|
||||
auto *res_builder) {
|
||||
CreateBfsSubcursorReq req;
|
||||
auto ast_storage = std::make_unique<query::AstStorage>();
|
||||
slk::Load(&req, req_reader, ast_storage.get());
|
||||
database::GraphDbAccessor *dba;
|
||||
{
|
||||
std::lock_guard<std::mutex> guard(lock_);
|
||||
auto it = db_accessors_.find(req.tx_id);
|
||||
if (it == db_accessors_.end()) {
|
||||
it = db_accessors_.emplace(req.tx_id, db_->Access(req.tx_id)).first;
|
||||
}
|
||||
dba = it->second.get();
|
||||
}
|
||||
query::EvaluationContext evaluation_context;
|
||||
evaluation_context.timestamp = req.timestamp;
|
||||
evaluation_context.parameters = req.parameters;
|
||||
evaluation_context.properties =
|
||||
query::NamesToProperties(ast_storage->properties_, dba);
|
||||
evaluation_context.labels =
|
||||
query::NamesToLabels(ast_storage->labels_, dba);
|
||||
auto id = subcursor_storage_->Create(
|
||||
dba, req.direction, req.edge_types, std::move(req.symbol_table),
|
||||
std::move(ast_storage), req.filter_lambda, evaluation_context);
|
||||
CreateBfsSubcursorRes res(id);
|
||||
slk::Save(res, res_builder);
|
||||
});
|
||||
|
||||
coordination->Register<RegisterSubcursorsRpc>(
|
||||
[this](auto *req_reader, auto *res_builder) {
|
||||
RegisterSubcursorsReq req;
|
||||
slk::Load(&req, req_reader);
|
||||
subcursor_storage_->Get(req.subcursor_ids.at(db_->WorkerId()))
|
||||
->RegisterSubcursors(req.subcursor_ids);
|
||||
RegisterSubcursorsRes res;
|
||||
slk::Save(res, res_builder);
|
||||
});
|
||||
|
||||
coordination->Register<ResetSubcursorRpc>(
|
||||
[this](auto *req_reader, auto *res_builder) {
|
||||
ResetSubcursorReq req;
|
||||
slk::Load(&req, req_reader);
|
||||
subcursor_storage_->Get(req.subcursor_id)->Reset();
|
||||
ResetSubcursorRes res;
|
||||
slk::Save(res, res_builder);
|
||||
});
|
||||
|
||||
coordination->Register<SetSourceRpc>(
|
||||
[this](auto *req_reader, auto *res_builder) {
|
||||
SetSourceReq req;
|
||||
slk::Load(&req, req_reader);
|
||||
subcursor_storage_->Get(req.subcursor_id)->SetSource(req.source);
|
||||
SetSourceRes res;
|
||||
slk::Save(res, res_builder);
|
||||
});
|
||||
|
||||
coordination->Register<ExpandLevelRpc>(
|
||||
[this](auto *req_reader, auto *res_builder) {
|
||||
ExpandLevelReq req;
|
||||
slk::Load(&req, req_reader);
|
||||
auto subcursor = subcursor_storage_->Get(req.member);
|
||||
ExpandResult result;
|
||||
try {
|
||||
result = subcursor->ExpandLevel() ? ExpandResult::SUCCESS
|
||||
: ExpandResult::FAILURE;
|
||||
} catch (const query::QueryRuntimeException &) {
|
||||
result = ExpandResult::LAMBDA_ERROR;
|
||||
}
|
||||
ExpandLevelRes res(result);
|
||||
slk::Save(res, res_builder);
|
||||
});
|
||||
|
||||
coordination->Register<SubcursorPullRpc>(
|
||||
[this](auto *req_reader, auto *res_builder) {
|
||||
SubcursorPullReq req;
|
||||
slk::Load(&req, req_reader);
|
||||
auto vertex = subcursor_storage_->Get(req.member)->Pull();
|
||||
SubcursorPullRes res(vertex);
|
||||
slk::Save(res, res_builder, db_->WorkerId());
|
||||
});
|
||||
|
||||
coordination->Register<ExpandToRemoteVertexRpc>(
|
||||
[this](auto *req_reader, auto *res_builder) {
|
||||
ExpandToRemoteVertexReq req;
|
||||
slk::Load(&req, req_reader);
|
||||
ExpandToRemoteVertexRes res(
|
||||
subcursor_storage_->Get(req.subcursor_id)
|
||||
->ExpandToLocalVertex(req.edge, req.vertex));
|
||||
slk::Save(res, res_builder);
|
||||
});
|
||||
|
||||
coordination->Register<ReconstructPathRpc>([this](auto *req_reader,
|
||||
auto *res_builder) {
|
||||
ReconstructPathReq req;
|
||||
slk::Load(&req, req_reader);
|
||||
auto subcursor = subcursor_storage_->Get(req.subcursor_id);
|
||||
PathSegment result;
|
||||
if (req.vertex) {
|
||||
result = subcursor->ReconstructPath(*req.vertex);
|
||||
} else if (req.edge) {
|
||||
result = subcursor->ReconstructPath(*req.edge);
|
||||
} else {
|
||||
LOG(FATAL) << "`edge` or `vertex` should be set in ReconstructPathReq";
|
||||
}
|
||||
ReconstructPathRes res(result.edges, result.next_vertex,
|
||||
result.next_edge);
|
||||
slk::Save(res, res_builder, db_->WorkerId());
|
||||
});
|
||||
|
||||
coordination->Register<PrepareForExpandRpc>([this](auto *req_reader,
|
||||
auto *res_builder) {
|
||||
PrepareForExpandReq req;
|
||||
slk::Load(&req, req_reader, subcursor_storage_, &db_->data_manager());
|
||||
auto *subcursor = subcursor_storage_->Get(req.subcursor_id);
|
||||
subcursor->PrepareForExpand(req.clear, std::move(req.frame));
|
||||
PrepareForExpandRes res;
|
||||
slk::Save(res, res_builder);
|
||||
});
|
||||
}
|
||||
|
||||
void ClearTransactionalCache(tx::TransactionId oldest_active) {
|
||||
// It is unlikely this will become a performance issue, but if it does, we
|
||||
// should store database accessors in a lock-free map.
|
||||
std::lock_guard<std::mutex> guard(lock_);
|
||||
for (auto it = db_accessors_.begin(); it != db_accessors_.end();) {
|
||||
if (it->first < oldest_active) {
|
||||
it = db_accessors_.erase(it);
|
||||
} else {
|
||||
it++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
database::GraphDb *db_;
|
||||
|
||||
std::mutex lock_;
|
||||
std::map<tx::TransactionId, std::unique_ptr<database::GraphDbAccessor>>
|
||||
db_accessors_;
|
||||
BfsSubcursorStorage *subcursor_storage_;
|
||||
};
|
||||
|
||||
} // namespace distributed
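// A self-contained sketch of the ClearTransactionalCache pattern above:
// per-transaction state lives in an ordered map, and everything belonging to
// transactions older than `oldest_active` is evicted under a mutex. Plain
// std:: types stand in for the Memgraph accessors.
#include <cstdint>
#include <iostream>
#include <map>
#include <mutex>
#include <string>

class TransactionalCache {
 public:
  void Put(uint64_t tx_id, std::string state) {
    std::lock_guard<std::mutex> guard(lock_);
    cache_[tx_id] = std::move(state);
  }

  // Drop entries of transactions that can no longer be accessed.
  void ClearTransactionalCache(uint64_t oldest_active) {
    std::lock_guard<std::mutex> guard(lock_);
    for (auto it = cache_.begin(); it != cache_.end();) {
      if (it->first < oldest_active) {
        it = cache_.erase(it);
      } else {
        ++it;
      }
    }
  }

  size_t size() const {
    std::lock_guard<std::mutex> guard(lock_);
    return cache_.size();
  }

 private:
  mutable std::mutex lock_;
  std::map<uint64_t, std::string> cache_;
};

int main() {
  TransactionalCache cache;
  cache.Put(1, "a");
  cache.Put(5, "b");
  cache.ClearTransactionalCache(/*oldest_active=*/3);
  std::cout << cache.size() << "\n";  // prints 1
}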
|
@ -1,231 +0,0 @@
|
||||
#include "bfs_subcursor.hpp"
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
#include "database/distributed/graph_db.hpp"
|
||||
#include "distributed/bfs_rpc_clients.hpp"
|
||||
#include "query/exceptions.hpp"
|
||||
#include "query/plan/operator.hpp"
|
||||
#include "storage/distributed/address_types.hpp"
|
||||
#include "storage/vertex_accessor.hpp"
|
||||
|
||||
namespace distributed {
|
||||
|
||||
using query::TypedValue;
|
||||
|
||||
ExpandBfsSubcursor::ExpandBfsSubcursor(
|
||||
database::GraphDbAccessor *dba, query::EdgeAtom::Direction direction,
|
||||
std::vector<storage::EdgeType> edge_types, query::SymbolTable symbol_table,
|
||||
std::unique_ptr<query::AstStorage> ast_storage,
|
||||
query::plan::ExpansionLambda filter_lambda,
|
||||
query::EvaluationContext evaluation_context,
|
||||
BfsRpcClients *bfs_subcursor_clients)
|
||||
: bfs_subcursor_clients_(bfs_subcursor_clients),
|
||||
dba_(dba),
|
||||
direction_(direction),
|
||||
edge_types_(std::move(edge_types)),
|
||||
symbol_table_(std::move(symbol_table)),
|
||||
ast_storage_(std::move(ast_storage)),
|
||||
filter_lambda_(filter_lambda),
|
||||
evaluation_context_(std::move(evaluation_context)),
|
||||
frame_(symbol_table_.max_position()),
|
||||
expression_evaluator_(&frame_, symbol_table_, evaluation_context_, dba_,
|
||||
query::GraphView::OLD),
|
||||
tx_id_(dba->transaction_id()) {
|
||||
Reset();
|
||||
}
|
||||
|
||||
void ExpandBfsSubcursor::Reset() {
|
||||
pull_index_ = 0;
|
||||
processed_.clear();
|
||||
to_visit_current_.clear();
|
||||
to_visit_next_.clear();
|
||||
}
|
||||
|
||||
void ExpandBfsSubcursor::SetSource(storage::VertexAddress source_address) {
|
||||
Reset();
|
||||
auto source = VertexAccessor(source_address, *dba_);
|
||||
processed_.emplace(source, std::nullopt);
|
||||
ExpandFromVertex(source);
|
||||
}
|
||||
|
||||
void ExpandBfsSubcursor::PrepareForExpand(
|
||||
bool clear, std::vector<query::TypedValue> frame) {
|
||||
if (clear) {
|
||||
Reset();
|
||||
frame_.elems().assign(std::make_move_iterator(frame.begin()),
|
||||
std::make_move_iterator(frame.end()));
|
||||
} else {
|
||||
std::swap(to_visit_current_, to_visit_next_);
|
||||
to_visit_next_.clear();
|
||||
}
|
||||
}
|
||||
|
||||
bool ExpandBfsSubcursor::ExpandLevel() {
|
||||
bool expanded = false;
|
||||
for (const auto &expansion : to_visit_current_) {
|
||||
expanded |= ExpandFromVertex(expansion.second);
|
||||
}
|
||||
pull_index_ = 0;
|
||||
return expanded;
|
||||
}
|
||||
|
||||
std::optional<VertexAccessor> ExpandBfsSubcursor::Pull() {
|
||||
return pull_index_ < to_visit_next_.size()
|
||||
? std::make_optional(to_visit_next_[pull_index_++].second)
|
||||
: std::nullopt;
|
||||
}
|
||||
|
||||
bool ExpandBfsSubcursor::ExpandToLocalVertex(storage::EdgeAddress edge,
|
||||
VertexAccessor vertex) {
|
||||
CHECK(vertex.address().is_local())
|
||||
<< "ExpandToLocalVertex called with remote vertex";
|
||||
|
||||
edge = dba_->db().storage().LocalizedAddressIfPossible(edge);
|
||||
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
auto got = processed_.emplace(vertex, edge);
|
||||
if (got.second) {
|
||||
to_visit_next_.emplace_back(edge, vertex);
|
||||
}
|
||||
return got.second;
|
||||
}
|
||||
|
||||
bool ExpandBfsSubcursor::ExpandToLocalVertex(storage::EdgeAddress edge,
|
||||
storage::VertexAddress vertex) {
|
||||
auto vertex_accessor = VertexAccessor(vertex, *dba_);
|
||||
return ExpandToLocalVertex(edge, vertex_accessor);
|
||||
}
|
||||
|
||||
PathSegment ExpandBfsSubcursor::ReconstructPath(
|
||||
storage::EdgeAddress edge_address) {
|
||||
EdgeAccessor edge(edge_address, *dba_);
|
||||
CHECK(edge.address().is_local()) << "ReconstructPath called with remote edge";
|
||||
DCHECK(edge.from_addr().is_local()) << "`from` vertex should always be local";
|
||||
DCHECK(!edge.to_addr().is_local()) << "`to` vertex should be remote when "
|
||||
"calling ReconstructPath with edge";
|
||||
|
||||
PathSegment result;
|
||||
result.edges.emplace_back(edge);
|
||||
ReconstructPathHelper(edge.from(), &result);
|
||||
return result;
|
||||
}
|
||||
|
||||
PathSegment ExpandBfsSubcursor::ReconstructPath(
|
||||
storage::VertexAddress vertex_addr) {
|
||||
VertexAccessor vertex(vertex_addr, *dba_);
|
||||
CHECK(vertex.address().is_local())
|
||||
<< "ReconstructPath called with remote vertex";
|
||||
PathSegment result;
|
||||
ReconstructPathHelper(vertex, &result);
|
||||
return result;
|
||||
}
|
||||
|
||||
void ExpandBfsSubcursor::ReconstructPathHelper(VertexAccessor vertex,
|
||||
PathSegment *result) {
|
||||
auto it = processed_.find(vertex);
|
||||
CHECK(it != processed_.end())
|
||||
<< "ReconstructPath called with unvisited vertex";
|
||||
|
||||
auto in_edge_address = it->second;
|
||||
while (in_edge_address) {
|
||||
// In-edge is stored on another worker. It should be returned to master from
|
||||
// that worker, and path reconstruction should be continued there.
|
||||
if (in_edge_address->is_remote()) {
|
||||
result->next_edge = in_edge_address;
|
||||
break;
|
||||
}
|
||||
|
||||
result->edges.emplace_back(*in_edge_address, *dba_);
|
||||
|
||||
auto &in_edge = result->edges.back();
|
||||
auto next_vertex_address =
|
||||
in_edge.from_is(vertex) ? in_edge.to_addr() : in_edge.from_addr();
|
||||
|
||||
// We own the in-edge, but the next vertex on the path is stored on another
|
||||
// worker.
|
||||
if (next_vertex_address.is_remote()) {
|
||||
result->next_vertex = next_vertex_address;
|
||||
break;
|
||||
}
|
||||
|
||||
vertex = VertexAccessor(next_vertex_address, *dba_);
|
||||
in_edge_address = processed_[vertex];
|
||||
}
|
||||
}
|
||||
|
||||
bool ExpandBfsSubcursor::ExpandToVertex(EdgeAccessor edge,
|
||||
VertexAccessor vertex) {
|
||||
if (filter_lambda_.expression) {
|
||||
frame_[filter_lambda_.inner_edge_symbol] = edge;
|
||||
frame_[filter_lambda_.inner_node_symbol] = vertex;
|
||||
TypedValue result =
|
||||
filter_lambda_.expression->Accept(expression_evaluator_);
|
||||
if (!result.IsNull() && !result.IsBool()) {
|
||||
throw query::QueryRuntimeException(
|
||||
"Expansion condition must evaluate to boolean or null");
|
||||
}
|
||||
if (result.IsNull() || !result.ValueBool()) return false;
|
||||
}
|
||||
|
||||
return vertex.is_local() ? ExpandToLocalVertex(edge.address(), vertex)
|
||||
: bfs_subcursor_clients_->ExpandToRemoteVertex(
|
||||
subcursor_ids_, edge, vertex);
|
||||
}
|
||||
|
||||
bool ExpandBfsSubcursor::ExpandFromVertex(VertexAccessor vertex) {
|
||||
bool expanded = false;
|
||||
if (direction_ != query::EdgeAtom::Direction::IN) {
|
||||
for (const EdgeAccessor &edge : vertex.out(&edge_types_))
|
||||
expanded |= ExpandToVertex(edge, edge.to());
|
||||
}
|
||||
if (direction_ != query::EdgeAtom::Direction::OUT) {
|
||||
for (const EdgeAccessor &edge : vertex.in(&edge_types_))
|
||||
expanded |= ExpandToVertex(edge, edge.from());
|
||||
}
|
||||
return expanded;
|
||||
}
|
||||
|
||||
BfsSubcursorStorage::BfsSubcursorStorage(BfsRpcClients *bfs_subcursor_clients)
|
||||
: bfs_subcursor_clients_(bfs_subcursor_clients) {}
|
||||
|
||||
int64_t BfsSubcursorStorage::Create(
|
||||
database::GraphDbAccessor *dba, query::EdgeAtom::Direction direction,
|
||||
std::vector<storage::EdgeType> edge_types, query::SymbolTable symbol_table,
|
||||
std::unique_ptr<query::AstStorage> ast_storage,
|
||||
query::plan::ExpansionLambda filter_lambda,
|
||||
query::EvaluationContext evaluation_context) {
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
int64_t id = next_subcursor_id_++;
|
||||
auto got = storage_.emplace(
|
||||
id, std::make_unique<ExpandBfsSubcursor>(
|
||||
dba, direction, std::move(edge_types), std::move(symbol_table),
|
||||
std::move(ast_storage), filter_lambda,
|
||||
std::move(evaluation_context), bfs_subcursor_clients_));
|
||||
CHECK(got.second) << "Subcursor with ID " << id << " already exists";
|
||||
return id;
|
||||
}
|
||||
|
||||
ExpandBfsSubcursor *BfsSubcursorStorage::Get(int64_t subcursor_id) {
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
auto it = storage_.find(subcursor_id);
|
||||
CHECK(it != storage_.end())
|
||||
<< "Subcursor with ID " << subcursor_id << " not found";
|
||||
return it->second.get();
|
||||
}
|
||||
|
||||
void BfsSubcursorStorage::ClearTransactionalCache(
|
||||
tx::TransactionId oldest_active) {
|
||||
// It is unlikely this will become a performance issue, but if it does, we
|
||||
// should store BFS subcursors in a lock-free map.
|
||||
std::lock_guard<std::mutex> guard(mutex_);
|
||||
for (auto it = storage_.begin(); it != storage_.end();) {
|
||||
if (it->second->tx_id() < oldest_active) {
|
||||
it = storage_.erase(it);
|
||||
} else {
|
||||
it++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace distributed
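// A self-contained, single-machine stand-in for the frontier bookkeeping
// above: `processed` remembers the in-edge of every visited vertex,
// `to_visit_next` collects the next level, and swapping the two lists plays
// the role of PrepareForExpand/ExpandLevel. Plain ints stand in for vertex
// and edge addresses.
#include <iostream>
#include <optional>
#include <unordered_map>
#include <utility>
#include <vector>

int main() {
  // Hypothetical adjacency list: vertex -> (edge id, neighbour).
  std::unordered_map<int, std::vector<std::pair<int, int>>> graph = {
      {0, {{100, 1}, {101, 2}}}, {1, {{102, 3}}}, {2, {{103, 3}}}, {3, {}}};

  std::unordered_map<int, std::optional<int>> processed;  // vertex -> in-edge
  std::vector<std::pair<int, int>> to_visit_current, to_visit_next;

  // SetSource: mark the source visited and expand from it.
  int source = 0;
  processed.emplace(source, std::nullopt);
  for (auto [edge, neighbour] : graph[source])
    if (processed.emplace(neighbour, edge).second)
      to_visit_next.emplace_back(edge, neighbour);

  // Expand level by level until no new vertex is reached.
  while (!to_visit_next.empty()) {
    std::swap(to_visit_current, to_visit_next);  // PrepareForExpand(clear=false)
    to_visit_next.clear();
    for (auto [in_edge, vertex] : to_visit_current) {  // ExpandLevel
      std::cout << "reached " << vertex << " via edge " << in_edge << "\n";
      for (auto [edge, neighbour] : graph[vertex])
        if (processed.emplace(neighbour, edge).second)
          to_visit_next.emplace_back(edge, neighbour);
    }
  }
}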
|
@ -1,176 +0,0 @@
|
||||
/// @file
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "glog/logging.h"
|
||||
|
||||
#include "database/distributed/graph_db_accessor.hpp"
|
||||
#include "query/context.hpp"
|
||||
#include "query/frontend/semantic/symbol_table.hpp"
|
||||
#include "query/interpret/eval.hpp"
|
||||
#include "query/plan/operator.hpp"
|
||||
|
||||
namespace database {
|
||||
class GraphDb;
|
||||
}
|
||||
|
||||
namespace distributed {
|
||||
|
||||
class BfsRpcClients;
|
||||
|
||||
/// Path from BFS source to a vertex might span multiple workers. This struct
|
||||
/// stores information describing a segment of the path stored on a worker and
|
||||
/// information necessary to continue path reconstruction on another worker.
|
||||
struct PathSegment {
|
||||
std::vector<EdgeAccessor> edges;
|
||||
std::optional<storage::VertexAddress> next_vertex;
|
||||
std::optional<storage::EdgeAddress> next_edge;
|
||||
};
|
||||
|
||||
/// Class storing the worker-local state of distributed BFS traversal. For each
|
||||
/// traversal (uniquely identified by cursor id), there is one instance of this
|
||||
/// class per worker, and those instances communicate via RPC calls.
|
||||
class ExpandBfsSubcursor {
|
||||
public:
|
||||
ExpandBfsSubcursor(database::GraphDbAccessor *dba,
|
||||
query::EdgeAtom::Direction direction,
|
||||
std::vector<storage::EdgeType> edge_types,
|
||||
query::SymbolTable symbol_table,
|
||||
std::unique_ptr<query::AstStorage> ast_storage,
|
||||
query::plan::ExpansionLambda filter_lambda,
|
||||
query::EvaluationContext evaluation_context,
|
||||
BfsRpcClients *bfs_subcursor_clients);
|
||||
|
||||
// Stores subcursor ids of other workers.
|
||||
void RegisterSubcursors(std::unordered_map<int16_t, int64_t> subcursor_ids) {
|
||||
subcursor_ids_ = std::move(subcursor_ids);
|
||||
}
|
||||
|
||||
/// Sets the source to be used for new expansion.
|
||||
void SetSource(storage::VertexAddress source_address);
|
||||
|
||||
/// Notifies the subcursor that a new expansion should take place.
|
||||
/// `to_visit_next_` must be moved to `to_visit_current_` synchronously for
|
||||
/// all subcursors participating in expansion to avoid race condition with
|
||||
/// `ExpandToRemoteVertex` RPC requests. Also used before setting new source
|
||||
/// with `clear` set to true, to avoid a race condition similar to the one
|
||||
/// described above.
|
||||
///
|
||||
/// @param clear if set to true, `Reset` will be called instead of moving
|
||||
/// `to_visit_next_`
|
||||
/// @param frame frame for evaluation of the filter lambda expression
|
||||
void PrepareForExpand(bool clear, std::vector<query::TypedValue> frame);
|
||||
|
||||
/// Expands the BFS frontier once. Returns true if there was a successful
|
||||
/// expansion.
|
||||
bool ExpandLevel();
|
||||
|
||||
/// Pulls the next vertex in the current BFS frontier, if there is one.
|
||||
std::optional<VertexAccessor> Pull();
|
||||
|
||||
/// Expands to a local vertex, if it wasn't already visited. Returns true if
|
||||
/// expansion was successful.
|
||||
bool ExpandToLocalVertex(storage::EdgeAddress edge, VertexAccessor vertex);
|
||||
bool ExpandToLocalVertex(storage::EdgeAddress edge,
|
||||
storage::VertexAddress vertex);
|
||||
|
||||
/// Reconstruct the part of path ending with given edge, stored on this
|
||||
/// worker.
|
||||
PathSegment ReconstructPath(storage::EdgeAddress edge_address);
|
||||
|
||||
/// Reconstruct the part of path to given vertex stored on this worker.
|
||||
PathSegment ReconstructPath(storage::VertexAddress vertex_addr);
|
||||
|
||||
database::GraphDbAccessor *db_accessor() { return dba_; }
|
||||
|
||||
tx::TransactionId tx_id() { return tx_id_; }
|
||||
|
||||
/// Used to reset subcursor state before starting expansion from new source.
|
||||
void Reset();
|
||||
|
||||
private:
|
||||
/// Expands to a local or remote vertex, returns true if expansion was
|
||||
/// successful.
|
||||
bool ExpandToVertex(EdgeAccessor edge, VertexAccessor vertex);
|
||||
|
||||
/// Tries to expand to all vertices connected to the given one and returns true if
|
||||
/// any of them was successful.
|
||||
bool ExpandFromVertex(VertexAccessor vertex);
|
||||
|
||||
/// Helper for path reconstruction doing the actual work.
|
||||
void ReconstructPathHelper(VertexAccessor vertex, PathSegment *result);
|
||||
|
||||
BfsRpcClients *bfs_subcursor_clients_{nullptr};
|
||||
|
||||
database::GraphDbAccessor *dba_;
|
||||
|
||||
/// IDs of subcursors on other workers, used when sending RPCs.
|
||||
std::unordered_map<int16_t, int64_t> subcursor_ids_;
|
||||
|
||||
query::EdgeAtom::Direction direction_;
|
||||
std::vector<storage::EdgeType> edge_types_;
|
||||
|
||||
/// Symbol table and AstStorage for filter lambda evaluation. If the subcursor
|
||||
/// doesn't own the filter lambda expression, `ast_storage_` is set to
|
||||
/// nullptr.
|
||||
query::SymbolTable symbol_table_;
|
||||
std::unique_ptr<query::AstStorage> ast_storage_;
|
||||
query::plan::ExpansionLambda filter_lambda_;
|
||||
|
||||
/// Evaluation context, frame and expression evaluator for evaluation of
|
||||
/// filter lambda.
|
||||
query::EvaluationContext evaluation_context_;
|
||||
query::Frame frame_;
|
||||
query::ExpressionEvaluator expression_evaluator_;
|
||||
|
||||
/// Mutex protecting `to_visit_next_` and `processed_`, because there is a
|
||||
/// race between expansions done locally using `ExpandToLocalVertex` and
|
||||
/// incoming `ExpandToRemoteVertex` RPCs.
|
||||
std::mutex mutex_;
|
||||
|
||||
/// List of visited vertices and their incoming edges. Local address is stored
|
||||
/// for local edges, global address for remote edges.
|
||||
std::unordered_map<VertexAccessor, std::optional<storage::EdgeAddress>>
|
||||
processed_;
|
||||
|
||||
/// List of vertices at the current expansion level.
|
||||
std::vector<std::pair<storage::EdgeAddress, VertexAccessor>>
|
||||
to_visit_current_;
|
||||
|
||||
/// List of unvisited vertices reachable from current expansion level.
|
||||
std::vector<std::pair<storage::EdgeAddress, VertexAccessor>> to_visit_next_;
|
||||
|
||||
/// Index of the vertex from `to_visit_next_` to return on next pull.
|
||||
size_t pull_index_;
|
||||
|
||||
// Transaction ID used for transactional cache clean-up mechanism.
|
||||
tx::TransactionId tx_id_;
|
||||
};
|
||||
|
||||
/// Thread-safe storage for BFS subcursors.
|
||||
class BfsSubcursorStorage {
|
||||
public:
|
||||
explicit BfsSubcursorStorage(BfsRpcClients *bfs_subcursor_clients);
|
||||
|
||||
int64_t Create(database::GraphDbAccessor *dba,
|
||||
query::EdgeAtom::Direction direction,
|
||||
std::vector<storage::EdgeType> edge_types,
|
||||
query::SymbolTable symbol_table,
|
||||
std::unique_ptr<query::AstStorage> ast_storage,
|
||||
query::plan::ExpansionLambda filter_lambda,
|
||||
query::EvaluationContext evaluation_context);
|
||||
ExpandBfsSubcursor *Get(int64_t subcursor_id);
|
||||
void ClearTransactionalCache(tx::TransactionId oldest_active);
|
||||
|
||||
private:
|
||||
BfsRpcClients *bfs_subcursor_clients_{nullptr};
|
||||
|
||||
std::mutex mutex_;
|
||||
std::map<int64_t, std::unique_ptr<ExpandBfsSubcursor>> storage_;
|
||||
int64_t next_subcursor_id_{0};
|
||||
};
|
||||
|
||||
} // namespace distributed
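// Illustrative path reconstruction across workers (not part of the removed
// files). Starting from a reached vertex, each worker returns the PathSegment
// it owns; if the segment ends with a remote `next_vertex` or `next_edge`,
// reconstruction continues on the worker owning that address. Assumes the
// `clients`, `dba` and `subcursors` objects from the expansion sketch above.
std::vector<EdgeAccessor> StitchPathSketch(
    distributed::BfsRpcClients *clients, database::GraphDbAccessor *dba,
    const std::unordered_map<int16_t, int64_t> &subcursors,
    storage::VertexAddress reached_vertex) {
  std::vector<EdgeAccessor> edges;
  auto segment = clients->ReconstructPath(subcursors, reached_vertex, dba);
  while (true) {
    edges.insert(edges.end(), segment.edges.begin(), segment.edges.end());
    if (segment.next_vertex) {
      segment = clients->ReconstructPath(subcursors, *segment.next_vertex, dba);
    } else if (segment.next_edge) {
      segment = clients->ReconstructPath(subcursors, *segment.next_edge, dba);
    } else {
      break;  // The segment ends at the BFS source.
    }
  }
  return edges;
}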
|
@ -1,23 +0,0 @@
|
||||
/// @file
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
namespace distributed {
|
||||
/// A wrapper for cached vertex/edge from other machines in the distributed
|
||||
/// system.
|
||||
///
|
||||
/// @tparam TRecord Vertex or Edge
|
||||
template <typename TRecord>
|
||||
struct CachedRecordData {
|
||||
CachedRecordData(int64_t cypher_id, std::unique_ptr<TRecord> old_record,
|
||||
std::unique_ptr<TRecord> new_record)
|
||||
: cypher_id(cypher_id),
|
||||
old_record(std::move(old_record)),
|
||||
new_record(std::move(new_record)) {}
|
||||
int64_t cypher_id;
|
||||
std::unique_ptr<TRecord> old_record;
|
||||
std::unique_ptr<TRecord> new_record;
|
||||
};
|
||||
} // namespace distributed
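// A small sketch of what CachedRecordData holds (assumes the header above is
// included): the cache keeps both the last committed ("old") and the locally
// updated ("new") version of a record fetched from another worker.
// `DummyRecord` is a stand-in type, not Memgraph's Vertex or Edge.
#include <iostream>
#include <memory>

struct DummyRecord {
  int value;
};

int main() {
  auto old_version = std::make_unique<DummyRecord>(DummyRecord{1});
  auto new_version = std::make_unique<DummyRecord>(DummyRecord{2});
  // Hypothetical remote record with Cypher id 42.
  distributed::CachedRecordData<DummyRecord> data(42, std::move(old_version),
                                                  std::move(new_version));
  std::cout << data.cypher_id << " " << data.old_record->value << " "
            << data.new_record->value << "\n";  // prints "42 1 2"
}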
|
@ -1,87 +0,0 @@
|
||||
#include "distributed/cluster_discovery_master.hpp"
|
||||
|
||||
#include <filesystem>
|
||||
|
||||
#include "distributed/coordination_rpc_messages.hpp"
|
||||
#include "io/network/endpoint.hpp"
|
||||
#include "utils/file.hpp"
|
||||
#include "utils/string.hpp"
|
||||
|
||||
namespace distributed {
|
||||
|
||||
ClusterDiscoveryMaster::ClusterDiscoveryMaster(
|
||||
MasterCoordination *coordination, const std::string &durability_directory)
|
||||
: coordination_(coordination), durability_directory_(durability_directory) {
|
||||
coordination_->Register<RegisterWorkerRpc>([this](const auto &endpoint,
|
||||
auto *req_reader,
|
||||
auto *res_builder) {
|
||||
bool registration_successful = false;
|
||||
bool durability_error = false;
|
||||
|
||||
RegisterWorkerReq req;
|
||||
slk::Load(&req, req_reader);
|
||||
|
||||
// Compose the worker's endpoint from its connecting address and its
|
||||
// advertised port.
|
||||
io::network::Endpoint worker_endpoint(endpoint.address(), req.port);
|
||||
|
||||
// Create and find out what is our durability directory.
|
||||
utils::EnsureDirOrDie(durability_directory_);
|
||||
auto full_durability_directory =
|
||||
std::filesystem::canonical(durability_directory_);
|
||||
|
||||
// Check whether the worker is running on the same host (detected when it
|
||||
// connects to us over the loopback interface) and whether it has the same
|
||||
// durability directory as us.
|
||||
// TODO (mferencevic): This check should also be done for all workers in
|
||||
// between them because this check only verifies that the worker and master
|
||||
// don't collide; there can still be a collision between workers.
|
||||
if ((utils::StartsWith(endpoint.address(), "127.") ||
|
||||
endpoint.address() == "::1") &&
|
||||
req.durability_directory == full_durability_directory) {
|
||||
durability_error = true;
|
||||
LOG(WARNING)
|
||||
<< "The worker at " << worker_endpoint
|
||||
<< " was started with the same durability directory as the master!";
|
||||
}
|
||||
|
||||
// Register the worker if the durability check succeeded.
|
||||
if (!durability_error) {
|
||||
registration_successful =
|
||||
coordination_->RegisterWorker(req.desired_worker_id, worker_endpoint);
|
||||
}
|
||||
|
||||
// Notify the cluster of the new worker if the registration succeeded.
|
||||
if (registration_successful) {
|
||||
coordination_->ExecuteOnWorkers<
|
||||
void>(0, [req, worker_endpoint](
|
||||
int worker_id,
|
||||
communication::rpc::ClientPool &client_pool) {
|
||||
try {
|
||||
client_pool.Call<ClusterDiscoveryRpc>(req.desired_worker_id,
|
||||
worker_endpoint);
|
||||
} catch (const communication::rpc::RpcFailedException &) {
|
||||
LOG(FATAL)
|
||||
<< "Couldn't notify the cluster of the changed configuration!";
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
RegisterWorkerRes res(registration_successful, durability_error,
|
||||
coordination_->RecoveredSnapshotTx(),
|
||||
coordination_->GetWorkers());
|
||||
slk::Save(res, res_builder);
|
||||
});
|
||||
|
||||
coordination_->Register<NotifyWorkerRecoveredRpc>(
|
||||
[this](auto *req_reader, auto *res_builder) {
|
||||
NotifyWorkerRecoveredReq req;
|
||||
slk::Load(&req, req_reader);
|
||||
coordination_->WorkerRecoveredSnapshot(req.worker_id,
|
||||
req.recovery_info);
|
||||
NotifyWorkerRecoveredRes res;
|
||||
slk::Save(res, res_builder);
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace distributed
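// A self-contained sketch of the durability-directory collision check above:
// if a worker connects over loopback and reports the same canonicalized
// durability directory as the master, registration must be refused because
// both processes would write durability files to the same place. Uses
// weakly_canonical so the paths don't have to exist when the sketch runs.
#include <filesystem>
#include <iostream>
#include <string>

bool WouldCollide(const std::string &connecting_address,
                  const std::filesystem::path &master_durability_dir,
                  const std::filesystem::path &worker_durability_dir) {
  bool same_host = connecting_address.rfind("127.", 0) == 0 ||
                   connecting_address == "::1";
  if (!same_host) return false;
  std::error_code ec;
  auto master_dir = std::filesystem::weakly_canonical(master_durability_dir, ec);
  auto worker_dir = std::filesystem::weakly_canonical(worker_durability_dir, ec);
  return master_dir == worker_dir;
}

int main() {
  std::cout << std::boolalpha
            << WouldCollide("127.0.0.1", "/tmp/mg_data", "/tmp/mg_data") << "\n"
            << WouldCollide("10.0.0.5", "/tmp/mg_data", "/tmp/mg_data") << "\n";
}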
|
@ -1,24 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "distributed/coordination_master.hpp"
|
||||
|
||||
namespace distributed {
|
||||
using Server = communication::rpc::Server;
|
||||
|
||||
/** Handle cluster discovery on master.
|
||||
*
|
||||
* Cluster discovery on master handles worker registration and broadcasts new
|
||||
* worker information to already registered workers, and already registered
|
||||
* worker information to the new worker.
|
||||
*/
|
||||
class ClusterDiscoveryMaster final {
|
||||
public:
|
||||
ClusterDiscoveryMaster(MasterCoordination *coordination,
|
||||
const std::string &durability_directory);
|
||||
|
||||
private:
|
||||
MasterCoordination *coordination_;
|
||||
std::string durability_directory_;
|
||||
};
|
||||
|
||||
} // namespace distributed
|
@ -1,65 +0,0 @@
|
||||
#include "distributed/cluster_discovery_worker.hpp"
|
||||
|
||||
#include <filesystem>
|
||||
|
||||
#include "distributed/coordination_rpc_messages.hpp"
|
||||
#include "utils/file.hpp"
|
||||
|
||||
namespace distributed {
|
||||
using Server = communication::rpc::Server;
|
||||
|
||||
ClusterDiscoveryWorker::ClusterDiscoveryWorker(WorkerCoordination *coordination)
|
||||
: coordination_(coordination),
|
||||
client_pool_(coordination->GetClientPool(0)) {
|
||||
coordination->Register<ClusterDiscoveryRpc>(
|
||||
[this](auto *req_reader, auto *res_builder) {
|
||||
ClusterDiscoveryReq req;
|
||||
slk::Load(&req, req_reader);
|
||||
coordination_->RegisterWorker(req.worker_id, req.endpoint);
|
||||
ClusterDiscoveryRes res;
|
||||
slk::Save(res, res_builder);
|
||||
});
|
||||
}
|
||||
|
||||
void ClusterDiscoveryWorker::RegisterWorker(
|
||||
int worker_id, const std::string &durability_directory) {
|
||||
// Create and find out what is our durability directory.
|
||||
utils::EnsureDirOrDie(durability_directory);
|
||||
auto full_durability_directory =
|
||||
std::filesystem::canonical(durability_directory);
|
||||
|
||||
// Register to the master.
|
||||
try {
|
||||
auto result = client_pool_->Call<RegisterWorkerRpc>(
|
||||
worker_id, coordination_->GetServerEndpoint().port(),
|
||||
full_durability_directory);
|
||||
CHECK(!result.durability_error)
|
||||
<< "This worker was started on the same machine and with the same "
|
||||
"durability directory as the master! Please change the durability "
|
||||
"directory for this worker.";
|
||||
CHECK(result.registration_successful)
|
||||
<< "Unable to assign requested ID (" << worker_id << ") to worker!";
|
||||
|
||||
worker_id_ = worker_id;
|
||||
for (auto &kv : result.workers) {
|
||||
coordination_->RegisterWorker(kv.first, kv.second);
|
||||
}
|
||||
snapshot_to_recover_ = result.snapshot_to_recover;
|
||||
} catch (const communication::rpc::RpcFailedException &e) {
|
||||
LOG(FATAL) << "Couldn't register to the master!";
|
||||
}
|
||||
}
|
||||
|
||||
void ClusterDiscoveryWorker::NotifyWorkerRecovered(
|
||||
const std::optional<durability::RecoveryInfo> &recovery_info) {
|
||||
CHECK(worker_id_ >= 0)
|
||||
<< "Workers id is not yet assigned, preform registration before "
|
||||
"notifying that the recovery finished";
|
||||
try {
|
||||
client_pool_->Call<NotifyWorkerRecoveredRpc>(worker_id_, recovery_info);
|
||||
} catch (const communication::rpc::RpcFailedException &e) {
|
||||
LOG(FATAL) << "Couldn't notify the master that we finished recovering!";
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace distributed
|
@ -1,50 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <optional>
|
||||
|
||||
#include "communication/rpc/client_pool.hpp"
|
||||
#include "communication/rpc/server.hpp"
|
||||
#include "distributed/coordination_worker.hpp"
|
||||
#include "durability/distributed/recovery.hpp"
|
||||
|
||||
namespace distributed {
|
||||
|
||||
/** Handle cluster discovery on worker.
|
||||
*
|
||||
 * Cluster discovery on worker handles worker registration by sending an RPC
|
||||
 * request to the master and processes the received RPC response with other worker
|
||||
* information.
|
||||
*/
|
||||
class ClusterDiscoveryWorker final {
|
||||
public:
|
||||
ClusterDiscoveryWorker(WorkerCoordination *coordination);
|
||||
|
||||
/**
|
||||
* Registers a worker with the master.
|
||||
*
|
||||
* @param worker_id - Desired ID. If master can't assign the desired worker
|
||||
* id, worker will exit.
|
||||
* @param durability_directory - The durability directory that is used for
|
||||
* this worker.
|
||||
*/
|
||||
void RegisterWorker(int worker_id, const std::string &durability_directory);
|
||||
|
||||
/**
|
||||
* Notifies the master that the worker finished recovering. Assumes that the
|
||||
* worker was already registered with master.
|
||||
*/
|
||||
void NotifyWorkerRecovered(
|
||||
const std::optional<durability::RecoveryInfo> &recovery_info);
|
||||
|
||||
/** Returns the snapshot that should be recovered on workers. Valid only after
|
||||
* registration. */
|
||||
auto snapshot_to_recover() const { return snapshot_to_recover_; }
|
||||
|
||||
private:
|
||||
int worker_id_{-1};
|
||||
distributed::WorkerCoordination *coordination_;
|
||||
communication::rpc::ClientPool *client_pool_;
|
||||
std::optional<std::pair<int64_t, tx::TransactionId>> snapshot_to_recover_;
|
||||
};
|
||||
|
||||
} // namespace distributed
|
@ -1,93 +0,0 @@
|
||||
#include "glog/logging.h"
|
||||
|
||||
#include <thread>
|
||||
|
||||
#include "distributed/coordination.hpp"
|
||||
|
||||
namespace distributed {
|
||||
|
||||
Coordination::Coordination(const io::network::Endpoint &worker_endpoint,
|
||||
int worker_id,
|
||||
const io::network::Endpoint &master_endpoint,
|
||||
int server_workers_count, int client_workers_count)
|
||||
: server_(worker_endpoint, &server_context_, server_workers_count),
|
||||
thread_pool_(client_workers_count, "RPC client") {
|
||||
if (worker_id != 0) {
|
||||
// The master is always worker 0.
|
||||
// We only emplace the master endpoint when this instance isn't the
|
||||
// `MasterCoordination`. This is because we don't know the exact master
|
||||
// endpoint until the master server is started. The `MasterCoordination`
|
||||
// will emplace the master endpoint when the server is started. Eg. if
|
||||
// `0.0.0.0:0` is supplied as the master endpoint that should be first
|
||||
// resolved by the server when it binds to that address and
|
||||
// `server_.endpoint()` should be used.
|
||||
workers_.emplace(0, master_endpoint);
|
||||
}
|
||||
}
|
||||
|
||||
Coordination::~Coordination() {}
|
||||
|
||||
io::network::Endpoint Coordination::GetEndpoint(int worker_id) {
|
||||
std::lock_guard<std::mutex> guard(lock_);
|
||||
auto found = workers_.find(worker_id);
|
||||
// TODO (mferencevic): Handle this error situation differently.
|
||||
CHECK(found != workers_.end())
|
||||
<< "No endpoint registered for worker id: " << worker_id;
|
||||
return found->second;
|
||||
}
|
||||
|
||||
io::network::Endpoint Coordination::GetServerEndpoint() {
|
||||
return server_.endpoint();
|
||||
}
|
||||
|
||||
std::vector<int> Coordination::GetWorkerIds() {
|
||||
std::lock_guard<std::mutex> guard(lock_);
|
||||
std::vector<int> worker_ids;
|
||||
for (auto worker : workers_) worker_ids.push_back(worker.first);
|
||||
return worker_ids;
|
||||
}
|
||||
|
||||
std::unordered_map<int, io::network::Endpoint> Coordination::GetWorkers() {
|
||||
std::lock_guard<std::mutex> guard(lock_);
|
||||
return workers_;
|
||||
}
|
||||
|
||||
communication::rpc::ClientPool *Coordination::GetClientPool(int worker_id) {
|
||||
std::lock_guard<std::mutex> guard(lock_);
|
||||
auto found = client_pools_.find(worker_id);
|
||||
if (found != client_pools_.end()) return &found->second;
|
||||
auto found_endpoint = workers_.find(worker_id);
|
||||
// TODO (mferencevic): Handle this error situation differently.
|
||||
CHECK(found_endpoint != workers_.end())
|
||||
<< "No endpoint registered for worker id: " << worker_id;
|
||||
auto &endpoint = found_endpoint->second;
|
||||
return &client_pools_
|
||||
.emplace(std::piecewise_construct,
|
||||
std::forward_as_tuple(worker_id),
|
||||
std::forward_as_tuple(endpoint, &client_context_))
|
||||
.first->second;
|
||||
}
|
||||
|
||||
void Coordination::AddWorker(int worker_id,
|
||||
const io::network::Endpoint &endpoint) {
|
||||
std::lock_guard<std::mutex> guard(lock_);
|
||||
workers_.insert({worker_id, endpoint});
|
||||
}
|
||||
|
||||
std::string Coordination::GetWorkerName(const io::network::Endpoint &endpoint) {
|
||||
std::lock_guard<std::mutex> guard(lock_);
|
||||
for (const auto &worker : workers_) {
|
||||
if (worker.second == endpoint) {
|
||||
if (worker.first == 0) {
|
||||
return fmt::format("master ({})", worker.second);
|
||||
} else {
|
||||
return fmt::format("worker {} ({})", worker.first, worker.second);
|
||||
}
|
||||
}
|
||||
}
|
||||
return fmt::format("unknown worker ({})", endpoint);
|
||||
}
|
||||
|
||||
bool Coordination::IsClusterAlive() { return cluster_alive_; }
|
||||
|
||||
} // namespace distributed
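// A self-contained sketch of the GetClientPool caching pattern above: look up
// an existing connection object under a mutex and lazily construct it from
// the worker's registered endpoint on first use. std::string stands in for
// communication::rpc::ClientPool.
#include <iostream>
#include <mutex>
#include <string>
#include <unordered_map>

class PoolCache {
 public:
  void AddWorker(int worker_id, std::string endpoint) {
    std::lock_guard<std::mutex> guard(lock_);
    workers_[worker_id] = std::move(endpoint);
  }

  // Returns a cached "pool" for the worker, creating it on first request.
  std::string *GetClientPool(int worker_id) {
    std::lock_guard<std::mutex> guard(lock_);
    auto found = pools_.find(worker_id);
    if (found != pools_.end()) return &found->second;
    auto endpoint = workers_.find(worker_id);
    if (endpoint == workers_.end()) return nullptr;
    return &pools_.emplace(worker_id, "pool->" + endpoint->second).first->second;
  }

 private:
  std::mutex lock_;
  std::unordered_map<int, std::string> workers_;
  std::unordered_map<int, std::string> pools_;
};

int main() {
  PoolCache cache;
  cache.AddWorker(1, "192.168.0.1:10000");
  std::cout << *cache.GetClientPool(1) << "\n";                              // created lazily
  std::cout << (cache.GetClientPool(1) == cache.GetClientPool(1)) << "\n";   // cached: 1
}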
|
@ -1,114 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
#include <type_traits>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "communication/rpc/client_pool.hpp"
|
||||
#include "communication/rpc/server.hpp"
|
||||
#include "io/network/endpoint.hpp"
|
||||
#include "utils/future.hpp"
|
||||
#include "utils/thread.hpp"
|
||||
|
||||
namespace distributed {
|
||||
|
||||
/// Coordination base class. This class is thread safe.
|
||||
class Coordination {
|
||||
protected:
|
||||
Coordination(const io::network::Endpoint &worker_endpoint, int worker_id,
|
||||
const io::network::Endpoint &master_endpoint,
|
||||
int server_workers_count = std::thread::hardware_concurrency(),
|
||||
int client_workers_count = std::thread::hardware_concurrency());
|
||||
~Coordination();
|
||||
|
||||
public:
|
||||
/// Gets the endpoint for the given worker ID from the master.
|
||||
io::network::Endpoint GetEndpoint(int worker_id);
|
||||
|
||||
/// Gets the endpoint for this RPC server.
|
||||
io::network::Endpoint GetServerEndpoint();
|
||||
|
||||
/// Returns all worker IDs, including the master (ID 0).
|
||||
std::vector<int> GetWorkerIds();
|
||||
|
||||
/// Gets the mapping of worker id to worker endpoint including master (ID 0).
|
||||
std::unordered_map<int, io::network::Endpoint> GetWorkers();
|
||||
|
||||
/// Returns a cached `ClientPool` for the given `worker_id`.
|
||||
communication::rpc::ClientPool *GetClientPool(int worker_id);
|
||||
|
||||
/// Asynchroniously executes the given function on the rpc client for the
|
||||
/// given worker id. Returns an `utils::Future` of the given `execute`
|
||||
/// function's return type.
|
||||
template <typename TResult>
|
||||
auto ExecuteOnWorker(
|
||||
int worker_id,
|
||||
std::function<TResult(int worker_id, communication::rpc::ClientPool &)>
|
||||
execute) {
|
||||
// TODO (mferencevic): Change this lambda to accept a pointer to
|
||||
// `ClientPool` instead of a reference!
|
||||
auto client_pool = GetClientPool(worker_id);
|
||||
return thread_pool_.Run(execute, worker_id, std::ref(*client_pool));
|
||||
}
|
||||
|
||||
/// Asynchroniously executes the `execute` function on all worker rpc clients
|
||||
/// except the one whose id is `skip_worker_id`. Returns a vector of futures
|
||||
/// contaning the results of the `execute` function.
|
||||
template <typename TResult>
|
||||
auto ExecuteOnWorkers(
|
||||
int skip_worker_id,
|
||||
std::function<TResult(int worker_id, communication::rpc::ClientPool &)>
|
||||
execute) {
|
||||
std::vector<utils::Future<TResult>> futures;
|
||||
// TODO (mferencevic): GetWorkerIds always copies the vector of workers,
|
||||
// this may be an issue...
|
||||
for (auto &worker_id : GetWorkerIds()) {
|
||||
if (worker_id == skip_worker_id) continue;
|
||||
futures.emplace_back(std::move(ExecuteOnWorker(worker_id, execute)));
|
||||
}
|
||||
return futures;
|
||||
}
|
||||
|
||||
template <class TRequestResponse>
|
||||
void Register(std::function<void(slk::Reader *, slk::Builder *)> callback) {
|
||||
server_.Register<TRequestResponse>(callback);
|
||||
}
|
||||
|
||||
template <class TRequestResponse>
|
||||
void Register(std::function<void(const io::network::Endpoint &, slk::Reader *,
|
||||
slk::Builder *)>
|
||||
callback) {
|
||||
server_.Register<TRequestResponse>(callback);
|
||||
}
|
||||
|
||||
/// Returns `true` if the cluster is in a consistent state.
|
||||
bool IsClusterAlive();
|
||||
|
||||
protected:
|
||||
/// Adds a worker to the coordination. This function can be called multiple
|
||||
/// times to replace an existing worker.
|
||||
void AddWorker(int worker_id, const io::network::Endpoint &endpoint);
|
||||
|
||||
/// Gets a worker name for the given endpoint.
|
||||
std::string GetWorkerName(const io::network::Endpoint &endpoint);
|
||||
|
||||
// TODO(mferencevic): distributed is currently hardcoded not to use SSL
|
||||
communication::ServerContext server_context_;
|
||||
communication::rpc::Server server_;
|
||||
|
||||
std::atomic<bool> cluster_alive_{true};
|
||||
|
||||
private:
|
||||
std::unordered_map<int, io::network::Endpoint> workers_;
|
||||
mutable std::mutex lock_;
|
||||
|
||||
// TODO(mferencevic): distributed is currently hardcoded not to use SSL
|
||||
communication::ClientContext client_context_;
|
||||
std::unordered_map<int, communication::rpc::ClientPool> client_pools_;
|
||||
utils::ThreadPool thread_pool_;
|
||||
};
|
||||
|
||||
} // namespace distributed
|
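For orientation only, a minimal sketch of how a caller might use the fan-out helper removed above; it is not part of this diff, and CountSomethingRpc is an invented RPC type used purely for illustration (it mirrors the `res.member` pattern used by the RPCs elsewhere in this change):

// Hypothetical caller; assumes a `Coordination *coordination` and an RPC whose
// response exposes a single `member` field.
auto futures = coordination->ExecuteOnWorkers<int64_t>(
    0 /* skip the master (worker ID 0) */,
    [](int worker_id, communication::rpc::ClientPool &client_pool) {
      auto res = client_pool.Call<CountSomethingRpc>();
      return res.member;
    });
int64_t total = 0;
for (auto &future : futures) total += future.get();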
@ -1,233 +0,0 @@
#include <algorithm>
#include <chrono>
#include <thread>

#include "glog/logging.h"

#include "communication/rpc/client.hpp"
#include "distributed/coordination_master.hpp"
#include "distributed/coordination_rpc_messages.hpp"
#include "io/network/utils.hpp"
#include "utils/string.hpp"

namespace distributed {

// Send a heartbeat request to the workers every `kHeartbeatIntervalSeconds`.
// This constant must be at least 10x smaller than `kHeartbeatMaxDelaySeconds`
// that is defined in the worker coordination.
const int kHeartbeatIntervalSeconds = 1;

MasterCoordination::MasterCoordination(const Endpoint &master_endpoint,
                                       int server_workers_count,
                                       int client_workers_count)
    : Coordination(master_endpoint, 0, {}, server_workers_count,
                   client_workers_count) {}

MasterCoordination::~MasterCoordination() {
  CHECK(!alive_) << "You must call Shutdown and AwaitShutdown on "
                    "distributed::MasterCoordination!";
}

bool MasterCoordination::RegisterWorker(int desired_worker_id,
                                        Endpoint endpoint) {
  // Workers can't register before the recovery phase on the master is done to
  // ensure the whole cluster is in a consistent state.
  while (true) {
    {
      std::lock_guard<std::mutex> guard(master_lock_);
      if (recovery_done_) break;
    }
    std::this_thread::sleep_for(std::chrono::milliseconds(200));
  }

  std::lock_guard<std::mutex> guard(master_lock_);
  auto workers = GetWorkers();
  // Check if the desired worker id already exists.
  if (workers.find(desired_worker_id) != workers.end()) {
    LOG(WARNING) << "Unable to assign requested ID (" << desired_worker_id
                 << ") to worker at: " << endpoint;
    // If the desired worker ID is already assigned, return false and don't add
    // that worker to master coordination.
    return false;
  }

  AddWorker(desired_worker_id, endpoint);
  return true;
}

void MasterCoordination::WorkerRecoveredSnapshot(
    int worker_id,
    const std::optional<durability::RecoveryInfo> &recovery_info) {
  CHECK(recovered_workers_.insert(std::make_pair(worker_id, recovery_info))
            .second)
      << "Worker already notified about finishing recovery";
}

void MasterCoordination::SetRecoveredSnapshot(
    std::optional<std::pair<int64_t, tx::TransactionId>>
        recovered_snapshot_tx) {
  std::lock_guard<std::mutex> guard(master_lock_);
  recovery_done_ = true;
  recovered_snapshot_tx_ = recovered_snapshot_tx;
}

int MasterCoordination::CountRecoveredWorkers() const {
  return recovered_workers_.size();
}

std::optional<std::pair<int64_t, tx::TransactionId>>
MasterCoordination::RecoveredSnapshotTx() const {
  std::lock_guard<std::mutex> guard(master_lock_);
  CHECK(recovery_done_) << "Recovered snapshot requested before it's available";
  return recovered_snapshot_tx_;
}

std::vector<tx::TransactionId> MasterCoordination::CommonWalTransactions(
    const durability::RecoveryInfo &master_info) const {
  int cluster_size;
  std::unordered_map<tx::TransactionId, int> tx_cnt;
  for (auto tx : master_info.wal_recovered) {
    tx_cnt[tx]++;
  }

  {
    std::lock_guard<std::mutex> guard(master_lock_);
    for (auto worker : recovered_workers_) {
      // If there is no recovery info we can just return an empty vector since
      // we can't restore any transaction
      if (!worker.second) return {};
      for (auto tx : worker.second->wal_recovered) {
        tx_cnt[tx]++;
      }
    }
    // Add one because of master
    cluster_size = recovered_workers_.size() + 1;
  }

  std::vector<tx::TransactionId> tx_intersection;
  for (auto tx : tx_cnt) {
    if (tx.second == cluster_size) {
      tx_intersection.push_back(tx.first);
    }
  }

  return tx_intersection;
}

bool MasterCoordination::Start() {
  if (!server_.Start()) return false;
  AddWorker(0, server_.endpoint());
  scheduler_.Run("Heartbeat", std::chrono::seconds(kHeartbeatIntervalSeconds),
                 [this] { IssueHeartbeats(); });
  return true;
}

bool MasterCoordination::AwaitShutdown(
    std::function<bool(bool)> call_before_shutdown) {
  // Wait for a shutdown notification.
  while (alive_) {
    std::this_thread::sleep_for(std::chrono::milliseconds(100));
  }

  // Copy the current value of the cluster state.
  bool is_cluster_alive = cluster_alive_;

  // Call the before shutdown callback.
  bool ret = call_before_shutdown(is_cluster_alive);

  // Stop the heartbeat scheduler so we don't cause any errors during shutdown.
  // Also, we manually issue one final heartbeat to all workers so that their
  // counters are reset. This must be done immediately before issuing shutdown
  // requests to the workers. The `IssueHeartbeats` will ignore any errors that
  // occur now because we are in the process of shutting the cluster down.
  scheduler_.Stop();
  IssueHeartbeats();

  // Shutdown all workers.
  auto workers = GetWorkers();
  std::vector<std::pair<int, io::network::Endpoint>> workers_sorted(
      workers.begin(), workers.end());
  std::sort(workers_sorted.begin(), workers_sorted.end(),
            [](const std::pair<int, io::network::Endpoint> &a,
               const std::pair<int, io::network::Endpoint> &b) {
              return a.first < b.first;
            });
  LOG(INFO) << "Starting shutdown of all workers.";
  for (const auto &worker : workers_sorted) {
    // Skip master (self).
    if (worker.first == 0) continue;
    auto client_pool = GetClientPool(worker.first);
    try {
      client_pool->Call<StopWorkerRpc>();
    } catch (const communication::rpc::RpcFailedException &e) {
      LOG(WARNING) << "Couldn't shutdown " << GetWorkerName(e.endpoint());
    }
  }

  // Make sure all workers have died.
  while (true) {
    std::vector<std::string> workers_alive;
    std::this_thread::sleep_for(std::chrono::milliseconds(500));
    for (const auto &worker : workers_sorted) {
      // Skip master (self).
      if (worker.first == 0) continue;
      if (io::network::CanEstablishConnection(worker.second)) {
        workers_alive.push_back(GetWorkerName(worker.second));
      }
    }
    if (workers_alive.size() == 0) break;
    LOG(INFO) << "Waiting for " << utils::Join(workers_alive, ", ")
              << " to finish shutting down...";
  }
  LOG(INFO) << "Shutdown of all workers is complete.";

  // Some RPC servers might still depend on the cluster status to shut down. At
  // this point all workers are down which means that the cluster is also not
  // alive any more.
  cluster_alive_.store(false);

  // Shutdown our RPC server.
  server_.Shutdown();
  server_.AwaitShutdown();

  // Return `true` if the cluster is alive and the `call_before_shutdown`
  // succeeded.
  return ret && is_cluster_alive;
}

void MasterCoordination::Shutdown() { alive_.store(false); }

void MasterCoordination::IssueHeartbeats() {
  std::lock_guard<std::mutex> guard(master_lock_);
  auto workers = GetWorkers();
  for (const auto &worker : workers) {
    // Skip master (self).
    if (worker.first == 0) continue;
    auto client_pool = GetClientPool(worker.first);
    try {
      // TODO (mferencevic): Should we retry this call to ignore some transient
      // communication errors?
      client_pool->Call<HeartbeatRpc>();
    } catch (const communication::rpc::RpcFailedException &e) {
      // If we are not alive that means that we are in the process of a
      // shutdown. We ignore any exceptions here to stop our Heartbeat from
      // displaying warnings that the workers may have died (they should die,
      // we are shutting them down). Note: The heartbeat scheduler must stay
      // alive to ensure that the workers receive their heartbeat requests
      // during shutdown (which may take a long time).
      if (!alive_) continue;
      LOG(WARNING) << "The " << GetWorkerName(e.endpoint())
                   << " didn't respond to our heartbeat request. The cluster "
                      "is in a degraded state and we are starting a graceful "
                      "shutdown. Please check the logs on the worker for "
                      "more details.";
      // Set the `cluster_alive_` flag to `false` to indicate that something
      // in the cluster failed.
      cluster_alive_.store(false);
      // Shutdown the whole cluster.
      Shutdown();
    }
  }
}

} // namespace distributed
@ -1,102 +0,0 @@
#pragma once

#include <atomic>
#include <functional>
#include <mutex>
#include <optional>
#include <set>
#include <unordered_map>

#include "distributed/coordination.hpp"
#include "durability/distributed/recovery.hpp"
#include "io/network/endpoint.hpp"
#include "utils/scheduler.hpp"

namespace distributed {
using Endpoint = io::network::Endpoint;

/** Handles worker registration, getting of other workers' endpoints and
 * coordinated shutdown in a distributed memgraph. Master side. */
class MasterCoordination final : public Coordination {
 public:
  explicit MasterCoordination(
      const Endpoint &master_endpoint,
      int server_workers_count = std::thread::hardware_concurrency(),
      int client_workers_count = std::thread::hardware_concurrency());

  ~MasterCoordination();

  MasterCoordination(const MasterCoordination &) = delete;
  MasterCoordination(MasterCoordination &&) = delete;
  MasterCoordination &operator=(const MasterCoordination &) = delete;
  MasterCoordination &operator=(MasterCoordination &&) = delete;

  /** Registers a new worker with this master coordination.
   *
   * @param desired_worker_id - The ID the worker would like to have.
   * @return True if the desired ID for the worker is available, or false
   * if the desired ID is already taken.
   */
  bool RegisterWorker(int desired_worker_id, Endpoint endpoint);

  /*
   * Worker `worker_id` has finished recovering; adds it to the set of
   * recovered workers along with its recovery_info.
   */
  void WorkerRecoveredSnapshot(
      int worker_id,
      const std::optional<durability::RecoveryInfo> &recovery_info);

  /// Sets the recovery info. nullopt indicates nothing was recovered.
  void SetRecoveredSnapshot(
      std::optional<std::pair<int64_t, tx::TransactionId>> recovered_snapshot);

  std::optional<std::pair<int64_t, tx::TransactionId>> RecoveredSnapshotTx()
      const;

  int CountRecoveredWorkers() const;

  std::vector<tx::TransactionId> CommonWalTransactions(
      const durability::RecoveryInfo &master_info) const;

  /// Starts the coordination and its servers.
  bool Start();

  /// Waits while the cluster is in a valid state or the `Shutdown` method is
  /// called (suitable for use with signal handlers). Blocks the calling thread
  /// until that has finished.
  /// @param call_before_shutdown function that should be called before
  /// shutdown, the function gets a bool argument indicating whether the cluster
  /// is alive and should return a bool indicating whether the shutdown
  /// succeeded without any issues
  /// @returns `true` if the shutdown was completed without any issues, `false`
  /// otherwise
  bool AwaitShutdown(std::function<bool(bool)> call_before_shutdown =
                         [](bool is_cluster_alive) -> bool { return true; });

  /// Hints that the coordination should start shutting down the whole cluster.
  void Shutdown();

 private:
  /// Sends a heartbeat request to all workers.
  void IssueHeartbeats();

  // Most master functions aren't thread-safe.
  mutable std::mutex master_lock_;

  // Durability recovery info.
  // Indicates if the recovery phase is done.
  bool recovery_done_{false};
  // Set of workers that successfully finished recovering the snapshot.
  std::map<int, std::optional<durability::RecoveryInfo>> recovered_workers_;
  // If nullopt nothing was recovered.
  std::optional<std::pair<int64_t, tx::TransactionId>> recovered_snapshot_tx_;

  // Scheduler that is used to periodically ping all registered workers.
  utils::Scheduler scheduler_;

  // Flags used for shutdown.
  std::atomic<bool> alive_{true};
};

} // namespace distributed
@ -1,47 +0,0 @@
#>cpp
#pragma once

#include <optional>
#include <unordered_map>

#include "communication/rpc/messages.hpp"
#include "durability/distributed/recovery.hpp"
#include "durability/distributed/serialization.hpp"
#include "io/network/endpoint.hpp"
#include "io/network/serialization.hpp"
cpp<#

(lcp:namespace distributed)

(lcp:define-rpc register-worker
  (:request
    ((desired-worker-id :int16_t)
     (port :uint16_t)
     (durability-directory "std::string")))
  (:response
    ((registration-successful :bool)
     (durability-error :bool)
     (snapshot-to-recover "std::optional<std::pair<int64_t, tx::TransactionId>>")
     (workers "std::unordered_map<int, io::network::Endpoint>"))))

(lcp:define-rpc cluster-discovery
  (:request
    ((worker-id :int16_t)
     (endpoint "::io::network::Endpoint")))
  (:response ()))

(lcp:define-rpc stop-worker
  (:request ())
  (:response ()))

(lcp:define-rpc notify-worker-recovered
  (:request
    ((worker-id :int16_t)
     (recovery-info "std::optional<durability::RecoveryInfo>")))
  (:response ()))

(lcp:define-rpc heartbeat
  (:request ())
  (:response ()))

(lcp:pop-namespace) ;; distributed
@ -1,109 +0,0 @@
#include <chrono>
#include <mutex>
#include <thread>

#include "glog/logging.h"

#include "distributed/coordination_rpc_messages.hpp"
#include "distributed/coordination_worker.hpp"

namespace distributed {

// Expect that a heartbeat should be received in this time interval. If it is
// not received we assume that the communication is broken and start a shutdown.
const int kHeartbeatMaxDelaySeconds = 10;

// Check whether a heartbeat is received every `kHeartbeatCheckSeconds`. It
// should be larger than `kHeartbeatIntervalSeconds` defined in the master
// coordination because it makes no sense to check more often than the heartbeat
// is sent. Also, it must be smaller than `kHeartbeatMaxDelaySeconds` to
// function properly.
const int kHeartbeatCheckSeconds = 2;

using namespace std::chrono_literals;

WorkerCoordination::WorkerCoordination(
    const io::network::Endpoint &worker_endpoint, int worker_id,
    const io::network::Endpoint &master_endpoint, int server_workers_count,
    int client_workers_count)
    : Coordination(worker_endpoint, worker_id, master_endpoint,
                   server_workers_count, client_workers_count) {
  server_.Register<StopWorkerRpc>(
      [&](auto *req_reader, auto *res_builder) {
        LOG(INFO) << "The master initiated shutdown of this worker.";
        Shutdown();
      });

  server_.Register<HeartbeatRpc>([&](auto *req_reader,
                                     auto *res_builder) {
    std::lock_guard<std::mutex> guard(heartbeat_lock_);
    last_heartbeat_time_ = std::chrono::steady_clock::now();
    if (!scheduler_.IsRunning()) {
      scheduler_.Run(
          "Heartbeat", std::chrono::seconds(kHeartbeatCheckSeconds), [this] {
            std::lock_guard<std::mutex> guard(heartbeat_lock_);
            auto duration =
                std::chrono::steady_clock::now() - last_heartbeat_time_;
            if (duration > std::chrono::seconds(kHeartbeatMaxDelaySeconds)) {
              LOG(WARNING) << "The master hasn't given us a heartbeat request "
                              "for at least "
                           << kHeartbeatMaxDelaySeconds
                           << " seconds! We are shutting down...";
              // Set the `cluster_alive_` flag to `false` to indicate that
              // something in the cluster failed.
              cluster_alive_ = false;
              // Shutdown the worker.
              Shutdown();
            }
          });
    }
  });
}

WorkerCoordination::~WorkerCoordination() {
  CHECK(!alive_) << "You must call Shutdown and AwaitShutdown on "
                    "distributed::WorkerCoordination!";
}

void WorkerCoordination::RegisterWorker(int worker_id,
                                        io::network::Endpoint endpoint) {
  AddWorker(worker_id, endpoint);
}

bool WorkerCoordination::Start() {
  return server_.Start();
}

bool WorkerCoordination::AwaitShutdown(
    std::function<bool(bool)> call_before_shutdown) {
  // Wait for a shutdown notification.
  while (alive_) {
    std::this_thread::sleep_for(std::chrono::milliseconds(100));
  }

  // The first thing we need to do is to stop our heartbeat scheduler because
  // the master stopped their scheduler immediately before issuing the shutdown
  // request to the worker. This will prevent our heartbeat from timing out on a
  // regular shutdown.
  scheduler_.Stop();

  // Copy the current value of the cluster state.
  bool is_cluster_alive = cluster_alive_;

  // Call the before shutdown callback.
  bool ret = call_before_shutdown(is_cluster_alive);

  // Shutdown our RPC server.
  server_.Shutdown();
  server_.AwaitShutdown();

  // All other cleanup must be done here.

  // Return `true` if the cluster is alive and the `call_before_shutdown`
  // succeeded.
  return ret && is_cluster_alive;
}

void WorkerCoordination::Shutdown() { alive_.store(false); }

} // namespace distributed
@ -1,61 +0,0 @@
#pragma once

#include <atomic>
#include <mutex>
#include <unordered_map>

#include "communication/rpc/server.hpp"
#include "distributed/coordination.hpp"
#include "utils/scheduler.hpp"

namespace distributed {

/// Handles worker registration, getting of other workers' endpoints and
/// coordinated shutdown in a distributed memgraph. Worker side.
class WorkerCoordination final : public Coordination {
 public:
  WorkerCoordination(
      const io::network::Endpoint &worker_endpoint, int worker_id,
      const io::network::Endpoint &master_endpoint,
      int server_workers_count = std::thread::hardware_concurrency(),
      int client_workers_count = std::thread::hardware_concurrency());

  ~WorkerCoordination();

  WorkerCoordination(const WorkerCoordination &) = delete;
  WorkerCoordination(WorkerCoordination &&) = delete;
  WorkerCoordination &operator=(const WorkerCoordination &) = delete;
  WorkerCoordination &operator=(WorkerCoordination &&) = delete;

  /// Registers the worker with the given endpoint.
  void RegisterWorker(int worker_id, io::network::Endpoint endpoint);

  /// Starts the coordination and its servers.
  bool Start();

  /// Starts listening for a remote shutdown command (issued by the master) or
  /// for the `Shutdown` method to be called (suitable for use with signal
  /// handlers). Blocks the calling thread until that has finished.
  /// @param call_before_shutdown function that should be called before
  /// shutdown, the function gets a bool argument indicating whether the cluster
  /// is alive and should return a bool indicating whether the shutdown
  /// succeeded without any issues
  /// @returns `true` if the shutdown was completed without any issues, `false`
  /// otherwise
  bool AwaitShutdown(std::function<bool(bool)> call_before_shutdown =
                         [](bool is_cluster_alive) -> bool { return true; });

  /// Hints that the coordination should start shutting down the worker.
  void Shutdown();

 private:
  // Heartbeat variables
  std::mutex heartbeat_lock_;
  std::chrono::time_point<std::chrono::steady_clock> last_heartbeat_time_;
  utils::Scheduler scheduler_;

  // Flag used for shutdown.
  std::atomic<bool> alive_{true};
  std::atomic<bool> cluster_alive_{true};
};
} // namespace distributed
@ -1,97 +0,0 @@
#include "distributed/data_manager.hpp"

#include "storage/distributed/storage.hpp"

namespace {

template <typename TCache>
void ClearCache(TCache &cache, tx::TransactionId tx_id) {
  auto access = cache.access();
  auto found = access.find(tx_id);
  if (found != access.end()) found->second.Clear();
}

template <typename TCache>
void DeleteOld(TCache &cache, tx::TransactionId oldest_active) {
  auto access = cache.access();
  for (auto &kv : access) {
    if (kv.first < oldest_active) {
      access.remove(kv.first);
    }
  }
}

} // anonymous namespace

namespace distributed {

template <>
DataManager::CacheT<Vertex> &DataManager::caches<Vertex>() {
  return vertices_caches_;
}

template <>
DataManager::CacheT<Edge> &DataManager::caches<Edge>() {
  return edges_caches_;
}

template <>
size_t DataManager::GetInitSize<Vertex>() const {
  return vertex_cache_size_;
}

template <>
size_t DataManager::GetInitSize<Edge>() const {
  return edge_cache_size_;
}

DataManager::DataManager(database::GraphDb &db,
                         distributed::DataRpcClients &data_clients,
                         size_t vertex_cache_size, size_t edge_cache_size)
    : vertex_cache_size_(vertex_cache_size),
      edge_cache_size_(edge_cache_size),
      db_(db),
      data_clients_(data_clients) {}

std::mutex &DataManager::GetLock(tx::TransactionId tx_id) {
  auto accessor = lock_store_.access();
  auto found = accessor.find(tx_id);
  if (found != accessor.end()) return found->second;

  // By passing an empty tuple the default constructor is used
  // and the std::mutex is created in the ConcurrentMap.
  return accessor.emplace(tx_id, std::make_tuple(tx_id), std::make_tuple())
      .first->second;
}

template <>
void DataManager::LocalizeAddresses<Vertex>(Vertex &vertex) {
  auto localize_edges = [this](auto &edges) {
    for (auto &element : edges) {
      element.vertex = db_.storage().LocalizedAddressIfPossible(element.vertex);
      element.edge = db_.storage().LocalizedAddressIfPossible(element.edge);
    }
  };

  localize_edges(vertex.in_.storage());
  localize_edges(vertex.out_.storage());
}

template <>
void DataManager::LocalizeAddresses(Edge &edge) {
  edge.from_ = db_.storage().LocalizedAddressIfPossible(edge.from_);
  edge.to_ = db_.storage().LocalizedAddressIfPossible(edge.to_);
}

void DataManager::ClearCacheForSingleTransaction(tx::TransactionId tx_id) {
  ClearCache(vertices_caches_, tx_id);
  ClearCache(edges_caches_, tx_id);
}

void DataManager::ClearTransactionalCache(tx::TransactionId oldest_active) {
  DeleteOld(vertices_caches_, oldest_active);
  DeleteOld(edges_caches_, oldest_active);
  DeleteOld(lock_store_, oldest_active);
}

} // namespace distributed
@ -1,126 +0,0 @@
/// @file

#pragma once

#include "data_structures/concurrent/concurrent_map.hpp"
#include "database/distributed/graph_db.hpp"
#include "distributed/cached_record_data.hpp"
#include "distributed/data_rpc_clients.hpp"
#include "transactions/type.hpp"
#include "utils/cache.hpp"

class Vertex;
class Edge;

namespace distributed {
/// Handles remote data caches for edges and vertices, per transaction.
class DataManager {
  template <typename TRecord>
  using CacheG =
      utils::LruCache<gid::Gid, std::shared_ptr<CachedRecordData<TRecord>>>;

  template <typename TRecord>
  using CacheT = ConcurrentMap<tx::TransactionId, CacheG<TRecord>>;

 public:
  DataManager(database::GraphDb &db, distributed::DataRpcClients &data_clients,
              size_t vertex_cache_size, size_t edge_cache_size);

  /// Finds cached element for the given transaction, worker and gid.
  ///
  /// @tparam TRecord Vertex or Edge
  template <typename TRecord>
  std::shared_ptr<CachedRecordData<TRecord>> Find(tx::TransactionId tx_id,
                                                  int from_worker_id,
                                                  int worker_id, gid::Gid gid,
                                                  bool to_update = false) {
    auto &cache = GetCache<TRecord>(tx_id);
    std::unique_lock<std::mutex> guard(GetLock(tx_id));
    auto found = cache.Find(gid);
    if (found) {
      auto data = *found;
      if (to_update && !data->new_record) {
        data->new_record.reset(data->old_record->CloneData());
      }
      return data;
    } else {
      guard.unlock();
      auto remote = data_clients_.RemoteElement<TRecord>(from_worker_id,
                                                         worker_id, tx_id, gid);
      if (remote.old_record_ptr) LocalizeAddresses(*remote.old_record_ptr);
      if (remote.new_record_ptr) LocalizeAddresses(*remote.new_record_ptr);

      if (to_update && !remote.new_record_ptr) {
        remote.new_record_ptr.reset(remote.old_record_ptr->CloneData());
      }

      guard.lock();
      auto data =
          std::make_shared<CachedRecordData<TRecord>>(CachedRecordData<TRecord>{
              remote.cypher_id, std::move(remote.old_record_ptr),
              std::move(remote.new_record_ptr)});
      cache.Insert(gid, data);
      return data;
    }
  }

  /// Sets the given records as (new, old) data for the given gid.
  template <typename TRecord>
  void Emplace(tx::TransactionId tx_id, gid::Gid gid,
               CachedRecordData<TRecord> data) {
    std::lock_guard<std::mutex> guard(GetLock(tx_id));
    // We can't replace existing data because some accessors might be using
    // it.
    // TODO - consider if it's necessary and OK to copy just the data content.
    auto &cache = GetCache<TRecord>(tx_id);
    auto found = cache.Find(gid);
    if (!found) {
      if (data.old_record) LocalizeAddresses(*data.old_record);
      if (data.new_record) LocalizeAddresses(*data.new_record);
      cache.Insert(gid,
                   std::make_shared<CachedRecordData<TRecord>>(std::move(data)));
    }
  }

  /// Removes all the caches for a single transaction.
  void ClearCacheForSingleTransaction(tx::TransactionId tx_id);

  /// Clears the cache of local transactions that have expired. The signature of
  /// this method is dictated by `distributed::TransactionalCacheCleaner`.
  void ClearTransactionalCache(tx::TransactionId oldest_active);

 private:
  template <typename TRecord>
  void LocalizeAddresses(TRecord &record);

  template <typename TRecord>
  size_t GetInitSize() const;

  template <typename TRecord>
  CacheG<TRecord> &GetCache(tx::TransactionId tx_id) {
    auto accessor = caches<TRecord>().access();
    auto found = accessor.find(tx_id);
    if (found != accessor.end()) return found->second;

    return accessor
        .emplace(tx_id, std::make_tuple(tx_id),
                 std::make_tuple(GetInitSize<TRecord>()))
        .first->second;
  }

  std::mutex &GetLock(tx::TransactionId tx_id);

  template <typename TRecord>
  CacheT<TRecord> &caches();

  size_t vertex_cache_size_;
  size_t edge_cache_size_;

  database::GraphDb &db_;
  DataRpcClients &data_clients_;
  ConcurrentMap<tx::TransactionId, std::mutex> lock_store_;
  CacheT<Vertex> vertices_caches_;
  CacheT<Edge> edges_caches_;
};

} // namespace distributed
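As a rough illustration (not part of the diff), a caller that wants a remote vertex goes through the per-transaction cache above roughly like this; all variable names here are hypothetical:

// Hypothetical usage; `data_manager` is a distributed::DataManager instance,
// `tx_id` is the current transaction, `owner_worker_id` owns the vertex.
auto cached = data_manager.Find<Vertex>(tx_id, from_worker_id, owner_worker_id,
                                        gid, /* to_update = */ false);
// `cached->old_record` holds the committed version; `cached->new_record` is
// set when an update view was requested or the owning worker already had one.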
@ -1,53 +0,0 @@
#include "distributed/data_rpc_clients.hpp"

#include <unordered_map>

#include "distributed/data_rpc_messages.hpp"
#include "storage/distributed/edge.hpp"
#include "storage/distributed/vertex.hpp"

namespace distributed {

template <>
RemoteElementInfo<Edge> DataRpcClients::RemoteElement(int from_worker_id,
                                                      int worker_id,
                                                      tx::TransactionId tx_id,
                                                      gid::Gid gid) {
  auto response = coordination_->GetClientPool(worker_id)->Call<EdgeRpc>(
      TxGidPair{tx_id, gid, from_worker_id});
  return RemoteElementInfo<Edge>(response.cypher_id,
                                 std::move(response.edge_old_output),
                                 std::move(response.edge_new_output));
}

template <>
RemoteElementInfo<Vertex> DataRpcClients::RemoteElement(int from_worker_id,
                                                        int worker_id,
                                                        tx::TransactionId tx_id,
                                                        gid::Gid gid) {
  auto response = coordination_->GetClientPool(worker_id)->Call<VertexRpc>(
      TxGidPair{tx_id, gid, from_worker_id});
  return RemoteElementInfo<Vertex>(response.cypher_id,
                                   std::move(response.vertex_old_output),
                                   std::move(response.vertex_new_output));
}

std::unordered_map<int, int64_t> DataRpcClients::VertexCounts(
    tx::TransactionId tx_id) {
  auto future_results = coordination_->ExecuteOnWorkers<std::pair<int, int64_t>>(
      -1, [tx_id](int worker_id, communication::rpc::ClientPool &client_pool) {
        auto response = client_pool.Call<VertexCountRpc>(tx_id);
        return std::make_pair(worker_id, response.member);
      });

  std::unordered_map<int, int64_t> results;
  for (auto &result : future_results) {
    auto result_pair = result.get();
    int worker = result_pair.first;
    int vertex_count = result_pair.second;
    results[worker] = vertex_count;
  }
  return results;
}

} // namespace distributed
@ -1,61 +0,0 @@
/// @file

#pragma once

#include <memory>
#include <mutex>
#include <unordered_map>
#include <utility>

#include "distributed/coordination.hpp"
#include "storage/distributed/gid.hpp"
#include "transactions/type.hpp"

namespace distributed {

class RpcWorkerClients;

template <typename TRecord>
struct RemoteElementInfo {
  RemoteElementInfo() = delete;
  RemoteElementInfo(const RemoteElementInfo &) = delete;
  // TODO (buda): The default move constructor should be deleted but it seems
  // that clang-3.9 doesn't know how to do RVO when this struct is used.
  RemoteElementInfo(RemoteElementInfo &&) = default;
  RemoteElementInfo &operator=(const RemoteElementInfo &) = delete;
  RemoteElementInfo &operator=(RemoteElementInfo &&) = delete;

  RemoteElementInfo(int64_t cypher_id, std::unique_ptr<TRecord> old_record_ptr,
                    std::unique_ptr<TRecord> new_record_ptr)
      : cypher_id(cypher_id),
        old_record_ptr(std::move(old_record_ptr)),
        new_record_ptr(std::move(new_record_ptr)) {}

  int64_t cypher_id;
  std::unique_ptr<TRecord> old_record_ptr;
  std::unique_ptr<TRecord> new_record_ptr;
};

/// Provides access to other workers' data.
class DataRpcClients {
 public:
  explicit DataRpcClients(Coordination *coordination)
      : coordination_(coordination) {}

  /// Returns a remote worker's record (vertex/edge) data for the given params.
  /// That worker must own the vertex/edge for the given id, and that vertex
  /// must be visible in the given transaction.
  template <typename TRecord>
  RemoteElementInfo<TRecord> RemoteElement(int from_worker_id, int worker_id,
                                           tx::TransactionId tx_id,
                                           gid::Gid gid);

  /// Returns (worker_id, vertex_count) for each worker and the number of
  /// vertices on it from the perspective of transaction `tx_id`.
  std::unordered_map<int, int64_t> VertexCounts(tx::TransactionId tx_id);

 private:
  Coordination *coordination_;
};

} // namespace distributed
@ -1,125 +0,0 @@
#>cpp
#pragma once

#include <memory>
#include <string>

#include "communication/rpc/messages.hpp"
#include "storage/distributed/edge.hpp"
#include "storage/distributed/gid.hpp"
#include "storage/distributed/rpc/serialization.hpp"
#include "storage/distributed/vertex.hpp"
#include "transactions/type.hpp"
cpp<#

(lcp:namespace distributed)

(lcp:define-struct tx-gid-pair ()
  ((tx-id "::tx::TransactionId")
   (gid "::gid::Gid")
   (from-worker-id :int64_t))
  (:serialize (:slk)))

(lcp:define-rpc vertex
  (:request ((member "TxGidPair")))
  (:response
    ((cypher-id :int64_t)
     (vertex-old-input "const Vertex *"
       :slk-save
       (lambda (member)
         #>cpp
         bool has_ptr = self.${member};
         slk::Save(has_ptr, builder);
         if (has_ptr) {
           slk::Save(*self.${member}, builder, self.worker_id);
         }
         cpp<#)
       :slk-load
       (lambda (member)
         (declare (ignore member))
         #>cpp
         bool has_ptr;
         slk::Load(&has_ptr, reader);
         if (has_ptr) {
           self->vertex_old_output = std::make_unique<Vertex>();
           slk::Load(self->vertex_old_output.get(), reader);
         }
         cpp<#))
     (vertex-new-input "const Vertex *"
       :slk-save
       (lambda (member)
         #>cpp
         bool has_ptr = self.${member};
         slk::Save(has_ptr, builder);
         if (has_ptr) {
           slk::Save(*self.${member}, builder, self.worker_id);
         }
         cpp<#)
       :slk-load
       (lambda (member)
         (declare (ignore member))
         #>cpp
         bool has_ptr;
         slk::Load(&has_ptr, reader);
         if (has_ptr) {
           self->vertex_new_output = std::make_unique<Vertex>();
           slk::Load(self->vertex_new_output.get(), reader);
         }
         cpp<#))
     (worker-id :int64_t :dont-save t)
     (vertex-old-output "std::unique_ptr<Vertex>" :initarg nil :dont-save t)
     (vertex-new-output "std::unique_ptr<Vertex>" :initarg nil :dont-save t))))

(lcp:define-rpc edge
  (:request ((member "TxGidPair")))
  (:response
    ((cypher-id :int64_t)
     (edge-old-input "const Edge *"
       :slk-save
       (lambda (member)
         #>cpp
         bool has_ptr = self.${member};
         slk::Save(has_ptr, builder);
         if (has_ptr) {
           slk::Save(*self.${member}, builder, self.worker_id);
         }
         cpp<#)
       :slk-load
       (lambda (member)
         (declare (ignore member))
         #>cpp
         bool has_ptr;
         slk::Load(&has_ptr, reader);
         if (has_ptr) {
           slk::Load(&self->edge_old_output, reader);
         }
         cpp<#))
     (edge-new-input "const Edge *"
       :slk-save
       (lambda (member)
         #>cpp
         bool has_ptr = self.${member};
         slk::Save(has_ptr, builder);
         if (has_ptr) {
           slk::Save(*self.${member}, builder, self.worker_id);
         }
         cpp<#)
       :slk-load
       (lambda (member)
         (declare (ignore member))
         #>cpp
         bool has_ptr;
         slk::Load(&has_ptr, reader);
         if (has_ptr) {
           slk::Load(&self->edge_new_output, reader);
         }
         cpp<#))
     (worker-id :int64_t :dont-save t)
     (edge-old-output "std::unique_ptr<Edge>" :initarg nil :dont-save t)
     (edge-new-output "std::unique_ptr<Edge>" :initarg nil :dont-save t))))

(lcp:define-rpc vertex-count
  (:request ((member "::tx::TransactionId")))
  (:response ((member :int64_t))))

(lcp:pop-namespace) ;; distributed
@ -1,63 +0,0 @@
#include "distributed/data_rpc_server.hpp"

#include <memory>

#include "database/distributed/graph_db.hpp"
#include "database/distributed/graph_db_accessor.hpp"
#include "distributed/updates_rpc_server.hpp"
#include "distributed/data_rpc_messages.hpp"

namespace distributed {

DataRpcServer::DataRpcServer(database::GraphDb *db,
                             distributed::Coordination *coordination)
    : db_(db) {
  coordination->Register<VertexRpc>(
      [this](auto *req_reader, auto *res_builder) {
        VertexReq req;
        slk::Load(&req, req_reader);
        auto dba = db_->Access(req.member.tx_id);
        auto vertex = dba->FindVertexRaw(req.member.gid);

        auto *old = vertex.GetOld();
        auto *newr = vertex.GetNew() ? vertex.GetNew()->CloneData() : nullptr;
        db_->updates_server().ApplyDeltasToRecord(
            dba->transaction().id_, req.member.gid,
            req.member.from_worker_id, &old, &newr);

        VertexRes response(vertex.CypherId(), old, newr, db_->WorkerId());
        slk::Save(response, res_builder);
        delete newr;
      });

  coordination->Register<EdgeRpc>(
      [this](auto *req_reader, auto *res_builder) {
        EdgeReq req;
        slk::Load(&req, req_reader);
        auto dba = db_->Access(req.member.tx_id);
        auto edge = dba->FindEdgeRaw(req.member.gid);

        auto *old = edge.GetOld();
        auto *newr = edge.GetNew() ? edge.GetNew()->CloneData() : nullptr;
        db_->updates_server().ApplyDeltasToRecord(
            dba->transaction().id_, req.member.gid,
            req.member.from_worker_id, &old, &newr);

        EdgeRes response(edge.CypherId(), old, newr, db_->WorkerId());
        slk::Save(response, res_builder);
        delete newr;
      });

  coordination->Register<VertexCountRpc>(
      [this](auto *req_reader, auto *res_builder) {
        VertexCountReq req;
        slk::Load(&req, req_reader);
        auto dba = db_->Access(req.member);
        int64_t size = 0;
        for (auto vertex : dba->Vertices(false)) ++size;
        VertexCountRes res(size);
        slk::Save(res, res_builder);
      });
}

} // namespace distributed
@ -1,22 +0,0 @@
#pragma once

#include "database/distributed/graph_db.hpp"
#include "distributed/coordination.hpp"

namespace database {
class GraphDb;
}

namespace distributed {

/// Serves this worker's data to others.
class DataRpcServer {
 public:
  DataRpcServer(database::GraphDb *db,
                distributed::Coordination *coordination);

 private:
  database::GraphDb *db_;
};

} // namespace distributed
@ -1,169 +0,0 @@
#include "distributed/dgp/partitioner.hpp"

#include <algorithm>
#include <unordered_map>
#include <vector>

#include "database/distributed/graph_db.hpp"
#include "database/distributed/graph_db_accessor.hpp"
#include "distributed/updates_rpc_clients.hpp"
#include "query/exceptions.hpp"
#include "distributed/dgp/vertex_migrator.hpp"
#include "utils/flag_validation.hpp"
#include "utils/thread/sync.hpp"

// TODO (buda): Implement openCypher commands to control these parameters.
DEFINE_VALIDATED_int32(
    dgp_improvement_threshold, 10,
    "How much better should specific node score be to consider "
    "a migration to another worker. This represents the minimal difference "
    "between new score that the vertex will have when migrated and the old one "
    "such that it's migrated.",
    FLAG_IN_RANGE(1, 100));
// TODO (buda): The default here should be int_max because that will allow us to
// partition large dataset faster. It should be used for our tests where we can
// run the partitioning up front.
DEFINE_VALIDATED_int32(dgp_max_batch_size, 2000,
                       "Maximal amount of vertices which should be migrated in "
                       "one dynamic graph partitioner step.",
                       FLAG_IN_RANGE(1, std::numeric_limits<int32_t>::max()));

namespace distributed::dgp {

Partitioner::Partitioner(database::GraphDb *db) : db_(db) {}

std::pair<double, bool> Partitioner::Partition() {
  auto failed_partitioning_data =
      std::make_pair(std::numeric_limits<double>::min(), false);
  // Note, in a distributed system TxBegin can throw because the server that
  // assigns transaction numbers might be unavailable.
  try {
    auto dba = db_->Access();
    VLOG(21) << "Starting DynamicGraphPartitioner in tx: "
             << dba->transaction().id_;
    try {
      auto data = FindMigrations(*dba);
      VertexMigrator migrator(dba.get());
      for (auto &migration : data.migrations) {
        migrator.MigrateVertex(migration.first, migration.second);
      }

      auto apply_futures = db_->updates_clients().UpdateApplyAll(
          db_->WorkerId(), dba->transaction().id_);

      for (auto &future : apply_futures) {
        switch (future.get()) {
          case distributed::UpdateResult::SERIALIZATION_ERROR:
            throw mvcc::SerializationError(
                "Failed to relocate vertex due to SerializationError");
          case distributed::UpdateResult::UNABLE_TO_DELETE_VERTEX_ERROR:
            throw query::RemoveAttachedVertexException();
          case distributed::UpdateResult::UPDATE_DELETED_ERROR:
            throw query::QueryRuntimeException(
                "Failed to apply deferred updates due to RecordDeletedError");
          case distributed::UpdateResult::LOCK_TIMEOUT_ERROR:
            throw utils::LockTimeoutException(
                "Failed to apply deferred update due to LockTimeoutException");
          case distributed::UpdateResult::DONE:
            break;
        }
      }

      dba->Commit();
      VLOG(21) << "Successfully migrated " << data.migrations.size()
               << " vertices with score " << data.score << ".";
      return std::make_pair(data.score, true);
    } catch (const utils::BasicException &e) {
      VLOG(21) << "Didn't succeed in relocating; " << e.what();
      dba->Abort();
      // Returning VertexAccessors after Abort might not be a good idea. + The
      // returned migrations are entirely useless because the engine didn't
      // succeed to migrate anything.
      return failed_partitioning_data;
    }
  } catch (const communication::rpc::RpcFailedException &e) {
    // Transaction start failed because BeginRpc failed. Nothing to clean up.
    // Any other RpcFailedExceptions should be handled in the inner try block.
    VLOG(21) << "Failed to start DGP transaction; " << e.what();
    return failed_partitioning_data;
  } catch (const std::exception &e) {
    LOG(FATAL) << "Unhandled exception during partitioning. " << e.what();
  }
}

MigrationsData Partitioner::FindMigrations(database::GraphDbAccessor &dba) {
  // Find each worker's vertex count.
  std::unordered_map<int, int64_t> worker_vertex_count =
      db_->data_clients().VertexCounts(dba.transaction().id_);

  // TODO (buda): Add total edge count as an option.
  int64_t total_vertex_count = 0;
  for (auto worker_vertex_count_pair : worker_vertex_count) {
    total_vertex_count += worker_vertex_count_pair.second;
  }

  double average_vertex_count =
      total_vertex_count * 1.0 / worker_vertex_count.size();
  if (average_vertex_count == 0)
    return MigrationsData(std::numeric_limits<double>::min());

  double local_graph_score = 0;

  // Considers all migrations which maximally improve the single vertex score.
  std::vector<std::pair<VertexAccessor, int>> migrations;
  for (const auto &vertex : dba.Vertices(false)) {
    auto label_counts = CountLabels(vertex);
    std::unordered_map<int, double> per_label_score;
    size_t degree = vertex.in_degree() + vertex.out_degree();
    if (degree == 0) continue;
    for (auto worker_vertex_count_pair : worker_vertex_count) {
      int worker = worker_vertex_count_pair.first;
      int64_t worker_vertex_count = worker_vertex_count_pair.second;
      per_label_score[worker] =
          label_counts[worker] * 1.0 / degree -
          worker_vertex_count * 1.0 / average_vertex_count;
    }

    auto label_cmp = [](const std::pair<int, double> &p1,
                        const std::pair<int, double> &p2) {
      return p1.second < p2.second;
    };
    auto best_label = std::max_element(per_label_score.begin(),
                                       per_label_score.end(), label_cmp);

    local_graph_score += best_label->second;

    // Consider as a migration only if the improvement is high enough.
    if (best_label != per_label_score.end() &&
        best_label->first != db_->WorkerId() &&
        per_label_score[best_label->first] -
                FLAGS_dgp_improvement_threshold / 100.0 >=
            per_label_score[db_->WorkerId()]) {
      migrations.emplace_back(vertex, best_label->first);
    }

    if (migrations.size() >= FLAGS_dgp_max_batch_size) break;
  }

  DLOG(INFO) << "Local graph score: " << local_graph_score;

  return MigrationsData(local_graph_score, std::move(migrations));
}

std::unordered_map<int, int64_t> Partitioner::CountLabels(
    const VertexAccessor &vertex) const {
  std::unordered_map<int, int64_t> label_count;
  for (auto edge : vertex.in()) {
    auto address = edge.from().address();
    auto label = address.is_remote() ? address.worker_id() : db_->WorkerId();
    label_count[label]++;
  }
  for (auto edge : vertex.out()) {
    auto address = edge.to().address();
    auto label = address.is_remote() ? address.worker_id() : db_->WorkerId();
    label_count[label]++;
  }
  return label_count;
}
} // namespace distributed::dgp
@ -1,89 +0,0 @@
/// @file

#pragma once

#include <thread>

#include "distributed/data_rpc_clients.hpp"
#include "distributed/token_sharing_rpc_messages.hpp"
#include "distributed/dgp/vertex_migrator.hpp"
#include "storage/vertex_accessor.hpp"

namespace database {
class GraphDb;
class GraphDbAccessor;
}; // namespace database

namespace distributed::dgp {

/// Contains a set of vertices and where they should be migrated
/// (machine/instance id) + a score of how good the partitioning is.
struct MigrationsData {
 private:
  using Migrations = std::vector<std::pair<VertexAccessor, int>>;

 public:
  MigrationsData(double score, Migrations migrations = Migrations())
      : score(std::move(score)), migrations(std::move(migrations)) {}

  /// Disable copying because the number of migrations could be huge. The
  /// expected number is 1k, but a user can configure the database in a way
  /// where the number of migrations could be much higher.
  MigrationsData(const MigrationsData &other) = delete;
  MigrationsData &operator=(const MigrationsData &other) = delete;

  MigrationsData(MigrationsData &&other) = default;
  MigrationsData &operator=(MigrationsData &&other) = default;

  double score;
  Migrations migrations;
};

/// Handles dynamic graph partitions, migrates vertices from one worker to
/// another based on available scoring which takes into account neighbours of a
/// vertex and tries to put it where most of its neighbours are located. Also
/// takes into account the number of vertices on the destination and source
/// machine.
class Partitioner {
 public:
  /// The partitioner needs GraphDb because each partition step is a new
  /// database transaction (a database accessor has to be created).
  /// TODO (buda): Consider passing GraphDbAccessor directly.
  explicit Partitioner(database::GraphDb *db);

  Partitioner(const Partitioner &other) = delete;
  Partitioner(Partitioner &&other) = delete;
  Partitioner &operator=(const Partitioner &other) = delete;
  Partitioner &operator=(Partitioner &&other) = delete;

  /// Runs one dynamic graph partitioning cycle (step). In case of any error,
  /// the transaction will be aborted.
  ///
  /// @return Calculated partitioning score and whether the migrations were
  /// successful.
  std::pair<double, bool> Partition();

  /// Returns a vector of pairs of `vertex` and `destination` of where some
  /// vertex should be relocated from the view of the `dba` accessor.
  ///
  /// Each vertex is located on some worker (which in the context of migrations
  /// we call a vertex label). Each vertex has its score evaluated for each
  /// different label (worker_id). This score is calculated by considering the
  /// labels of neighbouring vertices. Simply put, each vertex is attracted to
  /// be located on the same worker as its neighbouring vertices. Migrations
  /// which improve that scoring, which also takes into account the saturation
  /// of the other workers to which this vertex could migrate, are determined.
  MigrationsData FindMigrations(database::GraphDbAccessor &dba);

  /// Counts the number of each label (worker_id) on the endpoints of the
  /// edges (in/out) of `vertex`.
  ///
  /// @return A map consisting of (label/machine/instance id, count) key-value
  /// pairs.
  std::unordered_map<int, int64_t> CountLabels(
      const VertexAccessor &vertex) const;

 private:
  database::GraphDb *db_{nullptr};
};

} // namespace distributed::dgp
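To make the scoring above concrete, the per-worker value computed in FindMigrations (see partitioner.cpp earlier in this diff) reduces to

    score(v, w) = label_counts[w] / degree(v) - vertex_count[w] / average_vertex_count

so a vertex v is pulled toward the worker w that holds most of its neighbours and pushed away from workers that already hold more vertices than the cluster average; a migration is emitted only when the best worker's score exceeds the current worker's score by at least dgp_improvement_threshold / 100.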
@ -1,62 +0,0 @@
#include "distributed/dgp/vertex_migrator.hpp"

#include "database/distributed/distributed_graph_db.hpp"
#include "database/distributed/graph_db_accessor.hpp"
#include "query/typed_value.hpp"

namespace distributed::dgp {

VertexMigrator::VertexMigrator(database::GraphDbAccessor *dba) : dba_(dba) {}

void VertexMigrator::MigrateVertex(VertexAccessor &vertex, int destination) {
  auto get_props = [](auto &record) {
    std::unordered_map<storage::Property, PropertyValue> properties;
    for (auto prop : record.Properties()) {
      properties[prop.first] = prop.second;
    }
    return properties;
  };

  auto update_if_moved = [this](auto &vertex) {
    if (vertex_migrated_to_.count(vertex.gid())) {
      vertex = VertexAccessor(vertex_migrated_to_[vertex.gid()], *dba_);
    }
  };

  auto relocated_vertex = database::InsertVertexIntoRemote(
      dba_, destination, vertex.labels(), get_props(vertex), vertex.CypherId());

  vertex_migrated_to_[vertex.gid()] = relocated_vertex.address();

  for (auto out_edge : vertex.out()) {
    auto to = out_edge.to();
    update_if_moved(to);
    // Here cypher_id has to be passed to the other machine because this
    // machine owns the edge.
    auto new_out_edge =
        dba_->InsertEdge(relocated_vertex, to, out_edge.EdgeType(),
                         std::nullopt, out_edge.CypherId());
    for (auto prop : get_props(out_edge)) {
      new_out_edge.PropsSet(prop.first, prop.second);
    }
  }

  for (auto in_edge : vertex.in()) {
    auto from = in_edge.from();
    // Continue on self-loops since those edges have already been added
    // while iterating over out edges.
    if (from == vertex) continue;
    update_if_moved(from);
    // Both gid and cypher_id should be without value because this machine
    // doesn't own the edge.
    auto new_in_edge =
        dba_->InsertEdge(from, relocated_vertex, in_edge.EdgeType(),
                         std::nullopt, in_edge.CypherId());
    for (auto prop : get_props(in_edge)) {
      new_in_edge.PropsSet(prop.first, prop.second);
    }
  }

  dba_->DetachRemoveVertex(vertex);
}
} // namespace distributed::dgp
@ -1,37 +0,0 @@
/// @file

#pragma once

#include <thread>
#include <unordered_map>

#include "storage/distributed/gid.hpp"
#include "storage/distributed/vertex_accessor.hpp"

namespace database {
class GraphDbAccessor;
}; // namespace database

namespace distributed::dgp {

/// Migrates vertices from one worker to another (updates edges as well).
class VertexMigrator {
public:
explicit VertexMigrator(database::GraphDbAccessor *dba);

VertexMigrator(const VertexMigrator &other) = delete;
VertexMigrator(VertexMigrator &&other) = delete;
VertexMigrator &operator=(const VertexMigrator &other) = delete;
VertexMigrator &operator=(VertexMigrator &&other) = delete;

/// Creates a new vertex on the destination, deletes the old `vertex`, and
/// deletes/creates every new edge that it needs since the destination of the
/// vertex changed.
void MigrateVertex(VertexAccessor &v, int destination);

private:
database::GraphDbAccessor *dba_;
std::unordered_map<gid::Gid, storage::VertexAddress> vertex_migrated_to_;
};

} // namespace distributed::dgp
@ -1,52 +0,0 @@
#include "distributed/durability_rpc_master.hpp"

#include "distributed/durability_rpc_messages.hpp"
#include "transactions/transaction.hpp"
#include "utils/future.hpp"

namespace distributed {
utils::Future<bool> DurabilityRpcMaster::MakeSnapshot(tx::TransactionId tx) {
return utils::make_future(std::async(std::launch::async, [this, tx] {
auto futures = coordination_->ExecuteOnWorkers<bool>(
0, [tx](int worker_id, communication::rpc::ClientPool &client_pool) {
try {
auto res = client_pool.Call<MakeSnapshotRpc>(tx);
return res.member;
} catch (const communication::rpc::RpcFailedException &e) {
return false;
}
});

bool created = true;
for (auto &future : futures) {
created &= future.get();
}

return created;
}));
}

utils::Future<bool> DurabilityRpcMaster::RecoverWalAndIndexes(
durability::RecoveryData *recovery_data) {
return utils::make_future(
std::async(std::launch::async, [this, recovery_data] {
auto futures = coordination_->ExecuteOnWorkers<bool>(
0, [recovery_data](int worker_id,
communication::rpc::ClientPool &client_pool) {
try {
client_pool.Call<RecoverWalAndIndexesRpc>(*recovery_data);
return true;
} catch (const communication::rpc::RpcFailedException &e) {
return false;
}
});

bool recovered = true;
for (auto &future : futures) {
recovered &= future.get();
}

return recovered;
}));
}
} // namespace distributed
@ -1,33 +0,0 @@
#pragma once

#include <future>
#include <mutex>
#include <utility>

#include "distributed/coordination.hpp"
#include "durability/distributed/recovery.hpp"
#include "storage/distributed/gid.hpp"
#include "transactions/type.hpp"

namespace distributed {

/// Provides an ability to trigger snapshooting on other workers.
class DurabilityRpcMaster {
public:
explicit DurabilityRpcMaster(Coordination *coordination)
: coordination_(coordination) {}

// Sends a snapshot request to workers and returns a future which becomes true
// if all workers sucesfully completed their snapshot creation, false
// otherwise
// @param tx - transaction from which to take db snapshot
utils::Future<bool> MakeSnapshot(tx::TransactionId tx);

utils::Future<bool> RecoverWalAndIndexes(
durability::RecoveryData *recovery_data);

private:
Coordination *coordination_;
};

} // namespace distributed
@ -1,20 +0,0 @@
#>cpp
#pragma once

#include "communication/rpc/messages.hpp"
#include "durability/distributed/recovery.hpp"
#include "durability/distributed/serialization.hpp"
#include "transactions/transaction.hpp"
cpp<#

(lcp:namespace distributed)

(lcp:define-rpc make-snapshot
(:request ((member "::tx::TransactionId")))
(:response ((member :bool))))

(lcp:define-rpc recover-wal-and-indexes
(:request ((member "::durability::RecoveryData")))
(:response ()))

(lcp:pop-namespace) ;; distributed
@ -1,31 +0,0 @@
#include "distributed/durability_rpc_worker.hpp"

#include "database/distributed/distributed_graph_db.hpp"
#include "database/distributed/graph_db_accessor.hpp"
#include "distributed/durability_rpc_messages.hpp"

namespace distributed {

DurabilityRpcWorker::DurabilityRpcWorker(
database::Worker *db, distributed::Coordination *coordination)
: db_(db) {
coordination->Register<MakeSnapshotRpc>(
[this](auto *req_reader, auto *res_builder) {
MakeSnapshotReq req;
slk::Load(&req, req_reader);
auto dba = db_->Access(req.member);
MakeSnapshotRes res(db_->MakeSnapshot(*dba));
slk::Save(res, res_builder);
});

coordination->Register<RecoverWalAndIndexesRpc>(
[this](auto *req_reader, auto *res_builder) {
RecoverWalAndIndexesReq req;
slk::Load(&req, req_reader);
this->db_->RecoverWalAndIndexes(&req.member);
RecoverWalAndIndexesRes res;
slk::Save(res, res_builder);
});
}

} // namespace distributed
@ -1,19 +0,0 @@
#pragma once

#include "distributed/coordination.hpp"

namespace database {
class Worker;
}; // namespace database

namespace distributed {

class DurabilityRpcWorker {
public:
DurabilityRpcWorker(database::Worker *db, distributed::Coordination *coordination);

private:
database::Worker *db_;
};

} // namespace distributed
@ -1,43 +0,0 @@
#include "distributed/dynamic_worker.hpp"

#include "database/distributed/graph_db.hpp"
#include "distributed/dynamic_worker_rpc_messages.hpp"

namespace distributed {

DynamicWorkerAddition::DynamicWorkerAddition(database::GraphDb *db,
distributed::Coordination *coordination)
: db_(db), coordination_(coordination) {
coordination_->Register<DynamicWorkerRpc>(
[this](auto *req_reader, auto *res_builder) {
DynamicWorkerReq req;
slk::Load(&req, req_reader);
DynamicWorkerRes res(this->GetIndicesToCreate());
slk::Save(res, res_builder);
});
}

std::vector<std::pair<std::string, std::string>>
DynamicWorkerAddition::GetIndicesToCreate() {
std::vector<std::pair<std::string, std::string>> indices;
if (!enabled_.load()) return indices;
for (const auto &key : db_->storage().label_property_index().Keys()) {
auto label = db_->label_mapper().id_to_value(key.label_);
auto property = db_->property_mapper().id_to_value(key.property_);
indices.emplace_back(label, property);
}
return indices;
}

void DynamicWorkerAddition::Enable() { enabled_.store(true); }

DynamicWorkerRegistration::DynamicWorkerRegistration(communication::rpc::ClientPool *client_pool)
: client_pool_(client_pool) {}

std::vector<std::pair<std::string, std::string>>
DynamicWorkerRegistration::GetIndicesToCreate() {
auto result = client_pool_->Call<DynamicWorkerRpc>();
return result.recover_indices;
}

} // namespace distributed
@ -1,46 +0,0 @@
/// @file
#pragma once

#include <atomic>
#include <string>
#include <vector>

#include "communication/rpc/client_pool.hpp"
#include "distributed/coordination.hpp"

namespace database {
class GraphDb;
} // namespace database

namespace distributed {
class DynamicWorkerAddition final {
public:
DynamicWorkerAddition(database::GraphDb *db,
distributed::Coordination *coordination);

/// Enable dynamic worker addition.
void Enable();

private:
database::GraphDb *db_{nullptr};
distributed::Coordination *coordination_;

std::atomic<bool> enabled_{false};

/// Return the indices a dynamically added worker needs to create.
std::vector<std::pair<std::string, std::string>> GetIndicesToCreate();
};

class DynamicWorkerRegistration final {
public:
explicit DynamicWorkerRegistration(
communication::rpc::ClientPool *client_pool);

/// Make a RPC call to master to get indices to create.
std::vector<std::pair<std::string, std::string>> GetIndicesToCreate();

private:
communication::rpc::ClientPool *client_pool_;
};

} // namespace distributed
@ -1,18 +0,0 @@
#>cpp
#pragma once

#include <vector>
#include <string>

#include "communication/rpc/messages.hpp"
#include "slk/serialization.hpp"
cpp<#

(lcp:namespace distributed)

(lcp:define-rpc dynamic-worker
(:request ())
(:response
((recover-indices "std::vector<std::pair<std::string, std::string>>"))))

(lcp:pop-namespace) ;; distributed
@ -1,28 +0,0 @@
#>cpp
#pragma once

#include <memory>
#include <string>

#include "communication/rpc/messages.hpp"
#include "storage/common/types/types.hpp"
#include "storage/distributed/rpc/serialization.hpp"
#include "transactions/transaction.hpp"
cpp<#

(lcp:namespace distributed)

(lcp:define-rpc populate-index
(:request
((label "::storage::Label")
(property "::storage::Property")
(tx-id "::tx::TransactionId")))
(:response ()))

(lcp:define-rpc create-index
(:request
((label "::storage::Label")
(property "::storage::Property")))
(:response ()))

(lcp:pop-namespace) ;; distributed
@ -1,35 +0,0 @@
#include "distributed/index_rpc_server.hpp"

#include "database/distributed/graph_db.hpp"
#include "database/distributed/graph_db_accessor.hpp"
#include "distributed/index_rpc_messages.hpp"

namespace distributed {

IndexRpcServer::IndexRpcServer(database::GraphDb *db,
distributed::Coordination *coordination)
: db_(db) {
coordination->Register<CreateIndexRpc>(
[this](auto *req_reader, auto *res_builder) {
CreateIndexReq req;
slk::Load(&req, req_reader);
database::LabelPropertyIndex::Key key{req.label, req.property};
db_->storage().label_property_index_.CreateIndex(key);
CreateIndexRes res;
slk::Save(res, res_builder);
});

coordination->Register<PopulateIndexRpc>(
[this](auto *req_reader, auto *res_builder) {
PopulateIndexReq req;
slk::Load(&req, req_reader);
database::LabelPropertyIndex::Key key{req.label, req.property};
auto dba = db_->Access(req.tx_id);
dba->PopulateIndex(key);
dba->EnableIndex(key);
PopulateIndexRes res;
slk::Save(res, res_builder);
});
}

} // namespace distributed
@ -1,19 +0,0 @@
#pragma once

#include "distributed/coordination.hpp"

namespace database {
class GraphDb;
}

namespace distributed {

class IndexRpcServer {
public:
IndexRpcServer(database::GraphDb *db, distributed::Coordination *coordination);

private:
database::GraphDb *db_;
};

} // namespace distributed
@ -1,44 +0,0 @@
#include "distributed/plan_consumer.hpp"

namespace distributed {

PlanConsumer::PlanConsumer(distributed::Coordination *coordination) {
coordination->Register<DispatchPlanRpc>(
[this](auto *req_reader, auto *res_builder) {
DispatchPlanReq req;
slk::Load(&req, req_reader);
plan_cache_.access().insert(
req.plan_id, std::make_unique<PlanPack>(req.plan, req.symbol_table,
std::move(req.storage)));
DispatchPlanRes res;
slk::Save(res, res_builder);
});

coordination->Register<RemovePlanRpc>(
[this](auto *req_reader, auto *res_builder) {
RemovePlanReq req;
slk::Load(&req, req_reader);
plan_cache_.access().remove(req.member);
RemovePlanRes res;
slk::Save(res, res_builder);
});
}

PlanConsumer::PlanPack &PlanConsumer::PlanForId(int64_t plan_id) const {
auto accessor = plan_cache_.access();
auto found = accessor.find(plan_id);
CHECK(found != accessor.end())
<< "Missing plan and symbol table for plan id: " << plan_id;
return *found->second;
}

std::vector<int64_t> PlanConsumer::CachedPlanIds() const {
std::vector<int64_t> plan_ids;
auto access = plan_cache_.access();
plan_ids.reserve(access.size());
for (auto &kv : access) plan_ids.emplace_back(kv.first);

return plan_ids;
}

} // namespace distributed
@ -1,43 +0,0 @@
#pragma once

#include <vector>

#include "distributed/coordination.hpp"
#include "data_structures/concurrent/concurrent_map.hpp"
#include "distributed/plan_rpc_messages.hpp"
#include "query/frontend/semantic/symbol_table.hpp"
#include "query/plan/operator.hpp"

namespace distributed {

/** Handles plan consumption from master. Creates and holds a local cache of
* plans. Worker side. */
class PlanConsumer {
public:
struct PlanPack {
PlanPack(std::shared_ptr<query::plan::LogicalOperator> plan,
query::SymbolTable symbol_table, query::AstStorage storage)
: plan(plan),
symbol_table(std::move(symbol_table)),
storage(std::move(storage)) {}

std::shared_ptr<query::plan::LogicalOperator> plan;
query::SymbolTable symbol_table;
const query::AstStorage storage;
};

explicit PlanConsumer(distributed::Coordination *coordination);

/** Return cached plan and symbol table for a given plan id. */
PlanPack &PlanForId(int64_t plan_id) const;

/** Return the ids of all the cached plans. For testing. */
std::vector<int64_t> CachedPlanIds() const;

private:
// TODO remove unique_ptr. This is to get it to work, emplacing into a
// ConcurrentMap is tricky.
mutable ConcurrentMap<int64_t, std::unique_ptr<PlanPack>> plan_cache_;
};

} // namespace distributed
@ -1,32 +0,0 @@
#include <distributed/plan_dispatcher.hpp>

namespace distributed {

PlanDispatcher::PlanDispatcher(Coordination *coordination) : coordination_(coordination) {}

void PlanDispatcher::DispatchPlan(
int64_t plan_id, std::shared_ptr<query::plan::LogicalOperator> plan,
const query::SymbolTable &symbol_table) {
auto futures = coordination_->ExecuteOnWorkers<void>(
0, [plan_id, plan, symbol_table](
int worker_id, communication::rpc::ClientPool &client_pool) {
client_pool.Call<DispatchPlanRpc>(plan_id, plan, symbol_table);
});

for (auto &future : futures) {
future.get();
}
}

void PlanDispatcher::RemovePlan(int64_t plan_id) {
auto futures = coordination_->ExecuteOnWorkers<void>(
0, [plan_id](int worker_id, communication::rpc::ClientPool &client_pool) {
client_pool.Call<RemovePlanRpc>(plan_id);
});

for (auto &future : futures) {
future.get();
}
}

} // namespace distributed
@ -1,29 +0,0 @@
#pragma once

#include "distributed/coordination.hpp"
#include "distributed/plan_rpc_messages.hpp"
#include "query/frontend/semantic/symbol_table.hpp"
#include "query/plan/operator.hpp"

namespace distributed {

/** Handles plan dispatching to all workers. Uses MasterCoordination to
* acomplish that. Master side.
*/
class PlanDispatcher {
public:
explicit PlanDispatcher(Coordination *coordination);

/** Dispatch a plan to all workers and wait for their acknowledgement. */
void DispatchPlan(int64_t plan_id,
std::shared_ptr<query::plan::LogicalOperator> plan,
const query::SymbolTable &symbol_table);

/** Remove a plan from all workers and wait for their acknowledgement. */
void RemovePlan(int64_t plan_id);

private:
Coordination *coordination_;
};

} // namespace distributed
@ -1,46 +0,0 @@
#>cpp
#pragma once

#include "communication/rpc/messages.hpp"
#include "query/frontend/ast/ast.hpp"
#include "query/frontend/semantic/symbol_table.hpp"
#include "query/distributed/plan/ops.hpp"
cpp<#

(lcp:namespace distributed)

(defun slk-save-plan (member)
#>cpp
query::plan::LogicalOperator::SaveHelper helper;
slk::Save<query::plan::LogicalOperator>(
self.${member}, builder, &helper.saved_ops,
[&helper](const auto &val, auto *builder) {
slk::Save(val, builder, &helper);
});
cpp<#)

(defun slk-load-plan (member)
#>cpp
query::plan::LogicalOperator::SlkLoadHelper helper;
slk::Load<query::plan::LogicalOperator>(&self->${member}, reader, &helper.loaded_ops,
[&helper](auto *op, auto *reader) {
slk::ConstructAndLoad(op, reader, &helper);
});
self->storage = std::move(helper.ast_storage);
cpp<#)

(lcp:define-rpc dispatch-plan
(:request
((plan-id :int64_t)
(plan "std::shared_ptr<query::plan::LogicalOperator>"
:slk-save #'slk-save-plan
:slk-load #'slk-load-plan)
(symbol-table "::query::SymbolTable")
(storage "::query::AstStorage" :initarg nil :dont-save t)))
(:response ()))

(lcp:define-rpc remove-plan
(:request ((member :int64_t)))
(:response ()))

(lcp:pop-namespace) ;; distributed
@ -1,212 +0,0 @@
#include "distributed/produce_rpc_server.hpp"

#include "database/distributed/distributed_graph_db.hpp"
#include "distributed/data_manager.hpp"
#include "distributed/pull_produce_rpc_messages.hpp"
#include "query/common.hpp"
#include "query/exceptions.hpp"
#include "transactions/distributed/engine_worker.hpp"

namespace distributed {

ProduceRpcServer::OngoingProduce::OngoingProduce(
database::Worker *db, tx::TransactionId tx_id,
const PlanConsumer::PlanPack &plan_pack, int64_t timestamp,
const query::Parameters &parameters,
std::vector<query::Symbol> pull_symbols)
: dba_(db->Access(tx_id)),
context_{dba_.get()},
pull_symbols_(std::move(pull_symbols)),
frame_(plan_pack.symbol_table.max_position()),
execution_memory_(std::make_unique<utils::MonotonicBufferResource>(
query::kExecutionMemoryBlockSize)),
cursor_(plan_pack.plan->MakeCursor(execution_memory_.get())) {
context_.symbol_table = plan_pack.symbol_table;
// TODO: Maybe we want a seperate MemoryResource per pull evaluation
context_.evaluation_context.memory = execution_memory_.get();
context_.evaluation_context.timestamp = timestamp;
context_.evaluation_context.parameters = parameters;
context_.evaluation_context.properties =
query::NamesToProperties(plan_pack.storage.properties_, dba_.get());
context_.evaluation_context.labels =
query::NamesToLabels(plan_pack.storage.labels_, dba_.get());
}

std::pair<std::vector<query::TypedValue>, PullState>
ProduceRpcServer::OngoingProduce::Pull() {
if (!accumulation_.empty()) {
auto results = std::move(accumulation_.back());
accumulation_.pop_back();
for (auto &element : results) {
try {
query::ReconstructTypedValue(element);
} catch (query::ReconstructionException &) {
cursor_state_ = PullState::RECONSTRUCTION_ERROR;
return std::make_pair(std::move(results), cursor_state_);
}
}

return std::make_pair(std::move(results), PullState::CURSOR_IN_PROGRESS);
}

return PullOneFromCursor();
}

PullState ProduceRpcServer::OngoingProduce::Accumulate() {
while (true) {
auto result = PullOneFromCursor();
if (result.second != PullState::CURSOR_IN_PROGRESS)
return result.second;
else
accumulation_.emplace_back(std::move(result.first));
}
}

void ProduceRpcServer::OngoingProduce::Reset() {
cursor_->Reset();
accumulation_.clear();
cursor_state_ = PullState::CURSOR_IN_PROGRESS;
}

std::pair<std::vector<query::TypedValue>, PullState>
ProduceRpcServer::OngoingProduce::PullOneFromCursor() {
std::vector<query::TypedValue> results;

// Check if we already exhausted this cursor (or it entered an error
// state). This happens when we accumulate before normal pull.
if (cursor_state_ != PullState::CURSOR_IN_PROGRESS) {
return std::make_pair(results, cursor_state_);
}

try {
if (cursor_->Pull(frame_, context_)) {
results.reserve(pull_symbols_.size());
for (const auto &symbol : pull_symbols_) {
results.push_back(frame_[symbol]);
}
} else {
cursor_state_ = PullState::CURSOR_EXHAUSTED;
cursor_->Shutdown();
}
} catch (const mvcc::SerializationError &) {
cursor_state_ = PullState::SERIALIZATION_ERROR;
} catch (const utils::LockTimeoutException &) {
cursor_state_ = PullState::LOCK_TIMEOUT_ERROR;
} catch (const RecordDeletedError &) {
cursor_state_ = PullState::UPDATE_DELETED_ERROR;
} catch (const query::ReconstructionException &) {
cursor_state_ = PullState::RECONSTRUCTION_ERROR;
} catch (const query::RemoveAttachedVertexException &) {
cursor_state_ = PullState::UNABLE_TO_DELETE_VERTEX_ERROR;
} catch (const query::QueryRuntimeException &) {
cursor_state_ = PullState::QUERY_ERROR;
} catch (const query::HintedAbortError &) {
cursor_state_ = PullState::HINTED_ABORT_ERROR;
}
return std::make_pair(std::move(results), cursor_state_);
}

ProduceRpcServer::ProduceRpcServer(database::Worker *db,
tx::EngineWorker *tx_engine,
distributed::Coordination *coordination,
const PlanConsumer &plan_consumer,
DataManager *data_manager)
: db_(db),
plan_consumer_(plan_consumer),
tx_engine_(tx_engine) {
coordination->Register<PullRpc>(
[this](auto *req_reader, auto *res_builder) {
PullReq req;
slk::Load(&req, req_reader);
PullRes res(Pull(req));
slk::Save(res, res_builder);
});

coordination->Register<ResetCursorRpc>(
[this](auto *req_reader, auto *res_builder) {
ResetCursorReq req;
slk::Load(&req, req_reader);
Reset(req);
ResetCursorRes res;
slk::Save(res, res_builder);
});

CHECK(data_manager);

coordination->Register<TransactionCommandAdvancedRpc>(
[this, data_manager](auto *req_reader, auto *res_builder) {
TransactionCommandAdvancedReq req;
slk::Load(&req, req_reader);
tx_engine_->UpdateCommand(req.member);
data_manager->ClearCacheForSingleTransaction(req.member);
TransactionCommandAdvancedRes res;
slk::Save(res, res_builder);
});
}

void ProduceRpcServer::ClearTransactionalCache(
tx::TransactionId oldest_active) {
std::lock_guard<std::mutex> guard{ongoing_produces_lock_};
for (auto it = ongoing_produces_.begin(); it != ongoing_produces_.end();) {
if (std::get<0>(it->first) < oldest_active) {
it = ongoing_produces_.erase(it);
} else {
++it;
}
}
}

ProduceRpcServer::OngoingProduce &ProduceRpcServer::GetOngoingProduce(
const PullReq &req) {
auto key_tuple = std::make_tuple(req.tx_id, req.command_id, req.plan_id);
std::lock_guard<std::mutex> guard{ongoing_produces_lock_};
auto found = ongoing_produces_.find(key_tuple);
if (found != ongoing_produces_.end()) {
return found->second;
}
// On the worker cache the snapshot to have one RPC less.
tx_engine_->RunningTransaction(req.tx_id, req.tx_snapshot);
auto &plan_pack = plan_consumer_.PlanForId(req.plan_id);
return ongoing_produces_
.emplace(std::piecewise_construct, std::forward_as_tuple(key_tuple),
std::forward_as_tuple(db_, req.tx_id, plan_pack, req.timestamp,
req.parameters, req.symbols))
.first->second;
}

PullResData ProduceRpcServer::Pull(const PullReq &req) {
auto &ongoing_produce = GetOngoingProduce(req);

PullResData result(db_->WorkerId(), req.send_versions);
result.pull_state = PullState::CURSOR_IN_PROGRESS;

if (req.accumulate) {
result.pull_state = ongoing_produce.Accumulate();
// If an error ocurred, we need to return that error.
if (result.pull_state != PullState::CURSOR_EXHAUSTED) {
return result;
}
}

for (int i = 0; i < req.batch_size; ++i) {
auto pull_result = ongoing_produce.Pull();
result.pull_state = pull_result.second;
if (pull_result.second != PullState::CURSOR_IN_PROGRESS) break;
result.frames.emplace_back(std::move(pull_result.first));
}

return result;
}

void ProduceRpcServer::Reset(const ResetCursorReq &req) {
auto key_tuple = std::make_tuple(req.tx_id, req.command_id, req.plan_id);
std::lock_guard<std::mutex> guard{ongoing_produces_lock_};
auto found = ongoing_produces_.find(key_tuple);
// It is fine if the cursor doesn't exist yet. Creating a new cursor is the
// same thing as reseting an existing one.
if (found != ongoing_produces_.end()) {
found->second.Reset();
}
}

} // namespace distributed
@ -1,111 +0,0 @@
/// @file
#pragma once

#include <cstdint>
#include <map>
#include <mutex>
#include <utility>
#include <vector>

#include "database/distributed/graph_db.hpp"
#include "database/distributed/graph_db_accessor.hpp"
#include "distributed/coordination.hpp"
#include "distributed/plan_consumer.hpp"
#include "distributed/pull_produce_rpc_messages.hpp"
#include "query/context.hpp"
#include "query/frontend/semantic/symbol_table.hpp"
#include "query/interpret/frame.hpp"
#include "query/plan/operator.hpp"
#include "query/typed_value.hpp"
#include "transactions/type.hpp"

namespace database {
class Worker;
}

namespace tx {
class EngineWorker;
}

namespace distributed {

class DataManager;

/// Handles the execution of a plan on the worker, requested by the remote
/// master. Assumes that (tx id, command id, plan id) uniquely identifies an
/// execution, and that there will never be parallel requests for the same
/// execution thus identified.
class ProduceRpcServer {
/// Encapsulates a Cursor execution in progress. Can be used for pulling a
/// single result from the execution, or pulling all and accumulating the
/// results. Accumulations are used for synchronizing updates in distributed
/// MG (see query::plan::Synchronize).
class OngoingProduce {
public:
OngoingProduce(database::Worker *db, tx::TransactionId tx_id,
const PlanConsumer::PlanPack &plan_pack, int64_t timestamp,
const query::Parameters &parameters,
std::vector<query::Symbol> pull_symbols);

/// Returns a vector of typed values (one for each `pull_symbol`), and an
/// indication of the pull result. The result data is valid only if the
/// returned state is CURSOR_IN_PROGRESS.
std::pair<std::vector<query::TypedValue>, PullState> Pull();

/// Accumulates all the frames pulled from the cursor and returns
/// CURSOR_EXHAUSTED. If an error occurs, an appropriate value is returned.
PullState Accumulate();

void Reset();

private:
std::unique_ptr<database::GraphDbAccessor> dba_;
query::ExecutionContext context_;
std::vector<query::Symbol> pull_symbols_;
query::Frame frame_;
PullState cursor_state_{PullState::CURSOR_IN_PROGRESS};
std::vector<std::vector<query::TypedValue>> accumulation_;
// execution_memory_ is unique_ptr because we are passing the address to
// cursor_, and we want to preserve the pointer in case we get moved.
std::unique_ptr<utils::MonotonicBufferResource> execution_memory_;
query::plan::UniqueCursorPtr cursor_;

/// Pulls and returns a single result from the cursor.
std::pair<std::vector<query::TypedValue>, PullState> PullOneFromCursor();
};

public:
ProduceRpcServer(database::Worker *db, tx::EngineWorker *tx_engine,
distributed::Coordination *coordination,
const PlanConsumer &plan_consumer,
DataManager *data_manager);

/// Clears all ongoing produces that are older than the oldest active
/// transaction. This function should be registered in the transaction engine
/// for transactional cache cleanup.
void ClearTransactionalCache(tx::TransactionId oldest_active);

private:
std::mutex ongoing_produces_lock_;
/// Mapping of (tx id, command id, plan id) to OngoingProduce.
/// The command_id should be the command_id at the initialization of a cursor
/// that can call ProduceRpcServer.
std::map<std::tuple<tx::TransactionId, tx::CommandId, int64_t>,
OngoingProduce>
ongoing_produces_;
database::Worker *db_;
const distributed::PlanConsumer &plan_consumer_;
tx::EngineWorker *tx_engine_;

/// Gets an ongoing produce for the given pull request. Creates a new one if
/// there is none currently existing.
OngoingProduce &GetOngoingProduce(const PullReq &req);

/// Performs a single remote pull for the given request.
PullResData Pull(const PullReq &req);

/// Resets the cursor for an ongoing produce.
void Reset(const ResetCursorReq &req);
};

} // namespace distributed
@ -1,206 +0,0 @@
#>cpp
#pragma once

#include <cstdint>
#include <functional>
#include <string>

#include "communication/rpc/messages.hpp"
#include "query/context.hpp"
#include "query/frontend/semantic/symbol.hpp"
#include "query/parameters.hpp"
#include "query/distributed/serialization.hpp"
#include "storage/distributed/address_types.hpp"
#include "transactions/type.hpp"
cpp<#

(lcp:in-impl
#>cpp
#include "database/distributed/graph_db.hpp"
#include "database/distributed/graph_db_accessor.hpp"
#include "distributed/data_manager.hpp"
cpp<#)

(load "transactions/distributed/serialization.lcp")

(lcp:namespace distributed)

#>cpp
// Forward declare for LoadGraphElement.
class DataManager;

/// The default number of results returned via RPC from remote execution to the
/// master that requested it.
constexpr int kDefaultBatchSize = 20;
cpp<#

(lcp:define-enum pull-state
(cursor-exhausted
cursor-in-progress
serialization-error
lock-timeout-error
update-deleted-error
reconstruction-error
unable-to-delete-vertex-error
hinted-abort-error
query-error)
(:documentation "Returned along with a batch of results in the remote-pull
RPC. Indicates the state of execution on the worker.")
(:serialize))

(lcp:define-struct pull-data ()
((pull-state "PullState")
(frames "std::vector<std::vector<query::TypedValue>>"))
(:documentation
"The data returned to the end consumer (the Pull operator). Contains only
the relevant parts of the response, ready for use."))

(defun slk-save-frames (member)
#>cpp
size_t frame_count = self.${member}.size();
slk::Save(frame_count, builder);
for (const auto &frame : self.${member}) {
size_t frame_size = frame.size();
slk::Save(frame_size, builder);
for (const auto &value : frame) {
slk::Save(value, builder, self.send_versions, self.worker_id);
}
}
cpp<#)

(defun slk-load-frames (member)
#>cpp
size_t frame_count = 0;
slk::Load(&frame_count, reader);
self->${member}.reserve(frame_count);
for (size_t frame_i = 0; frame_i < frame_count; ++frame_i) {
size_t frame_size = 0;
slk::Load(&frame_size, reader);
std::vector<query::TypedValue> frame(frame_size);
for (size_t val_i = 0; val_i < frame_size; ++val_i) {
slk::Load(&frame[val_i], reader, dba, data_manager);
}
self->${member}.emplace_back(std::move(frame));
}
cpp<#)

(lcp:define-struct pull-res-data ()
((pull-state "PullState")
(frames "std::vector<std::vector<query::TypedValue>>"
:slk-save #'slk-save-frames
:slk-load #'slk-load-frames)
(worker-id :int16_t :dont-save t
:documentation
"Id of the worker on which the response is created, used for
serializing vertices (converting local to global addresses). Indicates which
of (old, new) records of a graph element should be sent.")
(send-versions "::storage::SendVersions" :dont-save t)
;; Temporary caches used between deserialization and post-processing
;; (transfering the ownership of this data to a Cache).
(vertices "std::vector<GraphElementData<Vertex>>" :dont-save t)
(edges "std::vector<GraphElementData<Edge>>" :dont-save t)
(paths "std::vector<PathData>" :dont-save t))
(:documentation
"The data of the remote pull response. Post-processing is required after
deserialization to initialize Vertex/Edge typed values in the frames (possibly
encapsulated in lists/maps) to their proper values. This requires a
GraphDbAccessor and therefore can't be done as part of deserialization.

TODO - make it possible to inject a &GraphDbAcessor from the Pull layer all
the way into RPC data deserialization to remove the requirement for
post-processing. The current approach of holding references to parts of the
frame (potentially embedded in lists/maps) is too error-prone.")
(:public
#>cpp
private:
cpp<#
(lcp:define-struct (graph-element-data t-record) ()
((cypher-id :int64_t)
(global-address "::storage::Address<mvcc::VersionList<TRecord>>")
(old-record "std::unique_ptr<TRecord>")
(new-record "std::unique_ptr<TRecord>")
(element-in-frame
"::query::TypedValue *"
:documentation
"The position in frame is optional. This same structure is used for
deserializing path elements, in which case the vertex/edge in question is not
directly part of the frame."))
(:documentation
"Temp cache for deserialized vertices and edges. These objects are
created during deserialization. They are used immediatelly after during
post-processing. The vertex/edge data ownership gets transfered to the Cache,
and the `element_in_frame` reference is used to set the appropriate accessor
to the appropriate value. Not used on side that generates the response.")
(:public
#>cpp
GraphElementData(int64_t cypher_id, storage::Address<mvcc::VersionList<TRecord>> address,
std::unique_ptr<TRecord> old_record, std::unique_ptr<TRecord> new_record,
query::TypedValue *element_in_frame)
: cypher_id(cypher_id),
global_address(address),
old_record(std::move(old_record)),
new_record(std::move(new_record)),
element_in_frame(element_in_frame) {}
cpp<#))
(lcp:define-struct path-data ()
((vertices "std::vector<GraphElementData<Vertex>>")
(edges "std::vector<GraphElementData<Edge>>")
(path-in-frame "query::TypedValue *"))
(:public
#>cpp
PathData(query::TypedValue *path_in_frame) : path_in_frame(path_in_frame) {}
cpp<#)
(:documentation "Same like `GraphElementData`, but for paths."))
#>cpp
public:
PullResData() {} // Default constructor required for serialization.
PullResData(int worker_id, storage::SendVersions send_versions)
: worker_id(worker_id), send_versions(send_versions) {}

PullResData(const PullResData &) = delete;
PullResData &operator=(const PullResData &) = delete;
PullResData(PullResData &&) = default;
PullResData &operator=(PullResData &&) = default;
cpp<#)
(:serialize (:slk :load-args '((dba "database::GraphDbAccessor *")
(data-manager "distributed::DataManager *")))))

(lcp:define-rpc pull
(:request
((tx-id "::tx::TransactionId")
(tx-snapshot "::tx::Snapshot"
:slk-save #'slk-save-snapshot
:slk-load #'slk-load-snapshot)
(plan-id :int64_t)
(command-id "::tx::CommandId")
(timestamp :int64_t)
(parameters "::query::Parameters")
(symbols "std::vector<query::Symbol>")
(accumulate :bool)
(batch-size :int64_t)
;; Indicates which of (old, new) records of a graph element should be sent.
(send-versions "::storage::SendVersions")))
(:response
((data "PullResData" :initarg :move
:slk-load (lambda (m)
#>cpp
slk::Load(&self->${m}, reader, dba, data_manager);
cpp<#)))
(:serialize (:slk :load-args '((dba "database::GraphDbAccessor *")
(data-manager "distributed::DataManager *"))))))

;; TODO make a separate RPC for the continuation of an existing pull, as an
;; optimization not to have to send the full PullReqData pack every time.

(lcp:define-rpc reset-cursor
(:request
((tx-id "::tx::TransactionId")
(plan-id :int64_t)
(command-id "::tx::CommandId")))
(:response ()))

(lcp:define-rpc transaction-command-advanced
(:request ((member "::tx::TransactionId")))
(:response ()))

(lcp:pop-namespace) ;; distributed
@ -1,52 +0,0 @@
#include "distributed/pull_rpc_clients.hpp"

#include <functional>

#include "storage/distributed/edge.hpp"
#include "storage/distributed/vertex.hpp"

namespace distributed {

utils::Future<PullData> PullRpcClients::Pull(
database::GraphDbAccessor *dba, int worker_id, int64_t plan_id,
tx::CommandId command_id,
const query::EvaluationContext &evaluation_context,
const std::vector<query::Symbol> &symbols, bool accumulate,
int batch_size) {
return coordination_->ExecuteOnWorker<PullData>(
worker_id, [data_manager = data_manager_, dba, plan_id, command_id,
evaluation_context, symbols, accumulate,
batch_size](int worker_id, ClientPool &client_pool) {
auto load_pull_res = [data_manager, dba](auto *res_reader) {
PullRes res;
slk::Load(&res, res_reader, dba, data_manager);
return res;
};
auto result = client_pool.CallWithLoad<PullRpc>(
load_pull_res, dba->transaction_id(), dba->transaction().snapshot(),
plan_id, command_id, evaluation_context.timestamp,
evaluation_context.parameters, symbols, accumulate, batch_size,
storage::SendVersions::BOTH);
return PullData{result.data.pull_state, std::move(result.data.frames)};
});
}

utils::Future<void> PullRpcClients::ResetCursor(database::GraphDbAccessor *dba,
int worker_id, int64_t plan_id,
tx::CommandId command_id) {
return coordination_->ExecuteOnWorker<void>(
worker_id, [dba, plan_id, command_id](int worker_id, auto &client) {
client.template Call<ResetCursorRpc>(dba->transaction_id(), plan_id,
command_id);
});
}

std::vector<utils::Future<void>>
PullRpcClients::NotifyAllTransactionCommandAdvanced(tx::TransactionId tx_id) {
return coordination_->ExecuteOnWorkers<void>(
0, [tx_id](int worker_id, auto &client) {
client.template Call<TransactionCommandAdvancedRpc>(tx_id);
});
}

} // namespace distributed
@ -1,55 +0,0 @@
#pragma once

#include <vector>

#include "database/distributed/graph_db_accessor.hpp"
#include "distributed/coordination.hpp"
#include "distributed/pull_produce_rpc_messages.hpp"
#include "query/context.hpp"
#include "query/frontend/semantic/symbol.hpp"
#include "transactions/type.hpp"
#include "utils/future.hpp"

namespace distributed {

class DataManager;

/// Provides means of calling for the execution of a plan on some remote worker,
/// and getting the results of that execution. The results are returned in
/// batches and are therefore accompanied with an enum indicator of the state of
/// remote execution.
class PullRpcClients {
using ClientPool = communication::rpc::ClientPool;

public:
PullRpcClients(Coordination *coordination, DataManager *data_manager)
: coordination_(coordination), data_manager_(data_manager) {}

/// Calls a remote pull asynchroniously. IMPORTANT: take care not to call this
/// function for the same (tx_id, worker_id, plan_id, command_id) before the
/// previous call has ended.
///
/// @todo: it might be cleaner to split Pull into {InitRemoteCursor,
/// Pull, RemoteAccumulate}, but that's a lot of refactoring and more
/// RPC calls.
utils::Future<PullData> Pull(
database::GraphDbAccessor *dba, int worker_id, int64_t plan_id,
tx::CommandId command_id,
const query::EvaluationContext &evaluation_context,
const std::vector<query::Symbol> &symbols, bool accumulate,
int batch_size = kDefaultBatchSize);

utils::Future<void> ResetCursor(database::GraphDbAccessor *dba, int worker_id,
int64_t plan_id, tx::CommandId command_id);

auto GetWorkerIds() { return coordination_->GetWorkerIds(); }

std::vector<utils::Future<void>> NotifyAllTransactionCommandAdvanced(
tx::TransactionId tx_id);

private:
Coordination *coordination_;
DataManager *data_manager_;
};

} // namespace distributed
@ -1,18 +0,0 @@
#>cpp
#pragma once

#include "communication/rpc/messages.hpp"
#include "io/network/endpoint.hpp"
#include "slk/serialization.hpp"
#include "transactions/transaction.hpp"
cpp<#

(lcp:namespace distributed)

(lcp:define-rpc ran-local-gc
(:request
((local-oldest-active "::tx::TransactionId")
(worker-id :int16_t)))
(:response ()))

(lcp:pop-namespace) ;; distributed
@ -1,17 +0,0 @@
#>cpp
#pragma once

#include <memory>
#include <string>

#include "communication/rpc/messages.hpp"
#include "slk/serialization.hpp"
cpp<#

(lcp:namespace distributed)

(lcp:define-rpc token-transfer
(:request ())
(:response ()))

(lcp:pop-namespace) ;; distributed
@ -1,119 +0,0 @@
/// @file

#pragma once

#include "distributed/coordination.hpp"
#include "distributed/dgp/partitioner.hpp"

namespace database {
class GraphDb;
};

namespace distributed {

// TODO (buda): dgp_.Run() should be injected. This server shouldn't know
// anything about the partitioning.
// TODO (buda): It makes more sense to have centralized server which will assign
// tokens because error handling would be much easier.
// TODO (buda): Broken by design.

/// Shares the token between dynamic graph partitioners instances across workers
/// by passing the token from one worker to another, in a circular fashion. This
/// guarantees that no two workers will execute the dynamic graph partitioner
/// step in the same time.
class TokenSharingRpcServer {
public:
TokenSharingRpcServer(database::GraphDb *db, int worker_id,
distributed::Coordination *coordination)
: worker_id_(worker_id), coordination_(coordination), dgp_(db) {
coordination_->Register<distributed::TokenTransferRpc>(
[this](auto *req_reader, auto *res_builder) { token_ = true; });
// TODO (buda): It's not trivial to move this part in the Start method
// because worker then doesn't run the step. Will resolve that with
// a different implementation of the token assignment.
runner_ = std::thread([this]() {
while (!shutting_down_) {
// If no other instances are connected just wait. It doesn't make sense
// to migrate anything because only one machine is available.
auto workers = coordination_->GetWorkerIds();
if (!(workers.size() > 1)) {
std::this_thread::sleep_for(std::chrono::seconds(1));
continue;
}

// Wait till we get the token.
while (!token_) {
if (shutting_down_) break;
std::this_thread::sleep_for(std::chrono::seconds(1));
}

if (shutting_down_) break;

token_ = false;
dgp_.Partition();

// Transfer token to next.
sort(workers.begin(), workers.end());

int next_worker = -1;
auto pos = std::upper_bound(workers.begin(), workers.end(), worker_id_);
if (pos != workers.end()) {
next_worker = *pos;
} else {
next_worker = workers[0];
}

// Try to transfer the token until successful.
while (!shutting_down_) {
try {
coordination_->GetClientPool(next_worker)->Call<TokenTransferRpc>();
break;
} catch (const communication::rpc::RpcFailedException &e) {
DLOG(WARNING) << "Unable to transfer token to worker "
<< next_worker;
std::this_thread::sleep_for(std::chrono::milliseconds(500));
}
}
}
});
}

/// Starts the token sharing server which in turn starts the dynamic graph
/// partitioner.
void Start() {
started_ = true;
token_ = true;
}

~TokenSharingRpcServer() {
shutting_down_ = true;
if (runner_.joinable()) runner_.join();
if (started_ && worker_id_ == 0) {
// Wait till we get the token back otherwise some worker might try to
// migrate to another worker while that worker is shutting down or
// something else bad might happen.
// TODO (buda): Solve this better in the future since this blocks
// shutting down until spinner steps complete.
while (!token_) {
// Cluster state has to be examined here because if one of the workers
// is down it doesn't make sense to wait for the token because token
// probably won't arrive back.
if (!coordination_->IsClusterAlive()) return;
std::this_thread::sleep_for(std::chrono::milliseconds(500));
}
}
}

private:
int worker_id_;
distributed::Coordination *coordination_;

std::atomic<bool> started_{false};
std::atomic<bool> token_{false};
std::atomic<bool> shutting_down_{false};
std::thread runner_;

distributed::dgp::Partitioner dgp_;
};

} // namespace distributed
@ -1,117 +0,0 @@
#include <unordered_map>
#include <vector>

#include "distributed/updates_rpc_clients.hpp"
#include "query/exceptions.hpp"
#include "utils/thread/sync.hpp"

namespace distributed {

namespace {
void RaiseIfRemoteError(UpdateResult result) {
switch (result) {
case UpdateResult::UNABLE_TO_DELETE_VERTEX_ERROR:
throw query::RemoveAttachedVertexException();
case UpdateResult::SERIALIZATION_ERROR:
throw mvcc::SerializationError();
case UpdateResult::LOCK_TIMEOUT_ERROR:
throw utils::LockTimeoutException(
"Remote LockTimeoutError during edge creation");
case UpdateResult::UPDATE_DELETED_ERROR:
throw RecordDeletedError();
case UpdateResult::DONE:
break;
}
}
} // namespace

UpdateResult UpdatesRpcClients::Update(int this_worker_id, int to_worker_id,
const database::StateDelta &delta) {
return coordination_->GetClientPool(to_worker_id)
->Call<UpdateRpc>(delta, this_worker_id)
.member;
}

CreatedVertexInfo UpdatesRpcClients::CreateVertex(
int worker_id, tx::TransactionId tx_id,
const std::vector<storage::Label> &labels,
const std::unordered_map<storage::Property, PropertyValue> &properties,
std::optional<int64_t> cypher_id) {
auto res = coordination_->GetClientPool(worker_id)->Call<CreateVertexRpc>(
CreateVertexReqData{tx_id, labels, properties, cypher_id});
CHECK(res.member.result == UpdateResult::DONE)
<< "Remote Vertex creation result not UpdateResult::DONE";
return CreatedVertexInfo(res.member.cypher_id, res.member.gid);
}

CreatedEdgeInfo UpdatesRpcClients::CreateEdge(
int this_worker_id, tx::TransactionId tx_id, VertexAccessor &from,
VertexAccessor &to, storage::EdgeType edge_type,
std::optional<int64_t> cypher_id) {
CHECK(from.address().is_remote()) << "In CreateEdge `from` must be remote";
int from_worker = from.address().worker_id();
auto res =
coordination_->GetClientPool(from_worker)
->Call<CreateEdgeRpc>(CreateEdgeReqData{this_worker_id,
from.gid(), to.GlobalAddress(), edge_type, tx_id, cypher_id});
RaiseIfRemoteError(res.member.result);
return CreatedEdgeInfo(res.member.cypher_id,
storage::EdgeAddress{res.member.gid, from_worker});
}

void UpdatesRpcClients::AddInEdge(int this_worker_id, tx::TransactionId tx_id,
VertexAccessor &from,
storage::EdgeAddress edge_address,
VertexAccessor &to,
storage::EdgeType edge_type) {
CHECK(to.address().is_remote() && edge_address.is_remote() &&
(from.GlobalAddress().worker_id() != to.address().worker_id()))
<< "AddInEdge should only be called when `to` is remote and "
"`from` is not on the same worker as `to`.";
auto worker_id = to.GlobalAddress().worker_id();
auto res = coordination_->GetClientPool(worker_id)->Call<AddInEdgeRpc>(
AddInEdgeReqData{this_worker_id, from.GlobalAddress(), edge_address,
to.gid(), edge_type, tx_id});
RaiseIfRemoteError(res.member);
}

void UpdatesRpcClients::RemoveVertex(int this_worker_id, int to_worker_id,
tx::TransactionId tx_id, gid::Gid gid,
bool check_empty) {
auto res = coordination_->GetClientPool(to_worker_id)->Call<RemoveVertexRpc>(
RemoveVertexReqData{this_worker_id, gid, tx_id, check_empty});
RaiseIfRemoteError(res.member);
}

void UpdatesRpcClients::RemoveEdge(int this_worker_id, int to_worker_id,
tx::TransactionId tx_id, gid::Gid edge_gid,
gid::Gid vertex_from_id,
storage::VertexAddress vertex_to_addr) {
auto res =
coordination_->GetClientPool(to_worker_id)
->Call<RemoveEdgeRpc>(RemoveEdgeData{this_worker_id, tx_id, edge_gid,
vertex_from_id, vertex_to_addr});
RaiseIfRemoteError(res.member);
}

void UpdatesRpcClients::RemoveInEdge(int this_worker_id, int to_worker_id,
tx::TransactionId tx_id,
gid::Gid vertex_id,
storage::EdgeAddress edge_address) {
CHECK(edge_address.is_remote()) << "RemoveInEdge edge_address is local.";
auto res = coordination_->GetClientPool(to_worker_id)
->Call<RemoveInEdgeRpc>(RemoveInEdgeData{
this_worker_id, tx_id, vertex_id, edge_address});
RaiseIfRemoteError(res.member);
}

std::vector<utils::Future<UpdateResult>> UpdatesRpcClients::UpdateApplyAll(
int skip_worker_id, tx::TransactionId tx_id) {
return coordination_->ExecuteOnWorkers<UpdateResult>(
skip_worker_id, [tx_id](int worker_id, auto &client) {
auto res = client.template Call<UpdateApplyRpc>(tx_id);
return res.member;
});
}

} // namespace distributed
@ -1,82 +0,0 @@
#pragma once

#include <unordered_map>
#include <vector>

#include "distributed/coordination.hpp"
#include "distributed/updates_rpc_messages.hpp"
#include "durability/distributed/state_delta.hpp"
#include "query/typed_value.hpp"
#include "storage/common/types/types.hpp"
#include "storage/distributed/address_types.hpp"
#include "storage/distributed/gid.hpp"
#include "transactions/type.hpp"
#include "utils/future.hpp"

namespace distributed {

/// Exposes the functionality to send updates to other workers (that own the
/// graph element we are updating). Also enables us to call for a worker to
/// apply the accumulated deferred updates, or discard them.
class UpdatesRpcClients {
 public:
  explicit UpdatesRpcClients(Coordination *coordination)
      : coordination_(coordination) {}

  /// Sends an update delta to the given worker.
  UpdateResult Update(int this_worker_id, int to_worker_id,
                      const database::StateDelta &delta);

  /// Creates a vertex on the given worker and returns its id.
  CreatedVertexInfo CreateVertex(
      int worker_id, tx::TransactionId tx_id,
      const std::vector<storage::Label> &labels,
      const std::unordered_map<storage::Property, PropertyValue> &properties,
      std::optional<int64_t> cypher_id = std::nullopt);

  /// Creates an edge on the given worker and returns its address. If the `to`
  /// vertex is on the same worker as `from`, then all remote CRUD will be
  /// handled by a call to this function. Otherwise a separate call to
  /// `AddInEdge` might be necessary. Throws all the exceptions that can
  /// occur remotely as a result of updating a vertex.
  CreatedEdgeInfo CreateEdge(int this_worker_id, tx::TransactionId tx_id,
                             VertexAccessor &from, VertexAccessor &to,
                             storage::EdgeType edge_type,
                             std::optional<int64_t> cypher_id = std::nullopt);
  // TODO (buda): Another machine in the cluster is asked to create an edge.
  // cypher_id should be generated in that process. It probably doesn't make
  // sense to have an optional cypher id here. Maybe for recovery purposes.

  /// Adds the edge with the given address to the `to` vertex as an incoming
  /// edge. Only used when `to` is remote and not on the same worker as `from`.
  void AddInEdge(int this_worker_id, tx::TransactionId tx_id,
                 VertexAccessor &from, storage::EdgeAddress edge_address,
                 VertexAccessor &to, storage::EdgeType edge_type);

  /// Removes a vertex from the other worker.
  void RemoveVertex(int this_worker_id, int to_worker_id,
                    tx::TransactionId tx_id, gid::Gid gid, bool check_empty);

  /// Removes an edge on another worker. This also handles the `from` vertex
  /// outgoing edge, as that vertex is on the same worker as the edge. If the
  /// `to` vertex is on the same worker, then that side is handled too by the
  /// single RPC call, otherwise a separate call has to be made to
  /// RemoveInEdge.
  void RemoveEdge(int this_worker_id, int to_worker_id, tx::TransactionId tx_id,
                  gid::Gid edge_gid, gid::Gid vertex_from_id,
                  storage::VertexAddress vertex_to_addr);

  void RemoveInEdge(int this_worker_id, int to_worker_id,
                    tx::TransactionId tx_id, gid::Gid vertex_id,
                    storage::EdgeAddress edge_address);

  /// Calls for all the workers (except the given one) to apply their updates
  /// and returns the future results.
  std::vector<utils::Future<UpdateResult>> UpdateApplyAll(
      int skip_worker_id, tx::TransactionId tx_id);

 private:
  Coordination *coordination_;
};

} // namespace distributed
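A hedged sketch of the remote edge-creation flow described in the comments above: CreateEdge handles the `from` side (and the `to` side when both endpoints share a worker), while AddInEdge covers the case where `to` lives on a third worker. The variables `clients`, `my_worker_id`, `tx_id`, `from`, `to` and `edge_type` are assumed to exist in the calling context.

// Sketch, not an actual call site from this codebase.
auto created = clients.CreateEdge(my_worker_id, tx_id, from, to, edge_type);
if (to.GlobalAddress().worker_id() != from.GlobalAddress().worker_id()) {
  // `to` is on yet another worker, so its incoming-edge side has to be
  // updated with a separate RPC.
  clients.AddInEdge(my_worker_id, tx_id, from, created.edge_address, to,
                    edge_type);
}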
@ -1,136 +0,0 @@
|
||||
#>cpp
|
||||
#pragma once
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
#include "communication/rpc/messages.hpp"
|
||||
#include "database/distributed/serialization.hpp"
|
||||
#include "durability/distributed/state_delta.hpp"
|
||||
#include "storage/distributed/address_types.hpp"
|
||||
#include "storage/distributed/gid.hpp"
|
||||
#include "storage/distributed/rpc/serialization.hpp"
|
||||
#include "transactions/type.hpp"
|
||||
cpp<#
|
||||
|
||||
(lcp:namespace distributed)
|
||||
|
||||
(lcp:define-enum update-result
|
||||
(done
|
||||
serialization-error
|
||||
lock-timeout-error
|
||||
update-deleted-error
|
||||
unable-to-delete-vertex-error)
|
||||
(:documentation "The result of sending or applying a deferred update to a worker.")
|
||||
(:serialize))
|
||||
|
||||
(lcp:define-rpc update
|
||||
(:request ((member "::database::StateDelta")
|
||||
(worker-id :int64_t)))
|
||||
(:response ((member "UpdateResult"))))
|
||||
|
||||
(lcp:define-rpc update-apply
|
||||
(:request ((member "::tx::TransactionId")))
|
||||
(:response ((member "UpdateResult"))))
|
||||
|
||||
(lcp:define-struct create-result ()
|
||||
((result "UpdateResult")
|
||||
(cypher-id :int64_t :documentation "Only valid if creation was successful.")
|
||||
(gid "::gid::Gid" :documentation "Only valid if creation was successful."))
|
||||
(:serialize (:slk)))
|
||||
|
||||
(lcp:define-struct create-vertex-req-data ()
|
||||
((tx-id "::tx::TransactionId")
|
||||
(labels "std::vector<storage::Label>")
|
||||
(properties "std::unordered_map<storage::Property, PropertyValue>")
|
||||
(cypher-id "std::optional<int64_t>"))
|
||||
(:serialize (:slk)))
|
||||
|
||||
(lcp:define-rpc create-vertex
|
||||
(:request ((member "CreateVertexReqData")))
|
||||
(:response ((member "CreateResult"))))
|
||||
|
||||
(lcp:define-struct create-edge-req-data ()
|
||||
((worker-id :int64_t)
|
||||
(from "::gid::Gid")
|
||||
(to "::storage::VertexAddress")
|
||||
(edge-type "::storage::EdgeType")
|
||||
(tx-id "::tx::TransactionId")
|
||||
(cypher-id "std::optional<int64_t>"))
|
||||
(:serialize (:slk)))
|
||||
|
||||
(lcp:define-rpc create-edge
|
||||
(:request ((member "CreateEdgeReqData")))
|
||||
(:response ((member "CreateResult"))))
|
||||
|
||||
(lcp:define-struct add-in-edge-req-data ()
|
||||
((worker-id :int64_t)
|
||||
(from "::storage::VertexAddress")
|
||||
(edge-address "::storage::EdgeAddress")
|
||||
(to "::gid::Gid")
|
||||
(edge-type "::storage::EdgeType")
|
||||
(tx-id "::tx::TransactionId"))
|
||||
(:serialize (:slk)))
|
||||
|
||||
(lcp:define-rpc add-in-edge
|
||||
(:request ((member "AddInEdgeReqData")))
|
||||
(:response ((member "UpdateResult"))))
|
||||
|
||||
(lcp:define-struct remove-vertex-req-data ()
|
||||
((worker-id :int64_t)
|
||||
(gid "::gid::Gid")
|
||||
(tx-id "::tx::TransactionId")
|
||||
(check-empty :bool))
|
||||
(:serialize (:slk)))
|
||||
|
||||
(lcp:define-rpc remove-vertex
|
||||
(:request ((member "RemoveVertexReqData")))
|
||||
(:response ((member "UpdateResult"))))
|
||||
|
||||
(lcp:define-struct remove-edge-data ()
|
||||
((worker-id :int64_t)
|
||||
(tx-id "::tx::TransactionId")
|
||||
(edge-id "::gid::Gid")
|
||||
(vertex-from-id "::gid::Gid")
|
||||
(vertex-to-address "::storage::VertexAddress"))
|
||||
(:serialize (:slk)))
|
||||
|
||||
(lcp:define-rpc remove-edge
|
||||
(:request ((member "RemoveEdgeData")))
|
||||
(:response ((member "UpdateResult"))))
|
||||
|
||||
(lcp:define-struct remove-in-edge-data ()
|
||||
((worker-id :int64_t)
|
||||
(tx-id "::tx::TransactionId")
|
||||
(vertex "::gid::Gid")
|
||||
(edge-address "::storage::EdgeAddress"))
|
||||
(:serialize (:slk)))
|
||||
|
||||
(lcp:define-rpc remove-in-edge
|
||||
(:request ((member "RemoveInEdgeData")))
|
||||
(:response ((member "UpdateResult"))))
|
||||
|
||||
(lcp:define-struct created-info ()
|
||||
((cypher-id "int64_t")
|
||||
(gid "::gid::Gid"))
|
||||
(:public #>cpp
|
||||
CreatedInfo(int64_t cypher_id, gid::Gid gid)
|
||||
: cypher_id(cypher_id), gid(gid) {}
|
||||
cpp<#))
|
||||
|
||||
(lcp:define-struct created-vertex-info ()
|
||||
((cypher-id "int64_t")
|
||||
(gid "::gid::Gid"))
|
||||
(:public #>cpp
|
||||
CreatedVertexInfo(int64_t cypher_id, gid::Gid gid)
|
||||
: cypher_id(cypher_id), gid(gid) {}
|
||||
cpp<#))
|
||||
|
||||
(lcp:define-struct created-edge-info ()
|
||||
((cypher-id "int64_t")
|
||||
(edge-address "::storage::EdgeAddress"))
|
||||
(:public #>cpp
|
||||
CreatedEdgeInfo(int64_t cypher_id, storage::EdgeAddress edge_address)
|
||||
: cypher_id(cypher_id), edge_address(edge_address) {}
|
||||
cpp<#))
|
||||
|
||||
(lcp:pop-namespace) ;; distributed
|
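The LCP definitions above are what generate the Req/Res classes used by the client and server code. As a rough, assumed mapping (the exact generated wrapper type is an assumption, not taken from this diff), a definition such as `(lcp:define-rpc remove-vertex ...)` yields C++ along these lines:

// Assumed shape of the generated code (simplified): the request wraps a
// RemoveVertexReqData member and the response wraps an UpdateResult member,
// which is why call sites read `res.member`.
struct RemoveVertexReq { RemoveVertexReqData member; };
struct RemoveVertexRes { UpdateResult member; };
// The pairing into an RPC type is assumed to look roughly like this:
using RemoveVertexRpc =
    communication::rpc::RequestResponse<RemoveVertexReq, RemoveVertexRes>;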
@ -1,495 +0,0 @@
|
||||
#include "distributed/updates_rpc_server.hpp"
|
||||
|
||||
#include <utility>
|
||||
|
||||
#include <glog/logging.h>
|
||||
|
||||
#include "utils/thread/sync.hpp"
|
||||
|
||||
namespace distributed {
|
||||
|
||||
template <typename TRecordAccessor>
|
||||
UpdateResult UpdatesRpcServer::TransactionUpdates<TRecordAccessor>::Emplace(
|
||||
const database::StateDelta &delta, int worker_id) {
|
||||
auto gid = std::is_same<TRecordAccessor, VertexAccessor>::value
|
||||
? delta.vertex_id
|
||||
: delta.edge_id;
|
||||
std::lock_guard<utils::SpinLock> guard{lock_};
|
||||
auto found = deltas_.find(gid);
|
||||
if (found == deltas_.end()) {
|
||||
found = deltas_
|
||||
.emplace(gid, std::make_pair(FindAccessor(gid),
|
||||
std::vector<DeltaPair>{}))
|
||||
.first;
|
||||
}
|
||||
|
||||
found->second.second.emplace_back(delta, worker_id);
|
||||
|
||||
// TODO call `RecordAccessor::update` to force serialization errors to
|
||||
// fail-fast (as opposed to when all the deltas get applied).
|
||||
//
|
||||
// This is problematic because `VersionList::update` needs to become
|
||||
// thread-safe within the same transaction. Note that the concurrency is
|
||||
// possible both between the owner worker interpretation thread and an RPC
|
||||
// thread (current thread), as well as multiple RPC threads if this
|
||||
// object's lock is released (perhaps desirable).
|
||||
//
|
||||
// A potential solution *might* be that `LockStore::Lock` returns a `bool`
|
||||
// indicating if the caller was the one obtaining the lock (not the same
|
||||
// as lock already being held by the same transaction).
|
||||
//
|
||||
// Another thing that needs to be done (if we do this) is ensuring that
|
||||
// `LockStore::Take` is thread-safe when called in parallel in the same
|
||||
// transaction. Currently it's thread-safe only when called in parallel
|
||||
// from different transactions (only one manages to take the RecordLock).
|
||||
//
|
||||
// Deferring the implementation of this as it's tricky, and essentially an
|
||||
// optimization.
|
||||
//
|
||||
// try {
|
||||
// found->second.first.update();
|
||||
// } catch (const mvcc::SerializationError &) {
|
||||
// return UpdateResult::SERIALIZATION_ERROR;
|
||||
// } catch (const RecordDeletedError &) {
|
||||
// return UpdateResult::UPDATE_DELETED_ERROR;
|
||||
// } catch (const utils::LockTimeoutException &) {
|
||||
// return UpdateResult::LOCK_TIMEOUT_ERROR;
|
||||
// }
|
||||
return UpdateResult::DONE;
|
||||
}
|
||||
|
||||
template <typename TRecordAccessor>
|
||||
CreatedInfo UpdatesRpcServer::TransactionUpdates<TRecordAccessor>::CreateVertex(
|
||||
const std::vector<storage::Label> &labels,
|
||||
const std::unordered_map<storage::Property, PropertyValue> &properties,
|
||||
std::optional<int64_t> cypher_id) {
|
||||
auto result = db_accessor_->InsertVertex(std::nullopt, cypher_id);
|
||||
for (auto &label : labels) result.add_label(label);
|
||||
for (auto &kv : properties) result.PropsSet(kv.first, kv.second);
|
||||
std::lock_guard<utils::SpinLock> guard{lock_};
|
||||
deltas_.emplace(result.gid(),
|
||||
std::make_pair(result, std::vector<DeltaPair>{}));
|
||||
return CreatedInfo(result.CypherId(), result.gid());
|
||||
}
|
||||
|
||||
template <typename TRecordAccessor>
|
||||
CreatedInfo UpdatesRpcServer::TransactionUpdates<TRecordAccessor>::CreateEdge(
|
||||
gid::Gid from, storage::VertexAddress to, storage::EdgeType edge_type,
|
||||
int worker_id, std::optional<int64_t> cypher_id) {
|
||||
auto &db = db_accessor_->db();
|
||||
auto from_addr = db.storage().LocalizedAddressIfPossible(
|
||||
storage::VertexAddress(from, worker_id));
|
||||
auto to_addr = db.storage().LocalizedAddressIfPossible(to);
|
||||
auto edge = db_accessor_->InsertOnlyEdge(from_addr, to_addr, edge_type,
|
||||
std::nullopt, cypher_id);
|
||||
std::lock_guard<utils::SpinLock> guard{lock_};
|
||||
deltas_.emplace(edge.gid(),
|
||||
std::make_pair(edge, std::vector<DeltaPair>{}));
|
||||
return CreatedInfo(edge.CypherId(), edge.gid());
|
||||
}
|
||||
|
||||
template <typename TRecordAccessor>
|
||||
UpdateResult UpdatesRpcServer::TransactionUpdates<TRecordAccessor>::Apply() {
|
||||
std::lock_guard<utils::SpinLock> guard{lock_};
|
||||
for (auto &kv : deltas_) {
|
||||
auto &record_accessor = kv.second.first;
|
||||
// We need to reconstruct the record as in the meantime some local
|
||||
// update might have updated it.
|
||||
record_accessor.Reconstruct();
|
||||
for (auto &pair : kv.second.second) {
|
||||
auto delta = pair.delta;
|
||||
try {
|
||||
auto &dba = *db_accessor_;
|
||||
switch (delta.type) {
|
||||
case database::StateDelta::Type::TRANSACTION_BEGIN:
|
||||
case database::StateDelta::Type::TRANSACTION_COMMIT:
|
||||
case database::StateDelta::Type::TRANSACTION_ABORT:
|
||||
case database::StateDelta::Type::CREATE_VERTEX:
|
||||
case database::StateDelta::Type::CREATE_EDGE:
|
||||
case database::StateDelta::Type::BUILD_INDEX:
|
||||
LOG(FATAL) << "Can only apply record update deltas for remote "
|
||||
"graph element";
|
||||
case database::StateDelta::Type::REMOVE_VERTEX:
|
||||
if (!db_accessor().RemoveVertex(
|
||||
reinterpret_cast<VertexAccessor &>(record_accessor),
|
||||
delta.check_empty)) {
|
||||
return UpdateResult::UNABLE_TO_DELETE_VERTEX_ERROR;
|
||||
}
|
||||
break;
|
||||
case database::StateDelta::Type::SET_PROPERTY_VERTEX:
|
||||
case database::StateDelta::Type::SET_PROPERTY_EDGE:
|
||||
record_accessor.PropsSet(delta.property, delta.value);
|
||||
break;
|
||||
case database::StateDelta::Type::ADD_LABEL:
|
||||
reinterpret_cast<VertexAccessor &>(record_accessor)
|
||||
.add_label(delta.label);
|
||||
break;
|
||||
case database::StateDelta::Type::REMOVE_LABEL:
|
||||
reinterpret_cast<VertexAccessor &>(record_accessor)
|
||||
.remove_label(delta.label);
|
||||
break;
|
||||
case database::StateDelta::Type::ADD_OUT_EDGE:
|
||||
record_accessor.update();
|
||||
reinterpret_cast<Vertex &>(*record_accessor.GetNew())
|
||||
.out_.emplace(dba.db().storage().LocalizedAddressIfPossible(
|
||||
delta.vertex_to_address),
|
||||
dba.db().storage().LocalizedAddressIfPossible(
|
||||
delta.edge_address),
|
||||
delta.edge_type);
|
||||
dba.wal().Emplace(delta);
|
||||
break;
|
||||
case database::StateDelta::Type::ADD_IN_EDGE:
|
||||
record_accessor.update();
|
||||
reinterpret_cast<Vertex &>(*record_accessor.GetNew())
|
||||
.in_.emplace(dba.db().storage().LocalizedAddressIfPossible(
|
||||
delta.vertex_from_address),
|
||||
dba.db().storage().LocalizedAddressIfPossible(
|
||||
delta.edge_address),
|
||||
delta.edge_type);
|
||||
dba.wal().Emplace(delta);
|
||||
break;
|
||||
case database::StateDelta::Type::REMOVE_EDGE:
|
||||
// We only remove the edge as a result of this StateDelta,
|
||||
// because the removal of edge from vertex in/out is performed
|
||||
// in REMOVE_[IN/OUT]_EDGE deltas.
|
||||
db_accessor_->RemoveEdge(
|
||||
reinterpret_cast<EdgeAccessor &>(record_accessor), false,
|
||||
false);
|
||||
break;
|
||||
case database::StateDelta::Type::REMOVE_OUT_EDGE:
|
||||
reinterpret_cast<VertexAccessor &>(record_accessor)
|
||||
.RemoveOutEdge(delta.edge_address);
|
||||
break;
|
||||
case database::StateDelta::Type::REMOVE_IN_EDGE:
|
||||
reinterpret_cast<VertexAccessor &>(record_accessor)
|
||||
.RemoveInEdge(delta.edge_address);
|
||||
break;
|
||||
}
|
||||
} catch (const mvcc::SerializationError &) {
|
||||
return UpdateResult::SERIALIZATION_ERROR;
|
||||
} catch (const RecordDeletedError &) {
|
||||
return UpdateResult::UPDATE_DELETED_ERROR;
|
||||
} catch (const utils::LockTimeoutException &) {
|
||||
return UpdateResult::LOCK_TIMEOUT_ERROR;
|
||||
}
|
||||
}
|
||||
}
|
||||
return UpdateResult::DONE;
|
||||
}
|
||||
|
||||
template <typename TRecordAccessor>
|
||||
void UpdatesRpcServer::TransactionUpdates<TRecordAccessor>::ApplyDeltasToRecord(
|
||||
gid::Gid gid, int worker_id, TRecord **old, TRecord **newr) {
|
||||
std::lock_guard<utils::SpinLock> guard{lock_};
|
||||
auto found = deltas_.find(gid);
|
||||
if (found == deltas_.end()) return;
|
||||
|
||||
auto update = [](auto **old, auto **newr) {
|
||||
if (!*newr) {
|
||||
DCHECK(*old) << "Trying to create new record but pointer to old record "
|
||||
"is nullptr.";
|
||||
|
||||
*newr = (*old)->CloneData();
|
||||
}
|
||||
};
|
||||
|
||||
for (auto &pair : found->second.second) {
|
||||
auto delta = pair.delta;
|
||||
if (worker_id != pair.worker_id) continue;
|
||||
|
||||
switch (delta.type) {
|
||||
case database::StateDelta::Type::SET_PROPERTY_VERTEX:
|
||||
case database::StateDelta::Type::SET_PROPERTY_EDGE:
|
||||
update(old, newr);
|
||||
(*newr)->properties_.set(delta.property, delta.value);
|
||||
break;
|
||||
case database::StateDelta::Type::ADD_LABEL: {
|
||||
update(old, newr);
|
||||
auto &labels = reinterpret_cast<Vertex *>(*newr)->labels_;
|
||||
if (!utils::Contains(labels, delta.label)) {
|
||||
labels.emplace_back(delta.label);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case database::StateDelta::Type::REMOVE_LABEL: {
|
||||
update(old, newr);
|
||||
auto &labels = reinterpret_cast<Vertex *>(*newr)->labels_;
|
||||
auto found = std::find(labels.begin(), labels.end(), delta.label);
|
||||
if (found == labels.end()) continue;
|
||||
std::swap(*found, labels.back());
|
||||
labels.pop_back();
|
||||
break;
|
||||
}
|
||||
case database::StateDelta::Type::ADD_OUT_EDGE:
|
||||
update(old, newr);
|
||||
reinterpret_cast<Vertex *>(*newr)->out_.emplace(
|
||||
delta.vertex_to_address, delta.edge_address, delta.edge_type);
|
||||
break;
|
||||
case database::StateDelta::Type::ADD_IN_EDGE:
|
||||
update(old, newr);
|
||||
reinterpret_cast<Vertex *>(*newr)->in_.emplace(
|
||||
delta.vertex_from_address, delta.edge_address, delta.edge_type);
|
||||
break;
|
||||
case database::StateDelta::Type::REMOVE_OUT_EDGE:
|
||||
update(old, newr);
|
||||
reinterpret_cast<Vertex *>(*newr)->out_.RemoveEdge(delta.edge_address);
|
||||
break;
|
||||
case database::StateDelta::Type::REMOVE_IN_EDGE:
|
||||
update(old, newr);
|
||||
reinterpret_cast<Vertex *>(*newr)->in_.RemoveEdge(delta.edge_address);
|
||||
break;
|
||||
default:
|
||||
// Effects of REMOVE VERTEX and REMOVE EDGE aren't visible in the
|
||||
// current command id so we can safely ignore this case.
|
||||
// Other deltas we're ignoring don't update record.
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
UpdatesRpcServer::UpdatesRpcServer(database::GraphDb *db,
|
||||
distributed::Coordination *coordination)
|
||||
: db_(db) {
|
||||
coordination->Register<UpdateRpc>([this](auto *req_reader,
|
||||
auto *res_builder) {
|
||||
UpdateReq req;
|
||||
slk::Load(&req, req_reader);
|
||||
using DeltaType = database::StateDelta::Type;
|
||||
auto &delta = req.member;
|
||||
switch (delta.type) {
|
||||
case DeltaType::SET_PROPERTY_VERTEX:
|
||||
case DeltaType::ADD_LABEL:
|
||||
case DeltaType::REMOVE_LABEL:
|
||||
case database::StateDelta::Type::REMOVE_OUT_EDGE:
|
||||
case database::StateDelta::Type::REMOVE_IN_EDGE: {
|
||||
UpdateRes res(GetUpdates(vertex_updates_, delta.transaction_id)
|
||||
.Emplace(delta, req.worker_id));
|
||||
slk::Save(res, res_builder);
|
||||
return;
|
||||
}
|
||||
case DeltaType::SET_PROPERTY_EDGE: {
|
||||
UpdateRes res(GetUpdates(edge_updates_, delta.transaction_id)
|
||||
.Emplace(delta, req.worker_id));
|
||||
slk::Save(res, res_builder);
|
||||
return;
|
||||
}
|
||||
default:
|
||||
LOG(FATAL) << "Can't perform a remote update with delta type: "
|
||||
<< static_cast<int>(req.member.type);
|
||||
}
|
||||
});
|
||||
|
||||
coordination->Register<UpdateApplyRpc>(
|
||||
[this](auto *req_reader, auto *res_builder) {
|
||||
UpdateApplyReq req;
|
||||
slk::Load(&req, req_reader);
|
||||
UpdateApplyRes res(Apply(req.member));
|
||||
slk::Save(res, res_builder);
|
||||
});
|
||||
|
||||
coordination->Register<CreateVertexRpc>([this](auto *req_reader,
|
||||
auto *res_builder) {
|
||||
CreateVertexReq req;
|
||||
slk::Load(&req, req_reader);
|
||||
auto result = GetUpdates(vertex_updates_, req.member.tx_id)
|
||||
.CreateVertex(req.member.labels, req.member.properties,
|
||||
req.member.cypher_id);
|
||||
CreateVertexRes res(
|
||||
CreateResult{UpdateResult::DONE, result.cypher_id, result.gid});
|
||||
slk::Save(res, res_builder);
|
||||
});
|
||||
|
||||
coordination->Register<CreateEdgeRpc>(
|
||||
[this](auto *req_reader, auto *res_builder) {
|
||||
CreateEdgeReq req;
|
||||
slk::Load(&req, req_reader);
|
||||
auto data = req.member;
|
||||
auto creation_result = CreateEdge(data);
|
||||
|
||||
// If `from` and `to` are both on this worker, we handle it in this
|
||||
// RPC call. Do it only if CreateEdge succeeded.
|
||||
if (creation_result.result == UpdateResult::DONE &&
|
||||
data.to.worker_id() == db_->WorkerId()) {
|
||||
auto to_delta = database::StateDelta::AddInEdge(
|
||||
data.tx_id, data.to.gid(), {data.from, db_->WorkerId()},
|
||||
{creation_result.gid, db_->WorkerId()}, data.edge_type);
|
||||
creation_result.result =
|
||||
GetUpdates(vertex_updates_, data.tx_id)
|
||||
.Emplace(to_delta, data.worker_id);
|
||||
}
|
||||
|
||||
CreateEdgeRes res(creation_result);
|
||||
slk::Save(res, res_builder);
|
||||
});
|
||||
|
||||
coordination->Register<AddInEdgeRpc>(
|
||||
[this](auto *req_reader, auto *res_builder) {
|
||||
AddInEdgeReq req;
|
||||
slk::Load(&req, req_reader);
|
||||
auto to_delta = database::StateDelta::AddInEdge(
|
||||
req.member.tx_id, req.member.to, req.member.from,
|
||||
req.member.edge_address, req.member.edge_type);
|
||||
auto result = GetUpdates(vertex_updates_, req.member.tx_id)
|
||||
.Emplace(to_delta, req.member.worker_id);
|
||||
AddInEdgeRes res(result);
|
||||
slk::Save(res, res_builder);
|
||||
});
|
||||
|
||||
coordination->Register<RemoveVertexRpc>(
|
||||
[this](auto *req_reader, auto *res_builder) {
|
||||
RemoveVertexReq req;
|
||||
slk::Load(&req, req_reader);
|
||||
auto to_delta = database::StateDelta::RemoveVertex(
|
||||
req.member.tx_id, req.member.gid, req.member.check_empty);
|
||||
auto result = GetUpdates(vertex_updates_, req.member.tx_id)
|
||||
.Emplace(to_delta, req.member.worker_id);
|
||||
RemoveVertexRes res(result);
|
||||
slk::Save(res, res_builder);
|
||||
});
|
||||
|
||||
coordination->Register<RemoveEdgeRpc>(
|
||||
[this](auto *req_reader, auto *res_builder) {
|
||||
RemoveEdgeReq req;
|
||||
slk::Load(&req, req_reader);
|
||||
RemoveEdgeRes res(RemoveEdge(req.member));
|
||||
slk::Save(res, res_builder);
|
||||
});
|
||||
|
||||
coordination->Register<RemoveInEdgeRpc>([this](auto *req_reader,
|
||||
auto *res_builder) {
|
||||
RemoveInEdgeReq req;
|
||||
slk::Load(&req, req_reader);
|
||||
auto data = req.member;
|
||||
RemoveInEdgeRes res(
|
||||
GetUpdates(vertex_updates_, data.tx_id)
|
||||
.Emplace(database::StateDelta::RemoveInEdge(data.tx_id, data.vertex,
|
||||
data.edge_address),
|
||||
data.worker_id));
|
||||
slk::Save(res, res_builder);
|
||||
});
|
||||
}
|
||||
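Every handler registered in the constructor above follows the same load/compute/save shape. A minimal sketch of that pattern, where `SomeRpc`, `SomeReq`, `SomeRes` and `HandleRequest` are placeholders rather than real types from this file:

// Pattern sketch only.
coordination->Register<SomeRpc>([this](auto *req_reader, auto *res_builder) {
  SomeReq req;
  slk::Load(&req, req_reader);      // deserialize the incoming request
  SomeRes res(HandleRequest(req));  // produce the result
  slk::Save(res, res_builder);      // serialize the response
});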
|
||||
UpdateResult UpdatesRpcServer::Apply(tx::TransactionId tx_id) {
|
||||
auto apply = [tx_id](auto &collection) {
|
||||
auto access = collection.access();
|
||||
auto found = access.find(tx_id);
|
||||
if (found == access.end()) {
|
||||
return UpdateResult::DONE;
|
||||
}
|
||||
auto result = found->second.Apply();
|
||||
access.remove(tx_id);
|
||||
return result;
|
||||
};
|
||||
|
||||
auto vertex_result = apply(vertex_updates_);
|
||||
auto edge_result = apply(edge_updates_);
|
||||
if (vertex_result != UpdateResult::DONE) return vertex_result;
|
||||
if (edge_result != UpdateResult::DONE) return edge_result;
|
||||
return UpdateResult::DONE;
|
||||
}
|
||||
|
||||
template <>
|
||||
void UpdatesRpcServer::ApplyDeltasToRecord<Vertex>(tx::TransactionId tx_id,
|
||||
gid::Gid gid, int worker_id,
|
||||
Vertex **old,
|
||||
Vertex **newr) {
|
||||
auto access = vertex_updates_.access();
|
||||
auto found = access.find(tx_id);
|
||||
if (found != access.end())
|
||||
found->second.ApplyDeltasToRecord(gid, worker_id, old, newr);
|
||||
}
|
||||
|
||||
template <>
|
||||
void UpdatesRpcServer::ApplyDeltasToRecord<Edge>(tx::TransactionId tx_id,
|
||||
gid::Gid gid, int worker_id,
|
||||
Edge **old,
|
||||
Edge **newr) {
|
||||
auto access = edge_updates_.access();
|
||||
auto found = access.find(tx_id);
|
||||
if (found != access.end())
|
||||
found->second.ApplyDeltasToRecord(gid, worker_id, old, newr);
|
||||
}
|
||||
|
||||
void UpdatesRpcServer::ClearTransactionalCache(
|
||||
tx::TransactionId oldest_active) {
|
||||
auto vertex_access = vertex_updates_.access();
|
||||
for (auto &kv : vertex_access) {
|
||||
if (kv.first < oldest_active) {
|
||||
vertex_access.remove(kv.first);
|
||||
}
|
||||
}
|
||||
auto edge_access = edge_updates_.access();
|
||||
for (auto &kv : edge_access) {
|
||||
if (kv.first < oldest_active) {
|
||||
edge_access.remove(kv.first);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Gets/creates the TransactionUpdates for the given transaction.
|
||||
template <typename TAccessor>
|
||||
UpdatesRpcServer::TransactionUpdates<TAccessor> &UpdatesRpcServer::GetUpdates(
|
||||
MapT<TAccessor> &updates, tx::TransactionId tx_id) {
|
||||
return updates.access()
|
||||
.emplace(tx_id, std::make_tuple(tx_id),
|
||||
std::make_tuple(std::ref(db_), tx_id))
|
||||
.first->second;
|
||||
}
|
||||
|
||||
CreateResult UpdatesRpcServer::CreateEdge(const CreateEdgeReqData &req) {
|
||||
auto ids = GetUpdates(edge_updates_, req.tx_id)
|
||||
.CreateEdge(req.from, req.to, req.edge_type, db_->WorkerId(),
|
||||
req.cypher_id);
|
||||
|
||||
// cypher_id doesn't have to be inserted because edge is stored
|
||||
// somewhere else in the cluster. Here is only vertex update.
|
||||
auto from_delta = database::StateDelta::AddOutEdge(
|
||||
req.tx_id, req.from, req.to, {ids.gid, db_->WorkerId()}, req.edge_type);
|
||||
|
||||
auto result = GetUpdates(vertex_updates_, req.tx_id)
|
||||
.Emplace(from_delta, req.worker_id);
|
||||
return {result, ids.cypher_id, ids.gid};
|
||||
}
|
||||
|
||||
UpdateResult UpdatesRpcServer::RemoveEdge(const RemoveEdgeData &data) {
|
||||
// Edge removal.
|
||||
auto deletion_delta =
|
||||
database::StateDelta::RemoveEdge(data.tx_id, data.edge_id);
|
||||
auto result = GetUpdates(edge_updates_, data.tx_id)
|
||||
.Emplace(deletion_delta, data.worker_id);
|
||||
|
||||
// Out-edge removal, for sure is local.
|
||||
if (result == UpdateResult::DONE) {
|
||||
auto remove_out_delta = database::StateDelta::RemoveOutEdge(
|
||||
data.tx_id, data.vertex_from_id, {data.edge_id, db_->WorkerId()});
|
||||
result = GetUpdates(vertex_updates_, data.tx_id)
|
||||
.Emplace(remove_out_delta, data.worker_id);
|
||||
}
|
||||
|
||||
// In-edge removal, might not be local.
|
||||
if (result == UpdateResult::DONE &&
|
||||
data.vertex_to_address.worker_id() == db_->WorkerId()) {
|
||||
auto remove_in_delta = database::StateDelta::RemoveInEdge(
|
||||
data.tx_id, data.vertex_to_address.gid(),
|
||||
{data.edge_id, db_->WorkerId()});
|
||||
result = GetUpdates(vertex_updates_, data.tx_id)
|
||||
.Emplace(remove_in_delta, data.worker_id);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <>
|
||||
VertexAccessor
|
||||
UpdatesRpcServer::TransactionUpdates<VertexAccessor>::FindAccessor(
|
||||
gid::Gid gid) {
|
||||
return db_accessor_->FindVertex(gid, false);
|
||||
}
|
||||
|
||||
template <>
|
||||
EdgeAccessor UpdatesRpcServer::TransactionUpdates<EdgeAccessor>::FindAccessor(
|
||||
gid::Gid gid) {
|
||||
return db_accessor_->FindEdge(gid, false);
|
||||
}
|
||||
|
||||
} // namespace distributed
|
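Taken together with the client side, the deferred-update lifecycle implemented above is roughly: deltas arrive over UpdateRpc and are buffered per transaction, UpdateApplyRpc applies them, and ClearTransactionalCache drops state for transactions older than the oldest active one. A compressed, assumed sketch from the owning worker's perspective:

// Sketch of the server-side lifecycle for one transaction `tx_id`;
// `db`, `coordination` and `oldest_active_tx_id` come from the caller.
UpdatesRpcServer server(&db, &coordination);
// 1. Remote workers send StateDelta instances over UpdateRpc; each one is
//    buffered in the per-transaction TransactionUpdates cache.
// 2. On commit, the master broadcasts UpdateApplyRpc, which ends up in:
auto result = server.Apply(tx_id);
// 3. Periodically, state for finished transactions is garbage collected:
server.ClearTransactionalCache(oldest_active_tx_id);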
@ -1,136 +0,0 @@
|
||||
/// @file
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "glog/logging.h"
|
||||
|
||||
#include "data_structures/concurrent/concurrent_map.hpp"
|
||||
#include "database/distributed/distributed_graph_db.hpp"
|
||||
#include "database/distributed/graph_db_accessor.hpp"
|
||||
#include "distributed/coordination.hpp"
|
||||
#include "distributed/updates_rpc_messages.hpp"
|
||||
#include "durability/distributed/state_delta.hpp"
|
||||
#include "query/typed_value.hpp"
|
||||
#include "storage/common/types/types.hpp"
|
||||
#include "storage/distributed/edge_accessor.hpp"
|
||||
#include "storage/distributed/gid.hpp"
|
||||
#include "storage/distributed/vertex_accessor.hpp"
|
||||
#include "transactions/type.hpp"
|
||||
#include "utils/thread/sync.hpp"
|
||||
|
||||
namespace distributed {
|
||||
|
||||
/// An RPC server that accepts and holds deferred updates (deltas) until it's
|
||||
/// told to apply or discard them. The updates are organized and applied per
|
||||
/// transaction in this single updates server.
|
||||
///
|
||||
/// Attempts to get serialization and update-after-delete errors to happen as
|
||||
/// soon as possible during query execution (fail fast).
|
||||
class UpdatesRpcServer {
|
||||
// Remote updates for one transaction.
|
||||
template <typename TRecordAccessor>
|
||||
class TransactionUpdates {
|
||||
struct DeltaPair {
|
||||
DeltaPair(const database::StateDelta &delta, int worker_id)
|
||||
: delta(delta), worker_id(worker_id) {}
|
||||
|
||||
database::StateDelta delta;
|
||||
int worker_id;
|
||||
};
|
||||
|
||||
public:
|
||||
using TRecord = typename std::remove_pointer<decltype(
|
||||
std::declval<TRecordAccessor>().GetNew())>::type;
|
||||
|
||||
TransactionUpdates(database::GraphDb *db,
|
||||
tx::TransactionId tx_id)
|
||||
: db_accessor_(db->Access(tx_id)) {}
|
||||
|
||||
/// Adds a delta and returns the result. Does not modify the state (data)
|
||||
/// of the graph element the update is for, but calls the `update` method
|
||||
/// to fail-fast on serialization and update-after-delete errors.
|
||||
UpdateResult Emplace(const database::StateDelta &delta, int worker_id);
|
||||
|
||||
/// Creates a new vertex and returns its cypher_id and gid.
|
||||
CreatedInfo CreateVertex(
|
||||
const std::vector<storage::Label> &labels,
|
||||
const std::unordered_map<storage::Property, PropertyValue> &properties,
|
||||
std::optional<int64_t> cypher_id = std::nullopt);
|
||||
|
||||
/// Creates a new edge and returns its cypher_id and gid. Does not update
|
||||
/// vertices at the end of the edge.
|
||||
CreatedInfo CreateEdge(gid::Gid from, storage::VertexAddress to,
|
||||
storage::EdgeType edge_type, int worker_id,
|
||||
std::optional<int64_t> cypher_id = std::nullopt);
|
||||
|
||||
/// Applies all the deltas on the record.
|
||||
UpdateResult Apply();
|
||||
|
||||
/// Applies all deltas made by a certain worker to the given old and new
/// record. This method may change the newr pointer, and if it does it won't
/// free that memory. In case the update method needs to be called on records,
/// a new record will be created by calling CloneData on the old record. The
/// caller has to make sure to free that memory.
|
||||
void ApplyDeltasToRecord(gid::Gid gid, int worker_id, TRecord **old,
|
||||
TRecord **newr);
|
||||
|
||||
auto &db_accessor() { return *db_accessor_; }
|
||||
|
||||
private:
|
||||
std::unique_ptr<database::GraphDbAccessor> db_accessor_;
|
||||
std::unordered_map<gid::Gid,
|
||||
std::pair<TRecordAccessor, std::vector<DeltaPair>>>
|
||||
deltas_;
|
||||
// Multiple workers might be sending remote updates concurrently.
|
||||
utils::SpinLock lock_;
|
||||
|
||||
// Helper method specialized for [Vertex|Edge]Accessor.
|
||||
TRecordAccessor FindAccessor(gid::Gid gid);
|
||||
};
|
||||
|
||||
public:
|
||||
UpdatesRpcServer(database::GraphDb *db,
|
||||
distributed::Coordination *coordination);
|
||||
|
||||
/// Applies all existing updates for the given transaction ID. If there are
|
||||
/// no updates for that transaction, nothing happens. Clears the updates
|
||||
/// cache after applying them, regardless of the result.
|
||||
UpdateResult Apply(tx::TransactionId tx_id);
|
||||
|
||||
/// Applies all deltas made by a certain worker to the given old and new
/// record. This method may change the newr pointer, and if it does it won't
/// free that memory. In case the update method needs to be called on records,
/// a new record will be created by calling CloneData on the old record. The
/// caller has to make sure to free that memory.
|
||||
template <typename TRecord>
|
||||
void ApplyDeltasToRecord(tx::TransactionId tx_id, gid::Gid, int worker_id,
|
||||
TRecord **old, TRecord **newr);
|
||||
|
||||
/// Clears the cache of local transactions that are completed. The signature
|
||||
/// of this method is dictated by `distributed::TransactionalCacheCleaner`.
|
||||
void ClearTransactionalCache(tx::TransactionId oldest_active);
|
||||
|
||||
private:
|
||||
database::GraphDb *db_;
|
||||
|
||||
template <typename TAccessor>
|
||||
using MapT = ConcurrentMap<tx::TransactionId, TransactionUpdates<TAccessor>>;
|
||||
MapT<VertexAccessor> vertex_updates_;
|
||||
MapT<EdgeAccessor> edge_updates_;
|
||||
|
||||
// Gets/creates the TransactionUpdates for the given transaction.
|
||||
template <typename TAccessor>
|
||||
TransactionUpdates<TAccessor> &GetUpdates(MapT<TAccessor> &updates,
|
||||
tx::TransactionId tx_id);
|
||||
|
||||
// Performs edge creation for the given request.
|
||||
CreateResult CreateEdge(const CreateEdgeReqData &req);
|
||||
|
||||
// Performs edge removal for the given request.
|
||||
UpdateResult RemoveEdge(const RemoveEdgeData &data);
|
||||
};
|
||||
|
||||
} // namespace distributed
|
@ -1,92 +0,0 @@
|
||||
#include "durability/distributed/paths.hpp"
|
||||
|
||||
#include <filesystem>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
|
||||
#include "glog/logging.h"
|
||||
|
||||
#include "transactions/type.hpp"
|
||||
#include "utils/string.hpp"
|
||||
#include "utils/timestamp.hpp"
|
||||
|
||||
namespace durability {
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
std::optional<tx::TransactionId> TransactionIdFromWalFilename(
|
||||
const std::string &name) {
|
||||
auto nullopt = std::nullopt;
|
||||
// Get the max_transaction_id from the file name that has format
|
||||
// "XXXXX__max_transaction_<MAX_TRANS_ID>_worker_<Worker_ID>"
|
||||
auto file_name_split = utils::RSplit(name, "__", 1);
|
||||
if (file_name_split.size() != 2) {
|
||||
LOG(WARNING) << "Unable to parse WAL file name: " << name;
|
||||
return nullopt;
|
||||
}
|
||||
if (utils::StartsWith(file_name_split[1], "current"))
|
||||
return std::numeric_limits<tx::TransactionId>::max();
|
||||
file_name_split = utils::Split(file_name_split[1], "_");
|
||||
if (file_name_split.size() != 5) {
|
||||
LOG(WARNING) << "Unable to parse WAL file name: " << name;
|
||||
return nullopt;
|
||||
}
|
||||
auto &tx_id_str = file_name_split[2];
|
||||
try {
|
||||
return std::stoll(tx_id_str);
|
||||
} catch (std::invalid_argument &) {
|
||||
LOG(WARNING) << "Unable to parse WAL file name tx ID: " << tx_id_str;
|
||||
return nullopt;
|
||||
} catch (std::out_of_range &) {
|
||||
LOG(WARNING) << "WAL file name tx ID too large: " << tx_id_str;
|
||||
return nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
fs::path MakeSnapshotPath(const fs::path &durability_dir, const int worker_id,
|
||||
tx::TransactionId tx_id) {
|
||||
std::string date_str =
|
||||
utils::Timestamp(utils::Timestamp::Now())
|
||||
.ToString("{:04d}_{:02d}_{:02d}__{:02d}_{:02d}_{:02d}_{:05d}");
|
||||
auto file_name = date_str + "_worker_" + std::to_string(worker_id) + "_tx_" +
|
||||
std::to_string(tx_id);
|
||||
return durability_dir / kSnapshotDir / file_name;
|
||||
}
|
||||
|
||||
/// Generates a file path for a write-ahead log file. If given a transaction ID
|
||||
/// the file name will contain it. Otherwise the file path is for the "current"
|
||||
/// WAL file for which the max tx id is still unknown.
|
||||
fs::path WalFilenameForTransactionId(const std::filesystem::path &wal_dir,
|
||||
int worker_id,
|
||||
std::optional<tx::TransactionId> tx_id) {
|
||||
auto file_name = utils::Timestamp::Now().ToIso8601();
|
||||
if (tx_id) {
|
||||
file_name += "__max_transaction_" + std::to_string(*tx_id);
|
||||
} else {
|
||||
file_name += "__current";
|
||||
}
|
||||
file_name = file_name + "_Worker_" + std::to_string(worker_id);
|
||||
return wal_dir / file_name;
|
||||
}
|
||||
|
||||
std::optional<tx::TransactionId> TransactionIdFromSnapshotFilename(
|
||||
const std::string &name) {
|
||||
auto nullopt = std::nullopt;
|
||||
auto file_name_split = utils::RSplit(name, "_tx_", 1);
|
||||
if (file_name_split.size() != 2) {
|
||||
LOG(WARNING) << "Unable to parse snapshot file name: " << name;
|
||||
return nullopt;
|
||||
}
|
||||
try {
|
||||
return std::stoll(file_name_split[1]);
|
||||
} catch (std::invalid_argument &) {
|
||||
LOG(WARNING) << "Unable to parse snapshot file name tx ID: "
|
||||
<< file_name_split[1];
|
||||
return nullopt;
|
||||
} catch (std::out_of_range &) {
|
||||
LOG(WARNING) << "Unable to parse snapshot file name tx ID: "
|
||||
<< file_name_split[1];
|
||||
return nullopt;
|
||||
}
|
||||
}
|
||||
} // namespace durability
|
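A worked example of the naming scheme produced and parsed above; the timestamp and date portions are placeholders, since only the suffixes matter to the parsers:

// Generated by WalFilenameForTransactionId(wal_dir, /*worker_id=*/2, 1042):
//   <wal_dir>/<timestamp>__max_transaction_1042_Worker_2
// TransactionIdFromWalFilename splits on the last "__", then on "_", and
// reads the third token, yielding 1042.
// Generated without a tx id (the still-open WAL):
//   <wal_dir>/<timestamp>__current_Worker_2
// For that name TransactionIdFromWalFilename returns the maximum possible
// transaction id.
// Generated by MakeSnapshotPath(durability_dir, /*worker_id=*/2, 1042):
//   <durability_dir>/snapshots/<date>_worker_2_tx_1042
// TransactionIdFromSnapshotFilename splits on "_tx_" and yields 1042.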
@ -1,40 +0,0 @@
#pragma once

#include <filesystem>
#include <optional>

#include "transactions/type.hpp"

namespace durability {
const std::string kSnapshotDir = "snapshots";
const std::string kWalDir = "wal";
const std::string kBackupDir = ".backup";

/// Returns the transaction id contained in the file name. If the filename is
/// not a parseable WAL file name, nullopt is returned. If the filename
/// represents the "current" WAL file, then the maximum possible transaction ID
/// is returned because that's appropriate for the recovery logic (the current
/// WAL does not yet have a maximum transaction ID and can't be discarded by
/// the recovery regardless of the snapshot from which the transaction starts).
std::optional<tx::TransactionId> TransactionIdFromWalFilename(
    const std::string &name);

/** Generates a path for a DB snapshot in the given folder in a well-defined
 * sortable format with worker id and transaction from which the snapshot is
 * created appended to the file name. */
std::filesystem::path MakeSnapshotPath(
    const std::filesystem::path &durability_dir, int worker_id,
    tx::TransactionId tx_id);

/// Returns the transaction id contained in the file name. If the filename is
/// not a parseable snapshot file name, nullopt is returned.
std::optional<tx::TransactionId> TransactionIdFromSnapshotFilename(
    const std::string &name);

/// Generates a file path for a write-ahead log file of a specified worker. If
/// given a transaction ID the file name will contain it. Otherwise the file
/// path is for the "current" WAL file for which the max tx id is still unknown.
std::filesystem::path WalFilenameForTransactionId(
    const std::filesystem::path &wal_dir, int worker_id,
    std::optional<tx::TransactionId> tx_id = std::nullopt);
} // namespace durability
@ -1,502 +0,0 @@
|
||||
#include "durability/distributed/recovery.hpp"
|
||||
|
||||
#include <filesystem>
|
||||
#include <limits>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "database/distributed/graph_db_accessor.hpp"
|
||||
#include "durability/distributed/paths.hpp"
|
||||
#include "durability/distributed/snapshot_decoder.hpp"
|
||||
#include "durability/distributed/snapshot_value.hpp"
|
||||
#include "durability/distributed/version.hpp"
|
||||
#include "durability/distributed/wal.hpp"
|
||||
#include "durability/hashed_file_reader.hpp"
|
||||
#include "glue/communication.hpp"
|
||||
#include "storage/distributed/address_types.hpp"
|
||||
#include "storage/distributed/indexes/label_property_index.hpp"
|
||||
#include "transactions/type.hpp"
|
||||
#include "utils/algorithm.hpp"
|
||||
#include "utils/file.hpp"
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace durability {
|
||||
|
||||
using communication::bolt::Value;
|
||||
bool ReadSnapshotSummary(HashedFileReader &buffer, int64_t &vertex_count,
|
||||
int64_t &edge_count, uint64_t &hash) {
|
||||
auto pos = buffer.Tellg();
|
||||
auto offset = sizeof(vertex_count) + sizeof(edge_count) + sizeof(hash);
|
||||
buffer.Seek(-offset, std::ios_base::end);
|
||||
bool r_val = buffer.ReadType(vertex_count, false) &&
|
||||
buffer.ReadType(edge_count, false) &&
|
||||
buffer.ReadType(hash, false);
|
||||
buffer.Seek(pos);
|
||||
return r_val;
|
||||
}
|
||||
|
||||
bool VersionConsistency(const fs::path &durability_dir) {
|
||||
for (const auto &durability_type : {kSnapshotDir, kWalDir}) {
|
||||
auto recovery_dir = durability_dir / durability_type;
|
||||
if (!fs::exists(recovery_dir) || !fs::is_directory(recovery_dir)) continue;
|
||||
|
||||
for (const auto &file : fs::directory_iterator(recovery_dir)) {
|
||||
HashedFileReader reader;
|
||||
SnapshotDecoder<HashedFileReader> decoder(reader);
|
||||
|
||||
// The following checks are ok because we are only trying to detect
|
||||
// version inconsistencies.
|
||||
if (!reader.Open(fs::path(file))) continue;
|
||||
|
||||
std::array<uint8_t, 4> target_magic_number =
|
||||
(durability_type == kSnapshotDir) ? durability::kSnapshotMagic
|
||||
: durability::kWalMagic;
|
||||
std::array<uint8_t, 4> magic_number;
|
||||
if (!reader.Read(magic_number.data(), magic_number.size())) continue;
|
||||
if (magic_number != target_magic_number) continue;
|
||||
|
||||
if (reader.EndOfFile()) continue;
|
||||
|
||||
Value dv;
|
||||
if (!decoder.ReadValue(&dv, Value::Type::Int) ||
|
||||
dv.ValueInt() != durability::kVersion)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DistributedVersionConsistency(const int64_t master_version) {
|
||||
return durability::kVersion == master_version;
|
||||
}
|
||||
|
||||
bool ContainsDurabilityFiles(const fs::path &durability_dir) {
|
||||
for (const auto &durability_type : {kSnapshotDir, kWalDir}) {
|
||||
auto recovery_dir = durability_dir / durability_type;
|
||||
if (fs::exists(recovery_dir) && fs::is_directory(recovery_dir) &&
|
||||
!fs::is_empty(recovery_dir))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void MoveToBackup(const fs::path &durability_dir) {
|
||||
auto backup_dir = durability_dir / kBackupDir;
|
||||
utils::EnsureDirOrDie(backup_dir);
|
||||
utils::EnsureDirOrDie(backup_dir / kSnapshotDir);
|
||||
utils::EnsureDirOrDie(backup_dir / kWalDir);
|
||||
for (const auto &durability_type : {kSnapshotDir, kWalDir}) {
|
||||
auto recovery_dir = durability_dir / durability_type;
|
||||
if (!fs::exists(recovery_dir) || !fs::is_directory(recovery_dir)) continue;
|
||||
for (const auto &file : fs::directory_iterator(recovery_dir)) {
|
||||
auto filename = fs::path(file).filename();
|
||||
fs::rename(file, backup_dir / durability_type / filename);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
using communication::bolt::Value;
|
||||
|
||||
#define RETURN_IF_NOT(condition) \
|
||||
if (!(condition)) { \
|
||||
reader.Close(); \
|
||||
return false; \
|
||||
}
|
||||
|
||||
bool RecoverSnapshot(const fs::path &snapshot_file, database::GraphDb *db,
|
||||
RecoveryData *recovery_data, int worker_id) {
|
||||
HashedFileReader reader;
|
||||
SnapshotDecoder<HashedFileReader> decoder(reader);
|
||||
|
||||
RETURN_IF_NOT(reader.Open(snapshot_file));
|
||||
|
||||
auto magic_number = durability::kSnapshotMagic;
|
||||
reader.Read(magic_number.data(), magic_number.size());
|
||||
RETURN_IF_NOT(magic_number == durability::kSnapshotMagic);
|
||||
|
||||
// Read the vertex and edge count, and the hash, from the end of the snapshot.
|
||||
int64_t vertex_count;
|
||||
int64_t edge_count;
|
||||
uint64_t hash;
|
||||
RETURN_IF_NOT(
|
||||
durability::ReadSnapshotSummary(reader, vertex_count, edge_count, hash));
|
||||
|
||||
Value dv;
|
||||
RETURN_IF_NOT(decoder.ReadValue(&dv, Value::Type::Int) &&
|
||||
dv.ValueInt() == durability::kVersion);
|
||||
|
||||
// Check that the worker id was set correctly.
|
||||
RETURN_IF_NOT(decoder.ReadValue(&dv, Value::Type::Int) &&
|
||||
dv.ValueInt() == worker_id);
|
||||
|
||||
// Vertex and edge generator ids
|
||||
RETURN_IF_NOT(decoder.ReadValue(&dv, Value::Type::Int));
|
||||
uint64_t vertex_generator_cnt = dv.ValueInt();
|
||||
db->storage().VertexGenerator().SetId(std::max(
|
||||
db->storage().VertexGenerator().LocalCount(), vertex_generator_cnt));
|
||||
RETURN_IF_NOT(decoder.ReadValue(&dv, Value::Type::Int));
|
||||
uint64_t edge_generator_cnt = dv.ValueInt();
|
||||
db->storage().EdgeGenerator().SetId(
|
||||
std::max(db->storage().EdgeGenerator().LocalCount(), edge_generator_cnt));
|
||||
|
||||
RETURN_IF_NOT(decoder.ReadValue(&dv, Value::Type::Int));
|
||||
recovery_data->snapshooter_tx_id = dv.ValueInt();
|
||||
// Transaction snapshot of the transaction that created the snapshot.
|
||||
RETURN_IF_NOT(decoder.ReadValue(&dv, Value::Type::List));
|
||||
for (const auto &value : dv.ValueList()) {
|
||||
RETURN_IF_NOT(value.IsInt());
|
||||
recovery_data->snapshooter_tx_snapshot.emplace_back(value.ValueInt());
|
||||
}
|
||||
|
||||
// A list of label+property indexes.
|
||||
RETURN_IF_NOT(decoder.ReadValue(&dv, Value::Type::List));
|
||||
auto index_value = dv.ValueList();
|
||||
for (auto it = index_value.begin(); it != index_value.end();) {
|
||||
auto label = *it++;
|
||||
RETURN_IF_NOT(it != index_value.end());
|
||||
auto property = *it++;
|
||||
RETURN_IF_NOT(label.IsString() && property.IsString());
|
||||
recovery_data->indexes.emplace_back(label.ValueString(),
|
||||
property.ValueString());
|
||||
}
|
||||
|
||||
auto dba = db->Access();
|
||||
std::unordered_map<gid::Gid,
|
||||
std::pair<storage::VertexAddress, storage::VertexAddress>>
|
||||
edge_gid_endpoints_mapping;
|
||||
|
||||
for (int64_t i = 0; i < vertex_count; ++i) {
|
||||
auto vertex = decoder.ReadSnapshotVertex();
|
||||
RETURN_IF_NOT(vertex);
|
||||
|
||||
auto vertex_accessor = dba->InsertVertex(vertex->gid, vertex->cypher_id);
|
||||
for (const auto &label : vertex->labels) {
|
||||
vertex_accessor.add_label(dba->Label(label));
|
||||
}
|
||||
for (const auto &property_pair : vertex->properties) {
|
||||
vertex_accessor.PropsSet(dba->Property(property_pair.first),
|
||||
glue::ToPropertyValue(property_pair.second));
|
||||
}
|
||||
auto vertex_record = vertex_accessor.GetNew();
|
||||
for (const auto &edge : vertex->in) {
|
||||
vertex_record->in_.emplace(edge.vertex, edge.address,
|
||||
dba->EdgeType(edge.type));
|
||||
edge_gid_endpoints_mapping[edge.address.gid()] = {
|
||||
edge.vertex, vertex_accessor.GlobalAddress()};
|
||||
}
|
||||
for (const auto &edge : vertex->out) {
|
||||
vertex_record->out_.emplace(edge.vertex, edge.address,
|
||||
dba->EdgeType(edge.type));
|
||||
edge_gid_endpoints_mapping[edge.address.gid()] = {
|
||||
vertex_accessor.GlobalAddress(), edge.vertex};
|
||||
}
|
||||
}
|
||||
|
||||
auto vertex_transform_to_local_if_possible =
|
||||
[&dba, worker_id](storage::VertexAddress &address) {
|
||||
if (address.is_local()) return;
|
||||
// If the worker id matches it should be a local appearance
|
||||
if (address.worker_id() == worker_id) {
|
||||
address = storage::VertexAddress(
|
||||
dba->db().storage().LocalAddress<Vertex>(address.gid()));
|
||||
CHECK(address.is_local()) << "Address should be local but isn't";
|
||||
}
|
||||
};
|
||||
|
||||
auto edge_transform_to_local_if_possible =
|
||||
[&dba, worker_id](storage::EdgeAddress &address) {
|
||||
if (address.is_local()) return;
|
||||
// If the worker id matches it should be a local appearance
|
||||
if (address.worker_id() == worker_id) {
|
||||
address = storage::EdgeAddress(
|
||||
dba->db().storage().LocalAddress<Edge>(address.gid()));
|
||||
CHECK(address.is_local()) << "Address should be local but isn't";
|
||||
}
|
||||
};
|
||||
|
||||
Value dv_cypher_id;
|
||||
|
||||
for (int64_t i = 0; i < edge_count; ++i) {
|
||||
RETURN_IF_NOT(
|
||||
decoder.ReadValue(&dv, communication::bolt::Value::Type::Edge));
|
||||
auto &edge = dv.ValueEdge();
|
||||
|
||||
// Read cypher_id
|
||||
RETURN_IF_NOT(decoder.ReadValue(&dv_cypher_id,
|
||||
communication::bolt::Value::Type::Int));
|
||||
auto cypher_id = dv_cypher_id.ValueInt();
|
||||
|
||||
// We have to take full edge endpoints from vertices since the endpoints
// found here don't contain the worker_id, and this can't be changed since
// these edges must be bolt-compliant.
|
||||
auto &edge_endpoints = edge_gid_endpoints_mapping[edge.id.AsUint()];
|
||||
|
||||
storage::VertexAddress from;
|
||||
storage::VertexAddress to;
|
||||
std::tie(from, to) = edge_endpoints;
|
||||
|
||||
// From and to are written in the global_address format and we should
|
||||
// convert them back to local format for speedup - if possible
|
||||
vertex_transform_to_local_if_possible(from);
|
||||
vertex_transform_to_local_if_possible(to);
|
||||
|
||||
auto edge_accessor = dba->InsertOnlyEdge(from, to, dba->EdgeType(edge.type),
|
||||
edge.id.AsUint(), cypher_id);
|
||||
|
||||
for (const auto &property_pair : edge.properties)
|
||||
edge_accessor.PropsSet(dba->Property(property_pair.first),
|
||||
glue::ToPropertyValue(property_pair.second));
|
||||
}
|
||||
|
||||
// Vertex and edge counts are included in the hash. Re-read them to update the
|
||||
// hash.
|
||||
reader.ReadType(vertex_count);
|
||||
reader.ReadType(edge_count);
|
||||
if (!reader.Close() || reader.hash() != hash) {
|
||||
dba->Abort();
|
||||
return false;
|
||||
}
|
||||
|
||||
// We have to replace global ids with local ids where possible for all edges
// in every vertex; this can only be done after we have inserted the edges,
// and it is done to speed up execution.
|
||||
for (auto &vertex_accessor : dba->Vertices(true)) {
|
||||
auto vertex = vertex_accessor.GetNew();
|
||||
auto iterate_and_transform =
|
||||
[vertex_transform_to_local_if_possible,
|
||||
edge_transform_to_local_if_possible](Edges &edges) {
|
||||
Edges transformed;
|
||||
for (auto &element : edges) {
|
||||
auto vertex = element.vertex;
|
||||
vertex_transform_to_local_if_possible(vertex);
|
||||
|
||||
auto edge = element.edge;
|
||||
edge_transform_to_local_if_possible(edge);
|
||||
|
||||
transformed.emplace(vertex, edge, element.edge_type);
|
||||
}
|
||||
|
||||
return transformed;
|
||||
};
|
||||
|
||||
vertex->in_ = iterate_and_transform(vertex->in_);
|
||||
vertex->out_ = iterate_and_transform(vertex->out_);
|
||||
}
|
||||
|
||||
// Ensure that the next transaction ID in the recovered DB will be greater
|
||||
// than the latest one we have recovered. Do this to make sure that
|
||||
// subsequently created snapshots and WAL files will have transactional info
|
||||
// that does not interfere with that found in previous snapshots and WAL.
|
||||
tx::TransactionId max_id = recovery_data->snapshooter_tx_id;
|
||||
auto &snap = recovery_data->snapshooter_tx_snapshot;
|
||||
if (!snap.empty()) {
|
||||
max_id = std::max(max_id, *std::max_element(snap.begin(), snap.end()));
|
||||
}
|
||||
dba->db().tx_engine().EnsureNextIdGreater(max_id);
|
||||
dba->Commit();
|
||||
return true;
|
||||
}
|
||||
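For orientation, this is the on-disk layout that RecoverSnapshot walks through above, in read order; it is reconstructed from the code in this file, not from a separate format specification:

// Snapshot file layout as consumed above:
//   kSnapshotMagic                      (4 bytes)
//   durability version                  (Int)
//   worker id                           (Int)
//   vertex generator count              (Int)
//   edge generator count                (Int)
//   snapshooter transaction id          (Int)
//   snapshooter transaction snapshot    (List of Int)
//   label+property index pairs          (List, label/property strings)
//   vertices                            (vertex_count snapshot vertices)
//   edges                               (edge_count Edge values + cypher_id)
//   vertex_count, edge_count, hash      (summary read from the file end)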
|
||||
#undef RETURN_IF_NOT
|
||||
|
||||
std::vector<fs::path> GetWalFiles(const fs::path &wal_dir) {
|
||||
// Get paths to all the WAL files and sort them (on date).
|
||||
std::vector<fs::path> wal_files;
|
||||
if (!fs::exists(wal_dir)) return {};
|
||||
for (auto &wal_file : fs::directory_iterator(wal_dir))
|
||||
wal_files.emplace_back(wal_file);
|
||||
std::sort(wal_files.begin(), wal_files.end());
|
||||
return wal_files;
|
||||
}
|
||||
|
||||
bool ApplyOverDeltas(
|
||||
const std::vector<fs::path> &wal_files, tx::TransactionId first_to_recover,
|
||||
const std::function<void(const database::StateDelta &)> &f) {
|
||||
for (auto &wal_file : wal_files) {
|
||||
auto wal_file_max_tx_id = TransactionIdFromWalFilename(wal_file.filename());
|
||||
if (!wal_file_max_tx_id || *wal_file_max_tx_id < first_to_recover) continue;
|
||||
|
||||
HashedFileReader wal_reader;
|
||||
if (!wal_reader.Open(wal_file)) return false;
|
||||
|
||||
communication::bolt::Decoder<HashedFileReader> decoder(wal_reader);
|
||||
|
||||
auto magic_number = durability::kWalMagic;
|
||||
wal_reader.Read(magic_number.data(), magic_number.size());
|
||||
if (magic_number != durability::kWalMagic) return false;
|
||||
|
||||
Value dv;
|
||||
if (!decoder.ReadValue(&dv, Value::Type::Int) ||
|
||||
dv.ValueInt() != durability::kVersion)
|
||||
return false;
|
||||
|
||||
while (true) {
|
||||
auto delta = database::StateDelta::Decode(wal_reader, decoder);
|
||||
if (!delta) break;
|
||||
f(*delta);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
auto FirstWalTxToRecover(const RecoveryData &recovery_data) {
|
||||
auto &tx_sn = recovery_data.snapshooter_tx_snapshot;
|
||||
auto first_to_recover = tx_sn.empty() ? recovery_data.snapshooter_tx_id + 1
|
||||
: *std::min_element(tx_sn.begin(), tx_sn.end());
|
||||
return first_to_recover;
|
||||
}
|
||||
|
||||
std::vector<tx::TransactionId> ReadWalRecoverableTransactions(
|
||||
const fs::path &wal_dir, database::GraphDb *db,
|
||||
const RecoveryData &recovery_data) {
|
||||
auto wal_files = GetWalFiles(wal_dir);
|
||||
|
||||
std::unordered_set<tx::TransactionId> committed_set;
|
||||
auto first_to_recover = FirstWalTxToRecover(recovery_data);
|
||||
ApplyOverDeltas(
|
||||
wal_files, first_to_recover, [&](const database::StateDelta &delta) {
|
||||
if (delta.transaction_id >= first_to_recover &&
|
||||
delta.type == database::StateDelta::Type::TRANSACTION_COMMIT) {
|
||||
committed_set.insert(delta.transaction_id);
|
||||
}
|
||||
});
|
||||
|
||||
std::vector<tx::TransactionId> committed_tx_ids(committed_set.size());
|
||||
for (auto id : committed_set) committed_tx_ids.push_back(id);
|
||||
return committed_tx_ids;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
RecoveryInfo RecoverOnlySnapshot(
|
||||
const fs::path &durability_dir, database::GraphDb *db,
|
||||
RecoveryData *recovery_data,
|
||||
std::optional<tx::TransactionId> required_snapshot_tx_id, int worker_id) {
|
||||
// Attempt to recover from snapshot files in reverse order (from newest
|
||||
// backwards).
|
||||
const auto snapshot_dir = durability_dir / kSnapshotDir;
|
||||
std::vector<fs::path> snapshot_files;
|
||||
if (fs::exists(snapshot_dir) && fs::is_directory(snapshot_dir))
|
||||
for (auto &file : fs::directory_iterator(snapshot_dir))
|
||||
snapshot_files.emplace_back(file);
|
||||
std::sort(snapshot_files.rbegin(), snapshot_files.rend());
|
||||
for (auto &snapshot_file : snapshot_files) {
|
||||
if (required_snapshot_tx_id) {
|
||||
auto snapshot_file_tx_id =
|
||||
TransactionIdFromSnapshotFilename(snapshot_file);
|
||||
if (!snapshot_file_tx_id ||
|
||||
snapshot_file_tx_id.value() != *required_snapshot_tx_id) {
|
||||
LOG(INFO) << "Skipping snapshot file '" << snapshot_file
|
||||
<< "' because it does not match the required snapshot tx id: "
|
||||
<< *required_snapshot_tx_id;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
LOG(INFO) << "Starting snapshot recovery from: " << snapshot_file;
|
||||
if (!RecoverSnapshot(snapshot_file, db, recovery_data, worker_id)) {
|
||||
db->ReinitializeStorage();
|
||||
recovery_data->Clear();
|
||||
LOG(WARNING) << "Snapshot recovery failed, trying older snapshot...";
|
||||
continue;
|
||||
} else {
|
||||
LOG(INFO) << "Snapshot recovery successful.";
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If snapshot recovery is required, and we failed, don't even deal with
|
||||
// the WAL recovery.
|
||||
if (required_snapshot_tx_id &&
|
||||
recovery_data->snapshooter_tx_id != *required_snapshot_tx_id)
|
||||
return {durability::kVersion, recovery_data->snapshooter_tx_id, {}};
|
||||
|
||||
return {durability::kVersion, recovery_data->snapshooter_tx_id,
|
||||
ReadWalRecoverableTransactions(durability_dir / kWalDir, db,
|
||||
*recovery_data)};
|
||||
}
|
||||
|
||||
// TODO - finer-grained recovery feedback could be useful here.
|
||||
void RecoverWal(const fs::path &durability_dir, database::GraphDb *db,
|
||||
RecoveryData *recovery_data,
|
||||
RecoveryTransactions *transactions) {
|
||||
auto wal_dir = durability_dir / kWalDir;
|
||||
auto wal_files = GetWalFiles(wal_dir);
|
||||
// Track which transaction should be recovered first, and define logic for
|
||||
// which transactions should be skipped in recovery.
|
||||
auto &tx_sn = recovery_data->snapshooter_tx_snapshot;
|
||||
auto first_to_recover = FirstWalTxToRecover(*recovery_data);
|
||||
|
||||
// Set of transactions which can be recovered, since not every transaction in
|
||||
// wal can be recovered because it might not be present on some workers (there
|
||||
// wasn't enough time for it to flush to disk or similar)
|
||||
std::unordered_set<tx::TransactionId> common_wal_tx;
|
||||
for (auto tx_id : recovery_data->wal_tx_to_recover)
|
||||
common_wal_tx.insert(tx_id);
|
||||
|
||||
auto should_skip = [&tx_sn, recovery_data, &common_wal_tx,
|
||||
first_to_recover](tx::TransactionId tx_id) {
|
||||
return tx_id < first_to_recover ||
|
||||
(tx_id < recovery_data->snapshooter_tx_id &&
|
||||
!utils::Contains(tx_sn, tx_id)) ||
|
||||
!utils::Contains(common_wal_tx, tx_id);
|
||||
};
|
||||
|
||||
// Ensure that the next transaction ID in the recovered DB will be greater
|
||||
// than the latest one we have recovered. Do this to make sure that
|
||||
// subsequently created snapshots and WAL files will have transactional info
|
||||
// that does not interfere with that found in previous snapshots and WAL.
|
||||
tx::TransactionId max_observed_tx_id{0};
|
||||
|
||||
// Read all the WAL files whose max_tx_id is not smaller than
|
||||
// min_tx_to_recover.
|
||||
ApplyOverDeltas(
|
||||
wal_files, first_to_recover, [&](const database::StateDelta &delta) {
|
||||
max_observed_tx_id = std::max(max_observed_tx_id, delta.transaction_id);
|
||||
if (should_skip(delta.transaction_id)) return;
|
||||
switch (delta.type) {
|
||||
case database::StateDelta::Type::TRANSACTION_BEGIN:
|
||||
transactions->Begin(delta.transaction_id);
|
||||
break;
|
||||
case database::StateDelta::Type::TRANSACTION_ABORT:
|
||||
transactions->Abort(delta.transaction_id);
|
||||
break;
|
||||
case database::StateDelta::Type::TRANSACTION_COMMIT:
|
||||
transactions->Commit(delta.transaction_id);
|
||||
break;
|
||||
case database::StateDelta::Type::BUILD_INDEX:
|
||||
// TODO index building might still be problematic in HA
|
||||
recovery_data->indexes.emplace_back(delta.label_name,
|
||||
delta.property_name);
|
||||
break;
|
||||
default:
|
||||
transactions->Apply(delta);
|
||||
}
|
||||
});
|
||||
|
||||
// TODO when implementing proper error handling return one of the following:
|
||||
// - WAL fully recovered
|
||||
// - WAL partially recovered
|
||||
// - WAL recovery error
|
||||
|
||||
db->tx_engine().EnsureNextIdGreater(max_observed_tx_id);
|
||||
}
|
||||
|
||||
void RecoverIndexes(
|
||||
database::GraphDb *db,
|
||||
const std::vector<std::pair<std::string, std::string>> &indexes) {
|
||||
auto db_accessor_indices = db->Access();
|
||||
for (const auto &label_prop : indexes) {
|
||||
const database::LabelPropertyIndex::Key key{
|
||||
db_accessor_indices->Label(label_prop.first),
|
||||
db_accessor_indices->Property(label_prop.second)};
|
||||
db_accessor_indices->db().storage().label_property_index().CreateIndex(key);
|
||||
db_accessor_indices->PopulateIndex(key);
|
||||
db_accessor_indices->EnableIndex(key);
|
||||
}
|
||||
db_accessor_indices->Commit();
|
||||
}
|
||||
|
||||
} // namespace durability
|
@ -1,132 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <filesystem>
|
||||
#include <optional>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "durability/distributed/state_delta.hpp"
|
||||
#include "durability/hashed_file_reader.hpp"
|
||||
#include "transactions/type.hpp"
|
||||
|
||||
namespace database {
|
||||
class GraphDb;
|
||||
};
|
||||
|
||||
namespace durability {
|
||||
|
||||
/// Stores info on what was (or needs to be) recovered from durability.
|
||||
struct RecoveryInfo {
|
||||
RecoveryInfo() {}
|
||||
RecoveryInfo(const int64_t durability_version,
|
||||
tx::TransactionId snapshot_tx_id,
|
||||
const std::vector<tx::TransactionId> &wal_recovered)
|
||||
: durability_version(durability_version),
|
||||
snapshot_tx_id(snapshot_tx_id),
|
||||
wal_recovered(wal_recovered) {}
|
||||
int64_t durability_version;
|
||||
tx::TransactionId snapshot_tx_id;
|
||||
std::vector<tx::TransactionId> wal_recovered;
|
||||
|
||||
bool operator==(const RecoveryInfo &other) const {
|
||||
return durability_version == other.durability_version &&
|
||||
snapshot_tx_id == other.snapshot_tx_id &&
|
||||
wal_recovered == other.wal_recovered;
|
||||
}
|
||||
bool operator!=(const RecoveryInfo &other) const { return !(*this == other); }
|
||||
};
|
||||
|
||||
// A data structure for exchanging info between main recovery function and
|
||||
// snapshot and WAL recovery functions.
|
||||
struct RecoveryData {
|
||||
tx::TransactionId snapshooter_tx_id{0};
|
||||
std::vector<tx::TransactionId> wal_tx_to_recover{};
|
||||
std::vector<tx::TransactionId> snapshooter_tx_snapshot;
|
||||
// A collection into which the indexes should be added so they
|
||||
// can be rebuilt at the end of the recovery transaction.
|
||||
std::vector<std::pair<std::string, std::string>> indexes;
|
||||
|
||||
void Clear() {
|
||||
snapshooter_tx_id = 0;
|
||||
snapshooter_tx_snapshot.clear();
|
||||
indexes.clear();
|
||||
}
|
||||
};
|
||||
|
||||
/** Reads snapshot metadata from the end of the file without messing up the
|
||||
* hash. */
|
||||
bool ReadSnapshotSummary(HashedFileReader &buffer, int64_t &vertex_count,
|
||||
int64_t &edge_count, uint64_t &hash);
|
||||
|
||||
/**
|
||||
* Checks version consistency within the durability directory.
|
||||
*
|
||||
* @param durability_dir - Path to durability directory.
|
||||
* @return - True if snapshot and WAL versions are compatible with
|
||||
* current memgraph binary.
|
||||
*/
|
||||
bool VersionConsistency(const std::filesystem::path &durability_dir);
|
||||
|
||||
/**
|
||||
* Checks whether the current memgraph binary (on a worker) is
|
||||
* version consistent with the cluster master.
|
||||
*
|
||||
* @param master_version - Version of the master.
|
||||
* @return - True if versions match.
|
||||
*/
|
||||
bool DistributedVersionConsistency(const int64_t master_version);
|
||||
|
||||
/**
|
||||
* Checks whether the durability directory contains snapshot
|
||||
* or write-ahead log file.
|
||||
*
|
||||
* @param durability_dir - Path to durability directory.
|
||||
* @return - True if durability directory contains either a snapshot
|
||||
* or WAL file.
|
||||
*/
|
||||
bool ContainsDurabilityFiles(const std::filesystem::path &durability_dir);
|
||||
|
||||
/**
|
||||
* Backup snapshots and WAL files to a backup folder.
|
||||
*
|
||||
* @param durability_dir - Path to durability directory.
|
||||
*/
|
||||
void MoveToBackup(const std::filesystem::path &durability_dir);
|
||||
|
||||
/**
|
||||
* Recovers database from the latest possible snapshot. If recovering fails,
|
||||
* false is returned and db_accessor aborts transaction, else true is returned
|
||||
* and the transaction is committed.
|
||||
*
|
||||
* @param durability_dir - Path to durability directory.
|
||||
* @param db - The database to recover into.
|
||||
* @param required_snapshot_tx_id - Only used on distributed worker. Indicates
|
||||
* what the master recovered. The same snapshot must be recovered on the
|
||||
* worker.
|
||||
* @return - recovery info
|
||||
*/
|
||||
RecoveryInfo RecoverOnlySnapshot(
|
||||
const std::filesystem::path &durability_dir, database::GraphDb *db,
|
||||
durability::RecoveryData *recovery_data,
|
||||
std::optional<tx::TransactionId> required_snapshot_tx_id, int worker_id);
|
||||
|
||||
/** Interface for accessing transactions during WAL recovery. */
|
||||
class RecoveryTransactions {
|
||||
public:
|
||||
virtual ~RecoveryTransactions() {}
|
||||
|
||||
virtual void Begin(const tx::TransactionId &) = 0;
|
||||
virtual void Abort(const tx::TransactionId &) = 0;
|
||||
virtual void Commit(const tx::TransactionId &) = 0;
|
||||
virtual void Apply(const database::StateDelta &) = 0;
|
||||
};
|
||||
|
||||
void RecoverWal(const std::filesystem::path &durability_dir,
|
||||
database::GraphDb *db, RecoveryData *recovery_data,
|
||||
RecoveryTransactions *transactions);
|
||||
|
||||
void RecoverIndexes(
|
||||
database::GraphDb *db,
|
||||
const std::vector<std::pair<std::string, std::string>> &indexes);
|
||||
|
||||
} // namespace durability
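
// Caller-side sketch (not part of this diff): the intended call order of the
// recovery API declared above, using only names from this header. The worker
// id and the concrete RecoveryTransactions implementation are assumed to be
// supplied by the caller; on a distributed worker the master's snapshot tx id
// would be passed instead of std::nullopt.
inline durability::RecoveryInfo RecoverEverything(
    const std::filesystem::path &durability_dir, database::GraphDb *db,
    durability::RecoveryTransactions *transactions, int worker_id) {
  durability::RecoveryData recovery_data;
  // 1) Load the newest usable snapshot.
  auto info = durability::RecoverOnlySnapshot(
      durability_dir, db, &recovery_data, std::nullopt, worker_id);
  // 2) Replay WAL deltas of transactions the snapshot does not already cover.
  durability::RecoverWal(durability_dir, db, &recovery_data, transactions);
  // 3) Rebuild the label+property indexes collected during recovery.
  durability::RecoverIndexes(db, recovery_data.indexes);
  return info;
}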
|
@ -1,34 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "durability/distributed/recovery.hpp"
|
||||
#include "slk/serialization.hpp"
|
||||
|
||||
namespace slk {
|
||||
|
||||
inline void Save(const durability::RecoveryInfo &info, slk::Builder *builder) {
|
||||
slk::Save(info.durability_version, builder);
|
||||
slk::Save(info.snapshot_tx_id, builder);
|
||||
slk::Save(info.wal_recovered, builder);
|
||||
}
|
||||
|
||||
inline void Load(durability::RecoveryInfo *info, slk::Reader *reader) {
|
||||
slk::Load(&info->durability_version, reader);
|
||||
slk::Load(&info->snapshot_tx_id, reader);
|
||||
slk::Load(&info->wal_recovered, reader);
|
||||
}
|
||||
|
||||
inline void Save(const durability::RecoveryData &data, slk::Builder *builder) {
|
||||
slk::Save(data.snapshooter_tx_id, builder);
|
||||
slk::Save(data.wal_tx_to_recover, builder);
|
||||
slk::Save(data.snapshooter_tx_snapshot, builder);
|
||||
slk::Save(data.indexes, builder);
|
||||
}
|
||||
|
||||
inline void Load(durability::RecoveryData *data, slk::Reader *reader) {
|
||||
slk::Load(&data->snapshooter_tx_id, reader);
|
||||
slk::Load(&data->wal_tx_to_recover, reader);
|
||||
slk::Load(&data->snapshooter_tx_snapshot, reader);
|
||||
slk::Load(&data->indexes, reader);
|
||||
}
|
||||
|
||||
} // namespace slk
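
// Note on wire compatibility (comment only, nothing added to the encoding):
// Save() writes the RecoveryInfo / RecoveryData members in declaration order
// and Load() reads them back in the same order; there are no per-field tags or
// versions, so both ends of the RPC must be built from the same header.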
|
@ -1,142 +0,0 @@
|
||||
#include "durability/distributed/snapshooter.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include <glog/logging.h>
|
||||
|
||||
#include "database/distributed/graph_db_accessor.hpp"
|
||||
#include "durability/distributed/paths.hpp"
|
||||
#include "durability/distributed/snapshot_encoder.hpp"
|
||||
#include "durability/distributed/version.hpp"
|
||||
#include "durability/hashed_file_writer.hpp"
|
||||
#include "utils/file.hpp"
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace durability {
|
||||
|
||||
// Snapshot layout is described in durability/version.hpp
|
||||
static_assert(durability::kVersion == 6,
|
||||
"Wrong snapshot version, please update!");
|
||||
|
||||
namespace {
|
||||
bool Encode(const fs::path &snapshot_file, database::GraphDb &db,
|
||||
database::GraphDbAccessor &dba, int worker_id) {
|
||||
try {
|
||||
HashedFileWriter buffer(snapshot_file);
|
||||
SnapshotEncoder<HashedFileWriter> encoder(buffer);
|
||||
int64_t vertex_num = 0, edge_num = 0;
|
||||
|
||||
encoder.WriteRAW(durability::kSnapshotMagic.data(),
|
||||
durability::kSnapshotMagic.size());
|
||||
encoder.WriteInt(durability::kVersion);
|
||||
|
||||
// Writes the worker id to snapshot, used to guarantee consistent cluster
|
||||
// state after recovery
|
||||
encoder.WriteInt(worker_id);
|
||||
|
||||
// Write the number of generated vertices and edges, used to recover the
// generators' internal states.
|
||||
encoder.WriteInt(db.storage().VertexGenerator().LocalCount());
|
||||
encoder.WriteInt(db.storage().EdgeGenerator().LocalCount());
|
||||
|
||||
// Write the ID of the transaction doing the snapshot.
|
||||
encoder.WriteInt(dba.transaction_id());
|
||||
|
||||
// Write the transaction snapshot into the snapshot. It's used when
|
||||
// recovering from the combination of snapshot and write-ahead-log.
|
||||
{
|
||||
std::vector<communication::bolt::Value> tx_snapshot;
|
||||
for (int64_t tx : dba.transaction().snapshot())
|
||||
tx_snapshot.emplace_back(tx);
|
||||
encoder.WriteList(tx_snapshot);
|
||||
}
|
||||
|
||||
// Write label+property indexes as list ["label", "property", ...]
|
||||
{
|
||||
std::vector<communication::bolt::Value> index_vec;
|
||||
for (const auto &key : dba.GetIndicesKeys()) {
|
||||
index_vec.emplace_back(dba.LabelName(key.label_));
|
||||
index_vec.emplace_back(dba.PropertyName(key.property_));
|
||||
}
|
||||
encoder.WriteList(index_vec);
|
||||
}
|
||||
|
||||
for (const auto &vertex : dba.Vertices(false)) {
|
||||
encoder.WriteSnapshotVertex(vertex);
|
||||
vertex_num++;
|
||||
}
|
||||
for (const auto &edge : dba.Edges(false)) {
|
||||
encoder.WriteEdge(glue::ToBoltEdge(edge));
|
||||
encoder.WriteInt(edge.CypherId());
|
||||
edge_num++;
|
||||
}
|
||||
buffer.WriteValue(vertex_num);
|
||||
buffer.WriteValue(edge_num);
|
||||
buffer.WriteValue(buffer.hash());
|
||||
buffer.Close();
|
||||
} catch (const std::ifstream::failure &) {
|
||||
if (fs::exists(snapshot_file) && !fs::remove(snapshot_file)) {
|
||||
LOG(ERROR) << "Error while removing corrupted snapshot file: "
|
||||
<< snapshot_file;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Removes snapshot files so that only `max_retained` latest ones are kept. If
|
||||
// `max_retained == -1`, all the snapshots are retained.
|
||||
void RemoveOldSnapshots(const fs::path &snapshot_dir, int max_retained) {
|
||||
if (max_retained == -1) return;
|
||||
std::vector<fs::path> files;
|
||||
for (auto &file : fs::directory_iterator(snapshot_dir))
|
||||
files.push_back(file.path());
|
||||
if (static_cast<int>(files.size()) <= max_retained) return;
|
||||
sort(files.begin(), files.end());
|
||||
for (int i = 0; i < static_cast<int>(files.size()) - max_retained; ++i) {
|
||||
if (!fs::remove(files[i])) {
|
||||
LOG(ERROR) << "Error while removing file: " << files[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Removes write-ahead log files that are no longer necessary (they don't get
|
||||
// used when recovering from the latest snapshot).
|
||||
void RemoveOldWals(const fs::path &wal_dir,
|
||||
const tx::Transaction &snapshot_transaction) {
|
||||
if (!fs::exists(wal_dir)) return;
|
||||
// We can remove all the WAL files that will not be used when restoring from
|
||||
// the snapshot created in the given transaction.
|
||||
auto min_trans_id = snapshot_transaction.snapshot().empty()
|
||||
? snapshot_transaction.id_ + 1
|
||||
: snapshot_transaction.snapshot().front();
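// Worked example (illustrative numbers): if the snapshot was taken in tx 42
// while txs {37, 40} were still live, min_trans_id is 37 and only WAL files
// whose newest transaction precedes 37 are deleted; with an empty snapshot
// set it is 43, so every WAL file up to and including tx 42 can go.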
|
||||
for (auto &wal_file : fs::directory_iterator(wal_dir)) {
|
||||
auto tx_id = TransactionIdFromWalFilename(wal_file.path().filename());
|
||||
if (tx_id && tx_id.value() < min_trans_id) {
|
||||
bool result = fs::remove(wal_file);
|
||||
DCHECK(result) << "Unable to delete old wal file: " << wal_file;
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
bool MakeSnapshot(database::GraphDb &db, database::GraphDbAccessor &dba,
|
||||
int worker_id, const fs::path &durability_dir,
|
||||
int snapshot_max_retained) {
|
||||
if (!utils::EnsureDir(durability_dir / kSnapshotDir)) return false;
|
||||
const auto snapshot_file =
|
||||
MakeSnapshotPath(durability_dir, worker_id, dba.transaction_id());
|
||||
if (fs::exists(snapshot_file)) return false;
|
||||
if (Encode(snapshot_file, db, dba, worker_id)) {
|
||||
RemoveOldSnapshots(durability_dir / kSnapshotDir, snapshot_max_retained);
|
||||
RemoveOldWals(durability_dir / kWalDir, dba.transaction());
|
||||
return true;
|
||||
} else {
|
||||
std::error_code error_code; // Just for exception suppression.
|
||||
fs::remove(snapshot_file, error_code);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace durability
|
@ -1,20 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <filesystem>
|
||||
|
||||
#include "database/distributed/graph_db.hpp"
|
||||
|
||||
namespace durability {
|
||||
|
||||
/**
|
||||
* Make snapshot and save it in snapshots folder. Returns true if successful.
|
||||
* @param db - database for which we are creating a snapshot
|
||||
* @param dba - db accessor with which we are creating a snapshot (reading data)
|
||||
* @param worker_id - id of the worker taking the snapshot (stored in the snapshot file name and header).
* @param durability_dir - directory where durability data is stored.
|
||||
* @param snapshot_max_retained - maximum number of snapshots to retain.
|
||||
*/
|
||||
bool MakeSnapshot(database::GraphDb &db, database::GraphDbAccessor &dba,
|
||||
int worker_id, const std::filesystem::path &durability_dir,
|
||||
int snapshot_max_retained);
|
||||
|
||||
} // namespace durability
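
// Usage sketch (not part of this diff): take one snapshot and keep the three
// newest ones. db.Access() is assumed to hand out a GraphDbAccessor the way
// other call sites in this diff use it; worker id 0 stands for a single node.
inline bool SnapshotOnce(database::GraphDb &db,
                         const std::filesystem::path &durability_dir) {
  auto dba = db.Access();  // accessor for the snapshotting transaction
  bool ok = durability::MakeSnapshot(db, *dba, /*worker_id=*/0, durability_dir,
                                     /*snapshot_max_retained=*/3);
  if (ok) dba->Commit();  // Commit() is used the same way in RecoverIndexes()
  return ok;
}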
|
@ -1,105 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <optional>
|
||||
|
||||
#include "communication/bolt/v1/decoder/decoder.hpp"
|
||||
#include "durability/distributed/snapshot_value.hpp"
|
||||
|
||||
namespace durability {
|
||||
|
||||
template <typename Buffer>
|
||||
class SnapshotDecoder : public communication::bolt::Decoder<Buffer> {
|
||||
public:
|
||||
explicit SnapshotDecoder(Buffer &buffer)
|
||||
: communication::bolt::Decoder<Buffer>(buffer) {}
|
||||
|
||||
std::optional<SnapshotVertex> ReadSnapshotVertex() {
|
||||
communication::bolt::Value dv;
|
||||
SnapshotVertex vertex;
|
||||
|
||||
// Read global id, labels and properties of the vertex
|
||||
if (!communication::bolt::Decoder<Buffer>::ReadValue(
|
||||
&dv, communication::bolt::Value::Type::Vertex)) {
|
||||
DLOG(WARNING) << "Unable to read snapshot vertex";
|
||||
return std::nullopt;
|
||||
}
|
||||
auto &read_vertex = dv.ValueVertex();
|
||||
vertex.gid = read_vertex.id.AsUint();
|
||||
vertex.labels = read_vertex.labels;
|
||||
vertex.properties = read_vertex.properties;
|
||||
|
||||
// Read cypher_id
|
||||
if (!communication::bolt::Decoder<Buffer>::ReadValue(
|
||||
&dv, communication::bolt::Value::Type::Int)) {
|
||||
DLOG(WARNING) << "Unable to read vertex cypher_id";
|
||||
return std::nullopt;
|
||||
}
|
||||
vertex.cypher_id = dv.ValueInt();
|
||||
|
||||
// Read in edges
|
||||
if (!communication::bolt::Decoder<Buffer>::ReadValue(
|
||||
&dv, communication::bolt::Value::Type::Int)) {
|
||||
DLOG(WARNING) << "[ReadSnapshotVertex] Couldn't read number of in "
|
||||
"edges in vertex!";
|
||||
return std::nullopt;
|
||||
}
|
||||
for (int i = 0; i < dv.ValueInt(); ++i) {
|
||||
auto edge = ReadSnapshotEdge();
|
||||
if (!edge) return std::nullopt;
|
||||
vertex.in.emplace_back(*edge);
|
||||
}
|
||||
|
||||
// Read out edges
|
||||
if (!communication::bolt::Decoder<Buffer>::ReadValue(
|
||||
&dv, communication::bolt::Value::Type::Int)) {
|
||||
DLOG(WARNING) << "[ReadSnapshotVertex] Couldn't read number of out "
|
||||
"edges in vertex!";
|
||||
return std::nullopt;
|
||||
}
|
||||
for (int i = 0; i < dv.ValueInt(); ++i) {
|
||||
auto edge = ReadSnapshotEdge();
|
||||
if (!edge) return std::nullopt;
|
||||
vertex.out.emplace_back(*edge);
|
||||
}
|
||||
|
||||
VLOG(20) << "[ReadSnapshotVertex] Success";
|
||||
return vertex;
|
||||
}
|
||||
|
||||
private:
|
||||
std::optional<InlinedVertexEdge> ReadSnapshotEdge() {
|
||||
communication::bolt::Value dv;
|
||||
InlinedVertexEdge edge;
|
||||
|
||||
VLOG(20) << "[ReadSnapshotEdge] Start";
|
||||
|
||||
// Read global id of this edge
|
||||
if (!communication::bolt::Decoder<Buffer>::ReadValue(
|
||||
&dv, communication::bolt::Value::Type::Int)) {
|
||||
DLOG(WARNING) << "[ReadSnapshotEdge] Couldn't read Global ID!";
|
||||
return std::nullopt;
|
||||
}
|
||||
edge.address = storage::EdgeAddress(static_cast<uint64_t>(dv.ValueInt()));
|
||||
|
||||
// Read global vertex id of the other side of the edge
|
||||
// (global id of the from/to vertices).
|
||||
if (!communication::bolt::Decoder<Buffer>::ReadValue(
|
||||
&dv, communication::bolt::Value::Type::Int)) {
|
||||
DLOG(WARNING) << "[ReadSnapshotEdge] Couldn't read from/to address!";
|
||||
return std::nullopt;
|
||||
}
|
||||
edge.vertex = storage::VertexAddress(static_cast<uint64_t>(dv.ValueInt()));
|
||||
|
||||
// Read edge type
|
||||
if (!communication::bolt::Decoder<Buffer>::ReadValue(
|
||||
&dv, communication::bolt::Value::Type::String)) {
|
||||
DLOG(WARNING) << "[ReadSnapshotEdge] Couldn't read type!";
|
||||
return std::nullopt;
|
||||
}
|
||||
edge.type = dv.ValueString();
|
||||
|
||||
VLOG(20) << "[ReadSnapshotEdge] Success";
|
||||
return edge;
|
||||
}
|
||||
};
|
||||
}; // namespace durability
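
// Reader-side sketch (not part of this diff): after the fixed header described
// in durability/distributed/version.hpp has been consumed, vertices follow
// back-to-back and can be pulled one at a time. Buffer stands for the same
// hashed-file reader whose counterpart the encoder writes to; opening and
// positioning it is outside the scope of this sketch.
template <typename Buffer>
bool ReadAllVertices(Buffer &input, int64_t vertex_count,
                     std::vector<durability::SnapshotVertex> *out) {
  durability::SnapshotDecoder<Buffer> decoder(input);
  for (int64_t i = 0; i < vertex_count; ++i) {
    auto vertex = decoder.ReadSnapshotVertex();
    if (!vertex) return false;  // corrupt snapshot; the caller tries an older one
    out->emplace_back(*vertex);
  }
  return true;
}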
|
@ -1,58 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "communication/bolt/v1/encoder/base_encoder.hpp"
|
||||
#include "database/distributed/graph_db_accessor.hpp"
|
||||
#include "glue/communication.hpp"
|
||||
#include "utils/cast.hpp"
|
||||
|
||||
namespace durability {
|
||||
|
||||
template <typename Buffer>
|
||||
class SnapshotEncoder : public communication::bolt::BaseEncoder<Buffer> {
|
||||
public:
|
||||
explicit SnapshotEncoder(Buffer &buffer)
|
||||
: communication::bolt::BaseEncoder<Buffer>(buffer) {}
|
||||
void WriteSnapshotVertex(const VertexAccessor &vertex) {
|
||||
communication::bolt::BaseEncoder<Buffer>::WriteVertex(
|
||||
glue::ToBoltVertex(vertex));
|
||||
|
||||
// Write cypher_id
|
||||
this->WriteInt(vertex.CypherId());
|
||||
|
||||
// Write in edges without properties
|
||||
this->WriteUInt(vertex.in_degree());
|
||||
auto edges_in = vertex.in();
|
||||
for (const auto &edge : edges_in) {
|
||||
this->WriteSnapshotEdge(edge, true);
|
||||
}
|
||||
|
||||
// Write out edges without properties
|
||||
this->WriteUInt(vertex.out_degree());
|
||||
auto edges_out = vertex.out();
|
||||
for (const auto &edge : edges_out) {
|
||||
this->WriteSnapshotEdge(edge, false);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
void WriteUInt(const uint64_t &value) {
|
||||
this->WriteInt(utils::MemcpyCast<int64_t>(value));
|
||||
}
|
||||
|
||||
// Writes edge without properties
|
||||
void WriteSnapshotEdge(const EdgeAccessor &edge, bool write_from) {
|
||||
// Write global id of the edge
|
||||
WriteUInt(edge.GlobalAddress().raw());
|
||||
|
||||
// Write to/from global id
|
||||
if (write_from)
|
||||
WriteUInt(edge.from().GlobalAddress().raw());
|
||||
else
|
||||
WriteUInt(edge.to().GlobalAddress().raw());
|
||||
|
||||
// Write type
|
||||
this->WriteString(edge.db_accessor().EdgeTypeName(edge.EdgeType()));
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace durability
|
@ -1,44 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "communication/bolt/v1/value.hpp"
|
||||
#include "storage/common/types/property_value.hpp"
|
||||
#include "storage/distributed/address_types.hpp"
|
||||
#include "utils/algorithm.hpp"
|
||||
#include "utils/exceptions.hpp"
|
||||
|
||||
namespace durability {
|
||||
|
||||
/** Forward declaration of InlinedVertexEdge. */
|
||||
struct InlinedVertexEdge;
|
||||
|
||||
/**
|
||||
* Structure used when reading a Vertex with the decoder.
|
||||
* The decoder writes data into this structure.
|
||||
*/
|
||||
struct SnapshotVertex {
|
||||
gid::Gid gid;
|
||||
int64_t cypher_id;
|
||||
std::vector<std::string> labels;
|
||||
std::map<std::string, communication::bolt::Value> properties;
|
||||
// Vector of edges without properties
|
||||
std::vector<InlinedVertexEdge> in;
|
||||
std::vector<InlinedVertexEdge> out;
|
||||
};
|
||||
|
||||
/**
|
||||
* Structure used when reading an Edge with the snapshot decoder.
|
||||
* The decoder writes data into this structure.
|
||||
*/
|
||||
struct InlinedVertexEdge {
|
||||
// Addresses down below must always be global_address and never direct
|
||||
// pointers to a record.
|
||||
storage::EdgeAddress address;
|
||||
storage::VertexAddress vertex;
|
||||
std::string type;
|
||||
};
|
||||
|
||||
} // namespace durability
|
@ -1,411 +0,0 @@
|
||||
#include "durability/distributed/state_delta.hpp"
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "communication/bolt/v1/value.hpp"
|
||||
#include "database/distributed/graph_db_accessor.hpp"
|
||||
#include "glue/communication.hpp"
|
||||
|
||||
namespace database {
|
||||
|
||||
StateDelta StateDelta::TxBegin(tx::TransactionId tx_id) {
|
||||
return {StateDelta::Type::TRANSACTION_BEGIN, tx_id};
|
||||
}
|
||||
|
||||
StateDelta StateDelta::TxCommit(tx::TransactionId tx_id) {
|
||||
return {StateDelta::Type::TRANSACTION_COMMIT, tx_id};
|
||||
}
|
||||
|
||||
StateDelta StateDelta::TxAbort(tx::TransactionId tx_id) {
|
||||
return {StateDelta::Type::TRANSACTION_ABORT, tx_id};
|
||||
}
|
||||
|
||||
StateDelta StateDelta::CreateVertex(tx::TransactionId tx_id, gid::Gid vertex_id,
|
||||
int64_t cypher_id) {
|
||||
StateDelta op(StateDelta::Type::CREATE_VERTEX, tx_id);
|
||||
op.vertex_id = vertex_id;
|
||||
op.cypher_id = cypher_id;
|
||||
return op;
|
||||
}
|
||||
|
||||
StateDelta StateDelta::CreateEdge(tx::TransactionId tx_id, gid::Gid edge_id,
|
||||
int64_t cypher_id, gid::Gid vertex_from_id,
|
||||
gid::Gid vertex_to_id,
|
||||
storage::EdgeType edge_type,
|
||||
const std::string &edge_type_name) {
|
||||
StateDelta op(StateDelta::Type::CREATE_EDGE, tx_id);
|
||||
op.edge_id = edge_id;
|
||||
op.cypher_id = cypher_id;
|
||||
op.vertex_from_id = vertex_from_id;
|
||||
op.vertex_to_id = vertex_to_id;
|
||||
op.edge_type = edge_type;
|
||||
op.edge_type_name = edge_type_name;
|
||||
return op;
|
||||
}
|
||||
|
||||
StateDelta StateDelta::AddOutEdge(tx::TransactionId tx_id, gid::Gid vertex_id,
|
||||
storage::VertexAddress vertex_to_address,
|
||||
storage::EdgeAddress edge_address,
|
||||
storage::EdgeType edge_type) {
|
||||
CHECK(vertex_to_address.is_remote() && edge_address.is_remote())
|
||||
<< "WAL can only contain global addresses.";
|
||||
StateDelta op(StateDelta::Type::ADD_OUT_EDGE, tx_id);
|
||||
op.vertex_id = vertex_id;
|
||||
op.vertex_to_address = vertex_to_address;
|
||||
op.edge_address = edge_address;
|
||||
op.edge_type = edge_type;
|
||||
return op;
|
||||
}
|
||||
|
||||
StateDelta StateDelta::RemoveOutEdge(tx::TransactionId tx_id,
|
||||
gid::Gid vertex_id,
|
||||
storage::EdgeAddress edge_address) {
|
||||
CHECK(edge_address.is_remote()) << "WAL can only contain global addresses.";
|
||||
StateDelta op(StateDelta::Type::REMOVE_OUT_EDGE, tx_id);
|
||||
op.vertex_id = vertex_id;
|
||||
op.edge_address = edge_address;
|
||||
return op;
|
||||
}
|
||||
|
||||
StateDelta StateDelta::AddInEdge(tx::TransactionId tx_id, gid::Gid vertex_id,
|
||||
storage::VertexAddress vertex_from_address,
|
||||
storage::EdgeAddress edge_address,
|
||||
storage::EdgeType edge_type) {
|
||||
CHECK(vertex_from_address.is_remote() && edge_address.is_remote())
|
||||
<< "WAL can only contain global addresses.";
|
||||
StateDelta op(StateDelta::Type::ADD_IN_EDGE, tx_id);
|
||||
op.vertex_id = vertex_id;
|
||||
op.vertex_from_address = vertex_from_address;
|
||||
op.edge_address = edge_address;
|
||||
op.edge_type = edge_type;
|
||||
return op;
|
||||
}
|
||||
|
||||
StateDelta StateDelta::RemoveInEdge(tx::TransactionId tx_id, gid::Gid vertex_id,
|
||||
storage::EdgeAddress edge_address) {
|
||||
CHECK(edge_address.is_remote()) << "WAL can only contain global addresses.";
|
||||
StateDelta op(StateDelta::Type::REMOVE_IN_EDGE, tx_id);
|
||||
op.vertex_id = vertex_id;
|
||||
op.edge_address = edge_address;
|
||||
return op;
|
||||
}
|
||||
|
||||
StateDelta StateDelta::PropsSetVertex(tx::TransactionId tx_id,
|
||||
gid::Gid vertex_id,
|
||||
storage::Property property,
|
||||
const std::string &property_name,
|
||||
const PropertyValue &value) {
|
||||
StateDelta op(StateDelta::Type::SET_PROPERTY_VERTEX, tx_id);
|
||||
op.vertex_id = vertex_id;
|
||||
op.property = property;
|
||||
op.property_name = property_name;
|
||||
op.value = value;
|
||||
return op;
|
||||
}
|
||||
|
||||
StateDelta StateDelta::PropsSetEdge(tx::TransactionId tx_id, gid::Gid edge_id,
|
||||
storage::Property property,
|
||||
const std::string &property_name,
|
||||
const PropertyValue &value) {
|
||||
StateDelta op(StateDelta::Type::SET_PROPERTY_EDGE, tx_id);
|
||||
op.edge_id = edge_id;
|
||||
op.property = property;
|
||||
op.property_name = property_name;
|
||||
op.value = value;
|
||||
return op;
|
||||
}
|
||||
|
||||
StateDelta StateDelta::AddLabel(tx::TransactionId tx_id, gid::Gid vertex_id,
|
||||
storage::Label label,
|
||||
const std::string &label_name) {
|
||||
StateDelta op(StateDelta::Type::ADD_LABEL, tx_id);
|
||||
op.vertex_id = vertex_id;
|
||||
op.label = label;
|
||||
op.label_name = label_name;
|
||||
return op;
|
||||
}
|
||||
|
||||
StateDelta StateDelta::RemoveLabel(tx::TransactionId tx_id, gid::Gid vertex_id,
|
||||
storage::Label label,
|
||||
const std::string &label_name) {
|
||||
StateDelta op(StateDelta::Type::REMOVE_LABEL, tx_id);
|
||||
op.vertex_id = vertex_id;
|
||||
op.label = label;
|
||||
op.label_name = label_name;
|
||||
return op;
|
||||
}
|
||||
|
||||
StateDelta StateDelta::RemoveVertex(tx::TransactionId tx_id, gid::Gid vertex_id,
|
||||
bool check_empty) {
|
||||
StateDelta op(StateDelta::Type::REMOVE_VERTEX, tx_id);
|
||||
op.vertex_id = vertex_id;
|
||||
op.check_empty = check_empty;
|
||||
return op;
|
||||
}
|
||||
|
||||
StateDelta StateDelta::RemoveEdge(tx::TransactionId tx_id, gid::Gid edge_id) {
|
||||
StateDelta op(StateDelta::Type::REMOVE_EDGE, tx_id);
|
||||
op.edge_id = edge_id;
|
||||
return op;
|
||||
}
|
||||
|
||||
StateDelta StateDelta::BuildIndex(tx::TransactionId tx_id, storage::Label label,
|
||||
const std::string &label_name,
|
||||
storage::Property property,
|
||||
const std::string &property_name) {
|
||||
StateDelta op(StateDelta::Type::BUILD_INDEX, tx_id);
|
||||
op.label = label;
|
||||
op.label_name = label_name;
|
||||
op.property = property;
|
||||
op.property_name = property_name;
|
||||
return op;
|
||||
}
|
||||
|
||||
void StateDelta::Encode(
|
||||
HashedFileWriter &writer,
|
||||
communication::bolt::BaseEncoder<HashedFileWriter> &encoder) const {
|
||||
encoder.WriteInt(static_cast<int64_t>(type));
|
||||
encoder.WriteInt(static_cast<int64_t>(transaction_id));
|
||||
|
||||
switch (type) {
|
||||
case Type::TRANSACTION_BEGIN:
|
||||
case Type::TRANSACTION_COMMIT:
|
||||
case Type::TRANSACTION_ABORT:
|
||||
break;
|
||||
case Type::CREATE_VERTEX:
|
||||
encoder.WriteInt(vertex_id);
|
||||
encoder.WriteInt(cypher_id);
|
||||
break;
|
||||
case Type::CREATE_EDGE:
|
||||
encoder.WriteInt(edge_id);
|
||||
encoder.WriteInt(cypher_id);
|
||||
encoder.WriteInt(vertex_from_id);
|
||||
encoder.WriteInt(vertex_to_id);
|
||||
encoder.WriteInt(edge_type.Id());
|
||||
encoder.WriteString(edge_type_name);
|
||||
break;
|
||||
case Type::ADD_OUT_EDGE:
|
||||
encoder.WriteInt(vertex_id);
|
||||
encoder.WriteInt(vertex_to_address.raw());
|
||||
encoder.WriteInt(edge_address.raw());
|
||||
encoder.WriteInt(edge_type.Id());
|
||||
break;
|
||||
case Type::REMOVE_OUT_EDGE:
|
||||
encoder.WriteInt(vertex_id);
|
||||
encoder.WriteInt(edge_address.raw());
|
||||
break;
|
||||
case Type::ADD_IN_EDGE:
|
||||
encoder.WriteInt(vertex_id);
|
||||
encoder.WriteInt(vertex_from_address.raw());
|
||||
encoder.WriteInt(edge_address.raw());
|
||||
encoder.WriteInt(edge_type.Id());
|
||||
break;
|
||||
case Type::REMOVE_IN_EDGE:
|
||||
encoder.WriteInt(vertex_id);
|
||||
encoder.WriteInt(edge_address.raw());
|
||||
break;
|
||||
case Type::SET_PROPERTY_VERTEX:
|
||||
encoder.WriteInt(vertex_id);
|
||||
encoder.WriteInt(property.Id());
|
||||
encoder.WriteString(property_name);
|
||||
encoder.WriteValue(glue::ToBoltValue(value));
|
||||
break;
|
||||
case Type::SET_PROPERTY_EDGE:
|
||||
encoder.WriteInt(edge_id);
|
||||
encoder.WriteInt(property.Id());
|
||||
encoder.WriteString(property_name);
|
||||
encoder.WriteValue(glue::ToBoltValue(value));
|
||||
break;
|
||||
case Type::ADD_LABEL:
|
||||
case Type::REMOVE_LABEL:
|
||||
encoder.WriteInt(vertex_id);
|
||||
encoder.WriteInt(label.Id());
|
||||
encoder.WriteString(label_name);
|
||||
break;
|
||||
case Type::REMOVE_VERTEX:
|
||||
encoder.WriteInt(vertex_id);
|
||||
break;
|
||||
case Type::REMOVE_EDGE:
|
||||
encoder.WriteInt(edge_id);
|
||||
break;
|
||||
case Type::BUILD_INDEX:
|
||||
encoder.WriteInt(label.Id());
|
||||
encoder.WriteString(label_name);
|
||||
encoder.WriteInt(property.Id());
|
||||
encoder.WriteString(property_name);
|
||||
break;
|
||||
}
|
||||
|
||||
writer.WriteValue(writer.hash());
|
||||
}
|
||||
|
||||
#define DECODE_MEMBER(member, value_f) \
|
||||
if (!decoder.ReadValue(&dv)) return nullopt; \
|
||||
r_val.member = dv.value_f();
|
||||
|
||||
#define DECODE_MEMBER_CAST(member, value_f, type) \
|
||||
if (!decoder.ReadValue(&dv)) return nullopt; \
|
||||
r_val.member = static_cast<type>(dv.value_f());
|
||||
|
||||
std::optional<StateDelta> StateDelta::Decode(
|
||||
HashedFileReader &reader,
|
||||
communication::bolt::Decoder<HashedFileReader> &decoder) {
|
||||
using std::nullopt;
|
||||
|
||||
StateDelta r_val;
|
||||
// The decoded value used as a temporary while decoding.
|
||||
communication::bolt::Value dv;
|
||||
|
||||
try {
|
||||
if (!decoder.ReadValue(&dv)) return nullopt;
|
||||
r_val.type = static_cast<enum StateDelta::Type>(dv.ValueInt());
|
||||
DECODE_MEMBER(transaction_id, ValueInt)
|
||||
|
||||
switch (r_val.type) {
|
||||
case Type::TRANSACTION_BEGIN:
|
||||
case Type::TRANSACTION_COMMIT:
|
||||
case Type::TRANSACTION_ABORT:
|
||||
break;
|
||||
case Type::CREATE_VERTEX:
|
||||
DECODE_MEMBER(vertex_id, ValueInt)
|
||||
DECODE_MEMBER(cypher_id, ValueInt)
|
||||
break;
|
||||
case Type::CREATE_EDGE:
|
||||
DECODE_MEMBER(edge_id, ValueInt)
|
||||
DECODE_MEMBER(cypher_id, ValueInt)
|
||||
DECODE_MEMBER(vertex_from_id, ValueInt)
|
||||
DECODE_MEMBER(vertex_to_id, ValueInt)
|
||||
DECODE_MEMBER_CAST(edge_type, ValueInt, storage::EdgeType)
|
||||
DECODE_MEMBER(edge_type_name, ValueString)
|
||||
break;
|
||||
case Type::ADD_OUT_EDGE:
|
||||
DECODE_MEMBER(vertex_id, ValueInt)
|
||||
DECODE_MEMBER_CAST(vertex_to_address, ValueInt, storage::VertexAddress)
|
||||
DECODE_MEMBER_CAST(edge_address, ValueInt, storage::EdgeAddress)
|
||||
DECODE_MEMBER_CAST(edge_type, ValueInt, storage::EdgeType)
|
||||
break;
|
||||
case Type::REMOVE_OUT_EDGE:
|
||||
DECODE_MEMBER(vertex_id, ValueInt)
|
||||
DECODE_MEMBER_CAST(edge_address, ValueInt, storage::EdgeAddress)
|
||||
break;
|
||||
case Type::ADD_IN_EDGE:
|
||||
DECODE_MEMBER(vertex_id, ValueInt)
|
||||
DECODE_MEMBER_CAST(vertex_from_address, ValueInt,
|
||||
storage::VertexAddress)
|
||||
DECODE_MEMBER_CAST(edge_address, ValueInt, storage::EdgeAddress)
|
||||
DECODE_MEMBER_CAST(edge_type, ValueInt, storage::EdgeType)
|
||||
break;
|
||||
case Type::REMOVE_IN_EDGE:
|
||||
DECODE_MEMBER(vertex_id, ValueInt)
|
||||
DECODE_MEMBER_CAST(edge_address, ValueInt, storage::EdgeAddress)
|
||||
break;
|
||||
case Type::SET_PROPERTY_VERTEX:
|
||||
DECODE_MEMBER(vertex_id, ValueInt)
|
||||
DECODE_MEMBER_CAST(property, ValueInt, storage::Property)
|
||||
DECODE_MEMBER(property_name, ValueString)
|
||||
if (!decoder.ReadValue(&dv)) return nullopt;
|
||||
r_val.value = glue::ToPropertyValue(dv);
|
||||
break;
|
||||
case Type::SET_PROPERTY_EDGE:
|
||||
DECODE_MEMBER(edge_id, ValueInt)
|
||||
DECODE_MEMBER_CAST(property, ValueInt, storage::Property)
|
||||
DECODE_MEMBER(property_name, ValueString)
|
||||
if (!decoder.ReadValue(&dv)) return nullopt;
|
||||
r_val.value = glue::ToPropertyValue(dv);
|
||||
break;
|
||||
case Type::ADD_LABEL:
|
||||
case Type::REMOVE_LABEL:
|
||||
DECODE_MEMBER(vertex_id, ValueInt)
|
||||
DECODE_MEMBER_CAST(label, ValueInt, storage::Label)
|
||||
DECODE_MEMBER(label_name, ValueString)
|
||||
break;
|
||||
case Type::REMOVE_VERTEX:
|
||||
DECODE_MEMBER(vertex_id, ValueInt)
|
||||
break;
|
||||
case Type::REMOVE_EDGE:
|
||||
DECODE_MEMBER(edge_id, ValueInt)
|
||||
break;
|
||||
case Type::BUILD_INDEX:
|
||||
DECODE_MEMBER_CAST(label, ValueInt, storage::Label)
|
||||
DECODE_MEMBER(label_name, ValueString)
|
||||
DECODE_MEMBER_CAST(property, ValueInt, storage::Property)
|
||||
DECODE_MEMBER(property_name, ValueString)
|
||||
break;
|
||||
}
|
||||
|
||||
auto decoder_hash = reader.hash();
|
||||
uint64_t encoded_hash;
|
||||
if (!reader.ReadType(encoded_hash, true)) return nullopt;
|
||||
if (decoder_hash != encoded_hash) return nullopt;
|
||||
|
||||
return r_val;
|
||||
} catch (communication::bolt::ValueException &) {
|
||||
return nullopt;
|
||||
} catch (std::ifstream::failure &) {
|
||||
return nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
#undef DECODE_MEMBER
|
||||
|
||||
void StateDelta::Apply(GraphDbAccessor &dba) const {
|
||||
switch (type) {
|
||||
// Transactional state is not recovered.
|
||||
case Type::TRANSACTION_BEGIN:
|
||||
case Type::TRANSACTION_COMMIT:
|
||||
case Type::TRANSACTION_ABORT:
|
||||
LOG(FATAL) << "Transaction handling not handled in Apply";
|
||||
break;
|
||||
case Type::CREATE_VERTEX:
|
||||
dba.InsertVertex(vertex_id, cypher_id);
|
||||
break;
|
||||
case Type::CREATE_EDGE: {
|
||||
auto from = dba.FindVertex(vertex_from_id, true);
|
||||
auto to = dba.FindVertex(vertex_to_id, true);
|
||||
dba.InsertEdge(from, to, dba.EdgeType(edge_type_name), edge_id,
|
||||
cypher_id);
|
||||
break;
|
||||
}
|
||||
case Type::ADD_OUT_EDGE:
|
||||
case Type::REMOVE_OUT_EDGE:
|
||||
case Type::ADD_IN_EDGE:
|
||||
case Type::REMOVE_IN_EDGE:
|
||||
LOG(FATAL) << "Partial edge creation/deletion not yet supported in Apply";
|
||||
case Type::SET_PROPERTY_VERTEX: {
|
||||
auto vertex = dba.FindVertex(vertex_id, true);
|
||||
vertex.PropsSet(dba.Property(property_name), value);
|
||||
break;
|
||||
}
|
||||
case Type::SET_PROPERTY_EDGE: {
|
||||
auto edge = dba.FindEdge(edge_id, true);
|
||||
edge.PropsSet(dba.Property(property_name), value);
|
||||
break;
|
||||
}
|
||||
case Type::ADD_LABEL: {
|
||||
auto vertex = dba.FindVertex(vertex_id, true);
|
||||
vertex.add_label(dba.Label(label_name));
|
||||
break;
|
||||
}
|
||||
case Type::REMOVE_LABEL: {
|
||||
auto vertex = dba.FindVertex(vertex_id, true);
|
||||
vertex.remove_label(dba.Label(label_name));
|
||||
break;
|
||||
}
|
||||
case Type::REMOVE_VERTEX: {
|
||||
auto vertex = dba.FindVertex(vertex_id, true);
|
||||
dba.DetachRemoveVertex(vertex);
|
||||
break;
|
||||
}
|
||||
case Type::REMOVE_EDGE: {
|
||||
auto edge = dba.FindEdge(edge_id, true);
|
||||
dba.RemoveEdge(edge);
|
||||
break;
|
||||
}
|
||||
case Type::BUILD_INDEX: {
|
||||
LOG(FATAL) << "Index handling not handled in Apply";
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}; // namespace database
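
// Stand-alone sketch (not part of this diff): persist a single delta the same
// way WriteAheadLog::WalFile::Flush() does, using only types and calls that
// appear elsewhere in this diff (HashedFileWriter, BaseEncoder,
// StateDelta::Encode).
inline void WriteOneDelta(const std::filesystem::path &path,
                          const database::StateDelta &delta) {
  HashedFileWriter writer(path);  // same constructor as in snapshooter.cpp
  communication::bolt::BaseEncoder<HashedFileWriter> encoder{writer};
  delta.Encode(writer, encoder);  // writes type, tx id, payload and the running hash
  writer.Close();
}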
|
@ -1,151 +0,0 @@
|
||||
#>cpp
|
||||
#pragma once
|
||||
|
||||
#include "communication/bolt/v1/decoder/decoder.hpp"
|
||||
#include "communication/bolt/v1/encoder/base_encoder.hpp"
|
||||
#include "durability/hashed_file_reader.hpp"
|
||||
#include "durability/hashed_file_writer.hpp"
|
||||
#include "storage/common/types/property_value.hpp"
|
||||
#include "storage/common/types/types.hpp"
|
||||
#include "storage/distributed/address_types.hpp"
|
||||
#include "storage/distributed/gid.hpp"
|
||||
#include "utils/typeinfo.hpp"
|
||||
cpp<#
|
||||
|
||||
(lcp:namespace database)
|
||||
|
||||
#>cpp
|
||||
class GraphDbAccessor;
|
||||
cpp<#
|
||||
|
||||
(lcp:define-struct state-delta ()
|
||||
(
|
||||
;; Members valid for every delta.
|
||||
(type "Type")
|
||||
(transaction-id "::tx::TransactionId")
|
||||
;; Members valid only for some deltas, see StateDelta::Type comments above.
|
||||
;; TODO: when preparing the WAL for distributed, most likely remove Gids and
|
||||
;; only keep addresses.
|
||||
(vertex-id "::gid::Gid")
|
||||
(edge-id "::gid::Gid")
|
||||
(cypher-id :int64_t)
|
||||
(edge-address "::storage::EdgeAddress")
|
||||
(vertex-from-id "::gid::Gid")
|
||||
(vertex-from-address "::storage::VertexAddress")
|
||||
(vertex-to-id "::gid::Gid")
|
||||
(vertex-to-address "::storage::VertexAddress")
|
||||
(edge-type "::storage::EdgeType")
|
||||
(edge-type-name "std::string")
|
||||
(property "::storage::Property")
|
||||
(property-name "std::string")
|
||||
(value "PropertyValue" :initval "PropertyValue::Null")
|
||||
(label "::storage::Label")
|
||||
(label-name "std::string")
|
||||
(check-empty :bool))
|
||||
(:documentation
|
||||
"Describes single change to the database state. Used for durability (WAL) and
|
||||
for distributed remote storage changes.
|
||||
|
||||
Labels, Properties and EdgeTypes are stored both as values (integers) and
|
||||
strings (their names). The values are used when applying deltas in a running
|
||||
database. Names are used when recovering the database as it's not guaranteed
|
||||
that after recovery the old name<->value mapping will be preserved.
|
||||
|
||||
TODO: ensure the mapping is preserved after recovery and don't save strings
|
||||
in StateDeltas.")
|
||||
(:public
|
||||
(lcp:define-enum type
|
||||
(transaction-begin
|
||||
transaction-commit
|
||||
transaction-abort
|
||||
create-vertex ;; vertex_id
|
||||
create-edge ;; edge_id, from_vertex_id, to_vertex_id, edge_type, edge_type_name
|
||||
add-out-edge ;; vertex_id, edge_address, vertex_to_address, edge_type
|
||||
remove-out-edge ;; vertex_id, edge_address
|
||||
add-in-edge ;; vertex_id, edge_address, vertex_from_address, edge_type
|
||||
remove-in-edge ;; vertex_id, edge_address
|
||||
set-property-vertex ;; vertex_id, property, property_name, property_value
|
||||
set-property-edge ;; edge_id, property, property_name, property_value
|
||||
;; remove property is done by setting a PropertyValue::Null
|
||||
add-label ;; vertex_id, label, label_name
|
||||
remove-label ;; vertex_id, label, label_name
|
||||
remove-vertex ;; vertex_id, check_empty
|
||||
remove-edge ;; edge_id
|
||||
build-index ;; label, label_name, property, property_name
|
||||
)
|
||||
(:documentation
|
||||
"Defines StateDelta type. For each type the comment indicates which values
|
||||
need to be stored. All deltas have the transaction_id member, so that's
|
||||
omitted in the comment.")
|
||||
(:serialize))
|
||||
#>cpp
|
||||
StateDelta() = default;
|
||||
StateDelta(const enum Type &type, tx::TransactionId tx_id)
|
||||
: type(type), transaction_id(tx_id) {}
|
||||
|
||||
/** Attempts to decode a StateDelta from the given decoder. Returns the
|
||||
* decoded value if successful, otherwise returns nullopt. */
|
||||
static std::optional<StateDelta> Decode(
|
||||
HashedFileReader &reader,
|
||||
communication::bolt::Decoder<HashedFileReader> &decoder);
|
||||
|
||||
/** Encodes the delta using primitive encoder, and writes out the new hash
|
||||
* with delta to the writer */
|
||||
void Encode(
|
||||
HashedFileWriter &writer,
|
||||
communication::bolt::BaseEncoder<HashedFileWriter> &encoder) const;
|
||||
|
||||
static StateDelta TxBegin(tx::TransactionId tx_id);
|
||||
static StateDelta TxCommit(tx::TransactionId tx_id);
|
||||
static StateDelta TxAbort(tx::TransactionId tx_id);
|
||||
static StateDelta CreateVertex(tx::TransactionId tx_id,
|
||||
gid::Gid vertex_id,
|
||||
int64_t cypher_id);
|
||||
static StateDelta CreateEdge(tx::TransactionId tx_id, gid::Gid edge_id,
|
||||
int64_t cypher_id,
|
||||
gid::Gid vertex_from_id,
|
||||
gid::Gid vertex_to_id,
|
||||
storage::EdgeType edge_type,
|
||||
const std::string &edge_type_name);
|
||||
static StateDelta AddOutEdge(tx::TransactionId tx_id, gid::Gid vertex_id,
|
||||
storage::VertexAddress vertex_to_address,
|
||||
storage::EdgeAddress edge_address,
|
||||
storage::EdgeType edge_type);
|
||||
static StateDelta RemoveOutEdge(tx::TransactionId tx_id,
|
||||
gid::Gid vertex_id,
|
||||
storage::EdgeAddress edge_address);
|
||||
static StateDelta AddInEdge(tx::TransactionId tx_id, gid::Gid vertex_id,
|
||||
storage::VertexAddress vertex_from_address,
|
||||
storage::EdgeAddress edge_address,
|
||||
storage::EdgeType edge_type);
|
||||
static StateDelta RemoveInEdge(tx::TransactionId tx_id, gid::Gid vertex_id,
|
||||
storage::EdgeAddress edge_address);
|
||||
static StateDelta PropsSetVertex(tx::TransactionId tx_id,
|
||||
gid::Gid vertex_id,
|
||||
storage::Property property,
|
||||
const std::string &property_name,
|
||||
const PropertyValue &value);
|
||||
static StateDelta PropsSetEdge(tx::TransactionId tx_id, gid::Gid edge_id,
|
||||
storage::Property property,
|
||||
const std::string &property_name,
|
||||
const PropertyValue &value);
|
||||
static StateDelta AddLabel(tx::TransactionId tx_id, gid::Gid vertex_id,
|
||||
storage::Label label,
|
||||
const std::string &label_name);
|
||||
static StateDelta RemoveLabel(tx::TransactionId tx_id, gid::Gid vertex_id,
|
||||
storage::Label label,
|
||||
const std::string &label_name);
|
||||
static StateDelta RemoveVertex(tx::TransactionId tx_id, gid::Gid vertex_id,
|
||||
bool check_empty);
|
||||
static StateDelta RemoveEdge(tx::TransactionId tx_id, gid::Gid edge_id);
|
||||
static StateDelta BuildIndex(tx::TransactionId tx_id, storage::Label label,
|
||||
const std::string &label_name,
|
||||
storage::Property property,
|
||||
const std::string &property_name);
|
||||
|
||||
/// Applies CRUD delta to database accessor. Fails on other types of deltas
|
||||
void Apply(GraphDbAccessor &dba) const;
|
||||
cpp<#)
|
||||
(:serialize (:slk)))
|
||||
|
||||
(lcp:pop-namespace) ;; database
|
@ -1,52 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
///
|
||||
///
|
||||
/// IMPORTANT: Please update this file for every snapshot format change!!!
|
||||
/// TODO (buda): This is not rock solid.
|
||||
///
|
||||
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
|
||||
namespace durability {
|
||||
|
||||
constexpr std::array<uint8_t, 4> kSnapshotMagic{{'M', 'G', 's', 'n'}};
|
||||
constexpr std::array<uint8_t, 4> kWalMagic{{'M', 'G', 'w', 'l'}};
|
||||
|
||||
// The current default version of snapshot and WAL encoding / decoding.
|
||||
constexpr int64_t kVersion{6};
|
||||
|
||||
// Snapshot format (version 6):
|
||||
// 1) Magic number + snapshot version
|
||||
// 2) Distributed worker ID
|
||||
//
|
||||
// The following two entries indicate the starting points for generating new
|
||||
// vertex/edge IDs in the DB. They are important when there are vertices/edges
|
||||
// that were moved to another worker (in distributed Memgraph).
|
||||
// 3) Vertex generator ID
|
||||
// 4) Edge generator ID
|
||||
//
|
||||
// The following two entries are required when recovering from snapshot combined
|
||||
// with WAL to determine record visibility.
|
||||
// 5) Transactional ID of the snapshooter
|
||||
// 6) Transactional snapshot of the snapshooter
|
||||
//
|
||||
// 7) A list of label+property indices.
|
||||
//
|
||||
// We must inline edges with nodes because some edges might be stored on another
|
||||
// worker (edges are always stored only on the worker of the edge source).
|
||||
// 8) Bolt encoded nodes. Each node is written in the following format:
|
||||
// * gid, labels, properties
|
||||
// * cypher_id
|
||||
// * inlined edges (edge address, other endpoint address and edge type)
|
||||
// 9) Bolt encoded edges. Each edge is written in the following format:
|
||||
// * gid
|
||||
// * from, to
|
||||
// * edge_type
|
||||
// * properties
|
||||
// * cypher_id
|
||||
//
|
||||
// 10) Snapshot summary (number of nodes, number of edges, hash)
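//
// For example, a snapshot holding two vertices and one edge ends with the
// three values 2, 1, <hash>: exactly what Encode() in
// durability/distributed/snapshooter.cpp emits via WriteValue() and what
// ReadSnapshotSummary() expects to find at the end of the file.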
|
||||
|
||||
} // namespace durability
|
@ -1,161 +0,0 @@
|
||||
#include "durability/distributed/wal.hpp"
|
||||
|
||||
#include "durability/distributed/paths.hpp"
|
||||
#include "durability/distributed/version.hpp"
|
||||
#include "utils/file.hpp"
|
||||
#include "utils/flag_validation.hpp"
|
||||
|
||||
DEFINE_HIDDEN_int32(
|
||||
wal_flush_interval_millis, 2,
|
||||
"Interval between two write-ahead log flushes, in milliseconds.");
|
||||
|
||||
DEFINE_HIDDEN_int32(
|
||||
wal_rotate_deltas_count, 10000,
|
||||
"How many write-ahead deltas should be stored in a single WAL file "
|
||||
"before rotating it.");
|
||||
|
||||
DEFINE_VALIDATED_HIDDEN_int32(wal_buffer_size, 4096,
|
||||
"Write-ahead log buffer size.",
|
||||
FLAG_IN_RANGE(1, 1 << 30));
|
||||
|
||||
namespace durability {
|
||||
WriteAheadLog::WriteAheadLog(int worker_id,
|
||||
const std::filesystem::path &durability_dir,
|
||||
bool durability_enabled, bool synchronous_commit)
|
||||
: deltas_{FLAGS_wal_buffer_size},
|
||||
wal_file_{worker_id, durability_dir},
|
||||
durability_enabled_(durability_enabled),
|
||||
synchronous_commit_(synchronous_commit) {
|
||||
if (durability_enabled_) {
|
||||
utils::EnsureDirOrDie(durability_dir);
|
||||
}
|
||||
}
|
||||
|
||||
WriteAheadLog::~WriteAheadLog() {
|
||||
if (durability_enabled_) {
|
||||
if (!synchronous_commit_) scheduler_.Stop();
|
||||
wal_file_.Flush(deltas_);
|
||||
}
|
||||
}
|
||||
|
||||
WriteAheadLog::WalFile::WalFile(int worker_id,
|
||||
const std::filesystem::path &durability_dir)
|
||||
: worker_id_(worker_id), wal_dir_{durability_dir / kWalDir} {}
|
||||
|
||||
WriteAheadLog::WalFile::~WalFile() {
|
||||
if (!current_wal_file_.empty()) writer_.Close();
|
||||
}
|
||||
|
||||
void WriteAheadLog::WalFile::Init() {
|
||||
if (!utils::EnsureDir(wal_dir_)) {
|
||||
LOG(ERROR) << "Can't write to WAL directory: " << wal_dir_;
|
||||
current_wal_file_ = std::filesystem::path();
|
||||
} else {
|
||||
current_wal_file_ = WalFilenameForTransactionId(wal_dir_, worker_id_);
|
||||
// TODO: Fix error handling, the encoder_ returns `true` or `false`.
|
||||
try {
|
||||
writer_.Open(current_wal_file_);
|
||||
encoder_.WriteRAW(durability::kWalMagic.data(),
|
||||
durability::kWalMagic.size());
|
||||
encoder_.WriteInt(durability::kVersion);
|
||||
writer_.Flush();
|
||||
} catch (std::ios_base::failure &) {
|
||||
LOG(ERROR) << "Failed to open write-ahead log file: "
|
||||
<< current_wal_file_;
|
||||
current_wal_file_ = std::filesystem::path();
|
||||
}
|
||||
}
|
||||
latest_tx_ = 0;
|
||||
current_wal_file_delta_count_ = 0;
|
||||
}
|
||||
|
||||
void WriteAheadLog::WalFile::Flush(RingBuffer<database::StateDelta> &buffer) {
|
||||
std::lock_guard<std::mutex> flush_lock(flush_mutex_);
|
||||
if (current_wal_file_.empty()) {
|
||||
LOG(ERROR) << "Write-ahead log file uninitialized, discarding data.";
|
||||
buffer.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
while (true) {
|
||||
auto delta = buffer.pop();
|
||||
if (!delta) break;
|
||||
latest_tx_ = std::max(latest_tx_, delta->transaction_id);
|
||||
delta->Encode(writer_, encoder_);
|
||||
writer_.Flush();
|
||||
if (++current_wal_file_delta_count_ >= FLAGS_wal_rotate_deltas_count)
|
||||
RotateFile();
|
||||
}
|
||||
writer_.Flush();
|
||||
} catch (std::ios_base::failure &) {
|
||||
LOG(ERROR) << "Failed to write to write-ahead log, discarding data.";
|
||||
buffer.clear();
|
||||
return;
|
||||
} catch (std::filesystem::filesystem_error &) {
|
||||
LOG(ERROR) << "Failed to rotate write-ahead log.";
|
||||
buffer.clear();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void WriteAheadLog::WalFile::RotateFile() {
|
||||
writer_.Flush();
|
||||
writer_.Close();
|
||||
std::filesystem::rename(
|
||||
current_wal_file_,
|
||||
WalFilenameForTransactionId(wal_dir_, worker_id_, latest_tx_));
|
||||
Init();
|
||||
}
|
||||
|
||||
void WriteAheadLog::Init() {
|
||||
if (durability_enabled_) {
|
||||
enabled_ = true;
|
||||
wal_file_.Init();
|
||||
if (!synchronous_commit_) {
|
||||
scheduler_.Run("WAL",
|
||||
std::chrono::milliseconds(FLAGS_wal_flush_interval_millis),
|
||||
[this]() { wal_file_.Flush(deltas_); });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void WriteAheadLog::Emplace(const database::StateDelta &delta) {
|
||||
if (durability_enabled_ && enabled_) {
|
||||
deltas_.emplace(delta);
|
||||
if (synchronous_commit_ && IsStateDeltaTransactionEnd(delta)) {
|
||||
wal_file_.Flush(deltas_);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool WriteAheadLog::IsStateDeltaTransactionEnd(
|
||||
const database::StateDelta &delta) {
|
||||
switch (delta.type) {
|
||||
case database::StateDelta::Type::TRANSACTION_COMMIT:
|
||||
case database::StateDelta::Type::TRANSACTION_ABORT:
|
||||
return true;
|
||||
case database::StateDelta::Type::TRANSACTION_BEGIN:
|
||||
case database::StateDelta::Type::CREATE_VERTEX:
|
||||
case database::StateDelta::Type::CREATE_EDGE:
|
||||
case database::StateDelta::Type::ADD_OUT_EDGE:
|
||||
case database::StateDelta::Type::REMOVE_OUT_EDGE:
|
||||
case database::StateDelta::Type::ADD_IN_EDGE:
|
||||
case database::StateDelta::Type::REMOVE_IN_EDGE:
|
||||
case database::StateDelta::Type::SET_PROPERTY_VERTEX:
|
||||
case database::StateDelta::Type::SET_PROPERTY_EDGE:
|
||||
case database::StateDelta::Type::ADD_LABEL:
|
||||
case database::StateDelta::Type::REMOVE_LABEL:
|
||||
case database::StateDelta::Type::REMOVE_VERTEX:
|
||||
case database::StateDelta::Type::REMOVE_EDGE:
|
||||
case database::StateDelta::Type::BUILD_INDEX:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void WriteAheadLog::Flush() {
|
||||
if (enabled_) {
|
||||
wal_file_.Flush(deltas_);
|
||||
}
|
||||
}
|
||||
} // namespace durability
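
// How the defaults above combine (comment only, no behaviour change): a delta
// waits in the 4096-slot ring buffer for at most ~2 ms before the background
// scheduler flushes it, a WAL file is rotated after 10000 deltas, and with
// --synchronous-commit every TRANSACTION_COMMIT / TRANSACTION_ABORT delta
// forces an immediate flush directly from Emplace().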
|
@ -1,99 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <chrono>
|
||||
#include <cstdint>
|
||||
#include <filesystem>
|
||||
|
||||
#include <gflags/gflags.h>
|
||||
#include <glog/logging.h>
|
||||
|
||||
#include "communication/bolt/v1/encoder/base_encoder.hpp"
|
||||
#include "data_structures/ring_buffer.hpp"
|
||||
#include "durability/distributed/state_delta.hpp"
|
||||
#include "storage/common/types/property_value.hpp"
|
||||
#include "storage/common/types/types.hpp"
|
||||
#include "storage/distributed/gid.hpp"
|
||||
#include "transactions/type.hpp"
|
||||
#include "utils/scheduler.hpp"
|
||||
|
||||
namespace durability {
|
||||
|
||||
/// A database StateDelta log for durability. Buffers and periodically
|
||||
/// serializes small-granulation database deltas (StateDelta).
|
||||
///
|
||||
/// The order is not deterministic in a multithreaded scenario (multiple DB
|
||||
/// transactions). This is fine, the recovery process should be immune to this
|
||||
/// indeterminism.
|
||||
class WriteAheadLog {
|
||||
public:
|
||||
WriteAheadLog(int worker_id, const std::filesystem::path &durability_dir,
|
||||
bool durability_enabled, bool synchronous_commit);
|
||||
~WriteAheadLog();
|
||||
|
||||
/// Initializes the WAL. Called at the end of GraphDb construction, after
|
||||
/// (optional) recovery. Also responsible for initializing the wal_file.
|
||||
void Init();
|
||||
|
||||
/// Emplaces the given StateDelta onto the buffer, if the WAL is enabled.
|
||||
/// If the WAL is configured to work in synchronous commit mode, emplace will
|
||||
/// flush the buffers if a delta represents a transaction end.
|
||||
void Emplace(const database::StateDelta &delta);
|
||||
|
||||
/// Flushes every delta currently in the ring buffer.
|
||||
/// This method should only be called from tests.
|
||||
void Flush();
|
||||
|
||||
private:
|
||||
/// Groups the logic of WAL file handling (flushing, naming, rotating)
|
||||
class WalFile {
|
||||
public:
|
||||
WalFile(int worker_id, const std::filesystem::path &durability_dir);
|
||||
~WalFile();
|
||||
|
||||
/// Initializes the WAL file. Must be called before first flush. Can be
|
||||
/// called after Flush() to re-initialize the file state.
|
||||
void Init();
|
||||
|
||||
/// Flushes all the deltas in the buffer to the WAL file. If necessary
|
||||
/// rotates the file.
|
||||
void Flush(RingBuffer<database::StateDelta> &buffer);
|
||||
|
||||
private:
|
||||
/// Mutex used for flushing wal data
|
||||
std::mutex flush_mutex_;
|
||||
int worker_id_;
|
||||
const std::filesystem::path wal_dir_;
|
||||
HashedFileWriter writer_;
|
||||
communication::bolt::BaseEncoder<HashedFileWriter> encoder_{writer_};
|
||||
|
||||
/// The file to which the WAL flushes data. The path is fixed, the file gets
|
||||
/// moved when the WAL gets rotated.
|
||||
std::filesystem::path current_wal_file_;
|
||||
|
||||
/// Number of deltas in the current wal file.
|
||||
int current_wal_file_delta_count_{0};
|
||||
|
||||
/// The latest transaction whose delta is recorded in the current WAL file.
|
||||
/// Zero indicates that no deltas have so far been written to the current
|
||||
/// WAL file.
|
||||
tx::TransactionId latest_tx_{0};
|
||||
|
||||
void RotateFile();
|
||||
};
|
||||
|
||||
RingBuffer<database::StateDelta> deltas_;
|
||||
utils::Scheduler scheduler_;
|
||||
WalFile wal_file_;
|
||||
|
||||
/// Used for disabling the durability feature of the DB.
|
||||
bool durability_enabled_{false};
|
||||
/// Used for disabling the WAL during DB recovery.
|
||||
bool enabled_{false};
|
||||
/// Should every WAL write be synced with the underlying storage.
|
||||
bool synchronous_commit_{false};
|
||||
|
||||
/// Checks whether the given state delta represents a transaction end,
|
||||
/// TRANSACTION_COMMIT and TRANSACTION_ABORT.
|
||||
bool IsStateDeltaTransactionEnd(const database::StateDelta &delta);
|
||||
};
|
||||
} // namespace durability
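
// Usage sketch (not part of this diff): wire up a WAL for a single worker and
// log one tiny transaction. Every call below is declared in this header or in
// durability/distributed/state_delta.lcp; the literal ids are placeholders.
inline void WalSmokeTest(const std::filesystem::path &durability_dir) {
  durability::WriteAheadLog wal(/*worker_id=*/0, durability_dir,
                                /*durability_enabled=*/true,
                                /*synchronous_commit=*/true);
  wal.Init();  // normally called at the end of GraphDb construction
  tx::TransactionId tx = 1;
  wal.Emplace(database::StateDelta::TxBegin(tx));
  wal.Emplace(database::StateDelta::CreateVertex(tx, /*vertex_id=*/0,
                                                 /*cypher_id=*/0));
  // In synchronous-commit mode the commit delta triggers an immediate flush.
  wal.Emplace(database::StateDelta::TxCommit(tx));
}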
|
@ -1,174 +0,0 @@
#include <algorithm>
#include <chrono>
#include <cstdint>
#include <exception>
#include <functional>
#include <limits>
#include <thread>

#include <gflags/gflags.h>
#include <glog/logging.h>

#include "communication/server.hpp"
#include "database/distributed/distributed_graph_db.hpp"
#include "integrations/kafka/exceptions.hpp"
#include "integrations/kafka/streams.hpp"
#include "memgraph_init.hpp"
#include "query/distributed/interpreter.hpp"
#include "query/exceptions.hpp"
#include "telemetry/telemetry.hpp"
#include "utils/flag_validation.hpp"

// General purpose flags.
DEFINE_string(interface, "0.0.0.0",
"Communication interface on which to listen.");
DEFINE_VALIDATED_int32(port, 7687, "Communication port on which to listen.",
FLAG_IN_RANGE(0, std::numeric_limits<uint16_t>::max()));
DEFINE_VALIDATED_int32(num_workers,
std::max(std::thread::hardware_concurrency(), 1U),
"Number of workers (Bolt)", FLAG_IN_RANGE(1, INT32_MAX));
DEFINE_VALIDATED_int32(session_inactivity_timeout, 1800,
"Time in seconds after which inactive sessions will be "
"closed.",
FLAG_IN_RANGE(1, INT32_MAX));
DEFINE_string(cert_file, "", "Certificate file to use.");
DEFINE_string(key_file, "", "Key file to use.");

DEFINE_bool(telemetry_enabled, false,
"Set to true to enable telemetry. We collect information about the "
"running system (CPU and memory information) and information about "
"the database runtime (vertex and edge counts and resource usage) "
"to allow for easier improvement of the product.");

// Audit logging flags.
DEFINE_bool(audit_enabled, false, "Set to true to enable audit logging.");
DEFINE_VALIDATED_int32(audit_buffer_size, audit::kBufferSizeDefault,
"Maximum number of items in the audit log buffer.",
FLAG_IN_RANGE(1, INT32_MAX));
DEFINE_VALIDATED_int32(
audit_buffer_flush_interval_ms, audit::kBufferFlushIntervalMillisDefault,
"Interval (in milliseconds) used for flushing the audit log buffer.",
FLAG_IN_RANGE(10, INT32_MAX));

using ServerT = communication::Server<BoltSession, SessionData>;
using communication::ServerContext;

// Distributed flags.
DEFINE_HIDDEN_bool(
master, false,
"If this Memgraph server is the master in a distributed deployment.");
DEFINE_HIDDEN_bool(
worker, false,
"If this Memgraph server is a worker in a distributed deployment.");
DECLARE_int32(worker_id);

void MasterMain() {
google::SetUsageMessage("Memgraph distributed master");

auto durability_directory = std::filesystem::path(FLAGS_durability_directory);

auth::Init();
auth::Auth auth{durability_directory / "auth"};

audit::Log audit_log{durability_directory / "audit", FLAGS_audit_buffer_size,
FLAGS_audit_buffer_flush_interval_ms};
if (FLAGS_audit_enabled) {
audit_log.Start();
}
CHECK(utils::SignalHandler::RegisterHandler(
utils::Signal::User2, [&audit_log]() { audit_log.ReopenLog(); }))
<< "Unable to register SIGUSR2 handler!";

database::Master db;
query::DistributedInterpreter interpreter(&db);
SessionData session_data{&db, &interpreter, &auth, &audit_log};

integrations::kafka::Streams kafka_streams{
durability_directory / "streams",
[&session_data](
const std::string &query,
const std::map<std::string, communication::bolt::Value> &params) {
KafkaStreamWriter(session_data, query, params);
}};

try {
// Recover possible streams.
kafka_streams.Recover();
} catch (const integrations::kafka::KafkaStreamException &e) {
LOG(ERROR) << e.what();
}

session_data.interpreter->auth_ = &auth;
session_data.interpreter->kafka_streams_ = &kafka_streams;

ServerContext context;
std::string service_name = "Bolt";
if (FLAGS_key_file != "" && FLAGS_cert_file != "") {
context = ServerContext(FLAGS_key_file, FLAGS_cert_file);
service_name = "BoltS";
}

ServerT server({FLAGS_interface, static_cast<uint16_t>(FLAGS_port)},
&session_data, &context, FLAGS_session_inactivity_timeout,
service_name, FLAGS_num_workers);

// Handler for regular termination signals
auto shutdown = [&db] {
// We call the shutdown method on the worker database so that we exit
// cleanly.
db.Shutdown();
};

InitSignalHandlers(shutdown);

// Start the database.
db.Start();

// Start the Bolt server.
CHECK(server.Start()) << "Couldn't start the Bolt server!";

// The return code of `AwaitShutdown` is ignored because we want the database
// to exit cleanly no matter what.
db.AwaitShutdown([&server] {
// Server needs to be shutdown first and then the database. This prevents a
// race condition when a transaction is accepted during server shutdown.
server.Shutdown();
server.AwaitShutdown();
});
}

void WorkerMain() {
google::SetUsageMessage("Memgraph distributed worker");
database::Worker db;

// Handler for regular termination signals
auto shutdown = [&db] {
// We call the shutdown method on the worker database so that we exit
// cleanly.
db.Shutdown();
};

InitSignalHandlers(shutdown);

// Start the database.
db.Start();

// The return code of `AwaitShutdown` is ignored because we want the database
// to exit cleanly no matter what.
db.AwaitShutdown();
}

int main(int argc, char **argv) {
auto memgraph_main = [&]() {
CHECK(!(FLAGS_master && FLAGS_worker))
<< "Can't run Memgraph as worker and master at the same time!";
CHECK(FLAGS_master || FLAGS_worker)
<< "You must specify that Memgraph should be either a master or a worker!";
if (FLAGS_master)
MasterMain();
else
WorkerMain();
};

return WithInit(argc, argv, memgraph_main);
}
@ -124,21 +124,12 @@ void KafkaStreamWriter(
for (const auto &kv : params)
params_pv.emplace(kv.first, glue::ToPropertyValue(kv.second));
try {
#ifndef MG_DISTRIBUTED
(*session_data.interpreter)(query, dba, params_pv, false).PullAll(stream);
dba.Commit();
#else
(*session_data.interpreter)(query, *dba, params_pv, false).PullAll(stream);
dba->Commit();
#endif
} catch (const utils::BasicException &e) {
LOG(WARNING) << "[Kafka] query execution failed with an exception: "
<< e.what();
#ifndef MG_DISTRIBUTED
dba.Abort();
#else
dba->Abort();
#endif
}
};

@ -1,56 +0,0 @@
#>cpp
#pragma once

#include <type_traits>

#include "query/frontend/ast/ast.hpp"
#include "query/distributed/serialization.hpp"
#include "storage/distributed/rpc/serialization.hpp"
cpp<#

(load "query/frontend/ast/ast.lcp")

(lcp:namespace query)

#>cpp
/// Primary function for saving Ast nodes via SLK.
void SaveAstPointer(const Tree *ast, slk::Builder *builder);

Tree *Load(AstStorage *ast, slk::Reader *reader);

/// Primary function for loading Ast nodes via SLK.
template <class TAst>
TAst *LoadAstPointer(AstStorage *ast, slk::Reader *reader) {
static_assert(std::is_base_of<query::Tree, TAst>::value);
bool has_ptr = false;
slk::Load(&has_ptr, reader);
if (!has_ptr) {
return nullptr;
}
auto *ret = utils::Downcast<TAst>(Load(ast, reader));
if (!ret) {
throw slk::SlkDecodeException("Loading unknown Ast node type");
}
return ret;
}
cpp<#

(lcp:in-impl
#>cpp
void SaveAstPointer(const Tree *ast, slk::Builder *builder) {
slk::Save(static_cast<bool>(ast), builder);
if (!ast) {
return;
}
slk::Save(*ast, builder);
}

Tree *Load(AstStorage *ast, slk::Reader *reader) {
std::unique_ptr<Tree> root;
slk::ConstructAndLoad(&root, reader, ast);
ast->storage_.emplace_back(std::move(root));
return ast->storage_.back().get();
}
cpp<#)

(lcp:pop-namespace) ;; namespace query
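SaveAstPointer and LoadAstPointer above encode a possibly-null AST pointer as a presence flag followed by the node itself, with the loader checking the dynamic type of what was read. The following standalone sketch shows only that flag-then-payload idea over a plain byte buffer; the SaveBytes/LoadBytes helpers are hypothetical stand-ins and are not the real slk::Builder / slk::Reader API.

#include <cstdint>
#include <cstring>
#include <iostream>
#include <optional>
#include <vector>

// Hypothetical byte-stream helpers standing in for slk::Builder / slk::Reader.
void SaveBytes(const void *data, size_t size, std::vector<uint8_t> *out) {
  const auto *bytes = static_cast<const uint8_t *>(data);
  out->insert(out->end(), bytes, bytes + size);
}

void LoadBytes(void *data, size_t size, const std::vector<uint8_t> &in, size_t *pos) {
  std::memcpy(data, in.data() + *pos, size);
  *pos += size;
}

// Save an optional value as: presence flag, then payload (only if present).
void SaveOptionalInt(const std::optional<int64_t> &value, std::vector<uint8_t> *out) {
  bool has_value = value.has_value();
  SaveBytes(&has_value, sizeof(has_value), out);
  if (has_value) SaveBytes(&*value, sizeof(*value), out);
}

std::optional<int64_t> LoadOptionalInt(const std::vector<uint8_t> &in, size_t *pos) {
  bool has_value = false;
  LoadBytes(&has_value, sizeof(has_value), in, pos);
  if (!has_value) return std::nullopt;  // mirrors returning nullptr for a null AST pointer
  int64_t value = 0;
  LoadBytes(&value, sizeof(value), in, pos);
  return value;
}

int main() {
  std::vector<uint8_t> buffer;
  SaveOptionalInt(std::nullopt, &buffer);
  SaveOptionalInt(42, &buffer);
  size_t pos = 0;
  std::cout << LoadOptionalInt(buffer, &pos).has_value() << '\n';  // 0
  std::cout << *LoadOptionalInt(buffer, &pos) << '\n';             // 42
}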
@ -1,10 +0,0 @@
#>cpp
#pragma once

#include "query/frontend/semantic/symbol.hpp"
cpp<#

;; Generate serialization code
;; TODO: This should be merged with query/distributed/serialization
(load "query/frontend/semantic/symbol.lcp")

@ -1,167 +0,0 @@
|
||||
#include "query/distributed/interpreter.hpp"
|
||||
|
||||
#include "database/distributed/distributed_graph_db.hpp"
|
||||
#include "distributed/plan_dispatcher.hpp"
|
||||
#include "query/frontend/semantic/symbol_generator.hpp"
|
||||
#include "query/distributed/plan/planner.hpp"
|
||||
#include "query/distributed/plan/pretty_print.hpp"
|
||||
#include "query/plan/planner.hpp"
|
||||
#include "query/plan/rule_based_planner.hpp"
|
||||
#include "query/plan/vertex_count_cache.hpp"
|
||||
|
||||
namespace query {
|
||||
|
||||
namespace {
|
||||
|
||||
class DistributedLogicalPlan final : public LogicalPlan {
|
||||
public:
|
||||
DistributedLogicalPlan(plan::DistributedPlan plan, double cost,
|
||||
distributed::PlanDispatcher *plan_dispatcher)
|
||||
: plan_(std::move(plan)), plan_dispatcher_(plan_dispatcher), cost_(cost) {
|
||||
CHECK(plan_dispatcher_);
|
||||
for (const auto &plan_pair : plan_.worker_plans) {
|
||||
const auto &plan_id = plan_pair.first;
|
||||
const auto &worker_plan = plan_pair.second;
|
||||
plan_dispatcher_->DispatchPlan(plan_id, worker_plan, plan_.symbol_table);
|
||||
}
|
||||
}
|
||||
|
||||
~DistributedLogicalPlan() {
|
||||
for (const auto &plan_pair : plan_.worker_plans) {
|
||||
const auto &plan_id = plan_pair.first;
|
||||
try {
|
||||
plan_dispatcher_->RemovePlan(plan_id);
|
||||
} catch (const communication::rpc::RpcFailedException &) {
|
||||
// We ignore RPC exceptions here because the other side can be possibly
|
||||
// shutting down. TODO: If that is not the case then something is really
|
||||
// wrong with the cluster!
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const plan::LogicalOperator &GetRoot() const override {
|
||||
return *plan_.master_plan;
|
||||
}
|
||||
double GetCost() const override { return cost_; }
|
||||
const SymbolTable &GetSymbolTable() const override {
|
||||
return plan_.symbol_table;
|
||||
}
|
||||
const AstStorage &GetAstStorage() const override {
|
||||
return plan_.ast_storage;
|
||||
}
|
||||
|
||||
private:
|
||||
plan::DistributedPlan plan_;
|
||||
distributed::PlanDispatcher *plan_dispatcher_{nullptr};
|
||||
double cost_;
|
||||
};
|
||||
|
||||
class DistributedPostProcessor final {
|
||||
// Original plan before rewrite, needed only for temporary cost estimation
|
||||
// implementation.
|
||||
std::unique_ptr<plan::LogicalOperator> original_plan_;
|
||||
std::atomic<int64_t> *next_plan_id_;
|
||||
Parameters parameters_;
|
||||
|
||||
public:
|
||||
using ProcessedPlan = plan::DistributedPlan;
|
||||
|
||||
DistributedPostProcessor(const Parameters ¶meters,
|
||||
std::atomic<int64_t> *next_plan_id)
|
||||
: next_plan_id_(next_plan_id), parameters_(parameters) {}
|
||||
|
||||
template <class TPlanningContext>
|
||||
plan::DistributedPlan Rewrite(std::unique_ptr<plan::LogicalOperator> plan,
|
||||
TPlanningContext *context) {
|
||||
plan::PostProcessor post_processor(parameters_);
|
||||
original_plan_ = post_processor.Rewrite(std::move(plan), context);
|
||||
const auto &property_names = context->ast_storage->properties_;
|
||||
std::vector<storage::Property> properties_by_ix;
|
||||
properties_by_ix.reserve(property_names.size());
|
||||
for (const auto &name : property_names) {
|
||||
properties_by_ix.push_back(context->db->Property(name));
|
||||
}
|
||||
return MakeDistributedPlan(*context->ast_storage, *original_plan_,
|
||||
*context->symbol_table, *next_plan_id_,
|
||||
properties_by_ix);
|
||||
}
|
||||
|
||||
template <class TVertexCounts>
|
||||
double EstimatePlanCost(const plan::DistributedPlan &plan,
|
||||
TVertexCounts *vertex_counts) {
|
||||
// TODO: Make cost estimation work with distributed plan.
|
||||
return ::query::plan::EstimatePlanCost(vertex_counts, parameters_,
|
||||
*original_plan_);
|
||||
}
|
||||
|
||||
template <class TPlanningContext>
|
||||
plan::DistributedPlan MergeWithCombinator(plan::DistributedPlan curr_plan,
|
||||
plan::DistributedPlan last_plan,
|
||||
const Tree &combinator,
|
||||
TPlanningContext *context) {
|
||||
throw utils::NotYetImplemented("query combinator");
|
||||
}
|
||||
|
||||
template <class TPlanningContext>
|
||||
plan::DistributedPlan MakeDistinct(plan::DistributedPlan last_op,
|
||||
TPlanningContext *context) {
|
||||
throw utils::NotYetImplemented("query combinator");
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
DistributedInterpreter::DistributedInterpreter(database::Master *db)
|
||||
: plan_dispatcher_(&db->plan_dispatcher()) {}
|
||||
|
||||
std::unique_ptr<LogicalPlan> DistributedInterpreter::MakeLogicalPlan(
|
||||
CypherQuery *query, AstStorage ast_storage, const Parameters ¶meters,
|
||||
database::GraphDbAccessor *db_accessor) {
|
||||
auto vertex_counts = plan::MakeVertexCountCache(db_accessor);
|
||||
auto symbol_table = MakeSymbolTable(query);
|
||||
auto planning_context = plan::MakePlanningContext(&ast_storage, &symbol_table,
|
||||
query, &vertex_counts);
|
||||
DistributedPostProcessor distributed_post_processor(parameters,
|
||||
&next_plan_id_);
|
||||
plan::DistributedPlan plan;
|
||||
double cost;
|
||||
std::tie(plan, cost) = plan::MakeLogicalPlan(
|
||||
&planning_context, &distributed_post_processor, FLAGS_query_cost_planner);
|
||||
VLOG(10) << "[Interpreter] Created plan for distributed execution "
|
||||
<< next_plan_id_ - 1;
|
||||
return std::make_unique<DistributedLogicalPlan>(std::move(plan), cost,
|
||||
plan_dispatcher_);
|
||||
}
|
||||
|
||||
Interpreter::Results DistributedInterpreter::operator()(
|
||||
const std::string &query_string, database::GraphDbAccessor &db_accessor,
|
||||
const std::map<std::string, PropertyValue> ¶ms,
|
||||
bool in_explicit_transaction) {
|
||||
AstStorage ast_storage;
|
||||
Parameters parameters;
|
||||
|
||||
auto queries = StripAndParseQuery(query_string, ¶meters, &ast_storage,
|
||||
&db_accessor, params);
|
||||
ParsedQuery &parsed_query = queries.second;
|
||||
|
||||
if (utils::IsSubtype(*parsed_query.query, ProfileQuery::kType)) {
|
||||
throw utils::NotYetImplemented("PROFILE in a distributed query");
|
||||
}
|
||||
|
||||
return Interpreter::operator()(query_string, db_accessor, params,
|
||||
in_explicit_transaction);
|
||||
}
|
||||
|
||||
void DistributedInterpreter::PrettyPrintPlan(
|
||||
const database::GraphDbAccessor &dba,
|
||||
const plan::LogicalOperator *plan_root, std::ostream *out) {
|
||||
plan::DistributedPrettyPrint(dba, plan_root, out);
|
||||
}
|
||||
|
||||
std::string DistributedInterpreter::PlanToJson(
|
||||
const database::GraphDbAccessor &dba,
|
||||
const plan::LogicalOperator *plan_root) {
|
||||
return plan::DistributedPlanToJson(dba, plan_root).dump();
|
||||
}
|
||||
|
||||
} // namespace query
|
@ -1,38 +0,0 @@
#pragma once

#include "query/interpreter.hpp"

namespace database {
class Master;
}

namespace distributed {
class PlanDispatcher;
}

namespace query {

class DistributedInterpreter final : public Interpreter {
public:
DistributedInterpreter(database::Master *db);

Results operator()(const std::string &, database::GraphDbAccessor &,
const std::map<std::string, PropertyValue> &,
bool in_explicit_transaction) override;

private:
std::unique_ptr<LogicalPlan> MakeLogicalPlan(
CypherQuery *, AstStorage, const Parameters &,
database::GraphDbAccessor *) override;

void PrettyPrintPlan(const database::GraphDbAccessor &,
const plan::LogicalOperator *, std::ostream *) override;

std::string PlanToJson(const database::GraphDbAccessor &,
const plan::LogicalOperator *) override;

std::atomic<int64_t> next_plan_id_{0};
distributed::PlanDispatcher *plan_dispatcher_{nullptr};
};

} // namespace query
File diff suppressed because it is too large
@ -1,357 +0,0 @@
|
||||
#>cpp
|
||||
/// @file
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "query/distributed/frontend/ast/ast_serialization.hpp"
|
||||
#include "query/plan/operator.hpp"
|
||||
#include "query/distributed/serialization.hpp"
|
||||
#include "storage/distributed/rpc/serialization.hpp"
|
||||
cpp<#
|
||||
|
||||
(load "query/plan/operator.lcp")
|
||||
|
||||
(lcp:namespace query)
|
||||
(lcp:namespace plan)
|
||||
|
||||
#>cpp
|
||||
class PullRemote;
|
||||
class Synchronize;
|
||||
class PullRemoteOrderBy;
|
||||
class DistributedExpand;
|
||||
class DistributedExpandBfs;
|
||||
class DistributedCreateNode;
|
||||
class DistributedCreateExpand;
|
||||
|
||||
using DistributedOperatorCompositeVisitor =
|
||||
::utils::CompositeVisitor<PullRemote, Synchronize, PullRemoteOrderBy,
|
||||
DistributedExpand, DistributedExpandBfs,
|
||||
DistributedCreateNode, DistributedCreateExpand>;
|
||||
|
||||
/// Base class for visiting regular and distributed LogicalOperator instances.
///
/// HierarchicalLogicalOperatorVisitor is inherited virtually, so that potential
/// multiple inheritance of DistributedOperatorVisitor and other
/// HierarchicalLogicalOperatorVisitor derived types is possible. Note that
/// virtual inheritance resolves the diamond problem, but this still carries a
/// cost. For example, you can no longer use static_cast to downcast a type even
/// though you are 100% sure downcast would be correct. dynamic_cast should work
/// as usual.
class DistributedOperatorVisitor : public virtual HierarchicalLogicalOperatorVisitor,
public DistributedOperatorCompositeVisitor {
public:
using DistributedOperatorCompositeVisitor::PostVisit;
using DistributedOperatorCompositeVisitor::PreVisit;
using HierarchicalLogicalOperatorVisitor::PostVisit;
using HierarchicalLogicalOperatorVisitor::PreVisit;
using HierarchicalLogicalOperatorVisitor::Visit;
};
cpp<#
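The comment above explains why the visitor base is inherited virtually: two intermediate visitor bases that both derive from it must share a single base subobject, and a consequence is that downcasting from that base back to the derived visitor needs dynamic_cast rather than static_cast. A self-contained illustration of that trade-off, with made-up class names unrelated to the real visitor hierarchy:

#include <cassert>

struct VisitorBase {
  virtual ~VisitorBase() = default;
};

// Both intermediate bases inherit VisitorBase virtually, so a class deriving
// from both ends up with exactly one VisitorBase subobject (no diamond ambiguity).
struct PlanVisitor : virtual VisitorBase {};
struct ExtraOpsVisitor : virtual VisitorBase {};

struct CombinedVisitor : PlanVisitor, ExtraOpsVisitor {};

int main() {
  CombinedVisitor combined;
  VisitorBase *base = &combined;  // unambiguous thanks to virtual inheritance

  // static_cast from a virtual base is ill-formed; the compiler rejects it:
  //   auto *bad = static_cast<CombinedVisitor *>(base);
  // dynamic_cast still works because VisitorBase is polymorphic.
  auto *good = dynamic_cast<CombinedVisitor *>(base);
  assert(good == &combined);
}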
|
||||
|
||||
(lcp:define-class pull-remote (logical-operator)
|
||||
((input "std::shared_ptr<LogicalOperator>" :scope :public
|
||||
:slk-save #'slk-save-operator-pointer
|
||||
:slk-load #'slk-load-operator-pointer)
|
||||
(plan-id :int64_t :initval 0 :scope :public)
|
||||
(symbols "std::vector<Symbol>" :scope :public))
|
||||
(:documentation
|
||||
"An operator in distributed Memgraph that yields both local and remote (from
|
||||
other workers) frames. Obtaining remote frames is done through RPC calls to
|
||||
`distributed::ProduceRpcServer`s running on all the workers.
|
||||
|
||||
This operator aims to yield results as fast as possible and lose minimal
|
||||
time on data transfer. It gives no guarantees on result order.")
|
||||
(:public
|
||||
#>cpp
|
||||
PullRemote() {}
|
||||
PullRemote(const std::shared_ptr<LogicalOperator> &input, int64_t plan_id,
|
||||
const std::vector<Symbol> &symbols)
|
||||
: input_(input), plan_id_(plan_id), symbols_(symbols) {}
|
||||
bool Accept(HierarchicalLogicalOperatorVisitor &visitor) override;
|
||||
UniqueCursorPtr MakeCursor(utils::MemoryResource *) const override;
|
||||
std::vector<Symbol> OutputSymbols(const SymbolTable &) const override;
|
||||
std::vector<Symbol> ModifiedSymbols(const SymbolTable &) const override;
|
||||
|
||||
bool HasSingleInput() const override { return true; }
|
||||
std::shared_ptr<LogicalOperator> input() const override { return input_; }
|
||||
void set_input(std::shared_ptr<LogicalOperator> input) override {
|
||||
input_ = input;
|
||||
}
|
||||
cpp<#)
|
||||
(:serialize (:slk))
|
||||
(:clone))
|
||||
|
||||
(defun slk-load-pull-remote (member)
|
||||
#>cpp
|
||||
std::shared_ptr<query::plan::LogicalOperator> op;
|
||||
slk::Load<query::plan::LogicalOperator>(
|
||||
&op, reader, &helper->loaded_ops,
|
||||
[&helper](auto *op, auto *reader) {
|
||||
slk::ConstructAndLoad(op, reader, helper);
|
||||
});
|
||||
self->${member} = std::static_pointer_cast<query::plan::PullRemote>(op);
|
||||
cpp<#)
|
||||
|
||||
(lcp:define-class synchronize (logical-operator)
|
||||
((input "std::shared_ptr<LogicalOperator>" :scope :public
|
||||
:slk-save #'slk-save-operator-pointer
|
||||
:slk-load #'slk-load-operator-pointer)
|
||||
(pull-remote "std::shared_ptr<PullRemote>" :scope :public
|
||||
:slk-save #'slk-save-operator-pointer
|
||||
:slk-load #'slk-load-pull-remote
|
||||
:clone (lambda (source dest)
|
||||
#>cpp
|
||||
if (${source}) {
|
||||
std::shared_ptr<LogicalOperator> tmp = ${source}->Clone(storage);
|
||||
${dest} = std::static_pointer_cast<PullRemote>(tmp);
|
||||
} else {
|
||||
${dest} = nullptr;
|
||||
}
|
||||
cpp<#))
|
||||
(advance-command :bool :initval "false" :scope :public))
|
||||
(:documentation
"Operator used to synchronize stages of plan execution between the master and
all the workers. Synchronization is necessary in queries that update the
graph state because updates (as well as creations and deletions) are deferred
to avoid multithreaded modification of graph element data (as it's not
thread-safe).

Logic of the synchronize operator is:

1. If there is a Pull, tell all the workers to pull on that plan and
accumulate results without sending them to the master. This is async.
2. Accumulate local results, in parallel with 1. getting executed on workers.
3. Wait till the master and all the workers are done accumulating.
4. Advance the command, if necessary.
5. Tell all the workers to apply their updates. This is async.
6. Apply local updates, in parallel with 5. on the workers.
7. Notify workers that the command has advanced, if necessary.
8. Yield all the results, first local, then from Pull if available.")
(:public
#>cpp
Synchronize() {}
Synchronize(const std::shared_ptr<LogicalOperator> &input,
const std::shared_ptr<PullRemote> &pull_remote,
bool advance_command)
: input_(input),
pull_remote_(pull_remote),
advance_command_(advance_command) {}
bool Accept(HierarchicalLogicalOperatorVisitor &visitor) override;
UniqueCursorPtr MakeCursor(utils::MemoryResource *) const override;
std::vector<Symbol> ModifiedSymbols(const SymbolTable &) const override;

std::vector<Symbol> OutputSymbols(
const SymbolTable &symbol_table) const override {
return input_->OutputSymbols(symbol_table);
}

bool HasSingleInput() const override { return true; }
std::shared_ptr<LogicalOperator> input() const override { return input_; }
void set_input(std::shared_ptr<LogicalOperator> input) override {
input_ = input;
}
cpp<#)
(:serialize (:slk))
(:clone))
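The eight-step protocol in the Synchronize documentation boils down to: start remote accumulation asynchronously, accumulate locally in parallel, wait for both, optionally advance the command, apply remote and local updates in the same overlapped fashion, then yield results local-first. A compressed sketch of that control flow using std::async in place of the real RPC machinery; the worker functions and integer "results" here are hypothetical illustrations, not the actual operator implementation.

#include <future>
#include <iostream>
#include <numeric>
#include <vector>

// Hypothetical stand-ins for per-worker RPCs.
int AccumulateOnWorker(int worker_id) { return worker_id * 10; }   // used in step 1
void ApplyUpdatesOnWorker(int worker_id) { (void)worker_id; }      // used in step 5

int main() {
  const std::vector<int> workers = {1, 2, 3};

  // 1. Tell all workers to accumulate, asynchronously.
  std::vector<std::future<int>> remote_accumulations;
  for (int w : workers)
    remote_accumulations.push_back(std::async(std::launch::async, AccumulateOnWorker, w));

  // 2. Accumulate local results in parallel with the workers.
  int local_result = 42;

  // 3. Wait until the master and all workers are done accumulating.
  std::vector<int> remote_results;
  for (auto &f : remote_accumulations) remote_results.push_back(f.get());

  // 4. (Advance the command here if the plan requires it.)

  // 5. Tell all workers to apply their deferred updates, asynchronously.
  std::vector<std::future<void>> remote_updates;
  for (int w : workers)
    remote_updates.push_back(std::async(std::launch::async, ApplyUpdatesOnWorker, w));

  // 6. Apply local updates in parallel with step 5, then wait.
  // 7. (Notify workers that the command has advanced, if necessary.)
  for (auto &f : remote_updates) f.get();

  // 8. Yield all the results, local first, then the remote ones.
  std::cout << local_result + std::accumulate(remote_results.begin(), remote_results.end(), 0)
            << '\n';
}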
|
||||
|
||||
(lcp:define-class pull-remote-order-by (logical-operator)
|
||||
((input "std::shared_ptr<LogicalOperator>" :scope :public
|
||||
:slk-save #'slk-save-operator-pointer
|
||||
:slk-load #'slk-load-operator-pointer)
|
||||
(plan-id :int64_t :initval 0 :scope :public)
|
||||
(symbols "std::vector<Symbol>" :scope :public)
|
||||
(order-by "std::vector<Expression *>" :scope :public
|
||||
:slk-save #'slk-save-ast-vector
|
||||
:slk-load (slk-load-ast-vector "Expression"))
|
||||
(compare "TypedValueVectorCompare" :scope :public))
|
||||
(:documentation
"Operator that merges distributed OrderBy operators.
Instead of using a regular OrderBy on master (which would collect all remote
results and order them), we can have each worker do an OrderBy locally and
have the master rely on the fact that the results are ordered and merge them
by having only one result from each worker.")
(:public
#>cpp
PullRemoteOrderBy() {}
PullRemoteOrderBy(
const std::shared_ptr<LogicalOperator> &input, int64_t plan_id,
const std::vector<SortItem> &order_by, const std::vector<Symbol> &symbols);
bool Accept(HierarchicalLogicalOperatorVisitor &visitor) override;
UniqueCursorPtr MakeCursor(utils::MemoryResource *) const override;

std::vector<Symbol> ModifiedSymbols(const SymbolTable &) const override;
std::vector<Symbol> OutputSymbols(const SymbolTable &) const override;

bool HasSingleInput() const override { return true; }
std::shared_ptr<LogicalOperator> input() const override { return input_; }
void set_input(std::shared_ptr<LogicalOperator> input) override {
input_ = input;
}
cpp<#)
(:serialize (:slk))
(:clone))
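PullRemoteOrderBy relies on each worker returning an already-sorted stream, so the master only has to merge: it keeps one candidate per worker and repeatedly emits the smallest. A minimal sketch of that k-way merge with a priority queue, using plain integers instead of result frames and assuming an ascending comparator; it is an illustration of the merging idea, not the operator's cursor code.

#include <functional>
#include <iostream>
#include <queue>
#include <utility>
#include <vector>

int main() {
  // Each inner vector models one worker's locally ordered results.
  std::vector<std::vector<int>> worker_streams = {{1, 4, 9}, {2, 3, 10}, {5, 6, 7}};

  // (value, (worker index, position within that worker's stream)); min-heap on value.
  using Entry = std::pair<int, std::pair<size_t, size_t>>;
  std::priority_queue<Entry, std::vector<Entry>, std::greater<Entry>> heap;
  for (size_t w = 0; w < worker_streams.size(); ++w)
    if (!worker_streams[w].empty()) heap.push({worker_streams[w][0], {w, 0}});

  // Repeatedly output the globally smallest candidate and refill from its worker.
  while (!heap.empty()) {
    auto [value, origin] = heap.top();
    heap.pop();
    std::cout << value << ' ';
    auto [w, i] = origin;
    if (i + 1 < worker_streams[w].size())
      heap.push({worker_streams[w][i + 1], {w, i + 1}});
  }
  std::cout << '\n';  // prints: 1 2 3 4 5 6 7 9 10
}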
|
||||
|
||||
(lcp:define-class distributed-expand (logical-operator)
|
||||
((input "std::shared_ptr<LogicalOperator>" :scope :public
|
||||
:slk-save #'slk-save-operator-pointer
|
||||
:slk-load #'slk-load-operator-pointer)
|
||||
(input-symbol "Symbol" :scope :public)
|
||||
(common "ExpandCommon" :scope :public)
|
||||
(graph-view "GraphView" :scope :public
|
||||
:documentation
|
||||
"State from which the input node should get expanded."))
|
||||
(:documentation "Distributed version of Expand operator")
|
||||
(:public
|
||||
#>cpp
|
||||
DistributedExpand() {}
|
||||
DistributedExpand(const std::shared_ptr<LogicalOperator> &input,
|
||||
Symbol input_symbol, Symbol node_symbol, Symbol edge_symbol,
|
||||
EdgeAtom::Direction direction,
|
||||
const std::vector<storage::EdgeType> &edge_types,
|
||||
bool existing_node, GraphView graph_view);
|
||||
DistributedExpand(const std::shared_ptr<LogicalOperator> &input,
|
||||
Symbol input_symbol, const ExpandCommon &common);
|
||||
|
||||
bool Accept(HierarchicalLogicalOperatorVisitor &visitor) override;
|
||||
UniqueCursorPtr MakeCursor(utils::MemoryResource *) const override;
|
||||
std::vector<Symbol> ModifiedSymbols(const SymbolTable &) const override;
|
||||
|
||||
bool HasSingleInput() const override { return true; }
|
||||
std::shared_ptr<LogicalOperator> input() const override { return input_; }
|
||||
void set_input(std::shared_ptr<LogicalOperator> input) override {
|
||||
input_ = input;
|
||||
}
|
||||
cpp<#)
|
||||
(:serialize (:slk))
|
||||
(:clone))
|
||||
|
||||
(lcp:define-class distributed-expand-bfs (logical-operator)
|
||||
((input "std::shared_ptr<LogicalOperator>" :scope :public
|
||||
:slk-save #'slk-save-operator-pointer
|
||||
:slk-load #'slk-load-operator-pointer)
|
||||
(input-symbol "Symbol" :scope :public)
|
||||
(common "ExpandCommon" :scope :public)
|
||||
(lower-bound "Expression *" :scope :public
|
||||
:documentation "Optional lower bound, default is 1"
|
||||
:slk-save #'slk-save-ast-pointer
|
||||
:slk-load (slk-load-ast-pointer "Expression"))
|
||||
(upper-bound "Expression *" :scope :public
|
||||
:documentation "Optional upper bound, default is infinity"
|
||||
:slk-save #'slk-save-ast-pointer
|
||||
:slk-load (slk-load-ast-pointer "Expression"))
|
||||
(filter-lambda "ExpansionLambda" :scope :public
|
||||
:documentation "Filter that must be satisfied for expansion to succeed."
|
||||
:slk-load (lambda (member)
|
||||
#>cpp
|
||||
slk::Load(&self->${member}, reader, &helper->ast_storage);
|
||||
cpp<#)))
|
||||
(:documentation "BFS expansion operator suited for distributed execution.")
|
||||
(:public
|
||||
#>cpp
|
||||
DistributedExpandBfs() {}
|
||||
DistributedExpandBfs(const std::shared_ptr<LogicalOperator> &input,
|
||||
Symbol input_symbol, Symbol node_symbol,
|
||||
Symbol edge_symbol, EdgeAtom::Direction direction,
|
||||
const std::vector<storage::EdgeType> &edge_types,
|
||||
bool existing_node, Expression *lower_bound,
|
||||
Expression *upper_bound,
|
||||
const ExpansionLambda &filter_lambda);
|
||||
DistributedExpandBfs(const std::shared_ptr<LogicalOperator> &input,
|
||||
Symbol input_symbol, const ExpandCommon &common,
|
||||
Expression *lower_bound, Expression *upper_bound,
|
||||
const ExpansionLambda &filter_lambda);
|
||||
|
||||
bool Accept(HierarchicalLogicalOperatorVisitor &visitor) override;
|
||||
UniqueCursorPtr MakeCursor(utils::MemoryResource *) const override;
|
||||
std::vector<Symbol> ModifiedSymbols(const SymbolTable &) const override;
|
||||
|
||||
bool HasSingleInput() const override { return true; }
|
||||
std::shared_ptr<LogicalOperator> input() const override { return input_; }
|
||||
void set_input(std::shared_ptr<LogicalOperator> input) override {
|
||||
input_ = input;
|
||||
}
|
||||
cpp<#)
|
||||
(:serialize (:slk))
|
||||
(:clone))
|
||||
|
||||
(lcp:define-class distributed-create-node (logical-operator)
|
||||
((input "std::shared_ptr<LogicalOperator>" :scope :public
|
||||
:slk-save #'slk-save-operator-pointer
|
||||
:slk-load #'slk-load-operator-pointer)
|
||||
(node-info "NodeCreationInfo" :scope :public
|
||||
:slk-save (lambda (m)
|
||||
#>cpp
|
||||
slk::Save(self.${m}, builder, helper);
|
||||
cpp<#)
|
||||
:slk-load (lambda (m)
|
||||
#>cpp
|
||||
slk::Load(&self->${m}, reader, helper);
|
||||
cpp<#))
|
||||
(on-random-worker :bool :initval "false" :scope :public))
|
||||
(:documentation "Create nodes in distributed environment.")
|
||||
(:public
|
||||
#>cpp
|
||||
DistributedCreateNode() {}
|
||||
DistributedCreateNode(const std::shared_ptr<LogicalOperator> &input,
|
||||
const NodeCreationInfo &node_info, bool on_random_worker);
|
||||
|
||||
bool Accept(HierarchicalLogicalOperatorVisitor &visitor) override;
|
||||
UniqueCursorPtr MakeCursor(utils::MemoryResource *) const override;
|
||||
std::vector<Symbol> ModifiedSymbols(const SymbolTable &) const override;
|
||||
|
||||
bool HasSingleInput() const override { return true; }
|
||||
std::shared_ptr<LogicalOperator> input() const override { return input_; }
|
||||
void set_input(std::shared_ptr<LogicalOperator> input) override {
|
||||
input_ = input;
|
||||
}
|
||||
cpp<#)
|
||||
(:serialize (:slk))
|
||||
(:clone))
|
||||
|
||||
(lcp:define-class distributed-create-expand (logical-operator)
|
||||
((node-info "NodeCreationInfo" :scope :public
|
||||
:slk-save (lambda (m)
|
||||
#>cpp
|
||||
slk::Save(self.${m}, builder, helper);
|
||||
cpp<#)
|
||||
:slk-load (lambda (m)
|
||||
#>cpp
|
||||
slk::Load(&self->${m}, reader, helper);
|
||||
cpp<#))
|
||||
(edge-info "EdgeCreationInfo" :scope :public
|
||||
:slk-save (lambda (m)
|
||||
#>cpp
|
||||
slk::Save(self.${m}, builder, helper);
|
||||
cpp<#)
|
||||
:slk-load (lambda (m)
|
||||
#>cpp
|
||||
slk::Load(&self->${m}, reader, helper);
|
||||
cpp<#))
|
||||
(input "std::shared_ptr<LogicalOperator>" :scope :public
|
||||
:slk-save #'slk-save-operator-pointer
|
||||
:slk-load #'slk-load-operator-pointer)
|
||||
(input-symbol "Symbol" :scope :public)
|
||||
(existing-node :bool :scope :public))
|
||||
(:documentation "Distributed version of CreateExpand")
|
||||
(:public
|
||||
#>cpp
|
||||
DistributedCreateExpand() {}
|
||||
DistributedCreateExpand(const NodeCreationInfo &node_info,
|
||||
const EdgeCreationInfo &edge_info,
|
||||
const std::shared_ptr<LogicalOperator> &input,
|
||||
Symbol input_symbol, bool existing_node);
|
||||
bool Accept(HierarchicalLogicalOperatorVisitor &visitor) override;
|
||||
UniqueCursorPtr MakeCursor(utils::MemoryResource *) const override;
|
||||
std::vector<Symbol> ModifiedSymbols(const SymbolTable &) const override;
|
||||
|
||||
bool HasSingleInput() const override { return true; }
|
||||
std::shared_ptr<LogicalOperator> input() const override { return input_; }
|
||||
void set_input(std::shared_ptr<LogicalOperator> input) override {
|
||||
input_ = input;
|
||||
}
|
||||
cpp<#)
|
||||
(:serialize (:slk))
|
||||
(:clone))
|
||||
|
||||
(lcp:pop-namespace) ;; plan
|
||||
(lcp:pop-namespace) ;; query
|
File diff suppressed because it is too large
@ -1,31 +0,0 @@
/// @file
#pragma once

#include <memory>

#include "query/frontend/semantic/symbol_table.hpp"
#include "query/plan/operator.hpp"

namespace query::plan {

/// Complete plan split into master/worker parts.
struct DistributedPlan {
int64_t master_plan_id;
/// Plan to be executed on the master server.
std::unique_ptr<LogicalOperator> master_plan;
/// Pairs of {plan_id, plan} for execution on each worker.
std::vector<std::pair<int64_t, std::shared_ptr<LogicalOperator>>>
worker_plans;
/// Ast storage with newly added expressions.
AstStorage ast_storage;
/// Symbol table with newly added symbols.
SymbolTable symbol_table;
};

/// Creates a `DistributedPlan` from a regular plan.
DistributedPlan MakeDistributedPlan(
const AstStorage &ast_storage, const LogicalOperator &plan,
const SymbolTable &symbol_table, std::atomic<int64_t> &next_plan_id,
const std::vector<storage::Property> &properties_by_ix);

} // namespace query::plan
@ -1,249 +0,0 @@
|
||||
#include "query/distributed/plan/pretty_print.hpp"
|
||||
|
||||
namespace query::plan {
|
||||
|
||||
bool DistributedPlanPrinter::PreVisit(query::plan::DistributedExpand &op) {
|
||||
WithPrintLn([&](auto &out) {
|
||||
out << "* DistributedExpand (" << op.input_symbol_.name() << ")"
|
||||
<< (op.common_.direction == query::EdgeAtom::Direction::IN ? "<-" : "-")
|
||||
<< "[" << op.common_.edge_symbol.name();
|
||||
utils::PrintIterable(out, op.common_.edge_types, "|",
|
||||
[this](auto &stream, const auto &edge_type) {
|
||||
stream << ":" << dba_->EdgeTypeName(edge_type);
|
||||
});
|
||||
out << "]"
|
||||
<< (op.common_.direction == query::EdgeAtom::Direction::OUT ? "->"
|
||||
: "-")
|
||||
<< "(" << op.common_.node_symbol.name() << ")";
|
||||
});
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DistributedPlanPrinter::PreVisit(query::plan::DistributedExpandBfs &op) {
|
||||
WithPrintLn([&](auto &out) {
|
||||
out << "* DistributedExpandBfs (" << op.input_symbol_.name() << ")"
|
||||
<< (op.common_.direction == query::EdgeAtom::Direction::IN ? "<-" : "-")
|
||||
<< "[" << op.common_.edge_symbol.name();
|
||||
utils::PrintIterable(out, op.common_.edge_types, "|",
|
||||
[this](auto &stream, const auto &edge_type) {
|
||||
stream << ":" << dba_->EdgeTypeName(edge_type);
|
||||
});
|
||||
out << "]"
|
||||
<< (op.common_.direction == query::EdgeAtom::Direction::OUT ? "->"
|
||||
: "-")
|
||||
<< "(" << op.common_.node_symbol.name() << ")";
|
||||
});
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DistributedPlanPrinter::PreVisit(query::plan::PullRemote &op) {
|
||||
WithPrintLn([&op](auto &out) {
|
||||
out << "* PullRemote [" << op.plan_id_ << "] {";
|
||||
utils::PrintIterable(out, op.symbols_, ", ",
|
||||
[](auto &out, const auto &sym) { out << sym.name(); });
|
||||
out << "}";
|
||||
});
|
||||
WithPrintLn([](auto &out) { out << "|\\"; });
|
||||
++depth_;
|
||||
WithPrintLn([](auto &out) { out << "* workers"; });
|
||||
--depth_;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DistributedPlanPrinter::PreVisit(query::plan::PullRemoteOrderBy &op) {
|
||||
WithPrintLn([&op](auto &out) {
|
||||
out << "* PullRemoteOrderBy {";
|
||||
utils::PrintIterable(out, op.symbols_, ", ",
|
||||
[](auto &out, const auto &sym) { out << sym.name(); });
|
||||
out << "}";
|
||||
});
|
||||
|
||||
WithPrintLn([](auto &out) { out << "|\\"; });
|
||||
++depth_;
|
||||
WithPrintLn([](auto &out) { out << "* workers"; });
|
||||
--depth_;
|
||||
return true;
|
||||
}
|
||||
|
||||
#define PRE_VISIT(TOp) \
|
||||
bool DistributedPlanPrinter::PreVisit(TOp &) { \
|
||||
WithPrintLn([](auto &out) { out << "* " << #TOp; }); \
|
||||
return true; \
|
||||
}
|
||||
|
||||
PRE_VISIT(DistributedCreateNode);
|
||||
|
||||
bool DistributedPlanPrinter::PreVisit(DistributedCreateExpand &op) {
|
||||
WithPrintLn([&](auto &out) {
|
||||
out << "* DistributedCreateExpand (" << op.input_symbol_.name() << ")"
|
||||
<< (op.edge_info_.direction == query::EdgeAtom::Direction::IN ? "<-"
|
||||
: "-")
|
||||
<< "[" << op.edge_info_.symbol.name() << ":"
|
||||
<< dba_->EdgeTypeName(op.edge_info_.edge_type) << "]"
|
||||
<< (op.edge_info_.direction == query::EdgeAtom::Direction::OUT ? "->"
|
||||
: "-")
|
||||
<< "(" << op.node_info_.symbol.name() << ")";
|
||||
});
|
||||
return true;
|
||||
}
|
||||
|
||||
#undef PRE_VISIT
|
||||
|
||||
bool DistributedPlanPrinter::PreVisit(query::plan::Synchronize &op) {
|
||||
WithPrintLn([&op](auto &out) {
|
||||
out << "* Synchronize";
|
||||
if (op.advance_command_) out << " (ADV CMD)";
|
||||
});
|
||||
if (op.pull_remote_) Branch(*op.pull_remote_);
|
||||
op.input_->Accept(*this);
|
||||
return false;
|
||||
}
|
||||
|
||||
void DistributedPrettyPrint(const database::GraphDbAccessor &dba,
|
||||
const LogicalOperator *plan_root,
|
||||
std::ostream *out) {
|
||||
DistributedPlanPrinter printer(&dba, out);
|
||||
// FIXME(mtomic): We should make visitors that take const argument.
|
||||
const_cast<LogicalOperator *>(plan_root)->Accept(printer);
|
||||
}
|
||||
|
||||
nlohmann::json DistributedPlanToJson(const database::GraphDbAccessor &dba,
|
||||
const LogicalOperator *plan_root) {
|
||||
impl::DistributedPlanToJsonVisitor visitor(&dba);
|
||||
const_cast<LogicalOperator *>(plan_root)->Accept(visitor);
|
||||
return visitor.output();
|
||||
}
|
||||
|
||||
namespace impl {
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// DistributedPlanToJsonVisitor implementation
|
||||
//
|
||||
// The JSON formatted plan is consumed (or will be) by Memgraph Lab, and
|
||||
// therefore should not be changed before synchronizing with whoever is
|
||||
// maintaining Memgraph Lab. Hopefully, one day integration tests will exist and
|
||||
// there will be no need to be super careful.
|
||||
|
||||
using json = nlohmann::json;
|
||||
|
||||
bool DistributedPlanToJsonVisitor::PreVisit(DistributedExpand &op) {
|
||||
json self;
|
||||
self["name"] = "DistributedExpand";
|
||||
self["input_symbol"] = ToJson(op.input_symbol_);
|
||||
self["node_symbol"] = ToJson(op.common_.node_symbol);
|
||||
self["edge_symbol"] = ToJson(op.common_.edge_symbol);
|
||||
self["edge_types"] = ToJson(op.common_.edge_types, *dba_);
|
||||
self["direction"] = ToString(op.common_.direction);
|
||||
self["existing_node"] = op.common_.existing_node;
|
||||
|
||||
op.input_->Accept(*this);
|
||||
self["input"] = PopOutput();
|
||||
|
||||
output_ = std::move(self);
|
||||
return false;
|
||||
}
|
||||
|
||||
bool DistributedPlanToJsonVisitor::PreVisit(DistributedExpandBfs &op) {
|
||||
json self;
|
||||
self["name"] = "DistributedExpandBfs";
|
||||
self["input_symbol"] = ToJson(op.input_symbol_);
|
||||
self["node_symbol"] = ToJson(op.common_.node_symbol);
|
||||
self["edge_symbol"] = ToJson(op.common_.edge_symbol);
|
||||
self["edge_types"] = ToJson(op.common_.edge_types, *dba_);
|
||||
self["direction"] = ToString(op.common_.direction);
|
||||
self["lower_bound"] = op.lower_bound_ ? ToJson(op.lower_bound_) : json();
|
||||
self["upper_bound"] = op.upper_bound_ ? ToJson(op.upper_bound_) : json();
|
||||
self["existing_node"] = op.common_.existing_node;
|
||||
|
||||
self["filter_lambda"] = op.filter_lambda_.expression
|
||||
? ToJson(op.filter_lambda_.expression)
|
||||
: json();
|
||||
|
||||
op.input_->Accept(*this);
|
||||
self["input"] = PopOutput();
|
||||
|
||||
output_ = std::move(self);
|
||||
return false;
|
||||
}
|
||||
|
||||
bool DistributedPlanToJsonVisitor::PreVisit(PullRemote &op) {
|
||||
json self;
|
||||
self["name"] = "PullRemote";
|
||||
self["symbols"] = ToJson(op.symbols_);
|
||||
|
||||
op.input_->Accept(*this);
|
||||
self["input"] = PopOutput();
|
||||
|
||||
output_ = std::move(self);
|
||||
return false;
|
||||
}
|
||||
|
||||
bool DistributedPlanToJsonVisitor::PreVisit(PullRemoteOrderBy &op) {
|
||||
json self;
|
||||
self["name"] = "PullRemoteOrderBy";
|
||||
|
||||
for (auto i = 0; i < op.order_by_.size(); ++i) {
|
||||
json json;
|
||||
json["ordering"] = ToString(op.compare_.ordering_[i]);
|
||||
json["expression"] = ToJson(op.order_by_[i]);
|
||||
self["order_by"].push_back(json);
|
||||
}
|
||||
self["symbols"] = ToJson(op.symbols_);
|
||||
|
||||
op.input_->Accept(*this);
|
||||
self["input"] = PopOutput();
|
||||
|
||||
output_ = std::move(self);
|
||||
return false;
|
||||
}
|
||||
|
||||
bool DistributedPlanToJsonVisitor::PreVisit(DistributedCreateNode &op) {
|
||||
json self;
|
||||
self["name"] = "DistributedCreateNode";
|
||||
self["node_info"] = ToJson(op.node_info_, *dba_);
|
||||
self["on_random_worker"] = op.on_random_worker_;
|
||||
|
||||
op.input_->Accept(*this);
|
||||
self["input"] = PopOutput();
|
||||
|
||||
output_ = std::move(self);
|
||||
return false;
|
||||
}
|
||||
|
||||
bool DistributedPlanToJsonVisitor::PreVisit(DistributedCreateExpand &op) {
|
||||
json self;
|
||||
self["name"] = "DistributedCreateExpand";
|
||||
self["input_symbol"] = ToJson(op.input_symbol_);
|
||||
self["node_info"] = ToJson(op.node_info_, *dba_);
|
||||
self["edge_info"] = ToJson(op.edge_info_, *dba_);
|
||||
self["existing_node"] = op.existing_node_;
|
||||
|
||||
op.input_->Accept(*this);
|
||||
self["input"] = PopOutput();
|
||||
|
||||
output_ = std::move(self);
|
||||
return false;
|
||||
}
|
||||
|
||||
bool DistributedPlanToJsonVisitor::PreVisit(Synchronize &op) {
|
||||
json self;
|
||||
self["name"] = "Synchronize";
|
||||
self["advance_command"] = op.advance_command_;
|
||||
|
||||
op.input_->Accept(*this);
|
||||
self["input"] = PopOutput();
|
||||
|
||||
if (op.pull_remote_) {
|
||||
op.pull_remote_->Accept(*this);
|
||||
self["pull_remote"] = PopOutput();
|
||||
} else {
|
||||
self["pull_remote"] = json();
|
||||
}
|
||||
|
||||
output_ = std::move(self);
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace impl
|
||||
|
||||
} // namespace query::plan
|
@ -1,71 +0,0 @@
/// @file
#pragma once

#include "query/distributed/plan/ops.hpp"
#include "query/plan/pretty_print.hpp"

#include <json/json.hpp>

namespace query::plan {

void DistributedPrettyPrint(const database::GraphDbAccessor &dba,
const LogicalOperator *plan_root,
std::ostream *out);

inline void DistributedPrettyPrint(const database::GraphDbAccessor &dba,
const LogicalOperator *plan_root) {
DistributedPrettyPrint(dba, plan_root, &std::cout);
}

nlohmann::json DistributedPlanToJson(const database::GraphDbAccessor &dba,
const LogicalOperator *plan_root);

class DistributedPlanPrinter : public PlanPrinter,
public DistributedOperatorVisitor {
public:
using DistributedOperatorVisitor::PostVisit;
using DistributedOperatorVisitor::PreVisit;
using DistributedOperatorVisitor::Visit;
using PlanPrinter::PlanPrinter;
using PlanPrinter::PostVisit;
using PlanPrinter::PreVisit;
using PlanPrinter::Visit;

bool PreVisit(DistributedExpand &) override;
bool PreVisit(DistributedExpandBfs &) override;

bool PreVisit(PullRemote &) override;
bool PreVisit(PullRemoteOrderBy &) override;

bool PreVisit(DistributedCreateNode &) override;
bool PreVisit(DistributedCreateExpand &) override;
bool PreVisit(Synchronize &) override;
};

namespace impl {

class DistributedPlanToJsonVisitor : public PlanToJsonVisitor,
public DistributedOperatorVisitor {
public:
using DistributedOperatorVisitor::PostVisit;
using DistributedOperatorVisitor::PreVisit;
using DistributedOperatorVisitor::Visit;
using PlanToJsonVisitor::PlanToJsonVisitor;
using PlanToJsonVisitor::PostVisit;
using PlanToJsonVisitor::PreVisit;
using PlanToJsonVisitor::Visit;

bool PreVisit(DistributedExpand &) override;
bool PreVisit(DistributedExpandBfs &) override;

bool PreVisit(PullRemote &) override;
bool PreVisit(PullRemoteOrderBy &) override;

bool PreVisit(DistributedCreateNode &) override;
bool PreVisit(DistributedCreateExpand &) override;
bool PreVisit(Synchronize &) override;
};

} // namespace impl

} // namespace query::plan
@ -1,224 +0,0 @@
|
||||
#include "query/distributed/serialization.hpp"
|
||||
|
||||
#include "distributed/data_manager.hpp"
|
||||
#include "query/distributed/frontend/ast/ast_serialization.hpp"
|
||||
|
||||
namespace slk {
|
||||
|
||||
void Save(const query::TypedValue &value, slk::Builder *builder,
|
||||
storage::SendVersions versions, int16_t worker_id) {
|
||||
switch (value.type()) {
|
||||
case query::TypedValue::Type::Null:
|
||||
slk::Save(static_cast<uint8_t>(0), builder);
|
||||
return;
|
||||
case query::TypedValue::Type::Bool:
|
||||
slk::Save(static_cast<uint8_t>(1), builder);
|
||||
slk::Save(value.Value<bool>(), builder);
|
||||
return;
|
||||
case query::TypedValue::Type::Int:
|
||||
slk::Save(static_cast<uint8_t>(2), builder);
|
||||
slk::Save(value.Value<int64_t>(), builder);
|
||||
return;
|
||||
case query::TypedValue::Type::Double:
|
||||
slk::Save(static_cast<uint8_t>(3), builder);
|
||||
slk::Save(value.Value<double>(), builder);
|
||||
return;
|
||||
case query::TypedValue::Type::String:
|
||||
slk::Save(static_cast<uint8_t>(4), builder);
|
||||
slk::Save(std::string(value.ValueString()), builder);
|
||||
return;
|
||||
case query::TypedValue::Type::List: {
|
||||
slk::Save(static_cast<uint8_t>(5), builder);
|
||||
const auto &values = value.ValueList();
|
||||
size_t size = values.size();
|
||||
slk::Save(size, builder);
|
||||
for (const auto &v : values) {
|
||||
slk::Save(v, builder, versions, worker_id);
|
||||
}
|
||||
return;
|
||||
}
|
||||
case query::TypedValue::Type::Map: {
|
||||
slk::Save(static_cast<uint8_t>(6), builder);
|
||||
const auto &map = value.ValueMap();
|
||||
size_t size = map.size();
|
||||
slk::Save(size, builder);
|
||||
for (const auto &kv : map) {
|
||||
slk::Save(std::string(kv.first), builder);
|
||||
slk::Save(kv.second, builder, versions, worker_id);
|
||||
}
|
||||
return;
|
||||
}
|
||||
case query::TypedValue::Type::Vertex: {
|
||||
slk::Save(static_cast<uint8_t>(7), builder);
|
||||
slk::Save(value.ValueVertex(), builder, versions, worker_id);
|
||||
return;
|
||||
}
|
||||
case query::TypedValue::Type::Edge: {
|
||||
slk::Save(static_cast<uint8_t>(8), builder);
|
||||
slk::Save(value.ValueEdge(), builder, versions, worker_id);
|
||||
return;
|
||||
}
|
||||
case query::TypedValue::Type::Path: {
|
||||
slk::Save(static_cast<uint8_t>(9), builder);
|
||||
const auto &path = value.ValuePath();
|
||||
size_t v_size = path.vertices().size();
|
||||
slk::Save(v_size, builder);
|
||||
for (const auto &v : path.vertices()) {
|
||||
slk::Save(v, builder, versions, worker_id);
|
||||
}
|
||||
size_t e_size = path.edges().size();
|
||||
slk::Save(e_size, builder);
|
||||
for (const auto &e : path.edges()) {
|
||||
slk::Save(e, builder, versions, worker_id);
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Load(query::TypedValue *value, slk::Reader *reader,
|
||||
database::GraphDbAccessor *dba,
|
||||
distributed::DataManager *data_manager) {
|
||||
uint8_t type;
|
||||
slk::Load(&type, reader);
|
||||
switch (type) {
|
||||
case static_cast<uint8_t>(0):
|
||||
*value = query::TypedValue();
|
||||
return;
|
||||
case static_cast<uint8_t>(1): {
|
||||
bool v;
|
||||
slk::Load(&v, reader);
|
||||
*value = v;
|
||||
return;
|
||||
}
|
||||
case static_cast<uint8_t>(2): {
|
||||
int64_t v;
|
||||
slk::Load(&v, reader);
|
||||
*value = v;
|
||||
return;
|
||||
}
|
||||
case static_cast<uint8_t>(3): {
|
||||
double v;
|
||||
slk::Load(&v, reader);
|
||||
*value = v;
|
||||
return;
|
||||
}
|
||||
case static_cast<uint8_t>(4): {
|
||||
std::string v;
|
||||
slk::Load(&v, reader);
|
||||
*value = std::move(v);
|
||||
return;
|
||||
}
|
||||
case static_cast<uint8_t>(5): {
|
||||
size_t size;
|
||||
slk::Load(&size, reader);
|
||||
std::vector<query::TypedValue> list;
|
||||
list.resize(size);
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
slk::Load(&list[i], reader, dba, data_manager);
|
||||
}
|
||||
*value = std::move(list);
|
||||
return;
|
||||
}
|
||||
case static_cast<uint8_t>(6): {
|
||||
size_t size;
|
||||
slk::Load(&size, reader);
|
||||
std::map<std::string, query::TypedValue> map;
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
std::string key;
|
||||
slk::Load(&key, reader);
|
||||
slk::Load(&map[key], reader, dba, data_manager);
|
||||
}
|
||||
*value = std::move(map);
|
||||
return;
|
||||
}
|
||||
case static_cast<uint8_t>(7):
|
||||
*value = slk::LoadVertexAccessor(reader, dba, data_manager);
|
||||
return;
|
||||
case static_cast<uint8_t>(8):
|
||||
*value = slk::LoadEdgeAccessor(reader, dba, data_manager);
|
||||
return;
|
||||
case static_cast<uint8_t>(9): {
|
||||
size_t v_size;
|
||||
slk::Load(&v_size, reader);
|
||||
auto *memory = value->GetMemoryResource();
|
||||
std::vector<VertexAccessor, utils::Allocator<VertexAccessor>> vertices(
|
||||
memory);
|
||||
vertices.reserve(v_size);
|
||||
for (size_t i = 0; i < v_size; ++i) {
|
||||
vertices.push_back(slk::LoadVertexAccessor(reader, dba, data_manager));
|
||||
}
|
||||
size_t e_size;
|
||||
slk::Load(&e_size, reader);
|
||||
std::vector<EdgeAccessor, utils::Allocator<EdgeAccessor>> edges(memory);
|
||||
edges.reserve(e_size);
|
||||
for (size_t i = 0; i < e_size; ++i) {
|
||||
edges.push_back(slk::LoadEdgeAccessor(reader, dba, data_manager));
|
||||
}
|
||||
query::Path path(vertices[0], memory);
|
||||
path.vertices() = std::move(vertices);
|
||||
path.edges() = std::move(edges);
|
||||
*value = std::move(path);
|
||||
return;
|
||||
}
|
||||
default:
|
||||
throw slk::SlkDecodeException("Trying to load unknown TypedValue!");
|
||||
}
|
||||
}
|
||||
|
||||
void Save(const query::Parameters ¶meters, slk::Builder *builder) {
|
||||
slk::Save(parameters.size(), builder);
|
||||
for (auto &entry : parameters) {
|
||||
slk::Save(entry, builder);
|
||||
}
|
||||
}
|
||||
|
||||
void Load(query::Parameters *parameters, slk::Reader *reader) {
|
||||
size_t size = 0;
|
||||
slk::Load(&size, reader);
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
std::pair<int, PropertyValue> entry;
|
||||
slk::Load(&entry, reader);
|
||||
parameters->Add(entry.first, entry.second);
|
||||
}
|
||||
}
|
||||
|
||||
void Save(const query::TypedValueVectorCompare &comparator,
|
||||
slk::Builder *builder) {
|
||||
slk::Save(comparator.ordering_, builder);
|
||||
}
|
||||
|
||||
void Load(query::TypedValueVectorCompare *comparator, slk::Reader *reader) {
|
||||
slk::Load(&comparator->ordering_, reader);
|
||||
}
|
||||
|
||||
|
||||
void Save(const query::GraphView &graph_view, slk::Builder *builder) {
|
||||
uint8_t enum_value = 0;
|
||||
switch (graph_view) {
|
||||
case query::GraphView::OLD:
|
||||
enum_value = 0;
|
||||
break;
|
||||
case query::GraphView::NEW:
|
||||
enum_value = 1;
|
||||
break;
|
||||
}
|
||||
slk::Save(enum_value, builder);
|
||||
}
|
||||
|
||||
void Load(query::GraphView *graph_view, slk::Reader *reader) {
|
||||
uint8_t enum_value;
|
||||
slk::Load(&enum_value, reader);
|
||||
switch (enum_value) {
|
||||
case static_cast<uint8_t>(0):
|
||||
*graph_view = query::GraphView::OLD;
|
||||
break;
|
||||
case static_cast<uint8_t>(1):
|
||||
*graph_view = query::GraphView::NEW;
|
||||
break;
|
||||
default:
|
||||
throw slk::SlkDecodeException("Trying to load unknown enum value!");
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace slk
|
@ -1,45 +0,0 @@
#pragma once

#include "query/common.hpp"
#include "query/context.hpp"
#include "query/distributed/frontend/semantic/symbol_serialization.hpp"
#include "query/frontend/semantic/symbol_table.hpp"
#include "query/typed_value.hpp"
#include "storage/distributed/rpc/serialization.hpp"

namespace distributed {
class DataManager;
}

namespace slk {

inline void Save(const query::SymbolTable &symbol_table,
slk::Builder *builder) {
slk::Save(symbol_table.table_, builder);
}

inline void Load(query::SymbolTable *symbol_table, slk::Reader *reader) {
slk::Load(&symbol_table->table_, reader);
}

void Save(const query::TypedValue &value, slk::Builder *builder,
storage::SendVersions versions, int16_t worker_id);

void Load(query::TypedValue *value, slk::Reader *reader,
database::GraphDbAccessor *dba,
distributed::DataManager *data_manager);

void Save(const query::GraphView &graph_view, slk::Builder *builder);

void Load(query::GraphView *graph_view, slk::Reader *reader);

void Save(const query::TypedValueVectorCompare &comparator,
slk::Builder *builder);

void Load(query::TypedValueVectorCompare *comparator, slk::Reader *reader);

void Save(const query::Parameters &parameters, slk::Builder *builder);

void Load(query::Parameters *parameters, slk::Reader *reader);

} // namespace slk
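The Save/Load overloads for query::TypedValue declared here (and implemented in the accompanying serialization source, shown earlier in this diff) write a one-byte type tag followed by the payload, recursing for lists, maps, and paths. The following reduced sketch shows the tag-then-payload idea for a two-alternative value; the Value type and byte layout are hypothetical and much simpler than the real ten-tag TypedValue encoding.

#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <string>
#include <variant>
#include <vector>

// A tiny stand-in for query::TypedValue with only two alternatives.
using Value = std::variant<int64_t, std::string>;

// Encode as: one tag byte (0 = int, 1 = string), then the payload.
void Save(const Value &value, std::vector<uint8_t> *out) {
  if (const auto *i = std::get_if<int64_t>(&value)) {
    out->push_back(0);
    for (int shift = 0; shift < 64; shift += 8) out->push_back((*i >> shift) & 0xff);
  } else {
    const auto &s = std::get<std::string>(value);
    out->push_back(1);
    out->push_back(static_cast<uint8_t>(s.size()));  // assumes short strings
    out->insert(out->end(), s.begin(), s.end());
  }
}

Value Load(const std::vector<uint8_t> &in, size_t *pos) {
  uint8_t tag = in[(*pos)++];
  switch (tag) {
    case 0: {
      uint64_t raw = 0;
      for (int shift = 0; shift < 64; shift += 8) raw |= uint64_t{in[(*pos)++]} << shift;
      return static_cast<int64_t>(raw);
    }
    case 1: {
      size_t size = in[(*pos)++];
      std::string s(in.begin() + *pos, in.begin() + *pos + size);
      *pos += size;
      return s;
    }
    default:
      throw std::runtime_error("Trying to load unknown tag!");
  }
}

int main() {
  std::vector<uint8_t> buffer;
  Save(Value{int64_t{7}}, &buffer);
  Save(Value{std::string{"memgraph"}}, &buffer);
  size_t pos = 0;
  std::cout << std::get<int64_t>(Load(buffer, &pos)) << ' '
            << std::get<std::string>(Load(buffer, &pos)) << '\n';  // 7 memgraph
}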
@ -684,7 +684,6 @@ TypedValue Assert(TypedValue *args, int64_t nargs, const EvaluationContext &ctx,
return TypedValue(args[0], ctx.memory);
}

#if defined(MG_SINGLE_NODE) || defined(MG_SINGLE_NODE_HA)
TypedValue Counter(TypedValue *args, int64_t nargs,
const EvaluationContext &context,
database::GraphDbAccessor *) {
@ -716,28 +715,6 @@ TypedValue Counter(TypedValue *args, int64_t nargs,

return TypedValue(value, context.memory);
}
#endif

#ifdef MG_DISTRIBUTED
TypedValue WorkerId(TypedValue *args, int64_t nargs,
const EvaluationContext &ctx, database::GraphDbAccessor *) {
if (nargs != 1) {
throw QueryRuntimeException("'workerId' requires exactly one argument.");
}
const auto &arg = args[0];
switch (arg.type()) {
case TypedValue::Type::Vertex:
return TypedValue(arg.ValueVertex().GlobalAddress().worker_id(),
ctx.memory);
case TypedValue::Type::Edge:
return TypedValue(arg.ValueEdge().GlobalAddress().worker_id(),
ctx.memory);
default:
throw QueryRuntimeException(
"'workerId' argument must be a node or an edge.");
}
}
#endif

TypedValue Id(TypedValue *args, int64_t nargs, const EvaluationContext &ctx,
database::GraphDbAccessor *dba) {
@ -1068,12 +1045,7 @@ NameToFunction(const std::string &function_name) {

// Memgraph specific functions
if (function_name == "ASSERT") return Assert;
#if defined(MG_SINGLE_NODE) || defined(MG_SINGLE_NODE_HA)
if (function_name == "COUNTER") return Counter;
#endif
#ifdef MG_DISTRIBUTED
if (function_name == "WORKERID") return WorkerId;
#endif

return nullptr;
}
