75950664a7

Summary: This diff splits single node and distributed storage from each other. Currently, all of the storage code is copied into two directories (one for single node, one for distributed). The logic used in the storage implementation isn't touched; it will be refactored in the following diffs.

To clean the working directory after this diff, execute:

```
rm database/state_delta.capnp
rm database/state_delta.hpp
rm storage/concurrent_id_mapper_rpc_messages.capnp
rm storage/concurrent_id_mapper_rpc_messages.hpp
```

Reviewers: teon.banek, buda, msantl

Reviewed By: teon.banek, msantl

Subscribers: teon.banek, pullbot

Differential Revision: https://phabricator.memgraph.io/D1625
164 lines
5.3 KiB
C++
#pragma once

#include <cstdint>
#include <experimental/filesystem>
#include <map>
#include <string>
#include <unordered_map>
#include <vector>

#include <glog/logging.h>

#include "communication/bolt/v1/encoder/base_encoder.hpp"
#include "durability/distributed/version.hpp"
#include "durability/hashed_file_writer.hpp"
#include "durability/paths.hpp"
#include "glue/communication.hpp"
#include "query/typed_value.hpp"
#include "utils/file.hpp"

#include "graph_state.hpp"

namespace snapshot_generation {

// Snapshot layout is described in durability/version.hpp
static_assert(durability::kVersion == 6,
              "Wrong snapshot version, please update!");

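// Writes a single snapshot file for one worker. The constructor emits the
// snapshot header (magic, version, worker id, vertex/edge generator counts,
// followed by what is presumably the snapshot transaction id, hard-coded to 0,
// and an empty transactional snapshot); Close() appends the node/edge counts
// and the accumulated file hash.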
class SnapshotWriter {
 public:
  SnapshotWriter(const std::string &path, int worker_id,
                 uint64_t vertex_generator_local_count = 0,
                 uint64_t edge_generator_local_count = 0)
      : worker_id_(worker_id), buffer_(path) {
    encoder_.WriteRAW(durability::kSnapshotMagic.data(),
                      durability::kSnapshotMagic.size());
    encoder_.WriteValue(durability::kVersion);
    encoder_.WriteInt(worker_id_);
    encoder_.WriteInt(vertex_generator_local_count);
    encoder_.WriteInt(edge_generator_local_count);
    encoder_.WriteInt(0);
    encoder_.WriteList(std::vector<communication::bolt::Value>{});
  }

  // reference to `buffer_` gets broken when moving, so let's just forbid moving
  SnapshotWriter(SnapshotWriter &&rhs) = delete;
  SnapshotWriter &operator=(SnapshotWriter &&rhs) = delete;

  template <typename TValue>
  void WriteList(const std::vector<TValue> &list) {
    encoder_.WriteList(
        std::vector<communication::bolt::Value>(list.begin(), list.end()));
  }

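  // Builds the storage address of a vertex/edge, placing it on the worker
  // that created its global id.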
  storage::VertexAddress DefaultVertexAddress(gid::Gid gid) {
    return storage::VertexAddress(gid, gid::CreatorWorker(gid));
  }

  storage::EdgeAddress DefaultEdgeAddress(gid::Gid gid) {
    return storage::EdgeAddress(gid, gid::CreatorWorker(gid));
  }

  void WriteInlineEdge(const Edge &edge, bool write_from) {
    encoder_.WriteInt(DefaultEdgeAddress(edge.gid).raw());
    encoder_.WriteInt(write_from ? DefaultVertexAddress(edge.from).raw()
                                 : DefaultVertexAddress(edge.to).raw());
    encoder_.WriteString(edge.type);
  }

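  // Writes a node as a Bolt Node structure (gid, labels, properties) extended
  // with its cypher_id and its in/out edges inlined by address.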
  void WriteNode(const Node &node,
                 const std::unordered_map<gid::Gid, Edge> &edges) {
    encoder_.WriteRAW(
        utils::UnderlyingCast(communication::bolt::Marker::TinyStruct) + 3);
    encoder_.WriteRAW(
        utils::UnderlyingCast(communication::bolt::Signature::Node));
    encoder_.WriteInt(node.gid);

    WriteList(node.labels);
    std::map<std::string, communication::bolt::Value> props;
    for (const auto &prop : node.props) {
      props[prop.first] = glue::ToBoltValue(prop.second);
    }
    encoder_.WriteMap(props);

    // cypher_id
    encoder_.WriteInt(utils::MemcpyCast<int64_t>(node.gid));

    encoder_.WriteInt(node.in_edges.size());
    for (const auto &edge_idx : node.in_edges) {
      WriteInlineEdge(edges.at(edge_idx), true);
    }

    encoder_.WriteInt(node.out_edges.size());
    for (const auto &edge_idx : node.out_edges) {
      WriteInlineEdge(edges.at(edge_idx), false);
    }

    ++nodes_written_;
  }

  void WriteEdge(const Edge &edge) {
    encoder_.WriteRAW(
        utils::UnderlyingCast(communication::bolt::Marker::TinyStruct) + 5);
    encoder_.WriteRAW(
        utils::UnderlyingCast(communication::bolt::Signature::Relationship));
    encoder_.WriteInt(edge.gid);
    encoder_.WriteInt(edge.from);
    encoder_.WriteInt(edge.to);
    encoder_.WriteString(edge.type);
    std::map<std::string, communication::bolt::Value> props;
    for (const auto &prop : edge.props) {
      props[prop.first] = glue::ToBoltValue(prop.second);
    }
    encoder_.WriteMap(props);

    // cypher_id
    encoder_.WriteInt(utils::MemcpyCast<int64_t>(edge.gid));

    ++edges_written_;
  }

  void Close() {
    buffer_.WriteValue(nodes_written_);
    buffer_.WriteValue(edges_written_);
    buffer_.WriteValue(buffer_.hash());
    buffer_.Close();
  }

 private:
  int worker_id_;
  int64_t nodes_written_{0};
  int64_t edges_written_{0};

  HashedFileWriter buffer_;
  communication::bolt::BaseEncoder<HashedFileWriter> encoder_{buffer_};
};

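// Writes one snapshot per worker under <path>/worker_<worker_id> (the exact
// snapshot file path comes from durability::MakeSnapshotPath), containing that
// worker's indices, nodes and edges.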
void WriteToSnapshot(GraphState &state, const std::string &path) {
  for (int worker_id = 0; worker_id < state.NumWorkers(); ++worker_id) {
    const std::experimental::filesystem::path durability_dir =
        path / std::experimental::filesystem::path("worker_" +
                                                   std::to_string(worker_id));
    if (!utils::EnsureDir(durability_dir / "snapshots")) {
      LOG(ERROR) << "Unable to create durability directory!";
      exit(0);
    }
    // Pretend like this is coming from transaction numbered 0 - this is a hack.
    // TODO(dgleich): Change this to something more reasonable.
    const auto snapshot_file =
        durability::MakeSnapshotPath(durability_dir, worker_id, 0);

    SnapshotWriter writer(snapshot_file, worker_id,
                          state.NumNodesOnWorker(worker_id),
                          state.NumEdgesOnWorker(worker_id));

    writer.WriteList(state.Indices());

    for (const auto &node : state.NodesOnWorker(worker_id)) {
      writer.WriteNode(state.GetNode(node), state.GetEdges());
    }

    for (const auto &edge : state.EdgesOnWorker(worker_id)) {
      writer.WriteEdge(state.GetEdge(edge));
    }

    writer.Close();
  }
}

}  // namespace snapshot_generation