Parallelize edge recovery
This commit is contained in:
parent
f5a49ed29f
commit
0516e93060
@ -1,4 +1,4 @@
|
|||||||
// Copyright 2022 Memgraph Ltd.
|
// Copyright 2023 Memgraph Ltd.
|
||||||
//
|
//
|
||||||
// Use of this software is governed by the Business Source License
|
// Use of this software is governed by the Business Source License
|
||||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||||
@ -10,7 +10,9 @@
|
|||||||
// licenses/APL.txt.
|
// licenses/APL.txt.
|
||||||
|
|
||||||
#include "storage/v2/durability/snapshot.hpp"
|
#include "storage/v2/durability/snapshot.hpp"
|
||||||
|
#include <thread>
|
||||||
|
|
||||||
|
#include "spdlog/spdlog.h"
|
||||||
#include "storage/v2/durability/exceptions.hpp"
|
#include "storage/v2/durability/exceptions.hpp"
|
||||||
#include "storage/v2/durability/paths.hpp"
|
#include "storage/v2/durability/paths.hpp"
|
||||||
#include "storage/v2/durability/serialization.hpp"
|
#include "storage/v2/durability/serialization.hpp"
|
||||||
@ -157,6 +159,100 @@ SnapshotInfo ReadSnapshotInfo(const std::filesystem::path &path) {
|
|||||||
return info;
|
return info;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// n is 0-indexed
|
||||||
|
uint64_t GetNthEdgeStartOffset(Decoder &snapshot, const uint64_t n) {
|
||||||
|
for (uint64_t i = 0; i < n; ++i) {
|
||||||
|
{
|
||||||
|
const auto marker = snapshot.ReadMarker();
|
||||||
|
if (!marker || *marker != Marker::SECTION_EDGE) throw RecoveryFailure("Invalid snapshot data!");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip edge.
|
||||||
|
auto gid = snapshot.ReadUint();
|
||||||
|
if (!gid) throw RecoveryFailure("Invalid snapshot data!");
|
||||||
|
|
||||||
|
// Recover properties.
|
||||||
|
{
|
||||||
|
auto props_size = snapshot.ReadUint();
|
||||||
|
if (!props_size) throw RecoveryFailure("Invalid snapshot data!");
|
||||||
|
for (uint64_t j = 0; j < *props_size; ++j) {
|
||||||
|
auto key = snapshot.ReadUint();
|
||||||
|
if (!key) throw RecoveryFailure("Invalid snapshot data!");
|
||||||
|
auto value = snapshot.SkipPropertyValue();
|
||||||
|
if (!value) throw RecoveryFailure("Invalid snapshot data!");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const auto offset = snapshot.GetPosition();
|
||||||
|
MG_ASSERT(offset.has_value(), "Unexpected");
|
||||||
|
return *offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename TFunc>
|
||||||
|
void LoadPartialEdges(const std::filesystem::path &path, utils::SkipList<Edge> &edges, const uint64_t from_position,
|
||||||
|
const uint64_t edges_count, const NameIdMapper &name_id_mapper, const Config::Items items,
|
||||||
|
TFunc get_property_from_id) {
|
||||||
|
Decoder snapshot;
|
||||||
|
snapshot.Initialize(path, kSnapshotMagic);
|
||||||
|
|
||||||
|
// Recover edges.
|
||||||
|
auto edge_acc = edges.access();
|
||||||
|
uint64_t last_edge_gid = 0;
|
||||||
|
spdlog::info("Recovering {} edges.", edges_count);
|
||||||
|
if (!snapshot.SetPosition(from_position)) throw RecoveryFailure("Couldn't read data from snapshot!");
|
||||||
|
for (uint64_t i = 0; i < edges_count; ++i) {
|
||||||
|
{
|
||||||
|
const auto marker = snapshot.ReadMarker();
|
||||||
|
if (!marker || *marker != Marker::SECTION_EDGE) throw RecoveryFailure("Invalid snapshot data!");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (items.properties_on_edges) {
|
||||||
|
// Insert edge.
|
||||||
|
auto gid = snapshot.ReadUint();
|
||||||
|
if (!gid) throw RecoveryFailure("Invalid snapshot data!");
|
||||||
|
if (i > 0 && *gid <= last_edge_gid) throw RecoveryFailure("Invalid snapshot data!");
|
||||||
|
last_edge_gid = *gid;
|
||||||
|
spdlog::debug("Recovering edge {} with properties.", *gid);
|
||||||
|
auto [it, inserted] = edge_acc.insert(Edge{Gid::FromUint(*gid), nullptr});
|
||||||
|
if (!inserted) throw RecoveryFailure("The edge must be inserted here!");
|
||||||
|
|
||||||
|
// Recover properties.
|
||||||
|
{
|
||||||
|
auto props_size = snapshot.ReadUint();
|
||||||
|
if (!props_size) throw RecoveryFailure("Invalid snapshot data!");
|
||||||
|
auto &props = it->properties;
|
||||||
|
for (uint64_t j = 0; j < *props_size; ++j) {
|
||||||
|
auto key = snapshot.ReadUint();
|
||||||
|
if (!key) throw RecoveryFailure("Invalid snapshot data!");
|
||||||
|
auto value = snapshot.ReadPropertyValue();
|
||||||
|
if (!value) throw RecoveryFailure("Invalid snapshot data!");
|
||||||
|
SPDLOG_TRACE("Recovered property \"{}\" with value \"{}\" for edge {}.",
|
||||||
|
name_id_mapper.IdToName(snapshot_id_map.at(*key)), *value, *gid);
|
||||||
|
props.SetProperty(get_property_from_id(*key), *value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Read edge GID.
|
||||||
|
auto gid = snapshot.ReadUint();
|
||||||
|
if (!gid) throw RecoveryFailure("Invalid snapshot data!");
|
||||||
|
if (i > 0 && *gid <= last_edge_gid) throw RecoveryFailure("Invalid snapshot data!");
|
||||||
|
last_edge_gid = *gid;
|
||||||
|
|
||||||
|
spdlog::debug("Ensuring edge {} doesn't have any properties.", *gid);
|
||||||
|
// Read properties.
|
||||||
|
{
|
||||||
|
auto props_size = snapshot.ReadUint();
|
||||||
|
if (!props_size) throw RecoveryFailure("Invalid snapshot data!");
|
||||||
|
if (*props_size != 0)
|
||||||
|
throw RecoveryFailure(
|
||||||
|
"The snapshot has properties on edges, but the storage is "
|
||||||
|
"configured without properties on edges!");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
spdlog::info("Partial edges are recovered.");
|
||||||
|
}
|
||||||
|
|
||||||
RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipList<Vertex> *vertices,
|
RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipList<Vertex> *vertices,
|
||||||
utils::SkipList<Edge> *edges,
|
utils::SkipList<Edge> *edges,
|
||||||
std::deque<std::pair<std::string, uint64_t>> *epoch_history,
|
std::deque<std::pair<std::string, uint64_t>> *epoch_history,
|
||||||
@ -226,65 +322,29 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis
|
|||||||
// Reset current edge count.
|
// Reset current edge count.
|
||||||
edge_count->store(0, std::memory_order_release);
|
edge_count->store(0, std::memory_order_release);
|
||||||
|
|
||||||
|
spdlog::info("Recovering edges.");
|
||||||
{
|
{
|
||||||
// Recover edges.
|
// Recover edges.
|
||||||
auto edge_acc = edges->access();
|
auto edge_acc = edges->access();
|
||||||
uint64_t last_edge_gid = 0;
|
uint64_t last_edge_gid = 0;
|
||||||
if (snapshot_has_edges) {
|
if (snapshot_has_edges) {
|
||||||
spdlog::info("Recovering {} edges.", info.edges_count);
|
|
||||||
if (!snapshot.SetPosition(info.offset_edges)) throw RecoveryFailure("Couldn't read data from snapshot!");
|
if (!snapshot.SetPosition(info.offset_edges)) throw RecoveryFailure("Couldn't read data from snapshot!");
|
||||||
for (uint64_t i = 0; i < info.edges_count; ++i) {
|
std::vector<uint64_t> offsets;
|
||||||
{
|
offsets.push_back(info.offset_edges);
|
||||||
const auto marker = snapshot.ReadMarker();
|
constexpr auto kEdgeChunkCount = 8;
|
||||||
if (!marker || *marker != Marker::SECTION_EDGE) throw RecoveryFailure("Invalid snapshot data!");
|
const auto edge_chunk_size = static_cast<uint64_t>(info.edges_count / kEdgeChunkCount);
|
||||||
}
|
while (offsets.size() < kEdgeChunkCount) {
|
||||||
|
offsets.push_back(GetNthEdgeStartOffset(snapshot, edge_chunk_size));
|
||||||
if (items.properties_on_edges) {
|
}
|
||||||
// Insert edge.
|
std::vector<std::jthread> threads;
|
||||||
auto gid = snapshot.ReadUint();
|
threads.reserve(offsets.size());
|
||||||
if (!gid) throw RecoveryFailure("Invalid snapshot data!");
|
for (const auto offset : offsets) {
|
||||||
if (i > 0 && *gid <= last_edge_gid) throw RecoveryFailure("Invalid snapshot data!");
|
threads.emplace_back([path, edges, offset, edge_chunk_size, name_id_mapper, items, &get_property_from_id] {
|
||||||
last_edge_gid = *gid;
|
LoadPartialEdges(path, *edges, offset, edge_chunk_size, *name_id_mapper, items, get_property_from_id);
|
||||||
spdlog::debug("Recovering edge {} with properties.", *gid);
|
});
|
||||||
auto [it, inserted] = edge_acc.insert(Edge{Gid::FromUint(*gid), nullptr});
|
|
||||||
if (!inserted) throw RecoveryFailure("The edge must be inserted here!");
|
|
||||||
|
|
||||||
// Recover properties.
|
|
||||||
{
|
|
||||||
auto props_size = snapshot.ReadUint();
|
|
||||||
if (!props_size) throw RecoveryFailure("Invalid snapshot data!");
|
|
||||||
auto &props = it->properties;
|
|
||||||
for (uint64_t j = 0; j < *props_size; ++j) {
|
|
||||||
auto key = snapshot.ReadUint();
|
|
||||||
if (!key) throw RecoveryFailure("Invalid snapshot data!");
|
|
||||||
auto value = snapshot.ReadPropertyValue();
|
|
||||||
if (!value) throw RecoveryFailure("Invalid snapshot data!");
|
|
||||||
SPDLOG_TRACE("Recovered property \"{}\" with value \"{}\" for edge {}.",
|
|
||||||
name_id_mapper->IdToName(snapshot_id_map.at(*key)), *value, *gid);
|
|
||||||
props.SetProperty(get_property_from_id(*key), *value);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Read edge GID.
|
|
||||||
auto gid = snapshot.ReadUint();
|
|
||||||
if (!gid) throw RecoveryFailure("Invalid snapshot data!");
|
|
||||||
if (i > 0 && *gid <= last_edge_gid) throw RecoveryFailure("Invalid snapshot data!");
|
|
||||||
last_edge_gid = *gid;
|
|
||||||
|
|
||||||
spdlog::debug("Ensuring edge {} doesn't have any properties.", *gid);
|
|
||||||
// Read properties.
|
|
||||||
{
|
|
||||||
auto props_size = snapshot.ReadUint();
|
|
||||||
if (!props_size) throw RecoveryFailure("Invalid snapshot data!");
|
|
||||||
if (*props_size != 0)
|
|
||||||
throw RecoveryFailure(
|
|
||||||
"The snapshot has properties on edges, but the storage is "
|
|
||||||
"configured without properties on edges!");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
spdlog::info("Edges are recovered.");
|
|
||||||
}
|
}
|
||||||
|
spdlog::info("Edges are recovered.");
|
||||||
|
|
||||||
// Recover vertices (labels and properties).
|
// Recover vertices (labels and properties).
|
||||||
if (!snapshot.SetPosition(info.offset_vertices)) throw RecoveryFailure("Couldn't read data from snapshot!");
|
if (!snapshot.SetPosition(info.offset_vertices)) throw RecoveryFailure("Couldn't read data from snapshot!");
|
||||||
@ -369,6 +429,68 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis
|
|||||||
}
|
}
|
||||||
spdlog::info("Vertices are recovered.");
|
spdlog::info("Vertices are recovered.");
|
||||||
|
|
||||||
|
// Recover vertices (labels and properties).
|
||||||
|
if (!snapshot.SetPosition(info.offset_vertices)) throw RecoveryFailure("Couldn't read data from snapshot!");
|
||||||
|
for (uint64_t i = 0; i < info.vertices_count; ++i) {
|
||||||
|
{
|
||||||
|
auto marker = snapshot.ReadMarker();
|
||||||
|
if (!marker || *marker != Marker::SECTION_VERTEX) throw RecoveryFailure("Invalid snapshot data!");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Insert vertex.
|
||||||
|
auto gid = snapshot.ReadUint();
|
||||||
|
if (!gid) throw RecoveryFailure("Invalid snapshot data!");
|
||||||
|
|
||||||
|
// Skip labels.
|
||||||
|
{
|
||||||
|
auto labels_size = snapshot.ReadUint();
|
||||||
|
if (!labels_size) throw RecoveryFailure("Invalid snapshot data!");
|
||||||
|
for (uint64_t j = 0; j < *labels_size; ++j) {
|
||||||
|
auto label = snapshot.ReadUint();
|
||||||
|
if (!label) throw RecoveryFailure("Invalid snapshot data!");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip properties.
|
||||||
|
{
|
||||||
|
auto props_size = snapshot.ReadUint();
|
||||||
|
if (!props_size) throw RecoveryFailure("Invalid snapshot data!");
|
||||||
|
for (uint64_t j = 0; j < *props_size; ++j) {
|
||||||
|
auto key = snapshot.ReadUint();
|
||||||
|
if (!key) throw RecoveryFailure("Invalid snapshot data!");
|
||||||
|
auto value = snapshot.SkipPropertyValue();
|
||||||
|
if (!value) throw RecoveryFailure("Invalid snapshot data!");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip in edges.
|
||||||
|
{
|
||||||
|
auto in_size = snapshot.ReadUint();
|
||||||
|
if (!in_size) throw RecoveryFailure("Invalid snapshot data!");
|
||||||
|
for (uint64_t j = 0; j < *in_size; ++j) {
|
||||||
|
auto edge_gid = snapshot.ReadUint();
|
||||||
|
if (!edge_gid) throw RecoveryFailure("Invalid snapshot data!");
|
||||||
|
auto from_gid = snapshot.ReadUint();
|
||||||
|
if (!from_gid) throw RecoveryFailure("Invalid snapshot data!");
|
||||||
|
auto edge_type = snapshot.ReadUint();
|
||||||
|
if (!edge_type) throw RecoveryFailure("Invalid snapshot data!");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip out edges.
|
||||||
|
auto out_size = snapshot.ReadUint();
|
||||||
|
if (!out_size) throw RecoveryFailure("Invalid snapshot data!");
|
||||||
|
for (uint64_t j = 0; j < *out_size; ++j) {
|
||||||
|
auto edge_gid = snapshot.ReadUint();
|
||||||
|
if (!edge_gid) throw RecoveryFailure("Invalid snapshot data!");
|
||||||
|
auto to_gid = snapshot.ReadUint();
|
||||||
|
if (!to_gid) throw RecoveryFailure("Invalid snapshot data!");
|
||||||
|
auto edge_type = snapshot.ReadUint();
|
||||||
|
if (!edge_type) throw RecoveryFailure("Invalid snapshot data!");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
spdlog::info("Vertices are recovered twice.");
|
||||||
|
|
||||||
// Recover vertices (in/out edges).
|
// Recover vertices (in/out edges).
|
||||||
spdlog::info("Recovering connectivity.");
|
spdlog::info("Recovering connectivity.");
|
||||||
if (!snapshot.SetPosition(info.offset_vertices)) throw RecoveryFailure("Couldn't read data from snapshot!");
|
if (!snapshot.SetPosition(info.offset_vertices)) throw RecoveryFailure("Couldn't read data from snapshot!");
|
||||||
|
Loading…
Reference in New Issue
Block a user