Add metadata flag + runtime check during id scan

This commit is contained in:
gvolfing 2024-03-15 13:56:43 +01:00
parent 96651170a0
commit 4f6d8c10ec
10 changed files with 89 additions and 17 deletions

View File

@ -119,6 +119,10 @@ modifications:
value: "false"
override: true
- name: "storage_enable_edges_metadata"
value: "false"
override: true
- name: "query_callable_mappings_path"
value: "/etc/memgraph/apoc_compatibility_mappings.json"
override: true

View File

@ -131,6 +131,11 @@ DEFINE_uint64(storage_recovery_thread_count,
DEFINE_bool(storage_enable_schema_metadata, false,
"Controls whether metadata should be collected about the resident labels and edge types.");
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
DEFINE_bool(storage_enable_edges_metadata, false,
"Controls whether additional metadata should be stored about the edges in order to do faster traversals on "
"certain queries.");
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
DEFINE_bool(telemetry_enabled, false,
"Set to true to enable telemetry. We collect information about the "

View File

@ -84,6 +84,8 @@ DECLARE_bool(storage_parallel_schema_recovery);
DECLARE_uint64(storage_recovery_thread_count);
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
DECLARE_bool(storage_enable_schema_metadata);
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
DECLARE_bool(storage_enable_edges_metadata);
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
DECLARE_bool(telemetry_enabled);

View File

@ -332,6 +332,7 @@ int main(int argc, char **argv) {
.durability_directory = FLAGS_data_directory + "/rocksdb_durability",
.wal_directory = FLAGS_data_directory + "/rocksdb_wal"},
.salient.items = {.properties_on_edges = FLAGS_storage_properties_on_edges,
.enable_edges_metadata = FLAGS_storage_enable_edges_metadata,
.enable_schema_metadata = FLAGS_storage_enable_schema_metadata},
.salient.storage_mode = memgraph::flags::ParseStorageMode()};
spdlog::info("config recover on startup {}, flags {} {}", db_config.durability.recover_on_startup,

View File

@ -716,8 +716,7 @@ int main(int argc, char *argv[]) {
.recover_on_startup = false,
.snapshot_wal_mode = memgraph::storage::Config::Durability::SnapshotWalMode::DISABLED,
.snapshot_on_exit = true},
.salient = {.items = {.properties_on_edges = FLAGS_storage_properties_on_edges}},
};
.salient = {.items = {.properties_on_edges = FLAGS_storage_properties_on_edges}}};
memgraph::replication::ReplicationState repl_state{memgraph::storage::ReplicationStateRootPath(config)};
auto store = memgraph::dbms::CreateInMemoryStorage(config, repl_state);

View File

@ -36,6 +36,7 @@ struct SalientConfig {
StorageMode storage_mode{StorageMode::IN_MEMORY_TRANSACTIONAL};
struct Items {
bool properties_on_edges{true};
bool enable_edges_metadata{false};
bool enable_schema_metadata{false};
friend bool operator==(const Items &lrh, const Items &rhs) = default;
} items;
@ -45,11 +46,13 @@ struct SalientConfig {
inline void to_json(nlohmann::json &data, SalientConfig::Items const &items) {
data = nlohmann::json{{"properties_on_edges", items.properties_on_edges},
{"enable_edges_metadata", items.enable_edges_metadata},
{"enable_schema_metadata", items.enable_schema_metadata}};
}
inline void from_json(const nlohmann::json &data, SalientConfig::Items &items) {
data.at("properties_on_edges").get_to(items.properties_on_edges);
data.at("enable_edges_metadata").get_to(items.enable_edges_metadata);
data.at("enable_schema_metadata").get_to(items.enable_schema_metadata);
}

View File

@ -1,4 +1,4 @@
// Copyright 2023 Memgraph Ltd.
// Copyright 2024 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
@ -49,4 +49,18 @@ inline bool operator<(const Edge &first, const Edge &second) { return first.gid
inline bool operator==(const Edge &first, const Gid &second) { return first.gid == second; }
inline bool operator<(const Edge &first, const Gid &second) { return first.gid < second; }
struct EdgeMetadata {
EdgeMetadata(Gid gid, Vertex *from_vertex) : gid(gid), from_vertex(from_vertex) {}
Gid gid;
Vertex *from_vertex;
};
static_assert(alignof(Edge) >= 8, "The Edge should be aligned to at least 8!");
inline bool operator==(const EdgeMetadata &first, const EdgeMetadata &second) { return first.gid == second.gid; }
inline bool operator<(const EdgeMetadata &first, const EdgeMetadata &second) { return first.gid < second.gid; }
inline bool operator==(const EdgeMetadata &first, const Gid &second) { return first.gid == second; }
inline bool operator<(const EdgeMetadata &first, const Gid &second) { return first.gid < second; }
} // namespace memgraph::storage

View File

@ -339,6 +339,11 @@ Result<EdgeAccessor> InMemoryStorage::InMemoryAccessor::CreateEdge(VertexAccesso
if (delta) {
delta->prev.Set(&*it);
}
if (config_.enable_edges_metadata) {
auto acc = mem_storage->edges_metadata_.access();
auto [_, inserted] = acc.insert(EdgeMetadata(gid, from->vertex_));
MG_ASSERT(inserted, "The edge must be inserted here!");
}
}
utils::AtomicMemoryBlock atomic_memory_block{
[this, edge, from_vertex = from_vertex, edge_type = edge_type, to_vertex = to_vertex]() {
@ -441,6 +446,11 @@ Result<EdgeAccessor> InMemoryStorage::InMemoryAccessor::CreateEdgeEx(VertexAcces
if (delta) {
delta->prev.Set(&*it);
}
if (config_.enable_edges_metadata) {
auto acc = mem_storage->edges_metadata_.access();
auto [_, inserted] = acc.insert(EdgeMetadata(gid, from->vertex_));
MG_ASSERT(inserted, "The edge must be inserted here!");
}
}
utils::AtomicMemoryBlock atomic_memory_block{
[this, edge, from_vertex = from_vertex, edge_type = edge_type, to_vertex = to_vertex]() {
@ -979,8 +989,10 @@ void InMemoryStorage::InMemoryAccessor::FastDiscardOfDeltas(uint64_t oldest_acti
if (!current_deleted_edges.empty()) {
// 3.c) remove from edge skip_list
auto edge_acc = mem_storage->edges_.access();
auto edge_metadata_acc = mem_storage->edges_metadata_.access();
for (auto gid : current_deleted_edges) {
edge_acc.remove(gid);
edge_metadata_acc.remove(gid);
}
}
}
@ -1222,8 +1234,10 @@ void InMemoryStorage::InMemoryAccessor::Abort() {
// EDGES
{
auto edges_acc = mem_storage->edges_.access();
auto edges_metadata_acc = mem_storage->edges_metadata_.access();
for (auto gid : my_deleted_edges) {
edges_acc.remove(gid);
edges_metadata_acc.remove(gid);
}
}
}
@ -1434,25 +1448,45 @@ std::optional<EdgeAccessor> InMemoryStorage::InMemoryAccessor::FindEdge(Gid gid,
if (edge_it == edge_acc.end()) {
return std::nullopt;
}
auto *edge_ptr = &(*edge_it);
auto vertices_acc = mem_storage->vertices_.access();
// TODO replace this logic once we have a proper edge struct in place.
// This should be only temporary, currently we have to do this whole
// lookup through all the vertices, since the edge struct only has a
// pointer to it's GID, it has no information whatsoever about the from
// and to vertices.
auto acc = mem_storage->vertices_.access();
auto maybe_edge_info = std::invoke([&]() -> std::optional<std::tuple<EdgeRef, EdgeTypeId, Vertex *, Vertex *>> {
for (auto &vertex : acc) {
for (auto &edge : vertex.out_edges) {
if (std::get<2>(edge).ptr == edge_ptr) {
return std::make_tuple(std::get<2>(edge), std::get<0>(edge), &vertex, std::get<1>(edge));
}
auto extract_edge_info =
[&](Vertex *from_vertex) -> std::optional<std::tuple<EdgeRef, EdgeTypeId, Vertex *, Vertex *>> {
for (auto &out_edge : from_vertex->out_edges) {
if (std::get<2>(out_edge).ptr == edge_ptr) {
return std::make_tuple(std::get<2>(out_edge), std::get<0>(out_edge), from_vertex, std::get<1>(out_edge));
}
}
return std::nullopt;
});
};
if (mem_storage->config_.salient.items.enable_edges_metadata) {
auto edge_metadata_acc = mem_storage->edges_metadata_.access();
auto edge_metadata_it = edge_metadata_acc.find(gid);
MG_ASSERT(edge_metadata_it != edge_metadata_acc.end(), "Invalid database state!");
auto maybe_edge_info = extract_edge_info(edge_metadata_it->from_vertex);
if (!maybe_edge_info) {
return std::nullopt;
}
auto &edge_info = *maybe_edge_info;
return EdgeAccessor::Create(std::get<0>(edge_info), std::get<1>(edge_info), std::get<2>(edge_info),
std::get<3>(edge_info), storage_, &transaction_);
}
// If metadata on edges is not enables we will have to do
// a full scan.
std::optional<std::tuple<EdgeRef, EdgeTypeId, Vertex *, Vertex *>> maybe_edge_info;
for (auto &from_vertex : vertices_acc) {
maybe_edge_info = extract_edge_info(&from_vertex);
if (maybe_edge_info) {
break;
}
}
if (!maybe_edge_info) {
return std::nullopt;
@ -1812,8 +1846,10 @@ void InMemoryStorage::CollectGarbage(std::unique_lock<utils::ResourceLock> main_
}
{
auto edge_acc = edges_.access();
auto edge_metadata_acc = edges_metadata_.access();
for (auto edge : current_deleted_edges) {
MG_ASSERT(edge_acc.remove(edge), "Invalid database state!");
MG_ASSERT(edge_metadata_acc.remove(edge), "Invalid database state!");
}
}
@ -1835,10 +1871,12 @@ void InMemoryStorage::CollectGarbage(std::unique_lock<utils::ResourceLock> main_
// EXPENSIVE full scan, is only run if an IN_MEMORY_ANALYTICAL transaction involved any deletions
if (need_full_scan_edges) {
auto edge_acc = edges_.access();
auto edge_metadata_acc = edges_metadata_.access();
for (auto &edge : edge_acc) {
// a deleted edge which as no deltas must have come from IN_MEMORY_ANALYTICAL deletion
if (edge.delta == nullptr && edge.deleted) {
edge_acc.remove(edge);
edge_metadata_acc.remove(edge.gid);
}
}
}

View File

@ -425,6 +425,7 @@ class InMemoryStorage final : public Storage {
// Main object storage
utils::SkipList<storage::Vertex> vertices_;
utils::SkipList<storage::Edge> edges_;
utils::SkipList<storage::EdgeMetadata> edges_metadata_;
// Durability
durability::Recovery recovery_;

View File

@ -124,6 +124,11 @@ startup_config_dict = {
"false",
"Controls whether metadata should be collected about the resident labels and edge types.",
),
"storage_enable_edges_metadata": (
"false",
"false",
"Controls whether additional metadata should be stored about the edges in order to do faster traversals on certain queries.",
),
"password_encryption_algorithm": ("bcrypt", "bcrypt", "The password encryption algorithm used for authentication."),
"pulsar_service_url": ("", "", "Default URL used while connecting to Pulsar brokers."),
"query_execution_timeout_sec": (