diff --git a/config/flags.yaml b/config/flags.yaml index b551f90e4..45d833e70 100644 --- a/config/flags.yaml +++ b/config/flags.yaml @@ -119,6 +119,10 @@ modifications: value: "false" override: true + - name: "storage_enable_edges_metadata" + value: "false" + override: true + - name: "query_callable_mappings_path" value: "/etc/memgraph/apoc_compatibility_mappings.json" override: true diff --git a/src/flags/general.cpp b/src/flags/general.cpp index cd2c95c60..c36262ee6 100644 --- a/src/flags/general.cpp +++ b/src/flags/general.cpp @@ -131,6 +131,11 @@ DEFINE_uint64(storage_recovery_thread_count, DEFINE_bool(storage_enable_schema_metadata, false, "Controls whether metadata should be collected about the resident labels and edge types."); +// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) +DEFINE_bool(storage_enable_edges_metadata, false, + "Controls whether additional metadata should be stored about the edges in order to do faster traversals on " + "certain queries."); + // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) DEFINE_bool(telemetry_enabled, false, "Set to true to enable telemetry. We collect information about the " diff --git a/src/flags/general.hpp b/src/flags/general.hpp index a1e8729ab..56c9802a9 100644 --- a/src/flags/general.hpp +++ b/src/flags/general.hpp @@ -84,6 +84,8 @@ DECLARE_bool(storage_parallel_schema_recovery); DECLARE_uint64(storage_recovery_thread_count); // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) DECLARE_bool(storage_enable_schema_metadata); +// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) +DECLARE_bool(storage_enable_edges_metadata); // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) DECLARE_bool(telemetry_enabled); diff --git a/src/memgraph.cpp b/src/memgraph.cpp index d896bcc4c..3b58b964d 100644 --- a/src/memgraph.cpp +++ b/src/memgraph.cpp @@ -332,6 +332,7 @@ int main(int argc, char **argv) { .durability_directory = FLAGS_data_directory + "/rocksdb_durability", .wal_directory = FLAGS_data_directory + "/rocksdb_wal"}, .salient.items = {.properties_on_edges = FLAGS_storage_properties_on_edges, + .enable_edges_metadata = FLAGS_storage_enable_edges_metadata, .enable_schema_metadata = FLAGS_storage_enable_schema_metadata}, .salient.storage_mode = memgraph::flags::ParseStorageMode()}; spdlog::info("config recover on startup {}, flags {} {}", db_config.durability.recover_on_startup, diff --git a/src/mg_import_csv.cpp b/src/mg_import_csv.cpp index 75a7c3bbb..ed5873545 100644 --- a/src/mg_import_csv.cpp +++ b/src/mg_import_csv.cpp @@ -716,8 +716,7 @@ int main(int argc, char *argv[]) { .recover_on_startup = false, .snapshot_wal_mode = memgraph::storage::Config::Durability::SnapshotWalMode::DISABLED, .snapshot_on_exit = true}, - .salient = {.items = {.properties_on_edges = FLAGS_storage_properties_on_edges}}, - }; + .salient = {.items = {.properties_on_edges = FLAGS_storage_properties_on_edges}}}; memgraph::replication::ReplicationState repl_state{memgraph::storage::ReplicationStateRootPath(config)}; auto store = memgraph::dbms::CreateInMemoryStorage(config, repl_state); diff --git a/src/storage/v2/config.hpp b/src/storage/v2/config.hpp index b2a55a40a..bcad34bca 100644 --- a/src/storage/v2/config.hpp +++ b/src/storage/v2/config.hpp @@ -36,6 +36,7 @@ struct SalientConfig { StorageMode storage_mode{StorageMode::IN_MEMORY_TRANSACTIONAL}; struct Items { bool properties_on_edges{true}; + bool enable_edges_metadata{false}; bool enable_schema_metadata{false}; friend bool operator==(const Items &lrh, const Items &rhs) = default; } items; @@ -45,11 +46,13 @@ struct SalientConfig { inline void to_json(nlohmann::json &data, SalientConfig::Items const &items) { data = nlohmann::json{{"properties_on_edges", items.properties_on_edges}, + {"enable_edges_metadata", items.enable_edges_metadata}, {"enable_schema_metadata", items.enable_schema_metadata}}; } inline void from_json(const nlohmann::json &data, SalientConfig::Items &items) { data.at("properties_on_edges").get_to(items.properties_on_edges); + data.at("enable_edges_metadata").get_to(items.enable_edges_metadata); data.at("enable_schema_metadata").get_to(items.enable_schema_metadata); } diff --git a/src/storage/v2/edge.hpp b/src/storage/v2/edge.hpp index bdb224dfb..2eb91d94c 100644 --- a/src/storage/v2/edge.hpp +++ b/src/storage/v2/edge.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -49,4 +49,18 @@ inline bool operator<(const Edge &first, const Edge &second) { return first.gid inline bool operator==(const Edge &first, const Gid &second) { return first.gid == second; } inline bool operator<(const Edge &first, const Gid &second) { return first.gid < second; } +struct EdgeMetadata { + EdgeMetadata(Gid gid, Vertex *from_vertex) : gid(gid), from_vertex(from_vertex) {} + + Gid gid; + Vertex *from_vertex; +}; + +static_assert(alignof(Edge) >= 8, "The Edge should be aligned to at least 8!"); + +inline bool operator==(const EdgeMetadata &first, const EdgeMetadata &second) { return first.gid == second.gid; } +inline bool operator<(const EdgeMetadata &first, const EdgeMetadata &second) { return first.gid < second.gid; } +inline bool operator==(const EdgeMetadata &first, const Gid &second) { return first.gid == second; } +inline bool operator<(const EdgeMetadata &first, const Gid &second) { return first.gid < second; } + } // namespace memgraph::storage diff --git a/src/storage/v2/inmemory/storage.cpp b/src/storage/v2/inmemory/storage.cpp index 129960261..7a615bf60 100644 --- a/src/storage/v2/inmemory/storage.cpp +++ b/src/storage/v2/inmemory/storage.cpp @@ -339,6 +339,11 @@ Result InMemoryStorage::InMemoryAccessor::CreateEdge(VertexAccesso if (delta) { delta->prev.Set(&*it); } + if (config_.enable_edges_metadata) { + auto acc = mem_storage->edges_metadata_.access(); + auto [_, inserted] = acc.insert(EdgeMetadata(gid, from->vertex_)); + MG_ASSERT(inserted, "The edge must be inserted here!"); + } } utils::AtomicMemoryBlock atomic_memory_block{ [this, edge, from_vertex = from_vertex, edge_type = edge_type, to_vertex = to_vertex]() { @@ -441,6 +446,11 @@ Result InMemoryStorage::InMemoryAccessor::CreateEdgeEx(VertexAcces if (delta) { delta->prev.Set(&*it); } + if (config_.enable_edges_metadata) { + auto acc = mem_storage->edges_metadata_.access(); + auto [_, inserted] = acc.insert(EdgeMetadata(gid, from->vertex_)); + MG_ASSERT(inserted, "The edge must be inserted here!"); + } } utils::AtomicMemoryBlock atomic_memory_block{ [this, edge, from_vertex = from_vertex, edge_type = edge_type, to_vertex = to_vertex]() { @@ -979,8 +989,10 @@ void InMemoryStorage::InMemoryAccessor::FastDiscardOfDeltas(uint64_t oldest_acti if (!current_deleted_edges.empty()) { // 3.c) remove from edge skip_list auto edge_acc = mem_storage->edges_.access(); + auto edge_metadata_acc = mem_storage->edges_metadata_.access(); for (auto gid : current_deleted_edges) { edge_acc.remove(gid); + edge_metadata_acc.remove(gid); } } } @@ -1222,8 +1234,10 @@ void InMemoryStorage::InMemoryAccessor::Abort() { // EDGES { auto edges_acc = mem_storage->edges_.access(); + auto edges_metadata_acc = mem_storage->edges_metadata_.access(); for (auto gid : my_deleted_edges) { edges_acc.remove(gid); + edges_metadata_acc.remove(gid); } } } @@ -1434,25 +1448,45 @@ std::optional InMemoryStorage::InMemoryAccessor::FindEdge(Gid gid, if (edge_it == edge_acc.end()) { return std::nullopt; } + auto *edge_ptr = &(*edge_it); + auto vertices_acc = mem_storage->vertices_.access(); - // TODO replace this logic once we have a proper edge struct in place. - // This should be only temporary, currently we have to do this whole - // lookup through all the vertices, since the edge struct only has a - // pointer to it's GID, it has no information whatsoever about the from - // and to vertices. - auto acc = mem_storage->vertices_.access(); - - auto maybe_edge_info = std::invoke([&]() -> std::optional> { - for (auto &vertex : acc) { - for (auto &edge : vertex.out_edges) { - if (std::get<2>(edge).ptr == edge_ptr) { - return std::make_tuple(std::get<2>(edge), std::get<0>(edge), &vertex, std::get<1>(edge)); - } + auto extract_edge_info = + [&](Vertex *from_vertex) -> std::optional> { + for (auto &out_edge : from_vertex->out_edges) { + if (std::get<2>(out_edge).ptr == edge_ptr) { + return std::make_tuple(std::get<2>(out_edge), std::get<0>(out_edge), from_vertex, std::get<1>(out_edge)); } } return std::nullopt; - }); + }; + + if (mem_storage->config_.salient.items.enable_edges_metadata) { + auto edge_metadata_acc = mem_storage->edges_metadata_.access(); + auto edge_metadata_it = edge_metadata_acc.find(gid); + MG_ASSERT(edge_metadata_it != edge_metadata_acc.end(), "Invalid database state!"); + + auto maybe_edge_info = extract_edge_info(edge_metadata_it->from_vertex); + + if (!maybe_edge_info) { + return std::nullopt; + } + + auto &edge_info = *maybe_edge_info; + return EdgeAccessor::Create(std::get<0>(edge_info), std::get<1>(edge_info), std::get<2>(edge_info), + std::get<3>(edge_info), storage_, &transaction_); + } + + // If metadata on edges is not enables we will have to do + // a full scan. + std::optional> maybe_edge_info; + for (auto &from_vertex : vertices_acc) { + maybe_edge_info = extract_edge_info(&from_vertex); + if (maybe_edge_info) { + break; + } + } if (!maybe_edge_info) { return std::nullopt; @@ -1812,8 +1846,10 @@ void InMemoryStorage::CollectGarbage(std::unique_lock main_ } { auto edge_acc = edges_.access(); + auto edge_metadata_acc = edges_metadata_.access(); for (auto edge : current_deleted_edges) { MG_ASSERT(edge_acc.remove(edge), "Invalid database state!"); + MG_ASSERT(edge_metadata_acc.remove(edge), "Invalid database state!"); } } @@ -1835,10 +1871,12 @@ void InMemoryStorage::CollectGarbage(std::unique_lock main_ // EXPENSIVE full scan, is only run if an IN_MEMORY_ANALYTICAL transaction involved any deletions if (need_full_scan_edges) { auto edge_acc = edges_.access(); + auto edge_metadata_acc = edges_metadata_.access(); for (auto &edge : edge_acc) { // a deleted edge which as no deltas must have come from IN_MEMORY_ANALYTICAL deletion if (edge.delta == nullptr && edge.deleted) { edge_acc.remove(edge); + edge_metadata_acc.remove(edge.gid); } } } diff --git a/src/storage/v2/inmemory/storage.hpp b/src/storage/v2/inmemory/storage.hpp index d1b2a9e58..d6bcb4ba1 100644 --- a/src/storage/v2/inmemory/storage.hpp +++ b/src/storage/v2/inmemory/storage.hpp @@ -425,6 +425,7 @@ class InMemoryStorage final : public Storage { // Main object storage utils::SkipList vertices_; utils::SkipList edges_; + utils::SkipList edges_metadata_; // Durability durability::Recovery recovery_; diff --git a/tests/e2e/configuration/default_config.py b/tests/e2e/configuration/default_config.py index 65a850f0b..4d12afae9 100644 --- a/tests/e2e/configuration/default_config.py +++ b/tests/e2e/configuration/default_config.py @@ -124,6 +124,11 @@ startup_config_dict = { "false", "Controls whether metadata should be collected about the resident labels and edge types.", ), + "storage_enable_edges_metadata": ( + "false", + "false", + "Controls whether additional metadata should be stored about the edges in order to do faster traversals on certain queries.", + ), "password_encryption_algorithm": ("bcrypt", "bcrypt", "The password encryption algorithm used for authentication."), "pulsar_service_url": ("", "", "Default URL used while connecting to Pulsar brokers."), "query_execution_timeout_sec": (