diff --git a/.github/workflows/package_all.yaml b/.github/workflows/package_all.yaml deleted file mode 100644 index f1831616a..000000000 --- a/.github/workflows/package_all.yaml +++ /dev/null @@ -1,263 +0,0 @@ -name: Package All - -# TODO(gitbuda): Cleanup docker container if GHA job was canceled. - -on: - workflow_dispatch: - inputs: - memgraph_version: - description: "Memgraph version to upload as. If empty upload is skipped. Format: 'X.Y.Z'" - required: false - build_type: - type: choice - description: "Memgraph Build type. Default value is Release." - default: 'Release' - options: - - Release - - RelWithDebInfo - -jobs: - centos-7: - runs-on: [self-hosted, DockerMgBuild, X64] - timeout-minutes: 60 - steps: - - name: "Set up repository" - uses: actions/checkout@v3 - with: - fetch-depth: 0 # Required because of release/get_version.py - - name: "Build package" - run: | - ./release/package/run.sh package centos-7 ${{ github.event.inputs.build_type }} - - name: "Upload package" - uses: actions/upload-artifact@v3 - with: - name: centos-7 - path: build/output/centos-7/memgraph*.rpm - - centos-9: - runs-on: [self-hosted, DockerMgBuild, X64] - timeout-minutes: 60 - steps: - - name: "Set up repository" - uses: actions/checkout@v3 - with: - fetch-depth: 0 # Required because of release/get_version.py - - name: "Build package" - run: | - ./release/package/run.sh package centos-9 ${{ github.event.inputs.build_type }} - - name: "Upload package" - uses: actions/upload-artifact@v3 - with: - name: centos-9 - path: build/output/centos-9/memgraph*.rpm - - debian-10: - runs-on: [self-hosted, DockerMgBuild, X64] - timeout-minutes: 60 - steps: - - name: "Set up repository" - uses: actions/checkout@v3 - with: - fetch-depth: 0 # Required because of release/get_version.py - - name: "Build package" - run: | - ./release/package/run.sh package debian-10 ${{ github.event.inputs.build_type }} - - name: "Upload package" - uses: actions/upload-artifact@v3 - with: - name: debian-10 - path: 
build/output/debian-10/memgraph*.deb - - debian-11: - runs-on: [self-hosted, DockerMgBuild, X64] - timeout-minutes: 60 - steps: - - name: "Set up repository" - uses: actions/checkout@v3 - with: - fetch-depth: 0 # Required because of release/get_version.py - - name: "Build package" - run: | - ./release/package/run.sh package debian-11 ${{ github.event.inputs.build_type }} - - name: "Upload package" - uses: actions/upload-artifact@v3 - with: - name: debian-11 - path: build/output/debian-11/memgraph*.deb - - docker: - runs-on: [self-hosted, DockerMgBuild, X64] - timeout-minutes: 60 - steps: - - name: "Set up repository" - uses: actions/checkout@v3 - with: - fetch-depth: 0 # Required because of release/get_version.py - - name: "Build package" - run: | - cd release/package - ./run.sh package debian-11 ${{ github.event.inputs.build_type }} --for-docker - ./run.sh docker - - name: "Upload package" - uses: actions/upload-artifact@v3 - with: - name: docker - path: build/output/docker/memgraph*.tar.gz - - ubuntu-1804: - runs-on: [self-hosted, DockerMgBuild, X64] - timeout-minutes: 60 - steps: - - name: "Set up repository" - uses: actions/checkout@v3 - with: - fetch-depth: 0 # Required because of release/get_version.py - - name: "Build package" - run: | - ./release/package/run.sh package ubuntu-18.04 ${{ github.event.inputs.build_type }} - - name: "Upload package" - uses: actions/upload-artifact@v3 - with: - name: ubuntu-18.04 - path: build/output/ubuntu-18.04/memgraph*.deb - - ubuntu-2004: - runs-on: [self-hosted, DockerMgBuild, X64] - timeout-minutes: 60 - steps: - - name: "Set up repository" - uses: actions/checkout@v3 - with: - fetch-depth: 0 # Required because of release/get_version.py - - name: "Build package" - run: | - ./release/package/run.sh package ubuntu-20.04 ${{ github.event.inputs.build_type }} - - name: "Upload package" - uses: actions/upload-artifact@v3 - with: - name: ubuntu-20.04 - path: build/output/ubuntu-20.04/memgraph*.deb - - ubuntu-2204: - runs-on: 
[self-hosted, DockerMgBuild, X64] - timeout-minutes: 60 - steps: - - name: "Set up repository" - uses: actions/checkout@v3 - with: - fetch-depth: 0 # Required because of release/get_version.py - - name: "Build package" - run: | - ./release/package/run.sh package ubuntu-22.04 ${{ github.event.inputs.build_type }} - - name: "Upload package" - uses: actions/upload-artifact@v3 - with: - name: ubuntu-22.04 - path: build/output/ubuntu-22.04/memgraph*.deb - - debian-11-platform: - runs-on: [self-hosted, DockerMgBuild, X64] - timeout-minutes: 60 - steps: - - name: "Set up repository" - uses: actions/checkout@v3 - with: - fetch-depth: 0 # Required because of release/get_version.py - - name: "Build package" - run: | - ./release/package/run.sh package debian-11 ${{ github.event.inputs.build_type }} --for-platform - - name: "Upload package" - uses: actions/upload-artifact@v3 - with: - name: debian-11-platform - path: build/output/debian-11/memgraph*.deb - - fedora-36: - runs-on: [self-hosted, DockerMgBuild, X64] - timeout-minutes: 60 - steps: - - name: "Set up repository" - uses: actions/checkout@v3 - with: - fetch-depth: 0 # Required because of release/get_version.py - - name: "Build package" - run: | - ./release/package/run.sh package fedora-36 ${{ github.event.inputs.build_type }} - - name: "Upload package" - uses: actions/upload-artifact@v3 - with: - name: fedora-36 - path: build/output/fedora-36/memgraph*.rpm - - amzn-2: - runs-on: [self-hosted, DockerMgBuild, X64] - timeout-minutes: 60 - steps: - - name: "Set up repository" - uses: actions/checkout@v3 - with: - fetch-depth: 0 # Required because of release/get_version.py - - name: "Build package" - run: | - ./release/package/run.sh package amzn-2 ${{ github.event.inputs.build_type }} - - name: "Upload package" - uses: actions/upload-artifact@v3 - with: - name: amzn-2 - path: build/output/amzn-2/memgraph*.rpm - - debian-11-arm: - runs-on: [self-hosted, DockerMgBuild, ARM64, strange] - timeout-minutes: 120 - steps: - - 
name: "Set up repository" - uses: actions/checkout@v3 - with: - fetch-depth: 0 # Required because of release/get_version.py - - name: "Build package" - run: | - ./release/package/run.sh package debian-11-arm ${{ github.event.inputs.build_type }} - - name: "Upload package" - uses: actions/upload-artifact@v3 - with: - name: debian-11-aarch64 - path: build/output/debian-11-arm/memgraph*.deb - - ubuntu-2204-arm: - runs-on: [self-hosted, DockerMgBuild, ARM64, strange] - timeout-minutes: 120 - steps: - - name: "Set up repository" - uses: actions/checkout@v3 - with: - fetch-depth: 0 # Required because of release/get_version.py - - name: "Build package" - run: | - ./release/package/run.sh package ubuntu-22.04-arm ${{ github.event.inputs.build_type }} - - name: "Upload package" - uses: actions/upload-artifact@v3 - with: - name: ubuntu-22.04-aarch64 - path: build/output/ubuntu-22.04-arm/memgraph*.deb - - upload-to-s3: - # only run upload if we specified version. Allows for runs without upload - if: "${{ github.event.inputs.memgraph_version != '' }}" - needs: [centos-7, centos-9, debian-10, debian-11, docker, ubuntu-1804, ubuntu-2004, ubuntu-2204, debian-11-platform, fedora-36, amzn-2, debian-11-arm, ubuntu-2204-arm] - runs-on: ubuntu-latest - steps: - - name: Download artifacts - uses: actions/download-artifact@v3 - with: - # name: # if name input parameter is not provided, all artifacts are downloaded - # and put in directories named after each one. 
- path: build/output/release - - name: Upload to S3 - uses: jakejarvis/s3-sync-action@v0.5.1 - env: - AWS_S3_BUCKET: "download.memgraph.com" - AWS_ACCESS_KEY_ID: ${{ secrets.S3_AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_AWS_SECRET_ACCESS_KEY }} - AWS_REGION: "eu-west-1" - SOURCE_DIR: "build/output/release" - DEST_DIR: "memgraph/v${{ github.event.inputs.memgraph_version }}/" diff --git a/.github/workflows/package_specific.yaml b/.github/workflows/package_memgraph.yaml similarity index 87% rename from .github/workflows/package_specific.yaml rename to .github/workflows/package_memgraph.yaml index c599f65ef..48a61ca53 100644 --- a/.github/workflows/package_specific.yaml +++ b/.github/workflows/package_memgraph.yaml @@ -1,4 +1,4 @@ -name: Package Specific +name: Package memgraph # TODO(gitbuda): Cleanup docker container if GHA job was canceled. @@ -10,16 +10,17 @@ on: required: false build_type: type: choice - description: "Memgraph Build type. Default value is Release." + description: "Memgraph Build type. Default value is Release" default: 'Release' options: - Release - RelWithDebInfo target_os: type: choice - description: "Target OS for which memgraph will be packaged. Default is Ubuntu 22.04" + description: "Target OS for which memgraph will be packaged. Select 'all' if you want to package for every listed OS. 
Default is Ubuntu 22.04" default: 'ubuntu-22_04' options: + - all - amzn-2 - centos-7 - centos-9 @@ -36,7 +37,7 @@ on: jobs: amzn-2: - if: ${{ github.event.inputs.target_os == 'amzn-2' }} + if: ${{ github.event.inputs.target_os == 'amzn-2' || github.event.inputs.target_os == 'all' }} runs-on: [self-hosted, DockerMgBuild, X64] timeout-minutes: 60 steps: @@ -54,7 +55,7 @@ jobs: path: build/output/amzn-2/memgraph*.rpm centos-7: - if: ${{ github.event.inputs.target_os == 'centos-7' }} + if: ${{ github.event.inputs.target_os == 'centos-7' || github.event.inputs.target_os == 'all' }} runs-on: [self-hosted, DockerMgBuild, X64] timeout-minutes: 60 steps: @@ -72,7 +73,7 @@ jobs: path: build/output/centos-7/memgraph*.rpm centos-9: - if: ${{ github.event.inputs.target_os == 'centos-9' }} + if: ${{ github.event.inputs.target_os == 'centos-9' || github.event.inputs.target_os == 'all' }} runs-on: [self-hosted, DockerMgBuild, X64] timeout-minutes: 60 steps: @@ -90,7 +91,7 @@ jobs: path: build/output/centos-9/memgraph*.rpm debian-10: - if: ${{ github.event.inputs.target_os == 'debian-10' }} + if: ${{ github.event.inputs.target_os == 'debian-10' || github.event.inputs.target_os == 'all' }} runs-on: [self-hosted, DockerMgBuild, X64] timeout-minutes: 60 steps: @@ -108,7 +109,7 @@ jobs: path: build/output/debian-10/memgraph*.deb debian-11: - if: ${{ github.event.inputs.target_os == 'debian-11' }} + if: ${{ github.event.inputs.target_os == 'debian-11' || github.event.inputs.target_os == 'all' }} runs-on: [self-hosted, DockerMgBuild, X64] timeout-minutes: 60 steps: @@ -126,7 +127,7 @@ jobs: path: build/output/debian-11/memgraph*.deb debian-11-arm: - if: ${{ github.event.inputs.target_os == 'debian-11-arm' }} + if: ${{ github.event.inputs.target_os == 'debian-11-arm' || github.event.inputs.target_os == 'all' }} runs-on: [self-hosted, DockerMgBuild, ARM64, strange] timeout-minutes: 120 steps: @@ -144,7 +145,7 @@ jobs: path: build/output/debian-11-arm/memgraph*.deb debian-11-platform: - 
if: ${{ github.event.inputs.target_os == 'debian-11-platform' }} + if: ${{ github.event.inputs.target_os == 'debian-11-platform' || github.event.inputs.target_os == 'all' }} runs-on: [self-hosted, DockerMgBuild, X64] timeout-minutes: 60 steps: @@ -162,7 +163,7 @@ jobs: path: build/output/debian-11/memgraph*.deb docker: - if: ${{ github.event.inputs.target_os == 'docker' }} + if: ${{ github.event.inputs.target_os == 'docker' || github.event.inputs.target_os == 'all' }} runs-on: [self-hosted, DockerMgBuild, X64] timeout-minutes: 60 steps: @@ -182,7 +183,7 @@ jobs: path: build/output/docker/memgraph*.tar.gz fedora-36: - if: ${{ github.event.inputs.target_os == 'fedora-36' }} + if: ${{ github.event.inputs.target_os == 'fedora-36' || github.event.inputs.target_os == 'all' }} runs-on: [self-hosted, DockerMgBuild, X64] timeout-minutes: 60 steps: @@ -200,7 +201,7 @@ jobs: path: build/output/fedora-36/memgraph*.rpm ubuntu-18_04: - if: ${{ github.event.inputs.target_os == 'ubuntu-18_04' }} + if: ${{ github.event.inputs.target_os == 'ubuntu-18_04' || github.event.inputs.target_os == 'all' }} runs-on: [self-hosted, DockerMgBuild, X64] timeout-minutes: 60 steps: @@ -218,7 +219,7 @@ jobs: path: build/output/ubuntu-18.04/memgraph*.deb ubuntu-20_04: - if: ${{ github.event.inputs.target_os == 'ubuntu-20_04' }} + if: ${{ github.event.inputs.target_os == 'ubuntu-20_04' || github.event.inputs.target_os == 'all' }} runs-on: [self-hosted, DockerMgBuild, X64] timeout-minutes: 60 steps: @@ -236,7 +237,7 @@ jobs: path: build/output/ubuntu-20.04/memgraph*.deb ubuntu-22_04: - if: ${{ github.event.inputs.target_os == 'ubuntu-22_04' }} + if: ${{ github.event.inputs.target_os == 'ubuntu-22_04' || github.event.inputs.target_os == 'all' }} runs-on: [self-hosted, DockerMgBuild, X64] timeout-minutes: 60 steps: @@ -254,7 +255,7 @@ jobs: path: build/output/ubuntu-22.04/memgraph*.deb ubuntu-22_04-arm: - if: ${{ github.event.inputs.target_os == 'ubuntu-22_04-arm' }} + if: ${{ 
github.event.inputs.target_os == 'ubuntu-22_04-arm' || github.event.inputs.target_os == 'all' }} runs-on: [self-hosted, DockerMgBuild, ARM64, strange] timeout-minutes: 120 steps: diff --git a/.github/workflows/release_debian10.yaml b/.github/workflows/release_debian10.yaml index b7f4adc7d..9a38e4cfb 100644 --- a/.github/workflows/release_debian10.yaml +++ b/.github/workflows/release_debian10.yaml @@ -178,7 +178,7 @@ jobs: release_build: name: "Release build" - runs-on: [self-hosted, Linux, X64, Debian10] + runs-on: [self-hosted, Linux, X64, Debian10, BigMemory] env: THREADS: 24 MEMGRAPH_ENTERPRISE_LICENSE: ${{ secrets.MEMGRAPH_ENTERPRISE_LICENSE }} diff --git a/.sonarcloud.properties b/.sonarcloud.properties new file mode 100644 index 000000000..2c53f52f2 --- /dev/null +++ b/.sonarcloud.properties @@ -0,0 +1,22 @@ +# Path to sources +sonar.sources = . +# sonar.exclusions= +sonar.inclusions=src,include,query_modules + +# Path to tests +sonar.tests = tests/ +# sonar.test.exclusions= +# sonar.test.inclusions= + +# Source encoding +# sonar.sourceEncoding= + +# Exclusions for copy-paste detection +# sonar.cpd.exclusions= + +# Python version (for python projects only) +# sonar.python.version= + +# C++ standard version (for C++ projects only) +# If not specified, it defaults to the latest supported standard +# sonar.cfamily.reportingCppStandardOverride=c++98|c++11|c++14|c++17|c++20 diff --git a/config/flags.yaml b/config/flags.yaml index 621d27345..b551f90e4 100644 --- a/config/flags.yaml +++ b/config/flags.yaml @@ -111,6 +111,14 @@ modifications: value: "false" override: true + - name: "storage_parallel_schema_recovery" + value: "false" + override: true + + - name: "storage_enable_schema_metadata" + value: "false" + override: true + - name: "query_callable_mappings_path" value: "/etc/memgraph/apoc_compatibility_mappings.json" override: true diff --git a/include/_mgp.hpp b/include/_mgp.hpp index 58685b440..4f6797739 100644 --- a/include/_mgp.hpp +++ b/include/_mgp.hpp @@ 
-234,8 +234,6 @@ inline mgp_type *type_duration() { return MgInvoke(mgp_type_duration inline mgp_type *type_nullable(mgp_type *type) { return MgInvoke(mgp_type_nullable, type); } -// mgp_graph - inline bool create_label_index(mgp_graph *graph, const char *label) { return MgInvoke(mgp_create_label_index, graph, label); } @@ -284,6 +282,10 @@ inline mgp_list *list_all_unique_constraints(mgp_graph *graph, mgp_memory *memor return MgInvoke(mgp_list_all_unique_constraints, graph, memory); } +// mgp_graph + +inline bool graph_is_transactional(mgp_graph *graph) { return MgInvoke(mgp_graph_is_transactional, graph); } + inline bool graph_is_mutable(mgp_graph *graph) { return MgInvoke(mgp_graph_is_mutable, graph); } inline mgp_vertex *graph_create_vertex(mgp_graph *graph, mgp_memory *memory) { @@ -376,6 +378,8 @@ inline mgp_list *list_copy(mgp_list *list, mgp_memory *memory) { inline void list_destroy(mgp_list *list) { mgp_list_destroy(list); } +inline bool list_contains_deleted(mgp_list *list) { return MgInvoke(mgp_list_contains_deleted, list); } + inline void list_append(mgp_list *list, mgp_value *val) { MgInvokeVoid(mgp_list_append, list, val); } inline void list_append_extend(mgp_list *list, mgp_value *val) { MgInvokeVoid(mgp_list_append_extend, list, val); } @@ -394,6 +398,8 @@ inline mgp_map *map_copy(mgp_map *map, mgp_memory *memory) { return MgInvoke(mgp_map_contains_deleted, map); } + inline void map_insert(mgp_map *map, const char *key, mgp_value *value) { MgInvokeVoid(mgp_map_insert, map, key, value); } @@ -442,6 +448,8 @@ inline mgp_vertex *vertex_copy(mgp_vertex *v, mgp_memory *memory) { inline void vertex_destroy(mgp_vertex *v) { mgp_vertex_destroy(v); } +inline bool vertex_is_deleted(mgp_vertex *v) { return MgInvoke(mgp_vertex_is_deleted, v); } + inline bool vertex_equal(mgp_vertex *v1, mgp_vertex *v2) { return MgInvoke(mgp_vertex_equal, v1, v2); } inline size_t vertex_labels_count(mgp_vertex *v) { return MgInvoke(mgp_vertex_labels_count, v); } @@ -494,6 
+502,8 @@ inline mgp_edge *edge_copy(mgp_edge *e, mgp_memory *memory) { return MgInvoke(mgp_edge_is_deleted, e); } + inline bool edge_equal(mgp_edge *e1, mgp_edge *e2) { return MgInvoke(mgp_edge_equal, e1, e2); } inline mgp_edge_type edge_get_type(mgp_edge *e) { return MgInvoke(mgp_edge_get_type, e); } @@ -530,6 +540,8 @@ inline mgp_path *path_copy(mgp_path *path, mgp_memory *memory) { inline void path_destroy(mgp_path *path) { mgp_path_destroy(path); } +inline bool path_contains_deleted(mgp_path *path) { return MgInvoke(mgp_path_contains_deleted, path); } + inline void path_expand(mgp_path *path, mgp_edge *edge) { MgInvokeVoid(mgp_path_expand, path, edge); } inline void path_pop(mgp_path *path) { MgInvokeVoid(mgp_path_pop, path); } diff --git a/include/mg_procedure.h b/include/mg_procedure.h index 857c5f4dd..93ef241d8 100644 --- a/include/mg_procedure.h +++ b/include/mg_procedure.h @@ -429,6 +429,9 @@ enum mgp_error mgp_list_copy(struct mgp_list *list, struct mgp_memory *memory, s /// Free the memory used by the given mgp_list and contained elements. void mgp_list_destroy(struct mgp_list *list); +/// Return whether the given mgp_list contains any deleted values. +enum mgp_error mgp_list_contains_deleted(struct mgp_list *list, int *result); + /// Append a copy of mgp_value to mgp_list if capacity allows. /// The list copies the given value and therefore does not take ownership of the /// original value. You still need to call mgp_value_destroy to free the @@ -469,6 +472,9 @@ enum mgp_error mgp_map_copy(struct mgp_map *map, struct mgp_memory *memory, stru /// Free the memory used by the given mgp_map and contained items. void mgp_map_destroy(struct mgp_map *map); +/// Return whether the given mgp_map contains any deleted values. +enum mgp_error mgp_map_contains_deleted(struct mgp_map *map, int *result); + /// Insert a new mapping from a NULL terminated character string to a value. /// If a mapping with the same key already exists, it is *not* replaced. 
/// In case of insertion, both the string and the value are copied into the map. @@ -552,6 +558,9 @@ enum mgp_error mgp_path_copy(struct mgp_path *path, struct mgp_memory *memory, s /// Free the memory used by the given mgp_path and contained vertices and edges. void mgp_path_destroy(struct mgp_path *path); +/// Return whether the given mgp_path contains any deleted values. +enum mgp_error mgp_path_contains_deleted(struct mgp_path *path, int *result); + /// Append an edge continuing from the last vertex on the path. /// The edge is copied into the path. Therefore, the path does not take /// ownership of the original edge, so you still need to free the edge memory @@ -725,6 +734,9 @@ enum mgp_error mgp_vertex_copy(struct mgp_vertex *v, struct mgp_memory *memory, /// Free the memory used by a mgp_vertex. void mgp_vertex_destroy(struct mgp_vertex *v); +/// Return whether the given mgp_vertex is deleted. +enum mgp_error mgp_vertex_is_deleted(struct mgp_vertex *v, int *result); + /// Result is non-zero if given vertices are equal, otherwise 0. enum mgp_error mgp_vertex_equal(struct mgp_vertex *v1, struct mgp_vertex *v2, int *result); @@ -819,6 +831,9 @@ enum mgp_error mgp_edge_copy(struct mgp_edge *e, struct mgp_memory *memory, stru /// Free the memory used by a mgp_edge. void mgp_edge_destroy(struct mgp_edge *e); +/// Return whether the given mgp_edge is deleted. +enum mgp_error mgp_edge_is_deleted(struct mgp_edge *e, int *result); + /// Result is non-zero if given edges are equal, otherwise 0. enum mgp_error mgp_edge_equal(struct mgp_edge *e1, struct mgp_edge *e2, int *result); @@ -941,6 +956,12 @@ enum mgp_error mgp_list_all_unique_constraints(struct mgp_graph *graph, struct m /// Current implementation always returns without errors. enum mgp_error mgp_graph_is_mutable(struct mgp_graph *graph, int *result); +/// Result is non-zero if the graph is in transactional storage mode. +/// If a graph is not in transactional mode (i.e. 
analytical mode), then vertices and edges can be missing +/// because changes from other transactions are visible. +/// Current implementation always returns without errors. +enum mgp_error mgp_graph_is_transactional(struct mgp_graph *graph, int *result); + /// Add a new vertex to the graph. /// Resulting vertex must be freed using mgp_vertex_destroy. /// Return mgp_error::MGP_ERROR_IMMUTABLE_OBJECT if `graph` is immutable. diff --git a/include/mgp.hpp b/include/mgp.hpp index 6296d2e5c..3f7ed591e 100644 --- a/include/mgp.hpp +++ b/include/mgp.hpp @@ -246,6 +246,8 @@ class Graph { /// @brief Returns whether the graph is mutable. bool IsMutable() const; + /// @brief Returns whether the graph is in a transactional storage mode. + bool IsTransactional() const; /// @brief Creates a node and adds it to the graph. Node CreateNode(); /// @brief Deletes a node from the graph. @@ -512,6 +514,9 @@ class List { ~List(); + /// @brief Returns wheter the list contains any deleted values. + bool ContainsDeleted() const; + /// @brief Returns the size of the list. size_t Size() const; /// @brief Returns whether the list is empty. @@ -618,6 +623,9 @@ class Map { ~Map(); + /// @brief Returns wheter the map contains any deleted values. + bool ContainsDeleted() const; + /// @brief Returns the size of the map. size_t Size() const; @@ -730,6 +738,9 @@ class Node { ~Node(); + /// @brief Returns wheter the node has been deleted. + bool IsDeleted() const; + /// @brief Returns the node’s ID. mgp::Id Id() const; @@ -811,6 +822,9 @@ class Relationship { ~Relationship(); + /// @brief Returns wheter the relationship has been deleted. + bool IsDeleted() const; + /// @brief Returns the relationship’s ID. mgp::Id Id() const; @@ -876,6 +890,9 @@ class Path { ~Path(); + /// @brief Returns wheter the path contains any deleted values. + bool ContainsDeleted() const; + /// Returns the path length (number of relationships). 
size_t Length() const; @@ -1995,6 +2012,8 @@ inline bool Graph::ContainsRelationship(const Relationship &relationship) const inline bool Graph::IsMutable() const { return mgp::graph_is_mutable(graph_); } +inline bool Graph::IsTransactional() const { return mgp::graph_is_transactional(graph_); } + inline Node Graph::CreateNode() { auto *vertex = mgp::MemHandlerCallback(graph_create_vertex, graph_); auto node = Node(vertex); @@ -2442,6 +2461,8 @@ inline List::~List() { } } +inline bool List::ContainsDeleted() const { return mgp::list_contains_deleted(ptr_); } + inline size_t List::Size() const { return mgp::list_size(ptr_); } inline bool List::Empty() const { return Size() == 0; } @@ -2568,6 +2589,8 @@ inline Map::~Map() { } } +inline bool Map::ContainsDeleted() const { return mgp::map_contains_deleted(ptr_); } + inline size_t Map::Size() const { return mgp::map_size(ptr_); } inline bool Map::Empty() const { return Size() == 0; } @@ -2733,6 +2756,8 @@ inline Node::~Node() { } } +inline bool Node::IsDeleted() const { return mgp::vertex_is_deleted(ptr_); } + inline mgp::Id Node::Id() const { return Id::FromInt(mgp::vertex_get_id(ptr_).as_int); } inline mgp::Labels Node::Labels() const { return mgp::Labels(ptr_); } @@ -2884,6 +2909,8 @@ inline Relationship::~Relationship() { } } +inline bool Relationship::IsDeleted() const { return mgp::edge_is_deleted(ptr_); } + inline mgp::Id Relationship::Id() const { return Id::FromInt(mgp::edge_get_id(ptr_).as_int); } inline std::string_view Relationship::Type() const { return mgp::edge_get_type(ptr_).name; } @@ -2989,6 +3016,8 @@ inline Path::~Path() { } } +inline bool Path::ContainsDeleted() const { return mgp::path_contains_deleted(ptr_); } + inline size_t Path::Length() const { return mgp::path_size(ptr_); } inline Node Path::GetNodeAt(size_t index) const { diff --git a/licenses/BSL.txt b/licenses/BSL.txt index deca9a407..9cca9dccf 100644 --- a/licenses/BSL.txt +++ b/licenses/BSL.txt @@ -36,7 +36,7 @@ ADDITIONAL USE GRANT: You 
may use the Licensed Work in accordance with the 3. using the Licensed Work to create a work or solution which competes (or might reasonably be expected to compete) with the Licensed Work. -CHANGE DATE: 2027-30-10 +CHANGE DATE: 2027-08-12 CHANGE LICENSE: Apache License, Version 2.0 For information about alternative licensing arrangements, please visit: https://memgraph.com/legal. diff --git a/query_modules/schema.cpp b/query_modules/schema.cpp index 1b3035bab..848ccedc4 100644 --- a/query_modules/schema.cpp +++ b/query_modules/schema.cpp @@ -108,31 +108,83 @@ void Schema::ProcessPropertiesRel(mgp::Record &record, const std::string_view &t record.Insert(std::string(kReturnMandatory).c_str(), mandatory); } +struct Property { + std::string name; + mgp::Value value; + + Property(const std::string &name, mgp::Value &&value) : name(name), value(std::move(value)) {} +}; + +struct LabelsHash { + std::size_t operator()(const std::set &set) const { + std::size_t seed = set.size(); + for (const auto &i : set) { + seed ^= std::hash{}(i) + 0x9e3779b9 + (seed << 6) + (seed >> 2); + } + return seed; + } +}; + +struct LabelsComparator { + bool operator()(const std::set &lhs, const std::set &rhs) const { return lhs == rhs; } +}; + +struct PropertyComparator { + bool operator()(const Property &lhs, const Property &rhs) const { return lhs.name < rhs.name; } +}; + +struct PropertyInfo { + std::set properties; + bool mandatory; +}; + void Schema::NodeTypeProperties(mgp_list * /*args*/, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory) { mgp::MemoryDispatcherGuard guard{memory}; const auto record_factory = mgp::RecordFactory(result); try { - const mgp::Graph graph = mgp::Graph(memgraph_graph); - for (auto node : graph.Nodes()) { - std::string type; - mgp::List labels = mgp::List(); + std::unordered_map, PropertyInfo, LabelsHash, LabelsComparator> node_types_properties; + + for (auto node : mgp::Graph(memgraph_graph).Nodes()) { + std::set labels_set = {}; for (auto label 
: node.Labels()) { - labels.AppendExtend(mgp::Value(label)); - type += ":`" + std::string(label) + "`"; + labels_set.emplace(label); + } + + if (node_types_properties.find(labels_set) == node_types_properties.end()) { + node_types_properties[labels_set] = PropertyInfo{std::set(), true}; } if (node.Properties().empty()) { - auto record = record_factory.NewRecord(); - ProcessPropertiesNode(record, type, labels, "", "", false); + node_types_properties[labels_set].mandatory = false; // if there is node with no property, it is not mandatory continue; } + auto &property_info = node_types_properties.at(labels_set); for (auto &[key, prop] : node.Properties()) { - auto property_type = mgp::List(); + property_info.properties.emplace(key, std::move(prop)); + if (property_info.mandatory) { + property_info.mandatory = + property_info.properties.size() == 1; // if there is only one property, it is mandatory + } + } + } + + for (auto &[labels, property_info] : node_types_properties) { + std::string label_type; + mgp::List labels_list = mgp::List(); + for (auto const &label : labels) { + label_type += ":`" + std::string(label) + "`"; + labels_list.AppendExtend(mgp::Value(label)); + } + for (auto const &prop : property_info.properties) { auto record = record_factory.NewRecord(); - property_type.AppendExtend(mgp::Value(TypeOf(prop.Type()))); - ProcessPropertiesNode(record, type, labels, key, property_type, true); + ProcessPropertiesNode(record, label_type, labels_list, prop.name, TypeOf(prop.value.Type()), + property_info.mandatory); + } + if (property_info.properties.empty()) { + auto record = record_factory.NewRecord(); + ProcessPropertiesNode(record, label_type, labels_list, "", "", false); } } @@ -144,23 +196,41 @@ void Schema::NodeTypeProperties(mgp_list * /*args*/, mgp_graph *memgraph_graph, void Schema::RelTypeProperties(mgp_list * /*args*/, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory) { mgp::MemoryDispatcherGuard guard{memory}; + + std::unordered_map 
rel_types_properties; const auto record_factory = mgp::RecordFactory(result); try { const mgp::Graph graph = mgp::Graph(memgraph_graph); - for (auto rel : graph.Relationships()) { - std::string type = ":`" + std::string(rel.Type()) + "`"; + std::string rel_type = std::string(rel.Type()); + if (rel_types_properties.find(rel_type) == rel_types_properties.end()) { + rel_types_properties[rel_type] = PropertyInfo{std::set(), true}; + } + if (rel.Properties().empty()) { - auto record = record_factory.NewRecord(); - ProcessPropertiesRel(record, type, "", "", false); + rel_types_properties[rel_type].mandatory = false; // if there is rel with no property, it is not mandatory continue; } + auto &property_info = rel_types_properties.at(rel_type); for (auto &[key, prop] : rel.Properties()) { - auto property_type = mgp::List(); + property_info.properties.emplace(key, std::move(prop)); + if (property_info.mandatory) { + property_info.mandatory = + property_info.properties.size() == 1; // if there is only one property, it is mandatory + } + } + } + + for (auto &[type, property_info] : rel_types_properties) { + std::string type_str = ":`" + std::string(type) + "`"; + for (auto const &prop : property_info.properties) { auto record = record_factory.NewRecord(); - property_type.AppendExtend(mgp::Value(TypeOf(prop.Type()))); - ProcessPropertiesRel(record, type, key, property_type, true); + ProcessPropertiesRel(record, type_str, prop.name, TypeOf(prop.value.Type()), property_info.mandatory); + } + if (property_info.properties.empty()) { + auto record = record_factory.NewRecord(); + ProcessPropertiesRel(record, type_str, "", "", false); } } diff --git a/release/get_version.py b/release/get_version.py index cfce88475..a8539fab4 100755 --- a/release/get_version.py +++ b/release/get_version.py @@ -104,7 +104,9 @@ def retry(retry_limit, timeout=100): except Exception: time.sleep(timeout) return func(*args, **kwargs) + return wrapper + return inner_func @@ -200,19 +202,19 @@ if args.version: 
try: current_branch = get_output("git", "rev-parse", "--abbrev-ref", "HEAD") if current_branch != "master": - branches = get_output("git", "branch") - if "master" in branches: + branches = get_output("git", "branch", "-r", "--list", "origin/master") + if "origin/master" in branches: # If master is present locally, the fetch is allowed to fail # because this script will still be able to compare against the # master branch. try: - get_output("git", "fetch", "origin", "master:master") + get_output("git", "fetch", "origin", "master") except Exception: pass else: # If master is not present locally, the fetch command has to # succeed because something else will fail otherwise. - get_output("git", "fetch", "origin", "master:master") + get_output("git", "fetch", "origin", "master") except Exception: print("Fatal error while ensuring local master branch.") sys.exit(1) @@ -232,7 +234,7 @@ for branch in branches: match = branch_regex.match(branch) if match is not None: version = tuple(map(int, match.group(1).split("."))) - master_branch_merge = get_output("git", "merge-base", "master", branch) + master_branch_merge = get_output("git", "merge-base", "origin/master", branch) versions.append((version, branch, master_branch_merge)) versions.sort(reverse=True) @@ -243,7 +245,7 @@ current_version = None for version in versions: version_tuple, branch, master_branch_merge = version current_branch_merge = get_output("git", "merge-base", current_hash, branch) - master_current_merge = get_output("git", "merge-base", current_hash, "master") + master_current_merge = get_output("git", "merge-base", current_hash, "origin/master") # The first check checks whether this commit is a child of `master` and # the version branch was created before us. 
# The second check checks whether this commit is a child of the version diff --git a/src/communication/bolt/v1/states/handlers.hpp b/src/communication/bolt/v1/states/handlers.hpp index f873f0e6e..3b5a67b17 100644 --- a/src/communication/bolt/v1/states/handlers.hpp +++ b/src/communication/bolt/v1/states/handlers.hpp @@ -367,14 +367,16 @@ State HandleReset(TSession &session, const Marker marker) { return State::Close; } - if (!session.encoder_.MessageSuccess()) { - spdlog::trace("Couldn't send success message!"); - return State::Close; + try { + session.Abort(); + if (!session.encoder_.MessageSuccess({})) { + spdlog::trace("Couldn't send success message!"); + return State::Close; + } + return State::Idle; + } catch (const std::exception &e) { + return HandleFailure(session, e); } - - session.Abort(); - - return State::Idle; } template @@ -397,19 +399,17 @@ State HandleBegin(TSession &session, const State state, const Marker marker) { DMG_ASSERT(!session.encoder_buffer_.HasData(), "There should be no data to write in this state"); - if (!session.encoder_.MessageSuccess({})) { - spdlog::trace("Couldn't send success message!"); - return State::Close; - } - try { session.Configure(extra.ValueMap()); session.BeginTransaction(extra.ValueMap()); + if (!session.encoder_.MessageSuccess({})) { + spdlog::trace("Couldn't send success message!"); + return State::Close; + } + return State::Idle; } catch (const std::exception &e) { return HandleFailure(session, e); } - - return State::Idle; } template @@ -427,11 +427,11 @@ State HandleCommit(TSession &session, const State state, const Marker marker) { DMG_ASSERT(!session.encoder_buffer_.HasData(), "There should be no data to write in this state"); try { + session.CommitTransaction(); if (!session.encoder_.MessageSuccess({})) { spdlog::trace("Couldn't send success message!"); return State::Close; } - session.CommitTransaction(); return State::Idle; } catch (const std::exception &e) { return HandleFailure(session, e); @@ -453,11 
+453,11 @@ State HandleRollback(TSession &session, const State state, const Marker marker) DMG_ASSERT(!session.encoder_buffer_.HasData(), "There should be no data to write in this state"); try { + session.RollbackTransaction(); if (!session.encoder_.MessageSuccess({})) { spdlog::trace("Couldn't send success message!"); return State::Close; } - session.RollbackTransaction(); return State::Idle; } catch (const std::exception &e) { return HandleFailure(session, e); diff --git a/src/dbms/CMakeLists.txt b/src/dbms/CMakeLists.txt index 8ec1e0972..f1df4985a 100644 --- a/src/dbms/CMakeLists.txt +++ b/src/dbms/CMakeLists.txt @@ -1,3 +1,3 @@ -add_library(mg-dbms STATIC database.cpp replication_handler.cpp inmemory/replication_handlers.cpp) +add_library(mg-dbms STATIC dbms_handler.cpp database.cpp replication_handler.cpp replication_client.cpp inmemory/replication_handlers.cpp) target_link_libraries(mg-dbms mg-utils mg-storage-v2 mg-query) diff --git a/src/dbms/constants.hpp b/src/dbms/constants.hpp index 3ca61056b..e7ea9987b 100644 --- a/src/dbms/constants.hpp +++ b/src/dbms/constants.hpp @@ -15,4 +15,10 @@ namespace memgraph::dbms { constexpr static const char *kDefaultDB = "memgraph"; //!< Name of the default database +#ifdef MG_EXPERIMENTAL_REPLICATION_MULTITENANCY +constexpr bool allow_mt_repl = true; +#else +constexpr bool allow_mt_repl = false; +#endif + } // namespace memgraph::dbms diff --git a/src/dbms/database.cpp b/src/dbms/database.cpp index 411e282e8..74ee13892 100644 --- a/src/dbms/database.cpp +++ b/src/dbms/database.cpp @@ -21,7 +21,7 @@ template struct memgraph::utils::Gatekeeper; namespace memgraph::dbms { -Database::Database(storage::Config config, const replication::ReplicationState &repl_state) +Database::Database(storage::Config config, replication::ReplicationState &repl_state) : trigger_store_(config.durability.storage_directory / "triggers"), streams_{config.durability.storage_directory / "streams"}, plan_cache_{FLAGS_query_plan_cache_max_size}, diff 
--git a/src/dbms/database.hpp b/src/dbms/database.hpp index 457aa1c1d..878fe7672 100644 --- a/src/dbms/database.hpp +++ b/src/dbms/database.hpp @@ -48,7 +48,7 @@ class Database { * * @param config storage configuration */ - explicit Database(storage::Config config, const replication::ReplicationState &repl_state); + explicit Database(storage::Config config, replication::ReplicationState &repl_state); /** * @brief Returns the raw storage pointer. @@ -95,7 +95,7 @@ class Database { * * @return storage::StorageMode */ - storage::StorageMode GetStorageMode() const { return storage_->GetStorageMode(); } + storage::StorageMode GetStorageMode() const noexcept { return storage_->GetStorageMode(); } /** * @brief Get the storage info diff --git a/src/dbms/database_handler.hpp b/src/dbms/database_handler.hpp index a6b3b563b..617e614c3 100644 --- a/src/dbms/database_handler.hpp +++ b/src/dbms/database_handler.hpp @@ -51,8 +51,7 @@ class DatabaseHandler : public Handler { * @param config Storage configuration * @return HandlerT::NewResult */ - HandlerT::NewResult New(std::string_view name, storage::Config config, - const replication::ReplicationState &repl_state) { + HandlerT::NewResult New(std::string_view name, storage::Config config, replication::ReplicationState &repl_state) { // Control that no one is using the same data directory if (std::any_of(begin(), end(), [&](auto &elem) { auto db_acc = elem.second.access(); diff --git a/src/dbms/dbms_handler.cpp b/src/dbms/dbms_handler.cpp new file mode 100644 index 000000000..0af9364bf --- /dev/null +++ b/src/dbms/dbms_handler.cpp @@ -0,0 +1,75 @@ +// Copyright 2023 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. 
+// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#include "dbms/dbms_handler.hpp" + +namespace memgraph::dbms { +#ifdef MG_ENTERPRISE +DbmsHandler::DbmsHandler( + storage::Config config, + memgraph::utils::Synchronized *auth, + bool recovery_on_startup, bool delete_on_drop) + : default_config_{std::move(config)}, + delete_on_drop_(delete_on_drop), + repl_state_{ReplicationStateRootPath(default_config_)} { + // TODO: Decouple storage config from dbms config + // TODO: Save individual db configs inside the kvstore and restore from there + storage::UpdatePaths(default_config_, default_config_.durability.storage_directory / "databases"); + const auto &db_dir = default_config_.durability.storage_directory; + const auto durability_dir = db_dir / ".durability"; + utils::EnsureDirOrDie(db_dir); + utils::EnsureDirOrDie(durability_dir); + durability_ = std::make_unique(durability_dir); + + // Generate the default database + MG_ASSERT(!NewDefault_().HasError(), "Failed while creating the default DB."); + + // Recover previous databases + if (recovery_on_startup) { + for (const auto &[name, _] : *durability_) { + if (name == kDefaultDB) continue; // Already set + spdlog::info("Restoring database {}.", name); + MG_ASSERT(!New_(name).HasError(), "Failed while creating database {}.", name); + spdlog::info("Database {} restored.", name); + } + } else { // Clear databases from the durability list and auth + auto locked_auth = auth->Lock(); + for (const auto &[name, _] : *durability_) { + if (name == kDefaultDB) continue; + locked_auth->DeleteDatabase(name); + durability_->Delete(name); + } + } + + // Startup replication state (if recovered at startup) + auto replica = [this](replication::RoleReplicaData const &data) { + // Register handlers + InMemoryReplicationHandlers::Register(this, *data.server); + 
if (!data.server->Start()) { + spdlog::error("Unable to start the replication server."); + return false; + } + return true; + }; + // Replication frequent check start + auto main = [this](replication::RoleMainData &data) { + for (auto &client : data.registered_replicas_) { + StartReplicaClient(*this, client); + } + return true; + }; + // Startup proccess for main/replica + MG_ASSERT(std::visit(memgraph::utils::Overloaded{replica, main}, repl_state_.ReplicationData()), + "Replica recovery failure!"); +} +#endif + +} // namespace memgraph::dbms diff --git a/src/dbms/dbms_handler.hpp b/src/dbms/dbms_handler.hpp index 27ab963d4..3151398ab 100644 --- a/src/dbms/dbms_handler.hpp +++ b/src/dbms/dbms_handler.hpp @@ -26,9 +26,11 @@ #include "auth/auth.hpp" #include "constants.hpp" #include "dbms/database.hpp" +#include "dbms/inmemory/replication_handlers.hpp" #ifdef MG_ENTERPRISE #include "dbms/database_handler.hpp" #endif +#include "dbms/replication_client.hpp" #include "global.hpp" #include "query/config.hpp" #include "query/interpreter_context.hpp" @@ -102,52 +104,22 @@ class DbmsHandler { * @param recovery_on_startup restore databases (and its content) and authentication data * @param delete_on_drop when dropping delete any associated directories on disk */ - DbmsHandler(storage::Config config, const replication::ReplicationState &repl_state, auto *auth, - bool recovery_on_startup, bool delete_on_drop) - : lock_{utils::RWLock::Priority::READ}, - default_config_{std::move(config)}, - repl_state_(repl_state), - delete_on_drop_(delete_on_drop) { - // TODO: Decouple storage config from dbms config - // TODO: Save individual db configs inside the kvstore and restore from there - storage::UpdatePaths(default_config_, default_config_.durability.storage_directory / "databases"); - const auto &db_dir = default_config_.durability.storage_directory; - const auto durability_dir = db_dir / ".durability"; - utils::EnsureDirOrDie(db_dir); - utils::EnsureDirOrDie(durability_dir); - 
durability_ = std::make_unique(durability_dir); - - // Generate the default database - MG_ASSERT(!NewDefault_().HasError(), "Failed while creating the default DB."); - // Recover previous databases - if (recovery_on_startup) { - for (const auto &[name, _] : *durability_) { - if (name == kDefaultDB) continue; // Already set - spdlog::info("Restoring database {}.", name); - MG_ASSERT(!New_(name).HasError(), "Failed while creating database {}.", name); - spdlog::info("Database {} restored.", name); - } - } else { // Clear databases from the durability list and auth - auto locked_auth = auth->Lock(); - for (const auto &[name, _] : *durability_) { - if (name == kDefaultDB) continue; - locked_auth->DeleteDatabase(name); - durability_->Delete(name); - } - } - } + DbmsHandler(storage::Config config, + memgraph::utils::Synchronized *auth, + bool recovery_on_startup, bool delete_on_drop); // TODO If more arguments are added use a config strut #else /** * @brief Initialize the handler. A single database is supported in community edition. * * @param configs storage configuration */ - DbmsHandler(storage::Config config, const replication::ReplicationState &repl_state) - : db_gatekeeper_{[&] { + DbmsHandler(storage::Config config) + : repl_state_{ReplicationStateRootPath(config)}, + db_gatekeeper_{[&] { config.name = kDefaultDB; return std::move(config); }(), - repl_state} {} + repl_state_} {} #endif #ifdef MG_ENTERPRISE @@ -248,6 +220,12 @@ class DbmsHandler { #endif } + replication::ReplicationState &ReplicationState() { return repl_state_; } + replication::ReplicationState const &ReplicationState() const { return repl_state_; } + + bool IsMain() const { return repl_state_.IsMain(); } + bool IsReplica() const { return repl_state_.IsReplica(); } + /** * @brief Return the statistics all databases. 
* @@ -536,14 +514,15 @@ class DbmsHandler { throw UnknownDatabaseException("Tried to retrieve an unknown database \"{}\".", name); } - mutable LockT lock_; //!< protective lock - storage::Config default_config_; //!< Storage configuration used when creating new databases - const replication::ReplicationState &repl_state_; //!< Global replication state - DatabaseHandler db_handler_; //!< multi-tenancy storage handler - std::unique_ptr durability_; //!< list of active dbs (pointer so we can postpone its creation) - bool delete_on_drop_; //!< Flag defining if dropping storage also deletes its directory - std::set> defunct_dbs_; //!< Databases that are in an unknown state due to various failures -#else + mutable LockT lock_{utils::RWLock::Priority::READ}; //!< protective lock + storage::Config default_config_; //!< Storage configuration used when creating new databases + DatabaseHandler db_handler_; //!< multi-tenancy storage handler + std::unique_ptr durability_; //!< list of active dbs (pointer so we can postpone its creation) + bool delete_on_drop_; //!< Flag defining if dropping storage also deletes its directory + std::set defunct_dbs_; //!< Databases that are in an unknown state due to various failures +#endif + replication::ReplicationState repl_state_; //!< Global replication state +#ifndef MG_ENTERPRISE mutable utils::Gatekeeper db_gatekeeper_; //!< Single databases gatekeeper #endif }; diff --git a/src/dbms/inmemory/replication_handlers.cpp b/src/dbms/inmemory/replication_handlers.cpp index ce1f6da20..5eba61878 100644 --- a/src/dbms/inmemory/replication_handlers.cpp +++ b/src/dbms/inmemory/replication_handlers.cpp @@ -10,6 +10,7 @@ // licenses/APL.txt. 
#include "dbms/inmemory/replication_handlers.hpp" +#include #include "dbms/constants.hpp" #include "dbms/dbms_handler.hpp" #include "replication/replication_server.hpp" @@ -187,9 +188,9 @@ void InMemoryReplicationHandlers::SnapshotHandler(dbms::DbmsHandler *dbms_handle storage::replication::Decoder decoder(req_reader); auto *storage = static_cast(db_acc->get()->storage()); - utils::EnsureDirOrDie(storage->snapshot_directory_); + utils::EnsureDirOrDie(storage->recovery_.snapshot_directory_); - const auto maybe_snapshot_path = decoder.ReadFile(storage->snapshot_directory_); + const auto maybe_snapshot_path = decoder.ReadFile(storage->recovery_.snapshot_directory_); MG_ASSERT(maybe_snapshot_path, "Failed to load snapshot!"); spdlog::info("Received snapshot saved to {}", *maybe_snapshot_path); @@ -219,7 +220,10 @@ void InMemoryReplicationHandlers::SnapshotHandler(dbms::DbmsHandler *dbms_handle storage->timestamp_ = std::max(storage->timestamp_, recovery_info.next_timestamp); spdlog::trace("Recovering indices and constraints from snapshot."); - storage::durability::RecoverIndicesAndConstraints(recovered_snapshot.indices_constraints, &storage->indices_, + memgraph::storage::durability::RecoverIndicesAndStats(recovered_snapshot.indices_constraints.indices, + &storage->indices_, &storage->vertices_, + storage->name_id_mapper_.get()); + memgraph::storage::durability::RecoverConstraints(recovered_snapshot.indices_constraints.constraints, &storage->constraints_, &storage->vertices_, storage->name_id_mapper_.get()); } catch (const storage::durability::RecoveryFailure &e) { @@ -233,7 +237,7 @@ void InMemoryReplicationHandlers::SnapshotHandler(dbms::DbmsHandler *dbms_handle spdlog::trace("Deleting old snapshot files due to snapshot recovery."); // Delete other durability files - auto snapshot_files = storage::durability::GetSnapshotFiles(storage->snapshot_directory_, storage->uuid_); + auto snapshot_files = 
storage::durability::GetSnapshotFiles(storage->recovery_.snapshot_directory_, storage->uuid_); for (const auto &[path, uuid, _] : snapshot_files) { if (path != *maybe_snapshot_path) { spdlog::trace("Deleting snapshot file {}", path); @@ -242,7 +246,7 @@ void InMemoryReplicationHandlers::SnapshotHandler(dbms::DbmsHandler *dbms_handle } spdlog::trace("Deleting old WAL files due to snapshot recovery."); - auto wal_files = storage::durability::GetWalFiles(storage->wal_directory_, storage->uuid_); + auto wal_files = storage::durability::GetWalFiles(storage->recovery_.wal_directory_, storage->uuid_); if (wal_files) { for (const auto &wal_file : *wal_files) { spdlog::trace("Deleting WAL file {}", wal_file.path); @@ -267,7 +271,7 @@ void InMemoryReplicationHandlers::WalFilesHandler(dbms::DbmsHandler *dbms_handle storage::replication::Decoder decoder(req_reader); auto *storage = static_cast(db_acc->get()->storage()); - utils::EnsureDirOrDie(storage->wal_directory_); + utils::EnsureDirOrDie(storage->recovery_.wal_directory_); for (auto i = 0; i < wal_file_number; ++i) { LoadWal(storage, &decoder); @@ -289,7 +293,7 @@ void InMemoryReplicationHandlers::CurrentWalHandler(dbms::DbmsHandler *dbms_hand storage::replication::Decoder decoder(req_reader); auto *storage = static_cast(db_acc->get()->storage()); - utils::EnsureDirOrDie(storage->wal_directory_); + utils::EnsureDirOrDie(storage->recovery_.wal_directory_); LoadWal(storage, &decoder); @@ -370,8 +374,9 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage constexpr bool kSharedAccess = false; std::optional> commit_timestamp_and_accessor; - auto get_transaction = [storage, &commit_timestamp_and_accessor](uint64_t commit_timestamp, - bool unique = kSharedAccess) { + auto const get_transaction = [storage, &commit_timestamp_and_accessor]( + uint64_t commit_timestamp, + bool unique = kSharedAccess) -> storage::InMemoryStorage::ReplicationAccessor * { if (!commit_timestamp_and_accessor) { 
std::unique_ptr acc = nullptr; if (unique) { @@ -415,9 +420,11 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage spdlog::trace(" Delete vertex {}", delta.vertex_create_delete.gid.AsUint()); auto *transaction = get_transaction(timestamp); auto vertex = transaction->FindVertex(delta.vertex_create_delete.gid, View::NEW); - if (!vertex) throw utils::BasicException("Invalid transaction!"); + if (!vertex) + throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__); auto ret = transaction->DeleteVertex(&*vertex); - if (ret.HasError() || !ret.GetValue()) throw utils::BasicException("Invalid transaction!"); + if (ret.HasError() || !ret.GetValue()) + throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__); break; } case WalDeltaData::Type::VERTEX_ADD_LABEL: { @@ -425,9 +432,11 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage delta.vertex_add_remove_label.label); auto *transaction = get_transaction(timestamp); auto vertex = transaction->FindVertex(delta.vertex_add_remove_label.gid, View::NEW); - if (!vertex) throw utils::BasicException("Invalid transaction!"); + if (!vertex) + throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__); auto ret = vertex->AddLabel(transaction->NameToLabel(delta.vertex_add_remove_label.label)); - if (ret.HasError() || !ret.GetValue()) throw utils::BasicException("Invalid transaction!"); + if (ret.HasError() || !ret.GetValue()) + throw utils::BasicException("Invalid transaction! 
Please raise an issue, {}:{}", __FILE__, __LINE__); break; } case WalDeltaData::Type::VERTEX_REMOVE_LABEL: { @@ -435,9 +444,11 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage delta.vertex_add_remove_label.label); auto *transaction = get_transaction(timestamp); auto vertex = transaction->FindVertex(delta.vertex_add_remove_label.gid, View::NEW); - if (!vertex) throw utils::BasicException("Invalid transaction!"); + if (!vertex) + throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__); auto ret = vertex->RemoveLabel(transaction->NameToLabel(delta.vertex_add_remove_label.label)); - if (ret.HasError() || !ret.GetValue()) throw utils::BasicException("Invalid transaction!"); + if (ret.HasError() || !ret.GetValue()) + throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__); break; } case WalDeltaData::Type::VERTEX_SET_PROPERTY: { @@ -445,10 +456,12 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage delta.vertex_edge_set_property.property, delta.vertex_edge_set_property.value); auto *transaction = get_transaction(timestamp); auto vertex = transaction->FindVertex(delta.vertex_edge_set_property.gid, View::NEW); - if (!vertex) throw utils::BasicException("Invalid transaction!"); + if (!vertex) + throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__); auto ret = vertex->SetProperty(transaction->NameToProperty(delta.vertex_edge_set_property.property), delta.vertex_edge_set_property.value); - if (ret.HasError()) throw utils::BasicException("Invalid transaction!"); + if (ret.HasError()) + throw utils::BasicException("Invalid transaction! 
Please raise an issue, {}:{}", __FILE__, __LINE__); break; } case WalDeltaData::Type::EDGE_CREATE: { @@ -457,13 +470,16 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage delta.edge_create_delete.from_vertex.AsUint(), delta.edge_create_delete.to_vertex.AsUint()); auto *transaction = get_transaction(timestamp); auto from_vertex = transaction->FindVertex(delta.edge_create_delete.from_vertex, View::NEW); - if (!from_vertex) throw utils::BasicException("Invalid transaction!"); + if (!from_vertex) + throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__); auto to_vertex = transaction->FindVertex(delta.edge_create_delete.to_vertex, View::NEW); - if (!to_vertex) throw utils::BasicException("Invalid transaction!"); + if (!to_vertex) + throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__); auto edge = transaction->CreateEdgeEx(&*from_vertex, &*to_vertex, transaction->NameToEdgeType(delta.edge_create_delete.edge_type), delta.edge_create_delete.gid); - if (edge.HasError()) throw utils::BasicException("Invalid transaction!"); + if (edge.HasError()) + throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__); break; } case WalDeltaData::Type::EDGE_DELETE: { @@ -472,16 +488,17 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage delta.edge_create_delete.from_vertex.AsUint(), delta.edge_create_delete.to_vertex.AsUint()); auto *transaction = get_transaction(timestamp); auto from_vertex = transaction->FindVertex(delta.edge_create_delete.from_vertex, View::NEW); - if (!from_vertex) throw utils::BasicException("Invalid transaction!"); + if (!from_vertex) + throw utils::BasicException("Invalid transaction! 
Please raise an issue, {}:{}", __FILE__, __LINE__); auto to_vertex = transaction->FindVertex(delta.edge_create_delete.to_vertex, View::NEW); - if (!to_vertex) throw utils::BasicException("Invalid transaction!"); - auto edges = from_vertex->OutEdges(View::NEW, {transaction->NameToEdgeType(delta.edge_create_delete.edge_type)}, - &*to_vertex); - if (edges.HasError()) throw utils::BasicException("Invalid transaction!"); - if (edges->edges.size() != 1) throw utils::BasicException("Invalid transaction!"); - auto &edge = (*edges).edges[0]; - auto ret = transaction->DeleteEdge(&edge); - if (ret.HasError()) throw utils::BasicException("Invalid transaction!"); + if (!to_vertex) + throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__); + auto edgeType = transaction->NameToEdgeType(delta.edge_create_delete.edge_type); + auto edge = + transaction->FindEdge(delta.edge_create_delete.gid, View::NEW, edgeType, &*from_vertex, &*to_vertex); + if (!edge) throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__); + if (auto ret = transaction->DeleteEdge(&*edge); ret.HasError()) + throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__); break; } case WalDeltaData::Type::EDGE_SET_PROPERTY: { @@ -498,7 +515,8 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage // yields an accessor that is only valid for managing the edge's // properties. auto edge = edge_acc.find(delta.vertex_edge_set_property.gid); - if (edge == edge_acc.end()) throw utils::BasicException("Invalid transaction!"); + if (edge == edge_acc.end()) + throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__); // The edge visibility check must be done here manually because we // don't allow direct access to the edges through the public API. 
{ @@ -530,7 +548,8 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage } } }); - if (!is_visible) throw utils::BasicException("Invalid transaction!"); + if (!is_visible) + throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__); } EdgeRef edge_ref(&*edge); // Here we create an edge accessor that we will use to get the @@ -543,7 +562,8 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage auto ret = ea.SetProperty(transaction->NameToProperty(delta.vertex_edge_set_property.property), delta.vertex_edge_set_property.value); - if (ret.HasError()) throw utils::BasicException("Invalid transaction!"); + if (ret.HasError()) + throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__); break; } @@ -553,7 +573,8 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage throw utils::BasicException("Invalid commit data!"); auto ret = commit_timestamp_and_accessor->second.Commit(commit_timestamp_and_accessor->first, false /* not main */); - if (ret.HasError()) throw utils::BasicException("Invalid transaction!"); + if (ret.HasError()) + throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__); commit_timestamp_and_accessor = std::nullopt; break; } @@ -563,14 +584,14 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage // Need to send the timestamp auto *transaction = get_transaction(timestamp, kUniqueAccess); if (transaction->CreateIndex(storage->NameToLabel(delta.operation_label.label)).HasError()) - throw utils::BasicException("Invalid transaction!"); + throw utils::BasicException("Invalid transaction! 
Please raise an issue, {}:{}", __FILE__, __LINE__); break; } case WalDeltaData::Type::LABEL_INDEX_DROP: { spdlog::trace(" Drop label index on :{}", delta.operation_label.label); auto *transaction = get_transaction(timestamp, kUniqueAccess); if (transaction->DropIndex(storage->NameToLabel(delta.operation_label.label)).HasError()) - throw utils::BasicException("Invalid transaction!"); + throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__); break; } case WalDeltaData::Type::LABEL_INDEX_STATS_SET: { @@ -601,7 +622,7 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage ->CreateIndex(storage->NameToLabel(delta.operation_label_property.label), storage->NameToProperty(delta.operation_label_property.property)) .HasError()) - throw utils::BasicException("Invalid transaction!"); + throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__); break; } case WalDeltaData::Type::LABEL_PROPERTY_INDEX_DROP: { @@ -612,7 +633,7 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage ->DropIndex(storage->NameToLabel(delta.operation_label_property.label), storage->NameToProperty(delta.operation_label_property.property)) .HasError()) - throw utils::BasicException("Invalid transaction!"); + throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__); break; } case WalDeltaData::Type::LABEL_PROPERTY_INDEX_STATS_SET: { @@ -644,7 +665,8 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage auto ret = transaction->CreateExistenceConstraint(storage->NameToLabel(delta.operation_label_property.label), storage->NameToProperty(delta.operation_label_property.property)); - if (ret.HasError()) throw utils::BasicException("Invalid transaction!"); + if (ret.HasError()) + throw utils::BasicException("Invalid transaction! 
Please raise an issue, {}:{}", __FILE__, __LINE__); break; } case WalDeltaData::Type::EXISTENCE_CONSTRAINT_DROP: { @@ -655,7 +677,7 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage ->DropExistenceConstraint(storage->NameToLabel(delta.operation_label_property.label), storage->NameToProperty(delta.operation_label_property.property)) .HasError()) - throw utils::BasicException("Invalid transaction!"); + throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__); break; } case WalDeltaData::Type::UNIQUE_CONSTRAINT_CREATE: { @@ -670,7 +692,7 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage auto ret = transaction->CreateUniqueConstraint(storage->NameToLabel(delta.operation_label_properties.label), properties); if (!ret.HasValue() || ret.GetValue() != UniqueConstraints::CreationStatus::SUCCESS) - throw utils::BasicException("Invalid transaction!"); + throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__); break; } case WalDeltaData::Type::UNIQUE_CONSTRAINT_DROP: { @@ -685,7 +707,7 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage auto ret = transaction->DropUniqueConstraint(storage->NameToLabel(delta.operation_label_properties.label), properties); if (ret != UniqueConstraints::DeletionStatus::SUCCESS) { - throw utils::BasicException("Invalid transaction!"); + throw utils::BasicException("Invalid transaction! 
Please raise an issue, {}:{}", __FILE__, __LINE__); } break; } diff --git a/src/dbms/inmemory/storage_helper.hpp b/src/dbms/inmemory/storage_helper.hpp index 347c16928..1cd9f9f4e 100644 --- a/src/dbms/inmemory/storage_helper.hpp +++ b/src/dbms/inmemory/storage_helper.hpp @@ -22,14 +22,8 @@ namespace memgraph::dbms { -#ifdef MG_EXPERIMENTAL_REPLICATION_MULTITENANCY -constexpr bool allow_mt_repl = true; -#else -constexpr bool allow_mt_repl = false; -#endif - -inline std::unique_ptr CreateInMemoryStorage( - storage::Config config, const ::memgraph::replication::ReplicationState &repl_state) { +inline std::unique_ptr CreateInMemoryStorage(storage::Config config, + ::memgraph::replication::ReplicationState &repl_state) { const auto wal_mode = config.durability.snapshot_wal_mode; const auto name = config.name; auto storage = std::make_unique(std::move(config)); diff --git a/src/dbms/replication_client.cpp b/src/dbms/replication_client.cpp new file mode 100644 index 000000000..bfa4c622f --- /dev/null +++ b/src/dbms/replication_client.cpp @@ -0,0 +1,34 @@ +// Copyright 2023 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +#include "dbms/replication_client.hpp" + +namespace memgraph::dbms { + +void StartReplicaClient(DbmsHandler &dbms_handler, replication::ReplicationClient &client) { + // No client error, start instance level client + auto const &endpoint = client.rpc_client_.Endpoint(); + spdlog::trace("Replication client started at: {}:{}", endpoint.address, endpoint.port); + client.StartFrequentCheck([&dbms_handler](std::string_view name) { + // Working connection, check if any database has been left behind + dbms_handler.ForEach([name](dbms::Database *db) { + // Specific database <-> replica client + db->storage()->repl_storage_state_.WithClient(name, [&](storage::ReplicationStorageClient *client) { + if (client->State() == storage::replication::ReplicaState::MAYBE_BEHIND) { + // Database <-> replica might be behind, check and recover + client->TryCheckReplicaStateAsync(db->storage()); + } + }); + }); + }); +} + +} // namespace memgraph::dbms diff --git a/src/dbms/replication_client.hpp b/src/dbms/replication_client.hpp new file mode 100644 index 000000000..c1bac91a2 --- /dev/null +++ b/src/dbms/replication_client.hpp @@ -0,0 +1,21 @@ +// Copyright 2023 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +#pragma once + +#include "dbms/dbms_handler.hpp" +#include "replication/replication_client.hpp" + +namespace memgraph::dbms { + +void StartReplicaClient(DbmsHandler &dbms_handler, replication::ReplicationClient &client); + +} // namespace memgraph::dbms diff --git a/src/dbms/replication_handler.cpp b/src/dbms/replication_handler.cpp index cff93fd6b..2cbe2c432 100644 --- a/src/dbms/replication_handler.cpp +++ b/src/dbms/replication_handler.cpp @@ -15,6 +15,7 @@ #include "dbms/dbms_handler.hpp" #include "dbms/inmemory/replication_handlers.hpp" #include "dbms/inmemory/storage_helper.hpp" +#include "dbms/replication_client.hpp" #include "replication/state.hpp" using memgraph::replication::ReplicationClientConfig; @@ -41,6 +42,8 @@ std::string RegisterReplicaErrorToString(RegisterReplicaError error) { } } // namespace +ReplicationHandler::ReplicationHandler(DbmsHandler &dbms_handler) : dbms_handler_(dbms_handler) {} + bool ReplicationHandler::SetReplicationRoleMain() { auto const main_handler = [](RoleMainData const &) { // If we are already MAIN, we don't want to change anything @@ -56,42 +59,49 @@ bool ReplicationHandler::SetReplicationRoleMain() { // STEP 2) Change to MAIN // TODO: restore replication servers if false? - if (!repl_state_.SetReplicationRoleMain()) { + if (!dbms_handler_.ReplicationState().SetReplicationRoleMain()) { // TODO: Handle recovery on failure??? 
return false; } // STEP 3) We are now MAIN, update storage local epoch + const auto &epoch = + std::get(std::as_const(dbms_handler_.ReplicationState()).ReplicationData()).epoch_; dbms_handler_.ForEach([&](Database *db) { auto *storage = db->storage(); - storage->repl_storage_state_.epoch_ = std::get(std::as_const(repl_state_).ReplicationData()).epoch_; + storage->repl_storage_state_.epoch_ = epoch; }); return true; }; // TODO: under lock - return std::visit(utils::Overloaded{main_handler, replica_handler}, repl_state_.ReplicationData()); + return std::visit(utils::Overloaded{main_handler, replica_handler}, + dbms_handler_.ReplicationState().ReplicationData()); } bool ReplicationHandler::SetReplicationRoleReplica(const memgraph::replication::ReplicationServerConfig &config) { // We don't want to restart the server if we're already a REPLICA - if (repl_state_.IsReplica()) { + if (dbms_handler_.ReplicationState().IsReplica()) { return false; } - // Remove registered replicas + // TODO StorageState needs to be synched. Could have a dangling reference if someone adds a database as we are + // deleting the replica. 
+ // Remove database specific clients dbms_handler_.ForEach([&](Database *db) { auto *storage = db->storage(); storage->repl_storage_state_.replication_clients_.WithLock([](auto &clients) { clients.clear(); }); }); + // Remove instance level clients + std::get(dbms_handler_.ReplicationState().ReplicationData()).registered_replicas_.clear(); // Creates the server - repl_state_.SetReplicationRoleReplica(config); + dbms_handler_.ReplicationState().SetReplicationRoleReplica(config); // Start const auto success = - std::visit(utils::Overloaded{[](auto) { + std::visit(utils::Overloaded{[](RoleMainData const &) { // ASSERT return false; }, @@ -104,36 +114,37 @@ bool ReplicationHandler::SetReplicationRoleReplica(const memgraph::replication:: } return true; }}, - repl_state_.ReplicationData()); + dbms_handler_.ReplicationState().ReplicationData()); // TODO Handle error (restore to main?) return success; } auto ReplicationHandler::RegisterReplica(const memgraph::replication::ReplicationClientConfig &config) -> memgraph::utils::BasicResult { - MG_ASSERT(repl_state_.IsMain(), "Only main instance can register a replica!"); + MG_ASSERT(dbms_handler_.ReplicationState().IsMain(), "Only main instance can register a replica!"); - auto res = repl_state_.RegisterReplica(config); - switch (res) { - case memgraph::replication::RegisterReplicaError::NOT_MAIN: - MG_ASSERT(false, "Only main instance can register a replica!"); - return {}; - case memgraph::replication::RegisterReplicaError::NAME_EXISTS: - return memgraph::dbms::RegisterReplicaError::NAME_EXISTS; - case memgraph::replication::RegisterReplicaError::END_POINT_EXISTS: - return memgraph::dbms::RegisterReplicaError::END_POINT_EXISTS; - case memgraph::replication::RegisterReplicaError::COULD_NOT_BE_PERSISTED: - return memgraph::dbms::RegisterReplicaError::COULD_NOT_BE_PERSISTED; - case memgraph::replication::RegisterReplicaError::SUCCESS: - break; - } - - bool all_clients_good = true; + auto instance_client = 
dbms_handler_.ReplicationState().RegisterReplica(config); + if (instance_client.HasError()) switch (instance_client.GetError()) { + case memgraph::replication::RegisterReplicaError::NOT_MAIN: + MG_ASSERT(false, "Only main instance can register a replica!"); + return {}; + case memgraph::replication::RegisterReplicaError::NAME_EXISTS: + return memgraph::dbms::RegisterReplicaError::NAME_EXISTS; + case memgraph::replication::RegisterReplicaError::END_POINT_EXISTS: + return memgraph::dbms::RegisterReplicaError::END_POINT_EXISTS; + case memgraph::replication::RegisterReplicaError::COULD_NOT_BE_PERSISTED: + return memgraph::dbms::RegisterReplicaError::COULD_NOT_BE_PERSISTED; + case memgraph::replication::RegisterReplicaError::SUCCESS: + break; + } if (!allow_mt_repl && dbms_handler_.All().size() > 1) { spdlog::warn("Multi-tenant replication is currently not supported!"); } + bool all_clients_good = true; + + // Add database specific clients (NOTE Currently all databases are connected to each replica) dbms_handler_.ForEach([&](Database *db) { auto *storage = db->storage(); if (!allow_mt_repl && storage->id() != kDefaultDB) { @@ -143,18 +154,29 @@ auto ReplicationHandler::RegisterReplica(const memgraph::replication::Replicatio if (storage->storage_mode_ != storage::StorageMode::IN_MEMORY_TRANSACTIONAL) return; all_clients_good &= - storage->repl_storage_state_.replication_clients_.WithLock([storage, &config](auto &clients) -> bool { - auto client = storage->CreateReplicationClient(config, &storage->repl_storage_state_.epoch_); - client->Start(); - - if (client->State() == storage::replication::ReplicaState::INVALID) { + storage->repl_storage_state_.replication_clients_.WithLock([storage, &instance_client](auto &storage_clients) { + auto client = std::make_unique(*instance_client.GetValue()); + client->Start(storage); + // After start the storage <-> replica state should be READY or RECOVERING (if correctly started) + // MAYBE_BEHIND isn't a statement of the current state, 
this is the default value + // Failed to start due to branching of MAIN and REPLICA + if (client->State() == storage::replication::ReplicaState::MAYBE_BEHIND) { return false; } - clients.push_back(std::move(client)); + storage_clients.push_back(std::move(client)); return true; }); }); - if (!all_clients_good) return RegisterReplicaError::CONNECTION_FAILED; // TODO: this happen to 1 or many...what to do + + // NOTE Currently if any databases fails, we revert back + if (!all_clients_good) { + spdlog::error("Failed to register all databases to the REPLICA \"{}\"", config.name); + UnregisterReplica(config.name); + return RegisterReplicaError::CONNECTION_FAILED; + } + + // No client error, start instance level client + StartReplicaClient(dbms_handler_, *instance_client.GetValue()); return {}; } @@ -163,60 +185,66 @@ auto ReplicationHandler::UnregisterReplica(std::string_view name) -> UnregisterR return UnregisterReplicaResult::NOT_MAIN; }; auto const main_handler = [this, name](RoleMainData &mainData) -> UnregisterReplicaResult { - if (!repl_state_.TryPersistUnregisterReplica(name)) { + if (!dbms_handler_.ReplicationState().TryPersistUnregisterReplica(name)) { return UnregisterReplicaResult::COULD_NOT_BE_PERSISTED; } - auto const n_unregistered = - std::erase_if(mainData.registered_replicas_, - [&](ReplicationClientConfig const ®istered_config) { return registered_config.name == name; }); - - dbms_handler_.ForEach([&](Database *db) { - db->storage()->repl_storage_state_.replication_clients_.WithLock( - [&](auto &clients) { std::erase_if(clients, [&](const auto &client) { return client->Name() == name; }); }); + // Remove database specific clients + dbms_handler_.ForEach([name](Database *db) { + db->storage()->repl_storage_state_.replication_clients_.WithLock([&name](auto &clients) { + std::erase_if(clients, [name](const auto &client) { return client->Name() == name; }); + }); }); - + // Remove instance level clients + auto const n_unregistered = + 
std::erase_if(mainData.registered_replicas_, [name](auto const &client) { return client.name_ == name; }); return n_unregistered != 0 ? UnregisterReplicaResult::SUCCESS : UnregisterReplicaResult::CAN_NOT_UNREGISTER; }; - return std::visit(utils::Overloaded{main_handler, replica_handler}, repl_state_.ReplicationData()); + return std::visit(utils::Overloaded{main_handler, replica_handler}, + dbms_handler_.ReplicationState().ReplicationData()); } -auto ReplicationHandler::GetRole() const -> memgraph::replication::ReplicationRole { return repl_state_.GetRole(); } +auto ReplicationHandler::GetRole() const -> memgraph::replication::ReplicationRole { + return dbms_handler_.ReplicationState().GetRole(); +} -bool ReplicationHandler::IsMain() const { return repl_state_.IsMain(); } +bool ReplicationHandler::IsMain() const { return dbms_handler_.ReplicationState().IsMain(); } -bool ReplicationHandler::IsReplica() const { return repl_state_.IsReplica(); } +bool ReplicationHandler::IsReplica() const { return dbms_handler_.ReplicationState().IsReplica(); } -void RestoreReplication(const replication::ReplicationState &repl_state, storage::Storage &storage) { +// Per storage +// NOTE Storage will connect to all replicas. Future work might change this +void RestoreReplication(replication::ReplicationState &repl_state, storage::Storage &storage) { spdlog::info("Restoring replication role."); /// MAIN - auto const recover_main = [&storage](RoleMainData const &mainData) { - for (const auto &config : mainData.registered_replicas_) { - spdlog::info("Replica {} restoration started for {}.", config.name, storage.id()); + auto const recover_main = [&storage](RoleMainData &mainData) { + // Each individual client has already been restored and started. 
Here we just go through each database and start its + // client + for (auto &instance_client : mainData.registered_replicas_) { + spdlog::info("Replica {} restoration started for {}.", instance_client.name_, storage.id()); - auto register_replica = [&storage](const memgraph::replication::ReplicationClientConfig &config) - -> memgraph::utils::BasicResult { - return storage.repl_storage_state_.replication_clients_.WithLock( - [&storage, &config](auto &clients) -> utils::BasicResult { - auto client = storage.CreateReplicationClient(config, &storage.repl_storage_state_.epoch_); - client->Start(); + const auto &ret = storage.repl_storage_state_.replication_clients_.WithLock( + [&](auto &storage_clients) -> utils::BasicResult { + auto client = std::make_unique(instance_client); + client->Start(&storage); + // After start the storage <-> replica state should be READY or RECOVERING (if correctly started) + // MAYBE_BEHIND isn't a statement of the current state, this is the default value + // Failed to start due to branching of MAIN and REPLICA + if (client->State() == storage::replication::ReplicaState::MAYBE_BEHIND) { + spdlog::warn("Connection failed when registering replica {}. Replica will still be registered.", + instance_client.name_); + } + storage_clients.push_back(std::move(client)); + return {}; + }); - if (client->State() == storage::replication::ReplicaState::INVALID) { - spdlog::warn("Connection failed when registering replica {}. 
Replica will still be registered.", - client->Name()); - } - clients.push_back(std::move(client)); - return {}; - }); - }; - - auto ret = register_replica(config); if (ret.HasError()) { MG_ASSERT(RegisterReplicaError::CONNECTION_FAILED != ret.GetError()); - LOG_FATAL("Failure when restoring replica {}: {}.", config.name, RegisterReplicaErrorToString(ret.GetError())); + LOG_FATAL("Failure when restoring replica {}: {}.", instance_client.name_, + RegisterReplicaErrorToString(ret.GetError())); } - spdlog::info("Replica {} restored for {}.", config.name, storage.id()); + spdlog::info("Replica {} restored for {}.", instance_client.name_, storage.id()); } spdlog::info("Replication role restored to MAIN."); }; @@ -229,6 +257,6 @@ void RestoreReplication(const replication::ReplicationState &repl_state, storage recover_main, recover_replica, }, - std::as_const(repl_state).ReplicationData()); + repl_state.ReplicationData()); } } // namespace memgraph::dbms diff --git a/src/dbms/replication_handler.hpp b/src/dbms/replication_handler.hpp index e50c47969..dc95407b1 100644 --- a/src/dbms/replication_handler.hpp +++ b/src/dbms/replication_handler.hpp @@ -36,8 +36,7 @@ enum class UnregisterReplicaResult : uint8_t { /// A handler type that keep in sync current ReplicationState and the MAIN/REPLICA-ness of Storage /// TODO: extend to do multiple storages struct ReplicationHandler { - ReplicationHandler(memgraph::replication::ReplicationState &replState, DbmsHandler &dbms_handler) - : repl_state_(replState), dbms_handler_(dbms_handler) {} + explicit ReplicationHandler(DbmsHandler &dbms_handler); // as REPLICA, become MAIN bool SetReplicationRoleMain(); @@ -58,12 +57,11 @@ struct ReplicationHandler { bool IsReplica() const; private: - memgraph::replication::ReplicationState &repl_state_; DbmsHandler &dbms_handler_; }; /// A handler type that keep in sync current ReplicationState and the MAIN/REPLICA-ness of Storage /// TODO: extend to do multiple storages -void 
RestoreReplication(const replication::ReplicationState &repl_state, storage::Storage &storage); +void RestoreReplication(replication::ReplicationState &repl_state, storage::Storage &storage); } // namespace memgraph::dbms diff --git a/src/flags/general.cpp b/src/flags/general.cpp index be060c52d..6bee2e5b3 100644 --- a/src/flags/general.cpp +++ b/src/flags/general.cpp @@ -104,9 +104,19 @@ DEFINE_bool(storage_snapshot_on_exit, false, "Controls whether the storage creat DEFINE_uint64(storage_items_per_batch, memgraph::storage::Config::Durability().items_per_batch, "The number of edges and vertices stored in a batch in a snapshot file."); +// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables,misc-unused-parameters) +DEFINE_VALIDATED_bool( + storage_parallel_index_recovery, false, + "Controls whether the index creation can be done in a multithreaded fashion.", { + spdlog::warn( + "storage_parallel_index_recovery flag is deprecated. Check storage_mode_parallel_schema_recovery for more " + "details."); + return true; + }); + // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) -DEFINE_bool(storage_parallel_index_recovery, false, - "Controls whether the index creation can be done in a multithreaded fashion."); +DEFINE_bool(storage_parallel_schema_recovery, false, + "Controls whether the indices and constraints creation can be done in a multithreaded fashion."); // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) DEFINE_uint64(storage_recovery_thread_count, @@ -114,6 +124,10 @@ DEFINE_uint64(storage_recovery_thread_count, memgraph::storage::Config::Durability().recovery_thread_count), "The number of threads used to recover persisted data from disk."); +// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) +DEFINE_bool(storage_enable_schema_metadata, false, + "Controls whether metadata should be collected about the resident labels and edge types."); + #ifdef MG_ENTERPRISE // 
NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) DEFINE_bool(storage_delete_on_drop, true, diff --git a/src/flags/general.hpp b/src/flags/general.hpp index f9b8f3517..890f32cd6 100644 --- a/src/flags/general.hpp +++ b/src/flags/general.hpp @@ -73,10 +73,15 @@ DECLARE_uint64(storage_wal_file_flush_every_n_tx); DECLARE_bool(storage_snapshot_on_exit); // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) DECLARE_uint64(storage_items_per_batch); +// storage_parallel_index_recovery deprecated; use storage_parallel_schema_recovery instead // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) DECLARE_bool(storage_parallel_index_recovery); // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) +DECLARE_bool(storage_parallel_schema_recovery); +// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) DECLARE_uint64(storage_recovery_thread_count); +// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) +DECLARE_bool(storage_enable_schema_metadata); #ifdef MG_ENTERPRISE // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) DECLARE_bool(storage_delete_on_drop); diff --git a/src/glue/SessionHL.cpp b/src/glue/SessionHL.cpp index cc7910d1c..bff12d188 100644 --- a/src/glue/SessionHL.cpp +++ b/src/glue/SessionHL.cpp @@ -234,11 +234,55 @@ std::pair, std::optional> SessionHL::Interpret( throw memgraph::communication::bolt::ClientError(e.what()); } } -void SessionHL::RollbackTransaction() { interpreter_.RollbackTransaction(); } -void SessionHL::CommitTransaction() { interpreter_.CommitTransaction(); } -void SessionHL::BeginTransaction(const std::map &extra) { - interpreter_.BeginTransaction(ToQueryExtras(extra)); + +void SessionHL::RollbackTransaction() { + try { + interpreter_.RollbackTransaction(); + } catch (const memgraph::query::QueryException &e) { + // Count the number of specific exceptions thrown + metrics::IncrementCounter(GetExceptionName(e)); + // Wrap QueryException 
into ClientError, because we want to allow the + // client to fix their query. + throw memgraph::communication::bolt::ClientError(e.what()); + } catch (const memgraph::query::ReplicationException &e) { + // Count the number of specific exceptions thrown + metrics::IncrementCounter(GetExceptionName(e)); + throw memgraph::communication::bolt::ClientError(e.what()); + } } + +void SessionHL::CommitTransaction() { + try { + interpreter_.CommitTransaction(); + } catch (const memgraph::query::QueryException &e) { + // Count the number of specific exceptions thrown + metrics::IncrementCounter(GetExceptionName(e)); + // Wrap QueryException into ClientError, because we want to allow the + // client to fix their query. + throw memgraph::communication::bolt::ClientError(e.what()); + } catch (const memgraph::query::ReplicationException &e) { + // Count the number of specific exceptions thrown + metrics::IncrementCounter(GetExceptionName(e)); + throw memgraph::communication::bolt::ClientError(e.what()); + } +} + +void SessionHL::BeginTransaction(const std::map &extra) { + try { + interpreter_.BeginTransaction(ToQueryExtras(extra)); + } catch (const memgraph::query::QueryException &e) { + // Count the number of specific exceptions thrown + metrics::IncrementCounter(GetExceptionName(e)); + // Wrap QueryException into ClientError, because we want to allow the + // client to fix their query. 
+ throw memgraph::communication::bolt::ClientError(e.what()); + } catch (const memgraph::query::ReplicationException &e) { + // Count the number of specific exceptions thrown + metrics::IncrementCounter(GetExceptionName(e)); + throw memgraph::communication::bolt::ClientError(e.what()); + } +} + void SessionHL::Configure(const std::map &run_time_info) { #ifdef MG_ENTERPRISE std::string db; diff --git a/src/glue/communication.cpp b/src/glue/communication.cpp index fdf5129f6..60181e877 100644 --- a/src/glue/communication.cpp +++ b/src/glue/communication.cpp @@ -127,6 +127,8 @@ storage::Result ToBoltValue(const query::TypedValue &value, const storage return Value(value.ValueLocalDateTime()); case query::TypedValue::Type::Duration: return Value(value.ValueDuration()); + case query::TypedValue::Type::Function: + throw communication::bolt::ValueException("Unsupported conversion from TypedValue::Function to Value"); case query::TypedValue::Type::Graph: auto maybe_graph = ToBoltGraph(value.ValueGraph(), db, view); if (maybe_graph.HasError()) return maybe_graph.GetError(); diff --git a/src/integrations/pulsar/consumer.hpp b/src/integrations/pulsar/consumer.hpp index 1caa366ad..06ed3a550 100644 --- a/src/integrations/pulsar/consumer.hpp +++ b/src/integrations/pulsar/consumer.hpp @@ -1,4 +1,4 @@ -// Copyright 2022 Memgraph Ltd. +// Copyright 2023 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -10,6 +10,7 @@ // licenses/APL.txt. 
#pragma once + #include #include #include diff --git a/src/memgraph.cpp b/src/memgraph.cpp index 983dd61f9..057b30982 100644 --- a/src/memgraph.cpp +++ b/src/memgraph.cpp @@ -65,10 +65,13 @@ void InitFromCypherlFile(memgraph::query::InterpreterContext &ctx, memgraph::dbm std::string line; while (std::getline(file, line)) { if (!line.empty()) { - auto results = interpreter.Prepare(line, {}, {}); - memgraph::query::DiscardValueResultStream stream; - interpreter.Pull(&stream, {}, results.qid); - + try { + auto results = interpreter.Prepare(line, {}, {}); + memgraph::query::DiscardValueResultStream stream; + interpreter.Pull(&stream, {}, results.qid); + } catch (const memgraph::query::UserAlreadyExistsException &e) { + spdlog::warn("{} The rest of the init-file will be run.", e.what()); + } if (audit_log) { audit_log->Record("", "", line, {}, memgraph::dbms::kDefaultDB); } @@ -291,7 +294,8 @@ int main(int argc, char **argv) { memgraph::storage::Config db_config{ .gc = {.type = memgraph::storage::Config::Gc::Type::PERIODIC, .interval = std::chrono::seconds(FLAGS_storage_gc_cycle_sec)}, - .items = {.properties_on_edges = FLAGS_storage_properties_on_edges}, + .items = {.properties_on_edges = FLAGS_storage_properties_on_edges, + .enable_schema_metadata = FLAGS_storage_enable_schema_metadata}, .durability = {.storage_directory = FLAGS_data_directory, .recover_on_startup = FLAGS_storage_recover_on_startup || FLAGS_data_recovery_on_startup, .snapshot_retention_count = FLAGS_storage_snapshot_retention_count, @@ -301,7 +305,9 @@ int main(int argc, char **argv) { .restore_replication_state_on_startup = FLAGS_replication_restore_state_on_startup, .items_per_batch = FLAGS_storage_items_per_batch, .recovery_thread_count = FLAGS_storage_recovery_thread_count, - .allow_parallel_index_creation = FLAGS_storage_parallel_index_recovery}, + // deprecated + .allow_parallel_index_creation = FLAGS_storage_parallel_index_recovery, + .allow_parallel_schema_creation = 
FLAGS_storage_parallel_schema_recovery}, .transaction = {.isolation_level = memgraph::flags::ParseIsolationLevel()}, .disk = {.main_storage_directory = FLAGS_data_directory + "/rocksdb_main_storage", .label_index_directory = FLAGS_data_directory + "/rocksdb_label_index", @@ -368,34 +374,17 @@ int main(int argc, char **argv) { std::unique_ptr auth_checker; auth_glue(&auth_, auth_handler, auth_checker); - memgraph::replication::ReplicationState repl_state(ReplicationStateRootPath(db_config)); - - memgraph::dbms::DbmsHandler dbms_handler(db_config, repl_state + memgraph::dbms::DbmsHandler dbms_handler(db_config #ifdef MG_ENTERPRISE , &auth_, FLAGS_data_recovery_on_startup, FLAGS_storage_delete_on_drop #endif ); auto db_acc = dbms_handler.Get(); - memgraph::query::InterpreterContext interpreter_context_(interp_config, &dbms_handler, &repl_state, - auth_handler.get(), auth_checker.get()); - MG_ASSERT(db_acc, "Failed to access the main database"); - // TODO: Move it somewhere better - // Startup replication state (if recovered at startup) - MG_ASSERT(std::visit(memgraph::utils::Overloaded{[](memgraph::replication::RoleMainData const &) { return true; }, - [&](memgraph::replication::RoleReplicaData const &data) { - // Register handlers - memgraph::dbms::InMemoryReplicationHandlers::Register( - &dbms_handler, *data.server); - if (!data.server->Start()) { - spdlog::error("Unable to start the replication server."); - return false; - } - return true; - }}, - repl_state.ReplicationData()), - "Replica recovery failure!"); + memgraph::query::InterpreterContext interpreter_context_( + interp_config, &dbms_handler, &dbms_handler.ReplicationState(), auth_handler.get(), auth_checker.get()); + MG_ASSERT(db_acc, "Failed to access the main database"); memgraph::query::procedure::gModuleRegistry.SetModulesDirectory(memgraph::flags::ParseQueryModulesDirectory(), FLAGS_data_directory); diff --git a/src/query/common.cpp b/src/query/common.cpp index 793ae8044..3c75ed5ec 100644 --- 
a/src/query/common.cpp +++ b/src/query/common.cpp @@ -1,4 +1,4 @@ -// Copyright 2022 Memgraph Ltd. +// Copyright 2023 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -62,6 +62,7 @@ bool TypedValueCompare(const TypedValue &a, const TypedValue &b) { case TypedValue::Type::Edge: case TypedValue::Type::Path: case TypedValue::Type::Graph: + case TypedValue::Type::Function: throw QueryRuntimeException("Comparison is not defined for values of type {}.", a.type()); case TypedValue::Type::Null: LOG_FATAL("Invalid type"); diff --git a/src/query/db_accessor.cpp b/src/query/db_accessor.cpp index df3fb808a..16ad399a0 100644 --- a/src/query/db_accessor.cpp +++ b/src/query/db_accessor.cpp @@ -15,6 +15,7 @@ #include #include +#include "storage/v2/storage_mode.hpp" #include "utils/pmr/unordered_set.hpp" namespace memgraph::query { @@ -139,6 +140,8 @@ std::optional SubgraphDbAccessor::FindVertex(storage::Gid gid, s query::Graph *SubgraphDbAccessor::getGraph() { return graph_; } +storage::StorageMode SubgraphDbAccessor::GetStorageMode() const noexcept { return db_accessor_.GetStorageMode(); } + DbAccessor *SubgraphDbAccessor::GetAccessor() { return &db_accessor_; } VertexAccessor SubgraphVertexAccessor::GetVertexAccessor() const { return impl_; } diff --git a/src/query/db_accessor.hpp b/src/query/db_accessor.hpp index f616dc5a2..ed7dde409 100644 --- a/src/query/db_accessor.hpp +++ b/src/query/db_accessor.hpp @@ -42,6 +42,8 @@ class EdgeAccessor final { explicit EdgeAccessor(storage::EdgeAccessor impl) : impl_(std::move(impl)) {} + bool IsDeleted() const { return impl_.IsDeleted(); } + bool IsVisible(storage::View view) const { return impl_.IsVisible(view); } storage::EdgeTypeId EdgeType() const { return impl_.EdgeType(); } @@ -543,7 +545,7 @@ class DbAccessor final { void Abort() { accessor_->Abort(); } - storage::StorageMode 
GetStorageMode() const { return accessor_->GetCreationStorageMode(); } + storage::StorageMode GetStorageMode() const noexcept { return accessor_->GetCreationStorageMode(); } bool LabelIndexExists(storage::LabelId label) const { return accessor_->LabelIndexExists(label); } @@ -595,6 +597,13 @@ class DbAccessor final { return accessor_->ApproximateVertexCount(label, property, lower, upper); } + std::vector ListAllPossiblyPresentVertexLabels() const { + return accessor_->ListAllPossiblyPresentVertexLabels(); + } + std::vector ListAllPossiblyPresentEdgeTypes() const { + return accessor_->ListAllPossiblyPresentEdgeTypes(); + } + storage::IndicesInfo ListAllIndices() const { return accessor_->ListAllIndices(); } storage::ConstraintsInfo ListAllConstraints() const { return accessor_->ListAllConstraints(); } @@ -693,6 +702,8 @@ class SubgraphDbAccessor final { Graph *getGraph(); + storage::StorageMode GetStorageMode() const noexcept; + DbAccessor *GetAccessor(); }; diff --git a/src/query/exceptions.hpp b/src/query/exceptions.hpp index 1b2e712f9..ac8cc8fe8 100644 --- a/src/query/exceptions.hpp +++ b/src/query/exceptions.hpp @@ -126,6 +126,12 @@ class InfoInMulticommandTxException : public QueryException { SPECIALIZE_GET_EXCEPTION_NAME(InfoInMulticommandTxException) }; +class UserAlreadyExistsException : public QueryException { + public: + using QueryException::QueryException; + SPECIALIZE_GET_EXCEPTION_NAME(UserAlreadyExistsException) +}; + /** * An exception for an illegal operation that can not be detected * before the query starts executing over data. diff --git a/src/query/frame_change.hpp b/src/query/frame_change.hpp index 32fe1f36e..7baf1fe41 100644 --- a/src/query/frame_change.hpp +++ b/src/query/frame_change.hpp @@ -8,41 +8,42 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0, included in the file // licenses/APL.txt. 
+#include +#include #include "query/typed_value.hpp" +#include "utils/fnv.hpp" #include "utils/memory.hpp" #include "utils/pmr/unordered_map.hpp" #include "utils/pmr/vector.hpp" namespace memgraph::query { // Key is hash output, value is vector of unique elements -using CachedType = utils::pmr::unordered_map>; +using CachedType = utils::pmr::unordered_map>; struct CachedValue { + using allocator_type = utils::Allocator; + // Cached value, this can be probably templateized CachedType cache_; - explicit CachedValue(utils::MemoryResource *mem) : cache_(mem) {} + explicit CachedValue(utils::MemoryResource *mem) : cache_{mem} {}; + CachedValue(const CachedValue &other, utils::MemoryResource *mem) : cache_(other.cache_, mem) {} + CachedValue(CachedValue &&other, utils::MemoryResource *mem) : cache_(std::move(other.cache_), mem){}; - CachedValue(CachedType &&cache, memgraph::utils::MemoryResource *memory) : cache_(std::move(cache), memory) {} + CachedValue(CachedValue &&other) noexcept : CachedValue(std::move(other), other.GetMemoryResource()) {} - CachedValue(const CachedValue &other, memgraph::utils::MemoryResource *memory) : cache_(other.cache_, memory) {} + CachedValue(const CachedValue &other) + : CachedValue(other, std::allocator_traits::select_on_container_copy_construction( + other.GetMemoryResource()) + .GetMemoryResource()) {} - CachedValue(CachedValue &&other, memgraph::utils::MemoryResource *memory) : cache_(std::move(other.cache_), memory) {} - - CachedValue(CachedValue &&other) noexcept = delete; - - /// Copy construction without memgraph::utils::MemoryResource is not allowed. 
- CachedValue(const CachedValue &) = delete; + utils::MemoryResource *GetMemoryResource() const { return cache_.get_allocator().GetMemoryResource(); } CachedValue &operator=(const CachedValue &) = delete; CachedValue &operator=(CachedValue &&) = delete; ~CachedValue() = default; - memgraph::utils::MemoryResource *GetMemoryResource() const noexcept { - return cache_.get_allocator().GetMemoryResource(); - } - bool CacheValue(const TypedValue &maybe_list) { if (!maybe_list.IsList()) { return false; @@ -70,7 +71,7 @@ struct CachedValue { } private: - static bool IsValueInVec(const std::vector &vec_values, const TypedValue &value) { + static bool IsValueInVec(const utils::pmr::vector &vec_values, const TypedValue &value) { return std::any_of(vec_values.begin(), vec_values.end(), [&value](auto &vec_value) { const auto is_value_equal = vec_value == value; if (is_value_equal.IsNull()) return false; @@ -82,35 +83,70 @@ struct CachedValue { // Class tracks keys for which user can cache values which help with faster search or faster retrieval // in the future. Used for IN LIST operator. 
class FrameChangeCollector { + /** Allocator type so that STL containers are aware that we need one */ + using allocator_type = utils::Allocator; + public: - explicit FrameChangeCollector() : tracked_values_(&memory_resource_){}; + explicit FrameChangeCollector(utils::MemoryResource *mem = utils::NewDeleteResource()) : tracked_values_{mem} {} + + FrameChangeCollector(FrameChangeCollector &&other, utils::MemoryResource *mem) + : tracked_values_(std::move(other.tracked_values_), mem) {} + FrameChangeCollector(const FrameChangeCollector &other, utils::MemoryResource *mem) + : tracked_values_(other.tracked_values_, mem) {} + + FrameChangeCollector(const FrameChangeCollector &other) + : FrameChangeCollector(other, std::allocator_traits::select_on_container_copy_construction( + other.GetMemoryResource()) + .GetMemoryResource()){}; + + FrameChangeCollector(FrameChangeCollector &&other) noexcept + : FrameChangeCollector(std::move(other), other.GetMemoryResource()) {} + + /** Copy assign other, utils::MemoryResource of `this` is used */ + FrameChangeCollector &operator=(const FrameChangeCollector &) = default; + + /** Move assign other, utils::MemoryResource of `this` is used. 
*/ + FrameChangeCollector &operator=(FrameChangeCollector &&) noexcept = default; + + utils::MemoryResource *GetMemoryResource() const { return tracked_values_.get_allocator().GetMemoryResource(); } CachedValue &AddTrackingKey(const std::string &key) { - const auto &[it, _] = tracked_values_.emplace(key, tracked_values_.get_allocator().GetMemoryResource()); + const auto &[it, _] = tracked_values_.emplace( + std::piecewise_construct, std::forward_as_tuple(utils::pmr::string(key, utils::NewDeleteResource())), + std::forward_as_tuple()); return it->second; } - bool IsKeyTracked(const std::string &key) const { return tracked_values_.contains(key); } + bool IsKeyTracked(const std::string &key) const { + return tracked_values_.contains(utils::pmr::string(key, utils::NewDeleteResource())); + } bool IsKeyValueCached(const std::string &key) const { - return IsKeyTracked(key) && !tracked_values_.at(key).cache_.empty(); + return IsKeyTracked(key) && !tracked_values_.at(utils::pmr::string(key, utils::NewDeleteResource())).cache_.empty(); } bool ResetTrackingValue(const std::string &key) { - if (!tracked_values_.contains(key)) { + if (!tracked_values_.contains(utils::pmr::string(key, utils::NewDeleteResource()))) { return false; } - tracked_values_.erase(key); + tracked_values_.erase(utils::pmr::string(key, utils::NewDeleteResource())); AddTrackingKey(key); return true; } - CachedValue &GetCachedValue(const std::string &key) { return tracked_values_.at(key); } + CachedValue &GetCachedValue(const std::string &key) { + return tracked_values_.at(utils::pmr::string(key, utils::NewDeleteResource())); + } bool IsTrackingValues() const { return !tracked_values_.empty(); } + ~FrameChangeCollector() = default; + private: - utils::MonotonicBufferResource memory_resource_{0}; - memgraph::utils::pmr::unordered_map tracked_values_; + struct PmrStringHash { + size_t operator()(const utils::pmr::string &key) const { return utils::Fnv(key); } + }; + + utils::pmr::unordered_map tracked_values_; 
}; } // namespace memgraph::query diff --git a/src/query/frontend/ast/ast.hpp b/src/query/frontend/ast/ast.hpp index dc11c3887..59860d5b0 100644 --- a/src/query/frontend/ast/ast.hpp +++ b/src/query/frontend/ast/ast.hpp @@ -1818,6 +1818,10 @@ class EdgeAtom : public memgraph::query::PatternAtom { memgraph::query::Identifier *inner_edge{nullptr}; /// Argument identifier for the destination node of the edge. memgraph::query::Identifier *inner_node{nullptr}; + /// Argument identifier for the currently-accumulated path. + memgraph::query::Identifier *accumulated_path{nullptr}; + /// Argument identifier for the weight of the currently-accumulated path. + memgraph::query::Identifier *accumulated_weight{nullptr}; /// Evaluates the result of the lambda. memgraph::query::Expression *expression{nullptr}; @@ -1825,6 +1829,8 @@ class EdgeAtom : public memgraph::query::PatternAtom { Lambda object; object.inner_edge = inner_edge ? inner_edge->Clone(storage) : nullptr; object.inner_node = inner_node ? inner_node->Clone(storage) : nullptr; + object.accumulated_path = accumulated_path ? accumulated_path->Clone(storage) : nullptr; + object.accumulated_weight = accumulated_weight ? accumulated_weight->Clone(storage) : nullptr; object.expression = expression ? 
expression->Clone(storage) : nullptr; return object; } @@ -2928,7 +2934,7 @@ class DatabaseInfoQuery : public memgraph::query::Query { static const utils::TypeInfo kType; const utils::TypeInfo &GetTypeInfo() const override { return kType; } - enum class InfoType { INDEX, CONSTRAINT }; + enum class InfoType { INDEX, CONSTRAINT, EDGE_TYPES, NODE_LABELS }; DEFVISITABLE(QueryVisitor); @@ -3025,7 +3031,7 @@ class ReplicationQuery : public memgraph::query::Query { enum class SyncMode { SYNC, ASYNC }; - enum class ReplicaState { READY, REPLICATING, RECOVERY, INVALID }; + enum class ReplicaState { READY, REPLICATING, RECOVERY, MAYBE_BEHIND }; ReplicationQuery() = default; diff --git a/src/query/frontend/ast/cypher_main_visitor.cpp b/src/query/frontend/ast/cypher_main_visitor.cpp index 4bf7f36fd..7002ee4b9 100644 --- a/src/query/frontend/ast/cypher_main_visitor.cpp +++ b/src/query/frontend/ast/cypher_main_visitor.cpp @@ -124,6 +124,14 @@ antlrcpp::Any CypherMainVisitor::visitDatabaseInfoQuery(MemgraphCypher::Database info_query->info_type_ = DatabaseInfoQuery::InfoType::CONSTRAINT; return info_query; } + if (ctx->edgetypeInfo()) { + info_query->info_type_ = DatabaseInfoQuery::InfoType::EDGE_TYPES; + return info_query; + } + if (ctx->nodelabelInfo()) { + info_query->info_type_ = DatabaseInfoQuery::InfoType::NODE_LABELS; + return info_query; + } // Should never get here throw utils::NotYetImplemented("Database info query: '{}'", ctx->getText()); } @@ -1268,28 +1276,59 @@ antlrcpp::Any CypherMainVisitor::visitCallProcedure(MemgraphCypher::CallProcedur call_proc->result_identifiers_.push_back(storage_->Create(result_alias)); } } else { - const auto &maybe_found = - procedure::FindProcedure(procedure::gModuleRegistry, call_proc->procedure_name_, utils::NewDeleteResource()); - if (!maybe_found) { - throw SemanticException("There is no procedure named '{}'.", call_proc->procedure_name_); + call_proc->is_write_ = maybe_found->second->info.is_write; + + auto *yield_ctx = 
ctx->yieldProcedureResults(); + if (!yield_ctx) { + if (!maybe_found->second->results.empty() && !call_proc->void_procedure_) { + throw SemanticException( + "CALL without YIELD may only be used on procedures which do not " + "return any result fields."); + } + // When we return, we will release the lock on modules. This means that + // someone may reload the procedure and change the result signature. But to + // keep the implementation simple, we ignore the case as the rest of the + // code doesn't really care whether we yield or not, so it should not break. + return call_proc; } - const auto &[module, proc] = *maybe_found; - call_proc->result_fields_.reserve(proc->results.size()); - call_proc->result_identifiers_.reserve(proc->results.size()); - for (const auto &[result_name, desc] : proc->results) { - bool is_deprecated = desc.second; - if (is_deprecated) continue; - call_proc->result_fields_.emplace_back(result_name); - call_proc->result_identifiers_.push_back(storage_->Create(std::string(result_name))); + if (yield_ctx->getTokens(MemgraphCypher::ASTERISK).empty()) { + call_proc->result_fields_.reserve(yield_ctx->procedureResult().size()); + call_proc->result_identifiers_.reserve(yield_ctx->procedureResult().size()); + for (auto *result : yield_ctx->procedureResult()) { + MG_ASSERT(result->variable().size() == 1 || result->variable().size() == 2); + call_proc->result_fields_.push_back(std::any_cast(result->variable()[0]->accept(this))); + std::string result_alias; + if (result->variable().size() == 2) { + result_alias = std::any_cast(result->variable()[1]->accept(this)); + } else { + result_alias = std::any_cast(result->variable()[0]->accept(this)); + } + call_proc->result_identifiers_.push_back(storage_->Create(result_alias)); + } + } else { + const auto &maybe_found = + procedure::FindProcedure(procedure::gModuleRegistry, call_proc->procedure_name_, utils::NewDeleteResource()); + if (!maybe_found) { + throw SemanticException("There is no procedure named 
'{}'.", call_proc->procedure_name_); + } + const auto &[module, proc] = *maybe_found; + call_proc->result_fields_.reserve(proc->results.size()); + call_proc->result_identifiers_.reserve(proc->results.size()); + for (const auto &[result_name, desc] : proc->results) { + bool is_deprecated = desc.second; + if (is_deprecated) continue; + call_proc->result_fields_.emplace_back(result_name); + call_proc->result_identifiers_.push_back(storage_->Create(std::string(result_name))); + } + // When we leave the scope, we will release the lock on modules. This means + // that someone may reload the procedure and change its result signature. We + // are fine with this, because if new result fields were added then we yield + // the subset of those and that will appear to a user as if they used the + // procedure before reload. Any subsequent `CALL ... YIELD *` will fetch the + // new fields as well. In case the result signature has had some result + // fields removed, then the query execution will report an error that we are + // yielding missing fields. The user can then just retry the query. } - // When we leave the scope, we will release the lock on modules. This means - // that someone may reload the procedure and change its result signature. We - // are fine with this, because if new result fields were added then we yield - // the subset of those and that will appear to a user as if they used the - // procedure before reload. Any subsequent `CALL ... YIELD *` will fetch the - // new fields as well. In case the result signature has had some result - // fields removed, then the query execution will report an error that we are - // yielding missing fields. The user can then just retry the query. 
} return call_proc; @@ -1978,6 +2017,15 @@ antlrcpp::Any CypherMainVisitor::visitRelationshipPattern(MemgraphCypher::Relati edge_lambda.inner_edge = storage_->Create(traversed_edge_variable); auto traversed_node_variable = std::any_cast(lambda->traversed_node->accept(this)); edge_lambda.inner_node = storage_->Create(traversed_node_variable); + if (lambda->accumulated_path) { + auto accumulated_path_variable = std::any_cast(lambda->accumulated_path->accept(this)); + edge_lambda.accumulated_path = storage_->Create(accumulated_path_variable); + + if (lambda->accumulated_weight) { + auto accumulated_weight_variable = std::any_cast(lambda->accumulated_weight->accept(this)); + edge_lambda.accumulated_weight = storage_->Create(accumulated_weight_variable); + } + } edge_lambda.expression = std::any_cast(lambda->expression()->accept(this)); return edge_lambda; }; @@ -2002,6 +2050,15 @@ antlrcpp::Any CypherMainVisitor::visitRelationshipPattern(MemgraphCypher::Relati // In variable expansion inner variables are mandatory. anonymous_identifiers.push_back(&edge->filter_lambda_.inner_edge); anonymous_identifiers.push_back(&edge->filter_lambda_.inner_node); + + // TODO: In what use case do we need accumulated path and weight here? + if (edge->filter_lambda_.accumulated_path) { + anonymous_identifiers.push_back(&edge->filter_lambda_.accumulated_path); + + if (edge->filter_lambda_.accumulated_weight) { + anonymous_identifiers.push_back(&edge->filter_lambda_.accumulated_weight); + } + } break; case 1: if (edge->type_ == EdgeAtom::Type::WEIGHTED_SHORTEST_PATH || @@ -2013,9 +2070,21 @@ antlrcpp::Any CypherMainVisitor::visitRelationshipPattern(MemgraphCypher::Relati // Add mandatory inner variables for filter lambda. 
anonymous_identifiers.push_back(&edge->filter_lambda_.inner_edge); anonymous_identifiers.push_back(&edge->filter_lambda_.inner_node); + if (edge->filter_lambda_.accumulated_path) { + anonymous_identifiers.push_back(&edge->filter_lambda_.accumulated_path); + + if (edge->filter_lambda_.accumulated_weight) { + anonymous_identifiers.push_back(&edge->filter_lambda_.accumulated_weight); + } + } } else { // Other variable expands only have the filter lambda. edge->filter_lambda_ = visit_lambda(relationshipLambdas[0]); + if (edge->filter_lambda_.accumulated_weight) { + throw SemanticException( + "Accumulated weight in filter lambda can be used only with " + "shortest paths expansion."); + } } break; case 2: diff --git a/src/query/frontend/opencypher/grammar/Cypher.g4 b/src/query/frontend/opencypher/grammar/Cypher.g4 index 53f1fc765..d387002d8 100644 --- a/src/query/frontend/opencypher/grammar/Cypher.g4 +++ b/src/query/frontend/opencypher/grammar/Cypher.g4 @@ -47,9 +47,13 @@ indexInfo : INDEX INFO ; constraintInfo : CONSTRAINT INFO ; +edgetypeInfo : EDGE_TYPES INFO ; + +nodelabelInfo : NODE_LABELS INFO ; + buildInfo : BUILD INFO ; -databaseInfoQuery : SHOW ( indexInfo | constraintInfo ) ; +databaseInfoQuery : SHOW ( indexInfo | constraintInfo | edgetypeInfo | nodelabelInfo ) ; systemInfoQuery : SHOW ( storageInfo | buildInfo ) ; @@ -175,7 +179,7 @@ relationshipDetail : '[' ( name=variable )? ( relationshipTypes )? ( variableExp | '[' ( name=variable )? ( relationshipTypes )? ( variableExpansion )? relationshipLambda ( total_weight=variable )? (relationshipLambda )? ']' | '[' ( name=variable )? ( relationshipTypes )? ( variableExpansion )? (properties )* ( relationshipLambda total_weight=variable )? (relationshipLambda )? ']'; -relationshipLambda: '(' traversed_edge=variable ',' traversed_node=variable '|' expression ')'; +relationshipLambda: '(' traversed_edge=variable ',' traversed_node=variable ( ',' accumulated_path=variable )? ( ',' accumulated_weight=variable )? 
'|' expression ')'; variableExpansion : '*' (BFS | WSHORTEST | ALLSHORTEST)? ( expression )? ( '..' ( expression )? )? ; diff --git a/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 b/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 index bac189a53..d585acbb1 100644 --- a/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 +++ b/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 @@ -61,6 +61,7 @@ memgraphCypherKeyword : cypherKeyword | GRANT | HEADER | IDENTIFIED + | NODE_LABELS | NULLIF | IMPORT | INACTIVE diff --git a/src/query/frontend/opencypher/grammar/MemgraphCypherLexer.g4 b/src/query/frontend/opencypher/grammar/MemgraphCypherLexer.g4 index db1a1ae76..1b44a6e79 100644 --- a/src/query/frontend/opencypher/grammar/MemgraphCypherLexer.g4 +++ b/src/query/frontend/opencypher/grammar/MemgraphCypherLexer.g4 @@ -89,6 +89,7 @@ MULTI_DATABASE_EDIT : M U L T I UNDERSCORE D A T A B A S E UNDERSCORE E D I MULTI_DATABASE_USE : M U L T I UNDERSCORE D A T A B A S E UNDERSCORE U S E ; NEXT : N E X T ; NO : N O ; +NODE_LABELS : N O D E UNDERSCORE L A B E L S ; NOTHING : N O T H I N G ; ON_DISK_TRANSACTIONAL : O N UNDERSCORE D I S K UNDERSCORE T R A N S A C T I O N A L ; NULLIF : N U L L I F ; diff --git a/src/query/frontend/semantic/required_privileges.cpp b/src/query/frontend/semantic/required_privileges.cpp index b5b75e26e..04772cded 100644 --- a/src/query/frontend/semantic/required_privileges.cpp +++ b/src/query/frontend/semantic/required_privileges.cpp @@ -38,6 +38,9 @@ class PrivilegeExtractor : public QueryVisitor, public HierarchicalTreeVis void Visit(DatabaseInfoQuery &info_query) override { switch (info_query.info_type_) { case DatabaseInfoQuery::InfoType::INDEX: + // TODO: Reconsider priviliges, this 4 should have the same. + case DatabaseInfoQuery::InfoType::EDGE_TYPES: + case DatabaseInfoQuery::InfoType::NODE_LABELS: // TODO: This should be INDEX | STATS, but we don't have support for // *or* with privileges. 
AddPrivilege(AuthQuery::Privilege::INDEX); diff --git a/src/query/frontend/semantic/symbol_generator.cpp b/src/query/frontend/semantic/symbol_generator.cpp index 30790ee4e..a3e855301 100644 --- a/src/query/frontend/semantic/symbol_generator.cpp +++ b/src/query/frontend/semantic/symbol_generator.cpp @@ -658,8 +658,16 @@ bool SymbolGenerator::PreVisit(EdgeAtom &edge_atom) { scope.in_edge_range = false; scope.in_pattern = false; if (edge_atom.filter_lambda_.expression) { - VisitWithIdentifiers(edge_atom.filter_lambda_.expression, - {edge_atom.filter_lambda_.inner_edge, edge_atom.filter_lambda_.inner_node}); + std::vector filter_lambda_identifiers{edge_atom.filter_lambda_.inner_edge, + edge_atom.filter_lambda_.inner_node}; + if (edge_atom.filter_lambda_.accumulated_path) { + filter_lambda_identifiers.emplace_back(edge_atom.filter_lambda_.accumulated_path); + + if (edge_atom.filter_lambda_.accumulated_weight) { + filter_lambda_identifiers.emplace_back(edge_atom.filter_lambda_.accumulated_weight); + } + } + VisitWithIdentifiers(edge_atom.filter_lambda_.expression, filter_lambda_identifiers); } else { // Create inner symbols, but don't bind them in scope, since they are to // be used in the missing filter expression. 
@@ -668,6 +676,17 @@ bool SymbolGenerator::PreVisit(EdgeAtom &edge_atom) { auto *inner_node = edge_atom.filter_lambda_.inner_node; inner_node->MapTo( symbol_table_->CreateSymbol(inner_node->name_, inner_node->user_declared_, Symbol::Type::VERTEX)); + if (edge_atom.filter_lambda_.accumulated_path) { + auto *accumulated_path = edge_atom.filter_lambda_.accumulated_path; + accumulated_path->MapTo( + symbol_table_->CreateSymbol(accumulated_path->name_, accumulated_path->user_declared_, Symbol::Type::PATH)); + + if (edge_atom.filter_lambda_.accumulated_weight) { + auto *accumulated_weight = edge_atom.filter_lambda_.accumulated_weight; + accumulated_weight->MapTo(symbol_table_->CreateSymbol( + accumulated_weight->name_, accumulated_weight->user_declared_, Symbol::Type::NUMBER)); + } + } } if (edge_atom.weight_lambda_.expression) { VisitWithIdentifiers(edge_atom.weight_lambda_.expression, diff --git a/src/query/interpret/awesome_memgraph_functions.cpp b/src/query/interpret/awesome_memgraph_functions.cpp index 2cfd11f8c..6f49ee99f 100644 --- a/src/query/interpret/awesome_memgraph_functions.cpp +++ b/src/query/interpret/awesome_memgraph_functions.cpp @@ -593,6 +593,7 @@ TypedValue ValueType(const TypedValue *args, int64_t nargs, const FunctionContex case TypedValue::Type::Duration: return TypedValue("DURATION", ctx.memory); case TypedValue::Type::Graph: + case TypedValue::Type::Function: throw QueryRuntimeException("Cannot fetch graph as it is not standardized openCypher type name"); } } diff --git a/src/query/interpret/eval.hpp b/src/query/interpret/eval.hpp index 916082bb2..f4f3126cd 100644 --- a/src/query/interpret/eval.hpp +++ b/src/query/interpret/eval.hpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -28,8 +29,10 @@ #include "query/frontend/ast/ast.hpp" #include "query/frontend/semantic/symbol_table.hpp" #include "query/interpret/frame.hpp" +#include "query/procedure/mg_procedure_impl.hpp" #include "query/typed_value.hpp" #include 
"spdlog/spdlog.h" +#include "storage/v2/storage_mode.hpp" #include "utils/exceptions.hpp" #include "utils/frame_change_id.hpp" #include "utils/logging.hpp" @@ -187,6 +190,8 @@ class ExpressionEvaluator : public ExpressionVisitor { utils::MemoryResource *GetMemoryResource() const { return ctx_->memory; } + void ResetPropertyLookupCache() { property_lookup_cache_.clear(); } + TypedValue Visit(NamedExpression &named_expression) override { const auto &symbol = symbol_table_->at(named_expression); auto value = named_expression.expression_->Accept(*this); @@ -837,6 +842,8 @@ class ExpressionEvaluator : public ExpressionVisitor { TypedValue Visit(Function &function) override { FunctionContext function_ctx{dba_, ctx_->memory, ctx_->timestamp, &ctx_->counters, view_}; + bool is_transactional = storage::IsTransactional(dba_->GetStorageMode()); + TypedValue res(ctx_->memory); // Stack allocate evaluated arguments when there's a small number of them. if (function.arguments_.size() <= 8) { TypedValue arguments[8] = {TypedValue(ctx_->memory), TypedValue(ctx_->memory), TypedValue(ctx_->memory), @@ -845,19 +852,20 @@ class ExpressionEvaluator : public ExpressionVisitor { for (size_t i = 0; i < function.arguments_.size(); ++i) { arguments[i] = function.arguments_[i]->Accept(*this); } - auto res = function.function_(arguments, function.arguments_.size(), function_ctx); - MG_ASSERT(res.GetMemoryResource() == ctx_->memory); - return res; + res = function.function_(arguments, function.arguments_.size(), function_ctx); } else { TypedValue::TVector arguments(ctx_->memory); arguments.reserve(function.arguments_.size()); for (const auto &argument : function.arguments_) { arguments.emplace_back(argument->Accept(*this)); } - auto res = function.function_(arguments.data(), arguments.size(), function_ctx); - MG_ASSERT(res.GetMemoryResource() == ctx_->memory); - return res; + res = function.function_(arguments.data(), arguments.size(), function_ctx); } + MG_ASSERT(res.GetMemoryResource() == 
ctx_->memory); + if (!is_transactional && res.ContainsDeleted()) [[unlikely]] { + return TypedValue(ctx_->memory); + } + return res; } TypedValue Visit(Reduce &reduce) override { @@ -903,7 +911,17 @@ class ExpressionEvaluator : public ExpressionVisitor { return TypedValue(std::move(result), ctx_->memory); } - TypedValue Visit(Exists &exists) override { return TypedValue{frame_->at(symbol_table_->at(exists)), ctx_->memory}; } + TypedValue Visit(Exists &exists) override { + TypedValue &frame_exists_value = frame_->at(symbol_table_->at(exists)); + if (!frame_exists_value.IsFunction()) [[unlikely]] { + throw QueryRuntimeException( + "Unexpected behavior: Exists expected a function, got {}. Please report the problem on GitHub issues", + frame_exists_value.type()); + } + TypedValue result{ctx_->memory}; + frame_exists_value.ValueFunction()(&result); + return result; + } TypedValue Visit(All &all) override { auto list_value = all.list_expression_->Accept(*this); diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp index 354f13dc3..7292c4591 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -274,8 +274,7 @@ inline auto convertToReplicationMode(const ReplicationQuery::SyncMode &sync_mode class ReplQueryHandler final : public query::ReplicationQueryHandler { public: - explicit ReplQueryHandler(dbms::DbmsHandler *dbms_handler, memgraph::replication::ReplicationState *repl_state) - : dbms_handler_(dbms_handler), handler_{*repl_state, *dbms_handler} {} + explicit ReplQueryHandler(dbms::DbmsHandler *dbms_handler) : dbms_handler_(dbms_handler), handler_{*dbms_handler} {} /// @throw QueryRuntimeException if an error ocurred. 
void SetReplicationRole(ReplicationQuery::ReplicationRole replication_role, std::optional port) override { @@ -404,8 +403,8 @@ class ReplQueryHandler final : public query::ReplicationQueryHandler { case storage::replication::ReplicaState::RECOVERY: replica.state = ReplicationQuery::ReplicaState::RECOVERY; break; - case storage::replication::ReplicaState::INVALID: - replica.state = ReplicationQuery::ReplicaState::INVALID; + case storage::replication::ReplicaState::MAYBE_BEHIND: + replica.state = ReplicationQuery::ReplicaState::MAYBE_BEHIND; break; } @@ -479,7 +478,7 @@ Callback HandleAuthQuery(AuthQuery *auth_query, InterpreterContext *interpreter_ MG_ASSERT(password.IsString() || password.IsNull()); if (!auth->CreateUser(username, password.IsString() ? std::make_optional(std::string(password.ValueString())) : std::nullopt)) { - throw QueryRuntimeException("User '{}' already exists.", username); + throw UserAlreadyExistsException("User '{}' already exists.", username); } // If the license is not valid we create users with admin access @@ -713,8 +712,7 @@ Callback HandleAuthQuery(AuthQuery *auth_query, InterpreterContext *interpreter_ Callback HandleReplicationQuery(ReplicationQuery *repl_query, const Parameters ¶meters, dbms::DbmsHandler *dbms_handler, const query::InterpreterConfig &config, - std::vector *notifications, - memgraph::replication::ReplicationState *repl_state) { + std::vector *notifications) { // TODO: MemoryResource for EvaluationContext, it should probably be passed as // the argument to Callback. 
EvaluationContext evaluation_context; @@ -734,8 +732,7 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, const Parameters & notifications->emplace_back(SeverityLevel::WARNING, NotificationCode::REPLICA_PORT_WARNING, "Be careful the replication port must be different from the memgraph port!"); } - callback.fn = [handler = ReplQueryHandler{dbms_handler, repl_state}, role = repl_query->role_, - maybe_port]() mutable { + callback.fn = [handler = ReplQueryHandler{dbms_handler}, role = repl_query->role_, maybe_port]() mutable { handler.SetReplicationRole(role, maybe_port); return std::vector>(); }; @@ -747,7 +744,7 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, const Parameters & } case ReplicationQuery::Action::SHOW_REPLICATION_ROLE: { callback.header = {"replication role"}; - callback.fn = [handler = ReplQueryHandler{dbms_handler, repl_state}] { + callback.fn = [handler = ReplQueryHandler{dbms_handler}] { auto mode = handler.ShowReplicationRole(); switch (mode) { case ReplicationQuery::ReplicationRole::MAIN: { @@ -766,7 +763,7 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, const Parameters & auto socket_address = repl_query->socket_address_->Accept(evaluator); const auto replica_check_frequency = config.replication_replica_check_frequency; - callback.fn = [handler = ReplQueryHandler{dbms_handler, repl_state}, name, socket_address, sync_mode, + callback.fn = [handler = ReplQueryHandler{dbms_handler}, name, socket_address, sync_mode, replica_check_frequency]() mutable { handler.RegisterReplica(name, std::string(socket_address.ValueString()), sync_mode, replica_check_frequency); return std::vector>(); @@ -777,7 +774,7 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, const Parameters & } case ReplicationQuery::Action::DROP_REPLICA: { const auto &name = repl_query->replica_name_; - callback.fn = [handler = ReplQueryHandler{dbms_handler, repl_state}, name]() mutable { + callback.fn = [handler = 
ReplQueryHandler{dbms_handler}, name]() mutable { handler.DropReplica(name); return std::vector>(); }; @@ -789,7 +786,7 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, const Parameters & callback.header = { "name", "socket_address", "sync_mode", "current_timestamp_of_replica", "number_of_timestamp_behind_master", "state"}; - callback.fn = [handler = ReplQueryHandler{dbms_handler, repl_state}, replica_nfields = callback.header.size()] { + callback.fn = [handler = ReplQueryHandler{dbms_handler}, replica_nfields = callback.header.size()] { const auto &replicas = handler.ShowReplicas(); auto typed_replicas = std::vector>{}; typed_replicas.reserve(replicas.size()); @@ -822,7 +819,7 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, const Parameters & case ReplicationQuery::ReplicaState::RECOVERY: typed_replica.emplace_back("recovery"); break; - case ReplicationQuery::ReplicaState::INVALID: + case ReplicationQuery::ReplicaState::MAYBE_BEHIND: typed_replica.emplace_back("invalid"); break; } @@ -2263,15 +2260,14 @@ PreparedQuery PrepareAuthQuery(ParsedQuery parsed_query, bool in_explicit_transa PreparedQuery PrepareReplicationQuery(ParsedQuery parsed_query, bool in_explicit_transaction, std::vector *notifications, dbms::DbmsHandler &dbms_handler, - const InterpreterConfig &config, - memgraph::replication::ReplicationState *repl_state) { + const InterpreterConfig &config) { if (in_explicit_transaction) { throw ReplicationModificationInMulticommandTxException(); } auto *replication_query = utils::Downcast(parsed_query.query); - auto callback = HandleReplicationQuery(replication_query, parsed_query.parameters, &dbms_handler, config, - notifications, repl_state); + auto callback = + HandleReplicationQuery(replication_query, parsed_query.parameters, &dbms_handler, config, notifications); return PreparedQuery{callback.header, std::move(parsed_query.required_privileges), [callback_fn = std::move(callback.fn), pull_plan = std::shared_ptr{nullptr}]( 
@@ -3046,6 +3042,46 @@ PreparedQuery PrepareDatabaseInfoQuery(ParsedQuery parsed_query, bool in_explici }; break; } + case DatabaseInfoQuery::InfoType::EDGE_TYPES: { + header = {"edge types"}; + handler = [storage = current_db.db_acc_->get()->storage(), dba] { + if (!storage->config_.items.enable_schema_metadata) { + throw QueryRuntimeException( + "The metadata collection for edge-types is disabled. To enable it, restart your instance and set the " + "storage-enable-schema-metadata flag to True."); + } + auto edge_types = dba->ListAllPossiblyPresentEdgeTypes(); + std::vector> results; + results.reserve(edge_types.size()); + for (auto &edge_type : edge_types) { + results.push_back({TypedValue(storage->EdgeTypeToName(edge_type))}); + } + + return std::pair{results, QueryHandlerResult::COMMIT}; + }; + + break; + } + case DatabaseInfoQuery::InfoType::NODE_LABELS: { + header = {"node labels"}; + handler = [storage = current_db.db_acc_->get()->storage(), dba] { + if (!storage->config_.items.enable_schema_metadata) { + throw QueryRuntimeException( + "The metadata collection for node-labels is disabled. 
To enable it, restart your instance and set the " + "storage-enable-schema-metadata flag to True."); + } + auto node_labels = dba->ListAllPossiblyPresentVertexLabels(); + std::vector> results; + results.reserve(node_labels.size()); + for (auto &node_label : node_labels) { + results.push_back({TypedValue(storage->LabelToName(node_label))}); + } + + return std::pair{results, QueryHandlerResult::COMMIT}; + }; + + break; + } } return PreparedQuery{std::move(header), std::move(parsed_query.required_privileges), @@ -3349,8 +3385,7 @@ PreparedQuery PrepareConstraintQuery(ParsedQuery parsed_query, bool in_explicit_ PreparedQuery PrepareMultiDatabaseQuery(ParsedQuery parsed_query, CurrentDB ¤t_db, InterpreterContext *interpreter_context, - std::optional> on_change_cb, - memgraph::replication::ReplicationState *repl_state) { + std::optional> on_change_cb) { #ifdef MG_ENTERPRISE if (!license::global_license_checker.IsEnterpriseValidFast()) { throw QueryException("Trying to use enterprise feature without a valid license."); @@ -3361,9 +3396,11 @@ PreparedQuery PrepareMultiDatabaseQuery(ParsedQuery parsed_query, CurrentDB &cur auto *query = utils::Downcast(parsed_query.query); auto *db_handler = interpreter_context->dbms_handler; + const bool is_replica = interpreter_context->repl_state->IsReplica(); + switch (query->action_) { case MultiDatabaseQuery::Action::CREATE: - if (repl_state->IsReplica()) { + if (is_replica) { throw QueryException("Query forbidden on the replica!"); } return PreparedQuery{ @@ -3408,12 +3445,12 @@ PreparedQuery PrepareMultiDatabaseQuery(ParsedQuery parsed_query, CurrentDB &cur if (current_db.in_explicit_db_) { throw QueryException("Database switching is prohibited if session explicitly defines the used database"); } - if (!dbms::allow_mt_repl && repl_state->IsReplica()) { + if (!dbms::allow_mt_repl && is_replica) { throw QueryException("Query forbidden on the replica!"); } return PreparedQuery{{"STATUS"}, std::move(parsed_query.required_privileges), - 
[db_name = query->db_name_, db_handler, ¤t_db, on_change_cb]( + [db_name = query->db_name_, db_handler, ¤t_db, on_change = std::move(on_change_cb)]( AnyStream *stream, std::optional n) -> std::optional { std::vector> status; std::string res; @@ -3423,7 +3460,7 @@ PreparedQuery PrepareMultiDatabaseQuery(ParsedQuery parsed_query, CurrentDB &cur res = "Already using " + db_name; } else { auto tmp = db_handler->Get(db_name); - if (on_change_cb) (*on_change_cb)(db_name); // Will trow if cb fails + if (on_change) (*on_change)(db_name); // Will trow if cb fails current_db.SetCurrentDB(std::move(tmp), false); res = "Using " + db_name; } @@ -3442,7 +3479,7 @@ PreparedQuery PrepareMultiDatabaseQuery(ParsedQuery parsed_query, CurrentDB &cur query->db_name_}; case MultiDatabaseQuery::Action::DROP: - if (repl_state->IsReplica()) { + if (is_replica) { throw QueryException("Query forbidden on the replica!"); } return PreparedQuery{ @@ -3765,9 +3802,9 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, &query_execution->notifications, current_db_); } else if (utils::Downcast(parsed_query.query)) { /// TODO: make replication DB agnostic - prepared_query = PrepareReplicationQuery(std::move(parsed_query), in_explicit_transaction_, - &query_execution->notifications, *interpreter_context_->dbms_handler, - interpreter_context_->config, interpreter_context_->repl_state); + prepared_query = + PrepareReplicationQuery(std::move(parsed_query), in_explicit_transaction_, &query_execution->notifications, + *interpreter_context_->dbms_handler, interpreter_context_->config); } else if (utils::Downcast(parsed_query.query)) { prepared_query = PrepareLockPathQuery(std::move(parsed_query), in_explicit_transaction_, current_db_); } else if (utils::Downcast(parsed_query.query)) { @@ -3807,8 +3844,8 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, throw MultiDatabaseQueryInMulticommandTxException(); } /// SYSTEM (Replication) + 
INTERPRETER - prepared_query = PrepareMultiDatabaseQuery(std::move(parsed_query), current_db_, interpreter_context_, on_change_, - interpreter_context_->repl_state); + prepared_query = + PrepareMultiDatabaseQuery(std::move(parsed_query), current_db_, interpreter_context_, on_change_); } else if (utils::Downcast(parsed_query.query)) { /// SYSTEM PURE ("SHOW DATABASES") /// INTERPRETER (TODO: "SHOW DATABASE") diff --git a/src/query/plan/operator.cpp b/src/query/plan/operator.cpp index 1c8d021c7..82269ca27 100644 --- a/src/query/plan/operator.cpp +++ b/src/query/plan/operator.cpp @@ -1138,6 +1138,11 @@ class ExpandVariableCursor : public Cursor { edges_it_.emplace_back(edges_.back().begin()); } + if (self_.filter_lambda_.accumulated_path_symbol) { + // Add initial vertex of path to the accumulated path + frame[self_.filter_lambda_.accumulated_path_symbol.value()] = Path(vertex); + } + // reset the frame value to an empty edge list auto *pull_memory = context.evaluation_context.memory; frame[self_.common_.edge_symbol] = TypedValue::TVector(pull_memory); @@ -1234,6 +1239,13 @@ class ExpandVariableCursor : public Cursor { // Skip expanding out of filtered expansion. 
frame[self_.filter_lambda_.inner_edge_symbol] = current_edge.first; frame[self_.filter_lambda_.inner_node_symbol] = current_vertex; + if (self_.filter_lambda_.accumulated_path_symbol) { + MG_ASSERT(frame[self_.filter_lambda_.accumulated_path_symbol.value()].IsPath(), + "Accumulated path must be path"); + Path &accumulated_path = frame[self_.filter_lambda_.accumulated_path_symbol.value()].ValuePath(); + accumulated_path.Expand(current_edge.first); + accumulated_path.Expand(current_vertex); + } if (self_.filter_lambda_.expression && !EvaluateFilter(evaluator, self_.filter_lambda_.expression)) continue; // we are doing depth-first search, so place the current @@ -1546,6 +1558,13 @@ class SingleSourceShortestPathCursor : public query::plan::Cursor { #endif frame[self_.filter_lambda_.inner_edge_symbol] = edge; frame[self_.filter_lambda_.inner_node_symbol] = vertex; + if (self_.filter_lambda_.accumulated_path_symbol) { + MG_ASSERT(frame[self_.filter_lambda_.accumulated_path_symbol.value()].IsPath(), + "Accumulated path must have Path type"); + Path &accumulated_path = frame[self_.filter_lambda_.accumulated_path_symbol.value()].ValuePath(); + accumulated_path.Expand(edge); + accumulated_path.Expand(vertex); + } if (self_.filter_lambda_.expression) { TypedValue result = self_.filter_lambda_.expression->Accept(evaluator); @@ -1607,6 +1626,11 @@ class SingleSourceShortestPathCursor : public query::plan::Cursor { const auto &vertex = vertex_value.ValueVertex(); processed_.emplace(vertex, std::nullopt); + if (self_.filter_lambda_.accumulated_path_symbol) { + // Add initial vertex of path to the accumulated path + frame[self_.filter_lambda_.accumulated_path_symbol.value()] = Path(vertex); + } + expand_from_vertex(vertex); // go back to loop start and see if we expanded anything @@ -1677,6 +1701,10 @@ class SingleSourceShortestPathCursor : public query::plan::Cursor { namespace { void CheckWeightType(TypedValue current_weight, utils::MemoryResource *memory) { + if 
(current_weight.IsNull()) { + return; + } + if (!current_weight.IsNumeric() && !current_weight.IsDuration()) { throw QueryRuntimeException("Calculated weight must be numeric or a Duration, got {}.", current_weight.type()); } @@ -1694,6 +1722,34 @@ void CheckWeightType(TypedValue current_weight, utils::MemoryResource *memory) { } } +void ValidateWeightTypes(const TypedValue &lhs, const TypedValue &rhs) { + if ((lhs.IsNumeric() && rhs.IsNumeric()) || (lhs.IsDuration() && rhs.IsDuration())) { + return; + } + throw QueryRuntimeException(utils::MessageWithLink( + "All weights should be of the same type, either numeric or a Duration. Please update the weight " + "expression or the filter expression.", + "https://memgr.ph/wsp")); +} + +TypedValue CalculateNextWeight(const std::optional &weight_lambda, + const TypedValue &total_weight, ExpressionEvaluator evaluator) { + if (!weight_lambda) { + return {}; + } + auto *memory = evaluator.GetMemoryResource(); + TypedValue current_weight = weight_lambda->expression->Accept(evaluator); + CheckWeightType(current_weight, memory); + + if (total_weight.IsNull()) { + return current_weight; + } + + ValidateWeightTypes(current_weight, total_weight); + + return TypedValue(current_weight, memory) + total_weight; +} + } // namespace class ExpandWeightedShortestPathCursor : public query::plan::Cursor { @@ -1722,7 +1778,6 @@ class ExpandWeightedShortestPathCursor : public query::plan::Cursor { auto expand_pair = [this, &evaluator, &frame, &create_state, &context]( const EdgeAccessor &edge, const VertexAccessor &vertex, const TypedValue &total_weight, int64_t depth) { - auto *memory = evaluator.GetMemoryResource(); #ifdef MG_ENTERPRISE if (license::global_license_checker.IsEnterpriseValidFast() && context.auth_checker && !(context.auth_checker->Has(vertex, storage::View::OLD, @@ -1731,32 +1786,31 @@ class ExpandWeightedShortestPathCursor : public query::plan::Cursor { return; } #endif + + frame[self_.weight_lambda_->inner_edge_symbol] = 
edge; + frame[self_.weight_lambda_->inner_node_symbol] = vertex; + TypedValue next_weight = CalculateNextWeight(self_.weight_lambda_, total_weight, evaluator); + if (self_.filter_lambda_.expression) { frame[self_.filter_lambda_.inner_edge_symbol] = edge; frame[self_.filter_lambda_.inner_node_symbol] = vertex; + if (self_.filter_lambda_.accumulated_path_symbol) { + MG_ASSERT(frame[self_.filter_lambda_.accumulated_path_symbol.value()].IsPath(), + "Accumulated path must be path"); + Path &accumulated_path = frame[self_.filter_lambda_.accumulated_path_symbol.value()].ValuePath(); + accumulated_path.Expand(edge); + accumulated_path.Expand(vertex); + + if (self_.filter_lambda_.accumulated_weight_symbol) { + frame[self_.filter_lambda_.accumulated_weight_symbol.value()] = next_weight; + } + } if (!EvaluateFilter(evaluator, self_.filter_lambda_.expression)) return; } - frame[self_.weight_lambda_->inner_edge_symbol] = edge; - frame[self_.weight_lambda_->inner_node_symbol] = vertex; - - TypedValue current_weight = self_.weight_lambda_->expression->Accept(evaluator); - - CheckWeightType(current_weight, memory); - auto next_state = create_state(vertex, depth); - TypedValue next_weight = std::invoke([&] { - if (total_weight.IsNull()) { - return current_weight; - } - - ValidateWeightTypes(current_weight, total_weight); - - return TypedValue(current_weight, memory) + total_weight; - }); - auto found_it = total_cost_.find(next_state); if (found_it != total_cost_.end() && (found_it->second.IsNull() || (found_it->second <= next_weight).ValueBool())) return; @@ -1796,6 +1850,10 @@ class ExpandWeightedShortestPathCursor : public query::plan::Cursor { // Skip expansion for such nodes. 
if (node.IsNull()) continue; } + if (self_.filter_lambda_.accumulated_path_symbol) { + // Add initial vertex of path to the accumulated path + frame[self_.filter_lambda_.accumulated_path_symbol.value()] = Path(vertex); + } if (self_.upper_bound_) { upper_bound_ = EvaluateInt(&evaluator, self_.upper_bound_, "Max depth in weighted shortest path expansion"); upper_bound_set_ = true; @@ -1808,12 +1866,17 @@ class ExpandWeightedShortestPathCursor : public query::plan::Cursor { "Maximum depth in weighted shortest path expansion must be at " "least 1."); + frame[self_.weight_lambda_->inner_edge_symbol] = TypedValue(); + frame[self_.weight_lambda_->inner_node_symbol] = vertex; + TypedValue current_weight = + CalculateNextWeight(self_.weight_lambda_, /* total_weight */ TypedValue(), evaluator); + // Clear existing data structures. previous_.clear(); total_cost_.clear(); yielded_vertices_.clear(); - pq_.emplace(TypedValue(), 0, vertex, std::nullopt); + pq_.emplace(current_weight, 0, vertex, std::nullopt); // We are adding the starting vertex to the set of yielded vertices // because we don't want to yield paths that end with the starting // vertex. @@ -1913,15 +1976,6 @@ class ExpandWeightedShortestPathCursor : public query::plan::Cursor { // Keeps track of vertices for which we yielded a path already. utils::pmr::unordered_set yielded_vertices_; - static void ValidateWeightTypes(const TypedValue &lhs, const TypedValue &rhs) { - if (!((lhs.IsNumeric() && lhs.IsNumeric()) || (rhs.IsDuration() && rhs.IsDuration()))) { - throw QueryRuntimeException(utils::MessageWithLink( - "All weights should be of the same type, either numeric or a Duration. Please update the weight " - "expression or the filter expression.", - "https://memgr.ph/wsp")); - } - } - // Priority queue comparator. Keep lowest weight on top of the queue. class PriorityQueueComparator { public: @@ -1979,36 +2033,32 @@ class ExpandAllShortestPathsCursor : public query::plan::Cursor { // queue. 
auto expand_vertex = [this, &evaluator, &frame](const EdgeAccessor &edge, const EdgeAtom::Direction direction, const TypedValue &total_weight, int64_t depth) { - auto *memory = evaluator.GetMemoryResource(); - auto const &next_vertex = direction == EdgeAtom::Direction::IN ? edge.From() : edge.To(); + // Evaluate current weight + frame[self_.weight_lambda_->inner_edge_symbol] = edge; + frame[self_.weight_lambda_->inner_node_symbol] = next_vertex; + TypedValue next_weight = CalculateNextWeight(self_.weight_lambda_, total_weight, evaluator); + // If filter expression exists, evaluate filter if (self_.filter_lambda_.expression) { frame[self_.filter_lambda_.inner_edge_symbol] = edge; frame[self_.filter_lambda_.inner_node_symbol] = next_vertex; + if (self_.filter_lambda_.accumulated_path_symbol) { + MG_ASSERT(frame[self_.filter_lambda_.accumulated_path_symbol.value()].IsPath(), + "Accumulated path must be path"); + Path &accumulated_path = frame[self_.filter_lambda_.accumulated_path_symbol.value()].ValuePath(); + accumulated_path.Expand(edge); + accumulated_path.Expand(next_vertex); + + if (self_.filter_lambda_.accumulated_weight_symbol) { + frame[self_.filter_lambda_.accumulated_weight_symbol.value()] = next_weight; + } + } if (!EvaluateFilter(evaluator, self_.filter_lambda_.expression)) return; } - // Evaluate current weight - frame[self_.weight_lambda_->inner_edge_symbol] = edge; - frame[self_.weight_lambda_->inner_node_symbol] = next_vertex; - - TypedValue current_weight = self_.weight_lambda_->expression->Accept(evaluator); - - CheckWeightType(current_weight, memory); - - TypedValue next_weight = std::invoke([&] { - if (total_weight.IsNull()) { - return current_weight; - } - - ValidateWeightTypes(current_weight, total_weight); - - return TypedValue(current_weight, memory) + total_weight; - }); - auto found_it = visited_cost_.find(next_vertex); // Check if the vertex has already been processed. 
if (found_it != visited_cost_.end()) { @@ -2200,7 +2250,17 @@ class ExpandAllShortestPathsCursor : public query::plan::Cursor { traversal_stack_.clear(); total_cost_.clear(); - expand_from_vertex(*start_vertex, TypedValue(), 0); + if (self_.filter_lambda_.accumulated_path_symbol) { + // Add initial vertex of path to the accumulated path + frame[self_.filter_lambda_.accumulated_path_symbol.value()] = Path(*start_vertex); + } + + frame[self_.weight_lambda_->inner_edge_symbol] = TypedValue(); + frame[self_.weight_lambda_->inner_node_symbol] = *start_vertex; + TypedValue current_weight = + CalculateNextWeight(self_.weight_lambda_, /* total_weight */ TypedValue(), evaluator); + + expand_from_vertex(*start_vertex, current_weight, 0); visited_cost_.emplace(*start_vertex, 0); frame[self_.common_.edge_symbol] = TypedValue::TVector(memory); } @@ -2252,15 +2312,6 @@ class ExpandAllShortestPathsCursor : public query::plan::Cursor { // Stack indicating the traversal level. utils::pmr::list> traversal_stack_; - static void ValidateWeightTypes(const TypedValue &lhs, const TypedValue &rhs) { - if (!((lhs.IsNumeric() && lhs.IsNumeric()) || (rhs.IsDuration() && rhs.IsDuration()))) { - throw QueryRuntimeException(utils::MessageWithLink( - "All weights should be of the same type, either numeric or a Duration. Please update the weight " - "expression or the filter expression.", - "https://memgr.ph/wsp")); - } - } - // Priority queue comparator. Keep lowest weight on top of the queue. 
class PriorityQueueComparator { public: @@ -2500,13 +2551,16 @@ std::vector EvaluatePatternFilter::ModifiedSymbols(const SymbolTable &ta } bool EvaluatePatternFilter::EvaluatePatternFilterCursor::Pull(Frame &frame, ExecutionContext &context) { - OOMExceptionEnabler oom_exception; SCOPED_PROFILE_OP("EvaluatePatternFilter"); + std::function function = [&frame, self = this->self_, input_cursor = this->input_cursor_.get(), + &context](TypedValue *return_value) { + OOMExceptionEnabler oom_exception; + input_cursor->Reset(); - input_cursor_->Reset(); - - frame[self_.output_symbol_] = TypedValue(input_cursor_->Pull(frame, context), context.evaluation_context.memory); + *return_value = TypedValue(input_cursor->Pull(frame, context), context.evaluation_context.memory); + }; + frame[self_.output_symbol_] = TypedValue(std::move(function)); return true; } @@ -3622,6 +3676,7 @@ class AggregateCursor : public Cursor { void ProcessOne(const Frame &frame, ExpressionEvaluator *evaluator) { // Preallocated group_by, since most of the time the aggregation key won't be unique reused_group_by_.clear(); + evaluator->ResetPropertyLookupCache(); for (Expression *expression : self_.group_by_) { reused_group_by_.emplace_back(expression->Accept(*evaluator)); @@ -4780,6 +4835,12 @@ class CallProcedureCursor : public Cursor { AbortCheck(context); + auto skip_rows_with_deleted_values = [this]() { + while (result_row_it_ != result_->rows.end() && result_row_it_->has_deleted_values) { + ++result_row_it_; + } + }; + // We need to fetch new procedure results after pulling from input. // TODO: Look into openCypher's distinction between procedures returning an // empty result set vs procedures which return `void`. We currently don't @@ -4789,7 +4850,7 @@ class CallProcedureCursor : public Cursor { // It might be a good idea to resolve the procedure name once, at the // start. 
Unfortunately, this could deadlock if we tried to invoke a // procedure from a module (read lock) and reload a module (write lock) - // inside the same execution thread. Also, our RWLock is setup so that + // inside the same execution thread. Also, our RWLock is set up so that // it's not possible for a single thread to request multiple read locks. // Builtin module registration in query/procedure/module.cpp depends on // this locking scheme. @@ -4837,6 +4898,7 @@ class CallProcedureCursor : public Cursor { graph_view); result_->signature = &proc->results; + result_->is_transactional = storage::IsTransactional(context.db_accessor->GetStorageMode()); // Use special memory as invoking procedure is complex // TODO: This will probably need to be changed when we add support for @@ -4861,6 +4923,9 @@ class CallProcedureCursor : public Cursor { throw QueryRuntimeException("{}: {}", self_->procedure_name_, *result_->error_msg); } result_row_it_ = result_->rows.begin(); + if (!result_->is_transactional) { + skip_rows_with_deleted_values(); + } stream_exhausted = result_row_it_ == result_->rows.end(); } @@ -4890,6 +4955,9 @@ class CallProcedureCursor : public Cursor { } } ++result_row_it_; + if (!result_->is_transactional) { + skip_rows_with_deleted_values(); + } return true; } diff --git a/src/query/plan/operator.hpp b/src/query/plan/operator.hpp index 03df07378..8fa3d3a7c 100644 --- a/src/query/plan/operator.hpp +++ b/src/query/plan/operator.hpp @@ -917,12 +917,18 @@ struct ExpansionLambda { Symbol inner_node_symbol; /// Expression used in lambda during expansion. Expression *expression; + /// Currently expanded accumulated path symbol. + std::optional accumulated_path_symbol; + /// Currently expanded accumulated weight symbol. + std::optional accumulated_weight_symbol; ExpansionLambda Clone(AstStorage *storage) const { ExpansionLambda object; object.inner_edge_symbol = inner_edge_symbol; object.inner_node_symbol = inner_node_symbol; object.expression = expression ? 
expression->Clone(storage) : nullptr; + object.accumulated_path_symbol = accumulated_path_symbol; + object.accumulated_weight_symbol = accumulated_weight_symbol; return object; } }; diff --git a/src/query/plan/preprocess.cpp b/src/query/plan/preprocess.cpp index e03c51841..22899cbc0 100644 --- a/src/query/plan/preprocess.cpp +++ b/src/query/plan/preprocess.cpp @@ -74,6 +74,13 @@ std::vector NormalizePatterns(const SymbolTable &symbol_table, const // Remove symbols which are bound by lambda arguments. collector.symbols_.erase(symbol_table.at(*edge->filter_lambda_.inner_edge)); collector.symbols_.erase(symbol_table.at(*edge->filter_lambda_.inner_node)); + if (edge->filter_lambda_.accumulated_path) { + collector.symbols_.erase(symbol_table.at(*edge->filter_lambda_.accumulated_path)); + + if (edge->filter_lambda_.accumulated_weight) { + collector.symbols_.erase(symbol_table.at(*edge->filter_lambda_.accumulated_weight)); + } + } if (edge->type_ == EdgeAtom::Type::WEIGHTED_SHORTEST_PATH || edge->type_ == EdgeAtom::Type::ALL_SHORTEST_PATHS) { collector.symbols_.erase(symbol_table.at(*edge->weight_lambda_.inner_edge)); @@ -295,6 +302,13 @@ void Filters::CollectPatternFilters(Pattern &pattern, SymbolTable &symbol_table, prop_pair.second->Accept(collector); collector.symbols_.emplace(symbol_table.at(*atom->filter_lambda_.inner_node)); collector.symbols_.emplace(symbol_table.at(*atom->filter_lambda_.inner_edge)); + if (atom->filter_lambda_.accumulated_path) { + collector.symbols_.emplace(symbol_table.at(*atom->filter_lambda_.accumulated_path)); + + if (atom->filter_lambda_.accumulated_weight) { + collector.symbols_.emplace(symbol_table.at(*atom->filter_lambda_.accumulated_weight)); + } + } // First handle the inline property filter. 
auto *property_lookup = storage.Create(atom->filter_lambda_.inner_edge, prop_pair.first); auto *prop_equal = storage.Create(property_lookup, prop_pair.second); diff --git a/src/query/plan/rewrite/index_lookup.hpp b/src/query/plan/rewrite/index_lookup.hpp index 4054f8c12..590bad5f4 100644 --- a/src/query/plan/rewrite/index_lookup.hpp +++ b/src/query/plan/rewrite/index_lookup.hpp @@ -106,6 +106,11 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor { prev_ops_.pop_back(); ExpressionRemovalResult removal = RemoveExpressions(op.expression_, filter_exprs_for_removal_); op.expression_ = removal.trimmed_expression; + if (op.expression_) { + Filters leftover_filters; + leftover_filters.CollectFilterExpression(op.expression_, *symbol_table_); + op.all_filters_ = std::move(leftover_filters); + } // edge uniqueness filter comes always before filter in plan generation LogicalOperator *input = op.input().get(); @@ -171,6 +176,11 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor { if (expand.common_.existing_node) { return true; } + if (expand.type_ == EdgeAtom::Type::BREADTH_FIRST && expand.filter_lambda_.accumulated_path_symbol) { + // When accumulated path is used, we cannot use ST shortest path algorithm. 
+ return false; + } + std::unique_ptr indexed_scan; ScanAll dst_scan(expand.input(), expand.common_.node_symbol, storage::View::OLD); // With expand to existing we only get real gains with BFS, because we use a diff --git a/src/query/plan/rewrite/join.hpp b/src/query/plan/rewrite/join.hpp index 65e32b3e8..e346ded45 100644 --- a/src/query/plan/rewrite/join.hpp +++ b/src/query/plan/rewrite/join.hpp @@ -59,6 +59,12 @@ class JoinRewriter final : public HierarchicalLogicalOperatorVisitor { ExpressionRemovalResult removal = RemoveExpressions(op.expression_, filter_exprs_for_removal_); op.expression_ = removal.trimmed_expression; + if (op.expression_) { + Filters leftover_filters; + leftover_filters.CollectFilterExpression(op.expression_, *symbol_table_); + op.all_filters_ = std::move(leftover_filters); + } + if (!op.expression_ || utils::Contains(filter_exprs_for_removal_, op.expression_)) { SetOnParent(op.input()); } diff --git a/src/query/plan/rule_based_planner.cpp b/src/query/plan/rule_based_planner.cpp index cd223dd8e..f3d0c1487 100644 --- a/src/query/plan/rule_based_planner.cpp +++ b/src/query/plan/rule_based_planner.cpp @@ -17,6 +17,7 @@ #include #include +#include "query/plan/preprocess.hpp" #include "utils/algorithm.hpp" #include "utils/exceptions.hpp" #include "utils/logging.hpp" @@ -516,14 +517,25 @@ bool HasBoundFilterSymbols(const std::unordered_set &bound_symbols, cons Expression *ExtractFilters(const std::unordered_set &bound_symbols, Filters &filters, AstStorage &storage) { Expression *filter_expr = nullptr; + std::vector and_joinable_filters{}; for (auto filters_it = filters.begin(); filters_it != filters.end();) { if (HasBoundFilterSymbols(bound_symbols, *filters_it)) { - filter_expr = impl::BoolJoin(storage, filter_expr, filters_it->expression); + and_joinable_filters.emplace_back(*filters_it); filters_it = filters.erase(filters_it); } else { filters_it++; } } + // Idea here is to join filters in a way + // that pattern filter ( exists() ) is at the 
end + // so if any of the AND filters before + // evaluate to false we don't need to + // evaluate pattern ( exists() ) filter + std::partition(and_joinable_filters.begin(), and_joinable_filters.end(), + [](const FilterInfo &filter_info) { return filter_info.type != FilterInfo::Type::Pattern; }); + for (auto &and_joinable_filter : and_joinable_filters) { + filter_expr = impl::BoolJoin(storage, filter_expr, and_joinable_filter.expression); + } return filter_expr; } diff --git a/src/query/plan/rule_based_planner.hpp b/src/query/plan/rule_based_planner.hpp index bdac76a93..074bd1c88 100644 --- a/src/query/plan/rule_based_planner.hpp +++ b/src/query/plan/rule_based_planner.hpp @@ -511,10 +511,6 @@ class RuleBasedPlanner { std::set visited_expansion_groups; - last_op = - GenerateExpansionOnAlreadySeenSymbols(std::move(last_op), matching, visited_expansion_groups, symbol_table, - storage, bound_symbols, new_symbols, named_paths, filters, view); - // We want to create separate branches of scan operators for each expansion group group of patterns // Whenever there are 2 scan branches, they will be joined with a Cartesian operator @@ -528,6 +524,14 @@ class RuleBasedPlanner { continue; } + last_op = + GenerateExpansionOnAlreadySeenSymbols(std::move(last_op), matching, visited_expansion_groups, symbol_table, + storage, bound_symbols, new_symbols, named_paths, filters, view); + + if (visited_expansion_groups.contains(expansion.expansion_group_id)) { + continue; + } + std::unique_ptr starting_expansion_operator = nullptr; if (!initial_expansion_done) { starting_expansion_operator = std::move(last_op); @@ -705,9 +709,9 @@ class RuleBasedPlanner { std::optional total_weight; if (edge->type_ == EdgeAtom::Type::WEIGHTED_SHORTEST_PATH || edge->type_ == EdgeAtom::Type::ALL_SHORTEST_PATHS) { - weight_lambda.emplace(ExpansionLambda{symbol_table.at(*edge->weight_lambda_.inner_edge), - symbol_table.at(*edge->weight_lambda_.inner_node), - edge->weight_lambda_.expression}); + 
weight_lambda.emplace(ExpansionLambda{.inner_edge_symbol = symbol_table.at(*edge->weight_lambda_.inner_edge), + .inner_node_symbol = symbol_table.at(*edge->weight_lambda_.inner_node), + .expression = edge->weight_lambda_.expression}); total_weight.emplace(symbol_table.at(*edge->total_weight_)); } @@ -715,12 +719,28 @@ class RuleBasedPlanner { ExpansionLambda filter_lambda; filter_lambda.inner_edge_symbol = symbol_table.at(*edge->filter_lambda_.inner_edge); filter_lambda.inner_node_symbol = symbol_table.at(*edge->filter_lambda_.inner_node); + if (edge->filter_lambda_.accumulated_path) { + filter_lambda.accumulated_path_symbol = symbol_table.at(*edge->filter_lambda_.accumulated_path); + + if (edge->filter_lambda_.accumulated_weight) { + filter_lambda.accumulated_weight_symbol = symbol_table.at(*edge->filter_lambda_.accumulated_weight); + } + } { // Bind the inner edge and node symbols so they're available for // inline filtering in ExpandVariable. bool inner_edge_bound = bound_symbols.insert(filter_lambda.inner_edge_symbol).second; bool inner_node_bound = bound_symbols.insert(filter_lambda.inner_node_symbol).second; MG_ASSERT(inner_edge_bound && inner_node_bound, "An inner edge and node can't be bound from before"); + if (filter_lambda.accumulated_path_symbol) { + bool accumulated_path_bound = bound_symbols.insert(*filter_lambda.accumulated_path_symbol).second; + MG_ASSERT(accumulated_path_bound, "The accumulated path can't be bound from before"); + + if (filter_lambda.accumulated_weight_symbol) { + bool accumulated_weight_bound = bound_symbols.insert(*filter_lambda.accumulated_weight_symbol).second; + MG_ASSERT(accumulated_weight_bound, "The accumulated weight can't be bound from before"); + } + } } // Join regular filters with lambda filter expression, so that they // are done inline together. Semantic analysis should guarantee that @@ -731,15 +751,34 @@ class RuleBasedPlanner { // filtering (they use the inner symbols. 
If they were not collected, // we have to remove them manually because no other filter-extraction // will ever bind them again. - filters.erase( - std::remove_if(filters.begin(), filters.end(), - [e = filter_lambda.inner_edge_symbol, n = filter_lambda.inner_node_symbol](FilterInfo &fi) { - return utils::Contains(fi.used_symbols, e) || utils::Contains(fi.used_symbols, n); - }), - filters.end()); + std::vector inner_symbols = {filter_lambda.inner_edge_symbol, filter_lambda.inner_node_symbol}; + if (filter_lambda.accumulated_path_symbol) { + inner_symbols.emplace_back(*filter_lambda.accumulated_path_symbol); + + if (filter_lambda.accumulated_weight_symbol) { + inner_symbols.emplace_back(*filter_lambda.accumulated_weight_symbol); + } + } + + filters.erase(std::remove_if(filters.begin(), filters.end(), + [&inner_symbols](FilterInfo &fi) { + for (const auto &symbol : inner_symbols) { + if (utils::Contains(fi.used_symbols, symbol)) return true; + } + return false; + }), + filters.end()); + // Unbind the temporarily bound inner symbols for filtering. 
bound_symbols.erase(filter_lambda.inner_edge_symbol); bound_symbols.erase(filter_lambda.inner_node_symbol); + if (filter_lambda.accumulated_path_symbol) { + bound_symbols.erase(*filter_lambda.accumulated_path_symbol); + + if (filter_lambda.accumulated_weight_symbol) { + bound_symbols.erase(*filter_lambda.accumulated_weight_symbol); + } + } if (total_weight) { bound_symbols.insert(*total_weight); @@ -862,13 +901,14 @@ class RuleBasedPlanner { std::unique_ptr GenFilters(std::unique_ptr last_op, const std::unordered_set &bound_symbols, Filters &filters, AstStorage &storage, const SymbolTable &symbol_table) { - auto all_filters = filters; auto pattern_filters = ExtractPatternFilters(filters, symbol_table, storage, bound_symbols); auto *filter_expr = impl::ExtractFilters(bound_symbols, filters, storage); if (filter_expr) { - last_op = - std::make_unique(std::move(last_op), std::move(pattern_filters), filter_expr, std::move(all_filters)); + Filters operator_filters; + operator_filters.CollectFilterExpression(filter_expr, symbol_table); + last_op = std::make_unique(std::move(last_op), std::move(pattern_filters), filter_expr, + std::move(operator_filters)); } return last_op; } diff --git a/src/query/plan/variable_start_planner.cpp b/src/query/plan/variable_start_planner.cpp index 1c230628a..4aa3580d0 100644 --- a/src/query/plan/variable_start_planner.cpp +++ b/src/query/plan/variable_start_planner.cpp @@ -72,8 +72,9 @@ void AddNextExpansions(const Symbol &node_symbol, const Matching &matching, cons // We are not expanding from node1, so flip the expansion. DMG_ASSERT(expansion.node2 && symbol_table.at(*expansion.node2->identifier_) == node_symbol, "Expected node_symbol to be bound in node2"); - if (expansion.edge->type_ != EdgeAtom::Type::BREADTH_FIRST) { + if (expansion.edge->type_ != EdgeAtom::Type::BREADTH_FIRST && !expansion.edge->filter_lambda_.accumulated_path) { // BFS must *not* be flipped. Doing that changes the BFS results. 
+ // When filter lambda uses accumulated path, path must not be flipped. std::swap(expansion.node1, expansion.node2); expansion.is_flipped = true; if (expansion.direction != EdgeAtom::Direction::BOTH) { diff --git a/src/query/procedure/mg_procedure_impl.cpp b/src/query/procedure/mg_procedure_impl.cpp index f87377ba5..2eea1cecb 100644 --- a/src/query/procedure/mg_procedure_impl.cpp +++ b/src/query/procedure/mg_procedure_impl.cpp @@ -32,6 +32,7 @@ #include "query/procedure/mg_procedure_helpers.hpp" #include "query/stream/common.hpp" #include "storage/v2/property_value.hpp" +#include "storage/v2/storage_mode.hpp" #include "storage/v2/view.hpp" #include "utils/algorithm.hpp" #include "utils/concepts.hpp" @@ -313,12 +314,61 @@ mgp_value_type FromTypedValueType(memgraph::query::TypedValue::Type type) { return MGP_VALUE_TYPE_LOCAL_DATE_TIME; case memgraph::query::TypedValue::Type::Duration: return MGP_VALUE_TYPE_DURATION; + case memgraph::query::TypedValue::Type::Function: + throw std::logic_error{"mgp_value for TypedValue::Type::Function doesn't exist."}; case memgraph::query::TypedValue::Type::Graph: throw std::logic_error{"mgp_value for TypedValue::Type::Graph doesn't exist."}; } } } // namespace +bool IsDeleted(const mgp_vertex *vertex) { return vertex->getImpl().impl_.vertex_->deleted; } + +bool IsDeleted(const mgp_edge *edge) { return edge->impl.IsDeleted(); } + +bool ContainsDeleted(const mgp_path *path) { + return std::ranges::any_of(path->vertices, [](const auto &vertex) { return IsDeleted(&vertex); }) || + std::ranges::any_of(path->edges, [](const auto &edge) { return IsDeleted(&edge); }); +} + +bool ContainsDeleted(const mgp_list *list) { + return std::ranges::any_of(list->elems, [](const auto &elem) { return ContainsDeleted(&elem); }); +} + +bool ContainsDeleted(const mgp_map *map) { + return std::ranges::any_of(map->items, [](const auto &item) { return ContainsDeleted(&item.second); }); +} + +bool ContainsDeleted(const mgp_value *val) { + switch (val->type) { 
+ // Value types + case MGP_VALUE_TYPE_NULL: + case MGP_VALUE_TYPE_BOOL: + case MGP_VALUE_TYPE_INT: + case MGP_VALUE_TYPE_DOUBLE: + case MGP_VALUE_TYPE_STRING: + case MGP_VALUE_TYPE_DATE: + case MGP_VALUE_TYPE_LOCAL_TIME: + case MGP_VALUE_TYPE_LOCAL_DATE_TIME: + case MGP_VALUE_TYPE_DURATION: + return false; + // Reference types + case MGP_VALUE_TYPE_LIST: + return ContainsDeleted(val->list_v); + case MGP_VALUE_TYPE_MAP: + return ContainsDeleted(val->map_v); + case MGP_VALUE_TYPE_VERTEX: + return IsDeleted(val->vertex_v); + case MGP_VALUE_TYPE_EDGE: + return IsDeleted(val->edge_v); + case MGP_VALUE_TYPE_PATH: + return ContainsDeleted(val->path_v); + default: + throw memgraph::query::QueryRuntimeException("Value of unknown type"); + } + return false; +} + memgraph::query::TypedValue ToTypedValue(const mgp_value &val, memgraph::utils::MemoryResource *memory) { switch (val.type) { case MGP_VALUE_TYPE_NULL: @@ -1001,6 +1051,10 @@ mgp_error mgp_list_copy(mgp_list *list, mgp_memory *memory, mgp_list **result) { void mgp_list_destroy(mgp_list *list) { DeleteRawMgpObject(list); } +mgp_error mgp_list_contains_deleted(mgp_list *list, int *result) { + return WrapExceptions([list, result] { *result = ContainsDeleted(list); }); +} + namespace { void MgpListAppendExtend(mgp_list &list, const mgp_value &value) { list.elems.push_back(value); } } // namespace @@ -1052,6 +1106,10 @@ mgp_error mgp_map_copy(mgp_map *map, mgp_memory *memory, mgp_map **result) { void mgp_map_destroy(mgp_map *map) { DeleteRawMgpObject(map); } +mgp_error mgp_map_contains_deleted(mgp_map *map, int *result) { + return WrapExceptions([map, result] { *result = ContainsDeleted(map); }); +} + mgp_error mgp_map_insert(mgp_map *map, const char *key, mgp_value *value) { return WrapExceptions([&] { auto emplace_result = map->items.emplace(key, *value); @@ -1175,6 +1233,10 @@ mgp_error mgp_path_copy(mgp_path *path, mgp_memory *memory, mgp_path **result) { void mgp_path_destroy(mgp_path *path) { 
DeleteRawMgpObject(path); } +mgp_error mgp_path_contains_deleted(mgp_path *path, int *result) { + return WrapExceptions([path, result] { *result = ContainsDeleted(path); }); +} + mgp_error mgp_path_expand(mgp_path *path, mgp_edge *edge) { return WrapExceptions([path, edge] { MG_ASSERT(Call(mgp_path_size, path) == path->vertices.size() - 1, "Invalid mgp_path"); @@ -1558,8 +1620,9 @@ mgp_error mgp_result_new_record(mgp_result *res, mgp_result_record **result) { auto *memory = res->rows.get_allocator().GetMemoryResource(); MG_ASSERT(res->signature, "Expected to have a valid signature"); res->rows.push_back(mgp_result_record{ - res->signature, - memgraph::utils::pmr::map(memory)}); + .signature = res->signature, + .values = memgraph::utils::pmr::map(memory), + .ignore_deleted_values = !res->is_transactional}); return &res->rows.back(); }, result); @@ -1574,10 +1637,14 @@ mgp_error mgp_result_record_insert(mgp_result_record *record, const char *field_ if (find_it == record->signature->end()) { throw std::out_of_range{fmt::format("The result doesn't have any field named '{}'.", field_name)}; } + if (record->ignore_deleted_values && ContainsDeleted(val)) [[unlikely]] { + record->has_deleted_values = true; + return; + } const auto *type = find_it->second.first; if (!type->SatisfiesType(*val)) { throw std::logic_error{ - fmt::format("The type of value doesn't satisfies the type '{}'!", type->GetPresentableName())}; + fmt::format("The type of value doesn't satisfy the type '{}'!", type->GetPresentableName())}; } record->values.emplace(field_name, ToTypedValue(*val, memory)); }); @@ -1744,7 +1811,7 @@ memgraph::storage::PropertyValue ToPropertyValue(const mgp_value &value) { return memgraph::storage::PropertyValue{memgraph::storage::TemporalData{memgraph::storage::TemporalType::Duration, value.duration_v->duration.microseconds}}; case MGP_VALUE_TYPE_VERTEX: - throw ValueConversionException{"A vertex is not a valid property value! 
"}; + throw ValueConversionException{"A vertex is not a valid property value!"}; case MGP_VALUE_TYPE_EDGE: throw ValueConversionException{"An edge is not a valid property value!"}; case MGP_VALUE_TYPE_PATH: @@ -1960,6 +2027,10 @@ mgp_error mgp_vertex_copy(mgp_vertex *v, mgp_memory *memory, mgp_vertex **result void mgp_vertex_destroy(mgp_vertex *v) { DeleteRawMgpObject(v); } +mgp_error mgp_vertex_is_deleted(mgp_vertex *v, int *result) { + return WrapExceptions([v] { return IsDeleted(v); }, result); +} + mgp_error mgp_vertex_equal(mgp_vertex *v1, mgp_vertex *v2, int *result) { // NOLINTNEXTLINE(clang-diagnostic-unevaluated-expression) static_assert(noexcept(*v1 == *v2)); @@ -2317,6 +2388,10 @@ mgp_error mgp_edge_copy(mgp_edge *e, mgp_memory *memory, mgp_edge **result) { void mgp_edge_destroy(mgp_edge *e) { DeleteRawMgpObject(e); } +mgp_error mgp_edge_is_deleted(mgp_edge *e, int *result) { + return WrapExceptions([e] { return IsDeleted(e); }, result); +} + mgp_error mgp_edge_equal(mgp_edge *e1, mgp_edge *e2, int *result) { // NOLINTNEXTLINE(clang-diagnostic-unevaluated-expression) static_assert(noexcept(*e1 == *e2)); @@ -2862,6 +2937,11 @@ mgp_error mgp_list_all_unique_constraints(mgp_graph *graph, mgp_memory *memory, }); } +mgp_error mgp_graph_is_transactional(mgp_graph *graph, int *result) { + *result = IsTransactional(graph->storage_mode) ? 1 : 0; + return mgp_error::MGP_ERROR_NO_ERROR; +} + mgp_error mgp_graph_is_mutable(mgp_graph *graph, int *result) { *result = MgpGraphIsMutable(*graph) ? 
1 : 0; return mgp_error::MGP_ERROR_NO_ERROR; @@ -3672,7 +3752,8 @@ std::ostream &PrintValue(const TypedValue &value, std::ostream *stream) { case TypedValue::Type::Edge: case TypedValue::Type::Path: case TypedValue::Type::Graph: - LOG_FATAL("value must not be a graph element"); + case TypedValue::Type::Function: + LOG_FATAL("value must not be a graph|function element"); } } diff --git a/src/query/procedure/mg_procedure_impl.hpp b/src/query/procedure/mg_procedure_impl.hpp index 7b5301381..17cac4eca 100644 --- a/src/query/procedure/mg_procedure_impl.hpp +++ b/src/query/procedure/mg_procedure_impl.hpp @@ -560,23 +560,24 @@ struct mgp_graph { // TODO: Merge `mgp_graph` and `mgp_memory` into a single `mgp_context`. The // `ctx` field is out of place here. memgraph::query::ExecutionContext *ctx; + memgraph::storage::StorageMode storage_mode; static mgp_graph WritableGraph(memgraph::query::DbAccessor &acc, memgraph::storage::View view, memgraph::query::ExecutionContext &ctx) { - return mgp_graph{&acc, view, &ctx}; + return mgp_graph{&acc, view, &ctx, acc.GetStorageMode()}; } static mgp_graph NonWritableGraph(memgraph::query::DbAccessor &acc, memgraph::storage::View view) { - return mgp_graph{&acc, view, nullptr}; + return mgp_graph{&acc, view, nullptr, acc.GetStorageMode()}; } static mgp_graph WritableGraph(memgraph::query::SubgraphDbAccessor &acc, memgraph::storage::View view, memgraph::query::ExecutionContext &ctx) { - return mgp_graph{&acc, view, &ctx}; + return mgp_graph{&acc, view, &ctx, acc.GetStorageMode()}; } static mgp_graph NonWritableGraph(memgraph::query::SubgraphDbAccessor &acc, memgraph::storage::View view) { - return mgp_graph{&acc, view, nullptr}; + return mgp_graph{&acc, view, nullptr, acc.GetStorageMode()}; } }; @@ -585,6 +586,8 @@ struct mgp_result_record { const memgraph::utils::pmr::map> *signature; memgraph::utils::pmr::map values; + bool ignore_deleted_values = false; + bool has_deleted_values = false; }; struct mgp_result { @@ -599,6 +602,7 @@ 
struct mgp_result { std::pair> *signature; memgraph::utils::pmr::vector rows; std::optional error_msg; + bool is_transactional = true; }; struct mgp_func_result { @@ -614,6 +618,7 @@ struct mgp_func_context { memgraph::query::DbAccessor *impl; memgraph::storage::View view; }; + struct mgp_properties_iterator { using allocator_type = memgraph::utils::Allocator; @@ -724,6 +729,7 @@ struct ProcedureInfo { bool is_batched{false}; std::optional required_privilege = std::nullopt; }; + struct mgp_proc { using allocator_type = memgraph::utils::Allocator; @@ -984,4 +990,6 @@ struct mgp_messages { storage_type messages; }; +bool ContainsDeleted(const mgp_value *val); + memgraph::query::TypedValue ToTypedValue(const mgp_value &val, memgraph::utils::MemoryResource *memory); diff --git a/src/query/procedure/py_module.cpp b/src/query/procedure/py_module.cpp index 1e687a91e..3fd09b0ab 100644 --- a/src/query/procedure/py_module.cpp +++ b/src/query/procedure/py_module.cpp @@ -25,6 +25,7 @@ #include "query/exceptions.hpp" #include "query/procedure/mg_procedure_helpers.hpp" #include "query/procedure/mg_procedure_impl.hpp" +#include "storage/v2/storage_mode.hpp" #include "utils/memory.hpp" #include "utils/on_scope_exit.hpp" #include "utils/pmr/vector.hpp" @@ -867,7 +868,37 @@ py::Object MgpListToPyTuple(mgp_list *list, PyObject *py_graph) { } namespace { -std::optional AddRecordFromPython(mgp_result *result, py::Object py_record, mgp_memory *memory) { +struct RecordFieldCache { + PyObject *key; + PyObject *val; + const char *field_name; + mgp_value *field_val; +}; + +std::optional InsertField(PyObject *key, PyObject *val, mgp_result_record *record, + const char *field_name, mgp_value *field_val) { + if (mgp_result_record_insert(record, field_name, field_val) != mgp_error::MGP_ERROR_NO_ERROR) { + std::stringstream ss; + ss << "Unable to insert field '" << py::Object::FromBorrow(key) << "' with value: '" << py::Object::FromBorrow(val) + << "'; did you set the correct field type?"; + 
const auto &msg = ss.str(); + PyErr_SetString(PyExc_ValueError, msg.c_str()); + mgp_value_destroy(field_val); + return py::FetchError(); + } + mgp_value_destroy(field_val); + return std::nullopt; +} + +void SkipRecord(mgp_value *field_val, std::vector ¤t_record_cache) { + mgp_value_destroy(field_val); + for (auto &cache_entry : current_record_cache) { + mgp_value_destroy(cache_entry.field_val); + } +} + +std::optional AddRecordFromPython(mgp_result *result, py::Object py_record, mgp_graph *graph, + mgp_memory *memory) { py::Object py_mgp(PyImport_ImportModule("mgp")); if (!py_mgp) return py::FetchError(); auto record_cls = py_mgp.GetAttr("Record"); @@ -888,15 +919,27 @@ std::optional AddRecordFromPython(mgp_result *result, py::Obj py::Object items(PyDict_Items(fields.Ptr())); if (!items) return py::FetchError(); mgp_result_record *record{nullptr}; - if (RaiseExceptionFromErrorCode(mgp_result_new_record(result, &record))) { - return py::FetchError(); + const auto is_transactional = storage::IsTransactional(graph->storage_mode); + if (is_transactional) { + // IN_MEMORY_ANALYTICAL must first verify that the record contains no deleted values + if (RaiseExceptionFromErrorCode(mgp_result_new_record(result, &record))) { + return py::FetchError(); + } } + std::vector current_record_cache{}; + + utils::OnScopeExit clear_record_cache{[¤t_record_cache] { + for (auto &record : current_record_cache) { + mgp_value_destroy(record.field_val); + } + }}; + Py_ssize_t len = PyList_GET_SIZE(items.Ptr()); for (Py_ssize_t i = 0; i < len; ++i) { auto *item = PyList_GET_ITEM(items.Ptr(), i); if (!item) return py::FetchError(); MG_ASSERT(PyTuple_Check(item)); - auto *key = PyTuple_GetItem(item, 0); + PyObject *key = PyTuple_GetItem(item, 0); if (!key) return py::FetchError(); if (!PyUnicode_Check(key)) { std::stringstream ss; @@ -905,30 +948,48 @@ std::optional AddRecordFromPython(mgp_result *result, py::Obj PyErr_SetString(PyExc_TypeError, msg.c_str()); return py::FetchError(); } - const 
auto *field_name = PyUnicode_AsUTF8(key); + const char *field_name = PyUnicode_AsUTF8(key); if (!field_name) return py::FetchError(); - auto *val = PyTuple_GetItem(item, 1); + PyObject *val = PyTuple_GetItem(item, 1); if (!val) return py::FetchError(); // This memory is one dedicated for mg_procedure. mgp_value *field_val = PyObjectToMgpValueWithPythonExceptions(val, memory); if (field_val == nullptr) { return py::FetchError(); } - if (mgp_result_record_insert(record, field_name, field_val) != mgp_error::MGP_ERROR_NO_ERROR) { - std::stringstream ss; - ss << "Unable to insert field '" << py::Object::FromBorrow(key) << "' with value: '" - << py::Object::FromBorrow(val) << "'; did you set the correct field type?"; - const auto &msg = ss.str(); - PyErr_SetString(PyExc_ValueError, msg.c_str()); - mgp_value_destroy(field_val); - return py::FetchError(); + + if (!is_transactional) { + // If a deleted value is being inserted into a record, skip the whole record + if (ContainsDeleted(field_val)) { + SkipRecord(field_val, current_record_cache); + return std::nullopt; + } + current_record_cache.emplace_back( + RecordFieldCache{.key = key, .val = val, .field_name = field_name, .field_val = field_val}); + } else { + auto maybe_exc = InsertField(key, val, record, field_name, field_val); + if (maybe_exc) return maybe_exc; } - mgp_value_destroy(field_val); } + + if (is_transactional) { + return std::nullopt; + } + + // IN_MEMORY_ANALYTICAL only adds a new record after verifying that it contains no deleted values + if (RaiseExceptionFromErrorCode(mgp_result_new_record(result, &record))) { + return py::FetchError(); + } + for (auto &cache_entry : current_record_cache) { + auto maybe_exc = + InsertField(cache_entry.key, cache_entry.val, record, cache_entry.field_name, cache_entry.field_val); + if (maybe_exc) return maybe_exc; + } + return std::nullopt; } -std::optional AddMultipleRecordsFromPython(mgp_result *result, py::Object py_seq, +std::optional 
AddMultipleRecordsFromPython(mgp_result *result, py::Object py_seq, mgp_graph *graph, mgp_memory *memory) { Py_ssize_t len = PySequence_Size(py_seq.Ptr()); if (len == -1) return py::FetchError(); @@ -938,7 +999,7 @@ std::optional AddMultipleRecordsFromPython(mgp_result *result for (Py_ssize_t i = 0, curr_item = 0; i < len; ++i, ++curr_item) { py::Object py_record(PySequence_GetItem(py_seq.Ptr(), curr_item)); if (!py_record) return py::FetchError(); - auto maybe_exc = AddRecordFromPython(result, py_record, memory); + auto maybe_exc = AddRecordFromPython(result, py_record, graph, memory); if (maybe_exc) return maybe_exc; // Once PySequence_DelSlice deletes "transformed" objects, starting index is 0 again. if (i && i % del_cnt == 0) { @@ -952,14 +1013,14 @@ std::optional AddMultipleRecordsFromPython(mgp_result *result } std::optional AddMultipleBatchRecordsFromPython(mgp_result *result, py::Object py_seq, - mgp_memory *memory) { + mgp_graph *graph, mgp_memory *memory) { Py_ssize_t len = PySequence_Size(py_seq.Ptr()); if (len == -1) return py::FetchError(); result->rows.reserve(len); for (Py_ssize_t i = 0; i < len; ++i) { py::Object py_record(PySequence_GetItem(py_seq.Ptr(), i)); if (!py_record) return py::FetchError(); - auto maybe_exc = AddRecordFromPython(result, py_record, memory); + auto maybe_exc = AddRecordFromPython(result, py_record, graph, memory); if (maybe_exc) return maybe_exc; } PySequence_DelSlice(py_seq.Ptr(), 0, PySequence_Size(py_seq.Ptr())); @@ -1015,11 +1076,11 @@ void CallPythonProcedure(const py::Object &py_cb, mgp_list *args, mgp_graph *gra if (!py_res) return py::FetchError(); if (PySequence_Check(py_res.Ptr())) { if (is_batched) { - return AddMultipleBatchRecordsFromPython(result, py_res, memory); + return AddMultipleBatchRecordsFromPython(result, py_res, graph, memory); } - return AddMultipleRecordsFromPython(result, py_res, memory); + return AddMultipleRecordsFromPython(result, py_res, graph, memory); } - return AddRecordFromPython(result, 
py_res, memory); + return AddRecordFromPython(result, py_res, graph, memory); }; // It is *VERY IMPORTANT* to note that this code takes great care not to keep @@ -1114,9 +1175,9 @@ void CallPythonTransformation(const py::Object &py_cb, mgp_messages *msgs, mgp_g auto py_res = py_cb.Call(py_graph, py_messages); if (!py_res) return py::FetchError(); if (PySequence_Check(py_res.Ptr())) { - return AddMultipleRecordsFromPython(result, py_res, memory); + return AddMultipleRecordsFromPython(result, py_res, graph, memory); } - return AddRecordFromPython(result, py_res, memory); + return AddRecordFromPython(result, py_res, graph, memory); }; // It is *VERY IMPORTANT* to note that this code takes great care not to keep @@ -1164,9 +1225,27 @@ void CallPythonFunction(const py::Object &py_cb, mgp_list *args, mgp_graph *grap auto call = [&](py::Object py_graph) -> utils::BasicResult, mgp_value *> { py::Object py_args(MgpListToPyTuple(args, py_graph.Ptr())); if (!py_args) return {py::FetchError()}; + const auto is_transactional = storage::IsTransactional(graph->storage_mode); auto py_res = py_cb.Call(py_graph, py_args); if (!py_res) return {py::FetchError()}; mgp_value *ret_val = PyObjectToMgpValueWithPythonExceptions(py_res.Ptr(), memory); + if (!is_transactional && ContainsDeleted(ret_val)) { + mgp_value_destroy(ret_val); + mgp_value *null_val{nullptr}; + mgp_error last_error{mgp_error::MGP_ERROR_NO_ERROR}; + + last_error = mgp_value_make_null(memory, &null_val); + + if (last_error == mgp_error::MGP_ERROR_UNABLE_TO_ALLOCATE) { + throw std::bad_alloc{}; + } + if (last_error != mgp_error::MGP_ERROR_NO_ERROR) { + throw std::runtime_error{"Unexpected error while creating mgp_value"}; + } + + return null_val; + } + if (ret_val == nullptr) { return {py::FetchError()}; } diff --git a/src/query/stream/streams.cpp b/src/query/stream/streams.cpp index e93bf06a7..896607e94 100644 --- a/src/query/stream/streams.cpp +++ b/src/query/stream/streams.cpp @@ -99,7 +99,7 @@ void 
CallCustomTransformation(const std::string &transformation_name, const std: mgp_messages mgp_messages{mgp_messages::storage_type{&memory_resource}}; std::transform(messages.begin(), messages.end(), std::back_inserter(mgp_messages.messages), [](const TMessage &message) { return mgp_message{message}; }); - mgp_graph graph{&db_accessor, storage::View::OLD, nullptr}; + mgp_graph graph{&db_accessor, storage::View::OLD, nullptr, db_accessor.GetStorageMode()}; mgp_memory memory{&memory_resource}; result.rows.clear(); result.error_msg.reset(); diff --git a/src/query/typed_value.cpp b/src/query/typed_value.cpp index 13db88e1c..ea883e428 100644 --- a/src/query/typed_value.cpp +++ b/src/query/typed_value.cpp @@ -1,4 +1,4 @@ -// Copyright 2022 Memgraph Ltd. +// Copyright 2023 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -22,6 +22,7 @@ #include "storage/v2/temporal.hpp" #include "utils/exceptions.hpp" #include "utils/fnv.hpp" +#include "utils/logging.hpp" #include "utils/memory.hpp" namespace memgraph::query { @@ -215,6 +216,9 @@ TypedValue::TypedValue(const TypedValue &other, utils::MemoryResource *memory) : case Type::Duration: new (&duration_v) utils::Duration(other.duration_v); return; + case Type::Function: + new (&function_v) std::function(other.function_v); + return; case Type::Graph: auto *graph_ptr = utils::Allocator(memory_).new_object(*other.graph_v); new (&graph_v) std::unique_ptr(graph_ptr); @@ -268,6 +272,9 @@ TypedValue::TypedValue(TypedValue &&other, utils::MemoryResource *memory) : memo case Type::Duration: new (&duration_v) utils::Duration(other.duration_v); break; + case Type::Function: + new (&function_v) std::function(other.function_v); + break; case Type::Graph: if (other.GetMemoryResource() == memory_) { new (&graph_v) std::unique_ptr(std::move(other.graph_v)); @@ -343,6 +350,7 @@ 
DEFINE_VALUE_AND_TYPE_GETTERS(utils::Date, Date, date_v) DEFINE_VALUE_AND_TYPE_GETTERS(utils::LocalTime, LocalTime, local_time_v) DEFINE_VALUE_AND_TYPE_GETTERS(utils::LocalDateTime, LocalDateTime, local_date_time_v) DEFINE_VALUE_AND_TYPE_GETTERS(utils::Duration, Duration, duration_v) +DEFINE_VALUE_AND_TYPE_GETTERS(std::function, Function, function_v) Graph &TypedValue::ValueGraph() { if (type_ != Type::Graph) { @@ -362,6 +370,38 @@ bool TypedValue::IsGraph() const { return type_ == Type::Graph; } #undef DEFINE_VALUE_AND_TYPE_GETTERS +bool TypedValue::ContainsDeleted() const { + switch (type_) { + // Value types + case Type::Null: + case Type::Bool: + case Type::Int: + case Type::Double: + case Type::String: + case Type::Date: + case Type::LocalTime: + case Type::LocalDateTime: + case Type::Duration: + return false; + // Reference types + case Type::List: + return std::ranges::any_of(list_v, [](const auto &elem) { return elem.ContainsDeleted(); }); + case Type::Map: + return std::ranges::any_of(map_v, [](const auto &item) { return item.second.ContainsDeleted(); }); + case Type::Vertex: + return vertex_v.impl_.vertex_->deleted; + case Type::Edge: + return edge_v.IsDeleted(); + case Type::Path: + return std::ranges::any_of(path_v.vertices(), + [](auto &vertex_acc) { return vertex_acc.impl_.vertex_->deleted; }) || + std::ranges::any_of(path_v.edges(), [](auto &edge_acc) { return edge_acc.IsDeleted(); }); + default: + throw TypedValueException("Value of unknown type"); + } + return false; +} + bool TypedValue::IsNull() const { return type_ == Type::Null; } bool TypedValue::IsNumeric() const { return IsInt() || IsDouble(); } @@ -417,6 +457,8 @@ std::ostream &operator<<(std::ostream &os, const TypedValue::Type &type) { return os << "duration"; case TypedValue::Type::Graph: return os << "graph"; + case TypedValue::Type::Function: + return os << "function"; } LOG_FATAL("Unsupported TypedValue::Type"); } @@ -569,6 +611,9 @@ TypedValue &TypedValue::operator=(const TypedValue 
&other) { case Type::Duration: new (&duration_v) utils::Duration(other.duration_v); return *this; + case Type::Function: + new (&function_v) std::function(other.function_v); + return *this; } LOG_FATAL("Unsupported TypedValue::Type"); } @@ -628,6 +673,9 @@ TypedValue &TypedValue::operator=(TypedValue &&other) noexcept(false) { case Type::Duration: new (&duration_v) utils::Duration(other.duration_v); break; + case Type::Function: + new (&function_v) std::function{other.function_v}; + break; case Type::Graph: if (other.GetMemoryResource() == memory_) { new (&graph_v) std::unique_ptr(std::move(other.graph_v)); @@ -676,6 +724,9 @@ void TypedValue::DestroyValue() { case Type::LocalDateTime: case Type::Duration: break; + case Type::Function: + std::destroy_at(&function_v); + break; case Type::Graph: { auto *graph = graph_v.release(); std::destroy_at(&graph_v); @@ -1153,6 +1204,8 @@ size_t TypedValue::Hash::operator()(const TypedValue &value) const { case TypedValue::Type::Duration: return utils::DurationHash{}(value.ValueDuration()); break; + case TypedValue::Type::Function: + throw TypedValueException("Unsupported hash function for Function"); case TypedValue::Type::Graph: throw TypedValueException("Unsupported hash function for Graph"); } diff --git a/src/query/typed_value.hpp b/src/query/typed_value.hpp index c215e2276..a1353869a 100644 --- a/src/query/typed_value.hpp +++ b/src/query/typed_value.hpp @@ -84,7 +84,8 @@ class TypedValue { LocalTime, LocalDateTime, Duration, - Graph + Graph, + Function }; // TypedValue at this exact moment of compilation is an incomplete type, and @@ -420,6 +421,9 @@ class TypedValue { new (&graph_v) std::unique_ptr(graph_ptr); } + explicit TypedValue(std::function &&other) + : function_v(std::move(other)), type_(Type::Function) {} + /** * Construct with the value of other. * Default utils::NewDeleteResource() is used for allocations. 
After the move, @@ -451,6 +455,7 @@ class TypedValue { TypedValue &operator=(const utils::LocalTime &); TypedValue &operator=(const utils::LocalDateTime &); TypedValue &operator=(const utils::Duration &); + TypedValue &operator=(const std::function &); /** Copy assign other, utils::MemoryResource of `this` is used */ TypedValue &operator=(const TypedValue &other); @@ -506,9 +511,12 @@ class TypedValue { DECLARE_VALUE_AND_TYPE_GETTERS(utils::LocalDateTime, LocalDateTime) DECLARE_VALUE_AND_TYPE_GETTERS(utils::Duration, Duration) DECLARE_VALUE_AND_TYPE_GETTERS(Graph, Graph) + DECLARE_VALUE_AND_TYPE_GETTERS(std::function, Function) #undef DECLARE_VALUE_AND_TYPE_GETTERS + bool ContainsDeleted() const; + /** Checks if value is a TypedValue::Null. */ bool IsNull() const; @@ -550,6 +558,7 @@ class TypedValue { utils::Duration duration_v; // As the unique_ptr is not allocator aware, it requires special attention when copying or moving graphs std::unique_ptr graph_v; + std::function function_v; }; /** diff --git a/src/replication/CMakeLists.txt b/src/replication/CMakeLists.txt index 772ae5591..597ed096a 100644 --- a/src/replication/CMakeLists.txt +++ b/src/replication/CMakeLists.txt @@ -6,8 +6,10 @@ target_sources(mg-replication include/replication/epoch.hpp include/replication/config.hpp include/replication/mode.hpp + include/replication/messages.hpp include/replication/role.hpp include/replication/status.hpp + include/replication/replication_client.hpp include/replication/replication_server.hpp PRIVATE @@ -15,6 +17,8 @@ target_sources(mg-replication epoch.cpp config.cpp status.cpp + messages.cpp + replication_client.cpp replication_server.cpp ) target_include_directories(mg-replication PUBLIC include) diff --git a/src/replication/include/replication/messages.hpp b/src/replication/include/replication/messages.hpp new file mode 100644 index 000000000..57cf29351 --- /dev/null +++ b/src/replication/include/replication/messages.hpp @@ -0,0 +1,44 @@ +// Copyright 2023 Memgraph 
Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#pragma once + +#include "rpc/messages.hpp" +#include "slk/serialization.hpp" + +namespace memgraph::replication { + +struct FrequentHeartbeatReq { + static const utils::TypeInfo kType; // TODO: make constexpr? + static const utils::TypeInfo &GetTypeInfo() { return kType; } // WHAT? + + static void Load(FrequentHeartbeatReq *self, memgraph::slk::Reader *reader); + static void Save(const FrequentHeartbeatReq &self, memgraph::slk::Builder *builder); + FrequentHeartbeatReq() = default; +}; + +struct FrequentHeartbeatRes { + static const utils::TypeInfo kType; + static const utils::TypeInfo &GetTypeInfo() { return kType; } + + static void Load(FrequentHeartbeatRes *self, memgraph::slk::Reader *reader); + static void Save(const FrequentHeartbeatRes &self, memgraph::slk::Builder *builder); + FrequentHeartbeatRes() = default; + explicit FrequentHeartbeatRes(bool success) : success(success) {} + + bool success; +}; + +using FrequentHeartbeatRpc = rpc::RequestResponse; + +void FrequentHeartbeatHandler(slk::Reader *req_reader, slk::Builder *res_builder); + +} // namespace memgraph::replication diff --git a/src/replication/include/replication/replication_client.hpp b/src/replication/include/replication/replication_client.hpp new file mode 100644 index 000000000..16e1010bf --- /dev/null +++ b/src/replication/include/replication/replication_client.hpp @@ -0,0 +1,82 @@ +// Copyright 2023 Memgraph Ltd. 
+// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#pragma once + +#include "replication/config.hpp" +#include "replication/messages.hpp" +#include "rpc/client.hpp" +#include "utils/scheduler.hpp" +#include "utils/thread_pool.hpp" + +#include +#include + +namespace memgraph::replication { + +template +concept InvocableWithStringView = std::invocable; + +struct ReplicationClient { + explicit ReplicationClient(const memgraph::replication::ReplicationClientConfig &config); + + ~ReplicationClient(); + ReplicationClient(ReplicationClient const &) = delete; + ReplicationClient &operator=(ReplicationClient const &) = delete; + ReplicationClient(ReplicationClient &&) noexcept = delete; + ReplicationClient &operator=(ReplicationClient &&) noexcept = delete; + + template + void StartFrequentCheck(F &&callback) { + // Help the user to get the most accurate replica state possible. 
+ if (replica_check_frequency_ > std::chrono::seconds(0)) { + replica_checker_.Run("Replica Checker", replica_check_frequency_, [this, cb = std::forward(callback)] { + try { + bool success = false; + { + auto stream{rpc_client_.Stream()}; + success = stream.AwaitResponse().success; + } + if (success) { + cb(name_); + } + } catch (const rpc::RpcFailedException &) { + // Nothing to do...wait for a reconnect + } + }); + } + } + + std::string name_; + communication::ClientContext rpc_context_; + rpc::Client rpc_client_; + std::chrono::seconds replica_check_frequency_; + + memgraph::replication::ReplicationMode mode_{memgraph::replication::ReplicationMode::SYNC}; + // This thread pool is used for background tasks so we don't + // block the main storage thread + // We use only 1 thread for 2 reasons: + // - background tasks ALWAYS contain some kind of RPC communication. + // We can't have multiple RPC communication from a same client + // because that's not logically valid (e.g. you cannot send a snapshot + // and WAL at a same time because WAL will arrive earlier and be applied + // before the snapshot which is not correct) + // - the implementation is simplified as we have a total control of what + // this pool is executing. Also, we can simply queue multiple tasks + // and be sure of the execution order. + // Not having mulitple possible threads in the same client allows us + // to ignore concurrency problems inside the client. 
+ utils::ThreadPool thread_pool_{1}; + + utils::Scheduler replica_checker_; +}; + +} // namespace memgraph::replication diff --git a/src/replication/include/replication/replication_server.hpp b/src/replication/include/replication/replication_server.hpp index e9ca1b549..5ff41b8a5 100644 --- a/src/replication/include/replication/replication_server.hpp +++ b/src/replication/include/replication/replication_server.hpp @@ -17,30 +17,6 @@ namespace memgraph::replication { -struct FrequentHeartbeatReq { - static const utils::TypeInfo kType; // TODO: make constexpr? - static const utils::TypeInfo &GetTypeInfo() { return kType; } // WHAT? - - static void Load(FrequentHeartbeatReq *self, memgraph::slk::Reader *reader); - static void Save(const FrequentHeartbeatReq &self, memgraph::slk::Builder *builder); - FrequentHeartbeatReq() = default; -}; - -struct FrequentHeartbeatRes { - static const utils::TypeInfo kType; - static const utils::TypeInfo &GetTypeInfo() { return kType; } - - static void Load(FrequentHeartbeatRes *self, memgraph::slk::Reader *reader); - static void Save(const FrequentHeartbeatRes &self, memgraph::slk::Builder *builder); - FrequentHeartbeatRes() = default; - explicit FrequentHeartbeatRes(bool success) : success(success) {} - - bool success; -}; - -// TODO: move to own header -using FrequentHeartbeatRpc = rpc::RequestResponse; - class ReplicationServer { public: explicit ReplicationServer(const memgraph::replication::ReplicationServerConfig &config); diff --git a/src/replication/include/replication/state.hpp b/src/replication/include/replication/state.hpp index 0460d0a9d..76aec1053 100644 --- a/src/replication/include/replication/state.hpp +++ b/src/replication/include/replication/state.hpp @@ -11,19 +11,22 @@ #pragma once -#include -#include -#include -#include - #include "kvstore/kvstore.hpp" #include "replication/config.hpp" #include "replication/epoch.hpp" #include "replication/mode.hpp" +#include "replication/replication_client.hpp" #include 
"replication/role.hpp" #include "replication_server.hpp" #include "status.hpp" #include "utils/result.hpp" +#include "utils/synchronized.hpp" + +#include +#include +#include +#include +#include namespace memgraph::replication { @@ -32,8 +35,17 @@ enum class RolePersisted : uint8_t { UNKNOWN_OR_NO, YES }; enum class RegisterReplicaError : uint8_t { NAME_EXISTS, END_POINT_EXISTS, COULD_NOT_BE_PERSISTED, NOT_MAIN, SUCCESS }; struct RoleMainData { + RoleMainData() = default; + explicit RoleMainData(ReplicationEpoch e) : epoch_(std::move(e)) {} + ~RoleMainData() = default; + + RoleMainData(RoleMainData const &) = delete; + RoleMainData &operator=(RoleMainData const &) = delete; + RoleMainData(RoleMainData &&) = default; + RoleMainData &operator=(RoleMainData &&) = default; + ReplicationEpoch epoch_; - std::vector registered_replicas_; + std::list registered_replicas_{}; }; struct RoleReplicaData { @@ -41,8 +53,10 @@ struct RoleReplicaData { std::unique_ptr server; }; +// Global (instance) level object struct ReplicationState { explicit ReplicationState(std::optional durability_dir); + ~ReplicationState() = default; ReplicationState(ReplicationState const &) = delete; ReplicationState(ReplicationState &&) = delete; @@ -74,7 +88,7 @@ struct ReplicationState { // TODO: locked access auto ReplicationData() -> ReplicationData_t & { return replication_data_; } auto ReplicationData() const -> ReplicationData_t const & { return replication_data_; } - auto RegisterReplica(const ReplicationClientConfig &config) -> RegisterReplicaError; + utils::BasicResult RegisterReplica(const ReplicationClientConfig &config); bool SetReplicationRoleMain(); diff --git a/src/replication/messages.cpp b/src/replication/messages.cpp new file mode 100644 index 000000000..4503e9df2 --- /dev/null +++ b/src/replication/messages.cpp @@ -0,0 +1,65 @@ +// Copyright 2023 Memgraph Ltd. 
+// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#include "replication/messages.hpp" +#include "rpc/messages.hpp" +#include "slk/serialization.hpp" +#include "slk/streams.hpp" + +namespace memgraph::slk { +// Serialize code for FrequentHeartbeatRes +void Save(const memgraph::replication::FrequentHeartbeatRes &self, memgraph::slk::Builder *builder) { + memgraph::slk::Save(self.success, builder); +} +void Load(memgraph::replication::FrequentHeartbeatRes *self, memgraph::slk::Reader *reader) { + memgraph::slk::Load(&self->success, reader); +} + +// Serialize code for FrequentHeartbeatReq +void Save(const memgraph::replication::FrequentHeartbeatReq & /*self*/, memgraph::slk::Builder * /*builder*/) { + /* Nothing to serialize */ +} +void Load(memgraph::replication::FrequentHeartbeatReq * /*self*/, memgraph::slk::Reader * /*reader*/) { + /* Nothing to serialize */ +} + +} // namespace memgraph::slk + +namespace memgraph::replication { + +constexpr utils::TypeInfo FrequentHeartbeatReq::kType{utils::TypeId::REP_FREQUENT_HEARTBEAT_REQ, "FrequentHeartbeatReq", + nullptr}; + +constexpr utils::TypeInfo FrequentHeartbeatRes::kType{utils::TypeId::REP_FREQUENT_HEARTBEAT_RES, "FrequentHeartbeatRes", + nullptr}; + +void FrequentHeartbeatReq::Save(const FrequentHeartbeatReq &self, memgraph::slk::Builder *builder) { + memgraph::slk::Save(self, builder); +} +void FrequentHeartbeatReq::Load(FrequentHeartbeatReq *self, memgraph::slk::Reader *reader) { + memgraph::slk::Load(self, reader); +} +void 
FrequentHeartbeatRes::Save(const FrequentHeartbeatRes &self, memgraph::slk::Builder *builder) { +  memgraph::slk::Save(self, builder); +} +void FrequentHeartbeatRes::Load(FrequentHeartbeatRes *self, memgraph::slk::Reader *reader) { +  memgraph::slk::Load(self, reader); +} + +void FrequentHeartbeatHandler(slk::Reader *req_reader, slk::Builder *res_builder) { +  FrequentHeartbeatReq req; +  FrequentHeartbeatReq::Load(&req, req_reader); +  // NOTE(review): FrequentHeartbeatReq has no payload; the Load above fully consumed the request, so a second slk::Load would be a redundant double-read. +  FrequentHeartbeatRes res{true}; +  memgraph::slk::Save(res, res_builder); +} + +} // namespace memgraph::replication diff --git a/src/replication/replication_client.cpp b/src/replication/replication_client.cpp new file mode 100644 index 000000000..d14250c2a --- /dev/null +++ b/src/replication/replication_client.cpp @@ -0,0 +1,40 @@ +// Copyright 2023 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#include "replication/replication_client.hpp" + +namespace memgraph::replication { + +static auto CreateClientContext(const memgraph::replication::ReplicationClientConfig &config) +    -> communication::ClientContext { +  return (config.ssl) ? 
communication::ClientContext{config.ssl->key_file, config.ssl->cert_file} + : communication::ClientContext{}; +} + +ReplicationClient::ReplicationClient(const memgraph::replication::ReplicationClientConfig &config) + : name_{config.name}, + rpc_context_{CreateClientContext(config)}, + rpc_client_{io::network::Endpoint(io::network::Endpoint::needs_resolving, config.ip_address, config.port), + &rpc_context_}, + replica_check_frequency_{config.replica_check_frequency}, + mode_{config.mode} {} + +ReplicationClient::~ReplicationClient() { + auto endpoint = rpc_client_.Endpoint(); + try { + spdlog::trace("Closing replication client on {}:{}", endpoint.address, endpoint.port); + } catch (...) { + // Logging can throw. Not a big deal, just ignore. + } + thread_pool_.Shutdown(); +} + +} // namespace memgraph::replication diff --git a/src/replication/replication_server.cpp b/src/replication/replication_server.cpp index 7d0ff3cc2..f79ea2add 100644 --- a/src/replication/replication_server.cpp +++ b/src/replication/replication_server.cpp @@ -10,25 +10,7 @@ // licenses/APL.txt. 
#include "replication/replication_server.hpp" -#include "rpc/messages.hpp" -#include "slk/serialization.hpp" -#include "slk/streams.hpp" - -namespace memgraph::slk { - -// Serialize code for FrequentHeartbeatRes -void Save(const memgraph::replication::FrequentHeartbeatRes &self, memgraph::slk::Builder *builder) { - memgraph::slk::Save(self.success, builder); -} -void Load(memgraph::replication::FrequentHeartbeatRes *self, memgraph::slk::Reader *reader) { - memgraph::slk::Load(&self->success, reader); -} - -// Serialize code for FrequentHeartbeatReq -void Save(const memgraph::replication::FrequentHeartbeatReq &self, memgraph::slk::Builder *builder) {} -void Load(memgraph::replication::FrequentHeartbeatReq *self, memgraph::slk::Reader *reader) {} - -} // namespace memgraph::slk +#include "replication/messages.hpp" namespace memgraph::replication { namespace { @@ -39,13 +21,6 @@ auto CreateServerContext(const memgraph::replication::ReplicationServerConfig &c : communication::ServerContext{}; } -void FrequentHeartbeatHandler(slk::Reader *req_reader, slk::Builder *res_builder) { - FrequentHeartbeatReq req; - memgraph::slk::Load(&req, req_reader); - FrequentHeartbeatRes res{true}; - memgraph::slk::Save(res, res_builder); -} - // NOTE: The replication server must have a single thread for processing // because there is no need for more processing threads - each replica can // have only a single main server. 
Also, the single-threaded guarantee @@ -53,25 +28,6 @@ void FrequentHeartbeatHandler(slk::Reader *req_reader, slk::Builder *res_builder constexpr auto kReplicationServerThreads = 1; } // namespace -constexpr utils::TypeInfo FrequentHeartbeatReq::kType{utils::TypeId::REP_FREQUENT_HEARTBEAT_REQ, "FrequentHeartbeatReq", - nullptr}; - -constexpr utils::TypeInfo FrequentHeartbeatRes::kType{utils::TypeId::REP_FREQUENT_HEARTBEAT_RES, "FrequentHeartbeatRes", - nullptr}; - -void FrequentHeartbeatReq::Save(const FrequentHeartbeatReq &self, memgraph::slk::Builder *builder) { - memgraph::slk::Save(self, builder); -} -void FrequentHeartbeatReq::Load(FrequentHeartbeatReq *self, memgraph::slk::Reader *reader) { - memgraph::slk::Load(self, reader); -} -void FrequentHeartbeatRes::Save(const FrequentHeartbeatRes &self, memgraph::slk::Builder *builder) { - memgraph::slk::Save(self, builder); -} -void FrequentHeartbeatRes::Load(FrequentHeartbeatRes *self, memgraph::slk::Reader *reader) { - memgraph::slk::Load(self, reader); -} - ReplicationServer::ReplicationServer(const memgraph::replication::ReplicationServerConfig &config) : rpc_server_context_{CreateServerContext(config)}, rpc_server_{io::network::Endpoint{config.ip_address, config.port}, &rpc_server_context_, diff --git a/src/replication/state.cpp b/src/replication/state.cpp index 4551eba7e..60c390e17 100644 --- a/src/replication/state.cpp +++ b/src/replication/state.cpp @@ -11,9 +11,11 @@ #include "replication/state.hpp" +#include "replication/replication_client.hpp" #include "replication/replication_server.hpp" #include "replication/status.hpp" #include "utils/file.hpp" +#include "utils/result.hpp" #include "utils/variant_helpers.hpp" constexpr auto kReplicationDirectory = std::string_view{"replication"}; @@ -125,12 +127,9 @@ auto ReplicationState::FetchReplicationData() -> FetchReplicationResult_t { return std::visit( utils::Overloaded{ [&](durability::MainRole &&r) -> FetchReplicationResult_t { - auto res = RoleMainData{ - 
.epoch_ = std::move(r.epoch), - }; + auto res = RoleMainData{std::move(r.epoch)}; auto b = durability_->begin(durability::kReplicationReplicaPrefix); auto e = durability_->end(durability::kReplicationReplicaPrefix); - res.registered_replicas_.reserve(durability_->Size(durability::kReplicationReplicaPrefix)); for (; b != e; ++b) { auto const &[replica_name, replica_data] = *b; auto json = nlohmann::json::parse(replica_data, nullptr, false); @@ -141,7 +140,8 @@ auto ReplicationState::FetchReplicationData() -> FetchReplicationResult_t { if (key_name != data.config.name) { return FetchReplicationError::PARSE_ERROR; } - res.registered_replicas_.emplace_back(std::move(data.config)); + // Instance clients + res.registered_replicas_.emplace_back(data.config); } catch (...) { return FetchReplicationError::PARSE_ERROR; } @@ -221,7 +221,7 @@ bool ReplicationState::SetReplicationRoleMain() { if (!TryPersistRoleMain(new_epoch)) { return false; } - replication_data_ = RoleMainData{.epoch_ = ReplicationEpoch{new_epoch}}; + replication_data_ = RoleMainData{ReplicationEpoch{new_epoch}}; return true; } @@ -233,16 +233,14 @@ bool ReplicationState::SetReplicationRoleReplica(const ReplicationServerConfig & return true; } -auto ReplicationState::RegisterReplica(const ReplicationClientConfig &config) -> RegisterReplicaError { - auto const replica_handler = [](RoleReplicaData const &) -> RegisterReplicaError { - return RegisterReplicaError::NOT_MAIN; - }; - auto const main_handler = [this, &config](RoleMainData &mainData) -> RegisterReplicaError { +utils::BasicResult ReplicationState::RegisterReplica( + const ReplicationClientConfig &config) { + auto const replica_handler = [](RoleReplicaData const &) { return RegisterReplicaError::NOT_MAIN; }; + ReplicationClient *client{nullptr}; + auto const main_handler = [&client, &config, this](RoleMainData &mainData) -> RegisterReplicaError { // name check auto name_check = [&config](auto const &replicas) { - auto name_matches = [&name = 
config.name](ReplicationClientConfig const ®istered_config) { - return registered_config.name == name; - }; + auto name_matches = [&name = config.name](auto const &replica) { return replica.name_ == name; }; return std::any_of(replicas.begin(), replicas.end(), name_matches); }; if (name_check(mainData.registered_replicas_)) { @@ -251,8 +249,9 @@ auto ReplicationState::RegisterReplica(const ReplicationClientConfig &config) -> // endpoint check auto endpoint_check = [&](auto const &replicas) { - auto endpoint_matches = [&config](ReplicationClientConfig const ®istered_config) { - return registered_config.ip_address == config.ip_address && registered_config.port == config.port; + auto endpoint_matches = [&config](auto const &replica) { + const auto &ep = replica.rpc_client_.Endpoint(); + return ep.address == config.ip_address && ep.port == config.port; }; return std::any_of(replicas.begin(), replicas.end(), endpoint_matches); }; @@ -266,10 +265,14 @@ auto ReplicationState::RegisterReplica(const ReplicationClientConfig &config) -> } // set - mainData.registered_replicas_.emplace_back(config); + client = &mainData.registered_replicas_.emplace_back(config); return RegisterReplicaError::SUCCESS; }; - return std::visit(utils::Overloaded{main_handler, replica_handler}, replication_data_); + const auto &res = std::visit(utils::Overloaded{main_handler, replica_handler}, replication_data_); + if (res == RegisterReplicaError::SUCCESS) { + return client; + } + return res; } } // namespace memgraph::replication diff --git a/src/rpc/client.hpp b/src/rpc/client.hpp index f727391ac..1fd3fff8d 100644 --- a/src/rpc/client.hpp +++ b/src/rpc/client.hpp @@ -14,6 +14,7 @@ #include #include #include +#include #include "communication/client.hpp" #include "io/network/endpoint.hpp" @@ -41,16 +42,25 @@ class Client { StreamHandler(Client *self, std::unique_lock &&guard, std::function res_load) - : self_(self), - guard_(std::move(guard)), - req_builder_([self](const uint8_t *data, size_t size, 
bool have_more) { -        if (!self->client_->Write(data, size, have_more)) throw GenericRpcFailedException(); -      }), -      res_load_(res_load) {} +      : self_(self), guard_(std::move(guard)), req_builder_(GenBuilderCallback(self, this)), res_load_(res_load) {} public: -  StreamHandler(StreamHandler &&) noexcept = default; -  StreamHandler &operator=(StreamHandler &&) noexcept = default; +  StreamHandler(StreamHandler &&other) noexcept +      : self_{std::exchange(other.self_, nullptr)}, +        defunct_{std::exchange(other.defunct_, true)}, +        guard_{std::move(other.guard_)}, +        req_builder_{std::move(other.req_builder_), GenBuilderCallback(self_, this)}, +        res_load_{std::move(other.res_load_)} {} +  StreamHandler &operator=(StreamHandler &&other) noexcept { +    if (&other != this) { +      self_ = std::exchange(other.self_, nullptr); +      defunct_ = std::exchange(other.defunct_, true); +      guard_ = std::move(other.guard_); +      req_builder_ = slk::Builder(std::move(other.req_builder_), GenBuilderCallback(self_, this)); +      res_load_ = std::move(other.res_load_); +    } +    return *this; +  } StreamHandler(const StreamHandler &) = delete; StreamHandler &operator=(const StreamHandler &) = delete; @@ -70,10 +80,18 @@ class Client { while (true) { auto ret = slk::CheckStreamComplete(self_->client_->GetData(), self_->client_->GetDataSize()); if (ret.status == slk::StreamStatus::INVALID) { +          // Logically invalid state, connection is still up, defunct stream and release +          defunct_ = true; +          guard_.unlock(); throw GenericRpcFailedException(); -        } else if (ret.status == slk::StreamStatus::PARTIAL) { +        } +        if (ret.status == slk::StreamStatus::PARTIAL) { if (!self_->client_->Read(ret.stream_size - self_->client_->GetDataSize(), /* exactly_len = */ false)) { +            // Failed connection, abort and let somebody retry in the future +            defunct_ = true; +            self_->Abort(); +            guard_.unlock(); throw GenericRpcFailedException(); } } else { @@ -103,7 +121,9 @@ class Client { // Check the response ID. 
if (res_id != res_type.id && res_id != utils::TypeId::UNKNOWN) { spdlog::error("Message response was of unexpected type"); - self_->client_ = std::nullopt; + // Logically invalid state, connection is still up, defunct stream and release + defunct_ = true; + guard_.unlock(); throw GenericRpcFailedException(); } @@ -112,8 +132,23 @@ class Client { return res_load_(&res_reader); } + bool IsDefunct() const { return defunct_; } + private: + static auto GenBuilderCallback(Client *client, StreamHandler *self) { + return [client, self](const uint8_t *data, size_t size, bool have_more) { + if (self->defunct_) throw GenericRpcFailedException(); + if (!client->client_->Write(data, size, have_more)) { + self->defunct_ = true; + client->Abort(); + self->guard_.unlock(); + throw GenericRpcFailedException(); + } + }; + } + Client *self_; + bool defunct_ = false; std::unique_lock guard_; slk::Builder req_builder_; std::function res_load_; @@ -179,7 +214,7 @@ class Client { TRequestResponse::Request::Save(request, handler.GetBuilder()); // Return the handler to the user. - return std::move(handler); + return handler; } /// Call a previously defined and registered RPC call. 
This function can diff --git a/src/slk/streams.cpp b/src/slk/streams.cpp index 5125d635a..dc5ef8c3c 100644 --- a/src/slk/streams.cpp +++ b/src/slk/streams.cpp @@ -30,7 +30,7 @@ void Builder::Save(const uint8_t *data, uint64_t size) { to_write = kSegmentMaxDataSize - pos_; } - memcpy(segment_ + sizeof(SegmentSize) + pos_, data + offset, to_write); + memcpy(segment_.data() + sizeof(SegmentSize) + pos_, data + offset, to_write); size -= to_write; pos_ += to_write; @@ -48,15 +48,15 @@ void Builder::FlushSegment(bool final_segment) { size_t total_size = sizeof(SegmentSize) + pos_; SegmentSize size = pos_; - memcpy(segment_, &size, sizeof(SegmentSize)); + memcpy(segment_.data(), &size, sizeof(SegmentSize)); if (final_segment) { SegmentSize footer = 0; - memcpy(segment_ + total_size, &footer, sizeof(SegmentSize)); + memcpy(segment_.data() + total_size, &footer, sizeof(SegmentSize)); total_size += sizeof(SegmentSize); } - write_func_(segment_, total_size, !final_segment); + write_func_(segment_.data(), total_size, !final_segment); pos_ = 0; } diff --git a/src/slk/streams.hpp b/src/slk/streams.hpp index 587b7830b..691189443 100644 --- a/src/slk/streams.hpp +++ b/src/slk/streams.hpp @@ -46,7 +46,11 @@ static_assert(kSegmentMaxDataSize <= std::numeric_limits::max(), /// Builder used to create a SLK segment stream. class Builder { public: - Builder(std::function write_func); + explicit Builder(std::function write_func); + Builder(Builder &&other, std::function write_func) + : write_func_{std::move(write_func)}, pos_{std::exchange(other.pos_, 0)}, segment_{other.segment_} { + other.write_func_ = [](const uint8_t *, size_t, bool) { /* Moved builder is defunct, no write possible */ }; + } /// Function used internally by SLK to serialize the data. 
void Save(const uint8_t *data, uint64_t size); @@ -59,7 +63,7 @@ class Builder { std::function write_func_; size_t pos_{0}; - uint8_t segment_[kSegmentMaxTotalSize]; + std::array segment_; }; /// Exception that will be thrown if segments can't be decoded from the byte diff --git a/src/storage/v2/CMakeLists.txt b/src/storage/v2/CMakeLists.txt index 9f6d8d4d7..150a02cc7 100644 --- a/src/storage/v2/CMakeLists.txt +++ b/src/storage/v2/CMakeLists.txt @@ -39,6 +39,7 @@ add_library(mg-storage-v2 STATIC replication/slk.cpp replication/rpc.cpp replication/replication_storage_state.cpp - inmemory/replication/replication_client.cpp + inmemory/replication/recovery.cpp ) + target_link_libraries(mg-storage-v2 mg::replication Threads::Threads mg-utils gflags absl::flat_hash_map mg-rpc mg-slk mg-events mg-memory) diff --git a/src/storage/v2/config.hpp b/src/storage/v2/config.hpp index 7ea7e95b7..dee2afe87 100644 --- a/src/storage/v2/config.hpp +++ b/src/storage/v2/config.hpp @@ -40,6 +40,7 @@ struct Config { struct Items { bool properties_on_edges{true}; + bool enable_schema_metadata{false}; friend bool operator==(const Items &lrh, const Items &rhs) = default; } items; @@ -64,7 +65,10 @@ struct Config { uint64_t items_per_batch{1'000'000}; uint64_t recovery_thread_count{8}; + // deprecated bool allow_parallel_index_creation{false}; + + bool allow_parallel_schema_creation{false}; friend bool operator==(const Durability &lrh, const Durability &rhs) = default; } durability; diff --git a/src/storage/v2/constraints/constraints.cpp b/src/storage/v2/constraints/constraints.cpp index 42128511c..6a6554db4 100644 --- a/src/storage/v2/constraints/constraints.cpp +++ b/src/storage/v2/constraints/constraints.cpp @@ -29,4 +29,8 @@ Constraints::Constraints(const Config &config, StorageMode storage_mode) { }; }); } + +void Constraints::AbortEntries(std::span vertices, uint64_t exact_start_timestamp) const { + static_cast(unique_constraints_.get())->AbortEntries(vertices, exact_start_timestamp); 
+} } // namespace memgraph::storage diff --git a/src/storage/v2/constraints/constraints.hpp b/src/storage/v2/constraints/constraints.hpp index 8469a5470..1f5ef999e 100644 --- a/src/storage/v2/constraints/constraints.hpp +++ b/src/storage/v2/constraints/constraints.hpp @@ -11,6 +11,8 @@ #pragma once +#include + #include "storage/v2/config.hpp" #include "storage/v2/constraints/existence_constraints.hpp" #include "storage/v2/constraints/unique_constraints.hpp" @@ -27,6 +29,8 @@ struct Constraints { Constraints &operator=(Constraints &&) = delete; ~Constraints() = default; + void AbortEntries(std::span vertices, uint64_t exact_start_timestamp) const; + std::unique_ptr existence_constraints_; std::unique_ptr unique_constraints_; }; diff --git a/src/storage/v2/constraints/existence_constraints.cpp b/src/storage/v2/constraints/existence_constraints.cpp index a0d303c03..956e0a208 100644 --- a/src/storage/v2/constraints/existence_constraints.cpp +++ b/src/storage/v2/constraints/existence_constraints.cpp @@ -11,10 +11,11 @@ #include "storage/v2/constraints/existence_constraints.hpp" #include "storage/v2/constraints/constraints.hpp" +#include "storage/v2/constraints/utils.hpp" #include "storage/v2/id_types.hpp" #include "storage/v2/mvcc.hpp" #include "utils/logging.hpp" - +#include "utils/rw_spin_lock.hpp" namespace memgraph::storage { bool ExistenceConstraints::ConstraintExists(LabelId label, PropertyId property) const { @@ -55,4 +56,70 @@ void ExistenceConstraints::LoadExistenceConstraints(const std::vector ExistenceConstraints::ValidateVertexOnConstraint( + const Vertex &vertex, const LabelId &label, const PropertyId &property) { + if (!vertex.deleted && utils::Contains(vertex.labels, label) && !vertex.properties.HasProperty(property)) { + return ConstraintViolation{ConstraintViolation::Type::EXISTENCE, label, std::set{property}}; + } + return std::nullopt; +} + +std::variant +ExistenceConstraints::GetCreationFunction( + const std::optional &par_exec_info) { + if 
(par_exec_info.has_value()) { + return ExistenceConstraints::MultipleThreadsConstraintValidation{par_exec_info.value()}; + } + return ExistenceConstraints::SingleThreadConstraintValidation{}; +} + +[[nodiscard]] std::optional ExistenceConstraints::ValidateVerticesOnConstraint( + utils::SkipList::Accessor vertices, LabelId label, PropertyId property, + const std::optional ¶llel_exec_info) { + auto calling_existence_validation_function = GetCreationFunction(parallel_exec_info); + return std::visit( + [&vertices, &label, &property](auto &calling_object) { return calling_object(vertices, label, property); }, + calling_existence_validation_function); +} + +std::optional ExistenceConstraints::MultipleThreadsConstraintValidation::operator()( + const utils::SkipList::Accessor &vertices, const LabelId &label, const PropertyId &property) { + utils::MemoryTracker::OutOfMemoryExceptionEnabler oom_exception; + + const auto &vertex_batches = parallel_exec_info.vertex_recovery_info; + MG_ASSERT(!vertex_batches.empty(), + "The size of batches should always be greater than zero if you want to use the parallel version of index " + "creation!"); + const auto thread_count = std::min(parallel_exec_info.thread_count, vertex_batches.size()); + + std::atomic batch_counter = 0; + memgraph::utils::Synchronized, utils::RWSpinLock> maybe_error{}; + { + std::vector threads; + threads.reserve(thread_count); + + for (auto i{0U}; i < thread_count; ++i) { + threads.emplace_back([&maybe_error, &vertex_batches, &batch_counter, &vertices, &label, &property]() { + do_per_thread_validation(maybe_error, ValidateVertexOnConstraint, vertex_batches, batch_counter, vertices, + label, property); + }); + } + } + if (maybe_error.Lock()->has_value()) { + return maybe_error->value(); + } + return std::nullopt; +} + +std::optional ExistenceConstraints::SingleThreadConstraintValidation::operator()( + const utils::SkipList::Accessor &vertices, const LabelId &label, const PropertyId &property) { + for (const Vertex 
&vertex : vertices) { + if (auto violation = ValidateVertexOnConstraint(vertex, label, property); violation.has_value()) { + return violation; + } + } + return std::nullopt; +} + } // namespace memgraph::storage diff --git a/src/storage/v2/constraints/existence_constraints.hpp b/src/storage/v2/constraints/existence_constraints.hpp index 77f7bc43a..c3b68828a 100644 --- a/src/storage/v2/constraints/existence_constraints.hpp +++ b/src/storage/v2/constraints/existence_constraints.hpp @@ -11,34 +11,45 @@ #pragma once +#include #include +#include +#include #include "storage/v2/constraints/constraint_violation.hpp" +#include "storage/v2/durability/recovery_type.hpp" #include "storage/v2/vertex.hpp" #include "utils/skip_list.hpp" +#include "utils/synchronized.hpp" namespace memgraph::storage { class ExistenceConstraints { + private: + std::vector> constraints_; + public: + struct MultipleThreadsConstraintValidation { + std::optional operator()(const utils::SkipList::Accessor &vertices, + const LabelId &label, const PropertyId &property); + + const durability::ParallelizedSchemaCreationInfo ¶llel_exec_info; + }; + struct SingleThreadConstraintValidation { + std::optional operator()(const utils::SkipList::Accessor &vertices, + const LabelId &label, const PropertyId &property); + }; + [[nodiscard]] static std::optional ValidateVertexOnConstraint(const Vertex &vertex, - LabelId label, - PropertyId property) { - if (!vertex.deleted && utils::Contains(vertex.labels, label) && !vertex.properties.HasProperty(property)) { - return ConstraintViolation{ConstraintViolation::Type::EXISTENCE, label, std::set{property}}; - } - return std::nullopt; - } + const LabelId &label, + const PropertyId &property); [[nodiscard]] static std::optional ValidateVerticesOnConstraint( - utils::SkipList::Accessor vertices, LabelId label, PropertyId property) { - for (const auto &vertex : vertices) { - if (auto violation = ValidateVertexOnConstraint(vertex, label, property); violation.has_value()) { - 
return violation; - } - } - return std::nullopt; - } + utils::SkipList::Accessor vertices, LabelId label, PropertyId property, + const std::optional ¶llel_exec_info = std::nullopt); + + static std::variant GetCreationFunction( + const std::optional &); bool ConstraintExists(LabelId label, PropertyId property) const; @@ -54,9 +65,6 @@ class ExistenceConstraints { std::vector> ListConstraints() const; void LoadExistenceConstraints(const std::vector &keys); - - private: - std::vector> constraints_; }; } // namespace memgraph::storage diff --git a/src/storage/v2/constraints/utils.hpp b/src/storage/v2/constraints/utils.hpp new file mode 100644 index 000000000..ca48708ff --- /dev/null +++ b/src/storage/v2/constraints/utils.hpp @@ -0,0 +1,42 @@ +// Copyright 2023 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +#include +#include "storage/v2/vertex.hpp" +#include "utils/skip_list.hpp" + +namespace memgraph::storage { +template +void do_per_thread_validation(ErrorType &maybe_error, Func &&func, + const std::vector> &vertex_batches, + std::atomic &batch_counter, + const memgraph::utils::SkipList::Accessor &vertices, + Args &&...args) { + while (!maybe_error.ReadLock()->has_value()) { + const auto batch_index = batch_counter.fetch_add(1, std::memory_order_acquire); + if (batch_index >= vertex_batches.size()) { + return; + } + const auto &[gid_start, batch_size] = vertex_batches[batch_index]; + + auto vertex_curr = vertices.find(gid_start); + DMG_ASSERT(vertex_curr != vertices.end(), "No vertex was found with given gid"); + for (auto i{0U}; i < batch_size; ++i, ++vertex_curr) { + const auto violation = func(*vertex_curr, std::forward(args)...); + if (!violation.has_value()) [[likely]] { + continue; + } + maybe_error.WithLock([&violation](auto &maybe_error) { maybe_error = *violation; }); + break; + } + } +} +} // namespace memgraph::storage diff --git a/src/storage/v2/disk/edge_import_mode_cache.cpp b/src/storage/v2/disk/edge_import_mode_cache.cpp index 2a29a1606..b6621281f 100644 --- a/src/storage/v2/disk/edge_import_mode_cache.cpp +++ b/src/storage/v2/disk/edge_import_mode_cache.cpp @@ -10,7 +10,9 @@ // licenses/APL.txt. 
#include "storage/v2/disk//edge_import_mode_cache.hpp" + #include + #include "storage/v2/disk/label_property_index.hpp" #include "storage/v2/indices/indices.hpp" #include "storage/v2/inmemory/label_index.hpp" @@ -28,7 +30,7 @@ EdgeImportModeCache::EdgeImportModeCache(const Config &config) InMemoryLabelIndex::Iterable EdgeImportModeCache::Vertices(LabelId label, View view, Storage *storage, Transaction *transaction) const { auto *mem_label_index = static_cast(in_memory_indices_.label_index_.get()); - return mem_label_index->Vertices(label, view, storage, transaction); + return mem_label_index->Vertices(label, vertices_.access(), view, storage, transaction); } InMemoryLabelPropertyIndex::Iterable EdgeImportModeCache::Vertices( @@ -37,11 +39,13 @@ InMemoryLabelPropertyIndex::Iterable EdgeImportModeCache::Vertices( Transaction *transaction) const { auto *mem_label_property_index = static_cast(in_memory_indices_.label_property_index_.get()); - return mem_label_property_index->Vertices(label, property, lower_bound, upper_bound, view, storage, transaction); + return mem_label_property_index->Vertices(label, property, vertices_.access(), lower_bound, upper_bound, view, + storage, transaction); } -bool EdgeImportModeCache::CreateIndex(LabelId label, PropertyId property, - const std::optional ¶llel_exec_info) { +bool EdgeImportModeCache::CreateIndex( + LabelId label, PropertyId property, + const std::optional ¶llel_exec_info) { auto *mem_label_property_index = static_cast(in_memory_indices_.label_property_index_.get()); bool res = mem_label_property_index->CreateIndex(label, property, vertices_.access(), parallel_exec_info); @@ -51,8 +55,8 @@ bool EdgeImportModeCache::CreateIndex(LabelId label, PropertyId property, return res; } -bool EdgeImportModeCache::CreateIndex(LabelId label, - const std::optional ¶llel_exec_info) { +bool EdgeImportModeCache::CreateIndex( + LabelId label, const std::optional ¶llel_exec_info) { auto *mem_label_index = 
static_cast(in_memory_indices_.label_index_.get()); bool res = mem_label_index->CreateIndex(label, vertices_.access(), parallel_exec_info); if (res) { diff --git a/src/storage/v2/disk/edge_import_mode_cache.hpp b/src/storage/v2/disk/edge_import_mode_cache.hpp index 78ffcff59..02af960d5 100644 --- a/src/storage/v2/disk/edge_import_mode_cache.hpp +++ b/src/storage/v2/disk/edge_import_mode_cache.hpp @@ -42,9 +42,10 @@ class EdgeImportModeCache final { View view, Storage *storage, Transaction *transaction) const; bool CreateIndex(LabelId label, PropertyId property, - const std::optional ¶llel_exec_info = {}); + const std::optional ¶llel_exec_info = {}); - bool CreateIndex(LabelId label, const std::optional ¶llel_exec_info = {}); + bool CreateIndex(LabelId label, + const std::optional ¶llel_exec_info = {}); bool VerticesWithLabelPropertyScanned(LabelId label, PropertyId property) const; diff --git a/src/storage/v2/disk/label_property_index.hpp b/src/storage/v2/disk/label_property_index.hpp index 26f972d79..a6842200f 100644 --- a/src/storage/v2/disk/label_property_index.hpp +++ b/src/storage/v2/disk/label_property_index.hpp @@ -17,10 +17,6 @@ namespace memgraph::storage { -/// TODO: andi. 
Too many copies, extract at one place -using ParallelizedIndexCreationInfo = - std::pair> /*vertex_recovery_info*/, uint64_t /*thread_count*/>; - class DiskLabelPropertyIndex : public storage::LabelPropertyIndex { public: explicit DiskLabelPropertyIndex(const Config &config); diff --git a/src/storage/v2/disk/storage.cpp b/src/storage/v2/disk/storage.cpp index 09b28943c..3f0ab9572 100644 --- a/src/storage/v2/disk/storage.cpp +++ b/src/storage/v2/disk/storage.cpp @@ -71,6 +71,37 @@ namespace memgraph::storage { +namespace { + +auto FindEdges(const View view, EdgeTypeId edge_type, const VertexAccessor *from_vertex, VertexAccessor *to_vertex) + -> Result { + auto use_out_edges = [](Vertex const *from_vertex, Vertex const *to_vertex) { + // Obtain the locks by `gid` order to avoid lock cycles. + auto guard_from = std::unique_lock{from_vertex->lock, std::defer_lock}; + auto guard_to = std::unique_lock{to_vertex->lock, std::defer_lock}; + if (from_vertex->gid < to_vertex->gid) { + guard_from.lock(); + guard_to.lock(); + } else if (from_vertex->gid > to_vertex->gid) { + guard_to.lock(); + guard_from.lock(); + } else { + // The vertices are the same vertex, only lock one. + guard_from.lock(); + } + + // With the potentially cheaper side FindEdges + const auto out_n = from_vertex->out_edges.size(); + const auto in_n = to_vertex->in_edges.size(); + return out_n <= in_n; + }; + + return use_out_edges(from_vertex->vertex_, to_vertex->vertex_) ? 
from_vertex->OutEdges(view, {edge_type}, to_vertex) + : to_vertex->InEdges(view, {edge_type}, from_vertex); +} + +} // namespace + using OOMExceptionEnabler = utils::MemoryTracker::OutOfMemoryExceptionEnabler; namespace { @@ -944,11 +975,28 @@ Result DiskStorage::DiskAccessor::CreateEdge(VertexAccessor *from, transaction_.manyDeltasCache.Invalidate(from_vertex, edge_type, EdgeDirection::OUT); transaction_.manyDeltasCache.Invalidate(to_vertex, edge_type, EdgeDirection::IN); + if (storage_->config_.items.enable_schema_metadata) { + storage_->stored_edge_types_.try_insert(edge_type); + } storage_->edge_count_.fetch_add(1, std::memory_order_acq_rel); return EdgeAccessor(edge, edge_type, from_vertex, to_vertex, storage_, &transaction_); } +std::optional DiskStorage::DiskAccessor::FindEdge(Gid gid, View view, EdgeTypeId edge_type, + VertexAccessor *from_vertex, + VertexAccessor *to_vertex) { + auto res = FindEdges(view, edge_type, from_vertex, to_vertex); + if (res.HasError()) return std::nullopt; // TODO: use a Result type + + auto const it = std::ranges::find_if( + res->edges, [gid](EdgeAccessor const &edge_accessor) { return edge_accessor.edge_.ptr->gid == gid; }); + + if (it == res->edges.end()) return std::nullopt; // TODO: use a Result type + + return *it; +} + Result DiskStorage::DiskAccessor::EdgeSetFrom(EdgeAccessor * /*edge*/, VertexAccessor * /*new_from*/) { MG_ASSERT(false, "EdgeSetFrom is currently only implemented for InMemory storage"); return Error::NONEXISTENT_OBJECT; diff --git a/src/storage/v2/disk/storage.hpp b/src/storage/v2/disk/storage.hpp index 3575e685d..8640462de 100644 --- a/src/storage/v2/disk/storage.hpp +++ b/src/storage/v2/disk/storage.hpp @@ -121,6 +121,9 @@ class DiskStorage final : public Storage { Result CreateEdge(VertexAccessor *from, VertexAccessor *to, EdgeTypeId edge_type) override; + std::optional FindEdge(Gid gid, View view, EdgeTypeId edge_type, VertexAccessor *from_vertex, + VertexAccessor *to_vertex) override; + Result 
EdgeSetFrom(EdgeAccessor *edge, VertexAccessor *new_from) override; Result EdgeSetTo(EdgeAccessor *edge, VertexAccessor *new_to) override; @@ -313,12 +316,6 @@ class DiskStorage final : public Storage { uint64_t CommitTimestamp(std::optional desired_commit_timestamp = {}); - auto CreateReplicationClient(const memgraph::replication::ReplicationClientConfig & /*config*/, - const memgraph::replication::ReplicationEpoch * /*current_epoch*/) - -> std::unique_ptr override { - throw utils::BasicException("Disk storage mode does not support replication."); - } - std::unique_ptr kvstore_; DurableMetadata durable_metadata_; EdgeImportMode edge_import_status_{EdgeImportMode::INACTIVE}; diff --git a/src/storage/v2/durability/durability.cpp b/src/storage/v2/durability/durability.cpp index 1240bd52e..6a89b7b5a 100644 --- a/src/storage/v2/durability/durability.cpp +++ b/src/storage/v2/durability/durability.cpp @@ -9,8 +9,6 @@ // by the Apache License, Version 2.0, included in the file // licenses/APL.txt. 
-#include "storage/v2/durability/durability.hpp" - #include #include #include @@ -20,23 +18,29 @@ #include #include +#include #include #include #include +#include "flags/all.hpp" +#include "gflags/gflags.h" #include "replication/epoch.hpp" +#include "storage/v2/durability/durability.hpp" +#include "storage/v2/durability/metadata.hpp" #include "storage/v2/durability/paths.hpp" #include "storage/v2/durability/snapshot.hpp" #include "storage/v2/durability/wal.hpp" #include "storage/v2/inmemory/label_index.hpp" #include "storage/v2/inmemory/label_property_index.hpp" #include "storage/v2/inmemory/unique_constraints.hpp" +#include "storage/v2/name_id_mapper.hpp" #include "utils/event_histogram.hpp" +#include "utils/flag_validation.hpp" #include "utils/logging.hpp" #include "utils/memory_tracker.hpp" #include "utils/message.hpp" #include "utils/timer.hpp" - namespace memgraph::metrics { extern const Event SnapshotRecoveryLatency_us; } // namespace memgraph::metrics @@ -96,6 +100,7 @@ std::vector GetSnapshotFiles(const std::filesystem::path MG_ASSERT(!error_code, "Couldn't recover data because an error occurred: {}!", error_code.message()); } + std::sort(snapshot_files.begin(), snapshot_files.end()); return snapshot_files; } @@ -106,13 +111,17 @@ std::optional> GetWalFiles(const std::filesystem: std::vector wal_files; std::error_code error_code; + // There could be multiple "current" WAL files, the "_current" tag just means that the previous session didn't + // finalize. 
We cannot skip based on name, will be able to skip based on invalid data or sequence number, so the + // actual current wal will be skipped for (const auto &item : std::filesystem::directory_iterator(wal_directory, error_code)) { if (!item.is_regular_file()) continue; try { auto info = ReadWalInfo(item.path()); - if ((uuid.empty() || info.uuid == uuid) && (!current_seq_num || info.seq_num < *current_seq_num)) + if ((uuid.empty() || info.uuid == uuid) && (!current_seq_num || info.seq_num < *current_seq_num)) { wal_files.emplace_back(info.seq_num, info.from_timestamp, info.to_timestamp, std::move(info.uuid), std::move(info.epoch_id), item.path()); + } } catch (const RecoveryFailure &e) { spdlog::warn("Failed to read {}", item.path()); continue; @@ -120,6 +129,7 @@ std::optional> GetWalFiles(const std::filesystem: } MG_ASSERT(!error_code, "Couldn't recover data because an error occurred: {}!", error_code.message()); + // Sort based on the sequence number, not the file name std::sort(wal_files.begin(), wal_files.end()); return std::move(wal_files); } @@ -128,15 +138,23 @@ std::optional> GetWalFiles(const std::filesystem: // indices and constraints must be recovered after the data recovery is done // to ensure that the indices and constraints are consistent at the end of the // recovery process. 
-void RecoverIndicesAndConstraints(const RecoveredIndicesAndConstraints &indices_constraints, Indices *indices, - Constraints *constraints, utils::SkipList *vertices, - NameIdMapper *name_id_mapper, - const std::optional ¶llel_exec_info) { + +void RecoverConstraints(const RecoveredIndicesAndConstraints::ConstraintsMetadata &constraints_metadata, + Constraints *constraints, utils::SkipList *vertices, NameIdMapper *name_id_mapper, + const std::optional ¶llel_exec_info) { + RecoverExistenceConstraints(constraints_metadata, constraints, vertices, name_id_mapper, parallel_exec_info); + RecoverUniqueConstraints(constraints_metadata, constraints, vertices, name_id_mapper, parallel_exec_info); +} + +void RecoverIndicesAndStats(const RecoveredIndicesAndConstraints::IndicesMetadata &indices_metadata, Indices *indices, + utils::SkipList *vertices, NameIdMapper *name_id_mapper, + const std::optional ¶llel_exec_info) { spdlog::info("Recreating indices from metadata."); + // Recover label indices. - spdlog::info("Recreating {} label indices from metadata.", indices_constraints.indices.label.size()); + spdlog::info("Recreating {} label indices from metadata.", indices_metadata.label.size()); auto *mem_label_index = static_cast(indices->label_index_.get()); - for (const auto &item : indices_constraints.indices.label) { + for (const auto &item : indices_metadata.label) { if (!mem_label_index->CreateIndex(item, vertices->access(), parallel_exec_info)) { throw RecoveryFailure("The label index must be created here!"); } @@ -145,9 +163,10 @@ void RecoverIndicesAndConstraints(const RecoveredIndicesAndConstraints &indices_ spdlog::info("Label indices are recreated."); spdlog::info("Recreating index statistics from metadata."); + // Recover label indices statistics. 
- spdlog::info("Recreating {} label index statistics from metadata.", indices_constraints.indices.label_stats.size()); - for (const auto &item : indices_constraints.indices.label_stats) { + spdlog::info("Recreating {} label index statistics from metadata.", indices_metadata.label_stats.size()); + for (const auto &item : indices_metadata.label_stats) { mem_label_index->SetIndexStats(item.first, item.second); spdlog::info("Statistics for index on :{} are recreated from metadata", name_id_mapper->IdToName(item.first.AsUint())); @@ -155,10 +174,9 @@ void RecoverIndicesAndConstraints(const RecoveredIndicesAndConstraints &indices_ spdlog::info("Label indices statistics are recreated."); // Recover label+property indices. - spdlog::info("Recreating {} label+property indices from metadata.", - indices_constraints.indices.label_property.size()); + spdlog::info("Recreating {} label+property indices from metadata.", indices_metadata.label_property.size()); auto *mem_label_property_index = static_cast(indices->label_property_index_.get()); - for (const auto &item : indices_constraints.indices.label_property) { + for (const auto &item : indices_metadata.label_property) { if (!mem_label_property_index->CreateIndex(item.first, item.second, vertices->access(), parallel_exec_info)) throw RecoveryFailure("The label+property index must be created here!"); spdlog::info("Index on :{}({}) is recreated from metadata", name_id_mapper->IdToName(item.first.AsUint()), @@ -168,8 +186,8 @@ void RecoverIndicesAndConstraints(const RecoveredIndicesAndConstraints &indices_ // Recover label+property indices statistics. 
spdlog::info("Recreating {} label+property indices statistics from metadata.", - indices_constraints.indices.label_property_stats.size()); - for (const auto &item : indices_constraints.indices.label_property_stats) { + indices_metadata.label_property_stats.size()); + for (const auto &item : indices_metadata.label_property_stats) { const auto label_id = item.first; const auto property_id = item.second.first; const auto &stats = item.second.second; @@ -182,14 +200,20 @@ void RecoverIndicesAndConstraints(const RecoveredIndicesAndConstraints &indices_ spdlog::info("Indices are recreated."); spdlog::info("Recreating constraints from metadata."); - // Recover existence constraints. - spdlog::info("Recreating {} existence constraints from metadata.", indices_constraints.constraints.existence.size()); - for (const auto &[label, property] : indices_constraints.constraints.existence) { +} + +void RecoverExistenceConstraints(const RecoveredIndicesAndConstraints::ConstraintsMetadata &constraints_metadata, + Constraints *constraints, utils::SkipList *vertices, + NameIdMapper *name_id_mapper, + const std::optional ¶llel_exec_info) { + spdlog::info("Recreating {} existence constraints from metadata.", constraints_metadata.existence.size()); + for (const auto &[label, property] : constraints_metadata.existence) { if (constraints->existence_constraints_->ConstraintExists(label, property)) { throw RecoveryFailure("The existence constraint already exists!"); } - if (auto violation = ExistenceConstraints::ValidateVerticesOnConstraint(vertices->access(), label, property); + if (auto violation = + ExistenceConstraints::ValidateVerticesOnConstraint(vertices->access(), label, property, parallel_exec_info); violation.has_value()) { throw RecoveryFailure("The existence constraint failed because it couldn't be validated!"); } @@ -199,38 +223,57 @@ void RecoverIndicesAndConstraints(const RecoveredIndicesAndConstraints &indices_ name_id_mapper->IdToName(property.AsUint())); } 
spdlog::info("Existence constraints are recreated from metadata."); +} - // Recover unique constraints. - spdlog::info("Recreating {} unique constraints from metadata.", indices_constraints.constraints.unique.size()); - for (const auto &item : indices_constraints.constraints.unique) { +void RecoverUniqueConstraints(const RecoveredIndicesAndConstraints::ConstraintsMetadata &constraints_metadata, + Constraints *constraints, utils::SkipList *vertices, NameIdMapper *name_id_mapper, + const std::optional ¶llel_exec_info) { + spdlog::info("Recreating {} unique constraints from metadata.", constraints_metadata.unique.size()); + + for (const auto &[label, properties] : constraints_metadata.unique) { auto *mem_unique_constraints = static_cast(constraints->unique_constraints_.get()); - auto ret = mem_unique_constraints->CreateConstraint(item.first, item.second, vertices->access()); + auto ret = mem_unique_constraints->CreateConstraint(label, properties, vertices->access(), parallel_exec_info); if (ret.HasError() || ret.GetValue() != UniqueConstraints::CreationStatus::SUCCESS) throw RecoveryFailure("The unique constraint must be created here!"); std::vector property_names; - property_names.reserve(item.second.size()); - for (const auto &prop : item.second) { + property_names.reserve(properties.size()); + for (const auto &prop : properties) { property_names.emplace_back(name_id_mapper->IdToName(prop.AsUint())); } const auto property_names_joined = utils::Join(property_names, ","); - spdlog::info("Unique constraint on :{}({}) is recreated from metadata", - name_id_mapper->IdToName(item.first.AsUint()), property_names_joined); + spdlog::info("Unique constraint on :{}({}) is recreated from metadata", name_id_mapper->IdToName(label.AsUint()), + property_names_joined); } spdlog::info("Unique constraints are recreated from metadata."); spdlog::info("Constraints are recreated from metadata."); } -std::optional RecoverData(const std::filesystem::path &snapshot_directory, - const 
std::filesystem::path &wal_directory, std::string *uuid, - ReplicationStorageState &repl_storage_state, utils::SkipList *vertices, - utils::SkipList *edges, std::atomic *edge_count, - NameIdMapper *name_id_mapper, Indices *indices, Constraints *constraints, - const Config &config, uint64_t *wal_seq_num) { +std::optional GetParallelExecInfo(const RecoveryInfo &recovery_info, + const Config &config) { + return config.durability.allow_parallel_schema_creation + ? std::make_optional(ParallelizedSchemaCreationInfo{recovery_info.vertex_batches, + config.durability.recovery_thread_count}) + : std::nullopt; +} + +std::optional GetParallelExecInfoIndices(const RecoveryInfo &recovery_info, + const Config &config) { + return config.durability.allow_parallel_schema_creation || config.durability.allow_parallel_index_creation + ? std::make_optional(ParallelizedSchemaCreationInfo{recovery_info.vertex_batches, + config.durability.recovery_thread_count}) + : std::nullopt; +} + +std::optional Recovery::RecoverData(std::string *uuid, ReplicationStorageState &repl_storage_state, + utils::SkipList *vertices, utils::SkipList *edges, + std::atomic *edge_count, NameIdMapper *name_id_mapper, + Indices *indices, Constraints *constraints, const Config &config, + uint64_t *wal_seq_num) { utils::MemoryTracker::OutOfMemoryExceptionEnabler oom_exception; - spdlog::info("Recovering persisted data using snapshot ({}) and WAL directory ({}).", snapshot_directory, - wal_directory); - if (!utils::DirExists(snapshot_directory) && !utils::DirExists(wal_directory)) { + spdlog::info("Recovering persisted data using snapshot ({}) and WAL directory ({}).", snapshot_directory_, + wal_directory_); + if (!utils::DirExists(snapshot_directory_) && !utils::DirExists(wal_directory_)) { spdlog::warn(utils::MessageWithLink("Snapshot or WAL directory don't exist, there is nothing to recover.", "https://memgr.ph/durability")); return std::nullopt; @@ -239,15 +282,13 @@ std::optional RecoverData(const 
std::filesystem::path &snapshot_di auto *const epoch_history = &repl_storage_state.history; utils::Timer timer; - auto snapshot_files = GetSnapshotFiles(snapshot_directory); + auto snapshot_files = GetSnapshotFiles(snapshot_directory_); RecoveryInfo recovery_info; RecoveredIndicesAndConstraints indices_constraints; std::optional snapshot_timestamp; if (!snapshot_files.empty()) { - spdlog::info("Try recovering from snapshot directory {}.", snapshot_directory); - // Order the files by name - std::sort(snapshot_files.begin(), snapshot_files.end()); + spdlog::info("Try recovering from snapshot directory {}.", wal_directory_); // UUID used for durability is the UUID of the last snapshot file. *uuid = snapshot_files.back().uuid; @@ -277,18 +318,17 @@ std::optional RecoverData(const std::filesystem::path &snapshot_di snapshot_timestamp = recovered_snapshot->snapshot_info.start_timestamp; repl_storage_state.epoch_.SetEpoch(std::move(recovered_snapshot->snapshot_info.epoch_id)); - if (!utils::DirExists(wal_directory)) { - const auto par_exec_info = config.durability.allow_parallel_index_creation - ? 
std::make_optional(std::make_pair(recovery_info.vertex_batches, - config.durability.recovery_thread_count)) - : std::nullopt; - RecoverIndicesAndConstraints(indices_constraints, indices, constraints, vertices, name_id_mapper, par_exec_info); + if (!utils::DirExists(wal_directory_)) { + RecoverIndicesAndStats(indices_constraints.indices, indices, vertices, name_id_mapper, + GetParallelExecInfoIndices(recovery_info, config)); + RecoverConstraints(indices_constraints.constraints, constraints, vertices, name_id_mapper, + GetParallelExecInfo(recovery_info, config)); return recovered_snapshot->recovery_info; } } else { - spdlog::info("No snapshot file was found, collecting information from WAL directory {}.", wal_directory); + spdlog::info("No snapshot file was found, collecting information from WAL directory {}.", wal_directory_); std::error_code error_code; - if (!utils::DirExists(wal_directory)) return std::nullopt; + if (!utils::DirExists(wal_directory_)) return std::nullopt; // We use this smaller struct that contains only a subset of information // necessary for the rest of the recovery function. // Also, the struct is sorted primarily on the path it contains. 
@@ -302,7 +342,7 @@ std::optional RecoverData(const std::filesystem::path &snapshot_di auto operator<=>(const WalFileInfo &) const = default; }; std::vector wal_files; - for (const auto &item : std::filesystem::directory_iterator(wal_directory, error_code)) { + for (const auto &item : std::filesystem::directory_iterator(wal_directory_, error_code)) { if (!item.is_regular_file()) continue; try { auto info = ReadWalInfo(item.path()); @@ -323,7 +363,7 @@ std::optional RecoverData(const std::filesystem::path &snapshot_di repl_storage_state.epoch_.SetEpoch(std::move(wal_files.back().epoch_id)); } - auto maybe_wal_files = GetWalFiles(wal_directory, *uuid); + auto maybe_wal_files = GetWalFiles(wal_directory_, *uuid); if (!maybe_wal_files) { spdlog::warn( utils::MessageWithLink("Couldn't get WAL file info from the WAL directory.", "https://memgr.ph/durability")); @@ -409,12 +449,10 @@ std::optional RecoverData(const std::filesystem::path &snapshot_di spdlog::info("All necessary WAL files are loaded successfully."); } - const auto par_exec_info = - config.durability.allow_parallel_index_creation && !recovery_info.vertex_batches.empty() - ? 
std::make_optional(std::make_pair(recovery_info.vertex_batches, config.durability.recovery_thread_count)) - : std::nullopt; - - RecoverIndicesAndConstraints(indices_constraints, indices, constraints, vertices, name_id_mapper, par_exec_info); + RecoverIndicesAndStats(indices_constraints.indices, indices, vertices, name_id_mapper, + GetParallelExecInfoIndices(recovery_info, config)); + RecoverConstraints(indices_constraints.constraints, constraints, vertices, name_id_mapper, + GetParallelExecInfo(recovery_info, config)); memgraph::metrics::Measure(memgraph::metrics::SnapshotRecoveryLatency_us, std::chrono::duration_cast(timer.Elapsed()).count()); diff --git a/src/storage/v2/durability/durability.hpp b/src/storage/v2/durability/durability.hpp index 8bb1223c4..97e2c7efc 100644 --- a/src/storage/v2/durability/durability.hpp +++ b/src/storage/v2/durability/durability.hpp @@ -23,6 +23,7 @@ #include "storage/v2/config.hpp" #include "storage/v2/constraints/constraints.hpp" #include "storage/v2/durability/metadata.hpp" +#include "storage/v2/durability/recovery_type.hpp" #include "storage/v2/durability/wal.hpp" #include "storage/v2/edge.hpp" #include "storage/v2/indices/indices.hpp" @@ -94,27 +95,50 @@ std::optional> GetWalFiles(const std::filesystem: std::string_view uuid = "", std::optional current_seq_num = {}); -using ParallelizedIndexCreationInfo = - std::pair> /*vertex_recovery_info*/, uint64_t /*thread_count*/>; - -// Helper function used to recover all discovered indices and constraints. The -// indices and constraints must be recovered after the data recovery is done -// to ensure that the indices and constraints are consistent at the end of the +// Helper function used to recover all discovered indices. The +// indices must be recovered after the data recovery is done +// to ensure that the indices consistent at the end of the // recovery process. 
/// @throw RecoveryFailure -void RecoverIndicesAndConstraints( - const RecoveredIndicesAndConstraints &indices_constraints, Indices *indices, Constraints *constraints, - utils::SkipList *vertices, NameIdMapper *name_id_mapper, - const std::optional ¶llel_exec_info = std::nullopt); +void RecoverIndicesAndStats(const RecoveredIndicesAndConstraints::IndicesMetadata &indices_metadata, Indices *indices, + utils::SkipList *vertices, NameIdMapper *name_id_mapper, + const std::optional ¶llel_exec_info = std::nullopt); -/// Recovers data either from a snapshot and/or WAL files. +// Helper function used to recover all discovered constraints. The +// constraints must be recovered after the data recovery is done +// to ensure that the constraints are consistent at the end of the +// recovery process. /// @throw RecoveryFailure -/// @throw std::bad_alloc -std::optional RecoverData(const std::filesystem::path &snapshot_directory, - const std::filesystem::path &wal_directory, std::string *uuid, - ReplicationStorageState &repl_storage_state, utils::SkipList *vertices, - utils::SkipList *edges, std::atomic *edge_count, - NameIdMapper *name_id_mapper, Indices *indices, Constraints *constraints, - const Config &config, uint64_t *wal_seq_num); +void RecoverConstraints(const RecoveredIndicesAndConstraints::ConstraintsMetadata &constraints_metadata, + Constraints *constraints, utils::SkipList *vertices, NameIdMapper *name_id_mapper, + const std::optional ¶llel_exec_info = std::nullopt); + +std::optional GetParallelExecInfo(const RecoveryInfo &recovery_info, + const Config &config); + +std::optional GetParallelExecInfoIndices(const RecoveryInfo &recovery_info, + const Config &config); + +void RecoverExistenceConstraints(const RecoveredIndicesAndConstraints::ConstraintsMetadata &, Constraints *, + utils::SkipList *, NameIdMapper *, + const std::optional &); + +void RecoverUniqueConstraints(const RecoveredIndicesAndConstraints::ConstraintsMetadata &, Constraints *, + utils::SkipList *, 
NameIdMapper *, + const std::optional &); +struct Recovery { + public: + /// Recovers data either from a snapshot and/or WAL files. + /// @throw RecoveryFailure + /// @throw std::bad_alloc + std::optional RecoverData(std::string *uuid, ReplicationStorageState &repl_storage_state, + utils::SkipList *vertices, utils::SkipList *edges, + std::atomic *edge_count, NameIdMapper *name_id_mapper, + Indices *indices, Constraints *constraints, const Config &config, + uint64_t *wal_seq_num); + + const std::filesystem::path snapshot_directory_; + const std::filesystem::path wal_directory_; +}; } // namespace memgraph::storage::durability diff --git a/src/storage/v2/durability/metadata.hpp b/src/storage/v2/durability/metadata.hpp index 1045d4f97..42e24e723 100644 --- a/src/storage/v2/durability/metadata.hpp +++ b/src/storage/v2/durability/metadata.hpp @@ -38,14 +38,14 @@ struct RecoveryInfo { /// Structure used to track indices and constraints during recovery. struct RecoveredIndicesAndConstraints { - struct { + struct IndicesMetadata { std::vector label; std::vector> label_property; std::vector> label_stats; std::vector>> label_property_stats; } indices; - struct { + struct ConstraintsMetadata { std::vector> existence; std::vector>> unique; } constraints; diff --git a/src/storage/v2/durability/recovery_type.hpp b/src/storage/v2/durability/recovery_type.hpp new file mode 100644 index 000000000..972cd53f2 --- /dev/null +++ b/src/storage/v2/durability/recovery_type.hpp @@ -0,0 +1,23 @@ +// Copyright 2023 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. 
+// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#pragma once + +#include +#include +#include "storage/v2/id_types.hpp" + +namespace memgraph::storage::durability { +struct ParallelizedSchemaCreationInfo { + std::vector> vertex_recovery_info; + uint64_t thread_count; +}; +} // namespace memgraph::storage::durability diff --git a/src/storage/v2/edge_accessor.cpp b/src/storage/v2/edge_accessor.cpp index 3d97f537a..7e7166117 100644 --- a/src/storage/v2/edge_accessor.cpp +++ b/src/storage/v2/edge_accessor.cpp @@ -25,6 +25,13 @@ namespace memgraph::storage { +bool EdgeAccessor::IsDeleted() const { + if (!storage_->config_.items.properties_on_edges) { + return false; + } + return edge_.ptr->deleted; +} + bool EdgeAccessor::IsVisible(const View view) const { bool exists = true; bool deleted = true; diff --git a/src/storage/v2/edge_accessor.hpp b/src/storage/v2/edge_accessor.hpp index 365619149..a1c52d0a5 100644 --- a/src/storage/v2/edge_accessor.hpp +++ b/src/storage/v2/edge_accessor.hpp @@ -44,6 +44,8 @@ class EdgeAccessor final { transaction_(transaction), for_deleted_(for_deleted) {} + bool IsDeleted() const; + /// @return true if the object is visible from the current transaction bool IsVisible(View view) const; diff --git a/src/storage/v2/indices/indices.cpp b/src/storage/v2/indices/indices.cpp index bf7295de2..e0b194ad4 100644 --- a/src/storage/v2/indices/indices.cpp +++ b/src/storage/v2/indices/indices.cpp @@ -17,6 +17,21 @@ namespace memgraph::storage { +void Indices::AbortEntries(LabelId labelId, std::span vertices, uint64_t exact_start_timestamp) const { + static_cast(label_index_.get())->AbortEntries(labelId, vertices, exact_start_timestamp); +} + +void Indices::AbortEntries(PropertyId property, std::span const> vertices, + uint64_t exact_start_timestamp) const { + 
static_cast(label_property_index_.get()) + ->AbortEntries(property, vertices, exact_start_timestamp); +} +void Indices::AbortEntries(LabelId label, std::span const> vertices, + uint64_t exact_start_timestamp) const { + static_cast(label_property_index_.get()) + ->AbortEntries(label, vertices, exact_start_timestamp); +} + void Indices::RemoveObsoleteEntries(uint64_t oldest_active_start_timestamp) const { static_cast(label_index_.get())->RemoveObsoleteEntries(oldest_active_start_timestamp); static_cast(label_property_index_.get()) @@ -50,4 +65,8 @@ Indices::Indices(const Config &config, StorageMode storage_mode) { }); } +Indices::IndexStats Indices::Analysis() const { + return {static_cast(label_index_.get())->Analysis(), + static_cast(label_property_index_.get())->Analysis()}; +} } // namespace memgraph::storage diff --git a/src/storage/v2/indices/indices.hpp b/src/storage/v2/indices/indices.hpp index 9a71107cd..33bd429e6 100644 --- a/src/storage/v2/indices/indices.hpp +++ b/src/storage/v2/indices/indices.hpp @@ -12,6 +12,9 @@ #pragma once #include +#include + +#include "storage/v2/id_types.hpp" #include "storage/v2/indices/label_index.hpp" #include "storage/v2/indices/label_property_index.hpp" #include "storage/v2/storage_mode.hpp" @@ -32,6 +35,20 @@ struct Indices { /// TODO: unused in disk indices void RemoveObsoleteEntries(uint64_t oldest_active_start_timestamp) const; + /// Surgical removal of entries that was inserted this transaction + /// TODO: unused in disk indices + void AbortEntries(LabelId labelId, std::span vertices, uint64_t exact_start_timestamp) const; + void AbortEntries(PropertyId property, std::span const> vertices, + uint64_t exact_start_timestamp) const; + void AbortEntries(LabelId label, std::span const> vertices, + uint64_t exact_start_timestamp) const; + + struct IndexStats { + std::vector label; + LabelPropertyIndex::IndexStats property_label; + }; + IndexStats Analysis() const; + // Indices are updated whenever an update occurs, instead of 
only on commit or // advance command. This is necessary because we want indices to support `NEW` // view for use in Merge. diff --git a/src/storage/v2/indices/indices_utils.hpp b/src/storage/v2/indices/indices_utils.hpp index 0caad6686..59b492ba3 100644 --- a/src/storage/v2/indices/indices_utils.hpp +++ b/src/storage/v2/indices/indices_utils.hpp @@ -11,6 +11,7 @@ #include #include "storage/v2/delta.hpp" +#include "storage/v2/durability/recovery_type.hpp" #include "storage/v2/mvcc.hpp" #include "storage/v2/transaction.hpp" #include "storage/v2/vertex.hpp" @@ -20,9 +21,6 @@ namespace memgraph::storage { -using ParallelizedIndexCreationInfo = - std::pair> /*vertex_recovery_info*/, uint64_t /*thread_count*/>; - /// Traverses deltas visible from transaction with start timestamp greater than /// the provided timestamp, and calls the provided callback function for each /// delta. If the callback ever returns true, traversal is stopped and the @@ -259,11 +257,12 @@ inline void CreateIndexOnSingleThread(utils::SkipList::Accessor &vertice template inline void CreateIndexOnMultipleThreads(utils::SkipList::Accessor &vertices, TSKiplistIter skiplist_iter, TIndex &index, TIndexKey key, - const ParallelizedIndexCreationInfo ¶llel_exec_info, const TFunc &func) { + const durability::ParallelizedSchemaCreationInfo ¶llel_exec_info, + const TFunc &func) { utils::MemoryTracker::OutOfMemoryExceptionEnabler oom_exception; - const auto &vertex_batches = parallel_exec_info.first; - const auto thread_count = std::min(parallel_exec_info.second, vertex_batches.size()); + const auto &vertex_batches = parallel_exec_info.vertex_recovery_info; + const auto thread_count = std::min(parallel_exec_info.thread_count, vertex_batches.size()); MG_ASSERT(!vertex_batches.empty(), "The size of batches should always be greater than zero if you want to use the parallel version of index " diff --git a/src/storage/v2/indices/label_property_index.hpp b/src/storage/v2/indices/label_property_index.hpp index 
84908b3e9..e8389fbea 100644 --- a/src/storage/v2/indices/label_property_index.hpp +++ b/src/storage/v2/indices/label_property_index.hpp @@ -19,6 +19,11 @@ namespace memgraph::storage { class LabelPropertyIndex { public: + struct IndexStats { + std::map> l2p; + std::map> p2l; + }; + LabelPropertyIndex() = default; LabelPropertyIndex(const LabelPropertyIndex &) = delete; LabelPropertyIndex(LabelPropertyIndex &&) = delete; diff --git a/src/storage/v2/inmemory/label_index.cpp b/src/storage/v2/inmemory/label_index.cpp index 31c3634be..b833c97ff 100644 --- a/src/storage/v2/inmemory/label_index.cpp +++ b/src/storage/v2/inmemory/label_index.cpp @@ -10,8 +10,12 @@ // licenses/APL.txt. #include "storage/v2/inmemory/label_index.hpp" + +#include + #include "storage/v2/constraints/constraints.hpp" #include "storage/v2/indices/indices_utils.hpp" +#include "storage/v2/inmemory/storage.hpp" namespace memgraph::storage { @@ -22,8 +26,9 @@ void InMemoryLabelIndex::UpdateOnAddLabel(LabelId added_label, Vertex *vertex_af acc.insert(Entry{vertex_after_update, tx.start_timestamp}); } -bool InMemoryLabelIndex::CreateIndex(LabelId label, utils::SkipList::Accessor vertices, - const std::optional ¶llel_exec_info) { +bool InMemoryLabelIndex::CreateIndex( + LabelId label, utils::SkipList::Accessor vertices, + const std::optional ¶llel_exec_info) { const auto create_index_seq = [this](LabelId label, utils::SkipList::Accessor &vertices, std::map>::iterator it) { using IndexAccessor = decltype(it->second.access()); @@ -38,7 +43,7 @@ bool InMemoryLabelIndex::CreateIndex(LabelId label, utils::SkipList::Acc const auto create_index_par = [this](LabelId label, utils::SkipList::Accessor &vertices, std::map>::iterator label_it, - const ParallelizedIndexCreationInfo ¶llel_exec_info) { + const durability::ParallelizedSchemaCreationInfo ¶llel_exec_info) { using IndexAccessor = decltype(label_it->second.access()); CreateIndexOnMultipleThreads(vertices, label_it, index_, label, parallel_exec_info, @@ -96,9 
+101,23 @@ void InMemoryLabelIndex::RemoveObsoleteEntries(uint64_t oldest_active_start_time } } -InMemoryLabelIndex::Iterable::Iterable(utils::SkipList::Accessor index_accessor, LabelId label, View view, - Storage *storage, Transaction *transaction) - : index_accessor_(std::move(index_accessor)), +void InMemoryLabelIndex::AbortEntries(LabelId labelId, std::span vertices, + uint64_t exact_start_timestamp) { + auto const it = index_.find(labelId); + if (it == index_.end()) return; + + auto &label_storage = it->second; + auto vertices_acc = label_storage.access(); + for (auto *vertex : vertices) { + vertices_acc.remove(Entry{vertex, exact_start_timestamp}); + } +} + +InMemoryLabelIndex::Iterable::Iterable(utils::SkipList::Accessor index_accessor, + utils::SkipList::ConstAccessor vertices_accessor, LabelId label, + View view, Storage *storage, Transaction *transaction) + : pin_accessor_(std::move(vertices_accessor)), + index_accessor_(std::move(index_accessor)), label_(label), view_(view), storage_(storage), @@ -147,9 +166,21 @@ void InMemoryLabelIndex::RunGC() { InMemoryLabelIndex::Iterable InMemoryLabelIndex::Vertices(LabelId label, View view, Storage *storage, Transaction *transaction) { + DMG_ASSERT(storage->storage_mode_ == StorageMode::IN_MEMORY_TRANSACTIONAL || + storage->storage_mode_ == StorageMode::IN_MEMORY_ANALYTICAL, + "LabelIndex trying to access InMemory vertices from OnDisk!"); + auto vertices_acc = static_cast(storage)->vertices_.access(); const auto it = index_.find(label); MG_ASSERT(it != index_.end(), "Index for label {} doesn't exist", label.AsUint()); - return {it->second.access(), label, view, storage, transaction}; + return {it->second.access(), std::move(vertices_acc), label, view, storage, transaction}; +} + +InMemoryLabelIndex::Iterable InMemoryLabelIndex::Vertices( + LabelId label, memgraph::utils::SkipList::ConstAccessor vertices_acc, View view, + Storage *storage, Transaction *transaction) { + const auto it = index_.find(label); + 
MG_ASSERT(it != index_.end(), "Index for label {} doesn't exist", label.AsUint()); + return {it->second.access(), std::move(vertices_acc), label, view, storage, transaction}; } void InMemoryLabelIndex::SetIndexStats(const storage::LabelId &label, const storage::LabelIndexStats &stats) { @@ -187,4 +218,12 @@ bool InMemoryLabelIndex::DeleteIndexStats(const storage::LabelId &label) { return false; } +std::vector InMemoryLabelIndex::Analysis() const { + std::vector res; + res.reserve(index_.size()); + for (const auto &[label, _] : index_) { + res.emplace_back(label); + } + return res; +} } // namespace memgraph::storage diff --git a/src/storage/v2/inmemory/label_index.hpp b/src/storage/v2/inmemory/label_index.hpp index 7d606574b..2411f0ba1 100644 --- a/src/storage/v2/inmemory/label_index.hpp +++ b/src/storage/v2/inmemory/label_index.hpp @@ -11,7 +11,10 @@ #pragma once +#include + #include "storage/v2/constraints/constraints.hpp" +#include "storage/v2/durability/recovery_type.hpp" #include "storage/v2/indices/label_index.hpp" #include "storage/v2/indices/label_index_stats.hpp" #include "storage/v2/vertex.hpp" @@ -20,9 +23,6 @@ namespace memgraph::storage { -using ParallelizedIndexCreationInfo = - std::pair> /*vertex_recovery_info*/, uint64_t /*thread_count*/>; - class InMemoryLabelIndex : public storage::LabelIndex { private: struct Entry { @@ -45,7 +45,7 @@ class InMemoryLabelIndex : public storage::LabelIndex { /// @throw std::bad_alloc bool CreateIndex(LabelId label, utils::SkipList::Accessor vertices, - const std::optional ¶llel_exec_info); + const std::optional ¶llel_exec_info); /// Returns false if there was no index to drop bool DropIndex(LabelId label) override; @@ -56,10 +56,15 @@ class InMemoryLabelIndex : public storage::LabelIndex { void RemoveObsoleteEntries(uint64_t oldest_active_start_timestamp); + /// Surgical removal of entries that was inserted this transaction + void AbortEntries(LabelId labelId, std::span vertices, uint64_t exact_start_timestamp); + 
+ std::vector Analysis() const; + class Iterable { public: - Iterable(utils::SkipList::Accessor index_accessor, LabelId label, View view, Storage *storage, - Transaction *transaction); + Iterable(utils::SkipList::Accessor index_accessor, utils::SkipList::ConstAccessor vertices_accessor, + LabelId label, View view, Storage *storage, Transaction *transaction); class Iterator { public: @@ -85,6 +90,7 @@ class InMemoryLabelIndex : public storage::LabelIndex { Iterator end() { return {this, index_accessor_.end()}; } private: + utils::SkipList::ConstAccessor pin_accessor_; utils::SkipList::Accessor index_accessor_; LabelId label_; View view_; @@ -98,6 +104,9 @@ class InMemoryLabelIndex : public storage::LabelIndex { Iterable Vertices(LabelId label, View view, Storage *storage, Transaction *transaction); + Iterable Vertices(LabelId label, memgraph::utils::SkipList::ConstAccessor vertices_acc, + View view, Storage *storage, Transaction *transaction); + void SetIndexStats(const storage::LabelId &label, const storage::LabelIndexStats &stats); std::optional GetIndexStats(const storage::LabelId &label) const; diff --git a/src/storage/v2/inmemory/label_property_index.cpp b/src/storage/v2/inmemory/label_property_index.cpp index fa5cce444..c8333fb95 100644 --- a/src/storage/v2/inmemory/label_property_index.cpp +++ b/src/storage/v2/inmemory/label_property_index.cpp @@ -12,6 +12,8 @@ #include "storage/v2/inmemory/label_property_index.hpp" #include "storage/v2/constraints/constraints.hpp" #include "storage/v2/indices/indices_utils.hpp" +#include "storage/v2/inmemory/storage.hpp" +#include "utils/logging.hpp" namespace memgraph::storage { @@ -33,9 +35,9 @@ bool InMemoryLabelPropertyIndex::Entry::operator<(const PropertyValue &rhs) cons bool InMemoryLabelPropertyIndex::Entry::operator==(const PropertyValue &rhs) const { return value == rhs; } -bool InMemoryLabelPropertyIndex::CreateIndex(LabelId label, PropertyId property, - utils::SkipList::Accessor vertices, - const std::optional 
¶llel_exec_info) { +bool InMemoryLabelPropertyIndex::CreateIndex( + LabelId label, PropertyId property, utils::SkipList::Accessor vertices, + const std::optional ¶llel_exec_info) { spdlog::trace("Vertices size when creating index: {}", vertices.size()); auto create_index_seq = [this](LabelId label, PropertyId property, utils::SkipList::Accessor &vertices, std::map, utils::SkipList>::iterator it) { @@ -52,7 +54,7 @@ bool InMemoryLabelPropertyIndex::CreateIndex(LabelId label, PropertyId property, auto create_index_par = [this](LabelId label, PropertyId property, utils::SkipList::Accessor &vertices, std::map, utils::SkipList>::iterator label_property_it, - const ParallelizedIndexCreationInfo ¶llel_exec_info) { + const durability::ParallelizedSchemaCreationInfo ¶llel_exec_info) { using IndexAccessor = decltype(label_property_it->second.access()); CreateIndexOnMultipleThreads( @@ -101,11 +103,12 @@ void InMemoryLabelPropertyIndex::UpdateOnSetProperty(PropertyId property, const return; } - if (!indices_by_property_.contains(property)) { + auto index = indices_by_property_.find(property); + if (index == indices_by_property_.end()) { return; } - for (const auto &[_, storage] : indices_by_property_.at(property)) { + for (const auto &[_, storage] : index->second) { auto acc = storage->access(); acc.insert(Entry{value, vertex, tx.start_timestamp}); } @@ -220,12 +223,14 @@ const PropertyValue kSmallestMap = PropertyValue(std::map(0), std::numeric_limits::min()}); -InMemoryLabelPropertyIndex::Iterable::Iterable(utils::SkipList::Accessor index_accessor, LabelId label, +InMemoryLabelPropertyIndex::Iterable::Iterable(utils::SkipList::Accessor index_accessor, + utils::SkipList::ConstAccessor vertices_accessor, LabelId label, PropertyId property, const std::optional> &lower_bound, const std::optional> &upper_bound, View view, Storage *storage, Transaction *transaction) - : index_accessor_(std::move(index_accessor)), + : pin_accessor_(std::move(vertices_accessor)), + 
index_accessor_(std::move(index_accessor)), label_(label), property_(property), lower_bound_(lower_bound), @@ -428,9 +433,57 @@ InMemoryLabelPropertyIndex::Iterable InMemoryLabelPropertyIndex::Vertices( LabelId label, PropertyId property, const std::optional> &lower_bound, const std::optional> &upper_bound, View view, Storage *storage, Transaction *transaction) { + DMG_ASSERT(storage->storage_mode_ == StorageMode::IN_MEMORY_TRANSACTIONAL || + storage->storage_mode_ == StorageMode::IN_MEMORY_ANALYTICAL, + "PropertyLabel index trying to access InMemory vertices from OnDisk!"); + auto vertices_acc = static_cast(storage)->vertices_.access(); auto it = index_.find({label, property}); MG_ASSERT(it != index_.end(), "Index for label {} and property {} doesn't exist", label.AsUint(), property.AsUint()); - return {it->second.access(), label, property, lower_bound, upper_bound, view, storage, transaction}; + return {it->second.access(), std::move(vertices_acc), label, property, lower_bound, upper_bound, view, storage, + transaction}; } +InMemoryLabelPropertyIndex::Iterable InMemoryLabelPropertyIndex::Vertices( + LabelId label, PropertyId property, + memgraph::utils::SkipList::ConstAccessor vertices_acc, + const std::optional> &lower_bound, + const std::optional> &upper_bound, View view, Storage *storage, + Transaction *transaction) { + auto it = index_.find({label, property}); + MG_ASSERT(it != index_.end(), "Index for label {} and property {} doesn't exist", label.AsUint(), property.AsUint()); + return {it->second.access(), std::move(vertices_acc), label, property, lower_bound, upper_bound, view, storage, + transaction}; +} + +void InMemoryLabelPropertyIndex::AbortEntries(PropertyId property, + std::span const> vertices, + uint64_t exact_start_timestamp) { + auto const it = indices_by_property_.find(property); + if (it == indices_by_property_.end()) return; + + auto &indices = it->second; + for (const auto &[_, index] : indices) { + auto index_acc = index->access(); + for 
(auto const &[value, vertex] : vertices) { + index_acc.remove(Entry{value, vertex, exact_start_timestamp}); + } + } +} + +void InMemoryLabelPropertyIndex::AbortEntries(LabelId label, + std::span const> vertices, + uint64_t exact_start_timestamp) { + for (auto &[label_prop, storage] : index_) { + if (label_prop.first != label) { + continue; + } + + auto index_acc = storage.access(); + for (const auto &[property, vertex] : vertices) { + if (!property.IsNull()) { + index_acc.remove(Entry{property, vertex, exact_start_timestamp}); + } + } + } +} } // namespace memgraph::storage diff --git a/src/storage/v2/inmemory/label_property_index.hpp b/src/storage/v2/inmemory/label_property_index.hpp index 7f8c54909..8bc4148bb 100644 --- a/src/storage/v2/inmemory/label_property_index.hpp +++ b/src/storage/v2/inmemory/label_property_index.hpp @@ -11,18 +11,19 @@ #pragma once +#include + #include "storage/v2/constraints/constraints.hpp" +#include "storage/v2/durability/recovery_type.hpp" +#include "storage/v2/id_types.hpp" #include "storage/v2/indices/label_property_index.hpp" #include "storage/v2/indices/label_property_index_stats.hpp" +#include "storage/v2/property_value.hpp" #include "utils/rw_lock.hpp" #include "utils/synchronized.hpp" namespace memgraph::storage { -/// TODO: andi. 
Too many copies, extract at one place -using ParallelizedIndexCreationInfo = - std::pair> /*vertex_recovery_info*/, uint64_t /*thread_count*/>; - class InMemoryLabelPropertyIndex : public storage::LabelPropertyIndex { private: struct Entry { @@ -42,7 +43,7 @@ class InMemoryLabelPropertyIndex : public storage::LabelPropertyIndex { /// @throw std::bad_alloc bool CreateIndex(LabelId label, PropertyId property, utils::SkipList::Accessor vertices, - const std::optional ¶llel_exec_info); + const std::optional ¶llel_exec_info); /// @throw std::bad_alloc void UpdateOnAddLabel(LabelId added_label, Vertex *vertex_after_update, const Transaction &tx) override; @@ -61,10 +62,25 @@ class InMemoryLabelPropertyIndex : public storage::LabelPropertyIndex { void RemoveObsoleteEntries(uint64_t oldest_active_start_timestamp); + void AbortEntries(PropertyId property, std::span const> vertices, + uint64_t exact_start_timestamp); + void AbortEntries(LabelId label, std::span const> vertices, + uint64_t exact_start_timestamp); + + IndexStats Analysis() const { + IndexStats res{}; + for (const auto &[lp, _] : index_) { + const auto &[label, property] = lp; + res.l2p[label].emplace_back(property); + res.p2l[property].emplace_back(label); + } + return res; + } + class Iterable { public: - Iterable(utils::SkipList::Accessor index_accessor, LabelId label, PropertyId property, - const std::optional> &lower_bound, + Iterable(utils::SkipList::Accessor index_accessor, utils::SkipList::ConstAccessor vertices_accessor, + LabelId label, PropertyId property, const std::optional> &lower_bound, const std::optional> &upper_bound, View view, Storage *storage, Transaction *transaction); @@ -92,6 +108,7 @@ class InMemoryLabelPropertyIndex : public storage::LabelPropertyIndex { Iterator end(); private: + utils::SkipList::ConstAccessor pin_accessor_; utils::SkipList::Accessor index_accessor_; LabelId label_; PropertyId property_; @@ -131,6 +148,12 @@ class InMemoryLabelPropertyIndex : public 
storage::LabelPropertyIndex { const std::optional> &upper_bound, View view, Storage *storage, Transaction *transaction); + Iterable Vertices(LabelId label, PropertyId property, + memgraph::utils::SkipList::ConstAccessor vertices_acc, + const std::optional> &lower_bound, + const std::optional> &upper_bound, View view, Storage *storage, + Transaction *transaction); + private: std::map, utils::SkipList> index_; std::unordered_map *>> indices_by_property_; diff --git a/src/storage/v2/inmemory/replication/recovery.cpp b/src/storage/v2/inmemory/replication/recovery.cpp new file mode 100644 index 000000000..536c7c8fc --- /dev/null +++ b/src/storage/v2/inmemory/replication/recovery.cpp @@ -0,0 +1,240 @@ +// Copyright 2023 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#include "storage/v2/inmemory/replication/recovery.hpp" +#include +#include +#include +#include +#include "storage/v2/durability/durability.hpp" +#include "storage/v2/inmemory/storage.hpp" +#include "storage/v2/replication/recovery.hpp" +#include "utils/on_scope_exit.hpp" +#include "utils/variant_helpers.hpp" + +namespace memgraph::storage { + +// Handler for transferring the current WAL file whose data is +// contained in the internal buffer and the file. 
+class InMemoryCurrentWalHandler { + public: + explicit InMemoryCurrentWalHandler(InMemoryStorage const *storage, rpc::Client &rpc_client); + void AppendFilename(const std::string &filename); + + void AppendSize(size_t size); + + void AppendFileData(utils::InputFile *file); + + void AppendBufferData(const uint8_t *buffer, size_t buffer_size); + + /// @throw rpc::RpcFailedException + replication::CurrentWalRes Finalize(); + + private: + rpc::Client::StreamHandler stream_; +}; + +////// CurrentWalHandler ////// +InMemoryCurrentWalHandler::InMemoryCurrentWalHandler(InMemoryStorage const *storage, rpc::Client &rpc_client) + : stream_(rpc_client.Stream(storage->id())) {} + +void InMemoryCurrentWalHandler::AppendFilename(const std::string &filename) { + replication::Encoder encoder(stream_.GetBuilder()); + encoder.WriteString(filename); +} + +void InMemoryCurrentWalHandler::AppendSize(const size_t size) { + replication::Encoder encoder(stream_.GetBuilder()); + encoder.WriteUint(size); +} + +void InMemoryCurrentWalHandler::AppendFileData(utils::InputFile *file) { + replication::Encoder encoder(stream_.GetBuilder()); + encoder.WriteFileData(file); +} + +void InMemoryCurrentWalHandler::AppendBufferData(const uint8_t *buffer, const size_t buffer_size) { + replication::Encoder encoder(stream_.GetBuilder()); + encoder.WriteBuffer(buffer, buffer_size); +} + +replication::CurrentWalRes InMemoryCurrentWalHandler::Finalize() { return stream_.AwaitResponse(); } + +////// ReplicationClient Helpers ////// +replication::WalFilesRes TransferWalFiles(std::string db_name, rpc::Client &client, + const std::vector &wal_files) { + MG_ASSERT(!wal_files.empty(), "Wal files list is empty!"); + auto stream = client.Stream(std::move(db_name), wal_files.size()); + replication::Encoder encoder(stream.GetBuilder()); + for (const auto &wal : wal_files) { + spdlog::debug("Sending wal file: {}", wal); + encoder.WriteFile(wal); + } + return stream.AwaitResponse(); +} + +replication::SnapshotRes 
TransferSnapshot(std::string db_name, rpc::Client &client, const std::filesystem::path &path) { + auto stream = client.Stream(std::move(db_name)); + replication::Encoder encoder(stream.GetBuilder()); + encoder.WriteFile(path); + return stream.AwaitResponse(); +} + +uint64_t ReplicateCurrentWal(const InMemoryStorage *storage, rpc::Client &client, durability::WalFile const &wal_file) { + InMemoryCurrentWalHandler stream{storage, client}; + stream.AppendFilename(wal_file.Path().filename()); + utils::InputFile file; + MG_ASSERT(file.Open(wal_file.Path()), "Failed to open current WAL file at {}!", wal_file.Path()); + const auto [buffer, buffer_size] = wal_file.CurrentFileBuffer(); + stream.AppendSize(file.GetSize() + buffer_size); + stream.AppendFileData(&file); + stream.AppendBufferData(buffer, buffer_size); + auto response = stream.Finalize(); + return response.current_commit_timestamp; +} + +/// This method tries to find the optimal path for recoverying a single replica. +/// Based on the last commit transfered to replica it tries to update the +/// replica using durability files - WALs and Snapshots. WAL files are much +/// smaller in size as they contain only the Deltas (changes) made during the +/// transactions while Snapshots contain all the data. For that reason we prefer +/// WALs as much as possible. As the WAL file that is currently being updated +/// can change during the process we ignore it as much as possible. Also, it +/// uses the transaction lock so locking it can be really expensive. After we +/// fetch the list of finalized WALs, we try to find the longest chain of +/// sequential WALs, starting from the latest one, that will update the recovery +/// with the all missed updates. If the WAL chain cannot be created, replica is +/// behind by a lot, so we use the regular recovery process, we send the latest +/// snapshot and all the necessary WAL files, starting from the newest WAL that +/// contains a timestamp before the snapshot. 
If we registered the existence of +/// the current WAL, we add the sequence number we read from it to the recovery +/// process. After all the other steps are finished, if the current WAL contains +/// the same sequence number, it's the same WAL we read while fetching the +/// recovery steps, so we can safely send it to the replica. +/// We assume that the property of preserving at least 1 WAL before the snapshot +/// is satisfied as we extract the timestamp information from it. +std::vector GetRecoverySteps(uint64_t replica_commit, utils::FileRetainer::FileLocker *file_locker, + const InMemoryStorage *storage) { + std::vector recovery_steps; + auto locker_acc = file_locker->Access(); + + // First check if we can recover using the current wal file only + // otherwise save the seq_num of the current wal file + // This lock is also necessary to force the missed transaction to finish. + std::optional current_wal_seq_num; + std::optional current_wal_from_timestamp; + + std::unique_lock transaction_guard( + storage->engine_lock_); // Hold the storage lock so the current wal file cannot be changed + (void)locker_acc.AddPath(storage->recovery_.wal_directory_); // Protect all WALs from being deleted + + if (storage->wal_file_) { + current_wal_seq_num.emplace(storage->wal_file_->SequenceNumber()); + current_wal_from_timestamp.emplace(storage->wal_file_->FromTimestamp()); + // No need to hold the lock since the current WAL is present and we can simply skip them + transaction_guard.unlock(); + } + + // Read in finalized WAL files (excluding the current/active WAL) + utils::OnScopeExit + release_wal_dir( // Each individually used file will be locked, so at the end, the dir can be released + [&locker_acc, &wal_dir = storage->recovery_.wal_directory_]() { (void)locker_acc.RemovePath(wal_dir); }); + // Get WAL files, ordered by timestamp, from oldest to newest + auto wal_files = durability::GetWalFiles(storage->recovery_.wal_directory_, storage->uuid_, current_wal_seq_num); + 
MG_ASSERT(wal_files, "Wal files could not be loaded"); + if (transaction_guard.owns_lock()) + transaction_guard.unlock(); // In case we didn't have a current wal file, we can unlock only now since there is no + // guarantee what we'll see after we add the wal file + + // Read in snapshot files + (void)locker_acc.AddPath(storage->recovery_.snapshot_directory_); // Protect all snapshots from being deleted + utils::OnScopeExit + release_snapshot_dir( // Each individually used file will be locked, so at the end, the dir can be released + [&locker_acc, &snapshot_dir = storage->recovery_.snapshot_directory_]() { + (void)locker_acc.RemovePath(snapshot_dir); + }); + auto snapshot_files = durability::GetSnapshotFiles(storage->recovery_.snapshot_directory_, storage->uuid_); + std::optional latest_snapshot{}; + if (!snapshot_files.empty()) { + latest_snapshot.emplace(std::move(snapshot_files.back())); + } + + auto add_snapshot = [&]() { + if (!latest_snapshot) return; + const auto lock_success = locker_acc.AddPath(latest_snapshot->path); + MG_ASSERT(!lock_success.HasError(), "Tried to lock a nonexistant snapshot path."); + recovery_steps.emplace_back(std::in_place_type_t{}, std::move(latest_snapshot->path)); + }; + + // Check if we need the snapshot or if the WAL chain is enough + if (!wal_files->empty()) { + // Find WAL chain that contains the replica's commit timestamp + auto wal_chain_it = wal_files->rbegin(); + auto prev_seq{wal_chain_it->seq_num}; + for (; wal_chain_it != wal_files->rend(); ++wal_chain_it) { + if (prev_seq - wal_chain_it->seq_num > 1) { + // Broken chain, must have a snapshot that covers the missing commits + if (wal_chain_it->from_timestamp > replica_commit) { + // Chain does not go far enough, check the snapshot + MG_ASSERT(latest_snapshot, "Missing snapshot, while the WAL chain does not cover enough time."); + // Check for a WAL file that connects the snapshot to the chain + for (;; --wal_chain_it) { + // Going from the newest WAL files, find the 
first one that has a from_timestamp older than the snapshot + // NOTE: It could be that the only WAL needed is the current one + if (wal_chain_it->from_timestamp <= latest_snapshot->start_timestamp) { + break; + } + if (wal_chain_it == wal_files->rbegin()) break; + } + // Add snapshot to recovery steps + add_snapshot(); + } + break; + } + + if (wal_chain_it->to_timestamp <= replica_commit) { + // Got to a WAL that is older than what we need to recover the replica + break; + } + + prev_seq = wal_chain_it->seq_num; + } + + // Copy and lock the chain part we need, from oldest to newest + RecoveryWals rw{}; + rw.reserve(std::distance(wal_files->rbegin(), wal_chain_it)); + for (auto wal_it = wal_chain_it.base(); wal_it != wal_files->end(); ++wal_it) { + const auto lock_success = locker_acc.AddPath(wal_it->path); + MG_ASSERT(!lock_success.HasError(), "Tried to lock a nonexistant WAL path."); + rw.emplace_back(std::move(wal_it->path)); + } + if (!rw.empty()) { + recovery_steps.emplace_back(std::in_place_type_t{}, std::move(rw)); + } + + } else { + // No WAL chain, check if we need the snapshot + if (!current_wal_from_timestamp || replica_commit < *current_wal_from_timestamp) { + // No current wal or current wal too new + add_snapshot(); + } + } + + // In all cases, if we have a current wal file we need to use itW + if (current_wal_seq_num) { + // NOTE: File not handled directly, so no need to lock it + recovery_steps.emplace_back(RecoveryCurrentWal{*current_wal_seq_num}); + } + + return recovery_steps; +} + +} // namespace memgraph::storage diff --git a/src/storage/v2/inmemory/replication/recovery.hpp b/src/storage/v2/inmemory/replication/recovery.hpp new file mode 100644 index 000000000..2025800ab --- /dev/null +++ b/src/storage/v2/inmemory/replication/recovery.hpp @@ -0,0 +1,32 @@ +// Copyright 2023 Memgraph Ltd. 
+// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. +#pragma once + +#include "storage/v2/durability/durability.hpp" +#include "storage/v2/replication/recovery.hpp" +#include "storage/v2/replication/replication_client.hpp" + +namespace memgraph::storage { +class InMemoryStorage; + +////// ReplicationClient Helpers ////// + +replication::WalFilesRes TransferWalFiles(std::string db_name, rpc::Client &client, + const std::vector &wal_files); + +replication::SnapshotRes TransferSnapshot(std::string db_name, rpc::Client &client, const std::filesystem::path &path); + +uint64_t ReplicateCurrentWal(const InMemoryStorage *storage, rpc::Client &client, durability::WalFile const &wal_file); + +auto GetRecoverySteps(uint64_t replica_commit, utils::FileRetainer::FileLocker *file_locker, + const InMemoryStorage *storage) -> std::vector; + +} // namespace memgraph::storage diff --git a/src/storage/v2/inmemory/replication/replication_client.cpp b/src/storage/v2/inmemory/replication/replication_client.cpp deleted file mode 100644 index b8ecc1c72..000000000 --- a/src/storage/v2/inmemory/replication/replication_client.cpp +++ /dev/null @@ -1,349 +0,0 @@ -// Copyright 2023 Memgraph Ltd. -// -// Use of this software is governed by the Business Source License -// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source -// License, and you may not use this file except in compliance with the Business Source License. 
-// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0, included in the file -// licenses/APL.txt. - -#include "storage/v2/inmemory/replication/replication_client.hpp" - -#include "storage/v2/durability/durability.hpp" -#include "storage/v2/inmemory/storage.hpp" - -namespace memgraph::storage { - -namespace { -template -[[maybe_unused]] inline constexpr bool always_false_v = false; -} // namespace - -// Handler for transfering the current WAL file whose data is -// contained in the internal buffer and the file. -class CurrentWalHandler { - public: - explicit CurrentWalHandler(ReplicationClient *self); - void AppendFilename(const std::string &filename); - - void AppendSize(size_t size); - - void AppendFileData(utils::InputFile *file); - - void AppendBufferData(const uint8_t *buffer, size_t buffer_size); - - /// @throw rpc::RpcFailedException - replication::CurrentWalRes Finalize(); - - private: - ReplicationClient *self_; - rpc::Client::StreamHandler stream_; -}; - -////// CurrentWalHandler ////// -CurrentWalHandler::CurrentWalHandler(ReplicationClient *self) - : self_(self), stream_(self_->rpc_client_.Stream(self->GetStorageId())) {} - -void CurrentWalHandler::AppendFilename(const std::string &filename) { - replication::Encoder encoder(stream_.GetBuilder()); - encoder.WriteString(filename); -} - -void CurrentWalHandler::AppendSize(const size_t size) { - replication::Encoder encoder(stream_.GetBuilder()); - encoder.WriteUint(size); -} - -void CurrentWalHandler::AppendFileData(utils::InputFile *file) { - replication::Encoder encoder(stream_.GetBuilder()); - encoder.WriteFileData(file); -} - -void CurrentWalHandler::AppendBufferData(const uint8_t *buffer, const size_t buffer_size) { - replication::Encoder encoder(stream_.GetBuilder()); - encoder.WriteBuffer(buffer, buffer_size); -} - -replication::CurrentWalRes CurrentWalHandler::Finalize() { 
return stream_.AwaitResponse(); } - -////// ReplicationClient Helpers ////// - -replication::WalFilesRes TransferWalFiles(std::string db_name, rpc::Client &client, - const std::vector &wal_files) { - MG_ASSERT(!wal_files.empty(), "Wal files list is empty!"); - auto stream = client.Stream(std::move(db_name), wal_files.size()); - replication::Encoder encoder(stream.GetBuilder()); - for (const auto &wal : wal_files) { - spdlog::debug("Sending wal file: {}", wal); - encoder.WriteFile(wal); - } - return stream.AwaitResponse(); -} - -replication::SnapshotRes TransferSnapshot(std::string db_name, rpc::Client &client, const std::filesystem::path &path) { - auto stream = client.Stream(std::move(db_name)); - replication::Encoder encoder(stream.GetBuilder()); - encoder.WriteFile(path); - return stream.AwaitResponse(); -} - -uint64_t ReplicateCurrentWal(CurrentWalHandler &stream, durability::WalFile const &wal_file) { - stream.AppendFilename(wal_file.Path().filename()); - utils::InputFile file; - MG_ASSERT(file.Open(wal_file.Path()), "Failed to open current WAL file!"); - const auto [buffer, buffer_size] = wal_file.CurrentFileBuffer(); - stream.AppendSize(file.GetSize() + buffer_size); - stream.AppendFileData(&file); - stream.AppendBufferData(buffer, buffer_size); - auto response = stream.Finalize(); - return response.current_commit_timestamp; -} - -////// ReplicationClient ////// - -InMemoryReplicationClient::InMemoryReplicationClient(InMemoryStorage *storage, - const memgraph::replication::ReplicationClientConfig &config, - const memgraph::replication::ReplicationEpoch *epoch) - : ReplicationClient{storage, config, epoch} {} - -void InMemoryReplicationClient::RecoverReplica(uint64_t replica_commit) { - spdlog::debug("Starting replica recover"); - auto *storage = static_cast(storage_); - while (true) { - auto file_locker = storage->file_retainer_.AddLocker(); - - const auto steps = GetRecoverySteps(replica_commit, &file_locker); - int i = 0; - for (const 
InMemoryReplicationClient::RecoveryStep &recovery_step : steps) { - spdlog::trace("Recovering in step: {}", i++); - try { - std::visit( - [&, this](T &&arg) { - using StepType = std::remove_cvref_t; - if constexpr (std::is_same_v) { // TODO: split into 3 overloads - spdlog::debug("Sending the latest snapshot file: {}", arg); - auto response = TransferSnapshot(storage->id(), rpc_client_, arg); - replica_commit = response.current_commit_timestamp; - } else if constexpr (std::is_same_v) { - spdlog::debug("Sending the latest wal files"); - auto response = TransferWalFiles(storage->id(), rpc_client_, arg); - replica_commit = response.current_commit_timestamp; - spdlog::debug("Wal files successfully transferred."); - } else if constexpr (std::is_same_v) { - std::unique_lock transaction_guard(storage->engine_lock_); - if (storage->wal_file_ && storage->wal_file_->SequenceNumber() == arg.current_wal_seq_num) { - storage->wal_file_->DisableFlushing(); - transaction_guard.unlock(); - spdlog::debug("Sending current wal file"); - auto streamHandler = CurrentWalHandler{this}; - replica_commit = ReplicateCurrentWal(streamHandler, *storage->wal_file_); - storage->wal_file_->EnableFlushing(); - } else { - spdlog::debug("Cannot recover using current wal file"); - } - } else { - static_assert(always_false_v, "Missing type from variant visitor"); - } - }, - recovery_step); - } catch (const rpc::RpcFailedException &) { - { - std::unique_lock client_guard{client_lock_}; - replica_state_.store(replication::ReplicaState::INVALID); - } - HandleRpcFailure(); - return; - } - } - - spdlog::trace("Current timestamp on replica: {}", replica_commit); - // To avoid the situation where we read a correct commit timestamp in - // one thread, and after that another thread commits a different a - // transaction and THEN we set the state to READY in the first thread, - // we set this lock before checking the timestamp. 
- // We will detect that the state is invalid during the next commit, - // because replication::AppendDeltasRpc sends the last commit timestamp which - // replica checks if it's the same last commit timestamp it received - // and we will go to recovery. - // By adding this lock, we can avoid that, and go to RECOVERY immediately. - std::unique_lock client_guard{client_lock_}; - const auto last_commit_timestamp = LastCommitTimestamp(); - SPDLOG_INFO("Replica timestamp: {}", replica_commit); - SPDLOG_INFO("Last commit: {}", last_commit_timestamp); - if (last_commit_timestamp == replica_commit) { - replica_state_.store(replication::ReplicaState::READY); - return; - } - } -} - -/// This method tries to find the optimal path for recoverying a single replica. -/// Based on the last commit transfered to replica it tries to update the -/// replica using durability files - WALs and Snapshots. WAL files are much -/// smaller in size as they contain only the Deltas (changes) made during the -/// transactions while Snapshots contain all the data. For that reason we prefer -/// WALs as much as possible. As the WAL file that is currently being updated -/// can change during the process we ignore it as much as possible. Also, it -/// uses the transaction lock so locking it can be really expensive. After we -/// fetch the list of finalized WALs, we try to find the longest chain of -/// sequential WALs, starting from the latest one, that will update the recovery -/// with the all missed updates. If the WAL chain cannot be created, replica is -/// behind by a lot, so we use the regular recovery process, we send the latest -/// snapshot and all the necessary WAL files, starting from the newest WAL that -/// contains a timestamp before the snapshot. If we registered the existence of -/// the current WAL, we add the sequence number we read from it to the recovery -/// process. 
After all the other steps are finished, if the current WAL contains -/// the same sequence number, it's the same WAL we read while fetching the -/// recovery steps, so we can safely send it to the replica. -/// We assume that the property of preserving at least 1 WAL before the snapshot -/// is satisfied as we extract the timestamp information from it. -std::vector InMemoryReplicationClient::GetRecoverySteps( - const uint64_t replica_commit, utils::FileRetainer::FileLocker *file_locker) { - // First check if we can recover using the current wal file only - // otherwise save the seq_num of the current wal file - // This lock is also necessary to force the missed transaction to finish. - std::optional current_wal_seq_num; - std::optional current_wal_from_timestamp; - auto *storage = static_cast(storage_); - if (std::unique_lock transtacion_guard(storage->engine_lock_); storage->wal_file_) { - current_wal_seq_num.emplace(storage->wal_file_->SequenceNumber()); - current_wal_from_timestamp.emplace(storage->wal_file_->FromTimestamp()); - } - - auto locker_acc = file_locker->Access(); - auto wal_files = durability::GetWalFiles(storage->wal_directory_, storage->uuid_, current_wal_seq_num); - MG_ASSERT(wal_files, "Wal files could not be loaded"); - - auto snapshot_files = durability::GetSnapshotFiles(storage->snapshot_directory_, storage->uuid_); - std::optional latest_snapshot; - if (!snapshot_files.empty()) { - std::sort(snapshot_files.begin(), snapshot_files.end()); - latest_snapshot.emplace(std::move(snapshot_files.back())); - } - - std::vector recovery_steps; - - // No finalized WAL files were found. This means the difference is contained - // inside the current WAL or the snapshot. 
- if (wal_files->empty()) { - if (current_wal_from_timestamp && replica_commit >= *current_wal_from_timestamp) { - MG_ASSERT(current_wal_seq_num); - recovery_steps.emplace_back(RecoveryCurrentWal{*current_wal_seq_num}); - return recovery_steps; - } - - // Without the finalized WAL containing the current timestamp of replica, - // we cannot know if the difference is only in the current WAL or we need - // to send the snapshot. - if (latest_snapshot) { - const auto lock_success = locker_acc.AddPath(latest_snapshot->path); - MG_ASSERT(!lock_success.HasError(), "Tried to lock a nonexistant path."); - recovery_steps.emplace_back(std::in_place_type_t{}, std::move(latest_snapshot->path)); - } - // if there are no finalized WAL files, snapshot left the current WAL - // as the WAL file containing a transaction before snapshot creation - // so we can be sure that the current WAL is present - MG_ASSERT(current_wal_seq_num); - recovery_steps.emplace_back(RecoveryCurrentWal{*current_wal_seq_num}); - return recovery_steps; - } - - // Find the longest chain of WALs for recovery. - // The chain consists ONLY of sequential WALs. - auto rwal_it = wal_files->rbegin(); - - // if the last finalized WAL is before the replica commit - // then we can recovery only from current WAL - if (rwal_it->to_timestamp <= replica_commit) { - MG_ASSERT(current_wal_seq_num); - recovery_steps.emplace_back(RecoveryCurrentWal{*current_wal_seq_num}); - return recovery_steps; - } - - uint64_t previous_seq_num{rwal_it->seq_num}; - for (; rwal_it != wal_files->rend(); ++rwal_it) { - // If the difference between two consecutive wal files is not 0 or 1 - // we have a missing WAL in our chain - if (previous_seq_num - rwal_it->seq_num > 1) { - break; - } - - // Find first WAL that contains up to replica commit, i.e. WAL - // that is before the replica commit or conatins the replica commit - // as the last committed transaction OR we managed to find the first WAL - // file. 
- if (replica_commit >= rwal_it->from_timestamp || rwal_it->seq_num == 0) { - if (replica_commit >= rwal_it->to_timestamp) { - // We want the WAL after because the replica already contains all the - // commits from this WAL - --rwal_it; - } - std::vector wal_chain; - auto distance_from_first = std::distance(rwal_it, wal_files->rend() - 1); - // We have managed to create WAL chain - // We need to lock these files and add them to the chain - for (auto result_wal_it = wal_files->begin() + distance_from_first; result_wal_it != wal_files->end(); - ++result_wal_it) { - const auto lock_success = locker_acc.AddPath(result_wal_it->path); - MG_ASSERT(!lock_success.HasError(), "Tried to lock a nonexistant path."); - wal_chain.push_back(std::move(result_wal_it->path)); - } - - recovery_steps.emplace_back(std::in_place_type_t{}, std::move(wal_chain)); - - if (current_wal_seq_num) { - recovery_steps.emplace_back(RecoveryCurrentWal{*current_wal_seq_num}); - } - return recovery_steps; - } - - previous_seq_num = rwal_it->seq_num; - } - - MG_ASSERT(latest_snapshot, "Invalid durability state, missing snapshot"); - // We didn't manage to find a WAL chain, we need to send the latest snapshot - // with its WALs - const auto lock_success = locker_acc.AddPath(latest_snapshot->path); - MG_ASSERT(!lock_success.HasError(), "Tried to lock a nonexistant path."); - recovery_steps.emplace_back(std::in_place_type_t{}, std::move(latest_snapshot->path)); - - std::vector recovery_wal_files; - auto wal_it = wal_files->begin(); - for (; wal_it != wal_files->end(); ++wal_it) { - // Assuming recovery process is correct the snashpot should - // always retain a single WAL that contains a transaction - // before its creation - if (latest_snapshot->start_timestamp < wal_it->to_timestamp) { - if (latest_snapshot->start_timestamp < wal_it->from_timestamp) { - MG_ASSERT(wal_it != wal_files->begin(), "Invalid durability files state"); - --wal_it; - } - break; - } - } - - for (; wal_it != wal_files->end(); 
++wal_it) { - const auto lock_success = locker_acc.AddPath(wal_it->path); - MG_ASSERT(!lock_success.HasError(), "Tried to lock a nonexistant path."); - recovery_wal_files.push_back(std::move(wal_it->path)); - } - - // We only have a WAL before the snapshot - if (recovery_wal_files.empty()) { - const auto lock_success = locker_acc.AddPath(wal_files->back().path); - MG_ASSERT(!lock_success.HasError(), "Tried to lock a nonexistant path."); - recovery_wal_files.push_back(std::move(wal_files->back().path)); - } - - recovery_steps.emplace_back(std::in_place_type_t{}, std::move(recovery_wal_files)); - - if (current_wal_seq_num) { - recovery_steps.emplace_back(RecoveryCurrentWal{*current_wal_seq_num}); - } - - return recovery_steps; -} - -} // namespace memgraph::storage diff --git a/src/storage/v2/inmemory/replication/replication_client.hpp b/src/storage/v2/inmemory/replication/replication_client.hpp deleted file mode 100644 index e956838e7..000000000 --- a/src/storage/v2/inmemory/replication/replication_client.hpp +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2023 Memgraph Ltd. -// -// Use of this software is governed by the Business Source License -// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source -// License, and you may not use this file except in compliance with the Business Source License. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0, included in the file -// licenses/APL.txt. 
-#pragma once - -#include "storage/v2/replication/replication_client.hpp" - -namespace memgraph::storage { - -class InMemoryStorage; - -class InMemoryReplicationClient : public ReplicationClient { - public: - InMemoryReplicationClient(InMemoryStorage *storage, const memgraph::replication::ReplicationClientConfig &config, - const memgraph::replication::ReplicationEpoch *epoch); - - protected: - void RecoverReplica(uint64_t replica_commit) override; - - // TODO: move the GetRecoverySteps stuff below as an internal detail - using RecoverySnapshot = std::filesystem::path; - using RecoveryWals = std::vector; - struct RecoveryCurrentWal { - explicit RecoveryCurrentWal(const uint64_t current_wal_seq_num) : current_wal_seq_num(current_wal_seq_num) {} - uint64_t current_wal_seq_num; - }; - using RecoveryStep = std::variant; - std::vector GetRecoverySteps(uint64_t replica_commit, utils::FileRetainer::FileLocker *file_locker); -}; - -} // namespace memgraph::storage diff --git a/src/storage/v2/inmemory/storage.cpp b/src/storage/v2/inmemory/storage.cpp index 9f00081f6..08aa896bf 100644 --- a/src/storage/v2/inmemory/storage.cpp +++ b/src/storage/v2/inmemory/storage.cpp @@ -10,28 +10,65 @@ // licenses/APL.txt. 
#include "storage/v2/inmemory/storage.hpp" +#include +#include +#include #include "dbms/constants.hpp" #include "memory/global_memory_control.hpp" #include "storage/v2/durability/durability.hpp" #include "storage/v2/durability/snapshot.hpp" +#include "storage/v2/edge_direction.hpp" +#include "storage/v2/id_types.hpp" #include "storage/v2/metadata_delta.hpp" /// REPLICATION /// #include "dbms/inmemory/replication_handlers.hpp" -#include "storage/v2/inmemory/replication/replication_client.hpp" +#include "storage/v2/inmemory/replication/recovery.hpp" #include "storage/v2/inmemory/unique_constraints.hpp" +#include "storage/v2/property_value.hpp" #include "utils/resource_lock.hpp" #include "utils/stat.hpp" namespace memgraph::storage { +namespace { + +auto FindEdges(const View view, EdgeTypeId edge_type, const VertexAccessor *from_vertex, VertexAccessor *to_vertex) + -> Result { + auto use_out_edges = [](Vertex const *from_vertex, Vertex const *to_vertex) { + // Obtain the locks by `gid` order to avoid lock cycles. + auto guard_from = std::unique_lock{from_vertex->lock, std::defer_lock}; + auto guard_to = std::unique_lock{to_vertex->lock, std::defer_lock}; + if (from_vertex->gid < to_vertex->gid) { + guard_from.lock(); + guard_to.lock(); + } else if (from_vertex->gid > to_vertex->gid) { + guard_to.lock(); + guard_from.lock(); + } else { + // The vertices are the same vertex, only lock one. + guard_from.lock(); + } + + // With the potentially cheaper side FindEdges + const auto out_n = from_vertex->out_edges.size(); + const auto in_n = to_vertex->in_edges.size(); + return out_n <= in_n; + }; + + return use_out_edges(from_vertex->vertex_, to_vertex->vertex_) ? 
from_vertex->OutEdges(view, {edge_type}, to_vertex) + : to_vertex->InEdges(view, {edge_type}, from_vertex); +} + +}; // namespace + using OOMExceptionEnabler = utils::MemoryTracker::OutOfMemoryExceptionEnabler; InMemoryStorage::InMemoryStorage(Config config, StorageMode storage_mode) : Storage(config, storage_mode), - snapshot_directory_(config.durability.storage_directory / durability::kSnapshotDirectory), + recovery_{config.durability.storage_directory / durability::kSnapshotDirectory, + config.durability.storage_directory / durability::kWalDirectory}, lock_file_path_(config.durability.storage_directory / durability::kLockFile), - wal_directory_(config.durability.storage_directory / durability::kWalDirectory), uuid_(utils::GenerateUUID()), global_locker_(file_retainer_.AddLocker()) { MG_ASSERT(storage_mode != StorageMode::ON_DISK_TRANSACTIONAL, @@ -42,9 +79,9 @@ InMemoryStorage::InMemoryStorage(Config config, StorageMode storage_mode) // permission errors. This is done early to crash the database on startup // instead of crashing the database for the first time during runtime (which // could be an unpleasant surprise). - utils::EnsureDirOrDie(snapshot_directory_); + utils::EnsureDirOrDie(recovery_.snapshot_directory_); // Same reasoning as above. - utils::EnsureDirOrDie(wal_directory_); + utils::EnsureDirOrDie(recovery_.wal_directory_); // Verify that the user that started the process is the same user that is // the owner of the storage directory. 
@@ -62,9 +99,8 @@ InMemoryStorage::InMemoryStorage(Config config, StorageMode storage_mode) config_.durability.storage_directory); } if (config_.durability.recover_on_startup) { - auto info = - durability::RecoverData(snapshot_directory_, wal_directory_, &uuid_, repl_storage_state_, &vertices_, &edges_, - &edge_count_, name_id_mapper_.get(), &indices_, &constraints_, config_, &wal_seq_num_); + auto info = recovery_.RecoverData(&uuid_, repl_storage_state_, &vertices_, &edges_, &edge_count_, + name_id_mapper_.get(), &indices_, &constraints_, config_, &wal_seq_num_); if (info) { vertex_id_ = info->next_vertex_id; edge_id_ = info->next_edge_id; @@ -78,8 +114,8 @@ InMemoryStorage::InMemoryStorage(Config config, StorageMode storage_mode) bool files_moved = false; auto backup_root = config_.durability.storage_directory / durability::kBackupDirectory; for (const auto &[path, dirname, what] : - {std::make_tuple(snapshot_directory_, durability::kSnapshotDirectory, "snapshot"), - std::make_tuple(wal_directory_, durability::kWalDirectory, "WAL")}) { + {std::make_tuple(recovery_.snapshot_directory_, durability::kSnapshotDirectory, "snapshot"), + std::make_tuple(recovery_.wal_directory_, durability::kWalDirectory, "WAL")}) { if (!utils::DirExists(path)) continue; auto backup_curr = backup_root / dirname; std::error_code error_code; @@ -285,6 +321,9 @@ Result InMemoryStorage::InMemoryAccessor::CreateEdge(VertexAccesso if (to_vertex->deleted) return Error::DELETED_OBJECT; } + if (storage_->config_.items.enable_schema_metadata) { + storage_->stored_edge_types_.try_insert(edge_type); + } auto *mem_storage = static_cast(storage_); auto gid = storage::Gid::FromUint(mem_storage->edge_id_.fetch_add(1, std::memory_order_acq_rel)); EdgeRef edge(gid); @@ -315,6 +354,24 @@ Result InMemoryStorage::InMemoryAccessor::CreateEdge(VertexAccesso return EdgeAccessor(edge, edge_type, from_vertex, to_vertex, storage_, &transaction_); } +std::optional InMemoryStorage::InMemoryAccessor::FindEdge(Gid 
gid, const View view, EdgeTypeId edge_type, + VertexAccessor *from_vertex, + VertexAccessor *to_vertex) { + auto res = FindEdges(view, edge_type, from_vertex, to_vertex); + if (res.HasError()) return std::nullopt; // TODO: use a Result type + + auto const it = std::invoke([this, gid, &res]() { + auto const byGid = [gid](EdgeAccessor const &edge_accessor) { return edge_accessor.edge_.gid == gid; }; + auto const byEdgePtr = [gid](EdgeAccessor const &edge_accessor) { return edge_accessor.edge_.ptr->gid == gid; }; + if (config_.properties_on_edges) return std::ranges::find_if(res->edges, byEdgePtr); + return std::ranges::find_if(res->edges, byGid); + }); + + if (it == res->edges.end()) return std::nullopt; // TODO: use a Result type + + return *it; +} + Result InMemoryStorage::InMemoryAccessor::CreateEdgeEx(VertexAccessor *from, VertexAccessor *to, EdgeTypeId edge_type, storage::Gid gid) { MG_ASSERT(from->transaction_ == to->transaction_, @@ -349,6 +406,10 @@ Result InMemoryStorage::InMemoryAccessor::CreateEdgeEx(VertexAcces if (to_vertex->deleted) return Error::DELETED_OBJECT; } + if (storage_->config_.items.enable_schema_metadata) { + storage_->stored_edge_types_.try_insert(edge_type); + } + // NOTE: When we update the next `edge_id_` here we perform a RMW // (read-modify-write) operation that ISN'T atomic! 
But, that isn't an issue // because this function is only called from the replication delta applier @@ -697,7 +758,8 @@ utils::BasicResult InMemoryStorage::InMemoryAcce could_replicate_all_sync_replicas = mem_storage->AppendToWalDataDefinition(transaction_, *commit_timestamp_); // protected by engine_guard // TODO: release lock, and update all deltas to have a local copy of the commit timestamp - transaction_.commit_timestamp->store(*commit_timestamp_, std::memory_order_release); // protected by engine_guard + transaction_.commit_timestamp->store(*commit_timestamp_, + std::memory_order_release); // protected by engine_guard // Replica can only update the last commit timestamp with // the commits received from main. if (is_main || desired_commit_timestamp.has_value()) { @@ -823,6 +885,21 @@ void InMemoryStorage::InMemoryAccessor::Abort() { std::list my_deleted_vertices; std::list my_deleted_edges; + std::map> label_cleanup; + std::map>> label_property_cleanup; + std::map>> property_cleanup; + + // CONSTRAINTS + if (transaction_.constraint_verification_info.NeedsUniqueConstraintVerification()) { + // Need to remove elements from constraints before handling of the deltas, so the elements match the correct + // values + auto vertices_to_check = transaction_.constraint_verification_info.GetVerticesForUniqueConstraintChecking(); + auto vertices_to_check_v = std::vector{vertices_to_check.begin(), vertices_to_check.end()}; + storage_->constraints_.AbortEntries(vertices_to_check_v, transaction_.start_timestamp); + } + + const auto index_stats = storage_->indices_.Analysis(); + for (const auto &delta : transaction_.deltas.use()) { auto prev = delta.prev.Get(); switch (prev.type) { @@ -838,6 +915,24 @@ void InMemoryStorage::InMemoryAccessor::Abort() { MG_ASSERT(it != vertex->labels.end(), "Invalid database state!"); std::swap(*it, *vertex->labels.rbegin()); vertex->labels.pop_back(); + + // For label index + // check if there is a label index for the label and add entry if so 
+ // For property label index + // check if we care about the label; this will return all the propertyIds we care about and then get + // the current property value + if (std::binary_search(index_stats.label.begin(), index_stats.label.end(), current->label)) { + label_cleanup[current->label].emplace_back(vertex); + } + const auto &properties = index_stats.property_label.l2p.find(current->label); + if (properties != index_stats.property_label.l2p.end()) { + for (const auto &property : properties->second) { + auto current_value = vertex->properties.GetProperty(property); + if (!current_value.IsNull()) { + label_property_cleanup[current->label].emplace_back(std::move(current_value), vertex); + } + } + } break; } case Delta::Action::ADD_LABEL: { @@ -847,6 +942,18 @@ void InMemoryStorage::InMemoryAccessor::Abort() { break; } case Delta::Action::SET_PROPERTY: { + // For label index nothing + // For property label index + // check if we care about the property, this will return all the labels and then get current property + // value + const auto &labels = index_stats.property_label.p2l.find(current->property.key); + if (labels != index_stats.property_label.p2l.end()) { + auto current_value = vertex->properties.GetProperty(current->property.key); + if (!current_value.IsNull()) { + property_cleanup[current->property.key].emplace_back(std::move(current_value), vertex); + } + } + // Setting the correct value vertex->properties.SetProperty(current->property.key, current->property.value); break; } @@ -963,7 +1070,7 @@ void InMemoryStorage::InMemoryAccessor::Abort() { auto *mem_storage = static_cast(storage_); { - std::unique_lock engine_guard(storage_->engine_lock_); + auto engine_guard = std::unique_lock(storage_->engine_lock_); uint64_t mark_timestamp = storage_->timestamp_; // Take garbage_undo_buffers lock while holding the engine lock to make // sure that entries are sorted by mark timestamp in the list. 
@@ -975,10 +1082,37 @@ void InMemoryStorage::InMemoryAccessor::Abort() { garbage_undo_buffers.emplace_back(mark_timestamp, std::move(transaction_.deltas), std::move(transaction_.commit_timestamp)); }); - mem_storage->deleted_vertices_.WithLock( - [&](auto &deleted_vertices) { deleted_vertices.splice(deleted_vertices.begin(), my_deleted_vertices); }); - mem_storage->deleted_edges_.WithLock( - [&](auto &deleted_edges) { deleted_edges.splice(deleted_edges.begin(), my_deleted_edges); }); + + /// We MUST unlink (aka. remove) entries in indexes and constraints + /// before we unlink (aka. remove) vertices from storage + /// this is because they point into vertices skip_list + + // INDICES + for (auto const &[label, vertices] : label_cleanup) { + storage_->indices_.AbortEntries(label, vertices, transaction_.start_timestamp); + } + for (auto const &[label, prop_vertices] : label_property_cleanup) { + storage_->indices_.AbortEntries(label, prop_vertices, transaction_.start_timestamp); + } + for (auto const &[property, prop_vertices] : property_cleanup) { + storage_->indices_.AbortEntries(property, prop_vertices, transaction_.start_timestamp); + } + + // VERTICES + { + auto vertices_acc = mem_storage->vertices_.access(); + for (auto gid : my_deleted_vertices) { + vertices_acc.remove(gid); + } + } + + // EDGES + { + auto edges_acc = mem_storage->edges_.access(); + for (auto gid : my_deleted_edges) { + edges_acc.remove(gid); + } + } } mem_storage->commit_log_->MarkFinished(transaction_.start_timestamp); @@ -1066,8 +1200,8 @@ InMemoryStorage::InMemoryAccessor::CreateExistenceConstraint(LabelId label, Prop if (existence_constraints->ConstraintExists(label, property)) { return StorageExistenceConstraintDefinitionError{ConstraintDefinitionError{}}; } - if (auto violation = - ExistenceConstraints::ValidateVerticesOnConstraint(in_memory->vertices_.access(), label, property); + if (auto violation = ExistenceConstraints::ValidateVerticesOnConstraint(in_memory->vertices_.access(), 
label, + property, std::nullopt); violation.has_value()) { return StorageExistenceConstraintDefinitionError{violation.value()}; } @@ -1094,7 +1228,7 @@ InMemoryStorage::InMemoryAccessor::CreateUniqueConstraint(LabelId label, const s auto *in_memory = static_cast(storage_); auto *mem_unique_constraints = static_cast(in_memory->constraints_.unique_constraints_.get()); - auto ret = mem_unique_constraints->CreateConstraint(label, properties, in_memory->vertices_.access()); + auto ret = mem_unique_constraints->CreateConstraint(label, properties, in_memory->vertices_.access(), std::nullopt); if (ret.HasError()) { return StorageUniqueConstraintDefinitionError{ret.GetError()}; } @@ -1271,8 +1405,6 @@ void InMemoryStorage::CollectGarbage(std::unique_lock main_ // vertices that appear in an index also exist in main storage. std::list current_deleted_edges; std::list current_deleted_vertices; - deleted_vertices_->swap(current_deleted_vertices); - deleted_edges_->swap(current_deleted_edges); auto const need_full_scan_vertices = gc_full_scan_vertices_delete_.exchange(false); auto const need_full_scan_edges = gc_full_scan_edges_delete_.exchange(false); @@ -1575,7 +1707,7 @@ bool InMemoryStorage::InitializeWalFile(memgraph::replication::ReplicationEpoch if (config_.durability.snapshot_wal_mode != Config::Durability::SnapshotWalMode::PERIODIC_SNAPSHOT_WITH_WAL) return false; if (!wal_file_) { - wal_file_.emplace(wal_directory_, uuid_, epoch.id(), config_.items, name_id_mapper_.get(), wal_seq_num_++, + wal_file_.emplace(recovery_.wal_directory_, uuid_, epoch.id(), config_.items, name_id_mapper_.get(), wal_seq_num_++, &file_retainer_); } return true; @@ -1608,7 +1740,7 @@ bool InMemoryStorage::AppendToWalDataManipulation(const Transaction &transaction // A single transaction will always be contained in a single WAL file. 
auto current_commit_timestamp = transaction.commit_timestamp->load(std::memory_order_acquire); - repl_storage_state_.InitializeTransaction(wal_file_->SequenceNumber()); + repl_storage_state_.InitializeTransaction(wal_file_->SequenceNumber(), this); auto append_deltas = [&](auto callback) { // Helper lambda that traverses the delta chain on order to find the first @@ -1767,7 +1899,7 @@ bool InMemoryStorage::AppendToWalDataManipulation(const Transaction &transaction wal_file_->AppendTransactionEnd(final_commit_timestamp); FinalizeWalFile(); - return repl_storage_state_.FinalizeTransaction(final_commit_timestamp); + return repl_storage_state_.FinalizeTransaction(final_commit_timestamp, this); } bool InMemoryStorage::AppendToWalDataDefinition(const Transaction &transaction, uint64_t final_commit_timestamp) { @@ -1775,7 +1907,7 @@ bool InMemoryStorage::AppendToWalDataDefinition(const Transaction &transaction, return true; } - repl_storage_state_.InitializeTransaction(wal_file_->SequenceNumber()); + repl_storage_state_.InitializeTransaction(wal_file_->SequenceNumber(), this); for (const auto &md_delta : transaction.md_deltas) { switch (md_delta.action) { @@ -1846,7 +1978,7 @@ bool InMemoryStorage::AppendToWalDataDefinition(const Transaction &transaction, wal_file_->AppendTransactionEnd(final_commit_timestamp); FinalizeWalFile(); - return repl_storage_state_.FinalizeTransaction(final_commit_timestamp); + return repl_storage_state_.FinalizeTransaction(final_commit_timestamp, this); } void InMemoryStorage::AppendToWalDataDefinition(durability::StorageMetadataOperation operation, LabelId label, @@ -1885,8 +2017,8 @@ utils::BasicResult InMemoryStorage::Create auto snapshot_creator = [this, &epoch]() { utils::Timer timer; auto transaction = CreateTransaction(IsolationLevel::SNAPSHOT_ISOLATION, storage_mode_); - durability::CreateSnapshot(this, &transaction, snapshot_directory_, wal_directory_, &vertices_, &edges_, uuid_, - epoch, repl_storage_state_.history, &file_retainer_); 
+ durability::CreateSnapshot(this, &transaction, recovery_.snapshot_directory_, recovery_.wal_directory_, &vertices_, + &edges_, uuid_, epoch, repl_storage_state_.history, &file_retainer_); // Finalize snapshot transaction. commit_log_->MarkFinished(transaction.start_timestamp); @@ -1922,12 +2054,12 @@ utils::BasicResult InMemoryStorage::Create void InMemoryStorage::FreeMemory(std::unique_lock main_guard) { CollectGarbage(std::move(main_guard)); + static_cast(indices_.label_index_.get())->RunGC(); + static_cast(indices_.label_property_index_.get())->RunGC(); + // SkipList is already threadsafe vertices_.run_gc(); edges_.run_gc(); - - static_cast(indices_.label_index_.get())->RunGC(); - static_cast(indices_.label_property_index_.get())->RunGC(); } uint64_t InMemoryStorage::CommitTimestamp(const std::optional desired_commit_timestamp) { @@ -1972,12 +2104,6 @@ utils::FileRetainer::FileLockerAccessor::ret_type InMemoryStorage::UnlockPath() return true; } -auto InMemoryStorage::CreateReplicationClient(const memgraph::replication::ReplicationClientConfig &config, - const memgraph::replication::ReplicationEpoch *current_epoch) - -> std::unique_ptr { - return std::make_unique(this, config, current_epoch); -} - std::unique_ptr InMemoryStorage::Access(std::optional override_isolation_level, bool is_main) { return std::unique_ptr(new InMemoryAccessor{Storage::Accessor::shared_access, this, diff --git a/src/storage/v2/inmemory/storage.hpp b/src/storage/v2/inmemory/storage.hpp index bfb445332..2d2837467 100644 --- a/src/storage/v2/inmemory/storage.hpp +++ b/src/storage/v2/inmemory/storage.hpp @@ -18,10 +18,13 @@ #include "storage/v2/indices/label_index_stats.hpp" #include "storage/v2/inmemory/label_index.hpp" #include "storage/v2/inmemory/label_property_index.hpp" +#include "storage/v2/inmemory/replication/recovery.hpp" +#include "storage/v2/replication/replication_client.hpp" #include "storage/v2/storage.hpp" /// REPLICATION /// #include "replication/config.hpp" +#include 
"storage/v2/inmemory/replication/recovery.hpp" #include "storage/v2/replication/enums.hpp" #include "storage/v2/replication/replication_storage_state.hpp" #include "storage/v2/replication/rpc.hpp" @@ -44,7 +47,12 @@ namespace memgraph::storage { class InMemoryStorage final : public Storage { friend class memgraph::dbms::InMemoryReplicationHandlers; - friend class InMemoryReplicationClient; + friend class ReplicationStorageClient; + friend std::vector GetRecoverySteps(uint64_t replica_commit, + utils::FileRetainer::FileLocker *file_locker, + const InMemoryStorage *storage); + friend class InMemoryLabelIndex; + friend class InMemoryLabelPropertyIndex; public: enum class CreateSnapshotError : uint8_t { DisabledForReplica, ReachedMaxNumTries }; @@ -179,6 +187,9 @@ class InMemoryStorage final : public Storage { /// @throw std::bad_alloc Result CreateEdge(VertexAccessor *from, VertexAccessor *to, EdgeTypeId edge_type) override; + std::optional FindEdge(Gid gid, View view, EdgeTypeId edge_type, VertexAccessor *from_vertex, + VertexAccessor *to_vertex) override; + Result EdgeSetFrom(EdgeAccessor *edge, VertexAccessor *new_from) override; Result EdgeSetTo(EdgeAccessor *edge, VertexAccessor *new_to) override; @@ -332,12 +343,10 @@ class InMemoryStorage final : public Storage { using Storage::CreateTransaction; Transaction CreateTransaction(IsolationLevel isolation_level, StorageMode storage_mode, bool is_main) override; - auto CreateReplicationClient(const memgraph::replication::ReplicationClientConfig &config, - const memgraph::replication::ReplicationEpoch *current_epoch) - -> std::unique_ptr override; - void SetStorageMode(StorageMode storage_mode); + const durability::Recovery &GetRecovery() const noexcept { return recovery_; } + private: /// The force parameter determines the behaviour of the garbage collector. /// If it's set to true, it will behave as a global operation, i.e. 
it can't @@ -390,10 +399,10 @@ class InMemoryStorage final : public Storage { utils::SkipList edges_; // Durability - std::filesystem::path snapshot_directory_; + durability::Recovery recovery_; + std::filesystem::path lock_file_path_; utils::OutputFile lock_file_handle_; - std::filesystem::path wal_directory_; utils::Scheduler snapshot_runner_; utils::SpinLock snapshot_lock_; diff --git a/src/storage/v2/inmemory/unique_constraints.cpp b/src/storage/v2/inmemory/unique_constraints.cpp index 78929bc38..76cda1730 100644 --- a/src/storage/v2/inmemory/unique_constraints.cpp +++ b/src/storage/v2/inmemory/unique_constraints.cpp @@ -10,7 +10,13 @@ // licenses/APL.txt. #include "storage/v2/inmemory/unique_constraints.hpp" - +#include +#include "storage/v2/constraints/constraint_violation.hpp" +#include "storage/v2/constraints/utils.hpp" +#include "storage/v2/durability/recovery_type.hpp" +#include "storage/v2/id_types.hpp" +#include "utils/logging.hpp" +#include "utils/skip_list.hpp" namespace memgraph::storage { namespace { @@ -256,11 +262,12 @@ bool InMemoryUniqueConstraints::Entry::operator==(const std::vectorlabels) { - if (!constraints_by_label_.contains(label)) { + const auto &constraint = constraints_by_label_.find(label); + if (constraint == constraints_by_label_.end()) { continue; } - for (auto &[props, storage] : constraints_by_label_.at(label)) { + for (auto &[props, storage] : constraint->second) { auto values = vertex->properties.ExtractPropertyValues(props); if (!values) { @@ -273,9 +280,101 @@ void InMemoryUniqueConstraints::UpdateBeforeCommit(const Vertex *vertex, const T } } +std::variant +InMemoryUniqueConstraints::GetCreationFunction( + const std::optional &par_exec_info) { + if (par_exec_info.has_value()) { + return InMemoryUniqueConstraints::MultipleThreadsConstraintValidation{par_exec_info.value()}; + } + return InMemoryUniqueConstraints::SingleThreadConstraintValidation{}; +} + +bool 
InMemoryUniqueConstraints::MultipleThreadsConstraintValidation::operator()( + const utils::SkipList::Accessor &vertex_accessor, utils::SkipList::Accessor &constraint_accessor, + const LabelId &label, const std::set &properties) { + utils::MemoryTracker::OutOfMemoryExceptionEnabler oom_exception; + const auto &vertex_batches = parallel_exec_info.vertex_recovery_info; + MG_ASSERT(!vertex_batches.empty(), + "The size of batches should always be greater than zero if you want to use the parallel version of index " + "creation!"); + const auto thread_count = std::min(parallel_exec_info.thread_count, vertex_batches.size()); + + std::atomic batch_counter = 0; + memgraph::utils::Synchronized, utils::RWSpinLock> has_error; + { + std::vector threads; + threads.reserve(thread_count); + for (auto i{0U}; i < thread_count; ++i) { + threads.emplace_back( + [&has_error, &vertex_batches, &batch_counter, &vertex_accessor, &constraint_accessor, &label, &properties]() { + do_per_thread_validation(has_error, DoValidate, vertex_batches, batch_counter, vertex_accessor, + constraint_accessor, label, properties); + }); + } + } + return has_error.Lock()->has_value(); +} + +bool InMemoryUniqueConstraints::SingleThreadConstraintValidation::operator()( + const utils::SkipList::Accessor &vertex_accessor, utils::SkipList::Accessor &constraint_accessor, + const LabelId &label, const std::set &properties) { + for (const Vertex &vertex : vertex_accessor) { + if (const auto violation = DoValidate(vertex, constraint_accessor, label, properties); violation.has_value()) { + return true; + } + } + return false; +} + +std::optional InMemoryUniqueConstraints::DoValidate( + const Vertex &vertex, utils::SkipList::Accessor &constraint_accessor, const LabelId &label, + const std::set &properties) { + if (vertex.deleted || !utils::Contains(vertex.labels, label)) { + return std::nullopt; + } + auto values = vertex.properties.ExtractPropertyValues(properties); + if (!values) { + return std::nullopt; + } + + // 
Check whether there already is a vertex with the same values for the + // given label and property. + auto it = constraint_accessor.find_equal_or_greater(*values); + if (it != constraint_accessor.end() && it->values == *values) { + return ConstraintViolation{ConstraintViolation::Type::UNIQUE, label, properties}; + } + + constraint_accessor.insert(Entry{std::move(*values), &vertex, 0}); + return std::nullopt; +} + +void InMemoryUniqueConstraints::AbortEntries(std::span vertices, uint64_t exact_start_timestamp) { + for (const auto &vertex : vertices) { + for (const auto &label : vertex->labels) { + const auto &constraint = constraints_by_label_.find(label); + if (constraint == constraints_by_label_.end()) { + return; + } + + for (auto &[props, storage] : constraint->second) { + auto values = vertex->properties.ExtractPropertyValues(props); + + if (!values) { + continue; + } + + auto acc = storage->access(); + acc.remove(Entry{std::move(*values), vertex, exact_start_timestamp}); + } + } + } +} + utils::BasicResult -InMemoryUniqueConstraints::CreateConstraint(LabelId label, const std::set &properties, - utils::SkipList::Accessor vertices) { +InMemoryUniqueConstraints::CreateConstraint( + LabelId label, const std::set &properties, const utils::SkipList::Accessor &vertex_accessor, + const std::optional &par_exec_info) { if (properties.empty()) { return CreationStatus::EMPTY_PROPERTIES; } @@ -283,49 +382,28 @@ InMemoryUniqueConstraints::CreateConstraint(LabelId label, const std::set constraints_skip_list; + utils::SkipList::Accessor constraint_accessor{constraints_skip_list.access()}; - bool violation_found = false; + auto multi_single_thread_processing = GetCreationFunction(par_exec_info); - { - auto acc = constraint->second.access(); - - for (const Vertex &vertex : vertices) { - if (vertex.deleted || !utils::Contains(vertex.labels, label)) { - continue; - } - auto values = vertex.properties.ExtractPropertyValues(properties); - if (!values) { - continue; - } - - // Check 
whether there already is a vertex with the same values for the - // given label and property. - auto it = acc.find_equal_or_greater(*values); - if (it != acc.end() && it->values == *values) { - violation_found = true; - break; - } - - acc.insert(Entry{std::move(*values), &vertex, 0}); - } - } + bool violation_found = std::visit( + [&vertex_accessor, &constraint_accessor, &label, &properties](auto &multi_single_thread_processing) { + return multi_single_thread_processing(vertex_accessor, constraint_accessor, label, properties); + }, + multi_single_thread_processing); if (violation_found) { - // In the case of the violation, storage for the current constraint has to - // be removed. - constraints_.erase(constraint); return ConstraintViolation{ConstraintViolation::Type::UNIQUE, label, properties}; } + auto [it, _] = constraints_.emplace(std::make_pair(label, properties), std::move(constraints_skip_list)); + // Add the new constraint to the optimized structure only if there are no violations. 
- constraints_by_label_[label].insert({properties, &constraints_.at({label, properties})}); + constraints_by_label_[label].insert({properties, &it->second}); return CreationStatus::SUCCESS; } @@ -364,12 +442,14 @@ std::optional InMemoryUniqueConstraints::Validate(const Ver if (vertex.deleted) { return std::nullopt; } + for (const auto &label : vertex.labels) { - if (!constraints_by_label_.contains(label)) { + const auto &constraint = constraints_by_label_.find(label); + if (constraint == constraints_by_label_.end()) { continue; } - for (const auto &[properties, storage] : constraints_by_label_.at(label)) { + for (const auto &[properties, storage] : constraint->second) { auto value_array = vertex.properties.ExtractPropertyValues(properties); if (!value_array) { diff --git a/src/storage/v2/inmemory/unique_constraints.hpp b/src/storage/v2/inmemory/unique_constraints.hpp index 45472ca74..15107f131 100644 --- a/src/storage/v2/inmemory/unique_constraints.hpp +++ b/src/storage/v2/inmemory/unique_constraints.hpp @@ -11,7 +11,17 @@ #pragma once +#include +#include +#include +#include +#include "storage/v2/constraints/constraint_violation.hpp" #include "storage/v2/constraints/unique_constraints.hpp" +#include "storage/v2/durability/recovery_type.hpp" +#include "storage/v2/id_types.hpp" +#include "utils/logging.hpp" +#include "utils/rw_spin_lock.hpp" +#include "utils/synchronized.hpp" namespace memgraph::storage { @@ -44,7 +54,24 @@ class InMemoryUniqueConstraints : public UniqueConstraints { bool operator==(const std::vector &rhs) const; }; + static std::optional DoValidate(const Vertex &vertex, + utils::SkipList::Accessor &constraint_accessor, + const LabelId &label, const std::set &properties); + public: + struct MultipleThreadsConstraintValidation { + bool operator()(const utils::SkipList::Accessor &vertex_accessor, + utils::SkipList::Accessor &constraint_accessor, const LabelId &label, + const std::set &properties); + + const durability::ParallelizedSchemaCreationInfo 
¶llel_exec_info; + }; + struct SingleThreadConstraintValidation { + bool operator()(const utils::SkipList::Accessor &vertex_accessor, + utils::SkipList::Accessor &constraint_accessor, const LabelId &label, + const std::set &properties); + }; + /// Indexes the given vertex for relevant labels and properties. /// This method should be called before committing and validating vertices /// against unique constraints. @@ -54,6 +81,8 @@ class InMemoryUniqueConstraints : public UniqueConstraints { void UpdateBeforeCommit(const Vertex *vertex, std::unordered_set &added_labels, std::unordered_set &added_properties, const Transaction &tx); + void AbortEntries(std::span vertices, uint64_t exact_start_timestamp); + /// Creates unique constraint on the given `label` and a list of `properties`. /// Returns constraint violation if there are multiple vertices with the same /// label and property values. Returns `CreationStatus::ALREADY_EXISTS` if @@ -63,9 +92,9 @@ class InMemoryUniqueConstraints : public UniqueConstraints { /// exceeds the maximum allowed number of properties, and /// `CreationStatus::SUCCESS` on success. /// @throw std::bad_alloc - utils::BasicResult CreateConstraint(LabelId label, - const std::set &properties, - utils::SkipList::Accessor vertices); + utils::BasicResult CreateConstraint( + LabelId label, const std::set &properties, const utils::SkipList::Accessor &vertex_accessor, + const std::optional &par_exec_info); /// Deletes the specified constraint. 
Returns `DeletionStatus::NOT_FOUND` if /// there is not such constraint in the storage, @@ -97,6 +126,9 @@ class InMemoryUniqueConstraints : public UniqueConstraints { void Clear() override; + static std::variant GetCreationFunction( + const std::optional &); + private: std::map>, utils::SkipList> constraints_; std::map, utils::SkipList *>> constraints_by_label_; diff --git a/src/storage/v2/replication/enums.hpp b/src/storage/v2/replication/enums.hpp index e89a1fdd3..be16ca192 100644 --- a/src/storage/v2/replication/enums.hpp +++ b/src/storage/v2/replication/enums.hpp @@ -14,6 +14,6 @@ namespace memgraph::storage::replication { -enum class ReplicaState : std::uint8_t { READY, REPLICATING, RECOVERY, INVALID }; +enum class ReplicaState : std::uint8_t { READY, REPLICATING, RECOVERY, MAYBE_BEHIND }; } // namespace memgraph::storage::replication diff --git a/src/storage/v2/replication/recovery.hpp b/src/storage/v2/replication/recovery.hpp new file mode 100644 index 000000000..346e03ecd --- /dev/null +++ b/src/storage/v2/replication/recovery.hpp @@ -0,0 +1,28 @@ +// Copyright 2023 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +#pragma once + +#include +#include +#include + +namespace memgraph::storage { + +using RecoverySnapshot = std::filesystem::path; +using RecoveryWals = std::vector; +struct RecoveryCurrentWal { + explicit RecoveryCurrentWal(const uint64_t current_wal_seq_num) : current_wal_seq_num(current_wal_seq_num) {} + uint64_t current_wal_seq_num; +}; +using RecoveryStep = std::variant; + +} // namespace memgraph::storage diff --git a/src/storage/v2/replication/replication_client.cpp b/src/storage/v2/replication/replication_client.cpp index 33313b130..3bc1b3d32 100644 --- a/src/storage/v2/replication/replication_client.cpp +++ b/src/storage/v2/replication/replication_client.cpp @@ -9,53 +9,37 @@ // by the Apache License, Version 2.0, included in the file // licenses/APL.txt. -#include "storage/v2/replication/replication_client.hpp" +#include "replication/replication_client.hpp" +#include "storage/v2/durability/durability.hpp" +#include "storage/v2/inmemory/storage.hpp" +#include "storage/v2/storage.hpp" +#include "utils/exceptions.hpp" +#include "utils/variant_helpers.hpp" #include #include -#include "storage/v2/durability/durability.hpp" -#include "storage/v2/storage.hpp" +namespace { +template +[[maybe_unused]] inline constexpr bool always_false_v = false; +} // namespace namespace memgraph::storage { -static auto CreateClientContext(const memgraph::replication::ReplicationClientConfig &config) - -> communication::ClientContext { - return (config.ssl) ? 
communication::ClientContext{config.ssl->key_file, config.ssl->cert_file} - : communication::ClientContext{}; -} +ReplicationStorageClient::ReplicationStorageClient(::memgraph::replication::ReplicationClient &client) + : client_{client} {} -ReplicationClient::ReplicationClient(Storage *storage, const memgraph::replication::ReplicationClientConfig &config, - const memgraph::replication::ReplicationEpoch *epoch) - : name_{config.name}, - rpc_context_{CreateClientContext(config)}, - rpc_client_{io::network::Endpoint(io::network::Endpoint::needs_resolving, config.ip_address, config.port), - &rpc_context_}, - replica_check_frequency_{config.replica_check_frequency}, - mode_{config.mode}, - storage_{storage}, - repl_epoch_{epoch} {} - -ReplicationClient::~ReplicationClient() { - auto endpoint = rpc_client_.Endpoint(); - spdlog::trace("Closing replication client on {}:{}", endpoint.address, endpoint.port); - thread_pool_.Shutdown(); -} - -uint64_t ReplicationClient::LastCommitTimestamp() const { - return storage_->repl_storage_state_.last_commit_timestamp_.load(); -} - -void ReplicationClient::InitializeClient() { +void ReplicationStorageClient::CheckReplicaState(Storage *storage) { uint64_t current_commit_timestamp{kTimestampInitialId}; - auto stream{rpc_client_.Stream( - storage_->id(), storage_->repl_storage_state_.last_commit_timestamp_, std::string{repl_epoch_->id()})}; + auto &replStorageState = storage->repl_storage_state_; + auto stream{client_.rpc_client_.Stream( + storage->id(), replStorageState.last_commit_timestamp_, std::string{replStorageState.epoch_.id()})}; const auto replica = stream.AwaitResponse(); std::optional branching_point; - if (replica.epoch_id != repl_epoch_->id() && replica.current_commit_timestamp != kTimestampInitialId) { - auto const &history = storage_->repl_storage_state_.history; + if (replica.epoch_id != replStorageState.epoch_.id() && replica.current_commit_timestamp != kTimestampInitialId) { + auto const &history = 
replStorageState.history; const auto epoch_info_iter = std::find_if(history.crbegin(), history.crend(), [&](const auto &main_epoch_info) { return main_epoch_info.first == replica.epoch_id; }); @@ -71,94 +55,86 @@ void ReplicationClient::InitializeClient() { "Replica {} acted as the Main instance. Both the Main and Replica {} " "now hold unique data. Please resolve data conflicts and start the " "replication on a clean instance.", - name_, name_, name_); + client_.name_, client_.name_, client_.name_); + // State not updated, hence in MAYBE_BEHIND state return; } current_commit_timestamp = replica.current_commit_timestamp; - spdlog::trace("Current timestamp on replica {}: {}", name_, current_commit_timestamp); - spdlog::trace("Current timestamp on main: {}", storage_->repl_storage_state_.last_commit_timestamp_.load()); - if (current_commit_timestamp == storage_->repl_storage_state_.last_commit_timestamp_.load()) { - spdlog::debug("Replica '{}' up to date", name_); - std::unique_lock client_guard{client_lock_}; - replica_state_.store(replication::ReplicaState::READY); - } else { - spdlog::debug("Replica '{}' is behind", name_); - { - std::unique_lock client_guard{client_lock_}; - replica_state_.store(replication::ReplicaState::RECOVERY); + spdlog::trace("Current timestamp on replica {}: {}", client_.name_, current_commit_timestamp); + spdlog::trace("Current timestamp on main: {}", replStorageState.last_commit_timestamp_.load()); + replica_state_.WithLock([&](auto &state) { + if (current_commit_timestamp == replStorageState.last_commit_timestamp_.load()) { + spdlog::debug("Replica '{}' up to date", client_.name_); + state = replication::ReplicaState::READY; + } else { + spdlog::debug("Replica '{}' is behind", client_.name_); + state = replication::ReplicaState::RECOVERY; + client_.thread_pool_.AddTask( + [storage, current_commit_timestamp, this] { this->RecoverReplica(current_commit_timestamp, storage); }); } - thread_pool_.AddTask([=, this] { 
this->RecoverReplica(current_commit_timestamp); }); - } + }); } -TimestampInfo ReplicationClient::GetTimestampInfo() { +TimestampInfo ReplicationStorageClient::GetTimestampInfo(Storage const *storage) { TimestampInfo info; info.current_timestamp_of_replica = 0; info.current_number_of_timestamp_behind_master = 0; try { - auto stream{rpc_client_.Stream(storage_->id())}; + auto stream{client_.rpc_client_.Stream(storage->id())}; const auto response = stream.AwaitResponse(); const auto is_success = response.success; if (!is_success) { - replica_state_.store(replication::ReplicaState::INVALID); - HandleRpcFailure(); + replica_state_.WithLock([](auto &val) { val = replication::ReplicaState::MAYBE_BEHIND; }); + LogRpcFailure(); } - auto main_time_stamp = storage_->repl_storage_state_.last_commit_timestamp_.load(); + auto main_time_stamp = storage->repl_storage_state_.last_commit_timestamp_.load(); info.current_timestamp_of_replica = response.current_commit_timestamp; info.current_number_of_timestamp_behind_master = response.current_commit_timestamp - main_time_stamp; } catch (const rpc::RpcFailedException &) { - { - std::unique_lock client_guard(client_lock_); - replica_state_.store(replication::ReplicaState::INVALID); - } - HandleRpcFailure(); // mutex already unlocked, if the new enqueued task dispatches immediately it probably won't - // block + replica_state_.WithLock([](auto &val) { val = replication::ReplicaState::MAYBE_BEHIND; }); + LogRpcFailure(); // mutex already unlocked, if the new enqueued task dispatches immediately it probably + // won't block } return info; } -void ReplicationClient::HandleRpcFailure() { - spdlog::error(utils::MessageWithLink("Couldn't replicate data to {}.", name_, "https://memgr.ph/replication")); - TryInitializeClientAsync(); +void ReplicationStorageClient::LogRpcFailure() { + spdlog::error( + utils::MessageWithLink("Couldn't replicate data to {}.", client_.name_, "https://memgr.ph/replication")); } -void 
ReplicationClient::TryInitializeClientAsync() { - thread_pool_.AddTask([this] { - rpc_client_.Abort(); - this->TryInitializeClientSync(); - }); +void ReplicationStorageClient::TryCheckReplicaStateAsync(Storage *storage) { + client_.thread_pool_.AddTask([storage, this] { this->TryCheckReplicaStateSync(storage); }); } -void ReplicationClient::TryInitializeClientSync() { +void ReplicationStorageClient::TryCheckReplicaStateSync(Storage *storage) { try { - InitializeClient(); + CheckReplicaState(storage); } catch (const rpc::VersionMismatchRpcFailedException &) { - std::unique_lock client_guard{client_lock_}; - replica_state_.store(replication::ReplicaState::INVALID); + replica_state_.WithLock([](auto &val) { val = replication::ReplicaState::MAYBE_BEHIND; }); spdlog::error( utils::MessageWithLink("Failed to connect to replica {} at the endpoint {}. Because the replica " "deployed is not a compatible version.", - name_, rpc_client_.Endpoint(), "https://memgr.ph/replication")); + client_.name_, client_.rpc_client_.Endpoint(), "https://memgr.ph/replication")); } catch (const rpc::RpcFailedException &) { - std::unique_lock client_guard{client_lock_}; - replica_state_.store(replication::ReplicaState::INVALID); - spdlog::error(utils::MessageWithLink("Failed to connect to replica {} at the endpoint {}.", name_, - rpc_client_.Endpoint(), "https://memgr.ph/replication")); + replica_state_.WithLock([](auto &val) { val = replication::ReplicaState::MAYBE_BEHIND; }); + spdlog::error(utils::MessageWithLink("Failed to connect to replica {} at the endpoint {}.", client_.name_, + client_.rpc_client_.Endpoint(), "https://memgr.ph/replication")); } } -void ReplicationClient::StartTransactionReplication(const uint64_t current_wal_seq_num) { - std::unique_lock guard(client_lock_); - const auto status = replica_state_.load(); - switch (status) { - case replication::ReplicaState::RECOVERY: - spdlog::debug("Replica {} is behind MAIN instance", name_); +void 
ReplicationStorageClient::StartTransactionReplication(const uint64_t current_wal_seq_num, Storage *storage) { + auto locked_state = replica_state_.Lock(); + switch (*locked_state) { + using enum replication::ReplicaState; + case RECOVERY: + spdlog::debug("Replica {} is behind MAIN instance", client_.name_); return; - case replication::ReplicaState::REPLICATING: - spdlog::debug("Replica {} missed a transaction", name_); + case REPLICATING: + spdlog::debug("Replica {} missed a transaction", client_.name_); // We missed a transaction because we're still replicating // the previous transaction so we need to go to RECOVERY // state to catch up with the missing transaction @@ -166,143 +142,169 @@ void ReplicationClient::StartTransactionReplication(const uint64_t current_wal_s // an error can happen while we're replicating the previous // transaction after which the client should go to // INVALID state before starting the recovery process - replica_state_.store(replication::ReplicaState::RECOVERY); + // + // This is a signal to any async streams that are still finalizing to start recovery, since this commit will be + // missed. 
+ *locked_state = RECOVERY; return; - case replication::ReplicaState::INVALID: - HandleRpcFailure(); + case MAYBE_BEHIND: + spdlog::error( + utils::MessageWithLink("Couldn't replicate data to {}.", client_.name_, "https://memgr.ph/replication")); + TryCheckReplicaStateAsync(storage); return; - case replication::ReplicaState::READY: + case READY: MG_ASSERT(!replica_stream_); try { - replica_stream_.emplace( - ReplicaStream{this, storage_->repl_storage_state_.last_commit_timestamp_.load(), current_wal_seq_num}); - replica_state_.store(replication::ReplicaState::REPLICATING); + replica_stream_.emplace(storage, client_.rpc_client_, current_wal_seq_num); + *locked_state = REPLICATING; } catch (const rpc::RpcFailedException &) { - replica_state_.store(replication::ReplicaState::INVALID); - HandleRpcFailure(); + *locked_state = MAYBE_BEHIND; + LogRpcFailure(); } return; } } -bool ReplicationClient::FinalizeTransactionReplication() { +bool ReplicationStorageClient::FinalizeTransactionReplication(Storage *storage) { // We can only check the state because it guarantees to be only // valid during a single transaction replication (if the assumption // that this and other transaction replication functions can only be // called from a one thread stands) - if (replica_state_ != replication::ReplicaState::REPLICATING) { + if (State() != replication::ReplicaState::REPLICATING) { return false; } - auto task = [this]() { + if (replica_stream_->IsDefunct()) return false; + + auto task = [storage, this]() { MG_ASSERT(replica_stream_, "Missing stream for transaction deltas"); try { auto response = replica_stream_->Finalize(); - replica_stream_.reset(); - std::unique_lock client_guard(client_lock_); - if (!response.success || replica_state_ == replication::ReplicaState::RECOVERY) { - replica_state_.store(replication::ReplicaState::RECOVERY); - thread_pool_.AddTask([&, this] { this->RecoverReplica(response.current_commit_timestamp); }); - } else { - 
replica_state_.store(replication::ReplicaState::READY); + return replica_state_.WithLock([storage, &response, this](auto &state) { + replica_stream_.reset(); + if (!response.success || state == replication::ReplicaState::RECOVERY) { + state = replication::ReplicaState::RECOVERY; + client_.thread_pool_.AddTask( + [storage, &response, this] { this->RecoverReplica(response.current_commit_timestamp, storage); }); + return false; + } + state = replication::ReplicaState::READY; return true; - } + }); } catch (const rpc::RpcFailedException &) { - replica_stream_.reset(); - { - std::unique_lock client_guard(client_lock_); - replica_state_.store(replication::ReplicaState::INVALID); - } - HandleRpcFailure(); + replica_state_.WithLock([this](auto &state) { + replica_stream_.reset(); + state = replication::ReplicaState::MAYBE_BEHIND; + }); + LogRpcFailure(); + return false; } - return false; }; - if (mode_ == memgraph::replication::ReplicationMode::ASYNC) { - thread_pool_.AddTask([=] { (void)task(); }); + if (client_.mode_ == memgraph::replication::ReplicationMode::ASYNC) { + client_.thread_pool_.AddTask([task = std::move(task)] { (void)task(); }); return true; } return task(); } -void ReplicationClient::FrequentCheck() { - const auto is_success = std::invoke([this]() { - try { - auto stream{rpc_client_.Stream()}; - const auto response = stream.AwaitResponse(); - return response.success; - } catch (const rpc::RpcFailedException &) { - return false; - } - }); - // States: READY, REPLICATING, RECOVERY, INVALID - // If success && ready, replicating, recovery -> stay the same because something good is going on. - // If success && INVALID -> [it's possible that replica came back to life] -> TryInitializeClient. - // If fail -> [replica is not reachable at all] -> INVALID state. - // NOTE: TryInitializeClient might return nothing if there is a branching point. - // NOTE: The early return pattern simplified the code, but the behavior should be as explained. 
- if (!is_success) { - replica_state_.store(replication::ReplicaState::INVALID); - return; - } - if (replica_state_.load() == replication::ReplicaState::INVALID) { - TryInitializeClientAsync(); - } +void ReplicationStorageClient::Start(Storage *storage) { + spdlog::trace("Replication client started for database \"{}\"", storage->id()); + TryCheckReplicaStateSync(storage); } -void ReplicationClient::Start() { - auto const &endpoint = rpc_client_.Endpoint(); - spdlog::trace("Replication client started at: {}:{}", endpoint.address, endpoint.port); - - TryInitializeClientSync(); - - // Help the user to get the most accurate replica state possible. - if (replica_check_frequency_ > std::chrono::seconds(0)) { - replica_checker_.Run("Replica Checker", replica_check_frequency_, [this] { this->FrequentCheck(); }); +void ReplicationStorageClient::RecoverReplica(uint64_t replica_commit, memgraph::storage::Storage *storage) { + if (storage->storage_mode_ != StorageMode::IN_MEMORY_TRANSACTIONAL) { + throw utils::BasicException("Only InMemoryTransactional mode supports replication!"); } -} + spdlog::debug("Starting replica recovery"); + auto *mem_storage = static_cast(storage); -void ReplicationClient::IfStreamingTransaction(const std::function &callback) { - // We can only check the state because it guarantees to be only - // valid during a single transaction replication (if the assumption - // that this and other transaction replication functions can only be - // called from a one thread stands) - if (replica_state_ != replication::ReplicaState::REPLICATING) { - return; - } + while (true) { + auto file_locker = mem_storage->file_retainer_.AddLocker(); - try { - callback(*replica_stream_); - } catch (const rpc::RpcFailedException &) { - { - std::unique_lock client_guard{client_lock_}; - replica_state_.store(replication::ReplicaState::INVALID); + const auto steps = GetRecoverySteps(replica_commit, &file_locker, mem_storage); + int i = 0; + for (const RecoveryStep &recovery_step : 
steps) { + spdlog::trace("Recovering in step: {}", i++); + try { + rpc::Client &rpcClient = client_.rpc_client_; + std::visit(utils::Overloaded{ + [&replica_commit, mem_storage, &rpcClient](RecoverySnapshot const &snapshot) { + spdlog::debug("Sending the latest snapshot file: {}", snapshot); + auto response = TransferSnapshot(mem_storage->id(), rpcClient, snapshot); + replica_commit = response.current_commit_timestamp; + }, + [&replica_commit, mem_storage, &rpcClient](RecoveryWals const &wals) { + spdlog::debug("Sending the latest wal files"); + auto response = TransferWalFiles(mem_storage->id(), rpcClient, wals); + replica_commit = response.current_commit_timestamp; + spdlog::debug("Wal files successfully transferred."); + }, + [&replica_commit, mem_storage, &rpcClient](RecoveryCurrentWal const ¤t_wal) { + std::unique_lock transaction_guard(mem_storage->engine_lock_); + if (mem_storage->wal_file_ && + mem_storage->wal_file_->SequenceNumber() == current_wal.current_wal_seq_num) { + mem_storage->wal_file_->DisableFlushing(); + transaction_guard.unlock(); + spdlog::debug("Sending current wal file"); + replica_commit = ReplicateCurrentWal(mem_storage, rpcClient, *mem_storage->wal_file_); + mem_storage->wal_file_->EnableFlushing(); + } else { + spdlog::debug("Cannot recover using current wal file"); + } + }, + [](auto const &in) { + static_assert(always_false_v, "Missing type from variant visitor"); + }, + }, + recovery_step); + } catch (const rpc::RpcFailedException &) { + replica_state_.WithLock([](auto &val) { val = replication::ReplicaState::MAYBE_BEHIND; }); + LogRpcFailure(); + return; + } + } + + spdlog::trace("Current timestamp on replica: {}", replica_commit); + // To avoid the situation where we read a correct commit timestamp in + // one thread, and after that another thread commits a different a + // transaction and THEN we set the state to READY in the first thread, + // we set this lock before checking the timestamp. 
+ // We will detect that the state is invalid during the next commit, + // because replication::AppendDeltasRpc sends the last commit timestamp which + // replica checks if it's the same last commit timestamp it received + // and we will go to recovery. + // By adding this lock, we can avoid that, and go to RECOVERY immediately. + const auto last_commit_timestamp = storage->repl_storage_state_.last_commit_timestamp_.load(); + SPDLOG_INFO("Replica timestamp: {}", replica_commit); + SPDLOG_INFO("Last commit: {}", last_commit_timestamp); + if (last_commit_timestamp == replica_commit) { + replica_state_.WithLock([](auto &val) { val = replication::ReplicaState::READY; }); + return; } - HandleRpcFailure(); } } ////// ReplicaStream ////// -ReplicaStream::ReplicaStream(ReplicationClient *self, const uint64_t previous_commit_timestamp, - const uint64_t current_seq_num) - : self_(self), - stream_(self_->rpc_client_.Stream(self->GetStorageId(), previous_commit_timestamp, - current_seq_num)) { +ReplicaStream::ReplicaStream(Storage *storage, rpc::Client &rpc_client, const uint64_t current_seq_num) + : storage_{storage}, + stream_(rpc_client.Stream( + storage->id(), storage->repl_storage_state_.last_commit_timestamp_.load(), current_seq_num)) { replication::Encoder encoder{stream_.GetBuilder()}; - - encoder.WriteString(self->repl_epoch_->id()); + encoder.WriteString(storage->repl_storage_state_.epoch_.id()); } void ReplicaStream::AppendDelta(const Delta &delta, const Vertex &vertex, uint64_t final_commit_timestamp) { replication::Encoder encoder(stream_.GetBuilder()); - auto *storage = self_->GetStorage(); - EncodeDelta(&encoder, storage->name_id_mapper_.get(), storage->config_.items, delta, vertex, final_commit_timestamp); + EncodeDelta(&encoder, storage_->name_id_mapper_.get(), storage_->config_.items, delta, vertex, + final_commit_timestamp); } void ReplicaStream::AppendDelta(const Delta &delta, const Edge &edge, uint64_t final_commit_timestamp) { replication::Encoder 
encoder(stream_.GetBuilder()); - EncodeDelta(&encoder, self_->GetStorage()->name_id_mapper_.get(), delta, edge, final_commit_timestamp); + EncodeDelta(&encoder, storage_->name_id_mapper_.get(), delta, edge, final_commit_timestamp); } void ReplicaStream::AppendTransactionEnd(uint64_t final_commit_timestamp) { @@ -314,11 +316,10 @@ void ReplicaStream::AppendOperation(durability::StorageMetadataOperation operati const std::set &properties, const LabelIndexStats &stats, const LabelPropertyIndexStats &property_stats, uint64_t timestamp) { replication::Encoder encoder(stream_.GetBuilder()); - EncodeOperation(&encoder, self_->GetStorage()->name_id_mapper_.get(), operation, label, properties, stats, - property_stats, timestamp); + EncodeOperation(&encoder, storage_->name_id_mapper_.get(), operation, label, properties, stats, property_stats, + timestamp); } replication::AppendDeltasRes ReplicaStream::Finalize() { return stream_.AwaitResponse(); } -auto ReplicationClient::GetStorageId() const -> std::string { return storage_->id(); } } // namespace memgraph::storage diff --git a/src/storage/v2/replication/replication_client.hpp b/src/storage/v2/replication/replication_client.hpp index 8cd8cb384..3d2c019e9 100644 --- a/src/storage/v2/replication/replication_client.hpp +++ b/src/storage/v2/replication/replication_client.hpp @@ -13,6 +13,8 @@ #include "replication/config.hpp" #include "replication/epoch.hpp" +#include "replication/messages.hpp" +#include "replication/replication_client.hpp" #include "rpc/client.hpp" #include "storage/v2/durability/storage_global_operation.hpp" #include "storage/v2/id_types.hpp" @@ -23,9 +25,12 @@ #include "storage/v2/replication/rpc.hpp" #include "utils/file_locker.hpp" #include "utils/scheduler.hpp" +#include "utils/synchronized.hpp" #include "utils/thread_pool.hpp" #include +#include +#include #include #include #include @@ -37,12 +42,12 @@ struct Delta; struct Vertex; struct Edge; class Storage; -class ReplicationClient; +class 
ReplicationStorageClient; // Handler used for transferring the current transaction. class ReplicaStream { public: - explicit ReplicaStream(ReplicationClient *self, uint64_t previous_commit_timestamp, uint64_t current_seq_num); + explicit ReplicaStream(Storage *storage, rpc::Client &rpc_client, uint64_t current_seq_num); /// @throw rpc::RpcFailedException void AppendDelta(const Delta &delta, const Vertex &vertex, uint64_t final_commit_timestamp); @@ -61,85 +66,84 @@ class ReplicaStream { /// @throw rpc::RpcFailedException replication::AppendDeltasRes Finalize(); + bool IsDefunct() const { return stream_.IsDefunct(); } + private: - ReplicationClient *self_; + Storage *storage_; rpc::Client::StreamHandler stream_; }; -class ReplicationClient { - friend class CurrentWalHandler; +template +concept InvocableWithStream = std::invocable; + +// TODO Rename to something without the word "client" +class ReplicationStorageClient { + friend class InMemoryCurrentWalHandler; friend class ReplicaStream; + friend struct ::memgraph::replication::ReplicationClient; public: - ReplicationClient(Storage *storage, const memgraph::replication::ReplicationClientConfig &config, - const memgraph::replication::ReplicationEpoch *epoch); + explicit ReplicationStorageClient(::memgraph::replication::ReplicationClient &client); - ReplicationClient(ReplicationClient const &) = delete; - ReplicationClient &operator=(ReplicationClient const &) = delete; - ReplicationClient(ReplicationClient &&) noexcept = delete; - ReplicationClient &operator=(ReplicationClient &&) noexcept = delete; + ReplicationStorageClient(ReplicationStorageClient const &) = delete; + ReplicationStorageClient &operator=(ReplicationStorageClient const &) = delete; + ReplicationStorageClient(ReplicationStorageClient &&) noexcept = delete; + ReplicationStorageClient &operator=(ReplicationStorageClient &&) noexcept = delete; - virtual ~ReplicationClient(); + ~ReplicationStorageClient() = default; - auto Mode() const -> 
memgraph::replication::ReplicationMode { return mode_; } - auto Name() const -> std::string const & { return name_; } - auto Endpoint() const -> io::network::Endpoint const & { return rpc_client_.Endpoint(); } - auto State() const -> replication::ReplicaState { return replica_state_.load(); } - auto GetTimestampInfo() -> TimestampInfo; + // TODO Remove the client related functions + auto Mode() const -> memgraph::replication::ReplicationMode { return client_.mode_; } + auto Name() const -> std::string const & { return client_.name_; } + auto Endpoint() const -> io::network::Endpoint const & { return client_.rpc_client_.Endpoint(); } - auto GetStorageId() const -> std::string; + auto State() const -> replication::ReplicaState { return replica_state_.WithLock(std::identity()); } + auto GetTimestampInfo(Storage const *storage) -> TimestampInfo; + + void Start(Storage *storage); + void StartTransactionReplication(uint64_t current_wal_seq_num, Storage *storage); - void Start(); - void StartTransactionReplication(uint64_t current_wal_seq_num); // Replication clients can be removed at any point // so to avoid any complexity of checking if the client was removed whenever // we want to send part of transaction and to avoid adding some GC logic this // function will run a callback if, after previously callling // StartTransactionReplication, stream is created. 
- void IfStreamingTransaction(const std::function &callback); + template + void IfStreamingTransaction(F &&callback) { + // We can only check the state because it guarantees to be only + // valid during a single transaction replication (if the assumption + // that this and other transaction replication functions can only be + // called from a one thread stands) + if (State() != replication::ReplicaState::REPLICATING) { + return; + } + if (replica_stream_->IsDefunct()) return; + try { + callback(*replica_stream_); // failure state what if not streaming (std::nullopt) + } catch (const rpc::RpcFailedException &) { + return replica_state_.WithLock([](auto &state) { state = replication::ReplicaState::MAYBE_BEHIND; }); + LogRpcFailure(); + } + } + // Return whether the transaction could be finalized on the replication client or not. - [[nodiscard]] bool FinalizeTransactionReplication(); + [[nodiscard]] bool FinalizeTransactionReplication(Storage *storage); - protected: - virtual void RecoverReplica(uint64_t replica_commit) = 0; + void TryCheckReplicaStateAsync(Storage *storage); // TODO Move back to private + private: + void RecoverReplica(uint64_t replica_commit, memgraph::storage::Storage *storage); - auto GetStorage() -> Storage * { return storage_; } - auto LastCommitTimestamp() const -> uint64_t; - void InitializeClient(); - void HandleRpcFailure(); - void TryInitializeClientAsync(); - void TryInitializeClientSync(); - void FrequentCheck(); + void CheckReplicaState(Storage *storage); + void LogRpcFailure(); + void TryCheckReplicaStateSync(Storage *storage); + void FrequentCheck(Storage *storage); - std::string name_; - communication::ClientContext rpc_context_; - rpc::Client rpc_client_; - std::chrono::seconds replica_check_frequency_; - - std::optional replica_stream_; - memgraph::replication::ReplicationMode mode_{memgraph::replication::ReplicationMode::SYNC}; - - utils::SpinLock client_lock_; - // This thread pool is used for background tasks so we don't - // 
block the main storage thread - // We use only 1 thread for 2 reasons: - // - background tasks ALWAYS contain some kind of RPC communication. - // We can't have multiple RPC communication from a same client - // because that's not logically valid (e.g. you cannot send a snapshot - // and WAL at a same time because WAL will arrive earlier and be applied - // before the snapshot which is not correct) - // - the implementation is simplified as we have a total control of what - // this pool is executing. Also, we can simply queue multiple tasks - // and be sure of the execution order. - // Not having mulitple possible threads in the same client allows us - // to ignore concurrency problems inside the client. - utils::ThreadPool thread_pool_{1}; - std::atomic replica_state_{replication::ReplicaState::INVALID}; - - utils::Scheduler replica_checker_; - Storage *storage_; - - memgraph::replication::ReplicationEpoch const *repl_epoch_; + ::memgraph::replication::ReplicationClient &client_; + // TODO Do not store the stream, make is a local variable + std::optional + replica_stream_; // Currently active stream (nullopt if not in use), note: a single stream per rpc client + mutable utils::Synchronized replica_state_{ + replication::ReplicaState::MAYBE_BEHIND}; }; } // namespace memgraph::storage diff --git a/src/storage/v2/replication/replication_storage_state.cpp b/src/storage/v2/replication/replication_storage_state.cpp index 1cd0bec09..a443c7171 100644 --- a/src/storage/v2/replication/replication_storage_state.cpp +++ b/src/storage/v2/replication/replication_storage_state.cpp @@ -16,10 +16,10 @@ namespace memgraph::storage { -void ReplicationStorageState::InitializeTransaction(uint64_t seq_num) { - replication_clients_.WithLock([&](auto &clients) { +void ReplicationStorageState::InitializeTransaction(uint64_t seq_num, Storage *storage) { + replication_clients_.WithLock([=](auto &clients) { for (auto &client : clients) { - client->StartTransactionReplication(seq_num); + 
client->StartTransactionReplication(seq_num, storage); } }); } @@ -52,12 +52,12 @@ void ReplicationStorageState::AppendOperation(durability::StorageMetadataOperati }); } -bool ReplicationStorageState::FinalizeTransaction(uint64_t timestamp) { +bool ReplicationStorageState::FinalizeTransaction(uint64_t timestamp, Storage *storage) { return replication_clients_.WithLock([=](auto &clients) { bool finalized_on_all_replicas = true; for (ReplicationClientPtr &client : clients) { client->IfStreamingTransaction([&](auto &stream) { stream.AppendTransactionEnd(timestamp); }); - const auto finalized = client->FinalizeTransactionReplication(); + const auto finalized = client->FinalizeTransactionReplication(storage); if (client->Mode() == memgraph::replication::ReplicationMode::SYNC) { finalized_on_all_replicas = finalized && finalized_on_all_replicas; @@ -78,12 +78,12 @@ std::optional ReplicationStorageState::GetReplicaStat }); } -std::vector ReplicationStorageState::ReplicasInfo() const { - return replication_clients_.WithReadLock([](auto const &clients) { +std::vector ReplicationStorageState::ReplicasInfo(const Storage *storage) const { + return replication_clients_.WithReadLock([storage](auto const &clients) { std::vector replica_infos; replica_infos.reserve(clients.size()); - auto const asReplicaInfo = [](ReplicationClientPtr const &client) -> ReplicaInfo { - return {client->Name(), client->Mode(), client->Endpoint(), client->State(), client->GetTimestampInfo()}; + auto const asReplicaInfo = [storage](ReplicationClientPtr const &client) -> ReplicaInfo { + return {client->Name(), client->Mode(), client->Endpoint(), client->State(), client->GetTimestampInfo(storage)}; }; std::transform(clients.begin(), clients.end(), std::back_inserter(replica_infos), asReplicaInfo); return replica_infos; diff --git a/src/storage/v2/replication/replication_storage_state.hpp b/src/storage/v2/replication/replication_storage_state.hpp index afedb3950..e3d6b94a0 100644 --- 
a/src/storage/v2/replication/replication_storage_state.hpp +++ b/src/storage/v2/replication/replication_storage_state.hpp @@ -12,11 +12,13 @@ #pragma once #include +#include #include "kvstore/kvstore.hpp" #include "storage/v2/delta.hpp" #include "storage/v2/durability/storage_global_operation.hpp" #include "storage/v2/transaction.hpp" +#include "utils/exceptions.hpp" #include "utils/result.hpp" /// REPLICATION /// @@ -33,21 +35,21 @@ namespace memgraph::storage { class Storage; -class ReplicationClient; +class ReplicationStorageClient; struct ReplicationStorageState { // Only MAIN can send - void InitializeTransaction(uint64_t seq_num); + void InitializeTransaction(uint64_t seq_num, Storage *storage); void AppendDelta(const Delta &delta, const Vertex &vertex, uint64_t timestamp); void AppendDelta(const Delta &delta, const Edge &edge, uint64_t timestamp); void AppendOperation(durability::StorageMetadataOperation operation, LabelId label, const std::set &properties, const LabelIndexStats &stats, const LabelPropertyIndexStats &property_stats, uint64_t final_commit_timestamp); - bool FinalizeTransaction(uint64_t timestamp); + bool FinalizeTransaction(uint64_t timestamp, Storage *storage); // Getters auto GetReplicaState(std::string_view name) const -> std::optional; - auto ReplicasInfo() const -> std::vector; + auto ReplicasInfo(const Storage *storage) const -> std::vector; // History void TrackLatestHistory(); @@ -55,6 +57,19 @@ struct ReplicationStorageState { void Reset(); + template + bool WithClient(std::string_view replica_name, F &&callback) { + return replication_clients_.WithLock([replica_name, cb = std::forward(callback)](auto &clients) { + for (const auto &client : clients) { + if (client->Name() == replica_name) { + cb(client.get()); + return true; + } + } + return false; + }); + } + // Questions: // - storage durability <- databases/*name*/wal and snapshots (where this for epoch_id) // - multi-tenant durability <- databases/.durability (there is a list of 
all active tenants) @@ -74,7 +89,7 @@ struct ReplicationStorageState { // This way we can initialize client in main thread which means // that we can immediately notify the user if the initialization // failed. - using ReplicationClientPtr = std::unique_ptr; + using ReplicationClientPtr = std::unique_ptr; using ReplicationClientList = utils::Synchronized, utils::RWSpinLock>; ReplicationClientList replication_clients_; diff --git a/src/storage/v2/storage.cpp b/src/storage/v2/storage.cpp index 6104ea6c7..86cc02696 100644 --- a/src/storage/v2/storage.cpp +++ b/src/storage/v2/storage.cpp @@ -85,7 +85,7 @@ Storage::Accessor::Accessor(Accessor &&other) noexcept other.commit_timestamp_.reset(); } -StorageMode Storage::GetStorageMode() const { return storage_mode_; } +StorageMode Storage::GetStorageMode() const noexcept { return storage_mode_; } IsolationLevel Storage::GetIsolationLevel() const noexcept { return isolation_level_; } @@ -95,7 +95,7 @@ utils::BasicResult Storage::SetIsolationLevel(I return {}; } -StorageMode Storage::Accessor::GetCreationStorageMode() const { return creation_storage_mode_; } +StorageMode Storage::Accessor::GetCreationStorageMode() const noexcept { return creation_storage_mode_; } std::optional Storage::Accessor::GetTransactionId() const { if (is_transaction_active_) { @@ -104,6 +104,18 @@ std::optional Storage::Accessor::GetTransactionId() const { return {}; } +std::vector Storage::Accessor::ListAllPossiblyPresentVertexLabels() const { + std::vector vertex_labels; + storage_->stored_node_labels_.for_each([&vertex_labels](const auto &label) { vertex_labels.push_back(label); }); + return vertex_labels; +} + +std::vector Storage::Accessor::ListAllPossiblyPresentEdgeTypes() const { + std::vector edge_types; + storage_->stored_edge_types_.for_each([&edge_types](const auto &type) { edge_types.push_back(type); }); + return edge_types; +} + void Storage::Accessor::AdvanceCommand() { transaction_.manyDeltasCache.Clear(); // TODO: Just invalidate the 
View::OLD cache, NEW should still be fine ++transaction_.command_id; diff --git a/src/storage/v2/storage.hpp b/src/storage/v2/storage.hpp index 8d8b06cd6..bf1973bcd 100644 --- a/src/storage/v2/storage.hpp +++ b/src/storage/v2/storage.hpp @@ -40,6 +40,7 @@ #include "utils/event_histogram.hpp" #include "utils/resource_lock.hpp" #include "utils/scheduler.hpp" +#include "utils/synchronized_metadata_store.hpp" #include "utils/timer.hpp" #include "utils/uuid.hpp" @@ -109,7 +110,7 @@ struct EdgeInfoForDeletion { class Storage { friend class ReplicationServer; - friend class ReplicationClient; + friend class ReplicationStorageClient; public: Storage(Config config, StorageMode storage_mode); @@ -195,6 +196,9 @@ class Storage { virtual Result CreateEdge(VertexAccessor *from, VertexAccessor *to, EdgeTypeId edge_type) = 0; + virtual std::optional FindEdge(Gid gid, View view, EdgeTypeId edge_type, VertexAccessor *from_vertex, + VertexAccessor *to_vertex) = 0; + virtual Result EdgeSetFrom(EdgeAccessor *edge, VertexAccessor *new_from) = 0; virtual Result EdgeSetTo(EdgeAccessor *edge, VertexAccessor *new_to) = 0; @@ -235,10 +239,14 @@ class Storage { EdgeTypeId NameToEdgeType(std::string_view name) { return storage_->NameToEdgeType(name); } - StorageMode GetCreationStorageMode() const; + StorageMode GetCreationStorageMode() const noexcept; const std::string &id() const { return storage_->id(); } + std::vector ListAllPossiblyPresentVertexLabels() const; + + std::vector ListAllPossiblyPresentEdgeTypes() const; + virtual utils::BasicResult CreateIndex(LabelId label) = 0; virtual utils::BasicResult CreateIndex(LabelId label, PropertyId property) = 0; @@ -301,7 +309,7 @@ class Storage { return EdgeTypeId::FromUint(name_id_mapper_->NameToId(name)); } - StorageMode GetStorageMode() const; + StorageMode GetStorageMode() const noexcept; virtual void FreeMemory(std::unique_lock main_guard) = 0; @@ -355,11 +363,7 @@ class Storage { virtual void PrepareForNewEpoch() = 0; - virtual auto 
CreateReplicationClient(const memgraph::replication::ReplicationClientConfig &config, - const memgraph::replication::ReplicationEpoch *current_epoch) - -> std::unique_ptr = 0; - - auto ReplicasInfo() const { return repl_storage_state_.ReplicasInfo(); } + auto ReplicasInfo() const { return repl_storage_state_.ReplicasInfo(this); } auto GetReplicaState(std::string_view name) const -> std::optional { return repl_storage_state_.GetReplicaState(name); } @@ -384,7 +388,7 @@ class Storage { Config config_; // Transaction engine - utils::SpinLock engine_lock_; + mutable utils::SpinLock engine_lock_; uint64_t timestamp_{kTimestampInitialId}; uint64_t transaction_id_{kTransactionInitialId}; @@ -394,6 +398,18 @@ class Storage { Indices indices_; Constraints constraints_; + // Datastructures to provide fast retrieval of node-label and + // edge-type related metadata. + // Currently we should not remove any node-labels or edge-types even + // if the set of given types are currently not present in the + // database. This metadata is usually used by client side + // applications that want to be aware of the kind of data that *may* + // be present in the database. + + // TODO(gvolfing): check if this would be faster with flat_maps. 
+ utils::SynchronizedMetaDataStore stored_node_labels_; + utils::SynchronizedMetaDataStore stored_edge_types_; + std::atomic vertex_id_{0}; std::atomic edge_id_{0}; const std::string id_; //!< High-level assigned ID diff --git a/src/storage/v2/storage_mode.cpp b/src/storage/v2/storage_mode.cpp index 73a886d6a..067646854 100644 --- a/src/storage/v2/storage_mode.cpp +++ b/src/storage/v2/storage_mode.cpp @@ -13,6 +13,10 @@ namespace memgraph::storage { +bool IsTransactional(const StorageMode storage_mode) noexcept { + return storage_mode != StorageMode::IN_MEMORY_ANALYTICAL; +} + std::string_view StorageModeToString(memgraph::storage::StorageMode storage_mode) { switch (storage_mode) { case memgraph::storage::StorageMode::IN_MEMORY_ANALYTICAL: diff --git a/src/storage/v2/storage_mode.hpp b/src/storage/v2/storage_mode.hpp index 2ab348c59..c02d3c177 100644 --- a/src/storage/v2/storage_mode.hpp +++ b/src/storage/v2/storage_mode.hpp @@ -18,6 +18,8 @@ namespace memgraph::storage { enum class StorageMode : std::uint8_t { IN_MEMORY_ANALYTICAL, IN_MEMORY_TRANSACTIONAL, ON_DISK_TRANSACTIONAL }; +bool IsTransactional(const StorageMode storage_mode) noexcept; + std::string_view StorageModeToString(memgraph::storage::StorageMode storage_mode); } // namespace memgraph::storage diff --git a/src/storage/v2/vertex_accessor.cpp b/src/storage/v2/vertex_accessor.cpp index 924c305ad..ff5881563 100644 --- a/src/storage/v2/vertex_accessor.cpp +++ b/src/storage/v2/vertex_accessor.cpp @@ -110,6 +110,10 @@ Result VertexAccessor::AddLabel(LabelId label) { CreateAndLinkDelta(transaction_, vertex_, Delta::RemoveLabelTag(), label); vertex_->labels.push_back(label); + if (storage_->config_.items.enable_schema_metadata) { + storage_->stored_node_labels_.try_insert(label); + } + /// TODO: some by pointers, some by reference => not good, make it better storage_->constraints_.unique_constraints_->UpdateOnAddLabel(label, *vertex_, transaction_->start_timestamp); 
transaction_->constraint_verification_info.AddedLabel(vertex_); diff --git a/src/utils/memory.cpp b/src/utils/memory.cpp index 8bfbf4220..d09f70fc3 100644 --- a/src/utils/memory.cpp +++ b/src/utils/memory.cpp @@ -331,8 +331,7 @@ void PoolResource::DoDeallocate(void *p, size_t bytes, size_t alignment) { return; } // Deallocate a regular block, first check if last_dealloc_pool_ is suitable. - MG_ASSERT(last_dealloc_pool_, "Failed deallocation"); - if (last_dealloc_pool_->GetBlockSize() == block_size) return last_dealloc_pool_->Deallocate(p); + if (last_dealloc_pool_ && last_dealloc_pool_->GetBlockSize() == block_size) return last_dealloc_pool_->Deallocate(p); // Find the pool with equal block_size. impl::Pool pool(block_size, max_blocks_per_chunk_, GetUpstreamResource()); auto it = std::lower_bound(pools_.begin(), pools_.end(), pool, diff --git a/src/utils/synchronized_metadata_store.hpp b/src/utils/synchronized_metadata_store.hpp new file mode 100644 index 000000000..0c0a85c21 --- /dev/null +++ b/src/utils/synchronized_metadata_store.hpp @@ -0,0 +1,65 @@ +// Copyright 2023 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +#pragma once + +#include +#include +#include + +#include "utils/rw_lock.hpp" +#include "utils/synchronized.hpp" + +namespace memgraph::utils { + +template +class SynchronizedMetaDataStore { + public: + SynchronizedMetaDataStore() = default; + ~SynchronizedMetaDataStore() = default; + + SynchronizedMetaDataStore(const SynchronizedMetaDataStore &) = delete; + SynchronizedMetaDataStore(SynchronizedMetaDataStore &&) = delete; + SynchronizedMetaDataStore &operator=(const SynchronizedMetaDataStore &) = delete; + SynchronizedMetaDataStore &operator=(SynchronizedMetaDataStore &&) = delete; + + void try_insert(const T &elem) { + { + std::shared_lock read_lock(lock_); + if (element_store_.contains(elem)) { + return; + } + } + { + std::unique_lock write_lock(lock_); + element_store_.insert(elem); + } + } + + void erase(const T &elem) { + std::unique_lock write_lock(lock_); + element_store_.erase(elem); + } + + template + void for_each(const TFunc &func) { + std::unique_lock write_lock(lock_); + for (const auto &elem : element_store_) { + func(elem); + } + } + + private: + std::unordered_set element_store_; + RWLock lock_{RWLock::Priority::READ}; +}; + +} // namespace memgraph::utils diff --git a/tests/drivers/java/v5_8/run.sh b/tests/drivers/java/v5_8/run.sh index 0e85f68df..03400e385 100755 --- a/tests/drivers/java/v5_8/run.sh +++ b/tests/drivers/java/v5_8/run.sh @@ -9,8 +9,8 @@ fi if [ -d "/usr/lib/jvm/java-17-openjdk-amd64" ]; then export JAVA_HOME="/usr/lib/jvm/java-17-openjdk-amd64" fi -if [ -d "/opt/apache-maven-3.9.2" ]; then - export M2_HOME="/opt/apache-maven-3.9.2" +if [ -d "/opt/apache-maven-3.9.3" ]; then + export M2_HOME="/opt/apache-maven-3.9.3" fi export PATH="$JAVA_HOME/bin:$M2_HOME/bin:$PATH" diff --git a/tests/drivers/python/v5_8/transactions.py b/tests/drivers/python/v5_8/transactions.py index 58f05eeb1..d26eef44c 100644 --- a/tests/drivers/python/v5_8/transactions.py +++ b/tests/drivers/python/v5_8/transactions.py @@ -14,7 +14,7 @@ import time -from 
neo4j import GraphDatabase, basic_auth +from neo4j import GraphDatabase from neo4j.exceptions import ClientError, TransientError @@ -75,11 +75,58 @@ def test_timeout(driver, set_timeout): raise Exception("The query should have timed out, but it didn't!") +def violate_constraint(tx): + tx.run("CREATE (n:Employee:Person {id: '123', alt_id: '100'});").consume() + + +def violate_constraint_on_intermediate_result(tx): + tx.run("CREATE (n:Employee:Person {id: '124', alt_id: '200'});").consume() + tx.run("MATCH (n {alt_id: '200'}) SET n.id = '123';").consume() # two (:Person {id: '123'}) + tx.run("MATCH (n {alt_id: '100'}) SET n.id = '122';").consume() # above violation fixed + + +def clear_db(session): + session.run("DROP CONSTRAINT ON (n:Person) ASSERT n.id IS UNIQUE;") + session.run("DROP CONSTRAINT ON (n:Employee) ASSERT n.id IS UNIQUE;") + session.run("DROP CONSTRAINT ON (n:Employee) ASSERT EXISTS (n.id);") + + session.run("MATCH (n) DETACH DELETE n;") + + with GraphDatabase.driver("bolt://localhost:7687", auth=None, encrypted=False) as driver: + with driver.session() as session: + # Clear the DB + session.run("MATCH (n) DETACH DELETE n;").consume() + + # Add constraints + session.run("CREATE CONSTRAINT ON (n:Person) ASSERT n.id IS UNIQUE;").consume() + session.run("CREATE CONSTRAINT ON (n:Employee) ASSERT n.id IS UNIQUE;").consume() + session.run("CREATE CONSTRAINT ON (n:Employee) ASSERT EXISTS (n.id);").consume() + + # Set the initial graph state + session.execute_write(lambda tx: tx.run("CREATE (n:Employee:Person {id: '123', alt_id: '100'}) RETURN n;")) + + # Run a transaction that violates a constraint + try: + session.execute_write(violate_constraint) + except ClientError: + pass + else: + clear_db(session) + raise Exception("neo4j.exceptions.ClientError should have been thrown!") + + # Run a transaction that violates no constraints even though an intermediate result does + try: + session.execute_write(violate_constraint_on_intermediate_result) + except 
ClientError: + clear_db(session) + raise Exception("neo4j.exceptions.ClientError should not have been thrown!") + + clear_db(session) def add_person(f, name, name2): with driver.session() as session: - session.write_transaction(f, name, name2) + session.execute_write(f, name, name2) # Wrong query. try: diff --git a/tests/e2e/CMakeLists.txt b/tests/e2e/CMakeLists.txt index 28fe94559..fcf7f45b6 100644 --- a/tests/e2e/CMakeLists.txt +++ b/tests/e2e/CMakeLists.txt @@ -48,6 +48,7 @@ add_subdirectory(temporal_types) add_subdirectory(write_procedures) add_subdirectory(configuration) add_subdirectory(magic_functions) +add_subdirectory(metadata_queries) add_subdirectory(module_file_manager) add_subdirectory(monitoring_server) add_subdirectory(lba_procedures) @@ -70,8 +71,11 @@ add_subdirectory(index_hints) add_subdirectory(query_modules) add_subdirectory(constraints) add_subdirectory(inspect_query) +add_subdirectory(filter_info) add_subdirectory(queries) +add_subdirectory(query_modules_storage_modes) add_subdirectory(garbage_collection) +add_subdirectory(query_planning) copy_e2e_python_files(pytest_runner pytest_runner.sh "") copy_e2e_python_files(x x.sh "") diff --git a/tests/e2e/analyze_graph/optimize_indexes.py b/tests/e2e/analyze_graph/optimize_indexes.py index 325993ca1..c0530a827 100644 --- a/tests/e2e/analyze_graph/optimize_indexes.py +++ b/tests/e2e/analyze_graph/optimize_indexes.py @@ -62,7 +62,7 @@ def test_analyze_graph_delete_statistics(delete_query, multi_db): # After deleting statistics, id2 should be chosen because it has less vertices expected_explain_after_delete_analysis = [ (f" * Produce {{n}}",), - (f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",), + (f" * Filter {{n.id1}}",), (f" * ScanAllByLabelPropertyValue (n :Label {{id2}})",), (f" * Once",), ] @@ -96,7 +96,7 @@ def test_analyze_full_graph(analyze_query, multi_db): # Choose id2 before tha analysis because it has less vertices expected_explain_before_analysis = [ (f" * Produce {{n}}",), - (f" * Filter 
(n :Label), {{n.id1}}, {{n.id2}}",), + (f" * Filter {{n.id1}}",), (f" * ScanAllByLabelPropertyValue (n :Label {{id2}})",), (f" * Once",), ] @@ -117,7 +117,7 @@ def test_analyze_full_graph(analyze_query, multi_db): # After analyzing graph, id1 index should be chosen because it has smaller average group size expected_explain_after_analysis = [ (f" * Produce {{n}}",), - (f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",), + (f" * Filter {{n.id2}}",), (f" * ScanAllByLabelPropertyValue (n :Label {{id1}})",), (f" * Once",), ] @@ -152,7 +152,7 @@ def test_cardinality_different_avg_group_size_uniform_dist(multi_db): assert analyze_graph_results[1 - first_index] == ("Label", "id2", 100, 20, 5, 0, 0) expected_explain_after_analysis = [ (f" * Produce {{n}}",), - (f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",), + (f" * Filter {{n.id2}}",), (f" * ScanAllByLabelPropertyValue (n :Label {{id1}})",), (f" * Once",), ] @@ -183,7 +183,7 @@ def test_cardinality_same_avg_group_size_uniform_dist_diff_vertex_count(multi_db assert analyze_graph_results[1 - first_index] == ("Label", "id2", 50, 50, 1, 0, 0) expected_explain_after_analysis = [ (f" * Produce {{n}}",), - (f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",), + (f" * Filter {{n.id1}}",), (f" * ScanAllByLabelPropertyValue (n :Label {{id2}})",), (f" * Once",), ] @@ -214,7 +214,7 @@ def test_large_diff_in_num_vertices_v1(multi_db): assert analyze_graph_results[1 - first_index] == ("Label", "id2", 99, 1, 99, 0, 0) expected_explain_after_analysis = [ (f" * Produce {{n}}",), - (f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",), + (f" * Filter {{n.id1}}",), (f" * ScanAllByLabelPropertyValue (n :Label {{id2}})",), (f" * Once",), ] @@ -245,7 +245,7 @@ def test_large_diff_in_num_vertices_v2(multi_db): assert analyze_graph_results[1 - first_index] == ("Label", "id2", 1000, 1000, 1, 0, 0) expected_explain_after_analysis = [ (f" * Produce {{n}}",), - (f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",), + (f" * Filter {{n.id2}}",), (f" * 
ScanAllByLabelPropertyValue (n :Label {{id1}})",), (f" * Once",), ] @@ -286,7 +286,7 @@ def test_same_avg_group_size_diff_distribution(multi_db): assert analyze_graph_results[1 - first_index] == ("Label", "id2", 100, 5, 20, 0, 0) expected_explain_after_analysis = [ (f" * Produce {{n}}",), - (f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",), + (f" * Filter {{n.id1}}",), (f" * ScanAllByLabelPropertyValue (n :Label {{id2}})",), (f" * Once",), ] diff --git a/tests/e2e/concurrent_query_modules/test_query_modules/module_test.cpp b/tests/e2e/concurrent_query_modules/test_query_modules/module_test.cpp index 44479d900..b53dda881 100644 --- a/tests/e2e/concurrent_query_modules/test_query_modules/module_test.cpp +++ b/tests/e2e/concurrent_query_modules/test_query_modules/module_test.cpp @@ -16,11 +16,11 @@ #include #include -constexpr char *kProcedureHackerNews = "hacker_news"; -constexpr char *kArgumentHackerNewsVotes = "votes"; -constexpr char *kArgumentHackerNewsItemHourAge = "item_hour_age"; -constexpr char *kArgumentHackerNewsGravity = "gravity"; -constexpr char *kReturnHackerNewsScore = "score"; +constexpr char const *kProcedureHackerNews = "hacker_news"; +constexpr char const *kArgumentHackerNewsVotes = "votes"; +constexpr char const *kArgumentHackerNewsItemHourAge = "item_hour_age"; +constexpr char const *kArgumentHackerNewsGravity = "gravity"; +constexpr char const *kReturnHackerNewsScore = "score"; void HackerNews(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory) { mgp::MemoryDispatcherGuard guard(memory); diff --git a/tests/e2e/configuration/configuration_check.py b/tests/e2e/configuration/configuration_check.py index 3cdc919f4..8684b24d6 100644 --- a/tests/e2e/configuration/configuration_check.py +++ b/tests/e2e/configuration/configuration_check.py @@ -31,7 +31,6 @@ def test_does_default_config_match(): use the DEFINE_HIDDEN_* macro instead of DEFINE_* to prevent SHOW CONFIG from returning it. 
""" - assert len(config) == len(default_config.startup_config_dict), define_msg for flag in config: @@ -46,7 +45,6 @@ def test_does_default_config_match(): ] if flag_name in machine_dependent_configurations: continue - # default_value assert default_config.startup_config_dict[flag_name][0] == flag[1] # current_value diff --git a/tests/e2e/configuration/default_config.py b/tests/e2e/configuration/default_config.py index 9a251c15c..e1d42b443 100644 --- a/tests/e2e/configuration/default_config.py +++ b/tests/e2e/configuration/default_config.py @@ -115,6 +115,16 @@ startup_config_dict = { "false", "Controls whether the index creation can be done in a multithreaded fashion.", ), + "storage_parallel_schema_recovery": ( + "false", + "false", + "Controls whether the indices and constraints creation can be done in a multithreaded fashion.", + ), + "storage_enable_schema_metadata": ( + "false", + "false", + "Controls whether metadata should be collected about the resident labels and edge types.", + ), "password_encryption_algorithm": ("bcrypt", "bcrypt", "The password encryption algorithm used for authentication."), "pulsar_service_url": ("", "", "Default URL used while connecting to Pulsar brokers."), "query_execution_timeout_sec": ( diff --git a/tests/e2e/filter_info/CMakeLists.txt b/tests/e2e/filter_info/CMakeLists.txt new file mode 100644 index 000000000..401ec46a6 --- /dev/null +++ b/tests/e2e/filter_info/CMakeLists.txt @@ -0,0 +1,6 @@ +function(copy_filter_info_e2e_python_files FILE_NAME) + copy_e2e_python_files(filter_info ${FILE_NAME}) +endfunction() + +copy_filter_info_e2e_python_files(common.py) +copy_filter_info_e2e_python_files(filter_info.py) diff --git a/tests/e2e/filter_info/common.py b/tests/e2e/filter_info/common.py new file mode 100644 index 000000000..1eb81a94b --- /dev/null +++ b/tests/e2e/filter_info/common.py @@ -0,0 +1,23 @@ +# Copyright 2023 Memgraph Ltd. 
+# +# Use of this software is governed by the Business Source License +# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +# License, and you may not use this file except in compliance with the Business Source License. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0, included in the file +# licenses/APL.txt. + +import pytest +from gqlalchemy import Memgraph + + +@pytest.fixture +def memgraph(**kwargs) -> Memgraph: + memgraph = Memgraph() + + yield memgraph + + memgraph.drop_database() + memgraph.drop_indexes() diff --git a/tests/e2e/filter_info/filter_info.py b/tests/e2e/filter_info/filter_info.py new file mode 100644 index 000000000..53bc737ea --- /dev/null +++ b/tests/e2e/filter_info/filter_info.py @@ -0,0 +1,39 @@ +# Copyright 2023 Memgraph Ltd. +# +# Use of this software is governed by the Business Source License +# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +# License, and you may not use this file except in compliance with the Business Source License. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0, included in the file +# licenses/APL.txt. 
+ +import sys + +import pytest +from common import memgraph + + +def test_label_index_hint(memgraph): + memgraph.execute("CREATE (n:Label1:Label2 {prop: 1});") + memgraph.execute("CREATE INDEX ON :Label1;") + + # TODO: Fix this test since it should only filter on :Label2 and prop + expected_explain = [ + " * Produce {n}", + " * Filter (n :Label1:Label2), {n.prop}", + " * ScanAllByLabel (n :Label1)", + " * Once", + ] + + actual_explain = [ + row["QUERY PLAN"] + for row in memgraph.execute_and_fetch("EXPLAIN MATCH (n:Label1:Label2) WHERE n.prop = 1 return n;") + ] + + assert expected_explain == actual_explain + + +if __name__ == "__main__": + sys.exit(pytest.main([__file__, "-rA"])) diff --git a/tests/e2e/filter_info/workloads.yaml b/tests/e2e/filter_info/workloads.yaml new file mode 100644 index 000000000..69c98f741 --- /dev/null +++ b/tests/e2e/filter_info/workloads.yaml @@ -0,0 +1,13 @@ +filter_info_cluster: &filter_info_cluster + cluster: + main: + args: ["--bolt-port", "7687", "--log-level=TRACE"] + log_file: "filter_info.log" + setup_queries: [] + validation_queries: [] + +workloads: + - name: "Filter info information" + binary: "tests/e2e/pytest_runner.sh" + args: ["filter_info/filter_info.py"] + <<: *filter_info_cluster diff --git a/tests/e2e/index_hints/index_hints.py b/tests/e2e/index_hints/index_hints.py index 85b63a84b..70d3ce6b6 100644 --- a/tests/e2e/index_hints/index_hints.py +++ b/tests/e2e/index_hints/index_hints.py @@ -162,12 +162,13 @@ def test_label_property_index_hint(memgraph): expected_explain_no_hint = [ " * Produce {n}", - " * Filter (n :Label), {n.id1}, {n.id2}", + " * Filter {n.id1}", " * ScanAllByLabelPropertyValue (n :Label {id2})", " * Once", ] expected_explain_with_hint = [ - row.replace("(n :Label {id2})", "(n :Label {id1})") for row in expected_explain_no_hint + row.replace("(n :Label {id2})", "(n :Label {id1})").replace(" * Filter {n.id1}", " * Filter {n.id2}") + for row in expected_explain_no_hint ] explain_no_hint = [ @@ -192,7 
+193,7 @@ def test_label_property_index_hint_alternative_orderings(memgraph): expected_explain_with_hint = [ " * Produce {n}", - " * Filter (n :Label), {n.id1}, {n.id2}", + " * Filter {n.id2}", " * ScanAllByLabelPropertyValue (n :Label {id1})", " * Once", ] @@ -221,7 +222,7 @@ def test_multiple_label_property_index_hints(memgraph): expected_explain_with_hint = [ " * Produce {n}", - " * Filter (n :Label), {n.id1}, {n.id2}", + " * Filter {n.id2}", " * ScanAllByLabelPropertyValue (n :Label {id1})", " * Once", ] @@ -251,7 +252,7 @@ def test_multiple_applicable_label_property_index_hints(memgraph): expected_explain_with_hint = [ " * Produce {n}", - " * Filter (n :Label), {n.id1}, {n.id2}", + " * Filter {n.id2}", " * ScanAllByLabelPropertyValue (n :Label {id1})", " * Once", ] @@ -275,12 +276,13 @@ def test_multiple_applicable_label_property_index_hints_alternative_orderings(me expected_explain_with_hint_1 = [ " * Produce {n}", - " * Filter (n :Label), {n.id1}, {n.id2}", + " * Filter {n.id2}", " * ScanAllByLabelPropertyValue (n :Label {id1})", " * Once", ] expected_explain_with_hint_2 = [ - row.replace("(n :Label {id1})", "(n :Label {id2})") for row in expected_explain_with_hint_1 + row.replace("(n :Label {id1})", "(n :Label {id2})").replace(" * Filter {n.id2}", " * Filter {n.id1}") + for row in expected_explain_with_hint_1 ] explain_with_hint_ordering_1a = [ @@ -407,6 +409,7 @@ def test_multiple_match_query(memgraph): memgraph.execute("CREATE INDEX ON :Label2;") memgraph.execute("CREATE INDEX ON :Label3;") + # TODO: Fix this test since it has the filtering info wrong (filtering by label that's already indexed) expected_explain_with_hint = [ " * Produce {n, m}", " * Cartesian {m : n}", @@ -414,7 +417,7 @@ def test_multiple_match_query(memgraph): " | * Filter (n :Label1:Label2), {n.id}", " | * ScanAllByLabel (n :Label1)", " | * Once", - " * Filter (m :Label2:Label3), (n :Label1:Label2), {n.id}", + " * Filter (m :Label2:Label3)", " * ScanAllByLabel (m :Label2)", " * Once", 
] diff --git a/tests/e2e/interactive_mg_runner.py b/tests/e2e/interactive_mg_runner.py index 769516961..13aa951db 100644 --- a/tests/e2e/interactive_mg_runner.py +++ b/tests/e2e/interactive_mg_runner.py @@ -80,6 +80,8 @@ ACTIONS = { "quit": lambda _: sys.exit(1), } +CLEANUP_DIRECTORIES_ON_EXIT = False + log = logging.getLogger("memgraph.tests.e2e") @@ -109,10 +111,11 @@ def _start_instance(name, args, log_file, setup_queries, use_ssl, procdir, data_ assert not is_port_in_use( extract_bolt_port(args) ), "If this raises, you are trying to start an instance on a port already used by one already running instance." - mg_instance = MemgraphInstanceRunner(MEMGRAPH_BINARY, use_ssl) - MEMGRAPH_INSTANCES[name] = mg_instance + log_file_path = os.path.join(BUILD_DIR, "logs", log_file) data_directory_path = os.path.join(BUILD_DIR, data_directory) + mg_instance = MemgraphInstanceRunner(MEMGRAPH_BINARY, use_ssl, {data_directory_path}) + MEMGRAPH_INSTANCES[name] = mg_instance binary_args = args + ["--log-file", log_file_path] + ["--data-directory", data_directory_path] if len(procdir) != 0: @@ -122,39 +125,43 @@ def _start_instance(name, args, log_file, setup_queries, use_ssl, procdir, data_ assert mg_instance.is_running(), "An error occured after starting Memgraph instance: application stopped running." 
-def stop_all(): +def stop_all(keep_directories=True): for mg_instance in MEMGRAPH_INSTANCES.values(): - mg_instance.stop() + mg_instance.stop(keep_directories) MEMGRAPH_INSTANCES.clear() -def stop_instance(context, name): +def stop_instance(context, name, keep_directories=True): for key, _ in context.items(): if key != name: continue - MEMGRAPH_INSTANCES[name].stop() + MEMGRAPH_INSTANCES[name].stop(keep_directories) MEMGRAPH_INSTANCES.pop(name) -def stop(context, name): +def stop(context, name, keep_directories=True): if name != "all": - stop_instance(context, name) + stop_instance(context, name, keep_directories) return stop_all() -def kill(context, name): +def kill(context, name, keep_directories=True): for key in context.keys(): if key != name: continue - MEMGRAPH_INSTANCES[name].kill() + MEMGRAPH_INSTANCES[name].kill(keep_directories) MEMGRAPH_INSTANCES.pop(name) +def cleanup_directories_on_exit(value=True): + CLEANUP_DIRECTORIES_ON_EXIT = value + + @atexit.register def cleanup(): - stop_all() + stop_all(CLEANUP_DIRECTORIES_ON_EXIT) def start_instance(context, name, procdir): @@ -184,8 +191,8 @@ def start_instance(context, name, procdir): assert len(mg_instances) == 1 -def start_all(context, procdir=""): - stop_all() +def start_all(context, procdir="", keep_directories=True): + stop_all(keep_directories) for key, _ in context.items(): start_instance(context, key, procdir) diff --git a/tests/e2e/memgraph.py b/tests/e2e/memgraph.py index a65bed2ed..d5a62a388 100755 --- a/tests/e2e/memgraph.py +++ b/tests/e2e/memgraph.py @@ -11,6 +11,7 @@ import copy import os +import shutil import subprocess import sys import time @@ -56,13 +57,14 @@ def replace_paths(path): class MemgraphInstanceRunner: - def __init__(self, binary_path=MEMGRAPH_BINARY, use_ssl=False): + def __init__(self, binary_path=MEMGRAPH_BINARY, use_ssl=False, delete_on_stop=None): self.host = "127.0.0.1" self.bolt_port = None self.binary_path = binary_path self.args = None self.proc_mg = None self.ssl = 
use_ssl + self.delete_on_stop = delete_on_stop def execute_setup_queries(self, setup_queries): if setup_queries is None: @@ -128,7 +130,7 @@ class MemgraphInstanceRunner: return False return True - def stop(self): + def stop(self, keep_directories=False): if not self.is_running(): return @@ -140,9 +142,16 @@ class MemgraphInstanceRunner: time.sleep(1) - def kill(self): + if not keep_directories: + for folder in self.delete_on_stop or {}: + shutil.rmtree(folder) + + def kill(self, keep_directories=False): if not self.is_running(): return self.proc_mg.kill() code = self.proc_mg.wait() + if not keep_directories: + for folder in self.delete_on_stop or {}: + shutil.rmtree(folder) assert code == -9, "The killed Memgraph process exited with non-nine!" diff --git a/tests/e2e/metadata_queries/CMakeLists.txt b/tests/e2e/metadata_queries/CMakeLists.txt new file mode 100644 index 000000000..9a91900cd --- /dev/null +++ b/tests/e2e/metadata_queries/CMakeLists.txt @@ -0,0 +1,7 @@ +function(copy_metadata_queries_e2e_python_files FILE_NAME) + copy_e2e_python_files(metadata_queries ${FILE_NAME}) +endfunction() + +copy_metadata_queries_e2e_python_files(common.py) +copy_metadata_queries_e2e_python_files(show_node_labels_info.py) +copy_metadata_queries_e2e_python_files(show_edge_types_info.py) diff --git a/tests/e2e/metadata_queries/common.py b/tests/e2e/metadata_queries/common.py new file mode 100644 index 000000000..f36c5ee16 --- /dev/null +++ b/tests/e2e/metadata_queries/common.py @@ -0,0 +1,34 @@ +# Copyright 2023 Memgraph Ltd. +# +# Use of this software is governed by the Business Source License +# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +# License, and you may not use this file except in compliance with the Business Source License. 
+# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0, included in the file +# licenses/APL.txt. + +import typing + +import mgclient +import pytest + + +@pytest.fixture(scope="module") +def cursor(**kwargs) -> mgclient.Connection: + connection = mgclient.connect(host="localhost", port=7687, **kwargs) + connection.autocommit = True + return connection.cursor() + + +def execute_and_fetch_all(cursor: mgclient.Cursor, query: str, params: dict = dict()) -> typing.List[tuple]: + cursor.execute(query, params) + return cursor.fetchall() + + +def are_results_equal(result1, result2): + if len(result1) != len(result2): + return False + + return sorted(result1) == sorted(result2) diff --git a/tests/e2e/metadata_queries/show_edge_types_info.py b/tests/e2e/metadata_queries/show_edge_types_info.py new file mode 100644 index 000000000..f20f0beab --- /dev/null +++ b/tests/e2e/metadata_queries/show_edge_types_info.py @@ -0,0 +1,80 @@ +# Copyright 2023 Memgraph Ltd. +# +# Use of this software is governed by the Business Source License +# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +# License, and you may not use this file except in compliance with the Business Source License. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0, included in the file +# licenses/APL.txt. 
+ +import sys + +import pytest +from common import are_results_equal, cursor, execute_and_fetch_all + + +# Helper functions +def create_nodes(cursor): + execute_and_fetch_all( + cursor, "CREATE (charlie:Person:Actor {name: 'Charlie Sheen'}), (oliver:Person:Director {name: 'Oliver Stone'})" + ) + + +def create_edges(cursor): + execute_and_fetch_all( + cursor, + "MATCH (charlie:Person {name: 'Charlie Sheen'}), (oliver:Person {name: 'Oliver Stone'}) CREATE (charlie)-[:ACTED_IN {role: 'Bud Fox'}]->(wallStreet:Movie {title: 'Wall Street'})<-[:DIRECTED]-(oliver)", + ) + + +def edge_types_info(cursor): + return execute_and_fetch_all(cursor, "SHOW EDGE_TYPES INFO") + + +def default_expected_result(cursor): + return [("DIRECTED",), ("ACTED_IN",)] + + +# Tests +def test_return_empty(cursor): + create_nodes(cursor) + + edge_types = edge_types_info(cursor) + expected = [] + assert are_results_equal(expected, edge_types) + + +def test_return_edge_types_simple(cursor): + create_nodes(cursor) + create_edges(cursor) + + edge_types = edge_types_info(cursor) + expected = default_expected_result(cursor) + assert are_results_equal(expected, edge_types) + + +def test_return_edge_types_repeating_identical_edges(cursor): + create_nodes(cursor) + + for _ in range(100): + create_edges(cursor) + + edge_types = edge_types_info(cursor) + expected = default_expected_result(cursor) + assert are_results_equal(expected, edge_types) + + +def test_return_edge_types_obtainable_after_edge_deletion(cursor): + create_nodes(cursor) + create_edges(cursor) + execute_and_fetch_all(cursor, "MATCH(n) DETACH DELETE n") + + edge_types = edge_types_info(cursor) + expected = default_expected_result(cursor) + assert are_results_equal(expected, edge_types) + + +if __name__ == "__main__": + sys.exit(pytest.main([__file__, "-rA"])) diff --git a/tests/e2e/metadata_queries/show_node_labels_info.py b/tests/e2e/metadata_queries/show_node_labels_info.py new file mode 100644 index 000000000..e98ef3e92 --- /dev/null +++ 
b/tests/e2e/metadata_queries/show_node_labels_info.py @@ -0,0 +1,67 @@ +# Copyright 2023 Memgraph Ltd. +# +# Use of this software is governed by the Business Source License +# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +# License, and you may not use this file except in compliance with the Business Source License. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0, included in the file +# licenses/APL.txt. + +import sys + +import pytest +from common import are_results_equal, cursor, execute_and_fetch_all + + +# Helper functions +def create_nodes(cursor): + execute_and_fetch_all( + cursor, "CREATE (charlie:Person:Actor {name: 'Charlie Sheen'}), (oliver:Person:Director {name: 'Oliver Stone'})" + ) + + +def node_labels_info(cursor): + return execute_and_fetch_all(cursor, "SHOW NODE_LABELS INFO") + + +def default_expected_result(cursor): + return [("Person",), ("Actor",), ("Director",)] + + +# Tests +def test_return_empty(cursor): + node_labels = node_labels_info(cursor) + expected = [] + assert are_results_equal(expected, node_labels) + + +def test_return_node_labels_simple(cursor): + create_nodes(cursor) + + node_labels = node_labels_info(cursor) + expected = default_expected_result(cursor) + assert are_results_equal(expected, node_labels) + + +def test_return_node_labels_repeating_identical_labels(cursor): + for _ in range(100): + create_nodes(cursor) + + node_labels = node_labels_info(cursor) + expected = default_expected_result(cursor) + assert are_results_equal(expected, node_labels) + + +def test_return_node_labels_obtainable_after_vertex_deletion(cursor): + create_nodes(cursor) + execute_and_fetch_all(cursor, "MATCH(n) DELETE n") + + node_labels = node_labels_info(cursor) + expected = default_expected_result(cursor) + assert are_results_equal(expected, 
node_labels) + + +if __name__ == "__main__": + sys.exit(pytest.main([__file__, "-rA"])) diff --git a/tests/e2e/metadata_queries/workloads.yaml b/tests/e2e/metadata_queries/workloads.yaml new file mode 100644 index 000000000..d4b213ae5 --- /dev/null +++ b/tests/e2e/metadata_queries/workloads.yaml @@ -0,0 +1,18 @@ +metadata_queries: &metadata_queries + cluster: + main: + args: ["--bolt-port", "7687", "--log-level=TRACE", "--also-log-to-stderr", "--storage-enable-schema-metadata=TRUE"] + log_file: "metadata-queries.log" + setup_queries: [] + validation_queries: [] + +workloads: + - name: "Show edge types info" + binary: "tests/e2e/pytest_runner.sh" + args: ["metadata_queries/show_edge_types_info.py"] + <<: *metadata_queries + + - name: "Show node labels info" + binary: "tests/e2e/pytest_runner.sh" + args: ["metadata_queries/show_node_labels_info.py"] + <<: *metadata_queries diff --git a/tests/e2e/query_modules/schema_test.py b/tests/e2e/query_modules/schema_test.py index 515514a74..fbb376a22 100644 --- a/tests/e2e/query_modules/schema_test.py +++ b/tests/e2e/query_modules/schema_test.py @@ -431,7 +431,7 @@ def test_node_type_properties1(): f"CALL libschema.node_type_properties() YIELD nodeType, nodeLabels, propertyName, propertyTypes , mandatory RETURN nodeType, nodeLabels, propertyName, propertyTypes , mandatory ORDER BY propertyName, nodeLabels[0];", )[0] ) - assert (result) == [":`Activity`", ["Activity"], "location", ["String"], True] + assert (result) == [":`Activity`", ["Activity"], "location", "String", False] result = list( execute_and_fetch_all( @@ -439,7 +439,7 @@ def test_node_type_properties1(): f"CALL libschema.node_type_properties() YIELD nodeType, nodeLabels, propertyName, propertyTypes , mandatory RETURN nodeType, nodeLabels, propertyName, propertyTypes , mandatory ORDER BY propertyName, nodeLabels[0];", )[1] ) - assert (result) == [":`Activity`", ["Activity"], "name", ["String"], True] + assert (result) == [":`Activity`", ["Activity"], "name", 
"String", False] result = list( execute_and_fetch_all( @@ -447,7 +447,7 @@ def test_node_type_properties1(): f"CALL libschema.node_type_properties() YIELD nodeType, nodeLabels, propertyName, propertyTypes , mandatory RETURN nodeType, nodeLabels, propertyName, propertyTypes , mandatory ORDER BY propertyName, nodeLabels[0];", )[2] ) - assert (result) == [":`Dog`", ["Dog"], "name", ["String"], True] + assert (result) == [":`Dog`", ["Dog"], "name", "String", False] result = list( execute_and_fetch_all( @@ -455,7 +455,81 @@ def test_node_type_properties1(): f"CALL libschema.node_type_properties() YIELD nodeType, nodeLabels, propertyName, propertyTypes , mandatory RETURN nodeType, nodeLabels, propertyName, propertyTypes , mandatory ORDER BY propertyName, nodeLabels[0];", )[3] ) - assert (result) == [":`Dog`", ["Dog"], "owner", ["String"], True] + assert (result) == [":`Dog`", ["Dog"], "owner", "String", False] + + +def test_node_type_properties2(): + cursor = connect().cursor() + execute_and_fetch_all( + cursor, + """ + CREATE (d:MyNode) + CREATE (n:MyNode) + """, + ) + result = execute_and_fetch_all( + cursor, + f"CALL libschema.node_type_properties() YIELD nodeType, nodeLabels, propertyName, propertyTypes , mandatory RETURN nodeType, nodeLabels, propertyName, propertyTypes , mandatory ORDER BY propertyName, nodeLabels[0];", + ) + assert (list(result[0])) == [":`MyNode`", ["MyNode"], "", "", False] + assert (result.__len__()) == 1 + + +def test_node_type_properties3(): + cursor = connect().cursor() + execute_and_fetch_all( + cursor, + """ + CREATE (d:Dog {name: 'Rex', owner: 'Carl'}) + CREATE (n:Dog) + """, + ) + result = execute_and_fetch_all( + cursor, + f"CALL libschema.node_type_properties() YIELD nodeType, nodeLabels, propertyName, propertyTypes , mandatory RETURN nodeType, nodeLabels, propertyName, propertyTypes , mandatory ORDER BY propertyName, nodeLabels[0];", + ) + + assert (list(result[0])) == [":`Dog`", ["Dog"], "name", "String", False] + assert 
(list(result[1])) == [":`Dog`", ["Dog"], "owner", "String", False] + assert (result.__len__()) == 2 + + +def test_node_type_properties4(): + cursor = connect().cursor() + execute_and_fetch_all( + cursor, + """ + CREATE (n:Label1:Label2 {property1: 'value1', property2: 'value2'}) + CREATE (m:Label2:Label1 {property3: 'value3'}) + """, + ) + result = list( + execute_and_fetch_all( + cursor, + f"CALL libschema.node_type_properties() YIELD nodeType, nodeLabels, propertyName, propertyTypes , mandatory RETURN nodeType, nodeLabels, propertyName, propertyTypes , mandatory ORDER BY propertyName, nodeLabels[0];", + ) + ) + assert (list(result[0])) == [":`Label1`:`Label2`", ["Label1", "Label2"], "property1", "String", False] + assert (list(result[1])) == [":`Label1`:`Label2`", ["Label1", "Label2"], "property2", "String", False] + assert (list(result[2])) == [":`Label1`:`Label2`", ["Label1", "Label2"], "property3", "String", False] + assert (result.__len__()) == 3 + + +def test_node_type_properties5(): + cursor = connect().cursor() + execute_and_fetch_all( + cursor, + """ + CREATE (d:Dog {name: 'Rex'}) + """, + ) + result = execute_and_fetch_all( + cursor, + f"CALL libschema.node_type_properties() YIELD nodeType, nodeLabels, propertyName, propertyTypes , mandatory RETURN nodeType, nodeLabels, propertyName, propertyTypes , mandatory ORDER BY propertyName, nodeLabels[0];", + ) + + assert (list(result[0])) == [":`Dog`", ["Dog"], "name", "String", True] + assert (result.__len__()) == 1 def test_rel_type_properties1(): @@ -473,5 +547,38 @@ def test_rel_type_properties1(): assert (result) == [":`LOVES`", "", "", False] +def test_rel_type_properties2(): + cursor = connect().cursor() + execute_and_fetch_all( + cursor, + """ + CREATE (d:Dog {name: 'Rex', owner: 'Carl'})-[l:LOVES]->(a:Activity {name: 'Running', location: 'Zadar'}) + CREATE (n:Dog {name: 'Simba', owner: 'Lucy'})-[j:LOVES {duration: 30}]->(b:Activity {name: 'Running', location: 'Zadar'}) + """, + ) + result = 
execute_and_fetch_all( + cursor, + f"CALL libschema.rel_type_properties() YIELD relType,propertyName, propertyTypes , mandatory RETURN relType, propertyName, propertyTypes , mandatory;", + ) + assert (list(result[0])) == [":`LOVES`", "duration", "Int", False] + assert (result.__len__()) == 1 + + +def test_rel_type_properties3(): + cursor = connect().cursor() + execute_and_fetch_all( + cursor, + """ + CREATE (n:Dog {name: 'Simba', owner: 'Lucy'})-[j:LOVES {duration: 30}]->(b:Activity {name: 'Running', location: 'Zadar'}) + """, + ) + result = execute_and_fetch_all( + cursor, + f"CALL libschema.rel_type_properties() YIELD relType,propertyName, propertyTypes , mandatory RETURN relType, propertyName, propertyTypes , mandatory;", + ) + assert (list(result[0])) == [":`LOVES`", "duration", "Int", True] + assert (result.__len__()) == 1 + + if __name__ == "__main__": sys.exit(pytest.main([__file__, "-rA"])) diff --git a/tests/e2e/query_modules_storage_modes/CMakeLists.txt b/tests/e2e/query_modules_storage_modes/CMakeLists.txt new file mode 100644 index 000000000..5bd0ac436 --- /dev/null +++ b/tests/e2e/query_modules_storage_modes/CMakeLists.txt @@ -0,0 +1,8 @@ +function(copy_qm_storage_modes_e2e_python_files FILE_NAME) + copy_e2e_python_files(query_modules_storage_modes ${FILE_NAME}) +endfunction() + +copy_qm_storage_modes_e2e_python_files(common.py) +copy_qm_storage_modes_e2e_python_files(test_query_modules_storage_modes.py) + +add_subdirectory(query_modules) diff --git a/tests/e2e/query_modules_storage_modes/common.py b/tests/e2e/query_modules_storage_modes/common.py new file mode 100644 index 000000000..19abde158 --- /dev/null +++ b/tests/e2e/query_modules_storage_modes/common.py @@ -0,0 +1,37 @@ +# Copyright 2023 Memgraph Ltd. 
+# +# Use of this software is governed by the Business Source License +# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +# License, and you may not use this file except in compliance with the Business Source License. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0, included in the file +# licenses/APL.txt. + +import typing + +import mgclient +import pytest + + +@pytest.fixture(scope="function") +def cursor(**kwargs) -> mgclient.Connection: + connection = mgclient.connect(host="localhost", port=7687, **kwargs) + connection.autocommit = True + cursor = connection.cursor() + + cursor.execute("MATCH (n) DETACH DELETE n;") + cursor.execute("CREATE (m:Component {id: 'A7422'}), (n:Component {id: '7X8X0'});") + cursor.execute("MATCH (m:Component {id: 'A7422'}) MATCH (n:Component {id: '7X8X0'}) CREATE (m)-[:PART_OF]->(n);") + cursor.execute("MATCH (m:Component {id: 'A7422'}) MATCH (n:Component {id: '7X8X0'}) CREATE (n)-[:DEPENDS_ON]->(m);") + + yield cursor + + cursor.execute("MATCH (n) DETACH DELETE n;") + + +def connect(**kwargs): + connection = mgclient.connect(host="localhost", port=7687, **kwargs) + connection.autocommit = True + return connection.cursor() diff --git a/tests/e2e/query_modules_storage_modes/query_modules/CMakeLists.txt b/tests/e2e/query_modules_storage_modes/query_modules/CMakeLists.txt new file mode 100644 index 000000000..44df43702 --- /dev/null +++ b/tests/e2e/query_modules_storage_modes/query_modules/CMakeLists.txt @@ -0,0 +1,4 @@ +copy_qm_storage_modes_e2e_python_files(python_api.py) + +add_query_module(c_api c_api.cpp) +add_query_module(cpp_api cpp_api.cpp) diff --git a/tests/e2e/query_modules_storage_modes/query_modules/c_api.cpp b/tests/e2e/query_modules_storage_modes/query_modules/c_api.cpp new file mode 100644 index 000000000..85663a829 --- 
/dev/null +++ b/tests/e2e/query_modules_storage_modes/query_modules/c_api.cpp @@ -0,0 +1,70 @@ +// Copyright 2023 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#include +#include + +#include "_mgp.hpp" +#include "mg_exceptions.hpp" +#include "mg_procedure.h" + +constexpr std::string_view kFunctionPassRelationship = "pass_relationship"; +constexpr std::string_view kPassRelationshipArg = "relationship"; + +constexpr std::string_view kProcedurePassNodeWithId = "pass_node_with_id"; +constexpr std::string_view kPassNodeWithIdArg = "node"; +constexpr std::string_view kPassNodeWithIdFieldNode = "node"; +constexpr std::string_view kPassNodeWithIdFieldId = "id"; + +// While the query procedure/function sleeps for this amount of time, a parallel transaction will erase a graph element +// (node or relationship) contained in the return value. Any operation in the parallel transaction should take far less +// time than this value. 
+const int64_t kSleep = 1; + +void PassRelationship(mgp_list *args, mgp_func_context *ctx, mgp_func_result *res, mgp_memory *memory) { + auto *relationship = mgp::list_at(args, 0); + + std::this_thread::sleep_for(std::chrono::seconds(kSleep)); + + mgp::func_result_set_value(res, relationship, memory); +} + +void PassNodeWithId(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory) { + auto *node = mgp::value_get_vertex(mgp::list_at(args, 0)); + auto node_id = mgp::vertex_get_id(node).as_int; + + std::this_thread::sleep_for(std::chrono::seconds(kSleep)); + + auto *result_record = mgp::result_new_record(result); + mgp::result_record_insert(result_record, kPassNodeWithIdFieldNode.data(), mgp::value_make_vertex(node)); + mgp::result_record_insert(result_record, kPassNodeWithIdFieldId.data(), mgp::value_make_int(node_id, memory)); +} + +extern "C" int mgp_init_module(struct mgp_module *query_module, struct mgp_memory *memory) { + try { + { + auto *func = mgp::module_add_function(query_module, kFunctionPassRelationship.data(), PassRelationship); + mgp::func_add_arg(func, kPassRelationshipArg.data(), mgp::type_relationship()); + } + { + auto *proc = mgp::module_add_read_procedure(query_module, kProcedurePassNodeWithId.data(), PassNodeWithId); + mgp::proc_add_arg(proc, kPassNodeWithIdArg.data(), mgp::type_node()); + mgp::proc_add_result(proc, kPassNodeWithIdFieldNode.data(), mgp::type_node()); + mgp::proc_add_result(proc, kPassNodeWithIdFieldId.data(), mgp::type_int()); + } + } catch (const std::exception &e) { + return 1; + } + + return 0; +} + +extern "C" int mgp_shutdown_module() { return 0; } diff --git a/tests/e2e/query_modules_storage_modes/query_modules/cpp_api.cpp b/tests/e2e/query_modules_storage_modes/query_modules/cpp_api.cpp new file mode 100644 index 000000000..a74dce832 --- /dev/null +++ b/tests/e2e/query_modules_storage_modes/query_modules/cpp_api.cpp @@ -0,0 +1,86 @@ +// Copyright 2023 Memgraph Ltd. 
+// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#include +#include + +#include + +constexpr std::string_view kFunctionPassRelationship = "pass_relationship"; +constexpr std::string_view kPassRelationshipArg = "relationship"; + +constexpr std::string_view kProcedurePassNodeWithId = "pass_node_with_id"; +constexpr std::string_view kPassNodeWithIdArg = "node"; +constexpr std::string_view kPassNodeWithIdFieldNode = "node"; +constexpr std::string_view kPassNodeWithIdFieldId = "id"; + +// While the query procedure/function sleeps for this amount of time, a parallel transaction will erase a graph element +// (node or relationship) contained in the return value. Any operation in the parallel transaction should take far less +// time than this value. 
+const int64_t kSleep = 1; + +void PassRelationship(mgp_list *args, mgp_func_context *ctx, mgp_func_result *res, mgp_memory *memory) { + try { + mgp::MemoryDispatcherGuard guard{memory}; + const auto arguments = mgp::List(args); + auto result = mgp::Result(res); + + const auto relationship = arguments[0].ValueRelationship(); + + std::this_thread::sleep_for(std::chrono::seconds(kSleep)); + + result.SetValue(relationship); + } catch (const std::exception &e) { + mgp::func_result_set_error_msg(res, e.what(), memory); + return; + } +} + +void PassNodeWithId(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory) { + try { + mgp::MemoryDispatcherGuard guard(memory); + const auto arguments = mgp::List(args); + const auto record_factory = mgp::RecordFactory(result); + + const auto node = arguments[0].ValueNode(); + const auto node_id = node.Id().AsInt(); + + std::this_thread::sleep_for(std::chrono::seconds(kSleep)); + + auto record = record_factory.NewRecord(); + record.Insert(kPassNodeWithIdFieldNode.data(), node); + record.Insert(kPassNodeWithIdFieldId.data(), node_id); + } catch (const std::exception &e) { + mgp::result_set_error_msg(result, e.what()); + return; + } +} + +extern "C" int mgp_init_module(struct mgp_module *query_module, struct mgp_memory *memory) { + try { + mgp::MemoryDispatcherGuard guard(memory); + + mgp::AddFunction(PassRelationship, kFunctionPassRelationship, + {mgp::Parameter(kPassRelationshipArg, mgp::Type::Relationship)}, query_module, memory); + + mgp::AddProcedure( + PassNodeWithId, kProcedurePassNodeWithId, mgp::ProcedureType::Read, + {mgp::Parameter(kPassNodeWithIdArg, mgp::Type::Node)}, + {mgp::Return(kPassNodeWithIdFieldNode, mgp::Type::Node), mgp::Return(kPassNodeWithIdFieldId, mgp::Type::Int)}, + query_module, memory); + } catch (const std::exception &e) { + return 1; + } + + return 0; +} + +extern "C" int mgp_shutdown_module() { return 0; } diff --git 
a/tests/e2e/query_modules_storage_modes/query_modules/python_api.py b/tests/e2e/query_modules_storage_modes/query_modules/python_api.py new file mode 100644 index 000000000..90db4c97d --- /dev/null +++ b/tests/e2e/query_modules_storage_modes/query_modules/python_api.py @@ -0,0 +1,55 @@ +# Copyright 2023 Memgraph Ltd. +# +# Use of this software is governed by the Business Source License +# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +# License, and you may not use this file except in compliance with the Business Source License. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0, included in the file +# licenses/APL.txt. + +from time import sleep + +import mgp + +# While the query procedure/function sleeps for this amount of time, a parallel transaction will erase a graph element +# (node or relationship) contained in the return value. Any operation in the parallel transaction should take far less +# time than this value. 
+SLEEP = 1 + + +@mgp.read_proc +def pass_node_with_id(ctx: mgp.ProcCtx, node: mgp.Vertex) -> mgp.Record(node=mgp.Vertex, id=int): + sleep(SLEEP) + return mgp.Record(node=node, id=node.id) + + +@mgp.function +def pass_node(ctx: mgp.FuncCtx, node: mgp.Vertex): + sleep(SLEEP) + return node + + +@mgp.function +def pass_relationship(ctx: mgp.FuncCtx, relationship: mgp.Edge): + sleep(SLEEP) + return relationship + + +@mgp.function +def pass_path(ctx: mgp.FuncCtx, path: mgp.Path): + sleep(SLEEP) + return path + + +@mgp.function +def pass_list(ctx: mgp.FuncCtx, list_: mgp.List[mgp.Any]): + sleep(SLEEP) + return list_ + + +@mgp.function +def pass_map(ctx: mgp.FuncCtx, map_: mgp.Map): + sleep(SLEEP) + return map_ diff --git a/tests/e2e/query_modules_storage_modes/test_query_modules_storage_modes.py b/tests/e2e/query_modules_storage_modes/test_query_modules_storage_modes.py new file mode 100644 index 000000000..2c6eeac20 --- /dev/null +++ b/tests/e2e/query_modules_storage_modes/test_query_modules_storage_modes.py @@ -0,0 +1,283 @@ +# Copyright 2023 Memgraph Ltd. +# +# Use of this software is governed by the Business Source License +# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +# License, and you may not use this file except in compliance with the Business Source License. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0, included in the file +# licenses/APL.txt. 
+ +# isort: off +from multiprocessing import Process +import sys +import pytest + +from common import cursor, connect + +import time + +SWITCH_TO_ANALYTICAL = "STORAGE MODE IN_MEMORY_ANALYTICAL;" + + +def modify_graph(query): + subprocess_cursor = connect() + + time.sleep(0.5) # Time for the parallel transaction to call a query procedure + + subprocess_cursor.execute(query) + + +@pytest.mark.parametrize("api", ["c", "cpp", "python"]) +def test_function_delete_result(cursor, api): + cursor.execute(SWITCH_TO_ANALYTICAL) + + deleter = Process( + target=modify_graph, + args=("MATCH (m:Component {id: 'A7422'})-[e:PART_OF]->(n:Component {id: '7X8X0'}) DELETE e;",), + ) + deleter.start() + + cursor.execute(f"MATCH (m)-[e]->(n) RETURN {api}_api.pass_relationship(e);") + + deleter.join() + + result = cursor.fetchall() + + assert len(result) == 1 and len(result[0]) == 1 and result[0][0].type == "DEPENDS_ON" + + +@pytest.mark.parametrize("api", ["c", "cpp", "python"]) +def test_function_delete_only_result(cursor, api): + cursor.execute(SWITCH_TO_ANALYTICAL) + + cursor.execute("MATCH (m:Component {id: '7X8X0'})-[e:DEPENDS_ON]->(n:Component {id: 'A7422'}) DELETE e;") + + deleter = Process( + target=modify_graph, + args=("MATCH (m:Component {id: 'A7422'})-[e:PART_OF]->(n:Component {id: '7X8X0'}) DELETE e;",), + ) + deleter.start() + + cursor.execute(f"MATCH (m)-[e]->(n) RETURN {api}_api.pass_relationship(e);") + + deleter.join() + + result = cursor.fetchall() + + assert len(result) == 1 and len(result[0]) == 1 and result[0][0] is None + + +@pytest.mark.parametrize("api", ["c", "cpp", "python"]) +def test_procedure_delete_result(cursor, api): + cursor.execute(SWITCH_TO_ANALYTICAL) + + deleter = Process( + target=modify_graph, + args=("MATCH (n {id: 'A7422'}) DETACH DELETE n;",), + ) + deleter.start() + + cursor.execute( + f"""MATCH (n) + CALL {api}_api.pass_node_with_id(n) + YIELD node, id + RETURN node, id;""" + ) + + deleter.join() + + result = cursor.fetchall() + + assert 
len(result) == 1 and len(result[0]) == 2 and result[0][0].properties["id"] == "7X8X0" + + +@pytest.mark.parametrize("api", ["c", "cpp", "python"]) +def test_procedure_delete_only_result(cursor, api): + cursor.execute(SWITCH_TO_ANALYTICAL) + + cursor.execute("MATCH (n {id: '7X8X0'}) DETACH DELETE n;") + + deleter = Process( + target=modify_graph, + args=("MATCH (n {id: 'A7422'}) DETACH DELETE n;",), + ) + deleter.start() + + cursor.execute( + f"""MATCH (n) + CALL {api}_api.pass_node_with_id(n) + YIELD node, id + RETURN node, id;""" + ) + + deleter.join() + + result = cursor.fetchall() + + assert len(result) == 0 + + +def test_deleted_node(cursor): + cursor.execute(SWITCH_TO_ANALYTICAL) + + deleter = Process(target=modify_graph, args=("MATCH (n:Component {id: 'A7422'}) DETACH DELETE n;",)) + deleter.start() + + cursor.execute( + """MATCH (n: Component {id: 'A7422'}) + RETURN python_api.pass_node(n);""" + ) + + deleter.join() + + result = cursor.fetchall() + + assert len(result) == 1 and len(result[0]) == 1 and result[0][0] is None + + +def test_deleted_relationship(cursor): + cursor.execute(SWITCH_TO_ANALYTICAL) + + deleter = Process( + target=modify_graph, + args=("MATCH (:Component {id: 'A7422'})-[e:PART_OF]->(:Component {id: '7X8X0'}) DELETE e;",), + ) + deleter.start() + + cursor.execute( + """MATCH (:Component {id: 'A7422'})-[e:PART_OF]->(:Component {id: '7X8X0'}) + RETURN python_api.pass_relationship(e);""" + ) + + deleter.join() + + result = cursor.fetchall() + + assert len(result) == 1 and len(result[0]) == 1 and result[0][0] is None + + +def test_deleted_node_in_path(cursor): + cursor.execute(SWITCH_TO_ANALYTICAL) + + deleter = Process(target=modify_graph, args=("MATCH (n:Component {id: 'A7422'}) DETACH DELETE n;",)) + deleter.start() + + cursor.execute( + """MATCH path=(n {id: 'A7422'})-[e]->(m) + RETURN python_api.pass_path(path);""" + ) + + deleter.join() + + result = cursor.fetchall() + + assert len(result) == 1 and len(result[0]) == 1 and result[0][0] 
is None + + +def test_deleted_relationship_in_path(cursor): + cursor.execute(SWITCH_TO_ANALYTICAL) + + deleter = Process( + target=modify_graph, + args=("MATCH (:Component {id: 'A7422'})-[e:PART_OF]->(:Component {id: '7X8X0'}) DELETE e;",), + ) + deleter.start() + + cursor.execute( + """MATCH path=(n {id: 'A7422'})-[e]->(m) + RETURN python_api.pass_path(path);""" + ) + + deleter.join() + + result = cursor.fetchall() + + assert len(result) == 1 and len(result[0]) == 1 and result[0][0] is None + + +def test_deleted_value_in_list(cursor): + cursor.execute(SWITCH_TO_ANALYTICAL) + + deleter = Process( + target=modify_graph, + args=("MATCH (:Component {id: 'A7422'})-[e:PART_OF]->(:Component {id: '7X8X0'}) DELETE e;",), + ) + deleter.start() + + cursor.execute( + """MATCH (n)-[e]->() + WITH collect(n) + collect(e) as list + RETURN python_api.pass_list(list);""" + ) + + deleter.join() + + result = cursor.fetchall() + + assert len(result) == 1 and len(result[0]) == 1 and result[0][0] is None + + +def test_deleted_value_in_map(cursor): + cursor.execute(SWITCH_TO_ANALYTICAL) + + deleter = Process( + target=modify_graph, + args=("MATCH (:Component {id: 'A7422'})-[e:PART_OF]->(:Component {id: '7X8X0'}) DELETE e;",), + ) + deleter.start() + + cursor.execute( + """MATCH (n {id: 'A7422'})-[e]->() + WITH {node: n, relationship: e} AS map + RETURN python_api.pass_map(map);""" + ) + + deleter.join() + + result = cursor.fetchall() + + assert len(result) == 1 and len(result[0]) == 1 and result[0][0] is None + + +@pytest.mark.parametrize("storage_mode", ["IN_MEMORY_TRANSACTIONAL", "IN_MEMORY_ANALYTICAL"]) +def test_function_none_deleted(storage_mode): + cursor = connect() + + cursor.execute(f"STORAGE MODE {storage_mode};") + cursor.execute("CREATE (m:Component {id: 'A7422'}), (n:Component {id: '7X8X0'});") + + cursor.execute( + """MATCH (n) + RETURN python_api.pass_node(n);""" + ) + + result = cursor.fetchall() + cursor.execute("MATCH (n) DETACH DELETE n;") + + assert len(result) == 2 + 
+ +@pytest.mark.parametrize("storage_mode", ["IN_MEMORY_TRANSACTIONAL", "IN_MEMORY_ANALYTICAL"]) +def test_procedure_none_deleted(storage_mode): + cursor = connect() + + cursor.execute(f"STORAGE MODE {storage_mode};") + cursor.execute("CREATE (m:Component {id: 'A7422'}), (n:Component {id: '7X8X0'});") + + cursor.execute( + """MATCH (n) + CALL python_api.pass_node_with_id(n) + YIELD node, id + RETURN node, id;""" + ) + + result = cursor.fetchall() + cursor.execute("MATCH (n) DETACH DELETE n;") + + assert len(result) == 2 + + +if __name__ == "__main__": + sys.exit(pytest.main([__file__, "-rA"])) diff --git a/tests/e2e/query_modules_storage_modes/workloads.yaml b/tests/e2e/query_modules_storage_modes/workloads.yaml new file mode 100644 index 000000000..d8caee2ca --- /dev/null +++ b/tests/e2e/query_modules_storage_modes/workloads.yaml @@ -0,0 +1,14 @@ +query_modules_storage_modes_cluster: &query_modules_storage_modes_cluster + cluster: + main: + args: ["--bolt-port", "7687", "--log-level=TRACE"] + log_file: "query_modules_storage_modes.log" + setup_queries: [] + validation_queries: [] + +workloads: + - name: "Test query module API behavior in Memgraph storage modes" + binary: "tests/e2e/pytest_runner.sh" + proc: "tests/e2e/query_modules_storage_modes/query_modules/" + args: ["query_modules_storage_modes/test_query_modules_storage_modes.py"] + <<: *query_modules_storage_modes_cluster diff --git a/tests/e2e/query_planning/CMakeLists.txt b/tests/e2e/query_planning/CMakeLists.txt new file mode 100644 index 000000000..9c6d39bf9 --- /dev/null +++ b/tests/e2e/query_planning/CMakeLists.txt @@ -0,0 +1,6 @@ +function(copy_query_planning_e2e_python_files FILE_NAME) + copy_e2e_python_files(query_planning ${FILE_NAME}) +endfunction() + +copy_query_planning_e2e_python_files(common.py) +copy_query_planning_e2e_python_files(query_planning_cartesian.py) diff --git a/tests/e2e/query_planning/common.py b/tests/e2e/query_planning/common.py new file mode 100644 index 000000000..6ad52539b 
--- /dev/null +++ b/tests/e2e/query_planning/common.py @@ -0,0 +1,24 @@ +# Copyright 2023 Memgraph Ltd. +# +# Use of this software is governed by the Business Source License +# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +# License, and you may not use this file except in compliance with the Business Source License. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0, included in the file +# licenses/APL.txt. + +import pytest +from gqlalchemy import Memgraph + + +@pytest.fixture +def memgraph(**kwargs) -> Memgraph: + memgraph = Memgraph() + + yield memgraph + + memgraph.drop_indexes() + memgraph.ensure_constraints([]) + memgraph.drop_database() diff --git a/tests/e2e/query_planning/query_planning_cartesian.py b/tests/e2e/query_planning/query_planning_cartesian.py new file mode 100644 index 000000000..11bc3f628 --- /dev/null +++ b/tests/e2e/query_planning/query_planning_cartesian.py @@ -0,0 +1,42 @@ +# Copyright 2023 Memgraph Ltd. +# +# Use of this software is governed by the Business Source License +# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +# License, and you may not use this file except in compliance with the Business Source License. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0, included in the file +# licenses/APL.txt. 
+ +import sys + +import pytest +from common import memgraph + +QUERY_PLAN = "QUERY PLAN" + + +def test_indexed_join_with_indices(memgraph): + memgraph.execute("CREATE INDEX ON :Node(id);") + + expected_explain = [ + f" * Produce {{a, b, r}}", + f" * Filter (a :Node), {{a.id}}", + f" * Expand (b)-[r:EDGE]-(a)", + f" * ScanAllByLabelPropertyValue (b :Node {{id}})", + f" * Once", + ] + + results = list( + memgraph.execute_and_fetch( + "EXPLAIN MATCH (a:Node {id: 1}) MATCH (b:Node {id: 2}) MATCH (a)-[r:EDGE]-(b) return a,b,r;" + ) + ) + actual_explain = [x[QUERY_PLAN] for x in results] + + assert expected_explain == actual_explain + + +if __name__ == "__main__": + sys.exit(pytest.main([__file__, "-rA"])) diff --git a/tests/e2e/query_planning/workloads.yaml b/tests/e2e/query_planning/workloads.yaml new file mode 100644 index 000000000..3fd5fb478 --- /dev/null +++ b/tests/e2e/query_planning/workloads.yaml @@ -0,0 +1,14 @@ +queries_cluster: &queries_cluster + cluster: + main: + args: ["--bolt-port", "7687", "--log-level=TRACE"] + log_file: "query_planning.log" + setup_queries: [] + validation_queries: [] + + +workloads: + - name: "Query planning cartesian" + binary: "tests/e2e/pytest_runner.sh" + args: ["query_planning/query_planning_cartesian.py"] + <<: *queries_cluster diff --git a/tests/e2e/replication/CMakeLists.txt b/tests/e2e/replication/CMakeLists.txt index b054981c8..39f179a3d 100644 --- a/tests/e2e/replication/CMakeLists.txt +++ b/tests/e2e/replication/CMakeLists.txt @@ -13,6 +13,7 @@ copy_e2e_python_files(replication_show common.py) copy_e2e_python_files(replication_show conftest.py) copy_e2e_python_files(replication_show show.py) copy_e2e_python_files(replication_show show_while_creating_invalid_state.py) +copy_e2e_python_files(replication_show edge_delete.py) copy_e2e_python_files_from_parent_folder(replication_show ".." memgraph.py) copy_e2e_python_files_from_parent_folder(replication_show ".." 
interactive_mg_runner.py) copy_e2e_python_files_from_parent_folder(replication_show ".." mg_utils.py) diff --git a/tests/e2e/replication/edge_delete.py b/tests/e2e/replication/edge_delete.py new file mode 100755 index 000000000..0e25faee1 --- /dev/null +++ b/tests/e2e/replication/edge_delete.py @@ -0,0 +1,56 @@ +# Copyright 2022 Memgraph Ltd. +# +# Use of this software is governed by the Business Source License +# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +# License, and you may not use this file except in compliance with the Business Source License. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0, included in the file +# licenses/APL.txt. + +import sys +import time + +import pytest +from common import execute_and_fetch_all +from mg_utils import mg_sleep_and_assert + + +# BUGFIX: for issue https://github.com/memgraph/memgraph/issues/1515 +def test_replication_handles_delete_when_multiple_edges_of_same_type(connection): + # Goal is to check the timestamp are correctly computed from the information we get from replicas. + # 0/ Check original state of replicas. + # 1/ Add nodes and edges to MAIN, then delete the edges. + # 2/ Check state of replicas. 
+ + # 0/ + conn = connection(7687, "main") + conn.autocommit = True + cursor = conn.cursor() + actual_data = set(execute_and_fetch_all(cursor, "SHOW REPLICAS;")) + + expected_data = { + ("replica_1", "127.0.0.1:10001", "sync", 0, 0, "ready"), + ("replica_2", "127.0.0.1:10002", "async", 0, 0, "ready"), + } + assert actual_data == expected_data + + # 1/ + execute_and_fetch_all(cursor, "CREATE (a)-[r:X]->(b) CREATE (a)-[:X]->(b) DELETE r;") + + # 2/ + expected_data = { + ("replica_1", "127.0.0.1:10001", "sync", 2, 0, "ready"), + ("replica_2", "127.0.0.1:10002", "async", 2, 0, "ready"), + } + + def retrieve_data(): + return set(execute_and_fetch_all(cursor, "SHOW REPLICAS;")) + + actual_data = mg_sleep_and_assert(expected_data, retrieve_data) + assert actual_data == expected_data + + +if __name__ == "__main__": + sys.exit(pytest.main([__file__, "-rA"])) diff --git a/tests/e2e/replication/show_while_creating_invalid_state.py b/tests/e2e/replication/show_while_creating_invalid_state.py index 996955dc1..74dcbce74 100644 --- a/tests/e2e/replication/show_while_creating_invalid_state.py +++ b/tests/e2e/replication/show_while_creating_invalid_state.py @@ -123,6 +123,143 @@ def test_show_replicas(connection): assert actual_data == expected_data +def test_drop_replicas(connection): + # Goal of this test is to check the DROP REPLICAS command. 
+ # 0/ Manually start main and all replicas + # 1/ Check status of the replicas + # 2/ Kill replica 3 + # 3/ Drop replica 3 and check status + # 4/ Stop replica 4 + # 5/ Drop replica 4 and check status + # 6/ Kill replica 1 + # 7/ Drop replica 1 and check status + # 8/ Stop replica 2 + # 9/ Drop replica 2 and check status + # 10/ Restart all replicas + # 11/ Register them + # 12/ Drop all and check status + + def retrieve_data(): + return set(execute_and_fetch_all(cursor, "SHOW REPLICAS;")) + + # 0/ + interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION) + + cursor = connection(7687, "main").cursor() + + # 1/ + actual_data = set(execute_and_fetch_all(cursor, "SHOW REPLICAS;")) + EXPECTED_COLUMN_NAMES = { + "name", + "socket_address", + "sync_mode", + "current_timestamp_of_replica", + "number_of_timestamp_behind_master", + "state", + } + + actual_column_names = {x.name for x in cursor.description} + assert actual_column_names == EXPECTED_COLUMN_NAMES + + expected_data = { + ("replica_1", "127.0.0.1:10001", "sync", 0, 0, "ready"), + ("replica_2", "127.0.0.1:10002", "sync", 0, 0, "ready"), + ("replica_3", "127.0.0.1:10003", "async", 0, 0, "ready"), + ("replica_4", "127.0.0.1:10004", "async", 0, 0, "ready"), + } + mg_sleep_and_assert(expected_data, retrieve_data) + + # 2/ + interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "replica_3") + expected_data = { + ("replica_1", "127.0.0.1:10001", "sync", 0, 0, "ready"), + ("replica_2", "127.0.0.1:10002", "sync", 0, 0, "ready"), + ("replica_3", "127.0.0.1:10003", "async", 0, 0, "invalid"), + ("replica_4", "127.0.0.1:10004", "async", 0, 0, "ready"), + } + mg_sleep_and_assert(expected_data, retrieve_data) + + # 3/ + execute_and_fetch_all(cursor, "DROP REPLICA replica_3") + expected_data = { + ("replica_1", "127.0.0.1:10001", "sync", 0, 0, "ready"), + ("replica_2", "127.0.0.1:10002", "sync", 0, 0, "ready"), + ("replica_4", "127.0.0.1:10004", "async", 0, 0, "ready"), + } + mg_sleep_and_assert(expected_data, 
retrieve_data) + + # 4/ + interactive_mg_runner.stop(MEMGRAPH_INSTANCES_DESCRIPTION, "replica_4") + expected_data = { + ("replica_1", "127.0.0.1:10001", "sync", 0, 0, "ready"), + ("replica_2", "127.0.0.1:10002", "sync", 0, 0, "ready"), + ("replica_4", "127.0.0.1:10004", "async", 0, 0, "invalid"), + } + mg_sleep_and_assert(expected_data, retrieve_data) + + # 5/ + execute_and_fetch_all(cursor, "DROP REPLICA replica_4") + expected_data = { + ("replica_1", "127.0.0.1:10001", "sync", 0, 0, "ready"), + ("replica_2", "127.0.0.1:10002", "sync", 0, 0, "ready"), + } + mg_sleep_and_assert(expected_data, retrieve_data) + + # 6/ + interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "replica_1") + expected_data = { + ("replica_1", "127.0.0.1:10001", "sync", 0, 0, "invalid"), + ("replica_2", "127.0.0.1:10002", "sync", 0, 0, "ready"), + } + mg_sleep_and_assert(expected_data, retrieve_data) + + # 7/ + execute_and_fetch_all(cursor, "DROP REPLICA replica_1") + expected_data = { + ("replica_2", "127.0.0.1:10002", "sync", 0, 0, "ready"), + } + mg_sleep_and_assert(expected_data, retrieve_data) + + # 8/ + interactive_mg_runner.stop(MEMGRAPH_INSTANCES_DESCRIPTION, "replica_2") + expected_data = { + ("replica_2", "127.0.0.1:10002", "sync", 0, 0, "invalid"), + } + mg_sleep_and_assert(expected_data, retrieve_data) + + # 9/ + execute_and_fetch_all(cursor, "DROP REPLICA replica_2") + expected_data = set() + mg_sleep_and_assert(expected_data, retrieve_data) + + # 10/ + interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "replica_1") + interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "replica_2") + interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "replica_3") + interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "replica_4") + execute_and_fetch_all(cursor, "REGISTER REPLICA replica_1 SYNC TO '127.0.0.1:10001';") + execute_and_fetch_all(cursor, "REGISTER REPLICA replica_2 SYNC TO '127.0.0.1:10002';") + execute_and_fetch_all(cursor, "REGISTER REPLICA 
replica_3 ASYNC TO '127.0.0.1:10003';") + execute_and_fetch_all(cursor, "REGISTER REPLICA replica_4 ASYNC TO '127.0.0.1:10004';") + + # 11/ + expected_data = { + ("replica_1", "127.0.0.1:10001", "sync", 0, 0, "ready"), + ("replica_2", "127.0.0.1:10002", "sync", 0, 0, "ready"), + ("replica_3", "127.0.0.1:10003", "async", 0, 0, "ready"), + ("replica_4", "127.0.0.1:10004", "async", 0, 0, "ready"), + } + mg_sleep_and_assert(expected_data, retrieve_data) + + # 12/ + execute_and_fetch_all(cursor, "DROP REPLICA replica_1") + execute_and_fetch_all(cursor, "DROP REPLICA replica_2") + execute_and_fetch_all(cursor, "DROP REPLICA replica_3") + execute_and_fetch_all(cursor, "DROP REPLICA replica_4") + expected_data = set() + mg_sleep_and_assert(expected_data, retrieve_data) + + def test_basic_recovery(connection): # Goal of this test is to check the recovery of main. # 0/ We start all replicas manually: we want to be able to kill them ourselves without relying on external tooling to kill processes. @@ -630,10 +767,26 @@ def test_async_replication_when_main_is_killed(): ) # 2/ - for index in range(50): + # First make sure that anything has been replicated + for index in range(0, 5): + interactive_mg_runner.MEMGRAPH_INSTANCES["main"].query(f"CREATE (p:Number {{name:{index}}})") + expected_data = [("async_replica", "127.0.0.1:10001", "async", "ready")] + + def retrieve_data(): + replicas = interactive_mg_runner.MEMGRAPH_INSTANCES["main"].query("SHOW REPLICAS;") + return [ + (replica_name, ip, mode, status) + for replica_name, ip, mode, timestamp, timestamp_behind_main, status in replicas + ] + + actual_data = mg_sleep_and_assert(expected_data, retrieve_data) + assert actual_data == expected_data + + for index in range(5, 50): interactive_mg_runner.MEMGRAPH_INSTANCES["main"].query(f"CREATE (p:Number {{name:{index}}})") if random.randint(0, 100) > 95: main_killed = f"Main was killed at index={index}" + print(main_killed) interactive_mg_runner.kill(CONFIGURATION, "main") break diff 
--git a/tests/e2e/replication/workloads.yaml b/tests/e2e/replication/workloads.yaml index 72269d652..fc239b221 100644 --- a/tests/e2e/replication/workloads.yaml +++ b/tests/e2e/replication/workloads.yaml @@ -8,6 +8,23 @@ template_validation_queries: &template_validation_queries validation_queries: - <<: *template_test_nodes_query - <<: *template_test_edges_query +template_simple_cluster: &template_simple_cluster + cluster: + replica_1: + args: [ "--bolt-port", "7688", "--log-level=TRACE" ] + log_file: "replication-e2e-replica1.log" + setup_queries: [ "SET REPLICATION ROLE TO REPLICA WITH PORT 10001;" ] + replica_2: + args: ["--bolt-port", "7689", "--log-level=TRACE"] + log_file: "replication-e2e-replica2.log" + setup_queries: ["SET REPLICATION ROLE TO REPLICA WITH PORT 10002;"] + main: + args: ["--bolt-port", "7687", "--log-level=TRACE"] + log_file: "replication-e2e-main.log" + setup_queries: [ + "REGISTER REPLICA replica_1 SYNC TO '127.0.0.1:10001'", + "REGISTER REPLICA replica_2 ASYNC TO '127.0.0.1:10002'", + ] template_cluster: &template_cluster cluster: replica_1: @@ -83,3 +100,8 @@ workloads: - name: "Show while creating invalid state" binary: "tests/e2e/pytest_runner.sh" args: ["replication/show_while_creating_invalid_state.py"] + + - name: "Delete edge replication" + binary: "tests/e2e/pytest_runner.sh" + args: ["replication/edge_delete.py"] + <<: *template_simple_cluster diff --git a/tests/e2e/runner.py b/tests/e2e/runner.py index f2ca5cce5..949670d43 100755 --- a/tests/e2e/runner.py +++ b/tests/e2e/runner.py @@ -53,8 +53,8 @@ def run(args): # Setup. 
@atexit.register - def cleanup(): - interactive_mg_runner.stop_all() + def cleanup(keep_directories=True): + interactive_mg_runner.stop_all(keep_directories) if "pre_set_workload" in workload: binary = os.path.join(BUILD_DIR, workload["pre_set_workload"]) @@ -92,7 +92,7 @@ def run(args): data = mg_instance.query(validation["query"], conn)[0][0] assert data == validation["expected"] conn.close() - cleanup() + cleanup(keep_directories=False) log.info("%s PASSED.", workload_name) diff --git a/tests/gql_behave/continuous_integration b/tests/gql_behave/continuous_integration index 850fc5934..26ee6d727 100755 --- a/tests/gql_behave/continuous_integration +++ b/tests/gql_behave/continuous_integration @@ -104,7 +104,7 @@ class MemgraphRunner: memgraph_binary = os.path.join(self.build_directory, "memgraph") args_mg = [ memgraph_binary, - "--storage-properties-on-edges", + "--storage-properties-on-edges=true", "--data-directory", self.data_directory.name, "--log-file", diff --git a/tests/gql_behave/tests/memgraph_V1/features/aggregations.feature b/tests/gql_behave/tests/memgraph_V1/features/aggregations.feature index 80b0ca69a..11297f39e 100644 --- a/tests/gql_behave/tests/memgraph_V1/features/aggregations.feature +++ b/tests/gql_behave/tests/memgraph_V1/features/aggregations.feature @@ -410,19 +410,67 @@ Feature: Aggregations CREATE (s:Subnet {ip: "192.168.0.1"}) """ When executing query: - """ - MATCH (subnet:Subnet) WHERE FALSE WITH subnet, collect(subnet.ip) as ips RETURN id(subnet) as id - """ + """ + MATCH (subnet:Subnet) WHERE FALSE WITH subnet, collect(subnet.ip) as ips RETURN id(subnet) as id + """ Then the result should be empty - Scenario: Empty count aggregation: + Scenario: Empty count aggregation Given an empty graph And having executed """ CREATE (s:Subnet {ip: "192.168.0.1"}) """ When executing query: - """ - MATCH (subnet:Subnet) WHERE FALSE WITH subnet, count(subnet.ip) as ips RETURN id(subnet) as id - """ + """ + MATCH (subnet:Subnet) WHERE FALSE WITH 
subnet, count(subnet.ip) as ips RETURN id(subnet) as id + """ Then the result should be empty + + Scenario: Collect nodes properties into a map: + Given an empty graph + And having executed + """ + CREATE (t:Tag {short_code: "TST", description: "SYSTEM_TAG"}), (t2:Tag {short_code: "PRD", description: "SYSTEM_TAG"}), + (t3:Tag {short_code: "STG", description: "SYSTEM_TAG"}), (device {name: "name1"}), (device)-[a1:ASSOCIATED]->(t), + (device)-[a2:ASSOCIATED]->(t2), (device)-[a3:ASSOCIATED]->(t3); + """ + When executing query: + """ + MATCH (d {name: "name1"})-[t:ASSOCIATED]-(tag:Tag) RETURN collect({short_code: tag.short_code, description: tag.description}) as tags; + """ + Then the result should be: + | tags | + | [{description: 'SYSTEM_TAG', short_code: 'TST'}, {description: 'SYSTEM_TAG', short_code: 'PRD'}, {description: 'SYSTEM_TAG', short_code: 'STG'}] | + + Scenario: Count directly without WITH clause 01 + Given an empty graph + And having executed + """ + CREATE (:Node {prop1: 1, prop2: 2, prop3: 3}), (:Node {prop1: 10, prop2: 11, prop3: 12}), (:Node {prop1: 20, prop2: 21, prop3: 22}) + """ + When executing query: + """ + MATCH (n) RETURN n.prop1, n.prop2, n.prop3, count(*) AS cnt + """ + Then the result should be: + | n.prop1 | n.prop2 | n.prop3 | cnt | + | 20 | 21 | 22 | 1 | + | 10 | 11 | 12 | 1 | + | 1 | 2 | 3 | 1 | + + Scenario: Count directly without WITH clause 02 + Given an empty graph + And having executed + """ + CREATE (:Node {prop1: 1, prop2: 2, prop3: 3}), (:Node {prop1: 10, prop2: 11, prop3: 12}), (:Node {prop1: 20, prop2: 21, prop3: 22}) + """ + When executing query: + """ + MATCH (n) WITH n.prop1 AS prop1, n.prop2 AS prop2, n.prop3 AS prop3 RETURN prop1, prop2, prop3, count(*) AS cnt; + """ + Then the result should be: + | prop1 | prop2 | prop3 | cnt | + | 20 | 21 | 22 | 1 | + | 10 | 11 | 12 | 1 | + | 1 | 2 | 3 | 1 | diff --git a/tests/gql_behave/tests/memgraph_V1/features/match.feature b/tests/gql_behave/tests/memgraph_V1/features/match.feature 
index cf41c20f2..227ad9ad6 100644 --- a/tests/gql_behave/tests/memgraph_V1/features/match.feature +++ b/tests/gql_behave/tests/memgraph_V1/features/match.feature @@ -699,3 +699,75 @@ Feature: Match Then the result should be | date(n.time) | | 2021-10-05 | + + Scenario: Variable expand with filter by size of accumulated path + Given an empty graph + And having executed: + """ + CREATE (:Person {id: 1})-[:KNOWS]->(:Person {id: 2})-[:KNOWS]->(:Person {id: 3})-[:KNOWS]->(:Person {id: 4}); + """ + When executing query: + """ + MATCH path = (:Person {id: 1})-[* (e, n, p | size(p) < 4)]->(:Person {id: 4}) RETURN path + """ + Then the result should be + | path | + | <(:Person{id:1})-[:KNOWS]->(:Person{id:2})-[:KNOWS]->(:Person{id:3})-[:KNOWS]->(:Person{id:4})> | + + Scenario: Variable expand with filter by last edge type of accumulated path + Given an empty graph + And having executed: + """ + CREATE (:Person {id: 1})-[:KNOWS]->(:Person {id: 2})-[:KNOWS]->(:Person {id: 3})-[:KNOWS]->(:Person {id: 4}); + """ + When executing query: + """ + MATCH path = (:Person {id: 1})-[* (e, n, p | type(relationships(p)[-1]) = 'KNOWS')]->(:Person {id: 4}) RETURN path + """ + Then the result should be + | path | + | <(:Person{id:1})-[:KNOWS]->(:Person{id:2})-[:KNOWS]->(:Person{id:3})-[:KNOWS]->(:Person{id:4})> | + + Scenario: Variable expand with too restricted filter by size of accumulated path + Given an empty graph + And having executed: + """ + CREATE (:Person {id: 1})-[:KNOWS]->(:Person {id: 2})-[:KNOWS]->(:Person {id: 3})-[:KNOWS]->(:Person {id: 4}); + """ + When executing query: + """ + MATCH path = (:Person {id: 1})-[* (e, n, p | size(p) < 3)]->(:Person {id: 4}) RETURN path + """ + Then the result should be empty + + Scenario: Variable expand with too restricted filter by last edge type of accumulated path + Given an empty graph + And having executed: + """ + CREATE (:Person {id: 1})-[:KNOWS]->(:Person {id: 2})-[:KNOWS]->(:Person {id: 3})-[:KNOWS]->(:Person {id: 4}); + """ + When 
executing query: + """ + MATCH path = (:Person {id: 1})-[* (e, n, p | type(relationships(p)[-1]) = 'Invalid')]->(:Person {id: 4}) RETURN path + """ + Then the result should be empty + + Scenario: Test DFS variable expand with filter by edge type1 + Given graph "graph_edges" + When executing query: + """ + MATCH path=(:label1)-[* (e, n, p | NOT(type(e)='type1' AND type(last(relationships(p))) = 'type1'))]->(:label3) RETURN path; + """ + Then the result should be: + | path | + | <(:label1 {id: 1})-[:type2 {id: 10}]->(:label3 {id: 3})> | + + Scenario: Test DFS variable expand with filter by edge type2 + Given graph "graph_edges" + When executing query: + """ + MATCH path=(:label1)-[* (e, n, p | NOT(type(e)='type2' AND type(last(relationships(p))) = 'type2'))]->(:label3) RETURN path; + """ + Then the result should be: + | path | + | <(:label1 {id: 1})-[:type1 {id: 1}]->(:label2 {id: 2})-[:type1 {id: 2}]->(:label3 {id: 3})> | diff --git a/tests/gql_behave/tests/memgraph_V1/features/memgraph_allshortest.feature b/tests/gql_behave/tests/memgraph_V1/features/memgraph_allshortest.feature index 73fb9e75b..29dc0a5ef 100644 --- a/tests/gql_behave/tests/memgraph_V1/features/memgraph_allshortest.feature +++ b/tests/gql_behave/tests/memgraph_V1/features/memgraph_allshortest.feature @@ -203,3 +203,103 @@ Feature: All Shortest Path Then the result should be: | total_cost | | 20.3 | + + Scenario: Test match AllShortest with accumulated path filtered by order of ids + Given an empty graph + And having executed: + """ + CREATE (:label1 {id: 1})-[:type1 {id:1}]->(:label2 {id: 2})-[:type1 {id: 2}]->(:label3 {id: 3})-[:type1 {id: 3}]->(:label4 {id: 4}); + """ + When executing query: + """ + MATCH pth=(:label1)-[*ALLSHORTEST (r, n | r.id) total_weight (e,n,p | e.id > 0 and (nodes(p)[-1]).id > (nodes(p)[-2]).id)]->(:label4) RETURN pth, total_weight; + """ + Then the result should be: + | pth | total_weight | + | 
<(:label1{id:1})-[:type1{id:1}]->(:label2{id:2})-[:type1{id:2}]->(:label3{id:3})-[:type1{id:3}]->(:label4{id:4})> | 6 | + + Scenario: Test match AllShortest with accumulated path filtered by edge type1 + Given graph "graph_edges" + When executing query: + """ + MATCH path=(:label1)-[*ALLSHORTEST (r, n | r.id) total_weight (e, n, p | NOT(type(e)='type1' AND type(last(relationships(p))) = 'type1'))]->(:label3) RETURN path, total_weight; + """ + Then the result should be: + | path | total_weight | + | <(:label1 {id: 1})-[:type2 {id: 10}]->(:label3 {id: 3})> | 10 | + + Scenario: Test match AllShortest with accumulated path filtered by edge type2 + Given graph "graph_edges" + When executing query: + """ + MATCH path=(:label1)-[*ALLSHORTEST (r, n | r.id) total_weight (e, n, p | NOT(type(e)='type2' AND type(last(relationships(p))) = 'type2'))]->(:label3) RETURN path, total_weight; + """ + Then the result should be: + | path | total_weight | + | <(:label1 {id: 1})-[:type1 {id: 1}]->(:label2 {id: 2})-[:type1 {id: 2}]->(:label3 {id: 3})> | 3 | + + Scenario: Test match AllShortest with accumulated path filtered by edge type1 and accumulated weight based on edge + Given graph "graph_edges" + When executing query: + """ + MATCH path=(:label1)-[*ALLSHORTEST (r, n | r.id) total_weight (e, n, p, w | NOT(type(e)='type1' AND type(last(relationships(p))) = 'type1') AND w > 0)]->(:label3) RETURN path, total_weight; + """ + Then the result should be: + | path | total_weight | + | <(:label1 {id: 1})-[:type2 {id: 10}]->(:label3 {id: 3})> | 10 | + + Scenario: Test match AllShortest with accumulated path filtered by edge type1 and accumulated weight based on edge too restricted + Given graph "graph_edges" + When executing query: + """ + MATCH path=(:label1)-[*ALLSHORTEST (r, n | r.id) total_weight (e, n, p, w | NOT(type(e)='type1' AND type(last(relationships(p))) = 'type1') AND w < 10)]->(:label3) RETURN path, total_weight; + """ + Then the result should be empty + + Scenario: Test match 
AllShortest with accumulated path filtered by edge type1 and accumulated weight based on vertex is int + Given graph "graph_edges" + When executing query: + """ + MATCH path=(:label1)-[*ALLSHORTEST (r, n | n.id) total_weight (e, n, p, w | NOT(type(e)='type1' AND type(last(relationships(p))) = 'type1') AND w > 0)]->(:label3) RETURN path, total_weight; + """ + Then the result should be: + | path | total_weight | + | <(:label1 {id: 1})-[:type2 {id: 10}]->(:label3 {id: 3})> | 4 | + + Scenario: Test match allShortest with accumulated path filtered by edge type1 and accumulated weight based on vertex and edge are ints + Given graph "graph_edges" + When executing query: + """ + MATCH path=(:label1)-[*ALLSHORTEST (r, n | n.id + coalesce(r.id, 0)) total_weight (e, n, p, w | NOT(type(e)='type1' AND type(last(relationships(p))) = 'type1') AND w > 0)]->(:label3) RETURN path, total_weight; + """ + Then the result should be: + | path | total_weight | + | <(:label1 {id: 1})-[:type2 {id: 10}]->(:label3 {id: 3})> | 14 | + + Scenario: Test match AllShortest with accumulated path filtered by edge type1 and accumulated weight based on vertex and edge are doubles + Given an empty graph + And having executed: + """ + CREATE (:label1 {id: 1})-[:type1 {id:1.5}]->(:label2 {id: 2})-[:type1 {id: 2.1}]->(:label3 {id: 3})-[:type1 {id: 3.4}]->(:label4 {id: 4}); + """ + When executing query: + """ + MATCH path=(:label1)-[*ALLSHORTEST (r, n | n.id + coalesce(r.id, 0)) total_weight (e, n, p, w | w > 0)]->(:label3) RETURN path, total_weight; + """ + Then the result should be: + | path | total_weight | + | <(:label1 {id: 1})-[:type1 {id: 1.5}]->(:label2 {id: 2})-[:type1 {id: 2.1}]->(:label3 {id: 3})> | 9.6 | + + Scenario: Test match AllShortest with accumulated path filtered by order of ids and accumulated weight based on both vertex and edge is duration + Given an empty graph + And having executed: + """ + CREATE (:station {name: "A", arrival: localTime("08:00"), departure: 
localTime("08:15")})-[:ride {id: 1, duration: duration("PT1H5M")}]->(:station {name: "B", arrival: localtime("09:20"), departure: localTime("09:30")})-[:ride {id: 2, duration: duration("PT30M")}]->(:station {name: "C", arrival: localTime("10:00"), departure: localTime("10:20")}); + """ + When executing query: + """ + MATCH path=(:station {name:"A"})-[*ALLSHORTEST (r, v | v.departure - v.arrival + coalesce(r.duration, duration("PT0M"))) total_weight (r,n,p,w | (nodes(p)[-1]).name > (nodes(p)[-2]).name AND not(w is null))]->(:station {name:"C"}) RETURN path, total_weight; + """ + Then the result should be: + | path | total_weight | + | <(:station {arrival: 08:00:00.000000000, departure: 08:15:00.000000000, name: 'A'})-[:ride {duration: PT1H5M, id: 1}]->(:station {arrival: 09:20:00.000000000, departure: 09:30:00.000000000, name: 'B'})-[:ride {duration: PT30M, id: 2}]->(:station {arrival: 10:00:00.000000000, departure: 10:20:00.000000000, name: 'C'})> | PT2H20M | diff --git a/tests/gql_behave/tests/memgraph_V1/features/memgraph_bfs.feature b/tests/gql_behave/tests/memgraph_V1/features/memgraph_bfs.feature index d47566012..2736a6d71 100644 --- a/tests/gql_behave/tests/memgraph_V1/features/memgraph_bfs.feature +++ b/tests/gql_behave/tests/memgraph_V1/features/memgraph_bfs.feature @@ -121,3 +121,95 @@ Feature: Bfs Then the result should be: | p | | <(:Node {id: 2})-[:LINK {date: '2023-03'}]->(:Node {id: 3})> | + + Scenario: Test BFS variable expand with filter by last edge type of accumulated path + Given an empty graph + And having executed: + """ + CREATE (:label1 {id: 1})-[:type1 {id:1}]->(:label2 {id: 2})-[:type1 {id: 2}]->(:label3 {id: 3}); + """ + When executing query: + """ + MATCH pth=(:label1)-[*BFS (e,n,p | type(relationships(p)[-1]) = 'type1')]->(:label3) return pth; + """ + Then the result should be: + | pth | + | <(:label1{id:1})-[:type1{id:1}]->(:label2{id:2})-[:type1{id:2}]->(:label3{id:3})> | + + Scenario: Test BFS variable expand with restict filter by 
last edge type of accumulated path + Given an empty graph + And having executed: + """ + CREATE (:label1 {id: 1})-[:type1 {id:1}]->(:label2 {id: 2})-[:type1 {id: 2}]->(:label3 {id: 3}); + """ + When executing query: + """ + MATCH pth=(:label1)-[*BFS (e,n,p | type(relationships(p)[-1]) = 'type2')]->(:label2) return pth; + """ + Then the result should be empty + + Scenario: Test BFS variable expand with filter by size of accumulated path + Given an empty graph + And having executed: + """ + CREATE (:label1 {id: 1})-[:type1 {id:1}]->(:label2 {id: 2})-[:type1 {id: 2}]->(:label3 {id: 3}); + """ + When executing query: + """ + MATCH pth=(:label1)-[*BFS (e,n,p | size(p) < 3)]->(:label3) return pth; + """ + Then the result should be: + | pth | + | <(:label1{id:1})-[:type1{id:1}]->(:label2{id:2})-[:type1{id:2}]->(:label3{id:3})> | + + Scenario: Test BFS variable expand with restict filter by size of accumulated path + Given an empty graph + And having executed: + """ + CREATE (:label1 {id: 1})-[:type1 {id:1}]->(:label2 {id: 2})-[:type1 {id: 2}]->(:label3 {id: 3}); + """ + When executing query: + """ + MATCH pth=(:label1)-[*BFS (e,n,p | size(p) < 2)]->(:label3) return pth; + """ + Then the result should be empty + + Scenario: Test BFS variable expand with filter by order of ids in accumulated path when target vertex is indexed + Given graph "graph_index" + When executing query: + """ + MATCH pth=(:label1)-[*BFS (e,n,p | (nodes(p)[-1]).id > (nodes(p)[-2]).id)]->(:label4) return pth; + """ + Then the result should be: + | pth | + | <(:label1 {id: 1})-[:type1 {id: 1}]->(:label2 {id: 2})-[:type1 {id: 2}]->(:label3 {id: 3})-[:type1 {id: 3}]->(:label4 {id: 4})> | + + Scenario: Test BFS variable expand with filter by order of ids in accumulated path when target vertex is NOT indexed + Given graph "graph_index" + When executing query: + """ + MATCH pth=(:label1)-[*BFS (e,n,p | (nodes(p)[-1]).id > (nodes(p)[-2]).id)]->(:label3) return pth; + """ + Then the result should be: + | pth | 
+ | <(:label1 {id: 1})-[:type1 {id: 1}]->(:label2 {id: 2})-[:type1 {id: 2}]->(:label3 {id: 3})> | + + Scenario: Test BFS variable expand with filter by edge type1 + Given graph "graph_edges" + When executing query: + """ + MATCH path=(:label1)-[*BFS (e, n, p | NOT(type(e)='type1' AND type(last(relationships(p))) = 'type1'))]->(:label3) RETURN path; + """ + Then the result should be: + | path | + | <(:label1 {id: 1})-[:type2 {id: 10}]->(:label3 {id: 3})> | + + Scenario: Test BFS variable expand with filter by edge type2 + Given graph "graph_edges" + When executing query: + """ + MATCH path=(:label1)-[*BFS (e, n, p | NOT(type(e)='type2' AND type(last(relationships(p))) = 'type2'))]->(:label3) RETURN path; + """ + Then the result should be: + | path | + | <(:label1 {id: 1})-[:type1 {id: 1}]->(:label2 {id: 2})-[:type1 {id: 2}]->(:label3 {id: 3})> | diff --git a/tests/gql_behave/tests/memgraph_V1/features/memgraph_wshortest.feature b/tests/gql_behave/tests/memgraph_V1/features/memgraph_wshortest.feature index 1c98c2830..819bc94b3 100644 --- a/tests/gql_behave/tests/memgraph_V1/features/memgraph_wshortest.feature +++ b/tests/gql_behave/tests/memgraph_V1/features/memgraph_wshortest.feature @@ -155,3 +155,103 @@ Feature: Weighted Shortest Path MATCH (n {a:'0'})-[le *wShortest 10 (e, n | e.w ) w]->(m) RETURN m.a, size(le) as s, w """ Then an error should be raised + + Scenario: Test match wShortest with accumulated path filtered by order of ids + Given an empty graph + And having executed: + """ + CREATE (:label1 {id: 1})-[:type1 {id:1}]->(:label2 {id: 2})-[:type1 {id: 2}]->(:label3 {id: 3})-[:type1 {id: 3}]->(:label4 {id: 4}); + """ + When executing query: + """ + MATCH pth=(:label1)-[*WSHORTEST (r, n | r.id) total_weight (e,n,p | e.id > 0 and (nodes(p)[-1]).id > (nodes(p)[-2]).id)]->(:label4) RETURN pth, total_weight; + """ + Then the result should be: + | pth | total_weight | + | 
<(:label1{id:1})-[:type1{id:1}]->(:label2{id:2})-[:type1{id:2}]->(:label3{id:3})-[:type1{id:3}]->(:label4{id:4})> | 6 | + + Scenario: Test match wShortest with accumulated path filtered by edge type1 + Given graph "graph_edges" + When executing query: + """ + MATCH path=(:label1)-[*WSHORTEST (r, n | r.id) total_weight (e, n, p | NOT(type(e)='type1' AND type(last(relationships(p))) = 'type1'))]->(:label3) RETURN path, total_weight; + """ + Then the result should be: + | path | total_weight | + | <(:label1 {id: 1})-[:type2 {id: 10}]->(:label3 {id: 3})> | 10 | + + Scenario: Test match wShortest with accumulated path filtered by edge type2 + Given graph "graph_edges" + When executing query: + """ + MATCH path=(:label1)-[*WSHORTEST (r, n | r.id) total_weight (e, n, p | NOT(type(e)='type2' AND type(last(relationships(p))) = 'type2'))]->(:label3) RETURN path, total_weight; + """ + Then the result should be: + | path | total_weight | + | <(:label1 {id: 1})-[:type1 {id: 1}]->(:label2 {id: 2})-[:type1 {id: 2}]->(:label3 {id: 3})> | 3 | + + Scenario: Test match wShortest with accumulated path filtered by edge type1 and accumulated weight based on edge + Given graph "graph_edges" + When executing query: + """ + MATCH path=(:label1)-[*WSHORTEST (r, n | r.id) total_weight (e, n, p, w | NOT(type(e)='type1' AND type(last(relationships(p))) = 'type1') AND w > 0)]->(:label3) RETURN path, total_weight; + """ + Then the result should be: + | path | total_weight | + | <(:label1 {id: 1})-[:type2 {id: 10}]->(:label3 {id: 3})> | 10 | + + Scenario: Test match wShortest with accumulated path filtered by edge type1 and accumulated weight based on edge too restricted + Given graph "graph_edges" + When executing query: + """ + MATCH path=(:label1)-[*WSHORTEST (r, n | r.id) total_weight (e, n, p, w | NOT(type(e)='type1' AND type(last(relationships(p))) = 'type1') AND w < 10)]->(:label3) RETURN path, total_weight; + """ + Then the result should be empty + + Scenario: Test match wShortest with 
accumulated path filtered by edge type1 and accumulated weight based on vertex is int + Given graph "graph_edges" + When executing query: + """ + MATCH path=(:label1)-[*WSHORTEST (r, n | n.id) total_weight (e, n, p, w | NOT(type(e)='type1' AND type(last(relationships(p))) = 'type1') AND w > 0)]->(:label3) RETURN path, total_weight; + """ + Then the result should be: + | path | total_weight | + | <(:label1 {id: 1})-[:type2 {id: 10}]->(:label3 {id: 3})> | 4 | + + Scenario: Test match wShortest with accumulated path filtered by edge type1 and accumulated weight based on vertex and edge are ints + Given graph "graph_edges" + When executing query: + """ + MATCH path=(:label1)-[*WSHORTEST (r, n | n.id + coalesce(r.id, 0)) total_weight (e, n, p, w | NOT(type(e)='type1' AND type(last(relationships(p))) = 'type1') AND w > 0)]->(:label3) RETURN path, total_weight; + """ + Then the result should be: + | path | total_weight | + | <(:label1 {id: 1})-[:type2 {id: 10}]->(:label3 {id: 3})> | 14 | + + Scenario: Test match wShortest with accumulated path filtered by edge type1 and accumulated weight based on vertex and edge are doubles + Given an empty graph + And having executed: + """ + CREATE (:label1 {id: 1})-[:type1 {id:1.5}]->(:label2 {id: 2})-[:type1 {id: 2.1}]->(:label3 {id: 3})-[:type1 {id: 3.4}]->(:label4 {id: 4}); + """ + When executing query: + """ + MATCH path=(:label1)-[*WSHORTEST (r, n | n.id + coalesce(r.id, 0)) total_weight (e, n, p, w | w > 0)]->(:label3) RETURN path, total_weight; + """ + Then the result should be: + | path | total_weight | + | <(:label1 {id: 1})-[:type1 {id: 1.5}]->(:label2 {id: 2})-[:type1 {id: 2.1}]->(:label3 {id: 3})> | 9.6 | + + Scenario: Test match wShortest with accumulated path filtered by order of ids and accumulated weight based on both vertex and edge is duration + Given an empty graph + And having executed: + """ + CREATE (:station {name: "A", arrival: localTime("08:00"), departure: localTime("08:15")})-[:ride {id: 1, duration: 
duration("PT1H5M")}]->(:station {name: "B", arrival: localtime("09:20"), departure: localTime("09:30")})-[:ride {id: 2, duration: duration("PT30M")}]->(:station {name: "C", arrival: localTime("10:00"), departure: localTime("10:20")}); + """ + When executing query: + """ + MATCH path=(:station {name:"A"})-[*WSHORTEST (r, v | v.departure - v.arrival + coalesce(r.duration, duration("PT0M"))) total_weight (r,n,p,w | (nodes(p)[-1]).name > (nodes(p)[-2]).name AND not(w is null))]->(:station {name:"C"}) RETURN path, total_weight; + """ + Then the result should be: + | path | total_weight | + | <(:station {arrival: 08:00:00.000000000, departure: 08:15:00.000000000, name: 'A'})-[:ride {duration: PT1H5M, id: 1}]->(:station {arrival: 09:20:00.000000000, departure: 09:30:00.000000000, name: 'B'})-[:ride {duration: PT30M, id: 2}]->(:station {arrival: 10:00:00.000000000, departure: 10:20:00.000000000, name: 'C'})> | PT2H20M | diff --git a/tests/gql_behave/tests/memgraph_V1/graphs/graph_edges.cypher b/tests/gql_behave/tests/memgraph_V1/graphs/graph_edges.cypher new file mode 100644 index 000000000..06e7cdb5c --- /dev/null +++ b/tests/gql_behave/tests/memgraph_V1/graphs/graph_edges.cypher @@ -0,0 +1,2 @@ +CREATE (:label1 {id: 1})-[:type1 {id:1}]->(:label2 {id: 2})-[:type1 {id: 2}]->(:label3 {id: 3})-[:type1 {id: 3}]->(:label4 {id: 4}); +MATCH (n :label1), (m :label3) CREATE (n)-[:type2 {id: 10}]->(m); diff --git a/tests/gql_behave/tests/memgraph_V1/graphs/graph_index.cypher b/tests/gql_behave/tests/memgraph_V1/graphs/graph_index.cypher new file mode 100644 index 000000000..ef30012d1 --- /dev/null +++ b/tests/gql_behave/tests/memgraph_V1/graphs/graph_index.cypher @@ -0,0 +1,2 @@ +CREATE INDEX ON :label4; +CREATE (:label1 {id: 1})-[:type1 {id:1}]->(:label2 {id: 2})-[:type1 {id: 2}]->(:label3 {id: 3})-[:type1 {id:3}]->(:label4 {id: 4}); diff --git a/tests/gql_behave/tests/memgraph_V1_on_disk/features/aggregations.feature 
b/tests/gql_behave/tests/memgraph_V1_on_disk/features/aggregations.feature index 80b0ca69a..11297f39e 100644 --- a/tests/gql_behave/tests/memgraph_V1_on_disk/features/aggregations.feature +++ b/tests/gql_behave/tests/memgraph_V1_on_disk/features/aggregations.feature @@ -410,19 +410,67 @@ Feature: Aggregations CREATE (s:Subnet {ip: "192.168.0.1"}) """ When executing query: - """ - MATCH (subnet:Subnet) WHERE FALSE WITH subnet, collect(subnet.ip) as ips RETURN id(subnet) as id - """ + """ + MATCH (subnet:Subnet) WHERE FALSE WITH subnet, collect(subnet.ip) as ips RETURN id(subnet) as id + """ Then the result should be empty - Scenario: Empty count aggregation: + Scenario: Empty count aggregation Given an empty graph And having executed """ CREATE (s:Subnet {ip: "192.168.0.1"}) """ When executing query: - """ - MATCH (subnet:Subnet) WHERE FALSE WITH subnet, count(subnet.ip) as ips RETURN id(subnet) as id - """ + """ + MATCH (subnet:Subnet) WHERE FALSE WITH subnet, count(subnet.ip) as ips RETURN id(subnet) as id + """ Then the result should be empty + + Scenario: Collect nodes properties into a map: + Given an empty graph + And having executed + """ + CREATE (t:Tag {short_code: "TST", description: "SYSTEM_TAG"}), (t2:Tag {short_code: "PRD", description: "SYSTEM_TAG"}), + (t3:Tag {short_code: "STG", description: "SYSTEM_TAG"}), (device {name: "name1"}), (device)-[a1:ASSOCIATED]->(t), + (device)-[a2:ASSOCIATED]->(t2), (device)-[a3:ASSOCIATED]->(t3); + """ + When executing query: + """ + MATCH (d {name: "name1"})-[t:ASSOCIATED]-(tag:Tag) RETURN collect({short_code: tag.short_code, description: tag.description}) as tags; + """ + Then the result should be: + | tags | + | [{description: 'SYSTEM_TAG', short_code: 'TST'}, {description: 'SYSTEM_TAG', short_code: 'PRD'}, {description: 'SYSTEM_TAG', short_code: 'STG'}] | + + Scenario: Count directly without WITH clause 01 + Given an empty graph + And having executed + """ + CREATE (:Node {prop1: 1, prop2: 2, prop3: 3}), (:Node 
{prop1: 10, prop2: 11, prop3: 12}), (:Node {prop1: 20, prop2: 21, prop3: 22}) + """ + When executing query: + """ + MATCH (n) RETURN n.prop1, n.prop2, n.prop3, count(*) AS cnt + """ + Then the result should be: + | n.prop1 | n.prop2 | n.prop3 | cnt | + | 20 | 21 | 22 | 1 | + | 10 | 11 | 12 | 1 | + | 1 | 2 | 3 | 1 | + + Scenario: Count directly without WITH clause 02 + Given an empty graph + And having executed + """ + CREATE (:Node {prop1: 1, prop2: 2, prop3: 3}), (:Node {prop1: 10, prop2: 11, prop3: 12}), (:Node {prop1: 20, prop2: 21, prop3: 22}) + """ + When executing query: + """ + MATCH (n) WITH n.prop1 AS prop1, n.prop2 AS prop2, n.prop3 AS prop3 RETURN prop1, prop2, prop3, count(*) AS cnt; + """ + Then the result should be: + | prop1 | prop2 | prop3 | cnt | + | 20 | 21 | 22 | 1 | + | 10 | 11 | 12 | 1 | + | 1 | 2 | 3 | 1 | diff --git a/tests/integration/CMakeLists.txt b/tests/integration/CMakeLists.txt index 73d98ce6a..c61f046dc 100644 --- a/tests/integration/CMakeLists.txt +++ b/tests/integration/CMakeLists.txt @@ -1,38 +1,14 @@ -# telemetry test binaries add_subdirectory(telemetry) - -# ssl test binaries add_subdirectory(ssl) - -# transactions test binaries add_subdirectory(transactions) - -# auth test binaries add_subdirectory(auth) - -# lba test binaries add_subdirectory(fine_grained_access) - -# audit test binaries add_subdirectory(audit) - -# ldap test binaries add_subdirectory(ldap) - -# mg_import_csv test binaries add_subdirectory(mg_import_csv) - -# license_check test binaries add_subdirectory(license_info) - -#environment variable check binaries add_subdirectory(env_variable_check) - -#flag check binaries add_subdirectory(flag_check) - -#storage mode binaries add_subdirectory(storage_mode) - -#run time settings binaries add_subdirectory(run_time_settings) +add_subdirectory(init_file) diff --git a/tests/integration/init_file/CMakeLists.txt b/tests/integration/init_file/CMakeLists.txt new file mode 100644 index 000000000..41f2af6cc --- /dev/null +++ 
b/tests/integration/init_file/CMakeLists.txt @@ -0,0 +1,6 @@ +set(target_name memgraph__integration__init_file) +set(tester_target_name ${target_name}__tester) + +add_executable(${tester_target_name} tester.cpp) +set_target_properties(${tester_target_name} PROPERTIES OUTPUT_NAME tester) +target_link_libraries(${tester_target_name} mg-communication) diff --git a/tests/integration/init_file/auth.cypherl b/tests/integration/init_file/auth.cypherl new file mode 100644 index 000000000..3a2f8d441 --- /dev/null +++ b/tests/integration/init_file/auth.cypherl @@ -0,0 +1 @@ +CREATE USER memgraph1 IDENTIFIED BY '1234'; diff --git a/tests/integration/init_file/runner.py b/tests/integration/init_file/runner.py new file mode 100644 index 000000000..fcaa10f95 --- /dev/null +++ b/tests/integration/init_file/runner.py @@ -0,0 +1,60 @@ +import argparse +import os +import subprocess +import sys +import tempfile +import time + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +PROJECT_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "..", "..", "..")) +BUILD_DIR = os.path.join(PROJECT_DIR, "build") +INIT_FILE = os.path.join(SCRIPT_DIR, "auth.cypherl") +SIGNAL_SIGTERM = 15 + + +def wait_for_server(port, delay=0.1): + cmd = ["nc", "-z", "-w", "1", "127.0.0.1", str(port)] + while subprocess.call(cmd) != 0: + time.sleep(0.01) + time.sleep(delay) + + +def prepare_memgraph(memgraph_args): + memgraph = subprocess.Popen(list(map(str, memgraph_args))) + time.sleep(0.1) + assert memgraph.poll() is None, "Memgraph process died prematurely!" + wait_for_server(7687) + return memgraph + + +def terminate_memgraph(memgraph): + pid = memgraph.pid + try: + os.kill(pid, SIGNAL_SIGTERM) + except os.OSError: + assert False, "Memgraph process didn't exit cleanly!" 
+ time.sleep(1) + + +def execute_test_restart_memgraph_with_init_file(memgraph_binary: str, tester_binary: str) -> None: + storage_directory = tempfile.TemporaryDirectory() + tester_args = [tester_binary, "--username", "memgraph1", "--password", "1234"] + memgraph = prepare_memgraph([memgraph_binary, "--data-directory", storage_directory.name, "--init-file", INIT_FILE]) + subprocess.run(tester_args, stdout=subprocess.PIPE, check=True).check_returncode() + terminate_memgraph(memgraph) + memgraph = prepare_memgraph([memgraph_binary, "--data-directory", storage_directory.name, "--init-file", INIT_FILE]) + subprocess.run(tester_args, stdout=subprocess.PIPE, check=True).check_returncode() + terminate_memgraph(memgraph) + + +if __name__ == "__main__": + memgraph_binary = os.path.join(PROJECT_DIR, "build", "memgraph") + tester_binary = os.path.join(BUILD_DIR, "tests", "integration", "init_file", "tester") + + parser = argparse.ArgumentParser() + parser.add_argument("--memgraph", default=memgraph_binary) + parser.add_argument("--tester", default=tester_binary) + args = parser.parse_args() + + execute_test_restart_memgraph_with_init_file(args.memgraph, args.tester) + sys.exit(0) diff --git a/tests/integration/init_file/tester.cpp b/tests/integration/init_file/tester.cpp new file mode 100644 index 000000000..d4486ead5 --- /dev/null +++ b/tests/integration/init_file/tester.cpp @@ -0,0 +1,47 @@ +// Copyright 2023 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +#include + +#include "communication/bolt/client.hpp" +#include "io/network/endpoint.hpp" +#include "io/network/utils.hpp" +#include "utils/logging.hpp" + +DEFINE_string(address, "127.0.0.1", "Server address"); +DEFINE_int32(port, 7687, "Server port"); +DEFINE_string(username, "", "Username for the database"); +DEFINE_string(password, "", "Password for the database"); +DEFINE_bool(use_ssl, false, "Set to true to connect with SSL to the server."); + +// NOLINTNEXTLINE(bugprone-exception-escape) +int main(int argc, char **argv) { + gflags::ParseCommandLineFlags(&argc, &argv, true); + memgraph::logging::RedirectToStderr(); + + memgraph::communication::SSLInit sslInit; + + memgraph::io::network::Endpoint endpoint(memgraph::io::network::ResolveHostname(FLAGS_address), FLAGS_port); + + memgraph::communication::ClientContext context(FLAGS_use_ssl); + memgraph::communication::bolt::Client client(context); + + client.Connect(endpoint, FLAGS_username, FLAGS_password); + auto ret = client.Execute("SHOW USERS", {}); + auto size = ret.records.size(); + MG_ASSERT(size == 1, "Too much users returned for SHOW USERA (got {}, expected 1)!", size); + auto row0_size = ret.records[0].size(); + MG_ASSERT(row0_size == 1, "Too much entries in query dump row (got {}, expected 1)!", row0_size); + auto user = ret.records[0][0].ValueString(); + MG_ASSERT(user == "memgraph1", "Unexpected user returned for SHOW USERS (got {}, expected memgraph)!", user); + + return 0; +} diff --git a/tests/integration/telemetry/client.cpp b/tests/integration/telemetry/client.cpp index 558e0a6bc..34e1c2a67 100644 --- a/tests/integration/telemetry/client.cpp +++ b/tests/integration/telemetry/client.cpp @@ -41,7 +41,7 @@ int main(int argc, char **argv) { memgraph::storage::UpdatePaths(db_config, data_directory); memgraph::replication::ReplicationState repl_state(ReplicationStateRootPath(db_config)); - memgraph::dbms::DbmsHandler dbms_handler(db_config, repl_state + memgraph::dbms::DbmsHandler 
dbms_handler(db_config #ifdef MG_ENTERPRISE , &auth_, false, false diff --git a/tests/stress/memory_tracker.py b/tests/stress/memory_tracker.py index ec67fe97b..d0f8bc350 100644 --- a/tests/stress/memory_tracker.py +++ b/tests/stress/memory_tracker.py @@ -260,10 +260,11 @@ def run_monitor_cleanup(repetition_count: int, sleep_sec: float) -> None: # Problem with test using detach delete and memory tracker # is that memory tracker gets updated immediately # whereas RES takes some time - cnt_again = 3 + # Tries 10 times or fails + cnt_again = 10 skip_failure = False - # 10% is maximum increment, afterwards is fail - multiplier = 1 + # 10% is maximum diff for this test to pass + multiplier = 1.10 while cnt_again: new_memory_tracker, new_res_data = get_storage_data(session) @@ -277,7 +278,6 @@ def run_monitor_cleanup(repetition_count: int, sleep_sec: float) -> None: f"RES data: {new_res_data}, multiplier: {multiplier}" ) break - multiplier += 0.05 cnt_again -= 1 if not skip_failure: log.info(memory_tracker, initial_diff, res_data) diff --git a/tests/unit/cpp_api.cpp b/tests/unit/cpp_api.cpp index 929a70431..012b2d713 100644 --- a/tests/unit/cpp_api.cpp +++ b/tests/unit/cpp_api.cpp @@ -38,7 +38,8 @@ struct CppApiTestFixture : public ::testing::Test { mgp_graph CreateGraph(const memgraph::storage::View view = memgraph::storage::View::NEW) { // the execution context can be null as it shouldn't be used in these tests - return mgp_graph{&CreateDbAccessor(memgraph::storage::IsolationLevel::SNAPSHOT_ISOLATION), view, ctx_.get()}; + return mgp_graph{&CreateDbAccessor(memgraph::storage::IsolationLevel::SNAPSHOT_ISOLATION), view, ctx_.get(), + memgraph::storage::StorageMode::IN_MEMORY_TRANSACTIONAL}; } memgraph::query::DbAccessor &CreateDbAccessor(const memgraph::storage::IsolationLevel isolationLevel) { @@ -499,6 +500,7 @@ TYPED_TEST(CppApiTestFixture, TestValueOperatorLessThan) { ASSERT_THROW(list_test < map_test, mgp::ValueException); ASSERT_THROW(list_test < list_test, 
mgp::ValueException); } + TYPED_TEST(CppApiTestFixture, TestNumberEquality) { mgp::Value double_1{1.0}; mgp::Value int_1{static_cast(1)}; diff --git a/tests/unit/cypher_main_visitor.cpp b/tests/unit/cypher_main_visitor.cpp index 54453de09..31fd95c6c 100644 --- a/tests/unit/cypher_main_visitor.cpp +++ b/tests/unit/cypher_main_visitor.cpp @@ -1925,6 +1925,41 @@ TEST_P(CypherMainVisitorTest, MatchBfsReturn) { ASSERT_TRUE(eq); } +TEST_P(CypherMainVisitorTest, MatchBfsFilterByPathReturn) { + auto &ast_generator = *GetParam(); + { + const auto *query = dynamic_cast( + ast_generator.ParseQuery("MATCH pth=(r:type1 {id: 1})<-[*BFS ..10 (e, n, p | startNode(relationships(e)[-1]) = " + "c:type2)]->(:type3 {id: 3}) RETURN pth;")); + ASSERT_TRUE(query); + ASSERT_TRUE(query->single_query_); + const auto *match = dynamic_cast(query->single_query_->clauses_[0]); + ASSERT_TRUE(match); + ASSERT_EQ(match->patterns_.size(), 1U); + ASSERT_EQ(match->patterns_[0]->atoms_.size(), 3U); + auto *bfs = dynamic_cast(match->patterns_[0]->atoms_[1]); + ASSERT_TRUE(bfs); + EXPECT_TRUE(bfs->IsVariable()); + EXPECT_EQ(bfs->filter_lambda_.inner_edge->name_, "e"); + EXPECT_TRUE(bfs->filter_lambda_.inner_edge->user_declared_); + EXPECT_EQ(bfs->filter_lambda_.inner_node->name_, "n"); + EXPECT_TRUE(bfs->filter_lambda_.inner_node->user_declared_); + EXPECT_EQ(bfs->filter_lambda_.accumulated_path->name_, "p"); + EXPECT_TRUE(bfs->filter_lambda_.accumulated_path->user_declared_); + EXPECT_EQ(bfs->filter_lambda_.accumulated_weight, nullptr); + } +} + +TEST_P(CypherMainVisitorTest, SemanticExceptionOnBfsFilterByWeight) { + auto &ast_generator = *GetParam(); + { + ASSERT_THROW(ast_generator.ParseQuery( + "MATCH pth=(:type1 {id: 1})<-[*BFS ..10 (e, n, p, w | startNode(relationships(e)[-1] AND w > 0) = " + "c:type2)]->(:type3 {id: 3}) RETURN pth;"), + SemanticException); + } +} + TEST_P(CypherMainVisitorTest, MatchVariableLambdaSymbols) { auto &ast_generator = *GetParam(); auto *query = 
dynamic_cast(ast_generator.ParseQuery("MATCH () -[*]- () RETURN *")); @@ -1981,6 +2016,57 @@ TEST_P(CypherMainVisitorTest, MatchWShortestReturn) { EXPECT_TRUE(shortest->total_weight_->user_declared_); } +TEST_P(CypherMainVisitorTest, MatchWShortestFilterByPathReturn) { + auto &ast_generator = *GetParam(); + { + const auto *query = dynamic_cast( + ast_generator.ParseQuery("MATCH pth=()-[r:type1 *wShortest 10 (we, wn | 42) total_weight " + "(e, n, p | startNode(relationships(e)[-1]) = c:type3)]->(:type2) RETURN pth")); + ASSERT_TRUE(query); + ASSERT_TRUE(query->single_query_); + const auto *match = dynamic_cast(query->single_query_->clauses_[0]); + ASSERT_TRUE(match); + ASSERT_EQ(match->patterns_.size(), 1U); + ASSERT_EQ(match->patterns_[0]->atoms_.size(), 3U); + auto *shortestPath = dynamic_cast(match->patterns_[0]->atoms_[1]); + ASSERT_TRUE(shortestPath); + EXPECT_TRUE(shortestPath->IsVariable()); + EXPECT_EQ(shortestPath->filter_lambda_.inner_edge->name_, "e"); + EXPECT_TRUE(shortestPath->filter_lambda_.inner_edge->user_declared_); + EXPECT_EQ(shortestPath->filter_lambda_.inner_node->name_, "n"); + EXPECT_TRUE(shortestPath->filter_lambda_.inner_node->user_declared_); + EXPECT_EQ(shortestPath->filter_lambda_.accumulated_path->name_, "p"); + EXPECT_TRUE(shortestPath->filter_lambda_.accumulated_path->user_declared_); + EXPECT_EQ(shortestPath->filter_lambda_.accumulated_weight, nullptr); + } +} + +TEST_P(CypherMainVisitorTest, MatchWShortestFilterByPathWeightReturn) { + auto &ast_generator = *GetParam(); + { + const auto *query = dynamic_cast(ast_generator.ParseQuery( + "MATCH pth=()-[r:type1 *wShortest 10 (we, wn | 42) total_weight " + "(e, n, p, w | startNode(relationships(e)[-1]) = c:type3 AND w < 50)]->(:type2) RETURN pth")); + ASSERT_TRUE(query); + ASSERT_TRUE(query->single_query_); + const auto *match = dynamic_cast(query->single_query_->clauses_[0]); + ASSERT_TRUE(match); + ASSERT_EQ(match->patterns_.size(), 1U); + ASSERT_EQ(match->patterns_[0]->atoms_.size(), 
3U); + auto *shortestPath = dynamic_cast(match->patterns_[0]->atoms_[1]); + ASSERT_TRUE(shortestPath); + EXPECT_TRUE(shortestPath->IsVariable()); + EXPECT_EQ(shortestPath->filter_lambda_.inner_edge->name_, "e"); + EXPECT_TRUE(shortestPath->filter_lambda_.inner_edge->user_declared_); + EXPECT_EQ(shortestPath->filter_lambda_.inner_node->name_, "n"); + EXPECT_TRUE(shortestPath->filter_lambda_.inner_node->user_declared_); + EXPECT_EQ(shortestPath->filter_lambda_.accumulated_path->name_, "p"); + EXPECT_TRUE(shortestPath->filter_lambda_.accumulated_path->user_declared_); + EXPECT_EQ(shortestPath->filter_lambda_.accumulated_weight->name_, "w"); + EXPECT_TRUE(shortestPath->filter_lambda_.accumulated_weight->user_declared_); + } +} + TEST_P(CypherMainVisitorTest, MatchWShortestNoFilterReturn) { auto &ast_generator = *GetParam(); auto *query = diff --git a/tests/unit/dbms_handler.cpp b/tests/unit/dbms_handler.cpp index 75efddefe..0ea4197fb 100644 --- a/tests/unit/dbms_handler.cpp +++ b/tests/unit/dbms_handler.cpp @@ -52,18 +52,16 @@ class TestEnvironment : public ::testing::Environment { auth = std::make_unique>( storage_directory / "auth"); - repl_state_.emplace(memgraph::storage::ReplicationStateRootPath(storage_conf)); - ptr_ = std::make_unique(storage_conf, *repl_state_, auth.get(), false, true); + ptr_ = std::make_unique(storage_conf, auth.get(), false, true); } void TearDown() override { ptr_.reset(); auth.reset(); - repl_state_.reset(); + std::filesystem::remove_all(storage_directory); } static std::unique_ptr ptr_; - std::optional repl_state_; }; std::unique_ptr TestEnvironment::ptr_ = nullptr; diff --git a/tests/unit/dbms_handler_community.cpp b/tests/unit/dbms_handler_community.cpp index efce2854d..860f70ba0 100644 --- a/tests/unit/dbms_handler_community.cpp +++ b/tests/unit/dbms_handler_community.cpp @@ -52,18 +52,16 @@ class TestEnvironment : public ::testing::Environment { auth = std::make_unique>( storage_directory / "auth"); - 
repl_state_.emplace(memgraph::storage::ReplicationStateRootPath(storage_conf)); - ptr_ = std::make_unique(storage_conf, *repl_state_); + ptr_ = std::make_unique(storage_conf); } void TearDown() override { ptr_.reset(); auth.reset(); - repl_state_.reset(); + std::filesystem::remove_all(storage_directory); } static std::unique_ptr ptr_; - std::optional repl_state_; }; std::unique_ptr TestEnvironment::ptr_ = nullptr; diff --git a/tests/unit/formatters.hpp b/tests/unit/formatters.hpp index a5ee49166..5217fd65c 100644 --- a/tests/unit/formatters.hpp +++ b/tests/unit/formatters.hpp @@ -138,6 +138,8 @@ inline std::string ToString(const memgraph::query::TypedValue &value, const TAcc break; case memgraph::query::TypedValue::Type::Graph: throw std::logic_error{"Not implemented"}; + case memgraph::query::TypedValue::Type::Function: + throw std::logic_error{"Not implemented"}; } return os.str(); } diff --git a/tests/unit/interpreter.cpp b/tests/unit/interpreter.cpp index 57bb79db8..bd587e7df 100644 --- a/tests/unit/interpreter.cpp +++ b/tests/unit/interpreter.cpp @@ -101,6 +101,8 @@ class InterpreterTest : public ::testing::Test { disk_test_utils::RemoveRocksDbDirs(testSuite); disk_test_utils::RemoveRocksDbDirs(testSuiteCsv); } + + std::filesystem::remove_all(data_directory); } InterpreterFaker default_interpreter{&interpreter_context, db}; diff --git a/tests/unit/query_dump.cpp b/tests/unit/query_dump.cpp index 5556ab90a..63019ad28 100644 --- a/tests/unit/query_dump.cpp +++ b/tests/unit/query_dump.cpp @@ -700,6 +700,11 @@ TYPED_TEST(DumpTest, CheckStateVertexWithMultipleProperties) { config.disk = disk_test_utils::GenerateOnDiskConfig("query-dump-s1").disk; config.force_on_disk = true; } + auto on_exit_s1 = memgraph::utils::OnScopeExit{[&]() { + if constexpr (std::is_same_v) { + disk_test_utils::RemoveRocksDbDirs("query-dump-s1"); + } + }}; memgraph::replication::ReplicationState repl_state(ReplicationStateRootPath(config)); memgraph::utils::Gatekeeper db_gk(config, 
repl_state); @@ -814,7 +819,11 @@ TYPED_TEST(DumpTest, CheckStateSimpleGraph) { config.disk = disk_test_utils::GenerateOnDiskConfig("query-dump-s2").disk; config.force_on_disk = true; } - + auto on_exit_s2 = memgraph::utils::OnScopeExit{[&]() { + if constexpr (std::is_same_v) { + disk_test_utils::RemoveRocksDbDirs("query-dump-s2"); + } + }}; memgraph::replication::ReplicationState repl_state{ReplicationStateRootPath(config)}; memgraph::utils::Gatekeeper db_gk{config, repl_state}; auto db_acc_opt = db_gk.access(); diff --git a/tests/unit/query_expression_evaluator.cpp b/tests/unit/query_expression_evaluator.cpp index 44d3ed301..c9786fe5e 100644 --- a/tests/unit/query_expression_evaluator.cpp +++ b/tests/unit/query_expression_evaluator.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -83,6 +84,14 @@ class ExpressionEvaluatorTest : public ::testing::Test { return id; } + Exists *CreateExistsWithValue(std::string name, TypedValue &&value) { + auto id = storage.template Create(); + auto symbol = symbol_table.CreateSymbol(name, true); + id->MapTo(symbol); + frame[symbol] = std::move(value); + return id; + } + template auto Eval(TExpression *expr) { ctx.properties = NamesToProperties(storage.properties_, &dba); @@ -149,6 +158,33 @@ TYPED_TEST(ExpressionEvaluatorTest, AndOperatorShortCircuit) { } } +TYPED_TEST(ExpressionEvaluatorTest, AndExistsOperatorShortCircuit) { + { + std::function my_func = [](TypedValue * /*return_value*/) { + throw QueryRuntimeException("This should not be evaluated"); + }; + TypedValue func_should_not_evaluate{std::move(my_func)}; + + auto *op = this->storage.template Create( + this->storage.template Create(false), + this->CreateExistsWithValue("anon1", std::move(func_should_not_evaluate))); + auto value = this->Eval(op); + EXPECT_EQ(value.ValueBool(), false); + } + { + std::function my_func = [memory = this->ctx.memory](TypedValue *return_value) { + *return_value = TypedValue(false, memory); + }; + TypedValue 
should_evaluate{std::move(my_func)}; + + auto *op = + this->storage.template Create(this->storage.template Create(true), + this->CreateExistsWithValue("anon1", std::move(should_evaluate))); + auto value = this->Eval(op); + EXPECT_EQ(value.ValueBool(), false); + } +} + TYPED_TEST(ExpressionEvaluatorTest, AndOperatorNull) { { // Null doesn't short circuit diff --git a/tests/unit/query_plan.cpp b/tests/unit/query_plan.cpp index 910ebdc54..bc4b2660c 100644 --- a/tests/unit/query_plan.cpp +++ b/tests/unit/query_plan.cpp @@ -853,6 +853,26 @@ TYPED_TEST(TestPlanner, MatchFilterPropIsNotNull) { } } +TYPED_TEST(TestPlanner, MatchFilterWhere) { + // Test MATCH (n)-[r]-(m) WHERE exists((n)-[]-()) and n!=n and 7!=8 RETURN n + auto *query = QUERY(SINGLE_QUERY( + MATCH(PATTERN(NODE("n"), EDGE("r"), NODE("m"))), + WHERE(AND(EXISTS(PATTERN(NODE("n"), EDGE("edge2", memgraph::query::EdgeAtom::Direction::BOTH, {}, false), + NODE("node3", std::nullopt, false))), + AND(NEQ(IDENT("n"), IDENT("n")), NEQ(LITERAL(7), LITERAL(8))))), + RETURN("n"))); + + std::list pattern_filter{new ExpectScanAll(), new ExpectExpand(), new ExpectLimit(), + new ExpectEvaluatePatternFilter()}; + CheckPlan( + query, this->storage, + ExpectFilter(), // 7!=8 + ExpectScanAll(), + ExpectFilter(std::vector>{pattern_filter}), // filter pulls from expand + ExpectExpand(), ExpectProduce()); + DeleteListContent(&pattern_filter); +} + TYPED_TEST(TestPlanner, MultiMatchWhere) { // Test MATCH (n) -[r]- (m) MATCH (l) WHERE n.prop < 42 RETURN n FakeDbAccessor dba; diff --git a/tests/unit/query_plan_checker.hpp b/tests/unit/query_plan_checker.hpp index 6f2f23df7..92089eb82 100644 --- a/tests/unit/query_plan_checker.hpp +++ b/tests/unit/query_plan_checker.hpp @@ -14,11 +14,13 @@ #include #include +#include "query/frontend/ast/ast.hpp" #include "query/frontend/semantic/symbol_generator.hpp" #include "query/frontend/semantic/symbol_table.hpp" #include "query/plan/operator.hpp" #include "query/plan/planner.hpp" #include 
"query/plan/preprocess.hpp" +#include "utils/typeinfo.hpp" namespace memgraph::query::plan { @@ -197,6 +199,29 @@ class ExpectFilter : public OpChecker { filter.pattern_filters_[i]->Accept(check_updates); } + // ordering in AND Operator must be ..., exists, exists, exists. + auto *expr = filter.expression_; + std::vector filter_expressions; + while (auto *and_operator = utils::Downcast(expr)) { + auto *expr1 = and_operator->expression1_; + auto *expr2 = and_operator->expression2_; + filter_expressions.emplace_back(expr1); + expr = expr2; + } + if (expr) filter_expressions.emplace_back(expr); + + auto it = filter_expressions.begin(); + for (; it != filter_expressions.end(); it++) { + if ((*it)->GetTypeInfo().name == query::Exists::kType.name) { + break; + } + } + while (it != filter_expressions.end()) { + ASSERT_TRUE((*it)->GetTypeInfo().name == query::Exists::kType.name) + << "Filter expression is '" << (*it)->GetTypeInfo().name << "' expected '" << query::Exists::kType.name + << "'!"; + it++; + } } std::vector> pattern_filters_; diff --git a/tests/unit/query_procedure_mgp_type.cpp b/tests/unit/query_procedure_mgp_type.cpp index 4ed9f4926..c5f30f3af 100644 --- a/tests/unit/query_procedure_mgp_type.cpp +++ b/tests/unit/query_procedure_mgp_type.cpp @@ -249,7 +249,7 @@ TYPED_TEST(CypherType, VertexSatisfiesType) { auto vertex = dba.InsertVertex(); mgp_memory memory{memgraph::utils::NewDeleteResource()}; memgraph::utils::Allocator alloc(memory.impl); - mgp_graph graph{&dba, memgraph::storage::View::NEW, nullptr}; + mgp_graph graph{&dba, memgraph::storage::View::NEW, nullptr, dba.GetStorageMode()}; auto *mgp_vertex_v = EXPECT_MGP_NO_ERROR(mgp_value *, mgp_value_make_vertex, alloc.new_object(vertex, &graph)); const memgraph::query::TypedValue tv_vertex(vertex); @@ -274,7 +274,7 @@ TYPED_TEST(CypherType, EdgeSatisfiesType) { auto edge = *dba.InsertEdge(&v1, &v2, dba.NameToEdgeType("edge_type")); mgp_memory memory{memgraph::utils::NewDeleteResource()}; 
memgraph::utils::Allocator alloc(memory.impl); - mgp_graph graph{&dba, memgraph::storage::View::NEW, nullptr}; + mgp_graph graph{&dba, memgraph::storage::View::NEW, nullptr, dba.GetStorageMode()}; auto *mgp_edge_v = EXPECT_MGP_NO_ERROR(mgp_value *, mgp_value_make_edge, alloc.new_object(edge, &graph)); const memgraph::query::TypedValue tv_edge(edge); CheckSatisfiesTypesAndNullable( @@ -298,7 +298,7 @@ TYPED_TEST(CypherType, PathSatisfiesType) { auto edge = *dba.InsertEdge(&v1, &v2, dba.NameToEdgeType("edge_type")); mgp_memory memory{memgraph::utils::NewDeleteResource()}; memgraph::utils::Allocator alloc(memory.impl); - mgp_graph graph{&dba, memgraph::storage::View::NEW, nullptr}; + mgp_graph graph{&dba, memgraph::storage::View::NEW, nullptr, dba.GetStorageMode()}; auto *mgp_vertex_v = alloc.new_object(v1, &graph); auto path = EXPECT_MGP_NO_ERROR(mgp_path *, mgp_path_make_with_start, mgp_vertex_v, &memory); ASSERT_TRUE(path); diff --git a/tests/unit/query_procedure_py_module.cpp b/tests/unit/query_procedure_py_module.cpp index 9487ebb2c..dd744229a 100644 --- a/tests/unit/query_procedure_py_module.cpp +++ b/tests/unit/query_procedure_py_module.cpp @@ -132,7 +132,7 @@ TYPED_TEST(PyModule, PyVertex) { auto storage_dba = this->db->Access(); memgraph::query::DbAccessor dba(storage_dba.get()); mgp_memory memory{memgraph::utils::NewDeleteResource()}; - mgp_graph graph{&dba, memgraph::storage::View::OLD, nullptr}; + mgp_graph graph{&dba, memgraph::storage::View::OLD, nullptr, dba.GetStorageMode()}; auto *vertex = EXPECT_MGP_NO_ERROR(mgp_vertex *, mgp_graph_get_vertex_by_id, &graph, mgp_vertex_id{0}, &memory); ASSERT_TRUE(vertex); auto *vertex_value = EXPECT_MGP_NO_ERROR(mgp_value *, mgp_value_make_vertex, @@ -182,7 +182,7 @@ TYPED_TEST(PyModule, PyEdge) { auto storage_dba = this->db->Access(); memgraph::query::DbAccessor dba(storage_dba.get()); mgp_memory memory{memgraph::utils::NewDeleteResource()}; - mgp_graph graph{&dba, memgraph::storage::View::OLD, nullptr}; + mgp_graph 
graph{&dba, memgraph::storage::View::OLD, nullptr, dba.GetStorageMode()}; auto *start_v = EXPECT_MGP_NO_ERROR(mgp_vertex *, mgp_graph_get_vertex_by_id, &graph, mgp_vertex_id{0}, &memory); ASSERT_TRUE(start_v); auto *edges_it = EXPECT_MGP_NO_ERROR(mgp_edges_iterator *, mgp_vertex_iter_out_edges, start_v, &memory); @@ -228,7 +228,7 @@ TYPED_TEST(PyModule, PyPath) { auto storage_dba = this->db->Access(); memgraph::query::DbAccessor dba(storage_dba.get()); mgp_memory memory{memgraph::utils::NewDeleteResource()}; - mgp_graph graph{&dba, memgraph::storage::View::OLD, nullptr}; + mgp_graph graph{&dba, memgraph::storage::View::OLD, nullptr, dba.GetStorageMode()}; auto *start_v = EXPECT_MGP_NO_ERROR(mgp_vertex *, mgp_graph_get_vertex_by_id, &graph, mgp_vertex_id{0}, &memory); ASSERT_TRUE(start_v); auto *path = EXPECT_MGP_NO_ERROR(mgp_path *, mgp_path_make_with_start, start_v, &memory); diff --git a/tests/unit/query_procedures_mgp_graph.cpp b/tests/unit/query_procedures_mgp_graph.cpp index 207080967..785aab2cf 100644 --- a/tests/unit/query_procedures_mgp_graph.cpp +++ b/tests/unit/query_procedures_mgp_graph.cpp @@ -120,7 +120,8 @@ class MgpGraphTest : public ::testing::Test { public: mgp_graph CreateGraph(const memgraph::storage::View view = memgraph::storage::View::NEW) { // the execution context can be null as it shouldn't be used in these tests - return mgp_graph{&CreateDbAccessor(memgraph::storage::IsolationLevel::SNAPSHOT_ISOLATION), view, ctx_.get()}; + return mgp_graph{&CreateDbAccessor(memgraph::storage::IsolationLevel::SNAPSHOT_ISOLATION), view, ctx_.get(), + memgraph::storage::StorageMode::IN_MEMORY_TRANSACTIONAL}; } std::array CreateEdge() { diff --git a/tests/unit/storage_v2_durability_inmemory.cpp b/tests/unit/storage_v2_durability_inmemory.cpp index 725db9283..8a6d26fd1 100644 --- a/tests/unit/storage_v2_durability_inmemory.cpp +++ b/tests/unit/storage_v2_durability_inmemory.cpp @@ -9,6 +9,7 @@ // by the Apache License, Version 2.0, included in the file // 
licenses/APL.txt. +#include #include #include #include @@ -19,13 +20,18 @@ #include #include #include +#include #include #include #include +#include #include "dbms/database.hpp" #include "replication/state.hpp" #include "storage/v2/config.hpp" +#include "storage/v2/constraints/constraints.hpp" +#include "storage/v2/constraints/existence_constraints.hpp" +#include "storage/v2/durability/durability.hpp" #include "storage/v2/durability/marker.hpp" #include "storage/v2/durability/paths.hpp" #include "storage/v2/durability/snapshot.hpp" @@ -34,10 +40,13 @@ #include "storage/v2/edge_accessor.hpp" #include "storage/v2/indices/label_index_stats.hpp" #include "storage/v2/inmemory/storage.hpp" +#include "storage/v2/inmemory/unique_constraints.hpp" +#include "storage/v2/storage_mode.hpp" #include "storage/v2/vertex_accessor.hpp" #include "utils/file.hpp" #include "utils/logging.hpp" #include "utils/timer.hpp" +#include "utils/uuid.hpp" using testing::Contains; using testing::UnorderedElementsAre; @@ -2703,3 +2712,113 @@ TEST_P(DurabilityTest, SnapshotAndWalMixedUUID) { ASSERT_FALSE(acc->Commit().HasError()); } } + +// NOLINTNEXTLINE(hicpp-special-member-functions) +TEST_P(DurabilityTest, ParallelConstraintsRecovery) { + // Create snapshot. 
+ { + memgraph::storage::Config config{ + .items = {.properties_on_edges = GetParam()}, + .durability = {.storage_directory = storage_directory, .snapshot_on_exit = true, .items_per_batch = 13}}; + memgraph::replication::ReplicationState repl_state{memgraph::storage::ReplicationStateRootPath(config)}; + memgraph::dbms::Database db{config, repl_state}; + CreateBaseDataset(db.storage(), GetParam()); + VerifyDataset(db.storage(), DatasetType::ONLY_BASE, GetParam()); + CreateExtendedDataset(db.storage()); + VerifyDataset(db.storage(), DatasetType::BASE_WITH_EXTENDED, GetParam()); + } + + ASSERT_EQ(GetSnapshotsList().size(), 1); + ASSERT_EQ(GetBackupSnapshotsList().size(), 0); + ASSERT_EQ(GetWalsList().size(), 0); + ASSERT_EQ(GetBackupWalsList().size(), 0); + + // Recover snapshot. + memgraph::storage::Config config{.items = {.properties_on_edges = GetParam()}, + .durability = {.storage_directory = storage_directory, + .recover_on_startup = true, + .snapshot_on_exit = false, + .items_per_batch = 13, + .allow_parallel_index_creation = true}}; + memgraph::replication::ReplicationState repl_state{memgraph::storage::ReplicationStateRootPath(config)}; + memgraph::dbms::Database db{config, repl_state}; + VerifyDataset(db.storage(), DatasetType::BASE_WITH_EXTENDED, GetParam()); + { + auto acc = db.storage()->Access(); + auto vertex = acc->CreateVertex(); + auto edge = acc->CreateEdge(&vertex, &vertex, db.storage()->NameToEdgeType("et")); + ASSERT_TRUE(edge.HasValue()); + ASSERT_FALSE(acc->Commit().HasError()); + } +} + +// NOLINTNEXTLINE(hicpp-special-member-functions) +TEST_P(DurabilityTest, ConstraintsRecoveryFunctionSetting) { + memgraph::storage::Config config{.items = {.properties_on_edges = GetParam()}, + .durability = {.storage_directory = storage_directory, + .recover_on_startup = true, + .snapshot_on_exit = false, + .items_per_batch = 13, + .allow_parallel_schema_creation = true}}; + // Create snapshot. 
+ { + config.durability.recover_on_startup = false; + config.durability.snapshot_on_exit = true; + memgraph::replication::ReplicationState repl_state{memgraph::storage::ReplicationStateRootPath(config)}; + memgraph::dbms::Database db{config, repl_state}; + CreateBaseDataset(db.storage(), GetParam()); + VerifyDataset(db.storage(), DatasetType::ONLY_BASE, GetParam()); + CreateExtendedDataset(db.storage()); + VerifyDataset(db.storage(), DatasetType::BASE_WITH_EXTENDED, GetParam()); + } + + ASSERT_EQ(GetSnapshotsList().size(), 1); + ASSERT_EQ(GetBackupSnapshotsList().size(), 0); + ASSERT_EQ(GetWalsList().size(), 0); + ASSERT_EQ(GetBackupWalsList().size(), 0); + + config.durability.recover_on_startup = true; + config.durability.snapshot_on_exit = false; + memgraph::replication::ReplicationState repl_state{memgraph::storage::ReplicationStateRootPath(config)}; + memgraph::utils::SkipList vertices; + memgraph::utils::SkipList edges; + std::unique_ptr name_id_mapper = std::make_unique(); + std::atomic edge_count{0}; + uint64_t wal_seq_num{0}; + std::string uuid{memgraph::utils::GenerateUUID()}; + memgraph::storage::Indices indices{config, memgraph::storage::StorageMode::IN_MEMORY_TRANSACTIONAL}; + memgraph::storage::Constraints constraints{config, memgraph::storage::StorageMode::IN_MEMORY_TRANSACTIONAL}; + memgraph::storage::ReplicationStorageState repl_storage_state; + + memgraph::storage::durability::Recovery recovery{ + config.durability.storage_directory / memgraph::storage::durability::kSnapshotDirectory, + config.durability.storage_directory / memgraph::storage::durability::kWalDirectory}; + + // Recover snapshot. 
+ const auto info = recovery.RecoverData(&uuid, repl_storage_state, &vertices, &edges, &edge_count, + name_id_mapper.get(), &indices, &constraints, config, &wal_seq_num); + + MG_ASSERT(info.has_value(), "Info doesn't have value present"); + const auto par_exec_info = memgraph::storage::durability::GetParallelExecInfo(*info, config); + + MG_ASSERT(par_exec_info.has_value(), "Parallel exec info should have value present"); + + // Unique constraint choose function + auto *mem_unique_constraints = + static_cast(constraints.unique_constraints_.get()); + auto variant_unique_constraint_creation_func = mem_unique_constraints->GetCreationFunction(par_exec_info); + + const auto *pval = std::get_if( + &variant_unique_constraint_creation_func); + MG_ASSERT(pval, "Chose wrong function for recovery of data"); + + // Existence constraint choose function + auto *mem_existence_constraint = + static_cast(constraints.existence_constraints_.get()); + auto variant_existence_constraint_creation_func = mem_existence_constraint->GetCreationFunction(par_exec_info); + + const auto *pval_existence = + std::get_if( + &variant_existence_constraint_creation_func); + MG_ASSERT(pval_existence, "Chose wrong type of function for recovery of existence constraint data"); +} diff --git a/tests/unit/storage_v2_isolation_level.cpp b/tests/unit/storage_v2_isolation_level.cpp index 5cdfb5656..d2ae14d8f 100644 --- a/tests/unit/storage_v2_isolation_level.cpp +++ b/tests/unit/storage_v2_isolation_level.cpp @@ -15,6 +15,7 @@ #include "storage/v2/disk/storage.hpp" #include "storage/v2/inmemory/storage.hpp" #include "storage/v2/isolation_level.hpp" +#include "utils/on_scope_exit.hpp" namespace { int64_t VerticesCount(memgraph::storage::Storage::Accessor *accessor) { @@ -113,6 +114,7 @@ TEST_P(StorageIsolationLevelTest, VisibilityOnDiskStorage) { for (const auto override_isolation_level : isolation_levels) { std::unique_ptr storage(new memgraph::storage::DiskStorage(config)); + auto on_exit = 
memgraph::utils::OnScopeExit{[&]() { disk_test_utils::RemoveRocksDbDirs(testSuite); }}; try { this->TestVisibility(storage, default_isolation_level, override_isolation_level); } catch (memgraph::utils::NotYetImplemented &) { @@ -120,10 +122,8 @@ TEST_P(StorageIsolationLevelTest, VisibilityOnDiskStorage) { override_isolation_level != memgraph::storage::IsolationLevel::SNAPSHOT_ISOLATION) { continue; } - disk_test_utils::RemoveRocksDbDirs(testSuite); throw; } - disk_test_utils::RemoveRocksDbDirs(testSuite); } } diff --git a/tests/unit/storage_v2_replication.cpp b/tests/unit/storage_v2_replication.cpp index 261b2ccf0..f07130c4a 100644 --- a/tests/unit/storage_v2_replication.cpp +++ b/tests/unit/storage_v2_replication.cpp @@ -102,8 +102,7 @@ class ReplicationTest : public ::testing::Test { struct MinMemgraph { MinMemgraph(const memgraph::storage::Config &conf) - : repl_state{ReplicationStateRootPath(conf)}, - dbms{conf, repl_state + : dbms{conf #ifdef MG_ENTERPRISE , reinterpret_cast< @@ -111,11 +110,12 @@ struct MinMemgraph { true, false #endif }, + repl_state{dbms.ReplicationState()}, db{*dbms.Get().get()}, - repl_handler(repl_state, dbms) { + repl_handler(dbms) { } - memgraph::replication::ReplicationState repl_state; memgraph::dbms::DbmsHandler dbms; + memgraph::replication::ReplicationState &repl_state; memgraph::dbms::Database &db; ReplicationHandler repl_handler; }; @@ -130,14 +130,13 @@ TEST_F(ReplicationTest, BasicSynchronousReplicationTest) { .port = ports[0], }); - ASSERT_FALSE(main.repl_handler - .RegisterReplica(ReplicationClientConfig{ - .name = "REPLICA", - .mode = ReplicationMode::SYNC, - .ip_address = local_host, - .port = ports[0], - }) - .HasError()); + const auto ® = main.repl_handler.RegisterReplica(ReplicationClientConfig{ + .name = "REPLICA", + .mode = ReplicationMode::SYNC, + .ip_address = local_host, + .port = ports[0], + }); + ASSERT_FALSE(reg.HasError()) << (int)reg.GetError(); // vertex create // vertex add label @@ -966,14 +965,14 @@ 
TEST_F(ReplicationTest, RestoringReplicationAtStartupAfterDroppingReplica) { .ip_address = local_host, .port = ports[0], }); - ASSERT_FALSE(res.HasError()); + ASSERT_FALSE(res.HasError()) << (int)res.GetError(); res = main->repl_handler.RegisterReplica(ReplicationClientConfig{ .name = replicas[1], .mode = ReplicationMode::SYNC, .ip_address = local_host, .port = ports[1], }); - ASSERT_FALSE(res.HasError()); + ASSERT_FALSE(res.HasError()) << (int)res.GetError(); auto replica_infos = main->db.storage()->ReplicasInfo(); diff --git a/tests/unit/storage_v2_storage_mode.cpp b/tests/unit/storage_v2_storage_mode.cpp index 3daea2e69..49ee633c5 100644 --- a/tests/unit/storage_v2_storage_mode.cpp +++ b/tests/unit/storage_v2_storage_mode.cpp @@ -75,6 +75,8 @@ class StorageModeMultiTxTest : public ::testing::Test { return tmp; }(); // iile + void TearDown() override { std::filesystem::remove_all(data_directory); } + memgraph::storage::Config config{.durability.storage_directory = data_directory, .disk.main_storage_directory = data_directory / "disk"};