diff --git a/.clang-tidy b/.clang-tidy index a30f9e592..c02b7152f 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -64,8 +64,8 @@ Checks: '*, -readability-identifier-length, -misc-no-recursion, -concurrency-mt-unsafe, - -bugprone-easily-swappable-parameters' - + -bugprone-easily-swappable-parameters, + -bugprone-unchecked-optional-access' WarningsAsErrors: '' HeaderFilterRegex: 'src/.*' AnalyzeTemporaryDtors: false diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 72ab1ea66..7a14cf12e 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1,14 +1,28 @@ +### Description + +Please briefly explain the changes you made here. + + +Please delete either the [master < EPIC] or [master < Task] part, depending on what are your needs. + [master < Epic] PR -- [ ] Check, and update documentation if necessary - [ ] Write E2E tests - [ ] Compare the [benchmarking results](https://bench-graph.memgraph.com/) between the master branch and the Epic branch - [ ] Provide the full content or a guide for the final git message + - [FINAL GIT MESSAGE] [master < Task] PR -- [ ] Check, and update documentation if necessary - [ ] Provide the full content or a guide for the final git message + - **[FINAL GIT MESSAGE]** -To keep docs changelog up to date, one more thing to do: -- [ ] Write a release note here, including added/changed clauses +### Documentation checklist +- [ ] Add the documentation label tag +- [ ] Add the bug / feature label tag +- [ ] Add the milestone for which this feature is intended + - If not known, set for a later milestone +- [ ] Write a release note, including added/changed clauses + - **[Release note text]** +- [ ] Link the documentation PR here + - **[Documentation PR link]** - [ ] Tag someone from docs team in the comments diff --git a/.github/workflows/diff.yaml b/.github/workflows/diff.yaml index a2dc0aef2..6c96cda80 100644 --- a/.github/workflows/diff.yaml +++ b/.github/workflows/diff.yaml @@ 
-4,10 +4,6 @@ concurrency: cancel-in-progress: true on: - push: - branches: - - master - workflow_dispatch: pull_request: paths-ignore: - "docs/**" @@ -19,11 +15,16 @@ on: jobs: community_build: name: "Community build" - runs-on: [self-hosted, Linux, X64, Diff] + runs-on: [self-hosted, Linux, X64, DockerMgBuild] + timeout-minutes: 60 env: THREADS: 24 MEMGRAPH_ENTERPRISE_LICENSE: ${{ secrets.MEMGRAPH_ENTERPRISE_LICENSE }} MEMGRAPH_ORGANIZATION_NAME: ${{ secrets.MEMGRAPH_ORGANIZATION_NAME }} + OS: debian-11 + TOOLCHAIN: v5 + ARCH: amd + BUILD_TYPE: RelWithDebInfo steps: - name: Set up repository @@ -33,35 +34,56 @@ jobs: # branches and tags. (default: 1) fetch-depth: 0 - - name: Build community binaries + - name: Spin up mgbuild container run: | - # Activate toolchain. - source /opt/toolchain-v4/activate + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + run - # Initialize dependencies. - ./init - - # Build community binaries. - cd build - cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo -DMG_ENTERPRISE=OFF .. - make -j$THREADS + - name: Build release binaries + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --build-type $BUILD_TYPE \ + --threads $THREADS \ + build-memgraph --community - name: Run unit tests run: | - # Activate toolchain. - source /opt/toolchain-v4/activate + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --threads $THREADS \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph unit - # Run unit tests. 
- cd build - ctest -R memgraph__unit --output-on-failure -j$THREADS + - name: Stop mgbuild container + if: always() + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + stop --remove code_analysis: name: "Code analysis" - runs-on: [self-hosted, Linux, X64, Diff] + runs-on: [self-hosted, Linux, X64, DockerMgBuild] + timeout-minutes: 60 env: THREADS: 24 MEMGRAPH_ENTERPRISE_LICENSE: ${{ secrets.MEMGRAPH_ENTERPRISE_LICENSE }} MEMGRAPH_ORGANIZATION_NAME: ${{ secrets.MEMGRAPH_ORGANIZATION_NAME }} + OS: debian-11 + TOOLCHAIN: v5 + ARCH: amd + BUILD_TYPE: Debug steps: - name: Set up repository @@ -71,6 +93,14 @@ jobs: # branches and tags. (default: 1) fetch-depth: 0 + - name: Spin up mgbuild container + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + run + # This is also needed if we want do to comparison against other branches # See https://github.community/t/checkout-code-fails-when-it-runs-lerna-run-test-since-master/17920 - name: Fetch all history for all tags and branches @@ -78,11 +108,13 @@ jobs: - name: Initialize deps run: | - # Activate toolchain. - source /opt/toolchain-v4/activate - - # Initialize dependencies. - ./init + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --build-type $BUILD_TYPE \ + --threads $THREADS \ + build-memgraph --init-only - name: Set base branch if: ${{ github.event_name == 'pull_request' }} @@ -96,45 +128,43 @@ jobs: - name: Python code analysis run: | - CHANGED_FILES=$(git diff -U0 ${{ env.BASE_BRANCH }}... 
--name-only --diff-filter=d) - for file in ${CHANGED_FILES}; do - echo ${file} - if [[ ${file} == *.py ]]; then - python3 -m black --check --diff ${file} - python3 -m isort --profile black --check-only --diff ${file} - fi - done + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph code-analysis --base-branch "${{ env.BASE_BRANCH }}" - name: Build combined ASAN, UBSAN and coverage binaries run: | - # Activate toolchain. - source /opt/toolchain-v4/activate - - cd build - cmake -DTEST_COVERAGE=ON -DASAN=ON -DUBSAN=ON .. - make -j$THREADS memgraph__unit + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --build-type $BUILD_TYPE \ + --threads $THREADS \ + build-memgraph --coverage --asan --ubsan - name: Run unit tests run: | - # Activate toolchain. - source /opt/toolchain-v4/activate - - # Run unit tests. It is restricted to 2 threads intentionally, because higher concurrency makes the timing related tests unstable. - cd build - LSAN_OPTIONS=suppressions=$PWD/../tools/lsan.supp UBSAN_OPTIONS=halt_on_error=1 ctest -R memgraph__unit --output-on-failure -j2 + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph unit-coverage - name: Compute code coverage run: | - # Activate toolchain. - source /opt/toolchain-v4/activate - - # Compute code coverage. - cd tools/github - ./coverage_convert - - # Package code coverage. 
- cd generated - tar -czf code_coverage.tar.gz coverage.json html report.json summary.rmu + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph code-coverage - name: Save code coverage uses: actions/upload-artifact@v4 @@ -144,21 +174,36 @@ jobs: - name: Run clang-tidy run: | - source /opt/toolchain-v4/activate + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --threads $THREADS \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph clang-tidy --base-branch "${{ env.BASE_BRANCH }}" - # Restrict clang-tidy results only to the modified parts - git diff -U0 ${{ env.BASE_BRANCH }}... -- src | ./tools/github/clang-tidy/clang-tidy-diff.py -p 1 -j $THREADS -path build -regex ".+\.cpp" | tee ./build/clang_tidy_output.txt - - # Fail if any warning is reported - ! cat ./build/clang_tidy_output.txt | ./tools/github/clang-tidy/grep_error_lines.sh > /dev/null + - name: Stop mgbuild container + if: always() + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + stop --remove debug_build: name: "Debug build" - runs-on: [self-hosted, Linux, X64, Diff] + runs-on: [self-hosted, Linux, X64, DockerMgBuild] + timeout-minutes: 100 env: THREADS: 24 MEMGRAPH_ENTERPRISE_LICENSE: ${{ secrets.MEMGRAPH_ENTERPRISE_LICENSE }} MEMGRAPH_ORGANIZATION_NAME: ${{ secrets.MEMGRAPH_ORGANIZATION_NAME }} + OS: debian-11 + TOOLCHAIN: v5 + ARCH: amd + BUILD_TYPE: Debug steps: - name: Set up repository @@ -168,58 +213,95 @@ jobs: # branches and tags. (default: 1) fetch-depth: 0 - - name: Build debug binaries + - name: Spin up mgbuild container run: | - # Activate toolchain. 
- source /opt/toolchain-v4/activate + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + run - # Initialize dependencies. - ./init - - # Build debug binaries. - cd build - cmake .. - make -j$THREADS + - name: Build release binaries + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --build-type $BUILD_TYPE \ + --threads $THREADS \ + build-memgraph - name: Run leftover CTest tests run: | - # Activate toolchain. - source /opt/toolchain-v4/activate - - # Run leftover CTest tests (all except unit and benchmark tests). - cd build - ctest -E "(memgraph__unit|memgraph__benchmark)" --output-on-failure + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --threads $THREADS \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph leftover-CTest - name: Run drivers tests run: | - ./tests/drivers/run.sh + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --threads $THREADS \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph drivers - name: Run integration tests run: | - tests/integration/run.sh + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --threads $THREADS \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph integration - name: Run cppcheck and clang-format run: | - # Activate toolchain. - source /opt/toolchain-v4/activate - - # Run cppcheck and clang-format. 
- cd tools/github - ./cppcheck_and_clang_format diff - + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --threads $THREADS \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph cppcheck-and-clang-format + - name: Save cppcheck and clang-format errors uses: actions/upload-artifact@v4 with: name: "Code coverage(Debug build)" path: tools/github/cppcheck_and_clang_format.txt + - name: Stop mgbuild container + if: always() + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + stop --remove + release_build: name: "Release build" - runs-on: [self-hosted, Linux, X64, Diff] + runs-on: [self-hosted, Linux, X64, DockerMgBuild] + timeout-minutes: 100 env: THREADS: 24 MEMGRAPH_ENTERPRISE_LICENSE: ${{ secrets.MEMGRAPH_ENTERPRISE_LICENSE }} MEMGRAPH_ORGANIZATION_NAME: ${{ secrets.MEMGRAPH_ORGANIZATION_NAME }} + OS: debian-11 + TOOLCHAIN: v5 + ARCH: amd + BUILD_TYPE: Release steps: - name: Set up repository @@ -229,26 +311,33 @@ jobs: # branches and tags. (default: 1) fetch-depth: 0 + - name: Spin up mgbuild container + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + run - name: Build release binaries run: | - # Activate toolchain. - source /opt/toolchain-v4/activate - - # Initialize dependencies. - ./init - - # Build release binaries. - cd build - cmake -DCMAKE_BUILD_TYPE=Release .. 
- make -j$THREADS + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --build-type $BUILD_TYPE \ + --threads $THREADS \ + build-memgraph - name: Run GQL Behave tests run: | - cd tests - ./setup.sh /opt/toolchain-v4/activate - cd gql_behave - ./continuous_integration + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph gql-behave - name: Save quality assurance status uses: actions/upload-artifact@v4 @@ -260,14 +349,19 @@ jobs: - name: Run unit tests run: | - # Activate toolchain. - source /opt/toolchain-v4/activate - - # Run unit tests. - cd build - ctest -R memgraph__unit --output-on-failure -j$THREADS + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --threads $THREADS \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph unit + # This step will be skipped because the e2e stream tests have been disabled + # We need to fix this as soon as possible - name: Ensure Kafka and Pulsar are up + if: false run: | cd tests/e2e/streams/kafka docker-compose up -d @@ -276,13 +370,17 @@ jobs: - name: Run e2e tests run: | - cd tests - ./setup.sh /opt/toolchain-v4/activate - source ve3/bin/activate_e2e - cd e2e - ./run.sh + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph e2e + # Same as two steps prior - name: Ensure Kafka and Pulsar are down + if: false run: | cd tests/e2e/streams/kafka docker-compose down @@ -291,59 +389,92 @@ jobs: - name: Run stress test (plain) run: | - cd tests/stress - source ve3/bin/activate - ./continuous_integration + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + 
--arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph stress-plain - name: Run stress test (SSL) run: | - cd tests/stress - source ve3/bin/activate - ./continuous_integration --use-ssl + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph stress-ssl - name: Run durability test run: | - cd tests/stress - source ve3/bin/activate - python3 durability --num-steps 5 + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph durability - name: Create enterprise DEB package run: | - # Activate toolchain. - source /opt/toolchain-v4/activate - cd build + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + package-memgraph - # create mgconsole - # we use the -B to force the build - make -j$THREADS -B mgconsole - - # Create enterprise DEB package. 
- mkdir output && cd output - cpack -G DEB --config ../CPackConfig.cmake + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + copy --package - name: Save enterprise DEB package uses: actions/upload-artifact@v4 with: name: "Enterprise DEB package" - path: build/output/memgraph*.deb + path: build/output/${{ env.OS }}/memgraph*.deb + + - name: Copy build logs + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + copy --build-logs - name: Save test data uses: actions/upload-artifact@v4 if: always() with: name: "Test data(Release build)" - path: | - # multiple paths could be defined - build/logs + path: build/logs + + - name: Stop mgbuild container + if: always() + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + stop --remove release_jepsen_test: name: "Release Jepsen Test" - runs-on: [self-hosted, Linux, X64, Debian10, JepsenControl] - #continue-on-error: true + runs-on: [self-hosted, Linux, X64, DockerMgBuild] + timeout-minutes: 80 env: THREADS: 24 MEMGRAPH_ENTERPRISE_LICENSE: ${{ secrets.MEMGRAPH_ENTERPRISE_LICENSE }} MEMGRAPH_ORGANIZATION_NAME: ${{ secrets.MEMGRAPH_ORGANIZATION_NAME }} + OS: debian-10 + TOOLCHAIN: v4 + ARCH: amd + BUILD_TYPE: RelWithDebInfo steps: - name: Set up repository @@ -353,16 +484,31 @@ jobs: # branches and tags. (default: 1) fetch-depth: 0 + - name: Spin up mgbuild container + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + run + - name: Build release binaries run: | - # Activate toolchain. - source /opt/toolchain-v4/activate - # Initialize dependencies. - ./init - # Build only memgraph release binarie. - cd build - cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo .. 
- make -j$THREADS memgraph + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --build-type $BUILD_TYPE \ + --threads $THREADS \ + build-memgraph + + - name: Copy memgraph binary + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + copy --binary - name: Refresh Jepsen Cluster run: | @@ -381,13 +527,27 @@ jobs: name: "Jepsen Report" path: tests/jepsen/Jepsen.tar.gz + - name: Stop mgbuild container + if: always() + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + stop --remove + release_benchmarks: name: "Release benchmarks" - runs-on: [self-hosted, Linux, X64, Diff, Gen7] + runs-on: [self-hosted, Linux, X64, DockerMgBuild, Gen7] + timeout-minutes: 60 env: THREADS: 24 MEMGRAPH_ENTERPRISE_LICENSE: ${{ secrets.MEMGRAPH_ENTERPRISE_LICENSE }} MEMGRAPH_ORGANIZATION_NAME: ${{ secrets.MEMGRAPH_ORGANIZATION_NAME }} + OS: debian-11 + TOOLCHAIN: v5 + ARCH: amd + BUILD_TYPE: Release steps: - name: Set up repository @@ -397,25 +557,33 @@ jobs: # branches and tags. (default: 1) fetch-depth: 0 + - name: Spin up mgbuild container + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + run + - name: Build release binaries run: | - # Activate toolchain. - source /opt/toolchain-v4/activate - - # Initialize dependencies. - ./init - - # Build only memgraph release binaries. - cd build - cmake -DCMAKE_BUILD_TYPE=release .. 
- make -j$THREADS + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --build-type $BUILD_TYPE \ + --threads $THREADS \ + build-memgraph - name: Run macro benchmarks run: | - cd tests/macro_benchmark - ./harness QuerySuite MemgraphRunner \ - --groups aggregation 1000_create unwind_create dense_expand match \ - --no-strict + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph macro-benchmark - name: Get branch name (merge) if: github.event_name != 'pull_request' @@ -429,30 +597,49 @@ jobs: - name: Upload macro benchmark results run: | - cd tools/bench-graph-client - virtualenv -p python3 ve3 - source ve3/bin/activate - pip install -r requirements.txt - ./main.py --benchmark-name "macro_benchmark" \ - --benchmark-results "../../tests/macro_benchmark/.harness_summary" \ - --github-run-id "${{ github.run_id }}" \ - --github-run-number "${{ github.run_number }}" \ - --head-branch-name "${{ env.BRANCH_NAME }}" + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph upload-to-bench-graph \ + --benchmark-name "macro_benchmark" \ + --benchmark-results "../../tests/macro_benchmark/.harness_summary" \ + --github-run-id ${{ github.run_id }} \ + --github-run-number ${{ github.run_number }} \ + --head-branch-name ${{ env.BRANCH_NAME }} - # TODO (andi) No need for path flags and for --disk-storage and --in-memory-analytical - name: Run mgbench run: | - cd tests/mgbench - ./benchmark.py vendor-native --num-workers-for-benchmark 12 --export-results benchmark_result.json pokec/medium/*/* + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + 
--organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph mgbench - name: Upload mgbench results run: | - cd tools/bench-graph-client - virtualenv -p python3 ve3 - source ve3/bin/activate - pip install -r requirements.txt - ./main.py --benchmark-name "mgbench" \ - --benchmark-results "../../tests/mgbench/benchmark_result.json" \ - --github-run-id "${{ github.run_id }}" \ - --github-run-number "${{ github.run_number }}" \ - --head-branch-name "${{ env.BRANCH_NAME }}" + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + --enterprise-license $MEMGRAPH_ENTERPRISE_LICENSE \ + --organization-name $MEMGRAPH_ORGANIZATION_NAME \ + test-memgraph upload-to-bench-graph \ + --benchmark-name "mgbench" \ + --benchmark-results "../../tests/mgbench/benchmark_result.json" \ + --github-run-id "${{ github.run_id }}" \ + --github-run-number "${{ github.run_number }}" \ + --head-branch-name "${{ env.BRANCH_NAME }}" + + - name: Stop mgbuild container + if: always() + run: | + ./release/package/mgbuild.sh \ + --toolchain $TOOLCHAIN \ + --os $OS \ + --arch $ARCH \ + stop --remove diff --git a/CMakeLists.txt b/CMakeLists.txt index 028406447..c02039497 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -300,6 +300,19 @@ endif() option(ENABLE_JEMALLOC "Use jemalloc" ON) +option(MG_MEMORY_PROFILE "If build should be setup for memory profiling" OFF) +if (MG_MEMORY_PROFILE AND ENABLE_JEMALLOC) + message(STATUS "Jemalloc has been disabled because MG_MEMORY_PROFILE is enabled") + set(ENABLE_JEMALLOC OFF) +endif () +if (MG_MEMORY_PROFILE AND ASAN) + message(STATUS "ASAN has been disabled because MG_MEMORY_PROFILE is enabled") + set(ASAN OFF) +endif () +if (MG_MEMORY_PROFILE) + add_compile_definitions(MG_MEMORY_PROFILE) +endif () + if (ASAN) message(WARNING "Disabling jemalloc as it doesn't work well with ASAN") set(ENABLE_JEMALLOC OFF) diff --git a/environment/os/amzn-2.sh b/environment/os/amzn-2.sh index a9cc3e4b2..bac62233e 100755 --- 
a/environment/os/amzn-2.sh +++ b/environment/os/amzn-2.sh @@ -45,6 +45,7 @@ MEMGRAPH_BUILD_DEPS=( readline-devel # for memgraph console python3-devel # for query modules openssl-devel + openssl libseccomp-devel python3 python3-pip nmap-ncat # for tests # diff --git a/environment/os/centos-7.sh b/environment/os/centos-7.sh index d9fc93912..0e029fab5 100755 --- a/environment/os/centos-7.sh +++ b/environment/os/centos-7.sh @@ -43,6 +43,7 @@ MEMGRAPH_BUILD_DEPS=( readline-devel # for memgraph console python3-devel # for query modules openssl-devel + openssl libseccomp-devel python3 python-virtualenv python3-pip nmap-ncat # for qa, macro_benchmark and stress tests # diff --git a/environment/os/rocky-9.3.sh b/environment/os/rocky-9.3.sh index 571278654..6ec045b42 100755 --- a/environment/os/rocky-9.3.sh +++ b/environment/os/rocky-9.3.sh @@ -59,7 +59,7 @@ MEMGRAPH_BUILD_DEPS=( doxygen graphviz # source documentation generators which nodejs golang custom-golang1.18.9 # for driver tests zip unzip java-11-openjdk-devel java-17-openjdk java-17-openjdk-devel custom-maven3.9.3 # for driver tests - sbcl # for custom Lisp C++ preprocessing + cl-asdf common-lisp-controller sbcl # for custom Lisp C++ preprocessing autoconf # for jemalloc code generation libtool # for protobuf code generation cyrus-sasl-devel @@ -162,6 +162,30 @@ install() { fi continue fi + if [ "$pkg" == doxygen ]; then + if ! dnf list installed doxygen >/dev/null 2>/dev/null; then + dnf install -y https://dl.rockylinux.org/pub/rocky/9/CRB/x86_64/os/Packages/d/doxygen-1.9.1-11.el9.x86_64.rpm + fi + continue + fi + if [ "$pkg" == cl-asdf ]; then + if ! dnf list installed cl-asdf >/dev/null 2>/dev/null; then + dnf install -y https://pkgs.sysadmins.ws/el8/base/x86_64/cl-asdf-20101028-18.el8.noarch.rpm + fi + continue + fi + if [ "$pkg" == common-lisp-controller ]; then + if ! 
dnf list installed common-lisp-controller >/dev/null 2>/dev/null; then + dnf install -y https://pkgs.sysadmins.ws/el8/base/x86_64/common-lisp-controller-7.4-20.el8.noarch.rpm + fi + continue + fi + if [ "$pkg" == sbcl ]; then + if ! dnf list installed sbcl >/dev/null 2>/dev/null; then + dnf install -y https://pkgs.sysadmins.ws/el8/base/x86_64/sbcl-2.0.1-4.el8.x86_64.rpm + fi + continue + fi if [ "$pkg" == PyYAML ]; then if [ -z ${SUDO_USER+x} ]; then # Running as root (e.g. Docker). pip3 install --user PyYAML diff --git a/import/n2mg_cypherl.sh b/import/n2mg_cypherl.sh index b11f5d3e3..2605fc6c7 100755 --- a/import/n2mg_cypherl.sh +++ b/import/n2mg_cypherl.sh @@ -20,14 +20,18 @@ if [ ! -f "$INPUT" ]; then fi echo -e "${COLOR_ORANGE}NOTE:${COLOR_NULL} BEGIN and COMMIT are required because variables share the same name (e.g. row)" -echo -e "${COLOR_ORANGE}NOTE:${COLOR_NULL} CONSTRAINTS are just skipped -> ${COLOR_RED}please create consraints manually if needed${COLOR_NULL}" +echo -e "${COLOR_ORANGE}NOTE:${COLOR_NULL} CONSTRAINTS are just skipped -> ${COLOR_RED}please create constraints manually if needed${COLOR_NULL}" + +echo 'CREATE INDEX ON :`UNIQUE IMPORT LABEL`(`UNIQUE IMPORT ID`);' > "$OUTPUT" sed -e 's/^:begin/BEGIN/g; s/^BEGIN$/BEGIN;/g;' \ -e 's/^:commit/COMMIT/g; s/^COMMIT$/COMMIT;/g;' \ -e '/^CALL/d; /^SCHEMA AWAIT/d;' \ -e 's/CREATE RANGE INDEX FOR (n:/CREATE INDEX ON :/g;' \ -e 's/) ON (n./(/g;' \ - -e '/^CREATE CONSTRAINT/d; /^DROP CONSTRAINT/d;' "$INPUT" > "$OUTPUT" + -e '/^CREATE CONSTRAINT/d; /^DROP CONSTRAINT/d;' "$INPUT" >> "$OUTPUT" + +echo 'DROP INDEX ON :`UNIQUE IMPORT LABEL`(`UNIQUE IMPORT ID`);' >> "$OUTPUT" echo "" echo -e "${COLOR_GREEN}DONE!${COLOR_NULL} Please find Memgraph compatible cypherl|.cypher file under $OUTPUT" diff --git a/import/n2mg_separate_files_cypherl.sh b/import/n2mg_separate_files_cypherl.sh new file mode 100755 index 000000000..98049f193 --- /dev/null +++ b/import/n2mg_separate_files_cypherl.sh @@ -0,0 +1,61 @@ 
+#!/bin/bash -e +COLOR_ORANGE="\e[38;5;208m" +COLOR_GREEN="\e[38;5;35m" +COLOR_RED="\e[0;31m" +COLOR_NULL="\e[0m" + +print_help() { + echo -e "${COLOR_ORANGE}HOW TO RUN:${COLOR_NULL} $0 input_file_schema_path input_file_nodes_path input_file_relationships_path input_file_cleanup_path output_file_path" + exit 1 +} + +if [ "$#" -ne 5 ]; then + print_help +fi +INPUT_SCHEMA="$1" +INPUT_NODES="$2" +INPUT_RELATIONSHIPS="$3" +INPUT_CLEANUP="$4" +OUTPUT="$5" + +if [ ! -f "$INPUT_SCHEMA" ]; then + echo -e "${COLOR_RED}ERROR:${COLOR_NULL} input_file_path is not a file!" + print_help +fi + +if [ ! -f "$INPUT_NODES" ]; then + echo -e "${COLOR_RED}ERROR:${COLOR_NULL} input_file_path is not a file!" + print_help +fi + +if [ ! -f "$INPUT_RELATIONSHIPS" ]; then + echo -e "${COLOR_RED}ERROR:${COLOR_NULL} input_file_path is not a file!" + print_help +fi + +if [ ! -f "$INPUT_CLEANUP" ]; then + echo -e "${COLOR_RED}ERROR:${COLOR_NULL} input_file_path is not a file!" + print_help +fi + +echo -e "${COLOR_ORANGE}NOTE:${COLOR_NULL} BEGIN and COMMIT are required because variables share the same name (e.g. 
row)" +echo -e "${COLOR_ORANGE}NOTE:${COLOR_NULL} CONSTRAINTS are just skipped -> ${COLOR_RED}please create constraints manually if needed${COLOR_NULL}" + + +echo 'CREATE INDEX ON :`UNIQUE IMPORT LABEL`(`UNIQUE IMPORT ID`);' > "$OUTPUT" + +sed -e 's/CREATE RANGE INDEX FOR (n:/CREATE INDEX ON :/g;' \ + -e 's/) ON (n./(/g;' \ + -e '/^CREATE CONSTRAINT/d' $INPUT_SCHEMA >> "$OUTPUT" + +cat "$INPUT_NODES" >> "$OUTPUT" +cat "$INPUT_RELATIONSHIPS" >> "$OUTPUT" + +sed -e '/^DROP CONSTRAINT/d' "$INPUT_CLEANUP" >> "$OUTPUT" + +echo 'DROP INDEX ON :`UNIQUE IMPORT LABEL`(`UNIQUE IMPORT ID`);' >> "$OUTPUT" + +echo "" +echo -e "${COLOR_GREEN}DONE!${COLOR_NULL} Please find Memgraph compatible cypherl|.cypher file under $OUTPUT" +echo "" +echo "Please import data by executing => \`cat $OUTPUT | mgconsole\`" diff --git a/import/n2mg_separate_files_cypherls.sh b/import/n2mg_separate_files_cypherls.sh new file mode 100755 index 000000000..5b9057e24 --- /dev/null +++ b/import/n2mg_separate_files_cypherls.sh @@ -0,0 +1,64 @@ +#!/bin/bash -e +COLOR_ORANGE="\e[38;5;208m" +COLOR_GREEN="\e[38;5;35m" +COLOR_RED="\e[0;31m" +COLOR_NULL="\e[0m" + +print_help() { + echo -e "${COLOR_ORANGE}HOW TO RUN:${COLOR_NULL} $0 input_file_schema_path input_file_nodes_path input_file_relationships_path input_file_cleanup_path output_file_schema_path output_file_nodes_path output_file_relationships_path output_file_cleanup_path" + exit 1 +} + +if [ "$#" -ne 8 ]; then + print_help +fi +INPUT_SCHEMA="$1" +INPUT_NODES="$2" +INPUT_RELATIONSHIPS="$3" +INPUT_CLEANUP="$4" +OUTPUT_SCHEMA="$5" +OUTPUT_NODES="$6" +OUTPUT_RELATIONSHIPS="$7" +OUTPUT_CLEANUP="$8" + +if [ ! -f "$INPUT_SCHEMA" ]; then + echo -e "${COLOR_RED}ERROR:${COLOR_NULL} input_file_path is not a file!" + print_help +fi + +if [ ! -f "$INPUT_NODES" ]; then + echo -e "${COLOR_RED}ERROR:${COLOR_NULL} input_file_path is not a file!" + print_help +fi + +if [ ! 
-f "$INPUT_RELATIONSHIPS" ]; then + echo -e "${COLOR_RED}ERROR:${COLOR_NULL} input_file_path is not a file!" + print_help +fi + +if [ ! -f "$INPUT_CLEANUP" ]; then + echo -e "${COLOR_RED}ERROR:${COLOR_NULL} input_file_path is not a file!" + print_help +fi + +echo -e "${COLOR_ORANGE}NOTE:${COLOR_NULL} BEGIN and COMMIT are required because variables share the same name (e.g. row)" +echo -e "${COLOR_ORANGE}NOTE:${COLOR_NULL} CONSTRAINTS are just skipped -> ${COLOR_RED}please create constraints manually if needed${COLOR_NULL}" + + +echo 'CREATE INDEX ON :`UNIQUE IMPORT LABEL`(`UNIQUE IMPORT ID`);' > "$OUTPUT_SCHEMA" + +sed -e 's/CREATE RANGE INDEX FOR (n:/CREATE INDEX ON :/g;' \ + -e 's/) ON (n./(/g;' \ + -e '/^CREATE CONSTRAINT/d' $INPUT_SCHEMA >> "$OUTPUT_SCHEMA" + +cat "$INPUT_NODES" > "$OUTPUT_NODES" +cat "$INPUT_RELATIONSHIPS" > "$OUTPUT_RELATIONSHIPS" + +sed -e '/^DROP CONSTRAINT/d' "$INPUT_CLEANUP" >> "$OUTPUT_CLEANUP" + +echo 'DROP INDEX ON :`UNIQUE IMPORT LABEL`(`UNIQUE IMPORT ID`);' >> "$OUTPUT_CLEANUP" + +echo "" +echo -e "${COLOR_GREEN}DONE!${COLOR_NULL} Please find Memgraph compatible cypherl|.cypher files under $OUTPUT_SCHEMA, $OUTPUT_NODES, $OUTPUT_RELATIONSHIPS and $OUTPUT_CLEANUP" +echo "" +echo "Please import data by executing => \`cat $OUTPUT_SCHEMA | mgconsole\`, \`cat $OUTPUT_NODES | mgconsole\`, \`cat $OUTPUT_RELATIONSHIPS | mgconsole\` and \`cat $OUTPUT_CLEANUP | mgconsole\`" diff --git a/init b/init index 9187ee5aa..5d1799237 100755 --- a/init +++ b/init @@ -14,6 +14,7 @@ function print_help () { echo "Optional arguments:" echo -e " -h\tdisplay this help and exit" echo -e " --without-libs-setup\tskip the step for setting up libs" + echo -e " --ci\tscript is being run inside ci" } function setup_virtualenv () { @@ -35,6 +36,7 @@ function setup_virtualenv () { } setup_libs=true +ci=false if [[ $# -eq 1 && "$1" == "-h" ]]; then print_help exit 0 @@ -45,6 +47,10 @@ else shift setup_libs=false ;; + --ci) + shift + ci=true + ;; *) # unknown option 
echo "Invalid argument provided: $1" @@ -76,11 +82,13 @@ if [[ "$setup_libs" == "true" ]]; then fi # Fix for centos 7 during release -if [ "${DISTRO}" = "centos-7" ] || [ "${DISTRO}" = "debian-11" ] || [ "${DISTRO}" = "amzn-2" ]; then - if python3 -m pip show virtualenv >/dev/null 2>/dev/null; then - python3 -m pip uninstall -y virtualenv +if [[ "$ci" == "false" ]]; then + if [ "${DISTRO}" = "centos-7" ] || [ "${DISTRO}" = "debian-11" ] || [ "${DISTRO}" = "amzn-2" ]; then + if python3 -m pip show virtualenv >/dev/null 2>/dev/null; then + python3 -m pip uninstall -y virtualenv + fi + python3 -m pip install virtualenv fi - python3 -m pip install virtualenv fi # setup gql_behave dependencies @@ -119,14 +127,16 @@ fi # Install precommit hook except on old operating systems because we don't # develop on them -> pre-commit hook not required -> we can use latest # packages. -if [ "${DISTRO}" != "centos-7" ] && [ "$DISTRO" != "debian-10" ] && [ "${DISTRO}" != "ubuntu-18.04" ] && [ "${DISTRO}" != "amzn-2" ]; then - python3 -m pip install pre-commit - python3 -m pre_commit install - # Install py format tools for usage during the development. - echo "Install black formatter" - python3 -m pip install black==23.1.* - echo "Install isort" - python3 -m pip install isort==5.12.* +if [[ "$ci" == "false" ]]; then + if [ "${DISTRO}" != "centos-7" ] && [ "$DISTRO" != "debian-10" ] && [ "${DISTRO}" != "ubuntu-18.04" ] && [ "${DISTRO}" != "amzn-2" ]; then + python3 -m pip install pre-commit + python3 -m pre_commit install + # Install py format tools for usage during the development. 
+ echo "Install black formatter" + python3 -m pip install black==23.1.* + echo "Install isort" + python3 -m pip install isort==5.12.* + fi fi # Link `include/mgp.py` with `release/mgp/mgp.py` diff --git a/libs/setup.sh b/libs/setup.sh index 9c2a38c47..e23c5efef 100755 --- a/libs/setup.sh +++ b/libs/setup.sh @@ -127,6 +127,7 @@ declare -A primary_urls=( ["jemalloc"]="http://$local_cache_host/git/jemalloc.git" ["range-v3"]="http://$local_cache_host/git/range-v3.git" ["nuraft"]="http://$local_cache_host/git/NuRaft.git" + ["asio"]="http://$local_cache_host/git/asio.git" ) # The goal of secondary urls is to have links to the "source of truth" of @@ -157,6 +158,7 @@ declare -A secondary_urls=( ["jemalloc"]="https://github.com/jemalloc/jemalloc.git" ["range-v3"]="https://github.com/ericniebler/range-v3.git" ["nuraft"]="https://github.com/eBay/NuRaft.git" + ["asio"]="https://github.com/chriskohlhoff/asio.git" ) # antlr @@ -266,13 +268,13 @@ repo_clone_try_double "${primary_urls[jemalloc]}" "${secondary_urls[jemalloc]}" pushd jemalloc ./autogen.sh -MALLOC_CONF="retain:false,percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:5000,dirty_decay_ms:5000" \ +MALLOC_CONF="background_thread:true,retain:false,percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:5000,dirty_decay_ms:5000" \ ./configure \ --disable-cxx \ --with-lg-page=12 \ --with-lg-hugepage=21 \ --enable-shared=no --prefix=$working_dir \ - --with-malloc-conf="retain:false,percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:5000,dirty_decay_ms:5000" + --with-malloc-conf="background_thread:true,retain:false,percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:5000,dirty_decay_ms:5000" make -j$CPUS install popd @@ -286,5 +288,7 @@ nuraft_tag="v2.1.0" repo_clone_try_double "${primary_urls[nuraft]}" "${secondary_urls[nuraft]}" "nuraft" "$nuraft_tag" true pushd nuraft git apply ../nuraft2.1.0.patch +asio_tag="asio-1-29-0" +repo_clone_try_double "${primary_urls[asio]}" "${secondary_urls[asio]}" "asio" 
"$asio_tag" true ./prepare.sh popd diff --git a/release/package/amd-builders-v4.yml b/release/package/amd-builders-v4.yml new file mode 100644 index 000000000..2e3d4c5d2 --- /dev/null +++ b/release/package/amd-builders-v4.yml @@ -0,0 +1,73 @@ +version: "3" +services: + mgbuild_v4_amzn-2: + image: "memgraph/mgbuild:v4_amzn-2" + build: + context: amzn-2 + args: + TOOLCHAIN_VERSION: "v4" + container_name: "mgbuild_v4_amzn-2" + + mgbuild_v4_centos-7: + image: "memgraph/mgbuild:v4_centos-7" + build: + context: centos-7 + args: + TOOLCHAIN_VERSION: "v4" + container_name: "mgbuild_v4_centos-7" + + mgbuild_v4_centos-9: + image: "memgraph/mgbuild:v4_centos-9" + build: + context: centos-9 + args: + TOOLCHAIN_VERSION: "v4" + container_name: "mgbuild_v4_centos-9" + + mgbuild_v4_debian-10: + image: "memgraph/mgbuild:v4_debian-10" + build: + context: debian-10 + args: + TOOLCHAIN_VERSION: "v4" + container_name: "mgbuild_v4_debian-10" + + mgbuild_v4_debian-11: + image: "memgraph/mgbuild:v4_debian-11" + build: + context: debian-11 + args: + TOOLCHAIN_VERSION: "v4" + container_name: "mgbuild_v4_debian-11" + + mgbuild_v4_fedora-36: + image: "memgraph/mgbuild:v4_fedora-36" + build: + context: fedora-36 + args: + TOOLCHAIN_VERSION: "v4" + container_name: "mgbuild_v4_fedora-36" + + mgbuild_v4_ubuntu-18.04: + image: "memgraph/mgbuild:v4_ubuntu-18.04" + build: + context: ubuntu-18.04 + args: + TOOLCHAIN_VERSION: "v4" + container_name: "mgbuild_v4_ubuntu-18.04" + + mgbuild_v4_ubuntu-20.04: + image: "memgraph/mgbuild:v4_ubuntu-20.04" + build: + context: ubuntu-20.04 + args: + TOOLCHAIN_VERSION: "v4" + container_name: "mgbuild_v4_ubuntu-20.04" + + mgbuild_v4_ubuntu-22.04: + image: "memgraph/mgbuild:v4_ubuntu-22.04" + build: + context: ubuntu-22.04 + args: + TOOLCHAIN_VERSION: "v4" + container_name: "mgbuild_v4_ubuntu-22.04" diff --git a/release/package/amd-builders-v5.yml b/release/package/amd-builders-v5.yml new file mode 100644 index 000000000..d37bc6092 --- /dev/null +++ 
b/release/package/amd-builders-v5.yml @@ -0,0 +1,81 @@ +version: "3" +services: + mgbuild_v5_amzn-2: + image: "memgraph/mgbuild:v5_amzn-2" + build: + context: amzn-2 + args: + TOOLCHAIN_VERSION: "v5" + container_name: "mgbuild_v5_amzn-2" + + mgbuild_v5_centos-7: + image: "memgraph/mgbuild:v5_centos-7" + build: + context: centos-7 + args: + TOOLCHAIN_VERSION: "v5" + container_name: "mgbuild_v5_centos-7" + + mgbuild_v5_centos-9: + image: "memgraph/mgbuild:v5_centos-9" + build: + context: centos-9 + args: + TOOLCHAIN_VERSION: "v5" + container_name: "mgbuild_v5_centos-9" + + mgbuild_v5_debian-11: + image: "memgraph/mgbuild:v5_debian-11" + build: + context: debian-11 + args: + TOOLCHAIN_VERSION: "v5" + container_name: "mgbuild_v5_debian-11" + + mgbuild_v5_debian-12: + image: "memgraph/mgbuild:v5_debian-12" + build: + context: debian-12 + args: + TOOLCHAIN_VERSION: "v5" + container_name: "mgbuild_v5_debian-12" + + mgbuild_v5_fedora-38: + image: "memgraph/mgbuild:v5_fedora-38" + build: + context: fedora-38 + args: + TOOLCHAIN_VERSION: "v5" + container_name: "mgbuild_v5_fedora-38" + + mgbuild_v5_fedora-39: + image: "memgraph/mgbuild:v5_fedora-39" + build: + context: fedora-39 + args: + TOOLCHAIN_VERSION: "v5" + container_name: "mgbuild_v5_fedora-39" + + mgbuild_v5_rocky-9.3: + image: "memgraph/mgbuild:v5_rocky-9.3" + build: + context: rocky-9.3 + args: + TOOLCHAIN_VERSION: "v5" + container_name: "mgbuild_v5_rocky-9.3" + + mgbuild_v5_ubuntu-20.04: + image: "memgraph/mgbuild:v5_ubuntu-20.04" + build: + context: ubuntu-20.04 + args: + TOOLCHAIN_VERSION: "v5" + container_name: "mgbuild_v5_ubuntu-20.04" + + mgbuild_v5_ubuntu-22.04: + image: "memgraph/mgbuild:v5_ubuntu-22.04" + build: + context: ubuntu-22.04 + args: + TOOLCHAIN_VERSION: "v5" + container_name: "mgbuild_v5_ubuntu-22.04" diff --git a/release/package/amzn-2/Dockerfile b/release/package/amzn-2/Dockerfile index ba3ad34b9..80fdb9893 100644 --- a/release/package/amzn-2/Dockerfile +++ b/release/package/amzn-2/Dockerfile 
@@ -7,9 +7,34 @@ RUN yum -y update \ # Do NOT be smart here and clean the cache because the container is used in the # stateful context. -RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/${TOOLCHAIN_VERSION}/${TOOLCHAIN_VERSION}-binaries-amzn-2-x86_64.tar.gz \ - -O ${TOOLCHAIN_VERSION}-binaries-amzn-2-x86_64.tar.gz \ - && tar xzvf ${TOOLCHAIN_VERSION}-binaries-amzn-2-x86_64.tar.gz -C /opt \ - && rm ${TOOLCHAIN_VERSION}-binaries-amzn-2-x86_64.tar.gz +# Download and install toolchain +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-amzn-2-x86_64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-amzn-2-x86_64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-amzn-2-x86_64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-amzn-2-x86_64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/amzn-2.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/amzn-2.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. 
&& rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash + +# Install PyYAML (only for amzn-2, centos-7, cento-9 and rocky-9) +RUN pip3 install --user PyYAML ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/arm-builders-v4.yml b/release/package/arm-builders-v4.yml new file mode 100644 index 000000000..18cd5c4e9 --- /dev/null +++ b/release/package/arm-builders-v4.yml @@ -0,0 +1,18 @@ +version: "3" + +services: + mgbuild_v4_debian-11-arm: + image: "memgraph/mgbuild:v4_debian-11-arm" + build: + context: debian-11-arm + args: + TOOLCHAIN_VERSION: "v4" + container_name: "mgbuild_v4_debian-11-arm" + + mgbuild_v4_ubuntu_v4_22.04-arm: + image: "memgraph/mgbuild:v4_ubuntu-22.04-arm" + build: + context: ubuntu-22.04-arm + args: + TOOLCHAIN_VERSION: "v4" + container_name: "mgbuild_v4_ubuntu-22.04-arm" diff --git a/release/package/arm-builders-v5.yml b/release/package/arm-builders-v5.yml new file mode 100644 index 000000000..c276a49a8 --- /dev/null +++ b/release/package/arm-builders-v5.yml @@ -0,0 +1,18 @@ +version: "3" + +services: + debian-12-arm: + image: "memgraph/mgbuild:v5_debian-12-arm" + build: + context: debian-12-arm + args: + TOOLCHAIN_VERSION: "v4" + container_name: "mgbuild_debian-12-arm" + + ubuntu-22.04-arm: + image: "memgraph/mgbuild:v5_ubuntu-22.04-arm" + build: + context: ubuntu-22.04-arm + args: + TOOLCHAIN_VERSION: "v4" + container_name: "mgbuild_ubuntu-22.04-arm" diff --git a/release/package/arm-builders.yml b/release/package/arm-builders.yml deleted file mode 100644 index d52f3bb26..000000000 --- a/release/package/arm-builders.yml +++ /dev/null @@ -1,11 +0,0 @@ -version: "3" - 
-services: - debian-11-arm: - build: - context: debian-11-arm - container_name: "mgbuild_debian-11-arm" - ubuntu-2204-arm: - build: - context: ubuntu-22.04-arm - container_name: "mgbuild_ubuntu-22.04-arm" diff --git a/release/package/centos-7/Dockerfile b/release/package/centos-7/Dockerfile index ff386c551..d34dfee77 100644 --- a/release/package/centos-7/Dockerfile +++ b/release/package/centos-7/Dockerfile @@ -7,9 +7,33 @@ RUN yum -y update \ # Do NOT be smart here and clean the cache because the container is used in the # stateful context. -RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/${TOOLCHAIN_VERSION}/${TOOLCHAIN_VERSION}-binaries-centos-7-x86_64.tar.gz \ - -O ${TOOLCHAIN_VERSION}-binaries-centos-7-x86_64.tar.gz \ - && tar xzvf ${TOOLCHAIN_VERSION}-binaries-centos-7-x86_64.tar.gz -C /opt \ - && rm ${TOOLCHAIN_VERSION}-binaries-centos-7-x86_64.tar.gz +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-centos-7-x86_64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-centos-7-x86_64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-centos-7-x86_64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-centos-7-x86_64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/centos-7.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/centos-7.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. 
&& rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash + +# Install PyYAML (only for amzn-2, centos-7, cento-9 and rocky-9) +RUN pip3 install --user PyYAML ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/centos-9/Dockerfile b/release/package/centos-9/Dockerfile index 1c4c9541e..7c559a932 100644 --- a/release/package/centos-9/Dockerfile +++ b/release/package/centos-9/Dockerfile @@ -7,9 +7,33 @@ RUN yum -y update \ # Do NOT be smart here and clean the cache because the container is used in the # stateful context. -RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/${TOOLCHAIN_VERSION}/${TOOLCHAIN_VERSION}-binaries-centos-9-x86_64.tar.gz \ - -O ${TOOLCHAIN_VERSION}-binaries-centos-9-x86_64.tar.gz \ - && tar xzvf ${TOOLCHAIN_VERSION}-binaries-centos-9-x86_64.tar.gz -C /opt \ - && rm ${TOOLCHAIN_VERSION}-binaries-centos-9-x86_64.tar.gz +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-centos-9-x86_64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-centos-9-x86_64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-centos-9-x86_64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-centos-9-x86_64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/centos-9.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/centos-9.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. 
&& rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash + +# Install PyYAML (only for amzn-2, centos-7, cento-9 and rocky-9) +RUN pip3 install --user PyYAML ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/debian-10/Dockerfile b/release/package/debian-10/Dockerfile index abd4a7f0e..a3ef12cc6 100644 --- a/release/package/debian-10/Dockerfile +++ b/release/package/debian-10/Dockerfile @@ -10,9 +10,30 @@ RUN apt update && apt install -y \ # Do NOT be smart here and clean the cache because the container is used in the # stateful context. -RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/${TOOLCHAIN_VERSION}/${TOOLCHAIN_VERSION}-binaries-debian-10-amd64.tar.gz \ - -O ${TOOLCHAIN_VERSION}-binaries-debian-10-amd64.tar.gz \ - && tar xzvf ${TOOLCHAIN_VERSION}-binaries-debian-10-amd64.tar.gz -C /opt \ - && rm ${TOOLCHAIN_VERSION}-binaries-debian-10-amd64.tar.gz +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-debian-10-amd64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-debian-10-amd64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-debian-10-amd64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-debian-10-amd64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/debian-10.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/debian-10.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. 
&& rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/debian-11-arm/Dockerfile b/release/package/debian-11-arm/Dockerfile index 5040d8041..9b3aa3c8c 100644 --- a/release/package/debian-11-arm/Dockerfile +++ b/release/package/debian-11-arm/Dockerfile @@ -10,9 +10,30 @@ RUN apt update && apt install -y \ # Do NOT be smart here and clean the cache because the container is used in the # stateful context. -RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/${TOOLCHAIN_VERSION}/${TOOLCHAIN_VERSION}-binaries-debian-11-arm64.tar.gz \ - -O ${TOOLCHAIN_VERSION}-binaries-debian-11-arm64.tar.gz \ - && tar xzvf ${TOOLCHAIN_VERSION}-binaries-debian-11-arm64.tar.gz -C /opt \ - && rm ${TOOLCHAIN_VERSION}-binaries-debian-11-arm64.tar.gz +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-debian-11-arm64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-debian-11-arm64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-debian-11-arm64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-debian-11-arm64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/debian-11-arm.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/debian-11-arm.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. 
&& rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/debian-11/Dockerfile b/release/package/debian-11/Dockerfile index cf47f253e..62070eccf 100644 --- a/release/package/debian-11/Dockerfile +++ b/release/package/debian-11/Dockerfile @@ -10,9 +10,30 @@ RUN apt update && apt install -y \ # Do NOT be smart here and clean the cache because the container is used in the # stateful context. -RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/${TOOLCHAIN_VERSION}/${TOOLCHAIN_VERSION}-binaries-debian-11-amd64.tar.gz \ - -O ${TOOLCHAIN_VERSION}-binaries-debian-11-amd64.tar.gz \ - && tar xzvf ${TOOLCHAIN_VERSION}-binaries-debian-11-amd64.tar.gz -C /opt \ - && rm ${TOOLCHAIN_VERSION}-binaries-debian-11-amd64.tar.gz +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-debian-11-amd64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-debian-11-amd64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-debian-11-amd64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-debian-11-amd64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/debian-11.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/debian-11.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. 
&& rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/debian-12-arm/Dockerfile b/release/package/debian-12-arm/Dockerfile new file mode 100644 index 000000000..6d4200391 --- /dev/null +++ b/release/package/debian-12-arm/Dockerfile @@ -0,0 +1,39 @@ +FROM debian:12 + +ARG TOOLCHAIN_VERSION + +# Stops tzdata interactive configuration. +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt update && apt install -y \ + ca-certificates wget git +# Do NOT be smart here and clean the cache because the container is used in the +# stateful context. + +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-debian-12-arm64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-debian-12-arm64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-debian-12-arm64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-debian-12-arm64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/debian-12-arm.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/debian-12-arm.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. 
&& rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash + +ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/debian-12/Dockerfile b/release/package/debian-12/Dockerfile new file mode 100644 index 000000000..c35640eb3 --- /dev/null +++ b/release/package/debian-12/Dockerfile @@ -0,0 +1,39 @@ +FROM debian:12 + +ARG TOOLCHAIN_VERSION + +# Stops tzdata interactive configuration. +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt update && apt install -y \ + ca-certificates wget git +# Do NOT be smart here and clean the cache because the container is used in the +# stateful context. + +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-debian-12-amd64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-debian-12-amd64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-debian-12-amd64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-debian-12-amd64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/debian-12.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/debian-12.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. 
&& rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash + +ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/docker-compose.yml b/release/package/docker-compose.yml deleted file mode 100644 index 4da0526ba..000000000 --- a/release/package/docker-compose.yml +++ /dev/null @@ -1,38 +0,0 @@ -version: "3" -services: - mgbuild_centos-7: - build: - context: centos-7 - container_name: "mgbuild_centos-7" - mgbuild_centos-9: - build: - context: centos-9 - container_name: "mgbuild_centos-9" - mgbuild_debian-10: - build: - context: debian-10 - container_name: "mgbuild_debian-10" - mgbuild_debian-11: - build: - context: debian-11 - container_name: "mgbuild_debian-11" - mgbuild_ubuntu-18.04: - build: - context: ubuntu-18.04 - container_name: "mgbuild_ubuntu-18.04" - mgbuild_ubuntu-20.04: - build: - context: ubuntu-20.04 - container_name: "mgbuild_ubuntu-20.04" - mgbuild_ubuntu-22.04: - build: - context: ubuntu-22.04 - container_name: "mgbuild_ubuntu-22.04" - mgbuild_fedora-36: - build: - context: fedora-36 - container_name: "mgbuild_fedora-36" - mgbuild_amzn-2: - build: - context: amzn-2 - container_name: "mgbuild_amzn-2" diff --git a/release/package/fedora-36/Dockerfile b/release/package/fedora-36/Dockerfile index f84af132f..954a5b2bf 100644 --- a/release/package/fedora-36/Dockerfile +++ b/release/package/fedora-36/Dockerfile @@ -8,9 +8,30 @@ RUN yum -y update \ # Do NOT be smart here and clean the cache because the container is used in the # stateful context. 
-RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/${TOOLCHAIN_VERSION}/${TOOLCHAIN_VERSION}-binaries-fedora-36-x86_64.tar.gz \ - -O ${TOOLCHAIN_VERSION}-binaries-fedora-36-x86_64.tar.gz \ - && tar xzvf ${TOOLCHAIN_VERSION}-binaries-fedora-36-x86_64.tar.gz -C /opt \ - && rm ${TOOLCHAIN_VERSION}-binaries-fedora-36-x86_64.tar.gz +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-36-x86_64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-36-x86_64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-36-x86_64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-36-x86_64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/fedora-36.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/fedora-36.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. && rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/fedora-38/Dockerfile b/release/package/fedora-38/Dockerfile new file mode 100644 index 000000000..cf7454ae4 --- /dev/null +++ b/release/package/fedora-38/Dockerfile @@ -0,0 +1,37 @@ +FROM fedora:38 + +ARG TOOLCHAIN_VERSION + +# Stops tzdata interactive configuration. +RUN yum -y update \ + && yum install -y wget git +# Do NOT be smart here and clean the cache because the container is used in the +# stateful context. 
+ +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-38-amd64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-38-amd64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-38-amd64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-38-amd64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/fedora-38.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/fedora-38.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. && rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash + +ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/fedora-39/Dockerfile b/release/package/fedora-39/Dockerfile new file mode 100644 index 000000000..263017a3d --- /dev/null +++ b/release/package/fedora-39/Dockerfile @@ -0,0 +1,37 @@ +FROM fedora:39 + +ARG TOOLCHAIN_VERSION + +# Stops tzdata interactive configuration. +RUN yum -y update \ + && yum install -y wget git +# Do NOT be smart here and clean the cache because the container is used in the +# stateful context. 
+ +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-39-amd64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-39-amd64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-39-amd64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-fedora-39-amd64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/fedora-39.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/fedora-39.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. && rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash + +ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/mgbuild.sh b/release/package/mgbuild.sh new file mode 100755 index 000000000..3ff2f4979 --- /dev/null +++ b/release/package/mgbuild.sh @@ -0,0 +1,665 @@ +#!/bin/bash +set -Eeuo pipefail +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +SCRIPT_NAME=${0##*/} +PROJECT_ROOT="$SCRIPT_DIR/../.." 
+MGBUILD_HOME_DIR="/home/mg" +MGBUILD_ROOT_DIR="$MGBUILD_HOME_DIR/memgraph" + +DEFAULT_TOOLCHAIN="v5" +SUPPORTED_TOOLCHAINS=( + v4 v5 +) +DEFAULT_OS="all" +SUPPORTED_OS=( + all + amzn-2 + centos-7 centos-9 + debian-10 debian-11 debian-11-arm debian-12 debian-12-arm + fedora-36 fedora-38 fedora-39 + rocky-9.3 + ubuntu-18.04 ubuntu-20.04 ubuntu-22.04 ubuntu-22.04-arm +) +SUPPORTED_OS_V4=( + amzn-2 + centos-7 centos-9 + debian-10 debian-11 debian-11-arm + fedora-36 + ubuntu-18.04 ubuntu-20.04 ubuntu-22.04 ubuntu-22.04-arm +) +SUPPORTED_OS_V5=( + amzn-2 + centos-7 centos-9 + debian-11 debian-11-arm debian-12 debian-12-arm + fedora-38 fedora-39 + rocky-9.3 + ubuntu-20.04 ubuntu-22.04 ubuntu-22.04-arm +) +DEFAULT_BUILD_TYPE="Release" +SUPPORTED_BUILD_TYPES=( + Debug + Release + RelWithDebInfo +) +DEFAULT_ARCH="amd" +SUPPORTED_ARCHS=( + amd + arm +) +SUPPORTED_TESTS=( + clang-tidy cppcheck-and-clang-format code-analysis + code-coverage drivers durability e2e gql-behave + integration leftover-CTest macro-benchmark + mgbench stress-plain stress-ssl + unit unit-coverage upload-to-bench-graph + +) +DEFAULT_THREADS=0 +DEFAULT_ENTERPRISE_LICENSE="" +DEFAULT_ORGANIZATION_NAME="memgraph" + +print_help () { + echo -e "\nUsage: $SCRIPT_NAME [GLOBAL OPTIONS] COMMAND [COMMAND OPTIONS]" + echo -e "\nInteract with mgbuild containers" + + echo -e "\nCommands:" + echo -e " build Build mgbuild image" + echo -e " build-memgraph [OPTIONS] Build memgraph binary inside mgbuild container" + echo -e " copy OPTIONS Copy an artifact from mgbuild container to host" + echo -e " package-memgraph Create memgraph package from built binary inside mgbuild container" + echo -e " pull Pull mgbuild image from dockerhub" + echo -e " push [OPTIONS] Push mgbuild image to dockerhub" + echo -e " run [OPTIONS] Run mgbuild container" + echo -e " stop [OPTIONS] Stop mgbuild container" + echo -e " test-memgraph TEST Run a selected test TEST (see supported tests below) inside mgbuild container" + + echo -e 
"\nSupported tests:" + echo -e " \"${SUPPORTED_TESTS[*]}\"" + + echo -e "\nGlobal options:" + echo -e " --arch string Specify target architecture (\"${SUPPORTED_ARCHS[*]}\") (default \"$DEFAULT_ARCH\")" + echo -e " --build-type string Specify build type (\"${SUPPORTED_BUILD_TYPES[*]}\") (default \"$DEFAULT_BUILD_TYPE\")" + echo -e " --enterprise-license string Specify the enterprise license (default \"\")" + echo -e " --organization-name string Specify the organization name (default \"memgraph\")" + echo -e " --os string Specify operating system (\"${SUPPORTED_OS[*]}\") (default \"$DEFAULT_OS\")" + echo -e " --threads int Specify the number of threads a command will use (default \"\$(nproc)\" for container)" + echo -e " --toolchain string Specify toolchain version (\"${SUPPORTED_TOOLCHAINS[*]}\") (default \"$DEFAULT_TOOLCHAIN\")" + + echo -e "\nbuild-memgraph options:" + echo -e " --asan Build with ASAN" + echo -e " --community Build community version" + echo -e " --coverage Build with code coverage" + echo -e " --for-docker Add flag -DMG_TELEMETRY_ID_OVERRIDE=DOCKER to cmake" + echo -e " --for-platform Add flag -DMG_TELEMETRY_ID_OVERRIDE=DOCKER-PLATFORM to cmake" + echo -e " --init-only Only run init script" + echo -e " --no-copy Don't copy the memgraph repo from host." 
+ echo -e " Use this option with caution, be sure that memgraph source code is in correct location inside mgbuild container" + echo -e " --ubsan Build with UBSAN" + + echo -e "\ncopy options:" + echo -e " --binary Copy memgraph binary from mgbuild container to host" + echo -e " --build-logs Copy build logs from mgbuild container to host" + echo -e " --package Copy memgraph package from mgbuild container to host" + + echo -e "\npush options:" + echo -e " -p, --password string Specify password for docker login" + echo -e " -u, --username string Specify username for docker login" + + echo -e "\nrun options:" + echo -e " --pull Pull the mgbuild image before running" + + echo -e "\nstop options:" + echo -e " --remove Remove the stopped mgbuild container" + + echo -e "\nToolchain v4 supported OSs:" + echo -e " \"${SUPPORTED_OS_V4[*]}\"" + + echo -e "\nToolchain v5 supported OSs:" + echo -e " \"${SUPPORTED_OS_V5[*]}\"" + + echo -e "\nExample usage:" + echo -e " $SCRIPT_NAME --os debian-12 --toolchain v5 --arch amd run" + echo -e " $SCRIPT_NAME --os debian-12 --toolchain v5 --arch amd --build-type RelWithDebInfo build-memgraph --community" + echo -e " $SCRIPT_NAME --os debian-12 --toolchain v5 --arch amd --build-type RelWithDebInfo test-memgraph unit" + echo -e " $SCRIPT_NAME --os debian-12 --toolchain v5 --arch amd package" + echo -e " $SCRIPT_NAME --os debian-12 --toolchain v5 --arch amd copy --package" + echo -e " $SCRIPT_NAME --os debian-12 --toolchain v5 --arch amd stop --remove" +} + +check_support() { + local is_supported=false + case "$1" in + arch) + for e in "${SUPPORTED_ARCHS[@]}"; do + if [[ "$e" == "$2" ]]; then + is_supported=true + break + fi + done + if [[ "$is_supported" == false ]]; then + echo -e "Error: Architecture $2 isn't supported!\nChoose from ${SUPPORTED_ARCHS[*]}" + exit 1 + fi + ;; + build_type) + for e in "${SUPPORTED_BUILD_TYPES[@]}"; do + if [[ "$e" == "$2" ]]; then + is_supported=true + break + fi + done + if [[ "$is_supported" == false ]]; 
then + echo -e "Error: Build type $2 isn't supported!\nChoose from ${SUPPORTED_BUILD_TYPES[*]}" + exit 1 + fi + ;; + os) + for e in "${SUPPORTED_OS[@]}"; do + if [[ "$e" == "$2" ]]; then + is_supported=true + break + fi + done + if [[ "$is_supported" == false ]]; then + echo -e "Error: OS $2 isn't supported!\nChoose from ${SUPPORTED_OS[*]}" + exit 1 + fi + ;; + toolchain) + for e in "${SUPPORTED_TOOLCHAINS[@]}"; do + if [[ "$e" == "$2" ]]; then + is_supported=true + break + fi + done + if [[ "$is_supported" == false ]]; then + echo -e "Error: Toolchain version $2 isn't supported!\nChoose from ${SUPPORTED_TOOLCHAINS[*]}" + exit 1 + fi + ;; + os_toolchain_combo) + if [[ "$3" == "v4" ]]; then + local SUPPORTED_OS_TOOLCHAIN=("${SUPPORTED_OS_V4[@]}") + elif [[ "$3" == "v5" ]]; then + local SUPPORTED_OS_TOOLCHAIN=("${SUPPORTED_OS_V5[@]}") + else + echo -e "Error: $3 isn't a supported toolchain_version!\nChoose from ${SUPPORTED_TOOLCHAINS[*]}" + exit 1 + fi + for e in "${SUPPORTED_OS_TOOLCHAIN[@]}"; do + if [[ "$e" == "$2" ]]; then + is_supported=true + break + fi + done + if [[ "$is_supported" == false ]]; then + echo -e "Error: Toolchain version $3 doesn't support OS $2!\nChoose from ${SUPPORTED_OS_TOOLCHAIN[*]}" + exit 1 + fi + ;; + *) + echo -e "Error: This function can only check arch, build_type, os, toolchain version and os toolchain combination" + exit 1 + ;; + esac +} + + +################################################## +######## BUILD, COPY AND PACKAGE MEMGRAPH ######## +################################################## +build_memgraph () { + local build_container="mgbuild_${toolchain_version}_${os}" + local ACTIVATE_TOOLCHAIN="source /opt/toolchain-${toolchain_version}/activate" + local container_build_dir="$MGBUILD_ROOT_DIR/build" + local container_output_dir="$container_build_dir/output" + local arm_flag="" + if [[ "$arch" == "arm" ]] || [[ "$os" =~ "-arm" ]]; then + arm_flag="-DMG_ARCH="ARM64"" + fi + local
build_type_flag="-DCMAKE_BUILD_TYPE=$build_type" + local telemetry_id_override_flag="" + local community_flag="" + local coverage_flag="" + local asan_flag="" + local ubsan_flag="" + local init_only=false + local for_docker=false + local for_platform=false + local copy_from_host=true + while [[ "$#" -gt 0 ]]; do + case "$1" in + --community) + community_flag="-DMG_ENTERPRISE=OFF" + shift 1 + ;; + --init-only) + init_only=true + shift 1 + ;; + --for-docker) + for_docker=true + if [[ "$for_platform" == "true" ]]; then + echo "Error: Cannot combine --for-docker and --for-platform flags" + exit 1 + fi + telemetry_id_override_flag=" -DMG_TELEMETRY_ID_OVERRIDE=DOCKER " + shift 1 + ;; + --for-platform) + for_platform=true + if [[ "$for_docker" == "true" ]]; then + echo "Error: Cannot combine --for-docker and --for-platform flags" + exit 1 + fi + telemetry_id_override_flag=" -DMG_TELEMETRY_ID_OVERRIDE=DOCKER-PLATFORM " + shift 1 + ;; + --coverage) + coverage_flag="-DTEST_COVERAGE=ON" + shift 1 + ;; + --asan) + asan_flag="-DASAN=ON" + shift 1 + ;; + --ubsan) + ubsan_flag="-DUBSAN=ON" + shift 1 + ;; + --no-copy) + copy_from_host=false + shift 1 + ;; + *) + echo "Error: Unknown flag '$1'" + exit 1 + ;; + esac + done + + echo "Initializing deps ..." + # If master is not the current branch, fetch it, because the get_version + # script depends on it. If we are on master, the fetch command is going to + # fail so that's why there is the explicit check. + # Required here because Docker build container can't access remote. + cd "$PROJECT_ROOT" + if [[ "$(git rev-parse --abbrev-ref HEAD)" != "master" ]]; then + git fetch origin master:master + fi + + if [[ "$copy_from_host" == "true" ]]; then + # Ensure we have a clean build directory + docker exec -u mg "$build_container" bash -c "rm -rf $MGBUILD_ROOT_DIR && mkdir -p $MGBUILD_ROOT_DIR" + echo "Copying project files..." + docker cp "$PROJECT_ROOT/." 
"$build_container:$MGBUILD_ROOT_DIR/" + fi + # Change ownership of copied files so the mg user inside container can access them + docker exec -u root $build_container bash -c "chown -R mg:mg $MGBUILD_ROOT_DIR" + + echo "Installing dependencies using '/memgraph/environment/os/$os.sh' script..." + docker exec -u root "$build_container" bash -c "$MGBUILD_ROOT_DIR/environment/os/$os.sh check TOOLCHAIN_RUN_DEPS || /environment/os/$os.sh install TOOLCHAIN_RUN_DEPS" + docker exec -u root "$build_container" bash -c "$MGBUILD_ROOT_DIR/environment/os/$os.sh check MEMGRAPH_BUILD_DEPS || /environment/os/$os.sh install MEMGRAPH_BUILD_DEPS" + + echo "Building targeted package..." + # Fix issue with git marking directory as not safe + docker exec -u mg "$build_container" bash -c "cd $MGBUILD_ROOT_DIR && git config --global --add safe.directory '*'" + docker exec -u mg "$build_container" bash -c "cd $MGBUILD_ROOT_DIR && $ACTIVATE_TOOLCHAIN && ./init --ci" + if [[ "$init_only" == "true" ]]; then + return + fi + + echo "Building Memgraph for $os on $build_container..." + docker exec -u mg "$build_container" bash -c "cd $container_build_dir && rm -rf ./*" + # Fix cmake failing locally if remote is clone via ssh + docker exec -u mg "$build_container" bash -c "cd $MGBUILD_ROOT_DIR && git remote set-url origin https://github.com/memgraph/memgraph.git" + + # Define cmake command + local cmake_cmd="cmake $build_type_flag $arm_flag $community_flag $telemetry_id_override_flag $coverage_flag $asan_flag $ubsan_flag .." + docker exec -u mg "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN && $cmake_cmd" + + # ' is used instead of " because we need to run make within the allowed + # container resources. + # Default value for $threads is 0 instead of $(nproc) because macos + # doesn't support the nproc command. 
+ # 0 is set for default value and checked here because mgbuild containers + # support nproc + # shellcheck disable=SC2016 + if [[ "$threads" == 0 ]]; then + docker exec -u mg "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN "'&& make -j$(nproc)' + docker exec -u mg "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN "'&& make -j$(nproc) -B mgconsole' + else + docker exec -u mg "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN "'&& make -j$threads' + docker exec -u mg "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN "'&& make -j$threads -B mgconsole' + fi +} + +package_memgraph() { + local ACTIVATE_TOOLCHAIN="source /opt/toolchain-${toolchain_version}/activate" + local build_container="mgbuild_${toolchain_version}_${os}" + local container_output_dir="$MGBUILD_ROOT_DIR/build/output" + local package_command="" + if [[ "$os" =~ ^"centos".* ]] || [[ "$os" =~ ^"fedora".* ]] || [[ "$os" =~ ^"amzn".* ]] || [[ "$os" =~ ^"rocky".* ]]; then + docker exec -u root "$build_container" bash -c "yum -y update" + package_command=" cpack -G RPM --config ../CPackConfig.cmake && rpmlint --file='../../release/rpm/rpmlintrc' memgraph*.rpm " + fi + if [[ "$os" =~ ^"debian".* ]]; then + docker exec -u root "$build_container" bash -c "apt --allow-releaseinfo-change -y update" + package_command=" cpack -G DEB --config ../CPackConfig.cmake " + fi + if [[ "$os" =~ ^"ubuntu".* ]]; then + docker exec -u root "$build_container" bash -c "apt update" + package_command=" cpack -G DEB --config ../CPackConfig.cmake " + fi + docker exec -u mg "$build_container" bash -c "mkdir -p $container_output_dir && cd $container_output_dir && $ACTIVATE_TOOLCHAIN && $package_command" +} + +copy_memgraph() { + local build_container="mgbuild_${toolchain_version}_${os}" + case "$1" in + --binary) + echo "Copying memgraph binary to host..." 
+ local container_output_path="$MGBUILD_ROOT_DIR/build/memgraph" + local host_output_path="$PROJECT_ROOT/build/memgraph" + mkdir -p "$PROJECT_ROOT/build" + docker cp -L $build_container:$container_output_path $host_output_path + echo "Binary saved to $host_output_path" + ;; + --build-logs) + echo "Copying memgraph build logs to host..." + local container_output_path="$MGBUILD_ROOT_DIR/build/logs" + local host_output_path="$PROJECT_ROOT/build/logs" + mkdir -p "$PROJECT_ROOT/build" + docker cp -L $build_container:$container_output_path $host_output_path + echo "Build logs saved to $host_output_path" + ;; + --package) + echo "Copying memgraph package to host..." + local container_output_dir="$MGBUILD_ROOT_DIR/build/output" + local host_output_dir="$PROJECT_ROOT/build/output/$os" + local last_package_name=$(docker exec -u mg "$build_container" bash -c "cd $container_output_dir && ls -t memgraph* | head -1") + mkdir -p "$host_output_dir" + docker cp "$build_container:$container_output_dir/$last_package_name" "$host_output_dir/$last_package_name" + echo "Package saved to $host_output_dir/$last_package_name" + ;; + *) + echo "Error: Unknown flag '$1'" + exit 1 + ;; + esac +} + + +################################################## +##################### TESTS ###################### +################################################## +test_memgraph() { + local ACTIVATE_TOOLCHAIN="source /opt/toolchain-${toolchain_version}/activate" + local ACTIVATE_VENV="./setup.sh /opt/toolchain-${toolchain_version}/activate" + local EXPORT_LICENSE="export MEMGRAPH_ENTERPRISE_LICENSE=$enterprise_license" + local EXPORT_ORG_NAME="export MEMGRAPH_ORGANIZATION_NAME=$organization_name" + local BUILD_DIR="$MGBUILD_ROOT_DIR/build" + local build_container="mgbuild_${toolchain_version}_${os}" + echo "Running $1 test on $build_container..." 
+ + case "$1" in + unit) + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $BUILD_DIR && $ACTIVATE_TOOLCHAIN "'&& ctest -R memgraph__unit --output-on-failure -j$threads' + ;; + unit-coverage) + local setup_lsan_ubsan="export LSAN_OPTIONS=suppressions=$BUILD_DIR/../tools/lsan.supp && export UBSAN_OPTIONS=halt_on_error=1" + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $BUILD_DIR && $ACTIVATE_TOOLCHAIN && $setup_lsan_ubsan "'&& ctest -R memgraph__unit --output-on-failure -j2' + ;; + leftover-CTest) + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $BUILD_DIR && $ACTIVATE_TOOLCHAIN "'&& ctest -E "(memgraph__unit|memgraph__benchmark)" --output-on-failure' + ;; + drivers) + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR "'&& ./tests/drivers/run.sh' + ;; + integration) + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR "'&& tests/integration/run.sh' + ;; + cppcheck-and-clang-format) + local test_output_path="$MGBUILD_ROOT_DIR/tools/github/cppcheck_and_clang_format.txt" + local test_output_host_dest="$PROJECT_ROOT/tools/github/cppcheck_and_clang_format.txt" + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR/tools/github && $ACTIVATE_TOOLCHAIN "'&& ./cppcheck_and_clang_format diff' + docker cp $build_container:$test_output_path $test_output_host_dest + ;; + stress-plain) + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR/tests/stress && source ve3/bin/activate "'&& ./continuous_integration' + ;; + stress-ssl) + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR/tests/stress && source ve3/bin/activate "'&& ./continuous_integration --use-ssl' + ;; + durability) + docker exec -u mg 
$build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR/tests/stress && source ve3/bin/activate "'&& python3 durability --num-steps 5' + ;; + gql-behave) + local test_output_dir="$MGBUILD_ROOT_DIR/tests/gql_behave" + local test_output_host_dest="$PROJECT_ROOT/tests/gql_behave" + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR/tests && $ACTIVATE_VENV && cd $MGBUILD_ROOT_DIR/tests/gql_behave "'&& ./continuous_integration' + docker cp $build_container:$test_output_dir/gql_behave_status.csv $test_output_host_dest/gql_behave_status.csv + docker cp $build_container:$test_output_dir/gql_behave_status.html $test_output_host_dest/gql_behave_status.html + ;; + macro-benchmark) + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && export USER=mg && export LANG=$(echo $LANG) && cd $MGBUILD_ROOT_DIR/tests/macro_benchmark "'&& ./harness QuerySuite MemgraphRunner --groups aggregation 1000_create unwind_create dense_expand match --no-strict' + ;; + mgbench) + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR/tests/mgbench "'&& ./benchmark.py vendor-native --num-workers-for-benchmark 12 --export-results benchmark_result.json pokec/medium/*/*' + ;; + upload-to-bench-graph) + shift 1 + local SETUP_PASSED_ARGS="export PASSED_ARGS=\"$@\"" + local SETUP_VE3_ENV="virtualenv -p python3 ve3 && source ve3/bin/activate && pip install -r requirements.txt" + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR/tools/bench-graph-client && $SETUP_VE3_ENV && $SETUP_PASSED_ARGS "'&& ./main.py $PASSED_ARGS' + ;; + code-analysis) + shift 1 + local SETUP_PASSED_ARGS="export PASSED_ARGS=\"$@\"" + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR/tests/code_analysis && $SETUP_PASSED_ARGS "'&& ./python_code_analysis.sh $PASSED_ARGS' 
+ ;; + code-coverage) + local test_output_path="$MGBUILD_ROOT_DIR/tools/github/generated/code_coverage.tar.gz" + local test_output_host_dest="$PROJECT_ROOT/tools/github/generated/code_coverage.tar.gz" + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && $ACTIVATE_TOOLCHAIN && cd $MGBUILD_ROOT_DIR/tools/github "'&& ./coverage_convert' + docker exec -u mg $build_container bash -c "cd $MGBUILD_ROOT_DIR/tools/github/generated && tar -czf code_coverage.tar.gz coverage.json html report.json summary.rmu" + mkdir -p $PROJECT_ROOT/tools/github/generated + docker cp $build_container:$test_output_path $test_output_host_dest + ;; + clang-tidy) + shift 1 + local SETUP_PASSED_ARGS="export PASSED_ARGS=\"$@\"" + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && export THREADS=$threads && $ACTIVATE_TOOLCHAIN && cd $MGBUILD_ROOT_DIR/tests/code_analysis && $SETUP_PASSED_ARGS "'&& ./clang_tidy.sh $PASSED_ARGS' + ;; + e2e) + # local kafka_container="kafka_kafka_1" + # local kafka_hostname="kafka" + # local pulsar_container="pulsar_pulsar_1" + # local pulsar_hostname="pulsar" + # local setup_hostnames="export KAFKA_HOSTNAME=$kafka_hostname && PULSAR_HOSTNAME=$pulsar_hostname" + # local build_container_network=$(docker inspect $build_container --format='{{ .HostConfig.NetworkMode }}') + # docker network connect --alias $kafka_hostname $build_container_network $kafka_container > /dev/null 2>&1 || echo "Kafka container already inside correct network or something went wrong ..." + # docker network connect --alias $pulsar_hostname $build_container_network $pulsar_container > /dev/null 2>&1 || echo "Pulsar container already inside correct network or something went wrong ..."
+ docker exec -u mg $build_container bash -c "pip install --user networkx && pip3 install --user networkx" + docker exec -u mg $build_container bash -c "$EXPORT_LICENSE && $EXPORT_ORG_NAME && cd $MGBUILD_ROOT_DIR/tests && $ACTIVATE_VENV && source ve3/bin/activate_e2e && cd $MGBUILD_ROOT_DIR/tests/e2e "'&& ./run.sh' + ;; + *) + echo "Error: Unknown test '$1'" + exit 1 + ;; + esac +} + + +################################################## +################### PARSE ARGS ################### +################################################## +if [ "$#" -eq 0 ] || [ "$1" == "-h" ] || [ "$1" == "--help" ]; then + print_help + exit 0 +fi +arch=$DEFAULT_ARCH +build_type=$DEFAULT_BUILD_TYPE +enterprise_license=$DEFAULT_ENTERPRISE_LICENSE +organization_name=$DEFAULT_ORGANIZATION_NAME +os=$DEFAULT_OS +threads=$DEFAULT_THREADS +toolchain_version=$DEFAULT_TOOLCHAIN +command="" +while [[ $# -gt 0 ]]; do + case "$1" in + --arch) + arch=$2 + check_support arch $arch + shift 2 + ;; + --build-type) + build_type=$2 + check_support build_type $build_type + shift 2 + ;; + --enterprise-license) + enterprise_license=$2 + shift 2 + ;; + --organization-name) + organization_name=$2 + shift 2 + ;; + --os) + os=$2 + check_support os $os + shift 2 + ;; + --threads) + threads=$2 + shift 2 + ;; + --toolchain) + toolchain_version=$2 + check_support toolchain $toolchain_version + shift 2 + ;; + *) + if [[ "$1" =~ ^--.* ]]; then + echo -e "Error: Unknown option '$1'" + exit 1 + else + command=$1 + shift 1 + break + fi + ;; + esac +done +check_support os_toolchain_combo $os $toolchain_version + +if [[ "$command" == "" ]]; then + echo -e "Error: Command not provided, please provide command" + print_help + exit 1 +fi + +if docker compose version > /dev/null 2>&1; then + docker_compose_cmd="docker compose" +elif which docker-compose > /dev/null 2>&1; then + docker_compose_cmd="docker-compose" +else + echo -e "Missing command: There has to be installed either 'docker-compose' or 'docker compose'" + 
exit 1 +fi +echo "Using $docker_compose_cmd" + +################################################## +################# PARSE COMMAND ################## +################################################## +case $command in + build) + cd $SCRIPT_DIR + if [[ "$os" == "all" ]]; then + $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml build + else + $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml build mgbuild_${toolchain_version}_${os} + fi + ;; + run) + cd $SCRIPT_DIR + pull=false + if [[ "$#" -gt 0 ]]; then + if [[ "$1" == "--pull" ]]; then + pull=true + else + echo "Error: Unknown flag '$1'" + exit 1 + fi + fi + if [[ "$os" == "all" ]]; then + if [[ "$pull" == "true" ]]; then + $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml pull --ignore-pull-failures + elif [[ "$docker_compose_cmd" == "docker compose" ]]; then + $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml pull --ignore-pull-failures --policy missing + fi + $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml up -d + else + if [[ "$pull" == "true" ]]; then + $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml pull mgbuild_${toolchain_version}_${os} + elif ! 
docker image inspect memgraph/mgbuild:${toolchain_version}_${os} > /dev/null 2>&1; then + $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml pull --ignore-pull-failures mgbuild_${toolchain_version}_${os} + fi + $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml up -d mgbuild_${toolchain_version}_${os} + fi + ;; + stop) + cd $SCRIPT_DIR + remove=false + if [[ "$#" -gt 0 ]]; then + if [[ "$1" == "--remove" ]]; then + remove=true + else + echo "Error: Unknown flag '$1'" + exit 1 + fi + fi + if [[ "$os" == "all" ]]; then + $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml down + else + docker stop mgbuild_${toolchain_version}_${os} + if [[ "$remove" == "true" ]]; then + docker rm mgbuild_${toolchain_version}_${os} + fi + fi + ;; + pull) + cd $SCRIPT_DIR + if [[ "$os" == "all" ]]; then + $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml pull --ignore-pull-failures + else + $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml pull mgbuild_${toolchain_version}_${os} + fi + ;; + push) + docker login $@ + cd $SCRIPT_DIR + if [[ "$os" == "all" ]]; then + $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml push --ignore-push-failures + else + $docker_compose_cmd -f ${arch}-builders-${toolchain_version}.yml push mgbuild_${toolchain_version}_${os} + fi + ;; + build-memgraph) + build_memgraph $@ + ;; + package-memgraph) + package_memgraph + ;; + test-memgraph) + test_memgraph $@ + ;; + copy) + copy_memgraph $@ + ;; + *) + echo "Error: Unknown command '$command'" + exit 1 + ;; +esac diff --git a/release/package/rocky-9.3/Dockerfile b/release/package/rocky-9.3/Dockerfile new file mode 100644 index 000000000..4ee3a0d78 --- /dev/null +++ b/release/package/rocky-9.3/Dockerfile @@ -0,0 +1,40 @@ +FROM rockylinux:9.3 + +ARG TOOLCHAIN_VERSION + +# Stops tzdata interactive configuration. 
+RUN yum -y update \ + && yum install -y wget git +# Do NOT be smart here and clean the cache because the container is used in the +# stateful context. + +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-rocky-9.3-amd64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-rocky-9.3-amd64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-rocky-9.3-amd64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-rocky-9.3-amd64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/rocky-9.3.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/rocky-9.3.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. && rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash + +# Install PyYAML (only for amzn-2, centos-7, centos-9 and rocky-9.3) +RUN pip3 install --user PyYAML + +ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/run.sh b/release/package/run.sh deleted file mode 100755 index bbd5ff48a..000000000 --- a/release/package/run.sh +++ /dev/null @@ -1,208 +0,0 @@ -#!/bin/bash - -set -Eeuo pipefail - -SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -SUPPORTED_OS=( - centos-7 centos-9 - debian-10 debian-11 debian-11-arm - ubuntu-18.04 ubuntu-20.04 ubuntu-22.04 ubuntu-22.04-arm - fedora-36 - amzn-2 -) - -SUPPORTED_BUILD_TYPES=( - Debug - Release - RelWithDebInfo -) - -PROJECT_ROOT="$SCRIPT_DIR/../.."
-TOOLCHAIN_VERSION="toolchain-v4" -ACTIVATE_TOOLCHAIN="source /opt/${TOOLCHAIN_VERSION}/activate" -HOST_OUTPUT_DIR="$PROJECT_ROOT/build/output" - -print_help () { - # TODO(gitbuda): Update the release/package/run.sh help - echo "$0 init|package|docker|test {os} {build_type} [--for-docker|--for-platform]" - echo "" - echo " OSs: ${SUPPORTED_OS[*]}" - echo " Build types: ${SUPPORTED_BUILD_TYPES[*]}" - exit 1 -} - -make_package () { - os="$1" - build_type="$2" - - build_container="mgbuild_$os" - echo "Building Memgraph for $os on $build_container..." - - package_command="" - if [[ "$os" =~ ^"centos".* ]] || [[ "$os" =~ ^"fedora".* ]] || [[ "$os" =~ ^"amzn".* ]]; then - docker exec "$build_container" bash -c "yum -y update" - package_command=" cpack -G RPM --config ../CPackConfig.cmake && rpmlint --file='../../release/rpm/rpmlintrc' memgraph*.rpm " - fi - if [[ "$os" =~ ^"debian".* ]]; then - docker exec "$build_container" bash -c "apt --allow-releaseinfo-change -y update" - package_command=" cpack -G DEB --config ../CPackConfig.cmake " - fi - if [[ "$os" =~ ^"ubuntu".* ]]; then - docker exec "$build_container" bash -c "apt update" - package_command=" cpack -G DEB --config ../CPackConfig.cmake " - fi - telemetry_id_override_flag="" - if [[ "$#" -gt 2 ]]; then - if [[ "$3" == "--for-docker" ]]; then - telemetry_id_override_flag=" -DMG_TELEMETRY_ID_OVERRIDE=DOCKER " - elif [[ "$3" == "--for-platform" ]]; then - telemetry_id_override_flag=" -DMG_TELEMETRY_ID_OVERRIDE=DOCKER-PLATFORM" - else - print_help - exit - fi - fi - - echo "Copying project files..." - # If master is not the current branch, fetch it, because the get_version - # script depends on it. If we are on master, the fetch command is going to - # fail so that's why there is the explicit check. - # Required here because Docker build container can't access remote. 
- cd "$PROJECT_ROOT" - if [[ "$(git rev-parse --abbrev-ref HEAD)" != "master" ]]; then - git fetch origin master:master - fi - - # Ensure we have a clean build directory - docker exec "$build_container" rm -rf /memgraph - - docker exec "$build_container" mkdir -p /memgraph - # TODO(gitbuda): Revisit copying the whole repo -> makese sense under CI. - docker cp "$PROJECT_ROOT/." "$build_container:/memgraph/" - - container_build_dir="/memgraph/build" - container_output_dir="$container_build_dir/output" - - # TODO(gitbuda): TOOLCHAIN_RUN_DEPS should be installed during the Docker - # image build phase, but that is not easy at this point because the - # environment/os/{os}.sh does not come within the toolchain package. When - # migrating to the next version of toolchain do that, and remove the - # TOOLCHAIN_RUN_DEPS installation from here. - # TODO(gitbuda): On the other side, having this here allows updating deps - # wihout reruning the build containers. - echo "Installing dependencies using '/memgraph/environment/os/$os.sh' script..." - docker exec "$build_container" bash -c "/memgraph/environment/os/$os.sh install TOOLCHAIN_RUN_DEPS" - docker exec "$build_container" bash -c "/memgraph/environment/os/$os.sh install MEMGRAPH_BUILD_DEPS" - - echo "Building targeted package..." - # Fix issue with git marking directory as not safe - docker exec "$build_container" bash -c "cd /memgraph && git config --global --add safe.directory '*'" - docker exec "$build_container" bash -c "cd /memgraph && $ACTIVATE_TOOLCHAIN && ./init" - docker exec "$build_container" bash -c "cd $container_build_dir && rm -rf ./*" - # TODO(gitbuda): cmake fails locally if remote is clone via ssh because of the key -> FIX - if [[ "$os" =~ "-arm" ]]; then - docker exec "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN && cmake -DCMAKE_BUILD_TYPE=$build_type -DMG_ARCH="ARM64" $telemetry_id_override_flag .." 
- else - docker exec "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN && cmake -DCMAKE_BUILD_TYPE=$build_type $telemetry_id_override_flag .." - fi - # ' is used instead of " because we need to run make within the allowed - # container resources. - # shellcheck disable=SC2016 - docker exec "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN "'&& make -j$(nproc)' - docker exec "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN "'&& make -j$(nproc) -B mgconsole' - docker exec "$build_container" bash -c "mkdir -p $container_output_dir && cd $container_output_dir && $ACTIVATE_TOOLCHAIN && $package_command" - - echo "Copying targeted package to host..." - last_package_name=$(docker exec "$build_container" bash -c "cd $container_output_dir && ls -t memgraph* | head -1") - # The operating system folder is introduced because multiple different - # packages could be preserved during the same build "session". - mkdir -p "$HOST_OUTPUT_DIR/$os" - package_host_destination="$HOST_OUTPUT_DIR/$os/$last_package_name" - docker cp "$build_container:$container_output_dir/$last_package_name" "$package_host_destination" - echo "Package saved to $package_host_destination." -} - -case "$1" in - init) - cd "$SCRIPT_DIR" - if ! which "docker-compose" >/dev/null; then - docker_compose_cmd="docker compose" - else - docker_compose_cmd="docker-compose" - fi - $docker_compose_cmd build --build-arg TOOLCHAIN_VERSION="${TOOLCHAIN_VERSION}" - $docker_compose_cmd up -d - ;; - - docker) - # NOTE: Docker is build on top of Debian 11 package. 
- based_on_os="debian-11" - # shellcheck disable=SC2012 - last_package_name=$(cd "$HOST_OUTPUT_DIR/$based_on_os" && ls -t memgraph* | head -1) - docker_build_folder="$PROJECT_ROOT/release/docker" - cd "$docker_build_folder" - ./package_docker --latest "$HOST_OUTPUT_DIR/$based_on_os/$last_package_name" - # shellcheck disable=SC2012 - docker_image_name=$(cd "$docker_build_folder" && ls -t memgraph* | head -1) - docker_host_folder="$HOST_OUTPUT_DIR/docker" - docker_host_image_path="$docker_host_folder/$docker_image_name" - mkdir -p "$docker_host_folder" - cp "$docker_build_folder/$docker_image_name" "$docker_host_image_path" - echo "Docker images saved to $docker_host_image_path." - ;; - - package) - shift 1 - if [[ "$#" -lt 2 ]]; then - print_help - fi - os="$1" - build_type="$2" - shift 2 - is_os_ok=false - for supported_os in "${SUPPORTED_OS[@]}"; do - if [[ "$supported_os" == "${os}" ]]; then - is_os_ok=true - break - fi - done - is_build_type_ok=false - for supported_build_type in "${SUPPORTED_BUILD_TYPES[@]}"; do - if [[ "$supported_build_type" == "${build_type}" ]]; then - is_build_type_ok=true - break - fi - done - if [[ "$is_os_ok" == true && "$is_build_type_ok" == true ]]; then - make_package "$os" "$build_type" "$@" - else - if [[ "$is_os_ok" == false ]]; then - echo "Unsupported OS: $os" - elif [[ "$is_build_type_ok" == false ]]; then - echo "Unsupported build type: $build_type" - fi - print_help - fi - ;; - - build) - shift 1 - if [[ "$#" -ne 2 ]]; then - print_help - fi - # in the vX format, e.g. v5 - toolchain_version="$1" - # a name of the os folder, e.g. ubuntu-22.04-arm - os="$2" - cd "$SCRIPT_DIR/$os" - docker build -f Dockerfile --build-arg TOOLCHAIN_VERSION="toolchain-$toolchain_version" -t "memgraph/memgraph-builder:${toolchain_version}_$os" . - ;; - - test) - echo "TODO(gitbuda): Test all packages on mgtest containers." 
- ;; - - *) - print_help - ;; -esac diff --git a/release/package/ubuntu-18.04/Dockerfile b/release/package/ubuntu-18.04/Dockerfile index 97eceead4..b4b090984 100644 --- a/release/package/ubuntu-18.04/Dockerfile +++ b/release/package/ubuntu-18.04/Dockerfile @@ -10,9 +10,30 @@ RUN apt update && apt install -y \ # Do NOT be smart here and clean the cache because the container is used in the # stateful context. -RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/${TOOLCHAIN_VERSION}/${TOOLCHAIN_VERSION}-binaries-ubuntu-18.04-amd64.tar.gz \ - -O ${TOOLCHAIN_VERSION}-binaries-ubuntu-18.04-amd64.tar.gz \ - && tar xzvf ${TOOLCHAIN_VERSION}-binaries-ubuntu-18.04-amd64.tar.gz -C /opt \ - && rm ${TOOLCHAIN_VERSION}-binaries-ubuntu-18.04-amd64.tar.gz +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-18.04-amd64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-18.04-amd64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-18.04-amd64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-18.04-amd64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/ubuntu-18.04.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/ubuntu-18.04.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. 
&& rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/ubuntu-20.04/Dockerfile b/release/package/ubuntu-20.04/Dockerfile index 9fea7fd79..30d6cda8e 100644 --- a/release/package/ubuntu-20.04/Dockerfile +++ b/release/package/ubuntu-20.04/Dockerfile @@ -10,9 +10,30 @@ RUN apt update && apt install -y \ # Do NOT be smart here and clean the cache because the container is used in the # stateful context. -RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/${TOOLCHAIN_VERSION}/${TOOLCHAIN_VERSION}-binaries-ubuntu-20.04-amd64.tar.gz \ - -O ${TOOLCHAIN_VERSION}-binaries-ubuntu-20.04-amd64.tar.gz \ - && tar xzvf ${TOOLCHAIN_VERSION}-binaries-ubuntu-20.04-amd64.tar.gz -C /opt \ - && rm ${TOOLCHAIN_VERSION}-binaries-ubuntu-20.04-amd64.tar.gz +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-20.04-amd64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-20.04-amd64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-20.04-amd64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-20.04-amd64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/ubuntu-20.04.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/ubuntu-20.04.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. 
&& rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/ubuntu-22.04-arm/Dockerfile b/release/package/ubuntu-22.04-arm/Dockerfile index 56cac8f76..aa7b5b63f 100644 --- a/release/package/ubuntu-22.04-arm/Dockerfile +++ b/release/package/ubuntu-22.04-arm/Dockerfile @@ -10,9 +10,30 @@ RUN apt update && apt install -y \ # Do NOT be smart here and clean the cache because the container is used in the # stateful context. -RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/${TOOLCHAIN_VERSION}/${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-arm64.tar.gz \ - -O ${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-arm64.tar.gz \ - && tar xzvf ${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-arm64.tar.gz -C /opt \ - && rm ${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-arm64.tar.gz +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-arm64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-arm64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-arm64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-arm64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/ubuntu-22.04-arm.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/ubuntu-22.04-arm.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. 
&& rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/ubuntu-22.04/Dockerfile b/release/package/ubuntu-22.04/Dockerfile index 6bd22589b..652de0f5c 100644 --- a/release/package/ubuntu-22.04/Dockerfile +++ b/release/package/ubuntu-22.04/Dockerfile @@ -10,9 +10,30 @@ RUN apt update && apt install -y \ # Do NOT be smart here and clean the cache because the container is used in the # stateful context. -RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/${TOOLCHAIN_VERSION}/${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-amd64.tar.gz \ - -O ${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-amd64.tar.gz \ - && tar xzvf ${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-amd64.tar.gz -C /opt \ - && rm ${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-amd64.tar.gz +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-${TOOLCHAIN_VERSION}/toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-amd64.tar.gz \ + -O toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-amd64.tar.gz \ + && tar xzvf toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-amd64.tar.gz -C /opt \ + && rm toolchain-${TOOLCHAIN_VERSION}-binaries-ubuntu-22.04-amd64.tar.gz + +# Install toolchain run deps and memgraph build deps +SHELL ["/bin/bash", "-c"] +RUN git clone https://github.com/memgraph/memgraph.git \ + && cd memgraph \ + && ./environment/os/ubuntu-22.04.sh install TOOLCHAIN_RUN_DEPS \ + && ./environment/os/ubuntu-22.04.sh install MEMGRAPH_BUILD_DEPS \ + && cd .. 
&& rm -rf memgraph + +# Add mgdeps-cache and bench-graph-api hostnames +RUN echo -e "10.42.16.10 mgdeps-cache\n10.42.16.10 bench-graph-api" >> /etc/hosts + +# Create mg user and set as default +RUN useradd -m -s /bin/bash mg +USER mg + +# Install rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Fix node +RUN curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash ENTRYPOINT ["sleep", "infinity"] diff --git a/src/communication/result_stream_faker.hpp b/src/communication/result_stream_faker.hpp index 779d039cc..c0a40cecf 100644 --- a/src/communication/result_stream_faker.hpp +++ b/src/communication/result_stream_faker.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source diff --git a/src/coordination/CMakeLists.txt b/src/coordination/CMakeLists.txt index 936d7a5c2..ef9376a70 100644 --- a/src/coordination/CMakeLists.txt +++ b/src/coordination/CMakeLists.txt @@ -16,11 +16,14 @@ target_sources(mg-coordination include/coordination/raft_state.hpp include/coordination/rpc_errors.hpp + include/nuraft/raft_log_action.hpp + include/nuraft/coordinator_cluster_state.hpp include/nuraft/coordinator_log_store.hpp include/nuraft/coordinator_state_machine.hpp include/nuraft/coordinator_state_manager.hpp PRIVATE + coordinator_config.cpp coordinator_client.cpp coordinator_state.cpp coordinator_rpc.cpp @@ -33,6 +36,7 @@ target_sources(mg-coordination coordinator_log_store.cpp coordinator_state_machine.cpp coordinator_state_manager.cpp + coordinator_cluster_state.cpp ) target_include_directories(mg-coordination PUBLIC include) diff --git a/src/coordination/coordinator_client.cpp b/src/coordination/coordinator_client.cpp index 84044b04a..8530faff3 100644 --- a/src/coordination/coordinator_client.cpp +++ 
b/src/coordination/coordinator_client.cpp @@ -16,6 +16,7 @@ #include "coordination/coordinator_config.hpp" #include "coordination/coordinator_rpc.hpp" +#include "replication_coordination_glue/common.hpp" #include "replication_coordination_glue/messages.hpp" #include "utils/result.hpp" @@ -30,7 +31,7 @@ auto CreateClientContext(memgraph::coordination::CoordinatorClientConfig const & } // namespace CoordinatorClient::CoordinatorClient(CoordinatorInstance *coord_instance, CoordinatorClientConfig config, - HealthCheckCallback succ_cb, HealthCheckCallback fail_cb) + HealthCheckClientCallback succ_cb, HealthCheckClientCallback fail_cb) : rpc_context_{CreateClientContext(config)}, rpc_client_{io::network::Endpoint(io::network::Endpoint::needs_resolving, config.ip_address, config.port), &rpc_context_}, @@ -40,7 +41,9 @@ CoordinatorClient::CoordinatorClient(CoordinatorInstance *coord_instance, Coordi fail_cb_{std::move(fail_cb)} {} auto CoordinatorClient::InstanceName() const -> std::string { return config_.instance_name; } -auto CoordinatorClient::SocketAddress() const -> std::string { return rpc_client_.Endpoint().SocketAddress(); } + +auto CoordinatorClient::CoordinatorSocketAddress() const -> std::string { return config_.CoordinatorSocketAddress(); } +auto CoordinatorClient::ReplicationSocketAddress() const -> std::string { return config_.ReplicationSocketAddress(); } auto CoordinatorClient::InstanceDownTimeoutSec() const -> std::chrono::seconds { return config_.instance_down_timeout_sec; @@ -63,11 +66,15 @@ void CoordinatorClient::StartFrequentCheck() { [this, instance_name = config_.instance_name] { try { spdlog::trace("Sending frequent heartbeat to machine {} on {}", instance_name, - rpc_client_.Endpoint().SocketAddress()); + config_.CoordinatorSocketAddress()); { // NOTE: This is intentionally scoped so that stream lock could get released. 
auto stream{rpc_client_.Stream<memgraph::replication_coordination_glue::FrequentHeartbeatRpc>()}; stream.AwaitResponse(); } + // Subtle race condition: + // acquiring of lock needs to happen before function call, as function callback can be changed + // for instance after lock is already acquired + // (failover case when instance is promoted to MAIN) succ_cb_(coord_instance_, instance_name); } catch (rpc::RpcFailedException const &) { fail_cb_(coord_instance_, instance_name); @@ -79,11 +86,6 @@ void CoordinatorClient::StopFrequentCheck() { instance_checker_.Stop(); } void CoordinatorClient::PauseFrequentCheck() { instance_checker_.Pause(); } void CoordinatorClient::ResumeFrequentCheck() { instance_checker_.Resume(); } -auto CoordinatorClient::SetCallbacks(HealthCheckCallback succ_cb, HealthCheckCallback fail_cb) -> void { - succ_cb_ = std::move(succ_cb); - fail_cb_ = std::move(fail_cb); -} - auto CoordinatorClient::ReplicationClientInfo() const -> ReplClientInfo { return config_.replication_client_info; } auto CoordinatorClient::SendPromoteReplicaToMainRpc(const utils::UUID &uuid, @@ -117,7 +119,7 @@ auto CoordinatorClient::DemoteToReplica() const -> bool { return false; } -auto CoordinatorClient::SendSwapMainUUIDRpc(const utils::UUID &uuid) const -> bool { +auto CoordinatorClient::SendSwapMainUUIDRpc(utils::UUID const &uuid) const -> bool { try { auto stream{rpc_client_.Stream<replication_coordination_glue::SwapMainUUIDRpc>(uuid)}; if (!stream.AwaitResponse().success) { @@ -131,7 +133,7 @@ auto CoordinatorClient::SendSwapMainUUIDRpc(const utils::UUID &uuid) const -> bo return false; } -auto CoordinatorClient::SendUnregisterReplicaRpc(std::string const &instance_name) const -> bool { +auto CoordinatorClient::SendUnregisterReplicaRpc(std::string_view instance_name) const -> bool { try { auto stream{rpc_client_.Stream<UnregisterReplicaRpc>(instance_name)}; if (!stream.AwaitResponse().success) { @@ -171,5 +173,17 @@ auto CoordinatorClient::SendEnableWritingOnMainRpc() 
const -> bool { return false; } +auto CoordinatorClient::SendGetInstanceTimestampsRpc() const + -> utils::BasicResult<GetInstanceUUIDError, replication_coordination_glue::DatabaseHistories> { + try { + auto stream{rpc_client_.Stream<coordination::GetDatabaseHistoriesRpc>()}; + return stream.AwaitResponse().database_histories; + + } catch (const rpc::RpcFailedException &) { + spdlog::error("RPC error occured while sending GetInstance UUID RPC"); + return GetInstanceUUIDError::RPC_EXCEPTION; + } +} + } // namespace memgraph::coordination #endif diff --git a/src/coordination/coordinator_cluster_state.cpp b/src/coordination/coordinator_cluster_state.cpp new file mode 100644 index 000000000..cf6e1a574 --- /dev/null +++ b/src/coordination/coordinator_cluster_state.cpp @@ -0,0 +1,147 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +#ifdef MG_ENTERPRISE + +#include "nuraft/coordinator_cluster_state.hpp" +#include "utils/logging.hpp" + +#include <shared_mutex> + +namespace memgraph::coordination { + +void to_json(nlohmann::json &j, InstanceState const &instance_state) { + j = nlohmann::json{{"config", instance_state.config}, {"status", instance_state.status}}; +} + +void from_json(nlohmann::json const &j, InstanceState &instance_state) { + j.at("config").get_to(instance_state.config); + j.at("status").get_to(instance_state.status); +} + +CoordinatorClusterState::CoordinatorClusterState(std::map<std::string, InstanceState, std::less<>> instances) + : instances_{std::move(instances)} {} + +CoordinatorClusterState::CoordinatorClusterState(CoordinatorClusterState const &other) : instances_{other.instances_} {} + +CoordinatorClusterState &CoordinatorClusterState::operator=(CoordinatorClusterState const &other) { + if (this == &other) { + return *this; + } + instances_ = other.instances_; + return *this; +} + +CoordinatorClusterState::CoordinatorClusterState(CoordinatorClusterState &&other) noexcept + : instances_{std::move(other.instances_)} {} + +CoordinatorClusterState &CoordinatorClusterState::operator=(CoordinatorClusterState &&other) noexcept { + if (this == &other) { + return *this; + } + instances_ = std::move(other.instances_); + return *this; +} + +auto CoordinatorClusterState::MainExists() const -> bool { + auto lock = std::shared_lock{log_lock_}; + return std::ranges::any_of(instances_, + [](auto const &entry) { return entry.second.status == ReplicationRole::MAIN; }); +} + +auto CoordinatorClusterState::IsMain(std::string_view instance_name) const -> bool { + auto lock = std::shared_lock{log_lock_}; + auto const it = instances_.find(instance_name); + return it != instances_.end() && it->second.status == ReplicationRole::MAIN; +} + +auto CoordinatorClusterState::IsReplica(std::string_view instance_name) const -> bool { + auto lock = std::shared_lock{log_lock_}; + auto const it = 
instances_.find(instance_name); + return it != instances_.end() && it->second.status == ReplicationRole::REPLICA; +} + +auto CoordinatorClusterState::InsertInstance(std::string instance_name, InstanceState instance_state) -> void { + auto lock = std::lock_guard{log_lock_}; + instances_.insert_or_assign(std::move(instance_name), std::move(instance_state)); +} + +auto CoordinatorClusterState::DoAction(TRaftLog log_entry, RaftLogAction log_action) -> void { + auto lock = std::lock_guard{log_lock_}; + switch (log_action) { + case RaftLogAction::REGISTER_REPLICATION_INSTANCE: { + auto const &config = std::get<CoordinatorClientConfig>(log_entry); + instances_[config.instance_name] = InstanceState{config, ReplicationRole::REPLICA}; + break; + } + case RaftLogAction::UNREGISTER_REPLICATION_INSTANCE: { + auto const instance_name = std::get<std::string>(log_entry); + instances_.erase(instance_name); + break; + } + case RaftLogAction::SET_INSTANCE_AS_MAIN: { + auto const instance_name = std::get<std::string>(log_entry); + auto it = instances_.find(instance_name); + MG_ASSERT(it != instances_.end(), "Instance does not exist as part of raft state!"); + it->second.status = ReplicationRole::MAIN; + break; + } + case RaftLogAction::SET_INSTANCE_AS_REPLICA: { + auto const instance_name = std::get<std::string>(log_entry); + auto it = instances_.find(instance_name); + MG_ASSERT(it != instances_.end(), "Instance does not exist as part of raft state!"); + it->second.status = ReplicationRole::REPLICA; + break; + } + case RaftLogAction::UPDATE_UUID: { + uuid_ = std::get<utils::UUID>(log_entry); + break; + } + } +} + +auto CoordinatorClusterState::Serialize(ptr<buffer> &data) -> void { + auto lock = std::shared_lock{log_lock_}; + + auto const log = nlohmann::json(instances_).dump(); + + data = buffer::alloc(sizeof(uint32_t) + log.size()); + buffer_serializer bs(data); + bs.put_str(log); +} + +auto CoordinatorClusterState::Deserialize(buffer &data) -> CoordinatorClusterState { + 
buffer_serializer bs(data); + auto const j = nlohmann::json::parse(bs.get_str()); + auto instances = j.get<std::map<std::string, InstanceState, std::less<>>>(); + + return CoordinatorClusterState{std::move(instances)}; +} + +auto CoordinatorClusterState::GetInstances() const -> std::vector<InstanceState> { + auto lock = std::shared_lock{log_lock_}; + return instances_ | ranges::views::values | ranges::to<std::vector<InstanceState>>; +} + +auto CoordinatorClusterState::GetUUID() const -> utils::UUID { return uuid_; } + +auto CoordinatorClusterState::FindCurrentMainInstanceName() const -> std::optional<std::string> { + auto lock = std::shared_lock{log_lock_}; + auto const it = + std::ranges::find_if(instances_, [](auto const &entry) { return entry.second.status == ReplicationRole::MAIN; }); + if (it == instances_.end()) { + return {}; + } + return it->first; +} + +} // namespace memgraph::coordination +#endif diff --git a/src/coordination/coordinator_config.cpp b/src/coordination/coordinator_config.cpp new file mode 100644 index 000000000..a1147d3b6 --- /dev/null +++ b/src/coordination/coordinator_config.cpp @@ -0,0 +1,54 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +#ifdef MG_ENTERPRISE + +#include "coordination/coordinator_config.hpp" + +namespace memgraph::coordination { + +void to_json(nlohmann::json &j, ReplClientInfo const &config) { + j = nlohmann::json{{"instance_name", config.instance_name}, + {"replication_mode", config.replication_mode}, + {"replication_ip_address", config.replication_ip_address}, + {"replication_port", config.replication_port}}; +} + +void from_json(nlohmann::json const &j, ReplClientInfo &config) { + config.instance_name = j.at("instance_name").get<std::string>(); + config.replication_mode = j.at("replication_mode").get<replication_coordination_glue::ReplicationMode>(); + config.replication_ip_address = j.at("replication_ip_address").get<std::string>(); + config.replication_port = j.at("replication_port").get<uint16_t>(); +} + +void to_json(nlohmann::json &j, CoordinatorClientConfig const &config) { + j = nlohmann::json{{"instance_name", config.instance_name}, + {"ip_address", config.ip_address}, + {"port", config.port}, + {"instance_health_check_frequency_sec", config.instance_health_check_frequency_sec.count()}, + {"instance_down_timeout_sec", config.instance_down_timeout_sec.count()}, + {"instance_get_uuid_frequency_sec", config.instance_get_uuid_frequency_sec.count()}, + {"replication_client_info", config.replication_client_info}}; +} + +void from_json(nlohmann::json const &j, CoordinatorClientConfig &config) { + config.instance_name = j.at("instance_name").get<std::string>(); + config.ip_address = j.at("ip_address").get<std::string>(); + config.port = j.at("port").get<uint16_t>(); + config.instance_health_check_frequency_sec = + std::chrono::seconds{j.at("instance_health_check_frequency_sec").get<int>()}; + config.instance_down_timeout_sec = std::chrono::seconds{j.at("instance_down_timeout_sec").get<int>()}; + config.instance_get_uuid_frequency_sec = std::chrono::seconds{j.at("instance_get_uuid_frequency_sec").get<int>()}; + config.replication_client_info = 
j.at("replication_client_info").get<ReplClientInfo>(); +} + +} // namespace memgraph::coordination +#endif diff --git a/src/coordination/coordinator_handlers.cpp b/src/coordination/coordinator_handlers.cpp index f605069fe..637360267 100644 --- a/src/coordination/coordinator_handlers.cpp +++ b/src/coordination/coordinator_handlers.cpp @@ -57,6 +57,17 @@ void CoordinatorHandlers::Register(memgraph::coordination::CoordinatorServer &se spdlog::info("Received GetInstanceUUIDRpc on coordinator server"); CoordinatorHandlers::GetInstanceUUIDHandler(replication_handler, req_reader, res_builder); }); + + server.Register<coordination::GetDatabaseHistoriesRpc>( + [&replication_handler](slk::Reader *req_reader, slk::Builder *res_builder) -> void { + spdlog::info("Received GetDatabasesHistoryRpc on coordinator server"); + CoordinatorHandlers::GetDatabaseHistoriesHandler(replication_handler, req_reader, res_builder); + }); +} + +void CoordinatorHandlers::GetDatabaseHistoriesHandler(replication::ReplicationHandler &replication_handler, + slk::Reader * /*req_reader*/, slk::Builder *res_builder) { + slk::Save(coordination::GetDatabaseHistoriesRes{replication_handler.GetDatabasesHistories()}, res_builder); } void CoordinatorHandlers::SwapMainUUIDHandler(replication::ReplicationHandler &replication_handler, diff --git a/src/coordination/coordinator_instance.cpp b/src/coordination/coordinator_instance.cpp index 90674cf3c..791ffbc59 100644 --- a/src/coordination/coordinator_instance.cpp +++ b/src/coordination/coordinator_instance.cpp @@ -15,10 +15,12 @@ #include "coordination/coordinator_exceptions.hpp" #include "coordination/fmt.hpp" +#include "dbms/constants.hpp" #include "nuraft/coordinator_state_machine.hpp" #include "nuraft/coordinator_state_manager.hpp" #include "utils/counter.hpp" #include "utils/functional.hpp" +#include "utils/resource_lock.hpp" #include <range/v3/view.hpp> #include <shared_mutex> @@ -30,144 +32,156 @@ using nuraft::srv_config; 
CoordinatorInstance::CoordinatorInstance() : raft_state_(RaftState::MakeRaftState( - [this] { std::ranges::for_each(repl_instances_, &ReplicationInstance::StartFrequentCheck); }, - [this] { std::ranges::for_each(repl_instances_, &ReplicationInstance::StopFrequentCheck); })) { - auto find_repl_instance = [](CoordinatorInstance *self, - std::string_view repl_instance_name) -> ReplicationInstance & { - auto repl_instance = - std::ranges::find_if(self->repl_instances_, [repl_instance_name](ReplicationInstance const &instance) { - return instance.InstanceName() == repl_instance_name; - }); + [this]() { + spdlog::info("Leader changed, starting all replication instances!"); + auto const instances = raft_state_.GetInstances(); + auto replicas = instances | ranges::views::filter([](auto const &instance) { + return instance.status == ReplicationRole::REPLICA; + }); - MG_ASSERT(repl_instance != self->repl_instances_.end(), "Instance {} not found during callback!", - repl_instance_name); - return *repl_instance; + std::ranges::for_each(replicas, [this](auto &replica) { + spdlog::info("Started pinging replication instance {}", replica.config.instance_name); + repl_instances_.emplace_back(this, replica.config, client_succ_cb_, client_fail_cb_, + &CoordinatorInstance::ReplicaSuccessCallback, + &CoordinatorInstance::ReplicaFailCallback); + }); + + auto main = instances | ranges::views::filter( + [](auto const &instance) { return instance.status == ReplicationRole::MAIN; }); + + std::ranges::for_each(main, [this](auto &main_instance) { + spdlog::info("Started pinging main instance {}", main_instance.config.instance_name); + repl_instances_.emplace_back(this, main_instance.config, client_succ_cb_, client_fail_cb_, + &CoordinatorInstance::MainSuccessCallback, + &CoordinatorInstance::MainFailCallback); + }); + + std::ranges::for_each(repl_instances_, [this](auto &instance) { + instance.SetNewMainUUID(raft_state_.GetUUID()); + instance.StartFrequentCheck(); + }); + }, + [this]() { + 
spdlog::info("Leader changed, stopping all replication instances!"); + repl_instances_.clear(); + })) { + client_succ_cb_ = [](CoordinatorInstance *self, std::string_view repl_instance_name) -> void { + auto lock = std::lock_guard{self->coord_instance_lock_}; + auto &repl_instance = self->FindReplicationInstance(repl_instance_name); + std::invoke(repl_instance.GetSuccessCallback(), self, repl_instance_name); }; - replica_succ_cb_ = [find_repl_instance](CoordinatorInstance *self, std::string_view repl_instance_name) -> void { + client_fail_cb_ = [](CoordinatorInstance *self, std::string_view repl_instance_name) -> void { auto lock = std::lock_guard{self->coord_instance_lock_}; - spdlog::trace("Instance {} performing replica successful callback", repl_instance_name); - auto &repl_instance = find_repl_instance(self, repl_instance_name); - - // We need to get replicas UUID from time to time to ensure replica is listening to correct main - // and that it didn't go down for less time than we could notice - // We need to get id of main replica is listening to - // and swap if necessary - if (!repl_instance.EnsureReplicaHasCorrectMainUUID(self->GetMainUUID())) { - spdlog::error("Failed to swap uuid for replica instance {} which is alive", repl_instance.InstanceName()); - return; - } - - repl_instance.OnSuccessPing(); - }; - - replica_fail_cb_ = [find_repl_instance](CoordinatorInstance *self, std::string_view repl_instance_name) -> void { - auto lock = std::lock_guard{self->coord_instance_lock_}; - spdlog::trace("Instance {} performing replica failure callback", repl_instance_name); - auto &repl_instance = find_repl_instance(self, repl_instance_name); - repl_instance.OnFailPing(); - }; - - main_succ_cb_ = [find_repl_instance](CoordinatorInstance *self, std::string_view repl_instance_name) -> void { - auto lock = std::lock_guard{self->coord_instance_lock_}; - spdlog::trace("Instance {} performing main successful callback", repl_instance_name); - - auto &repl_instance = 
find_repl_instance(self, repl_instance_name); - - if (repl_instance.IsAlive()) { - repl_instance.OnSuccessPing(); - return; - } - - const auto &repl_instance_uuid = repl_instance.GetMainUUID(); - MG_ASSERT(repl_instance_uuid.has_value(), "Instance must have uuid set."); - - auto const curr_main_uuid = self->GetMainUUID(); - if (curr_main_uuid == repl_instance_uuid.value()) { - if (!repl_instance.EnableWritingOnMain()) { - spdlog::error("Failed to enable writing on main instance {}", repl_instance_name); - return; - } - - repl_instance.OnSuccessPing(); - return; - } - - // TODO(antoniof) make demoteToReplica idempotent since main can be demoted to replica but - // swapUUID can fail - if (repl_instance.DemoteToReplica(self->replica_succ_cb_, self->replica_fail_cb_)) { - repl_instance.OnSuccessPing(); - spdlog::info("Instance {} demoted to replica", repl_instance_name); - } else { - spdlog::error("Instance {} failed to become replica", repl_instance_name); - return; - } - - if (!repl_instance.SendSwapAndUpdateUUID(curr_main_uuid)) { - spdlog::error(fmt::format("Failed to swap uuid for demoted main instance {}", repl_instance.InstanceName())); - return; - } - }; - - main_fail_cb_ = [find_repl_instance](CoordinatorInstance *self, std::string_view repl_instance_name) -> void { - auto lock = std::lock_guard{self->coord_instance_lock_}; - spdlog::trace("Instance {} performing main failure callback", repl_instance_name); - auto &repl_instance = find_repl_instance(self, repl_instance_name); - repl_instance.OnFailPing(); - const auto &repl_instance_uuid = repl_instance.GetMainUUID(); - MG_ASSERT(repl_instance_uuid.has_value(), "Instance must have uuid set"); - - if (!repl_instance.IsAlive() && self->GetMainUUID() == repl_instance_uuid.value()) { - spdlog::info("Cluster without main instance, trying automatic failover"); - self->TryFailover(); // TODO: (andi) Initiate failover - } + auto &repl_instance = self->FindReplicationInstance(repl_instance_name); + 
std::invoke(repl_instance.GetFailCallback(), self, repl_instance_name); }; } +auto CoordinatorInstance::FindReplicationInstance(std::string_view replication_instance_name) -> ReplicationInstance & { + auto repl_instance = + std::ranges::find_if(repl_instances_, [replication_instance_name](ReplicationInstance const &instance) { + return instance.InstanceName() == replication_instance_name; + }); + + MG_ASSERT(repl_instance != repl_instances_.end(), "Instance {} not found during callback!", + replication_instance_name); + return *repl_instance; +} + auto CoordinatorInstance::ShowInstances() const -> std::vector<InstanceStatus> { - auto const coord_instances = raft_state_.GetAllCoordinators(); - - auto const stringify_repl_role = [](ReplicationInstance const &instance) -> std::string { - if (!instance.IsAlive()) return "unknown"; - if (instance.IsMain()) return "main"; - return "replica"; - }; - - auto const repl_instance_to_status = [&stringify_repl_role](ReplicationInstance const &instance) -> InstanceStatus { - return {.instance_name = instance.InstanceName(), - .coord_socket_address = instance.SocketAddress(), - .cluster_role = stringify_repl_role(instance), - .is_alive = instance.IsAlive()}; - }; - auto const coord_instance_to_status = [](ptr<srv_config> const &instance) -> InstanceStatus { return {.instance_name = "coordinator_" + std::to_string(instance->get_id()), .raft_socket_address = instance->get_endpoint(), .cluster_role = "coordinator", - .is_alive = true}; // TODO: (andi) Get this info from RAFT and test it or when we will move - // CoordinatorState to every instance, we can be smarter about this using our RPC. 
+ .health = "unknown"}; // TODO: (andi) Get this info from RAFT and test it or when we will move }; + auto instances_status = utils::fmap(raft_state_.GetAllCoordinators(), coord_instance_to_status); - auto instances_status = utils::fmap(coord_instance_to_status, coord_instances); - { - auto lock = std::shared_lock{coord_instance_lock_}; - std::ranges::transform(repl_instances_, std::back_inserter(instances_status), repl_instance_to_status); + if (raft_state_.IsLeader()) { + auto const stringify_repl_role = [this](ReplicationInstance const &instance) -> std::string { + if (!instance.IsAlive()) return "unknown"; + if (raft_state_.IsMain(instance.InstanceName())) return "main"; + return "replica"; + }; + + auto const stringify_repl_health = [](ReplicationInstance const &instance) -> std::string { + return instance.IsAlive() ? "up" : "down"; + }; + + auto process_repl_instance_as_leader = + [&stringify_repl_role, &stringify_repl_health](ReplicationInstance const &instance) -> InstanceStatus { + return {.instance_name = instance.InstanceName(), + .coord_socket_address = instance.CoordinatorSocketAddress(), + .cluster_role = stringify_repl_role(instance), + .health = stringify_repl_health(instance)}; + }; + + { + auto lock = std::shared_lock{coord_instance_lock_}; + std::ranges::transform(repl_instances_, std::back_inserter(instances_status), process_repl_instance_as_leader); + } + } else { + auto const stringify_inst_status = [](ReplicationRole status) -> std::string { + return status == ReplicationRole::MAIN ? 
"main" : "replica"; + }; + + // TODO: (andi) Add capability that followers can also return socket addresses + auto process_repl_instance_as_follower = [&stringify_inst_status](auto const &instance) -> InstanceStatus { + return {.instance_name = instance.config.instance_name, + .cluster_role = stringify_inst_status(instance.status), + .health = "unknown"}; + }; + + std::ranges::transform(raft_state_.GetInstances(), std::back_inserter(instances_status), + process_repl_instance_as_follower); } return instances_status; } auto CoordinatorInstance::TryFailover() -> void { - auto alive_replicas = repl_instances_ | ranges::views::filter(&ReplicationInstance::IsReplica) | - ranges::views::filter(&ReplicationInstance::IsAlive); + auto const is_replica = [this](ReplicationInstance const &instance) { return IsReplica(instance.InstanceName()); }; + + auto alive_replicas = + repl_instances_ | ranges::views::filter(is_replica) | ranges::views::filter(&ReplicationInstance::IsAlive); if (ranges::empty(alive_replicas)) { spdlog::warn("Failover failed since all replicas are down!"); return; } - // TODO: Smarter choice - auto new_main = ranges::begin(alive_replicas); + if (!raft_state_.RequestLeadership()) { + spdlog::error("Failover failed since the instance is not the leader!"); + return; + } + + auto const get_ts = [](ReplicationInstance &replica) { return replica.GetClient().SendGetInstanceTimestampsRpc(); }; + + auto maybe_instance_db_histories = alive_replicas | ranges::views::transform(get_ts) | ranges::to<std::vector>(); + + auto const ts_has_error = [](auto const &res) -> bool { return res.HasError(); }; + + if (std::ranges::any_of(maybe_instance_db_histories, ts_has_error)) { + spdlog::error("Aborting failover as at least one instance didn't provide per database history."); + return; + } + + auto transform_to_pairs = ranges::views::transform([](auto const &zipped) { + auto &[replica, res] = zipped; + return std::make_pair(replica.InstanceName(), res.GetValue()); + }); + + 
auto instance_db_histories = + ranges::views::zip(alive_replicas, maybe_instance_db_histories) | transform_to_pairs | ranges::to<std::vector>(); + + auto [most_up_to_date_instance, latest_epoch, latest_commit_timestamp] = + ChooseMostUpToDateInstance(instance_db_histories); + + spdlog::trace("The most up to date instance is {} with epoch {} and {} latest commit timestamp", + most_up_to_date_instance, latest_epoch, latest_commit_timestamp); // NOLINT + + auto *new_main = &FindReplicationInstance(most_up_to_date_instance); new_main->PauseFrequentCheck(); utils::OnScopeExit scope_exit{[&new_main] { new_main->ResumeFrequentCheck(); }}; @@ -177,41 +191,56 @@ auto CoordinatorInstance::TryFailover() -> void { }; auto const new_main_uuid = utils::UUID{}; + + auto const failed_to_swap = [&new_main_uuid](ReplicationInstance &instance) { + return !instance.SendSwapAndUpdateUUID(new_main_uuid); + }; + // If for some replicas swap fails, for others on successful ping we will revert back on next change // or we will do failover first again and then it will be consistent again - for (auto &other_replica_instance : alive_replicas | ranges::views::filter(is_not_new_main)) { - if (!other_replica_instance.SendSwapAndUpdateUUID(new_main_uuid)) { - spdlog::error(fmt::format("Failed to swap uuid for instance {} which is alive, aborting failover", - other_replica_instance.InstanceName())); - return; - } + if (std::ranges::any_of(alive_replicas | ranges::views::filter(is_not_new_main), failed_to_swap)) { + spdlog::error("Failed to swap uuid for all instances"); + return; } - auto repl_clients_info = repl_instances_ | ranges::views::filter(is_not_new_main) | ranges::views::transform(&ReplicationInstance::ReplicationClientInfo) | ranges::to<ReplicationClientsInfo>(); - if (!new_main->PromoteToMain(new_main_uuid, std::move(repl_clients_info), main_succ_cb_, main_fail_cb_)) { + if (!new_main->PromoteToMain(new_main_uuid, std::move(repl_clients_info), &CoordinatorInstance::MainSuccessCallback, 
+ &CoordinatorInstance::MainFailCallback)) { spdlog::warn("Failover failed since promoting replica to main failed!"); return; } - // TODO: (andi) This should be replicated across all coordinator instances with Raft log - SetMainUUID(new_main_uuid); + + if (!raft_state_.AppendUpdateUUIDLog(new_main_uuid)) { + return; + } + + auto const new_main_instance_name = new_main->InstanceName(); + + if (!raft_state_.AppendSetInstanceAsMainLog(new_main_instance_name)) { + return; + } + spdlog::info("Failover successful! Instance {} promoted to main.", new_main->InstanceName()); } -// TODO: (andi) Make sure you cannot put coordinator instance to the main -auto CoordinatorInstance::SetReplicationInstanceToMain(std::string instance_name) +auto CoordinatorInstance::SetReplicationInstanceToMain(std::string_view instance_name) -> SetInstanceToMainCoordinatorStatus { auto lock = std::lock_guard{coord_instance_lock_}; - if (std::ranges::any_of(repl_instances_, &ReplicationInstance::IsMain)) { + if (raft_state_.MainExists()) { return SetInstanceToMainCoordinatorStatus::MAIN_ALREADY_EXISTS; } + if (!raft_state_.RequestLeadership()) { + return SetInstanceToMainCoordinatorStatus::NOT_LEADER; + } + auto const is_new_main = [&instance_name](ReplicationInstance const &instance) { return instance.InstanceName() == instance_name; }; + auto new_main = std::ranges::find_if(repl_instances_, is_new_main); if (new_main == repl_instances_.end()) { @@ -229,85 +258,93 @@ auto CoordinatorInstance::SetReplicationInstanceToMain(std::string instance_name auto const new_main_uuid = utils::UUID{}; - for (auto &other_instance : repl_instances_ | ranges::views::filter(is_not_new_main)) { - if (!other_instance.SendSwapAndUpdateUUID(new_main_uuid)) { - spdlog::error( - fmt::format("Failed to swap uuid for instance {}, aborting failover", other_instance.InstanceName())); - return SetInstanceToMainCoordinatorStatus::SWAP_UUID_FAILED; - } + auto const failed_to_swap = [&new_main_uuid](ReplicationInstance 
&instance) { + return !instance.SendSwapAndUpdateUUID(new_main_uuid); + }; + + if (std::ranges::any_of(repl_instances_ | ranges::views::filter(is_not_new_main), failed_to_swap)) { + spdlog::error("Failed to swap uuid for all instances"); + return SetInstanceToMainCoordinatorStatus::SWAP_UUID_FAILED; } - ReplicationClientsInfo repl_clients_info; - repl_clients_info.reserve(repl_instances_.size() - 1); - std::ranges::transform(repl_instances_ | ranges::views::filter(is_not_new_main), - std::back_inserter(repl_clients_info), &ReplicationInstance::ReplicationClientInfo); + auto repl_clients_info = repl_instances_ | ranges::views::filter(is_not_new_main) | + ranges::views::transform(&ReplicationInstance::ReplicationClientInfo) | + ranges::to<ReplicationClientsInfo>(); - if (!new_main->PromoteToMain(new_main_uuid, std::move(repl_clients_info), main_succ_cb_, main_fail_cb_)) { + if (!new_main->PromoteToMain(new_main_uuid, std::move(repl_clients_info), &CoordinatorInstance::MainSuccessCallback, + &CoordinatorInstance::MainFailCallback)) { return SetInstanceToMainCoordinatorStatus::COULD_NOT_PROMOTE_TO_MAIN; } - // TODO: (andi) This should be replicated across all coordinator instances with Raft log - SetMainUUID(new_main_uuid); - spdlog::info("Instance {} promoted to main", instance_name); + if (!raft_state_.AppendUpdateUUIDLog(new_main_uuid)) { + return SetInstanceToMainCoordinatorStatus::RAFT_LOG_ERROR; + } + + if (!raft_state_.AppendSetInstanceAsMainLog(instance_name)) { + return SetInstanceToMainCoordinatorStatus::RAFT_LOG_ERROR; + } + + spdlog::info("Instance {} promoted to main on leader", instance_name); return SetInstanceToMainCoordinatorStatus::SUCCESS; } -auto CoordinatorInstance::RegisterReplicationInstance(CoordinatorClientConfig config) +auto CoordinatorInstance::RegisterReplicationInstance(CoordinatorClientConfig const &config) -> RegisterInstanceCoordinatorStatus { auto lock = std::lock_guard{coord_instance_lock_}; - auto instance_name = 
config.instance_name; - - auto const name_matches = [&instance_name](ReplicationInstance const &instance) { - return instance.InstanceName() == instance_name; - }; - - if (std::ranges::any_of(repl_instances_, name_matches)) { + if (std::ranges::any_of(repl_instances_, [instance_name = config.instance_name](ReplicationInstance const &instance) { + return instance.InstanceName() == instance_name; + })) { return RegisterInstanceCoordinatorStatus::NAME_EXISTS; } - auto const socket_address_matches = [&config](ReplicationInstance const &instance) { - return instance.SocketAddress() == config.SocketAddress(); - }; + if (std::ranges::any_of(repl_instances_, [&config](ReplicationInstance const &instance) { + return instance.CoordinatorSocketAddress() == config.CoordinatorSocketAddress(); + })) { + return RegisterInstanceCoordinatorStatus::COORD_ENDPOINT_EXISTS; + } - if (std::ranges::any_of(repl_instances_, socket_address_matches)) { - return RegisterInstanceCoordinatorStatus::ENDPOINT_EXISTS; + if (std::ranges::any_of(repl_instances_, [&config](ReplicationInstance const &instance) { + return instance.ReplicationSocketAddress() == config.ReplicationSocketAddress(); + })) { + return RegisterInstanceCoordinatorStatus::REPL_ENDPOINT_EXISTS; } if (!raft_state_.RequestLeadership()) { return RegisterInstanceCoordinatorStatus::NOT_LEADER; } - auto const res = raft_state_.AppendRegisterReplicationInstance(instance_name); - if (!res->get_accepted()) { - spdlog::error( - "Failed to accept request for registering instance {}. 
Most likely the reason is that the instance is not " - "the " - "leader.", - config.instance_name); - return RegisterInstanceCoordinatorStatus::RAFT_COULD_NOT_ACCEPT; - } + auto const undo_action_ = [this]() { repl_instances_.pop_back(); }; - spdlog::info("Request for registering instance {} accepted", instance_name); - try { - repl_instances_.emplace_back(this, std::move(config), replica_succ_cb_, replica_fail_cb_); - } catch (CoordinatorRegisterInstanceException const &) { + auto *new_instance = &repl_instances_.emplace_back(this, config, client_succ_cb_, client_fail_cb_, + &CoordinatorInstance::ReplicaSuccessCallback, + &CoordinatorInstance::ReplicaFailCallback); + + if (!new_instance->SendDemoteToReplicaRpc()) { + spdlog::error("Failed to send demote to replica rpc for instance {}", config.instance_name); + undo_action_(); return RegisterInstanceCoordinatorStatus::RPC_FAILED; } - if (res->get_result_code() != nuraft::cmd_result_code::OK) { - spdlog::error("Failed to register instance {} with error code {}", instance_name, res->get_result_code()); - return RegisterInstanceCoordinatorStatus::RAFT_COULD_NOT_APPEND; + if (!raft_state_.AppendRegisterReplicationInstanceLog(config)) { + undo_action_(); + return RegisterInstanceCoordinatorStatus::RAFT_LOG_ERROR; } - spdlog::info("Instance {} registered", instance_name); + new_instance->StartFrequentCheck(); + + spdlog::info("Instance {} registered", config.instance_name); return RegisterInstanceCoordinatorStatus::SUCCESS; } -auto CoordinatorInstance::UnregisterReplicationInstance(std::string instance_name) +auto CoordinatorInstance::UnregisterReplicationInstance(std::string_view instance_name) -> UnregisterInstanceCoordinatorStatus { auto lock = std::lock_guard{coord_instance_lock_}; + if (!raft_state_.RequestLeadership()) { + return UnregisterInstanceCoordinatorStatus::NOT_LEADER; + } + auto const name_matches = [&instance_name](ReplicationInstance const &instance) { return instance.InstanceName() == instance_name; }; 
@@ -317,31 +354,208 @@ auto CoordinatorInstance::UnregisterReplicationInstance(std::string instance_nam return UnregisterInstanceCoordinatorStatus::NO_INSTANCE_WITH_NAME; } - if (inst_to_remove->IsMain() && inst_to_remove->IsAlive()) { + auto const is_main = [this](ReplicationInstance const &instance) { + return IsMain(instance.InstanceName()) && instance.GetMainUUID() == raft_state_.GetUUID() && instance.IsAlive(); + }; + + if (is_main(*inst_to_remove)) { return UnregisterInstanceCoordinatorStatus::IS_MAIN; } inst_to_remove->StopFrequentCheck(); - auto curr_main = std::ranges::find_if(repl_instances_, &ReplicationInstance::IsMain); - MG_ASSERT(curr_main != repl_instances_.end(), "There must be a main instance when unregistering a replica"); - if (!curr_main->SendUnregisterReplicaRpc(instance_name)) { - inst_to_remove->StartFrequentCheck(); - return UnregisterInstanceCoordinatorStatus::RPC_FAILED; + + auto curr_main = std::ranges::find_if(repl_instances_, is_main); + + if (curr_main != repl_instances_.end() && curr_main->IsAlive()) { + if (!curr_main->SendUnregisterReplicaRpc(instance_name)) { + inst_to_remove->StartFrequentCheck(); + return UnregisterInstanceCoordinatorStatus::RPC_FAILED; + } } + std::erase_if(repl_instances_, name_matches); + if (!raft_state_.AppendUnregisterReplicationInstanceLog(instance_name)) { + return UnregisterInstanceCoordinatorStatus::RAFT_LOG_ERROR; + } + return UnregisterInstanceCoordinatorStatus::SUCCESS; } -auto CoordinatorInstance::AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, std::string raft_address) - -> void { - raft_state_.AddCoordinatorInstance(raft_server_id, raft_port, std::move(raft_address)); +auto CoordinatorInstance::AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, + std::string_view raft_address) -> void { + raft_state_.AddCoordinatorInstance(raft_server_id, raft_port, raft_address); } -auto CoordinatorInstance::GetMainUUID() const -> utils::UUID { return main_uuid_; } +void 
CoordinatorInstance::MainFailCallback(std::string_view repl_instance_name) { + spdlog::trace("Instance {} performing main fail callback", repl_instance_name); + auto &repl_instance = FindReplicationInstance(repl_instance_name); + repl_instance.OnFailPing(); + const auto &repl_instance_uuid = repl_instance.GetMainUUID(); + MG_ASSERT(repl_instance_uuid.has_value(), "Replication instance must have uuid set"); -// TODO: (andi) Add to the RAFT log. -auto CoordinatorInstance::SetMainUUID(utils::UUID new_uuid) -> void { main_uuid_ = new_uuid; } + // NOLINTNEXTLINE + if (!repl_instance.IsAlive() && raft_state_.GetUUID() == repl_instance_uuid.value()) { + spdlog::info("Cluster without main instance, trying automatic failover"); + TryFailover(); + } +} + +void CoordinatorInstance::MainSuccessCallback(std::string_view repl_instance_name) { + spdlog::trace("Instance {} performing main successful callback", repl_instance_name); + auto &repl_instance = FindReplicationInstance(repl_instance_name); + + if (repl_instance.IsAlive()) { + repl_instance.OnSuccessPing(); + return; + } + + const auto &repl_instance_uuid = repl_instance.GetMainUUID(); + MG_ASSERT(repl_instance_uuid.has_value(), "Instance must have uuid set."); + + // NOLINTNEXTLINE + if (raft_state_.GetUUID() == repl_instance_uuid.value()) { + if (!repl_instance.EnableWritingOnMain()) { + spdlog::error("Failed to enable writing on main instance {}", repl_instance_name); + return; + } + + repl_instance.OnSuccessPing(); + return; + } + + if (!raft_state_.RequestLeadership()) { + spdlog::error("Demoting main instance {} to replica failed since the instance is not the leader!", + repl_instance_name); + return; + } + + if (repl_instance.DemoteToReplica(&CoordinatorInstance::ReplicaSuccessCallback, + &CoordinatorInstance::ReplicaFailCallback)) { + repl_instance.OnSuccessPing(); + spdlog::info("Instance {} demoted to replica", repl_instance_name); + } else { + spdlog::error("Instance {} failed to become replica", 
repl_instance_name); + return; + } + + if (!repl_instance.SendSwapAndUpdateUUID(raft_state_.GetUUID())) { + spdlog::error("Failed to swap uuid for demoted main instance {}", repl_instance_name); + return; + } + + if (!raft_state_.AppendSetInstanceAsReplicaLog(repl_instance_name)) { + return; + } +} + +void CoordinatorInstance::ReplicaSuccessCallback(std::string_view repl_instance_name) { + spdlog::trace("Instance {} performing replica successful callback", repl_instance_name); + auto &repl_instance = FindReplicationInstance(repl_instance_name); + + if (!IsReplica(repl_instance_name)) { + spdlog::error("Aborting replica callback since instance {} is not replica anymore", repl_instance_name); + return; + } + // We need to get replicas UUID from time to time to ensure replica is listening to correct main + // and that it didn't go down for less time than we could notice + // We need to get id of main replica is listening to + // and swap if necessary + if (!repl_instance.EnsureReplicaHasCorrectMainUUID(raft_state_.GetUUID())) { + spdlog::error("Failed to swap uuid for replica instance {} which is alive", repl_instance.InstanceName()); + return; + } + + repl_instance.OnSuccessPing(); +} + +void CoordinatorInstance::ReplicaFailCallback(std::string_view repl_instance_name) { + spdlog::trace("Instance {} performing replica failure callback", repl_instance_name); + auto &repl_instance = FindReplicationInstance(repl_instance_name); + + if (!IsReplica(repl_instance_name)) { + spdlog::error("Aborting replica fail callback since instance {} is not replica anymore", repl_instance_name); + return; + } + + repl_instance.OnFailPing(); +} + +auto CoordinatorInstance::ChooseMostUpToDateInstance(std::span<InstanceNameDbHistories> instance_database_histories) + -> NewMainRes { + std::optional<NewMainRes> new_main_res; + std::for_each( + instance_database_histories.begin(), instance_database_histories.end(), + [&new_main_res](const InstanceNameDbHistories &instance_res_pair) { + const 
auto &[instance_name, instance_db_histories] = instance_res_pair; + + // Find default db for instance and its history + auto default_db_history_data = std::ranges::find_if( + instance_db_histories, [default_db = memgraph::dbms::kDefaultDB]( + const replication_coordination_glue::DatabaseHistory &db_timestamps) { + return db_timestamps.name == default_db; + }); + + std::ranges::for_each( + instance_db_histories, + [&instance_name = instance_name](const replication_coordination_glue::DatabaseHistory &db_history) { + spdlog::debug("Instance {}: name {}, default db {}", instance_name, db_history.name, + memgraph::dbms::kDefaultDB); + }); + + MG_ASSERT(default_db_history_data != instance_db_histories.end(), "No history for instance"); + + const auto &instance_default_db_history = default_db_history_data->history; + + std::ranges::for_each(instance_default_db_history | ranges::views::reverse, + [&instance_name = instance_name](const auto &epoch_history_it) { + spdlog::debug("Instance {}: epoch {}, last_commit_timestamp: {}", instance_name, + std::get<0>(epoch_history_it), std::get<1>(epoch_history_it)); + }); + + // get latest epoch + // get latest timestamp + + if (!new_main_res) { + const auto &[epoch, timestamp] = *instance_default_db_history.crbegin(); + new_main_res = std::make_optional<NewMainRes>({instance_name, epoch, timestamp}); + spdlog::debug("Currently the most up to date instance is {} with epoch {} and {} latest commit timestamp", + instance_name, epoch, timestamp); + return; + } + + bool found_same_point{false}; + std::string last_most_up_to_date_epoch{new_main_res->latest_epoch}; + for (auto [epoch, timestamp] : ranges::reverse_view(instance_default_db_history)) { + if (new_main_res->latest_commit_timestamp < timestamp) { + new_main_res = std::make_optional<NewMainRes>({instance_name, epoch, timestamp}); + spdlog::trace("Found the new most up to date instance {} with epoch {} and {} latest commit timestamp", + instance_name, epoch, timestamp); + } + + // 
we found point at which they were same + if (epoch == last_most_up_to_date_epoch) { + found_same_point = true; + break; + } + } + + if (!found_same_point) { + spdlog::error("Didn't find same history epoch {} for instance {} and instance {}", last_most_up_to_date_epoch, + new_main_res->most_up_to_date_instance, instance_name); + } + }); + + return std::move(*new_main_res); +} + +auto CoordinatorInstance::IsMain(std::string_view instance_name) const -> bool { + return raft_state_.IsMain(instance_name); +} + +auto CoordinatorInstance::IsReplica(std::string_view instance_name) const -> bool { + return raft_state_.IsReplica(instance_name); +} } // namespace memgraph::coordination #endif diff --git a/src/coordination/coordinator_log_store.cpp b/src/coordination/coordinator_log_store.cpp index 37126b747..d5e134492 100644 --- a/src/coordination/coordinator_log_store.cpp +++ b/src/coordination/coordinator_log_store.cpp @@ -62,34 +62,33 @@ ptr<log_entry> CoordinatorLogStore::last_entry() const { uint64_t CoordinatorLogStore::append(ptr<log_entry> &entry) { ptr<log_entry> clone = MakeClone(entry); - uint64_t next_slot{0}; - { - auto lock = std::lock_guard{logs_lock_}; - next_slot = start_idx_ + logs_.size() - 1; - logs_[next_slot] = clone; - } + + auto lock = std::lock_guard{logs_lock_}; + uint64_t next_slot = start_idx_ + logs_.size() - 1; + logs_[next_slot] = clone; + return next_slot; } +// TODO: (andi) I think this is used for resolving conflicts inside NuRaft, check... +// different compared to in_memory_log_store.cxx void CoordinatorLogStore::write_at(uint64_t index, ptr<log_entry> &entry) { ptr<log_entry> clone = MakeClone(entry); // Discard all logs equal to or greater than `index. 
- { - auto lock = std::lock_guard{logs_lock_}; - auto itr = logs_.lower_bound(index); - while (itr != logs_.end()) { - itr = logs_.erase(itr); - } - logs_[index] = clone; + auto lock = std::lock_guard{logs_lock_}; + auto itr = logs_.lower_bound(index); + while (itr != logs_.end()) { + itr = logs_.erase(itr); } + logs_[index] = clone; } ptr<std::vector<ptr<log_entry>>> CoordinatorLogStore::log_entries(uint64_t start, uint64_t end) { auto ret = cs_new<std::vector<ptr<log_entry>>>(); ret->resize(end - start); - for (uint64_t i = start, curr_index = 0; i < end; ++i, ++curr_index) { + for (uint64_t i = start, curr_index = 0; i < end; i++, curr_index++) { ptr<log_entry> src = nullptr; { auto lock = std::lock_guard{logs_lock_}; @@ -105,21 +104,14 @@ ptr<std::vector<ptr<log_entry>>> CoordinatorLogStore::log_entries(uint64_t start } ptr<log_entry> CoordinatorLogStore::entry_at(uint64_t index) { - ptr<log_entry> src = nullptr; - { - auto lock = std::lock_guard{logs_lock_}; - src = FindOrDefault_(index); - } + auto lock = std::lock_guard{logs_lock_}; + ptr<log_entry> src = FindOrDefault_(index); return MakeClone(src); } uint64_t CoordinatorLogStore::term_at(uint64_t index) { - uint64_t term = 0; - { - auto lock = std::lock_guard{logs_lock_}; - term = FindOrDefault_(index)->get_term(); - } - return term; + auto lock = std::lock_guard{logs_lock_}; + return FindOrDefault_(index)->get_term(); } ptr<buffer> CoordinatorLogStore::pack(uint64_t index, int32 cnt) { diff --git a/src/coordination/coordinator_rpc.cpp b/src/coordination/coordinator_rpc.cpp index 4115f1979..815693824 100644 --- a/src/coordination/coordinator_rpc.cpp +++ b/src/coordination/coordinator_rpc.cpp @@ -76,9 +76,9 @@ void EnableWritingOnMainRes::Load(EnableWritingOnMainRes *self, memgraph::slk::R memgraph::slk::Load(self, reader); } -void EnableWritingOnMainReq::Save(EnableWritingOnMainReq const &self, memgraph::slk::Builder *builder) {} +void EnableWritingOnMainReq::Save(EnableWritingOnMainReq const & /*self*/, 
memgraph::slk::Builder * /*builder*/) {} -void EnableWritingOnMainReq::Load(EnableWritingOnMainReq *self, memgraph::slk::Reader *reader) {} +void EnableWritingOnMainReq::Load(EnableWritingOnMainReq * /*self*/, memgraph::slk::Reader * /*reader*/) {} // GetInstanceUUID void GetInstanceUUIDReq::Save(const GetInstanceUUIDReq &self, memgraph::slk::Builder *builder) { @@ -97,6 +97,24 @@ void GetInstanceUUIDRes::Load(GetInstanceUUIDRes *self, memgraph::slk::Reader *r memgraph::slk::Load(self, reader); } +// GetDatabaseHistoriesRpc + +void GetDatabaseHistoriesReq::Save(const GetDatabaseHistoriesReq & /*self*/, memgraph::slk::Builder * /*builder*/) { + /* nothing to serialize */ +} + +void GetDatabaseHistoriesReq::Load(GetDatabaseHistoriesReq * /*self*/, memgraph::slk::Reader * /*reader*/) { + /* nothing to serialize */ +} + +void GetDatabaseHistoriesRes::Save(const GetDatabaseHistoriesRes &self, memgraph::slk::Builder *builder) { + memgraph::slk::Save(self, builder); +} + +void GetDatabaseHistoriesRes::Load(GetDatabaseHistoriesRes *self, memgraph::slk::Reader *reader) { + memgraph::slk::Load(self, reader); +} + } // namespace coordination constexpr utils::TypeInfo coordination::PromoteReplicaToMainReq::kType{utils::TypeId::COORD_FAILOVER_REQ, @@ -130,6 +148,12 @@ constexpr utils::TypeInfo coordination::GetInstanceUUIDReq::kType{utils::TypeId: constexpr utils::TypeInfo coordination::GetInstanceUUIDRes::kType{utils::TypeId::COORD_GET_UUID_RES, "CoordGetUUIDRes", nullptr}; +constexpr utils::TypeInfo coordination::GetDatabaseHistoriesReq::kType{utils::TypeId::COORD_GET_INSTANCE_DATABASES_REQ, + "GetInstanceDatabasesReq", nullptr}; + +constexpr utils::TypeInfo coordination::GetDatabaseHistoriesRes::kType{utils::TypeId::COORD_GET_INSTANCE_DATABASES_RES, + "GetInstanceDatabasesRes", nullptr}; + namespace slk { // PromoteReplicaToMainRpc @@ -213,6 +237,16 @@ void Load(memgraph::coordination::GetInstanceUUIDRes *self, memgraph::slk::Reade memgraph::slk::Load(&self->uuid, reader); } 
+// GetInstanceTimestampsReq + +void Save(const memgraph::coordination::GetDatabaseHistoriesRes &self, memgraph::slk::Builder *builder) { + memgraph::slk::Save(self.database_histories, builder); +} + +void Load(memgraph::coordination::GetDatabaseHistoriesRes *self, memgraph::slk::Reader *reader) { + memgraph::slk::Load(&self->database_histories, reader); +} + } // namespace slk } // namespace memgraph diff --git a/src/coordination/coordinator_state.cpp b/src/coordination/coordinator_state.cpp index 28d6c604e..f429cd5a7 100644 --- a/src/coordination/coordinator_state.cpp +++ b/src/coordination/coordinator_state.cpp @@ -41,7 +41,7 @@ CoordinatorState::CoordinatorState() { } } -auto CoordinatorState::RegisterReplicationInstance(CoordinatorClientConfig config) +auto CoordinatorState::RegisterReplicationInstance(CoordinatorClientConfig const &config) -> RegisterInstanceCoordinatorStatus { MG_ASSERT(std::holds_alternative<CoordinatorInstance>(data_), "Coordinator cannot register replica since variant holds wrong alternative"); @@ -56,7 +56,8 @@ auto CoordinatorState::RegisterReplicationInstance(CoordinatorClientConfig confi data_); } -auto CoordinatorState::UnregisterReplicationInstance(std::string instance_name) -> UnregisterInstanceCoordinatorStatus { +auto CoordinatorState::UnregisterReplicationInstance(std::string_view instance_name) + -> UnregisterInstanceCoordinatorStatus { MG_ASSERT(std::holds_alternative<CoordinatorInstance>(data_), "Coordinator cannot unregister instance since variant holds wrong alternative"); @@ -70,7 +71,8 @@ auto CoordinatorState::UnregisterReplicationInstance(std::string instance_name) data_); } -auto CoordinatorState::SetReplicationInstanceToMain(std::string instance_name) -> SetInstanceToMainCoordinatorStatus { +auto CoordinatorState::SetReplicationInstanceToMain(std::string_view instance_name) + -> SetInstanceToMainCoordinatorStatus { MG_ASSERT(std::holds_alternative<CoordinatorInstance>(data_), "Coordinator cannot register replica since 
variant holds wrong alternative"); @@ -96,8 +98,8 @@ auto CoordinatorState::GetCoordinatorServer() const -> CoordinatorServer & { return *std::get<CoordinatorMainReplicaData>(data_).coordinator_server_; } -auto CoordinatorState::AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, std::string raft_address) - -> void { +auto CoordinatorState::AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, + std::string_view raft_address) -> void { MG_ASSERT(std::holds_alternative<CoordinatorInstance>(data_), "Coordinator cannot register replica since variant holds wrong alternative"); return std::get<CoordinatorInstance>(data_).AddCoordinatorInstance(raft_server_id, raft_port, raft_address); diff --git a/src/coordination/coordinator_state_machine.cpp b/src/coordination/coordinator_state_machine.cpp index b939bd304..631c3c4d2 100644 --- a/src/coordination/coordinator_state_machine.cpp +++ b/src/coordination/coordinator_state_machine.cpp @@ -12,100 +12,204 @@ #ifdef MG_ENTERPRISE #include "nuraft/coordinator_state_machine.hpp" +#include "utils/logging.hpp" + +namespace { +constexpr int MAX_SNAPSHOTS = 3; +} // namespace namespace memgraph::coordination { -auto CoordinatorStateMachine::EncodeRegisterReplicationInstance(const std::string &name) -> ptr<buffer> { - std::string str_log = name + "_replica"; - ptr<buffer> log = buffer::alloc(sizeof(uint32_t) + str_log.size()); - buffer_serializer bs(log); - bs.put_str(str_log); - return log; +auto CoordinatorStateMachine::FindCurrentMainInstanceName() const -> std::optional<std::string> { + return cluster_state_.FindCurrentMainInstanceName(); } -auto CoordinatorStateMachine::DecodeRegisterReplicationInstance(buffer &data) -> std::string { +auto CoordinatorStateMachine::MainExists() const -> bool { return cluster_state_.MainExists(); } + +auto CoordinatorStateMachine::IsMain(std::string_view instance_name) const -> bool { + return cluster_state_.IsMain(instance_name); +} + +auto 
CoordinatorStateMachine::IsReplica(std::string_view instance_name) const -> bool { + return cluster_state_.IsReplica(instance_name); +} + +auto CoordinatorStateMachine::CreateLog(nlohmann::json &&log) -> ptr<buffer> { + auto const log_dump = log.dump(); + ptr<buffer> log_buf = buffer::alloc(sizeof(uint32_t) + log_dump.size()); + buffer_serializer bs(log_buf); + bs.put_str(log_dump); + return log_buf; +} + +auto CoordinatorStateMachine::SerializeRegisterInstance(CoordinatorClientConfig const &config) -> ptr<buffer> { + return CreateLog({{"action", RaftLogAction::REGISTER_REPLICATION_INSTANCE}, {"info", config}}); +} + +auto CoordinatorStateMachine::SerializeUnregisterInstance(std::string_view instance_name) -> ptr<buffer> { + return CreateLog({{"action", RaftLogAction::UNREGISTER_REPLICATION_INSTANCE}, {"info", instance_name}}); +} + +auto CoordinatorStateMachine::SerializeSetInstanceAsMain(std::string_view instance_name) -> ptr<buffer> { + return CreateLog({{"action", RaftLogAction::SET_INSTANCE_AS_MAIN}, {"info", instance_name}}); +} + +auto CoordinatorStateMachine::SerializeSetInstanceAsReplica(std::string_view instance_name) -> ptr<buffer> { + return CreateLog({{"action", RaftLogAction::SET_INSTANCE_AS_REPLICA}, {"info", instance_name}}); +} + +auto CoordinatorStateMachine::SerializeUpdateUUID(utils::UUID const &uuid) -> ptr<buffer> { + return CreateLog({{"action", RaftLogAction::UPDATE_UUID}, {"info", uuid}}); +} + +auto CoordinatorStateMachine::DecodeLog(buffer &data) -> std::pair<TRaftLog, RaftLogAction> { buffer_serializer bs(data); - return bs.get_str(); + auto const json = nlohmann::json::parse(bs.get_str()); + + auto const action = json["action"].get<RaftLogAction>(); + auto const &info = json["info"]; + + switch (action) { + case RaftLogAction::REGISTER_REPLICATION_INSTANCE: + return {info.get<CoordinatorClientConfig>(), action}; + case RaftLogAction::UPDATE_UUID: + return {info.get<utils::UUID>(), action}; + case 
RaftLogAction::UNREGISTER_REPLICATION_INSTANCE: + case RaftLogAction::SET_INSTANCE_AS_MAIN: + [[fallthrough]]; + case RaftLogAction::SET_INSTANCE_AS_REPLICA: + return {info.get<std::string>(), action}; + } + throw std::runtime_error("Unknown action"); } -auto CoordinatorStateMachine::pre_commit(ulong const log_idx, buffer &data) -> ptr<buffer> { - buffer_serializer bs(data); - std::string str = bs.get_str(); - - spdlog::info("pre_commit {} : {}", log_idx, str); - return nullptr; -} +auto CoordinatorStateMachine::pre_commit(ulong const /*log_idx*/, buffer & /*data*/) -> ptr<buffer> { return nullptr; } auto CoordinatorStateMachine::commit(ulong const log_idx, buffer &data) -> ptr<buffer> { - buffer_serializer bs(data); - std::string str = bs.get_str(); - - spdlog::info("commit {} : {}", log_idx, str); - + spdlog::debug("Commit: log_idx={}, data.size()={}", log_idx, data.size()); + auto const [parsed_data, log_action] = DecodeLog(data); + cluster_state_.DoAction(parsed_data, log_action); last_committed_idx_ = log_idx; - return nullptr; + + // Return raft log number + ptr<buffer> ret = buffer::alloc(sizeof(log_idx)); + buffer_serializer bs_ret(ret); + bs_ret.put_u64(log_idx); + return ret; } auto CoordinatorStateMachine::commit_config(ulong const log_idx, ptr<cluster_config> & /*new_conf*/) -> void { last_committed_idx_ = log_idx; + spdlog::debug("Commit config: log_idx={}", log_idx); } auto CoordinatorStateMachine::rollback(ulong const log_idx, buffer &data) -> void { - buffer_serializer bs(data); - std::string str = bs.get_str(); - - spdlog::info("rollback {} : {}", log_idx, str); + // NOTE: Nothing since we don't do anything in pre_commit + spdlog::debug("Rollback: log_idx={}, data.size()={}", log_idx, data.size()); } -auto CoordinatorStateMachine::read_logical_snp_obj(snapshot & /*snapshot*/, void *& /*user_snp_ctx*/, ulong /*obj_id*/, +auto CoordinatorStateMachine::read_logical_snp_obj(snapshot &snapshot, void *& /*user_snp_ctx*/, ulong obj_id, ptr<buffer> 
&data_out, bool &is_last_obj) -> int { - // Put dummy data. - data_out = buffer::alloc(sizeof(int32)); - buffer_serializer bs(data_out); - bs.put_i32(0); + spdlog::debug("read logical snapshot object, obj_id: {}", obj_id); + + ptr<SnapshotCtx> ctx = nullptr; + { + auto ll = std::lock_guard{snapshots_lock_}; + auto entry = snapshots_.find(snapshot.get_last_log_idx()); + if (entry == snapshots_.end()) { + data_out = nullptr; + is_last_obj = true; + return 0; + } + ctx = entry->second; + } + + if (obj_id == 0) { + // Object ID == 0: first object, put dummy data. + data_out = buffer::alloc(sizeof(int32)); + buffer_serializer bs(data_out); + bs.put_i32(0); + is_last_obj = false; + } else { + // Object ID > 0: second object, put actual value. + ctx->cluster_state_.Serialize(data_out); + } - is_last_obj = true; return 0; } -auto CoordinatorStateMachine::save_logical_snp_obj(snapshot &s, ulong &obj_id, buffer & /*data*/, bool /*is_first_obj*/, - bool /*is_last_obj*/) -> void { - spdlog::info("save snapshot {} term {} object ID", s.get_last_log_idx(), s.get_last_log_term(), obj_id); - // Request next object. 
- obj_id++; +auto CoordinatorStateMachine::save_logical_snp_obj(snapshot &snapshot, ulong &obj_id, buffer &data, bool is_first_obj, + bool is_last_obj) -> void { + spdlog::debug("save logical snapshot object, obj_id: {}, is_first_obj: {}, is_last_obj: {}", obj_id, is_first_obj, + is_last_obj); + + if (obj_id == 0) { + ptr<buffer> snp_buf = snapshot.serialize(); + auto ss = snapshot::deserialize(*snp_buf); + create_snapshot_internal(ss); + } else { + auto cluster_state = CoordinatorClusterState::Deserialize(data); + + auto ll = std::lock_guard{snapshots_lock_}; + auto entry = snapshots_.find(snapshot.get_last_log_idx()); + DMG_ASSERT(entry != snapshots_.end()); + entry->second->cluster_state_ = cluster_state; + } } auto CoordinatorStateMachine::apply_snapshot(snapshot &s) -> bool { - spdlog::info("apply snapshot {} term {}", s.get_last_log_idx(), s.get_last_log_term()); - { - auto lock = std::lock_guard{last_snapshot_lock_}; - ptr<buffer> snp_buf = s.serialize(); - last_snapshot_ = snapshot::deserialize(*snp_buf); - } + auto ll = std::lock_guard{snapshots_lock_}; + spdlog::debug("apply snapshot, last_log_idx: {}", s.get_last_log_idx()); + + auto entry = snapshots_.find(s.get_last_log_idx()); + if (entry == snapshots_.end()) return false; + + cluster_state_ = entry->second->cluster_state_; return true; } auto CoordinatorStateMachine::free_user_snp_ctx(void *&user_snp_ctx) -> void {} auto CoordinatorStateMachine::last_snapshot() -> ptr<snapshot> { - auto lock = std::lock_guard{last_snapshot_lock_}; - return last_snapshot_; + auto ll = std::lock_guard{snapshots_lock_}; + spdlog::debug("last_snapshot"); + auto entry = snapshots_.rbegin(); + if (entry == snapshots_.rend()) return nullptr; + + ptr<SnapshotCtx> ctx = entry->second; + return ctx->snapshot_; } auto CoordinatorStateMachine::last_commit_index() -> ulong { return last_committed_idx_; } auto CoordinatorStateMachine::create_snapshot(snapshot &s, async_result<bool>::handler_type &when_done) -> void { - 
spdlog::info("create snapshot {} term {}", s.get_last_log_idx(), s.get_last_log_term()); - // Clone snapshot from `s`. - { - auto lock = std::lock_guard{last_snapshot_lock_}; - ptr<buffer> snp_buf = s.serialize(); - last_snapshot_ = snapshot::deserialize(*snp_buf); - } + spdlog::debug("create_snapshot, last_log_idx: {}", s.get_last_log_idx()); + ptr<buffer> snp_buf = s.serialize(); + ptr<snapshot> ss = snapshot::deserialize(*snp_buf); + create_snapshot_internal(ss); + ptr<std::exception> except(nullptr); bool ret = true; when_done(ret, except); } +auto CoordinatorStateMachine::create_snapshot_internal(ptr<snapshot> snapshot) -> void { + auto ll = std::lock_guard{snapshots_lock_}; + spdlog::debug("create_snapshot_internal, last_log_idx: {}", snapshot->get_last_log_idx()); + + auto ctx = cs_new<SnapshotCtx>(snapshot, cluster_state_); + snapshots_[snapshot->get_last_log_idx()] = ctx; + + while (snapshots_.size() > MAX_SNAPSHOTS) { + snapshots_.erase(snapshots_.begin()); + } +} + +auto CoordinatorStateMachine::GetInstances() const -> std::vector<InstanceState> { + return cluster_state_.GetInstances(); +} + +auto CoordinatorStateMachine::GetUUID() const -> utils::UUID { return cluster_state_.GetUUID(); } + } // namespace memgraph::coordination #endif diff --git a/src/coordination/include/coordination/coordinator_client.hpp b/src/coordination/include/coordination/coordinator_client.hpp index 5e10af89d..5d4795f81 100644 --- a/src/coordination/include/coordination/coordinator_client.hpp +++ b/src/coordination/include/coordination/coordinator_client.hpp @@ -14,6 +14,7 @@ #ifdef MG_ENTERPRISE #include "coordination/coordinator_config.hpp" +#include "replication_coordination_glue/common.hpp" #include "rpc/client.hpp" #include "rpc_errors.hpp" #include "utils/result.hpp" @@ -23,13 +24,13 @@ namespace memgraph::coordination { class CoordinatorInstance; -using HealthCheckCallback = std::function<void(CoordinatorInstance *, std::string_view)>; +using HealthCheckClientCallback = 
std::function<void(CoordinatorInstance *, std::string_view)>; using ReplicationClientsInfo = std::vector<ReplClientInfo>; class CoordinatorClient { public: explicit CoordinatorClient(CoordinatorInstance *coord_instance, CoordinatorClientConfig config, - HealthCheckCallback succ_cb, HealthCheckCallback fail_cb); + HealthCheckClientCallback succ_cb, HealthCheckClientCallback fail_cb); ~CoordinatorClient() = default; @@ -45,16 +46,17 @@ class CoordinatorClient { void ResumeFrequentCheck(); auto InstanceName() const -> std::string; - auto SocketAddress() const -> std::string; + auto CoordinatorSocketAddress() const -> std::string; + auto ReplicationSocketAddress() const -> std::string; [[nodiscard]] auto DemoteToReplica() const -> bool; - auto SendPromoteReplicaToMainRpc(const utils::UUID &uuid, ReplicationClientsInfo replication_clients_info) const + auto SendPromoteReplicaToMainRpc(utils::UUID const &uuid, ReplicationClientsInfo replication_clients_info) const -> bool; - auto SendSwapMainUUIDRpc(const utils::UUID &uuid) const -> bool; + auto SendSwapMainUUIDRpc(utils::UUID const &uuid) const -> bool; - auto SendUnregisterReplicaRpc(std::string const &instance_name) const -> bool; + auto SendUnregisterReplicaRpc(std::string_view instance_name) const -> bool; auto SendEnableWritingOnMainRpc() const -> bool; @@ -62,7 +64,8 @@ class CoordinatorClient { auto ReplicationClientInfo() const -> ReplClientInfo; - auto SetCallbacks(HealthCheckCallback succ_cb, HealthCheckCallback fail_cb) -> void; + auto SendGetInstanceTimestampsRpc() const + -> utils::BasicResult<GetInstanceUUIDError, replication_coordination_glue::DatabaseHistories>; auto RpcClient() -> rpc::Client & { return rpc_client_; } @@ -82,8 +85,8 @@ class CoordinatorClient { CoordinatorClientConfig config_; CoordinatorInstance *coord_instance_; - HealthCheckCallback succ_cb_; - HealthCheckCallback fail_cb_; + HealthCheckClientCallback succ_cb_; + HealthCheckClientCallback fail_cb_; }; } // namespace 
memgraph::coordination diff --git a/src/coordination/include/coordination/coordinator_config.hpp b/src/coordination/include/coordination/coordinator_config.hpp index df7a5f94f..127a365eb 100644 --- a/src/coordination/include/coordination/coordinator_config.hpp +++ b/src/coordination/include/coordination/coordinator_config.hpp @@ -14,12 +14,16 @@ #ifdef MG_ENTERPRISE #include "replication_coordination_glue/mode.hpp" +#include "utils/string.hpp" #include <chrono> #include <cstdint> #include <optional> #include <string> +#include <fmt/format.h> +#include "json/json.hpp" + namespace memgraph::coordination { inline constexpr auto *kDefaultReplicationServerIp = "0.0.0.0"; @@ -32,7 +36,11 @@ struct CoordinatorClientConfig { std::chrono::seconds instance_down_timeout_sec{5}; std::chrono::seconds instance_get_uuid_frequency_sec{10}; - auto SocketAddress() const -> std::string { return ip_address + ":" + std::to_string(port); } + auto CoordinatorSocketAddress() const -> std::string { return fmt::format("{}:{}", ip_address, port); } + auto ReplicationSocketAddress() const -> std::string { + return fmt::format("{}:{}", replication_client_info.replication_ip_address, + replication_client_info.replication_port); + } struct ReplicationClientInfo { std::string instance_name; @@ -75,5 +83,11 @@ struct CoordinatorServerConfig { friend bool operator==(CoordinatorServerConfig const &, CoordinatorServerConfig const &) = default; }; +void to_json(nlohmann::json &j, CoordinatorClientConfig const &config); +void from_json(nlohmann::json const &j, CoordinatorClientConfig &config); + +void to_json(nlohmann::json &j, ReplClientInfo const &config); +void from_json(nlohmann::json const &j, ReplClientInfo &config); + } // namespace memgraph::coordination #endif diff --git a/src/coordination/include/coordination/coordinator_exceptions.hpp b/src/coordination/include/coordination/coordinator_exceptions.hpp index 59a2e89d8..7a967f80b 100644 --- 
a/src/coordination/include/coordination/coordinator_exceptions.hpp +++ b/src/coordination/include/coordination/coordinator_exceptions.hpp @@ -83,5 +83,16 @@ class RaftCouldNotParseFlagsException final : public utils::BasicException { SPECIALIZE_GET_EXCEPTION_NAME(RaftCouldNotParseFlagsException) }; +class InvalidRaftLogActionException final : public utils::BasicException { + public: + explicit InvalidRaftLogActionException(std::string_view what) noexcept : BasicException(what) {} + + template <class... Args> + explicit InvalidRaftLogActionException(fmt::format_string<Args...> fmt, Args &&...args) noexcept + : InvalidRaftLogActionException(fmt::format(fmt, std::forward<Args>(args)...)) {} + + SPECIALIZE_GET_EXCEPTION_NAME(InvalidRaftLogActionException) +}; + } // namespace memgraph::coordination #endif diff --git a/src/coordination/include/coordination/coordinator_handlers.hpp b/src/coordination/include/coordination/coordinator_handlers.hpp index b9ed4b519..18aecc9cf 100644 --- a/src/coordination/include/coordination/coordinator_handlers.hpp +++ b/src/coordination/include/coordination/coordinator_handlers.hpp @@ -41,6 +41,9 @@ class CoordinatorHandlers { static void GetInstanceUUIDHandler(replication::ReplicationHandler &replication_handler, slk::Reader *req_reader, slk::Builder *res_builder); + + static void GetDatabaseHistoriesHandler(replication::ReplicationHandler &replication_handler, slk::Reader *req_reader, + slk::Builder *res_builder); }; } // namespace memgraph::dbms diff --git a/src/coordination/include/coordination/coordinator_instance.hpp b/src/coordination/include/coordination/coordinator_instance.hpp index 15b377ed9..10549f468 100644 --- a/src/coordination/include/coordination/coordinator_instance.hpp +++ b/src/coordination/include/coordination/coordinator_instance.hpp @@ -18,6 +18,7 @@ #include "coordination/raft_state.hpp" #include "coordination/register_main_replica_coordinator_status.hpp" #include "coordination/replication_instance.hpp" +#include 
"utils/resource_lock.hpp" #include "utils/rw_lock.hpp" #include "utils/thread_pool.hpp" @@ -25,33 +26,54 @@ namespace memgraph::coordination { +struct NewMainRes { + std::string most_up_to_date_instance; + std::string latest_epoch; + uint64_t latest_commit_timestamp; +}; +using InstanceNameDbHistories = std::pair<std::string, replication_coordination_glue::DatabaseHistories>; + class CoordinatorInstance { public: CoordinatorInstance(); - [[nodiscard]] auto RegisterReplicationInstance(CoordinatorClientConfig config) -> RegisterInstanceCoordinatorStatus; - [[nodiscard]] auto UnregisterReplicationInstance(std::string instance_name) -> UnregisterInstanceCoordinatorStatus; + [[nodiscard]] auto RegisterReplicationInstance(CoordinatorClientConfig const &config) + -> RegisterInstanceCoordinatorStatus; + [[nodiscard]] auto UnregisterReplicationInstance(std::string_view instance_name) + -> UnregisterInstanceCoordinatorStatus; - [[nodiscard]] auto SetReplicationInstanceToMain(std::string instance_name) -> SetInstanceToMainCoordinatorStatus; + [[nodiscard]] auto SetReplicationInstanceToMain(std::string_view instance_name) -> SetInstanceToMainCoordinatorStatus; auto ShowInstances() const -> std::vector<InstanceStatus>; auto TryFailover() -> void; - auto AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, std::string raft_address) -> void; + auto AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, std::string_view raft_address) -> void; - auto GetMainUUID() const -> utils::UUID; - - auto SetMainUUID(utils::UUID new_uuid) -> void; + static auto ChooseMostUpToDateInstance(std::span<InstanceNameDbHistories> histories) -> NewMainRes; private: - HealthCheckCallback main_succ_cb_, main_fail_cb_, replica_succ_cb_, replica_fail_cb_; + HealthCheckClientCallback client_succ_cb_, client_fail_cb_; - // NOTE: Must be std::list because we rely on pointer stability + auto OnRaftCommitCallback(TRaftLog const &log_entry, RaftLogAction log_action) -> void; + + auto 
FindReplicationInstance(std::string_view replication_instance_name) -> ReplicationInstance &; + + void MainFailCallback(std::string_view); + + void MainSuccessCallback(std::string_view); + + void ReplicaSuccessCallback(std::string_view); + + void ReplicaFailCallback(std::string_view); + + auto IsMain(std::string_view instance_name) const -> bool; + auto IsReplica(std::string_view instance_name) const -> bool; + + // NOTE: Must be std::list because we rely on pointer stability. + // Leader and followers should both have same view on repl_instances_ std::list<ReplicationInstance> repl_instances_; - mutable utils::RWLock coord_instance_lock_{utils::RWLock::Priority::READ}; - - utils::UUID main_uuid_; + mutable utils::ResourceLock coord_instance_lock_{}; RaftState raft_state_; }; diff --git a/src/coordination/include/coordination/coordinator_rpc.hpp b/src/coordination/include/coordination/coordinator_rpc.hpp index 1578b4577..d799b2955 100644 --- a/src/coordination/include/coordination/coordinator_rpc.hpp +++ b/src/coordination/include/coordination/coordinator_rpc.hpp @@ -15,6 +15,7 @@ #ifdef MG_ENTERPRISE #include "coordination/coordinator_config.hpp" +#include "replication_coordination_glue/common.hpp" #include "rpc/messages.hpp" #include "slk/serialization.hpp" @@ -89,7 +90,7 @@ struct UnregisterReplicaReq { static void Load(UnregisterReplicaReq *self, memgraph::slk::Reader *reader); static void Save(UnregisterReplicaReq const &self, memgraph::slk::Builder *builder); - explicit UnregisterReplicaReq(std::string instance_name) : instance_name(std::move(instance_name)) {} + explicit UnregisterReplicaReq(std::string_view inst_name) : instance_name(inst_name) {} UnregisterReplicaReq() = default; @@ -161,6 +162,32 @@ struct GetInstanceUUIDRes { using GetInstanceUUIDRpc = rpc::RequestResponse<GetInstanceUUIDReq, GetInstanceUUIDRes>; +struct GetDatabaseHistoriesReq { + static const utils::TypeInfo kType; + static const utils::TypeInfo &GetTypeInfo() { return kType; } + + 
static void Load(GetDatabaseHistoriesReq *self, memgraph::slk::Reader *reader); + static void Save(const GetDatabaseHistoriesReq &self, memgraph::slk::Builder *builder); + + GetDatabaseHistoriesReq() = default; +}; + +struct GetDatabaseHistoriesRes { + static const utils::TypeInfo kType; + static const utils::TypeInfo &GetTypeInfo() { return kType; } + + static void Load(GetDatabaseHistoriesRes *self, memgraph::slk::Reader *reader); + static void Save(const GetDatabaseHistoriesRes &self, memgraph::slk::Builder *builder); + + explicit GetDatabaseHistoriesRes(const replication_coordination_glue::DatabaseHistories &database_histories) + : database_histories(database_histories) {} + GetDatabaseHistoriesRes() = default; + + replication_coordination_glue::DatabaseHistories database_histories; +}; + +using GetDatabaseHistoriesRpc = rpc::RequestResponse<GetDatabaseHistoriesReq, GetDatabaseHistoriesRes>; + } // namespace memgraph::coordination // SLK serialization declarations @@ -183,15 +210,21 @@ void Save(const memgraph::coordination::GetInstanceUUIDReq &self, memgraph::slk: void Load(memgraph::coordination::GetInstanceUUIDReq *self, memgraph::slk::Reader *reader); void Save(const memgraph::coordination::GetInstanceUUIDRes &self, memgraph::slk::Builder *builder); void Load(memgraph::coordination::GetInstanceUUIDRes *self, memgraph::slk::Reader *reader); + // UnregisterReplicaRpc void Save(memgraph::coordination::UnregisterReplicaRes const &self, memgraph::slk::Builder *builder); void Load(memgraph::coordination::UnregisterReplicaRes *self, memgraph::slk::Reader *reader); void Save(memgraph::coordination::UnregisterReplicaReq const &self, memgraph::slk::Builder *builder); void Load(memgraph::coordination::UnregisterReplicaReq *self, memgraph::slk::Reader *reader); +// EnableWritingOnMainRpc void Save(memgraph::coordination::EnableWritingOnMainRes const &self, memgraph::slk::Builder *builder); void Load(memgraph::coordination::EnableWritingOnMainRes *self, 
memgraph::slk::Reader *reader); +// GetDatabaseHistoriesRpc +void Save(const memgraph::coordination::GetDatabaseHistoriesRes &self, memgraph::slk::Builder *builder); +void Load(memgraph::coordination::GetDatabaseHistoriesRes *self, memgraph::slk::Reader *reader); + } // namespace memgraph::slk #endif diff --git a/src/coordination/include/coordination/coordinator_slk.hpp b/src/coordination/include/coordination/coordinator_slk.hpp index 49834be41..ee393b7b6 100644 --- a/src/coordination/include/coordination/coordinator_slk.hpp +++ b/src/coordination/include/coordination/coordinator_slk.hpp @@ -14,6 +14,7 @@ #ifdef MG_ENTERPRISE #include "coordination/coordinator_config.hpp" +#include "replication_coordination_glue/common.hpp" #include "slk/serialization.hpp" #include "slk/streams.hpp" @@ -34,5 +35,18 @@ inline void Load(ReplicationClientInfo *obj, Reader *reader) { Load(&obj->replication_ip_address, reader); Load(&obj->replication_port, reader); } + +inline void Save(const replication_coordination_glue::DatabaseHistory &obj, Builder *builder) { + Save(obj.db_uuid, builder); + Save(obj.history, builder); + Save(obj.name, builder); +} + +inline void Load(replication_coordination_glue::DatabaseHistory *obj, Reader *reader) { + Load(&obj->db_uuid, reader); + Load(&obj->history, reader); + Load(&obj->name, reader); +} + } // namespace memgraph::slk #endif diff --git a/src/coordination/include/coordination/coordinator_state.hpp b/src/coordination/include/coordination/coordinator_state.hpp index 256af66f9..400c36940 100644 --- a/src/coordination/include/coordination/coordinator_state.hpp +++ b/src/coordination/include/coordination/coordinator_state.hpp @@ -33,14 +33,16 @@ class CoordinatorState { CoordinatorState(CoordinatorState &&) noexcept = delete; CoordinatorState &operator=(CoordinatorState &&) noexcept = delete; - [[nodiscard]] auto RegisterReplicationInstance(CoordinatorClientConfig config) -> RegisterInstanceCoordinatorStatus; - [[nodiscard]] auto 
UnregisterReplicationInstance(std::string instance_name) -> UnregisterInstanceCoordinatorStatus; + [[nodiscard]] auto RegisterReplicationInstance(CoordinatorClientConfig const &config) + -> RegisterInstanceCoordinatorStatus; + [[nodiscard]] auto UnregisterReplicationInstance(std::string_view instance_name) + -> UnregisterInstanceCoordinatorStatus; - [[nodiscard]] auto SetReplicationInstanceToMain(std::string instance_name) -> SetInstanceToMainCoordinatorStatus; + [[nodiscard]] auto SetReplicationInstanceToMain(std::string_view instance_name) -> SetInstanceToMainCoordinatorStatus; auto ShowInstances() const -> std::vector<InstanceStatus>; - auto AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, std::string raft_address) -> void; + auto AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, std::string_view raft_address) -> void; // NOTE: The client code must check that the server exists before calling this method. auto GetCoordinatorServer() const -> CoordinatorServer &; diff --git a/src/coordination/include/coordination/instance_status.hpp b/src/coordination/include/coordination/instance_status.hpp index 492410061..da6fd8828 100644 --- a/src/coordination/include/coordination/instance_status.hpp +++ b/src/coordination/include/coordination/instance_status.hpp @@ -26,7 +26,7 @@ struct InstanceStatus { std::string raft_socket_address; std::string coord_socket_address; std::string cluster_role; - bool is_alive; + std::string health; }; } // namespace memgraph::coordination diff --git a/src/coordination/include/coordination/raft_state.hpp b/src/coordination/include/coordination/raft_state.hpp index b6ef06008..34da3e2a6 100644 --- a/src/coordination/include/coordination/raft_state.hpp +++ b/src/coordination/include/coordination/raft_state.hpp @@ -14,11 +14,17 @@ #ifdef MG_ENTERPRISE #include <flags/replication.hpp> +#include "io/network/endpoint.hpp" +#include "nuraft/coordinator_state_machine.hpp" +#include 
"nuraft/coordinator_state_manager.hpp" #include <libnuraft/nuraft.hxx> namespace memgraph::coordination { +class CoordinatorInstance; +struct CoordinatorClientConfig; + using BecomeLeaderCb = std::function<void()>; using BecomeFollowerCb = std::function<void()>; @@ -47,26 +53,38 @@ class RaftState { RaftState &operator=(RaftState &&other) noexcept = default; ~RaftState(); - static auto MakeRaftState(BecomeLeaderCb become_leader_cb, BecomeFollowerCb become_follower_cb) -> RaftState; + static auto MakeRaftState(BecomeLeaderCb &&become_leader_cb, BecomeFollowerCb &&become_follower_cb) -> RaftState; auto InstanceName() const -> std::string; auto RaftSocketAddress() const -> std::string; - auto AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, std::string raft_address) -> void; + auto AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, std::string_view raft_address) -> void; auto GetAllCoordinators() const -> std::vector<ptr<srv_config>>; auto RequestLeadership() -> bool; auto IsLeader() const -> bool; - auto AppendRegisterReplicationInstance(std::string const &instance) -> ptr<raft_result>; + auto FindCurrentMainInstanceName() const -> std::optional<std::string>; + auto MainExists() const -> bool; + auto IsMain(std::string_view instance_name) const -> bool; + auto IsReplica(std::string_view instance_name) const -> bool; - // TODO: (andi) I think variables below can be abstracted + auto AppendRegisterReplicationInstanceLog(CoordinatorClientConfig const &config) -> bool; + auto AppendUnregisterReplicationInstanceLog(std::string_view instance_name) -> bool; + auto AppendSetInstanceAsMainLog(std::string_view instance_name) -> bool; + auto AppendSetInstanceAsReplicaLog(std::string_view instance_name) -> bool; + auto AppendUpdateUUIDLog(utils::UUID const &uuid) -> bool; + + auto GetInstances() const -> std::vector<InstanceState>; + auto GetUUID() const -> utils::UUID; + + private: + // TODO: (andi) I think variables below can be 
abstracted/clean them. + io::network::Endpoint raft_endpoint_; uint32_t raft_server_id_; - uint32_t raft_port_; - std::string raft_address_; - ptr<state_machine> state_machine_; - ptr<state_mgr> state_manager_; + ptr<CoordinatorStateMachine> state_machine_; + ptr<CoordinatorStateManager> state_manager_; ptr<raft_server> raft_server_; ptr<logger> logger_; raft_launcher launcher_; diff --git a/src/coordination/include/coordination/register_main_replica_coordinator_status.hpp b/src/coordination/include/coordination/register_main_replica_coordinator_status.hpp index 3aa7e3ca1..13b58ff9f 100644 --- a/src/coordination/include/coordination/register_main_replica_coordinator_status.hpp +++ b/src/coordination/include/coordination/register_main_replica_coordinator_status.hpp @@ -19,12 +19,12 @@ namespace memgraph::coordination { enum class RegisterInstanceCoordinatorStatus : uint8_t { NAME_EXISTS, - ENDPOINT_EXISTS, + COORD_ENDPOINT_EXISTS, + REPL_ENDPOINT_EXISTS, NOT_COORDINATOR, - RPC_FAILED, NOT_LEADER, - RAFT_COULD_NOT_ACCEPT, - RAFT_COULD_NOT_APPEND, + RPC_FAILED, + RAFT_LOG_ERROR, SUCCESS }; @@ -32,8 +32,9 @@ enum class UnregisterInstanceCoordinatorStatus : uint8_t { NO_INSTANCE_WITH_NAME, IS_MAIN, NOT_COORDINATOR, - NOT_LEADER, RPC_FAILED, + NOT_LEADER, + RAFT_LOG_ERROR, SUCCESS, }; @@ -41,9 +42,11 @@ enum class SetInstanceToMainCoordinatorStatus : uint8_t { NO_INSTANCE_WITH_NAME, MAIN_ALREADY_EXISTS, NOT_COORDINATOR, - SUCCESS, + NOT_LEADER, + RAFT_LOG_ERROR, COULD_NOT_PROMOTE_TO_MAIN, - SWAP_UUID_FAILED + SWAP_UUID_FAILED, + SUCCESS, }; } // namespace memgraph::coordination diff --git a/src/coordination/include/coordination/replication_instance.hpp b/src/coordination/include/coordination/replication_instance.hpp index 8001d0905..7b5d73b81 100644 --- a/src/coordination/include/coordination/replication_instance.hpp +++ b/src/coordination/include/coordination/replication_instance.hpp @@ -17,18 +17,24 @@ #include "coordination/coordinator_exceptions.hpp" #include 
"replication_coordination_glue/role.hpp" -#include <libnuraft/nuraft.hxx> +#include "utils/resource_lock.hpp" #include "utils/result.hpp" #include "utils/uuid.hpp" +#include <libnuraft/nuraft.hxx> + namespace memgraph::coordination { class CoordinatorInstance; +class ReplicationInstance; + +using HealthCheckInstanceCallback = void (CoordinatorInstance::*)(std::string_view); class ReplicationInstance { public: - ReplicationInstance(CoordinatorInstance *peer, CoordinatorClientConfig config, HealthCheckCallback succ_cb, - HealthCheckCallback fail_cb); + ReplicationInstance(CoordinatorInstance *peer, CoordinatorClientConfig config, HealthCheckClientCallback succ_cb, + HealthCheckClientCallback fail_cb, HealthCheckInstanceCallback succ_instance_cb, + HealthCheckInstanceCallback fail_instance_cb); ReplicationInstance(ReplicationInstance const &other) = delete; ReplicationInstance &operator=(ReplicationInstance const &other) = delete; @@ -45,14 +51,16 @@ class ReplicationInstance { auto IsAlive() const -> bool; auto InstanceName() const -> std::string; - auto SocketAddress() const -> std::string; + auto CoordinatorSocketAddress() const -> std::string; + auto ReplicationSocketAddress() const -> std::string; - auto IsReplica() const -> bool; - auto IsMain() const -> bool; + auto PromoteToMain(utils::UUID const &uuid, ReplicationClientsInfo repl_clients_info, + HealthCheckInstanceCallback main_succ_cb, HealthCheckInstanceCallback main_fail_cb) -> bool; - auto PromoteToMain(utils::UUID uuid, ReplicationClientsInfo repl_clients_info, HealthCheckCallback main_succ_cb, - HealthCheckCallback main_fail_cb) -> bool; - auto DemoteToReplica(HealthCheckCallback replica_succ_cb, HealthCheckCallback replica_fail_cb) -> bool; + auto SendDemoteToReplicaRpc() -> bool; + + auto DemoteToReplica(HealthCheckInstanceCallback replica_succ_cb, HealthCheckInstanceCallback replica_fail_cb) + -> bool; auto StartFrequentCheck() -> void; auto StopFrequentCheck() -> void; @@ -63,9 +71,8 @@ class 
ReplicationInstance { auto EnsureReplicaHasCorrectMainUUID(utils::UUID const &curr_main_uuid) -> bool; - auto SendSwapAndUpdateUUID(const utils::UUID &new_main_uuid) -> bool; - auto SendUnregisterReplicaRpc(std::string const &instance_name) -> bool; - + auto SendSwapAndUpdateUUID(utils::UUID const &new_main_uuid) -> bool; + auto SendUnregisterReplicaRpc(std::string_view instance_name) -> bool; auto SendGetInstanceUUID() -> utils::BasicResult<coordination::GetInstanceUUIDError, std::optional<utils::UUID>>; auto GetClient() -> CoordinatorClient &; @@ -74,11 +81,13 @@ class ReplicationInstance { auto SetNewMainUUID(utils::UUID const &main_uuid) -> void; auto ResetMainUUID() -> void; - auto GetMainUUID() const -> const std::optional<utils::UUID> &; + auto GetMainUUID() const -> std::optional<utils::UUID> const &; + + auto GetSuccessCallback() -> HealthCheckInstanceCallback &; + auto GetFailCallback() -> HealthCheckInstanceCallback &; private: CoordinatorClient client_; - replication_coordination_glue::ReplicationRole replication_role_; std::chrono::system_clock::time_point last_response_time_{}; bool is_alive_{false}; std::chrono::system_clock::time_point last_check_of_uuid_{}; @@ -90,8 +99,12 @@ class ReplicationInstance { // so we need to send swap uuid again std::optional<utils::UUID> main_uuid_; + HealthCheckInstanceCallback succ_cb_; + HealthCheckInstanceCallback fail_cb_; + friend bool operator==(ReplicationInstance const &first, ReplicationInstance const &second) { - return first.client_ == second.client_ && first.replication_role_ == second.replication_role_; + return first.client_ == second.client_ && first.last_response_time_ == second.last_response_time_ && + first.is_alive_ == second.is_alive_ && first.main_uuid_ == second.main_uuid_; } }; diff --git a/src/coordination/include/coordination/rpc_errors.hpp b/src/coordination/include/coordination/rpc_errors.hpp index f6bfbf3e0..3829d430a 100644 --- a/src/coordination/include/coordination/rpc_errors.hpp +++ 
b/src/coordination/include/coordination/rpc_errors.hpp @@ -11,4 +11,5 @@ namespace memgraph::coordination { enum class GetInstanceUUIDError { NO_RESPONSE, RPC_EXCEPTION }; +enum class GetInstanceTimestampsError { NO_RESPONSE, RPC_EXCEPTION }; } // namespace memgraph::coordination diff --git a/src/coordination/include/nuraft/coordinator_cluster_state.hpp b/src/coordination/include/nuraft/coordinator_cluster_state.hpp new file mode 100644 index 000000000..11d539a14 --- /dev/null +++ b/src/coordination/include/nuraft/coordinator_cluster_state.hpp @@ -0,0 +1,92 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +#pragma once + +#ifdef MG_ENTERPRISE + +#include "coordination/coordinator_config.hpp" +#include "nuraft/raft_log_action.hpp" +#include "replication_coordination_glue/role.hpp" +#include "utils/resource_lock.hpp" +#include "utils/uuid.hpp" + +#include <libnuraft/nuraft.hxx> +#include <range/v3/view.hpp> +#include "json/json.hpp" + +#include <map> +#include <numeric> +#include <string> +#include <variant> + +namespace memgraph::coordination { + +using replication_coordination_glue::ReplicationRole; + +struct InstanceState { + CoordinatorClientConfig config; + ReplicationRole status; + + friend auto operator==(InstanceState const &lhs, InstanceState const &rhs) -> bool { + return lhs.config == rhs.config && lhs.status == rhs.status; + } +}; + +void to_json(nlohmann::json &j, InstanceState const &instance_state); +void from_json(nlohmann::json const &j, InstanceState &instance_state); + +using TRaftLog = std::variant<CoordinatorClientConfig, std::string, utils::UUID>; + +using nuraft::buffer; +using nuraft::buffer_serializer; +using nuraft::ptr; + +class CoordinatorClusterState { + public: + CoordinatorClusterState() = default; + explicit CoordinatorClusterState(std::map<std::string, InstanceState, std::less<>> instances); + + CoordinatorClusterState(CoordinatorClusterState const &); + CoordinatorClusterState &operator=(CoordinatorClusterState const &); + + CoordinatorClusterState(CoordinatorClusterState &&other) noexcept; + CoordinatorClusterState &operator=(CoordinatorClusterState &&other) noexcept; + ~CoordinatorClusterState() = default; + + auto FindCurrentMainInstanceName() const -> std::optional<std::string>; + + auto MainExists() const -> bool; + + auto IsMain(std::string_view instance_name) const -> bool; + + auto IsReplica(std::string_view instance_name) const -> bool; + + auto InsertInstance(std::string instance_name, InstanceState instance_state) -> void; + + auto DoAction(TRaftLog log_entry, RaftLogAction log_action) -> void; + + auto 
Serialize(ptr<buffer> &data) -> void; + + static auto Deserialize(buffer &data) -> CoordinatorClusterState; + + auto GetInstances() const -> std::vector<InstanceState>; + + auto GetUUID() const -> utils::UUID; + + private: + std::map<std::string, InstanceState, std::less<>> instances_{}; + utils::UUID uuid_{}; + mutable utils::ResourceLock log_lock_{}; +}; + +} // namespace memgraph::coordination +#endif diff --git a/src/coordination/include/nuraft/coordinator_state_machine.hpp b/src/coordination/include/nuraft/coordinator_state_machine.hpp index 5b5f37b48..836ac17a6 100644 --- a/src/coordination/include/nuraft/coordinator_state_machine.hpp +++ b/src/coordination/include/nuraft/coordinator_state_machine.hpp @@ -13,9 +13,15 @@ #ifdef MG_ENTERPRISE +#include "coordination/coordinator_config.hpp" +#include "nuraft/coordinator_cluster_state.hpp" +#include "nuraft/raft_log_action.hpp" + #include <spdlog/spdlog.h> #include <libnuraft/nuraft.hxx> +#include <variant> + namespace memgraph::coordination { using nuraft::async_result; @@ -36,9 +42,19 @@ class CoordinatorStateMachine : public state_machine { CoordinatorStateMachine &operator=(CoordinatorStateMachine &&) = delete; ~CoordinatorStateMachine() override {} - static auto EncodeRegisterReplicationInstance(const std::string &name) -> ptr<buffer>; + auto FindCurrentMainInstanceName() const -> std::optional<std::string>; + auto MainExists() const -> bool; + auto IsMain(std::string_view instance_name) const -> bool; + auto IsReplica(std::string_view instance_name) const -> bool; - static auto DecodeRegisterReplicationInstance(buffer &data) -> std::string; + static auto CreateLog(nlohmann::json &&log) -> ptr<buffer>; + static auto SerializeRegisterInstance(CoordinatorClientConfig const &config) -> ptr<buffer>; + static auto SerializeUnregisterInstance(std::string_view instance_name) -> ptr<buffer>; + static auto SerializeSetInstanceAsMain(std::string_view instance_name) -> ptr<buffer>; + static auto 
SerializeSetInstanceAsReplica(std::string_view instance_name) -> ptr<buffer>; + static auto SerializeUpdateUUID(utils::UUID const &uuid) -> ptr<buffer>; + + static auto DecodeLog(buffer &data) -> std::pair<TRaftLog, RaftLogAction>; auto pre_commit(ulong log_idx, buffer &data) -> ptr<buffer> override; @@ -64,11 +80,27 @@ class CoordinatorStateMachine : public state_machine { auto create_snapshot(snapshot &s, async_result<bool>::handler_type &when_done) -> void override; + auto GetInstances() const -> std::vector<InstanceState>; + auto GetUUID() const -> utils::UUID; + private: + struct SnapshotCtx { + SnapshotCtx(ptr<snapshot> &snapshot, CoordinatorClusterState const &cluster_state) + : snapshot_(snapshot), cluster_state_(cluster_state) {} + + ptr<snapshot> snapshot_; + CoordinatorClusterState cluster_state_; + }; + + auto create_snapshot_internal(ptr<snapshot> snapshot) -> void; + + CoordinatorClusterState cluster_state_; std::atomic<uint64_t> last_committed_idx_{0}; - ptr<snapshot> last_snapshot_; + std::map<uint64_t, ptr<SnapshotCtx>> snapshots_; + std::mutex snapshots_lock_; + ptr<snapshot> last_snapshot_; std::mutex last_snapshot_lock_; }; diff --git a/src/coordination/include/nuraft/raft_log_action.hpp b/src/coordination/include/nuraft/raft_log_action.hpp new file mode 100644 index 000000000..3f1b26dfa --- /dev/null +++ b/src/coordination/include/nuraft/raft_log_action.hpp @@ -0,0 +1,42 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +#pragma once + +#ifdef MG_ENTERPRISE + +#include "coordination/coordinator_exceptions.hpp" + +#include <cstdint> +#include <string> + +#include "json/json.hpp" + +namespace memgraph::coordination { + +enum class RaftLogAction : uint8_t { + REGISTER_REPLICATION_INSTANCE, + UNREGISTER_REPLICATION_INSTANCE, + SET_INSTANCE_AS_MAIN, + SET_INSTANCE_AS_REPLICA, + UPDATE_UUID +}; + +NLOHMANN_JSON_SERIALIZE_ENUM(RaftLogAction, { + {RaftLogAction::REGISTER_REPLICATION_INSTANCE, "register"}, + {RaftLogAction::UNREGISTER_REPLICATION_INSTANCE, "unregister"}, + {RaftLogAction::SET_INSTANCE_AS_MAIN, "promote"}, + {RaftLogAction::SET_INSTANCE_AS_REPLICA, "demote"}, + {RaftLogAction::UPDATE_UUID, "update_uuid"}, + }) + +} // namespace memgraph::coordination +#endif diff --git a/src/coordination/raft_state.cpp b/src/coordination/raft_state.cpp index d171a6b3d..fd93160b6 100644 --- a/src/coordination/raft_state.cpp +++ b/src/coordination/raft_state.cpp @@ -10,12 +10,12 @@ // licenses/APL.txt. #ifdef MG_ENTERPRISE +#include <chrono> -#include "coordination/raft_state.hpp" - +#include <spdlog/spdlog.h> +#include "coordination/coordinator_config.hpp" #include "coordination/coordinator_exceptions.hpp" -#include "nuraft/coordinator_state_machine.hpp" -#include "nuraft/coordinator_state_manager.hpp" +#include "coordination/raft_state.hpp" #include "utils/counter.hpp" namespace memgraph::coordination { @@ -33,31 +33,35 @@ using raft_result = cmd_result<ptr<buffer>>; RaftState::RaftState(BecomeLeaderCb become_leader_cb, BecomeFollowerCb become_follower_cb, uint32_t raft_server_id, uint32_t raft_port, std::string raft_address) - : raft_server_id_(raft_server_id), - raft_port_(raft_port), - raft_address_(std::move(raft_address)), + : raft_endpoint_(raft_address, raft_port), + raft_server_id_(raft_server_id), state_machine_(cs_new<CoordinatorStateMachine>()), - state_manager_( - cs_new<CoordinatorStateManager>(raft_server_id_, raft_address_ + ":" + std::to_string(raft_port_))), + 
state_manager_(cs_new<CoordinatorStateManager>(raft_server_id_, raft_endpoint_.SocketAddress())), logger_(nullptr), become_leader_cb_(std::move(become_leader_cb)), become_follower_cb_(std::move(become_follower_cb)) {} auto RaftState::InitRaftServer() -> void { asio_service::options asio_opts; - asio_opts.thread_pool_size_ = 1; // TODO: (andi) Improve this + asio_opts.thread_pool_size_ = 1; raft_params params; params.heart_beat_interval_ = 100; params.election_timeout_lower_bound_ = 200; params.election_timeout_upper_bound_ = 400; - // 5 logs are preserved before the last snapshot params.reserved_log_items_ = 5; - // Create snapshot for every 5 log appends params.snapshot_distance_ = 5; params.client_req_timeout_ = 3000; params.return_method_ = raft_params::blocking; + // If the leader doesn't receive any response from quorum nodes + // in 200ms, it will step down. + // This allows us to achieve strong consistency even if network partition + // happens between the current leader and followers. + // The value must be <= election_timeout_lower_bound_ so that cluster can never + // have multiple leaders. 
+ params.leadership_expiry_ = 200; + raft_server::init_options init_opts; init_opts.raft_callback_ = [this](cb_func::Type event_type, cb_func::Param *param) -> nuraft::CbReturnCode { if (event_type == cb_func::BecomeLeader) { @@ -72,11 +76,11 @@ auto RaftState::InitRaftServer() -> void { raft_launcher launcher; - raft_server_ = launcher.init(state_machine_, state_manager_, logger_, static_cast<int>(raft_port_), asio_opts, params, - init_opts); + raft_server_ = + launcher.init(state_machine_, state_manager_, logger_, raft_endpoint_.port, asio_opts, params, init_opts); if (!raft_server_) { - throw RaftServerStartException("Failed to launch raft server on {}:{}", raft_address_, raft_port_); + throw RaftServerStartException("Failed to launch raft server on {}", raft_endpoint_.SocketAddress()); } auto maybe_stop = utils::ResettableCounter<20>(); @@ -87,38 +91,61 @@ auto RaftState::InitRaftServer() -> void { std::this_thread::sleep_for(std::chrono::milliseconds(250)); } while (!maybe_stop()); - throw RaftServerStartException("Failed to initialize raft server on {}:{}", raft_address_, raft_port_); + throw RaftServerStartException("Failed to initialize raft server on {}", raft_endpoint_.SocketAddress()); } -auto RaftState::MakeRaftState(BecomeLeaderCb become_leader_cb, BecomeFollowerCb become_follower_cb) -> RaftState { - uint32_t raft_server_id{0}; - uint32_t raft_port{0}; - try { - raft_server_id = FLAGS_raft_server_id; - raft_port = FLAGS_raft_server_port; - } catch (std::exception const &e) { - throw RaftCouldNotParseFlagsException("Failed to parse flags: {}", e.what()); - } +auto RaftState::MakeRaftState(BecomeLeaderCb &&become_leader_cb, BecomeFollowerCb &&become_follower_cb) -> RaftState { + uint32_t raft_server_id = FLAGS_raft_server_id; + uint32_t raft_port = FLAGS_raft_server_port; auto raft_state = RaftState(std::move(become_leader_cb), std::move(become_follower_cb), raft_server_id, raft_port, "127.0.0.1"); + raft_state.InitRaftServer(); return raft_state; } 
RaftState::~RaftState() { launcher_.shutdown(); } -auto RaftState::InstanceName() const -> std::string { return "coordinator_" + std::to_string(raft_server_id_); } +auto RaftState::InstanceName() const -> std::string { + return fmt::format("coordinator_{}", std::to_string(raft_server_id_)); +} -auto RaftState::RaftSocketAddress() const -> std::string { return raft_address_ + ":" + std::to_string(raft_port_); } +auto RaftState::RaftSocketAddress() const -> std::string { return raft_endpoint_.SocketAddress(); } -auto RaftState::AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, std::string raft_address) -> void { - auto const endpoint = raft_address + ":" + std::to_string(raft_port); +auto RaftState::AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, std::string_view raft_address) + -> void { + auto const endpoint = fmt::format("{}:{}", raft_address, raft_port); srv_config const srv_config_to_add(static_cast<int>(raft_server_id), endpoint); - if (!raft_server_->add_srv(srv_config_to_add)->get_accepted()) { - throw RaftAddServerException("Failed to add server {} to the cluster", endpoint); + + auto cmd_result = raft_server_->add_srv(srv_config_to_add); + + if (cmd_result->get_result_code() == nuraft::cmd_result_code::OK) { + spdlog::info("Request to add server {} to the cluster accepted", endpoint); + } else { + throw RaftAddServerException("Failed to accept request to add server {} to the cluster with error code {}", + endpoint, int(cmd_result->get_result_code())); + } + + // Waiting for server to join + constexpr int max_tries{10}; + auto maybe_stop = utils::ResettableCounter<max_tries>(); + constexpr int waiting_period{200}; + bool added{false}; + while (!maybe_stop()) { + std::this_thread::sleep_for(std::chrono::milliseconds(waiting_period)); + const auto server_config = raft_server_->get_srv_config(static_cast<nuraft::int32>(raft_server_id)); + if (server_config) { + spdlog::trace("Server with id {} added to cluster", 
raft_server_id); + added = true; + break; + } + } + + if (!added) { + throw RaftAddServerException("Failed to add server {} to the cluster in {}ms", endpoint, + max_tries * waiting_period); } - spdlog::info("Request to add server {} to the cluster accepted", endpoint); } auto RaftState::GetAllCoordinators() const -> std::vector<ptr<srv_config>> { @@ -131,10 +158,124 @@ auto RaftState::IsLeader() const -> bool { return raft_server_->is_leader(); } auto RaftState::RequestLeadership() -> bool { return raft_server_->is_leader() || raft_server_->request_leadership(); } -auto RaftState::AppendRegisterReplicationInstance(std::string const &instance) -> ptr<raft_result> { - auto new_log = CoordinatorStateMachine::EncodeRegisterReplicationInstance(instance); - return raft_server_->append_entries({new_log}); +auto RaftState::AppendRegisterReplicationInstanceLog(CoordinatorClientConfig const &config) -> bool { + auto new_log = CoordinatorStateMachine::SerializeRegisterInstance(config); + auto const res = raft_server_->append_entries({new_log}); + + if (!res->get_accepted()) { + spdlog::error( + "Failed to accept request for registering instance {}. Most likely the reason is that the instance is not " + "the " + "leader.", + config.instance_name); + return false; + } + + spdlog::info("Request for registering instance {} accepted", config.instance_name); + + if (res->get_result_code() != nuraft::cmd_result_code::OK) { + spdlog::error("Failed to register instance {} with error code {}", config.instance_name, + int(res->get_result_code())); + return false; + } + + return true; } +auto RaftState::AppendUnregisterReplicationInstanceLog(std::string_view instance_name) -> bool { + auto new_log = CoordinatorStateMachine::SerializeUnregisterInstance(instance_name); + auto const res = raft_server_->append_entries({new_log}); + if (!res->get_accepted()) { + spdlog::error( + "Failed to accept request for unregistering instance {}. 
Most likely the reason is that the instance is not " + "the leader.", + instance_name); + return false; + } + + spdlog::info("Request for unregistering instance {} accepted", instance_name); + + if (res->get_result_code() != nuraft::cmd_result_code::OK) { + spdlog::error("Failed to unregister instance {} with error code {}", instance_name, int(res->get_result_code())); + return false; + } + return true; +} + +auto RaftState::AppendSetInstanceAsMainLog(std::string_view instance_name) -> bool { + auto new_log = CoordinatorStateMachine::SerializeSetInstanceAsMain(instance_name); + auto const res = raft_server_->append_entries({new_log}); + if (!res->get_accepted()) { + spdlog::error( + "Failed to accept request for promoting instance {}. Most likely the reason is that the instance is not " + "the leader.", + instance_name); + return false; + } + + spdlog::info("Request for promoting instance {} accepted", instance_name); + + if (res->get_result_code() != nuraft::cmd_result_code::OK) { + spdlog::error("Failed to promote instance {} with error code {}", instance_name, int(res->get_result_code())); + return false; + } + return true; +} + +auto RaftState::AppendSetInstanceAsReplicaLog(std::string_view instance_name) -> bool { + auto new_log = CoordinatorStateMachine::SerializeSetInstanceAsReplica(instance_name); + auto const res = raft_server_->append_entries({new_log}); + if (!res->get_accepted()) { + spdlog::error( + "Failed to accept request for demoting instance {}. 
Most likely the reason is that the instance is not " + "the leader.", + instance_name); + return false; + } + spdlog::info("Request for demoting instance {} accepted", instance_name); + + if (res->get_result_code() != nuraft::cmd_result_code::OK) { + spdlog::error("Failed to demote instance {} with error code {}", instance_name, int(res->get_result_code())); + return false; + } + + return true; +} + +auto RaftState::AppendUpdateUUIDLog(utils::UUID const &uuid) -> bool { + auto new_log = CoordinatorStateMachine::SerializeUpdateUUID(uuid); + auto const res = raft_server_->append_entries({new_log}); + if (!res->get_accepted()) { + spdlog::error( + "Failed to accept request for updating UUID. Most likely the reason is that the instance is not " + "the leader."); + return false; + } + spdlog::info("Request for updating UUID accepted"); + + if (res->get_result_code() != nuraft::cmd_result_code::OK) { + spdlog::error("Failed to update UUID with error code {}", int(res->get_result_code())); + return false; + } + + return true; +} + +auto RaftState::FindCurrentMainInstanceName() const -> std::optional<std::string> { + return state_machine_->FindCurrentMainInstanceName(); +} + +auto RaftState::MainExists() const -> bool { return state_machine_->MainExists(); } + +auto RaftState::IsMain(std::string_view instance_name) const -> bool { return state_machine_->IsMain(instance_name); } + +auto RaftState::IsReplica(std::string_view instance_name) const -> bool { + return state_machine_->IsReplica(instance_name); +} + +auto RaftState::GetInstances() const -> std::vector<InstanceState> { return state_machine_->GetInstances(); } + +auto RaftState::GetUUID() const -> utils::UUID { return state_machine_->GetUUID(); } + } // namespace memgraph::coordination #endif diff --git a/src/coordination/replication_instance.cpp b/src/coordination/replication_instance.cpp index 0d16db648..ca7572ea7 100644 --- a/src/coordination/replication_instance.cpp +++ 
b/src/coordination/replication_instance.cpp @@ -13,21 +13,20 @@ #include "coordination/replication_instance.hpp" +#include <utility> + #include "replication_coordination_glue/handler.hpp" #include "utils/result.hpp" namespace memgraph::coordination { ReplicationInstance::ReplicationInstance(CoordinatorInstance *peer, CoordinatorClientConfig config, - HealthCheckCallback succ_cb, HealthCheckCallback fail_cb) + HealthCheckClientCallback succ_cb, HealthCheckClientCallback fail_cb, + HealthCheckInstanceCallback succ_instance_cb, + HealthCheckInstanceCallback fail_instance_cb) : client_(peer, std::move(config), std::move(succ_cb), std::move(fail_cb)), - replication_role_(replication_coordination_glue::ReplicationRole::REPLICA) { - if (!client_.DemoteToReplica()) { - throw CoordinatorRegisterInstanceException("Failed to demote instance {} to replica", client_.InstanceName()); - } - - client_.StartFrequentCheck(); -} + succ_cb_(succ_instance_cb), + fail_cb_(fail_instance_cb) {} auto ReplicationInstance::OnSuccessPing() -> void { last_response_time_ = std::chrono::system_clock::now(); @@ -46,37 +45,34 @@ auto ReplicationInstance::IsReadyForUUIDPing() -> bool { } auto ReplicationInstance::InstanceName() const -> std::string { return client_.InstanceName(); } -auto ReplicationInstance::SocketAddress() const -> std::string { return client_.SocketAddress(); } +auto ReplicationInstance::CoordinatorSocketAddress() const -> std::string { return client_.CoordinatorSocketAddress(); } +auto ReplicationInstance::ReplicationSocketAddress() const -> std::string { return client_.ReplicationSocketAddress(); } auto ReplicationInstance::IsAlive() const -> bool { return is_alive_; } -auto ReplicationInstance::IsReplica() const -> bool { - return replication_role_ == replication_coordination_glue::ReplicationRole::REPLICA; -} -auto ReplicationInstance::IsMain() const -> bool { - return replication_role_ == replication_coordination_glue::ReplicationRole::MAIN; -} - -auto 
ReplicationInstance::PromoteToMain(utils::UUID new_uuid, ReplicationClientsInfo repl_clients_info, - HealthCheckCallback main_succ_cb, HealthCheckCallback main_fail_cb) -> bool { +auto ReplicationInstance::PromoteToMain(utils::UUID const &new_uuid, ReplicationClientsInfo repl_clients_info, + HealthCheckInstanceCallback main_succ_cb, + HealthCheckInstanceCallback main_fail_cb) -> bool { if (!client_.SendPromoteReplicaToMainRpc(new_uuid, std::move(repl_clients_info))) { return false; } - replication_role_ = replication_coordination_glue::ReplicationRole::MAIN; main_uuid_ = new_uuid; - client_.SetCallbacks(std::move(main_succ_cb), std::move(main_fail_cb)); + succ_cb_ = main_succ_cb; + fail_cb_ = main_fail_cb; return true; } -auto ReplicationInstance::DemoteToReplica(HealthCheckCallback replica_succ_cb, HealthCheckCallback replica_fail_cb) - -> bool { +auto ReplicationInstance::SendDemoteToReplicaRpc() -> bool { return client_.DemoteToReplica(); } + +auto ReplicationInstance::DemoteToReplica(HealthCheckInstanceCallback replica_succ_cb, + HealthCheckInstanceCallback replica_fail_cb) -> bool { if (!client_.DemoteToReplica()) { return false; } - replication_role_ = replication_coordination_glue::ReplicationRole::REPLICA; - client_.SetCallbacks(std::move(replica_succ_cb), std::move(replica_fail_cb)); + succ_cb_ = replica_succ_cb; + fail_cb_ = replica_fail_cb; return true; } @@ -90,10 +86,12 @@ auto ReplicationInstance::ReplicationClientInfo() const -> CoordinatorClientConf return client_.ReplicationClientInfo(); } +auto ReplicationInstance::GetSuccessCallback() -> HealthCheckInstanceCallback & { return succ_cb_; } +auto ReplicationInstance::GetFailCallback() -> HealthCheckInstanceCallback & { return fail_cb_; } + auto ReplicationInstance::GetClient() -> CoordinatorClient & { return client_; } auto ReplicationInstance::SetNewMainUUID(utils::UUID const &main_uuid) -> void { main_uuid_ = main_uuid; } -auto ReplicationInstance::ResetMainUUID() -> void { main_uuid_ = 
std::nullopt; } auto ReplicationInstance::GetMainUUID() const -> std::optional<utils::UUID> const & { return main_uuid_; } auto ReplicationInstance::EnsureReplicaHasCorrectMainUUID(utils::UUID const &curr_main_uuid) -> bool { @@ -106,6 +104,7 @@ auto ReplicationInstance::EnsureReplicaHasCorrectMainUUID(utils::UUID const &cur } UpdateReplicaLastResponseUUID(); + // NOLINTNEXTLINE if (res.GetValue().has_value() && res.GetValue().value() == curr_main_uuid) { return true; } @@ -113,7 +112,7 @@ auto ReplicationInstance::EnsureReplicaHasCorrectMainUUID(utils::UUID const &cur return SendSwapAndUpdateUUID(curr_main_uuid); } -auto ReplicationInstance::SendSwapAndUpdateUUID(const utils::UUID &new_main_uuid) -> bool { +auto ReplicationInstance::SendSwapAndUpdateUUID(utils::UUID const &new_main_uuid) -> bool { if (!replication_coordination_glue::SendSwapMainUUIDRpc(client_.RpcClient(), new_main_uuid)) { return false; } @@ -121,7 +120,7 @@ auto ReplicationInstance::SendSwapAndUpdateUUID(const utils::UUID &new_main_uuid return true; } -auto ReplicationInstance::SendUnregisterReplicaRpc(std::string const &instance_name) -> bool { +auto ReplicationInstance::SendUnregisterReplicaRpc(std::string_view instance_name) -> bool { return client_.SendUnregisterReplicaRpc(instance_name); } diff --git a/src/csv/include/csv/parsing.hpp b/src/csv/include/csv/parsing.hpp index 66f2913c8..0accc616d 100644 --- a/src/csv/include/csv/parsing.hpp +++ b/src/csv/include/csv/parsing.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -119,6 +119,8 @@ class Reader { auto GetHeader() const -> Header const &; auto GetNextRow(utils::MemoryResource *mem) -> std::optional<Row>; + void Reset(); + private: // Some implementation issues that need clearing up, but this is mainly because // I don't want `boost/iostreams/filtering_stream.hpp` included in this header file diff --git a/src/csv/parsing.cpp b/src/csv/parsing.cpp index 6d03dc7fd..6961a42e4 100644 --- a/src/csv/parsing.cpp +++ b/src/csv/parsing.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -34,6 +34,10 @@ struct Reader::impl { [[nodiscard]] bool HasHeader() const { return read_config_.with_header; } [[nodiscard]] auto Header() const -> Header const & { return header_; } + void Reset() { + line_buffer_.clear(); + line_buffer_.shrink_to_fit(); + } auto GetNextRow(utils::MemoryResource *mem) -> std::optional<Reader::Row>; @@ -42,7 +46,7 @@ struct Reader::impl { void TryInitializeHeader(); - std::optional<utils::pmr::string> GetNextLine(utils::MemoryResource *mem); + bool GetNextLine(); ParsingResult ParseHeader(); @@ -55,6 +59,8 @@ struct Reader::impl { Config read_config_; uint64_t line_count_{1}; uint16_t number_of_columns_{0}; + uint64_t estimated_number_of_columns_{0}; + utils::pmr::string line_buffer_{memory_}; Reader::Header header_{memory_}; }; @@ -129,17 +135,16 @@ void Reader::impl::InitializeStream() { MG_ASSERT(csv_stream_.is_complete(), "Should be 'complete' for correct operation"); } -std::optional<utils::pmr::string> Reader::impl::GetNextLine(utils::MemoryResource *mem) { - utils::pmr::string line(mem); - if 
(!std::getline(csv_stream_, line)) { +bool Reader::impl::GetNextLine() { + if (!std::getline(csv_stream_, line_buffer_)) { // reached end of file or an I/0 error occurred if (!csv_stream_.good()) { csv_stream_.reset(); // this will close the file_stream_ and clear the chain } - return std::nullopt; + return false; } ++line_count_; - return std::move(line); + return true; } Reader::ParsingResult Reader::impl::ParseHeader() { @@ -170,6 +175,8 @@ void Reader::impl::TryInitializeHeader() { const Reader::Header &Reader::GetHeader() const { return pimpl->Header(); } +void Reader::Reset() { pimpl->Reset(); } + namespace { enum class CsvParserState : uint8_t { INITIAL_FIELD, NEXT_FIELD, QUOTING, EXPECT_DELIMITER, DONE }; @@ -179,6 +186,8 @@ Reader::ParsingResult Reader::impl::ParseRow(utils::MemoryResource *mem) { utils::pmr::vector<utils::pmr::string> row(mem); if (number_of_columns_ != 0) { row.reserve(number_of_columns_); + } else if (estimated_number_of_columns_ != 0) { + row.reserve(estimated_number_of_columns_); } utils::pmr::string column(memory_); @@ -186,13 +195,12 @@ Reader::ParsingResult Reader::impl::ParseRow(utils::MemoryResource *mem) { auto state = CsvParserState::INITIAL_FIELD; do { - const auto maybe_line = GetNextLine(mem); - if (!maybe_line) { + if (!GetNextLine()) { // The whole file was processed. 
break; } - std::string_view line_string_view = *maybe_line; + std::string_view line_string_view = line_buffer_; // remove '\r' from the end in case we have dos file format if (line_string_view.back() == '\r') { @@ -312,6 +320,11 @@ Reader::ParsingResult Reader::impl::ParseRow(utils::MemoryResource *mem) { fmt::format("Expected {:d} columns in row {:d}, but got {:d}", number_of_columns_, line_count_ - 1, row.size())); } + // To avoid unnecessary dynamic growth of the row, remember the number of + // columns for future calls + if (number_of_columns_ == 0 && estimated_number_of_columns_ == 0) { + estimated_number_of_columns_ = row.size(); + } return std::move(row); } @@ -319,7 +332,7 @@ Reader::ParsingResult Reader::impl::ParseRow(utils::MemoryResource *mem) { std::optional<Reader::Row> Reader::impl::GetNextRow(utils::MemoryResource *mem) { auto row = ParseRow(mem); - if (row.HasError()) { + if (row.HasError()) [[unlikely]] { if (!read_config_.ignore_bad) { throw CsvReadException("CSV Reader: Bad row at line {:d}: {}", line_count_ - 1, row.GetError().message); } @@ -333,7 +346,7 @@ std::optional<Reader::Row> Reader::impl::GetNextRow(utils::MemoryResource *mem) } while (row.HasError()); } - if (row->empty()) { + if (row->empty()) [[unlikely]] { // reached end of file return std::nullopt; } diff --git a/src/dbms/coordinator_handler.cpp b/src/dbms/coordinator_handler.cpp index f8e14e2a0..292d50d3d 100644 --- a/src/dbms/coordinator_handler.cpp +++ b/src/dbms/coordinator_handler.cpp @@ -20,28 +20,28 @@ namespace memgraph::dbms { CoordinatorHandler::CoordinatorHandler(coordination::CoordinatorState &coordinator_state) : coordinator_state_(coordinator_state) {} -auto CoordinatorHandler::RegisterReplicationInstance(memgraph::coordination::CoordinatorClientConfig config) +auto CoordinatorHandler::RegisterReplicationInstance(coordination::CoordinatorClientConfig const &config) -> coordination::RegisterInstanceCoordinatorStatus { return 
coordinator_state_.RegisterReplicationInstance(config); } -auto CoordinatorHandler::UnregisterReplicationInstance(std::string instance_name) +auto CoordinatorHandler::UnregisterReplicationInstance(std::string_view instance_name) -> coordination::UnregisterInstanceCoordinatorStatus { - return coordinator_state_.UnregisterReplicationInstance(std::move(instance_name)); + return coordinator_state_.UnregisterReplicationInstance(instance_name); } -auto CoordinatorHandler::SetReplicationInstanceToMain(std::string instance_name) +auto CoordinatorHandler::SetReplicationInstanceToMain(std::string_view instance_name) -> coordination::SetInstanceToMainCoordinatorStatus { - return coordinator_state_.SetReplicationInstanceToMain(std::move(instance_name)); + return coordinator_state_.SetReplicationInstanceToMain(instance_name); } auto CoordinatorHandler::ShowInstances() const -> std::vector<coordination::InstanceStatus> { return coordinator_state_.ShowInstances(); } -auto CoordinatorHandler::AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, std::string raft_address) - -> void { - coordinator_state_.AddCoordinatorInstance(raft_server_id, raft_port, std::move(raft_address)); +auto CoordinatorHandler::AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, + std::string_view raft_address) -> void { + coordinator_state_.AddCoordinatorInstance(raft_server_id, raft_port, raft_address); } } // namespace memgraph::dbms diff --git a/src/dbms/coordinator_handler.hpp b/src/dbms/coordinator_handler.hpp index d06e70676..1c456134d 100644 --- a/src/dbms/coordinator_handler.hpp +++ b/src/dbms/coordinator_handler.hpp @@ -30,16 +30,17 @@ class CoordinatorHandler { // TODO: (andi) When moving coordinator state on same instances, rename from RegisterReplicationInstance to // RegisterInstance - auto RegisterReplicationInstance(coordination::CoordinatorClientConfig config) + auto RegisterReplicationInstance(coordination::CoordinatorClientConfig const &config) -> 
coordination::RegisterInstanceCoordinatorStatus; - auto UnregisterReplicationInstance(std::string instance_name) -> coordination::UnregisterInstanceCoordinatorStatus; + auto UnregisterReplicationInstance(std::string_view instance_name) + -> coordination::UnregisterInstanceCoordinatorStatus; - auto SetReplicationInstanceToMain(std::string instance_name) -> coordination::SetInstanceToMainCoordinatorStatus; + auto SetReplicationInstanceToMain(std::string_view instance_name) -> coordination::SetInstanceToMainCoordinatorStatus; auto ShowInstances() const -> std::vector<coordination::InstanceStatus>; - auto AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, std::string raft_address) -> void; + auto AddCoordinatorInstance(uint32_t raft_server_id, uint32_t raft_port, std::string_view raft_address) -> void; private: coordination::CoordinatorState &coordinator_state_; diff --git a/src/dbms/database_handler.hpp b/src/dbms/database_handler.hpp index de5f813ba..cae54088e 100644 --- a/src/dbms/database_handler.hpp +++ b/src/dbms/database_handler.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source diff --git a/src/dbms/dbms_handler.cpp b/src/dbms/dbms_handler.cpp index 1c38106db..16927d7e2 100644 --- a/src/dbms/dbms_handler.cpp +++ b/src/dbms/dbms_handler.cpp @@ -185,6 +185,16 @@ DbmsHandler::DbmsHandler(storage::Config config, replication::ReplicationState & auto directories = std::set{std::string{kDefaultDB}}; // Recover previous databases + if (flags::AreExperimentsEnabled(flags::Experiments::SYSTEM_REPLICATION) && !recovery_on_startup) { + // This will result in dropping databases on SystemRecoveryHandler + // for MT case, and for single DB case we might not even set replication as commit timestamp is checked + spdlog::warn( + "Data recovery on startup not set, this will result in dropping database in case of multi-tenancy enabled."); + } + + // TODO: Problem is if user doesn't set this up "database" name won't be recovered + // but if storage-recover-on-startup is true storage will be recovered which is an issue + spdlog::info("Data recovery on startup set to {}", recovery_on_startup); if (recovery_on_startup) { auto it = durability_->begin(std::string(kDBPrefix)); auto end = durability_->end(std::string(kDBPrefix)); @@ -410,9 +420,10 @@ void DbmsHandler::UpdateDurability(const storage::Config &config, std::optional< if (!durability_) return; // Save database in a list of active databases const auto &key = Durability::GenKey(config.salient.name); - if (rel_dir == std::nullopt) + if (rel_dir == std::nullopt) { rel_dir = std::filesystem::relative(config.durability.storage_directory, default_config_.durability.storage_directory); + } const auto &val = Durability::GenVal(config.salient.uuid, *rel_dir); durability_->Put(key, val); } diff --git a/src/dbms/dbms_handler.hpp b/src/dbms/dbms_handler.hpp index 7b1d45335..b0bbd5758 100644 --- a/src/dbms/dbms_handler.hpp +++ 
b/src/dbms/dbms_handler.hpp @@ -155,6 +155,8 @@ class DbmsHandler { spdlog::debug("Trying to create db '{}' on replica which already exists.", config.name); auto db = Get_(config.name); + spdlog::debug("Aligning database with name {} which has UUID {}, where config UUID is {}", config.name, + std::string(db->uuid()), std::string(config.uuid)); if (db->uuid() == config.uuid) { // Same db return db; } @@ -163,18 +165,22 @@ class DbmsHandler { // TODO: Fix this hack if (config.name == kDefaultDB) { + spdlog::debug("Last commit timestamp for DB {} is {}", kDefaultDB, + db->storage()->repl_storage_state_.last_commit_timestamp_); + // This seems correct, if database made progress if (db->storage()->repl_storage_state_.last_commit_timestamp_ != storage::kTimestampInitialId) { spdlog::debug("Default storage is not clean, cannot update UUID..."); return NewError::GENERIC; // Update error } - spdlog::debug("Update default db's UUID"); + spdlog::debug("Updated default db's UUID"); // Default db cannot be deleted and remade, have to just update the UUID db->storage()->config_.salient.uuid = config.uuid; UpdateDurability(db->storage()->config_, "."); return db; } - spdlog::debug("Drop database and recreate with the correct UUID"); + spdlog::debug("Dropping database {} with UUID: {} and recreating with the correct UUID: {}", config.name, + std::string(db->uuid()), std::string(config.uuid)); // Defer drop (void)Delete_(db->name()); // Second attempt @@ -266,10 +272,6 @@ class DbmsHandler { bool IsMain() const { return repl_state_.IsMain(); } bool IsReplica() const { return repl_state_.IsReplica(); } -#ifdef MG_ENTERPRISE - // coordination::CoordinatorState &CoordinatorState() { return coordinator_state_; } -#endif - /** * @brief Return all active databases. 
* diff --git a/src/dbms/inmemory/replication_handlers.cpp b/src/dbms/inmemory/replication_handlers.cpp index 3fc174d3c..69f04914c 100644 --- a/src/dbms/inmemory/replication_handlers.cpp +++ b/src/dbms/inmemory/replication_handlers.cpp @@ -19,7 +19,6 @@ #include "storage/v2/durability/durability.hpp" #include "storage/v2/durability/snapshot.hpp" #include "storage/v2/durability/version.hpp" -#include "storage/v2/fmt.hpp" #include "storage/v2/indices/label_index_stats.hpp" #include "storage/v2/inmemory/storage.hpp" #include "storage/v2/inmemory/unique_constraints.hpp" @@ -119,9 +118,14 @@ void InMemoryReplicationHandlers::Register(dbms::DbmsHandler *dbms_handler, repl }); server.rpc_server_.Register<replication_coordination_glue::SwapMainUUIDRpc>( [&data, dbms_handler](auto *req_reader, auto *res_builder) { - spdlog::debug("Received SwapMainUUIDHandler"); + spdlog::debug("Received SwapMainUUIDRpc"); InMemoryReplicationHandlers::SwapMainUUIDHandler(dbms_handler, data, req_reader, res_builder); }); + server.rpc_server_.Register<storage::replication::ForceResetStorageRpc>( + [&data, dbms_handler](auto *req_reader, auto *res_builder) { + spdlog::debug("Received ForceResetStorageRpc"); + InMemoryReplicationHandlers::ForceResetStorageHandler(dbms_handler, data.uuid_, req_reader, res_builder); + }); } void InMemoryReplicationHandlers::SwapMainUUIDHandler(dbms::DbmsHandler *dbms_handler, @@ -135,7 +139,7 @@ void InMemoryReplicationHandlers::SwapMainUUIDHandler(dbms::DbmsHandler *dbms_ha replication_coordination_glue::SwapMainUUIDReq req; slk::Load(&req, req_reader); - spdlog::info(fmt::format("Set replica data UUID to main uuid {}", std::string(req.uuid))); + spdlog::info("Set replica data UUID to main uuid {}", std::string(req.uuid)); dbms_handler->ReplicationState().TryPersistRoleReplica(role_replica_data.config, req.uuid); role_replica_data.uuid_ = req.uuid; @@ -330,6 +334,78 @@ void InMemoryReplicationHandlers::SnapshotHandler(dbms::DbmsHandler *dbms_handle 
spdlog::debug("Replication recovery from snapshot finished!"); } +void InMemoryReplicationHandlers::ForceResetStorageHandler(dbms::DbmsHandler *dbms_handler, + const std::optional<utils::UUID> ¤t_main_uuid, + slk::Reader *req_reader, slk::Builder *res_builder) { + storage::replication::ForceResetStorageReq req; + slk::Load(&req, req_reader); + auto db_acc = GetDatabaseAccessor(dbms_handler, req.db_uuid); + if (!db_acc) { + storage::replication::ForceResetStorageRes res{false, 0}; + slk::Save(res, res_builder); + return; + } + if (!current_main_uuid.has_value() || req.main_uuid != current_main_uuid) [[unlikely]] { + LogWrongMain(current_main_uuid, req.main_uuid, storage::replication::SnapshotReq::kType.name); + storage::replication::ForceResetStorageRes res{false, 0}; + slk::Save(res, res_builder); + return; + } + + storage::replication::Decoder decoder(req_reader); + + auto *storage = static_cast<storage::InMemoryStorage *>(db_acc->get()->storage()); + + auto storage_guard = std::unique_lock{storage->main_lock_}; + + // Clear the database + storage->vertices_.clear(); + storage->edges_.clear(); + storage->commit_log_.reset(); + storage->commit_log_.emplace(); + + storage->constraints_.existence_constraints_ = std::make_unique<storage::ExistenceConstraints>(); + storage->constraints_.unique_constraints_ = std::make_unique<storage::InMemoryUniqueConstraints>(); + storage->indices_.label_index_ = std::make_unique<storage::InMemoryLabelIndex>(); + storage->indices_.label_property_index_ = std::make_unique<storage::InMemoryLabelPropertyIndex>(); + + // Fine since we will force push when reading from WAL just random epoch with 0 timestamp, as it should be if it + // acted as MAIN before + storage->repl_storage_state_.epoch_.SetEpoch(std::string(utils::UUID{})); + storage->repl_storage_state_.last_commit_timestamp_ = 0; + + storage->repl_storage_state_.history.clear(); + storage->vertex_id_ = 0; + storage->edge_id_ = 0; + storage->timestamp_ = 
storage::kTimestampInitialId; + + storage->CollectGarbage<true>(std::move(storage_guard), false); + storage->vertices_.run_gc(); + storage->edges_.run_gc(); + + storage::replication::ForceResetStorageRes res{true, storage->repl_storage_state_.last_commit_timestamp_.load()}; + slk::Save(res, res_builder); + + spdlog::trace("Deleting old snapshot files."); + // Delete other durability files + auto snapshot_files = storage::durability::GetSnapshotFiles(storage->recovery_.snapshot_directory_, storage->uuid_); + for (const auto &[path, uuid, _] : snapshot_files) { + spdlog::trace("Deleting snapshot file {}", path); + storage->file_retainer_.DeleteFile(path); + } + + spdlog::trace("Deleting old WAL files."); + auto wal_files = storage::durability::GetWalFiles(storage->recovery_.wal_directory_, storage->uuid_); + if (wal_files) { + for (const auto &wal_file : *wal_files) { + spdlog::trace("Deleting WAL file {}", wal_file.path); + storage->file_retainer_.DeleteFile(wal_file.path); + } + + storage->wal_file_.reset(); + } +} + void InMemoryReplicationHandlers::WalFilesHandler(dbms::DbmsHandler *dbms_handler, const std::optional<utils::UUID> ¤t_main_uuid, slk::Reader *req_reader, slk::Builder *res_builder) { @@ -513,7 +589,6 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage if (timestamp < storage->timestamp_) { continue; } - SPDLOG_INFO(" Delta {}", applied_deltas); switch (delta.type) { case WalDeltaData::Type::VERTEX_CREATE: { @@ -558,9 +633,10 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage break; } case WalDeltaData::Type::VERTEX_SET_PROPERTY: { - spdlog::trace(" Vertex {} set property {} to {}", delta.vertex_edge_set_property.gid.AsUint(), - delta.vertex_edge_set_property.property, delta.vertex_edge_set_property.value); + spdlog::trace(" Vertex {} set property", delta.vertex_edge_set_property.gid.AsUint()); + // NOLINTNEXTLINE auto *transaction = get_transaction(timestamp); + // NOLINTNEXTLINE auto 
vertex = transaction->FindVertex(delta.vertex_edge_set_property.gid, View::NEW); if (!vertex) throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__); @@ -608,8 +684,7 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage break; } case WalDeltaData::Type::EDGE_SET_PROPERTY: { - spdlog::trace(" Edge {} set property {} to {}", delta.vertex_edge_set_property.gid.AsUint(), - delta.vertex_edge_set_property.property, delta.vertex_edge_set_property.value); + spdlog::trace(" Edge {} set property", delta.vertex_edge_set_property.gid.AsUint()); if (!storage->config_.salient.items.properties_on_edges) throw utils::BasicException( "Can't set properties on edges because properties on edges " @@ -764,6 +839,20 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage transaction->DeleteLabelPropertyIndexStats(storage->NameToLabel(info.label)); break; } + case WalDeltaData::Type::EDGE_INDEX_CREATE: { + spdlog::trace(" Create edge index on :{}", delta.operation_edge_type.edge_type); + auto *transaction = get_transaction(timestamp, kUniqueAccess); + if (transaction->CreateIndex(storage->NameToEdgeType(delta.operation_label.label)).HasError()) + throw utils::BasicException("Invalid transaction! Please raise an issue, {}:{}", __FILE__, __LINE__); + break; + } + case WalDeltaData::Type::EDGE_INDEX_DROP: { + spdlog::trace(" Drop edge index on :{}", delta.operation_edge_type.edge_type); + auto *transaction = get_transaction(timestamp, kUniqueAccess); + if (transaction->DropIndex(storage->NameToEdgeType(delta.operation_label.label)).HasError()) + throw utils::BasicException("Invalid transaction! 
Please raise an issue, {}:{}", __FILE__, __LINE__); + break; + } case WalDeltaData::Type::EXISTENCE_CONSTRAINT_CREATE: { spdlog::trace(" Create existence constraint on :{} ({})", delta.operation_label_property.label, delta.operation_label_property.property); @@ -827,5 +916,4 @@ uint64_t InMemoryReplicationHandlers::ReadAndApplyDelta(storage::InMemoryStorage spdlog::debug("Applied {} deltas", applied_deltas); return applied_deltas; } - } // namespace memgraph::dbms diff --git a/src/dbms/inmemory/replication_handlers.hpp b/src/dbms/inmemory/replication_handlers.hpp index 4406b8338..aaa2d0755 100644 --- a/src/dbms/inmemory/replication_handlers.hpp +++ b/src/dbms/inmemory/replication_handlers.hpp @@ -48,6 +48,9 @@ class InMemoryReplicationHandlers { static void SwapMainUUIDHandler(dbms::DbmsHandler *dbms_handler, replication::RoleReplicaData &role_replica_data, slk::Reader *req_reader, slk::Builder *res_builder); + static void ForceResetStorageHandler(dbms::DbmsHandler *dbms_handler, + const std::optional<utils::UUID> ¤t_main_uuid, slk::Reader *req_reader, + slk::Builder *res_builder); static void LoadWal(storage::InMemoryStorage *storage, storage::replication::Decoder *decoder); diff --git a/src/glue/SessionHL.cpp b/src/glue/SessionHL.cpp index 6c901516c..6a48f15ca 100644 --- a/src/glue/SessionHL.cpp +++ b/src/glue/SessionHL.cpp @@ -59,12 +59,14 @@ class TypedValueResultStreamBase { public: explicit TypedValueResultStreamBase(memgraph::storage::Storage *storage); - std::vector<memgraph::communication::bolt::Value> DecodeValues( - const std::vector<memgraph::query::TypedValue> &values) const; + void DecodeValues(const std::vector<memgraph::query::TypedValue> &values); + + auto AccessValues() const -> std::vector<memgraph::communication::bolt::Value> const & { return decoded_values_; } protected: // NOTE: Needed only for ToBoltValue conversions memgraph::storage::Storage *storage_; + std::vector<memgraph::communication::bolt::Value> decoded_values_; }; /// Wrapper 
around TEncoder which converts TypedValue to Value @@ -75,16 +77,18 @@ class TypedValueResultStream : public TypedValueResultStreamBase { TypedValueResultStream(TEncoder *encoder, memgraph::storage::Storage *storage) : TypedValueResultStreamBase{storage}, encoder_(encoder) {} - void Result(const std::vector<memgraph::query::TypedValue> &values) { encoder_->MessageRecord(DecodeValues(values)); } + void Result(const std::vector<memgraph::query::TypedValue> &values) { + DecodeValues(values); + encoder_->MessageRecord(AccessValues()); + } private: TEncoder *encoder_; }; -std::vector<memgraph::communication::bolt::Value> TypedValueResultStreamBase::DecodeValues( - const std::vector<memgraph::query::TypedValue> &values) const { - std::vector<memgraph::communication::bolt::Value> decoded_values; - decoded_values.reserve(values.size()); +void TypedValueResultStreamBase::DecodeValues(const std::vector<memgraph::query::TypedValue> &values) { + decoded_values_.reserve(values.size()); + decoded_values_.clear(); for (const auto &v : values) { auto maybe_value = memgraph::glue::ToBoltValue(v, storage_, memgraph::storage::View::NEW); if (maybe_value.HasError()) { @@ -99,9 +103,8 @@ std::vector<memgraph::communication::bolt::Value> TypedValueResultStreamBase::De throw memgraph::communication::bolt::ClientError("Unexpected storage error when streaming results."); } } - decoded_values.emplace_back(std::move(*maybe_value)); + decoded_values_.emplace_back(std::move(*maybe_value)); } - return decoded_values; } TypedValueResultStreamBase::TypedValueResultStreamBase(memgraph::storage::Storage *storage) : storage_(storage) {} diff --git a/src/glue/communication.hpp b/src/glue/communication.hpp index 737f32db2..a448b05fc 100644 --- a/src/glue/communication.hpp +++ b/src/glue/communication.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source diff --git a/src/io/network/endpoint.cpp b/src/io/network/endpoint.cpp index 44123db6b..6ed4a6753 100644 --- a/src/io/network/endpoint.cpp +++ b/src/io/network/endpoint.cpp @@ -22,113 +22,15 @@ #include "utils/message.hpp" #include "utils/string.hpp" +namespace { +constexpr std::string_view delimiter = ":"; +} // namespace + namespace memgraph::io::network { -Endpoint::IpFamily Endpoint::GetIpFamily(const std::string &address) { - in_addr addr4; - in6_addr addr6; - int ipv4_result = inet_pton(AF_INET, address.c_str(), &addr4); - int ipv6_result = inet_pton(AF_INET6, address.c_str(), &addr6); - if (ipv4_result == 1) { - return IpFamily::IP4; - } else if (ipv6_result == 1) { - return IpFamily::IP6; - } else { - return IpFamily::NONE; - } -} - -std::optional<std::pair<std::string, uint16_t>> Endpoint::ParseSocketOrIpAddress( - const std::string &address, const std::optional<uint16_t> default_port) { - /// expected address format: - /// - "ip_address:port_number" - /// - "ip_address" - /// We parse the address first. If it's an IP address, a default port must - // be given, or we return nullopt. If it's a socket address, we try to parse - // it into an ip address and a port number; even if a default port is given, - // it won't be used, as we expect that it is given in the address string. 
- const std::string delimiter = ":"; - std::string ip_address; - - std::vector<std::string> parts = utils::Split(address, delimiter); - if (parts.size() == 1) { - if (default_port) { - if (GetIpFamily(address) == IpFamily::NONE) { - return std::nullopt; - } - return std::pair{address, *default_port}; - } - } else if (parts.size() == 2) { - ip_address = std::move(parts[0]); - if (GetIpFamily(ip_address) == IpFamily::NONE) { - return std::nullopt; - } - int64_t int_port{0}; - try { - int_port = utils::ParseInt(parts[1]); - } catch (utils::BasicException &e) { - spdlog::error(utils::MessageWithLink("Invalid port number {}.", parts[1], "https://memgr.ph/ports")); - return std::nullopt; - } - if (int_port < 0) { - spdlog::error(utils::MessageWithLink("Invalid port number {}. The port number must be a positive integer.", - int_port, "https://memgr.ph/ports")); - return std::nullopt; - } - if (int_port > std::numeric_limits<uint16_t>::max()) { - spdlog::error(utils::MessageWithLink("Invalid port number. 
The port number exceedes the maximum possible size.", - "https://memgr.ph/ports")); - return std::nullopt; - } - - return std::pair{ip_address, static_cast<uint16_t>(int_port)}; - } - - return std::nullopt; -} - -std::optional<std::pair<std::string, uint16_t>> Endpoint::ParseHostname( - const std::string &address, const std::optional<uint16_t> default_port = {}) { - const std::string delimiter = ":"; - std::string ip_address; - std::vector<std::string> parts = utils::Split(address, delimiter); - if (parts.size() == 1) { - if (default_port) { - if (!IsResolvableAddress(address, *default_port)) { - return std::nullopt; - } - return std::pair{address, *default_port}; - } - } else if (parts.size() == 2) { - int64_t int_port{0}; - auto hostname = std::move(parts[0]); - try { - int_port = utils::ParseInt(parts[1]); - } catch (utils::BasicException &e) { - spdlog::error(utils::MessageWithLink("Invalid port number {}.", parts[1], "https://memgr.ph/ports")); - return std::nullopt; - } - if (int_port < 0) { - spdlog::error(utils::MessageWithLink("Invalid port number {}. The port number must be a positive integer.", - int_port, "https://memgr.ph/ports")); - return std::nullopt; - } - if (int_port > std::numeric_limits<uint16_t>::max()) { - spdlog::error(utils::MessageWithLink("Invalid port number. The port number exceedes the maximum possible size.", - "https://memgr.ph/ports")); - return std::nullopt; - } - if (IsResolvableAddress(hostname, static_cast<uint16_t>(int_port))) { - return std::pair{hostname, static_cast<u_int16_t>(int_port)}; - } - } - return std::nullopt; -} - -std::string Endpoint::SocketAddress() const { - auto ip_address = address.empty() ? 
"EMPTY" : address; - return ip_address + ":" + std::to_string(port); -} +// NOLINTNEXTLINE +Endpoint::Endpoint(needs_resolving_t, std::string hostname, uint16_t port) + : address(std::move(hostname)), port(port), family{GetIpFamily(address)} {} Endpoint::Endpoint(std::string ip_address, uint16_t port) : address(std::move(ip_address)), port(port) { IpFamily ip_family = GetIpFamily(address); @@ -138,9 +40,23 @@ Endpoint::Endpoint(std::string ip_address, uint16_t port) : address(std::move(ip family = ip_family; } -// NOLINTNEXTLINE -Endpoint::Endpoint(needs_resolving_t, std::string hostname, uint16_t port) - : address(std::move(hostname)), port(port), family{GetIpFamily(address)} {} +std::string Endpoint::SocketAddress() const { return fmt::format("{}:{}", address, port); } + +Endpoint::IpFamily Endpoint::GetIpFamily(std::string_view address) { + // Ensure null-terminated + auto const tmp = std::string(address); + in_addr addr4; + in6_addr addr6; + int ipv4_result = inet_pton(AF_INET, tmp.c_str(), &addr4); + int ipv6_result = inet_pton(AF_INET6, tmp.c_str(), &addr6); + if (ipv4_result == 1) { + return IpFamily::IP4; + } + if (ipv6_result == 1) { + return IpFamily::IP6; + } + return IpFamily::NONE; +} std::ostream &operator<<(std::ostream &os, const Endpoint &endpoint) { // no need to cover the IpFamily::NONE case, as you can't even construct an @@ -153,35 +69,73 @@ std::ostream &operator<<(std::ostream &os, const Endpoint &endpoint) { return os << endpoint.address << ":" << endpoint.port; } -bool Endpoint::IsResolvableAddress(const std::string &address, uint16_t port) { +// NOTE: Intentional copy to ensure null-terminated string +bool Endpoint::IsResolvableAddress(std::string_view address, uint16_t port) { addrinfo hints{ .ai_flags = AI_PASSIVE, .ai_family = AF_UNSPEC, // IPv4 and IPv6 .ai_socktype = SOCK_STREAM // TCP socket }; addrinfo *info = nullptr; - auto status = getaddrinfo(address.c_str(), std::to_string(port).c_str(), &hints, &info); + auto status = 
getaddrinfo(std::string(address).c_str(), std::to_string(port).c_str(), &hints, &info); if (info) freeaddrinfo(info); return status == 0; } -std::optional<std::pair<std::string, uint16_t>> Endpoint::ParseSocketOrAddress( - const std::string &address, const std::optional<uint16_t> default_port) { - const std::string delimiter = ":"; - std::vector<std::string> parts = utils::Split(address, delimiter); - if (parts.size() == 1) { - if (GetIpFamily(address) == IpFamily::NONE) { - return ParseHostname(address, default_port); - } - return ParseSocketOrIpAddress(address, default_port); +std::optional<ParsedAddress> Endpoint::ParseSocketOrAddress(std::string_view address, + std::optional<uint16_t> default_port) { + auto const parts = utils::SplitView(address, delimiter); + + if (parts.size() > 2) { + return std::nullopt; } - if (parts.size() == 2) { - if (GetIpFamily(parts[0]) == IpFamily::NONE) { - return ParseHostname(address, default_port); + + auto const port = [default_port, &parts]() -> std::optional<uint16_t> { + if (parts.size() == 2) { + return static_cast<uint16_t>(utils::ParseInt(parts[1])); } - return ParseSocketOrIpAddress(address, default_port); + return default_port; + }(); + + if (!ValidatePort(port)) { + return std::nullopt; } - return std::nullopt; + + auto const addr = [address, &parts]() { + if (parts.size() == 2) { + return parts[0]; + } + return address; + }(); + + if (GetIpFamily(addr) == IpFamily::NONE) { + if (IsResolvableAddress(addr, *port)) { // NOLINT + return std::pair{addr, *port}; // NOLINT + } + return std::nullopt; + } + + return std::pair{addr, *port}; // NOLINT +} + +auto Endpoint::ValidatePort(std::optional<uint16_t> port) -> bool { + if (!port) { + return false; + } + + if (port < 0) { + spdlog::error(utils::MessageWithLink("Invalid port number {}. 
The port number must be a positive integer.", *port, + "https://memgr.ph/ports")); + return false; + } + + if (port > std::numeric_limits<uint16_t>::max()) { + spdlog::error(utils::MessageWithLink("Invalid port number. The port number exceedes the maximum possible size.", + "https://memgr.ph/ports")); + return false; + } + + return true; } } // namespace memgraph::io::network diff --git a/src/io/network/endpoint.hpp b/src/io/network/endpoint.hpp index 16d70e080..f46d28ace 100644 --- a/src/io/network/endpoint.hpp +++ b/src/io/network/endpoint.hpp @@ -19,11 +19,8 @@ namespace memgraph::io::network { -/** - * This class represents a network endpoint that is used in Socket. - * It is used when connecting to an address and to get the current - * connection address. - */ +using ParsedAddress = std::pair<std::string_view, uint16_t>; + struct Endpoint { static const struct needs_resolving_t { } needs_resolving; @@ -31,59 +28,35 @@ struct Endpoint { Endpoint() = default; Endpoint(std::string ip_address, uint16_t port); Endpoint(needs_resolving_t, std::string hostname, uint16_t port); + Endpoint(Endpoint const &) = default; Endpoint(Endpoint &&) noexcept = default; + Endpoint &operator=(Endpoint const &) = default; Endpoint &operator=(Endpoint &&) noexcept = default; + ~Endpoint() = default; enum class IpFamily : std::uint8_t { NONE, IP4, IP6 }; - std::string SocketAddress() const; + static std::optional<ParsedAddress> ParseSocketOrAddress(std::string_view address, + std::optional<uint16_t> default_port = {}); - bool operator==(const Endpoint &other) const = default; - friend std::ostream &operator<<(std::ostream &os, const Endpoint &endpoint); + std::string SocketAddress() const; std::string address; uint16_t port{0}; IpFamily family{IpFamily::NONE}; - static std::optional<std::pair<std::string, uint16_t>> ParseSocketOrAddress(const std::string &address, - std::optional<uint16_t> default_port); + bool operator==(const Endpoint &other) const = default; + friend std::ostream 
&operator<<(std::ostream &os, const Endpoint &endpoint); - /** - * Tries to parse the given string as either a socket address or ip address. - * Expected address format: - * - "ip_address:port_number" - * - "ip_address" - * We parse the address first. If it's an IP address, a default port must - * be given, or we return nullopt. If it's a socket address, we try to parse - * it into an ip address and a port number; even if a default port is given, - * it won't be used, as we expect that it is given in the address string. - */ - static std::optional<std::pair<std::string, uint16_t>> ParseSocketOrIpAddress( - const std::string &address, std::optional<uint16_t> default_port = {}); + private: + static IpFamily GetIpFamily(std::string_view address); - /** - * Tries to parse given string as either socket address or hostname. - * Expected address format: - * - "hostname:port_number" - * - "hostname" - * After we parse hostname and port we try to resolve the hostname into an ip_address. - */ - static std::optional<std::pair<std::string, uint16_t>> ParseHostname(const std::string &address, - std::optional<uint16_t> default_port); + static bool IsResolvableAddress(std::string_view address, uint16_t port); - static IpFamily GetIpFamily(const std::string &address); - - static bool IsResolvableAddress(const std::string &address, uint16_t port); - - /** - * Tries to resolve hostname to its corresponding IP address. - * Given a DNS hostname, this function performs resolution and returns - * the IP address associated with the hostname. 
- */ - static std::string ResolveHostnameIntoIpAddress(const std::string &address, uint16_t port); + static auto ValidatePort(std::optional<uint16_t> port) -> bool; }; } // namespace memgraph::io::network diff --git a/src/memgraph.cpp b/src/memgraph.cpp index 13adeab52..3e62df0c1 100644 --- a/src/memgraph.cpp +++ b/src/memgraph.cpp @@ -334,7 +334,8 @@ int main(int argc, char **argv) { .salient.items = {.properties_on_edges = FLAGS_storage_properties_on_edges, .enable_schema_metadata = FLAGS_storage_enable_schema_metadata}, .salient.storage_mode = memgraph::flags::ParseStorageMode()}; - + spdlog::info("config recover on startup {}, flags {} {}", db_config.durability.recover_on_startup, + FLAGS_storage_recover_on_startup, FLAGS_data_recovery_on_startup); memgraph::utils::Scheduler jemalloc_purge_scheduler; jemalloc_purge_scheduler.Run("Jemalloc purge", std::chrono::seconds(FLAGS_storage_gc_cycle_sec), [] { memgraph::memory::PurgeUnusedMemory(); }); diff --git a/src/query/common.cpp b/src/query/common.cpp index 3c75ed5ec..94a8d8cdf 100644 --- a/src/query/common.cpp +++ b/src/query/common.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -13,64 +13,6 @@ namespace memgraph::query { -namespace impl { - -bool TypedValueCompare(const TypedValue &a, const TypedValue &b) { - // in ordering null comes after everything else - // at the same time Null is not less that null - // first deal with Null < Whatever case - if (a.IsNull()) return false; - // now deal with NotNull < Null case - if (b.IsNull()) return true; - - // comparisons are from this point legal only between values of - // the same type, or int+float combinations - if ((a.type() != b.type() && !(a.IsNumeric() && b.IsNumeric()))) - throw QueryRuntimeException("Can't compare value of type {} to value of type {}.", a.type(), b.type()); - - switch (a.type()) { - case TypedValue::Type::Bool: - return !a.ValueBool() && b.ValueBool(); - case TypedValue::Type::Int: - if (b.type() == TypedValue::Type::Double) - return a.ValueInt() < b.ValueDouble(); - else - return a.ValueInt() < b.ValueInt(); - case TypedValue::Type::Double: - if (b.type() == TypedValue::Type::Int) - return a.ValueDouble() < b.ValueInt(); - else - return a.ValueDouble() < b.ValueDouble(); - case TypedValue::Type::String: - // NOLINTNEXTLINE(modernize-use-nullptr) - return a.ValueString() < b.ValueString(); - case TypedValue::Type::Date: - // NOLINTNEXTLINE(modernize-use-nullptr) - return a.ValueDate() < b.ValueDate(); - case TypedValue::Type::LocalTime: - // NOLINTNEXTLINE(modernize-use-nullptr) - return a.ValueLocalTime() < b.ValueLocalTime(); - case TypedValue::Type::LocalDateTime: - // NOLINTNEXTLINE(modernize-use-nullptr) - return a.ValueLocalDateTime() < b.ValueLocalDateTime(); - case TypedValue::Type::Duration: - // NOLINTNEXTLINE(modernize-use-nullptr) - return a.ValueDuration() < b.ValueDuration(); - case TypedValue::Type::List: - case TypedValue::Type::Map: - case 
TypedValue::Type::Vertex: - case TypedValue::Type::Edge: - case TypedValue::Type::Path: - case TypedValue::Type::Graph: - case TypedValue::Type::Function: - throw QueryRuntimeException("Comparison is not defined for values of type {}.", a.type()); - case TypedValue::Type::Null: - LOG_FATAL("Invalid type"); - } -} - -} // namespace impl - int64_t QueryTimestamp() { return std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now().time_since_epoch()) .count(); diff --git a/src/query/common.hpp b/src/query/common.hpp index 36ba07791..9f4e01cc0 100644 --- a/src/query/common.hpp +++ b/src/query/common.hpp @@ -23,6 +23,7 @@ #include "query/frontend/ast/ast.hpp" #include "query/frontend/semantic/symbol.hpp" #include "query/typed_value.hpp" +#include "range/v3/all.hpp" #include "storage/v2/id_types.hpp" #include "storage/v2/property_value.hpp" #include "storage/v2/result.hpp" @@ -31,9 +32,91 @@ namespace memgraph::query { -namespace impl { -bool TypedValueCompare(const TypedValue &a, const TypedValue &b); -} // namespace impl +namespace { +std::partial_ordering TypedValueCompare(TypedValue const &a, TypedValue const &b) { + // First assume typical same type comparisons + if (a.type() == b.type()) { + switch (a.type()) { + case TypedValue::Type::Bool: + return a.UnsafeValueBool() <=> b.UnsafeValueBool(); + case TypedValue::Type::Int: + return a.UnsafeValueInt() <=> b.UnsafeValueInt(); + case TypedValue::Type::Double: + return a.UnsafeValueDouble() <=> b.UnsafeValueDouble(); + case TypedValue::Type::String: + return a.UnsafeValueString() <=> b.UnsafeValueString(); + case TypedValue::Type::Date: + return a.UnsafeValueDate() <=> b.UnsafeValueDate(); + case TypedValue::Type::LocalTime: + return a.UnsafeValueLocalTime() <=> b.UnsafeValueLocalTime(); + case TypedValue::Type::LocalDateTime: + return a.UnsafeValueLocalDateTime() <=> b.UnsafeValueLocalDateTime(); + case TypedValue::Type::Duration: + return a.UnsafeValueDuration() <=> 
b.UnsafeValueDuration(); + case TypedValue::Type::Null: + return std::partial_ordering::equivalent; + case TypedValue::Type::List: + case TypedValue::Type::Map: + case TypedValue::Type::Vertex: + case TypedValue::Type::Edge: + case TypedValue::Type::Path: + case TypedValue::Type::Graph: + case TypedValue::Type::Function: + throw QueryRuntimeException("Comparison is not defined for values of type {}.", a.type()); + } + } else { + // from this point legal only between values of + // int+float combinations or against null + + // in ordering null comes after everything else + // at the same time Null is not less than null + // first deal with Null < Whatever case + if (a.IsNull()) return std::partial_ordering::greater; + // now deal with NotNull < Null case + if (b.IsNull()) return std::partial_ordering::less; + + if (!(a.IsNumeric() && b.IsNumeric())) [[unlikely]] + throw QueryRuntimeException("Can't compare value of type {} to value of type {}.", a.type(), b.type()); + + switch (a.type()) { + case TypedValue::Type::Int: + return a.UnsafeValueInt() <=> b.ValueDouble(); + case TypedValue::Type::Double: + return a.UnsafeValueDouble() <=> b.ValueInt(); + case TypedValue::Type::Bool: + case TypedValue::Type::Null: + case TypedValue::Type::String: + case TypedValue::Type::List: + case TypedValue::Type::Map: + case TypedValue::Type::Vertex: + case TypedValue::Type::Edge: + case TypedValue::Type::Path: + case TypedValue::Type::Date: + case TypedValue::Type::LocalTime: + case TypedValue::Type::LocalDateTime: + case TypedValue::Type::Duration: + case TypedValue::Type::Graph: + case TypedValue::Type::Function: + LOG_FATAL("Invalid type"); + } + } +} + +} // namespace + +struct OrderedTypedValueCompare { + OrderedTypedValueCompare(Ordering ordering) : ordering_{ordering}, ascending{ordering == Ordering::ASC} {} + + auto operator()(const TypedValue &lhs, const TypedValue &rhs) const -> std::partial_ordering { + return ascending ?
TypedValueCompare(lhs, rhs) : TypedValueCompare(rhs, lhs); + } + + auto ordering() const { return ordering_; } + + private: + Ordering ordering_; + bool ascending = true; +}; /// Custom Comparator type for comparing vectors of TypedValues. /// @@ -43,32 +126,27 @@ bool TypedValueCompare(const TypedValue &a, const TypedValue &b); class TypedValueVectorCompare final { public: TypedValueVectorCompare() = default; - explicit TypedValueVectorCompare(const std::vector<Ordering> &ordering) : ordering_(ordering) {} + explicit TypedValueVectorCompare(std::vector<OrderedTypedValueCompare> orderings) + : orderings_{std::move(orderings)} {} - template <class TAllocator> - bool operator()(const std::vector<TypedValue, TAllocator> &c1, const std::vector<TypedValue, TAllocator> &c2) const { - // ordering is invalid if there are more elements in the collections - // then there are in the ordering_ vector - MG_ASSERT(c1.size() <= ordering_.size() && c2.size() <= ordering_.size(), - "Collections contain more elements then there are orderings"); + const auto &orderings() const { return orderings_; } - auto c1_it = c1.begin(); - auto c2_it = c2.begin(); - auto ordering_it = ordering_.begin(); - for (; c1_it != c1.end() && c2_it != c2.end(); c1_it++, c2_it++, ordering_it++) { - if (impl::TypedValueCompare(*c1_it, *c2_it)) return *ordering_it == Ordering::ASC; - if (impl::TypedValueCompare(*c2_it, *c1_it)) return *ordering_it == Ordering::DESC; - } - - // at least one collection is exhausted - // c1 is less then c2 iff c1 reached the end but c2 didn't - return (c1_it == c1.end()) && (c2_it != c2.end()); + auto lex_cmp() const { + return [orderings = &orderings_]<typename TAllocator>(const std::vector<TypedValue, TAllocator> &lhs, + const std::vector<TypedValue, TAllocator> &rhs) { + auto rng = ranges::views::zip(*orderings, lhs, rhs); + for (auto const &[cmp, l, r] : rng) { + auto res = cmp(l, r); + if (res == std::partial_ordering::less) return true; + if (res == 
std::partial_ordering::greater) return false; + } + DMG_ASSERT(orderings->size() == lhs.size() && lhs.size() == rhs.size()); + return false; + }; } - // TODO: Remove this, member is public - const auto &ordering() const { return ordering_; } - - std::vector<Ordering> ordering_; + private: + std::vector<OrderedTypedValueCompare> orderings_; }; /// Raise QueryRuntimeException if the value for symbol isn't of expected type. diff --git a/src/query/db_accessor.hpp b/src/query/db_accessor.hpp index e10102ee5..915ea9936 100644 --- a/src/query/db_accessor.hpp +++ b/src/query/db_accessor.hpp @@ -371,6 +371,62 @@ class VerticesIterable final { } }; +class EdgesIterable final { + std::variant<storage::EdgesIterable, std::unordered_set<EdgeAccessor, std::hash<EdgeAccessor>, std::equal_to<void>, + utils::Allocator<EdgeAccessor>> *> + iterable_; + + public: + class Iterator final { + std::variant<storage::EdgesIterable::Iterator, + std::unordered_set<EdgeAccessor, std::hash<EdgeAccessor>, std::equal_to<void>, + utils::Allocator<EdgeAccessor>>::iterator> + it_; + + public: + explicit Iterator(storage::EdgesIterable::Iterator it) : it_(std::move(it)) {} + explicit Iterator(std::unordered_set<EdgeAccessor, std::hash<EdgeAccessor>, std::equal_to<void>, + utils::Allocator<EdgeAccessor>>::iterator it) + : it_(it) {} + + EdgeAccessor operator*() const { + return std::visit([](auto &it_) { return EdgeAccessor(*it_); }, it_); + } + + Iterator &operator++() { + std::visit([](auto &it_) { ++it_; }, it_); + return *this; + } + + bool operator==(const Iterator &other) const { return it_ == other.it_; } + + bool operator!=(const Iterator &other) const { return !(other == *this); } + }; + + explicit EdgesIterable(storage::EdgesIterable iterable) : iterable_(std::move(iterable)) {} + explicit EdgesIterable(std::unordered_set<EdgeAccessor, std::hash<EdgeAccessor>, std::equal_to<void>, + utils::Allocator<EdgeAccessor>> *edges) + : iterable_(edges) {} + + Iterator begin() { + return std::visit( + 
memgraph::utils::Overloaded{ + [](storage::EdgesIterable &iterable_) { return Iterator(iterable_.begin()); }, + [](std::unordered_set<EdgeAccessor, std::hash<EdgeAccessor>, std::equal_to<void>, + utils::Allocator<EdgeAccessor>> *iterable_) { return Iterator(iterable_->begin()); }}, + iterable_); + } + + Iterator end() { + return std::visit( + memgraph::utils::Overloaded{ + [](storage::EdgesIterable &iterable_) { return Iterator(iterable_.end()); }, + [](std::unordered_set<EdgeAccessor, std::hash<EdgeAccessor>, std::equal_to<void>, + utils::Allocator<EdgeAccessor>> *iterable_) { return Iterator(iterable_->end()); }}, + iterable_); + } +}; + class DbAccessor final { storage::Storage::Accessor *accessor_; @@ -416,6 +472,10 @@ class DbAccessor final { return VerticesIterable(accessor_->Vertices(label, property, lower, upper, view)); } + EdgesIterable Edges(storage::View view, storage::EdgeTypeId edge_type) { + return EdgesIterable(accessor_->Edges(edge_type, view)); + } + VertexAccessor InsertVertex() { return VertexAccessor(accessor_->CreateVertex()); } storage::Result<EdgeAccessor> InsertEdge(VertexAccessor *from, VertexAccessor *to, @@ -572,6 +632,8 @@ class DbAccessor final { return accessor_->LabelPropertyIndexExists(label, prop); } + bool EdgeTypeIndexExists(storage::EdgeTypeId edge_type) const { return accessor_->EdgeTypeIndexExists(edge_type); } + std::optional<storage::LabelIndexStats> GetIndexStats(const storage::LabelId &label) const { return accessor_->GetIndexStats(label); } @@ -638,6 +700,10 @@ class DbAccessor final { return accessor_->CreateIndex(label, property); } + utils::BasicResult<storage::StorageIndexDefinitionError, void> CreateIndex(storage::EdgeTypeId edge_type) { + return accessor_->CreateIndex(edge_type); + } + utils::BasicResult<storage::StorageIndexDefinitionError, void> DropIndex(storage::LabelId label) { return accessor_->DropIndex(label); } @@ -647,6 +713,10 @@ class DbAccessor final { return accessor_->DropIndex(label, property); } + 
utils::BasicResult<storage::StorageIndexDefinitionError, void> DropIndex(storage::EdgeTypeId edge_type) { + return accessor_->DropIndex(edge_type); + } + utils::BasicResult<storage::StorageExistenceConstraintDefinitionError, void> CreateExistenceConstraint( storage::LabelId label, storage::PropertyId property) { return accessor_->CreateExistenceConstraint(label, property); diff --git a/src/query/dump.cpp b/src/query/dump.cpp index 2925023fb..f1dd08c8d 100644 --- a/src/query/dump.cpp +++ b/src/query/dump.cpp @@ -242,6 +242,10 @@ void DumpLabelIndex(std::ostream *os, query::DbAccessor *dba, const storage::Lab *os << "CREATE INDEX ON :" << EscapeName(dba->LabelToName(label)) << ";"; } +void DumpEdgeTypeIndex(std::ostream *os, query::DbAccessor *dba, const storage::EdgeTypeId edge_type) { + *os << "CREATE EDGE INDEX ON :" << EscapeName(dba->EdgeTypeToName(edge_type)) << ";"; +} + void DumpLabelPropertyIndex(std::ostream *os, query::DbAccessor *dba, storage::LabelId label, storage::PropertyId property) { *os << "CREATE INDEX ON :" << EscapeName(dba->LabelToName(label)) << "(" << EscapeName(dba->PropertyToName(property)) @@ -297,7 +301,9 @@ PullPlanDump::PullPlanDump(DbAccessor *dba, dbms::DatabaseAccess db_acc) // Internal index cleanup CreateInternalIndexCleanupPullChunk(), // Dump all triggers - CreateTriggersPullChunk()} {} + CreateTriggersPullChunk(), + // Dump all edge-type indices + CreateEdgeTypeIndicesPullChunk()} {} bool PullPlanDump::Pull(AnyStream *stream, std::optional<int> n) { // Iterate all functions that stream some results. 
@@ -352,6 +358,33 @@ PullPlanDump::PullChunk PullPlanDump::CreateLabelIndicesPullChunk() { }; } +PullPlanDump::PullChunk PullPlanDump::CreateEdgeTypeIndicesPullChunk() { + // Dump all edge-type indices + return [this, global_index = 0U](AnyStream *stream, std::optional<int> n) mutable -> std::optional<size_t> { + // Delay the construction of indices vectors + if (!indices_info_) { + indices_info_.emplace(dba_->ListAllIndices()); + } + const auto &edge_type = indices_info_->edge_type; + + size_t local_counter = 0; + while (global_index < edge_type.size() && (!n || local_counter < *n)) { + std::ostringstream os; + DumpEdgeTypeIndex(&os, dba_, edge_type[global_index]); + stream->Result({TypedValue(os.str())}); + + ++global_index; + ++local_counter; + } + + if (global_index == edge_type.size()) { + return local_counter; + } + + return std::nullopt; + }; +} + PullPlanDump::PullChunk PullPlanDump::CreateLabelPropertyIndicesPullChunk() { return [this, global_index = 0U](AnyStream *stream, std::optional<int> n) mutable -> std::optional<size_t> { // Delay the construction of indices vectors diff --git a/src/query/dump.hpp index a9d68d45c..05bd42967 100644 --- a/src/query/dump.hpp +++ b/src/query/dump.hpp @@ -63,5 +63,6 @@ struct PullPlanDump { PullChunk CreateDropInternalIndexPullChunk(); PullChunk CreateInternalIndexCleanupPullChunk(); PullChunk CreateTriggersPullChunk(); + PullChunk CreateEdgeTypeIndicesPullChunk(); }; } // namespace memgraph::query diff --git a/src/query/frame_change.hpp b/src/query/frame_change.hpp index 7baf1fe41..f51185722 100644 --- a/src/query/frame_change.hpp +++ b/src/query/frame_change.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd.
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -126,10 +126,11 @@ class FrameChangeCollector { } bool ResetTrackingValue(const std::string &key) { - if (!tracked_values_.contains(utils::pmr::string(key, utils::NewDeleteResource()))) { + auto const it = tracked_values_.find(utils::pmr::string(key, utils::NewDeleteResource())); + if (it == tracked_values_.cend()) { return false; } - tracked_values_.erase(utils::pmr::string(key, utils::NewDeleteResource())); + tracked_values_.erase(it); AddTrackingKey(key); return true; } diff --git a/src/query/frontend/ast/ast.cpp b/src/query/frontend/ast/ast.cpp index 57d5398ab..7da5c09a0 100644 --- a/src/query/frontend/ast/ast.cpp +++ b/src/query/frontend/ast/ast.cpp @@ -186,6 +186,9 @@ constexpr utils::TypeInfo query::ProfileQuery::kType{utils::TypeId::AST_PROFILE_ constexpr utils::TypeInfo query::IndexQuery::kType{utils::TypeId::AST_INDEX_QUERY, "IndexQuery", &query::Query::kType}; +constexpr utils::TypeInfo query::EdgeIndexQuery::kType{utils::TypeId::AST_EDGE_INDEX_QUERY, "EdgeIndexQuery", + &query::Query::kType}; + constexpr utils::TypeInfo query::Create::kType{utils::TypeId::AST_CREATE, "Create", &query::Clause::kType}; constexpr utils::TypeInfo query::CallProcedure::kType{utils::TypeId::AST_CALL_PROCEDURE, "CallProcedure", diff --git a/src/query/frontend/ast/ast.hpp b/src/query/frontend/ast/ast.hpp index ad3cdb7c5..29f7be3cf 100644 --- a/src/query/frontend/ast/ast.hpp +++ b/src/query/frontend/ast/ast.hpp @@ -21,10 +21,16 @@ #include "query/interpret/awesome_memgraph_functions.hpp" #include "query/typed_value.hpp" #include "storage/v2/property_value.hpp" +#include "utils/exceptions.hpp" #include "utils/typeinfo.hpp" namespace memgraph::query { +constexpr std::string_view kBoltServer = "bolt_server"; +constexpr std::string_view kReplicationServer = "replication_server"; +constexpr 
std::string_view kCoordinatorServer = "coordinator_server"; +constexpr std::string_view kManagementServer = "management_server"; + struct LabelIx { static const utils::TypeInfo kType; const utils::TypeInfo &GetTypeInfo() const { return kType; } @@ -1248,6 +1254,8 @@ class AllPropertiesLookup : public memgraph::query::Expression { friend class AstStorage; }; +using QueryLabelType = std::variant<LabelIx, Expression *>; + class LabelsTest : public memgraph::query::Expression { public: static const utils::TypeInfo kType; @@ -1280,6 +1288,16 @@ class LabelsTest : public memgraph::query::Expression { protected: LabelsTest(Expression *expression, const std::vector<LabelIx> &labels) : expression_(expression), labels_(labels) {} + LabelsTest(Expression *expression, const std::vector<QueryLabelType> &labels) : expression_(expression) { + labels_.reserve(labels.size()); + for (const auto &label : labels) { + if (const auto *label_ix = std::get_if<LabelIx>(&label)) { + labels_.push_back(*label_ix); + } else { + throw SemanticException("You can't use labels in filter expressions."); + } + } + } private: friend class AstStorage; @@ -1770,7 +1788,7 @@ class NodeAtom : public memgraph::query::PatternAtom { return visitor.PostVisit(*this); } - std::vector<memgraph::query::LabelIx> labels_; + std::vector<QueryLabelType> labels_; std::variant<std::unordered_map<memgraph::query::PropertyIx, memgraph::query::Expression *>, memgraph::query::ParameterLookup *> properties_; @@ -1780,7 +1798,11 @@ class NodeAtom : public memgraph::query::PatternAtom { object->identifier_ = identifier_ ? 
identifier_->Clone(storage) : nullptr; object->labels_.resize(labels_.size()); for (auto i = 0; i < object->labels_.size(); ++i) { - object->labels_[i] = storage->GetLabelIx(labels_[i].name); + if (const auto *label = std::get_if<LabelIx>(&labels_[i])) { + object->labels_[i] = storage->GetLabelIx(label->name); + } else { + object->labels_[i] = std::get<Expression *>(labels_[i])->Clone(storage); + } } if (const auto *properties = std::get_if<std::unordered_map<PropertyIx, Expression *>>(&properties_)) { auto &new_obj_properties = std::get<std::unordered_map<PropertyIx, Expression *>>(object->properties_); @@ -2223,6 +2245,34 @@ class IndexQuery : public memgraph::query::Query { friend class AstStorage; }; +class EdgeIndexQuery : public memgraph::query::Query { + public: + static const utils::TypeInfo kType; + const utils::TypeInfo &GetTypeInfo() const override { return kType; } + + enum class Action { CREATE, DROP }; + + EdgeIndexQuery() = default; + + DEFVISITABLE(QueryVisitor<void>); + + memgraph::query::EdgeIndexQuery::Action action_; + memgraph::query::EdgeTypeIx edge_type_; + + EdgeIndexQuery *Clone(AstStorage *storage) const override { + EdgeIndexQuery *object = storage->Create<EdgeIndexQuery>(); + object->action_ = action_; + object->edge_type_ = storage->GetEdgeTypeIx(edge_type_.name); + return object; + } + + protected: + EdgeIndexQuery(Action action, EdgeTypeIx edge_type) : action_(action), edge_type_(edge_type) {} + + private: + friend class AstStorage; +}; + class Create : public memgraph::query::Clause { public: static const utils::TypeInfo kType; @@ -2628,20 +2678,25 @@ class SetLabels : public memgraph::query::Clause { } memgraph::query::Identifier *identifier_{nullptr}; - std::vector<memgraph::query::LabelIx> labels_; + std::vector<QueryLabelType> labels_; SetLabels *Clone(AstStorage *storage) const override { SetLabels *object = storage->Create<SetLabels>(); object->identifier_ = identifier_ ? 
identifier_->Clone(storage) : nullptr; object->labels_.resize(labels_.size()); for (auto i = 0; i < object->labels_.size(); ++i) { - object->labels_[i] = storage->GetLabelIx(labels_[i].name); + if (const auto *label = std::get_if<LabelIx>(&labels_[i])) { + object->labels_[i] = storage->GetLabelIx(label->name); + } else { + object->labels_[i] = std::get<Expression *>(labels_[i])->Clone(storage); + } } return object; } protected: - SetLabels(Identifier *identifier, const std::vector<LabelIx> &labels) : identifier_(identifier), labels_(labels) {} + SetLabels(Identifier *identifier, std::vector<QueryLabelType> labels) + : identifier_(identifier), labels_(std::move(labels)) {} private: friend class AstStorage; @@ -2691,20 +2746,25 @@ class RemoveLabels : public memgraph::query::Clause { } memgraph::query::Identifier *identifier_{nullptr}; - std::vector<memgraph::query::LabelIx> labels_; + std::vector<QueryLabelType> labels_; RemoveLabels *Clone(AstStorage *storage) const override { RemoveLabels *object = storage->Create<RemoveLabels>(); object->identifier_ = identifier_ ? 
identifier_->Clone(storage) : nullptr; object->labels_.resize(labels_.size()); for (auto i = 0; i < object->labels_.size(); ++i) { - object->labels_[i] = storage->GetLabelIx(labels_[i].name); + if (const auto *label = std::get_if<LabelIx>(&labels_[i])) { + object->labels_[i] = storage->GetLabelIx(label->name); + } else { + object->labels_[i] = std::get<Expression *>(labels_[i])->Clone(storage); + } } return object; } protected: - RemoveLabels(Identifier *identifier, const std::vector<LabelIx> &labels) : identifier_(identifier), labels_(labels) {} + RemoveLabels(Identifier *identifier, std::vector<QueryLabelType> labels) + : identifier_(identifier), labels_(std::move(labels)) {} private: friend class AstStorage; @@ -3085,24 +3145,21 @@ class CoordinatorQuery : public memgraph::query::Query { DEFVISITABLE(QueryVisitor<void>); memgraph::query::CoordinatorQuery::Action action_; - std::string instance_name_; - memgraph::query::Expression *replication_socket_address_{nullptr}; - memgraph::query::Expression *coordinator_socket_address_{nullptr}; - memgraph::query::Expression *raft_socket_address_{nullptr}; - memgraph::query::Expression *raft_server_id_{nullptr}; + std::string instance_name_{}; + std::unordered_map<memgraph::query::Expression *, memgraph::query::Expression *> configs_; + memgraph::query::Expression *coordinator_server_id_{nullptr}; memgraph::query::CoordinatorQuery::SyncMode sync_mode_; CoordinatorQuery *Clone(AstStorage *storage) const override { auto *object = storage->Create<CoordinatorQuery>(); + object->action_ = action_; object->instance_name_ = instance_name_; - object->replication_socket_address_ = - replication_socket_address_ ? replication_socket_address_->Clone(storage) : nullptr; + object->coordinator_server_id_ = coordinator_server_id_ ? coordinator_server_id_->Clone(storage) : nullptr; object->sync_mode_ = sync_mode_; - object->coordinator_socket_address_ = - coordinator_socket_address_ ? 
coordinator_socket_address_->Clone(storage) : nullptr; - object->raft_socket_address_ = raft_socket_address_ ? raft_socket_address_->Clone(storage) : nullptr; - object->raft_server_id_ = raft_server_id_ ? raft_server_id_->Clone(storage) : nullptr; + for (const auto &[key, value] : configs_) { + object->configs_[key->Clone(storage)] = value->Clone(storage); + } return object; } @@ -3586,7 +3643,7 @@ class PatternComprehension : public memgraph::query::Expression { bool Accept(HierarchicalTreeVisitor &visitor) override { if (visitor.PreVisit(*this)) { if (variable_) { - variable_->Accept(visitor); + throw utils::NotYetImplemented("Variable in pattern comprehension."); } pattern_->Accept(visitor); if (filter_) { @@ -3615,7 +3672,8 @@ class PatternComprehension : public memgraph::query::Expression { int32_t symbol_pos_{-1}; PatternComprehension *Clone(AstStorage *storage) const override { - PatternComprehension *object = storage->Create<PatternComprehension>(); + auto *object = storage->Create<PatternComprehension>(); + object->variable_ = variable_ ? variable_->Clone(storage) : nullptr; object->pattern_ = pattern_ ? pattern_->Clone(storage) : nullptr; object->filter_ = filter_ ? filter_->Clone(storage) : nullptr; object->resultExpr_ = resultExpr_ ? 
resultExpr_->Clone(storage) : nullptr; @@ -3625,7 +3683,8 @@ class PatternComprehension : public memgraph::query::Expression { } protected: - PatternComprehension(Identifier *variable, Pattern *pattern) : variable_(variable), pattern_(pattern) {} + PatternComprehension(Identifier *variable, Pattern *pattern, Where *filter, Expression *resultExpr) + : variable_(variable), pattern_(pattern), filter_(filter), resultExpr_(resultExpr) {} private: friend class AstStorage; diff --git a/src/query/frontend/ast/ast_visitor.hpp b/src/query/frontend/ast/ast_visitor.hpp index 5d463d3ee..bf11878da 100644 --- a/src/query/frontend/ast/ast_visitor.hpp +++ b/src/query/frontend/ast/ast_visitor.hpp @@ -82,6 +82,7 @@ class AuthQuery; class ExplainQuery; class ProfileQuery; class IndexQuery; +class EdgeIndexQuery; class DatabaseInfoQuery; class SystemInfoQuery; class ConstraintQuery; @@ -143,11 +144,11 @@ class ExpressionVisitor template <class TResult> class QueryVisitor - : public utils::Visitor<TResult, CypherQuery, ExplainQuery, ProfileQuery, IndexQuery, AuthQuery, DatabaseInfoQuery, - SystemInfoQuery, ConstraintQuery, DumpQuery, ReplicationQuery, LockPathQuery, - FreeMemoryQuery, TriggerQuery, IsolationLevelQuery, CreateSnapshotQuery, StreamQuery, - SettingQuery, VersionQuery, ShowConfigQuery, TransactionQueueQuery, StorageModeQuery, - AnalyzeGraphQuery, MultiDatabaseQuery, ShowDatabasesQuery, EdgeImportModeQuery, - CoordinatorQuery> {}; + : public utils::Visitor<TResult, CypherQuery, ExplainQuery, ProfileQuery, IndexQuery, EdgeIndexQuery, AuthQuery, + DatabaseInfoQuery, SystemInfoQuery, ConstraintQuery, DumpQuery, ReplicationQuery, + LockPathQuery, FreeMemoryQuery, TriggerQuery, IsolationLevelQuery, CreateSnapshotQuery, + StreamQuery, SettingQuery, VersionQuery, ShowConfigQuery, TransactionQueueQuery, + StorageModeQuery, AnalyzeGraphQuery, MultiDatabaseQuery, ShowDatabasesQuery, + EdgeImportModeQuery, CoordinatorQuery> {}; } // namespace memgraph::query diff --git 
a/src/query/frontend/ast/cypher_main_visitor.cpp b/src/query/frontend/ast/cypher_main_visitor.cpp index d3747bc3f..6da48c97c 100644 --- a/src/query/frontend/ast/cypher_main_visitor.cpp +++ b/src/query/frontend/ast/cypher_main_visitor.cpp @@ -265,6 +265,27 @@ antlrcpp::Any CypherMainVisitor::visitDropIndex(MemgraphCypher::DropIndexContext return index_query; } +antlrcpp::Any CypherMainVisitor::visitEdgeIndexQuery(MemgraphCypher::EdgeIndexQueryContext *ctx) { + MG_ASSERT(ctx->children.size() == 1, "EdgeIndexQuery should have exactly one child!"); + auto *index_query = std::any_cast<EdgeIndexQuery *>(ctx->children[0]->accept(this)); + query_ = index_query; + return index_query; +} + +antlrcpp::Any CypherMainVisitor::visitCreateEdgeIndex(MemgraphCypher::CreateEdgeIndexContext *ctx) { + auto *index_query = storage_->Create<EdgeIndexQuery>(); + index_query->action_ = EdgeIndexQuery::Action::CREATE; + index_query->edge_type_ = AddEdgeType(std::any_cast<std::string>(ctx->labelName()->accept(this))); + return index_query; +} + +antlrcpp::Any CypherMainVisitor::visitDropEdgeIndex(MemgraphCypher::DropEdgeIndexContext *ctx) { + auto *index_query = storage_->Create<EdgeIndexQuery>(); + index_query->action_ = EdgeIndexQuery::Action::DROP; + index_query->edge_type_ = AddEdgeType(std::any_cast<std::string>(ctx->labelName()->accept(this))); + return index_query; +} + antlrcpp::Any CypherMainVisitor::visitAuthQuery(MemgraphCypher::AuthQueryContext *ctx) { MG_ASSERT(ctx->children.size() == 1, "AuthQuery should have exactly one child!"); auto *auth_query = std::any_cast<AuthQuery *>(ctx->children[0]->accept(this)); @@ -377,24 +398,17 @@ antlrcpp::Any CypherMainVisitor::visitRegisterReplica(MemgraphCypher::RegisterRe antlrcpp::Any CypherMainVisitor::visitRegisterInstanceOnCoordinator( MemgraphCypher::RegisterInstanceOnCoordinatorContext *ctx) { auto *coordinator_query = storage_->Create<CoordinatorQuery>(); - if (!ctx->replicationSocketAddress()->literal()->StringLiteral()) { - throw 
SemanticException("Replication socket address should be a string literal!"); - } - if (!ctx->coordinatorSocketAddress()->literal()->StringLiteral()) { - throw SemanticException("Coordinator socket address should be a string literal!"); - } coordinator_query->action_ = CoordinatorQuery::Action::REGISTER_INSTANCE; - coordinator_query->replication_socket_address_ = - std::any_cast<Expression *>(ctx->replicationSocketAddress()->accept(this)); - coordinator_query->coordinator_socket_address_ = - std::any_cast<Expression *>(ctx->coordinatorSocketAddress()->accept(this)); coordinator_query->instance_name_ = std::any_cast<std::string>(ctx->instanceName()->symbolicName()->accept(this)); - if (ctx->ASYNC()) { - coordinator_query->sync_mode_ = memgraph::query::CoordinatorQuery::SyncMode::ASYNC; - } else { - coordinator_query->sync_mode_ = memgraph::query::CoordinatorQuery::SyncMode::SYNC; - } + coordinator_query->configs_ = + std::any_cast<std::unordered_map<Expression *, Expression *>>(ctx->configsMap->accept(this)); + coordinator_query->sync_mode_ = [ctx]() { + if (ctx->ASYNC()) { + return CoordinatorQuery::SyncMode::ASYNC; + } + return CoordinatorQuery::SyncMode::SYNC; + }(); return coordinator_query; } @@ -410,17 +424,10 @@ antlrcpp::Any CypherMainVisitor::visitUnregisterInstanceOnCoordinator( antlrcpp::Any CypherMainVisitor::visitAddCoordinatorInstance(MemgraphCypher::AddCoordinatorInstanceContext *ctx) { auto *coordinator_query = storage_->Create<CoordinatorQuery>(); - if (!ctx->raftSocketAddress()->literal()->StringLiteral()) { - throw SemanticException("Raft socket address should be a string literal!"); - } - - if (!ctx->raftServerId()->literal()->numberLiteral()) { - throw SemanticException("Raft server id should be a number literal!"); - } - coordinator_query->action_ = CoordinatorQuery::Action::ADD_COORDINATOR_INSTANCE; - coordinator_query->raft_socket_address_ = std::any_cast<Expression *>(ctx->raftSocketAddress()->accept(this)); - 
coordinator_query->raft_server_id_ = std::any_cast<Expression *>(ctx->raftServerId()->accept(this)); + coordinator_query->coordinator_server_id_ = std::any_cast<Expression *>(ctx->coordinatorServerId()->accept(this)); + coordinator_query->configs_ = + std::any_cast<std::unordered_map<Expression *, Expression *>>(ctx->configsMap->accept(this)); return coordinator_query; } @@ -1912,7 +1919,7 @@ antlrcpp::Any CypherMainVisitor::visitNodePattern(MemgraphCypher::NodePatternCon anonymous_identifiers.push_back(&node->identifier_); } if (ctx->nodeLabels()) { - node->labels_ = std::any_cast<std::vector<LabelIx>>(ctx->nodeLabels()->accept(this)); + node->labels_ = std::any_cast<std::vector<QueryLabelType>>(ctx->nodeLabels()->accept(this)); } if (ctx->properties()) { // This can return either properties or parameters @@ -1926,16 +1933,27 @@ antlrcpp::Any CypherMainVisitor::visitNodePattern(MemgraphCypher::NodePatternCon } antlrcpp::Any CypherMainVisitor::visitNodeLabels(MemgraphCypher::NodeLabelsContext *ctx) { - std::vector<LabelIx> labels; + std::vector<QueryLabelType> labels; for (auto *node_label : ctx->nodeLabel()) { - if (node_label->labelName()->symbolicName()) { + auto *label_name = node_label->labelName(); + if (label_name->symbolicName()) { labels.emplace_back(AddLabel(std::any_cast<std::string>(node_label->accept(this)))); - } else { + } else if (label_name->parameter()) { // If we have a parameter, we have to resolve it. const auto *param_lookup = std::any_cast<ParameterLookup *>(node_label->accept(this)); const auto label_name = parameters_->AtTokenPosition(param_lookup->token_position_).ValueString(); labels.emplace_back(storage_->GetLabelIx(label_name)); query_info_.is_cacheable = false; // We can't cache queries with label parameters. 
+ } else { + auto variable = std::any_cast<std::string>(label_name->variable()->accept(this)); + users_identifiers.insert(variable); + auto *expression = static_cast<Expression *>(storage_->Create<Identifier>(variable)); + for (auto *lookup : label_name->propertyLookup()) { + auto key = std::any_cast<PropertyIx>(lookup->accept(this)); + auto *property_lookup = storage_->Create<PropertyLookup>(expression, key); + expression = property_lookup; + } + labels.emplace_back(expression); } } return labels; @@ -2483,7 +2501,7 @@ antlrcpp::Any CypherMainVisitor::visitListIndexingOrSlicing(MemgraphCypher::List antlrcpp::Any CypherMainVisitor::visitExpression2a(MemgraphCypher::Expression2aContext *ctx) { auto *expression = std::any_cast<Expression *>(ctx->expression2b()->accept(this)); if (ctx->nodeLabels()) { - auto labels = std::any_cast<std::vector<LabelIx>>(ctx->nodeLabels()->accept(this)); + auto labels = std::any_cast<std::vector<QueryLabelType>>(ctx->nodeLabels()->accept(this)); expression = storage_->Create<LabelsTest>(expression, labels); } return expression; @@ -2809,7 +2827,7 @@ antlrcpp::Any CypherMainVisitor::visitSetItem(MemgraphCypher::SetItemContext *ct // SetLabels auto *set_labels = storage_->Create<SetLabels>(); set_labels->identifier_ = storage_->Create<Identifier>(std::any_cast<std::string>(ctx->variable()->accept(this))); - set_labels->labels_ = std::any_cast<std::vector<LabelIx>>(ctx->nodeLabels()->accept(this)); + set_labels->labels_ = std::any_cast<std::vector<QueryLabelType>>(ctx->nodeLabels()->accept(this)); return static_cast<Clause *>(set_labels); } @@ -2832,7 +2850,7 @@ antlrcpp::Any CypherMainVisitor::visitRemoveItem(MemgraphCypher::RemoveItemConte // RemoveLabels auto *remove_labels = storage_->Create<RemoveLabels>(); remove_labels->identifier_ = storage_->Create<Identifier>(std::any_cast<std::string>(ctx->variable()->accept(this))); - remove_labels->labels_ = std::any_cast<std::vector<LabelIx>>(ctx->nodeLabels()->accept(this)); + 
remove_labels->labels_ = std::any_cast<std::vector<QueryLabelType>>(ctx->nodeLabels()->accept(this)); return static_cast<Clause *>(remove_labels); } diff --git a/src/query/frontend/ast/cypher_main_visitor.hpp b/src/query/frontend/ast/cypher_main_visitor.hpp index 6d66e6d7e..8c65345c8 100644 --- a/src/query/frontend/ast/cypher_main_visitor.hpp +++ b/src/query/frontend/ast/cypher_main_visitor.hpp @@ -148,6 +148,11 @@ class CypherMainVisitor : public antlropencypher::MemgraphCypherBaseVisitor { */ antlrcpp::Any visitIndexQuery(MemgraphCypher::IndexQueryContext *ctx) override; + /** + * @return IndexQuery* + */ + antlrcpp::Any visitEdgeIndexQuery(MemgraphCypher::EdgeIndexQueryContext *ctx) override; + /** * @return ExplainQuery* */ @@ -499,6 +504,16 @@ class CypherMainVisitor : public antlropencypher::MemgraphCypherBaseVisitor { */ antlrcpp::Any visitDropIndex(MemgraphCypher::DropIndexContext *ctx) override; + /** + * @return EdgeIndexQuery* + */ + antlrcpp::Any visitCreateEdgeIndex(MemgraphCypher::CreateEdgeIndexContext *ctx) override; + + /** + * @return DropEdgeIndex* + */ + antlrcpp::Any visitDropEdgeIndex(MemgraphCypher::DropEdgeIndexContext *ctx) override; + /** * @return AuthQuery* */ diff --git a/src/query/frontend/opencypher/grammar/Cypher.g4 b/src/query/frontend/opencypher/grammar/Cypher.g4 index 55cb53ef3..7fa218598 100644 --- a/src/query/frontend/opencypher/grammar/Cypher.g4 +++ b/src/query/frontend/opencypher/grammar/Cypher.g4 @@ -193,7 +193,10 @@ nodeLabels : nodeLabel ( nodeLabel )* ; nodeLabel : ':' labelName ; -labelName : symbolicName | parameter; +labelName : symbolicName + | parameter + | variable ( propertyLookup )+ + ; relTypeName : symbolicName ; diff --git a/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 b/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 index d24480b0a..378310c22 100644 --- a/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 +++ b/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 @@ -133,6 +133,7 @@ 
symbolicName : UnescapedSymbolicName query : cypherQuery | indexQuery + | edgeIndexQuery | explainQuery | profileQuery | databaseInfoQuery @@ -387,22 +388,22 @@ instanceName : symbolicName ; socketAddress : literal ; -coordinatorSocketAddress : literal ; -replicationSocketAddress : literal ; -raftSocketAddress : literal ; - registerReplica : REGISTER REPLICA instanceName ( SYNC | ASYNC ) TO socketAddress ; -registerInstanceOnCoordinator : REGISTER INSTANCE instanceName ON coordinatorSocketAddress ( AS ASYNC ) ? WITH replicationSocketAddress ; +configKeyValuePair : literal ':' literal ; + +configMap : '{' ( configKeyValuePair ( ',' configKeyValuePair )* )? '}' ; + +registerInstanceOnCoordinator : REGISTER INSTANCE instanceName ( AS ASYNC ) ? WITH CONFIG configsMap=configMap ; unregisterInstanceOnCoordinator : UNREGISTER INSTANCE instanceName ; setInstanceToMain : SET INSTANCE instanceName TO MAIN ; -raftServerId : literal ; +coordinatorServerId : literal ; -addCoordinatorInstance : ADD COORDINATOR raftServerId ON raftSocketAddress ; +addCoordinatorInstance : ADD COORDINATOR coordinatorServerId WITH CONFIG configsMap=configMap ; dropReplica : DROP REPLICA instanceName ; @@ -456,10 +457,6 @@ commonCreateStreamConfig : TRANSFORM transformationName=procedureName createStream : kafkaCreateStream | pulsarCreateStream ; -configKeyValuePair : literal ':' literal ; - -configMap : '{' ( configKeyValuePair ( ',' configKeyValuePair )* )? 
'}' ; - kafkaCreateStreamConfig : TOPICS topicNames | CONSUMER_GROUP consumerGroup=symbolicNameWithDotsAndMinus | BOOTSTRAP_SERVERS bootstrapServers=literal @@ -527,3 +524,9 @@ showDatabase : SHOW DATABASE ; showDatabases : SHOW DATABASES ; edgeImportModeQuery : EDGE IMPORT MODE ( ACTIVE | INACTIVE ) ; + +createEdgeIndex : CREATE EDGE INDEX ON ':' labelName ; + +dropEdgeIndex : DROP EDGE INDEX ON ':' labelName ; + +edgeIndexQuery : createEdgeIndex | dropEdgeIndex ; diff --git a/src/query/frontend/semantic/required_privileges.cpp b/src/query/frontend/semantic/required_privileges.cpp index ef66a75ac..15726e3e2 100644 --- a/src/query/frontend/semantic/required_privileges.cpp +++ b/src/query/frontend/semantic/required_privileges.cpp @@ -27,6 +27,8 @@ class PrivilegeExtractor : public QueryVisitor<void>, public HierarchicalTreeVis void Visit(IndexQuery & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::INDEX); } + void Visit(EdgeIndexQuery & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::INDEX); } + void Visit(AnalyzeGraphQuery & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::INDEX); } void Visit(AuthQuery & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::AUTH); } diff --git a/src/query/frontend/semantic/symbol.hpp b/src/query/frontend/semantic/symbol.hpp index 77557b6fe..0cfb86608 100644 --- a/src/query/frontend/semantic/symbol.hpp +++ b/src/query/frontend/semantic/symbol.hpp @@ -53,6 +53,8 @@ class Symbol { bool user_declared() const { return user_declared_; } int token_position() const { return token_position_; } + bool IsSymbolAnonym() const { return name_.substr(0U, 4U) == "anon"; } + std::string name_; int64_t position_; bool user_declared_{true}; diff --git a/src/query/frontend/semantic/symbol_generator.cpp b/src/query/frontend/semantic/symbol_generator.cpp index e8ef3cba5..c12915634 100644 --- a/src/query/frontend/semantic/symbol_generator.cpp +++ b/src/query/frontend/semantic/symbol_generator.cpp @@ -568,6 +568,44 @@ 
bool SymbolGenerator::PostVisit(SetProperty & /*set_property*/) { return true; } +bool SymbolGenerator::PreVisit(SetLabels &set_labels) { + auto &scope = scopes_.back(); + scope.in_set_labels = true; + for (auto &label : set_labels.labels_) { + if (auto *expression = std::get_if<Expression *>(&label)) { + (*expression)->Accept(*this); + } + } + + return true; +} + +bool SymbolGenerator::PostVisit(SetLabels & /*set_labels*/) { + auto &scope = scopes_.back(); + scope.in_set_labels = false; + + return true; +} + +bool SymbolGenerator::PreVisit(RemoveLabels &remove_labels) { + auto &scope = scopes_.back(); + scope.in_remove_labels = true; + for (auto &label : remove_labels.labels_) { + if (auto *expression = std::get_if<Expression *>(&label)) { + (*expression)->Accept(*this); + } + } + + return true; +} + +bool SymbolGenerator::PostVisit(RemoveLabels & /*remove_labels*/) { + auto &scope = scopes_.back(); + scope.in_remove_labels = false; + + return true; +} + // Pattern and its subparts. bool SymbolGenerator::PreVisit(Pattern &pattern) { @@ -602,6 +640,15 @@ bool SymbolGenerator::PreVisit(NodeAtom &node_atom) { }; scope.in_node_atom = true; + + if (scope.in_create) { // you can use expressions with labels only in create + for (auto &label : node_atom.labels_) { + if (auto *expression = std::get_if<Expression *>(&label)) { + (*expression)->Accept(*this); + } + } + } + if (auto *properties = std::get_if<std::unordered_map<PropertyIx, Expression *>>(&node_atom.properties_)) { bool props_or_labels = !properties->empty() || !node_atom.labels_.empty(); @@ -721,6 +768,32 @@ bool SymbolGenerator::PostVisit(EdgeAtom &) { return true; } +bool SymbolGenerator::PreVisit(PatternComprehension &pc) { + auto &scope = scopes_.back(); + + if (scope.in_set_property) { + throw utils::NotYetImplemented("Pattern Comprehension cannot be used within SET clause!"); + } + + if (scope.in_with) { + throw utils::NotYetImplemented("Pattern Comprehension cannot be used within WITH!"); + } + + if
(scope.in_reduce) { + throw utils::NotYetImplemented("Pattern Comprehension cannot be used within REDUCE!"); + } + + if (scope.num_if_operators) { + throw utils::NotYetImplemented("IF operator cannot be used with Pattern Comprehension!"); + } + + const auto &symbol = CreateAnonymousSymbol(); + pc.MapTo(symbol); + return true; +} + +bool SymbolGenerator::PostVisit(PatternComprehension & /*pc*/) { return true; } + void SymbolGenerator::VisitWithIdentifiers(Expression *expr, const std::vector<Identifier *> &identifiers) { auto &scope = scopes_.back(); std::vector<std::pair<std::optional<Symbol>, Identifier *>> prev_symbols; diff --git a/src/query/frontend/semantic/symbol_generator.hpp b/src/query/frontend/semantic/symbol_generator.hpp index f9e6468f6..41122625a 100644 --- a/src/query/frontend/semantic/symbol_generator.hpp +++ b/src/query/frontend/semantic/symbol_generator.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -68,6 +68,10 @@ class SymbolGenerator : public HierarchicalTreeVisitor { bool PostVisit(Foreach &) override; bool PreVisit(SetProperty & /*set_property*/) override; bool PostVisit(SetProperty & /*set_property*/) override; + bool PreVisit(SetLabels &) override; + bool PostVisit(SetLabels & /*set_labels*/) override; + bool PreVisit(RemoveLabels &) override; + bool PostVisit(RemoveLabels & /*remove_labels*/) override; // Expressions ReturnType Visit(Identifier &) override; @@ -97,6 +101,8 @@ class SymbolGenerator : public HierarchicalTreeVisitor { bool PostVisit(NodeAtom &) override; bool PreVisit(EdgeAtom &) override; bool PostVisit(EdgeAtom &) override; + bool PreVisit(PatternComprehension &) override; + bool PostVisit(PatternComprehension &) override; private: // Scope stores the state of where we are when visiting the AST and 
a map of @@ -128,6 +134,8 @@ class SymbolGenerator : public HierarchicalTreeVisitor { bool in_set_property{false}; bool in_call_subquery{false}; bool has_return{false}; + bool in_set_labels{false}; + bool in_remove_labels{false}; // True when visiting a pattern atom (node or edge) identifier, which can be // reused or created in the pattern itself. bool in_pattern_atom_identifier{false}; diff --git a/src/query/interpret/awesome_memgraph_functions.cpp b/src/query/interpret/awesome_memgraph_functions.cpp index 6be8c4837..a9381f92a 100644 --- a/src/query/interpret/awesome_memgraph_functions.cpp +++ b/src/query/interpret/awesome_memgraph_functions.cpp @@ -761,13 +761,19 @@ TypedValue Range(const TypedValue *args, int64_t nargs, const FunctionContext &c int64_t step = nargs == 3 ? args[2].ValueInt() : 1; TypedValue::TVector list(ctx.memory); if (lbound <= rbound && step > 0) { + int64_t n = ((rbound - lbound + 1) + (step - 1)) / step; + list.reserve(n); for (auto i = lbound; i <= rbound; i += step) { list.emplace_back(i); } + MG_ASSERT(list.size() == n); } else if (lbound >= rbound && step < 0) { + int64_t n = ((lbound - rbound + 1) + (-step - 1)) / -step; + list.reserve(n); for (auto i = lbound; i >= rbound; i += step) { list.emplace_back(i); } + MG_ASSERT(list.size() == n); } return TypedValue(std::move(list)); } diff --git a/src/query/interpret/eval.cpp b/src/query/interpret/eval.cpp index 8bd308420..7c5d838a5 100644 --- a/src/query/interpret/eval.cpp +++ b/src/query/interpret/eval.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -13,12 +13,12 @@ namespace memgraph::query { -int64_t EvaluateInt(ExpressionEvaluator *evaluator, Expression *expr, const std::string &what) { +int64_t EvaluateInt(ExpressionEvaluator *evaluator, Expression *expr, std::string_view what) { TypedValue value = expr->Accept(*evaluator); try { return value.ValueInt(); } catch (TypedValueException &e) { - throw QueryRuntimeException(what + " must be an int"); + throw QueryRuntimeException(std::string(what) + " must be an int"); } } diff --git a/src/query/interpret/eval.hpp b/src/query/interpret/eval.hpp index fe47a3fcd..07a71412c 100644 --- a/src/query/interpret/eval.hpp +++ b/src/query/interpret/eval.hpp @@ -226,7 +226,6 @@ class ExpressionEvaluator : public ExpressionVisitor<TypedValue> { } \ } - BINARY_OPERATOR_VISITOR(OrOperator, ||, OR); BINARY_OPERATOR_VISITOR(XorOperator, ^, XOR); BINARY_OPERATOR_VISITOR(AdditionOperator, +, +); BINARY_OPERATOR_VISITOR(SubtractionOperator, -, -); @@ -261,6 +260,20 @@ class ExpressionEvaluator : public ExpressionVisitor<TypedValue> { } } + TypedValue Visit(OrOperator &op) override { + auto value1 = op.expression1_->Accept(*this); + if (value1.IsBool() && value1.ValueBool()) { + // If first expression is true, don't evaluate the second one. + return value1; + } + auto value2 = op.expression2_->Accept(*this); + try { + return value1 || value2; + } catch (const TypedValueException &) { + throw QueryRuntimeException("Invalid types: {} and {} for OR.", value1.type(), value2.type()); + } + } + TypedValue Visit(IfOperator &if_operator) override { auto condition = if_operator.condition_->Accept(*this); if (condition.IsNull()) { @@ -1196,7 +1209,7 @@ class ExpressionEvaluator : public ExpressionVisitor<TypedValue> { /// @param what - Name of what's getting evaluated. 
Used for user feedback (via /// exception) when the evaluated value is not an int. /// @throw QueryRuntimeException if expression doesn't evaluate to an int. -int64_t EvaluateInt(ExpressionEvaluator *evaluator, Expression *expr, const std::string &what); +int64_t EvaluateInt(ExpressionEvaluator *evaluator, Expression *expr, std::string_view what); std::optional<size_t> EvaluateMemoryLimit(ExpressionVisitor<TypedValue> &eval, Expression *memory_limit, size_t memory_scale); diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp index 5c987d0bd..9c2f3ff28 100644 --- a/src/query/interpreter.cpp +++ b/src/query/interpreter.cpp @@ -246,27 +246,6 @@ std::optional<std::string> GetOptionalStringValue(query::Expression *expression, return {}; }; -bool IsAllShortestPathsQuery(const std::vector<memgraph::query::Clause *> &clauses) { - for (const auto &clause : clauses) { - if (clause->GetTypeInfo() != Match::kType) { - continue; - } - auto *match_clause = utils::Downcast<Match>(clause); - for (const auto &pattern : match_clause->patterns_) { - for (const auto &atom : pattern->atoms_) { - if (atom->GetTypeInfo() != EdgeAtom::kType) { - continue; - } - auto *edge_atom = utils::Downcast<EdgeAtom>(atom); - if (edge_atom->type_ == EdgeAtom::Type::ALL_SHORTEST_PATHS) { - return true; - } - } - } - } - return false; -} - inline auto convertFromCoordinatorToReplicationMode(const CoordinatorQuery::SyncMode &sync_mode) -> replication_coordination_glue::ReplicationMode { switch (sync_mode) { @@ -355,7 +334,7 @@ class ReplQueryHandler { const auto replication_config = replication::ReplicationClientConfig{.name = name, .mode = repl_mode, - .ip_address = ip, + .ip_address = std::string(ip), .port = port, .replica_check_frequency = replica_check_frequency, .ssl = std::nullopt}; @@ -410,7 +389,7 @@ class CoordQueryHandler final : public query::CoordinatorQueryHandler { : coordinator_handler_(coordinator_state) {} - void UnregisterInstance(std::string const &instance_name) override 
{ + void UnregisterInstance(std::string_view instance_name) override { auto status = coordinator_handler_.UnregisterReplicationInstance(instance_name); switch (status) { using enum memgraph::coordination::UnregisterInstanceCoordinatorStatus; @@ -423,6 +402,8 @@ class CoordQueryHandler final : public query::CoordinatorQueryHandler { throw QueryRuntimeException("UNREGISTER INSTANCE query can only be run on a coordinator!"); case NOT_LEADER: throw QueryRuntimeException("Couldn't unregister replica instance since coordinator is not a leader!"); + case RAFT_LOG_ERROR: + throw QueryRuntimeException("Couldn't unregister replica instance since raft server couldn't append the log!"); case RPC_FAILED: throw QueryRuntimeException( "Couldn't unregister replica instance because current main instance couldn't unregister replica!"); @@ -431,20 +412,18 @@ class CoordQueryHandler final : public query::CoordinatorQueryHandler { } } - void RegisterReplicationInstance(std::string const &coordinator_socket_address, - std::string const &replication_socket_address, + void RegisterReplicationInstance(std::string_view coordinator_socket_address, + std::string_view replication_socket_address, std::chrono::seconds const &instance_check_frequency, std::chrono::seconds const &instance_down_timeout, std::chrono::seconds const &instance_get_uuid_frequency, - std::string const &instance_name, CoordinatorQuery::SyncMode sync_mode) override { - const auto maybe_replication_ip_port = - io::network::Endpoint::ParseSocketOrAddress(replication_socket_address, std::nullopt); + std::string_view instance_name, CoordinatorQuery::SyncMode sync_mode) override { + const auto maybe_replication_ip_port = io::network::Endpoint::ParseSocketOrAddress(replication_socket_address); if (!maybe_replication_ip_port) { throw QueryRuntimeException("Invalid replication socket address!"); } - const auto maybe_coordinator_ip_port = - io::network::Endpoint::ParseSocketOrAddress(coordinator_socket_address, std::nullopt); + 
const auto maybe_coordinator_ip_port = io::network::Endpoint::ParseSocketOrAddress(coordinator_socket_address); if (!maybe_replication_ip_port) { throw QueryRuntimeException("Invalid replication socket address!"); } @@ -452,14 +431,14 @@ class CoordQueryHandler final : public query::CoordinatorQueryHandler { const auto [replication_ip, replication_port] = *maybe_replication_ip_port; const auto [coordinator_server_ip, coordinator_server_port] = *maybe_coordinator_ip_port; const auto repl_config = coordination::CoordinatorClientConfig::ReplicationClientInfo{ - .instance_name = instance_name, + .instance_name = std::string(instance_name), .replication_mode = convertFromCoordinatorToReplicationMode(sync_mode), - .replication_ip_address = replication_ip, + .replication_ip_address = std::string(replication_ip), .replication_port = replication_port}; auto coordinator_client_config = - coordination::CoordinatorClientConfig{.instance_name = instance_name, - .ip_address = coordinator_server_ip, + coordination::CoordinatorClientConfig{.instance_name = std::string(instance_name), + .ip_address = std::string(coordinator_server_ip), .port = coordinator_server_port, .instance_health_check_frequency_sec = instance_check_frequency, .instance_down_timeout_sec = instance_down_timeout, @@ -472,18 +451,17 @@ class CoordQueryHandler final : public query::CoordinatorQueryHandler { using enum memgraph::coordination::RegisterInstanceCoordinatorStatus; case NAME_EXISTS: throw QueryRuntimeException("Couldn't register replica instance since instance with such name already exists!"); - case ENDPOINT_EXISTS: + case COORD_ENDPOINT_EXISTS: throw QueryRuntimeException( - "Couldn't register replica instance since instance with such endpoint already exists!"); + "Couldn't register replica instance since instance with such coordinator endpoint already exists!"); + case REPL_ENDPOINT_EXISTS: + throw QueryRuntimeException( + "Couldn't register replica instance since instance with such replication 
endpoint already exists!"); case NOT_COORDINATOR: throw QueryRuntimeException("REGISTER INSTANCE query can only be run on a coordinator!"); case NOT_LEADER: throw QueryRuntimeException("Couldn't register replica instance since coordinator is not a leader!"); - case RAFT_COULD_NOT_ACCEPT: - throw QueryRuntimeException( - "Couldn't register replica instance since raft server couldn't accept the log! Most likely the raft " - "instance is not a leader!"); - case RAFT_COULD_NOT_APPEND: + case RAFT_LOG_ERROR: throw QueryRuntimeException("Couldn't register replica instance since raft server couldn't append the log!"); case RPC_FAILED: throw QueryRuntimeException( @@ -494,19 +472,19 @@ class CoordQueryHandler final : public query::CoordinatorQueryHandler { } } - auto AddCoordinatorInstance(uint32_t raft_server_id, std::string const &raft_socket_address) -> void override { - auto const maybe_ip_and_port = io::network::Endpoint::ParseSocketOrIpAddress(raft_socket_address); + auto AddCoordinatorInstance(uint32_t raft_server_id, std::string_view raft_socket_address) -> void override { + auto const maybe_ip_and_port = io::network::Endpoint::ParseSocketOrAddress(raft_socket_address); if (maybe_ip_and_port) { auto const [ip, port] = *maybe_ip_and_port; - spdlog::info("Adding instance {} with raft socket address {}:{}.", raft_server_id, port, ip); + spdlog::info("Adding instance {} with raft socket address {}:{}.", raft_server_id, ip, port); coordinator_handler_.AddCoordinatorInstance(raft_server_id, port, ip); } else { spdlog::error("Invalid raft socket address {}.", raft_socket_address); } } - void SetReplicationInstanceToMain(const std::string &instance_name) override { - auto status = coordinator_handler_.SetReplicationInstanceToMain(instance_name); + void SetReplicationInstanceToMain(std::string_view instance_name) override { + auto const status = coordinator_handler_.SetReplicationInstanceToMain(instance_name); switch (status) { using enum 
memgraph::coordination::SetInstanceToMainCoordinatorStatus; case NO_INSTANCE_WITH_NAME: @@ -515,6 +493,10 @@ class CoordQueryHandler final : public query::CoordinatorQueryHandler { throw QueryRuntimeException("Couldn't set instance to main since there is already a main instance in cluster!"); case NOT_COORDINATOR: throw QueryRuntimeException("SET INSTANCE TO MAIN query can only be run on a coordinator!"); + case NOT_LEADER: + throw QueryRuntimeException("Couldn't set instance to main since coordinator is not a leader!"); + case RAFT_LOG_ERROR: + throw QueryRuntimeException("Couldn't promote instance since raft server couldn't append the log!"); case COULD_NOT_PROMOTE_TO_MAIN: throw QueryRuntimeException( "Couldn't set replica instance to main! Check coordinator and replica for more logs"); @@ -1143,6 +1125,27 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, const Parameters & } #ifdef MG_ENTERPRISE + +auto ParseConfigMap(std::unordered_map<Expression *, Expression *> const &config_map, + ExpressionVisitor<TypedValue> &evaluator) + -> std::optional<std::map<std::string, std::string, std::less<>>> { + if (std::ranges::any_of(config_map, [&evaluator](const auto &entry) { + auto key_expr = entry.first->Accept(evaluator); + auto value_expr = entry.second->Accept(evaluator); + return !key_expr.IsString() || !value_expr.IsString(); + })) { + spdlog::error("Config map must contain only string keys and values!"); + return std::nullopt; + } + + return ranges::views::all(config_map) | ranges::views::transform([&evaluator](const auto &entry) { + auto key_expr = entry.first->Accept(evaluator); + auto value_expr = entry.second->Accept(evaluator); + return std::pair{key_expr.ValueString(), value_expr.ValueString()}; + }) | + ranges::to<std::map<std::string, std::string, std::less<>>>; +} + Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Parameters ¶meters, coordination::CoordinatorState *coordinator_state, const query::InterpreterConfig 
&config, std::vector<Notification> *notifications) { @@ -1170,17 +1173,37 @@ Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param EvaluationContext evaluation_context{.timestamp = QueryTimestamp(), .parameters = parameters}; auto evaluator = PrimitiveLiteralExpressionEvaluator{evaluation_context}; - auto raft_socket_address_tv = coordinator_query->raft_socket_address_->Accept(evaluator); - auto raft_server_id_tv = coordinator_query->raft_server_id_->Accept(evaluator); - callback.fn = [handler = CoordQueryHandler{*coordinator_state}, raft_socket_address_tv, - raft_server_id_tv]() mutable { - handler.AddCoordinatorInstance(raft_server_id_tv.ValueInt(), std::string(raft_socket_address_tv.ValueString())); + auto config_map = ParseConfigMap(coordinator_query->configs_, evaluator); + if (!config_map) { + throw QueryRuntimeException("Failed to parse config map!"); + } + + if (config_map->size() != 2) { + throw QueryRuntimeException("Config map must contain exactly 2 entries: {} and {}!", kCoordinatorServer, + kBoltServer); + } + + auto const &coordinator_server_it = config_map->find(kCoordinatorServer); + if (coordinator_server_it == config_map->end()) { + throw QueryRuntimeException("Config map must contain {} entry!", kCoordinatorServer); + } + + auto const &bolt_server_it = config_map->find(kBoltServer); + if (bolt_server_it == config_map->end()) { + throw QueryRuntimeException("Config map must contain {} entry!", kBoltServer); + } + + auto coord_server_id = coordinator_query->coordinator_server_id_->Accept(evaluator).ValueInt(); + + callback.fn = [handler = CoordQueryHandler{*coordinator_state}, coord_server_id, + coordinator_server = coordinator_server_it->second]() mutable { + handler.AddCoordinatorInstance(coord_server_id, coordinator_server); return std::vector<std::vector<TypedValue>>(); }; notifications->emplace_back(SeverityLevel::INFO, NotificationCode::ADD_COORDINATOR_INSTANCE, fmt::format("Coordinator has added instance {} on
coordinator server {}.", - coordinator_query->instance_name_, raft_socket_address_tv.ValueString())); + coordinator_query->instance_name_, coordinator_server_it->second)); return callback; } case CoordinatorQuery::Action::REGISTER_INSTANCE: { @@ -1191,27 +1214,49 @@ Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param // the argument to Callback. EvaluationContext evaluation_context{.timestamp = QueryTimestamp(), .parameters = parameters}; auto evaluator = PrimitiveLiteralExpressionEvaluator{evaluation_context}; + auto config_map = ParseConfigMap(coordinator_query->configs_, evaluator); - auto coordinator_socket_address_tv = coordinator_query->coordinator_socket_address_->Accept(evaluator); - auto replication_socket_address_tv = coordinator_query->replication_socket_address_->Accept(evaluator); - callback.fn = [handler = CoordQueryHandler{*coordinator_state}, coordinator_socket_address_tv, - replication_socket_address_tv, + if (!config_map) { + throw QueryRuntimeException("Failed to parse config map!"); + } + + if (config_map->size() != 3) { + throw QueryRuntimeException("Config map must contain exactly 3 entries: {}, {} and {}!", kBoltServer, + kManagementServer, kReplicationServer); + } + + auto const &replication_server_it = config_map->find(kReplicationServer); + if (replication_server_it == config_map->end()) { + throw QueryRuntimeException("Config map must contain {} entry!", kReplicationServer); + } + + auto const &management_server_it = config_map->find(kManagementServer); + if (management_server_it == config_map->end()) { + throw QueryRuntimeException("Config map must contain {} entry!", kManagementServer); + } + + auto const &bolt_server_it = config_map->find(kBoltServer); + if (bolt_server_it == config_map->end()) { + throw QueryRuntimeException("Config map must contain {} entry!", kBoltServer); + } + + callback.fn = [handler = CoordQueryHandler{*coordinator_state}, instance_health_check_frequency_sec = 
config.instance_health_check_frequency_sec, + management_server = management_server_it->second, + replication_server = replication_server_it->second, bolt_server = bolt_server_it->second, instance_name = coordinator_query->instance_name_, instance_down_timeout_sec = config.instance_down_timeout_sec, instance_get_uuid_frequency_sec = config.instance_get_uuid_frequency_sec, sync_mode = coordinator_query->sync_mode_]() mutable { - handler.RegisterReplicationInstance(std::string(coordinator_socket_address_tv.ValueString()), - std::string(replication_socket_address_tv.ValueString()), - instance_health_check_frequency_sec, instance_down_timeout_sec, - instance_get_uuid_frequency_sec, instance_name, sync_mode); + handler.RegisterReplicationInstance(management_server, replication_server, instance_health_check_frequency_sec, + instance_down_timeout_sec, instance_get_uuid_frequency_sec, instance_name, + sync_mode); return std::vector<std::vector<TypedValue>>(); }; - notifications->emplace_back( - SeverityLevel::INFO, NotificationCode::REGISTER_COORDINATOR_SERVER, - fmt::format("Coordinator has registered coordinator server on {} for instance {}.", - coordinator_socket_address_tv.ValueString(), coordinator_query->instance_name_)); + notifications->emplace_back(SeverityLevel::INFO, NotificationCode::REGISTER_REPLICATION_INSTANCE, + fmt::format("Coordinator has registered replication instance on {} for instance {}.", + bolt_server_it->second, coordinator_query->instance_name_)); return callback; } case CoordinatorQuery::Action::UNREGISTER_INSTANCE: @@ -1251,17 +1296,16 @@ Callback HandleCoordinatorQuery(CoordinatorQuery *coordinator_query, const Param throw QueryRuntimeException("Only coordinator can run SHOW INSTANCES."); } - callback.header = {"name", "raft_socket_address", "coordinator_socket_address", "alive", "role"}; + callback.header = {"name", "raft_socket_address", "coordinator_socket_address", "health", "role"}; callback.fn = [handler = 
CoordQueryHandler{*coordinator_state}, replica_nfields = callback.header.size()]() mutable { auto const instances = handler.ShowInstances(); auto const converter = [](const auto &status) -> std::vector<TypedValue> { return {TypedValue{status.instance_name}, TypedValue{status.raft_socket_address}, - TypedValue{status.coord_socket_address}, TypedValue{status.is_alive}, - TypedValue{status.cluster_role}}; + TypedValue{status.coord_socket_address}, TypedValue{status.health}, TypedValue{status.cluster_role}}; }; - return utils::fmap(converter, instances); + return utils::fmap(instances, converter); }; return callback; } @@ -1668,8 +1712,7 @@ struct PullPlan { std::shared_ptr<QueryUserOrRole> user_or_role, std::atomic<TransactionStatus> *transaction_status, std::shared_ptr<utils::AsyncTimer> tx_timer, TriggerContextCollector *trigger_context_collector = nullptr, - std::optional<size_t> memory_limit = {}, bool use_monotonic_memory = true, - FrameChangeCollector *frame_change_collector_ = nullptr); + std::optional<size_t> memory_limit = {}, FrameChangeCollector *frame_change_collector_ = nullptr); std::optional<plan::ProfilingStatsWithTotalTime> Pull(AnyStream *stream, std::optional<int> n, const std::vector<Symbol> &output_symbols, @@ -1694,26 +1737,17 @@ struct PullPlan { // we have to keep track of any unsent results from previous `PullPlan::Pull` // manually by using this flag. bool has_unsent_results_ = false; - - // In the case of LOAD CSV, we want to use only PoolResource without MonotonicMemoryResource - // to reuse allocated memory. As LOAD CSV is processing row by row - // it is possible to reduce memory usage significantly if MemoryResource deals with memory allocation - // can reuse memory that was allocated on processing the first row on all subsequent rows. 
- // This flag signals to `PullPlan::Pull` which MemoryResource to use - bool use_monotonic_memory_; }; PullPlan::PullPlan(const std::shared_ptr<PlanWrapper> plan, const Parameters ¶meters, const bool is_profile_query, DbAccessor *dba, InterpreterContext *interpreter_context, utils::MemoryResource *execution_memory, std::shared_ptr<QueryUserOrRole> user_or_role, std::atomic<TransactionStatus> *transaction_status, std::shared_ptr<utils::AsyncTimer> tx_timer, TriggerContextCollector *trigger_context_collector, - const std::optional<size_t> memory_limit, bool use_monotonic_memory, - FrameChangeCollector *frame_change_collector) + const std::optional<size_t> memory_limit, FrameChangeCollector *frame_change_collector) : plan_(plan), cursor_(plan->plan().MakeCursor(execution_memory)), frame_(plan->symbol_table().max_position(), execution_memory), - memory_limit_(memory_limit), - use_monotonic_memory_(use_monotonic_memory) { + memory_limit_(memory_limit) { ctx_.db_accessor = dba; ctx_.symbol_table = plan->symbol_table(); ctx_.evaluation_context.timestamp = QueryTimestamp(); @@ -1741,6 +1775,7 @@ PullPlan::PullPlan(const std::shared_ptr<PlanWrapper> plan, const Parameters &pa ctx_.is_profile_query = is_profile_query; ctx_.trigger_context_collector = trigger_context_collector; ctx_.frame_change_collector = frame_change_collector; + ctx_.evaluation_context.memory = execution_memory; } std::optional<plan::ProfilingStatsWithTotalTime> PullPlan::Pull(AnyStream *stream, std::optional<int> n, @@ -1764,43 +1799,14 @@ std::optional<plan::ProfilingStatsWithTotalTime> PullPlan::Pull(AnyStream *strea } }}; - // Set up temporary memory for a single Pull. Initial memory comes from the - // stack. 256 KiB should fit on the stack and should be more than enough for a - // single `Pull`. 
- static constexpr size_t stack_size = 256UL * 1024UL; - char stack_data[stack_size]; - - utils::ResourceWithOutOfMemoryException resource_with_exception; - utils::MonotonicBufferResource monotonic_memory{&stack_data[0], stack_size, &resource_with_exception}; - std::optional<utils::PoolResource> pool_memory; - static constexpr auto kMaxBlockPerChunks = 128; - - if (!use_monotonic_memory_) { - pool_memory.emplace(kMaxBlockPerChunks, kExecutionPoolMaxBlockSize, &resource_with_exception, - &resource_with_exception); - } else { - // We can throw on every query because a simple queries for deleting will use only - // the stack allocated buffer. - // Also, we want to throw only when the query engine requests more memory and not the storage - // so we add the exception to the allocator. - // TODO (mferencevic): Tune the parameters accordingly. - pool_memory.emplace(kMaxBlockPerChunks, 1024, &monotonic_memory, &resource_with_exception); - } - - ctx_.evaluation_context.memory = &*pool_memory; - // Returns true if a result was pulled. const auto pull_result = [&]() -> bool { return cursor_->Pull(frame_, ctx_); }; - const auto stream_values = [&]() { - // TODO: The streamed values should also probably use the above memory. - std::vector<TypedValue> values; - values.reserve(output_symbols.size()); - - for (const auto &symbol : output_symbols) { - values.emplace_back(frame_[symbol]); + auto values = std::vector<TypedValue>(output_symbols.size()); + const auto stream_values = [&] { + for (auto const i : ranges::views::iota(0UL, output_symbols.size())) { + values[i] = frame_[output_symbols[i]]; } - stream->Result(values); }; @@ -1910,7 +1916,6 @@ PreparedQuery Interpreter::PrepareTransactionQuery(std::string_view query_upper, std::function<void()> handler; if (query_upper == "BEGIN") { - ResetInterpreter(); // TODO: Evaluate doing move(extras). Currently the extras is very small, but this will be important if it ever // becomes large. 
handler = [this, extras = extras] { @@ -1988,30 +1993,6 @@ inline static void TryCaching(const AstStorage &ast_storage, FrameChangeCollecto } } -bool IsLoadCsvQuery(const std::vector<memgraph::query::Clause *> &clauses) { - return std::any_of(clauses.begin(), clauses.end(), - [](memgraph::query::Clause const *clause) { return clause->GetTypeInfo() == LoadCsv::kType; }); -} - -bool IsCallBatchedProcedureQuery(const std::vector<memgraph::query::Clause *> &clauses) { - EvaluationContext evaluation_context; - - return std::ranges::any_of(clauses, [&evaluation_context](memgraph::query::Clause *clause) -> bool { - if (!(clause->GetTypeInfo() == CallProcedure::kType)) return false; - auto *call_procedure_clause = utils::Downcast<CallProcedure>(clause); - - const auto &maybe_found = memgraph::query::procedure::FindProcedure( - procedure::gModuleRegistry, call_procedure_clause->procedure_name_, evaluation_context.memory); - if (!maybe_found) { - throw QueryRuntimeException("There is no procedure named '{}'.", call_procedure_clause->procedure_name_); - } - const auto &[module, proc] = *maybe_found; - if (!proc->info.is_batched) return false; - spdlog::trace("Using PoolResource for batched query procedure"); - return true; - }); -} - PreparedQuery PrepareCypherQuery(ParsedQuery parsed_query, std::map<std::string, TypedValue> *summary, InterpreterContext *interpreter_context, CurrentDB ¤t_db, utils::MemoryResource *execution_memory, std::vector<Notification> *notifications, @@ -2031,7 +2012,6 @@ PreparedQuery PrepareCypherQuery(ParsedQuery parsed_query, std::map<std::string, spdlog::info("Running query with memory limit of {}", utils::GetReadableSize(*memory_limit)); } auto clauses = cypher_query->single_query_->clauses_; - bool contains_csv = false; if (std::any_of(clauses.begin(), clauses.end(), [](const auto *clause) { return clause->GetTypeInfo() == LoadCsv::kType; })) { notifications->emplace_back( @@ -2039,13 +2019,8 @@ PreparedQuery PrepareCypherQuery(ParsedQuery 
parsed_query, std::map<std::string, "It's important to note that the parser parses the values as strings. It's up to the user to " "convert the parsed row values to the appropriate type. This can be done using the built-in " "conversion functions such as ToInteger, ToFloat, ToBoolean etc."); - contains_csv = true; } - // If this is LOAD CSV query, use PoolResource without MonotonicMemoryResource as we want to reuse allocated memory - auto use_monotonic_memory = - !contains_csv && !IsCallBatchedProcedureQuery(clauses) && !IsAllShortestPathsQuery(clauses); - MG_ASSERT(current_db.execution_db_accessor_, "Cypher query expects a current DB transaction"); auto *dba = &*current_db @@ -2084,7 +2059,7 @@ PreparedQuery PrepareCypherQuery(ParsedQuery parsed_query, std::map<std::string, current_db.trigger_context_collector_ ? &*current_db.trigger_context_collector_ : nullptr; auto pull_plan = std::make_shared<PullPlan>( plan, parsed_query.parameters, false, dba, interpreter_context, execution_memory, std::move(user_or_role), - transaction_status, std::move(tx_timer), trigger_context_collector, memory_limit, use_monotonic_memory, + transaction_status, std::move(tx_timer), trigger_context_collector, memory_limit, frame_change_collector->IsTrackingValues() ? 
frame_change_collector : nullptr); return PreparedQuery{std::move(header), std::move(parsed_query.required_privileges), [pull_plan = std::move(pull_plan), output_symbols = std::move(output_symbols), summary]( @@ -2198,18 +2173,6 @@ PreparedQuery PrepareProfileQuery(ParsedQuery parsed_query, bool in_explicit_tra auto *cypher_query = utils::Downcast<CypherQuery>(parsed_inner_query.query); - bool contains_csv = false; - auto clauses = cypher_query->single_query_->clauses_; - if (std::any_of(clauses.begin(), clauses.end(), - [](const auto *clause) { return clause->GetTypeInfo() == LoadCsv::kType; })) { - contains_csv = true; - } - - // If this is LOAD CSV, BatchedProcedure or AllShortest query, use PoolResource without MonotonicMemoryResource as we - // want to reuse allocated memory - auto use_monotonic_memory = - !contains_csv && !IsCallBatchedProcedureQuery(clauses) && !IsAllShortestPathsQuery(clauses); - MG_ASSERT(cypher_query, "Cypher grammar should not allow other queries in PROFILE"); EvaluationContext evaluation_context; evaluation_context.timestamp = QueryTimestamp(); @@ -2243,14 +2206,14 @@ PreparedQuery PrepareProfileQuery(ParsedQuery parsed_query, bool in_explicit_tra // We want to execute the query we are profiling lazily, so we delay // the construction of the corresponding context. stats_and_total_time = std::optional<plan::ProfilingStatsWithTotalTime>{}, - pull_plan = std::shared_ptr<PullPlanVector>(nullptr), transaction_status, use_monotonic_memory, - frame_change_collector, tx_timer = std::move(tx_timer)]( - AnyStream *stream, std::optional<int> n) mutable -> std::optional<QueryHandlerResult> { + pull_plan = std::shared_ptr<PullPlanVector>(nullptr), transaction_status, frame_change_collector, + tx_timer = std::move(tx_timer)](AnyStream *stream, + std::optional<int> n) mutable -> std::optional<QueryHandlerResult> { // No output symbols are given so that nothing is streamed. 
if (!stats_and_total_time) { stats_and_total_time = PullPlan(plan, parameters, true, dba, interpreter_context, execution_memory, std::move(user_or_role), - transaction_status, std::move(tx_timer), nullptr, memory_limit, use_monotonic_memory, + transaction_status, std::move(tx_timer), nullptr, memory_limit, frame_change_collector->IsTrackingValues() ? frame_change_collector : nullptr) .Pull(stream, {}, {}, summary); pull_plan = std::make_shared<PullPlanVector>(ProfilingStatsToTable(*stats_and_total_time)); @@ -2679,6 +2642,75 @@ PreparedQuery PrepareIndexQuery(ParsedQuery parsed_query, bool in_explicit_trans RWType::W}; } +PreparedQuery PrepareEdgeIndexQuery(ParsedQuery parsed_query, bool in_explicit_transaction, + std::vector<Notification> *notifications, CurrentDB ¤t_db) { + if (in_explicit_transaction) { + throw IndexInMulticommandTxException(); + } + + auto *index_query = utils::Downcast<EdgeIndexQuery>(parsed_query.query); + std::function<void(Notification &)> handler; + + MG_ASSERT(current_db.db_acc_, "Index query expects a current DB"); + auto &db_acc = *current_db.db_acc_; + + MG_ASSERT(current_db.db_transactional_accessor_, "Index query expects a current DB transaction"); + auto *dba = &*current_db.execution_db_accessor_; + + auto invalidate_plan_cache = [plan_cache = db_acc->plan_cache()] { + plan_cache->WithLock([&](auto &cache) { cache.reset(); }); + }; + + auto *storage = db_acc->storage(); + auto edge_type = storage->NameToEdgeType(index_query->edge_type_.name); + + Notification index_notification(SeverityLevel::INFO); + switch (index_query->action_) { + case EdgeIndexQuery::Action::CREATE: { + index_notification.code = NotificationCode::CREATE_INDEX; + index_notification.title = fmt::format("Created index on edge-type {}.", index_query->edge_type_.name); + + handler = [dba, edge_type, label_name = index_query->edge_type_.name, + invalidate_plan_cache = std::move(invalidate_plan_cache)](Notification &index_notification) { + auto maybe_index_error = 
dba->CreateIndex(edge_type); + utils::OnScopeExit invalidator(invalidate_plan_cache); + + if (maybe_index_error.HasError()) { + index_notification.code = NotificationCode::EXISTENT_INDEX; + index_notification.title = fmt::format("Index on edge-type {} already exists.", label_name); + } + }; + break; + } + case EdgeIndexQuery::Action::DROP: { + index_notification.code = NotificationCode::DROP_INDEX; + index_notification.title = fmt::format("Dropped index on edge-type {}.", index_query->edge_type_.name); + handler = [dba, edge_type, label_name = index_query->edge_type_.name, + invalidate_plan_cache = std::move(invalidate_plan_cache)](Notification &index_notification) { + auto maybe_index_error = dba->DropIndex(edge_type); + utils::OnScopeExit invalidator(invalidate_plan_cache); + + if (maybe_index_error.HasError()) { + index_notification.code = NotificationCode::NONEXISTENT_INDEX; + index_notification.title = fmt::format("Index on edge-type {} doesn't exist.", label_name); + } + }; + break; + } + } + + return PreparedQuery{ + {}, + std::move(parsed_query.required_privileges), + [handler = std::move(handler), notifications, index_notification = std::move(index_notification)]( + AnyStream * /*stream*/, std::optional<int> /*unused*/) mutable { + handler(index_notification); + notifications->push_back(index_notification); + return QueryHandlerResult::COMMIT; + }, + RWType::W}; +} + PreparedQuery PrepareAuthQuery(ParsedQuery parsed_query, bool in_explicit_transaction, InterpreterContext *interpreter_context, Interpreter &interpreter) { if (in_explicit_transaction) { @@ -3483,6 +3515,7 @@ PreparedQuery PrepareDatabaseInfoQuery(ParsedQuery parsed_query, bool in_explici auto *storage = database->storage(); const std::string_view label_index_mark{"label"}; const std::string_view label_property_index_mark{"label+property"}; + const std::string_view edge_type_index_mark{"edge-type"}; auto info = dba->ListAllIndices(); auto storage_acc = database->Access(); 
std::vector<std::vector<TypedValue>> results; @@ -3497,6 +3530,10 @@ PreparedQuery PrepareDatabaseInfoQuery(ParsedQuery parsed_query, bool in_explici TypedValue(storage->PropertyToName(item.second)), TypedValue(static_cast<int>(storage_acc->ApproximateVertexCount(item.first, item.second)))}); } + for (const auto &item : info.edge_type) { + results.push_back({TypedValue(edge_type_index_mark), TypedValue(storage->EdgeTypeToName(item)), TypedValue(), + TypedValue(static_cast<int>(storage_acc->ApproximateEdgeCount(item)))}); + } std::sort(results.begin(), results.end(), [&label_index_mark](const auto &record_1, const auto &record_2) { const auto type_1 = record_1[0].ValueString(); const auto type_2 = record_2[0].ValueString(); @@ -4139,6 +4176,7 @@ PreparedQuery PrepareShowDatabasesQuery(ParsedQuery parsed_query, InterpreterCon std::optional<uint64_t> Interpreter::GetTransactionId() const { return current_transaction_; } void Interpreter::BeginTransaction(QueryExtras const &extras) { + ResetInterpreter(); const auto prepared_query = PrepareTransactionQuery("BEGIN", extras); prepared_query.query_handler(nullptr, {}); } @@ -4173,12 +4211,12 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, const auto upper_case_query = utils::ToUpperCase(query_string); const auto trimmed_query = utils::Trim(upper_case_query); if (trimmed_query == "BEGIN" || trimmed_query == "COMMIT" || trimmed_query == "ROLLBACK") { - auto resource = utils::MonotonicBufferResource(kExecutionMemoryBlockSize); - auto prepared_query = PrepareTransactionQuery(trimmed_query, extras); - auto &query_execution = - query_executions_.emplace_back(QueryExecution::Create(std::move(resource), std::move(prepared_query))); - std::optional<int> qid = - in_explicit_transaction_ ? 
static_cast<int>(query_executions_.size() - 1) : std::optional<int>{}; + if (trimmed_query == "BEGIN") { + ResetInterpreter(); + } + auto &query_execution = query_executions_.emplace_back(QueryExecution::Create()); + query_execution->prepared_query = PrepareTransactionQuery(trimmed_query, extras); + auto qid = in_explicit_transaction_ ? static_cast<int>(query_executions_.size() - 1) : std::optional<int>{}; return {query_execution->prepared_query->header, query_execution->prepared_query->privileges, qid, {}}; } @@ -4208,35 +4246,8 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, ParseQuery(query_string, params, &interpreter_context_->ast_cache, interpreter_context_->config.query); auto parsing_time = parsing_timer.Elapsed().count(); - CypherQuery const *const cypher_query = [&]() -> CypherQuery * { - if (auto *cypher_query = utils::Downcast<CypherQuery>(parsed_query.query)) { - return cypher_query; - } - if (auto *profile_query = utils::Downcast<ProfileQuery>(parsed_query.query)) { - return profile_query->cypher_query_; - } - return nullptr; - }(); // IILE - - auto const [usePool, hasAllShortestPaths] = [&]() -> std::pair<bool, bool> { - if (!cypher_query) { - return {false, false}; - } - auto const &clauses = cypher_query->single_query_->clauses_; - bool hasAllShortestPaths = IsAllShortestPathsQuery(clauses); - // Using PoolResource without MonotonicMemoryResouce for LOAD CSV reduces memory usage. 
- bool usePool = hasAllShortestPaths || IsCallBatchedProcedureQuery(clauses) || IsLoadCsvQuery(clauses); - return {usePool, hasAllShortestPaths}; - }(); // IILE - // Setup QueryExecution - // its MemoryResource is mostly used for allocations done on Frame and storing `row`s - if (usePool) { - query_executions_.emplace_back(QueryExecution::Create(utils::PoolResource(128, kExecutionPoolMaxBlockSize))); - } else { - query_executions_.emplace_back(QueryExecution::Create(utils::MonotonicBufferResource(kExecutionMemoryBlockSize))); - } - + query_executions_.emplace_back(QueryExecution::Create()); auto &query_execution = query_executions_.back(); query_execution_ptr = &query_execution; @@ -4283,13 +4294,14 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, utils::Downcast<CypherQuery>(parsed_query.query) || utils::Downcast<ExplainQuery>(parsed_query.query) || utils::Downcast<ProfileQuery>(parsed_query.query) || utils::Downcast<DumpQuery>(parsed_query.query) || utils::Downcast<TriggerQuery>(parsed_query.query) || utils::Downcast<AnalyzeGraphQuery>(parsed_query.query) || - utils::Downcast<IndexQuery>(parsed_query.query) || utils::Downcast<DatabaseInfoQuery>(parsed_query.query) || - utils::Downcast<ConstraintQuery>(parsed_query.query); + utils::Downcast<IndexQuery>(parsed_query.query) || utils::Downcast<EdgeIndexQuery>(parsed_query.query) || + utils::Downcast<DatabaseInfoQuery>(parsed_query.query) || utils::Downcast<ConstraintQuery>(parsed_query.query); if (!in_explicit_transaction_ && requires_db_transaction) { // TODO: ATM only a single database, will change when we have multiple database transactions bool could_commit = utils::Downcast<CypherQuery>(parsed_query.query) != nullptr; bool unique = utils::Downcast<IndexQuery>(parsed_query.query) != nullptr || + utils::Downcast<EdgeIndexQuery>(parsed_query.query) != nullptr || utils::Downcast<ConstraintQuery>(parsed_query.query) != nullptr || upper_case_query.find(kSchemaAssert) != 
std::string::npos; SetupDatabaseTransaction(could_commit, unique); @@ -4304,9 +4316,7 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, utils::Timer planning_timer; PreparedQuery prepared_query; - utils::MemoryResource *memory_resource = - std::visit([](auto &execution_memory) -> utils::MemoryResource * { return &execution_memory; }, - query_execution->execution_memory); + utils::MemoryResource *memory_resource = query_execution->execution_memory.resource(); frame_change_collector_.reset(); frame_change_collector_.emplace(); if (utils::Downcast<CypherQuery>(parsed_query.query)) { @@ -4317,15 +4327,18 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, prepared_query = PrepareExplainQuery(std::move(parsed_query), &query_execution->summary, &query_execution->notifications, interpreter_context_, current_db_); } else if (utils::Downcast<ProfileQuery>(parsed_query.query)) { - prepared_query = PrepareProfileQuery(std::move(parsed_query), in_explicit_transaction_, &query_execution->summary, - &query_execution->notifications, interpreter_context_, current_db_, - &query_execution->execution_memory_with_exception, user_or_role_, - &transaction_status_, current_timeout_timer_, &*frame_change_collector_); + prepared_query = + PrepareProfileQuery(std::move(parsed_query), in_explicit_transaction_, &query_execution->summary, + &query_execution->notifications, interpreter_context_, current_db_, memory_resource, + user_or_role_, &transaction_status_, current_timeout_timer_, &*frame_change_collector_); } else if (utils::Downcast<DumpQuery>(parsed_query.query)) { prepared_query = PrepareDumpQuery(std::move(parsed_query), current_db_); } else if (utils::Downcast<IndexQuery>(parsed_query.query)) { prepared_query = PrepareIndexQuery(std::move(parsed_query), in_explicit_transaction_, &query_execution->notifications, current_db_); + } else if (utils::Downcast<EdgeIndexQuery>(parsed_query.query)) { + prepared_query = 
PrepareEdgeIndexQuery(std::move(parsed_query), in_explicit_transaction_, + &query_execution->notifications, current_db_); } else if (utils::Downcast<AnalyzeGraphQuery>(parsed_query.query)) { prepared_query = PrepareAnalyzeGraphQuery(std::move(parsed_query), in_explicit_transaction_, current_db_); } else if (utils::Downcast<AuthQuery>(parsed_query.query)) { @@ -4519,7 +4532,7 @@ void RunTriggersAfterCommit(dbms::DatabaseAccess db_acc, InterpreterContext *int std::atomic<TransactionStatus> *transaction_status) { // Run the triggers for (const auto &trigger : db_acc->trigger_store()->AfterCommitTriggers().access()) { - utils::MonotonicBufferResource execution_memory{kExecutionMemoryBlockSize}; + QueryAllocator execution_memory{}; // create a new transaction for each trigger auto tx_acc = db_acc->Access(); @@ -4530,7 +4543,7 @@ void RunTriggersAfterCommit(dbms::DatabaseAccess db_acc, InterpreterContext *int auto trigger_context = original_trigger_context; trigger_context.AdaptForAccessor(&db_accessor); try { - trigger.Execute(&db_accessor, &execution_memory, flags::run_time::GetExecutionTimeout(), + trigger.Execute(&db_accessor, execution_memory.resource(), flags::run_time::GetExecutionTimeout(), &interpreter_context->is_shutting_down, transaction_status, trigger_context); } catch (const utils::BasicException &exception) { spdlog::warn("Trigger '{}' failed with exception:\n{}", trigger.Name(), exception.what()); @@ -4684,11 +4697,12 @@ void Interpreter::Commit() { if (trigger_context) { // Run the triggers for (const auto &trigger : db->trigger_store()->BeforeCommitTriggers().access()) { - utils::MonotonicBufferResource execution_memory{kExecutionMemoryBlockSize}; + QueryAllocator execution_memory{}; AdvanceCommand(); try { - trigger.Execute(&*current_db_.execution_db_accessor_, &execution_memory, flags::run_time::GetExecutionTimeout(), - &interpreter_context_->is_shutting_down, &transaction_status_, *trigger_context); + 
trigger.Execute(&*current_db_.execution_db_accessor_, execution_memory.resource(), + flags::run_time::GetExecutionTimeout(), &interpreter_context_->is_shutting_down, + &transaction_status_, *trigger_context); } catch (const utils::BasicException &e) { throw utils::BasicException( fmt::format("Trigger '{}' caused the transaction to fail.\nException: {}", trigger.Name(), e.what())); diff --git a/src/query/interpreter.hpp b/src/query/interpreter.hpp index 01a443d6d..5366b4472 100644 --- a/src/query/interpreter.hpp +++ b/src/query/interpreter.hpp @@ -65,6 +65,54 @@ extern const Event SuccessfulQuery; namespace memgraph::query { +struct QueryAllocator { + QueryAllocator() = default; + QueryAllocator(QueryAllocator const &) = delete; + QueryAllocator &operator=(QueryAllocator const &) = delete; + + // No move addresses to pool & monotonic fields must be stable + QueryAllocator(QueryAllocator &&) = delete; + QueryAllocator &operator=(QueryAllocator &&) = delete; + + auto resource() -> utils::MemoryResource * { +#ifndef MG_MEMORY_PROFILE + return &pool; +#else + return upstream_resource(); +#endif + } + auto resource_without_pool() -> utils::MemoryResource * { +#ifndef MG_MEMORY_PROFILE + return &monotonic; +#else + return upstream_resource(); +#endif + } + auto resource_without_pool_or_mono() -> utils::MemoryResource * { return upstream_resource(); } + + private: + // At least one page to ensure not sharing page with other subsystems + static constexpr auto kMonotonicInitialSize = 4UL * 1024UL; + // TODO: need to profile to check for good defaults, also maybe PoolResource + // needs to be smarter. We expect more reuse of smaller objects than larger + // objects. 64*1024B is maybe wasteful, whereas 256*32B maybe sensible. + // Depends on number of small objects expected. 
+ static constexpr auto kPoolBlockPerChunk = 64UL; + static constexpr auto kPoolMaxBlockSize = 1024UL; + + static auto upstream_resource() -> utils::MemoryResource * { + // singleton ResourceWithOutOfMemoryException + // explicitly backed by NewDeleteResource + static auto upstream = utils::ResourceWithOutOfMemoryException{utils::NewDeleteResource()}; + return &upstream; + } + +#ifndef MG_MEMORY_PROFILE + memgraph::utils::MonotonicBufferResource monotonic{kMonotonicInitialSize, upstream_resource()}; + memgraph::utils::PoolResource pool{kPoolBlockPerChunk, &monotonic, upstream_resource()}; +#endif +}; + struct InterpreterContext; inline constexpr size_t kExecutionMemoryBlockSize = 1UL * 1024UL * 1024UL; @@ -95,25 +143,24 @@ class CoordinatorQueryHandler { }; /// @throw QueryRuntimeException if an error ocurred. - virtual void RegisterReplicationInstance(std::string const &coordinator_socket_address, - std::string const &replication_socket_address, + virtual void RegisterReplicationInstance(std::string_view coordinator_socket_address, + std::string_view replication_socket_address, std::chrono::seconds const &instance_health_check_frequency, std::chrono::seconds const &instance_down_timeout, std::chrono::seconds const &instance_get_uuid_frequency, - std::string const &instance_name, CoordinatorQuery::SyncMode sync_mode) = 0; + std::string_view instance_name, CoordinatorQuery::SyncMode sync_mode) = 0; /// @throw QueryRuntimeException if an error ocurred. - virtual void UnregisterInstance(std::string const &instance_name) = 0; + virtual void UnregisterInstance(std::string_view instance_name) = 0; /// @throw QueryRuntimeException if an error ocurred. - virtual void SetReplicationInstanceToMain(const std::string &instance_name) = 0; + virtual void SetReplicationInstanceToMain(std::string_view instance_name) = 0; /// @throw QueryRuntimeException if an error ocurred. 
virtual std::vector<coordination::InstanceStatus> ShowInstances() const = 0; /// @throw QueryRuntimeException if an error ocurred. - virtual auto AddCoordinatorInstance(uint32_t raft_server_id, std::string const &coordinator_socket_address) - -> void = 0; + virtual auto AddCoordinatorInstance(uint32_t raft_server_id, std::string_view coordinator_socket_address) -> void = 0; }; #endif @@ -305,45 +352,25 @@ class Interpreter final { } struct QueryExecution { - std::variant<utils::MonotonicBufferResource, utils::PoolResource> execution_memory; - utils::ResourceWithOutOfMemoryException execution_memory_with_exception; - std::optional<PreparedQuery> prepared_query; + QueryAllocator execution_memory; // NOTE: before all other fields which uses this memory + std::optional<PreparedQuery> prepared_query; std::map<std::string, TypedValue> summary; std::vector<Notification> notifications; - static auto Create(std::variant<utils::MonotonicBufferResource, utils::PoolResource> memory_resource, - std::optional<PreparedQuery> prepared_query = std::nullopt) -> std::unique_ptr<QueryExecution> { - return std::make_unique<QueryExecution>(std::move(memory_resource), std::move(prepared_query)); - } + static auto Create() -> std::unique_ptr<QueryExecution> { return std::make_unique<QueryExecution>(); } - explicit QueryExecution(std::variant<utils::MonotonicBufferResource, utils::PoolResource> memory_resource, - std::optional<PreparedQuery> prepared_query) - : execution_memory(std::move(memory_resource)), prepared_query{std::move(prepared_query)} { - std::visit( - [&](auto &memory_resource) { - execution_memory_with_exception = utils::ResourceWithOutOfMemoryException(&memory_resource); - }, - execution_memory); - }; + explicit QueryExecution() = default; QueryExecution(const QueryExecution &) = delete; - QueryExecution(QueryExecution &&) = default; + QueryExecution(QueryExecution &&) = delete; QueryExecution &operator=(const QueryExecution &) = delete; - QueryExecution 
&operator=(QueryExecution &&) = default; + QueryExecution &operator=(QueryExecution &&) = delete; - ~QueryExecution() { - // We should always release the execution memory AFTER we - // destroy the prepared query which is using that instance - // of execution memory. - prepared_query.reset(); - std::visit([](auto &memory_resource) { memory_resource.Release(); }, execution_memory); - } + ~QueryExecution() = default; void CleanRuntimeData() { - if (prepared_query.has_value()) { - prepared_query.reset(); - } + prepared_query.reset(); notifications.clear(); } }; @@ -414,9 +441,7 @@ std::map<std::string, TypedValue> Interpreter::Pull(TStream *result_stream, std: try { // Wrap the (statically polymorphic) stream type into a common type which // the handler knows. - AnyStream stream{result_stream, - std::visit([](auto &execution_memory) -> utils::MemoryResource * { return &execution_memory; }, - query_execution->execution_memory)}; + AnyStream stream{result_stream, query_execution->execution_memory.resource()}; const auto maybe_res = query_execution->prepared_query->query_handler(&stream, n); // Stream is using execution memory of the query_execution which // can be deleted after its execution so the stream should be cleared diff --git a/src/query/metadata.cpp b/src/query/metadata.cpp index e339aad57..af3b8d15f 100644 --- a/src/query/metadata.cpp +++ b/src/query/metadata.cpp @@ -67,8 +67,8 @@ constexpr std::string_view GetCodeString(const NotificationCode code) { case NotificationCode::REGISTER_REPLICA: return "RegisterReplica"sv; #ifdef MG_ENTERPRISE - case NotificationCode::REGISTER_COORDINATOR_SERVER: - return "RegisterCoordinatorServer"sv; + case NotificationCode::REGISTER_REPLICATION_INSTANCE: + return "RegisterReplicationInstance"sv; case NotificationCode::ADD_COORDINATOR_INSTANCE: return "AddCoordinatorInstance"sv; case NotificationCode::UNREGISTER_INSTANCE: diff --git a/src/query/metadata.hpp b/src/query/metadata.hpp index dd8c2db07..fba672f4b 100644 --- 
a/src/query/metadata.hpp +++ b/src/query/metadata.hpp @@ -43,7 +43,7 @@ enum class NotificationCode : uint8_t { REPLICA_PORT_WARNING, REGISTER_REPLICA, #ifdef MG_ENTERPRISE - REGISTER_COORDINATOR_SERVER, // TODO: (andi) What is this? + REGISTER_REPLICATION_INSTANCE, ADD_COORDINATOR_INSTANCE, UNREGISTER_INSTANCE, #endif diff --git a/src/query/plan/hint_provider.hpp b/src/query/plan/hint_provider.hpp index 74dde2f46..3c8510561 100644 --- a/src/query/plan/hint_provider.hpp +++ b/src/query/plan/hint_provider.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -114,6 +114,9 @@ class PlanHintsProvider final : public HierarchicalLogicalOperatorVisitor { bool PreVisit(ScanAllById & /*unused*/) override { return true; } bool PostVisit(ScanAllById & /*unused*/) override { return true; } + bool PreVisit(ScanAllByEdgeType & /*unused*/) override { return true; } + bool PostVisit(ScanAllByEdgeType & /*unused*/) override { return true; } + bool PreVisit(ConstructNamedPath & /*unused*/) override { return true; } bool PostVisit(ConstructNamedPath & /*unused*/) override { return true; } @@ -206,6 +209,14 @@ class PlanHintsProvider final : public HierarchicalLogicalOperatorVisitor { bool PostVisit(IndexedJoin & /*unused*/) override { return true; } + bool PreVisit(RollUpApply &op) override { + op.input()->Accept(*this); + op.list_collection_branch_->Accept(*this); + return false; + } + + bool PostVisit(RollUpApply & /*unused*/) override { return true; } + private: const SymbolTable &symbol_table_; std::vector<std::string> hints_; diff --git a/src/query/plan/operator.cpp b/src/query/plan/operator.cpp index 75b531261..8e1b9f529 100644 --- a/src/query/plan/operator.cpp +++ b/src/query/plan/operator.cpp @@ -47,6 +47,8 @@ #include 
"query/procedure/mg_procedure_impl.hpp" #include "query/procedure/module.hpp" #include "query/typed_value.hpp" +#include "range/v3/all.hpp" +#include "storage/v2/id_types.hpp" #include "storage/v2/property_value.hpp" #include "storage/v2/view.hpp" #include "utils/algorithm.hpp" @@ -67,6 +69,7 @@ #include "utils/pmr/vector.hpp" #include "utils/readable_size.hpp" #include "utils/string.hpp" +#include "utils/tag.hpp" #include "utils/temporal.hpp" #include "utils/typeinfo.hpp" @@ -105,6 +108,7 @@ extern const Event ScanAllByLabelPropertyRangeOperator; extern const Event ScanAllByLabelPropertyValueOperator; extern const Event ScanAllByLabelPropertyOperator; extern const Event ScanAllByIdOperator; +extern const Event ScanAllByEdgeTypeOperator; extern const Event ExpandOperator; extern const Event ExpandVariableOperator; extern const Event ConstructNamedPathOperator; @@ -177,6 +181,20 @@ inline void AbortCheck(ExecutionContext const &context) { if (auto const reason = MustAbort(context); reason != AbortReason::NO_ABORT) throw HintedAbortError(reason); } +std::vector<storage::LabelId> EvaluateLabels(const std::vector<StorageLabelType> &labels, + ExpressionEvaluator &evaluator, DbAccessor *dba) { + std::vector<storage::LabelId> result; + result.reserve(labels.size()); + for (const auto &label : labels) { + if (const auto *label_atom = std::get_if<storage::LabelId>(&label)) { + result.emplace_back(*label_atom); + } else { + result.emplace_back(dba->NameToLabel(std::get<Expression *>(label)->Accept(evaluator).ValueString())); + } + } + return result; +} + } // namespace // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) @@ -212,12 +230,13 @@ CreateNode::CreateNode(const std::shared_ptr<LogicalOperator> &input, NodeCreati // Creates a vertex on this GraphDb. Returns a reference to vertex placed on the // frame. 
-VertexAccessor &CreateLocalVertex(const NodeCreationInfo &node_info, Frame *frame, ExecutionContext &context) { +VertexAccessor &CreateLocalVertex(const NodeCreationInfo &node_info, Frame *frame, ExecutionContext &context, + std::vector<storage::LabelId> &labels, ExpressionEvaluator &evaluator) { auto &dba = *context.db_accessor; auto new_node = dba.InsertVertex(); context.execution_stats[ExecutionStats::Key::CREATED_NODES] += 1; - for (auto label : node_info.labels) { - auto maybe_error = new_node.AddLabel(label); + for (const auto &label : labels) { + auto maybe_error = std::invoke([&] { return new_node.AddLabel(label); }); if (maybe_error.HasError()) { switch (maybe_error.GetError()) { case storage::Error::SERIALIZATION_ERROR: @@ -232,10 +251,6 @@ VertexAccessor &CreateLocalVertex(const NodeCreationInfo &node_info, Frame *fram } context.execution_stats[ExecutionStats::Key::CREATED_LABELS] += 1; } - // Evaluator should use the latest accessors, as modified in this query, when - // setting properties on new nodes. - ExpressionEvaluator evaluator(frame, context.symbol_table, context.evaluation_context, context.db_accessor, - storage::View::NEW); // TODO: PropsSetChecked allocates a PropertyValue, make it use context.memory // when we update PropertyValue with custom allocator. 
std::map<storage::PropertyId, storage::PropertyValue> properties; @@ -275,16 +290,21 @@ CreateNode::CreateNodeCursor::CreateNodeCursor(const CreateNode &self, utils::Me bool CreateNode::CreateNodeCursor::Pull(Frame &frame, ExecutionContext &context) { OOMExceptionEnabler oom_exception; SCOPED_PROFILE_OP("CreateNode"); -#ifdef MG_ENTERPRISE - if (license::global_license_checker.IsEnterpriseValidFast() && context.auth_checker && - !context.auth_checker->Has(self_.node_info_.labels, - memgraph::query::AuthQuery::FineGrainedPrivilege::CREATE_DELETE)) { - throw QueryRuntimeException("Vertex not created due to not having enough permission!"); - } -#endif + ExpressionEvaluator evaluator(&frame, context.symbol_table, context.evaluation_context, context.db_accessor, + storage::View::NEW); if (input_cursor_->Pull(frame, context)) { - auto created_vertex = CreateLocalVertex(self_.node_info_, &frame, context); + // we have to resolve the labels before we can check for permissions + auto labels = EvaluateLabels(self_.node_info_.labels, evaluator, context.db_accessor); + +#ifdef MG_ENTERPRISE + if (license::global_license_checker.IsEnterpriseValidFast() && context.auth_checker && + !context.auth_checker->Has(labels, memgraph::query::AuthQuery::FineGrainedPrivilege::CREATE_DELETE)) { + throw QueryRuntimeException("Vertex not created due to not having enough permission!"); + } +#endif + + auto created_vertex = CreateLocalVertex(self_.node_info_, &frame, context, labels, evaluator); if (context.trigger_context_collector) { context.trigger_context_collector->RegisterCreatedObject(created_vertex); } @@ -368,6 +388,9 @@ bool CreateExpand::CreateExpandCursor::Pull(Frame &frame, ExecutionContext &cont SCOPED_PROFILE_OP_BY_REF(self_); if (!input_cursor_->Pull(frame, context)) return false; + ExpressionEvaluator evaluator(&frame, context.symbol_table, context.evaluation_context, context.db_accessor, + storage::View::NEW); + auto labels = EvaluateLabels(self_.node_info_.labels, evaluator, 
context.db_accessor); #ifdef MG_ENTERPRISE if (license::global_license_checker.IsEnterpriseValidFast()) { @@ -379,7 +402,7 @@ bool CreateExpand::CreateExpandCursor::Pull(Frame &frame, ExecutionContext &cont if (context.auth_checker && !(context.auth_checker->Has(self_.edge_info_.edge_type, memgraph::query::AuthQuery::FineGrainedPrivilege::CREATE_DELETE) && - context.auth_checker->Has(self_.node_info_.labels, fine_grained_permission))) { + context.auth_checker->Has(labels, fine_grained_permission))) { throw QueryRuntimeException("Edge not created due to not having enough permission!"); } } @@ -389,14 +412,8 @@ bool CreateExpand::CreateExpandCursor::Pull(Frame &frame, ExecutionContext &cont ExpectType(self_.input_symbol_, vertex_value, TypedValue::Type::Vertex); auto &v1 = vertex_value.ValueVertex(); - // Similarly to CreateNode, newly created edges and nodes should use the - // storage::View::NEW. - // E.g. we pickup new properties: `CREATE (n {p: 42}) -[:r {ep: n.p}]-> ()` - ExpressionEvaluator evaluator(&frame, context.symbol_table, context.evaluation_context, context.db_accessor, - storage::View::NEW); - // get the destination vertex (possibly an existing node) - auto &v2 = OtherVertex(frame, context); + auto &v2 = OtherVertex(frame, context, labels, evaluator); // create an edge between the two nodes auto *dba = context.db_accessor; @@ -427,13 +444,15 @@ void CreateExpand::CreateExpandCursor::Shutdown() { input_cursor_->Shutdown(); } void CreateExpand::CreateExpandCursor::Reset() { input_cursor_->Reset(); } -VertexAccessor &CreateExpand::CreateExpandCursor::OtherVertex(Frame &frame, ExecutionContext &context) { +VertexAccessor &CreateExpand::CreateExpandCursor::OtherVertex(Frame &frame, ExecutionContext &context, + std::vector<storage::LabelId> &labels, + ExpressionEvaluator &evaluator) { if (self_.existing_node_) { TypedValue &dest_node_value = frame[self_.node_info_.symbol]; ExpectType(self_.node_info_.symbol, dest_node_value, TypedValue::Type::Vertex); return 
dest_node_value.ValueVertex(); } else { - auto &created_vertex = CreateLocalVertex(self_.node_info_, &frame, context); + auto &created_vertex = CreateLocalVertex(self_.node_info_, &frame, context, labels, evaluator); if (context.trigger_context_collector) { context.trigger_context_collector->RegisterCreatedObject(created_vertex); } @@ -517,6 +536,60 @@ class ScanAllCursor : public Cursor { const char *op_name_; }; +template <typename TEdgesFun> +class ScanAllByEdgeTypeCursor : public Cursor { + public: + explicit ScanAllByEdgeTypeCursor(const ScanAllByEdgeType &self, Symbol output_symbol, UniqueCursorPtr input_cursor, + storage::View view, TEdgesFun get_edges, const char *op_name) + : self_(self), + output_symbol_(std::move(output_symbol)), + input_cursor_(std::move(input_cursor)), + view_(view), + get_edges_(std::move(get_edges)), + op_name_(op_name) {} + + bool Pull(Frame &frame, ExecutionContext &context) override { + OOMExceptionEnabler oom_exception; + SCOPED_PROFILE_OP_BY_REF(self_); + + AbortCheck(context); + + while (!vertices_ || vertices_it_.value() == vertices_end_it_.value()) { + if (!input_cursor_->Pull(frame, context)) return false; + auto next_vertices = get_edges_(frame, context); + if (!next_vertices) continue; + + vertices_.emplace(std::move(next_vertices.value())); + vertices_it_.emplace(vertices_.value().begin()); + vertices_end_it_.emplace(vertices_.value().end()); + } + + frame[output_symbol_] = *vertices_it_.value(); + ++vertices_it_.value(); + return true; + } + + void Shutdown() override { input_cursor_->Shutdown(); } + + void Reset() override { + input_cursor_->Reset(); + vertices_ = std::nullopt; + vertices_it_ = std::nullopt; + vertices_end_it_ = std::nullopt; + } + + private: + const ScanAllByEdgeType &self_; + const Symbol output_symbol_; + const UniqueCursorPtr input_cursor_; + storage::View view_; + TEdgesFun get_edges_; + std::optional<typename std::result_of<TEdgesFun(Frame &, ExecutionContext &)>::type::value_type> vertices_; + 
std::optional<decltype(vertices_.value().begin())> vertices_it_; + std::optional<decltype(vertices_.value().end())> vertices_end_it_; + const char *op_name_; +}; + ScanAll::ScanAll(const std::shared_ptr<LogicalOperator> &input, Symbol output_symbol, storage::View view) : input_(input ? input : std::make_shared<Once>()), output_symbol_(std::move(output_symbol)), view_(view) {} @@ -556,6 +629,33 @@ UniqueCursorPtr ScanAllByLabel::MakeCursor(utils::MemoryResource *mem) const { view_, std::move(vertices), "ScanAllByLabel"); } +ScanAllByEdgeType::ScanAllByEdgeType(const std::shared_ptr<LogicalOperator> &input, Symbol output_symbol, + storage::EdgeTypeId edge_type, storage::View view) + : input_(input ? input : std::make_shared<Once>()), + output_symbol_(std::move(output_symbol)), + view_(view), + edge_type_(edge_type) {} + +ACCEPT_WITH_INPUT(ScanAllByEdgeType) + +UniqueCursorPtr ScanAllByEdgeType::MakeCursor(utils::MemoryResource *mem) const { + memgraph::metrics::IncrementCounter(memgraph::metrics::ScanAllByEdgeTypeOperator); + + auto edges = [this](Frame &, ExecutionContext &context) { + auto *db = context.db_accessor; + return std::make_optional(db->Edges(view_, edge_type_)); + }; + + return MakeUniqueCursorPtr<ScanAllByEdgeTypeCursor<decltype(edges)>>( + mem, *this, output_symbol_, input_->MakeCursor(mem), view_, std::move(edges), "ScanAllByEdgeType"); +} + +std::vector<Symbol> ScanAllByEdgeType::ModifiedSymbols(const SymbolTable &table) const { + auto symbols = input_->ModifiedSymbols(table); + symbols.emplace_back(output_symbol_); + return symbols; +} + // TODO(buda): Implement ScanAllByLabelProperty operator to iterate over // vertices that have the label and some value for the given property. @@ -765,17 +865,15 @@ bool Expand::ExpandCursor::Pull(Frame &frame, ExecutionContext &context) { SCOPED_PROFILE_OP_BY_REF(self_); // A helper function for expanding a node from an edge. 
- auto pull_node = [this, &frame](const EdgeAccessor &new_edge, EdgeAtom::Direction direction) { + auto pull_node = [this, &frame]<EdgeAtom::Direction direction>(const EdgeAccessor &new_edge, + utils::tag_value<direction>) { if (self_.common_.existing_node) return; - switch (direction) { - case EdgeAtom::Direction::IN: - frame[self_.common_.node_symbol] = new_edge.From(); - break; - case EdgeAtom::Direction::OUT: - frame[self_.common_.node_symbol] = new_edge.To(); - break; - case EdgeAtom::Direction::BOTH: - LOG_FATAL("Must indicate exact expansion direction here"); + if constexpr (direction == EdgeAtom::Direction::IN) { + frame[self_.common_.node_symbol] = new_edge.From(); + } else if constexpr (direction == EdgeAtom::Direction::OUT) { + frame[self_.common_.node_symbol] = new_edge.To(); + } else { + LOG_FATAL("Must indicate exact expansion direction here"); } }; @@ -794,7 +892,7 @@ bool Expand::ExpandCursor::Pull(Frame &frame, ExecutionContext &context) { #endif frame[self_.common_.edge_symbol] = edge; - pull_node(edge, EdgeAtom::Direction::IN); + pull_node(edge, utils::tag_v<EdgeAtom::Direction::IN>); return true; } @@ -814,7 +912,7 @@ bool Expand::ExpandCursor::Pull(Frame &frame, ExecutionContext &context) { } #endif frame[self_.common_.edge_symbol] = edge; - pull_node(edge, EdgeAtom::Direction::OUT); + pull_node(edge, utils::tag_v<EdgeAtom::Direction::OUT>); return true; } @@ -908,12 +1006,12 @@ bool Expand::ExpandCursor::InitEdges(Frame &frame, ExecutionContext &context) { auto existing_node = *expansion_info_.existing_node; auto edges_result = UnwrapEdgesResult(vertex.InEdges(self_.view_, self_.common_.edge_types, existing_node)); - in_edges_.emplace(edges_result.edges); + in_edges_.emplace(std::move(edges_result.edges)); num_expanded_first = edges_result.expanded_count; } } else { auto edges_result = UnwrapEdgesResult(vertex.InEdges(self_.view_, self_.common_.edge_types)); - in_edges_.emplace(edges_result.edges); + 
in_edges_.emplace(std::move(edges_result.edges)); num_expanded_first = edges_result.expanded_count; } if (in_edges_) { @@ -927,12 +1025,12 @@ bool Expand::ExpandCursor::InitEdges(Frame &frame, ExecutionContext &context) { if (expansion_info_.existing_node) { auto existing_node = *expansion_info_.existing_node; auto edges_result = UnwrapEdgesResult(vertex.OutEdges(self_.view_, self_.common_.edge_types, existing_node)); - out_edges_.emplace(edges_result.edges); + out_edges_.emplace(std::move(edges_result.edges)); num_expanded_second = edges_result.expanded_count; } } else { auto edges_result = UnwrapEdgesResult(vertex.OutEdges(self_.view_, self_.common_.edge_types)); - out_edges_.emplace(edges_result.edges); + out_edges_.emplace(std::move(edges_result.edges)); num_expanded_second = edges_result.expanded_count; } if (out_edges_) { @@ -1018,14 +1116,14 @@ auto ExpandFromVertex(const VertexAccessor &vertex, EdgeAtom::Direction directio if (direction != EdgeAtom::Direction::OUT) { auto edges = UnwrapEdgesResult(vertex.InEdges(view, edge_types)).edges; - if (edges.begin() != edges.end()) { + if (!edges.empty()) { chain_elements.emplace_back(wrapper(EdgeAtom::Direction::IN, std::move(edges))); } } if (direction != EdgeAtom::Direction::IN) { auto edges = UnwrapEdgesResult(vertex.OutEdges(view, edge_types)).edges; - if (edges.begin() != edges.end()) { + if (!edges.empty()) { chain_elements.emplace_back(wrapper(EdgeAtom::Direction::OUT, std::move(edges))); } } @@ -1145,8 +1243,13 @@ class ExpandVariableCursor : public Cursor { } // reset the frame value to an empty edge list - auto *pull_memory = context.evaluation_context.memory; - frame[self_.common_.edge_symbol] = TypedValue::TVector(pull_memory); + if (frame[self_.common_.edge_symbol].IsList()) { + // Preserve the list capacity if possible + frame[self_.common_.edge_symbol].ValueList().clear(); + } else { + auto *pull_memory = context.evaluation_context.memory; + frame[self_.common_.edge_symbol] = 
TypedValue::TVector(pull_memory); + } return true; } @@ -3125,8 +3228,8 @@ void SetProperties::SetPropertiesCursor::Shutdown() { input_cursor_->Shutdown(); void SetProperties::SetPropertiesCursor::Reset() { input_cursor_->Reset(); } SetLabels::SetLabels(const std::shared_ptr<LogicalOperator> &input, Symbol input_symbol, - const std::vector<storage::LabelId> &labels) - : input_(input), input_symbol_(std::move(input_symbol)), labels_(labels) {} + std::vector<StorageLabelType> labels) + : input_(input), input_symbol_(std::move(input_symbol)), labels_(std::move(labels)) {} ACCEPT_WITH_INPUT(SetLabels) @@ -3146,16 +3249,18 @@ SetLabels::SetLabelsCursor::SetLabelsCursor(const SetLabels &self, utils::Memory bool SetLabels::SetLabelsCursor::Pull(Frame &frame, ExecutionContext &context) { OOMExceptionEnabler oom_exception; SCOPED_PROFILE_OP("SetLabels"); + ExpressionEvaluator evaluator(&frame, context.symbol_table, context.evaluation_context, context.db_accessor, + storage::View::NEW); + if (!input_cursor_->Pull(frame, context)) return false; + auto labels = EvaluateLabels(self_.labels_, evaluator, context.db_accessor); #ifdef MG_ENTERPRISE if (license::global_license_checker.IsEnterpriseValidFast() && context.auth_checker && - !context.auth_checker->Has(self_.labels_, memgraph::query::AuthQuery::FineGrainedPrivilege::CREATE_DELETE)) { + !context.auth_checker->Has(labels, memgraph::query::AuthQuery::FineGrainedPrivilege::CREATE_DELETE)) { throw QueryRuntimeException("Couldn't set label due to not having enough permission!"); } #endif - if (!input_cursor_->Pull(frame, context)) return false; - TypedValue &vertex_value = frame[self_.input_symbol_]; // Skip setting labels on Null (can occur in optional match). 
if (vertex_value.IsNull()) return true; @@ -3170,7 +3275,7 @@ bool SetLabels::SetLabelsCursor::Pull(Frame &frame, ExecutionContext &context) { } #endif - for (auto label : self_.labels_) { + for (auto label : labels) { auto maybe_value = vertex.AddLabel(label); if (maybe_value.HasError()) { switch (maybe_value.GetError()) { @@ -3285,8 +3390,8 @@ void RemoveProperty::RemovePropertyCursor::Shutdown() { input_cursor_->Shutdown( void RemoveProperty::RemovePropertyCursor::Reset() { input_cursor_->Reset(); } RemoveLabels::RemoveLabels(const std::shared_ptr<LogicalOperator> &input, Symbol input_symbol, - const std::vector<storage::LabelId> &labels) - : input_(input), input_symbol_(std::move(input_symbol)), labels_(labels) {} + std::vector<StorageLabelType> labels) + : input_(input), input_symbol_(std::move(input_symbol)), labels_(std::move(labels)) {} ACCEPT_WITH_INPUT(RemoveLabels) @@ -3306,16 +3411,18 @@ RemoveLabels::RemoveLabelsCursor::RemoveLabelsCursor(const RemoveLabels &self, u bool RemoveLabels::RemoveLabelsCursor::Pull(Frame &frame, ExecutionContext &context) { OOMExceptionEnabler oom_exception; SCOPED_PROFILE_OP("RemoveLabels"); + ExpressionEvaluator evaluator(&frame, context.symbol_table, context.evaluation_context, context.db_accessor, + storage::View::NEW); + if (!input_cursor_->Pull(frame, context)) return false; + auto labels = EvaluateLabels(self_.labels_, evaluator, context.db_accessor); #ifdef MG_ENTERPRISE if (license::global_license_checker.IsEnterpriseValidFast() && context.auth_checker && - !context.auth_checker->Has(self_.labels_, memgraph::query::AuthQuery::FineGrainedPrivilege::CREATE_DELETE)) { + !context.auth_checker->Has(labels, memgraph::query::AuthQuery::FineGrainedPrivilege::CREATE_DELETE)) { throw QueryRuntimeException("Couldn't remove label due to not having enough permission!"); } #endif - if (!input_cursor_->Pull(frame, context)) return false; - TypedValue &vertex_value = frame[self_.input_symbol_]; // Skip removing labels on Null (can 
occur in optional match). if (vertex_value.IsNull()) return true; @@ -3330,7 +3437,7 @@ bool RemoveLabels::RemoveLabelsCursor::Pull(Frame &frame, ExecutionContext &cont } #endif - for (auto label : self_.labels_) { + for (auto label : labels) { auto maybe_value = vertex.RemoveLabel(label); if (maybe_value.HasError()) { switch (maybe_value.GetError()) { @@ -4065,14 +4172,14 @@ OrderBy::OrderBy(const std::shared_ptr<LogicalOperator> &input, const std::vecto const std::vector<Symbol> &output_symbols) : input_(input), output_symbols_(output_symbols) { // split the order_by vector into two vectors of orderings and expressions - std::vector<Ordering> ordering; + std::vector<OrderedTypedValueCompare> ordering; ordering.reserve(order_by.size()); order_by_.reserve(order_by.size()); for (const auto &ordering_expression_pair : order_by) { ordering.emplace_back(ordering_expression_pair.ordering); order_by_.emplace_back(ordering_expression_pair.expression); } - compare_ = TypedValueVectorCompare(ordering); + compare_ = TypedValueVectorCompare(std::move(ordering)); } ACCEPT_WITH_INPUT(OrderBy) @@ -4093,29 +4200,43 @@ class OrderByCursor : public Cursor { OOMExceptionEnabler oom_exception; SCOPED_PROFILE_OP_BY_REF(self_); - if (!did_pull_all_) { + if (!did_pull_all_) [[unlikely]] { ExpressionEvaluator evaluator(&frame, context.symbol_table, context.evaluation_context, context.db_accessor, storage::View::OLD); - auto *mem = cache_.get_allocator().GetMemoryResource(); + auto *pull_mem = context.evaluation_context.memory; + auto *query_mem = cache_.get_allocator().GetMemoryResource(); + + utils::pmr::vector<utils::pmr::vector<TypedValue>> order_by(pull_mem); // Not cached, pull memory + utils::pmr::vector<utils::pmr::vector<TypedValue>> output(query_mem); // Cached, query memory + while (input_cursor_->Pull(frame, context)) { // collect the order_by elements - utils::pmr::vector<TypedValue> order_by(mem); - order_by.reserve(self_.order_by_.size()); - for (auto expression_ptr : 
self_.order_by_) { - order_by.emplace_back(expression_ptr->Accept(evaluator)); + utils::pmr::vector<TypedValue> order_by_elem(pull_mem); + order_by_elem.reserve(self_.order_by_.size()); + for (auto const &expression_ptr : self_.order_by_) { + order_by_elem.emplace_back(expression_ptr->Accept(evaluator)); } + order_by.emplace_back(std::move(order_by_elem)); // collect the output elements - utils::pmr::vector<TypedValue> output(mem); - output.reserve(self_.output_symbols_.size()); - for (const Symbol &output_sym : self_.output_symbols_) output.emplace_back(frame[output_sym]); - - cache_.push_back(Element{std::move(order_by), std::move(output)}); + utils::pmr::vector<TypedValue> output_elem(query_mem); + output_elem.reserve(self_.output_symbols_.size()); + for (const Symbol &output_sym : self_.output_symbols_) { + output_elem.emplace_back(frame[output_sym]); + } + output.emplace_back(std::move(output_elem)); } - std::sort(cache_.begin(), cache_.end(), [this](const auto &pair1, const auto &pair2) { - return self_.compare_(pair1.order_by, pair2.order_by); - }); + // sorting with range zip + // we compare on just the projection of the 1st range (order_by) + // this will also permute the 2nd range (output) + ranges::sort( + ranges::views::zip(order_by, output), self_.compare_.lex_cmp(), + [](auto const &value) -> auto const & { return std::get<0>(value); }); + + // no longer need the order_by terms + order_by.clear(); + cache_ = std::move(output); did_pull_all_ = true; cache_it_ = cache_.begin(); @@ -4126,15 +4247,15 @@ class OrderByCursor : public Cursor { AbortCheck(context); // place the output values on the frame - DMG_ASSERT(self_.output_symbols_.size() == cache_it_->remember.size(), + DMG_ASSERT(self_.output_symbols_.size() == cache_it_->size(), "Number of values does not match the number of output symbols " "in OrderBy"); auto output_sym_it = self_.output_symbols_.begin(); - for (const TypedValue &output : cache_it_->remember) { - if (context.frame_change_collector 
&& context.frame_change_collector->IsKeyTracked(output_sym_it->name())) { + for (TypedValue &output : *cache_it_) { + if (context.frame_change_collector) { context.frame_change_collector->ResetTrackingValue(output_sym_it->name()); } - frame[*output_sym_it++] = output; + frame[*output_sym_it++] = std::move(output); } cache_it_++; return true; @@ -4149,17 +4270,12 @@ class OrderByCursor : public Cursor { } private: - struct Element { - utils::pmr::vector<TypedValue> order_by; - utils::pmr::vector<TypedValue> remember; - }; - const OrderBy &self_; const UniqueCursorPtr input_cursor_; bool did_pull_all_{false}; // a cache of elements pulled from the input - // the cache is filled and sorted (only on first elem) on first Pull - utils::pmr::vector<Element> cache_; + // the cache is filled and sorted on first Pull + utils::pmr::vector<utils::pmr::vector<TypedValue>> cache_; // iterator over the cache_, maintains state between Pulls decltype(cache_.begin()) cache_it_ = cache_.begin(); }; @@ -4362,15 +4478,15 @@ class UnwindCursor : public Cursor { TypedValue input_value = self_.input_expression_->Accept(evaluator); if (input_value.type() != TypedValue::Type::List) throw QueryRuntimeException("Argument of UNWIND must be a list, but '{}' was provided.", input_value.type()); - // Copy the evaluted input_value_list to our vector. - input_value_ = input_value.ValueList(); + // Move the evaluted input_value_list to our vector. 
+ input_value_ = std::move(input_value.ValueList()); input_value_it_ = input_value_.begin(); } // if we reached the end of our list of values goto back to top if (input_value_it_ == input_value_.end()) continue; - frame[self_.output_symbol_] = *input_value_it_++; + frame[self_.output_symbol_] = std::move(*input_value_it_++); if (context.frame_change_collector && context.frame_change_collector->IsKeyTracked(self_.output_symbol_.name_)) { context.frame_change_collector->ResetTrackingValue(self_.output_symbol_.name_); } @@ -4411,7 +4527,11 @@ class DistinctCursor : public Cursor { SCOPED_PROFILE_OP("Distinct"); while (true) { - if (!input_cursor_->Pull(frame, context)) return false; + if (!input_cursor_->Pull(frame, context)) { + // Nothing left to pull, we can dispose of seen_rows now + seen_rows_.clear(); + return false; + } utils::pmr::vector<TypedValue> row(seen_rows_.get_allocator().GetMemoryResource()); row.reserve(self_.value_symbols_.size()); @@ -5219,6 +5339,7 @@ class LoadCsvCursor : public Cursor { "1"); } did_pull_ = true; + reader_->Reset(); } auto row = reader_->GetNextRow(context.evaluation_context.memory); @@ -5624,4 +5745,25 @@ UniqueCursorPtr HashJoin::MakeCursor(utils::MemoryResource *mem) const { return MakeUniqueCursorPtr<HashJoinCursor>(mem, *this, mem); } +RollUpApply::RollUpApply(const std::shared_ptr<LogicalOperator> &input, + std::shared_ptr<LogicalOperator> &&second_branch) + : input_(input), list_collection_branch_(second_branch) {} + +std::vector<Symbol> RollUpApply::OutputSymbols(const SymbolTable & /*symbol_table*/) const { + std::vector<Symbol> symbols; + return symbols; +} + +std::vector<Symbol> RollUpApply::ModifiedSymbols(const SymbolTable &table) const { return OutputSymbols(table); } + +bool RollUpApply::Accept(HierarchicalLogicalOperatorVisitor &visitor) { + if (visitor.PreVisit(*this)) { + if (!input_ || !list_collection_branch_) { + throw utils::NotYetImplemented("One of the branches in pattern comprehension is null! 
Please contact support."); + } + input_->Accept(visitor) && list_collection_branch_->Accept(visitor); + } + return visitor.PostVisit(*this); +} + } // namespace memgraph::query::plan diff --git a/src/query/plan/operator.hpp b/src/query/plan/operator.hpp index 516ef2e38..e502fbadd 100644 --- a/src/query/plan/operator.hpp +++ b/src/query/plan/operator.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -76,18 +76,13 @@ using UniqueCursorPtr = std::unique_ptr<Cursor, std::function<void(Cursor *)>>; template <class TCursor, class... TArgs> std::unique_ptr<Cursor, std::function<void(Cursor *)>> MakeUniqueCursorPtr(utils::Allocator<TCursor> allocator, TArgs &&...args) { - auto *ptr = allocator.allocate(1); - try { - auto *cursor = new (ptr) TCursor(std::forward<TArgs>(args)...); - return std::unique_ptr<Cursor, std::function<void(Cursor *)>>(cursor, [allocator](Cursor *base_ptr) mutable { - auto *p = static_cast<TCursor *>(base_ptr); - p->~TCursor(); - allocator.deallocate(p, 1); - }); - } catch (...) 
{ - allocator.deallocate(ptr, 1); - throw; - } + auto *cursor = allocator.template new_object<TCursor>(std::forward<TArgs>(args)...); + auto dtr = [allocator](Cursor *base_ptr) mutable { + auto *p = static_cast<TCursor *>(base_ptr); + allocator.delete_object(p); + }; + // TODO: not std::function + return std::unique_ptr<Cursor, std::function<void(Cursor *)>>(cursor, std::move(dtr)); } class Once; @@ -99,6 +94,7 @@ class ScanAllByLabelPropertyRange; class ScanAllByLabelPropertyValue; class ScanAllByLabelProperty; class ScanAllById; +class ScanAllByEdgeType; class Expand; class ExpandVariable; class ConstructNamedPath; @@ -130,14 +126,15 @@ class EvaluatePatternFilter; class Apply; class IndexedJoin; class HashJoin; +class RollUpApply; using LogicalOperatorCompositeVisitor = utils::CompositeVisitor<Once, CreateNode, CreateExpand, ScanAll, ScanAllByLabel, ScanAllByLabelPropertyRange, - ScanAllByLabelPropertyValue, ScanAllByLabelProperty, ScanAllById, Expand, ExpandVariable, - ConstructNamedPath, Filter, Produce, Delete, SetProperty, SetProperties, SetLabels, - RemoveProperty, RemoveLabels, EdgeUniquenessFilter, Accumulate, Aggregate, Skip, Limit, - OrderBy, Merge, Optional, Unwind, Distinct, Union, Cartesian, CallProcedure, LoadCsv, - Foreach, EmptyResult, EvaluatePatternFilter, Apply, IndexedJoin, HashJoin>; + ScanAllByLabelPropertyValue, ScanAllByLabelProperty, ScanAllById, ScanAllByEdgeType, Expand, + ExpandVariable, ConstructNamedPath, Filter, Produce, Delete, SetProperty, SetProperties, + SetLabels, RemoveProperty, RemoveLabels, EdgeUniquenessFilter, Accumulate, Aggregate, Skip, + Limit, OrderBy, Merge, Optional, Unwind, Distinct, Union, Cartesian, CallProcedure, LoadCsv, + Foreach, EmptyResult, EvaluatePatternFilter, Apply, IndexedJoin, HashJoin, RollUpApply>; using LogicalOperatorLeafVisitor = utils::LeafVisitor<Once>; @@ -283,6 +280,7 @@ class Once : public memgraph::query::plan::LogicalOperator { }; using PropertiesMapList = 
std::vector<std::pair<storage::PropertyId, Expression *>>; +using StorageLabelType = std::variant<storage::LabelId, Expression *>; struct NodeCreationInfo { static const utils::TypeInfo kType; @@ -290,18 +288,18 @@ struct NodeCreationInfo { NodeCreationInfo() = default; - NodeCreationInfo(Symbol symbol, std::vector<storage::LabelId> labels, + NodeCreationInfo(Symbol symbol, std::vector<StorageLabelType> labels, std::variant<PropertiesMapList, ParameterLookup *> properties) : symbol{std::move(symbol)}, labels{std::move(labels)}, properties{std::move(properties)} {}; - NodeCreationInfo(Symbol symbol, std::vector<storage::LabelId> labels, PropertiesMapList properties) + NodeCreationInfo(Symbol symbol, std::vector<StorageLabelType> labels, PropertiesMapList properties) : symbol{std::move(symbol)}, labels{std::move(labels)}, properties{std::move(properties)} {}; - NodeCreationInfo(Symbol symbol, std::vector<storage::LabelId> labels, ParameterLookup *properties) + NodeCreationInfo(Symbol symbol, std::vector<StorageLabelType> labels, ParameterLookup *properties) : symbol{std::move(symbol)}, labels{std::move(labels)}, properties{properties} {}; Symbol symbol; - std::vector<storage::LabelId> labels; + std::vector<StorageLabelType> labels; std::variant<PropertiesMapList, ParameterLookup *> properties; NodeCreationInfo Clone(AstStorage *storage) const { @@ -504,7 +502,8 @@ class CreateExpand : public memgraph::query::plan::LogicalOperator { const UniqueCursorPtr input_cursor_; // Get the existing node (if existing_node_ == true), or create a new node - VertexAccessor &OtherVertex(Frame &frame, ExecutionContext &context); + VertexAccessor &OtherVertex(Frame &frame, ExecutionContext &context, + std::vector<memgraph::storage::LabelId> &labels, ExpressionEvaluator &evaluator); }; }; @@ -591,6 +590,42 @@ class ScanAllByLabel : public memgraph::query::plan::ScanAll { } }; +class ScanAllByEdgeType : public memgraph::query::plan::LogicalOperator { + public: + static const 
utils::TypeInfo kType; + const utils::TypeInfo &GetTypeInfo() const override { return kType; } + + ScanAllByEdgeType() = default; + ScanAllByEdgeType(const std::shared_ptr<LogicalOperator> &input, Symbol output_symbol, storage::EdgeTypeId edge_type, + storage::View view = storage::View::OLD); + bool Accept(HierarchicalLogicalOperatorVisitor &visitor) override; + UniqueCursorPtr MakeCursor(utils::MemoryResource *) const override; + std::vector<Symbol> ModifiedSymbols(const SymbolTable &) const override; + + bool HasSingleInput() const override { return true; } + std::shared_ptr<LogicalOperator> input() const override { return input_; } + void set_input(std::shared_ptr<LogicalOperator> input) override { input_ = input; } + + std::string ToString() const override { + return fmt::format("ScanAllByEdgeType ({} :{})", output_symbol_.name(), dba_->EdgeTypeToName(edge_type_)); + } + + std::shared_ptr<memgraph::query::plan::LogicalOperator> input_; + Symbol output_symbol_; + storage::View view_; + + storage::EdgeTypeId edge_type_; + + std::unique_ptr<LogicalOperator> Clone(AstStorage *storage) const override { + auto object = std::make_unique<ScanAllByEdgeType>(); + object->input_ = input_ ? input_->Clone(storage) : nullptr; + object->output_symbol_ = output_symbol_; + object->view_ = view_; + object->edge_type_ = edge_type_; + return object; + } +}; + /// Behaves like @c ScanAll, but produces only vertices with given label and /// property value which is inside a range (inclusive or exlusive). 
/// @@ -1439,8 +1474,7 @@ class SetLabels : public memgraph::query::plan::LogicalOperator { SetLabels() = default; - SetLabels(const std::shared_ptr<LogicalOperator> &input, Symbol input_symbol, - const std::vector<storage::LabelId> &labels); + SetLabels(const std::shared_ptr<LogicalOperator> &input, Symbol input_symbol, std::vector<StorageLabelType> labels); bool Accept(HierarchicalLogicalOperatorVisitor &visitor) override; UniqueCursorPtr MakeCursor(utils::MemoryResource *) const override; std::vector<Symbol> ModifiedSymbols(const SymbolTable &) const override; @@ -1451,7 +1485,7 @@ class SetLabels : public memgraph::query::plan::LogicalOperator { std::shared_ptr<memgraph::query::plan::LogicalOperator> input_; Symbol input_symbol_; - std::vector<storage::LabelId> labels_; + std::vector<StorageLabelType> labels_; std::unique_ptr<LogicalOperator> Clone(AstStorage *storage) const override { auto object = std::make_unique<SetLabels>(); @@ -1529,7 +1563,7 @@ class RemoveLabels : public memgraph::query::plan::LogicalOperator { RemoveLabels() = default; RemoveLabels(const std::shared_ptr<LogicalOperator> &input, Symbol input_symbol, - const std::vector<storage::LabelId> &labels); + std::vector<StorageLabelType> labels); bool Accept(HierarchicalLogicalOperatorVisitor &visitor) override; UniqueCursorPtr MakeCursor(utils::MemoryResource *) const override; std::vector<Symbol> ModifiedSymbols(const SymbolTable &) const override; @@ -1540,7 +1574,7 @@ class RemoveLabels : public memgraph::query::plan::LogicalOperator { std::shared_ptr<memgraph::query::plan::LogicalOperator> input_; Symbol input_symbol_; - std::vector<storage::LabelId> labels_; + std::vector<StorageLabelType> labels_; std::unique_ptr<LogicalOperator> Clone(AstStorage *storage) const override { auto object = std::make_unique<RemoveLabels>(); @@ -2634,5 +2668,38 @@ class HashJoin : public memgraph::query::plan::LogicalOperator { } }; +/// RollUpApply operator is used to execute an expression which takes as input 
a pattern, +/// and returns a list with content from the matched pattern +/// It's used for a pattern expression or pattern comprehension in a query. +class RollUpApply : public memgraph::query::plan::LogicalOperator { + public: + static const utils::TypeInfo kType; + const utils::TypeInfo &GetTypeInfo() const override { return kType; } + + RollUpApply() = default; + RollUpApply(const std::shared_ptr<LogicalOperator> &input, std::shared_ptr<LogicalOperator> &&second_branch); + + bool HasSingleInput() const override { return false; } + std::shared_ptr<LogicalOperator> input() const override { return input_; } + void set_input(std::shared_ptr<LogicalOperator> input) override { input_ = input; } + + bool Accept(HierarchicalLogicalOperatorVisitor &visitor) override; + UniqueCursorPtr MakeCursor(utils::MemoryResource *) const override { + throw utils::NotYetImplemented("Execution of Pattern comprehension is currently unsupported."); + } + std::vector<Symbol> OutputSymbols(const SymbolTable &) const override; + std::vector<Symbol> ModifiedSymbols(const SymbolTable &) const override; + + std::unique_ptr<LogicalOperator> Clone(AstStorage *storage) const override { + auto object = std::make_unique<RollUpApply>(); + object->input_ = input_ ? input_->Clone(storage) : nullptr; + object->list_collection_branch_ = list_collection_branch_ ? list_collection_branch_->Clone(storage) : nullptr; + return object; + } + + std::shared_ptr<memgraph::query::plan::LogicalOperator> input_; + std::shared_ptr<memgraph::query::plan::LogicalOperator> list_collection_branch_; +}; + } // namespace plan } // namespace memgraph::query diff --git a/src/query/plan/operator_type_info.cpp b/src/query/plan/operator_type_info.cpp index 3b3ffe14e..6b0a28313 100644 --- a/src/query/plan/operator_type_info.cpp +++ b/src/query/plan/operator_type_info.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -49,6 +49,8 @@ constexpr utils::TypeInfo query::plan::ScanAllByLabelProperty::kType{ constexpr utils::TypeInfo query::plan::ScanAllById::kType{utils::TypeId::SCAN_ALL_BY_ID, "ScanAllById", &query::plan::ScanAll::kType}; +constexpr utils::TypeInfo query::plan::ScanAllByEdgeType::kType{utils::TypeId::SCAN_ALL_BY_EDGE_TYPE, + "ScanAllByEdgeType", &query::plan::ScanAll::kType}; constexpr utils::TypeInfo query::plan::ExpandCommon::kType{utils::TypeId::EXPAND_COMMON, "ExpandCommon", nullptr}; @@ -154,4 +156,7 @@ constexpr utils::TypeInfo query::plan::IndexedJoin::kType{utils::TypeId::INDEXED constexpr utils::TypeInfo query::plan::HashJoin::kType{utils::TypeId::HASH_JOIN, "HashJoin", &query::plan::LogicalOperator::kType}; + +constexpr utils::TypeInfo query::plan::RollUpApply::kType{utils::TypeId::ROLLUP_APPLY, "RollUpApply", + &query::plan::LogicalOperator::kType}; } // namespace memgraph diff --git a/src/query/plan/planner.hpp b/src/query/plan/planner.hpp index e8ca80e39..3136e7271 100644 --- a/src/query/plan/planner.hpp +++ b/src/query/plan/planner.hpp @@ -23,6 +23,7 @@ #include "query/plan/operator.hpp" #include "query/plan/preprocess.hpp" #include "query/plan/pretty_print.hpp" +#include "query/plan/rewrite/edge_type_index_lookup.hpp" #include "query/plan/rewrite/index_lookup.hpp" #include "query/plan/rewrite/join.hpp" #include "query/plan/rule_based_planner.hpp" @@ -54,8 +55,11 @@ class PostProcessor final { std::unique_ptr<LogicalOperator> Rewrite(std::unique_ptr<LogicalOperator> plan, TPlanningContext *context) { auto index_lookup_plan = RewriteWithIndexLookup(std::move(plan), context->symbol_table, context->ast_storage, context->db, index_hints_); - return RewriteWithJoinRewriter(std::move(index_lookup_plan), context->symbol_table, context->ast_storage, - context->db); + auto 
join_plan = + RewriteWithJoinRewriter(std::move(index_lookup_plan), context->symbol_table, context->ast_storage, context->db); + auto edge_index_plan = RewriteWithEdgeTypeIndexRewriter(std::move(join_plan), context->symbol_table, + context->ast_storage, context->db); + return edge_index_plan; } template <class TVertexCounts> diff --git a/src/query/plan/preprocess.cpp b/src/query/plan/preprocess.cpp index c3bfdf462..2c783fa15 100644 --- a/src/query/plan/preprocess.cpp +++ b/src/query/plan/preprocess.cpp @@ -358,11 +358,17 @@ void Filters::CollectPatternFilters(Pattern &pattern, SymbolTable &symbol_table, }; auto add_node_filter = [&](NodeAtom *node) { const auto &node_symbol = symbol_table.at(*node->identifier_); - if (!node->labels_.empty()) { - // Create a LabelsTest and store it. - auto *labels_test = storage.Create<LabelsTest>(node->identifier_, node->labels_); + std::vector<LabelIx> labels; + for (auto label : node->labels_) { + if (const auto *label_node = std::get_if<Expression *>(&label)) { + throw SemanticException("Property lookup not supported in MATCH/MERGE clause!"); + } + labels.push_back(std::get<LabelIx>(label)); + } + if (!labels.empty()) { + auto *labels_test = storage.Create<LabelsTest>(node->identifier_, labels); auto label_filter = FilterInfo{FilterInfo::Type::Label, labels_test, std::unordered_set<Symbol>{node_symbol}}; - label_filter.labels = node->labels_; + label_filter.labels = labels; all_filters_.emplace_back(label_filter); } add_properties(node); @@ -632,20 +638,20 @@ void AddMatching(const Match &match, SymbolTable &symbol_table, AstStorage &stor // If there are any pattern filters, we add those as well for (auto &filter : matching.filters) { - PatternFilterVisitor visitor(symbol_table, storage); + PatternVisitor visitor(symbol_table, storage); filter.expression->Accept(visitor); - filter.matchings = visitor.getMatchings(); + filter.matchings = visitor.getFilterMatchings(); } } -PatternFilterVisitor::PatternFilterVisitor(SymbolTable 
&symbol_table, AstStorage &storage) +PatternVisitor::PatternVisitor(SymbolTable &symbol_table, AstStorage &storage) : symbol_table_(symbol_table), storage_(storage) {} -PatternFilterVisitor::PatternFilterVisitor(const PatternFilterVisitor &) = default; -PatternFilterVisitor::PatternFilterVisitor(PatternFilterVisitor &&) noexcept = default; -PatternFilterVisitor::~PatternFilterVisitor() = default; +PatternVisitor::PatternVisitor(const PatternVisitor &) = default; +PatternVisitor::PatternVisitor(PatternVisitor &&) noexcept = default; +PatternVisitor::~PatternVisitor() = default; -void PatternFilterVisitor::Visit(Exists &op) { +void PatternVisitor::Visit(Exists &op) { std::vector<Pattern *> patterns; patterns.push_back(op.pattern_); @@ -655,10 +661,14 @@ void PatternFilterVisitor::Visit(Exists &op) { filter_matching.type = PatternFilterType::EXISTS; filter_matching.symbol = std::make_optional<Symbol>(symbol_table_.at(op)); - matchings_.push_back(std::move(filter_matching)); + filter_matchings_.push_back(std::move(filter_matching)); } -std::vector<FilterMatching> PatternFilterVisitor::getMatchings() { return matchings_; } +std::vector<FilterMatching> PatternVisitor::getFilterMatchings() { return filter_matchings_; } + +std::vector<PatternComprehensionMatching> PatternVisitor::getPatternComprehensionMatchings() { + return pattern_comprehension_matchings_; +} static void ParseForeach(query::Foreach &foreach, SingleQueryPart &query_part, AstStorage &storage, SymbolTable &symbol_table) { @@ -672,6 +682,30 @@ static void ParseForeach(query::Foreach &foreach, SingleQueryPart &query_part, A } } +static void ParseReturn(query::Return &ret, AstStorage &storage, SymbolTable &symbol_table, + std::unordered_map<std::string, PatternComprehensionMatching> &matchings) { + PatternVisitor visitor(symbol_table, storage); + + for (auto *expr : ret.body_.named_expressions) { + expr->Accept(visitor); + auto pattern_comprehension_matchings = visitor.getPatternComprehensionMatchings(); + for 
(auto &matching : pattern_comprehension_matchings) { + matchings.emplace(expr->name_, matching); + } + } +} + +void PatternVisitor::Visit(NamedExpression &op) { op.expression_->Accept(*this); } + +void PatternVisitor::Visit(PatternComprehension &op) { + PatternComprehensionMatching matching; + AddMatching({op.pattern_}, op.filter_, symbol_table_, storage_, matching); + matching.result_expr = storage_.Create<NamedExpression>(symbol_table_.at(op).name(), op.resultExpr_); + matching.result_expr->MapTo(symbol_table_.at(op)); + + pattern_comprehension_matchings_.push_back(std::move(matching)); +} + // Converts a Query to multiple QueryParts. In the process new Ast nodes may be // created, e.g. filter expressions. std::vector<SingleQueryPart> CollectSingleQueryParts(SymbolTable &symbol_table, AstStorage &storage, @@ -703,7 +737,8 @@ std::vector<SingleQueryPart> CollectSingleQueryParts(SymbolTable &symbol_table, // This query part is done, continue with a new one. query_parts.emplace_back(SingleQueryPart{}); query_part = &query_parts.back(); - } else if (utils::IsSubtype(*clause, Return::kType)) { + } else if (auto *ret = utils::Downcast<Return>(clause)) { + ParseReturn(*ret, storage, symbol_table, query_part->pattern_comprehension_matchings); return query_parts; } } diff --git a/src/query/plan/preprocess.hpp b/src/query/plan/preprocess.hpp index 01b10ebaf..5d4e2e8d2 100644 --- a/src/query/plan/preprocess.hpp +++ b/src/query/plan/preprocess.hpp @@ -153,19 +153,20 @@ struct Expansion { ExpansionGroupId expansion_group_id = ExpansionGroupId(); }; +struct PatternComprehensionMatching; struct FilterMatching; enum class PatternFilterType { EXISTS }; -/// Collects matchings from filters that include patterns -class PatternFilterVisitor : public ExpressionVisitor<void> { +/// Collects matchings that include patterns +class PatternVisitor : public ExpressionVisitor<void> { public: - explicit PatternFilterVisitor(SymbolTable &symbol_table, AstStorage &storage); - 
PatternFilterVisitor(const PatternFilterVisitor &); - PatternFilterVisitor &operator=(const PatternFilterVisitor &) = delete; - PatternFilterVisitor(PatternFilterVisitor &&) noexcept; - PatternFilterVisitor &operator=(PatternFilterVisitor &&) noexcept = delete; - ~PatternFilterVisitor() override; + explicit PatternVisitor(SymbolTable &symbol_table, AstStorage &storage); + PatternVisitor(const PatternVisitor &); + PatternVisitor &operator=(const PatternVisitor &) = delete; + PatternVisitor(PatternVisitor &&) noexcept; + PatternVisitor &operator=(PatternVisitor &&) noexcept = delete; + ~PatternVisitor() override; using ExpressionVisitor<void>::Visit; @@ -233,18 +234,22 @@ class PatternFilterVisitor : public ExpressionVisitor<void> { void Visit(PropertyLookup &op) override{}; void Visit(AllPropertiesLookup &op) override{}; void Visit(ParameterLookup &op) override{}; - void Visit(NamedExpression &op) override{}; void Visit(RegexMatch &op) override{}; - void Visit(PatternComprehension &op) override{}; + void Visit(NamedExpression &op) override; + void Visit(PatternComprehension &op) override; - std::vector<FilterMatching> getMatchings(); + std::vector<FilterMatching> getFilterMatchings(); + std::vector<PatternComprehensionMatching> getPatternComprehensionMatchings(); SymbolTable &symbol_table_; AstStorage &storage_; private: /// Collection of matchings in the filter expression being analyzed. - std::vector<FilterMatching> matchings_; + std::vector<FilterMatching> filter_matchings_; + + /// Collection of matchings in the pattern comprehension being analyzed. + std::vector<PatternComprehensionMatching> pattern_comprehension_matchings_; }; /// Stores the symbols and expression used to filter a property. 
@@ -495,6 +500,11 @@ inline auto Filters::IdFilters(const Symbol &symbol) const -> std::vector<Filter return filters; } +struct PatternComprehensionMatching : Matching { + /// Pattern comprehension result named expression + NamedExpression *result_expr = nullptr; +}; + /// @brief Represents a read (+ write) part of a query. Parts are split on /// `WITH` clauses. /// @@ -537,6 +547,14 @@ struct SingleQueryPart { /// in the `remaining_clauses` but rather in the `Foreach` itself and are guranteed /// to be processed in the same order by the semantics of the `RuleBasedPlanner`. std::vector<Matching> merge_matching{}; + + /// @brief @c NamedExpression name to @c PatternComprehensionMatching for each pattern comprehension. + /// + /// Storing the normalized pattern of a @c PatternComprehension does not preclude storing the + /// @c PatternComprehension clause itself inside `remaining_clauses`. The reason is that we + /// need to have access to other parts of the clause, such as pattern, filter clauses. + std::unordered_map<std::string, PatternComprehensionMatching> pattern_comprehension_matchings{}; + /// @brief All the remaining clauses (without @c Match). std::vector<Clause *> remaining_clauses{}; /// The subqueries vector are all the subqueries in this query part ordered in a list by diff --git a/src/query/plan/pretty_print.cpp b/src/query/plan/pretty_print.cpp index a2df9422c..5dd272052 100644 --- a/src/query/plan/pretty_print.cpp +++ b/src/query/plan/pretty_print.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -76,6 +76,13 @@ bool PlanPrinter::PreVisit(ScanAllById &op) { return true; } +bool PlanPrinter::PreVisit(query::plan::ScanAllByEdgeType &op) { + op.dba_ = dba_; + WithPrintLn([&op](auto &out) { out << "* " << op.ToString(); }); + op.dba_ = nullptr; + return true; +} + bool PlanPrinter::PreVisit(query::plan::Expand &op) { op.dba_ = dba_; WithPrintLn([&op](auto &out) { out << "* " << op.ToString(); }); @@ -143,6 +150,13 @@ bool PlanPrinter::PreVisit(query::plan::Union &op) { return false; } +bool PlanPrinter::PreVisit(query::plan::RollUpApply &op) { + WithPrintLn([&op](auto &out) { out << "* " << op.ToString(); }); + Branch(*op.list_collection_branch_); + op.input_->Accept(*this); + return false; +} + bool PlanPrinter::PreVisit(query::plan::CallProcedure &op) { WithPrintLn([&op](auto &out) { out << "* " << op.ToString(); }); return true; @@ -326,7 +340,7 @@ json ToJson(NamedExpression *nexpr) { return json; } -json ToJson(const std::vector<std::pair<storage::PropertyId, Expression *>> &properties, const DbAccessor &dba) { +json ToJson(const PropertiesMapList &properties, const DbAccessor &dba) { json json; for (const auto &prop_pair : properties) { json.emplace(ToJson(prop_pair.first, dba), ToJson(prop_pair.second)); @@ -334,6 +348,18 @@ json ToJson(const std::vector<std::pair<storage::PropertyId, Expression *>> &pro return json; } +json ToJson(const std::vector<StorageLabelType> &labels, const DbAccessor &dba) { + json json; + for (const auto &label : labels) { + if (const auto *label_node = std::get_if<Expression *>(&label)) { + json.emplace_back(ToJson(*label_node)); + } else { + json.emplace_back(ToJson(std::get<storage::LabelId>(label), dba)); + } + } + return json; +} + json ToJson(const NodeCreationInfo &node_info, const DbAccessor &dba) { json self; self["symbol"] = 
ToJson(node_info.symbol); @@ -457,6 +483,19 @@ bool PlanToJsonVisitor::PreVisit(ScanAllById &op) { return false; } +bool PlanToJsonVisitor::PreVisit(ScanAllByEdgeType &op) { + json self; + self["name"] = "ScanAllByEdgeType"; + self["edge_type"] = ToJson(op.edge_type_, *dba_); + self["output_symbol"] = ToJson(op.output_symbol_); + + op.input_->Accept(*this); + self["input"] = PopOutput(); + + output_ = std::move(self); + return false; +} + bool PlanToJsonVisitor::PreVisit(CreateNode &op) { json self; self["name"] = "CreateNode"; @@ -627,7 +666,6 @@ bool PlanToJsonVisitor::PreVisit(SetLabels &op) { self["name"] = "SetLabels"; self["input_symbol"] = ToJson(op.input_symbol_); self["labels"] = ToJson(op.labels_, *dba_); - op.input_->Accept(*this); self["input"] = PopOutput(); @@ -742,7 +780,7 @@ bool PlanToJsonVisitor::PreVisit(OrderBy &op) { for (auto i = 0; i < op.order_by_.size(); ++i) { json json; - json["ordering"] = ToString(op.compare_.ordering_[i]); + json["ordering"] = ToString(op.compare_.orderings()[i].ordering()); json["expression"] = ToJson(op.order_by_[i]); self["order_by"].push_back(json); } diff --git a/src/query/plan/pretty_print.hpp b/src/query/plan/pretty_print.hpp index 645fe17a5..d62ae6bf2 100644 --- a/src/query/plan/pretty_print.hpp +++ b/src/query/plan/pretty_print.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -67,6 +67,7 @@ class PlanPrinter : public virtual HierarchicalLogicalOperatorVisitor { bool PreVisit(ScanAllByLabelPropertyRange &) override; bool PreVisit(ScanAllByLabelProperty &) override; bool PreVisit(ScanAllById &) override; + bool PreVisit(ScanAllByEdgeType &) override; bool PreVisit(Expand &) override; bool PreVisit(ExpandVariable &) override; @@ -91,6 +92,7 @@ class PlanPrinter : public virtual HierarchicalLogicalOperatorVisitor { bool PreVisit(OrderBy &) override; bool PreVisit(Distinct &) override; bool PreVisit(Union &) override; + bool PreVisit(RollUpApply &) override; bool PreVisit(Unwind &) override; bool PreVisit(CallProcedure &) override; @@ -203,6 +205,7 @@ class PlanToJsonVisitor : public virtual HierarchicalLogicalOperatorVisitor { bool PreVisit(ScanAllByLabelPropertyValue &) override; bool PreVisit(ScanAllByLabelProperty &) override; bool PreVisit(ScanAllById &) override; + bool PreVisit(ScanAllByEdgeType &) override; bool PreVisit(EmptyResult &) override; bool PreVisit(Produce &) override; diff --git a/src/query/plan/rewrite/edge_type_index_lookup.hpp b/src/query/plan/rewrite/edge_type_index_lookup.hpp new file mode 100644 index 000000000..893fef970 --- /dev/null +++ b/src/query/plan/rewrite/edge_type_index_lookup.hpp @@ -0,0 +1,546 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. 
+// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +/// @file +/// This file provides a plan rewriter which replaces `ScanAll` and `Expand` +/// operations with `ScanAllByEdgeType` if possible. The public entrypoint is +/// `RewriteWithEdgeTypeIndexRewriter`. + +#pragma once + +#include <algorithm> +#include <memory> +#include <optional> +#include <unordered_map> +#include <unordered_set> +#include <vector> + +#include <gflags/gflags.h> + +#include "query/plan/operator.hpp" +#include "query/plan/preprocess.hpp" +#include "query/plan/rewrite/index_lookup.hpp" +#include "utils/algorithm.hpp" + +namespace memgraph::query::plan { + +namespace impl { + +template <class TDbAccessor> +class EdgeTypeIndexRewriter final : public HierarchicalLogicalOperatorVisitor { + public: + EdgeTypeIndexRewriter(SymbolTable *symbol_table, AstStorage *ast_storage, TDbAccessor *db) + : symbol_table_(symbol_table), ast_storage_(ast_storage), db_(db) {} + + using HierarchicalLogicalOperatorVisitor::PostVisit; + using HierarchicalLogicalOperatorVisitor::PreVisit; + using HierarchicalLogicalOperatorVisitor::Visit; + + bool Visit(Once &) override { return true; } + + bool PreVisit(Filter &op) override { + prev_ops_.push_back(&op); + return true; + } + + bool PostVisit(Filter & /*op*/) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(ScanAll &op) override { + prev_ops_.push_back(&op); + + if (op.input()->GetTypeInfo() == Once::kType) { + const bool is_node_anon = op.output_symbol_.IsSymbolAnonym(); + once_under_scanall_ = is_node_anon; + } + + return true; + } + + bool PostVisit(ScanAll &op) override { + prev_ops_.pop_back(); + + if (EdgeTypeIndexingPossible()) { + SetOnParent(op.input()); + } + + return true; + } + + bool PreVisit(Expand &op) override { + prev_ops_.push_back(&op); + + if 
(op.input()->GetTypeInfo() == ScanAll::kType) { + const bool only_one_edge_type = (op.common_.edge_types.size() == 1U); + const bool expansion_is_named = !(op.common_.edge_symbol.IsSymbolAnonym()); + const bool expdanded_node_not_named = op.common_.node_symbol.IsSymbolAnonym(); + + edge_type_index_exist = only_one_edge_type ? db_->EdgeTypeIndexExists(op.common_.edge_types.front()) : false; + + scanall_under_expand_ = only_one_edge_type && expansion_is_named && expdanded_node_not_named; + } + + return true; + } + + bool PostVisit(Expand &op) override { + prev_ops_.pop_back(); + + if (EdgeTypeIndexingPossible()) { + auto indexed_scan = GenEdgeTypeScan(op); + SetOnParent(std::move(indexed_scan)); + } + + return true; + } + + bool PreVisit(ExpandVariable &op) override { + prev_ops_.push_back(&op); + return true; + } + + bool PostVisit(ExpandVariable &expand) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(Merge &op) override { + prev_ops_.push_back(&op); + op.input()->Accept(*this); + RewriteBranch(&op.merge_match_); + return false; + } + + bool PostVisit(Merge &) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(Optional &op) override { + prev_ops_.push_back(&op); + op.input()->Accept(*this); + RewriteBranch(&op.optional_); + return false; + } + + bool PostVisit(Optional &) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(Cartesian &op) override { + prev_ops_.push_back(&op); + return true; + } + + bool PostVisit(Cartesian &) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(IndexedJoin &op) override { + prev_ops_.push_back(&op); + RewriteBranch(&op.main_branch_); + RewriteBranch(&op.sub_branch_); + return false; + } + + bool PostVisit(IndexedJoin &) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(HashJoin &op) override { + prev_ops_.push_back(&op); + return true; + } + + bool PostVisit(HashJoin &) override { + prev_ops_.pop_back(); + return true; + } + + 
bool PreVisit(Union &op) override { + prev_ops_.push_back(&op); + RewriteBranch(&op.left_op_); + RewriteBranch(&op.right_op_); + return false; + } + + bool PostVisit(Union &) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(CreateNode &op) override { + prev_ops_.push_back(&op); + return true; + } + bool PostVisit(CreateNode &) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(CreateExpand &op) override { + prev_ops_.push_back(&op); + return true; + } + bool PostVisit(CreateExpand &) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(ScanAllByLabel &op) override { + prev_ops_.push_back(&op); + return true; + } + bool PostVisit(ScanAllByLabel &) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(ScanAllByLabelPropertyRange &op) override { + prev_ops_.push_back(&op); + return true; + } + bool PostVisit(ScanAllByLabelPropertyRange &) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(ScanAllByLabelPropertyValue &op) override { + prev_ops_.push_back(&op); + return true; + } + bool PostVisit(ScanAllByLabelPropertyValue &) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(ScanAllByLabelProperty &op) override { + prev_ops_.push_back(&op); + return true; + } + bool PostVisit(ScanAllByLabelProperty &) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(ScanAllById &op) override { + prev_ops_.push_back(&op); + return true; + } + bool PostVisit(ScanAllById &) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(ScanAllByEdgeType &op) override { + prev_ops_.push_back(&op); + return true; + } + bool PostVisit(ScanAllByEdgeType &) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(ConstructNamedPath &op) override { + prev_ops_.push_back(&op); + return true; + } + bool PostVisit(ConstructNamedPath &) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(Produce &op) override { 
+ prev_ops_.push_back(&op); + + if (op.input()->GetTypeInfo() == Expand::kType) { + expand_under_produce_ = true; + } + + return true; + } + bool PostVisit(Produce &) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(EmptyResult &op) override { + prev_ops_.push_back(&op); + return true; + } + bool PostVisit(EmptyResult &) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(Delete &op) override { + prev_ops_.push_back(&op); + return true; + } + bool PostVisit(Delete &) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(SetProperty &op) override { + prev_ops_.push_back(&op); + return true; + } + bool PostVisit(SetProperty &) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(SetProperties &op) override { + prev_ops_.push_back(&op); + return true; + } + bool PostVisit(SetProperties &) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(SetLabels &op) override { + prev_ops_.push_back(&op); + return true; + } + bool PostVisit(SetLabels &) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(RemoveProperty &op) override { + prev_ops_.push_back(&op); + return true; + } + bool PostVisit(RemoveProperty &) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(RemoveLabels &op) override { + prev_ops_.push_back(&op); + return true; + } + bool PostVisit(RemoveLabels &) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(EdgeUniquenessFilter &op) override { + prev_ops_.push_back(&op); + return true; + } + bool PostVisit(EdgeUniquenessFilter &) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(Accumulate &op) override { + prev_ops_.push_back(&op); + return true; + } + bool PostVisit(Accumulate &) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(Aggregate &op) override { + prev_ops_.push_back(&op); + return true; + } + bool PostVisit(Aggregate &) override { + 
prev_ops_.pop_back(); + return true; + } + + bool PreVisit(Skip &op) override { + prev_ops_.push_back(&op); + return true; + } + bool PostVisit(Skip &) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(Limit &op) override { + prev_ops_.push_back(&op); + return true; + } + bool PostVisit(Limit &) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(OrderBy &op) override { + prev_ops_.push_back(&op); + return true; + } + bool PostVisit(OrderBy &) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(Unwind &op) override { + prev_ops_.push_back(&op); + return true; + } + bool PostVisit(Unwind &) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(Distinct &op) override { + prev_ops_.push_back(&op); + return true; + } + bool PostVisit(Distinct &) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(CallProcedure &op) override { + prev_ops_.push_back(&op); + return true; + } + bool PostVisit(CallProcedure &) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(Foreach &op) override { + prev_ops_.push_back(&op); + op.input()->Accept(*this); + RewriteBranch(&op.update_clauses_); + return false; + } + + bool PostVisit(Foreach &) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(EvaluatePatternFilter &op) override { + prev_ops_.push_back(&op); + return true; + } + + bool PostVisit(EvaluatePatternFilter & /*op*/) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(Apply &op) override { + prev_ops_.push_back(&op); + op.input()->Accept(*this); + RewriteBranch(&op.subquery_); + return false; + } + + bool PostVisit(Apply & /*op*/) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(LoadCsv &op) override { + prev_ops_.push_back(&op); + return true; + } + + bool PostVisit(LoadCsv & /*op*/) override { + prev_ops_.pop_back(); + return true; + } + + bool PreVisit(RollUpApply &op) override { + 
prev_ops_.push_back(&op); + op.input()->Accept(*this); + RewriteBranch(&op.list_collection_branch_); + return false; + } + + bool PostVisit(RollUpApply &) override { + prev_ops_.pop_back(); + return true; + } + + std::shared_ptr<LogicalOperator> new_root_; + + private: + SymbolTable *symbol_table_; + AstStorage *ast_storage_; + TDbAccessor *db_; + // Collected filters, pending for examination if they can be used for advanced + // lookup operations (by index, node ID, ...). + Filters filters_; + // Expressions which no longer need a plain Filter operator. + std::unordered_set<Expression *> filter_exprs_for_removal_; + std::vector<LogicalOperator *> prev_ops_; + std::unordered_set<Symbol> cartesian_symbols_; + + bool EdgeTypeIndexingPossible() const { + return expand_under_produce_ && scanall_under_expand_ && once_under_scanall_ && edge_type_index_exist; + } + bool expand_under_produce_ = false; + bool scanall_under_expand_ = false; + bool once_under_scanall_ = false; + bool edge_type_index_exist = false; + + bool DefaultPreVisit() override { + throw utils::NotYetImplemented("Operator not yet covered by EdgeTypeIndexRewriter"); + } + + std::unique_ptr<ScanAllByEdgeType> GenEdgeTypeScan(const Expand &expand) { + const auto &input = expand.input(); + const auto &output_symbol = expand.common_.edge_symbol; + const auto &view = expand.view_; + + // Extract edge_type from symbol + auto edge_type = expand.common_.edge_types.front(); + return std::make_unique<ScanAllByEdgeType>(input, output_symbol, edge_type, view); + } + + void SetOnParent(const std::shared_ptr<LogicalOperator> &input) { + MG_ASSERT(input); + if (prev_ops_.empty()) { + MG_ASSERT(!new_root_); + new_root_ = input; + return; + } + prev_ops_.back()->set_input(input); + } + + void RewriteBranch(std::shared_ptr<LogicalOperator> *branch) { + EdgeTypeIndexRewriter<TDbAccessor> rewriter(symbol_table_, ast_storage_, db_); + (*branch)->Accept(rewriter); + if (rewriter.new_root_) { + *branch = rewriter.new_root_; + } 
+ } +}; + +} // namespace impl + +template <class TDbAccessor> +std::unique_ptr<LogicalOperator> RewriteWithEdgeTypeIndexRewriter(std::unique_ptr<LogicalOperator> root_op, + SymbolTable *symbol_table, AstStorage *ast_storage, + TDbAccessor *db) { + impl::EdgeTypeIndexRewriter<TDbAccessor> rewriter(symbol_table, ast_storage, db); + root_op->Accept(rewriter); + return root_op; +} + +} // namespace memgraph::query::plan diff --git a/src/query/plan/rewrite/index_lookup.hpp b/src/query/plan/rewrite/index_lookup.hpp index 09c6e2014..90c222b42 100644 --- a/src/query/plan/rewrite/index_lookup.hpp +++ b/src/query/plan/rewrite/index_lookup.hpp @@ -595,6 +595,18 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor { return true; } + bool PreVisit(RollUpApply &op) override { + prev_ops_.push_back(&op); + op.input()->Accept(*this); + RewriteBranch(&op.list_collection_branch_); + return false; + } + + bool PostVisit(RollUpApply &) override { + prev_ops_.pop_back(); + return true; + } + std::shared_ptr<LogicalOperator> new_root_; private: diff --git a/src/query/plan/rewrite/join.hpp b/src/query/plan/rewrite/join.hpp index e346ded45..9ef6c6aec 100644 --- a/src/query/plan/rewrite/join.hpp +++ b/src/query/plan/rewrite/join.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -455,6 +455,18 @@ class JoinRewriter final : public HierarchicalLogicalOperatorVisitor { return true; } + bool PreVisit(RollUpApply &op) override { + prev_ops_.push_back(&op); + op.input()->Accept(*this); + RewriteBranch(&op.list_collection_branch_); + return false; + } + + bool PostVisit(RollUpApply &) override { + prev_ops_.pop_back(); + return true; + } + std::shared_ptr<LogicalOperator> new_root_; private: diff --git a/src/query/plan/rule_based_planner.cpp b/src/query/plan/rule_based_planner.cpp index bf5e66158..54b5c3834 100644 --- a/src/query/plan/rule_based_planner.cpp +++ b/src/query/plan/rule_based_planner.cpp @@ -14,9 +14,12 @@ #include <algorithm> #include <functional> #include <limits> +#include <memory> #include <stack> #include <unordered_set> +#include "query/frontend/ast/ast.hpp" +#include "query/plan/operator.hpp" #include "query/plan/preprocess.hpp" #include "utils/algorithm.hpp" #include "utils/exceptions.hpp" @@ -40,7 +43,8 @@ namespace { class ReturnBodyContext : public HierarchicalTreeVisitor { public: ReturnBodyContext(const ReturnBody &body, SymbolTable &symbol_table, const std::unordered_set<Symbol> &bound_symbols, - AstStorage &storage, Where *where = nullptr) + AstStorage &storage, std::unordered_map<std::string, std::shared_ptr<LogicalOperator>> pc_ops, + Where *where = nullptr) : body_(body), symbol_table_(symbol_table), bound_symbols_(bound_symbols), storage_(storage), where_(where) { // Collect symbols from named expressions. 
output_symbols_.reserve(body_.named_expressions.size()); @@ -53,6 +57,14 @@ class ReturnBodyContext : public HierarchicalTreeVisitor { output_symbols_.emplace_back(symbol_table_.at(*named_expr)); named_expr->Accept(*this); named_expressions_.emplace_back(named_expr); + if (pattern_comprehension_) { + if (auto it = pc_ops.find(named_expr->name_); it != pc_ops.end()) { + pattern_comprehension_op_ = std::move(it->second); + pc_ops.erase(it); + } else { + throw utils::NotYetImplemented("Operation on top of pattern comprehension"); + } + } } // Collect symbols used in group by expressions. if (!aggregations_.empty()) { @@ -386,8 +398,20 @@ class ReturnBodyContext : public HierarchicalTreeVisitor { return true; } - bool PostVisit(PatternComprehension & /*unused*/) override { - throw utils::NotYetImplemented("Planner can not handle pattern comprehension."); + bool PreVisit(PatternComprehension & /*unused*/) override { + pattern_compression_aggregations_start_index_ = has_aggregation_.size(); + return true; + } + + bool PostVisit(PatternComprehension &pattern_comprehension) override { + bool has_aggr = false; + for (auto i = has_aggregation_.size(); i > pattern_compression_aggregations_start_index_; --i) { + has_aggr |= has_aggregation_.back(); + has_aggregation_.pop_back(); + } + has_aggregation_.emplace_back(has_aggr); + pattern_comprehension_ = &pattern_comprehension; + return true; } // Creates NamedExpression with an Identifier for each user declared symbol. @@ -444,6 +468,10 @@ class ReturnBodyContext : public HierarchicalTreeVisitor { // named_expressions. const auto &output_symbols() const { return output_symbols_; } + const auto *pattern_comprehension() const { return pattern_comprehension_; } + + std::shared_ptr<LogicalOperator> pattern_comprehension_op() const { return pattern_comprehension_op_; } + private: const ReturnBody &body_; SymbolTable &symbol_table_; @@ -465,10 +493,13 @@ class ReturnBodyContext : public HierarchicalTreeVisitor { // group by it. 
std::list<bool> has_aggregation_; std::vector<NamedExpression *> named_expressions_; + PatternComprehension *pattern_comprehension_ = nullptr; + std::shared_ptr<LogicalOperator> pattern_comprehension_op_; + size_t pattern_compression_aggregations_start_index_ = 0; }; std::unique_ptr<LogicalOperator> GenReturnBody(std::unique_ptr<LogicalOperator> input_op, bool advance_command, - const ReturnBodyContext &body, bool accumulate = false) { + const ReturnBodyContext &body, bool accumulate) { std::vector<Symbol> used_symbols(body.used_symbols().begin(), body.used_symbols().end()); auto last_op = std::move(input_op); if (accumulate) { @@ -482,6 +513,11 @@ std::unique_ptr<LogicalOperator> GenReturnBody(std::unique_ptr<LogicalOperator> std::vector<Symbol> remember(body.group_by_used_symbols().begin(), body.group_by_used_symbols().end()); last_op = std::make_unique<Aggregate>(std::move(last_op), body.aggregations(), body.group_by(), remember); } + + if (body.pattern_comprehension()) { + last_op = std::make_unique<RollUpApply>(std::move(last_op), body.pattern_comprehension_op()); + } + last_op = std::make_unique<Produce>(std::move(last_op), body.named_expressions()); // Distinct in ReturnBody only makes Produce values unique, so plan after it. 
if (body.distinct()) { @@ -506,6 +542,7 @@ std::unique_ptr<LogicalOperator> GenReturnBody(std::unique_ptr<LogicalOperator> last_op = std::make_unique<Filter>(std::move(last_op), std::vector<std::shared_ptr<LogicalOperator>>{}, body.where()->expression_); } + return last_op; } @@ -543,8 +580,9 @@ Expression *ExtractFilters(const std::unordered_set<Symbol> &bound_symbols, Filt return filter_expr; } -std::unordered_set<Symbol> GetSubqueryBoundSymbols(const std::vector<SingleQueryPart> &single_query_parts, - SymbolTable &symbol_table, AstStorage &storage) { +std::unordered_set<Symbol> GetSubqueryBoundSymbols( + const std::vector<SingleQueryPart> &single_query_parts, SymbolTable &symbol_table, AstStorage &storage, + std::unordered_map<std::string, std::shared_ptr<LogicalOperator>> pc_ops) { const auto &query = single_query_parts[0]; if (!query.matching.expansions.empty() || query.remaining_clauses.empty()) { @@ -552,7 +590,7 @@ std::unordered_set<Symbol> GetSubqueryBoundSymbols(const std::vector<SingleQuery } if (std::unordered_set<Symbol> bound_symbols; auto *with = utils::Downcast<query::With>(query.remaining_clauses[0])) { - auto input_op = impl::GenWith(*with, nullptr, symbol_table, false, bound_symbols, storage); + auto input_op = impl::GenWith(*with, nullptr, symbol_table, false, bound_symbols, storage, pc_ops); return bound_symbols; } @@ -583,7 +621,8 @@ std::unique_ptr<LogicalOperator> GenNamedPaths(std::unique_ptr<LogicalOperator> std::unique_ptr<LogicalOperator> GenReturn(Return &ret, std::unique_ptr<LogicalOperator> input_op, SymbolTable &symbol_table, bool is_write, - const std::unordered_set<Symbol> &bound_symbols, AstStorage &storage) { + const std::unordered_set<Symbol> &bound_symbols, AstStorage &storage, + std::unordered_map<std::string, std::shared_ptr<LogicalOperator>> pc_ops) { // Similar to WITH clause, but we want to accumulate when the query writes to // the database. 
This way we handle the case when we want to return // expressions with the latest updated results. For example, `MATCH (n) -- () @@ -592,13 +631,14 @@ std::unique_ptr<LogicalOperator> GenReturn(Return &ret, std::unique_ptr<LogicalO // final result of 'k' increments. bool accumulate = is_write; bool advance_command = false; - ReturnBodyContext body(ret.body_, symbol_table, bound_symbols, storage); + ReturnBodyContext body(ret.body_, symbol_table, bound_symbols, storage, pc_ops); return GenReturnBody(std::move(input_op), advance_command, body, accumulate); } std::unique_ptr<LogicalOperator> GenWith(With &with, std::unique_ptr<LogicalOperator> input_op, SymbolTable &symbol_table, bool is_write, - std::unordered_set<Symbol> &bound_symbols, AstStorage &storage) { + std::unordered_set<Symbol> &bound_symbols, AstStorage &storage, + std::unordered_map<std::string, std::shared_ptr<LogicalOperator>> pc_ops) { // WITH clause is Accumulate/Aggregate (advance_command) + Produce and // optional Filter. In case of update and aggregation, we want to accumulate // first, so that when aggregating, we get the latest results. Similar to @@ -606,7 +646,7 @@ std::unique_ptr<LogicalOperator> GenWith(With &with, std::unique_ptr<LogicalOper bool accumulate = is_write; // No need to advance the command if we only performed reads. bool advance_command = is_write; - ReturnBodyContext body(with.body_, symbol_table, bound_symbols, storage, with.where_); + ReturnBodyContext body(with.body_, symbol_table, bound_symbols, storage, pc_ops, with.where_); auto last_op = GenReturnBody(std::move(input_op), advance_command, body, accumulate); // Reset bound symbols, so that only those in WITH are exposed. 
bound_symbols.clear(); diff --git a/src/query/plan/rule_based_planner.hpp b/src/query/plan/rule_based_planner.hpp index 7fba3b623..52281de60 100644 --- a/src/query/plan/rule_based_planner.hpp +++ b/src/query/plan/rule_based_planner.hpp @@ -21,6 +21,7 @@ #include "query/frontend/ast/ast_visitor.hpp" #include "query/plan/operator.hpp" #include "query/plan/preprocess.hpp" +#include "utils/exceptions.hpp" #include "utils/logging.hpp" #include "utils/typeinfo.hpp" @@ -87,8 +88,9 @@ bool HasBoundFilterSymbols(const std::unordered_set<Symbol> &bound_symbols, cons // Returns the set of symbols for the subquery that are actually referenced from the outer scope and // used in the subquery. -std::unordered_set<Symbol> GetSubqueryBoundSymbols(const std::vector<SingleQueryPart> &single_query_parts, - SymbolTable &symbol_table, AstStorage &storage); +std::unordered_set<Symbol> GetSubqueryBoundSymbols( + const std::vector<SingleQueryPart> &single_query_parts, SymbolTable &symbol_table, AstStorage &storage, + std::unordered_map<std::string, std::shared_ptr<LogicalOperator>> pc_ops); Symbol GetSymbol(NodeAtom *atom, const SymbolTable &symbol_table); Symbol GetSymbol(EdgeAtom *atom, const SymbolTable &symbol_table); @@ -142,11 +144,13 @@ std::unique_ptr<LogicalOperator> GenNamedPaths(std::unique_ptr<LogicalOperator> std::unique_ptr<LogicalOperator> GenReturn(Return &ret, std::unique_ptr<LogicalOperator> input_op, SymbolTable &symbol_table, bool is_write, - const std::unordered_set<Symbol> &bound_symbols, AstStorage &storage); + const std::unordered_set<Symbol> &bound_symbols, AstStorage &storage, + std::unordered_map<std::string, std::shared_ptr<LogicalOperator>> pc_ops); std::unique_ptr<LogicalOperator> GenWith(With &with, std::unique_ptr<LogicalOperator> input_op, SymbolTable &symbol_table, bool is_write, - std::unordered_set<Symbol> &bound_symbols, AstStorage &storage); + std::unordered_set<Symbol> &bound_symbols, AstStorage &storage, + std::unordered_map<std::string, 
std::shared_ptr<LogicalOperator>> pc_ops); std::unique_ptr<LogicalOperator> GenUnion(const CypherUnion &cypher_union, std::shared_ptr<LogicalOperator> left_op, std::shared_ptr<LogicalOperator> right_op, SymbolTable &symbol_table); @@ -190,11 +194,24 @@ class RuleBasedPlanner { uint64_t merge_id = 0; uint64_t subquery_id = 0; + std::unordered_map<std::string, std::shared_ptr<LogicalOperator>> pattern_comprehension_ops; + + if (single_query_part.pattern_comprehension_matchings.size() > 1) { + throw utils::NotYetImplemented("Multiple pattern comprehensions."); + } + for (const auto &matching : single_query_part.pattern_comprehension_matchings) { + std::unique_ptr<LogicalOperator> new_input; + MatchContext match_ctx{matching.second, *context.symbol_table, context.bound_symbols}; + new_input = PlanMatching(match_ctx, std::move(new_input)); + new_input = std::make_unique<Produce>(std::move(new_input), std::vector{matching.second.result_expr}); + pattern_comprehension_ops.emplace(matching.first, std::move(new_input)); + } + for (const auto &clause : single_query_part.remaining_clauses) { MG_ASSERT(!utils::IsSubtype(*clause, Match::kType), "Unexpected Match in remaining clauses"); if (auto *ret = utils::Downcast<Return>(clause)) { input_op = impl::GenReturn(*ret, std::move(input_op), *context.symbol_table, context.is_write_query, - context.bound_symbols, *context.ast_storage); + context.bound_symbols, *context.ast_storage, pattern_comprehension_ops); } else if (auto *merge = utils::Downcast<query::Merge>(clause)) { input_op = GenMerge(*merge, std::move(input_op), single_query_part.merge_matching[merge_id++]); // Treat MERGE clause as write, because we do not know if it will @@ -202,7 +219,7 @@ class RuleBasedPlanner { context.is_write_query = true; } else if (auto *with = utils::Downcast<query::With>(clause)) { input_op = impl::GenWith(*with, std::move(input_op), *context.symbol_table, context.is_write_query, - context.bound_symbols, *context.ast_storage); + 
context.bound_symbols, *context.ast_storage, pattern_comprehension_ops); // WITH clause advances the command, so reset the flag. context.is_write_query = false; } else if (auto op = HandleWriteClause(clause, input_op, *context.symbol_table, context.bound_symbols)) { @@ -241,7 +258,7 @@ class RuleBasedPlanner { single_query_part, merge_id); } else if (auto *call_sub = utils::Downcast<query::CallSubquery>(clause)) { input_op = HandleSubquery(std::move(input_op), single_query_part.subqueries[subquery_id++], - *context.symbol_table, *context_->ast_storage); + *context.symbol_table, *context_->ast_storage, pattern_comprehension_ops); } else { throw utils::NotYetImplemented("clause '{}' conversion to operator(s)", clause->GetTypeInfo().name); } @@ -276,6 +293,19 @@ class RuleBasedPlanner { storage::EdgeTypeId GetEdgeType(EdgeTypeIx edge_type) { return context_->db->NameToEdgeType(edge_type.name); } + std::vector<StorageLabelType> GetLabelIds(const std::vector<QueryLabelType> &labels) { + std::vector<StorageLabelType> label_ids; + label_ids.reserve(labels.size()); + for (const auto &label : labels) { + if (const auto *label_atom = std::get_if<LabelIx>(&label)) { + label_ids.emplace_back(GetLabel(*label_atom)); + } else { + label_ids.emplace_back(std::get<Expression *>(label)); + } + } + return label_ids; + } + std::unique_ptr<LogicalOperator> HandleMatching(std::unique_ptr<LogicalOperator> last_op, const SingleQueryPart &single_query_part, SymbolTable &symbol_table, std::unordered_set<Symbol> &bound_symbols) { @@ -311,11 +341,6 @@ class RuleBasedPlanner { std::unordered_set<Symbol> &bound_symbols) { auto node_to_creation_info = [&](const NodeAtom &node) { const auto &node_symbol = symbol_table.at(*node.identifier_); - std::vector<storage::LabelId> labels; - labels.reserve(node.labels_.size()); - for (const auto &label : node.labels_) { - labels.push_back(GetLabel(label)); - } auto properties = std::invoke([&]() -> std::variant<PropertiesMapList, ParameterLookup *> { if 
(const auto *node_properties = @@ -329,7 +354,7 @@ class RuleBasedPlanner { } return std::get<ParameterLookup *>(node.properties_); }); - return NodeCreationInfo{node_symbol, labels, properties}; + return NodeCreationInfo{node_symbol, GetLabelIds(node.labels_), properties}; }; auto base = [&](NodeAtom *node) -> std::unique_ptr<LogicalOperator> { @@ -406,23 +431,13 @@ class RuleBasedPlanner { return std::make_unique<plan::SetProperties>(std::move(input_op), input_symbol, set->expression_, op); } else if (auto *set = utils::Downcast<query::SetLabels>(clause)) { const auto &input_symbol = symbol_table.at(*set->identifier_); - std::vector<storage::LabelId> labels; - labels.reserve(set->labels_.size()); - for (const auto &label : set->labels_) { - labels.push_back(GetLabel(label)); - } - return std::make_unique<plan::SetLabels>(std::move(input_op), input_symbol, labels); + return std::make_unique<plan::SetLabels>(std::move(input_op), input_symbol, GetLabelIds(set->labels_)); } else if (auto *rem = utils::Downcast<query::RemoveProperty>(clause)) { return std::make_unique<plan::RemoveProperty>(std::move(input_op), GetProperty(rem->property_lookup_->property_), rem->property_lookup_); } else if (auto *rem = utils::Downcast<query::RemoveLabels>(clause)) { const auto &input_symbol = symbol_table.at(*rem->identifier_); - std::vector<storage::LabelId> labels; - labels.reserve(rem->labels_.size()); - for (const auto &label : rem->labels_) { - labels.push_back(GetLabel(label)); - } - return std::make_unique<plan::RemoveLabels>(std::move(input_op), input_symbol, labels); + return std::make_unique<plan::RemoveLabels>(std::move(input_op), input_symbol, GetLabelIds(rem->labels_)); } return nullptr; } @@ -860,15 +875,15 @@ class RuleBasedPlanner { symbol); } - std::unique_ptr<LogicalOperator> HandleSubquery(std::unique_ptr<LogicalOperator> last_op, - std::shared_ptr<QueryParts> subquery, SymbolTable &symbol_table, - AstStorage &storage) { + std::unique_ptr<LogicalOperator> 
HandleSubquery( + std::unique_ptr<LogicalOperator> last_op, std::shared_ptr<QueryParts> subquery, SymbolTable &symbol_table, + AstStorage &storage, std::unordered_map<std::string, std::shared_ptr<LogicalOperator>> pc_ops) { std::unordered_set<Symbol> outer_scope_bound_symbols; outer_scope_bound_symbols.insert(std::make_move_iterator(context_->bound_symbols.begin()), std::make_move_iterator(context_->bound_symbols.end())); context_->bound_symbols = - impl::GetSubqueryBoundSymbols(subquery->query_parts[0].single_query_parts, symbol_table, storage); + impl::GetSubqueryBoundSymbols(subquery->query_parts[0].single_query_parts, symbol_table, storage, pc_ops); auto subquery_op = Plan(*subquery); diff --git a/src/query/plan/vertex_count_cache.hpp b/src/query/plan/vertex_count_cache.hpp index 4cfb2486b..802f4e09f 100644 --- a/src/query/plan/vertex_count_cache.hpp +++ b/src/query/plan/vertex_count_cache.hpp @@ -78,6 +78,8 @@ class VertexCountCache { return db_->LabelPropertyIndexExists(label, property); } + bool EdgeTypeIndexExists(storage::EdgeTypeId edge_type) { return db_->EdgeTypeIndexExists(edge_type); } + std::optional<storage::LabelIndexStats> GetIndexStats(const storage::LabelId &label) const { return db_->GetIndexStats(label); } diff --git a/src/query/procedure/module.hpp b/src/query/procedure/module.hpp index 41cda0ca6..f5027dafa 100644 --- a/src/query/procedure/module.hpp +++ b/src/query/procedure/module.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source diff --git a/src/query/procedure/py_module.hpp b/src/query/procedure/py_module.hpp index 9cb22fe2c..fe93b5c51 100644 --- a/src/query/procedure/py_module.hpp +++ b/src/query/procedure/py_module.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source diff --git a/src/query/trigger.cpp b/src/query/trigger.cpp index 437389128..151a33dad 100644 --- a/src/query/trigger.cpp +++ b/src/query/trigger.cpp @@ -191,9 +191,9 @@ std::shared_ptr<Trigger::TriggerPlan> Trigger::GetPlan(DbAccessor *db_accessor) return trigger_plan_; } -void Trigger::Execute(DbAccessor *dba, utils::MonotonicBufferResource *execution_memory, - const double max_execution_time_sec, std::atomic<bool> *is_shutting_down, - std::atomic<TransactionStatus> *transaction_status, const TriggerContext &context) const { +void Trigger::Execute(DbAccessor *dba, utils::MemoryResource *execution_memory, const double max_execution_time_sec, + std::atomic<bool> *is_shutting_down, std::atomic<TransactionStatus> *transaction_status, + const TriggerContext &context) const { if (!context.ShouldEventTrigger(event_type_)) { return; } @@ -214,22 +214,7 @@ void Trigger::Execute(DbAccessor *dba, utils::MonotonicBufferResource *execution ctx.is_shutting_down = is_shutting_down; ctx.transaction_status = transaction_status; ctx.is_profile_query = false; - - // Set up temporary memory for a single Pull. Initial memory comes from the - // stack. 256 KiB should fit on the stack and should be more than enough for a - // single `Pull`. - static constexpr size_t stack_size = 256UL * 1024UL; - char stack_data[stack_size]; - - // We can throw on every query because a simple queries for deleting will use only - // the stack allocated buffer. - // Also, we want to throw only when the query engine requests more memory and not the storage - // so we add the exception to the allocator. 
- utils::ResourceWithOutOfMemoryException resource_with_exception; - utils::MonotonicBufferResource monotonic_memory(&stack_data[0], stack_size, &resource_with_exception); - // TODO (mferencevic): Tune the parameters accordingly. - utils::PoolResource pool_memory(128, 1024, &monotonic_memory); - ctx.evaluation_context.memory = &pool_memory; + ctx.evaluation_context.memory = execution_memory; auto cursor = plan.plan().MakeCursor(execution_memory); Frame frame{plan.symbol_table().max_position(), execution_memory}; diff --git a/src/query/trigger.hpp b/src/query/trigger.hpp index 91c74579e..24bbf50ee 100644 --- a/src/query/trigger.hpp +++ b/src/query/trigger.hpp @@ -39,7 +39,7 @@ struct Trigger { utils::SkipList<QueryCacheEntry> *query_cache, DbAccessor *db_accessor, const InterpreterConfig::Query &query_config, std::shared_ptr<QueryUserOrRole> owner); - void Execute(DbAccessor *dba, utils::MonotonicBufferResource *execution_memory, double max_execution_time_sec, + void Execute(DbAccessor *dba, utils::MemoryResource *execution_memory, double max_execution_time_sec, std::atomic<bool> *is_shutting_down, std::atomic<TransactionStatus> *transaction_status, const TriggerContext &context) const; diff --git a/src/query/typed_value.cpp b/src/query/typed_value.cpp index 86d25f01b..059e1b1ba 100644 --- a/src/query/typed_value.cpp +++ b/src/query/typed_value.cpp @@ -321,6 +321,20 @@ TypedValue::operator storage::PropertyValue() const { throw TypedValueException("Unsupported conversion from TypedValue to PropertyValue"); } +// NOLINTNEXTLINE(cppcoreguidelines-macro-usage) +#define DEFINE_VALUE_AND_TYPE_GETTERS_PRIMITIVE(type_param, type_enum, field) \ + type_param &TypedValue::Value##type_enum() { \ + if (type_ != Type::type_enum) [[unlikely]] \ + throw TypedValueException("TypedValue is of type '{}', not '{}'", type_, Type::type_enum); \ + return field; \ + } \ + type_param TypedValue::Value##type_enum() const { \ + if (type_ != Type::type_enum) [[unlikely]] \ + throw 
TypedValueException("TypedValue is of type '{}', not '{}'", type_, Type::type_enum); \ + return field; \ + } \ + bool TypedValue::Is##type_enum() const { return type_ == Type::type_enum; } + #define DEFINE_VALUE_AND_TYPE_GETTERS(type_param, type_enum, field) \ type_param &TypedValue::Value##type_enum() { \ if (type_ != Type::type_enum) [[unlikely]] \ @@ -334,9 +348,9 @@ TypedValue::operator storage::PropertyValue() const { } \ bool TypedValue::Is##type_enum() const { return type_ == Type::type_enum; } -DEFINE_VALUE_AND_TYPE_GETTERS(bool, Bool, bool_v) -DEFINE_VALUE_AND_TYPE_GETTERS(int64_t, Int, int_v) -DEFINE_VALUE_AND_TYPE_GETTERS(double, Double, double_v) +DEFINE_VALUE_AND_TYPE_GETTERS_PRIMITIVE(bool, Bool, bool_v) +DEFINE_VALUE_AND_TYPE_GETTERS_PRIMITIVE(int64_t, Int, int_v) +DEFINE_VALUE_AND_TYPE_GETTERS_PRIMITIVE(double, Double, double_v) DEFINE_VALUE_AND_TYPE_GETTERS(TypedValue::TString, String, string_v) DEFINE_VALUE_AND_TYPE_GETTERS(TypedValue::TVector, List, list_v) DEFINE_VALUE_AND_TYPE_GETTERS(TypedValue::TMap, Map, map_v) @@ -348,24 +362,10 @@ DEFINE_VALUE_AND_TYPE_GETTERS(utils::LocalTime, LocalTime, local_time_v) DEFINE_VALUE_AND_TYPE_GETTERS(utils::LocalDateTime, LocalDateTime, local_date_time_v) DEFINE_VALUE_AND_TYPE_GETTERS(utils::Duration, Duration, duration_v) DEFINE_VALUE_AND_TYPE_GETTERS(std::function<void(TypedValue *)>, Function, function_v) - -Graph &TypedValue::ValueGraph() { - if (type_ != Type::Graph) { - throw TypedValueException("TypedValue is of type '{}', not '{}'", type_, Type::Graph); - } - return *graph_v; -} - -const Graph &TypedValue::ValueGraph() const { - if (type_ != Type::Graph) { - throw TypedValueException("TypedValue is of type '{}', not '{}'", type_, Type::Graph); - } - return *graph_v; -} - -bool TypedValue::IsGraph() const { return type_ == Type::Graph; } +DEFINE_VALUE_AND_TYPE_GETTERS(Graph, Graph, *graph_v) #undef DEFINE_VALUE_AND_TYPE_GETTERS +#undef DEFINE_VALUE_AND_TYPE_GETTERS_PRIMITIVE bool 
TypedValue::ContainsDeleted() const { switch (type_) { @@ -399,8 +399,6 @@ bool TypedValue::ContainsDeleted() const { return false; } -bool TypedValue::IsNull() const { return type_ == Type::Null; } - bool TypedValue::IsNumeric() const { return IsInt() || IsDouble(); } bool TypedValue::IsPropertyValue() const { diff --git a/src/query/typed_value.hpp b/src/query/typed_value.hpp index a1353869a..9b9346a1c 100644 --- a/src/query/typed_value.hpp +++ b/src/query/typed_value.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -475,50 +475,51 @@ class TypedValue { Type type() const { return type_; } - // TODO consider adding getters for primitives by value (and not by ref) +#define DECLARE_VALUE_AND_TYPE_GETTERS_PRIMITIVE(type_param, type_enum, field) \ + /** Gets the value of type field. Throws if value is not field*/ \ + type_param &Value##type_enum(); \ + /** Gets the value of type field. Throws if value is not field*/ \ + type_param Value##type_enum() const; \ + /** Checks if it's the value is of the given type */ \ + bool Is##type_enum() const; \ + /** Get the value of the type field. Unchecked */ \ + type_param UnsafeValue##type_enum() const { return field; } -#define DECLARE_VALUE_AND_TYPE_GETTERS(type_param, field) \ - /** Gets the value of type field. Throws if value is not field*/ \ - type_param &Value##field(); \ - /** Gets the value of type field. Throws if value is not field*/ \ - const type_param &Value##field() const; \ - /** Checks if it's the value is of the given type */ \ - bool Is##field() const; +#define DECLARE_VALUE_AND_TYPE_GETTERS(type_param, type_enum, field) \ + /** Gets the value of type field. Throws if value is not field*/ \ + type_param &Value##type_enum(); \ + /** Gets the value of type field. 
Throws if value is not field*/ \ + const type_param &Value##type_enum() const; \ + /** Checks if it's the value is of the given type */ \ + bool Is##type_enum() const; \ + /** Get the value of the type field. Unchecked */ \ + type_param const &UnsafeValue##type_enum() const { return field; } - DECLARE_VALUE_AND_TYPE_GETTERS(bool, Bool) - DECLARE_VALUE_AND_TYPE_GETTERS(int64_t, Int) - DECLARE_VALUE_AND_TYPE_GETTERS(double, Double) - DECLARE_VALUE_AND_TYPE_GETTERS(TString, String) + DECLARE_VALUE_AND_TYPE_GETTERS_PRIMITIVE(bool, Bool, bool_v) + DECLARE_VALUE_AND_TYPE_GETTERS_PRIMITIVE(int64_t, Int, int_v) + DECLARE_VALUE_AND_TYPE_GETTERS_PRIMITIVE(double, Double, double_v) + DECLARE_VALUE_AND_TYPE_GETTERS(TString, String, string_v) - /** - * Get the list value. - * @throw TypedValueException if stored value is not a list. - */ - TVector &ValueList(); + DECLARE_VALUE_AND_TYPE_GETTERS(TVector, List, list_v) + DECLARE_VALUE_AND_TYPE_GETTERS(TMap, Map, map_v) + DECLARE_VALUE_AND_TYPE_GETTERS(VertexAccessor, Vertex, vertex_v) + DECLARE_VALUE_AND_TYPE_GETTERS(EdgeAccessor, Edge, edge_v) + DECLARE_VALUE_AND_TYPE_GETTERS(Path, Path, path_v) - const TVector &ValueList() const; - - /** Check if the stored value is a list value */ - bool IsList() const; - - DECLARE_VALUE_AND_TYPE_GETTERS(TMap, Map) - DECLARE_VALUE_AND_TYPE_GETTERS(VertexAccessor, Vertex) - DECLARE_VALUE_AND_TYPE_GETTERS(EdgeAccessor, Edge) - DECLARE_VALUE_AND_TYPE_GETTERS(Path, Path) - - DECLARE_VALUE_AND_TYPE_GETTERS(utils::Date, Date) - DECLARE_VALUE_AND_TYPE_GETTERS(utils::LocalTime, LocalTime) - DECLARE_VALUE_AND_TYPE_GETTERS(utils::LocalDateTime, LocalDateTime) - DECLARE_VALUE_AND_TYPE_GETTERS(utils::Duration, Duration) - DECLARE_VALUE_AND_TYPE_GETTERS(Graph, Graph) - DECLARE_VALUE_AND_TYPE_GETTERS(std::function<void(TypedValue *)>, Function) + DECLARE_VALUE_AND_TYPE_GETTERS(utils::Date, Date, date_v) + DECLARE_VALUE_AND_TYPE_GETTERS(utils::LocalTime, LocalTime, local_time_v) + 
DECLARE_VALUE_AND_TYPE_GETTERS(utils::LocalDateTime, LocalDateTime, local_date_time_v) + DECLARE_VALUE_AND_TYPE_GETTERS(utils::Duration, Duration, duration_v) + DECLARE_VALUE_AND_TYPE_GETTERS(Graph, Graph, *graph_v) + DECLARE_VALUE_AND_TYPE_GETTERS(std::function<void(TypedValue *)>, Function, function_v) #undef DECLARE_VALUE_AND_TYPE_GETTERS +#undef DECLARE_VALUE_AND_TYPE_GETTERS_PRIMITIVE bool ContainsDeleted() const; /** Checks if value is a TypedValue::Null. */ - bool IsNull() const; + bool IsNull() const { return type_ == Type::Null; } /** Convenience function for checking if this TypedValue is either * an integer or double */ diff --git a/src/replication_coordination_glue/CMakeLists.txt b/src/replication_coordination_glue/CMakeLists.txt index f81aed4ba..f452e1c1f 100644 --- a/src/replication_coordination_glue/CMakeLists.txt +++ b/src/replication_coordination_glue/CMakeLists.txt @@ -7,6 +7,7 @@ target_sources(mg-repl_coord_glue mode.hpp role.hpp handler.hpp + common.hpp PRIVATE messages.cpp diff --git a/src/replication_coordination_glue/common.hpp b/src/replication_coordination_glue/common.hpp new file mode 100644 index 000000000..439e5cae8 --- /dev/null +++ b/src/replication_coordination_glue/common.hpp @@ -0,0 +1,32 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +#pragma once + +#include "rpc/client.hpp" +#include "utils/uuid.hpp" + +#include <deque> +#include "messages.hpp" +#include "rpc/messages.hpp" +#include "utils/uuid.hpp" + +namespace memgraph::replication_coordination_glue { + +struct DatabaseHistory { + memgraph::utils::UUID db_uuid; + std::vector<std::pair<std::string, uint64_t>> history; + std::string name; +}; + +using DatabaseHistories = std::vector<DatabaseHistory>; + +} // namespace memgraph::replication_coordination_glue diff --git a/src/replication_coordination_glue/mode.hpp b/src/replication_coordination_glue/mode.hpp index d0b415733..4ca98b3a0 100644 --- a/src/replication_coordination_glue/mode.hpp +++ b/src/replication_coordination_glue/mode.hpp @@ -12,7 +12,19 @@ #pragma once #include <cstdint> +#include <map> +#include <stdexcept> +#include <string> + +#include "json/json.hpp" namespace memgraph::replication_coordination_glue { + enum class ReplicationMode : std::uint8_t { SYNC, ASYNC }; + +NLOHMANN_JSON_SERIALIZE_ENUM(ReplicationMode, { + {ReplicationMode::SYNC, "sync"}, + {ReplicationMode::ASYNC, "async"}, + }) + } // namespace memgraph::replication_coordination_glue diff --git a/src/replication_coordination_glue/role.hpp b/src/replication_coordination_glue/role.hpp index d472cb454..3fbf522ba 100644 --- a/src/replication_coordination_glue/role.hpp +++ b/src/replication_coordination_glue/role.hpp @@ -12,8 +12,14 @@ #pragma once #include <cstdint> + +#include "json/json.hpp" + namespace memgraph::replication_coordination_glue { // TODO: figure out a way of ensuring that usage of this type is never uninitialed/defaulted incorrectly to MAIN enum class ReplicationRole : uint8_t { MAIN, REPLICA }; + +NLOHMANN_JSON_SERIALIZE_ENUM(ReplicationRole, {{ReplicationRole::MAIN, "main"}, {ReplicationRole::REPLICA, "replica"}}) + } // namespace memgraph::replication_coordination_glue diff --git a/src/replication_handler/include/replication_handler/replication_handler.hpp 
b/src/replication_handler/include/replication_handler/replication_handler.hpp index b110e6015..e1da19bfa 100644 --- a/src/replication_handler/include/replication_handler/replication_handler.hpp +++ b/src/replication_handler/include/replication_handler/replication_handler.hpp @@ -14,6 +14,7 @@ #include "dbms/dbms_handler.hpp" #include "flags/experimental.hpp" #include "replication/include/replication/state.hpp" +#include "replication_coordination_glue/common.hpp" #include "replication_handler/system_replication.hpp" #include "replication_handler/system_rpc.hpp" #include "utils/result.hpp" @@ -149,6 +150,8 @@ struct ReplicationHandler : public memgraph::query::ReplicationQueryHandler { auto GetReplicaUUID() -> std::optional<utils::UUID>; + auto GetDatabasesHistories() -> replication_coordination_glue::DatabaseHistories; + private: template <bool SendSwapUUID> auto RegisterReplica_(const memgraph::replication::ReplicationClientConfig &config) @@ -207,8 +210,13 @@ struct ReplicationHandler : public memgraph::query::ReplicationQueryHandler { auto client = std::make_unique<storage::ReplicationStorageClient>(*instance_client_ptr, main_uuid); client->Start(storage, std::move(db_acc)); bool const success = std::invoke([state = client->State()]() { + // We force sync replicas in other situation if (state == storage::replication::ReplicaState::DIVERGED_FROM_MAIN) { +#ifdef MG_ENTERPRISE + return FLAGS_coordinator_server_port != 0; +#else return false; +#endif } return true; }); diff --git a/src/replication_handler/replication_handler.cpp b/src/replication_handler/replication_handler.cpp index 5f807779d..4ae4c796e 100644 --- a/src/replication_handler/replication_handler.cpp +++ b/src/replication_handler/replication_handler.cpp @@ -14,6 +14,7 @@ #include "dbms/dbms_handler.hpp" #include "replication/replication_client.hpp" #include "replication_handler/system_replication.hpp" +#include "utils/functional.hpp" namespace memgraph::replication { @@ -265,8 +266,24 @@ auto 
ReplicationHandler::GetRole() const -> replication_coordination_glue::Repli return repl_state_.GetRole(); } +auto ReplicationHandler::GetDatabasesHistories() -> replication_coordination_glue::DatabaseHistories { + replication_coordination_glue::DatabaseHistories results; + dbms_handler_.ForEach([&results](memgraph::dbms::DatabaseAccess db_acc) { + auto &repl_storage_state = db_acc->storage()->repl_storage_state_; + + std::vector<std::pair<std::string, uint64_t>> history = utils::fmap(repl_storage_state.history); + + history.emplace_back(std::string(repl_storage_state.epoch_.id()), repl_storage_state.last_commit_timestamp_.load()); + replication_coordination_glue::DatabaseHistory repl{ + .db_uuid = utils::UUID{db_acc->storage()->uuid()}, .history = history, .name = std::string(db_acc->name())}; + results.emplace_back(repl); + }); + + return results; +} + auto ReplicationHandler::GetReplicaUUID() -> std::optional<utils::UUID> { - MG_ASSERT(repl_state_.IsReplica()); + MG_ASSERT(repl_state_.IsReplica(), "Instance is not replica"); return std::get<RoleReplicaData>(repl_state_.ReplicationData()).uuid_; } @@ -293,7 +310,7 @@ auto ReplicationHandler::ShowReplicas() const -> utils::BasicResult<query::ShowR // ATM we only support IN_MEMORY_TRANSACTIONAL if (storage->storage_mode_ != storage::StorageMode::IN_MEMORY_TRANSACTIONAL) return; if (!full_info && storage->name() == dbms::kDefaultDB) return; - auto ok = + [[maybe_unused]] auto ok = storage->repl_storage_state_.WithClient(replica.name_, [&](storage::ReplicationStorageClient &client) { auto ts_info = client.GetTimestampInfo(storage); auto state = client.State(); diff --git a/src/storage/v2/CMakeLists.txt b/src/storage/v2/CMakeLists.txt index 150a02cc7..ec5108d63 100644 --- a/src/storage/v2/CMakeLists.txt +++ b/src/storage/v2/CMakeLists.txt @@ -21,8 +21,10 @@ add_library(mg-storage-v2 STATIC storage.cpp indices/indices.cpp all_vertices_iterable.cpp + edges_iterable.cpp vertices_iterable.cpp inmemory/storage.cpp + 
inmemory/edge_type_index.cpp inmemory/label_index.cpp inmemory/label_property_index.cpp inmemory/unique_constraints.cpp @@ -30,6 +32,7 @@ add_library(mg-storage-v2 STATIC disk/edge_import_mode_cache.cpp disk/storage.cpp disk/rocksdb_storage.cpp + disk/edge_type_index.cpp disk/label_index.cpp disk/label_property_index.cpp disk/unique_constraints.cpp diff --git a/src/storage/v2/constraints/constraints.hpp b/src/storage/v2/constraints/constraints.hpp index 1f5ef999e..0d2a49875 100644 --- a/src/storage/v2/constraints/constraints.hpp +++ b/src/storage/v2/constraints/constraints.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -33,6 +33,7 @@ struct Constraints { std::unique_ptr<ExistenceConstraints> existence_constraints_; std::unique_ptr<UniqueConstraints> unique_constraints_; + bool empty() const { return existence_constraints_->empty() && unique_constraints_->empty(); } }; } // namespace memgraph::storage diff --git a/src/storage/v2/constraints/existence_constraints.hpp b/src/storage/v2/constraints/existence_constraints.hpp index c3b68828a..a043a9f5b 100644 --- a/src/storage/v2/constraints/existence_constraints.hpp +++ b/src/storage/v2/constraints/existence_constraints.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -40,6 +40,8 @@ class ExistenceConstraints { const LabelId &label, const PropertyId &property); }; + bool empty() const { return constraints_.empty(); } + [[nodiscard]] static std::optional<ConstraintViolation> ValidateVertexOnConstraint(const Vertex &vertex, const LabelId &label, const PropertyId &property); diff --git a/src/storage/v2/constraints/unique_constraints.hpp b/src/storage/v2/constraints/unique_constraints.hpp index b9ec04bfc..fcdcf1739 100644 --- a/src/storage/v2/constraints/unique_constraints.hpp +++ b/src/storage/v2/constraints/unique_constraints.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -60,6 +60,8 @@ class UniqueConstraints { virtual void Clear() = 0; + virtual bool empty() const = 0; + protected: static DeletionStatus CheckPropertiesBeforeDeletion(const std::set<PropertyId> &properties) { if (properties.empty()) { diff --git a/src/storage/v2/disk/edge_type_index.cpp b/src/storage/v2/disk/edge_type_index.cpp new file mode 100644 index 000000000..d11eb6caf --- /dev/null +++ b/src/storage/v2/disk/edge_type_index.cpp @@ -0,0 +1,49 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. 
+// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#include "edge_type_index.hpp" + +#include "utils/exceptions.hpp" + +namespace memgraph::storage { + +bool DiskEdgeTypeIndex::DropIndex(EdgeTypeId /*edge_type*/) { + spdlog::warn("Edge-type index related operations are not yet supported using on-disk storage mode."); + return true; +} + +bool DiskEdgeTypeIndex::IndexExists(EdgeTypeId /*edge_type*/) const { + spdlog::warn("Edge-type index related operations are not yet supported using on-disk storage mode."); + return false; +} + +std::vector<EdgeTypeId> DiskEdgeTypeIndex::ListIndices() const { + spdlog::warn("Edge-type index related operations are not yet supported using on-disk storage mode."); + return {}; +} + +uint64_t DiskEdgeTypeIndex::ApproximateEdgeCount(EdgeTypeId /*edge_type*/) const { + spdlog::warn("Edge-type index related operations are not yet supported using on-disk storage mode."); + return 0U; +} + +void DiskEdgeTypeIndex::UpdateOnEdgeCreation(Vertex * /*from*/, Vertex * /*to*/, EdgeRef /*edge_ref*/, + EdgeTypeId /*edge_type*/, const Transaction & /*tx*/) { + spdlog::warn("Edge-type index related operations are not yet supported using on-disk storage mode."); +} + +void DiskEdgeTypeIndex::UpdateOnEdgeModification(Vertex * /*old_from*/, Vertex * /*old_to*/, Vertex * /*new_from*/, + Vertex * /*new_to*/, EdgeRef /*edge_ref*/, EdgeTypeId /*edge_type*/, + const Transaction & /*tx*/) { + spdlog::warn("Edge-type index related operations are not yet supported using on-disk storage mode."); +} + +} // namespace memgraph::storage diff --git a/src/storage/v2/disk/edge_type_index.hpp b/src/storage/v2/disk/edge_type_index.hpp new file mode 100644 index 000000000..fe79b2690 --- /dev/null +++ b/src/storage/v2/disk/edge_type_index.hpp @@ -0,0 +1,35 @@ +// Copyright 2024 Memgraph Ltd. 
+// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#pragma once + +#include "storage/v2/indices/edge_type_index.hpp" + +namespace memgraph::storage { + +class DiskEdgeTypeIndex : public storage::EdgeTypeIndex { + public: + bool DropIndex(EdgeTypeId edge_type) override; + + bool IndexExists(EdgeTypeId edge_type) const override; + + std::vector<EdgeTypeId> ListIndices() const override; + + uint64_t ApproximateEdgeCount(EdgeTypeId edge_type) const override; + + void UpdateOnEdgeCreation(Vertex *from, Vertex *to, EdgeRef edge_ref, EdgeTypeId edge_type, + const Transaction &tx) override; + + void UpdateOnEdgeModification(Vertex *old_from, Vertex *old_to, Vertex *new_from, Vertex *new_to, EdgeRef edge_ref, + EdgeTypeId edge_type, const Transaction &tx) override; +}; + +} // namespace memgraph::storage diff --git a/src/storage/v2/disk/storage.cpp b/src/storage/v2/disk/storage.cpp index f9cd2ac13..21fa5ecc7 100644 --- a/src/storage/v2/disk/storage.cpp +++ b/src/storage/v2/disk/storage.cpp @@ -41,6 +41,7 @@ #include "storage/v2/edge_accessor.hpp" #include "storage/v2/edge_import_mode.hpp" #include "storage/v2/edge_ref.hpp" +#include "storage/v2/edges_iterable.hpp" #include "storage/v2/id_types.hpp" #include "storage/v2/modified_edge.hpp" #include "storage/v2/mvcc.hpp" @@ -807,11 +808,21 @@ void DiskStorage::LoadVerticesFromDiskLabelPropertyIndexForIntervalSearch( } } +EdgesIterable DiskStorage::DiskAccessor::Edges(EdgeTypeId /*edge_type*/, View /*view*/) { + throw utils::NotYetImplemented( + "Edge-type index 
related operations are not yet supported using on-disk storage mode."); +} + uint64_t DiskStorage::DiskAccessor::ApproximateVertexCount() const { auto *disk_storage = static_cast<DiskStorage *>(storage_); return disk_storage->vertex_count_.load(std::memory_order_acquire); } +uint64_t DiskStorage::DiskAccessor::ApproximateEdgeCount(EdgeTypeId /*edge_type*/) const { + spdlog::info("Edge-type index related operations are not yet supported using on-disk storage mode."); + return 0U; +} + uint64_t DiskStorage::GetDiskSpaceUsage() const { uint64_t main_disk_storage_size = utils::GetDirDiskUsage(config_.disk.main_storage_directory); uint64_t index_disk_storage_size = utils::GetDirDiskUsage(config_.disk.label_index_directory) + @@ -1629,6 +1640,9 @@ utils::BasicResult<StorageManipulationError, void> DiskStorage::DiskAccessor::Co return StorageManipulationError{PersistenceError{}}; } } break; + case MetadataDelta::Action::EDGE_INDEX_CREATE: { + throw utils::NotYetImplemented("Edge-type indexing is not yet implemented on on-disk storage mode."); + } case MetadataDelta::Action::LABEL_INDEX_DROP: { if (!disk_storage->durable_metadata_.PersistLabelIndexDeletion(md_delta.label)) { return StorageManipulationError{PersistenceError{}}; @@ -1641,6 +1655,9 @@ utils::BasicResult<StorageManipulationError, void> DiskStorage::DiskAccessor::Co return StorageManipulationError{PersistenceError{}}; } } break; + case MetadataDelta::Action::EDGE_INDEX_DROP: { + throw utils::NotYetImplemented("Edge-type indexing is not yet implemented on on-disk storage mode."); + } case MetadataDelta::Action::LABEL_INDEX_STATS_SET: { throw utils::NotYetImplemented("SetIndexStats(stats) is not implemented for DiskStorage."); } break; @@ -1917,6 +1934,11 @@ utils::BasicResult<StorageIndexDefinitionError, void> DiskStorage::DiskAccessor: return {}; } +utils::BasicResult<StorageIndexDefinitionError, void> DiskStorage::DiskAccessor::CreateIndex(EdgeTypeId /*edge_type*/) { + throw utils::NotYetImplemented( + 
"Edge-type index related operations are not yet supported using on-disk storage mode."); +} + utils::BasicResult<StorageIndexDefinitionError, void> DiskStorage::DiskAccessor::DropIndex(LabelId label) { MG_ASSERT(unique_guard_.owns_lock(), "Create index requires a unique access to the storage!"); auto *on_disk = static_cast<DiskStorage *>(storage_); @@ -1945,6 +1967,11 @@ utils::BasicResult<StorageIndexDefinitionError, void> DiskStorage::DiskAccessor: return {}; } +utils::BasicResult<StorageIndexDefinitionError, void> DiskStorage::DiskAccessor::DropIndex(EdgeTypeId /*edge_type*/) { + throw utils::NotYetImplemented( + "Edge-type index related operations are not yet supported using on-disk storage mode."); +} + utils::BasicResult<StorageExistenceConstraintDefinitionError, void> DiskStorage::DiskAccessor::CreateExistenceConstraint(LabelId label, PropertyId property) { MG_ASSERT(unique_guard_.owns_lock(), "Create existence constraint requires a unique access to the storage!"); @@ -2022,7 +2049,8 @@ Transaction DiskStorage::CreateTransaction(IsolationLevel isolation_level, Stora edge_import_mode_active = edge_import_status_ == EdgeImportMode::ACTIVE; } - return {transaction_id, start_timestamp, isolation_level, storage_mode, edge_import_mode_active}; + return {transaction_id, start_timestamp, isolation_level, + storage_mode, edge_import_mode_active, !constraints_.empty()}; } uint64_t DiskStorage::CommitTimestamp(const std::optional<uint64_t> desired_commit_timestamp) { @@ -2053,6 +2081,12 @@ std::unique_ptr<Storage::Accessor> DiskStorage::UniqueAccess( return std::unique_ptr<DiskAccessor>( new DiskAccessor{Storage::Accessor::unique_access, this, isolation_level, storage_mode_}); } + +bool DiskStorage::DiskAccessor::EdgeTypeIndexExists(EdgeTypeId /*edge_type*/) const { + spdlog::info("Edge-type index related operations are not yet supported using on-disk storage mode."); + return false; +} + IndicesInfo DiskStorage::DiskAccessor::ListAllIndices() const { auto *on_disk = 
static_cast<DiskStorage *>(storage_); auto *disk_label_index = static_cast<DiskLabelIndex *>(on_disk->indices_.label_index_.get()); diff --git a/src/storage/v2/disk/storage.hpp b/src/storage/v2/disk/storage.hpp index 4d71fd10b..349a7454a 100644 --- a/src/storage/v2/disk/storage.hpp +++ b/src/storage/v2/disk/storage.hpp @@ -72,6 +72,8 @@ class DiskStorage final : public Storage { const std::optional<utils::Bound<PropertyValue>> &lower_bound, const std::optional<utils::Bound<PropertyValue>> &upper_bound, View view) override; + EdgesIterable Edges(EdgeTypeId edge_type, View view) override; + uint64_t ApproximateVertexCount() const override; uint64_t ApproximateVertexCount(LabelId /*label*/) const override { return 10; } @@ -89,6 +91,8 @@ class DiskStorage final : public Storage { return 10; } + uint64_t ApproximateEdgeCount(EdgeTypeId edge_type) const override; + std::optional<storage::LabelIndexStats> GetIndexStats(const storage::LabelId & /*label*/) const override { return {}; } @@ -140,6 +144,8 @@ class DiskStorage final : public Storage { return disk_storage->indices_.label_property_index_->IndexExists(label, property); } + bool EdgeTypeIndexExists(EdgeTypeId edge_type) const override; + IndicesInfo ListAllIndices() const override; ConstraintsInfo ListAllConstraints() const override; @@ -158,10 +164,14 @@ class DiskStorage final : public Storage { utils::BasicResult<StorageIndexDefinitionError, void> CreateIndex(LabelId label, PropertyId property) override; + utils::BasicResult<StorageIndexDefinitionError, void> CreateIndex(EdgeTypeId edge_type) override; + utils::BasicResult<StorageIndexDefinitionError, void> DropIndex(LabelId label) override; utils::BasicResult<StorageIndexDefinitionError, void> DropIndex(LabelId label, PropertyId property) override; + utils::BasicResult<StorageIndexDefinitionError, void> DropIndex(EdgeTypeId edge_type) override; + utils::BasicResult<StorageExistenceConstraintDefinitionError, void> CreateExistenceConstraint( LabelId label, 
PropertyId property) override; diff --git a/src/storage/v2/disk/unique_constraints.cpp b/src/storage/v2/disk/unique_constraints.cpp index 3c17530c2..04a0c265a 100644 --- a/src/storage/v2/disk/unique_constraints.cpp +++ b/src/storage/v2/disk/unique_constraints.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -347,5 +347,6 @@ void DiskUniqueConstraints::LoadUniqueConstraints(const std::vector<std::string> constraints_.emplace(label, properties); } } +bool DiskUniqueConstraints::empty() const { return constraints_.empty(); } } // namespace memgraph::storage diff --git a/src/storage/v2/disk/unique_constraints.hpp b/src/storage/v2/disk/unique_constraints.hpp index 0cc5a9586..4e3450ef1 100644 --- a/src/storage/v2/disk/unique_constraints.hpp +++ b/src/storage/v2/disk/unique_constraints.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -59,6 +59,7 @@ class DiskUniqueConstraints : public UniqueConstraints { RocksDBStorage *GetRocksDBStorage() const; void LoadUniqueConstraints(const std::vector<std::string> &keys); + bool empty() const override; private: utils::Synchronized<std::map<uint64_t, std::map<Gid, std::set<std::pair<LabelId, std::set<PropertyId>>>>>> diff --git a/src/storage/v2/durability/durability.cpp b/src/storage/v2/durability/durability.cpp index 92c4d11e8..fbbedbee5 100644 --- a/src/storage/v2/durability/durability.cpp +++ b/src/storage/v2/durability/durability.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -31,6 +31,7 @@ #include "storage/v2/durability/paths.hpp" #include "storage/v2/durability/snapshot.hpp" #include "storage/v2/durability/wal.hpp" +#include "storage/v2/inmemory/edge_type_index.hpp" #include "storage/v2/inmemory/label_index.hpp" #include "storage/v2/inmemory/label_property_index.hpp" #include "storage/v2/inmemory/unique_constraints.hpp" @@ -118,6 +119,8 @@ std::optional<std::vector<WalDurabilityInfo>> GetWalFiles(const std::filesystem: if (!item.is_regular_file()) continue; try { auto info = ReadWalInfo(item.path()); + spdlog::trace("Getting wal file with following info: uuid: {}, epoch id: {}, from timestamp {}, to_timestamp {} ", + info.uuid, info.epoch_id, info.from_timestamp, info.to_timestamp); if ((uuid.empty() || info.uuid == uuid) && (!current_seq_num || info.seq_num < *current_seq_num)) { wal_files.emplace_back(info.seq_num, info.from_timestamp, info.to_timestamp, std::move(info.uuid), std::move(info.epoch_id), item.path()); @@ -197,9 +200,18 @@ void RecoverIndicesAndStats(const RecoveredIndicesAndConstraints::IndicesMetadat } spdlog::info("Label+property indices statistics are recreated."); - spdlog::info("Indices are recreated."); + // Recover edge-type indices. 
+ spdlog::info("Recreating {} edge-type indices from metadata.", indices_metadata.edge.size()); + auto *mem_edge_type_index = static_cast<InMemoryEdgeTypeIndex *>(indices->edge_type_index_.get()); + for (const auto &item : indices_metadata.edge) { + if (!mem_edge_type_index->CreateIndex(item, vertices->access())) { + throw RecoveryFailure("The edge-type index must be created here!"); + } + spdlog::info("Index on :{} is recreated from metadata", name_id_mapper->IdToName(item.AsUint())); + } + spdlog::info("Edge-type indices are recreated."); - spdlog::info("Recreating constraints from metadata."); + spdlog::info("Indices are recreated."); } void RecoverExistenceConstraints(const RecoveredIndicesAndConstraints::ConstraintsMetadata &constraints_metadata, @@ -410,22 +422,17 @@ std::optional<RecoveryInfo> Recovery::RecoverData(std::string *uuid, Replication std::optional<uint64_t> previous_seq_num; auto last_loaded_timestamp = snapshot_timestamp; spdlog::info("Trying to load WAL files."); + + if (last_loaded_timestamp) { + epoch_history->emplace_back(repl_storage_state.epoch_.id(), *last_loaded_timestamp); + } + for (auto &wal_file : wal_files) { if (previous_seq_num && (wal_file.seq_num - *previous_seq_num) > 1) { LOG_FATAL("You are missing a WAL file with the sequence number {}!", *previous_seq_num + 1); } previous_seq_num = wal_file.seq_num; - if (wal_file.epoch_id != repl_storage_state.epoch_.id()) { - // This way we skip WALs finalized only because of role change. - // We can also set the last timestamp to 0 if last loaded timestamp - // is nullopt as this can only happen if the WAL file with seq = 0 - // does not contain any deltas and we didn't find any snapshots. 
- if (last_loaded_timestamp) { - epoch_history->emplace_back(wal_file.epoch_id, *last_loaded_timestamp); - } - repl_storage_state.epoch_.SetEpoch(std::move(wal_file.epoch_id)); - } try { auto info = LoadWal(wal_file.path, &indices_constraints, last_loaded_timestamp, vertices, edges, name_id_mapper, edge_count, config.salient.items); @@ -434,13 +441,24 @@ std::optional<RecoveryInfo> Recovery::RecoverData(std::string *uuid, Replication recovery_info.next_timestamp = std::max(recovery_info.next_timestamp, info.next_timestamp); recovery_info.last_commit_timestamp = info.last_commit_timestamp; + + if (recovery_info.next_timestamp != 0) { + last_loaded_timestamp.emplace(recovery_info.next_timestamp - 1); + } + + auto last_loaded_timestamp_value = last_loaded_timestamp.value_or(0); + if (epoch_history->empty() || epoch_history->back().first != wal_file.epoch_id) { + // no history or new epoch, add it + epoch_history->emplace_back(wal_file.epoch_id, last_loaded_timestamp_value); + repl_storage_state.epoch_.SetEpoch(wal_file.epoch_id); + } else if (epoch_history->back().second < last_loaded_timestamp_value) { + // existing epoch, update with newer timestamp + epoch_history->back().second = last_loaded_timestamp_value; + } + } catch (const RecoveryFailure &e) { LOG_FATAL("Couldn't recover WAL deltas from {} because of: {}", wal_file.path, e.what()); } - - if (recovery_info.next_timestamp != 0) { - last_loaded_timestamp.emplace(recovery_info.next_timestamp - 1); - } } // The sequence number needs to be recovered even though `LoadWal` didn't // load any deltas from that file. 
@@ -456,7 +474,12 @@ std::optional<RecoveryInfo> Recovery::RecoverData(std::string *uuid, Replication memgraph::metrics::Measure(memgraph::metrics::SnapshotRecoveryLatency_us, std::chrono::duration_cast<std::chrono::microseconds>(timer.Elapsed()).count()); + spdlog::trace("Set epoch id: {} with commit timestamp {}", std::string(repl_storage_state.epoch_.id()), + repl_storage_state.last_commit_timestamp_); + std::for_each(repl_storage_state.history.begin(), repl_storage_state.history.end(), [](auto &history) { + spdlog::trace("epoch id: {} with commit timestamp {}", std::string(history.first), history.second); + }); return recovery_info; } diff --git a/src/storage/v2/durability/marker.hpp b/src/storage/v2/durability/marker.hpp index 8f00d435d..ac0cc074d 100644 --- a/src/storage/v2/durability/marker.hpp +++ b/src/storage/v2/durability/marker.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -37,6 +37,8 @@ enum class Marker : uint8_t { SECTION_CONSTRAINTS = 0x25, SECTION_DELTA = 0x26, SECTION_EPOCH_HISTORY = 0x27, + SECTION_EDGE_INDICES = 0x28, + SECTION_OFFSETS = 0x42, DELTA_VERTEX_CREATE = 0x50, @@ -60,6 +62,8 @@ enum class Marker : uint8_t { DELTA_LABEL_INDEX_STATS_CLEAR = 0x62, DELTA_LABEL_PROPERTY_INDEX_STATS_SET = 0x63, DELTA_LABEL_PROPERTY_INDEX_STATS_CLEAR = 0x64, + DELTA_EDGE_TYPE_INDEX_CREATE = 0x65, + DELTA_EDGE_TYPE_INDEX_DROP = 0x66, VALUE_FALSE = 0x00, VALUE_TRUE = 0xff, @@ -85,6 +89,7 @@ static const Marker kMarkersAll[] = { Marker::SECTION_CONSTRAINTS, Marker::SECTION_DELTA, Marker::SECTION_EPOCH_HISTORY, + Marker::SECTION_EDGE_INDICES, Marker::SECTION_OFFSETS, Marker::DELTA_VERTEX_CREATE, Marker::DELTA_VERTEX_DELETE, @@ -103,6 +108,8 @@ static const Marker kMarkersAll[] = { Marker::DELTA_LABEL_PROPERTY_INDEX_STATS_CLEAR, 
Marker::DELTA_LABEL_PROPERTY_INDEX_CREATE, Marker::DELTA_LABEL_PROPERTY_INDEX_DROP, + Marker::DELTA_EDGE_TYPE_INDEX_CREATE, + Marker::DELTA_EDGE_TYPE_INDEX_DROP, Marker::DELTA_EXISTENCE_CONSTRAINT_CREATE, Marker::DELTA_EXISTENCE_CONSTRAINT_DROP, Marker::DELTA_UNIQUE_CONSTRAINT_CREATE, diff --git a/src/storage/v2/durability/metadata.hpp b/src/storage/v2/durability/metadata.hpp index 42e24e723..c8ee27b2f 100644 --- a/src/storage/v2/durability/metadata.hpp +++ b/src/storage/v2/durability/metadata.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -43,6 +43,7 @@ struct RecoveredIndicesAndConstraints { std::vector<std::pair<LabelId, PropertyId>> label_property; std::vector<std::pair<LabelId, LabelIndexStats>> label_stats; std::vector<std::pair<LabelId, std::pair<PropertyId, LabelPropertyIndexStats>>> label_property_stats; + std::vector<EdgeTypeId> edge; } indices; struct ConstraintsMetadata { diff --git a/src/storage/v2/durability/serialization.cpp b/src/storage/v2/durability/serialization.cpp index 6b13d9d00..28ba64943 100644 --- a/src/storage/v2/durability/serialization.cpp +++ b/src/storage/v2/durability/serialization.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -332,6 +332,7 @@ std::optional<PropertyValue> Decoder::ReadPropertyValue() { case Marker::SECTION_CONSTRAINTS: case Marker::SECTION_DELTA: case Marker::SECTION_EPOCH_HISTORY: + case Marker::SECTION_EDGE_INDICES: case Marker::SECTION_OFFSETS: case Marker::DELTA_VERTEX_CREATE: case Marker::DELTA_VERTEX_DELETE: @@ -350,6 +351,8 @@ std::optional<PropertyValue> Decoder::ReadPropertyValue() { case Marker::DELTA_LABEL_PROPERTY_INDEX_STATS_CLEAR: case Marker::DELTA_LABEL_PROPERTY_INDEX_CREATE: case Marker::DELTA_LABEL_PROPERTY_INDEX_DROP: + case Marker::DELTA_EDGE_TYPE_INDEX_CREATE: + case Marker::DELTA_EDGE_TYPE_INDEX_DROP: case Marker::DELTA_EXISTENCE_CONSTRAINT_CREATE: case Marker::DELTA_EXISTENCE_CONSTRAINT_DROP: case Marker::DELTA_UNIQUE_CONSTRAINT_CREATE: @@ -435,6 +438,7 @@ bool Decoder::SkipPropertyValue() { case Marker::SECTION_CONSTRAINTS: case Marker::SECTION_DELTA: case Marker::SECTION_EPOCH_HISTORY: + case Marker::SECTION_EDGE_INDICES: case Marker::SECTION_OFFSETS: case Marker::DELTA_VERTEX_CREATE: case Marker::DELTA_VERTEX_DELETE: @@ -453,6 +457,8 @@ bool Decoder::SkipPropertyValue() { case Marker::DELTA_LABEL_PROPERTY_INDEX_STATS_CLEAR: case Marker::DELTA_LABEL_PROPERTY_INDEX_CREATE: case Marker::DELTA_LABEL_PROPERTY_INDEX_DROP: + case Marker::DELTA_EDGE_TYPE_INDEX_CREATE: + case Marker::DELTA_EDGE_TYPE_INDEX_DROP: case Marker::DELTA_EXISTENCE_CONSTRAINT_CREATE: case Marker::DELTA_EXISTENCE_CONSTRAINT_DROP: case Marker::DELTA_UNIQUE_CONSTRAINT_CREATE: diff --git a/src/storage/v2/durability/snapshot.cpp b/src/storage/v2/durability/snapshot.cpp index eee099870..5fea3dfa5 100644 --- a/src/storage/v2/durability/snapshot.cpp +++ b/src/storage/v2/durability/snapshot.cpp @@ -153,6 +153,11 @@ SnapshotInfo ReadSnapshotInfo(const std::filesystem::path &path) { info.offset_edges 
= read_offset(); info.offset_vertices = read_offset(); info.offset_indices = read_offset(); + if (*version >= 17) { + info.offset_edge_indices = read_offset(); + } else { + info.offset_edge_indices = 0U; + } info.offset_constraints = read_offset(); info.offset_mapper = read_offset(); info.offset_epoch_history = read_offset(); @@ -1379,10 +1384,11 @@ RecoveredSnapshot LoadSnapshotVersion15(const std::filesystem::path &path, utils return {info, recovery_info, std::move(indices_constraints)}; } -RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipList<Vertex> *vertices, - utils::SkipList<Edge> *edges, - std::deque<std::pair<std::string, uint64_t>> *epoch_history, - NameIdMapper *name_id_mapper, std::atomic<uint64_t> *edge_count, const Config &config) { +RecoveredSnapshot LoadSnapshotVersion16(const std::filesystem::path &path, utils::SkipList<Vertex> *vertices, + utils::SkipList<Edge> *edges, + std::deque<std::pair<std::string, uint64_t>> *epoch_history, + NameIdMapper *name_id_mapper, std::atomic<uint64_t> *edge_count, + const Config &config) { RecoveryInfo recovery_info; RecoveredIndicesAndConstraints indices_constraints; @@ -1391,13 +1397,7 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis if (!version) throw RecoveryFailure("Couldn't read snapshot magic and/or version!"); if (!IsVersionSupported(*version)) throw RecoveryFailure(fmt::format("Invalid snapshot version {}", *version)); - if (*version == 14U) { - return LoadSnapshotVersion14(path, vertices, edges, epoch_history, name_id_mapper, edge_count, - config.salient.items); - } - if (*version == 15U) { - return LoadSnapshotVersion15(path, vertices, edges, epoch_history, name_id_mapper, edge_count, config); - } + if (*version != 16U) throw RecoveryFailure(fmt::format("Expected snapshot version is 16, but got {}", *version)); // Cleanup of loaded data in case of failure. 
bool success = false; @@ -1727,6 +1727,380 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis return {info, recovery_info, std::move(indices_constraints)}; } +RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipList<Vertex> *vertices, + utils::SkipList<Edge> *edges, + std::deque<std::pair<std::string, uint64_t>> *epoch_history, + NameIdMapper *name_id_mapper, std::atomic<uint64_t> *edge_count, const Config &config) { + RecoveryInfo recovery_info; + RecoveredIndicesAndConstraints indices_constraints; + + Decoder snapshot; + const auto version = snapshot.Initialize(path, kSnapshotMagic); + if (!version) throw RecoveryFailure("Couldn't read snapshot magic and/or version!"); + + if (!IsVersionSupported(*version)) throw RecoveryFailure(fmt::format("Invalid snapshot version {}", *version)); + if (*version == 14U) { + return LoadSnapshotVersion14(path, vertices, edges, epoch_history, name_id_mapper, edge_count, + config.salient.items); + } + if (*version == 15U) { + return LoadSnapshotVersion15(path, vertices, edges, epoch_history, name_id_mapper, edge_count, config); + } + if (*version == 16U) { + return LoadSnapshotVersion16(path, vertices, edges, epoch_history, name_id_mapper, edge_count, config); + } + + // Cleanup of loaded data in case of failure. + bool success = false; + utils::OnScopeExit cleanup([&] { + if (!success) { + edges->clear(); + vertices->clear(); + epoch_history->clear(); + } + }); + + // Read snapshot info. + const auto info = ReadSnapshotInfo(path); + spdlog::info("Recovering {} vertices and {} edges.", info.vertices_count, info.edges_count); + // Check for edges. + bool snapshot_has_edges = info.offset_edges != 0; + + // Recover mapper. 
+ std::unordered_map<uint64_t, uint64_t> snapshot_id_map; + { + spdlog::info("Recovering mapper metadata."); + if (!snapshot.SetPosition(info.offset_mapper)) throw RecoveryFailure("Couldn't read data from snapshot!"); + + auto marker = snapshot.ReadMarker(); + if (!marker || *marker != Marker::SECTION_MAPPER) throw RecoveryFailure("Failed to read section mapper!"); + + auto size = snapshot.ReadUint(); + if (!size) throw RecoveryFailure("Failed to read name-id mapper size!"); + + for (uint64_t i = 0; i < *size; ++i) { + auto id = snapshot.ReadUint(); + if (!id) throw RecoveryFailure("Failed to read id for name-id mapper!"); + auto name = snapshot.ReadString(); + if (!name) throw RecoveryFailure("Failed to read name for name-id mapper!"); + auto my_id = name_id_mapper->NameToId(*name); + snapshot_id_map.emplace(*id, my_id); + SPDLOG_TRACE("Mapping \"{}\"from snapshot id {} to actual id {}.", *name, *id, my_id); + } + } + auto get_label_from_id = [&snapshot_id_map](uint64_t label_id) { + auto it = snapshot_id_map.find(label_id); + if (it == snapshot_id_map.end()) throw RecoveryFailure("Couldn't find label id in snapshot_id_map!"); + return LabelId::FromUint(it->second); + }; + auto get_property_from_id = [&snapshot_id_map](uint64_t property_id) { + auto it = snapshot_id_map.find(property_id); + if (it == snapshot_id_map.end()) throw RecoveryFailure("Couldn't find property id in snapshot_id_map!"); + return PropertyId::FromUint(it->second); + }; + auto get_edge_type_from_id = [&snapshot_id_map](uint64_t edge_type_id) { + auto it = snapshot_id_map.find(edge_type_id); + if (it == snapshot_id_map.end()) throw RecoveryFailure("Couldn't find edge type id in snapshot_id_map!"); + return EdgeTypeId::FromUint(it->second); + }; + + // Reset current edge count. + edge_count->store(0, std::memory_order_release); + + { + spdlog::info("Recovering edges."); + // Recover edges. 
+ if (snapshot_has_edges) { + // We don't need to check whether we store properties on edge or not, because `LoadPartialEdges` will always + // iterate over the edges in the snapshot (if they exist) and the current configuration of properties on edge only + // affect what it does: + // 1. If properties are allowed on edges, then it loads the edges. + // 2. If properties are not allowed on edges, then it checks that none of the edges have any properties. + if (!snapshot.SetPosition(info.offset_edge_batches)) { + throw RecoveryFailure("Couldn't read data from snapshot!"); + } + const auto edge_batches = ReadBatchInfos(snapshot); + + RecoverOnMultipleThreads( + config.durability.recovery_thread_count, + [path, edges, items = config.salient.items, &get_property_from_id](const size_t /*batch_index*/, + const BatchInfo &batch) { + LoadPartialEdges(path, *edges, batch.offset, batch.count, items, get_property_from_id); + }, + edge_batches); + } + spdlog::info("Edges are recovered."); + + // Recover vertices (labels and properties). + spdlog::info("Recovering vertices.", info.vertices_count); + uint64_t last_vertex_gid{0}; + + if (!snapshot.SetPosition(info.offset_vertex_batches)) { + throw RecoveryFailure("Couldn't read data from snapshot!"); + } + + const auto vertex_batches = ReadBatchInfos(snapshot); + RecoverOnMultipleThreads( + config.durability.recovery_thread_count, + [path, vertices, &vertex_batches, &get_label_from_id, &get_property_from_id, &last_vertex_gid]( + const size_t batch_index, const BatchInfo &batch) { + const auto last_vertex_gid_in_batch = + LoadPartialVertices(path, *vertices, batch.offset, batch.count, get_label_from_id, get_property_from_id); + if (batch_index == vertex_batches.size() - 1) { + last_vertex_gid = last_vertex_gid_in_batch; + } + }, + vertex_batches); + + spdlog::info("Vertices are recovered."); + + // Recover vertices (in/out edges). 
+ spdlog::info("Recover connectivity."); + recovery_info.vertex_batches.reserve(vertex_batches.size()); + for (const auto batch : vertex_batches) { + recovery_info.vertex_batches.emplace_back(Gid::FromUint(0), batch.count); + } + std::atomic<uint64_t> highest_edge_gid{0}; + + RecoverOnMultipleThreads( + config.durability.recovery_thread_count, + [path, vertices, edges, edge_count, items = config.salient.items, snapshot_has_edges, &get_edge_type_from_id, + &highest_edge_gid, &recovery_info](const size_t batch_index, const BatchInfo &batch) { + const auto result = LoadPartialConnectivity(path, *vertices, *edges, batch.offset, batch.count, items, + snapshot_has_edges, get_edge_type_from_id); + edge_count->fetch_add(result.edge_count); + auto known_highest_edge_gid = highest_edge_gid.load(); + while (known_highest_edge_gid < result.highest_edge_id) { + highest_edge_gid.compare_exchange_weak(known_highest_edge_gid, result.highest_edge_id); + } + recovery_info.vertex_batches[batch_index].first = result.first_vertex_gid; + }, + vertex_batches); + + spdlog::info("Connectivity is recovered."); + + // Set initial values for edge/vertex ID generators. + recovery_info.next_edge_id = highest_edge_gid + 1; + recovery_info.next_vertex_id = last_vertex_gid + 1; + } + + // Recover indices. + { + spdlog::info("Recovering metadata of indices."); + if (!snapshot.SetPosition(info.offset_indices)) throw RecoveryFailure("Couldn't read data from snapshot!"); + + auto marker = snapshot.ReadMarker(); + if (!marker || *marker != Marker::SECTION_INDICES) throw RecoveryFailure("Couldn't read section indices!"); + + // Recover label indices. 
+ { + auto size = snapshot.ReadUint(); + if (!size) throw RecoveryFailure("Couldn't read the number of label indices"); + spdlog::info("Recovering metadata of {} label indices.", *size); + for (uint64_t i = 0; i < *size; ++i) { + auto label = snapshot.ReadUint(); + if (!label) throw RecoveryFailure("Couldn't read label of label index!"); + AddRecoveredIndexConstraint(&indices_constraints.indices.label, get_label_from_id(*label), + "The label index already exists!"); + SPDLOG_TRACE("Recovered metadata of label index for :{}", name_id_mapper->IdToName(snapshot_id_map.at(*label))); + } + spdlog::info("Metadata of label indices are recovered."); + } + + // Recover label indices statistics. + { + auto size = snapshot.ReadUint(); + if (!size) throw RecoveryFailure("Couldn't read the number of entries for label index statistics!"); + spdlog::info("Recovering metadata of {} label indices statistics.", *size); + for (uint64_t i = 0; i < *size; ++i) { + const auto label = snapshot.ReadUint(); + if (!label) throw RecoveryFailure("Couldn't read label while recovering label index statistics!"); + const auto count = snapshot.ReadUint(); + if (!count) throw RecoveryFailure("Couldn't read count for label index statistics!"); + const auto avg_degree = snapshot.ReadDouble(); + if (!avg_degree) throw RecoveryFailure("Couldn't read average degree for label index statistics"); + const auto label_id = get_label_from_id(*label); + indices_constraints.indices.label_stats.emplace_back(label_id, LabelIndexStats{*count, *avg_degree}); + SPDLOG_TRACE("Recovered metadata of label index statistics for :{}", + name_id_mapper->IdToName(snapshot_id_map.at(*label))); + } + spdlog::info("Metadata of label indices are recovered."); + } + + // Recover label+property indices. 
+ { + auto size = snapshot.ReadUint(); + if (!size) throw RecoveryFailure("Couldn't recover the number of label property indices!"); + spdlog::info("Recovering metadata of {} label+property indices.", *size); + for (uint64_t i = 0; i < *size; ++i) { + auto label = snapshot.ReadUint(); + if (!label) throw RecoveryFailure("Couldn't read label for label property index!"); + auto property = snapshot.ReadUint(); + if (!property) throw RecoveryFailure("Couldn't read property for label property index"); + AddRecoveredIndexConstraint(&indices_constraints.indices.label_property, + {get_label_from_id(*label), get_property_from_id(*property)}, + "The label+property index already exists!"); + SPDLOG_TRACE("Recovered metadata of label+property index for :{}({})", + name_id_mapper->IdToName(snapshot_id_map.at(*label)), + name_id_mapper->IdToName(snapshot_id_map.at(*property))); + } + spdlog::info("Metadata of label+property indices are recovered."); + } + + // Recover label+property indices statistics. 
+ { + auto size = snapshot.ReadUint(); + if (!size) throw RecoveryFailure("Couldn't recover the number of entries for label property statistics!"); + spdlog::info("Recovering metadata of {} label+property indices statistics.", *size); + for (uint64_t i = 0; i < *size; ++i) { + const auto label = snapshot.ReadUint(); + if (!label) throw RecoveryFailure("Couldn't read label for label property index statistics!"); + const auto property = snapshot.ReadUint(); + if (!property) throw RecoveryFailure("Couldn't read property for label property index statistics!"); + const auto count = snapshot.ReadUint(); + if (!count) throw RecoveryFailure("Couldn't read count for label property index statistics!!"); + const auto distinct_values_count = snapshot.ReadUint(); + if (!distinct_values_count) + throw RecoveryFailure("Couldn't read distinct values count for label property index statistics!"); + const auto statistic = snapshot.ReadDouble(); + if (!statistic) throw RecoveryFailure("Couldn't read statistics value for label-property index statistics!"); + const auto avg_group_size = snapshot.ReadDouble(); + if (!avg_group_size) + throw RecoveryFailure("Couldn't read average group size for label property index statistics!"); + const auto avg_degree = snapshot.ReadDouble(); + if (!avg_degree) throw RecoveryFailure("Couldn't read average degree for label property index statistics!"); + const auto label_id = get_label_from_id(*label); + const auto property_id = get_property_from_id(*property); + indices_constraints.indices.label_property_stats.emplace_back( + label_id, std::make_pair(property_id, LabelPropertyIndexStats{*count, *distinct_values_count, *statistic, + *avg_group_size, *avg_degree})); + SPDLOG_TRACE("Recovered metadata of label+property index statistics for :{}({})", + name_id_mapper->IdToName(snapshot_id_map.at(*label)), + name_id_mapper->IdToName(snapshot_id_map.at(*property))); + } + spdlog::info("Metadata of label+property indices are recovered."); + } + + // Recover 
edge-type indices. + spdlog::info("Recovering metadata of indices."); + if (!snapshot.SetPosition(info.offset_edge_indices)) throw RecoveryFailure("Couldn't read data from snapshot!"); + + marker = snapshot.ReadMarker(); + if (!marker || *marker != Marker::SECTION_EDGE_INDICES) + throw RecoveryFailure("Couldn't read section edge-indices!"); + + { + auto size = snapshot.ReadUint(); + if (!size) throw RecoveryFailure("Couldn't read the number of edge-type indices"); + spdlog::info("Recovering metadata of {} edge-type indices.", *size); + for (uint64_t i = 0; i < *size; ++i) { + auto edge_type = snapshot.ReadUint(); + if (!edge_type) throw RecoveryFailure("Couldn't read edge-type of edge-type index!"); + AddRecoveredIndexConstraint(&indices_constraints.indices.edge, get_edge_type_from_id(*edge_type), + "The edge-type index already exists!"); + SPDLOG_TRACE("Recovered metadata of edge-type index for :{}", + name_id_mapper->IdToName(snapshot_id_map.at(*edge_type))); + } + spdlog::info("Metadata of edge-type indices are recovered."); + } + + spdlog::info("Metadata of indices are recovered."); + } + + // Recover constraints. + { + spdlog::info("Recovering metadata of constraints."); + if (!snapshot.SetPosition(info.offset_constraints)) throw RecoveryFailure("Couldn't read data from snapshot!"); + + auto marker = snapshot.ReadMarker(); + if (!marker || *marker != Marker::SECTION_CONSTRAINTS) + throw RecoveryFailure("Couldn't read section constraints marker!"); + + // Recover existence constraints. 
+ { + auto size = snapshot.ReadUint(); + if (!size) throw RecoveryFailure("Couldn't read the number of existence constraints!"); + spdlog::info("Recovering metadata of {} existence constraints.", *size); + for (uint64_t i = 0; i < *size; ++i) { + auto label = snapshot.ReadUint(); + if (!label) throw RecoveryFailure("Couldn't read label of existence constraints!"); + auto property = snapshot.ReadUint(); + if (!property) throw RecoveryFailure("Couldn't read property of existence constraints!"); + AddRecoveredIndexConstraint(&indices_constraints.constraints.existence, + {get_label_from_id(*label), get_property_from_id(*property)}, + "The existence constraint already exists!"); + SPDLOG_TRACE("Recovered metadata of existence constraint for :{}({})", + name_id_mapper->IdToName(snapshot_id_map.at(*label)), + name_id_mapper->IdToName(snapshot_id_map.at(*property))); + } + spdlog::info("Metadata of existence constraints are recovered."); + } + + // Recover unique constraints. + // Snapshot version should be checked since unique constraints were + // implemented in later versions of snapshot. 
+ if (*version >= kUniqueConstraintVersion) { + auto size = snapshot.ReadUint(); + if (!size) throw RecoveryFailure("Couldn't read the number of unique constraints!"); + spdlog::info("Recovering metadata of {} unique constraints.", *size); + for (uint64_t i = 0; i < *size; ++i) { + auto label = snapshot.ReadUint(); + if (!label) throw RecoveryFailure("Couldn't read label of unique constraints!"); + auto properties_count = snapshot.ReadUint(); + if (!properties_count) throw RecoveryFailure("Couldn't read the number of properties in unique constraint!"); + std::set<PropertyId> properties; + for (uint64_t j = 0; j < *properties_count; ++j) { + auto property = snapshot.ReadUint(); + if (!property) throw RecoveryFailure("Couldn't read property of unique constraint!"); + properties.insert(get_property_from_id(*property)); + } + AddRecoveredIndexConstraint(&indices_constraints.constraints.unique, {get_label_from_id(*label), properties}, + "The unique constraint already exists!"); + SPDLOG_TRACE("Recovered metadata of unique constraints for :{}", + name_id_mapper->IdToName(snapshot_id_map.at(*label))); + } + spdlog::info("Metadata of unique constraints are recovered."); + } + spdlog::info("Metadata of constraints are recovered."); + } + + spdlog::info("Recovering metadata."); + // Recover epoch history + { + if (!snapshot.SetPosition(info.offset_epoch_history)) throw RecoveryFailure("Couldn't read data from snapshot!"); + + const auto marker = snapshot.ReadMarker(); + if (!marker || *marker != Marker::SECTION_EPOCH_HISTORY) + throw RecoveryFailure("Couldn't read section epoch history marker!"); + + const auto history_size = snapshot.ReadUint(); + if (!history_size) { + throw RecoveryFailure("Couldn't read history size!"); + } + + for (int i = 0; i < *history_size; ++i) { + auto maybe_epoch_id = snapshot.ReadString(); + if (!maybe_epoch_id) { + throw RecoveryFailure("Couldn't read maybe epoch id!"); + } + const auto maybe_last_commit_timestamp = snapshot.ReadUint(); + if 
(!maybe_last_commit_timestamp) { + throw RecoveryFailure("Couldn't read maybe last commit timestamp!"); + } + epoch_history->emplace_back(std::move(*maybe_epoch_id), *maybe_last_commit_timestamp); + } + } + + spdlog::info("Metadata recovered."); + // Recover timestamp. + recovery_info.next_timestamp = info.start_timestamp + 1; + + // Set success flag (to disable cleanup). + success = true; + + return {info, recovery_info, std::move(indices_constraints)}; +} + using OldSnapshotFiles = std::vector<std::pair<uint64_t, std::filesystem::path>>; void EnsureNecessaryWalFilesExist(const std::filesystem::path &wal_directory, const std::string &uuid, OldSnapshotFiles old_snapshot_files, Transaction *transaction, @@ -1835,6 +2209,7 @@ void CreateSnapshot(Storage *storage, Transaction *transaction, const std::files uint64_t offset_edges = 0; uint64_t offset_vertices = 0; uint64_t offset_indices = 0; + uint64_t offset_edge_indices = 0; uint64_t offset_constraints = 0; uint64_t offset_mapper = 0; uint64_t offset_metadata = 0; @@ -1847,6 +2222,7 @@ void CreateSnapshot(Storage *storage, Transaction *transaction, const std::files snapshot.WriteUint(offset_edges); snapshot.WriteUint(offset_vertices); snapshot.WriteUint(offset_indices); + snapshot.WriteUint(offset_edge_indices); snapshot.WriteUint(offset_constraints); snapshot.WriteUint(offset_mapper); snapshot.WriteUint(offset_epoch_history); @@ -2106,6 +2482,17 @@ void CreateSnapshot(Storage *storage, Transaction *transaction, const std::files snapshot.SetPosition(last_pos); } } + + // Write edge-type indices. + offset_edge_indices = snapshot.GetPosition(); + snapshot.WriteMarker(Marker::SECTION_EDGE_INDICES); + { + auto edge_type = storage->indices_.edge_type_index_->ListIndices(); + snapshot.WriteUint(edge_type.size()); + for (const auto &item : edge_type) { + write_mapping(item); + } + } } // Write constraints. 
@@ -2196,6 +2583,7 @@ void CreateSnapshot(Storage *storage, Transaction *transaction, const std::files snapshot.WriteUint(offset_edges); snapshot.WriteUint(offset_vertices); snapshot.WriteUint(offset_indices); + snapshot.WriteUint(offset_edge_indices); snapshot.WriteUint(offset_constraints); snapshot.WriteUint(offset_mapper); snapshot.WriteUint(offset_epoch_history); diff --git a/src/storage/v2/durability/snapshot.hpp b/src/storage/v2/durability/snapshot.hpp index 4c1aee1ce..b8c224b3f 100644 --- a/src/storage/v2/durability/snapshot.hpp +++ b/src/storage/v2/durability/snapshot.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -34,6 +34,7 @@ struct SnapshotInfo { uint64_t offset_edges; uint64_t offset_vertices; uint64_t offset_indices; + uint64_t offset_edge_indices; uint64_t offset_constraints; uint64_t offset_mapper; uint64_t offset_epoch_history; diff --git a/src/storage/v2/durability/storage_global_operation.hpp b/src/storage/v2/durability/storage_global_operation.hpp index a4f1b043a..7dd635e9d 100644 --- a/src/storage/v2/durability/storage_global_operation.hpp +++ b/src/storage/v2/durability/storage_global_operation.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -23,6 +23,8 @@ enum class StorageMetadataOperation { LABEL_PROPERTY_INDEX_DROP, LABEL_PROPERTY_INDEX_STATS_SET, LABEL_PROPERTY_INDEX_STATS_CLEAR, + EDGE_TYPE_INDEX_CREATE, + EDGE_TYPE_INDEX_DROP, EXISTENCE_CONSTRAINT_CREATE, EXISTENCE_CONSTRAINT_DROP, UNIQUE_CONSTRAINT_CREATE, diff --git a/src/storage/v2/durability/version.hpp b/src/storage/v2/durability/version.hpp index 25eb30904..58ca0364a 100644 --- a/src/storage/v2/durability/version.hpp +++ b/src/storage/v2/durability/version.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -20,7 +20,7 @@ namespace memgraph::storage::durability { // The current version of snapshot and WAL encoding / decoding. // IMPORTANT: Please bump this version for every snapshot and/or WAL format // change!!! 
-const uint64_t kVersion{16}; +const uint64_t kVersion{17}; const uint64_t kOldestSupportedVersion{14}; const uint64_t kUniqueConstraintVersion{13}; diff --git a/src/storage/v2/durability/wal.cpp b/src/storage/v2/durability/wal.cpp index 52e916052..5c40ab1c5 100644 --- a/src/storage/v2/durability/wal.cpp +++ b/src/storage/v2/durability/wal.cpp @@ -95,6 +95,10 @@ Marker OperationToMarker(StorageMetadataOperation operation) { return Marker::DELTA_LABEL_PROPERTY_INDEX_STATS_SET; case StorageMetadataOperation::LABEL_PROPERTY_INDEX_STATS_CLEAR: return Marker::DELTA_LABEL_PROPERTY_INDEX_STATS_CLEAR; + case StorageMetadataOperation::EDGE_TYPE_INDEX_CREATE: + return Marker::DELTA_EDGE_TYPE_INDEX_CREATE; + case StorageMetadataOperation::EDGE_TYPE_INDEX_DROP: + return Marker::DELTA_EDGE_TYPE_INDEX_DROP; case StorageMetadataOperation::EXISTENCE_CONSTRAINT_CREATE: return Marker::DELTA_EXISTENCE_CONSTRAINT_CREATE; case StorageMetadataOperation::EXISTENCE_CONSTRAINT_DROP: @@ -172,6 +176,10 @@ WalDeltaData::Type MarkerToWalDeltaDataType(Marker marker) { return WalDeltaData::Type::LABEL_PROPERTY_INDEX_STATS_SET; case Marker::DELTA_LABEL_PROPERTY_INDEX_STATS_CLEAR: return WalDeltaData::Type::LABEL_PROPERTY_INDEX_STATS_CLEAR; + case Marker::DELTA_EDGE_TYPE_INDEX_CREATE: + return WalDeltaData::Type::EDGE_INDEX_CREATE; + case Marker::DELTA_EDGE_TYPE_INDEX_DROP: + return WalDeltaData::Type::EDGE_INDEX_DROP; case Marker::DELTA_EXISTENCE_CONSTRAINT_CREATE: return WalDeltaData::Type::EXISTENCE_CONSTRAINT_CREATE; case Marker::DELTA_EXISTENCE_CONSTRAINT_DROP: @@ -198,6 +206,7 @@ WalDeltaData::Type MarkerToWalDeltaDataType(Marker marker) { case Marker::SECTION_CONSTRAINTS: case Marker::SECTION_DELTA: case Marker::SECTION_EPOCH_HISTORY: + case Marker::SECTION_EDGE_INDICES: case Marker::SECTION_OFFSETS: case Marker::VALUE_FALSE: case Marker::VALUE_TRUE: @@ -280,6 +289,7 @@ WalDeltaData ReadSkipWalDeltaData(BaseDecoder *decoder) { } case WalDeltaData::Type::TRANSACTION_END: break; + // 
NOLINTNEXTLINE(bugprone-branch-clone) case WalDeltaData::Type::LABEL_INDEX_CREATE: case WalDeltaData::Type::LABEL_INDEX_DROP: case WalDeltaData::Type::LABEL_INDEX_STATS_CLEAR: @@ -295,6 +305,17 @@ WalDeltaData ReadSkipWalDeltaData(BaseDecoder *decoder) { } break; } + case WalDeltaData::Type::EDGE_INDEX_CREATE: + case WalDeltaData::Type::EDGE_INDEX_DROP: { + if constexpr (read_data) { + auto edge_type = decoder->ReadString(); + if (!edge_type) throw RecoveryFailure("Invalid WAL data!"); + delta.operation_edge_type.edge_type = std::move(*edge_type); + } else { + if (!decoder->SkipString()) throw RecoveryFailure("Invalid WAL data!"); + } + break; + } case WalDeltaData::Type::LABEL_INDEX_STATS_SET: { if constexpr (read_data) { auto label = decoder->ReadString(); @@ -522,6 +543,9 @@ bool operator==(const WalDeltaData &a, const WalDeltaData &b) { case WalDeltaData::Type::UNIQUE_CONSTRAINT_DROP: return a.operation_label_properties.label == b.operation_label_properties.label && a.operation_label_properties.properties == b.operation_label_properties.properties; + case WalDeltaData::Type::EDGE_INDEX_CREATE: + case WalDeltaData::Type::EDGE_INDEX_DROP: + return a.operation_edge_type.edge_type == b.operation_edge_type.edge_type; } } bool operator!=(const WalDeltaData &a, const WalDeltaData &b) { return !(a == b); } @@ -703,6 +727,37 @@ void EncodeOperation(BaseEncoder *encoder, NameIdMapper *name_id_mapper, Storage } break; } + case StorageMetadataOperation::EDGE_TYPE_INDEX_CREATE: + case StorageMetadataOperation::EDGE_TYPE_INDEX_DROP: { + MG_ASSERT(false, "Invalid function call!"); + } + } +} + +void EncodeOperation(BaseEncoder *encoder, NameIdMapper *name_id_mapper, StorageMetadataOperation operation, + EdgeTypeId edge_type, uint64_t timestamp) { + encoder->WriteMarker(Marker::SECTION_DELTA); + encoder->WriteUint(timestamp); + switch (operation) { + case StorageMetadataOperation::EDGE_TYPE_INDEX_CREATE: + case StorageMetadataOperation::EDGE_TYPE_INDEX_DROP: { + 
encoder->WriteMarker(OperationToMarker(operation)); + encoder->WriteString(name_id_mapper->IdToName(edge_type.AsUint())); + break; + } + case StorageMetadataOperation::LABEL_INDEX_CREATE: + case StorageMetadataOperation::LABEL_INDEX_DROP: + case StorageMetadataOperation::LABEL_INDEX_STATS_CLEAR: + case StorageMetadataOperation::LABEL_PROPERTY_INDEX_STATS_CLEAR: + case StorageMetadataOperation::LABEL_INDEX_STATS_SET: + case StorageMetadataOperation::LABEL_PROPERTY_INDEX_CREATE: + case StorageMetadataOperation::LABEL_PROPERTY_INDEX_DROP: + case StorageMetadataOperation::EXISTENCE_CONSTRAINT_CREATE: + case StorageMetadataOperation::EXISTENCE_CONSTRAINT_DROP: + case StorageMetadataOperation::LABEL_PROPERTY_INDEX_STATS_SET: + case StorageMetadataOperation::UNIQUE_CONSTRAINT_CREATE: + case StorageMetadataOperation::UNIQUE_CONSTRAINT_DROP: + MG_ASSERT(false, "Invalid function call!"); } } @@ -887,6 +942,18 @@ RecoveryInfo LoadWal(const std::filesystem::path &path, RecoveredIndicesAndConst "The label index doesn't exist!"); break; } + case WalDeltaData::Type::EDGE_INDEX_CREATE: { + auto edge_type_id = EdgeTypeId::FromUint(name_id_mapper->NameToId(delta.operation_edge_type.edge_type)); + AddRecoveredIndexConstraint(&indices_constraints->indices.edge, edge_type_id, + "The edge-type index already exists!"); + break; + } + case WalDeltaData::Type::EDGE_INDEX_DROP: { + auto edge_type_id = EdgeTypeId::FromUint(name_id_mapper->NameToId(delta.operation_edge_type.edge_type)); + RemoveRecoveredIndexConstraint(&indices_constraints->indices.edge, edge_type_id, + "The edge-type index doesn't exist!"); + break; + } case WalDeltaData::Type::LABEL_INDEX_STATS_SET: { auto label_id = LabelId::FromUint(name_id_mapper->NameToId(delta.operation_label_stats.label)); LabelIndexStats stats{}; @@ -1088,6 +1155,11 @@ void WalFile::AppendOperation(StorageMetadataOperation operation, LabelId label, UpdateStats(timestamp); } +void WalFile::AppendOperation(StorageMetadataOperation operation, EdgeTypeId 
edge_type, uint64_t timestamp) { + EncodeOperation(&wal_, name_id_mapper_, operation, edge_type, timestamp); + UpdateStats(timestamp); +} + void WalFile::Sync() { wal_.Sync(); } uint64_t WalFile::GetSize() { return wal_.GetSize(); } diff --git a/src/storage/v2/durability/wal.hpp b/src/storage/v2/durability/wal.hpp index 20d88b040..516487e0d 100644 --- a/src/storage/v2/durability/wal.hpp +++ b/src/storage/v2/durability/wal.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -67,6 +67,8 @@ struct WalDeltaData { LABEL_PROPERTY_INDEX_DROP, LABEL_PROPERTY_INDEX_STATS_SET, LABEL_PROPERTY_INDEX_STATS_CLEAR, + EDGE_INDEX_CREATE, + EDGE_INDEX_DROP, EXISTENCE_CONSTRAINT_CREATE, EXISTENCE_CONSTRAINT_DROP, UNIQUE_CONSTRAINT_CREATE, @@ -111,6 +113,10 @@ struct WalDeltaData { std::set<std::string, std::less<>> properties; } operation_label_properties; + struct { + std::string edge_type; + } operation_edge_type; + struct { std::string label; std::string stats; @@ -155,6 +161,8 @@ constexpr bool IsWalDeltaDataTypeTransactionEndVersion15(const WalDeltaData::Typ case WalDeltaData::Type::LABEL_PROPERTY_INDEX_DROP: case WalDeltaData::Type::LABEL_PROPERTY_INDEX_STATS_SET: case WalDeltaData::Type::LABEL_PROPERTY_INDEX_STATS_CLEAR: + case WalDeltaData::Type::EDGE_INDEX_CREATE: + case WalDeltaData::Type::EDGE_INDEX_DROP: case WalDeltaData::Type::EXISTENCE_CONSTRAINT_CREATE: case WalDeltaData::Type::EXISTENCE_CONSTRAINT_DROP: case WalDeltaData::Type::UNIQUE_CONSTRAINT_CREATE: @@ -164,7 +172,7 @@ constexpr bool IsWalDeltaDataTypeTransactionEndVersion15(const WalDeltaData::Typ } constexpr bool IsWalDeltaDataTypeTransactionEnd(const WalDeltaData::Type type, const uint64_t version = kVersion) { - if (version < 16U) { + if (version < 17U) { return 
IsWalDeltaDataTypeTransactionEndVersion15(type); } // All deltas are now handled in a transactional scope @@ -208,6 +216,9 @@ void EncodeOperation(BaseEncoder *encoder, NameIdMapper *name_id_mapper, Storage LabelId label, const std::set<PropertyId> &properties, const LabelIndexStats &stats, const LabelPropertyIndexStats &property_stats, uint64_t timestamp); +void EncodeOperation(BaseEncoder *encoder, NameIdMapper *name_id_mapper, StorageMetadataOperation operation, + EdgeTypeId edge_type, uint64_t timestamp); + /// Function used to load the WAL data into the storage. /// @throw RecoveryFailure RecoveryInfo LoadWal(const std::filesystem::path &path, RecoveredIndicesAndConstraints *indices_constraints, @@ -240,6 +251,8 @@ class WalFile { void AppendOperation(StorageMetadataOperation operation, LabelId label, const std::set<PropertyId> &properties, const LabelIndexStats &stats, const LabelPropertyIndexStats &property_stats, uint64_t timestamp); + void AppendOperation(StorageMetadataOperation operation, EdgeTypeId edge_type, uint64_t timestamp); + void Sync(); uint64_t GetSize(); diff --git a/src/storage/v2/edges_iterable.cpp b/src/storage/v2/edges_iterable.cpp new file mode 100644 index 000000000..6acae34e3 --- /dev/null +++ b/src/storage/v2/edges_iterable.cpp @@ -0,0 +1,149 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +#include "storage/v2/edges_iterable.hpp" + +namespace memgraph::storage { + +EdgesIterable::EdgesIterable(InMemoryEdgeTypeIndex::Iterable edges) : type_(Type::BY_EDGE_TYPE_IN_MEMORY) { + new (&in_memory_edges_by_edge_type_) InMemoryEdgeTypeIndex::Iterable(std::move(edges)); +} + +EdgesIterable::EdgesIterable(EdgesIterable &&other) noexcept : type_(other.type_) { + switch (other.type_) { + case Type::BY_EDGE_TYPE_IN_MEMORY: + new (&in_memory_edges_by_edge_type_) + InMemoryEdgeTypeIndex::Iterable(std::move(other.in_memory_edges_by_edge_type_)); + break; + } +} + +EdgesIterable &EdgesIterable::operator=(EdgesIterable &&other) noexcept { + Destroy(); + type_ = other.type_; + switch (other.type_) { + case Type::BY_EDGE_TYPE_IN_MEMORY: + new (&in_memory_edges_by_edge_type_) + InMemoryEdgeTypeIndex::Iterable(std::move(other.in_memory_edges_by_edge_type_)); + break; + } + return *this; +} + +EdgesIterable::~EdgesIterable() { Destroy(); } + +void EdgesIterable::Destroy() noexcept { + switch (type_) { + case Type::BY_EDGE_TYPE_IN_MEMORY: + in_memory_edges_by_edge_type_.InMemoryEdgeTypeIndex::Iterable::~Iterable(); + break; + } +} + +EdgesIterable::Iterator EdgesIterable::begin() { + switch (type_) { + case Type::BY_EDGE_TYPE_IN_MEMORY: + return Iterator(in_memory_edges_by_edge_type_.begin()); + } +} + +EdgesIterable::Iterator EdgesIterable::end() { + switch (type_) { + case Type::BY_EDGE_TYPE_IN_MEMORY: + return Iterator(in_memory_edges_by_edge_type_.end()); + } +} + +EdgesIterable::Iterator::Iterator(InMemoryEdgeTypeIndex::Iterable::Iterator it) : type_(Type::BY_EDGE_TYPE_IN_MEMORY) { + // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) + new (&in_memory_edges_by_edge_type_) InMemoryEdgeTypeIndex::Iterable::Iterator(std::move(it)); +} + +EdgesIterable::Iterator::Iterator(const EdgesIterable::Iterator &other) : type_(other.type_) { + switch (other.type_) { + case Type::BY_EDGE_TYPE_IN_MEMORY: + new (&in_memory_edges_by_edge_type_) + 
InMemoryEdgeTypeIndex::Iterable::Iterator(other.in_memory_edges_by_edge_type_); + break; + } +} + +// NOLINTNEXTLINE(cert-oop54-cpp) +EdgesIterable::Iterator &EdgesIterable::Iterator::operator=(const EdgesIterable::Iterator &other) { + Destroy(); + type_ = other.type_; + switch (other.type_) { + case Type::BY_EDGE_TYPE_IN_MEMORY: + new (&in_memory_edges_by_edge_type_) + InMemoryEdgeTypeIndex::Iterable::Iterator(other.in_memory_edges_by_edge_type_); + break; + } + return *this; +} + +EdgesIterable::Iterator::Iterator(EdgesIterable::Iterator &&other) noexcept : type_(other.type_) { + switch (other.type_) { + case Type::BY_EDGE_TYPE_IN_MEMORY: + new (&in_memory_edges_by_edge_type_) + // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) + InMemoryEdgeTypeIndex::Iterable::Iterator(std::move(other.in_memory_edges_by_edge_type_)); + break; + } +} + +EdgesIterable::Iterator &EdgesIterable::Iterator::operator=(EdgesIterable::Iterator &&other) noexcept { + Destroy(); + type_ = other.type_; + switch (other.type_) { + case Type::BY_EDGE_TYPE_IN_MEMORY: + new (&in_memory_edges_by_edge_type_) + // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) + InMemoryEdgeTypeIndex::Iterable::Iterator(std::move(other.in_memory_edges_by_edge_type_)); + break; + } + return *this; +} + +EdgesIterable::Iterator::~Iterator() { Destroy(); } + +void EdgesIterable::Iterator::Destroy() noexcept { + switch (type_) { + case Type::BY_EDGE_TYPE_IN_MEMORY: + in_memory_edges_by_edge_type_.InMemoryEdgeTypeIndex::Iterable::Iterator::~Iterator(); + break; + } +} + +EdgeAccessor const &EdgesIterable::Iterator::operator*() const { + switch (type_) { + ; + case Type::BY_EDGE_TYPE_IN_MEMORY: + return *in_memory_edges_by_edge_type_; + } +} + +EdgesIterable::Iterator &EdgesIterable::Iterator::operator++() { + switch (type_) { + case Type::BY_EDGE_TYPE_IN_MEMORY: + ++in_memory_edges_by_edge_type_; + break; + } + return *this; +} + +bool EdgesIterable::Iterator::operator==(const Iterator 
&other) const { + switch (type_) { + case Type::BY_EDGE_TYPE_IN_MEMORY: + return in_memory_edges_by_edge_type_ == other.in_memory_edges_by_edge_type_; + } +} + +} // namespace memgraph::storage diff --git a/src/storage/v2/edges_iterable.hpp b/src/storage/v2/edges_iterable.hpp new file mode 100644 index 000000000..9c9326705 --- /dev/null +++ b/src/storage/v2/edges_iterable.hpp @@ -0,0 +1,73 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#pragma once + +#include "storage/v2/all_vertices_iterable.hpp" +#include "storage/v2/inmemory/edge_type_index.hpp" + +namespace memgraph::storage { + +class InMemoryEdgeTypeIndex; + +class EdgesIterable final { + enum class Type { BY_EDGE_TYPE_IN_MEMORY }; + + Type type_; + union { + InMemoryEdgeTypeIndex::Iterable in_memory_edges_by_edge_type_; + }; + + void Destroy() noexcept; + + public: + explicit EdgesIterable(InMemoryEdgeTypeIndex::Iterable); + + EdgesIterable(const EdgesIterable &) = delete; + EdgesIterable &operator=(const EdgesIterable &) = delete; + + EdgesIterable(EdgesIterable &&) noexcept; + EdgesIterable &operator=(EdgesIterable &&) noexcept; + + ~EdgesIterable(); + + class Iterator final { + Type type_; + union { + InMemoryEdgeTypeIndex::Iterable::Iterator in_memory_edges_by_edge_type_; + }; + + void Destroy() noexcept; + + public: + explicit Iterator(InMemoryEdgeTypeIndex::Iterable::Iterator); + + Iterator(const Iterator &); + Iterator &operator=(const Iterator &); + + Iterator(Iterator &&) noexcept; + Iterator 
&operator=(Iterator &&) noexcept; + + ~Iterator(); + + EdgeAccessor const &operator*() const; + + Iterator &operator++(); + + bool operator==(const Iterator &other) const; + bool operator!=(const Iterator &other) const { return !(*this == other); } + }; + + Iterator begin(); + Iterator end(); +}; + +} // namespace memgraph::storage diff --git a/src/storage/v2/indices/edge_type_index.hpp b/src/storage/v2/indices/edge_type_index.hpp new file mode 100644 index 000000000..788ccb225 --- /dev/null +++ b/src/storage/v2/indices/edge_type_index.hpp @@ -0,0 +1,46 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +#pragma once + +#include <vector> + +#include "storage/v2/transaction.hpp" + +namespace memgraph::storage { + +class EdgeTypeIndex { + public: + EdgeTypeIndex() = default; + + EdgeTypeIndex(const EdgeTypeIndex &) = delete; + EdgeTypeIndex(EdgeTypeIndex &&) = delete; + EdgeTypeIndex &operator=(const EdgeTypeIndex &) = delete; + EdgeTypeIndex &operator=(EdgeTypeIndex &&) = delete; + + virtual ~EdgeTypeIndex() = default; + + virtual bool DropIndex(EdgeTypeId edge_type) = 0; + + virtual bool IndexExists(EdgeTypeId edge_type) const = 0; + + virtual std::vector<EdgeTypeId> ListIndices() const = 0; + + virtual uint64_t ApproximateEdgeCount(EdgeTypeId edge_type) const = 0; + + virtual void UpdateOnEdgeCreation(Vertex *from, Vertex *to, EdgeRef edge_ref, EdgeTypeId edge_type, + const Transaction &tx) = 0; + + virtual void UpdateOnEdgeModification(Vertex *old_from, Vertex *old_to, Vertex *new_from, Vertex *new_to, + EdgeRef edge_ref, EdgeTypeId edge_type, const Transaction &tx) = 0; +}; + +} // namespace memgraph::storage diff --git a/src/storage/v2/indices/indices.cpp b/src/storage/v2/indices/indices.cpp index c86ec8442..6068f888f 100644 --- a/src/storage/v2/indices/indices.cpp +++ b/src/storage/v2/indices/indices.cpp @@ -10,8 +10,10 @@ // licenses/APL.txt. 
#include "storage/v2/indices/indices.hpp" +#include "storage/v2/disk/edge_type_index.hpp" #include "storage/v2/disk/label_index.hpp" #include "storage/v2/disk/label_property_index.hpp" +#include "storage/v2/inmemory/edge_type_index.hpp" #include "storage/v2/inmemory/label_index.hpp" #include "storage/v2/inmemory/label_property_index.hpp" @@ -35,6 +37,8 @@ void Indices::AbortEntries(LabelId label, std::span<std::pair<PropertyValue, Ver void Indices::RemoveObsoleteEntries(uint64_t oldest_active_start_timestamp, std::stop_token token) const { static_cast<InMemoryLabelIndex *>(label_index_.get())->RemoveObsoleteEntries(oldest_active_start_timestamp, token); static_cast<InMemoryLabelPropertyIndex *>(label_property_index_.get()) + ->RemoveObsoleteEntries(oldest_active_start_timestamp, token); + static_cast<InMemoryEdgeTypeIndex *>(edge_type_index_.get()) ->RemoveObsoleteEntries(oldest_active_start_timestamp, std::move(token)); } @@ -53,14 +57,21 @@ void Indices::UpdateOnSetProperty(PropertyId property, const PropertyValue &valu label_property_index_->UpdateOnSetProperty(property, value, vertex, tx); } +void Indices::UpdateOnEdgeCreation(Vertex *from, Vertex *to, EdgeRef edge_ref, EdgeTypeId edge_type, + const Transaction &tx) const { + edge_type_index_->UpdateOnEdgeCreation(from, to, edge_ref, edge_type, tx); +} + Indices::Indices(const Config &config, StorageMode storage_mode) { std::invoke([this, config, storage_mode]() { if (storage_mode == StorageMode::IN_MEMORY_TRANSACTIONAL || storage_mode == StorageMode::IN_MEMORY_ANALYTICAL) { label_index_ = std::make_unique<InMemoryLabelIndex>(); label_property_index_ = std::make_unique<InMemoryLabelPropertyIndex>(); + edge_type_index_ = std::make_unique<InMemoryEdgeTypeIndex>(); } else { label_index_ = std::make_unique<DiskLabelIndex>(config); label_property_index_ = std::make_unique<DiskLabelPropertyIndex>(config); + edge_type_index_ = std::make_unique<DiskEdgeTypeIndex>(); } }); } diff --git 
a/src/storage/v2/indices/indices.hpp b/src/storage/v2/indices/indices.hpp index d95187bbb..40cff577f 100644 --- a/src/storage/v2/indices/indices.hpp +++ b/src/storage/v2/indices/indices.hpp @@ -15,6 +15,7 @@ #include <span> #include "storage/v2/id_types.hpp" +#include "storage/v2/indices/edge_type_index.hpp" #include "storage/v2/indices/label_index.hpp" #include "storage/v2/indices/label_property_index.hpp" #include "storage/v2/storage_mode.hpp" @@ -64,8 +65,12 @@ struct Indices { void UpdateOnSetProperty(PropertyId property, const PropertyValue &value, Vertex *vertex, const Transaction &tx) const; + void UpdateOnEdgeCreation(Vertex *from, Vertex *to, EdgeRef edge_ref, EdgeTypeId edge_type, + const Transaction &tx) const; + std::unique_ptr<LabelIndex> label_index_; std::unique_ptr<LabelPropertyIndex> label_property_index_; + std::unique_ptr<EdgeTypeIndex> edge_type_index_; }; } // namespace memgraph::storage diff --git a/src/storage/v2/inmemory/edge_type_index.cpp b/src/storage/v2/inmemory/edge_type_index.cpp new file mode 100644 index 000000000..e439628b4 --- /dev/null +++ b/src/storage/v2/inmemory/edge_type_index.cpp @@ -0,0 +1,318 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +#include "storage/v2/inmemory/edge_type_index.hpp" + +#include "storage/v2/constraints/constraints.hpp" +#include "storage/v2/indices/indices_utils.hpp" +#include "utils/counter.hpp" + +namespace { + +using Delta = memgraph::storage::Delta; +using Vertex = memgraph::storage::Vertex; +using Edge = memgraph::storage::Edge; +using EdgeRef = memgraph::storage::EdgeRef; +using EdgeTypeId = memgraph::storage::EdgeTypeId; +using Transaction = memgraph::storage::Transaction; +using View = memgraph::storage::View; + +bool IsIndexEntryVisible(Edge *edge, const Transaction *transaction, View view) { + bool exists = true; + bool deleted = true; + Delta *delta = nullptr; + { + auto guard = std::shared_lock{edge->lock}; + deleted = edge->deleted; + delta = edge->delta; + } + ApplyDeltasForRead(transaction, delta, view, [&](const Delta &delta) { + switch (delta.action) { + case Delta::Action::ADD_LABEL: + case Delta::Action::REMOVE_LABEL: + case Delta::Action::SET_PROPERTY: + case Delta::Action::ADD_IN_EDGE: + case Delta::Action::ADD_OUT_EDGE: + case Delta::Action::REMOVE_IN_EDGE: + case Delta::Action::REMOVE_OUT_EDGE: + break; + case Delta::Action::RECREATE_OBJECT: { + deleted = false; + break; + } + case Delta::Action::DELETE_DESERIALIZED_OBJECT: + case Delta::Action::DELETE_OBJECT: { + exists = false; + break; + } + } + }); + return exists && !deleted; +} + +using ReturnType = std::optional<std::tuple<EdgeTypeId, Vertex *, EdgeRef>>; +ReturnType VertexDeletedConnectedEdges(Vertex *vertex, Edge *edge, const Transaction *transaction, View view) { + ReturnType link; + Delta *delta = nullptr; + { + auto guard = std::shared_lock{vertex->lock}; + delta = vertex->delta; + } + ApplyDeltasForRead(transaction, delta, view, [&](const Delta &delta) { + switch (delta.action) { + case Delta::Action::ADD_LABEL: + case Delta::Action::REMOVE_LABEL: + case Delta::Action::SET_PROPERTY: + break; + case Delta::Action::ADD_IN_EDGE: { + if (edge == delta.vertex_edge.edge.ptr) { + link = 
{delta.vertex_edge.edge_type, delta.vertex_edge.vertex, delta.vertex_edge.edge}; + auto it = std::find(vertex->in_edges.begin(), vertex->in_edges.end(), link); + MG_ASSERT(it == vertex->in_edges.end(), "Invalid database state!"); + break; + } + } + case Delta::Action::ADD_OUT_EDGE: { + if (edge == delta.vertex_edge.edge.ptr) { + link = {delta.vertex_edge.edge_type, delta.vertex_edge.vertex, delta.vertex_edge.edge}; + auto it = std::find(vertex->out_edges.begin(), vertex->out_edges.end(), link); + MG_ASSERT(it == vertex->out_edges.end(), "Invalid database state!"); + break; + } + } + case Delta::Action::REMOVE_IN_EDGE: + case Delta::Action::REMOVE_OUT_EDGE: + case Delta::Action::RECREATE_OBJECT: + case Delta::Action::DELETE_DESERIALIZED_OBJECT: + case Delta::Action::DELETE_OBJECT: + break; + } + }); + return link; +} + +} // namespace + +namespace memgraph::storage { + +bool InMemoryEdgeTypeIndex::CreateIndex(EdgeTypeId edge_type, utils::SkipList<Vertex>::Accessor vertices) { + auto [it, emplaced] = index_.try_emplace(edge_type); + if (!emplaced) { + return false; + } + + utils::MemoryTracker::OutOfMemoryExceptionEnabler oom_exception; + try { + auto edge_acc = it->second.access(); + for (auto &from_vertex : vertices) { + if (from_vertex.deleted) { + continue; + } + + for (auto &edge : from_vertex.out_edges) { + const auto type = std::get<kEdgeTypeIdPos>(edge); + if (type == edge_type) { + auto *to_vertex = std::get<kVertexPos>(edge); + if (to_vertex->deleted) { + continue; + } + edge_acc.insert({&from_vertex, to_vertex, std::get<kEdgeRefPos>(edge).ptr, 0}); + } + } + } + } catch (const utils::OutOfMemoryException &) { + utils::MemoryTracker::OutOfMemoryExceptionBlocker oom_exception_blocker; + index_.erase(it); + throw; + } + + return true; +} + +bool InMemoryEdgeTypeIndex::DropIndex(EdgeTypeId edge_type) { return index_.erase(edge_type) > 0; } + +bool InMemoryEdgeTypeIndex::IndexExists(EdgeTypeId edge_type) const { return index_.find(edge_type) != index_.end(); } 
+ +std::vector<EdgeTypeId> InMemoryEdgeTypeIndex::ListIndices() const { + std::vector<EdgeTypeId> ret; + ret.reserve(index_.size()); + for (const auto &item : index_) { + ret.push_back(item.first); + } + return ret; +} + +void InMemoryEdgeTypeIndex::RemoveObsoleteEntries(uint64_t oldest_active_start_timestamp, std::stop_token token) { + auto maybe_stop = utils::ResettableCounter<2048>(); + + for (auto &label_storage : index_) { + if (token.stop_requested()) return; + + auto edges_acc = label_storage.second.access(); + for (auto it = edges_acc.begin(); it != edges_acc.end();) { + if (maybe_stop() && token.stop_requested()) return; + + auto next_it = it; + ++next_it; + + if (it->timestamp >= oldest_active_start_timestamp) { + it = next_it; + continue; + } + + if (next_it != edges_acc.end() || it->from_vertex->deleted || it->to_vertex->deleted || + !std::ranges::all_of(it->from_vertex->out_edges, [&](const auto &edge) { + auto *to_vertex = std::get<InMemoryEdgeTypeIndex::kVertexPos>(edge); + return to_vertex != it->to_vertex; + })) { + edges_acc.remove(*it); + } + + it = next_it; + } + } +} + +uint64_t InMemoryEdgeTypeIndex::ApproximateEdgeCount(EdgeTypeId edge_type) const { + if (auto it = index_.find(edge_type); it != index_.end()) { + return it->second.size(); + } + return 0; +} + +void InMemoryEdgeTypeIndex::UpdateOnEdgeCreation(Vertex *from, Vertex *to, EdgeRef edge_ref, EdgeTypeId edge_type, + const Transaction &tx) { + auto it = index_.find(edge_type); + if (it == index_.end()) { + return; + } + auto acc = it->second.access(); + acc.insert(Entry{from, to, edge_ref.ptr, tx.start_timestamp}); +} + +void InMemoryEdgeTypeIndex::UpdateOnEdgeModification(Vertex *old_from, Vertex *old_to, Vertex *new_from, Vertex *new_to, + EdgeRef edge_ref, EdgeTypeId edge_type, const Transaction &tx) { + auto it = index_.find(edge_type); + if (it == index_.end()) { + return; + } + auto acc = it->second.access(); + + auto entry_to_update = std::ranges::find_if(acc, [&](const auto 
&entry) { + return entry.from_vertex == old_from && entry.to_vertex == old_to && entry.edge == edge_ref.ptr; + }); + + acc.remove(Entry{entry_to_update->from_vertex, entry_to_update->to_vertex, entry_to_update->edge, + entry_to_update->timestamp}); + acc.insert(Entry{new_from, new_to, edge_ref.ptr, tx.start_timestamp}); +} + +InMemoryEdgeTypeIndex::Iterable::Iterable(utils::SkipList<Entry>::Accessor index_accessor, EdgeTypeId edge_type, + View view, Storage *storage, Transaction *transaction) + : index_accessor_(std::move(index_accessor)), + edge_type_(edge_type), + view_(view), + storage_(storage), + transaction_(transaction) {} + +InMemoryEdgeTypeIndex::Iterable::Iterator::Iterator(Iterable *self, utils::SkipList<Entry>::Iterator index_iterator) + : self_(self), + index_iterator_(index_iterator), + current_edge_accessor_(EdgeRef{nullptr}, EdgeTypeId::FromInt(0), nullptr, nullptr, self_->storage_, nullptr), + current_edge_(nullptr) { + AdvanceUntilValid(); +} + +InMemoryEdgeTypeIndex::Iterable::Iterator &InMemoryEdgeTypeIndex::Iterable::Iterator::operator++() { + ++index_iterator_; + AdvanceUntilValid(); + return *this; +} + +void InMemoryEdgeTypeIndex::Iterable::Iterator::AdvanceUntilValid() { + for (; index_iterator_ != self_->index_accessor_.end(); ++index_iterator_) { + auto *from_vertex = index_iterator_->from_vertex; + auto *to_vertex = index_iterator_->to_vertex; + + if (!IsIndexEntryVisible(index_iterator_->edge, self_->transaction_, self_->view_) || from_vertex->deleted || + to_vertex->deleted) { + continue; + } + + const bool edge_was_deleted = index_iterator_->edge->deleted; + auto [edge_ref, edge_type, deleted_from_vertex, deleted_to_vertex] = GetEdgeInfo(); + MG_ASSERT(edge_ref != EdgeRef(nullptr), "Invalid database state!"); + + if (edge_was_deleted) { + from_vertex = deleted_from_vertex; + to_vertex = deleted_to_vertex; + } + + auto accessor = EdgeAccessor{edge_ref, edge_type, from_vertex, to_vertex, self_->storage_, self_->transaction_}; + if 
(!accessor.IsVisible(self_->view_)) { + continue; + } + + current_edge_accessor_ = accessor; + current_edge_ = edge_ref; + break; + } +} + +std::tuple<EdgeRef, EdgeTypeId, Vertex *, Vertex *> InMemoryEdgeTypeIndex::Iterable::Iterator::GetEdgeInfo() { + auto *from_vertex = index_iterator_->from_vertex; + auto *to_vertex = index_iterator_->to_vertex; + + if (index_iterator_->edge->deleted) { + const auto missing_in_edge = + VertexDeletedConnectedEdges(from_vertex, index_iterator_->edge, self_->transaction_, self_->view_); + const auto missing_out_edge = + VertexDeletedConnectedEdges(to_vertex, index_iterator_->edge, self_->transaction_, self_->view_); + if (missing_in_edge && missing_out_edge && + std::get<kEdgeRefPos>(*missing_in_edge) == std::get<kEdgeRefPos>(*missing_out_edge)) { + return std::make_tuple(std::get<kEdgeRefPos>(*missing_in_edge), std::get<kEdgeTypeIdPos>(*missing_in_edge), + to_vertex, from_vertex); + } + } + + const auto &from_edges = from_vertex->out_edges; + const auto &to_edges = to_vertex->in_edges; + + auto it = std::find_if(from_edges.begin(), from_edges.end(), [&](const auto &from_entry) { + const auto &from_edge = std::get<kEdgeRefPos>(from_entry); + return std::any_of(to_edges.begin(), to_edges.end(), [&](const auto &to_entry) { + const auto &to_edge = std::get<kEdgeRefPos>(to_entry); + return index_iterator_->edge->gid == from_edge.ptr->gid && from_edge.ptr->gid == to_edge.ptr->gid; + }); + }); + + if (it != from_edges.end()) { + const auto &from_edge = std::get<kEdgeRefPos>(*it); + return std::make_tuple(from_edge, std::get<kEdgeTypeIdPos>(*it), from_vertex, to_vertex); + } + + return {EdgeRef(nullptr), EdgeTypeId::FromUint(0U), nullptr, nullptr}; +} + +void InMemoryEdgeTypeIndex::RunGC() { + for (auto &index_entry : index_) { + index_entry.second.run_gc(); + } +} + +InMemoryEdgeTypeIndex::Iterable InMemoryEdgeTypeIndex::Edges(EdgeTypeId edge_type, View view, Storage *storage, + Transaction *transaction) { + const auto it = 
index_.find(edge_type); + MG_ASSERT(it != index_.end(), "Index for edge-type {} doesn't exist", edge_type.AsUint()); + return {it->second.access(), edge_type, view, storage, transaction}; +} + +} // namespace memgraph::storage diff --git a/src/storage/v2/inmemory/edge_type_index.hpp b/src/storage/v2/inmemory/edge_type_index.hpp new file mode 100644 index 000000000..db8f7843f --- /dev/null +++ b/src/storage/v2/inmemory/edge_type_index.hpp @@ -0,0 +1,113 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +#pragma once + +#include <map> +#include <utility> + +#include "storage/v2/constraints/constraints.hpp" +#include "storage/v2/edge_accessor.hpp" +#include "storage/v2/id_types.hpp" +#include "storage/v2/indices/edge_type_index.hpp" +#include "storage/v2/indices/label_index_stats.hpp" +#include "utils/rw_lock.hpp" +#include "utils/synchronized.hpp" + +namespace memgraph::storage { + +class InMemoryEdgeTypeIndex : public storage::EdgeTypeIndex { + private: + struct Entry { + Vertex *from_vertex; + Vertex *to_vertex; + + Edge *edge; + + uint64_t timestamp; + + bool operator<(const Entry &rhs) const { return edge->gid < rhs.edge->gid; } + bool operator==(const Entry &rhs) const { return edge->gid == rhs.edge->gid; } + }; + + public: + InMemoryEdgeTypeIndex() = default; + + /// @throw std::bad_alloc + bool CreateIndex(EdgeTypeId edge_type, utils::SkipList<Vertex>::Accessor vertices); + + /// Returns false if there was no index to drop + bool DropIndex(EdgeTypeId edge_type) override; + + bool IndexExists(EdgeTypeId edge_type) const override; + + std::vector<EdgeTypeId> ListIndices() const override; + + void RemoveObsoleteEntries(uint64_t oldest_active_start_timestamp, std::stop_token token); + + uint64_t ApproximateEdgeCount(EdgeTypeId edge_type) const override; + + void UpdateOnEdgeCreation(Vertex *from, Vertex *to, EdgeRef edge_ref, EdgeTypeId edge_type, + const Transaction &tx) override; + + void UpdateOnEdgeModification(Vertex *old_from, Vertex *old_to, Vertex *new_from, Vertex *new_to, EdgeRef edge_ref, + EdgeTypeId edge_type, const Transaction &tx) override; + + static constexpr std::size_t kEdgeTypeIdPos = 0U; + static constexpr std::size_t kVertexPos = 1U; + static constexpr std::size_t kEdgeRefPos = 2U; + + class Iterable { + public: + Iterable(utils::SkipList<Entry>::Accessor index_accessor, EdgeTypeId edge_type, View view, Storage *storage, + Transaction *transaction); + + class Iterator { + public: + Iterator(Iterable *self, 
utils::SkipList<Entry>::Iterator index_iterator); + + EdgeAccessor const &operator*() const { return current_edge_accessor_; } + + bool operator==(const Iterator &other) const { return index_iterator_ == other.index_iterator_; } + bool operator!=(const Iterator &other) const { return index_iterator_ != other.index_iterator_; } + + Iterator &operator++(); + + private: + void AdvanceUntilValid(); + std::tuple<EdgeRef, EdgeTypeId, Vertex *, Vertex *> GetEdgeInfo(); + + Iterable *self_; + utils::SkipList<Entry>::Iterator index_iterator_; + EdgeAccessor current_edge_accessor_; + EdgeRef current_edge_{nullptr}; + }; + + Iterator begin() { return {this, index_accessor_.begin()}; } + Iterator end() { return {this, index_accessor_.end()}; } + + private: + utils::SkipList<Entry>::Accessor index_accessor_; + EdgeTypeId edge_type_; + View view_; + Storage *storage_; + Transaction *transaction_; + }; + + void RunGC(); + + Iterable Edges(EdgeTypeId edge_type, View view, Storage *storage, Transaction *transaction); + + private: + std::map<EdgeTypeId, utils::SkipList<Entry>> index_; +}; + +} // namespace memgraph::storage diff --git a/src/storage/v2/inmemory/replication/recovery.cpp b/src/storage/v2/inmemory/replication/recovery.cpp index 921c1f5c0..5f1182c75 100644 --- a/src/storage/v2/inmemory/replication/recovery.cpp +++ b/src/storage/v2/inmemory/replication/recovery.cpp @@ -106,8 +106,8 @@ uint64_t ReplicateCurrentWal(const utils::UUID &main_uuid, const InMemoryStorage return response.current_commit_timestamp; } -/// This method tries to find the optimal path for recoverying a single replica. -/// Based on the last commit transfered to replica it tries to update the +/// This method tries to find the optimal path for recovering a single replica. +/// Based on the last commit transferred to replica it tries to update the /// replica using durability files - WALs and Snapshots. 
WAL files are much /// smaller in size as they contain only the Deltas (changes) made during the /// transactions while Snapshots contain all the data. For that reason we prefer @@ -175,7 +175,7 @@ std::vector<RecoveryStep> GetRecoverySteps(uint64_t replica_commit, utils::FileR auto add_snapshot = [&]() { if (!latest_snapshot) return; const auto lock_success = locker_acc.AddPath(latest_snapshot->path); - MG_ASSERT(!lock_success.HasError(), "Tried to lock a nonexistant snapshot path."); + MG_ASSERT(!lock_success.HasError(), "Tried to lock a non-existent snapshot path."); recovery_steps.emplace_back(std::in_place_type_t<RecoverySnapshot>{}, std::move(latest_snapshot->path)); }; @@ -233,7 +233,7 @@ std::vector<RecoveryStep> GetRecoverySteps(uint64_t replica_commit, utils::FileR } } - // In all cases, if we have a current wal file we need to use itW + // In all cases, if we have a current wal file we need to use it if (current_wal_seq_num) { // NOTE: File not handled directly, so no need to lock it recovery_steps.emplace_back(RecoveryCurrentWal{*current_wal_seq_num}); diff --git a/src/storage/v2/inmemory/storage.cpp b/src/storage/v2/inmemory/storage.cpp index bd8534673..dab56750b 100644 --- a/src/storage/v2/inmemory/storage.cpp +++ b/src/storage/v2/inmemory/storage.cpp @@ -20,6 +20,7 @@ #include "storage/v2/durability/snapshot.hpp" #include "storage/v2/edge_direction.hpp" #include "storage/v2/id_types.hpp" +#include "storage/v2/inmemory/edge_type_index.hpp" #include "storage/v2/metadata_delta.hpp" /// REPLICATION /// @@ -109,6 +110,7 @@ InMemoryStorage::InMemoryStorage(Config config) timestamp_ = std::max(timestamp_, info->next_timestamp); if (info->last_commit_timestamp) { repl_storage_state_.last_commit_timestamp_ = *info->last_commit_timestamp; + spdlog::trace("Recovering last commit timestamp {}", *info->last_commit_timestamp); } } } else if (config_.durability.snapshot_wal_mode != Config::Durability::SnapshotWalMode::DISABLED || @@ -349,6 +351,9 @@ 
Result<EdgeAccessor> InMemoryStorage::InMemoryAccessor::CreateEdge(VertexAccesso transaction_.manyDeltasCache.Invalidate(from_vertex, edge_type, EdgeDirection::OUT); transaction_.manyDeltasCache.Invalidate(to_vertex, edge_type, EdgeDirection::IN); + // Update indices if they exist. + storage_->indices_.UpdateOnEdgeCreation(from_vertex, to_vertex, edge, edge_type, transaction_); + // Increment edge count. storage_->edge_count_.fetch_add(1, std::memory_order_acq_rel); }}; @@ -552,6 +557,11 @@ Result<EdgeAccessor> InMemoryStorage::InMemoryAccessor::EdgeSetFrom(EdgeAccessor CreateAndLinkDelta(&transaction_, to_vertex, Delta::RemoveInEdgeTag(), edge_type, new_from_vertex, edge_ref); to_vertex->in_edges.emplace_back(edge_type, new_from_vertex, edge_ref); + auto *in_memory = static_cast<InMemoryStorage *>(storage_); + auto *mem_edge_type_index = static_cast<InMemoryEdgeTypeIndex *>(in_memory->indices_.edge_type_index_.get()); + mem_edge_type_index->UpdateOnEdgeModification(old_from_vertex, to_vertex, new_from_vertex, to_vertex, edge_ref, + edge_type, transaction_); + transaction_.manyDeltasCache.Invalidate(new_from_vertex, edge_type, EdgeDirection::OUT); transaction_.manyDeltasCache.Invalidate(old_from_vertex, edge_type, EdgeDirection::OUT); transaction_.manyDeltasCache.Invalidate(to_vertex, edge_type, EdgeDirection::IN); @@ -658,6 +668,11 @@ Result<EdgeAccessor> InMemoryStorage::InMemoryAccessor::EdgeSetTo(EdgeAccessor * CreateAndLinkDelta(&transaction_, new_to_vertex, Delta::RemoveInEdgeTag(), edge_type, from_vertex, edge_ref); new_to_vertex->in_edges.emplace_back(edge_type, from_vertex, edge_ref); + auto *in_memory = static_cast<InMemoryStorage *>(storage_); + auto *mem_edge_type_index = static_cast<InMemoryEdgeTypeIndex *>(in_memory->indices_.edge_type_index_.get()); + mem_edge_type_index->UpdateOnEdgeModification(from_vertex, old_to_vertex, from_vertex, new_to_vertex, edge_ref, + edge_type, transaction_); + transaction_.manyDeltasCache.Invalidate(from_vertex, 
edge_type, EdgeDirection::OUT); transaction_.manyDeltasCache.Invalidate(old_to_vertex, edge_type, EdgeDirection::IN); transaction_.manyDeltasCache.Invalidate(new_to_vertex, edge_type, EdgeDirection::IN); @@ -764,9 +779,10 @@ utils::BasicResult<StorageManipulationError, void> InMemoryStorage::InMemoryAcce // This is usually done by the MVCC, but it does not handle the metadata deltas transaction_.EnsureCommitTimestampExists(); - if (transaction_.constraint_verification_info.NeedsExistenceConstraintVerification()) { + if (transaction_.constraint_verification_info && + transaction_.constraint_verification_info->NeedsExistenceConstraintVerification()) { const auto vertices_to_update = - transaction_.constraint_verification_info.GetVerticesForExistenceConstraintChecking(); + transaction_.constraint_verification_info->GetVerticesForExistenceConstraintChecking(); for (auto const *vertex : vertices_to_update) { // No need to take any locks here because we modified this vertex and no // one else can touch it until we commit. @@ -793,12 +809,13 @@ utils::BasicResult<StorageManipulationError, void> InMemoryStorage::InMemoryAcce static_cast<InMemoryUniqueConstraints *>(storage_->constraints_.unique_constraints_.get()); commit_timestamp_.emplace(mem_storage->CommitTimestamp(reparg.desired_commit_timestamp)); - if (transaction_.constraint_verification_info.NeedsUniqueConstraintVerification()) { + if (transaction_.constraint_verification_info && + transaction_.constraint_verification_info->NeedsUniqueConstraintVerification()) { // Before committing and validating vertices against unique constraints, // we have to update unique constraints with the vertices that are going // to be validated/committed. 
const auto vertices_to_update = - transaction_.constraint_verification_info.GetVerticesForUniqueConstraintChecking(); + transaction_.constraint_verification_info->GetVerticesForUniqueConstraintChecking(); for (auto const *vertex : vertices_to_update) { mem_unique_constraints->UpdateBeforeCommit(vertex, transaction_); @@ -979,10 +996,11 @@ void InMemoryStorage::InMemoryAccessor::Abort() { // note: this check also saves on unnecessary contention on `engine_lock_` if (!transaction_.deltas.empty()) { // CONSTRAINTS - if (transaction_.constraint_verification_info.NeedsUniqueConstraintVerification()) { + if (transaction_.constraint_verification_info && + transaction_.constraint_verification_info->NeedsUniqueConstraintVerification()) { // Need to remove elements from constraints before handling of the deltas, so the elements match the correct // values - auto vertices_to_check = transaction_.constraint_verification_info.GetVerticesForUniqueConstraintChecking(); + auto vertices_to_check = transaction_.constraint_verification_info->GetVerticesForUniqueConstraintChecking(); auto vertices_to_check_v = std::vector<Vertex const *>{vertices_to_check.begin(), vertices_to_check.end()}; storage_->constraints_.AbortEntries(vertices_to_check_v, transaction_.start_timestamp); } @@ -1263,6 +1281,18 @@ utils::BasicResult<StorageIndexDefinitionError, void> InMemoryStorage::InMemoryA return {}; } +utils::BasicResult<StorageIndexDefinitionError, void> InMemoryStorage::InMemoryAccessor::CreateIndex( + EdgeTypeId edge_type) { + MG_ASSERT(unique_guard_.owns_lock(), "Create index requires a unique access to the storage!"); + auto *in_memory = static_cast<InMemoryStorage *>(storage_); + auto *mem_edge_type_index = static_cast<InMemoryEdgeTypeIndex *>(in_memory->indices_.edge_type_index_.get()); + if (!mem_edge_type_index->CreateIndex(edge_type, in_memory->vertices_.access())) { + return StorageIndexDefinitionError{IndexDefinitionError{}}; + } + 
transaction_.md_deltas.emplace_back(MetadataDelta::edge_index_create, edge_type); + return {}; +} + utils::BasicResult<StorageIndexDefinitionError, void> InMemoryStorage::InMemoryAccessor::DropIndex(LabelId label) { MG_ASSERT(unique_guard_.owns_lock(), "Dropping label index requires a unique access to the storage!"); auto *in_memory = static_cast<InMemoryStorage *>(storage_); @@ -1291,6 +1321,18 @@ utils::BasicResult<StorageIndexDefinitionError, void> InMemoryStorage::InMemoryA return {}; } +utils::BasicResult<StorageIndexDefinitionError, void> InMemoryStorage::InMemoryAccessor::DropIndex( + EdgeTypeId edge_type) { + MG_ASSERT(unique_guard_.owns_lock(), "Drop index requires a unique access to the storage!"); + auto *in_memory = static_cast<InMemoryStorage *>(storage_); + auto *mem_edge_type_index = static_cast<InMemoryEdgeTypeIndex *>(in_memory->indices_.edge_type_index_.get()); + if (!mem_edge_type_index->DropIndex(edge_type)) { + return StorageIndexDefinitionError{IndexDefinitionError{}}; + } + transaction_.md_deltas.emplace_back(MetadataDelta::edge_index_drop, edge_type); + return {}; +} + utils::BasicResult<StorageExistenceConstraintDefinitionError, void> InMemoryStorage::InMemoryAccessor::CreateExistenceConstraint(LabelId label, PropertyId property) { MG_ASSERT(unique_guard_.owns_lock(), "Creating existence requires a unique access to the storage!"); @@ -1382,6 +1424,11 @@ VerticesIterable InMemoryStorage::InMemoryAccessor::Vertices( mem_label_property_index->Vertices(label, property, lower_bound, upper_bound, view, storage_, &transaction_)); } +EdgesIterable InMemoryStorage::InMemoryAccessor::Edges(EdgeTypeId edge_type, View view) { + auto *mem_edge_type_index = static_cast<InMemoryEdgeTypeIndex *>(storage_->indices_.edge_type_index_.get()); + return EdgesIterable(mem_edge_type_index->Edges(edge_type, view, storage_, &transaction_)); +} + Transaction InMemoryStorage::CreateTransaction( IsolationLevel isolation_level, StorageMode storage_mode, 
memgraph::replication_coordination_glue::ReplicationRole replication_role) { @@ -1405,7 +1452,7 @@ Transaction InMemoryStorage::CreateTransaction( start_timestamp = timestamp_; } } - return {transaction_id, start_timestamp, isolation_level, storage_mode, false}; + return {transaction_id, start_timestamp, isolation_level, storage_mode, false, !constraints_.empty()}; } void InMemoryStorage::SetStorageMode(StorageMode new_storage_mode) { @@ -2016,6 +2063,10 @@ bool InMemoryStorage::AppendToWal(const Transaction &transaction, uint64_t final AppendToWalDataDefinition(durability::StorageMetadataOperation::LABEL_INDEX_CREATE, md_delta.label, final_commit_timestamp); } break; + case MetadataDelta::Action::EDGE_INDEX_CREATE: { + AppendToWalDataDefinition(durability::StorageMetadataOperation::EDGE_TYPE_INDEX_CREATE, md_delta.edge_type, + final_commit_timestamp); + } break; case MetadataDelta::Action::LABEL_PROPERTY_INDEX_CREATE: { const auto &info = md_delta.label_property; AppendToWalDataDefinition(durability::StorageMetadataOperation::LABEL_PROPERTY_INDEX_CREATE, info.label, @@ -2025,6 +2076,10 @@ bool InMemoryStorage::AppendToWal(const Transaction &transaction, uint64_t final AppendToWalDataDefinition(durability::StorageMetadataOperation::LABEL_INDEX_DROP, md_delta.label, final_commit_timestamp); } break; + case MetadataDelta::Action::EDGE_INDEX_DROP: { + AppendToWalDataDefinition(durability::StorageMetadataOperation::EDGE_TYPE_INDEX_DROP, md_delta.edge_type, + final_commit_timestamp); + } break; case MetadataDelta::Action::LABEL_PROPERTY_INDEX_DROP: { const auto &info = md_delta.label_property; AppendToWalDataDefinition(durability::StorageMetadataOperation::LABEL_PROPERTY_INDEX_DROP, info.label, @@ -2090,6 +2145,12 @@ void InMemoryStorage::AppendToWalDataDefinition(durability::StorageMetadataOpera repl_storage_state_.AppendOperation(operation, label, properties, stats, property_stats, final_commit_timestamp); } +void 
InMemoryStorage::AppendToWalDataDefinition(durability::StorageMetadataOperation operation, EdgeTypeId edge_type, + uint64_t final_commit_timestamp) { + wal_file_->AppendOperation(operation, edge_type, final_commit_timestamp); + repl_storage_state_.AppendOperation(operation, edge_type, final_commit_timestamp); +} + void InMemoryStorage::AppendToWalDataDefinition(durability::StorageMetadataOperation operation, LabelId label, const std::set<PropertyId> &properties, LabelPropertyIndexStats property_stats, @@ -2239,7 +2300,8 @@ IndicesInfo InMemoryStorage::InMemoryAccessor::ListAllIndices() const { auto *mem_label_index = static_cast<InMemoryLabelIndex *>(in_memory->indices_.label_index_.get()); auto *mem_label_property_index = static_cast<InMemoryLabelPropertyIndex *>(in_memory->indices_.label_property_index_.get()); - return {mem_label_index->ListIndices(), mem_label_property_index->ListIndices()}; + auto *mem_edge_type_index = static_cast<InMemoryEdgeTypeIndex *>(in_memory->indices_.edge_type_index_.get()); + return {mem_label_index->ListIndices(), mem_label_property_index->ListIndices(), mem_edge_type_index->ListIndices()}; } ConstraintsInfo InMemoryStorage::InMemoryAccessor::ListAllConstraints() const { const auto *mem_storage = static_cast<InMemoryStorage *>(storage_); diff --git a/src/storage/v2/inmemory/storage.hpp b/src/storage/v2/inmemory/storage.hpp index c0e46d0c9..6d10e0fbd 100644 --- a/src/storage/v2/inmemory/storage.hpp +++ b/src/storage/v2/inmemory/storage.hpp @@ -16,6 +16,7 @@ #include <memory> #include <utility> #include "storage/v2/indices/label_index_stats.hpp" +#include "storage/v2/inmemory/edge_type_index.hpp" #include "storage/v2/inmemory/label_index.hpp" #include "storage/v2/inmemory/label_property_index.hpp" #include "storage/v2/inmemory/replication/recovery.hpp" @@ -53,6 +54,7 @@ class InMemoryStorage final : public Storage { const InMemoryStorage *storage); friend class InMemoryLabelIndex; friend class InMemoryLabelPropertyIndex; + friend 
class InMemoryEdgeTypeIndex; public: enum class CreateSnapshotError : uint8_t { DisabledForReplica, ReachedMaxNumTries }; @@ -107,6 +109,8 @@ class InMemoryStorage final : public Storage { const std::optional<utils::Bound<PropertyValue>> &lower_bound, const std::optional<utils::Bound<PropertyValue>> &upper_bound, View view) override; + EdgesIterable Edges(EdgeTypeId edge_type, View view) override; + /// Return approximate number of all vertices in the database. /// Note that this is always an over-estimate and never an under-estimate. uint64_t ApproximateVertexCount() const override { @@ -145,6 +149,10 @@ class InMemoryStorage final : public Storage { label, property, lower, upper); } + uint64_t ApproximateEdgeCount(EdgeTypeId id) const override { + return static_cast<InMemoryStorage *>(storage_)->indices_.edge_type_index_->ApproximateEdgeCount(id); + } + template <typename TResult, typename TIndex, typename TIndexKey> std::optional<TResult> GetIndexStatsForIndex(TIndex *index, TIndexKey &&key) const { return index->GetIndexStats(key); @@ -204,6 +212,10 @@ class InMemoryStorage final : public Storage { return static_cast<InMemoryStorage *>(storage_)->indices_.label_property_index_->IndexExists(label, property); } + bool EdgeTypeIndexExists(EdgeTypeId edge_type) const override { + return static_cast<InMemoryStorage *>(storage_)->indices_.edge_type_index_->IndexExists(edge_type); + } + IndicesInfo ListAllIndices() const override; ConstraintsInfo ListAllConstraints() const override; @@ -239,6 +251,14 @@ class InMemoryStorage final : public Storage { /// @throw std::bad_alloc utils::BasicResult<StorageIndexDefinitionError, void> CreateIndex(LabelId label, PropertyId property) override; + /// Create an index. + /// Returns void if the index has been created. + /// Returns `StorageIndexDefinitionError` if an error occures. Error can be: + /// * `ReplicationError`: there is at least one SYNC replica that has not confirmed receiving the transaction. 
+ /// * `IndexDefinitionError`: the index already exists. + /// @throw std::bad_alloc + utils::BasicResult<StorageIndexDefinitionError, void> CreateIndex(EdgeTypeId edge_type) override; + /// Drop an existing index. /// Returns void if the index has been dropped. /// Returns `StorageIndexDefinitionError` if an error occures. Error can be: @@ -253,6 +273,13 @@ class InMemoryStorage final : public Storage { /// * `IndexDefinitionError`: the index does not exist. utils::BasicResult<StorageIndexDefinitionError, void> DropIndex(LabelId label, PropertyId property) override; + /// Drop an existing index. + /// Returns void if the index has been dropped. + /// Returns `StorageIndexDefinitionError` if an error occures. Error can be: + /// * `ReplicationError`: there is at least one SYNC replica that has not confirmed receiving the transaction. + /// * `IndexDefinitionError`: the index does not exist. + utils::BasicResult<StorageIndexDefinitionError, void> DropIndex(EdgeTypeId edge_type) override; + /// Returns void if the existence constraint has been created. /// Returns `StorageExistenceConstraintDefinitionError` if an error occures. Error can be: /// * `ReplicationError`: there is at least one SYNC replica that has not confirmed receiving the transaction. @@ -374,20 +401,17 @@ class InMemoryStorage final : public Storage { /// Return true in all cases excepted if any sync replicas have not sent confirmation. [[nodiscard]] bool AppendToWal(const Transaction &transaction, uint64_t final_commit_timestamp, DatabaseAccessProtector db_acc); - /// Return true in all cases excepted if any sync replicas have not sent confirmation. void AppendToWalDataDefinition(durability::StorageMetadataOperation operation, LabelId label, uint64_t final_commit_timestamp); - /// Return true in all cases excepted if any sync replicas have not sent confirmation. 
+ void AppendToWalDataDefinition(durability::StorageMetadataOperation operation, EdgeTypeId edge_type, + uint64_t final_commit_timestamp); void AppendToWalDataDefinition(durability::StorageMetadataOperation operation, LabelId label, const std::set<PropertyId> &properties, uint64_t final_commit_timestamp); - /// Return true in all cases excepted if any sync replicas have not sent confirmation. void AppendToWalDataDefinition(durability::StorageMetadataOperation operation, LabelId label, LabelIndexStats stats, uint64_t final_commit_timestamp); - /// Return true in all cases excepted if any sync replicas have not sent confirmation. void AppendToWalDataDefinition(durability::StorageMetadataOperation operation, LabelId label, const std::set<PropertyId> &properties, LabelPropertyIndexStats property_stats, uint64_t final_commit_timestamp); - /// Return true in all cases excepted if any sync replicas have not sent confirmation. void AppendToWalDataDefinition(durability::StorageMetadataOperation operation, LabelId label, const std::set<PropertyId> &properties, LabelIndexStats stats, LabelPropertyIndexStats property_stats, uint64_t final_commit_timestamp); diff --git a/src/storage/v2/inmemory/unique_constraints.cpp b/src/storage/v2/inmemory/unique_constraints.cpp index e08965eab..dd47a3f68 100644 --- a/src/storage/v2/inmemory/unique_constraints.cpp +++ b/src/storage/v2/inmemory/unique_constraints.cpp @@ -522,5 +522,6 @@ void InMemoryUniqueConstraints::Clear() { constraints_.clear(); constraints_by_label_.clear(); } +bool InMemoryUniqueConstraints::empty() const { return constraints_.empty() && constraints_by_label_.empty(); } } // namespace memgraph::storage diff --git a/src/storage/v2/inmemory/unique_constraints.hpp b/src/storage/v2/inmemory/unique_constraints.hpp index 27fae1b30..40ea0a19e 100644 --- a/src/storage/v2/inmemory/unique_constraints.hpp +++ b/src/storage/v2/inmemory/unique_constraints.hpp @@ -41,6 +41,9 @@ struct FixedCapacityArray { using PropertyIdArray = 
FixedCapacityArray<PropertyId>; class InMemoryUniqueConstraints : public UniqueConstraints { + public: + bool empty() const override; + private: struct Entry { std::vector<PropertyValue> values; diff --git a/src/storage/v2/metadata_delta.hpp b/src/storage/v2/metadata_delta.hpp index 94d806c19..b34966a62 100644 --- a/src/storage/v2/metadata_delta.hpp +++ b/src/storage/v2/metadata_delta.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -35,6 +35,8 @@ struct MetadataDelta { LABEL_PROPERTY_INDEX_DROP, LABEL_PROPERTY_INDEX_STATS_SET, LABEL_PROPERTY_INDEX_STATS_CLEAR, + EDGE_INDEX_CREATE, + EDGE_INDEX_DROP, EXISTENCE_CONSTRAINT_CREATE, EXISTENCE_CONSTRAINT_DROP, UNIQUE_CONSTRAINT_CREATE, @@ -57,6 +59,10 @@ struct MetadataDelta { } label_property_index_stats_set; static constexpr struct LabelPropertyIndexStatsClear { } label_property_index_stats_clear; + static constexpr struct EdgeIndexCreate { + } edge_index_create; + static constexpr struct EdgeIndexDrop { + } edge_index_drop; static constexpr struct ExistenceConstraintCreate { } existence_constraint_create; static constexpr struct ExistenceConstraintDrop { @@ -87,6 +93,11 @@ struct MetadataDelta { MetadataDelta(LabelPropertyIndexStatsClear /*tag*/, LabelId label) : action(Action::LABEL_PROPERTY_INDEX_STATS_CLEAR), label{label} {} + MetadataDelta(EdgeIndexCreate /*tag*/, EdgeTypeId edge_type) + : action(Action::EDGE_INDEX_CREATE), edge_type(edge_type) {} + + MetadataDelta(EdgeIndexDrop /*tag*/, EdgeTypeId edge_type) : action(Action::EDGE_INDEX_DROP), edge_type(edge_type) {} + MetadataDelta(ExistenceConstraintCreate /*tag*/, LabelId label, PropertyId property) : action(Action::EXISTENCE_CONSTRAINT_CREATE), label_property{label, property} {} @@ -114,6 +125,8 @@ struct MetadataDelta { case 
Action::LABEL_PROPERTY_INDEX_DROP: case Action::LABEL_PROPERTY_INDEX_STATS_SET: case Action::LABEL_PROPERTY_INDEX_STATS_CLEAR: + case Action::EDGE_INDEX_CREATE: + case Action::EDGE_INDEX_DROP: case Action::EXISTENCE_CONSTRAINT_CREATE: case Action::EXISTENCE_CONSTRAINT_DROP: break; @@ -129,6 +142,8 @@ struct MetadataDelta { union { LabelId label; + EdgeTypeId edge_type; + struct { LabelId label; PropertyId property; diff --git a/src/storage/v2/property_value.hpp b/src/storage/v2/property_value.hpp index e48be008a..161ad151a 100644 --- a/src/storage/v2/property_value.hpp +++ b/src/storage/v2/property_value.hpp @@ -92,7 +92,28 @@ class PropertyValue { // TODO: Implement copy assignment operators for primitive types. // TODO: Implement copy and move assignment operators for non-primitive types. - ~PropertyValue() { DestroyValue(); } + ~PropertyValue() { + switch (type_) { + // destructor for primitive types does nothing + case Type::Null: + case Type::Bool: + case Type::Int: + case Type::Double: + case Type::TemporalData: + return; + + // destructor for non primitive types since we used placement new + case Type::String: + std::destroy_at(&string_v.val_); + return; + case Type::List: + std::destroy_at(&list_v.val_); + return; + case Type::Map: + std::destroy_at(&map_v.val_); + return; + } + } Type type() const { return type_; } @@ -189,8 +210,6 @@ class PropertyValue { } private: - void DestroyValue() noexcept; - // NOTE: this may look strange but it is for better data layout // https://eel.is/c++draft/class.union#general-note-1 union { @@ -357,13 +376,13 @@ inline PropertyValue::PropertyValue(const PropertyValue &other) : type_(other.ty this->double_v.val_ = other.double_v.val_; return; case Type::String: - new (&string_v.val_) std::string(other.string_v.val_); + std::construct_at(&string_v.val_, other.string_v.val_); return; case Type::List: - new (&list_v.val_) std::vector<PropertyValue>(other.list_v.val_); + std::construct_at(&list_v.val_, other.list_v.val_); 
return; case Type::Map: - new (&map_v.val_) std::map<std::string, PropertyValue>(other.map_v.val_); + std::construct_at(&map_v.val_, other.map_v.val_); return; case Type::TemporalData: this->temporal_data_v.val_ = other.temporal_data_v.val_; @@ -371,7 +390,7 @@ inline PropertyValue::PropertyValue(const PropertyValue &other) : type_(other.ty } } -inline PropertyValue::PropertyValue(PropertyValue &&other) noexcept : type_(std::exchange(other.type_, Type::Null)) { +inline PropertyValue::PropertyValue(PropertyValue &&other) noexcept : type_(other.type_) { switch (type_) { case Type::Null: break; @@ -386,15 +405,12 @@ inline PropertyValue::PropertyValue(PropertyValue &&other) noexcept : type_(std: break; case Type::String: std::construct_at(&string_v.val_, std::move(other.string_v.val_)); - std::destroy_at(&other.string_v.val_); break; case Type::List: std::construct_at(&list_v.val_, std::move(other.list_v.val_)); - std::destroy_at(&other.list_v.val_); break; case Type::Map: std::construct_at(&map_v.val_, std::move(other.map_v.val_)); - std::destroy_at(&other.map_v.val_); break; case Type::TemporalData: temporal_data_v.val_ = other.temporal_data_v.val_; @@ -403,38 +419,88 @@ inline PropertyValue::PropertyValue(PropertyValue &&other) noexcept : type_(std: } inline PropertyValue &PropertyValue::operator=(const PropertyValue &other) { - if (this == &other) return *this; + if (type_ == other.type_) { + if (this == &other) return *this; + switch (other.type_) { + case Type::Null: + break; + case Type::Bool: + bool_v.val_ = other.bool_v.val_; + break; + case Type::Int: + int_v.val_ = other.int_v.val_; + break; + case Type::Double: + double_v.val_ = other.double_v.val_; + break; + case Type::String: + string_v.val_ = other.string_v.val_; + break; + case Type::List: + list_v.val_ = other.list_v.val_; + break; + case Type::Map: + map_v.val_ = other.map_v.val_; + break; + case Type::TemporalData: + temporal_data_v.val_ = other.temporal_data_v.val_; + break; + } + return *this; + 
} else { + // destroy + switch (type_) { + case Type::Null: + break; + case Type::Bool: + break; + case Type::Int: + break; + case Type::Double: + break; + case Type::String: + std::destroy_at(&string_v.val_); + break; + case Type::List: + std::destroy_at(&list_v.val_); + break; + case Type::Map: + std::destroy_at(&map_v.val_); + break; + case Type::TemporalData: + break; + } + // construct + auto *new_this = std::launder(this); + switch (other.type_) { + case Type::Null: + break; + case Type::Bool: + new_this->bool_v.val_ = other.bool_v.val_; + break; + case Type::Int: + new_this->int_v.val_ = other.int_v.val_; + break; + case Type::Double: + new_this->double_v.val_ = other.double_v.val_; + break; + case Type::String: + std::construct_at(&new_this->string_v.val_, other.string_v.val_); + break; + case Type::List: + std::construct_at(&new_this->list_v.val_, other.list_v.val_); + break; + case Type::Map: + std::construct_at(&new_this->map_v.val_, other.map_v.val_); + break; + case Type::TemporalData: + new_this->temporal_data_v.val_ = other.temporal_data_v.val_; + break; + } - DestroyValue(); - type_ = other.type_; - - switch (other.type_) { - case Type::Null: - break; - case Type::Bool: - this->bool_v.val_ = other.bool_v.val_; - break; - case Type::Int: - this->int_v.val_ = other.int_v.val_; - break; - case Type::Double: - this->double_v.val_ = other.double_v.val_; - break; - case Type::String: - new (&string_v.val_) std::string(other.string_v.val_); - break; - case Type::List: - new (&list_v.val_) std::vector<PropertyValue>(other.list_v.val_); - break; - case Type::Map: - new (&map_v.val_) std::map<std::string, PropertyValue>(other.map_v.val_); - break; - case Type::TemporalData: - this->temporal_data_v.val_ = other.temporal_data_v.val_; - break; + new_this->type_ = other.type_; + return *new_this; } - - return *this; } inline PropertyValue &PropertyValue::operator=(PropertyValue &&other) noexcept { @@ -456,48 +522,71 @@ inline PropertyValue 
&PropertyValue::operator=(PropertyValue &&other) noexcept { break; case Type::String: string_v.val_ = std::move(other.string_v.val_); - std::destroy_at(&other.string_v.val_); break; case Type::List: list_v.val_ = std::move(other.list_v.val_); - std::destroy_at(&other.list_v.val_); break; case Type::Map: map_v.val_ = std::move(other.map_v.val_); - std::destroy_at(&other.map_v.val_); break; case Type::TemporalData: temporal_data_v.val_ = other.temporal_data_v.val_; break; } - other.type_ = Type::Null; return *this; } else { - std::destroy_at(this); - return *std::construct_at(std::launder(this), std::move(other)); - } -} + // destroy + switch (type_) { + case Type::Null: + break; + case Type::Bool: + break; + case Type::Int: + break; + case Type::Double: + break; + case Type::String: + std::destroy_at(&string_v.val_); + break; + case Type::List: + std::destroy_at(&list_v.val_); + break; + case Type::Map: + std::destroy_at(&map_v.val_); + break; + case Type::TemporalData: + break; + } + // construct (no need to destroy moved from type) + auto *new_this = std::launder(this); + switch (other.type_) { + case Type::Null: + break; + case Type::Bool: + new_this->bool_v.val_ = other.bool_v.val_; + break; + case Type::Int: + new_this->int_v.val_ = other.int_v.val_; + break; + case Type::Double: + new_this->double_v.val_ = other.double_v.val_; + break; + case Type::String: + std::construct_at(&new_this->string_v.val_, std::move(other.string_v.val_)); + break; + case Type::List: + std::construct_at(&new_this->list_v.val_, std::move(other.list_v.val_)); + break; + case Type::Map: + std::construct_at(&new_this->map_v.val_, std::move(other.map_v.val_)); + break; + case Type::TemporalData: + new_this->temporal_data_v.val_ = other.temporal_data_v.val_; + break; + } -inline void PropertyValue::DestroyValue() noexcept { - switch (std::exchange(type_, Type::Null)) { - // destructor for primitive types does nothing - case Type::Null: - case Type::Bool: - case Type::Int: - case 
Type::Double: - case Type::TemporalData: - return; - - // destructor for non primitive types since we used placement new - case Type::String: - std::destroy_at(&string_v.val_); - return; - case Type::List: - std::destroy_at(&list_v.val_); - return; - case Type::Map: - std::destroy_at(&map_v.val_); - return; + new_this->type_ = other.type_; + return *new_this; } } diff --git a/src/storage/v2/replication/replication_client.cpp b/src/storage/v2/replication/replication_client.cpp index 16429d11f..a02c1eff0 100644 --- a/src/storage/v2/replication/replication_client.cpp +++ b/src/storage/v2/replication/replication_client.cpp @@ -13,6 +13,7 @@ #include "replication/replication_client.hpp" #include "storage/v2/inmemory/storage.hpp" +#include "storage/v2/replication/enums.hpp" #include "storage/v2/storage.hpp" #include "utils/exceptions.hpp" #include "utils/on_scope_exit.hpp" @@ -53,25 +54,60 @@ void ReplicationStorageClient::UpdateReplicaState(Storage *storage, DatabaseAcce #endif std::optional<uint64_t> branching_point; + // different epoch id, replica was main + // In case there is no epoch transfer, and MAIN doesn't hold all the epochs as it could have been down and miss it + // we need then just to check commit timestamp if (replica.epoch_id != replStorageState.epoch_.id() && replica.current_commit_timestamp != kTimestampInitialId) { + spdlog::trace( + "REPLICA: epoch UUID: {} and last_commit_timestamp: {}; MAIN: epoch UUID {} and last_commit_timestamp {}", + std::string(replica.epoch_id), replica.current_commit_timestamp, std::string(replStorageState.epoch_.id()), + replStorageState.last_commit_timestamp_); auto const &history = replStorageState.history; const auto epoch_info_iter = std::find_if(history.crbegin(), history.crend(), [&](const auto &main_epoch_info) { return main_epoch_info.first == replica.epoch_id; }); + // main didn't have that epoch, but why is here branching point if (epoch_info_iter == history.crend()) { + spdlog::info("Couldn't find epoch {} in 
MAIN, setting branching point", std::string(replica.epoch_id)); branching_point = 0; - } else if (epoch_info_iter->second != replica.current_commit_timestamp) { + } else if (epoch_info_iter->second < replica.current_commit_timestamp) { + spdlog::info("Found epoch {} on MAIN with last_commit_timestamp {}, REPLICA's last_commit_timestamp {}", + std::string(epoch_info_iter->first), epoch_info_iter->second, replica.current_commit_timestamp); branching_point = epoch_info_iter->second; } } if (branching_point) { - spdlog::error( - "You cannot register Replica {} to this Main because at one point " - "Replica {} acted as the Main instance. Both the Main and Replica {} " - "now hold unique data. Please resolve data conflicts and start the " - "replication on a clean instance.", - client_.name_, client_.name_, client_.name_); - replica_state_.WithLock([](auto &val) { val = replication::ReplicaState::DIVERGED_FROM_MAIN; }); + auto replica_state = replica_state_.Lock(); + if (*replica_state == replication::ReplicaState::DIVERGED_FROM_MAIN) { + return; + } + *replica_state = replication::ReplicaState::DIVERGED_FROM_MAIN; + + auto log_error = [client_name = client_.name_]() { + spdlog::error( + "You cannot register Replica {} to this Main because at one point " + "Replica {} acted as the Main instance. Both the Main and Replica {} " + "now hold unique data. 
Please resolve data conflicts and start the " + "replication on a clean instance.", + client_name, client_name, client_name); + }; +#ifdef MG_ENTERPRISE + if (!FLAGS_coordinator_server_port) { + log_error(); + return; + } + client_.thread_pool_.AddTask([storage, gk = std::move(db_acc), this] { + const auto [success, timestamp] = this->ForceResetStorage(storage); + if (success) { + spdlog::info("Successfully reset storage of REPLICA {} to timestamp {}.", client_.name_, timestamp); + return; + } + spdlog::error("You cannot register REPLICA {} to this MAIN because MAIN couldn't reset REPLICA's storage.", + client_.name_); + }); +#else + log_error(); +#endif return; } @@ -190,9 +226,6 @@ void ReplicationStorageClient::StartTransactionReplication(const uint64_t curren } } -//////// AF: you can't finialize transaction replication if you are not replicating -/////// AF: if there is no stream or it is Defunct than we need to set replica in MAYBE_BEHIND -> is that even used -/////// AF: bool ReplicationStorageClient::FinalizeTransactionReplication(Storage *storage, DatabaseAccessProtector db_acc) { // We can only check the state because it guarantees to be only // valid during a single transaction replication (if the assumption @@ -215,11 +248,13 @@ bool ReplicationStorageClient::FinalizeTransactionReplication(Storage *storage, MG_ASSERT(replica_stream_, "Missing stream for transaction deltas"); try { auto response = replica_stream_->Finalize(); - return replica_state_.WithLock([storage, &response, db_acc = std::move(db_acc), this](auto &state) mutable { + // NOLINTNEXTLINE + return replica_state_.WithLock([storage, response, db_acc = std::move(db_acc), this](auto &state) mutable { replica_stream_.reset(); if (!response.success || state == replication::ReplicaState::RECOVERY) { state = replication::ReplicaState::RECOVERY; - client_.thread_pool_.AddTask([storage, &response, db_acc = std::move(db_acc), this] { + // NOLINTNEXTLINE + client_.thread_pool_.AddTask([storage, 
response, db_acc = std::move(db_acc), this] { this->RecoverReplica(response.current_commit_timestamp, storage); }); return false; @@ -325,6 +360,21 @@ void ReplicationStorageClient::RecoverReplica(uint64_t replica_commit, memgraph: } } +std::pair<bool, uint64_t> ReplicationStorageClient::ForceResetStorage(memgraph::storage::Storage *storage) { + utils::OnScopeExit set_to_maybe_behind{ + [this]() { replica_state_.WithLock([](auto &state) { state = replication::ReplicaState::MAYBE_BEHIND; }); }}; + try { + auto stream{client_.rpc_client_.Stream<replication::ForceResetStorageRpc>(main_uuid_, storage->uuid())}; + const auto res = stream.AwaitResponse(); + return std::pair{res.success, res.current_commit_timestamp}; + } catch (const rpc::RpcFailedException &) { + spdlog::error( + utils::MessageWithLink("Couldn't ForceReset data to {}.", client_.name_, "https://memgr.ph/replication")); + } + + return {false, 0}; +} + ////// ReplicaStream ////// ReplicaStream::ReplicaStream(Storage *storage, rpc::Client &rpc_client, const uint64_t current_seq_num, utils::UUID main_uuid) @@ -360,6 +410,12 @@ void ReplicaStream::AppendOperation(durability::StorageMetadataOperation operati timestamp); } +void ReplicaStream::AppendOperation(durability::StorageMetadataOperation operation, EdgeTypeId edge_type, + uint64_t timestamp) { + replication::Encoder encoder(stream_.GetBuilder()); + EncodeOperation(&encoder, storage_->name_id_mapper_.get(), operation, edge_type, timestamp); +} + replication::AppendDeltasRes ReplicaStream::Finalize() { return stream_.AwaitResponse(); } } // namespace memgraph::storage diff --git a/src/storage/v2/replication/replication_client.hpp b/src/storage/v2/replication/replication_client.hpp index 3352bab65..77a9ba40b 100644 --- a/src/storage/v2/replication/replication_client.hpp +++ b/src/storage/v2/replication/replication_client.hpp @@ -65,6 +65,9 @@ class ReplicaStream { const std::set<PropertyId> &properties, const LabelIndexStats &stats, const 
LabelPropertyIndexStats &property_stats, uint64_t timestamp); + /// @throw rpc::RpcFailedException + void AppendOperation(durability::StorageMetadataOperation operation, EdgeTypeId edge_type, uint64_t timestamp); + /// @throw rpc::RpcFailedException replication::AppendDeltasRes Finalize(); @@ -188,6 +191,13 @@ class ReplicationStorageClient { */ void UpdateReplicaState(Storage *storage, DatabaseAccessProtector db_acc); + /** + * @brief Forcefully reset storage to as it is when started from scratch. + * + * @param storage pointer to the storage associated with the client + */ + std::pair<bool, uint64_t> ForceResetStorage(Storage *storage); + void LogRpcFailure(); /** diff --git a/src/storage/v2/replication/replication_storage_state.cpp b/src/storage/v2/replication/replication_storage_state.cpp index 25cf484c9..b8f3fef62 100644 --- a/src/storage/v2/replication/replication_storage_state.cpp +++ b/src/storage/v2/replication/replication_storage_state.cpp @@ -53,6 +53,16 @@ void ReplicationStorageState::AppendOperation(durability::StorageMetadataOperati }); } +void ReplicationStorageState::AppendOperation(durability::StorageMetadataOperation operation, EdgeTypeId edge_type, + uint64_t final_commit_timestamp) { + replication_clients_.WithLock([&](auto &clients) { + for (auto &client : clients) { + client->IfStreamingTransaction( + [&](auto &stream) { stream.AppendOperation(operation, edge_type, final_commit_timestamp); }); + } + }); +} + bool ReplicationStorageState::FinalizeTransaction(uint64_t timestamp, Storage *storage, DatabaseAccessProtector db_acc) { return replication_clients_.WithLock([=, db_acc = std::move(db_acc)](auto &clients) mutable { diff --git a/src/storage/v2/replication/replication_storage_state.hpp b/src/storage/v2/replication/replication_storage_state.hpp index 91cec563c..f99807c13 100644 --- a/src/storage/v2/replication/replication_storage_state.hpp +++ b/src/storage/v2/replication/replication_storage_state.hpp @@ -46,6 +46,8 @@ struct 
ReplicationStorageState { void AppendOperation(durability::StorageMetadataOperation operation, LabelId label, const std::set<PropertyId> &properties, const LabelIndexStats &stats, const LabelPropertyIndexStats &property_stats, uint64_t final_commit_timestamp); + void AppendOperation(durability::StorageMetadataOperation operation, EdgeTypeId edge_type, + uint64_t final_commit_timestamp); bool FinalizeTransaction(uint64_t timestamp, Storage *storage, DatabaseAccessProtector db_acc); // Getters diff --git a/src/storage/v2/replication/rpc.cpp b/src/storage/v2/replication/rpc.cpp index f523bb5d7..71a9ca65c 100644 --- a/src/storage/v2/replication/rpc.cpp +++ b/src/storage/v2/replication/rpc.cpp @@ -59,6 +59,19 @@ void TimestampRes::Save(const TimestampRes &self, memgraph::slk::Builder *builde memgraph::slk::Save(self, builder); } void TimestampRes::Load(TimestampRes *self, memgraph::slk::Reader *reader) { memgraph::slk::Load(self, reader); } + +void ForceResetStorageReq::Save(const ForceResetStorageReq &self, memgraph::slk::Builder *builder) { + memgraph::slk::Save(self, builder); +} +void ForceResetStorageReq::Load(ForceResetStorageReq *self, memgraph::slk::Reader *reader) { + memgraph::slk::Load(self, reader); +} +void ForceResetStorageRes::Save(const ForceResetStorageRes &self, memgraph::slk::Builder *builder) { + memgraph::slk::Save(self, builder); +} +void ForceResetStorageRes::Load(ForceResetStorageRes *self, memgraph::slk::Reader *reader) { + memgraph::slk::Load(self, reader); +} } // namespace storage::replication constexpr utils::TypeInfo storage::replication::AppendDeltasReq::kType{utils::TypeId::REP_APPEND_DELTAS_REQ, @@ -97,6 +110,12 @@ constexpr utils::TypeInfo storage::replication::TimestampReq::kType{utils::TypeI constexpr utils::TypeInfo storage::replication::TimestampRes::kType{utils::TypeId::REP_TIMESTAMP_RES, "TimestampRes", nullptr}; +constexpr utils::TypeInfo 
storage::replication::ForceResetStorageReq::kType{utils::TypeId::REP_FORCE_RESET_STORAGE_REQ, + "ForceResetStorageReq", nullptr}; + +constexpr utils::TypeInfo storage::replication::ForceResetStorageRes::kType{utils::TypeId::REP_FORCE_RESET_STORAGE_RES, + "ForceResetStorageRes", nullptr}; + // Autogenerated SLK serialization code namespace slk { // Serialize code for TimestampRes @@ -255,6 +274,30 @@ void Load(memgraph::storage::replication::AppendDeltasReq *self, memgraph::slk:: memgraph::slk::Load(&self->seq_num, reader); } +// Serialize code for ForceResetStorageReq + +void Save(const memgraph::storage::replication::ForceResetStorageReq &self, memgraph::slk::Builder *builder) { + memgraph::slk::Save(self.main_uuid, builder); + memgraph::slk::Save(self.db_uuid, builder); +} + +void Load(memgraph::storage::replication::ForceResetStorageReq *self, memgraph::slk::Reader *reader) { + memgraph::slk::Load(&self->main_uuid, reader); + memgraph::slk::Load(&self->db_uuid, reader); +} + +// Serialize code for ForceResetStorageRes + +void Save(const memgraph::storage::replication::ForceResetStorageRes &self, memgraph::slk::Builder *builder) { + memgraph::slk::Save(self.success, builder); + memgraph::slk::Save(self.current_commit_timestamp, builder); +} + +void Load(memgraph::storage::replication::ForceResetStorageRes *self, memgraph::slk::Reader *reader) { + memgraph::slk::Load(&self->success, reader); + memgraph::slk::Load(&self->current_commit_timestamp, reader); +} + // Serialize SalientConfig void Save(const memgraph::storage::SalientConfig &self, memgraph::slk::Builder *builder) { diff --git a/src/storage/v2/replication/rpc.hpp b/src/storage/v2/replication/rpc.hpp index 67f98d0ae..fb19d82f2 100644 --- a/src/storage/v2/replication/rpc.hpp +++ b/src/storage/v2/replication/rpc.hpp @@ -210,6 +210,36 @@ struct TimestampRes { using TimestampRpc = rpc::RequestResponse<TimestampReq, TimestampRes>; +struct ForceResetStorageReq { + static const utils::TypeInfo kType; + static 
const utils::TypeInfo &GetTypeInfo() { return kType; } + + static void Load(ForceResetStorageReq *self, memgraph::slk::Reader *reader); + static void Save(const ForceResetStorageReq &self, memgraph::slk::Builder *builder); + ForceResetStorageReq() = default; + explicit ForceResetStorageReq(const utils::UUID &main_uuid, const utils::UUID &db_uuid) + : main_uuid{main_uuid}, db_uuid{db_uuid} {} + + utils::UUID main_uuid; + utils::UUID db_uuid; +}; + +struct ForceResetStorageRes { + static const utils::TypeInfo kType; + static const utils::TypeInfo &GetTypeInfo() { return kType; } + + static void Load(ForceResetStorageRes *self, memgraph::slk::Reader *reader); + static void Save(const ForceResetStorageRes &self, memgraph::slk::Builder *builder); + ForceResetStorageRes() = default; + ForceResetStorageRes(bool success, uint64_t current_commit_timestamp) + : success(success), current_commit_timestamp(current_commit_timestamp) {} + + bool success; + uint64_t current_commit_timestamp; +}; + +using ForceResetStorageRpc = rpc::RequestResponse<ForceResetStorageReq, ForceResetStorageRes>; + } // namespace memgraph::storage::replication // SLK serialization declarations @@ -267,4 +297,12 @@ void Save(const memgraph::storage::SalientConfig &self, memgraph::slk::Builder * void Load(memgraph::storage::SalientConfig *self, memgraph::slk::Reader *reader); +void Save(const memgraph::storage::replication::ForceResetStorageReq &self, memgraph::slk::Builder *builder); + +void Load(memgraph::storage::replication::ForceResetStorageReq *self, memgraph::slk::Reader *reader); + +void Save(const memgraph::storage::replication::ForceResetStorageRes &self, memgraph::slk::Builder *builder); + +void Load(memgraph::storage::replication::ForceResetStorageRes *self, memgraph::slk::Reader *reader); + } // namespace memgraph::slk diff --git a/src/storage/v2/storage.hpp b/src/storage/v2/storage.hpp index 5868d70a3..58936bd56 100644 --- a/src/storage/v2/storage.hpp +++ b/src/storage/v2/storage.hpp @@ 
-30,6 +30,7 @@ #include "storage/v2/durability/paths.hpp" #include "storage/v2/durability/wal.hpp" #include "storage/v2/edge_accessor.hpp" +#include "storage/v2/edges_iterable.hpp" #include "storage/v2/indices/indices.hpp" #include "storage/v2/mvcc.hpp" #include "storage/v2/replication/enums.hpp" @@ -61,6 +62,7 @@ class EdgeAccessor; struct IndicesInfo { std::vector<LabelId> label; std::vector<std::pair<LabelId, PropertyId>> label_property; + std::vector<EdgeTypeId> edge_type; }; struct ConstraintsInfo { @@ -172,6 +174,8 @@ class Storage { const std::optional<utils::Bound<PropertyValue>> &lower_bound, const std::optional<utils::Bound<PropertyValue>> &upper_bound, View view) = 0; + virtual EdgesIterable Edges(EdgeTypeId edge_type, View view) = 0; + virtual Result<std::optional<VertexAccessor>> DeleteVertex(VertexAccessor *vertex); virtual Result<std::optional<std::pair<VertexAccessor, std::vector<EdgeAccessor>>>> DetachDeleteVertex( @@ -192,6 +196,8 @@ class Storage { const std::optional<utils::Bound<PropertyValue>> &lower, const std::optional<utils::Bound<PropertyValue>> &upper) const = 0; + virtual uint64_t ApproximateEdgeCount(EdgeTypeId id) const = 0; + virtual std::optional<storage::LabelIndexStats> GetIndexStats(const storage::LabelId &label) const = 0; virtual std::optional<storage::LabelPropertyIndexStats> GetIndexStats( @@ -224,6 +230,8 @@ class Storage { virtual bool LabelPropertyIndexExists(LabelId label, PropertyId property) const = 0; + virtual bool EdgeTypeIndexExists(EdgeTypeId edge_type) const = 0; + virtual IndicesInfo ListAllIndices() const = 0; virtual ConstraintsInfo ListAllConstraints() const = 0; @@ -268,10 +276,14 @@ class Storage { virtual utils::BasicResult<StorageIndexDefinitionError, void> CreateIndex(LabelId label, PropertyId property) = 0; + virtual utils::BasicResult<StorageIndexDefinitionError, void> CreateIndex(EdgeTypeId edge_type) = 0; + virtual utils::BasicResult<StorageIndexDefinitionError, void> DropIndex(LabelId label) = 0; 
virtual utils::BasicResult<StorageIndexDefinitionError, void> DropIndex(LabelId label, PropertyId property) = 0; + virtual utils::BasicResult<StorageIndexDefinitionError, void> DropIndex(EdgeTypeId edge_type) = 0; + virtual utils::BasicResult<StorageExistenceConstraintDefinitionError, void> CreateExistenceConstraint( LabelId label, PropertyId property) = 0; diff --git a/src/storage/v2/transaction.hpp b/src/storage/v2/transaction.hpp index 9f973cbf0..ff1699626 100644 --- a/src/storage/v2/transaction.hpp +++ b/src/storage/v2/transaction.hpp @@ -41,7 +41,7 @@ const uint64_t kTransactionInitialId = 1ULL << 63U; struct Transaction { Transaction(uint64_t transaction_id, uint64_t start_timestamp, IsolationLevel isolation_level, - StorageMode storage_mode, bool edge_import_mode_active) + StorageMode storage_mode, bool edge_import_mode_active, bool has_constraints) : transaction_id(transaction_id), start_timestamp(start_timestamp), command_id(0), @@ -50,6 +50,8 @@ struct Transaction { isolation_level(isolation_level), storage_mode(storage_mode), edge_import_mode_active(edge_import_mode_active), + constraint_verification_info{(has_constraints) ? std::optional<ConstraintVerificationInfo>{std::in_place} + : std::nullopt}, vertices_{(storage_mode == StorageMode::ON_DISK_TRANSACTIONAL) ? std::optional<utils::SkipList<Vertex>>{std::in_place} : std::nullopt}, @@ -99,7 +101,7 @@ struct Transaction { // Used to speedup getting info about a vertex when there is a long delta // chain involved in rebuilding that info. 
mutable VertexInfoCache manyDeltasCache{}; - mutable ConstraintVerificationInfo constraint_verification_info{}; + mutable std::optional<ConstraintVerificationInfo> constraint_verification_info{}; // Store modified edges GID mapped to changed Delta and serialized edge key // Only for disk storage diff --git a/src/storage/v2/vertex_accessor.cpp b/src/storage/v2/vertex_accessor.cpp index ef0a6ab3e..7d78070a8 100644 --- a/src/storage/v2/vertex_accessor.cpp +++ b/src/storage/v2/vertex_accessor.cpp @@ -120,7 +120,7 @@ Result<bool> VertexAccessor::AddLabel(LabelId label) { /// TODO: some by pointers, some by reference => not good, make it better storage_->constraints_.unique_constraints_->UpdateOnAddLabel(label, *vertex_, transaction_->start_timestamp); - transaction_->constraint_verification_info.AddedLabel(vertex_); + if (transaction_->constraint_verification_info) transaction_->constraint_verification_info->AddedLabel(vertex_); storage_->indices_.UpdateOnAddLabel(label, vertex_, *transaction_); transaction_->manyDeltasCache.Invalidate(vertex_, label); @@ -276,10 +276,12 @@ Result<PropertyValue> VertexAccessor::SetProperty(PropertyId property, const Pro }}; std::invoke(atomic_memory_block); - if (!value.IsNull()) { - transaction_->constraint_verification_info.AddedProperty(vertex_); - } else { - transaction_->constraint_verification_info.RemovedProperty(vertex_); + if (transaction_->constraint_verification_info) { + if (!value.IsNull()) { + transaction_->constraint_verification_info->AddedProperty(vertex_); + } else { + transaction_->constraint_verification_info->RemovedProperty(vertex_); + } } storage_->indices_.UpdateOnSetProperty(property, value, vertex_, *transaction_); transaction_->manyDeltasCache.Invalidate(vertex_, property); @@ -309,10 +311,12 @@ Result<bool> VertexAccessor::InitProperties(const std::map<storage::PropertyId, CreateAndLinkDelta(transaction, vertex, Delta::SetPropertyTag(), property, PropertyValue()); 
storage->indices_.UpdateOnSetProperty(property, value, vertex, *transaction); transaction->manyDeltasCache.Invalidate(vertex, property); - if (!value.IsNull()) { - transaction->constraint_verification_info.AddedProperty(vertex); - } else { - transaction->constraint_verification_info.RemovedProperty(vertex); + if (transaction->constraint_verification_info) { + if (!value.IsNull()) { + transaction->constraint_verification_info->AddedProperty(vertex); + } else { + transaction->constraint_verification_info->RemovedProperty(vertex); + } } } result = true; @@ -347,10 +351,12 @@ Result<std::vector<std::tuple<PropertyId, PropertyValue, PropertyValue>>> Vertex storage->indices_.UpdateOnSetProperty(id, new_value, vertex, *transaction); CreateAndLinkDelta(transaction, vertex, Delta::SetPropertyTag(), id, std::move(old_value)); transaction->manyDeltasCache.Invalidate(vertex, id); - if (!new_value.IsNull()) { - transaction->constraint_verification_info.AddedProperty(vertex); - } else { - transaction->constraint_verification_info.RemovedProperty(vertex); + if (transaction->constraint_verification_info) { + if (!new_value.IsNull()) { + transaction->constraint_verification_info->AddedProperty(vertex); + } else { + transaction->constraint_verification_info->RemovedProperty(vertex); + } } } }}; @@ -380,9 +386,11 @@ Result<std::map<PropertyId, PropertyValue>> VertexAccessor::ClearProperties() { for (const auto &[property, value] : *properties) { CreateAndLinkDelta(transaction, vertex, Delta::SetPropertyTag(), property, value); storage->indices_.UpdateOnSetProperty(property, PropertyValue(), vertex, *transaction); - transaction->constraint_verification_info.RemovedProperty(vertex); transaction->manyDeltasCache.Invalidate(vertex, property); } + if (transaction->constraint_verification_info) { + transaction->constraint_verification_info->RemovedProperty(vertex); + } vertex->properties.ClearProperties(); }}; std::invoke(atomic_memory_block); diff --git 
a/src/storage/v2/vertices_iterable.cpp b/src/storage/v2/vertices_iterable.cpp index f6ff46da6..9753052ae 100644 --- a/src/storage/v2/vertices_iterable.cpp +++ b/src/storage/v2/vertices_iterable.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -10,7 +10,6 @@ // licenses/APL.txt. #include "storage/v2/vertices_iterable.hpp" - namespace memgraph::storage { VerticesIterable::VerticesIterable(AllVerticesIterable vertices) : type_(Type::ALL) { diff --git a/src/storage/v2/vertices_iterable.hpp b/src/storage/v2/vertices_iterable.hpp index e057e8a38..6075a68a2 100644 --- a/src/storage/v2/vertices_iterable.hpp +++ b/src/storage/v2/vertices_iterable.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source diff --git a/src/utils/CMakeLists.txt b/src/utils/CMakeLists.txt index bac3e78f3..802b8ff6f 100644 --- a/src/utils/CMakeLists.txt +++ b/src/utils/CMakeLists.txt @@ -24,6 +24,7 @@ find_package(Threads REQUIRED) add_library(mg-utils STATIC ${utils_src_files}) add_library(mg::utils ALIAS mg-utils) + target_link_libraries(mg-utils PUBLIC Boost::headers fmt::fmt spdlog::spdlog json) target_link_libraries(mg-utils PRIVATE librdtsc stdc++fs Threads::Threads gflags uuid rt) diff --git a/src/utils/atomic_memory_block.hpp b/src/utils/atomic_memory_block.hpp index c15424549..31a3cf3a9 100644 --- a/src/utils/atomic_memory_block.hpp +++ b/src/utils/atomic_memory_block.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source diff --git a/src/utils/event_counter.cpp b/src/utils/event_counter.cpp index a7f4d30fb..54ff4ed5c 100644 --- a/src/utils/event_counter.cpp +++ b/src/utils/event_counter.cpp @@ -26,6 +26,7 @@ M(ScanAllByLabelPropertyValueOperator, Operator, "Number of times ScanAllByLabelPropertyValue operator was used.") \ M(ScanAllByLabelPropertyOperator, Operator, "Number of times ScanAllByLabelProperty operator was used.") \ M(ScanAllByIdOperator, Operator, "Number of times ScanAllById operator was used.") \ + M(ScanAllByEdgeTypeOperator, Operator, "Number of times ScanAllByEdgeTypeOperator operator was used.") \ M(ExpandOperator, Operator, "Number of times Expand operator was used.") \ M(ExpandVariableOperator, Operator, "Number of times ExpandVariable operator was used.") \ M(ConstructNamedPathOperator, Operator, "Number of times ConstructNamedPath operator was used.") \ diff --git a/src/utils/functional.hpp b/src/utils/functional.hpp index e0714de2a..fe60edc5c 100644 --- a/src/utils/functional.hpp +++ b/src/utils/functional.hpp @@ -18,8 +18,11 @@ namespace memgraph::utils { -template <class F, class T, class R = typename std::invoke_result<F, T>::type> -auto fmap(F &&f, std::vector<T> const &v) -> std::vector<R> { +template <template <typename, typename...> class Container, typename T, typename Allocator = std::allocator<T>, + typename F = std::identity, typename R = std::decay_t<std::invoke_result_t<F, T>>> +requires ranges::range<Container<T, Allocator>> && + (!std::same_as<Container<T, Allocator>, std::string>)auto fmap(const Container<T, Allocator> &v, F &&f = {}) + -> std::vector<R> { return v | ranges::views::transform(std::forward<F>(f)) | ranges::to<std::vector<R>>(); } diff --git a/src/utils/memory.cpp b/src/utils/memory.cpp index d09f70fc3..6b1f26c11 100644 --- 
a/src/utils/memory.cpp +++ b/src/utils/memory.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -150,128 +150,133 @@ void *MonotonicBufferResource::DoAllocate(size_t bytes, size_t alignment) { namespace impl { -Pool::Pool(size_t block_size, unsigned char blocks_per_chunk, MemoryResource *memory) - : blocks_per_chunk_(blocks_per_chunk), block_size_(block_size), chunks_(memory) {} - -Pool::~Pool() { MG_ASSERT(chunks_.empty(), "You need to call Release before destruction!"); } - -void *Pool::Allocate() { - auto allocate_block_from_chunk = [this](Chunk *chunk) { - unsigned char *available_block = chunk->data + (chunk->first_available_block_ix * block_size_); - // Update free-list pointer (index in our case) by reading "next" from the - // available_block. - chunk->first_available_block_ix = *available_block; - --chunk->blocks_available; - return available_block; - }; - if (last_alloc_chunk_ && last_alloc_chunk_->blocks_available > 0U) - return allocate_block_from_chunk(last_alloc_chunk_); - // Find a Chunk with available memory. - for (auto &chunk : chunks_) { - if (chunk.blocks_available > 0U) { - last_alloc_chunk_ = &chunk; - return allocate_block_from_chunk(last_alloc_chunk_); - } - } - // We haven't found a Chunk with available memory, so allocate a new one. - if (block_size_ > std::numeric_limits<size_t>::max() / blocks_per_chunk_) throw BadAlloc("Allocation size overflow"); - size_t data_size = blocks_per_chunk_ * block_size_; +Pool::Pool(size_t block_size, unsigned char blocks_per_chunk, MemoryResource *chunk_memory) + : blocks_per_chunk_(blocks_per_chunk), block_size_(block_size), chunks_(chunk_memory) { // Use the next pow2 of block_size_ as alignment, so that we cover alignment // requests between 1 and block_size_. 
Users of this class should make sure // that requested alignment of particular blocks is never greater than the // block itself. - size_t alignment = Ceil2(block_size_); - if (alignment < block_size_) throw BadAlloc("Allocation alignment overflow"); - auto *data = reinterpret_cast<unsigned char *>(GetUpstreamResource()->Allocate(data_size, alignment)); - // Form a free-list of blocks in data. - for (unsigned char i = 0U; i < blocks_per_chunk_; ++i) { - *(data + (i * block_size_)) = i + 1U; - } - Chunk chunk{data, 0, blocks_per_chunk_}; - // Insert the big block in the sorted position. - auto it = std::lower_bound(chunks_.begin(), chunks_.end(), chunk, - [](const auto &a, const auto &b) { return a.data < b.data; }); - try { - it = chunks_.insert(it, chunk); - } catch (...) { - GetUpstreamResource()->Deallocate(data, data_size, alignment); - throw; - } + if (block_size_ > std::numeric_limits<size_t>::max() / blocks_per_chunk_) throw BadAlloc("Allocation size overflow"); +} - last_alloc_chunk_ = &*it; - last_dealloc_chunk_ = &*it; - return allocate_block_from_chunk(last_alloc_chunk_); +Pool::~Pool() { + if (!chunks_.empty()) { + auto *resource = GetUpstreamResource(); + auto const dataSize = blocks_per_chunk_ * block_size_; + auto const alignment = Ceil2(block_size_); + for (auto &chunk : chunks_) { + resource->Deallocate(chunk.raw_data, dataSize, alignment); + } + chunks_.clear(); + } + free_list_ = nullptr; +} + +void *Pool::Allocate() { + if (!free_list_) [[unlikely]] { + // need new chunk + auto const data_size = blocks_per_chunk_ * block_size_; + auto const alignment = Ceil2(block_size_); + auto *resource = GetUpstreamResource(); + auto *data = reinterpret_cast<std::byte *>(resource->Allocate(data_size, alignment)); + try { + auto &new_chunk = chunks_.emplace_front(data); + free_list_ = new_chunk.build_freelist(block_size_, blocks_per_chunk_); + } catch (...) 
{ + resource->Deallocate(data, data_size, alignment); + throw; + } + } + return std::exchange(free_list_, *reinterpret_cast<std::byte **>(free_list_)); } void Pool::Deallocate(void *p) { - MG_ASSERT(last_dealloc_chunk_, "No chunk to deallocate"); - MG_ASSERT(!chunks_.empty(), - "Expected a call to Deallocate after at least a " - "single Allocate has been done."); - auto is_in_chunk = [this, p](const Chunk &chunk) { - auto ptr = reinterpret_cast<uintptr_t>(p); - size_t data_size = blocks_per_chunk_ * block_size_; - return reinterpret_cast<uintptr_t>(chunk.data) <= ptr && ptr < reinterpret_cast<uintptr_t>(chunk.data + data_size); - }; - auto deallocate_block_from_chunk = [this, p](Chunk *chunk) { - // NOTE: This check is not enough to cover all double-free issues. - MG_ASSERT(chunk->blocks_available < blocks_per_chunk_, - "Deallocating more blocks than a chunk can contain, possibly a " - "double-free situation or we have a bug in the allocator."); - // Link the block into the free-list - auto *block = reinterpret_cast<unsigned char *>(p); - *block = chunk->first_available_block_ix; - chunk->first_available_block_ix = (block - chunk->data) / block_size_; - chunk->blocks_available++; - }; - if (is_in_chunk(*last_dealloc_chunk_)) { - deallocate_block_from_chunk(last_dealloc_chunk_); - return; - } - - // Find the chunk which served this allocation - Chunk chunk{reinterpret_cast<unsigned char *>(p) - blocks_per_chunk_ * block_size_, 0, 0}; - auto it = std::lower_bound(chunks_.begin(), chunks_.end(), chunk, - [](const auto &a, const auto &b) { return a.data <= b.data; }); - MG_ASSERT(it != chunks_.end(), "Failed deallocation in utils::Pool"); - MG_ASSERT(is_in_chunk(*it), "Failed deallocation in utils::Pool"); - - // Update last_alloc_chunk_ as well because it now has a free block. - // Additionally this corresponds with C++ pattern of allocations and - // deallocations being done in reverse order. 
- last_alloc_chunk_ = &*it; - last_dealloc_chunk_ = &*it; - deallocate_block_from_chunk(last_dealloc_chunk_); - // TODO: We could release the Chunk to upstream memory -} - -void Pool::Release() { - for (auto &chunk : chunks_) { - size_t data_size = blocks_per_chunk_ * block_size_; - size_t alignment = Ceil2(block_size_); - GetUpstreamResource()->Deallocate(chunk.data, data_size, alignment); - } - chunks_.clear(); - last_alloc_chunk_ = nullptr; - last_dealloc_chunk_ = nullptr; + *reinterpret_cast<std::byte **>(p) = std::exchange(free_list_, reinterpret_cast<std::byte *>(p)); } } // namespace impl -PoolResource::PoolResource(size_t max_blocks_per_chunk, size_t max_block_size, MemoryResource *memory_pools, - MemoryResource *memory_unpooled) - : pools_(memory_pools), - unpooled_(memory_unpooled), - max_blocks_per_chunk_(std::min(max_blocks_per_chunk, static_cast<size_t>(impl::Pool::MaxBlocksInChunk()))), - max_block_size_(max_block_size) { - MG_ASSERT(max_blocks_per_chunk_ > 0U, "Invalid number of blocks per chunk"); - MG_ASSERT(max_block_size_ > 0U, "Invalid size of block"); +struct NullMemoryResourceImpl final : public MemoryResource { + NullMemoryResourceImpl() = default; + NullMemoryResourceImpl(NullMemoryResourceImpl const &) = default; + NullMemoryResourceImpl &operator=(NullMemoryResourceImpl const &) = default; + NullMemoryResourceImpl(NullMemoryResourceImpl &&) = default; + NullMemoryResourceImpl &operator=(NullMemoryResourceImpl &&) = default; + ~NullMemoryResourceImpl() override = default; + + private: + void *DoAllocate(size_t /*bytes*/, size_t /*alignment*/) override { + throw BadAlloc{"NullMemoryResource doesn't allocate"}; + } + void DoDeallocate(void * /*p*/, size_t /*bytes*/, size_t /*alignment*/) override { + throw BadAlloc{"NullMemoryResource doesn't deallocate"}; + } + bool DoIsEqual(MemoryResource const &other) const noexcept override { + return dynamic_cast<NullMemoryResourceImpl const *>(&other) != nullptr; + } +}; + +MemoryResource 
*NullMemoryResource() noexcept { + static auto res = NullMemoryResourceImpl{}; + return &res; } +namespace impl { + +/// 1 bit sensitivity test +static_assert(bin_index<1>(9U) == 0); +static_assert(bin_index<1>(10U) == 0); +static_assert(bin_index<1>(11U) == 0); +static_assert(bin_index<1>(12U) == 0); +static_assert(bin_index<1>(13U) == 0); +static_assert(bin_index<1>(14U) == 0); +static_assert(bin_index<1>(15U) == 0); +static_assert(bin_index<1>(16U) == 0); + +static_assert(bin_index<1>(17U) == 1); +static_assert(bin_index<1>(18U) == 1); +static_assert(bin_index<1>(19U) == 1); +static_assert(bin_index<1>(20U) == 1); +static_assert(bin_index<1>(21U) == 1); +static_assert(bin_index<1>(22U) == 1); +static_assert(bin_index<1>(23U) == 1); +static_assert(bin_index<1>(24U) == 1); +static_assert(bin_index<1>(25U) == 1); +static_assert(bin_index<1>(26U) == 1); +static_assert(bin_index<1>(27U) == 1); +static_assert(bin_index<1>(28U) == 1); +static_assert(bin_index<1>(29U) == 1); +static_assert(bin_index<1>(30U) == 1); +static_assert(bin_index<1>(31U) == 1); +static_assert(bin_index<1>(32U) == 1); + +/// 2 bit sensitivity test + +static_assert(bin_index<2>(9U) == 0); +static_assert(bin_index<2>(10U) == 0); +static_assert(bin_index<2>(11U) == 0); +static_assert(bin_index<2>(12U) == 0); + +static_assert(bin_index<2>(13U) == 1); +static_assert(bin_index<2>(14U) == 1); +static_assert(bin_index<2>(15U) == 1); +static_assert(bin_index<2>(16U) == 1); + +static_assert(bin_index<2>(17U) == 2); +static_assert(bin_index<2>(18U) == 2); +static_assert(bin_index<2>(19U) == 2); +static_assert(bin_index<2>(20U) == 2); +static_assert(bin_index<2>(21U) == 2); +static_assert(bin_index<2>(22U) == 2); +static_assert(bin_index<2>(23U) == 2); +static_assert(bin_index<2>(24U) == 2); + +} // namespace impl + void *PoolResource::DoAllocate(size_t bytes, size_t alignment) { // Take the max of `bytes` and `alignment` so that we simplify handling // alignment requests. 
- size_t block_size = std::max(bytes, alignment); + size_t block_size = std::max({bytes, alignment, 1UL}); // Check that we have received a regular allocation request with non-padded // structs/classes in play. These will always have // `sizeof(T) % alignof(T) == 0`. Special requests which don't have that @@ -279,80 +284,36 @@ void *PoolResource::DoAllocate(size_t bytes, size_t alignment) { // have to write a general-purpose allocator which has to behave as complex // as malloc/free. if (block_size % alignment != 0) throw BadAlloc("Requested bytes must be a multiple of alignment"); - if (block_size > max_block_size_) { - // Allocate a big block. - BigBlock big_block{bytes, alignment, GetUpstreamResourceBlocks()->Allocate(bytes, alignment)}; - // Insert the big block in the sorted position. - auto it = std::lower_bound(unpooled_.begin(), unpooled_.end(), big_block, - [](const auto &a, const auto &b) { return a.data < b.data; }); - try { - unpooled_.insert(it, big_block); - } catch (...) { - GetUpstreamResourceBlocks()->Deallocate(big_block.data, bytes, alignment); - throw; - } - return big_block.data; - } - // Allocate a regular block, first check if last_alloc_pool_ is suitable. - if (last_alloc_pool_ && last_alloc_pool_->GetBlockSize() == block_size) { - return last_alloc_pool_->Allocate(); - } - // Find the pool with greater or equal block_size. - impl::Pool pool(block_size, max_blocks_per_chunk_, GetUpstreamResource()); - auto it = std::lower_bound(pools_.begin(), pools_.end(), pool, - [](const auto &a, const auto &b) { return a.GetBlockSize() < b.GetBlockSize(); }); - if (it != pools_.end() && it->GetBlockSize() == block_size) { - last_alloc_pool_ = &*it; - last_dealloc_pool_ = &*it; - return it->Allocate(); - } - // We don't have a pool for this block_size, so insert it in the sorted - // position. 
- it = pools_.emplace(it, std::move(pool)); - last_alloc_pool_ = &*it; - last_dealloc_pool_ = &*it; - return it->Allocate(); -} + if (block_size <= 64) { + return mini_pools_[(block_size - 1UL) / 8UL].Allocate(); + } + if (block_size <= 128) { + return pools_3bit_.allocate(block_size); + } + if (block_size <= 512) { + return pools_4bit_.allocate(block_size); + } + if (block_size <= 1024) { + return pools_5bit_.allocate(block_size); + } + return unpooled_memory_->Allocate(bytes, alignment); +} void PoolResource::DoDeallocate(void *p, size_t bytes, size_t alignment) { - size_t block_size = std::max(bytes, alignment); - MG_ASSERT(block_size % alignment == 0, - "PoolResource shouldn't serve allocation requests where bytes aren't " - "a multiple of alignment"); - if (block_size > max_block_size_) { - // Deallocate a big block. - BigBlock big_block{bytes, alignment, p}; - auto it = std::lower_bound(unpooled_.begin(), unpooled_.end(), big_block, - [](const auto &a, const auto &b) { return a.data < b.data; }); - MG_ASSERT(it != unpooled_.end(), "Failed deallocation"); - MG_ASSERT(it->data == p && it->bytes == bytes && it->alignment == alignment, "Failed deallocation"); - unpooled_.erase(it); - GetUpstreamResourceBlocks()->Deallocate(p, bytes, alignment); - return; + size_t block_size = std::max({bytes, alignment, 1UL}); + DMG_ASSERT(block_size % alignment == 0); + + if (block_size <= 64) { + mini_pools_[(block_size - 1UL) / 8UL].Deallocate(p); + } else if (block_size <= 128) { + pools_3bit_.deallocate(p, block_size); + } else if (block_size <= 512) { + pools_4bit_.deallocate(p, block_size); + } else if (block_size <= 1024) { + pools_5bit_.deallocate(p, block_size); + } else { + unpooled_memory_->Deallocate(p, bytes, alignment); } - // Deallocate a regular block, first check if last_dealloc_pool_ is suitable. - if (last_dealloc_pool_ && last_dealloc_pool_->GetBlockSize() == block_size) return last_dealloc_pool_->Deallocate(p); - // Find the pool with equal block_size. 
- impl::Pool pool(block_size, max_blocks_per_chunk_, GetUpstreamResource()); - auto it = std::lower_bound(pools_.begin(), pools_.end(), pool, - [](const auto &a, const auto &b) { return a.GetBlockSize() < b.GetBlockSize(); }); - MG_ASSERT(it != pools_.end(), "Failed deallocation"); - MG_ASSERT(it->GetBlockSize() == block_size, "Failed deallocation"); - last_alloc_pool_ = &*it; - last_dealloc_pool_ = &*it; - return it->Deallocate(p); } - -void PoolResource::Release() { - for (auto &pool : pools_) pool.Release(); - pools_.clear(); - for (auto &big_block : unpooled_) - GetUpstreamResourceBlocks()->Deallocate(big_block.data, big_block.bytes, big_block.alignment); - unpooled_.clear(); - last_alloc_pool_ = nullptr; - last_dealloc_pool_ = nullptr; -} - -// PoolResource END - +bool PoolResource::DoIsEqual(MemoryResource const &other) const noexcept { return this == &other; } } // namespace memgraph::utils diff --git a/src/utils/memory.hpp b/src/utils/memory.hpp index 225a3b6a1..8ff6c3523 100644 --- a/src/utils/memory.hpp +++ b/src/utils/memory.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -15,7 +15,11 @@ #pragma once +#include <climits> #include <cstddef> +#include <cstdint> +#include <forward_list> +#include <list> #include <memory> #include <mutex> #include <new> @@ -248,6 +252,8 @@ bool operator!=(const Allocator<T> &a, const Allocator<U> &b) { return !(a == b); } +auto NullMemoryResource() noexcept -> MemoryResource *; + /// Wraps std::pmr::memory_resource for use with out MemoryResource class StdMemoryResource final : public MemoryResource { public: @@ -380,37 +386,45 @@ class MonotonicBufferResource final : public MemoryResource { namespace impl { +template <class T> +using AList = std::forward_list<T, Allocator<T>>; + template <class T> using AVector = std::vector<T, Allocator<T>>; /// Holds a number of Chunks each serving blocks of particular size. When a -/// Chunk runs out of available blocks, a new Chunk is allocated. The naming is -/// taken from `libstdc++` implementation, but the implementation details are -/// more similar to `FixedAllocator` described in "Small Object Allocation" from -/// "Modern C++ Design". +/// Chunk runs out of available blocks, a new Chunk is allocated. class Pool final { /// Holds a pointer into a chunk of memory which consists of equal sized - /// blocks. Each Chunk can handle `std::numeric_limits<unsigned char>::max()` - /// number of blocks. Blocks form a "free-list", where each unused block has - /// an embedded index to the next unused block. + /// blocks. Blocks form a "free-list" struct Chunk { - unsigned char *data; - unsigned char first_available_block_ix; - unsigned char blocks_available; + // TODO: make blocks_per_chunk a per chunk thing (ie. 
allow chunk growth) + std::byte *raw_data; + explicit Chunk(std::byte *rawData) : raw_data(rawData) {} + std::byte *build_freelist(std::size_t block_size, std::size_t blocks_in_chunk) { + auto current = raw_data; + std::byte *prev = nullptr; + auto end = current + (blocks_in_chunk * block_size); + while (current != end) { + std::byte **list_entry = reinterpret_cast<std::byte **>(current); + *list_entry = std::exchange(prev, current); + current += block_size; + } + DMG_ASSERT(prev != nullptr); + return prev; + } }; - unsigned char blocks_per_chunk_; - size_t block_size_; - AVector<Chunk> chunks_; - Chunk *last_alloc_chunk_{nullptr}; - Chunk *last_dealloc_chunk_{nullptr}; + std::byte *free_list_{nullptr}; + uint8_t blocks_per_chunk_{}; + std::size_t block_size_{}; + + AList<Chunk> chunks_; // TODO: do ourself so we can do fast Release (detect monotonic, do nothing) public: - static constexpr auto MaxBlocksInChunk() { - return std::numeric_limits<decltype(Chunk::first_available_block_ix)>::max(); - } + static constexpr auto MaxBlocksInChunk = std::numeric_limits<decltype(blocks_per_chunk_)>::max(); - Pool(size_t block_size, unsigned char blocks_per_chunk, MemoryResource *memory); + Pool(size_t block_size, unsigned char blocks_per_chunk, MemoryResource *chunk_memory); Pool(const Pool &) = delete; Pool &operator=(const Pool &) = delete; @@ -430,8 +444,147 @@ class Pool final { void *Allocate(); void Deallocate(void *p); +}; - void Release(); +// C++ overloads for clz +constexpr auto clz(unsigned int x) { return __builtin_clz(x); } +constexpr auto clz(unsigned long x) { return __builtin_clzl(x); } +constexpr auto clz(unsigned long long x) { return __builtin_clzll(x); } + +template <typename T> +constexpr auto bits_sizeof = sizeof(T) * CHAR_BIT; + +/// 0-based bit index of the most significant bit assumed that `n` != 0 +template <typename T> +constexpr auto msb_index(T n) { + return bits_sizeof<T> - clz(n) - T(1); +} + +/* This function will in O(1) time provide a bin 
index based on: + * B - the number of most significant bits to be sensitive to + * LB - the value that should be considered below the consideration for bin index of 0 (LB is exclusive) + * + * let's say we were: + * - sensitive to two bits (B == 2) + * - lowest bin is for 8 (LB == 8) + * + * our bin indexes would look like: + * 0 - 0000'1100 12 + * 1 - 0001'0000 16 + * 2 - 0001'1000 24 + * 3 - 0010'0000 32 + * 4 - 0011'0000 48 + * 5 - 0100'0000 64 + * 6 - 0110'0000 96 + * 7 - 1000'0000 128 + * 8 - 1100'0000 192 + * ... + * + * Example: + * Given n == 70, we want to return the bin index to the first value which is + * larger than n. + * bin_index<2,8>(70) => 6, as 64 (index 5) < 70 and 70 <= 96 (index 6) + */ +template <std::size_t B = 2, std::size_t LB = 8> +constexpr std::size_t bin_index(std::size_t n) { + static_assert(B >= 1U, "Needs to be sensitive to at least one bit"); + static_assert(LB != 0U, "Lower bound need to be non-zero"); + DMG_ASSERT(n > LB); + + // We will always be sensitive to at least the MSB + // exponent tells us how many bits we need to use to select within a level + constexpr auto kExponent = B - 1U; + // 2^exponent gives the size of each level + constexpr auto kSize = 1U << kExponent; + // offset helps adjust results down to be in line with bin_index(LB) == 0 + constexpr auto kOffset = msb_index(LB); + + auto const msb_idx = msb_index(n); + DMG_ASSERT(msb_idx != 0); + + auto const mask = (1u << msb_idx) - 1u; + auto const under = n & mask; + auto const selector = under >> (msb_idx - kExponent); + + auto const rest = under & (mask >> kExponent); + auto const no_overflow = rest == 0U; + + auto const msb_level = kSize * (msb_idx - kOffset); + return msb_level + selector - no_overflow; +} + +// This is the inverse operation for bin_index +// bin_size(bin_index(X)-1) < X <= bin_size(bin_index(X)) +template <std::size_t B = 2, std::size_t LB = 8> +std::size_t bin_size(std::size_t idx) { + constexpr auto kExponent = B - 1U; + constexpr auto kSize =
1U << kExponent; + constexpr auto kOffset = msb_index(LB); + + // no need to optimise `/` or `%` compiler can see `kSize` is a power of 2 + auto const level = (idx + 1) / kSize; + auto const sub_level = (idx + 1) % kSize; + return (1U << (level + kOffset)) | (sub_level << (level + kOffset - kExponent)); +} + +template <std::size_t Bits, std::size_t LB, std::size_t UB> +struct MultiPool { + static_assert(LB < UB, "lower bound must be less than upper bound"); + static_assert(IsPow2(LB) && IsPow2(UB), "Design untested for non powers of 2"); + static_assert((LB << Bits) % sizeof(void *) == 0, "Smallest pool must have space and alignment for freelist"); + + // upper bound is inclusive + static bool is_size_handled(std::size_t size) { return LB < size && size <= UB; } + static bool is_above_upper_bound(std::size_t size) { return UB < size; } + + static constexpr auto n_bins = bin_index<Bits, LB>(UB) + 1U; + + MultiPool(uint8_t blocks_per_chunk, MemoryResource *memory, MemoryResource *internal_memory) + : blocks_per_chunk_{blocks_per_chunk}, memory_{memory}, internal_memory_{internal_memory} {} + + ~MultiPool() { + if (pools_) { + auto pool_alloc = Allocator<Pool>(internal_memory_); + for (auto i = 0U; i != n_bins; ++i) { + pool_alloc.destroy(&pools_[i]); + } + pool_alloc.deallocate(pools_, n_bins); + } + } + + void *allocate(std::size_t bytes) { + auto idx = bin_index<Bits, LB>(bytes); + if (!pools_) [[unlikely]] { + initialise_pools(); + } + return pools_[idx].Allocate(); + } + + void deallocate(void *ptr, std::size_t bytes) { + auto idx = bin_index<Bits, LB>(bytes); + pools_[idx].Deallocate(ptr); + } + + private: + void initialise_pools() { + auto pool_alloc = Allocator<Pool>(internal_memory_); + auto pools = pool_alloc.allocate(n_bins); + try { + for (auto i = 0U; i != n_bins; ++i) { + auto block_size = bin_size<Bits, LB>(i); + pool_alloc.construct(&pools[i], block_size, blocks_per_chunk_, memory_); + } + pools_ = pools; + } catch (...) 
{ + pool_alloc.deallocate(pools, n_bins); + throw; + } + } + + Pool *pools_{}; + uint8_t blocks_per_chunk_{}; + MemoryResource *memory_{}; + MemoryResource *internal_memory_{}; }; } // namespace impl @@ -442,8 +595,6 @@ class Pool final { /// /// This class has the following properties with regards to memory management. /// -/// * All allocated memory will be freed upon destruction, even if Deallocate -/// has not been called for some of the allocated blocks. /// * It consists of a collection of impl::Pool instances, each serving /// requests for different block sizes. Each impl::Pool manages a collection /// of impl::Pool::Chunk instances which are divided into blocks of uniform @@ -452,91 +603,46 @@ class Pool final { /// arbitrary alignment requests. Each requested block size must be a /// multiple of alignment or smaller than the alignment value. /// * An allocation request within the limits of the maximum block size will -/// find a Pool serving the requested size. If there's no Pool serving such -/// a request, a new one is instantiated. +/// find a Pool serving the requested size. Some requests will share a larger +/// pool size. /// * When a Pool exhausts its Chunk, a new one is allocated with the size for /// the maximum number of blocks. /// * Allocation requests which exceed the maximum block size will be /// forwarded to upstream MemoryResource. -/// * Maximum block size and maximum number of blocks per chunk can be tuned -/// by passing the arguments to the constructor. +/// * Maximum number of blocks per chunk can be tuned by passing the +/// arguments to the constructor. + class PoolResource final : public MemoryResource { public: - /// Construct with given max_blocks_per_chunk, max_block_size and upstream - /// memory. - /// - /// The implementation will use std::min(max_blocks_per_chunk, - /// impl::Pool::MaxBlocksInChunk()) as the real maximum number of blocks per - /// chunk. 
Allocation requests exceeding max_block_size are simply forwarded - /// to upstream memory. - PoolResource(size_t max_blocks_per_chunk, size_t max_block_size, MemoryResource *memory_pools = NewDeleteResource(), - MemoryResource *memory_unpooled = NewDeleteResource()); - - PoolResource(const PoolResource &) = delete; - PoolResource &operator=(const PoolResource &) = delete; - - PoolResource(PoolResource &&) = default; - PoolResource &operator=(PoolResource &&) = default; - - ~PoolResource() override { Release(); } - - MemoryResource *GetUpstreamResource() const { return pools_.get_allocator().GetMemoryResource(); } - MemoryResource *GetUpstreamResourceBlocks() const { return unpooled_.get_allocator().GetMemoryResource(); } - - /// Release all allocated memory. - void Release(); + PoolResource(uint8_t blocks_per_chunk, MemoryResource *memory = NewDeleteResource(), + MemoryResource *internal_memory = NewDeleteResource()) + : mini_pools_{ + impl::Pool{8, blocks_per_chunk, memory}, + impl::Pool{16, blocks_per_chunk, memory}, + impl::Pool{24, blocks_per_chunk, memory}, + impl::Pool{32, blocks_per_chunk, memory}, + impl::Pool{40, blocks_per_chunk, memory}, + impl::Pool{48, blocks_per_chunk, memory}, + impl::Pool{56, blocks_per_chunk, memory}, + impl::Pool{64, blocks_per_chunk, memory}, + }, + pools_3bit_(blocks_per_chunk, memory, internal_memory), + pools_4bit_(blocks_per_chunk, memory, internal_memory), + pools_5bit_(blocks_per_chunk, memory, internal_memory), + unpooled_memory_{internal_memory} {} + ~PoolResource() override = default; private: - // Big block larger than max_block_size_, doesn't go into a pool. - struct BigBlock { - size_t bytes; - size_t alignment; - void *data; - }; - - // TODO: Potential memory optimization is replacing `std::vector` with our - // custom vector implementation which doesn't store a `MemoryResource *`. - // Currently we have vectors for `pools_` and `unpooled_`, as well as each - // `impl::Pool` stores a `chunks_` vector. 
- - // Pools are sorted by bound_size_, ascending. - impl::AVector<impl::Pool> pools_; - impl::Pool *last_alloc_pool_{nullptr}; - impl::Pool *last_dealloc_pool_{nullptr}; - // Unpooled BigBlocks are sorted by data pointer. - impl::AVector<BigBlock> unpooled_; - size_t max_blocks_per_chunk_; - size_t max_block_size_; - void *DoAllocate(size_t bytes, size_t alignment) override; - void DoDeallocate(void *p, size_t bytes, size_t alignment) override; - - bool DoIsEqual(const MemoryResource &other) const noexcept override { return this == &other; } -}; - -/// Like PoolResource but uses SpinLock for thread safe usage. -class SynchronizedPoolResource final : public MemoryResource { - public: - SynchronizedPoolResource(size_t max_blocks_per_chunk, size_t max_block_size, - MemoryResource *memory = NewDeleteResource()) - : pool_memory_(max_blocks_per_chunk, max_block_size, memory) {} + bool DoIsEqual(MemoryResource const &other) const noexcept override; private: - PoolResource pool_memory_; - SpinLock lock_; - - void *DoAllocate(size_t bytes, size_t alignment) override { - std::lock_guard<SpinLock> guard(lock_); - return pool_memory_.Allocate(bytes, alignment); - } - - void DoDeallocate(void *p, size_t bytes, size_t alignment) override { - std::lock_guard<SpinLock> guard(lock_); - pool_memory_.Deallocate(p, bytes, alignment); - } - - bool DoIsEqual(const MemoryResource &other) const noexcept override { return this == &other; } + std::array<impl::Pool, 8> mini_pools_; + impl::MultiPool<3, 64, 128> pools_3bit_; + impl::MultiPool<4, 128, 512> pools_4bit_; + impl::MultiPool<5, 512, 1024> pools_5bit_; + MemoryResource *unpooled_memory_; }; class MemoryTrackingResource final : public utils::MemoryResource { diff --git a/src/utils/scheduler.hpp b/src/utils/scheduler.hpp index 742271a95..45b2c8b04 100644 --- a/src/utils/scheduler.hpp +++ b/src/utils/scheduler.hpp @@ -57,7 +57,7 @@ class Scheduler { // program and there is probably no work to do in scheduled function at // the start 
of the program. Since Server will log some messages on // the program start we let him log first and we make sure by first - // waiting that funcion f will not log before it. + // waiting that function f will not log before it. // Check for pause also. std::unique_lock<std::mutex> lk(mutex_); auto now = std::chrono::system_clock::now(); diff --git a/src/utils/settings.cpp b/src/utils/settings.cpp index 4768edc42..5e0954b4b 100644 --- a/src/utils/settings.cpp +++ b/src/utils/settings.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source diff --git a/src/utils/string.hpp b/src/utils/string.hpp index 31723bb65..26709439f 100644 --- a/src/utils/string.hpp +++ b/src/utils/string.hpp @@ -229,6 +229,13 @@ inline std::vector<std::string> Split(const std::string_view src, const std::str return res; } +inline std::vector<std::string_view> SplitView(const std::string_view src, const std::string_view delimiter, + int splits = -1) { + std::vector<std::string_view> res; + Split(&res, src, delimiter, splits); + return res; +} + /** * Split a string by whitespace into a vector. * Runs of consecutive whitespace are regarded as a single delimiter. diff --git a/src/utils/tag.hpp b/src/utils/tag.hpp new file mode 100644 index 000000000..dfd8c8f81 --- /dev/null +++ b/src/utils/tag.hpp @@ -0,0 +1,32 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. 
+// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#pragma once + +namespace memgraph::utils { + +template <typename T> +struct tag_type { + using type = T; +}; + +template <auto V> +struct tag_value { + static constexpr auto value = V; +}; + +template <typename T> +auto tag_t = tag_type<T>{}; + +template <auto V> +auto tag_v = tag_value<V>{}; + +} // namespace memgraph::utils diff --git a/src/utils/typeinfo.hpp b/src/utils/typeinfo.hpp index 1ca08a3f7..77910f731 100644 --- a/src/utils/typeinfo.hpp +++ b/src/utils/typeinfo.hpp @@ -32,6 +32,7 @@ enum class TypeId : uint64_t { SCAN_ALL_BY_LABEL_PROPERTY_VALUE, SCAN_ALL_BY_LABEL_PROPERTY, SCAN_ALL_BY_ID, + SCAN_ALL_BY_EDGE_TYPE, EXPAND_COMMON, EXPAND, EXPANSION_LAMBDA, @@ -68,6 +69,7 @@ enum class TypeId : uint64_t { APPLY, INDEXED_JOIN, HASH_JOIN, + ROLLUP_APPLY, // Replication // NOTE: these NEED to be stable in the 2000+ range (see rpc version) @@ -99,6 +101,8 @@ enum class TypeId : uint64_t { REP_DROP_AUTH_DATA_RES, REP_TRY_SET_MAIN_UUID_REQ, REP_TRY_SET_MAIN_UUID_RES, + REP_FORCE_RESET_STORAGE_REQ, + REP_FORCE_RESET_STORAGE_RES, // Coordinator COORD_FAILOVER_REQ, @@ -114,6 +118,8 @@ enum class TypeId : uint64_t { COORD_GET_UUID_REQ, COORD_GET_UUID_RES, + COORD_GET_INSTANCE_DATABASES_REQ, + COORD_GET_INSTANCE_DATABASES_RES, // AST AST_LABELIX = 3000, @@ -180,6 +186,7 @@ enum class TypeId : uint64_t { AST_EXPLAIN_QUERY, AST_PROFILE_QUERY, AST_INDEX_QUERY, + AST_EDGE_INDEX_QUERY, AST_CREATE, AST_CALL_PROCEDURE, AST_MATCH, diff --git a/src/utils/uuid.cpp b/src/utils/uuid.cpp index fbcf662de..d55cfaba3 100644 --- a/src/utils/uuid.cpp +++ b/src/utils/uuid.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -10,7 +10,7 @@ // licenses/APL.txt. #include "utils/uuid.hpp" -#include <uuid/uuid.h> + #include "slk/serialization.hpp" namespace memgraph::utils { diff --git a/src/utils/uuid.hpp b/src/utils/uuid.hpp index bca55d73b..bbfec6228 100644 --- a/src/utils/uuid.hpp +++ b/src/utils/uuid.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -12,6 +12,7 @@ #pragma once #include <uuid/uuid.h> + #include <array> #include <json/json.hpp> #include <string> @@ -39,9 +40,10 @@ struct UUID { UUID() { uuid_generate(uuid.data()); } explicit operator std::string() const { - auto decoded = std::array<char, UUID_STR_LEN>{}; + // Note not using UUID_STR_LEN so we can build with older libuuid + auto decoded = std::array<char, 37 /*UUID_STR_LEN*/>{}; uuid_unparse(uuid.data(), decoded.data()); - return std::string{decoded.data(), UUID_STR_LEN - 1}; + return std::string{decoded.data(), 37 /*UUID_STR_LEN*/ - 1}; } explicit operator arr_t() const { return uuid; } diff --git a/tests/benchmark/query/execution.cpp b/tests/benchmark/query/execution.cpp index d49b14fc3..1d65cdb93 100644 --- a/tests/benchmark/query/execution.cpp +++ b/tests/benchmark/query/execution.cpp @@ -55,12 +55,12 @@ class NewDeleteResource final { }; class PoolResource final { - memgraph::utils::PoolResource memory_{128, 4 * 1024}; + memgraph::utils::PoolResource memory_{128}; public: memgraph::utils::MemoryResource *get() { return &memory_; } - void Reset() { memory_.Release(); } + void Reset() {} }; static void AddVertices(memgraph::storage::Storage *db, int vertex_count) { diff --git 
a/tests/benchmark/skip_list_vs_stl.cpp b/tests/benchmark/skip_list_vs_stl.cpp index 1a17e56e1..9a856822f 100644 --- a/tests/benchmark/skip_list_vs_stl.cpp +++ b/tests/benchmark/skip_list_vs_stl.cpp @@ -1,4 +1,4 @@ -// Copyright 2022 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -101,8 +101,7 @@ class StdSetWithPoolAllocatorInsertFixture : public benchmark::Fixture { } protected: - memgraph::utils::PoolResource memory_{256U /* max_blocks_per_chunk */, 1024U /* max_block_size */, - memgraph::utils::NewDeleteResource()}; + memgraph::utils::PoolResource memory_{128U /* max_blocks_per_chunk */, memgraph::utils::NewDeleteResource()}; std::set<uint64_t, std::less<>, memgraph::utils::Allocator<uint64_t>> container{&memory_}; memgraph::utils::SpinLock lock; }; @@ -208,8 +207,7 @@ class StdSetWithPoolAllocatorFindFixture : public benchmark::Fixture { } protected: - memgraph::utils::PoolResource memory_{256U /* max_blocks_per_chunk */, 1024U /* max_block_size */, - memgraph::utils::NewDeleteResource()}; + memgraph::utils::PoolResource memory_{128U /* max_blocks_per_chunk */, memgraph::utils::NewDeleteResource()}; std::set<uint64_t, std::less<>, memgraph::utils::Allocator<uint64_t>> container{&memory_}; memgraph::utils::SpinLock lock; }; @@ -325,8 +323,7 @@ class StdMapWithPoolAllocatorInsertFixture : public benchmark::Fixture { } protected: - memgraph::utils::PoolResource memory_{256U /* max_blocks_per_chunk */, 1024U /* max_block_size */, - memgraph::utils::NewDeleteResource()}; + memgraph::utils::PoolResource memory_{128U /* max_blocks_per_chunk */, memgraph::utils::NewDeleteResource()}; std::map<uint64_t, uint64_t, std::less<>, memgraph::utils::Allocator<std::pair<const uint64_t, uint64_t>>> container{ &memory_}; memgraph::utils::SpinLock lock; @@ -433,8 +430,7 @@ class 
StdMapWithPoolAllocatorFindFixture : public benchmark::Fixture { } protected: - memgraph::utils::PoolResource memory_{256U /* max_blocks_per_chunk */, 1024U /* max_block_size */, - memgraph::utils::NewDeleteResource()}; + memgraph::utils::PoolResource memory_{128U /* max_blocks_per_chunk */, memgraph::utils::NewDeleteResource()}; std::map<uint64_t, uint64_t, std::less<>, memgraph::utils::Allocator<std::pair<const uint64_t, uint64_t>>> container{ &memory_}; memgraph::utils::SpinLock lock; diff --git a/tests/code_analysis/clang_tidy.sh b/tests/code_analysis/clang_tidy.sh new file mode 100755 index 000000000..f8bdfc252 --- /dev/null +++ b/tests/code_analysis/clang_tidy.sh @@ -0,0 +1,23 @@ +#!/bin/bash +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +PROJECT_ROOT="$SCRIPT_DIR/../.." +BASE_BRANCH="origin/master" +THREADS=${THREADS:-$(nproc)} + +if [[ "$#" -gt 0 ]]; then + case "$1" in + --base-branch) + BASE_BRANCH=$2 + ;; + *) + echo "Error: Unknown flag '$1'" + exit 1 + ;; + esac +fi + +cd $PROJECT_ROOT +git diff -U0 $BASE_BRANCH... -- src | ./tools/github/clang-tidy/clang-tidy-diff.py -p 1 -j $THREADS -path build -regex ".+\.cpp" | tee ./build/clang_tidy_output.txt +# Fail if any warning is reported +! cat ./build/clang_tidy_output.txt | ./tools/github/clang-tidy/grep_error_lines.sh > /dev/null +cd $SCRIPT_DIR diff --git a/tests/code_analysis/python_code_analysis.sh b/tests/code_analysis/python_code_analysis.sh new file mode 100755 index 000000000..500a873d1 --- /dev/null +++ b/tests/code_analysis/python_code_analysis.sh @@ -0,0 +1,27 @@ +#!/bin/bash +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +PROJECT_ROOT="$SCRIPT_DIR/../.." +BASE_BRANCH="origin/master" + +if [[ "$#" -gt 0 ]]; then + case "$1" in + --base-branch) + BASE_BRANCH=$2 + ;; + *) + echo "Error: Unknown flag '$1'" + exit 1 + ;; + esac +fi + +cd $PROJECT_ROOT +CHANGED_FILES=$(git diff -U0 $BASE_BRANCH... 
--name-only --diff-filter=d) +for file in ${CHANGED_FILES}; do + echo ${file} + if [[ ${file} == *.py ]]; then + python3 -m black --check --diff ${file} + python3 -m isort --profile black --check-only --diff ${file} + fi +done +cd $SCRIPT_DIR diff --git a/tests/e2e/high_availability/coord_cluster_registration.py b/tests/e2e/high_availability/coord_cluster_registration.py index 68a387281..13aaf27fe 100644 --- a/tests/e2e/high_availability/coord_cluster_registration.py +++ b/tests/e2e/high_availability/coord_cluster_registration.py @@ -117,28 +117,37 @@ def test_register_repl_instances_then_coordinators(): coordinator3_cursor = connect(host="localhost", port=7692).cursor() execute_and_fetch_all( - coordinator3_cursor, "REGISTER INSTANCE instance_1 ON '127.0.0.1:10011' WITH '127.0.0.1:10001'" + coordinator3_cursor, + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", ) execute_and_fetch_all( - coordinator3_cursor, "REGISTER INSTANCE instance_2 ON '127.0.0.1:10012' WITH '127.0.0.1:10002'" + coordinator3_cursor, + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", ) execute_and_fetch_all( - coordinator3_cursor, "REGISTER INSTANCE instance_3 ON '127.0.0.1:10013' WITH '127.0.0.1:10003'" + coordinator3_cursor, + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", ) execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN") - assert add_coordinator(coordinator3_cursor, "ADD COORDINATOR 1 ON '127.0.0.1:10111'") - assert add_coordinator(coordinator3_cursor, "ADD COORDINATOR 2 ON '127.0.0.1:10112'") + assert add_coordinator( + coordinator3_cursor, + "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': 
'127.0.0.1:10111'}", + ) + assert add_coordinator( + coordinator3_cursor, + "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", + ) def check_coordinator3(): return sorted(list(execute_and_fetch_all(coordinator3_cursor, "SHOW INSTANCES"))) expected_cluster_coord3 = [ - ("coordinator_1", "127.0.0.1:10111", "", True, "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", True, "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", True, "coordinator"), - ("instance_1", "", "127.0.0.1:10011", True, "replica"), - ("instance_2", "", "127.0.0.1:10012", True, "replica"), - ("instance_3", "", "127.0.0.1:10013", True, "main"), + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), + ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "up", "replica"), + ("instance_2", "", "127.0.0.1:10012", "up", "replica"), + ("instance_3", "", "127.0.0.1:10013", "up", "main"), ] mg_sleep_and_assert(expected_cluster_coord3, check_coordinator3) @@ -147,21 +156,23 @@ def test_register_repl_instances_then_coordinators(): def check_coordinator1(): return sorted(list(execute_and_fetch_all(coordinator1_cursor, "SHOW INSTANCES"))) - # TODO: (andi) This should be solved eventually - expected_cluster_not_shared = [ - ("coordinator_1", "127.0.0.1:10111", "", True, "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", True, "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", True, "coordinator"), + expected_cluster_shared = [ + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), + ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), + ("instance_1", "", "", "unknown", "replica"), + ("instance_2", "", "", "unknown", "replica"), + ("instance_3", "", "", "unknown", "main"), ] - 
mg_sleep_and_assert(expected_cluster_not_shared, check_coordinator1) + mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) coordinator2_cursor = connect(host="localhost", port=7691).cursor() def check_coordinator2(): return sorted(list(execute_and_fetch_all(coordinator2_cursor, "SHOW INSTANCES"))) - mg_sleep_and_assert(expected_cluster_not_shared, check_coordinator2) + mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) def test_register_coordinator_then_repl_instances(): @@ -170,16 +181,25 @@ def test_register_coordinator_then_repl_instances(): coordinator3_cursor = connect(host="localhost", port=7692).cursor() - assert add_coordinator(coordinator3_cursor, "ADD COORDINATOR 1 ON '127.0.0.1:10111'") - assert add_coordinator(coordinator3_cursor, "ADD COORDINATOR 2 ON '127.0.0.1:10112'") - execute_and_fetch_all( - coordinator3_cursor, "REGISTER INSTANCE instance_1 ON '127.0.0.1:10011' WITH '127.0.0.1:10001'" + assert add_coordinator( + coordinator3_cursor, + "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", + ) + assert add_coordinator( + coordinator3_cursor, + "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", ) execute_and_fetch_all( - coordinator3_cursor, "REGISTER INSTANCE instance_2 ON '127.0.0.1:10012' WITH '127.0.0.1:10002'" + coordinator3_cursor, + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", ) execute_and_fetch_all( - coordinator3_cursor, "REGISTER INSTANCE instance_3 ON '127.0.0.1:10013' WITH '127.0.0.1:10003'" + coordinator3_cursor, + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", + ) + execute_and_fetch_all( + coordinator3_cursor, + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': 
'127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", ) execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN") @@ -187,12 +207,12 @@ def test_register_coordinator_then_repl_instances(): return sorted(list(execute_and_fetch_all(coordinator3_cursor, "SHOW INSTANCES"))) expected_cluster_coord3 = [ - ("coordinator_1", "127.0.0.1:10111", "", True, "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", True, "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", True, "coordinator"), - ("instance_1", "", "127.0.0.1:10011", True, "replica"), - ("instance_2", "", "127.0.0.1:10012", True, "replica"), - ("instance_3", "", "127.0.0.1:10013", True, "main"), + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), + ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "up", "replica"), + ("instance_2", "", "127.0.0.1:10012", "up", "replica"), + ("instance_3", "", "127.0.0.1:10013", "up", "main"), ] mg_sleep_and_assert(expected_cluster_coord3, check_coordinator3) @@ -201,21 +221,23 @@ def test_register_coordinator_then_repl_instances(): def check_coordinator1(): return sorted(list(execute_and_fetch_all(coordinator1_cursor, "SHOW INSTANCES"))) - # TODO: (andi) This should be solved eventually - expected_cluster_not_shared = [ - ("coordinator_1", "127.0.0.1:10111", "", True, "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", True, "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", True, "coordinator"), + expected_cluster_shared = [ + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), + ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), + ("instance_1", "", "", "unknown", "replica"), + ("instance_2", "", "", "unknown", "replica"), + ("instance_3", "", 
"", "unknown", "main"), ] - mg_sleep_and_assert(expected_cluster_not_shared, check_coordinator1) + mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) coordinator2_cursor = connect(host="localhost", port=7691).cursor() def check_coordinator2(): return sorted(list(execute_and_fetch_all(coordinator2_cursor, "SHOW INSTANCES"))) - mg_sleep_and_assert(expected_cluster_not_shared, check_coordinator2) + mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) def test_coordinators_communication_with_restarts(): @@ -224,23 +246,35 @@ def test_coordinators_communication_with_restarts(): coordinator3_cursor = connect(host="localhost", port=7692).cursor() - assert add_coordinator(coordinator3_cursor, "ADD COORDINATOR 1 ON '127.0.0.1:10111'") - assert add_coordinator(coordinator3_cursor, "ADD COORDINATOR 2 ON '127.0.0.1:10112'") - execute_and_fetch_all( - coordinator3_cursor, "REGISTER INSTANCE instance_1 ON '127.0.0.1:10011' WITH '127.0.0.1:10001'" + assert add_coordinator( + coordinator3_cursor, + "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", + ) + assert add_coordinator( + coordinator3_cursor, + "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", ) execute_and_fetch_all( - coordinator3_cursor, "REGISTER INSTANCE instance_2 ON '127.0.0.1:10012' WITH '127.0.0.1:10002'" + coordinator3_cursor, + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", ) execute_and_fetch_all( - coordinator3_cursor, "REGISTER INSTANCE instance_3 ON '127.0.0.1:10013' WITH '127.0.0.1:10003'" + coordinator3_cursor, + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", + ) + execute_and_fetch_all( + coordinator3_cursor, + "REGISTER INSTANCE instance_3 WITH 
CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", ) execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN") - expected_cluster_not_shared = [ - ("coordinator_1", "127.0.0.1:10111", "", True, "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", True, "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", True, "coordinator"), + expected_cluster_shared = [ + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), + ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), + ("instance_1", "", "", "unknown", "replica"), + ("instance_2", "", "", "unknown", "replica"), + ("instance_3", "", "", "unknown", "main"), ] coordinator1_cursor = connect(host="localhost", port=7690).cursor() @@ -248,20 +282,20 @@ def test_coordinators_communication_with_restarts(): def check_coordinator1(): return sorted(list(execute_and_fetch_all(coordinator1_cursor, "SHOW INSTANCES"))) - mg_sleep_and_assert(expected_cluster_not_shared, check_coordinator1) + mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) coordinator2_cursor = connect(host="localhost", port=7691).cursor() def check_coordinator2(): return sorted(list(execute_and_fetch_all(coordinator2_cursor, "SHOW INSTANCES"))) - mg_sleep_and_assert(expected_cluster_not_shared, check_coordinator2) + mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "coordinator_1") interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "coordinator_1") coordinator1_cursor = connect(host="localhost", port=7690).cursor() - mg_sleep_and_assert(expected_cluster_not_shared, check_coordinator1) + mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "coordinator_1") 
interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "coordinator_2") @@ -271,11 +305,11 @@ def test_coordinators_communication_with_restarts(): coordinator1_cursor = connect(host="localhost", port=7690).cursor() coordinator2_cursor = connect(host="localhost", port=7691).cursor() - mg_sleep_and_assert(expected_cluster_not_shared, check_coordinator1) - mg_sleep_and_assert(expected_cluster_not_shared, check_coordinator2) + mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) + mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) -# TODO: (andi) Test when dealing with distributed coordinators that you can register on one coordinator and unregister from any other coordinator +# # TODO: (andi) Test when dealing with distributed coordinators that you can register on one coordinator and unregister from any other coordinator @pytest.mark.parametrize( "kill_instance", [True, False], @@ -284,18 +318,38 @@ def test_unregister_replicas(kill_instance): safe_execute(shutil.rmtree, TEMP_DIR) interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION) + coordinator1_cursor = connect(host="localhost", port=7690).cursor() + coordinator2_cursor = connect(host="localhost", port=7691).cursor() coordinator3_cursor = connect(host="localhost", port=7692).cursor() - execute_and_fetch_all( - coordinator3_cursor, "REGISTER INSTANCE instance_1 ON '127.0.0.1:10011' WITH '127.0.0.1:10001'" + + assert add_coordinator( + coordinator3_cursor, + "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", + ) + assert add_coordinator( + coordinator3_cursor, + "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", ) execute_and_fetch_all( - coordinator3_cursor, "REGISTER INSTANCE instance_2 ON '127.0.0.1:10012' WITH '127.0.0.1:10002'" + coordinator3_cursor, + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': 
'127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", ) execute_and_fetch_all( - coordinator3_cursor, "REGISTER INSTANCE instance_3 ON '127.0.0.1:10013' WITH '127.0.0.1:10003'" + coordinator3_cursor, + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", + ) + execute_and_fetch_all( + coordinator3_cursor, + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", ) execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN") + def check_coordinator1(): + return sorted(list(execute_and_fetch_all(coordinator1_cursor, "SHOW INSTANCES"))) + + def check_coordinator2(): + return sorted(list(execute_and_fetch_all(coordinator2_cursor, "SHOW INSTANCES"))) + def check_coordinator3(): return sorted(list(execute_and_fetch_all(coordinator3_cursor, "SHOW INSTANCES"))) @@ -305,10 +359,21 @@ def test_unregister_replicas(kill_instance): return sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS"))) expected_cluster = [ - ("coordinator_3", "127.0.0.1:10113", "", True, "coordinator"), - ("instance_1", "", "127.0.0.1:10011", True, "replica"), - ("instance_2", "", "127.0.0.1:10012", True, "replica"), - ("instance_3", "", "127.0.0.1:10013", True, "main"), + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), + ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "up", "replica"), + ("instance_2", "", "127.0.0.1:10012", "up", "replica"), + ("instance_3", "", "127.0.0.1:10013", "up", "main"), + ] + + expected_cluster_shared = [ + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), + ("coordinator_3", "127.0.0.1:10113", "", 
"unknown", "coordinator"), + ("instance_1", "", "", "unknown", "replica"), + ("instance_2", "", "", "unknown", "replica"), + ("instance_3", "", "", "unknown", "main"), ] expected_replicas = [ @@ -328,6 +393,8 @@ def test_unregister_replicas(kill_instance): ), ] + mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) + mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) mg_sleep_and_assert(expected_cluster, check_coordinator3) mg_sleep_and_assert(expected_replicas, check_main) @@ -336,9 +403,19 @@ def test_unregister_replicas(kill_instance): execute_and_fetch_all(coordinator3_cursor, "UNREGISTER INSTANCE instance_1") expected_cluster = [ - ("coordinator_3", "127.0.0.1:10113", "", True, "coordinator"), - ("instance_2", "", "127.0.0.1:10012", True, "replica"), - ("instance_3", "", "127.0.0.1:10013", True, "main"), + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), + ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), + ("instance_2", "", "127.0.0.1:10012", "up", "replica"), + ("instance_3", "", "127.0.0.1:10013", "up", "main"), + ] + + expected_cluster_shared = [ + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), + ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), + ("instance_2", "", "", "unknown", "replica"), + ("instance_3", "", "", "unknown", "main"), ] expected_replicas = [ @@ -351,6 +428,8 @@ def test_unregister_replicas(kill_instance): ), ] + mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) + mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) mg_sleep_and_assert(expected_cluster, check_coordinator3) mg_sleep_and_assert(expected_replicas, check_main) @@ -359,11 +438,22 @@ def test_unregister_replicas(kill_instance): execute_and_fetch_all(coordinator3_cursor, "UNREGISTER INSTANCE instance_2") 
expected_cluster = [ - ("coordinator_3", "127.0.0.1:10113", "", True, "coordinator"), - ("instance_3", "", "127.0.0.1:10013", True, "main"), + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), + ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), + ("instance_3", "", "127.0.0.1:10013", "up", "main"), + ] + + expected_cluster_shared = [ + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), + ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), + ("instance_3", "", "", "unknown", "main"), ] expected_replicas = [] + mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) + mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) mg_sleep_and_assert(expected_cluster, check_coordinator3) mg_sleep_and_assert(expected_replicas, check_main) @@ -372,28 +462,61 @@ def test_unregister_main(): safe_execute(shutil.rmtree, TEMP_DIR) interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION) + coordinator1_cursor = connect(host="localhost", port=7690).cursor() + coordinator2_cursor = connect(host="localhost", port=7691).cursor() coordinator3_cursor = connect(host="localhost", port=7692).cursor() - execute_and_fetch_all( - coordinator3_cursor, "REGISTER INSTANCE instance_1 ON '127.0.0.1:10011' WITH '127.0.0.1:10001'" + + assert add_coordinator( + coordinator3_cursor, + "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", + ) + assert add_coordinator( + coordinator3_cursor, + "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", ) execute_and_fetch_all( - coordinator3_cursor, "REGISTER INSTANCE instance_2 ON '127.0.0.1:10012' WITH '127.0.0.1:10002'" + coordinator3_cursor, + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 
'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", ) execute_and_fetch_all( - coordinator3_cursor, "REGISTER INSTANCE instance_3 ON '127.0.0.1:10013' WITH '127.0.0.1:10003'" + coordinator3_cursor, + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", + ) + execute_and_fetch_all( + coordinator3_cursor, + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", ) execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN") + def check_coordinator1(): + return sorted(list(execute_and_fetch_all(coordinator1_cursor, "SHOW INSTANCES"))) + + def check_coordinator2(): + return sorted(list(execute_and_fetch_all(coordinator2_cursor, "SHOW INSTANCES"))) + def check_coordinator3(): return sorted(list(execute_and_fetch_all(coordinator3_cursor, "SHOW INSTANCES"))) expected_cluster = [ - ("coordinator_3", "127.0.0.1:10113", "", True, "coordinator"), - ("instance_1", "", "127.0.0.1:10011", True, "replica"), - ("instance_2", "", "127.0.0.1:10012", True, "replica"), - ("instance_3", "", "127.0.0.1:10013", True, "main"), + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), + ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "up", "replica"), + ("instance_2", "", "127.0.0.1:10012", "up", "replica"), + ("instance_3", "", "127.0.0.1:10013", "up", "main"), ] + expected_cluster_shared = [ + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), + ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), + ("instance_1", "", "", "unknown", "replica"), + ("instance_2", "", "", "unknown", "replica"), + 
("instance_3", "", "", "unknown", "main"), + ] + + mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) + mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) mg_sleep_and_assert(expected_cluster, check_coordinator3) try: @@ -407,20 +530,43 @@ def test_unregister_main(): interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3") expected_cluster = [ - ("coordinator_3", "127.0.0.1:10113", "", True, "coordinator"), - ("instance_1", "", "127.0.0.1:10011", True, "main"), - ("instance_2", "", "127.0.0.1:10012", True, "replica"), - ("instance_3", "", "127.0.0.1:10013", False, "unknown"), + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), + ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "up", "main"), + ("instance_2", "", "127.0.0.1:10012", "up", "replica"), + ("instance_3", "", "127.0.0.1:10013", "down", "unknown"), ] + expected_cluster_shared = [ + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), + ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), + ("instance_1", "", "", "unknown", "main"), + ("instance_2", "", "", "unknown", "replica"), + ("instance_3", "", "", "unknown", "main"), + ] + + mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) + mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) mg_sleep_and_assert(expected_cluster, check_coordinator3) execute_and_fetch_all(coordinator3_cursor, "UNREGISTER INSTANCE instance_3") expected_cluster = [ - ("coordinator_3", "127.0.0.1:10113", "", True, "coordinator"), - ("instance_1", "", "127.0.0.1:10011", True, "main"), - ("instance_2", "", "127.0.0.1:10012", True, "replica"), + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("coordinator_2", "127.0.0.1:10112", "", "unknown", 
"coordinator"), + ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "up", "main"), + ("instance_2", "", "127.0.0.1:10012", "up", "replica"), + ] + + expected_cluster_shared = [ + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), + ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), + ("instance_1", "", "", "unknown", "main"), + ("instance_2", "", "", "unknown", "replica"), ] expected_replicas = [ @@ -438,6 +584,8 @@ def test_unregister_main(): def check_main(): return sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS"))) + mg_sleep_and_assert(expected_cluster_shared, check_coordinator1) + mg_sleep_and_assert(expected_cluster_shared, check_coordinator2) mg_sleep_and_assert(expected_cluster, check_coordinator3) mg_sleep_and_assert(expected_replicas, check_main) diff --git a/tests/e2e/high_availability/coordinator.py b/tests/e2e/high_availability/coordinator.py index 4330c2194..8a6ae1a0a 100644 --- a/tests/e2e/high_availability/coordinator.py +++ b/tests/e2e/high_availability/coordinator.py @@ -44,10 +44,10 @@ def test_coordinator_show_instances(): return sorted(list(execute_and_fetch_all(cursor, "SHOW INSTANCES;"))) expected_data = [ - ("coordinator_1", "127.0.0.1:10111", "", True, "coordinator"), - ("instance_1", "", "127.0.0.1:10011", True, "replica"), - ("instance_2", "", "127.0.0.1:10012", True, "replica"), - ("instance_3", "", "127.0.0.1:10013", True, "main"), + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "up", "replica"), + ("instance_2", "", "127.0.0.1:10012", "up", "replica"), + ("instance_3", "", "127.0.0.1:10013", "up", "main"), ] mg_sleep_and_assert(expected_data, retrieve_data) @@ -79,7 +79,7 @@ def test_main_and_replicas_cannot_register_coord_server(port): with pytest.raises(Exception) as e: execute_and_fetch_all( 
cursor, - "REGISTER INSTANCE instance_1 ON '127.0.0.1:10001' WITH '127.0.0.1:10011';", + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", ) assert str(e.value) == "Only coordinator can register coordinator server!" diff --git a/tests/e2e/high_availability/disable_writing_on_main_after_restart.py b/tests/e2e/high_availability/disable_writing_on_main_after_restart.py index 53d570a6d..517bf346f 100644 --- a/tests/e2e/high_availability/disable_writing_on_main_after_restart.py +++ b/tests/e2e/high_availability/disable_writing_on_main_after_restart.py @@ -133,30 +133,37 @@ def test_writing_disabled_on_main_restart(): coordinator3_cursor = connect(host="localhost", port=7692).cursor() execute_and_fetch_all( - coordinator3_cursor, "REGISTER INSTANCE instance_3 ON '127.0.0.1:10013' WITH '127.0.0.1:10003'" + coordinator3_cursor, + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", ) execute_and_fetch_all(coordinator3_cursor, "SET INSTANCE instance_3 TO MAIN") - assert add_coordinator(coordinator3_cursor, "ADD COORDINATOR 1 ON '127.0.0.1:10111'") - assert add_coordinator(coordinator3_cursor, "ADD COORDINATOR 2 ON '127.0.0.1:10112'") + assert add_coordinator( + coordinator3_cursor, + "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", + ) + assert add_coordinator( + coordinator3_cursor, + "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", + ) def check_coordinator3(): return sorted(list(execute_and_fetch_all(coordinator3_cursor, "SHOW INSTANCES"))) expected_cluster_coord3 = [ - ("coordinator_1", "127.0.0.1:10111", "", True, "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", True, "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", 
True, "coordinator"), - ("instance_3", "", "127.0.0.1:10013", True, "main"), + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), + ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), + ("instance_3", "", "127.0.0.1:10013", "up", "main"), ] mg_sleep_and_assert(expected_cluster_coord3, check_coordinator3) interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3") expected_cluster_coord3 = [ - ("coordinator_1", "127.0.0.1:10111", "", True, "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", True, "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", True, "coordinator"), - ("instance_3", "", "127.0.0.1:10013", False, "unknown"), + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), + ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), + ("instance_3", "", "127.0.0.1:10013", "down", "unknown"), ] mg_sleep_and_assert(expected_cluster_coord3, check_coordinator3) @@ -173,10 +180,10 @@ def test_writing_disabled_on_main_restart(): ) expected_cluster_coord3 = [ - ("coordinator_1", "127.0.0.1:10111", "", True, "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", True, "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", True, "coordinator"), - ("instance_3", "", "127.0.0.1:10013", True, "main"), + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), + ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), + ("instance_3", "", "127.0.0.1:10013", "up", "main"), ] mg_sleep_and_assert(expected_cluster_coord3, check_coordinator3) diff --git a/tests/e2e/high_availability/distributed_coords.py b/tests/e2e/high_availability/distributed_coords.py index 07b6eefe0..59e083545 100644 --- a/tests/e2e/high_availability/distributed_coords.py +++ 
b/tests/e2e/high_availability/distributed_coords.py @@ -17,7 +17,11 @@ import tempfile import interactive_mg_runner import pytest from common import connect, execute_and_fetch_all, safe_execute -from mg_utils import mg_sleep_and_assert, mg_sleep_and_assert_collection +from mg_utils import ( + mg_sleep_and_assert, + mg_sleep_and_assert_any_function, + mg_sleep_and_assert_collection, +) interactive_mg_runner.SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) interactive_mg_runner.PROJECT_DIR = os.path.normpath( @@ -106,17 +110,216 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { ], "log_file": "coordinator3.log", "setup_queries": [ - "ADD COORDINATOR 1 ON '127.0.0.1:10111'", - "ADD COORDINATOR 2 ON '127.0.0.1:10112'", - "REGISTER INSTANCE instance_1 ON '127.0.0.1:10011' WITH '127.0.0.1:10001'", - "REGISTER INSTANCE instance_2 ON '127.0.0.1:10012' WITH '127.0.0.1:10002'", - "REGISTER INSTANCE instance_3 ON '127.0.0.1:10013' WITH '127.0.0.1:10003'", + "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", + "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", "SET INSTANCE instance_3 TO MAIN", ], }, } +def get_instances_description_no_setup(): + return { + "instance_1": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7687", + "--log-level", + "TRACE", + "--coordinator-server-port", + "10011", + ], + "log_file": "instance_1.log", + "data_directory": 
f"{TEMP_DIR}/instance_1", + "setup_queries": [], + }, + "instance_2": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7688", + "--log-level", + "TRACE", + "--coordinator-server-port", + "10012", + ], + "log_file": "instance_2.log", + "data_directory": f"{TEMP_DIR}/instance_2", + "setup_queries": [], + }, + "instance_3": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7689", + "--log-level", + "TRACE", + "--coordinator-server-port", + "10013", + ], + "log_file": "instance_3.log", + "data_directory": f"{TEMP_DIR}/instance_3", + "setup_queries": [], + }, + "coordinator_1": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7690", + "--log-level=TRACE", + "--raft-server-id=1", + "--raft-server-port=10111", + ], + "log_file": "coordinator1.log", + "data_directory": f"{TEMP_DIR}/coordinator_1", + "setup_queries": [], + }, + "coordinator_2": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7691", + "--log-level=TRACE", + "--raft-server-id=2", + "--raft-server-port=10112", + ], + "log_file": "coordinator2.log", + "data_directory": f"{TEMP_DIR}/coordinator_2", + "setup_queries": [], + }, + "coordinator_3": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7692", + "--log-level=TRACE", + "--raft-server-id=3", + "--raft-server-port=10113", + ], + "log_file": "coordinator3.log", + "data_directory": f"{TEMP_DIR}/coordinator_3", + "setup_queries": [], + }, + } + + +def test_old_main_comes_back_on_new_leader_as_replica(): + # 1. Start all instances. + # 2. Kill the main instance + # 3. Kill the leader + # 4. Start the old main instance + # 5. Run SHOW INSTANCES on the new leader and check that the old main instance is registered as a replica + # 6. 
Start again previous leader + + safe_execute(shutil.rmtree, TEMP_DIR) + inner_instances_description = get_instances_description_no_setup() + + interactive_mg_runner.start_all(inner_instances_description) + + setup_queries = [ + "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", + "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", + "SET INSTANCE instance_3 TO MAIN", + ] + coord_cursor_3 = connect(host="localhost", port=7692).cursor() + for query in setup_queries: + execute_and_fetch_all(coord_cursor_3, query) + + interactive_mg_runner.kill(inner_instances_description, "coordinator_3") + interactive_mg_runner.kill(inner_instances_description, "instance_3") + + coord_cursor_1 = connect(host="localhost", port=7690).cursor() + + def show_instances_coord1(): + return sorted(list(execute_and_fetch_all(coord_cursor_1, "SHOW INSTANCES;"))) + + coord_cursor_2 = connect(host="localhost", port=7691).cursor() + + def show_instances_coord2(): + return sorted(list(execute_and_fetch_all(coord_cursor_2, "SHOW INSTANCES;"))) + + leader_data = [ + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), + ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "up", "main"), + ("instance_2", "", "127.0.0.1:10012", "up", "replica"), + ("instance_3", "", 
"127.0.0.1:10013", "down", "unknown"), + ] + mg_sleep_and_assert_any_function(leader_data, [show_instances_coord1, show_instances_coord2]) + + follower_data = [ + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), + ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), + ("instance_1", "", "", "unknown", "main"), + ("instance_2", "", "", "unknown", "replica"), + ("instance_3", "", "", "unknown", "main"), # TODO: (andi) Will become unknown. + ] + mg_sleep_and_assert_any_function(leader_data, [show_instances_coord1, show_instances_coord2]) + mg_sleep_and_assert_any_function(follower_data, [show_instances_coord1, show_instances_coord2]) + + interactive_mg_runner.start(inner_instances_description, "instance_3") + + leader_data = [ + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), + ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "up", "main"), + ("instance_2", "", "127.0.0.1:10012", "up", "replica"), + ("instance_3", "", "127.0.0.1:10013", "up", "replica"), + ] + mg_sleep_and_assert_any_function(leader_data, [show_instances_coord1, show_instances_coord2]) + + new_main_cursor = connect(host="localhost", port=7687).cursor() + + def show_replicas(): + return sorted(list(execute_and_fetch_all(new_main_cursor, "SHOW REPLICAS;"))) + + replicas = [ + ( + "instance_2", + "127.0.0.1:10002", + "sync", + {"ts": 0, "behind": None, "status": "ready"}, + {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}}, + ), + ( + "instance_3", + "127.0.0.1:10003", + "sync", + {"ts": 0, "behind": None, "status": "ready"}, + {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}}, + ), + ] + mg_sleep_and_assert_collection(replicas, show_replicas) + + execute_and_fetch_all(new_main_cursor, "CREATE (n:Node {name: 'node'})") + + replica_2_cursor = 
connect(host="localhost", port=7688).cursor() + + def get_vertex_count(): + return execute_and_fetch_all(replica_2_cursor, "MATCH (n) RETURN count(n)")[0][0] + + mg_sleep_and_assert(1, get_vertex_count) + + replica_3_cursor = connect(host="localhost", port=7689).cursor() + + def get_vertex_count(): + return execute_and_fetch_all(replica_3_cursor, "MATCH (n) RETURN count(n)")[0][0] + + mg_sleep_and_assert(1, get_vertex_count) + + interactive_mg_runner.start(inner_instances_description, "coordinator_3") + + def test_distributed_automatic_failover(): safe_execute(shutil.rmtree, TEMP_DIR) interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION) @@ -138,8 +341,11 @@ def test_distributed_automatic_failover(): {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}}, ), ] - actual_data_on_main = sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS;"))) - assert actual_data_on_main == sorted(expected_data_on_main) + + def retrieve_data_show_replicas(): + return sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS;"))) + + mg_sleep_and_assert_collection(expected_data_on_main, retrieve_data_show_replicas) interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3") @@ -149,13 +355,14 @@ def test_distributed_automatic_failover(): return sorted(list(execute_and_fetch_all(coord_cursor, "SHOW INSTANCES;"))) expected_data_on_coord = [ - ("coordinator_1", "127.0.0.1:10111", "", True, "coordinator"), - ("coordinator_2", "127.0.0.1:10112", "", True, "coordinator"), - ("coordinator_3", "127.0.0.1:10113", "", True, "coordinator"), - ("instance_1", "", "127.0.0.1:10011", True, "main"), - ("instance_2", "", "127.0.0.1:10012", True, "replica"), - ("instance_3", "", "127.0.0.1:10013", False, "unknown"), + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), + ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", 
"up", "main"), + ("instance_2", "", "127.0.0.1:10012", "up", "replica"), + ("instance_3", "", "127.0.0.1:10013", "down", "unknown"), ] + mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_repl_cluster) new_main_cursor = connect(host="localhost", port=7687).cursor() @@ -202,5 +409,854 @@ def test_distributed_automatic_failover(): mg_sleep_and_assert_collection(expected_data_on_new_main_old_alive, retrieve_data_show_replicas) +def test_distributed_automatic_failover_with_leadership_change(): + safe_execute(shutil.rmtree, TEMP_DIR) + inner_instances_description = get_instances_description_no_setup() + + interactive_mg_runner.start_all(inner_instances_description) + + setup_queries = [ + "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", + "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", + "SET INSTANCE instance_3 TO MAIN", + ] + coord_cursor_3 = connect(host="localhost", port=7692).cursor() + for query in setup_queries: + execute_and_fetch_all(coord_cursor_3, query) + + interactive_mg_runner.kill(inner_instances_description, "coordinator_3") + interactive_mg_runner.kill(inner_instances_description, "instance_3") + + coord_cursor_1 = connect(host="localhost", port=7690).cursor() + + def show_instances_coord1(): + return sorted(list(execute_and_fetch_all(coord_cursor_1, "SHOW INSTANCES;"))) + + coord_cursor_2 = connect(host="localhost", port=7691).cursor() + + def 
show_instances_coord2(): + return sorted(list(execute_and_fetch_all(coord_cursor_2, "SHOW INSTANCES;"))) + + leader_data = [ + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), + ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "up", "main"), + ("instance_2", "", "127.0.0.1:10012", "up", "replica"), + ("instance_3", "", "127.0.0.1:10013", "down", "unknown"), + ] + mg_sleep_and_assert_any_function(leader_data, [show_instances_coord1, show_instances_coord2]) + + follower_data = [ + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), + ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), + ("instance_1", "", "", "unknown", "main"), + ("instance_2", "", "", "unknown", "replica"), + ("instance_3", "", "", "unknown", "main"), # TODO: (andi) Will become unknown. 
+ ] + mg_sleep_and_assert_any_function(leader_data, [show_instances_coord1, show_instances_coord2]) + mg_sleep_and_assert_any_function(follower_data, [show_instances_coord1, show_instances_coord2]) + + new_main_cursor = connect(host="localhost", port=7687).cursor() + + def retrieve_data_show_replicas(): + return sorted(list(execute_and_fetch_all(new_main_cursor, "SHOW REPLICAS;"))) + + expected_data_on_new_main = [ + ( + "instance_2", + "127.0.0.1:10002", + "sync", + {"ts": 0, "behind": None, "status": "ready"}, + {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}}, + ), + ( + "instance_3", + "127.0.0.1:10003", + "sync", + {"ts": 0, "behind": None, "status": "invalid"}, + {"memgraph": {"ts": 0, "behind": 0, "status": "invalid"}}, + ), + ] + mg_sleep_and_assert_collection(expected_data_on_new_main, retrieve_data_show_replicas) + + interactive_mg_runner.start(inner_instances_description, "instance_3") + expected_data_on_new_main_old_alive = [ + ( + "instance_2", + "127.0.0.1:10002", + "sync", + {"ts": 0, "behind": None, "status": "ready"}, + {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}}, + ), + ( + "instance_3", + "127.0.0.1:10003", + "sync", + {"ts": 0, "behind": None, "status": "ready"}, + {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}}, + ), + ] + + mg_sleep_and_assert_collection(expected_data_on_new_main_old_alive, retrieve_data_show_replicas) + + interactive_mg_runner.start(inner_instances_description, "coordinator_3") + + +def test_no_leader_after_leader_and_follower_die(): + # 1. Register all but one replication instance on the first leader. + # 2. Kill the leader and a follower. + # 3. Check that the remaining follower is not promoted to leader by trying to register remaining replication instance.
+ + safe_execute(shutil.rmtree, TEMP_DIR) + + interactive_mg_runner.start_all(MEMGRAPH_INSTANCES_DESCRIPTION) + + interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "coordinator_3") + interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "coordinator_2") + + coord_cursor_1 = connect(host="localhost", port=7690).cursor() + + with pytest.raises(Exception) as e: + execute_and_fetch_all( + coord_cursor_1, + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", + ) + assert str(e) == "Couldn't register replica instance since coordinator is not a leader!" + + +def test_old_main_comes_back_on_new_leader_as_main(): + # 1. Start all instances. + # 2. Kill all instances + # 3. Kill the leader + # 4. Start the old main instance + # 5. Run SHOW INSTANCES on the new leader and check that the old main instance is main once again + + safe_execute(shutil.rmtree, TEMP_DIR) + + inner_memgraph_instances = get_instances_description_no_setup() + interactive_mg_runner.start_all(inner_memgraph_instances) + + coord_cursor_3 = connect(host="localhost", port=7692).cursor() + + setup_queries = [ + "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", + "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", + "SET INSTANCE instance_3 TO MAIN", + ] + + for query in setup_queries: + 
execute_and_fetch_all(coord_cursor_3, query) + + interactive_mg_runner.kill(inner_memgraph_instances, "instance_1") + interactive_mg_runner.kill(inner_memgraph_instances, "instance_2") + interactive_mg_runner.kill(inner_memgraph_instances, "instance_3") + interactive_mg_runner.kill(inner_memgraph_instances, "coordinator_3") + + coord_cursor_1 = connect(host="localhost", port=7690).cursor() + + def show_instances_coord1(): + return sorted(list(execute_and_fetch_all(coord_cursor_1, "SHOW INSTANCES;"))) + + coord_cursor_2 = connect(host="localhost", port=7691).cursor() + + def show_instances_coord2(): + return sorted(list(execute_and_fetch_all(coord_cursor_2, "SHOW INSTANCES;"))) + + interactive_mg_runner.start(inner_memgraph_instances, "instance_3") + + leader_data = [ + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), + ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "down", "unknown"), + ("instance_2", "", "127.0.0.1:10012", "down", "unknown"), + ("instance_3", "", "127.0.0.1:10013", "up", "main"), + ] + + follower_data = [ + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), + ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), + ("instance_1", "", "", "unknown", "replica"), + ("instance_2", "", "", "unknown", "replica"), + ("instance_3", "", "", "unknown", "main"), + ] + mg_sleep_and_assert_any_function(leader_data, [show_instances_coord1, show_instances_coord2]) + mg_sleep_and_assert_any_function(follower_data, [show_instances_coord1, show_instances_coord2]) + + interactive_mg_runner.start(inner_memgraph_instances, "instance_1") + interactive_mg_runner.start(inner_memgraph_instances, "instance_2") + + new_main_cursor = connect(host="localhost", port=7689).cursor() + + def show_replicas(): + return 
sorted(list(execute_and_fetch_all(new_main_cursor, "SHOW REPLICAS;"))) + + replicas = [ + ( + "instance_1", + "127.0.0.1:10001", + "sync", + {"ts": 0, "behind": None, "status": "ready"}, + {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}}, + ), + ( + "instance_2", + "127.0.0.1:10002", + "sync", + {"ts": 0, "behind": None, "status": "ready"}, + {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}}, + ), + ] + mg_sleep_and_assert_collection(replicas, show_replicas) + + execute_and_fetch_all(new_main_cursor, "CREATE (n:Node {name: 'node'})") + + replica_1_cursor = connect(host="localhost", port=7687).cursor() + assert len(execute_and_fetch_all(replica_1_cursor, "MATCH (n) RETURN n;")) == 1 + + replica_2_cursor = connect(host="localhost", port=7688).cursor() + assert len(execute_and_fetch_all(replica_2_cursor, "MATCH (n) RETURN n;")) == 1 + + interactive_mg_runner.start(inner_memgraph_instances, "coordinator_3") + + +def test_registering_4_coords(): + # Goal of this test is to assure registering of multiple coordinators in row works + safe_execute(shutil.rmtree, TEMP_DIR) + INSTANCES_DESCRIPTION = { + "instance_1": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7687", + "--log-level", + "TRACE", + "--coordinator-server-port", + "10011", + ], + "log_file": "instance_1.log", + "data_directory": f"{TEMP_DIR}/instance_1", + "setup_queries": [], + }, + "instance_2": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7688", + "--log-level", + "TRACE", + "--coordinator-server-port", + "10012", + ], + "log_file": "instance_2.log", + "data_directory": f"{TEMP_DIR}/instance_2", + "setup_queries": [], + }, + "instance_3": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7689", + "--log-level", + "TRACE", + "--coordinator-server-port", + "10013", + ], + "log_file": "instance_3.log", + "data_directory": f"{TEMP_DIR}/instance_3", + "setup_queries": [], + }, + "coordinator_1": { + 
"args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7690", + "--log-level=TRACE", + "--raft-server-id=1", + "--raft-server-port=10111", + ], + "log_file": "coordinator1.log", + "setup_queries": [], + }, + "coordinator_2": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7691", + "--log-level=TRACE", + "--raft-server-id=2", + "--raft-server-port=10112", + ], + "log_file": "coordinator2.log", + "setup_queries": [], + }, + "coordinator_3": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7692", + "--log-level=TRACE", + "--raft-server-id=3", + "--raft-server-port=10113", + ], + "log_file": "coordinator3.log", + "setup_queries": [], + }, + "coordinator_4": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7693", + "--log-level=TRACE", + "--raft-server-id=4", + "--raft-server-port=10114", + ], + "log_file": "coordinator4.log", + "setup_queries": [ + "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", + "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", + "ADD COORDINATOR 3 WITH CONFIG {'bolt_server': '127.0.0.1:7692', 'coordinator_server': '127.0.0.1:10113'}", + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", + "SET INSTANCE instance_3 TO MAIN", + ], + }, + } + + interactive_mg_runner.start_all(INSTANCES_DESCRIPTION) + + coord_cursor = connect(host="localhost", port=7693).cursor() + + def 
retrieve_data_show_repl_cluster(): + return sorted(list(execute_and_fetch_all(coord_cursor, "SHOW INSTANCES;"))) + + expected_data_on_coord = [ + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), + ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), + ("coordinator_4", "127.0.0.1:10114", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "up", "replica"), + ("instance_2", "", "127.0.0.1:10012", "up", "replica"), + ("instance_3", "", "127.0.0.1:10013", "up", "main"), + ] + mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_repl_cluster) + + +def test_registering_coord_log_store(): + # Goal of this test is to assure registering a bunch of instances and de-registering works properly + # w.r.t nuRaft log + # 1. Start basic instances # 3 logs + # 2. Check all is there + # 3. Create 3 additional instances and add them to cluster # 3 logs -> 1st snapshot + # 4. Check everything is there + # 5. Set main # 1 log + # 6. Check correct state + # 7. Drop 2 new instances # 2 logs + # 8. Check correct state + # 9. Drop 1 new instance # 1 log -> 2nd snapshot + # 10. 
Check correct state + safe_execute(shutil.rmtree, TEMP_DIR) + + INSTANCES_DESCRIPTION = { + "instance_1": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7687", + "--log-level", + "TRACE", + "--coordinator-server-port", + "10011", + ], + "log_file": "instance_1.log", + "data_directory": f"{TEMP_DIR}/instance_1", + "setup_queries": [], + }, + "instance_2": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7688", + "--log-level", + "TRACE", + "--coordinator-server-port", + "10012", + ], + "log_file": "instance_2.log", + "data_directory": f"{TEMP_DIR}/instance_2", + "setup_queries": [], + }, + "instance_3": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7689", + "--log-level", + "TRACE", + "--coordinator-server-port", + "10013", + ], + "log_file": "instance_3.log", + "data_directory": f"{TEMP_DIR}/instance_3", + "setup_queries": [], + }, + "coordinator_1": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7690", + "--log-level=TRACE", + "--raft-server-id=1", + "--raft-server-port=10111", + ], + "log_file": "coordinator1.log", + "setup_queries": [], + }, + "coordinator_2": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7691", + "--log-level=TRACE", + "--raft-server-id=2", + "--raft-server-port=10112", + ], + "log_file": "coordinator2.log", + "setup_queries": [], + }, + "coordinator_3": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7692", + "--log-level=TRACE", + "--raft-server-id=3", + "--raft-server-port=10113", + ], + "log_file": "coordinator3.log", + "setup_queries": [], + }, + "coordinator_4": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7693", + "--log-level=TRACE", + "--raft-server-id=4", + "--raft-server-port=10114", + ], + "log_file": "coordinator4.log", + "setup_queries": [ + "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': 
'127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", + "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", + "ADD COORDINATOR 3 WITH CONFIG {'bolt_server': '127.0.0.1:7692', 'coordinator_server': '127.0.0.1:10113'}", + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", + ], + }, + } + assert "SET INSTANCE instance_3 TO MAIN" not in INSTANCES_DESCRIPTION["coordinator_4"]["setup_queries"] + + # 1 + interactive_mg_runner.start_all(INSTANCES_DESCRIPTION) + + # 2 + coord_cursor = connect(host="localhost", port=7693).cursor() + + def retrieve_data_show_repl_cluster(): + return sorted(list(execute_and_fetch_all(coord_cursor, "SHOW INSTANCES;"))) + + coordinators = [ + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), + ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), + ("coordinator_4", "127.0.0.1:10114", "", "unknown", "coordinator"), + ] + + basic_instances = [ + ("instance_1", "", "127.0.0.1:10011", "up", "replica"), + ("instance_2", "", "127.0.0.1:10012", "up", "replica"), + ("instance_3", "", "127.0.0.1:10013", "up", "replica"), + ] + + expected_data_on_coord = [] + expected_data_on_coord.extend(coordinators) + expected_data_on_coord.extend(basic_instances) + + mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_repl_cluster) + + # 3 + instances_ports_added = [10011, 10012, 10013] + bolt_port_id = 7700 + manag_port_id = 10014 + + additional_instances = [] + for i 
in range(4, 7): + instance_name = f"instance_{i}" + args_desc = [ + "--experimental-enabled=high-availability", + "--log-level=TRACE", + ] + + bolt_port = f"--bolt-port={bolt_port_id}" + + manag_server_port = f"--coordinator-server-port={manag_port_id}" + + args_desc.append(bolt_port) + args_desc.append(manag_server_port) + + instance_description = { + "args": args_desc, + "log_file": f"instance_{i}.log", + "data_directory": f"{TEMP_DIR}/instance_{i}", + "setup_queries": [], + } + + full_instance_desc = {instance_name: instance_description} + interactive_mg_runner.start(full_instance_desc, instance_name) + repl_port_id = manag_port_id - 10 + assert repl_port_id < 10011, "Wrong test setup, repl port must be smaller than smallest coord port id" + + bolt_server = f"127.0.0.1:{bolt_port_id}" + management_server = f"127.0.0.1:{manag_port_id}" + repl_server = f"127.0.0.1:{repl_port_id}" + + config_str = f"{{'bolt_server': '{bolt_server}', 'management_server': '{management_server}', 'replication_server': '{repl_server}'}}" + + execute_and_fetch_all( + coord_cursor, + f"REGISTER INSTANCE {instance_name} WITH CONFIG {config_str}", + ) + + additional_instances.append((f"{instance_name}", "", management_server, "up", "replica")) + instances_ports_added.append(manag_port_id) + manag_port_id += 1 + bolt_port_id += 1 + + # 4 + expected_data_on_coord.extend(additional_instances) + + mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_repl_cluster) + + # 5 + execute_and_fetch_all(coord_cursor, "SET INSTANCE instance_3 TO MAIN") + + # 6 + basic_instances.pop() + basic_instances.append(("instance_3", "", "127.0.0.1:10013", "up", "main")) + + new_expected_data_on_coordinator = [] + + new_expected_data_on_coordinator.extend(coordinators) + new_expected_data_on_coordinator.extend(basic_instances) + new_expected_data_on_coordinator.extend(additional_instances) + + mg_sleep_and_assert(new_expected_data_on_coordinator, retrieve_data_show_repl_cluster) + + # 7 + for i in 
range(6, 4, -1): + execute_and_fetch_all(coord_cursor, f"UNREGISTER INSTANCE instance_{i};") + additional_instances.pop() + + new_expected_data_on_coordinator = [] + new_expected_data_on_coordinator.extend(coordinators) + new_expected_data_on_coordinator.extend(basic_instances) + new_expected_data_on_coordinator.extend(additional_instances) + + # 8 + mg_sleep_and_assert(new_expected_data_on_coordinator, retrieve_data_show_repl_cluster) + + # 9 + + new_expected_data_on_coordinator = [] + new_expected_data_on_coordinator.extend(coordinators) + new_expected_data_on_coordinator.extend(basic_instances) + + execute_and_fetch_all(coord_cursor, f"UNREGISTER INSTANCE instance_4;") + + # 10 + mg_sleep_and_assert(new_expected_data_on_coordinator, retrieve_data_show_repl_cluster) + + +def test_multiple_failovers_in_row_no_leadership_change(): + # Goal of this test is to assure multiple failovers in a row work without leadership change + # 1. Start basic instances + # 2. Check all is there + # 3. Kill MAIN (instance_3) + # 4. Expect failover (instance_1) + # 5. Kill instance_1 + # 6. Expect failover instance_2 + # 7. Start instance_3 + # 8. Expect instance_3 and instance_2 (MAIN) up + # 9. Kill instance_2 + # 10. Expect instance_3 MAIN + # 11. Write some data on instance_3 + # 12. Start instance_2 and instance_1 + # 13. Expect instance_1 and instance_2 to be up and cluster to have correct state + # 14.
Expect data to be replicated + + # 1 + inner_memgraph_instances = get_instances_description_no_setup() + interactive_mg_runner.start_all(inner_memgraph_instances, keep_directories=False) + + coord_cursor_3 = connect(host="localhost", port=7692).cursor() + + setup_queries = [ + "ADD COORDINATOR 1 WITH CONFIG {'bolt_server': '127.0.0.1:7690', 'coordinator_server': '127.0.0.1:10111'}", + "ADD COORDINATOR 2 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'coordinator_server': '127.0.0.1:10112'}", + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", + "SET INSTANCE instance_3 TO MAIN", + ] + + for query in setup_queries: + execute_and_fetch_all(coord_cursor_3, query) + + # 2 + + def get_func_show_instances(cursor): + def show_instances_follower_coord(): + return sorted(list(execute_and_fetch_all(cursor, "SHOW INSTANCES;"))) + + return show_instances_follower_coord + + coordinator_data = [ + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("coordinator_2", "127.0.0.1:10112", "", "unknown", "coordinator"), + ("coordinator_3", "127.0.0.1:10113", "", "unknown", "coordinator"), + ] + + leader_data = [] + leader_data.extend(coordinator_data) + leader_data.extend( + [ + ("instance_1", "", "127.0.0.1:10011", "up", "replica"), + ("instance_2", "", "127.0.0.1:10012", "up", "replica"), + ("instance_3", "", "127.0.0.1:10013", "up", "main"), + ] + ) + + follower_data = [] + follower_data.extend(coordinator_data) + follower_data.extend( + [ + ("instance_1", "", "", "unknown", "replica"), + ("instance_2", "", "", "unknown", "replica"), + 
("instance_3", "", "", "unknown", "main"), + ] + ) + + coord_cursor_1 = connect(host="localhost", port=7690).cursor() + coord_cursor_2 = connect(host="localhost", port=7691).cursor() + + mg_sleep_and_assert_collection(follower_data, get_func_show_instances(coord_cursor_1)) + mg_sleep_and_assert_collection(follower_data, get_func_show_instances(coord_cursor_2)) + mg_sleep_and_assert_collection(leader_data, get_func_show_instances(coord_cursor_3)) + + # 3 + + interactive_mg_runner.kill(inner_memgraph_instances, "instance_3") + + # 4 + + leader_data = [] + leader_data.extend(coordinator_data) + leader_data.extend( + [ + ("instance_1", "", "127.0.0.1:10011", "up", "main"), + ("instance_2", "", "127.0.0.1:10012", "up", "replica"), + ("instance_3", "", "127.0.0.1:10013", "down", "unknown"), + ] + ) + + follower_data = [] + follower_data.extend(coordinator_data) + follower_data.extend( + [ + ("instance_1", "", "", "unknown", "main"), + ("instance_2", "", "", "unknown", "replica"), + ( + "instance_3", + "", + "", + "unknown", + "main", + ), # TODO(antoniofilipovic) change to unknown after PR with transitions + ] + ) + + mg_sleep_and_assert_collection(follower_data, get_func_show_instances(coord_cursor_1)) + mg_sleep_and_assert_collection(follower_data, get_func_show_instances(coord_cursor_2)) + mg_sleep_and_assert_collection(leader_data, get_func_show_instances(coord_cursor_3)) + + # 5 + interactive_mg_runner.kill(inner_memgraph_instances, "instance_1") + + # 6 + leader_data = [] + leader_data.extend(coordinator_data) + leader_data.extend( + [ + ("instance_1", "", "127.0.0.1:10011", "down", "unknown"), + ("instance_2", "", "127.0.0.1:10012", "up", "main"), + ("instance_3", "", "127.0.0.1:10013", "down", "unknown"), + ] + ) + + follower_data = [] + follower_data.extend(coordinator_data) + follower_data.extend( + [ + ("instance_1", "", "", "unknown", "main"), + ("instance_2", "", "", "unknown", "main"), # TODO(antoniofilipovic) change to unknown + ("instance_3", "", "", 
"unknown", "main"), # TODO(antoniofilipovic) change to unknown + ] + ) + + mg_sleep_and_assert_collection(follower_data, get_func_show_instances(coord_cursor_1)) + mg_sleep_and_assert_collection(follower_data, get_func_show_instances(coord_cursor_2)) + mg_sleep_and_assert_collection(leader_data, get_func_show_instances(coord_cursor_3)) + + # 7 + + interactive_mg_runner.start(inner_memgraph_instances, "instance_3") + + # 8 + + leader_data = [] + leader_data.extend(coordinator_data) + leader_data.extend( + [ + ("instance_1", "", "127.0.0.1:10011", "down", "unknown"), + ("instance_2", "", "127.0.0.1:10012", "up", "main"), + ("instance_3", "", "127.0.0.1:10013", "up", "replica"), + ] + ) + + follower_data = [] + follower_data.extend(coordinator_data) + follower_data.extend( + [ + ("instance_1", "", "", "unknown", "main"), # TODO(antoniofilipovic) change to unknown + ("instance_2", "", "", "unknown", "main"), + ("instance_3", "", "", "unknown", "replica"), + ] + ) + + mg_sleep_and_assert_collection(follower_data, get_func_show_instances(coord_cursor_1)) + mg_sleep_and_assert_collection(follower_data, get_func_show_instances(coord_cursor_2)) + mg_sleep_and_assert_collection(leader_data, get_func_show_instances(coord_cursor_3)) + + # 9 + interactive_mg_runner.kill(inner_memgraph_instances, "instance_2") + + # 10 + leader_data = [] + leader_data.extend(coordinator_data) + leader_data.extend( + [ + ("instance_1", "", "127.0.0.1:10011", "down", "unknown"), + ("instance_2", "", "127.0.0.1:10012", "down", "unknown"), + ("instance_3", "", "127.0.0.1:10013", "up", "main"), + ] + ) + + follower_data = [] + follower_data.extend(coordinator_data) + follower_data.extend( + [ + ("instance_1", "", "", "unknown", "main"), # TODO(antoniofilipovic) change to unknown + ("instance_2", "", "", "unknown", "main"), # TODO(antoniofilipovic) change to unknown + ("instance_3", "", "", "unknown", "main"), + ] + ) + + mg_sleep_and_assert_collection(follower_data, 
get_func_show_instances(coord_cursor_1)) + mg_sleep_and_assert_collection(follower_data, get_func_show_instances(coord_cursor_2)) + mg_sleep_and_assert_collection(leader_data, get_func_show_instances(coord_cursor_3)) + + # 11 + + instance_3_cursor = connect(port=7689, host="localhost").cursor() + + with pytest.raises(Exception) as e: + execute_and_fetch_all(instance_3_cursor, "CREATE ();") + assert "At least one SYNC replica has not confirmed committing last transaction." in str(e.value) + + # 12 + interactive_mg_runner.start(inner_memgraph_instances, "instance_1") + interactive_mg_runner.start(inner_memgraph_instances, "instance_2") + + # 13 + leader_data = [] + leader_data.extend(coordinator_data) + leader_data.extend( + [ + ("instance_1", "", "127.0.0.1:10011", "up", "replica"), + ("instance_2", "", "127.0.0.1:10012", "up", "replica"), + ("instance_3", "", "127.0.0.1:10013", "up", "main"), + ] + ) + + follower_data = [] + follower_data.extend(coordinator_data) + follower_data.extend( + [ + ("instance_1", "", "", "unknown", "replica"), + ("instance_2", "", "", "unknown", "replica"), + ("instance_3", "", "", "unknown", "main"), + ] + ) + + mg_sleep_and_assert_collection(follower_data, get_func_show_instances(coord_cursor_1)) + mg_sleep_and_assert_collection(follower_data, get_func_show_instances(coord_cursor_2)) + mg_sleep_and_assert_collection(leader_data, get_func_show_instances(coord_cursor_3)) + + # 14. 
+ + def show_replicas(): + return sorted(list(execute_and_fetch_all(instance_3_cursor, "SHOW REPLICAS;"))) + + replicas = [ + ( + "instance_1", + "127.0.0.1:10001", + "sync", + {"ts": 0, "behind": None, "status": "ready"}, + {"memgraph": {"ts": 2, "behind": 0, "status": "ready"}}, + ), + ( + "instance_2", + "127.0.0.1:10002", + "sync", + {"ts": 0, "behind": None, "status": "ready"}, + {"memgraph": {"ts": 2, "behind": 0, "status": "ready"}}, + ), + ] + mg_sleep_and_assert_collection(replicas, show_replicas) + + def get_vertex_count_func(cursor): + def get_vertex_count(): + return execute_and_fetch_all(cursor, "MATCH (n) RETURN count(n)")[0][0] + + return get_vertex_count + + mg_sleep_and_assert(1, get_vertex_count_func(connect(port=7687, host="localhost").cursor())) + + mg_sleep_and_assert(1, get_vertex_count_func(connect(port=7688, host="localhost").cursor())) + + if __name__ == "__main__": sys.exit(pytest.main([__file__, "-rA"])) diff --git a/tests/e2e/high_availability/not_replicate_from_old_main.py b/tests/e2e/high_availability/not_replicate_from_old_main.py index c2cc93cb1..d9729f650 100644 --- a/tests/e2e/high_availability/not_replicate_from_old_main.py +++ b/tests/e2e/high_availability/not_replicate_from_old_main.py @@ -75,8 +75,11 @@ def test_replication_works_on_failover(): {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}}, ), ] - actual_data_on_main = sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS;"))) - assert actual_data_on_main == expected_data_on_main + + def retrieve_data_show_replicas(): + return sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS;"))) + + mg_sleep_and_assert(expected_data_on_main, retrieve_data_show_replicas) # 3 interactive_mg_runner.start_all_keep_others(MEMGRAPH_SECOND_CLUSTER_DESCRIPTION) @@ -182,8 +185,8 @@ def test_not_replicate_old_main_register_new_cluster(): ], "log_file": "coordinator.log", "setup_queries": [ - "REGISTER INSTANCE shared_instance ON '127.0.0.1:10011' WITH '127.0.0.1:10001';", 
- "REGISTER INSTANCE instance_2 ON '127.0.0.1:10012' WITH '127.0.0.1:10002';", + "REGISTER INSTANCE shared_instance WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", "SET INSTANCE instance_2 TO MAIN", ], }, @@ -200,9 +203,9 @@ def test_not_replicate_old_main_register_new_cluster(): return sorted(list(execute_and_fetch_all(first_cluster_coord_cursor, "SHOW INSTANCES;"))) expected_data_up_first_cluster = [ - ("coordinator_1", "127.0.0.1:10111", "", True, "coordinator"), - ("instance_2", "", "127.0.0.1:10012", True, "main"), - ("shared_instance", "", "127.0.0.1:10011", True, "replica"), + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("instance_2", "", "127.0.0.1:10012", "up", "main"), + ("shared_instance", "", "127.0.0.1:10011", "up", "replica"), ] mg_sleep_and_assert(expected_data_up_first_cluster, show_repl_cluster) @@ -241,10 +244,12 @@ def test_not_replicate_old_main_register_new_cluster(): interactive_mg_runner.start_all_keep_others(MEMGRAPH_SECOND_COORD_CLUSTER_DESCRIPTION) second_cluster_coord_cursor = connect(host="localhost", port=7691).cursor() execute_and_fetch_all( - second_cluster_coord_cursor, "REGISTER INSTANCE shared_instance ON '127.0.0.1:10011' WITH '127.0.0.1:10001';" + second_cluster_coord_cursor, + "REGISTER INSTANCE shared_instance WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", ) execute_and_fetch_all( - second_cluster_coord_cursor, "REGISTER INSTANCE instance_3 ON '127.0.0.1:10013' WITH '127.0.0.1:10003';" + second_cluster_coord_cursor, + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", ) 
execute_and_fetch_all(second_cluster_coord_cursor, "SET INSTANCE instance_3 TO MAIN") @@ -254,9 +259,9 @@ def test_not_replicate_old_main_register_new_cluster(): return sorted(list(execute_and_fetch_all(second_cluster_coord_cursor, "SHOW INSTANCES;"))) expected_data_up_second_cluster = [ - ("coordinator_1", "127.0.0.1:10112", "", True, "coordinator"), - ("instance_3", "", "127.0.0.1:10013", True, "main"), - ("shared_instance", "", "127.0.0.1:10011", True, "replica"), + ("coordinator_1", "127.0.0.1:10112", "", "unknown", "coordinator"), + ("instance_3", "", "127.0.0.1:10013", "up", "main"), + ("shared_instance", "", "127.0.0.1:10011", "up", "replica"), ] mg_sleep_and_assert(expected_data_up_second_cluster, show_repl_cluster) diff --git a/tests/e2e/high_availability/single_coordinator.py b/tests/e2e/high_availability/single_coordinator.py index ecf063092..1d839b4fc 100644 --- a/tests/e2e/high_availability/single_coordinator.py +++ b/tests/e2e/high_availability/single_coordinator.py @@ -37,6 +37,9 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "TRACE", "--coordinator-server-port", "10011", + "--replication-restore-state-on-startup=true", + "--storage-recover-on-startup=false", + "--data-recovery-on-startup=false", ], "log_file": "instance_1.log", "data_directory": f"{TEMP_DIR}/instance_1", @@ -51,6 +54,9 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "TRACE", "--coordinator-server-port", "10012", + "--replication-restore-state-on-startup=true", + "--storage-recover-on-startup=false", + "--data-recovery-on-startup=false", ], "log_file": "instance_2.log", "data_directory": f"{TEMP_DIR}/instance_2", @@ -65,6 +71,9 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { "TRACE", "--coordinator-server-port", "10013", + "--replication-restore-state-on-startup=true", + "--storage-recover-on-startup=false", + "--data-recovery-on-startup=false", ], "log_file": "instance_3.log", "data_directory": f"{TEMP_DIR}/instance_3", @@ -81,23 +90,1067 @@ MEMGRAPH_INSTANCES_DESCRIPTION = { ], "log_file": "coordinator.log", 
"setup_queries": [ - "REGISTER INSTANCE instance_1 ON '127.0.0.1:10011' WITH '127.0.0.1:10001';", - "REGISTER INSTANCE instance_2 ON '127.0.0.1:10012' WITH '127.0.0.1:10002';", - "REGISTER INSTANCE instance_3 ON '127.0.0.1:10013' WITH '127.0.0.1:10003';", + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", "SET INSTANCE instance_3 TO MAIN", ], }, } -def test_replication_works_on_failover(): +@pytest.mark.parametrize("data_recovery", ["false", "true"]) +def test_replication_works_on_failover_replica_1_epoch_2_commits_away(data_recovery): # Goal of this test is to check the replication works after failover command. - # 1. We start all replicas, main and coordinator manually: we want to be able to kill them ourselves without relying on external tooling to kill processes. + # 1. We start all replicas, main and coordinator manually + # 2. We check that main has correct state + # 3. Create initial data on MAIN + # 4. Expect data to be copied on all replicas + # 5. Kill instance_1 (replica 1) + # 6. Create data on MAIN and expect to be copied to only one replica (instance_2) + # 7. Kill main + # 8. Instance_2 new MAIN + # 9. Create vertex on instance 2 + # 10. Start instance_1(it should have one commit on old epoch and new epoch with new commit shouldn't be replicated) + # 11. Expect data to be copied on instance_1 + # 12. Start old MAIN (instance_3) + # 13. 
Expect data to be copied to instance_3 + + temp_dir = tempfile.TemporaryDirectory().name + + MEMGRAPH_INNER_INSTANCES_DESCRIPTION = { + "instance_1": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7688", + "--log-level", + "TRACE", + "--coordinator-server-port", + "10011", + "--replication-restore-state-on-startup", + "true", + f"--data-recovery-on-startup={data_recovery}", + "--storage-recover-on-startup=false", + ], + "log_file": "instance_1.log", + "data_directory": f"{temp_dir}/instance_1", + "setup_queries": [], + }, + "instance_2": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7689", + "--log-level", + "TRACE", + "--coordinator-server-port", + "10012", + "--replication-restore-state-on-startup", + "true", + f"--data-recovery-on-startup={data_recovery}", + "--storage-recover-on-startup=false", + ], + "log_file": "instance_2.log", + "data_directory": f"{temp_dir}/instance_2", + "setup_queries": [], + }, + "instance_3": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7687", + "--log-level", + "TRACE", + "--coordinator-server-port", + "10013", + "--replication-restore-state-on-startup", + "true", + "--data-recovery-on-startup", + f"{data_recovery}", + "--storage-recover-on-startup=false", + ], + "log_file": "instance_3.log", + "data_directory": f"{temp_dir}/instance_3", + "setup_queries": [], + }, + "coordinator": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7690", + "--log-level=TRACE", + "--raft-server-id=1", + "--raft-server-port=10111", + ], + "log_file": "coordinator.log", + "setup_queries": [ + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", + "REGISTER 
INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", + "SET INSTANCE instance_3 TO MAIN", + ], + }, + } + + # 1 + interactive_mg_runner.start_all(MEMGRAPH_INNER_INSTANCES_DESCRIPTION) + + # 2 + main_cursor = connect(host="localhost", port=7687).cursor() + + def retrieve_data_show_replicas(): + return sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS;"))) + + expected_data_on_main = [ + ( + "instance_1", + "127.0.0.1:10001", + "sync", + {"behind": None, "status": "ready", "ts": 0}, + {"memgraph": {"behind": 0, "status": "ready", "ts": 0}}, + ), + ( + "instance_2", + "127.0.0.1:10002", + "sync", + {"behind": None, "status": "ready", "ts": 0}, + {"memgraph": {"behind": 0, "status": "ready", "ts": 0}}, + ), + ] + mg_sleep_and_assert_collection(expected_data_on_main, retrieve_data_show_replicas) + + # 3 + execute_and_fetch_all(main_cursor, "CREATE (:EpochVertex1 {prop:1});") + + # 4 + + instance_1_cursor = connect(host="localhost", port=7688).cursor() + instance_2_cursor = connect(host="localhost", port=7689).cursor() + + assert execute_and_fetch_all(instance_1_cursor, "MATCH (n) RETURN count(n);")[0][0] == 1 + assert execute_and_fetch_all(instance_2_cursor, "MATCH (n) RETURN count(n);")[0][0] == 1 + + # 5 + interactive_mg_runner.kill(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_1") + + # 6 + + with pytest.raises(Exception) as e: + execute_and_fetch_all(main_cursor, "CREATE (:EpochVertex1 {prop:2});") + assert "At least one SYNC replica has not confirmed committing last transaction." in str(e.value) + + assert execute_and_fetch_all(instance_2_cursor, "MATCH (n) RETURN count(n);")[0][0] == 2 + + # 7 + interactive_mg_runner.kill(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_3") + + # 8. 
+ coord_cursor = connect(host="localhost", port=7690).cursor() + + def retrieve_data_show_instances(): + return sorted(list(execute_and_fetch_all(coord_cursor, "SHOW INSTANCES;"))) + + expected_data_on_coord = [ + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "down", "unknown"), + ("instance_2", "", "127.0.0.1:10012", "up", "main"), + ("instance_3", "", "127.0.0.1:10013", "down", "unknown"), + ] + mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_instances) + + # 9 + + with pytest.raises(Exception) as e: + execute_and_fetch_all(instance_2_cursor, "CREATE (:Epoch3 {prop:3});") + assert "At least one SYNC replica has not confirmed committing last transaction." in str(e.value) + + # 10 + interactive_mg_runner.start(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_1") + + new_expected_data_on_coord = [ + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "up", "replica"), + ("instance_2", "", "127.0.0.1:10012", "up", "main"), + ("instance_3", "", "127.0.0.1:10013", "down", "unknown"), + ] + mg_sleep_and_assert(new_expected_data_on_coord, retrieve_data_show_instances) + + # 11 + instance_1_cursor = connect(host="localhost", port=7688).cursor() + + def get_vertex_count(): + return execute_and_fetch_all(instance_1_cursor, "MATCH (n) RETURN count(n)")[0][0] + + mg_sleep_and_assert(3, get_vertex_count) + + # 12 + + interactive_mg_runner.start(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_3") + + new_expected_data_on_coord = [ + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "up", "replica"), + ("instance_2", "", "127.0.0.1:10012", "up", "main"), + ("instance_3", "", "127.0.0.1:10013", "up", "replica"), + ] + mg_sleep_and_assert(new_expected_data_on_coord, retrieve_data_show_instances) + + # 13 + + instance_3_cursor = connect(host="localhost", port=7687).cursor() + + def 
get_vertex_count(): + return execute_and_fetch_all(instance_3_cursor, "MATCH (n) RETURN count(n)")[0][0] + + mg_sleep_and_assert(3, get_vertex_count) + + +@pytest.mark.parametrize("data_recovery", ["false", "true"]) +def test_replication_works_on_failover_replica_2_epochs_more_commits_away(data_recovery): + # Goal of this test is to check the replication works after failover command if one + # instance missed couple of epochs but data is still available on one of the instances + + # 1. We start all replicas, main and coordinator manually + # 2. Main does commit + # 3. instance_2 down + # 4. Main commits more + # 5. Main down + # 6. Instance_1 new main + # 7. Instance 1 commits + # 8. Instance 4 gets data + # 9. Instance 1 dies + # 10. Instance 4 new main + # 11. Instance 4 commits + # 12. Instance 2 wakes up + # 13. Instance 2 gets data from old epochs + # 14. All other instances wake up + # 15. Everything is replicated + + temp_dir = tempfile.TemporaryDirectory().name + + MEMGRAPH_INNER_INSTANCES_DESCRIPTION = { + "instance_1": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7688", + "--log-level", + "TRACE", + "--coordinator-server-port", + "10011", + "--replication-restore-state-on-startup", + "true", + f"--data-recovery-on-startup={data_recovery}", + "--storage-recover-on-startup=false", + ], + "log_file": "instance_1.log", + "data_directory": f"{temp_dir}/instance_1", + "setup_queries": [], + }, + "instance_2": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7689", + "--log-level", + "TRACE", + "--coordinator-server-port", + "10012", + "--replication-restore-state-on-startup", + "true", + f"--data-recovery-on-startup={data_recovery}", + "--storage-recover-on-startup=false", + ], + "log_file": "instance_2.log", + "data_directory": f"{temp_dir}/instance_2", + "setup_queries": [], + }, + "instance_3": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7687", + 
"--log-level", + "TRACE", + "--coordinator-server-port", + "10013", + "--replication-restore-state-on-startup", + "true", + "--data-recovery-on-startup", + f"{data_recovery}", + "--storage-recover-on-startup=false", + ], + "log_file": "instance_3.log", + "data_directory": f"{temp_dir}/instance_3", + "setup_queries": [], + }, + "instance_4": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7691", + "--log-level", + "TRACE", + "--coordinator-server-port", + "10014", + "--replication-restore-state-on-startup", + "true", + "--data-recovery-on-startup", + f"{data_recovery}", + "--storage-recover-on-startup=false", + ], + "log_file": "instance_4.log", + "data_directory": f"{temp_dir}/instance_4", + "setup_queries": [], + }, + "coordinator": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7690", + "--log-level=TRACE", + "--raft-server-id=1", + "--raft-server-port=10111", + ], + "log_file": "coordinator.log", + "setup_queries": [ + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", + "REGISTER INSTANCE instance_4 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'management_server': '127.0.0.1:10014', 'replication_server': '127.0.0.1:10004'};", + "SET INSTANCE instance_3 TO MAIN", + ], + }, + } + + # 1 + + interactive_mg_runner.start_all(MEMGRAPH_INNER_INSTANCES_DESCRIPTION) + + expected_data_on_main = [ + ( + "instance_1", + "127.0.0.1:10001", + "sync", + {"behind": None, "status": "ready", "ts": 0}, + {"memgraph": {"behind": 0, "status": "ready", "ts": 0}}, + ), + ( + "instance_2", + 
"127.0.0.1:10002", + "sync", + {"behind": None, "status": "ready", "ts": 0}, + {"memgraph": {"behind": 0, "status": "ready", "ts": 0}}, + ), + ( + "instance_4", + "127.0.0.1:10004", + "sync", + {"behind": None, "status": "ready", "ts": 0}, + {"memgraph": {"behind": 0, "status": "ready", "ts": 0}}, + ), + ] + + main_cursor = connect(host="localhost", port=7687).cursor() + + def retrieve_data_show_replicas(): + return sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS;"))) + + mg_sleep_and_assert_collection(expected_data_on_main, retrieve_data_show_replicas) + + # 2 + + execute_and_fetch_all(main_cursor, "CREATE (:EpochVertex1 {prop:1});") + execute_and_fetch_all(main_cursor, "CREATE (:EpochVertex1 {prop:2});") + + instance_1_cursor = connect(host="localhost", port=7688).cursor() + instance_2_cursor = connect(host="localhost", port=7689).cursor() + instance_4_cursor = connect(host="localhost", port=7691).cursor() + + assert execute_and_fetch_all(instance_1_cursor, "MATCH (n) RETURN count(n);")[0][0] == 2 + assert execute_and_fetch_all(instance_2_cursor, "MATCH (n) RETURN count(n);")[0][0] == 2 + assert execute_and_fetch_all(instance_4_cursor, "MATCH (n) RETURN count(n);")[0][0] == 2 + + # 3 + + interactive_mg_runner.kill(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_2") + + coord_cursor = connect(host="localhost", port=7690).cursor() + + def retrieve_data_show_instances(): + return sorted(list(execute_and_fetch_all(coord_cursor, "SHOW INSTANCES;"))) + + expected_data_on_coord = [ + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "up", "replica"), + ("instance_2", "", "127.0.0.1:10012", "down", "unknown"), + ("instance_3", "", "127.0.0.1:10013", "up", "main"), + ("instance_4", "", "127.0.0.1:10014", "up", "replica"), + ] + mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_instances) + + # 4 + + with pytest.raises(Exception) as e: + execute_and_fetch_all(main_cursor, "CREATE 
(:EpochVertex1 {prop:1});") + assert "At least one SYNC replica has not confirmed committing last transaction." in str(e.value) + + assert execute_and_fetch_all(instance_1_cursor, "MATCH (n) RETURN count(n);")[0][0] == 3 + assert execute_and_fetch_all(instance_4_cursor, "MATCH (n) RETURN count(n);")[0][0] == 3 + + # 5 + + interactive_mg_runner.kill(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_3") + + # 6 + + expected_data_on_coord = [ + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "up", "main"), + ("instance_2", "", "127.0.0.1:10012", "down", "unknown"), + ("instance_3", "", "127.0.0.1:10013", "down", "unknown"), + ("instance_4", "", "127.0.0.1:10014", "up", "replica"), + ] + mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_instances) + + # 7 + + with pytest.raises(Exception) as e: + execute_and_fetch_all(instance_1_cursor, "CREATE (:Epoch2Vertex {prop:1});") + assert "At least one SYNC replica has not confirmed committing last transaction." in str(e.value) + + # 8 + + assert execute_and_fetch_all(instance_4_cursor, "MATCH (n) RETURN count(n);")[0][0] == 4 + + # 9 + + interactive_mg_runner.kill(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_1") + + # 10 + + expected_data_on_coord = [ + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "down", "unknown"), + ("instance_2", "", "127.0.0.1:10012", "down", "unknown"), + ("instance_3", "", "127.0.0.1:10013", "down", "unknown"), + ("instance_4", "", "127.0.0.1:10014", "up", "main"), + ] + mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_instances) + + # 11 + + with pytest.raises(Exception) as e: + execute_and_fetch_all(instance_4_cursor, "CREATE (:Epoch3Vertex {prop:1});") + assert "At least one SYNC replica has not confirmed committing last transaction." 
in str(e.value) + + # 12 + + interactive_mg_runner.start(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_2") + + expected_data_on_coord = [ + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "down", "unknown"), + ("instance_2", "", "127.0.0.1:10012", "up", "replica"), + ("instance_3", "", "127.0.0.1:10013", "down", "unknown"), + ("instance_4", "", "127.0.0.1:10014", "up", "main"), + ] + mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_instances) + + # 13 + + instance_2_cursor = connect(host="localhost", port=7689).cursor() + + def get_vertex_count(): + return execute_and_fetch_all(instance_2_cursor, "MATCH (n) RETURN count(n)")[0][0] + + mg_sleep_and_assert(5, get_vertex_count) + + # 14 + + interactive_mg_runner.start(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_1") + interactive_mg_runner.start(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_3") + + expected_data_on_coord = [ + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "up", "replica"), + ("instance_2", "", "127.0.0.1:10012", "up", "replica"), + ("instance_3", "", "127.0.0.1:10013", "up", "replica"), + ("instance_4", "", "127.0.0.1:10014", "up", "main"), + ] + mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_instances) + + # 15 + instance_1_cursor = connect(host="localhost", port=7688).cursor() + instance_4_cursor = connect(host="localhost", port=7691).cursor() + + def get_vertex_count(): + return execute_and_fetch_all(instance_1_cursor, "MATCH (n) RETURN count(n)")[0][0] + + mg_sleep_and_assert(5, get_vertex_count) + + def get_vertex_count(): + return execute_and_fetch_all(instance_4_cursor, "MATCH (n) RETURN count(n)")[0][0] + + mg_sleep_and_assert(5, get_vertex_count) + + +@pytest.mark.parametrize("data_recovery", ["true"]) +def test_replication_forcefully_works_on_failover_replica_misses_epoch(data_recovery): + # Goal of this test is to check the 
replication works forcefully if replica misses epoch + # 1. We start all replicas, main and coordinator manually + # 2. We check that main has correct state + # 3. Create initial data on MAIN + # 4. Expect data to be copied on all replicas + # 5. Kill instance_1 ( this one will miss complete epoch) + # 6. Kill main (instance_3) + # 7. Instance_2 + # 8. Instance_2 commits + # 9. Instance_2 down + # 10. instance_4 down + # 11. Instance 1 up (missed epoch) + # 12 Instance 1 new main + # 13 instance 2 up + # 14 Force data from instance 1 to instance 2 + + temp_dir = tempfile.TemporaryDirectory().name + + MEMGRAPH_INNER_INSTANCES_DESCRIPTION = { + "instance_1": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7688", + "--log-level", + "TRACE", + "--coordinator-server-port", + "10011", + "--replication-restore-state-on-startup", + "true", + f"--data-recovery-on-startup={data_recovery}", + "--storage-recover-on-startup=false", + ], + "log_file": "instance_1.log", + "data_directory": f"{temp_dir}/instance_1", + "setup_queries": [], + }, + "instance_2": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7689", + "--log-level", + "TRACE", + "--coordinator-server-port", + "10012", + "--replication-restore-state-on-startup", + "true", + f"--data-recovery-on-startup={data_recovery}", + "--storage-recover-on-startup=false", + ], + "log_file": "instance_2.log", + "data_directory": f"{temp_dir}/instance_2", + "setup_queries": [], + }, + "instance_3": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7687", + "--log-level", + "TRACE", + "--coordinator-server-port", + "10013", + "--replication-restore-state-on-startup", + "true", + "--data-recovery-on-startup", + f"{data_recovery}", + "--storage-recover-on-startup=false", + ], + "log_file": "instance_3.log", + "data_directory": f"{temp_dir}/instance_3", + "setup_queries": [], + }, + "instance_4": { + "args": [ + 
"--experimental-enabled=high-availability", + "--bolt-port", + "7691", + "--log-level", + "TRACE", + "--coordinator-server-port", + "10014", + "--replication-restore-state-on-startup", + "true", + "--data-recovery-on-startup", + f"{data_recovery}", + "--storage-recover-on-startup=false", + ], + "log_file": "instance_4.log", + "data_directory": f"{temp_dir}/instance_4", + "setup_queries": [], + }, + "coordinator": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7690", + "--log-level=TRACE", + "--raft-server-id=1", + "--raft-server-port=10111", + ], + "log_file": "coordinator.log", + "setup_queries": [ + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", + "REGISTER INSTANCE instance_4 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'management_server': '127.0.0.1:10014', 'replication_server': '127.0.0.1:10004'};", + "SET INSTANCE instance_3 TO MAIN", + ], + }, + } + + # 1 + + interactive_mg_runner.start_all(MEMGRAPH_INNER_INSTANCES_DESCRIPTION) + + # 2 + + main_cursor = connect(host="localhost", port=7687).cursor() + expected_data_on_main = [ + ( + "instance_1", + "127.0.0.1:10001", + "sync", + {"behind": None, "status": "ready", "ts": 0}, + {"memgraph": {"behind": 0, "status": "ready", "ts": 0}}, + ), + ( + "instance_2", + "127.0.0.1:10002", + "sync", + {"behind": None, "status": "ready", "ts": 0}, + {"memgraph": {"behind": 0, "status": "ready", "ts": 0}}, + ), + ( + "instance_4", + "127.0.0.1:10004", + "sync", + {"behind": None, "status": "ready", "ts": 0}, + {"memgraph": {"behind": 0, "status": "ready", "ts": 
0}}, + ), + ] + + main_cursor = connect(host="localhost", port=7687).cursor() + + def retrieve_data_show_replicas(): + return sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS;"))) + + mg_sleep_and_assert_collection(expected_data_on_main, retrieve_data_show_replicas) + + coord_cursor = connect(host="localhost", port=7690).cursor() + + def retrieve_data_show_instances(): + return sorted(list(execute_and_fetch_all(coord_cursor, "SHOW INSTANCES;"))) + + expected_data_on_coord = [ + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "up", "replica"), + ("instance_2", "", "127.0.0.1:10012", "up", "replica"), + ("instance_3", "", "127.0.0.1:10013", "up", "main"), + ("instance_4", "", "127.0.0.1:10014", "up", "replica"), + ] + mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_instances) + + # 3 + + execute_and_fetch_all(main_cursor, "CREATE (:Epoch1Vertex {prop:1});") + execute_and_fetch_all(main_cursor, "CREATE (:Epoch1Vertex {prop:2});") + + # 4 + instance_1_cursor = connect(host="localhost", port=7688).cursor() + instance_2_cursor = connect(host="localhost", port=7689).cursor() + instance_4_cursor = connect(host="localhost", port=7691).cursor() + + assert execute_and_fetch_all(instance_1_cursor, "MATCH (n) RETURN count(n);")[0][0] == 2 + assert execute_and_fetch_all(instance_2_cursor, "MATCH (n) RETURN count(n);")[0][0] == 2 + assert execute_and_fetch_all(instance_4_cursor, "MATCH (n) RETURN count(n);")[0][0] == 2 + + # 5 + + interactive_mg_runner.kill(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_1") + + # 6 + interactive_mg_runner.kill(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_3") + + # 7 + + expected_data_on_coord = [ + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "down", "unknown"), + ("instance_2", "", "127.0.0.1:10012", "up", "main"), + ("instance_3", "", "127.0.0.1:10013", "down", "unknown"), + ("instance_4", 
"", "127.0.0.1:10014", "up", "replica"), + ] + mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_instances) + + # 8 + + with pytest.raises(Exception) as e: + execute_and_fetch_all(instance_2_cursor, "CREATE (:Epoch2Vertex {prop:1});") + assert "At least one SYNC replica has not confirmed committing last transaction." in str(e.value) + + def get_vertex_count(): + return execute_and_fetch_all(instance_4_cursor, "MATCH (n) RETURN count(n)")[0][0] + + mg_sleep_and_assert(3, get_vertex_count) + + assert execute_and_fetch_all(instance_4_cursor, "MATCH (n) RETURN count(n);")[0][0] == 3 + + # 9 + + interactive_mg_runner.kill(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_2") + + # 10 + + interactive_mg_runner.kill(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_4") + + # 11 + + interactive_mg_runner.start(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_1") + + expected_data_on_coord = [ + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "up", "main"), + ("instance_2", "", "127.0.0.1:10012", "down", "unknown"), + ("instance_3", "", "127.0.0.1:10013", "down", "unknown"), + ("instance_4", "", "127.0.0.1:10014", "down", "unknown"), + ] + mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_instances) + + # 12 + + interactive_mg_runner.start(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_2") + + # 13 + + expected_data_on_coord = [ + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "up", "main"), + ("instance_2", "", "127.0.0.1:10012", "up", "replica"), + ("instance_3", "", "127.0.0.1:10013", "down", "unknown"), + ("instance_4", "", "127.0.0.1:10014", "down", "unknown"), + ] + mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_instances) + + # 12 + instance_1_cursor = connect(host="localhost", port=7688).cursor() + instance_2_cursor = connect(host="localhost", port=7689).cursor() + + def get_vertex_count(): + return 
execute_and_fetch_all(instance_1_cursor, "MATCH (n) RETURN count(n)")[0][0] + + mg_sleep_and_assert(2, get_vertex_count) + + def get_vertex_count(): + return execute_and_fetch_all(instance_2_cursor, "MATCH (n) RETURN count(n)")[0][0] + + mg_sleep_and_assert(2, get_vertex_count) + + # 13 + with pytest.raises(Exception) as e: + execute_and_fetch_all(instance_1_cursor, "CREATE (:Epoch3Vertex {prop:1});") + assert "At least one SYNC replica has not confirmed committing last transaction." in str(e.value) + + # 14 + + def get_vertex_objects_func_creator(cursor): + def get_vertex_objects(): + return list( + execute_and_fetch_all( + cursor, "MATCH (n) " "WITH labels(n) as labels, properties(n) as props " "RETURN labels[0], props;" + ) + ) + + return get_vertex_objects + + vertex_objects = [("Epoch1Vertex", {"prop": 1}), ("Epoch1Vertex", {"prop": 2}), ("Epoch3Vertex", {"prop": 1})] + + mg_sleep_and_assert_collection(vertex_objects, get_vertex_objects_func_creator(instance_1_cursor)) + + mg_sleep_and_assert_collection(vertex_objects, get_vertex_objects_func_creator(instance_2_cursor)) + + # 15 + + +@pytest.mark.parametrize("data_recovery", ["false", "true"]) +def test_replication_correct_replica_chosen_up_to_date_data(data_recovery): + # Goal of this test is to check that correct replica instance as new MAIN is chosen + # 1. We start all replicas, main and coordinator manually + # 2. We check that main has correct state + # 3. Create initial data on MAIN + # 4. Expect data to be copied on all replicas + # 5. Kill instance_1 ( this one will miss complete epoch) + # 6. Kill main (instance_3) + # 7. Instance_2 new MAIN + # 8. Instance_2 commits and replicates data + # 9. Instance_4 down (not main) + # 10. 
instance_2 down (MAIN), instance 1 up (missed epoch), + # instance 4 up (In this case we should always choose instance_4 because it has up-to-date data) + # 11 Instance 4 new main + # 12 instance_1 gets up-to-date data, instance_4 has all data + + temp_dir = tempfile.TemporaryDirectory().name + + MEMGRAPH_INNER_INSTANCES_DESCRIPTION = { + "instance_1": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7688", + "--log-level", + "TRACE", + "--coordinator-server-port", + "10011", + "--replication-restore-state-on-startup", + "true", + f"--data-recovery-on-startup={data_recovery}", + "--storage-recover-on-startup=false", + ], + "log_file": "instance_1.log", + "data_directory": f"{temp_dir}/instance_1", + "setup_queries": [], + }, + "instance_2": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7689", + "--log-level", + "TRACE", + "--coordinator-server-port", + "10012", + "--replication-restore-state-on-startup", + "true", + f"--data-recovery-on-startup={data_recovery}", + "--storage-recover-on-startup=false", + ], + "log_file": "instance_2.log", + "data_directory": f"{temp_dir}/instance_2", + "setup_queries": [], + }, + "instance_3": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7687", + "--log-level", + "TRACE", + "--coordinator-server-port", + "10013", + "--replication-restore-state-on-startup", + "true", + "--data-recovery-on-startup", + f"{data_recovery}", + "--storage-recover-on-startup=false", + ], + "log_file": "instance_3.log", + "data_directory": f"{temp_dir}/instance_3", + "setup_queries": [], + }, + "instance_4": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7691", + "--log-level", + "TRACE", + "--coordinator-server-port", + "10014", + "--replication-restore-state-on-startup", + "true", + "--data-recovery-on-startup", + f"{data_recovery}", + "--storage-recover-on-startup=false", + ], + "log_file": "instance_4.log", + 
"data_directory": f"{temp_dir}/instance_4", + "setup_queries": [], + }, + "coordinator": { + "args": [ + "--experimental-enabled=high-availability", + "--bolt-port", + "7690", + "--log-level=TRACE", + "--raft-server-id=1", + "--raft-server-port=10111", + ], + "log_file": "coordinator.log", + "setup_queries": [ + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", + "REGISTER INSTANCE instance_4 WITH CONFIG {'bolt_server': '127.0.0.1:7691', 'management_server': '127.0.0.1:10014', 'replication_server': '127.0.0.1:10004'};", + "SET INSTANCE instance_3 TO MAIN", + ], + }, + } + + # 1 + + interactive_mg_runner.start_all(MEMGRAPH_INNER_INSTANCES_DESCRIPTION) + + # 2 + + main_cursor = connect(host="localhost", port=7687).cursor() + expected_data_on_main = [ + ( + "instance_1", + "127.0.0.1:10001", + "sync", + {"behind": None, "status": "ready", "ts": 0}, + {"memgraph": {"behind": 0, "status": "ready", "ts": 0}}, + ), + ( + "instance_2", + "127.0.0.1:10002", + "sync", + {"behind": None, "status": "ready", "ts": 0}, + {"memgraph": {"behind": 0, "status": "ready", "ts": 0}}, + ), + ( + "instance_4", + "127.0.0.1:10004", + "sync", + {"behind": None, "status": "ready", "ts": 0}, + {"memgraph": {"behind": 0, "status": "ready", "ts": 0}}, + ), + ] + + main_cursor = connect(host="localhost", port=7687).cursor() + + def retrieve_data_show_replicas(): + return sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS;"))) + + mg_sleep_and_assert_collection(expected_data_on_main, retrieve_data_show_replicas) + + coord_cursor = connect(host="localhost", 
port=7690).cursor() + + def retrieve_data_show_instances(): + return sorted(list(execute_and_fetch_all(coord_cursor, "SHOW INSTANCES;"))) + + # TODO(antoniofilipovic) Before fixing durability, if this is removed we also have an issue. Check after fix + expected_data_on_coord = [ + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "up", "replica"), + ("instance_2", "", "127.0.0.1:10012", "up", "replica"), + ("instance_3", "", "127.0.0.1:10013", "up", "main"), + ("instance_4", "", "127.0.0.1:10014", "up", "replica"), + ] + mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_instances) + + # 3 + + execute_and_fetch_all(main_cursor, "CREATE (:Epoch1Vertex {prop:1});") + execute_and_fetch_all(main_cursor, "CREATE (:Epoch1Vertex {prop:2});") + + # 4 + instance_1_cursor = connect(host="localhost", port=7688).cursor() + instance_2_cursor = connect(host="localhost", port=7689).cursor() + instance_4_cursor = connect(host="localhost", port=7691).cursor() + + assert execute_and_fetch_all(instance_1_cursor, "MATCH (n) RETURN count(n);")[0][0] == 2 + assert execute_and_fetch_all(instance_2_cursor, "MATCH (n) RETURN count(n);")[0][0] == 2 + assert execute_and_fetch_all(instance_4_cursor, "MATCH (n) RETURN count(n);")[0][0] == 2 + + # 5 + + interactive_mg_runner.kill(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_1") + + # 6 + interactive_mg_runner.kill(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_3") + + # 7 + + expected_data_on_coord = [ + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "down", "unknown"), + ("instance_2", "", "127.0.0.1:10012", "up", "main"), + ("instance_3", "", "127.0.0.1:10013", "down", "unknown"), + ("instance_4", "", "127.0.0.1:10014", "up", "replica"), + ] + mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_instances) + + # 8 + + with pytest.raises(Exception) as e: + execute_and_fetch_all(instance_2_cursor, 
"CREATE (:Epoch2Vertex {prop:1});") + assert "At least one SYNC replica has not confirmed committing last transaction." in str(e.value) + + def get_vertex_count(): + return execute_and_fetch_all(instance_4_cursor, "MATCH (n) RETURN count(n)")[0][0] + + mg_sleep_and_assert(3, get_vertex_count) + + assert execute_and_fetch_all(instance_4_cursor, "MATCH (n) RETURN count(n);")[0][0] == 3 + + # 9 + + interactive_mg_runner.kill(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_4") + + expected_data_on_coord = [ + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "down", "unknown"), + ("instance_2", "", "127.0.0.1:10012", "up", "main"), + ("instance_3", "", "127.0.0.1:10013", "down", "unknown"), + ("instance_4", "", "127.0.0.1:10014", "down", "unknown"), + ] + mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_instances) + + # 10 + + interactive_mg_runner.kill(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_2") + interactive_mg_runner.start(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_1") + interactive_mg_runner.start(MEMGRAPH_INNER_INSTANCES_DESCRIPTION, "instance_4") + + # 11 + + expected_data_on_coord = [ + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "up", "replica"), + ("instance_2", "", "127.0.0.1:10012", "down", "unknown"), + ("instance_3", "", "127.0.0.1:10013", "down", "unknown"), + ("instance_4", "", "127.0.0.1:10014", "up", "main"), + ] + mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_instances) + + # 12 + instance_1_cursor = connect(host="localhost", port=7688).cursor() + instance_4_cursor = connect(host="localhost", port=7691).cursor() + + def get_vertex_count(): + return execute_and_fetch_all(instance_1_cursor, "MATCH (n) RETURN count(n)")[0][0] + + mg_sleep_and_assert(3, get_vertex_count) + + def get_vertex_count(): + return execute_and_fetch_all(instance_4_cursor, "MATCH (n) RETURN count(n)")[0][0] + + 
mg_sleep_and_assert(3, get_vertex_count) + + +def test_replication_works_on_failover_simple(): + # Goal of this test is to check the replication works after failover command. + # 1. We start all replicas, main and coordinator manually # 2. We check that main has correct state # 3. We kill main # 4. We check that coordinator and new main have correct state # 5. We insert one vertex on new main # 6. We check that vertex appears on new replica + # 7. We bring back main up + # 8. Expect data to be copied to main safe_execute(shutil.rmtree, TEMP_DIR) # 1 @@ -121,8 +1174,11 @@ def test_replication_works_on_failover(): {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}}, ), ] - actual_data_on_main = sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS;"))) - assert actual_data_on_main == expected_data_on_main + + def main_cursor_show_replicas(): + return sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS;"))) + + mg_sleep_and_assert_collection(expected_data_on_main, main_cursor_show_replicas) # 3 interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3") @@ -134,10 +1190,10 @@ def test_replication_works_on_failover(): return sorted(list(execute_and_fetch_all(coord_cursor, "SHOW INSTANCES;"))) expected_data_on_coord = [ - ("coordinator_1", "127.0.0.1:10111", "", True, "coordinator"), - ("instance_1", "", "127.0.0.1:10011", True, "main"), - ("instance_2", "", "127.0.0.1:10012", True, "replica"), - ("instance_3", "", "127.0.0.1:10013", False, "unknown"), + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "up", "main"), + ("instance_2", "", "127.0.0.1:10012", "up", "replica"), + ("instance_3", "", "127.0.0.1:10013", "down", "unknown"), ] mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_repl_cluster) @@ -164,33 +1220,48 @@ def test_replication_works_on_failover(): ] mg_sleep_and_assert_collection(expected_data_on_new_main, retrieve_data_show_replicas) + # 5 + with 
pytest.raises(Exception) as e: + execute_and_fetch_all(new_main_cursor, "CREATE ();") + assert "At least one SYNC replica has not confirmed committing last transaction." in str(e.value) + # 6 + alive_replica_cursor = connect(host="localhost", port=7689).cursor() + res = execute_and_fetch_all(alive_replica_cursor, "MATCH (n) RETURN count(n) as count;")[0][0] + assert res == 1, "Vertex should be replicated" + + # 7 interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3") + + def retrieve_data_show_replicas(): + return sorted(list(execute_and_fetch_all(new_main_cursor, "SHOW REPLICAS;"))) + + new_main_cursor = connect(host="localhost", port=7688).cursor() + expected_data_on_new_main = [ ( "instance_2", "127.0.0.1:10002", "sync", {"ts": 0, "behind": None, "status": "ready"}, - {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}}, + {"memgraph": {"ts": 2, "behind": 0, "status": "ready"}}, ), ( "instance_3", "127.0.0.1:10003", "sync", {"ts": 0, "behind": None, "status": "ready"}, - {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}}, + {"memgraph": {"ts": 2, "behind": 0, "status": "ready"}}, ), ] - mg_sleep_and_assert_collection(expected_data_on_new_main, retrieve_data_show_replicas) + mg_sleep_and_assert(expected_data_on_new_main, retrieve_data_show_replicas) - # 5 - execute_and_fetch_all(new_main_cursor, "CREATE ();") + # 8 + alive_main = connect(host="localhost", port=7687).cursor() - # 6 - alive_replica_cursror = connect(host="localhost", port=7689).cursor() - res = execute_and_fetch_all(alive_replica_cursror, "MATCH (n) RETURN count(n) as count;")[0][0] - assert res == 1, "Vertex should be replicated" - interactive_mg_runner.stop_all(MEMGRAPH_INSTANCES_DESCRIPTION) + def retrieve_vertices_count(): + return execute_and_fetch_all(alive_main, "MATCH (n) RETURN count(n) as count;")[0][0] + + mg_sleep_and_assert(1, retrieve_vertices_count) def test_replication_works_on_replica_instance_restart(): @@ -224,8 +1295,11 @@ def 
test_replication_works_on_replica_instance_restart(): {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}}, ), ] - actual_data_on_main = sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS;"))) - assert actual_data_on_main == expected_data_on_main + + def main_cursor_show_replicas(): + return sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS;"))) + + mg_sleep_and_assert_collection(expected_data_on_main, main_cursor_show_replicas) # 3 coord_cursor = connect(host="localhost", port=7690).cursor() @@ -236,10 +1310,10 @@ def test_replication_works_on_replica_instance_restart(): return sorted(list(execute_and_fetch_all(coord_cursor, "SHOW INSTANCES;"))) expected_data_on_coord = [ - ("coordinator_1", "127.0.0.1:10111", "", True, "coordinator"), - ("instance_1", "", "127.0.0.1:10011", True, "replica"), - ("instance_2", "", "127.0.0.1:10012", False, "unknown"), - ("instance_3", "", "127.0.0.1:10013", True, "main"), + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "up", "replica"), + ("instance_2", "", "127.0.0.1:10012", "down", "unknown"), + ("instance_3", "", "127.0.0.1:10013", "up", "main"), ] mg_sleep_and_assert_collection(expected_data_on_coord, retrieve_data_show_repl_cluster) @@ -302,10 +1376,10 @@ def test_replication_works_on_replica_instance_restart(): return sorted(list(execute_and_fetch_all(coord_cursor, "SHOW INSTANCES;"))) expected_data_on_coord = [ - ("coordinator_1", "127.0.0.1:10111", "", True, "coordinator"), - ("instance_1", "", "127.0.0.1:10011", True, "replica"), - ("instance_2", "", "127.0.0.1:10012", True, "replica"), - ("instance_3", "", "127.0.0.1:10013", True, "main"), + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "up", "replica"), + ("instance_2", "", "127.0.0.1:10012", "up", "replica"), + ("instance_3", "", "127.0.0.1:10013", "up", "main"), ] mg_sleep_and_assert(expected_data_on_coord, 
retrieve_data_show_repl_cluster) @@ -350,10 +1424,10 @@ def test_show_instances(): return sorted(list(execute_and_fetch_all(coord_cursor, "SHOW INSTANCES;"))) expected_data = [ - ("coordinator_1", "127.0.0.1:10111", "", True, "coordinator"), - ("instance_1", "", "127.0.0.1:10011", True, "replica"), - ("instance_2", "", "127.0.0.1:10012", True, "replica"), - ("instance_3", "", "127.0.0.1:10013", True, "main"), + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "up", "replica"), + ("instance_2", "", "127.0.0.1:10012", "up", "replica"), + ("instance_3", "", "127.0.0.1:10013", "up", "main"), ] mg_sleep_and_assert(expected_data, show_repl_cluster) @@ -373,20 +1447,20 @@ def test_show_instances(): interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_1") expected_data = [ - ("coordinator_1", "127.0.0.1:10111", "", True, "coordinator"), - ("instance_1", "", "127.0.0.1:10011", False, "unknown"), - ("instance_2", "", "127.0.0.1:10012", True, "replica"), - ("instance_3", "", "127.0.0.1:10013", True, "main"), + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "down", "unknown"), + ("instance_2", "", "127.0.0.1:10012", "up", "replica"), + ("instance_3", "", "127.0.0.1:10013", "up", "main"), ] mg_sleep_and_assert(expected_data, show_repl_cluster) interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_2") expected_data = [ - ("coordinator_1", "127.0.0.1:10111", "", True, "coordinator"), - ("instance_1", "", "127.0.0.1:10011", False, "unknown"), - ("instance_2", "", "127.0.0.1:10012", False, "unknown"), - ("instance_3", "", "127.0.0.1:10013", True, "main"), + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "down", "unknown"), + ("instance_2", "", "127.0.0.1:10012", "down", "unknown"), + ("instance_3", "", "127.0.0.1:10013", "up", "main"), ] mg_sleep_and_assert(expected_data, 
show_repl_cluster) @@ -412,8 +1486,11 @@ def test_simple_automatic_failover(): {"memgraph": {"ts": 0, "behind": 0, "status": "ready"}}, ), ] - actual_data_on_main = sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS;"))) - assert actual_data_on_main == sorted(expected_data_on_main) + + def main_cursor_show_replicas(): + return sorted(list(execute_and_fetch_all(main_cursor, "SHOW REPLICAS;"))) + + mg_sleep_and_assert_collection(expected_data_on_main, main_cursor_show_replicas) interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3") @@ -423,10 +1500,10 @@ def test_simple_automatic_failover(): return sorted(list(execute_and_fetch_all(coord_cursor, "SHOW INSTANCES;"))) expected_data_on_coord = [ - ("coordinator_1", "127.0.0.1:10111", "", True, "coordinator"), - ("instance_1", "", "127.0.0.1:10011", True, "main"), - ("instance_2", "", "127.0.0.1:10012", True, "replica"), - ("instance_3", "", "127.0.0.1:10013", False, "unknown"), + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "up", "main"), + ("instance_2", "", "127.0.0.1:10012", "up", "replica"), + ("instance_3", "", "127.0.0.1:10013", "down", "unknown"), ] mg_sleep_and_assert(expected_data_on_coord, retrieve_data_show_repl_cluster) @@ -482,7 +1559,7 @@ def test_registering_replica_fails_name_exists(): with pytest.raises(Exception) as e: execute_and_fetch_all( coord_cursor, - "REGISTER INSTANCE instance_1 ON '127.0.0.1:10051' WITH '127.0.0.1:10111';", + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7693', 'management_server': '127.0.0.1:10051', 'replication_server': '127.0.0.1:10111'};", ) assert str(e.value) == "Couldn't register replica instance since instance with such name already exists!" 
shutil.rmtree(TEMP_DIR) @@ -496,9 +1573,12 @@ def test_registering_replica_fails_endpoint_exists(): with pytest.raises(Exception) as e: execute_and_fetch_all( coord_cursor, - "REGISTER INSTANCE instance_5 ON '127.0.0.1:10011' WITH '127.0.0.1:10005';", + "REGISTER INSTANCE instance_5 WITH CONFIG {'bolt_server': '127.0.0.1:7693', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10005'};", ) - assert str(e.value) == "Couldn't register replica instance since instance with such endpoint already exists!" + assert ( + str(e.value) + == "Couldn't register replica instance since instance with such coordinator endpoint already exists!" + ) def test_replica_instance_restarts(): @@ -511,20 +1591,20 @@ def test_replica_instance_restarts(): return sorted(list(execute_and_fetch_all(cursor, "SHOW INSTANCES;"))) expected_data_up = [ - ("coordinator_1", "127.0.0.1:10111", "", True, "coordinator"), - ("instance_1", "", "127.0.0.1:10011", True, "replica"), - ("instance_2", "", "127.0.0.1:10012", True, "replica"), - ("instance_3", "", "127.0.0.1:10013", True, "main"), + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "up", "replica"), + ("instance_2", "", "127.0.0.1:10012", "up", "replica"), + ("instance_3", "", "127.0.0.1:10013", "up", "main"), ] mg_sleep_and_assert(expected_data_up, show_repl_cluster) interactive_mg_runner.kill(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_1") expected_data_down = [ - ("coordinator_1", "127.0.0.1:10111", "", True, "coordinator"), - ("instance_1", "", "127.0.0.1:10011", False, "unknown"), - ("instance_2", "", "127.0.0.1:10012", True, "replica"), - ("instance_3", "", "127.0.0.1:10013", True, "main"), + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "down", "unknown"), + ("instance_2", "", "127.0.0.1:10012", "up", "replica"), + ("instance_3", "", "127.0.0.1:10013", "up", "main"), ] 
mg_sleep_and_assert(expected_data_down, show_repl_cluster) @@ -553,18 +1633,18 @@ def test_automatic_failover_main_back_as_replica(): return sorted(list(execute_and_fetch_all(coord_cursor, "SHOW INSTANCES;"))) expected_data_after_failover = [ - ("coordinator_1", "127.0.0.1:10111", "", True, "coordinator"), - ("instance_1", "", "127.0.0.1:10011", True, "main"), - ("instance_2", "", "127.0.0.1:10012", True, "replica"), - ("instance_3", "", "127.0.0.1:10013", False, "unknown"), + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "up", "main"), + ("instance_2", "", "127.0.0.1:10012", "up", "replica"), + ("instance_3", "", "127.0.0.1:10013", "down", "unknown"), ] mg_sleep_and_assert(expected_data_after_failover, retrieve_data_show_repl_cluster) expected_data_after_main_coming_back = [ - ("coordinator_1", "127.0.0.1:10111", "", True, "coordinator"), - ("instance_1", "", "127.0.0.1:10011", True, "main"), - ("instance_2", "", "127.0.0.1:10012", True, "replica"), - ("instance_3", "", "127.0.0.1:10013", True, "replica"), + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "up", "main"), + ("instance_2", "", "127.0.0.1:10012", "up", "replica"), + ("instance_3", "", "127.0.0.1:10013", "up", "replica"), ] interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3") @@ -592,20 +1672,20 @@ def test_automatic_failover_main_back_as_main(): return sorted(list(execute_and_fetch_all(coord_cursor, "SHOW INSTANCES;"))) expected_data_all_down = [ - ("coordinator_1", "127.0.0.1:10111", "", True, "coordinator"), - ("instance_1", "", "127.0.0.1:10011", False, "unknown"), - ("instance_2", "", "127.0.0.1:10012", False, "unknown"), - ("instance_3", "", "127.0.0.1:10013", False, "unknown"), + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "down", "unknown"), + ("instance_2", "", "127.0.0.1:10012", "down", 
"unknown"), + ("instance_3", "", "127.0.0.1:10013", "down", "unknown"), ] mg_sleep_and_assert(expected_data_all_down, retrieve_data_show_repl_cluster) interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_3") expected_data_main_back = [ - ("coordinator_1", "127.0.0.1:10111", "", True, "coordinator"), - ("instance_1", "", "127.0.0.1:10011", False, "unknown"), - ("instance_2", "", "127.0.0.1:10012", False, "unknown"), - ("instance_3", "", "127.0.0.1:10013", True, "main"), + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "down", "unknown"), + ("instance_2", "", "127.0.0.1:10012", "down", "unknown"), + ("instance_3", "", "127.0.0.1:10013", "up", "main"), ] mg_sleep_and_assert(expected_data_main_back, retrieve_data_show_repl_cluster) @@ -620,10 +1700,10 @@ def test_automatic_failover_main_back_as_main(): interactive_mg_runner.start(MEMGRAPH_INSTANCES_DESCRIPTION, "instance_2") expected_data_replicas_back = [ - ("coordinator_1", "127.0.0.1:10111", "", True, "coordinator"), - ("instance_1", "", "127.0.0.1:10011", True, "replica"), - ("instance_2", "", "127.0.0.1:10012", True, "replica"), - ("instance_3", "", "127.0.0.1:10013", True, "main"), + ("coordinator_1", "127.0.0.1:10111", "", "unknown", "coordinator"), + ("instance_1", "", "127.0.0.1:10011", "up", "replica"), + ("instance_2", "", "127.0.0.1:10012", "up", "replica"), + ("instance_3", "", "127.0.0.1:10013", "up", "main"), ] mg_sleep_and_assert(expected_data_replicas_back, retrieve_data_show_repl_cluster) diff --git a/tests/e2e/high_availability/workloads.yaml b/tests/e2e/high_availability/workloads.yaml index 75f17b2f7..aaf76fc6b 100644 --- a/tests/e2e/high_availability/workloads.yaml +++ b/tests/e2e/high_availability/workloads.yaml @@ -16,9 +16,9 @@ ha_cluster: &ha_cluster args: ["--experimental-enabled=high-availability", "--bolt-port", "7690", "--log-level=TRACE", "--raft-server-id=1", "--raft-server-port=10111"] log_file: 
"replication-e2e-coordinator.log" setup_queries: [ - "REGISTER INSTANCE instance_1 ON '127.0.0.1:10011' WITH '127.0.0.1:10001';", - "REGISTER INSTANCE instance_2 ON '127.0.0.1:10012' WITH '127.0.0.1:10002';", - "REGISTER INSTANCE instance_3 ON '127.0.0.1:10013' WITH '127.0.0.1:10003';", + "REGISTER INSTANCE instance_1 WITH CONFIG {'bolt_server': '127.0.0.1:7688', 'management_server': '127.0.0.1:10011', 'replication_server': '127.0.0.1:10001'};", + "REGISTER INSTANCE instance_2 WITH CONFIG {'bolt_server': '127.0.0.1:7689', 'management_server': '127.0.0.1:10012', 'replication_server': '127.0.0.1:10002'};", + "REGISTER INSTANCE instance_3 WITH CONFIG {'bolt_server': '127.0.0.1:7687', 'management_server': '127.0.0.1:10013', 'replication_server': '127.0.0.1:10003'};", "SET INSTANCE instance_3 TO MAIN;" ] diff --git a/tests/e2e/load_csv/load_csv.py b/tests/e2e/load_csv/load_csv.py index 371803ed1..6483676e6 100644 --- a/tests/e2e/load_csv/load_csv.py +++ b/tests/e2e/load_csv/load_csv.py @@ -53,8 +53,45 @@ def test_given_one_row_in_db_when_load_csv_after_match_then_pass(): assert len(list(results)) == 4 -def test_load_csv_with_parameters(): +def test_creating_labels_with_load_csv_variable(): memgraph = Memgraph("localhost", 7687) + + results = list( + memgraph.execute_and_fetch( + f"""LOAD CSV FROM '{get_file_path(SIMPLE_CSV_FILE)}' WITH HEADER AS row + CREATE (p:row.name) + RETURN p + """ + ) + ) + + assert len(results) == 4 + assert results[0]["p"]._labels == {"Joseph"} + assert results[1]["p"]._labels == {"Peter"} + assert results[2]["p"]._labels == {"Ella"} + assert results[3]["p"]._labels == {"Joe"} + + +def test_create_relationships_with_load_csv_variable2(): + memgraph = Memgraph("localhost", 7687) + + results = list( + memgraph.execute_and_fetch( + f"""LOAD CSV FROM '{get_file_path(SIMPLE_CSV_FILE)}' WITH HEADER AS row + CREATE (p:row.name:Person:row.id) + RETURN p + """ + ) + ) + + assert len(results) == 4 + assert results[0]["p"]._labels == {"Joseph", "Person", 
"1"} + assert results[1]["p"]._labels == {"Peter", "Person", "2"} + assert results[2]["p"]._labels == {"Ella", "Person", "3"} + assert results[3]["p"]._labels == {"Joe", "Person", "4"} + + +def test_load_csv_with_parameters(): URI = "bolt://localhost:7687" AUTH = ("", "") diff --git a/tests/e2e/memory/workloads.yaml b/tests/e2e/memory/workloads.yaml index bf29e484c..c043e03d8 100644 --- a/tests/e2e/memory/workloads.yaml +++ b/tests/e2e/memory/workloads.yaml @@ -52,26 +52,26 @@ in_memory_query_limit_cluster: &in_memory_query_limit_cluster setup_queries: [] validation_queries: [] -args_450_MiB_limit: &args_450_MiB_limit +args_350_MiB_limit: &args_350_MiB_limit - "--bolt-port" - *bolt_port - - "--memory-limit=450" + - "--memory-limit=350" - "--storage-gc-cycle-sec=180" - "--log-level=INFO" -in_memory_450_MiB_limit_cluster: &in_memory_450_MiB_limit_cluster +in_memory_350_MiB_limit_cluster: &in_memory_350_MiB_limit_cluster cluster: main: - args: *args_450_MiB_limit + args: *args_350_MiB_limit log_file: "memory-e2e.log" setup_queries: [] validation_queries: [] -disk_450_MiB_limit_cluster: &disk_450_MiB_limit_cluster +disk_350_MiB_limit_cluster: &disk_350_MiB_limit_cluster cluster: main: - args: *args_450_MiB_limit + args: *args_350_MiB_limit log_file: "memory-e2e.log" setup_queries: [] validation_queries: [] @@ -192,22 +192,22 @@ workloads: - name: "Memory control for accumulation" binary: "tests/e2e/memory/memgraph__e2e__memory__limit_accumulation" args: ["--bolt-port", *bolt_port] - <<: *in_memory_450_MiB_limit_cluster + <<: *in_memory_350_MiB_limit_cluster - name: "Memory control for accumulation on disk storage" binary: "tests/e2e/memory/memgraph__e2e__memory__limit_accumulation" args: ["--bolt-port", *bolt_port] - <<: *disk_450_MiB_limit_cluster + <<: *disk_350_MiB_limit_cluster - name: "Memory control for edge create" binary: "tests/e2e/memory/memgraph__e2e__memory__limit_edge_create" args: ["--bolt-port", *bolt_port] - <<: *in_memory_450_MiB_limit_cluster + <<: 
*in_memory_350_MiB_limit_cluster - name: "Memory control for edge create on disk storage" binary: "tests/e2e/memory/memgraph__e2e__memory__limit_edge_create" args: ["--bolt-port", *bolt_port] - <<: *disk_450_MiB_limit_cluster + <<: *disk_350_MiB_limit_cluster - name: "Memory control for memory limit global thread alloc" binary: "tests/e2e/memory/memgraph__e2e__memory_limit_global_thread_alloc_proc" diff --git a/tests/e2e/mg_utils.py b/tests/e2e/mg_utils.py index 3a475bf3c..7279f25f2 100644 --- a/tests/e2e/mg_utils.py +++ b/tests/e2e/mg_utils.py @@ -17,6 +17,28 @@ def mg_sleep_and_assert(expected_value, function_to_retrieve_data, max_duration= return result +def mg_sleep_and_assert_any_function( + expected_value, functions_to_retrieve_data, max_duration=20, time_between_attempt=0.2 +): + result = [f() for f in functions_to_retrieve_data] + if any((x == expected_value for x in result)): + return result + start_time = time.time() + while result != expected_value: + duration = time.time() - start_time + if duration > max_duration: + assert ( + False + ), f" mg_sleep_and_assert has tried for too long and did not get the expected result! Last result was: {result}" + + time.sleep(time_between_attempt) + result = [f() for f in functions_to_retrieve_data] + if any((x == expected_value for x in result)): + return result + + return result + + def mg_sleep_and_assert_collection( expected_value, function_to_retrieve_data, max_duration=20, time_between_attempt=0.2 ): diff --git a/tests/e2e/module_file_manager/module_file_manager.cpp b/tests/e2e/module_file_manager/module_file_manager.cpp index 20641b9d7..73508d81b 100644 --- a/tests/e2e/module_file_manager/module_file_manager.cpp +++ b/tests/e2e/module_file_manager/module_file_manager.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -126,7 +126,7 @@ std::filesystem::path CreateModuleFile(auto &client, const std::string_view file } std::string GetModuleFileQuery(const std::filesystem::path &path) { - return fmt::format("CALL mg.get_module_file({}) YIELD content", path); + return fmt::format("CALL mg.get_module_file('{}') YIELD content", path); } std::string GetModuleFile(auto &client, const std::filesystem::path &path) { @@ -141,7 +141,7 @@ std::string GetModuleFile(auto &client, const std::filesystem::path &path) { } std::string UpdateModuleFileQuery(const std::filesystem::path &path, const std::string_view content) { - return fmt::format("CALL mg.update_module_file({}, '{}')", path, content); + return fmt::format("CALL mg.update_module_file('{}', '{}')", path, content); } void UpdateModuleFile(auto &client, const std::filesystem::path &path, const std::string_view content) { @@ -151,7 +151,7 @@ void UpdateModuleFile(auto &client, const std::filesystem::path &path, const std } std::string DeleteModuleFileQuery(const std::filesystem::path &path) { - return fmt::format("CALL mg.delete_module_file({})", path); + return fmt::format("CALL mg.delete_module_file('{}')", path); } void DeleteModuleFile(auto &client, const std::filesystem::path &path) { diff --git a/tests/e2e/replication/common.hpp b/tests/e2e/replication/common.hpp index f5113ac37..1938eb0f3 100644 --- a/tests/e2e/replication/common.hpp +++ b/tests/e2e/replication/common.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -34,12 +34,13 @@ DEFINE_double(reads_duration_limit, 10.0, "How long should the client perform re namespace mg::e2e::replication { auto ParseDatabaseEndpoints(const std::string &database_endpoints_str) { - const auto db_endpoints_strs = memgraph::utils::Split(database_endpoints_str, ","); + const auto db_endpoints_strs = memgraph::utils::SplitView(database_endpoints_str, ","); std::vector<memgraph::io::network::Endpoint> database_endpoints; for (const auto &db_endpoint_str : db_endpoints_strs) { - const auto maybe_host_port = memgraph::io::network::Endpoint::ParseSocketOrIpAddress(db_endpoint_str, 7687); + const auto maybe_host_port = memgraph::io::network::Endpoint::ParseSocketOrAddress(db_endpoint_str, 7687); MG_ASSERT(maybe_host_port); - database_endpoints.emplace_back(maybe_host_port->first, maybe_host_port->second); + auto const [ip, port] = *maybe_host_port; + database_endpoints.emplace_back(std::string(ip), port); } return database_endpoints; } diff --git a/tests/e2e/replication/show_while_creating_invalid_state.py b/tests/e2e/replication/show_while_creating_invalid_state.py index be7cd2b54..963aad7fd 100644 --- a/tests/e2e/replication/show_while_creating_invalid_state.py +++ b/tests/e2e/replication/show_while_creating_invalid_state.py @@ -923,7 +923,7 @@ def test_replication_role_recovery(connection): "--log-level=TRACE", "--replication-restore-state-on-startup", "true", - "--storage-recover-on-startup", + "--data-recovery-on-startup", "false", ], "log_file": "replica.log", @@ -934,7 +934,7 @@ def test_replication_role_recovery(connection): "--bolt-port", "7687", "--log-level=TRACE", - "--storage-recover-on-startup=true", + "--data-recovery-on-startup=true", "--replication-restore-state-on-startup=true", ], "log_file": "main.log", @@ -1105,7 +1105,7 @@ def 
test_basic_recovery_when_replica_is_kill_when_main_is_down(): "--bolt-port", "7687", "--log-level=TRACE", - "--storage-recover-on-startup=true", + "--data-recovery-on-startup=true", "--replication-restore-state-on-startup=true", ], "log_file": "main.log", @@ -1201,7 +1201,7 @@ def test_async_replication_when_main_is_killed(): "data_directory": f"{data_directory_replica.name}", }, "main": { - "args": ["--bolt-port", "7687", "--log-level=TRACE", "--storage-recover-on-startup=true"], + "args": ["--bolt-port", "7687", "--log-level=TRACE", "--data-recovery-on-startup=true"], "log_file": "main.log", "setup_queries": [], "data_directory": f"{data_directory_main.name}", @@ -1284,7 +1284,7 @@ def test_sync_replication_when_main_is_killed(): "data_directory": f"{data_directory_replica.name}", }, "main": { - "args": ["--bolt-port", "7687", "--log-level=TRACE", "--storage-recover-on-startup=true"], + "args": ["--bolt-port", "7687", "--log-level=TRACE", "--data-recovery-on-startup=true"], "log_file": "main.log", "setup_queries": [], "data_directory": f"{data_directory_main.name}", @@ -1340,7 +1340,7 @@ def test_attempt_to_write_data_on_main_when_async_replica_is_down(): "setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10002;"], }, "main": { - "args": ["--bolt-port", "7687", "--log-level=TRACE", "--storage-recover-on-startup=true"], + "args": ["--bolt-port", "7687", "--log-level=TRACE", "--data-recovery-on-startup=true"], "log_file": "main.log", "setup_queries": [ "REGISTER REPLICA async_replica1 ASYNC TO '127.0.0.1:10001';", @@ -1443,7 +1443,7 @@ def test_attempt_to_write_data_on_main_when_sync_replica_is_down(connection): "setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10002;"], }, "main": { - "args": ["--bolt-port", "7687", "--log-level=TRACE", "--storage-recover-on-startup", "true"], + "args": ["--bolt-port", "7687", "--log-level=TRACE", "--data-recovery-on-startup", "true"], "log_file": "main.log", # need to do it manually "setup_queries": [], @@ 
-1572,7 +1572,7 @@ def test_attempt_to_create_indexes_on_main_when_async_replica_is_down(): "setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10002;"], }, "main": { - "args": ["--bolt-port", "7687", "--log-level=TRACE", "--storage-recover-on-startup=true"], + "args": ["--bolt-port", "7687", "--log-level=TRACE", "--data-recovery-on-startup=true"], "log_file": "main.log", "setup_queries": [ "REGISTER REPLICA async_replica1 ASYNC TO '127.0.0.1:10001';", @@ -1673,7 +1673,7 @@ def test_attempt_to_create_indexes_on_main_when_sync_replica_is_down(connection) "setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10002;"], }, "main": { - "args": ["--bolt-port", "7687", "--log-level=TRACE", "--storage-recover-on-startup=true"], + "args": ["--bolt-port", "7687", "--log-level=TRACE", "--data-recovery-on-startup=true"], "log_file": "main.log", # Need to do it manually "setup_queries": [], @@ -1818,7 +1818,7 @@ def test_trigger_on_create_before_commit_with_offline_sync_replica(connection): "setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10002;"], }, "main": { - "args": ["--bolt-port", "7687", "--log-level=TRACE", "--storage-recover-on-startup=true"], + "args": ["--bolt-port", "7687", "--log-level=TRACE", "--data-recovery-on-startup=true"], "log_file": "main.log", # Need to do it manually since we kill replica "setup_queries": [], @@ -1937,7 +1937,7 @@ def test_trigger_on_update_before_commit_with_offline_sync_replica(connection): "setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10002;"], }, "main": { - "args": ["--bolt-port", "7687", "--log-level=TRACE", "--storage-recover-on-startup=true"], + "args": ["--bolt-port", "7687", "--log-level=TRACE", "--data-recovery-on-startup=true"], "log_file": "main.log", "setup_queries": [], }, @@ -2060,7 +2060,7 @@ def test_trigger_on_delete_before_commit_with_offline_sync_replica(connection): "setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10002;"], }, "main": { - "args": ["--bolt-port", 
"7687", "--log-level=TRACE", "--storage-recover-on-startup=true"], + "args": ["--bolt-port", "7687", "--log-level=TRACE", "--data-recovery-on-startup=true"], "log_file": "main.log", "setup_queries": [], }, @@ -2187,7 +2187,7 @@ def test_trigger_on_create_before_and_after_commit_with_offline_sync_replica(con "setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10002;"], }, "main": { - "args": ["--bolt-port", "7687", "--log-level=TRACE", "--storage-recover-on-startup=true"], + "args": ["--bolt-port", "7687", "--log-level=TRACE", "--data-recovery-on-startup=true"], "log_file": "main.log", "setup_queries": [], }, @@ -2310,7 +2310,7 @@ def test_triggers_on_create_before_commit_with_offline_sync_replica(connection): "setup_queries": ["SET REPLICATION ROLE TO REPLICA WITH PORT 10002;"], }, "main": { - "args": ["--bolt-port", "7687", "--log-level=TRACE", "--storage-recover-on-startup=true"], + "args": ["--bolt-port", "7687", "--log-level=TRACE", "--data-recovery-on-startup=true"], "log_file": "main.log", "setup_queries": [], }, diff --git a/tests/e2e/run.sh b/tests/e2e/run.sh index 88b70ae32..9ee4babe9 100755 --- a/tests/e2e/run.sh +++ b/tests/e2e/run.sh @@ -25,7 +25,7 @@ if [ "$#" -eq 0 ]; then # NOTE: If you want to run all tests under specific folder/section just # replace the dot (root directory below) with the folder name, e.g. # `--workloads-root-directory replication`. - python3 runner.py --workloads-root-directory "$SCRIPT_DIR/../../build" + python3 runner.py --workloads-root-directory "$SCRIPT_DIR/../../build/tests/e2e" elif [ "$#" -eq 1 ]; then if [ "$1" == "-h" ] || [ "$1" == "--help" ]; then print_help @@ -34,7 +34,7 @@ elif [ "$#" -eq 1 ]; then # NOTE: --workload-name comes from each individual folder/section # workloads.yaml file. E.g. `streams/workloads.yaml` has a list of # `workloads:` and each workload has it's `-name`. 
- python3 runner.py --workloads-root-directory "$SCRIPT_DIR/../../build" --workload-name "$1" + python3 runner.py --workloads-root-directory "$SCRIPT_DIR/../../build/tests/e2e" --workload-name "$1" else print_help fi diff --git a/tests/e2e/runner.py b/tests/e2e/runner.py index ae022d4d8..282629b20 100755 --- a/tests/e2e/runner.py +++ b/tests/e2e/runner.py @@ -40,6 +40,9 @@ def load_args(): def load_workloads(root_directory): workloads = [] for file in Path(root_directory).rglob("*.yaml"): + # 8.03.2024. - Skip streams e2e tests + if str(file).endswith("/streams/workloads.yaml"): + continue with open(file, "r") as f: workloads.extend(yaml.load(f, Loader=yaml.FullLoader)["workloads"]) return workloads diff --git a/tests/e2e/streams/conftest.py b/tests/e2e/streams/conftest.py index 1bf3544c2..2dadfcb28 100644 --- a/tests/e2e/streams/conftest.py +++ b/tests/e2e/streams/conftest.py @@ -9,6 +9,7 @@ # by the Apache License, Version 2.0, included in the file # licenses/APL.txt. +# import os import pulsar import pytest from common import NAME, PULSAR_SERVICE_URL, connect, execute_and_fetch_all @@ -20,6 +21,9 @@ import requests # To run these test locally a running Kafka sever is necessery. The test tries # to connect on localhost:9092. +# KAFKA_HOSTNAME=os.getenv("KAFKA_HOSTNAME", "localhost") +# PULSAR_HOSTNAME=os.getenv("PULSAR_HOSTNAME", "localhost") +# PULSAR_PORT="6652" if PULSAR_HOSTNAME == "localhost" else "8080" @pytest.fixture(autouse=True) def connection(): diff --git a/tests/e2e/streams/kafka_streams_tests.py b/tests/e2e/streams/kafka_streams_tests.py index b988a6c26..17decdc0f 100755 --- a/tests/e2e/streams/kafka_streams_tests.py +++ b/tests/e2e/streams/kafka_streams_tests.py @@ -11,6 +11,7 @@ # by the Apache License, Version 2.0, included in the file # licenses/APL.txt. 
+# import os import sys import time from multiprocessing import Process @@ -23,7 +24,7 @@ from mg_utils import mg_sleep_and_assert TRANSFORMATIONS_TO_CHECK_C = ["c_transformations.empty_transformation"] TRANSFORMATIONS_TO_CHECK_PY = ["kafka_transform.simple", "kafka_transform.with_parameters"] KAFKA_PRODUCER_SENDING_MSG_DEFAULT_TIMEOUT = 60 - +# KAFKA_HOSTNAME=os.getenv("KAFKA_HOSTNAME", "localhost") @pytest.mark.parametrize("transformation", TRANSFORMATIONS_TO_CHECK_PY) def test_simple(kafka_producer, kafka_topics, connection, transformation): @@ -162,7 +163,7 @@ def test_show_streams(kafka_topics, connection): complex_values_stream = "complex_values" common.create_stream( - cursor, default_values_stream, kafka_topics[0], "kafka_transform.simple", bootstrap_servers="'localhost:29092'" + cursor, default_values_stream, kafka_topics[0], "kafka_transform.simple", bootstrap_servers="'localhost:29092'" ) common.create_stream( cursor, diff --git a/tests/e2e/streams/pulsar_streams_tests.py b/tests/e2e/streams/pulsar_streams_tests.py index cf52416cb..49aa773e6 100755 --- a/tests/e2e/streams/pulsar_streams_tests.py +++ b/tests/e2e/streams/pulsar_streams_tests.py @@ -11,6 +11,7 @@ # by the Apache License, Version 2.0, included in the file # licenses/APL.txt. 
+# import os import sys import time from multiprocessing import Process, Value @@ -20,6 +21,7 @@ import mgclient import pytest TRANSFORMATIONS_TO_CHECK = ["pulsar_transform.simple", "pulsar_transform.with_parameters"] +# PULSAR_HOSTNAME=os.getenv("PULSAR_HOSTNAME", "127.0.0.1") def check_vertex_exists_with_topic_and_payload(cursor, topic, payload_byte): diff --git a/tests/gql_behave/tests/memgraph_V1/features/list_operations.feature b/tests/gql_behave/tests/memgraph_V1/features/list_operations.feature index 8c5538d6b..a6a4b15d2 100644 --- a/tests/gql_behave/tests/memgraph_V1/features/list_operations.feature +++ b/tests/gql_behave/tests/memgraph_V1/features/list_operations.feature @@ -291,3 +291,45 @@ Feature: List operators # Then the result should be: # | years | # | [2021,2003,2003,1999] | + + Scenario: Multiple entries with list pattern comprehension + Given graph "graph_keanu" + When executing query: + """ + MATCH (n:Person) + RETURN n.name, [(n)-->(b:Movie) WHERE b.title CONTAINS 'Matrix' | b.released] AS years + """ + Then an error should be raised + + Scenario: Multiple list pattern comprehensions in Return + Given graph "graph_keanu" + When executing query: + """ + MATCH (n:Person) + RETURN n.name, + [(n)-->(b:Movie) WHERE b.title CONTAINS 'Matrix' | b.released] AS years, + [(n)-->(b:Movie) WHERE b.title CONTAINS 'Matrix' | b.title] AS titles + """ + Then an error should be raised + + Scenario: Function inside pattern comprehension's expression + Given graph "graph_keanu" + When executing query: + """ + MATCH (keanu:Person {name: 'Keanu Reeves'}) + RETURN [p = (keanu)-->(b:Movie) WHERE b.title CONTAINS 'Matrix' | size(nodes(p))] AS nodes + """ + Then an error should be raised + + Scenario: Multiple list pattern comprehensions in With + Given graph "graph_keanu" + When executing query: + """ + MATCH (n) WHERE size(n.name) > 5 + WITH + n AS actor, + [(n)-->(m) WHERE m.released > 2000 | m.title] AS titles, + [(n)-->(m) WHERE m.released > 2000 | m.released] AS 
years + RETURN actor.name, years, titles; + """ + Then an error should be raised diff --git a/tests/gql_behave/tests/memgraph_V1/features/with.feature b/tests/gql_behave/tests/memgraph_V1/features/with.feature index f1882e8d7..53c63b5b0 100644 --- a/tests/gql_behave/tests/memgraph_V1/features/with.feature +++ b/tests/gql_behave/tests/memgraph_V1/features/with.feature @@ -264,3 +264,113 @@ Feature: With | id | | 0 | | 1 | + + Scenario: With test 17: + Given an empty graph + And having executed: + """ + CREATE ({name: "node1"}) + """ + When executing query: + """ + MATCH (n) WITH n AS node + CREATE (m:node.name) + """ + When executing query: + """ + MATCH (n:node1) RETURN n; + """ + Then the result should be: + | n | + | (:node1) | + + Scenario: With test 18: + Given an empty graph + And having executed: + """ + CREATE ({name: "LabelToAdd"}) + """ + When executing query: + """ + MATCH (n) WITH n AS node + SET node:node.name + """ + When executing query: + """ + MATCH (n) RETURN n; + """ + Then the result should be: + | n | + | (:LabelToAdd {name: 'LabelToAdd'}) | + + Scenario: With test 19: + Given an empty graph + And having executed: + """ + CREATE (:labelToRemove {name: 'labelToRemove'}) + """ + When executing query: + """ + MATCH (n) WITH n AS node + REMOVE node:node.name + """ + When executing query: + """ + MATCH (n) RETURN n; + """ + Then the result should be: + | n | + | ({name: 'labelToRemove'}) | + + Scenario: With test 20: + Given an empty graph + And having executed: + """ + CREATE ({name: 'label1'}) + """ + When executing query: + """ + MATCH (n) WITH n AS node + SET node:node.name:label2 + """ + When executing query: + """ + MATCH (n) RETURN n; + """ + Then the result should be: + | n | + | (:label1:label2 {name: 'label1'}) | + + Scenario: With test 21: + Given an empty graph + And having executed: + """ + CREATE ({name: 'label1'}) + """ + When executing query: + """ + MATCH (n) WITH n AS node + SET node:label2:node.name + """ + When executing query: + 
""" + MATCH (n) RETURN n; + """ + Then the result should be: + | n | + | (:label2:label1 {name: 'label1'}) | + + Scenario: With test 22: + Given an empty graph + And having executed: + """ + WITH {value: {label: "labelvalue"}} as label + CREATE (n:label.value.label); + """ + When executing query: + """ + MATCH (n) RETURN n; + """ + Then the result should be: + | n | + | (:labelvalue) | diff --git a/tests/gql_behave/tests/memgraph_V1/graphs/graph_keanu.cypher b/tests/gql_behave/tests/memgraph_V1/graphs/graph_keanu.cypher index a7a72aced..98f48c3c1 100644 --- a/tests/gql_behave/tests/memgraph_V1/graphs/graph_keanu.cypher +++ b/tests/gql_behave/tests/memgraph_V1/graphs/graph_keanu.cypher @@ -1,5 +1,7 @@ CREATE (keanu:Person {name: 'Keanu Reeves'}), + (trinity:Person {name: 'Carrie-Anne Moss'}), + (morpheus:Person {name: 'Laurence Fishburne'}), (johnnyMnemonic:Movie {title: 'Johnny Mnemonic', released: 1995}), (theMatrixRevolutions:Movie {title: 'The Matrix Revolutions', released: 2003}), (theMatrixReloaded:Movie {title: 'The Matrix Reloaded', released: 2003}), @@ -13,4 +15,7 @@ CREATE (keanu)-[:ACTED_IN]->(theReplacements), (keanu)-[:ACTED_IN]->(theMatrix), (keanu)-[:ACTED_IN]->(theDevilsAdvocate), - (keanu)-[:ACTED_IN]->(theMatrixResurrections); + (keanu)-[:ACTED_IN]->(theMatrixResurrections), + (trinity)-[:ACTED_IN]->(theMatrix), + (trinity)-[:ACTED_IN]->(theMatrixReloaded), + (morpheus)-[:ACTED_IN]->(theMatrix); diff --git a/tests/integration/durability/tests/v17/test_all/create_dataset.cypher b/tests/integration/durability/tests/v17/test_all/create_dataset.cypher new file mode 100644 index 000000000..9ee350d9a --- /dev/null +++ b/tests/integration/durability/tests/v17/test_all/create_dataset.cypher @@ -0,0 +1,22 @@ +// --storage-items-per-batch is set to 10 +CREATE INDEX ON :`label2`(`prop2`); +CREATE INDEX ON :`label2`(`prop`); +CREATE INDEX ON :`label`; +CREATE INDEX ON :__mg_vertex__(__mg_id__); +CREATE EDGE INDEX ON :`edge_type`; +CREATE (:`edge_index_from`), 
(:`edge_index_to`); +MATCH (n:`edge_index_from`), (m:`edge_index_to`) CREATE (n)-[r:`edge_type`]->(m); +CREATE (:__mg_vertex__:`label2` {__mg_id__: 0, `prop2`: ["kaj", 2, Null, {`prop4`: -1.341}], `ext`: 2, `prop`: "joj"}); +CREATE (:__mg_vertex__:`label2`:`label` {__mg_id__: 1, `ext`: 2, `prop`: "joj"}); +CREATE (:__mg_vertex__:`label2` {__mg_id__: 2, `prop2`: 2, `prop`: 1}); +CREATE (:__mg_vertex__:`label2` {__mg_id__: 3, `prop2`: 2, `prop`: 2}); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 1 AND v.__mg_id__ = 0 CREATE (u)-[:`link` {`ext`: [false, {`k`: "l"}], `prop`: -1}]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 1 AND v.__mg_id__ = 1 CREATE (u)-[:`link` {`ext`: [false, {`k`: "l"}], `prop`: -1}]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 1 AND v.__mg_id__ = 2 CREATE (u)-[:`link` {`ext`: [false, {`k`: "l"}], `prop`: -1}]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 1 AND v.__mg_id__ = 3 CREATE (u)-[:`link` {`ext`: [false, {`k`: "l"}], `prop`: -1}]->(v); +CREATE CONSTRAINT ON (u:`label`) ASSERT EXISTS (u.`ext`); +CREATE CONSTRAINT ON (u:`label2`) ASSERT u.`prop2`, u.`prop` IS UNIQUE; +ANALYZE GRAPH; +DROP INDEX ON :__mg_vertex__(__mg_id__); +DROP EDGE INDEX ON :`edge_type`; +MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__; diff --git a/tests/integration/durability/tests/v17/test_all/expected_snapshot.cypher b/tests/integration/durability/tests/v17/test_all/expected_snapshot.cypher new file mode 100644 index 000000000..fb2d74667 --- /dev/null +++ b/tests/integration/durability/tests/v17/test_all/expected_snapshot.cypher @@ -0,0 +1,19 @@ + CREATE (:__mg_vertex__:`edge_index_from` {__mg_id__: 0}); + CREATE (:__mg_vertex__:`edge_index_to` {__mg_id__: 1}); + CREATE (:__mg_vertex__:`label2` {__mg_id__: 2, `prop2`: ["kaj", 2, Null, {`prop4`: -1.341}], `ext`: 2, `prop`: "joj"}); + CREATE (:__mg_vertex__:`label2` {__mg_id__: 4, `prop2`: 2, `prop`: 1}); + CREATE 
(:__mg_vertex__:`label2` {__mg_id__: 5, `prop2`: 2, `prop`: 2}); + CREATE (:__mg_vertex__:`label`:`label2` {__mg_id__: 3, `ext`: 2, `prop`: "joj"}); + CREATE CONSTRAINT ON (u:`label2`) ASSERT u.`prop2`, u.`prop` IS UNIQUE; + CREATE CONSTRAINT ON (u:`label`) ASSERT EXISTS (u.`ext`); + CREATE INDEX ON :__mg_vertex__(__mg_id__); + CREATE INDEX ON :`label2`(`prop2`); + CREATE INDEX ON :`label2`(`prop`); + CREATE INDEX ON :`label`; + DROP INDEX ON :__mg_vertex__(__mg_id__); + MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__; + MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 0 AND v.__mg_id__ = 1 CREATE (u)-[:`edge_type`]->(v); + MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 3 AND v.__mg_id__ = 2 CREATE (u)-[:`link` {`ext`: [false, {`k`: "l"}], `prop`: -1}]->(v); + MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 3 AND v.__mg_id__ = 3 CREATE (u)-[:`link` {`ext`: [false, {`k`: "l"}], `prop`: -1}]->(v); + MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 3 AND v.__mg_id__ = 4 CREATE (u)-[:`link` {`ext`: [false, {`k`: "l"}], `prop`: -1}]->(v); + MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 3 AND v.__mg_id__ = 5 CREATE (u)-[:`link` {`ext`: [false, {`k`: "l"}], `prop`: -1}]->(v); diff --git a/tests/integration/durability/tests/v17/test_all/expected_wal.cypher b/tests/integration/durability/tests/v17/test_all/expected_wal.cypher new file mode 100644 index 000000000..33efec9e2 --- /dev/null +++ b/tests/integration/durability/tests/v17/test_all/expected_wal.cypher @@ -0,0 +1,19 @@ + CREATE (:__mg_vertex__:`edge_index_from` {__mg_id__: 0}); + CREATE (:__mg_vertex__:`edge_index_to` {__mg_id__: 1}); + CREATE (:__mg_vertex__:`label2` {__mg_id__: 2, `prop2`: ["kaj", 2, Null, {`prop4`: -1.341}], `prop`: "joj", `ext`: 2}); + CREATE (:__mg_vertex__:`label2` {__mg_id__: 4, `prop2`: 2, `prop`: 1}); + CREATE (:__mg_vertex__:`label2` {__mg_id__: 5, `prop2`: 2, `prop`: 2}); + CREATE (:__mg_vertex__:`label`:`label2` 
{__mg_id__: 3, `prop`: "joj", `ext`: 2}); + CREATE CONSTRAINT ON (u:`label2`) ASSERT u.`prop2`, u.`prop` IS UNIQUE; + CREATE CONSTRAINT ON (u:`label`) ASSERT EXISTS (u.`ext`); + CREATE INDEX ON :__mg_vertex__(__mg_id__); + CREATE INDEX ON :`label2`(`prop2`); + CREATE INDEX ON :`label2`(`prop`); + CREATE INDEX ON :`label`; + DROP INDEX ON :__mg_vertex__(__mg_id__); + MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__; + MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 0 AND v.__mg_id__ = 1 CREATE (u)-[:`edge_type`]->(v); + MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 3 AND v.__mg_id__ = 2 CREATE (u)-[:`link` {`prop`: -1, `ext`: [false, {`k`: "l"}]}]->(v); + MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 3 AND v.__mg_id__ = 3 CREATE (u)-[:`link` {`prop`: -1, `ext`: [false, {`k`: "l"}]}]->(v); + MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 3 AND v.__mg_id__ = 4 CREATE (u)-[:`link` {`prop`: -1, `ext`: [false, {`k`: "l"}]}]->(v); + MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 3 AND v.__mg_id__ = 5 CREATE (u)-[:`link` {`prop`: -1, `ext`: [false, {`k`: "l"}]}]->(v); diff --git a/tests/integration/durability/tests/v17/test_all/snapshot.bin b/tests/integration/durability/tests/v17/test_all/snapshot.bin new file mode 100644 index 000000000..9cc54b480 Binary files /dev/null and b/tests/integration/durability/tests/v17/test_all/snapshot.bin differ diff --git a/tests/integration/durability/tests/v17/test_all/wal.bin b/tests/integration/durability/tests/v17/test_all/wal.bin new file mode 100644 index 000000000..61a33372d Binary files /dev/null and b/tests/integration/durability/tests/v17/test_all/wal.bin differ diff --git a/tests/integration/durability/tests/v17/test_constraints/create_dataset.cypher b/tests/integration/durability/tests/v17/test_constraints/create_dataset.cypher new file mode 100644 index 000000000..96bb4bac4 --- /dev/null +++ 
b/tests/integration/durability/tests/v17/test_constraints/create_dataset.cypher @@ -0,0 +1,6 @@ +CREATE CONSTRAINT ON (u:`label2`) ASSERT EXISTS (u.`ext2`); +CREATE CONSTRAINT ON (u:`label`) ASSERT EXISTS (u.`ext`); +CREATE CONSTRAINT ON (u:`label`) ASSERT u.`a` IS UNIQUE; +CREATE CONSTRAINT ON (u:`label`) ASSERT u.`b` IS UNIQUE; +CREATE CONSTRAINT ON (u:`label`) ASSERT u.`c` IS UNIQUE; +CREATE CONSTRAINT ON (u:`label2`) ASSERT u.`a`, u.`b` IS UNIQUE; diff --git a/tests/integration/durability/tests/v17/test_constraints/expected_snapshot.cypher b/tests/integration/durability/tests/v17/test_constraints/expected_snapshot.cypher new file mode 100644 index 000000000..fbe2c28ab --- /dev/null +++ b/tests/integration/durability/tests/v17/test_constraints/expected_snapshot.cypher @@ -0,0 +1,6 @@ +CREATE CONSTRAINT ON (u:`label2`) ASSERT EXISTS (u.`ext2`); +CREATE CONSTRAINT ON (u:`label`) ASSERT EXISTS (u.`ext`); +CREATE CONSTRAINT ON (u:`label`) ASSERT u.`c` IS UNIQUE; +CREATE CONSTRAINT ON (u:`label`) ASSERT u.`b` IS UNIQUE; +CREATE CONSTRAINT ON (u:`label`) ASSERT u.`a` IS UNIQUE; +CREATE CONSTRAINT ON (u:`label2`) ASSERT u.`b`, u.`a` IS UNIQUE; diff --git a/tests/integration/durability/tests/v17/test_constraints/expected_wal.cypher b/tests/integration/durability/tests/v17/test_constraints/expected_wal.cypher new file mode 100644 index 000000000..9260455ed --- /dev/null +++ b/tests/integration/durability/tests/v17/test_constraints/expected_wal.cypher @@ -0,0 +1,6 @@ +CREATE CONSTRAINT ON (u:`label2`) ASSERT EXISTS (u.`ext2`); +CREATE CONSTRAINT ON (u:`label`) ASSERT EXISTS (u.`ext`); +CREATE CONSTRAINT ON (u:`label2`) ASSERT u.`a`, u.`b` IS UNIQUE; +CREATE CONSTRAINT ON (u:`label`) ASSERT u.`a` IS UNIQUE; +CREATE CONSTRAINT ON (u:`label`) ASSERT u.`b` IS UNIQUE; +CREATE CONSTRAINT ON (u:`label`) ASSERT u.`c` IS UNIQUE; diff --git a/tests/integration/durability/tests/v17/test_constraints/snapshot.bin b/tests/integration/durability/tests/v17/test_constraints/snapshot.bin 
new file mode 100644 index 000000000..76986ab9a Binary files /dev/null and b/tests/integration/durability/tests/v17/test_constraints/snapshot.bin differ diff --git a/tests/integration/durability/tests/v17/test_constraints/wal.bin b/tests/integration/durability/tests/v17/test_constraints/wal.bin new file mode 100644 index 000000000..f2d54e5fd Binary files /dev/null and b/tests/integration/durability/tests/v17/test_constraints/wal.bin differ diff --git a/tests/integration/durability/tests/v17/test_edges/create_dataset.cypher b/tests/integration/durability/tests/v17/test_edges/create_dataset.cypher new file mode 100644 index 000000000..ab3b3af6d --- /dev/null +++ b/tests/integration/durability/tests/v17/test_edges/create_dataset.cypher @@ -0,0 +1,60 @@ +// --storage-items-per-batch is set to 7 +CREATE INDEX ON :__mg_vertex__(__mg_id__); +CREATE (:__mg_vertex__ {__mg_id__: 0}); +CREATE (:__mg_vertex__ {__mg_id__: 1}); +CREATE (:__mg_vertex__ {__mg_id__: 2}); +CREATE (:__mg_vertex__ {__mg_id__: 3}); +CREATE (:__mg_vertex__ {__mg_id__: 4}); +CREATE (:__mg_vertex__ {__mg_id__: 5}); +CREATE (:__mg_vertex__ {__mg_id__: 6}); +CREATE (:__mg_vertex__ {__mg_id__: 7}); +CREATE (:__mg_vertex__ {__mg_id__: 8}); +CREATE (:__mg_vertex__ {__mg_id__: 9}); +CREATE (:__mg_vertex__ {__mg_id__: 10}); +CREATE (:__mg_vertex__ {__mg_id__: 11}); +CREATE (:__mg_vertex__ {__mg_id__: 12}); +CREATE (:__mg_vertex__ {__mg_id__: 13}); +CREATE (:__mg_vertex__:`label` {__mg_id__: 14}); +CREATE (:__mg_vertex__:`label` {__mg_id__: 15}); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 0 AND v.__mg_id__ = 1 CREATE (u)-[:`edge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 2 AND v.__mg_id__ = 3 CREATE (u)-[:`edge` {`prop`: 11}]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 4 AND v.__mg_id__ = 5 CREATE (u)-[:`edge` {`prop`: true}]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 6 AND v.__mg_id__ = 7 CREATE 
(u)-[:`edge2`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 8 AND v.__mg_id__ = 9 CREATE (u)-[:`edge2` {`prop`: -3.141}]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 10 AND v.__mg_id__ = 11 CREATE (u)-[:`edgelink` {`prop`: {`prop`: 1, `prop2`: {`prop4`: 9}}}]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 12 AND v.__mg_id__ = 13 CREATE (u)-[:`edgelink` {`prop`: [1, Null, false, "\n\n\n\n\\\"\"\n\t"]}]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 0 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 1 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 2 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 3 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 4 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 5 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 6 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 7 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 8 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 9 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 10 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 11 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 12 CREATE 
(u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 13 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 14 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 15 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 0 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 1 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 2 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 3 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 4 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 5 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 6 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 7 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 8 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 9 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 10 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 11 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 12 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND 
v.__mg_id__ = 13 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 14 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 15 CREATE (u)-[:`testedge`]->(v); +ANALYZE GRAPH; +DROP INDEX ON :__mg_vertex__(__mg_id__); +MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__; diff --git a/tests/integration/durability/tests/v17/test_edges/expected_snapshot.cypher b/tests/integration/durability/tests/v17/test_edges/expected_snapshot.cypher new file mode 100644 index 000000000..596753ba5 --- /dev/null +++ b/tests/integration/durability/tests/v17/test_edges/expected_snapshot.cypher @@ -0,0 +1,58 @@ +CREATE INDEX ON :__mg_vertex__(__mg_id__); +CREATE (:__mg_vertex__ {__mg_id__: 0}); +CREATE (:__mg_vertex__ {__mg_id__: 1}); +CREATE (:__mg_vertex__ {__mg_id__: 2}); +CREATE (:__mg_vertex__ {__mg_id__: 3}); +CREATE (:__mg_vertex__ {__mg_id__: 4}); +CREATE (:__mg_vertex__ {__mg_id__: 5}); +CREATE (:__mg_vertex__ {__mg_id__: 6}); +CREATE (:__mg_vertex__ {__mg_id__: 7}); +CREATE (:__mg_vertex__ {__mg_id__: 8}); +CREATE (:__mg_vertex__ {__mg_id__: 9}); +CREATE (:__mg_vertex__ {__mg_id__: 10}); +CREATE (:__mg_vertex__ {__mg_id__: 11}); +CREATE (:__mg_vertex__ {__mg_id__: 12}); +CREATE (:__mg_vertex__ {__mg_id__: 13}); +CREATE (:__mg_vertex__:`label` {__mg_id__: 14}); +CREATE (:__mg_vertex__:`label` {__mg_id__: 15}); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 0 AND v.__mg_id__ = 1 CREATE (u)-[:`edge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 2 AND v.__mg_id__ = 3 CREATE (u)-[:`edge` {`prop`: 11}]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 4 AND v.__mg_id__ = 5 CREATE (u)-[:`edge` {`prop`: true}]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 6 AND v.__mg_id__ = 7 CREATE (u)-[:`edge2`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 8 AND 
v.__mg_id__ = 9 CREATE (u)-[:`edge2` {`prop`: -3.141}]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 10 AND v.__mg_id__ = 11 CREATE (u)-[:`edgelink` {`prop`: {`prop`: 1, `prop2`: {`prop4`: 9}}}]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 12 AND v.__mg_id__ = 13 CREATE (u)-[:`edgelink` {`prop`: [1, Null, false, "\n\n\n\n\\\"\"\n\t"]}]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 0 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 1 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 2 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 3 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 4 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 5 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 6 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 7 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 8 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 9 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 10 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 11 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 12 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 
AND v.__mg_id__ = 13 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 14 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 15 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 0 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 1 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 2 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 3 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 4 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 5 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 6 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 7 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 8 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 9 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 10 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 11 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 12 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 13 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE 
u.__mg_id__ = 15 AND v.__mg_id__ = 14 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 15 CREATE (u)-[:`testedge`]->(v); +DROP INDEX ON :__mg_vertex__(__mg_id__); +MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__; diff --git a/tests/integration/durability/tests/v17/test_edges/expected_wal.cypher b/tests/integration/durability/tests/v17/test_edges/expected_wal.cypher new file mode 100644 index 000000000..596753ba5 --- /dev/null +++ b/tests/integration/durability/tests/v17/test_edges/expected_wal.cypher @@ -0,0 +1,58 @@ +CREATE INDEX ON :__mg_vertex__(__mg_id__); +CREATE (:__mg_vertex__ {__mg_id__: 0}); +CREATE (:__mg_vertex__ {__mg_id__: 1}); +CREATE (:__mg_vertex__ {__mg_id__: 2}); +CREATE (:__mg_vertex__ {__mg_id__: 3}); +CREATE (:__mg_vertex__ {__mg_id__: 4}); +CREATE (:__mg_vertex__ {__mg_id__: 5}); +CREATE (:__mg_vertex__ {__mg_id__: 6}); +CREATE (:__mg_vertex__ {__mg_id__: 7}); +CREATE (:__mg_vertex__ {__mg_id__: 8}); +CREATE (:__mg_vertex__ {__mg_id__: 9}); +CREATE (:__mg_vertex__ {__mg_id__: 10}); +CREATE (:__mg_vertex__ {__mg_id__: 11}); +CREATE (:__mg_vertex__ {__mg_id__: 12}); +CREATE (:__mg_vertex__ {__mg_id__: 13}); +CREATE (:__mg_vertex__:`label` {__mg_id__: 14}); +CREATE (:__mg_vertex__:`label` {__mg_id__: 15}); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 0 AND v.__mg_id__ = 1 CREATE (u)-[:`edge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 2 AND v.__mg_id__ = 3 CREATE (u)-[:`edge` {`prop`: 11}]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 4 AND v.__mg_id__ = 5 CREATE (u)-[:`edge` {`prop`: true}]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 6 AND v.__mg_id__ = 7 CREATE (u)-[:`edge2`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 8 AND v.__mg_id__ = 9 CREATE (u)-[:`edge2` {`prop`: -3.141}]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 10 AND 
v.__mg_id__ = 11 CREATE (u)-[:`edgelink` {`prop`: {`prop`: 1, `prop2`: {`prop4`: 9}}}]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 12 AND v.__mg_id__ = 13 CREATE (u)-[:`edgelink` {`prop`: [1, Null, false, "\n\n\n\n\\\"\"\n\t"]}]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 0 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 1 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 2 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 3 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 4 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 5 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 6 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 7 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 8 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 9 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 10 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 11 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 12 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 13 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ 
= 14 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 14 AND v.__mg_id__ = 15 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 0 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 1 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 2 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 3 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 4 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 5 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 6 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 7 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 8 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 9 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 10 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 11 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 12 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 13 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 AND v.__mg_id__ = 14 CREATE (u)-[:`testedge`]->(v); +MATCH (u:__mg_vertex__), (v:__mg_vertex__) WHERE u.__mg_id__ = 15 
AND v.__mg_id__ = 15 CREATE (u)-[:`testedge`]->(v); +DROP INDEX ON :__mg_vertex__(__mg_id__); +MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__; diff --git a/tests/integration/durability/tests/v17/test_edges/snapshot.bin b/tests/integration/durability/tests/v17/test_edges/snapshot.bin new file mode 100644 index 000000000..070bbe530 Binary files /dev/null and b/tests/integration/durability/tests/v17/test_edges/snapshot.bin differ diff --git a/tests/integration/durability/tests/v17/test_edges/wal.bin b/tests/integration/durability/tests/v17/test_edges/wal.bin new file mode 100644 index 000000000..914f49154 Binary files /dev/null and b/tests/integration/durability/tests/v17/test_edges/wal.bin differ diff --git a/tests/integration/durability/tests/v17/test_indices/create_dataset.cypher b/tests/integration/durability/tests/v17/test_indices/create_dataset.cypher new file mode 100644 index 000000000..739062f19 --- /dev/null +++ b/tests/integration/durability/tests/v17/test_indices/create_dataset.cypher @@ -0,0 +1,6 @@ +CREATE INDEX ON :`label2`; +CREATE INDEX ON :`label2`(`prop2`); +CREATE INDEX ON :`label`(`prop2`); +CREATE INDEX ON :`label`(`prop`); +CREATE EDGE INDEX ON :`edgetype`; +ANALYZE GRAPH; diff --git a/tests/integration/durability/tests/v17/test_indices/expected_snapshot.cypher b/tests/integration/durability/tests/v17/test_indices/expected_snapshot.cypher new file mode 100644 index 000000000..1e930697a --- /dev/null +++ b/tests/integration/durability/tests/v17/test_indices/expected_snapshot.cypher @@ -0,0 +1,5 @@ +CREATE INDEX ON :`label2`; +CREATE INDEX ON :`label`(`prop`); +CREATE INDEX ON :`label`(`prop2`); +CREATE INDEX ON :`label2`(`prop2`); +CREATE EDGE INDEX ON :`edgetype`; diff --git a/tests/integration/durability/tests/v17/test_indices/expected_wal.cypher b/tests/integration/durability/tests/v17/test_indices/expected_wal.cypher new file mode 100644 index 000000000..bfae88b0b --- /dev/null +++ 
b/tests/integration/durability/tests/v17/test_indices/expected_wal.cypher @@ -0,0 +1,5 @@ +CREATE INDEX ON :`label2`; +CREATE INDEX ON :`label2`(`prop2`); +CREATE INDEX ON :`label`(`prop2`); +CREATE INDEX ON :`label`(`prop`); +CREATE EDGE INDEX ON :`edgetype`; diff --git a/tests/integration/durability/tests/v17/test_indices/snapshot.bin b/tests/integration/durability/tests/v17/test_indices/snapshot.bin new file mode 100644 index 000000000..99ad6e0ea Binary files /dev/null and b/tests/integration/durability/tests/v17/test_indices/snapshot.bin differ diff --git a/tests/integration/durability/tests/v17/test_indices/wal.bin b/tests/integration/durability/tests/v17/test_indices/wal.bin new file mode 100644 index 000000000..661cba6c1 Binary files /dev/null and b/tests/integration/durability/tests/v17/test_indices/wal.bin differ diff --git a/tests/integration/durability/tests/v17/test_vertices/create_dataset.cypher b/tests/integration/durability/tests/v17/test_vertices/create_dataset.cypher new file mode 100644 index 000000000..061df375b --- /dev/null +++ b/tests/integration/durability/tests/v17/test_vertices/create_dataset.cypher @@ -0,0 +1,18 @@ +// --storage-items-per-batch is set to 5 +CREATE INDEX ON :__mg_vertex__(__mg_id__); +CREATE (:__mg_vertex__ {__mg_id__: 0}); +CREATE (:__mg_vertex__:`label` {__mg_id__: 1}); +CREATE (:__mg_vertex__:`label` {__mg_id__: 2, `prop`: false}); +CREATE (:__mg_vertex__:`label` {__mg_id__: 3, `prop`: true}); +CREATE (:__mg_vertex__:`label2` {__mg_id__: 4, `prop`: 1}); +CREATE (:__mg_vertex__:`label` {__mg_id__: 5, `prop2`: 3.141}); +CREATE (:__mg_vertex__:`label6` {__mg_id__: 6, `prop3`: true, `prop2`: -314000000}); +CREATE (:__mg_vertex__:`label3`:`label1`:`label2` {__mg_id__: 7}); +CREATE (:__mg_vertex__:`label` {__mg_id__: 8, `prop3`: "str", `prop2`: 2, `prop`: 1}); +CREATE (:__mg_vertex__:`label2`:`label1` {__mg_id__: 9, `prop`: {`prop_nes`: "kaj je"}}); +CREATE (:__mg_vertex__:`label` {__mg_id__: 10, `prop_array`: [1, false, Null, 
"str", {`prop2`: 2}]}); +CREATE (:__mg_vertex__:`label3`:`label` {__mg_id__: 11, `prop`: {`prop`: [1, false], `prop2`: {}, `prop3`: "test2", `prop4`: "test"}}); +CREATE (:__mg_vertex__ {__mg_id__: 12, `prop`: " \n\"\'\t\\%"}); +ANALYZE GRAPH; +DROP INDEX ON :__mg_vertex__(__mg_id__); +MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__; diff --git a/tests/integration/durability/tests/v17/test_vertices/expected_snapshot.cypher b/tests/integration/durability/tests/v17/test_vertices/expected_snapshot.cypher new file mode 100644 index 000000000..ecdc1229e --- /dev/null +++ b/tests/integration/durability/tests/v17/test_vertices/expected_snapshot.cypher @@ -0,0 +1,16 @@ +CREATE INDEX ON :__mg_vertex__(__mg_id__); +CREATE (:__mg_vertex__ {__mg_id__: 0}); +CREATE (:__mg_vertex__:`label` {__mg_id__: 1}); +CREATE (:__mg_vertex__:`label` {__mg_id__: 2, `prop`: false}); +CREATE (:__mg_vertex__:`label` {__mg_id__: 3, `prop`: true}); +CREATE (:__mg_vertex__:`label2` {__mg_id__: 4, `prop`: 1}); +CREATE (:__mg_vertex__:`label` {__mg_id__: 5, `prop2`: 3.141}); +CREATE (:__mg_vertex__:`label6` {__mg_id__: 6, `prop3`: true, `prop2`: -314000000}); +CREATE (:__mg_vertex__:`label2`:`label3`:`label1` {__mg_id__: 7}); +CREATE (:__mg_vertex__:`label` {__mg_id__: 8, `prop3`: "str", `prop2`: 2, `prop`: 1}); +CREATE (:__mg_vertex__:`label1`:`label2` {__mg_id__: 9, `prop`: {`prop_nes`: "kaj je"}}); +CREATE (:__mg_vertex__:`label` {__mg_id__: 10, `prop_array`: [1, false, Null, "str", {`prop2`: 2}]}); +CREATE (:__mg_vertex__:`label`:`label3` {__mg_id__: 11, `prop`: {`prop`: [1, false], `prop2`: {}, `prop3`: "test2", `prop4`: "test"}}); +CREATE (:__mg_vertex__ {__mg_id__: 12, `prop`: " \n\"\'\t\\%"}); +DROP INDEX ON :__mg_vertex__(__mg_id__); +MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__; diff --git a/tests/integration/durability/tests/v17/test_vertices/expected_wal.cypher b/tests/integration/durability/tests/v17/test_vertices/expected_wal.cypher new file mode 100644 index 000000000..d8f758737 --- 
/dev/null +++ b/tests/integration/durability/tests/v17/test_vertices/expected_wal.cypher @@ -0,0 +1,16 @@ +CREATE INDEX ON :__mg_vertex__(__mg_id__); +CREATE (:__mg_vertex__ {__mg_id__: 0}); +CREATE (:__mg_vertex__:`label` {__mg_id__: 1}); +CREATE (:__mg_vertex__:`label` {__mg_id__: 2, `prop`: false}); +CREATE (:__mg_vertex__:`label` {__mg_id__: 3, `prop`: true}); +CREATE (:__mg_vertex__:`label2` {__mg_id__: 4, `prop`: 1}); +CREATE (:__mg_vertex__:`label` {__mg_id__: 5, `prop2`: 3.141}); +CREATE (:__mg_vertex__:`label6` {__mg_id__: 6, `prop2`: -314000000, `prop3`: true}); +CREATE (:__mg_vertex__:`label2`:`label3`:`label1` {__mg_id__: 7}); +CREATE (:__mg_vertex__:`label` {__mg_id__: 8, `prop`: 1, `prop2`: 2, `prop3`: "str"}); +CREATE (:__mg_vertex__:`label1`:`label2` {__mg_id__: 9, `prop`: {`prop_nes`: "kaj je"}}); +CREATE (:__mg_vertex__:`label` {__mg_id__: 10, `prop_array`: [1, false, Null, "str", {`prop2`: 2}]}); +CREATE (:__mg_vertex__:`label`:`label3` {__mg_id__: 11, `prop`: {`prop`: [1, false], `prop2`: {}, `prop3`: "test2", `prop4`: "test"}}); +CREATE (:__mg_vertex__ {__mg_id__: 12, `prop`: " \n\"\'\t\\%"}); +DROP INDEX ON :__mg_vertex__(__mg_id__); +MATCH (u) REMOVE u:__mg_vertex__, u.__mg_id__; diff --git a/tests/integration/durability/tests/v17/test_vertices/snapshot.bin b/tests/integration/durability/tests/v17/test_vertices/snapshot.bin new file mode 100644 index 000000000..8a67d9a7d Binary files /dev/null and b/tests/integration/durability/tests/v17/test_vertices/snapshot.bin differ diff --git a/tests/integration/durability/tests/v17/test_vertices/wal.bin b/tests/integration/durability/tests/v17/test_vertices/wal.bin new file mode 100644 index 000000000..304db455f Binary files /dev/null and b/tests/integration/durability/tests/v17/test_vertices/wal.bin differ diff --git a/tests/manual/interactive_planning.cpp b/tests/manual/interactive_planning.cpp index 3f64c4f37..f0f60ba91 100644 --- a/tests/manual/interactive_planning.cpp +++ 
b/tests/manual/interactive_planning.cpp @@ -214,6 +214,8 @@ class InteractiveDbAccessor { return label_property_index_.at(key); } + bool EdgeTypeIndexExists(memgraph::storage::EdgeTypeId edge_type) { return true; } + std::optional<memgraph::storage::LabelIndexStats> GetIndexStats(const memgraph::storage::LabelId label) const { return dba_->GetIndexStats(label); } diff --git a/tests/mgbench/runners.py b/tests/mgbench/runners.py index e1f52b696..155ceac06 100644 --- a/tests/mgbench/runners.py +++ b/tests/mgbench/runners.py @@ -416,6 +416,7 @@ class Memgraph(BaseRunner): def __init__(self, benchmark_context: BenchmarkContext): super().__init__(benchmark_context=benchmark_context) self._memgraph_binary = benchmark_context.vendor_binary + self._bolt_num_workers = benchmark_context.num_workers_for_benchmark self._performance_tracking = benchmark_context.performance_tracking self._directory = tempfile.TemporaryDirectory(dir=benchmark_context.temporary_directory) self._vendor_args = benchmark_context.vendor_args @@ -440,6 +441,7 @@ class Memgraph(BaseRunner): kwargs["bolt_port"] = self._bolt_port kwargs["data_directory"] = data_directory kwargs["storage_properties_on_edges"] = True + kwargs["bolt_num_workers"] = self._bolt_num_workers for key, value in self._vendor_args.items(): kwargs[key] = value return _convert_args_to_flags(self._memgraph_binary, **kwargs) diff --git a/tests/setup.sh b/tests/setup.sh index 7cab86db6..b91eea7c3 100755 --- a/tests/setup.sh +++ b/tests/setup.sh @@ -37,6 +37,7 @@ pip --timeout 1000 install "pulsar-client==3.1.0" for pkg in "${PIP_DEPS[@]}"; do pip --timeout 1000 install "$pkg" done +pip --timeout 1000 install "networkx==2.4" # Install mgclient from source becasue of full flexibility. 
pushd "$DIR/../libs/pymgclient" > /dev/null diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index 6f7b3bbef..44b24b6f6 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -430,3 +430,25 @@ target_include_directories(${test_prefix}distributed_lamport_clock PRIVATE ${CMA add_unit_test(query_hint_provider.cpp) target_link_libraries(${test_prefix}query_hint_provider mg-query mg-glue) + + +# Test coordination +if(MG_ENTERPRISE) +add_unit_test(coordination_utils.cpp) +target_link_libraries(${test_prefix}coordination_utils gflags mg-coordination mg-repl_coord_glue) +target_include_directories(${test_prefix}coordination_utils PRIVATE ${CMAKE_SOURCE_DIR}/include) +endif() + +# Test Raft log serialization +if(MG_ENTERPRISE) +add_unit_test(raft_log_serialization.cpp) +target_link_libraries(${test_prefix}raft_log_serialization gflags mg-coordination mg-repl_coord_glue) +target_include_directories(${test_prefix}raft_log_serialization PRIVATE ${CMAKE_SOURCE_DIR}/include) +endif() + +# Test Raft log serialization +if(MG_ENTERPRISE) +add_unit_test(coordinator_cluster_state.cpp) +target_link_libraries(${test_prefix}coordinator_cluster_state gflags mg-coordination mg-repl_coord_glue) +target_include_directories(${test_prefix}coordinator_cluster_state PRIVATE ${CMAKE_SOURCE_DIR}/include) +endif() diff --git a/tests/unit/coordination_utils.cpp b/tests/unit/coordination_utils.cpp new file mode 100644 index 000000000..7c77b4e68 --- /dev/null +++ b/tests/unit/coordination_utils.cpp @@ -0,0 +1,236 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. 
+// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +#include <gflags/gflags.h> +#include <gtest/gtest.h> +#include "coordination/coordinator_instance.hpp" +#include "dbms/constants.hpp" +#include "replication_coordination_glue/common.hpp" +#include "utils/functional.hpp" + +class CoordinationUtils : public ::testing::Test { + protected: + void SetUp() override {} + + void TearDown() override {} + + std::filesystem::path test_folder_{std::filesystem::temp_directory_path() / "MG_tests_unit_coordination"}; +}; + +TEST_F(CoordinationUtils, MemgraphDbHistorySimple) { + // Choose any if everything is same + // X = dead + // Main : A(24) B(36) C(48) D(50) E(51) X + // replica 1: A(24) B(36) C(48) D(50) E(51) + // replica 2: A(24) B(36) C(48) D(50) E(51) + // replica 3: A(24) B(36) C(48) D(50) E(51) + std::vector<std::pair<std::string, memgraph::replication_coordination_glue::DatabaseHistories>> + instance_database_histories; + + std::vector<std::pair<memgraph::utils::UUID, uint64_t>> histories; + histories.emplace_back(memgraph::utils::UUID{}, 24); + histories.emplace_back(memgraph::utils::UUID{}, 36); + histories.emplace_back(memgraph::utils::UUID{}, 48); + histories.emplace_back(memgraph::utils::UUID{}, 50); + histories.emplace_back(memgraph::utils::UUID{}, 51); + + memgraph::utils::UUID db_uuid; + std::string default_name = std::string(memgraph::dbms::kDefaultDB); + + auto db_histories = memgraph::utils::fmap(histories, [](const std::pair<memgraph::utils::UUID, uint64_t> &pair) { + return std::make_pair(std::string(pair.first), pair.second); + }); + + memgraph::replication_coordination_glue::DatabaseHistory history{ + .db_uuid = db_uuid, .history = db_histories, .name = default_name}; + + memgraph::replication_coordination_glue::DatabaseHistories instance_1_db_histories_{history}; + 
instance_database_histories.emplace_back("instance_1", instance_1_db_histories_); + + memgraph::replication_coordination_glue::DatabaseHistories instance_2_db_histories_{history}; + instance_database_histories.emplace_back("instance_2", instance_2_db_histories_); + + memgraph::replication_coordination_glue::DatabaseHistories instance_3_db_histories_{history}; + instance_database_histories.emplace_back("instance_3", instance_3_db_histories_); + memgraph::coordination::CoordinatorInstance instance; + + auto [instance_name, latest_epoch, latest_commit_timestamp] = + instance.ChooseMostUpToDateInstance(instance_database_histories); + ASSERT_TRUE(instance_name == "instance_1" || instance_name == "instance_2" || instance_name == "instance_3"); + ASSERT_TRUE(latest_epoch == db_histories.back().first); + ASSERT_TRUE(latest_commit_timestamp == db_histories.back().second); +} + +TEST_F(CoordinationUtils, MemgraphDbHistoryLastEpochDifferent) { + // Prioritize one with the biggest last commit timestamp on last epoch + // X = dead + // Main : A(24) B(36) C(48) D(50) E(59) X + // replica 1: A(24) B(12) C(15) D(17) E(51) + // replica 2: A(24) B(12) C(15) D(17) E(57) + // replica 3: A(24) B(12) C(15) D(17) E(59) + std::vector<std::pair<std::string, memgraph::replication_coordination_glue::DatabaseHistories>> + instance_database_histories; + + std::vector<std::pair<memgraph::utils::UUID, uint64_t>> histories; + histories.emplace_back(memgraph::utils::UUID{}, 24); + histories.emplace_back(memgraph::utils::UUID{}, 36); + histories.emplace_back(memgraph::utils::UUID{}, 48); + histories.emplace_back(memgraph::utils::UUID{}, 50); + histories.emplace_back(memgraph::utils::UUID{}, 59); + + memgraph::utils::UUID db_uuid; + std::string default_name = std::string(memgraph::dbms::kDefaultDB); + + auto db_histories = memgraph::utils::fmap(histories, [](const std::pair<memgraph::utils::UUID, uint64_t> &pair) { + return std::make_pair(std::string(pair.first), pair.second); + }); + + 
db_histories.back().second = 51; + memgraph::replication_coordination_glue::DatabaseHistory history1{ + .db_uuid = db_uuid, .history = db_histories, .name = default_name}; + + memgraph::replication_coordination_glue::DatabaseHistories instance_1_db_histories_{history1}; + instance_database_histories.emplace_back("instance_1", instance_1_db_histories_); + + db_histories.back().second = 57; + memgraph::replication_coordination_glue::DatabaseHistory history2{ + .db_uuid = db_uuid, .history = db_histories, .name = default_name}; + memgraph::replication_coordination_glue::DatabaseHistories instance_2_db_histories_{history2}; + instance_database_histories.emplace_back("instance_2", instance_2_db_histories_); + + db_histories.back().second = 59; + memgraph::replication_coordination_glue::DatabaseHistory history3{ + .db_uuid = db_uuid, .history = db_histories, .name = default_name}; + memgraph::replication_coordination_glue::DatabaseHistories instance_3_db_histories_{history3}; + instance_database_histories.emplace_back("instance_3", instance_3_db_histories_); + + memgraph::coordination::CoordinatorInstance instance; + auto [instance_name, latest_epoch, latest_commit_timestamp] = + instance.ChooseMostUpToDateInstance(instance_database_histories); + + ASSERT_TRUE(instance_name == "instance_3"); + ASSERT_TRUE(latest_epoch == db_histories.back().first); + ASSERT_TRUE(latest_commit_timestamp == db_histories.back().second); +} + +TEST_F(CoordinationUtils, MemgraphDbHistoryOneInstanceAheadFewEpochs) { + // Prioritize one biggest commit timestamp + // X = dead + // Main : A(24) B(36) C(48) D(50) E(51) X X X X + // replica 1: A(24) B(36) C(48) D(50) E(51) F(60) G(65) X up + // replica 2: A(24) B(36) C(48) D(50) E(51) X X X up + // replica 3: A(24) B(36) C(48) D(50) E(51) X X X up + std::vector<std::pair<std::string, memgraph::replication_coordination_glue::DatabaseHistories>> + instance_database_histories; + + std::vector<std::pair<memgraph::utils::UUID, uint64_t>> histories; + 
histories.emplace_back(memgraph::utils::UUID{}, 24); + histories.emplace_back(memgraph::utils::UUID{}, 36); + histories.emplace_back(memgraph::utils::UUID{}, 48); + histories.emplace_back(memgraph::utils::UUID{}, 50); + histories.emplace_back(memgraph::utils::UUID{}, 51); + + memgraph::utils::UUID db_uuid; + std::string default_name = std::string(memgraph::dbms::kDefaultDB); + + auto db_histories = memgraph::utils::fmap(histories, [](const std::pair<memgraph::utils::UUID, uint64_t> &pair) { + return std::make_pair(std::string(pair.first), pair.second); + }); + + memgraph::replication_coordination_glue::DatabaseHistory history{ + .db_uuid = db_uuid, .history = db_histories, .name = default_name}; + + memgraph::replication_coordination_glue::DatabaseHistories instance_1_db_histories_{history}; + instance_database_histories.emplace_back("instance_1", instance_1_db_histories_); + + memgraph::replication_coordination_glue::DatabaseHistories instance_2_db_histories_{history}; + instance_database_histories.emplace_back("instance_2", instance_2_db_histories_); + + histories.emplace_back(memgraph::utils::UUID{}, 60); + histories.emplace_back(memgraph::utils::UUID{}, 65); + auto db_histories_longest = + memgraph::utils::fmap(histories, [](const std::pair<memgraph::utils::UUID, uint64_t> &pair) { + return std::make_pair(std::string(pair.first), pair.second); + }); + + memgraph::replication_coordination_glue::DatabaseHistory history_longest{ + .db_uuid = db_uuid, .history = db_histories_longest, .name = default_name}; + + memgraph::replication_coordination_glue::DatabaseHistories instance_3_db_histories_{history_longest}; + instance_database_histories.emplace_back("instance_3", instance_3_db_histories_); + + memgraph::coordination::CoordinatorInstance instance; + auto [instance_name, latest_epoch, latest_commit_timestamp] = + instance.ChooseMostUpToDateInstance(instance_database_histories); + + ASSERT_TRUE(instance_name == "instance_3"); + ASSERT_TRUE(latest_epoch == 
db_histories_longest.back().first); + ASSERT_TRUE(latest_commit_timestamp == db_histories_longest.back().second); +} + +TEST_F(CoordinationUtils, MemgraphDbHistoryInstancesHistoryDiverged) { + // When history diverged, also prioritize one with biggest last commit timestamp + // Main : A(1) B(2) C(3) X + // replica 1: A(1) B(2) C(3) X X up + // replica 2: A(1) B(2) X D(5) X up + // replica 3: A(1) B(2) X D(4) X up + std::vector<std::pair<std::string, memgraph::replication_coordination_glue::DatabaseHistories>> + instance_database_histories; + + std::vector<std::pair<memgraph::utils::UUID, uint64_t>> histories; + histories.emplace_back(memgraph::utils::UUID{}, 1); + histories.emplace_back(memgraph::utils::UUID{}, 2); + histories.emplace_back(memgraph::utils::UUID{}, 3); + + memgraph::utils::UUID db_uuid; + std::string default_name = std::string(memgraph::dbms::kDefaultDB); + + auto db_histories = memgraph::utils::fmap(histories, [](const std::pair<memgraph::utils::UUID, uint64_t> &pair) { + return std::make_pair(std::string(pair.first), pair.second); + }); + + memgraph::replication_coordination_glue::DatabaseHistory history{ + .db_uuid = db_uuid, .history = db_histories, .name = default_name}; + + memgraph::replication_coordination_glue::DatabaseHistories instance_1_db_histories_{history}; + instance_database_histories.emplace_back("instance_1", instance_1_db_histories_); + + db_histories.pop_back(); + + auto oldest_commit_timestamp{5}; + auto newest_different_epoch = memgraph::utils::UUID{}; + histories.emplace_back(newest_different_epoch, oldest_commit_timestamp); + auto db_histories_different = + memgraph::utils::fmap(histories, [](const std::pair<memgraph::utils::UUID, uint64_t> &pair) { + return std::make_pair(std::string(pair.first), pair.second); + }); + + memgraph::replication_coordination_glue::DatabaseHistory history_3{ + .db_uuid = db_uuid, .history = db_histories_different, .name = default_name}; + + 
memgraph::replication_coordination_glue::DatabaseHistories instance_3_db_histories_{history_3}; + instance_database_histories.emplace_back("instance_3", instance_3_db_histories_); + + db_histories_different.back().second = 4; + memgraph::replication_coordination_glue::DatabaseHistory history_2{ + .db_uuid = db_uuid, .history = db_histories_different, .name = default_name}; + + memgraph::replication_coordination_glue::DatabaseHistories instance_2_db_histories_{history_2}; + instance_database_histories.emplace_back("instance_2", instance_2_db_histories_); + + memgraph::coordination::CoordinatorInstance instance; + auto [instance_name, latest_epoch, latest_commit_timestamp] = + instance.ChooseMostUpToDateInstance(instance_database_histories); + + ASSERT_TRUE(instance_name == "instance_3"); + ASSERT_TRUE(latest_epoch == std::string(newest_different_epoch)); + ASSERT_TRUE(latest_commit_timestamp == oldest_commit_timestamp); +} diff --git a/tests/unit/coordinator_cluster_state.cpp b/tests/unit/coordinator_cluster_state.cpp new file mode 100644 index 000000000..8df2797f2 --- /dev/null +++ b/tests/unit/coordinator_cluster_state.cpp @@ -0,0 +1,163 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +#include "nuraft/coordinator_cluster_state.hpp" +#include "nuraft/coordinator_state_machine.hpp" +#include "replication_coordination_glue/role.hpp" + +#include "utils/file.hpp" + +#include <gflags/gflags.h> +#include <gtest/gtest.h> +#include "json/json.hpp" + +#include "libnuraft/nuraft.hxx" + +using memgraph::coordination::CoordinatorClientConfig; +using memgraph::coordination::CoordinatorClusterState; +using memgraph::coordination::CoordinatorStateMachine; +using memgraph::coordination::InstanceState; +using memgraph::coordination::RaftLogAction; +using memgraph::replication_coordination_glue::ReplicationMode; +using memgraph::replication_coordination_glue::ReplicationRole; +using nuraft::buffer; +using nuraft::buffer_serializer; +using nuraft::ptr; + +class CoordinatorClusterStateTest : public ::testing::Test { + protected: + void SetUp() override {} + + void TearDown() override {} + + std::filesystem::path test_folder_{std::filesystem::temp_directory_path() / + "MG_tests_unit_coordinator_cluster_state"}; +}; + +TEST_F(CoordinatorClusterStateTest, InstanceStateSerialization) { + InstanceState instance_state{ + CoordinatorClientConfig{"instance3", + "127.0.0.1", + 10112, + std::chrono::seconds{1}, + std::chrono::seconds{5}, + std::chrono::seconds{10}, + {"instance_name", ReplicationMode::ASYNC, "replication_ip_address", 10001}, + .ssl = std::nullopt}, + ReplicationRole::MAIN}; + + nlohmann::json j = instance_state; + InstanceState deserialized_instance_state = j.get<InstanceState>(); + + EXPECT_EQ(instance_state.config, deserialized_instance_state.config); + EXPECT_EQ(instance_state.status, deserialized_instance_state.status); +} + +TEST_F(CoordinatorClusterStateTest, DoActionRegisterInstances) { + auto coordinator_cluster_state = memgraph::coordination::CoordinatorClusterState{}; + + { + CoordinatorClientConfig config{"instance1", + "127.0.0.1", + 10111, + std::chrono::seconds{1}, + std::chrono::seconds{5}, + std::chrono::seconds{10}, + {"instance_name", 
ReplicationMode::ASYNC, "replication_ip_address", 10001}, + .ssl = std::nullopt}; + + auto buffer = CoordinatorStateMachine::SerializeRegisterInstance(config); + auto [payload, action] = CoordinatorStateMachine::DecodeLog(*buffer); + + coordinator_cluster_state.DoAction(payload, action); + } + { + CoordinatorClientConfig config{"instance2", + "127.0.0.1", + 10112, + std::chrono::seconds{1}, + std::chrono::seconds{5}, + std::chrono::seconds{10}, + {"instance_name", ReplicationMode::ASYNC, "replication_ip_address", 10002}, + .ssl = std::nullopt}; + + auto buffer = CoordinatorStateMachine::SerializeRegisterInstance(config); + auto [payload, action] = CoordinatorStateMachine::DecodeLog(*buffer); + + coordinator_cluster_state.DoAction(payload, action); + } + { + CoordinatorClientConfig config{"instance3", + "127.0.0.1", + 10113, + std::chrono::seconds{1}, + std::chrono::seconds{5}, + std::chrono::seconds{10}, + {"instance_name", ReplicationMode::ASYNC, "replication_ip_address", 10003}, + .ssl = std::nullopt}; + + auto buffer = CoordinatorStateMachine::SerializeRegisterInstance(config); + auto [payload, action] = CoordinatorStateMachine::DecodeLog(*buffer); + + coordinator_cluster_state.DoAction(payload, action); + } + { + CoordinatorClientConfig config{"instance4", + "127.0.0.1", + 10114, + std::chrono::seconds{1}, + std::chrono::seconds{5}, + std::chrono::seconds{10}, + {"instance_name", ReplicationMode::ASYNC, "replication_ip_address", 10004}, + .ssl = std::nullopt}; + + auto buffer = CoordinatorStateMachine::SerializeRegisterInstance(config); + auto [payload, action] = CoordinatorStateMachine::DecodeLog(*buffer); + + coordinator_cluster_state.DoAction(payload, action); + } + { + CoordinatorClientConfig config{"instance5", + "127.0.0.1", + 10115, + std::chrono::seconds{1}, + std::chrono::seconds{5}, + std::chrono::seconds{10}, + {"instance_name", ReplicationMode::ASYNC, "replication_ip_address", 10005}, + .ssl = std::nullopt}; + + auto buffer = 
CoordinatorStateMachine::SerializeRegisterInstance(config); + auto [payload, action] = CoordinatorStateMachine::DecodeLog(*buffer); + + coordinator_cluster_state.DoAction(payload, action); + } + { + CoordinatorClientConfig config{"instance6", + "127.0.0.1", + 10116, + std::chrono::seconds{1}, + std::chrono::seconds{5}, + std::chrono::seconds{10}, + {"instance_name", ReplicationMode::ASYNC, "replication_ip_address", 10006}, + .ssl = std::nullopt}; + + auto buffer = CoordinatorStateMachine::SerializeRegisterInstance(config); + auto [payload, action] = CoordinatorStateMachine::DecodeLog(*buffer); + + coordinator_cluster_state.DoAction(payload, action); + } + + ptr<buffer> data; + coordinator_cluster_state.Serialize(data); + + auto deserialized_coordinator_cluster_state = CoordinatorClusterState::Deserialize(*data); + ASSERT_EQ(coordinator_cluster_state.GetInstances(), deserialized_coordinator_cluster_state.GetInstances()); +} diff --git a/tests/unit/cypher_main_visitor.cpp b/tests/unit/cypher_main_visitor.cpp index 63cca3aa4..33e3af2e3 100644 --- a/tests/unit/cypher_main_visitor.cpp +++ b/tests/unit/cypher_main_visitor.cpp @@ -2633,15 +2633,99 @@ TEST_P(CypherMainVisitorTest, TestRegisterReplicationQuery) { } #ifdef MG_ENTERPRISE + +TEST_P(CypherMainVisitorTest, TestRegisterSyncInstance) { + auto &ast_generator = *GetParam(); + + std::string const sync_instance = R"(REGISTER INSTANCE instance_1 WITH CONFIG {"bolt_server": "127.0.0.1:7688", + "replication_server": "127.0.0.1:10001", "management_server": "127.0.0.1:10011" + })"; + + auto *parsed_query = dynamic_cast<CoordinatorQuery *>(ast_generator.ParseQuery(sync_instance)); + + EXPECT_EQ(parsed_query->action_, CoordinatorQuery::Action::REGISTER_INSTANCE); + EXPECT_EQ(parsed_query->sync_mode_, CoordinatorQuery::SyncMode::SYNC); + + auto const evaluate_config_map = [&ast_generator](std::unordered_map<Expression *, Expression *> const &config_map) + -> std::unordered_map<std::string, std::string> { + auto const 
expr_to_str = [&ast_generator](Expression *expression) { + return std::string{ast_generator.GetLiteral(expression, ast_generator.context_.is_query_cached).ValueString()}; + }; + + return ranges::views::transform(config_map, + [&expr_to_str](auto const &expr_pair) { + return std::pair{expr_to_str(expr_pair.first), expr_to_str(expr_pair.second)}; + }) | + ranges::to<std::unordered_map<std::string, std::string>>; + }; + + auto const config_map = evaluate_config_map(parsed_query->configs_); + ASSERT_EQ(config_map.size(), 3); + EXPECT_EQ(config_map.at("bolt_server"), "127.0.0.1:7688"); + EXPECT_EQ(config_map.at("management_server"), "127.0.0.1:10011"); + EXPECT_EQ(config_map.at("replication_server"), "127.0.0.1:10001"); +} + +TEST_P(CypherMainVisitorTest, TestRegisterAsyncInstance) { + auto &ast_generator = *GetParam(); + + std::string const async_instance = + R"(REGISTER INSTANCE instance_1 AS ASYNC WITH CONFIG {"bolt_server": "127.0.0.1:7688", + "replication_server": "127.0.0.1:10001", + "management_server": "127.0.0.1:10011"})"; + + auto *parsed_query = dynamic_cast<CoordinatorQuery *>(ast_generator.ParseQuery(async_instance)); + + EXPECT_EQ(parsed_query->action_, CoordinatorQuery::Action::REGISTER_INSTANCE); + EXPECT_EQ(parsed_query->sync_mode_, CoordinatorQuery::SyncMode::ASYNC); + + auto const evaluate_config_map = [&ast_generator](std::unordered_map<Expression *, Expression *> const &config_map) + -> std::map<std::string, std::string, std::less<>> { + auto const expr_to_str = [&ast_generator](Expression *expression) { + return std::string{ast_generator.GetLiteral(expression, ast_generator.context_.is_query_cached).ValueString()}; + }; + + return ranges::views::transform(config_map, + [&expr_to_str](auto const &expr_pair) { + return std::pair{expr_to_str(expr_pair.first), expr_to_str(expr_pair.second)}; + }) | + ranges::to<std::map<std::string, std::string, std::less<>>>; + }; + + auto const config_map = evaluate_config_map(parsed_query->configs_); + 
ASSERT_EQ(config_map.size(), 3); + EXPECT_EQ(config_map.find(memgraph::query::kBoltServer)->second, "127.0.0.1:7688"); + EXPECT_EQ(config_map.find(memgraph::query::kManagementServer)->second, "127.0.0.1:10011"); + EXPECT_EQ(config_map.find(memgraph::query::kReplicationServer)->second, "127.0.0.1:10001"); +} + TEST_P(CypherMainVisitorTest, TestAddCoordinatorInstance) { auto &ast_generator = *GetParam(); - std::string const correct_query = R"(ADD COORDINATOR 1 ON "127.0.0.1:10111")"; + std::string const correct_query = + R"(ADD COORDINATOR 1 WITH CONFIG {"bolt_server": "127.0.0.1:7688", "coordinator_server": "127.0.0.1:10111"})"; auto *parsed_query = dynamic_cast<CoordinatorQuery *>(ast_generator.ParseQuery(correct_query)); EXPECT_EQ(parsed_query->action_, CoordinatorQuery::Action::ADD_COORDINATOR_INSTANCE); - ast_generator.CheckLiteral(parsed_query->raft_socket_address_, TypedValue("127.0.0.1:10111")); - ast_generator.CheckLiteral(parsed_query->raft_server_id_, TypedValue(1)); + ast_generator.CheckLiteral(parsed_query->coordinator_server_id_, TypedValue(1)); + + auto const evaluate_config_map = [&ast_generator](std::unordered_map<Expression *, Expression *> const &config_map) + -> std::map<std::string, std::string, std::less<>> { + auto const expr_to_str = [&ast_generator](Expression *expression) { + return std::string{ast_generator.GetLiteral(expression, ast_generator.context_.is_query_cached).ValueString()}; + }; + + return ranges::views::transform(config_map, + [&expr_to_str](auto const &expr_pair) { + return std::pair{expr_to_str(expr_pair.first), expr_to_str(expr_pair.second)}; + }) | + ranges::to<std::map<std::string, std::string, std::less<>>>; + }; + + auto const config_map = evaluate_config_map(parsed_query->configs_); + ASSERT_EQ(config_map.size(), 2); + EXPECT_EQ(config_map.find(kBoltServer)->second, "127.0.0.1:7688"); + EXPECT_EQ(config_map.find(kCoordinatorServer)->second, "127.0.0.1:10111"); } #endif @@ -4624,3 +4708,101 @@ 
TEST_P(CypherMainVisitorTest, CallSubquery) { ASSERT_TRUE(nested_match); } } + +TEST_P(CypherMainVisitorTest, PatternComprehension) { + auto &ast_generator = *GetParam(); + { + const auto *query = + dynamic_cast<CypherQuery *>(ast_generator.ParseQuery("MATCH (n) RETURN [(n)-->(b) | b.val] AS res;")); + const auto *ret = dynamic_cast<Return *>(query->single_query_->clauses_[1]); + + const auto *pc = dynamic_cast<PatternComprehension *>(ret->body_.named_expressions[0]->expression_); + ASSERT_TRUE(pc); + + // Check for variable_ + EXPECT_EQ(pc->variable_, nullptr); + + // Check for pattern_ + const auto pattern = pc->pattern_; + ASSERT_TRUE(pattern->atoms_.size() == 3); + + const auto *node1 = dynamic_cast<NodeAtom *>(pattern->atoms_[0]); + const auto *edge = dynamic_cast<EdgeAtom *>(pattern->atoms_[1]); + const auto *node2 = dynamic_cast<NodeAtom *>(pattern->atoms_[2]); + + ASSERT_TRUE(node1); + ASSERT_TRUE(edge); + ASSERT_TRUE(node2); + + // Check for filter_ + EXPECT_EQ(pc->filter_, nullptr); + + // Check for resultExpr_ + const auto *result_expr = pc->resultExpr_; + ASSERT_TRUE(result_expr); + } + { + const auto *query = dynamic_cast<CypherQuery *>( + ast_generator.ParseQuery("MATCH (n) RETURN [(n)-->(b) WHERE b.id=1 | b.val] AS res;")); + const auto *ret = dynamic_cast<Return *>(query->single_query_->clauses_[1]); + + const auto *pc = dynamic_cast<PatternComprehension *>(ret->body_.named_expressions[0]->expression_); + ASSERT_TRUE(pc); + + // Check for variable_ + EXPECT_EQ(pc->variable_, nullptr); + + // Check for pattern_ + const auto pattern = pc->pattern_; + ASSERT_TRUE(pattern->atoms_.size() == 3); + + const auto *node1 = dynamic_cast<NodeAtom *>(pattern->atoms_[0]); + const auto *edge = dynamic_cast<EdgeAtom *>(pattern->atoms_[1]); + const auto *node2 = dynamic_cast<NodeAtom *>(pattern->atoms_[2]); + + ASSERT_TRUE(node1); + ASSERT_TRUE(edge); + ASSERT_TRUE(node2); + + // Check for filter_ + const auto *filter = pc->filter_; + ASSERT_TRUE(filter); + 
ASSERT_TRUE(filter->expression_); + + // Check for resultExpr_ + const auto *result_expr = pc->resultExpr_; + ASSERT_TRUE(result_expr); + } + { + const auto *query = dynamic_cast<CypherQuery *>( + ast_generator.ParseQuery("MATCH (n) RETURN [p = (n)-->(b) WHERE b.id=1 | b.val] AS res;")); + const auto *ret = dynamic_cast<Return *>(query->single_query_->clauses_[1]); + + const auto *pc = dynamic_cast<PatternComprehension *>(ret->body_.named_expressions[0]->expression_); + ASSERT_TRUE(pc); + + // Check for variable_ + ASSERT_TRUE(pc->variable_); + + // Check for pattern_ + const auto pattern = pc->pattern_; + ASSERT_TRUE(pattern->atoms_.size() == 3); + + const auto *node1 = dynamic_cast<NodeAtom *>(pattern->atoms_[0]); + const auto *edge = dynamic_cast<EdgeAtom *>(pattern->atoms_[1]); + const auto *node2 = dynamic_cast<NodeAtom *>(pattern->atoms_[2]); + + ASSERT_TRUE(node1); + ASSERT_TRUE(edge); + ASSERT_TRUE(node2); + + // Check for filter_ + const auto *filter = pc->filter_; + ASSERT_TRUE(filter); + ASSERT_TRUE(filter->expression_); + + // Check for resultExpr_ + const auto *result_expr = pc->resultExpr_; + ASSERT_TRUE(result_expr); + } +} diff --git a/tests/unit/database_get_info.cpp b/tests/unit/database_get_info.cpp index be6885b37..2547378c6 100644 --- a/tests/unit/database_get_info.cpp +++ b/tests/unit/database_get_info.cpp @@ -197,8 +197,8 @@ TYPED_TEST(InfoTest, InfoCheck) { ASSERT_EQ(info.storage_info.vertex_count, 5); ASSERT_EQ(info.storage_info.edge_count, 2); ASSERT_EQ(info.storage_info.average_degree, 0.8); - ASSERT_GT(info.storage_info.memory_res, 10'000'000); // 250MB < > 10MB - ASSERT_LT(info.storage_info.memory_res, 250'000'000); + ASSERT_GT(info.storage_info.memory_res, 10'000'000); // 300MB < > 10MB + ASSERT_LT(info.storage_info.memory_res, 300'000'000); ASSERT_GT(info.storage_info.disk_usage, 100); // 1MB < > 100B ASSERT_LT(info.storage_info.disk_usage, 1000'000); ASSERT_EQ(info.storage_info.label_indices, 1); diff --git 
a/tests/unit/dbms_database.cpp b/tests/unit/dbms_database.cpp index 535c0c055..0fded2324 100644 --- a/tests/unit/dbms_database.cpp +++ b/tests/unit/dbms_database.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source diff --git a/tests/unit/plan_pretty_print.cpp b/tests/unit/plan_pretty_print.cpp index ef2395931..0bc7b35cf 100644 --- a/tests/unit/plan_pretty_print.cpp +++ b/tests/unit/plan_pretty_print.cpp @@ -12,6 +12,7 @@ #include <gtest/gtest.h> #include "disk_test_utils.hpp" +#include "query/frontend/ast/ast.hpp" #include "query/frontend/semantic/symbol_table.hpp" #include "query/plan/operator.hpp" #include "query/plan/pretty_print.hpp" @@ -515,7 +516,7 @@ TYPED_TEST(PrintToJsonTest, SetLabels) { std::shared_ptr<LogicalOperator> last_op = std::make_shared<ScanAll>(nullptr, node_sym); last_op = std::make_shared<plan::SetLabels>( last_op, node_sym, - std::vector<memgraph::storage::LabelId>{this->dba.NameToLabel("label1"), this->dba.NameToLabel("label2")}); + std::vector<StorageLabelType>{this->dba.NameToLabel("label1"), this->dba.NameToLabel("label2")}); this->Check(last_op.get(), R"( { @@ -554,7 +555,7 @@ TYPED_TEST(PrintToJsonTest, RemoveLabels) { std::shared_ptr<LogicalOperator> last_op = std::make_shared<ScanAll>(nullptr, node_sym); last_op = std::make_shared<plan::RemoveLabels>( last_op, node_sym, - std::vector<memgraph::storage::LabelId>{this->dba.NameToLabel("label1"), this->dba.NameToLabel("label2")}); + std::vector<StorageLabelType>{this->dba.NameToLabel("label1"), this->dba.NameToLabel("label2")}); this->Check(last_op.get(), R"( { diff --git a/tests/unit/property_value_v2.cpp b/tests/unit/property_value_v2.cpp index aba322ce7..28937598e 100644 --- a/tests/unit/property_value_v2.cpp +++ b/tests/unit/property_value_v2.cpp @@ -1,4 +1,4 @@ 
-// Copyright 2022 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -570,7 +570,6 @@ TEST(PropertyValue, MoveConstructor) { for (auto &item : data) { memgraph::storage::PropertyValue copy(item); memgraph::storage::PropertyValue pv(std::move(item)); - ASSERT_EQ(item.type(), memgraph::storage::PropertyValue::Type::Null); ASSERT_EQ(pv.type(), copy.type()); switch (copy.type()) { case memgraph::storage::PropertyValue::Type::Null: @@ -668,7 +667,6 @@ TEST(PropertyValue, MoveAssignment) { memgraph::storage::PropertyValue copy(item); memgraph::storage::PropertyValue pv(123); pv = std::move(item); - ASSERT_EQ(item.type(), memgraph::storage::PropertyValue::Type::Null); ASSERT_EQ(pv.type(), copy.type()); switch (copy.type()) { case memgraph::storage::PropertyValue::Type::Null: diff --git a/tests/unit/query_common.hpp b/tests/unit/query_common.hpp index a14ef2d30..6f9b1260a 100644 --- a/tests/unit/query_common.hpp +++ b/tests/unit/query_common.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -425,7 +425,7 @@ auto GetSet(AstStorage &storage, const std::string &name, Expression *expr, bool /// Create a set labels clause for given identifier name and labels. 
auto GetSet(AstStorage &storage, const std::string &name, std::vector<std::string> label_names) { - std::vector<LabelIx> labels; + std::vector<QueryLabelType> labels; labels.reserve(label_names.size()); for (const auto &label : label_names) { labels.push_back(storage.GetLabelIx(label)); @@ -438,7 +438,7 @@ auto GetRemove(AstStorage &storage, PropertyLookup *prop_lookup) { return storag /// Create a remove labels clause for given identifier name and labels. auto GetRemove(AstStorage &storage, const std::string &name, std::vector<std::string> label_names) { - std::vector<LabelIx> labels; + std::vector<QueryLabelType> labels; labels.reserve(label_names.size()); for (const auto &label : label_names) { labels.push_back(storage.GetLabelIx(label)); @@ -635,3 +635,5 @@ auto GetForeach(AstStorage &storage, NamedExpression *named_expr, const std::vec #define DROP_USER(usernames) storage.Create<memgraph::query::DropUser>((usernames)) #define CALL_PROCEDURE(...) memgraph::query::test_common::GetCallProcedure(storage, __VA_ARGS__) #define CALL_SUBQUERY(...) memgraph::query::test_common::GetCallSubquery(this->storage, __VA_ARGS__) +#define PATTERN_COMPREHENSION(variable, pattern, filter, resultExpr) \ + this->storage.template Create<memgraph::query::PatternComprehension>(variable, pattern, filter, resultExpr) diff --git a/tests/unit/query_plan.cpp b/tests/unit/query_plan.cpp index bc4b2660c..5b574c1ff 100644 --- a/tests/unit/query_plan.cpp +++ b/tests/unit/query_plan.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -808,13 +808,68 @@ TYPED_TEST(TestPlanner, MatchWhereBeforeExpand) { CheckPlan(planner.plan(), symbol_table, ExpectScanAll(), ExpectFilter(), ExpectExpand(), ExpectProduce()); } +TYPED_TEST(TestPlanner, MatchEdgeTypeIndex) { + FakeDbAccessor dba; + auto indexed_edge_type = dba.EdgeType("indexed_edgetype"); + dba.SetIndexCount(indexed_edge_type, 1); + { + // Test MATCH ()-[r:indexed_edgetype]->() RETURN r; + auto *query = QUERY(SINGLE_QUERY( + MATCH(PATTERN(NODE("anon1"), EDGE("r", memgraph::query::EdgeAtom::Direction::OUT, {"indexed_edgetype"}), + NODE("anon2"))), + RETURN("r"))); + auto symbol_table = memgraph::query::MakeSymbolTable(query); + auto planner = MakePlanner<TypeParam>(&dba, this->storage, symbol_table, query); + CheckPlan(planner.plan(), symbol_table, ExpectScanAllByEdgeType(), ExpectProduce()); + } + { + // Test MATCH (a)-[r:indexed_edgetype]->() RETURN r; + auto *query = QUERY(SINGLE_QUERY( + MATCH(PATTERN(NODE("a"), EDGE("r", memgraph::query::EdgeAtom::Direction::OUT, {"indexed_edgetype"}), + NODE("anon2"))), + RETURN("r"))); + auto symbol_table = memgraph::query::MakeSymbolTable(query); + auto planner = MakePlanner<TypeParam>(&dba, this->storage, symbol_table, query); + CheckPlan(planner.plan(), symbol_table, ExpectScanAll(), ExpectExpand(), ExpectProduce()); + } + { + // Test MATCH ()-[r:indexed_edgetype]->(b) RETURN r; + auto *query = QUERY(SINGLE_QUERY( + MATCH(PATTERN(NODE("anon1"), EDGE("r", memgraph::query::EdgeAtom::Direction::OUT, {"indexed_edgetype"}), + NODE("b"))), + RETURN("r"))); + auto symbol_table = memgraph::query::MakeSymbolTable(query); + auto planner = MakePlanner<TypeParam>(&dba, this->storage, symbol_table, query); + CheckPlan(planner.plan(), symbol_table, ExpectScanAll(), ExpectExpand(), ExpectProduce()); + } + { + // Test MATCH 
(a)-[r:indexed_edgetype]->(b) RETURN r; + auto *query = QUERY(SINGLE_QUERY( + MATCH( + PATTERN(NODE("a"), EDGE("r", memgraph::query::EdgeAtom::Direction::OUT, {"indexed_edgetype"}), NODE("b"))), + RETURN("r"))); + auto symbol_table = memgraph::query::MakeSymbolTable(query); + auto planner = MakePlanner<TypeParam>(&dba, this->storage, symbol_table, query); + CheckPlan(planner.plan(), symbol_table, ExpectScanAll(), ExpectExpand(), ExpectProduce()); + } + { + // Test MATCH ()-[r:not_indexed_edgetype]->() RETURN r; + auto *query = QUERY(SINGLE_QUERY( + MATCH(PATTERN(NODE("anon1"), EDGE("r", memgraph::query::EdgeAtom::Direction::OUT, {"not_indexed_edgetype"}), + NODE("anon2"))), + RETURN("r"))); + auto symbol_table = memgraph::query::MakeSymbolTable(query); + auto planner = MakePlanner<TypeParam>(&dba, this->storage, symbol_table, query); + CheckPlan(planner.plan(), symbol_table, ExpectScanAll(), ExpectExpand(), ExpectProduce()); + } +} + TYPED_TEST(TestPlanner, MatchFilterPropIsNotNull) { FakeDbAccessor dba; auto label = dba.Label("label"); auto prop = PROPERTY_PAIR(dba, "prop"); dba.SetIndexCount(label, 1); dba.SetIndexCount(label, prop.second, 1); - { // Test MATCH (n :label) -[r]- (m) WHERE n.prop IS NOT NULL RETURN n auto *query = QUERY(SINGLE_QUERY(MATCH(PATTERN(NODE("n", "label"), EDGE("r"), NODE("m"))), diff --git a/tests/unit/query_plan_checker.hpp b/tests/unit/query_plan_checker.hpp index 92089eb82..6eef3841a 100644 --- a/tests/unit/query_plan_checker.hpp +++ b/tests/unit/query_plan_checker.hpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -65,6 +65,7 @@ class PlanChecker : public virtual HierarchicalLogicalOperatorVisitor { PRE_VISIT(ScanAllByLabelPropertyValue); PRE_VISIT(ScanAllByLabelPropertyRange); PRE_VISIT(ScanAllByLabelProperty); + PRE_VISIT(ScanAllByEdgeType); PRE_VISIT(ScanAllById); PRE_VISIT(Expand); PRE_VISIT(ExpandVariable); @@ -170,6 +171,7 @@ using ExpectCreateExpand = OpChecker<CreateExpand>; using ExpectDelete = OpChecker<Delete>; using ExpectScanAll = OpChecker<ScanAll>; using ExpectScanAllByLabel = OpChecker<ScanAllByLabel>; +using ExpectScanAllByEdgeType = OpChecker<ScanAllByEdgeType>; using ExpectScanAllById = OpChecker<ScanAllById>; using ExpectExpand = OpChecker<Expand>; using ExpectConstructNamedPath = OpChecker<ConstructNamedPath>; @@ -560,6 +562,12 @@ class FakeDbAccessor { return 0; } + int64_t EdgesCount(memgraph::storage::EdgeTypeId edge_type) const { + auto found = edge_type_index_.find(edge_type); + if (found != edge_type_index_.end()) return found->second; + return 0; + } + bool LabelIndexExists(memgraph::storage::LabelId label) const { return label_index_.find(label) != label_index_.end(); } @@ -573,6 +581,10 @@ class FakeDbAccessor { return false; } + bool EdgeTypeIndexExists(memgraph::storage::EdgeTypeId edge_type) const { + return edge_type_index_.find(edge_type) != edge_type_index_.end(); + } + std::optional<memgraph::storage::LabelPropertyIndexStats> GetIndexStats( const memgraph::storage::LabelId label, const memgraph::storage::PropertyId property) const { return memgraph::storage::LabelPropertyIndexStats{.statistic = 0, .avg_group_size = 1}; // unique id @@ -594,6 +606,8 @@ class FakeDbAccessor { label_property_index_.emplace_back(label, property, count); } + void SetIndexCount(memgraph::storage::EdgeTypeId edge_type, int64_t count) { edge_type_index_[edge_type] = count; 
} + memgraph::storage::LabelId NameToLabel(const std::string &name) { auto found = labels_.find(name); if (found != labels_.end()) return found->second; @@ -608,6 +622,8 @@ class FakeDbAccessor { return edge_types_.emplace(name, memgraph::storage::EdgeTypeId::FromUint(edge_types_.size())).first->second; } + memgraph::storage::EdgeTypeId EdgeType(const std::string &name) { return NameToEdgeType(name); } + memgraph::storage::PropertyId NameToProperty(const std::string &name) { auto found = properties_.find(name); if (found != properties_.end()) return found->second; @@ -632,6 +648,7 @@ class FakeDbAccessor { std::unordered_map<memgraph::storage::LabelId, int64_t> label_index_; std::vector<std::tuple<memgraph::storage::LabelId, memgraph::storage::PropertyId, int64_t>> label_property_index_; + std::unordered_map<memgraph::storage::EdgeTypeId, int64_t> edge_type_index_; }; } // namespace memgraph::query::plan diff --git a/tests/unit/query_plan_create_set_remove_delete.cpp b/tests/unit/query_plan_create_set_remove_delete.cpp index 1fa400940..b32fa91b1 100644 --- a/tests/unit/query_plan_create_set_remove_delete.cpp +++ b/tests/unit/query_plan_create_set_remove_delete.cpp @@ -497,7 +497,7 @@ class MatchCreateNodeWithAuthFixture : public QueryPlanTest<StorageType> { NodeCreationInfo m{}; m.symbol = symbol_table.CreateSymbol("m", true); - std::vector<memgraph::storage::LabelId> labels{dba.NameToLabel("l2")}; + std::vector<StorageLabelType> labels{dba.NameToLabel("l2")}; m.labels = labels; // creation op auto create_node = std::make_shared<CreateNode>(n_scan_all.op_, m); @@ -627,7 +627,7 @@ class MatchCreateExpandWithAuthFixture : public QueryPlanTest<StorageType> { // data for the second node NodeCreationInfo m; m.symbol = cycle ? 
n_scan_all.sym_ : symbol_table.CreateSymbol("m", true); - std::vector<memgraph::storage::LabelId> labels{dba.NameToLabel("l2")}; + std::vector<StorageLabelType> labels{dba.NameToLabel("l2")}; m.labels = labels; EdgeCreationInfo r; @@ -1231,12 +1231,14 @@ TYPED_TEST(QueryPlanTest, SetLabels) { ASSERT_TRUE(dba.InsertVertex().AddLabel(label1).HasValue()); ASSERT_TRUE(dba.InsertVertex().AddLabel(label1).HasValue()); dba.AdvanceCommand(); + std::vector<StorageLabelType> labels; + labels.emplace_back(label2); + labels.emplace_back(label3); SymbolTable symbol_table; auto n = MakeScanAll(this->storage, symbol_table, "n"); - auto label_set = - std::make_shared<plan::SetLabels>(n.op_, n.sym_, std::vector<memgraph::storage::LabelId>{label2, label3}); + auto label_set = std::make_shared<plan::SetLabels>(n.op_, n.sym_, labels); auto context = MakeContext(this->storage, symbol_table, &dba); EXPECT_EQ(2, PullAll(*label_set, &context)); @@ -1255,12 +1257,14 @@ TYPED_TEST(QueryPlanTest, SetLabelsWithFineGrained) { ASSERT_TRUE(dba.InsertVertex().AddLabel(labels[0]).HasValue()); ASSERT_TRUE(dba.InsertVertex().AddLabel(labels[0]).HasValue()); dba.AdvanceCommand(); + std::vector<StorageLabelType> labels_variant; + labels_variant.emplace_back(labels[1]); + labels_variant.emplace_back(labels[2]); SymbolTable symbol_table; auto n = MakeScanAll(this->storage, symbol_table, "n"); - auto label_set = - std::make_shared<plan::SetLabels>(n.op_, n.sym_, std::vector<memgraph::storage::LabelId>{labels[1], labels[2]}); + auto label_set = std::make_shared<plan::SetLabels>(n.op_, n.sym_, labels_variant); memgraph::glue::FineGrainedAuthChecker auth_checker{user, &dba}; auto context = MakeContextWithFineGrainedChecker(this->storage, symbol_table, &dba, &auth_checker); @@ -1396,12 +1400,14 @@ TYPED_TEST(QueryPlanTest, RemoveLabels) { ASSERT_TRUE(v2.AddLabel(label1).HasValue()); ASSERT_TRUE(v2.AddLabel(label3).HasValue()); dba.AdvanceCommand(); + std::vector<StorageLabelType> labels; + 
labels.emplace_back(label1); + labels.emplace_back(label2); SymbolTable symbol_table; auto n = MakeScanAll(this->storage, symbol_table, "n"); - auto label_remove = - std::make_shared<plan::RemoveLabels>(n.op_, n.sym_, std::vector<memgraph::storage::LabelId>{label1, label2}); + auto label_remove = std::make_shared<plan::RemoveLabels>(n.op_, n.sym_, labels); auto context = MakeContext(this->storage, symbol_table, &dba); EXPECT_EQ(2, PullAll(*label_remove, &context)); @@ -1425,12 +1431,14 @@ TYPED_TEST(QueryPlanTest, RemoveLabelsFineGrainedFiltering) { ASSERT_TRUE(v2.AddLabel(labels[0]).HasValue()); ASSERT_TRUE(v2.AddLabel(labels[2]).HasValue()); dba.AdvanceCommand(); + std::vector<StorageLabelType> labels_variant; + labels_variant.emplace_back(labels[0]); + labels_variant.emplace_back(labels[1]); SymbolTable symbol_table; auto n = MakeScanAll(this->storage, symbol_table, "n"); - auto label_remove = std::make_shared<plan::RemoveLabels>( - n.op_, n.sym_, std::vector<memgraph::storage::LabelId>{labels[0], labels[1]}); + auto label_remove = std::make_shared<plan::RemoveLabels>(n.op_, n.sym_, labels_variant); memgraph::glue::FineGrainedAuthChecker auth_checker{user, &dba}; auto context = MakeContextWithFineGrainedChecker(this->storage, symbol_table, &dba, &auth_checker); @@ -1569,15 +1577,16 @@ TYPED_TEST(QueryPlanTest, SetRemove) { auto label1 = dba.NameToLabel("label1"); auto label2 = dba.NameToLabel("label2"); dba.AdvanceCommand(); + std::vector<StorageLabelType> labels; + labels.emplace_back(label1); + labels.emplace_back(label2); // Create operations which match (v) and set and remove v :label. // The expected result is single (v) as it was at the start. 
SymbolTable symbol_table; // MATCH (n) SET n :label1 :label2 REMOVE n :label1 :label2 auto scan_all = MakeScanAll(this->storage, symbol_table, "n"); - auto set = std::make_shared<plan::SetLabels>(scan_all.op_, scan_all.sym_, - std::vector<memgraph::storage::LabelId>{label1, label2}); - auto rem = - std::make_shared<plan::RemoveLabels>(set, scan_all.sym_, std::vector<memgraph::storage::LabelId>{label1, label2}); + auto set = std::make_shared<plan::SetLabels>(scan_all.op_, scan_all.sym_, labels); + auto rem = std::make_shared<plan::RemoveLabels>(set, scan_all.sym_, labels); auto context = MakeContext(this->storage, symbol_table, &dba); EXPECT_EQ(1, PullAll(*rem, &context)); dba.AdvanceCommand(); @@ -1773,10 +1782,12 @@ TYPED_TEST(QueryPlanTest, SetLabelsOnNull) { auto storage_dba = this->db->Access(ReplicationRole::MAIN); memgraph::query::DbAccessor dba(storage_dba.get()); auto label = dba.NameToLabel("label"); + std::vector<StorageLabelType> labels; + labels.emplace_back(label); SymbolTable symbol_table; auto n = MakeScanAll(this->storage, symbol_table, "n"); auto optional = std::make_shared<plan::Optional>(nullptr, n.op_, std::vector<Symbol>{n.sym_}); - auto set_op = std::make_shared<plan::SetLabels>(optional, n.sym_, std::vector<memgraph::storage::LabelId>{label}); + auto set_op = std::make_shared<plan::SetLabels>(optional, n.sym_, labels); EXPECT_EQ(0, CountIterable(dba.Vertices(memgraph::storage::View::OLD))); auto context = MakeContext(this->storage, symbol_table, &dba); EXPECT_EQ(1, PullAll(*set_op, &context)); @@ -1801,11 +1812,12 @@ TYPED_TEST(QueryPlanTest, RemoveLabelsOnNull) { auto storage_dba = this->db->Access(ReplicationRole::MAIN); memgraph::query::DbAccessor dba(storage_dba.get()); auto label = dba.NameToLabel("label"); + std::vector<StorageLabelType> labels; + labels.emplace_back(label); SymbolTable symbol_table; auto n = MakeScanAll(this->storage, symbol_table, "n"); auto optional = std::make_shared<plan::Optional>(nullptr, n.op_, 
std::vector<Symbol>{n.sym_}); - auto remove_op = - std::make_shared<plan::RemoveLabels>(optional, n.sym_, std::vector<memgraph::storage::LabelId>{label}); + auto remove_op = std::make_shared<plan::RemoveLabels>(optional, n.sym_, labels); EXPECT_EQ(0, CountIterable(dba.Vertices(memgraph::storage::View::OLD))); auto context = MakeContext(this->storage, symbol_table, &dba); EXPECT_EQ(1, PullAll(*remove_op, &context)); @@ -1906,7 +1918,7 @@ TYPED_TEST(QueryPlanTest, DeleteRemoveLabels) { auto n = MakeScanAll(this->storage, symbol_table, "n"); auto n_get = this->storage.template Create<Identifier>("n")->MapTo(n.sym_); auto delete_op = std::make_shared<plan::Delete>(n.op_, std::vector<Expression *>{n_get}, false); - std::vector<memgraph::storage::LabelId> labels{dba.NameToLabel("label")}; + std::vector<StorageLabelType> labels{dba.NameToLabel("label")}; auto rem_op = std::make_shared<plan::RemoveLabels>(delete_op, n.sym_, labels); auto accumulate_op = std::make_shared<plan::Accumulate>(rem_op, rem_op->ModifiedSymbols(symbol_table), true); diff --git a/tests/unit/query_plan_match_filter_return.cpp b/tests/unit/query_plan_match_filter_return.cpp index d5468b6b5..925c90c3f 100644 --- a/tests/unit/query_plan_match_filter_return.cpp +++ b/tests/unit/query_plan_match_filter_return.cpp @@ -315,11 +315,12 @@ TYPED_TEST(QueryPlan, NodeFilterLabelsAndProperties) { // make a scan all auto n = MakeScanAll(this->storage, symbol_table, "n"); - n.node_->labels_.emplace_back(this->storage.GetLabelIx(dba.LabelToName(label))); + std::vector<memgraph::query::LabelIx> labels; + labels.emplace_back(this->storage.GetLabelIx(dba.LabelToName(label))); std::get<0>(n.node_->properties_)[this->storage.GetPropertyIx(property.first)] = LITERAL(42); // node filtering - auto *filter_expr = AND(this->storage.template Create<LabelsTest>(n.node_->identifier_, n.node_->labels_), + auto *filter_expr = AND(this->storage.template Create<LabelsTest>(n.node_->identifier_, labels), EQ(PROPERTY_LOOKUP(dba, 
n.node_->identifier_, property), LITERAL(42))); auto node_filter = std::make_shared<Filter>(n.op_, std::vector<std::shared_ptr<LogicalOperator>>{}, filter_expr); @@ -366,11 +367,12 @@ TYPED_TEST(QueryPlan, NodeFilterMultipleLabels) { // make a scan all auto n = MakeScanAll(this->storage, symbol_table, "n"); - n.node_->labels_.emplace_back(this->storage.GetLabelIx(dba.LabelToName(label1))); - n.node_->labels_.emplace_back(this->storage.GetLabelIx(dba.LabelToName(label2))); + std::vector<memgraph::query::LabelIx> labels; + labels.emplace_back(this->storage.GetLabelIx(dba.LabelToName(label1))); + labels.emplace_back(this->storage.GetLabelIx(dba.LabelToName(label2))); // node filtering - auto *filter_expr = this->storage.template Create<LabelsTest>(n.node_->identifier_, n.node_->labels_); + auto *filter_expr = this->storage.template Create<LabelsTest>(n.node_->identifier_, labels); auto node_filter = std::make_shared<Filter>(n.op_, std::vector<std::shared_ptr<LogicalOperator>>{}, filter_expr); // make a named expression and a produce @@ -2805,9 +2807,10 @@ TYPED_TEST(QueryPlan, OptionalMatchThenExpandToMissingNode) { // OPTIONAL MATCH (n :missing) auto n = MakeScanAll(this->storage, symbol_table, "n"); auto label_missing = "missing"; - n.node_->labels_.emplace_back(this->storage.GetLabelIx(label_missing)); + std::vector<memgraph::query::LabelIx> labels; + labels.emplace_back(this->storage.GetLabelIx(label_missing)); - auto *filter_expr = this->storage.template Create<LabelsTest>(n.node_->identifier_, n.node_->labels_); + auto *filter_expr = this->storage.template Create<LabelsTest>(n.node_->identifier_, labels); auto node_filter = std::make_shared<Filter>(n.op_, std::vector<std::shared_ptr<LogicalOperator>>{}, filter_expr); auto optional = std::make_shared<plan::Optional>(nullptr, node_filter, std::vector<Symbol>{n.sym_}); // WITH n @@ -3619,7 +3622,8 @@ class ExistsFixture : public testing::Test { exists_expression->MapTo(symbol_table.CreateAnonymousSymbol()); auto 
scan_all = MakeScanAll(storage, symbol_table, "n"); - scan_all.node_->labels_.emplace_back(storage.GetLabelIx(match_label)); + std::vector<memgraph::query::LabelIx> labels; + labels.emplace_back(storage.GetLabelIx(match_label)); std::shared_ptr<LogicalOperator> last_op = std::make_shared<Expand>( nullptr, scan_all.sym_, dest_sym, edge_sym, direction, edge_types, false, memgraph::storage::View::OLD); @@ -3656,8 +3660,7 @@ class ExistsFixture : public testing::Test { last_op = std::make_shared<Limit>(std::move(last_op), storage.Create<PrimitiveLiteral>(1)); last_op = std::make_shared<EvaluatePatternFilter>(std::move(last_op), symbol_table.at(*exists_expression)); - auto *total_expression = - AND(storage.Create<LabelsTest>(scan_all.node_->identifier_, scan_all.node_->labels_), exists_expression); + auto *total_expression = AND(storage.Create<LabelsTest>(scan_all.node_->identifier_, labels), exists_expression); auto filter = std::make_shared<Filter>(scan_all.op_, std::vector<std::shared_ptr<LogicalOperator>>{last_op}, total_expression); @@ -3709,7 +3712,8 @@ class ExistsFixture : public testing::Test { exists_expression2->MapTo(symbol_table.CreateAnonymousSymbol()); auto scan_all = MakeScanAll(storage, symbol_table, "n"); - scan_all.node_->labels_.emplace_back(storage.GetLabelIx(match_label)); + std::vector<memgraph::query::LabelIx> labels; + labels.emplace_back(storage.GetLabelIx(match_label)); std::shared_ptr<LogicalOperator> last_op = std::make_shared<Expand>( nullptr, scan_all.sym_, dest_sym, edge_sym, direction, first_edge_type, false, memgraph::storage::View::OLD); @@ -3721,7 +3725,7 @@ class ExistsFixture : public testing::Test { last_op2 = std::make_shared<Limit>(std::move(last_op2), storage.Create<PrimitiveLiteral>(1)); last_op2 = std::make_shared<EvaluatePatternFilter>(std::move(last_op2), symbol_table.at(*exists_expression2)); - Expression *total_expression = storage.Create<LabelsTest>(scan_all.node_->identifier_, scan_all.node_->labels_); + Expression 
*total_expression = storage.Create<LabelsTest>(scan_all.node_->identifier_, labels); if (or_flag) { total_expression = AND(total_expression, OR(exists_expression, exists_expression2)); @@ -3841,7 +3845,11 @@ TYPED_TEST(SubqueriesFeature, BasicCartesianWithFilter) { // MATCH (n) WHERE n.prop = 2 CALL { MATCH (m) RETURN m } RETURN n, m auto n = MakeScanAll(this->storage, this->symbol_table, "n"); - auto *filter_expr = AND(this->storage.template Create<LabelsTest>(n.node_->identifier_, n.node_->labels_), + std::vector<memgraph::query::LabelIx> labels; + for (const auto &label : n.node_->labels_) { + labels.emplace_back(std::get<memgraph::query::LabelIx>(label)); + } + auto *filter_expr = AND(this->storage.template Create<LabelsTest>(n.node_->identifier_, labels), EQ(PROPERTY_LOOKUP(this->dba, n.node_->identifier_, this->prop), LITERAL(2))); auto filter = std::make_shared<Filter>(n.op_, std::vector<std::shared_ptr<LogicalOperator>>{}, filter_expr); @@ -3866,11 +3874,15 @@ TYPED_TEST(SubqueriesFeature, BasicCartesianWithFilterInsideSubquery) { // MATCH (n) CALL { MATCH (m) WHERE m.prop = 2 RETURN m } RETURN n, m auto n = MakeScanAll(this->storage, this->symbol_table, "n"); + std::vector<memgraph::query::LabelIx> labels; + for (const auto &label : n.node_->labels_) { + labels.emplace_back(std::get<memgraph::query::LabelIx>(label)); + } auto return_n = NEXPR("n", IDENT("n")->MapTo(n.sym_))->MapTo(this->symbol_table.CreateSymbol("named_expression_1", true)); auto m = MakeScanAll(this->storage, this->symbol_table, "m"); - auto *filter_expr = AND(this->storage.template Create<LabelsTest>(n.node_->identifier_, n.node_->labels_), + auto *filter_expr = AND(this->storage.template Create<LabelsTest>(n.node_->identifier_, labels), EQ(PROPERTY_LOOKUP(this->dba, n.node_->identifier_, this->prop), LITERAL(2))); auto filter = std::make_shared<Filter>(m.op_, std::vector<std::shared_ptr<LogicalOperator>>{}, filter_expr); @@ -3891,7 +3903,11 @@ TYPED_TEST(SubqueriesFeature, 
BasicCartesianWithFilterNoResults) { // MATCH (n) WHERE n.prop = 3 CALL { MATCH (m) RETURN m } RETURN n, m auto n = MakeScanAll(this->storage, this->symbol_table, "n"); - auto *filter_expr = AND(this->storage.template Create<LabelsTest>(n.node_->identifier_, n.node_->labels_), + std::vector<memgraph::query::LabelIx> labels; + for (const auto &label : n.node_->labels_) { + labels.emplace_back(std::get<memgraph::query::LabelIx>(label)); + } + auto *filter_expr = AND(this->storage.template Create<LabelsTest>(n.node_->identifier_, labels), EQ(PROPERTY_LOOKUP(this->dba, n.node_->identifier_, this->prop), LITERAL(3))); auto filter = std::make_shared<Filter>(n.op_, std::vector<std::shared_ptr<LogicalOperator>>{}, filter_expr); diff --git a/tests/unit/query_plan_operator_to_string.cpp b/tests/unit/query_plan_operator_to_string.cpp index 9696050f2..d60d38251 100644 --- a/tests/unit/query_plan_operator_to_string.cpp +++ b/tests/unit/query_plan_operator_to_string.cpp @@ -290,9 +290,10 @@ TYPED_TEST(OperatorToStringTest, SetProperties) { TYPED_TEST(OperatorToStringTest, SetLabels) { auto node_sym = this->GetSymbol("node"); std::shared_ptr<LogicalOperator> last_op = std::make_shared<ScanAll>(nullptr, node_sym); - last_op = std::make_shared<plan::SetLabels>( - last_op, node_sym, - std::vector<memgraph::storage::LabelId>{this->dba.NameToLabel("label1"), this->dba.NameToLabel("label2")}); + std::vector<StorageLabelType> labels; + labels.emplace_back(this->dba.NameToLabel("label1")); + labels.emplace_back(this->dba.NameToLabel("label2")); + last_op = std::make_shared<plan::SetLabels>(last_op, node_sym, labels); std::string expected_string{"SetLabels"}; EXPECT_EQ(last_op->ToString(), expected_string); @@ -311,9 +312,10 @@ TYPED_TEST(OperatorToStringTest, RemoveProperty) { TYPED_TEST(OperatorToStringTest, RemoveLabels) { auto node_sym = this->GetSymbol("node"); std::shared_ptr<LogicalOperator> last_op = std::make_shared<ScanAll>(nullptr, node_sym); - last_op = 
std::make_shared<plan::RemoveLabels>( - last_op, node_sym, - std::vector<memgraph::storage::LabelId>{this->dba.NameToLabel("label1"), this->dba.NameToLabel("label2")}); + std::vector<StorageLabelType> labels; + labels.emplace_back(this->dba.NameToLabel("label1")); + labels.emplace_back(this->dba.NameToLabel("label2")); + last_op = std::make_shared<plan::RemoveLabels>(last_op, node_sym, labels); std::string expected_string{"RemoveLabels"}; EXPECT_EQ(last_op->ToString(), expected_string); diff --git a/tests/unit/query_plan_read_write_typecheck.cpp b/tests/unit/query_plan_read_write_typecheck.cpp index f9f14902b..a6af9a03e 100644 --- a/tests/unit/query_plan_read_write_typecheck.cpp +++ b/tests/unit/query_plan_read_write_typecheck.cpp @@ -183,10 +183,10 @@ TYPED_TEST(ReadWriteTypeCheckTest, SetRemovePropertiesLabels) { plan::SetProperties::Op::REPLACE); last_op = std::make_shared<plan::SetLabels>( last_op, node_sym, - std::vector<memgraph::storage::LabelId>{this->dba.NameToLabel("label1"), this->dba.NameToLabel("label2")}); + std::vector<StorageLabelType>{this->dba.NameToLabel("label1"), this->dba.NameToLabel("label2")}); last_op = std::make_shared<plan::RemoveLabels>( last_op, node_sym, - std::vector<memgraph::storage::LabelId>{this->dba.NameToLabel("label1"), this->dba.NameToLabel("label2")}); + std::vector<StorageLabelType>{this->dba.NameToLabel("label1"), this->dba.NameToLabel("label2")}); this->CheckPlanType(last_op.get(), RWType::RW); } diff --git a/tests/unit/query_semantic.cpp b/tests/unit/query_semantic.cpp index c4bb966eb..50a52c828 100644 --- a/tests/unit/query_semantic.cpp +++ b/tests/unit/query_semantic.cpp @@ -1442,3 +1442,27 @@ TYPED_TEST(TestSymbolGenerator, PropertyCachingMixedLookups2) { ASSERT_TRUE(prop3_eval_mode == PropertyLookup::EvaluationMode::GET_ALL_PROPERTIES); ASSERT_TRUE(prop4_eval_mode == PropertyLookup::EvaluationMode::GET_ALL_PROPERTIES); } + +TYPED_TEST(TestSymbolGenerator, PatternComprehension) { + auto prop = 
this->dba.NameToProperty("prop"); + + // MATCH (n) RETURN [(n)-[edge]->(m) | m.prop] AS alias + auto query = QUERY(SINGLE_QUERY( + MATCH(PATTERN(NODE("n"))), + RETURN(NEXPR("alias", PATTERN_COMPREHENSION(nullptr, + PATTERN(NODE("n"), EDGE("edge", EdgeAtom::Direction::BOTH, {}, false), + NODE("m", std::nullopt, false)), + nullptr, PROPERTY_LOOKUP(this->dba, "m", prop)))))); + + auto symbol_table = MakeSymbolTable(query); + ASSERT_EQ(symbol_table.max_position(), 7); + + memgraph::query::plan::UsedSymbolsCollector collector(symbol_table); + auto *ret = dynamic_cast<Return *>(query->single_query_->clauses_[1]); + auto *pc = dynamic_cast<PatternComprehension *>(ret->body_.named_expressions[0]->expression_); + + pc->Accept(collector); + + // n, edge, m, Path + ASSERT_EQ(collector.symbols_.size(), 4); +} diff --git a/tests/unit/raft_log_serialization.cpp b/tests/unit/raft_log_serialization.cpp new file mode 100644 index 000000000..8550cf5b8 --- /dev/null +++ b/tests/unit/raft_log_serialization.cpp @@ -0,0 +1,151 @@ +// Copyright 2024 Memgraph Ltd. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +// License, and you may not use this file except in compliance with the Business Source License. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +#include "coordination/coordinator_config.hpp" +#include "nuraft/coordinator_state_machine.hpp" +#include "nuraft/raft_log_action.hpp" +#include "utils/file.hpp" +#include "utils/uuid.hpp" + +#include <gflags/gflags.h> +#include <gtest/gtest.h> +#include "json/json.hpp" + +using memgraph::coordination::CoordinatorClientConfig; +using memgraph::coordination::CoordinatorStateMachine; +using memgraph::coordination::RaftLogAction; +using memgraph::coordination::ReplClientInfo; +using memgraph::replication_coordination_glue::ReplicationMode; +using memgraph::utils::UUID; + +class RaftLogSerialization : public ::testing::Test { + protected: + void SetUp() override {} + + void TearDown() override {} + + std::filesystem::path test_folder_{std::filesystem::temp_directory_path() / "MG_tests_unit_raft_log_serialization"}; +}; + +TEST_F(RaftLogSerialization, ReplClientInfo) { + ReplClientInfo info{"instance_name", ReplicationMode::SYNC, "127.0.0.1", 10111}; + + nlohmann::json j = info; + ReplClientInfo info2 = j.get<memgraph::coordination::ReplClientInfo>(); + + ASSERT_EQ(info, info2); +} + +TEST_F(RaftLogSerialization, CoordinatorClientConfig) { + CoordinatorClientConfig config{"instance3", + "127.0.0.1", + 10112, + std::chrono::seconds{1}, + std::chrono::seconds{5}, + std::chrono::seconds{10}, + {"instance_name", ReplicationMode::ASYNC, "replication_ip_address", 10001}, + .ssl = std::nullopt}; + + nlohmann::json j = config; + CoordinatorClientConfig config2 = j.get<memgraph::coordination::CoordinatorClientConfig>(); + + ASSERT_EQ(config, config2); +} + +TEST_F(RaftLogSerialization, RaftLogActionRegister) { + auto action = RaftLogAction::REGISTER_REPLICATION_INSTANCE; + + nlohmann::json j = action; + RaftLogAction action2 = j.get<memgraph::coordination::RaftLogAction>(); + + ASSERT_EQ(action, action2); +} + +TEST_F(RaftLogSerialization, RaftLogActionUnregister) { + auto action = RaftLogAction::UNREGISTER_REPLICATION_INSTANCE; + + nlohmann::json j = action; + RaftLogAction 
action2 = j.get<memgraph::coordination::RaftLogAction>(); + + ASSERT_EQ(action, action2); +} + +TEST_F(RaftLogSerialization, RaftLogActionPromote) { + auto action = RaftLogAction::SET_INSTANCE_AS_MAIN; + + nlohmann::json j = action; + RaftLogAction action2 = j.get<memgraph::coordination::RaftLogAction>(); + + ASSERT_EQ(action, action2); +} + +TEST_F(RaftLogSerialization, RaftLogActionDemote) { + auto action = RaftLogAction::SET_INSTANCE_AS_REPLICA; + + nlohmann::json j = action; + RaftLogAction action2 = j.get<memgraph::coordination::RaftLogAction>(); + + ASSERT_EQ(action, action2); +} + +TEST_F(RaftLogSerialization, RaftLogActionUpdateUUID) { + auto action = RaftLogAction::UPDATE_UUID; + + nlohmann::json j = action; + RaftLogAction action2 = j.get<memgraph::coordination::RaftLogAction>(); + + ASSERT_EQ(action, action2); +} + +TEST_F(RaftLogSerialization, RegisterInstance) { + CoordinatorClientConfig config{"instance3", + "127.0.0.1", + 10112, + std::chrono::seconds{1}, + std::chrono::seconds{5}, + std::chrono::seconds{10}, + {"instance_name", ReplicationMode::ASYNC, "replication_ip_address", 10001}, + .ssl = std::nullopt}; + + auto buffer = CoordinatorStateMachine::SerializeRegisterInstance(config); + auto [payload, action] = CoordinatorStateMachine::DecodeLog(*buffer); + ASSERT_EQ(action, RaftLogAction::REGISTER_REPLICATION_INSTANCE); + ASSERT_EQ(config, std::get<CoordinatorClientConfig>(payload)); +} + +TEST_F(RaftLogSerialization, UnregisterInstance) { + auto buffer = CoordinatorStateMachine::SerializeUnregisterInstance("instance3"); + auto [payload, action] = CoordinatorStateMachine::DecodeLog(*buffer); + ASSERT_EQ(action, RaftLogAction::UNREGISTER_REPLICATION_INSTANCE); + ASSERT_EQ("instance3", std::get<std::string>(payload)); +} + +TEST_F(RaftLogSerialization, SetInstanceAsMain) { + auto buffer = CoordinatorStateMachine::SerializeSetInstanceAsMain("instance3"); + auto [payload, action] = CoordinatorStateMachine::DecodeLog(*buffer); + ASSERT_EQ(action, 
RaftLogAction::SET_INSTANCE_AS_MAIN); + ASSERT_EQ("instance3", std::get<std::string>(payload)); +} + +TEST_F(RaftLogSerialization, SetInstanceAsReplica) { + auto buffer = CoordinatorStateMachine::SerializeSetInstanceAsReplica("instance3"); + auto [payload, action] = CoordinatorStateMachine::DecodeLog(*buffer); + ASSERT_EQ(action, RaftLogAction::SET_INSTANCE_AS_REPLICA); + ASSERT_EQ("instance3", std::get<std::string>(payload)); +} + +TEST_F(RaftLogSerialization, UpdateUUID) { + UUID uuid; + auto buffer = CoordinatorStateMachine::SerializeUpdateUUID(uuid); + auto [payload, action] = CoordinatorStateMachine::DecodeLog(*buffer); + ASSERT_EQ(action, RaftLogAction::UPDATE_UUID); + ASSERT_EQ(uuid, std::get<UUID>(payload)); +} diff --git a/tests/unit/storage_rocks.cpp b/tests/unit/storage_rocks.cpp index 5cdaf4691..539cf3e0a 100644 --- a/tests/unit/storage_rocks.cpp +++ b/tests/unit/storage_rocks.cpp @@ -17,8 +17,6 @@ #include <unordered_set> #include "disk_test_utils.hpp" -#include "query/common.hpp" -#include "query/db_accessor.hpp" #include "storage/v2/delta.hpp" #include "storage/v2/disk/storage.hpp" #include "storage/v2/id_types.hpp" diff --git a/tests/unit/storage_v2_decoder_encoder.cpp b/tests/unit/storage_v2_decoder_encoder.cpp index 9b627cb77..15db49b1c 100644 --- a/tests/unit/storage_v2_decoder_encoder.cpp +++ b/tests/unit/storage_v2_decoder_encoder.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. 
// // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -337,6 +337,7 @@ TEST_F(DecoderEncoderTest, PropertyValueInvalidMarker) { case memgraph::storage::durability::Marker::SECTION_CONSTRAINTS: case memgraph::storage::durability::Marker::SECTION_DELTA: case memgraph::storage::durability::Marker::SECTION_EPOCH_HISTORY: + case memgraph::storage::durability::Marker::SECTION_EDGE_INDICES: case memgraph::storage::durability::Marker::SECTION_OFFSETS: case memgraph::storage::durability::Marker::DELTA_VERTEX_CREATE: case memgraph::storage::durability::Marker::DELTA_VERTEX_DELETE: @@ -355,6 +356,8 @@ TEST_F(DecoderEncoderTest, PropertyValueInvalidMarker) { case memgraph::storage::durability::Marker::DELTA_LABEL_PROPERTY_INDEX_DROP: case memgraph::storage::durability::Marker::DELTA_LABEL_PROPERTY_INDEX_STATS_SET: case memgraph::storage::durability::Marker::DELTA_LABEL_PROPERTY_INDEX_STATS_CLEAR: + case memgraph::storage::durability::Marker::DELTA_EDGE_TYPE_INDEX_CREATE: + case memgraph::storage::durability::Marker::DELTA_EDGE_TYPE_INDEX_DROP: case memgraph::storage::durability::Marker::DELTA_EXISTENCE_CONSTRAINT_CREATE: case memgraph::storage::durability::Marker::DELTA_EXISTENCE_CONSTRAINT_DROP: case memgraph::storage::durability::Marker::DELTA_UNIQUE_CONSTRAINT_CREATE: diff --git a/tests/unit/storage_v2_durability_inmemory.cpp b/tests/unit/storage_v2_durability_inmemory.cpp index 54671077f..7794f2ab9 100644 --- a/tests/unit/storage_v2_durability_inmemory.cpp +++ b/tests/unit/storage_v2_durability_inmemory.cpp @@ -69,6 +69,7 @@ class DurabilityTest : public ::testing::TestWithParam<bool> { ONLY_EXTENDED, ONLY_EXTENDED_WITH_BASE_INDICES_AND_CONSTRAINTS, BASE_WITH_EXTENDED, + BASE_WITH_EDGE_TYPE_INDEXED, }; public: @@ -270,6 +271,15 @@ class DurabilityTest : public ::testing::TestWithParam<bool> { if (single_transaction) 
ASSERT_FALSE(acc->Commit().HasError()); } + void CreateEdgeIndex(memgraph::storage::Storage *store, memgraph::storage::EdgeTypeId edge_type) { + { + // Create edge-type index. + auto unique_acc = store->UniqueAccess(ReplicationRole::MAIN); + ASSERT_FALSE(unique_acc->CreateIndex(edge_type).HasError()); + ASSERT_FALSE(unique_acc->Commit().HasError()); + } + } + void VerifyDataset(memgraph::storage::Storage *store, DatasetType type, bool properties_on_edges, bool verify_info = true) { auto base_label_indexed = store->NameToLabel("base_indexed"); @@ -310,13 +320,19 @@ class DurabilityTest : public ::testing::TestWithParam<bool> { UnorderedElementsAre(std::make_pair(base_label_indexed, property_id), std::make_pair(extended_label_indexed, property_count))); break; + case DatasetType::BASE_WITH_EDGE_TYPE_INDEXED: + ASSERT_THAT(info.label, UnorderedElementsAre(base_label_unindexed)); + ASSERT_THAT(info.label_property, UnorderedElementsAre(std::make_pair(base_label_indexed, property_id))); + ASSERT_THAT(info.edge_type, UnorderedElementsAre(et1)); + break; } } // Verify index statistics { switch (type) { - case DatasetType::ONLY_BASE: { + case DatasetType::ONLY_BASE: + case DatasetType::BASE_WITH_EDGE_TYPE_INDEXED: { const auto l_stats = acc->GetIndexStats(base_label_unindexed); ASSERT_TRUE(l_stats); ASSERT_EQ(l_stats->count, 1); @@ -379,6 +395,7 @@ class DurabilityTest : public ::testing::TestWithParam<bool> { auto info = acc->ListAllConstraints(); switch (type) { case DatasetType::ONLY_BASE: + case DatasetType::BASE_WITH_EDGE_TYPE_INDEXED: ASSERT_THAT(info.existence, UnorderedElementsAre(std::make_pair(base_label_unindexed, property_id))); ASSERT_THAT(info.unique, UnorderedElementsAre( std::make_pair(base_label_unindexed, std::set{property_id, property_extra}))); @@ -402,6 +419,7 @@ class DurabilityTest : public ::testing::TestWithParam<bool> { bool have_base_dataset = false; bool have_extended_dataset = false; + bool have_edge_type_indexed_dataset = false; switch (type) { 
case DatasetType::ONLY_BASE: case DatasetType::ONLY_BASE_WITH_EXTENDED_INDICES_AND_CONSTRAINTS: @@ -415,6 +433,9 @@ class DurabilityTest : public ::testing::TestWithParam<bool> { have_base_dataset = true; have_extended_dataset = true; break; + case DatasetType::BASE_WITH_EDGE_TYPE_INDEXED: + have_base_dataset = true; + have_edge_type_indexed_dataset = true; } // Verify base dataset. @@ -675,6 +696,19 @@ class DurabilityTest : public ::testing::TestWithParam<bool> { } } + if (have_edge_type_indexed_dataset) { + MG_ASSERT(properties_on_edges, "Edge-type indexing needs --properties-on-edges!"); + // Verify edge-type indices. + { + std::vector<memgraph::storage::EdgeAccessor> edges; + edges.reserve(kNumBaseEdges / 2); + for (auto edge : acc->Edges(et1, memgraph::storage::View::OLD)) { + edges.push_back(edge); + } + ASSERT_EQ(edges.size(), kNumBaseEdges / 2); + } + } + if (verify_info) { auto info = store->GetBaseInfo(); if (have_base_dataset) { @@ -2972,3 +3006,42 @@ TEST_P(DurabilityTest, ConstraintsRecoveryFunctionSetting) { &variant_existence_constraint_creation_func); MG_ASSERT(pval_existence, "Chose wrong type of function for recovery of existence constraint data"); } + +// NOLINTNEXTLINE(hicpp-special-member-functions) +TEST_P(DurabilityTest, EdgeTypeIndexRecovered) { + if (GetParam() == false) { + return; + } + // Create snapshot. 
+ { + memgraph::storage::Config config{.salient.items = {.properties_on_edges = GetParam()}, + .durability = {.storage_directory = storage_directory, .snapshot_on_exit = true}}; + memgraph::replication::ReplicationState repl_state{memgraph::storage::ReplicationStateRootPath(config)}; + memgraph::dbms::Database db{config, repl_state}; + CreateBaseDataset(db.storage(), GetParam()); + VerifyDataset(db.storage(), DatasetType::ONLY_BASE, GetParam()); + CreateEdgeIndex(db.storage(), db.storage()->NameToEdgeType("base_et1")); + VerifyDataset(db.storage(), DatasetType::BASE_WITH_EDGE_TYPE_INDEXED, GetParam()); + } + + ASSERT_EQ(GetSnapshotsList().size(), 1); + ASSERT_EQ(GetBackupSnapshotsList().size(), 0); + ASSERT_EQ(GetWalsList().size(), 0); + ASSERT_EQ(GetBackupWalsList().size(), 0); + + // Recover snapshot. + memgraph::storage::Config config{.salient.items = {.properties_on_edges = GetParam()}, + .durability = {.storage_directory = storage_directory, .recover_on_startup = true}}; + memgraph::replication::ReplicationState repl_state{memgraph::storage::ReplicationStateRootPath(config)}; + memgraph::dbms::Database db{config, repl_state}; + VerifyDataset(db.storage(), DatasetType::BASE_WITH_EDGE_TYPE_INDEXED, GetParam()); + + // Try to use the storage. 
+ { + auto acc = db.Access(); + auto vertex = acc->CreateVertex(); + auto edge = acc->CreateEdge(&vertex, &vertex, db.storage()->NameToEdgeType("et")); + ASSERT_TRUE(edge.HasValue()); + ASSERT_FALSE(acc->Commit().HasError()); + } +} diff --git a/tests/unit/storage_v2_indices.cpp b/tests/unit/storage_v2_indices.cpp index 8ee053087..23c82313d 100644 --- a/tests/unit/storage_v2_indices.cpp +++ b/tests/unit/storage_v2_indices.cpp @@ -44,6 +44,8 @@ class IndexTest : public testing::Test { this->prop_val = acc->NameToProperty("val"); this->label1 = acc->NameToLabel("label1"); this->label2 = acc->NameToLabel("label2"); + this->edge_type_id1 = acc->NameToEdgeType("edge_type_1"); + this->edge_type_id2 = acc->NameToEdgeType("edge_type_2"); vertex_id = 0; } @@ -61,6 +63,8 @@ class IndexTest : public testing::Test { PropertyId prop_val; LabelId label1; LabelId label2; + EdgeTypeId edge_type_id1; + EdgeTypeId edge_type_id2; VertexAccessor CreateVertex(Storage::Accessor *accessor) { VertexAccessor vertex = accessor->CreateVertex(); @@ -68,11 +72,23 @@ class IndexTest : public testing::Test { return vertex; } + VertexAccessor CreateVertexWithoutProperties(Storage::Accessor *accessor) { + VertexAccessor vertex = accessor->CreateVertex(); + return vertex; + } + + EdgeAccessor CreateEdge(VertexAccessor *from, VertexAccessor *to, EdgeTypeId edge_type, Storage::Accessor *accessor) { + auto edge = accessor->CreateEdge(from, to, edge_type); + MG_ASSERT(!edge.HasError()); + MG_ASSERT(!edge->SetProperty(this->prop_id, PropertyValue(vertex_id++)).HasError()); + return edge.GetValue(); + } + template <class TIterable> std::vector<int64_t> GetIds(TIterable iterable, View view = View::OLD) { std::vector<int64_t> ret; - for (auto vertex : iterable) { - ret.push_back(vertex.GetProperty(this->prop_id, view)->ValueInt()); + for (auto item : iterable) { + ret.push_back(item.GetProperty(this->prop_id, view)->ValueInt()); } return ret; } @@ -1292,3 +1308,368 @@ TYPED_TEST(IndexTest, 
LabelPropertyIndexClearOldDataFromDisk) { ASSERT_EQ(disk_test_utils::GetRealNumberOfEntriesInRocksDB(tx_db), 1); } } + +// NOLINTNEXTLINE(hicpp-special-member-functions) +TYPED_TEST(IndexTest, EdgeTypeIndexCreate) { + if constexpr ((std::is_same_v<TypeParam, memgraph::storage::InMemoryStorage>)) { + { + auto acc = this->storage->Access(ReplicationRole::MAIN); + EXPECT_FALSE(acc->EdgeTypeIndexExists(this->edge_type_id1)); + EXPECT_EQ(acc->ListAllIndices().edge_type.size(), 0); + } + + { + auto acc = this->storage->Access(ReplicationRole::MAIN); + for (int i = 0; i < 10; ++i) { + auto vertex_from = this->CreateVertexWithoutProperties(acc.get()); + auto vertex_to = this->CreateVertexWithoutProperties(acc.get()); + this->CreateEdge(&vertex_from, &vertex_to, i % 2 ? this->edge_type_id1 : this->edge_type_id2, acc.get()); + } + ASSERT_NO_ERROR(acc->Commit()); + } + + { + auto unique_acc = this->storage->UniqueAccess(ReplicationRole::MAIN); + EXPECT_FALSE(unique_acc->CreateIndex(this->edge_type_id1).HasError()); + ASSERT_NO_ERROR(unique_acc->Commit()); + } + + { + auto acc = this->storage->Access(ReplicationRole::MAIN); + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id1, View::OLD), View::OLD), + UnorderedElementsAre(1, 3, 5, 7, 9)); + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id1, View::NEW), View::NEW), + UnorderedElementsAre(1, 3, 5, 7, 9)); + } + + { + auto acc = this->storage->Access(ReplicationRole::MAIN); + for (int i = 10; i < 20; ++i) { + auto vertex_from = this->CreateVertexWithoutProperties(acc.get()); + auto vertex_to = this->CreateVertexWithoutProperties(acc.get()); + this->CreateEdge(&vertex_from, &vertex_to, i % 2 ? 
this->edge_type_id1 : this->edge_type_id2, acc.get()); + } + + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id1, View::OLD), View::OLD), + UnorderedElementsAre(1, 3, 5, 7, 9)); + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id1, View::NEW), View::NEW), + UnorderedElementsAre(1, 3, 5, 7, 9, 11, 13, 15, 17, 19)); + + acc->AdvanceCommand(); + + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id1, View::OLD), View::NEW), + UnorderedElementsAre(1, 3, 5, 7, 9, 11, 13, 15, 17, 19)); + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id1, View::NEW), View::NEW), + UnorderedElementsAre(1, 3, 5, 7, 9, 11, 13, 15, 17, 19)); + + acc->Abort(); + } + + { + auto acc = this->storage->Access(ReplicationRole::MAIN); + for (int i = 10; i < 20; ++i) { + auto vertex_from = this->CreateVertexWithoutProperties(acc.get()); + auto vertex_to = this->CreateVertexWithoutProperties(acc.get()); + this->CreateEdge(&vertex_from, &vertex_to, i % 2 ? this->edge_type_id1 : this->edge_type_id2, acc.get()); + } + + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id1, View::OLD), View::OLD), + UnorderedElementsAre(1, 3, 5, 7, 9)); + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id1, View::NEW), View::NEW), + UnorderedElementsAre(1, 3, 5, 7, 9, 21, 23, 25, 27, 29)); + + acc->AdvanceCommand(); + + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id1, View::OLD), View::NEW), + UnorderedElementsAre(1, 3, 5, 7, 9, 21, 23, 25, 27, 29)); + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id1, View::NEW), View::NEW), + UnorderedElementsAre(1, 3, 5, 7, 9, 21, 23, 25, 27, 29)); + + ASSERT_NO_ERROR(acc->Commit()); + } + + { + auto acc = this->storage->Access(ReplicationRole::MAIN); + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id1, View::OLD), View::OLD), + UnorderedElementsAre(1, 3, 5, 7, 9, 21, 23, 25, 27, 29)); + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id1, View::NEW), View::NEW), + UnorderedElementsAre(1, 3, 5, 7, 9, 21, 23, 25, 27, 29)); + + 
acc->AdvanceCommand(); + + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id1, View::OLD), View::NEW), + UnorderedElementsAre(1, 3, 5, 7, 9, 21, 23, 25, 27, 29)); + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id1, View::NEW), View::NEW), + UnorderedElementsAre(1, 3, 5, 7, 9, 21, 23, 25, 27, 29)); + + ASSERT_NO_ERROR(acc->Commit()); + } + } +} + +// NOLINTNEXTLINE(hicpp-special-member-functions) +TYPED_TEST(IndexTest, EdgeTypeIndexDrop) { + if constexpr ((std::is_same_v<TypeParam, memgraph::storage::InMemoryStorage>)) { + { + auto acc = this->storage->Access(ReplicationRole::MAIN); + EXPECT_FALSE(acc->EdgeTypeIndexExists(this->edge_type_id1)); + EXPECT_EQ(acc->ListAllIndices().edge_type.size(), 0); + } + + { + auto acc = this->storage->Access(ReplicationRole::MAIN); + for (int i = 0; i < 10; ++i) { + auto vertex_from = this->CreateVertexWithoutProperties(acc.get()); + auto vertex_to = this->CreateVertexWithoutProperties(acc.get()); + this->CreateEdge(&vertex_from, &vertex_to, i % 2 ? 
this->edge_type_id1 : this->edge_type_id2, acc.get()); + } + ASSERT_NO_ERROR(acc->Commit()); + } + + { + auto unique_acc = this->storage->UniqueAccess(ReplicationRole::MAIN); + EXPECT_FALSE(unique_acc->CreateIndex(this->edge_type_id1).HasError()); + ASSERT_NO_ERROR(unique_acc->Commit()); + } + + { + auto acc = this->storage->Access(ReplicationRole::MAIN); + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id1, View::OLD), View::OLD), + UnorderedElementsAre(1, 3, 5, 7, 9)); + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id1, View::NEW), View::NEW), + UnorderedElementsAre(1, 3, 5, 7, 9)); + } + + { + auto unique_acc = this->storage->UniqueAccess(ReplicationRole::MAIN); + EXPECT_FALSE(unique_acc->DropIndex(this->edge_type_id1).HasError()); + ASSERT_NO_ERROR(unique_acc->Commit()); + } + { + auto acc = this->storage->Access(ReplicationRole::MAIN); + EXPECT_FALSE(acc->EdgeTypeIndexExists(this->edge_type_id1)); + EXPECT_EQ(acc->ListAllIndices().label.size(), 0); + } + + { + auto unique_acc = this->storage->UniqueAccess(ReplicationRole::MAIN); + EXPECT_TRUE(unique_acc->DropIndex(this->edge_type_id1).HasError()); + ASSERT_NO_ERROR(unique_acc->Commit()); + } + { + auto acc = this->storage->Access(ReplicationRole::MAIN); + EXPECT_FALSE(acc->EdgeTypeIndexExists(this->edge_type_id1)); + EXPECT_EQ(acc->ListAllIndices().label.size(), 0); + } + + { + auto acc = this->storage->Access(ReplicationRole::MAIN); + for (int i = 10; i < 20; ++i) { + auto vertex_from = this->CreateVertexWithoutProperties(acc.get()); + auto vertex_to = this->CreateVertexWithoutProperties(acc.get()); + this->CreateEdge(&vertex_from, &vertex_to, i % 2 ? 
this->edge_type_id1 : this->edge_type_id2, acc.get()); + } + ASSERT_NO_ERROR(acc->Commit()); + } + + { + auto unique_acc = this->storage->UniqueAccess(ReplicationRole::MAIN); + EXPECT_FALSE(unique_acc->CreateIndex(this->edge_type_id1).HasError()); + ASSERT_NO_ERROR(unique_acc->Commit()); + } + { + auto acc = this->storage->Access(ReplicationRole::MAIN); + EXPECT_TRUE(acc->EdgeTypeIndexExists(this->edge_type_id1)); + EXPECT_THAT(acc->ListAllIndices().edge_type, UnorderedElementsAre(this->edge_type_id1)); + } + + { + auto acc = this->storage->Access(ReplicationRole::MAIN); + + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id1, View::OLD), View::OLD), + UnorderedElementsAre(1, 3, 5, 7, 9, 11, 13, 15, 17, 19)); + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id1, View::NEW), View::NEW), + UnorderedElementsAre(1, 3, 5, 7, 9, 11, 13, 15, 17, 19)); + + acc->AdvanceCommand(); + + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id1, View::OLD), View::NEW), + UnorderedElementsAre(1, 3, 5, 7, 9, 11, 13, 15, 17, 19)); + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id1, View::NEW), View::NEW), + UnorderedElementsAre(1, 3, 5, 7, 9, 11, 13, 15, 17, 19)); + } + } +} + +// NOLINTNEXTLINE(hicpp-special-member-functions) +TYPED_TEST(IndexTest, EdgeTypeIndexBasic) { + // The following steps are performed and index correctness is validated after + // each step: + // 1. Create 10 edges numbered from 0 to 9. + // 2. Add EdgeType1 to odd numbered, and EdgeType2 to even numbered edges. + // 3. Delete even numbered edges. 
+ if constexpr ((std::is_same_v<TypeParam, memgraph::storage::InMemoryStorage>)) { + { + auto unique_acc = this->storage->UniqueAccess(ReplicationRole::MAIN); + EXPECT_FALSE(unique_acc->CreateIndex(this->edge_type_id1).HasError()); + ASSERT_NO_ERROR(unique_acc->Commit()); + } + { + auto unique_acc = this->storage->UniqueAccess(ReplicationRole::MAIN); + EXPECT_FALSE(unique_acc->CreateIndex(this->edge_type_id2).HasError()); + ASSERT_NO_ERROR(unique_acc->Commit()); + } + + auto acc = this->storage->Access(ReplicationRole::MAIN); + EXPECT_THAT(acc->ListAllIndices().edge_type, UnorderedElementsAre(this->edge_type_id1, this->edge_type_id2)); + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id1, View::OLD), View::OLD), IsEmpty()); + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id2, View::OLD), View::OLD), IsEmpty()); + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id1, View::NEW), View::NEW), IsEmpty()); + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id2, View::NEW), View::NEW), IsEmpty()); + + for (int i = 0; i < 10; ++i) { + auto vertex_from = this->CreateVertexWithoutProperties(acc.get()); + auto vertex_to = this->CreateVertexWithoutProperties(acc.get()); + this->CreateEdge(&vertex_from, &vertex_to, i % 2 ? 
this->edge_type_id1 : this->edge_type_id2, acc.get()); + } + + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id1, View::OLD), View::OLD), IsEmpty()); + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id2, View::OLD), View::OLD), IsEmpty()); + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id1, View::NEW), View::NEW), + UnorderedElementsAre(1, 3, 5, 7, 9)); + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id2, View::NEW), View::NEW), + UnorderedElementsAre(0, 2, 4, 6, 8)); + + acc->AdvanceCommand(); + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id1, View::OLD), View::OLD), + UnorderedElementsAre(1, 3, 5, 7, 9)); + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id2, View::OLD), View::OLD), + UnorderedElementsAre(0, 2, 4, 6, 8)); + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id1, View::NEW), View::NEW), + UnorderedElementsAre(1, 3, 5, 7, 9)); + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id2, View::NEW), View::NEW), + UnorderedElementsAre(0, 2, 4, 6, 8)); + + for (auto vertex : acc->Vertices(View::OLD)) { + auto edges = vertex.OutEdges(View::OLD)->edges; + for (auto &edge : edges) { + int64_t id = edge.GetProperty(this->prop_id, View::OLD)->ValueInt(); + if (id % 2 == 0) { + ASSERT_NO_ERROR(acc->DetachDelete({}, {&edge}, false)); + } + } + } + + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id1, View::OLD), View::OLD), + UnorderedElementsAre(1, 3, 5, 7, 9)); + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id2, View::OLD), View::OLD), + UnorderedElementsAre(0, 2, 4, 6, 8)); + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id1, View::NEW), View::NEW), + UnorderedElementsAre(1, 3, 5, 7, 9)); + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id2, View::NEW), View::NEW), IsEmpty()); + + acc->AdvanceCommand(); + + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id1, View::OLD), View::OLD), + UnorderedElementsAre(1, 3, 5, 7, 9)); + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id2, View::OLD), 
View::OLD), IsEmpty()); + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id1, View::NEW), View::NEW), + UnorderedElementsAre(1, 3, 5, 7, 9)); + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id2, View::NEW), View::NEW), IsEmpty()); + } +} + +// NOLINTNEXTLINE(hicpp-special-member-functions) +TYPED_TEST(IndexTest, EdgeTypeIndexTransactionalIsolation) { + if constexpr ((std::is_same_v<TypeParam, memgraph::storage::InMemoryStorage>)) { + // Check that transactions only see entries they are supposed to see. + { + auto unique_acc = this->storage->UniqueAccess(ReplicationRole::MAIN); + EXPECT_FALSE(unique_acc->CreateIndex(this->edge_type_id1).HasError()); + ASSERT_NO_ERROR(unique_acc->Commit()); + } + { + auto unique_acc = this->storage->UniqueAccess(ReplicationRole::MAIN); + EXPECT_FALSE(unique_acc->CreateIndex(this->edge_type_id2).HasError()); + ASSERT_NO_ERROR(unique_acc->Commit()); + } + + auto acc_before = this->storage->Access(ReplicationRole::MAIN); + auto acc = this->storage->Access(ReplicationRole::MAIN); + auto acc_after = this->storage->Access(ReplicationRole::MAIN); + + for (int i = 0; i < 5; ++i) { + auto vertex_from = this->CreateVertexWithoutProperties(acc.get()); + auto vertex_to = this->CreateVertexWithoutProperties(acc.get()); + this->CreateEdge(&vertex_from, &vertex_to, this->edge_type_id1, acc.get()); + } + + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id1, View::NEW), View::NEW), + UnorderedElementsAre(0, 1, 2, 3, 4)); + + EXPECT_THAT(this->GetIds(acc_before->Edges(this->edge_type_id1, View::NEW), View::NEW), IsEmpty()); + + EXPECT_THAT(this->GetIds(acc_after->Edges(this->edge_type_id1, View::NEW), View::NEW), IsEmpty()); + + ASSERT_NO_ERROR(acc->Commit()); + + auto acc_after_commit = this->storage->Access(ReplicationRole::MAIN); + + EXPECT_THAT(this->GetIds(acc_before->Edges(this->edge_type_id1, View::NEW), View::NEW), IsEmpty()); + + EXPECT_THAT(this->GetIds(acc_after->Edges(this->edge_type_id1, View::NEW), View::NEW), IsEmpty()); 
+ + EXPECT_THAT(this->GetIds(acc_after_commit->Edges(this->edge_type_id1, View::NEW), View::NEW), + UnorderedElementsAre(0, 1, 2, 3, 4)); + } +} + +// NOLINTNEXTLINE(hicpp-special-member-functions) +TYPED_TEST(IndexTest, EdgeTypeIndexCountEstimate) { + if constexpr ((std::is_same_v<TypeParam, memgraph::storage::InMemoryStorage>)) { + { + auto unique_acc = this->storage->UniqueAccess(ReplicationRole::MAIN); + EXPECT_FALSE(unique_acc->CreateIndex(this->edge_type_id1).HasError()); + ASSERT_NO_ERROR(unique_acc->Commit()); + } + { + auto unique_acc = this->storage->UniqueAccess(ReplicationRole::MAIN); + EXPECT_FALSE(unique_acc->CreateIndex(this->edge_type_id2).HasError()); + ASSERT_NO_ERROR(unique_acc->Commit()); + } + + auto acc = this->storage->Access(ReplicationRole::MAIN); + for (int i = 0; i < 20; ++i) { + auto vertex_from = this->CreateVertexWithoutProperties(acc.get()); + auto vertex_to = this->CreateVertexWithoutProperties(acc.get()); + this->CreateEdge(&vertex_from, &vertex_to, i % 3 ? 
this->edge_type_id1 : this->edge_type_id2, acc.get()); + } + + EXPECT_EQ(acc->ApproximateEdgeCount(this->edge_type_id1), 13); + EXPECT_EQ(acc->ApproximateEdgeCount(this->edge_type_id2), 7); + } +} + +// NOLINTNEXTLINE(hicpp-special-member-functions) +TYPED_TEST(IndexTest, EdgeTypeIndexRepeatingEdgeTypesBetweenSameVertices) { + if constexpr ((std::is_same_v<TypeParam, memgraph::storage::InMemoryStorage>)) { + { + auto unique_acc = this->storage->UniqueAccess(ReplicationRole::MAIN); + EXPECT_FALSE(unique_acc->CreateIndex(this->edge_type_id1).HasError()); + ASSERT_NO_ERROR(unique_acc->Commit()); + } + + auto acc = this->storage->Access(ReplicationRole::MAIN); + auto vertex_from = this->CreateVertexWithoutProperties(acc.get()); + auto vertex_to = this->CreateVertexWithoutProperties(acc.get()); + + for (int i = 0; i < 5; ++i) { + this->CreateEdge(&vertex_from, &vertex_to, this->edge_type_id1, acc.get()); + } + + EXPECT_EQ(acc->ApproximateEdgeCount(this->edge_type_id1), 5); + + EXPECT_THAT(this->GetIds(acc->Edges(this->edge_type_id1, View::NEW), View::NEW), + UnorderedElementsAre(0, 1, 2, 3, 4)); + } +} diff --git a/tests/unit/storage_v2_wal_file.cpp b/tests/unit/storage_v2_wal_file.cpp index 07a35d754..4094090f5 100644 --- a/tests/unit/storage_v2_wal_file.cpp +++ b/tests/unit/storage_v2_wal_file.cpp @@ -37,6 +37,10 @@ memgraph::storage::durability::WalDeltaData::Type StorageMetadataOperationToWalD return memgraph::storage::durability::WalDeltaData::Type::LABEL_INDEX_CREATE; case memgraph::storage::durability::StorageMetadataOperation::LABEL_INDEX_DROP: return memgraph::storage::durability::WalDeltaData::Type::LABEL_INDEX_DROP; + case memgraph::storage::durability::StorageMetadataOperation::EDGE_TYPE_INDEX_CREATE: + return memgraph::storage::durability::WalDeltaData::Type::EDGE_INDEX_CREATE; + case memgraph::storage::durability::StorageMetadataOperation::EDGE_TYPE_INDEX_DROP: + return memgraph::storage::durability::WalDeltaData::Type::EDGE_INDEX_DROP; case 
memgraph::storage::durability::StorageMetadataOperation::LABEL_INDEX_STATS_SET: return memgraph::storage::durability::WalDeltaData::Type::LABEL_INDEX_STATS_SET; case memgraph::storage::durability::StorageMetadataOperation::LABEL_INDEX_STATS_CLEAR: @@ -70,7 +74,7 @@ class DeltaGenerator final { explicit Transaction(DeltaGenerator *gen) : gen_(gen), transaction_(gen->transaction_id_++, gen->timestamp_++, memgraph::storage::IsolationLevel::SNAPSHOT_ISOLATION, - gen->storage_mode_, false) {} + gen->storage_mode_, false, false) {} public: memgraph::storage::Vertex *CreateVertex() { @@ -280,6 +284,41 @@ class DeltaGenerator final { case memgraph::storage::durability::StorageMetadataOperation::UNIQUE_CONSTRAINT_DROP: data.operation_label_properties.label = label; data.operation_label_properties.properties = properties; + break; + case memgraph::storage::durability::StorageMetadataOperation::EDGE_TYPE_INDEX_CREATE: + case memgraph::storage::durability::StorageMetadataOperation::EDGE_TYPE_INDEX_DROP: + MG_ASSERT(false, "Invalid function call!"); + } + data_.emplace_back(timestamp_, data); + } + } + + void AppendEdgeTypeOperation(memgraph::storage::durability::StorageMetadataOperation operation, + const std::string &edge_type) { + auto edge_type_id = memgraph::storage::EdgeTypeId::FromUint(mapper_.NameToId(edge_type)); + wal_file_.AppendOperation(operation, edge_type_id, timestamp_); + if (valid_) { + UpdateStats(timestamp_, 1); + memgraph::storage::durability::WalDeltaData data; + data.type = StorageMetadataOperationToWalDeltaDataType(operation); + switch (operation) { + case memgraph::storage::durability::StorageMetadataOperation::EDGE_TYPE_INDEX_CREATE: + case memgraph::storage::durability::StorageMetadataOperation::EDGE_TYPE_INDEX_DROP: + data.operation_edge_type.edge_type = edge_type; + break; + case memgraph::storage::durability::StorageMetadataOperation::LABEL_INDEX_CREATE: + case memgraph::storage::durability::StorageMetadataOperation::LABEL_INDEX_DROP: + case 
memgraph::storage::durability::StorageMetadataOperation::LABEL_INDEX_STATS_CLEAR: + case memgraph::storage::durability::StorageMetadataOperation::LABEL_PROPERTY_INDEX_STATS_CLEAR: + case memgraph::storage::durability::StorageMetadataOperation::LABEL_INDEX_STATS_SET: + case memgraph::storage::durability::StorageMetadataOperation::LABEL_PROPERTY_INDEX_CREATE: + case memgraph::storage::durability::StorageMetadataOperation::LABEL_PROPERTY_INDEX_DROP: + case memgraph::storage::durability::StorageMetadataOperation::EXISTENCE_CONSTRAINT_CREATE: + case memgraph::storage::durability::StorageMetadataOperation::EXISTENCE_CONSTRAINT_DROP: + case memgraph::storage::durability::StorageMetadataOperation::LABEL_PROPERTY_INDEX_STATS_SET: + case memgraph::storage::durability::StorageMetadataOperation::UNIQUE_CONSTRAINT_CREATE: + case memgraph::storage::durability::StorageMetadataOperation::UNIQUE_CONSTRAINT_DROP: + MG_ASSERT(false, "Invalid function call!"); } data_.emplace_back(timestamp_, data); } diff --git a/tests/unit/utils_memory.cpp b/tests/unit/utils_memory.cpp index 5173a5f7b..e46c6c1f9 100644 --- a/tests/unit/utils_memory.cpp +++ b/tests/unit/utils_memory.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Memgraph Ltd. +// Copyright 2024 Memgraph Ltd. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source @@ -195,134 +195,6 @@ TEST(MonotonicBufferResource, AllocationWithInitialBufferOnStack) { } } -// NOLINTNEXTLINE(hicpp-special-member-functions) -TEST(PoolResource, SingleSmallBlockAllocations) { - TestMemory test_mem; - const size_t max_blocks_per_chunk = 3U; - const size_t max_block_size = 64U; - memgraph::utils::PoolResource mem(max_blocks_per_chunk, max_block_size, &test_mem); - // Fill the first chunk. - CheckAllocation(&mem, 64U, 1U); - // May allocate more than once due to bookkeeping. 
- EXPECT_GE(test_mem.new_count_, 1U); - // Reset tracking and continue filling the first chunk. - test_mem.new_count_ = 0U; - CheckAllocation(&mem, 64U, 64U); - CheckAllocation(&mem, 64U); - EXPECT_EQ(test_mem.new_count_, 0U); - // Reset tracking and fill the second chunk - test_mem.new_count_ = 0U; - CheckAllocation(&mem, 64U, 32U); - auto *ptr1 = CheckAllocation(&mem, 32U, 64U); // this will become 64b block - auto *ptr2 = CheckAllocation(&mem, 64U, 32U); - // We expect one allocation for chunk and at most one for bookkeeping. - EXPECT_TRUE(test_mem.new_count_ >= 1U && test_mem.new_count_ <= 2U); - test_mem.delete_count_ = 0U; - mem.Deallocate(ptr1, 32U, 64U); - mem.Deallocate(ptr2, 64U, 32U); - EXPECT_EQ(test_mem.delete_count_, 0U); - mem.Release(); - EXPECT_GE(test_mem.delete_count_, 2U); - CheckAllocation(&mem, 64U, 1U); -} - -// NOLINTNEXTLINE(hicpp-special-member-functions) -TEST(PoolResource, MultipleSmallBlockAllocations) { - TestMemory test_mem; - const size_t max_blocks_per_chunk = 1U; - const size_t max_block_size = 64U; - memgraph::utils::PoolResource mem(max_blocks_per_chunk, max_block_size, &test_mem); - CheckAllocation(&mem, 64U); - CheckAllocation(&mem, 18U, 2U); - CheckAllocation(&mem, 24U, 8U); - // May allocate more than once per chunk due to bookkeeping. - EXPECT_GE(test_mem.new_count_, 3U); - // Reset tracking and fill the second chunk - test_mem.new_count_ = 0U; - CheckAllocation(&mem, 64U); - CheckAllocation(&mem, 18U, 2U); - CheckAllocation(&mem, 24U, 8U); - // We expect one allocation for chunk and at most one for bookkeeping. 
- EXPECT_TRUE(test_mem.new_count_ >= 3U && test_mem.new_count_ <= 6U); - mem.Release(); - EXPECT_GE(test_mem.delete_count_, 6U); - CheckAllocation(&mem, 64U); -} - -// NOLINTNEXTLINE(hicpp-special-member-functions) -TEST(PoolResource, BigBlockAllocations) { - TestMemory test_mem; - TestMemory test_mem_unpooled; - const size_t max_blocks_per_chunk = 3U; - const size_t max_block_size = 64U; - memgraph::utils::PoolResource mem(max_blocks_per_chunk, max_block_size, &test_mem, &test_mem_unpooled); - CheckAllocation(&mem, max_block_size + 1, 1U); - // May allocate more than once per block due to bookkeeping. - EXPECT_GE(test_mem_unpooled.new_count_, 1U); - CheckAllocation(&mem, max_block_size + 1, 1U); - EXPECT_GE(test_mem_unpooled.new_count_, 2U); - auto *ptr = CheckAllocation(&mem, max_block_size * 2, 1U); - EXPECT_GE(test_mem_unpooled.new_count_, 3U); - mem.Deallocate(ptr, max_block_size * 2, 1U); - EXPECT_GE(test_mem_unpooled.delete_count_, 1U); - mem.Release(); - EXPECT_GE(test_mem_unpooled.delete_count_, 3U); - CheckAllocation(&mem, max_block_size + 1, 1U); -} - -// NOLINTNEXTLINE(hicpp-special-member-functions) -TEST(PoolResource, BlockSizeIsNotMultipleOfAlignment) { - const size_t max_blocks_per_chunk = 3U; - const size_t max_block_size = 64U; - memgraph::utils::PoolResource mem(max_blocks_per_chunk, max_block_size); - EXPECT_THROW(mem.Allocate(64U, 24U), std::bad_alloc); - EXPECT_THROW(mem.Allocate(63U), std::bad_alloc); - EXPECT_THROW(mem.Allocate(max_block_size + 1, max_block_size), std::bad_alloc); -} - -// NOLINTNEXTLINE(hicpp-special-member-functions) -TEST(PoolResource, AllocationWithOverflow) { - { - const size_t max_blocks_per_chunk = 2U; - memgraph::utils::PoolResource mem(max_blocks_per_chunk, std::numeric_limits<size_t>::max()); - EXPECT_THROW(mem.Allocate(std::numeric_limits<size_t>::max(), 1U), std::bad_alloc); - // Throws because initial chunk block is aligned to - // memgraph::utils::Ceil2(block_size), which wraps in this case. 
- EXPECT_THROW(mem.Allocate((std::numeric_limits<size_t>::max() - 1U) / max_blocks_per_chunk, 1U), std::bad_alloc); - } - { - const size_t max_blocks_per_chunk = memgraph::utils::impl::Pool::MaxBlocksInChunk(); - memgraph::utils::PoolResource mem(max_blocks_per_chunk, std::numeric_limits<size_t>::max()); - EXPECT_THROW(mem.Allocate(std::numeric_limits<size_t>::max(), 1U), std::bad_alloc); - // Throws because initial chunk block is aligned to - // memgraph::utils::Ceil2(block_size), which wraps in this case. - EXPECT_THROW(mem.Allocate((std::numeric_limits<size_t>::max() - 1U) / max_blocks_per_chunk, 1U), std::bad_alloc); - } -} - -TEST(PoolResource, BlockDeallocation) { - TestMemory test_mem; - const size_t max_blocks_per_chunk = 2U; - const size_t max_block_size = 64U; - memgraph::utils::PoolResource mem(max_blocks_per_chunk, max_block_size, &test_mem); - auto *ptr = CheckAllocation(&mem, max_block_size); - test_mem.new_count_ = 0U; - // Do another allocation before deallocating `ptr`, so that we are sure that - // the chunk of 2 blocks is still alive and therefore `ptr` may be reused when - // it's deallocated. If we deallocate now, the implementation may choose to - // free the whole chunk, and we do not want that for the purposes of this - // test. - CheckAllocation(&mem, max_block_size); - EXPECT_EQ(test_mem.new_count_, 0U); - EXPECT_EQ(test_mem.delete_count_, 0U); - mem.Deallocate(ptr, max_block_size); - EXPECT_EQ(test_mem.delete_count_, 0U); - // CheckAllocation(&mem, max_block_size) will fail as PoolResource should - // reuse free blocks. 
- EXPECT_EQ(ptr, mem.Allocate(max_block_size)); - EXPECT_EQ(test_mem.new_count_, 0U); -} - class AllocationTrackingMemory final : public memgraph::utils::MemoryResource { public: std::vector<size_t> allocated_sizes_; diff --git a/tools/github/clang-tidy/clang-tidy-diff.py b/tools/github/clang-tidy/clang-tidy-diff.py index 1bdf1da25..609b2eedb 100755 --- a/tools/github/clang-tidy/clang-tidy-diff.py +++ b/tools/github/clang-tidy/clang-tidy-diff.py @@ -250,7 +250,6 @@ def main(): common_clang_tidy_args.append("-extra-arg=%s" % arg) for arg in args.extra_arg_before: common_clang_tidy_args.append("-extra-arg-before=%s" % arg) - for name in lines_by_file: line_filter_json = json.dumps([{"name": name, "lines": lines_by_file[name]}], separators=(",", ":")) @@ -266,7 +265,6 @@ def main(): command.extend(common_clang_tidy_args) command.append(name) command.extend(clang_tidy_args) - task_queue.put(command) # Wait for all threads to be done.