diff --git a/.clang-tidy b/.clang-tidy index 81be7c096..5e357feba 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -26,6 +26,7 @@ Checks: '*, -fuchsia-virtual-inheritance, -google-explicit-constructor, -google-readability-*, + -google-runtime-references, -hicpp-avoid-c-arrays, -hicpp-avoid-goto, -hicpp-braces-around-statements, @@ -52,9 +53,10 @@ Checks: '*, -readability-else-after-return, -readability-implicit-bool-conversion, -readability-magic-numbers, - -readability-named-parameter' + -readability-named-parameter, + -misc-no-recursion' WarningsAsErrors: '' -HeaderFilterRegex: '' +HeaderFilterRegex: 'src/.*' AnalyzeTemporaryDtors: false FormatStyle: none CheckOptions: diff --git a/.github/workflows/daily_banchmark.yaml b/.github/workflows/daily_banchmark.yaml new file mode 100644 index 000000000..4867f7097 --- /dev/null +++ b/.github/workflows/daily_banchmark.yaml @@ -0,0 +1,68 @@ +name: Daily Benchmark + +on: + workflow_dispatch: + schedule: + - cron: "0 1 * * *" + +jobs: + release_benchmarks: + name: "Release benchmarks" + runs-on: [self-hosted, Linux, X64, Diff, Gen7] + env: + THREADS: 24 + + steps: + - name: Set up repository + uses: actions/checkout@v2 + with: + # Number of commits to fetch. `0` indicates all history for all + # branches and tags. (default: 1) + fetch-depth: 0 + + - name: Build release binaries + run: | + # Activate toolchain. + source /opt/toolchain-v2/activate + + # Initialize dependencies. + ./init + + # Build only memgraph release binaries. + cd build + cmake -DCMAKE_BUILD_TYPE=release .. + make -j$THREADS + + - name: Run macro benchmarks + run: | + cd tests/macro_benchmark + ./harness QuerySuite MemgraphRunner \ + --groups aggregation 1000_create unwind_create dense_expand match \ + --no-strict + + - name: Upload macro benchmark results + run: | + cd tools/bench-graph-client + virtualenv -p python3 ve3 + source ve3/bin/activate + pip install -r requirements.txt + ./main.py --benchmark-name "macro_benchmark" \ + --benchmark-results-path "../../tests/macro_benchmark/.harness_summary" \ + --github-run-id "${{ github.run_id }}" \ + --github-run-number "${{ github.run_number }}" + + - name: Run mgbench + run: | + cd tests/mgbench + ./benchmark.py --num-workers-for-benchmark 12 --export-results benchmark_result.json pokec/medium/*/* + + - name: Upload mgbench results + run: | + cd tools/bench-graph-client + virtualenv -p python3 ve3 + source ve3/bin/activate + pip install -r requirements.txt + ./main.py --benchmark-name "mgbench" \ + --benchmark-results-path "../../tests/mgbench/benchmark_result.json" \ + --github-run-id "${{ github.run_id }}" \ + --github-run-number "${{ github.run_number }}" diff --git a/.github/workflows/diff.yaml b/.github/workflows/diff.yaml index 10bf2de97..1c88bf766 100644 --- a/.github/workflows/diff.yaml +++ b/.github/workflows/diff.yaml @@ -5,12 +5,13 @@ on: paths-ignore: - 'docs/**' - '**/*.md' - - '.clang-*' + - '.clang-format' + - 'CODEOWNERS' jobs: community_build: name: "Community build" - runs-on: [self-hosted, General, Linux, X64, Debian10] + runs-on: [self-hosted, Linux, X64, Diff] env: THREADS: 24 @@ -42,7 +43,7 @@ jobs: # Run unit tests. cd build - ctest -R memgraph__unit --output-on-failure + ctest -R memgraph__unit --output-on-failure -j$THREADS - name: Run stress test run: | @@ -54,8 +55,13 @@ jobs: # Activate toolchain. source /opt/toolchain-v2/activate - # Create community DEB package. cd build + + # create mgconsole + # we use the -B to force the build + make -j$THREADS -B mgconsole + + # Create community DEB package. 
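+ # (A note on `make -B` above: -B/--always-make treats every target as out
+ # of date, so the mgconsole step re-runs even when make believes the binary
+ # is fresh; a plain `make mgconsole` on an already-built tree would be a
+ # no-op.)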
mkdir output && cd output
cpack -G DEB --config ../CPackConfig.cmake
@@ -65,9 +71,9 @@ jobs:
name: "Community DEB package"
path: build/output/memgraph*.deb

- coverage_build:
- name: "Coverage build"
- runs-on: [self-hosted, General, Linux, X64, Debian10]
+ code_analysis:
+ name: "Code analysis"
+ runs-on: [self-hosted, Linux, X64, Diff]
env:
THREADS: 24
@@ -79,7 +85,7 @@ jobs:
# branches and tags. (default: 1)
fetch-depth: 0

- - name: Build coverage binaries
+ - name: Build combined ASAN, UBSAN and coverage binaries
run: |
# Activate toolchain.
source /opt/toolchain-v2/activate
@@ -87,9 +93,8 @@ jobs:
# Initialize dependencies.
./init

- # Build coverage binaries.
cd build
- cmake -DTEST_COVERAGE=ON ..
+ cmake -DTEST_COVERAGE=ON -DASAN=ON -DUBSAN=ON ..
make -j$THREADS memgraph__unit

- name: Run unit tests
run: |
# Activate toolchain.
source /opt/toolchain-v2/activate

- # Run unit tests.
+ # Run unit tests. The run is intentionally restricted to 2 threads because higher concurrency makes the timing-related tests unstable.
cd build
- ctest -R memgraph__unit --output-on-failure
+ LSAN_OPTIONS=suppressions=$PWD/../tools/lsan.supp UBSAN_OPTIONS=halt_on_error=1 ctest -R memgraph__unit --output-on-failure -j2

- name: Compute code coverage
run: |
@@ -120,9 +125,19 @@ jobs:
name: "Code coverage"
path: tools/github/generated/code_coverage.tar.gz

+ - name: Run clang-tidy
+ run: |
+ source /opt/toolchain-v2/activate
+
+ # Restrict clang-tidy results only to the modified parts
+ git diff -U0 master... -- src ':!*.hpp' | ./tools/github/clang-tidy/clang-tidy-diff.py -p 1 -j $THREADS -path build | tee ./build/clang_tidy_output.txt
+
+ # Fail if any warning is reported
+ ! cat ./build/clang_tidy_output.txt | ./tools/github/clang-tidy/grep_error_lines.sh > /dev/null
+
debug_build:
name: "Debug build"
- runs-on: [self-hosted, General, Linux, X64, Debian10]
+ runs-on: [self-hosted, Linux, X64, Diff]
env:
THREADS: 24
@@ -196,7 +211,7 @@ jobs:
release_build:
name: "Release build"
- runs-on: [self-hosted, General, Linux, X64, Debian10]
+ runs-on: [self-hosted, Linux, X64, Diff]
env:
THREADS: 24
@@ -208,21 +223,6 @@ jobs:
# branches and tags. (default: 1)
fetch-depth: 0

- - name: Set up parent
- run: |
- # Remove parent folder (if it exists).
- cd ..
- if [ -d parent ]; then
- rm -rf parent
- fi
-
- # Copy untouched repository to parent folder.
- cp -r memgraph parent
-
- # Checkout previous commit
- cd parent
- git checkout HEAD~1
-
- name: Build release binaries
run: |
# Activate toolchain.
source /opt/toolchain-v2/activate

# Initialize dependencies.
./init

cd build
cmake -DCMAKE_BUILD_TYPE=release ..
make -j$THREADS

- - name: Build parent binaries
- run: |
- # Activate toolchain.
- source /opt/toolchain-v2/activate
-
- # Initialize dependencies.
- cd ../parent
- ./init
-
- # Build parent binaries.
- cd build
- cmake -DCMAKE_BUILD_TYPE=release ..
- make -j$THREADS memgraph memgraph__macro_benchmark - - - name: Run macro benchmark tests - run: | - cd tests/macro_benchmark - ./harness QuerySuite MemgraphRunner \ - --groups aggregation 1000_create unwind_create dense_expand match \ - --no-strict - - - name: Run parent macro benchmark tests - run: | - cd ../parent/tests/macro_benchmark - ./harness QuerySuite MemgraphRunner \ - --groups aggregation 1000_create unwind_create dense_expand match \ - --no-strict - - - name: Compute macro benchmark summary - run: | - ./tools/github/macro_benchmark_summary \ - --current tests/macro_benchmark/.harness_summary \ - --previous ../parent/tests/macro_benchmark/.harness_summary \ - --output macro_benchmark_summary.txt - - - name: Save macro benchmark summary - uses: actions/upload-artifact@v2 - with: - name: "Macro benchmark summary" - path: macro_benchmark_summary.txt - - name: Run GQL Behave tests run: | cd tests/gql_behave @@ -290,14 +249,14 @@ jobs: tests/gql_behave/gql_behave_status.csv tests/gql_behave/gql_behave_status.html - - name: Run e2e replication tests + - name: Run e2e tests run: | # TODO(gitbuda): Setup mgclient and pymgclient properly. cd tests ./setup.sh source ve3/bin/activate cd e2e - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-path replication/workloads.yaml + LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-root-directory . - name: Run stress test (plain) run: | @@ -320,8 +279,13 @@ jobs: # Activate toolchain. source /opt/toolchain-v2/activate - # Create enterprise DEB package. cd build + + # create mgconsole + # we use the -B to force the build + make -j$THREADS -B mgconsole + + # Create enterprise DEB package. mkdir output && cd output cpack -G DEB --config ../CPackConfig.cmake @@ -331,6 +295,15 @@ jobs: name: "Enterprise DEB package" path: build/output/memgraph*.deb + - name: Save test data + uses: actions/upload-artifact@v2 + if: always() + with: + name: "Test data" + path: | + # multiple paths could be defined + build/logs + release_jepsen_test: name: "Release Jepsen Test" runs-on: [self-hosted, Linux, X64, Debian10, JepsenControl] @@ -370,3 +343,64 @@ jobs: with: name: "Jepsen Report" path: tests/jepsen/Jepsen.tar.gz + + release_benchmarks: + name: "Release benchmarks" + runs-on: [self-hosted, Linux, X64, Diff, Gen7] + env: + THREADS: 24 + + steps: + - name: Set up repository + uses: actions/checkout@v2 + with: + # Number of commits to fetch. `0` indicates all history for all + # branches and tags. (default: 1) + fetch-depth: 0 + + - name: Build release binaries + run: | + # Activate toolchain. + source /opt/toolchain-v2/activate + + # Initialize dependencies. + ./init + + # Build only memgraph release binaries. + cd build + cmake -DCMAKE_BUILD_TYPE=release .. 
+ make -j$THREADS + + - name: Run macro benchmarks + run: | + cd tests/macro_benchmark + ./harness QuerySuite MemgraphRunner \ + --groups aggregation 1000_create unwind_create dense_expand match \ + --no-strict + + - name: Upload macro benchmark results + run: | + cd tools/bench-graph-client + virtualenv -p python3 ve3 + source ve3/bin/activate + pip install -r requirements.txt + ./main.py --benchmark-name "macro_benchmark" \ + --benchmark-results-path "../../tests/macro_benchmark/.harness_summary" \ + --github-run-id "${{ github.run_id }}" \ + --github-run-number "${{ github.run_number }}" + + - name: Run mgbench + run: | + cd tests/mgbench + ./benchmark.py --num-workers-for-benchmark 12 --export-results benchmark_result.json pokec/medium/*/* + + - name: Upload mgbench results + run: | + cd tools/bench-graph-client + virtualenv -p python3 ve3 + source ve3/bin/activate + pip install -r requirements.txt + ./main.py --benchmark-name "mgbench" \ + --benchmark-results-path "../../tests/mgbench/benchmark_result.json" \ + --github-run-id "${{ github.run_id }}" \ + --github-run-number "${{ github.run_number }}" diff --git a/.github/workflows/full_clang_tidy.yaml b/.github/workflows/full_clang_tidy.yaml new file mode 100644 index 000000000..d1f4151ba --- /dev/null +++ b/.github/workflows/full_clang_tidy.yaml @@ -0,0 +1,44 @@ +name: Run clang-tidy on the full codebase + +on: + workflow_dispatch: + +jobs: + clang_tidy_check: + name: "Clang-tidy check" + runs-on: [self-hosted, Linux, X64, Ubuntu20.04] + env: + THREADS: 24 + + steps: + - name: Set up repository + uses: actions/checkout@v2 + with: + # Number of commits to fetch. `0` indicates all history for all + # branches and tags. (default: 1) + fetch-depth: 0 + + - name: Build debug binaries + run: | + # Activate toolchain. + source /opt/toolchain-v2/activate + + # Initialize dependencies. + ./init + + # Build debug binaries. + + cd build + cmake .. + make -j$THREADS + + - name: Run clang-tidy + run: | + source /opt/toolchain-v2/activate + + # The results are also written to standard output in order to retain them in the logs + ./tools/github/clang-tidy/run-clang-tidy.py -p build -j $THREADS -clang-tidy-binary=/opt/toolchain-v2/bin/clang-tidy "$PWD/src/*" | + tee ./build/full_clang_tidy_output.txt + + - name: Summarize clang-tidy results + run: cat ./build/full_clang_tidy_output.txt | ./tools/github/clang-tidy/count_errors.sh diff --git a/.github/workflows/package_all.yaml b/.github/workflows/package_all.yaml new file mode 100644 index 000000000..656f1016a --- /dev/null +++ b/.github/workflows/package_all.yaml @@ -0,0 +1,248 @@ +name: Package All + +# TODO(gitbuda): Cleanup docker container if GHA job was canceled. 
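Both clang-tidy jobs in this diff share one pattern: produce diagnostics, tee them into a log for the CI artifacts, and then turn the log into an exit code. A hedged sketch of the diff-scoped variant from the code_analysis job follows; note that grep_error_lines.sh is not shown in this diff, so the final grep below is a stand-in for whatever that helper actually matches.

```bash
#!/bin/bash
# Sketch: fail CI only on clang-tidy findings in lines changed since master.
# Assumes ./build contains compile_commands.json and THREADS is exported.
set -euo pipefail

git diff -U0 master... -- src ':!*.hpp' \
  | ./tools/github/clang-tidy/clang-tidy-diff.py -p 1 -j "$THREADS" -path build \
  | tee ./build/clang_tidy_output.txt

# grep exits 0 on a match, so `!` inverts it: any diagnostic fails the step.
! grep -E 'warning:|error:' ./build/clang_tidy_output.txt > /dev/null
```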
+ +on: workflow_dispatch + +jobs: + centos-7_community: + runs-on: [self-hosted, DockerMgBuild] + timeout-minutes: 60 + steps: + - name: "Set up repository" + uses: actions/checkout@v2 + with: + fetch-depth: 0 # Required because of release/get_version.py + - name: "Build package" + run: | + ./release/package/run.sh package community centos-7 + - name: "Upload package" + uses: actions/upload-artifact@v2 + with: + name: centos-7_community + path: build/output/centos-7/memgraph*.rpm + + centos-8_community: + runs-on: [self-hosted, DockerMgBuild] + timeout-minutes: 60 + steps: + - name: "Set up repository" + uses: actions/checkout@v2 + with: + fetch-depth: 0 # Required because of release/get_version.py + - name: "Build package" + run: | + ./release/package/run.sh package community centos-8 + - name: "Upload package" + uses: actions/upload-artifact@v2 + with: + name: centos-8_community + path: build/output/centos-8/memgraph*.rpm + + debian-9_community: + runs-on: [self-hosted, DockerMgBuild] + timeout-minutes: 60 + steps: + - name: "Set up repository" + uses: actions/checkout@v2 + with: + fetch-depth: 0 # Required because of release/get_version.py + - name: "Build package" + run: | + ./release/package/run.sh package community debian-9 + - name: "Upload package" + uses: actions/upload-artifact@v2 + with: + name: debian-9_community + path: build/output/debian-9/memgraph*.deb + + debian-10_community: + runs-on: [self-hosted, DockerMgBuild] + timeout-minutes: 60 + steps: + - name: "Set up repository" + uses: actions/checkout@v2 + with: + fetch-depth: 0 # Required because of release/get_version.py + - name: "Build package" + run: | + ./release/package/run.sh package community debian-10 + - name: "Upload package" + uses: actions/upload-artifact@v2 + with: + name: debian-10_community + path: build/output/debian-10/memgraph*.deb + + docker_community: + runs-on: [self-hosted, DockerMgBuild] + timeout-minutes: 60 + steps: + - name: "Set up repository" + uses: actions/checkout@v2 + with: + fetch-depth: 0 # Required because of release/get_version.py + - name: "Build package" + run: | + cd release/package + ./run.sh package community debian-10 --for-docker + ./run.sh docker + - name: "Upload package" + uses: actions/upload-artifact@v2 + with: + name: docker_community + path: build/output/docker/memgraph*.tar.gz + + ubuntu-1804_community: + runs-on: [self-hosted, DockerMgBuild] + timeout-minutes: 60 + steps: + - name: "Set up repository" + uses: actions/checkout@v2 + with: + fetch-depth: 0 # Required because of release/get_version.py + - name: "Build package" + run: | + ./release/package/run.sh package community ubuntu-18.04 + - name: "Upload package" + uses: actions/upload-artifact@v2 + with: + name: ubuntu-1804_community + path: build/output/ubuntu-18.04/memgraph*.deb + + ubuntu-2004_community: + runs-on: [self-hosted, DockerMgBuild] + timeout-minutes: 60 + steps: + - name: "Set up repository" + uses: actions/checkout@v2 + with: + fetch-depth: 0 # Required because of release/get_version.py + - name: "Build package" + run: | + ./release/package/run.sh package community ubuntu-20.04 + - name: "Upload package" + uses: actions/upload-artifact@v2 + with: + name: ubuntu-2004_community + path: build/output/ubuntu-20.04/memgraph*.deb + + centos-7_enterprise: + runs-on: [self-hosted, DockerMgBuild] + timeout-minutes: 60 + steps: + - name: "Set up repository" + uses: actions/checkout@v2 + with: + fetch-depth: 0 # Required because of release/get_version.py + - name: "Build package" + run: | + ./release/package/run.sh 
package enterprise centos-7 + - name: "Upload package" + uses: actions/upload-artifact@v2 + with: + name: centos-7_enterprise + path: build/output/centos-7/memgraph*.rpm + + centos-8_enterprise: + runs-on: [self-hosted, DockerMgBuild] + timeout-minutes: 60 + steps: + - name: "Set up repository" + uses: actions/checkout@v2 + with: + fetch-depth: 0 # Required because of release/get_version.py + - name: "Build package" + run: | + ./release/package/run.sh package enterprise centos-8 + - name: "Upload package" + uses: actions/upload-artifact@v2 + with: + name: centos-8_enterprise + path: build/output/centos-8/memgraph*.rpm + + debian-9_enterprise: + runs-on: [self-hosted, DockerMgBuild] + timeout-minutes: 60 + steps: + - name: "Set up repository" + uses: actions/checkout@v2 + with: + fetch-depth: 0 # Required because of release/get_version.py + - name: "Build package" + run: | + ./release/package/run.sh package enterprise debian-9 + - name: "Upload package" + uses: actions/upload-artifact@v2 + with: + name: debian-9_enterprise + path: build/output/debian-9/memgraph*.deb + + debian-10_enterprise: + runs-on: [self-hosted, DockerMgBuild] + timeout-minutes: 60 + steps: + - name: "Set up repository" + uses: actions/checkout@v2 + with: + fetch-depth: 0 # Required because of release/get_version.py + - name: "Build package" + run: | + ./release/package/run.sh package enterprise debian-10 + - name: "Upload package" + uses: actions/upload-artifact@v2 + with: + name: debian-10_enterprise + path: build/output/debian-10/memgraph*.deb + + docker_enterprise: + runs-on: [self-hosted, DockerMgBuild] + timeout-minutes: 60 + steps: + - name: "Set up repository" + uses: actions/checkout@v2 + with: + fetch-depth: 0 # Required because of release/get_version.py + - name: "Build package" + run: | + cd release/package + ./run.sh package enterprise debian-10 --for-docker + ./run.sh docker + - name: "Upload package" + uses: actions/upload-artifact@v2 + with: + name: docker_enterprise + path: build/output/docker/memgraph*.tar.gz + + ubuntu-1804_enterprise: + runs-on: [self-hosted, DockerMgBuild] + timeout-minutes: 60 + steps: + - name: "Set up repository" + uses: actions/checkout@v2 + with: + fetch-depth: 0 # Required because of release/get_version.py + - name: "Build package" + run: | + ./release/package/run.sh package enterprise ubuntu-18.04 + - name: "Upload package" + uses: actions/upload-artifact@v2 + with: + name: ubuntu-1804_enterprise + path: build/output/ubuntu-18.04/memgraph*.deb + + ubuntu-2004_enterprise: + runs-on: [self-hosted, DockerMgBuild] + timeout-minutes: 60 + steps: + - name: "Set up repository" + uses: actions/checkout@v2 + with: + fetch-depth: 0 # Required because of release/get_version.py + - name: "Build package" + run: | + ./release/package/run.sh package enterprise ubuntu-20.04 + - name: "Upload package" + uses: actions/upload-artifact@v2 + with: + name: ubuntu-2004_enterprise + path: build/output/ubuntu-20.04/memgraph*.deb diff --git a/.github/workflows/release_centos8.yaml b/.github/workflows/release_centos8.yaml index 49584f5bd..e58ff5c88 100644 --- a/.github/workflows/release_centos8.yaml +++ b/.github/workflows/release_centos8.yaml @@ -39,8 +39,13 @@ jobs: # Activate toolchain. source /opt/toolchain-v2/activate - # Create community RPM package. cd build + + # create mgconsole + # we use the -B to force the build + make -j$THREADS -B mgconsole + + # Create community RPM package. 
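+ # cpack below is pointed straight at the CPackConfig.cmake generated by the
+ # earlier cmake run (--config ../CPackConfig.cmake), so no reconfigure is
+ # needed; rpmlint then sanity-checks the metadata of the produced RPM.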
mkdir output && cd output cpack -G RPM --config ../CPackConfig.cmake rpmlint memgraph*.rpm @@ -232,8 +237,13 @@ jobs: # Activate toolchain. source /opt/toolchain-v2/activate - # Create enterprise RPM package. cd build + + # create mgconsole + # we use the -B to force the build + make -j$THREADS -B mgconsole + + # Create enterprise RPM package. mkdir output && cd output cpack -G RPM --config ../CPackConfig.cmake rpmlint memgraph*.rpm @@ -283,14 +293,14 @@ jobs: tests/gql_behave/gql_behave_status.csv tests/gql_behave/gql_behave_status.html - - name: Run e2e replication tests + - name: Run e2e tests run: | # TODO(gitbuda): Setup mgclient and pymgclient properly. cd tests ./setup.sh source ve3/bin/activate cd e2e - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-path replication/workloads.yaml + LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-root-directory . - name: Run stress test (plain) run: | diff --git a/.github/workflows/release_debian10.yaml b/.github/workflows/release_debian10.yaml index 7a728fef8..30e9b5a85 100644 --- a/.github/workflows/release_debian10.yaml +++ b/.github/workflows/release_debian10.yaml @@ -1,4 +1,4 @@ -name: Release Debian10 +name: Release Debian 10 on: workflow_dispatch: @@ -39,8 +39,13 @@ jobs: # Activate toolchain. source /opt/toolchain-v2/activate - # Create community DEB package. cd build + + # create mgconsole + # we use the -B to force the build + make -j$THREADS -B mgconsole + + # Create community DEB package. mkdir output && cd output cpack -G DEB --config ../CPackConfig.cmake @@ -231,8 +236,13 @@ jobs: # Activate toolchain. source /opt/toolchain-v2/activate - # Create enterprise DEB package. cd build + + # create mgconsole + # we use the -B to force the build + make -j$THREADS -B mgconsole + + # Create enterprise DEB package. mkdir output && cd output cpack -G DEB --config ../CPackConfig.cmake @@ -281,14 +291,14 @@ jobs: tests/gql_behave/gql_behave_status.csv tests/gql_behave/gql_behave_status.html - - name: Run e2e replication tests + - name: Run e2e tests run: | # TODO(gitbuda): Setup mgclient and pymgclient properly. cd tests ./setup.sh source ve3/bin/activate cd e2e - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-path replication/workloads.yaml + LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-root-directory . - name: Run stress test (plain) run: | diff --git a/.github/workflows/release_ubuntu2004.yaml b/.github/workflows/release_ubuntu2004.yaml index 90386fce5..d309d1946 100644 --- a/.github/workflows/release_ubuntu2004.yaml +++ b/.github/workflows/release_ubuntu2004.yaml @@ -1,4 +1,4 @@ -name: Release Ubuntu20.04 +name: Release Ubuntu 20.04 on: workflow_dispatch: @@ -39,8 +39,13 @@ jobs: # Activate toolchain. source /opt/toolchain-v2/activate - # Create community DEB package. cd build + + # create mgconsole + # we use the -B to force the build + make -j$THREADS -B mgconsole + + # Create community DEB package. mkdir output && cd output cpack -G DEB --config ../CPackConfig.cmake @@ -231,8 +236,13 @@ jobs: # Activate toolchain. source /opt/toolchain-v2/activate - # Create enterprise DEB package. cd build + + # create mgconsole + # we use the -B to force the build + make -j$THREADS -B mgconsole + + # Create enterprise DEB package. 
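Stepping back to the Package All workflow earlier in this diff: each job there is a single `./release/package/run.sh package <edition> <distro>` call on a DockerMgBuild runner. A local sketch that walks the same matrix in a loop, assuming it is run from the repository root with Docker available:

```bash
#!/bin/bash
# Sketch: build every package flavor that the Package All workflow covers.
set -euo pipefail

for edition in community enterprise; do
  for distro in centos-7 centos-8 debian-9 debian-10 ubuntu-18.04 ubuntu-20.04; do
    ./release/package/run.sh package "$edition" "$distro"
  done
done

# Docker images are built separately, mirroring the docker_* jobs, which cd
# into release/package before invoking run.sh:
(cd release/package && ./run.sh package community debian-10 --for-docker && ./run.sh docker)
```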
mkdir output && cd output cpack -G DEB --config ../CPackConfig.cmake @@ -281,14 +291,14 @@ jobs: tests/gql_behave/gql_behave_status.csv tests/gql_behave/gql_behave_status.html - - name: Run e2e replication tests + - name: Run e2e tests run: | # TODO(gitbuda): Setup mgclient and pymgclient properly. cd tests ./setup.sh source ve3/bin/activate cd e2e - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-path replication/workloads.yaml + LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../libs/mgclient/lib python runner.py --workloads-root-directory . - name: Run stress test (plain) run: | diff --git a/CHANGELOG.md b/CHANGELOG.md index 97bc4f122..2b9576171 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,461 +1,5 @@ -# Change Log +Change Log for all versions of Memgraph can be found on-line at +https://docs.memgraph.com/memgraph/changelog -## Future - -### Major Feature and Improvements - -* Added replication to community version. -* Add support for multiple query modules directories at the same time. - You can now define multiple, comma-separated paths to directories from - which the modules will be loaded using the `--query-modules-directory` flag. - -### Bug Fixes - -* Fixed garbage collector by correctly marking the oldest current timestamp - after the database was recovered using the durability files. - -## v1.3.0 - -### Breaking Changes - -* Added extra information in durability files to support replication, making it - incompatible with the durability files generated by older versions of - Memgraph. Even though the replication is an Enterprise feature, the files are - compatible with the Community version. - -### Major Features and Improvements - -* Added support for data replication across a cluster of Memgraph instances. - Supported instance types are MAIN and REPLICA. Supported replication modes - are SYNC (all SYNC REPLICAS have to receive data before the MAIN can commit - the transaction), ASYNC (MAIN doesn't care if data is replicated), SYNC WITH - TIMEOUT (MAIN will wait for REPLICAS within the given timeout period, after - timout, replication isn't aborted but the replication demotes the REPLICA to - the ASYNC mode). -* Added support for query type deduction. Possible query types are `r` (read), - `w` (write), `rw` (read-write). The query type is returned as a part of the - summary. -* Improved logging capabilities by introducing granular logging levels. Added - new flag, `--log-level`, which specifies the minimum log level that will be - printed. E.g., it's possible to print incoming queries or Bolt server states. -* Added ability to lock the storage data directory by executing the `LOCK DATA - DIRECTORY;` query which delays the deletion of the files contained in the - data directory. The data directory can be unlocked again by executing the - `UNLOCK DATA DIRECTORY;` query. - -### Bug Fixes and Other Changes - -* Added cleanup of query executions if not in an explicit transaction. -* Fix RPC dangling reference. - -## v1.2.0 - -### Breaking Changes - -* SSL is disabled by default (`--bolt-cert-file` and `--bolt-key-file` are - empty). This change might only affect the client connection configuration. - -### Major Features and Improvements - -* Added support for Bolt v4.0 and v4.1. -* Added `mgp_networkx.py` as an alternative implementation of NetworkX graph - objects, which is useful to use Memgraph data from NetworkX algorithms - optimally. -* Added `nxalg.py` query module as a proxy to NetworkX algorithms. 
-* Added plan optimization to use a label-property index where the property is - not null. As a result, the query engine, instead of scanning all elements and - applying the filter, performs a label-property index lookup when possible. - -### Bug Fixes and Other Changes - -* Fixed Cypher `ID` function `Null` handling. When the `ID` function receives - `Null`, it will also return `Null`. -* Fixed bug that caused random crashes in SSL communication on platforms - that use older versions of OpenSSL (< 1.1) by adding proper multi-threading - handling. -* Fix `DISCARD` message handling. The query is now executed before discarding - the results. - -## v1.1.0 - -### Major Features and Improvements - -* Properties in nodes and edges are now stored encoded and compressed. This - change significantly reduces memory usage. Depending on the specific dataset, - total memory usage can be reduced up to 50%. -* Added support for rescanning query modules. Previously, the query modules - directory was scanned only upon startup. Now it is scanned each time the user - requests to load a query module. The functions used to load the query modules - were renamed to `mg.load()` and `mg.load_all()` (from `mg.reload()` and - `mg.reload_all()`). -* Improved execution performance of queries that have an IN list filter by - using label+property indices. - Example: `MATCH (n:Label) WHERE n.property IN [] ...` -* Added support for `ANY` and `NONE` openCypher functions. Previously, only - `ALL` and `SINGLE` functions were implemented. - -### Bug Fixes and Other Changes - -* Fixed invalid paths returned by variable expansion when the starting node and - destination node used the same symbol. - Example: `MATCH path = (n:Person {name: "John"})-[:KNOWS*]->(n) RETURN path` -* Improved semantics of `ALL` and `SINGLE` functions to be consistent with - openCypher when handling lists with `Null`s. -* `SHOW CONSTRAINT INFO` now returns property names as a list for unique - constraints. -* Escaped label/property/edgetype names in `DUMP DATABASE` to support names - with spaces in them. -* Fixed handling of `DUMP DATABASE` queries in multi-command transactions - (`BEGIN`, ..., `COMMIT`). -* Fixed handling of various query types in explicit transactions. For example, - constraints were allowed to be created in multi-command transactions - (`BEGIN`, ..., `COMMIT`) but that isn't a transactional operation and as such - can't be allowed in multi-command transactions. -* Fixed integer overflow bugs in `COUNT`, `LIMIT` and `SKIP`. -* Fixed integer overflow bugs in weighted shortest path expansions. -* Fixed various other integer overflow bugs in query execution. -* Added Marvel Comic Universe tutorial. -* Added FootballTransfers tutorial. - -## v1.0.0 - -### Major Features and Improvements - -* [Enterprise Ed.] Exposed authentication username/rolename regex as a flag - (`--auth-user-or-role-name-regex`). -* [Enterprise Ed.] Improved auth module error handling and added support for - relative paths. -* Added support for Python query modules. This release of Memgraph supports - query modules written using the already existing C API and the new Python - API. -* Added support for unique constraints. The unique constraint is created with a - label and one or more properties. -* Implemented support for importing CSV files (`mg_import_csv`). The importer - is compatible with the Neo4j batch CSV importer. -* Snapshot and write-ahead log format changed (backward compatible with v0.50). 
-* Vertices looked up by their openCypher ID (`MATCH (n) WHERE ID(n) = ...`) - will now find the node in O(logn) instead of O(n). -* Improved planning of BFS expansion, a faster, specific approach is now - favored instead of a ScanAll+Filter operation. -* Added syntax for limiting memory of `CALL`. -* Exposed server name that should be used for Bolt handshake as flag - (`--bolt-server-name-for-init`). -* Added several more functions to the query module C API. -* Implemented a storage locking mechanism that prevents the user from - concurrently starting two Memgraph instances with the same data directory. - -### Bug Fixes and Other Changes - -* [Enterprise Ed.] Fixed a bug that crashed the database when granting - privileges to a user. -* [Enterprise Ed.] Improved Louvain algorithm for community detection. -* Type of variable expansion is now printed in `EXPLAIN` (e.g. ExpandVariable, - STShortestPath, BFSExpand, WeightedShortestPath). -* Correctly display `CALL` in `EXPLAIN` output. -* Correctly delimit arguments when printing the signature of a query module. -* Fixed a planning issue when `CALL` preceded filtering. -* Fixed spelling mistakes in the storage durability module. -* Fixed storage GC indices/constraints subtle race condition. -* Reduced memory allocations in storage API and indices. -* Memgraph version is now outputted to `stdout` when Memgraph is started. -* Improved RPM packaging. -* Reduced number of errors reported in production log when loading query - modules. -* Removed `early access` wording from the Community Offering license. - -## v0.50.0 - -### Breaking Changes - -* [Enterprise Ed.] Remove support for Kafka streams. -* Snapshot and write-ahead log format changed (not backward compatible). -* Removed support for unique constraints. -* Label indices aren't created automatically, create them explicitly instead. -* Renamed several database flags. Please see the configuration file for a list of current flags. - -### Major Features and Improvements - -* [Enterprise Ed.] Add support for auth module. -* [Enterprise Ed.] LDAP support migrated to auth module. -* Implemented new graph storage engine. -* Add support for disabling properties on edges. -* Add support for existence constraints. -* Add support for custom openCypher procedures using a C API. -* Support loading query modules implementing read-only procedures. -* Add `CALL YIELD ` syntax for invoking loaded procedures. -* Add `CREATE INDEX ON :Label` for creating label indices. -* Add `DROP INDEX ON :Label` for dropping label indices. -* Add `DUMP DATABASE` clause to openCypher. -* Add functions for treating character strings as byte strings. - -### Bug Fixes and Other Changes - -* Fix several memory management bugs. -* Reduce memory usage in query execution. -* Fix bug that crashes the database when `EXPLAIN` is used. - -## v0.15.0 - -### Breaking Changes - -* Snapshot and write-ahead log format changed (not backward compatible). -* `indexInfo()` function replaced with `SHOW INDEX INFO` syntax. -* Removed support for unique index. Use unique constraints instead. -* `CREATE UNIQUE INDEX ON :label (property)` replaced with `CREATE CONSTRAINT ON (n:label) ASSERT n.property IS UNIQUE`. -* Changed semantics for `COUNTER` openCypher function. - -### Major Features and Improvements - -* [Enterprise Ed.] Add new privilege, `STATS` for accessing storage info. -* [Enterprise Ed.] LDAP authentication and authorization support. -* [Enterprise Ed.] Add audit logging feature. 
-* Add multiple properties unique constraint which replace unique indices. -* Add `SHOW STORAGE INFO` feature. -* Add `PROFILE` clause to openCypher. -* Add `CREATE CONSTRAINT` clause to openCypher. -* Add `DROP CONSTRAINT` clause to openCypher. -* Add `SHOW CONSTRAINT INFO` feature. -* Add `uniformSample` function to openCypher. -* Add regex matching to openCypher. - -### Bug Fixes and Other Changes - -* Fix bug in explicit transaction handling. -* Fix bug in edge filtering by edge type and destination. -* Fix bug in query comment parsing. -* Fix bug in query symbol table. -* Fix OpenSSL memory leaks. -* Make authentication case insensitive. -* Remove `COALESCE` function. -* Add movie tutorial. -* Add backpacking tutorial. - -## v0.14.0 - -### Breaking Changes - -* Write-ahead log format changed (not backward compatible). - -### Major Features and Improvements - -* [Enterprise Ed.] Reduce memory usage in distributed usage. -* Add `DROP INDEX` feature. -* Improve SSL error messages. - -### Bug Fixes and Other Changes - -* [Enterprise Ed.] Fix issues with reading and writing in a distributed query. -* Correctly handle an edge case with unique constraint checks. -* Fix a minor issue with `mg_import_csv`. -* Fix an issue with `EXPLAIN`. - -## v0.13.0 - -### Breaking Changes - -* Write-ahead log format changed (not backward compatible). -* Snapshot format changed (not backward compatible). - -### Major Features and Improvements - -* [Enterprise Ed.] Authentication and authorization support. -* [Enterprise Ed.] Kafka integration. -* [Enterprise Ed.] Support dynamic worker addition in distributed. -* Reduce memory usage and improve overall performance. -* Add `CREATE UNIQUE INDEX` clause to openCypher. -* Add `EXPLAIN` clause to openCypher. -* Add `inDegree` and `outDegree` functions to openCypher. -* Improve BFS performance when both endpoints are known. -* Add new `node-label`, `relationship-type` and `quote` options to - `mg_import_csv` tool. -* Reduce memory usage of `mg_import_csv`. - -### Bug Fixes and Other Changes - -* [Enterprise Ed.] Fix an edge case in distributed index creation. -* [Enterprise Ed.] Fix issues with Cartesian in distributed queries. -* Correctly handle large messages in Bolt protocol. -* Fix issues when handling explicitly started transactions in queries. -* Allow openCypher keywords to be used as variable names. -* Revise and make user visible error messages consistent. -* Improve aborting time consuming execution. - -## v0.12.0 - -### Breaking Changes - -* Snapshot format changed (not backward compatible). - -### Major Features and Improvements - -* Improved Id Cypher function. -* Added string functions to openCypher (`lTrim`, `left`, `rTrim`, `replace`, - `reverse`, `right`, `split`, `substring`, `toLower`, `toUpper`, `trim`). -* Added `timestamp` function to openCypher. -* Added support for dynamic property access with `[]` operator. - -## v0.11.0 - -### Major Features and Improvements - -* [Enterprise Ed.] Improve Cartesian support in distributed queries. -* [Enterprise Ed.] Improve distributed execution of BFS. -* [Enterprise Ed.] Dynamic graph partitioner added. -* Static nodes/edges id generators exposed through the Id Cypher function. -* Properties on disk added. -* Telemetry added. -* SSL support added. -* `toString` function added. - -### Bug Fixes and Other Changes - -* Document issues with Docker on OS X. -* Add BFS and Dijkstra's algorithm examples to documentation. 
- -## v0.10.0 - -### Breaking Changes - -* Snapshot format changed (not backward compatible). - -### Major Features and Improvements - -* [Enterprise Ed.] Distributed storage and execution. -* `reduce` and `single` functions added to openCypher. -* `wShortest` edge expansion added to openCypher. -* Support packaging RPM on CentOS 7. - -### Bug Fixes and Other Changes - -* Report an error if updating a deleted element. -* Log an error if reading info on available memory fails. -* Fix a bug when `MATCH` would stop matching if a result was empty, but later - results still contain data to be matched. The simplest case of this was the - query: `UNWIND [1,2,3] AS x MATCH (n :Label {prop: x}) RETURN n`. If there - was no node `(:Label {prop: 1})`, then the `MATCH` wouldn't even try to find - for `x` being 2 or 3. -* Report an error if trying to compare a property value with something that - cannot be stored in a property. -* Fix crashes in some obscure cases. -* Commit log automatically garbage collected. -* Add minor performance improvements. - -## v0.9.0 - -### Breaking Changes - -* Snapshot format changed (not backward compatible). -* Snapshot configuration flags changed, general durability flags added. - -### Major Features and Improvements - -* Write-ahead log added. -* `nodes` and `relationships` functions added. -* `UNION` and `UNION ALL` is implemented. -* Concurrent index creation is now enabled. - -### Bug Fixes and Other Changes - - -## v0.8.0 - -### Major Features and Improvements - -* CASE construct (without aggregations). -* Named path support added. -* Maps can now be stored as node/edge properties. -* Map indexing supported. -* `rand` function added. -* `assert` function added. -* `counter` and `counterSet` functions added. -* `indexInfo` function added. -* `collect` aggregation now supports Map collection. -* Changed the BFS syntax. - -### Bug Fixes and Other Changes - -* Use \u to specify 4 digit codepoint and \U for 8 digit -* Keywords appearing in header (named expressions) keep original case. -* Our Bolt protocol implementation is now completely compatible with the protocol version 1 specification. (https://boltprotocol.org/v1/) -* Added a log warning when running out of memory and the `memory_warning_threshold` flag -* Edges are no longer additionally filtered after expansion. - -## v0.7.0 - -### Major Features and Improvements - -* Variable length path `MATCH`. -* Explicitly started transactions (multi-query transactions). -* Map literal. -* Query parameters (except for parameters in place of property maps). -* `all` function in openCypher. -* `degree` function in openCypher. -* User specified transaction execution timeout. - -### Bug Fixes and Other Changes - -* Concurrent `BUILD INDEX` deadlock now returns an error to the client. -* A `MATCH` preceeded by `OPTIONAL MATCH` expansion inconsistencies. -* High concurrency Antlr parsing bug. -* Indexing improvements. -* Query stripping and caching speedups. - -## v0.6.0 - -### Major Features and Improvements - -* AST caching. -* Label + property index support. -* Different logging setup & format. - -## v0.5.0 - -### Major Features and Improvements - -* Use label indexes to speed up querying. -* Generate multiple query plans and use the cost estimator to select the best. -* Snapshots & Recovery. -* Abandon old yaml configuration and migrate to gflags. -* Query stripping & AST caching support. - -### Bug Fixes and Other Changes - -* Fixed race condition in MVCC. Hints exp+aborted race condition prevented. 
-* Fixed conceptual bug in MVCC GC. Evaluate old records w.r.t. the oldest. - transaction's id AND snapshot. -* User friendly error messages thrown from the query engine. - -## Build 837 - -### Bug Fixes and Other Changes - -* List indexing supported with preceeding IN (for example in query `RETURN 1 IN [[1,2]][0]`). - -## Build 825 - -### Major Features and Improvements - -* RETURN *, count(*), OPTIONAL MATCH, UNWIND, DISTINCT (except DISTINCT in aggregate functions), list indexing and slicing, escaped labels, IN LIST operator, range function. - -### Bug Fixes and Other Changes - -* TCP_NODELAY -> import should be faster. -* Clear hint bits. - -## Build 783 - -### Major Features and Improvements - -* SKIP, LIMIT, ORDER BY. -* Math functions. -* Initial support for MERGE clause. - -### Bug Fixes and Other Changes - -* Unhandled Lock Timeout Exception. - -## Build 755 - -### Major Features and Improvements - -* MATCH, CREATE, WHERE, SET, REMOVE, DELETE. +All the updates to the Change Log can be made in the following repository: +https://github.com/memgraph/docs diff --git a/CMakeLists.txt b/CMakeLists.txt index 663eba656..9d3a099fa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -312,8 +312,9 @@ if (UBSAN) # runtime library and c++ standard libraries are present. set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined -fno-omit-frame-pointer -fno-sanitize=vptr") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=undefined -fno-sanitize=vptr") - # Run program with environment variable UBSAN_OPTIONS=print_stacktrace=1 - # Make sure llvm-symbolizer binary is in path + # Run program with environment variable UBSAN_OPTIONS=print_stacktrace=1. + # Make sure llvm-symbolizer binary is in path. + # To make the program abort on undefined behavior, use UBSAN_OPTIONS=halt_on_error=1. 
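+ # For example, combining both options (a sketch; run from the build
+ # directory, matching the code_analysis CI step above):
+ #   LSAN_OPTIONS=suppressions=$PWD/../tools/lsan.supp \
+ #   UBSAN_OPTIONS=print_stacktrace=1:halt_on_error=1 \
+ #     ctest -R memgraph__unit --output-on-failure
+ # Sanitizer options are colon-separated and read at process startup.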
endif() set(MG_PYTHON_VERSION "" CACHE STRING "Specify the exact python version used by the query modules") @@ -335,3 +336,7 @@ endif() if(QUERY_MODULES) add_subdirectory(query_modules) endif() + +install(FILES ${CMAKE_BINARY_DIR}/bin/mgconsole + PERMISSIONS OWNER_EXECUTE OWNER_READ OWNER_WRITE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE + TYPE BIN) diff --git a/CODEOWNERS b/CODEOWNERS index f871ca5f9..aed0ff4ed 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -1,4 +1 @@ -/docs/ @gitbuda -/src/communication/ @antonio2368 -/src/query/ @the-joksim -/src/storage/ @antonio2368 +* @gitbuda @antonio2368 @antaljanosbenjamin @kostasrim diff --git a/config/flags.yaml b/config/flags.yaml index 5245a9d90..93f9c73ed 100644 --- a/config/flags.yaml +++ b/config/flags.yaml @@ -83,6 +83,14 @@ modifications: value: "/usr/lib/memgraph/auth_module/example.py" override: false + - name: "memory_limit" + value: "0" + override: true + + - name: "isolation_level" + value: "SNAPSHOT_ISOLATION" + override: true + undocumented: - "flag_file" - "also_log_to_stderr" diff --git a/environment/os/centos-7.sh b/environment/os/centos-7.sh index 6f8c514b5..b5fa816c1 100755 --- a/environment/os/centos-7.sh +++ b/environment/os/centos-7.sh @@ -18,6 +18,7 @@ TOOLCHAIN_BUILD_DEPS=( libffi-devel libxml2-devel perl-Digest-MD5 # llvm libedit-devel pcre-devel automake bison # swig ) + TOOLCHAIN_RUN_DEPS=( make # generic build tools tar gzip bzip2 xz # used for archive unpacking @@ -26,6 +27,7 @@ TOOLCHAIN_RUN_DEPS=( readline # for cmake and llvm libffi libxml2 # for llvm ) + MEMGRAPH_BUILD_DEPS=( git # source code control make pkgconfig # build system @@ -46,10 +48,13 @@ MEMGRAPH_BUILD_DEPS=( rpm-build rpmlint # for RPM package building doxygen graphviz # source documentation generators which mono-complete dotnet-sdk-3.1 golang nodejs zip unzip java-11-openjdk-devel # for driver tests + autoconf # for jemalloc code generation ) + list() { echo "$1" } + check() { local missing="" for pkg in $1; do @@ -74,16 +79,13 @@ check() { exit 1 fi } + install() { cd "$DIR" if [ "$EUID" -ne 0 ]; then echo "Please run as root." exit 1 fi - if [ "$SUDO_USER" == "" ]; then - echo "Please run as sudo." - exit 1 - fi # If GitHub Actions runner is installed, append LANG to the environment. # Python related tests doesn't work the LANG export. if [ -d "/home/gh/actions-runner" ]; then @@ -117,11 +119,16 @@ install() { continue fi if [ "$pkg" == PyYAML ]; then - sudo -H -u "$SUDO_USER" bash -c "pip3 install --user PyYAML" + if [ -z ${SUDO_USER+x} ]; then # Running as root (e.g. Docker). + pip3 install --user PyYAML + else # Running using sudo. 
+ sudo -H -u "$SUDO_USER" bash -c "pip3 install --user PyYAML" + fi continue fi yum install -y "$pkg" done } + deps=$2"[*]" "$1" "${!deps}" diff --git a/environment/os/centos-8.sh b/environment/os/centos-8.sh index 0312926fa..2b316484b 100755 --- a/environment/os/centos-8.sh +++ b/environment/os/centos-8.sh @@ -17,6 +17,7 @@ TOOLCHAIN_BUILD_DEPS=( libffi-devel libxml2-devel # for llvm libedit-devel pcre-devel automake bison # for swig ) + TOOLCHAIN_RUN_DEPS=( make # generic build tools tar gzip bzip2 xz # used for archive unpacking @@ -25,6 +26,7 @@ TOOLCHAIN_RUN_DEPS=( readline # for cmake and llvm libffi libxml2 # for llvm ) + MEMGRAPH_BUILD_DEPS=( git # source code control make pkgconf-pkg-config # build system @@ -45,10 +47,13 @@ MEMGRAPH_BUILD_DEPS=( doxygen graphviz # source documentation generators which mono-complete dotnet-sdk-3.1 nodejs golang zip unzip java-11-openjdk-devel # for driver tests sbcl # for custom Lisp C++ preprocessing + autoconf # for jemalloc code generation ) + list() { echo "$1" } + check() { local missing="" for pkg in $1; do @@ -67,16 +72,13 @@ check() { exit 1 fi } + install() { cd "$DIR" if [ "$EUID" -ne 0 ]; then echo "Please run as root." exit 1 fi - if [ "$SUDO_USER" == "" ]; then - echo "Please run as sudo." - exit 1 - fi # If GitHub Actions runner is installed, append LANG to the environment. # Python related tests doesn't work the LANG export. if [ -d "/home/gh/actions-runner" ]; then @@ -85,6 +87,7 @@ install() { echo "NOTE: export LANG=en_US.utf8" fi dnf install -y epel-release + dnf install -y 'dnf-command(config-manager)' dnf config-manager --set-enabled powertools # Required to install texinfo. dnf update -y dnf install -y wget git python36 python3-pip @@ -134,11 +137,16 @@ install() { continue fi if [ "$pkg" == PyYAML ]; then - sudo -H -u "$SUDO_USER" bash -c "pip3 install --user PyYAML" + if [ -z ${SUDO_USER+x} ]; then # Running as root (e.g. Docker). + pip3 install --user PyYAML + else # Running using sudo. + sudo -H -u "$SUDO_USER" bash -c "pip3 install --user PyYAML" + fi continue fi dnf install -y "$pkg" done } + deps=$2"[*]" "$1" "${!deps}" diff --git a/environment/os/debian-10.sh b/environment/os/debian-10.sh index 4c3b5ec2b..ccbd55f4b 100755 --- a/environment/os/debian-10.sh +++ b/environment/os/debian-10.sh @@ -17,6 +17,7 @@ TOOLCHAIN_BUILD_DEPS=( libffi-dev libxml2-dev # for llvm libedit-dev libpcre3-dev automake bison # for swig ) + TOOLCHAIN_RUN_DEPS=( make # generic build tools tar gzip bzip2 xz-utils # used for archive unpacking @@ -26,6 +27,7 @@ TOOLCHAIN_RUN_DEPS=( libreadline7 # for cmake and llvm libffi6 libxml2 # for llvm ) + MEMGRAPH_BUILD_DEPS=( git # source code control make pkg-config # build system @@ -43,13 +45,17 @@ MEMGRAPH_BUILD_DEPS=( doxygen graphviz # source documentation generators mono-runtime mono-mcs zip unzip default-jdk-headless # for driver tests dotnet-sdk-3.1 golang nodejs npm + autoconf # for jemalloc code generation ) + list() { echo "$1" } + check() { check_all_dpkg "$1" } + install() { cat >/etc/apt/sources.list < /dev/null; } +popd () { command popd "$@" > /dev/null; } +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +CPUS=$( grep -c processor < /proc/cpuinfo ) +cd "$DIR" + +source "$DIR/../util.sh" +DISTRO="$(operating_system)" + +# toolchain version +TOOLCHAIN_VERSION=3 + +# package versions used +GCC_VERSION=10.2.0 +BINUTILS_VERSION=2.35.1 +case "$DISTRO" in + centos-7) # because GDB >= 9 does NOT compile with readline6. 
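+ # The case on $DISTRO pins per-distro tool versions: CentOS 7 ships
+ # readline 6, so it stays on an older GDB (8.3), while all other supported
+ # distros get GDB 10.1 below.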
GDB_VERSION=8.3
+ ;;
+ *)
+ GDB_VERSION=10.1
+ ;;
+esac
+CMAKE_VERSION=3.18.4
+CPPCHECK_VERSION=2.2
+LLVM_VERSION=11.0.0
+SWIG_VERSION=4.0.2 # used only for LLVM compilation
+
+# Check for the dependencies.
+echo "ALL BUILD PACKAGES: $($DIR/../os/$DISTRO.sh list TOOLCHAIN_BUILD_DEPS)"
+$DIR/../os/$DISTRO.sh check TOOLCHAIN_BUILD_DEPS
+echo "ALL RUN PACKAGES: $($DIR/../os/$DISTRO.sh list TOOLCHAIN_RUN_DEPS)"
+$DIR/../os/$DISTRO.sh check TOOLCHAIN_RUN_DEPS
+
+# check installation directory
+NAME=toolchain-v$TOOLCHAIN_VERSION
+PREFIX=/opt/$NAME
+mkdir -p $PREFIX >/dev/null 2>/dev/null || true
+if [ ! -d $PREFIX ] || [ ! -w $PREFIX ]; then
+ echo "Please make sure that the directory '$PREFIX' exists and is writable by the current user!"
+ echo
+ echo "If unsure, execute these commands as root:"
+ echo " mkdir $PREFIX && chown $USER:$USER $PREFIX"
+ echo
+ echo "Press <return> when you have created the directory and granted permissions."
+ # wait for the directory to be created
+ while true; do
+ read
+ if [ ! -d $PREFIX ] || [ ! -w $PREFIX ]; then
+ echo
+ echo "You can't continue before you have created the directory and granted permissions!"
+ echo
+ echo "Press <return> when you have created the directory and granted permissions."
+ else
+ break
+ fi
+ done
+fi
+
+# create archives directory
+mkdir -p archives
+
+# download all archives
+pushd archives
+if [ ! -f gcc-$GCC_VERSION.tar.gz ]; then
+ wget https://ftp.gnu.org/gnu/gcc/gcc-$GCC_VERSION/gcc-$GCC_VERSION.tar.gz
+fi
+if [ ! -f binutils-$BINUTILS_VERSION.tar.gz ]; then
+ wget https://ftp.gnu.org/gnu/binutils/binutils-$BINUTILS_VERSION.tar.gz
+fi
+if [ ! -f gdb-$GDB_VERSION.tar.gz ]; then
+ wget https://ftp.gnu.org/gnu/gdb/gdb-$GDB_VERSION.tar.gz
+fi
+if [ ! -f cmake-$CMAKE_VERSION.tar.gz ]; then
+ wget https://github.com/Kitware/CMake/releases/download/v$CMAKE_VERSION/cmake-$CMAKE_VERSION.tar.gz
+fi
+if [ ! -f swig-$SWIG_VERSION.tar.gz ]; then
+ wget https://github.com/swig/swig/archive/rel-$SWIG_VERSION.tar.gz -O swig-$SWIG_VERSION.tar.gz
+fi
+if [ ! -f cppcheck-$CPPCHECK_VERSION.tar.gz ]; then
+ wget https://github.com/danmar/cppcheck/archive/$CPPCHECK_VERSION.tar.gz -O cppcheck-$CPPCHECK_VERSION.tar.gz
+fi
+if [ ! -f llvm-$LLVM_VERSION.src.tar.xz ]; then
+ wget https://github.com/llvm/llvm-project/releases/download/llvmorg-$LLVM_VERSION/llvm-$LLVM_VERSION.src.tar.xz
+ wget https://github.com/llvm/llvm-project/releases/download/llvmorg-$LLVM_VERSION/clang-$LLVM_VERSION.src.tar.xz
+ wget https://github.com/llvm/llvm-project/releases/download/llvmorg-$LLVM_VERSION/lld-$LLVM_VERSION.src.tar.xz
+ wget https://github.com/llvm/llvm-project/releases/download/llvmorg-$LLVM_VERSION/clang-tools-extra-$LLVM_VERSION.src.tar.xz
+ wget https://github.com/llvm/llvm-project/releases/download/llvmorg-$LLVM_VERSION/compiler-rt-$LLVM_VERSION.src.tar.xz
+fi
+if [ ! -f pahole-gdb-master.zip ]; then
+ wget https://github.com/PhilArmstrong/pahole-gdb/archive/master.zip -O pahole-gdb-master.zip
+fi
+
+# verify all archives
+# NOTE: Verification can fail if the archive is signed by another developer. I
+# haven't added commands to download all developer GnuPG keys because the
+# download is very slow. If the verification fails for you, figure out who has
+# signed the archive and download their public key instead.
+GPG="gpg --homedir .gnupg"
+KEYSERVER="hkp://keyserver.ubuntu.com"
+mkdir -p .gnupg
+chmod 700 .gnupg
+# verify gcc
+if [ !
-f gcc-$GCC_VERSION.tar.gz.sig ]; then + wget https://ftp.gnu.org/gnu/gcc/gcc-$GCC_VERSION/gcc-$GCC_VERSION.tar.gz.sig +fi +# list of valid gcc gnupg keys: https://gcc.gnu.org/mirrors.html +$GPG --keyserver $KEYSERVER --recv-keys 0x3AB00996FC26A641 +$GPG --verify gcc-$GCC_VERSION.tar.gz.sig gcc-$GCC_VERSION.tar.gz +# verify binutils +if [ ! -f binutils-$BINUTILS_VERSION.tar.gz.sig ]; then + wget https://ftp.gnu.org/gnu/binutils/binutils-$BINUTILS_VERSION.tar.gz.sig +fi +$GPG --keyserver $KEYSERVER --recv-keys 0xDD9E3C4F +$GPG --verify binutils-$BINUTILS_VERSION.tar.gz.sig binutils-$BINUTILS_VERSION.tar.gz +# verify gdb +if [ ! -f gdb-$GDB_VERSION.tar.gz.sig ]; then + wget https://ftp.gnu.org/gnu/gdb/gdb-$GDB_VERSION.tar.gz.sig +fi +$GPG --keyserver $KEYSERVER --recv-keys 0xFF325CF3 +$GPG --verify gdb-$GDB_VERSION.tar.gz.sig gdb-$GDB_VERSION.tar.gz +# verify cmake +if [ ! -f cmake-$CMAKE_VERSION-SHA-256.txt ] || [ ! -f cmake-$CMAKE_VERSION-SHA-256.txt.asc ]; then + wget https://github.com/Kitware/CMake/releases/download/v$CMAKE_VERSION/cmake-$CMAKE_VERSION-SHA-256.txt + wget https://github.com/Kitware/CMake/releases/download/v$CMAKE_VERSION/cmake-$CMAKE_VERSION-SHA-256.txt.asc + # Because CentOS 7 doesn't have the `--ignore-missing` flag for `sha256sum` + # we filter out the missing files from the sums here manually. + cat cmake-$CMAKE_VERSION-SHA-256.txt | grep "cmake-$CMAKE_VERSION.tar.gz" > cmake-$CMAKE_VERSION-SHA-256-filtered.txt +fi +$GPG --keyserver $KEYSERVER --recv-keys 0xC6C265324BBEBDC350B513D02D2CEF1034921684 +sha256sum -c cmake-$CMAKE_VERSION-SHA-256-filtered.txt +$GPG --verify cmake-$CMAKE_VERSION-SHA-256.txt.asc cmake-$CMAKE_VERSION-SHA-256.txt +# verify llvm, cfe, lld, clang-tools-extra +if [ ! -f llvm-$LLVM_VERSION.src.tar.xz.sig ]; then + wget https://github.com/llvm/llvm-project/releases/download/llvmorg-$LLVM_VERSION/llvm-$LLVM_VERSION.src.tar.xz.sig + wget https://github.com/llvm/llvm-project/releases/download/llvmorg-$LLVM_VERSION/clang-$LLVM_VERSION.src.tar.xz.sig + wget https://github.com/llvm/llvm-project/releases/download/llvmorg-$LLVM_VERSION/lld-$LLVM_VERSION.src.tar.xz.sig + wget https://github.com/llvm/llvm-project/releases/download/llvmorg-$LLVM_VERSION/clang-tools-extra-$LLVM_VERSION.src.tar.xz.sig + wget https://github.com/llvm/llvm-project/releases/download/llvmorg-$LLVM_VERSION/compiler-rt-$LLVM_VERSION.src.tar.xz.sig +fi +# list of valid llvm gnupg keys: https://releases.llvm.org/download.html +$GPG --keyserver $KEYSERVER --recv-keys 0x345AD05D +$GPG --verify llvm-$LLVM_VERSION.src.tar.xz.sig llvm-$LLVM_VERSION.src.tar.xz +$GPG --verify clang-$LLVM_VERSION.src.tar.xz.sig clang-$LLVM_VERSION.src.tar.xz +$GPG --verify lld-$LLVM_VERSION.src.tar.xz.sig lld-$LLVM_VERSION.src.tar.xz +$GPG --verify clang-tools-extra-$LLVM_VERSION.src.tar.xz.sig clang-tools-extra-$LLVM_VERSION.src.tar.xz +$GPG --verify compiler-rt-$LLVM_VERSION.src.tar.xz.sig compiler-rt-$LLVM_VERSION.src.tar.xz +popd + +# create build directory +mkdir -p build +pushd build + +# compile gcc +if [ ! 
-f $PREFIX/bin/gcc ]; then + if [ -d gcc-$GCC_VERSION ]; then + rm -rf gcc-$GCC_VERSION + fi + tar -xvf ../archives/gcc-$GCC_VERSION.tar.gz + pushd gcc-$GCC_VERSION + ./contrib/download_prerequisites + mkdir build && pushd build + # influenced by: https://buildd.debian.org/status/fetch.php?pkg=gcc-8&arch=amd64&ver=8.3.0-6&stamp=1554588545 + ../configure -v \ + --build=x86_64-linux-gnu \ + --host=x86_64-linux-gnu \ + --target=x86_64-linux-gnu \ + --prefix=$PREFIX \ + --disable-multilib \ + --with-system-zlib \ + --enable-checking=release \ + --enable-languages=c,c++,fortran \ + --enable-gold=yes \ + --enable-ld=yes \ + --enable-lto \ + --enable-bootstrap \ + --disable-vtable-verify \ + --disable-werror \ + --without-included-gettext \ + --enable-threads=posix \ + --enable-nls \ + --enable-clocale=gnu \ + --enable-libstdcxx-debug \ + --enable-libstdcxx-time=yes \ + --enable-gnu-unique-object \ + --enable-libmpx \ + --enable-plugin \ + --enable-default-pie \ + --with-target-system-zlib \ + --with-tune=generic \ + --without-cuda-driver + #--program-suffix=$( printf "$GCC_VERSION" | cut -d '.' -f 1,2 ) \ + make -j$CPUS + # make -k check # run test suite + make install + popd && popd +fi + +# activate toolchain +export PATH=$PREFIX/bin:$PATH +export LD_LIBRARY_PATH=$PREFIX/lib64 + +# compile binutils +if [ ! -f $PREFIX/bin/ld.gold ]; then + if [ -d binutils-$BINUTILS_VERSION ]; then + rm -rf binutils-$BINUTILS_VERSION + fi + tar -xvf ../archives/binutils-$BINUTILS_VERSION.tar.gz + pushd binutils-$BINUTILS_VERSION + mkdir build && pushd build + # influenced by: https://buildd.debian.org/status/fetch.php?pkg=binutils&arch=amd64&ver=2.32-7&stamp=1553247092 + env \ + CC=gcc \ + CXX=g++ \ + CFLAGS="-g -O2" \ + CXXFLAGS="-g -O2" \ + LDFLAGS="" \ + ../configure \ + --build=x86_64-linux-gnu \ + --host=x86_64-linux-gnu \ + --prefix=$PREFIX \ + --enable-ld=default \ + --enable-gold \ + --enable-lto \ + --enable-plugins \ + --enable-shared \ + --enable-threads \ + --with-system-zlib \ + --enable-deterministic-archives \ + --disable-compressed-debug-sections \ + --enable-new-dtags \ + --disable-werror + make -j$CPUS + # make -k check # run test suite + make install + popd && popd +fi + +# compile gdb +if [ ! -f $PREFIX/bin/gdb ]; then + if [ -d gdb-$GDB_VERSION ]; then + rm -rf gdb-$GDB_VERSION + fi + tar -xvf ../archives/gdb-$GDB_VERSION.tar.gz + pushd gdb-$GDB_VERSION + mkdir build && pushd build + # https://buildd.debian.org/status/fetch.php?pkg=gdb&arch=amd64&ver=8.2.1-2&stamp=1550831554&raw=0 + env \ + CC=gcc \ + CXX=g++ \ + CFLAGS="-g -O2 -fstack-protector-strong -Wformat -Werror=format-security" \ + CXXFLAGS="-g -O2 -fstack-protector-strong -Wformat -Werror=format-security" \ + CPPFLAGS="-Wdate-time -D_FORTIFY_SOURCE=2 -fPIC" \ + LDFLAGS="-Wl,-z,relro" \ + PYTHON="" \ + ../configure \ + --build=x86_64-linux-gnu \ + --host=x86_64-linux-gnu \ + --prefix=$PREFIX \ + --disable-maintainer-mode \ + --disable-dependency-tracking \ + --disable-silent-rules \ + --disable-gdbtk \ + --disable-shared \ + --without-guile \ + --with-system-gdbinit=$PREFIX/etc/gdb/gdbinit \ + --with-system-readline \ + --with-expat \ + --with-system-zlib \ + --with-lzma \ + --with-babeltrace \ + --with-intel-pt \ + --enable-tui \ + --with-python=python3 + make -j$CPUS + make install + popd && popd +fi + +# install pahole +if [ ! -d $PREFIX/share/pahole-gdb ]; then + unzip ../archives/pahole-gdb-master.zip + mv pahole-gdb-master $PREFIX/share/pahole-gdb +fi + +# setup system gdbinit +if [ ! 
-f $PREFIX/etc/gdb/gdbinit ]; then + mkdir -p $PREFIX/etc/gdb + cat >$PREFIX/etc/gdb/gdbinit <> build-flags.cmake + echo 'set(CMAKE_USE_RELATIVE_PATHS ON CACHE BOOL "Use relative paths" FORCE)' >> build-flags.cmake + echo 'set(CMAKE_C_FLAGS "-g -O2 -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2" CACHE STRING "C flags" FORCE)' >> build-flags.cmake + echo 'set(CMAKE_CXX_FLAGS "-g -O2 -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2" CACHE STRING "C++ flags" FORCE)' >> build-flags.cmake + echo 'set(CMAKE_SKIP_BOOTSTRAP_TEST ON CACHE BOOL "Skip BootstrapTest" FORCE)' >> build-flags.cmake + echo 'set(BUILD_CursesDialog ON CACHE BOOL "Build curses GUI" FORCE)' >> build-flags.cmake + mkdir build && pushd build + ../bootstrap \ + --prefix=$PREFIX \ + --init=../build-flags.cmake \ + --parallel=$CPUS \ + --system-curl + make -j$CPUS + # make test # run test suite + make install + popd && popd +fi + +# compile cppcheck +if [ ! -f $PREFIX/bin/cppcheck ]; then + if [ -d cppcheck-$CPPCHECK_VERSION ]; then + rm -rf cppcheck-$CPPCHECK_VERSION + fi + tar -xvf ../archives/cppcheck-$CPPCHECK_VERSION.tar.gz + pushd cppcheck-$CPPCHECK_VERSION + env \ + CC=gcc \ + CXX=g++ \ + PREFIX=$PREFIX \ + FILESDIR=$PREFIX/share/cppcheck \ + CFGDIR=$PREFIX/share/cppcheck/cfg \ + make -j$CPUS + env \ + CC=gcc \ + CXX=g++ \ + PREFIX=$PREFIX \ + FILESDIR=$PREFIX/share/cppcheck \ + CFGDIR=$PREFIX/share/cppcheck/cfg \ + make install + popd +fi + +# compile swig +if [ ! -d swig-$SWIG_VERSION/install ]; then + if [ -d swig-$SWIG_VERSION ]; then + rm -rf swig-$SWIG_VERSION + fi + tar -xvf ../archives/swig-$SWIG_VERSION.tar.gz + mv swig-rel-$SWIG_VERSION swig-$SWIG_VERSION + pushd swig-$SWIG_VERSION + ./autogen.sh + mkdir build && pushd build + ../configure --prefix=$DIR/build/swig-$SWIG_VERSION/install + make -j$CPUS + make install + popd && popd +fi + +# compile llvm +if [ ! -f $PREFIX/bin/clang ]; then + if [ -d llvm-$LLVM_VERSION ]; then + rm -rf llvm-$LLVM_VERSION + fi + tar -xvf ../archives/llvm-$LLVM_VERSION.src.tar.xz + mv llvm-$LLVM_VERSION.src llvm-$LLVM_VERSION + tar -xvf ../archives/clang-$LLVM_VERSION.src.tar.xz + mv clang-$LLVM_VERSION.src llvm-$LLVM_VERSION/tools/clang + tar -xvf ../archives/lld-$LLVM_VERSION.src.tar.xz + mv lld-$LLVM_VERSION.src/ llvm-$LLVM_VERSION/tools/lld + tar -xvf ../archives/clang-tools-extra-$LLVM_VERSION.src.tar.xz + mv clang-tools-extra-$LLVM_VERSION.src/ llvm-$LLVM_VERSION/tools/clang/tools/extra + tar -xvf ../archives/compiler-rt-$LLVM_VERSION.src.tar.xz + mv compiler-rt-$LLVM_VERSION.src/ llvm-$LLVM_VERSION/projects/compiler-rt + pushd llvm-$LLVM_VERSION + mkdir build && pushd build + # activate swig + export PATH=$DIR/build/swig-$SWIG_VERSION/install/bin:$PATH + # influenced by: https://buildd.debian.org/status/fetch.php?pkg=llvm-toolchain-7&arch=amd64&ver=1%3A7.0.1%7E%2Brc2-1%7Eexp1&stamp=1541506173&raw=0 + cmake .. 
\ + -DCMAKE_C_COMPILER=$PREFIX/bin/gcc \ + -DCMAKE_CXX_COMPILER=$PREFIX/bin/g++ \ + -DCMAKE_CXX_LINK_FLAGS="-L$PREFIX/lib64 -Wl,-rpath,$PREFIX/lib64" \ + -DCMAKE_INSTALL_PREFIX=$PREFIX \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DCMAKE_CXX_FLAGS_RELWITHDEBINFO="-O2 -DNDEBUG" \ + -DCMAKE_CXX_FLAGS=' -fuse-ld=gold -fPIC -Wno-unused-command-line-argument -Wno-unknown-warning-option' \ + -DCMAKE_C_FLAGS=' -fuse-ld=gold -fPIC -Wno-unused-command-line-argument -Wno-unknown-warning-option' \ + -DLLVM_LINK_LLVM_DYLIB=ON \ + -DLLVM_INSTALL_UTILS=ON \ + -DLLVM_VERSION_SUFFIX= \ + -DLLVM_BUILD_LLVM_DYLIB=ON \ + -DLLVM_ENABLE_RTTI=ON \ + -DLLVM_ENABLE_FFI=ON \ + -DLLVM_BINUTILS_INCDIR=$PREFIX/include/ \ + -DLLVM_USE_PERF=yes + make -j$CPUS + make -j$CPUS check-clang # run clang test suite + make -j$CPUS check-lld # run lld test suite + make install + popd && popd +fi + +# create README +if [ ! -f $PREFIX/README.md ]; then + cat >$PREFIX/README.md <$PREFIX/activate < 1: + # Nested object could be a type consisting of a list of types (e.g. mgp.Map) + # so we need to join the parts. + type_arg_as_str = ', '.join(type_arg_as_str) + else: + type_arg_as_str = type_arg_as_str[0] + simple_type = get_simple_type(type_arg_as_str) if simple_type is not None: return _mgp.type_list(simple_type) diff --git a/libs/.gitignore b/libs/.gitignore index 51e7aac4c..08bd2de9f 100644 --- a/libs/.gitignore +++ b/libs/.gitignore @@ -4,3 +4,4 @@ !cleanup.sh !CMakeLists.txt !__main.cpp +!jemalloc.cmake diff --git a/libs/CMakeLists.txt b/libs/CMakeLists.txt index 2dd4c1bf9..9f7941c78 100644 --- a/libs/CMakeLists.txt +++ b/libs/CMakeLists.txt @@ -8,6 +8,8 @@ if (NPROC EQUAL 0) set(NPROC 1) endif() +set(LIB_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + # convenience functions function(import_header_library name include_dir) add_library(${name} INTERFACE IMPORTED GLOBAL) @@ -89,11 +91,9 @@ import_external_library(antlr4 STATIC CMAKE_ARGS # http://stackoverflow.com/questions/37096062/get-a-basic-c-program-to-compile-using-clang-on-ubuntu-16/38385967#38385967 -DWITH_LIBCXX=OFF # because of debian bug -DCMAKE_SKIP_INSTALL_ALL_DEPENDENCY=true + -DCMAKE_CXX_STANDARD=20 BUILD_COMMAND $(MAKE) antlr4_static - # Make a License.txt out of thin air, so that antlr4.6 knows how to build. - # When we upgrade antlr, this will no longer be needed. - INSTALL_COMMAND touch ${CMAKE_CURRENT_SOURCE_DIR}/antlr4/runtime/Cpp/License.txt - COMMAND $(MAKE) install) + INSTALL_COMMAND $(MAKE) install) # Setup google benchmark. 
import_external_library(benchmark STATIC @@ -207,8 +207,18 @@ import_external_library(mgclient STATIC find_package(OpenSSL REQUIRED) target_link_libraries(mgclient INTERFACE ${OPENSSL_LIBRARIES}) +add_external_project(mgconsole + SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/mgconsole + CMAKE_ARGS + -DCMAKE_INSTALL_PREFIX:PATH=${CMAKE_BINARY_DIR} + BUILD_COMMAND $(MAKE) mgconsole) + +add_custom_target(mgconsole DEPENDS mgconsole-proj) + # Setup spdlog import_external_library(spdlog STATIC ${CMAKE_CURRENT_SOURCE_DIR}/spdlog/${CMAKE_INSTALL_LIBDIR}/libspdlog.a ${CMAKE_CURRENT_SOURCE_DIR}/spdlog/include BUILD_COMMAND $(MAKE) spdlog) + +include(jemalloc.cmake) diff --git a/libs/jemalloc.cmake b/libs/jemalloc.cmake new file mode 100644 index 000000000..4e95c55e6 --- /dev/null +++ b/libs/jemalloc.cmake @@ -0,0 +1,55 @@ +set(JEMALLOC_DIR "${LIB_DIR}/jemalloc") + +set(JEMALLOC_SRCS + ${JEMALLOC_DIR}/src/arena.c + ${JEMALLOC_DIR}/src/background_thread.c + ${JEMALLOC_DIR}/src/base.c + ${JEMALLOC_DIR}/src/bin.c + ${JEMALLOC_DIR}/src/bitmap.c + ${JEMALLOC_DIR}/src/ckh.c + ${JEMALLOC_DIR}/src/ctl.c + ${JEMALLOC_DIR}/src/div.c + ${JEMALLOC_DIR}/src/extent.c + ${JEMALLOC_DIR}/src/extent_dss.c + ${JEMALLOC_DIR}/src/extent_mmap.c + ${JEMALLOC_DIR}/src/hash.c + ${JEMALLOC_DIR}/src/hook.c + ${JEMALLOC_DIR}/src/jemalloc.c + ${JEMALLOC_DIR}/src/large.c + ${JEMALLOC_DIR}/src/log.c + ${JEMALLOC_DIR}/src/malloc_io.c + ${JEMALLOC_DIR}/src/mutex.c + ${JEMALLOC_DIR}/src/mutex_pool.c + ${JEMALLOC_DIR}/src/nstime.c + ${JEMALLOC_DIR}/src/pages.c + ${JEMALLOC_DIR}/src/prng.c + ${JEMALLOC_DIR}/src/prof.c + ${JEMALLOC_DIR}/src/rtree.c + ${JEMALLOC_DIR}/src/sc.c + ${JEMALLOC_DIR}/src/stats.c + ${JEMALLOC_DIR}/src/sz.c + ${JEMALLOC_DIR}/src/tcache.c + ${JEMALLOC_DIR}/src/test_hooks.c + ${JEMALLOC_DIR}/src/ticker.c + ${JEMALLOC_DIR}/src/tsd.c + ${JEMALLOC_DIR}/src/witness.c + ${JEMALLOC_DIR}/src/safety_check.c +) + +add_library(jemalloc ${JEMALLOC_SRCS}) +target_include_directories(jemalloc PUBLIC "${JEMALLOC_DIR}/include") + +find_package(Threads REQUIRED) +target_link_libraries(jemalloc PUBLIC Threads::Threads) + +target_compile_definitions(jemalloc PRIVATE -DJEMALLOC_NO_PRIVATE_NAMESPACE) + +if (CMAKE_BUILD_TYPE STREQUAL "DEBUG") + target_compile_definitions(jemalloc PRIVATE -DJEMALLOC_DEBUG=1 -DJEMALLOC_PROF=1) +endif() + +target_compile_options(jemalloc PRIVATE -Wno-redundant-decls) +# for RTLD_NEXT +target_compile_definitions(jemalloc PRIVATE _GNU_SOURCE) + +set_property(TARGET jemalloc APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS USE_JEMALLOC=1) diff --git a/libs/setup.sh b/libs/setup.sh index 9c7df826e..bab12fbfc 100755 --- a/libs/setup.sh +++ b/libs/setup.sh @@ -2,8 +2,9 @@ # Download external dependencies. +local_cache_host=${MGDEPS_CACHE_HOST_PORT:-mgdeps-cache:8000} working_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -cd ${working_dir} +cd "${working_dir}" # Clones a git repository and optionally cherry picks additional commits. The # function will try to preserve any local changes in the repo. @@ -15,7 +16,11 @@ clone () { shift 3 # Clone if there's no repo. if [[ ! -d "$dir_name" ]]; then - git clone "$git_repo" "$dir_name" + echo "Cloning from $git_repo" + # If the clone fails, it doesn't make sense to continue with the function + # execution but the whole script should continue executing because we might + # clone the same repo from a different source. + git clone "$git_repo" "$dir_name" || return 1 fi pushd "$dir_name" # Just fetch new commits from remote repository. 
Don't merge/pull them in, so @@ -29,12 +34,17 @@ clone () { # Stash regardless of local_changes, so that a user gets a message on stdout. git stash # Checkout the primary commit (there's no need to pull/merge). - git checkout $checkout_id + # A checkout failure should exit this script immediately because the target + # commit is not there, and that will most likely cause build-time errors. + git checkout "$checkout_id" || exit 1 # Apply any optional cherry pick fixes. while [[ $# -ne 0 ]]; do local cherry_pick_id=$1 shift - git cherry-pick -n $cherry_pick_id + # A cherry-pick failure should exit this script immediately because the + # target commit is not there, and that will most likely cause build-time + # errors. + git cherry-pick -n "$cherry_pick_id" || exit 1 done # Reapply any local changes. if [[ $local_changes == true ]]; then @@ -43,81 +53,188 @@ clone () { popd } +file_get_try_double () { + primary_url="$1" + secondary_url="$2" + echo "Download primary from $primary_url secondary from $secondary_url" + if [ -z "$primary_url" ]; then echo "Primary should not be empty." && exit 1; fi + if [ -z "$secondary_url" ]; then echo "Secondary should not be empty." && exit 1; fi + filename="$(basename "$secondary_url")" + wget -nv "$primary_url" -O "$filename" || wget -nv "$secondary_url" -O "$filename" || exit 1 + echo "" +} + +repo_clone_try_double () { + primary_url="$1" + secondary_url="$2" + folder_name="$3" + ref="$4" + echo "Cloning primary from $primary_url secondary from $secondary_url" + if [ -z "$primary_url" ]; then echo "Primary should not be empty." && exit 1; fi + if [ -z "$secondary_url" ]; then echo "Secondary should not be empty." && exit 1; fi + if [ -z "$folder_name" ]; then echo "Clone folder should not be empty." && exit 1; fi + if [ -z "$ref" ]; then echo "Git clone ref should not be empty." && exit 1; fi + clone "$primary_url" "$folder_name" "$ref" || clone "$secondary_url" "$folder_name" "$ref" || exit 1 + echo "" +} + +# List all dependencies. + +# The reasons for introducing primary and secondary urls are: +# * HTTPS is hard to cache +# * The remote development workflow is more flexible if people don't have to connect to a VPN +# * Direct download from the "source of truth" is slower and unreliable because of the whole internet in-between +# * When a new dependency has to be added, both urls can be the same; later, someone can optimize if required + +# The goal of having primary urls is to have links to the "local" cache of +# dependencies where these dependencies can be downloaded as fast as +# possible. The actual cache server could be on your local machine, on a +# dedicated machine inside the build cluster or on the actual build machine. +# Downloads from primary_urls might fail because the cache is not installed.
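To make the fallback behaviour concrete, here is a minimal usage sketch of the two helpers above; only MGDEPS_CACHE_HOST_PORT, file_get_try_double and the cache URL scheme come from this script, while the localhost address and the example tarball name are hypothetical:

# Point the script at a cache running on this machine instead of mgdeps-cache:8000.
MGDEPS_CACHE_HOST_PORT=localhost:8000 ./libs/setup.sh

# Inside the script each dependency is then fetched cache-first, e.g.:
#   file_get_try_double "http://localhost:8000/file/example.tar.gz" \
#                       "https://example.com/example.tar.gz"
# wget tries the primary (cache) URL and falls back to the secondary
# (source-of-truth) URL; only if both fail does the whole setup abort.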
+declare -A primary_urls=( + ["antlr4-code"]="http://$local_cache_host/git/antlr4.git" + ["antlr4-generator"]="http://$local_cache_host/file/antlr-4.9.2-complete.jar" + ["cppitertools"]="http://$local_cache_host/git/cppitertools.git" + ["fmt"]="http://$local_cache_host/git/fmt.git" + ["rapidcheck"]="http://$local_cache_host/git/rapidcheck.git" + ["gbenchmark"]="http://$local_cache_host/git/benchmark.git" + ["gtest"]="http://$local_cache_host/git/googletest.git" + ["gflags"]="http://$local_cache_host/git/gflags.git" + ["libbcrypt"]="http://$local_cache_host/git/libbcrypt.git" + ["bzip2"]="http://$local_cache_host/git/bzip2.git" + ["zlib"]="http://$local_cache_host/git/zlib.git" + ["rocksdb"]="http://$local_cache_host/git/rocksdb.git" + ["mgclient"]="http://$local_cache_host/git/mgclient.git" + ["pymgclient"]="http://$local_cache_host/git/pymgclient.git" + ["mgconsole"]="http://$local_cache_host/git/mgconsole.git" + ["spdlog"]="http://$local_cache_host/git/spdlog" + ["jemalloc"]="http://$local_cache_host/git/jemalloc.git" + ["nlohmann"]="http://$local_cache_host/file/nlohmann/json/b3e5cb7f20dcc5c806e418df34324eca60d17d4e/single_include/nlohmann/json.hpp" + ["neo4j"]="http://$local_cache_host/file/neo4j-community-3.2.3-unix.tar.gz" +) + +# The goal of secondary urls is to have links to the "source of truth" of +# dependencies, e.g., Github or S3. Downloads from secondary urls, if they +# happen at all, should never fail. In other words, if one fails, the whole +# build should fail. +declare -A secondary_urls=( + ["antlr4-code"]="https://github.com/antlr/antlr4.git" + ["antlr4-generator"]="http://www.antlr.org/download/antlr-4.9.2-complete.jar" + ["cppitertools"]="https://github.com/ryanhaining/cppitertools.git" + ["fmt"]="https://github.com/fmtlib/fmt.git" + ["rapidcheck"]="https://github.com/emil-e/rapidcheck.git" + ["gbenchmark"]="https://github.com/google/benchmark.git" + ["gtest"]="https://github.com/google/googletest.git" + ["gflags"]="https://github.com/memgraph/gflags.git" + ["libbcrypt"]="https://github.com/rg3/libbcrypt" + ["bzip2"]="https://github.com/VFR-maniac/bzip2" + ["zlib"]="https://github.com/madler/zlib.git" + ["rocksdb"]="https://github.com/facebook/rocksdb.git" + ["mgclient"]="https://github.com/memgraph/mgclient.git" + ["pymgclient"]="https://github.com/memgraph/pymgclient.git" + ["mgconsole"]="http://github.com/memgraph/mgconsole.git" + ["spdlog"]="https://github.com/gabime/spdlog" + ["jemalloc"]="https://github.com/jemalloc/jemalloc.git" + ["nlohmann"]="https://raw.githubusercontent.com/nlohmann/json/b3e5cb7f20dcc5c806e418df34324eca60d17d4e/single_include/nlohmann/json.hpp" + ["neo4j"]="https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/neo4j-community-3.2.3-unix.tar.gz" +) + # antlr -antlr_generator_filename="antlr-4.6-complete.jar" -# wget -O ${antlr_generator_filename} http://www.antlr.org/download/${antlr_generator_filename} -wget -nv -O ${antlr_generator_filename} https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/${antlr_generator_filename} -antlr4_tag="aacd2a2c95816d8dc1c05814051d631bfec4cf3e" # v4.6 -clone https://github.com/antlr/antlr4.git antlr4 $antlr4_tag -# fix missing include -sed -i 's/^#pragma once/#pragma once\n#include /' antlr4/runtime/Cpp/runtime/src/support/CPPUtils.h +file_get_try_double "${primary_urls[antlr4-generator]}" "${secondary_urls[antlr4-generator]}" + +antlr4_tag="5e5b6d35b4183fd330102c40947b95c4b5c6abb5" # v4.9.2 +repo_clone_try_double "${primary_urls[antlr4-code]}" "${secondary_urls[antlr4-code]}" "antlr4" "$antlr4_tag" # remove shared library from install dependencies sed -i 's/install(TARGETS antlr4_shared/install(TARGETS antlr4_shared OPTIONAL/' antlr4/runtime/Cpp/runtime/CMakeLists.txt +# fix issue https://github.com/antlr/antlr4/issues/3194 - should update the Antlr commit once the PR related to the issue gets merged +sed -i 's/std::is_nothrow_copy_constructible/std::is_copy_constructible/' antlr4/runtime/Cpp/runtime/src/support/Any.h # cppitertools v2.0 2019-12-23 cppitertools_ref="cb3635456bdb531121b82b4d2e3afc7ae1f56d47" -clone https://github.com/ryanhaining/cppitertools.git cppitertools $cppitertools_ref +repo_clone_try_double "${primary_urls[cppitertools]}" "${secondary_urls[cppitertools]}" "cppitertools" "$cppitertools_ref" # fmt -fmt_tag="7bdf0628b1276379886c7f6dda2cef2b3b374f0b" # (2020-11-25) -clone https://github.com/fmtlib/fmt.git fmt $fmt_tag +fmt_tag="7bdf0628b1276379886c7f6dda2cef2b3b374f0b" # (2020-11-25) +repo_clone_try_double "${primary_urls[fmt]}" "${secondary_urls[fmt]}" "fmt" "$fmt_tag" # rapidcheck rapidcheck_tag="7bc7d302191a4f3d0bf005692677126136e02f60" # (2020-05-04) -clone https://github.com/emil-e/rapidcheck.git rapidcheck $rapidcheck_tag +repo_clone_try_double "${primary_urls[rapidcheck]}" "${secondary_urls[rapidcheck]}" "rapidcheck" "$rapidcheck_tag" # google benchmark benchmark_tag="4f8bfeae470950ef005327973f15b0044eceaceb" # v1.1.0 -clone https://github.com/google/benchmark.git benchmark $benchmark_tag +repo_clone_try_double "${primary_urls[gbenchmark]}" "${secondary_urls[gbenchmark]}" "benchmark" "$benchmark_tag" # google test googletest_tag="ec44c6c1675c25b9827aacd08c02433cccde7780" # v1.8.0 -clone https://github.com/google/googletest.git googletest $googletest_tag +repo_clone_try_double "${primary_urls[gtest]}" "${secondary_urls[gtest]}" "googletest" "$googletest_tag" # google flags gflags_tag="b37ceb03a0e56c9f15ce80409438a555f8a67b7c" # custom version (May 6, 2017) -clone https://github.com/memgraph/gflags.git gflags $gflags_tag +repo_clone_try_double "${primary_urls[gflags]}" "${secondary_urls[gflags]}" "gflags" "$gflags_tag" # libbcrypt libbcrypt_tag="8aa32ad94ebe06b76853b0767c910c9fbf7ccef4" # custom version (Dec 16, 2016) -clone https://github.com/rg3/libbcrypt libbcrypt $libbcrypt_tag +repo_clone_try_double "${primary_urls[libbcrypt]}" "${secondary_urls[libbcrypt]}" "libbcrypt" "$libbcrypt_tag" # neo4j -wget -nv https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/neo4j-community-3.2.3-unix.tar.gz -O neo4j.tar.gz -tar -xzf neo4j.tar.gz -rm -rf neo4j +file_get_try_double "${primary_urls[neo4j]}" "${secondary_urls[neo4j]}" +tar -xzf neo4j-community-3.2.3-unix.tar.gz mv neo4j-community-3.2.3 neo4j -rm neo4j.tar.gz +rm neo4j-community-3.2.3-unix.tar.gz # nlohmann json # We wget the header instead of cloning the repo since the repo is huge (lots of test data). # We use the head from Sep 1, 2017 instead of the last release since that was a long time ago. mkdir -p json cd json -wget "https://raw.githubusercontent.com/nlohmann/json/b3e5cb7f20dcc5c806e418df34324eca60d17d4e/single_include/nlohmann/json.hpp" +file_get_try_double "${primary_urls[nlohmann]}" "${secondary_urls[nlohmann]}" cd .. bzip2_tag="0405487e2b1de738e7f1c8afb50d19cf44e8d580" # v1.0.6 (May 26, 2011) -clone https://github.com/VFR-maniac/bzip2 bzip2 $bzip2_tag +repo_clone_try_double "${primary_urls[bzip2]}" "${secondary_urls[bzip2]}" "bzip2" "$bzip2_tag" zlib_tag="cacf7f1d4e3d44d871b605da3b647f07d718623f" # v1.2.11.
-clone https://github.com/madler/zlib.git zlib $zlib_tag +repo_clone_try_double "${primary_urls[zlib]}" "${secondary_urls[zlib]}" "zlib" "$zlib_tag" # remove shared library from install dependencies sed -i 's/install(TARGETS zlib zlibstatic/install(TARGETS zlibstatic/g' zlib/CMakeLists.txt rocksdb_tag="f3e33549c151f30ac4eb7c22356c6d0331f37652" # (2020-10-14) -clone https://github.com/facebook/rocksdb.git rocksdb $rocksdb_tag +repo_clone_try_double "${primary_urls[rocksdb]}" "${secondary_urls[rocksdb]}" "rocksdb" "$rocksdb_tag" # remove shared library from install dependencies sed -i 's/TARGETS ${ROCKSDB_SHARED_LIB}/TARGETS ${ROCKSDB_SHARED_LIB} OPTIONAL/' rocksdb/CMakeLists.txt # mgclient mgclient_tag="v1.2.0" # (2021-01-14) -clone https://github.com/memgraph/mgclient.git mgclient $mgclient_tag +repo_clone_try_double "${primary_urls[mgclient]}" "${secondary_urls[mgclient]}" "mgclient" "$mgclient_tag" sed -i 's/\${CMAKE_INSTALL_LIBDIR}/lib/' mgclient/src/CMakeLists.txt # pymgclient pymgclient_tag="4f85c179e56302d46a1e3e2cf43509db65f062b3" # (2021-01-15) -clone https://github.com/memgraph/pymgclient.git pymgclient $pymgclient_tag +repo_clone_try_double "${primary_urls[pymgclient]}" "${secondary_urls[pymgclient]}" "pymgclient" "$pymgclient_tag" + +# mgconsole +mgconsole_tag="01ae99bfce772e540e75c076ba03cf06c0c2ac7d" # (2021-05-26) +repo_clone_try_double "${primary_urls[mgconsole]}" "${secondary_urls[mgconsole]}" "mgconsole" "$mgconsole_tag" spdlog_tag="46d418164dd4cd9822cf8ca62a116a3f71569241" # (2020-12-01) -clone https://github.com/gabime/spdlog spdlog $spdlog_tag +repo_clone_try_double "${primary_urls[spdlog]}" "${secondary_urls[spdlog]}" "spdlog" "$spdlog_tag" + +jemalloc_tag="ea6b3e973b477b8061e0076bb257dbd7f3faa756" # (2021-02-11) +repo_clone_try_double "${primary_urls[jemalloc]}" "${secondary_urls[jemalloc]}" "jemalloc" "$jemalloc_tag" +pushd jemalloc +# The ThreadPool selects jobs randomly, so there can be threads that performed +# some memory-heavy task earlier and will be inactive for some time; until +# they become active again, that memory is not freed, since by default each +# thread has its own arena (though there should be no more than 4*CPU arenas; +# see the opt.narenas description). +# +# By enabling percpu_arena, the number of arenas is limited to the number of +# CPUs, and hence this problem should go away. +# +# muzzy_decay_ms -- use MADV_FREE when available on newer Linuxes, to +# avoid spurious latencies and additional work associated with +# MADV_DONTNEED. See +# https://github.com/ClickHouse/ClickHouse/issues/11121 for motivation.
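As a side note on this tuning: assuming jemalloc's standard MALLOC_CONF mechanism, the same options baked in below through --with-malloc-conf can also be supplied at run time, which makes it easy to experiment with decay values before changing this script; the binary name here is only illustrative:

# Run-time equivalent of the baked-in default (sketch):
MALLOC_CONF="percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:5000,dirty_decay_ms:5000" \
    ./memgraph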
+./autogen.sh --with-malloc-conf="percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:5000,dirty_decay_ms:5000" +popd diff --git a/query_modules/CMakeLists.txt b/query_modules/CMakeLists.txt index d7b6cef9a..7efddb2c6 100644 --- a/query_modules/CMakeLists.txt +++ b/query_modules/CMakeLists.txt @@ -35,8 +35,3 @@ install(FILES graph_analyzer.py DESTINATION lib/memgraph/query_modules) install(FILES mgp_networkx.py DESTINATION lib/memgraph/query_modules) install(FILES nxalg.py DESTINATION lib/memgraph/query_modules) install(FILES wcc.py DESTINATION lib/memgraph/query_modules) - -if (MG_ENTERPRISE) - add_subdirectory(louvain) - add_subdirectory(connectivity) -endif() diff --git a/query_modules/connectivity/CMakeLists.txt b/query_modules/connectivity/CMakeLists.txt deleted file mode 100644 index 185d747a5..000000000 --- a/query_modules/connectivity/CMakeLists.txt +++ /dev/null @@ -1,18 +0,0 @@ -set(MODULE src/connectivity_module.cpp) - -include_directories(src) - -add_library(connectivity SHARED ${MODULE}) -target_include_directories(connectivity PRIVATE ${CMAKE_SOURCE_DIR}/include) - -# Strip the library in release build. -string(TOLOWER ${CMAKE_BUILD_TYPE} lower_build_type) -if (lower_build_type STREQUAL "release") - add_custom_command(TARGET connectivity POST_BUILD - COMMAND strip -s $ - COMMENT "Stripping symbols and sections from connectivity module") -endif() - -install(PROGRAMS $ - DESTINATION lib/memgraph/query_modules - RENAME connectivity.so) diff --git a/query_modules/connectivity/src/connectivity_module.cpp b/query_modules/connectivity/src/connectivity_module.cpp deleted file mode 100644 index f87b34e2a..000000000 --- a/query_modules/connectivity/src/connectivity_module.cpp +++ /dev/null @@ -1,131 +0,0 @@ -#include "mg_procedure.h" - -#include -#include - -// Finds weakly connected components of a graph. 
-// Time complexity: O(|V|+|E|) -static void weak(const mgp_list *args, const mgp_graph *graph, - mgp_result *result, mgp_memory *memory) { - std::unordered_map vertex_component; - mgp_vertices_iterator *vertices_iterator = - mgp_graph_iter_vertices(graph, memory); - if (vertices_iterator == nullptr) { - mgp_result_set_error_msg(result, "Not enough memory"); - return; - } - - int64_t curr_component = 0; - for (const mgp_vertex *vertex = mgp_vertices_iterator_get(vertices_iterator); - vertex != nullptr; - vertex = mgp_vertices_iterator_next(vertices_iterator)) { - mgp_vertex_id vertex_id = mgp_vertex_get_id(vertex); - if (vertex_component.find(vertex_id.as_int) != vertex_component.end()) - continue; - - // run bfs from current vertex - std::queue q; - q.push(vertex_id.as_int); - vertex_component[vertex_id.as_int] = curr_component; - while (!q.empty()) { - mgp_vertex *v = mgp_graph_get_vertex_by_id(graph, {q.front()}, memory); - if (v == nullptr) { - mgp_vertices_iterator_destroy(vertices_iterator); - mgp_result_set_error_msg(result, "Not enough memory"); - return; - } - - q.pop(); - - // iterate over inbound edges - mgp_edges_iterator *edges_iterator = mgp_vertex_iter_in_edges(v, memory); - if (edges_iterator == nullptr) { - mgp_vertex_destroy(v); - mgp_vertices_iterator_destroy(vertices_iterator); - mgp_result_set_error_msg(result, "Not enough memory"); - return; - } - - for (const mgp_edge *edge = mgp_edges_iterator_get(edges_iterator); - edge != nullptr; edge = mgp_edges_iterator_next(edges_iterator)) { - mgp_vertex_id next_id = mgp_vertex_get_id(mgp_edge_get_from(edge)); - if (vertex_component.find(next_id.as_int) != vertex_component.end()) - continue; - vertex_component[next_id.as_int] = curr_component; - q.push(next_id.as_int); - } - - // iterate over outbound edges - mgp_edges_iterator_destroy(edges_iterator); - edges_iterator = mgp_vertex_iter_out_edges(v, memory); - if (edges_iterator == nullptr) { - mgp_vertex_destroy(v); - mgp_vertices_iterator_destroy(vertices_iterator); - mgp_result_set_error_msg(result, "Not enough memory"); - return; - } - - for (const mgp_edge *edge = mgp_edges_iterator_get(edges_iterator); - edge != nullptr; edge = mgp_edges_iterator_next(edges_iterator)) { - mgp_vertex_id next_id = mgp_vertex_get_id(mgp_edge_get_to(edge)); - if (vertex_component.find(next_id.as_int) != vertex_component.end()) - continue; - vertex_component[next_id.as_int] = curr_component; - q.push(next_id.as_int); - } - - mgp_vertex_destroy(v); - mgp_edges_iterator_destroy(edges_iterator); - } - - ++curr_component; - } - - mgp_vertices_iterator_destroy(vertices_iterator); - - for (const auto &p : vertex_component) { - mgp_result_record *record = mgp_result_new_record(result); - if (record == nullptr) { - mgp_result_set_error_msg(result, "Not enough memory"); - return; - } - - mgp_value *mem_id_value = mgp_value_make_int(p.first, memory); - if (mem_id_value == nullptr) { - mgp_result_set_error_msg(result, "Not enough memory"); - return; - } - - mgp_value *comp_value = mgp_value_make_int(p.second, memory); - if (comp_value == nullptr) { - mgp_value_destroy(mem_id_value); - mgp_result_set_error_msg(result, "Not enough memory"); - return; - } - - int mem_id_inserted = mgp_result_record_insert(record, "id", mem_id_value); - int comp_inserted = - mgp_result_record_insert(record, "component", comp_value); - - mgp_value_destroy(mem_id_value); - mgp_value_destroy(comp_value); - - if (!mem_id_inserted || !comp_inserted) { - mgp_result_set_error_msg(result, "Not enough memory"); - return; - } - } -} 
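The procedure above was exposed through the registration that follows. Assuming Memgraph's usual CALL syntax for query modules (the shared object was installed as connectivity.so), invoking it from openCypher would have looked roughly like this; the mgconsole pipe is just one convenient way to submit the query:

# Hypothetical invocation of the weakly-connected-components procedure:
echo "CALL connectivity.weak() YIELD id, component
      RETURN component, count(id) AS size ORDER BY size DESC;" | mgconsole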
- -extern "C" int mgp_init_module(struct mgp_module *module, - struct mgp_memory *memory) { - struct mgp_proc *wcc_proc = - mgp_module_add_read_procedure(module, "weak", weak); - if (!mgp_proc_add_result(wcc_proc, "id", mgp_type_int())) return 1; - if (!mgp_proc_add_result(wcc_proc, "component", mgp_type_int())) return 1; - return 0; -} - -extern "C" int mgp_shutdown_module() { - return 0; -} diff --git a/query_modules/louvain/CMakeLists.txt b/query_modules/louvain/CMakeLists.txt deleted file mode 100644 index aff268e9b..000000000 --- a/query_modules/louvain/CMakeLists.txt +++ /dev/null @@ -1,33 +0,0 @@ -set(MAIN src/main.cpp) -set(MODULE src/louvain_module.cpp) -set(SOURCES src/algorithms/louvain.cpp - src/data_structures/graph.cpp) - -include_directories(src) - -add_library(louvain-core STATIC ${SOURCES}) -set_target_properties(louvain-core PROPERTIES POSITION_INDEPENDENT_CODE ON) - -add_executable(louvain-main ${MAIN}) -target_link_libraries(louvain-main louvain-core) - -enable_testing() -add_subdirectory(test) - -add_library(louvain SHARED ${MODULE}) -target_link_libraries(louvain louvain-core) -target_include_directories(louvain PRIVATE ${CMAKE_SOURCE_DIR}/include) - -# Strip the library in release build. -string(TOLOWER ${CMAKE_BUILD_TYPE} lower_build_type) -if (lower_build_type STREQUAL "release") - add_custom_command(TARGET louvain POST_BUILD - COMMAND strip -s $ - COMMENT "Stripping symbols and sections from louvain module") -endif() - -if (NOT MG_COMMUNITY) - install(PROGRAMS $ - DESTINATION lib/memgraph/query_modules - RENAME louvain.so) -endif() diff --git a/query_modules/louvain/src/algorithms/algorithms.hpp b/query_modules/louvain/src/algorithms/algorithms.hpp deleted file mode 100644 index 4aa1f4bb5..000000000 --- a/query_modules/louvain/src/algorithms/algorithms.hpp +++ /dev/null @@ -1,18 +0,0 @@ -/// @file -/// -/// The file contains function declarations of several community-detection -/// graph algorithms. - -#pragma once - -#include "data_structures/graph.hpp" - -namespace algorithms { - /// Detects communities of an undirected, weighted graph using the Louvain - /// algorithm. The algorithm attempts to maximize the modularity of a weighted - /// graph. - /// - /// @param graph pointer to an undirected, weighted graph which may contain - /// self-loops. - void Louvain(comdata::Graph *graph); -} // namespace algorithms diff --git a/query_modules/louvain/src/algorithms/louvain.cpp b/query_modules/louvain/src/algorithms/louvain.cpp deleted file mode 100644 index 2be08385c..000000000 --- a/query_modules/louvain/src/algorithms/louvain.cpp +++ /dev/null @@ -1,163 +0,0 @@ -#include "algorithms/algorithms.hpp" - -#include -#include -#include -#include - -namespace { - -void OptimizeLocally(comdata::Graph *graph) { - // We will consider local optimizations uniformly at random. - std::random_device rd; - std::mt19937 g(rd()); - std::vector p(graph->Size()); - std::iota(p.begin(), p.end(), 0); - std::shuffle(p.begin(), p.end(), g); - - // Modularity of a graph can be expressed as: - // - // Q = 1 / (2m) * sum_over_pairs_of_nodes[(Aij - ki * kj / 2m) * delta(ci, cj)] - // - // where m is the sum of all weights in the graph, - // Aij is the weight on the edge that connects i and j (i=j for a self-loop) - // ki is the sum of weights incident to node i - // ci is the community of node i - // delta(a, b) is the Kronecker delta function.
- // - // With some simple algebraic manipulations, we can transform the formula into: - // - // Q = sum_over_components[M * (sum_over_pairs(Aij - M * ki * kj))] = - // = sum_over_components[M * (sum_over_pairs(Aij) - M * sum_over_nodes^2(ki))] = - // = sum_over_components[M * (w_contrib(ci) - M * k_contrib^2(ci))] - // - // where M = 1 / (2m) - // - // Therefore, we could store for each community the following: - // * Weight contribution (w_contrib) - // * Weighted degree contribution (k_contrib) - // - // This allows us to efficiently remove a node from one community and insert - // it into a community of its neighbour without the need to recalculate - // modularity from scratch. - - std::unordered_map w_contrib; - std::unordered_map k_contrib; - - for (uint32_t node_id = 0; node_id < graph->Size(); ++node_id) { - k_contrib[graph->Community(node_id)] += graph->IncidentWeight(node_id); - for (const auto &neigh : graph->Neighbours(node_id)) { - uint32_t nxt_id = neigh.dest; - double w = neigh.weight; - if (graph->Community(node_id) == graph->Community(nxt_id)) - w_contrib[graph->Community(node_id)] += w; - } - } - - bool stable = false; - double total_w = graph->TotalWeight(); - - while (!stable) { - stable = true; - for (uint32_t node_id : p) { - std::unordered_map sum_w; - double self_loop = 0; - sum_w[graph->Community(node_id)] = 0; - for (const auto &neigh : graph->Neighbours(node_id)) { - uint32_t nxt_id = neigh.dest; - double weight = neigh.weight; - if (nxt_id == node_id) { - self_loop += weight; - continue; - } - sum_w[graph->Community(nxt_id)] += weight; - } - - uint32_t my_c = graph->Community(node_id); - - uint32_t best_c = my_c; - double best_dq = 0; - - for (const auto &p : sum_w) { - if (p.first == my_c) continue; - uint32_t nxt_c = p.first; - double dq = 0; - - // contributions before swap (dq = d_after - d_before) - for (uint32_t c : {my_c, nxt_c}) - dq -= w_contrib[c] - k_contrib[c] * k_contrib[c] / (2.0 * total_w); - - // leave the current community - dq += (w_contrib[my_c] - 2.0 * sum_w[my_c] - self_loop) - - (k_contrib[my_c] - graph->IncidentWeight(node_id)) * - (k_contrib[my_c] - graph->IncidentWeight(node_id)) / - (2.0 * total_w); - - // join a new community - dq += (w_contrib[nxt_c] + 2.0 * sum_w[nxt_c] + self_loop) - - (k_contrib[nxt_c] + graph->IncidentWeight(node_id)) * - (k_contrib[nxt_c] + graph->IncidentWeight(node_id)) / - (2.0 * total_w); - - if (dq > best_dq) { - best_dq = dq; - best_c = nxt_c; - } - } - - if (best_c != my_c) { - graph->SetCommunity(node_id, best_c); - w_contrib[my_c] -= 2.0 * sum_w[my_c] + self_loop; - k_contrib[my_c] -= graph->IncidentWeight(node_id); - w_contrib[best_c] += 2.0 * sum_w[best_c] + self_loop; - k_contrib[best_c] += graph->IncidentWeight(node_id); - stable = false; - } - } - } -} - -} // anonymous namespace - -namespace algorithms { - -void Louvain(comdata::Graph *graph) { - OptimizeLocally(graph); - - // Collapse the locally optimized graph.
- uint32_t collapsed_nodes = graph->NormalizeCommunities(); - if (collapsed_nodes == graph->Size()) return; - comdata::Graph collapsed_graph(collapsed_nodes); - std::map, double> collapsed_edges; - - for (uint32_t node_id = 0; node_id < graph->Size(); ++node_id) { - std::unordered_map edges; - for (const auto &neigh : graph->Neighbours(node_id)) { - uint32_t nxt_id = neigh.dest; - double weight = neigh.weight; - if (graph->Community(nxt_id) < graph->Community(node_id)) continue; - edges[graph->Community(nxt_id)] += weight; - } - for (const auto &neigh : edges) { - uint32_t a = std::min(graph->Community(node_id), neigh.first); - uint32_t b = std::max(graph->Community(node_id), neigh.first); - collapsed_edges[{a, b}] += neigh.second; - } - } - - for (const auto &p : collapsed_edges) - collapsed_graph.AddEdge(p.first.first, p.first.second, p.second); - - // Repeat until no local optimizations can be found. - Louvain(&collapsed_graph); - - // Propagate results from collapsed graph. - for (uint32_t node_id = 0; node_id < graph->Size(); ++node_id) { - graph->SetCommunity(node_id, - collapsed_graph.Community(graph->Community(node_id))); - } - - graph->NormalizeCommunities(); -} - -} // namespace algorithms diff --git a/query_modules/louvain/src/data_structures/graph.cpp b/query_modules/louvain/src/data_structures/graph.cpp deleted file mode 100644 index 8f4f16217..000000000 --- a/query_modules/louvain/src/data_structures/graph.cpp +++ /dev/null @@ -1,99 +0,0 @@ -#include "data_structures/graph.hpp" - -#include -#include -#include -#include -#include -#include - -namespace comdata { - -Graph::Graph(uint32_t n_nodes) : n_nodes_(n_nodes), total_w_(0) { - adj_list_.resize(n_nodes, {}); - inc_w_.resize(n_nodes, 0); - - // each node starts as its own separate community. - community_.resize(n_nodes); - std::iota(community_.begin(), community_.end(), 0); -} - -uint32_t Graph::Size() const { return n_nodes_; } - -uint32_t Graph::Community(uint32_t node) const { return community_.at(node); } - -void Graph::SetCommunity(uint32_t node, uint32_t c) { community_.at(node) = c; } - -uint32_t Graph::NormalizeCommunities() { - std::set c_id(community_.begin(), community_.end()); - std::unordered_map cmap; - uint32_t id = 0; - for (uint32_t c : c_id) { - cmap[c] = id; - ++id; - } - for (uint32_t node_id = 0; node_id < n_nodes_; ++node_id) - community_[node_id] = cmap[community_[node_id]]; - return id; -} - -void Graph::AddEdge(uint32_t node1, uint32_t node2, double weight) { - if (node1 >= n_nodes_ || node2 >= n_nodes_) - throw std::out_of_range("Node index out of range"); - if (weight <= 0) throw std::out_of_range("Weights must be positive"); - if (edges_.find({node1, node2}) != edges_.end()) - throw std::invalid_argument("Edge already exists"); - - edges_.emplace(node1, node2); - edges_.emplace(node2, node1); - - total_w_ += weight; - - adj_list_[node1].emplace_back(node2, weight); - inc_w_[node1] += weight; - - if (node1 != node2) { - adj_list_[node2].emplace_back(node1, weight); - inc_w_[node2] += weight; - } -} - -uint32_t Graph::Degree(uint32_t node) const { - return static_cast(adj_list_.at(node).size()); -} - -double Graph::IncidentWeight(uint32_t node) const { return inc_w_.at(node); } - -double Graph::TotalWeight() const { return total_w_; } - -double Graph::Modularity() const { - double ret = 0; - // Since all weights should be positive, this implies that our graph has - // no edges. 
- if (total_w_ == 0) return 0; - - std::unordered_map weight_c; - std::unordered_map degree_c; - - for (uint32_t i = 0; i < n_nodes_; ++i) { - degree_c[Community(i)] += IncidentWeight(i); - for (const auto &neigh : adj_list_[i]) { - uint32_t j = neigh.dest; - double w = neigh.weight; - if (Community(i) != Community(j)) continue; - weight_c[Community(i)] += w; - } - } - - for (const auto &p : degree_c) - ret += weight_c[p.first] - (p.second * p.second) / (2 * total_w_); - - ret /= 2 * total_w_; - return ret; -} - -const std::vector &Graph::Neighbours(uint32_t node) const { - return adj_list_.at(node); -} - -} // namespace comdata diff --git a/query_modules/louvain/src/data_structures/graph.hpp b/query_modules/louvain/src/data_structures/graph.hpp deleted file mode 100644 index a818fd2fd..000000000 --- a/query_modules/louvain/src/data_structures/graph.hpp +++ /dev/null @@ -1,125 +0,0 @@ -/// @file - -#pragma once - -#include -#include -#include - -namespace comdata { - -struct Neighbour { - uint32_t dest; - double weight; - Neighbour(uint32_t d, double w) : dest(d), weight(w) {} -}; - -/// Class which models a weighted, undirected graph with necessary -/// functionalities for community detection algorithms. -class Graph { -public: - /// Constructs a new graph with a given number of nodes and no edges between - /// them. - /// - /// The implementation assumes (and enforces) that all nodes - /// are indexed from 0 to n_nodes. - /// - /// @param n_nodes Number of nodes in the graph. - explicit Graph(uint32_t n_nodes); - - /// @return number of nodes in the graph. - uint32_t Size() const; - - /// Adds a bidirectional, weighted edge to the graph between the given - /// nodes. If both given nodes are the same, the method inserts a weighted - /// self-loop. - /// - /// There should be no edges between the given nodes before invoking - /// this method. - /// - /// @param node1 index of an incident node. - /// @param node2 index of an incident node. - /// @param weight real value which represents the weight of the edge. - /// - /// @throw std::out_of_range - /// @throw std::invalid_argument - void AddEdge(uint32_t node1, uint32_t node2, double weight); - - /// @param node index of node. - /// - /// @return community the node belongs to. - /// - /// @throw std::out_of_range - uint32_t Community(uint32_t node) const; - - /// Adds a given node to a given community. - /// - /// @param node index of node. - /// @param c community the given node should go into. - /// - /// @throw std::out_of_range - void SetCommunity(uint32_t node, uint32_t c); - - /// Normalizes the values of communities. More precisely, after invoking this - /// method communities will be indexed by successive integers starting from 0. - /// - /// Note: this method is computationally expensive and takes O(|V|) - /// time, i.e., it traverses all nodes in the graph. - /// - /// @return number of communities in the graph - uint32_t NormalizeCommunities(); - - /// Returns the number of incident edges to a given node. Self-loops - /// contribute a single edge to the degree. - /// - /// @param node index of node. - /// - /// @return degree of given node. - /// - /// @throw std::out_of_range - uint32_t Degree(uint32_t node) const; - - /// Returns the total weight of incident edges to a given node. Weight - /// of a self loop contributes once to the total sum. - /// - /// @param node index of node. - /// - /// @return total incident weight of a given node.
- /// - /// @throw std::out_of_range - double IncidentWeight(uint32_t node) const; - - /// @return total weight of all edges in a graph. - double TotalWeight() const; - - /// Calculates the modularity of the graph which is defined as a real value - /// between -1 and 1 that measures the density of links inside communities - /// compared to links between communities. - /// - /// Note: this method is computationally expensive and takes O(|V| + |E|) - /// time, i.e., it traverses the entire graph. - /// - /// @return modularity of the graph. - double Modularity() const; - - /// Returns nodes adjacent to a given node. - /// - /// @param node index of node. - /// - /// @return list of neighbouring nodes. - /// - /// @throw std::out_of_range - const std::vector& Neighbours(uint32_t node) const; - -private: - uint32_t n_nodes_; - double total_w_; - - std::vector> adj_list_; - std::set> edges_; - - std::vector inc_w_; - std::vector community_; -}; - -} // namespace comdata diff --git a/query_modules/louvain/src/louvain_module.cpp b/query_modules/louvain/src/louvain_module.cpp deleted file mode 100644 index 110c79bdc..000000000 --- a/query_modules/louvain/src/louvain_module.cpp +++ /dev/null @@ -1,228 +0,0 @@ -#include "mg_procedure.h" - -#include -#include -#include - -#include "algorithms/algorithms.hpp" -#include "data_structures/graph.hpp" - -namespace { - -std::optional> NormalizeVertexIds( - const mgp_graph *graph, mgp_result *result, mgp_memory *memory) { - std::unordered_map mem_to_louv_id; - mgp_vertices_iterator *vertices_iterator = - mgp_graph_iter_vertices(graph, memory); - if (vertices_iterator == nullptr) { - mgp_result_set_error_msg(result, "Not enough memory!"); - return std::nullopt; - } - - uint32_t louv_id = 0; - for (const mgp_vertex *vertex = mgp_vertices_iterator_get(vertices_iterator); - vertex != nullptr; - vertex = mgp_vertices_iterator_next(vertices_iterator)) { - mgp_vertex_id mem_id = mgp_vertex_get_id(vertex); - mem_to_louv_id[mem_id.as_int] = louv_id; - ++louv_id; - } - - mgp_vertices_iterator_destroy(vertices_iterator); - return mem_to_louv_id; -} - -std::optional RunLouvain( - const mgp_graph *graph, mgp_result *result, mgp_memory *memory, - const std::unordered_map &mem_to_louv_id) { - comdata::Graph louvain_graph(mem_to_louv_id.size()); - // Extract the graph structure - // TODO(ipaljak): consider filtering nodes and edges by labels. - for (const auto &p : mem_to_louv_id) { - mgp_vertex *vertex = mgp_graph_get_vertex_by_id(graph, {p.first}, memory); - if (!vertex) { - mgp_result_set_error_msg(result, "Not enough memory!"); - return std::nullopt; - } - - // iterate over inbound edges. This is enough because we will eventually - // iterate over outbound edges in another direction. 
- mgp_edges_iterator *edges_iterator = - mgp_vertex_iter_in_edges(vertex, memory); - if (edges_iterator == nullptr) { - mgp_vertex_destroy(vertex); - mgp_result_set_error_msg(result, "Not enough memory!"); - return std::nullopt; - } - - for (const mgp_edge *edge = mgp_edges_iterator_get(edges_iterator); - edge != nullptr; edge = mgp_edges_iterator_next(edges_iterator)) { - const mgp_vertex *next_vertex = mgp_edge_get_from(edge); - mgp_vertex_id next_mem_id = mgp_vertex_get_id(next_vertex); - uint32_t next_louv_id; - try { - next_louv_id = mem_to_louv_id.at(next_mem_id.as_int); - } catch (const std::exception &e) { - const auto msg = std::string("[Internal error] ") + e.what(); - mgp_result_set_error_msg(result, msg.c_str()); - return std::nullopt; - } - - // retrieve edge weight (default to 1) - mgp_value *weight_prop = mgp_edge_get_property(edge, "weight", memory); - if (!weight_prop) { - mgp_vertex_destroy(vertex); - mgp_edges_iterator_destroy(edges_iterator); - mgp_result_set_error_msg(result, "Not enough memory"); - } - - double weight = 1; - if (mgp_value_is_double(weight_prop)) - weight = mgp_value_get_double(weight_prop); - if (mgp_value_is_int(weight_prop)) - weight = static_cast(mgp_value_get_int(weight_prop)); - - mgp_value_destroy(weight_prop); - - try { - louvain_graph.AddEdge(p.second, next_louv_id, weight); - } catch (const std::exception &e) { - mgp_vertex_destroy(vertex); - mgp_edges_iterator_destroy(edges_iterator); - mgp_result_set_error_msg(result, e.what()); - return std::nullopt; - } - } - - mgp_vertex_destroy(vertex); - mgp_edges_iterator_destroy(edges_iterator); - } - - try { - algorithms::Louvain(&louvain_graph); - } catch (const std::exception &e) { - const auto msg = std::string("[Internal error] ") + e.what(); - mgp_result_set_error_msg(result, msg.c_str()); - return std::nullopt; - } - - return louvain_graph; -} - -void communities(const mgp_list *args, const mgp_graph *graph, - mgp_result *result, mgp_memory *memory) { - try { - // Normalize vertex ids - auto mem_to_louv_id = NormalizeVertexIds(graph, result, memory); - if (!mem_to_louv_id) return; - - // Run louvain - auto louvain_graph = RunLouvain(graph, result, memory, *mem_to_louv_id); - if (!louvain_graph) return; - - // Return node ids and their corresponding communities. 
- for (const auto &p : *mem_to_louv_id) { - mgp_result_record *record = mgp_result_new_record(result); - if (record == nullptr) { - mgp_result_set_error_msg(result, "Not enough memory!"); - return; - } - - mgp_value *mem_id_value = mgp_value_make_int(p.first, memory); - if (mem_id_value == nullptr) { - mgp_result_set_error_msg(result, "Not enough memory!"); - return; - } - - mgp_value *com_value = - mgp_value_make_int(louvain_graph->Community(p.second), memory); - if (com_value == nullptr) { - mgp_value_destroy(mem_id_value); - mgp_result_set_error_msg(result, "Not enough memory!"); - return; - } - - int mem_id_inserted = - mgp_result_record_insert(record, "id", mem_id_value); - int com_inserted = - mgp_result_record_insert(record, "community", com_value); - - mgp_value_destroy(mem_id_value); - mgp_value_destroy(com_value); - - if (!mem_id_inserted || !com_inserted) { - mgp_result_set_error_msg(result, "Not enough memory!"); - return; - } - } - } catch (const std::exception &e) { - mgp_result_set_error_msg(result, e.what()); - return; - } -} - -void modularity(const mgp_list *args, const mgp_graph *graph, - mgp_result *result, mgp_memory *memory) { - try { - // Normalize vertex ids - auto mem_to_louv_id = NormalizeVertexIds(graph, result, memory); - if (!mem_to_louv_id) return; - - // Run louvain - auto louvain_graph = RunLouvain(graph, result, memory, *mem_to_louv_id); - if (!louvain_graph) return; - - // Return graph modularity after Louvain - // TODO(ipaljak) - consider allowing the user to specify seed communities - // and - // yield modularity values both before and after running - // louvain. - mgp_result_record *record = mgp_result_new_record(result); - if (record == nullptr) { - mgp_result_set_error_msg(result, "Not enough memory!"); - return; - } - - mgp_value *modularity_value = - mgp_value_make_double(louvain_graph->Modularity(), memory); - if (modularity_value == nullptr) { - mgp_result_set_error_msg(result, "Not enough memory!"); - return; - } - - int value_inserted = - mgp_result_record_insert(record, "modularity", modularity_value); - - mgp_value_destroy(modularity_value); - - if (!value_inserted) { - mgp_result_set_error_msg(result, "Not enough memory!"); - return; - } - } catch (const std::exception &e) { - mgp_result_set_error_msg(result, e.what()); - return; - } -} - -} // namespace - -extern "C" int mgp_init_module(struct mgp_module *module, - struct mgp_memory *memory) { - struct mgp_proc *community_proc = - mgp_module_add_read_procedure(module, "communities", communities); - if (!community_proc) return 1; - if (!mgp_proc_add_result(community_proc, "id", mgp_type_int())) return 1; - if (!mgp_proc_add_result(community_proc, "community", mgp_type_int())) - return 1; - - struct mgp_proc *modularity_proc = - mgp_module_add_read_procedure(module, "modularity", modularity); - if (!modularity_proc) return 1; - if (!mgp_proc_add_result(modularity_proc, "modularity", mgp_type_float())) - return 1; - - return 0; -} - -extern "C" int mgp_shutdown_module() { return 0; } diff --git a/query_modules/louvain/src/main.cpp b/query_modules/louvain/src/main.cpp deleted file mode 100644 index 297efe767..000000000 --- a/query_modules/louvain/src/main.cpp +++ /dev/null @@ -1,28 +0,0 @@ -#include - -#include "algorithms/algorithms.hpp" -#include "data_structures/graph.hpp" - -// A simple program that reads the graph from STDIN and -// outputs the detected communities from louvain along with -// its modularity measure on STDOUT. 
-int main() { - int n; - int m; - std::cin >> n >> m; - comdata::Graph graph(n); - for (int i = 0; i < m; ++i) { - int a; - int b; - double c; - std::cin >> a >> b >> c; - graph.AddEdge(a, b, c); - } - - algorithms::Louvain(&graph); - - for (int i = 0; i < n; ++i) - std::cout << i << " " << graph.Community(i) << "\n"; - std::cout << graph.Modularity() << "\n"; - return 0; -} diff --git a/query_modules/louvain/test/.clang-tidy b/query_modules/louvain/test/.clang-tidy deleted file mode 100644 index 559bdb744..000000000 --- a/query_modules/louvain/test/.clang-tidy +++ /dev/null @@ -1,80 +0,0 @@ ---- -Checks: '*, - -android-*, - -cert-err58-cpp, - -cppcoreguidelines-avoid-c-arrays, - -cppcoreguidelines-avoid-goto, - -cppcoreguidelines-avoid-magic-numbers, - -cppcoreguidelines-macro-usage, - -cppcoreguidelines-no-malloc, - -cppcoreguidelines-non-private-member-variables-in-classes, - -cppcoreguidelines-owning-memory, - -cppcoreguidelines-pro-bounds-array-to-pointer-decay, - -cppcoreguidelines-pro-bounds-constant-array-index, - -cppcoreguidelines-pro-bounds-pointer-arithmetic, - -cppcoreguidelines-pro-type-member-init, - -cppcoreguidelines-pro-type-reinterpret-cast, - -cppcoreguidelines-pro-type-static-cast-downcast, - -cppcoreguidelines-pro-type-union-access, - -cppcoreguidelines-pro-type-vararg, - -cppcoreguidelines-special-member-functions, - -fuchsia-default-arguments, - -fuchsia-default-arguments-calls, - -fuchsia-default-arguments-declarations, - -fuchsia-overloaded-operator, - -fuchsia-statically-constructed-objects, - -fuchsia-trailing-return, - -fuchsia-virtual-inheritance, - -google-explicit-constructor, - -google-readability-*, - -hicpp-avoid-c-arrays, - -hicpp-avoid-goto, - -hicpp-braces-around-statements, - -hicpp-member-init, - -hicpp-no-array-decay, - -hicpp-no-assembler, - -hicpp-no-malloc, - -hicpp-special-member-functions, - -hicpp-use-equals-default, - -hicpp-vararg, - -llvm-header-guard, - -misc-non-private-member-variables-in-classes, - -misc-unused-parameters, - -modernize-avoid-c-arrays, - -modernize-concat-nested-namespaces, - -modernize-pass-by-value, - -modernize-use-equals-default, - -modernize-use-nodiscard, - -modernize-use-trailing-return-type, - -performance-unnecessary-value-param, - -readability-braces-around-statements, - -readability-else-after-return, - -readability-implicit-bool-conversion, - -readability-magic-numbers, - -readability-named-parameter' -WarningsAsErrors: '' -HeaderFilterRegex: '' -AnalyzeTemporaryDtors: false -FormatStyle: none -CheckOptions: - - key: google-readability-braces-around-statements.ShortStatementLines - value: '1' - - key: google-readability-function-size.StatementThreshold - value: '800' - - key: google-readability-namespace-comments.ShortNamespaceLines - value: '10' - - key: google-readability-namespace-comments.SpacesBeforeComments - value: '2' - - key: modernize-loop-convert.MaxCopySize - value: '16' - - key: modernize-loop-convert.MinConfidence - value: reasonable - - key: modernize-loop-convert.NamingStyle - value: CamelCase - - key: modernize-pass-by-value.IncludeStyle - value: llvm - - key: modernize-replace-auto-ptr.IncludeStyle - value: llvm - - key: modernize-use-nullptr.NullMacros - value: 'NULL' -... 
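For reference, the louvain-main helper deleted above consumed a plain edge list on STDIN: a node count and an edge count, followed by one "node node weight" triple per edge. A sketch of a session follows; the exact communities and modularity printed depend on the graph and on the randomized local optimization, so the output shown is only illustrative:

# 4 nodes, 3 weighted edges forming the path 0-1-2-3.
printf '4 3\n0 1 1.0\n1 2 1.0\n2 3 1.0\n' | ./louvain-main
# Output shape: one "node community" pair per node, then the modularity, e.g.
#   0 0
#   1 0
#   2 1
#   3 1
#   0.25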
diff --git a/query_modules/louvain/test/CMakeLists.txt b/query_modules/louvain/test/CMakeLists.txt deleted file mode 100644 index eadc4f19a..000000000 --- a/query_modules/louvain/test/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -include_directories(${GTEST_INCLUDE_DIR}) - -add_subdirectory(unit) diff --git a/query_modules/louvain/test/unit/CMakeLists.txt b/query_modules/louvain/test/unit/CMakeLists.txt deleted file mode 100644 index 778cd21d2..000000000 --- a/query_modules/louvain/test/unit/CMakeLists.txt +++ /dev/null @@ -1,28 +0,0 @@ -set(test_prefix louvain__unit__) - -add_custom_target(louvain__unit) - -add_library(louvain-test STATIC utils.cpp) -set_target_properties(louvain-test PROPERTIES POSITION_INDEPENDENT_CODE ON) - -function(add_unit_test test_cpp) - # get exec name (remove extension from the abs path) - get_filename_component(exec_name ${test_cpp} NAME_WE) - set(target_name ${test_prefix}${exec_name}) - add_executable(${target_name} ${test_cpp}) - # OUTPUT_NAME sets the real name of a target when it is built and can be - # used to help create two targets of the same name even though CMake - # requires unique logical target names - set_target_properties(${target_name} PROPERTIES OUTPUT_NAME ${exec_name}) - # TODO: this is a temporary workaround for the test build warnings - target_compile_options(${target_name} PRIVATE -Wno-comment -Wno-sign-compare - -Wno-unused-variable) - target_link_libraries(${target_name} spdlog gflags gtest gtest_main Threads::Threads - louvain-core louvain-test) - # register test - add_test(${target_name} ${exec_name}) - # add to unit target - add_dependencies(louvain__unit ${target_name}) -endfunction(add_unit_test) - -add_unit_test(graph.cpp) diff --git a/query_modules/louvain/test/unit/graph.cpp b/query_modules/louvain/test/unit/graph.cpp deleted file mode 100644 index 989e8cafc..000000000 --- a/query_modules/louvain/test/unit/graph.cpp +++ /dev/null @@ -1,349 +0,0 @@ -#include - -#include "data_structures/graph.hpp" -#include "utils.hpp" - -// Checks if communities of nodes in graph correspond to a given community -// vector. -bool CommunityCheck(const comdata::Graph &graph, - const std::vector &c) { - if (graph.Size() != c.size()) return false; - for (uint32_t node_id = 0; node_id < graph.Size(); ++node_id) - if (graph.Community(node_id) != c[node_id]) return false; - return true; -} - -// Checks if degrees of nodes in graph correspond to a given degree vector. -bool DegreeCheck(const comdata::Graph &graph, - const std::vector &deg) { - if (graph.Size() != deg.size()) return false; - for (uint32_t node_id = 0; node_id < graph.Size(); ++node_id) - if (graph.Degree(node_id) != deg[node_id]) return false; - return true; -} - -// Checks if incident weights of nodes in graph correspond to a given weight -// vector. -bool IncidentWeightCheck(const comdata::Graph &graph, - const std::vector &inc_w) { - if (graph.Size() != inc_w.size()) return false; - for (uint32_t node_id = 0; node_id < graph.Size(); ++node_id) - if (std::abs(graph.IncidentWeight(node_id) - inc_w[node_id]) > 1e-6) - return false; - return true; -} - -// Sets communities of nodes in graph. Returns true on success.
-bool SetCommunities(comdata::Graph *graph, const std::vector &c) { - if (graph->Size() != c.size()) return false; - for (uint32_t node_id = 0; node_id < graph->Size(); ++node_id) - graph->SetCommunity(node_id, c[node_id]); - return true; -} - -TEST(Graph, Constructor) { - uint32_t nodes = 100; - comdata::Graph graph(nodes); - ASSERT_EQ(graph.Size(), nodes); - for (uint32_t node_id = 0; node_id < nodes; ++node_id) { - ASSERT_EQ(graph.IncidentWeight(node_id), 0); - ASSERT_EQ(graph.Community(node_id), node_id); - } -} - -TEST(Graph, Size) { - comdata::Graph graph1 = GenRandomUnweightedGraph(0, 0); - comdata::Graph graph2 = GenRandomUnweightedGraph(42, 41); - comdata::Graph graph3 = GenRandomUnweightedGraph(100, 250); - ASSERT_EQ(graph1.Size(), 0); - ASSERT_EQ(graph2.Size(), 42); - ASSERT_EQ(graph3.Size(), 100); -} - -TEST(Graph, Communities) { - comdata::Graph graph = GenRandomUnweightedGraph(100, 250); - - for (int i = 0; i < 100; ++i) graph.SetCommunity(i, i % 5); - for (int i = 0; i < 100; ++i) ASSERT_EQ(graph.Community(i), i % 5); - - // Try to set communities on non-existing nodes - EXPECT_THROW({ graph.SetCommunity(100, 2); }, std::out_of_range); - EXPECT_THROW({ graph.SetCommunity(150, 0); }, std::out_of_range); - - // Try to get the community of a non-existing node - EXPECT_THROW({ graph.Community(100); }, std::out_of_range); - EXPECT_THROW({ graph.Community(150); }, std::out_of_range); -} - -TEST(Graph, CommunityNormalization) { - // Communities are already normalized. - comdata::Graph graph = GenRandomUnweightedGraph(5, 10); - std::vector init_c = {0, 2, 1, 3, 4}; - std::vector final_c = {0, 2, 1, 3, 4}; - ASSERT_TRUE(SetCommunities(&graph, init_c)); - graph.NormalizeCommunities(); - ASSERT_TRUE(CommunityCheck(graph, final_c)); - - // Each node in its own community. - graph = GenRandomUnweightedGraph(5, 10); - init_c = {20, 30, 10, 40, 50}; - final_c = {1, 2, 0, 3, 4}; - ASSERT_TRUE(SetCommunities(&graph, init_c)); - graph.NormalizeCommunities(); - ASSERT_TRUE(CommunityCheck(graph, final_c)); - - // Multiple nodes in the same community - graph = GenRandomUnweightedGraph(7, 10); - init_c = {13, 99, 13, 13, 1, 99, 1}; - final_c = {1, 2, 1, 1, 0, 2, 0}; - ASSERT_TRUE(SetCommunities(&graph, init_c)); - graph.NormalizeCommunities(); - ASSERT_TRUE(CommunityCheck(graph, final_c)); -} - -TEST(Graph, AddEdge) { - comdata::Graph graph = GenRandomUnweightedGraph(5, 0); - - // Node out of bounds.
- EXPECT_THROW({ graph.AddEdge(1, 5, 7); }, std::out_of_range); - - // Repeated edge - graph.AddEdge(1, 2, 1); - EXPECT_THROW({ graph.AddEdge(1, 2, 7); }, std::invalid_argument); - - // Non-positive edge weight - EXPECT_THROW({ graph.AddEdge(2, 3, -7); }, std::out_of_range); - EXPECT_THROW({ graph.AddEdge(3, 4, 0); }, std::out_of_range); -} - -TEST(Graph, Degrees) { - // Graph without edges - comdata::Graph graph = GenRandomUnweightedGraph(5, 0); - std::vector<uint32_t> deg = {0, 0, 0, 0, 0}; - ASSERT_TRUE(DegreeCheck(graph, deg)); - - // Chain - // (0)--(1)--(2)--(3)--(4) - graph = BuildGraph(5, {{0, 1, 1}, {1, 2, 1}, {2, 3, 1}, {3, 4, 1}}); - deg = {1, 2, 2, 2, 1}; - ASSERT_TRUE(DegreeCheck(graph, deg)); - - // Tree - // (0)--(3) - // / \ - // (1) (2) - // | / \ - // (4) (5) (6) - graph = BuildGraph( - 7, {{0, 1, 1}, {0, 2, 1}, {0, 3, 1}, {1, 4, 1}, {2, 5, 1}, {2, 6, 1}}); - deg = {3, 2, 3, 1, 1, 1, 1}; - ASSERT_TRUE(DegreeCheck(graph, deg)); - - // Graph without self-loops - // (0)--(1) - // | \ | \ - // | \ | \ - // (2)--(3)-(4) - graph = BuildGraph(5, {{0, 1, 1}, - {0, 2, 1}, - {0, 3, 1}, - {1, 3, 1}, - {1, 4, 1}, - {2, 3, 1}, - {3, 4, 1}}); - deg = {3, 3, 2, 4, 2}; - ASSERT_TRUE(DegreeCheck(graph, deg)); - - // Graph with self loop [*nodes have self loops] - // (0)--(1*) - // | \ | \ - // | \ | \ - // (2*)--(3)-(4*) - graph = BuildGraph(5, {{0, 1, 1}, - {0, 2, 1}, - {0, 3, 1}, - {1, 3, 1}, - {1, 4, 1}, - {2, 3, 1}, - {3, 4, 1}, - {1, 1, 1}, - {2, 2, 2}, - {4, 4, 4}}); - deg = {3, 4, 3, 4, 3}; - ASSERT_TRUE(DegreeCheck(graph, deg)); - - // Try to get degree of non-existing nodes - EXPECT_THROW({ graph.Degree(5); }, std::out_of_range); - EXPECT_THROW({ graph.Degree(100); }, std::out_of_range); -} - -TEST(Graph, Weights) { - // Graph without edges - comdata::Graph graph = GenRandomUnweightedGraph(5, 0); - std::vector<double> inc_w = {0, 0, 0, 0, 0}; - ASSERT_TRUE(IncidentWeightCheck(graph, inc_w)); - ASSERT_EQ(graph.TotalWeight(), 0); - - // Chain - // (0)--(1)--(2)--(3)--(4) - graph = BuildGraph(5, {{0, 1, 0.1}, {1, 2, 0.5}, {2, 3, 2.3}, {3, 4, 4.2}}); - inc_w = {0.1, 0.6, 2.8, 6.5, 4.2}; - ASSERT_TRUE(IncidentWeightCheck(graph, inc_w)); - ASSERT_NEAR(graph.TotalWeight(), 7.1, 1e-6); - - // Tree - // (0)--(3) - // / \ - // (1) (2) - // | / \ - // (4) (5) (6) - graph = BuildGraph(7, {{0, 1, 1.3}, - {0, 2, 0.2}, - {0, 3, 1}, - {1, 4, 3.2}, - {2, 5, 4.2}, - {2, 6, 0.7}}); - inc_w = {2.5, 4.5, 5.1, 1, 3.2, 4.2, 0.7}; - ASSERT_TRUE(IncidentWeightCheck(graph, inc_w)); - EXPECT_NEAR(graph.TotalWeight(), 10.6, 1e-6); - - // Graph without self-loops - // (0)--(1) - // | \ | \ - // | \ | \ - // (2)--(3)-(4) - graph = BuildGraph(5, {{0, 1, 0.1}, - {0, 2, 0.2}, - {0, 3, 0.3}, - {1, 3, 0.4}, - {1, 4, 0.5}, - {2, 3, 0.6}, - {3, 4, 0.7}}); - inc_w = {0.6, 1, 0.8, 2, 1.2}; - ASSERT_TRUE(IncidentWeightCheck(graph, inc_w)); - EXPECT_NEAR(graph.TotalWeight(), 2.8, 1e-6); - - // Graph with self loop [*nodes have self loops] - // (0)--(1*) - // | \ | \ - // | \ | \ - // (2*)--(3)-(4*) - graph = BuildGraph(5, {{0, 1, 0.1}, - {0, 2, 0.2}, - {0, 3, 0.3}, - {1, 3, 0.4}, - {1, 4, 0.5}, - {2, 3, 0.6}, - {3, 4, 0.7}, - {1, 1, 0.8}, - {2, 2, 0.9}, - {4, 4, 1}}); - inc_w = {0.6, 1.8, 1.7, 2, 2.2}; - ASSERT_TRUE(IncidentWeightCheck(graph, inc_w)); - EXPECT_NEAR(graph.TotalWeight(), 5.5, 1e-6); - - // Try to get incident weight of non-existing node - EXPECT_THROW({ graph.IncidentWeight(5); }, std::out_of_range); - EXPECT_THROW({ graph.IncidentWeight(100); }, std::out_of_range); -}
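// For reference (added note, assuming Graph::Modularity() implements the standard
// Newman modularity used by Louvain): Q = (1/2m) * sum_ij [A_ij - k_i*k_j/(2m)] *
// delta(c_i, c_j), where m is the total edge weight, A_ij the weight between nodes
// i and j, and k_i the incident weight of node i. The hard-coded expected values
// in the Modularity test below follow from that formula.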
- -TEST(Graph, Modularity) { - // Graph without edges - comdata::Graph graph = GenRandomUnweightedGraph(5, 0); - ASSERT_EQ(graph.Modularity(), 0); - - // Chain - // (0)--(1)--(2)--(3)--(4) - graph = BuildGraph(5, {{0, 1, 0.1}, {1, 2, 0.5}, {2, 3, 2.3}, {3, 4, 4.2}}); - std::vector<uint32_t> c = {0, 1, 1, 2, 2}; - SetCommunities(&graph, c); - EXPECT_NEAR(graph.Modularity(), 0.036798254314620096, 1e-6); - - // Tree - // (0)--(3) - // / \ - // (1) (2) - // | / \ - // (4) (5) (6) - graph = BuildGraph(7, {{0, 1, 1.3}, - {0, 2, 0.2}, - {0, 3, 1}, - {1, 4, 3.2}, - {2, 5, 4.2}, - {2, 6, 0.7}}); - c = {0, 0, 1, 0, 0, 1, 2}; - SetCommunities(&graph, c); - EXPECT_NEAR(graph.Modularity(), 0.4424617301530794, 1e-6); - - // Graph without self-loops - // (0)--(1) - // | \ | \ - // | \ | \ - // (2)--(3)-(4) - graph = BuildGraph(5, {{0, 1, 0.1}, - {0, 2, 0.2}, - {0, 3, 0.3}, - {1, 3, 0.4}, - {1, 4, 0.5}, - {2, 3, 0.6}, - {3, 4, 0.7}}); - c = {0, 1, 1, 1, 1}; - SetCommunities(&graph, c); - EXPECT_NEAR(graph.Modularity(), -0.022959183673469507, 1e-6); - - // Graph with self loop [*nodes have self loops] - // (0)--(1*) - // | \ | \ - // | \ | \ - // (2*)--(3)-(4*) - graph = BuildGraph(5, {{0, 1, 0.1}, - {0, 2, 0.2}, - {0, 3, 0.3}, - {1, 3, 0.4}, - {1, 4, 0.5}, - {2, 3, 0.6}, - {3, 4, 0.7}, - {1, 1, 0.8}, - {2, 2, 0.9}, - {4, 4, 1}}); - c = {0, 0, 0, 0, 1}; - SetCommunities(&graph, c); - EXPECT_NEAR(graph.Modularity(), 0.188842975206611, 1e-6); - - // Neo4j example graph - // (0)--(1)---(3)--(4) - // \ / \ / - // (2) (5) - graph = BuildGraph(6, {{0, 1, 1}, - {1, 2, 1}, - {0, 2, 1}, - {1, 3, 1}, - {3, 5, 1}, - {5, 4, 1}, - {3, 4, 1}}); - c = {0, 0, 0, 1, 1, 1}; - SetCommunities(&graph, c); - EXPECT_NEAR(graph.Modularity(), 0.3571428571428571, 1e-6); - - // Example graph from Wikipedia - // (0)--(1)--(3)--(4)--(5) - // \ / | \ / - // (2) (7) (6) - // / \ - // (8)--(9) - graph = BuildGraph(10, {{0, 1, 1}, - {1, 2, 1}, - {0, 2, 1}, - {1, 3, 1}, - {3, 4, 1}, - {4, 5, 1}, - {5, 6, 1}, - {6, 4, 1}, - {3, 7, 1}, - {7, 8, 1}, - {7, 9, 1}, - {8, 9, 1}}); - c = {0, 0, 0, 0, 1, 1, 1, 2, 2, 2}; - SetCommunities(&graph, c); - EXPECT_NEAR(graph.Modularity(), 0.4896, 1e-4); -}
diff --git a/query_modules/louvain/test/unit/utils.cpp b/query_modules/louvain/test/unit/utils.cpp deleted file mode 100644 index 681d7e1e8..000000000 --- a/query_modules/louvain/test/unit/utils.cpp +++ /dev/null @@ -1,32 +0,0 @@ -#include "utils.hpp" - -#include <chrono> - -comdata::Graph BuildGraph( - uint32_t nodes, std::vector<std::tuple<uint32_t, uint32_t, double>> edges) { - comdata::Graph G(nodes); - for (auto &edge : edges) - G.AddEdge(std::get<0>(edge), std::get<1>(edge), std::get<2>(edge)); - return G; -} - -comdata::Graph GenRandomUnweightedGraph(uint32_t nodes, uint32_t edges) { - auto seed = - std::chrono::high_resolution_clock::now().time_since_epoch().count(); - std::mt19937 rng(seed); - std::uniform_int_distribution<uint32_t> dist(0, nodes - 1); - std::set<std::tuple<uint32_t, uint32_t, double>> E; - for (uint32_t i = 0; i < edges; ++i) { - int u; - int v; - do { - u = dist(rng); - v = dist(rng); - if (u > v) std::swap(u, v); - } while (u == v || E.find({u, v, 1}) != E.end()); - E.insert({u, v, 1}); - } - return BuildGraph(nodes, std::vector<std::tuple<uint32_t, uint32_t, double>>( - E.begin(), E.end())); -} -
diff --git a/query_modules/louvain/test/unit/utils.hpp b/query_modules/louvain/test/unit/utils.hpp deleted file mode 100644 index f7fba53b2..000000000 --- a/query_modules/louvain/test/unit/utils.hpp +++ /dev/null @@ -1,18 +0,0 @@ -#pragma once - -#include <random> -#include <set> -#include <tuple> -#include <vector> - -#include "data_structures/graph.hpp" - -/// Builds the graph from a given number of nodes and a list of edges.
-/// Nodes should be 0-indexed and each edge should be provided only once. -comdata::Graph BuildGraph( - uint32_t nodes, std::vector<std::tuple<uint32_t, uint32_t, double>> edges); - -/// Generates a random undirected graph with a given number of nodes and edges. -/// The generated graph is not picked out of a uniform distribution. All weights -/// are the same and equal to one. -comdata::Graph GenRandomUnweightedGraph(uint32_t nodes, uint32_t edges);
diff --git a/release/LICENSE_COMMUNITY.md b/release/LICENSE_COMMUNITY.md index a93a80d68..34a75c4d5 100644 --- a/release/LICENSE_COMMUNITY.md +++ b/release/LICENSE_COMMUNITY.md @@ -1,33 +1,40 @@ -# User License Agreement +# Memgraph Community User License Agreement -1. Description +This License Agreement governs your use of the Memgraph Community Release (the +"Software") and documentation ("Documentation"). -THIS LICENSE AGREEMENT GOVERNS LICENSEE’S USE OF THE MEMGRAPH COMMUNITY -RELEASE AND DOCUMENTATION. +BY DOWNLOADING AND/OR ACCESSING THIS SOFTWARE, YOU ("LICENSEE") AGREE TO THESE +TERMS. -2. License Grant +1. License Grant The Software and Documentation are provided to Licensee at no charge and are licensed, not sold to Licensee. No ownership of any part of the Software and Documentation is hereby transferred to Licensee. Subject to (i) the terms and -conditions of this License Agreement, (ii) any additional license restrictions -and parameters contained on Licensor’s quotation, website, or order form -(“Order Form”), Licensor hereby grants Licensee a personal, non-assignable, +conditions of this License Agreement, and (ii) any additional license +restrictions and parameters contained on Licensor’s quotation, website, or +order form, Licensor hereby grants Licensee a personal, non-assignable, non-transferable and non-exclusive license to install, access and use the Software (in object code form only) and Documentation for Licensee’s internal -business purposes only. All rights relating to the Software and Documentation -that are not expressly licensed in this License Agreement, whether now existing -or which may hereafter come into existence are reserved for Licensor. Licensee -shall not remove, obscure, or alter any proprietary rights notices (including -without limitation copyright and trademark notices), which may be affixed to or -contained within the Software or Documentation. +business purposes (including for use in a production environment) only. All +rights relating to the Software and Documentation that are not expressly +licensed in this License Agreement, whether now existing or which may hereafter +come into existence are reserved for Licensor. Licensee shall not remove, +obscure, or alter any proprietary rights notices (including without limitation +copyright and trademark notices), which may be affixed to or contained within +the Software or Documentation. -3. Restrictions +Licensor may terminate this License Agreement with immediate effect upon +written notice to the Licensee. Upon termination Licensee shall delete all +electronic copies of all or any part of the Software and/or the Documentation +resident in its systems or elsewhere. + +2.
Restrictions Licensee will not, directly or indirectly, (a) copy the Software or Documentation in any manner or for any purpose; (b) install, access or use any component of the Software or Documentation for any purpose not expressly -granted in Section 2 above; (c) resell, distribute, publicly display or +granted in Section 1 above; (c) resell, distribute, publicly display or publicly perform the Software or Documentation or any component thereof, by transfer, lease, loan or any other means, or make it available for use by others in any time-sharing, service bureau or similar arrangement; (d) @@ -37,25 +44,55 @@ algorithms or techniques incorporated in the Software; (e) export the Software or Documentation in violation of any applicable laws or regulations; (f) modify, translate, adapt, or create derivative works from the Software or Documentation; (g) circumvent, disable or otherwise interfere with -security-related features of the Software or Documentation; (h) -reverse-engineer, disassemble, attempt to derive the source code; (i) use the +security-related features of the Software or Documentation; (h) use the Software or Documentation for any illegal purpose, in any manner that is inconsistent with the terms of this License Agreement, or to engage in illegal -activity; (j) remove or alter any trademark, logo, copyright or other +activity; (i) remove or alter any trademark, logo, copyright or other proprietary notices, legends, symbols or labels on, or embedded in, the -Software or Documentation; or (k) provide access to the Software or +Software or Documentation; or (j) provide access to the Software or Documentation to third parties. -4. Warranty Disclaimer +3. Warranty Disclaimer -THE MEMGRAPH COMMUNITY RELEASE AND DOCUMENTATION ARE PROVIDED “AS IS” FOR -DEVELOPMENT, TESTING AND EVALUATION PURPOSES ONLY. IT IS NOT LICENSED FOR -PRODUCTION USE AND LICENSOR MAKES NO AND DISCLAIMS ALL WARRANTIES, EXPRESS OR -IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR NONINFRINGEMENT OF -THIRD PARTIES’ INTELLECTUAL PROPERTY RIGHTS OR OTHER PROPRIETARY RIGHTS. -NEITHER THIS LICENSE AGREEMENT NOR ANY DOCUMENTATION FURNISHED UNDER IT IS -INTENDED TO EXPRESS OR IMPLY ANY WARRANTY THAT THE OPERATION OF THE SOFTWARE -WILL BE UNINTERRUPTED, TIMELY, OR ERROR-FREE. +THE SOFTWARE AND DOCUMENTATION ARE PROVIDED "AS IS" AND LICENSOR MAKES NO +WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR NON +INFRINGEMENT OF THIRD PARTIES’ INTELLECTUAL PROPERTY RIGHTS OR OTHER +PROPRIETARY RIGHTS. NEITHER THIS LICENSE AGREEMENT NOR ANY DOCUMENTATION +FURNISHED UNDER IT IS INTENDED TO EXPRESS OR IMPLY ANY WARRANTY THAT THE +OPERATION OF THE SOFTWARE WILL BE UNINTERRUPTED, TIMELY, OR ERROR-FREE. -BY DOWNLOADING AND/OR ACCESSING THIS SOFTWARE, YOU AGREE TO SUCH TERMS. +4. Limitation of Liability + +Licensor shall not in any circumstances be liable, whether in tort (including +for negligence or breach of statutory duty howsoever arising), contract, +misrepresentation (whether innocent or negligent) or otherwise for: loss of +profits, loss of business, depletion of goodwill or similar losses, loss of +anticipated savings, loss of goods, loss or corruption of data or computer +downtime, or any special, indirect, consequential or pure economic loss, costs, +damages, charges or expenses. 
+ +Licensor's total aggregate liability in contract, tort (including without +limitation negligence or breach of statutory duty howsoever arising), +misrepresentation (whether innocent or negligent), restitution or otherwise, +arising in connection with the performance or contemplated performance of this +License Agreement shall in all circumstances be limited to GBP10.00 (ten pounds +sterling). + +Nothing in this License Agreement shall limit Licensor’s liability in the case +of death or personal injury caused by negligence, fraud, or fraudulent +misrepresentation, or where it otherwise cannot be limited by law. + +5. Technical Data + +Licensor may collect and use technical information (such as usage patterns) +gathered when the Licensee downloads and uses the Software. This is generally +statistical data which does not identify an identified or identifiable +individual. It may also include Licensee’s IP address which is personal data +and is processed in accordance with our Privacy Policy. We only use this +technical information to improve our products. + +6. Law and Jurisdiction + +This License Agreement is governed by the laws of England and is subject to the +non-exclusive jurisdiction of the courts of England. diff --git a/release/arch-pkg/package_arch b/release/arch-pkg/package_arch index e5b24d867..7e9b4e6c6 100755 --- a/release/arch-pkg/package_arch +++ b/release/arch-pkg/package_arch @@ -1,7 +1,7 @@ #!/bin/bash -e function print_help () { - echo "Usage: $0 MEMGPRAH_PACKAGE.tar.gz" + echo "Usage: $0 MEMGRAPH_PACKAGE.tar.gz" echo "Optional arguments:" echo -e " -h|--help Print help." } diff --git a/release/docker/memgraph_community.dockerfile b/release/docker/memgraph_community.dockerfile index 7c1556f0f..1d1a9d2ba 100644 --- a/release/docker/memgraph_community.dockerfile +++ b/release/docker/memgraph_community.dockerfile @@ -1,4 +1,5 @@ FROM debian:buster +# NOTE: If you change the base distro update release/package as well. ARG deb_release diff --git a/release/docker/memgraph_enterprise.dockerfile b/release/docker/memgraph_enterprise.dockerfile index a002425df..f9a12f0df 100644 --- a/release/docker/memgraph_enterprise.dockerfile +++ b/release/docker/memgraph_enterprise.dockerfile @@ -1,4 +1,5 @@ FROM debian:buster +# NOTE: If you change the base distro update release/package as well. ARG deb_release diff --git a/release/get_version.py b/release/get_version.py index a3956d433..dec645e31 100755 --- a/release/get_version.py +++ b/release/get_version.py @@ -192,7 +192,19 @@ if args.version: try: current_branch = get_output("git", "rev-parse", "--abbrev-ref", "HEAD") if current_branch != "master": - get_output("git", "fetch", "origin", "master:master") + branches = get_output("git", "branch") + if "master" in branches: + # If master is present locally, the fetch is allowed to fail + # because this script will still be able to compare against the + # master branch. + try: + get_output("git", "fetch", "origin", "master:master") + except Exception: + pass + else: + # If master is not present locally, the fetch command has to + # succeed because something else will fail otherwise. 
+ get_output("git", "fetch", "origin", "master:master") except Exception: print("Fatal error while ensuring local master branch.") sys.exit(1) diff --git a/release/package/centos-7/Dockerfile b/release/package/centos-7/Dockerfile new file mode 100644 index 000000000..ff3b57033 --- /dev/null +++ b/release/package/centos-7/Dockerfile @@ -0,0 +1,12 @@ +FROM centos:7 + +RUN yum -y update \ + && yum install -y wget git +# Do NOT be smart here and clean the cache because the container is used in the +# stateful context. + +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-v2/toolchain-v2-binaries-centos-7.tar.gz \ + -O toolchain-v2-binaries-centos-7.tar.gz \ + && tar xzvf toolchain-v2-binaries-centos-7.tar.gz -C /opt + +ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/centos-8/Dockerfile b/release/package/centos-8/Dockerfile new file mode 100644 index 000000000..724387e2a --- /dev/null +++ b/release/package/centos-8/Dockerfile @@ -0,0 +1,12 @@ +FROM centos:8 + +RUN dnf -y update \ + && dnf install -y wget git +# Do NOT be smart here and clean the cache because the container is used in the +# stateful context. + +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-v2/toolchain-v2-binaries-centos-8.tar.gz \ + -O toolchain-v2-binaries-centos-8.tar.gz \ + && tar xzvf toolchain-v2-binaries-centos-8.tar.gz -C /opt + +ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/debian-10/Dockerfile b/release/package/debian-10/Dockerfile new file mode 100644 index 000000000..f6ff36235 --- /dev/null +++ b/release/package/debian-10/Dockerfile @@ -0,0 +1,15 @@ +FROM debian:10 + +# Stops tzdata interactive configuration. +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt update && apt install -y \ + ca-certificates wget git +# Do NOT be smart here and clean the cache because the container is used in the +# stateful context. + +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-v2/toolchain-v2-binaries-debian-10.tar.gz \ + -O toolchain-v2-binaries-debian-10.tar.gz \ + && tar xzvf toolchain-v2-binaries-debian-10.tar.gz -C /opt + +ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/debian-9/Dockerfile b/release/package/debian-9/Dockerfile new file mode 100644 index 000000000..fd49077ce --- /dev/null +++ b/release/package/debian-9/Dockerfile @@ -0,0 +1,15 @@ +FROM debian:9 + +# Stops tzdata interactive configuration. +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt update && apt install -y \ + ca-certificates wget git +# Do NOT be smart here and clean the cache because the container is used in the +# stateful context. 
+ +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-v2/toolchain-v2-binaries-debian-9.tar.gz \ + -O toolchain-v2-binaries-debian-9.tar.gz \ + && tar xzvf toolchain-v2-binaries-debian-9.tar.gz -C /opt + +ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/docker-compose.yml b/release/package/docker-compose.yml new file mode 100644 index 000000000..b6ad75faf --- /dev/null +++ b/release/package/docker-compose.yml @@ -0,0 +1,26 @@ +version: "3" +services: + mgbuild_centos-7: + build: + context: centos-7 + container_name: "mgbuild_centos-7" + mgbuild_centos-8: + build: + context: centos-8 + container_name: "mgbuild_centos-8" + mgbuild_debian-9: + build: + context: debian-9 + container_name: "mgbuild_debian-9" + mgbuild_debian-10: + build: + context: debian-10 + container_name: "mgbuild_debian-10" + mgbuild_ubuntu-18.04: + build: + context: ubuntu-18.04 + container_name: "mgbuild_ubuntu-18.04" + mgbuild_ubuntu-20.04: + build: + context: ubuntu-20.04 + container_name: "mgbuild_ubuntu-20.04" diff --git a/release/package/run.sh b/release/package/run.sh new file mode 100755 index 000000000..151d48712 --- /dev/null +++ b/release/package/run.sh @@ -0,0 +1,153 @@ +#!/bin/bash + +set -Eeuo pipefail + +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +SUPPORTED_OFFERING=(community enterprise) +SUPPORTED_OS=(centos-7 centos-8 debian-9 debian-10 ubuntu-18.04 ubuntu-20.04) +PROJECT_ROOT="$SCRIPT_DIR/../.." +ACTIVATE_TOOLCHAIN="source /opt/toolchain-v2/activate" +HOST_OUTPUT_DIR="$PROJECT_ROOT/build/output" + +print_help () { + echo "$0 init|package {offering} {os} [--for-docker]|docker|test" + echo "" + echo " offerings: ${SUPPORTED_OFFERING[*]}" + echo " OSs: ${SUPPORTED_OS[*]}" + exit 1 +} + +make_package () { + offering="$1" + offering_flag=" -DMG_ENTERPRISE=OFF " + if [[ "$offering" == "enterprise" ]]; then + offering_flag=" -DMG_ENTERPRISE=ON " + fi + if [[ "$offering" == "community" ]]; then + offering_flag=" -DMG_ENTERPRISE=OFF " + fi + os="$2" + package_command="" + if [[ "$os" =~ ^"centos".* ]]; then + package_command=" cpack -G RPM --config ../CPackConfig.cmake && rpmlint memgraph*.rpm " + fi + if [[ "$os" =~ ^"debian".* ]]; then + package_command=" cpack -G DEB --config ../CPackConfig.cmake " + fi + if [[ "$os" =~ ^"ubuntu".* ]]; then + package_command=" cpack -G DEB --config ../CPackConfig.cmake " + fi + docker_flag=" -DBUILD_FOR_DOCKER=OFF " + if [[ "$#" -gt 2 ]]; then + if [[ "$3" == "--for-docker" ]]; then + docker_flag=" -DBUILD_FOR_DOCKER=ON " + fi + fi + build_container="mgbuild_$os" + echo "Building Memgraph $offering for $os on $build_container..." + + echo "Copying project files..." + # If master is not the current branch, fetch it, because the get_version + # script depends on it. If we are on master, the fetch command is going to + # fail so that's why there is the explicit check. + # Required here because Docker build container can't access remote. + cd "$PROJECT_ROOT" + if [[ "$(git rev-parse --abbrev-ref HEAD)" != "master" ]]; then + git fetch origin master:master + fi + docker exec "$build_container" mkdir -p /memgraph + docker cp "$PROJECT_ROOT/." "$build_container:/memgraph/" + + container_build_dir="/memgraph/build" + container_output_dir="$container_build_dir/output" + + # TODO(gitbuda): TOOLCHAIN_RUN_DEPS should be installed during the Docker + # image build phase, but that is not easy at this point because the + # environment/os/{os}.sh does not come within the toolchain package. 
When + # migrating to the next version of the toolchain, do that and remove the + # TOOLCHAIN_RUN_DEPS installation from here. + echo "Installing dependencies..." + docker exec "$build_container" bash -c "/memgraph/environment/os/$os.sh install TOOLCHAIN_RUN_DEPS" + docker exec "$build_container" bash -c "/memgraph/environment/os/$os.sh install MEMGRAPH_BUILD_DEPS" + + echo "Building targeted package..." + docker exec "$build_container" bash -c "cd /memgraph && ./init" + docker exec "$build_container" bash -c "cd $container_build_dir && rm -rf ./*" + docker exec "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN && cmake -DCMAKE_BUILD_TYPE=release $offering_flag $docker_flag .." + # ' is used instead of " because we need to run make within the allowed + # container resources. + # shellcheck disable=SC2016 + docker exec "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN "'&& make -j$(nproc)' + docker exec "$build_container" bash -c "cd $container_build_dir && $ACTIVATE_TOOLCHAIN "'&& make -j$(nproc) -B mgconsole' + docker exec "$build_container" bash -c "mkdir -p $container_output_dir && cd $container_output_dir && $ACTIVATE_TOOLCHAIN && $package_command" + + echo "Copying targeted package to host..." + last_package_name=$(docker exec "$build_container" bash -c "cd $container_output_dir && ls -t memgraph* | head -1") + # The operating system folder is introduced because multiple different + # packages could be preserved during the same build "session". + mkdir -p "$HOST_OUTPUT_DIR/$os" + package_host_destination="$HOST_OUTPUT_DIR/$os/$last_package_name" + docker cp "$build_container:$container_output_dir/$last_package_name" "$package_host_destination" + echo "Package saved to $package_host_destination." +} + +case "$1" in + init) + cd "$SCRIPT_DIR" + docker-compose build + docker-compose up -d + ;; + + docker) + # NOTE: Docker is built on top of the Debian 10 package. + based_on_os="debian-10" + # shellcheck disable=SC2012 + last_package_name=$(cd "$HOST_OUTPUT_DIR/$based_on_os" && ls -t memgraph* | head -1) + docker_build_folder="$PROJECT_ROOT/release/docker" + cd "$docker_build_folder" + ./package_deb_docker --latest "$HOST_OUTPUT_DIR/$based_on_os/$last_package_name" + # shellcheck disable=SC2012 + docker_image_name=$(cd "$docker_build_folder" && ls -t memgraph* | head -1) + docker_host_folder="$HOST_OUTPUT_DIR/docker" + docker_host_image_path="$docker_host_folder/$docker_image_name" + mkdir -p "$docker_host_folder" + cp "$docker_build_folder/$docker_image_name" "$docker_host_image_path" + echo "Docker image saved to $docker_host_image_path." + ;; + + package) + shift 1 + if [[ "$#" -lt 2 ]]; then + print_help + fi + offering="$1" + shift 1 + is_offering_ok=false + for supported_offering in "${SUPPORTED_OFFERING[@]}"; do + if [[ "$supported_offering" == "${offering}" ]]; then + is_offering_ok=true + fi + done + os="$1" + shift 1 + is_os_ok=false + for supported_os in "${SUPPORTED_OS[@]}"; do + if [[ "$supported_os" == "${os}" ]]; then + is_os_ok=true + fi + done + if [[ "$is_offering_ok" == true ]] && [[ "$is_os_ok" == true ]]; then + make_package "$offering" "$os" "$@" + else + print_help + fi + ;; + + test) + echo "TODO(gitbuda): Test all packages on mgtest containers."
+ ;; + + *) + print_help + ;; +esac diff --git a/release/package/ubuntu-18.04/Dockerfile b/release/package/ubuntu-18.04/Dockerfile new file mode 100644 index 000000000..dc876be77 --- /dev/null +++ b/release/package/ubuntu-18.04/Dockerfile @@ -0,0 +1,15 @@ +FROM ubuntu:18.04 + +# Stops tzdata interactive configuration. +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt update && apt install -y \ + ca-certificates wget git +# Do NOT be smart here and clean the cache because the container is used in the +# stateful context. + +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-v2/toolchain-v2-binaries-ubuntu-18.04.tar.gz \ + -O toolchain-v2-binaries-ubuntu-18.04.tar.gz \ + && tar xzvf toolchain-v2-binaries-ubuntu-18.04.tar.gz -C /opt + +ENTRYPOINT ["sleep", "infinity"] diff --git a/release/package/ubuntu-20.04/Dockerfile b/release/package/ubuntu-20.04/Dockerfile new file mode 100644 index 000000000..c3773ec6f --- /dev/null +++ b/release/package/ubuntu-20.04/Dockerfile @@ -0,0 +1,15 @@ +FROM ubuntu:20.04 + +# Stops tzdata interactive configuration. +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt update && apt install -y \ + ca-certificates wget git +# Do NOT be smart here and clean the cache because the container is used in the +# stateful context. + +RUN wget -q https://s3-eu-west-1.amazonaws.com/deps.memgraph.io/toolchain-v2/toolchain-v2-binaries-ubuntu-20.04.tar.gz \ + -O toolchain-v2-binaries-ubuntu-20.04.tar.gz \ + && tar xzvf toolchain-v2-binaries-ubuntu-20.04.tar.gz -C /opt + +ENTRYPOINT ["sleep", "infinity"] diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 143e128ec..ddae1b4ad 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -37,17 +37,17 @@ if (MG_ENTERPRISE) glue/auth.cpp) endif() -set(MG_SINGLE_NODE_V2_LIBS stdc++fs Threads::Threads - telemetry_lib mg-query mg-communication) +set(mg_single_node_v2_libs stdc++fs Threads::Threads + telemetry_lib mg-query mg-communication mg-new-delete mg-utils) if (MG_ENTERPRISE) # These are enterprise subsystems - set(MG_SINGLE_NODE_V2_LIBS ${MG_SINGLE_NODE_V2_LIBS} mg-auth mg-audit) + set(mg_single_node_v2_libs ${mg_single_node_v2_libs} mg-auth mg-audit) endif() # memgraph main executable add_executable(memgraph ${mg_single_node_v2_sources}) target_include_directories(memgraph PUBLIC ${CMAKE_SOURCE_DIR}/include) -target_link_libraries(memgraph ${MG_SINGLE_NODE_V2_LIBS}) +target_link_libraries(memgraph ${mg_single_node_v2_libs}) # NOTE: `include/mg_procedure.syms` describes a pattern match for symbols which # should be dynamically exported, so that `dlopen` can correctly link the # symbols in custom procedure module libraries. 
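For context, a typical end-to-end session with the release/package/run.sh script added above might look like this (hypothetical invocation; output paths follow the script's HOST_OUTPUT_DIR layout):

./release/package/run.sh init                          # build and start the mgbuild_* containers
./release/package/run.sh package community debian-10   # DEB lands in build/output/debian-10/
./release/package/run.sh package enterprise centos-8   # RPM lands in build/output/centos-8/
./release/package/run.sh docker                        # wraps the latest Debian 10 DEB into a Docker image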
diff --git a/src/auth/models.cpp b/src/auth/models.cpp index 2f5bb210f..cc34ca410 100644 --- a/src/auth/models.cpp +++ b/src/auth/models.cpp @@ -43,6 +43,14 @@ std::string PermissionToString(Permission permission) { return "REPLICATION"; case Permission::LOCK_PATH: return "LOCK_PATH"; + case Permission::READ_FILE: + return "READ_FILE"; + case Permission::FREE_MEMORY: + return "FREE_MEMORY"; + case Permission::TRIGGER: + return "TRIGGER"; + case Permission::CONFIG: + return "CONFIG"; case Permission::AUTH: return "AUTH"; }
diff --git a/src/auth/models.hpp b/src/auth/models.hpp index cb627b052..56cf897d2 100644 --- a/src/auth/models.hpp +++ b/src/auth/models.hpp @@ -23,15 +23,21 @@ enum class Permission : uint64_t { DUMP = 1U << 9U, REPLICATION = 1U << 10U, LOCK_PATH = 1U << 11U, + READ_FILE = 1U << 12U, + FREE_MEMORY = 1U << 13U, + TRIGGER = 1U << 14U, + CONFIG = 1U << 15U, AUTH = 1U << 16U }; // clang-format on // Constant list of all available permissions. -const std::vector<Permission> kPermissionsAll = { - Permission::MATCH, Permission::CREATE, Permission::MERGE, Permission::DELETE, Permission::SET, - Permission::REMOVE, Permission::INDEX, Permission::STATS, Permission::CONSTRAINT, Permission::DUMP, - Permission::AUTH, Permission::REPLICATION, Permission::LOCK_PATH}; +const std::vector<Permission> kPermissionsAll = {Permission::MATCH, Permission::CREATE, Permission::MERGE, + Permission::DELETE, Permission::SET, Permission::REMOVE, + Permission::INDEX, Permission::STATS, Permission::CONSTRAINT, + Permission::DUMP, Permission::AUTH, Permission::REPLICATION, + Permission::LOCK_PATH, Permission::READ_FILE, Permission::FREE_MEMORY, + Permission::TRIGGER, Permission::CONFIG}; // Function that converts a permission to its string representation. std::string PermissionToString(Permission permission);
diff --git a/src/auth/reference_modules/example.py b/src/auth/reference_modules/example.py index e1898f4f6..d40f34892 100755 --- a/src/auth/reference_modules/example.py +++ b/src/auth/reference_modules/example.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/python3 import json import io
diff --git a/src/auth/reference_modules/ldap.py b/src/auth/reference_modules/ldap.py index ac090a09c..761db8fd6 100755 --- a/src/auth/reference_modules/ldap.py +++ b/src/auth/reference_modules/ldap.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/python3 import json import io import ssl
diff --git a/src/glue/auth.cpp b/src/glue/auth.cpp index d5b8b706b..fe55c6dde 100644 --- a/src/glue/auth.cpp +++ b/src/glue/auth.cpp @@ -28,6 +28,14 @@ auth::Permission PrivilegeToPermission(query::AuthQuery::Privilege privilege) { return auth::Permission::REPLICATION; case query::AuthQuery::Privilege::LOCK_PATH: return auth::Permission::LOCK_PATH; + case query::AuthQuery::Privilege::READ_FILE: + return auth::Permission::READ_FILE; + case query::AuthQuery::Privilege::FREE_MEMORY: + return auth::Permission::FREE_MEMORY; + case query::AuthQuery::Privilege::TRIGGER: + return auth::Permission::TRIGGER; + case query::AuthQuery::Privilege::CONFIG: + return auth::Permission::CONFIG; case query::AuthQuery::Privilege::AUTH: return auth::Permission::AUTH; }
diff --git a/src/memgraph.cpp b/src/memgraph.cpp index 4f2bbcb61..d128f1a2e 100644 --- a/src/memgraph.cpp +++ b/src/memgraph.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -28,6 +29,7 @@ #include "query/procedure/module.hpp" #include "query/procedure/py_module.hpp" #include "requests/requests.hpp" +#include "storage/v2/isolation_level.hpp" #include
"storage/v2/storage.hpp" #include "storage/v2/view.hpp" #include "telemetry/telemetry.hpp" @@ -35,6 +37,8 @@ #include "utils/file.hpp" #include "utils/flag_validation.hpp" #include "utils/logging.hpp" +#include "utils/memory_tracker.hpp" +#include "utils/readable_size.hpp" #include "utils/signals.hpp" #include "utils/string.hpp" #include "utils/sysinfo/memory.hpp" @@ -65,6 +69,42 @@ #include "glue/auth.hpp" #endif +namespace { +std::string GetAllowedEnumValuesString(const auto &mappings) { + std::vector allowed_values; + allowed_values.reserve(mappings.size()); + std::transform(mappings.begin(), mappings.end(), std::back_inserter(allowed_values), + [](const auto &mapping) { return std::string(mapping.first); }); + return utils::Join(allowed_values, ", "); +} + +enum class ValidationError : uint8_t { EmptyValue, InvalidValue }; + +utils::BasicResult IsValidEnumValueString(const auto &value, const auto &mappings) { + if (value.empty()) { + return ValidationError::EmptyValue; + } + + if (std::find_if(mappings.begin(), mappings.end(), [&](const auto &mapping) { return mapping.first == value; }) == + mappings.cend()) { + return ValidationError::InvalidValue; + } + + return {}; +} + +template +std::optional StringToEnum(const auto &value, const auto &mappings) { + const auto mapping_iter = + std::find_if(mappings.begin(), mappings.end(), [&](const auto &mapping) { return mapping.first == value; }); + if (mapping_iter == mappings.cend()) { + return std::nullopt; + } + + return mapping_iter->second; +} +} // namespace + // Bolt server flags. DEFINE_string(bolt_address, "0.0.0.0", "IP address on which the Bolt server should listen."); DEFINE_VALIDATED_int32(bolt_port, 7687, "Port on which the Bolt server should listen.", @@ -138,6 +178,72 @@ DEFINE_uint64(query_execution_timeout_sec, 180, "Maximum allowed query execution time. Queries exceeding this " "limit will be aborted. Value of 0 means no limit."); +// NOLINTNEXTLINE (cppcoreguidelines-avoid-non-const-global-variables) +DEFINE_uint64( + memory_limit, 0, + "Total memory limit in MiB. Set to 0 to use the default values which are 100\% of the phyisical memory if the swap " + "is enabled and 90\% of the physical memory otherwise."); + +namespace { +using namespace std::literals; +constexpr std::array isolation_level_mappings{ + std::pair{"SNAPSHOT_ISOLATION"sv, storage::IsolationLevel::SNAPSHOT_ISOLATION}, + std::pair{"READ_COMMITTED"sv, storage::IsolationLevel::READ_COMMITTED}, + std::pair{"READ_UNCOMMITTED"sv, storage::IsolationLevel::READ_UNCOMMITTED}}; + +const std::string isolation_level_help_string = + fmt::format("Default isolation level used for the transactions. Allowed values: {}", + GetAllowedEnumValuesString(isolation_level_mappings)); +} // namespace + +// NOLINTNEXTLINE (cppcoreguidelines-avoid-non-const-global-variables) +DEFINE_VALIDATED_string(isolation_level, "SNAPSHOT_ISOLATION", isolation_level_help_string.c_str(), { + if (const auto result = IsValidEnumValueString(value, isolation_level_mappings); result.HasError()) { + const auto error = result.GetError(); + switch (error) { + case ValidationError::EmptyValue: { + std::cout << "Isolation level cannot be empty." << std::endl; + break; + } + case ValidationError::InvalidValue: { + std::cout << "Invalid value for isolation level. 
Allowed values: " + << GetAllowedEnumValuesString(isolation_level_mappings) << std::endl; + break; + } + } + return false; + } + + return true; +}); + +namespace { +storage::IsolationLevel ParseIsolationLevel() { + const auto isolation_level = StringToEnum(FLAGS_isolation_level, isolation_level_mappings); + MG_ASSERT(isolation_level, "Invalid isolation level"); + return *isolation_level; +} + +int64_t GetMemoryLimit() { + if (FLAGS_memory_limit == 0) { + auto maybe_total_memory = utils::sysinfo::TotalMemory(); + MG_ASSERT(maybe_total_memory, "Failed to fetch the total physical memory"); + const auto maybe_swap_memory = utils::sysinfo::SwapTotalMemory(); + MG_ASSERT(maybe_swap_memory, "Failed to fetch the total swap memory"); + + if (*maybe_swap_memory == 0) { + // take only 90% of the total memory + *maybe_total_memory *= 9; + *maybe_total_memory /= 10; + } + return *maybe_total_memory * 1024; + } + + // We parse the memory as MiB every time + return FLAGS_memory_limit * 1024 * 1024; +} +} // namespace + namespace { std::vector query_modules_directories; } // namespace @@ -168,31 +274,28 @@ DEFINE_string(log_file, "", "Path to where the log should be stored."); namespace { constexpr std::array log_level_mappings{ - std::pair{"TRACE", spdlog::level::trace}, std::pair{"DEBUG", spdlog::level::debug}, - std::pair{"INFO", spdlog::level::info}, std::pair{"WARNING", spdlog::level::warn}, - std::pair{"ERROR", spdlog::level::err}, std::pair{"CRITICAL", spdlog::level::critical}}; - -std::string GetAllowedLogLevelsString() { - std::vector allowed_log_levels; - allowed_log_levels.reserve(log_level_mappings.size()); - std::transform(log_level_mappings.cbegin(), log_level_mappings.cend(), std::back_inserter(allowed_log_levels), - [](const auto &mapping) { return mapping.first; }); - return utils::Join(allowed_log_levels, ", "); -} + std::pair{"TRACE"sv, spdlog::level::trace}, std::pair{"DEBUG"sv, spdlog::level::debug}, + std::pair{"INFO"sv, spdlog::level::info}, std::pair{"WARNING"sv, spdlog::level::warn}, + std::pair{"ERROR"sv, spdlog::level::err}, std::pair{"CRITICAL"sv, spdlog::level::critical}}; const std::string log_level_help_string = - fmt::format("Minimum log level. Allowed values: {}", GetAllowedLogLevelsString()); + fmt::format("Minimum log level. Allowed values: {}", GetAllowedEnumValuesString(log_level_mappings)); } // namespace DEFINE_VALIDATED_string(log_level, "WARNING", log_level_help_string.c_str(), { - if (value.empty()) { - std::cout << "Log level cannot be empty." << std::endl; - return false; - } - - if (std::find_if(log_level_mappings.cbegin(), log_level_mappings.cend(), - [&](const auto &mapping) { return mapping.first == value; }) == log_level_mappings.cend()) { - std::cout << "Invalid value for log level. Allowed values: " << GetAllowedLogLevelsString() << std::endl; + if (const auto result = IsValidEnumValueString(value, log_level_mappings); result.HasError()) { + const auto error = result.GetError(); + switch (error) { + case ValidationError::EmptyValue: { + std::cout << "Log level cannot be empty." << std::endl; + break; + } + case ValidationError::InvalidValue: { + std::cout << "Invalid value for log level. 
Allowed values: " << GetAllowedEnumValuesString(log_level_mappings) + << std::endl; + break; + } + } return false; } @@ -201,11 +304,9 @@ DEFINE_VALIDATED_string(log_level, "WARNING", log_level_help_string.c_str(), { namespace { void ParseLogLevel() { - const auto mapping_iter = std::find_if(log_level_mappings.cbegin(), log_level_mappings.cend(), - [](const auto &mapping) { return mapping.first == FLAGS_log_level; }); - MG_ASSERT(mapping_iter != log_level_mappings.cend(), "Invalid log level"); - - spdlog::set_level(mapping_iter->second); + const auto log_level = StringToEnum(FLAGS_log_level, log_level_mappings); + MG_ASSERT(log_level, "Invalid log level"); + spdlog::set_level(*log_level); } // 5 weeks * 7 days @@ -875,10 +976,10 @@ int main(int argc, char **argv) { // Start memory warning logger. utils::Scheduler mem_log_scheduler; if (FLAGS_memory_warning_threshold > 0) { - auto free_ram = utils::sysinfo::AvailableMemoryKilobytes(); + auto free_ram = utils::sysinfo::AvailableMemory(); if (free_ram) { mem_log_scheduler.Run("Memory warning", std::chrono::seconds(3), [] { - auto free_ram = utils::sysinfo::AvailableMemoryKilobytes(); + auto free_ram = utils::sysinfo::AvailableMemory(); if (free_ram && *free_ram / 1024 < FLAGS_memory_warning_threshold) spdlog::warn("Running out of available RAM, only {} MB left", *free_ram / 1024); }); @@ -924,8 +1025,11 @@ int main(int argc, char **argv) { // End enterprise features initialization #endif - // Main storage and execution engines initialization + const auto memory_limit = GetMemoryLimit(); + spdlog::info("Memory limit set to {}", utils::GetReadableSize(memory_limit)); + utils::total_memory_tracker.SetHardLimit(memory_limit); + // Main storage and execution engines initialization storage::Config db_config{ .gc = {.type = storage::Config::Gc::Type::PERIODIC, .interval = std::chrono::seconds(FLAGS_storage_gc_cycle_sec)}, .items = {.properties_on_edges = FLAGS_storage_properties_on_edges}, @@ -934,7 +1038,8 @@ int main(int argc, char **argv) { .snapshot_retention_count = FLAGS_storage_snapshot_retention_count, .wal_file_size_kibibytes = FLAGS_storage_wal_file_size_kib, .wal_file_flush_every_n_tx = FLAGS_storage_wal_file_flush_every_n_tx, - .snapshot_on_exit = FLAGS_storage_snapshot_on_exit}}; + .snapshot_on_exit = FLAGS_storage_snapshot_on_exit}, + .transaction = {.isolation_level = ParseIsolationLevel()}}; if (FLAGS_storage_snapshot_interval_sec == 0) { if (FLAGS_storage_wal_enabled) { LOG_FATAL( @@ -952,7 +1057,7 @@ int main(int argc, char **argv) { db_config.durability.snapshot_interval = std::chrono::seconds(FLAGS_storage_snapshot_interval_sec); } storage::Storage db(db_config); - query::InterpreterContext interpreter_context{&db}; + query::InterpreterContext interpreter_context{&db, FLAGS_data_directory}; query::SetExecutionTimeout(&interpreter_context, FLAGS_query_execution_timeout_sec); #ifdef MG_ENTERPRISE @@ -1024,5 +1129,7 @@ int main(int argc, char **argv) { // Shutdown Python Py_Finalize(); PyMem_RawFree(program_name); + + utils::total_memory_tracker.LogPeakMemoryUsage(); return 0; } diff --git a/src/mg_import_csv.cpp b/src/mg_import_csv.cpp index 2e3b08869..152ac22c9 100644 --- a/src/mg_import_csv.cpp +++ b/src/mg_import_csv.cpp @@ -436,9 +436,9 @@ void ProcessNodeRow(storage::Storage *store, const std::vector &fields, c } else { pv_id = storage::PropertyValue(node_id.id); } - auto node_property = node.SetProperty(acc.NameToProperty(field.name), pv_id); - if (!node_property.HasValue()) throw LoadException("Couldn't add property '{}' to 
the node", field.name); - if (!*node_property) throw LoadException("The property '{}' already exists", field.name); + auto old_node_property = node.SetProperty(acc.NameToProperty(field.name), pv_id); + if (!old_node_property.HasValue()) throw LoadException("Couldn't add property '{}' to the node", field.name); + if (!old_node_property->IsNull()) throw LoadException("The property '{}' already exists", field.name); } id = node_id; } else if (field.type == "LABEL") { @@ -448,9 +448,9 @@ void ProcessNodeRow(storage::Storage *store, const std::vector &fields, c if (!*node_label) throw LoadException("The label '{}' already exists", label); } } else if (field.type != "IGNORE") { - auto node_property = node.SetProperty(acc.NameToProperty(field.name), StringToValue(value, field.type)); - if (!node_property.HasValue()) throw LoadException("Couldn't add property '{}' to the node", field.name); - if (!*node_property) throw LoadException("The property '{}' already exists", field.name); + auto old_node_property = node.SetProperty(acc.NameToProperty(field.name), StringToValue(value, field.type)); + if (!old_node_property.HasValue()) throw LoadException("Couldn't add property '{}' to the node", field.name); + if (!old_node_property->IsNull()) throw LoadException("The property '{}' already exists", field.name); } } for (const auto &label : additional_labels) { diff --git a/src/query/CMakeLists.txt b/src/query/CMakeLists.txt index 17626e681..53f9ee8f9 100644 --- a/src/query/CMakeLists.txt +++ b/src/query/CMakeLists.txt @@ -9,6 +9,7 @@ add_custom_target(generate_lcp_query DEPENDS ${generated_lcp_query_files}) set(mg_query_sources ${lcp_query_cpp_files} common.cpp + cypher_query_interpreter.cpp dump.cpp frontend/ast/cypher_main_visitor.cpp frontend/ast/pretty_print.cpp @@ -17,6 +18,7 @@ set(mg_query_sources frontend/semantic/symbol_generator.cpp frontend/stripped.cpp interpret/awesome_memgraph_functions.cpp + interpret/eval.cpp interpreter.cpp plan/operator.cpp plan/preprocess.cpp @@ -29,13 +31,16 @@ set(mg_query_sources procedure/mg_procedure_impl.cpp procedure/module.cpp procedure/py_module.cpp + serialization/property_value.cpp + trigger.cpp + trigger_context.cpp typed_value.cpp) add_library(mg-query STATIC ${mg_query_sources}) add_dependencies(mg-query generate_lcp_query) target_include_directories(mg-query PUBLIC ${CMAKE_SOURCE_DIR}/include) target_link_libraries(mg-query dl cppitertools) -target_link_libraries(mg-query mg-storage-v2 mg-utils) +target_link_libraries(mg-query mg-storage-v2 mg-utils mg-kvstore) if("${MG_PYTHON_VERSION}" STREQUAL "") find_package(Python3 3.5 REQUIRED COMPONENTS Development) else() @@ -67,7 +72,7 @@ add_custom_command( OUTPUT ${antlr_opencypher_generated_src} ${antlr_opencypher_generated_include} COMMAND ${CMAKE_COMMAND} -E make_directory ${opencypher_generated} COMMAND - java -jar ${CMAKE_SOURCE_DIR}/libs/antlr-4.6-complete.jar + java -jar ${CMAKE_SOURCE_DIR}/libs/antlr-4.9.2-complete.jar -Dlanguage=Cpp -visitor -package antlropencypher -o ${opencypher_generated} ${opencypher_lexer_grammar} ${opencypher_parser_grammar} diff --git a/src/query/common.hpp b/src/query/common.hpp index 19b302058..72f020c3f 100644 --- a/src/query/common.hpp +++ b/src/query/common.hpp @@ -1,6 +1,7 @@ /// @file #pragma once +#include #include #include @@ -10,6 +11,7 @@ #include "query/frontend/semantic/symbol.hpp" #include "query/typed_value.hpp" #include "storage/v2/id_types.hpp" +#include "storage/v2/property_value.hpp" #include "storage/v2/view.hpp" #include "utils/logging.hpp" @@ -61,15 +63,22 
@@ inline void ExpectType(const Symbol &symbol, const TypedValue &value, TypedValue throw QueryRuntimeException("Expected a {} for '{}', but got {}.", expected, symbol.name(), value.type()); } +template <typename T> +concept AccessorWithSetProperty = requires(T accessor, const storage::PropertyId key, + const storage::PropertyValue new_value) { + { accessor.SetProperty(key, new_value) } + ->std::same_as<storage::Result<storage::PropertyValue>>; +}; + /// Set a property `value` mapped with given `key` on a `record`. /// /// @throw QueryRuntimeException if value cannot be set as a property value -template <class TRecordAccessor> -void PropsSetChecked(TRecordAccessor *record, const storage::PropertyId &key, const TypedValue &value) { +template <AccessorWithSetProperty T> +storage::PropertyValue PropsSetChecked(T *record, const storage::PropertyId &key, const TypedValue &value) { try { - auto maybe_error = record->SetProperty(key, storage::PropertyValue(value)); - if (maybe_error.HasError()) { - switch (maybe_error.GetError()) { + auto maybe_old_value = record->SetProperty(key, storage::PropertyValue(value)); + if (maybe_old_value.HasError()) { + switch (maybe_old_value.GetError()) { case storage::Error::SERIALIZATION_ERROR: throw QueryRuntimeException("Can't serialize due to concurrent operations."); case storage::Error::DELETED_OBJECT: @@ -81,6 +90,7 @@ void PropsSetChecked(TRecordAccessor *record, const storage::PropertyId &key, co throw QueryRuntimeException("Unexpected error when setting a property."); } } + return std::move(*maybe_old_value); } catch (const TypedValueException &) { throw QueryRuntimeException("'{}' cannot be used as a property value.", value.type()); }
diff --git a/src/query/context.hpp b/src/query/context.hpp index d937769da..afdd83f5d 100644 --- a/src/query/context.hpp +++ b/src/query/context.hpp @@ -1,10 +1,13 @@ #pragma once +#include <type_traits> + #include "query/common.hpp" #include "query/frontend/semantic/symbol_table.hpp" #include "query/parameters.hpp" #include "query/plan/profile.hpp" -#include "utils/tsc.hpp" +#include "query/trigger.hpp" +#include "utils/async_timer.hpp" namespace query { @@ -49,19 +52,25 @@ struct ExecutionContext { DbAccessor *db_accessor{nullptr}; SymbolTable symbol_table; EvaluationContext evaluation_context; - utils::TSCTimer execution_tsc_timer; - double max_execution_time_sec{0.0}; std::atomic<bool> *is_shutting_down{nullptr}; bool is_profile_query{false}; std::chrono::duration<double> profile_execution_time; plan::ProfilingStats stats; plan::ProfilingStats *stats_root{nullptr}; + TriggerContextCollector *trigger_context_collector{nullptr}; + utils::AsyncTimer timer; }; +static_assert(std::is_move_assignable_v<ExecutionContext>, "ExecutionContext must be move assignable!"); +static_assert(std::is_move_constructible_v<ExecutionContext>, "ExecutionContext must be move constructible!"); + inline bool MustAbort(const ExecutionContext &context) { - return (context.is_shutting_down && context.is_shutting_down->load(std::memory_order_acquire)) || - (context.max_execution_time_sec > 0 && - context.execution_tsc_timer.Elapsed() >= context.max_execution_time_sec); + return (context.is_shutting_down != nullptr && context.is_shutting_down->load(std::memory_order_acquire)) || + context.timer.IsExpired(); +} + +inline plan::ProfilingStatsWithTotalTime GetStatsWithTotalTime(const ExecutionContext &context) { + return plan::ProfilingStatsWithTotalTime{context.stats, context.profile_execution_time}; } } // namespace query
diff --git a/src/query/cypher_query_interpreter.cpp b/src/query/cypher_query_interpreter.cpp new file mode 100644 index 000000000..f0b322862 --- /dev/null +++
b/src/query/cypher_query_interpreter.cpp @@ -0,0 +1,141 @@ +#include "query/cypher_query_interpreter.hpp" + +// NOLINTNEXTLINE (cppcoreguidelines-avoid-non-const-global-variables) +DEFINE_HIDDEN_bool(query_cost_planner, true, "Use the cost-estimating query planner."); +// NOLINTNEXTLINE (cppcoreguidelines-avoid-non-const-global-variables) +DEFINE_VALIDATED_int32(query_plan_cache_ttl, 60, "Time to live for cached query plans, in seconds.", + FLAG_IN_RANGE(0, std::numeric_limits<int32_t>::max())); + +namespace query { +CachedPlan::CachedPlan(std::unique_ptr<LogicalPlan> plan) : plan_(std::move(plan)) {} + +ParsedQuery ParseQuery(const std::string &query_string, const std::map<std::string, storage::PropertyValue> &params, + utils::SkipList<QueryCacheEntry> *cache, utils::SpinLock *antlr_lock) { + // Strip the query for caching purposes. The process of stripping a query + // "normalizes" it by replacing any literals with new parameters. This + // results in just the *structure* of the query being taken into account for + // caching. + frontend::StrippedQuery stripped_query{query_string}; + + // Copy over the parameters that were introduced during stripping. + Parameters parameters{stripped_query.literals()}; + + // Check that all user-specified parameters are provided. + for (const auto &param_pair : stripped_query.parameters()) { + auto it = params.find(param_pair.second); + + if (it == params.end()) { + throw query::UnprovidedParameterError("Parameter ${} not provided.", param_pair.second); + } + + parameters.Add(param_pair.first, it->second); + } + + // Cache the query's AST if it isn't already. + auto hash = stripped_query.hash(); + auto accessor = cache->access(); + auto it = accessor.find(hash); + std::unique_ptr<frontend::opencypher::Parser> parser; + + // Return a copy of both the AST storage and the query. + CachedQuery result; + bool is_cacheable = true; + + auto get_information_from_cache = [&](const auto &cached_query) { + result.ast_storage.properties_ = cached_query.ast_storage.properties_; + result.ast_storage.labels_ = cached_query.ast_storage.labels_; + result.ast_storage.edge_types_ = cached_query.ast_storage.edge_types_; + + result.query = cached_query.query->Clone(&result.ast_storage); + result.required_privileges = cached_query.required_privileges; + }; + + if (it == accessor.end()) { + { + std::unique_lock guard(*antlr_lock); + + try { + parser = std::make_unique<frontend::opencypher::Parser>(stripped_query.query()); + } catch (const SyntaxException &e) { + // There is a syntax exception in the stripped query. Re-run the parser + // on the original query to get an appropriate error message. + parser = std::make_unique<frontend::opencypher::Parser>(query_string); + + // If an exception was not thrown here, the stripper messed something + // up. + LOG_FATAL("The stripped query can't be parsed, but the original can."); + } + }
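+    // Illustrative example (not from the original sources): after stripping,
+    //   MATCH (n) WHERE n.age > 21 RETURN n
+    //   MATCH (n) WHERE n.age > 42 RETURN n
+    // hash identically, so both reuse the same cache entry; only the literal
+    // parameters extracted above differ between the two runs.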
+ + // Convert the ANTLR4 parse tree into an AST. + AstStorage ast_storage; + frontend::ParsingContext context{true}; + frontend::CypherMainVisitor visitor(context, &ast_storage); + + visitor.visit(parser->tree()); + + if (visitor.IsCacheable()) { + CachedQuery cached_query{std::move(ast_storage), visitor.query(), query::GetRequiredPrivileges(visitor.query())}; + it = accessor.insert({hash, std::move(cached_query)}).first; + + get_information_from_cache(it->second); + } else { + result.ast_storage.properties_ = ast_storage.properties_; + result.ast_storage.labels_ = ast_storage.labels_; + result.ast_storage.edge_types_ = ast_storage.edge_types_; + + result.query = visitor.query()->Clone(&result.ast_storage); + result.required_privileges = query::GetRequiredPrivileges(visitor.query()); + + is_cacheable = false; + } + } else { + get_information_from_cache(it->second); + } + + return ParsedQuery{query_string, + params, + std::move(parameters), + std::move(stripped_query), + std::move(result.ast_storage), + result.query, + std::move(result.required_privileges), + is_cacheable}; +} + +std::unique_ptr<LogicalPlan> MakeLogicalPlan(AstStorage ast_storage, CypherQuery *query, const Parameters &parameters, + DbAccessor *db_accessor, + const std::vector<Identifier *> &predefined_identifiers) { + auto vertex_counts = plan::MakeVertexCountCache(db_accessor); + auto symbol_table = MakeSymbolTable(query, predefined_identifiers); + auto planning_context = plan::MakePlanningContext(&ast_storage, &symbol_table, query, &vertex_counts); + auto [root, cost] = plan::MakeLogicalPlan(&planning_context, parameters, FLAGS_query_cost_planner); + return std::make_unique<SingleNodeLogicalPlan>(std::move(root), cost, std::move(ast_storage), + std::move(symbol_table)); +} + +std::shared_ptr<CachedPlan> CypherQueryToPlan(uint64_t hash, AstStorage ast_storage, CypherQuery *query, + const Parameters &parameters, utils::SkipList<PlanCacheEntry> *plan_cache, + DbAccessor *db_accessor, + const std::vector<Identifier *> &predefined_identifiers) { + std::optional<utils::SkipList<PlanCacheEntry>::Accessor> plan_cache_access; + if (plan_cache) { + plan_cache_access.emplace(plan_cache->access()); + auto it = plan_cache_access->find(hash); + if (it != plan_cache_access->end()) { + if (it->second->IsExpired()) { + plan_cache_access->remove(hash); + } else { + return it->second; + } + } + } + + auto plan = std::make_shared<CachedPlan>( + MakeLogicalPlan(std::move(ast_storage), query, parameters, db_accessor, predefined_identifiers)); + if (plan_cache_access) { + plan_cache_access->insert({hash, plan}); + } + return plan; +} +} // namespace query
diff --git a/src/query/cypher_query_interpreter.hpp b/src/query/cypher_query_interpreter.hpp new file mode 100644 index 000000000..2d763eec6 --- /dev/null +++ b/src/query/cypher_query_interpreter.hpp @@ -0,0 +1,149 @@ +#pragma once + +////////////////////////////////////////////////////// +// THIS INCLUDE SHOULD ALWAYS COME BEFORE THE +// "cypher_main_visitor.hpp" +// "planner.hpp" includes json.hpp which uses libc's +// EOF macro while "cypher_main_visitor.hpp" includes +// "antlr4-runtime.h" which contains a static variable +// of the same name, EOF. +// This hides the definition of the macro which causes +// the compilation to fail.
+#include "query/plan/planner.hpp" +////////////////////////////////////////////////////// +#include "query/frontend/ast/cypher_main_visitor.hpp" +#include "query/frontend/opencypher/parser.hpp" +#include "query/frontend/semantic/required_privileges.hpp" +#include "query/frontend/semantic/symbol_generator.hpp" +#include "query/frontend/stripped.hpp" +#include "utils/flag_validation.hpp" +#include "utils/timer.hpp" + +// NOLINTNEXTLINE (cppcoreguidelines-avoid-non-const-global-variables) +DECLARE_bool(query_cost_planner); +// NOLINTNEXTLINE (cppcoreguidelines-avoid-non-const-global-variables) +DECLARE_int32(query_plan_cache_ttl); + +namespace query { + +// TODO: Maybe this should move to query/plan/planner. +/// Interface for accessing the root operator of a logical plan. +class LogicalPlan { + public: + explicit LogicalPlan() = default; + + virtual ~LogicalPlan() = default; + + LogicalPlan(const LogicalPlan &) = default; + LogicalPlan &operator=(const LogicalPlan &) = default; + LogicalPlan(LogicalPlan &&) = default; + LogicalPlan &operator=(LogicalPlan &&) = default; + + virtual const plan::LogicalOperator &GetRoot() const = 0; + virtual double GetCost() const = 0; + virtual const SymbolTable &GetSymbolTable() const = 0; + virtual const AstStorage &GetAstStorage() const = 0; +}; + +class CachedPlan { + public: + explicit CachedPlan(std::unique_ptr plan); + + const auto &plan() const { return plan_->GetRoot(); } + double cost() const { return plan_->GetCost(); } + const auto &symbol_table() const { return plan_->GetSymbolTable(); } + const auto &ast_storage() const { return plan_->GetAstStorage(); } + + bool IsExpired() const { + // NOLINTNEXTLINE (modernize-use-nullptr) + return cache_timer_.Elapsed() > std::chrono::seconds(FLAGS_query_plan_cache_ttl); + }; + + private: + std::unique_ptr plan_; + utils::Timer cache_timer_; +}; + +struct CachedQuery { + AstStorage ast_storage; + Query *query; + std::vector required_privileges; +}; + +struct QueryCacheEntry { + bool operator==(const QueryCacheEntry &other) const { return first == other.first; } + bool operator<(const QueryCacheEntry &other) const { return first < other.first; } + bool operator==(const uint64_t &other) const { return first == other; } + bool operator<(const uint64_t &other) const { return first < other; } + + uint64_t first; + // TODO: Maybe store the query string here and use it as a key with the hash + // so that we eliminate the risk of hash collisions. + CachedQuery second; +}; + +struct PlanCacheEntry { + bool operator==(const PlanCacheEntry &other) const { return first == other.first; } + bool operator<(const PlanCacheEntry &other) const { return first < other.first; } + bool operator==(const uint64_t &other) const { return first == other; } + bool operator<(const uint64_t &other) const { return first < other; } + + uint64_t first; + // TODO: Maybe store the query string here and use it as a key with the hash + // so that we eliminate the risk of hash collisions. + std::shared_ptr second; +}; + +/** + * A container for data related to the parsing of a query. 
+ */
+struct ParsedQuery {
+  std::string query_string;
+  std::map<std::string, storage::PropertyValue> user_parameters;
+  Parameters parameters;
+  frontend::StrippedQuery stripped_query;
+  AstStorage ast_storage;
+  Query *query;
+  std::vector<AuthQuery::Privilege> required_privileges;
+  bool is_cacheable{true};
+};
+
+ParsedQuery ParseQuery(const std::string &query_string, const std::map<std::string, storage::PropertyValue> &params,
+                       utils::SkipList<QueryCacheEntry> *cache, utils::SpinLock *antlr_lock);
+
+class SingleNodeLogicalPlan final : public LogicalPlan {
+ public:
+  SingleNodeLogicalPlan(std::unique_ptr<plan::LogicalOperator> root, double cost, AstStorage storage,
+                        const SymbolTable &symbol_table)
+      : root_(std::move(root)), cost_(cost), storage_(std::move(storage)), symbol_table_(symbol_table) {}
+
+  const plan::LogicalOperator &GetRoot() const override { return *root_; }
+  double GetCost() const override { return cost_; }
+  const SymbolTable &GetSymbolTable() const override { return symbol_table_; }
+  const AstStorage &GetAstStorage() const override { return storage_; }
+
+ private:
+  std::unique_ptr<plan::LogicalOperator> root_;
+  double cost_;
+  AstStorage storage_;
+  SymbolTable symbol_table_;
+};
+
+std::unique_ptr<LogicalPlan> MakeLogicalPlan(AstStorage ast_storage, CypherQuery *query, const Parameters &parameters,
+                                             DbAccessor *db_accessor,
+                                             const std::vector<Identifier *> &predefined_identifiers);
+
+/**
+ * Return the parsed *Cypher* query's AST cached logical plan, or create and
+ * cache a fresh one if it doesn't yet exist.
+ * @param predefined_identifiers optional identifiers you want to inject into a query.
+ * If an identifier is not defined in a scope, we check the predefined identifiers.
+ * If an identifier is contained there, we inject it at that place and remove it,
+ * because a predefined identifier can be used only in one scope.
+ */
+std::shared_ptr<CachedPlan> CypherQueryToPlan(uint64_t hash, AstStorage ast_storage, CypherQuery *query,
+                                              const Parameters &parameters, utils::SkipList<PlanCacheEntry> *plan_cache,
+                                              DbAccessor *db_accessor,
+                                              const std::vector<Identifier *> &predefined_identifiers = {});
+
+}  // namespace query
diff --git a/src/query/db_accessor.hpp b/src/query/db_accessor.hpp
index 0c7f24977..26b7b6a19 100644
--- a/src/query/db_accessor.hpp
+++ b/src/query/db_accessor.hpp
@@ -43,6 +43,8 @@ class EdgeAccessor final {
  public:
   explicit EdgeAccessor(storage::EdgeAccessor impl) : impl_(std::move(impl)) {}
 
+  bool IsVisible(storage::View view) const { return impl_.IsVisible(view); }
+
   storage::EdgeTypeId EdgeType() const { return impl_.EdgeType(); }
 
   auto Properties(storage::View view) const { return impl_.Properties(view); }
 
@@ -51,16 +53,16 @@
     return impl_.GetProperty(key, view);
   }
 
-  storage::Result<bool> SetProperty(storage::PropertyId key, const storage::PropertyValue &value) {
+  storage::Result<storage::PropertyValue> SetProperty(storage::PropertyId key, const storage::PropertyValue &value) {
     return impl_.SetProperty(key, value);
   }
 
-  storage::Result<bool> RemoveProperty(storage::PropertyId key) { return SetProperty(key, storage::PropertyValue()); }
+  storage::Result<storage::PropertyValue> RemoveProperty(storage::PropertyId key) {
+    return SetProperty(key, storage::PropertyValue());
+  }
 
-  utils::BasicResult<storage::Error, void> ClearProperties() {
-    auto ret = impl_.ClearProperties();
-    if (ret.HasError()) return ret.GetError();
-    return {};
+  storage::Result<std::map<storage::PropertyId, storage::PropertyValue>> ClearProperties() {
+    return impl_.ClearProperties();
   }
 
   VertexAccessor To() const;
@@ -87,6 +89,8 @@ class VertexAccessor final {
  public:
   explicit VertexAccessor(storage::VertexAccessor impl) : impl_(std::move(impl)) {}
 
+  bool IsVisible(storage::View view) const { return impl_.IsVisible(view); }
+
   auto Labels(storage::View view) const { return impl_.Labels(view); }
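The accessor changes in this file all follow one pattern: mutating operations now report what they changed (the previous property value, the cleared property map, the deleted objects) instead of a bare success flag, so callers such as the trigger machinery introduced in this diff can record those changes. A minimal self-contained sketch of the pattern, using hypothetical Result and Vertex stand-ins rather than Memgraph's storage:: types:

#include <cassert>
#include <map>
#include <string>
#include <variant>

// Hypothetical stand-ins for storage::Error and storage::Result.
enum class Error { SERIALIZATION_ERROR, DELETED_OBJECT };

template <typename T>
class Result {
 public:
  Result(T value) : value_(std::move(value)) {}
  Result(Error error) : value_(error) {}
  bool HasError() const { return std::holds_alternative<Error>(value_); }
  const T &GetValue() const { return std::get<T>(value_); }

 private:
  std::variant<T, Error> value_;
};

// A toy vertex whose SetProperty returns the *previous* value, mirroring the
// new storage::Result<storage::PropertyValue> return type in the diff above.
struct Vertex {
  std::map<std::string, std::string> properties;

  Result<std::string> SetProperty(const std::string &key, const std::string &value) {
    auto old = properties.count(key) ? properties[key] : std::string{};
    properties[key] = value;
    return old;
  }
};

int main() {
  Vertex v;
  auto res = v.SetProperty("name", "memgraph");
  assert(!res.HasError() && res.GetValue().empty());  // no previous value
  auto res2 = v.SetProperty("name", "mg");
  assert(res2.GetValue() == "memgraph");  // old value available, e.g. for a trigger context
}

The same idea scales up to DetachRemoveVertex below, which packages the deleted vertex together with all of its deleted edges.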
   storage::Result<bool> AddLabel(storage::LabelId label) { return impl_.AddLabel(label); }
@@ -103,16 +107,16 @@
     return impl_.GetProperty(key, view);
   }
 
-  storage::Result<bool> SetProperty(storage::PropertyId key, const storage::PropertyValue &value) {
+  storage::Result<storage::PropertyValue> SetProperty(storage::PropertyId key, const storage::PropertyValue &value) {
    return impl_.SetProperty(key, value);
   }
 
-  storage::Result<bool> RemoveProperty(storage::PropertyId key) { return SetProperty(key, storage::PropertyValue()); }
+  storage::Result<storage::PropertyValue> RemoveProperty(storage::PropertyId key) {
+    return SetProperty(key, storage::PropertyValue());
+  }
 
-  utils::BasicResult<storage::Error, void> ClearProperties() {
-    auto ret = impl_.ClearProperties();
-    if (ret.HasError()) return ret.GetError();
-    return {};
+  storage::Result<std::map<storage::PropertyId, storage::PropertyValue>> ClearProperties() {
+    return impl_.ClearProperties();
   }
 
   auto InEdges(storage::View view, const std::vector<storage::EdgeTypeId> &edge_types) const
@@ -208,6 +212,8 @@
     return std::nullopt;
   }
 
+  void FinalizeTransaction() { accessor_->FinalizeTransaction(); }
+
   VerticesIterable Vertices(storage::View view) { return VerticesIterable(accessor_->Vertices(view)); }
 
   VerticesIterable Vertices(storage::View view, storage::LabelId label) {
@@ -235,17 +241,59 @@
                                           const storage::EdgeTypeId &edge_type) {
     auto maybe_edge = accessor_->CreateEdge(&from->impl_, &to->impl_, edge_type);
     if (maybe_edge.HasError()) return storage::Result<EdgeAccessor>(maybe_edge.GetError());
-    return EdgeAccessor(std::move(*maybe_edge));
+    return EdgeAccessor(*maybe_edge);
   }
 
-  storage::Result<bool> RemoveEdge(EdgeAccessor *edge) { return accessor_->DeleteEdge(&edge->impl_); }
+  storage::Result<std::optional<EdgeAccessor>> RemoveEdge(EdgeAccessor *edge) {
+    auto res = accessor_->DeleteEdge(&edge->impl_);
+    if (res.HasError()) {
+      return res.GetError();
+    }
 
-  storage::Result<bool> DetachRemoveVertex(VertexAccessor *vertex_accessor) {
-    return accessor_->DetachDeleteVertex(&vertex_accessor->impl_);
+    const auto &value = res.GetValue();
+    if (!value) {
+      return std::optional<EdgeAccessor>{};
+    }
+
+    return std::make_optional<EdgeAccessor>(*value);
   }
 
-  storage::Result<bool> RemoveVertex(VertexAccessor *vertex_accessor) {
-    return accessor_->DeleteVertex(&vertex_accessor->impl_);
+  storage::Result<std::optional<std::pair<VertexAccessor, std::vector<EdgeAccessor>>>> DetachRemoveVertex(
+      VertexAccessor *vertex_accessor) {
+    using ReturnType = std::pair<VertexAccessor, std::vector<EdgeAccessor>>;
+
+    auto res = accessor_->DetachDeleteVertex(&vertex_accessor->impl_);
+    if (res.HasError()) {
+      return res.GetError();
+    }
+
+    const auto &value = res.GetValue();
+    if (!value) {
+      return std::optional<ReturnType>{};
+    }
+
+    const auto &[vertex, edges] = *value;
+
+    std::vector<EdgeAccessor> deleted_edges;
+    deleted_edges.reserve(edges.size());
+    std::transform(edges.begin(), edges.end(), std::back_inserter(deleted_edges),
+                   [](const auto &deleted_edge) { return EdgeAccessor{deleted_edge}; });
+
+    return std::make_optional<ReturnType>(vertex, std::move(deleted_edges));
+  }
+
+  storage::Result<std::optional<VertexAccessor>> RemoveVertex(VertexAccessor *vertex_accessor) {
+    auto res = accessor_->DeleteVertex(&vertex_accessor->impl_);
+    if (res.HasError()) {
+      return res.GetError();
+    }
+
+    const auto &value = res.GetValue();
+    if (!value) {
+      return std::optional<VertexAccessor>{};
+    }
+
+    return std::make_optional<VertexAccessor>(*value);
   }
 
   storage::PropertyId NameToProperty(const std::string_view &name) { return accessor_->NameToProperty(name); }
diff --git a/src/query/exceptions.hpp b/src/query/exceptions.hpp
index 0a8bd5b11..6c757e91b 100644
--- a/src/query/exceptions.hpp
+++ b/src/query/exceptions.hpp
@@ -161,7 +161,24 @@ class ReplicationModificationInMulticommandTxException : public QueryException {
 
 class
LockPathModificationInMulticommandTxException : public QueryException { public: LockPathModificationInMulticommandTxException() - : QueryException("Lock path clause not allowed in multicommand transactions.") {} + : QueryException("Lock path query not allowed in multicommand transactions.") {} }; +class FreeMemoryModificationInMulticommandTxException : public QueryException { + public: + FreeMemoryModificationInMulticommandTxException() + : QueryException("Free memory query not allowed in multicommand transactions.") {} +}; + +class TriggerModificationInMulticommandTxException : public QueryException { + public: + TriggerModificationInMulticommandTxException() + : QueryException("Trigger queries not allowed in multicommand transactions.") {} +}; + +class IsolationLevelModificationInMulticommandTxException : public QueryException { + public: + IsolationLevelModificationInMulticommandTxException() + : QueryException("Isolation level cannot be modified in multicommand transactions.") {} +}; } // namespace query diff --git a/src/query/frontend/ast/ast.lcp b/src/query/frontend/ast/ast.lcp index ee0e2b975..81b6038e5 100644 --- a/src/query/frontend/ast/ast.lcp +++ b/src/query/frontend/ast/ast.lcp @@ -686,9 +686,7 @@ cpp<# symbol_pos_ = symbol.position(); return this; } - cpp<#) - (:protected - #>cpp + explicit Identifier(const std::string &name) : name_(name) {} Identifier(const std::string &name, bool user_declared) : name_(name), user_declared_(user_declared) {} @@ -1543,7 +1541,11 @@ cpp<# :scope :public :slk-save #'slk-save-ast-vector :slk-load (slk-load-ast-vector "CypherUnion") - :documentation "Contains remaining queries that should form and union with `single_query_`.")) + :documentation "Contains remaining queries that should form and union with `single_query_`.") + (memory-limit "Expression *" :initval "nullptr" :scope :public + :slk-save #'slk-save-ast-pointer + :slk-load (slk-load-ast-pointer "Expression")) + (memory-scale "size_t" :initval "1024U" :scope :public)) (:public #>cpp CypherQuery() = default; @@ -2191,7 +2193,7 @@ cpp<# (:serialize)) (lcp:define-enum privilege (create delete match merge set remove index stats auth constraint - dump replication lock_path) + dump replication lock_path read_file free_memory trigger config) (:serialize)) #>cpp AuthQuery() = default; @@ -2228,7 +2230,10 @@ const std::vector kPrivilegesAll = { AuthQuery::Privilege::AUTH, AuthQuery::Privilege::CONSTRAINT, AuthQuery::Privilege::DUMP, AuthQuery::Privilege::REPLICATION, - AuthQuery::Privilege::LOCK_PATH}; + AuthQuery::Privilege::READ_FILE, + AuthQuery::Privilege::LOCK_PATH, + AuthQuery::Privilege::FREE_MEMORY, AuthQuery::Privilege::TRIGGER, + AuthQuery::Privilege::CONFIG}; cpp<# (lcp:define-class info-query (query) @@ -2353,4 +2358,102 @@ cpp<# (:serialize (:slk)) (:clone)) +(lcp:define-class load-csv (clause) + ((file "Expression *" :scope :public) + (with_header "bool" :scope :public) + (ignore_bad "bool" :scope :public) + (delimiter "Expression *" :initval "nullptr" :scope :public) + (quote "Expression *" :initval "nullptr" :scope :public) + (row_var "Identifier *" :initval "nullptr" :scope :public + :slk-save #'slk-save-ast-pointer + :slk-load (slk-load-ast-pointer "Identifier"))) + + (:public + #>cpp + LoadCsv() = default; + + bool Accept(HierarchicalTreeVisitor &visitor) override { + if (visitor.PreVisit(*this)) { + row_var_->Accept(visitor); + } + return visitor.PostVisit(*this); + } + cpp<#) + (:protected + #>cpp + explicit LoadCsv(Expression *file, bool with_header, bool ignore_bad, 
Expression *delimiter, + Expression* quote, Identifier* row_var) + : file_(file), + with_header_(with_header), + ignore_bad_(ignore_bad), + delimiter_(delimiter), + quote_(quote), + row_var_(row_var) { + DMG_ASSERT(row_var, "LoadCsv cannot take nullptr for identifier"); + } + cpp<#) + (:private + #>cpp + friend class AstStorage; + cpp<#) + (:serialize (:slk)) + (:clone)) + +(lcp:define-class free-memory-query (query) () + (:public + #>cpp + DEFVISITABLE(QueryVisitor); + cpp<#) + (:serialize (:slk)) + (:clone)) + +(lcp:define-class trigger-query (query) + ((action "Action" :scope :public) + (event_type "EventType" :scope :public) + (trigger_name "std::string" :scope :public) + (before_commit "bool" :scope :public) + (statement "std::string" :scope :public)) + + (:public + (lcp:define-enum action + (create-trigger drop-trigger show-triggers) + (:serialize)) + (lcp:define-enum event-type + (any vertex_create edge_create create vertex_delete edge_delete delete vertex_update edge_update update) + (:serialize)) + #>cpp + TriggerQuery() = default; + + DEFVISITABLE(QueryVisitor); + cpp<#) + (:private + #>cpp + friend class AstStorage; + cpp<#) + (:serialize (:slk)) + (:clone)) + +(lcp:define-class isolation-level-query (query) + ((isolation_level "IsolationLevel" :scope :public) + (isolation_level_scope "IsolationLevelScope" :scope :public)) + + (:public + (lcp:define-enum isolation-level + (snapshot-isolation read-committed read-uncommitted) + (:serialize)) + (lcp:define-enum isolation-level-scope + (next session global) + (:serialize)) + #>cpp + IsolationLevelQuery() = default; + + DEFVISITABLE(QueryVisitor); + cpp<#) + (:private + #>cpp + friend class AstStorage; + cpp<#) + (:serialize (:slk)) + (:clone)) + (lcp:pop-namespace) ;; namespace query diff --git a/src/query/frontend/ast/ast_visitor.hpp b/src/query/frontend/ast/ast_visitor.hpp index f637c84e9..9d55f6b12 100644 --- a/src/query/frontend/ast/ast_visitor.hpp +++ b/src/query/frontend/ast/ast_visitor.hpp @@ -74,6 +74,10 @@ class RegexMatch; class DumpQuery; class ReplicationQuery; class LockPathQuery; +class LoadCsv; +class FreeMemoryQuery; +class TriggerQuery; +class IsolationLevelQuery; using TreeCompositeVisitor = ::utils::CompositeVisitor< SingleQuery, CypherUnion, NamedExpression, OrOperator, XorOperator, AndOperator, NotOperator, AdditionOperator, @@ -82,7 +86,7 @@ using TreeCompositeVisitor = ::utils::CompositeVisitor< ListSlicingOperator, IfOperator, UnaryPlusOperator, UnaryMinusOperator, IsNullOperator, ListLiteral, MapLiteral, PropertyLookup, LabelsTest, Aggregation, Function, Reduce, Coalesce, Extract, All, Single, Any, None, CallProcedure, Create, Match, Return, With, Pattern, NodeAtom, EdgeAtom, Delete, Where, SetProperty, SetProperties, SetLabels, - RemoveProperty, RemoveLabels, Merge, Unwind, RegexMatch>; + RemoveProperty, RemoveLabels, Merge, Unwind, RegexMatch, LoadCsv>; using TreeLeafVisitor = ::utils::LeafVisitor; @@ -106,6 +110,7 @@ class ExpressionVisitor template class QueryVisitor : public ::utils::Visitor {}; + InfoQuery, ConstraintQuery, DumpQuery, ReplicationQuery, LockPathQuery, + FreeMemoryQuery, TriggerQuery, IsolationLevelQuery> {}; } // namespace query diff --git a/src/query/frontend/ast/cypher_main_visitor.cpp b/src/query/frontend/ast/cypher_main_visitor.cpp index a59f05a80..aaab87a4f 100644 --- a/src/query/frontend/ast/cypher_main_visitor.cpp +++ b/src/query/frontend/ast/cypher_main_visitor.cpp @@ -33,6 +33,28 @@ namespace query::frontend { const std::string CypherMainVisitor::kAnonPrefix = "anon"; 
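The memory-limit and memory-scale fields added to CypherQuery above are populated from the grammar rule memoryLimit : MEMORY ( UNLIMITED | LIMIT literal ( MB | KB ) ) introduced later in this diff; the VisitMemoryLimit helper that follows reads those tokens from the parse tree. A rough, self-contained sketch of the unit arithmetic only; ParseMemoryLimit and its signature are illustrative, not Memgraph API:

#include <cassert>
#include <cstdint>
#include <optional>
#include <stdexcept>
#include <string>

// Returns std::nullopt for MEMORY UNLIMITED, otherwise the limit in bytes.
std::optional<uint64_t> ParseMemoryLimit(std::optional<int64_t> literal, const std::string &unit) {
  if (!literal) return std::nullopt;  // MEMORY UNLIMITED
  if (*literal <= 0) throw std::invalid_argument("Memory limit must be a positive integer.");
  const uint64_t scale = unit == "MB" ? 1024ULL * 1024ULL : 1024ULL;  // grammar allows only MB or KB
  return static_cast<uint64_t>(*literal) * scale;
}

int main() {
  assert(!ParseMemoryLimit(std::nullopt, "").has_value());       // QUERY MEMORY UNLIMITED
  assert(*ParseMemoryLimit(100, "MB") == 100ULL * 1024 * 1024);  // QUERY MEMORY LIMIT 100 MB
  assert(*ParseMemoryLimit(512, "KB") == 512ULL * 1024);         // PROCEDURE MEMORY LIMIT 512 KB
}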
+namespace {
+template <typename TVisitor>
+std::optional<std::pair<query::Expression *, size_t>> VisitMemoryLimit(
+    MemgraphCypher::MemoryLimitContext *memory_limit_ctx, TVisitor *visitor) {
+  MG_ASSERT(memory_limit_ctx);
+  if (memory_limit_ctx->UNLIMITED()) {
+    return std::nullopt;
+  }
+
+  auto memory_limit = memory_limit_ctx->literal()->accept(visitor);
+  size_t memory_scale = 1024U;
+  if (memory_limit_ctx->MB()) {
+    memory_scale = 1024U * 1024U;
+  } else {
+    MG_ASSERT(memory_limit_ctx->KB());
+    memory_scale = 1024U;
+  }
+
+  return std::make_pair(memory_limit, memory_scale);
+}
+}  // namespace
+
 antlrcpp::Any CypherMainVisitor::visitExplainQuery(MemgraphCypher::ExplainQueryContext *ctx) {
   MG_ASSERT(ctx->children.size() == 2, "ExplainQuery should have exactly two children!");
   auto *cypher_query = ctx->children[1]->accept(this).as<CypherQuery *>();
@@ -127,6 +149,14 @@ antlrcpp::Any CypherMainVisitor::visitCypherQuery(MemgraphCypher::CypherQueryCon
     cypher_query->cypher_unions_.push_back(child->accept(this).as<CypherUnion *>());
   }
 
+  if (auto *memory_limit_ctx = ctx->queryMemoryLimit()) {
+    const auto memory_limit_info = VisitMemoryLimit(memory_limit_ctx->memoryLimit(), this);
+    if (memory_limit_info) {
+      cypher_query->memory_limit_ = memory_limit_info->first;
+      cypher_query->memory_scale_ = memory_limit_info->second;
+    }
+  }
+
   query_ = cypher_query;
   return cypher_query;
 }
@@ -263,6 +293,152 @@ antlrcpp::Any CypherMainVisitor::visitLockPathQuery(MemgraphCypher::LockPathQuer
   return lock_query;
 }
 
+antlrcpp::Any CypherMainVisitor::visitLoadCsv(MemgraphCypher::LoadCsvContext *ctx) {
+  auto *load_csv = storage_->Create<LoadCsv>();
+  // handle file name
+  if (ctx->csvFile()->literal()->StringLiteral()) {
+    load_csv->file_ = ctx->csvFile()->accept(this);
+  } else {
+    throw SemanticException("CSV file path should be a string literal");
+  }
+
+  // handle header options
+  // Don't have to check for ctx->HEADER(), as it's a mandatory token.
+  // Just need to check if ctx->WITH() is not nullptr - otherwise, we have a
+  // ctx->NO() and ctx->HEADER() present.
+ load_csv->with_header_ = ctx->WITH() != nullptr; + + // handle skip bad row option + load_csv->ignore_bad_ = ctx->IGNORE() && ctx->BAD(); + + // handle delimiter + if (ctx->DELIMITER()) { + if (ctx->delimiter()->literal()->StringLiteral()) { + load_csv->delimiter_ = ctx->delimiter()->accept(this); + } else { + throw SemanticException("Delimiter should be a string literal"); + } + } + + // handle quote + if (ctx->QUOTE()) { + if (ctx->quote()->literal()->StringLiteral()) { + load_csv->quote_ = ctx->quote()->accept(this); + } else { + throw SemanticException("Quote should be a string literal"); + } + } + + // handle row variable + load_csv->row_var_ = storage_->Create(ctx->rowVar()->variable()->accept(this).as()); + return load_csv; +} + +antlrcpp::Any CypherMainVisitor::visitFreeMemoryQuery(MemgraphCypher::FreeMemoryQueryContext *ctx) { + auto *free_memory_query = storage_->Create(); + query_ = free_memory_query; + return free_memory_query; +} + +antlrcpp::Any CypherMainVisitor::visitTriggerQuery(MemgraphCypher::TriggerQueryContext *ctx) { + MG_ASSERT(ctx->children.size() == 1, "TriggerQuery should have exactly one child!"); + auto *trigger_query = ctx->children[0]->accept(this).as(); + query_ = trigger_query; + return trigger_query; +} + +antlrcpp::Any CypherMainVisitor::visitCreateTrigger(MemgraphCypher::CreateTriggerContext *ctx) { + auto *trigger_query = storage_->Create(); + trigger_query->action_ = TriggerQuery::Action::CREATE_TRIGGER; + trigger_query->trigger_name_ = ctx->triggerName()->symbolicName()->accept(this).as(); + + auto *statement = ctx->triggerStatement(); + antlr4::misc::Interval interval{statement->start->getStartIndex(), statement->stop->getStopIndex()}; + trigger_query->statement_ = ctx->start->getInputStream()->getText(interval); + + trigger_query->event_type_ = [ctx] { + if (!ctx->ON()) { + return TriggerQuery::EventType::ANY; + } + + if (ctx->CREATE(1)) { + if (ctx->emptyVertex()) { + return TriggerQuery::EventType::VERTEX_CREATE; + } + if (ctx->emptyEdge()) { + return TriggerQuery::EventType::EDGE_CREATE; + } + return TriggerQuery::EventType::CREATE; + } + + if (ctx->DELETE()) { + if (ctx->emptyVertex()) { + return TriggerQuery::EventType::VERTEX_DELETE; + } + if (ctx->emptyEdge()) { + return TriggerQuery::EventType::EDGE_DELETE; + } + return TriggerQuery::EventType::DELETE; + } + + if (ctx->UPDATE()) { + if (ctx->emptyVertex()) { + return TriggerQuery::EventType::VERTEX_UPDATE; + } + if (ctx->emptyEdge()) { + return TriggerQuery::EventType::EDGE_UPDATE; + } + return TriggerQuery::EventType::UPDATE; + } + + LOG_FATAL("Invalid token allowed for the query"); + }(); + + trigger_query->before_commit_ = ctx->BEFORE(); + + return trigger_query; +} + +antlrcpp::Any CypherMainVisitor::visitDropTrigger(MemgraphCypher::DropTriggerContext *ctx) { + auto *trigger_query = storage_->Create(); + trigger_query->action_ = TriggerQuery::Action::DROP_TRIGGER; + trigger_query->trigger_name_ = ctx->triggerName()->symbolicName()->accept(this).as(); + return trigger_query; +} + +antlrcpp::Any CypherMainVisitor::visitShowTriggers(MemgraphCypher::ShowTriggersContext *ctx) { + auto *trigger_query = storage_->Create(); + trigger_query->action_ = TriggerQuery::Action::SHOW_TRIGGERS; + return trigger_query; +} + +antlrcpp::Any CypherMainVisitor::visitIsolationLevelQuery(MemgraphCypher::IsolationLevelQueryContext *ctx) { + auto *isolation_level_query = storage_->Create(); + + isolation_level_query->isolation_level_scope_ = [scope = ctx->isolationLevelScope()]() { + if (scope->GLOBAL()) { + 
return IsolationLevelQuery::IsolationLevelScope::GLOBAL; + } + if (scope->SESSION()) { + return IsolationLevelQuery::IsolationLevelScope::SESSION; + } + return IsolationLevelQuery::IsolationLevelScope::NEXT; + }(); + + isolation_level_query->isolation_level_ = [level = ctx->isolationLevel()]() { + if (level->SNAPSHOT()) { + return IsolationLevelQuery::IsolationLevel::SNAPSHOT_ISOLATION; + } + if (level->COMMITTED()) { + return IsolationLevelQuery::IsolationLevel::READ_COMMITTED; + } + return IsolationLevelQuery::IsolationLevel::READ_UNCOMMITTED; + }(); + + query_ = isolation_level_query; + return isolation_level_query; +} + antlrcpp::Any CypherMainVisitor::visitCypherUnion(MemgraphCypher::CypherUnionContext *ctx) { bool distinct = !ctx->ALL(); auto *cypher_union = storage_->Create(distinct); @@ -292,6 +468,7 @@ antlrcpp::Any CypherMainVisitor::visitSingleQuery(MemgraphCypher::SingleQueryCon bool has_return = false; bool has_optional_match = false; bool has_call_procedure = false; + bool has_load_csv = false; for (Clause *clause : single_query->clauses_) { const auto &clause_type = clause->GetTypeInfo(); @@ -304,6 +481,14 @@ antlrcpp::Any CypherMainVisitor::visitSingleQuery(MemgraphCypher::SingleQueryCon if (has_update || has_return) { throw SemanticException("UNWIND can't be put after RETURN clause or after an update."); } + } else if (utils::IsSubtype(clause_type, LoadCsv::kType)) { + if (has_load_csv) { + throw SemanticException("Can't have multiple LOAD CSV clauses in a single query."); + } + if (has_return) { + throw SemanticException("LOAD CSV can't be put after RETURN clause."); + } + has_load_csv = true; } else if (auto *match = utils::Downcast(clause)) { if (has_update || has_return) { throw SemanticException("MATCH can't be put after RETURN clause or after an update."); @@ -388,6 +573,9 @@ antlrcpp::Any CypherMainVisitor::visitClause(MemgraphCypher::ClauseContext *ctx) if (ctx->callProcedure()) { return static_cast(ctx->callProcedure()->accept(this).as()); } + if (ctx->loadCsv()) { + return static_cast(ctx->loadCsv()->accept(this).as()); + } // TODO: implement other clauses. throw utils::NotYetImplemented("clause '{}'", ctx->getText()); return 0; @@ -410,6 +598,14 @@ antlrcpp::Any CypherMainVisitor::visitCreate(MemgraphCypher::CreateContext *ctx) } antlrcpp::Any CypherMainVisitor::visitCallProcedure(MemgraphCypher::CallProcedureContext *ctx) { + // Don't cache queries which call procedures because the + // procedure definition can affect the behaviour of the visitor and + // the execution of the query. + // If a user recompiles and reloads the procedure with different result + // names, because of the cache, old result names will be expected while the + // procedure will return results mapped to new names. 
+ is_cacheable_ = false; + auto *call_proc = storage_->Create(); MG_ASSERT(!ctx->procedureName()->symbolicName().empty()); std::vector procedure_subnames; @@ -422,21 +618,19 @@ antlrcpp::Any CypherMainVisitor::visitCallProcedure(MemgraphCypher::CallProcedur for (auto *expr : ctx->expression()) { call_proc->arguments_.push_back(expr->accept(this)); } - if (auto *memory_limit_ctx = ctx->callProcedureMemoryLimit()) { - if (memory_limit_ctx->LIMIT()) { - call_proc->memory_limit_ = memory_limit_ctx->literal()->accept(this); - if (memory_limit_ctx->MB()) { - call_proc->memory_scale_ = 1024U * 1024U; - } else { - MG_ASSERT(memory_limit_ctx->KB()); - call_proc->memory_scale_ = 1024U; - } + + if (auto *memory_limit_ctx = ctx->procedureMemoryLimit()) { + const auto memory_limit_info = VisitMemoryLimit(memory_limit_ctx->memoryLimit(), this); + if (memory_limit_info) { + call_proc->memory_limit_ = memory_limit_info->first; + call_proc->memory_scale_ = memory_limit_info->second; } } else { // Default to 100 MB call_proc->memory_limit_ = storage_->Create(TypedValue(100)); call_proc->memory_scale_ = 1024U * 1024U; } + auto *yield_ctx = ctx->yieldProcedureResults(); if (!yield_ctx) { const auto &maybe_found = @@ -493,6 +687,7 @@ antlrcpp::Any CypherMainVisitor::visitCallProcedure(MemgraphCypher::CallProcedur // fields removed, then the query execution will report an error that we are // yielding missing fields. The user can then just retry the query. } + return call_proc; } @@ -671,6 +866,12 @@ antlrcpp::Any CypherMainVisitor::visitPrivilege(MemgraphCypher::PrivilegeContext if (ctx->AUTH()) return AuthQuery::Privilege::AUTH; if (ctx->CONSTRAINT()) return AuthQuery::Privilege::CONSTRAINT; if (ctx->DUMP()) return AuthQuery::Privilege::DUMP; + if (ctx->REPLICATION()) return AuthQuery::Privilege::REPLICATION; + if (ctx->LOCK_PATH()) return AuthQuery::Privilege::LOCK_PATH; + if (ctx->READ_FILE()) return AuthQuery::Privilege::READ_FILE; + if (ctx->FREE_MEMORY()) return AuthQuery::Privilege::FREE_MEMORY; + if (ctx->TRIGGER()) return AuthQuery::Privilege::TRIGGER; + if (ctx->CONFIG()) return AuthQuery::Privilege::CONFIG; LOG_FATAL("Should not get here - unknown privilege!"); } diff --git a/src/query/frontend/ast/cypher_main_visitor.hpp b/src/query/frontend/ast/cypher_main_visitor.hpp index 7aa31b8ad..799f74986 100644 --- a/src/query/frontend/ast/cypher_main_visitor.hpp +++ b/src/query/frontend/ast/cypher_main_visitor.hpp @@ -208,6 +208,41 @@ class CypherMainVisitor : public antlropencypher::MemgraphCypherBaseVisitor { */ antlrcpp::Any visitLockPathQuery(MemgraphCypher::LockPathQueryContext *ctx) override; + /** + * @return LoadCsvQuery* + */ + antlrcpp::Any visitLoadCsv(MemgraphCypher::LoadCsvContext *ctx) override; + + /** + * @return FreeMemoryQuery* + */ + antlrcpp::Any visitFreeMemoryQuery(MemgraphCypher::FreeMemoryQueryContext *ctx) override; + + /** + * @return TriggerQuery* + */ + antlrcpp::Any visitTriggerQuery(MemgraphCypher::TriggerQueryContext *ctx) override; + + /** + * @return CreateTrigger* + */ + antlrcpp::Any visitCreateTrigger(MemgraphCypher::CreateTriggerContext *ctx) override; + + /** + * @return DropTrigger* + */ + antlrcpp::Any visitDropTrigger(MemgraphCypher::DropTriggerContext *ctx) override; + + /** + * @return ShowTriggers* + */ + antlrcpp::Any visitShowTriggers(MemgraphCypher::ShowTriggersContext *ctx) override; + + /** + * @return IsolationLevelQuery* + */ + antlrcpp::Any visitIsolationLevelQuery(MemgraphCypher::IsolationLevelQueryContext *ctx) override; + /** * @return CypherUnion* */ 
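The is_cacheable_ flag set in visitCallProcedure above is surfaced through the IsCacheable() getter declared in the next hunk, and ParseQuery skips the AST cache whenever it is false. A toy illustration of that control flow, with hypothetical names (QueryCache, ParseResult) rather than the real interpreter types:

#include <cassert>
#include <cstdint>
#include <functional>
#include <string>
#include <unordered_map>

struct ParseResult {
  std::string ast;  // stand-in for the cloned AST
  bool is_cacheable{true};
};

class QueryCache {
 public:
  ParseResult Parse(const std::string &stripped, const std::function<ParseResult()> &do_parse) {
    const uint64_t hash = std::hash<std::string>{}(stripped);
    if (auto it = cache_.find(hash); it != cache_.end()) return it->second;
    ParseResult result = do_parse();
    if (result.is_cacheable) cache_.emplace(hash, result);  // non-cacheable queries bypass the cache
    return result;
  }
  size_t size() const { return cache_.size(); }

 private:
  std::unordered_map<uint64_t, ParseResult> cache_;
};

int main() {
  QueryCache cache;
  cache.Parse("MATCH (n) RETURN n", [] { return ParseResult{"match-ast", true}; });
  cache.Parse("CALL mg.procedures() YIELD *", [] { return ParseResult{"call-ast", false}; });
  assert(cache.size() == 1);  // only the cacheable query was stored
}

Only the plain Cypher query ends up cached; a CALL query is re-parsed on every execution, which is exactly the behaviour the comment above motivates.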
@@ -693,6 +728,8 @@ class CypherMainVisitor : public antlropencypher::MemgraphCypherBaseVisitor { Query *query() { return query_; } const static std::string kAnonPrefix; + bool IsCacheable() const { return is_cacheable_; } + private: LabelIx AddLabel(const std::string &name); PropertyIx AddProperty(const std::string &name); @@ -710,6 +747,8 @@ class CypherMainVisitor : public antlropencypher::MemgraphCypherBaseVisitor { // We use this variable in visitReturnItem to check if we are in with or // return. bool in_with_ = false; + + bool is_cacheable_ = true; }; } // namespace frontend } // namespace query diff --git a/src/query/frontend/opencypher/grammar/Cypher.g4 b/src/query/frontend/opencypher/grammar/Cypher.g4 index c26a331c2..b2276389b 100644 --- a/src/query/frontend/opencypher/grammar/Cypher.g4 +++ b/src/query/frontend/opencypher/grammar/Cypher.g4 @@ -52,7 +52,7 @@ explainQuery : EXPLAIN cypherQuery ; profileQuery : PROFILE cypherQuery ; -cypherQuery : singleQuery ( cypherUnion )* ; +cypherQuery : singleQuery ( cypherUnion )* ( queryMemoryLimit )? ; indexQuery : createIndex | dropIndex; @@ -106,14 +106,18 @@ with : WITH ( DISTINCT )? returnBody ( where )? ; cypherReturn : RETURN ( DISTINCT )? returnBody ; -callProcedure : CALL procedureName '(' ( expression ( ',' expression )* )? ')' ( callProcedureMemoryLimit )? ( yieldProcedureResults )? ; +callProcedure : CALL procedureName '(' ( expression ( ',' expression )* )? ')' ( procedureMemoryLimit )? ( yieldProcedureResults )? ; procedureName : symbolicName ( '.' symbolicName )* ; -callProcedureMemoryLimit : MEMORY ( UNLIMITED | LIMIT literal ( MB | KB ) ) ; - yieldProcedureResults : YIELD ( '*' | ( procedureResult ( ',' procedureResult )* ) ) ; +memoryLimit : MEMORY ( UNLIMITED | LIMIT literal ( MB | KB ) ) ; + +queryMemoryLimit : QUERY memoryLimit ; + +procedureMemoryLimit : PROCEDURE memoryLimit ; + procedureResult : ( variable AS variable ) | variable ; returnBody : returnItems ( order )? ( skip )? ( limit )? 
; @@ -357,7 +361,9 @@ cypherKeyword : ALL | OPTIONAL | OR | ORDER + | PROCEDURE | PROFILE + | QUERY | REDUCE | REMOVE | RETURN diff --git a/src/query/frontend/opencypher/grammar/CypherLexer.g4 b/src/query/frontend/opencypher/grammar/CypherLexer.g4 index c36114c61..1377fbc82 100644 --- a/src/query/frontend/opencypher/grammar/CypherLexer.g4 +++ b/src/query/frontend/opencypher/grammar/CypherLexer.g4 @@ -118,7 +118,9 @@ ON : O N ; OPTIONAL : O P T I O N A L ; OR : O R ; ORDER : O R D E R ; +PROCEDURE : P R O C E D U R E ; PROFILE : P R O F I L E ; +QUERY : Q U E R Y ; REDUCE : R E D U C E ; REMOVE : R E M O V E ; RETURN : R E T U R N ; diff --git a/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 b/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 index c3d3c75e7..e90157503 100644 --- a/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 +++ b/src/query/frontend/opencypher/grammar/MemgraphCypher.g4 @@ -7,24 +7,43 @@ options { tokenVocab=MemgraphCypherLexer; } import Cypher ; memgraphCypherKeyword : cypherKeyword + | AFTER | ALTER | ASYNC | AUTH + | BAD + | BEFORE | CLEAR + | CONFIG + | CSV + | COMMIT + | COMMITTED + | DATA + | DELIMITER | DATABASE | DENY | DROP | DUMP + | EXECUTE | FOR + | FREE | FROM + | GLOBAL | GRANT + | HEADER | IDENTIFIED + | ISOLATION + | LEVEL + | LOAD | LOCK | MAIN | MODE + | NEXT + | NO | PASSWORD | PORT | PRIVILEGES + | READ | REGISTER | REPLICA | REPLICAS @@ -32,11 +51,19 @@ memgraphCypherKeyword : cypherKeyword | REVOKE | ROLE | ROLES + | QUOTE + | SESSION + | SNAPSHOT | STATS | SYNC + | TRANSACTION + | TRIGGER + | TRIGGERS | TIMEOUT | TO + | UNCOMMITTED | UNLOCK + | UPDATE | USER | USERS ; @@ -56,6 +83,9 @@ query : cypherQuery | dumpQuery | replicationQuery | lockPathQuery + | freeMemoryQuery + | triggerQuery + | isolationLevelQuery ; authQuery : createRole @@ -82,6 +112,38 @@ replicationQuery : setReplicationRole | showReplicas ; +triggerQuery : createTrigger + | dropTrigger + | showTriggers + ; + +clause : cypherMatch + | unwind + | merge + | create + | set + | cypherDelete + | remove + | with + | cypherReturn + | callProcedure + | loadCsv + ; + +loadCsv : LOAD CSV FROM csvFile ( WITH | NO ) HEADER + ( IGNORE BAD ) ? + ( DELIMITER delimiter ) ? + ( QUOTE quote ) ? + AS rowVar ; + +csvFile : literal ; + +delimiter : literal ; + +quote : literal ; + +rowVar : variable ; + userOrRoleName : symbolicName ; createRole : CREATE ROLE role=userOrRoleName ; @@ -109,8 +171,24 @@ denyPrivilege : DENY ( ALL PRIVILEGES | privileges=privilegeList ) TO userOrRole revokePrivilege : REVOKE ( ALL PRIVILEGES | privileges=privilegeList ) FROM userOrRole=userOrRoleName ; -privilege : CREATE | DELETE | MATCH | MERGE | SET - | REMOVE | INDEX | STATS | AUTH | CONSTRAINT | DUMP ; +privilege : CREATE + | DELETE + | MATCH + | MERGE + | SET + | REMOVE + | INDEX + | STATS + | AUTH + | CONSTRAINT + | DUMP + | REPLICATION + | LOCK_PATH + | READ_FILE + | FREE_MEMORY + | TRIGGER + | CONFIG + ; privilegeList : privilege ( ',' privilege )* ; @@ -141,3 +219,25 @@ showReplicas : SHOW REPLICAS ; lockPathQuery : ( LOCK | UNLOCK ) DATA DIRECTORY ; +freeMemoryQuery : FREE MEMORY ; + +triggerName : symbolicName ; + +triggerStatement : .*? ; + +emptyVertex : '(' ')' ; + +emptyEdge : dash dash rightArrowHead ; + +createTrigger : CREATE TRIGGER triggerName ( ON ( emptyVertex | emptyEdge ) ? ( CREATE | UPDATE | DELETE ) ) ? 
+ ( AFTER | BEFORE ) COMMIT EXECUTE triggerStatement ; + +dropTrigger : DROP TRIGGER triggerName ; + +showTriggers : SHOW TRIGGERS ; + +isolationLevel : SNAPSHOT ISOLATION | READ COMMITTED | READ UNCOMMITTED ; + +isolationLevelScope : GLOBAL | SESSION | NEXT ; + +isolationLevelQuery : SET isolationLevelScope TRANSACTION ISOLATION LEVEL isolationLevel ; diff --git a/src/query/frontend/opencypher/grammar/MemgraphCypherLexer.g4 b/src/query/frontend/opencypher/grammar/MemgraphCypherLexer.g4 index 15923dcf2..37d8cafc1 100644 --- a/src/query/frontend/opencypher/grammar/MemgraphCypherLexer.g4 +++ b/src/query/frontend/opencypher/grammar/MemgraphCypherLexer.g4 @@ -10,27 +10,51 @@ lexer grammar MemgraphCypherLexer ; import CypherLexer ; +UNDERSCORE : '_' ; + +AFTER : A F T E R ; ALTER : A L T E R ; ASYNC : A S Y N C ; AUTH : A U T H ; +BAD : B A D ; +BEFORE : B E F O R E ; CLEAR : C L E A R ; +COMMIT : C O M M I T ; +COMMITTED : C O M M I T T E D ; +CONFIG : C O N F I G ; +CSV : C S V ; DATA : D A T A ; +DELIMITER : D E L I M I T E R ; DATABASE : D A T A B A S E ; DENY : D E N Y ; DIRECTORY : D I R E C T O R Y ; DROP : D R O P ; DUMP : D U M P ; +EXECUTE : E X E C U T E ; FOR : F O R ; +FREE : F R E E ; +FREE_MEMORY : F R E E UNDERSCORE M E M O R Y ; FROM : F R O M ; +GLOBAL : G L O B A L ; GRANT : G R A N T ; GRANTS : G R A N T S ; +HEADER : H E A D E R ; IDENTIFIED : I D E N T I F I E D ; +IGNORE : I G N O R E ; +ISOLATION : I S O L A T I O N ; +LEVEL : L E V E L ; +LOAD : L O A D ; LOCK : L O C K ; +LOCK_PATH : L O C K UNDERSCORE P A T H ; MAIN : M A I N ; MODE : M O D E ; +NEXT : N E X T ; +NO : N O ; PASSWORD : P A S S W O R D ; PORT : P O R T ; PRIVILEGES : P R I V I L E G E S ; +READ : R E A D ; +READ_FILE : R E A D UNDERSCORE F I L E ; REGISTER : R E G I S T E R ; REPLICA : R E P L I C A ; REPLICAS : R E P L I C A S ; @@ -38,10 +62,18 @@ REPLICATION : R E P L I C A T I O N ; REVOKE : R E V O K E ; ROLE : R O L E ; ROLES : R O L E S ; +QUOTE : Q U O T E ; +SESSION : S E S S I O N ; +SNAPSHOT : S N A P S H O T ; STATS : S T A T S ; SYNC : S Y N C ; TIMEOUT : T I M E O U T ; TO : T O ; +TRANSACTION : T R A N S A C T I O N ; +TRIGGER : T R I G G E R ; +TRIGGERS : T R I G G E R S ; +UNCOMMITTED : U N C O M M I T T E D ; UNLOCK : U N L O C K ; +UPDATE : U P D A T E ; USER : U S E R ; USERS : U S E R S ; diff --git a/src/query/frontend/opencypher/parser.hpp b/src/query/frontend/opencypher/parser.hpp index dcdedfa32..0bdaae228 100644 --- a/src/query/frontend/opencypher/parser.hpp +++ b/src/query/frontend/opencypher/parser.hpp @@ -35,7 +35,7 @@ class Parser { private: class FirstMessageErrorListener : public antlr4::BaseErrorListener { - void syntaxError(antlr4::IRecognizer *, antlr4::Token *, size_t line, size_t position, const std::string &message, + void syntaxError(antlr4::Recognizer *, antlr4::Token *, size_t line, size_t position, const std::string &message, std::exception_ptr) override { if (error_.empty()) { error_ = "line " + std::to_string(line) + ":" + std::to_string(position + 1) + " " + message; @@ -48,7 +48,7 @@ class Parser { FirstMessageErrorListener error_listener_; std::string query_; - antlr4::ANTLRInputStream input_{query_.c_str()}; + antlr4::ANTLRInputStream input_{query_}; antlropencypher::MemgraphCypherLexer lexer_{&input_}; antlr4::CommonTokenStream tokens_{&lexer_}; diff --git a/src/query/frontend/semantic/required_privileges.cpp b/src/query/frontend/semantic/required_privileges.cpp index e65b60aa0..49e78c4bf 100644 --- a/src/query/frontend/semantic/required_privileges.cpp 
+++ b/src/query/frontend/semantic/required_privileges.cpp @@ -1,4 +1,5 @@ #include "query/frontend/ast/ast.hpp" +#include "query/frontend/ast/ast_visitor.hpp" namespace query { @@ -50,70 +51,62 @@ class PrivilegeExtractor : public QueryVisitor, public HierarchicalTreeVis void Visit(LockPathQuery &lock_path_query) override { AddPrivilege(AuthQuery::Privilege::LOCK_PATH); } - void Visit(ReplicationQuery &replication_query) override { - switch (replication_query.action_) { - case ReplicationQuery::Action::SET_REPLICATION_ROLE: - AddPrivilege(AuthQuery::Privilege::REPLICATION); - break; - case ReplicationQuery::Action::SHOW_REPLICATION_ROLE: - AddPrivilege(AuthQuery::Privilege::REPLICATION); - break; - case ReplicationQuery::Action::REGISTER_REPLICA: - AddPrivilege(AuthQuery::Privilege::REPLICATION); - break; - case ReplicationQuery::Action::DROP_REPLICA: - AddPrivilege(AuthQuery::Privilege::REPLICATION); - break; - case ReplicationQuery::Action::SHOW_REPLICAS: - AddPrivilege(AuthQuery::Privilege::REPLICATION); - break; - } - } + void Visit(FreeMemoryQuery &free_memory_query) override { AddPrivilege(AuthQuery::Privilege::FREE_MEMORY); } - bool PreVisit(Create &) override { + void Visit(TriggerQuery &trigger_query) override { AddPrivilege(AuthQuery::Privilege::TRIGGER); } + + void Visit(ReplicationQuery &replication_query) override { AddPrivilege(AuthQuery::Privilege::REPLICATION); } + + void Visit(IsolationLevelQuery &isolation_level_query) override { AddPrivilege(AuthQuery::Privilege::CONFIG); } + + bool PreVisit(Create & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::CREATE); return false; } - bool PreVisit(CallProcedure &) override { + bool PreVisit(CallProcedure & /*unused*/) override { // TODO: Corresponding privilege return false; } - bool PreVisit(Delete &) override { + bool PreVisit(Delete & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::DELETE); return false; } - bool PreVisit(Match &) override { + bool PreVisit(Match & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::MATCH); return false; } - bool PreVisit(Merge &) override { + bool PreVisit(Merge & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::MERGE); return false; } - bool PreVisit(SetProperty &) override { + bool PreVisit(SetProperty & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::SET); return false; } - bool PreVisit(SetProperties &) override { + bool PreVisit(SetProperties & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::SET); return false; } - bool PreVisit(SetLabels &) override { + bool PreVisit(SetLabels & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::SET); return false; } - bool PreVisit(RemoveProperty &) override { + bool PreVisit(RemoveProperty & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::REMOVE); return false; } - bool PreVisit(RemoveLabels &) override { + bool PreVisit(RemoveLabels & /*unused*/) override { AddPrivilege(AuthQuery::Privilege::REMOVE); return false; } + bool PreVisit(LoadCsv & /*unused*/) override { + AddPrivilege(AuthQuery::Privilege::READ_FILE); + return false; + } - bool Visit(Identifier &) override { return true; } - bool Visit(PrimitiveLiteral &) override { return true; } - bool Visit(ParameterLookup &) override { return true; } + bool Visit(Identifier & /*unused*/) override { return true; } + bool Visit(PrimitiveLiteral & /*unused*/) override { return true; } + bool Visit(ParameterLookup & /*unused*/) override { return true; } private: void AddPrivilege(AuthQuery::Privilege privilege) { diff --git 
a/src/query/frontend/semantic/symbol_generator.cpp b/src/query/frontend/semantic/symbol_generator.cpp index 81455175f..96aff3646 100644 --- a/src/query/frontend/semantic/symbol_generator.cpp +++ b/src/query/frontend/semantic/symbol_generator.cpp @@ -12,8 +12,23 @@ namespace query { +namespace { +std::unordered_map GeneratePredefinedIdentifierMap( + const std::vector &predefined_identifiers) { + std::unordered_map identifier_map; + for (const auto &identifier : predefined_identifiers) { + identifier_map.emplace(identifier->name_, identifier); + } + + return identifier_map; +} +} // namespace + +SymbolGenerator::SymbolGenerator(SymbolTable *symbol_table, const std::vector &predefined_identifiers) + : symbol_table_(symbol_table), predefined_identifiers_{GeneratePredefinedIdentifierMap(predefined_identifiers)} {} + auto SymbolGenerator::CreateSymbol(const std::string &name, bool user_declared, Symbol::Type type, int token_position) { - auto symbol = symbol_table_.CreateSymbol(name, user_declared, type, token_position); + auto symbol = symbol_table_->CreateSymbol(name, user_declared, type, token_position); scope_.symbols[name] = symbol; return symbol; } @@ -162,6 +177,16 @@ bool SymbolGenerator::PostVisit(CallProcedure &call_proc) { return true; } +bool SymbolGenerator::PreVisit(LoadCsv &load_csv) { return false; } + +bool SymbolGenerator::PostVisit(LoadCsv &load_csv) { + if (HasSymbol(load_csv.row_var_->name_)) { + throw RedeclareVariableError(load_csv.row_var_->name_); + } + load_csv.row_var_->MapTo(CreateSymbol(load_csv.row_var_->name_, true)); + return true; +} + bool SymbolGenerator::PreVisit(Return &ret) { scope_.in_return = true; VisitReturnBody(ret.body_); @@ -217,7 +242,8 @@ bool SymbolGenerator::PostVisit(Match &) { // Check variables in property maps after visiting Match, so that they can // reference symbols out of bind order. for (auto &ident : scope_.identifiers_in_match) { - if (!HasSymbol(ident->name_)) throw UnboundVariableError(ident->name_); + if (!HasSymbol(ident->name_) && !ConsumePredefinedIdentifier(ident->name_)) + throw UnboundVariableError(ident->name_); ident->MapTo(scope_.symbols[ident->name_]); } scope_.identifiers_in_match.clear(); @@ -267,7 +293,7 @@ SymbolGenerator::ReturnType SymbolGenerator::Visit(Identifier &ident) { scope_.identifiers_in_match.emplace_back(&ident); } else { // Everything else references a bound symbol. - if (!HasSymbol(ident.name_)) throw UnboundVariableError(ident.name_); + if (!HasSymbol(ident.name_) && !ConsumePredefinedIdentifier(ident.name_)) throw UnboundVariableError(ident.name_); symbol = scope_.symbols[ident.name_]; } ident.MapTo(symbol); @@ -438,10 +464,10 @@ bool SymbolGenerator::PreVisit(EdgeAtom &edge_atom) { // Create inner symbols, but don't bind them in scope, since they are to // be used in the missing filter expression. 
auto *inner_edge = edge_atom.filter_lambda_.inner_edge; - inner_edge->MapTo(symbol_table_.CreateSymbol(inner_edge->name_, inner_edge->user_declared_, Symbol::Type::EDGE)); + inner_edge->MapTo(symbol_table_->CreateSymbol(inner_edge->name_, inner_edge->user_declared_, Symbol::Type::EDGE)); auto *inner_node = edge_atom.filter_lambda_.inner_node; inner_node->MapTo( - symbol_table_.CreateSymbol(inner_node->name_, inner_node->user_declared_, Symbol::Type::VERTEX)); + symbol_table_->CreateSymbol(inner_node->name_, inner_node->user_declared_, Symbol::Type::VERTEX)); } if (edge_atom.weight_lambda_.expression) { VisitWithIdentifiers(edge_atom.weight_lambda_.expression, @@ -496,4 +522,20 @@ void SymbolGenerator::VisitWithIdentifiers(Expression *expr, const std::vectorsecond; + MG_ASSERT(!identifier->user_declared_, "Predefined symbols cannot be user declared!"); + identifier->MapTo(CreateSymbol(identifier->name_, identifier->user_declared_)); + predefined_identifiers_.erase(it); + return true; +} + } // namespace query diff --git a/src/query/frontend/semantic/symbol_generator.hpp b/src/query/frontend/semantic/symbol_generator.hpp index 4d827d61d..0da1082ec 100644 --- a/src/query/frontend/semantic/symbol_generator.hpp +++ b/src/query/frontend/semantic/symbol_generator.hpp @@ -17,7 +17,7 @@ namespace query { /// variable types. class SymbolGenerator : public HierarchicalTreeVisitor { public: - explicit SymbolGenerator(SymbolTable &symbol_table) : symbol_table_(symbol_table) {} + explicit SymbolGenerator(SymbolTable *symbol_table, const std::vector &predefined_identifiers); using HierarchicalTreeVisitor::PostVisit; using HierarchicalTreeVisitor::PreVisit; @@ -36,6 +36,8 @@ class SymbolGenerator : public HierarchicalTreeVisitor { bool PostVisit(Create &) override; bool PreVisit(CallProcedure &) override; bool PostVisit(CallProcedure &) override; + bool PreVisit(LoadCsv &) override; + bool PostVisit(LoadCsv &) override; bool PreVisit(Return &) override; bool PostVisit(Return &) override; bool PreVisit(With &) override; @@ -114,6 +116,9 @@ class SymbolGenerator : public HierarchicalTreeVisitor { bool HasSymbol(const std::string &name); + // @return true if it added a predefined identifier with that name + bool ConsumePredefinedIdentifier(const std::string &name); + // Returns a freshly generated symbol. Previous mapping of the same name to a // different symbol is replaced with the new one. auto CreateSymbol(const std::string &name, bool user_declared, Symbol::Type type = Symbol::Type::ANY, @@ -127,15 +132,19 @@ class SymbolGenerator : public HierarchicalTreeVisitor { void VisitWithIdentifiers(Expression *, const std::vector &); - SymbolTable &symbol_table_; + SymbolTable *symbol_table_; + + // Identifiers which are injected from outside the query. Each identifier + // is mapped by its name. 
+ std::unordered_map predefined_identifiers_; Scope scope_; std::unordered_set prev_return_names_; std::unordered_set curr_return_names_; }; -inline SymbolTable MakeSymbolTable(CypherQuery *query) { +inline SymbolTable MakeSymbolTable(CypherQuery *query, const std::vector &predefined_identifiers = {}) { SymbolTable symbol_table; - SymbolGenerator symbol_generator(symbol_table); + SymbolGenerator symbol_generator(&symbol_table, predefined_identifiers); query->single_query_->Accept(symbol_generator); for (auto *cypher_union : query->cypher_unions_) { cypher_union->Accept(symbol_generator); diff --git a/src/query/frontend/stripped.cpp b/src/query/frontend/stripped.cpp index 4f8988389..9a6c31959 100644 --- a/src/query/frontend/stripped.cpp +++ b/src/query/frontend/stripped.cpp @@ -35,6 +35,7 @@ StrippedQuery::StrippedQuery(const std::string &query) : original_(query) { }; std::vector> tokens; + std::string unstripped_chunk; for (int i = 0; i < static_cast(original_.size());) { Token token = Token::UNMATCHED; int len = 0; @@ -58,6 +59,13 @@ StrippedQuery::StrippedQuery(const std::string &query) : original_(query) { if (token == Token::UNMATCHED) throw LexingException("Invalid query."); tokens.emplace_back(token, original_.substr(i, len)); i += len; + + // if we notice execute, we create a trigger which has defined statements + // the statements will be parsed separately later on so we skip it for now + if (utils::IEquals(tokens.back().second, "execute")) { + unstripped_chunk = original_.substr(i); + break; + } } std::vector token_strings; @@ -79,6 +87,7 @@ StrippedQuery::StrippedQuery(const std::string &query) : original_(query) { // named expressions in return. for (int i = 0; i < static_cast(tokens.size()); ++i) { auto &token = tokens[i]; + // We need to shift token index for every parameter since antlr's parser // thinks of parameter as two tokens. int token_index = token_strings.size() + parameters_.size(); @@ -123,6 +132,10 @@ StrippedQuery::StrippedQuery(const std::string &query) : original_(query) { } } + if (!unstripped_chunk.empty()) { + token_strings.push_back(std::move(unstripped_chunk)); + } + query_ = utils::Join(token_strings, " "); hash_ = utils::Fnv(query_); @@ -156,6 +169,7 @@ StrippedQuery::StrippedQuery(const std::string &query) : original_(query) { } // There is only whitespace, nothing to do... 
if (it == tokens.end()) break; + bool has_as = false; auto last_non_space = it; auto jt = it; @@ -168,7 +182,8 @@ StrippedQuery::StrippedQuery(const std::string &query) : original_(query) { for (; jt != tokens.end() && (jt->second != "," || num_open_braces || num_open_parantheses || num_open_brackets) && !utils::IEquals(jt->second, "order") && !utils::IEquals(jt->second, "skip") && - !utils::IEquals(jt->second, "limit") && !utils::IEquals(jt->second, "union") && jt->second != ";"; + !utils::IEquals(jt->second, "limit") && !utils::IEquals(jt->second, "union") && + !utils::IEquals(jt->second, "query") && jt->second != ";"; ++jt) { if (jt->second == "(") { ++num_open_parantheses; diff --git a/src/query/frontend/stripped_lexer_constants.hpp b/src/query/frontend/stripped_lexer_constants.hpp index 89d1503f9..5a03c9cf9 100644 --- a/src/query/frontend/stripped_lexer_constants.hpp +++ b/src/query/frontend/stripped_lexer_constants.hpp @@ -79,15 +79,19 @@ class Trie { const int kBitsetSize = 65536; const trie::Trie kKeywords = { - "union", "all", "optional", "match", "unwind", "as", "merge", "on", "create", - "set", "detach", "delete", "remove", "with", "distinct", "return", "order", "by", - "skip", "limit", "ascending", "asc", "descending", "desc", "where", "or", "xor", - "and", "not", "in", "starts", "ends", "contains", "is", "null", "case", - "when", "then", "else", "end", "count", "filter", "extract", "any", "none", - "single", "true", "false", "reduce", "coalesce", "user", "password", "alter", "drop", - "show", "stats", "unique", "explain", "profile", "storage", "index", "info", "exists", - "assert", "constraint", "node", "key", "dump", "database", "call", "yield", "memory", - "mb", "kb", "unlimited"}; + "union", "all", "optional", "match", "unwind", "as", "merge", "on", + "create", "set", "detach", "delete", "remove", "with", "distinct", "return", + "order", "by", "skip", "limit", "ascending", "asc", "descending", "desc", + "where", "or", "xor", "and", "not", "in", "starts", "ends", + "contains", "is", "null", "case", "when", "then", "else", "end", + "count", "filter", "extract", "any", "none", "single", "true", "false", + "reduce", "coalesce", "user", "password", "alter", "drop", "show", "stats", + "unique", "explain", "profile", "storage", "index", "info", "exists", "assert", + "constraint", "node", "key", "dump", "database", "call", "yield", "memory", + "mb", "kb", "unlimited", "free", "procedure", "query", "free_memory", "read_file", + "lock_path", "after", "before", "execute", "transaction", "trigger", "triggers", "update", + "comitted", "uncomitted", "global", "isolation", "level", "next", "read", "session", + "snapshot", "transaction"}; // Unicode codepoints that are allowed at the start of the unescaped name. 
 const std::bitset<kBitsetSize> kUnescapedNameAllowedStarts(
diff --git a/src/query/interpret/eval.cpp b/src/query/interpret/eval.cpp
new file mode 100644
index 000000000..6d32d38aa
--- /dev/null
+++ b/src/query/interpret/eval.cpp
@@ -0,0 +1,24 @@
+#include "query/interpret/eval.hpp"
+
+namespace query {
+
+int64_t EvaluateInt(ExpressionEvaluator *evaluator, Expression *expr, const std::string &what) {
+  TypedValue value = expr->Accept(*evaluator);
+  try {
+    return value.ValueInt();
+  } catch (TypedValueException &e) {
+    throw QueryRuntimeException(what + " must be an int");
+  }
+}
+
+std::optional<size_t> EvaluateMemoryLimit(ExpressionEvaluator *eval, Expression *memory_limit, size_t memory_scale) {
+  if (!memory_limit) return std::nullopt;
+  auto limit_value = memory_limit->Accept(*eval);
+  if (!limit_value.IsInt() || limit_value.ValueInt() <= 0)
+    throw QueryRuntimeException("Memory limit must be a positive integer.");
+  size_t limit = limit_value.ValueInt();
+  if (std::numeric_limits<size_t>::max() / memory_scale < limit) throw QueryRuntimeException("Memory limit overflow.");
+  return limit * memory_scale;
+}
+
+}  // namespace query
diff --git a/src/query/interpret/eval.hpp b/src/query/interpret/eval.hpp
index d0ffec3fc..40d0b1abb 100644
--- a/src/query/interpret/eval.hpp
+++ b/src/query/interpret/eval.hpp
@@ -656,13 +656,8 @@ class ExpressionEvaluator : public ExpressionVisitor<TypedValue> {
 /// @param what - Name of what's getting evaluated. Used for user feedback (via
 /// exception) when the evaluated value is not an int.
 /// @throw QueryRuntimeException if expression doesn't evaluate to an int.
-inline int64_t EvaluateInt(ExpressionEvaluator *evaluator, Expression *expr, const std::string &what) {
-  TypedValue value = expr->Accept(*evaluator);
-  try {
-    return value.ValueInt();
-  } catch (TypedValueException &e) {
-    throw QueryRuntimeException(what + " must be an int");
-  }
-}
+int64_t EvaluateInt(ExpressionEvaluator *evaluator, Expression *expr, const std::string &what);
+
+std::optional<size_t> EvaluateMemoryLimit(ExpressionEvaluator *eval, Expression *memory_limit, size_t memory_scale);
 
 }  // namespace query
diff --git a/src/query/interpreter.cpp b/src/query/interpreter.cpp
index 4fd28e1be..de3a341b5 100644
--- a/src/query/interpreter.cpp
+++ b/src/query/interpreter.cpp
@@ -9,6 +9,7 @@
 #include "query/db_accessor.hpp"
 #include "query/dump.hpp"
 #include "query/exceptions.hpp"
+#include "query/frontend/ast/ast.hpp"
 #include "query/frontend/ast/cypher_main_visitor.hpp"
 #include "query/frontend/opencypher/parser.hpp"
 #include "query/frontend/semantic/required_privileges.hpp"
@@ -17,20 +18,22 @@
 #include "query/plan/planner.hpp"
 #include "query/plan/profile.hpp"
 #include "query/plan/vertex_count_cache.hpp"
+#include "query/trigger.hpp"
 #include "query/typed_value.hpp"
+#include "storage/v2/property_value.hpp"
 #include "utils/algorithm.hpp"
+#include "utils/csv_parsing.hpp"
 #include "utils/event_counter.hpp"
 #include "utils/exceptions.hpp"
 #include "utils/flag_validation.hpp"
+#include "utils/likely.hpp"
 #include "utils/logging.hpp"
 #include "utils/memory.hpp"
+#include "utils/memory_tracker.hpp"
+#include "utils/readable_size.hpp"
 #include "utils/string.hpp"
 #include "utils/tsc.hpp"
 
-DEFINE_HIDDEN_bool(query_cost_planner, true, "Use the cost-estimating query planner.");
-DEFINE_VALIDATED_int32(query_plan_cache_ttl, 60, "Time to live for cached query plans, in seconds.",
-                       FLAG_IN_RANGE(0, std::numeric_limits<int32_t>::max()));
-
 namespace EventCounter {
 extern Event ReadQuery;
 extern Event WriteQuery;
@@ -58,109 +61,6 @@ void
UpdateTypeCount(const plan::ReadWriteTypeChecker::RWType type) { break; } } -} // namespace - -/** - * A container for data related to the parsing of a query. - */ -struct ParsedQuery { - std::string query_string; - std::map user_parameters; - Parameters parameters; - frontend::StrippedQuery stripped_query; - AstStorage ast_storage; - Query *query; - std::vector required_privileges; -}; - -ParsedQuery ParseQuery(const std::string &query_string, const std::map ¶ms, - utils::SkipList *cache, utils::SpinLock *antlr_lock) { - // Strip the query for caching purposes. The process of stripping a query - // "normalizes" it by replacing any literals with new parameters . This - // results in just the *structure* of the query being taken into account for - // caching. - frontend::StrippedQuery stripped_query{query_string}; - - // Copy over the parameters that were introduced during stripping. - Parameters parameters{stripped_query.literals()}; - - // Check that all user-specified parameters are provided. - for (const auto ¶m_pair : stripped_query.parameters()) { - auto it = params.find(param_pair.second); - - if (it == params.end()) { - throw query::UnprovidedParameterError("Parameter ${} not provided.", param_pair.second); - } - - parameters.Add(param_pair.first, it->second); - } - - // Cache the query's AST if it isn't already. - auto hash = stripped_query.hash(); - auto accessor = cache->access(); - auto it = accessor.find(hash); - std::unique_ptr parser; - - if (it == accessor.end()) { - { - std::unique_lock guard(*antlr_lock); - - try { - parser = std::make_unique(stripped_query.query()); - } catch (const SyntaxException &e) { - // There is a syntax exception in the stripped query. Re-run the parser - // on the original query to get an appropriate error messsage. - parser = std::make_unique(query_string); - - // If an exception was not thrown here, the stripper messed something - // up. - LOG_FATAL("The stripped query can't be parsed, but the original can."); - } - } - - // Convert the ANTLR4 parse tree into an AST. - AstStorage ast_storage; - frontend::ParsingContext context{true}; - frontend::CypherMainVisitor visitor(context, &ast_storage); - - visitor.visit(parser->tree()); - - CachedQuery cached_query{std::move(ast_storage), visitor.query(), query::GetRequiredPrivileges(visitor.query())}; - - it = accessor.insert({hash, std::move(cached_query)}).first; - } - - // Return a copy of both the AST storage and the query. 
- AstStorage ast_storage; - ast_storage.properties_ = it->second.ast_storage.properties_; - ast_storage.labels_ = it->second.ast_storage.labels_; - ast_storage.edge_types_ = it->second.ast_storage.edge_types_; - - Query *query = it->second.query->Clone(&ast_storage); - - return ParsedQuery{query_string, params, std::move(parameters), std::move(stripped_query), - std::move(ast_storage), query, it->second.required_privileges}; -} - -class SingleNodeLogicalPlan final : public LogicalPlan { - public: - SingleNodeLogicalPlan(std::unique_ptr root, double cost, AstStorage storage, - const SymbolTable &symbol_table) - : root_(std::move(root)), cost_(cost), storage_(std::move(storage)), symbol_table_(symbol_table) {} - - const plan::LogicalOperator &GetRoot() const override { return *root_; } - double GetCost() const override { return cost_; } - const SymbolTable &GetSymbolTable() const override { return symbol_table_; } - const AstStorage &GetAstStorage() const override { return storage_; } - - private: - std::unique_ptr root_; - double cost_; - AstStorage storage_; - SymbolTable symbol_table_; -}; - -CachedPlan::CachedPlan(std::unique_ptr plan) : plan_(std::move(plan)) {} struct Callback { std::vector header; @@ -441,11 +341,13 @@ Callback HandleAuthQuery(AuthQuery *auth_query, AuthQueryHandler *auth, const Pa } } -Callback HandleReplicationQuery(ReplicationQuery *repl_query, ReplQueryHandler *handler, const Parameters ¶meters, - DbAccessor *db_accessor) { +Callback HandleReplicationQuery(ReplicationQuery *repl_query, const Parameters ¶meters, + InterpreterContext *interpreter_context, DbAccessor *db_accessor) { Frame frame(0); SymbolTable symbol_table; EvaluationContext evaluation_context; + // TODO: MemoryResource for EvaluationContext, it should probably be passed as + // the argument to Callback. 
evaluation_context.timestamp = std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()) .count(); @@ -460,16 +362,17 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, ReplQueryHandler * if (port.IsInt()) { maybe_port = port.ValueInt(); } - callback.fn = [handler, role = repl_query->role_, maybe_port] { - handler->SetReplicationRole(role, maybe_port); + callback.fn = [handler = ReplQueryHandler{interpreter_context->db}, role = repl_query->role_, + maybe_port]() mutable { + handler.SetReplicationRole(role, maybe_port); return std::vector>(); }; return callback; } case ReplicationQuery::Action::SHOW_REPLICATION_ROLE: { callback.header = {"replication mode"}; - callback.fn = [handler] { - auto mode = handler->ShowReplicationRole(); + callback.fn = [handler = ReplQueryHandler{interpreter_context->db}] { + auto mode = handler.ShowReplicationRole(); switch (mode) { case ReplicationQuery::ReplicationRole::MAIN: { return std::vector>{{TypedValue("main")}}; @@ -492,24 +395,25 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, ReplQueryHandler * } else if (timeout.IsInt()) { maybe_timeout = static_cast(timeout.ValueInt()); } - callback.fn = [handler, name, socket_address, sync_mode, maybe_timeout] { - handler->RegisterReplica(name, std::string(socket_address.ValueString()), sync_mode, maybe_timeout); + callback.fn = [handler = ReplQueryHandler{interpreter_context->db}, name, socket_address, sync_mode, + maybe_timeout]() mutable { + handler.RegisterReplica(name, std::string(socket_address.ValueString()), sync_mode, maybe_timeout); return std::vector>(); }; return callback; } case ReplicationQuery::Action::DROP_REPLICA: { const auto &name = repl_query->replica_name_; - callback.fn = [handler, name] { - handler->DropReplica(name); + callback.fn = [handler = ReplQueryHandler{interpreter_context->db}, name]() mutable { + handler.DropReplica(name); return std::vector>(); }; return callback; } case ReplicationQuery::Action::SHOW_REPLICAS: { callback.header = {"name", "socket_address", "sync_mode", "timeout"}; - callback.fn = [handler, replica_nfields = callback.header.size()] { - const auto &replicas = handler->ShowReplicas(); + callback.fn = [handler = ReplQueryHandler{interpreter_context->db}, replica_nfields = callback.header.size()] { + const auto &replicas = handler.ShowReplicas(); auto typed_replicas = std::vector>{}; typed_replicas.reserve(replicas.size()); for (const auto &replica : replicas) { @@ -539,15 +443,9 @@ Callback HandleReplicationQuery(ReplicationQuery *repl_query, ReplQueryHandler * }; return callback; } - return callback; } } -Interpreter::Interpreter(InterpreterContext *interpreter_context) : interpreter_context_(interpreter_context) { - MG_ASSERT(interpreter_context_, "Interpreter context must not be NULL"); -} - -namespace { // Struct for lazy pulling from a vector struct PullPlanVector { explicit PullPlanVector(std::vector> values) : values_(std::move(values)) {} @@ -572,17 +470,19 @@ struct PullPlanVector { struct PullPlan { explicit PullPlan(std::shared_ptr plan, const Parameters &parameters, bool is_profile_query, - DbAccessor *dba, InterpreterContext *interpreter_context, - utils::MonotonicBufferResource *execution_memory); - std::optional Pull(AnyStream *stream, std::optional n, - const std::vector &output_symbols, - std::map *summary); + DbAccessor *dba, InterpreterContext *interpreter_context, utils::MemoryResource *execution_memory, + TriggerContextCollector *trigger_context_collector = nullptr, + std::optional memory_limit =
{}); + std::optional Pull(AnyStream *stream, std::optional n, + const std::vector &output_symbols, + std::map *summary); private: std::shared_ptr plan_ = nullptr; plan::UniqueCursorPtr cursor_ = nullptr; Frame frame_; ExecutionContext ctx_; + std::optional memory_limit_; // As it's possible to pull from a query execution multiple times, // we need to keep track of the total execution time across @@ -599,11 +499,12 @@ struct PullPlan { }; PullPlan::PullPlan(const std::shared_ptr plan, const Parameters &parameters, const bool is_profile_query, - DbAccessor *dba, InterpreterContext *interpreter_context, - utils::MonotonicBufferResource *execution_memory) + DbAccessor *dba, InterpreterContext *interpreter_context, utils::MemoryResource *execution_memory, + TriggerContextCollector *trigger_context_collector, const std::optional memory_limit) : plan_(plan), cursor_(plan->plan().MakeCursor(execution_memory)), - frame_(plan->symbol_table().max_position(), execution_memory) { + frame_(plan->symbol_table().max_position(), execution_memory), + memory_limit_(memory_limit) { ctx_.db_accessor = dba; ctx_.symbol_table = plan->symbol_table(); ctx_.evaluation_context.timestamp = @@ -612,30 +513,41 @@ PullPlan::PullPlan(const std::shared_ptr plan, const Parameters &par ctx_.evaluation_context.parameters = parameters; ctx_.evaluation_context.properties = NamesToProperties(plan->ast_storage().properties_, dba); ctx_.evaluation_context.labels = NamesToLabels(plan->ast_storage().labels_, dba); - ctx_.execution_tsc_timer = utils::TSCTimer(interpreter_context->tsc_frequency); - ctx_.max_execution_time_sec = interpreter_context->execution_timeout_sec; + if (interpreter_context->execution_timeout_sec > 0) { + ctx_.timer = utils::AsyncTimer{interpreter_context->execution_timeout_sec}; + } ctx_.is_shutting_down = &interpreter_context->is_shutting_down; ctx_.is_profile_query = is_profile_query; + ctx_.trigger_context_collector = trigger_context_collector; } -std::optional PullPlan::Pull(AnyStream *stream, std::optional n, - const std::vector &output_symbols, - std::map *summary) { +std::optional PullPlan::Pull(AnyStream *stream, std::optional n, - const std::vector &output_symbols, - std::map *summary) { // Set up temporary memory for a single Pull. Initial memory comes from the // stack. 256 KiB should fit on the stack and should be more than enough for a // single `Pull`. constexpr size_t stack_size = 256 * 1024; char stack_data[stack_size]; + utils::ResourceWithOutOfMemoryException resource_with_exception; + utils::MonotonicBufferResource monotonic_memory(&stack_data[0], stack_size, &resource_with_exception); + // We can throw on every query because simple queries (e.g. deletions) will use only + // the stack-allocated buffer. + // Also, we want to throw only when the query engine requests more memory and not the storage, + // so we add the exception to the allocator. + // TODO (mferencevic): Tune the parameters accordingly. + utils::PoolResource pool_memory(128, 1024, &monotonic_memory); + std::optional maybe_limited_resource; + + if (memory_limit_) { + maybe_limited_resource.emplace(&pool_memory, *memory_limit_); + ctx_.evaluation_context.memory = &*maybe_limited_resource; + } else { + ctx_.evaluation_context.memory = &pool_memory; + } // Returns true if a result was pulled. - const auto pull_result = [&]() -> bool { - utils::MonotonicBufferResource monotonic_memory(&stack_data[0], stack_size); - // TODO (mferencevic): Tune the parameters accordingly.
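The new allocator chain set up above — a stack buffer feeding a monotonic resource whose upstream throws on out-of-memory, a pool resource on top for reusable small blocks, and an optional per-query limit wrapper — can be approximated with the standard std::pmr types. This is an analogy sketch only; utils::MonotonicBufferResource, utils::PoolResource and the limit/exception wrappers are Memgraph's own implementations:

#include <cstddef>
#include <memory_resource>
#include <vector>

int main() {
  // Initial memory comes from the stack; once the buffer is exhausted the
  // monotonic resource falls back to its upstream resource.
  constexpr std::size_t kStackSize = 256 * 1024;
  char stack_data[kStackSize];
  std::pmr::monotonic_buffer_resource monotonic(stack_data, kStackSize,
                                                std::pmr::get_default_resource());

  // The pool recycles freed blocks (which a monotonic resource never does)
  // and only asks `monotonic` for fresh memory when it has to grow.
  std::pmr::unsynchronized_pool_resource pool(&monotonic);

  std::pmr::vector<int> frame_values(&pool);
  for (int i = 0; i < 1000; ++i) frame_values.push_back(i);
}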
- utils::PoolResource pool_memory(128, 1024, &monotonic_memory); - ctx_.evaluation_context.memory = &pool_memory; - - return cursor_->Pull(frame_, ctx_); - }; + const auto pull_result = [&]() -> bool { return cursor_->Pull(frame_, ctx_); }; const auto stream_values = [&]() { // TODO: The streamed values should also probably use the above memory. @@ -684,51 +596,21 @@ std::optional PullPlan::Pull(AnyStream *stream, std::optional< summary->insert_or_assign("plan_execution_time", execution_time_.count()); cursor_->Shutdown(); ctx_.profile_execution_time = execution_time_; - return ctx_; -} -} // namespace - -/** - * Convert a parsed *Cypher* query's AST into a logical plan. - * - * The created logical plan will take ownership of the `AstStorage` within - * `ParsedQuery` and might modify it during planning. - */ -std::unique_ptr MakeLogicalPlan(AstStorage ast_storage, CypherQuery *query, const Parameters &parameters, - DbAccessor *db_accessor) { - auto vertex_counts = plan::MakeVertexCountCache(db_accessor); - auto symbol_table = MakeSymbolTable(query); - auto planning_context = plan::MakePlanningContext(&ast_storage, &symbol_table, query, &vertex_counts); - std::unique_ptr root; - double cost; - std::tie(root, cost) = plan::MakeLogicalPlan(&planning_context, parameters, FLAGS_query_cost_planner); - return std::make_unique(std::move(root), cost, std::move(ast_storage), - std::move(symbol_table)); -} - -/** - * Return the parsed *Cypher* query's AST cached logical plan, or create and - * cache a fresh one if it doesn't yet exist. - */ -std::shared_ptr CypherQueryToPlan(uint64_t hash, AstStorage ast_storage, CypherQuery *query, - const Parameters &parameters, utils::SkipList *plan_cache, - DbAccessor *db_accessor) { - auto plan_cache_access = plan_cache->access(); - auto it = plan_cache_access.find(hash); - if (it != plan_cache_access.end()) { - if (it->second->IsExpired()) { - plan_cache_access.remove(hash); - } else { - return it->second; - } - } - return plan_cache_access - .insert({hash, - std::make_shared(MakeLogicalPlan(std::move(ast_storage), (query), parameters, db_accessor))}) - .first->second; + return GetStatsWithTotalTime(ctx_); } using RWType = plan::ReadWriteTypeChecker::RWType; +} // namespace + +InterpreterContext::InterpreterContext(storage::Storage *db, const std::filesystem::path &data_directory) : db(db) { auto storage_accessor = db->Access(); DbAccessor dba{&storage_accessor}; trigger_store.emplace(data_directory / "triggers", &ast_cache, &dba, &antlr_lock); } + +Interpreter::Interpreter(InterpreterContext *interpreter_context) : interpreter_context_(interpreter_context) { + MG_ASSERT(interpreter_context_, "Interpreter context must not be NULL"); +} PreparedQuery Interpreter::PrepareTransactionQuery(std::string_view query_upper) { std::function handler; @@ -741,8 +623,13 @@ PreparedQuery Interpreter::PrepareTransactionQuery(std::string_view query_upper) in_explicit_transaction_ = true; expect_rollback_ = false; - db_accessor_.emplace(interpreter_context_->db->Access()); - execution_db_accessor_.emplace(&*db_accessor_); + db_accessor_ = + std::make_unique(interpreter_context_->db->Access(GetIsolationLevelOverride())); + execution_db_accessor_.emplace(db_accessor_.get()); + + if (interpreter_context_->trigger_store->HasTriggers()) { + trigger_context_collector_.emplace(interpreter_context_->trigger_store->GetEventTypes()); + } }; } else if (query_upper == "COMMIT") { handler = [this] { @@ -789,10 +676,26 @@ PreparedQuery Interpreter::PrepareTransactionQuery(std::string_view
query_upper) PreparedQuery PrepareCypherQuery(ParsedQuery parsed_query, std::map *summary, InterpreterContext *interpreter_context, DbAccessor *dba, - utils::MonotonicBufferResource *execution_memory) { - auto plan = CypherQueryToPlan(parsed_query.stripped_query.hash(), std::move(parsed_query.ast_storage), - utils::Downcast(parsed_query.query), parsed_query.parameters, - &interpreter_context->plan_cache, dba); + utils::MemoryResource *execution_memory, + TriggerContextCollector *trigger_context_collector = nullptr) { + auto *cypher_query = utils::Downcast(parsed_query.query); + + Frame frame(0); + SymbolTable symbol_table; + EvaluationContext evaluation_context; + evaluation_context.timestamp = + std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()) + .count(); + evaluation_context.parameters = parsed_query.parameters; + ExpressionEvaluator evaluator(&frame, symbol_table, evaluation_context, dba, storage::View::OLD); + const auto memory_limit = EvaluateMemoryLimit(&evaluator, cypher_query->memory_limit_, cypher_query->memory_scale_); + if (memory_limit) { + spdlog::info("Running query with memory limit of {}", utils::GetReadableSize(*memory_limit)); + } + + auto plan = CypherQueryToPlan(parsed_query.stripped_query.hash(), std::move(parsed_query.ast_storage), cypher_query, + parsed_query.parameters, + parsed_query.is_cacheable ? &interpreter_context->plan_cache : nullptr, dba); summary->insert_or_assign("cost_estimate", plan->cost()); auto rw_type_checker = plan::ReadWriteTypeChecker(); @@ -811,8 +714,8 @@ PreparedQuery PrepareCypherQuery(ParsedQuery parsed_query, std::map(plan, parsed_query.parameters, false, dba, interpreter_context, execution_memory); + auto pull_plan = std::make_shared(plan, parsed_query.parameters, false, dba, interpreter_context, + execution_memory, trigger_context_collector, memory_limit); return PreparedQuery{std::move(header), std::move(parsed_query.required_privileges), [pull_plan = std::move(pull_plan), output_symbols = std::move(output_symbols), summary]( AnyStream *stream, std::optional n) -> std::optional { @@ -826,7 +729,7 @@ PreparedQuery PrepareCypherQuery(ParsedQuery parsed_query, std::map *summary, InterpreterContext *interpreter_context, DbAccessor *dba, - utils::MonotonicBufferResource *execution_memory) { + utils::MemoryResource *execution_memory) { const std::string kExplainQueryStart = "explain "; MG_ASSERT(utils::StartsWith(utils::ToLowerCase(parsed_query.stripped_query.query()), kExplainQueryStart), "Expected stripped query to start with '{}'", kExplainQueryStart); @@ -844,9 +747,9 @@ PreparedQuery PrepareExplainQuery(ParsedQuery parsed_query, std::map(parsed_inner_query.query); MG_ASSERT(cypher_query, "Cypher grammar should not allow other queries in EXPLAIN"); - auto cypher_query_plan = - CypherQueryToPlan(parsed_inner_query.stripped_query.hash(), std::move(parsed_inner_query.ast_storage), - cypher_query, parsed_inner_query.parameters, &interpreter_context->plan_cache, dba); + auto cypher_query_plan = CypherQueryToPlan( + parsed_inner_query.stripped_query.hash(), std::move(parsed_inner_query.ast_storage), cypher_query, + parsed_inner_query.parameters, parsed_inner_query.is_cacheable ? 
&interpreter_context->plan_cache : nullptr, dba); std::stringstream printed_plan; plan::PrettyPrint(*dba, &cypher_query_plan->plan(), &printed_plan); @@ -872,7 +775,7 @@ PreparedQuery PrepareExplainQuery(ParsedQuery parsed_query, std::map *summary, InterpreterContext *interpreter_context, - DbAccessor *dba, utils::MonotonicBufferResource *execution_memory) { + DbAccessor *dba, utils::MemoryResource *execution_memory) { const std::string kProfileQueryStart = "profile "; MG_ASSERT(utils::StartsWith(utils::ToLowerCase(parsed_query.stripped_query.query()), kProfileQueryStart), @@ -910,44 +813,53 @@ PreparedQuery PrepareProfileQuery(ParsedQuery parsed_query, bool in_explicit_tra auto *cypher_query = utils::Downcast(parsed_inner_query.query); MG_ASSERT(cypher_query, "Cypher grammar should not allow other queries in PROFILE"); + Frame frame(0); + SymbolTable symbol_table; + EvaluationContext evaluation_context; + evaluation_context.timestamp = + std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()) + .count(); + evaluation_context.parameters = parsed_inner_query.parameters; + ExpressionEvaluator evaluator(&frame, symbol_table, evaluation_context, dba, storage::View::OLD); + const auto memory_limit = EvaluateMemoryLimit(&evaluator, cypher_query->memory_limit_, cypher_query->memory_scale_); - auto cypher_query_plan = - CypherQueryToPlan(parsed_inner_query.stripped_query.hash(), std::move(parsed_inner_query.ast_storage), - cypher_query, parsed_inner_query.parameters, &interpreter_context->plan_cache, dba); - + auto cypher_query_plan = CypherQueryToPlan( + parsed_inner_query.stripped_query.hash(), std::move(parsed_inner_query.ast_storage), cypher_query, + parsed_inner_query.parameters, parsed_inner_query.is_cacheable ? &interpreter_context->plan_cache : nullptr, dba); auto rw_type_checker = plan::ReadWriteTypeChecker(); rw_type_checker.InferRWType(const_cast(cypher_query_plan->plan())); - return PreparedQuery{ - {"OPERATOR", "ACTUAL HITS", "RELATIVE TIME", "ABSOLUTE TIME"}, - std::move(parsed_query.required_privileges), - [plan = std::move(cypher_query_plan), parameters = std::move(parsed_inner_query.parameters), summary, dba, - interpreter_context, execution_memory, - // We want to execute the query we are profiling lazily, so we delay - // the construction of the corresponding context. - ctx = std::optional{}, pull_plan = std::shared_ptr(nullptr)]( - AnyStream *stream, std::optional n) mutable -> std::optional { - // No output symbols are given so that nothing is streamed. - if (!ctx) { - ctx = PullPlan(plan, parameters, true, dba, interpreter_context, execution_memory) - .Pull(stream, {}, {}, summary); - pull_plan = std::make_shared(ProfilingStatsToTable(ctx->stats, ctx->profile_execution_time)); - } + return PreparedQuery{{"OPERATOR", "ACTUAL HITS", "RELATIVE TIME", "ABSOLUTE TIME"}, + std::move(parsed_query.required_privileges), + [plan = std::move(cypher_query_plan), parameters = std::move(parsed_inner_query.parameters), + summary, dba, interpreter_context, execution_memory, memory_limit, + // We want to execute the query we are profiling lazily, so we delay + // the construction of the corresponding context. + stats_and_total_time = std::optional{}, + pull_plan = std::shared_ptr(nullptr)]( + AnyStream *stream, std::optional n) mutable -> std::optional { + // No output symbols are given so that nothing is streamed. 
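The PROFILE hunk here defers running the inner plan until the first pull and memoizes the collected stats in an optional captured by the mutable lambda, so repeated pulls reuse a single execution. A stripped-down sketch of that lazy-execution idiom, with hypothetical names:

#include <functional>
#include <iostream>
#include <optional>
#include <utility>

std::function<int()> MakeLazy(std::function<int()> expensive) {
  // `result` lives in the closure; `mutable` lets the lambda fill it in.
  return [expensive = std::move(expensive), result = std::optional<int>{}]() mutable {
    if (!result) result = expensive();  // executed exactly once
    return *result;
  };
}

int main() {
  auto profiled = MakeLazy([] {
    std::cout << "executing plan...\n";
    return 42;
  });
  // Nothing has run yet; the first call triggers execution, the second
  // call only returns the memoized value.
  std::cout << profiled() << ' ' << profiled() << '\n';
}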
+ if (!stats_and_total_time) { + stats_and_total_time = PullPlan(plan, parameters, true, dba, interpreter_context, + execution_memory, nullptr, memory_limit) + .Pull(stream, {}, {}, summary); + pull_plan = std::make_shared(ProfilingStatsToTable(*stats_and_total_time)); + } - MG_ASSERT(ctx, "Failed to execute the query!"); + MG_ASSERT(stats_and_total_time, "Failed to execute the query!"); - if (pull_plan->Pull(stream, n)) { - summary->insert_or_assign("profile", ProfilingStatsToJson(ctx->stats, ctx->profile_execution_time).dump()); - return QueryHandlerResult::ABORT; - } + if (pull_plan->Pull(stream, n)) { + summary->insert_or_assign("profile", ProfilingStatsToJson(*stats_and_total_time).dump()); + return QueryHandlerResult::ABORT; + } - return std::nullopt; - }, - rw_type_checker.type}; + return std::nullopt; + }, + rw_type_checker.type}; } PreparedQuery PrepareDumpQuery(ParsedQuery parsed_query, std::map *summary, DbAccessor *dba, - utils::MonotonicBufferResource *execution_memory) { + utils::MemoryResource *execution_memory) { return PreparedQuery{{"QUERY"}, std::move(parsed_query.required_privileges), [pull_plan = std::make_shared(dba)]( @@ -962,7 +874,7 @@ PreparedQuery PrepareDumpQuery(ParsedQuery parsed_query, std::map *summary, InterpreterContext *interpreter_context, - utils::MonotonicBufferResource *execution_memory) { + utils::MemoryResource *execution_memory) { if (in_explicit_transaction) { throw IndexInMulticommandTxException(); } @@ -1031,7 +943,7 @@ PreparedQuery PrepareIndexQuery(ParsedQuery parsed_query, bool in_explicit_trans PreparedQuery PrepareAuthQuery(ParsedQuery parsed_query, bool in_explicit_transaction, std::map *summary, InterpreterContext *interpreter_context, - DbAccessor *dba, utils::MonotonicBufferResource *execution_memory) { + DbAccessor *dba, utils::MemoryResource *execution_memory) { if (in_explicit_transaction) { throw UserModificationInMulticommandTxException(); } @@ -1065,25 +977,30 @@ PreparedQuery PrepareAuthQuery(ParsedQuery parsed_query, bool in_explicit_transa RWType::NONE}; } -PreparedQuery PrepareReplicationQuery(ParsedQuery parsed_query, bool in_explicit_transaction, +PreparedQuery PrepareReplicationQuery(ParsedQuery parsed_query, const bool in_explicit_transaction, InterpreterContext *interpreter_context, DbAccessor *dba) { if (in_explicit_transaction) { throw ReplicationModificationInMulticommandTxException(); } auto *replication_query = utils::Downcast(parsed_query.query); - ReplQueryHandler handler{interpreter_context->db}; - auto callback = HandleReplicationQuery(replication_query, &handler, parsed_query.parameters, dba); + auto callback = HandleReplicationQuery(replication_query, parsed_query.parameters, interpreter_context, dba); return PreparedQuery{callback.header, std::move(parsed_query.required_privileges), - [pull_plan = std::make_shared(callback.fn())]( - AnyStream *stream, std::optional n) -> std::optional { + [callback_fn = std::move(callback.fn), pull_plan = std::shared_ptr{nullptr}]( + AnyStream *stream, std::optional n) mutable -> std::optional { + if (UNLIKELY(!pull_plan)) { + pull_plan = std::make_shared(callback_fn()); + } + if (pull_plan->Pull(stream, n)) { return QueryHandlerResult::COMMIT; } return std::nullopt; }, RWType::NONE}; + // False positive report for the std::make_shared above + // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks) } PreparedQuery PrepareLockPathQuery(ParsedQuery parsed_query, const bool in_explicit_transaction, @@ -1094,39 +1011,207 @@ PreparedQuery PrepareLockPathQuery(ParsedQuery 
parsed_query, const bool in_expli auto *lock_path_query = utils::Downcast(parsed_query.query); - Frame frame(0); - SymbolTable symbol_table; - EvaluationContext evaluation_context; - evaluation_context.timestamp = - std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()) - .count(); - evaluation_context.parameters = parsed_query.parameters; - ExpressionEvaluator evaluator(&frame, symbol_table, evaluation_context, dba, storage::View::OLD); - - Callback callback; - switch (lock_path_query->action_) { - case LockPathQuery::Action::LOCK_PATH: - if (!interpreter_context->db->LockPath()) { - throw QueryRuntimeException("Failed to lock the data directory"); - } - break; - case LockPathQuery::Action::UNLOCK_PATH: - if (!interpreter_context->db->UnlockPath()) { - throw QueryRuntimeException("Failed to unlock the data directory"); - } - break; - } - - return PreparedQuery{callback.header, std::move(parsed_query.required_privileges), - [](AnyStream *stream, std::optional n) -> std::optional { + return PreparedQuery{{}, + std::move(parsed_query.required_privileges), + [interpreter_context, action = lock_path_query->action_]( + AnyStream *stream, std::optional n) -> std::optional { + switch (action) { + case LockPathQuery::Action::LOCK_PATH: + if (!interpreter_context->db->LockPath()) { + throw QueryRuntimeException("Failed to lock the data directory"); + } + break; + case LockPathQuery::Action::UNLOCK_PATH: + if (!interpreter_context->db->UnlockPath()) { + throw QueryRuntimeException("Failed to unlock the data directory"); + } + break; + } return QueryHandlerResult::COMMIT; }, RWType::NONE}; } +PreparedQuery PrepareFreeMemoryQuery(ParsedQuery parsed_query, const bool in_explicit_transaction, + InterpreterContext *interpreter_context) { + if (in_explicit_transaction) { + throw FreeMemoryModificationInMulticommandTxException(); + } + + return PreparedQuery{ + {}, + std::move(parsed_query.required_privileges), + [interpreter_context](AnyStream *stream, std::optional n) -> std::optional { + interpreter_context->db->FreeMemory(); + return QueryHandlerResult::COMMIT; + }, + RWType::NONE}; +} + +TriggerEventType ToTriggerEventType(const TriggerQuery::EventType event_type) { + switch (event_type) { + case TriggerQuery::EventType::ANY: + return TriggerEventType::ANY; + + case TriggerQuery::EventType::CREATE: + return TriggerEventType::CREATE; + + case TriggerQuery::EventType::VERTEX_CREATE: + return TriggerEventType::VERTEX_CREATE; + + case TriggerQuery::EventType::EDGE_CREATE: + return TriggerEventType::EDGE_CREATE; + + case TriggerQuery::EventType::DELETE: + return TriggerEventType::DELETE; + + case TriggerQuery::EventType::VERTEX_DELETE: + return TriggerEventType::VERTEX_DELETE; + + case TriggerQuery::EventType::EDGE_DELETE: + return TriggerEventType::EDGE_DELETE; + + case TriggerQuery::EventType::UPDATE: + return TriggerEventType::UPDATE; + + case TriggerQuery::EventType::VERTEX_UPDATE: + return TriggerEventType::VERTEX_UPDATE; + + case TriggerQuery::EventType::EDGE_UPDATE: + return TriggerEventType::EDGE_UPDATE; + } +} + +Callback CreateTrigger(TriggerQuery *trigger_query, + const std::map &user_parameters, + InterpreterContext *interpreter_context, DbAccessor *dba) { + return { + {}, + [trigger_name = std::move(trigger_query->trigger_name_), trigger_statement = std::move(trigger_query->statement_), + event_type = trigger_query->event_type_, before_commit = trigger_query->before_commit_, interpreter_context, dba, + user_parameters]() -> std::vector> { + 
interpreter_context->trigger_store->AddTrigger( + trigger_name, trigger_statement, user_parameters, ToTriggerEventType(event_type), + before_commit ? TriggerPhase::BEFORE_COMMIT : TriggerPhase::AFTER_COMMIT, &interpreter_context->ast_cache, + dba, &interpreter_context->antlr_lock); + return {}; + }}; +} + +Callback DropTrigger(TriggerQuery *trigger_query, InterpreterContext *interpreter_context) { + return {{}, + [trigger_name = std::move(trigger_query->trigger_name_), + interpreter_context]() -> std::vector> { + interpreter_context->trigger_store->DropTrigger(trigger_name); + return {}; + }}; +} + +Callback ShowTriggers(InterpreterContext *interpreter_context) { + return {{"trigger name", "statement", "event type", "phase"}, [interpreter_context] { + std::vector> results; + auto trigger_infos = interpreter_context->trigger_store->GetTriggerInfo(); + results.reserve(trigger_infos.size()); + for (auto &trigger_info : trigger_infos) { + std::vector typed_trigger_info; + typed_trigger_info.reserve(4); + typed_trigger_info.emplace_back(std::move(trigger_info.name)); + typed_trigger_info.emplace_back(std::move(trigger_info.statement)); + typed_trigger_info.emplace_back(TriggerEventTypeToString(trigger_info.event_type)); + typed_trigger_info.emplace_back(trigger_info.phase == TriggerPhase::BEFORE_COMMIT ? "BEFORE COMMIT" + : "AFTER COMMIT"); + results.push_back(std::move(typed_trigger_info)); + } + + return results; + }}; +} + +PreparedQuery PrepareTriggerQuery(ParsedQuery parsed_query, const bool in_explicit_transaction, + InterpreterContext *interpreter_context, DbAccessor *dba, + const std::map &user_parameters) { + if (in_explicit_transaction) { + throw TriggerModificationInMulticommandTxException(); + } + + auto *trigger_query = utils::Downcast(parsed_query.query); + MG_ASSERT(trigger_query); + + auto callback = [trigger_query, interpreter_context, dba, &user_parameters] { + switch (trigger_query->action_) { + case TriggerQuery::Action::CREATE_TRIGGER: + return CreateTrigger(trigger_query, user_parameters, interpreter_context, dba); + case TriggerQuery::Action::DROP_TRIGGER: + return DropTrigger(trigger_query, interpreter_context); + case TriggerQuery::Action::SHOW_TRIGGERS: + return ShowTriggers(interpreter_context); + } + }(); + + return PreparedQuery{std::move(callback.header), std::move(parsed_query.required_privileges), + [callback_fn = std::move(callback.fn), pull_plan = std::shared_ptr{nullptr}]( + AnyStream *stream, std::optional n) mutable -> std::optional { + if (UNLIKELY(!pull_plan)) { + pull_plan = std::make_shared(callback_fn()); + } + + if (pull_plan->Pull(stream, n)) { + return QueryHandlerResult::COMMIT; + } + return std::nullopt; + }, + RWType::NONE}; + // False positive report for the std::make_shared above + // NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks) +} + +constexpr auto ToStorageIsolationLevel(const IsolationLevelQuery::IsolationLevel isolation_level) noexcept { + switch (isolation_level) { + case IsolationLevelQuery::IsolationLevel::SNAPSHOT_ISOLATION: + return storage::IsolationLevel::SNAPSHOT_ISOLATION; + case IsolationLevelQuery::IsolationLevel::READ_COMMITTED: + return storage::IsolationLevel::READ_COMMITTED; + case IsolationLevelQuery::IsolationLevel::READ_UNCOMMITTED: + return storage::IsolationLevel::READ_UNCOMMITTED; + } +} + +PreparedQuery PrepareIsolationLevelQuery(ParsedQuery parsed_query, const bool in_explicit_transaction, + InterpreterContext *interpreter_context, Interpreter *interpreter) { + if (in_explicit_transaction) { + throw 
IsolationLevelModificationInMulticommandTxException(); + } + + auto *isolation_level_query = utils::Downcast(parsed_query.query); + MG_ASSERT(isolation_level_query); + + const auto isolation_level = ToStorageIsolationLevel(isolation_level_query->isolation_level_); + + auto callback = [isolation_level_query, isolation_level, interpreter_context, + interpreter]() -> std::function { + switch (isolation_level_query->isolation_level_scope_) { + case IsolationLevelQuery::IsolationLevelScope::GLOBAL: + return [interpreter_context, isolation_level] { interpreter_context->db->SetIsolationLevel(isolation_level); }; + case IsolationLevelQuery::IsolationLevelScope::SESSION: + return [interpreter, isolation_level] { interpreter->SetSessionIsolationLevel(isolation_level); }; + case IsolationLevelQuery::IsolationLevelScope::NEXT: + return [interpreter, isolation_level] { interpreter->SetNextTransactionIsolationLevel(isolation_level); }; + } + }(); + + return PreparedQuery{ + {}, + std::move(parsed_query.required_privileges), + [callback = std::move(callback)](AnyStream *stream, std::optional n) -> std::optional { + callback(); + return QueryHandlerResult::COMMIT; + }, + RWType::NONE}; +} + PreparedQuery PrepareInfoQuery(ParsedQuery parsed_query, bool in_explicit_transaction, std::map *summary, InterpreterContext *interpreter_context, - storage::Storage *db, utils::MonotonicBufferResource *execution_memory) { + storage::Storage *db, utils::MemoryResource *execution_memory) { if (in_explicit_transaction) { throw InfoInMulticommandTxException(); } @@ -1145,7 +1230,10 @@ PreparedQuery PrepareInfoQuery(ParsedQuery parsed_query, bool in_explicit_transa {TypedValue("edge_count"), TypedValue(static_cast(info.edge_count))}, {TypedValue("average_degree"), TypedValue(info.average_degree)}, {TypedValue("memory_usage"), TypedValue(static_cast(info.memory_usage))}, - {TypedValue("disk_usage"), TypedValue(static_cast(info.disk_usage))}}; + {TypedValue("disk_usage"), TypedValue(static_cast(info.disk_usage))}, + {TypedValue("memory_allocated"), TypedValue(static_cast(utils::total_memory_tracker.Amount()))}, + {TypedValue("allocation_limit"), + TypedValue(static_cast(utils::total_memory_tracker.HardLimit()))}}; return std::pair{results, QueryHandlerResult::COMMIT}; }; break; @@ -1211,8 +1299,7 @@ PreparedQuery PrepareInfoQuery(ParsedQuery parsed_query, bool in_explicit_transa PreparedQuery PrepareConstraintQuery(ParsedQuery parsed_query, bool in_explicit_transaction, std::map *summary, - InterpreterContext *interpreter_context, - utils::MonotonicBufferResource *execution_memory) { + InterpreterContext *interpreter_context, utils::MemoryResource *execution_memory) { if (in_explicit_transaction) { throw ConstraintInMulticommandTxException(); } @@ -1377,10 +1464,12 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, in_explicit_transaction_ ? static_cast(query_executions_.size() - 1) : std::optional{}; // Handle transaction control queries. 
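PrepareIsolationLevelQuery above picks one of three closures up front (GLOBAL, SESSION, or NEXT scope) via an immediately invoked switch and stores it in a std::function, so the pull-time code is branch-free. A compact sketch of that dispatch, with hypothetical names:

#include <functional>
#include <iostream>

enum class Scope { GLOBAL, SESSION, NEXT };

std::function<void()> MakeIsolationSetter(Scope scope, int level) {
  // The switch covers every enumerator, so the compiler can warn when a
  // new scope is added but not handled here.
  switch (scope) {
    case Scope::GLOBAL:
      return [level] { std::cout << "global level = " << level << '\n'; };
    case Scope::SESSION:
      return [level] { std::cout << "session level = " << level << '\n'; };
    case Scope::NEXT:
      return [level] { std::cout << "next-transaction level = " << level << '\n'; };
  }
  return {};  // unreachable for valid enumerators
}

int main() { MakeIsolationSetter(Scope::NEXT, 1)(); }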
- auto query_upper = utils::Trim(utils::ToUpperCase(query_string)); - if (query_upper == "BEGIN" || query_upper == "COMMIT" || query_upper == "ROLLBACK") { - query_execution->prepared_query.emplace(PrepareTransactionQuery(query_upper)); + const auto upper_case_query = utils::ToUpperCase(query_string); + const auto trimmed_query = utils::Trim(upper_case_query); + + if (trimmed_query == "BEGIN" || trimmed_query == "COMMIT" || trimmed_query == "ROLLBACK") { + query_execution->prepared_query.emplace(PrepareTransactionQuery(trimmed_query)); return {query_execution->prepared_query->header, query_execution->prepared_query->privileges, qid}; } @@ -1408,9 +1497,15 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, // Some queries require an active transaction in order to be prepared. if (!in_explicit_transaction_ && (utils::Downcast(parsed_query.query) || utils::Downcast(parsed_query.query) || - utils::Downcast(parsed_query.query) || utils::Downcast(parsed_query.query))) { - db_accessor_.emplace(interpreter_context_->db->Access()); - execution_db_accessor_.emplace(&*db_accessor_); + utils::Downcast(parsed_query.query) || utils::Downcast(parsed_query.query) || + utils::Downcast(parsed_query.query))) { + db_accessor_ = + std::make_unique(interpreter_context_->db->Access(GetIsolationLevelOverride())); + execution_db_accessor_.emplace(db_accessor_.get()); + + if (utils::Downcast(parsed_query.query) && interpreter_context_->trigger_store->HasTriggers()) { + trigger_context_collector_.emplace(interpreter_context_->trigger_store->GetEventTypes()); + } } utils::Timer planning_timer; @@ -1418,7 +1513,8 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, if (utils::Downcast(parsed_query.query)) { prepared_query = PrepareCypherQuery(std::move(parsed_query), &query_execution->summary, interpreter_context_, - &*execution_db_accessor_, &query_execution->execution_memory); + &*execution_db_accessor_, &query_execution->execution_memory, + trigger_context_collector_ ? 
&*trigger_context_collector_ : nullptr); } else if (utils::Downcast(parsed_query.query)) { prepared_query = PrepareExplainQuery(std::move(parsed_query), &query_execution->summary, interpreter_context_, &*execution_db_accessor_, &query_execution->execution_memory); @@ -1450,6 +1546,14 @@ Interpreter::PrepareResult Interpreter::Prepare(const std::string &query_string, } else if (utils::Downcast(parsed_query.query)) { prepared_query = PrepareLockPathQuery(std::move(parsed_query), in_explicit_transaction_, interpreter_context_, &*execution_db_accessor_); + } else if (utils::Downcast(parsed_query.query)) { + prepared_query = PrepareFreeMemoryQuery(std::move(parsed_query), in_explicit_transaction_, interpreter_context_); + } else if (utils::Downcast(parsed_query.query)) { + prepared_query = PrepareTriggerQuery(std::move(parsed_query), in_explicit_transaction_, interpreter_context_, + &*execution_db_accessor_, params); + } else if (utils::Downcast(parsed_query.query)) { + prepared_query = + PrepareIsolationLevelQuery(std::move(parsed_query), in_explicit_transaction_, interpreter_context_, this); } else { LOG_FATAL("Should not get here -- unknown query type!"); } @@ -1482,10 +1586,59 @@ void Interpreter::Abort() { in_explicit_transaction_ = false; if (!db_accessor_) return; db_accessor_->Abort(); - execution_db_accessor_ = std::nullopt; - db_accessor_ = std::nullopt; + execution_db_accessor_.reset(); + db_accessor_.reset(); + trigger_context_collector_.reset(); } +namespace { +void RunTriggersIndividually(const utils::SkipList &triggers, InterpreterContext *interpreter_context, + TriggerContext trigger_context) { + // Run the triggers + for (const auto &trigger : triggers.access()) { + utils::MonotonicBufferResource execution_memory{kExecutionMemoryBlockSize}; + + // create a new transaction for each trigger + auto storage_acc = interpreter_context->db->Access(); + DbAccessor db_accessor{&storage_acc}; + + trigger_context.AdaptForAccessor(&db_accessor); + try { + trigger.Execute(&db_accessor, &execution_memory, interpreter_context->execution_timeout_sec, + &interpreter_context->is_shutting_down, trigger_context); + } catch (const utils::BasicException &exception) { + spdlog::warn("Trigger '{}' failed with exception:\n{}", trigger.Name(), exception.what()); + db_accessor.Abort(); + continue; + } + + auto maybe_constraint_violation = db_accessor.Commit(); + if (maybe_constraint_violation.HasError()) { + const auto &constraint_violation = maybe_constraint_violation.GetError(); + switch (constraint_violation.type) { + case storage::ConstraintViolation::Type::EXISTENCE: { + const auto &label_name = db_accessor.LabelToName(constraint_violation.label); + MG_ASSERT(constraint_violation.properties.size() == 1U); + const auto &property_name = db_accessor.PropertyToName(*constraint_violation.properties.begin()); + spdlog::warn("Trigger '{}' failed to commit due to existence constraint violation on :{}({})", trigger.Name(), + label_name, property_name); + break; + } + case storage::ConstraintViolation::Type::UNIQUE: { + const auto &label_name = db_accessor.LabelToName(constraint_violation.label); + std::stringstream property_names_stream; + utils::PrintIterable(property_names_stream, constraint_violation.properties, ", ", + [&](auto &stream, const auto &prop) { stream << db_accessor.PropertyToName(prop); }); + spdlog::warn("Trigger '{}' failed to commit due to unique constraint violation on :{}({})", trigger.Name(), + label_name, property_names_stream.str()); + break; + } + } + } + } +} +} // namespace + void 
Interpreter::Commit() { // It's possible that some queries did not finish because the user did // not pull all of the results from the query. @@ -1493,6 +1646,35 @@ void Interpreter::Commit() { // We should document clearly that all results should be pulled to complete // a query. if (!db_accessor_) return; + + std::optional trigger_context = std::nullopt; + if (trigger_context_collector_) { + trigger_context.emplace(std::move(*trigger_context_collector_).TransformToTriggerContext()); + trigger_context_collector_.reset(); + } + + if (trigger_context) { + // Run the triggers + for (const auto &trigger : interpreter_context_->trigger_store->BeforeCommitTriggers().access()) { + utils::MonotonicBufferResource execution_memory{kExecutionMemoryBlockSize}; + AdvanceCommand(); + try { + trigger.Execute(&*execution_db_accessor_, &execution_memory, interpreter_context_->execution_timeout_sec, + &interpreter_context_->is_shutting_down, *trigger_context); + } catch (const utils::BasicException &e) { + throw utils::BasicException( + fmt::format("Trigger '{}' caused the transaction to fail.\nException: {}", trigger.Name(), e.what())); + } + } + SPDLOG_DEBUG("Finished executing before commit triggers"); + } + + const auto reset_necessary_members = [this]() { + execution_db_accessor_.reset(); + db_accessor_.reset(); + trigger_context_collector_.reset(); + }; + auto maybe_constraint_violation = db_accessor_->Commit(); if (maybe_constraint_violation.HasError()) { const auto &constraint_violation = maybe_constraint_violation.GetError(); @@ -1501,8 +1683,7 @@ void Interpreter::Commit() { auto label_name = execution_db_accessor_->LabelToName(constraint_violation.label); MG_ASSERT(constraint_violation.properties.size() == 1U); auto property_name = execution_db_accessor_->PropertyToName(*constraint_violation.properties.begin()); - execution_db_accessor_ = std::nullopt; - db_accessor_ = std::nullopt; + reset_necessary_members(); throw QueryException("Unable to commit due to existence constraint violation on :{}({})", label_name, property_name); break; @@ -1513,16 +1694,33 @@ void Interpreter::Commit() { utils::PrintIterable( property_names_stream, constraint_violation.properties, ", ", [this](auto &stream, const auto &prop) { stream << execution_db_accessor_->PropertyToName(prop); }); - execution_db_accessor_ = std::nullopt; - db_accessor_ = std::nullopt; + reset_necessary_members(); throw QueryException("Unable to commit due to unique constraint violation on :{}({})", label_name, property_names_stream.str()); break; } } } - execution_db_accessor_ = std::nullopt; - db_accessor_ = std::nullopt; + + // The ordered execution of after commit triggers depends heavily on the exclusiveness of db_accessor_->Commit(): + // only one transaction can be committing at the same time, so when the commit is finished, that transaction + // will probably schedule its after commit triggers, because the other transactions that want to commit are still + // waiting to commit or one of them has just started committing its changes. + // This means the ordered execution of after commit triggers is not guaranteed.
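Given the ordering caveat spelled out above, after-commit triggers are handed off to a background pool (shown in the next hunk) so they never block the committing client; the database accessor has to travel with the task because std::function only stores copyable callables. A rough sketch of that hand-off, using std::async as a stand-in for Memgraph's utils::ThreadPool and hypothetical names throughout:

#include <functional>
#include <future>
#include <iostream>
#include <memory>

struct Transaction {
  void FinalizeTransaction() { std::cout << "transaction finalized\n"; }
};

int main() {
  // Moving the accessor into a shared_ptr makes the task copyable, and the
  // task keeps the transaction alive until the triggers have finished.
  auto user_transaction = std::make_shared<Transaction>();
  std::function<void()> task = [user_transaction]() mutable {
    std::cout << "running after-commit triggers\n";
    user_transaction->FinalizeTransaction();
  };
  auto done = std::async(std::launch::async, std::move(task));  // pool.AddTask(task) stand-in
  done.wait();
}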
+ if (trigger_context && interpreter_context_->trigger_store->AfterCommitTriggers().size() > 0) { + interpreter_context_->after_commit_trigger_pool.AddTask( + [trigger_context = std::move(*trigger_context), interpreter_context = this->interpreter_context_, + user_transaction = std::shared_ptr(std::move(db_accessor_))]() mutable { + RunTriggersIndividually(interpreter_context->trigger_store->AfterCommitTriggers(), interpreter_context, + std::move(trigger_context)); + user_transaction->FinalizeTransaction(); + SPDLOG_DEBUG("Finished executing after commit triggers"); // NOLINT(bugprone-lambda-function-name) + }); + } + + reset_necessary_members(); + + SPDLOG_DEBUG("Finished committing the transaction"); } void Interpreter::AdvanceCommand() { @@ -1541,4 +1739,22 @@ void Interpreter::AbortCommand(std::unique_ptr *query_execution) } } +std::optional Interpreter::GetIsolationLevelOverride() { + if (next_transaction_isolation_level) { + const auto isolation_level = *next_transaction_isolation_level; + next_transaction_isolation_level.reset(); + return isolation_level; + } + + return interpreter_isolation_level; +} + +void Interpreter::SetNextTransactionIsolationLevel(const storage::IsolationLevel isolation_level) { + next_transaction_isolation_level.emplace(isolation_level); +} + +void Interpreter::SetSessionIsolationLevel(const storage::IsolationLevel isolation_level) { + interpreter_isolation_level.emplace(isolation_level); +} + } // namespace query diff --git a/src/query/interpreter.hpp b/src/query/interpreter.hpp index 3a8ff61be..d68d9cee6 100644 --- a/src/query/interpreter.hpp +++ b/src/query/interpreter.hpp @@ -3,6 +3,7 @@ #include #include "query/context.hpp" +#include "query/cypher_query_interpreter.hpp" #include "query/db_accessor.hpp" #include "query/exceptions.hpp" #include "query/frontend/ast/ast.hpp" @@ -12,18 +13,18 @@ #include "query/plan/operator.hpp" #include "query/plan/read_write_type_checker.hpp" #include "query/stream.hpp" +#include "query/trigger.hpp" #include "query/typed_value.hpp" +#include "storage/v2/isolation_level.hpp" #include "utils/event_counter.hpp" #include "utils/logging.hpp" #include "utils/memory.hpp" #include "utils/skip_list.hpp" #include "utils/spin_lock.hpp" +#include "utils/thread_pool.hpp" #include "utils/timer.hpp" #include "utils/tsc.hpp" -DECLARE_bool(query_cost_planner); -DECLARE_int32(query_plan_cache_ttl); - namespace EventCounter { extern const Event FailedQuery; } // namespace EventCounter @@ -99,11 +100,11 @@ class ReplicationQueryHandler { ReplicationQueryHandler() = default; virtual ~ReplicationQueryHandler() = default; - ReplicationQueryHandler(const ReplicationQueryHandler &) = delete; - ReplicationQueryHandler &operator=(const ReplicationQueryHandler &) = delete; + ReplicationQueryHandler(const ReplicationQueryHandler &) = default; + ReplicationQueryHandler &operator=(const ReplicationQueryHandler &) = default; - ReplicationQueryHandler(ReplicationQueryHandler &&) = delete; - ReplicationQueryHandler &operator=(ReplicationQueryHandler &&) = delete; + ReplicationQueryHandler(ReplicationQueryHandler &&) = default; + ReplicationQueryHandler &operator=(ReplicationQueryHandler &&) = default; struct Replica { std::string name; @@ -139,64 +140,6 @@ struct PreparedQuery { plan::ReadWriteTypeChecker::RWType rw_type; }; -// TODO: Maybe this should move to query/plan/planner. -/// Interface for accessing the root operator of a logical plan.
-class LogicalPlan { - public: - virtual ~LogicalPlan() {} - - virtual const plan::LogicalOperator &GetRoot() const = 0; - virtual double GetCost() const = 0; - virtual const SymbolTable &GetSymbolTable() const = 0; - virtual const AstStorage &GetAstStorage() const = 0; -}; - -class CachedPlan { - public: - explicit CachedPlan(std::unique_ptr plan); - - const auto &plan() const { return plan_->GetRoot(); } - double cost() const { return plan_->GetCost(); } - const auto &symbol_table() const { return plan_->GetSymbolTable(); } - const auto &ast_storage() const { return plan_->GetAstStorage(); } - - bool IsExpired() const { return cache_timer_.Elapsed() > std::chrono::seconds(FLAGS_query_plan_cache_ttl); }; - - private: - std::unique_ptr plan_; - utils::Timer cache_timer_; -}; - -struct CachedQuery { - AstStorage ast_storage; - Query *query; - std::vector required_privileges; -}; - -struct QueryCacheEntry { - bool operator==(const QueryCacheEntry &other) const { return first == other.first; } - bool operator<(const QueryCacheEntry &other) const { return first < other.first; } - bool operator==(const uint64_t &other) const { return first == other; } - bool operator<(const uint64_t &other) const { return first < other; } - - uint64_t first; - // TODO: Maybe store the query string here and use it as a key with the hash - // so that we eliminate the risk of hash collisions. - CachedQuery second; -}; - -struct PlanCacheEntry { - bool operator==(const PlanCacheEntry &other) const { return first == other.first; } - bool operator<(const PlanCacheEntry &other) const { return first < other.first; } - bool operator==(const uint64_t &other) const { return first == other; } - bool operator<(const uint64_t &other) const { return first < other; } - - uint64_t first; - // TODO: Maybe store the query string here and use it as a key with the hash - // so that we eliminate the risk of hash collisions. - std::shared_ptr second; -}; - /** * Holds data shared between multiple `Interpreter` instances (which might be * running concurrently). @@ -205,7 +148,7 @@ struct PlanCacheEntry { * been passed to an `Interpreter` instance. */ struct InterpreterContext { - explicit InterpreterContext(storage::Storage *db) : db(db) {} + explicit InterpreterContext(storage::Storage *db, const std::filesystem::path &data_directory); storage::Storage *db; @@ -225,6 +168,9 @@ struct InterpreterContext { utils::SkipList ast_cache; utils::SkipList plan_cache; + + std::optional trigger_store; + utils::ThreadPool after_commit_trigger_pool{1}; }; /// Function that is used to tell all active interpreters that they should stop @@ -309,6 +255,9 @@ class Interpreter final { void RollbackTransaction(); + void SetNextTransactionIsolationLevel(storage::IsolationLevel isolation_level); + void SetSessionIsolationLevel(storage::IsolationLevel isolation_level); + /** * Abort the current multicommand transaction. */ @@ -317,7 +266,9 @@ class Interpreter final { private: struct QueryExecution { std::optional prepared_query; - utils::MonotonicBufferResource execution_memory{kExecutionMemoryBlockSize}; + utils::MonotonicBufferResource execution_monotonic_memory{kExecutionMemoryBlockSize}; + utils::ResourceWithOutOfMemoryException execution_memory{&execution_monotonic_memory}; + std::map summary; explicit QueryExecution() = default; @@ -331,7 +282,7 @@ class Interpreter final { // destroy the prepared query which is using that instance // of execution memory. 
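The cache-entry structs removed above (they move out with the rest of the caching code) compare both entry-to-entry and entry-to-raw-uint64_t, so the skip list can be probed with just a hash instead of a fully built entry. The same idiom expressed over std::set with a transparent comparator, as a self-contained sketch with hypothetical payload types:

#include <cstdint>
#include <iostream>
#include <set>
#include <string>

struct CacheEntry {
  uint64_t first;      // query hash
  std::string second;  // cached payload

  bool operator<(const CacheEntry &other) const { return first < other.first; }
  bool operator<(uint64_t key) const { return first < key; }
  friend bool operator<(uint64_t key, const CacheEntry &entry) { return key < entry.first; }
};

int main() {
  // std::less<> is a transparent comparator, enabling lookup by the bare
  // key type, which mirrors probing the skip list with just the hash.
  std::set<CacheEntry, std::less<>> cache;
  cache.insert({42, "MATCH (n) RETURN n"});
  if (auto it = cache.find(uint64_t{42}); it != cache.end()) {
    std::cout << it->second << '\n';
  }
}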
prepared_query.reset(); - execution_memory.Release(); + execution_monotonic_memory.Release(); } }; @@ -350,15 +301,23 @@ class Interpreter final { InterpreterContext *interpreter_context_; - std::optional db_accessor_; + // This cannot be std::optional because we need to move this accessor later on into a lambda capture + // which is assigned to std::function. std::function requires every object to be copyable, so we + // move this unique_ptr into a shared_ptr. + std::unique_ptr db_accessor_; std::optional execution_db_accessor_; + std::optional trigger_context_collector_; bool in_explicit_transaction_{false}; bool expect_rollback_{false}; + std::optional interpreter_isolation_level; + std::optional next_transaction_isolation_level; + PreparedQuery PrepareTransactionQuery(std::string_view query_upper); void Commit(); void AdvanceCommand(); void AbortCommand(std::unique_ptr *query_execution); + std::optional GetIsolationLevelOverride(); size_t ActiveQueryExecutions() { return std::count_if(query_executions_.begin(), query_executions_.end(), diff --git a/src/query/plan/operator.cpp b/src/query/plan/operator.cpp index 6b3ad4b47..d29e980b8 100644 --- a/src/query/plan/operator.cpp +++ b/src/query/plan/operator.cpp @@ -15,6 +15,7 @@ #include #include "query/context.hpp" +#include "query/db_accessor.hpp" #include "query/exceptions.hpp" #include "query/frontend/ast/ast.hpp" #include "query/frontend/semantic/symbol_table.hpp" @@ -23,14 +24,18 @@ #include "query/plan/scoped_profile.hpp" #include "query/procedure/mg_procedure_impl.hpp" #include "query/procedure/module.hpp" +#include "storage/v2/property_value.hpp" #include "utils/algorithm.hpp" +#include "utils/csv_parsing.hpp" #include "utils/event_counter.hpp" #include "utils/exceptions.hpp" #include "utils/fnv.hpp" +#include "utils/likely.hpp" #include "utils/logging.hpp" #include "utils/pmr/unordered_map.hpp" #include "utils/pmr/unordered_set.hpp" #include "utils/pmr/vector.hpp" +#include "utils/readable_size.hpp" #include "utils/string.hpp" // macro for the default implementation of LogicalOperator::Accept @@ -203,7 +208,10 @@ bool CreateNode::CreateNodeCursor::Pull(Frame &frame, ExecutionContext &context) SCOPED_PROFILE_OP("CreateNode"); if (input_cursor_->Pull(frame, context)) { - CreateLocalVertex(self_.node_info_, &frame, context); + auto created_vertex = CreateLocalVertex(self_.node_info_, &frame, context); + if (context.trigger_context_collector) { + context.trigger_context_collector->RegisterCreatedObject(created_vertex); + } return true; } @@ -242,8 +250,8 @@ CreateExpand::CreateExpandCursor::CreateExpandCursor(const CreateExpand &self, u namespace { -void CreateEdge(const EdgeCreationInfo &edge_info, DbAccessor *dba, VertexAccessor *from, VertexAccessor *to, - Frame *frame, ExpressionEvaluator *evaluator) { +EdgeAccessor CreateEdge(const EdgeCreationInfo &edge_info, DbAccessor *dba, VertexAccessor *from, VertexAccessor *to, + Frame *frame, ExpressionEvaluator *evaluator) { auto maybe_edge = dba->InsertEdge(from, to, edge_info.edge_type); if (maybe_edge.HasValue()) { auto &edge = *maybe_edge; @@ -261,6 +269,8 @@ void CreateEdge(const EdgeCreationInfo &edge_info, DbAccessor *dba, VertexAccess throw QueryRuntimeException("Unexpected error when creating an edge."); } } + + return *maybe_edge; } } // namespace @@ -286,19 +296,23 @@ bool CreateExpand::CreateExpandCursor::Pull(Frame &frame, ExecutionContext &cont // create an edge between the two nodes auto *dba = context.db_accessor; - switch (self_.edge_info_.direction) { - case
EdgeAtom::Direction::IN: - CreateEdge(self_.edge_info_, dba, &v2, &v1, &frame, &evaluator); - break; - case EdgeAtom::Direction::OUT: - CreateEdge(self_.edge_info_, dba, &v1, &v2, &frame, &evaluator); - break; - case EdgeAtom::Direction::BOTH: + + auto created_edge = [&] { + switch (self_.edge_info_.direction) { + case EdgeAtom::Direction::IN: + return CreateEdge(self_.edge_info_, dba, &v2, &v1, &frame, &evaluator); + case EdgeAtom::Direction::OUT: // in the case of an undirected CreateExpand we choose an arbitrary // direction. This is used in the MERGE clause; // it is not allowed in the CREATE clause, and the semantic // checker needs to ensure it doesn't reach this point + case EdgeAtom::Direction::BOTH: + return CreateEdge(self_.edge_info_, dba, &v1, &v2, &frame, &evaluator); + } + }(); + + if (context.trigger_context_collector) { + context.trigger_context_collector->RegisterCreatedObject(created_edge); } return true; @@ -314,18 +328,26 @@ VertexAccessor &CreateExpand::CreateExpandCursor::OtherVertex(Frame &frame, Exec ExpectType(self_.node_info_.symbol, dest_node_value, TypedValue::Type::Vertex); return dest_node_value.ValueVertex(); } else { - return CreateLocalVertex(self_.node_info_, &frame, context); + auto &created_vertex = CreateLocalVertex(self_.node_info_, &frame, context); + if (context.trigger_context_collector) { + context.trigger_context_collector->RegisterCreatedObject(created_vertex); + } + return created_vertex; } } template class ScanAllCursor : public Cursor { public: - explicit ScanAllCursor(Symbol output_symbol, UniqueCursorPtr input_cursor, TVerticesFun get_vertices) - : output_symbol_(output_symbol), input_cursor_(std::move(input_cursor)), get_vertices_(std::move(get_vertices)) {} + explicit ScanAllCursor(Symbol output_symbol, UniqueCursorPtr input_cursor, TVerticesFun get_vertices, + const char *op_name) + : output_symbol_(output_symbol), + input_cursor_(std::move(input_cursor)), + get_vertices_(std::move(get_vertices)), + op_name_(op_name) {} bool Pull(Frame &frame, ExecutionContext &context) override { - SCOPED_PROFILE_OP("ScanAll"); + SCOPED_PROFILE_OP(op_name_); if (MustAbort(context)) throw HintedAbortError(); @@ -361,6 +383,7 @@ class ScanAllCursor : public Cursor { TVerticesFun get_vertices_; std::optional::type::value_type> vertices_; std::optional vertices_it_; + const char *op_name_; }; ScanAll::ScanAll(const std::shared_ptr &input, Symbol output_symbol, storage::View view) @@ -376,7 +399,7 @@ UniqueCursorPtr ScanAll::MakeCursor(utils::MemoryResource *mem) const { return std::make_optional(db->Vertices(view_)); }; return MakeUniqueCursorPtr>(mem, output_symbol_, input_->MakeCursor(mem), - std::move(vertices)); + std::move(vertices), "ScanAll"); } std::vector ScanAll::ModifiedSymbols(const SymbolTable &table) const { @@ -399,7 +422,7 @@ UniqueCursorPtr ScanAllByLabel::MakeCursor(utils::MemoryResource *mem) const { return std::make_optional(db->Vertices(view_, label_)); }; return MakeUniqueCursorPtr>(mem, output_symbol_, input_->MakeCursor(mem), - std::move(vertices)); + std::move(vertices), "ScanAllByLabel"); } // TODO(buda): Implement ScanAllByLabelProperty operator to iterate over @@ -463,7 +486,7 @@ UniqueCursorPtr ScanAllByLabelPropertyRange::MakeCursor(utils::MemoryResource *m return std::make_optional(db->Vertices(view_, label_, property_, maybe_lower, maybe_upper)); }; return MakeUniqueCursorPtr>(mem, output_symbol_, input_->MakeCursor(mem), -
std::move(vertices), "ScanAllByLabelPropertyRange"); } ScanAllByLabelPropertyValue::ScanAllByLabelPropertyValue(const std::shared_ptr &input, @@ -495,7 +518,7 @@ UniqueCursorPtr ScanAllByLabelPropertyValue::MakeCursor(utils::MemoryResource *m return std::make_optional(db->Vertices(view_, label_, property_, storage::PropertyValue(value))); }; return MakeUniqueCursorPtr>(mem, output_symbol_, input_->MakeCursor(mem), - std::move(vertices)); + std::move(vertices), "ScanAllByLabelPropertyValue"); } ScanAllByLabelProperty::ScanAllByLabelProperty(const std::shared_ptr &input, Symbol output_symbol, @@ -513,7 +536,7 @@ UniqueCursorPtr ScanAllByLabelProperty::MakeCursor(utils::MemoryResource *mem) c return std::make_optional(db->Vertices(view_, label_, property_)); }; return MakeUniqueCursorPtr>(mem, output_symbol_, input_->MakeCursor(mem), - std::move(vertices)); + std::move(vertices), "ScanAllByLabelProperty"); } ScanAllById::ScanAllById(const std::shared_ptr &input, Symbol output_symbol, Expression *expression, @@ -539,7 +562,7 @@ UniqueCursorPtr ScanAllById::MakeCursor(utils::MemoryResource *mem) const { return std::vector{*maybe_vertex}; }; return MakeUniqueCursorPtr>(mem, output_symbol_, input_->MakeCursor(mem), - std::move(vertices)); + std::move(vertices), "ScanAllById"); } namespace { @@ -1794,8 +1817,7 @@ bool Delete::DeleteCursor::Pull(Frame &frame, ExecutionContext &context) { if (!input_cursor_->Pull(frame, context)) return false; // Delete should get the latest information, this way it is also possible - // to - // delete newly added nodes and edges. + // to delete newly added nodes and edges. ExpressionEvaluator evaluator(&frame, context.symbol_table, context.evaluation_context, context.db_accessor, storage::View::NEW); auto *pull_memory = context.evaluation_context.memory; @@ -1813,9 +1835,9 @@ bool Delete::DeleteCursor::Pull(Frame &frame, ExecutionContext &context) { for (TypedValue &expression_result : expression_results) { if (MustAbort(context)) throw HintedAbortError(); if (expression_result.type() == TypedValue::Type::Edge) { - auto maybe_error = dba.RemoveEdge(&expression_result.ValueEdge()); - if (maybe_error.HasError()) { - switch (maybe_error.GetError()) { + auto maybe_value = dba.RemoveEdge(&expression_result.ValueEdge()); + if (maybe_value.HasError()) { + switch (maybe_value.GetError()) { case storage::Error::SERIALIZATION_ERROR: throw QueryRuntimeException("Can't serialize due to concurrent operations."); case storage::Error::DELETED_OBJECT: @@ -1825,6 +1847,10 @@ bool Delete::DeleteCursor::Pull(Frame &frame, ExecutionContext &context) { throw QueryRuntimeException("Unexpected error when deleting an edge."); } } + + if (context.trigger_context_collector && maybe_value.GetValue()) { + context.trigger_context_collector->RegisterDeletedObject(*maybe_value.GetValue()); + } } } @@ -1835,9 +1861,9 @@ bool Delete::DeleteCursor::Pull(Frame &frame, ExecutionContext &context) { case TypedValue::Type::Vertex: { auto &va = expression_result.ValueVertex(); if (self_.detach_) { - auto maybe_error = dba.DetachRemoveVertex(&va); - if (maybe_error.HasError()) { - switch (maybe_error.GetError()) { + auto res = dba.DetachRemoveVertex(&va); + if (res.HasError()) { + switch (res.GetError()) { case storage::Error::SERIALIZATION_ERROR: throw QueryRuntimeException("Can't serialize due to concurrent operations."); case storage::Error::DELETED_OBJECT: @@ -1847,6 +1873,13 @@ bool Delete::DeleteCursor::Pull(Frame &frame, ExecutionContext &context) { throw QueryRuntimeException("Unexpected error 
when deleting a node."); } } + if (context.trigger_context_collector && + context.trigger_context_collector->ShouldRegisterDeletedObject() && res.GetValue()) { + context.trigger_context_collector->RegisterDeletedObject(res.GetValue()->first); + for (const auto &deleted_edge : res.GetValue()->second) { + context.trigger_context_collector->RegisterDeletedObject(deleted_edge); + } + } } else { auto res = dba.RemoveVertex(&va); if (res.HasError()) { @@ -1861,6 +1894,10 @@ bool Delete::DeleteCursor::Pull(Frame &frame, ExecutionContext &context) { throw QueryRuntimeException("Unexpected error when deleting a node."); } } + + if (context.trigger_context_collector && res.GetValue()) { + context.trigger_context_collector->RegisterDeletedObject(*res.GetValue()); + } } break; } @@ -1914,12 +1951,26 @@ bool SetProperty::SetPropertyCursor::Pull(Frame &frame, ExecutionContext &contex TypedValue rhs = self_.rhs_->Accept(evaluator); switch (lhs.type()) { - case TypedValue::Type::Vertex: - PropsSetChecked(&lhs.ValueVertex(), self_.property_, rhs); + case TypedValue::Type::Vertex: { + auto old_value = PropsSetChecked(&lhs.ValueVertex(), self_.property_, rhs); + + if (context.trigger_context_collector) { + // rhs cannot be moved because it was created with the allocator that is only valid during current pull + context.trigger_context_collector->RegisterSetObjectProperty(lhs.ValueVertex(), self_.property_, + TypedValue{std::move(old_value)}, TypedValue{rhs}); + } break; - case TypedValue::Type::Edge: - PropsSetChecked(&lhs.ValueEdge(), self_.property_, rhs); + } + case TypedValue::Type::Edge: { + auto old_value = PropsSetChecked(&lhs.ValueEdge(), self_.property_, rhs); + + if (context.trigger_context_collector) { + // rhs cannot be moved because it was created with the allocator that is only valid during current pull + context.trigger_context_collector->RegisterSetObjectProperty(lhs.ValueEdge(), self_.property_, + TypedValue{std::move(old_value)}, TypedValue{rhs}); + } break; + } case TypedValue::Type::Null: // Skip setting properties on Null (can occur in optional match). break; @@ -1959,16 +2010,29 @@ SetProperties::SetPropertiesCursor::SetPropertiesCursor(const SetProperties &sel namespace { +template +concept AccessorWithProperties = requires(T value, storage::PropertyId property_id, + storage::PropertyValue property_value) { + { value.ClearProperties() } + ->std::same_as>>; + {value.SetProperty(property_id, property_value)}; +}; + /// Helper function that sets the given values on either a Vertex or an Edge. 
///
/// @tparam TRecordAccessor Either RecordAccessor<Vertex> or
/// RecordAccessor<Edge>
-template <typename TRecordAccessor>
-void SetPropertiesOnRecord(DbAccessor *dba, TRecordAccessor *record, const TypedValue &rhs, SetProperties::Op op) {
+template <AccessorWithProperties TRecordAccessor>
+void SetPropertiesOnRecord(TRecordAccessor *record, const TypedValue &rhs, SetProperties::Op op,
+                           ExecutionContext *context) {
+  std::optional<std::map<storage::PropertyId, storage::PropertyValue>> old_values;
+  const bool should_register_change =
+      context->trigger_context_collector &&
+      context->trigger_context_collector->ShouldRegisterObjectPropertyChange<TRecordAccessor>();
  if (op == SetProperties::Op::REPLACE) {
-    auto maybe_error = record->ClearProperties();
-    if (maybe_error.HasError()) {
-      switch (maybe_error.GetError()) {
+    auto maybe_value = record->ClearProperties();
+    if (maybe_value.HasError()) {
+      switch (maybe_value.GetError()) {
        case storage::Error::DELETED_OBJECT:
          throw QueryRuntimeException("Trying to set properties on a deleted graph element.");
        case storage::Error::SERIALIZATION_ERROR:
@@ -1980,6 +2044,10 @@ void SetPropertiesOnRecord(DbAccessor *dba, TRecordAccessor *record, const Typed
          throw QueryRuntimeException("Unexpected error when setting properties.");
      }
    }
+
+    if (should_register_change) {
+      old_values.emplace(std::move(*maybe_value));
+    }
  }

  auto get_props = [](const auto &record) {
@@ -1999,8 +2067,25 @@ void SetPropertiesOnRecord(DbAccessor *dba, TRecordAccessor *record, const Typed
    return *maybe_props;
  };

-  auto set_props = [record](const auto &properties) {
-    for (const auto &kv : properties) {
+  auto register_set_property = [&](auto &&returned_old_value, auto key, auto &&new_value) {
+    auto old_value = [&]() -> storage::PropertyValue {
+      if (!old_values) {
+        return std::forward<decltype(returned_old_value)>(returned_old_value);
+      }
+
+      if (auto it = old_values->find(key); it != old_values->end()) {
+        return std::move(it->second);
+      }
+
+      return {};
+    }();
+
+    context->trigger_context_collector->RegisterSetObjectProperty(
+        *record, key, TypedValue(std::move(old_value)), TypedValue(std::forward<decltype(new_value)>(new_value)));
+  };
+
+  auto set_props = [&, record](auto properties) {
+    for (auto &kv : properties) {
      auto maybe_error = record->SetProperty(kv.first, kv.second);
      if (maybe_error.HasError()) {
        switch (maybe_error.GetError()) {
@@ -2015,6 +2100,10 @@ void SetPropertiesOnRecord(DbAccessor *dba, TRecordAccessor *record, const Typed
          throw QueryRuntimeException("Unexpected error when setting properties.");
        }
      }
+
+      if (should_register_change) {
+        register_set_property(std::move(*maybe_error), kv.first, std::move(kv.second));
+      }
    }
  };

@@ -2026,7 +2115,13 @@ void SetPropertiesOnRecord(DbAccessor *dba, TRecordAccessor *record, const Typed
      set_props(get_props(rhs.ValueVertex()));
      break;
    case TypedValue::Type::Map: {
-      for (const auto &kv : rhs.ValueMap()) PropsSetChecked(record, dba->NameToProperty(kv.first), kv.second);
+      for (const auto &kv : rhs.ValueMap()) {
+        auto key = context->db_accessor->NameToProperty(kv.first);
+        auto old_value = PropsSetChecked(record, key, kv.second);
+        if (should_register_change) {
+          register_set_property(std::move(old_value), key, kv.second);
+        }
+      }
      break;
    }
    default:
@@ -2034,6 +2129,14 @@ void SetPropertiesOnRecord(DbAccessor *dba, TRecordAccessor *record, const Typed
          "Right-hand side in SET expression must be a node, an edge or a "
          "map.");
  }
+
+  if (should_register_change && old_values) {
+    // register removed properties
+    for (auto &[property_id, property_value] : *old_values) {
+      context->trigger_context_collector->RegisterRemovedObjectProperty(*record, property_id,
+                                                                        TypedValue(std::move(property_value)));
+    }
+  }
}

}  //
namespace @@ -2052,10 +2155,10 @@ bool SetProperties::SetPropertiesCursor::Pull(Frame &frame, ExecutionContext &co switch (lhs.type()) { case TypedValue::Type::Vertex: - SetPropertiesOnRecord(context.db_accessor, &lhs.ValueVertex(), rhs, self_.op_); + SetPropertiesOnRecord(&lhs.ValueVertex(), rhs, self_.op_, &context); break; case TypedValue::Type::Edge: - SetPropertiesOnRecord(context.db_accessor, &lhs.ValueEdge(), rhs, self_.op_); + SetPropertiesOnRecord(&lhs.ValueEdge(), rhs, self_.op_, &context); break; case TypedValue::Type::Null: // Skip setting properties on Null (can occur in optional match). @@ -2100,9 +2203,9 @@ bool SetLabels::SetLabelsCursor::Pull(Frame &frame, ExecutionContext &context) { ExpectType(self_.input_symbol_, vertex_value, TypedValue::Type::Vertex); auto &vertex = vertex_value.ValueVertex(); for (auto label : self_.labels_) { - auto maybe_error = vertex.AddLabel(label); - if (maybe_error.HasError()) { - switch (maybe_error.GetError()) { + auto maybe_value = vertex.AddLabel(label); + if (maybe_value.HasError()) { + switch (maybe_value.GetError()) { case storage::Error::SERIALIZATION_ERROR: throw QueryRuntimeException("Can't serialize due to concurrent operations."); case storage::Error::DELETED_OBJECT: @@ -2113,6 +2216,10 @@ bool SetLabels::SetLabelsCursor::Pull(Frame &frame, ExecutionContext &context) { throw QueryRuntimeException("Unexpected error when setting a label."); } } + + if (context.trigger_context_collector && *maybe_value) { + context.trigger_context_collector->RegisterSetVertexLabel(vertex, label); + } } return true; @@ -2151,10 +2258,10 @@ bool RemoveProperty::RemovePropertyCursor::Pull(Frame &frame, ExecutionContext & storage::View::NEW); TypedValue lhs = self_.lhs_->expression_->Accept(evaluator); - auto remove_prop = [property = self_.property_](auto *record) { - auto maybe_error = record->RemoveProperty(property); - if (maybe_error.HasError()) { - switch (maybe_error.GetError()) { + auto remove_prop = [property = self_.property_, &context](auto *record) { + auto maybe_old_value = record->RemoveProperty(property); + if (maybe_old_value.HasError()) { + switch (maybe_old_value.GetError()) { case storage::Error::DELETED_OBJECT: throw QueryRuntimeException("Trying to remove a property on a deleted graph element."); case storage::Error::SERIALIZATION_ERROR: @@ -2168,6 +2275,11 @@ bool RemoveProperty::RemovePropertyCursor::Pull(Frame &frame, ExecutionContext & throw QueryRuntimeException("Unexpected error when removing property."); } } + + if (context.trigger_context_collector) { + context.trigger_context_collector->RegisterRemovedObjectProperty(*record, property, + TypedValue(std::move(*maybe_old_value))); + } }; switch (lhs.type()) { @@ -2220,9 +2332,9 @@ bool RemoveLabels::RemoveLabelsCursor::Pull(Frame &frame, ExecutionContext &cont ExpectType(self_.input_symbol_, vertex_value, TypedValue::Type::Vertex); auto &vertex = vertex_value.ValueVertex(); for (auto label : self_.labels_) { - auto maybe_error = vertex.RemoveLabel(label); - if (maybe_error.HasError()) { - switch (maybe_error.GetError()) { + auto maybe_value = vertex.RemoveLabel(label); + if (maybe_value.HasError()) { + switch (maybe_value.GetError()) { case storage::Error::SERIALIZATION_ERROR: throw QueryRuntimeException("Can't serialize due to concurrent operations."); case storage::Error::DELETED_OBJECT: @@ -2233,6 +2345,10 @@ bool RemoveLabels::RemoveLabelsCursor::Pull(Frame &frame, ExecutionContext &cont throw QueryRuntimeException("Unexpected error when removing labels from a node."); } } + 
+ if (context.trigger_context_collector && *maybe_value) { + context.trigger_context_collector->RegisterRemovedVertexLabel(vertex, label); + } } return true; @@ -3485,16 +3601,6 @@ std::unordered_map CallProcedure::GetAndResetCounters() { namespace { -std::optional EvalMemoryLimit(ExpressionEvaluator *eval, Expression *memory_limit, size_t memory_scale) { - if (!memory_limit) return std::nullopt; - auto limit_value = memory_limit->Accept(*eval); - if (!limit_value.IsInt() || limit_value.ValueInt() <= 0) - throw QueryRuntimeException("Memory limit must be a non-negative integer."); - size_t limit = limit_value.ValueInt(); - if (std::numeric_limits::max() / memory_scale < limit) throw QueryRuntimeException("Memory limit overflow."); - return limit * memory_scale; -} - void CallCustomProcedure(const std::string_view &fully_qualified_procedure_name, const mgp_proc &proc, const std::vector &args, const mgp_graph &graph, ExpressionEvaluator *evaluator, utils::MemoryResource *memory, std::optional memory_limit, mgp_result *result) { @@ -3544,7 +3650,8 @@ void CallCustomProcedure(const std::string_view &fully_qualified_procedure_name, proc_args.elems.emplace_back(std::get<2>(proc.opt_args[i]), &graph); } if (memory_limit) { - SPDLOG_INFO("Running '{}' with memory limit of {} bytes", fully_qualified_procedure_name, *memory_limit); + SPDLOG_INFO("Running '{}' with memory limit of {}", fully_qualified_procedure_name, + utils::GetReadableSize(*memory_limit)); utils::LimitedMemoryResource limited_mem(memory, *memory_limit); mgp_memory proc_memory{&limited_mem}; MG_ASSERT(result->signature == &proc.results); @@ -3623,7 +3730,7 @@ class CallProcedureCursor : public Cursor { // TODO: This will probably need to be changed when we add support for // generator like procedures which yield a new result on each invocation. auto *memory = context.evaluation_context.memory; - auto memory_limit = EvalMemoryLimit(&evaluator, self_->memory_limit_, self_->memory_scale_); + auto memory_limit = EvaluateMemoryLimit(&evaluator, self_->memory_limit_, self_->memory_scale_); mgp_graph graph{context.db_accessor, graph_view, &context}; CallCustomProcedure(self_->procedure_name_, *proc, self_->arguments_, graph, &evaluator, memory, memory_limit, &result_); @@ -3679,4 +3786,142 @@ UniqueCursorPtr CallProcedure::MakeCursor(utils::MemoryResource *mem) const { return MakeUniqueCursorPtr(mem, this, mem); } +LoadCsv::LoadCsv(std::shared_ptr input, Expression *file, bool with_header, bool ignore_bad, + Expression *delimiter, Expression *quote, Symbol row_var) + : input_(input ? input : (std::make_shared())), + file_(file), + with_header_(with_header), + ignore_bad_(ignore_bad), + delimiter_(delimiter), + quote_(quote), + row_var_(row_var) { + MG_ASSERT(file_, "Something went wrong - '{}' member file_ shouldn't be a nullptr", __func__); +} + +bool LoadCsv::Accept(HierarchicalLogicalOperatorVisitor &visitor) { return false; }; + +class LoadCsvCursor; + +std::vector LoadCsv::OutputSymbols(const SymbolTable &sym_table) const { return {row_var_}; }; + +std::vector LoadCsv::ModifiedSymbols(const SymbolTable &sym_table) const { + auto symbols = input_->ModifiedSymbols(sym_table); + symbols.push_back(row_var_); + return symbols; +}; + +namespace { +// copy-pasted from interpreter.cpp +TypedValue EvaluateOptionalExpression(Expression *expression, ExpressionEvaluator *eval) { + return expression ? 
expression->Accept(*eval) : TypedValue(); +} + +auto ToOptionalString(ExpressionEvaluator *evaluator, Expression *expression) -> std::optional { + const auto evaluated_expr = EvaluateOptionalExpression(expression, evaluator); + if (evaluated_expr.IsString()) { + return utils::pmr::string(evaluated_expr.ValueString(), utils::NewDeleteResource()); + } + return std::nullopt; +}; + +TypedValue CsvRowToTypedList(csv::Reader::Row row) { + auto *mem = row.get_allocator().GetMemoryResource(); + auto typed_columns = utils::pmr::vector(mem); + typed_columns.reserve(row.size()); + for (auto &column : row) { + typed_columns.emplace_back(std::move(column)); + } + return TypedValue(typed_columns, mem); +} + +TypedValue CsvRowToTypedMap(csv::Reader::Row row, csv::Reader::Header header) { + // a valid row has the same number of elements as the header + auto *mem = row.get_allocator().GetMemoryResource(); + utils::pmr::map m(mem); + for (auto i = 0; i < row.size(); ++i) { + m.emplace(std::move(header[i]), std::move(row[i])); + } + return TypedValue(m, mem); +} + +} // namespace + +class LoadCsvCursor : public Cursor { + const LoadCsv *self_; + const UniqueCursorPtr input_cursor_; + bool input_is_once_; + std::optional reader_{}; + + public: + LoadCsvCursor(const LoadCsv *self, utils::MemoryResource *mem) + : self_(self), input_cursor_(self_->input_->MakeCursor(mem)) { + input_is_once_ = dynamic_cast(self_->input_.get()); + } + + bool Pull(Frame &frame, ExecutionContext &context) override { + SCOPED_PROFILE_OP("LoadCsv"); + + if (MustAbort(context)) throw HintedAbortError(); + + // ToDo(the-joksim): + // - this is an ungodly hack because the pipeline of creating a plan + // doesn't allow evaluating the expressions contained in self_->file_, + // self_->delimiter_, and self_->quote_ earlier (say, in the interpreter.cpp) + // without massacring the code even worse than I did here + if (UNLIKELY(!reader_)) { + reader_ = MakeReader(&context.evaluation_context); + } + + bool input_pulled = input_cursor_->Pull(frame, context); + + // If the input is Once, we have to keep going until we read all the rows, + // regardless of whether the pull on Once returned false. + // If we have e.g. MATCH(n) LOAD CSV ... AS x SET n.name = x.name, then we + // have to read at most cardinality(n) rows (but we can read less and stop + // pulling MATCH). + if (!input_is_once_ && !input_pulled) return false; + + if (auto row = reader_->GetNextRow(context.evaluation_context.memory)) { + if (!reader_->HasHeader()) { + frame[self_->row_var_] = CsvRowToTypedList(std::move(*row)); + } else { + frame[self_->row_var_] = CsvRowToTypedMap( + std::move(*row), csv::Reader::Header(reader_->GetHeader(), context.evaluation_context.memory)); + } + return true; + } + + return false; + } + + void Reset() override { input_cursor_->Reset(); } + void Shutdown() override { input_cursor_->Shutdown(); } + + private: + csv::Reader MakeReader(EvaluationContext *eval_context) { + Frame frame(0); + SymbolTable symbol_table; + DbAccessor *dba = nullptr; + auto evaluator = ExpressionEvaluator(&frame, symbol_table, *eval_context, dba, storage::View::OLD); + + auto maybe_file = ToOptionalString(&evaluator, self_->file_); + auto maybe_delim = ToOptionalString(&evaluator, self_->delimiter_); + auto maybe_quote = ToOptionalString(&evaluator, self_->quote_); + + // No need to check if maybe_file is std::nullopt, as the parser makes sure + // we can't get a nullptr for the 'file_' member in the LoadCsv clause. 
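+    // For intuition, a clause like (query text is an illustrative sketch only):
+    //   LOAD CSV FROM "/tmp/people.csv" WITH HEADER DELIMITER ";" QUOTE "'" AS row
+    // reaches this point with file_, delimiter_ and quote_ evaluated as strings,
+    // so maybe_file is always populated here.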
+    // Note that the reader has to be given its own memory resource, as it
+    // persists between pulls, so it can't use the evaluation context memory
+    // resource.
+    return csv::Reader(
+        *maybe_file,
+        csv::Reader::Config(self_->with_header_, self_->ignore_bad_, std::move(maybe_delim), std::move(maybe_quote)),
+        utils::NewDeleteResource());
+  }
+};
+
+UniqueCursorPtr LoadCsv::MakeCursor(utils::MemoryResource *mem) const {
+  return MakeUniqueCursorPtr<LoadCsvCursor>(mem, this, mem);
+};
+
 }  // namespace query::plan
diff --git a/src/query/plan/operator.lcp b/src/query/plan/operator.lcp
index 929cf4a97..139890356 100644
--- a/src/query/plan/operator.lcp
+++ b/src/query/plan/operator.lcp
@@ -117,6 +117,7 @@ class Distinct;
 class Union;
 class Cartesian;
 class CallProcedure;
+class LoadCsv;

 using LogicalOperatorCompositeVisitor = ::utils::CompositeVisitor<
     Once, CreateNode, CreateExpand, ScanAll, ScanAllByLabel,
@@ -125,7 +126,7 @@ using LogicalOperatorCompositeVisitor = ::utils::CompositeVisitor<
     Expand, ExpandVariable, ConstructNamedPath, Filter, Produce, Delete,
     SetProperty, SetProperties, SetLabels, RemoveProperty, RemoveLabels,
     EdgeUniquenessFilter, Accumulate, Aggregate, Skip, Limit, OrderBy, Merge,
-    Optional, Unwind, Distinct, Union, Cartesian, CallProcedure>;
+    Optional, Unwind, Distinct, Union, Cartesian, CallProcedure, LoadCsv>;

 using LogicalOperatorLeafVisitor = ::utils::LeafVisitor<Once>;
@@ -2156,5 +2157,38 @@ at once. Instead, each call of the callback should return a single row of the ta
  (:serialize (:slk))
  (:clone))

+(lcp:define-class load-csv (logical-operator)
+  ((input "std::shared_ptr<LogicalOperator>" :scope :public
+          :slk-save #'slk-save-operator-pointer
+          :slk-load #'slk-load-operator-pointer)
+   (file "Expression *" :scope :public)
+   (with_header "bool" :scope :public)
+   (ignore_bad "bool" :scope :public)
+   (delimiter "Expression *" :initval "nullptr" :scope :public
+              :slk-save #'slk-save-ast-pointer
+              :slk-load (slk-load-ast-pointer "Expression"))
+   (quote "Expression *" :initval "nullptr" :scope :public
+          :slk-save #'slk-save-ast-pointer
+          :slk-load (slk-load-ast-pointer "Expression"))
+   (row_var "Symbol" :scope :public))
+  (:public
+    #>cpp
+    LoadCsv() = default;
+    LoadCsv(std::shared_ptr<LogicalOperator> input, Expression *file, bool with_header, bool ignore_bad,
+            Expression* delimiter, Expression* quote, Symbol row_var);
+    bool Accept(HierarchicalLogicalOperatorVisitor &visitor) override;
+    UniqueCursorPtr MakeCursor(utils::MemoryResource *) const override;
+    std::vector<Symbol> OutputSymbols(const SymbolTable &) const override;
+    std::vector<Symbol> ModifiedSymbols(const SymbolTable &) const override;
+
+    bool HasSingleInput() const override { return true; }
+    std::shared_ptr<LogicalOperator> input() const override { return input_; }
+    void set_input(std::shared_ptr<LogicalOperator> input) override {
+      input_ = input;
+    }
+    cpp<#)
+  (:serialize (:slk))
+  (:clone))
+
 (lcp:pop-namespace) ;; plan
 (lcp:pop-namespace) ;; query
diff --git a/src/query/plan/preprocess.cpp b/src/query/plan/preprocess.cpp
index 7b8e08975..b89071709 100644
--- a/src/query/plan/preprocess.cpp
+++ b/src/query/plan/preprocess.cpp
@@ -522,7 +522,8 @@ std::vector<SingleQueryPart> CollectSingleQueryParts(SymbolTable &symbol_table,
      query_part->merge_matching.emplace_back(Matching{});
      AddMatching({merge->pattern_}, nullptr, symbol_table, storage, query_part->merge_matching.back());
    } else if (utils::IsSubtype(*clause, With::kType) || utils::IsSubtype(*clause, query::Unwind::kType) ||
-               utils::IsSubtype(*clause, query::CallProcedure::kType)) {
+               utils::IsSubtype(*clause, query::CallProcedure::kType) ||
+
utils::IsSubtype(*clause, query::LoadCsv::kType)) { // This query part is done, continue with a new one. query_parts.emplace_back(SingleQueryPart{}); query_part = &query_parts.back(); diff --git a/src/query/plan/pretty_print.cpp b/src/query/plan/pretty_print.cpp index b440664dc..cd3fe0069 100644 --- a/src/query/plan/pretty_print.cpp +++ b/src/query/plan/pretty_print.cpp @@ -206,6 +206,11 @@ bool PlanPrinter::PreVisit(query::plan::CallProcedure &op) { return true; } +bool PlanPrinter::PreVisit(query::plan::LoadCsv &op) { + WithPrintLn([&op](auto &out) { out << "* LoadCsv {" << op.row_var_.name() << "}"; }); + return true; +} + bool PlanPrinter::Visit(query::plan::Once &op) { WithPrintLn([](auto &out) { out << "* Once"; }); return true; @@ -803,6 +808,23 @@ bool PlanToJsonVisitor::PreVisit(query::plan::CallProcedure &op) { return false; } +bool PlanToJsonVisitor::PreVisit(query::plan::LoadCsv &op) { + json self; + self["name"] = "LoadCsv"; + self["file"] = ToJson(op.file_); + self["with_header"] = op.with_header_; + self["ignore_bad"] = op.ignore_bad_; + self["delimiter"] = ToJson(op.delimiter_); + self["quote"] = ToJson(op.quote_); + self["row_variable"] = ToJson(op.row_var_); + + op.input_->Accept(*this); + self["input"] = PopOutput(); + + output_ = std::move(self); + return false; +} + bool PlanToJsonVisitor::PreVisit(Distinct &op) { json self; self["name"] = "Distinct"; diff --git a/src/query/plan/pretty_print.hpp b/src/query/plan/pretty_print.hpp index 2cb102840..08d5b9c33 100644 --- a/src/query/plan/pretty_print.hpp +++ b/src/query/plan/pretty_print.hpp @@ -81,6 +81,7 @@ class PlanPrinter : public virtual HierarchicalLogicalOperatorVisitor { bool PreVisit(Unwind &) override; bool PreVisit(CallProcedure &) override; + bool PreVisit(LoadCsv &) override; bool Visit(Once &) override; @@ -194,6 +195,7 @@ class PlanToJsonVisitor : public virtual HierarchicalLogicalOperatorVisitor { bool PreVisit(Unwind &) override; bool PreVisit(CallProcedure &) override; + bool PreVisit(LoadCsv &) override; bool Visit(Once &) override; diff --git a/src/query/plan/profile.cpp b/src/query/plan/profile.cpp index f8afeb03c..bafba17d1 100644 --- a/src/query/plan/profile.cpp +++ b/src/query/plan/profile.cpp @@ -98,10 +98,9 @@ class ProfilingStatsToTableHelper { } // namespace -std::vector> ProfilingStatsToTable(const ProfilingStats &cumulative_stats, - std::chrono::duration total_time) { - ProfilingStatsToTableHelper helper{cumulative_stats.num_cycles, total_time}; - helper.Output(cumulative_stats); +std::vector> ProfilingStatsToTable(const ProfilingStatsWithTotalTime &stats) { + ProfilingStatsToTableHelper helper{stats.cumulative_stats.num_cycles, stats.total_time}; + helper.Output(stats.cumulative_stats); return helper.rows(); } @@ -147,9 +146,9 @@ class ProfilingStatsToJsonHelper { } // namespace -nlohmann::json ProfilingStatsToJson(const ProfilingStats &cumulative_stats, std::chrono::duration total_time) { - ProfilingStatsToJsonHelper helper{cumulative_stats.num_cycles, total_time}; - helper.Output(cumulative_stats); +nlohmann::json ProfilingStatsToJson(const ProfilingStatsWithTotalTime &stats) { + ProfilingStatsToJsonHelper helper{stats.cumulative_stats.num_cycles, stats.total_time}; + helper.Output(stats.cumulative_stats); return helper.ToJson(); } diff --git a/src/query/plan/profile.hpp b/src/query/plan/profile.hpp index bea2536a5..ed88f29e9 100644 --- a/src/query/plan/profile.hpp +++ b/src/query/plan/profile.hpp @@ -23,10 +23,14 @@ struct ProfilingStats { std::vector children; }; -std::vector> 
ProfilingStatsToTable(const ProfilingStats &cumulative_stats, - std::chrono::duration); +struct ProfilingStatsWithTotalTime { + ProfilingStats cumulative_stats{}; + std::chrono::duration total_time{}; +}; -nlohmann::json ProfilingStatsToJson(const ProfilingStats &cumulative_stats, std::chrono::duration); +std::vector> ProfilingStatsToTable(const ProfilingStatsWithTotalTime &stats); + +nlohmann::json ProfilingStatsToJson(const ProfilingStatsWithTotalTime &stats); } // namespace plan } // namespace query diff --git a/src/query/plan/rule_based_planner.hpp b/src/query/plan/rule_based_planner.hpp index 2c9489afa..ac548aa6e 100644 --- a/src/query/plan/rule_based_planner.hpp +++ b/src/query/plan/rule_based_planner.hpp @@ -203,6 +203,13 @@ class RuleBasedPlanner { input_op = std::make_unique( std::move(input_op), call_proc->procedure_name_, call_proc->arguments_, call_proc->result_fields_, result_symbols, call_proc->memory_limit_, call_proc->memory_scale_); + } else if (auto *load_csv = utils::Downcast(clause)) { + const auto &row_sym = context.symbol_table->at(*load_csv->row_var_); + context.bound_symbols.insert(row_sym); + + input_op = + std::make_unique(std::move(input_op), load_csv->file_, load_csv->with_header_, + load_csv->ignore_bad_, load_csv->delimiter_, load_csv->quote_, row_sym); } else { throw utils::NotYetImplemented("clause '{}' conversion to operator(s)", clause->GetTypeInfo().name); } diff --git a/src/query/procedure/mg_procedure_impl.cpp b/src/query/procedure/mg_procedure_impl.cpp index 98e7d66aa..e65f9437a 100644 --- a/src/query/procedure/mg_procedure_impl.cpp +++ b/src/query/procedure/mg_procedure_impl.cpp @@ -6,21 +6,20 @@ #include #include +#include "module.hpp" #include "utils/algorithm.hpp" #include "utils/logging.hpp" #include "utils/math.hpp" +#include "utils/memory.hpp" #include "utils/string.hpp" - // This file contains implementation of top level C API functions, but this is // all actually part of query::procedure. So use that namespace for simplicity. // NOLINTNEXTLINE(google-build-using-namespace) using namespace query::procedure; -void *mgp_alloc(mgp_memory *memory, size_t size_in_bytes) { - return mgp_aligned_alloc(memory, size_in_bytes, alignof(std::max_align_t)); -} +namespace { -void *mgp_aligned_alloc(mgp_memory *memory, const size_t size_in_bytes, const size_t alignment) { +void *MgpAlignedAllocImpl(utils::MemoryResource &memory, const size_t size_in_bytes, const size_t alignment) { if (size_in_bytes == 0U || !utils::IsPow2(alignment)) return nullptr; // Simplify alignment by always using values greater or equal to max_align. 
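  // Editorial sketch of the block layout produced below (header fields written
  // just in front of the pointer handed back to the caller):
  //   | padding | alloc_align | size_in_bytes | user data ... |
  //   ^ ptr from Allocate                     ^ pointer returned to the module
  // MgpFreeImpl reads those two fields back to reconstruct the original
  // allocation size and alignment.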
const size_t alloc_align = std::max(alignment, alignof(std::max_align_t)); @@ -37,7 +36,7 @@ void *mgp_aligned_alloc(mgp_memory *memory, const size_t size_in_bytes, const si const size_t alloc_size = bytes_for_header + size_in_bytes; if (alloc_size < size_in_bytes) return nullptr; try { - void *ptr = memory->impl->Allocate(alloc_size, alloc_align); + void *ptr = memory.Allocate(alloc_size, alloc_align); char *data = reinterpret_cast(ptr) + bytes_for_header; std::memcpy(data - sizeof(size_in_bytes), &size_in_bytes, sizeof(size_in_bytes)); std::memcpy(data - sizeof(size_in_bytes) - sizeof(alloc_align), &alloc_align, sizeof(alloc_align)); @@ -47,7 +46,7 @@ void *mgp_aligned_alloc(mgp_memory *memory, const size_t size_in_bytes, const si } } -void mgp_free(mgp_memory *memory, void *const p) { +void MgpFreeImpl(utils::MemoryResource &memory, void *const p) { if (!p) return; char *const data = reinterpret_cast(p); // Read the header containing size & alignment info. @@ -63,9 +62,31 @@ void mgp_free(mgp_memory *memory, void *const p) { const size_t alloc_size = bytes_for_header + size_in_bytes; // Get the original ptr we allocated. void *const original_ptr = data - bytes_for_header; - memory->impl->Deallocate(original_ptr, alloc_size, alloc_align); + memory.Deallocate(original_ptr, alloc_size, alloc_align); } +} // namespace + +void *mgp_alloc(mgp_memory *memory, size_t size_in_bytes) { + return mgp_aligned_alloc(memory, size_in_bytes, alignof(std::max_align_t)); +} + +void *mgp_aligned_alloc(mgp_memory *memory, const size_t size_in_bytes, const size_t alignment) { + return MgpAlignedAllocImpl(*memory->impl, size_in_bytes, alignment); +} + +void mgp_free(mgp_memory *memory, void *const p) { MgpFreeImpl(*memory->impl, p); } + +void *mgp_global_alloc(size_t size_in_bytes) { + return mgp_global_aligned_alloc(size_in_bytes, alignof(std::max_align_t)); +} + +void *mgp_global_aligned_alloc(size_t size_in_bytes, size_t alignment) { + return MgpAlignedAllocImpl(gModuleRegistry.GetSharedMemoryResource(), size_in_bytes, alignment); +} + +void mgp_global_free(void *const p) { MgpFreeImpl(gModuleRegistry.GetSharedMemoryResource(), p); } + namespace { // May throw whatever the constructor of U throws. `std::bad_alloc` is handled diff --git a/src/query/procedure/module.cpp b/src/query/procedure/module.cpp index 99bc54dc9..a0c9a4103 100644 --- a/src/query/procedure/module.cpp +++ b/src/query/procedure/module.cpp @@ -1,4 +1,5 @@ #include "query/procedure/module.hpp" +#include "utils/memory.hpp" extern "C" { #include @@ -478,6 +479,8 @@ void ModuleRegistry::UnloadAllModules() { DoUnloadAllModules(); } +utils::MemoryResource &ModuleRegistry::GetSharedMemoryResource() { return *shared_; } + std::optional> FindProcedure( const ModuleRegistry &module_registry, const std::string_view &fully_qualified_procedure_name, utils::MemoryResource *memory) { diff --git a/src/query/procedure/module.hpp b/src/query/procedure/module.hpp index cdae588c8..0a96c7d83 100644 --- a/src/query/procedure/module.hpp +++ b/src/query/procedure/module.hpp @@ -52,6 +52,7 @@ class ModulePtr final { class ModuleRegistry final { std::map, std::less<>> modules_; mutable utils::RWLock lock_{utils::RWLock::Priority::WRITE}; + std::unique_ptr shared_{std::make_unique()}; bool RegisterModule(const std::string_view &name, std::unique_ptr module); @@ -96,6 +97,9 @@ class ModuleRegistry final { /// Takes a write lock. 
void UnloadAllModules(); + /// Returns the shared memory allocator used by modules + utils::MemoryResource &GetSharedMemoryResource(); + private: std::vector modules_dirs_; }; diff --git a/src/query/procedure/py_module.cpp b/src/query/procedure/py_module.cpp index 43fbef989..96ef5f387 100644 --- a/src/query/procedure/py_module.cpp +++ b/src/query/procedure/py_module.cpp @@ -505,7 +505,7 @@ std::optional AddMultipleRecordsFromPython(mgp_result *result return std::nullopt; } -void CallPythonProcedure(py::Object py_cb, const mgp_list *args, const mgp_graph *graph, mgp_result *result, +void CallPythonProcedure(const py::Object &py_cb, const mgp_list *args, const mgp_graph *graph, mgp_result *result, mgp_memory *memory) { auto gil = py::EnsureGIL(); diff --git a/src/query/serialization/property_value.cpp b/src/query/serialization/property_value.cpp new file mode 100644 index 000000000..262ff3cf0 --- /dev/null +++ b/src/query/serialization/property_value.cpp @@ -0,0 +1,94 @@ +#include "query/serialization/property_value.hpp" +#include "storage/v2/property_value.hpp" +#include "utils/logging.hpp" + +namespace query::serialization { + +nlohmann::json SerializePropertyValue(const storage::PropertyValue &property_value) { + using Type = storage::PropertyValue::Type; + switch (property_value.type()) { + case Type::Null: + return {}; + case Type::Bool: + return property_value.ValueBool(); + case Type::Int: + return property_value.ValueInt(); + case Type::Double: + return property_value.ValueDouble(); + case Type::String: + return property_value.ValueString(); + case Type::List: + return SerializePropertyValueVector(property_value.ValueList()); + case Type::Map: + return SerializePropertyValueMap(property_value.ValueMap()); + } +} + +nlohmann::json SerializePropertyValueVector(const std::vector &values) { + nlohmann::json array = nlohmann::json::array(); + for (const auto &value : values) { + array.push_back(SerializePropertyValue(value)); + } + return array; +} + +nlohmann::json SerializePropertyValueMap(const std::map ¶meters) { + nlohmann::json data = nlohmann::json::object(); + + for (const auto &[key, value] : parameters) { + data[key] = SerializePropertyValue(value); + } + + return data; +}; + +storage::PropertyValue DeserializePropertyValue(const nlohmann::json &data) { + if (data.is_null()) { + return storage::PropertyValue(); + } + + if (data.is_boolean()) { + return storage::PropertyValue(data.get()); + } + + if (data.is_number_integer()) { + return storage::PropertyValue(data.get()); + } + + if (data.is_number_float()) { + return storage::PropertyValue(data.get()); + } + + if (data.is_string()) { + return storage::PropertyValue(data.get()); + } + + if (data.is_array()) { + return storage::PropertyValue(DeserializePropertyValueList(data)); + } + + MG_ASSERT(data.is_object(), "Unknown type found in the trigger storage"); + return storage::PropertyValue(DeserializePropertyValueMap(data)); +} + +std::vector DeserializePropertyValueList(const nlohmann::json::array_t &data) { + std::vector property_values; + property_values.reserve(data.size()); + for (const auto &value : data) { + property_values.emplace_back(DeserializePropertyValue(value)); + } + + return property_values; +} + +std::map DeserializePropertyValueMap(const nlohmann::json::object_t &data) { + std::map property_values; + + for (const auto &[key, value] : data) { + property_values.emplace(key, DeserializePropertyValue(value)); + } + + return property_values; +} + +} // namespace query::serialization diff --git 
a/src/query/serialization/property_value.hpp b/src/query/serialization/property_value.hpp new file mode 100644 index 000000000..2ea5892c2 --- /dev/null +++ b/src/query/serialization/property_value.hpp @@ -0,0 +1,21 @@ +#pragma once + +#include + +#include "storage/v2/property_value.hpp" + +namespace query::serialization { + +nlohmann::json SerializePropertyValue(const storage::PropertyValue &property_value); + +nlohmann::json SerializePropertyValueVector(const std::vector &values); + +nlohmann::json SerializePropertyValueMap(const std::map ¶meters); + +storage::PropertyValue DeserializePropertyValue(const nlohmann::json &data); + +std::vector DeserializePropertyValueList(const nlohmann::json::array_t &data); + +std::map DeserializePropertyValueMap(const nlohmann::json::object_t &data); + +} // namespace query::serialization diff --git a/src/query/trigger.cpp b/src/query/trigger.cpp new file mode 100644 index 000000000..89fb90681 --- /dev/null +++ b/src/query/trigger.cpp @@ -0,0 +1,398 @@ +#include "query/trigger.hpp" + +#include + +#include "query/context.hpp" +#include "query/cypher_query_interpreter.hpp" +#include "query/db_accessor.hpp" +#include "query/frontend/ast/ast.hpp" +#include "query/interpret/frame.hpp" +#include "query/serialization/property_value.hpp" +#include "query/typed_value.hpp" +#include "storage/v2/property_value.hpp" +#include "utils/memory.hpp" + +namespace query { +namespace { +auto IdentifierString(const TriggerIdentifierTag tag) noexcept { + switch (tag) { + case TriggerIdentifierTag::CREATED_VERTICES: + return "createdVertices"; + + case TriggerIdentifierTag::CREATED_EDGES: + return "createdEdges"; + + case TriggerIdentifierTag::CREATED_OBJECTS: + return "createdObjects"; + + case TriggerIdentifierTag::DELETED_VERTICES: + return "deletedVertices"; + + case TriggerIdentifierTag::DELETED_EDGES: + return "deletedEdges"; + + case TriggerIdentifierTag::DELETED_OBJECTS: + return "deletedObjects"; + + case TriggerIdentifierTag::SET_VERTEX_PROPERTIES: + return "setVertexProperties"; + + case TriggerIdentifierTag::SET_EDGE_PROPERTIES: + return "setEdgeProperties"; + + case TriggerIdentifierTag::REMOVED_VERTEX_PROPERTIES: + return "removedVertexProperties"; + + case TriggerIdentifierTag::REMOVED_EDGE_PROPERTIES: + return "removedEdgeProperties"; + + case TriggerIdentifierTag::SET_VERTEX_LABELS: + return "setVertexLabels"; + + case TriggerIdentifierTag::REMOVED_VERTEX_LABELS: + return "removedVertexLabels"; + + case TriggerIdentifierTag::UPDATED_VERTICES: + return "updatedVertices"; + + case TriggerIdentifierTag::UPDATED_EDGES: + return "updatedEdges"; + + case TriggerIdentifierTag::UPDATED_OBJECTS: + return "updatedObjects"; + } +} + +template +concept SameAsIdentifierTag = std::same_as; + +template +std::vector> TagsToIdentifiers(const TArgs &...args) { + std::vector> identifiers; + identifiers.reserve(sizeof...(args)); + + auto add_identifier = [&identifiers](const auto tag) { + identifiers.emplace_back(Identifier{IdentifierString(tag), false}, tag); + }; + + (add_identifier(args), ...); + + return identifiers; +}; + +std::vector> GetPredefinedIdentifiers(const TriggerEventType event_type) { + using IdentifierTag = TriggerIdentifierTag; + using EventType = TriggerEventType; + + switch (event_type) { + case EventType::ANY: + return TagsToIdentifiers( + IdentifierTag::CREATED_VERTICES, IdentifierTag::CREATED_EDGES, IdentifierTag::CREATED_OBJECTS, + IdentifierTag::DELETED_VERTICES, IdentifierTag::DELETED_EDGES, IdentifierTag::DELETED_OBJECTS, + 
IdentifierTag::SET_VERTEX_PROPERTIES, IdentifierTag::REMOVED_VERTEX_PROPERTIES, + IdentifierTag::SET_VERTEX_LABELS, IdentifierTag::REMOVED_VERTEX_LABELS, IdentifierTag::UPDATED_VERTICES, + IdentifierTag::SET_EDGE_PROPERTIES, IdentifierTag::REMOVED_EDGE_PROPERTIES, IdentifierTag::UPDATED_EDGES, + IdentifierTag::UPDATED_OBJECTS); + + case EventType::CREATE: + return TagsToIdentifiers(IdentifierTag::CREATED_VERTICES, IdentifierTag::CREATED_EDGES, + IdentifierTag::CREATED_OBJECTS); + + case EventType::VERTEX_CREATE: + return TagsToIdentifiers(IdentifierTag::CREATED_VERTICES); + + case EventType::EDGE_CREATE: + return TagsToIdentifiers(IdentifierTag::CREATED_EDGES); + + case EventType::DELETE: + return TagsToIdentifiers(IdentifierTag::DELETED_VERTICES, IdentifierTag::DELETED_EDGES, + IdentifierTag::DELETED_OBJECTS); + + case EventType::VERTEX_DELETE: + return TagsToIdentifiers(IdentifierTag::DELETED_VERTICES); + + case EventType::EDGE_DELETE: + return TagsToIdentifiers(IdentifierTag::DELETED_EDGES); + + case EventType::UPDATE: + return TagsToIdentifiers(IdentifierTag::SET_VERTEX_PROPERTIES, IdentifierTag::REMOVED_VERTEX_PROPERTIES, + IdentifierTag::SET_VERTEX_LABELS, IdentifierTag::REMOVED_VERTEX_LABELS, + IdentifierTag::UPDATED_VERTICES, IdentifierTag::SET_EDGE_PROPERTIES, + IdentifierTag::REMOVED_EDGE_PROPERTIES, IdentifierTag::UPDATED_EDGES, + IdentifierTag::UPDATED_OBJECTS); + + case EventType::VERTEX_UPDATE: + return TagsToIdentifiers(IdentifierTag::SET_VERTEX_PROPERTIES, IdentifierTag::REMOVED_VERTEX_PROPERTIES, + IdentifierTag::SET_VERTEX_LABELS, IdentifierTag::REMOVED_VERTEX_LABELS, + IdentifierTag::UPDATED_VERTICES); + + case EventType::EDGE_UPDATE: + return TagsToIdentifiers(IdentifierTag::SET_EDGE_PROPERTIES, IdentifierTag::REMOVED_EDGE_PROPERTIES, + IdentifierTag::UPDATED_EDGES); + } +} +} // namespace + +Trigger::Trigger(std::string name, const std::string &query, + const std::map &user_parameters, + const TriggerEventType event_type, utils::SkipList *query_cache, + DbAccessor *db_accessor, utils::SpinLock *antlr_lock) + : name_{std::move(name)}, + parsed_statements_{ParseQuery(query, user_parameters, query_cache, antlr_lock)}, + event_type_{event_type} { + // We check immediately if the query is valid by trying to create a plan. 
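+  // If planning throws here, the whole constructor fails; TriggerStore::AddTrigger
+  // relies on that to reject a trigger with an invalid statement up front instead
+  // of at commit time.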
+  GetPlan(db_accessor);
+}
+
+Trigger::TriggerPlan::TriggerPlan(std::unique_ptr<LogicalPlan> logical_plan, std::vector<IdentifierInfo> identifiers)
+    : cached_plan(std::move(logical_plan)), identifiers(std::move(identifiers)) {}
+
+std::shared_ptr<Trigger::TriggerPlan> Trigger::GetPlan(DbAccessor *db_accessor) const {
+  std::lock_guard plan_guard{plan_lock_};
+  if (parsed_statements_.is_cacheable && trigger_plan_ && !trigger_plan_->cached_plan.IsExpired()) {
+    return trigger_plan_;
+  }
+
+  auto identifiers = GetPredefinedIdentifiers(event_type_);
+
+  AstStorage ast_storage;
+  ast_storage.properties_ = parsed_statements_.ast_storage.properties_;
+  ast_storage.labels_ = parsed_statements_.ast_storage.labels_;
+  ast_storage.edge_types_ = parsed_statements_.ast_storage.edge_types_;
+
+  std::vector<Identifier *> predefined_identifiers;
+  predefined_identifiers.reserve(identifiers.size());
+  std::transform(identifiers.begin(), identifiers.end(), std::back_inserter(predefined_identifiers),
+                 [](auto &identifier) { return &identifier.first; });
+
+  auto logical_plan = MakeLogicalPlan(std::move(ast_storage), utils::Downcast<CypherQuery>(parsed_statements_.query),
+                                      parsed_statements_.parameters, db_accessor, predefined_identifiers);
+
+  trigger_plan_ = std::make_shared<TriggerPlan>(std::move(logical_plan), std::move(identifiers));
+  return trigger_plan_;
+}
+
+void Trigger::Execute(DbAccessor *dba, utils::MonotonicBufferResource *execution_memory,
+                      const double max_execution_time_sec, std::atomic<bool> *is_shutting_down,
+                      const TriggerContext &context) const {
+  if (!context.ShouldEventTrigger(event_type_)) {
+    return;
+  }
+
+  spdlog::debug("Executing trigger '{}'", name_);
+  auto trigger_plan = GetPlan(dba);
+  MG_ASSERT(trigger_plan, "Invalid trigger plan received");
+  auto &[plan, identifiers] = *trigger_plan;
+
+  ExecutionContext ctx;
+  ctx.db_accessor = dba;
+  ctx.symbol_table = plan.symbol_table();
+  ctx.evaluation_context.timestamp =
+      std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now().time_since_epoch())
+          .count();
+  ctx.evaluation_context.parameters = parsed_statements_.parameters;
+  ctx.evaluation_context.properties = NamesToProperties(plan.ast_storage().properties_, dba);
+  ctx.evaluation_context.labels = NamesToLabels(plan.ast_storage().labels_, dba);
+  ctx.timer = utils::AsyncTimer(max_execution_time_sec);
+  ctx.is_shutting_down = is_shutting_down;
+  ctx.is_profile_query = false;
+
+  // Set up temporary memory for a single Pull. Initial memory comes from the
+  // stack. 256 KiB should fit on the stack and should be more than enough for a
+  // single `Pull`.
+  constexpr size_t stack_size = 256 * 1024;
+  char stack_data[stack_size];
+
+  // We can throw on every query because simple delete queries will use only
+  // the stack-allocated buffer.
+  // Also, we want to throw only when the query engine requests more memory, not
+  // the storage, so we add the exception to the allocator.
+  utils::ResourceWithOutOfMemoryException resource_with_exception;
+  utils::MonotonicBufferResource monotonic_memory(&stack_data[0], stack_size, &resource_with_exception);
+  // TODO (mferencevic): Tune the parameters accordingly.
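+  // Allocation chain as wired below: pool_memory -> monotonic_memory -> the
+  // stack buffer above, with resource_with_exception as the upstream so that
+  // running out of memory surfaces as an exception in the query engine rather
+  // than in the storage layer.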
+  utils::PoolResource pool_memory(128, 1024, &monotonic_memory);
+  ctx.evaluation_context.memory = &pool_memory;
+
+  auto cursor = plan.plan().MakeCursor(execution_memory);
+  Frame frame{plan.symbol_table().max_position(), execution_memory};
+  for (const auto &[identifier, tag] : identifiers) {
+    if (identifier.symbol_pos_ == -1) {
+      continue;
+    }
+
+    frame[plan.symbol_table().at(identifier)] = context.GetTypedValue(tag, dba);
+  }
+
+  while (cursor->Pull(frame, ctx))
+    ;
+
+  cursor->Shutdown();
+}
+
+namespace {
+constexpr uint64_t kVersion{1};
+}  // namespace
+
+TriggerStore::TriggerStore(std::filesystem::path directory, utils::SkipList<QueryCacheEntry> *query_cache,
+                           DbAccessor *db_accessor, utils::SpinLock *antlr_lock)
+    : storage_{std::move(directory)} {
+  spdlog::info("Loading triggers...");
+
+  for (const auto &[trigger_name, trigger_data] : storage_) {
+    // a structured binding cannot be captured by a lambda
+    const auto get_failed_message = [](const std::string_view trigger_name, const std::string_view message) {
+      return fmt::format("Failed to load trigger '{}'. {}", trigger_name, message);
+    };
+
+    const auto invalid_state_message = get_failed_message(trigger_name, "Invalid state of the trigger data.");
+
+    spdlog::debug("Loading trigger '{}'", trigger_name);
+    auto json_trigger_data = nlohmann::json::parse(trigger_data);
+
+    if (!json_trigger_data["version"].is_number_unsigned()) {
+      spdlog::warn(invalid_state_message);
+      continue;
+    }
+    if (json_trigger_data["version"] != kVersion) {
+      spdlog::warn(get_failed_message(trigger_name, "Invalid version of the trigger data."));
+      continue;
+    }
+
+    if (!json_trigger_data["statement"].is_string()) {
+      spdlog::warn(invalid_state_message);
+      continue;
+    }
+    auto statement = json_trigger_data["statement"].get<std::string>();
+
+    if (!json_trigger_data["phase"].is_number_integer()) {
+      spdlog::warn(invalid_state_message);
+      continue;
+    }
+    const auto phase = json_trigger_data["phase"].get<TriggerPhase>();
+
+    if (!json_trigger_data["event_type"].is_number_integer()) {
+      spdlog::warn(invalid_state_message);
+      continue;
+    }
+    const auto event_type = json_trigger_data["event_type"].get<TriggerEventType>();
+
+    if (!json_trigger_data["user_parameters"].is_object()) {
+      spdlog::warn(invalid_state_message);
+      continue;
+    }
+    const auto user_parameters = serialization::DeserializePropertyValueMap(json_trigger_data["user_parameters"]);
+
+    std::optional<Trigger> trigger;
+    try {
+      trigger.emplace(trigger_name, statement, user_parameters, event_type, query_cache, db_accessor, antlr_lock);
+    } catch (const utils::BasicException &e) {
+      spdlog::warn("Failed to create trigger '{}' because: {}", trigger_name, e.what());
+      continue;
+    }
+
+    auto triggers_acc =
+        phase == TriggerPhase::BEFORE_COMMIT ? before_commit_triggers_.access() : after_commit_triggers_.access();
+    triggers_acc.insert(std::move(*trigger));
+
+    spdlog::debug("Trigger loaded successfully!");
+  }
+}
+
+void TriggerStore::AddTrigger(const std::string &name, const std::string &query,
+                              const std::map<std::string, storage::PropertyValue> &user_parameters,
+                              TriggerEventType event_type, TriggerPhase phase,
+                              utils::SkipList<QueryCacheEntry> *query_cache, DbAccessor *db_accessor,
+                              utils::SpinLock *antlr_lock) {
+  std::unique_lock store_guard{store_lock_};
+  if (storage_.Get(name)) {
+    throw utils::BasicException("Trigger with the same name already exists.");
+  }
+
+  std::optional<Trigger> trigger;
+  try {
+    trigger.emplace(name, query, user_parameters, event_type, query_cache, db_accessor, antlr_lock);
+  } catch (const utils::BasicException &e) {
+    const auto identifiers = GetPredefinedIdentifiers(event_type);
+    std::stringstream identifier_names_stream;
+    utils::PrintIterable(identifier_names_stream, identifiers, ", ",
+                         [](auto &stream, const auto &identifier) { stream << identifier.first.name_; });
+
+    throw utils::BasicException(
+        "Failed creating the trigger.\nError message: '{}'\nThe error was most likely generated because of the wrong "
+        "statement that this trigger executes.\nMake sure all predefined variables used are present for the specified "
+        "event.\nAllowed variables for event '{}' are: {}",
+        e.what(), TriggerEventTypeToString(event_type), identifier_names_stream.str());
+  }
+
+  nlohmann::json data = nlohmann::json::object();
+  data["statement"] = query;
+  data["user_parameters"] = serialization::SerializePropertyValueMap(user_parameters);
+  data["event_type"] = event_type;
+  data["phase"] = phase;
+  data["version"] = kVersion;
+  storage_.Put(name, data.dump());
+  store_guard.unlock();
+
+  auto triggers_acc =
+      phase == TriggerPhase::BEFORE_COMMIT ? before_commit_triggers_.access() : after_commit_triggers_.access();
+  triggers_acc.insert(std::move(*trigger));
+}
+
+void TriggerStore::DropTrigger(const std::string &name) {
+  std::unique_lock store_guard{store_lock_};
+  const auto maybe_trigger_data = storage_.Get(name);
+  if (!maybe_trigger_data) {
+    throw utils::BasicException("Trigger with name '{}' doesn't exist", name);
+  }
+
+  nlohmann::json data;
+  try {
+    data = nlohmann::json::parse(*maybe_trigger_data);
+  } catch (const nlohmann::json::parse_error &e) {
+    throw utils::BasicException("Couldn't load trigger data!");
+  }
+
+  if (!data.is_object()) {
+    throw utils::BasicException("Couldn't load trigger data!");
+  }
+
+  if (!data["phase"].is_number_integer()) {
+    throw utils::BasicException("Invalid type loaded inside the trigger data!");
+  }
+
+  auto triggers_acc =
+      data["phase"] == TriggerPhase::BEFORE_COMMIT ?
before_commit_triggers_.access() : after_commit_triggers_.access(); + triggers_acc.remove(name); + storage_.Delete(name); +} + +std::vector TriggerStore::GetTriggerInfo() const { + std::vector info; + info.reserve(before_commit_triggers_.size() + after_commit_triggers_.size()); + + const auto add_info = [&](const utils::SkipList &trigger_list, const TriggerPhase phase) { + for (const auto &trigger : trigger_list.access()) { + info.push_back({trigger.Name(), trigger.OriginalStatement(), trigger.EventType(), phase}); + } + }; + + add_info(before_commit_triggers_, TriggerPhase::BEFORE_COMMIT); + add_info(after_commit_triggers_, TriggerPhase::AFTER_COMMIT); + + return info; +} + +std::unordered_set TriggerStore::GetEventTypes() const { + std::unordered_set event_types; + + const auto add_event_types = [&](const utils::SkipList &trigger_list) { + for (const auto &trigger : trigger_list.access()) { + event_types.insert(trigger.EventType()); + } + }; + + add_event_types(before_commit_triggers_); + add_event_types(after_commit_triggers_); + return event_types; +} +} // namespace query diff --git a/src/query/trigger.hpp b/src/query/trigger.hpp new file mode 100644 index 000000000..38e9005ef --- /dev/null +++ b/src/query/trigger.hpp @@ -0,0 +1,96 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "kvstore/kvstore.hpp" +#include "query/cypher_query_interpreter.hpp" +#include "query/db_accessor.hpp" +#include "query/frontend/ast/ast.hpp" +#include "query/trigger_context.hpp" +#include "storage/v2/property_value.hpp" +#include "utils/skip_list.hpp" +#include "utils/spin_lock.hpp" + +namespace query { +struct Trigger { + explicit Trigger(std::string name, const std::string &query, + const std::map &user_parameters, TriggerEventType event_type, + utils::SkipList *query_cache, DbAccessor *db_accessor, utils::SpinLock *antlr_lock); + + void Execute(DbAccessor *dba, utils::MonotonicBufferResource *execution_memory, double max_execution_time_sec, + std::atomic *is_shutting_down, const TriggerContext &context) const; + + bool operator==(const Trigger &other) const { return name_ == other.name_; } + // NOLINTNEXTLINE (modernize-use-nullptr) + bool operator<(const Trigger &other) const { return name_ < other.name_; } + bool operator==(const std::string &other) const { return name_ == other; } + // NOLINTNEXTLINE (modernize-use-nullptr) + bool operator<(const std::string &other) const { return name_ < other; } + + const auto &Name() const noexcept { return name_; } + const auto &OriginalStatement() const noexcept { return parsed_statements_.query_string; } + auto EventType() const noexcept { return event_type_; } + + private: + struct TriggerPlan { + using IdentifierInfo = std::pair; + + explicit TriggerPlan(std::unique_ptr logical_plan, std::vector identifiers); + + CachedPlan cached_plan; + std::vector identifiers; + }; + std::shared_ptr GetPlan(DbAccessor *db_accessor) const; + + std::string name_; + ParsedQuery parsed_statements_; + + TriggerEventType event_type_; + + mutable utils::SpinLock plan_lock_; + mutable std::shared_ptr trigger_plan_; +}; + +enum class TriggerPhase : uint8_t { BEFORE_COMMIT, AFTER_COMMIT }; + +struct TriggerStore { + explicit TriggerStore(std::filesystem::path directory, utils::SkipList *query_cache, + DbAccessor *db_accessor, utils::SpinLock *antlr_lock); + + void AddTrigger(const std::string &name, const std::string &query, + const std::map &user_parameters, TriggerEventType event_type, + TriggerPhase phase, utils::SkipList 
*query_cache, DbAccessor *db_accessor, + utils::SpinLock *antlr_lock); + + void DropTrigger(const std::string &name); + + struct TriggerInfo { + std::string name; + std::string statement; + TriggerEventType event_type; + TriggerPhase phase; + }; + + std::vector GetTriggerInfo() const; + + const auto &BeforeCommitTriggers() const noexcept { return before_commit_triggers_; } + const auto &AfterCommitTriggers() const noexcept { return after_commit_triggers_; } + + bool HasTriggers() const noexcept { return before_commit_triggers_.size() > 0 || after_commit_triggers_.size() > 0; } + std::unordered_set GetEventTypes() const; + + private: + utils::SpinLock store_lock_; + kvstore::KVStore storage_; + + utils::SkipList before_commit_triggers_; + utils::SkipList after_commit_triggers_; +}; + +} // namespace query diff --git a/src/query/trigger_context.cpp b/src/query/trigger_context.cpp new file mode 100644 index 000000000..7daca97a5 --- /dev/null +++ b/src/query/trigger_context.cpp @@ -0,0 +1,549 @@ +#include "query/trigger.hpp" + +#include + +#include "query/context.hpp" +#include "query/cypher_query_interpreter.hpp" +#include "query/db_accessor.hpp" +#include "query/frontend/ast/ast.hpp" +#include "query/interpret/frame.hpp" +#include "query/serialization/property_value.hpp" +#include "query/typed_value.hpp" +#include "storage/v2/property_value.hpp" +#include "utils/memory.hpp" + +namespace query { +namespace { +template +concept WithToMap = requires(const T value, DbAccessor *dba) { + { value.ToMap(dba) } + ->std::same_as>; +}; + +template +TypedValue ToTypedValue(const T &value, DbAccessor *dba) { + return TypedValue{value.ToMap(dba)}; +} + +template +TypedValue ToTypedValue(const detail::CreatedObject &created_object, [[maybe_unused]] DbAccessor *dba) { + return TypedValue{created_object.object}; +} + +template +TypedValue ToTypedValue(const detail::DeletedObject &deleted_object, [[maybe_unused]] DbAccessor *dba) { + return TypedValue{deleted_object.object}; +} + +template +concept WithIsValid = requires(const T value) { + { value.IsValid() } + ->std::same_as; +}; + +template +concept ConvertableToTypedValue = requires(T value, DbAccessor *dba) { + { ToTypedValue(value, dba) } + ->std::same_as; +} +&&WithIsValid; + +template +concept LabelUpdateContext = utils::SameAsAnyOf; + +template +TypedValue ToTypedValue(const std::vector &values, DbAccessor *dba) { + std::unordered_map> vertices_by_labels; + + for (const auto &value : values) { + if (value.IsValid()) { + vertices_by_labels[value.label_id].emplace_back(value.object); + } + } + + TypedValue result{std::vector{}}; + auto &typed_values = result.ValueList(); + for (auto &[label_id, vertices] : vertices_by_labels) { + typed_values.emplace_back(std::map{ + {std::string{"label"}, TypedValue(dba->LabelToName(label_id))}, + {std::string{"vertices"}, TypedValue(std::move(vertices))}, + }); + } + + return result; +} + +template +TypedValue ToTypedValue(const std::vector &values, DbAccessor *dba) requires(!LabelUpdateContext) { + TypedValue result{std::vector{}}; + auto &typed_values = result.ValueList(); + typed_values.reserve(values.size()); + + for (const auto &value : values) { + if (value.IsValid()) { + typed_values.push_back(ToTypedValue(value, dba)); + } + } + + return result; +} + +template +const char *TypeToString() { + if constexpr (std::same_as>) { + return "created_vertex"; + } else if constexpr (std::same_as>) { + return "created_edge"; + } else if constexpr (std::same_as>) { + return "deleted_vertex"; + } else if constexpr 
(std::same_as>) { + return "deleted_edge"; + } else if constexpr (std::same_as>) { + return "set_vertex_property"; + } else if constexpr (std::same_as>) { + return "set_edge_property"; + } else if constexpr (std::same_as>) { + return "removed_vertex_property"; + } else if constexpr (std::same_as>) { + return "removed_edge_property"; + } else if constexpr (std::same_as) { + return "set_vertex_label"; + } else if constexpr (std::same_as) { + return "removed_vertex_label"; + } +} + +template +concept ContextInfo = WithToMap &&WithIsValid; + +template +TypedValue Concatenate(DbAccessor *dba, const std::vector &...args) { + const auto size = (args.size() + ...); + TypedValue result{std::vector{}}; + auto &concatenated = result.ValueList(); + concatenated.reserve(size); + + const auto add_to_concatenated = [&](const std::vector &values) { + for (const auto &value : values) { + if (value.IsValid()) { + auto map = value.ToMap(dba); + map["event_type"] = TypeToString(); + concatenated.emplace_back(std::move(map)); + } + } + }; + + (add_to_concatenated(args), ...); + + return result; +} + +template +concept WithEmpty = requires(const T value) { + { value.empty() } + ->std::same_as; +}; + +template +bool AnyContainsValue(const TContainer &...value_containers) { + return (!value_containers.empty() || ...); +} + +template +using ChangesSummary = + std::tuple>, std::vector>, + std::vector>, + std::vector>>; + +template +using PropertyChangesLists = + std::pair>, std::vector>>; + +template +[[nodiscard]] PropertyChangesLists PropertyMapToList( + query::TriggerContextCollector::PropertyChangesMap &&map) { + std::vector> set_object_properties; + std::vector> removed_object_properties; + + for (auto it = map.begin(); it != map.end(); it = map.erase(it)) { + const auto &[key, property_change_info] = *it; + if (property_change_info.old_value.IsNull() && property_change_info.new_value.IsNull()) { + // no change happened on the transaction level + continue; + } + + if (const auto is_equal = property_change_info.old_value == property_change_info.new_value; + is_equal.IsBool() && is_equal.ValueBool()) { + // no change happened on the transaction level + continue; + } + + if (property_change_info.new_value.IsNull()) { + removed_object_properties.emplace_back(key.first, key.second /* property_id */, + std::move(property_change_info.old_value)); + } else { + set_object_properties.emplace_back(key.first, key.second, std::move(property_change_info.old_value), + std::move(property_change_info.new_value)); + } + } + + return PropertyChangesLists{std::move(set_object_properties), std::move(removed_object_properties)}; +} + +template +[[nodiscard]] ChangesSummary Summarize(query::TriggerContextCollector::Registry &®istry) { + auto [set_object_properties, removed_object_properties] = PropertyMapToList(std::move(registry.property_changes)); + std::vector> created_objects_vec; + created_objects_vec.reserve(registry.created_objects.size()); + std::transform(registry.created_objects.begin(), registry.created_objects.end(), + std::back_inserter(created_objects_vec), + [](const auto &gid_and_created_object) { return gid_and_created_object.second; }); + registry.created_objects.clear(); + + return {std::move(created_objects_vec), std::move(registry.deleted_objects), std::move(set_object_properties), + std::move(removed_object_properties)}; +} +} // namespace + +namespace detail { +bool SetVertexLabel::IsValid() const { return object.IsVisible(storage::View::OLD); } + +std::map SetVertexLabel::ToMap(DbAccessor *dba) const { + 
return {{"vertex", TypedValue{object}}, {"label", TypedValue{dba->LabelToName(label_id)}}}; +} + +bool RemovedVertexLabel::IsValid() const { return object.IsVisible(storage::View::OLD); } + +std::map RemovedVertexLabel::ToMap(DbAccessor *dba) const { + return {{"vertex", TypedValue{object}}, {"label", TypedValue{dba->LabelToName(label_id)}}}; +} +} // namespace detail + +const char *TriggerEventTypeToString(const TriggerEventType event_type) { + switch (event_type) { + case TriggerEventType::ANY: + return "ANY"; + + case TriggerEventType::CREATE: + return "CREATE"; + + case TriggerEventType::VERTEX_CREATE: + return "() CREATE"; + + case TriggerEventType::EDGE_CREATE: + return "--> CREATE"; + + case TriggerEventType::DELETE: + return "DELETE"; + + case TriggerEventType::VERTEX_DELETE: + return "() DELETE"; + + case TriggerEventType::EDGE_DELETE: + return "--> DELETE"; + + case TriggerEventType::UPDATE: + return "UPDATE"; + + case TriggerEventType::VERTEX_UPDATE: + return "() UPDATE"; + + case TriggerEventType::EDGE_UPDATE: + return "--> UPDATE"; + } +} + +void TriggerContext::AdaptForAccessor(DbAccessor *accessor) { + { + // adapt created_vertices_ + auto it = created_vertices_.begin(); + for (auto &created_vertex : created_vertices_) { + if (auto maybe_vertex = accessor->FindVertex(created_vertex.object.Gid(), storage::View::OLD); maybe_vertex) { + *it = detail::CreatedObject{*maybe_vertex}; + ++it; + } + } + created_vertices_.erase(it, created_vertices_.end()); + } + + // deleted_vertices_ should keep the transaction context of the transaction which deleted it + // because no other transaction can modify an object after it's deleted so it should be the + // latest state of the object + + const auto adapt_context_with_vertex = [accessor](auto *values) { + auto it = values->begin(); + for (auto &value : *values) { + if (auto maybe_vertex = accessor->FindVertex(value.object.Gid(), storage::View::OLD); maybe_vertex) { + *it = std::move(value); + it->object = *maybe_vertex; + ++it; + } + } + values->erase(it, values->end()); + }; + + adapt_context_with_vertex(&set_vertex_properties_); + adapt_context_with_vertex(&removed_vertex_properties_); + adapt_context_with_vertex(&set_vertex_labels_); + adapt_context_with_vertex(&removed_vertex_labels_); + + { + // adapt created_edges + auto it = created_edges_.begin(); + for (auto &created_edge : created_edges_) { + const auto maybe_from_vertex = accessor->FindVertex(created_edge.object.From().Gid(), storage::View::OLD); + if (!maybe_from_vertex) { + continue; + } + auto maybe_out_edges = maybe_from_vertex->OutEdges(storage::View::OLD); + MG_ASSERT(maybe_out_edges.HasValue()); + const auto edge_gid = created_edge.object.Gid(); + for (const auto &edge : *maybe_out_edges) { + if (edge.Gid() == edge_gid) { + *it = detail::CreatedObject{edge}; + ++it; + } + } + } + created_edges_.erase(it, created_edges_.end()); + } + + // deleted_edges_ should keep the transaction context of the transaction which deleted it + // because no other transaction can modify an object after it's deleted so it should be the + // latest state of the object + + const auto adapt_context_with_edge = [accessor](auto *values) { + auto it = values->begin(); + for (const auto &value : *values) { + if (auto maybe_vertex = accessor->FindVertex(value.object.From().Gid(), storage::View::OLD); maybe_vertex) { + auto maybe_out_edges = maybe_vertex->OutEdges(storage::View::OLD); + MG_ASSERT(maybe_out_edges.HasValue()); + for (const auto &edge : *maybe_out_edges) { + if (edge.Gid() == 
value.object.Gid()) { + *it = std::move(value); + it->object = edge; + ++it; + break; + } + } + } + } + values->erase(it, values->end()); + }; + + adapt_context_with_edge(&set_edge_properties_); + adapt_context_with_edge(&removed_edge_properties_); +} + +TypedValue TriggerContext::GetTypedValue(const TriggerIdentifierTag tag, DbAccessor *dba) const { + switch (tag) { + case TriggerIdentifierTag::CREATED_VERTICES: + return ToTypedValue(created_vertices_, dba); + + case TriggerIdentifierTag::CREATED_EDGES: + return ToTypedValue(created_edges_, dba); + + case TriggerIdentifierTag::CREATED_OBJECTS: + return Concatenate(dba, created_vertices_, created_edges_); + + case TriggerIdentifierTag::DELETED_VERTICES: + return ToTypedValue(deleted_vertices_, dba); + + case TriggerIdentifierTag::DELETED_EDGES: + return ToTypedValue(deleted_edges_, dba); + + case TriggerIdentifierTag::DELETED_OBJECTS: + return Concatenate(dba, deleted_vertices_, deleted_edges_); + + case TriggerIdentifierTag::SET_VERTEX_PROPERTIES: + return ToTypedValue(set_vertex_properties_, dba); + + case TriggerIdentifierTag::SET_EDGE_PROPERTIES: + return ToTypedValue(set_edge_properties_, dba); + + case TriggerIdentifierTag::REMOVED_VERTEX_PROPERTIES: + return ToTypedValue(removed_vertex_properties_, dba); + + case TriggerIdentifierTag::REMOVED_EDGE_PROPERTIES: + return ToTypedValue(removed_edge_properties_, dba); + + case TriggerIdentifierTag::SET_VERTEX_LABELS: + return ToTypedValue(set_vertex_labels_, dba); + + case TriggerIdentifierTag::REMOVED_VERTEX_LABELS: + return ToTypedValue(removed_vertex_labels_, dba); + + case TriggerIdentifierTag::UPDATED_VERTICES: + return Concatenate(dba, set_vertex_properties_, removed_vertex_properties_, set_vertex_labels_, + removed_vertex_labels_); + + case TriggerIdentifierTag::UPDATED_EDGES: + return Concatenate(dba, set_edge_properties_, removed_edge_properties_); + + case TriggerIdentifierTag::UPDATED_OBJECTS: + return Concatenate(dba, set_vertex_properties_, set_edge_properties_, removed_vertex_properties_, + removed_edge_properties_, set_vertex_labels_, removed_vertex_labels_); + } +} + +bool TriggerContext::ShouldEventTrigger(const TriggerEventType event_type) const { + using EventType = TriggerEventType; + switch (event_type) { + case EventType::ANY: + return AnyContainsValue(created_vertices_, created_edges_, deleted_vertices_, deleted_edges_, + set_vertex_properties_, set_edge_properties_, removed_vertex_properties_, + removed_edge_properties_, set_vertex_labels_, removed_vertex_labels_); + + case EventType::CREATE: + return AnyContainsValue(created_vertices_, created_edges_); + + case EventType::VERTEX_CREATE: + return AnyContainsValue(created_vertices_); + + case EventType::EDGE_CREATE: + return AnyContainsValue(created_edges_); + + case EventType::DELETE: + return AnyContainsValue(deleted_vertices_, deleted_edges_); + + case EventType::VERTEX_DELETE: + return AnyContainsValue(deleted_vertices_); + + case EventType::EDGE_DELETE: + return AnyContainsValue(deleted_edges_); + + case EventType::UPDATE: + return AnyContainsValue(set_vertex_properties_, set_edge_properties_, removed_vertex_properties_, + removed_edge_properties_, set_vertex_labels_, removed_vertex_labels_); + + case EventType::VERTEX_UPDATE: + return AnyContainsValue(set_vertex_properties_, removed_vertex_properties_, set_vertex_labels_, + removed_vertex_labels_); + + case EventType::EDGE_UPDATE: + return AnyContainsValue(set_edge_properties_, removed_edge_properties_); + } +} + +void 
TriggerContextCollector::UpdateLabelMap(const VertexAccessor vertex, const storage::LabelId label_id, + const LabelChange change) { + auto ®istry = GetRegistry(); + if (!registry.should_register_updated_objects || registry.created_objects.count(vertex.Gid())) { + return; + } + + if (auto it = label_changes_.find({vertex, label_id}); it != label_changes_.end()) { + it->second = std::clamp(it->second + LabelChangeToInt(change), -1, 1); + return; + } + + label_changes_.emplace(std::make_pair(vertex, label_id), LabelChangeToInt(change)); +} + +TriggerContextCollector::TriggerContextCollector(const std::unordered_set &event_types) { + for (const auto event_type : event_types) { + switch (event_type) { + case TriggerEventType::ANY: + vertex_registry_.should_register_created_objects = true; + edge_registry_.should_register_created_objects = true; + vertex_registry_.should_register_deleted_objects = true; + edge_registry_.should_register_deleted_objects = true; + vertex_registry_.should_register_updated_objects = true; + edge_registry_.should_register_updated_objects = true; + break; + case TriggerEventType::VERTEX_CREATE: + vertex_registry_.should_register_created_objects = true; + break; + case TriggerEventType::EDGE_CREATE: + edge_registry_.should_register_created_objects = true; + break; + case TriggerEventType::CREATE: + vertex_registry_.should_register_created_objects = true; + edge_registry_.should_register_created_objects = true; + break; + case TriggerEventType::VERTEX_DELETE: + vertex_registry_.should_register_deleted_objects = true; + break; + case TriggerEventType::EDGE_DELETE: + edge_registry_.should_register_deleted_objects = true; + break; + case TriggerEventType::DELETE: + vertex_registry_.should_register_deleted_objects = true; + edge_registry_.should_register_deleted_objects = true; + break; + case TriggerEventType::VERTEX_UPDATE: + vertex_registry_.should_register_updated_objects = true; + break; + case TriggerEventType::EDGE_UPDATE: + edge_registry_.should_register_updated_objects = true; + break; + case TriggerEventType::UPDATE: + vertex_registry_.should_register_updated_objects = true; + edge_registry_.should_register_updated_objects = true; + break; + } + } + + const auto deduce_if_should_register_created = [](auto ®istry) { + // Registering the created objects is necessary to: + // - eliminate deleted objects that were created in the same transaction + // - eliminate set/removed properties and labels of newly created objects + // because those changes are only relevant for objects that have existed before the transaction. 
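// Illustrative case (hypothetical trigger, not part of this diff): a trigger
// registered only for VERTEX_DELETE enables should_register_deleted_objects;
// the |= below then also enables created-object registration, so a vertex that
// is created and deleted within the same transaction is filtered out of the
// deleted objects instead of being reported as a deletion.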
+ registry.should_register_created_objects |= + registry.should_register_updated_objects || registry.should_register_deleted_objects; + }; + + deduce_if_should_register_created(vertex_registry_); + deduce_if_should_register_created(edge_registry_); +} + +bool TriggerContextCollector::ShouldRegisterVertexLabelChange() const { + return vertex_registry_.should_register_updated_objects; +} + +void TriggerContextCollector::RegisterSetVertexLabel(const VertexAccessor &vertex, const storage::LabelId label_id) { + UpdateLabelMap(vertex, label_id, LabelChange::ADD); +} + +void TriggerContextCollector::RegisterRemovedVertexLabel(const VertexAccessor &vertex, + const storage::LabelId label_id) { + UpdateLabelMap(vertex, label_id, LabelChange::REMOVE); +} + +int8_t TriggerContextCollector::LabelChangeToInt(LabelChange change) { + static_assert(std::is_same_v, int8_t>, + "The underlying type of LabelChange doesn't match the return type!"); + return static_cast(change); +} + +TriggerContext TriggerContextCollector::TransformToTriggerContext() && { + auto [created_vertices, deleted_vertices, set_vertex_properties, removed_vertex_properties] = + Summarize(std::move(vertex_registry_)); + auto [set_vertex_labels, removed_vertex_labels] = LabelMapToList(std::move(label_changes_)); + auto [created_edges, deleted_edges, set_edge_properties, removed_edge_properties] = + Summarize(std::move(edge_registry_)); + + return {std::move(created_vertices), std::move(deleted_vertices), + std::move(set_vertex_properties), std::move(removed_vertex_properties), + std::move(set_vertex_labels), std::move(removed_vertex_labels), + std::move(created_edges), std::move(deleted_edges), + std::move(set_edge_properties), std::move(removed_edge_properties)}; +} + +TriggerContextCollector::LabelChangesLists TriggerContextCollector::LabelMapToList(LabelChangesMap &&label_changes) { + std::vector set_vertex_labels; + std::vector removed_vertex_labels; + + for (const auto &[key, label_state] : label_changes) { + if (label_state == LabelChangeToInt(LabelChange::ADD)) { + set_vertex_labels.emplace_back(key.first, key.second); + } else if (label_state == LabelChangeToInt(LabelChange::REMOVE)) { + removed_vertex_labels.emplace_back(key.first, key.second); + } + } + + label_changes.clear(); + + return {std::move(set_vertex_labels), std::move(removed_vertex_labels)}; +} +} // namespace query diff --git a/src/query/trigger_context.hpp b/src/query/trigger_context.hpp new file mode 100644 index 000000000..8e985e6ce --- /dev/null +++ b/src/query/trigger_context.hpp @@ -0,0 +1,353 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "query/db_accessor.hpp" +#include "query/typed_value.hpp" +#include "storage/v2/property_value.hpp" +#include "storage/v2/view.hpp" +#include "utils/concepts.hpp" +#include "utils/fnv.hpp" + +namespace query { +namespace detail { +template +concept ObjectAccessor = utils::SameAsAnyOf; + +template +const char *ObjectString() { + if constexpr (std::same_as) { + return "vertex"; + } else { + return "edge"; + } +} + +template +struct CreatedObject { + explicit CreatedObject(const TAccessor &object) : object{object} {} + + bool IsValid() const { return object.IsVisible(storage::View::OLD); } + std::map ToMap([[maybe_unused]] DbAccessor *dba) const { + return {{ObjectString(), TypedValue{object}}}; + } + + TAccessor object; +}; + +template +struct DeletedObject { + explicit DeletedObject(const TAccessor &object) : object{object} {} + + bool IsValid() const { return 
object.IsVisible(storage::View::OLD); } + std::map ToMap([[maybe_unused]] DbAccessor *dba) const { + return {{ObjectString(), TypedValue{object}}}; + } + + TAccessor object; +}; + +template +struct SetObjectProperty { + explicit SetObjectProperty(const TAccessor &object, storage::PropertyId key, TypedValue old_value, + TypedValue new_value) + : object{object}, key{key}, old_value{std::move(old_value)}, new_value{std::move(new_value)} {} + + std::map ToMap(DbAccessor *dba) const { + return {{ObjectString(), TypedValue{object}}, + {"key", TypedValue{dba->PropertyToName(key)}}, + {"old", old_value}, + {"new", new_value}}; + } + + bool IsValid() const { return object.IsVisible(storage::View::OLD); } + + TAccessor object; + storage::PropertyId key; + TypedValue old_value; + TypedValue new_value; +}; + +template +struct RemovedObjectProperty { + explicit RemovedObjectProperty(const TAccessor &object, storage::PropertyId key, TypedValue old_value) + : object{object}, key{key}, old_value{std::move(old_value)} {} + + std::map ToMap(DbAccessor *dba) const { + return {{ObjectString(), TypedValue{object}}, + {"key", TypedValue{dba->PropertyToName(key)}}, + {"old", old_value}}; + } + + bool IsValid() const { return object.IsVisible(storage::View::OLD); } + + TAccessor object; + storage::PropertyId key; + TypedValue old_value; +}; + +struct SetVertexLabel { + explicit SetVertexLabel(const VertexAccessor &vertex, const storage::LabelId label_id) + : object{vertex}, label_id{label_id} {} + + std::map ToMap(DbAccessor *dba) const; + bool IsValid() const; + + VertexAccessor object; + storage::LabelId label_id; +}; + +struct RemovedVertexLabel { + explicit RemovedVertexLabel(const VertexAccessor &vertex, const storage::LabelId label_id) + : object{vertex}, label_id{label_id} {} + + std::map ToMap(DbAccessor *dba) const; + bool IsValid() const; + + VertexAccessor object; + storage::LabelId label_id; +}; +} // namespace detail + +enum class TriggerIdentifierTag : uint8_t { + CREATED_VERTICES, + CREATED_EDGES, + CREATED_OBJECTS, + DELETED_VERTICES, + DELETED_EDGES, + DELETED_OBJECTS, + SET_VERTEX_PROPERTIES, + SET_EDGE_PROPERTIES, + REMOVED_VERTEX_PROPERTIES, + REMOVED_EDGE_PROPERTIES, + SET_VERTEX_LABELS, + REMOVED_VERTEX_LABELS, + UPDATED_VERTICES, + UPDATED_EDGES, + UPDATED_OBJECTS +}; + +enum class TriggerEventType : uint8_t { + ANY, // Triggers on any change + VERTEX_CREATE, + EDGE_CREATE, + CREATE, + VERTEX_DELETE, + EDGE_DELETE, + DELETE, + VERTEX_UPDATE, + EDGE_UPDATE, + UPDATE +}; + +const char *TriggerEventTypeToString(TriggerEventType event_type); + +static_assert(std::is_trivially_copy_constructible_v, + "VertexAccessor is not trivially copy constructible, move it where possible and remove this assert"); + +static_assert(std::is_trivially_copy_constructible_v, + "EdgeAccessor is not trivially copy constructible, move it where possible and remove this assert"); + +// Holds the information necessary for triggers +class TriggerContext { + public: + TriggerContext() = default; + TriggerContext(std::vector> created_vertices, + std::vector> deleted_vertices, + std::vector> set_vertex_properties, + std::vector> removed_vertex_properties, + std::vector set_vertex_labels, + std::vector removed_vertex_labels, + std::vector> created_edges, + std::vector> deleted_edges, + std::vector> set_edge_properties, + std::vector> removed_edge_properties) + : created_vertices_{std::move(created_vertices)}, + deleted_vertices_{std::move(deleted_vertices)}, + set_vertex_properties_{std::move(set_vertex_properties)}, 
removed_vertex_properties_{std::move(removed_vertex_properties)}, + set_vertex_labels_{std::move(set_vertex_labels)}, + removed_vertex_labels_{std::move(removed_vertex_labels)}, + created_edges_{std::move(created_edges)}, + deleted_edges_{std::move(deleted_edges)}, + set_edge_properties_{std::move(set_edge_properties)}, + removed_edge_properties_{std::move(removed_edge_properties)} {} + TriggerContext(const TriggerContext &) = default; + TriggerContext(TriggerContext &&) = default; + TriggerContext &operator=(const TriggerContext &) = default; + TriggerContext &operator=(TriggerContext &&) = default; + + // Adapt the TriggerContext object inplace for a different DbAccessor + // (each derived accessor, e.g. VertexAccessor, gets adapted + // to the sent DbAccessor so they can be used safely) + void AdaptForAccessor(DbAccessor *accessor); + + // Get TypedValue for the identifier defined with tag + TypedValue GetTypedValue(TriggerIdentifierTag tag, DbAccessor *dba) const; + bool ShouldEventTrigger(TriggerEventType) const; + + private: + std::vector> created_vertices_; + std::vector> deleted_vertices_; + std::vector> set_vertex_properties_; + std::vector> removed_vertex_properties_; + std::vector set_vertex_labels_; + std::vector removed_vertex_labels_; + + std::vector> created_edges_; + std::vector> deleted_edges_; + std::vector> set_edge_properties_; + std::vector> removed_edge_properties_; +}; + +// Collects the information necessary for triggers during a single transaction run. +class TriggerContextCollector { + public: + struct HashPairWithAccessor { + template + size_t operator()(const std::pair &pair) const { + using GidType = decltype(std::declval().Gid()); + return utils::HashCombine{}(pair.first.Gid(), pair.second); + } + }; + + struct PropertyChangeInfo { + TypedValue old_value; + TypedValue new_value; + }; + + template + using PropertyChangesMap = + std::unordered_map, PropertyChangeInfo, HashPairWithAccessor>; + + template + struct Registry { + bool should_register_created_objects{false}; + bool should_register_deleted_objects{false}; + bool should_register_updated_objects{false}; // Set/removed properties (and labels for vertices) + std::unordered_map> created_objects; + std::vector> deleted_objects; + // During the transaction, a single property on a single object could be changed multiple times. + // We want to register only the global change, at the end of the transaction. The change consists of + // the value before the transaction start, and the latest value assigned throughout the transaction. 
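// A minimal illustration of the collapsing rule above (hypothetical values):
//   SET n.x = 1; SET n.x = 2;            -> one PropertyChangeInfo{old = <pre-tx value>, new = 2}
//   SET n.x = 2; SET n.x = <pre-tx value>; -> dropped during summarization (old == new)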
+ PropertyChangesMap property_changes; + }; + + explicit TriggerContextCollector(const std::unordered_set &event_types); + TriggerContextCollector(const TriggerContextCollector &) = default; + TriggerContextCollector(TriggerContextCollector &&) = default; + TriggerContextCollector &operator=(const TriggerContextCollector &) = default; + TriggerContextCollector &operator=(TriggerContextCollector &&) = default; + ~TriggerContextCollector() = default; + + template + bool ShouldRegisterCreatedObject() const { + return GetRegistry().should_register_created_objects; + } + + template + void RegisterCreatedObject(const TAccessor &created_object) { + auto ®istry = GetRegistry(); + if (!registry.should_register_created_objects) { + return; + } + registry.created_objects.emplace(created_object.Gid(), detail::CreatedObject{created_object}); + } + + template + bool ShouldRegisterDeletedObject() const { + return GetRegistry().should_register_deleted_objects; + } + + template + void RegisterDeletedObject(const TAccessor &deleted_object) { + auto ®istry = GetRegistry(); + if (!registry.should_register_deleted_objects || registry.created_objects.count(deleted_object.Gid())) { + return; + } + + registry.deleted_objects.emplace_back(deleted_object); + } + + template + bool ShouldRegisterObjectPropertyChange() const { + return GetRegistry().should_register_updated_objects; + } + + template + void RegisterSetObjectProperty(const TAccessor &object, const storage::PropertyId key, TypedValue old_value, + TypedValue new_value) { + auto ®istry = GetRegistry(); + if (!registry.should_register_updated_objects) { + return; + } + + if (registry.created_objects.count(object.Gid())) { + return; + } + + if (auto it = registry.property_changes.find({object, key}); it != registry.property_changes.end()) { + it->second.new_value = std::move(new_value); + return; + } + + registry.property_changes.emplace(std::make_pair(object, key), + PropertyChangeInfo{std::move(old_value), std::move(new_value)}); + } + + template + void RegisterRemovedObjectProperty(const TAccessor &object, const storage::PropertyId key, TypedValue old_value) { + // property is already removed + if (old_value.IsNull()) { + return; + } + + RegisterSetObjectProperty(object, key, std::move(old_value), TypedValue()); + } + + bool ShouldRegisterVertexLabelChange() const; + void RegisterSetVertexLabel(const VertexAccessor &vertex, storage::LabelId label_id); + void RegisterRemovedVertexLabel(const VertexAccessor &vertex, storage::LabelId label_id); + [[nodiscard]] TriggerContext TransformToTriggerContext() &&; + + private: + template + const Registry &GetRegistry() const { + if constexpr (std::same_as) { + return vertex_registry_; + } else { + return edge_registry_; + } + } + + template + Registry &GetRegistry() { + return const_cast &>( + const_cast(this)->GetRegistry()); + } + + using LabelChangesMap = std::unordered_map, int8_t, HashPairWithAccessor>; + using LabelChangesLists = std::pair, std::vector>; + + enum class LabelChange : int8_t { REMOVE = -1, ADD = 1 }; + + static int8_t LabelChangeToInt(LabelChange change); + + [[nodiscard]] static LabelChangesLists LabelMapToList(LabelChangesMap &&label_changes); + + void UpdateLabelMap(VertexAccessor vertex, storage::LabelId label_id, LabelChange change); + + Registry vertex_registry_; + Registry edge_registry_; + // During the transaction, a single label on a single vertex could be added and removed multiple times. + // We want to register only the global change, at the end of the transaction. 
The change consists of + // the state of the label before the transaction start, and the latest state assigned throughout the transaction. + LabelChangesMap label_changes_; +}; +} // namespace query diff --git a/src/storage/v2/config.hpp b/src/storage/v2/config.hpp index 42f9ebee1..5c3bab2ca 100644 --- a/src/storage/v2/config.hpp +++ b/src/storage/v2/config.hpp @@ -3,6 +3,8 @@ #include #include #include +#include "storage/v2/isolation_level.hpp" +#include "storage/v2/transaction.hpp" namespace storage { @@ -38,6 +40,10 @@ struct Config { bool snapshot_on_exit{false}; } durability; + + struct Transaction { + IsolationLevel isolation_level{IsolationLevel::SNAPSHOT_ISOLATION}; + } transaction; }; } // namespace storage diff --git a/src/storage/v2/durability/durability.cpp b/src/storage/v2/durability/durability.cpp index d26728f3f..f904099b7 100644 --- a/src/storage/v2/durability/durability.cpp +++ b/src/storage/v2/durability/durability.cpp @@ -17,6 +17,7 @@ #include "storage/v2/durability/snapshot.hpp" #include "storage/v2/durability/wal.hpp" #include "utils/logging.hpp" +#include "utils/memory_tracker.hpp" namespace storage::durability { @@ -101,30 +102,47 @@ std::optional> GetWalFiles(const std::filesystem: // recovery process. void RecoverIndicesAndConstraints(const RecoveredIndicesAndConstraints &indices_constraints, Indices *indices, Constraints *constraints, utils::SkipList *vertices) { + spdlog::info("Recreating indices from metadata."); // Recover label indices. + spdlog::info("Recreating {} label indices from metadata.", indices_constraints.indices.label.size()); for (const auto &item : indices_constraints.indices.label) { if (!indices->label_index.CreateIndex(item, vertices->access())) throw RecoveryFailure("The label index must be created here!"); + spdlog::info("A label index is recreated from metadata."); } + spdlog::info("Label indices are recreated."); // Recover label+property indices. + spdlog::info("Recreating {} label+property indices from metadata.", + indices_constraints.indices.label_property.size()); for (const auto &item : indices_constraints.indices.label_property) { if (!indices->label_property_index.CreateIndex(item.first, item.second, vertices->access())) throw RecoveryFailure("The label+property index must be created here!"); + spdlog::info("A label+property index is recreated from metadata."); } + spdlog::info("Label+property indices are recreated."); + spdlog::info("Indices are recreated."); + spdlog::info("Recreating constraints from metadata."); // Recover existence constraints. + spdlog::info("Recreating {} existence constraints from metadata.", indices_constraints.constraints.existence.size()); for (const auto &item : indices_constraints.constraints.existence) { auto ret = CreateExistenceConstraint(constraints, item.first, item.second, vertices->access()); if (ret.HasError() || !ret.GetValue()) throw RecoveryFailure("The existence constraint must be created here!"); + spdlog::info("An existence constraint is recreated from metadata."); } + spdlog::info("Existence constraints are recreated from metadata."); // Recover unique constraints. 
+ spdlog::info("Recreating {} unique constraints from metadata.", indices_constraints.constraints.unique.size()); for (const auto &item : indices_constraints.constraints.unique) { auto ret = constraints->unique_constraints.CreateConstraint(item.first, item.second, vertices->access()); if (ret.HasError() || ret.GetValue() != UniqueConstraints::CreationStatus::SUCCESS) throw RecoveryFailure("The unique constraint must be created here!"); + spdlog::info("A unique constraint is recreated from metadata."); } + spdlog::info("Unique constraints are recreated from metadata."); + spdlog::info("Constraints are recreated from metadata."); } std::optional RecoverData(const std::filesystem::path &snapshot_directory, @@ -135,7 +153,13 @@ std::optional RecoverData(const std::filesystem::path &snapshot_di std::atomic *edge_count, NameIdMapper *name_id_mapper, Indices *indices, Constraints *constraints, Config::Items items, uint64_t *wal_seq_num) { - if (!utils::DirExists(snapshot_directory) && !utils::DirExists(wal_directory)) return std::nullopt; + utils::MemoryTracker::OutOfMemoryExceptionEnabler oom_exception; + spdlog::info("Recovering persisted data using snapshot ({}) and WAL directory ({}).", snapshot_directory, + wal_directory); + if (!utils::DirExists(snapshot_directory) && !utils::DirExists(wal_directory)) { + spdlog::warn("Snapshot or WAL directory don't exist, there is nothing to recover."); + return std::nullopt; + } auto snapshot_files = GetSnapshotFiles(snapshot_directory); @@ -143,6 +167,7 @@ std::optional RecoverData(const std::filesystem::path &snapshot_di RecoveredIndicesAndConstraints indices_constraints; std::optional snapshot_timestamp; if (!snapshot_files.empty()) { + spdlog::info("Try recovering from snapshot directory {}.", snapshot_directory); // Order the files by name std::sort(snapshot_files.begin(), snapshot_files.end()); @@ -155,13 +180,13 @@ std::optional RecoverData(const std::filesystem::path &snapshot_di spdlog::warn("The snapshot file {} isn't related to the latest snapshot file!", path); continue; } - spdlog::info("Starting snapshot recovery from {}", path); + spdlog::info("Starting snapshot recovery from {}.", path); try { recovered_snapshot = LoadSnapshot(path, vertices, edges, epoch_history, name_id_mapper, edge_count, items); spdlog::info("Snapshot recovery successful!"); break; } catch (const RecoveryFailure &e) { - spdlog::warn("Couldn't recover snapshot from {} because of: {}", path, e.what()); + spdlog::warn("Couldn't recover snapshot from {} because of: {}.", path, e.what()); continue; } } @@ -179,6 +204,7 @@ std::optional RecoverData(const std::filesystem::path &snapshot_di return recovered_snapshot->recovery_info; } } else { + spdlog::info("No snapshot file was found, collecting information from WAL directory {}.", wal_directory); std::error_code error_code; if (!utils::DirExists(wal_directory)) return std::nullopt; // We use this smaller struct that contains only a subset of information @@ -204,7 +230,10 @@ std::optional RecoverData(const std::filesystem::path &snapshot_di } } MG_ASSERT(!error_code, "Couldn't recover data because an error occurred: {}!", error_code.message()); - if (wal_files.empty()) return std::nullopt; + if (wal_files.empty()) { + spdlog::warn("No snapshot or WAL file found!"); + return std::nullopt; + } std::sort(wal_files.begin(), wal_files.end()); // UUID used for durability is the UUID of the last WAL file. // Same for the epoch id. 
@@ -213,7 +242,10 @@ std::optional RecoverData(const std::filesystem::path &snapshot_di } auto maybe_wal_files = GetWalFiles(wal_directory, *uuid); - if (!maybe_wal_files) return std::nullopt; + if (!maybe_wal_files) { + spdlog::warn("Couldn't get WAL file info from the WAL directory!"); + return std::nullopt; + } // Array of all discovered WAL files, ordered by sequence number. auto &wal_files = *maybe_wal_files; @@ -230,6 +262,7 @@ std::optional RecoverData(const std::filesystem::path &snapshot_di "files that match the last WAL file!"); if (!wal_files.empty()) { + spdlog::info("Checking WAL files."); { const auto &first_wal = wal_files[0]; if (first_wal.seq_num != 0) { @@ -253,6 +286,7 @@ std::optional RecoverData(const std::filesystem::path &snapshot_di } std::optional previous_seq_num; auto last_loaded_timestamp = snapshot_timestamp; + spdlog::info("Trying to load WAL files."); for (auto &wal_file : wal_files) { if (previous_seq_num && (wal_file.seq_num - *previous_seq_num) > 1) { LOG_FATAL("You are missing a WAL file with the sequence number {}!", *previous_seq_num + 1); @@ -288,6 +322,8 @@ std::optional RecoverData(const std::filesystem::path &snapshot_di // The sequence number needs to be recovered even though `LoadWal` didn't // load any deltas from that file. *wal_seq_num = *previous_seq_num + 1; + + spdlog::info("All necessary WAL files are loaded successfully."); } RecoverIndicesAndConstraints(indices_constraints, indices, constraints, vertices); diff --git a/src/storage/v2/durability/snapshot.cpp b/src/storage/v2/durability/snapshot.cpp index 171645b59..db2d82a6c 100644 --- a/src/storage/v2/durability/snapshot.cpp +++ b/src/storage/v2/durability/snapshot.cpp @@ -168,14 +168,15 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis }); // Read snapshot info. - auto info = ReadSnapshotInfo(path); - + const auto info = ReadSnapshotInfo(path); + spdlog::info("Recovering {} vertices and {} edges.", info.vertices_count, info.edges_count); // Check for edges. bool snapshot_has_edges = info.offset_edges != 0; // Recover mapper. 
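// Descriptive note: ids stored in the snapshot are remapped to this instance's
// ids, e.g. (illustrative values) snapshot id 3 named "Person" may become local
// id 7 via name_id_mapper->NameToId("Person"); every label/property/edge-type id
// read below goes through snapshot_id_map first.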
std::unordered_map snapshot_id_map; { + spdlog::info("Recovering mapper metadata."); if (!snapshot.SetPosition(info.offset_mapper)) throw RecoveryFailure("Couldn't read data from snapshot!"); auto marker = snapshot.ReadMarker(); @@ -191,6 +192,7 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis if (!name) throw RecoveryFailure("Invalid snapshot data!"); auto my_id = name_id_mapper->NameToId(*name); snapshot_id_map.emplace(*id, my_id); + SPDLOG_TRACE("Mapping \"{}\" from snapshot id {} to actual id {}.", *name, *id, my_id); } } auto get_label_from_id = [&snapshot_id_map](uint64_t snapshot_id) { @@ -217,10 +219,11 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis auto edge_acc = edges->access(); uint64_t last_edge_gid = 0; if (snapshot_has_edges) { + spdlog::info("Recovering {} edges.", info.edges_count); if (!snapshot.SetPosition(info.offset_edges)) throw RecoveryFailure("Couldn't read data from snapshot!"); for (uint64_t i = 0; i < info.edges_count; ++i) { { - auto marker = snapshot.ReadMarker(); + const auto marker = snapshot.ReadMarker(); if (!marker || *marker != Marker::SECTION_EDGE) throw RecoveryFailure("Invalid snapshot data!"); } @@ -230,6 +233,7 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis if (!gid) throw RecoveryFailure("Invalid snapshot data!"); if (i > 0 && *gid <= last_edge_gid) throw RecoveryFailure("Invalid snapshot data!"); last_edge_gid = *gid; + spdlog::debug("Recovering edge {} with properties.", *gid); auto [it, inserted] = edge_acc.insert(Edge{Gid::FromUint(*gid), nullptr}); if (!inserted) throw RecoveryFailure("The edge must be inserted here!"); @@ -243,6 +247,8 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis if (!key) throw RecoveryFailure("Invalid snapshot data!"); auto value = snapshot.ReadPropertyValue(); if (!value) throw RecoveryFailure("Invalid snapshot data!"); + SPDLOG_TRACE("Recovered property \"{}\" with value \"{}\" for edge {}.", + name_id_mapper->IdToName(snapshot_id_map.at(*key)), *value, *gid); props.SetProperty(get_property_from_id(*key), *value); } } @@ -253,6 +259,7 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis if (i > 0 && *gid <= last_edge_gid) throw RecoveryFailure("Invalid snapshot data!"); last_edge_gid = *gid; + spdlog::debug("Ensuring edge {} doesn't have any properties.", *gid); // Read properties. { auto props_size = snapshot.ReadUint(); @@ -264,12 +271,14 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis } } } + spdlog::info("Edges are recovered."); } // Recover vertices (labels and properties). if (!snapshot.SetPosition(info.offset_vertices)) throw RecoveryFailure("Couldn't read data from snapshot!"); auto vertex_acc = vertices->access(); uint64_t last_vertex_gid = 0; + spdlog::info("Recovering {} vertices.", info.vertices_count); for (uint64_t i = 0; i < info.vertices_count; ++i) { { auto marker = snapshot.ReadMarker(); @@ -283,10 +292,12 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis throw RecoveryFailure("Invalid snapshot data!"); } last_vertex_gid = *gid; + spdlog::debug("Recovering vertex {}.", *gid); auto [it, inserted] = vertex_acc.insert(Vertex{Gid::FromUint(*gid), nullptr}); if (!inserted) throw RecoveryFailure("The vertex must be inserted here!"); // Recover labels. 
+ spdlog::trace("Recovering labels for vertex {}.", *gid); { auto labels_size = snapshot.ReadUint(); if (!labels_size) throw RecoveryFailure("Invalid snapshot data!"); @@ -295,11 +306,14 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis for (uint64_t j = 0; j < *labels_size; ++j) { auto label = snapshot.ReadUint(); if (!label) throw RecoveryFailure("Invalid snapshot data!"); + SPDLOG_TRACE("Recovered label \"{}\" for vertex {}.", name_id_mapper->IdToName(snapshot_id_map.at(*label)), + *gid); labels.emplace_back(get_label_from_id(*label)); } } // Recover properties. + spdlog::trace("Recovering properties for vertex {}.", *gid); { auto props_size = snapshot.ReadUint(); if (!props_size) throw RecoveryFailure("Invalid snapshot data!"); @@ -309,6 +323,8 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis if (!key) throw RecoveryFailure("Invalid snapshot data!"); auto value = snapshot.ReadPropertyValue(); if (!value) throw RecoveryFailure("Invalid snapshot data!"); + SPDLOG_TRACE("Recovered property \"{}\" with value \"{}\" for vertex {}.", + name_id_mapper->IdToName(snapshot_id_map.at(*key)), *value, *gid); props.SetProperty(get_property_from_id(*key), *value); } } @@ -339,8 +355,10 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis if (!edge_type) throw RecoveryFailure("Invalid snapshot data!"); } } + spdlog::info("Vertices are recovered."); // Recover vertices (in/out edges). + spdlog::info("Recovering connectivity."); if (!snapshot.SetPosition(info.offset_vertices)) throw RecoveryFailure("Couldn't read data from snapshot!"); for (auto &vertex : vertex_acc) { { @@ -348,6 +366,7 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis if (!marker || *marker != Marker::SECTION_VERTEX) throw RecoveryFailure("Invalid snapshot data!"); } + spdlog::trace("Recovering connectivity for vertex {}.", vertex.gid.AsUint()); // Check vertex. auto gid = snapshot.ReadUint(); if (!gid) throw RecoveryFailure("Invalid snapshot data!"); @@ -377,6 +396,7 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis // Recover in edges. { + spdlog::trace("Recovering inbound edges for vertex {}.", vertex.gid.AsUint()); auto in_size = snapshot.ReadUint(); if (!in_size) throw RecoveryFailure("Invalid snapshot data!"); vertex.in_edges.reserve(*in_size); @@ -404,12 +424,15 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis edge_ref = EdgeRef(&*edge); } } + SPDLOG_TRACE("Recovered inbound edge {} with label \"{}\" from vertex {}.", *edge_gid, + name_id_mapper->IdToName(snapshot_id_map.at(*edge_type)), from_vertex->gid.AsUint()); vertex.in_edges.emplace_back(get_edge_type_from_id(*edge_type), &*from_vertex, edge_ref); } } // Recover out edges. { + spdlog::trace("Recovering outbound edges for vertex {}.", vertex.gid.AsUint()); auto out_size = snapshot.ReadUint(); if (!out_size) throw RecoveryFailure("Invalid snapshot data!"); vertex.out_edges.reserve(*out_size); @@ -437,6 +460,8 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis edge_ref = EdgeRef(&*edge); } } + SPDLOG_TRACE("Recovered outbound edge {} with label \"{}\" to vertex {}.", *edge_gid, + name_id_mapper->IdToName(snapshot_id_map.at(*edge_type)), to_vertex->gid.AsUint()); vertex.out_edges.emplace_back(get_edge_type_from_id(*edge_type), &*to_vertex, edge_ref); } // Increment edge count. 
We only increment the count here because the @@ -444,6 +469,7 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis edge_count->fetch_add(*out_size, std::memory_order_acq_rel); } } + spdlog::info("Connectivity is recovered."); // Set initial values for edge/vertex ID generators. ret.next_edge_id = last_edge_gid + 1; @@ -452,6 +478,7 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis // Recover indices. { + spdlog::info("Recovering metadata of indices."); if (!snapshot.SetPosition(info.offset_indices)) throw RecoveryFailure("Couldn't read data from snapshot!"); auto marker = snapshot.ReadMarker(); @@ -461,18 +488,22 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis { auto size = snapshot.ReadUint(); if (!size) throw RecoveryFailure("Invalid snapshot data!"); + spdlog::info("Recovering metadata of {} label indices.", *size); for (uint64_t i = 0; i < *size; ++i) { auto label = snapshot.ReadUint(); if (!label) throw RecoveryFailure("Invalid snapshot data!"); AddRecoveredIndexConstraint(&indices_constraints.indices.label, get_label_from_id(*label), "The label index already exists!"); + SPDLOG_TRACE("Recovered metadata of label index for :{}", name_id_mapper->IdToName(snapshot_id_map.at(*label))); } + spdlog::info("Metadata of label indices are recovered."); } // Recover label+property indices. { auto size = snapshot.ReadUint(); if (!size) throw RecoveryFailure("Invalid snapshot data!"); + spdlog::info("Recovering metadata of {} label+property indices.", *size); for (uint64_t i = 0; i < *size; ++i) { auto label = snapshot.ReadUint(); if (!label) throw RecoveryFailure("Invalid snapshot data!"); @@ -481,12 +512,18 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis AddRecoveredIndexConstraint(&indices_constraints.indices.label_property, {get_label_from_id(*label), get_property_from_id(*property)}, "The label+property index already exists!"); + SPDLOG_TRACE("Recovered metadata of label+property index for :{}({})", + name_id_mapper->IdToName(snapshot_id_map.at(*label)), + name_id_mapper->IdToName(snapshot_id_map.at(*property))); } + spdlog::info("Metadata of label+property indices are recovered."); } + spdlog::info("Metadata of indices are recovered."); } // Recover constraints. { + spdlog::info("Recovering metadata of constraints."); if (!snapshot.SetPosition(info.offset_constraints)) throw RecoveryFailure("Couldn't read data from snapshot!"); auto marker = snapshot.ReadMarker(); @@ -496,6 +533,7 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis { auto size = snapshot.ReadUint(); if (!size) throw RecoveryFailure("Invalid snapshot data!"); + spdlog::info("Recovering metadata of {} existence constraints.", *size); for (uint64_t i = 0; i < *size; ++i) { auto label = snapshot.ReadUint(); if (!label) throw RecoveryFailure("Invalid snapshot data!"); @@ -504,7 +542,11 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis AddRecoveredIndexConstraint(&indices_constraints.constraints.existence, {get_label_from_id(*label), get_property_from_id(*property)}, "The existence constraint already exists!"); + SPDLOG_TRACE("Recovered metadata of existence constraint for :{}({})", + name_id_mapper->IdToName(snapshot_id_map.at(*label)), + name_id_mapper->IdToName(snapshot_id_map.at(*property))); } + spdlog::info("Metadata of existence constraints are recovered."); } // Recover unique constraints. 
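The logging added throughout this file mixes run-time filtered calls (spdlog::info, spdlog::debug, spdlog::trace) with SPDLOG_TRACE macros that are compiled out entirely unless SPDLOG_ACTIVE_LEVEL permits them, which keeps the per-vertex and per-edge trace lines free when tracing is disabled at build time. A small self-contained sketch of the difference (standard spdlog usage, not code from this diff):

// Must be defined before including spdlog so SPDLOG_TRACE expands to a real call.
#define SPDLOG_ACTIVE_LEVEL SPDLOG_LEVEL_TRACE
#include <spdlog/spdlog.h>

int main() {
  spdlog::set_level(spdlog::level::trace);  // run-time filter
  spdlog::trace("always compiled; emitted only when the run-time level allows it");
  SPDLOG_TRACE("compiled in only because SPDLOG_ACTIVE_LEVEL is SPDLOG_LEVEL_TRACE");
}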
@@ -513,6 +555,7 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis if (*version >= kUniqueConstraintVersion) { auto size = snapshot.ReadUint(); if (!size) throw RecoveryFailure("Invalid snapshot data!"); + spdlog::info("Recovering metadata of {} unique constraints.", *size); for (uint64_t i = 0; i < *size; ++i) { auto label = snapshot.ReadUint(); if (!label) throw RecoveryFailure("Invalid snapshot data!"); @@ -526,10 +569,15 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis } AddRecoveredIndexConstraint(&indices_constraints.constraints.unique, {get_label_from_id(*label), properties}, "The unique constraint already exists!"); + SPDLOG_TRACE("Recovered metadata of unique constraints for :{}", + name_id_mapper->IdToName(snapshot_id_map.at(*label))); } + spdlog::info("Metadata of unique constraints are recovered."); } + spdlog::info("Metadata of constraints are recovered."); } + spdlog::info("Recovering metadata."); // Recover epoch history { if (!snapshot.SetPosition(info.offset_epoch_history)) throw RecoveryFailure("Couldn't read data from snapshot!"); @@ -555,6 +603,7 @@ RecoveredSnapshot LoadSnapshot(const std::filesystem::path &path, utils::SkipLis } } + spdlog::info("Metadata recovered."); // Recover timestamp. ret.next_timestamp = info.start_timestamp + 1; diff --git a/src/storage/v2/durability/wal.cpp b/src/storage/v2/durability/wal.cpp index 4f362bc3b..9f4bc46e7 100644 --- a/src/storage/v2/durability/wal.cpp +++ b/src/storage/v2/durability/wal.cpp @@ -610,6 +610,7 @@ RecoveryInfo LoadWal(const std::filesystem::path &path, RecoveredIndicesAndConst const std::optional last_loaded_timestamp, utils::SkipList *vertices, utils::SkipList *edges, NameIdMapper *name_id_mapper, std::atomic *edge_count, Config::Items items) { + spdlog::info("Trying to load WAL file {}.", path); RecoveryInfo ret; Decoder wal; @@ -622,13 +623,17 @@ RecoveryInfo LoadWal(const std::filesystem::path &path, RecoveredIndicesAndConst ret.last_commit_timestamp = info.to_timestamp; // Check timestamp. - if (last_loaded_timestamp && info.to_timestamp <= *last_loaded_timestamp) return ret; + if (last_loaded_timestamp && info.to_timestamp <= *last_loaded_timestamp) { + spdlog::info("Skip loading WAL file because it is too old."); + return ret; + } // Recover deltas. wal.SetPosition(info.offset_deltas); uint64_t deltas_applied = 0; auto edge_acc = edges->access(); auto vertex_acc = vertices->access(); + spdlog::info("WAL file contains {} deltas.", info.num_deltas); for (uint64_t i = 0; i < info.num_deltas; ++i) { // Read WAL delta header to find out the delta timestamp. auto timestamp = ReadWalDeltaHeader(&wal); @@ -839,7 +844,8 @@ RecoveryInfo LoadWal(const std::filesystem::path &path, RecoveredIndicesAndConst } } - spdlog::info("Applied {} deltas from WAL", deltas_applied, path); + spdlog::info("Applied {} deltas from WAL. 
Skipped {} deltas, because they were too old.", deltas_applied, + info.num_deltas - deltas_applied); return ret; } diff --git a/src/storage/v2/edge_accessor.cpp b/src/storage/v2/edge_accessor.cpp index 7c2fff5ec..1b1491f99 100644 --- a/src/storage/v2/edge_accessor.cpp +++ b/src/storage/v2/edge_accessor.cpp @@ -3,10 +3,45 @@ #include #include "storage/v2/mvcc.hpp" +#include "storage/v2/property_value.hpp" #include "storage/v2/vertex_accessor.hpp" +#include "utils/memory_tracker.hpp" namespace storage { +bool EdgeAccessor::IsVisible(const View view) const { + bool deleted = true; + bool exists = true; + Delta *delta = nullptr; + { + std::lock_guard guard(edge_.ptr->lock); + deleted = edge_.ptr->deleted; + delta = edge_.ptr->delta; + } + ApplyDeltasForRead(transaction_, delta, view, [&](const Delta &delta) { + switch (delta.action) { + case Delta::Action::ADD_LABEL: + case Delta::Action::REMOVE_LABEL: + case Delta::Action::SET_PROPERTY: + case Delta::Action::ADD_IN_EDGE: + case Delta::Action::ADD_OUT_EDGE: + case Delta::Action::REMOVE_IN_EDGE: + case Delta::Action::REMOVE_OUT_EDGE: + break; + case Delta::Action::RECREATE_OBJECT: { + deleted = false; + break; + } + case Delta::Action::DELETE_OBJECT: { + exists = false; + break; + } + } + }); + + return exists && (for_deleted_ || !deleted); +} + VertexAccessor EdgeAccessor::FromVertex() const { return VertexAccessor{from_vertex_, transaction_, indices_, constraints_, config_}; } @@ -15,7 +50,8 @@ VertexAccessor EdgeAccessor::ToVertex() const { return VertexAccessor{to_vertex_, transaction_, indices_, constraints_, config_}; } -Result EdgeAccessor::SetProperty(PropertyId property, const PropertyValue &value) { +Result EdgeAccessor::SetProperty(PropertyId property, const PropertyValue &value) { + utils::MemoryTracker::OutOfMemoryExceptionEnabler oom_exception; if (!config_.properties_on_edges) return Error::PROPERTIES_DISABLED; std::lock_guard guard(edge_.ptr->lock); @@ -25,20 +61,19 @@ Result EdgeAccessor::SetProperty(PropertyId property, const PropertyValue if (edge_.ptr->deleted) return Error::DELETED_OBJECT; auto current_value = edge_.ptr->properties.GetProperty(property); - bool existed = !current_value.IsNull(); // We could skip setting the value if the previous one is the same to the new // one. This would save some memory as a delta would not be created as well as // avoid copying the value. The reason we are not doing that is because the // current code always follows the logical pattern of "create a delta" and // "modify in-place". Additionally, the created delta will make other // transactions get a SERIALIZATION_ERROR. 
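// The old value is now returned to the caller at the end of this function,
// so it can no longer be moved into the delta here.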
- CreateAndLinkDelta(transaction_, edge_.ptr, Delta::SetPropertyTag(), property, std::move(current_value)); + CreateAndLinkDelta(transaction_, edge_.ptr, Delta::SetPropertyTag(), property, current_value); edge_.ptr->properties.SetProperty(property, value); - return !existed; + return std::move(current_value); } -Result EdgeAccessor::ClearProperties() { +Result> EdgeAccessor::ClearProperties() { if (!config_.properties_on_edges) return Error::PROPERTIES_DISABLED; std::lock_guard guard(edge_.ptr->lock); @@ -48,14 +83,13 @@ Result EdgeAccessor::ClearProperties() { if (edge_.ptr->deleted) return Error::DELETED_OBJECT; auto properties = edge_.ptr->properties.Properties(); - bool removed = !properties.empty(); for (const auto &property : properties) { CreateAndLinkDelta(transaction_, edge_.ptr, Delta::SetPropertyTag(), property.first, property.second); } edge_.ptr->properties.ClearProperties(); - return removed; + return std::move(properties); } Result EdgeAccessor::GetProperty(PropertyId property, View view) const { @@ -96,7 +130,7 @@ Result EdgeAccessor::GetProperty(PropertyId property, View view) } }); if (!exists) return Error::NONEXISTENT_OBJECT; - if (deleted) return Error::DELETED_OBJECT; + if (!for_deleted_ && deleted) return Error::DELETED_OBJECT; return std::move(value); } @@ -147,7 +181,7 @@ Result> EdgeAccessor::Properties(View view) } }); if (!exists) return Error::NONEXISTENT_OBJECT; - if (deleted) return Error::DELETED_OBJECT; + if (!for_deleted_ && deleted) return Error::DELETED_OBJECT; return std::move(properties); } diff --git a/src/storage/v2/edge_accessor.hpp b/src/storage/v2/edge_accessor.hpp index e90565fcf..261a5a60e 100644 --- a/src/storage/v2/edge_accessor.hpp +++ b/src/storage/v2/edge_accessor.hpp @@ -23,7 +23,7 @@ class EdgeAccessor final { public: EdgeAccessor(EdgeRef edge, EdgeTypeId edge_type, Vertex *from_vertex, Vertex *to_vertex, Transaction *transaction, - Indices *indices, Constraints *constraints, Config::Items config) + Indices *indices, Constraints *constraints, Config::Items config, bool for_deleted = false) : edge_(edge), edge_type_(edge_type), from_vertex_(from_vertex), @@ -31,7 +31,11 @@ class EdgeAccessor final { transaction_(transaction), indices_(indices), constraints_(constraints), - config_(config) {} + config_(config), + for_deleted_(for_deleted) {} + + /// @return true if the object is visible from the current transaction + bool IsVisible(View view) const; VertexAccessor FromVertex() const; @@ -39,15 +43,13 @@ class EdgeAccessor final { EdgeTypeId EdgeType() const { return edge_type_; } - /// Set a property value and return `true` if insertion took place. - /// `false` is returned if assignment took place. + /// Set a property value and return the old value. /// @throw std::bad_alloc - Result SetProperty(PropertyId property, const PropertyValue &value); + Result SetProperty(PropertyId property, const PropertyValue &value); - /// Remove all properties and return `true` if any removal took place. - /// `false` is returned if there were no properties to remove. + /// Remove all properties and return old values for each removed property. /// @throw std::bad_alloc - Result ClearProperties(); + Result> ClearProperties(); /// @throw std::bad_alloc Result GetProperty(PropertyId property, View view) const; @@ -79,6 +81,14 @@ class EdgeAccessor final { Indices *indices_; Constraints *constraints_; Config::Items config_; + + // if the accessor was created for a deleted edge. + // Accessor behaves differently for some methods based on this + // flag. 
+ // E.g. If this field is set to true, GetProperty will return the property of the edge + // even though the edge is deleted. + // All the write operations will still return an error if it's called for a deleted edge. + bool for_deleted_{false}; }; } // namespace storage diff --git a/src/storage/v2/indices.cpp b/src/storage/v2/indices.cpp index 057690726..f7bd634fd 100644 --- a/src/storage/v2/indices.cpp +++ b/src/storage/v2/indices.cpp @@ -2,6 +2,7 @@ #include "storage/v2/mvcc.hpp" #include "utils/logging.hpp" +#include "utils/memory_tracker.hpp" namespace storage { @@ -256,17 +257,24 @@ void LabelIndex::UpdateOnAddLabel(LabelId label, Vertex *vertex, const Transacti } bool LabelIndex::CreateIndex(LabelId label, utils::SkipList::Accessor vertices) { + utils::MemoryTracker::OutOfMemoryExceptionEnabler oom_exception; auto [it, emplaced] = index_.emplace(std::piecewise_construct, std::forward_as_tuple(label), std::forward_as_tuple()); if (!emplaced) { // Index already exists. return false; } - auto acc = it->second.access(); - for (Vertex &vertex : vertices) { - if (vertex.deleted || !utils::Contains(vertex.labels, label)) { - continue; + try { + auto acc = it->second.access(); + for (Vertex &vertex : vertices) { + if (vertex.deleted || !utils::Contains(vertex.labels, label)) { + continue; + } + acc.insert(Entry{&vertex, 0}); } - acc.insert(Entry{&vertex, 0}); + } catch (const utils::OutOfMemoryException &) { + utils::MemoryTracker::OutOfMemoryExceptionBlocker oom_exception_blocker; + index_.erase(it); + throw; } return true; } @@ -341,6 +349,12 @@ LabelIndex::Iterable::Iterable(utils::SkipList::Accessor index_accessor, constraints_(constraints), config_(config) {} +void LabelIndex::RunGC() { + for (auto &index_entry : index_) { + index_entry.second.run_gc(); + } +} + bool LabelPropertyIndex::Entry::operator<(const Entry &rhs) { if (value < rhs.value) { return true; @@ -389,22 +403,29 @@ void LabelPropertyIndex::UpdateOnSetProperty(PropertyId property, const Property } bool LabelPropertyIndex::CreateIndex(LabelId label, PropertyId property, utils::SkipList::Accessor vertices) { + utils::MemoryTracker::OutOfMemoryExceptionEnabler oom_exception; auto [it, emplaced] = index_.emplace(std::piecewise_construct, std::forward_as_tuple(label, property), std::forward_as_tuple()); if (!emplaced) { // Index already exists. 
return false; } - auto acc = it->second.access(); - for (Vertex &vertex : vertices) { - if (vertex.deleted || !utils::Contains(vertex.labels, label)) { - continue; + try { + auto acc = it->second.access(); + for (Vertex &vertex : vertices) { + if (vertex.deleted || !utils::Contains(vertex.labels, label)) { + continue; + } + auto value = vertex.properties.GetProperty(property); + if (value.IsNull()) { + continue; + } + acc.insert(Entry{std::move(value), &vertex, 0}); } - auto value = vertex.properties.GetProperty(property); - if (value.IsNull()) { - continue; - } - acc.insert(Entry{std::move(value), &vertex, 0}); + } catch (const utils::OutOfMemoryException &) { + utils::MemoryTracker::OutOfMemoryExceptionBlocker oom_exception_blocker; + index_.erase(it); + throw; } return true; } @@ -646,6 +667,12 @@ int64_t LabelPropertyIndex::ApproximateVertexCount(LabelId label, PropertyId pro return acc.estimate_range_count(lower, upper, utils::SkipListLayerForCountEstimation(acc.size())); } +void LabelPropertyIndex::RunGC() { + for (auto &index_entry : index_) { + index_entry.second.run_gc(); + } +} + void RemoveObsoleteEntries(Indices *indices, uint64_t oldest_active_start_timestamp) { indices->label_index.RemoveObsoleteEntries(oldest_active_start_timestamp); indices->label_property_index.RemoveObsoleteEntries(oldest_active_start_timestamp); diff --git a/src/storage/v2/indices.hpp b/src/storage/v2/indices.hpp index 7d7a30222..dd58e16fc 100644 --- a/src/storage/v2/indices.hpp +++ b/src/storage/v2/indices.hpp @@ -110,6 +110,8 @@ class LabelIndex { void Clear() { index_.clear(); } + void RunGC(); + private: std::map> index_; Indices *indices_; @@ -225,6 +227,8 @@ class LabelPropertyIndex { void Clear() { index_.clear(); } + void RunGC(); + private: std::map, utils::SkipList> index_; Indices *indices_; @@ -263,5 +267,4 @@ void UpdateOnAddLabel(Indices *indices, LabelId label, Vertex *vertex, const Tra /// @throw std::bad_alloc void UpdateOnSetProperty(Indices *indices, PropertyId property, const PropertyValue &value, Vertex *vertex, const Transaction &tx); - } // namespace storage diff --git a/src/storage/v2/isolation_level.hpp b/src/storage/v2/isolation_level.hpp new file mode 100644 index 000000000..42bf8f421 --- /dev/null +++ b/src/storage/v2/isolation_level.hpp @@ -0,0 +1,9 @@ +#pragma once + +#include + +namespace storage { + +enum class IsolationLevel : std::uint8_t { SNAPSHOT_ISOLATION, READ_COMMITTED, READ_UNCOMMITTED }; + +} // namespace storage diff --git a/src/storage/v2/mvcc.hpp b/src/storage/v2/mvcc.hpp index 719cfa5d4..1c449fa6b 100644 --- a/src/storage/v2/mvcc.hpp +++ b/src/storage/v2/mvcc.hpp @@ -1,6 +1,5 @@ #pragma once -#include "storage/v2/delta.hpp" #include "storage/v2/property_value.hpp" #include "storage/v2/transaction.hpp" #include "storage/v2/view.hpp" @@ -15,24 +14,40 @@ namespace storage { /// caller to apply the deltas. template inline void ApplyDeltasForRead(Transaction *transaction, const Delta *delta, View view, const TCallback &callback) { + // if the transaction is not committed, then its deltas have transaction_id for the timestamp, otherwise they have + // its commit timestamp set. + // This allows the transaction to see its changes even though it's committed. + const auto commit_timestamp = transaction->commit_timestamp + ? 
transaction->commit_timestamp->load(std::memory_order_acquire) + : transaction->transaction_id; while (delta != nullptr) { auto ts = delta->timestamp->load(std::memory_order_acquire); auto cid = delta->command_id; - // This is a committed change that we see so we shouldn't undo it. - if (ts < transaction->start_timestamp) { + // For SNAPSHOT ISOLATION -> we can only see the changes which were committed before the start of the current + // transaction + // + // For READ COMMITTED -> we can only see the changes which are committed. Commit timestamps of + // uncommitted changes are set to the transaction id of the transaction that made the change. Transaction id is + // always higher than start or commit timestamps so we know if the timestamp is lower than the initial transaction + // id value, that the change is committed. + // + // For READ UNCOMMITTED -> we accept any change. + if ((transaction->isolation_level == IsolationLevel::SNAPSHOT_ISOLATION && ts < transaction->start_timestamp) || + (transaction->isolation_level == IsolationLevel::READ_COMMITTED && ts < kTransactionInitialId) || + (transaction->isolation_level == IsolationLevel::READ_UNCOMMITTED)) { break; } // We shouldn't undo our newest changes because the user requested a NEW // view of the database. - if (view == View::NEW && ts == transaction->transaction_id && cid <= transaction->command_id) { + if (view == View::NEW && ts == commit_timestamp && cid <= transaction->command_id) { break; } // We shouldn't undo our older changes because the user requested a OLD view // of the database. - if (view == View::OLD && ts == transaction->transaction_id && cid < transaction->command_id) { + if (view == View::OLD && ts == commit_timestamp && cid < transaction->command_id) { break; } diff --git a/src/storage/v2/replication/replication_server.cpp b/src/storage/v2/replication/replication_server.cpp index 495b03d04..503898ef1 100644 --- a/src/storage/v2/replication/replication_server.cpp +++ b/src/storage/v2/replication/replication_server.cpp @@ -1,12 +1,33 @@ #include "storage/v2/replication/replication_server.hpp" +#include +#include #include "storage/v2/durability/durability.hpp" +#include "storage/v2/durability/paths.hpp" +#include "storage/v2/durability/serialization.hpp" #include "storage/v2/durability/snapshot.hpp" +#include "storage/v2/durability/version.hpp" +#include "storage/v2/durability/wal.hpp" #include "storage/v2/replication/config.hpp" #include "storage/v2/transaction.hpp" #include "utils/exceptions.hpp" namespace storage { +namespace { +std::pair ReadDelta(durability::BaseDecoder *decoder) { + try { + auto timestamp = ReadWalDeltaHeader(decoder); + SPDLOG_INFO(" Timestamp {}", timestamp); + auto delta = ReadWalDeltaData(decoder); + return {timestamp, delta}; + } catch (const slk::SlkReaderException &) { + throw utils::BasicException("Missing data!"); + } catch (const durability::RecoveryFailure &) { + throw utils::BasicException("Invalid data!"); + } +}; +} // namespace + Storage::ReplicationServer::ReplicationServer(Storage *storage, io::network::Endpoint endpoint, const replication::ReplicationServerConfig &config) : storage_(storage) { @@ -68,33 +89,6 @@ void Storage::ReplicationServer::AppendDeltasHandler(slk::Reader *req_reader, sl storage_->epoch_id_ = std::move(*maybe_epoch_id); } - const auto read_delta = [&]() -> std::pair { - try { - auto timestamp = ReadWalDeltaHeader(&decoder); - SPDLOG_INFO(" Timestamp {}", timestamp); - auto delta = ReadWalDeltaData(&decoder); - return {timestamp, delta}; - } catch (const 
slk::SlkReaderException &) { - throw utils::BasicException("Missing data!"); - } catch (const durability::RecoveryFailure &) { - throw utils::BasicException("Invalid data!"); - } - }; - - if (req.previous_commit_timestamp != storage_->last_commit_timestamp_.load()) { - // Empty the stream - bool transaction_complete = false; - while (!transaction_complete) { - SPDLOG_INFO("Skipping delta"); - const auto [timestamp, delta] = read_delta(); - transaction_complete = durability::IsWalDeltaDataTypeTransactionEnd(delta.type); - } - - AppendDeltasRes res{false, storage_->last_commit_timestamp_.load()}; - slk::Save(res, res_builder); - return; - } - if (storage_->wal_file_) { if (req.seq_num > storage_->wal_file_->SequenceNumber() || *maybe_epoch_id != storage_->epoch_id_) { storage_->wal_file_->FinalizeWal(); @@ -108,6 +102,173 @@ void Storage::ReplicationServer::AppendDeltasHandler(slk::Reader *req_reader, sl storage_->wal_seq_num_ = req.seq_num; } + if (req.previous_commit_timestamp != storage_->last_commit_timestamp_.load()) { + // Empty the stream + bool transaction_complete = false; + while (!transaction_complete) { + SPDLOG_INFO("Skipping delta"); + const auto [timestamp, delta] = ReadDelta(&decoder); + transaction_complete = durability::IsWalDeltaDataTypeTransactionEnd(delta.type); + } + + AppendDeltasRes res{false, storage_->last_commit_timestamp_.load()}; + slk::Save(res, res_builder); + return; + } + + ReadAndApplyDelta(&decoder); + + AppendDeltasRes res{true, storage_->last_commit_timestamp_.load()}; + slk::Save(res, res_builder); +} + +void Storage::ReplicationServer::SnapshotHandler(slk::Reader *req_reader, slk::Builder *res_builder) { + SnapshotReq req; + slk::Load(&req, req_reader); + + replication::Decoder decoder(req_reader); + + utils::EnsureDirOrDie(storage_->snapshot_directory_); + + const auto maybe_snapshot_path = decoder.ReadFile(storage_->snapshot_directory_); + MG_ASSERT(maybe_snapshot_path, "Failed to load snapshot!"); + spdlog::info("Received snapshot saved to {}", *maybe_snapshot_path); + + std::unique_lock storage_guard(storage_->main_lock_); + // Clear the database + storage_->vertices_.clear(); + storage_->edges_.clear(); + + storage_->constraints_ = Constraints(); + storage_->indices_.label_index = LabelIndex(&storage_->indices_, &storage_->constraints_, storage_->config_.items); + storage_->indices_.label_property_index = + LabelPropertyIndex(&storage_->indices_, &storage_->constraints_, storage_->config_.items); + try { + spdlog::debug("Loading snapshot"); + auto recovered_snapshot = durability::LoadSnapshot(*maybe_snapshot_path, &storage_->vertices_, &storage_->edges_, + &storage_->epoch_history_, &storage_->name_id_mapper_, + &storage_->edge_count_, storage_->config_.items); + spdlog::debug("Snapshot loaded successfully"); + // If this step is present, it should always be the first step of + // the recovery, so we use the UUID we read from the snapshot + storage_->uuid_ = std::move(recovered_snapshot.snapshot_info.uuid); + storage_->epoch_id_ = std::move(recovered_snapshot.snapshot_info.epoch_id); + const auto &recovery_info = recovered_snapshot.recovery_info; + storage_->vertex_id_ = recovery_info.next_vertex_id; + storage_->edge_id_ = recovery_info.next_edge_id; + storage_->timestamp_ = std::max(storage_->timestamp_, recovery_info.next_timestamp); + + durability::RecoverIndicesAndConstraints(recovered_snapshot.indices_constraints, &storage_->indices_, + &storage_->constraints_, &storage_->vertices_); + } catch (const durability::RecoveryFailure &e) {
LOG_FATAL("Couldn't load the snapshot because of: {}", e.what()); + } + storage_guard.unlock(); + + SnapshotRes res{true, storage_->last_commit_timestamp_.load()}; + slk::Save(res, res_builder); + + // Delete other durability files + auto snapshot_files = durability::GetSnapshotFiles(storage_->snapshot_directory_, storage_->uuid_); + for (const auto &[path, uuid, _] : snapshot_files) { + if (path != *maybe_snapshot_path) { + storage_->file_retainer_.DeleteFile(path); + } + } + + auto wal_files = durability::GetWalFiles(storage_->wal_directory_, storage_->uuid_); + if (wal_files) { + for (const auto &wal_file : *wal_files) { + storage_->file_retainer_.DeleteFile(wal_file.path); + } + + storage_->wal_file_.reset(); + } +} + +void Storage::ReplicationServer::WalFilesHandler(slk::Reader *req_reader, slk::Builder *res_builder) { + WalFilesReq req; + slk::Load(&req, req_reader); + + const auto wal_file_number = req.file_number; + spdlog::debug("Received WAL files: {}", wal_file_number); + + replication::Decoder decoder(req_reader); + + utils::EnsureDirOrDie(storage_->wal_directory_); + + for (auto i = 0; i < wal_file_number; ++i) { + LoadWal(&decoder); + } + + WalFilesRes res{true, storage_->last_commit_timestamp_.load()}; + slk::Save(res, res_builder); +} + +void Storage::ReplicationServer::CurrentWalHandler(slk::Reader *req_reader, slk::Builder *res_builder) { + CurrentWalReq req; + slk::Load(&req, req_reader); + + replication::Decoder decoder(req_reader); + + utils::EnsureDirOrDie(storage_->wal_directory_); + + LoadWal(&decoder); + + CurrentWalRes res{true, storage_->last_commit_timestamp_.load()}; + slk::Save(res, res_builder); +} + +void Storage::ReplicationServer::LoadWal(replication::Decoder *decoder) { + const auto temp_wal_directory = std::filesystem::temp_directory_path() / "memgraph" / durability::kWalDirectory; + utils::EnsureDir(temp_wal_directory); + auto maybe_wal_path = decoder->ReadFile(temp_wal_directory); + MG_ASSERT(maybe_wal_path, "Failed to load WAL!"); + spdlog::trace("Received WAL saved to {}", *maybe_wal_path); + try { + auto wal_info = durability::ReadWalInfo(*maybe_wal_path); + if (wal_info.seq_num == 0) { + storage_->uuid_ = wal_info.uuid; + } + + if (wal_info.epoch_id != storage_->epoch_id_) { + storage_->epoch_history_.emplace_back(wal_info.epoch_id, storage_->last_commit_timestamp_); + storage_->epoch_id_ = std::move(wal_info.epoch_id); + } + + if (storage_->wal_file_) { + if (storage_->wal_file_->SequenceNumber() != wal_info.seq_num) { + storage_->wal_file_->FinalizeWal(); + storage_->wal_seq_num_ = wal_info.seq_num; + storage_->wal_file_.reset(); + } + } else { + storage_->wal_seq_num_ = wal_info.seq_num; + } + + durability::Decoder wal; + const auto version = wal.Initialize(*maybe_wal_path, durability::kWalMagic); + if (!version) throw durability::RecoveryFailure("Couldn't read WAL magic and/or version!"); + if (!durability::IsVersionSupported(*version)) throw durability::RecoveryFailure("Invalid WAL version!"); + wal.SetPosition(wal_info.offset_deltas); + + for (size_t i = 0; i < wal_info.num_deltas;) { + i += ReadAndApplyDelta(&wal); + } + + spdlog::debug("{} loaded successfully", *maybe_wal_path); + } catch (const durability::RecoveryFailure &e) { + LOG_FATAL("Couldn't recover WAL deltas from {} because of: {}", *maybe_wal_path, e.what()); + } +} + +Storage::ReplicationServer::~ReplicationServer() { + if (rpc_server_) { + rpc_server_->Shutdown(); + rpc_server_->AwaitShutdown(); + } +} +uint64_t 
Storage::ReplicationServer::ReadAndApplyDelta(durability::BaseDecoder *decoder) { auto edge_acc = storage_->edges_.access(); auto vertex_acc = storage_->vertices_.access(); @@ -121,11 +282,22 @@ void Storage::ReplicationServer::AppendDeltasHandler(slk::Reader *req_reader, sl return &commit_timestamp_and_accessor->second; }; - bool transaction_complete = false; - for (uint64_t i = 0; !transaction_complete; ++i) { - SPDLOG_INFO(" Delta {}", i); - const auto [timestamp, delta] = read_delta(); + uint64_t applied_deltas = 0; + auto max_commit_timestamp = storage_->last_commit_timestamp_.load(); + for (bool transaction_complete = false; !transaction_complete; ++applied_deltas) { + const auto [timestamp, delta] = ReadDelta(decoder); + if (timestamp > max_commit_timestamp) { + max_commit_timestamp = timestamp; + } + + transaction_complete = durability::IsWalDeltaDataTypeTransactionEnd(delta.type); + + if (timestamp < storage_->timestamp_) { + continue; + } + + SPDLOG_INFO(" Delta {}", applied_deltas); switch (delta.type) { case durability::WalDeltaData::Type::VERTEX_CREATE: { spdlog::trace(" Create vertex {}", delta.vertex_create_delete.gid.AsUint()); @@ -368,189 +540,12 @@ void Storage::ReplicationServer::AppendDeltasHandler(slk::Reader *req_reader, sl break; } } - transaction_complete = durability::IsWalDeltaDataTypeTransactionEnd(delta.type); } if (commit_timestamp_and_accessor) throw utils::BasicException("Invalid data!"); - AppendDeltasRes res{true, storage_->last_commit_timestamp_.load()}; - slk::Save(res, res_builder); -} + storage_->last_commit_timestamp_ = max_commit_timestamp; -void Storage::ReplicationServer::SnapshotHandler(slk::Reader *req_reader, slk::Builder *res_builder) { - SnapshotReq req; - slk::Load(&req, req_reader); - - replication::Decoder decoder(req_reader); - - utils::EnsureDirOrDie(storage_->snapshot_directory_); - - const auto maybe_snapshot_path = decoder.ReadFile(storage_->snapshot_directory_); - MG_ASSERT(maybe_snapshot_path, "Failed to load snapshot!"); - spdlog::info("Received snapshot saved to {}", *maybe_snapshot_path); - - { - std::unique_lock storage_guard(storage_->main_lock_); - // Clear the database - storage_->vertices_.clear(); - storage_->edges_.clear(); - - storage_->constraints_ = Constraints(); - storage_->indices_.label_index = LabelIndex(&storage_->indices_, &storage_->constraints_, storage_->config_.items); - storage_->indices_.label_property_index = - LabelPropertyIndex(&storage_->indices_, &storage_->constraints_, storage_->config_.items); - try { - spdlog::debug("Loading snapshot"); - auto recovered_snapshot = durability::LoadSnapshot(*maybe_snapshot_path, &storage_->vertices_, &storage_->edges_, - &storage_->epoch_history_, &storage_->name_id_mapper_, - &storage_->edge_count_, storage_->config_.items); - spdlog::debug("Snapshot loaded successfully"); - // If this step is present it should always be the first step of - // the recovery so we use the UUID we read from snasphost - storage_->uuid_ = std::move(recovered_snapshot.snapshot_info.uuid); - storage_->epoch_id_ = std::move(recovered_snapshot.snapshot_info.epoch_id); - const auto &recovery_info = recovered_snapshot.recovery_info; - storage_->vertex_id_ = recovery_info.next_vertex_id; - storage_->edge_id_ = recovery_info.next_edge_id; - storage_->timestamp_ = std::max(storage_->timestamp_, recovery_info.next_timestamp); - storage_->commit_log_.emplace(storage_->timestamp_); - - durability::RecoverIndicesAndConstraints(recovered_snapshot.indices_constraints, &storage_->indices_, - 
&storage_->constraints_, &storage_->vertices_); - } catch (const durability::RecoveryFailure &e) { - LOG_FATAL("Couldn't load the snapshot because of: {}", e.what()); - } - } - - SnapshotRes res{true, storage_->last_commit_timestamp_.load()}; - slk::Save(res, res_builder); - - // Delete other durability files - auto snapshot_files = durability::GetSnapshotFiles(storage_->snapshot_directory_, storage_->uuid_); - for (const auto &[path, uuid, _] : snapshot_files) { - if (path != *maybe_snapshot_path) { - storage_->file_retainer_.DeleteFile(path); - } - } - - auto wal_files = durability::GetWalFiles(storage_->wal_directory_, storage_->uuid_); - if (wal_files) { - for (const auto &wal_file : *wal_files) { - storage_->file_retainer_.DeleteFile(wal_file.path); - } - - storage_->wal_file_.reset(); - } -} - -void Storage::ReplicationServer::WalFilesHandler(slk::Reader *req_reader, slk::Builder *res_builder) { - WalFilesReq req; - slk::Load(&req, req_reader); - - const auto wal_file_number = req.file_number; - spdlog::debug("Received WAL files: {}", wal_file_number); - - replication::Decoder decoder(req_reader); - - utils::EnsureDirOrDie(storage_->wal_directory_); - - { - std::unique_lock storage_guard(storage_->main_lock_); - durability::RecoveredIndicesAndConstraints indices_constraints; - auto [wal_info, path] = LoadWal(&decoder, &indices_constraints); - if (wal_info.seq_num == 0) { - storage_->uuid_ = wal_info.uuid; - } - - // Check the seq number of the first wal file to see if it's the - // finalized form of the current wal on replica - if (storage_->wal_file_) { - if (storage_->wal_file_->SequenceNumber() == wal_info.seq_num && storage_->wal_file_->Path() != path) { - storage_->wal_file_->DeleteWal(); - } - storage_->wal_file_.reset(); - } - - for (auto i = 1; i < wal_file_number; ++i) { - LoadWal(&decoder, &indices_constraints); - } - - durability::RecoverIndicesAndConstraints(indices_constraints, &storage_->indices_, &storage_->constraints_, - &storage_->vertices_); - } - - WalFilesRes res{true, storage_->last_commit_timestamp_.load()}; - slk::Save(res, res_builder); -} - -void Storage::ReplicationServer::CurrentWalHandler(slk::Reader *req_reader, slk::Builder *res_builder) { - CurrentWalReq req; - slk::Load(&req, req_reader); - - replication::Decoder decoder(req_reader); - - utils::EnsureDirOrDie(storage_->wal_directory_); - - { - std::unique_lock storage_guard(storage_->main_lock_); - durability::RecoveredIndicesAndConstraints indices_constraints; - auto [wal_info, path] = LoadWal(&decoder, &indices_constraints); - if (wal_info.seq_num == 0) { - storage_->uuid_ = wal_info.uuid; - } - - if (storage_->wal_file_ && storage_->wal_file_->SequenceNumber() == wal_info.seq_num && - storage_->wal_file_->Path() != path) { - // Delete the old wal file - storage_->file_retainer_.DeleteFile(storage_->wal_file_->Path()); - } - MG_ASSERT(storage_->config_.durability.snapshot_wal_mode == - Config::Durability::SnapshotWalMode::PERIODIC_SNAPSHOT_WITH_WAL); - storage_->wal_file_.emplace(std::move(path), storage_->config_.items, &storage_->name_id_mapper_, wal_info.seq_num, - wal_info.from_timestamp, wal_info.to_timestamp, wal_info.num_deltas, - &storage_->file_retainer_); - durability::RecoverIndicesAndConstraints(indices_constraints, &storage_->indices_, &storage_->constraints_, - &storage_->vertices_); - } - - CurrentWalRes res{true, storage_->last_commit_timestamp_.load()}; - slk::Save(res, res_builder); -} - -std::pair Storage::ReplicationServer::LoadWal( - replication::Decoder *decoder, 
durability::RecoveredIndicesAndConstraints *indices_constraints) { - auto maybe_wal_path = decoder->ReadFile(storage_->wal_directory_, "_MAIN"); - MG_ASSERT(maybe_wal_path, "Failed to load WAL!"); - spdlog::trace("Received WAL saved to {}", *maybe_wal_path); - try { - auto wal_info = durability::ReadWalInfo(*maybe_wal_path); - if (wal_info.epoch_id != storage_->epoch_id_) { - storage_->epoch_history_.emplace_back(wal_info.epoch_id, storage_->last_commit_timestamp_); - storage_->epoch_id_ = std::move(wal_info.epoch_id); - } - const auto last_loaded_timestamp = - storage_->timestamp_ == kTimestampInitialId ? std::nullopt : std::optional{storage_->timestamp_ - 1}; - auto info = durability::LoadWal(*maybe_wal_path, indices_constraints, last_loaded_timestamp, &storage_->vertices_, - &storage_->edges_, &storage_->name_id_mapper_, &storage_->edge_count_, - storage_->config_.items); - storage_->vertex_id_ = std::max(storage_->vertex_id_.load(), info.next_vertex_id); - storage_->edge_id_ = std::max(storage_->edge_id_.load(), info.next_edge_id); - storage_->timestamp_ = std::max(storage_->timestamp_, info.next_timestamp); - storage_->commit_log_.emplace(storage_->timestamp_); - if (info.last_commit_timestamp) { - storage_->last_commit_timestamp_ = *info.last_commit_timestamp; - } - spdlog::debug("{} loaded successfully", *maybe_wal_path); - return {std::move(wal_info), std::move(*maybe_wal_path)}; - } catch (const durability::RecoveryFailure &e) { - LOG_FATAL("Couldn't recover WAL deltas from {} because of: {}", *maybe_wal_path, e.what()); - } -} - -Storage::ReplicationServer::~ReplicationServer() { - if (rpc_server_) { - rpc_server_->Shutdown(); - rpc_server_->AwaitShutdown(); - } + return applied_deltas; } } // namespace storage diff --git a/src/storage/v2/replication/replication_server.hpp b/src/storage/v2/replication/replication_server.hpp index bff872f80..2d0846ab6 100644 --- a/src/storage/v2/replication/replication_server.hpp +++ b/src/storage/v2/replication/replication_server.hpp @@ -23,8 +23,8 @@ class Storage::ReplicationServer { void WalFilesHandler(slk::Reader *req_reader, slk::Builder *res_builder); void CurrentWalHandler(slk::Reader *req_reader, slk::Builder *res_builder); - std::pair LoadWal( - replication::Decoder *decoder, durability::RecoveredIndicesAndConstraints *indices_constraints); + void LoadWal(replication::Decoder *decoder); + uint64_t ReadAndApplyDelta(durability::BaseDecoder *decoder); std::optional rpc_server_context_; std::optional rpc_server_; diff --git a/src/storage/v2/storage.cpp b/src/storage/v2/storage.cpp index ba7dc5d8d..fae4c710b 100644 --- a/src/storage/v2/storage.cpp +++ b/src/storage/v2/storage.cpp @@ -13,12 +13,15 @@ #include "storage/v2/durability/paths.hpp" #include "storage/v2/durability/snapshot.hpp" #include "storage/v2/durability/wal.hpp" +#include "storage/v2/edge_accessor.hpp" #include "storage/v2/indices.hpp" #include "storage/v2/mvcc.hpp" #include "storage/v2/replication/config.hpp" #include "storage/v2/transaction.hpp" +#include "storage/v2/vertex_accessor.hpp" #include "utils/file.hpp" #include "utils/logging.hpp" +#include "utils/memory_tracker.hpp" #include "utils/rw_lock.hpp" #include "utils/spin_lock.hpp" #include "utils/stat.hpp" @@ -31,6 +34,8 @@ namespace storage { +using OOMExceptionEnabler = utils::MemoryTracker::OutOfMemoryExceptionEnabler; + namespace { [[maybe_unused]] constexpr uint16_t kEpochHistoryRetention = 1000; } // namespace @@ -281,6 +286,7 @@ bool VerticesIterable::Iterator::operator==(const Iterator &other) const { 
Storage::Storage(Config config) : indices_(&constraints_, config.items), + isolation_level_(config.transaction.isolation_level), config_(config), snapshot_directory_(config_.durability.storage_directory / durability::kSnapshotDirectory), wal_directory_(config_.durability.storage_directory / durability::kWalDirectory), @@ -358,7 +364,7 @@ Storage::Storage(Config config) snapshot_runner_.Run("Snapshot", config_.durability.snapshot_interval, [this] { this->CreateSnapshot(); }); } if (config_.gc.type == Config::Gc::Type::PERIODIC) { - gc_runner_.Run("Storage GC", config_.gc.interval, [this] { this->CollectGarbage(); }); + gc_runner_.Run("Storage GC", config_.gc.interval, [this] { this->CollectGarbage<false>(); }); } if (timestamp_ == kTimestampInitialId) { @@ -389,32 +395,38 @@ Storage::~Storage() { } } -Storage::Accessor::Accessor(Storage *storage) +Storage::Accessor::Accessor(Storage *storage, IsolationLevel isolation_level) : storage_(storage), // The lock must be acquired before creating the transaction object to // prevent freshly created transactions from dangling in an active state // during exclusive operations. storage_guard_(storage_->main_lock_), - transaction_(storage->CreateTransaction()), + transaction_(storage->CreateTransaction(isolation_level)), is_transaction_active_(true), config_(storage->config_.items) {} Storage::Accessor::Accessor(Accessor &&other) noexcept : storage_(other.storage_), + storage_guard_(std::move(other.storage_guard_)), transaction_(std::move(other.transaction_)), + commit_timestamp_(other.commit_timestamp_), is_transaction_active_(other.is_transaction_active_), config_(other.config_) { // Don't allow the other accessor to abort our transaction in its destructor. other.is_transaction_active_ = false; + other.commit_timestamp_.reset(); } Storage::Accessor::~Accessor() { if (is_transaction_active_) { Abort(); } + + FinalizeTransaction(); } VertexAccessor Storage::Accessor::CreateVertex() { + OOMExceptionEnabler oom_exception; auto gid = storage_->vertex_id_.fetch_add(1, std::memory_order_acq_rel); auto acc = storage_->vertices_.access(); auto delta = CreateDeleteObjectDelta(&transaction_); @@ -426,6 +438,7 @@ VertexAccessor Storage::Accessor::CreateVertex() { } VertexAccessor Storage::Accessor::CreateVertex(storage::Gid gid) { + OOMExceptionEnabler oom_exception; // NOTE: When we update the next `vertex_id_` here we perform an RMW // (read-modify-write) operation that ISN'T atomic!
But, that isn't an issue // because this function is only called from the replication delta applier @@ -450,31 +463,37 @@ std::optional<VertexAccessor> Storage::Accessor::FindVertex(Gid gid, View view) return VertexAccessor::Create(&*it, &transaction_, &storage_->indices_, &storage_->constraints_, config_, view); } -Result<bool> Storage::Accessor::DeleteVertex(VertexAccessor *vertex) { +Result<std::optional<VertexAccessor>> Storage::Accessor::DeleteVertex(VertexAccessor *vertex) { MG_ASSERT(vertex->transaction_ == &transaction_, "VertexAccessor must be from the same transaction as the storage " "accessor when deleting a vertex!"); - auto vertex_ptr = vertex->vertex_; + auto *vertex_ptr = vertex->vertex_; std::lock_guard<utils::SpinLock> guard(vertex_ptr->lock); if (!PrepareForWrite(&transaction_, vertex_ptr)) return Error::SERIALIZATION_ERROR; - if (vertex_ptr->deleted) return false; + if (vertex_ptr->deleted) { + return std::optional<VertexAccessor>{}; + } if (!vertex_ptr->in_edges.empty() || !vertex_ptr->out_edges.empty()) return Error::VERTEX_HAS_EDGES; CreateAndLinkDelta(&transaction_, vertex_ptr, Delta::RecreateObjectTag()); vertex_ptr->deleted = true; - return true; + return std::make_optional<VertexAccessor>(vertex_ptr, &transaction_, &storage_->indices_, &storage_->constraints_, + config_, true); } -Result<bool> Storage::Accessor::DetachDeleteVertex(VertexAccessor *vertex) { +Result<std::optional<std::pair<VertexAccessor, std::vector<EdgeAccessor>>>> Storage::Accessor::DetachDeleteVertex( + VertexAccessor *vertex) { + using ReturnType = std::pair<VertexAccessor, std::vector<EdgeAccessor>>; + MG_ASSERT(vertex->transaction_ == &transaction_, "VertexAccessor must be from the same transaction as the storage " "accessor when deleting a vertex!"); - auto vertex_ptr = vertex->vertex_; + auto *vertex_ptr = vertex->vertex_; std::vector<std::tuple<EdgeTypeId, Vertex *, EdgeRef>> in_edges; std::vector<std::tuple<EdgeTypeId, Vertex *, EdgeRef>> out_edges; @@ -484,12 +503,13 @@ Result<bool> Storage::Accessor::DetachDeleteVertex(VertexAccessor *vertex) { if (!PrepareForWrite(&transaction_, vertex_ptr)) return Error::SERIALIZATION_ERROR; - if (vertex_ptr->deleted) return false; + if (vertex_ptr->deleted) return std::optional<ReturnType>{}; in_edges = vertex_ptr->in_edges; out_edges = vertex_ptr->out_edges; } + std::vector<EdgeAccessor> deleted_edges; for (const auto &item : in_edges) { auto [edge_type, from_vertex, edge] = item; EdgeAccessor e(edge, edge_type, from_vertex, vertex_ptr, &transaction_, &storage_->indices_, @@ -497,7 +517,11 @@ Result<bool> Storage::Accessor::DetachDeleteVertex(VertexAccessor *vertex) { auto ret = DeleteEdge(&e); if (ret.HasError()) { MG_ASSERT(ret.GetError() == Error::SERIALIZATION_ERROR, "Invalid database state!"); - return ret; + return ret.GetError(); + } + + if (ret.GetValue()) { + deleted_edges.push_back(*ret.GetValue()); } } for (const auto &item : out_edges) { @@ -507,7 +531,11 @@ Result<bool> Storage::Accessor::DetachDeleteVertex(VertexAccessor *vertex) { auto ret = DeleteEdge(&e); if (ret.HasError()) { MG_ASSERT(ret.GetError() == Error::SERIALIZATION_ERROR, "Invalid database state!"); - return ret; + return ret.GetError(); + } + + if (ret.GetValue()) { + deleted_edges.push_back(*ret.GetValue()); } } @@ -524,10 +552,13 @@ Result<bool> Storage::Accessor::DetachDeleteVertex(VertexAccessor *vertex) { CreateAndLinkDelta(&transaction_, vertex_ptr, Delta::RecreateObjectTag()); vertex_ptr->deleted = true; - return true; + return std::make_optional<ReturnType>( + VertexAccessor{vertex_ptr, &transaction_, &storage_->indices_, &storage_->constraints_, config_, true}, + std::move(deleted_edges)); } Result<EdgeAccessor> Storage::Accessor::CreateEdge(VertexAccessor *from, VertexAccessor *to, EdgeTypeId edge_type) { + OOMExceptionEnabler oom_exception; MG_ASSERT(from->transaction_ == to->transaction_, "VertexAccessors must be from the same 
transaction when creating " "an edge!"); @@ -587,6 +618,7 @@ Result<EdgeAccessor> Storage::Accessor::CreateEdge(VertexAccessor *from, VertexA Result<EdgeAccessor> Storage::Accessor::CreateEdge(VertexAccessor *from, VertexAccessor *to, EdgeTypeId edge_type, storage::Gid gid) { + OOMExceptionEnabler oom_exception; MG_ASSERT(from->transaction_ == to->transaction_, "VertexAccessors must be from the same transaction when creating " "an edge!"); @@ -652,7 +684,7 @@ Result<EdgeAccessor> Storage::Accessor::CreateEdge(VertexAccessor *from, VertexA &storage_->constraints_, config_); } -Result<bool> Storage::Accessor::DeleteEdge(EdgeAccessor *edge) { +Result<std::optional<EdgeAccessor>> Storage::Accessor::DeleteEdge(EdgeAccessor *edge) { MG_ASSERT(edge->transaction_ == &transaction_, "EdgeAccessor must be from the same transaction as the storage " "accessor when deleting an edge!"); @@ -666,11 +698,11 @@ Result<bool> Storage::Accessor::DeleteEdge(EdgeAccessor *edge) { if (!PrepareForWrite(&transaction_, edge_ptr)) return Error::SERIALIZATION_ERROR; - if (edge_ptr->deleted) return false; + if (edge_ptr->deleted) return std::optional<EdgeAccessor>{}; } - auto from_vertex = edge->from_vertex_; - auto to_vertex = edge->to_vertex_; + auto *from_vertex = edge->from_vertex_; + auto *to_vertex = edge->to_vertex_; // Obtain the locks by `gid` order to avoid lock cycles. std::unique_lock<utils::SpinLock> guard_from(from_vertex->lock, std::defer_lock); @@ -716,12 +748,12 @@ Result<bool> Storage::Accessor::DeleteEdge(EdgeAccessor *edge) { MG_ASSERT((op1 && op2) || (!op1 && !op2), "Invalid database state!"); if (!op1 && !op2) { // The edge is already deleted. - return false; + return std::optional<EdgeAccessor>{}; } } if (config_.properties_on_edges) { - auto edge_ptr = edge_ref.ptr; + auto *edge_ptr = edge_ref.ptr; CreateAndLinkDelta(&transaction_, edge_ptr, Delta::RecreateObjectTag()); edge_ptr->deleted = true; } @@ -732,7 +764,8 @@ Result<bool> Storage::Accessor::DeleteEdge(EdgeAccessor *edge) { // Decrement edge count. storage_->edge_count_.fetch_add(-1, std::memory_order_acq_rel); - return true; + return std::make_optional<EdgeAccessor>(edge_ref, edge_type, from_vertex, to_vertex, &transaction_, + &storage_->indices_, &storage_->constraints_, config_, true); } const std::string &Storage::Accessor::LabelToName(LabelId label) const { return storage_->LabelToName(label); } @@ -786,11 +819,10 @@ utils::BasicResult<ConstraintViolation, void> Storage::Accessor::Commit( // Save these so we can mark them used in the commit log. uint64_t start_timestamp = transaction_.start_timestamp; - uint64_t commit_timestamp; { std::unique_lock<utils::SpinLock> engine_guard(storage_->engine_lock_); - commit_timestamp = storage_->CommitTimestamp(desired_commit_timestamp); + commit_timestamp_.emplace(storage_->CommitTimestamp(desired_commit_timestamp)); // Before committing and validating vertices against unique constraints, // we have to update unique constraints with the vertices that are going @@ -814,7 +846,7 @@ utils::BasicResult<ConstraintViolation, void> Storage::Accessor::Commit( // No need to take any locks here because we modified this vertex and no // one else can touch it until we commit.
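The delete operations in this hunk now return an accessor to the deleted object instead of a plain bool. A minimal caller-side sketch of the new contract; `acc` (a Storage::Accessor) and `edge` are hypothetical names, not code from this patch:

auto maybe_deleted = acc.DeleteEdge(&edge);
if (maybe_deleted.HasError()) {
  // SERIALIZATION_ERROR: another transaction modified the edge first.
} else if (maybe_deleted.GetValue()) {
  // The returned accessor was constructed with for_deleted == true, so the
  // deleted edge can still be inspected read-only by this transaction.
  EdgeAccessor deleted_edge = *maybe_deleted.GetValue();
}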
unique_constraint_violation = - storage_->constraints_.unique_constraints.Validate(*prev.vertex, transaction_, commit_timestamp); + storage_->constraints_.unique_constraints.Validate(*prev.vertex, transaction_, *commit_timestamp_); if (unique_constraint_violation) { break; } @@ -831,7 +863,7 @@ utils::BasicResult Storage::Accessor::Commit( // Replica can log only the write transaction received from Main // so the Wal files are consistent if (storage_->replication_role_ == ReplicationRole::MAIN || desired_commit_timestamp.has_value()) { - storage_->AppendToWal(transaction_, commit_timestamp); + storage_->AppendToWal(transaction_, *commit_timestamp_); } // Take committed_transactions lock while holding the engine lock to @@ -841,27 +873,24 @@ utils::BasicResult Storage::Accessor::Commit( // TODO: release lock, and update all deltas to have a local copy // of the commit timestamp MG_ASSERT(transaction_.commit_timestamp != nullptr, "Invalid database state!"); - transaction_.commit_timestamp->store(commit_timestamp, std::memory_order_release); + transaction_.commit_timestamp->store(*commit_timestamp_, std::memory_order_release); // Replica can only update the last commit timestamp with // the commits received from main. if (storage_->replication_role_ == ReplicationRole::MAIN || desired_commit_timestamp.has_value()) { // Update the last commit timestamp - storage_->last_commit_timestamp_.store(commit_timestamp); + storage_->last_commit_timestamp_.store(*commit_timestamp_); } // Release engine lock because we don't have to hold it anymore // and emplace back could take a long time. engine_guard.unlock(); - committed_transactions.emplace_back(std::move(transaction_)); }); storage_->commit_log_->MarkFinished(start_timestamp); - storage_->commit_log_->MarkFinished(commit_timestamp); } } if (unique_constraint_violation) { Abort(); - storage_->commit_log_->MarkFinished(commit_timestamp); return *unique_constraint_violation; } } @@ -1034,6 +1063,15 @@ void Storage::Accessor::Abort() { is_transaction_active_ = false; } +void Storage::Accessor::FinalizeTransaction() { + if (commit_timestamp_) { + storage_->commit_log_->MarkFinished(*commit_timestamp_); + storage_->committed_transactions_.WithLock( + [&](auto &committed_transactions) { committed_transactions.emplace_back(std::move(transaction_)); }); + commit_timestamp_.reset(); + } +} + const std::string &Storage::LabelToName(LabelId label) const { return name_id_mapper_.IdToName(label.AsUint()); } const std::string &Storage::PropertyToName(PropertyId property) const { @@ -1190,7 +1228,7 @@ VerticesIterable Storage::Accessor::Vertices(LabelId label, PropertyId property, storage_->indices_.label_property_index.Vertices(label, property, lower_bound, upper_bound, view, &transaction_)); } -Transaction Storage::CreateTransaction() { +Transaction Storage::CreateTransaction(IsolationLevel isolation_level) { // We acquire the transaction engine lock here because we access (and // modify) the transaction engine variables (`transaction_id` and // `timestamp`) below. @@ -1211,14 +1249,32 @@ Transaction Storage::CreateTransaction() { start_timestamp = timestamp_++; } } - return {transaction_id, start_timestamp}; + return {transaction_id, start_timestamp, isolation_level}; } +template void Storage::CollectGarbage() { - // Because the garbage collector iterates through the indices and constraints - // to clean them up, it must take the main lock for reading to make sure that - // the indices and constraints aren't concurrently being modified. 
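Since commit_timestamp_ now lives on the accessor, marking the timestamp finished and handing the transaction to the GC are deferred to FinalizeTransaction(), which the destructor calls. A sketch of the resulting lifecycle; the driver code is hypothetical:

{
  auto acc = storage.Access();
  // ... reads and writes through acc ...
  auto maybe_violation = acc.Commit();  // stores commit_timestamp_, appends to the WAL
}  // ~Accessor() runs FinalizeTransaction(): MarkFinished(*commit_timestamp_)
   // and the transaction moves into committed_transactions_.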
- std::shared_lock main_guard(main_lock_); + if constexpr (force) { + // We take the unique lock on the main storage lock so we can forcefully clean + // everything we can. + if (!main_lock_.try_lock()) { + CollectGarbage<false>(); + return; + } + } else { + // Because the garbage collector iterates through the indices and constraints + // to clean them up, it must take the main lock for reading to make sure that + // the indices and constraints aren't concurrently being modified. + main_lock_.lock_shared(); + } + + utils::OnScopeExit lock_releaser{[&] { + if constexpr (force) { + main_lock_.unlock(); + } else { + main_lock_.unlock_shared(); + } + }}; // Garbage collection must be performed in two phases. In the first phase, // deltas that won't be applied by any transaction anymore are unlinked from @@ -1411,19 +1467,32 @@ void Storage::CollectGarbage() { } } - while (true) { - auto garbage_undo_buffers_ptr = garbage_undo_buffers_.Lock(); - if (garbage_undo_buffers_ptr->empty() || garbage_undo_buffers_ptr->front().first > oldest_active_start_timestamp) { - break; + garbage_undo_buffers_.WithLock([&](auto &undo_buffers) { + // If force is set to true, we can simply delete all the leftover undo buffers because + // no transaction is active. + if constexpr (force) { + undo_buffers.clear(); + } else { + while (!undo_buffers.empty() && undo_buffers.front().first <= oldest_active_start_timestamp) { + undo_buffers.pop_front(); + } } - garbage_undo_buffers_ptr->pop_front(); - } + }); { auto vertex_acc = vertices_.access(); - while (!garbage_vertices_.empty() && garbage_vertices_.front().first < oldest_active_start_timestamp) { - MG_ASSERT(vertex_acc.remove(garbage_vertices_.front().second), "Invalid database state!"); - garbage_vertices_.pop_front(); + if constexpr (force) { + // If force is set to true, we hold the unique lock and no transactions are active, + // so we can clean all of the deleted vertices. + while (!garbage_vertices_.empty()) { + MG_ASSERT(vertex_acc.remove(garbage_vertices_.front().second), "Invalid database state!"); + garbage_vertices_.pop_front(); + } + } else { + while (!garbage_vertices_.empty() && garbage_vertices_.front().first < oldest_active_start_timestamp) { + MG_ASSERT(vertex_acc.remove(garbage_vertices_.front().second), "Invalid database state!"); + garbage_vertices_.pop_front(); + } } } { @@ -1434,6 +1503,10 @@ void Storage::CollectGarbage() { } } +// Tell the linker it can find the CollectGarbage definitions here. +template void Storage::CollectGarbage<true>(); +template void Storage::CollectGarbage<false>(); + bool Storage::InitializeWalFile() { if (config_.durability.snapshot_wal_mode != Config::Durability::SnapshotWalMode::PERIODIC_SNAPSHOT_WITH_WAL) return false; @@ -1664,7 +1737,7 @@ void Storage::CreateSnapshot() { std::shared_lock storage_guard(main_lock_); // Create the transaction used to create the snapshot. - auto transaction = CreateTransaction(); + auto transaction = CreateTransaction(IsolationLevel::SNAPSHOT_ISOLATION); // Create snapshot.
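CollectGarbage is now a template defined in storage.cpp, so the two specializations used by the codebase are instantiated explicitly right after the definition. A self-contained illustration of that pattern with hypothetical names:

// widget.hpp
template <bool flag> void Frobnicate();

// widget.cpp
template <bool flag> void Frobnicate() { /* definition visible only here */ }
template void Frobnicate<true>();   // emitted into widget.o
template void Frobnicate<false>();  // so callers in other TUs can link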
durability::CreateSnapshot(&transaction, snapshot_directory_, wal_directory_, @@ -1695,6 +1768,16 @@ bool Storage::UnlockPath() { return true; } +void Storage::FreeMemory() { + CollectGarbage<true>(); + + // SkipList is already thread-safe + vertices_.run_gc(); + edges_.run_gc(); + indices_.label_index.RunGC(); + indices_.label_property_index.RunGC(); +} + uint64_t Storage::CommitTimestamp(const std::optional<uint64_t> desired_commit_timestamp) { if (!desired_commit_timestamp) { return timestamp_++; @@ -1812,4 +1895,9 @@ std::vector<Storage::ReplicaInfo> Storage::ReplicasInfo() { }); } +void Storage::SetIsolationLevel(IsolationLevel isolation_level) { + std::unique_lock main_guard{main_lock_}; + isolation_level_ = isolation_level; +} + } // namespace storage diff --git a/src/storage/v2/storage.hpp b/src/storage/v2/storage.hpp index 712519701..fcb13c349 100644 --- a/src/storage/v2/storage.hpp +++ b/src/storage/v2/storage.hpp @@ -4,6 +4,7 @@ #include #include #include +#include #include "io/network/endpoint.hpp" #include "storage/v2/commit_log.hpp" @@ -14,6 +15,7 @@ #include "storage/v2/edge.hpp" #include "storage/v2/edge_accessor.hpp" #include "storage/v2/indices.hpp" +#include "storage/v2/isolation_level.hpp" #include "storage/v2/mvcc.hpp" #include "storage/v2/name_id_mapper.hpp" #include "storage/v2/result.hpp" @@ -21,6 +23,7 @@ #include "storage/v2/vertex.hpp" #include "storage/v2/vertex_accessor.hpp" #include "utils/file_locker.hpp" +#include "utils/on_scope_exit.hpp" #include "utils/rw_lock.hpp" #include "utils/scheduler.hpp" #include "utils/skip_list.hpp" @@ -182,7 +185,7 @@ class Storage final { private: friend class Storage; - explicit Accessor(Storage *storage); + explicit Accessor(Storage *storage, IsolationLevel isolation_level); public: Accessor(const Accessor &) = delete; @@ -248,17 +251,21 @@ class Storage final { return storage_->indices_.label_property_index.ApproximateVertexCount(label, property, lower, upper); } + /// @return Accessor to the deleted vertex if a deletion took place, std::nullopt otherwise /// @throw std::bad_alloc - Result<bool> DeleteVertex(VertexAccessor *vertex); + Result<std::optional<VertexAccessor>> DeleteVertex(VertexAccessor *vertex); + /// @return Accessor to the deleted vertex and deleted edges if a deletion took place, std::nullopt otherwise /// @throw std::bad_alloc - Result<bool> DetachDeleteVertex(VertexAccessor *vertex); + Result<std::optional<std::pair<VertexAccessor, std::vector<EdgeAccessor>>>> DetachDeleteVertex( + VertexAccessor *vertex); /// @throw std::bad_alloc Result<EdgeAccessor> CreateEdge(VertexAccessor *from, VertexAccessor *to, EdgeTypeId edge_type); + /// Accessor to the deleted edge if a deletion took place, std::nullopt otherwise /// @throw std::bad_alloc - Result<bool> DeleteEdge(EdgeAccessor *edge); + Result<std::optional<EdgeAccessor>> DeleteEdge(EdgeAccessor *edge); const std::string &LabelToName(LabelId label) const; const std::string &PropertyToName(PropertyId property) const; @@ -299,6 +306,8 @@ class Storage final { /// @throw std::bad_alloc void Abort(); + void FinalizeTransaction(); + private: /// @throw std::bad_alloc VertexAccessor CreateVertex(storage::Gid gid); @@ -309,11 +318,14 @@ class Storage final { Storage *storage_; std::shared_lock<utils::RWLock> storage_guard_; Transaction transaction_; + std::optional<uint64_t> commit_timestamp_; bool is_transaction_active_; Config::Items config_; }; - Accessor Access() { return Accessor{this}; } + Accessor Access(std::optional<IsolationLevel> override_isolation_level = {}) { + return Accessor{this, override_isolation_level.value_or(isolation_level_)}; + } const std::string &LabelToName(LabelId label) const; const std::string &PropertyToName(PropertyId property) const; @@ -412,11 +424,25 @@ class
Storage final { std::vector<ReplicaInfo> ReplicasInfo(); - private: - Transaction CreateTransaction(); + void FreeMemory(); + void SetIsolationLevel(IsolationLevel isolation_level); + + private: + Transaction CreateTransaction(IsolationLevel isolation_level); + + /// The force parameter determines the behaviour of the garbage collector. + /// If it's set to true, it will behave as a global operation, i.e. it can't + /// be part of a transaction, and no other transaction can be active at the same time. + /// This allows it to delete vertices immediately, without worrying that some other + /// transaction is possibly using them. If there are active transactions when this method + /// is called with force set to true, it will fall back to the same method with force + /// set to false. + /// If it's set to false, it will execute in parallel with other transactions, ensuring + /// that no object in use can be deleted. /// @throw std::system_error /// @throw std::bad_alloc + template <bool force> void CollectGarbage(); bool InitializeWalFile(); @@ -464,6 +490,7 @@ class Storage final { std::optional<CommitLog> commit_log_; utils::Synchronized<std::list<Transaction>, utils::SpinLock> committed_transactions_; + IsolationLevel isolation_level_; Config config_; utils::Scheduler gc_runner_; diff --git a/src/storage/v2/transaction.hpp b/src/storage/v2/transaction.hpp index 979b26da8..dbf029dba 100644 --- a/src/storage/v2/transaction.hpp +++ b/src/storage/v2/transaction.hpp @@ -9,6 +9,7 @@ #include "storage/v2/delta.hpp" #include "storage/v2/edge.hpp" +#include "storage/v2/isolation_level.hpp" #include "storage/v2/property_value.hpp" #include "storage/v2/vertex.hpp" #include "storage/v2/view.hpp" @@ -19,8 +20,12 @@ const uint64_t kTimestampInitialId = 0; const uint64_t kTransactionInitialId = 1ULL << 63U; struct Transaction { - Transaction(uint64_t transaction_id, uint64_t start_timestamp) - : transaction_id(transaction_id), start_timestamp(start_timestamp), command_id(0), must_abort(false) {} + Transaction(uint64_t transaction_id, uint64_t start_timestamp, IsolationLevel isolation_level) + : transaction_id(transaction_id), + start_timestamp(start_timestamp), + command_id(0), + must_abort(false), + isolation_level(isolation_level) {} Transaction(Transaction &&other) noexcept : transaction_id(other.transaction_id), @@ -28,7 +33,8 @@ struct Transaction { commit_timestamp(std::move(other.commit_timestamp)), command_id(other.command_id), deltas(std::move(other.deltas)), - must_abort(other.must_abort) {} + must_abort(other.must_abort), + isolation_level(other.isolation_level) {} Transaction(const Transaction &) = delete; Transaction &operator=(const Transaction &) = delete; @@ -52,6 +58,7 @@ struct Transaction { uint64_t command_id; std::list<Delta> deltas; bool must_abort; + IsolationLevel isolation_level; }; inline bool operator==(const Transaction &first, const Transaction &second) { diff --git a/src/storage/v2/vertex_accessor.cpp b/src/storage/v2/vertex_accessor.cpp index d06d0fa94..4b749e9ab 100644 --- a/src/storage/v2/vertex_accessor.cpp +++ b/src/storage/v2/vertex_accessor.cpp @@ -6,20 +6,24 @@ #include "storage/v2/id_types.hpp" #include "storage/v2/indices.hpp" #include "storage/v2/mvcc.hpp" +#include "storage/v2/property_value.hpp" #include "utils/logging.hpp" +#include "utils/memory_tracker.hpp" namespace storage { -std::optional<VertexAccessor> VertexAccessor::Create(Vertex *vertex, Transaction *transaction, Indices *indices, - Constraints *constraints, Config::Items config, View view) { - bool is_visible = true; +namespace detail { +namespace { +std::pair<bool, bool> IsVisible(Vertex
*vertex, Transaction *transaction, View view) { + bool exists = true; + bool deleted = false; Delta *delta = nullptr; { std::lock_guard guard(vertex->lock); - is_visible = !vertex->deleted; + deleted = vertex->deleted; delta = vertex->delta; } - ApplyDeltasForRead(transaction, delta, view, [&is_visible](const Delta &delta) { + ApplyDeltasForRead(transaction, delta, view, [&](const Delta &delta) { switch (delta.action) { case Delta::Action::ADD_LABEL: case Delta::Action::REMOVE_LABEL: @@ -30,20 +34,37 @@ std::optional VertexAccessor::Create(Vertex *vertex, Transaction case Delta::Action::REMOVE_OUT_EDGE: break; case Delta::Action::RECREATE_OBJECT: { - is_visible = true; + deleted = false; break; } case Delta::Action::DELETE_OBJECT: { - is_visible = false; + exists = false; break; } } }); - if (!is_visible) return std::nullopt; + + return {exists, deleted}; +} +} // namespace +} // namespace detail + +std::optional VertexAccessor::Create(Vertex *vertex, Transaction *transaction, Indices *indices, + Constraints *constraints, Config::Items config, View view) { + if (const auto [exists, deleted] = detail::IsVisible(vertex, transaction, view); !exists || deleted) { + return std::nullopt; + } + return VertexAccessor{vertex, transaction, indices, constraints, config}; } +bool VertexAccessor::IsVisible(View view) const { + const auto [exists, deleted] = detail::IsVisible(vertex_, transaction_, view); + return exists && (for_deleted_ || !deleted); +} + Result VertexAccessor::AddLabel(LabelId label) { + utils::MemoryTracker::OutOfMemoryExceptionEnabler oom_exception; std::lock_guard guard(vertex_->lock); if (!PrepareForWrite(transaction_, vertex_)) return Error::SERIALIZATION_ERROR; @@ -122,7 +143,7 @@ Result VertexAccessor::HasLabel(LabelId label, View view) const { } }); if (!exists) return Error::NONEXISTENT_OBJECT; - if (deleted) return Error::DELETED_OBJECT; + if (!for_deleted_ && deleted) return Error::DELETED_OBJECT; return has_label; } @@ -171,11 +192,12 @@ Result> VertexAccessor::Labels(View view) const { } }); if (!exists) return Error::NONEXISTENT_OBJECT; - if (deleted) return Error::DELETED_OBJECT; + if (!for_deleted_ && deleted) return Error::DELETED_OBJECT; return std::move(labels); } -Result VertexAccessor::SetProperty(PropertyId property, const PropertyValue &value) { +Result VertexAccessor::SetProperty(PropertyId property, const PropertyValue &value) { + utils::MemoryTracker::OutOfMemoryExceptionEnabler oom_exception; std::lock_guard guard(vertex_->lock); if (!PrepareForWrite(transaction_, vertex_)) return Error::SERIALIZATION_ERROR; @@ -183,22 +205,21 @@ Result VertexAccessor::SetProperty(PropertyId property, const PropertyValu if (vertex_->deleted) return Error::DELETED_OBJECT; auto current_value = vertex_->properties.GetProperty(property); - bool existed = !current_value.IsNull(); // We could skip setting the value if the previous one is the same to the new // one. This would save some memory as a delta would not be created as well as // avoid copying the value. The reason we are not doing that is because the // current code always follows the logical pattern of "create a delta" and // "modify in-place". Additionally, the created delta will make other // transactions get a SERIALIZATION_ERROR. 
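The (exists, deleted) pair computed by detail::IsVisible is what the for_deleted_ flag keys off throughout this file. A hedged sketch of the intended behaviour; `acc`, `v`, and `label` are hypothetical names:

// DeleteVertex hands back an accessor whose for_deleted_ is true.
auto deleted = acc.DeleteVertex(&v).GetValue();
auto props = deleted->Properties(View::OLD);  // read succeeds despite deletion
auto added = deleted->AddLabel(label);        // write fails: Error::DELETED_OBJECT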
- CreateAndLinkDelta(transaction_, vertex_, Delta::SetPropertyTag(), property, std::move(current_value)); + CreateAndLinkDelta(transaction_, vertex_, Delta::SetPropertyTag(), property, current_value); vertex_->properties.SetProperty(property, value); UpdateOnSetProperty(indices_, property, value, vertex_, *transaction_); - return !existed; + return std::move(current_value); } -Result VertexAccessor::ClearProperties() { +Result> VertexAccessor::ClearProperties() { std::lock_guard guard(vertex_->lock); if (!PrepareForWrite(transaction_, vertex_)) return Error::SERIALIZATION_ERROR; @@ -206,7 +227,6 @@ Result VertexAccessor::ClearProperties() { if (vertex_->deleted) return Error::DELETED_OBJECT; auto properties = vertex_->properties.Properties(); - bool removed = !properties.empty(); for (const auto &property : properties) { CreateAndLinkDelta(transaction_, vertex_, Delta::SetPropertyTag(), property.first, property.second); UpdateOnSetProperty(indices_, property.first, PropertyValue(), vertex_, *transaction_); @@ -214,7 +234,7 @@ Result VertexAccessor::ClearProperties() { vertex_->properties.ClearProperties(); - return removed; + return std::move(properties); } Result VertexAccessor::GetProperty(PropertyId property, View view) const { @@ -254,7 +274,7 @@ Result VertexAccessor::GetProperty(PropertyId property, View view } }); if (!exists) return Error::NONEXISTENT_OBJECT; - if (deleted) return Error::DELETED_OBJECT; + if (!for_deleted_ && deleted) return Error::DELETED_OBJECT; return std::move(value); } @@ -304,7 +324,7 @@ Result> VertexAccessor::Properties(View view } }); if (!exists) return Error::NONEXISTENT_OBJECT; - if (deleted) return Error::DELETED_OBJECT; + if (!for_deleted_ && deleted) return Error::DELETED_OBJECT; return std::move(properties); } @@ -502,7 +522,7 @@ Result VertexAccessor::InDegree(View view) const { } }); if (!exists) return Error::NONEXISTENT_OBJECT; - if (deleted) return Error::DELETED_OBJECT; + if (!for_deleted_ && deleted) return Error::DELETED_OBJECT; return degree; } @@ -540,7 +560,7 @@ Result VertexAccessor::OutDegree(View view) const { } }); if (!exists) return Error::NONEXISTENT_OBJECT; - if (deleted) return Error::DELETED_OBJECT; + if (!for_deleted_ && deleted) return Error::DELETED_OBJECT; return degree; } diff --git a/src/storage/v2/vertex_accessor.hpp b/src/storage/v2/vertex_accessor.hpp index 1c3e4d609..a24f02a23 100644 --- a/src/storage/v2/vertex_accessor.hpp +++ b/src/storage/v2/vertex_accessor.hpp @@ -22,12 +22,20 @@ class VertexAccessor final { public: VertexAccessor(Vertex *vertex, Transaction *transaction, Indices *indices, Constraints *constraints, - Config::Items config) - : vertex_(vertex), transaction_(transaction), indices_(indices), constraints_(constraints), config_(config) {} + Config::Items config, bool for_deleted = false) + : vertex_(vertex), + transaction_(transaction), + indices_(indices), + constraints_(constraints), + config_(config), + for_deleted_(for_deleted) {} static std::optional Create(Vertex *vertex, Transaction *transaction, Indices *indices, Constraints *constraints, Config::Items config, View view); + /// @return true if the object is visible from the current transaction + bool IsVisible(View view) const; + /// Add a label and return `true` if insertion took place. /// `false` is returned if the label already existed. /// @throw std::bad_alloc @@ -45,15 +53,13 @@ class VertexAccessor final { /// std::vector::max_size(). Result> Labels(View view) const; - /// Set a property value and return `true` if insertion took place. 
- /// `false` is returned if assignment took place. + /// Set a property value and return the old value. /// @throw std::bad_alloc - Result SetProperty(PropertyId property, const PropertyValue &value); + Result SetProperty(PropertyId property, const PropertyValue &value); - /// Remove all properties and return `true` if any removal took place. - /// `false` is returned if there were no properties to remove. + /// Remove all properties and return the values of the removed properties. /// @throw std::bad_alloc - Result ClearProperties(); + Result> ClearProperties(); /// @throw std::bad_alloc Result GetProperty(PropertyId property, View view) const; @@ -90,6 +96,15 @@ class VertexAccessor final { Indices *indices_; Constraints *constraints_; Config::Items config_; + + // if the accessor was created for a deleted vertex. + // Accessor behaves differently for some methods based on this + // flag. + // E.g. If this field is set to true, GetProperty will return the property of the node + // even though the node is deleted. + // All the write operations, and operators used for traversal (e.g. InEdges) will still + // return an error if it's called for a deleted vertex. + bool for_deleted_{false}; }; } // namespace storage diff --git a/src/utils/CMakeLists.txt b/src/utils/CMakeLists.txt index b87a6d291..bd806e870 100644 --- a/src/utils/CMakeLists.txt +++ b/src/utils/CMakeLists.txt @@ -1,12 +1,20 @@ set(utils_src_files + async_timer.cpp event_counter.cpp + csv_parsing.cpp file.cpp file_locker.cpp memory.cpp + memory_tracker.cpp + readable_size.cpp signals.cpp + sysinfo/memory.cpp thread.cpp thread_pool.cpp uuid.cpp) add_library(mg-utils STATIC ${utils_src_files}) -target_link_libraries(mg-utils stdc++fs Threads::Threads spdlog fmt gflags uuid) +target_link_libraries(mg-utils stdc++fs Threads::Threads spdlog fmt gflags uuid rt) + +add_library(mg-new-delete STATIC new_delete.cpp) +target_link_libraries(mg-new-delete jemalloc fmt) diff --git a/src/utils/async_timer.cpp b/src/utils/async_timer.cpp new file mode 100644 index 000000000..afe90ce7f --- /dev/null +++ b/src/utils/async_timer.cpp @@ -0,0 +1,187 @@ +#include "utils/async_timer.hpp" + +#include + +#include +#include +#include +#include +#include + +#include "utils/skip_list.hpp" +#include "utils/spin_lock.hpp" +#include "utils/synchronized.hpp" + +namespace { + +constexpr uint64_t kInvalidFlagId = 0U; +// std::numeric_limits::max() cannot be represented precisely as a double, so the next smallest value is the +// maximum number of seconds the timer can be used with +const double max_seconds_as_double = std::nexttoward(std::numeric_limits::max(), 0.0); + +// NOLINTNEXTLINE (cppcoreguidelines-avoid-non-const-global-variables) +std::atomic expiration_flag_counter{kInvalidFlagId + 1U}; + +struct ExpirationFlagInfo { + uint64_t id{0U}; + std::weak_ptr> flag{}; +}; + +bool operator==(const ExpirationFlagInfo &lhs, const ExpirationFlagInfo &rhs) { return lhs.id == rhs.id; } +bool operator<(const ExpirationFlagInfo &lhs, const ExpirationFlagInfo &rhs) { return lhs.id < rhs.id; } +bool operator==(const ExpirationFlagInfo &flag_info, const uint64_t id) { return flag_info.id == id; } +bool operator<(const ExpirationFlagInfo &flag_info, const uint64_t id) { return flag_info.id < id; } + +// NOLINTNEXTLINE (cppcoreguidelines-avoid-non-const-global-variables) +utils::SkipList expiration_flags{}; + +uint64_t AddFlag(std::weak_ptr> flag) { + const auto id = expiration_flag_counter.fetch_add(1, std::memory_order_relaxed); + expiration_flags.access().insert({id, 
std::move(flag)}); + return id; +} + +void EraseFlag(uint64_t flag_id) { expiration_flags.access().remove(flag_id); } + +std::weak_ptr> GetFlag(uint64_t flag_id) { + const auto flag_accessor = expiration_flags.access(); + const auto it = flag_accessor.find(flag_id); + if (it == flag_accessor.end()) { + return {}; + } + + return it->flag; +} + +void MarkDone(const uint64_t flag_id) { + const auto weak_flag = GetFlag(flag_id); + if (weak_flag.expired()) { + return; + } + auto flag = weak_flag.lock(); + if (flag != nullptr) { + flag->store(true, std::memory_order_relaxed); + } +} +} // namespace + +namespace utils { + +namespace { +struct ThreadInfo { + pid_t thread_id; + std::atomic setup_done{false}; +}; + +void *TimerBackgroundWorker(void *args) { + auto *thread_info = static_cast(args); + thread_info->thread_id = syscall(SYS_gettid); + thread_info->setup_done.store(true, std::memory_order_release); + + sigset_t ss; + sigemptyset(&ss); + sigaddset(&ss, SIGTIMER); + sigprocmask(SIG_BLOCK, &ss, nullptr); + + while (true) { + siginfo_t si; + int result = sigwaitinfo(&ss, &si); + + if (result <= 0) { + continue; + } + + if (si.si_code == SI_TIMER) { + auto flag_id = kInvalidFlagId; + std::memcpy(&flag_id, &si.si_value.sival_ptr, sizeof(flag_id)); + MarkDone(flag_id); + } else if (si.si_code == SI_TKILL) { + pthread_exit(nullptr); + } + } +} +} // namespace + +AsyncTimer::AsyncTimer() : flag_id_{kInvalidFlagId} {}; + +AsyncTimer::AsyncTimer(double seconds) + : expiration_flag_{std::make_shared>(false)}, flag_id_{kInvalidFlagId}, timer_id_{} { + MG_ASSERT(seconds <= max_seconds_as_double, + "The AsyncTimer cannot handle larger time values than {:f}, the specified value: {:f}", + max_seconds_as_double, seconds); + MG_ASSERT(seconds >= 0.0, "The AsyncTimer cannot handle negative time values: {:f}", seconds); + + static pthread_t background_timer_thread; + static ThreadInfo thread_info; + static std::once_flag timer_thread_setup_flag; + + std::call_once(timer_thread_setup_flag, [] { + pthread_create(&background_timer_thread, nullptr, TimerBackgroundWorker, &thread_info); + while (!thread_info.setup_done.load(std::memory_order_acquire)) + ; + }); + + flag_id_ = AddFlag(std::weak_ptr>{expiration_flag_}); + + sigevent notification_settings{}; + notification_settings.sigev_notify = SIGEV_THREAD_ID; + notification_settings.sigev_signo = SIGTIMER; + notification_settings._sigev_un._tid = thread_info.thread_id; + static_assert(sizeof(void *) == sizeof(flag_id_), "ID size must be equal to pointer size!"); + std::memcpy(¬ification_settings.sigev_value.sival_ptr, &flag_id_, sizeof(flag_id_)); + MG_ASSERT(timer_create(CLOCK_MONOTONIC, ¬ification_settings, &timer_id_) == 0, "Couldn't create timer: ({}) {}", + errno, strerror(errno)); + + constexpr auto kSecondsToNanos = 1000 * 1000 * 1000; + // Casting will truncate down, but that's exactly what we want. 
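For the conversion performed below, a worked example with an assumed input of 1.25 seconds:

// seconds                = 1.25
// second_as_time_t       = static_cast<time_t>(1.25)   -> 1
// remaining_nano_seconds = (1.25 - 1) * 1'000'000'000  -> 250'000'000
// spec.it_value          = { tv_sec = 1, tv_nsec = 250000000 }, i.e. 1.25 s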
+ const auto second_as_time_t = static_cast<time_t>(seconds); + const auto remaining_nano_seconds = static_cast<long>((seconds - second_as_time_t) * kSecondsToNanos); + + struct itimerspec spec; + spec.it_interval.tv_sec = 0; + spec.it_interval.tv_nsec = 0; + spec.it_value.tv_sec = second_as_time_t; + spec.it_value.tv_nsec = remaining_nano_seconds; + + MG_ASSERT(timer_settime(timer_id_, 0, &spec, nullptr) == 0, "Couldn't set timer: ({}) {}", errno, strerror(errno)); +} + +AsyncTimer::~AsyncTimer() { ReleaseResources(); } + +AsyncTimer::AsyncTimer(AsyncTimer &&other) noexcept + : expiration_flag_{std::move(other.expiration_flag_)}, flag_id_{other.flag_id_}, timer_id_{other.timer_id_} { + other.flag_id_ = kInvalidFlagId; +} + +// NOLINTNEXTLINE (hicpp-noexcept-move) +AsyncTimer &AsyncTimer::operator=(AsyncTimer &&other) { + if (this == &other) { + return *this; + } + + ReleaseResources(); + + expiration_flag_ = std::move(other.expiration_flag_); + flag_id_ = std::exchange(other.flag_id_, kInvalidFlagId); + timer_id_ = other.timer_id_; + + return *this; +}; + +bool AsyncTimer::IsExpired() const { + if (expiration_flag_ != nullptr) { + return expiration_flag_->load(std::memory_order_relaxed); + } + return false; +} + +void AsyncTimer::ReleaseResources() { + if (expiration_flag_ != nullptr) { + timer_delete(timer_id_); + EraseFlag(flag_id_); + flag_id_ = kInvalidFlagId; + expiration_flag_ = std::shared_ptr<std::atomic<bool>>{}; + } +} + +} // namespace utils diff --git a/src/utils/async_timer.hpp b/src/utils/async_timer.hpp new file mode 100644 index 000000000..4ac2ffc87 --- /dev/null +++ b/src/utils/async_timer.hpp @@ -0,0 +1,37 @@ +#pragma once +#include + +#include + +#include "utils/logging.hpp" + +namespace utils { + +#define SIGTIMER (SIGRTMAX - 2) + +class AsyncTimer { + public: + AsyncTimer(); + explicit AsyncTimer(double seconds); + ~AsyncTimer(); + AsyncTimer(AsyncTimer &&other) noexcept; + // NOLINTNEXTLINE (hicpp-noexcept-move) + AsyncTimer &operator=(AsyncTimer &&other); + + AsyncTimer(const AsyncTimer &) = delete; + AsyncTimer &operator=(const AsyncTimer &) = delete; + + // Returns false if the object isn't associated with any timer. + bool IsExpired() const; + + private: + void ReleaseResources(); + + // If the expiration_flag_ is nullptr, then the object is not associated with any timer, therefore no cleanup + // is necessary. Furthermore, the POSIX API doesn't specify any value as "invalid" for timer_t, so the timer_id_ + // cannot be used to determine whether the object is associated with any timer or not.
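A hedged usage sketch of the AsyncTimer interface declared below; DoChunkOfWork is a hypothetical stand-in for caller logic:

utils::AsyncTimer timer{2.5};       // one-shot timer, fires 2.5 s from now
while (DoChunkOfWork()) {
  if (timer.IsExpired()) break;     // cooperative cancellation point
}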
+ std::shared_ptr<std::atomic<bool>> expiration_flag_; + uint64_t flag_id_; + timer_t timer_id_; +}; +} // namespace utils diff --git a/src/utils/concepts.hpp b/src/utils/concepts.hpp new file mode 100644 index 000000000..37365fe98 --- /dev/null +++ b/src/utils/concepts.hpp @@ -0,0 +1,7 @@ +#pragma once +#include <concepts> + +namespace utils { +template <typename T, typename... Ts> +concept SameAsAnyOf = (std::same_as<T, Ts> || ...); +} // namespace utils diff --git a/src/utils/csv_parsing.cpp b/src/utils/csv_parsing.cpp new file mode 100644 index 000000000..d500b517e --- /dev/null +++ b/src/utils/csv_parsing.cpp @@ -0,0 +1,237 @@ +#include "utils/csv_parsing.hpp" + +#include + +#include "utils/file.hpp" +#include "utils/string.hpp" + +namespace csv { + +using ParseError = Reader::ParseError; + +void Reader::InitializeStream() { + if (!std::filesystem::exists(path_)) { + throw CsvReadException("CSV file not found: {}", path_.string()); + } + csv_stream_.open(path_); + if (!csv_stream_.good()) { + throw CsvReadException("CSV file {} couldn't be opened!", path_.string()); + } +} + +std::optional<utils::pmr::string> Reader::GetNextLine(utils::MemoryResource *mem) { + utils::pmr::string line(mem); + if (!std::getline(csv_stream_, line)) { + // reached end of file or an I/O error occurred + if (!csv_stream_.good()) { + csv_stream_.close(); + } + return std::nullopt; + } + ++line_count_; + return line; +} + +Reader::ParsingResult Reader::ParseHeader() { + // The header must be the very first line in the file. + MG_ASSERT(line_count_ == 1, fmt::format("Invalid use of {}", __func__)); + return ParseRow(memory_); +} + +void Reader::TryInitializeHeader() { + if (!HasHeader()) { + return; + } + + auto header = ParseHeader(); + if (header.HasError()) { + throw CsvReadException("CSV reading: {}", header.GetError().message); + } + + if (header->empty()) { + throw CsvReadException("CSV file {} is empty!", path_); + } + + number_of_columns_ = header->size(); + header_ = std::move(*header); +} + +[[nodiscard]] bool Reader::HasHeader() const { return read_config_.with_header; } + +const Reader::Header &Reader::GetHeader() const { return header_; } + +namespace { +enum class CsvParserState : uint8_t { INITIAL_FIELD, NEXT_FIELD, QUOTING, EXPECT_DELIMITER, DONE }; + +} // namespace + +Reader::ParsingResult Reader::ParseRow(utils::MemoryResource *mem) { + utils::pmr::vector<utils::pmr::string> row(mem); + if (number_of_columns_ != 0) { + row.reserve(number_of_columns_); + } + + utils::pmr::string column(memory_); + + auto state = CsvParserState::INITIAL_FIELD; + + do { + const auto maybe_line = GetNextLine(mem); + if (!maybe_line) { + // The whole file was processed. + break; + } + + std::string_view line_string_view = *maybe_line; + + // Remove '\r' from the end in case the file has DOS line endings. + if (line_string_view.back() == '\r') { + line_string_view.remove_suffix(1); + } + + while (state != CsvParserState::DONE && !line_string_view.empty()) { + const auto c = line_string_view[0]; + + // Line feeds and carriage returns are ignored in CSVs. + if (c == '\n' || c == '\r') { + line_string_view.remove_prefix(1); + continue; + } + // Null bytes aren't allowed in CSVs. + if (c == '\0') { + return ParseError(ParseError::ErrorCode::NULL_BYTE, + fmt::format("CSV: Line {:d} contains NULL byte", line_count_ - 1)); + } + + switch (state) { + case CsvParserState::INITIAL_FIELD: + case CsvParserState::NEXT_FIELD: { + if (utils::StartsWith(line_string_view, *read_config_.quote)) { + // The current field is a quoted field.
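To make the quoting transitions concrete, a short walkthrough of one field, assuming the default quote character '"' (illustration only, not code from this patch):

// input field   : "say ""hi"""
// opening quote : enter QUOTING, consume 'say ' into column
// quote + quote : escaped quote, append '"' to column, stay in QUOTING
// single quote  : field closed -> EXPECT_DELIMITER
// parsed value  : say "hi"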
+            state = CsvParserState::QUOTING;
+            line_string_view.remove_prefix(read_config_.quote->size());
+          } else if (utils::StartsWith(line_string_view, *read_config_.delimiter)) {
+            // The current field has an empty value.
+            row.emplace_back("");
+            state = CsvParserState::NEXT_FIELD;
+            line_string_view.remove_prefix(read_config_.delimiter->size());
+          } else {
+            // The current field is a regular field.
+            const auto delimiter_idx = line_string_view.find(*read_config_.delimiter);
+            row.emplace_back(line_string_view.substr(0, delimiter_idx));
+            if (delimiter_idx == std::string_view::npos) {
+              state = CsvParserState::DONE;
+            } else {
+              line_string_view.remove_prefix(delimiter_idx + read_config_.delimiter->size());
+              state = CsvParserState::NEXT_FIELD;
+            }
+          }
+          break;
+        }
+        case CsvParserState::QUOTING: {
+          const auto quote_now = utils::StartsWith(line_string_view, *read_config_.quote);
+          const auto quote_next =
+              utils::StartsWith(line_string_view.substr(read_config_.quote->size()), *read_config_.quote);
+          if (quote_now && quote_next) {
+            // This is an escaped quote character.
+            column += *read_config_.quote;
+            line_string_view.remove_prefix(read_config_.quote->size() * 2);
+          } else if (quote_now) {
+            // This is the end of the quoted field.
+            row.emplace_back(std::move(column));
+            column.clear();
+            state = CsvParserState::EXPECT_DELIMITER;
+            line_string_view.remove_prefix(read_config_.quote->size());
+          } else {
+            column.push_back(c);
+            line_string_view.remove_prefix(1);
+          }
+          break;
+        }
+        case CsvParserState::EXPECT_DELIMITER: {
+          if (utils::StartsWith(line_string_view, *read_config_.delimiter)) {
+            state = CsvParserState::NEXT_FIELD;
+            line_string_view.remove_prefix(read_config_.delimiter->size());
+          } else {
+            return ParseError(ParseError::ErrorCode::UNEXPECTED_TOKEN,
+                              fmt::format("CSV Reader: Expected '{}' after '{}', but got '{}' at line {:d}",
+                                          *read_config_.delimiter, *read_config_.quote, c, line_count_ - 1));
+          }
+          break;
+        }
+        case CsvParserState::DONE: {
+          LOG_FATAL("Invalid state of the CSV parser!");
+        }
+      }
+    }
+  } while (state == CsvParserState::QUOTING);
+
+  switch (state) {
+    case CsvParserState::INITIAL_FIELD:
+    case CsvParserState::DONE:
+    case CsvParserState::EXPECT_DELIMITER:
+      break;
+    case CsvParserState::NEXT_FIELD:
+      row.emplace_back("");
+      break;
+    case CsvParserState::QUOTING: {
+      return ParseError(ParseError::ErrorCode::NO_CLOSING_QUOTE,
+                        "There is no more data left to load while inside a quoted string. "
+                        "Did you forget to close the quote?");
+    }
+  }
+
+  // reached the end of file - return an empty row
+  if (row.empty()) {
+    return row;
+  }
+
+  // If the file has a header, it has already been read and number_of_columns_ was set
+  // while reading it; otherwise the header row itself would fail the column-count check
+  // below. If there is no header, number_of_columns_ stays 0 and the check is skipped.
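+  // For example, with the header "id,name,age" number_of_columns_ is 3, so a later
+  // physical row "1,Alice" yields BAD_NUM_OF_COLUMNS instead of a silently short row.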
+  if (UNLIKELY(number_of_columns_ != 0 && row.size() != number_of_columns_)) {
+    return ParseError(ParseError::ErrorCode::BAD_NUM_OF_COLUMNS,
+                      // ToDo(the-joksim):
+                      //  - 'line_count_ - 1' is the last line of a row (as a
+                      //    row may span several lines) ==> should have a row
+                      //    counter
+                      fmt::format("Expected {:d} columns in row {:d}, but got {:d}", number_of_columns_,
+                                  line_count_ - 1, row.size()));
+  }
+
+  return std::move(row);
+}
+
+// Returns Reader::Row if the read row is valid;
+// Returns std::nullopt if the end of file is reached or an error occurred
+// making it unreadable;
+// @throws CsvReadException if a bad row is encountered and ignore_bad is set
+// to 'false' in the Reader::Config.
+std::optional<Reader::Row> Reader::GetNextRow(utils::MemoryResource *mem) {
+  auto row = ParseRow(mem);
+
+  if (row.HasError()) {
+    if (!read_config_.ignore_bad) {
+      throw CsvReadException("CSV Reader: Bad row at line {:d}: {}", line_count_ - 1, row.GetError().message);
+    }
+    // try to parse as many times as necessary to reach a valid row
+    do {
+      spdlog::debug("CSV Reader: Bad row at line {:d}: {}", line_count_ - 1, row.GetError().message);
+      if (!csv_stream_.good()) {
+        return std::nullopt;
+      }
+      row = ParseRow(mem);
+    } while (row.HasError());
+  }
+
+  if (row->empty()) {
+    // reached the end of file
+    return std::nullopt;
+  }
+  return std::move(*row);
+}
+
+}  // namespace csv
diff --git a/src/utils/csv_parsing.hpp b/src/utils/csv_parsing.hpp
new file mode 100644
index 000000000..efe4bf469
--- /dev/null
+++ b/src/utils/csv_parsing.hpp
@@ -0,0 +1,97 @@
+/**
+ * @file
+ *
+ * This file contains utilities for parsing CSV files.
+ *
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <filesystem>
+#include <fstream>
+#include <optional>
+#include <string>
+#include <vector>
+
+#include "utils/exceptions.hpp"
+#include "utils/pmr/string.hpp"
+#include "utils/pmr/vector.hpp"
+#include "utils/result.hpp"
+
+namespace csv {
+
+class CsvReadException : public utils::BasicException {
+  using utils::BasicException::BasicException;
+};
+
+class Reader {
+ public:
+  struct Config {
+    Config() = default;
+    Config(const bool with_header, const bool ignore_bad, std::optional<utils::pmr::string> delim,
+           std::optional<utils::pmr::string> qt)
+        : with_header(with_header), ignore_bad(ignore_bad), delimiter(std::move(delim)), quote(std::move(qt)) {}
+
+    bool with_header{false};
+    bool ignore_bad{false};
+    std::optional<utils::pmr::string> delimiter{};
+    std::optional<utils::pmr::string> quote{};
+  };
+
+  using Row = utils::pmr::vector<utils::pmr::string>;
+  using Header = utils::pmr::vector<utils::pmr::string>;
+
+  Reader() = default;
+  explicit Reader(std::filesystem::path path, Config cfg, utils::MemoryResource *mem = utils::NewDeleteResource())
+      : memory_(mem), path_(std::move(path)) {
+    read_config_.with_header = cfg.with_header;
+    read_config_.ignore_bad = cfg.ignore_bad;
+    read_config_.delimiter = cfg.delimiter ? std::move(*cfg.delimiter) : utils::pmr::string{",", memory_};
+    read_config_.quote = cfg.quote ? std::move(*cfg.quote) : utils::pmr::string{"\"", memory_};
+    InitializeStream();
+    TryInitializeHeader();
+  }
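+  // When no delimiter or quote is supplied, ',' and '"' are used, matching the most
+  // common CSV dialect.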
+
+  Reader(const Reader &) = delete;
+  Reader &operator=(const Reader &) = delete;
+
+  Reader(Reader &&) = default;
+  Reader &operator=(Reader &&) = default;
+
+  ~Reader() = default;
+
+  struct ParseError {
+    enum class ErrorCode : uint8_t { BAD_HEADER, NO_CLOSING_QUOTE, UNEXPECTED_TOKEN, BAD_NUM_OF_COLUMNS, NULL_BYTE };
+    ParseError(ErrorCode code, std::string message) : code(code), message(std::move(message)) {}
+
+    ErrorCode code;
+    std::string message;
+  };
+
+  using ParsingResult = utils::BasicResult<ParseError, Row>;
+  [[nodiscard]] bool HasHeader() const;
+  const Header &GetHeader() const;
+  std::optional<Row> GetNextRow(utils::MemoryResource *mem);
+
+ private:
+  utils::MemoryResource *memory_;
+  std::filesystem::path path_;
+  std::ifstream csv_stream_;
+  Config read_config_;
+  uint64_t line_count_{1};
+  uint16_t number_of_columns_{0};
+  Header header_{memory_};
+
+  void InitializeStream();
+
+  void TryInitializeHeader();
+
+  std::optional<utils::pmr::string> GetNextLine(utils::MemoryResource *mem);
+
+  ParsingResult ParseHeader();
+
+  ParsingResult ParseRow(utils::MemoryResource *mem);
+};
+
+}  // namespace csv
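A minimal usage sketch of the new reader (illustrative only, not part of the patch; the file path and flag values are assumptions):

    csv::Reader reader{"/tmp/people.csv",
                       csv::Reader::Config{/*with_header=*/true, /*ignore_bad=*/false,
                                           /*delim=*/std::nullopt, /*qt=*/std::nullopt}};
    auto *mem = utils::NewDeleteResource();
    while (auto row = reader.GetNextRow(mem)) {
      // *row is one parsed record: a utils::pmr::vector<utils::pmr::string>, one element per column
    }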
diff --git a/src/utils/file.hpp b/src/utils/file.hpp
index 6bf3e3b0d..3d281a3bc 100644
--- a/src/utils/file.hpp
+++ b/src/utils/file.hpp
@@ -57,7 +57,7 @@ bool RenamePath(const std::filesystem::path &src, const std::filesystem::path &d
 /// `write` for each of our (very small) logical reads/writes. Because of that,
 /// `read` or `write` is only called when the buffer is full and/or needs
 /// emptying.
-const size_t kFileBufferSize = 262144;
+constexpr size_t kFileBufferSize = 262144;
 
 /// This class implements a file handler that is used to read binary files. It
 /// was developed because the C++ standard library has an awful API and makes
diff --git a/src/utils/logging.hpp b/src/utils/logging.hpp
index 152917563..72bbff309 100644
--- a/src/utils/logging.hpp
+++ b/src/utils/logging.hpp
@@ -65,7 +65,7 @@ void Fatal(const char *msg, const Args &...msg_args) {
   do {                             \
     spdlog::critical(__VA_ARGS__); \
     std::terminate();              \
-  } while (0);
+  } while (0)
 
 #ifndef NDEBUG
 #define DLOG_FATAL(...) LOG_FATAL(__VA_ARGS__)
diff --git a/src/utils/memory.cpp b/src/utils/memory.cpp
index 20e8c43a8..78db731cc 100644
--- a/src/utils/memory.cpp
+++ b/src/utils/memory.cpp
@@ -24,6 +24,9 @@ size_t GrowMonotonicBuffer(size_t current_size, size_t max_size) {
   return std::ceil(next_size);
 }
 
+__attribute__((no_sanitize("pointer-overflow"))) void CheckAllocationSizeOverflow(void *aligned_ptr, size_t bytes) {
+  if (reinterpret_cast<char *>(aligned_ptr) + bytes <= aligned_ptr) throw BadAlloc("Allocation size overflow");
+}
 }  // namespace
 
 MonotonicBufferResource::MonotonicBufferResource(size_t initial_size) : initial_size_(initial_size) {}
@@ -122,7 +125,7 @@ void *MonotonicBufferResource::DoAllocate(size_t bytes, size_t alignment) {
     next_buffer_size_ = GrowMonotonicBuffer(next_buffer_size_, std::numeric_limits<size_t>::max() - sizeof(Buffer));
   }
   if (reinterpret_cast<char *>(aligned_ptr) < buffer_head) throw BadAlloc("Allocation alignment overflow");
-  if (reinterpret_cast<char *>(aligned_ptr) + bytes <= aligned_ptr) throw BadAlloc("Allocation size overflow");
+  CheckAllocationSizeOverflow(aligned_ptr, bytes);
   allocated_ = reinterpret_cast<char *>(aligned_ptr) - data + bytes;
   return aligned_ptr;
 }
diff --git a/src/utils/memory.hpp b/src/utils/memory.hpp
index bdfe73cd8..d4b99b724 100644
--- a/src/utils/memory.hpp
+++ b/src/utils/memory.hpp
@@ -27,6 +27,7 @@
 
 #include "utils/logging.hpp"
 #include "utils/math.hpp"
+#include "utils/memory_tracker.hpp"
 #include "utils/spin_lock.hpp"
 
 namespace utils {
@@ -767,4 +768,25 @@ class PrintMemoryResource : public MemoryResource {
 };
 #endif
 
+// Allocate memory with the OutOfMemoryException enabled if the requested size
+// puts the total allocated amount over the limit.
+class ResourceWithOutOfMemoryException : public MemoryResource {
+ public:
+  explicit ResourceWithOutOfMemoryException(utils::MemoryResource *upstream = utils::NewDeleteResource())
+      : upstream_{upstream} {}
+
+  utils::MemoryResource *GetUpstream() noexcept { return upstream_; }
+
+ private:
+  void *DoAllocate(size_t bytes, size_t alignment) override {
+    utils::MemoryTracker::OutOfMemoryExceptionEnabler exception_enabler;
+    return upstream_->Allocate(bytes, alignment);
+  }
+
+  void DoDeallocate(void *p, size_t bytes, size_t alignment) override { upstream_->Deallocate(p, bytes, alignment); }
+
+  bool DoIsEqual(const utils::MemoryResource &other) const noexcept override { return upstream_->IsEqual(other); }
+
+  MemoryResource *upstream_{utils::NewDeleteResource()};
+};
 }  // namespace utils
diff --git a/src/utils/memory_tracker.cpp b/src/utils/memory_tracker.cpp
new file mode 100644
index 000000000..d1ccceade
--- /dev/null
+++ b/src/utils/memory_tracker.cpp
@@ -0,0 +1,86 @@
+#include "utils/memory_tracker.hpp"
+
+#include <atomic>
+#include <exception>
+#include <stdexcept>
+
+#include "utils/likely.hpp"
+#include "utils/logging.hpp"
+#include "utils/on_scope_exit.hpp"
+#include "utils/readable_size.hpp"
+
+namespace utils {
+
+namespace {
+
+// Prevent the memory tracker from throwing during stack unwinding
+bool MemoryTrackerCanThrow() {
+  return !std::uncaught_exceptions() && MemoryTracker::OutOfMemoryExceptionEnabler::CanThrow() &&
+         !MemoryTracker::OutOfMemoryExceptionBlocker::IsBlocked();
+}
+
+}  // namespace
+
+thread_local uint64_t MemoryTracker::OutOfMemoryExceptionEnabler::counter_ = 0;
+MemoryTracker::OutOfMemoryExceptionEnabler::OutOfMemoryExceptionEnabler() { ++counter_; }
+MemoryTracker::OutOfMemoryExceptionEnabler::~OutOfMemoryExceptionEnabler() { --counter_; }
+bool MemoryTracker::OutOfMemoryExceptionEnabler::CanThrow() { return counter_ > 0; }
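+
+// A sketch of the intended pattern (illustrative, not taken from this diff):
+//   {
+//     utils::MemoryTracker::OutOfMemoryExceptionEnabler enabler;
+//     auto *data = new char[1024];  // Alloc() below may now throw OutOfMemoryException
+//   }
+// ResourceWithOutOfMemoryException in memory.hpp wraps exactly this scope guard
+// around its upstream resource's Allocate().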
+thread_local uint64_t MemoryTracker::OutOfMemoryExceptionBlocker::counter_ = 0;
+MemoryTracker::OutOfMemoryExceptionBlocker::OutOfMemoryExceptionBlocker() { ++counter_; }
+MemoryTracker::OutOfMemoryExceptionBlocker::~OutOfMemoryExceptionBlocker() { --counter_; }
+bool MemoryTracker::OutOfMemoryExceptionBlocker::IsBlocked() { return counter_ > 0; }
+
+MemoryTracker total_memory_tracker;
+
+// TODO (antonio2368): Define how the peak memory should be logged.
+// Logging every time the peak changes is too much, so some kind of distribution
+// should be used.
+void MemoryTracker::LogPeakMemoryUsage() const { spdlog::info("Peak memory usage: {}", GetReadableSize(peak_)); }
+
+// TODO (antonio2368): Define how the memory should be logged.
+// Logging on each allocation is too much, so some kind of distribution
+// should be used.
+void MemoryTracker::LogMemoryUsage(const int64_t current) {
+  spdlog::info("Current memory usage: {}", GetReadableSize(current));
+}
+
+void MemoryTracker::UpdatePeak(const int64_t will_be) {
+  auto peak_old = peak_.load(std::memory_order_relaxed);
+  if (will_be > peak_old) {
+    peak_.store(will_be, std::memory_order_relaxed);
+  }
+}
+
+void MemoryTracker::SetHardLimit(const int64_t limit) { hard_limit_.store(limit, std::memory_order_relaxed); }
+
+void MemoryTracker::TryRaiseHardLimit(const int64_t limit) {
+  int64_t old_limit = hard_limit_.load(std::memory_order_relaxed);
+  while (old_limit < limit && !hard_limit_.compare_exchange_weak(old_limit, limit))
+    ;
+}
+
+void MemoryTracker::Alloc(const int64_t size) {
+  MG_ASSERT(size >= 0, "Negative size passed to the MemoryTracker.");
+
+  const int64_t will_be = size + amount_.fetch_add(size, std::memory_order_relaxed);
+
+  const auto current_hard_limit = hard_limit_.load(std::memory_order_relaxed);
+
+  if (UNLIKELY(current_hard_limit && will_be > current_hard_limit && MemoryTrackerCanThrow())) {
+    MemoryTracker::OutOfMemoryExceptionBlocker exception_blocker;
+
+    amount_.fetch_sub(size, std::memory_order_relaxed);
+
+    throw OutOfMemoryException(
+        fmt::format("Memory limit exceeded! Attempting to allocate a chunk of {} which would put the current "
+                    "use at {}, while the maximum allowed size for an allocation is set to {}.",
+                    GetReadableSize(size), GetReadableSize(will_be), GetReadableSize(current_hard_limit)));
+  }
+
+  UpdatePeak(will_be);
+}
+
+void MemoryTracker::Free(const int64_t size) { amount_.fetch_sub(size, std::memory_order_relaxed); }
+
+}  // namespace utils
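For orientation, a sketch of the hard-limit semantics (illustrative, with made-up sizes; not part of the patch):

    utils::MemoryTracker tracker;
    tracker.SetHardLimit(1 << 20);  // 1 MiB
    tracker.Alloc(512 * 1024);      // fine, the tracked amount becomes 512 KiB
    // A further Alloc(600 * 1024) would cross the limit, but it only throws
    // OutOfMemoryException if an OutOfMemoryExceptionEnabler is active on the
    // calling thread and no OutOfMemoryExceptionBlocker overrides it.
    tracker.Free(512 * 1024);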
diff --git a/src/utils/memory_tracker.hpp b/src/utils/memory_tracker.hpp
new file mode 100644
index 000000000..28019664d
--- /dev/null
+++ b/src/utils/memory_tracker.hpp
@@ -0,0 +1,87 @@
+#pragma once
+
+#include <atomic>
+#include <cstdint>
+
+#include "utils/exceptions.hpp"
+
+namespace utils {
+
+class OutOfMemoryException : public utils::BasicException {
+ public:
+  explicit OutOfMemoryException(const std::string &msg) : utils::BasicException(msg) {}
+};
+
+class MemoryTracker final {
+ private:
+  std::atomic<int64_t> amount_{0};
+  std::atomic<int64_t> peak_{0};
+  std::atomic<int64_t> hard_limit_{0};
+
+  void UpdatePeak(int64_t will_be);
+
+  static void LogMemoryUsage(int64_t current);
+
+ public:
+  void LogPeakMemoryUsage() const;
+
+  MemoryTracker() = default;
+  ~MemoryTracker() = default;
+
+  MemoryTracker(const MemoryTracker &) = delete;
+  MemoryTracker &operator=(const MemoryTracker &) = delete;
+  MemoryTracker(MemoryTracker &&) = delete;
+  MemoryTracker &operator=(MemoryTracker &&) = delete;
+
+  void Alloc(int64_t size);
+  void Free(int64_t size);
+
+  auto Amount() const { return amount_.load(std::memory_order_relaxed); }
+
+  auto Peak() const { return peak_.load(std::memory_order_relaxed); }
+
+  auto HardLimit() const { return hard_limit_.load(std::memory_order_relaxed); }
+
+  void SetHardLimit(int64_t limit);
+  void TryRaiseHardLimit(int64_t limit);
+
+  // By creating an object of this class, every allocation in its scope that goes over
+  // the set hard limit produces an OutOfMemoryException.
+  class OutOfMemoryExceptionEnabler final {
+   public:
+    OutOfMemoryExceptionEnabler(const OutOfMemoryExceptionEnabler &) = delete;
+    OutOfMemoryExceptionEnabler &operator=(const OutOfMemoryExceptionEnabler &) = delete;
+    OutOfMemoryExceptionEnabler(OutOfMemoryExceptionEnabler &&) = delete;
+    OutOfMemoryExceptionEnabler &operator=(OutOfMemoryExceptionEnabler &&) = delete;
+
+    OutOfMemoryExceptionEnabler();
+    ~OutOfMemoryExceptionEnabler();
+
+    static bool CanThrow();
+
+   private:
+    static thread_local uint64_t counter_;
+  };
+
+  // By creating an object of this class, we negate the effect of every OutOfMemoryExceptionEnabler
+  // object. We need this so we can guard smaller parts of code from the exception while still
+  // allowing it in the surrounding parts where an OutOfMemoryExceptionEnabler is defined.
+  class OutOfMemoryExceptionBlocker final {
+   public:
+    OutOfMemoryExceptionBlocker(const OutOfMemoryExceptionBlocker &) = delete;
+    OutOfMemoryExceptionBlocker &operator=(const OutOfMemoryExceptionBlocker &) = delete;
+    OutOfMemoryExceptionBlocker(OutOfMemoryExceptionBlocker &&) = delete;
+    OutOfMemoryExceptionBlocker &operator=(OutOfMemoryExceptionBlocker &&) = delete;
+
+    OutOfMemoryExceptionBlocker();
+    ~OutOfMemoryExceptionBlocker();
+
+    static bool IsBlocked();
+
+   private:
+    static thread_local uint64_t counter_;
+  };
+};
+
+// Global memory tracker which tracks every allocation in the application.
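+// (The replaced global operator new/delete in src/utils/new_delete.cpp below routes
+// every allocation and deallocation in the process through this tracker.)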
+extern MemoryTracker total_memory_tracker;
+}  // namespace utils
diff --git a/src/utils/new_delete.cpp b/src/utils/new_delete.cpp
new file mode 100644
index 000000000..1c6aeba5e
--- /dev/null
+++ b/src/utils/new_delete.cpp
@@ -0,0 +1,252 @@
+#include <cstdlib>
+#include <new>
+
+#if USE_JEMALLOC
+#include <jemalloc/jemalloc.h>
+#else
+#include <malloc.h>
+#endif
+
+#include "utils/likely.hpp"
+#include "utils/memory_tracker.hpp"
+
+namespace {
+void *newImpl(const std::size_t size) {
+  auto *ptr = malloc(size);
+  if (LIKELY(ptr != nullptr)) {
+    return ptr;
+  }
+
+  throw std::bad_alloc{};
+}
+
+void *newImpl(const std::size_t size, const std::align_val_t align) {
+  auto *ptr = aligned_alloc(static_cast<std::size_t>(align), size);
+  if (LIKELY(ptr != nullptr)) {
+    return ptr;
+  }
+
+  throw std::bad_alloc{};
+}
+
+void *newNoExcept(const std::size_t size) noexcept { return malloc(size); }
+void *newNoExcept(const std::size_t size, const std::align_val_t align) noexcept {
+  // NOTE: aligned_alloc takes the alignment first and the size second.
+  return aligned_alloc(static_cast<std::size_t>(align), size);
+}
+
+#if USE_JEMALLOC
+void deleteImpl(void *ptr) noexcept { dallocx(ptr, 0); }
+
+void deleteImpl(void *ptr, const std::align_val_t align) noexcept {
+  dallocx(ptr, MALLOCX_ALIGN(align));  // NOLINT(hicpp-signed-bitwise)
+}
+
+void deleteSized(void *ptr, const std::size_t size) noexcept {
+  if (UNLIKELY(ptr == nullptr)) {
+    return;
+  }
+
+  sdallocx(ptr, size, 0);
+}
+
+void deleteSized(void *ptr, const std::size_t size, const std::align_val_t align) noexcept {
+  if (UNLIKELY(ptr == nullptr)) {
+    return;
+  }
+
+  sdallocx(ptr, size, MALLOCX_ALIGN(align));  // NOLINT(hicpp-signed-bitwise)
+}
+
+#else
+void deleteImpl(void *ptr) noexcept { free(ptr); }
+
+void deleteImpl(void *ptr, const std::align_val_t /*unused*/) noexcept { free(ptr); }
+
+void deleteSized(void *ptr, const std::size_t /*unused*/) noexcept { free(ptr); }
+
+void deleteSized(void *ptr, const std::size_t /*unused*/, const std::align_val_t /*unused*/) noexcept { free(ptr); }
+#endif
+
+void TrackMemory(std::size_t size) {
+#if USE_JEMALLOC
+  if (LIKELY(size != 0)) {
+    size = nallocx(size, 0);
+  }
+#endif
+  utils::total_memory_tracker.Alloc(size);
+}
+
+void TrackMemory(std::size_t size, const std::align_val_t align) {
+#if USE_JEMALLOC
+  if (LIKELY(size != 0)) {
+    size = nallocx(size, MALLOCX_ALIGN(align));  // NOLINT(hicpp-signed-bitwise)
+  }
+#endif
+  utils::total_memory_tracker.Alloc(size);
+}
+
+bool TrackMemoryNoExcept(const std::size_t size) {
+  try {
+    TrackMemory(size);
+  } catch (...) {
+    return false;
+  }
+
+  return true;
+}
+
+bool TrackMemoryNoExcept(const std::size_t size, const std::align_val_t align) {
+  try {
+    TrackMemory(size, align);
+  } catch (...) {
+    return false;
+  }
+
+  return true;
+}
+
+void UntrackMemory([[maybe_unused]] void *ptr, [[maybe_unused]] std::size_t size = 0) noexcept {
+  try {
+#if USE_JEMALLOC
+    if (LIKELY(ptr != nullptr)) {
+      utils::total_memory_tracker.Free(sallocx(ptr, 0));
+    }
+#else
+    if (size) {
+      utils::total_memory_tracker.Free(size);
+    } else {
+      // Inaccurate, because the malloc_usable_size() result is greater than or equal to the allocated size.
+      utils::total_memory_tracker.Free(malloc_usable_size(ptr));
+    }
+#endif
+  } catch (...) {
+  }
+}
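+
+// Sizing note: with jemalloc, TrackMemory() records nallocx() (the size the allocator
+// will actually reserve for the request) and UntrackMemory() reads the same value back
+// with sallocx(), so Alloc()/Free() stay symmetric even though callers only know the
+// requested size.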
+
+void UntrackMemory(void *ptr, const std::align_val_t align, [[maybe_unused]] std::size_t size = 0) noexcept {
+  try {
+#if USE_JEMALLOC
+    if (LIKELY(ptr != nullptr)) {
+      utils::total_memory_tracker.Free(sallocx(ptr, MALLOCX_ALIGN(align)));  // NOLINT(hicpp-signed-bitwise)
+    }
+#else
+    if (size) {
+      utils::total_memory_tracker.Free(size);
+    } else {
+      // Inaccurate, because the malloc_usable_size() result is greater than or equal to the allocated size.
+      utils::total_memory_tracker.Free(malloc_usable_size(ptr));
+    }
+#endif
+  } catch (...) {
+  }
+}
+
+}  // namespace
+
+void *operator new(const std::size_t size) {
+  TrackMemory(size);
+  return newImpl(size);
+}
+
+void *operator new[](const std::size_t size) {
+  TrackMemory(size);
+  return newImpl(size);
+}
+
+void *operator new(const std::size_t size, const std::align_val_t align) {
+  TrackMemory(size, align);
+  return newImpl(size, align);
+}
+
+void *operator new[](const std::size_t size, const std::align_val_t align) {
+  TrackMemory(size, align);
+  return newImpl(size, align);
+}
+
+void *operator new(const std::size_t size, const std::nothrow_t & /*unused*/) noexcept {
+  if (LIKELY(TrackMemoryNoExcept(size))) {
+    return newNoExcept(size);
+  }
+  return nullptr;
+}
+
+void *operator new[](const std::size_t size, const std::nothrow_t & /*unused*/) noexcept {
+  if (LIKELY(TrackMemoryNoExcept(size))) {
+    return newNoExcept(size);
+  }
+  return nullptr;
+}
+
+void *operator new(const std::size_t size, const std::align_val_t align, const std::nothrow_t & /*unused*/) noexcept {
+  if (LIKELY(TrackMemoryNoExcept(size, align))) {
+    return newNoExcept(size, align);
+  }
+  return nullptr;
+}
+
+void *operator new[](const std::size_t size, const std::align_val_t align,
+                     const std::nothrow_t & /*unused*/) noexcept {
+  if (LIKELY(TrackMemoryNoExcept(size, align))) {
+    return newNoExcept(size, align);
+  }
+  return nullptr;
+}
+
+void operator delete(void *ptr) noexcept {
+  UntrackMemory(ptr);
+  deleteImpl(ptr);
+}
+
+void operator delete[](void *ptr) noexcept {
+  UntrackMemory(ptr);
+  deleteImpl(ptr);
+}
+
+void operator delete(void *ptr, const std::align_val_t align) noexcept {
+  UntrackMemory(ptr, align);
+  deleteImpl(ptr, align);
+}
+
+void operator delete[](void *ptr, const std::align_val_t align) noexcept {
+  UntrackMemory(ptr, align);
+  deleteImpl(ptr, align);
+}
+
+void operator delete(void *ptr, const std::size_t size) noexcept {
+  UntrackMemory(ptr, size);
+  deleteSized(ptr, size);
+}
+
+void operator delete[](void *ptr, const std::size_t size) noexcept {
+  UntrackMemory(ptr, size);
+  deleteSized(ptr, size);
+}
+
+void operator delete(void *ptr, const std::size_t size, const std::align_val_t align) noexcept {
+  UntrackMemory(ptr, align, size);
+  deleteSized(ptr, size, align);
+}
+
+void operator delete[](void *ptr, const std::size_t size, const std::align_val_t align) noexcept {
+  UntrackMemory(ptr, align, size);
+  deleteSized(ptr, size, align);
+}
+
+void operator delete(void *ptr, const std::nothrow_t & /*unused*/) noexcept {
+  UntrackMemory(ptr);
+  deleteImpl(ptr);
+}
+
+void operator delete[](void *ptr, const std::nothrow_t & /*unused*/) noexcept {
+  UntrackMemory(ptr);
+  deleteImpl(ptr);
+}
+
+void operator delete(void *ptr, const std::align_val_t align, const std::nothrow_t & /*unused*/) noexcept {
+  UntrackMemory(ptr, align);
+  deleteImpl(ptr, align);
+}
+
+void operator delete[](void *ptr, const std::align_val_t align, const std::nothrow_t & /*unused*/) noexcept {
+  UntrackMemory(ptr, align);
+  deleteImpl(ptr, align);
+}
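One observable consequence worth noting (a sketch, not part of the patch): with a hard limit set and an OutOfMemoryExceptionEnabler active, the nothrow forms degrade to returning nullptr instead of throwing, because TrackMemoryNoExcept() swallows the tracker's exception:

    char *p = new (std::nothrow) char[1 << 30];  // a 1 GiB request
    if (p == nullptr) {
      // rejected by the tracker (or by the underlying allocator)
    }
    delete[] p;  // deleting nullptr is a no-op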
diff --git a/src/utils/readable_size.cpp b/src/utils/readable_size.cpp
new file mode 100644
index 000000000..27f7b4c8d
--- /dev/null
+++ b/src/utils/readable_size.cpp
@@ -0,0 +1,27 @@
+#include "utils/readable_size.hpp"
+
+#include <array>
+
+#include <fmt/format.h>
+
+namespace utils {
+
+std::string GetReadableSize(double size) {
+  // TODO (antonio2368): Add support for base 1000 (KB, GB, TB...)
+  constexpr std::array units = {"B", "KiB", "MiB", "GiB", "TiB"};
+  constexpr double delimiter = 1024;
+
+  size_t i = 0;
+  for (; i + 1 < units.size() && size >= delimiter; ++i) {
+    size /= delimiter;
+  }
+
+  // bytes don't need decimals
+  if (i == 0) {
+    return fmt::format("{:.0f}{}", size, units[i]);
+  }
+
+  return fmt::format("{:.2f}{}", size, units[i]);
+}
+
+}  // namespace utils
diff --git a/src/utils/readable_size.hpp b/src/utils/readable_size.hpp
new file mode 100644
index 000000000..d7b959cf3
--- /dev/null
+++ b/src/utils/readable_size.hpp
@@ -0,0 +1,6 @@
+#pragma once
+#include <string>
+
+namespace utils {
+std::string GetReadableSize(double size);
+}  // namespace utils
diff --git a/src/utils/skip_list.hpp b/src/utils/skip_list.hpp
index d770ca768..b5ee81544 100644
--- a/src/utils/skip_list.hpp
+++ b/src/utils/skip_list.hpp
@@ -259,11 +259,15 @@ class SkipListGc final {
   }
 
   void Collect(TNode *node) {
-    std::lock_guard guard(lock_);
+    std::unique_lock guard(lock_);
    deleted_.Push({accessor_id_.load(std::memory_order_acquire), node});
   }
 
   void Run() {
+    // This method can be called after any skip list method, including the add method,
+    // which could have the OOMException enabled in its thread; to ensure no exception
+    // is thrown while cleaning the skip list, we add the blocker.
+    utils::MemoryTracker::OutOfMemoryExceptionBlocker oom_blocker;
     if (!lock_.try_lock()) return;
     OnScopeExit cleanup([&] { lock_.unlock(); });
     Block *tail = tail_.load(std::memory_order_acquire);
@@ -662,7 +666,7 @@ class SkipList final {
   /// @return Iterator to the item in the list, will be equal to `end()` when
   /// the key isn't found
   template <typename TKey>
-  Iterator find(const TKey &key) {
+  Iterator find(const TKey &key) const {
     return skiplist_->template find(key);
   }
 
@@ -672,7 +676,7 @@ class SkipList final {
   /// @return Iterator to the item in the list, will be equal to `end()` when
   /// no items match the search
   template <typename TKey>
-  Iterator find_equal_or_greater(const TKey &key) {
+  Iterator find_equal_or_greater(const TKey &key) const {
     return skiplist_->template find_equal_or_greater(key);
   }
 
@@ -891,6 +895,8 @@ class SkipList final {
     gc_.Clear();
   }
 
+  void run_gc() { gc_.Run(); }
+
  private:
  template <typename TKey>
  int find_node(const TKey &key, TNode *preds[], TNode *succs[]) const {
diff --git a/src/utils/string.hpp b/src/utils/string.hpp
index 18f399387..7edafe475 100644
--- a/src/utils/string.hpp
+++ b/src/utils/string.hpp
@@ -437,7 +437,7 @@ inline std::string Escape(const std::string_view &src) {
 inline std::string_view Substr(const std::string_view &string, size_t pos = 0, size_t count = std::string::npos) {
   if (pos >= string.size()) return std::string_view(string.data(), 0);
   auto len = std::min(string.size() - pos, count);
-  return std::string_view(string.data() + pos, len);
+  return string.substr(pos, len);
 }
 
 }  // namespace utils
diff --git a/src/utils/sysinfo/memory.cpp b/src/utils/sysinfo/memory.cpp
new file mode 100644
index 000000000..5ee1c4fc4
--- /dev/null
+++ b/src/utils/sysinfo/memory.cpp
@@ -0,0 +1,33 @@
+#include "utils/sysinfo/memory.hpp"
+
+namespace utils::sysinfo {
+
+namespace {
+std::optional<uint64_t> ExtractAmountFromMemInfo(const std::string_view header_name) {
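+  // /proc/meminfo consists of lines such as "MemAvailable:   123456 kB"; we scan
+  // for the "<header_name>:" token and read the number that follows (the value is
+  // reported in KiB).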
+  std::string token;
+  std::ifstream meminfo("/proc/meminfo");
+  const auto meminfo_header = fmt::format("{}:", header_name);
+  while (meminfo >> token) {
+    if (token == meminfo_header) {
+      uint64_t mem = 0;
+      if (meminfo >> mem) {
+        return mem;
+      } else {
+        return std::nullopt;
+      }
+    }
+    meminfo.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
+  }
+  SPDLOG_WARN("Failed to read {} from /proc/meminfo", header_name);
+  return std::nullopt;
+}
+
+}  // namespace
+
+std::optional<uint64_t> AvailableMemory() { return ExtractAmountFromMemInfo("MemAvailable"); }
+
+std::optional<uint64_t> TotalMemory() { return ExtractAmountFromMemInfo("MemTotal"); }
+
+std::optional<uint64_t> SwapTotalMemory() { return ExtractAmountFromMemInfo("SwapTotal"); }
+
+}  // namespace utils::sysinfo
diff --git a/src/utils/sysinfo/memory.hpp b/src/utils/sysinfo/memory.hpp
index 264d94079..6971833a8 100644
--- a/src/utils/sysinfo/memory.hpp
+++ b/src/utils/sysinfo/memory.hpp
@@ -1,3 +1,5 @@
+#pragma once
+
 #include <fstream>
 #include <limits>
 #include <optional>
@@ -8,25 +10,21 @@
 namespace utils::sysinfo {
 
 /**
- * Gets the amount of available RAM in kilobytes. If the information is
+ * Gets the amount of available RAM in KiB. If the information is
  * unavalable an empty value is returned.
  */
-inline std::optional<uint64_t> AvailableMemoryKilobytes() {
-  std::string token;
-  std::ifstream meminfo("/proc/meminfo");
-  while (meminfo >> token) {
-    if (token == "MemAvailable:") {
-      uint64_t mem = 0;
-      if (meminfo >> mem) {
-        return mem;
-      } else {
-        return std::nullopt;
-      }
-    }
-    meminfo.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
-  }
-  SPDLOG_WARN("Failed to read amount of available memory from /proc/meminfo");
-  return std::nullopt;
-}
+std::optional<uint64_t> AvailableMemory();
+
+/**
+ * Gets the amount of total RAM in KiB. If the information is
+ * unavailable an empty value is returned.
+ */
+std::optional<uint64_t> TotalMemory();
+
+/**
+ * Gets the amount of total swap space in KiB. If the information is
+ * unavailable an empty value is returned.
+ */
+std::optional<uint64_t> SwapTotalMemory();
 }  // namespace utils::sysinfo
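A possible call site, for illustration (the logging line is an assumption, not taken from this patch; note the functions return KiB while GetReadableSize() expects bytes):

    if (const auto available_kib = utils::sysinfo::AvailableMemory()) {
      spdlog::info("Available RAM: {}", utils::GetReadableSize(static_cast<double>(*available_kib) * 1024));
    }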
diff --git a/tests/benchmark/expansion.cpp b/tests/benchmark/expansion.cpp
index 9bfe2607b..5551f4829 100644
--- a/tests/benchmark/expansion.cpp
+++ b/tests/benchmark/expansion.cpp
@@ -4,6 +4,7 @@
 #include "communication/result_stream_faker.hpp"
 #include "query/interpreter.hpp"
 #include "query/typed_value.hpp"
+#include "storage/v2/isolation_level.hpp"
 #include "storage/v2/storage.hpp"
 
 class ExpansionBenchFixture : public benchmark::Fixture {
@@ -11,6 +12,7 @@ class ExpansionBenchFixture : public benchmark::Fixture {
   std::optional<storage::Storage> db;
   std::optional<query::InterpreterContext> interpreter_context;
   std::optional<query::Interpreter> interpreter;
+  std::filesystem::path data_directory{std::filesystem::temp_directory_path() / "expansion-benchmark"};
 
   void SetUp(const benchmark::State &state) override {
     db.emplace();
@@ -34,7 +36,7 @@ class ExpansionBenchFixture : public benchmark::Fixture {
 
     MG_ASSERT(db->CreateIndex(label));
 
-    interpreter_context.emplace(&*db);
+    interpreter_context.emplace(&*db, data_directory);
     interpreter.emplace(&*interpreter_context);
   }
 
@@ -42,6 +44,7 @@ class ExpansionBenchFixture : public benchmark::Fixture {
     interpreter = std::nullopt;
     interpreter_context = std::nullopt;
     db = std::nullopt;
+    std::filesystem::remove_all(data_directory);
   }
 };
diff --git a/tests/concurrent/storage_indices.cpp b/tests/concurrent/storage_indices.cpp
index 5494fd1ed..792ef2937 100644
--- a/tests/concurrent/storage_indices.cpp
+++ b/tests/concurrent/storage_indices.cpp
@@ -120,9 +120,9 @@ TEST(Storage, LabelPropertyIndex) {
       ASSERT_TRUE(*ret);
     }
     {
-      auto ret = vertex.SetProperty(prop, storage::PropertyValue(vertex.Gid().AsInt()));
-      ASSERT_TRUE(ret.HasValue());
-      ASSERT_TRUE(*ret);
+      auto old_value = vertex.SetProperty(prop, storage::PropertyValue(vertex.Gid().AsInt()));
+      ASSERT_TRUE(old_value.HasValue());
+      ASSERT_TRUE(old_value->IsNull());
     }
     ASSERT_FALSE(acc.Commit().HasError());
   }
@@ -164,9 +164,9 @@ TEST(Storage, LabelPropertyIndex) {
       ASSERT_TRUE(*ret);
     }
     {
-      auto ret = vertex.SetProperty(prop, storage::PropertyValue(vertex.Gid().AsInt()));
-      ASSERT_TRUE(ret.HasValue());
-      ASSERT_TRUE(*ret);
+      auto old_value = vertex.SetProperty(prop, storage::PropertyValue(vertex.Gid().AsInt()));
+      ASSERT_TRUE(old_value.HasValue());
+      ASSERT_TRUE(old_value->IsNull());
     }
     ASSERT_FALSE(acc.Commit().HasError());
   }
diff --git a/tests/e2e/CMakeLists.txt b/tests/e2e/CMakeLists.txt
index 29749fd19..1a14bd21a 100644
--- a/tests/e2e/CMakeLists.txt
+++ b/tests/e2e/CMakeLists.txt
@@ -1 +1,4 @@
 add_subdirectory(replication)
+add_subdirectory(memory)
+add_subdirectory(triggers)
+add_subdirectory(isolation_levels)
diff --git a/tests/e2e/isolation_levels/CMakeLists.txt b/tests/e2e/isolation_levels/CMakeLists.txt
new file mode 100644
index 000000000..d2441aced
--- /dev/null
+++ b/tests/e2e/isolation_levels/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_executable(memgraph__e2e__isolation_levels isolation_levels.cpp)
+target_link_libraries(memgraph__e2e__isolation_levels gflags mgclient mg-utils mg-io Threads::Threads)
diff --git a/tests/e2e/isolation_levels/isolation_levels.cpp b/tests/e2e/isolation_levels/isolation_levels.cpp
new file mode 100644
index 000000000..df91c3e70
--- /dev/null
+++ b/tests/e2e/isolation_levels/isolation_levels.cpp
@@ -0,0 +1,241 @@
+#include <gflags/gflags.h>
+#include <mgclient.hpp>
+
+#include "utils/logging.hpp"
+#include "utils/timer.hpp"
+
+DEFINE_uint64(bolt_port, 7687, "Bolt port");
+DEFINE_uint64(timeout, 120, "Timeout seconds");
+
+namespace {
+
+auto GetClient() {
+  auto client =
+      mg::Client::Connect({.host = "127.0.0.1", .port = static_cast<uint16_t>(FLAGS_bolt_port), .use_ssl = false});
+  MG_ASSERT(client, "Failed to connect!");
+
+  return client;
+}
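+
+// The helpers below drain each query's result stream (the extra FetchOne()/
+// DiscardAll() calls) so the same connection is ready for the next query.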
= "127.0.0.1", .port = static_cast(FLAGS_bolt_port), .use_ssl = false}); + MG_ASSERT(client, "Failed to connect!"); + + return client; +} + +auto GetVertexCount(std::unique_ptr &client) { + MG_ASSERT(client->Execute("MATCH (n) RETURN count(n)")); + auto maybe_row = client->FetchOne(); + MG_ASSERT(maybe_row, "Failed to fetch vertex count"); + + const auto &row = *maybe_row; + MG_ASSERT(row.size() == 1, "Got invalid result for vertex count"); + + client->FetchOne(); + return row[0].ValueInt(); +} + +void CleanDatabase() { + auto client = GetClient(); + MG_ASSERT(client->Execute("MATCH (n) DETACH DELETE n;")); + client->DiscardAll(); +} + +void TestSnapshotIsolation(std::unique_ptr &client) { + spdlog::info("Verifying SNAPSHOT ISOLATION"); + + auto creator = GetClient(); + + MG_ASSERT(client->BeginTransaction()); + MG_ASSERT(creator->BeginTransaction()); + + constexpr auto vertex_count = 10; + for (size_t i = 0; i < vertex_count; ++i) { + MG_ASSERT(creator->Execute("CREATE ()")); + creator->DiscardAll(); + + auto current_vertex_count = GetVertexCount(client); + MG_ASSERT(current_vertex_count == 0, + "Invalid number of vertices found for SNAPSHOT ISOLATION (found {}, expected {}). Read vertices from a " + "transaction which started " + "at a later point.", + current_vertex_count, 0); + } + + MG_ASSERT(creator->CommitTransaction()); + + auto current_vertex_count = GetVertexCount(client); + MG_ASSERT(current_vertex_count == 0, + "Invalid number of vertices found for SNAPSHOT ISOLATION (found {}, expected {}). Read vertices from a " + "transaction which started " + "at a later point.", + current_vertex_count, 0); + MG_ASSERT(client->CommitTransaction()); + CleanDatabase(); +} + +void TestReadCommitted(std::unique_ptr &client) { + spdlog::info("Verifying READ COMMITTED"); + + auto creator = GetClient(); + + MG_ASSERT(client->BeginTransaction()); + MG_ASSERT(creator->BeginTransaction()); + + constexpr auto vertex_count = 10; + for (size_t i = 0; i < vertex_count; ++i) { + MG_ASSERT(creator->Execute("CREATE ()")); + creator->DiscardAll(); + + auto current_vertex_count = GetVertexCount(client); + MG_ASSERT(current_vertex_count == 0, + "Invalid number of vertices found for READ COMMITTED (found {}, expected {}. Read vertices from a " + "transaction which is not " + "committed.", + current_vertex_count, 0); + } + + MG_ASSERT(creator->CommitTransaction()); + + auto current_vertex_count = GetVertexCount(client); + MG_ASSERT(current_vertex_count == vertex_count, + "Invalid number of vertices found for READ COMMITTED (found {}, expected {}). Failed to read vertices " + "from a committed transaction", + current_vertex_count, vertex_count); + MG_ASSERT(client->CommitTransaction()); + CleanDatabase(); +} + +void TestReadUncommitted(std::unique_ptr &client) { + spdlog::info("Verifying READ UNCOMMITTED"); + + auto creator = GetClient(); + + MG_ASSERT(client->BeginTransaction()); + MG_ASSERT(creator->BeginTransaction()); + + constexpr auto vertex_count = 10; + for (size_t i = 1; i <= vertex_count; ++i) { + MG_ASSERT(creator->Execute("CREATE ()")); + creator->DiscardAll(); + + auto current_vertex_count = GetVertexCount(client); + MG_ASSERT(current_vertex_count == i, + "Invalid number of vertices found for READ UNCOMMITTED (found {}, expected {}). 
+
+  MG_ASSERT(creator->CommitTransaction());
+
+  auto current_vertex_count = GetVertexCount(client);
+  MG_ASSERT(current_vertex_count == vertex_count,
+            "Invalid number of vertices found for READ UNCOMMITTED (found {}, expected {}). Failed to read vertices "
+            "from a different transaction.",
+            current_vertex_count, vertex_count);
+  MG_ASSERT(client->CommitTransaction());
+  CleanDatabase();
+}
+
+constexpr std::array isolation_levels{std::pair{"SNAPSHOT ISOLATION", &TestSnapshotIsolation},
+                                      std::pair{"READ COMMITTED", &TestReadCommitted},
+                                      std::pair{"READ UNCOMMITTED", &TestReadUncommitted}};
+
+void TestGlobalIsolationLevel() {
+  spdlog::info("\n\n----Test global isolation levels----\n");
+  auto first_client = GetClient();
+  auto second_client = GetClient();
+
+  for (const auto &[isolation_level, verification_function] : isolation_levels) {
+    spdlog::info("--------------------------");
+    spdlog::info("Setting global isolation level to {}", isolation_level);
+    MG_ASSERT(first_client->Execute(fmt::format("SET GLOBAL TRANSACTION ISOLATION LEVEL {}", isolation_level)));
+    first_client->DiscardAll();
+
+    verification_function(first_client);
+    verification_function(second_client);
+    spdlog::info("--------------------------\n");
+  }
+}
+
+void TestSessionIsolationLevel() {
+  spdlog::info("\n\n----Test session isolation levels----\n");
+
+  auto global_client = GetClient();
+  auto session_client = GetClient();
+  for (const auto &[global_isolation_level, global_verification_function] : isolation_levels) {
+    spdlog::info("Setting global isolation level to {}", global_isolation_level);
+    MG_ASSERT(
+        global_client->Execute(fmt::format("SET GLOBAL TRANSACTION ISOLATION LEVEL {}", global_isolation_level)));
+    global_client->DiscardAll();
+
+    for (const auto &[session_isolation_level, session_verification_function] : isolation_levels) {
+      spdlog::info("--------------------------");
+      spdlog::info("Setting session isolation level to {}", session_isolation_level);
+      MG_ASSERT(
+          session_client->Execute(fmt::format("SET SESSION TRANSACTION ISOLATION LEVEL {}", session_isolation_level)));
+      session_client->DiscardAll();
+
+      spdlog::info("Verifying client which is using global isolation level");
+      global_verification_function(global_client);
+      spdlog::info("Verifying client which is using session isolation level");
+      session_verification_function(session_client);
+      spdlog::info("--------------------------\n");
+    }
+  }
+}
+
+// Isolation levels are applied in order of priority: NEXT first, then SESSION, then GLOBAL.
+void TestNextIsolationLevel() {
+  spdlog::info("\n\n----Test next isolation levels----\n");
+
+  auto global_client = GetClient();
+  auto session_client = GetClient();
+  for (const auto &[global_isolation_level, global_verification_function] : isolation_levels) {
+    spdlog::info("Setting global isolation level to {}", global_isolation_level);
+    MG_ASSERT(
+        global_client->Execute(fmt::format("SET GLOBAL TRANSACTION ISOLATION LEVEL {}", global_isolation_level)));
+    global_client->DiscardAll();
+
+    for (const auto &[session_isolation_level, session_verification_function] : isolation_levels) {
+      spdlog::info("Setting session isolation level to {}", session_isolation_level);
+      MG_ASSERT(
+          session_client->Execute(fmt::format("SET SESSION TRANSACTION ISOLATION LEVEL {}", session_isolation_level)));
+      session_client->DiscardAll();
+
+      for (const auto &[next_isolation_level, next_verification_function] : isolation_levels) {
+        spdlog::info("--------------------------");
+        spdlog::info("Verifying client which is using global isolation level");
+        global_verification_function(global_client);
+        spdlog::info("Verifying client which is using session isolation level");
+        session_verification_function(session_client);
+
+        spdlog::info("Setting isolation level of the next transaction to {}", next_isolation_level);
+        MG_ASSERT(
+            global_client->Execute(fmt::format("SET NEXT TRANSACTION ISOLATION LEVEL {}", next_isolation_level)));
+        global_client->DiscardAll();
+        MG_ASSERT(
+            session_client->Execute(fmt::format("SET NEXT TRANSACTION ISOLATION LEVEL {}", next_isolation_level)));
+        session_client->DiscardAll();
+
+        spdlog::info("Verifying client which is using global isolation level while next isolation level is set");
+        next_verification_function(global_client);
+        spdlog::info("Verifying client which is using session isolation level while next isolation level is set");
+        next_verification_function(session_client);
+
+        spdlog::info("Verifying client which is using global isolation level after the next isolation level was used");
+        global_verification_function(global_client);
+        spdlog::info(
+            "Verifying client which is using session isolation level after the next isolation level was used");
+        session_verification_function(session_client);
+        spdlog::info("--------------------------\n");
+      }
+    }
+  }
+}
+
+}  // namespace
+
+int main(int argc, char **argv) {
+  google::SetUsageMessage("Memgraph E2E Isolation Levels");
+  gflags::ParseCommandLineFlags(&argc, &argv, true);
+  logging::RedirectToStderr();
+
+  mg::Client::Init();
+
+  TestGlobalIsolationLevel();
+  TestSessionIsolationLevel();
+  TestNextIsolationLevel();
+
+  return 0;
+}
diff --git a/tests/e2e/isolation_levels/workloads.yaml b/tests/e2e/isolation_levels/workloads.yaml
new file mode 100644
index 000000000..5d793396e
--- /dev/null
+++ b/tests/e2e/isolation_levels/workloads.yaml
@@ -0,0 +1,14 @@
+bolt_port: &bolt_port "7687"
+template_cluster: &template_cluster
+  cluster:
+    main:
+      args: ["--bolt-port", *bolt_port, "--log-level=TRACE"]
+      log_file: "isolation-levels-e2e.log"
+      setup_queries: []
+      validation_queries: []
+
+workloads:
+  - name: "Isolation levels"
+    binary: "tests/e2e/isolation_levels/memgraph__e2e__isolation_levels"
+    args: ["--bolt-port", *bolt_port]
+    <<: *template_cluster
diff --git a/tests/e2e/memory/CMakeLists.txt b/tests/e2e/memory/CMakeLists.txt
new file mode 100644
index 000000000..95f3a145c
--- /dev/null
+++ b/tests/e2e/memory/CMakeLists.txt
@@ -0,0 +1,11 @@
+add_subdirectory(procedures)
+
+add_executable(memgraph__e2e__memory__control memory_control.cpp)
+target_link_libraries(memgraph__e2e__memory__control gflags mgclient mg-utils mg-io Threads::Threads)
+
+add_executable(memgraph__e2e__memory__limit_global_alloc memory_limit_global_alloc.cpp)
+target_link_libraries(memgraph__e2e__memory__limit_global_alloc gflags mgclient mg-utils mg-io Threads::Threads)
+
+add_executable(memgraph__e2e__memory__limit_global_alloc_proc memory_limit_global_alloc_proc.cpp)
+target_link_libraries(memgraph__e2e__memory__limit_global_alloc_proc gflags mgclient mg-utils mg-io Threads::Threads)
+
diff --git a/tests/e2e/memory/memory_control.cpp b/tests/e2e/memory/memory_control.cpp
new file mode 100644
index 000000000..a6f5ea96f
--- /dev/null
+++ b/tests/e2e/memory/memory_control.cpp
@@ -0,0 +1,42 @@
+#include <gflags/gflags.h>
+#include <mgclient.hpp>
+
+#include "utils/logging.hpp"
+#include "utils/timer.hpp"
+
+DEFINE_uint64(bolt_port, 7687, "Bolt port");
+DEFINE_uint64(timeout, 120, "Timeout seconds");
"Timeout seconds"); + +int main(int argc, char **argv) { + google::SetUsageMessage("Memgraph E2E Memory Control"); + gflags::ParseCommandLineFlags(&argc, &argv, true); + logging::RedirectToStderr(); + + mg::Client::Init(); + + auto client = + mg::Client::Connect({.host = "127.0.0.1", .port = static_cast(FLAGS_bolt_port), .use_ssl = false}); + if (!client) { + LOG_FATAL("Failed to connect!"); + } + + client->Execute("MATCH (n) DETACH DELETE n;"); + client->DiscardAll(); + + const auto *create_query = "UNWIND range(1, 50) as u CREATE (n {string: \"Some longer string\"}) RETURN n;"; + + utils::Timer timer; + while (true) { + if (timer.Elapsed>().count() > FLAGS_timeout) { + LOG_FATAL("The test timed out"); + } + client->Execute(create_query); + if (!client->FetchOne()) { + break; + } + client->DiscardAll(); + } + + spdlog::info("Memgraph is out of memory"); + return 0; +} diff --git a/tests/e2e/memory/memory_limit_global_alloc.cpp b/tests/e2e/memory/memory_limit_global_alloc.cpp new file mode 100644 index 000000000..0d2094fea --- /dev/null +++ b/tests/e2e/memory/memory_limit_global_alloc.cpp @@ -0,0 +1,26 @@ +#include +#include + +#include "utils/logging.hpp" +#include "utils/timer.hpp" + +DEFINE_uint64(bolt_port, 7687, "Bolt port"); +DEFINE_uint64(timeout, 120, "Timeout seconds"); + +int main(int argc, char **argv) { + google::SetUsageMessage("Memgraph E2E Memory Limit For Global Allocators"); + gflags::ParseCommandLineFlags(&argc, &argv, true); + logging::RedirectToStderr(); + + mg::Client::Init(); + + auto client = + mg::Client::Connect({.host = "127.0.0.1", .port = static_cast(FLAGS_bolt_port), .use_ssl = false}); + if (!client) { + LOG_FATAL("Failed to connect!"); + } + + bool result = client->Execute("CALL libglobal_memory_limit.procedure() YIELD *"); + MG_ASSERT(result == false); + return 0; +} diff --git a/tests/e2e/memory/memory_limit_global_alloc_proc.cpp b/tests/e2e/memory/memory_limit_global_alloc_proc.cpp new file mode 100644 index 000000000..78f3f1ea2 --- /dev/null +++ b/tests/e2e/memory/memory_limit_global_alloc_proc.cpp @@ -0,0 +1,32 @@ +#include + +#include +#include + +#include "utils/logging.hpp" +#include "utils/timer.hpp" + +DEFINE_uint64(bolt_port, 7687, "Bolt port"); +DEFINE_uint64(timeout, 120, "Timeout seconds"); + +int main(int argc, char **argv) { + google::SetUsageMessage("Memgraph E2E Memory Limit For Global Allocators"); + gflags::ParseCommandLineFlags(&argc, &argv, true); + logging::RedirectToStderr(); + + mg::Client::Init(); + + auto client = + mg::Client::Connect({.host = "127.0.0.1", .port = static_cast(FLAGS_bolt_port), .use_ssl = false}); + if (!client) { + LOG_FATAL("Failed to connect!"); + } + bool result = client->Execute("CALL libglobal_memory_limit_proc.error() YIELD *"); + auto result1 = client->FetchAll(); + MG_ASSERT(result1 != std::nullopt && result1->size() == 0); + + result = client->Execute("CALL libglobal_memory_limit_proc.success() YIELD *"); + auto result2 = client->FetchAll(); + MG_ASSERT(result2 != std::nullopt && result2->size() > 0); + return 0; +} diff --git a/tests/e2e/memory/procedures/CMakeLists.txt b/tests/e2e/memory/procedures/CMakeLists.txt new file mode 100644 index 000000000..21201e59b --- /dev/null +++ b/tests/e2e/memory/procedures/CMakeLists.txt @@ -0,0 +1,5 @@ +add_library(global_memory_limit SHARED global_memory_limit.c) +target_include_directories(global_memory_limit PRIVATE ${CMAKE_SOURCE_DIR}/include) + +add_library(global_memory_limit_proc SHARED global_memory_limit_proc.c) 
+target_include_directories(global_memory_limit_proc PRIVATE ${CMAKE_SOURCE_DIR}/include)
diff --git a/tests/e2e/memory/procedures/global_memory_limit.c b/tests/e2e/memory/procedures/global_memory_limit.c
new file mode 100644
index 000000000..a2003cb59
--- /dev/null
+++ b/tests/e2e/memory/procedures/global_memory_limit.c
@@ -0,0 +1,36 @@
+#include "mg_procedure.h"
+
+int *gVal = NULL;
+
+void set_error(struct mgp_result *result) { mgp_result_set_error_msg(result, "Something went wrong"); }
+
+static void procedure(const struct mgp_list *args, const struct mgp_graph *graph, struct mgp_result *result,
+                      struct mgp_memory *memory) {
+  struct mgp_result_record *record = mgp_result_new_record(result);
+  if (record == NULL) return set_error(result);
+
+  struct mgp_value *result_msg = mgp_value_make_string("mgp_init_module allocation works", memory);
+  if (result_msg == NULL) return set_error(result);
+
+  int result_inserted = mgp_result_record_insert(record, "result", result_msg);
+  mgp_value_destroy(result_msg);
+  if (!result_inserted) return set_error(result);
+}
+
+int mgp_init_module(struct mgp_module *module, struct mgp_memory *memory) {
+  const size_t one_gb = 1 << 30;
+  gVal = mgp_global_alloc(one_gb);
+  if (!gVal) return 1;
+
+  struct mgp_proc *proc = mgp_module_add_read_procedure(module, "procedure", procedure);
+  if (!proc) return 1;
+
+  if (!mgp_proc_add_result(proc, "result", mgp_type_string())) return 1;
+
+  return 0;
+}
+
+int mgp_shutdown_module() {
+  if (gVal) mgp_global_free(gVal);
+  return 0;
+}
diff --git a/tests/e2e/memory/procedures/global_memory_limit_proc.c b/tests/e2e/memory/procedures/global_memory_limit_proc.c
new file mode 100644
index 000000000..519f11f05
--- /dev/null
+++ b/tests/e2e/memory/procedures/global_memory_limit_proc.c
@@ -0,0 +1,63 @@
+#include "mg_procedure.h"
+
+int *gVal = NULL;
+
+void set_error(struct mgp_result *result) { mgp_result_set_error_msg(result, "Something went wrong"); }
+
+void set_out_of_memory_error(struct mgp_result *result) { mgp_result_set_error_msg(result, "Out of memory"); }
+
+static void error(const struct mgp_list *args, const struct mgp_graph *graph, struct mgp_result *result,
+                  struct mgp_memory *memory) {
+  const size_t one_gb = 1 << 30;
+  if (gVal) {
+    mgp_global_free(gVal);
+    gVal = NULL;
+  }
+  if (!gVal) {
+    gVal = mgp_global_alloc(one_gb);
+    if (!gVal) return set_out_of_memory_error(result);
+  }
+  struct mgp_result_record *record = mgp_result_new_record(result);
+  if (record == NULL) return set_error(result);
+  struct mgp_value *error_value = mgp_value_make_string("ERROR", memory);
+  if (error_value == NULL) return set_error(result);
+  int result_inserted = mgp_result_record_insert(record, "error_result", error_value);
+  mgp_value_destroy(error_value);
+  if (!result_inserted) return set_error(result);
+}
+
+static void success(const struct mgp_list *args, const struct mgp_graph *graph, struct mgp_result *result,
+                    struct mgp_memory *memory) {
+  const size_t bytes = 1024;
+  if (!gVal) {
+    gVal = mgp_global_alloc(bytes);
+    if (!gVal) return set_out_of_memory_error(result);
+  }
+
+  struct mgp_result_record *record = mgp_result_new_record(result);
+  if (record == NULL) return set_error(result);
+  struct mgp_value *success_value = mgp_value_make_string("success", memory);
+  if (success_value == NULL) return set_error(result);
+  int result_inserted = mgp_result_record_insert(record, "success_result", success_value);
+  mgp_value_destroy(success_value);
+  if (!result_inserted) return set_error(result);
+}
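+
+/* The two procedures differ only in how much they ask mgp_global_alloc() for:
+   error() re-allocates a full GiB, which is meant to blow past the server's
+   --memory-limit (1000 MB in the workloads.yaml below), while success() asks
+   for a single KiB that fits comfortably under it. */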
+
+int mgp_init_module(struct mgp_module *module, struct mgp_memory *memory) {
+  struct mgp_proc *error_proc = mgp_module_add_read_procedure(module, "error", error);
+  if (!error_proc) return 1;
+
+  if (!mgp_proc_add_result(error_proc, "error_result", mgp_type_string())) return 1;
+
+  struct mgp_proc *succ_proc = mgp_module_add_read_procedure(module, "success", success);
+  if (!succ_proc) return 1;
+
+  if (!mgp_proc_add_result(succ_proc, "success_result", mgp_type_string())) return 1;
+
+  return 0;
+}
+
+int mgp_shutdown_module() {
+  if (gVal) mgp_global_free(gVal);
+  return 0;
+}
diff --git a/tests/e2e/memory/workloads.yaml b/tests/e2e/memory/workloads.yaml
new file mode 100644
index 000000000..bf7ba373e
--- /dev/null
+++ b/tests/e2e/memory/workloads.yaml
@@ -0,0 +1,26 @@
+bolt_port: &bolt_port "7687"
+template_cluster: &template_cluster
+  cluster:
+    main:
+      args: ["--bolt-port", *bolt_port, "--memory-limit=1000", "--storage-gc-cycle-sec=180", "--log-level=TRACE"]
+      log_file: "memory-e2e.log"
+      setup_queries: []
+      validation_queries: []
+
+workloads:
+  - name: "Memory control"
+    binary: "tests/e2e/memory/memgraph__e2e__memory__control"
+    args: ["--bolt-port", *bolt_port, "--timeout", "180"]
+    <<: *template_cluster
+
+  - name: "Memory limit for modules upon loading"
+    binary: "tests/e2e/memory/memgraph__e2e__memory__limit_global_alloc"
+    args: ["--bolt-port", *bolt_port, "--timeout", "180"]
+    proc: "tests/e2e/memory/procedures/"
+    <<: *template_cluster
+
+  - name: "Memory limit for modules inside a procedure"
+    binary: "tests/e2e/memory/memgraph__e2e__memory__limit_global_alloc_proc"
+    args: ["--bolt-port", *bolt_port, "--timeout", "180"]
+    proc: "tests/e2e/memory/procedures/"
+    <<: *template_cluster
diff --git a/tests/e2e/replication/constraints.cpp b/tests/e2e/replication/constraints.cpp
index 004fd0f79..2fcd6eb4e 100644
--- a/tests/e2e/replication/constraints.cpp
+++ b/tests/e2e/replication/constraints.cpp
@@ -39,7 +39,7 @@ int main(int argc, char **argv) {
       const auto label_name = (*data)[0][1].ValueString();
       const auto property_name = (*data)[0][2].ValueList()[0].ValueString();
       if (label_name != "Node" || property_name != "id") {
-        LOG_FATAL("{} does NOT hava valid constraint created.", database_endpoint)
+        LOG_FATAL("{} does NOT have a valid constraint created.", database_endpoint);
       }
     } else {
      LOG_FATAL("Unable to get CONSTRAINT INFO from {}", database_endpoint);
diff --git a/tests/e2e/replication/workloads.yaml b/tests/e2e/replication/workloads.yaml
index 5f5a0d65d..39e1c9bdf 100644
--- a/tests/e2e/replication/workloads.yaml
+++ b/tests/e2e/replication/workloads.yaml
@@ -11,19 +11,23 @@ template_validation_queries: &template_validation_queries
 template_cluster: &template_cluster
   cluster:
     replica_1:
-      args: ["--bolt-port", "7688"]
+      args: ["--bolt-port", "7688", "--log-level=TRACE"]
+      log_file: "replication-e2e-replica1.log"
       setup_queries: ["SET REPLICATION ROLE TO REPLICA WITH PORT 10001;"]
       <<: *template_validation_queries
     replica_2:
-      args: ["--bolt-port", "7689"]
+      args: ["--bolt-port", "7689", "--log-level=TRACE"]
+      log_file: "replication-e2e-replica2.log"
       setup_queries: ["SET REPLICATION ROLE TO REPLICA WITH PORT 10002;"]
       <<: *template_validation_queries
     replica_3:
-      args: ["--bolt-port", "7690"]
+      args: ["--bolt-port", "7690", "--log-level=TRACE"]
+      log_file: "replication-e2e-replica3.log"
       setup_queries: ["SET REPLICATION ROLE TO REPLICA WITH PORT 10003;"]
       <<: *template_validation_queries
     main:
-      args: ["--bolt-port", "7687"]
+      args: ["--bolt-port", "7687", "--log-level=TRACE"]
"replication-e2e-main.log" setup_queries: [ "REGISTER REPLICA replica_1 SYNC WITH TIMEOUT 0 TO '127.0.0.1:10001'", "REGISTER REPLICA replica_2 SYNC WITH TIMEOUT 1 TO '127.0.0.1:10002'", diff --git a/tests/e2e/runner.py b/tests/e2e/runner.py index d1d29ac23..74b69955e 100755 --- a/tests/e2e/runner.py +++ b/tests/e2e/runner.py @@ -2,6 +2,7 @@ from argparse import ArgumentParser import atexit import logging import os +from pathlib import Path import subprocess import yaml @@ -12,23 +13,26 @@ PROJECT_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "..", "..")) BUILD_DIR = os.path.join(PROJECT_DIR, "build") MEMGRAPH_BINARY = os.path.join(BUILD_DIR, "memgraph") -log = logging.getLogger("memgraph.tests.e2e.replication") +log = logging.getLogger("memgraph.tests.e2e") def load_args(): parser = ArgumentParser() - parser.add_argument("--workloads-path", required=True) + parser.add_argument("--workloads-root-directory", required=True) parser.add_argument("--workload-name", default=None, required=False) return parser.parse_args() -def load_workloads(path): - with open(path, "r") as f: - return yaml.load(f, Loader=yaml.FullLoader)['workloads'] +def load_workloads(root_directory): + workloads = [] + for file in Path(root_directory).rglob('*.yaml'): + with open(file, "r") as f: + workloads.extend(yaml.load(f, Loader=yaml.FullLoader)['workloads']) + return workloads def run(args): - workloads = load_workloads(args.workloads_path) + workloads = load_workloads(args.workloads_root_directory) for workload in workloads: workload_name = workload['name'] if args.workload_name is not None and \ @@ -37,6 +41,7 @@ def run(args): log.info("%s STARTED.", workload_name) # Setup. mg_instances = {} + @atexit.register def cleanup(): for mg_instance in mg_instances.values(): @@ -44,7 +49,13 @@ def run(args): for name, config in workload['cluster'].items(): mg_instance = MemgraphInstanceRunner(MEMGRAPH_BINARY) mg_instances[name] = mg_instance - mg_instance.start(args=config['args']) + log_file_path = os.path.join(BUILD_DIR, 'logs', config['log_file']) + binary_args = config['args'] + ["--log-file", log_file_path] + if 'proc' in workload: + procdir = "--query-modules-directory=" + os.path.join(BUILD_DIR, workload['proc']) + binary_args.append(procdir) + + mg_instance.start(args=binary_args) for query in config['setup_queries']: mg_instance.query(query) # Test. 
diff --git a/tests/e2e/triggers/CMakeLists.txt b/tests/e2e/triggers/CMakeLists.txt
new file mode 100644
index 000000000..d1587c56b
--- /dev/null
+++ b/tests/e2e/triggers/CMakeLists.txt
@@ -0,0 +1,11 @@
+add_library(memgraph__e2e__triggers_common STATIC common.hpp common.cpp)
+target_link_libraries(memgraph__e2e__triggers_common PUBLIC gflags mgclient mg-utils)
+
+add_executable(memgraph__e2e__triggers__on_create on_create_triggers.cpp)
+target_link_libraries(memgraph__e2e__triggers__on_create memgraph__e2e__triggers_common)
+
+add_executable(memgraph__e2e__triggers__on_update on_update_triggers.cpp)
+target_link_libraries(memgraph__e2e__triggers__on_update memgraph__e2e__triggers_common)
+
+add_executable(memgraph__e2e__triggers__on_delete on_delete_triggers.cpp)
+target_link_libraries(memgraph__e2e__triggers__on_delete memgraph__e2e__triggers_common)
diff --git a/tests/e2e/triggers/common.cpp b/tests/e2e/triggers/common.cpp
new file mode 100644
index 000000000..17f916217
--- /dev/null
+++ b/tests/e2e/triggers/common.cpp
@@ -0,0 +1,94 @@
+#include "common.hpp"
+
+#include <optional>
+#include <string_view>
+
+#include <fmt/format.h>
+#include <gflags/gflags.h>
+#include "utils/logging.hpp"
+#include "utils/timer.hpp"
+
+DEFINE_uint64(bolt_port, 7687, "Bolt port");
+
+std::unique_ptr<mg::Client> Connect() {
+  auto client =
+      mg::Client::Connect({.host = "127.0.0.1", .port = static_cast<uint16_t>(FLAGS_bolt_port), .use_ssl = false});
+  MG_ASSERT(client, "Failed to connect!");
+  return client;
+}
+
+void CreateVertex(mg::Client &client, int vertex_id) {
+  mg::Map parameters{
+      {"id", mg::Value{vertex_id}},
+  };
+  client.Execute(fmt::format("CREATE (n: {} {{ id: $id }})", kVertexLabel), mg::ConstMap{parameters.ptr()});
+  client.DiscardAll();
+}
+
+void CreateEdge(mg::Client &client, int from_vertex, int to_vertex, int edge_id) {
+  mg::Map parameters{
+      {"from", mg::Value{from_vertex}},
+      {"to", mg::Value{to_vertex}},
+      {"id", mg::Value{edge_id}},
+  };
+  client.Execute(fmt::format("MATCH (from: {} {{ id: $from }}), (to: {} {{id: $to }}) "
+                             "CREATE (from)-[r: {} {{id: $id}}]->(to)",
+                             kVertexLabel, kVertexLabel, kEdgeLabel),
+                 mg::ConstMap{parameters.ptr()});
+  client.DiscardAll();
+}
+
+int GetNumberOfAllVertices(mg::Client &client) {
+  client.Execute("MATCH (n) RETURN COUNT(*)");
+  const auto value = client.FetchOne();
+  MG_ASSERT(value, "Unexpected error");
+  MG_ASSERT(value->size() == 1, "Unexpected number of columns!");
+  client.FetchAll();
+  MG_ASSERT(value->at(0).type() == mg::Value::Type::Int, "Unexpected type!");
+  return value->at(0).ValueInt();
+}
+
+// Busy-waits for up to half a second, because the effects of AFTER COMMIT triggers
+// become visible asynchronously, slightly after the committing transaction returns.
+void WaitForNumberOfAllVertices(mg::Client &client, int number_of_vertices) {
+  utils::Timer timer{};
+  while ((timer.Elapsed().count() <= 0.5) && GetNumberOfAllVertices(client) != number_of_vertices) {
+  }
+  CheckNumberOfAllVertices(client, number_of_vertices);
+}
+
+void CheckNumberOfAllVertices(mg::Client &client, int expected_number_of_vertices) {
+  const auto number_of_vertices = GetNumberOfAllVertices(client);
+  MG_ASSERT(number_of_vertices == expected_number_of_vertices, "There are {} vertices, expected {}!",
+            number_of_vertices, expected_number_of_vertices);
+}
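+
+// Returns the single vertex carrying the given label and id, or std::nullopt when
+// there is none; more than one match trips the assertion inside.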
"Unexpected number of vertices with label {} and id {}, found {} vertices", label, + vertex_id, rows.size()); + if (rows.empty()) { + return std::nullopt; + } + + return rows[0][0]; +} + +bool VertexExists(mg::Client &client, std::string_view label, int vertex_id) { + return GetVertex(client, label, vertex_id).has_value(); +} + +void CheckVertexMissing(mg::Client &client, std::string_view label, int vertex_id) { + MG_ASSERT(!VertexExists(client, label, vertex_id), "Not expected vertex exist with label {} and id {}!", label, + vertex_id); +} + +void CheckVertexExists(mg::Client &client, std::string_view label, int vertex_id) { + MG_ASSERT(VertexExists(client, label, vertex_id), "Expected vertex doesn't exist with label {} and id {}!", label, + vertex_id); +} diff --git a/tests/e2e/triggers/common.hpp b/tests/e2e/triggers/common.hpp new file mode 100644 index 000000000..903dd8125 --- /dev/null +++ b/tests/e2e/triggers/common.hpp @@ -0,0 +1,23 @@ +#pragma once + +#include +#include +#include +#include + +#include + +constexpr std::string_view kVertexLabel{"VERTEX"}; +constexpr std::string_view kEdgeLabel{"EDGE"}; + +std::unique_ptr Connect(); +void CreateVertex(mg::Client &client, int vertex_id); +void CreateEdge(mg::Client &client, int from_vertex, int to_vertex, int edge_id); + +int GetNumberOfAllVertices(mg::Client &client); +void WaitForNumberOfAllVertices(mg::Client &client, int number_of_vertices); +void CheckNumberOfAllVertices(mg::Client &client, int expected_number_of_vertices); +std::optional GetVertex(mg::Client &client, std::string_view label, int vertex_id); +bool VertexExists(mg::Client &client, std::string_view label, int vertex_id); +void CheckVertexMissing(mg::Client &client, std::string_view label, int vertex_id); +void CheckVertexExists(mg::Client &client, std::string_view label, int vertex_id); \ No newline at end of file diff --git a/tests/e2e/triggers/on_create_triggers.cpp b/tests/e2e/triggers/on_create_triggers.cpp new file mode 100644 index 000000000..d2699ddcf --- /dev/null +++ b/tests/e2e/triggers/on_create_triggers.cpp @@ -0,0 +1,107 @@ +#include +#include + +#include +#include +#include "common.hpp" +#include "utils/logging.hpp" + +constexpr std::string_view kTriggerCreatedVertexLabel{"CREATED_VERTEX"}; +constexpr std::string_view kTriggerCreatedEdgeLabel{"CREATED_EDGE"}; +constexpr std::string_view kTriggerCreatedObjectLabel{"CREATED_OBJECT"}; + +void CreateOnCreateTriggers(mg::Client &client, bool is_before) { + const std::string_view before_or_after = is_before ? 
"BEFORE" : "AFTER"; + client.Execute( + fmt::format("CREATE TRIGGER CreatedVerticesTrigger ON () CREATE " + "{} COMMIT " + "EXECUTE " + "UNWIND createdVertices as createdVertex " + "CREATE (n: {} {{ id: createdVertex.id }})", + before_or_after, kTriggerCreatedVertexLabel)); + client.DiscardAll(); + client.Execute( + fmt::format("CREATE TRIGGER CreatedEdgesTrigger ON --> CREATE " + "{} COMMIT " + "EXECUTE " + "UNWIND createdEdges as createdEdge " + "CREATE (n: {} {{ id: createdEdge.id }})", + before_or_after, kTriggerCreatedEdgeLabel)); + client.DiscardAll(); + client.Execute( + fmt::format("CREATE TRIGGER CreatedObjectsTrigger ON CREATE " + "{} COMMIT " + "EXECUTE " + "UNWIND createdObjects as createdObjectEvent " + "WITH CASE createdObjectEvent.event_type WHEN \"created_vertex\" THEN createdObjectEvent.vertex.id " + "ELSE createdObjectEvent.edge.id END as id " + "CREATE (n: {} {{ id: id }})", + before_or_after, kTriggerCreatedObjectLabel)); + client.DiscardAll(); +} + +void DropOnCreateTriggers(mg::Client &client) { + client.Execute("DROP TRIGGER CreatedVerticesTrigger"); + client.DiscardAll(); + client.Execute("DROP TRIGGER CreatedEdgesTrigger"); + client.DiscardAll(); + client.Execute("DROP TRIGGER CreatedObjectsTrigger"); + client.DiscardAll(); +} + +int main(int argc, char **argv) { + gflags::SetUsageMessage("Memgraph E2E ON CREATE Triggers"); + gflags::ParseCommandLineFlags(&argc, &argv, true); + logging::RedirectToStderr(); + + mg::Client::Init(); + + auto client = Connect(); + + const auto run_create_trigger_tests = [&](bool is_before) { + const std::array vertex_ids{1, 2}; + const int edge_id = 3; + { + CreateOnCreateTriggers(*client, is_before); + client->BeginTransaction(); + for (const auto vertex_id : vertex_ids) { + CreateVertex(*client, vertex_id); + CheckVertexExists(*client, kVertexLabel, vertex_id); + CheckVertexMissing(*client, kTriggerCreatedVertexLabel, vertex_id); + CheckVertexMissing(*client, kTriggerCreatedObjectLabel, vertex_id); + } + CreateEdge(*client, vertex_ids[0], vertex_ids[1], edge_id); + CheckVertexMissing(*client, kTriggerCreatedEdgeLabel, edge_id); + CheckVertexMissing(*client, kTriggerCreatedObjectLabel, edge_id); + client->CommitTransaction(); + + // :VERTEX x 2 + // :CREATED_VERTEX x 2 + // :CREATED_EDGE x 1 + // :CREATED_OBJECT x 3 + constexpr auto kNumberOfExpectedVertices = 8; + + if (is_before) { + CheckNumberOfAllVertices(*client, kNumberOfExpectedVertices); + } else { + WaitForNumberOfAllVertices(*client, kNumberOfExpectedVertices); + } + + for (const auto vertex_id : vertex_ids) { + CheckVertexExists(*client, kTriggerCreatedVertexLabel, vertex_id); + CheckVertexExists(*client, kTriggerCreatedObjectLabel, vertex_id); + } + CheckVertexExists(*client, kTriggerCreatedEdgeLabel, edge_id); + CheckVertexExists(*client, kTriggerCreatedObjectLabel, edge_id); + DropOnCreateTriggers(*client); + client->Execute("MATCH (n) DETACH DELETE n;"); + client->DiscardAll(); + } + }; + constexpr bool kBeforeCommit = true; + constexpr bool kAfterCommit = false; + run_create_trigger_tests(kBeforeCommit); + run_create_trigger_tests(kAfterCommit); + + return 0; +} diff --git a/tests/e2e/triggers/on_delete_triggers.cpp b/tests/e2e/triggers/on_delete_triggers.cpp new file mode 100644 index 000000000..1c0886863 --- /dev/null +++ b/tests/e2e/triggers/on_delete_triggers.cpp @@ -0,0 +1,133 @@ +#include +#include + +#include +#include +#include "common.hpp" +#include "utils/logging.hpp" + +constexpr std::string_view kTriggerDeletedVertexLabel{"DELETED_VERTEX"}; +constexpr 
std::string_view kTriggerDeletedEdgeLabel{"DELETED_EDGE"}; +constexpr std::string_view kTriggerDeletedObjectLabel{"DELETED_OBJECT"}; + +void DetachDeleteVertex(mg::Client &client, int vertex_id) { + mg::Map parameters{{"id", mg::Value{vertex_id}}}; + client.Execute(fmt::format("MATCH (n: {} {{id: $id}}) DETACH DELETE n", kVertexLabel), + mg::ConstMap{parameters.ptr()}); + client.DiscardAll(); +} + +void DeleteEdge(mg::Client &client, int edge_id) { + mg::Map parameters{{"id", mg::Value{edge_id}}}; + client.Execute(fmt::format("MATCH ()-[r: {} {{id: $id}}]->() DELETE r", kEdgeLabel), mg::ConstMap{parameters.ptr()}); + client.DiscardAll(); +} + +void CreateOnDeleteTriggers(mg::Client &client, bool is_before) { + const std::string_view before_or_after = is_before ? "BEFORE" : "AFTER"; + client.Execute( + fmt::format("CREATE TRIGGER DeletedVerticesTrigger ON () DELETE " + "{} COMMIT " + "EXECUTE " + "UNWIND deletedVertices as deletedVertex " + "CREATE (n: {} {{ id: deletedVertex.id }})", + before_or_after, kTriggerDeletedVertexLabel)); + client.DiscardAll(); + client.Execute( + fmt::format("CREATE TRIGGER DeletedEdgesTrigger ON --> DELETE " + "{} COMMIT " + "EXECUTE " + "UNWIND deletedEdges as deletedEdge " + "CREATE (n: {} {{ id: deletedEdge.id }})", + before_or_after, kTriggerDeletedEdgeLabel)); + client.DiscardAll(); + client.Execute( + fmt::format("CREATE TRIGGER DeletedObjectsTrigger ON DELETE " + "{} COMMIT " + "EXECUTE " + "UNWIND deletedObjects as deletedObjectEvent " + "WITH CASE deletedObjectEvent.event_type WHEN \"deleted_vertex\" THEN deletedObjectEvent.vertex.id " + "ELSE deletedObjectEvent.edge.id END as id " + "CREATE (n: {} {{ id: id }})", + before_or_after, kTriggerDeletedObjectLabel)); + client.DiscardAll(); +} + +void DropOnDeleteTriggers(mg::Client &client) { + client.Execute("DROP TRIGGER DeletedVerticesTrigger"); + client.DiscardAll(); + client.Execute("DROP TRIGGER DeletedEdgesTrigger"); + client.DiscardAll(); + client.Execute("DROP TRIGGER DeletedObjectsTrigger"); + client.DiscardAll(); +} + +struct EdgeInfo { + int from_vertex; + int to_vertex; + int edge_id; +}; + +int main(int argc, char **argv) { + gflags::SetUsageMessage("Memgraph E2E ON DELETE Triggers"); + gflags::ParseCommandLineFlags(&argc, &argv, true); + logging::RedirectToStderr(); + + mg::Client::Init(); + + auto client = Connect(); + + const auto run_delete_trigger_tests = [&](bool is_before) { + const std::array vertex_ids{1, 2, 3, 4}; + const std::array edges{EdgeInfo{vertex_ids[0], vertex_ids[1], 5}, EdgeInfo{vertex_ids[2], vertex_ids[3], 6}}; + { + CreateOnDeleteTriggers(*client, is_before); + + client->BeginTransaction(); + for (const auto vertex_id : vertex_ids) { + CreateVertex(*client, vertex_id); + } + for (const auto &edge : edges) { + CreateEdge(*client, edge.from_vertex, edge.to_vertex, edge.edge_id); + } + client->CommitTransaction(); + CheckNumberOfAllVertices(*client, vertex_ids.size()); + + client->BeginTransaction(); + DetachDeleteVertex(*client, vertex_ids[0]); + DeleteEdge(*client, edges[1].edge_id); + client->CommitTransaction(); + + // :VERTEX x 4 + // deleted :VERTEX x -1 + // :DELETED_VERTEX x 1 + // :DELETED_EDGE x 2 + // :DELETED_OBJECT x 3 + constexpr auto kNumberOfExpectedVertices = 9; + + if (is_before) { + CheckNumberOfAllVertices(*client, kNumberOfExpectedVertices); + } else { + WaitForNumberOfAllVertices(*client, kNumberOfExpectedVertices); + } + + CheckVertexExists(*client, kTriggerDeletedVertexLabel, vertex_ids[0]); + CheckVertexExists(*client, kTriggerDeletedObjectLabel, 
vertex_ids[0]); + + for (const auto &edge : edges) { + CheckVertexExists(*client, kTriggerDeletedEdgeLabel, edge.edge_id); + CheckVertexExists(*client, kTriggerDeletedObjectLabel, edge.edge_id); + } + + DropOnDeleteTriggers(*client); + client->Execute("MATCH (n) DETACH DELETE n;"); + client->DiscardAll(); + } + }; + constexpr bool kBeforeCommit = true; + constexpr bool kAfterCommit = false; + run_delete_trigger_tests(kBeforeCommit); + run_delete_trigger_tests(kAfterCommit); + + return 0; +} diff --git a/tests/e2e/triggers/on_update_triggers.cpp b/tests/e2e/triggers/on_update_triggers.cpp new file mode 100644 index 000000000..3d24b1d28 --- /dev/null +++ b/tests/e2e/triggers/on_update_triggers.cpp @@ -0,0 +1,286 @@ +#include +#include + +#include +#include +#include "common.hpp" +#include "utils/logging.hpp" + +constexpr std::string_view kTriggerUpdatedVertexLabel{"UPDATED_VERTEX"}; +constexpr std::string_view kTriggerUpdatedEdgeLabel{"UPDATED_EDGE"}; +constexpr std::string_view kTriggerUpdatedObjectLabel{"UPDATED_OBJECT"}; +constexpr std::string_view kTriggerSetVertexPropertyLabel{"SET_VERTEX_PROPERTY"}; +constexpr std::string_view kTriggerRemovedVertexPropertyLabel{"REMOVED_VERTEX_PROPERTY"}; +constexpr std::string_view kTriggerSetVertexLabelLabel{"SET_VERTEX_LABEL"}; +constexpr std::string_view kTriggerRemovedVertexLabelLabel{"REMOVED_VERTEX_LABEL"}; +constexpr std::string_view kTriggerSetEdgePropertyLabel{"SET_EDGE_PROPERTY"}; +constexpr std::string_view kTriggerRemovedEdgePropertyLabel{"REMOVED_EDGE_PROPERTY"}; + +void SetVertexProperty(mg::Client &client, int vertex_id, std::string_view property_name, mg::Value value) { + mg::Map parameters{ + {"id", mg::Value{vertex_id}}, + {"value", std::move(value)}, + }; + client.Execute(fmt::format("MATCH (n: {} {{id: $id}}) " + "SET n.{} = $value", + kVertexLabel, property_name), + mg::ConstMap{parameters.ptr()}); + client.DiscardAll(); +} + +void SetEdgeProperty(mg::Client &client, int edge_id, std::string_view property_name, mg::Value value) { + mg::Map parameters{ + {"id", mg::Value{edge_id}}, + {"value", std::move(value)}, + }; + client.Execute(fmt::format("MATCH ()-[r: {} {{id: $id}}]->() " + "SET r.{} = $value", + kEdgeLabel, property_name), + mg::ConstMap{parameters.ptr()}); + client.DiscardAll(); +} + +void DoVertexLabelOperation(mg::Client &client, int vertex_id, std::string_view label, std::string_view operation) { + mg::Map parameters{{"id", mg::Value{vertex_id}}}; + client.Execute(fmt::format("MATCH (n: {} {{id: $id}}) " + "{} n:{}", + kVertexLabel, operation, label), + mg::ConstMap{parameters.ptr()}); + client.DiscardAll(); +} + +void AddVertexLabel(mg::Client &client, int vertex_id, std::string_view label) { + DoVertexLabelOperation(client, vertex_id, label, "SET"); +} + +void RemoveVertexLabel(mg::Client &client, int vertex_id, std::string_view label) { + DoVertexLabelOperation(client, vertex_id, label, "REMOVE"); +} + +void CheckVertexProperty(mg::Client &client, std::string_view label, int vertex_id, std::string_view property_name, + const mg::Value &value) { + const auto vertex = GetVertex(client, label, vertex_id); + MG_ASSERT(vertex, "Cannot check property of not existing vertex with label {} and id {}", label, vertex_id); + + const auto properties = vertex->ValueNode().properties(); + const auto prop_it = properties.find(property_name); + MG_ASSERT(prop_it != properties.end(), "Vertex with label {} and id {} doesn't have expected property {}!", label, + vertex_id, property_name); + MG_ASSERT((*prop_it).second == value, 
"Property {} of vertex with label {} and id {} doesn't have expected value!", + property_name, label, vertex_id); +} + +void CreateOnUpdateTriggers(mg::Client &client, bool is_before) { + const std::string_view before_or_after = is_before ? "BEFORE" : "AFTER"; + client.Execute( + fmt::format("CREATE TRIGGER UpdatedVerticesTrigger ON () UPDATE " + "{} COMMIT " + "EXECUTE " + "UNWIND updatedVertices as updateVertexEvent " + "CREATE (n: {} {{ id: updateVertexEvent.vertex.id , event_type: updateVertexEvent.event_type }})", + before_or_after, kTriggerUpdatedVertexLabel)); + client.DiscardAll(); + client.Execute( + fmt::format("CREATE TRIGGER UpdatedEdgesTrigger ON --> UPDATE " + "{} COMMIT " + "EXECUTE " + "UNWIND updatedEdges as updatedEdgeEvent " + "CREATE (n: {} {{ id: updatedEdgeEvent.edge.id, event_type: updatedEdgeEvent.event_type }})", + before_or_after, kTriggerUpdatedEdgeLabel)); + client.DiscardAll(); + client.Execute( + fmt::format("CREATE TRIGGER UpdatedObjectsTrigger ON UPDATE " + "{} COMMIT " + "EXECUTE " + "UNWIND updatedObjects as updatedObject " + "WITH CASE updatedObject.event_type " + "WHEN \"set_edge_property\" THEN updatedObject.edge.id " + "WHEN \"removed_edge_property\" THEN updatedObject.edge.id " + "ELSE updatedObject.vertex.id END as id, updatedObject " + "CREATE (n: {} {{ id: id, event_type: updatedObject.event_type }})", + before_or_after, kTriggerUpdatedObjectLabel)); + client.DiscardAll(); + + client.Execute( + fmt::format("CREATE TRIGGER SetVertexPropertiesTrigger ON () UPDATE " + "{} COMMIT " + "EXECUTE " + "UNWIND setVertexProperties as assignedVertexProperty " + "CREATE (n: {} {{ id: assignedVertexProperty.vertex.id }})", + before_or_after, kTriggerSetVertexPropertyLabel)); + client.DiscardAll(); + client.Execute( + fmt::format("CREATE TRIGGER RemovedVertexPropertiesTrigger ON () UPDATE " + "{} COMMIT " + "EXECUTE " + "UNWIND removedVertexProperties as removedVertexProperty " + "CREATE (n: {} {{ id: removedVertexProperty.vertex.id }})", + before_or_after, kTriggerRemovedVertexPropertyLabel)); + client.DiscardAll(); + client.Execute( + fmt::format("CREATE TRIGGER SetVertexLabelsTrigger ON () UPDATE " + "{} COMMIT " + "EXECUTE " + "UNWIND setVertexLabels as assignedVertexLabel " + "UNWIND assignedVertexLabel.vertices as vertex " + "CREATE (n: {} {{ id: vertex.id }})", + before_or_after, kTriggerSetVertexLabelLabel)); + client.DiscardAll(); + client.Execute( + fmt::format("CREATE TRIGGER RemovedVertexLabelTrigger ON () UPDATE " + "{} COMMIT " + "EXECUTE " + "UNWIND removedVertexLabels as removedVertexLabel " + "UNWIND removedVertexLabel.vertices as vertex " + "CREATE (n: {} {{ id: vertex.id }})", + before_or_after, kTriggerRemovedVertexLabelLabel)); + client.DiscardAll(); + + client.Execute( + fmt::format("CREATE TRIGGER SetEdgePropertiesTrigger ON --> UPDATE " + "{} COMMIT " + "EXECUTE " + "UNWIND setEdgeProperties as assignedEdgeProperty " + "CREATE (n: {} {{ id: assignedEdgeProperty.edge.id }})", + before_or_after, kTriggerSetEdgePropertyLabel)); + client.DiscardAll(); + client.Execute( + fmt::format("CREATE TRIGGER RemovedEdgePropertiesTrigger ON --> UPDATE " + "{} COMMIT " + "EXECUTE " + "UNWIND removedEdgeProperties as removedEdgeProperty " + "CREATE (n: {} {{ id: removedEdgeProperty.edge.id }})", + before_or_after, kTriggerRemovedEdgePropertyLabel)); + client.DiscardAll(); +} + +void DropOnUpdateTriggers(mg::Client &client) { + client.Execute("DROP TRIGGER UpdatedVerticesTrigger"); + client.DiscardAll(); + client.Execute("DROP TRIGGER UpdatedEdgesTrigger"); 
+ client.DiscardAll(); + client.Execute("DROP TRIGGER UpdatedObjectsTrigger"); + client.DiscardAll(); + client.Execute("DROP TRIGGER SetVertexPropertiesTrigger"); + client.DiscardAll(); + client.Execute("DROP TRIGGER RemovedVertexPropertiesTrigger"); + client.DiscardAll(); + client.Execute("DROP TRIGGER SetVertexLabelsTrigger"); + client.DiscardAll(); + client.Execute("DROP TRIGGER RemovedVertexLabelTrigger"); + client.DiscardAll(); + client.Execute("DROP TRIGGER SetEdgePropertiesTrigger"); + client.DiscardAll(); + client.Execute("DROP TRIGGER RemovedEdgePropertiesTrigger"); + client.DiscardAll(); +} + +struct EdgeInfo { + int from_vertex; + int to_vertex; + int edge_id; +}; + +int main(int argc, char **argv) { + constexpr std::string_view kExtraLabel = "EXTRA_LABEL"; + constexpr std::string_view kUpdatedProperty = "updateProperty"; + gflags::SetUsageMessage("Memgraph E2E ON UPDATE Triggers"); + gflags::ParseCommandLineFlags(&argc, &argv, true); + logging::RedirectToStderr(); + + mg::Client::Init(); + + auto client = Connect(); + + const auto run_update_trigger_tests = [&](bool is_before) { + const std::array vertex_ids{1, 2, 3, 4}; + const std::array edges{EdgeInfo{vertex_ids[0], vertex_ids[1], 5}, EdgeInfo{vertex_ids[2], vertex_ids[3], 6}}; + { + CreateOnUpdateTriggers(*client, is_before); + + client->BeginTransaction(); + for (const auto vertex_id : vertex_ids) { + CreateVertex(*client, vertex_id); + SetVertexProperty(*client, vertex_id, kUpdatedProperty, mg::Value(vertex_id)); + AddVertexLabel(*client, vertex_id, kExtraLabel); + } + for (const auto &edge : edges) { + CreateEdge(*client, edge.from_vertex, edge.to_vertex, edge.edge_id); + SetEdgeProperty(*client, edge.edge_id, kUpdatedProperty, mg::Value(edge.edge_id)); + } + client->CommitTransaction(); + CheckNumberOfAllVertices(*client, vertex_ids.size()); + + client->BeginTransaction(); + SetVertexProperty(*client, vertex_ids[0], kUpdatedProperty, mg::Value(-1)); + SetVertexProperty(*client, vertex_ids[1], kUpdatedProperty, mg::Value()); + AddVertexLabel(*client, vertex_ids[2], "NEW_LABEL"); + RemoveVertexLabel(*client, vertex_ids[3], kExtraLabel); + SetEdgeProperty(*client, edges[0].edge_id, kUpdatedProperty, mg::Value(-1)); + SetEdgeProperty(*client, edges[1].edge_id, kUpdatedProperty, mg::Value()); + CheckNumberOfAllVertices(*client, vertex_ids.size()); + client->CommitTransaction(); + + // :VERTEX x 4 + // :UPDATED_VERTEX x 4 + // :UPDATED_EDGE x 2 + // :UPDATED_OBJECT x 6 + // :SET_VERTEX_PROPERTY x 1 + // :REMOVED_VERTEX_PROPERTY x 1 + // :SET_VERTEX_LABEL x 1 + // :REMOVED_VERTEX_LABEL x 1 + // :SET_EDGE_PROPERTY x 1 + // :REMOVED_EDGE_PROPERTY x 1 + constexpr auto kNumberOfExpectedVertices = 22; + + if (is_before) { + CheckNumberOfAllVertices(*client, kNumberOfExpectedVertices); + } else { + WaitForNumberOfAllVertices(*client, kNumberOfExpectedVertices); + } + + CheckVertexProperty(*client, kTriggerUpdatedVertexLabel, vertex_ids[0], "event_type", + mg::Value{"set_vertex_property"}); + CheckVertexProperty(*client, kTriggerUpdatedVertexLabel, vertex_ids[1], "event_type", + mg::Value{"removed_vertex_property"}); + CheckVertexProperty(*client, kTriggerUpdatedVertexLabel, vertex_ids[2], "event_type", + mg::Value{"set_vertex_label"}); + CheckVertexProperty(*client, kTriggerUpdatedVertexLabel, vertex_ids[3], "event_type", + mg::Value{"removed_vertex_label"}); + CheckVertexProperty(*client, kTriggerUpdatedEdgeLabel, edges[0].edge_id, "event_type", + mg::Value{"set_edge_property"}); + CheckVertexProperty(*client,
kTriggerUpdatedEdgeLabel, edges[1].edge_id, "event_type", + mg::Value{"removed_edge_property"}); + + CheckVertexProperty(*client, kTriggerUpdatedObjectLabel, vertex_ids[0], "event_type", + mg::Value{"set_vertex_property"}); + CheckVertexProperty(*client, kTriggerUpdatedObjectLabel, vertex_ids[1], "event_type", + mg::Value{"removed_vertex_property"}); + CheckVertexProperty(*client, kTriggerUpdatedObjectLabel, vertex_ids[2], "event_type", + mg::Value{"set_vertex_label"}); + CheckVertexProperty(*client, kTriggerUpdatedObjectLabel, vertex_ids[3], "event_type", + mg::Value{"removed_vertex_label"}); + CheckVertexProperty(*client, kTriggerUpdatedObjectLabel, edges[0].edge_id, "event_type", + mg::Value{"set_edge_property"}); + CheckVertexProperty(*client, kTriggerUpdatedObjectLabel, edges[1].edge_id, "event_type", + mg::Value{"removed_edge_property"}); + + CheckVertexExists(*client, kTriggerSetVertexPropertyLabel, vertex_ids[0]); + CheckVertexExists(*client, kTriggerRemovedVertexPropertyLabel, vertex_ids[1]); + CheckVertexExists(*client, kTriggerSetVertexLabelLabel, vertex_ids[2]); + CheckVertexExists(*client, kTriggerRemovedVertexLabelLabel, vertex_ids[3]); + CheckVertexExists(*client, kTriggerSetEdgePropertyLabel, edges[0].edge_id); + CheckVertexExists(*client, kTriggerRemovedEdgePropertyLabel, edges[1].edge_id); + + DropOnUpdateTriggers(*client); + client->Execute("MATCH (n) DETACH DELETE n;"); + client->DiscardAll(); + } + }; + constexpr bool kBeforeCommit = true; + constexpr bool kAfterCommit = false; + run_update_trigger_tests(kBeforeCommit); + run_update_trigger_tests(kAfterCommit); + + return 0; +} diff --git a/tests/e2e/triggers/workloads.yaml b/tests/e2e/triggers/workloads.yaml new file mode 100644 index 000000000..55d82d485 --- /dev/null +++ b/tests/e2e/triggers/workloads.yaml @@ -0,0 +1,24 @@ +bolt_port: &bolt_port "7687" +template_cluster: &template_cluster + cluster: + main: + args: ["--bolt-port", *bolt_port, "--log-level=TRACE"] + log_file: "triggers-e2e.log" + setup_queries: [] + validation_queries: [] + +workloads: + - name: "ON CREATE Triggers" + binary: "tests/e2e/triggers/memgraph__e2e__triggers__on_create" + args: ["--bolt-port", *bolt_port] + <<: *template_cluster + - name: "ON UPDATE Triggers" + binary: "tests/e2e/triggers/memgraph__e2e__triggers__on_update" + args: ["--bolt-port", *bolt_port] + <<: *template_cluster + - name: "ON DELETE Triggers" + binary: "tests/e2e/triggers/memgraph__e2e__triggers__on_delete" + args: ["--bolt-port", *bolt_port] + <<: *template_cluster + + diff --git a/tests/integration/ldap/runner.py b/tests/integration/ldap/runner.py index 33f575535..f5f8b4c5b 100755 --- a/tests/integration/ldap/runner.py +++ b/tests/integration/ldap/runner.py @@ -73,7 +73,7 @@ class Memgraph: virtualenv_bin = os.path.join(SCRIPT_DIR, "ve3", "bin", "python3") with open(script_file) as fin: data = fin.read() - data = data.replace("/usr/bin/env python3", virtualenv_bin) + data = data.replace("/usr/bin/python3", virtualenv_bin) data = data.replace("/etc/memgraph/auth/ldap.yaml", self._auth_config) with open(self._auth_module, "w") as fout: diff --git a/tests/integration/telemetry/runner.py b/tests/integration/telemetry/runner.py index da958489d..766ba9f43 100755 --- a/tests/integration/telemetry/runner.py +++ b/tests/integration/telemetry/runner.py @@ -45,7 +45,7 @@ def execute_test(**kwargs): server = None if start_server: server = subprocess.Popen(list(map(str, server_args))) - time.sleep(0.1) + time.sleep(0.4) assert server.poll() is None, "Server process died 
prematurely!" try: diff --git a/tests/jepsen/src/jepsen/memgraph/bank.clj b/tests/jepsen/src/jepsen/memgraph/bank.clj index 9db55106a..4b5955903 100644 --- a/tests/jepsen/src/jepsen/memgraph/bank.clj +++ b/tests/jepsen/src/jepsen/memgraph/bank.clj @@ -118,8 +118,10 @@ (filter #(= :ok (:type %))) (filter #(= :read (:f %)))) bad-reads (->> ok-reads + (map #(->> % :value :accounts)) + (filter #(= (count %) 5)) (map (fn [op] - (let [balances (->> op :value :accounts (map :balance)) + (let [balances (map :balance op) expected-total (* account-num starting-balance)] (cond (and (not-empty balances) @@ -164,4 +166,4 @@ {:bank (bank-checker) :timeline (timeline/html)}) :generator (c/replication-gen (gen/mix [read-balances valid-transfer])) - :final-generator (gen/once read-balances)}) + :final-generator {:gen (gen/once read-balances) :recovery-time 20}}) diff --git a/tests/jepsen/src/jepsen/memgraph/core.clj b/tests/jepsen/src/jepsen/memgraph/core.clj index 69a4a0aa0..77bf6a163 100644 --- a/tests/jepsen/src/jepsen/memgraph/core.clj +++ b/tests/jepsen/src/jepsen/memgraph/core.clj @@ -22,7 +22,7 @@ "A map of workload names to functions that can take opts and construct workloads." {:bank bank/workload - :sequential sequential/workload + ;; :sequential sequential/workload (T0532-MG) :large large/workload}) (def nemesis-configuration @@ -45,8 +45,8 @@ (gen/log "Healing cluster.") (gen/nemesis (:final-generator nemesis)) (gen/log "Waiting for recovery") - (gen/sleep 20) - (gen/clients final-generator)) + (gen/sleep (:recovery-time final-generator)) + (gen/clients (:gen final-generator))) gen)] (merge tests/noop-test opts diff --git a/tests/jepsen/src/jepsen/memgraph/large.clj b/tests/jepsen/src/jepsen/memgraph/large.clj index d58248df7..cc710252a 100644 --- a/tests/jepsen/src/jepsen/memgraph/large.clj +++ b/tests/jepsen/src/jepsen/memgraph/large.clj @@ -103,4 +103,4 @@ :timeline (timeline/html)}) :generator (c/replication-gen (gen/mix [read-nodes add-nodes])) - :final-generator (gen/once read-nodes)}) + :final-generator {:gen (gen/once read-nodes) :recovery-time 40}}) diff --git a/tests/jepsen/src/jepsen/memgraph/sequential.clj b/tests/jepsen/src/jepsen/memgraph/sequential.clj index b8f772cf2..d50a241cb 100644 --- a/tests/jepsen/src/jepsen/memgraph/sequential.clj +++ b/tests/jepsen/src/jepsen/memgraph/sequential.clj @@ -9,19 +9,7 @@ [jepsen.memgraph.client :as c])) (dbclient/defquery get-all-nodes - "MATCH (n:Node) RETURN n;") - -(dbclient/defquery get-max-id - "MATCH (n:Node) - RETURN n.id AS id - ORDER BY id DESC - LIMIT 1;") - -(dbclient/defquery get-min-id - "MATCH (n:Node) - RETURN n.id AS id - ORDER BY id - LIMIT 1;") + "MATCH (n:Node) RETURN n ORDER BY n.id;") (dbclient/defquery create-node "CREATE (n:Node {id: $id});") @@ -29,19 +17,23 @@ (dbclient/defquery delete-node-with-id "MATCH (n:Node {id: $id}) DELETE n;") +(def next-node-for-add (atom 0)) + (defn add-next-node "Add a new node with its id set to the next highest" [conn] - (dbclient/with-transaction conn tx - (let [max-id (-> (get-max-id tx) first :id)] - (create-node tx {:id (inc max-id)})))) + (when (dbclient/with-transaction conn tx + (create-node tx {:id (swap! next-node-for-add identity)})) + (swap! next-node-for-add inc))) + +(def next-node-for-delete (atom 0)) (defn delete-oldest-node "Delete a node with the lowest id" [conn] - (dbclient/with-transaction conn tx - (let [min-id (-> (get-min-id tx) first :id)] - (delete-node-with-id tx {:id min-id})))) + (when (dbclient/with-transaction conn tx + (delete-node-with-id tx {:id (swap! 
next-node-for-delete identity)})) + (swap! next-node-for-delete inc))) (c/replication-client Client [] (open! [this test node] @@ -123,11 +115,12 @@ (when (not-empty ids) (cond ((complement strictly-increasing) ids) {:type :not-increasing-ids - :op op} - - ((complement increased-by-1) ids) - {:type :ids-missing :op op}))))) + + ;; if there are multiple threads, it is not clear how to guarantee that the ids are created in order + ;;((complement increased-by-1) ids) + ;;{:type :ids-missing + ;; :op op}))))) (filter identity) (into [])) empty-nodes (let [all-nodes (->> ok-reads diff --git a/tests/manual/antlr_parser.cpp b/tests/manual/antlr_parser.cpp index d1986c4b1..42ccfba7e 100644 --- a/tests/manual/antlr_parser.cpp +++ b/tests/manual/antlr_parser.cpp @@ -8,7 +8,7 @@ using namespace antlropencypher; using namespace antlr4; int main(int, const char **a) { - const char *query = a[1]; + std::string_view query{a[1]}; ANTLRInputStream input(query); MemgraphCypherLexer lexer(&input); diff --git a/tests/manual/single_query.cpp b/tests/manual/single_query.cpp index e0deff027..471b7779a 100644 --- a/tests/manual/single_query.cpp +++ b/tests/manual/single_query.cpp @@ -1,6 +1,8 @@ #include "communication/result_stream_faker.hpp" #include "query/interpreter.hpp" +#include "storage/v2/isolation_level.hpp" #include "storage/v2/storage.hpp" +#include "utils/on_scope_exit.hpp" int main(int argc, char *argv[]) { gflags::ParseCommandLineFlags(&argc, &argv, true); @@ -12,7 +14,9 @@ } storage::Storage db; - query::InterpreterContext interpreter_context{&db}; + auto data_directory = std::filesystem::temp_directory_path() / "single_query_test"; + utils::OnScopeExit clean_up{[&data_directory] { std::filesystem::remove_all(data_directory); }}; + query::InterpreterContext interpreter_context{&db, data_directory}; query::Interpreter interpreter{&interpreter_context}; ResultStreamFaker stream(&db); diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index beb45f44b..7a2c0e5e7 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -32,7 +32,7 @@ function(_add_unit_test test_cpp custom_main) # used to help create two targets of the same name even though CMake # requires unique logical target names set_target_properties(${target_name} PROPERTIES OUTPUT_NAME ${exec_name}) - target_link_libraries(${target_name} mg-utils gtest gmock Threads::Threads) + target_link_libraries(${target_name} mg-utils mg-new-delete gtest gmock Threads::Threads dl) # register test if(TEST_COVERAGE) add_test(${target_name} env LLVM_PROFILE_FILE=${exec_name}.profraw ./${exec_name}) @@ -95,6 +95,12 @@ target_link_libraries(${test_prefix}query_plan_v2_create_set_remove_delete mg-qu add_unit_test(query_pretty_print.cpp) target_link_libraries(${test_prefix}query_pretty_print mg-query) +add_unit_test(query_trigger.cpp) +target_link_libraries(${test_prefix}query_trigger mg-query) + +add_unit_test(query_serialization_property_value.cpp) +target_link_libraries(${test_prefix}query_serialization_property_value mg-query) + # Test query/procedure add_unit_test(query_procedure_mgp_type.cpp) target_link_libraries(${test_prefix}query_procedure_mgp_type mg-query) @@ -193,6 +199,9 @@ target_link_libraries(${test_prefix}utils_math mg-utils) add_unit_test(utils_memory.cpp) target_link_libraries(${test_prefix}utils_memory mg-utils) +add_unit_test(utils_memory_tracker.cpp) +target_link_libraries(${test_prefix}utils_memory_tracker mg-utils) + add_unit_test(utils_on_scope_exit.cpp)
target_link_libraries(${test_prefix}utils_on_scope_exit mg-utils) @@ -226,6 +235,12 @@ target_link_libraries(${test_prefix}utils_file_locker mg-utils fmt) add_unit_test(utils_thread_pool.cpp) target_link_libraries(${test_prefix}utils_thread_pool mg-utils fmt) +add_unit_test(utils_csv_parsing.cpp ${CMAKE_SOURCE_DIR}/src/utils/csv_parsing.cpp) +target_link_libraries(${test_prefix}utils_csv_parsing mg-utils fmt) + +add_unit_test(utils_async_timer.cpp) +target_link_libraries(${test_prefix}utils_async_timer mg-utils) + # Test mg-storage-v2 add_unit_test(commit_log_v2.cpp) @@ -267,6 +282,9 @@ target_link_libraries(${test_prefix}storage_v2_wal_file mg-storage-v2 fmt) add_unit_test(storage_v2_replication.cpp) target_link_libraries(${test_prefix}storage_v2_replication mg-storage-v2 fmt) +add_unit_test(storage_v2_isolation_level.cpp) +target_link_libraries(${test_prefix}storage_v2_isolation_level mg-storage-v2) + # Test mg-auth if (MG_ENTERPRISE) diff --git a/tests/unit/bolt_session.cpp b/tests/unit/bolt_session.cpp index c9c8631d4..6bee34273 100644 --- a/tests/unit/bolt_session.cpp +++ b/tests/unit/bolt_session.cpp @@ -383,15 +383,16 @@ TEST(BoltSession, ExecuteRunWrongMarker) { } TEST(BoltSession, ExecuteRunMissingData) { + std::array<uint8_t, 6> run_req_without_parameters{ + run_req_header[0], run_req_header[1], run_req_header[2], 0x00, 0x00, 0x00}; // test lengths, they test the following situations: // missing header data, missing query data, missing parameters - int len[] = {1, 2, 37}; - + int len[] = {1, 2, static_cast<int>(run_req_without_parameters.size())}; for (int i = 0; i < 3; ++i) { INIT_VARS; ExecuteHandshake(input_stream, session, output); ExecuteInit(input_stream, session, output); - ASSERT_THROW(ExecuteCommand(input_stream, session, run_req_header, len[i]), SessionException); + ASSERT_THROW(ExecuteCommand(input_stream, session, run_req_without_parameters.data(), len[i]), SessionException); ASSERT_EQ(session.state_, State::Close); CheckFailureMessage(output); @@ -871,7 +872,7 @@ TEST(BoltSession, Noop) { CheckFailureMessage(output); session.state_ = State::Result; - ExecuteCommand(input_stream, session, pullall_req, sizeof(v4::pullall_req)); + ExecuteCommand(input_stream, session, pullall_req, sizeof(pullall_req)); CheckSuccessMessage(output); ASSERT_THROW(ExecuteCommand(input_stream, session, v4_1::noop, sizeof(v4_1::noop)), SessionException); diff --git a/tests/unit/cypher_main_visitor.cpp b/tests/unit/cypher_main_visitor.cpp index 87ceeded0..fd673c3f4 100644 --- a/tests/unit/cypher_main_visitor.cpp +++ b/tests/unit/cypher_main_visitor.cpp @@ -18,12 +18,15 @@ #include #include +#include "query/exceptions.hpp" #include "query/frontend/ast/ast.hpp" #include "query/frontend/ast/cypher_main_visitor.hpp" #include "query/frontend/opencypher/parser.hpp" #include "query/frontend/stripped.hpp" #include "query/typed_value.hpp" +#include "utils/string.hpp" + namespace { using namespace query; @@ -2052,6 +2055,18 @@ TEST_P(CypherMainVisitorTest, GrantPrivilege) { {AuthQuery::Privilege::CONSTRAINT}); check_auth_query(&ast_generator, "GRANT DUMP TO user", AuthQuery::Action::GRANT_PRIVILEGE, "", "", "user", {}, {AuthQuery::Privilege::DUMP}); + check_auth_query(&ast_generator, "GRANT REPLICATION TO user", AuthQuery::Action::GRANT_PRIVILEGE, "", "", "user", {}, + {AuthQuery::Privilege::REPLICATION}); + check_auth_query(&ast_generator, "GRANT LOCK_PATH TO user", AuthQuery::Action::GRANT_PRIVILEGE, "", "", "user", {}, + {AuthQuery::Privilege::LOCK_PATH}); + check_auth_query(&ast_generator, "GRANT READ_FILE TO user",
AuthQuery::Action::GRANT_PRIVILEGE, "", "", "user", {}, + {AuthQuery::Privilege::READ_FILE}); + check_auth_query(&ast_generator, "GRANT FREE_MEMORY TO user", AuthQuery::Action::GRANT_PRIVILEGE, "", "", "user", {}, + {AuthQuery::Privilege::FREE_MEMORY}); + check_auth_query(&ast_generator, "GRANT TRIGGER TO user", AuthQuery::Action::GRANT_PRIVILEGE, "", "", "user", {}, + {AuthQuery::Privilege::TRIGGER}); + check_auth_query(&ast_generator, "GRANT CONFIG TO user", AuthQuery::Action::GRANT_PRIVILEGE, "", "", "user", {}, + {AuthQuery::Privilege::CONFIG}); } TEST_P(CypherMainVisitorTest, DenyPrivilege) { @@ -2730,7 +2745,8 @@ TEST_P(CypherMainVisitorTest, CallWithoutYield) { TEST_P(CypherMainVisitorTest, CallWithMemoryLimitWithoutYield) { auto &ast_generator = *GetParam(); - auto *query = dynamic_cast(ast_generator.ParseQuery("CALL mg.load_all() MEMORY LIMIT 32 KB")); + auto *query = + dynamic_cast(ast_generator.ParseQuery("CALL mg.load_all() PROCEDURE MEMORY LIMIT 32 KB")); ASSERT_TRUE(query); ASSERT_TRUE(query->single_query_); auto *single_query = query->single_query_; @@ -2747,7 +2763,7 @@ TEST_P(CypherMainVisitorTest, CallWithMemoryLimitWithoutYield) { TEST_P(CypherMainVisitorTest, CallWithMemoryUnlimitedWithoutYield) { auto &ast_generator = *GetParam(); - auto *query = dynamic_cast(ast_generator.ParseQuery("CALL mg.load_all() MEMORY UNLIMITED")); + auto *query = dynamic_cast(ast_generator.ParseQuery("CALL mg.load_all() PROCEDURE MEMORY UNLIMITED")); ASSERT_TRUE(query); ASSERT_TRUE(query->single_query_); auto *single_query = query->single_query_; @@ -2763,8 +2779,8 @@ TEST_P(CypherMainVisitorTest, CallWithMemoryUnlimitedWithoutYield) { TEST_P(CypherMainVisitorTest, CallProcedureWithMemoryLimit) { auto &ast_generator = *GetParam(); - auto *query = - dynamic_cast(ast_generator.ParseQuery("CALL proc.with.dots() MEMORY LIMIT 32 MB YIELD res")); + auto *query = dynamic_cast( + ast_generator.ParseQuery("CALL proc.with.dots() PROCEDURE MEMORY LIMIT 32 MB YIELD res")); ASSERT_TRUE(query); ASSERT_TRUE(query->single_query_); auto *single_query = query->single_query_; @@ -2788,8 +2804,8 @@ TEST_P(CypherMainVisitorTest, CallProcedureWithMemoryLimit) { TEST_P(CypherMainVisitorTest, CallProcedureWithMemoryUnlimited) { auto &ast_generator = *GetParam(); - auto *query = - dynamic_cast(ast_generator.ParseQuery("CALL proc.with.dots() MEMORY UNLIMITED YIELD res")); + auto *query = dynamic_cast( + ast_generator.ParseQuery("CALL proc.with.dots() PROCEDURE MEMORY UNLIMITED YIELD res")); ASSERT_TRUE(query); ASSERT_TRUE(query->single_query_); auto *single_query = query->single_query_; @@ -2822,12 +2838,10 @@ TEST_P(CypherMainVisitorTest, IncorrectCallProcedure) { ASSERT_THROW(ast_generator.ParseQuery("RETURN 42, CALL procedure() YIELD"), SyntaxException); ASSERT_THROW(ast_generator.ParseQuery("RETURN 42, CALL procedure() YIELD res"), SyntaxException); ASSERT_THROW(ast_generator.ParseQuery("RETURN 42 AS x CALL procedure() YIELD res"), SemanticException); - ASSERT_THROW(ast_generator.ParseQuery("CALL proc.with.dots() YIELD res MEMORY UNLIMITED"), SyntaxException); - ASSERT_THROW(ast_generator.ParseQuery("CALL proc.with.dots() YIELD res MEMORY LIMIT 32 KB"), SyntaxException); ASSERT_THROW(ast_generator.ParseQuery("CALL proc.with.dots() MEMORY YIELD res"), SyntaxException); // mg.procedures returns something, so it needs to have a YIELD. 
ASSERT_THROW(ast_generator.ParseQuery("CALL mg.procedures()"), SemanticException); - ASSERT_THROW(ast_generator.ParseQuery("CALL mg.procedures() MEMORY UNLIMITED"), SemanticException); + ASSERT_THROW(ast_generator.ParseQuery("CALL mg.procedures() PROCEDURE MEMORY UNLIMITED"), SemanticException); // TODO: Implement support for the following syntax. These are defined in // Neo4j and accepted in openCypher CIP. ASSERT_THROW(ast_generator.ParseQuery("CALL proc"), SyntaxException); @@ -2869,4 +2883,319 @@ TEST_P(CypherMainVisitorTest, TestLockPathQuery) { test_lock_path_query("UNLOCK", LockPathQuery::Action::UNLOCK_PATH); } +TEST_P(CypherMainVisitorTest, TestLoadCsvClause) { + auto &ast_generator = *GetParam(); + + { + const std::string query = R"(LOAD CSV FROM "file.csv")"; + ASSERT_THROW(ast_generator.ParseQuery(query), SyntaxException); + } + + { + const std::string query = R"(LOAD CSV FROM "file.csv" WITH)"; + ASSERT_THROW(ast_generator.ParseQuery(query), SyntaxException); + } + + { + const std::string query = R"(LOAD CSV FROM "file.csv" WITH HEADER)"; + ASSERT_THROW(ast_generator.ParseQuery(query), SyntaxException); + } + + { + const std::string query = R"(LOAD CSV FROM "file.csv" WITH HEADER DELIMITER ";")"; + ASSERT_THROW(ast_generator.ParseQuery(query), SyntaxException); + } + + { + const std::string query = R"(LOAD CSV FROM "file.csv" WITH HEADER DELIMITER ";" QUOTE "'")"; + ASSERT_THROW(ast_generator.ParseQuery(query), SyntaxException); + } + + { + const std::string query = R"(LOAD CSV FROM "file.csv" WITH HEADER DELIMITER ";" QUOTE "'" AS)"; + ASSERT_THROW(ast_generator.ParseQuery(query), SyntaxException); + } + + { + const std::string query = R"(LOAD CSV FROM file WITH HEADER IGNORE BAD DELIMITER ";" QUOTE "'" AS x)"; + ASSERT_THROW(ast_generator.ParseQuery(query), SyntaxException); + } + + { + const std::string query = R"(LOAD CSV FROM "file.csv" WITH HEADER IGNORE BAD DELIMITER 0 QUOTE "'" AS x)"; + ASSERT_THROW(ast_generator.ParseQuery(query), SemanticException); + } + + { + const std::string query = R"(LOAD CSV FROM "file.csv" WITH HEADER IGNORE BAD DELIMITER ";" QUOTE 0 AS x)"; + ASSERT_THROW(ast_generator.ParseQuery(query), SemanticException); + } + + { + // can't be a standalone clause + const std::string query = R"(LOAD CSV FROM "file.csv" WITH HEADER IGNORE BAD DELIMITER ";" QUOTE "'" AS x)"; + ASSERT_THROW(ast_generator.ParseQuery(query), SemanticException); + } + + { + const std::string query = + R"(LOAD CSV FROM "file.csv" WITH HEADER IGNORE BAD DELIMITER ";" QUOTE "'" AS x RETURN x)"; + auto *parsed_query = dynamic_cast(ast_generator.ParseQuery(query)); + ASSERT_TRUE(parsed_query); + auto *load_csv_clause = dynamic_cast(parsed_query->single_query_->clauses_[0]); + ASSERT_TRUE(load_csv_clause); + ASSERT_TRUE(load_csv_clause->with_header_); + ASSERT_TRUE(load_csv_clause->ignore_bad_); + } +} + +TEST_P(CypherMainVisitorTest, MemoryLimit) { + auto &ast_generator = *GetParam(); + + ASSERT_THROW(ast_generator.ParseQuery("RETURN x QUE"), SyntaxException); + ASSERT_THROW(ast_generator.ParseQuery("RETURN x QUERY"), SyntaxException); + ASSERT_THROW(ast_generator.ParseQuery("RETURN x QUERY MEM"), SyntaxException); + ASSERT_THROW(ast_generator.ParseQuery("RETURN x QUERY MEMORY"), SyntaxException); + ASSERT_THROW(ast_generator.ParseQuery("RETURN x QUERY MEMORY LIM"), SyntaxException); + ASSERT_THROW(ast_generator.ParseQuery("RETURN x QUERY MEMORY LIMIT"), SyntaxException); + ASSERT_THROW(ast_generator.ParseQuery("RETURN x QUERY MEMORY LIMIT KB"), SyntaxException); + 
ASSERT_THROW(ast_generator.ParseQuery("RETURN x QUERY MEMORY LIMIT 12GB"), SyntaxException); + ASSERT_THROW(ast_generator.ParseQuery("QUERY MEMORY LIMIT 12KB RETURN x"), SyntaxException); + + { + auto *query = dynamic_cast(ast_generator.ParseQuery("RETURN x")); + ASSERT_TRUE(query); + ASSERT_FALSE(query->memory_limit_); + } + + { + auto *query = dynamic_cast(ast_generator.ParseQuery("RETURN x QUERY MEMORY LIMIT 12KB")); + ASSERT_TRUE(query); + ASSERT_TRUE(query->memory_limit_); + ast_generator.CheckLiteral(query->memory_limit_, 12); + ASSERT_EQ(query->memory_scale_, 1024U); + } + + { + auto *query = dynamic_cast(ast_generator.ParseQuery("RETURN x QUERY MEMORY LIMIT 12MB")); + ASSERT_TRUE(query); + ASSERT_TRUE(query->memory_limit_); + ast_generator.CheckLiteral(query->memory_limit_, 12); + ASSERT_EQ(query->memory_scale_, 1024U * 1024U); + } + + { + auto *query = dynamic_cast( + ast_generator.ParseQuery("CALL mg.procedures() YIELD x RETURN x QUERY MEMORY LIMIT 12MB")); + ASSERT_TRUE(query); + ASSERT_TRUE(query->memory_limit_); + ast_generator.CheckLiteral(query->memory_limit_, 12); + ASSERT_EQ(query->memory_scale_, 1024U * 1024U); + + ASSERT_TRUE(query->single_query_); + auto *single_query = query->single_query_; + ASSERT_EQ(single_query->clauses_.size(), 2U); + auto *call_proc = dynamic_cast(single_query->clauses_[0]); + CheckCallProcedureDefaultMemoryLimit(ast_generator, *call_proc); + } + + { + auto *query = dynamic_cast(ast_generator.ParseQuery( + "CALL mg.procedures() PROCEDURE MEMORY LIMIT 3KB YIELD x RETURN x QUERY MEMORY LIMIT 12MB")); + ASSERT_TRUE(query); + ASSERT_TRUE(query->memory_limit_); + ast_generator.CheckLiteral(query->memory_limit_, 12); + ASSERT_EQ(query->memory_scale_, 1024U * 1024U); + + ASSERT_TRUE(query->single_query_); + auto *single_query = query->single_query_; + ASSERT_EQ(single_query->clauses_.size(), 2U); + auto *call_proc = dynamic_cast(single_query->clauses_[0]); + ASSERT_TRUE(call_proc->memory_limit_); + ast_generator.CheckLiteral(call_proc->memory_limit_, 3); + ASSERT_EQ(call_proc->memory_scale_, 1024U); + } + + { + auto *query = dynamic_cast( + ast_generator.ParseQuery("CALL mg.procedures() PROCEDURE MEMORY LIMIT 3KB YIELD x RETURN x")); + ASSERT_TRUE(query); + ASSERT_FALSE(query->memory_limit_); + + ASSERT_TRUE(query->single_query_); + auto *single_query = query->single_query_; + ASSERT_EQ(single_query->clauses_.size(), 2U); + auto *call_proc = dynamic_cast(single_query->clauses_[0]); + ASSERT_TRUE(call_proc->memory_limit_); + ast_generator.CheckLiteral(call_proc->memory_limit_, 3); + ASSERT_EQ(call_proc->memory_scale_, 1024U); + } + + { + auto *query = + dynamic_cast(ast_generator.ParseQuery("CALL mg.load_all() PROCEDURE MEMORY LIMIT 3KB")); + ASSERT_TRUE(query); + ASSERT_FALSE(query->memory_limit_); + + ASSERT_TRUE(query->single_query_); + auto *single_query = query->single_query_; + ASSERT_EQ(single_query->clauses_.size(), 1U); + auto *call_proc = dynamic_cast(single_query->clauses_[0]); + ASSERT_TRUE(call_proc->memory_limit_); + ast_generator.CheckLiteral(call_proc->memory_limit_, 3); + ASSERT_EQ(call_proc->memory_scale_, 1024U); + } + + { + auto *query = dynamic_cast(ast_generator.ParseQuery("CALL mg.load_all() QUERY MEMORY LIMIT 3KB")); + ASSERT_TRUE(query); + ASSERT_TRUE(query->memory_limit_); + ast_generator.CheckLiteral(query->memory_limit_, 3); + ASSERT_EQ(query->memory_scale_, 1024U); + + ASSERT_TRUE(query->single_query_); + auto *single_query = query->single_query_; + ASSERT_EQ(single_query->clauses_.size(), 1U); + auto *call_proc = 
dynamic_cast(single_query->clauses_[0]); + CheckCallProcedureDefaultMemoryLimit(ast_generator, *call_proc); + } +} + +namespace { +void TestInvalidQuery(const auto &query, Base &ast_generator) { + ASSERT_THROW(ast_generator.ParseQuery(query), SyntaxException); +} +} // namespace + +TEST_P(CypherMainVisitorTest, DropTrigger) { + auto &ast_generator = *GetParam(); + + TestInvalidQuery("DROP TR", ast_generator); + TestInvalidQuery("DROP TRIGGER", ast_generator); + + auto *parsed_query = dynamic_cast(ast_generator.ParseQuery("DROP TRIGGER trigger")); + EXPECT_EQ(parsed_query->action_, TriggerQuery::Action::DROP_TRIGGER); + EXPECT_EQ(parsed_query->trigger_name_, "trigger"); +} + +TEST_P(CypherMainVisitorTest, ShowTriggers) { + auto &ast_generator = *GetParam(); + + TestInvalidQuery("SHOW TR", ast_generator); + TestInvalidQuery("SHOW TRIGGER", ast_generator); + + auto *parsed_query = dynamic_cast(ast_generator.ParseQuery("SHOW TRIGGERS")); + EXPECT_EQ(parsed_query->action_, TriggerQuery::Action::SHOW_TRIGGERS); +} + +namespace { +void ValidateCreateQuery(Base &ast_generator, const auto &query, const auto &trigger_name, + const query::TriggerQuery::EventType event_type, const auto &phase, const auto &statement) { + auto *parsed_query = dynamic_cast(ast_generator.ParseQuery(query)); + EXPECT_EQ(parsed_query->action_, TriggerQuery::Action::CREATE_TRIGGER); + EXPECT_EQ(parsed_query->trigger_name_, trigger_name); + EXPECT_EQ(parsed_query->event_type_, event_type); + EXPECT_EQ(parsed_query->before_commit_, phase == "BEFORE"); + EXPECT_EQ(parsed_query->statement_, statement); +} +} // namespace + +TEST_P(CypherMainVisitorTest, CreateTriggers) { + auto &ast_generator = *GetParam(); + + TestInvalidQuery("CREATE TRIGGER", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON ", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON ()", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON -->", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON CREATE", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON () CREATE", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON --> CREATE", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON DELETE", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON () DELETE", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON --> DELETE", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON UPDATE", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON () UPDATE", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON --> UPDATE", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON CREATE BEFORE", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON CREATE BEFORE COMMIT", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON CREATE AFTER", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON CREATE AFTER COMMIT", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON -> CREATE AFTER COMMIT EXECUTE a", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON ) CREATE AFTER COMMIT EXECUTE a", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON ( CREATE AFTER COMMIT EXECUTE a", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON CRETE AFTER COMMIT EXECUTE a", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON DELET AFTER COMMIT EXECUTE a", ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON UPDTE AFTER COMMIT EXECUTE a", 
ast_generator); + TestInvalidQuery("CREATE TRIGGER trigger ON UPDATE COMMIT EXECUTE a", ast_generator); + + const auto *query_template = "CREATE TRIGGER trigger {} {} COMMIT EXECUTE {}"; + + constexpr std::array events{std::pair{"", query::TriggerQuery::EventType::ANY}, + std::pair{"ON CREATE", query::TriggerQuery::EventType::CREATE}, + std::pair{"ON () CREATE", query::TriggerQuery::EventType::VERTEX_CREATE}, + std::pair{"ON --> CREATE", query::TriggerQuery::EventType::EDGE_CREATE}, + std::pair{"ON DELETE", query::TriggerQuery::EventType::DELETE}, + std::pair{"ON () DELETE", query::TriggerQuery::EventType::VERTEX_DELETE}, + std::pair{"ON --> DELETE", query::TriggerQuery::EventType::EDGE_DELETE}, + std::pair{"ON UPDATE", query::TriggerQuery::EventType::UPDATE}, + std::pair{"ON () UPDATE", query::TriggerQuery::EventType::VERTEX_UPDATE}, + std::pair{"ON --> UPDATE", query::TriggerQuery::EventType::EDGE_UPDATE}}; + + constexpr std::array phases{"BEFORE", "AFTER"}; + + constexpr std::array statements{ + "", "SOME SUPER\nSTATEMENT", "Statement with 12312321 3 ", " Statement with 12312321 3 " + + }; + + for (const auto &[event_string, event_type] : events) { + for (const auto &phase : phases) { + for (const auto &statement : statements) { + ValidateCreateQuery(ast_generator, fmt::format(query_template, event_string, phase, statement), "trigger", + event_type, phase, utils::Trim(statement)); + } + } + } +} + +namespace { +void ValidateSetIsolationLevelQuery(Base &ast_generator, const auto &query, const auto scope, + const auto isolation_level) { + auto *parsed_query = dynamic_cast(ast_generator.ParseQuery(query)); + EXPECT_EQ(parsed_query->isolation_level_scope_, scope); + EXPECT_EQ(parsed_query->isolation_level_, isolation_level); +} +} // namespace + +TEST_P(CypherMainVisitorTest, SetIsolationLevelQuery) { + auto &ast_generator = *GetParam(); + TestInvalidQuery("SET ISO", ast_generator); + TestInvalidQuery("SET TRANSACTION ISOLATION", ast_generator); + TestInvalidQuery("SET TRANSACTION ISOLATION LEVEL", ast_generator); + TestInvalidQuery("SET TRANSACTION ISOLATION LEVEL READ COMMITTED", ast_generator); + TestInvalidQuery("SET NEXT TRANSACTION ISOLATION LEVEL", ast_generator); + TestInvalidQuery("SET ISOLATION LEVEL READ COMMITTED", ast_generator); + TestInvalidQuery("SET GLOBAL ISOLATION LEVEL READ COMMITTED", ast_generator); + TestInvalidQuery("SET GLOBAL TRANSACTION ISOLATION LEVEL READ COMITTED", ast_generator); + TestInvalidQuery("SET GLOBAL TRANSACTION ISOLATION LEVEL READ_COMITTED", ast_generator); + TestInvalidQuery("SET SESSION TRANSACTION ISOLATION LEVEL READCOMITTED", ast_generator); + + constexpr std::array scopes{std::pair{"GLOBAL", query::IsolationLevelQuery::IsolationLevelScope::GLOBAL}, + std::pair{"SESSION", query::IsolationLevelQuery::IsolationLevelScope::SESSION}, + std::pair{"NEXT", query::IsolationLevelQuery::IsolationLevelScope::NEXT}}; + constexpr std::array isolation_levels{ + std::pair{"READ UNCOMMITTED", query::IsolationLevelQuery::IsolationLevel::READ_UNCOMMITTED}, + std::pair{"READ COMMITTED", query::IsolationLevelQuery::IsolationLevel::READ_COMMITTED}, + std::pair{"SNAPSHOT ISOLATION", query::IsolationLevelQuery::IsolationLevel::SNAPSHOT_ISOLATION}}; + + constexpr const auto *query_template = "SET {} TRANSACTION ISOLATION LEVEL {}"; + + for (const auto &[scope_string, scope] : scopes) { + for (const auto &[isolation_level_string, isolation_level] : isolation_levels) { + ValidateSetIsolationLevelQuery(ast_generator, fmt::format(query_template, scope_string, 
isolation_level_string), + scope, isolation_level); + } + } +} } // namespace diff --git a/tests/unit/interpreter.cpp b/tests/unit/interpreter.cpp index 3ceb6183a..70b92fbca 100644 --- a/tests/unit/interpreter.cpp +++ b/tests/unit/interpreter.cpp @@ -1,4 +1,5 @@ #include +#include <filesystem> #include "communication/bolt/v1/value.hpp" #include "communication/result_stream_faker.hpp" @@ -10,7 +11,9 @@ #include "query/stream.hpp" #include "query/typed_value.hpp" #include "query_common.hpp" +#include "storage/v2/isolation_level.hpp" #include "storage/v2/property_value.hpp" +#include "utils/csv_parsing.hpp" #include "utils/logging.hpp" namespace { @@ -31,7 +34,8 @@ auto ToEdgeList(const communication::bolt::Value &v) { class InterpreterTest : public ::testing::Test { protected: storage::Storage db_; - query::InterpreterContext interpreter_context_{&db_}; + std::filesystem::path data_directory{std::filesystem::temp_directory_path() / "MG_tests_unit_interpreter"}; + query::InterpreterContext interpreter_context_{&db_, data_directory}; query::Interpreter interpreter_{&interpreter_context_}; auto Prepare(const std::string &query, const std::map<std::string, storage::PropertyValue> &params = {}) { @@ -193,6 +197,11 @@ TEST_F(InterpreterTest, Parameters) { } } +TEST_F(InterpreterTest, LoadCsv) { + // for debug purposes + auto [stream, qid] = Prepare(R"(LOAD CSV FROM "simple.csv" NO HEADER AS row RETURN row)"); +} + // Test bfs end to end. TEST_F(InterpreterTest, Bfs) { srand(0); @@ -776,3 +785,116 @@ TEST_F(InterpreterTest, Qid) { interpreter_.CommitTransaction(); } } + +namespace { +// copied from utils_csv_parsing.cpp - tmp dir management and csv file writer +class TmpCsvDirManager final { + public: + TmpCsvDirManager() { CreateCsvDir(); } + ~TmpCsvDirManager() { Clear(); } + + const std::filesystem::path &Path() const { return tmp_dir_; } + + private: + const std::filesystem::path tmp_dir_{std::filesystem::temp_directory_path() / "csv_directory"}; + + void CreateCsvDir() { + if (!std::filesystem::exists(tmp_dir_)) { + std::filesystem::create_directory(tmp_dir_); + } + } + + void Clear() { + if (!std::filesystem::exists(tmp_dir_)) return; + std::filesystem::remove_all(tmp_dir_); + } +}; + +class FileWriter { + public: + explicit FileWriter(const std::filesystem::path path) { stream_.open(path); } + + FileWriter(const FileWriter &) = delete; + FileWriter &operator=(const FileWriter &) = delete; + + FileWriter(FileWriter &&) = delete; + FileWriter &operator=(FileWriter &&) = delete; + + void Close() { stream_.close(); } + + size_t WriteLine(const std::string_view line) { + if (!stream_.is_open()) { + return 0; + } + + stream_ << line << std::endl; + + // including the newline character + return line.size() + 1; + } + + private: + std::ofstream stream_; +}; + +std::string CreateRow(const std::vector<std::string> &columns, const std::string_view delim) { + return utils::Join(columns, delim); +} +} // namespace + +TEST_F(InterpreterTest, LoadCsvClause) { + auto dir_manager = TmpCsvDirManager(); + const auto csv_path = dir_manager.Path() / "file.csv"; + auto writer = FileWriter(csv_path); + + const std::string delimiter{"|"}; + + const std::vector<std::string> header{"A", "B", "C"}; + writer.WriteLine(CreateRow(header, delimiter)); + + const std::vector<std::string> good_columns_1{"a", "b", "c"}; + writer.WriteLine(CreateRow(good_columns_1, delimiter)); + + const std::vector<std::string> bad_columns{"\"\"1", "2", "3"}; + writer.WriteLine(CreateRow(bad_columns, delimiter)); + + const std::vector<std::string> good_columns_2{"d", "e", "f"}; + writer.WriteLine(CreateRow(good_columns_2, delimiter)); + + writer.Close(); + +
{ + const std::string query = fmt::format(R"(LOAD CSV FROM "{}" WITH HEADER IGNORE BAD DELIMITER "{}" AS x RETURN x.A)", + csv_path.string(), delimiter); + auto [stream, qid] = Prepare(query); + ASSERT_EQ(stream.GetHeader().size(), 1U); + EXPECT_EQ(stream.GetHeader()[0], "x.A"); + + Pull(&stream, 1); + ASSERT_EQ(stream.GetSummary().count("has_more"), 1); + ASSERT_TRUE(stream.GetSummary().at("has_more").ValueBool()); + ASSERT_EQ(stream.GetResults().size(), 1U); + ASSERT_EQ(stream.GetResults()[0][0].ValueString(), "a"); + + Pull(&stream, 1); + ASSERT_EQ(stream.GetSummary().count("has_more"), 1); + ASSERT_FALSE(stream.GetSummary().at("has_more").ValueBool()); + ASSERT_EQ(stream.GetResults().size(), 2U); + ASSERT_EQ(stream.GetResults()[1][0].ValueString(), "d"); + } + + { + const std::string query = fmt::format(R"(LOAD CSV FROM "{}" WITH HEADER IGNORE BAD DELIMITER "{}" AS x RETURN x.C)", + csv_path.string(), delimiter); + auto [stream, qid] = Prepare(query); + ASSERT_EQ(stream.GetHeader().size(), 1U); + EXPECT_EQ(stream.GetHeader()[0], "x.C"); + + Pull(&stream); + ASSERT_EQ(stream.GetSummary().count("has_more"), 1); + ASSERT_FALSE(stream.GetSummary().at("has_more").ValueBool()); + ASSERT_EQ(stream.GetResults().size(), 2U); + ASSERT_EQ(stream.GetResults()[0][0].ValueString(), "c"); + ASSERT_EQ(stream.GetResults()[1][0].ValueString(), "f"); + } +} diff --git a/tests/unit/main.cpp b/tests/unit/main.cpp index 5b0fa17fb..cb05a4e3b 100644 --- a/tests/unit/main.cpp +++ b/tests/unit/main.cpp @@ -4,6 +4,6 @@ int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); logging::RedirectToStderr(); - spdlog::set_level(spdlog::level::info); + spdlog::set_level(spdlog::level::warn); return RUN_ALL_TESTS(); } diff --git a/tests/unit/query_common.hpp b/tests/unit/query_common.hpp index 8f8c6eb66..67698075f 100644 --- a/tests/unit/query_common.hpp +++ b/tests/unit/query_common.hpp @@ -463,7 +463,7 @@ auto GetMerge(AstStorage &storage, Pattern *pattern, OnMatch on_match, OnCreate #define MATCH(...) query::test_common::GetWithPatterns(storage.Create(), {__VA_ARGS__}) #define WHERE(expr) storage.Create((expr)) #define CREATE(...) query::test_common::GetWithPatterns(storage.Create(), {__VA_ARGS__}) -#define IDENT(name) storage.Create((name)) +#define IDENT(...) storage.Create(__VA_ARGS__) #define LITERAL(val) storage.Create((val)) #define LIST(...) storage.Create(std::vector{__VA_ARGS__}) #define MAP(...) 
\
diff --git a/tests/unit/query_dump.cpp b/tests/unit/query_dump.cpp
index b565de5fe..ee19f8ddf 100644
--- a/tests/unit/query_dump.cpp
+++ b/tests/unit/query_dump.cpp
@@ -1,5 +1,6 @@
#include
+#include <filesystem>
#include
#include
#include
@@ -187,7 +188,8 @@ DatabaseState GetState(storage::Storage *db) {
}

auto Execute(storage::Storage *db, const std::string &query) {
-  query::InterpreterContext context(db);
+  auto data_directory = std::filesystem::temp_directory_path() / "MG_tests_unit_query_dump";
+  query::InterpreterContext context(db, data_directory);
  query::Interpreter interpreter(&context);
  ResultStreamFaker stream(db);
@@ -700,7 +702,8 @@ TEST(DumpTest, ExecuteDumpDatabase) {
class StatefulInterpreter {
 public:
-  explicit StatefulInterpreter(storage::Storage *db) : db_(db), context_(db_), interpreter_(&context_) {}
+  explicit StatefulInterpreter(storage::Storage *db)
+      : db_(db), context_(db_, data_directory_), interpreter_(&context_) {}

  auto Execute(const std::string &query) {
    ResultStreamFaker stream(db_);
@@ -714,11 +717,16 @@ class StatefulInterpreter {
  }

 private:
+  static const std::filesystem::path data_directory_;
+
  storage::Storage *db_;
  query::InterpreterContext context_;
  query::Interpreter interpreter_;
};

+const std::filesystem::path StatefulInterpreter::data_directory_{std::filesystem::temp_directory_path() /
+                                                                 "MG_tests_unit_query_dump_stateful"};
+
// NOLINTNEXTLINE(hicpp-special-member-functions)
TEST(DumpTest, ExecuteDumpDatabaseInMulticommandTransaction) {
  storage::Storage db;
diff --git a/tests/unit/query_plan.cpp b/tests/unit/query_plan.cpp
index d15efc0af..f15cd0055 100644
--- a/tests/unit/query_plan.cpp
+++ b/tests/unit/query_plan.cpp
@@ -73,6 +73,12 @@ class TestPlanner : public ::testing::Test {};

using PlannerTypes = ::testing::Types;

+void DeleteListContent(std::list<BaseOpChecker *> *list) {
+  for (BaseOpChecker *ptr : *list) {
+    delete ptr;
+  }
+}
+
TYPED_TEST_CASE(TestPlanner, PlannerTypes);

TYPED_TEST(TestPlanner, MatchNodeReturn) {
@@ -223,6 +229,7 @@ TYPED_TEST(TestPlanner, OptionalMatchNamedPatternReturn) {
  auto planner = MakePlanner(&dba, storage, symbol_table, query);
  std::list<BaseOpChecker *> optional{new ExpectScanAll(), new ExpectExpand(), new ExpectConstructNamedPath()};
  CheckPlan(planner.plan(), symbol_table, ExpectOptional(optional_symbols, optional), ExpectProduce());
+  DeleteListContent(&optional);
}

TYPED_TEST(TestPlanner, MatchWhereReturn) {
@@ -549,10 +556,8 @@ TYPED_TEST(TestPlanner, MatchMerge) {
  auto acc = ExpectAccumulate({symbol_table.at(*ident_n)});
  auto planner = MakePlanner(&dba, storage, symbol_table, query);
  CheckPlan(planner.plan(), symbol_table, ExpectScanAll(), ExpectMerge(on_match, on_create), acc, ExpectProduce());
-  for (auto &op : on_match) delete op;
-  on_match.clear();
-  for (auto &op : on_create) delete op;
-  on_create.clear();
+  DeleteListContent(&on_match);
+  DeleteListContent(&on_create);
}

TYPED_TEST(TestPlanner, MatchOptionalMatchWhereReturn) {
@@ -564,6 +569,7 @@ TYPED_TEST(TestPlanner, MatchOptionalMatchWhereReturn) {
          WHERE(LESS(PROPERTY_LOOKUP("m", prop), LITERAL(42))), RETURN("r")));
  std::list<BaseOpChecker *> optional{new ExpectScanAll(), new ExpectExpand(), new ExpectFilter()};
  CheckPlan(query, storage, ExpectScanAll(), ExpectOptional(optional), ExpectProduce());
+  DeleteListContent(&optional);
}

TYPED_TEST(TestPlanner, MatchUnwindReturn) {
@@ -705,6 +711,7 @@ TYPED_TEST(TestPlanner, MatchOptionalMatchWhere) {
  // optional ScanAll.
  std::list<BaseOpChecker *> optional{new ExpectFilter(), new ExpectScanAll()};
  CheckPlan(query, storage, ExpectScanAll(), ExpectExpand(), ExpectOptional(optional), ExpectProduce());
+  DeleteListContent(&optional);
}

TYPED_TEST(TestPlanner, MatchReturnAsterisk) {
@@ -763,8 +770,8 @@ TYPED_TEST(TestPlanner, UnwindMergeNodeProperty) {
  std::list<BaseOpChecker *> on_match{new ExpectScanAll(), new ExpectFilter()};
  std::list<BaseOpChecker *> on_create{new ExpectCreateNode()};
  CheckPlan(query, storage, ExpectUnwind(), ExpectMerge(on_match, on_create));
-  for (auto &op : on_match) delete op;
-  for (auto &op : on_create) delete op;
+  DeleteListContent(&on_match);
+  DeleteListContent(&on_create);
}

TYPED_TEST(TestPlanner, MultipleOptionalMatchReturn) {
@@ -774,6 +781,7 @@ TYPED_TEST(TestPlanner, MultipleOptionalMatchReturn) {
  QUERY(SINGLE_QUERY(OPTIONAL_MATCH(PATTERN(NODE("n"))), OPTIONAL_MATCH(PATTERN(NODE("m"))), RETURN("n")));
  std::list<BaseOpChecker *> optional{new ExpectScanAll()};
  CheckPlan(query, storage, ExpectOptional(optional), ExpectOptional(optional), ExpectProduce());
+  DeleteListContent(&optional);
}

TYPED_TEST(TestPlanner, FunctionAggregationReturn) {
diff --git a/tests/unit/query_plan_edge_cases.cpp b/tests/unit/query_plan_edge_cases.cpp
index 71e558f17..fe3035240 100644
--- a/tests/unit/query_plan_edge_cases.cpp
+++ b/tests/unit/query_plan_edge_cases.cpp
@@ -2,6 +2,7 @@
// that's not easily testable with single-phase testing. instead, for
// easy testing and later readability they are tested end-to-end.

+#include <filesystem>
#include

#include "gmock/gmock.h"
@@ -19,9 +20,11 @@ class QueryExecution : public testing::Test {
  std::optional<query::InterpreterContext> interpreter_context_;
  std::optional<query::Interpreter> interpreter_;

+  std::filesystem::path data_directory{std::filesystem::temp_directory_path() / "MG_tests_unit_query_plan_edge_cases"};
+
  void SetUp() {
    db_.emplace();
-    interpreter_context_.emplace(&*db_);
+    interpreter_context_.emplace(&*db_, data_directory);
    interpreter_.emplace(&*interpreter_context_);
  }

diff --git a/tests/unit/query_procedure_mgp_module.cpp b/tests/unit/query_procedure_mgp_module.cpp
index 3da92bb5c..74e2da106 100644
--- a/tests/unit/query_procedure_mgp_module.cpp
+++ b/tests/unit/query_procedure_mgp_module.cpp
@@ -5,6 +5,8 @@

#include "query/procedure/mg_procedure_impl.hpp"

+#include "test_utils.hpp"
+
static void DummyCallback(const mgp_list *, const mgp_graph *, mgp_result *, mgp_memory *) {}

TEST(Module, InvalidProcedureRegistration) {
@@ -53,7 +55,8 @@ TEST(Module, ProcedureSignature) {
  CheckSignature(proc, "proc() :: ()");
  mgp_proc_add_arg(proc, "arg1", mgp_type_number());
  CheckSignature(proc, "proc(arg1 :: NUMBER) :: ()");
-  mgp_proc_add_opt_arg(proc, "opt1", mgp_type_nullable(mgp_type_any()), mgp_value_make_null(&memory));
+  mgp_proc_add_opt_arg(proc, "opt1", mgp_type_nullable(mgp_type_any()),
+                       test_utils::CreateValueOwningPtr(mgp_value_make_null(&memory)).get());
  CheckSignature(proc, "proc(arg1 :: NUMBER, opt1 = Null :: ANY?) :: ()");
  mgp_proc_add_result(proc, "res1", mgp_type_list(mgp_type_int()));
  CheckSignature(proc, "proc(arg1 :: NUMBER, opt1 = Null :: ANY?)
:: (res1 :: LIST OF INTEGER)"); @@ -69,7 +72,8 @@ TEST(Module, ProcedureSignature) { "(res1 :: LIST OF INTEGER, DEPRECATED res2 :: STRING)"); EXPECT_FALSE(mgp_proc_add_result(proc, "res2", mgp_type_any())); EXPECT_FALSE(mgp_proc_add_deprecated_result(proc, "res1", mgp_type_any())); - mgp_proc_add_opt_arg(proc, "opt2", mgp_type_string(), mgp_value_make_string("string=\"value\"", &memory)); + mgp_proc_add_opt_arg(proc, "opt2", mgp_type_string(), + test_utils::CreateValueOwningPtr(mgp_value_make_string("string=\"value\"", &memory)).get()); CheckSignature(proc, "proc(arg1 :: NUMBER, opt1 = Null :: ANY?, " "opt2 = \"string=\\\"value\\\"\" :: STRING) :: " @@ -80,6 +84,7 @@ TEST(Module, ProcedureSignatureOnlyOptArg) { mgp_memory memory{utils::NewDeleteResource()}; mgp_module module(utils::NewDeleteResource()); auto *proc = mgp_module_add_read_procedure(&module, "proc", DummyCallback); - mgp_proc_add_opt_arg(proc, "opt1", mgp_type_nullable(mgp_type_any()), mgp_value_make_null(&memory)); + mgp_proc_add_opt_arg(proc, "opt1", mgp_type_nullable(mgp_type_any()), + test_utils::CreateValueOwningPtr(mgp_value_make_null(&memory)).get()); CheckSignature(proc, "proc(opt1 = Null :: ANY?) :: ()"); } diff --git a/tests/unit/query_procedure_mgp_type.cpp b/tests/unit/query_procedure_mgp_type.cpp index 2cb1b0ce4..588a35a85 100644 --- a/tests/unit/query_procedure_mgp_type.cpp +++ b/tests/unit/query_procedure_mgp_type.cpp @@ -1,7 +1,13 @@ +#include +#include +#include + #include #include "query/procedure/mg_procedure_impl.hpp" +#include "test_utils.hpp" + TEST(CypherType, PresentableNameSimpleTypes) { EXPECT_EQ(mgp_type_any()->impl->GetPresentableName(), "ANY"); EXPECT_EQ(mgp_type_bool()->impl->GetPresentableName(), "BOOLEAN"); @@ -66,6 +72,7 @@ TEST(CypherType, NullSatisfiesType) { EXPECT_TRUE(null_type->impl->SatisfiesType(tv_null)); } } + mgp_value_destroy(mgp_null); } } @@ -101,6 +108,7 @@ TEST(CypherType, BoolSatisfiesType) { CheckNotSatisfiesTypesAndListAndNullable(mgp_bool, tv_bool, {mgp_type_string(), mgp_type_int(), mgp_type_float(), mgp_type_number(), mgp_type_map(), mgp_type_node(), mgp_type_relationship(), mgp_type_path()}); + mgp_value_destroy(mgp_bool); } TEST(CypherType, IntSatisfiesType) { @@ -111,6 +119,7 @@ TEST(CypherType, IntSatisfiesType) { CheckNotSatisfiesTypesAndListAndNullable(mgp_int, tv_int, {mgp_type_bool(), mgp_type_string(), mgp_type_float(), mgp_type_map(), mgp_type_node(), mgp_type_relationship(), mgp_type_path()}); + mgp_value_destroy(mgp_int); } TEST(CypherType, DoubleSatisfiesType) { @@ -121,6 +130,7 @@ TEST(CypherType, DoubleSatisfiesType) { CheckNotSatisfiesTypesAndListAndNullable(mgp_double, tv_double, {mgp_type_bool(), mgp_type_string(), mgp_type_int(), mgp_type_map(), mgp_type_node(), mgp_type_relationship(), mgp_type_path()}); + mgp_value_destroy(mgp_double); } TEST(CypherType, StringSatisfiesType) { @@ -131,12 +141,13 @@ TEST(CypherType, StringSatisfiesType) { CheckNotSatisfiesTypesAndListAndNullable(mgp_string, tv_string, {mgp_type_bool(), mgp_type_int(), mgp_type_float(), mgp_type_number(), mgp_type_map(), mgp_type_node(), mgp_type_relationship(), mgp_type_path()}); + mgp_value_destroy(mgp_string); } TEST(CypherType, MapSatisfiesType) { mgp_memory memory{utils::NewDeleteResource()}; auto *map = mgp_map_make_empty(&memory); - mgp_map_insert(map, "key", mgp_value_make_int(42, &memory)); + mgp_map_insert(map, "key", test_utils::CreateValueOwningPtr(mgp_value_make_int(42, &memory)).get()); auto *mgp_map_v = mgp_value_make_map(map); const query::TypedValue 
tv_map(std::map{{"key", query::TypedValue(42)}}); CheckSatisfiesTypesAndNullable(mgp_map_v, tv_map, {mgp_type_any(), mgp_type_map()}); @@ -144,6 +155,7 @@ TEST(CypherType, MapSatisfiesType) { mgp_map_v, tv_map, {mgp_type_bool(), mgp_type_string(), mgp_type_int(), mgp_type_float(), mgp_type_number(), mgp_type_node(), mgp_type_relationship(), mgp_type_path()}); + mgp_value_destroy(mgp_map_v); } TEST(CypherType, VertexSatisfiesType) { @@ -160,6 +172,7 @@ TEST(CypherType, VertexSatisfiesType) { CheckNotSatisfiesTypesAndListAndNullable(mgp_vertex_v, tv_vertex, {mgp_type_bool(), mgp_type_string(), mgp_type_int(), mgp_type_float(), mgp_type_number(), mgp_type_relationship(), mgp_type_path()}); + mgp_value_destroy(mgp_vertex_v); } TEST(CypherType, EdgeSatisfiesType) { @@ -178,6 +191,7 @@ TEST(CypherType, EdgeSatisfiesType) { CheckNotSatisfiesTypesAndListAndNullable(mgp_edge_v, tv_edge, {mgp_type_bool(), mgp_type_string(), mgp_type_int(), mgp_type_float(), mgp_type_number(), mgp_type_node(), mgp_type_path()}); + mgp_value_destroy(mgp_edge_v); } TEST(CypherType, PathSatisfiesType) { @@ -190,9 +204,13 @@ TEST(CypherType, PathSatisfiesType) { mgp_memory memory{utils::NewDeleteResource()}; utils::Allocator alloc(memory.impl); mgp_graph graph{&dba, storage::View::NEW}; - auto *path = mgp_path_make_with_start(alloc.new_object(v1, &graph), &memory); + auto *mgp_vertex_v = alloc.new_object(v1, &graph); + auto path = mgp_path_make_with_start(mgp_vertex_v, &memory); ASSERT_TRUE(path); - ASSERT_TRUE(mgp_path_expand(path, alloc.new_object(edge, &graph))); + alloc.delete_object(mgp_vertex_v); + auto mgp_edge_v = alloc.new_object(edge, &graph); + ASSERT_TRUE(mgp_path_expand(path, mgp_edge_v)); + alloc.delete_object(mgp_edge_v); auto *mgp_path_v = mgp_value_make_path(path); const query::TypedValue tv_path(query::Path(v1, edge, v2)); CheckSatisfiesTypesAndNullable(mgp_path_v, tv_path, {mgp_type_any(), mgp_type_path()}); @@ -200,6 +218,7 @@ TEST(CypherType, PathSatisfiesType) { mgp_path_v, tv_path, {mgp_type_bool(), mgp_type_string(), mgp_type_int(), mgp_type_float(), mgp_type_number(), mgp_type_map(), mgp_type_node(), mgp_type_relationship()}); + mgp_value_destroy(mgp_path_v); } static std::vector MakeListTypes(const std::vector &element_types) { @@ -224,6 +243,7 @@ TEST(CypherType, EmptyListSatisfiesType) { auto all_types = MakeListTypes(primitive_types); all_types.push_back(mgp_type_any()); CheckSatisfiesTypesAndNullable(mgp_list_v, tv_list, all_types); + mgp_value_destroy(mgp_list_v); } TEST(CypherType, ListOfIntSatisfiesType) { @@ -233,7 +253,7 @@ TEST(CypherType, ListOfIntSatisfiesType) { auto *mgp_list_v = mgp_value_make_list(list); query::TypedValue tv_list(std::vector{}); for (int64_t i = 0; i < elem_count; ++i) { - ASSERT_TRUE(mgp_list_append(list, mgp_value_make_int(i, &memory))); + ASSERT_TRUE(mgp_list_append(list, test_utils::CreateValueOwningPtr(mgp_value_make_int(i, &memory)).get())); tv_list.ValueList().emplace_back(i); auto valid_types = MakeListTypes({mgp_type_any(), mgp_type_int(), mgp_type_number()}); valid_types.push_back(mgp_type_any()); @@ -242,6 +262,7 @@ TEST(CypherType, ListOfIntSatisfiesType) { {mgp_type_bool(), mgp_type_string(), mgp_type_float(), mgp_type_map(), mgp_type_node(), mgp_type_relationship(), mgp_type_path()}); } + mgp_value_destroy(mgp_list_v); } TEST(CypherType, ListOfIntAndBoolSatisfiesType) { @@ -251,10 +272,10 @@ TEST(CypherType, ListOfIntAndBoolSatisfiesType) { auto *mgp_list_v = mgp_value_make_list(list); query::TypedValue tv_list(std::vector{}); // Add an int - 
ASSERT_TRUE(mgp_list_append(list, mgp_value_make_int(42, &memory)));
+  ASSERT_TRUE(mgp_list_append(list, test_utils::CreateValueOwningPtr(mgp_value_make_int(42, &memory)).get()));
  tv_list.ValueList().emplace_back(42);
  // Add a boolean
-  ASSERT_TRUE(mgp_list_append(list, mgp_value_make_bool(1, &memory)));
+  ASSERT_TRUE(mgp_list_append(list, test_utils::CreateValueOwningPtr(mgp_value_make_bool(1, &memory)).get()));
  tv_list.ValueList().emplace_back(true);
  auto valid_types = MakeListTypes({mgp_type_any()});
  valid_types.push_back(mgp_type_any());
@@ -264,6 +285,7 @@
      mgp_list_v, tv_list,
      {mgp_type_bool(), mgp_type_string(), mgp_type_int(), mgp_type_float(), mgp_type_number(), mgp_type_map(),
       mgp_type_node(), mgp_type_relationship(), mgp_type_path()});
+  mgp_value_destroy(mgp_list_v);
}

TEST(CypherType, ListOfNullSatisfiesType) {
@@ -271,7 +293,7 @@ TEST(CypherType, ListOfNullSatisfiesType) {
  mgp_memory memory{utils::NewDeleteResource()};
  auto *list = mgp_list_make_empty(1, &memory);
  auto *mgp_list_v = mgp_value_make_list(list);
  query::TypedValue tv_list(std::vector{});
-  ASSERT_TRUE(mgp_list_append(list, mgp_value_make_null(&memory)));
+  ASSERT_TRUE(mgp_list_append(list, test_utils::CreateValueOwningPtr(mgp_value_make_null(&memory)).get()));
  tv_list.ValueList().emplace_back();
  // List with Null satisfies all nullable list element types
  std::vector primitive_types{
@@ -295,4 +317,5 @@
    EXPECT_FALSE(null_type->impl->SatisfiesType(*mgp_list_v)) << null_type->impl->GetPresentableName();
    EXPECT_FALSE(null_type->impl->SatisfiesType(tv_list));
  }
+  mgp_value_destroy(mgp_list_v);
}
diff --git a/tests/unit/query_procedure_py_module.cpp b/tests/unit/query_procedure_py_module.cpp
index ccc630842..95a7af2a1 100644
--- a/tests/unit/query_procedure_py_module.cpp
+++ b/tests/unit/query_procedure_py_module.cpp
@@ -254,6 +254,7 @@ TEST(PyModule, PyObjectToMgpValue) {
  const mgp_value *v2 = mgp_map_at(map, "four");
  ASSERT_TRUE(mgp_value_is_double(v2));
  EXPECT_EQ(mgp_value_get_double(v2), 4.0);
+  mgp_value_destroy(value);
}

int main(int argc, char **argv) {
diff --git a/tests/unit/query_profile.cpp b/tests/unit/query_profile.cpp
index 6f80f3812..09d657201 100644
--- a/tests/unit/query_profile.cpp
+++ b/tests/unit/query_profile.cpp
@@ -19,7 +19,7 @@ TEST(QueryProfileTest, SimpleQuery) {
  // | * Once        |             2 |   25.000000 % |   0.250000 ms |
  // +---------------+---------------+---------------+---------------+
  // clang-format: on
-  auto table = ProfilingStatsToTable(produce, total_time);
+  auto table = ProfilingStatsToTable(ProfilingStatsWithTotalTime{produce, total_time});

  EXPECT_EQ(table[0][0].ValueString(), "* Produce");
  EXPECT_EQ(table[0][1].ValueInt(), 2);
@@ -48,7 +48,7 @@ TEST(QueryProfileTest, SimpleQuery) {
  //   "relative_time": 0.75
  // }
  // clang-format: on
-  auto json = ProfilingStatsToJson(produce, total_time);
+  auto json = ProfilingStatsToJson(ProfilingStatsWithTotalTime{produce, total_time});

  /*
   * NOTE: When one of these comparisons fails and Google Test tries to report
@@ -94,7 +94,7 @@ TEST(QueryProfileTest, ComplicatedQuery) {
  // | * Once (1)     |              2 |     5.000000 % |    0.050000 ms |
  // +----------------+----------------+----------------+----------------+
  // clang-format: on
-  auto table = ProfilingStatsToTable(produce, total_time);
+  auto table = ProfilingStatsToTable({produce, total_time});

  EXPECT_EQ(table[0][0].ValueString(), "* Produce");
  EXPECT_EQ(table[0][1].ValueInt(), 2);
@@ -209,7 +209,7 @@ TEST(QueryProfileTest, ComplicatedQuery) {
  //   "relative_time": 0.1,
  // }
  // clang-format: on
-  auto json = ProfilingStatsToJson(produce, total_time);
+  auto json = ProfilingStatsToJson(ProfilingStatsWithTotalTime{produce, total_time});

  EXPECT_EQ(json["actual_hits"], 2);
  EXPECT_EQ(json["relative_time"], 0.1);
diff --git a/tests/unit/query_required_privileges.cpp b/tests/unit/query_required_privileges.cpp
index 2e929ee45..8ce38ee55 100644
--- a/tests/unit/query_required_privileges.cpp
+++ b/tests/unit/query_required_privileges.cpp
@@ -1,6 +1,7 @@
#include <gmock/gmock.h>
#include <gtest/gtest.h>

+#include "query/frontend/ast/ast_visitor.hpp"
#include "query/frontend/semantic/required_privileges.hpp"
#include "storage/v2/id_types.hpp"
@@ -131,3 +132,30 @@ TEST_F(TestPrivilegeExtractor, DumpDatabase) {
  auto *query = storage.Create();
  EXPECT_THAT(GetRequiredPrivileges(query), UnorderedElementsAre(AuthQuery::Privilege::DUMP));
}
+
+TEST_F(TestPrivilegeExtractor, ReadFile) {
+  auto load_csv = storage.Create();
+  load_csv->row_var_ = IDENT("row");
+  auto *query = QUERY(SINGLE_QUERY(load_csv));
+  EXPECT_THAT(GetRequiredPrivileges(query), UnorderedElementsAre(AuthQuery::Privilege::READ_FILE));
+}
+
+TEST_F(TestPrivilegeExtractor, LockPathQuery) {
+  auto *query = storage.Create();
+  EXPECT_THAT(GetRequiredPrivileges(query), UnorderedElementsAre(AuthQuery::Privilege::LOCK_PATH));
+}
+
+TEST_F(TestPrivilegeExtractor, FreeMemoryQuery) {
+  auto *query = storage.Create();
+  EXPECT_THAT(GetRequiredPrivileges(query), UnorderedElementsAre(AuthQuery::Privilege::FREE_MEMORY));
+}
+
+TEST_F(TestPrivilegeExtractor, TriggerQuery) {
+  auto *query = storage.Create();
+  EXPECT_THAT(GetRequiredPrivileges(query), UnorderedElementsAre(AuthQuery::Privilege::TRIGGER));
+}
+
+TEST_F(TestPrivilegeExtractor, SetIsolationLevelQuery) {
+  auto *query = storage.Create();
+  EXPECT_THAT(GetRequiredPrivileges(query), UnorderedElementsAre(AuthQuery::Privilege::CONFIG));
+}
diff --git a/tests/unit/query_semantic.cpp b/tests/unit/query_semantic.cpp
index 4d243382a..1e19f6594 100644
--- a/tests/unit/query_semantic.cpp
+++ b/tests/unit/query_semantic.cpp
@@ -3,6 +3,7 @@
#include

#include "gtest/gtest.h"

+#include "query/exceptions.hpp"
#include "query/frontend/ast/ast.hpp"
#include "query/frontend/semantic/symbol_generator.hpp"
#include "query/frontend/semantic/symbol_table.hpp"
@@ -1093,3 +1094,53 @@ TEST(TestSymbolTable, CreateAnonymousSymbolWithExistingUserSymbolCalledAnon) {
  auto anon2 = symbol_table.CreateAnonymousSymbol();
  ASSERT_EQ(anon2.name_, "anon2");
}
+
+TEST_F(TestSymbolGenerator, PredefinedIdentifiers) {
+  auto *first_op = IDENT("first_op", false);
+  auto *second_op = IDENT("second_op", false);
+  // RETURN first_op + second_op AS result
+  auto query = QUERY(SINGLE_QUERY(RETURN(ADD(first_op, second_op), AS("result"))));
+  EXPECT_THROW(query::MakeSymbolTable(query), SemanticException);
+  EXPECT_THROW(query::MakeSymbolTable(query, {first_op}), SemanticException);
+  EXPECT_THROW(query::MakeSymbolTable(query, {second_op}), SemanticException);
+  auto symbol_table = query::MakeSymbolTable(query, {first_op, second_op});
+  ASSERT_EQ(symbol_table.max_position(), 3);
+
+  // a predefined identifier can only be used in one scope
+  // RETURN first_op + second_op AS result UNION RETURN second_op + first_op AS result
+  query = QUERY(SINGLE_QUERY(RETURN(ADD(first_op, second_op), AS("result"))),
+                UNION(SINGLE_QUERY(RETURN(ADD(second_op, first_op), AS("result")))));
+  ASSERT_THROW(query::MakeSymbolTable(query, {first_op, second_op}), SemanticException);
+
+  // a predefined identifier can be introduced in any of the scopes;
+  // different predefined identifiers can be introduced in different scopes
+  // RETURN first_op AS result UNION RETURN second_op AS result
+  query = QUERY(SINGLE_QUERY(RETURN(first_op, AS("result"))), UNION(SINGLE_QUERY(RETURN(second_op, AS("result")))));
+  ASSERT_THROW(query::MakeSymbolTable(query), SemanticException);
+  symbol_table = query::MakeSymbolTable(query, {first_op, second_op});
+  ASSERT_EQ(symbol_table.max_position(), 5);
+
+  // WITH statement resets the scope, but the predefined identifier is okay
+  // because it's the first introduction of it in the query
+  // WITH 1 as one RETURN first_op AS first
+  query = QUERY(SINGLE_QUERY(WITH(LITERAL(1), AS("one")), RETURN(first_op, AS("first"))));
+  ASSERT_THROW(query::MakeSymbolTable(query), SemanticException);
+  symbol_table = query::MakeSymbolTable(query, {first_op});
+  ASSERT_EQ(symbol_table.max_position(), 3);
+
+  // In the first scope, first_op represents the identifier created by the match;
+  // in the second it represents the predefined identifier
+  // MATCH(first_op) WITH first_op as n RETURN first_op, n
+  query = QUERY(SINGLE_QUERY(MATCH(PATTERN(NODE("first_op"))), WITH("first_op", AS("n")), RETURN("first_op", "n")));
+  ASSERT_THROW(query::MakeSymbolTable(query), SemanticException);
+  symbol_table = query::MakeSymbolTable(query, {first_op});
+  ASSERT_EQ(symbol_table.max_position(), 6);
+
+  // You cannot redeclare the predefined identifier in the same scope
+  // UNWIND first_op as u CREATE(first_op {prop: u})
+  auto unwind = UNWIND(first_op, AS("u"));
+  auto node = NODE("first_op");
+  node->properties_[storage.GetPropertyIx("prop")] = dynamic_cast(unwind->named_expression_->expression_);
+  query = QUERY(SINGLE_QUERY(unwind, CREATE(PATTERN(node))));
+  ASSERT_THROW(query::MakeSymbolTable(query, {first_op}), SemanticException);
+}
diff --git a/tests/unit/query_serialization_property_value.cpp b/tests/unit/query_serialization_property_value.cpp
new file mode 100644
index 000000000..569d13ec2
--- /dev/null
+++ b/tests/unit/query_serialization_property_value.cpp
@@ -0,0 +1,85 @@
+#include <gtest/gtest.h>
+
+#include "query/serialization/property_value.hpp"
+#include "utils/logging.hpp"
+
+namespace {
+void ExpectPropEq(const storage::PropertyValue &a, const storage::PropertyValue &b) {
+  ASSERT_EQ(a.type(), b.type());
+  ASSERT_EQ(a, b);
+}
+
+void CheckJsonConversion(const storage::PropertyValue &property_value) {
+  const auto json_string = query::serialization::SerializePropertyValue(property_value).dump();
+  const auto json_object = nlohmann::json::parse(json_string);
+  ExpectPropEq(property_value, query::serialization::DeserializePropertyValue(json_object));
+}
+
+}  // namespace
+
+TEST(PropertyValueSerializationTest, Null) { CheckJsonConversion(storage::PropertyValue{}); }
+
+TEST(PropertyValueSerializationTest, Bool) {
+  CheckJsonConversion(storage::PropertyValue{true});
+  CheckJsonConversion(storage::PropertyValue{false});
+}
+
+TEST(PropertyValueSerializationTest, Int) {
+  CheckJsonConversion(storage::PropertyValue{1});
+  CheckJsonConversion(storage::PropertyValue{100});
+}
+
+TEST(PropertyValueSerializationTest, Double) {
+  CheckJsonConversion(storage::PropertyValue{1.0});
+  CheckJsonConversion(storage::PropertyValue{2.321});
+}
+
+TEST(PropertyValueSerializationTest, String) {
+  CheckJsonConversion(storage::PropertyValue{"TestString"});
+  CheckJsonConversion(storage::PropertyValue{""});
+}
+
+namespace {
+
+std::vector<storage::PropertyValue> GetPropertyValueListWithBasicTypes() {
+  return {storage::PropertyValue{}, storage::PropertyValue{true}, storage::PropertyValue{"string"},
+          storage::PropertyValue{1},
storage::PropertyValue{1.0}}; +} + +std::map GetPropertyValueMapWithBasicTypes() { + return {{"null", storage::PropertyValue{}}, + {"bool", storage::PropertyValue{true}}, + {"int", storage::PropertyValue{1}}, + {"double", storage::PropertyValue{1.0}}, + {"string", storage::PropertyValue{"string"}}}; +} + +} // namespace + +TEST(PropertyValueSerializationTest, List) { + storage::PropertyValue list = storage::PropertyValue{GetPropertyValueListWithBasicTypes()}; + + SPDLOG_DEBUG("Basic list"); + CheckJsonConversion(list); + + SPDLOG_DEBUG("Nested list"); + CheckJsonConversion(storage::PropertyValue{std::vector{list, list}}); + + SPDLOG_DEBUG("List with map"); + list.ValueList().emplace_back(GetPropertyValueMapWithBasicTypes()); + CheckJsonConversion(list); +} + +TEST(PropertyValueSerializationTest, Map) { + auto map = GetPropertyValueMapWithBasicTypes(); + SPDLOG_DEBUG("Basic map"); + CheckJsonConversion(storage::PropertyValue{map}); + + SPDLOG_DEBUG("Nested map"); + map.emplace("map", storage::PropertyValue{map}); + CheckJsonConversion(storage::PropertyValue{map}); + + SPDLOG_DEBUG("Map with list"); + map.emplace("list", storage::PropertyValue{GetPropertyValueListWithBasicTypes()}); + CheckJsonConversion(storage::PropertyValue{map}); +} diff --git a/tests/unit/query_trigger.cpp b/tests/unit/query_trigger.cpp new file mode 100644 index 000000000..27eac69f6 --- /dev/null +++ b/tests/unit/query_trigger.cpp @@ -0,0 +1,1082 @@ +#include +#include + +#include +#include "query/db_accessor.hpp" +#include "query/interpreter.hpp" +#include "query/trigger.hpp" +#include "query/typed_value.hpp" +#include "utils/memory.hpp" + +namespace { +const std::unordered_set kAllEventTypes{ + query::TriggerEventType::ANY, query::TriggerEventType::VERTEX_CREATE, query::TriggerEventType::EDGE_CREATE, + query::TriggerEventType::CREATE, query::TriggerEventType::VERTEX_DELETE, query::TriggerEventType::EDGE_DELETE, + query::TriggerEventType::DELETE, query::TriggerEventType::VERTEX_UPDATE, query::TriggerEventType::EDGE_UPDATE, + query::TriggerEventType::UPDATE, +}; +} // namespace + +class TriggerContextTest : public ::testing::Test { + public: + void SetUp() override { db.emplace(); } + + void TearDown() override { + accessors.clear(); + db.reset(); + } + + storage::Storage::Accessor &StartTransaction() { + accessors.push_back(db->Access()); + return accessors.back(); + } + + protected: + std::optional db; + std::list accessors; +}; + +namespace { +void CheckTypedValueSize(const query::TriggerContext &trigger_context, const query::TriggerIdentifierTag tag, + const size_t expected_size, query::DbAccessor &dba) { + auto typed_values = trigger_context.GetTypedValue(tag, &dba); + ASSERT_TRUE(typed_values.IsList()); + ASSERT_EQ(expected_size, typed_values.ValueList().size()); +}; + +void CheckLabelList(const query::TriggerContext &trigger_context, const query::TriggerIdentifierTag tag, + const size_t expected, query::DbAccessor &dba) { + auto typed_values = trigger_context.GetTypedValue(tag, &dba); + ASSERT_TRUE(typed_values.IsList()); + const auto &label_maps = typed_values.ValueList(); + size_t value_count = 0; + for (const auto &label_map : label_maps) { + ASSERT_TRUE(label_map.IsMap()); + const auto &typed_values_map = label_map.ValueMap(); + ASSERT_EQ(typed_values_map.size(), 2); + const auto label_it = typed_values_map.find("label"); + ASSERT_NE(label_it, typed_values_map.end()); + ASSERT_TRUE(label_it->second.IsString()); + const auto vertices_it = typed_values_map.find("vertices"); + ASSERT_NE(vertices_it, 
typed_values_map.end());
+    ASSERT_TRUE(vertices_it->second.IsList());
+    value_count += vertices_it->second.ValueList().size();
+  }
+  ASSERT_EQ(value_count, expected);
+};
+}  // namespace
+
+// Ensure that TriggerContext returns only valid objects.
+// Returned TypedValue should always contain only objects
+// that exist (unless it's explicitly created for the deleted object)
+TEST_F(TriggerContextTest, ValidObjectsTest) {
+  query::TriggerContext trigger_context;
+  query::TriggerContextCollector trigger_context_collector{kAllEventTypes};
+
+  size_t vertex_count = 0;
+  size_t edge_count = 0;
+  {
+    query::DbAccessor dba{&StartTransaction()};
+
+    auto create_vertex = [&] {
+      auto created_vertex = dba.InsertVertex();
+      trigger_context_collector.RegisterCreatedObject(created_vertex);
+      ++vertex_count;
+      return created_vertex;
+    };
+
+    // Create vertices and add them to the trigger context as created
+    std::vector<query::VertexAccessor> vertices;
+    for (size_t i = 0; i < 4; ++i) {
+      vertices.push_back(create_vertex());
+    }
+
+    auto create_edge = [&](auto &from, auto &to) {
+      auto maybe_edge = dba.InsertEdge(&from, &to, dba.NameToEdgeType("EDGE"));
+      ASSERT_FALSE(maybe_edge.HasError());
+      trigger_context_collector.RegisterCreatedObject(*maybe_edge);
+      ++edge_count;
+    };
+
+    // Create edges and add them to the trigger context as created
+    create_edge(vertices[0], vertices[1]);
+    create_edge(vertices[1], vertices[2]);
+    create_edge(vertices[2], vertices[3]);
+
+    dba.AdvanceCommand();
+    trigger_context = std::move(trigger_context_collector).TransformToTriggerContext();
+    trigger_context_collector = query::TriggerContextCollector{kAllEventTypes};
+
+    // Should have all the created objects
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::CREATED_VERTICES, vertex_count, dba);
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::CREATED_EDGES, edge_count, dba);
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::CREATED_OBJECTS, vertex_count + edge_count, dba);
+
+    // we delete one of the vertices and edges in the same transaction
+    ASSERT_TRUE(dba.DetachRemoveVertex(&vertices[0]).HasValue());
+    --vertex_count;
+    --edge_count;
+
+    dba.AdvanceCommand();
+
+    // Should have one less created object for vertex and edge
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::CREATED_VERTICES, vertex_count, dba);
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::CREATED_EDGES, edge_count, dba);
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::CREATED_OBJECTS, vertex_count + edge_count, dba);
+
+    ASSERT_FALSE(dba.Commit().HasError());
+  }
+
+  {
+    query::DbAccessor dba{&StartTransaction()};
+    trigger_context.AdaptForAccessor(&dba);
+
+    // Should have one less created object for vertex and edge
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::CREATED_VERTICES, vertex_count, dba);
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::CREATED_EDGES, edge_count, dba);
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::CREATED_OBJECTS, vertex_count + edge_count, dba);
+  }
+
+  size_t deleted_vertex_count = 0;
+  size_t deleted_edge_count = 0;
+  {
+    query::DbAccessor dba{&StartTransaction()};
+
+    // register each type of change for each object
+    {
+      auto vertices = dba.Vertices(storage::View::OLD);
+      for (auto vertex : vertices) {
+        trigger_context_collector.RegisterSetObjectProperty(vertex, dba.NameToProperty("PROPERTY1"),
+                                                            query::TypedValue("Value"), query::TypedValue("ValueNew"));
+        trigger_context_collector.RegisterRemovedObjectProperty(vertex, dba.NameToProperty("PROPERTY2"),
+                                                                query::TypedValue("Value"));
+        trigger_context_collector.RegisterSetVertexLabel(vertex, dba.NameToLabel("LABEL1"));
+        trigger_context_collector.RegisterRemovedVertexLabel(vertex, dba.NameToLabel("LABEL2"));
+
+        auto out_edges = vertex.OutEdges(storage::View::OLD);
+        ASSERT_TRUE(out_edges.HasValue());
+
+        for (auto edge : *out_edges) {
+          trigger_context_collector.RegisterSetObjectProperty(
+              edge, dba.NameToProperty("PROPERTY1"), query::TypedValue("Value"), query::TypedValue("ValueNew"));
+          trigger_context_collector.RegisterRemovedObjectProperty(edge, dba.NameToProperty("PROPERTY2"),
+                                                                  query::TypedValue("Value"));
+        }
+      }
+    }
+
+    // Delete the first vertex with its edge and register the deleted object
+    {
+      auto vertices = dba.Vertices(storage::View::OLD);
+      for (auto vertex : vertices) {
+        const auto maybe_values = dba.DetachRemoveVertex(&vertex);
+        ASSERT_TRUE(maybe_values.HasValue());
+        ASSERT_TRUE(maybe_values.GetValue());
+        const auto &[deleted_vertex, deleted_edges] = *maybe_values.GetValue();
+
+        trigger_context_collector.RegisterDeletedObject(deleted_vertex);
+        ++deleted_vertex_count;
+        --vertex_count;
+        for (const auto &edge : deleted_edges) {
+          trigger_context_collector.RegisterDeletedObject(edge);
+          ++deleted_edge_count;
+          --edge_count;
+        }
+
+        break;
+      }
+    }
+
+    dba.AdvanceCommand();
+    ASSERT_FALSE(dba.Commit().HasError());
+
+    trigger_context = std::move(trigger_context_collector).TransformToTriggerContext();
+    trigger_context_collector = query::TriggerContextCollector{kAllEventTypes};
+
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::SET_VERTEX_PROPERTIES, vertex_count, dba);
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::SET_EDGE_PROPERTIES, edge_count, dba);
+
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::REMOVED_VERTEX_PROPERTIES, vertex_count, dba);
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::REMOVED_EDGE_PROPERTIES, edge_count, dba);
+
+    CheckLabelList(trigger_context, query::TriggerIdentifierTag::SET_VERTEX_LABELS, vertex_count, dba);
+    CheckLabelList(trigger_context, query::TriggerIdentifierTag::REMOVED_VERTEX_LABELS, vertex_count, dba);
+
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::UPDATED_VERTICES, 4 * vertex_count, dba);
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::UPDATED_EDGES, 2 * edge_count, dba);
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::UPDATED_OBJECTS,
+                        4 * vertex_count + 2 * edge_count, dba);
+
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::DELETED_VERTICES, deleted_vertex_count, dba);
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::DELETED_EDGES, deleted_edge_count, dba);
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::DELETED_OBJECTS,
+                        deleted_vertex_count + deleted_edge_count, dba);
+  }
+
+  // Delete a single vertex with its edges; this should reduce the number of typed values returned by the trigger
+  // context for each update event.
+  // TypedValues of the deleted objects stay the same as they're bound to the transaction which deleted them.
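+  // (In other words: the deletion in the next transaction shrinks the SET_*/REMOVED_* and UPDATED_* counts
+  // checked below, while the DELETED_* counts keep reflecting the transaction that registered the deletions.)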
+  {
+    query::DbAccessor dba{&StartTransaction()};
+    trigger_context.AdaptForAccessor(&dba);
+
+    auto vertices = dba.Vertices(storage::View::OLD);
+    for (auto vertex : vertices) {
+      ASSERT_TRUE(dba.DetachRemoveVertex(&vertex).HasValue());
+      break;
+    }
+    --vertex_count;
+    --edge_count;
+
+    ASSERT_FALSE(dba.Commit().HasError());
+  }
+
+  {
+    query::DbAccessor dba{&StartTransaction()};
+    trigger_context.AdaptForAccessor(&dba);
+
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::SET_VERTEX_PROPERTIES, vertex_count, dba);
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::SET_EDGE_PROPERTIES, edge_count, dba);
+
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::REMOVED_VERTEX_PROPERTIES, vertex_count, dba);
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::REMOVED_EDGE_PROPERTIES, edge_count, dba);
+
+    CheckLabelList(trigger_context, query::TriggerIdentifierTag::SET_VERTEX_LABELS, vertex_count, dba);
+    CheckLabelList(trigger_context, query::TriggerIdentifierTag::REMOVED_VERTEX_LABELS, vertex_count, dba);
+
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::UPDATED_VERTICES, 4 * vertex_count, dba);
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::UPDATED_EDGES, 2 * edge_count, dba);
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::UPDATED_OBJECTS,
+                        4 * vertex_count + 2 * edge_count, dba);
+
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::DELETED_VERTICES, deleted_vertex_count, dba);
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::DELETED_EDGES, deleted_edge_count, dba);
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::DELETED_OBJECTS,
+                        deleted_vertex_count + deleted_edge_count, dba);
+  }
+}
+
+// If the trigger context registered a created object, each future event on the same object will be ignored.
+// Binding the trigger context to the transaction means that creating and updating an object in the same transaction
+// will return only the CREATE event.
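+// For example (mirroring what the test below asserts): a vertex that is inserted and then has its
+// properties and labels changed within the same transaction appears in CREATED_VERTICES/CREATED_OBJECTS
+// only, while every UPDATED_*, SET_* and REMOVED_* list stays empty.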
+TEST_F(TriggerContextTest, ReturnCreateOnlyEvent) {
+  query::TriggerContextCollector trigger_context_collector{kAllEventTypes};
+
+  query::DbAccessor dba{&StartTransaction()};
+
+  auto create_vertex = [&] {
+    auto vertex = dba.InsertVertex();
+    trigger_context_collector.RegisterCreatedObject(vertex);
+    trigger_context_collector.RegisterSetObjectProperty(vertex, dba.NameToProperty("PROPERTY1"),
+                                                        query::TypedValue("Value"), query::TypedValue("ValueNew"));
+    trigger_context_collector.RegisterRemovedObjectProperty(vertex, dba.NameToProperty("PROPERTY2"),
+                                                            query::TypedValue("Value"));
+    trigger_context_collector.RegisterSetVertexLabel(vertex, dba.NameToLabel("LABEL1"));
+    trigger_context_collector.RegisterRemovedVertexLabel(vertex, dba.NameToLabel("LABEL2"));
+    return vertex;
+  };
+
+  auto v1 = create_vertex();
+  auto v2 = create_vertex();
+  auto maybe_edge = dba.InsertEdge(&v1, &v2, dba.NameToEdgeType("EDGE"));
+  ASSERT_FALSE(maybe_edge.HasError());
+  trigger_context_collector.RegisterCreatedObject(*maybe_edge);
+  trigger_context_collector.RegisterSetObjectProperty(*maybe_edge, dba.NameToProperty("PROPERTY1"),
+                                                      query::TypedValue("Value"), query::TypedValue("ValueNew"));
+  trigger_context_collector.RegisterRemovedObjectProperty(*maybe_edge, dba.NameToProperty("PROPERTY2"),
+                                                          query::TypedValue("Value"));
+
+  dba.AdvanceCommand();
+
+  const auto trigger_context = std::move(trigger_context_collector).TransformToTriggerContext();
+
+  CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::CREATED_VERTICES, 2, dba);
+  CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::CREATED_EDGES, 1, dba);
+  CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::CREATED_OBJECTS, 3, dba);
+
+  CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::SET_VERTEX_PROPERTIES, 0, dba);
+  CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::SET_EDGE_PROPERTIES, 0, dba);
+
+  CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::REMOVED_VERTEX_PROPERTIES, 0, dba);
+  CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::REMOVED_EDGE_PROPERTIES, 0, dba);
+
+  CheckLabelList(trigger_context, query::TriggerIdentifierTag::SET_VERTEX_LABELS, 0, dba);
+  CheckLabelList(trigger_context, query::TriggerIdentifierTag::REMOVED_VERTEX_LABELS, 0, dba);
+
+  CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::UPDATED_VERTICES, 0, dba);
+  CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::UPDATED_EDGES, 0, dba);
+  CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::UPDATED_OBJECTS, 0, dba);
+}
+
+namespace {
+void EXPECT_PROP_TRUE(const query::TypedValue &a) {
+  EXPECT_TRUE(a.type() == query::TypedValue::Type::Bool && a.ValueBool());
+}
+
+void EXPECT_PROP_EQ(const query::TypedValue &a, const query::TypedValue &b) { EXPECT_PROP_TRUE(a == b); }
+}  // namespace
+
+// During a transaction, the same property of the same object can change multiple times. TriggerContext should ensure
+// that only the change to the global value is returned (value before the transaction + latest value after the
+// transaction); everything in between should be ignored.
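+// Concretely (mirroring the blocks below): SET(a -> b) followed by SET(b -> c) collapses into a single
+// SET(a -> c) event; SET(a -> b) followed by REMOVE(b) collapses into REMOVE(a); and any sequence that
+// ends back at the starting value (e.g. SET(a -> b), SET(b -> a)) produces no update event at all.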
+TEST_F(TriggerContextTest, GlobalPropertyChange) { + query::DbAccessor dba{&StartTransaction()}; + const std::unordered_set event_types{query::TriggerEventType::VERTEX_UPDATE}; + + auto v = dba.InsertVertex(); + dba.AdvanceCommand(); + + { + SPDLOG_DEBUG("SET -> SET"); + query::TriggerContextCollector trigger_context_collector{event_types}; + trigger_context_collector.RegisterSetObjectProperty(v, dba.NameToProperty("PROPERTY"), query::TypedValue("Value"), + query::TypedValue("ValueNew")); + trigger_context_collector.RegisterSetObjectProperty(v, dba.NameToProperty("PROPERTY"), + query::TypedValue("ValueNew"), query::TypedValue("ValueNewer")); + const auto trigger_context = std::move(trigger_context_collector).TransformToTriggerContext(); + auto updated_vertices = trigger_context.GetTypedValue(query::TriggerIdentifierTag::UPDATED_VERTICES, &dba); + ASSERT_TRUE(updated_vertices.IsList()); + auto &updated_vertices_list = updated_vertices.ValueList(); + ASSERT_EQ(updated_vertices_list.size(), 1); + auto &update = updated_vertices_list[0]; + ASSERT_TRUE(update.IsMap()); + EXPECT_PROP_EQ(update, query::TypedValue{std::map{ + {"event_type", query::TypedValue{"set_vertex_property"}}, + {"vertex", query::TypedValue{v}}, + {"key", query::TypedValue{"PROPERTY"}}, + {"old", query::TypedValue{"Value"}}, + {"new", query::TypedValue{"ValueNewer"}}}}); + } + + { + SPDLOG_DEBUG("SET -> REMOVE"); + query::TriggerContextCollector trigger_context_collector{event_types}; + trigger_context_collector.RegisterSetObjectProperty(v, dba.NameToProperty("PROPERTY"), query::TypedValue("Value"), + query::TypedValue("ValueNew")); + trigger_context_collector.RegisterRemovedObjectProperty(v, dba.NameToProperty("PROPERTY"), + query::TypedValue("ValueNew")); + const auto trigger_context = std::move(trigger_context_collector).TransformToTriggerContext(); + auto updated_vertices = trigger_context.GetTypedValue(query::TriggerIdentifierTag::UPDATED_VERTICES, &dba); + ASSERT_TRUE(updated_vertices.IsList()); + auto &updated_vertices_list = updated_vertices.ValueList(); + ASSERT_EQ(updated_vertices_list.size(), 1); + auto &update = updated_vertices_list[0]; + ASSERT_TRUE(update.IsMap()); + EXPECT_PROP_EQ(update, query::TypedValue{std::map{ + {"event_type", query::TypedValue{"removed_vertex_property"}}, + {"vertex", query::TypedValue{v}}, + {"key", query::TypedValue{"PROPERTY"}}, + {"old", query::TypedValue{"Value"}}}}); + } + + { + SPDLOG_DEBUG("REMOVE -> SET"); + query::TriggerContextCollector trigger_context_collector{event_types}; + trigger_context_collector.RegisterRemovedObjectProperty(v, dba.NameToProperty("PROPERTY"), + query::TypedValue("Value")); + trigger_context_collector.RegisterSetObjectProperty(v, dba.NameToProperty("PROPERTY"), query::TypedValue(), + query::TypedValue("ValueNew")); + const auto trigger_context = std::move(trigger_context_collector).TransformToTriggerContext(); + auto updated_vertices = trigger_context.GetTypedValue(query::TriggerIdentifierTag::UPDATED_VERTICES, &dba); + ASSERT_TRUE(updated_vertices.IsList()); + auto &updated_vertices_list = updated_vertices.ValueList(); + ASSERT_EQ(updated_vertices_list.size(), 1); + auto &update = updated_vertices_list[0]; + ASSERT_TRUE(update.IsMap()); + EXPECT_PROP_EQ(update, query::TypedValue{std::map{ + {"event_type", query::TypedValue{"set_vertex_property"}}, + {"vertex", query::TypedValue{v}}, + {"key", query::TypedValue{"PROPERTY"}}, + {"old", query::TypedValue{"Value"}}, + {"new", query::TypedValue{"ValueNew"}}}}); + } + + { + SPDLOG_DEBUG("REMOVE -> 
REMOVE"); + query::TriggerContextCollector trigger_context_collector{event_types}; + trigger_context_collector.RegisterRemovedObjectProperty(v, dba.NameToProperty("PROPERTY"), + query::TypedValue("Value")); + trigger_context_collector.RegisterRemovedObjectProperty(v, dba.NameToProperty("PROPERTY"), query::TypedValue()); + const auto trigger_context = std::move(trigger_context_collector).TransformToTriggerContext(); + auto updated_vertices = trigger_context.GetTypedValue(query::TriggerIdentifierTag::UPDATED_VERTICES, &dba); + ASSERT_TRUE(updated_vertices.IsList()); + auto &updated_vertices_list = updated_vertices.ValueList(); + ASSERT_EQ(updated_vertices_list.size(), 1); + auto &update = updated_vertices_list[0]; + ASSERT_TRUE(update.IsMap()); + EXPECT_PROP_EQ(update, query::TypedValue{std::map{ + {"event_type", query::TypedValue{"removed_vertex_property"}}, + {"vertex", query::TypedValue{v}}, + {"key", query::TypedValue{"PROPERTY"}}, + {"old", query::TypedValue{"Value"}}}}); + } + + { + SPDLOG_DEBUG("SET -> SET (no change on transaction level)"); + query::TriggerContextCollector trigger_context_collector{event_types}; + trigger_context_collector.RegisterSetObjectProperty(v, dba.NameToProperty("PROPERTY"), query::TypedValue("Value"), + query::TypedValue("ValueNew")); + trigger_context_collector.RegisterSetObjectProperty(v, dba.NameToProperty("PROPERTY"), + query::TypedValue("ValueNew"), query::TypedValue("Value")); + const auto trigger_context = std::move(trigger_context_collector).TransformToTriggerContext(); + auto updated_vertices = trigger_context.GetTypedValue(query::TriggerIdentifierTag::UPDATED_VERTICES, &dba); + ASSERT_TRUE(updated_vertices.IsList()); + auto &updated_vertices_list = updated_vertices.ValueList(); + ASSERT_EQ(updated_vertices_list.size(), 0); + } + + { + SPDLOG_DEBUG("SET -> REMOVE (no change on transaction level)"); + query::TriggerContextCollector trigger_context_collector{event_types}; + trigger_context_collector.RegisterSetObjectProperty(v, dba.NameToProperty("PROPERTY"), query::TypedValue(), + query::TypedValue("ValueNew")); + trigger_context_collector.RegisterRemovedObjectProperty(v, dba.NameToProperty("PROPERTY"), + query::TypedValue("ValueNew")); + const auto trigger_context = std::move(trigger_context_collector).TransformToTriggerContext(); + auto updated_vertices = trigger_context.GetTypedValue(query::TriggerIdentifierTag::UPDATED_VERTICES, &dba); + ASSERT_TRUE(updated_vertices.IsList()); + auto &updated_vertices_list = updated_vertices.ValueList(); + ASSERT_EQ(updated_vertices_list.size(), 0); + } + + { + SPDLOG_DEBUG("REMOVE -> SET (no change on transaction level)"); + query::TriggerContextCollector trigger_context_collector{event_types}; + trigger_context_collector.RegisterRemovedObjectProperty(v, dba.NameToProperty("PROPERTY"), + query::TypedValue("Value")); + trigger_context_collector.RegisterSetObjectProperty(v, dba.NameToProperty("PROPERTY"), query::TypedValue(), + query::TypedValue("Value")); + const auto trigger_context = std::move(trigger_context_collector).TransformToTriggerContext(); + auto updated_vertices = trigger_context.GetTypedValue(query::TriggerIdentifierTag::UPDATED_VERTICES, &dba); + ASSERT_TRUE(updated_vertices.IsList()); + auto &updated_vertices_list = updated_vertices.ValueList(); + ASSERT_EQ(updated_vertices_list.size(), 0); + } + + { + SPDLOG_DEBUG("REMOVE -> REMOVE (no change on transaction level)"); + query::TriggerContextCollector trigger_context_collector{event_types}; + 
trigger_context_collector.RegisterRemovedObjectProperty(v, dba.NameToProperty("PROPERTY"), query::TypedValue());
+    trigger_context_collector.RegisterRemovedObjectProperty(v, dba.NameToProperty("PROPERTY"), query::TypedValue());
+    const auto trigger_context = std::move(trigger_context_collector).TransformToTriggerContext();
+    auto updated_vertices = trigger_context.GetTypedValue(query::TriggerIdentifierTag::UPDATED_VERTICES, &dba);
+    ASSERT_TRUE(updated_vertices.IsList());
+    auto &updated_vertices_list = updated_vertices.ValueList();
+    ASSERT_EQ(updated_vertices_list.size(), 0);
+  }
+
+  {
+    SPDLOG_DEBUG("SET -> REMOVE -> SET -> REMOVE -> SET");
+    query::TriggerContextCollector trigger_context_collector{event_types};
+    trigger_context_collector.RegisterSetObjectProperty(v, dba.NameToProperty("PROPERTY"), query::TypedValue("Value0"),
+                                                        query::TypedValue("Value1"));
+    trigger_context_collector.RegisterRemovedObjectProperty(v, dba.NameToProperty("PROPERTY"),
+                                                            query::TypedValue("Value1"));
+    trigger_context_collector.RegisterSetObjectProperty(v, dba.NameToProperty("PROPERTY"), query::TypedValue(),
+                                                        query::TypedValue("Value2"));
+    trigger_context_collector.RegisterRemovedObjectProperty(v, dba.NameToProperty("PROPERTY"),
+                                                            query::TypedValue("Value2"));
+    trigger_context_collector.RegisterSetObjectProperty(v, dba.NameToProperty("PROPERTY"), query::TypedValue(),
+                                                        query::TypedValue("Value3"));
+    const auto trigger_context = std::move(trigger_context_collector).TransformToTriggerContext();
+    auto updated_vertices = trigger_context.GetTypedValue(query::TriggerIdentifierTag::UPDATED_VERTICES, &dba);
+    ASSERT_TRUE(updated_vertices.IsList());
+    auto &updated_vertices_list = updated_vertices.ValueList();
+    ASSERT_EQ(updated_vertices_list.size(), 1);
+    auto &update = updated_vertices_list[0];
+    ASSERT_TRUE(update.IsMap());
+    EXPECT_PROP_EQ(update, query::TypedValue{std::map<std::string, query::TypedValue>{
+                               {"event_type", query::TypedValue{"set_vertex_property"}},
+                               {"vertex", query::TypedValue{v}},
+                               {"key", query::TypedValue{"PROPERTY"}},
+                               {"old", query::TypedValue{"Value0"}},
+                               {"new", query::TypedValue{"Value3"}}}});
+  }
+}
+
+// Same as above, but for label changes
+TEST_F(TriggerContextTest, GlobalLabelChange) {
+  query::DbAccessor dba{&StartTransaction()};
+  const std::unordered_set<query::TriggerEventType> event_types{query::TriggerEventType::VERTEX_UPDATE};
+
+  auto v = dba.InsertVertex();
+  dba.AdvanceCommand();
+
+  const auto label_id = dba.NameToLabel("LABEL");
+  // You cannot add the same label multiple times and you cannot remove non-existing labels,
+  // so REMOVE -> REMOVE and SET -> SET don't make sense
+  {
+    SPDLOG_DEBUG("SET -> REMOVE");
+    query::TriggerContextCollector trigger_context_collector{event_types};
+    trigger_context_collector.RegisterSetVertexLabel(v, label_id);
+    trigger_context_collector.RegisterRemovedVertexLabel(v, label_id);
+    const auto trigger_context = std::move(trigger_context_collector).TransformToTriggerContext();
+    auto updated_vertices = trigger_context.GetTypedValue(query::TriggerIdentifierTag::UPDATED_VERTICES, &dba);
+    ASSERT_TRUE(updated_vertices.IsList());
+    auto &updated_vertices_list = updated_vertices.ValueList();
+    ASSERT_EQ(updated_vertices_list.size(), 0);
+  }
+
+  {
+    SPDLOG_DEBUG("REMOVE -> SET");
+    query::TriggerContextCollector trigger_context_collector{event_types};
+    trigger_context_collector.RegisterRemovedVertexLabel(v, label_id);
+    trigger_context_collector.RegisterSetVertexLabel(v, label_id);
+    const auto trigger_context = std::move(trigger_context_collector).TransformToTriggerContext();
+    auto
updated_vertices = trigger_context.GetTypedValue(query::TriggerIdentifierTag::UPDATED_VERTICES, &dba); + ASSERT_TRUE(updated_vertices.IsList()); + auto &updated_vertices_list = updated_vertices.ValueList(); + ASSERT_EQ(updated_vertices_list.size(), 0); + } + + { + SPDLOG_DEBUG("SET -> REMOVE -> SET -> REMOVE -> SET"); + query::TriggerContextCollector trigger_context_collector{event_types}; + trigger_context_collector.RegisterSetVertexLabel(v, label_id); + trigger_context_collector.RegisterRemovedVertexLabel(v, label_id); + trigger_context_collector.RegisterSetVertexLabel(v, label_id); + trigger_context_collector.RegisterRemovedVertexLabel(v, label_id); + trigger_context_collector.RegisterSetVertexLabel(v, label_id); + const auto trigger_context = std::move(trigger_context_collector).TransformToTriggerContext(); + auto updated_vertices = trigger_context.GetTypedValue(query::TriggerIdentifierTag::UPDATED_VERTICES, &dba); + ASSERT_TRUE(updated_vertices.IsList()); + auto &updated_vertices_list = updated_vertices.ValueList(); + ASSERT_EQ(updated_vertices_list.size(), 1); + auto &update = updated_vertices_list[0]; + ASSERT_TRUE(update.IsMap()); + EXPECT_PROP_EQ(update, query::TypedValue{std::map{ + {"event_type", query::TypedValue{"set_vertex_label"}}, + {"vertex", query::TypedValue{v}}, + {"label", query::TypedValue{"LABEL"}}}}); + } + + { + SPDLOG_DEBUG("REMOVE -> SET -> REMOVE -> SET -> REMOVE"); + query::TriggerContextCollector trigger_context_collector{event_types}; + trigger_context_collector.RegisterRemovedVertexLabel(v, label_id); + trigger_context_collector.RegisterSetVertexLabel(v, label_id); + trigger_context_collector.RegisterRemovedVertexLabel(v, label_id); + trigger_context_collector.RegisterSetVertexLabel(v, label_id); + trigger_context_collector.RegisterRemovedVertexLabel(v, label_id); + const auto trigger_context = std::move(trigger_context_collector).TransformToTriggerContext(); + auto updated_vertices = trigger_context.GetTypedValue(query::TriggerIdentifierTag::UPDATED_VERTICES, &dba); + ASSERT_TRUE(updated_vertices.IsList()); + auto &updated_vertices_list = updated_vertices.ValueList(); + ASSERT_EQ(updated_vertices_list.size(), 1); + auto &update = updated_vertices_list[0]; + ASSERT_TRUE(update.IsMap()); + EXPECT_PROP_EQ(update, query::TypedValue{std::map{ + {"event_type", query::TypedValue{"removed_vertex_label"}}, + {"vertex", query::TypedValue{v}}, + {"label", query::TypedValue{"LABEL"}}}}); + } +} + +namespace { +struct ShouldRegisterExpectation { + bool creation{false}; + bool deletion{false}; + bool update{false}; +}; + +template +void CheckRegisterInfo(const query::TriggerContextCollector &collector, const ShouldRegisterExpectation &expectation) { + EXPECT_EQ(expectation.creation, collector.ShouldRegisterCreatedObject()); + EXPECT_EQ(expectation.deletion, collector.ShouldRegisterDeletedObject()); + EXPECT_EQ(expectation.update, collector.ShouldRegisterObjectPropertyChange()); +} + +size_t BoolToSize(const bool value) { return value ? 
1 : 0; } + +void CheckFilters(const std::unordered_set &event_types, + const ShouldRegisterExpectation &vertex_expectation, + const ShouldRegisterExpectation &edge_expectation, storage::Storage::Accessor *accessor) { + query::TriggerContextCollector collector{event_types}; + { + SCOPED_TRACE("Checking vertex"); + CheckRegisterInfo(collector, vertex_expectation); + } + { + SCOPED_TRACE("Checking edge"); + CheckRegisterInfo(collector, edge_expectation); + } + EXPECT_EQ(collector.ShouldRegisterVertexLabelChange(), vertex_expectation.update); + + query::DbAccessor dba{accessor}; + + auto vertex_to_delete = dba.InsertVertex(); + auto vertex_to_modify = dba.InsertVertex(); + + auto from_vertex = dba.InsertVertex(); + auto to_vertex = dba.InsertVertex(); + auto maybe_edge_to_delete = dba.InsertEdge(&from_vertex, &to_vertex, dba.NameToEdgeType("EDGE")); + auto maybe_edge_to_modify = dba.InsertEdge(&from_vertex, &to_vertex, dba.NameToEdgeType("EDGE")); + auto &edge_to_delete = maybe_edge_to_delete.GetValue(); + auto &edge_to_modify = maybe_edge_to_modify.GetValue(); + + dba.AdvanceCommand(); + + const auto created_vertex = dba.InsertVertex(); + const auto maybe_created_edge = dba.InsertEdge(&from_vertex, &to_vertex, dba.NameToEdgeType("EDGE")); + const auto created_edge = maybe_created_edge.GetValue(); + collector.RegisterCreatedObject(created_vertex); + collector.RegisterCreatedObject(created_edge); + collector.RegisterDeletedObject(dba.RemoveEdge(&edge_to_delete).GetValue().value()); + collector.RegisterDeletedObject(dba.RemoveVertex(&vertex_to_delete).GetValue().value()); + collector.RegisterSetObjectProperty(vertex_to_modify, dba.NameToProperty("UPDATE"), query::TypedValue{1}, + query::TypedValue{2}); + collector.RegisterRemovedObjectProperty(vertex_to_modify, dba.NameToProperty("REMOVE"), query::TypedValue{1}); + collector.RegisterSetObjectProperty(edge_to_modify, dba.NameToProperty("UPDATE"), query::TypedValue{1}, + query::TypedValue{2}); + collector.RegisterRemovedObjectProperty(edge_to_modify, dba.NameToProperty("REMOVE"), query::TypedValue{1}); + collector.RegisterSetVertexLabel(vertex_to_modify, dba.NameToLabel("SET")); + collector.RegisterRemovedVertexLabel(vertex_to_modify, dba.NameToLabel("REMOVE")); + dba.AdvanceCommand(); + + const auto trigger_context = std::move(collector).TransformToTriggerContext(); + const auto created_vertices = BoolToSize(vertex_expectation.creation); + { + SCOPED_TRACE("CREATED_VERTICES"); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::CREATED_VERTICES, created_vertices, dba); + } + const auto created_edges = BoolToSize(edge_expectation.creation); + { + SCOPED_TRACE("CREATED_EDGES"); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::CREATED_EDGES, created_edges, dba); + } + { + SCOPED_TRACE("CREATED_OBJECTS"); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::CREATED_OBJECTS, created_vertices + created_edges, + dba); + } + const auto deleted_vertices = BoolToSize(vertex_expectation.deletion); + { + SCOPED_TRACE("DELETED_VERTICES"); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::DELETED_VERTICES, deleted_vertices, dba); + } + const auto deleted_edges = BoolToSize(edge_expectation.deletion); + { + SCOPED_TRACE("DELETED_EDGES"); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::DELETED_EDGES, deleted_edges, dba); + } + { + SCOPED_TRACE("DELETED_OBJECTS"); + CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::DELETED_OBJECTS, deleted_vertices + 
+  {
+    SCOPED_TRACE("SET_VERTEX_PROPERTIES");
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::SET_VERTEX_PROPERTIES,
+                        BoolToSize(vertex_expectation.update), dba);
+  }
+  {
+    SCOPED_TRACE("SET_EDGE_PROPERTIES");
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::SET_EDGE_PROPERTIES,
+                        BoolToSize(edge_expectation.update), dba);
+  }
+  {
+    SCOPED_TRACE("REMOVED_VERTEX_PROPERTIES");
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::REMOVED_VERTEX_PROPERTIES,
+                        BoolToSize(vertex_expectation.update), dba);
+  }
+  {
+    SCOPED_TRACE("REMOVED_EDGE_PROPERTIES");
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::REMOVED_EDGE_PROPERTIES,
+                        BoolToSize(edge_expectation.update), dba);
+  }
+  const auto set_and_removed_vertex_props_and_labels = BoolToSize(vertex_expectation.update) * 4;
+  {
+    SCOPED_TRACE("UPDATED_VERTICES");
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::UPDATED_VERTICES,
+                        set_and_removed_vertex_props_and_labels, dba);
+  }
+  const auto set_and_removed_edge_props = BoolToSize(edge_expectation.update) * 2;
+  {
+    SCOPED_TRACE("UPDATED_EDGES");
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::UPDATED_EDGES, set_and_removed_edge_props, dba);
+  }
+  // sum of the previous two
+  {
+    SCOPED_TRACE("UPDATED_OBJECTS");
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::UPDATED_OBJECTS,
+                        set_and_removed_vertex_props_and_labels + set_and_removed_edge_props, dba);
+  }
+  {
+    SCOPED_TRACE("SET_VERTEX_LABELS");
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::SET_VERTEX_LABELS,
+                        BoolToSize(vertex_expectation.update), dba);
+  }
+  {
+    SCOPED_TRACE("REMOVED_VERTEX_LABELS");
+    CheckTypedValueSize(trigger_context, query::TriggerIdentifierTag::REMOVED_VERTEX_LABELS,
+                        BoolToSize(vertex_expectation.update), dba);
+  }
+
+  dba.Abort();
+}
+}  // namespace
+
+TEST_F(TriggerContextTest, Filtering) {
+  using TET = query::TriggerEventType;
+  // Check every event type individually
+  {
+    SCOPED_TRACE("TET::ANY");
+    CheckFilters({TET::ANY}, ShouldRegisterExpectation{true, true, true}, ShouldRegisterExpectation{true, true, true},
+                 &StartTransaction());
+  }
+  {
+    SCOPED_TRACE("TET::VERTEX_CREATE");
+    CheckFilters({TET::VERTEX_CREATE}, ShouldRegisterExpectation{true, false, false},
+                 ShouldRegisterExpectation{false, false, false}, &StartTransaction());
+  }
+  {
+    SCOPED_TRACE("TET::EDGE_CREATE");
+    CheckFilters({TET::EDGE_CREATE}, ShouldRegisterExpectation{false, false, false},
+                 ShouldRegisterExpectation{true, false, false}, &StartTransaction());
+  }
+  {
+    SCOPED_TRACE("TET::CREATE");
+    CheckFilters({TET::CREATE}, ShouldRegisterExpectation{true, false, false},
+                 ShouldRegisterExpectation{true, false, false}, &StartTransaction());
+  }
+  {
+    SCOPED_TRACE("TET::VERTEX_DELETE");
+    CheckFilters({TET::VERTEX_DELETE}, ShouldRegisterExpectation{true, true, false},
+                 ShouldRegisterExpectation{false, false, false}, &StartTransaction());
+  }
+  {
+    SCOPED_TRACE("TET::EDGE_DELETE");
+    CheckFilters({TET::EDGE_DELETE}, ShouldRegisterExpectation{false, false, false},
+                 ShouldRegisterExpectation{true, true, false}, &StartTransaction());
+  }
+  {
+    SCOPED_TRACE("TET::DELETE");
+    CheckFilters({TET::DELETE}, ShouldRegisterExpectation{true, true, false},
+                 ShouldRegisterExpectation{true, true, false}, &StartTransaction());
+  }
+  {
+    SCOPED_TRACE("TET::VERTEX_UPDATE");
+    CheckFilters({TET::VERTEX_UPDATE}, ShouldRegisterExpectation{true, false, true},
+                 ShouldRegisterExpectation{false, false, false}, &StartTransaction());
+  }
+  {
+    SCOPED_TRACE("TET::EDGE_UPDATE");
+    CheckFilters({TET::EDGE_UPDATE}, ShouldRegisterExpectation{false, false, false},
+                 ShouldRegisterExpectation{true, false, true}, &StartTransaction());
+  }
+  {
+    SCOPED_TRACE("TET::UPDATE");
+    CheckFilters({TET::UPDATE}, ShouldRegisterExpectation{true, false, true},
+                 ShouldRegisterExpectation{true, false, true}, &StartTransaction());
+  }
+  // Some combined versions
+  {
+    SCOPED_TRACE("TET::VERTEX_UPDATE, TET::EDGE_UPDATE");
+    CheckFilters({TET::VERTEX_UPDATE, TET::EDGE_UPDATE}, ShouldRegisterExpectation{true, false, true},
+                 ShouldRegisterExpectation{true, false, true}, &StartTransaction());
+  }
+  {
+    SCOPED_TRACE("TET::VERTEX_UPDATE, TET::EDGE_UPDATE, TET::DELETE");
+    CheckFilters({TET::VERTEX_UPDATE, TET::EDGE_UPDATE, TET::DELETE}, ShouldRegisterExpectation{true, true, true},
+                 ShouldRegisterExpectation{true, true, true}, &StartTransaction());
+  }
+  {
+    SCOPED_TRACE("TET::UPDATE, TET::VERTEX_DELETE, TET::EDGE_DELETE");
+    CheckFilters({TET::UPDATE, TET::VERTEX_DELETE, TET::EDGE_DELETE}, ShouldRegisterExpectation{true, true, true},
+                 ShouldRegisterExpectation{true, true, true}, &StartTransaction());
+  }
+  {
+    SCOPED_TRACE("TET::VERTEX_CREATE, TET::VERTEX_UPDATE");
+    CheckFilters({TET::VERTEX_CREATE, TET::VERTEX_UPDATE}, ShouldRegisterExpectation{true, false, true},
+                 ShouldRegisterExpectation{false, false, false}, &StartTransaction());
+  }
+  {
+    SCOPED_TRACE("TET::EDGE_CREATE, TET::EDGE_UPDATE");
+    CheckFilters({TET::EDGE_CREATE, TET::EDGE_UPDATE}, ShouldRegisterExpectation{false, false, false},
+                 ShouldRegisterExpectation{true, false, true}, &StartTransaction());
+  }
+}
+
+class TriggerStoreTest : public ::testing::Test {
+ protected:
+  const std::filesystem::path testing_directory{std::filesystem::temp_directory_path() / "MG_test_unit_query_trigger"};
+
+  void SetUp() override {
+    Clear();
+
+    storage_accessor.emplace(storage.Access());
+    dba.emplace(&*storage_accessor);
+  }
+
+  void TearDown() override {
+    Clear();
+
+    dba.reset();
+    storage_accessor.reset();
+  }
+
+  std::optional<query::DbAccessor> dba;
+
+  utils::SkipList<query::QueryCacheEntry> ast_cache;
+  utils::SpinLock antlr_lock;
+
+ private:
+  void Clear() {
+    if (!std::filesystem::exists(testing_directory)) return;
+    std::filesystem::remove_all(testing_directory);
+  }
+
+  storage::Storage storage;
+  std::optional<storage::Storage::Accessor> storage_accessor;
+};
+
+TEST_F(TriggerStoreTest, Load) {
+  std::optional<query::TriggerStore> store;
+
+  store.emplace(testing_directory, &ast_cache, &*dba, &antlr_lock);
+
+  const auto check_empty = [&] {
+    ASSERT_EQ(store->GetTriggerInfo().size(), 0);
+    ASSERT_EQ(store->BeforeCommitTriggers().size(), 0);
+    ASSERT_EQ(store->AfterCommitTriggers().size(), 0);
+  };
+
+  check_empty();
+
+  const auto *trigger_name_before = "trigger";
+  const auto *trigger_name_after = "trigger_after";
+  const auto *trigger_statement = "RETURN $parameter";
+  const auto event_type = query::TriggerEventType::VERTEX_CREATE;
+  store->AddTrigger(trigger_name_before, trigger_statement,
+                    std::map<std::string, storage::PropertyValue>{{"parameter", storage::PropertyValue{1}}}, event_type,
+                    query::TriggerPhase::BEFORE_COMMIT, &ast_cache, &*dba, &antlr_lock);
+  store->AddTrigger(trigger_name_after, trigger_statement,
+                    std::map<std::string, storage::PropertyValue>{{"parameter", storage::PropertyValue{"value"}}},
+                    event_type, query::TriggerPhase::AFTER_COMMIT, &ast_cache, &*dba, &antlr_lock);
+
+  const auto check_triggers = [&] {
+    ASSERT_EQ(store->GetTriggerInfo().size(), 2);
+
+    const auto verify_trigger = [&](const auto &trigger, const auto &name) {
+      ASSERT_EQ(trigger.Name(), name);
+      ASSERT_EQ(trigger.OriginalStatement(), trigger_statement);
+      ASSERT_EQ(trigger.EventType(), event_type);
+    };
+
+    const auto before_commit_triggers = store->BeforeCommitTriggers().access();
+    ASSERT_EQ(before_commit_triggers.size(), 1);
+    for (const auto &trigger : before_commit_triggers) {
+      verify_trigger(trigger, trigger_name_before);
+    }
+
+    const auto after_commit_triggers = store->AfterCommitTriggers().access();
+    ASSERT_EQ(after_commit_triggers.size(), 1);
+    for (const auto &trigger : after_commit_triggers) {
+      verify_trigger(trigger, trigger_name_after);
+    }
+  };
+
+  check_triggers();
+
+  // recreate the trigger store; this should reload everything from disk
+  store.emplace(testing_directory, &ast_cache, &*dba, &antlr_lock);
+  check_triggers();
+
+  ASSERT_NO_THROW(store->DropTrigger(trigger_name_after));
+  ASSERT_NO_THROW(store->DropTrigger(trigger_name_before));
+
+  check_empty();
+
+  store.emplace(testing_directory, &ast_cache, &*dba, &antlr_lock);
+
+  check_empty();
+}
+
+TEST_F(TriggerStoreTest, AddTrigger) {
+  query::TriggerStore store{testing_directory, &ast_cache, &*dba, &antlr_lock};
+
+  // Invalid query in statements
+  ASSERT_THROW(store.AddTrigger("trigger", "RETUR 1", {}, query::TriggerEventType::VERTEX_CREATE,
+                                query::TriggerPhase::BEFORE_COMMIT, &ast_cache, &*dba, &antlr_lock),
+               utils::BasicException);
+  ASSERT_THROW(store.AddTrigger("trigger", "RETURN createdEdges", {}, query::TriggerEventType::VERTEX_CREATE,
+                                query::TriggerPhase::BEFORE_COMMIT, &ast_cache, &*dba, &antlr_lock),
+               utils::BasicException);
+
+  ASSERT_THROW(store.AddTrigger("trigger", "RETURN $parameter", {}, query::TriggerEventType::VERTEX_CREATE,
+                                query::TriggerPhase::BEFORE_COMMIT, &ast_cache, &*dba, &antlr_lock),
+               utils::BasicException);
+
+  ASSERT_NO_THROW(store.AddTrigger(
+      "trigger", "RETURN $parameter",
+      std::map<std::string, storage::PropertyValue>{{"parameter", storage::PropertyValue{1}}},
+      query::TriggerEventType::VERTEX_CREATE, query::TriggerPhase::BEFORE_COMMIT, &ast_cache, &*dba, &antlr_lock));
+
+  // Inserting with the same name
+  ASSERT_THROW(store.AddTrigger("trigger", "RETURN 1", {}, query::TriggerEventType::VERTEX_CREATE,
+                                query::TriggerPhase::BEFORE_COMMIT, &ast_cache, &*dba, &antlr_lock),
+               utils::BasicException);
+  ASSERT_THROW(store.AddTrigger("trigger", "RETURN 1", {}, query::TriggerEventType::VERTEX_CREATE,
+                                query::TriggerPhase::AFTER_COMMIT, &ast_cache, &*dba, &antlr_lock),
+               utils::BasicException);
+
+  ASSERT_EQ(store.GetTriggerInfo().size(), 1);
+  ASSERT_EQ(store.BeforeCommitTriggers().size(), 1);
+  ASSERT_EQ(store.AfterCommitTriggers().size(), 0);
+}
+
+TEST_F(TriggerStoreTest, DropTrigger) {
+  query::TriggerStore store{testing_directory, &ast_cache, &*dba, &antlr_lock};
+
+  ASSERT_THROW(store.DropTrigger("Unknown"), utils::BasicException);
+
+  const auto *trigger_name = "trigger";
+  store.AddTrigger(trigger_name, "RETURN 1", {}, query::TriggerEventType::VERTEX_CREATE,
+                   query::TriggerPhase::BEFORE_COMMIT, &ast_cache, &*dba, &antlr_lock);
+
+  ASSERT_THROW(store.DropTrigger("Unknown"), utils::BasicException);
+  ASSERT_NO_THROW(store.DropTrigger(trigger_name));
+  ASSERT_EQ(store.GetTriggerInfo().size(), 0);
+}
+
+TEST_F(TriggerStoreTest, TriggerInfo) {
+  query::TriggerStore store{testing_directory, &ast_cache, &*dba, &antlr_lock};
+
+  std::vector<query::TriggerStore::TriggerInfo> expected_info;
+  store.AddTrigger("trigger", "RETURN 1", {}, query::TriggerEventType::VERTEX_CREATE,
+                   query::TriggerPhase::BEFORE_COMMIT, &ast_cache, &*dba, &antlr_lock);
+  expected_info.push_back(
+      {"trigger", "RETURN 1",
query::TriggerEventType::VERTEX_CREATE, query::TriggerPhase::BEFORE_COMMIT}); + + const auto check_trigger_info = [&] { + const auto trigger_info = store.GetTriggerInfo(); + ASSERT_EQ(expected_info.size(), trigger_info.size()); + // ensure all of the expected trigger infos can be found in the retrieved infos + ASSERT_TRUE(std::all_of(expected_info.begin(), expected_info.end(), [&](const auto &info) { + return std::find_if(trigger_info.begin(), trigger_info.end(), [&](const auto &other) { + return info.name == other.name && info.statement == other.statement && + info.event_type == other.event_type && info.phase == other.phase; + }) != trigger_info.end(); + })); + }; + + check_trigger_info(); + + store.AddTrigger("edge_update_trigger", "RETURN 1", {}, query::TriggerEventType::EDGE_UPDATE, + query::TriggerPhase::AFTER_COMMIT, &ast_cache, &*dba, &antlr_lock); + expected_info.push_back( + {"edge_update_trigger", "RETURN 1", query::TriggerEventType::EDGE_UPDATE, query::TriggerPhase::AFTER_COMMIT}); + + check_trigger_info(); + + store.DropTrigger("edge_update_trigger"); + const auto erase_from_expected = [&](const std::string_view name) { + const auto erase_count = std::erase_if(expected_info, [name](const auto &info) { return info.name == name; }); + ASSERT_EQ(erase_count, 1); + }; + erase_from_expected("edge_update_trigger"); + + check_trigger_info(); + + store.DropTrigger("trigger"); + erase_from_expected("trigger"); + + check_trigger_info(); +} + +TEST_F(TriggerStoreTest, AnyTriggerAllKeywords) { + query::TriggerStore store{testing_directory, &ast_cache, &*dba, &antlr_lock}; + + using namespace std::literals; + + const auto created_vertices = "createdVertices"sv; + const auto created_edges = "createdEdges"sv; + const auto created_objects = "createdObjects"sv; + const auto deleted_vertices = "deletedVertices"sv; + const auto deleted_edges = "deletedEdges"sv; + const auto deleted_objects = "deletedObjects"sv; + const auto set_vertex_properties = "setVertexProperties"sv; + const auto set_edge_properties = "setEdgeProperties"sv; + const auto removed_vertex_properties = "removedVertexProperties"sv; + const auto removed_edge_properties = "removedEdgeProperties"sv; + const auto set_vertex_labels = "setVertexLabels"sv; + const auto removed_vertex_labels = "removedVertexLabels"sv; + const auto updated_vertices = "updatedVertices"sv; + const auto updated_edges = "updatedEdges"sv; + const auto updates_objects = "updatedObjects"sv; + + std::array event_types_to_test = { + std::make_pair(query::TriggerEventType::CREATE, std::vector{created_vertices, created_edges, created_objects}), + std::make_pair(query::TriggerEventType::VERTEX_CREATE, std::vector{created_vertices}), + std::make_pair(query::TriggerEventType::EDGE_CREATE, std::vector{created_edges}), + std::make_pair(query::TriggerEventType::UPDATE, + std::vector{ + set_vertex_properties, + set_edge_properties, + removed_vertex_properties, + removed_edge_properties, + set_vertex_labels, + removed_vertex_labels, + updated_vertices, + updated_edges, + updates_objects, + }), + std::make_pair(query::TriggerEventType::VERTEX_UPDATE, + std::vector{ + set_vertex_properties, + removed_vertex_properties, + set_vertex_labels, + removed_vertex_labels, + updated_vertices, + }), + std::make_pair(query::TriggerEventType::EDGE_UPDATE, + std::vector{ + set_edge_properties, + removed_edge_properties, + updated_edges, + }), + std::make_pair(query::TriggerEventType::DELETE, + std::vector{ + deleted_vertices, + deleted_edges, + deleted_objects, + }), + 
std::make_pair(query::TriggerEventType::VERTEX_DELETE, + std::vector{ + deleted_vertices, + }), + std::make_pair(query::TriggerEventType::EDGE_DELETE, + std::vector{ + deleted_edges, + }), + std::make_pair(query::TriggerEventType::ANY, + std::vector{ + created_vertices, + created_edges, + created_objects, + deleted_vertices, + deleted_edges, + deleted_objects, + set_vertex_properties, + set_edge_properties, + removed_vertex_properties, + removed_edge_properties, + set_vertex_labels, + removed_vertex_labels, + updated_vertices, + updated_edges, + updates_objects, + }), + }; + + const auto trigger_name = "trigger"s; + for (const auto &[event_type, keywords] : event_types_to_test) { + SCOPED_TRACE(query::TriggerEventTypeToString(event_type)); + for (const auto keyword : keywords) { + SCOPED_TRACE(keyword); + EXPECT_NO_THROW(store.AddTrigger(trigger_name, fmt::format("RETURN {}", keyword), {}, event_type, + query::TriggerPhase::BEFORE_COMMIT, &ast_cache, &*dba, &antlr_lock)); + store.DropTrigger(trigger_name); + } + } +} diff --git a/tests/unit/storage_v2.cpp b/tests/unit/storage_v2.cpp index 309694fe2..db9082bd7 100644 --- a/tests/unit/storage_v2.cpp +++ b/tests/unit/storage_v2.cpp @@ -3,7 +3,9 @@ #include +#include "storage/v2/property_value.hpp" #include "storage/v2/storage.hpp" +#include "storage/v2/vertex_accessor.hpp" using testing::UnorderedElementsAre; @@ -752,7 +754,7 @@ TEST(StorageV2, VertexDeleteProperty) { ASSERT_EQ(vertex->Properties(storage::View::NEW)->size(), 0); // Set property 5 to "nandare" - ASSERT_TRUE(vertex->SetProperty(property, storage::PropertyValue("nandare")).GetValue()); + ASSERT_TRUE(vertex->SetProperty(property, storage::PropertyValue("nandare"))->IsNull()); // Check whether property 5 exists ASSERT_TRUE(vertex->GetProperty(property, storage::View::OLD)->IsNull()); @@ -799,7 +801,7 @@ TEST(StorageV2, VertexDeleteProperty) { ASSERT_EQ(vertex->Properties(storage::View::NEW)->size(), 0); // Set property 5 to "nandare" - ASSERT_TRUE(vertex->SetProperty(property, storage::PropertyValue("nandare")).GetValue()); + ASSERT_TRUE(vertex->SetProperty(property, storage::PropertyValue("nandare"))->IsNull()); // Check whether property 5 exists ASSERT_TRUE(vertex->GetProperty(property, storage::View::OLD)->IsNull()); @@ -1347,9 +1349,9 @@ TEST(StorageV2, VertexPropertyCommit) { ASSERT_EQ(vertex.Properties(storage::View::NEW)->size(), 0); { - auto res = vertex.SetProperty(property, storage::PropertyValue("temporary")); - ASSERT_TRUE(res.HasValue()); - ASSERT_TRUE(res.GetValue()); + auto old_value = vertex.SetProperty(property, storage::PropertyValue("temporary")); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_TRUE(old_value->IsNull()); } ASSERT_EQ(vertex.GetProperty(property, storage::View::NEW)->ValueString(), "temporary"); @@ -1360,9 +1362,9 @@ TEST(StorageV2, VertexPropertyCommit) { } { - auto res = vertex.SetProperty(property, storage::PropertyValue("nandare")); - ASSERT_TRUE(res.HasValue()); - ASSERT_FALSE(res.GetValue()); + auto old_value = vertex.SetProperty(property, storage::PropertyValue("nandare")); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_FALSE(old_value->IsNull()); } ASSERT_EQ(vertex.GetProperty(property, storage::View::NEW)->ValueString(), "nandare"); @@ -1410,9 +1412,9 @@ TEST(StorageV2, VertexPropertyCommit) { auto property = acc.NameToProperty("property5"); { - auto res = vertex->SetProperty(property, storage::PropertyValue()); - ASSERT_TRUE(res.HasValue()); - ASSERT_FALSE(res.GetValue()); + auto old_value = vertex->SetProperty(property, 
storage::PropertyValue()); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_FALSE(old_value->IsNull()); } ASSERT_EQ(vertex->GetProperty(property, storage::View::OLD)->ValueString(), "nandare"); @@ -1426,9 +1428,9 @@ TEST(StorageV2, VertexPropertyCommit) { ASSERT_EQ(vertex->Properties(storage::View::NEW)->size(), 0); { - auto res = vertex->SetProperty(property, storage::PropertyValue()); - ASSERT_TRUE(res.HasValue()); - ASSERT_TRUE(res.GetValue()); + auto old_value = vertex->SetProperty(property, storage::PropertyValue()); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_TRUE(old_value->IsNull()); } ASSERT_FALSE(acc.Commit().HasError()); @@ -1479,9 +1481,9 @@ TEST(StorageV2, VertexPropertyAbort) { ASSERT_EQ(vertex->Properties(storage::View::NEW)->size(), 0); { - auto res = vertex->SetProperty(property, storage::PropertyValue("temporary")); - ASSERT_TRUE(res.HasValue()); - ASSERT_TRUE(res.GetValue()); + auto old_value = vertex->SetProperty(property, storage::PropertyValue("temporary")); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_TRUE(old_value->IsNull()); } ASSERT_EQ(vertex->GetProperty(property, storage::View::NEW)->ValueString(), "temporary"); @@ -1492,9 +1494,9 @@ TEST(StorageV2, VertexPropertyAbort) { } { - auto res = vertex->SetProperty(property, storage::PropertyValue("nandare")); - ASSERT_TRUE(res.HasValue()); - ASSERT_FALSE(res.GetValue()); + auto old_value = vertex->SetProperty(property, storage::PropertyValue("nandare")); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_FALSE(old_value->IsNull()); } ASSERT_EQ(vertex->GetProperty(property, storage::View::NEW)->ValueString(), "nandare"); @@ -1540,9 +1542,9 @@ TEST(StorageV2, VertexPropertyAbort) { ASSERT_EQ(vertex->Properties(storage::View::NEW)->size(), 0); { - auto res = vertex->SetProperty(property, storage::PropertyValue("temporary")); - ASSERT_TRUE(res.HasValue()); - ASSERT_TRUE(res.GetValue()); + auto old_value = vertex->SetProperty(property, storage::PropertyValue("temporary")); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_TRUE(old_value->IsNull()); } ASSERT_EQ(vertex->GetProperty(property, storage::View::NEW)->ValueString(), "temporary"); @@ -1553,9 +1555,9 @@ TEST(StorageV2, VertexPropertyAbort) { } { - auto res = vertex->SetProperty(property, storage::PropertyValue("nandare")); - ASSERT_TRUE(res.HasValue()); - ASSERT_FALSE(res.GetValue()); + auto old_value = vertex->SetProperty(property, storage::PropertyValue("nandare")); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_FALSE(old_value->IsNull()); } ASSERT_EQ(vertex->GetProperty(property, storage::View::NEW)->ValueString(), "nandare"); @@ -1621,9 +1623,9 @@ TEST(StorageV2, VertexPropertyAbort) { } { - auto res = vertex->SetProperty(property, storage::PropertyValue()); - ASSERT_TRUE(res.HasValue()); - ASSERT_FALSE(res.GetValue()); + auto old_value = vertex->SetProperty(property, storage::PropertyValue()); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_FALSE(old_value->IsNull()); } ASSERT_EQ(vertex->GetProperty(property, storage::View::OLD)->ValueString(), "nandare"); @@ -1692,9 +1694,9 @@ TEST(StorageV2, VertexPropertyAbort) { } { - auto res = vertex->SetProperty(property, storage::PropertyValue()); - ASSERT_TRUE(res.HasValue()); - ASSERT_FALSE(res.GetValue()); + auto old_value = vertex->SetProperty(property, storage::PropertyValue()); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_FALSE(old_value->IsNull()); } ASSERT_EQ(vertex->GetProperty(property, storage::View::OLD)->ValueString(), "nandare"); @@ -1762,9 +1764,9 @@ TEST(StorageV2, VertexPropertySerializationError) { 
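
// A note on the assertion rewrite running through this file: SetProperty used to
// return Result<bool> ("was a new property created?"); it now returns the
// property's previous value, which is what the assertions above probe with
// IsNull(). A minimal hedged sketch of the new contract (illustrative only,
// not part of this change):
//
//   storage::Storage store;
//   auto acc = store.Access();
//   auto vertex = acc.CreateVertex();
//   const auto prop = acc.NameToProperty("property");
//
//   auto old_value = vertex.SetProperty(prop, storage::PropertyValue("v1"));
//   // first write: there was no previous value, so a Null PropertyValue comes back
//   // -> old_value.HasValue() && old_value->IsNull()
//
//   old_value = vertex.SetProperty(prop, storage::PropertyValue("v2"));
//   // overwrite: the previous value ("v1") comes back, i.e. not Null
//   // -> old_value.HasValue() && !old_value->IsNull()
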
ASSERT_EQ(vertex->Properties(storage::View::NEW)->size(), 0); { - auto res = vertex->SetProperty(property1, storage::PropertyValue(123)); - ASSERT_TRUE(res.HasValue()); - ASSERT_TRUE(res.GetValue()); + auto old_value = vertex->SetProperty(property1, storage::PropertyValue(123)); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_TRUE(old_value->IsNull()); } ASSERT_TRUE(vertex->GetProperty(property1, storage::View::OLD)->IsNull()); @@ -1884,7 +1886,7 @@ TEST(StorageV2, VertexLabelPropertyMixed) { ASSERT_EQ(vertex.Properties(storage::View::NEW)->size(), 0); // Set property 5 to "nandare" - ASSERT_TRUE(vertex.SetProperty(property, storage::PropertyValue("nandare")).GetValue()); + ASSERT_TRUE(vertex.SetProperty(property, storage::PropertyValue("nandare"))->IsNull()); // Check whether label 5 and property 5 exist ASSERT_TRUE(vertex.HasLabel(label, storage::View::OLD).GetValue()); @@ -1938,7 +1940,7 @@ TEST(StorageV2, VertexLabelPropertyMixed) { } // Set property 5 to "haihai" - ASSERT_FALSE(vertex.SetProperty(property, storage::PropertyValue("haihai")).GetValue()); + ASSERT_FALSE(vertex.SetProperty(property, storage::PropertyValue("haihai"))->IsNull()); // Check whether label 5 and property 5 exist ASSERT_TRUE(vertex.HasLabel(label, storage::View::OLD).GetValue()); @@ -2042,7 +2044,7 @@ TEST(StorageV2, VertexLabelPropertyMixed) { } // Set property 5 to null - ASSERT_FALSE(vertex.SetProperty(property, storage::PropertyValue()).GetValue()); + ASSERT_FALSE(vertex.SetProperty(property, storage::PropertyValue())->IsNull()); // Check whether label 5 and property 5 exist ASSERT_FALSE(vertex.HasLabel(label, storage::View::OLD).GetValue()); @@ -2084,9 +2086,9 @@ TEST(StorageV2, VertexPropertyClear) { auto vertex = acc.CreateVertex(); gid = vertex.Gid(); - auto res = vertex.SetProperty(property1, storage::PropertyValue("value")); - ASSERT_TRUE(res.HasValue()); - ASSERT_TRUE(res.GetValue()); + auto old_value = vertex.SetProperty(property1, storage::PropertyValue("value")); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_TRUE(old_value->IsNull()); ASSERT_FALSE(acc.Commit().HasError()); } @@ -2101,9 +2103,9 @@ TEST(StorageV2, VertexPropertyClear) { UnorderedElementsAre(std::pair(property1, storage::PropertyValue("value")))); { - auto ret = vertex->ClearProperties(); - ASSERT_TRUE(ret.HasValue()); - ASSERT_TRUE(ret.GetValue()); + auto old_values = vertex->ClearProperties(); + ASSERT_TRUE(old_values.HasValue()); + ASSERT_FALSE(old_values->empty()); } ASSERT_TRUE(vertex->GetProperty(property1, storage::View::NEW)->IsNull()); @@ -2111,9 +2113,9 @@ TEST(StorageV2, VertexPropertyClear) { ASSERT_EQ(vertex->Properties(storage::View::NEW).GetValue().size(), 0); { - auto ret = vertex->ClearProperties(); - ASSERT_TRUE(ret.HasValue()); - ASSERT_FALSE(ret.GetValue()); + auto old_values = vertex->ClearProperties(); + ASSERT_TRUE(old_values.HasValue()); + ASSERT_TRUE(old_values->empty()); } ASSERT_TRUE(vertex->GetProperty(property1, storage::View::NEW)->IsNull()); @@ -2127,9 +2129,9 @@ TEST(StorageV2, VertexPropertyClear) { auto vertex = acc.FindVertex(gid, storage::View::OLD); ASSERT_TRUE(vertex); - auto res = vertex->SetProperty(property2, storage::PropertyValue(42)); - ASSERT_TRUE(res.HasValue()); - ASSERT_TRUE(res.GetValue()); + auto old_value = vertex->SetProperty(property2, storage::PropertyValue(42)); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_TRUE(old_value->IsNull()); ASSERT_FALSE(acc.Commit().HasError()); } @@ -2145,9 +2147,9 @@ TEST(StorageV2, VertexPropertyClear) { std::pair(property2, 
storage::PropertyValue(42))));
 
   {
-    auto ret = vertex->ClearProperties();
-    ASSERT_TRUE(ret.HasValue());
-    ASSERT_TRUE(ret.GetValue());
+    auto old_values = vertex->ClearProperties();
+    ASSERT_TRUE(old_values.HasValue());
+    ASSERT_FALSE(old_values->empty());
   }
 
   ASSERT_TRUE(vertex->GetProperty(property1, storage::View::NEW)->IsNull());
@@ -2155,9 +2157,9 @@
   ASSERT_EQ(vertex->Properties(storage::View::NEW).GetValue().size(), 0);
 
   {
-    auto ret = vertex->ClearProperties();
-    ASSERT_TRUE(ret.HasValue());
-    ASSERT_FALSE(ret.GetValue());
+    auto old_values = vertex->ClearProperties();
+    ASSERT_TRUE(old_values.HasValue());
+    ASSERT_TRUE(old_values->empty());
   }
 
   ASSERT_TRUE(vertex->GetProperty(property1, storage::View::NEW)->IsNull());
@@ -2530,3 +2532,47 @@ TEST(StorageV2, VertexVisibilityMultipleTransactions) {
     acc.Abort();
   }
 }
+
+// NOLINTNEXTLINE(hicpp-special-member-functions)
+TEST(StorageV2, DeletedVertexAccessor) {
+  storage::Storage store;
+
+  const auto property = store.NameToProperty("property");
+  const storage::PropertyValue property_value{"property_value"};
+
+  std::optional<storage::Gid> gid;
+  // Create the vertex
+  {
+    auto acc = store.Access();
+    auto vertex = acc.CreateVertex();
+    gid = vertex.Gid();
+    ASSERT_FALSE(vertex.SetProperty(property, property_value).HasError());
+    ASSERT_FALSE(acc.Commit().HasError());
+  }
+
+  auto acc = store.Access();
+  auto vertex = acc.FindVertex(*gid, storage::View::OLD);
+  ASSERT_TRUE(vertex);
+  auto maybe_deleted_vertex = acc.DeleteVertex(&*vertex);
+  ASSERT_FALSE(maybe_deleted_vertex.HasError());
+
+  auto deleted_vertex = maybe_deleted_vertex.GetValue();
+  ASSERT_TRUE(deleted_vertex);
+  // you cannot modify a deleted vertex
+  ASSERT_TRUE(deleted_vertex->ClearProperties().HasError());
+
+  // you can call read-only methods
+  const auto maybe_property = deleted_vertex->GetProperty(property, storage::View::OLD);
+  ASSERT_FALSE(maybe_property.HasError());
+  ASSERT_EQ(property_value, *maybe_property);
+  ASSERT_FALSE(acc.Commit().HasError());
+
+  {
+    // you can call read-only methods and get valid results even after the
+    // transaction which deleted the vertex committed, but only if the
+    // transaction accessor is still alive
+    const auto maybe_property = deleted_vertex->GetProperty(property, storage::View::OLD);
+    ASSERT_FALSE(maybe_property.HasError());
+    ASSERT_EQ(property_value, *maybe_property);
+  }
+}
diff --git a/tests/unit/storage_v2_durability.cpp b/tests/unit/storage_v2_durability.cpp
index c6c8a883e..dfe84f7ce 100644
--- a/tests/unit/storage_v2_durability.cpp
+++ b/tests/unit/storage_v2_durability.cpp
@@ -709,14 +709,15 @@ TEST_P(DurabilityTest, SnapshotFallback) {
         {.items = {.properties_on_edges = GetParam()},
          .durability = {.storage_directory = storage_directory,
                         .snapshot_wal_mode = storage::Config::Durability::SnapshotWalMode::PERIODIC_SNAPSHOT,
-                        .snapshot_interval = std::chrono::milliseconds(2000)}});
+                        .snapshot_interval = std::chrono::milliseconds(3000)}});
     CreateBaseDataset(&store, GetParam());
-    std::this_thread::sleep_for(std::chrono::milliseconds(2500));
+    std::this_thread::sleep_for(std::chrono::milliseconds(3500));
+    ASSERT_EQ(GetSnapshotsList().size(), 1);
     CreateExtendedDataset(&store);
-    std::this_thread::sleep_for(std::chrono::milliseconds(2500));
+    std::this_thread::sleep_for(std::chrono::milliseconds(3000));
   }
-  ASSERT_GE(GetSnapshotsList().size(), 2);
+  ASSERT_EQ(GetSnapshotsList().size(), 2);
   ASSERT_EQ(GetBackupSnapshotsList().size(), 0);
   ASSERT_EQ(GetWalsList().size(), 0);
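
// The DeletedVertexAccessor test above pins down the contract for the accessor
// returned by DeleteVertex: mutations fail, while reads of the pre-deletion
// state keep working for as long as the owning transaction accessor is alive.
// A hedged usage sketch (illustrative; `store`, `gid` and `prop` stand in for
// real objects):
//
//   auto acc = store.Access();
//   auto vertex = acc.FindVertex(gid, storage::View::OLD);
//   auto deleted = acc.DeleteVertex(&*vertex).GetValue();
//   assert(deleted->ClearProperties().HasError());                       // writes are rejected
//   assert(!deleted->GetProperty(prop, storage::View::OLD).HasError());  // reads still succeed
//   // once `acc` is destroyed, `deleted` must not be used any more
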
ASSERT_EQ(GetBackupWalsList().size(), 0); @@ -724,7 +725,7 @@ TEST_P(DurabilityTest, SnapshotFallback) { // Destroy last snapshot. { auto snapshots = GetSnapshotsList(); - ASSERT_GE(snapshots.size(), 2); + ASSERT_EQ(snapshots.size(), 2); DestroySnapshot(*snapshots.begin()); } diff --git a/tests/unit/storage_v2_edge.cpp b/tests/unit/storage_v2_edge.cpp index 2b6bcf421..89eff1e7d 100644 --- a/tests/unit/storage_v2_edge.cpp +++ b/tests/unit/storage_v2_edge.cpp @@ -3324,7 +3324,7 @@ TEST_P(StorageEdgeTest, VertexDetachDeleteSingleCommit) { { auto ret = acc.DetachDeleteVertex(&*vertex_from); ASSERT_TRUE(ret.HasValue()); - ASSERT_TRUE(ret.GetValue()); + ASSERT_TRUE(*ret); } // Check edges @@ -3543,7 +3543,7 @@ TEST_P(StorageEdgeTest, VertexDetachDeleteMultipleCommit) { { auto ret = acc.DetachDeleteVertex(&*vertex1); ASSERT_TRUE(ret.HasValue()); - ASSERT_TRUE(ret.GetValue()); + ASSERT_TRUE(*ret); } // Check edges @@ -3791,7 +3791,7 @@ TEST_P(StorageEdgeTest, VertexDetachDeleteSingleAbort) { { auto ret = acc.DetachDeleteVertex(&*vertex_from); ASSERT_TRUE(ret.HasValue()); - ASSERT_TRUE(ret.GetValue()); + ASSERT_TRUE(*ret); } // Check edges @@ -3895,7 +3895,7 @@ TEST_P(StorageEdgeTest, VertexDetachDeleteSingleAbort) { { auto ret = acc.DetachDeleteVertex(&*vertex_from); ASSERT_TRUE(ret.HasValue()); - ASSERT_TRUE(ret.GetValue()); + ASSERT_TRUE(*ret); } // Check edges @@ -4114,7 +4114,7 @@ TEST_P(StorageEdgeTest, VertexDetachDeleteMultipleAbort) { { auto ret = acc.DetachDeleteVertex(&*vertex1); ASSERT_TRUE(ret.HasValue()); - ASSERT_TRUE(ret.GetValue()); + ASSERT_TRUE(*ret); } // Check edges @@ -4430,7 +4430,7 @@ TEST_P(StorageEdgeTest, VertexDetachDeleteMultipleAbort) { { auto ret = acc.DetachDeleteVertex(&*vertex1); ASSERT_TRUE(ret.HasValue()); - ASSERT_TRUE(ret.GetValue()); + ASSERT_TRUE(*ret); } // Check edges @@ -4622,9 +4622,9 @@ TEST(StorageWithProperties, EdgePropertyCommit) { ASSERT_EQ(edge.Properties(storage::View::NEW)->size(), 0); { - auto res = edge.SetProperty(property, storage::PropertyValue("temporary")); - ASSERT_TRUE(res.HasValue()); - ASSERT_TRUE(res.GetValue()); + auto old_value = edge.SetProperty(property, storage::PropertyValue("temporary")); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_TRUE(old_value->IsNull()); } ASSERT_EQ(edge.GetProperty(property, storage::View::NEW)->ValueString(), "temporary"); @@ -4635,9 +4635,9 @@ TEST(StorageWithProperties, EdgePropertyCommit) { } { - auto res = edge.SetProperty(property, storage::PropertyValue("nandare")); - ASSERT_TRUE(res.HasValue()); - ASSERT_FALSE(res.GetValue()); + auto old_value = edge.SetProperty(property, storage::PropertyValue("nandare")); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_FALSE(old_value->IsNull()); } ASSERT_EQ(edge.GetProperty(property, storage::View::NEW)->ValueString(), "nandare"); @@ -4687,9 +4687,9 @@ TEST(StorageWithProperties, EdgePropertyCommit) { auto property = acc.NameToProperty("property5"); { - auto res = edge.SetProperty(property, storage::PropertyValue()); - ASSERT_TRUE(res.HasValue()); - ASSERT_FALSE(res.GetValue()); + auto old_value = edge.SetProperty(property, storage::PropertyValue()); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_FALSE(old_value->IsNull()); } ASSERT_EQ(edge.GetProperty(property, storage::View::OLD)->ValueString(), "nandare"); @@ -4703,9 +4703,9 @@ TEST(StorageWithProperties, EdgePropertyCommit) { ASSERT_EQ(edge.Properties(storage::View::NEW)->size(), 0); { - auto res = edge.SetProperty(property, storage::PropertyValue()); - ASSERT_TRUE(res.HasValue()); - 
ASSERT_TRUE(res.GetValue()); + auto old_value = edge.SetProperty(property, storage::PropertyValue()); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_TRUE(old_value->IsNull()); } ASSERT_FALSE(acc.Commit().HasError()); @@ -4763,9 +4763,9 @@ TEST(StorageWithProperties, EdgePropertyAbort) { ASSERT_EQ(edge.Properties(storage::View::NEW)->size(), 0); { - auto res = edge.SetProperty(property, storage::PropertyValue("temporary")); - ASSERT_TRUE(res.HasValue()); - ASSERT_TRUE(res.GetValue()); + auto old_value = edge.SetProperty(property, storage::PropertyValue("temporary")); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_TRUE(old_value->IsNull()); } ASSERT_EQ(edge.GetProperty(property, storage::View::NEW)->ValueString(), "temporary"); @@ -4776,9 +4776,9 @@ TEST(StorageWithProperties, EdgePropertyAbort) { } { - auto res = edge.SetProperty(property, storage::PropertyValue("nandare")); - ASSERT_TRUE(res.HasValue()); - ASSERT_FALSE(res.GetValue()); + auto old_value = edge.SetProperty(property, storage::PropertyValue("nandare")); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_FALSE(old_value->IsNull()); } ASSERT_EQ(edge.GetProperty(property, storage::View::NEW)->ValueString(), "nandare"); @@ -4826,9 +4826,9 @@ TEST(StorageWithProperties, EdgePropertyAbort) { ASSERT_EQ(edge.Properties(storage::View::NEW)->size(), 0); { - auto res = edge.SetProperty(property, storage::PropertyValue("temporary")); - ASSERT_TRUE(res.HasValue()); - ASSERT_TRUE(res.GetValue()); + auto old_value = edge.SetProperty(property, storage::PropertyValue("temporary")); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_TRUE(old_value->IsNull()); } ASSERT_EQ(edge.GetProperty(property, storage::View::NEW)->ValueString(), "temporary"); @@ -4839,9 +4839,9 @@ TEST(StorageWithProperties, EdgePropertyAbort) { } { - auto res = edge.SetProperty(property, storage::PropertyValue("nandare")); - ASSERT_TRUE(res.HasValue()); - ASSERT_FALSE(res.GetValue()); + auto old_value = edge.SetProperty(property, storage::PropertyValue("nandare")); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_FALSE(old_value->IsNull()); } ASSERT_EQ(edge.GetProperty(property, storage::View::NEW)->ValueString(), "nandare"); @@ -4909,9 +4909,9 @@ TEST(StorageWithProperties, EdgePropertyAbort) { } { - auto res = edge.SetProperty(property, storage::PropertyValue()); - ASSERT_TRUE(res.HasValue()); - ASSERT_FALSE(res.GetValue()); + auto old_value = edge.SetProperty(property, storage::PropertyValue()); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_FALSE(old_value->IsNull()); } ASSERT_EQ(edge.GetProperty(property, storage::View::OLD)->ValueString(), "nandare"); @@ -4982,9 +4982,9 @@ TEST(StorageWithProperties, EdgePropertyAbort) { } { - auto res = edge.SetProperty(property, storage::PropertyValue()); - ASSERT_TRUE(res.HasValue()); - ASSERT_FALSE(res.GetValue()); + auto old_value = edge.SetProperty(property, storage::PropertyValue()); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_FALSE(old_value->IsNull()); } ASSERT_EQ(edge.GetProperty(property, storage::View::OLD)->ValueString(), "nandare"); @@ -5059,9 +5059,9 @@ TEST(StorageWithProperties, EdgePropertySerializationError) { ASSERT_EQ(edge.Properties(storage::View::NEW)->size(), 0); { - auto res = edge.SetProperty(property1, storage::PropertyValue(123)); - ASSERT_TRUE(res.HasValue()); - ASSERT_TRUE(res.GetValue()); + auto old_value = edge.SetProperty(property1, storage::PropertyValue(123)); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_TRUE(old_value->IsNull()); } ASSERT_TRUE(edge.GetProperty(property1, storage::View::OLD)->IsNull()); @@ 
-5148,9 +5148,9 @@ TEST(StorageWithProperties, EdgePropertyClear) { ASSERT_EQ(edge.FromVertex(), vertex); ASSERT_EQ(edge.ToVertex(), vertex); - auto res = edge.SetProperty(property1, storage::PropertyValue("value")); - ASSERT_TRUE(res.HasValue()); - ASSERT_TRUE(res.GetValue()); + auto old_value = edge.SetProperty(property1, storage::PropertyValue("value")); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_TRUE(old_value->IsNull()); ASSERT_FALSE(acc.Commit().HasError()); } @@ -5166,9 +5166,9 @@ TEST(StorageWithProperties, EdgePropertyClear) { UnorderedElementsAre(std::pair(property1, storage::PropertyValue("value")))); { - auto ret = edge.ClearProperties(); - ASSERT_TRUE(ret.HasValue()); - ASSERT_TRUE(ret.GetValue()); + auto old_values = edge.ClearProperties(); + ASSERT_TRUE(old_values.HasValue()); + ASSERT_FALSE(old_values->empty()); } ASSERT_TRUE(edge.GetProperty(property1, storage::View::NEW)->IsNull()); @@ -5176,9 +5176,9 @@ TEST(StorageWithProperties, EdgePropertyClear) { ASSERT_EQ(edge.Properties(storage::View::NEW).GetValue().size(), 0); { - auto ret = edge.ClearProperties(); - ASSERT_TRUE(ret.HasValue()); - ASSERT_FALSE(ret.GetValue()); + auto old_values = edge.ClearProperties(); + ASSERT_TRUE(old_values.HasValue()); + ASSERT_TRUE(old_values->empty()); } ASSERT_TRUE(edge.GetProperty(property1, storage::View::NEW)->IsNull()); @@ -5193,9 +5193,9 @@ TEST(StorageWithProperties, EdgePropertyClear) { ASSERT_TRUE(vertex); auto edge = vertex->OutEdges(storage::View::NEW).GetValue()[0]; - auto res = edge.SetProperty(property2, storage::PropertyValue(42)); - ASSERT_TRUE(res.HasValue()); - ASSERT_TRUE(res.GetValue()); + auto old_value = edge.SetProperty(property2, storage::PropertyValue(42)); + ASSERT_TRUE(old_value.HasValue()); + ASSERT_TRUE(old_value->IsNull()); ASSERT_FALSE(acc.Commit().HasError()); } @@ -5212,9 +5212,9 @@ TEST(StorageWithProperties, EdgePropertyClear) { std::pair(property2, storage::PropertyValue(42)))); { - auto ret = edge.ClearProperties(); - ASSERT_TRUE(ret.HasValue()); - ASSERT_TRUE(ret.GetValue()); + auto old_values = edge.ClearProperties(); + ASSERT_TRUE(old_values.HasValue()); + ASSERT_FALSE(old_values->empty()); } ASSERT_TRUE(edge.GetProperty(property1, storage::View::NEW)->IsNull()); @@ -5222,9 +5222,9 @@ TEST(StorageWithProperties, EdgePropertyClear) { ASSERT_EQ(edge.Properties(storage::View::NEW).GetValue().size(), 0); { - auto ret = edge.ClearProperties(); - ASSERT_TRUE(ret.HasValue()); - ASSERT_FALSE(ret.GetValue()); + auto old_values = edge.ClearProperties(); + ASSERT_TRUE(old_values.HasValue()); + ASSERT_TRUE(old_values->empty()); } ASSERT_TRUE(edge.GetProperty(property1, storage::View::NEW)->IsNull()); @@ -5361,7 +5361,7 @@ TEST(StorageWithProperties, EdgeNonexistentPropertyAPI) { ASSERT_EQ(*edge->GetProperty(property, storage::View::NEW), storage::PropertyValue()); // Modify edge. - ASSERT_TRUE(edge->SetProperty(property, storage::PropertyValue("value")).HasValue()); + ASSERT_TRUE(edge->SetProperty(property, storage::PropertyValue("value"))->IsNull()); // Check state after (OLD view). 
ASSERT_EQ(edge->Properties(storage::View::OLD).GetError(), storage::Error::NONEXISTENT_OBJECT);
diff --git a/tests/unit/storage_v2_isolation_level.cpp b/tests/unit/storage_v2_isolation_level.cpp
new file mode 100644
index 000000000..f80bc2ef2
--- /dev/null
+++ b/tests/unit/storage_v2_isolation_level.cpp
@@ -0,0 +1,98 @@
+#include <gtest/gtest.h>
+
+#include "storage/v2/isolation_level.hpp"
+#include "storage/v2/storage.hpp"
+
+namespace {
+int64_t VerticesCount(storage::Storage::Accessor &accessor) {
+  int64_t count{0};
+  for ([[maybe_unused]] const auto &vertex : accessor.Vertices(storage::View::NEW)) {
+    ++count;
+  }
+
+  return count;
+}
+
+constexpr std::array isolation_levels{storage::IsolationLevel::SNAPSHOT_ISOLATION,
+                                      storage::IsolationLevel::READ_COMMITTED,
+                                      storage::IsolationLevel::READ_UNCOMMITTED};
+
+std::string_view IsolationLevelToString(const storage::IsolationLevel isolation_level) {
+  switch (isolation_level) {
+    case storage::IsolationLevel::SNAPSHOT_ISOLATION:
+      return "SNAPSHOT_ISOLATION";
+    case storage::IsolationLevel::READ_COMMITTED:
+      return "READ_COMMITTED";
+    case storage::IsolationLevel::READ_UNCOMMITTED:
+      return "READ_UNCOMMITTED";
+  }
+}
+}  // namespace
+
+class StorageIsolationLevelTest : public ::testing::TestWithParam<storage::IsolationLevel> {
+ public:
+  struct PrintToStringParamName {
+    std::string operator()(const testing::TestParamInfo<storage::IsolationLevel> &info) {
+      return std::string(IsolationLevelToString(static_cast<storage::IsolationLevel>(info.param)));
+    }
+  };
+};
+
+TEST_P(StorageIsolationLevelTest, Visibility) {
+  const auto default_isolation_level = GetParam();
+
+  for (const auto override_isolation_level : isolation_levels) {
+    storage::Storage storage{storage::Config{.transaction = {.isolation_level = default_isolation_level}}};
+    auto creator = storage.Access();
+    auto default_isolation_level_reader = storage.Access();
+    auto override_isolation_level_reader = storage.Access(override_isolation_level);
+
+    ASSERT_EQ(VerticesCount(default_isolation_level_reader), 0);
+    ASSERT_EQ(VerticesCount(override_isolation_level_reader), 0);
+
+    constexpr auto iteration_count = 10;
+    {
+      SCOPED_TRACE(fmt::format(
+          "Visibility while the creator transaction is active "
+          "(default isolation level = {}, override isolation level = {})",
+          IsolationLevelToString(default_isolation_level), IsolationLevelToString(override_isolation_level)));
+      for (size_t i = 1; i <= iteration_count; ++i) {
+        creator.CreateVertex();
+
+        const auto check_vertices_count = [i](auto &accessor, const auto isolation_level) {
+          const auto expected_count = isolation_level == storage::IsolationLevel::READ_UNCOMMITTED ? i : 0;
+          EXPECT_EQ(VerticesCount(accessor), expected_count);
+        };
+        check_vertices_count(default_isolation_level_reader, default_isolation_level);
+        check_vertices_count(override_isolation_level_reader, override_isolation_level);
+      }
+    }
+
+    ASSERT_FALSE(creator.Commit().HasError());
+    {
+      SCOPED_TRACE(fmt::format(
+          "Visibility after the creator transaction is committed "
+          "(default isolation level = {}, override isolation level = {})",
+          IsolationLevelToString(default_isolation_level), IsolationLevelToString(override_isolation_level)));
+      const auto check_vertices_count = [iteration_count](auto &accessor, const auto isolation_level) {
+        const auto expected_count =
+            isolation_level == storage::IsolationLevel::SNAPSHOT_ISOLATION ? 0 : iteration_count;
+        ASSERT_EQ(VerticesCount(accessor), expected_count);
+      };
+
+      check_vertices_count(default_isolation_level_reader, default_isolation_level);
+      check_vertices_count(override_isolation_level_reader, override_isolation_level);
+    }
+
+    ASSERT_FALSE(default_isolation_level_reader.Commit().HasError());
+    ASSERT_FALSE(override_isolation_level_reader.Commit().HasError());
+
+    SCOPED_TRACE("Visibility after a new transaction is started");
+    auto verifier = storage.Access();
+    ASSERT_EQ(VerticesCount(verifier), iteration_count);
+    ASSERT_FALSE(verifier.Commit().HasError());
+  }
+}
+
+INSTANTIATE_TEST_CASE_P(ParameterizedStorageIsolationLevelTests, StorageIsolationLevelTest,
+                        ::testing::ValuesIn(isolation_levels), StorageIsolationLevelTest::PrintToStringParamName());
diff --git a/tests/unit/storage_v2_wal_file.cpp b/tests/unit/storage_v2_wal_file.cpp
index 34b48b77e..79e03f351 100644
--- a/tests/unit/storage_v2_wal_file.cpp
+++ b/tests/unit/storage_v2_wal_file.cpp
@@ -45,7 +45,9 @@ class DeltaGenerator final {
    private:
     friend class DeltaGenerator;
-    explicit Transaction(DeltaGenerator *gen) : gen_(gen), transaction_(gen->transaction_id_++, gen->timestamp_++) {}
+    explicit Transaction(DeltaGenerator *gen)
+        : gen_(gen),
+          transaction_(gen->transaction_id_++, gen->timestamp_++, storage::IsolationLevel::SNAPSHOT_ISOLATION) {}
 
    public:
     storage::Vertex *CreateVertex() {
diff --git a/tests/unit/test_utils.hpp b/tests/unit/test_utils.hpp
new file mode 100644
index 000000000..4efc0f4b2
--- /dev/null
+++ b/tests/unit/test_utils.hpp
@@ -0,0 +1,9 @@
+#include <memory>
+
+#include "query/procedure/mg_procedure_impl.hpp"
+
+namespace test_utils {
+using MgpValueOwningPtr = std::unique_ptr<mgp_value, void (*)(mgp_value *)>;
+
+MgpValueOwningPtr CreateValueOwningPtr(mgp_value *value) { return MgpValueOwningPtr(value, &mgp_value_destroy); }
+}  // namespace test_utils
diff --git a/tests/unit/typed_value.cpp b/tests/unit/typed_value.cpp
index 9d848cde7..ffc28262a 100644
--- a/tests/unit/typed_value.cpp
+++ b/tests/unit/typed_value.cpp
@@ -397,8 +397,8 @@ TEST_F(TypedValueLogicTest, LogicalXor) {
 
 // NOLINTNEXTLINE(hicpp-special-member-functions)
 TEST_F(AllTypesFixture, ConstructionWithMemoryResource) {
-  std::vector<TypedValue> values_with_custom_memory;
   utils::MonotonicBufferResource monotonic_memory(1024);
+  std::vector<TypedValue> values_with_custom_memory;
   for (const auto &value : values_) {
     EXPECT_EQ(value.GetMemoryResource(), utils::NewDeleteResource());
     TypedValue copy_constructed_value(value, &monotonic_memory);
diff --git a/tests/unit/utils_async_timer.cpp b/tests/unit/utils_async_timer.cpp
new file mode 100644
index 000000000..65fa69dee
--- /dev/null
+++ b/tests/unit/utils_async_timer.cpp
@@ -0,0 +1,138 @@
+#include <chrono>
+#include <cmath>
+#include <limits>
+
+#include "gtest/gtest.h"
+
+#include "utils/async_timer.hpp"
+
+using AsyncTimer = utils::AsyncTimer;
+
+constexpr auto kSecondsInMilis = 1000.0;
+constexpr auto kIntervalInSeconds = 0.3;
+constexpr auto kIntervalInMilis = kIntervalInSeconds * kSecondsInMilis;
+constexpr auto kAbsoluteErrorInMilis = 50;
+
+std::chrono::steady_clock::time_point Now() { return std::chrono::steady_clock::now(); }
+
+int ElapsedMilis(const std::chrono::steady_clock::time_point &start,
+                 const std::chrono::steady_clock::time_point &end) {
+  return std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
+}
+
+void CheckTimeSimple() {
+  const auto before = Now();
+  AsyncTimer timer{kIntervalInSeconds};
+  while (!timer.IsExpired()) {
+    ASSERT_LT(ElapsedMilis(before, Now()), 2 * kIntervalInMilis);
+  }
+
+  const auto after = Now();
+
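// The measurement pattern here (and in every timer test below): spin on
// IsExpired() with an upper-bound guard so a timer that never fires aborts the
// test instead of hanging it, then compare the elapsed wall-clock time with
// EXPECT_NEAR against kAbsoluteErrorInMilis rather than exactly, since
// scheduler jitter makes exact timing assertions flaky. Generic shape of the
// idiom (hedged sketch; `interval_in_seconds`/`interval_in_milis` are
// placeholder names, not part of the change):
//
//   const auto start = Now();
//   utils::AsyncTimer timer{interval_in_seconds};
//   while (!timer.IsExpired()) {
//     ASSERT_LT(ElapsedMilis(start, Now()), 2 * interval_in_milis);  // fail instead of hang
//   }
//   EXPECT_NEAR(ElapsedMilis(start, Now()), interval_in_milis, kAbsoluteErrorInMilis);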
+  EXPECT_NEAR(ElapsedMilis(before, after), kIntervalInMilis, kAbsoluteErrorInMilis);
+}
+
+TEST(AsyncTimer, SimpleWait) { CheckTimeSimple(); }
+
+TEST(AsyncTimer, DoubleWait) {
+  CheckTimeSimple();
+  CheckTimeSimple();
+}
+
+TEST(AsyncTimer, MoveConstruct) {
+  const auto before = Now();
+  AsyncTimer timer_1{kIntervalInSeconds};
+  AsyncTimer timer_2{std::move(timer_1)};
+
+  EXPECT_FALSE(timer_1.IsExpired());
+  EXPECT_FALSE(timer_2.IsExpired());
+  const auto first_check_point = Now();
+
+  while (!timer_2.IsExpired()) {
+    ASSERT_LT(ElapsedMilis(before, Now()), 2 * kIntervalInMilis);
+  }
+  const auto second_check_point = Now();
+
+  EXPECT_FALSE(timer_1.IsExpired());
+  EXPECT_TRUE(timer_2.IsExpired());
+
+  EXPECT_LT(ElapsedMilis(before, first_check_point), kIntervalInMilis / 2);
+  EXPECT_NEAR(ElapsedMilis(before, second_check_point), kIntervalInMilis, kAbsoluteErrorInMilis);
+}
+
+TEST(AsyncTimer, MoveAssign) {
+  const auto before = Now();
+  AsyncTimer timer_1{2 * kIntervalInSeconds};
+  AsyncTimer timer_2{kIntervalInSeconds};
+
+  EXPECT_FALSE(timer_1.IsExpired());
+  EXPECT_FALSE(timer_2.IsExpired());
+  const auto first_check_point = Now();
+
+  timer_2 = std::move(timer_1);
+  EXPECT_FALSE(timer_1.IsExpired());
+  EXPECT_FALSE(timer_2.IsExpired());
+
+  while (!timer_2.IsExpired()) {
+    ASSERT_LT(ElapsedMilis(before, Now()), 3 * kIntervalInMilis);
+  }
+  const auto second_check_point = Now();
+
+  EXPECT_FALSE(timer_1.IsExpired());
+  EXPECT_TRUE(timer_2.IsExpired());
+
+  EXPECT_LT(ElapsedMilis(before, first_check_point), kIntervalInMilis / 2);
+  EXPECT_NEAR(ElapsedMilis(before, second_check_point), 2 * kIntervalInMilis, kAbsoluteErrorInMilis);
+}
+
+TEST(AsyncTimer, AssignToExpiredTimer) {
+  const auto before = Now();
+  AsyncTimer timer_1{2 * kIntervalInSeconds};
+  AsyncTimer timer_2{kIntervalInSeconds};
+
+  EXPECT_FALSE(timer_1.IsExpired());
+  EXPECT_FALSE(timer_2.IsExpired());
+  const auto first_check_point = Now();
+
+  while (!timer_2.IsExpired()) {
+    ASSERT_LT(ElapsedMilis(before, Now()), 3 * kIntervalInMilis);
+  }
+
+  EXPECT_FALSE(timer_1.IsExpired());
+  EXPECT_TRUE(timer_2.IsExpired());
+  const auto second_check_point = Now();
+
+  timer_2 = std::move(timer_1);
+  EXPECT_FALSE(timer_1.IsExpired());
+  EXPECT_FALSE(timer_2.IsExpired());
+  const auto third_check_point = Now();
+
+  while (!timer_2.IsExpired()) {
+    ASSERT_LT(ElapsedMilis(before, Now()), 3 * kIntervalInMilis);
+  }
+
+  EXPECT_FALSE(timer_1.IsExpired());
+  EXPECT_TRUE(timer_2.IsExpired());
+  const auto fourth_check_point = Now();
+
+  EXPECT_LT(ElapsedMilis(before, first_check_point), kIntervalInMilis / 2);
+  EXPECT_NEAR(ElapsedMilis(before, second_check_point), kIntervalInMilis, kAbsoluteErrorInMilis);
+  EXPECT_LT(ElapsedMilis(before, third_check_point), 1.5 * kIntervalInMilis);
+  EXPECT_NEAR(ElapsedMilis(before, fourth_check_point), 2 * kIntervalInMilis, kAbsoluteErrorInMilis);
+}
+
+TEST(AsyncTimer, DestroyTimerWhileItIsStillRunning) {
+  { AsyncTimer timer_to_destroy{kIntervalInSeconds}; }
+  const auto before = Now();
+  AsyncTimer timer_to_wait{1.5 * kIntervalInSeconds};
+  while (!timer_to_wait.IsExpired()) {
+    ASSERT_LT(ElapsedMilis(before, Now()), 3 * kIntervalInMilis);
+  }
+  // At this point the timer_to_destroy has expired and nothing bad happened. This doesn't mean the timer
+  // cancellation works properly; it just means that nothing bad happens if a timer gets cancelled.
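  // Why this matters: an armed timer typically has a pending notification
  // (e.g. a POSIX timer or a background thread) that fires after the deadline.
  // If the AsyncTimer object can be destroyed first, its destructor must
  // cancel or detach that notification so nothing touches freed state. The
  // shape of the hazard (hedged sketch, not part of the change):
  //
  //   {
  //     utils::AsyncTimer doomed{kIntervalInSeconds};  // armed ...
  //   }                                                // ... destroyed while pending
  //   // waiting past the original deadline afterwards gives a stray
  //   // notification the chance to fire; the test passing shows that
  //   // destroying an armed timer is at least memory-safe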
+}
+
+TEST(AsyncTimer, TimersWithExtremeValues) {
+  AsyncTimer timer_with_zero{0};
+  const double expected_maximum_value = std::nexttoward(std::numeric_limits<double>::max(), 0.0);
+  AsyncTimer timer_with_max_value{expected_maximum_value};
+}
diff --git a/tests/unit/utils_csv_parsing.cpp b/tests/unit/utils_csv_parsing.cpp
new file mode 100644
index 000000000..335e16769
--- /dev/null
+++ b/tests/unit/utils_csv_parsing.cpp
@@ -0,0 +1,322 @@
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "utils/csv_parsing.hpp"
+
+#include "utils/string.hpp"
+
+class CsvReaderTest : public ::testing::TestWithParam<std::string> {
+ protected:
+  const std::filesystem::path csv_directory{std::filesystem::temp_directory_path() / "csv_testing"};
+
+  void SetUp() override {
+    Clear();
+    CreateCsvDir();
+  }
+
+  void TearDown() override { Clear(); }
+
+ private:
+  void CreateCsvDir() {
+    if (!std::filesystem::exists(csv_directory)) {
+      std::filesystem::create_directory(csv_directory);
+    }
+  }
+  void Clear() {
+    if (!std::filesystem::exists(csv_directory)) return;
+    std::filesystem::remove_all(csv_directory);
+  }
+};
+
+namespace {
+class FileWriter {
+ public:
+  explicit FileWriter(const std::filesystem::path path, std::string newline = "\n") : newline_{std::move(newline)} {
+    stream_.open(path);
+  }
+
+  FileWriter(const FileWriter &) = delete;
+  FileWriter &operator=(const FileWriter &) = delete;
+
+  FileWriter(FileWriter &&) = delete;
+  FileWriter &operator=(FileWriter &&) = delete;
+
+  void Close() { stream_.close(); }
+
+  size_t WriteLine(const std::string_view line) {
+    if (!stream_.is_open()) {
+      return 0;
+    }
+
+    stream_ << line << newline_;
+
+    // including the (possibly multi-character) newline sequence
+    return line.size() + newline_.size();
+  }
+
+ private:
+  std::ofstream stream_;
+  std::string newline_;
+};
+
+std::string CreateRow(const std::vector<std::string> &columns, const std::string_view delim) {
+  return utils::Join(columns, delim);
+}
+
+auto ToPmrColumns(const std::vector<std::string> &columns) {
+  utils::pmr::vector<utils::pmr::string> pmr_columns(utils::NewDeleteResource());
+  for (const auto &col : columns) {
+    pmr_columns.emplace_back(col);
+  }
+  return pmr_columns;
+}
+
+}  // namespace
+
+TEST_P(CsvReaderTest, CommaDelimiter) {
+  // create a file with a single valid row;
+  const auto filepath = csv_directory / "bla.csv";
+  auto writer = FileWriter(filepath, GetParam());
+
+  const std::vector<std::string> columns{"A", "B", "C"};
+  writer.WriteLine(CreateRow(columns, ","));
+
+  writer.Close();
+
+  utils::MemoryResource *mem{utils::NewDeleteResource()};
+
+  bool with_header = false;
+  bool ignore_bad = false;
+  utils::pmr::string delimiter{",", mem};
+  utils::pmr::string quote{"\"", mem};
+
+  csv::Reader::Config cfg{with_header, ignore_bad, delimiter, quote};
+  auto reader = csv::Reader(filepath, cfg, mem);
+
+  auto parsed_row = reader.GetNextRow(mem);
+  ASSERT_EQ(*parsed_row, ToPmrColumns(columns));
+}
+
+TEST_P(CsvReaderTest, SemicolonDelimiter) {
+  const auto filepath = csv_directory / "bla.csv";
+  auto writer = FileWriter(filepath, GetParam());
+
+  utils::MemoryResource *mem(utils::NewDeleteResource());
+
+  const utils::pmr::string delimiter{";", mem};
+  const utils::pmr::string quote{"\"", mem};
+
+  const std::vector<std::string> columns{"A", "B", "C"};
+  writer.WriteLine(CreateRow(columns, delimiter));
+
+  writer.Close();
+
+  const bool with_header = false;
+  const bool ignore_bad = false;
+  const csv::Reader::Config cfg{with_header, ignore_bad, delimiter, quote};
+  auto reader = csv::Reader(filepath, cfg, mem);
+
+  auto parsed_row = reader.GetNextRow(mem);
+  ASSERT_EQ(*parsed_row, ToPmrColumns(columns));
+}
+
+TEST_P(CsvReaderTest, SkipBad) {
+  // create a file with the first two rows invalid (containing a string with a
+  // missing closing quote);
+  // the last row is valid;
+  const auto filepath = csv_directory / "bla.csv";
+  auto writer = FileWriter(filepath, GetParam());
+
+  utils::MemoryResource *mem(utils::NewDeleteResource());
+
+  const utils::pmr::string delimiter{";", mem};
+  const utils::pmr::string quote{"\"", mem};
+
+  const std::vector<std::string> columns_bad{"A", "B", "\"\"C"};
+  writer.WriteLine(CreateRow(columns_bad, delimiter));
+  writer.WriteLine(CreateRow(columns_bad, delimiter));
+
+  const std::vector<std::string> columns_good{"A", "B", "C"};
+  writer.WriteLine(CreateRow(columns_good, delimiter));
+
+  writer.Close();
+
+  {
+    // we set the 'ignore_bad' flag in the read configuration to 'true';
+    // the parser's output should be solely the valid row;
+    const bool with_header = false;
+    const bool ignore_bad = true;
+    const csv::Reader::Config cfg{with_header, ignore_bad, delimiter, quote};
+    auto reader = csv::Reader(filepath, cfg, mem);
+
+    auto parsed_row = reader.GetNextRow(mem);
+    ASSERT_EQ(*parsed_row, ToPmrColumns(columns_good));
+  }
+
+  {
+    // we set the 'ignore_bad' flag in the read configuration to 'false';
+    // an exception must be thrown;
+    const bool with_header = false;
+    const bool ignore_bad = false;
+    const csv::Reader::Config cfg{with_header, ignore_bad, delimiter, quote};
+    auto reader = csv::Reader(filepath, cfg, mem);
+
+    EXPECT_THROW(reader.GetNextRow(mem), csv::CsvReadException);
+  }
+}
+
+TEST_P(CsvReaderTest, AllRowsValid) {
+  // create a file with all rows valid;
+  // the parser should return every row and only then 'std::nullopt'
+  const auto filepath = csv_directory / "bla.csv";
+  auto writer = FileWriter(filepath, GetParam());
+
+  utils::MemoryResource *mem(utils::NewDeleteResource());
+
+  const utils::pmr::string delimiter{",", mem};
+  const utils::pmr::string quote{"\"", mem};
+
+  std::vector<std::string> columns{"A", "B", "C"};
+  writer.WriteLine(CreateRow(columns, delimiter));
+  writer.WriteLine(CreateRow(columns, delimiter));
+  writer.WriteLine(CreateRow(columns, delimiter));
+
+  writer.Close();
+
+  const bool with_header = false;
+  const bool ignore_bad = false;
+  const csv::Reader::Config cfg{with_header, ignore_bad, delimiter, quote};
+  auto reader = csv::Reader(filepath, cfg);
+
+  const auto pmr_columns = ToPmrColumns(columns);
+  while (auto parsed_row = reader.GetNextRow(mem)) {
+    ASSERT_EQ(*parsed_row, pmr_columns);
+  }
+}
+
+TEST_P(CsvReaderTest, SkipAllRows) {
+  // create a file with all rows invalid (containing a string with a missing closing quote);
+  // the parser should skip them all and return 'std::nullopt'
+  const auto filepath = csv_directory / "bla.csv";
+  auto writer = FileWriter(filepath, GetParam());
+
+  utils::MemoryResource *mem(utils::NewDeleteResource());
+
+  const utils::pmr::string delimiter{",", mem};
+  const utils::pmr::string quote{"\"", mem};
+
+  const std::vector<std::string> columns_bad{"A", "B", "\"\"C"};
+  writer.WriteLine(CreateRow(columns_bad, delimiter));
+  writer.WriteLine(CreateRow(columns_bad, delimiter));
+  writer.WriteLine(CreateRow(columns_bad, delimiter));
+
+  writer.Close();
+
+  const bool with_header = false;
+  const bool ignore_bad = true;
+  const csv::Reader::Config cfg{with_header, ignore_bad, delimiter, quote};
+  auto reader = csv::Reader(filepath, cfg);
+
+  auto parsed_row = reader.GetNextRow(mem);
+  ASSERT_EQ(parsed_row, std::nullopt);
+}
+
+TEST_P(CsvReaderTest, WithHeader) {
+  const auto filepath = csv_directory / "bla.csv";
+  auto writer = FileWriter(filepath, GetParam());
+
+  utils::MemoryResource *mem(utils::NewDeleteResource());
+
+  const utils::pmr::string delimiter{",", mem};
+  const utils::pmr::string quote{"\"", mem};
+
+  const std::vector<std::string> header{"A", "B", "C"};
+  const std::vector<std::string> columns{"1", "2", "3"};
+  writer.WriteLine(CreateRow(header, delimiter));
+  writer.WriteLine(CreateRow(columns, delimiter));
+  writer.WriteLine(CreateRow(columns, delimiter));
+  writer.WriteLine(CreateRow(columns, delimiter));
+
+  writer.Close();
+
+  const bool with_header = true;
+  const bool ignore_bad = false;
+  const csv::Reader::Config cfg(with_header, ignore_bad, delimiter, quote);
+  auto reader = csv::Reader(filepath, cfg);
+
+  const auto pmr_header = ToPmrColumns(header);
+  ASSERT_EQ(reader.GetHeader(), pmr_header);
+
+  const auto pmr_columns = ToPmrColumns(columns);
+  while (auto parsed_row = reader.GetNextRow(mem)) {
+    ASSERT_EQ(*parsed_row, pmr_columns);
+  }
+}
+
+TEST_P(CsvReaderTest, MultilineQuotedString) {
+  // create a file with the first row valid and the second row containing a
+  // quoted string spanning two lines;
+  // the parser should return two valid rows
+  const auto filepath = csv_directory / "bla.csv";
+  auto writer = FileWriter(filepath, GetParam());
+
+  utils::MemoryResource *mem(utils::NewDeleteResource());
+
+  const utils::pmr::string delimiter{",", mem};
+  const utils::pmr::string quote{"\"", mem};
+
+  const std::vector<std::string> first_row{"A", "B", "C"};
+  const std::vector<std::string> multiline_first{"D", "\"E", "\"\"F"};
+  const std::vector<std::string> multiline_second{"G\"", "H"};
+
+  writer.WriteLine(CreateRow(first_row, delimiter));
+  writer.WriteLine(CreateRow(multiline_first, delimiter));
+  writer.WriteLine(CreateRow(multiline_second, delimiter));
+
+  writer.Close();
+
+  const bool with_header = false;
+  const bool ignore_bad = true;
+  const csv::Reader::Config cfg{with_header, ignore_bad, delimiter, quote};
+  auto reader = csv::Reader(filepath, cfg);
+
+  auto parsed_row = reader.GetNextRow(mem);
+  ASSERT_EQ(*parsed_row, ToPmrColumns(first_row));
+
+  const std::vector<std::string> expected_multiline{"D", "E,\"FG", "H"};
+  parsed_row = reader.GetNextRow(mem);
+  ASSERT_EQ(*parsed_row, ToPmrColumns(expected_multiline));
+}
+
+TEST_P(CsvReaderTest, EmptyColumns) {
+  // create a file with all rows valid, each containing one empty column;
+  // the parser should return each row with the empty column preserved
+  const auto filepath = csv_directory / "bla.csv";
+  auto writer = FileWriter(filepath, GetParam());
+
+  utils::MemoryResource *mem(utils::NewDeleteResource());
+
+  const utils::pmr::string delimiter{",", mem};
+  const utils::pmr::string quote{"\"", mem};
+
+  std::vector<std::vector<std::string>> expected_rows{{"", "B", "C"}, {"A", "", "C"}, {"A", "B", ""}};
+
+  for (const auto &row : expected_rows) {
+    writer.WriteLine(CreateRow(row, delimiter));
+  }
+
+  writer.Close();
+
+  const bool with_header = false;
+  const bool ignore_bad = false;
+  const csv::Reader::Config cfg{with_header, ignore_bad, delimiter, quote};
+  auto reader = csv::Reader(filepath, cfg);
+
+  for (const auto &expected_row : expected_rows) {
+    const auto pmr_expected_row = ToPmrColumns(expected_row);
+    const auto parsed_row = reader.GetNextRow(mem);
+    ASSERT_TRUE(parsed_row.has_value());
+    ASSERT_EQ(*parsed_row, pmr_expected_row);
+  }
+}
+
+INSTANTIATE_TEST_CASE_P(NewlineParameterizedTest, CsvReaderTest, ::testing::Values("\n", "\r\n"));
diff --git a/tests/unit/utils_memory.cpp b/tests/unit/utils_memory.cpp
index 18e39ae72..54c50fed3 100644
--- a/tests/unit/utils_memory.cpp
+++ b/tests/unit/utils_memory.cpp
@@ -12,6 +12,7 @@ class TestMemory final : public utils::MemoryResource {
   size_t delete_count_{0};
diff --git a/tests/unit/utils_memory.cpp b/tests/unit/utils_memory.cpp
index 18e39ae72..54c50fed3 100644
--- a/tests/unit/utils_memory.cpp
+++ b/tests/unit/utils_memory.cpp
@@ -12,6 +12,7 @@ class TestMemory final : public utils::MemoryResource {
   size_t delete_count_{0};
 
  private:
+  static constexpr size_t kPadSize = 32;
   void *DoAllocate(size_t bytes, size_t alignment) override {
     new_count_++;
     EXPECT_TRUE(alignment != 0U && (alignment & (alignment - 1U)) == 0U) << "Alignment must be power of 2";
@@ -20,11 +21,11 @@ class TestMemory final : public utils::MemoryResource {
     EXPECT_TRUE(bytes + pad_size > bytes) << "TestMemory size overflow";
     EXPECT_TRUE(bytes + pad_size + alignment > bytes + alignment) << "TestMemory size overflow";
     EXPECT_TRUE(2U * alignment > alignment) << "TestMemory alignment overflow";
-    // Allocate a block containing extra alignment and pad_size bytes, but
+    // Allocate a block containing extra alignment and kPadSize bytes, but
     // aligned to 2 * alignment. Then we can offset the ptr so that it's never
     // aligned to 2 * alignment. This ought to make allocator alignment issues
     // more obvious.
-    void *ptr = utils::NewDeleteResource()->Allocate(alignment + bytes + pad_size, 2U * alignment);
+    void *ptr = utils::NewDeleteResource()->Allocate(alignment + bytes + kPadSize, 2U * alignment);
     // Clear allocated memory to 0xFF, marking the invalid region.
     memset(ptr, 0xFF, alignment + bytes + pad_size);
     // Offset the ptr so it's not aligned to 2 * alignment, but still aligned to
@@ -39,7 +40,8 @@ class TestMemory final : public utils::MemoryResource {
   void DoDeallocate(void *ptr, size_t bytes, size_t alignment) override {
     delete_count_++;
     // Deallocate the original ptr, before alignment adjustment.
-    return utils::NewDeleteResource()->Deallocate(static_cast<char *>(ptr) - alignment, bytes, alignment);
+    return utils::NewDeleteResource()->Deallocate(static_cast<char *>(ptr) - alignment, alignment + bytes + kPadSize,
+                                                  2U * alignment);
   }
 
   bool DoIsEqual(const utils::MemoryResource &other) const noexcept override { return this == &other; }
diff --git a/tests/unit/utils_memory_tracker.cpp b/tests/unit/utils_memory_tracker.cpp
new file mode 100644
index 000000000..37acc07b4
--- /dev/null
+++ b/tests/unit/utils_memory_tracker.cpp
@@ -0,0 +1,59 @@
+#include <thread>
+
+#include <gtest/gtest.h>
+
+#include "utils/memory_tracker.hpp"
+#include "utils/on_scope_exit.hpp"
+
+TEST(MemoryTrackerTest, ExceptionEnabler) {
+  utils::MemoryTracker memory_tracker;
+
+  constexpr size_t hard_limit = 10;
+  memory_tracker.SetHardLimit(hard_limit);
+
+  std::atomic<bool> can_continue{false};
+  std::atomic<bool> enabler_created{false};
+  std::thread t1{[&] {
+    // wait until the second thread creates the exception enabler
+    while (!enabler_created)
+      ;
+
+    // we use OnScopeExit so the test doesn't deadlock when
+    // an ASSERT fails
+    utils::OnScopeExit thread_notifier{[&] {
+      // tell the second thread it can finish its test
+      can_continue = true;
+    }};
+
+    ASSERT_NO_THROW(memory_tracker.Alloc(hard_limit + 1));
+  }};
+
+  std::thread t2{[&] {
+    utils::MemoryTracker::OutOfMemoryExceptionEnabler exception_enabler;
+    enabler_created = true;
+    ASSERT_THROW(memory_tracker.Alloc(hard_limit + 1), utils::OutOfMemoryException);
+
+    // hold the enabler until the first thread finishes
+    while (!can_continue)
+      ;
+  }};
+
+  t1.join();
+  t2.join();
+}
+
+TEST(MemoryTrackerTest, ExceptionBlocker) {
+  utils::MemoryTracker memory_tracker;
+
+  constexpr size_t hard_limit = 10;
+  memory_tracker.SetHardLimit(hard_limit);
+
+  utils::MemoryTracker::OutOfMemoryExceptionEnabler exception_enabler;
+  {
+    utils::MemoryTracker::OutOfMemoryExceptionBlocker exception_blocker;
+
+    ASSERT_NO_THROW(memory_tracker.Alloc(hard_limit + 1));
+    ASSERT_EQ(memory_tracker.Amount(), hard_limit + 1);
+  }
+  ASSERT_THROW(memory_tracker.Alloc(hard_limit + 1), utils::OutOfMemoryException);
+}
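The two tracker tests encode an opt-in exception model: exceeding the hard limit only throws in a thread that currently holds an OutOfMemoryExceptionEnabler, and a nested OutOfMemoryExceptionBlocker suppresses the throw again while the allocation is still counted. A Python sketch of that thread-local state machine; the names mirror the C++ API, and bookkeeping details such as rolling the counter back on failure are simplified away:

    import threading

    class OutOfMemoryError(Exception):
        pass

    _state = threading.local()  # per-thread enabler/blocker depth

    class ExceptionEnabler:
        def __enter__(self):
            _state.enabled = getattr(_state, "enabled", 0) + 1
        def __exit__(self, *exc):
            _state.enabled -= 1

    class ExceptionBlocker:
        def __enter__(self):
            _state.blocked = getattr(_state, "blocked", 0) + 1
        def __exit__(self, *exc):
            _state.blocked -= 1

    class MemoryTracker:
        def __init__(self):
            self.amount = 0
            self.hard_limit = None

        def alloc(self, size):
            self.amount += size  # the allocation is always accounted for
            throwing = (getattr(_state, "enabled", 0) > 0
                        and getattr(_state, "blocked", 0) == 0)
            if throwing and self.hard_limit is not None and self.amount > self.hard_limit:
                raise OutOfMemoryError(size)

    tracker = MemoryTracker()
    tracker.hard_limit = 10
    tracker.alloc(11)  # no enabler in this thread, so no exception
    with ExceptionEnabler():
        with ExceptionBlocker():
            tracker.alloc(11)  # blocked: counted, but no exception
        try:
            tracker.alloc(11)
            raise AssertionError("expected OutOfMemoryError")
        except OutOfMemoryError:
            pass  # enabled and unblocked: the limit is enforced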
diff --git a/tools/bench-graph-client/main.py b/tools/bench-graph-client/main.py
new file mode 100755
index 000000000..e90340ef0
--- /dev/null
+++ b/tools/bench-graph-client/main.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python3
+
+"""
+Bench Graph client responsible for sending benchmarking data in JSON format to
+the Bench Graph server.
+"""
+
+import json
+import logging
+import os
+import requests
+import subprocess
+from datetime import datetime
+from argparse import ArgumentParser
+
+SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+
+GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY", "")
+GITHUB_SHA = os.getenv("GITHUB_SHA", "")
+GITHUB_REF = os.getenv("GITHUB_REF", "")
+
+BENCH_GRAPH_SERVER_ENDPOINT = os.getenv(
+    "BENCH_GRAPH_SERVER_ENDPOINT",
+    "http://bench-graph-api:9001")
+
+log = logging.getLogger(__name__)
+
+
+def parse_args():
+    argp = ArgumentParser(description=__doc__)
+    argp.add_argument("--benchmark-name", type=str, required=True)
+    argp.add_argument("--benchmark-results-path", type=str, required=True)
+    argp.add_argument("--github-run-id", type=int, required=True)
+    argp.add_argument("--github-run-number", type=int, required=True)
+    return argp.parse_args()
+
+
+def post_measurement(args):
+    with open(args.benchmark_results_path, "r") as f:
+        data = json.load(f)
+    timestamp = datetime.now().timestamp()
+    branch = subprocess.run(
+        ["git", "rev-parse", "--abbrev-ref", "HEAD"],
+        stdout=subprocess.PIPE,
+        check=True).stdout.decode("utf-8").strip()
+    req = requests.post(
+        f"{BENCH_GRAPH_SERVER_ENDPOINT}/measurements",
+        json={
+            "name": args.benchmark_name,
+            "timestamp": timestamp,
+            "git_repo": GITHUB_REPOSITORY,
+            "git_ref": GITHUB_REF,
+            "git_sha": GITHUB_SHA,
+            "github_run_id": args.github_run_id,
+            "github_run_number": args.github_run_number,
+            "results": data,
+            "git_branch": branch},
+        timeout=1)
+    assert req.status_code == 200, \
+        f"Uploading {args.benchmark_name} data failed."
+    log.info(f"{args.benchmark_name} data sent to "
+             f"{BENCH_GRAPH_SERVER_ENDPOINT}")
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    logging.basicConfig(level=logging.INFO)
+    post_measurement(args)
diff --git a/tools/bench-graph-client/requirements.txt b/tools/bench-graph-client/requirements.txt
new file mode 100644
index 000000000..9d84d3588
--- /dev/null
+++ b/tools/bench-graph-client/requirements.txt
@@ -0,0 +1 @@
+requests==2.25.1
diff --git a/tools/github/clang-tidy/clang-tidy-diff.py b/tools/github/clang-tidy/clang-tidy-diff.py
new file mode 100755
index 000000000..a20b1f1f4
--- /dev/null
+++ b/tools/github/clang-tidy/clang-tidy-diff.py
@@ -0,0 +1,269 @@
+#!/usr/bin/env python3
+#
+#===- clang-tidy-diff.py - ClangTidy Diff Checker -----------*- python -*--===#
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#===-----------------------------------------------------------------------===#
+
+r"""
+ClangTidy Diff Checker
+======================
+
+This script reads input from a unified diff, runs clang-tidy on all changed
+files and outputs clang-tidy warnings in changed lines only. This is useful to
+detect clang-tidy regressions in the lines touched by a specific patch.
+Example usage for git/svn users:
+
+  git diff -U0 HEAD^ | clang-tidy-diff.py -p1
+  svn diff --diff-cmd=diff -x-U0 | \
+      clang-tidy-diff.py -fix -checks=-*,modernize-use-override
+
+"""
+
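The core trick of this script is translating unified-diff "+++" headers and "@@" hunk headers into clang-tidy's -line-filter JSON, so diagnostics outside the touched lines are discarded. A condensed, runnable illustration of that mapping, using the same regular expressions as the script; the file names in the toy diff are made up, and the real script additionally applies the -p prefix stripping and file-extension filters:

    import json
    import re

    diff_text = ("+++ b/src/storage/v2/storage.cpp\n"
                 "@@ -10,0 +11,2 @@\n"
                 "+++ b/src/query/plan/operator.cpp\n"
                 "@@ -5,3 +7 @@\n")

    lines_by_file = {}
    filename = None
    for line in diff_text.splitlines():
        match = re.search(r'^\+\+\+\ \"?(.*?/){1}([^ \t\n\"]*)', line)  # -p1
        if match:
            filename = match.group(2)
            continue
        match = re.search(r'^@@.*\+(\d+)(,(\d+))?', line)
        if match and filename:
            start = int(match.group(1))
            count = int(match.group(3)) if match.group(3) else 1
            if count == 0:
                continue  # pure deletions touch no new lines
            lines_by_file.setdefault(filename, []).append([start, start + count - 1])

    for name, lines in lines_by_file.items():
        print('-line-filter=' + json.dumps([{"name": name, "lines": lines}],
                                           separators=(',', ':')))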
+import argparse
+import glob
+import json
+import multiprocessing
+import os
+import re
+import shutil
+import subprocess
+import sys
+import tempfile
+import threading
+import traceback
+
+try:
+  import yaml
+except ImportError:
+  yaml = None
+
+is_py2 = sys.version[0] == '2'
+
+if is_py2:
+  import Queue as queue
+else:
+  import queue as queue
+
+
+def run_tidy(task_queue, lock, timeout):
+  watchdog = None
+  while True:
+    command = task_queue.get()
+    try:
+      proc = subprocess.Popen(command,
+                              stdout=subprocess.PIPE,
+                              stderr=subprocess.PIPE)
+
+      if timeout is not None:
+        watchdog = threading.Timer(timeout, proc.kill)
+        watchdog.start()
+
+      stdout, stderr = proc.communicate()
+
+      with lock:
+        sys.stdout.write(stdout.decode('utf-8') + '\n')
+        sys.stdout.flush()
+        if stderr:
+          sys.stderr.write(stderr.decode('utf-8') + '\n')
+          sys.stderr.flush()
+    except Exception as e:
+      with lock:
+        sys.stderr.write('Failed: ' + str(e) + ': ' + ' '.join(command) + '\n')
+    finally:
+      with lock:
+        if not (timeout is None or watchdog is None):
+          if not watchdog.is_alive():
+            sys.stderr.write('Terminated by timeout: ' +
+                             ' '.join(command) + '\n')
+          watchdog.cancel()
+      task_queue.task_done()
+
+
+def start_workers(max_tasks, tidy_caller, task_queue, lock, timeout):
+  for _ in range(max_tasks):
+    t = threading.Thread(target=tidy_caller, args=(task_queue, lock, timeout))
+    t.daemon = True
+    t.start()
+
+
+def merge_replacement_files(tmpdir, mergefile):
+  """Merge all replacement files in a directory into a single file"""
+  # The fixes suggested by clang-tidy >= 4.0.0 are given under
+  # the top level key 'Diagnostics' in the output yaml files
+  mergekey = "Diagnostics"
+  merged = []
+  for replacefile in glob.iglob(os.path.join(tmpdir, '*.yaml')):
+    content = yaml.safe_load(open(replacefile, 'r'))
+    if not content:
+      continue  # Skip empty files.
+    merged.extend(content.get(mergekey, []))
+
+  if merged:
+    # MainSourceFile: The key is required by the definition inside
+    # include/clang/Tooling/ReplacementsYaml.h, but the value
+    # is actually never used inside clang-apply-replacements,
+    # so we set it to '' here.
+ output = {'MainSourceFile': '', mergekey: merged} + with open(mergefile, 'w') as out: + yaml.safe_dump(output, out) + else: + # Empty the file: + open(mergefile, 'w').close() + + +def main(): + parser = argparse.ArgumentParser(description= + 'Run clang-tidy against changed files, and ' + 'output diagnostics only for modified ' + 'lines.') + parser.add_argument('-clang-tidy-binary', metavar='PATH', + default='clang-tidy', + help='path to clang-tidy binary') + parser.add_argument('-p', metavar='NUM', default=0, + help='strip the smallest prefix containing P slashes') + parser.add_argument('-regex', metavar='PATTERN', default=None, + help='custom pattern selecting file paths to check ' + '(case sensitive, overrides -iregex)') + parser.add_argument('-iregex', metavar='PATTERN', default= + r'.*\.(cpp|cc|c\+\+|cxx|c|cl|h|hpp|m|mm|inc)', + help='custom pattern selecting file paths to check ' + '(case insensitive, overridden by -regex)') + parser.add_argument('-j', type=int, default=1, + help='number of tidy instances to be run in parallel.') + parser.add_argument('-timeout', type=int, default=None, + help='timeout per each file in seconds.') + parser.add_argument('-fix', action='store_true', default=False, + help='apply suggested fixes') + parser.add_argument('-checks', + help='checks filter, when not specified, use clang-tidy ' + 'default', + default='') + parser.add_argument('-path', dest='build_path', + help='Path used to read a compile command database.') + if yaml: + parser.add_argument('-export-fixes', metavar='FILE', dest='export_fixes', + help='Create a yaml file to store suggested fixes in, ' + 'which can be applied with clang-apply-replacements.') + parser.add_argument('-extra-arg', dest='extra_arg', + action='append', default=[], + help='Additional argument to append to the compiler ' + 'command line.') + parser.add_argument('-extra-arg-before', dest='extra_arg_before', + action='append', default=[], + help='Additional argument to prepend to the compiler ' + 'command line.') + parser.add_argument('-quiet', action='store_true', default=False, + help='Run clang-tidy in quiet mode') + clang_tidy_args = [] + argv = sys.argv[1:] + if '--' in argv: + clang_tidy_args.extend(argv[argv.index('--'):]) + argv = argv[:argv.index('--')] + + args = parser.parse_args(argv) + + # Extract changed lines for each file. + filename = None + lines_by_file = {} + for line in sys.stdin: + match = re.search('^\+\+\+\ \"?(.*?/){%s}([^ \t\n\"]*)' % args.p, line) + if match: + filename = match.group(2) + if filename is None: + continue + + if args.regex is not None: + if not re.match('^%s$' % args.regex, filename): + continue + else: + if not re.match('^%s$' % args.iregex, filename, re.IGNORECASE): + continue + + match = re.search('^@@.*\+(\d+)(,(\d+))?', line) + if match: + start_line = int(match.group(1)) + line_count = 1 + if match.group(3): + line_count = int(match.group(3)) + if line_count == 0: + continue + end_line = start_line + line_count - 1 + lines_by_file.setdefault(filename, []).append([start_line, end_line]) + + if not any(lines_by_file): + print("No relevant changes found.") + sys.exit(0) + + max_task_count = args.j + if max_task_count == 0: + max_task_count = multiprocessing.cpu_count() + max_task_count = min(len(lines_by_file), max_task_count) + + tmpdir = None + if yaml and args.export_fixes: + tmpdir = tempfile.mkdtemp() + + # Tasks for clang-tidy. + task_queue = queue.Queue(max_task_count) + # A lock for console output. + lock = threading.Lock() + + # Run a pool of clang-tidy workers. 
+  start_workers(max_task_count, run_tidy, task_queue, lock, args.timeout)
+
+  # Form the common args list.
+  common_clang_tidy_args = []
+  if args.fix:
+    common_clang_tidy_args.append('-fix')
+  if args.checks != '':
+    common_clang_tidy_args.append('-checks=' + args.checks)
+  if args.quiet:
+    common_clang_tidy_args.append('-quiet')
+  if args.build_path is not None:
+    common_clang_tidy_args.append('-p=%s' % args.build_path)
+  for arg in args.extra_arg:
+    common_clang_tidy_args.append('-extra-arg=%s' % arg)
+  for arg in args.extra_arg_before:
+    common_clang_tidy_args.append('-extra-arg-before=%s' % arg)
+
+  for name in lines_by_file:
+    line_filter_json = json.dumps(
+      [{"name": name, "lines": lines_by_file[name]}],
+      separators=(',', ':'))
+
+    # Run clang-tidy on files containing changes.
+    command = [args.clang_tidy_binary]
+    command.append('-line-filter=' + line_filter_json)
+    if yaml and args.export_fixes:
+      # Get a temporary file. We immediately close the handle so clang-tidy can
+      # overwrite it.
+      (handle, tmp_name) = tempfile.mkstemp(suffix='.yaml', dir=tmpdir)
+      os.close(handle)
+      command.append('-export-fixes=' + tmp_name)
+    command.extend(common_clang_tidy_args)
+    command.append(name)
+    command.extend(clang_tidy_args)
+
+    task_queue.put(command)
+
+  # Wait for all threads to be done.
+  task_queue.join()
+
+  if yaml and args.export_fixes:
+    print('Writing fixes to ' + args.export_fixes + ' ...')
+    try:
+      merge_replacement_files(tmpdir, args.export_fixes)
+    except:
+      sys.stderr.write('Error exporting fixes.\n')
+      traceback.print_exc()
+
+  if tmpdir:
+    shutil.rmtree(tmpdir)
+
+
+if __name__ == '__main__':
+  main()
diff --git a/tools/github/clang-tidy/count_errors.sh b/tools/github/clang-tidy/count_errors.sh
new file mode 100755
index 000000000..4237099eb
--- /dev/null
+++ b/tools/github/clang-tidy/count_errors.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+# the first sort | uniq is necessary because the same error occurrence can be
+# reported multiple times from headers included in multiple source files
+`dirname ${BASH_SOURCE[0]}`/grep_error_lines.sh |
+  sort | uniq |
+  sed -E 's/.*\[(.*)\]\r?$/\1/g' |  # extract the check name from [check-name]
+  sort | uniq -c |                  # count each type of check
+  sort -nr                          # sort them into descending order
diff --git a/tools/github/clang-tidy/grep_error_lines.sh b/tools/github/clang-tidy/grep_error_lines.sh
new file mode 100755
index 000000000..3dd0fd7f0
--- /dev/null
+++ b/tools/github/clang-tidy/grep_error_lines.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+# Matches a timestamp like "2021-03-25T17:06:42.2621697Z"
+TIMESTAMP_PATTERN="\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{7}Z"
+
+# Matches an absolute file path with a line and column identifier like
+# "/opt/actions-runner/_work/memgraph/memgraph/src/utils/exceptions.hpp:71:11:"
+FILE_ABSOLUTE_PATH_PATTERN="/[^:]+:\d+:\d+:"
+
+ERROR_OR_WARNING_PATTERN="(error|warning):"
+
+grep -P "^($TIMESTAMP_PATTERN )?$FILE_ABSOLUTE_PATH_PATTERN $ERROR_OR_WARNING_PATTERN.*$"
\ No newline at end of file
diff --git a/tools/github/clang-tidy/run-clang-tidy.py b/tools/github/clang-tidy/run-clang-tidy.py
new file mode 100755
index 000000000..0dbac0b25
--- /dev/null
+++ b/tools/github/clang-tidy/run-clang-tidy.py
@@ -0,0 +1,337 @@
+#!/usr/bin/env python3
+#
+#===- run-clang-tidy.py - Parallel clang-tidy runner --------*- python -*--===#
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +#===-----------------------------------------------------------------------===# +# FIXME: Integrate with clang-tidy-diff.py + + +""" +Parallel clang-tidy runner +========================== + +Runs clang-tidy over all files in a compilation database. Requires clang-tidy +and clang-apply-replacements in $PATH. + +Example invocations. +- Run clang-tidy on all files in the current working directory with a default + set of checks and show warnings in the cpp files and all project headers. + run-clang-tidy.py $PWD + +- Fix all header guards. + run-clang-tidy.py -fix -checks=-*,llvm-header-guard + +- Fix all header guards included from clang-tidy and header guards + for clang-tidy headers. + run-clang-tidy.py -fix -checks=-*,llvm-header-guard extra/clang-tidy \ + -header-filter=extra/clang-tidy + +Compilation database setup: +http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html +""" + +from __future__ import print_function + +import argparse +import glob +import json +import multiprocessing +import os +import re +import shutil +import subprocess +import sys +import tempfile +import threading +import traceback + +try: + import yaml +except ImportError: + yaml = None + +is_py2 = sys.version[0] == '2' + +if is_py2: + import Queue as queue +else: + import queue as queue + + +def find_compilation_database(path): + """Adjusts the directory until a compilation database is found.""" + result = './' + while not os.path.isfile(os.path.join(result, path)): + if os.path.realpath(result) == '/': + print('Error: could not find compilation database.') + sys.exit(1) + result += '../' + return os.path.realpath(result) + + +def make_absolute(f, directory): + if os.path.isabs(f): + return f + return os.path.normpath(os.path.join(directory, f)) + + +def get_tidy_invocation(f, clang_tidy_binary, checks, tmpdir, build_path, + header_filter, allow_enabling_alpha_checkers, + extra_arg, extra_arg_before, quiet, config): + """Gets a command line for clang-tidy.""" + start = [clang_tidy_binary] + if allow_enabling_alpha_checkers: + start.append('-allow-enabling-analyzer-alpha-checkers') + if header_filter is not None: + start.append('-header-filter=' + header_filter) + if checks: + start.append('-checks=' + checks) + if tmpdir is not None: + start.append('-export-fixes') + # Get a temporary file. We immediately close the handle so clang-tidy can + # overwrite it. + (handle, name) = tempfile.mkstemp(suffix='.yaml', dir=tmpdir) + os.close(handle) + start.append(name) + for arg in extra_arg: + start.append('-extra-arg=%s' % arg) + for arg in extra_arg_before: + start.append('-extra-arg-before=%s' % arg) + start.append('-p=' + build_path) + if quiet: + start.append('-quiet') + if config: + start.append('-config=' + config) + start.append(f) + return start + + +def merge_replacement_files(tmpdir, mergefile): + """Merge all replacement files in a directory into a single file""" + # The fixes suggested by clang-tidy >= 4.0.0 are given under + # the top level key 'Diagnostics' in the output yaml files + mergekey = "Diagnostics" + merged=[] + for replacefile in glob.iglob(os.path.join(tmpdir, '*.yaml')): + content = yaml.safe_load(open(replacefile, 'r')) + if not content: + continue # Skip empty files. 
+ merged.extend(content.get(mergekey, [])) + + if merged: + # MainSourceFile: The key is required by the definition inside + # include/clang/Tooling/ReplacementsYaml.h, but the value + # is actually never used inside clang-apply-replacements, + # so we set it to '' here. + output = {'MainSourceFile': '', mergekey: merged} + with open(mergefile, 'w') as out: + yaml.safe_dump(output, out) + else: + # Empty the file: + open(mergefile, 'w').close() + + +def check_clang_apply_replacements_binary(args): + """Checks if invoking supplied clang-apply-replacements binary works.""" + try: + subprocess.check_call([args.clang_apply_replacements_binary, '--version']) + except: + print('Unable to run clang-apply-replacements. Is clang-apply-replacements ' + 'binary correctly specified?', file=sys.stderr) + traceback.print_exc() + sys.exit(1) + + +def apply_fixes(args, tmpdir): + """Calls clang-apply-fixes on a given directory.""" + invocation = [args.clang_apply_replacements_binary] + if args.format: + invocation.append('-format') + if args.style: + invocation.append('-style=' + args.style) + invocation.append(tmpdir) + subprocess.call(invocation) + + +def run_tidy(args, tmpdir, build_path, queue, lock, failed_files): + """Takes filenames out of queue and runs clang-tidy on them.""" + while True: + name = queue.get() + invocation = get_tidy_invocation(name, args.clang_tidy_binary, args.checks, + tmpdir, build_path, args.header_filter, + args.allow_enabling_alpha_checkers, + args.extra_arg, args.extra_arg_before, + args.quiet, args.config) + + proc = subprocess.Popen(invocation, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + output, err = proc.communicate() + if proc.returncode != 0: + failed_files.append(name) + with lock: + sys.stdout.write(' '.join(invocation) + '\n' + output.decode('utf-8')) + if len(err) > 0: + sys.stdout.flush() + sys.stderr.write(err.decode('utf-8')) + queue.task_done() + + +def main(): + parser = argparse.ArgumentParser(description='Runs clang-tidy over all files ' + 'in a compilation database. Requires ' + 'clang-tidy and clang-apply-replacements in ' + '$PATH.') + parser.add_argument('-allow-enabling-alpha-checkers', + action='store_true', help='allow alpha checkers from ' + 'clang-analyzer.') + parser.add_argument('-clang-tidy-binary', metavar='PATH', + default='clang-tidy-11', + help='path to clang-tidy binary') + parser.add_argument('-clang-apply-replacements-binary', metavar='PATH', + default='clang-apply-replacements-11', + help='path to clang-apply-replacements binary') + parser.add_argument('-checks', default=None, + help='checks filter, when not specified, use clang-tidy ' + 'default') + parser.add_argument('-config', default=None, + help='Specifies a configuration in YAML/JSON format: ' + ' -config="{Checks: \'*\', ' + ' CheckOptions: [{key: x, ' + ' value: y}]}" ' + 'When the value is empty, clang-tidy will ' + 'attempt to find a file named .clang-tidy for ' + 'each source file in its parent directories.') + parser.add_argument('-header-filter', default=None, + help='regular expression matching the names of the ' + 'headers to output diagnostics from. 
Diagnostics from ' + 'the main file of each translation unit are always ' + 'displayed.') + if yaml: + parser.add_argument('-export-fixes', metavar='filename', dest='export_fixes', + help='Create a yaml file to store suggested fixes in, ' + 'which can be applied with clang-apply-replacements.') + parser.add_argument('-j', type=int, default=0, + help='number of tidy instances to be run in parallel.') + parser.add_argument('files', nargs='*', default=['.*'], + help='files to be processed (regex on path)') + parser.add_argument('-fix', action='store_true', help='apply fix-its') + parser.add_argument('-format', action='store_true', help='Reformat code ' + 'after applying fixes') + parser.add_argument('-style', default='file', help='The style of reformat ' + 'code after applying fixes') + parser.add_argument('-p', dest='build_path', + help='Path used to read a compile command database.') + parser.add_argument('-extra-arg', dest='extra_arg', + action='append', default=[], + help='Additional argument to append to the compiler ' + 'command line.') + parser.add_argument('-extra-arg-before', dest='extra_arg_before', + action='append', default=[], + help='Additional argument to prepend to the compiler ' + 'command line.') + parser.add_argument('-quiet', action='store_true', + help='Run clang-tidy in quiet mode') + args = parser.parse_args() + + db_path = 'compile_commands.json' + + if args.build_path is not None: + build_path = args.build_path + else: + # Find our database + build_path = find_compilation_database(db_path) + + try: + invocation = [args.clang_tidy_binary, '-list-checks'] + if args.allow_enabling_alpha_checkers: + invocation.append('-allow-enabling-analyzer-alpha-checkers') + invocation.append('-p=' + build_path) + if args.checks: + invocation.append('-checks=' + args.checks) + invocation.append('-') + if args.quiet: + # Even with -quiet we still want to check if we can call clang-tidy. + with open(os.devnull, 'w') as dev_null: + subprocess.check_call(invocation, stdout=dev_null) + else: + subprocess.check_call(invocation) + except: + print("Unable to run clang-tidy.", file=sys.stderr) + sys.exit(1) + + # Load the database and extract all files. + database = json.load(open(os.path.join(build_path, db_path))) + files = [make_absolute(entry['file'], entry['directory']) + for entry in database] + + max_task = args.j + if max_task == 0: + max_task = multiprocessing.cpu_count() + + tmpdir = None + if args.fix or (yaml and args.export_fixes): + check_clang_apply_replacements_binary(args) + tmpdir = tempfile.mkdtemp() + + # Build up a big regexy filter from all command line arguments. + file_name_re = re.compile('|'.join(args.files)) + + return_code = 0 + try: + # Spin up a bunch of tidy-launching threads. + task_queue = queue.Queue(max_task) + # List of files with a non-zero return code. + failed_files = [] + lock = threading.Lock() + for _ in range(max_task): + t = threading.Thread(target=run_tidy, + args=(args, tmpdir, build_path, task_queue, lock, failed_files)) + t.daemon = True + t.start() + + # Fill the queue with files. + for name in files: + if file_name_re.search(name): + task_queue.put(name) + + # Wait for all threads to be done. + task_queue.join() + if len(failed_files): + return_code = 1 + + except KeyboardInterrupt: + # This is a sad hack. Unfortunately subprocess goes + # bonkers with ctrl-c and we start forking merrily. 
+ print('\nCtrl-C detected, goodbye.') + if tmpdir: + shutil.rmtree(tmpdir) + os.kill(0, 9) + + if yaml and args.export_fixes: + print('Writing fixes to ' + args.export_fixes + ' ...') + try: + merge_replacement_files(tmpdir, args.export_fixes) + except: + print('Error exporting fixes.\n', file=sys.stderr) + traceback.print_exc() + return_code=1 + + if args.fix: + print('Applying fixes ...') + try: + apply_fixes(args, tmpdir) + except: + print('Error applying fixes.\n', file=sys.stderr) + traceback.print_exc() + return_code = 1 + + if tmpdir: + shutil.rmtree(tmpdir) + sys.exit(return_code) + + +if __name__ == '__main__': + main() diff --git a/tools/lsan.supp b/tools/lsan.supp new file mode 100644 index 000000000..daf07ed1a --- /dev/null +++ b/tools/lsan.supp @@ -0,0 +1,12 @@ +leak:antlr4::atn::ArrayPredictionContext::ArrayPredictionContext +leak:std::__shared_count<(__gnu_cxx::_Lock_policy)2>::__shared_count, std::weak_ptr&, unsigned long&>(antlr4::atn::SingletonPredictionContext*&, std::_Sp_alloc_shared_tag >, std::weak_ptr&, unsigned long&) +leak:antlr4::atn::PredictionContext::mergeSingletons(std::shared_ptr const&, std::shared_ptr const&, bool, std::map, std::shared_ptr >, std::shared_ptr, std::less, std::shared_ptr > >, std::allocator, std::shared_ptr > const, std::shared_ptr > > >*) +leak:void std::vector, std::allocator > >::_M_realloc_insert >(__gnu_cxx::__normal_iterator*, std::vector, std::allocator > > >, std::shared_ptr&&) +leak:antlr4::atn::ParserATNSimulator::closureCheckingStopState(std::shared_ptr const&, antlr4::atn::ATNConfigSet*, std::unordered_set, antlr4::atn::ATNConfig::Hasher, antlr4::atn::ATNConfig::Comparer, std::allocator > >&, bool, bool, int, bool) +leak:antlr4::atn::ParserATNSimulator::computeReachSet(antlr4::atn::ATNConfigSet*, unsigned long, bool) +leak:std::_Hashtable, std::allocator >, std::__detail::_Select1st, std::equal_to, std::hash, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits >::_M_insert_unique_node(unsigned long const&, unsigned long, unsigned long, std::__detail::_Hash_node, false>*, unsigned long) +leak:void std::vector, std::allocator > >::_M_realloc_insert const&>(__gnu_cxx::__normal_iterator*, std::vector, std::allocator > > >, std::shared_ptr const&) +leak:antlr4::atn::ATNConfigSet::add(std::shared_ptr const&, std::map, std::shared_ptr >, std::shared_ptr, std::less, std::shared_ptr > >, std::allocator, std::shared_ptr > const, std::shared_ptr > > >*) +leak:antlr4::atn::ParserATNSimulator::getEpsilonTarget(std::shared_ptr const&, antlr4::atn::Transition*, bool, bool, bool, bool) +leak:antlr4::atn::PredictionContext::mergeArrays(std::shared_ptr const&, std::shared_ptr const&, bool, std::map, std::shared_ptr >, std::shared_ptr, std::less, std::shared_ptr > >, std::allocator, std::shared_ptr > const, std::shared_ptr > > >*) +leak:/lib/x86_64-linux-gnu/libpython3. 
diff --git a/tools/src/CMakeLists.txt b/tools/src/CMakeLists.txt index d9f5a53be..82b89c1c9 100644 --- a/tools/src/CMakeLists.txt +++ b/tools/src/CMakeLists.txt @@ -3,28 +3,11 @@ set(VERSION_STRING ${MEMGRAPH_VERSION}) configure_file(../../src/version.hpp.in version.hpp @ONLY) include_directories(${CMAKE_CURRENT_BINARY_DIR}) -# Memgraph Client Target -add_executable(mg_client mg_client/main.cpp) -set(CLIENT_LIBS mg-communication mg-io mg-utils) -if (READLINE_FOUND) - list(APPEND CLIENT_LIBS readline) -endif() -target_link_libraries(mg_client ${CLIENT_LIBS}) - # Memgraph Dump Target add_executable(mg_dump mg_dump/main.cpp) target_include_directories(mg_dump PRIVATE ${MGCLIENT_INCLUDE_DIR}) target_link_libraries(mg_dump gflags spdlog fmt mgclient pthread) - -# Strip the executable in release build. -string(TOLOWER ${CMAKE_BUILD_TYPE} lower_build_type) -if (lower_build_type STREQUAL "release") - add_custom_command(TARGET mg_client POST_BUILD - COMMAND strip -s mg_client - COMMENT "Stripping symbols and sections from mg_client") -endif() - -install(TARGETS mg_client RUNTIME DESTINATION bin) +install(TARGETS mg_dump RUNTIME DESTINATION bin) # Target for building all the tool executables. -add_custom_target(tools DEPENDS mg_client mg_dump) +add_custom_target(tools DEPENDS mg_dump) diff --git a/tools/src/mg_client/main.cpp b/tools/src/mg_client/main.cpp deleted file mode 100644 index d28ddd670..000000000 --- a/tools/src/mg_client/main.cpp +++ /dev/null @@ -1,709 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "communication/bolt/client.hpp" -#include "communication/init.hpp" -#include "io/network/endpoint.hpp" -#include "io/network/utils.hpp" -#include "utils/algorithm.hpp" -#include "utils/file.hpp" -#include "utils/flag_validation.hpp" -#include "utils/logging.hpp" -#include "utils/signals.hpp" -#include "utils/string.hpp" -#include "utils/terminate_handler.hpp" -#include "utils/timer.hpp" -#include "version.hpp" - -namespace fs = std::filesystem; - -volatile sig_atomic_t is_shutting_down = 0; - -// Usage strings. -static const std::string kUsage = - "Memgraph bolt client.\n" - "The client can be run in interactive or non-interactive mode.\n"; -static const std::string kInteractiveUsage = - "In interactive mode, user can enter cypher queries and supported " - "commands.\n\n" - "Cypher queries can span through multiple lines and conclude with a\n" - "semi-colon (;). Each query is executed in the database and the results\n" - "are printed out.\n\n" - "The following interactive commands are supported:\n\n" - "\t:help\t Print out usage for interactive mode\n" - "\t:quit\t Exit the shell\n"; - -// Supported commands. -// Maybe also add reconnect? -static const std::string kCommandQuit = ":quit"; -static const std::string kCommandHelp = ":help"; - -// Supported formats. -static const std::string kCsvFormat = "csv"; -static const std::string kTabularFormat = "tabular"; - -DEFINE_string(host, "127.0.0.1", - "Server address. It can be a DNS resolvable hostname."); -DEFINE_int32(port, 7687, "Server port"); -DEFINE_string(username, "", "Username for the database"); -DEFINE_string(password, "", "Password for the database"); -DEFINE_bool(use_ssl, true, "Use SSL when connecting to the server."); -DEFINE_bool(fit_to_screen, false, "Fit output width to screen width."); -DEFINE_VALIDATED_string( - output_format, "tabular", - "Query output format. Can be csv/tabular. 
If output format is " - "other than tabular `fit-to-screen` flag is ignored.", - { - if (value == kCsvFormat || value == kTabularFormat) { - return true; - } - return false; - }); -DEFINE_VALIDATED_string(csv_delimiter, ",", - "Character used to separate fields.", { - if (value.size() != 1) { - return false; - } - return true; - }); -DEFINE_string( - csv_escapechar, "", - "Character used to escape the quotechar(\") if csv-doublequote is false."); -DEFINE_bool( - csv_doublequote, true, - "Controls how instances of quotechar(\") appearing inside a field should " - "themselves be quoted. When true, the character is doubled. When false, " - "the escapechar is used as a prefix to the quotechar. " - "If csv-doublequote is false 'csv-escapechar' must be set."); - -static bool ValidateCsvDoubleQuote() { - if (!FLAGS_csv_doublequote && FLAGS_csv_escapechar.size() != 1) { - return false; - } - return true; -} - -#ifdef HAS_READLINE -DEFINE_string(history, "~/.memgraph", - "Use the specified directory for saving history."); -DEFINE_bool(no_history, false, "Do not save history."); - -// History default directory. -static const std::string kDefaultHistoryBaseDir = "~"; -static const std::string kDefaultHistoryMemgraphDir = ".memgraph"; -// History filename. -static const std::string kHistoryFilename = "client_history"; -#endif - -DECLARE_int32(min_log_level); - -// Unfinished query text from previous input. -// e.g. Previous input was MATCH(n) RETURN n; MATCH -// then default_text would be set to MATCH for next query. -static std::string default_text; - -static const std::string kPrompt = "memgraph> "; -static const std::string kMultilinePrompt = " -> "; - -static void PrintHelp() { std::cout << kInteractiveUsage << std::endl; } - -static void PrintValue(std::ostream &os, const std::string &value) { - os << value; -} - -static void PrintValue(std::ostream &os, - const communication::bolt::Value &value) { - switch (value.type()) { - case communication::bolt::Value::Type::String: - os << value.ValueString(); - return; - default: - os << value; - return; - } -} - -static void EchoFailure(const std::string &failure_msg, - const std::string &explanation) { - if (isatty(STDIN_FILENO)) { - std::cout << "\033[1;31m" << failure_msg << ": \033[0m"; - std::cout << explanation << std::endl; - } else { - std::cerr << failure_msg << ": "; - std::cerr << explanation << std::endl; - } -} - -static void EchoInfo(const std::string &message) { - if (isatty(STDIN_FILENO)) { - std::cout << message << std::endl; - } -} - -static void SetStdinEcho(bool enable = true) { - struct termios tty; - tcgetattr(STDIN_FILENO, &tty); - if (!enable) { - tty.c_lflag &= ~ECHO; - } else { - tty.c_lflag |= ECHO; - } - tcsetattr(STDIN_FILENO, TCSANOW, &tty); -} - -#ifdef HAS_READLINE - -#include "readline/history.h" -#include "readline/readline.h" - -/// Helper function that sets default input for 'readline' -static int SetDefaultText() { - rl_insert_text(default_text.c_str()); - default_text = ""; - rl_startup_hook = (rl_hook_func_t *)NULL; - return 0; -} - -/// Memgraph and OpenCypher keywords. 
-/// Copied from src/query/frontend/opencypher/grammar/Cypher.g4 -/// and src/query/frontend/grammar/MemgraphCypher.g4 -static const std::vector kMemgraphKeywords{ - "ALTER", "AUTH", "BATCH", "BATCHES", "CLEAR", "DATA", - "DENY", "DROP", "FOR", "FROM", "GRANT", "IDENTIFIED", - "INTERVAL", "K_TEST", "KAFKA", "LOAD", "PASSWORD", "PRIVILEGES", - "REVOKE", "ROLE", "ROLES", "SIZE", "START", "STOP", - "STREAM", "STREAMS", "TO", "TOPIC", "TRANSFORM", "USER", - "USERS"}; -static const std::vector kCypherKeywords{ - "ALL", "AND", "ANY", "AS", "ASC", "ASCENDING", - "BFS", "BY", "CASE", "CONTAINS", "COUNT", "CREATE", - "CYPHERNULL", "DELETE", "DESC", "DESCENDING", "DETACH", "DISTINCT", - "ELSE", "END", "ENDS", "EXTRACT", "FALSE", "FILTER", - "IN", "INDEX", "IS", "LIMIT", "L_SKIP", "MATCH", - "MERGE", "NONE", "NOT", "ON", "OPTIONAL", "OR", - "ORDER", "REDUCE", "REMOVE", "RETURN", "SET", "SHOW", - "SINGLE", "STARTS", "THEN", "TRUE", "UNION", "UNWIND", - "WHEN", "WHERE", "WITH", "WSHORTEST", "XOR"}; - -static char *CompletionGenerator(const char *text, int state) { - // This function is called with state=0 the first time; subsequent calls - // are with a nonzero state. state=0 can be used to perform one-time - // initialization for this completion session. - static std::vector matches; - static size_t match_index = 0; - - if (state == 0) { - // During initialization, compute the actual matches for 'text' and - // keep them in a static vector. - matches.clear(); - match_index = 0; - - // Collect a vector of matches: vocabulary words that begin with text. - std::string text_str = utils::ToUpperCase(std::string(text)); - for (auto word : kCypherKeywords) { - if (word.size() >= text_str.size() && - word.compare(0, text_str.size(), text_str) == 0) { - matches.push_back(word); - } - } - for (auto word : kMemgraphKeywords) { - if (word.size() >= text_str.size() && - word.compare(0, text_str.size(), text_str) == 0) { - matches.push_back(word); - } - } - } - - if (match_index >= matches.size()) { - // We return nullptr to notify the caller no more matches are available. - return nullptr; - } else { - // Return a malloc'd char* for the match. The caller frees it. - return strdup(matches[match_index++].c_str()); - } -} - -static char **Completer(const char *text, int start, int end) { - // Don't do filename completion even if our generator finds no matches. - rl_attempted_completion_over = 1; - // Note: returning nullptr here will make readline use the default filename - // completer. This note is copied from examples - I think because - // rl_attempted_completion_over is set to 1, filename completer won't be used. - return rl_completion_matches(text, CompletionGenerator); -} - -/// Helper function that reads a line from the -/// standard input using the 'readline' lib. -/// Adds support for history and reverse-search. -/// -/// @param prompt The prompt to display. -/// @return User input line, or nullopt on EOF. -static std::optional ReadLine(const std::string &prompt) { - if (default_text.size() > 0) { - // Initialize text with remainder of previous query. - rl_startup_hook = SetDefaultText; - } - char *line = readline(prompt.c_str()); - if (!line) return std::nullopt; - - std::string r_val(line); - if (!utils::Trim(r_val).empty()) add_history(line); - free(line); - return r_val; -} - -#else - -/// Helper function that reads a line from the standard input -/// using getline. -/// @param prompt The prompt to display. -/// @return User input line, or nullopt on EOF. 
-static std::optional ReadLine(const std::string &prompt) { - std::cout << prompt << default_text; - std::string line; - std::getline(std::cin, line); - if (std::cin.eof()) return std::nullopt; - line = default_text + line; - default_text = ""; - return line; -} - -#endif // HAS_READLINE - -static std::optional GetLine() { - std::string line; - std::getline(std::cin, line); - if (std::cin.eof()) return std::nullopt; - line = default_text + line; - default_text = ""; - return line; -} - -/// Helper function that parses user line input. -/// @param line user input line. -/// @param quote quote character or '\0'; if set line is inside quotation. -/// @param escaped if set, next character should be escaped. -/// @return pair of string and bool. string is parsed line and bool marks -/// if query finished(Query finishes with ';') with this line. -static std::pair ParseLine(const std::string &line, - char *quote, bool *escaped) { - // Parse line. - bool is_done = false; - std::stringstream parsed_line; - for (auto c : line) { - if (*quote && c == '\\') { - // Escaping is only used inside quotation to not end the quote - // when quotation char is escaped. - *escaped = !*escaped; - parsed_line << c; - continue; - } else if ((!*quote && (c == '\"' || c == '\'')) || - (!*escaped && c == *quote)) { - *quote = *quote ? '\0' : c; - } else if (!*quote && c == ';') { - is_done = true; - break; - } - parsed_line << c; - *escaped = false; - } - return std::make_pair(parsed_line.str(), is_done); -} - -static std::optional GetQuery() { - char quote = '\0'; - bool escaped = false; - auto ret = ParseLine(default_text, "e, &escaped); - if (ret.second) { - auto idx = ret.first.size() + 1; - default_text = utils::Trim(default_text.substr(idx)); - return ret.first; - } - std::stringstream query; - std::optional line; - int line_cnt = 0; - auto is_done = false; - while (!is_done) { - if (!isatty(STDIN_FILENO)) { - line = GetLine(); - } else { - line = ReadLine(line_cnt == 0 ? kPrompt : kMultilinePrompt); - if (line_cnt == 0 && line && line->size() > 0 && (*line)[0] == ':') { - auto trimmed_line = utils::Trim(*line); - if (trimmed_line == kCommandQuit) { - return std::nullopt; - } else if (trimmed_line == kCommandHelp) { - PrintHelp(); - return ""; - } else { - EchoFailure("Unsupported command", std::string(trimmed_line)); - PrintHelp(); - return ""; - } - } - } - if (!line) return std::nullopt; - if (line->empty()) continue; - auto ret = ParseLine(*line, "e, &escaped); - query << ret.first; - auto char_count = ret.first.size(); - if (ret.second) { - is_done = true; - char_count += 1; // ';' sign - } else { - // Query is multiline so append newline. 
- query << "\n"; - } - if (char_count < line->size()) { - default_text = utils::Trim(line->substr(char_count)); - } - ++line_cnt; - } - return query.str(); -} - -template -static void PrintRowTabular(const std::vector &data, int total_width, - int column_width, int num_columns, - bool all_columns_fit, int margin = 1) { - if (!all_columns_fit) num_columns -= 1; - std::string data_output = std::string(total_width, ' '); - for (auto i = 0; i < total_width; i += column_width) { - data_output[i] = '|'; - int idx = i / column_width; - if (idx < num_columns) { - std::stringstream field; - PrintValue(field, data[idx]); // convert Value to string - std::string field_str(field.str()); - if (field_str.size() > column_width - 2 * margin - 1) { - field_str.erase(column_width - 2 * margin - 1, std::string::npos); - field_str.replace(field_str.size() - 3, 3, "..."); - } - data_output.replace(i + 1 + margin, field_str.size(), field_str); - } - } - if (!all_columns_fit) { - data_output.replace(total_width - column_width, 3, "..."); - } - data_output[total_width - 1] = '|'; - std::cout << data_output << std::endl; -} - -/// Helper function for determining maximum length of data. -/// @param data Vector of string representable elements. Elements should have -/// operator '<<' implemented. -/// @param margin Column margin width. -/// @return length needed for representing max size element in vector. Plus -/// one is added because of column start character '|'. -template -static uint64_t GetMaxColumnWidth(const std::vector &data, int margin = 1) { - uint64_t column_width = 0; - for (auto &elem : data) { - std::stringstream field; - PrintValue(field, elem); - column_width = std::max(column_width, field.str().size() + 2 * margin); - } - return column_width + 1; -} - -static void PrintTabular( - const std::vector &header, - const std::vector> &records) { - struct winsize w; - ioctl(STDOUT_FILENO, TIOCGWINSZ, &w); - bool all_columns_fit = true; - - auto num_columns = header.size(); - auto column_width = GetMaxColumnWidth(header); - for (size_t i = 0; i < records.size(); ++i) { - column_width = std::max(column_width, GetMaxColumnWidth(records[i])); - } - column_width = std::max(static_cast(5), - column_width); // set column width to min 5 - auto total_width = column_width * num_columns + 1; - - // Fit to screen width. - if (FLAGS_fit_to_screen && total_width > w.ws_col) { - uint64_t lo = 5; - uint64_t hi = column_width; - uint64_t last = 5; - while (lo < hi) { - uint64_t mid = lo + (hi - lo) / 2; - uint64_t width = mid * num_columns + 1; - if (width <= w.ws_col) { - last = mid; - lo = mid + 1; - } else { - hi = mid - 1; - } - } - column_width = last; - total_width = column_width * num_columns + 1; - // All columns do not fit on screen. - while (total_width > w.ws_col && num_columns > 1) { - num_columns -= 1; - total_width = column_width * num_columns + 1; - all_columns_fit = false; - } - } - - auto line_fill = std::string(total_width, '-'); - for (auto i = 0; i < total_width; i += column_width) { - line_fill[i] = '+'; - } - line_fill[total_width - 1] = '+'; - std::cout << line_fill << std::endl; - // Print Header. - PrintRowTabular(header, total_width, column_width, num_columns, - all_columns_fit); - std::cout << line_fill << std::endl; - // Print Records. 
- for (size_t i = 0; i < records.size(); ++i) { - PrintRowTabular(records[i], total_width, column_width, num_columns, - all_columns_fit); - } - std::cout << line_fill << std::endl; -} - -template -static std::vector FormatCsvFields(const std::vector &fields) { - std::vector formatted; - formatted.reserve(fields.size()); - for (auto &field : fields) { - std::stringstream field_stream; - field_stream << field; - std::string formatted_field(field_stream.str()); - if (FLAGS_csv_doublequote) { - formatted_field = utils::Replace(formatted_field, "\"", "\"\""); - } else { - formatted_field = - utils::Replace(formatted_field, "\"", FLAGS_csv_escapechar + "\""); - } - formatted_field.insert(0, 1, '"'); - formatted_field.append(1, '"'); - formatted.push_back(formatted_field); - } - return formatted; -} - -static void PrintCsv( - const std::vector &header, - const std::vector> &records) { - // Print Header. - auto formatted_header = FormatCsvFields(header); - utils::PrintIterable(std::cout, formatted_header, FLAGS_csv_delimiter); - std::cout << std::endl; - // Print Records. - for (size_t i = 0; i < records.size(); ++i) { - auto formatted_row = FormatCsvFields(records[i]); - utils::PrintIterable(std::cout, formatted_row, FLAGS_csv_delimiter); - std::cout << std::endl; - } -} - -static void Output( - const std::vector &header, - const std::vector> &records) { - if (FLAGS_output_format == kTabularFormat) { - PrintTabular(header, records); - } else if (FLAGS_output_format == kCsvFormat) { - PrintCsv(header, records); - } -} - -int main(int argc, char **argv) { - gflags::SetVersionString(version_string); - gflags::SetUsageMessage(kUsage); - - gflags::ParseCommandLineFlags(&argc, &argv, true); - if (FLAGS_output_format == kCsvFormat && !ValidateCsvDoubleQuote()) { - EchoFailure( - "Unsupported combination of 'csv-doublequote' and 'csv-escapechar'\n" - "flags", - "Run './mg_client --help' for usage."); - return 1; - } - auto password = FLAGS_password; - if (isatty(STDIN_FILENO) && FLAGS_username.size() > 0 && - password.size() == 0) { - SetStdinEcho(false); - auto password_optional = ReadLine("Password: "); - std::cout << std::endl; - if (password_optional) { - password = *password_optional; - } else { - EchoFailure( - "Password not submitted", - fmt::format("Requested password for username {}", FLAGS_username)); - return 1; - } - SetStdinEcho(true); - } - - spdlog::set_level(spdlog::level::err); - - communication::SSLInit sslInit; - -#ifdef HAS_READLINE - using_history(); - int history_len = 0; - rl_attempted_completion_function = Completer; - fs::path history_dir = FLAGS_history; - if (FLAGS_history == - (kDefaultHistoryBaseDir + "/" + kDefaultHistoryMemgraphDir)) { - // Fetch home dir for user. - struct passwd *pw = getpwuid(getuid()); - history_dir = fs::path(pw->pw_dir) / kDefaultHistoryMemgraphDir; - } - if (!utils::EnsureDir(history_dir)) { - EchoFailure("History directory doesn't exist", history_dir); - // Should program exit here or just continue with warning message? - return 1; - } - fs::path history_file = history_dir / kHistoryFilename; - // Read history file. - if (fs::exists(history_file)) { - auto ret = read_history(history_file.string().c_str()); - if (ret != 0) { - EchoFailure("Unable to read history file", history_file); - // Should program exit here or just continue with warning message? - return 1; - } - history_len = history_length; - } - - // Save history function. Used to save readline history after each query. 
- auto save_history = [&history_len, history_file] { - if (!FLAGS_no_history) { - int ret = 0; - // If there was no history, create history file. - // Otherwise, append to existing history. - if (history_len == 0) { - ret = write_history(history_file.string().c_str()); - } else { - ret = append_history(1, history_file.string().c_str()); - } - if (ret != 0) { - EchoFailure("Unable to save history to file", history_file); - return 1; - } - ++history_len; - } - return 0; - }; -#endif - - // Prevent handling shutdown inside a shutdown. For example, SIGINT handler - // being interrupted by SIGTERM before is_shutting_down is set, thus causing - // double shutdown. - sigset_t block_shutdown_signals; - sigemptyset(&block_shutdown_signals); - sigaddset(&block_shutdown_signals, SIGTERM); - sigaddset(&block_shutdown_signals, SIGINT); - - auto shutdown = [](int exit_code = 0) { - if (is_shutting_down) return; - is_shutting_down = 1; - std::quick_exit(exit_code); - }; - - utils::SignalHandler::RegisterHandler(utils::Signal::Terminate, shutdown, - block_shutdown_signals); - utils::SignalHandler::RegisterHandler(utils::Signal::Interupt, shutdown, - block_shutdown_signals); - - // TODO handle endpoint exception. - // It has CHECK in constructor if address is not valid. - io::network::Endpoint endpoint(io::network::ResolveHostname(FLAGS_host), - FLAGS_port); - communication::ClientContext context(FLAGS_use_ssl); - communication::bolt::Client client(&context); - - std::string bolt_client_version = - fmt::format("mg_client/{}", gflags::VersionString()); - try { - client.Connect(endpoint, FLAGS_username, password, bolt_client_version); - } catch (const communication::bolt::ClientFatalException &e) { - EchoFailure("Connection failure", e.what()); - return 1; - } - - EchoInfo(fmt::format("mg_client {}", gflags::VersionString())); - EchoInfo("Type :help for shell usage"); - EchoInfo("Quit the shell by typing Ctrl-D(eof) or :quit"); - EchoInfo(fmt::format("Connected to 'memgraph://{}'", endpoint)); - int num_retries = 3; - while (true) { - auto query = GetQuery(); - if (!query) { - EchoInfo("Bye"); - break; - } - if (query->empty()) continue; - try { - utils::Timer t; - auto ret = client.Execute(*query, {}); - auto elapsed = t.Elapsed().count(); - if (ret.records.size() > 0) Output(ret.fields, ret.records); - if (isatty(STDIN_FILENO)) { - std::string summary; - if (ret.records.size() == 0) { - summary = "Empty set"; - } else if (ret.records.size() == 1) { - summary = std::to_string(ret.records.size()) + " row in set"; - } else { - summary = std::to_string(ret.records.size()) + " rows in set"; - } - std::cout << summary << " (" << fmt::format("{:.3f}", elapsed) - << " sec)" << std::endl; -#ifdef HAS_READLINE - auto history_ret = save_history(); - if (history_ret != 0) return history_ret; -#endif - } - } catch (const communication::bolt::ClientQueryException &e) { - if (!isatty(STDIN_FILENO)) { - EchoFailure("Failed query", *query); - } - EchoFailure("Client received exception", e.what()); - if (!isatty(STDIN_FILENO)) { - return 1; - } - } catch (const communication::bolt::ClientFatalException &e) { - EchoFailure("Client received exception", e.what()); - EchoInfo("Trying to reconnect"); - bool is_connected = false; - client.Close(); - while (num_retries > 0) { - --num_retries; - try { - client.Connect(endpoint, FLAGS_username, FLAGS_password, - bolt_client_version); - is_connected = true; - break; - } catch (const communication::bolt::ClientFatalException &e) { - EchoFailure("Connection failure", e.what()); - } - 
std::this_thread::sleep_for(std::chrono::seconds(1)); - } - if (is_connected) { - num_retries = 3; - EchoInfo(fmt::format("Connected to 'memgraph://{}'", endpoint)); - } else { - EchoFailure("Couldn't connect to", - fmt::format("'memgraph://{}'", endpoint)); - return 1; - } - } - } - return 0; -} diff --git a/tools/tests/test_mg_client b/tools/tests/test_mg_client deleted file mode 100755 index e6ab4b46b..000000000 --- a/tools/tests/test_mg_client +++ /dev/null @@ -1,131 +0,0 @@ -#!/bin/bash - -## Helper functions - -function wait_for_server { - port=$1 - while ! nc -z -w 1 127.0.0.1 $port; do - sleep 0.1 - done - sleep 1 -} - -function echo_info { printf "\033[1;36m~~ $1 ~~\033[0m\n"; } -function echo_success { printf "\033[1;32m~~ $1 ~~\033[0m\n\n"; } -function echo_failure { printf "\033[1;31m~~ $1 ~~\033[0m\n\n"; } - - -## Environment setup -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -cd "$DIR" - -# Create a temporary directory for output files -tmpdir=/tmp/mg_client/output -if [ -d $tmpdir ]; then - rm -rf $tmpdir -fi -mkdir -p $tmpdir -cd $tmpdir - -# Find memgraph binaries. -memgraph_dir="$DIR/../../build" - -# Find mg_client binaries. -client_dir="$memgraph_dir/tools/src" -if [ ! -d $client_dir ]; then - echo_failure "mg-client directory not found" - exit 1 -fi - -# Find tests dir. -tests_dir="$DIR/client" -if [ ! -d $tests_dir ]; then - echo_failure "Directory with tests not found" - exit 1 -fi - -# Find tests input files. -input_dir="$tests_dir/input" -if [ ! -d $input_dir ]; then - echo_failure "Directory with tests input files not found" - exit 1 -fi - -# Check and generate SSL certificates -use_ssl=false -key_file="" -cert_file="" -if [ "$1" == "--use-ssl" ]; then - use_ssl=true - key_file=".key.pem" - cert_file=".cert.pem" - openssl req -new -newkey rsa:4096 -days 365 -nodes -x509 \ - -subj "/C=HR/ST=Zagreb/L=Zagreb/O=Memgraph/CN=db.memgraph.com" \ - -keyout $key_file -out $cert_file || exit 1 -fi - - -## Startup - -# Start the memgraph process and wait for it to start. -echo_info "Starting memgraph" -$memgraph_dir/memgraph --bolt-cert-file=$cert_file --bolt-key-file=$key_file & -pid=$! -wait_for_server 7687 -echo_success "Started memgraph" - - -## Tests - -echo_info "Running tests" -echo # Blank line - -client_flags="--use-ssl=$use_ssl" -test_code=0 -for output_dir in $tests_dir/output_*; do - for filename in $input_dir/*; do - test_name=$(basename $filename) - test_name=${test_name%.*} - output_name="$test_name.txt" - - output_format=$(basename $output_dir) - output_format=${output_format#*_} - run_flags="$client_flags --output-format=$output_format" - - echo_info "Running test '$test_name' with $output_format output" - $client_dir/mg_client $run_flags < $filename > $tmpdir/$test_name - diff -b $tmpdir/$test_name $output_dir/$output_name - test_code=$? - if [ $test_code -ne 0 ]; then - echo_failure "Test '$test_name' with $output_format output failed" - break - else - echo_success "Test '$test_name' with $output_format output passed" - fi - - # Clear database for each test. - $client_dir/mg_client $client_flags <<< "MATCH (n) DETACH DELETE n;" \ - &> /dev/null || exit 1 - done - if [ $test_code -ne 0 ]; then - break - fi -done - - -## Cleanup -echo_info "Starting test cleanup" - -# Shutdown the memgraph process. -kill $pid -wait -n -code_mg=$? - -# Check memgraph exit code. -if [ $code_mg -ne 0 ]; then - echo_failure "The memgraph process didn't terminate properly!" - exit $code_mg -fi -echo_success "Test cleanup done" - -exit $test_code
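One detail worth calling out from tools/github/clang-tidy/grep_error_lines.sh above: the grep -P pattern accepts an optional GitHub Actions log timestamp (seven fractional digits, trailing 'Z') before the 'path:line:col:' locator, so the same filter works on raw clang-tidy output and on CI logs. An equivalent check in Python's re module, handy for testing the pattern offline; the sample lines below are made up for illustration:

    import re

    ERROR_LINE = re.compile(
        r"^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{7}Z )?"  # optional CI timestamp
        r"/[^:]+:\d+:\d+: "                                   # absolute path:line:col:
        r"(error|warning):.*$"
    )

    hit = ("/opt/actions-runner/_work/memgraph/memgraph/src/utils/exceptions.hpp"
           ":71:11: warning: something suspicious [bugprone-example]")
    miss = "ninja: build stopped: subcommand failed."
    assert ERROR_LINE.match(hit)
    assert not ERROR_LINE.match(miss)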